{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999759713578585, "eval_steps": 500, "global_step": 39015, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 26.623228073120117, "learning_rate": 1.707941929974381e-08, "loss": 2.6647, "step": 1 }, { "epoch": 0.0, "grad_norm": 16.228601455688477, "learning_rate": 3.415883859948762e-08, "loss": 2.4728, "step": 2 }, { "epoch": 0.0, "grad_norm": 16.450407028198242, "learning_rate": 5.123825789923143e-08, "loss": 2.0035, "step": 3 }, { "epoch": 0.0, "grad_norm": 14.694679260253906, "learning_rate": 6.831767719897524e-08, "loss": 2.0123, "step": 4 }, { "epoch": 0.0, "grad_norm": 21.48790168762207, "learning_rate": 8.539709649871905e-08, "loss": 2.4318, "step": 5 }, { "epoch": 0.0, "grad_norm": 13.999150276184082, "learning_rate": 1.0247651579846286e-07, "loss": 2.096, "step": 6 }, { "epoch": 0.0, "grad_norm": 18.363306045532227, "learning_rate": 1.1955593509820666e-07, "loss": 2.4165, "step": 7 }, { "epoch": 0.0, "grad_norm": 15.461152076721191, "learning_rate": 1.3663535439795047e-07, "loss": 2.1114, "step": 8 }, { "epoch": 0.0, "grad_norm": 23.37445831298828, "learning_rate": 1.5371477369769428e-07, "loss": 3.1955, "step": 9 }, { "epoch": 0.0, "grad_norm": 20.391740798950195, "learning_rate": 1.707941929974381e-07, "loss": 2.48, "step": 10 }, { "epoch": 0.0, "grad_norm": 16.160728454589844, "learning_rate": 1.8787361229718188e-07, "loss": 2.091, "step": 11 }, { "epoch": 0.0, "grad_norm": 24.420082092285156, "learning_rate": 2.0495303159692572e-07, "loss": 2.6316, "step": 12 }, { "epoch": 0.0, "grad_norm": 15.29490852355957, "learning_rate": 2.2203245089666953e-07, "loss": 2.1345, "step": 13 }, { "epoch": 0.0, "grad_norm": 17.99287986755371, "learning_rate": 2.391118701964133e-07, "loss": 2.3256, "step": 14 }, { "epoch": 0.0, "grad_norm": 27.284839630126953, "learning_rate": 2.5619128949615716e-07, "loss": 2.8994, "step": 15 }, { "epoch": 0.0, "grad_norm": 19.496061325073242, "learning_rate": 2.7327070879590094e-07, "loss": 2.2401, "step": 16 }, { "epoch": 0.0, "grad_norm": 17.969148635864258, "learning_rate": 2.903501280956448e-07, "loss": 2.335, "step": 17 }, { "epoch": 0.0, "grad_norm": 16.94575309753418, "learning_rate": 3.0742954739538857e-07, "loss": 2.1719, "step": 18 }, { "epoch": 0.0, "grad_norm": 18.35658836364746, "learning_rate": 3.245089666951324e-07, "loss": 2.4023, "step": 19 }, { "epoch": 0.0, "grad_norm": 12.907666206359863, "learning_rate": 3.415883859948762e-07, "loss": 1.8126, "step": 20 }, { "epoch": 0.0, "grad_norm": 9.585938453674316, "learning_rate": 3.5866780529462e-07, "loss": 1.6911, "step": 21 }, { "epoch": 0.0, "grad_norm": 20.32583999633789, "learning_rate": 3.7574722459436376e-07, "loss": 2.2428, "step": 22 }, { "epoch": 0.0, "grad_norm": 24.550111770629883, "learning_rate": 3.9282664389410765e-07, "loss": 2.5396, "step": 23 }, { "epoch": 0.0, "grad_norm": 28.81334686279297, "learning_rate": 4.0990606319385144e-07, "loss": 3.0622, "step": 24 }, { "epoch": 0.0, "grad_norm": 12.753887176513672, "learning_rate": 4.269854824935953e-07, "loss": 2.0031, "step": 25 }, { "epoch": 0.0, "grad_norm": 17.898876190185547, "learning_rate": 4.4406490179333906e-07, "loss": 2.2238, "step": 26 }, { "epoch": 0.0, "grad_norm": 23.089675903320312, "learning_rate": 4.6114432109308285e-07, "loss": 2.3943, "step": 27 }, { "epoch": 0.0, "grad_norm": 13.874866485595703, "learning_rate": 4.782237403928266e-07, "loss": 2.112, "step": 28 }, { "epoch": 0.0, "grad_norm": 21.723756790161133, "learning_rate": 4.953031596925705e-07, "loss": 2.768, "step": 29 }, { "epoch": 0.0, "grad_norm": 27.98687171936035, "learning_rate": 5.123825789923143e-07, "loss": 2.7871, "step": 30 }, { "epoch": 0.0, "grad_norm": 21.161762237548828, "learning_rate": 5.294619982920581e-07, "loss": 2.6257, "step": 31 }, { "epoch": 0.0, "grad_norm": 20.001375198364258, "learning_rate": 5.465414175918019e-07, "loss": 2.4487, "step": 32 }, { "epoch": 0.0, "grad_norm": 13.298815727233887, "learning_rate": 5.636208368915457e-07, "loss": 2.1659, "step": 33 }, { "epoch": 0.0, "grad_norm": 20.84868812561035, "learning_rate": 5.807002561912896e-07, "loss": 2.4226, "step": 34 }, { "epoch": 0.0, "grad_norm": 13.097651481628418, "learning_rate": 5.977796754910333e-07, "loss": 1.701, "step": 35 }, { "epoch": 0.0, "grad_norm": 22.089733123779297, "learning_rate": 6.148590947907771e-07, "loss": 2.5445, "step": 36 }, { "epoch": 0.0, "grad_norm": 26.06630516052246, "learning_rate": 6.31938514090521e-07, "loss": 2.5221, "step": 37 }, { "epoch": 0.0, "grad_norm": 22.89533805847168, "learning_rate": 6.490179333902648e-07, "loss": 2.4354, "step": 38 }, { "epoch": 0.0, "grad_norm": 11.209356307983398, "learning_rate": 6.660973526900086e-07, "loss": 1.6869, "step": 39 }, { "epoch": 0.0, "grad_norm": 38.81280517578125, "learning_rate": 6.831767719897524e-07, "loss": 3.1804, "step": 40 }, { "epoch": 0.0, "grad_norm": 16.885278701782227, "learning_rate": 7.002561912894962e-07, "loss": 2.1967, "step": 41 }, { "epoch": 0.0, "grad_norm": 16.307231903076172, "learning_rate": 7.1733561058924e-07, "loss": 1.8982, "step": 42 }, { "epoch": 0.0, "grad_norm": 19.922805786132812, "learning_rate": 7.344150298889837e-07, "loss": 2.0398, "step": 43 }, { "epoch": 0.0, "grad_norm": 22.304086685180664, "learning_rate": 7.514944491887275e-07, "loss": 2.3999, "step": 44 }, { "epoch": 0.0, "grad_norm": 21.227266311645508, "learning_rate": 7.685738684884715e-07, "loss": 2.3659, "step": 45 }, { "epoch": 0.0, "grad_norm": 14.555649757385254, "learning_rate": 7.856532877882153e-07, "loss": 2.0992, "step": 46 }, { "epoch": 0.0, "grad_norm": 17.047693252563477, "learning_rate": 8.027327070879591e-07, "loss": 1.857, "step": 47 }, { "epoch": 0.0, "grad_norm": 15.166969299316406, "learning_rate": 8.198121263877029e-07, "loss": 1.9477, "step": 48 }, { "epoch": 0.0, "grad_norm": 20.212709426879883, "learning_rate": 8.368915456874467e-07, "loss": 1.9358, "step": 49 }, { "epoch": 0.0, "grad_norm": 23.74566078186035, "learning_rate": 8.539709649871906e-07, "loss": 2.2781, "step": 50 }, { "epoch": 0.0, "grad_norm": 20.762487411499023, "learning_rate": 8.710503842869343e-07, "loss": 2.0395, "step": 51 }, { "epoch": 0.0, "grad_norm": 20.073095321655273, "learning_rate": 8.881298035866781e-07, "loss": 2.1746, "step": 52 }, { "epoch": 0.0, "grad_norm": 17.056549072265625, "learning_rate": 9.052092228864219e-07, "loss": 1.9481, "step": 53 }, { "epoch": 0.0, "grad_norm": 13.391158103942871, "learning_rate": 9.222886421861657e-07, "loss": 1.8299, "step": 54 }, { "epoch": 0.0, "grad_norm": 32.10649871826172, "learning_rate": 9.393680614859095e-07, "loss": 2.5126, "step": 55 }, { "epoch": 0.0, "grad_norm": 16.605857849121094, "learning_rate": 9.564474807856533e-07, "loss": 1.8924, "step": 56 }, { "epoch": 0.0, "grad_norm": 20.161941528320312, "learning_rate": 9.735269000853973e-07, "loss": 2.2113, "step": 57 }, { "epoch": 0.0, "grad_norm": 20.720741271972656, "learning_rate": 9.90606319385141e-07, "loss": 1.9245, "step": 58 }, { "epoch": 0.0, "grad_norm": 8.267911911010742, "learning_rate": 1.0076857386848848e-06, "loss": 1.569, "step": 59 }, { "epoch": 0.0, "grad_norm": 18.517112731933594, "learning_rate": 1.0247651579846286e-06, "loss": 1.8604, "step": 60 }, { "epoch": 0.0, "grad_norm": 16.331003189086914, "learning_rate": 1.0418445772843724e-06, "loss": 1.8519, "step": 61 }, { "epoch": 0.0, "grad_norm": 17.77668571472168, "learning_rate": 1.0589239965841162e-06, "loss": 1.885, "step": 62 }, { "epoch": 0.0, "grad_norm": 6.866775035858154, "learning_rate": 1.07600341588386e-06, "loss": 1.4775, "step": 63 }, { "epoch": 0.0, "grad_norm": 17.954511642456055, "learning_rate": 1.0930828351836038e-06, "loss": 1.9072, "step": 64 }, { "epoch": 0.0, "grad_norm": 18.866588592529297, "learning_rate": 1.1101622544833476e-06, "loss": 1.856, "step": 65 }, { "epoch": 0.0, "grad_norm": 11.334185600280762, "learning_rate": 1.1272416737830913e-06, "loss": 1.7029, "step": 66 }, { "epoch": 0.0, "grad_norm": 18.313156127929688, "learning_rate": 1.1443210930828353e-06, "loss": 1.731, "step": 67 }, { "epoch": 0.0, "grad_norm": 9.609277725219727, "learning_rate": 1.1614005123825791e-06, "loss": 1.3872, "step": 68 }, { "epoch": 0.0, "grad_norm": 8.96480655670166, "learning_rate": 1.178479931682323e-06, "loss": 1.4201, "step": 69 }, { "epoch": 0.0, "grad_norm": 12.787301063537598, "learning_rate": 1.1955593509820667e-06, "loss": 1.524, "step": 70 }, { "epoch": 0.0, "grad_norm": 8.323314666748047, "learning_rate": 1.2126387702818105e-06, "loss": 1.5355, "step": 71 }, { "epoch": 0.0, "grad_norm": 11.577178955078125, "learning_rate": 1.2297181895815543e-06, "loss": 1.7045, "step": 72 }, { "epoch": 0.0, "grad_norm": 6.475673675537109, "learning_rate": 1.2467976088812983e-06, "loss": 1.3079, "step": 73 }, { "epoch": 0.0, "grad_norm": 10.568434715270996, "learning_rate": 1.263877028181042e-06, "loss": 1.3682, "step": 74 }, { "epoch": 0.0, "grad_norm": 6.140779495239258, "learning_rate": 1.2809564474807856e-06, "loss": 1.3712, "step": 75 }, { "epoch": 0.0, "grad_norm": 10.099708557128906, "learning_rate": 1.2980358667805296e-06, "loss": 1.4113, "step": 76 }, { "epoch": 0.0, "grad_norm": 6.626410961151123, "learning_rate": 1.3151152860802734e-06, "loss": 1.4414, "step": 77 }, { "epoch": 0.0, "grad_norm": 4.871510982513428, "learning_rate": 1.3321947053800172e-06, "loss": 1.2487, "step": 78 }, { "epoch": 0.0, "grad_norm": 6.250209331512451, "learning_rate": 1.3492741246797612e-06, "loss": 1.2984, "step": 79 }, { "epoch": 0.0, "grad_norm": 5.746150970458984, "learning_rate": 1.3663535439795048e-06, "loss": 1.2788, "step": 80 }, { "epoch": 0.0, "grad_norm": 7.341178894042969, "learning_rate": 1.3834329632792488e-06, "loss": 1.2882, "step": 81 }, { "epoch": 0.0, "grad_norm": 8.447110176086426, "learning_rate": 1.4005123825789923e-06, "loss": 1.1734, "step": 82 }, { "epoch": 0.0, "grad_norm": 9.208840370178223, "learning_rate": 1.4175918018787363e-06, "loss": 1.315, "step": 83 }, { "epoch": 0.0, "grad_norm": 6.752046585083008, "learning_rate": 1.43467122117848e-06, "loss": 1.3301, "step": 84 }, { "epoch": 0.0, "grad_norm": 5.302206039428711, "learning_rate": 1.451750640478224e-06, "loss": 1.152, "step": 85 }, { "epoch": 0.0, "grad_norm": 6.00246000289917, "learning_rate": 1.4688300597779675e-06, "loss": 1.2738, "step": 86 }, { "epoch": 0.0, "grad_norm": 6.86639928817749, "learning_rate": 1.4859094790777115e-06, "loss": 1.3223, "step": 87 }, { "epoch": 0.0, "grad_norm": 8.224950790405273, "learning_rate": 1.502988898377455e-06, "loss": 1.2736, "step": 88 }, { "epoch": 0.0, "grad_norm": 6.302464962005615, "learning_rate": 1.520068317677199e-06, "loss": 1.2885, "step": 89 }, { "epoch": 0.0, "grad_norm": 6.44295072555542, "learning_rate": 1.537147736976943e-06, "loss": 1.0981, "step": 90 }, { "epoch": 0.0, "grad_norm": 5.970033645629883, "learning_rate": 1.5542271562766866e-06, "loss": 1.1372, "step": 91 }, { "epoch": 0.0, "grad_norm": 6.707165241241455, "learning_rate": 1.5713065755764306e-06, "loss": 1.253, "step": 92 }, { "epoch": 0.0, "grad_norm": 5.83744478225708, "learning_rate": 1.5883859948761742e-06, "loss": 1.0691, "step": 93 }, { "epoch": 0.0, "grad_norm": 5.115400791168213, "learning_rate": 1.6054654141759182e-06, "loss": 1.1319, "step": 94 }, { "epoch": 0.0, "grad_norm": 6.039933681488037, "learning_rate": 1.622544833475662e-06, "loss": 1.184, "step": 95 }, { "epoch": 0.0, "grad_norm": 12.560708999633789, "learning_rate": 1.6396242527754058e-06, "loss": 1.1874, "step": 96 }, { "epoch": 0.0, "grad_norm": 5.57773494720459, "learning_rate": 1.6567036720751495e-06, "loss": 1.1532, "step": 97 }, { "epoch": 0.0, "grad_norm": 5.837147235870361, "learning_rate": 1.6737830913748933e-06, "loss": 0.9675, "step": 98 }, { "epoch": 0.0, "grad_norm": 4.668105602264404, "learning_rate": 1.6908625106746371e-06, "loss": 1.2127, "step": 99 }, { "epoch": 0.0, "grad_norm": 9.814312934875488, "learning_rate": 1.7079419299743811e-06, "loss": 1.2841, "step": 100 }, { "epoch": 0.0, "grad_norm": 4.474944114685059, "learning_rate": 1.725021349274125e-06, "loss": 1.0193, "step": 101 }, { "epoch": 0.0, "grad_norm": 5.401273727416992, "learning_rate": 1.7421007685738687e-06, "loss": 1.1046, "step": 102 }, { "epoch": 0.0, "grad_norm": 13.345062255859375, "learning_rate": 1.7591801878736125e-06, "loss": 1.2372, "step": 103 }, { "epoch": 0.0, "grad_norm": 4.053001880645752, "learning_rate": 1.7762596071733563e-06, "loss": 1.1385, "step": 104 }, { "epoch": 0.0, "grad_norm": 5.15719747543335, "learning_rate": 1.7933390264731003e-06, "loss": 1.0215, "step": 105 }, { "epoch": 0.0, "grad_norm": 9.619086265563965, "learning_rate": 1.8104184457728438e-06, "loss": 1.2708, "step": 106 }, { "epoch": 0.0, "grad_norm": 7.211161136627197, "learning_rate": 1.8274978650725878e-06, "loss": 1.0187, "step": 107 }, { "epoch": 0.0, "grad_norm": 6.346713066101074, "learning_rate": 1.8445772843723314e-06, "loss": 0.9849, "step": 108 }, { "epoch": 0.0, "grad_norm": 6.219532489776611, "learning_rate": 1.8616567036720754e-06, "loss": 1.1058, "step": 109 }, { "epoch": 0.0, "grad_norm": 3.220867395401001, "learning_rate": 1.878736122971819e-06, "loss": 1.0119, "step": 110 }, { "epoch": 0.0, "grad_norm": 7.2745161056518555, "learning_rate": 1.895815542271563e-06, "loss": 1.2039, "step": 111 }, { "epoch": 0.0, "grad_norm": 10.53455924987793, "learning_rate": 1.9128949615713065e-06, "loss": 1.0803, "step": 112 }, { "epoch": 0.0, "grad_norm": 4.314168453216553, "learning_rate": 1.9299743808710505e-06, "loss": 1.0683, "step": 113 }, { "epoch": 0.0, "grad_norm": 8.949599266052246, "learning_rate": 1.9470538001707945e-06, "loss": 1.2951, "step": 114 }, { "epoch": 0.0, "grad_norm": 5.298095703125, "learning_rate": 1.964133219470538e-06, "loss": 1.1222, "step": 115 }, { "epoch": 0.0, "grad_norm": 5.240574836730957, "learning_rate": 1.981212638770282e-06, "loss": 1.0756, "step": 116 }, { "epoch": 0.0, "grad_norm": 3.632437229156494, "learning_rate": 1.9982920580700257e-06, "loss": 1.174, "step": 117 }, { "epoch": 0.0, "grad_norm": 5.155664920806885, "learning_rate": 2.0153714773697697e-06, "loss": 1.0152, "step": 118 }, { "epoch": 0.0, "grad_norm": 5.007312297821045, "learning_rate": 2.0324508966695133e-06, "loss": 0.9827, "step": 119 }, { "epoch": 0.0, "grad_norm": 4.004393100738525, "learning_rate": 2.0495303159692572e-06, "loss": 0.9099, "step": 120 }, { "epoch": 0.0, "grad_norm": 5.806015491485596, "learning_rate": 2.066609735269001e-06, "loss": 1.0657, "step": 121 }, { "epoch": 0.0, "grad_norm": 4.669259548187256, "learning_rate": 2.083689154568745e-06, "loss": 0.9002, "step": 122 }, { "epoch": 0.0, "grad_norm": 4.537733554840088, "learning_rate": 2.1007685738684884e-06, "loss": 1.1081, "step": 123 }, { "epoch": 0.0, "grad_norm": 4.6660943031311035, "learning_rate": 2.1178479931682324e-06, "loss": 0.8662, "step": 124 }, { "epoch": 0.0, "grad_norm": 6.164588928222656, "learning_rate": 2.134927412467976e-06, "loss": 0.9243, "step": 125 }, { "epoch": 0.0, "grad_norm": 6.482878684997559, "learning_rate": 2.15200683176772e-06, "loss": 1.0512, "step": 126 }, { "epoch": 0.0, "grad_norm": 3.6324915885925293, "learning_rate": 2.169086251067464e-06, "loss": 1.0732, "step": 127 }, { "epoch": 0.0, "grad_norm": 5.1955647468566895, "learning_rate": 2.1861656703672075e-06, "loss": 0.9349, "step": 128 }, { "epoch": 0.0, "grad_norm": 7.885522842407227, "learning_rate": 2.2032450896669515e-06, "loss": 0.9426, "step": 129 }, { "epoch": 0.0, "grad_norm": 7.361526012420654, "learning_rate": 2.220324508966695e-06, "loss": 1.0341, "step": 130 }, { "epoch": 0.0, "grad_norm": 11.259896278381348, "learning_rate": 2.237403928266439e-06, "loss": 1.3438, "step": 131 }, { "epoch": 0.0, "grad_norm": 6.278465747833252, "learning_rate": 2.2544833475661827e-06, "loss": 0.9909, "step": 132 }, { "epoch": 0.0, "grad_norm": 6.575812339782715, "learning_rate": 2.2715627668659267e-06, "loss": 0.9974, "step": 133 }, { "epoch": 0.0, "grad_norm": 3.2091546058654785, "learning_rate": 2.2886421861656707e-06, "loss": 1.0611, "step": 134 }, { "epoch": 0.0, "grad_norm": 8.074238777160645, "learning_rate": 2.3057216054654142e-06, "loss": 1.0536, "step": 135 }, { "epoch": 0.0, "grad_norm": 5.864865303039551, "learning_rate": 2.3228010247651582e-06, "loss": 1.0328, "step": 136 }, { "epoch": 0.0, "grad_norm": 4.718499660491943, "learning_rate": 2.339880444064902e-06, "loss": 1.0955, "step": 137 }, { "epoch": 0.0, "grad_norm": 4.311838626861572, "learning_rate": 2.356959863364646e-06, "loss": 0.8977, "step": 138 }, { "epoch": 0.0, "grad_norm": 4.500857353210449, "learning_rate": 2.3740392826643894e-06, "loss": 0.8769, "step": 139 }, { "epoch": 0.0, "grad_norm": 5.713533401489258, "learning_rate": 2.3911187019641334e-06, "loss": 0.9832, "step": 140 }, { "epoch": 0.0, "grad_norm": 4.9846415519714355, "learning_rate": 2.4081981212638774e-06, "loss": 1.0346, "step": 141 }, { "epoch": 0.0, "grad_norm": 4.787264347076416, "learning_rate": 2.425277540563621e-06, "loss": 0.9731, "step": 142 }, { "epoch": 0.0, "grad_norm": 3.257652759552002, "learning_rate": 2.442356959863365e-06, "loss": 1.0089, "step": 143 }, { "epoch": 0.0, "grad_norm": 5.066526889801025, "learning_rate": 2.4594363791631085e-06, "loss": 1.0313, "step": 144 }, { "epoch": 0.0, "grad_norm": 4.313302993774414, "learning_rate": 2.4765157984628525e-06, "loss": 0.9062, "step": 145 }, { "epoch": 0.0, "grad_norm": 6.13519287109375, "learning_rate": 2.4935952177625965e-06, "loss": 1.0983, "step": 146 }, { "epoch": 0.0, "grad_norm": 6.110402584075928, "learning_rate": 2.51067463706234e-06, "loss": 1.1289, "step": 147 }, { "epoch": 0.0, "grad_norm": 3.801274299621582, "learning_rate": 2.527754056362084e-06, "loss": 0.9595, "step": 148 }, { "epoch": 0.0, "grad_norm": 3.8572349548339844, "learning_rate": 2.5448334756618277e-06, "loss": 0.8296, "step": 149 }, { "epoch": 0.0, "grad_norm": 6.0982279777526855, "learning_rate": 2.5619128949615712e-06, "loss": 0.9875, "step": 150 }, { "epoch": 0.0, "grad_norm": 8.13244915008545, "learning_rate": 2.5789923142613157e-06, "loss": 1.0763, "step": 151 }, { "epoch": 0.0, "grad_norm": 3.234046697616577, "learning_rate": 2.5960717335610592e-06, "loss": 1.1485, "step": 152 }, { "epoch": 0.0, "grad_norm": 4.441356658935547, "learning_rate": 2.613151152860803e-06, "loss": 1.0302, "step": 153 }, { "epoch": 0.0, "grad_norm": 4.8266921043396, "learning_rate": 2.630230572160547e-06, "loss": 0.9415, "step": 154 }, { "epoch": 0.0, "grad_norm": 5.886941432952881, "learning_rate": 2.647309991460291e-06, "loss": 1.0281, "step": 155 }, { "epoch": 0.0, "grad_norm": 6.281396389007568, "learning_rate": 2.6643894107600344e-06, "loss": 0.9057, "step": 156 }, { "epoch": 0.0, "grad_norm": 5.030860424041748, "learning_rate": 2.681468830059778e-06, "loss": 0.9877, "step": 157 }, { "epoch": 0.0, "grad_norm": 5.768747806549072, "learning_rate": 2.6985482493595224e-06, "loss": 0.9916, "step": 158 }, { "epoch": 0.0, "grad_norm": 4.738161563873291, "learning_rate": 2.715627668659266e-06, "loss": 0.9699, "step": 159 }, { "epoch": 0.0, "grad_norm": 5.647141456604004, "learning_rate": 2.7327070879590095e-06, "loss": 0.9364, "step": 160 }, { "epoch": 0.0, "grad_norm": 6.329531192779541, "learning_rate": 2.749786507258753e-06, "loss": 0.9373, "step": 161 }, { "epoch": 0.0, "grad_norm": 3.467031955718994, "learning_rate": 2.7668659265584975e-06, "loss": 1.0286, "step": 162 }, { "epoch": 0.0, "grad_norm": 3.272486448287964, "learning_rate": 2.783945345858241e-06, "loss": 0.9424, "step": 163 }, { "epoch": 0.0, "grad_norm": 4.080572605133057, "learning_rate": 2.8010247651579847e-06, "loss": 0.8323, "step": 164 }, { "epoch": 0.0, "grad_norm": 3.863115072250366, "learning_rate": 2.818104184457729e-06, "loss": 1.1335, "step": 165 }, { "epoch": 0.0, "grad_norm": 6.054915428161621, "learning_rate": 2.8351836037574727e-06, "loss": 0.935, "step": 166 }, { "epoch": 0.0, "grad_norm": 7.2071309089660645, "learning_rate": 2.8522630230572162e-06, "loss": 0.8272, "step": 167 }, { "epoch": 0.0, "grad_norm": 2.987950086593628, "learning_rate": 2.86934244235696e-06, "loss": 0.9115, "step": 168 }, { "epoch": 0.0, "grad_norm": 4.869021415710449, "learning_rate": 2.8864218616567042e-06, "loss": 1.1176, "step": 169 }, { "epoch": 0.0, "grad_norm": 6.318040370941162, "learning_rate": 2.903501280956448e-06, "loss": 0.9075, "step": 170 }, { "epoch": 0.0, "grad_norm": 6.166134834289551, "learning_rate": 2.9205807002561914e-06, "loss": 0.9606, "step": 171 }, { "epoch": 0.0, "grad_norm": 3.388665199279785, "learning_rate": 2.937660119555935e-06, "loss": 0.9049, "step": 172 }, { "epoch": 0.0, "grad_norm": 2.9847283363342285, "learning_rate": 2.9547395388556794e-06, "loss": 0.9289, "step": 173 }, { "epoch": 0.0, "grad_norm": 6.4618239402771, "learning_rate": 2.971818958155423e-06, "loss": 0.9711, "step": 174 }, { "epoch": 0.0, "grad_norm": 8.157238006591797, "learning_rate": 2.9888983774551665e-06, "loss": 1.0333, "step": 175 }, { "epoch": 0.0, "grad_norm": 4.640444755554199, "learning_rate": 3.00597779675491e-06, "loss": 1.0248, "step": 176 }, { "epoch": 0.0, "grad_norm": 6.423614025115967, "learning_rate": 3.0230572160546545e-06, "loss": 0.8703, "step": 177 }, { "epoch": 0.0, "grad_norm": 3.9806699752807617, "learning_rate": 3.040136635354398e-06, "loss": 0.9074, "step": 178 }, { "epoch": 0.0, "grad_norm": 4.2115559577941895, "learning_rate": 3.0572160546541417e-06, "loss": 1.0635, "step": 179 }, { "epoch": 0.0, "grad_norm": 6.954541206359863, "learning_rate": 3.074295473953886e-06, "loss": 0.943, "step": 180 }, { "epoch": 0.0, "grad_norm": 4.041561603546143, "learning_rate": 3.0913748932536297e-06, "loss": 0.8607, "step": 181 }, { "epoch": 0.0, "grad_norm": 4.094394683837891, "learning_rate": 3.1084543125533732e-06, "loss": 1.1325, "step": 182 }, { "epoch": 0.0, "grad_norm": 5.158052444458008, "learning_rate": 3.1255337318531172e-06, "loss": 0.813, "step": 183 }, { "epoch": 0.0, "grad_norm": 4.360172748565674, "learning_rate": 3.1426131511528612e-06, "loss": 0.9972, "step": 184 }, { "epoch": 0.0, "grad_norm": 3.5655720233917236, "learning_rate": 3.159692570452605e-06, "loss": 0.9016, "step": 185 }, { "epoch": 0.0, "grad_norm": 4.075615406036377, "learning_rate": 3.1767719897523484e-06, "loss": 0.964, "step": 186 }, { "epoch": 0.0, "grad_norm": 6.554036617279053, "learning_rate": 3.1938514090520924e-06, "loss": 0.8784, "step": 187 }, { "epoch": 0.0, "grad_norm": 6.020033359527588, "learning_rate": 3.2109308283518364e-06, "loss": 1.0757, "step": 188 }, { "epoch": 0.0, "grad_norm": 4.077256202697754, "learning_rate": 3.22801024765158e-06, "loss": 0.9232, "step": 189 }, { "epoch": 0.0, "grad_norm": 5.943790435791016, "learning_rate": 3.245089666951324e-06, "loss": 0.9482, "step": 190 }, { "epoch": 0.0, "grad_norm": 4.016002178192139, "learning_rate": 3.262169086251068e-06, "loss": 0.8427, "step": 191 }, { "epoch": 0.0, "grad_norm": 3.983959674835205, "learning_rate": 3.2792485055508115e-06, "loss": 0.8852, "step": 192 }, { "epoch": 0.0, "grad_norm": 4.907987117767334, "learning_rate": 3.2963279248505555e-06, "loss": 0.9905, "step": 193 }, { "epoch": 0.0, "grad_norm": 6.103016376495361, "learning_rate": 3.313407344150299e-06, "loss": 0.9117, "step": 194 }, { "epoch": 0.0, "grad_norm": 4.986430644989014, "learning_rate": 3.330486763450043e-06, "loss": 1.0233, "step": 195 }, { "epoch": 0.01, "grad_norm": 3.3456928730010986, "learning_rate": 3.3475661827497867e-06, "loss": 0.9998, "step": 196 }, { "epoch": 0.01, "grad_norm": 3.6677606105804443, "learning_rate": 3.3646456020495307e-06, "loss": 0.7612, "step": 197 }, { "epoch": 0.01, "grad_norm": 4.829411029815674, "learning_rate": 3.3817250213492742e-06, "loss": 0.801, "step": 198 }, { "epoch": 0.01, "grad_norm": 5.238762378692627, "learning_rate": 3.3988044406490182e-06, "loss": 0.8312, "step": 199 }, { "epoch": 0.01, "grad_norm": 5.156816482543945, "learning_rate": 3.4158838599487622e-06, "loss": 1.1202, "step": 200 }, { "epoch": 0.01, "grad_norm": 5.391708850860596, "learning_rate": 3.432963279248506e-06, "loss": 1.0221, "step": 201 }, { "epoch": 0.01, "grad_norm": 3.1787288188934326, "learning_rate": 3.45004269854825e-06, "loss": 0.9447, "step": 202 }, { "epoch": 0.01, "grad_norm": 5.844205856323242, "learning_rate": 3.4671221178479934e-06, "loss": 1.1106, "step": 203 }, { "epoch": 0.01, "grad_norm": 3.650352954864502, "learning_rate": 3.4842015371477374e-06, "loss": 0.8253, "step": 204 }, { "epoch": 0.01, "grad_norm": 4.3158063888549805, "learning_rate": 3.501280956447481e-06, "loss": 0.9716, "step": 205 }, { "epoch": 0.01, "grad_norm": 4.114768028259277, "learning_rate": 3.518360375747225e-06, "loss": 0.9673, "step": 206 }, { "epoch": 0.01, "grad_norm": 3.400887966156006, "learning_rate": 3.535439795046969e-06, "loss": 0.9507, "step": 207 }, { "epoch": 0.01, "grad_norm": 5.3997297286987305, "learning_rate": 3.5525192143467125e-06, "loss": 0.7469, "step": 208 }, { "epoch": 0.01, "grad_norm": 13.219932556152344, "learning_rate": 3.569598633646456e-06, "loss": 1.2165, "step": 209 }, { "epoch": 0.01, "grad_norm": 3.827848434448242, "learning_rate": 3.5866780529462005e-06, "loss": 0.7306, "step": 210 }, { "epoch": 0.01, "grad_norm": 3.611203670501709, "learning_rate": 3.603757472245944e-06, "loss": 0.8639, "step": 211 }, { "epoch": 0.01, "grad_norm": 4.663977146148682, "learning_rate": 3.6208368915456876e-06, "loss": 0.8654, "step": 212 }, { "epoch": 0.01, "grad_norm": 4.6009745597839355, "learning_rate": 3.6379163108454312e-06, "loss": 0.9091, "step": 213 }, { "epoch": 0.01, "grad_norm": 5.685643196105957, "learning_rate": 3.6549957301451756e-06, "loss": 0.9325, "step": 214 }, { "epoch": 0.01, "grad_norm": 7.5004472732543945, "learning_rate": 3.6720751494449192e-06, "loss": 0.8747, "step": 215 }, { "epoch": 0.01, "grad_norm": 22.742950439453125, "learning_rate": 3.689154568744663e-06, "loss": 1.0778, "step": 216 }, { "epoch": 0.01, "grad_norm": 3.2548704147338867, "learning_rate": 3.7062339880444072e-06, "loss": 0.863, "step": 217 }, { "epoch": 0.01, "grad_norm": 5.923584461212158, "learning_rate": 3.7233134073441508e-06, "loss": 0.9149, "step": 218 }, { "epoch": 0.01, "grad_norm": 3.6704797744750977, "learning_rate": 3.7403928266438944e-06, "loss": 0.7923, "step": 219 }, { "epoch": 0.01, "grad_norm": 7.32305908203125, "learning_rate": 3.757472245943638e-06, "loss": 0.9942, "step": 220 }, { "epoch": 0.01, "grad_norm": 4.095962047576904, "learning_rate": 3.7745516652433824e-06, "loss": 1.0819, "step": 221 }, { "epoch": 0.01, "grad_norm": 5.538272857666016, "learning_rate": 3.791631084543126e-06, "loss": 0.8884, "step": 222 }, { "epoch": 0.01, "grad_norm": 3.717073678970337, "learning_rate": 3.8087105038428695e-06, "loss": 0.9032, "step": 223 }, { "epoch": 0.01, "grad_norm": 7.4549641609191895, "learning_rate": 3.825789923142613e-06, "loss": 0.9744, "step": 224 }, { "epoch": 0.01, "grad_norm": 3.7365753650665283, "learning_rate": 3.842869342442357e-06, "loss": 0.9452, "step": 225 }, { "epoch": 0.01, "grad_norm": 3.1889145374298096, "learning_rate": 3.859948761742101e-06, "loss": 1.0184, "step": 226 }, { "epoch": 0.01, "grad_norm": 3.9221529960632324, "learning_rate": 3.877028181041845e-06, "loss": 0.9321, "step": 227 }, { "epoch": 0.01, "grad_norm": 4.830116271972656, "learning_rate": 3.894107600341589e-06, "loss": 0.7366, "step": 228 }, { "epoch": 0.01, "grad_norm": 2.6991567611694336, "learning_rate": 3.911187019641332e-06, "loss": 0.762, "step": 229 }, { "epoch": 0.01, "grad_norm": 2.3814616203308105, "learning_rate": 3.928266438941076e-06, "loss": 0.9505, "step": 230 }, { "epoch": 0.01, "grad_norm": 4.2140960693359375, "learning_rate": 3.94534585824082e-06, "loss": 1.1405, "step": 231 }, { "epoch": 0.01, "grad_norm": 4.433155059814453, "learning_rate": 3.962425277540564e-06, "loss": 0.8588, "step": 232 }, { "epoch": 0.01, "grad_norm": 4.42085599899292, "learning_rate": 3.979504696840308e-06, "loss": 0.8136, "step": 233 }, { "epoch": 0.01, "grad_norm": 4.771530628204346, "learning_rate": 3.996584116140051e-06, "loss": 0.911, "step": 234 }, { "epoch": 0.01, "grad_norm": 2.6656723022460938, "learning_rate": 4.013663535439795e-06, "loss": 0.8738, "step": 235 }, { "epoch": 0.01, "grad_norm": 3.3199708461761475, "learning_rate": 4.030742954739539e-06, "loss": 0.8284, "step": 236 }, { "epoch": 0.01, "grad_norm": 4.374816417694092, "learning_rate": 4.047822374039283e-06, "loss": 0.8835, "step": 237 }, { "epoch": 0.01, "grad_norm": 6.100769996643066, "learning_rate": 4.0649017933390265e-06, "loss": 0.9549, "step": 238 }, { "epoch": 0.01, "grad_norm": 2.870486259460449, "learning_rate": 4.0819812126387705e-06, "loss": 0.9277, "step": 239 }, { "epoch": 0.01, "grad_norm": 5.611959457397461, "learning_rate": 4.0990606319385145e-06, "loss": 0.8437, "step": 240 }, { "epoch": 0.01, "grad_norm": 8.18527603149414, "learning_rate": 4.1161400512382585e-06, "loss": 0.9602, "step": 241 }, { "epoch": 0.01, "grad_norm": 3.103623390197754, "learning_rate": 4.133219470538002e-06, "loss": 0.9392, "step": 242 }, { "epoch": 0.01, "grad_norm": 5.806754112243652, "learning_rate": 4.1502988898377465e-06, "loss": 1.0102, "step": 243 }, { "epoch": 0.01, "grad_norm": 5.686233997344971, "learning_rate": 4.16737830913749e-06, "loss": 1.0145, "step": 244 }, { "epoch": 0.01, "grad_norm": 3.954648017883301, "learning_rate": 4.184457728437234e-06, "loss": 0.9722, "step": 245 }, { "epoch": 0.01, "grad_norm": 6.106964111328125, "learning_rate": 4.201537147736977e-06, "loss": 1.2455, "step": 246 }, { "epoch": 0.01, "grad_norm": 5.535809516906738, "learning_rate": 4.218616567036722e-06, "loss": 1.046, "step": 247 }, { "epoch": 0.01, "grad_norm": 2.93692946434021, "learning_rate": 4.235695986336465e-06, "loss": 0.7796, "step": 248 }, { "epoch": 0.01, "grad_norm": 5.471179485321045, "learning_rate": 4.252775405636209e-06, "loss": 0.8814, "step": 249 }, { "epoch": 0.01, "grad_norm": 4.182723045349121, "learning_rate": 4.269854824935952e-06, "loss": 0.9226, "step": 250 }, { "epoch": 0.01, "grad_norm": 8.318793296813965, "learning_rate": 4.286934244235697e-06, "loss": 0.9657, "step": 251 }, { "epoch": 0.01, "grad_norm": 6.207652568817139, "learning_rate": 4.30401366353544e-06, "loss": 1.0427, "step": 252 }, { "epoch": 0.01, "grad_norm": 3.05940842628479, "learning_rate": 4.321093082835184e-06, "loss": 0.9086, "step": 253 }, { "epoch": 0.01, "grad_norm": 5.023497104644775, "learning_rate": 4.338172502134928e-06, "loss": 0.7511, "step": 254 }, { "epoch": 0.01, "grad_norm": 4.117283821105957, "learning_rate": 4.355251921434672e-06, "loss": 0.8653, "step": 255 }, { "epoch": 0.01, "grad_norm": 2.8736467361450195, "learning_rate": 4.372331340734415e-06, "loss": 0.8661, "step": 256 }, { "epoch": 0.01, "grad_norm": 3.0761685371398926, "learning_rate": 4.389410760034159e-06, "loss": 0.9038, "step": 257 }, { "epoch": 0.01, "grad_norm": 4.253943920135498, "learning_rate": 4.406490179333903e-06, "loss": 0.9798, "step": 258 }, { "epoch": 0.01, "grad_norm": 3.8363325595855713, "learning_rate": 4.423569598633647e-06, "loss": 0.8623, "step": 259 }, { "epoch": 0.01, "grad_norm": 3.276005744934082, "learning_rate": 4.44064901793339e-06, "loss": 0.7184, "step": 260 }, { "epoch": 0.01, "grad_norm": 3.6499695777893066, "learning_rate": 4.457728437233134e-06, "loss": 0.9079, "step": 261 }, { "epoch": 0.01, "grad_norm": 3.879276752471924, "learning_rate": 4.474807856532878e-06, "loss": 0.7583, "step": 262 }, { "epoch": 0.01, "grad_norm": 3.6802597045898438, "learning_rate": 4.491887275832622e-06, "loss": 0.9572, "step": 263 }, { "epoch": 0.01, "grad_norm": 3.6372389793395996, "learning_rate": 4.508966695132365e-06, "loss": 0.9031, "step": 264 }, { "epoch": 0.01, "grad_norm": 3.790015697479248, "learning_rate": 4.52604611443211e-06, "loss": 1.0186, "step": 265 }, { "epoch": 0.01, "grad_norm": 4.5225605964660645, "learning_rate": 4.543125533731853e-06, "loss": 0.9051, "step": 266 }, { "epoch": 0.01, "grad_norm": 4.09251070022583, "learning_rate": 4.560204953031597e-06, "loss": 0.8208, "step": 267 }, { "epoch": 0.01, "grad_norm": 5.228858470916748, "learning_rate": 4.577284372331341e-06, "loss": 0.9266, "step": 268 }, { "epoch": 0.01, "grad_norm": 3.9953067302703857, "learning_rate": 4.594363791631085e-06, "loss": 0.7786, "step": 269 }, { "epoch": 0.01, "grad_norm": 3.4749977588653564, "learning_rate": 4.6114432109308285e-06, "loss": 0.8828, "step": 270 }, { "epoch": 0.01, "grad_norm": 3.216704845428467, "learning_rate": 4.6285226302305725e-06, "loss": 0.76, "step": 271 }, { "epoch": 0.01, "grad_norm": 4.965713977813721, "learning_rate": 4.6456020495303165e-06, "loss": 0.9492, "step": 272 }, { "epoch": 0.01, "grad_norm": 3.851771593093872, "learning_rate": 4.6626814688300605e-06, "loss": 0.7567, "step": 273 }, { "epoch": 0.01, "grad_norm": 5.300203323364258, "learning_rate": 4.679760888129804e-06, "loss": 0.8234, "step": 274 }, { "epoch": 0.01, "grad_norm": 3.51833176612854, "learning_rate": 4.696840307429548e-06, "loss": 0.9004, "step": 275 }, { "epoch": 0.01, "grad_norm": 3.310274600982666, "learning_rate": 4.713919726729292e-06, "loss": 0.7669, "step": 276 }, { "epoch": 0.01, "grad_norm": 3.7267045974731445, "learning_rate": 4.730999146029036e-06, "loss": 0.7147, "step": 277 }, { "epoch": 0.01, "grad_norm": 1.9763094186782837, "learning_rate": 4.748078565328779e-06, "loss": 0.8553, "step": 278 }, { "epoch": 0.01, "grad_norm": 5.028616905212402, "learning_rate": 4.765157984628523e-06, "loss": 0.9216, "step": 279 }, { "epoch": 0.01, "grad_norm": 5.1566481590271, "learning_rate": 4.782237403928267e-06, "loss": 0.9561, "step": 280 }, { "epoch": 0.01, "grad_norm": 4.541168689727783, "learning_rate": 4.799316823228011e-06, "loss": 0.8012, "step": 281 }, { "epoch": 0.01, "grad_norm": 5.064681529998779, "learning_rate": 4.816396242527755e-06, "loss": 0.9808, "step": 282 }, { "epoch": 0.01, "grad_norm": 5.18087911605835, "learning_rate": 4.833475661827498e-06, "loss": 0.958, "step": 283 }, { "epoch": 0.01, "grad_norm": 3.561119794845581, "learning_rate": 4.850555081127242e-06, "loss": 0.8835, "step": 284 }, { "epoch": 0.01, "grad_norm": 3.208221435546875, "learning_rate": 4.867634500426986e-06, "loss": 0.7897, "step": 285 }, { "epoch": 0.01, "grad_norm": 3.9331371784210205, "learning_rate": 4.88471391972673e-06, "loss": 0.9296, "step": 286 }, { "epoch": 0.01, "grad_norm": 6.666583061218262, "learning_rate": 4.901793339026473e-06, "loss": 0.9203, "step": 287 }, { "epoch": 0.01, "grad_norm": 3.7060747146606445, "learning_rate": 4.918872758326217e-06, "loss": 0.8329, "step": 288 }, { "epoch": 0.01, "grad_norm": 2.4035046100616455, "learning_rate": 4.935952177625961e-06, "loss": 0.8183, "step": 289 }, { "epoch": 0.01, "grad_norm": 2.922024965286255, "learning_rate": 4.953031596925705e-06, "loss": 0.7745, "step": 290 }, { "epoch": 0.01, "grad_norm": 5.195305824279785, "learning_rate": 4.970111016225449e-06, "loss": 0.709, "step": 291 }, { "epoch": 0.01, "grad_norm": 6.082737922668457, "learning_rate": 4.987190435525193e-06, "loss": 0.9461, "step": 292 }, { "epoch": 0.01, "grad_norm": 2.7064945697784424, "learning_rate": 5.004269854824937e-06, "loss": 0.7515, "step": 293 }, { "epoch": 0.01, "grad_norm": 3.9433701038360596, "learning_rate": 5.02134927412468e-06, "loss": 0.8427, "step": 294 }, { "epoch": 0.01, "grad_norm": 3.1792378425598145, "learning_rate": 5.038428693424424e-06, "loss": 0.7038, "step": 295 }, { "epoch": 0.01, "grad_norm": 3.0622074604034424, "learning_rate": 5.055508112724168e-06, "loss": 0.7806, "step": 296 }, { "epoch": 0.01, "grad_norm": 4.254012107849121, "learning_rate": 5.072587532023911e-06, "loss": 1.0115, "step": 297 }, { "epoch": 0.01, "grad_norm": 3.4714646339416504, "learning_rate": 5.089666951323655e-06, "loss": 0.955, "step": 298 }, { "epoch": 0.01, "grad_norm": 3.892864227294922, "learning_rate": 5.1067463706233985e-06, "loss": 0.8321, "step": 299 }, { "epoch": 0.01, "grad_norm": 5.241533279418945, "learning_rate": 5.1238257899231425e-06, "loss": 0.9229, "step": 300 }, { "epoch": 0.01, "grad_norm": 3.0465493202209473, "learning_rate": 5.140905209222887e-06, "loss": 0.8378, "step": 301 }, { "epoch": 0.01, "grad_norm": 3.5057196617126465, "learning_rate": 5.157984628522631e-06, "loss": 0.7148, "step": 302 }, { "epoch": 0.01, "grad_norm": 4.006376266479492, "learning_rate": 5.1750640478223745e-06, "loss": 0.8784, "step": 303 }, { "epoch": 0.01, "grad_norm": 3.8740694522857666, "learning_rate": 5.1921434671221185e-06, "loss": 0.7871, "step": 304 }, { "epoch": 0.01, "grad_norm": 4.367591857910156, "learning_rate": 5.209222886421862e-06, "loss": 0.8055, "step": 305 }, { "epoch": 0.01, "grad_norm": 5.008371353149414, "learning_rate": 5.226302305721606e-06, "loss": 0.9293, "step": 306 }, { "epoch": 0.01, "grad_norm": 5.643738746643066, "learning_rate": 5.24338172502135e-06, "loss": 0.8027, "step": 307 }, { "epoch": 0.01, "grad_norm": 2.5084197521209717, "learning_rate": 5.260461144321094e-06, "loss": 0.9196, "step": 308 }, { "epoch": 0.01, "grad_norm": 3.3635671138763428, "learning_rate": 5.277540563620838e-06, "loss": 0.946, "step": 309 }, { "epoch": 0.01, "grad_norm": 3.8511240482330322, "learning_rate": 5.294619982920582e-06, "loss": 1.0195, "step": 310 }, { "epoch": 0.01, "grad_norm": 3.3021857738494873, "learning_rate": 5.311699402220325e-06, "loss": 0.8799, "step": 311 }, { "epoch": 0.01, "grad_norm": 3.0527005195617676, "learning_rate": 5.328778821520069e-06, "loss": 0.8177, "step": 312 }, { "epoch": 0.01, "grad_norm": 3.5212934017181396, "learning_rate": 5.345858240819812e-06, "loss": 0.7966, "step": 313 }, { "epoch": 0.01, "grad_norm": 4.070886135101318, "learning_rate": 5.362937660119556e-06, "loss": 0.8324, "step": 314 }, { "epoch": 0.01, "grad_norm": 4.882476329803467, "learning_rate": 5.380017079419301e-06, "loss": 0.7815, "step": 315 }, { "epoch": 0.01, "grad_norm": 3.8647208213806152, "learning_rate": 5.397096498719045e-06, "loss": 0.7954, "step": 316 }, { "epoch": 0.01, "grad_norm": 3.2483394145965576, "learning_rate": 5.414175918018788e-06, "loss": 1.0037, "step": 317 }, { "epoch": 0.01, "grad_norm": 3.5775949954986572, "learning_rate": 5.431255337318532e-06, "loss": 0.8361, "step": 318 }, { "epoch": 0.01, "grad_norm": 2.840724229812622, "learning_rate": 5.448334756618275e-06, "loss": 0.9549, "step": 319 }, { "epoch": 0.01, "grad_norm": 2.4174742698669434, "learning_rate": 5.465414175918019e-06, "loss": 0.667, "step": 320 }, { "epoch": 0.01, "grad_norm": 3.251253604888916, "learning_rate": 5.482493595217763e-06, "loss": 0.8567, "step": 321 }, { "epoch": 0.01, "grad_norm": 2.9634480476379395, "learning_rate": 5.499573014517506e-06, "loss": 0.7712, "step": 322 }, { "epoch": 0.01, "grad_norm": 4.725203037261963, "learning_rate": 5.516652433817251e-06, "loss": 0.8922, "step": 323 }, { "epoch": 0.01, "grad_norm": 4.06951904296875, "learning_rate": 5.533731853116995e-06, "loss": 0.8245, "step": 324 }, { "epoch": 0.01, "grad_norm": 7.744103908538818, "learning_rate": 5.550811272416738e-06, "loss": 0.8055, "step": 325 }, { "epoch": 0.01, "grad_norm": 3.1317005157470703, "learning_rate": 5.567890691716482e-06, "loss": 1.1073, "step": 326 }, { "epoch": 0.01, "grad_norm": 3.4536020755767822, "learning_rate": 5.584970111016226e-06, "loss": 0.8625, "step": 327 }, { "epoch": 0.01, "grad_norm": 3.7910971641540527, "learning_rate": 5.602049530315969e-06, "loss": 0.9029, "step": 328 }, { "epoch": 0.01, "grad_norm": 2.836439609527588, "learning_rate": 5.619128949615713e-06, "loss": 0.7353, "step": 329 }, { "epoch": 0.01, "grad_norm": 2.9106104373931885, "learning_rate": 5.636208368915458e-06, "loss": 0.8735, "step": 330 }, { "epoch": 0.01, "grad_norm": 3.069978713989258, "learning_rate": 5.653287788215201e-06, "loss": 0.7891, "step": 331 }, { "epoch": 0.01, "grad_norm": 3.5853796005249023, "learning_rate": 5.670367207514945e-06, "loss": 0.8848, "step": 332 }, { "epoch": 0.01, "grad_norm": 9.94426441192627, "learning_rate": 5.6874466268146885e-06, "loss": 1.096, "step": 333 }, { "epoch": 0.01, "grad_norm": 3.028080463409424, "learning_rate": 5.7045260461144325e-06, "loss": 0.8576, "step": 334 }, { "epoch": 0.01, "grad_norm": 3.5819737911224365, "learning_rate": 5.7216054654141765e-06, "loss": 0.8148, "step": 335 }, { "epoch": 0.01, "grad_norm": 3.770321846008301, "learning_rate": 5.73868488471392e-06, "loss": 0.9725, "step": 336 }, { "epoch": 0.01, "grad_norm": 3.084895610809326, "learning_rate": 5.755764304013664e-06, "loss": 0.9345, "step": 337 }, { "epoch": 0.01, "grad_norm": 3.433753490447998, "learning_rate": 5.7728437233134085e-06, "loss": 0.9575, "step": 338 }, { "epoch": 0.01, "grad_norm": 2.8959131240844727, "learning_rate": 5.789923142613152e-06, "loss": 0.8461, "step": 339 }, { "epoch": 0.01, "grad_norm": 3.3833460807800293, "learning_rate": 5.807002561912896e-06, "loss": 0.7747, "step": 340 }, { "epoch": 0.01, "grad_norm": 4.434955596923828, "learning_rate": 5.82408198121264e-06, "loss": 0.7138, "step": 341 }, { "epoch": 0.01, "grad_norm": 4.6009697914123535, "learning_rate": 5.841161400512383e-06, "loss": 0.8379, "step": 342 }, { "epoch": 0.01, "grad_norm": 4.936135768890381, "learning_rate": 5.858240819812127e-06, "loss": 0.7452, "step": 343 }, { "epoch": 0.01, "grad_norm": 2.924855947494507, "learning_rate": 5.87532023911187e-06, "loss": 0.8073, "step": 344 }, { "epoch": 0.01, "grad_norm": 3.8618996143341064, "learning_rate": 5.892399658411615e-06, "loss": 0.7885, "step": 345 }, { "epoch": 0.01, "grad_norm": 2.736598253250122, "learning_rate": 5.909479077711359e-06, "loss": 0.9845, "step": 346 }, { "epoch": 0.01, "grad_norm": 2.1763298511505127, "learning_rate": 5.926558497011102e-06, "loss": 0.8586, "step": 347 }, { "epoch": 0.01, "grad_norm": 2.202244281768799, "learning_rate": 5.943637916310846e-06, "loss": 0.9085, "step": 348 }, { "epoch": 0.01, "grad_norm": 3.1199936866760254, "learning_rate": 5.96071733561059e-06, "loss": 0.9148, "step": 349 }, { "epoch": 0.01, "grad_norm": 4.551786422729492, "learning_rate": 5.977796754910333e-06, "loss": 0.9684, "step": 350 }, { "epoch": 0.01, "grad_norm": 4.23246431350708, "learning_rate": 5.994876174210077e-06, "loss": 1.1296, "step": 351 }, { "epoch": 0.01, "grad_norm": 3.2631912231445312, "learning_rate": 6.01195559350982e-06, "loss": 0.9712, "step": 352 }, { "epoch": 0.01, "grad_norm": 4.043980598449707, "learning_rate": 6.029035012809565e-06, "loss": 0.7691, "step": 353 }, { "epoch": 0.01, "grad_norm": 4.384788990020752, "learning_rate": 6.046114432109309e-06, "loss": 0.9122, "step": 354 }, { "epoch": 0.01, "grad_norm": 4.859692096710205, "learning_rate": 6.063193851409053e-06, "loss": 0.862, "step": 355 }, { "epoch": 0.01, "grad_norm": 3.4996085166931152, "learning_rate": 6.080273270708796e-06, "loss": 0.9158, "step": 356 }, { "epoch": 0.01, "grad_norm": 3.592615842819214, "learning_rate": 6.09735269000854e-06, "loss": 0.9982, "step": 357 }, { "epoch": 0.01, "grad_norm": 3.5554914474487305, "learning_rate": 6.114432109308283e-06, "loss": 0.9314, "step": 358 }, { "epoch": 0.01, "grad_norm": 2.185805082321167, "learning_rate": 6.131511528608027e-06, "loss": 0.8569, "step": 359 }, { "epoch": 0.01, "grad_norm": 3.7463490962982178, "learning_rate": 6.148590947907772e-06, "loss": 0.8572, "step": 360 }, { "epoch": 0.01, "grad_norm": 2.9205031394958496, "learning_rate": 6.165670367207516e-06, "loss": 0.9867, "step": 361 }, { "epoch": 0.01, "grad_norm": 2.944648027420044, "learning_rate": 6.182749786507259e-06, "loss": 0.7233, "step": 362 }, { "epoch": 0.01, "grad_norm": 2.7396607398986816, "learning_rate": 6.199829205807003e-06, "loss": 0.8842, "step": 363 }, { "epoch": 0.01, "grad_norm": 4.1257171630859375, "learning_rate": 6.2169086251067465e-06, "loss": 0.8007, "step": 364 }, { "epoch": 0.01, "grad_norm": 2.7899296283721924, "learning_rate": 6.2339880444064905e-06, "loss": 0.728, "step": 365 }, { "epoch": 0.01, "grad_norm": 2.5457780361175537, "learning_rate": 6.2510674637062345e-06, "loss": 0.8254, "step": 366 }, { "epoch": 0.01, "grad_norm": 2.5618577003479004, "learning_rate": 6.2681468830059785e-06, "loss": 0.8752, "step": 367 }, { "epoch": 0.01, "grad_norm": 4.503549098968506, "learning_rate": 6.2852263023057225e-06, "loss": 0.7486, "step": 368 }, { "epoch": 0.01, "grad_norm": 2.6565170288085938, "learning_rate": 6.3023057216054665e-06, "loss": 0.8334, "step": 369 }, { "epoch": 0.01, "grad_norm": 2.8318543434143066, "learning_rate": 6.31938514090521e-06, "loss": 1.0001, "step": 370 }, { "epoch": 0.01, "grad_norm": 2.6709537506103516, "learning_rate": 6.336464560204954e-06, "loss": 0.7606, "step": 371 }, { "epoch": 0.01, "grad_norm": 3.324038028717041, "learning_rate": 6.353543979504697e-06, "loss": 0.9395, "step": 372 }, { "epoch": 0.01, "grad_norm": 3.9034106731414795, "learning_rate": 6.370623398804441e-06, "loss": 0.8211, "step": 373 }, { "epoch": 0.01, "grad_norm": 8.499550819396973, "learning_rate": 6.387702818104185e-06, "loss": 0.8759, "step": 374 }, { "epoch": 0.01, "grad_norm": 4.858789920806885, "learning_rate": 6.40478223740393e-06, "loss": 1.0032, "step": 375 }, { "epoch": 0.01, "grad_norm": 6.840332508087158, "learning_rate": 6.421861656703673e-06, "loss": 1.0128, "step": 376 }, { "epoch": 0.01, "grad_norm": 2.694453716278076, "learning_rate": 6.438941076003417e-06, "loss": 0.9563, "step": 377 }, { "epoch": 0.01, "grad_norm": 2.404749631881714, "learning_rate": 6.45602049530316e-06, "loss": 0.9355, "step": 378 }, { "epoch": 0.01, "grad_norm": 3.4457647800445557, "learning_rate": 6.473099914602904e-06, "loss": 0.9237, "step": 379 }, { "epoch": 0.01, "grad_norm": 4.252995014190674, "learning_rate": 6.490179333902648e-06, "loss": 0.7444, "step": 380 }, { "epoch": 0.01, "grad_norm": 2.7945494651794434, "learning_rate": 6.507258753202391e-06, "loss": 0.8359, "step": 381 }, { "epoch": 0.01, "grad_norm": 1.7338663339614868, "learning_rate": 6.524338172502136e-06, "loss": 0.8523, "step": 382 }, { "epoch": 0.01, "grad_norm": 3.376497983932495, "learning_rate": 6.54141759180188e-06, "loss": 0.898, "step": 383 }, { "epoch": 0.01, "grad_norm": 3.2246546745300293, "learning_rate": 6.558497011101623e-06, "loss": 0.9863, "step": 384 }, { "epoch": 0.01, "grad_norm": 2.7955710887908936, "learning_rate": 6.575576430401367e-06, "loss": 0.8312, "step": 385 }, { "epoch": 0.01, "grad_norm": 2.9878900051116943, "learning_rate": 6.592655849701111e-06, "loss": 1.001, "step": 386 }, { "epoch": 0.01, "grad_norm": 2.3803462982177734, "learning_rate": 6.609735269000854e-06, "loss": 0.8224, "step": 387 }, { "epoch": 0.01, "grad_norm": 3.1382524967193604, "learning_rate": 6.626814688300598e-06, "loss": 0.8543, "step": 388 }, { "epoch": 0.01, "grad_norm": 3.640228033065796, "learning_rate": 6.643894107600341e-06, "loss": 0.6651, "step": 389 }, { "epoch": 0.01, "grad_norm": 2.824918270111084, "learning_rate": 6.660973526900086e-06, "loss": 0.9189, "step": 390 }, { "epoch": 0.01, "grad_norm": 3.1348981857299805, "learning_rate": 6.67805294619983e-06, "loss": 0.7848, "step": 391 }, { "epoch": 0.01, "grad_norm": 2.6200296878814697, "learning_rate": 6.695132365499573e-06, "loss": 0.7225, "step": 392 }, { "epoch": 0.01, "grad_norm": 2.3111560344696045, "learning_rate": 6.712211784799317e-06, "loss": 0.8578, "step": 393 }, { "epoch": 0.01, "grad_norm": 4.243932247161865, "learning_rate": 6.729291204099061e-06, "loss": 0.8624, "step": 394 }, { "epoch": 0.01, "grad_norm": 2.7364625930786133, "learning_rate": 6.7463706233988045e-06, "loss": 0.6876, "step": 395 }, { "epoch": 0.01, "grad_norm": 4.3745198249816895, "learning_rate": 6.7634500426985485e-06, "loss": 0.6499, "step": 396 }, { "epoch": 0.01, "grad_norm": 4.292744159698486, "learning_rate": 6.780529461998293e-06, "loss": 0.9685, "step": 397 }, { "epoch": 0.01, "grad_norm": 8.157849311828613, "learning_rate": 6.7976088812980364e-06, "loss": 0.891, "step": 398 }, { "epoch": 0.01, "grad_norm": 3.3074934482574463, "learning_rate": 6.8146883005977804e-06, "loss": 0.7608, "step": 399 }, { "epoch": 0.01, "grad_norm": 3.0447425842285156, "learning_rate": 6.8317677198975244e-06, "loss": 0.7435, "step": 400 }, { "epoch": 0.01, "grad_norm": 2.3231873512268066, "learning_rate": 6.848847139197268e-06, "loss": 0.6213, "step": 401 }, { "epoch": 0.01, "grad_norm": 2.8794901371002197, "learning_rate": 6.865926558497012e-06, "loss": 0.9026, "step": 402 }, { "epoch": 0.01, "grad_norm": 2.793694496154785, "learning_rate": 6.883005977796755e-06, "loss": 0.8353, "step": 403 }, { "epoch": 0.01, "grad_norm": 2.2745304107666016, "learning_rate": 6.9000853970965e-06, "loss": 0.8686, "step": 404 }, { "epoch": 0.01, "grad_norm": 6.424152851104736, "learning_rate": 6.917164816396244e-06, "loss": 1.0367, "step": 405 }, { "epoch": 0.01, "grad_norm": 3.9025590419769287, "learning_rate": 6.934244235695987e-06, "loss": 0.8442, "step": 406 }, { "epoch": 0.01, "grad_norm": 3.305973768234253, "learning_rate": 6.951323654995731e-06, "loss": 0.7007, "step": 407 }, { "epoch": 0.01, "grad_norm": 7.831313610076904, "learning_rate": 6.968403074295475e-06, "loss": 1.1508, "step": 408 }, { "epoch": 0.01, "grad_norm": 4.397701263427734, "learning_rate": 6.985482493595218e-06, "loss": 0.8978, "step": 409 }, { "epoch": 0.01, "grad_norm": 2.6655125617980957, "learning_rate": 7.002561912894962e-06, "loss": 0.8113, "step": 410 }, { "epoch": 0.01, "grad_norm": 3.9066333770751953, "learning_rate": 7.019641332194705e-06, "loss": 0.8165, "step": 411 }, { "epoch": 0.01, "grad_norm": 3.263031482696533, "learning_rate": 7.03672075149445e-06, "loss": 0.8832, "step": 412 }, { "epoch": 0.01, "grad_norm": 2.8858160972595215, "learning_rate": 7.053800170794194e-06, "loss": 0.7278, "step": 413 }, { "epoch": 0.01, "grad_norm": 4.372429847717285, "learning_rate": 7.070879590093938e-06, "loss": 0.763, "step": 414 }, { "epoch": 0.01, "grad_norm": 3.16634202003479, "learning_rate": 7.087959009393681e-06, "loss": 0.8111, "step": 415 }, { "epoch": 0.01, "grad_norm": 3.152989149093628, "learning_rate": 7.105038428693425e-06, "loss": 0.7213, "step": 416 }, { "epoch": 0.01, "grad_norm": 3.015136957168579, "learning_rate": 7.122117847993168e-06, "loss": 0.8716, "step": 417 }, { "epoch": 0.01, "grad_norm": 4.670164585113525, "learning_rate": 7.139197267292912e-06, "loss": 0.9381, "step": 418 }, { "epoch": 0.01, "grad_norm": 3.1923882961273193, "learning_rate": 7.156276686592657e-06, "loss": 0.9448, "step": 419 }, { "epoch": 0.01, "grad_norm": 2.857733726501465, "learning_rate": 7.173356105892401e-06, "loss": 0.7485, "step": 420 }, { "epoch": 0.01, "grad_norm": 6.204648494720459, "learning_rate": 7.190435525192144e-06, "loss": 0.842, "step": 421 }, { "epoch": 0.01, "grad_norm": 2.9626967906951904, "learning_rate": 7.207514944491888e-06, "loss": 0.8316, "step": 422 }, { "epoch": 0.01, "grad_norm": 3.857004165649414, "learning_rate": 7.224594363791631e-06, "loss": 0.8392, "step": 423 }, { "epoch": 0.01, "grad_norm": 3.1215274333953857, "learning_rate": 7.241673783091375e-06, "loss": 0.8983, "step": 424 }, { "epoch": 0.01, "grad_norm": 2.8426907062530518, "learning_rate": 7.258753202391119e-06, "loss": 0.9266, "step": 425 }, { "epoch": 0.01, "grad_norm": 3.5433151721954346, "learning_rate": 7.2758326216908624e-06, "loss": 0.9347, "step": 426 }, { "epoch": 0.01, "grad_norm": 4.011114597320557, "learning_rate": 7.292912040990607e-06, "loss": 0.9396, "step": 427 }, { "epoch": 0.01, "grad_norm": 3.452965259552002, "learning_rate": 7.309991460290351e-06, "loss": 0.842, "step": 428 }, { "epoch": 0.01, "grad_norm": 2.6588833332061768, "learning_rate": 7.3270708795900944e-06, "loss": 0.846, "step": 429 }, { "epoch": 0.01, "grad_norm": 3.0256547927856445, "learning_rate": 7.3441502988898384e-06, "loss": 0.773, "step": 430 }, { "epoch": 0.01, "grad_norm": 3.030120372772217, "learning_rate": 7.361229718189582e-06, "loss": 0.6698, "step": 431 }, { "epoch": 0.01, "grad_norm": 4.014083385467529, "learning_rate": 7.378309137489326e-06, "loss": 0.7764, "step": 432 }, { "epoch": 0.01, "grad_norm": 2.523857831954956, "learning_rate": 7.39538855678907e-06, "loss": 0.8721, "step": 433 }, { "epoch": 0.01, "grad_norm": 5.672163009643555, "learning_rate": 7.4124679760888144e-06, "loss": 0.852, "step": 434 }, { "epoch": 0.01, "grad_norm": 5.7022600173950195, "learning_rate": 7.429547395388558e-06, "loss": 0.6913, "step": 435 }, { "epoch": 0.01, "grad_norm": 6.497093200683594, "learning_rate": 7.4466268146883016e-06, "loss": 0.9022, "step": 436 }, { "epoch": 0.01, "grad_norm": 4.426800727844238, "learning_rate": 7.463706233988045e-06, "loss": 0.8454, "step": 437 }, { "epoch": 0.01, "grad_norm": 3.398794174194336, "learning_rate": 7.480785653287789e-06, "loss": 0.8075, "step": 438 }, { "epoch": 0.01, "grad_norm": 3.159959077835083, "learning_rate": 7.497865072587533e-06, "loss": 0.9586, "step": 439 }, { "epoch": 0.01, "grad_norm": 2.8916890621185303, "learning_rate": 7.514944491887276e-06, "loss": 0.8638, "step": 440 }, { "epoch": 0.01, "grad_norm": 3.8080530166625977, "learning_rate": 7.532023911187021e-06, "loss": 0.6471, "step": 441 }, { "epoch": 0.01, "grad_norm": 3.9324965476989746, "learning_rate": 7.549103330486765e-06, "loss": 0.8792, "step": 442 }, { "epoch": 0.01, "grad_norm": 4.449000835418701, "learning_rate": 7.566182749786508e-06, "loss": 0.8601, "step": 443 }, { "epoch": 0.01, "grad_norm": 3.3898069858551025, "learning_rate": 7.583262169086252e-06, "loss": 0.893, "step": 444 }, { "epoch": 0.01, "grad_norm": 3.5522074699401855, "learning_rate": 7.600341588385996e-06, "loss": 0.7064, "step": 445 }, { "epoch": 0.01, "grad_norm": 5.100362300872803, "learning_rate": 7.617421007685739e-06, "loss": 0.8104, "step": 446 }, { "epoch": 0.01, "grad_norm": 4.5904011726379395, "learning_rate": 7.634500426985483e-06, "loss": 0.8396, "step": 447 }, { "epoch": 0.01, "grad_norm": 3.9026613235473633, "learning_rate": 7.651579846285226e-06, "loss": 0.9373, "step": 448 }, { "epoch": 0.01, "grad_norm": 3.6731929779052734, "learning_rate": 7.668659265584971e-06, "loss": 0.8708, "step": 449 }, { "epoch": 0.01, "grad_norm": 4.522058010101318, "learning_rate": 7.685738684884714e-06, "loss": 0.9636, "step": 450 }, { "epoch": 0.01, "grad_norm": 3.123067617416382, "learning_rate": 7.702818104184459e-06, "loss": 0.9237, "step": 451 }, { "epoch": 0.01, "grad_norm": 4.5688300132751465, "learning_rate": 7.719897523484202e-06, "loss": 1.0432, "step": 452 }, { "epoch": 0.01, "grad_norm": 2.761467456817627, "learning_rate": 7.736976942783945e-06, "loss": 0.8115, "step": 453 }, { "epoch": 0.01, "grad_norm": 3.520416736602783, "learning_rate": 7.75405636208369e-06, "loss": 0.7382, "step": 454 }, { "epoch": 0.01, "grad_norm": 3.9938740730285645, "learning_rate": 7.771135781383433e-06, "loss": 0.8653, "step": 455 }, { "epoch": 0.01, "grad_norm": 3.1446731090545654, "learning_rate": 7.788215200683178e-06, "loss": 0.8016, "step": 456 }, { "epoch": 0.01, "grad_norm": 5.512768745422363, "learning_rate": 7.805294619982921e-06, "loss": 0.9653, "step": 457 }, { "epoch": 0.01, "grad_norm": 4.928500652313232, "learning_rate": 7.822374039282664e-06, "loss": 0.9358, "step": 458 }, { "epoch": 0.01, "grad_norm": 3.3970701694488525, "learning_rate": 7.83945345858241e-06, "loss": 0.705, "step": 459 }, { "epoch": 0.01, "grad_norm": 2.3682830333709717, "learning_rate": 7.856532877882152e-06, "loss": 0.7772, "step": 460 }, { "epoch": 0.01, "grad_norm": 2.1239140033721924, "learning_rate": 7.873612297181896e-06, "loss": 0.8898, "step": 461 }, { "epoch": 0.01, "grad_norm": 3.9700887203216553, "learning_rate": 7.89069171648164e-06, "loss": 0.8515, "step": 462 }, { "epoch": 0.01, "grad_norm": 2.58101749420166, "learning_rate": 7.907771135781384e-06, "loss": 0.8, "step": 463 }, { "epoch": 0.01, "grad_norm": 3.2250404357910156, "learning_rate": 7.924850555081128e-06, "loss": 0.7865, "step": 464 }, { "epoch": 0.01, "grad_norm": 3.1365268230438232, "learning_rate": 7.941929974380872e-06, "loss": 0.8911, "step": 465 }, { "epoch": 0.01, "grad_norm": 7.678693771362305, "learning_rate": 7.959009393680616e-06, "loss": 0.979, "step": 466 }, { "epoch": 0.01, "grad_norm": 3.5642881393432617, "learning_rate": 7.97608881298036e-06, "loss": 0.8783, "step": 467 }, { "epoch": 0.01, "grad_norm": 2.9137930870056152, "learning_rate": 7.993168232280103e-06, "loss": 0.7306, "step": 468 }, { "epoch": 0.01, "grad_norm": 5.497262001037598, "learning_rate": 8.010247651579846e-06, "loss": 0.7337, "step": 469 }, { "epoch": 0.01, "grad_norm": 2.5107860565185547, "learning_rate": 8.02732707087959e-06, "loss": 0.7146, "step": 470 }, { "epoch": 0.01, "grad_norm": 2.8850085735321045, "learning_rate": 8.044406490179336e-06, "loss": 0.9115, "step": 471 }, { "epoch": 0.01, "grad_norm": 2.590420722961426, "learning_rate": 8.061485909479079e-06, "loss": 0.9396, "step": 472 }, { "epoch": 0.01, "grad_norm": 6.36300802230835, "learning_rate": 8.078565328778822e-06, "loss": 1.0953, "step": 473 }, { "epoch": 0.01, "grad_norm": 3.631923198699951, "learning_rate": 8.095644748078567e-06, "loss": 0.7865, "step": 474 }, { "epoch": 0.01, "grad_norm": 2.9009788036346436, "learning_rate": 8.11272416737831e-06, "loss": 0.8773, "step": 475 }, { "epoch": 0.01, "grad_norm": 2.215235948562622, "learning_rate": 8.129803586678053e-06, "loss": 0.8665, "step": 476 }, { "epoch": 0.01, "grad_norm": 3.405111789703369, "learning_rate": 8.146883005977798e-06, "loss": 0.8405, "step": 477 }, { "epoch": 0.01, "grad_norm": 3.496751308441162, "learning_rate": 8.163962425277541e-06, "loss": 0.8193, "step": 478 }, { "epoch": 0.01, "grad_norm": 5.478484630584717, "learning_rate": 8.181041844577286e-06, "loss": 0.8046, "step": 479 }, { "epoch": 0.01, "grad_norm": 2.6335177421569824, "learning_rate": 8.198121263877029e-06, "loss": 0.7262, "step": 480 }, { "epoch": 0.01, "grad_norm": 3.363515615463257, "learning_rate": 8.215200683176772e-06, "loss": 0.8021, "step": 481 }, { "epoch": 0.01, "grad_norm": 3.001474618911743, "learning_rate": 8.232280102476517e-06, "loss": 0.6559, "step": 482 }, { "epoch": 0.01, "grad_norm": 4.025134086608887, "learning_rate": 8.24935952177626e-06, "loss": 0.8401, "step": 483 }, { "epoch": 0.01, "grad_norm": 2.9495444297790527, "learning_rate": 8.266438941076003e-06, "loss": 0.781, "step": 484 }, { "epoch": 0.01, "grad_norm": 3.899292230606079, "learning_rate": 8.283518360375748e-06, "loss": 0.7232, "step": 485 }, { "epoch": 0.01, "grad_norm": 3.3395822048187256, "learning_rate": 8.300597779675493e-06, "loss": 0.8246, "step": 486 }, { "epoch": 0.01, "grad_norm": 6.109034538269043, "learning_rate": 8.317677198975236e-06, "loss": 0.861, "step": 487 }, { "epoch": 0.01, "grad_norm": 3.582104444503784, "learning_rate": 8.33475661827498e-06, "loss": 0.9697, "step": 488 }, { "epoch": 0.01, "grad_norm": 3.7347357273101807, "learning_rate": 8.351836037574722e-06, "loss": 0.7417, "step": 489 }, { "epoch": 0.01, "grad_norm": 4.58115816116333, "learning_rate": 8.368915456874467e-06, "loss": 0.8109, "step": 490 }, { "epoch": 0.01, "grad_norm": 2.46883487701416, "learning_rate": 8.38599487617421e-06, "loss": 0.7059, "step": 491 }, { "epoch": 0.01, "grad_norm": 2.952794313430786, "learning_rate": 8.403074295473954e-06, "loss": 0.6831, "step": 492 }, { "epoch": 0.01, "grad_norm": 3.100076913833618, "learning_rate": 8.420153714773698e-06, "loss": 0.9848, "step": 493 }, { "epoch": 0.01, "grad_norm": 2.5787482261657715, "learning_rate": 8.437233134073443e-06, "loss": 0.8419, "step": 494 }, { "epoch": 0.01, "grad_norm": 3.793858289718628, "learning_rate": 8.454312553373186e-06, "loss": 0.7688, "step": 495 }, { "epoch": 0.01, "grad_norm": 2.594566822052002, "learning_rate": 8.47139197267293e-06, "loss": 0.752, "step": 496 }, { "epoch": 0.01, "grad_norm": 3.2434682846069336, "learning_rate": 8.488471391972673e-06, "loss": 0.8802, "step": 497 }, { "epoch": 0.01, "grad_norm": 4.931363105773926, "learning_rate": 8.505550811272418e-06, "loss": 0.8609, "step": 498 }, { "epoch": 0.01, "grad_norm": 3.540067672729492, "learning_rate": 8.52263023057216e-06, "loss": 0.8245, "step": 499 }, { "epoch": 0.01, "grad_norm": 4.9244232177734375, "learning_rate": 8.539709649871904e-06, "loss": 0.9164, "step": 500 }, { "epoch": 0.01, "grad_norm": 3.0294454097747803, "learning_rate": 8.556789069171649e-06, "loss": 0.8386, "step": 501 }, { "epoch": 0.01, "grad_norm": 5.520294189453125, "learning_rate": 8.573868488471394e-06, "loss": 0.9567, "step": 502 }, { "epoch": 0.01, "grad_norm": 1.911596417427063, "learning_rate": 8.590947907771137e-06, "loss": 0.7642, "step": 503 }, { "epoch": 0.01, "grad_norm": 2.8724966049194336, "learning_rate": 8.60802732707088e-06, "loss": 0.7671, "step": 504 }, { "epoch": 0.01, "grad_norm": 3.83320951461792, "learning_rate": 8.625106746370625e-06, "loss": 0.8269, "step": 505 }, { "epoch": 0.01, "grad_norm": 2.492074966430664, "learning_rate": 8.642186165670368e-06, "loss": 0.7577, "step": 506 }, { "epoch": 0.01, "grad_norm": 2.5299978256225586, "learning_rate": 8.659265584970111e-06, "loss": 0.6531, "step": 507 }, { "epoch": 0.01, "grad_norm": 2.936781167984009, "learning_rate": 8.676345004269856e-06, "loss": 0.8028, "step": 508 }, { "epoch": 0.01, "grad_norm": 2.7059359550476074, "learning_rate": 8.693424423569599e-06, "loss": 0.7458, "step": 509 }, { "epoch": 0.01, "grad_norm": 3.337296485900879, "learning_rate": 8.710503842869344e-06, "loss": 0.948, "step": 510 }, { "epoch": 0.01, "grad_norm": 4.3177971839904785, "learning_rate": 8.727583262169087e-06, "loss": 0.8251, "step": 511 }, { "epoch": 0.01, "grad_norm": 8.680696487426758, "learning_rate": 8.74466268146883e-06, "loss": 1.0465, "step": 512 }, { "epoch": 0.01, "grad_norm": 3.9015936851501465, "learning_rate": 8.761742100768575e-06, "loss": 0.7508, "step": 513 }, { "epoch": 0.01, "grad_norm": 4.080055236816406, "learning_rate": 8.778821520068318e-06, "loss": 0.9292, "step": 514 }, { "epoch": 0.01, "grad_norm": 3.542787551879883, "learning_rate": 8.795900939368061e-06, "loss": 0.9645, "step": 515 }, { "epoch": 0.01, "grad_norm": 4.969405651092529, "learning_rate": 8.812980358667806e-06, "loss": 0.8248, "step": 516 }, { "epoch": 0.01, "grad_norm": 2.8489980697631836, "learning_rate": 8.83005977796755e-06, "loss": 0.7683, "step": 517 }, { "epoch": 0.01, "grad_norm": 7.069876194000244, "learning_rate": 8.847139197267294e-06, "loss": 0.8594, "step": 518 }, { "epoch": 0.01, "grad_norm": 3.3883588314056396, "learning_rate": 8.864218616567037e-06, "loss": 0.9131, "step": 519 }, { "epoch": 0.01, "grad_norm": 3.7203903198242188, "learning_rate": 8.88129803586678e-06, "loss": 0.9069, "step": 520 }, { "epoch": 0.01, "grad_norm": 3.0363054275512695, "learning_rate": 8.898377455166525e-06, "loss": 0.8191, "step": 521 }, { "epoch": 0.01, "grad_norm": 3.6267662048339844, "learning_rate": 8.915456874466268e-06, "loss": 0.8427, "step": 522 }, { "epoch": 0.01, "grad_norm": 2.446706533432007, "learning_rate": 8.932536293766013e-06, "loss": 0.65, "step": 523 }, { "epoch": 0.01, "grad_norm": 2.283273220062256, "learning_rate": 8.949615713065756e-06, "loss": 0.7296, "step": 524 }, { "epoch": 0.01, "grad_norm": 2.2116146087646484, "learning_rate": 8.966695132365501e-06, "loss": 0.7878, "step": 525 }, { "epoch": 0.01, "grad_norm": 3.084474563598633, "learning_rate": 8.983774551665244e-06, "loss": 0.7847, "step": 526 }, { "epoch": 0.01, "grad_norm": 2.316312074661255, "learning_rate": 9.000853970964988e-06, "loss": 0.8377, "step": 527 }, { "epoch": 0.01, "grad_norm": 2.1219534873962402, "learning_rate": 9.01793339026473e-06, "loss": 0.6776, "step": 528 }, { "epoch": 0.01, "grad_norm": 4.795619487762451, "learning_rate": 9.035012809564476e-06, "loss": 0.8615, "step": 529 }, { "epoch": 0.01, "grad_norm": 4.676130294799805, "learning_rate": 9.05209222886422e-06, "loss": 0.8368, "step": 530 }, { "epoch": 0.01, "grad_norm": 2.6622984409332275, "learning_rate": 9.069171648163964e-06, "loss": 0.7518, "step": 531 }, { "epoch": 0.01, "grad_norm": 3.3522233963012695, "learning_rate": 9.086251067463707e-06, "loss": 0.6789, "step": 532 }, { "epoch": 0.01, "grad_norm": 3.3454928398132324, "learning_rate": 9.103330486763452e-06, "loss": 0.962, "step": 533 }, { "epoch": 0.01, "grad_norm": 2.4748120307922363, "learning_rate": 9.120409906063195e-06, "loss": 0.8593, "step": 534 }, { "epoch": 0.01, "grad_norm": 4.945930004119873, "learning_rate": 9.137489325362938e-06, "loss": 1.0454, "step": 535 }, { "epoch": 0.01, "grad_norm": 3.1708221435546875, "learning_rate": 9.154568744662683e-06, "loss": 0.6818, "step": 536 }, { "epoch": 0.01, "grad_norm": 2.594486951828003, "learning_rate": 9.171648163962426e-06, "loss": 0.823, "step": 537 }, { "epoch": 0.01, "grad_norm": 4.358098983764648, "learning_rate": 9.18872758326217e-06, "loss": 0.8859, "step": 538 }, { "epoch": 0.01, "grad_norm": 3.0870959758758545, "learning_rate": 9.205807002561914e-06, "loss": 0.7, "step": 539 }, { "epoch": 0.01, "grad_norm": 3.562258243560791, "learning_rate": 9.222886421861657e-06, "loss": 0.9407, "step": 540 }, { "epoch": 0.01, "grad_norm": 2.185425043106079, "learning_rate": 9.239965841161402e-06, "loss": 0.9076, "step": 541 }, { "epoch": 0.01, "grad_norm": 2.7180252075195312, "learning_rate": 9.257045260461145e-06, "loss": 0.8001, "step": 542 }, { "epoch": 0.01, "grad_norm": 4.859086036682129, "learning_rate": 9.274124679760888e-06, "loss": 0.7265, "step": 543 }, { "epoch": 0.01, "grad_norm": 3.320221185684204, "learning_rate": 9.291204099060633e-06, "loss": 0.7314, "step": 544 }, { "epoch": 0.01, "grad_norm": 3.917106866836548, "learning_rate": 9.308283518360378e-06, "loss": 0.8389, "step": 545 }, { "epoch": 0.01, "grad_norm": 3.9716084003448486, "learning_rate": 9.325362937660121e-06, "loss": 0.8435, "step": 546 }, { "epoch": 0.01, "grad_norm": 6.612184047698975, "learning_rate": 9.342442356959864e-06, "loss": 0.9243, "step": 547 }, { "epoch": 0.01, "grad_norm": 3.5479280948638916, "learning_rate": 9.359521776259607e-06, "loss": 0.8687, "step": 548 }, { "epoch": 0.01, "grad_norm": 2.4012608528137207, "learning_rate": 9.376601195559352e-06, "loss": 0.6491, "step": 549 }, { "epoch": 0.01, "grad_norm": 2.534519910812378, "learning_rate": 9.393680614859095e-06, "loss": 0.7764, "step": 550 }, { "epoch": 0.01, "grad_norm": 2.3477656841278076, "learning_rate": 9.410760034158838e-06, "loss": 0.6364, "step": 551 }, { "epoch": 0.01, "grad_norm": 2.2753729820251465, "learning_rate": 9.427839453458583e-06, "loss": 0.7853, "step": 552 }, { "epoch": 0.01, "grad_norm": 2.727893829345703, "learning_rate": 9.444918872758328e-06, "loss": 0.8153, "step": 553 }, { "epoch": 0.01, "grad_norm": 2.195695638656616, "learning_rate": 9.461998292058071e-06, "loss": 0.7228, "step": 554 }, { "epoch": 0.01, "grad_norm": 3.016671657562256, "learning_rate": 9.479077711357814e-06, "loss": 0.6886, "step": 555 }, { "epoch": 0.01, "grad_norm": 4.1894659996032715, "learning_rate": 9.496157130657558e-06, "loss": 0.8199, "step": 556 }, { "epoch": 0.01, "grad_norm": 2.926382064819336, "learning_rate": 9.513236549957302e-06, "loss": 0.8066, "step": 557 }, { "epoch": 0.01, "grad_norm": 3.1306650638580322, "learning_rate": 9.530315969257046e-06, "loss": 0.7352, "step": 558 }, { "epoch": 0.01, "grad_norm": 3.982576608657837, "learning_rate": 9.547395388556789e-06, "loss": 1.0221, "step": 559 }, { "epoch": 0.01, "grad_norm": 2.3989675045013428, "learning_rate": 9.564474807856534e-06, "loss": 0.7601, "step": 560 }, { "epoch": 0.01, "grad_norm": 3.9547464847564697, "learning_rate": 9.581554227156278e-06, "loss": 0.6137, "step": 561 }, { "epoch": 0.01, "grad_norm": 3.5110526084899902, "learning_rate": 9.598633646456022e-06, "loss": 0.6523, "step": 562 }, { "epoch": 0.01, "grad_norm": 1.9636839628219604, "learning_rate": 9.615713065755765e-06, "loss": 0.7176, "step": 563 }, { "epoch": 0.01, "grad_norm": 3.6176140308380127, "learning_rate": 9.63279248505551e-06, "loss": 0.8262, "step": 564 }, { "epoch": 0.01, "grad_norm": 3.2634949684143066, "learning_rate": 9.649871904355253e-06, "loss": 0.6811, "step": 565 }, { "epoch": 0.01, "grad_norm": 3.2356581687927246, "learning_rate": 9.666951323654996e-06, "loss": 0.9877, "step": 566 }, { "epoch": 0.01, "grad_norm": 3.2829434871673584, "learning_rate": 9.68403074295474e-06, "loss": 0.9109, "step": 567 }, { "epoch": 0.01, "grad_norm": 2.298534631729126, "learning_rate": 9.701110162254484e-06, "loss": 0.8806, "step": 568 }, { "epoch": 0.01, "grad_norm": 3.613476514816284, "learning_rate": 9.718189581554229e-06, "loss": 0.9067, "step": 569 }, { "epoch": 0.01, "grad_norm": 3.547389030456543, "learning_rate": 9.735269000853972e-06, "loss": 0.7344, "step": 570 }, { "epoch": 0.01, "grad_norm": 2.045125722885132, "learning_rate": 9.752348420153715e-06, "loss": 0.7502, "step": 571 }, { "epoch": 0.01, "grad_norm": 3.057828903198242, "learning_rate": 9.76942783945346e-06, "loss": 0.7911, "step": 572 }, { "epoch": 0.01, "grad_norm": 2.477973461151123, "learning_rate": 9.786507258753203e-06, "loss": 0.714, "step": 573 }, { "epoch": 0.01, "grad_norm": 3.502427577972412, "learning_rate": 9.803586678052946e-06, "loss": 0.819, "step": 574 }, { "epoch": 0.01, "grad_norm": 4.445509910583496, "learning_rate": 9.820666097352691e-06, "loss": 0.983, "step": 575 }, { "epoch": 0.01, "grad_norm": 5.226978302001953, "learning_rate": 9.837745516652434e-06, "loss": 0.8355, "step": 576 }, { "epoch": 0.01, "grad_norm": 6.120433330535889, "learning_rate": 9.854824935952179e-06, "loss": 0.8997, "step": 577 }, { "epoch": 0.01, "grad_norm": 4.149734973907471, "learning_rate": 9.871904355251922e-06, "loss": 0.8112, "step": 578 }, { "epoch": 0.01, "grad_norm": 2.0132715702056885, "learning_rate": 9.888983774551665e-06, "loss": 0.6188, "step": 579 }, { "epoch": 0.01, "grad_norm": 2.991532802581787, "learning_rate": 9.90606319385141e-06, "loss": 0.7845, "step": 580 }, { "epoch": 0.01, "grad_norm": 2.689547300338745, "learning_rate": 9.923142613151153e-06, "loss": 0.783, "step": 581 }, { "epoch": 0.01, "grad_norm": 2.3636679649353027, "learning_rate": 9.940222032450898e-06, "loss": 0.7928, "step": 582 }, { "epoch": 0.01, "grad_norm": 3.455918073654175, "learning_rate": 9.957301451750641e-06, "loss": 0.7934, "step": 583 }, { "epoch": 0.01, "grad_norm": 2.879328966140747, "learning_rate": 9.974380871050386e-06, "loss": 0.7993, "step": 584 }, { "epoch": 0.01, "grad_norm": 4.099844455718994, "learning_rate": 9.99146029035013e-06, "loss": 0.8566, "step": 585 }, { "epoch": 0.02, "grad_norm": 3.0923609733581543, "learning_rate": 1.0008539709649874e-05, "loss": 0.8526, "step": 586 }, { "epoch": 0.02, "grad_norm": 4.939364910125732, "learning_rate": 1.0025619128949617e-05, "loss": 0.8204, "step": 587 }, { "epoch": 0.02, "grad_norm": 2.695359706878662, "learning_rate": 1.004269854824936e-05, "loss": 0.7479, "step": 588 }, { "epoch": 0.02, "grad_norm": 2.5201165676116943, "learning_rate": 1.0059777967549105e-05, "loss": 0.7103, "step": 589 }, { "epoch": 0.02, "grad_norm": 4.444264888763428, "learning_rate": 1.0076857386848848e-05, "loss": 0.8405, "step": 590 }, { "epoch": 0.02, "grad_norm": 3.356898546218872, "learning_rate": 1.0093936806148592e-05, "loss": 0.8113, "step": 591 }, { "epoch": 0.02, "grad_norm": 5.085420608520508, "learning_rate": 1.0111016225448336e-05, "loss": 0.7954, "step": 592 }, { "epoch": 0.02, "grad_norm": 2.417889356613159, "learning_rate": 1.012809564474808e-05, "loss": 0.7793, "step": 593 }, { "epoch": 0.02, "grad_norm": 3.3079257011413574, "learning_rate": 1.0145175064047823e-05, "loss": 0.867, "step": 594 }, { "epoch": 0.02, "grad_norm": 3.507485866546631, "learning_rate": 1.0162254483347568e-05, "loss": 0.883, "step": 595 }, { "epoch": 0.02, "grad_norm": 2.542987585067749, "learning_rate": 1.017933390264731e-05, "loss": 0.8717, "step": 596 }, { "epoch": 0.02, "grad_norm": 4.310905933380127, "learning_rate": 1.0196413321947054e-05, "loss": 0.8816, "step": 597 }, { "epoch": 0.02, "grad_norm": 3.8547818660736084, "learning_rate": 1.0213492741246797e-05, "loss": 0.7641, "step": 598 }, { "epoch": 0.02, "grad_norm": 3.3788256645202637, "learning_rate": 1.0230572160546542e-05, "loss": 0.7802, "step": 599 }, { "epoch": 0.02, "grad_norm": 6.727125644683838, "learning_rate": 1.0247651579846285e-05, "loss": 0.7374, "step": 600 }, { "epoch": 0.02, "grad_norm": 2.6413278579711914, "learning_rate": 1.0264730999146032e-05, "loss": 0.8565, "step": 601 }, { "epoch": 0.02, "grad_norm": 3.6861572265625, "learning_rate": 1.0281810418445775e-05, "loss": 0.8049, "step": 602 }, { "epoch": 0.02, "grad_norm": 3.737578868865967, "learning_rate": 1.0298889837745518e-05, "loss": 0.7365, "step": 603 }, { "epoch": 0.02, "grad_norm": 2.304172992706299, "learning_rate": 1.0315969257045263e-05, "loss": 0.7701, "step": 604 }, { "epoch": 0.02, "grad_norm": 2.2491202354431152, "learning_rate": 1.0333048676345006e-05, "loss": 0.7427, "step": 605 }, { "epoch": 0.02, "grad_norm": 3.4794297218322754, "learning_rate": 1.0350128095644749e-05, "loss": 0.8026, "step": 606 }, { "epoch": 0.02, "grad_norm": 2.5849263668060303, "learning_rate": 1.0367207514944492e-05, "loss": 0.7353, "step": 607 }, { "epoch": 0.02, "grad_norm": 3.3980460166931152, "learning_rate": 1.0384286934244237e-05, "loss": 0.7445, "step": 608 }, { "epoch": 0.02, "grad_norm": 2.9424283504486084, "learning_rate": 1.040136635354398e-05, "loss": 0.7785, "step": 609 }, { "epoch": 0.02, "grad_norm": 2.4273953437805176, "learning_rate": 1.0418445772843723e-05, "loss": 0.8976, "step": 610 }, { "epoch": 0.02, "grad_norm": 3.2920422554016113, "learning_rate": 1.0435525192143468e-05, "loss": 0.8415, "step": 611 }, { "epoch": 0.02, "grad_norm": 2.198826551437378, "learning_rate": 1.0452604611443211e-05, "loss": 0.8306, "step": 612 }, { "epoch": 0.02, "grad_norm": 2.3005266189575195, "learning_rate": 1.0469684030742954e-05, "loss": 0.5466, "step": 613 }, { "epoch": 0.02, "grad_norm": 2.763404369354248, "learning_rate": 1.04867634500427e-05, "loss": 0.8851, "step": 614 }, { "epoch": 0.02, "grad_norm": 4.109264373779297, "learning_rate": 1.0503842869342442e-05, "loss": 0.8119, "step": 615 }, { "epoch": 0.02, "grad_norm": 3.793832302093506, "learning_rate": 1.0520922288642187e-05, "loss": 0.7401, "step": 616 }, { "epoch": 0.02, "grad_norm": 3.937117099761963, "learning_rate": 1.0538001707941932e-05, "loss": 0.6678, "step": 617 }, { "epoch": 0.02, "grad_norm": 3.6953539848327637, "learning_rate": 1.0555081127241675e-05, "loss": 0.8194, "step": 618 }, { "epoch": 0.02, "grad_norm": 3.560394287109375, "learning_rate": 1.0572160546541418e-05, "loss": 0.7846, "step": 619 }, { "epoch": 0.02, "grad_norm": 3.7734553813934326, "learning_rate": 1.0589239965841163e-05, "loss": 0.755, "step": 620 }, { "epoch": 0.02, "grad_norm": 2.045543670654297, "learning_rate": 1.0606319385140906e-05, "loss": 0.6053, "step": 621 }, { "epoch": 0.02, "grad_norm": 3.071855306625366, "learning_rate": 1.062339880444065e-05, "loss": 0.8523, "step": 622 }, { "epoch": 0.02, "grad_norm": 2.160470962524414, "learning_rate": 1.0640478223740394e-05, "loss": 0.808, "step": 623 }, { "epoch": 0.02, "grad_norm": 3.836820602416992, "learning_rate": 1.0657557643040138e-05, "loss": 0.7759, "step": 624 }, { "epoch": 0.02, "grad_norm": 1.8425239324569702, "learning_rate": 1.067463706233988e-05, "loss": 0.7318, "step": 625 }, { "epoch": 0.02, "grad_norm": 4.275631427764893, "learning_rate": 1.0691716481639624e-05, "loss": 0.9288, "step": 626 }, { "epoch": 0.02, "grad_norm": 2.9701812267303467, "learning_rate": 1.0708795900939369e-05, "loss": 0.7343, "step": 627 }, { "epoch": 0.02, "grad_norm": 3.5109639167785645, "learning_rate": 1.0725875320239112e-05, "loss": 0.7314, "step": 628 }, { "epoch": 0.02, "grad_norm": 4.260982036590576, "learning_rate": 1.0742954739538855e-05, "loss": 0.7803, "step": 629 }, { "epoch": 0.02, "grad_norm": 3.2291131019592285, "learning_rate": 1.0760034158838602e-05, "loss": 0.7684, "step": 630 }, { "epoch": 0.02, "grad_norm": 8.758312225341797, "learning_rate": 1.0777113578138345e-05, "loss": 1.0043, "step": 631 }, { "epoch": 0.02, "grad_norm": 2.898860216140747, "learning_rate": 1.079419299743809e-05, "loss": 0.7377, "step": 632 }, { "epoch": 0.02, "grad_norm": 2.5983657836914062, "learning_rate": 1.0811272416737833e-05, "loss": 0.8086, "step": 633 }, { "epoch": 0.02, "grad_norm": 2.25891375541687, "learning_rate": 1.0828351836037576e-05, "loss": 0.6381, "step": 634 }, { "epoch": 0.02, "grad_norm": 3.042973518371582, "learning_rate": 1.0845431255337319e-05, "loss": 0.766, "step": 635 }, { "epoch": 0.02, "grad_norm": 3.4589767456054688, "learning_rate": 1.0862510674637064e-05, "loss": 0.781, "step": 636 }, { "epoch": 0.02, "grad_norm": 2.783015251159668, "learning_rate": 1.0879590093936807e-05, "loss": 0.6407, "step": 637 }, { "epoch": 0.02, "grad_norm": 2.8874902725219727, "learning_rate": 1.089666951323655e-05, "loss": 0.8518, "step": 638 }, { "epoch": 0.02, "grad_norm": 2.456463575363159, "learning_rate": 1.0913748932536295e-05, "loss": 0.6068, "step": 639 }, { "epoch": 0.02, "grad_norm": 6.961274147033691, "learning_rate": 1.0930828351836038e-05, "loss": 1.0172, "step": 640 }, { "epoch": 0.02, "grad_norm": 2.916649580001831, "learning_rate": 1.0947907771135781e-05, "loss": 0.7946, "step": 641 }, { "epoch": 0.02, "grad_norm": 1.9355584383010864, "learning_rate": 1.0964987190435526e-05, "loss": 0.7668, "step": 642 }, { "epoch": 0.02, "grad_norm": 3.7172882556915283, "learning_rate": 1.098206660973527e-05, "loss": 0.695, "step": 643 }, { "epoch": 0.02, "grad_norm": 3.5141525268554688, "learning_rate": 1.0999146029035012e-05, "loss": 0.7184, "step": 644 }, { "epoch": 0.02, "grad_norm": 4.877233982086182, "learning_rate": 1.1016225448334759e-05, "loss": 0.7365, "step": 645 }, { "epoch": 0.02, "grad_norm": 2.8183376789093018, "learning_rate": 1.1033304867634502e-05, "loss": 0.8868, "step": 646 }, { "epoch": 0.02, "grad_norm": 3.3897545337677, "learning_rate": 1.1050384286934245e-05, "loss": 0.8073, "step": 647 }, { "epoch": 0.02, "grad_norm": 2.7247824668884277, "learning_rate": 1.106746370623399e-05, "loss": 0.8105, "step": 648 }, { "epoch": 0.02, "grad_norm": 3.3970682621002197, "learning_rate": 1.1084543125533733e-05, "loss": 0.9264, "step": 649 }, { "epoch": 0.02, "grad_norm": 3.3502085208892822, "learning_rate": 1.1101622544833476e-05, "loss": 0.6656, "step": 650 }, { "epoch": 0.02, "grad_norm": 3.056995391845703, "learning_rate": 1.1118701964133221e-05, "loss": 0.7356, "step": 651 }, { "epoch": 0.02, "grad_norm": 1.6540746688842773, "learning_rate": 1.1135781383432964e-05, "loss": 0.8333, "step": 652 }, { "epoch": 0.02, "grad_norm": 3.110551118850708, "learning_rate": 1.1152860802732708e-05, "loss": 0.7475, "step": 653 }, { "epoch": 0.02, "grad_norm": 2.9428813457489014, "learning_rate": 1.1169940222032452e-05, "loss": 0.7822, "step": 654 }, { "epoch": 0.02, "grad_norm": 4.066218376159668, "learning_rate": 1.1187019641332196e-05, "loss": 0.8142, "step": 655 }, { "epoch": 0.02, "grad_norm": 3.898283004760742, "learning_rate": 1.1204099060631939e-05, "loss": 0.9406, "step": 656 }, { "epoch": 0.02, "grad_norm": 4.212637424468994, "learning_rate": 1.1221178479931682e-05, "loss": 0.8934, "step": 657 }, { "epoch": 0.02, "grad_norm": 8.231829643249512, "learning_rate": 1.1238257899231427e-05, "loss": 0.9836, "step": 658 }, { "epoch": 0.02, "grad_norm": 5.578640937805176, "learning_rate": 1.125533731853117e-05, "loss": 0.9245, "step": 659 }, { "epoch": 0.02, "grad_norm": 2.6345458030700684, "learning_rate": 1.1272416737830916e-05, "loss": 0.8753, "step": 660 }, { "epoch": 0.02, "grad_norm": 3.0369646549224854, "learning_rate": 1.128949615713066e-05, "loss": 0.6444, "step": 661 }, { "epoch": 0.02, "grad_norm": 6.080307960510254, "learning_rate": 1.1306575576430403e-05, "loss": 0.8295, "step": 662 }, { "epoch": 0.02, "grad_norm": 3.668437957763672, "learning_rate": 1.1323654995730147e-05, "loss": 0.8815, "step": 663 }, { "epoch": 0.02, "grad_norm": 3.529513120651245, "learning_rate": 1.134073441502989e-05, "loss": 0.8516, "step": 664 }, { "epoch": 0.02, "grad_norm": 3.6341607570648193, "learning_rate": 1.1357813834329634e-05, "loss": 0.7121, "step": 665 }, { "epoch": 0.02, "grad_norm": 1.6046475172042847, "learning_rate": 1.1374893253629377e-05, "loss": 0.7719, "step": 666 }, { "epoch": 0.02, "grad_norm": 4.0426130294799805, "learning_rate": 1.1391972672929122e-05, "loss": 0.8165, "step": 667 }, { "epoch": 0.02, "grad_norm": 4.442148208618164, "learning_rate": 1.1409052092228865e-05, "loss": 0.9359, "step": 668 }, { "epoch": 0.02, "grad_norm": 4.616211414337158, "learning_rate": 1.1426131511528608e-05, "loss": 0.8473, "step": 669 }, { "epoch": 0.02, "grad_norm": 2.693934202194214, "learning_rate": 1.1443210930828353e-05, "loss": 0.7358, "step": 670 }, { "epoch": 0.02, "grad_norm": 2.208000898361206, "learning_rate": 1.1460290350128096e-05, "loss": 0.5857, "step": 671 }, { "epoch": 0.02, "grad_norm": 3.3220183849334717, "learning_rate": 1.147736976942784e-05, "loss": 0.8347, "step": 672 }, { "epoch": 0.02, "grad_norm": 2.8911969661712646, "learning_rate": 1.1494449188727584e-05, "loss": 0.8865, "step": 673 }, { "epoch": 0.02, "grad_norm": 3.2412848472595215, "learning_rate": 1.1511528608027327e-05, "loss": 0.7385, "step": 674 }, { "epoch": 0.02, "grad_norm": 3.0361344814300537, "learning_rate": 1.1528608027327072e-05, "loss": 0.7318, "step": 675 }, { "epoch": 0.02, "grad_norm": 2.946446418762207, "learning_rate": 1.1545687446626817e-05, "loss": 0.6853, "step": 676 }, { "epoch": 0.02, "grad_norm": 3.596503496170044, "learning_rate": 1.156276686592656e-05, "loss": 0.7745, "step": 677 }, { "epoch": 0.02, "grad_norm": 4.013326168060303, "learning_rate": 1.1579846285226303e-05, "loss": 0.7411, "step": 678 }, { "epoch": 0.02, "grad_norm": 4.5120158195495605, "learning_rate": 1.1596925704526048e-05, "loss": 0.6379, "step": 679 }, { "epoch": 0.02, "grad_norm": 2.8464086055755615, "learning_rate": 1.1614005123825791e-05, "loss": 0.7379, "step": 680 }, { "epoch": 0.02, "grad_norm": 2.8108229637145996, "learning_rate": 1.1631084543125534e-05, "loss": 0.8053, "step": 681 }, { "epoch": 0.02, "grad_norm": 2.0504274368286133, "learning_rate": 1.164816396242528e-05, "loss": 0.7177, "step": 682 }, { "epoch": 0.02, "grad_norm": 2.4824209213256836, "learning_rate": 1.1665243381725022e-05, "loss": 0.7757, "step": 683 }, { "epoch": 0.02, "grad_norm": 3.118147134780884, "learning_rate": 1.1682322801024766e-05, "loss": 0.7674, "step": 684 }, { "epoch": 0.02, "grad_norm": 2.1982481479644775, "learning_rate": 1.1699402220324509e-05, "loss": 0.6474, "step": 685 }, { "epoch": 0.02, "grad_norm": 3.4874346256256104, "learning_rate": 1.1716481639624254e-05, "loss": 0.8052, "step": 686 }, { "epoch": 0.02, "grad_norm": 2.003042459487915, "learning_rate": 1.1733561058923997e-05, "loss": 0.7926, "step": 687 }, { "epoch": 0.02, "grad_norm": 3.0326383113861084, "learning_rate": 1.175064047822374e-05, "loss": 0.886, "step": 688 }, { "epoch": 0.02, "grad_norm": 2.5668835639953613, "learning_rate": 1.1767719897523485e-05, "loss": 0.6685, "step": 689 }, { "epoch": 0.02, "grad_norm": 4.151274681091309, "learning_rate": 1.178479931682323e-05, "loss": 0.8635, "step": 690 }, { "epoch": 0.02, "grad_norm": 2.7690203189849854, "learning_rate": 1.1801878736122974e-05, "loss": 0.7488, "step": 691 }, { "epoch": 0.02, "grad_norm": 2.9099643230438232, "learning_rate": 1.1818958155422717e-05, "loss": 0.7312, "step": 692 }, { "epoch": 0.02, "grad_norm": 2.9892075061798096, "learning_rate": 1.183603757472246e-05, "loss": 0.8704, "step": 693 }, { "epoch": 0.02, "grad_norm": 2.7101047039031982, "learning_rate": 1.1853116994022204e-05, "loss": 0.6712, "step": 694 }, { "epoch": 0.02, "grad_norm": 3.546537399291992, "learning_rate": 1.1870196413321949e-05, "loss": 0.8074, "step": 695 }, { "epoch": 0.02, "grad_norm": 3.0881409645080566, "learning_rate": 1.1887275832621692e-05, "loss": 0.772, "step": 696 }, { "epoch": 0.02, "grad_norm": 2.4055821895599365, "learning_rate": 1.1904355251921435e-05, "loss": 0.8157, "step": 697 }, { "epoch": 0.02, "grad_norm": 2.0585110187530518, "learning_rate": 1.192143467122118e-05, "loss": 0.747, "step": 698 }, { "epoch": 0.02, "grad_norm": 3.02641224861145, "learning_rate": 1.1938514090520923e-05, "loss": 0.873, "step": 699 }, { "epoch": 0.02, "grad_norm": 4.62553071975708, "learning_rate": 1.1955593509820666e-05, "loss": 0.7733, "step": 700 }, { "epoch": 0.02, "grad_norm": 4.668773651123047, "learning_rate": 1.1972672929120411e-05, "loss": 0.6904, "step": 701 }, { "epoch": 0.02, "grad_norm": 3.0870940685272217, "learning_rate": 1.1989752348420154e-05, "loss": 0.6351, "step": 702 }, { "epoch": 0.02, "grad_norm": 3.142413854598999, "learning_rate": 1.2006831767719897e-05, "loss": 0.6403, "step": 703 }, { "epoch": 0.02, "grad_norm": 4.061575412750244, "learning_rate": 1.202391118701964e-05, "loss": 0.8361, "step": 704 }, { "epoch": 0.02, "grad_norm": 6.185072898864746, "learning_rate": 1.2040990606319387e-05, "loss": 0.6738, "step": 705 }, { "epoch": 0.02, "grad_norm": 3.187251567840576, "learning_rate": 1.205807002561913e-05, "loss": 0.9625, "step": 706 }, { "epoch": 0.02, "grad_norm": 4.648924350738525, "learning_rate": 1.2075149444918875e-05, "loss": 0.9574, "step": 707 }, { "epoch": 0.02, "grad_norm": 2.876337766647339, "learning_rate": 1.2092228864218618e-05, "loss": 0.7833, "step": 708 }, { "epoch": 0.02, "grad_norm": 3.2264692783355713, "learning_rate": 1.2109308283518361e-05, "loss": 0.8964, "step": 709 }, { "epoch": 0.02, "grad_norm": 2.8002398014068604, "learning_rate": 1.2126387702818106e-05, "loss": 0.6658, "step": 710 }, { "epoch": 0.02, "grad_norm": 3.313262462615967, "learning_rate": 1.214346712211785e-05, "loss": 0.7375, "step": 711 }, { "epoch": 0.02, "grad_norm": 2.9167604446411133, "learning_rate": 1.2160546541417592e-05, "loss": 0.7827, "step": 712 }, { "epoch": 0.02, "grad_norm": 4.746559143066406, "learning_rate": 1.2177625960717337e-05, "loss": 0.832, "step": 713 }, { "epoch": 0.02, "grad_norm": 3.9411983489990234, "learning_rate": 1.219470538001708e-05, "loss": 0.826, "step": 714 }, { "epoch": 0.02, "grad_norm": 3.187473773956299, "learning_rate": 1.2211784799316824e-05, "loss": 0.8282, "step": 715 }, { "epoch": 0.02, "grad_norm": 3.74277925491333, "learning_rate": 1.2228864218616567e-05, "loss": 0.933, "step": 716 }, { "epoch": 0.02, "grad_norm": 2.357206344604492, "learning_rate": 1.2245943637916311e-05, "loss": 0.6732, "step": 717 }, { "epoch": 0.02, "grad_norm": 4.196084022521973, "learning_rate": 1.2263023057216055e-05, "loss": 0.7101, "step": 718 }, { "epoch": 0.02, "grad_norm": 2.7986514568328857, "learning_rate": 1.2280102476515801e-05, "loss": 0.7704, "step": 719 }, { "epoch": 0.02, "grad_norm": 4.532196998596191, "learning_rate": 1.2297181895815544e-05, "loss": 0.7433, "step": 720 }, { "epoch": 0.02, "grad_norm": 2.232556104660034, "learning_rate": 1.2314261315115287e-05, "loss": 0.7521, "step": 721 }, { "epoch": 0.02, "grad_norm": 3.1965179443359375, "learning_rate": 1.2331340734415032e-05, "loss": 0.8208, "step": 722 }, { "epoch": 0.02, "grad_norm": 3.7490596771240234, "learning_rate": 1.2348420153714775e-05, "loss": 0.7989, "step": 723 }, { "epoch": 0.02, "grad_norm": 2.860229969024658, "learning_rate": 1.2365499573014519e-05, "loss": 0.907, "step": 724 }, { "epoch": 0.02, "grad_norm": 2.1412172317504883, "learning_rate": 1.2382578992314262e-05, "loss": 0.8212, "step": 725 }, { "epoch": 0.02, "grad_norm": 4.05622673034668, "learning_rate": 1.2399658411614007e-05, "loss": 0.7753, "step": 726 }, { "epoch": 0.02, "grad_norm": 2.3033666610717773, "learning_rate": 1.241673783091375e-05, "loss": 0.7331, "step": 727 }, { "epoch": 0.02, "grad_norm": 2.941500186920166, "learning_rate": 1.2433817250213493e-05, "loss": 0.8786, "step": 728 }, { "epoch": 0.02, "grad_norm": 3.2294387817382812, "learning_rate": 1.2450896669513238e-05, "loss": 0.9563, "step": 729 }, { "epoch": 0.02, "grad_norm": 3.112511396408081, "learning_rate": 1.2467976088812981e-05, "loss": 0.7884, "step": 730 }, { "epoch": 0.02, "grad_norm": 2.699444055557251, "learning_rate": 1.2485055508112724e-05, "loss": 0.8197, "step": 731 }, { "epoch": 0.02, "grad_norm": 2.566727638244629, "learning_rate": 1.2502134927412469e-05, "loss": 0.8442, "step": 732 }, { "epoch": 0.02, "grad_norm": 2.511369466781616, "learning_rate": 1.2519214346712212e-05, "loss": 0.7512, "step": 733 }, { "epoch": 0.02, "grad_norm": 3.2615649700164795, "learning_rate": 1.2536293766011957e-05, "loss": 0.7315, "step": 734 }, { "epoch": 0.02, "grad_norm": 2.936147928237915, "learning_rate": 1.2553373185311702e-05, "loss": 0.7684, "step": 735 }, { "epoch": 0.02, "grad_norm": 3.2956299781799316, "learning_rate": 1.2570452604611445e-05, "loss": 0.7678, "step": 736 }, { "epoch": 0.02, "grad_norm": 5.580329895019531, "learning_rate": 1.2587532023911188e-05, "loss": 0.8011, "step": 737 }, { "epoch": 0.02, "grad_norm": 4.336657524108887, "learning_rate": 1.2604611443210933e-05, "loss": 0.8084, "step": 738 }, { "epoch": 0.02, "grad_norm": 2.4090776443481445, "learning_rate": 1.2621690862510676e-05, "loss": 0.877, "step": 739 }, { "epoch": 0.02, "grad_norm": 3.614872455596924, "learning_rate": 1.263877028181042e-05, "loss": 0.7914, "step": 740 }, { "epoch": 0.02, "grad_norm": 3.6448051929473877, "learning_rate": 1.2655849701110164e-05, "loss": 0.8766, "step": 741 }, { "epoch": 0.02, "grad_norm": 5.817109107971191, "learning_rate": 1.2672929120409907e-05, "loss": 1.1051, "step": 742 }, { "epoch": 0.02, "grad_norm": 2.774996757507324, "learning_rate": 1.269000853970965e-05, "loss": 0.774, "step": 743 }, { "epoch": 0.02, "grad_norm": 2.619610548019409, "learning_rate": 1.2707087959009393e-05, "loss": 0.7596, "step": 744 }, { "epoch": 0.02, "grad_norm": 2.3999979496002197, "learning_rate": 1.2724167378309138e-05, "loss": 0.8086, "step": 745 }, { "epoch": 0.02, "grad_norm": 3.749920606613159, "learning_rate": 1.2741246797608881e-05, "loss": 0.7061, "step": 746 }, { "epoch": 0.02, "grad_norm": 3.3086071014404297, "learning_rate": 1.2758326216908625e-05, "loss": 0.7404, "step": 747 }, { "epoch": 0.02, "grad_norm": 3.1183838844299316, "learning_rate": 1.277540563620837e-05, "loss": 0.8058, "step": 748 }, { "epoch": 0.02, "grad_norm": 2.2490079402923584, "learning_rate": 1.2792485055508114e-05, "loss": 0.6317, "step": 749 }, { "epoch": 0.02, "grad_norm": 3.466470718383789, "learning_rate": 1.280956447480786e-05, "loss": 0.9882, "step": 750 }, { "epoch": 0.02, "grad_norm": 4.249578475952148, "learning_rate": 1.2826643894107602e-05, "loss": 0.8387, "step": 751 }, { "epoch": 0.02, "grad_norm": 2.3434529304504395, "learning_rate": 1.2843723313407345e-05, "loss": 0.8216, "step": 752 }, { "epoch": 0.02, "grad_norm": 3.018197774887085, "learning_rate": 1.2860802732707089e-05, "loss": 0.7291, "step": 753 }, { "epoch": 0.02, "grad_norm": 3.0312862396240234, "learning_rate": 1.2877882152006833e-05, "loss": 0.6646, "step": 754 }, { "epoch": 0.02, "grad_norm": 3.9808242321014404, "learning_rate": 1.2894961571306577e-05, "loss": 0.7762, "step": 755 }, { "epoch": 0.02, "grad_norm": 2.2993264198303223, "learning_rate": 1.291204099060632e-05, "loss": 0.7422, "step": 756 }, { "epoch": 0.02, "grad_norm": 2.5416982173919678, "learning_rate": 1.2929120409906065e-05, "loss": 0.6795, "step": 757 }, { "epoch": 0.02, "grad_norm": 2.667382001876831, "learning_rate": 1.2946199829205808e-05, "loss": 0.7989, "step": 758 }, { "epoch": 0.02, "grad_norm": 2.730283737182617, "learning_rate": 1.2963279248505551e-05, "loss": 0.7183, "step": 759 }, { "epoch": 0.02, "grad_norm": 3.0337905883789062, "learning_rate": 1.2980358667805296e-05, "loss": 0.7008, "step": 760 }, { "epoch": 0.02, "grad_norm": 4.910609722137451, "learning_rate": 1.2997438087105039e-05, "loss": 0.7456, "step": 761 }, { "epoch": 0.02, "grad_norm": 4.024744510650635, "learning_rate": 1.3014517506404782e-05, "loss": 0.8628, "step": 762 }, { "epoch": 0.02, "grad_norm": 3.2185280323028564, "learning_rate": 1.3031596925704525e-05, "loss": 0.8889, "step": 763 }, { "epoch": 0.02, "grad_norm": 2.5667431354522705, "learning_rate": 1.3048676345004272e-05, "loss": 0.7991, "step": 764 }, { "epoch": 0.02, "grad_norm": 2.31144642829895, "learning_rate": 1.3065755764304015e-05, "loss": 0.7326, "step": 765 }, { "epoch": 0.02, "grad_norm": 1.643700122833252, "learning_rate": 1.308283518360376e-05, "loss": 0.7791, "step": 766 }, { "epoch": 0.02, "grad_norm": 2.742544651031494, "learning_rate": 1.3099914602903503e-05, "loss": 0.5786, "step": 767 }, { "epoch": 0.02, "grad_norm": 2.8121495246887207, "learning_rate": 1.3116994022203246e-05, "loss": 0.7164, "step": 768 }, { "epoch": 0.02, "grad_norm": 3.3002848625183105, "learning_rate": 1.3134073441502991e-05, "loss": 0.8718, "step": 769 }, { "epoch": 0.02, "grad_norm": 3.8596832752227783, "learning_rate": 1.3151152860802734e-05, "loss": 0.6842, "step": 770 }, { "epoch": 0.02, "grad_norm": 3.374643325805664, "learning_rate": 1.3168232280102477e-05, "loss": 0.7282, "step": 771 }, { "epoch": 0.02, "grad_norm": 1.9516253471374512, "learning_rate": 1.3185311699402222e-05, "loss": 0.6356, "step": 772 }, { "epoch": 0.02, "grad_norm": 3.1914408206939697, "learning_rate": 1.3202391118701965e-05, "loss": 0.8356, "step": 773 }, { "epoch": 0.02, "grad_norm": 3.507594108581543, "learning_rate": 1.3219470538001708e-05, "loss": 0.7548, "step": 774 }, { "epoch": 0.02, "grad_norm": 3.818682909011841, "learning_rate": 1.3236549957301451e-05, "loss": 0.811, "step": 775 }, { "epoch": 0.02, "grad_norm": 2.279670476913452, "learning_rate": 1.3253629376601196e-05, "loss": 0.7866, "step": 776 }, { "epoch": 0.02, "grad_norm": 3.0152480602264404, "learning_rate": 1.327070879590094e-05, "loss": 0.8579, "step": 777 }, { "epoch": 0.02, "grad_norm": 3.453826665878296, "learning_rate": 1.3287788215200683e-05, "loss": 0.7781, "step": 778 }, { "epoch": 0.02, "grad_norm": 2.9560041427612305, "learning_rate": 1.330486763450043e-05, "loss": 0.7878, "step": 779 }, { "epoch": 0.02, "grad_norm": 4.6884002685546875, "learning_rate": 1.3321947053800172e-05, "loss": 0.8459, "step": 780 }, { "epoch": 0.02, "grad_norm": 3.02754545211792, "learning_rate": 1.3339026473099917e-05, "loss": 0.7951, "step": 781 }, { "epoch": 0.02, "grad_norm": 3.1526825428009033, "learning_rate": 1.335610589239966e-05, "loss": 0.8261, "step": 782 }, { "epoch": 0.02, "grad_norm": 2.6167166233062744, "learning_rate": 1.3373185311699403e-05, "loss": 0.6886, "step": 783 }, { "epoch": 0.02, "grad_norm": 2.856337785720825, "learning_rate": 1.3390264730999147e-05, "loss": 0.8062, "step": 784 }, { "epoch": 0.02, "grad_norm": 2.5240566730499268, "learning_rate": 1.3407344150298891e-05, "loss": 0.6945, "step": 785 }, { "epoch": 0.02, "grad_norm": 2.9511828422546387, "learning_rate": 1.3424423569598635e-05, "loss": 0.8376, "step": 786 }, { "epoch": 0.02, "grad_norm": 3.386659860610962, "learning_rate": 1.3441502988898378e-05, "loss": 0.8103, "step": 787 }, { "epoch": 0.02, "grad_norm": 3.7089123725891113, "learning_rate": 1.3458582408198123e-05, "loss": 0.7613, "step": 788 }, { "epoch": 0.02, "grad_norm": 3.306945562362671, "learning_rate": 1.3475661827497866e-05, "loss": 0.7975, "step": 789 }, { "epoch": 0.02, "grad_norm": 2.674121141433716, "learning_rate": 1.3492741246797609e-05, "loss": 0.8355, "step": 790 }, { "epoch": 0.02, "grad_norm": 2.544180393218994, "learning_rate": 1.3509820666097354e-05, "loss": 0.8328, "step": 791 }, { "epoch": 0.02, "grad_norm": 4.019703388214111, "learning_rate": 1.3526900085397097e-05, "loss": 0.6987, "step": 792 }, { "epoch": 0.02, "grad_norm": 3.12965726852417, "learning_rate": 1.354397950469684e-05, "loss": 0.9729, "step": 793 }, { "epoch": 0.02, "grad_norm": 7.8302412033081055, "learning_rate": 1.3561058923996587e-05, "loss": 0.995, "step": 794 }, { "epoch": 0.02, "grad_norm": 2.96563458442688, "learning_rate": 1.357813834329633e-05, "loss": 0.7997, "step": 795 }, { "epoch": 0.02, "grad_norm": 5.452208042144775, "learning_rate": 1.3595217762596073e-05, "loss": 0.6808, "step": 796 }, { "epoch": 0.02, "grad_norm": 3.0249600410461426, "learning_rate": 1.3612297181895818e-05, "loss": 0.8565, "step": 797 }, { "epoch": 0.02, "grad_norm": 2.6709420680999756, "learning_rate": 1.3629376601195561e-05, "loss": 0.7456, "step": 798 }, { "epoch": 0.02, "grad_norm": 2.8818552494049072, "learning_rate": 1.3646456020495304e-05, "loss": 0.9593, "step": 799 }, { "epoch": 0.02, "grad_norm": 4.351900100708008, "learning_rate": 1.3663535439795049e-05, "loss": 0.7755, "step": 800 }, { "epoch": 0.02, "grad_norm": 3.4632697105407715, "learning_rate": 1.3680614859094792e-05, "loss": 0.6819, "step": 801 }, { "epoch": 0.02, "grad_norm": 3.6609363555908203, "learning_rate": 1.3697694278394535e-05, "loss": 0.9141, "step": 802 }, { "epoch": 0.02, "grad_norm": 3.0789883136749268, "learning_rate": 1.3714773697694278e-05, "loss": 0.6957, "step": 803 }, { "epoch": 0.02, "grad_norm": 3.8184306621551514, "learning_rate": 1.3731853116994023e-05, "loss": 0.8148, "step": 804 }, { "epoch": 0.02, "grad_norm": 5.173587322235107, "learning_rate": 1.3748932536293766e-05, "loss": 0.7638, "step": 805 }, { "epoch": 0.02, "grad_norm": 2.454754590988159, "learning_rate": 1.376601195559351e-05, "loss": 0.7647, "step": 806 }, { "epoch": 0.02, "grad_norm": 2.3847603797912598, "learning_rate": 1.3783091374893254e-05, "loss": 0.7205, "step": 807 }, { "epoch": 0.02, "grad_norm": 2.237233877182007, "learning_rate": 1.3800170794193e-05, "loss": 0.6526, "step": 808 }, { "epoch": 0.02, "grad_norm": 2.576564311981201, "learning_rate": 1.3817250213492744e-05, "loss": 0.762, "step": 809 }, { "epoch": 0.02, "grad_norm": 2.6249682903289795, "learning_rate": 1.3834329632792487e-05, "loss": 0.8038, "step": 810 }, { "epoch": 0.02, "grad_norm": 3.097668409347534, "learning_rate": 1.385140905209223e-05, "loss": 0.9266, "step": 811 }, { "epoch": 0.02, "grad_norm": 3.519441843032837, "learning_rate": 1.3868488471391973e-05, "loss": 0.8113, "step": 812 }, { "epoch": 0.02, "grad_norm": 2.919684648513794, "learning_rate": 1.3885567890691718e-05, "loss": 0.8243, "step": 813 }, { "epoch": 0.02, "grad_norm": 2.580141067504883, "learning_rate": 1.3902647309991461e-05, "loss": 0.801, "step": 814 }, { "epoch": 0.02, "grad_norm": 2.6389875411987305, "learning_rate": 1.3919726729291205e-05, "loss": 0.8775, "step": 815 }, { "epoch": 0.02, "grad_norm": 3.2771482467651367, "learning_rate": 1.393680614859095e-05, "loss": 0.7697, "step": 816 }, { "epoch": 0.02, "grad_norm": 2.770529270172119, "learning_rate": 1.3953885567890693e-05, "loss": 0.7771, "step": 817 }, { "epoch": 0.02, "grad_norm": 3.445558786392212, "learning_rate": 1.3970964987190436e-05, "loss": 0.7399, "step": 818 }, { "epoch": 0.02, "grad_norm": 3.434093713760376, "learning_rate": 1.398804440649018e-05, "loss": 0.7521, "step": 819 }, { "epoch": 0.02, "grad_norm": 2.1680610179901123, "learning_rate": 1.4005123825789924e-05, "loss": 0.815, "step": 820 }, { "epoch": 0.02, "grad_norm": 3.819737434387207, "learning_rate": 1.4022203245089667e-05, "loss": 0.8283, "step": 821 }, { "epoch": 0.02, "grad_norm": 1.7282003164291382, "learning_rate": 1.403928266438941e-05, "loss": 0.7574, "step": 822 }, { "epoch": 0.02, "grad_norm": 2.494852066040039, "learning_rate": 1.4056362083689157e-05, "loss": 0.7276, "step": 823 }, { "epoch": 0.02, "grad_norm": 2.6341419219970703, "learning_rate": 1.40734415029889e-05, "loss": 0.809, "step": 824 }, { "epoch": 0.02, "grad_norm": 2.7496519088745117, "learning_rate": 1.4090520922288645e-05, "loss": 0.7094, "step": 825 }, { "epoch": 0.02, "grad_norm": 2.4753074645996094, "learning_rate": 1.4107600341588388e-05, "loss": 0.867, "step": 826 }, { "epoch": 0.02, "grad_norm": 4.747101306915283, "learning_rate": 1.4124679760888131e-05, "loss": 0.7928, "step": 827 }, { "epoch": 0.02, "grad_norm": 1.6359885931015015, "learning_rate": 1.4141759180187876e-05, "loss": 0.6299, "step": 828 }, { "epoch": 0.02, "grad_norm": 2.9521708488464355, "learning_rate": 1.4158838599487619e-05, "loss": 0.7934, "step": 829 }, { "epoch": 0.02, "grad_norm": 2.9229631423950195, "learning_rate": 1.4175918018787362e-05, "loss": 0.8305, "step": 830 }, { "epoch": 0.02, "grad_norm": 2.58524751663208, "learning_rate": 1.4192997438087107e-05, "loss": 0.7785, "step": 831 }, { "epoch": 0.02, "grad_norm": 3.1622867584228516, "learning_rate": 1.421007685738685e-05, "loss": 0.7564, "step": 832 }, { "epoch": 0.02, "grad_norm": 2.24251127243042, "learning_rate": 1.4227156276686593e-05, "loss": 0.7386, "step": 833 }, { "epoch": 0.02, "grad_norm": 2.669055461883545, "learning_rate": 1.4244235695986336e-05, "loss": 0.7724, "step": 834 }, { "epoch": 0.02, "grad_norm": 2.588347911834717, "learning_rate": 1.4261315115286081e-05, "loss": 0.909, "step": 835 }, { "epoch": 0.02, "grad_norm": 3.764509439468384, "learning_rate": 1.4278394534585824e-05, "loss": 0.8858, "step": 836 }, { "epoch": 0.02, "grad_norm": 3.1135752201080322, "learning_rate": 1.4295473953885567e-05, "loss": 0.8062, "step": 837 }, { "epoch": 0.02, "grad_norm": 2.1589877605438232, "learning_rate": 1.4312553373185314e-05, "loss": 0.6317, "step": 838 }, { "epoch": 0.02, "grad_norm": 3.5860910415649414, "learning_rate": 1.4329632792485057e-05, "loss": 0.7535, "step": 839 }, { "epoch": 0.02, "grad_norm": 3.079777479171753, "learning_rate": 1.4346712211784802e-05, "loss": 0.818, "step": 840 }, { "epoch": 0.02, "grad_norm": 2.9600226879119873, "learning_rate": 1.4363791631084545e-05, "loss": 0.8323, "step": 841 }, { "epoch": 0.02, "grad_norm": 3.8128116130828857, "learning_rate": 1.4380871050384288e-05, "loss": 0.8933, "step": 842 }, { "epoch": 0.02, "grad_norm": 3.543454885482788, "learning_rate": 1.4397950469684031e-05, "loss": 0.9809, "step": 843 }, { "epoch": 0.02, "grad_norm": 7.7166972160339355, "learning_rate": 1.4415029888983776e-05, "loss": 1.1253, "step": 844 }, { "epoch": 0.02, "grad_norm": 3.2158377170562744, "learning_rate": 1.443210930828352e-05, "loss": 0.8234, "step": 845 }, { "epoch": 0.02, "grad_norm": 3.0003814697265625, "learning_rate": 1.4449188727583263e-05, "loss": 0.6594, "step": 846 }, { "epoch": 0.02, "grad_norm": 1.8523085117340088, "learning_rate": 1.4466268146883007e-05, "loss": 0.7667, "step": 847 }, { "epoch": 0.02, "grad_norm": 3.635009527206421, "learning_rate": 1.448334756618275e-05, "loss": 0.6236, "step": 848 }, { "epoch": 0.02, "grad_norm": 2.4013283252716064, "learning_rate": 1.4500426985482494e-05, "loss": 0.9172, "step": 849 }, { "epoch": 0.02, "grad_norm": 2.2681682109832764, "learning_rate": 1.4517506404782239e-05, "loss": 0.7501, "step": 850 }, { "epoch": 0.02, "grad_norm": 1.8830595016479492, "learning_rate": 1.4534585824081982e-05, "loss": 0.8136, "step": 851 }, { "epoch": 0.02, "grad_norm": 2.754767417907715, "learning_rate": 1.4551665243381725e-05, "loss": 0.6814, "step": 852 }, { "epoch": 0.02, "grad_norm": 3.5315043926239014, "learning_rate": 1.4568744662681471e-05, "loss": 0.7434, "step": 853 }, { "epoch": 0.02, "grad_norm": 3.1944215297698975, "learning_rate": 1.4585824081981215e-05, "loss": 0.8161, "step": 854 }, { "epoch": 0.02, "grad_norm": 2.135037899017334, "learning_rate": 1.4602903501280958e-05, "loss": 0.6894, "step": 855 }, { "epoch": 0.02, "grad_norm": 5.266753673553467, "learning_rate": 1.4619982920580703e-05, "loss": 0.7565, "step": 856 }, { "epoch": 0.02, "grad_norm": 2.5856382846832275, "learning_rate": 1.4637062339880446e-05, "loss": 0.7985, "step": 857 }, { "epoch": 0.02, "grad_norm": 2.6645781993865967, "learning_rate": 1.4654141759180189e-05, "loss": 0.8118, "step": 858 }, { "epoch": 0.02, "grad_norm": 2.2212798595428467, "learning_rate": 1.4671221178479934e-05, "loss": 0.6553, "step": 859 }, { "epoch": 0.02, "grad_norm": 5.8295698165893555, "learning_rate": 1.4688300597779677e-05, "loss": 0.865, "step": 860 }, { "epoch": 0.02, "grad_norm": 2.0738306045532227, "learning_rate": 1.470538001707942e-05, "loss": 0.7348, "step": 861 }, { "epoch": 0.02, "grad_norm": 2.82796049118042, "learning_rate": 1.4722459436379163e-05, "loss": 0.7723, "step": 862 }, { "epoch": 0.02, "grad_norm": 2.832493305206299, "learning_rate": 1.4739538855678908e-05, "loss": 0.8081, "step": 863 }, { "epoch": 0.02, "grad_norm": 2.785722255706787, "learning_rate": 1.4756618274978651e-05, "loss": 0.7429, "step": 864 }, { "epoch": 0.02, "grad_norm": 3.7535040378570557, "learning_rate": 1.4773697694278394e-05, "loss": 0.8649, "step": 865 }, { "epoch": 0.02, "grad_norm": 2.2660975456237793, "learning_rate": 1.479077711357814e-05, "loss": 0.7781, "step": 866 }, { "epoch": 0.02, "grad_norm": 2.435584783554077, "learning_rate": 1.4807856532877882e-05, "loss": 0.6967, "step": 867 }, { "epoch": 0.02, "grad_norm": 6.973475933074951, "learning_rate": 1.4824935952177629e-05, "loss": 0.7899, "step": 868 }, { "epoch": 0.02, "grad_norm": 2.8231730461120605, "learning_rate": 1.4842015371477372e-05, "loss": 0.7513, "step": 869 }, { "epoch": 0.02, "grad_norm": 5.662015914916992, "learning_rate": 1.4859094790777115e-05, "loss": 0.9403, "step": 870 }, { "epoch": 0.02, "grad_norm": 3.435380697250366, "learning_rate": 1.4876174210076858e-05, "loss": 0.8392, "step": 871 }, { "epoch": 0.02, "grad_norm": 2.154661178588867, "learning_rate": 1.4893253629376603e-05, "loss": 0.7228, "step": 872 }, { "epoch": 0.02, "grad_norm": 3.2156777381896973, "learning_rate": 1.4910333048676346e-05, "loss": 0.7985, "step": 873 }, { "epoch": 0.02, "grad_norm": 2.4912517070770264, "learning_rate": 1.492741246797609e-05, "loss": 0.7637, "step": 874 }, { "epoch": 0.02, "grad_norm": 2.2826309204101562, "learning_rate": 1.4944491887275834e-05, "loss": 0.753, "step": 875 }, { "epoch": 0.02, "grad_norm": 2.9507603645324707, "learning_rate": 1.4961571306575577e-05, "loss": 0.8078, "step": 876 }, { "epoch": 0.02, "grad_norm": 3.3670265674591064, "learning_rate": 1.497865072587532e-05, "loss": 0.9307, "step": 877 }, { "epoch": 0.02, "grad_norm": 2.352262258529663, "learning_rate": 1.4995730145175065e-05, "loss": 0.698, "step": 878 }, { "epoch": 0.02, "grad_norm": 2.6778056621551514, "learning_rate": 1.5012809564474809e-05, "loss": 0.8891, "step": 879 }, { "epoch": 0.02, "grad_norm": 3.341905355453491, "learning_rate": 1.5029888983774552e-05, "loss": 0.7243, "step": 880 }, { "epoch": 0.02, "grad_norm": 3.041945695877075, "learning_rate": 1.5046968403074297e-05, "loss": 0.8065, "step": 881 }, { "epoch": 0.02, "grad_norm": 9.081212997436523, "learning_rate": 1.5064047822374041e-05, "loss": 0.85, "step": 882 }, { "epoch": 0.02, "grad_norm": 2.6685194969177246, "learning_rate": 1.5081127241673785e-05, "loss": 0.7386, "step": 883 }, { "epoch": 0.02, "grad_norm": 3.8211896419525146, "learning_rate": 1.509820666097353e-05, "loss": 0.762, "step": 884 }, { "epoch": 0.02, "grad_norm": 3.112929582595825, "learning_rate": 1.5115286080273273e-05, "loss": 0.8156, "step": 885 }, { "epoch": 0.02, "grad_norm": 2.7675089836120605, "learning_rate": 1.5132365499573016e-05, "loss": 0.8606, "step": 886 }, { "epoch": 0.02, "grad_norm": 3.9130961894989014, "learning_rate": 1.514944491887276e-05, "loss": 0.9323, "step": 887 }, { "epoch": 0.02, "grad_norm": 3.154071092605591, "learning_rate": 1.5166524338172504e-05, "loss": 0.7538, "step": 888 }, { "epoch": 0.02, "grad_norm": 2.238497018814087, "learning_rate": 1.5183603757472247e-05, "loss": 0.752, "step": 889 }, { "epoch": 0.02, "grad_norm": 2.42651104927063, "learning_rate": 1.5200683176771992e-05, "loss": 0.7709, "step": 890 }, { "epoch": 0.02, "grad_norm": 5.014307498931885, "learning_rate": 1.5217762596071735e-05, "loss": 0.7255, "step": 891 }, { "epoch": 0.02, "grad_norm": 3.188631057739258, "learning_rate": 1.5234842015371478e-05, "loss": 0.7927, "step": 892 }, { "epoch": 0.02, "grad_norm": 3.3015780448913574, "learning_rate": 1.5251921434671221e-05, "loss": 0.9653, "step": 893 }, { "epoch": 0.02, "grad_norm": 4.200216770172119, "learning_rate": 1.5269000853970966e-05, "loss": 0.7913, "step": 894 }, { "epoch": 0.02, "grad_norm": 3.1887943744659424, "learning_rate": 1.528608027327071e-05, "loss": 0.8674, "step": 895 }, { "epoch": 0.02, "grad_norm": 2.3897628784179688, "learning_rate": 1.5303159692570452e-05, "loss": 0.6924, "step": 896 }, { "epoch": 0.02, "grad_norm": 4.090991020202637, "learning_rate": 1.53202391118702e-05, "loss": 0.8052, "step": 897 }, { "epoch": 0.02, "grad_norm": 2.70516037940979, "learning_rate": 1.5337318531169942e-05, "loss": 0.9763, "step": 898 }, { "epoch": 0.02, "grad_norm": 2.6622474193573, "learning_rate": 1.5354397950469685e-05, "loss": 0.8176, "step": 899 }, { "epoch": 0.02, "grad_norm": 2.5183897018432617, "learning_rate": 1.5371477369769428e-05, "loss": 0.7143, "step": 900 }, { "epoch": 0.02, "grad_norm": 2.619964361190796, "learning_rate": 1.5388556789069175e-05, "loss": 0.8146, "step": 901 }, { "epoch": 0.02, "grad_norm": 3.1043105125427246, "learning_rate": 1.5405636208368918e-05, "loss": 0.8496, "step": 902 }, { "epoch": 0.02, "grad_norm": 1.437910795211792, "learning_rate": 1.542271562766866e-05, "loss": 0.7291, "step": 903 }, { "epoch": 0.02, "grad_norm": 3.217322826385498, "learning_rate": 1.5439795046968404e-05, "loss": 0.741, "step": 904 }, { "epoch": 0.02, "grad_norm": 4.226545810699463, "learning_rate": 1.5456874466268147e-05, "loss": 0.9701, "step": 905 }, { "epoch": 0.02, "grad_norm": 4.008462429046631, "learning_rate": 1.547395388556789e-05, "loss": 0.9994, "step": 906 }, { "epoch": 0.02, "grad_norm": 6.915613651275635, "learning_rate": 1.5491033304867634e-05, "loss": 0.9416, "step": 907 }, { "epoch": 0.02, "grad_norm": 2.843380928039551, "learning_rate": 1.550811272416738e-05, "loss": 0.747, "step": 908 }, { "epoch": 0.02, "grad_norm": 3.32609486579895, "learning_rate": 1.5525192143467123e-05, "loss": 0.8541, "step": 909 }, { "epoch": 0.02, "grad_norm": 2.442087173461914, "learning_rate": 1.5542271562766867e-05, "loss": 0.8065, "step": 910 }, { "epoch": 0.02, "grad_norm": 3.2463438510894775, "learning_rate": 1.555935098206661e-05, "loss": 0.9034, "step": 911 }, { "epoch": 0.02, "grad_norm": 2.005885362625122, "learning_rate": 1.5576430401366356e-05, "loss": 0.7854, "step": 912 }, { "epoch": 0.02, "grad_norm": 2.4072635173797607, "learning_rate": 1.55935098206661e-05, "loss": 0.792, "step": 913 }, { "epoch": 0.02, "grad_norm": 3.186060905456543, "learning_rate": 1.5610589239965843e-05, "loss": 0.8734, "step": 914 }, { "epoch": 0.02, "grad_norm": 2.5938010215759277, "learning_rate": 1.5627668659265586e-05, "loss": 0.7238, "step": 915 }, { "epoch": 0.02, "grad_norm": 3.417628049850464, "learning_rate": 1.564474807856533e-05, "loss": 0.8185, "step": 916 }, { "epoch": 0.02, "grad_norm": 2.9414596557617188, "learning_rate": 1.5661827497865075e-05, "loss": 0.7183, "step": 917 }, { "epoch": 0.02, "grad_norm": 1.8203840255737305, "learning_rate": 1.567890691716482e-05, "loss": 0.7229, "step": 918 }, { "epoch": 0.02, "grad_norm": 2.521333694458008, "learning_rate": 1.5695986336464562e-05, "loss": 0.9127, "step": 919 }, { "epoch": 0.02, "grad_norm": 2.638359785079956, "learning_rate": 1.5713065755764305e-05, "loss": 0.9759, "step": 920 }, { "epoch": 0.02, "grad_norm": 2.9102516174316406, "learning_rate": 1.5730145175064048e-05, "loss": 0.8315, "step": 921 }, { "epoch": 0.02, "grad_norm": 2.285825490951538, "learning_rate": 1.574722459436379e-05, "loss": 0.5457, "step": 922 }, { "epoch": 0.02, "grad_norm": 2.3878862857818604, "learning_rate": 1.5764304013663538e-05, "loss": 0.7567, "step": 923 }, { "epoch": 0.02, "grad_norm": 2.7471814155578613, "learning_rate": 1.578138343296328e-05, "loss": 0.9042, "step": 924 }, { "epoch": 0.02, "grad_norm": 2.988367795944214, "learning_rate": 1.5798462852263024e-05, "loss": 0.77, "step": 925 }, { "epoch": 0.02, "grad_norm": 4.2609782218933105, "learning_rate": 1.5815542271562767e-05, "loss": 0.9825, "step": 926 }, { "epoch": 0.02, "grad_norm": 3.6844727993011475, "learning_rate": 1.5832621690862514e-05, "loss": 0.9313, "step": 927 }, { "epoch": 0.02, "grad_norm": 2.6971631050109863, "learning_rate": 1.5849701110162257e-05, "loss": 0.7763, "step": 928 }, { "epoch": 0.02, "grad_norm": 3.44460129737854, "learning_rate": 1.5866780529462e-05, "loss": 0.9405, "step": 929 }, { "epoch": 0.02, "grad_norm": 2.535343647003174, "learning_rate": 1.5883859948761743e-05, "loss": 0.9035, "step": 930 }, { "epoch": 0.02, "grad_norm": 4.281418800354004, "learning_rate": 1.5900939368061486e-05, "loss": 0.8085, "step": 931 }, { "epoch": 0.02, "grad_norm": 2.155865430831909, "learning_rate": 1.5918018787361233e-05, "loss": 0.6768, "step": 932 }, { "epoch": 0.02, "grad_norm": 4.292337417602539, "learning_rate": 1.5935098206660976e-05, "loss": 0.9362, "step": 933 }, { "epoch": 0.02, "grad_norm": 2.9223246574401855, "learning_rate": 1.595217762596072e-05, "loss": 0.851, "step": 934 }, { "epoch": 0.02, "grad_norm": 2.827951669692993, "learning_rate": 1.5969257045260462e-05, "loss": 0.8147, "step": 935 }, { "epoch": 0.02, "grad_norm": 3.9000961780548096, "learning_rate": 1.5986336464560205e-05, "loss": 0.8056, "step": 936 }, { "epoch": 0.02, "grad_norm": 3.540894031524658, "learning_rate": 1.600341588385995e-05, "loss": 0.8145, "step": 937 }, { "epoch": 0.02, "grad_norm": 4.508580207824707, "learning_rate": 1.6020495303159692e-05, "loss": 0.7715, "step": 938 }, { "epoch": 0.02, "grad_norm": 2.233579635620117, "learning_rate": 1.6037574722459438e-05, "loss": 0.813, "step": 939 }, { "epoch": 0.02, "grad_norm": 3.0132105350494385, "learning_rate": 1.605465414175918e-05, "loss": 0.775, "step": 940 }, { "epoch": 0.02, "grad_norm": 3.078601837158203, "learning_rate": 1.6071733561058925e-05, "loss": 0.8325, "step": 941 }, { "epoch": 0.02, "grad_norm": 4.092113494873047, "learning_rate": 1.608881298035867e-05, "loss": 0.8324, "step": 942 }, { "epoch": 0.02, "grad_norm": 3.954735040664673, "learning_rate": 1.6105892399658414e-05, "loss": 0.7896, "step": 943 }, { "epoch": 0.02, "grad_norm": 3.3004422187805176, "learning_rate": 1.6122971818958157e-05, "loss": 0.884, "step": 944 }, { "epoch": 0.02, "grad_norm": 2.2446794509887695, "learning_rate": 1.61400512382579e-05, "loss": 0.8323, "step": 945 }, { "epoch": 0.02, "grad_norm": 2.4992454051971436, "learning_rate": 1.6157130657557644e-05, "loss": 0.897, "step": 946 }, { "epoch": 0.02, "grad_norm": 2.635991334915161, "learning_rate": 1.6174210076857387e-05, "loss": 0.7816, "step": 947 }, { "epoch": 0.02, "grad_norm": 3.4757039546966553, "learning_rate": 1.6191289496157133e-05, "loss": 0.8021, "step": 948 }, { "epoch": 0.02, "grad_norm": 3.6280601024627686, "learning_rate": 1.6208368915456877e-05, "loss": 0.7905, "step": 949 }, { "epoch": 0.02, "grad_norm": 2.5179669857025146, "learning_rate": 1.622544833475662e-05, "loss": 0.7926, "step": 950 }, { "epoch": 0.02, "grad_norm": 1.886633038520813, "learning_rate": 1.6242527754056363e-05, "loss": 0.9035, "step": 951 }, { "epoch": 0.02, "grad_norm": 2.736003875732422, "learning_rate": 1.6259607173356106e-05, "loss": 0.6952, "step": 952 }, { "epoch": 0.02, "grad_norm": 2.4319961071014404, "learning_rate": 1.627668659265585e-05, "loss": 0.8038, "step": 953 }, { "epoch": 0.02, "grad_norm": 2.677124261856079, "learning_rate": 1.6293766011955596e-05, "loss": 0.8132, "step": 954 }, { "epoch": 0.02, "grad_norm": 2.4246180057525635, "learning_rate": 1.631084543125534e-05, "loss": 0.792, "step": 955 }, { "epoch": 0.02, "grad_norm": 4.968759059906006, "learning_rate": 1.6327924850555082e-05, "loss": 0.8125, "step": 956 }, { "epoch": 0.02, "grad_norm": 3.558786153793335, "learning_rate": 1.634500426985483e-05, "loss": 0.7819, "step": 957 }, { "epoch": 0.02, "grad_norm": 2.4055652618408203, "learning_rate": 1.636208368915457e-05, "loss": 0.7183, "step": 958 }, { "epoch": 0.02, "grad_norm": 2.319672107696533, "learning_rate": 1.6379163108454315e-05, "loss": 0.701, "step": 959 }, { "epoch": 0.02, "grad_norm": 2.700363874435425, "learning_rate": 1.6396242527754058e-05, "loss": 0.8529, "step": 960 }, { "epoch": 0.02, "grad_norm": 1.5508159399032593, "learning_rate": 1.64133219470538e-05, "loss": 0.7206, "step": 961 }, { "epoch": 0.02, "grad_norm": 2.168226957321167, "learning_rate": 1.6430401366353544e-05, "loss": 0.6121, "step": 962 }, { "epoch": 0.02, "grad_norm": 2.9037981033325195, "learning_rate": 1.644748078565329e-05, "loss": 0.6535, "step": 963 }, { "epoch": 0.02, "grad_norm": 3.5887017250061035, "learning_rate": 1.6464560204953034e-05, "loss": 1.0751, "step": 964 }, { "epoch": 0.02, "grad_norm": 3.225578784942627, "learning_rate": 1.6481639624252777e-05, "loss": 0.6105, "step": 965 }, { "epoch": 0.02, "grad_norm": 2.962453842163086, "learning_rate": 1.649871904355252e-05, "loss": 0.7418, "step": 966 }, { "epoch": 0.02, "grad_norm": 5.992652416229248, "learning_rate": 1.6515798462852263e-05, "loss": 0.7197, "step": 967 }, { "epoch": 0.02, "grad_norm": 2.4350192546844482, "learning_rate": 1.6532877882152007e-05, "loss": 0.716, "step": 968 }, { "epoch": 0.02, "grad_norm": 4.371222019195557, "learning_rate": 1.654995730145175e-05, "loss": 0.8364, "step": 969 }, { "epoch": 0.02, "grad_norm": 2.655705690383911, "learning_rate": 1.6567036720751496e-05, "loss": 0.8715, "step": 970 }, { "epoch": 0.02, "grad_norm": 2.573885440826416, "learning_rate": 1.658411614005124e-05, "loss": 0.8821, "step": 971 }, { "epoch": 0.02, "grad_norm": 1.8690526485443115, "learning_rate": 1.6601195559350986e-05, "loss": 0.7728, "step": 972 }, { "epoch": 0.02, "grad_norm": 2.356935977935791, "learning_rate": 1.661827497865073e-05, "loss": 0.776, "step": 973 }, { "epoch": 0.02, "grad_norm": 2.451805353164673, "learning_rate": 1.6635354397950472e-05, "loss": 0.8454, "step": 974 }, { "epoch": 0.02, "grad_norm": 3.5289952754974365, "learning_rate": 1.6652433817250215e-05, "loss": 0.7003, "step": 975 }, { "epoch": 0.03, "grad_norm": 4.057528018951416, "learning_rate": 1.666951323654996e-05, "loss": 0.9259, "step": 976 }, { "epoch": 0.03, "grad_norm": 2.178936719894409, "learning_rate": 1.6686592655849702e-05, "loss": 0.8998, "step": 977 }, { "epoch": 0.03, "grad_norm": 2.130765199661255, "learning_rate": 1.6703672075149445e-05, "loss": 0.8032, "step": 978 }, { "epoch": 0.03, "grad_norm": 3.1957502365112305, "learning_rate": 1.672075149444919e-05, "loss": 0.8274, "step": 979 }, { "epoch": 0.03, "grad_norm": 2.0377044677734375, "learning_rate": 1.6737830913748935e-05, "loss": 0.7236, "step": 980 }, { "epoch": 0.03, "grad_norm": 2.257361650466919, "learning_rate": 1.6754910333048678e-05, "loss": 0.7494, "step": 981 }, { "epoch": 0.03, "grad_norm": 4.391912460327148, "learning_rate": 1.677198975234842e-05, "loss": 0.7714, "step": 982 }, { "epoch": 0.03, "grad_norm": 4.832991123199463, "learning_rate": 1.6789069171648164e-05, "loss": 0.8916, "step": 983 }, { "epoch": 0.03, "grad_norm": 2.5776641368865967, "learning_rate": 1.6806148590947907e-05, "loss": 0.8707, "step": 984 }, { "epoch": 0.03, "grad_norm": 2.7279813289642334, "learning_rate": 1.682322801024765e-05, "loss": 0.7939, "step": 985 }, { "epoch": 0.03, "grad_norm": 3.0142598152160645, "learning_rate": 1.6840307429547397e-05, "loss": 0.8906, "step": 986 }, { "epoch": 0.03, "grad_norm": 3.768833637237549, "learning_rate": 1.685738684884714e-05, "loss": 0.8492, "step": 987 }, { "epoch": 0.03, "grad_norm": 2.3993136882781982, "learning_rate": 1.6874466268146887e-05, "loss": 0.8557, "step": 988 }, { "epoch": 0.03, "grad_norm": 2.3177542686462402, "learning_rate": 1.689154568744663e-05, "loss": 0.7507, "step": 989 }, { "epoch": 0.03, "grad_norm": 3.358872652053833, "learning_rate": 1.6908625106746373e-05, "loss": 0.7497, "step": 990 }, { "epoch": 0.03, "grad_norm": 3.1594834327697754, "learning_rate": 1.6925704526046116e-05, "loss": 0.7364, "step": 991 }, { "epoch": 0.03, "grad_norm": 2.238678455352783, "learning_rate": 1.694278394534586e-05, "loss": 0.6748, "step": 992 }, { "epoch": 0.03, "grad_norm": 3.5831758975982666, "learning_rate": 1.6959863364645602e-05, "loss": 0.7083, "step": 993 }, { "epoch": 0.03, "grad_norm": 3.030291795730591, "learning_rate": 1.6976942783945345e-05, "loss": 0.9875, "step": 994 }, { "epoch": 0.03, "grad_norm": 2.070420265197754, "learning_rate": 1.6994022203245092e-05, "loss": 0.8762, "step": 995 }, { "epoch": 0.03, "grad_norm": 3.3566908836364746, "learning_rate": 1.7011101622544835e-05, "loss": 0.7551, "step": 996 }, { "epoch": 0.03, "grad_norm": 2.1418421268463135, "learning_rate": 1.7028181041844578e-05, "loss": 0.6811, "step": 997 }, { "epoch": 0.03, "grad_norm": 3.0155398845672607, "learning_rate": 1.704526046114432e-05, "loss": 0.7373, "step": 998 }, { "epoch": 0.03, "grad_norm": 2.5152995586395264, "learning_rate": 1.7062339880444065e-05, "loss": 0.6414, "step": 999 }, { "epoch": 0.03, "grad_norm": 2.2490408420562744, "learning_rate": 1.7079419299743808e-05, "loss": 0.8125, "step": 1000 }, { "epoch": 0.03, "grad_norm": 3.405811309814453, "learning_rate": 1.7096498719043554e-05, "loss": 0.8983, "step": 1001 }, { "epoch": 0.03, "grad_norm": 3.04483962059021, "learning_rate": 1.7113578138343297e-05, "loss": 0.7417, "step": 1002 }, { "epoch": 0.03, "grad_norm": 2.632514476776123, "learning_rate": 1.7130657557643044e-05, "loss": 0.6962, "step": 1003 }, { "epoch": 0.03, "grad_norm": 4.526426792144775, "learning_rate": 1.7147736976942787e-05, "loss": 0.8874, "step": 1004 }, { "epoch": 0.03, "grad_norm": 2.666090965270996, "learning_rate": 1.716481639624253e-05, "loss": 0.8228, "step": 1005 }, { "epoch": 0.03, "grad_norm": 2.14530348777771, "learning_rate": 1.7181895815542273e-05, "loss": 0.9354, "step": 1006 }, { "epoch": 0.03, "grad_norm": 4.118762016296387, "learning_rate": 1.7198975234842017e-05, "loss": 0.9285, "step": 1007 }, { "epoch": 0.03, "grad_norm": 2.2870116233825684, "learning_rate": 1.721605465414176e-05, "loss": 0.7013, "step": 1008 }, { "epoch": 0.03, "grad_norm": 3.4648890495300293, "learning_rate": 1.7233134073441503e-05, "loss": 0.6596, "step": 1009 }, { "epoch": 0.03, "grad_norm": 2.826615333557129, "learning_rate": 1.725021349274125e-05, "loss": 0.8784, "step": 1010 }, { "epoch": 0.03, "grad_norm": 2.3151590824127197, "learning_rate": 1.7267292912040993e-05, "loss": 0.6764, "step": 1011 }, { "epoch": 0.03, "grad_norm": 4.277312278747559, "learning_rate": 1.7284372331340736e-05, "loss": 1.0518, "step": 1012 }, { "epoch": 0.03, "grad_norm": 5.03536319732666, "learning_rate": 1.730145175064048e-05, "loss": 0.8646, "step": 1013 }, { "epoch": 0.03, "grad_norm": 4.568234443664551, "learning_rate": 1.7318531169940222e-05, "loss": 0.87, "step": 1014 }, { "epoch": 0.03, "grad_norm": 3.083894729614258, "learning_rate": 1.7335610589239965e-05, "loss": 0.6741, "step": 1015 }, { "epoch": 0.03, "grad_norm": 2.5036656856536865, "learning_rate": 1.735269000853971e-05, "loss": 0.8554, "step": 1016 }, { "epoch": 0.03, "grad_norm": 3.951734781265259, "learning_rate": 1.7369769427839455e-05, "loss": 1.0907, "step": 1017 }, { "epoch": 0.03, "grad_norm": 2.495894432067871, "learning_rate": 1.7386848847139198e-05, "loss": 0.8394, "step": 1018 }, { "epoch": 0.03, "grad_norm": 2.3821654319763184, "learning_rate": 1.7403928266438945e-05, "loss": 0.7803, "step": 1019 }, { "epoch": 0.03, "grad_norm": 3.081557273864746, "learning_rate": 1.7421007685738688e-05, "loss": 0.8568, "step": 1020 }, { "epoch": 0.03, "grad_norm": 2.039357900619507, "learning_rate": 1.743808710503843e-05, "loss": 0.6642, "step": 1021 }, { "epoch": 0.03, "grad_norm": 2.2692019939422607, "learning_rate": 1.7455166524338174e-05, "loss": 0.828, "step": 1022 }, { "epoch": 0.03, "grad_norm": 1.9309641122817993, "learning_rate": 1.7472245943637917e-05, "loss": 0.5829, "step": 1023 }, { "epoch": 0.03, "grad_norm": 2.1415462493896484, "learning_rate": 1.748932536293766e-05, "loss": 0.7512, "step": 1024 }, { "epoch": 0.03, "grad_norm": 2.301450729370117, "learning_rate": 1.7506404782237403e-05, "loss": 0.8588, "step": 1025 }, { "epoch": 0.03, "grad_norm": 3.1023614406585693, "learning_rate": 1.752348420153715e-05, "loss": 0.7587, "step": 1026 }, { "epoch": 0.03, "grad_norm": 1.950189232826233, "learning_rate": 1.7540563620836893e-05, "loss": 0.6765, "step": 1027 }, { "epoch": 0.03, "grad_norm": 2.979086399078369, "learning_rate": 1.7557643040136636e-05, "loss": 0.7326, "step": 1028 }, { "epoch": 0.03, "grad_norm": 4.7761735916137695, "learning_rate": 1.757472245943638e-05, "loss": 0.7849, "step": 1029 }, { "epoch": 0.03, "grad_norm": 2.6181039810180664, "learning_rate": 1.7591801878736123e-05, "loss": 0.743, "step": 1030 }, { "epoch": 0.03, "grad_norm": 3.2917568683624268, "learning_rate": 1.760888129803587e-05, "loss": 0.818, "step": 1031 }, { "epoch": 0.03, "grad_norm": 4.123427391052246, "learning_rate": 1.7625960717335612e-05, "loss": 0.8069, "step": 1032 }, { "epoch": 0.03, "grad_norm": 2.0404815673828125, "learning_rate": 1.7643040136635355e-05, "loss": 0.6814, "step": 1033 }, { "epoch": 0.03, "grad_norm": 3.7378408908843994, "learning_rate": 1.76601195559351e-05, "loss": 0.9792, "step": 1034 }, { "epoch": 0.03, "grad_norm": 2.5134787559509277, "learning_rate": 1.7677198975234845e-05, "loss": 0.6633, "step": 1035 }, { "epoch": 0.03, "grad_norm": 2.8117971420288086, "learning_rate": 1.7694278394534588e-05, "loss": 0.6744, "step": 1036 }, { "epoch": 0.03, "grad_norm": 1.978379726409912, "learning_rate": 1.771135781383433e-05, "loss": 0.6294, "step": 1037 }, { "epoch": 0.03, "grad_norm": 3.540844678878784, "learning_rate": 1.7728437233134075e-05, "loss": 0.8406, "step": 1038 }, { "epoch": 0.03, "grad_norm": 2.6411705017089844, "learning_rate": 1.7745516652433818e-05, "loss": 0.7808, "step": 1039 }, { "epoch": 0.03, "grad_norm": 1.7740825414657593, "learning_rate": 1.776259607173356e-05, "loss": 0.8539, "step": 1040 }, { "epoch": 0.03, "grad_norm": 2.471078634262085, "learning_rate": 1.7779675491033307e-05, "loss": 0.7612, "step": 1041 }, { "epoch": 0.03, "grad_norm": 1.335935354232788, "learning_rate": 1.779675491033305e-05, "loss": 0.7247, "step": 1042 }, { "epoch": 0.03, "grad_norm": 2.8882505893707275, "learning_rate": 1.7813834329632794e-05, "loss": 0.8588, "step": 1043 }, { "epoch": 0.03, "grad_norm": 2.0537195205688477, "learning_rate": 1.7830913748932537e-05, "loss": 0.6383, "step": 1044 }, { "epoch": 0.03, "grad_norm": 3.9161298274993896, "learning_rate": 1.784799316823228e-05, "loss": 0.7128, "step": 1045 }, { "epoch": 0.03, "grad_norm": 3.8747034072875977, "learning_rate": 1.7865072587532027e-05, "loss": 0.9194, "step": 1046 }, { "epoch": 0.03, "grad_norm": 1.9995126724243164, "learning_rate": 1.788215200683177e-05, "loss": 0.7288, "step": 1047 }, { "epoch": 0.03, "grad_norm": 1.8629298210144043, "learning_rate": 1.7899231426131513e-05, "loss": 0.7086, "step": 1048 }, { "epoch": 0.03, "grad_norm": 3.5039751529693604, "learning_rate": 1.7916310845431256e-05, "loss": 0.8217, "step": 1049 }, { "epoch": 0.03, "grad_norm": 2.3049352169036865, "learning_rate": 1.7933390264731003e-05, "loss": 0.733, "step": 1050 }, { "epoch": 0.03, "grad_norm": 2.7990527153015137, "learning_rate": 1.7950469684030746e-05, "loss": 0.8156, "step": 1051 }, { "epoch": 0.03, "grad_norm": 2.6152760982513428, "learning_rate": 1.796754910333049e-05, "loss": 0.6585, "step": 1052 }, { "epoch": 0.03, "grad_norm": 2.708345413208008, "learning_rate": 1.7984628522630232e-05, "loss": 0.6147, "step": 1053 }, { "epoch": 0.03, "grad_norm": 3.6725683212280273, "learning_rate": 1.8001707941929975e-05, "loss": 0.7956, "step": 1054 }, { "epoch": 0.03, "grad_norm": 2.1063272953033447, "learning_rate": 1.8018787361229718e-05, "loss": 0.6095, "step": 1055 }, { "epoch": 0.03, "grad_norm": 2.2786262035369873, "learning_rate": 1.803586678052946e-05, "loss": 0.8414, "step": 1056 }, { "epoch": 0.03, "grad_norm": 2.5469205379486084, "learning_rate": 1.8052946199829208e-05, "loss": 0.7328, "step": 1057 }, { "epoch": 0.03, "grad_norm": 2.9873368740081787, "learning_rate": 1.807002561912895e-05, "loss": 0.7599, "step": 1058 }, { "epoch": 0.03, "grad_norm": 2.40712308883667, "learning_rate": 1.8087105038428694e-05, "loss": 0.6999, "step": 1059 }, { "epoch": 0.03, "grad_norm": 2.2091264724731445, "learning_rate": 1.810418445772844e-05, "loss": 0.6606, "step": 1060 }, { "epoch": 0.03, "grad_norm": 3.636197566986084, "learning_rate": 1.8121263877028184e-05, "loss": 0.907, "step": 1061 }, { "epoch": 0.03, "grad_norm": 2.184514045715332, "learning_rate": 1.8138343296327927e-05, "loss": 0.7789, "step": 1062 }, { "epoch": 0.03, "grad_norm": 2.8204739093780518, "learning_rate": 1.815542271562767e-05, "loss": 0.6861, "step": 1063 }, { "epoch": 0.03, "grad_norm": 2.2026312351226807, "learning_rate": 1.8172502134927413e-05, "loss": 0.7492, "step": 1064 }, { "epoch": 0.03, "grad_norm": 2.9578776359558105, "learning_rate": 1.8189581554227157e-05, "loss": 0.7056, "step": 1065 }, { "epoch": 0.03, "grad_norm": 2.4182302951812744, "learning_rate": 1.8206660973526903e-05, "loss": 0.7118, "step": 1066 }, { "epoch": 0.03, "grad_norm": 2.2237603664398193, "learning_rate": 1.8223740392826646e-05, "loss": 0.669, "step": 1067 }, { "epoch": 0.03, "grad_norm": 2.8544251918792725, "learning_rate": 1.824081981212639e-05, "loss": 0.7767, "step": 1068 }, { "epoch": 0.03, "grad_norm": 1.8052538633346558, "learning_rate": 1.8257899231426133e-05, "loss": 0.7668, "step": 1069 }, { "epoch": 0.03, "grad_norm": 2.2052950859069824, "learning_rate": 1.8274978650725876e-05, "loss": 0.606, "step": 1070 }, { "epoch": 0.03, "grad_norm": 3.306347131729126, "learning_rate": 1.829205807002562e-05, "loss": 0.892, "step": 1071 }, { "epoch": 0.03, "grad_norm": 3.121760606765747, "learning_rate": 1.8309137489325365e-05, "loss": 0.7123, "step": 1072 }, { "epoch": 0.03, "grad_norm": 3.592468500137329, "learning_rate": 1.832621690862511e-05, "loss": 0.7743, "step": 1073 }, { "epoch": 0.03, "grad_norm": 3.2071163654327393, "learning_rate": 1.834329632792485e-05, "loss": 0.7504, "step": 1074 }, { "epoch": 0.03, "grad_norm": 2.103132486343384, "learning_rate": 1.8360375747224598e-05, "loss": 0.8355, "step": 1075 }, { "epoch": 0.03, "grad_norm": 2.829335927963257, "learning_rate": 1.837745516652434e-05, "loss": 0.877, "step": 1076 }, { "epoch": 0.03, "grad_norm": 2.389340400695801, "learning_rate": 1.8394534585824085e-05, "loss": 0.8213, "step": 1077 }, { "epoch": 0.03, "grad_norm": 2.682692050933838, "learning_rate": 1.8411614005123828e-05, "loss": 0.8037, "step": 1078 }, { "epoch": 0.03, "grad_norm": 3.809706926345825, "learning_rate": 1.842869342442357e-05, "loss": 0.8162, "step": 1079 }, { "epoch": 0.03, "grad_norm": 2.653491497039795, "learning_rate": 1.8445772843723314e-05, "loss": 0.7048, "step": 1080 }, { "epoch": 0.03, "grad_norm": 3.639702081680298, "learning_rate": 1.846285226302306e-05, "loss": 0.896, "step": 1081 }, { "epoch": 0.03, "grad_norm": 2.426255226135254, "learning_rate": 1.8479931682322804e-05, "loss": 0.7184, "step": 1082 }, { "epoch": 0.03, "grad_norm": 2.3134984970092773, "learning_rate": 1.8497011101622547e-05, "loss": 0.8803, "step": 1083 }, { "epoch": 0.03, "grad_norm": 3.0868093967437744, "learning_rate": 1.851409052092229e-05, "loss": 0.8564, "step": 1084 }, { "epoch": 0.03, "grad_norm": 5.332130432128906, "learning_rate": 1.8531169940222033e-05, "loss": 0.9437, "step": 1085 }, { "epoch": 0.03, "grad_norm": 2.8503451347351074, "learning_rate": 1.8548249359521776e-05, "loss": 0.9471, "step": 1086 }, { "epoch": 0.03, "grad_norm": 2.0830018520355225, "learning_rate": 1.856532877882152e-05, "loss": 0.744, "step": 1087 }, { "epoch": 0.03, "grad_norm": 2.097691059112549, "learning_rate": 1.8582408198121266e-05, "loss": 0.8284, "step": 1088 }, { "epoch": 0.03, "grad_norm": 2.2152819633483887, "learning_rate": 1.859948761742101e-05, "loss": 0.7831, "step": 1089 }, { "epoch": 0.03, "grad_norm": 1.7972685098648071, "learning_rate": 1.8616567036720756e-05, "loss": 0.5726, "step": 1090 }, { "epoch": 0.03, "grad_norm": 3.010390043258667, "learning_rate": 1.86336464560205e-05, "loss": 0.9077, "step": 1091 }, { "epoch": 0.03, "grad_norm": 5.839191913604736, "learning_rate": 1.8650725875320242e-05, "loss": 0.6986, "step": 1092 }, { "epoch": 0.03, "grad_norm": 2.020425319671631, "learning_rate": 1.8667805294619985e-05, "loss": 0.7114, "step": 1093 }, { "epoch": 0.03, "grad_norm": 1.9859302043914795, "learning_rate": 1.8684884713919728e-05, "loss": 0.6747, "step": 1094 }, { "epoch": 0.03, "grad_norm": 2.8802947998046875, "learning_rate": 1.870196413321947e-05, "loss": 0.8107, "step": 1095 }, { "epoch": 0.03, "grad_norm": 2.548832893371582, "learning_rate": 1.8719043552519215e-05, "loss": 0.696, "step": 1096 }, { "epoch": 0.03, "grad_norm": 2.1739227771759033, "learning_rate": 1.873612297181896e-05, "loss": 0.7554, "step": 1097 }, { "epoch": 0.03, "grad_norm": 3.477830648422241, "learning_rate": 1.8753202391118704e-05, "loss": 0.7591, "step": 1098 }, { "epoch": 0.03, "grad_norm": 1.888677716255188, "learning_rate": 1.8770281810418447e-05, "loss": 0.6435, "step": 1099 }, { "epoch": 0.03, "grad_norm": 2.423578977584839, "learning_rate": 1.878736122971819e-05, "loss": 0.7436, "step": 1100 }, { "epoch": 0.03, "grad_norm": 3.4077494144439697, "learning_rate": 1.8804440649017934e-05, "loss": 0.7225, "step": 1101 }, { "epoch": 0.03, "grad_norm": 1.6487576961517334, "learning_rate": 1.8821520068317677e-05, "loss": 0.677, "step": 1102 }, { "epoch": 0.03, "grad_norm": 2.071155309677124, "learning_rate": 1.883859948761742e-05, "loss": 0.7271, "step": 1103 }, { "epoch": 0.03, "grad_norm": 3.1662168502807617, "learning_rate": 1.8855678906917167e-05, "loss": 0.8803, "step": 1104 }, { "epoch": 0.03, "grad_norm": 2.7326815128326416, "learning_rate": 1.887275832621691e-05, "loss": 0.7967, "step": 1105 }, { "epoch": 0.03, "grad_norm": 4.663082122802734, "learning_rate": 1.8889837745516656e-05, "loss": 0.7975, "step": 1106 }, { "epoch": 0.03, "grad_norm": 2.160766839981079, "learning_rate": 1.89069171648164e-05, "loss": 0.8103, "step": 1107 }, { "epoch": 0.03, "grad_norm": 3.1768229007720947, "learning_rate": 1.8923996584116143e-05, "loss": 0.9831, "step": 1108 }, { "epoch": 0.03, "grad_norm": 2.542081117630005, "learning_rate": 1.8941076003415886e-05, "loss": 0.8918, "step": 1109 }, { "epoch": 0.03, "grad_norm": 2.706131935119629, "learning_rate": 1.895815542271563e-05, "loss": 0.8641, "step": 1110 }, { "epoch": 0.03, "grad_norm": 2.8602302074432373, "learning_rate": 1.8975234842015372e-05, "loss": 0.7635, "step": 1111 }, { "epoch": 0.03, "grad_norm": 3.7660038471221924, "learning_rate": 1.8992314261315115e-05, "loss": 0.8785, "step": 1112 }, { "epoch": 0.03, "grad_norm": 2.6990842819213867, "learning_rate": 1.900939368061486e-05, "loss": 0.8345, "step": 1113 }, { "epoch": 0.03, "grad_norm": 2.635401725769043, "learning_rate": 1.9026473099914605e-05, "loss": 0.7571, "step": 1114 }, { "epoch": 0.03, "grad_norm": 2.870137929916382, "learning_rate": 1.9043552519214348e-05, "loss": 0.8255, "step": 1115 }, { "epoch": 0.03, "grad_norm": 3.6114730834960938, "learning_rate": 1.906063193851409e-05, "loss": 0.8316, "step": 1116 }, { "epoch": 0.03, "grad_norm": 2.6597633361816406, "learning_rate": 1.9077711357813834e-05, "loss": 0.8829, "step": 1117 }, { "epoch": 0.03, "grad_norm": 2.605487585067749, "learning_rate": 1.9094790777113577e-05, "loss": 0.7506, "step": 1118 }, { "epoch": 0.03, "grad_norm": 2.059455394744873, "learning_rate": 1.9111870196413324e-05, "loss": 0.7958, "step": 1119 }, { "epoch": 0.03, "grad_norm": 2.5712430477142334, "learning_rate": 1.9128949615713067e-05, "loss": 0.7953, "step": 1120 }, { "epoch": 0.03, "grad_norm": 4.92414665222168, "learning_rate": 1.9146029035012814e-05, "loss": 1.0335, "step": 1121 }, { "epoch": 0.03, "grad_norm": 2.3180694580078125, "learning_rate": 1.9163108454312557e-05, "loss": 0.706, "step": 1122 }, { "epoch": 0.03, "grad_norm": 2.8132121562957764, "learning_rate": 1.91801878736123e-05, "loss": 0.7048, "step": 1123 }, { "epoch": 0.03, "grad_norm": 4.114880561828613, "learning_rate": 1.9197267292912043e-05, "loss": 0.8833, "step": 1124 }, { "epoch": 0.03, "grad_norm": 2.206150770187378, "learning_rate": 1.9214346712211786e-05, "loss": 0.7267, "step": 1125 }, { "epoch": 0.03, "grad_norm": 2.077110528945923, "learning_rate": 1.923142613151153e-05, "loss": 0.7814, "step": 1126 }, { "epoch": 0.03, "grad_norm": 2.720113515853882, "learning_rate": 1.9248505550811273e-05, "loss": 0.82, "step": 1127 }, { "epoch": 0.03, "grad_norm": 2.893799304962158, "learning_rate": 1.926558497011102e-05, "loss": 0.9533, "step": 1128 }, { "epoch": 0.03, "grad_norm": 1.8332384824752808, "learning_rate": 1.9282664389410762e-05, "loss": 0.7824, "step": 1129 }, { "epoch": 0.03, "grad_norm": 3.359837293624878, "learning_rate": 1.9299743808710505e-05, "loss": 0.881, "step": 1130 }, { "epoch": 0.03, "grad_norm": 2.577730894088745, "learning_rate": 1.931682322801025e-05, "loss": 0.7557, "step": 1131 }, { "epoch": 0.03, "grad_norm": 2.395321846008301, "learning_rate": 1.933390264730999e-05, "loss": 0.7032, "step": 1132 }, { "epoch": 0.03, "grad_norm": 2.8533718585968018, "learning_rate": 1.9350982066609735e-05, "loss": 0.8097, "step": 1133 }, { "epoch": 0.03, "grad_norm": 4.6098480224609375, "learning_rate": 1.936806148590948e-05, "loss": 0.9107, "step": 1134 }, { "epoch": 0.03, "grad_norm": 2.6480772495269775, "learning_rate": 1.9385140905209225e-05, "loss": 0.819, "step": 1135 }, { "epoch": 0.03, "grad_norm": 2.1839632987976074, "learning_rate": 1.9402220324508968e-05, "loss": 0.735, "step": 1136 }, { "epoch": 0.03, "grad_norm": 4.088872909545898, "learning_rate": 1.9419299743808714e-05, "loss": 1.0128, "step": 1137 }, { "epoch": 0.03, "grad_norm": 2.3347136974334717, "learning_rate": 1.9436379163108457e-05, "loss": 0.7188, "step": 1138 }, { "epoch": 0.03, "grad_norm": 2.81487774848938, "learning_rate": 1.94534585824082e-05, "loss": 0.9031, "step": 1139 }, { "epoch": 0.03, "grad_norm": 5.48395299911499, "learning_rate": 1.9470538001707944e-05, "loss": 0.7736, "step": 1140 }, { "epoch": 0.03, "grad_norm": 2.135453939437866, "learning_rate": 1.9487617421007687e-05, "loss": 0.828, "step": 1141 }, { "epoch": 0.03, "grad_norm": 2.1623427867889404, "learning_rate": 1.950469684030743e-05, "loss": 0.8903, "step": 1142 }, { "epoch": 0.03, "grad_norm": 2.670928478240967, "learning_rate": 1.9521776259607173e-05, "loss": 0.8362, "step": 1143 }, { "epoch": 0.03, "grad_norm": 2.3840975761413574, "learning_rate": 1.953885567890692e-05, "loss": 0.6911, "step": 1144 }, { "epoch": 0.03, "grad_norm": 2.391115188598633, "learning_rate": 1.9555935098206663e-05, "loss": 0.781, "step": 1145 }, { "epoch": 0.03, "grad_norm": 1.763578176498413, "learning_rate": 1.9573014517506406e-05, "loss": 0.8143, "step": 1146 }, { "epoch": 0.03, "grad_norm": 5.8013505935668945, "learning_rate": 1.959009393680615e-05, "loss": 0.8273, "step": 1147 }, { "epoch": 0.03, "grad_norm": 3.587977647781372, "learning_rate": 1.9607173356105892e-05, "loss": 0.5832, "step": 1148 }, { "epoch": 0.03, "grad_norm": 3.6358094215393066, "learning_rate": 1.962425277540564e-05, "loss": 0.8791, "step": 1149 }, { "epoch": 0.03, "grad_norm": 1.862975835800171, "learning_rate": 1.9641332194705382e-05, "loss": 0.7145, "step": 1150 }, { "epoch": 0.03, "grad_norm": 3.332857131958008, "learning_rate": 1.9658411614005125e-05, "loss": 0.8618, "step": 1151 }, { "epoch": 0.03, "grad_norm": 3.577820062637329, "learning_rate": 1.9675491033304868e-05, "loss": 0.9672, "step": 1152 }, { "epoch": 0.03, "grad_norm": 2.340301275253296, "learning_rate": 1.9692570452604615e-05, "loss": 0.7457, "step": 1153 }, { "epoch": 0.03, "grad_norm": 1.964760184288025, "learning_rate": 1.9709649871904358e-05, "loss": 0.8255, "step": 1154 }, { "epoch": 0.03, "grad_norm": 2.8815221786499023, "learning_rate": 1.97267292912041e-05, "loss": 0.7875, "step": 1155 }, { "epoch": 0.03, "grad_norm": 2.6934049129486084, "learning_rate": 1.9743808710503844e-05, "loss": 0.8029, "step": 1156 }, { "epoch": 0.03, "grad_norm": 2.6157941818237305, "learning_rate": 1.9760888129803587e-05, "loss": 0.7417, "step": 1157 }, { "epoch": 0.03, "grad_norm": 2.5781939029693604, "learning_rate": 1.977796754910333e-05, "loss": 0.8077, "step": 1158 }, { "epoch": 0.03, "grad_norm": 3.393946647644043, "learning_rate": 1.9795046968403077e-05, "loss": 0.8539, "step": 1159 }, { "epoch": 0.03, "grad_norm": 2.7947781085968018, "learning_rate": 1.981212638770282e-05, "loss": 0.7935, "step": 1160 }, { "epoch": 0.03, "grad_norm": 2.44028902053833, "learning_rate": 1.9829205807002563e-05, "loss": 0.7035, "step": 1161 }, { "epoch": 0.03, "grad_norm": 3.0463805198669434, "learning_rate": 1.9846285226302307e-05, "loss": 0.825, "step": 1162 }, { "epoch": 0.03, "grad_norm": 4.495447158813477, "learning_rate": 1.986336464560205e-05, "loss": 0.811, "step": 1163 }, { "epoch": 0.03, "grad_norm": 2.2154059410095215, "learning_rate": 1.9880444064901796e-05, "loss": 0.8406, "step": 1164 }, { "epoch": 0.03, "grad_norm": 2.6677186489105225, "learning_rate": 1.989752348420154e-05, "loss": 0.6969, "step": 1165 }, { "epoch": 0.03, "grad_norm": 3.564049482345581, "learning_rate": 1.9914602903501282e-05, "loss": 0.8087, "step": 1166 }, { "epoch": 0.03, "grad_norm": 2.7859885692596436, "learning_rate": 1.9931682322801026e-05, "loss": 0.7995, "step": 1167 }, { "epoch": 0.03, "grad_norm": 2.166673421859741, "learning_rate": 1.9948761742100772e-05, "loss": 0.8207, "step": 1168 }, { "epoch": 0.03, "grad_norm": 3.1860055923461914, "learning_rate": 1.9965841161400515e-05, "loss": 0.8714, "step": 1169 }, { "epoch": 0.03, "grad_norm": 2.8002853393554688, "learning_rate": 1.998292058070026e-05, "loss": 0.9132, "step": 1170 }, { "epoch": 0.03, "grad_norm": 2.488776206970215, "learning_rate": 2e-05, "loss": 0.8249, "step": 1171 }, { "epoch": 0.03, "grad_norm": 2.9376888275146484, "learning_rate": 1.9999999965543142e-05, "loss": 0.6398, "step": 1172 }, { "epoch": 0.03, "grad_norm": 2.9233317375183105, "learning_rate": 1.999999986217257e-05, "loss": 0.939, "step": 1173 }, { "epoch": 0.03, "grad_norm": 2.1614420413970947, "learning_rate": 1.9999999689888273e-05, "loss": 0.8274, "step": 1174 }, { "epoch": 0.03, "grad_norm": 3.2883598804473877, "learning_rate": 1.9999999448690265e-05, "loss": 0.7674, "step": 1175 }, { "epoch": 0.03, "grad_norm": 2.451068162918091, "learning_rate": 1.9999999138578546e-05, "loss": 0.8453, "step": 1176 }, { "epoch": 0.03, "grad_norm": 3.1402997970581055, "learning_rate": 1.9999998759553115e-05, "loss": 0.7796, "step": 1177 }, { "epoch": 0.03, "grad_norm": 2.9160194396972656, "learning_rate": 1.999999831161397e-05, "loss": 0.782, "step": 1178 }, { "epoch": 0.03, "grad_norm": 2.2930099964141846, "learning_rate": 1.9999997794761125e-05, "loss": 0.7999, "step": 1179 }, { "epoch": 0.03, "grad_norm": 4.547730922698975, "learning_rate": 1.9999997208994575e-05, "loss": 0.9613, "step": 1180 }, { "epoch": 0.03, "grad_norm": 4.298677921295166, "learning_rate": 1.9999996554314326e-05, "loss": 0.6734, "step": 1181 }, { "epoch": 0.03, "grad_norm": 2.9612481594085693, "learning_rate": 1.9999995830720388e-05, "loss": 0.7984, "step": 1182 }, { "epoch": 0.03, "grad_norm": 2.6363348960876465, "learning_rate": 1.9999995038212758e-05, "loss": 0.7458, "step": 1183 }, { "epoch": 0.03, "grad_norm": 2.103581666946411, "learning_rate": 1.9999994176791445e-05, "loss": 0.7133, "step": 1184 }, { "epoch": 0.03, "grad_norm": 2.19177508354187, "learning_rate": 1.9999993246456456e-05, "loss": 0.8185, "step": 1185 }, { "epoch": 0.03, "grad_norm": 1.9358954429626465, "learning_rate": 1.9999992247207792e-05, "loss": 0.8107, "step": 1186 }, { "epoch": 0.03, "grad_norm": 2.0016486644744873, "learning_rate": 1.999999117904547e-05, "loss": 0.7, "step": 1187 }, { "epoch": 0.03, "grad_norm": 2.5664687156677246, "learning_rate": 1.9999990041969487e-05, "loss": 0.7479, "step": 1188 }, { "epoch": 0.03, "grad_norm": 2.283181667327881, "learning_rate": 1.9999988835979857e-05, "loss": 0.9063, "step": 1189 }, { "epoch": 0.03, "grad_norm": 2.7739460468292236, "learning_rate": 1.9999987561076585e-05, "loss": 0.8887, "step": 1190 }, { "epoch": 0.03, "grad_norm": 4.971985816955566, "learning_rate": 1.9999986217259685e-05, "loss": 0.8562, "step": 1191 }, { "epoch": 0.03, "grad_norm": 3.444392442703247, "learning_rate": 1.9999984804529157e-05, "loss": 0.8594, "step": 1192 }, { "epoch": 0.03, "grad_norm": 2.2426376342773438, "learning_rate": 1.999998332288502e-05, "loss": 0.8474, "step": 1193 }, { "epoch": 0.03, "grad_norm": 2.128126382827759, "learning_rate": 1.999998177232728e-05, "loss": 0.7286, "step": 1194 }, { "epoch": 0.03, "grad_norm": 2.4587748050689697, "learning_rate": 1.999998015285595e-05, "loss": 0.822, "step": 1195 }, { "epoch": 0.03, "grad_norm": 4.472164630889893, "learning_rate": 1.9999978464471036e-05, "loss": 0.83, "step": 1196 }, { "epoch": 0.03, "grad_norm": 2.850472927093506, "learning_rate": 1.9999976707172554e-05, "loss": 0.6579, "step": 1197 }, { "epoch": 0.03, "grad_norm": 2.3929808139801025, "learning_rate": 1.9999974880960517e-05, "loss": 0.7223, "step": 1198 }, { "epoch": 0.03, "grad_norm": 1.4750920534133911, "learning_rate": 1.9999972985834934e-05, "loss": 0.5791, "step": 1199 }, { "epoch": 0.03, "grad_norm": 2.1804816722869873, "learning_rate": 1.999997102179582e-05, "loss": 0.86, "step": 1200 }, { "epoch": 0.03, "grad_norm": 3.090529680252075, "learning_rate": 1.999996898884319e-05, "loss": 0.8435, "step": 1201 }, { "epoch": 0.03, "grad_norm": 2.8055639266967773, "learning_rate": 1.9999966886977055e-05, "loss": 0.734, "step": 1202 }, { "epoch": 0.03, "grad_norm": 7.2935662269592285, "learning_rate": 1.999996471619743e-05, "loss": 0.9775, "step": 1203 }, { "epoch": 0.03, "grad_norm": 2.7535691261291504, "learning_rate": 1.999996247650433e-05, "loss": 0.8534, "step": 1204 }, { "epoch": 0.03, "grad_norm": 2.2137324810028076, "learning_rate": 1.9999960167897775e-05, "loss": 0.9023, "step": 1205 }, { "epoch": 0.03, "grad_norm": 2.773667335510254, "learning_rate": 1.9999957790377774e-05, "loss": 0.6993, "step": 1206 }, { "epoch": 0.03, "grad_norm": 2.520871639251709, "learning_rate": 1.9999955343944347e-05, "loss": 0.79, "step": 1207 }, { "epoch": 0.03, "grad_norm": 2.455678939819336, "learning_rate": 1.999995282859751e-05, "loss": 0.8943, "step": 1208 }, { "epoch": 0.03, "grad_norm": 3.929715871810913, "learning_rate": 1.9999950244337284e-05, "loss": 0.8094, "step": 1209 }, { "epoch": 0.03, "grad_norm": 1.762320637702942, "learning_rate": 1.9999947591163677e-05, "loss": 0.7845, "step": 1210 }, { "epoch": 0.03, "grad_norm": 3.2845282554626465, "learning_rate": 1.999994486907672e-05, "loss": 0.8849, "step": 1211 }, { "epoch": 0.03, "grad_norm": 2.0613510608673096, "learning_rate": 1.9999942078076424e-05, "loss": 0.7372, "step": 1212 }, { "epoch": 0.03, "grad_norm": 1.8258863687515259, "learning_rate": 1.9999939218162807e-05, "loss": 0.6571, "step": 1213 }, { "epoch": 0.03, "grad_norm": 3.5120372772216797, "learning_rate": 1.9999936289335894e-05, "loss": 0.6813, "step": 1214 }, { "epoch": 0.03, "grad_norm": 5.442984580993652, "learning_rate": 1.9999933291595705e-05, "loss": 0.9644, "step": 1215 }, { "epoch": 0.03, "grad_norm": 2.583207368850708, "learning_rate": 1.999993022494225e-05, "loss": 0.8855, "step": 1216 }, { "epoch": 0.03, "grad_norm": 3.6377134323120117, "learning_rate": 1.9999927089375564e-05, "loss": 0.8537, "step": 1217 }, { "epoch": 0.03, "grad_norm": 2.0719237327575684, "learning_rate": 1.9999923884895667e-05, "loss": 0.7086, "step": 1218 }, { "epoch": 0.03, "grad_norm": 2.0738186836242676, "learning_rate": 1.999992061150257e-05, "loss": 0.7755, "step": 1219 }, { "epoch": 0.03, "grad_norm": 2.9225311279296875, "learning_rate": 1.9999917269196308e-05, "loss": 0.8175, "step": 1220 }, { "epoch": 0.03, "grad_norm": 3.6789026260375977, "learning_rate": 1.9999913857976894e-05, "loss": 0.904, "step": 1221 }, { "epoch": 0.03, "grad_norm": 2.9698801040649414, "learning_rate": 1.999991037784436e-05, "loss": 0.6736, "step": 1222 }, { "epoch": 0.03, "grad_norm": 2.8396804332733154, "learning_rate": 1.9999906828798724e-05, "loss": 0.9808, "step": 1223 }, { "epoch": 0.03, "grad_norm": 2.6825647354125977, "learning_rate": 1.9999903210840015e-05, "loss": 0.7159, "step": 1224 }, { "epoch": 0.03, "grad_norm": 2.6640985012054443, "learning_rate": 1.9999899523968254e-05, "loss": 0.8092, "step": 1225 }, { "epoch": 0.03, "grad_norm": 3.3977575302124023, "learning_rate": 1.9999895768183468e-05, "loss": 0.7807, "step": 1226 }, { "epoch": 0.03, "grad_norm": 2.6723990440368652, "learning_rate": 1.9999891943485683e-05, "loss": 0.6816, "step": 1227 }, { "epoch": 0.03, "grad_norm": 3.2725603580474854, "learning_rate": 1.9999888049874928e-05, "loss": 0.7156, "step": 1228 }, { "epoch": 0.03, "grad_norm": 2.5029056072235107, "learning_rate": 1.9999884087351226e-05, "loss": 0.7496, "step": 1229 }, { "epoch": 0.03, "grad_norm": 2.613997220993042, "learning_rate": 1.9999880055914603e-05, "loss": 0.8079, "step": 1230 }, { "epoch": 0.03, "grad_norm": 3.0414884090423584, "learning_rate": 1.9999875955565088e-05, "loss": 0.804, "step": 1231 }, { "epoch": 0.03, "grad_norm": 2.091546058654785, "learning_rate": 1.9999871786302713e-05, "loss": 0.7553, "step": 1232 }, { "epoch": 0.03, "grad_norm": 2.230516195297241, "learning_rate": 1.9999867548127507e-05, "loss": 0.8132, "step": 1233 }, { "epoch": 0.03, "grad_norm": 5.672163486480713, "learning_rate": 1.999986324103949e-05, "loss": 0.9205, "step": 1234 }, { "epoch": 0.03, "grad_norm": 3.7676477432250977, "learning_rate": 1.9999858865038703e-05, "loss": 0.7557, "step": 1235 }, { "epoch": 0.03, "grad_norm": 1.7673674821853638, "learning_rate": 1.999985442012517e-05, "loss": 0.7775, "step": 1236 }, { "epoch": 0.03, "grad_norm": 3.5325446128845215, "learning_rate": 1.999984990629892e-05, "loss": 0.824, "step": 1237 }, { "epoch": 0.03, "grad_norm": 2.8470873832702637, "learning_rate": 1.999984532355999e-05, "loss": 0.8675, "step": 1238 }, { "epoch": 0.03, "grad_norm": 3.0087647438049316, "learning_rate": 1.999984067190841e-05, "loss": 0.8743, "step": 1239 }, { "epoch": 0.03, "grad_norm": 2.019643545150757, "learning_rate": 1.9999835951344208e-05, "loss": 0.7905, "step": 1240 }, { "epoch": 0.03, "grad_norm": 3.6492996215820312, "learning_rate": 1.999983116186742e-05, "loss": 0.8341, "step": 1241 }, { "epoch": 0.03, "grad_norm": 2.7598445415496826, "learning_rate": 1.9999826303478077e-05, "loss": 0.7306, "step": 1242 }, { "epoch": 0.03, "grad_norm": 2.000840663909912, "learning_rate": 1.9999821376176218e-05, "loss": 0.8106, "step": 1243 }, { "epoch": 0.03, "grad_norm": 3.124112367630005, "learning_rate": 1.999981637996187e-05, "loss": 0.7735, "step": 1244 }, { "epoch": 0.03, "grad_norm": 2.056258201599121, "learning_rate": 1.9999811314835063e-05, "loss": 0.7009, "step": 1245 }, { "epoch": 0.03, "grad_norm": 1.7200430631637573, "learning_rate": 1.999980618079585e-05, "loss": 0.8052, "step": 1246 }, { "epoch": 0.03, "grad_norm": 1.9844621419906616, "learning_rate": 1.9999800977844248e-05, "loss": 0.7838, "step": 1247 }, { "epoch": 0.03, "grad_norm": 3.8848915100097656, "learning_rate": 1.9999795705980302e-05, "loss": 0.7197, "step": 1248 }, { "epoch": 0.03, "grad_norm": 1.656175971031189, "learning_rate": 1.999979036520405e-05, "loss": 0.7576, "step": 1249 }, { "epoch": 0.03, "grad_norm": 1.914423942565918, "learning_rate": 1.999978495551552e-05, "loss": 0.6298, "step": 1250 }, { "epoch": 0.03, "grad_norm": 5.08666467666626, "learning_rate": 1.9999779476914756e-05, "loss": 0.942, "step": 1251 }, { "epoch": 0.03, "grad_norm": 2.964418888092041, "learning_rate": 1.9999773929401796e-05, "loss": 0.7424, "step": 1252 }, { "epoch": 0.03, "grad_norm": 4.737927436828613, "learning_rate": 1.9999768312976675e-05, "loss": 0.7601, "step": 1253 }, { "epoch": 0.03, "grad_norm": 2.6574881076812744, "learning_rate": 1.9999762627639434e-05, "loss": 0.9099, "step": 1254 }, { "epoch": 0.03, "grad_norm": 2.508768081665039, "learning_rate": 1.9999756873390113e-05, "loss": 0.9064, "step": 1255 }, { "epoch": 0.03, "grad_norm": 2.6557536125183105, "learning_rate": 1.9999751050228746e-05, "loss": 0.8568, "step": 1256 }, { "epoch": 0.03, "grad_norm": 4.711428642272949, "learning_rate": 1.999974515815538e-05, "loss": 0.849, "step": 1257 }, { "epoch": 0.03, "grad_norm": 3.9490087032318115, "learning_rate": 1.999973919717005e-05, "loss": 0.8858, "step": 1258 }, { "epoch": 0.03, "grad_norm": 2.097507953643799, "learning_rate": 1.9999733167272806e-05, "loss": 0.7201, "step": 1259 }, { "epoch": 0.03, "grad_norm": 2.783205986022949, "learning_rate": 1.999972706846368e-05, "loss": 0.7341, "step": 1260 }, { "epoch": 0.03, "grad_norm": 2.328115463256836, "learning_rate": 1.999972090074272e-05, "loss": 0.699, "step": 1261 }, { "epoch": 0.03, "grad_norm": 2.3484957218170166, "learning_rate": 1.999971466410996e-05, "loss": 0.7196, "step": 1262 }, { "epoch": 0.03, "grad_norm": 3.6506712436676025, "learning_rate": 1.999970835856545e-05, "loss": 0.8226, "step": 1263 }, { "epoch": 0.03, "grad_norm": 2.3228182792663574, "learning_rate": 1.9999701984109238e-05, "loss": 0.6686, "step": 1264 }, { "epoch": 0.03, "grad_norm": 2.896345376968384, "learning_rate": 1.999969554074136e-05, "loss": 0.7636, "step": 1265 }, { "epoch": 0.03, "grad_norm": 4.124942302703857, "learning_rate": 1.999968902846186e-05, "loss": 0.7984, "step": 1266 }, { "epoch": 0.03, "grad_norm": 2.1914238929748535, "learning_rate": 1.9999682447270786e-05, "loss": 0.5938, "step": 1267 }, { "epoch": 0.03, "grad_norm": 2.7585952281951904, "learning_rate": 1.9999675797168185e-05, "loss": 0.8916, "step": 1268 }, { "epoch": 0.03, "grad_norm": 6.11654806137085, "learning_rate": 1.9999669078154103e-05, "loss": 0.6081, "step": 1269 }, { "epoch": 0.03, "grad_norm": 5.9160475730896, "learning_rate": 1.999966229022858e-05, "loss": 0.9237, "step": 1270 }, { "epoch": 0.03, "grad_norm": 2.6680095195770264, "learning_rate": 1.9999655433391666e-05, "loss": 0.7336, "step": 1271 }, { "epoch": 0.03, "grad_norm": 2.4768929481506348, "learning_rate": 1.9999648507643412e-05, "loss": 0.7989, "step": 1272 }, { "epoch": 0.03, "grad_norm": 2.7827188968658447, "learning_rate": 1.9999641512983864e-05, "loss": 0.7446, "step": 1273 }, { "epoch": 0.03, "grad_norm": 3.4116454124450684, "learning_rate": 1.9999634449413066e-05, "loss": 1.0556, "step": 1274 }, { "epoch": 0.03, "grad_norm": 3.8413121700286865, "learning_rate": 1.9999627316931073e-05, "loss": 0.7013, "step": 1275 }, { "epoch": 0.03, "grad_norm": 1.94118332862854, "learning_rate": 1.999962011553793e-05, "loss": 0.7841, "step": 1276 }, { "epoch": 0.03, "grad_norm": 2.569218635559082, "learning_rate": 1.9999612845233683e-05, "loss": 0.8919, "step": 1277 }, { "epoch": 0.03, "grad_norm": 2.5996572971343994, "learning_rate": 1.9999605506018392e-05, "loss": 0.7528, "step": 1278 }, { "epoch": 0.03, "grad_norm": 2.0718722343444824, "learning_rate": 1.99995980978921e-05, "loss": 0.8119, "step": 1279 }, { "epoch": 0.03, "grad_norm": 2.658686876296997, "learning_rate": 1.9999590620854862e-05, "loss": 0.8955, "step": 1280 }, { "epoch": 0.03, "grad_norm": 1.9748315811157227, "learning_rate": 1.9999583074906726e-05, "loss": 0.7917, "step": 1281 }, { "epoch": 0.03, "grad_norm": 5.351077079772949, "learning_rate": 1.9999575460047747e-05, "loss": 0.8716, "step": 1282 }, { "epoch": 0.03, "grad_norm": 2.7169394493103027, "learning_rate": 1.9999567776277975e-05, "loss": 0.719, "step": 1283 }, { "epoch": 0.03, "grad_norm": 3.1796622276306152, "learning_rate": 1.9999560023597465e-05, "loss": 0.7371, "step": 1284 }, { "epoch": 0.03, "grad_norm": 1.8803632259368896, "learning_rate": 1.999955220200627e-05, "loss": 0.7226, "step": 1285 }, { "epoch": 0.03, "grad_norm": 1.9650315046310425, "learning_rate": 1.999954431150444e-05, "loss": 0.7671, "step": 1286 }, { "epoch": 0.03, "grad_norm": 9.564555168151855, "learning_rate": 1.999953635209204e-05, "loss": 0.8529, "step": 1287 }, { "epoch": 0.03, "grad_norm": 2.5883283615112305, "learning_rate": 1.9999528323769112e-05, "loss": 0.5699, "step": 1288 }, { "epoch": 0.03, "grad_norm": 2.672804832458496, "learning_rate": 1.999952022653572e-05, "loss": 0.7839, "step": 1289 }, { "epoch": 0.03, "grad_norm": 2.7538421154022217, "learning_rate": 1.9999512060391914e-05, "loss": 0.9386, "step": 1290 }, { "epoch": 0.03, "grad_norm": 2.3031113147735596, "learning_rate": 1.9999503825337756e-05, "loss": 0.6792, "step": 1291 }, { "epoch": 0.03, "grad_norm": 2.2227702140808105, "learning_rate": 1.99994955213733e-05, "loss": 0.6964, "step": 1292 }, { "epoch": 0.03, "grad_norm": 2.7923266887664795, "learning_rate": 1.9999487148498603e-05, "loss": 0.848, "step": 1293 }, { "epoch": 0.03, "grad_norm": 2.2238643169403076, "learning_rate": 1.999947870671372e-05, "loss": 0.7774, "step": 1294 }, { "epoch": 0.03, "grad_norm": 3.1836190223693848, "learning_rate": 1.9999470196018716e-05, "loss": 0.8488, "step": 1295 }, { "epoch": 0.03, "grad_norm": 2.6909525394439697, "learning_rate": 1.9999461616413645e-05, "loss": 0.8174, "step": 1296 }, { "epoch": 0.03, "grad_norm": 4.916670322418213, "learning_rate": 1.999945296789856e-05, "loss": 0.9071, "step": 1297 }, { "epoch": 0.03, "grad_norm": 3.1591315269470215, "learning_rate": 1.9999444250473535e-05, "loss": 0.9193, "step": 1298 }, { "epoch": 0.03, "grad_norm": 2.1953933238983154, "learning_rate": 1.9999435464138625e-05, "loss": 0.7098, "step": 1299 }, { "epoch": 0.03, "grad_norm": 2.5683348178863525, "learning_rate": 1.999942660889388e-05, "loss": 0.8219, "step": 1300 }, { "epoch": 0.03, "grad_norm": 2.2956411838531494, "learning_rate": 1.999941768473937e-05, "loss": 0.8311, "step": 1301 }, { "epoch": 0.03, "grad_norm": 1.9394632577896118, "learning_rate": 1.999940869167516e-05, "loss": 0.8423, "step": 1302 }, { "epoch": 0.03, "grad_norm": 3.214689254760742, "learning_rate": 1.999939962970131e-05, "loss": 0.8989, "step": 1303 }, { "epoch": 0.03, "grad_norm": 2.3661789894104004, "learning_rate": 1.999939049881787e-05, "loss": 0.7739, "step": 1304 }, { "epoch": 0.03, "grad_norm": 2.3678910732269287, "learning_rate": 1.9999381299024923e-05, "loss": 0.6992, "step": 1305 }, { "epoch": 0.03, "grad_norm": 2.993074893951416, "learning_rate": 1.9999372030322516e-05, "loss": 0.7045, "step": 1306 }, { "epoch": 0.03, "grad_norm": 2.5204906463623047, "learning_rate": 1.9999362692710723e-05, "loss": 0.6966, "step": 1307 }, { "epoch": 0.03, "grad_norm": 3.040278673171997, "learning_rate": 1.99993532861896e-05, "loss": 0.8511, "step": 1308 }, { "epoch": 0.03, "grad_norm": 2.4543824195861816, "learning_rate": 1.999934381075922e-05, "loss": 0.7553, "step": 1309 }, { "epoch": 0.03, "grad_norm": 2.765939712524414, "learning_rate": 1.9999334266419644e-05, "loss": 0.6843, "step": 1310 }, { "epoch": 0.03, "grad_norm": 1.5938842296600342, "learning_rate": 1.9999324653170937e-05, "loss": 0.7676, "step": 1311 }, { "epoch": 0.03, "grad_norm": 2.5774221420288086, "learning_rate": 1.9999314971013168e-05, "loss": 0.7651, "step": 1312 }, { "epoch": 0.03, "grad_norm": 3.571558952331543, "learning_rate": 1.99993052199464e-05, "loss": 0.7918, "step": 1313 }, { "epoch": 0.03, "grad_norm": 5.560577869415283, "learning_rate": 1.9999295399970706e-05, "loss": 0.9296, "step": 1314 }, { "epoch": 0.03, "grad_norm": 3.140331745147705, "learning_rate": 1.999928551108615e-05, "loss": 0.683, "step": 1315 }, { "epoch": 0.03, "grad_norm": 2.1369009017944336, "learning_rate": 1.9999275553292802e-05, "loss": 0.7517, "step": 1316 }, { "epoch": 0.03, "grad_norm": 2.7297203540802, "learning_rate": 1.9999265526590725e-05, "loss": 0.8998, "step": 1317 }, { "epoch": 0.03, "grad_norm": 2.8986425399780273, "learning_rate": 1.9999255430979993e-05, "loss": 0.8225, "step": 1318 }, { "epoch": 0.03, "grad_norm": 3.0624499320983887, "learning_rate": 1.9999245266460675e-05, "loss": 0.9595, "step": 1319 }, { "epoch": 0.03, "grad_norm": 2.083866596221924, "learning_rate": 1.999923503303284e-05, "loss": 0.9677, "step": 1320 }, { "epoch": 0.03, "grad_norm": 1.7175583839416504, "learning_rate": 1.9999224730696562e-05, "loss": 0.718, "step": 1321 }, { "epoch": 0.03, "grad_norm": 5.304843425750732, "learning_rate": 1.9999214359451907e-05, "loss": 0.7917, "step": 1322 }, { "epoch": 0.03, "grad_norm": 3.265045642852783, "learning_rate": 1.999920391929895e-05, "loss": 0.9817, "step": 1323 }, { "epoch": 0.03, "grad_norm": 2.623035430908203, "learning_rate": 1.9999193410237762e-05, "loss": 0.8007, "step": 1324 }, { "epoch": 0.03, "grad_norm": 2.348212718963623, "learning_rate": 1.9999182832268415e-05, "loss": 0.7352, "step": 1325 }, { "epoch": 0.03, "grad_norm": 2.4670214653015137, "learning_rate": 1.9999172185390982e-05, "loss": 0.7478, "step": 1326 }, { "epoch": 0.03, "grad_norm": 1.7439966201782227, "learning_rate": 1.9999161469605535e-05, "loss": 0.8046, "step": 1327 }, { "epoch": 0.03, "grad_norm": 2.7468175888061523, "learning_rate": 1.999915068491215e-05, "loss": 0.7608, "step": 1328 }, { "epoch": 0.03, "grad_norm": 2.480069637298584, "learning_rate": 1.9999139831310897e-05, "loss": 0.6447, "step": 1329 }, { "epoch": 0.03, "grad_norm": 2.613889217376709, "learning_rate": 1.999912890880186e-05, "loss": 0.6865, "step": 1330 }, { "epoch": 0.03, "grad_norm": 2.2111847400665283, "learning_rate": 1.9999117917385108e-05, "loss": 0.7837, "step": 1331 }, { "epoch": 0.03, "grad_norm": 3.1227500438690186, "learning_rate": 1.9999106857060716e-05, "loss": 0.8242, "step": 1332 }, { "epoch": 0.03, "grad_norm": 2.168473243713379, "learning_rate": 1.999909572782876e-05, "loss": 0.8111, "step": 1333 }, { "epoch": 0.03, "grad_norm": 2.798457622528076, "learning_rate": 1.999908452968932e-05, "loss": 0.7554, "step": 1334 }, { "epoch": 0.03, "grad_norm": 2.022883176803589, "learning_rate": 1.9999073262642473e-05, "loss": 0.7723, "step": 1335 }, { "epoch": 0.03, "grad_norm": 2.401785135269165, "learning_rate": 1.999906192668829e-05, "loss": 0.8499, "step": 1336 }, { "epoch": 0.03, "grad_norm": 4.060828685760498, "learning_rate": 1.999905052182686e-05, "loss": 0.8339, "step": 1337 }, { "epoch": 0.03, "grad_norm": 3.17158842086792, "learning_rate": 1.9999039048058253e-05, "loss": 0.9319, "step": 1338 }, { "epoch": 0.03, "grad_norm": 2.066091537475586, "learning_rate": 1.999902750538255e-05, "loss": 0.686, "step": 1339 }, { "epoch": 0.03, "grad_norm": 2.053875207901001, "learning_rate": 1.9999015893799832e-05, "loss": 0.7273, "step": 1340 }, { "epoch": 0.03, "grad_norm": 4.56614351272583, "learning_rate": 1.999900421331018e-05, "loss": 0.9374, "step": 1341 }, { "epoch": 0.03, "grad_norm": 2.503169059753418, "learning_rate": 1.999899246391367e-05, "loss": 0.8391, "step": 1342 }, { "epoch": 0.03, "grad_norm": 3.4926400184631348, "learning_rate": 1.999898064561039e-05, "loss": 0.8833, "step": 1343 }, { "epoch": 0.03, "grad_norm": 3.0955755710601807, "learning_rate": 1.9998968758400418e-05, "loss": 0.7359, "step": 1344 }, { "epoch": 0.03, "grad_norm": 2.438472032546997, "learning_rate": 1.999895680228383e-05, "loss": 0.8078, "step": 1345 }, { "epoch": 0.03, "grad_norm": 3.3515419960021973, "learning_rate": 1.9998944777260715e-05, "loss": 0.7626, "step": 1346 }, { "epoch": 0.03, "grad_norm": 2.1857638359069824, "learning_rate": 1.999893268333116e-05, "loss": 0.7064, "step": 1347 }, { "epoch": 0.03, "grad_norm": 2.3722290992736816, "learning_rate": 1.9998920520495238e-05, "loss": 0.6149, "step": 1348 }, { "epoch": 0.03, "grad_norm": 2.893453359603882, "learning_rate": 1.9998908288753038e-05, "loss": 0.9896, "step": 1349 }, { "epoch": 0.03, "grad_norm": 1.856703519821167, "learning_rate": 1.9998895988104644e-05, "loss": 0.6896, "step": 1350 }, { "epoch": 0.03, "grad_norm": 1.91659677028656, "learning_rate": 1.999888361855014e-05, "loss": 0.7255, "step": 1351 }, { "epoch": 0.03, "grad_norm": 2.455984354019165, "learning_rate": 1.9998871180089616e-05, "loss": 0.7629, "step": 1352 }, { "epoch": 0.03, "grad_norm": 2.553988218307495, "learning_rate": 1.999885867272315e-05, "loss": 0.8739, "step": 1353 }, { "epoch": 0.03, "grad_norm": 4.19890022277832, "learning_rate": 1.9998846096450833e-05, "loss": 0.8836, "step": 1354 }, { "epoch": 0.03, "grad_norm": 1.8930350542068481, "learning_rate": 1.999883345127275e-05, "loss": 0.7435, "step": 1355 }, { "epoch": 0.03, "grad_norm": 1.7815955877304077, "learning_rate": 1.999882073718899e-05, "loss": 0.7095, "step": 1356 }, { "epoch": 0.03, "grad_norm": 3.059129476547241, "learning_rate": 1.999880795419964e-05, "loss": 0.7732, "step": 1357 }, { "epoch": 0.03, "grad_norm": 3.519099712371826, "learning_rate": 1.9998795102304784e-05, "loss": 0.8597, "step": 1358 }, { "epoch": 0.03, "grad_norm": 1.5839701890945435, "learning_rate": 1.9998782181504517e-05, "loss": 0.8357, "step": 1359 }, { "epoch": 0.03, "grad_norm": 3.231685161590576, "learning_rate": 1.9998769191798923e-05, "loss": 0.7665, "step": 1360 }, { "epoch": 0.03, "grad_norm": 2.377455711364746, "learning_rate": 1.9998756133188096e-05, "loss": 0.8463, "step": 1361 }, { "epoch": 0.03, "grad_norm": 2.862420082092285, "learning_rate": 1.9998743005672122e-05, "loss": 0.7801, "step": 1362 }, { "epoch": 0.03, "grad_norm": 2.523491382598877, "learning_rate": 1.999872980925109e-05, "loss": 0.7967, "step": 1363 }, { "epoch": 0.03, "grad_norm": 2.216400623321533, "learning_rate": 1.99987165439251e-05, "loss": 0.7165, "step": 1364 }, { "epoch": 0.03, "grad_norm": 3.0812745094299316, "learning_rate": 1.9998703209694238e-05, "loss": 0.9018, "step": 1365 }, { "epoch": 0.04, "grad_norm": 2.6911346912384033, "learning_rate": 1.9998689806558588e-05, "loss": 0.7846, "step": 1366 }, { "epoch": 0.04, "grad_norm": 3.1039204597473145, "learning_rate": 1.9998676334518254e-05, "loss": 0.7674, "step": 1367 }, { "epoch": 0.04, "grad_norm": 1.5332412719726562, "learning_rate": 1.9998662793573325e-05, "loss": 0.7278, "step": 1368 }, { "epoch": 0.04, "grad_norm": 2.8774964809417725, "learning_rate": 1.9998649183723892e-05, "loss": 0.731, "step": 1369 }, { "epoch": 0.04, "grad_norm": 4.096654891967773, "learning_rate": 1.9998635504970053e-05, "loss": 0.8166, "step": 1370 }, { "epoch": 0.04, "grad_norm": 2.5106725692749023, "learning_rate": 1.9998621757311896e-05, "loss": 0.8257, "step": 1371 }, { "epoch": 0.04, "grad_norm": 2.3430938720703125, "learning_rate": 1.9998607940749524e-05, "loss": 0.7585, "step": 1372 }, { "epoch": 0.04, "grad_norm": 2.2131125926971436, "learning_rate": 1.9998594055283024e-05, "loss": 0.7931, "step": 1373 }, { "epoch": 0.04, "grad_norm": 2.9664883613586426, "learning_rate": 1.99985801009125e-05, "loss": 0.6755, "step": 1374 }, { "epoch": 0.04, "grad_norm": 2.2242074012756348, "learning_rate": 1.999856607763804e-05, "loss": 0.6823, "step": 1375 }, { "epoch": 0.04, "grad_norm": 2.8084967136383057, "learning_rate": 1.9998551985459745e-05, "loss": 0.6144, "step": 1376 }, { "epoch": 0.04, "grad_norm": 2.8517510890960693, "learning_rate": 1.999853782437771e-05, "loss": 0.7892, "step": 1377 }, { "epoch": 0.04, "grad_norm": 1.602195382118225, "learning_rate": 1.9998523594392036e-05, "loss": 0.6298, "step": 1378 }, { "epoch": 0.04, "grad_norm": 2.956799030303955, "learning_rate": 1.9998509295502822e-05, "loss": 0.9049, "step": 1379 }, { "epoch": 0.04, "grad_norm": 1.966255784034729, "learning_rate": 1.999849492771016e-05, "loss": 0.7597, "step": 1380 }, { "epoch": 0.04, "grad_norm": 1.8306150436401367, "learning_rate": 1.999848049101415e-05, "loss": 0.8671, "step": 1381 }, { "epoch": 0.04, "grad_norm": 2.2734105587005615, "learning_rate": 1.9998465985414898e-05, "loss": 0.8524, "step": 1382 }, { "epoch": 0.04, "grad_norm": 3.1078293323516846, "learning_rate": 1.99984514109125e-05, "loss": 0.7511, "step": 1383 }, { "epoch": 0.04, "grad_norm": 2.106123447418213, "learning_rate": 1.9998436767507057e-05, "loss": 0.7263, "step": 1384 }, { "epoch": 0.04, "grad_norm": 4.739280700683594, "learning_rate": 1.999842205519867e-05, "loss": 0.7713, "step": 1385 }, { "epoch": 0.04, "grad_norm": 2.17899227142334, "learning_rate": 1.9998407273987434e-05, "loss": 0.591, "step": 1386 }, { "epoch": 0.04, "grad_norm": 2.4079389572143555, "learning_rate": 1.999839242387346e-05, "loss": 0.7992, "step": 1387 }, { "epoch": 0.04, "grad_norm": 3.0217583179473877, "learning_rate": 1.999837750485685e-05, "loss": 0.7233, "step": 1388 }, { "epoch": 0.04, "grad_norm": 2.6735880374908447, "learning_rate": 1.99983625169377e-05, "loss": 0.8736, "step": 1389 }, { "epoch": 0.04, "grad_norm": 2.169785737991333, "learning_rate": 1.999834746011612e-05, "loss": 0.725, "step": 1390 }, { "epoch": 0.04, "grad_norm": 2.3354523181915283, "learning_rate": 1.9998332334392208e-05, "loss": 0.7641, "step": 1391 }, { "epoch": 0.04, "grad_norm": 3.6109354496002197, "learning_rate": 1.9998317139766072e-05, "loss": 0.6479, "step": 1392 }, { "epoch": 0.04, "grad_norm": 2.6534035205841064, "learning_rate": 1.9998301876237817e-05, "loss": 0.6918, "step": 1393 }, { "epoch": 0.04, "grad_norm": 2.7540504932403564, "learning_rate": 1.9998286543807546e-05, "loss": 0.7526, "step": 1394 }, { "epoch": 0.04, "grad_norm": 1.8227596282958984, "learning_rate": 1.999827114247537e-05, "loss": 0.628, "step": 1395 }, { "epoch": 0.04, "grad_norm": 1.9279747009277344, "learning_rate": 1.9998255672241385e-05, "loss": 0.7862, "step": 1396 }, { "epoch": 0.04, "grad_norm": 1.92811119556427, "learning_rate": 1.9998240133105705e-05, "loss": 0.5433, "step": 1397 }, { "epoch": 0.04, "grad_norm": 1.9699126482009888, "learning_rate": 1.9998224525068436e-05, "loss": 0.8541, "step": 1398 }, { "epoch": 0.04, "grad_norm": 2.470296859741211, "learning_rate": 1.9998208848129685e-05, "loss": 0.8839, "step": 1399 }, { "epoch": 0.04, "grad_norm": 3.233259439468384, "learning_rate": 1.999819310228956e-05, "loss": 0.9061, "step": 1400 }, { "epoch": 0.04, "grad_norm": 4.118078231811523, "learning_rate": 1.9998177287548168e-05, "loss": 0.9086, "step": 1401 }, { "epoch": 0.04, "grad_norm": 1.9154999256134033, "learning_rate": 1.9998161403905622e-05, "loss": 0.6861, "step": 1402 }, { "epoch": 0.04, "grad_norm": 3.2467613220214844, "learning_rate": 1.999814545136203e-05, "loss": 0.7887, "step": 1403 }, { "epoch": 0.04, "grad_norm": 2.6875898838043213, "learning_rate": 1.99981294299175e-05, "loss": 0.8134, "step": 1404 }, { "epoch": 0.04, "grad_norm": 2.7995357513427734, "learning_rate": 1.9998113339572144e-05, "loss": 0.7872, "step": 1405 }, { "epoch": 0.04, "grad_norm": 2.8102493286132812, "learning_rate": 1.9998097180326067e-05, "loss": 0.7416, "step": 1406 }, { "epoch": 0.04, "grad_norm": 1.6881604194641113, "learning_rate": 1.9998080952179392e-05, "loss": 0.6215, "step": 1407 }, { "epoch": 0.04, "grad_norm": 1.8793256282806396, "learning_rate": 1.999806465513222e-05, "loss": 0.7485, "step": 1408 }, { "epoch": 0.04, "grad_norm": 2.9559059143066406, "learning_rate": 1.999804828918467e-05, "loss": 0.7155, "step": 1409 }, { "epoch": 0.04, "grad_norm": 2.03121018409729, "learning_rate": 1.9998031854336854e-05, "loss": 0.9254, "step": 1410 }, { "epoch": 0.04, "grad_norm": 2.0672433376312256, "learning_rate": 1.9998015350588883e-05, "loss": 0.5556, "step": 1411 }, { "epoch": 0.04, "grad_norm": 2.1428186893463135, "learning_rate": 1.999799877794087e-05, "loss": 0.7467, "step": 1412 }, { "epoch": 0.04, "grad_norm": 2.968665838241577, "learning_rate": 1.9997982136392933e-05, "loss": 0.8123, "step": 1413 }, { "epoch": 0.04, "grad_norm": 1.6940042972564697, "learning_rate": 1.9997965425945183e-05, "loss": 0.867, "step": 1414 }, { "epoch": 0.04, "grad_norm": 3.047807455062866, "learning_rate": 1.9997948646597737e-05, "loss": 0.8273, "step": 1415 }, { "epoch": 0.04, "grad_norm": 3.1892006397247314, "learning_rate": 1.999793179835071e-05, "loss": 0.817, "step": 1416 }, { "epoch": 0.04, "grad_norm": 2.604928970336914, "learning_rate": 1.9997914881204217e-05, "loss": 0.9002, "step": 1417 }, { "epoch": 0.04, "grad_norm": 2.0877344608306885, "learning_rate": 1.999789789515838e-05, "loss": 0.7724, "step": 1418 }, { "epoch": 0.04, "grad_norm": 2.283512592315674, "learning_rate": 1.9997880840213308e-05, "loss": 0.8832, "step": 1419 }, { "epoch": 0.04, "grad_norm": 2.1860437393188477, "learning_rate": 1.9997863716369122e-05, "loss": 0.7779, "step": 1420 }, { "epoch": 0.04, "grad_norm": 2.196051597595215, "learning_rate": 1.9997846523625942e-05, "loss": 0.8254, "step": 1421 }, { "epoch": 0.04, "grad_norm": 2.6630570888519287, "learning_rate": 1.999782926198389e-05, "loss": 0.8168, "step": 1422 }, { "epoch": 0.04, "grad_norm": 2.636960506439209, "learning_rate": 1.9997811931443074e-05, "loss": 0.7013, "step": 1423 }, { "epoch": 0.04, "grad_norm": 2.7567265033721924, "learning_rate": 1.999779453200362e-05, "loss": 0.878, "step": 1424 }, { "epoch": 0.04, "grad_norm": 2.2208786010742188, "learning_rate": 1.999777706366565e-05, "loss": 0.807, "step": 1425 }, { "epoch": 0.04, "grad_norm": 3.942502021789551, "learning_rate": 1.999775952642928e-05, "loss": 0.8755, "step": 1426 }, { "epoch": 0.04, "grad_norm": 2.1400864124298096, "learning_rate": 1.999774192029463e-05, "loss": 0.6934, "step": 1427 }, { "epoch": 0.04, "grad_norm": 2.122835397720337, "learning_rate": 1.9997724245261828e-05, "loss": 0.9304, "step": 1428 }, { "epoch": 0.04, "grad_norm": 3.6582822799682617, "learning_rate": 1.9997706501330988e-05, "loss": 0.808, "step": 1429 }, { "epoch": 0.04, "grad_norm": 3.0788791179656982, "learning_rate": 1.9997688688502238e-05, "loss": 0.8589, "step": 1430 }, { "epoch": 0.04, "grad_norm": 4.054766654968262, "learning_rate": 1.99976708067757e-05, "loss": 0.805, "step": 1431 }, { "epoch": 0.04, "grad_norm": 2.1207597255706787, "learning_rate": 1.9997652856151496e-05, "loss": 0.7651, "step": 1432 }, { "epoch": 0.04, "grad_norm": 2.286280870437622, "learning_rate": 1.999763483662975e-05, "loss": 0.6335, "step": 1433 }, { "epoch": 0.04, "grad_norm": 2.3581156730651855, "learning_rate": 1.9997616748210583e-05, "loss": 0.7489, "step": 1434 }, { "epoch": 0.04, "grad_norm": 2.933107614517212, "learning_rate": 1.9997598590894122e-05, "loss": 0.8436, "step": 1435 }, { "epoch": 0.04, "grad_norm": 3.16357159614563, "learning_rate": 1.9997580364680496e-05, "loss": 0.711, "step": 1436 }, { "epoch": 0.04, "grad_norm": 2.8622748851776123, "learning_rate": 1.9997562069569824e-05, "loss": 0.7264, "step": 1437 }, { "epoch": 0.04, "grad_norm": 3.08320951461792, "learning_rate": 1.9997543705562237e-05, "loss": 0.6359, "step": 1438 }, { "epoch": 0.04, "grad_norm": 1.5015310049057007, "learning_rate": 1.9997525272657857e-05, "loss": 0.766, "step": 1439 }, { "epoch": 0.04, "grad_norm": 2.8729050159454346, "learning_rate": 1.999750677085682e-05, "loss": 0.7501, "step": 1440 }, { "epoch": 0.04, "grad_norm": 1.8008018732070923, "learning_rate": 1.9997488200159242e-05, "loss": 0.6587, "step": 1441 }, { "epoch": 0.04, "grad_norm": 2.688471794128418, "learning_rate": 1.999746956056526e-05, "loss": 0.7023, "step": 1442 }, { "epoch": 0.04, "grad_norm": 5.993284702301025, "learning_rate": 1.9997450852075e-05, "loss": 0.8358, "step": 1443 }, { "epoch": 0.04, "grad_norm": 3.4666531085968018, "learning_rate": 1.9997432074688584e-05, "loss": 0.7923, "step": 1444 }, { "epoch": 0.04, "grad_norm": 3.253229856491089, "learning_rate": 1.999741322840615e-05, "loss": 0.7218, "step": 1445 }, { "epoch": 0.04, "grad_norm": 2.0378317832946777, "learning_rate": 1.9997394313227823e-05, "loss": 0.7462, "step": 1446 }, { "epoch": 0.04, "grad_norm": 2.1220853328704834, "learning_rate": 1.9997375329153738e-05, "loss": 0.7729, "step": 1447 }, { "epoch": 0.04, "grad_norm": 1.911155104637146, "learning_rate": 1.9997356276184023e-05, "loss": 0.7347, "step": 1448 }, { "epoch": 0.04, "grad_norm": 2.8633925914764404, "learning_rate": 1.999733715431881e-05, "loss": 0.7793, "step": 1449 }, { "epoch": 0.04, "grad_norm": 2.4262428283691406, "learning_rate": 1.999731796355823e-05, "loss": 0.8117, "step": 1450 }, { "epoch": 0.04, "grad_norm": 1.8687645196914673, "learning_rate": 1.999729870390241e-05, "loss": 0.6849, "step": 1451 }, { "epoch": 0.04, "grad_norm": 2.969918966293335, "learning_rate": 1.99972793753515e-05, "loss": 0.6569, "step": 1452 }, { "epoch": 0.04, "grad_norm": 4.043516635894775, "learning_rate": 1.9997259977905613e-05, "loss": 0.7796, "step": 1453 }, { "epoch": 0.04, "grad_norm": 2.006584405899048, "learning_rate": 1.999724051156489e-05, "loss": 0.6986, "step": 1454 }, { "epoch": 0.04, "grad_norm": 3.610280990600586, "learning_rate": 1.999722097632947e-05, "loss": 0.8305, "step": 1455 }, { "epoch": 0.04, "grad_norm": 2.407777786254883, "learning_rate": 1.9997201372199486e-05, "loss": 0.6925, "step": 1456 }, { "epoch": 0.04, "grad_norm": 2.0629384517669678, "learning_rate": 1.9997181699175066e-05, "loss": 0.7687, "step": 1457 }, { "epoch": 0.04, "grad_norm": 1.5981688499450684, "learning_rate": 1.999716195725635e-05, "loss": 0.7447, "step": 1458 }, { "epoch": 0.04, "grad_norm": 2.4092376232147217, "learning_rate": 1.999714214644348e-05, "loss": 0.8092, "step": 1459 }, { "epoch": 0.04, "grad_norm": 1.7515642642974854, "learning_rate": 1.9997122266736583e-05, "loss": 0.8286, "step": 1460 }, { "epoch": 0.04, "grad_norm": 2.351187229156494, "learning_rate": 1.99971023181358e-05, "loss": 0.5975, "step": 1461 }, { "epoch": 0.04, "grad_norm": 2.1391870975494385, "learning_rate": 1.999708230064127e-05, "loss": 0.761, "step": 1462 }, { "epoch": 0.04, "grad_norm": 2.364511489868164, "learning_rate": 1.999706221425313e-05, "loss": 0.7324, "step": 1463 }, { "epoch": 0.04, "grad_norm": 3.189462423324585, "learning_rate": 1.9997042058971518e-05, "loss": 0.9428, "step": 1464 }, { "epoch": 0.04, "grad_norm": 2.917609453201294, "learning_rate": 1.9997021834796573e-05, "loss": 0.9459, "step": 1465 }, { "epoch": 0.04, "grad_norm": 3.2275633811950684, "learning_rate": 1.9997001541728434e-05, "loss": 0.8228, "step": 1466 }, { "epoch": 0.04, "grad_norm": 2.381343126296997, "learning_rate": 1.9996981179767242e-05, "loss": 0.8446, "step": 1467 }, { "epoch": 0.04, "grad_norm": 2.5718166828155518, "learning_rate": 1.9996960748913134e-05, "loss": 0.6158, "step": 1468 }, { "epoch": 0.04, "grad_norm": 2.569814920425415, "learning_rate": 1.9996940249166255e-05, "loss": 0.7651, "step": 1469 }, { "epoch": 0.04, "grad_norm": 2.783658504486084, "learning_rate": 1.9996919680526745e-05, "loss": 0.6684, "step": 1470 }, { "epoch": 0.04, "grad_norm": 2.4483225345611572, "learning_rate": 1.999689904299474e-05, "loss": 0.7962, "step": 1471 }, { "epoch": 0.04, "grad_norm": 2.0372228622436523, "learning_rate": 1.9996878336570393e-05, "loss": 0.7232, "step": 1472 }, { "epoch": 0.04, "grad_norm": 2.668329954147339, "learning_rate": 1.999685756125384e-05, "loss": 0.7259, "step": 1473 }, { "epoch": 0.04, "grad_norm": 3.871582269668579, "learning_rate": 1.9996836717045225e-05, "loss": 0.7462, "step": 1474 }, { "epoch": 0.04, "grad_norm": 3.759896993637085, "learning_rate": 1.9996815803944692e-05, "loss": 0.8949, "step": 1475 }, { "epoch": 0.04, "grad_norm": 2.3431499004364014, "learning_rate": 1.9996794821952385e-05, "loss": 0.8674, "step": 1476 }, { "epoch": 0.04, "grad_norm": 2.399919033050537, "learning_rate": 1.999677377106845e-05, "loss": 0.7374, "step": 1477 }, { "epoch": 0.04, "grad_norm": 2.7263901233673096, "learning_rate": 1.999675265129303e-05, "loss": 0.7716, "step": 1478 }, { "epoch": 0.04, "grad_norm": 2.948776960372925, "learning_rate": 1.999673146262627e-05, "loss": 0.8938, "step": 1479 }, { "epoch": 0.04, "grad_norm": 2.7322463989257812, "learning_rate": 1.9996710205068318e-05, "loss": 0.6441, "step": 1480 }, { "epoch": 0.04, "grad_norm": 3.5274603366851807, "learning_rate": 1.9996688878619323e-05, "loss": 0.7021, "step": 1481 }, { "epoch": 0.04, "grad_norm": 1.7808719873428345, "learning_rate": 1.9996667483279423e-05, "loss": 0.6628, "step": 1482 }, { "epoch": 0.04, "grad_norm": 1.5852553844451904, "learning_rate": 1.9996646019048775e-05, "loss": 0.7776, "step": 1483 }, { "epoch": 0.04, "grad_norm": 3.287876844406128, "learning_rate": 1.999662448592752e-05, "loss": 0.822, "step": 1484 }, { "epoch": 0.04, "grad_norm": 2.138143301010132, "learning_rate": 1.9996602883915814e-05, "loss": 0.8806, "step": 1485 }, { "epoch": 0.04, "grad_norm": 2.4036996364593506, "learning_rate": 1.99965812130138e-05, "loss": 0.8991, "step": 1486 }, { "epoch": 0.04, "grad_norm": 2.5356011390686035, "learning_rate": 1.999655947322163e-05, "loss": 0.7476, "step": 1487 }, { "epoch": 0.04, "grad_norm": 2.5518829822540283, "learning_rate": 1.9996537664539447e-05, "loss": 0.7628, "step": 1488 }, { "epoch": 0.04, "grad_norm": 1.7929613590240479, "learning_rate": 1.999651578696741e-05, "loss": 0.6893, "step": 1489 }, { "epoch": 0.04, "grad_norm": 6.6661224365234375, "learning_rate": 1.999649384050567e-05, "loss": 0.7747, "step": 1490 }, { "epoch": 0.04, "grad_norm": 2.2602522373199463, "learning_rate": 1.9996471825154372e-05, "loss": 0.7608, "step": 1491 }, { "epoch": 0.04, "grad_norm": 3.5425314903259277, "learning_rate": 1.999644974091367e-05, "loss": 0.6714, "step": 1492 }, { "epoch": 0.04, "grad_norm": 2.3752753734588623, "learning_rate": 1.9996427587783715e-05, "loss": 0.7761, "step": 1493 }, { "epoch": 0.04, "grad_norm": 2.4197628498077393, "learning_rate": 1.9996405365764664e-05, "loss": 0.8472, "step": 1494 }, { "epoch": 0.04, "grad_norm": 2.808776378631592, "learning_rate": 1.999638307485667e-05, "loss": 0.8495, "step": 1495 }, { "epoch": 0.04, "grad_norm": 2.634056806564331, "learning_rate": 1.999636071505988e-05, "loss": 0.6981, "step": 1496 }, { "epoch": 0.04, "grad_norm": 3.5101802349090576, "learning_rate": 1.9996338286374454e-05, "loss": 0.7237, "step": 1497 }, { "epoch": 0.04, "grad_norm": 1.7084776163101196, "learning_rate": 1.999631578880054e-05, "loss": 0.7255, "step": 1498 }, { "epoch": 0.04, "grad_norm": 1.9188768863677979, "learning_rate": 1.9996293222338306e-05, "loss": 0.7079, "step": 1499 }, { "epoch": 0.04, "grad_norm": 2.73439621925354, "learning_rate": 1.9996270586987895e-05, "loss": 0.8159, "step": 1500 }, { "epoch": 0.04, "grad_norm": 2.9884729385375977, "learning_rate": 1.999624788274947e-05, "loss": 0.8063, "step": 1501 }, { "epoch": 0.04, "grad_norm": 2.4646947383880615, "learning_rate": 1.9996225109623182e-05, "loss": 0.7017, "step": 1502 }, { "epoch": 0.04, "grad_norm": 5.104576587677002, "learning_rate": 1.9996202267609193e-05, "loss": 0.8733, "step": 1503 }, { "epoch": 0.04, "grad_norm": 1.9057221412658691, "learning_rate": 1.9996179356707657e-05, "loss": 0.8687, "step": 1504 }, { "epoch": 0.04, "grad_norm": 1.8261858224868774, "learning_rate": 1.999615637691873e-05, "loss": 0.7037, "step": 1505 }, { "epoch": 0.04, "grad_norm": 2.139402389526367, "learning_rate": 1.9996133328242577e-05, "loss": 0.8121, "step": 1506 }, { "epoch": 0.04, "grad_norm": 2.521575450897217, "learning_rate": 1.9996110210679355e-05, "loss": 0.8487, "step": 1507 }, { "epoch": 0.04, "grad_norm": 2.0702381134033203, "learning_rate": 1.999608702422922e-05, "loss": 0.7883, "step": 1508 }, { "epoch": 0.04, "grad_norm": 2.1346893310546875, "learning_rate": 1.9996063768892334e-05, "loss": 0.706, "step": 1509 }, { "epoch": 0.04, "grad_norm": 1.4567673206329346, "learning_rate": 1.999604044466885e-05, "loss": 0.6302, "step": 1510 }, { "epoch": 0.04, "grad_norm": 3.120668649673462, "learning_rate": 1.9996017051558946e-05, "loss": 0.7165, "step": 1511 }, { "epoch": 0.04, "grad_norm": 2.7891135215759277, "learning_rate": 1.9995993589562765e-05, "loss": 0.7881, "step": 1512 }, { "epoch": 0.04, "grad_norm": 1.743410348892212, "learning_rate": 1.9995970058680482e-05, "loss": 0.7638, "step": 1513 }, { "epoch": 0.04, "grad_norm": 2.6982626914978027, "learning_rate": 1.9995946458912248e-05, "loss": 0.6956, "step": 1514 }, { "epoch": 0.04, "grad_norm": 2.5860612392425537, "learning_rate": 1.9995922790258233e-05, "loss": 0.9472, "step": 1515 }, { "epoch": 0.04, "grad_norm": 3.312765121459961, "learning_rate": 1.9995899052718595e-05, "loss": 0.8169, "step": 1516 }, { "epoch": 0.04, "grad_norm": 3.411463499069214, "learning_rate": 1.999587524629351e-05, "loss": 0.7918, "step": 1517 }, { "epoch": 0.04, "grad_norm": 2.5533034801483154, "learning_rate": 1.9995851370983126e-05, "loss": 0.7785, "step": 1518 }, { "epoch": 0.04, "grad_norm": 1.7820333242416382, "learning_rate": 1.9995827426787616e-05, "loss": 0.7487, "step": 1519 }, { "epoch": 0.04, "grad_norm": 2.0042922496795654, "learning_rate": 1.9995803413707138e-05, "loss": 0.6498, "step": 1520 }, { "epoch": 0.04, "grad_norm": 2.531383991241455, "learning_rate": 1.999577933174187e-05, "loss": 0.7174, "step": 1521 }, { "epoch": 0.04, "grad_norm": 1.6134353876113892, "learning_rate": 1.999575518089197e-05, "loss": 0.7894, "step": 1522 }, { "epoch": 0.04, "grad_norm": 2.867426872253418, "learning_rate": 1.9995730961157604e-05, "loss": 0.7017, "step": 1523 }, { "epoch": 0.04, "grad_norm": 1.9844701290130615, "learning_rate": 1.9995706672538938e-05, "loss": 0.7888, "step": 1524 }, { "epoch": 0.04, "grad_norm": 2.5175230503082275, "learning_rate": 1.9995682315036144e-05, "loss": 0.6767, "step": 1525 }, { "epoch": 0.04, "grad_norm": 2.4402451515197754, "learning_rate": 1.999565788864939e-05, "loss": 0.8508, "step": 1526 }, { "epoch": 0.04, "grad_norm": 1.9169481992721558, "learning_rate": 1.9995633393378834e-05, "loss": 0.6857, "step": 1527 }, { "epoch": 0.04, "grad_norm": 3.3316121101379395, "learning_rate": 1.9995608829224656e-05, "loss": 0.749, "step": 1528 }, { "epoch": 0.04, "grad_norm": 3.11327862739563, "learning_rate": 1.9995584196187025e-05, "loss": 0.8612, "step": 1529 }, { "epoch": 0.04, "grad_norm": 3.295652151107788, "learning_rate": 1.9995559494266103e-05, "loss": 0.6354, "step": 1530 }, { "epoch": 0.04, "grad_norm": 2.6732184886932373, "learning_rate": 1.999553472346207e-05, "loss": 0.6223, "step": 1531 }, { "epoch": 0.04, "grad_norm": 1.8807547092437744, "learning_rate": 1.9995509883775088e-05, "loss": 0.7414, "step": 1532 }, { "epoch": 0.04, "grad_norm": 2.6971874237060547, "learning_rate": 1.9995484975205334e-05, "loss": 0.7958, "step": 1533 }, { "epoch": 0.04, "grad_norm": 3.084787130355835, "learning_rate": 1.9995459997752973e-05, "loss": 0.7307, "step": 1534 }, { "epoch": 0.04, "grad_norm": 1.888489007949829, "learning_rate": 1.9995434951418186e-05, "loss": 0.8256, "step": 1535 }, { "epoch": 0.04, "grad_norm": 3.064013957977295, "learning_rate": 1.9995409836201137e-05, "loss": 0.7794, "step": 1536 }, { "epoch": 0.04, "grad_norm": 2.645970582962036, "learning_rate": 1.9995384652102007e-05, "loss": 0.7411, "step": 1537 }, { "epoch": 0.04, "grad_norm": 4.682852745056152, "learning_rate": 1.9995359399120965e-05, "loss": 0.7169, "step": 1538 }, { "epoch": 0.04, "grad_norm": 2.5678627490997314, "learning_rate": 1.999533407725818e-05, "loss": 0.6688, "step": 1539 }, { "epoch": 0.04, "grad_norm": 2.0789096355438232, "learning_rate": 1.9995308686513837e-05, "loss": 0.647, "step": 1540 }, { "epoch": 0.04, "grad_norm": 2.5534274578094482, "learning_rate": 1.999528322688811e-05, "loss": 0.7751, "step": 1541 }, { "epoch": 0.04, "grad_norm": 1.6425188779830933, "learning_rate": 1.9995257698381166e-05, "loss": 0.7176, "step": 1542 }, { "epoch": 0.04, "grad_norm": 2.255749464035034, "learning_rate": 1.9995232100993184e-05, "loss": 0.8416, "step": 1543 }, { "epoch": 0.04, "grad_norm": 3.2019779682159424, "learning_rate": 1.9995206434724347e-05, "loss": 0.792, "step": 1544 }, { "epoch": 0.04, "grad_norm": 2.069438934326172, "learning_rate": 1.9995180699574824e-05, "loss": 0.6105, "step": 1545 }, { "epoch": 0.04, "grad_norm": 1.607580542564392, "learning_rate": 1.9995154895544796e-05, "loss": 0.8286, "step": 1546 }, { "epoch": 0.04, "grad_norm": 2.856182098388672, "learning_rate": 1.999512902263444e-05, "loss": 0.5627, "step": 1547 }, { "epoch": 0.04, "grad_norm": 2.3092243671417236, "learning_rate": 1.999510308084393e-05, "loss": 0.7804, "step": 1548 }, { "epoch": 0.04, "grad_norm": 3.1634340286254883, "learning_rate": 1.9995077070173456e-05, "loss": 0.7625, "step": 1549 }, { "epoch": 0.04, "grad_norm": 2.008314609527588, "learning_rate": 1.9995050990623186e-05, "loss": 0.8419, "step": 1550 }, { "epoch": 0.04, "grad_norm": 2.5282652378082275, "learning_rate": 1.9995024842193306e-05, "loss": 0.6885, "step": 1551 }, { "epoch": 0.04, "grad_norm": 3.549466133117676, "learning_rate": 1.9994998624883993e-05, "loss": 0.8279, "step": 1552 }, { "epoch": 0.04, "grad_norm": 2.2198851108551025, "learning_rate": 1.9994972338695432e-05, "loss": 0.745, "step": 1553 }, { "epoch": 0.04, "grad_norm": 3.59018611907959, "learning_rate": 1.9994945983627798e-05, "loss": 0.89, "step": 1554 }, { "epoch": 0.04, "grad_norm": 2.4597585201263428, "learning_rate": 1.9994919559681275e-05, "loss": 0.7664, "step": 1555 }, { "epoch": 0.04, "grad_norm": 2.653608798980713, "learning_rate": 1.9994893066856048e-05, "loss": 0.8289, "step": 1556 }, { "epoch": 0.04, "grad_norm": 2.844773769378662, "learning_rate": 1.9994866505152294e-05, "loss": 0.7574, "step": 1557 }, { "epoch": 0.04, "grad_norm": 2.1671106815338135, "learning_rate": 1.9994839874570206e-05, "loss": 0.6721, "step": 1558 }, { "epoch": 0.04, "grad_norm": 5.64248514175415, "learning_rate": 1.9994813175109956e-05, "loss": 0.9257, "step": 1559 }, { "epoch": 0.04, "grad_norm": 1.9931769371032715, "learning_rate": 1.9994786406771732e-05, "loss": 0.7146, "step": 1560 }, { "epoch": 0.04, "grad_norm": 2.4405643939971924, "learning_rate": 1.9994759569555723e-05, "loss": 1.0387, "step": 1561 }, { "epoch": 0.04, "grad_norm": 2.4781455993652344, "learning_rate": 1.9994732663462108e-05, "loss": 0.7786, "step": 1562 }, { "epoch": 0.04, "grad_norm": 2.405139207839966, "learning_rate": 1.9994705688491077e-05, "loss": 0.7662, "step": 1563 }, { "epoch": 0.04, "grad_norm": 2.449634075164795, "learning_rate": 1.999467864464281e-05, "loss": 0.7062, "step": 1564 }, { "epoch": 0.04, "grad_norm": 3.2013027667999268, "learning_rate": 1.99946515319175e-05, "loss": 0.7858, "step": 1565 }, { "epoch": 0.04, "grad_norm": 2.16363263130188, "learning_rate": 1.999462435031533e-05, "loss": 0.6591, "step": 1566 }, { "epoch": 0.04, "grad_norm": 3.5421206951141357, "learning_rate": 1.999459709983649e-05, "loss": 0.8314, "step": 1567 }, { "epoch": 0.04, "grad_norm": 2.141594648361206, "learning_rate": 1.9994569780481164e-05, "loss": 0.6672, "step": 1568 }, { "epoch": 0.04, "grad_norm": 2.706874132156372, "learning_rate": 1.9994542392249545e-05, "loss": 0.7115, "step": 1569 }, { "epoch": 0.04, "grad_norm": 2.627664804458618, "learning_rate": 1.9994514935141814e-05, "loss": 0.6922, "step": 1570 }, { "epoch": 0.04, "grad_norm": 2.6026804447174072, "learning_rate": 1.9994487409158168e-05, "loss": 0.7673, "step": 1571 }, { "epoch": 0.04, "grad_norm": 3.3849799633026123, "learning_rate": 1.9994459814298794e-05, "loss": 0.6481, "step": 1572 }, { "epoch": 0.04, "grad_norm": 1.6808561086654663, "learning_rate": 1.999443215056388e-05, "loss": 0.7901, "step": 1573 }, { "epoch": 0.04, "grad_norm": 2.4325480461120605, "learning_rate": 1.9994404417953622e-05, "loss": 0.8564, "step": 1574 }, { "epoch": 0.04, "grad_norm": 1.8487828969955444, "learning_rate": 1.9994376616468204e-05, "loss": 0.8922, "step": 1575 }, { "epoch": 0.04, "grad_norm": 3.721294641494751, "learning_rate": 1.9994348746107827e-05, "loss": 0.6439, "step": 1576 }, { "epoch": 0.04, "grad_norm": 5.651505947113037, "learning_rate": 1.9994320806872673e-05, "loss": 0.7018, "step": 1577 }, { "epoch": 0.04, "grad_norm": 2.913544178009033, "learning_rate": 1.999429279876294e-05, "loss": 0.8625, "step": 1578 }, { "epoch": 0.04, "grad_norm": 2.719144582748413, "learning_rate": 1.9994264721778822e-05, "loss": 0.7433, "step": 1579 }, { "epoch": 0.04, "grad_norm": 2.142302989959717, "learning_rate": 1.999423657592051e-05, "loss": 0.7405, "step": 1580 }, { "epoch": 0.04, "grad_norm": 1.8262975215911865, "learning_rate": 1.9994208361188196e-05, "loss": 0.6673, "step": 1581 }, { "epoch": 0.04, "grad_norm": 1.9898645877838135, "learning_rate": 1.999418007758208e-05, "loss": 0.7914, "step": 1582 }, { "epoch": 0.04, "grad_norm": 2.782688856124878, "learning_rate": 1.999415172510235e-05, "loss": 0.9157, "step": 1583 }, { "epoch": 0.04, "grad_norm": 3.6222352981567383, "learning_rate": 1.999412330374921e-05, "loss": 0.7606, "step": 1584 }, { "epoch": 0.04, "grad_norm": 2.670055627822876, "learning_rate": 1.999409481352285e-05, "loss": 0.7497, "step": 1585 }, { "epoch": 0.04, "grad_norm": 2.139141798019409, "learning_rate": 1.9994066254423465e-05, "loss": 0.6471, "step": 1586 }, { "epoch": 0.04, "grad_norm": 2.113340377807617, "learning_rate": 1.9994037626451256e-05, "loss": 0.7907, "step": 1587 }, { "epoch": 0.04, "grad_norm": 1.562617301940918, "learning_rate": 1.9994008929606415e-05, "loss": 0.532, "step": 1588 }, { "epoch": 0.04, "grad_norm": 3.1329309940338135, "learning_rate": 1.999398016388915e-05, "loss": 0.7291, "step": 1589 }, { "epoch": 0.04, "grad_norm": 2.3234338760375977, "learning_rate": 1.999395132929965e-05, "loss": 0.7419, "step": 1590 }, { "epoch": 0.04, "grad_norm": 2.6061995029449463, "learning_rate": 1.999392242583811e-05, "loss": 0.633, "step": 1591 }, { "epoch": 0.04, "grad_norm": 3.2097511291503906, "learning_rate": 1.9993893453504746e-05, "loss": 0.7369, "step": 1592 }, { "epoch": 0.04, "grad_norm": 3.0300586223602295, "learning_rate": 1.9993864412299738e-05, "loss": 0.7724, "step": 1593 }, { "epoch": 0.04, "grad_norm": 2.109217882156372, "learning_rate": 1.99938353022233e-05, "loss": 0.7601, "step": 1594 }, { "epoch": 0.04, "grad_norm": 2.641854763031006, "learning_rate": 1.9993806123275625e-05, "loss": 0.8612, "step": 1595 }, { "epoch": 0.04, "grad_norm": 2.2153117656707764, "learning_rate": 1.999377687545692e-05, "loss": 0.665, "step": 1596 }, { "epoch": 0.04, "grad_norm": 2.8675143718719482, "learning_rate": 1.999374755876738e-05, "loss": 0.8023, "step": 1597 }, { "epoch": 0.04, "grad_norm": 2.0075795650482178, "learning_rate": 1.9993718173207213e-05, "loss": 0.6837, "step": 1598 }, { "epoch": 0.04, "grad_norm": 2.5102386474609375, "learning_rate": 1.9993688718776623e-05, "loss": 0.6705, "step": 1599 }, { "epoch": 0.04, "grad_norm": 3.8279640674591064, "learning_rate": 1.9993659195475803e-05, "loss": 0.9244, "step": 1600 }, { "epoch": 0.04, "grad_norm": 2.424015522003174, "learning_rate": 1.9993629603304963e-05, "loss": 0.7795, "step": 1601 }, { "epoch": 0.04, "grad_norm": 2.621859550476074, "learning_rate": 1.9993599942264308e-05, "loss": 0.7306, "step": 1602 }, { "epoch": 0.04, "grad_norm": 2.003169059753418, "learning_rate": 1.9993570212354042e-05, "loss": 0.6255, "step": 1603 }, { "epoch": 0.04, "grad_norm": 1.6731641292572021, "learning_rate": 1.9993540413574367e-05, "loss": 0.8038, "step": 1604 }, { "epoch": 0.04, "grad_norm": 2.3737435340881348, "learning_rate": 1.999351054592549e-05, "loss": 0.6352, "step": 1605 }, { "epoch": 0.04, "grad_norm": 3.553191661834717, "learning_rate": 1.9993480609407615e-05, "loss": 1.0569, "step": 1606 }, { "epoch": 0.04, "grad_norm": 2.6268413066864014, "learning_rate": 1.9993450604020954e-05, "loss": 0.7656, "step": 1607 }, { "epoch": 0.04, "grad_norm": 1.5448908805847168, "learning_rate": 1.9993420529765707e-05, "loss": 0.7125, "step": 1608 }, { "epoch": 0.04, "grad_norm": 1.9470148086547852, "learning_rate": 1.9993390386642084e-05, "loss": 0.7799, "step": 1609 }, { "epoch": 0.04, "grad_norm": 2.948852777481079, "learning_rate": 1.9993360174650297e-05, "loss": 0.8002, "step": 1610 }, { "epoch": 0.04, "grad_norm": 2.4816527366638184, "learning_rate": 1.9993329893790548e-05, "loss": 0.7168, "step": 1611 }, { "epoch": 0.04, "grad_norm": 5.299691677093506, "learning_rate": 1.9993299544063046e-05, "loss": 0.9426, "step": 1612 }, { "epoch": 0.04, "grad_norm": 2.9696457386016846, "learning_rate": 1.9993269125468e-05, "loss": 0.7105, "step": 1613 }, { "epoch": 0.04, "grad_norm": 1.8893924951553345, "learning_rate": 1.9993238638005627e-05, "loss": 0.6889, "step": 1614 }, { "epoch": 0.04, "grad_norm": 2.777820587158203, "learning_rate": 1.999320808167613e-05, "loss": 0.7637, "step": 1615 }, { "epoch": 0.04, "grad_norm": 3.1512982845306396, "learning_rate": 1.999317745647972e-05, "loss": 0.7588, "step": 1616 }, { "epoch": 0.04, "grad_norm": 1.9120118618011475, "learning_rate": 1.9993146762416608e-05, "loss": 0.8144, "step": 1617 }, { "epoch": 0.04, "grad_norm": 4.075115203857422, "learning_rate": 1.999311599948701e-05, "loss": 0.9478, "step": 1618 }, { "epoch": 0.04, "grad_norm": 1.8424803018569946, "learning_rate": 1.9993085167691133e-05, "loss": 0.5433, "step": 1619 }, { "epoch": 0.04, "grad_norm": 2.188939094543457, "learning_rate": 1.999305426702919e-05, "loss": 0.8415, "step": 1620 }, { "epoch": 0.04, "grad_norm": 4.485774993896484, "learning_rate": 1.99930232975014e-05, "loss": 0.7333, "step": 1621 }, { "epoch": 0.04, "grad_norm": 1.4987964630126953, "learning_rate": 1.9992992259107966e-05, "loss": 0.6712, "step": 1622 }, { "epoch": 0.04, "grad_norm": 2.7177531719207764, "learning_rate": 1.9992961151849114e-05, "loss": 0.7475, "step": 1623 }, { "epoch": 0.04, "grad_norm": 2.4414665699005127, "learning_rate": 1.9992929975725047e-05, "loss": 0.7898, "step": 1624 }, { "epoch": 0.04, "grad_norm": 2.7233121395111084, "learning_rate": 1.9992898730735985e-05, "loss": 0.8441, "step": 1625 }, { "epoch": 0.04, "grad_norm": 4.060360431671143, "learning_rate": 1.999286741688214e-05, "loss": 0.7417, "step": 1626 }, { "epoch": 0.04, "grad_norm": 2.4922432899475098, "learning_rate": 1.9992836034163737e-05, "loss": 0.721, "step": 1627 }, { "epoch": 0.04, "grad_norm": 2.595442771911621, "learning_rate": 1.9992804582580984e-05, "loss": 0.6899, "step": 1628 }, { "epoch": 0.04, "grad_norm": 1.6322784423828125, "learning_rate": 1.99927730621341e-05, "loss": 0.7386, "step": 1629 }, { "epoch": 0.04, "grad_norm": 2.1957430839538574, "learning_rate": 1.9992741472823303e-05, "loss": 0.7312, "step": 1630 }, { "epoch": 0.04, "grad_norm": 3.4648566246032715, "learning_rate": 1.9992709814648808e-05, "loss": 0.8819, "step": 1631 }, { "epoch": 0.04, "grad_norm": 3.007211685180664, "learning_rate": 1.9992678087610836e-05, "loss": 0.7076, "step": 1632 }, { "epoch": 0.04, "grad_norm": 1.6020662784576416, "learning_rate": 1.9992646291709604e-05, "loss": 0.7467, "step": 1633 }, { "epoch": 0.04, "grad_norm": 2.908642292022705, "learning_rate": 1.9992614426945335e-05, "loss": 0.7649, "step": 1634 }, { "epoch": 0.04, "grad_norm": 2.6224207878112793, "learning_rate": 1.9992582493318243e-05, "loss": 0.9888, "step": 1635 }, { "epoch": 0.04, "grad_norm": 2.143449068069458, "learning_rate": 1.999255049082855e-05, "loss": 0.7464, "step": 1636 }, { "epoch": 0.04, "grad_norm": 2.87756609916687, "learning_rate": 1.9992518419476477e-05, "loss": 0.7194, "step": 1637 }, { "epoch": 0.04, "grad_norm": 1.6997991800308228, "learning_rate": 1.9992486279262243e-05, "loss": 0.7653, "step": 1638 }, { "epoch": 0.04, "grad_norm": 3.2638051509857178, "learning_rate": 1.9992454070186075e-05, "loss": 0.7792, "step": 1639 }, { "epoch": 0.04, "grad_norm": 2.1069540977478027, "learning_rate": 1.999242179224819e-05, "loss": 0.6897, "step": 1640 }, { "epoch": 0.04, "grad_norm": 2.602851390838623, "learning_rate": 1.999238944544881e-05, "loss": 0.7306, "step": 1641 }, { "epoch": 0.04, "grad_norm": 2.264091968536377, "learning_rate": 1.9992357029788164e-05, "loss": 0.7922, "step": 1642 }, { "epoch": 0.04, "grad_norm": 2.676469326019287, "learning_rate": 1.999232454526647e-05, "loss": 0.7661, "step": 1643 }, { "epoch": 0.04, "grad_norm": 1.7618842124938965, "learning_rate": 1.999229199188395e-05, "loss": 0.5946, "step": 1644 }, { "epoch": 0.04, "grad_norm": 5.585408687591553, "learning_rate": 1.9992259369640834e-05, "loss": 0.9561, "step": 1645 }, { "epoch": 0.04, "grad_norm": 2.4774510860443115, "learning_rate": 1.9992226678537344e-05, "loss": 0.5435, "step": 1646 }, { "epoch": 0.04, "grad_norm": 3.613877296447754, "learning_rate": 1.9992193918573705e-05, "loss": 0.9508, "step": 1647 }, { "epoch": 0.04, "grad_norm": 3.67812180519104, "learning_rate": 1.9992161089750142e-05, "loss": 0.8105, "step": 1648 }, { "epoch": 0.04, "grad_norm": 2.371919870376587, "learning_rate": 1.9992128192066884e-05, "loss": 0.6155, "step": 1649 }, { "epoch": 0.04, "grad_norm": 2.925776481628418, "learning_rate": 1.9992095225524153e-05, "loss": 0.7748, "step": 1650 }, { "epoch": 0.04, "grad_norm": 3.093528985977173, "learning_rate": 1.9992062190122183e-05, "loss": 0.767, "step": 1651 }, { "epoch": 0.04, "grad_norm": 2.9576404094696045, "learning_rate": 1.99920290858612e-05, "loss": 0.9331, "step": 1652 }, { "epoch": 0.04, "grad_norm": 2.388745069503784, "learning_rate": 1.999199591274142e-05, "loss": 0.8165, "step": 1653 }, { "epoch": 0.04, "grad_norm": 2.175628185272217, "learning_rate": 1.999196267076309e-05, "loss": 0.6854, "step": 1654 }, { "epoch": 0.04, "grad_norm": 1.5843127965927124, "learning_rate": 1.999192935992643e-05, "loss": 0.7104, "step": 1655 }, { "epoch": 0.04, "grad_norm": 2.0252621173858643, "learning_rate": 1.9991895980231674e-05, "loss": 0.7704, "step": 1656 }, { "epoch": 0.04, "grad_norm": 2.7179417610168457, "learning_rate": 1.9991862531679044e-05, "loss": 0.7829, "step": 1657 }, { "epoch": 0.04, "grad_norm": 2.141791343688965, "learning_rate": 1.9991829014268775e-05, "loss": 0.7763, "step": 1658 }, { "epoch": 0.04, "grad_norm": 1.9670466184616089, "learning_rate": 1.9991795428001097e-05, "loss": 0.777, "step": 1659 }, { "epoch": 0.04, "grad_norm": 2.747040271759033, "learning_rate": 1.9991761772876243e-05, "loss": 0.5822, "step": 1660 }, { "epoch": 0.04, "grad_norm": 3.424006462097168, "learning_rate": 1.999172804889445e-05, "loss": 0.8199, "step": 1661 }, { "epoch": 0.04, "grad_norm": 2.715005397796631, "learning_rate": 1.9991694256055936e-05, "loss": 0.7953, "step": 1662 }, { "epoch": 0.04, "grad_norm": 2.0137922763824463, "learning_rate": 1.9991660394360947e-05, "loss": 0.6084, "step": 1663 }, { "epoch": 0.04, "grad_norm": 3.512798309326172, "learning_rate": 1.999162646380971e-05, "loss": 0.9148, "step": 1664 }, { "epoch": 0.04, "grad_norm": 9.138687133789062, "learning_rate": 1.9991592464402464e-05, "loss": 1.1622, "step": 1665 }, { "epoch": 0.04, "grad_norm": 3.7085812091827393, "learning_rate": 1.9991558396139436e-05, "loss": 0.8859, "step": 1666 }, { "epoch": 0.04, "grad_norm": 1.9152432680130005, "learning_rate": 1.9991524259020868e-05, "loss": 0.8016, "step": 1667 }, { "epoch": 0.04, "grad_norm": 7.497103691101074, "learning_rate": 1.9991490053046993e-05, "loss": 0.8039, "step": 1668 }, { "epoch": 0.04, "grad_norm": 1.7931102514266968, "learning_rate": 1.9991455778218043e-05, "loss": 0.5721, "step": 1669 }, { "epoch": 0.04, "grad_norm": 2.2087223529815674, "learning_rate": 1.999142143453426e-05, "loss": 0.8078, "step": 1670 }, { "epoch": 0.04, "grad_norm": 2.1828558444976807, "learning_rate": 1.999138702199587e-05, "loss": 0.5557, "step": 1671 }, { "epoch": 0.04, "grad_norm": 2.676095962524414, "learning_rate": 1.9991352540603124e-05, "loss": 0.8393, "step": 1672 }, { "epoch": 0.04, "grad_norm": 2.4310967922210693, "learning_rate": 1.9991317990356258e-05, "loss": 0.7333, "step": 1673 }, { "epoch": 0.04, "grad_norm": 2.6169638633728027, "learning_rate": 1.9991283371255496e-05, "loss": 0.7361, "step": 1674 }, { "epoch": 0.04, "grad_norm": 2.4958412647247314, "learning_rate": 1.999124868330109e-05, "loss": 0.9048, "step": 1675 }, { "epoch": 0.04, "grad_norm": 2.446403741836548, "learning_rate": 1.9991213926493276e-05, "loss": 0.7081, "step": 1676 }, { "epoch": 0.04, "grad_norm": 2.506544589996338, "learning_rate": 1.9991179100832295e-05, "loss": 0.7273, "step": 1677 }, { "epoch": 0.04, "grad_norm": 3.6845126152038574, "learning_rate": 1.999114420631838e-05, "loss": 0.8578, "step": 1678 }, { "epoch": 0.04, "grad_norm": 2.283154010772705, "learning_rate": 1.9991109242951777e-05, "loss": 0.6244, "step": 1679 }, { "epoch": 0.04, "grad_norm": 2.962533712387085, "learning_rate": 1.9991074210732732e-05, "loss": 0.8492, "step": 1680 }, { "epoch": 0.04, "grad_norm": 2.509960889816284, "learning_rate": 1.9991039109661475e-05, "loss": 0.8077, "step": 1681 }, { "epoch": 0.04, "grad_norm": 2.5425963401794434, "learning_rate": 1.9991003939738254e-05, "loss": 0.808, "step": 1682 }, { "epoch": 0.04, "grad_norm": 3.4345459938049316, "learning_rate": 1.9990968700963313e-05, "loss": 0.7956, "step": 1683 }, { "epoch": 0.04, "grad_norm": 2.103574275970459, "learning_rate": 1.999093339333689e-05, "loss": 0.7596, "step": 1684 }, { "epoch": 0.04, "grad_norm": 2.0520882606506348, "learning_rate": 1.9990898016859232e-05, "loss": 0.8121, "step": 1685 }, { "epoch": 0.04, "grad_norm": 6.469956874847412, "learning_rate": 1.999086257153058e-05, "loss": 0.6915, "step": 1686 }, { "epoch": 0.04, "grad_norm": 2.5104572772979736, "learning_rate": 1.9990827057351184e-05, "loss": 0.7652, "step": 1687 }, { "epoch": 0.04, "grad_norm": 1.9525545835494995, "learning_rate": 1.999079147432128e-05, "loss": 0.7543, "step": 1688 }, { "epoch": 0.04, "grad_norm": 2.2917404174804688, "learning_rate": 1.999075582244112e-05, "loss": 0.7617, "step": 1689 }, { "epoch": 0.04, "grad_norm": 2.315877676010132, "learning_rate": 1.999072010171095e-05, "loss": 0.7582, "step": 1690 }, { "epoch": 0.04, "grad_norm": 2.6820807456970215, "learning_rate": 1.9990684312131012e-05, "loss": 0.8437, "step": 1691 }, { "epoch": 0.04, "grad_norm": 2.877002477645874, "learning_rate": 1.9990648453701556e-05, "loss": 0.6865, "step": 1692 }, { "epoch": 0.04, "grad_norm": 2.930565357208252, "learning_rate": 1.9990612526422826e-05, "loss": 0.6748, "step": 1693 }, { "epoch": 0.04, "grad_norm": 2.5001370906829834, "learning_rate": 1.9990576530295072e-05, "loss": 0.785, "step": 1694 }, { "epoch": 0.04, "grad_norm": 2.9556567668914795, "learning_rate": 1.9990540465318536e-05, "loss": 0.8299, "step": 1695 }, { "epoch": 0.04, "grad_norm": 2.047318935394287, "learning_rate": 1.9990504331493478e-05, "loss": 0.6965, "step": 1696 }, { "epoch": 0.04, "grad_norm": 2.7493667602539062, "learning_rate": 1.999046812882014e-05, "loss": 0.7906, "step": 1697 }, { "epoch": 0.04, "grad_norm": 2.356961488723755, "learning_rate": 1.9990431857298772e-05, "loss": 0.6433, "step": 1698 }, { "epoch": 0.04, "grad_norm": 2.2404403686523438, "learning_rate": 1.9990395516929622e-05, "loss": 0.731, "step": 1699 }, { "epoch": 0.04, "grad_norm": 3.106975793838501, "learning_rate": 1.9990359107712947e-05, "loss": 0.6682, "step": 1700 }, { "epoch": 0.04, "grad_norm": 1.751843810081482, "learning_rate": 1.999032262964899e-05, "loss": 0.7685, "step": 1701 }, { "epoch": 0.04, "grad_norm": 2.9243686199188232, "learning_rate": 1.9990286082738008e-05, "loss": 0.896, "step": 1702 }, { "epoch": 0.04, "grad_norm": 2.638411283493042, "learning_rate": 1.999024946698025e-05, "loss": 0.6261, "step": 1703 }, { "epoch": 0.04, "grad_norm": 2.995875835418701, "learning_rate": 1.9990212782375965e-05, "loss": 0.8358, "step": 1704 }, { "epoch": 0.04, "grad_norm": 2.1784493923187256, "learning_rate": 1.9990176028925418e-05, "loss": 0.7357, "step": 1705 }, { "epoch": 0.04, "grad_norm": 1.9457571506500244, "learning_rate": 1.999013920662885e-05, "loss": 0.7312, "step": 1706 }, { "epoch": 0.04, "grad_norm": 1.9603967666625977, "learning_rate": 1.9990102315486517e-05, "loss": 0.7929, "step": 1707 }, { "epoch": 0.04, "grad_norm": 2.631044864654541, "learning_rate": 1.999006535549868e-05, "loss": 0.7231, "step": 1708 }, { "epoch": 0.04, "grad_norm": 2.4801464080810547, "learning_rate": 1.9990028326665585e-05, "loss": 0.9006, "step": 1709 }, { "epoch": 0.04, "grad_norm": 1.9882739782333374, "learning_rate": 1.9989991228987492e-05, "loss": 0.5596, "step": 1710 }, { "epoch": 0.04, "grad_norm": 1.9167619943618774, "learning_rate": 1.9989954062464656e-05, "loss": 0.8195, "step": 1711 }, { "epoch": 0.04, "grad_norm": 2.8159868717193604, "learning_rate": 1.9989916827097333e-05, "loss": 0.9311, "step": 1712 }, { "epoch": 0.04, "grad_norm": 1.9711041450500488, "learning_rate": 1.998987952288578e-05, "loss": 0.6285, "step": 1713 }, { "epoch": 0.04, "grad_norm": 6.775775909423828, "learning_rate": 1.9989842149830253e-05, "loss": 0.9577, "step": 1714 }, { "epoch": 0.04, "grad_norm": 2.53291654586792, "learning_rate": 1.9989804707931012e-05, "loss": 0.7211, "step": 1715 }, { "epoch": 0.04, "grad_norm": 2.3846395015716553, "learning_rate": 1.998976719718831e-05, "loss": 0.7199, "step": 1716 }, { "epoch": 0.04, "grad_norm": 1.8026739358901978, "learning_rate": 1.998972961760241e-05, "loss": 0.5854, "step": 1717 }, { "epoch": 0.04, "grad_norm": 2.5280842781066895, "learning_rate": 1.998969196917357e-05, "loss": 0.7689, "step": 1718 }, { "epoch": 0.04, "grad_norm": 3.7949299812316895, "learning_rate": 1.998965425190205e-05, "loss": 0.7405, "step": 1719 }, { "epoch": 0.04, "grad_norm": 2.700195550918579, "learning_rate": 1.9989616465788106e-05, "loss": 0.6039, "step": 1720 }, { "epoch": 0.04, "grad_norm": 3.1208736896514893, "learning_rate": 1.9989578610832005e-05, "loss": 0.7621, "step": 1721 }, { "epoch": 0.04, "grad_norm": 3.736980438232422, "learning_rate": 1.9989540687034002e-05, "loss": 0.6348, "step": 1722 }, { "epoch": 0.04, "grad_norm": 2.1449053287506104, "learning_rate": 1.9989502694394362e-05, "loss": 0.7517, "step": 1723 }, { "epoch": 0.04, "grad_norm": 5.07258939743042, "learning_rate": 1.9989464632913346e-05, "loss": 0.7861, "step": 1724 }, { "epoch": 0.04, "grad_norm": 3.4977643489837646, "learning_rate": 1.998942650259121e-05, "loss": 0.9313, "step": 1725 }, { "epoch": 0.04, "grad_norm": 2.3340187072753906, "learning_rate": 1.998938830342823e-05, "loss": 0.9378, "step": 1726 }, { "epoch": 0.04, "grad_norm": 3.99223256111145, "learning_rate": 1.9989350035424658e-05, "loss": 0.7847, "step": 1727 }, { "epoch": 0.04, "grad_norm": 2.5526623725891113, "learning_rate": 1.9989311698580762e-05, "loss": 0.7204, "step": 1728 }, { "epoch": 0.04, "grad_norm": 3.998924493789673, "learning_rate": 1.9989273292896808e-05, "loss": 0.9799, "step": 1729 }, { "epoch": 0.04, "grad_norm": 2.7766189575195312, "learning_rate": 1.9989234818373053e-05, "loss": 0.6353, "step": 1730 }, { "epoch": 0.04, "grad_norm": 2.759160280227661, "learning_rate": 1.998919627500977e-05, "loss": 0.6779, "step": 1731 }, { "epoch": 0.04, "grad_norm": 3.1664483547210693, "learning_rate": 1.9989157662807222e-05, "loss": 0.7867, "step": 1732 }, { "epoch": 0.04, "grad_norm": 3.852294921875, "learning_rate": 1.9989118981765678e-05, "loss": 0.5445, "step": 1733 }, { "epoch": 0.04, "grad_norm": 2.313201665878296, "learning_rate": 1.99890802318854e-05, "loss": 0.8775, "step": 1734 }, { "epoch": 0.04, "grad_norm": 1.8451313972473145, "learning_rate": 1.9989041413166656e-05, "loss": 0.7563, "step": 1735 }, { "epoch": 0.04, "grad_norm": 3.208122968673706, "learning_rate": 1.9989002525609714e-05, "loss": 0.8172, "step": 1736 }, { "epoch": 0.04, "grad_norm": 3.1248395442962646, "learning_rate": 1.9988963569214845e-05, "loss": 0.8408, "step": 1737 }, { "epoch": 0.04, "grad_norm": 4.264171600341797, "learning_rate": 1.9988924543982313e-05, "loss": 0.8891, "step": 1738 }, { "epoch": 0.04, "grad_norm": 4.63418436050415, "learning_rate": 1.998888544991239e-05, "loss": 0.9434, "step": 1739 }, { "epoch": 0.04, "grad_norm": 2.485240936279297, "learning_rate": 1.998884628700534e-05, "loss": 0.7905, "step": 1740 }, { "epoch": 0.04, "grad_norm": 2.6376795768737793, "learning_rate": 1.9988807055261442e-05, "loss": 0.7614, "step": 1741 }, { "epoch": 0.04, "grad_norm": 2.541971445083618, "learning_rate": 1.998876775468096e-05, "loss": 0.5495, "step": 1742 }, { "epoch": 0.04, "grad_norm": 2.858706474304199, "learning_rate": 1.9988728385264165e-05, "loss": 0.9168, "step": 1743 }, { "epoch": 0.04, "grad_norm": 2.6791563034057617, "learning_rate": 1.9988688947011327e-05, "loss": 0.64, "step": 1744 }, { "epoch": 0.04, "grad_norm": 2.3728556632995605, "learning_rate": 1.9988649439922724e-05, "loss": 0.7304, "step": 1745 }, { "epoch": 0.04, "grad_norm": 2.6125051975250244, "learning_rate": 1.9988609863998626e-05, "loss": 0.6714, "step": 1746 }, { "epoch": 0.04, "grad_norm": 4.48891019821167, "learning_rate": 1.99885702192393e-05, "loss": 0.9083, "step": 1747 }, { "epoch": 0.04, "grad_norm": 2.0246407985687256, "learning_rate": 1.9988530505645024e-05, "loss": 0.6806, "step": 1748 }, { "epoch": 0.04, "grad_norm": 2.0897083282470703, "learning_rate": 1.9988490723216073e-05, "loss": 0.8091, "step": 1749 }, { "epoch": 0.04, "grad_norm": 1.9887398481369019, "learning_rate": 1.9988450871952718e-05, "loss": 0.822, "step": 1750 }, { "epoch": 0.04, "grad_norm": 2.0697555541992188, "learning_rate": 1.9988410951855237e-05, "loss": 0.7371, "step": 1751 }, { "epoch": 0.04, "grad_norm": 2.340061902999878, "learning_rate": 1.99883709629239e-05, "loss": 0.9189, "step": 1752 }, { "epoch": 0.04, "grad_norm": 4.000455379486084, "learning_rate": 1.998833090515899e-05, "loss": 0.8919, "step": 1753 }, { "epoch": 0.04, "grad_norm": 2.554020404815674, "learning_rate": 1.9988290778560775e-05, "loss": 0.6481, "step": 1754 }, { "epoch": 0.04, "grad_norm": 1.790106177330017, "learning_rate": 1.998825058312954e-05, "loss": 0.6758, "step": 1755 }, { "epoch": 0.05, "grad_norm": 5.03372049331665, "learning_rate": 1.998821031886555e-05, "loss": 0.8604, "step": 1756 }, { "epoch": 0.05, "grad_norm": 2.0907394886016846, "learning_rate": 1.9988169985769095e-05, "loss": 0.7673, "step": 1757 }, { "epoch": 0.05, "grad_norm": 2.4858133792877197, "learning_rate": 1.9988129583840446e-05, "loss": 0.8464, "step": 1758 }, { "epoch": 0.05, "grad_norm": 1.7931182384490967, "learning_rate": 1.9988089113079884e-05, "loss": 0.6734, "step": 1759 }, { "epoch": 0.05, "grad_norm": 2.360917329788208, "learning_rate": 1.9988048573487682e-05, "loss": 0.7648, "step": 1760 }, { "epoch": 0.05, "grad_norm": 3.523656129837036, "learning_rate": 1.998800796506413e-05, "loss": 0.6935, "step": 1761 }, { "epoch": 0.05, "grad_norm": 2.6043717861175537, "learning_rate": 1.99879672878095e-05, "loss": 0.6932, "step": 1762 }, { "epoch": 0.05, "grad_norm": 2.490736484527588, "learning_rate": 1.9987926541724075e-05, "loss": 0.7433, "step": 1763 }, { "epoch": 0.05, "grad_norm": 1.9999123811721802, "learning_rate": 1.9987885726808134e-05, "loss": 0.7792, "step": 1764 }, { "epoch": 0.05, "grad_norm": 2.7758405208587646, "learning_rate": 1.9987844843061964e-05, "loss": 0.847, "step": 1765 }, { "epoch": 0.05, "grad_norm": 7.450160503387451, "learning_rate": 1.9987803890485836e-05, "loss": 0.8231, "step": 1766 }, { "epoch": 0.05, "grad_norm": 1.7236040830612183, "learning_rate": 1.9987762869080044e-05, "loss": 0.8538, "step": 1767 }, { "epoch": 0.05, "grad_norm": 4.263588905334473, "learning_rate": 1.9987721778844863e-05, "loss": 0.9493, "step": 1768 }, { "epoch": 0.05, "grad_norm": 3.480710506439209, "learning_rate": 1.9987680619780577e-05, "loss": 0.9637, "step": 1769 }, { "epoch": 0.05, "grad_norm": 2.456843852996826, "learning_rate": 1.9987639391887476e-05, "loss": 0.6616, "step": 1770 }, { "epoch": 0.05, "grad_norm": 5.047243595123291, "learning_rate": 1.9987598095165835e-05, "loss": 1.0294, "step": 1771 }, { "epoch": 0.05, "grad_norm": 3.053337812423706, "learning_rate": 1.9987556729615944e-05, "loss": 0.9074, "step": 1772 }, { "epoch": 0.05, "grad_norm": 2.1351945400238037, "learning_rate": 1.9987515295238086e-05, "loss": 0.6824, "step": 1773 }, { "epoch": 0.05, "grad_norm": 1.4139193296432495, "learning_rate": 1.9987473792032544e-05, "loss": 0.5655, "step": 1774 }, { "epoch": 0.05, "grad_norm": 2.242034912109375, "learning_rate": 1.9987432219999615e-05, "loss": 0.7071, "step": 1775 }, { "epoch": 0.05, "grad_norm": 1.9964338541030884, "learning_rate": 1.998739057913957e-05, "loss": 0.6405, "step": 1776 }, { "epoch": 0.05, "grad_norm": 1.7638087272644043, "learning_rate": 1.998734886945271e-05, "loss": 0.7128, "step": 1777 }, { "epoch": 0.05, "grad_norm": 2.8749594688415527, "learning_rate": 1.9987307090939314e-05, "loss": 0.7424, "step": 1778 }, { "epoch": 0.05, "grad_norm": 2.438624382019043, "learning_rate": 1.998726524359967e-05, "loss": 0.7938, "step": 1779 }, { "epoch": 0.05, "grad_norm": 2.439349412918091, "learning_rate": 1.998722332743407e-05, "loss": 0.8741, "step": 1780 }, { "epoch": 0.05, "grad_norm": 4.243280410766602, "learning_rate": 1.9987181342442802e-05, "loss": 0.7236, "step": 1781 }, { "epoch": 0.05, "grad_norm": 2.7017438411712646, "learning_rate": 1.9987139288626155e-05, "loss": 0.8034, "step": 1782 }, { "epoch": 0.05, "grad_norm": 1.7708207368850708, "learning_rate": 1.998709716598442e-05, "loss": 0.6532, "step": 1783 }, { "epoch": 0.05, "grad_norm": 1.7482879161834717, "learning_rate": 1.9987054974517882e-05, "loss": 0.7134, "step": 1784 }, { "epoch": 0.05, "grad_norm": 3.6123666763305664, "learning_rate": 1.9987012714226838e-05, "loss": 0.8441, "step": 1785 }, { "epoch": 0.05, "grad_norm": 1.8654803037643433, "learning_rate": 1.9986970385111578e-05, "loss": 0.83, "step": 1786 }, { "epoch": 0.05, "grad_norm": 1.5877472162246704, "learning_rate": 1.998692798717239e-05, "loss": 0.8081, "step": 1787 }, { "epoch": 0.05, "grad_norm": 2.3575973510742188, "learning_rate": 1.998688552040957e-05, "loss": 0.7313, "step": 1788 }, { "epoch": 0.05, "grad_norm": 3.7382869720458984, "learning_rate": 1.998684298482341e-05, "loss": 0.7694, "step": 1789 }, { "epoch": 0.05, "grad_norm": 2.694744348526001, "learning_rate": 1.9986800380414202e-05, "loss": 0.8405, "step": 1790 }, { "epoch": 0.05, "grad_norm": 2.07605242729187, "learning_rate": 1.998675770718224e-05, "loss": 0.8916, "step": 1791 }, { "epoch": 0.05, "grad_norm": 2.3395888805389404, "learning_rate": 1.9986714965127817e-05, "loss": 0.669, "step": 1792 }, { "epoch": 0.05, "grad_norm": 2.51584792137146, "learning_rate": 1.9986672154251228e-05, "loss": 0.8504, "step": 1793 }, { "epoch": 0.05, "grad_norm": 2.2492783069610596, "learning_rate": 1.9986629274552772e-05, "loss": 0.7007, "step": 1794 }, { "epoch": 0.05, "grad_norm": 2.233295202255249, "learning_rate": 1.998658632603274e-05, "loss": 0.8747, "step": 1795 }, { "epoch": 0.05, "grad_norm": 2.574273109436035, "learning_rate": 1.9986543308691427e-05, "loss": 0.8376, "step": 1796 }, { "epoch": 0.05, "grad_norm": 2.164931058883667, "learning_rate": 1.9986500222529134e-05, "loss": 0.7291, "step": 1797 }, { "epoch": 0.05, "grad_norm": 3.0133509635925293, "learning_rate": 1.9986457067546156e-05, "loss": 0.7136, "step": 1798 }, { "epoch": 0.05, "grad_norm": 1.9117523431777954, "learning_rate": 1.9986413843742788e-05, "loss": 0.5995, "step": 1799 }, { "epoch": 0.05, "grad_norm": 3.600735664367676, "learning_rate": 1.9986370551119334e-05, "loss": 0.7654, "step": 1800 }, { "epoch": 0.05, "grad_norm": 2.4303696155548096, "learning_rate": 1.9986327189676084e-05, "loss": 0.7769, "step": 1801 }, { "epoch": 0.05, "grad_norm": 4.505244255065918, "learning_rate": 1.9986283759413344e-05, "loss": 0.6983, "step": 1802 }, { "epoch": 0.05, "grad_norm": 1.5358986854553223, "learning_rate": 1.9986240260331407e-05, "loss": 0.6258, "step": 1803 }, { "epoch": 0.05, "grad_norm": 2.0188910961151123, "learning_rate": 1.9986196692430577e-05, "loss": 0.6036, "step": 1804 }, { "epoch": 0.05, "grad_norm": 3.4656965732574463, "learning_rate": 1.9986153055711156e-05, "loss": 0.9498, "step": 1805 }, { "epoch": 0.05, "grad_norm": 1.5377119779586792, "learning_rate": 1.998610935017344e-05, "loss": 0.6378, "step": 1806 }, { "epoch": 0.05, "grad_norm": 2.1923043727874756, "learning_rate": 1.998606557581773e-05, "loss": 0.8643, "step": 1807 }, { "epoch": 0.05, "grad_norm": 2.9895379543304443, "learning_rate": 1.9986021732644335e-05, "loss": 0.7312, "step": 1808 }, { "epoch": 0.05, "grad_norm": 2.0674829483032227, "learning_rate": 1.9985977820653548e-05, "loss": 0.6467, "step": 1809 }, { "epoch": 0.05, "grad_norm": 3.0738797187805176, "learning_rate": 1.9985933839845678e-05, "loss": 0.7565, "step": 1810 }, { "epoch": 0.05, "grad_norm": 2.1832659244537354, "learning_rate": 1.9985889790221026e-05, "loss": 0.739, "step": 1811 }, { "epoch": 0.05, "grad_norm": 2.4771499633789062, "learning_rate": 1.9985845671779893e-05, "loss": 0.5209, "step": 1812 }, { "epoch": 0.05, "grad_norm": 2.280380964279175, "learning_rate": 1.9985801484522585e-05, "loss": 0.7219, "step": 1813 }, { "epoch": 0.05, "grad_norm": 3.877877712249756, "learning_rate": 1.9985757228449407e-05, "loss": 1.0301, "step": 1814 }, { "epoch": 0.05, "grad_norm": 2.2674272060394287, "learning_rate": 1.9985712903560667e-05, "loss": 0.7731, "step": 1815 }, { "epoch": 0.05, "grad_norm": 3.0000803470611572, "learning_rate": 1.9985668509856663e-05, "loss": 0.6481, "step": 1816 }, { "epoch": 0.05, "grad_norm": 1.9008370637893677, "learning_rate": 1.998562404733771e-05, "loss": 0.6701, "step": 1817 }, { "epoch": 0.05, "grad_norm": 4.7959747314453125, "learning_rate": 1.998557951600411e-05, "loss": 0.8916, "step": 1818 }, { "epoch": 0.05, "grad_norm": 2.6343390941619873, "learning_rate": 1.9985534915856163e-05, "loss": 0.7247, "step": 1819 }, { "epoch": 0.05, "grad_norm": 3.2202494144439697, "learning_rate": 1.998549024689419e-05, "loss": 0.7306, "step": 1820 }, { "epoch": 0.05, "grad_norm": 2.9900028705596924, "learning_rate": 1.9985445509118487e-05, "loss": 0.7485, "step": 1821 }, { "epoch": 0.05, "grad_norm": 2.768064498901367, "learning_rate": 1.9985400702529367e-05, "loss": 0.6434, "step": 1822 }, { "epoch": 0.05, "grad_norm": 3.656059741973877, "learning_rate": 1.998535582712714e-05, "loss": 0.7979, "step": 1823 }, { "epoch": 0.05, "grad_norm": 2.607985258102417, "learning_rate": 1.998531088291212e-05, "loss": 0.7694, "step": 1824 }, { "epoch": 0.05, "grad_norm": 2.1297528743743896, "learning_rate": 1.99852658698846e-05, "loss": 0.8787, "step": 1825 }, { "epoch": 0.05, "grad_norm": 3.450529098510742, "learning_rate": 1.9985220788044906e-05, "loss": 0.6186, "step": 1826 }, { "epoch": 0.05, "grad_norm": 2.7331886291503906, "learning_rate": 1.9985175637393344e-05, "loss": 0.8198, "step": 1827 }, { "epoch": 0.05, "grad_norm": 2.8125452995300293, "learning_rate": 1.9985130417930225e-05, "loss": 0.8233, "step": 1828 }, { "epoch": 0.05, "grad_norm": 2.873102903366089, "learning_rate": 1.998508512965586e-05, "loss": 0.7722, "step": 1829 }, { "epoch": 0.05, "grad_norm": 2.5360827445983887, "learning_rate": 1.998503977257056e-05, "loss": 0.7195, "step": 1830 }, { "epoch": 0.05, "grad_norm": 1.9575769901275635, "learning_rate": 1.9984994346674644e-05, "loss": 0.8108, "step": 1831 }, { "epoch": 0.05, "grad_norm": 2.412566900253296, "learning_rate": 1.9984948851968415e-05, "loss": 0.7662, "step": 1832 }, { "epoch": 0.05, "grad_norm": 2.039363145828247, "learning_rate": 1.9984903288452192e-05, "loss": 0.6575, "step": 1833 }, { "epoch": 0.05, "grad_norm": 3.3868050575256348, "learning_rate": 1.998485765612629e-05, "loss": 0.8062, "step": 1834 }, { "epoch": 0.05, "grad_norm": 2.457078456878662, "learning_rate": 1.9984811954991022e-05, "loss": 0.5511, "step": 1835 }, { "epoch": 0.05, "grad_norm": 2.069772243499756, "learning_rate": 1.9984766185046703e-05, "loss": 0.7128, "step": 1836 }, { "epoch": 0.05, "grad_norm": 1.6534876823425293, "learning_rate": 1.9984720346293652e-05, "loss": 0.657, "step": 1837 }, { "epoch": 0.05, "grad_norm": 3.327099084854126, "learning_rate": 1.9984674438732178e-05, "loss": 0.6834, "step": 1838 }, { "epoch": 0.05, "grad_norm": 1.9938931465148926, "learning_rate": 1.9984628462362602e-05, "loss": 0.7292, "step": 1839 }, { "epoch": 0.05, "grad_norm": 2.189180612564087, "learning_rate": 1.998458241718524e-05, "loss": 0.8023, "step": 1840 }, { "epoch": 0.05, "grad_norm": 2.530225992202759, "learning_rate": 1.9984536303200404e-05, "loss": 0.7558, "step": 1841 }, { "epoch": 0.05, "grad_norm": 2.646328926086426, "learning_rate": 1.998449012040842e-05, "loss": 0.8714, "step": 1842 }, { "epoch": 0.05, "grad_norm": 2.359081268310547, "learning_rate": 1.9984443868809604e-05, "loss": 0.9748, "step": 1843 }, { "epoch": 0.05, "grad_norm": 1.5852245092391968, "learning_rate": 1.998439754840427e-05, "loss": 0.5712, "step": 1844 }, { "epoch": 0.05, "grad_norm": 2.3616440296173096, "learning_rate": 1.9984351159192745e-05, "loss": 0.8833, "step": 1845 }, { "epoch": 0.05, "grad_norm": 3.7948670387268066, "learning_rate": 1.9984304701175346e-05, "loss": 0.7611, "step": 1846 }, { "epoch": 0.05, "grad_norm": 2.1118898391723633, "learning_rate": 1.9984258174352387e-05, "loss": 0.6967, "step": 1847 }, { "epoch": 0.05, "grad_norm": 3.1566662788391113, "learning_rate": 1.9984211578724197e-05, "loss": 0.8314, "step": 1848 }, { "epoch": 0.05, "grad_norm": 2.258423089981079, "learning_rate": 1.9984164914291093e-05, "loss": 0.8332, "step": 1849 }, { "epoch": 0.05, "grad_norm": 3.705446481704712, "learning_rate": 1.9984118181053397e-05, "loss": 0.796, "step": 1850 }, { "epoch": 0.05, "grad_norm": 1.5965059995651245, "learning_rate": 1.998407137901143e-05, "loss": 0.7168, "step": 1851 }, { "epoch": 0.05, "grad_norm": 1.7867130041122437, "learning_rate": 1.9984024508165514e-05, "loss": 0.7622, "step": 1852 }, { "epoch": 0.05, "grad_norm": 3.1343705654144287, "learning_rate": 1.9983977568515976e-05, "loss": 0.876, "step": 1853 }, { "epoch": 0.05, "grad_norm": 2.6695847511291504, "learning_rate": 1.9983930560063137e-05, "loss": 0.75, "step": 1854 }, { "epoch": 0.05, "grad_norm": 2.5167341232299805, "learning_rate": 1.998388348280732e-05, "loss": 0.6537, "step": 1855 }, { "epoch": 0.05, "grad_norm": 2.6112005710601807, "learning_rate": 1.9983836336748853e-05, "loss": 0.8724, "step": 1856 }, { "epoch": 0.05, "grad_norm": 2.9949285984039307, "learning_rate": 1.9983789121888055e-05, "loss": 0.8112, "step": 1857 }, { "epoch": 0.05, "grad_norm": 2.5081450939178467, "learning_rate": 1.9983741838225257e-05, "loss": 0.701, "step": 1858 }, { "epoch": 0.05, "grad_norm": 2.1309285163879395, "learning_rate": 1.998369448576078e-05, "loss": 0.7557, "step": 1859 }, { "epoch": 0.05, "grad_norm": 1.8552340269088745, "learning_rate": 1.9983647064494957e-05, "loss": 0.7319, "step": 1860 }, { "epoch": 0.05, "grad_norm": 2.961531400680542, "learning_rate": 1.998359957442811e-05, "loss": 0.7695, "step": 1861 }, { "epoch": 0.05, "grad_norm": 2.358152151107788, "learning_rate": 1.9983552015560566e-05, "loss": 0.8044, "step": 1862 }, { "epoch": 0.05, "grad_norm": 2.53491473197937, "learning_rate": 1.9983504387892653e-05, "loss": 0.706, "step": 1863 }, { "epoch": 0.05, "grad_norm": 2.808757781982422, "learning_rate": 1.99834566914247e-05, "loss": 0.9226, "step": 1864 }, { "epoch": 0.05, "grad_norm": 2.8477346897125244, "learning_rate": 1.9983408926157037e-05, "loss": 0.7596, "step": 1865 }, { "epoch": 0.05, "grad_norm": 2.1762709617614746, "learning_rate": 1.998336109208999e-05, "loss": 0.6998, "step": 1866 }, { "epoch": 0.05, "grad_norm": 2.1924030780792236, "learning_rate": 1.9983313189223895e-05, "loss": 0.9155, "step": 1867 }, { "epoch": 0.05, "grad_norm": 3.1854166984558105, "learning_rate": 1.998326521755907e-05, "loss": 0.8829, "step": 1868 }, { "epoch": 0.05, "grad_norm": 2.606074094772339, "learning_rate": 1.9983217177095858e-05, "loss": 0.6079, "step": 1869 }, { "epoch": 0.05, "grad_norm": 3.103337287902832, "learning_rate": 1.9983169067834587e-05, "loss": 0.6116, "step": 1870 }, { "epoch": 0.05, "grad_norm": 3.2200591564178467, "learning_rate": 1.9983120889775588e-05, "loss": 0.9791, "step": 1871 }, { "epoch": 0.05, "grad_norm": 2.2270514965057373, "learning_rate": 1.9983072642919185e-05, "loss": 0.8233, "step": 1872 }, { "epoch": 0.05, "grad_norm": 2.8137710094451904, "learning_rate": 1.998302432726572e-05, "loss": 0.7086, "step": 1873 }, { "epoch": 0.05, "grad_norm": 2.2094149589538574, "learning_rate": 1.9982975942815524e-05, "loss": 0.7981, "step": 1874 }, { "epoch": 0.05, "grad_norm": 2.0748648643493652, "learning_rate": 1.998292748956893e-05, "loss": 0.7795, "step": 1875 }, { "epoch": 0.05, "grad_norm": 2.636291027069092, "learning_rate": 1.9982878967526272e-05, "loss": 0.7302, "step": 1876 }, { "epoch": 0.05, "grad_norm": 2.1924400329589844, "learning_rate": 1.9982830376687884e-05, "loss": 0.798, "step": 1877 }, { "epoch": 0.05, "grad_norm": 2.1428985595703125, "learning_rate": 1.9982781717054104e-05, "loss": 0.6371, "step": 1878 }, { "epoch": 0.05, "grad_norm": 2.767662286758423, "learning_rate": 1.998273298862526e-05, "loss": 0.6785, "step": 1879 }, { "epoch": 0.05, "grad_norm": 1.5667916536331177, "learning_rate": 1.9982684191401694e-05, "loss": 0.6739, "step": 1880 }, { "epoch": 0.05, "grad_norm": 4.932434558868408, "learning_rate": 1.9982635325383736e-05, "loss": 0.796, "step": 1881 }, { "epoch": 0.05, "grad_norm": 3.4799745082855225, "learning_rate": 1.998258639057173e-05, "loss": 0.8789, "step": 1882 }, { "epoch": 0.05, "grad_norm": 2.658721923828125, "learning_rate": 1.998253738696601e-05, "loss": 0.8431, "step": 1883 }, { "epoch": 0.05, "grad_norm": 1.9984469413757324, "learning_rate": 1.9982488314566916e-05, "loss": 0.7876, "step": 1884 }, { "epoch": 0.05, "grad_norm": 2.183602809906006, "learning_rate": 1.998243917337478e-05, "loss": 0.7616, "step": 1885 }, { "epoch": 0.05, "grad_norm": 2.638331890106201, "learning_rate": 1.998238996338995e-05, "loss": 0.742, "step": 1886 }, { "epoch": 0.05, "grad_norm": 1.6623473167419434, "learning_rate": 1.9982340684612756e-05, "loss": 0.6573, "step": 1887 }, { "epoch": 0.05, "grad_norm": 4.460470199584961, "learning_rate": 1.9982291337043543e-05, "loss": 0.9893, "step": 1888 }, { "epoch": 0.05, "grad_norm": 2.324395179748535, "learning_rate": 1.998224192068265e-05, "loss": 0.8302, "step": 1889 }, { "epoch": 0.05, "grad_norm": 4.762779712677002, "learning_rate": 1.9982192435530418e-05, "loss": 0.7368, "step": 1890 }, { "epoch": 0.05, "grad_norm": 1.826295256614685, "learning_rate": 1.9982142881587183e-05, "loss": 0.8068, "step": 1891 }, { "epoch": 0.05, "grad_norm": 2.8557748794555664, "learning_rate": 1.9982093258853293e-05, "loss": 0.7272, "step": 1892 }, { "epoch": 0.05, "grad_norm": 2.4360084533691406, "learning_rate": 1.998204356732909e-05, "loss": 0.6506, "step": 1893 }, { "epoch": 0.05, "grad_norm": 2.7002243995666504, "learning_rate": 1.998199380701491e-05, "loss": 0.7614, "step": 1894 }, { "epoch": 0.05, "grad_norm": 1.9162954092025757, "learning_rate": 1.99819439779111e-05, "loss": 0.6812, "step": 1895 }, { "epoch": 0.05, "grad_norm": 2.7377169132232666, "learning_rate": 1.998189408001801e-05, "loss": 0.6938, "step": 1896 }, { "epoch": 0.05, "grad_norm": 1.96051824092865, "learning_rate": 1.9981844113335972e-05, "loss": 0.7355, "step": 1897 }, { "epoch": 0.05, "grad_norm": 2.895961284637451, "learning_rate": 1.9981794077865336e-05, "loss": 0.7301, "step": 1898 }, { "epoch": 0.05, "grad_norm": 2.19978404045105, "learning_rate": 1.9981743973606446e-05, "loss": 0.6999, "step": 1899 }, { "epoch": 0.05, "grad_norm": 2.801776647567749, "learning_rate": 1.998169380055965e-05, "loss": 0.7236, "step": 1900 }, { "epoch": 0.05, "grad_norm": 1.9988243579864502, "learning_rate": 1.9981643558725286e-05, "loss": 0.7198, "step": 1901 }, { "epoch": 0.05, "grad_norm": 2.454315185546875, "learning_rate": 1.998159324810371e-05, "loss": 0.8335, "step": 1902 }, { "epoch": 0.05, "grad_norm": 3.293574571609497, "learning_rate": 1.9981542868695266e-05, "loss": 0.9013, "step": 1903 }, { "epoch": 0.05, "grad_norm": 2.44256329536438, "learning_rate": 1.9981492420500298e-05, "loss": 0.7386, "step": 1904 }, { "epoch": 0.05, "grad_norm": 3.0672268867492676, "learning_rate": 1.9981441903519152e-05, "loss": 0.7897, "step": 1905 }, { "epoch": 0.05, "grad_norm": 2.424527645111084, "learning_rate": 1.9981391317752183e-05, "loss": 0.6906, "step": 1906 }, { "epoch": 0.05, "grad_norm": 2.3945653438568115, "learning_rate": 1.9981340663199734e-05, "loss": 0.6642, "step": 1907 }, { "epoch": 0.05, "grad_norm": 2.873654842376709, "learning_rate": 1.9981289939862156e-05, "loss": 0.8831, "step": 1908 }, { "epoch": 0.05, "grad_norm": 6.637257099151611, "learning_rate": 1.99812391477398e-05, "loss": 1.023, "step": 1909 }, { "epoch": 0.05, "grad_norm": 2.9962236881256104, "learning_rate": 1.9981188286833015e-05, "loss": 0.7898, "step": 1910 }, { "epoch": 0.05, "grad_norm": 2.0664846897125244, "learning_rate": 1.998113735714215e-05, "loss": 0.6431, "step": 1911 }, { "epoch": 0.05, "grad_norm": 2.0879218578338623, "learning_rate": 1.9981086358667556e-05, "loss": 0.7696, "step": 1912 }, { "epoch": 0.05, "grad_norm": 6.690244674682617, "learning_rate": 1.9981035291409586e-05, "loss": 0.8503, "step": 1913 }, { "epoch": 0.05, "grad_norm": 2.0572521686553955, "learning_rate": 1.9980984155368594e-05, "loss": 0.7971, "step": 1914 }, { "epoch": 0.05, "grad_norm": 2.619384527206421, "learning_rate": 1.998093295054493e-05, "loss": 0.6712, "step": 1915 }, { "epoch": 0.05, "grad_norm": 1.8262571096420288, "learning_rate": 1.9980881676938945e-05, "loss": 0.7328, "step": 1916 }, { "epoch": 0.05, "grad_norm": 2.5385684967041016, "learning_rate": 1.9980830334550993e-05, "loss": 0.871, "step": 1917 }, { "epoch": 0.05, "grad_norm": 3.319852590560913, "learning_rate": 1.998077892338143e-05, "loss": 0.7799, "step": 1918 }, { "epoch": 0.05, "grad_norm": 1.9595664739608765, "learning_rate": 1.998072744343061e-05, "loss": 0.7626, "step": 1919 }, { "epoch": 0.05, "grad_norm": 2.608848810195923, "learning_rate": 1.9980675894698884e-05, "loss": 0.7784, "step": 1920 }, { "epoch": 0.05, "grad_norm": 3.0634829998016357, "learning_rate": 1.9980624277186615e-05, "loss": 0.6986, "step": 1921 }, { "epoch": 0.05, "grad_norm": 1.8425602912902832, "learning_rate": 1.998057259089415e-05, "loss": 0.7656, "step": 1922 }, { "epoch": 0.05, "grad_norm": 2.3669023513793945, "learning_rate": 1.998052083582185e-05, "loss": 0.8636, "step": 1923 }, { "epoch": 0.05, "grad_norm": 1.757086157798767, "learning_rate": 1.998046901197007e-05, "loss": 0.7814, "step": 1924 }, { "epoch": 0.05, "grad_norm": 1.8306183815002441, "learning_rate": 1.998041711933917e-05, "loss": 0.6147, "step": 1925 }, { "epoch": 0.05, "grad_norm": 2.6324374675750732, "learning_rate": 1.9980365157929507e-05, "loss": 0.7457, "step": 1926 }, { "epoch": 0.05, "grad_norm": 3.0523617267608643, "learning_rate": 1.9980313127741433e-05, "loss": 0.9538, "step": 1927 }, { "epoch": 0.05, "grad_norm": 2.610732316970825, "learning_rate": 1.9980261028775314e-05, "loss": 0.602, "step": 1928 }, { "epoch": 0.05, "grad_norm": 3.877683162689209, "learning_rate": 1.9980208861031506e-05, "loss": 0.5685, "step": 1929 }, { "epoch": 0.05, "grad_norm": 2.0738883018493652, "learning_rate": 1.9980156624510367e-05, "loss": 0.7027, "step": 1930 }, { "epoch": 0.05, "grad_norm": 3.1363728046417236, "learning_rate": 1.998010431921226e-05, "loss": 0.8122, "step": 1931 }, { "epoch": 0.05, "grad_norm": 2.015998601913452, "learning_rate": 1.9980051945137548e-05, "loss": 0.7311, "step": 1932 }, { "epoch": 0.05, "grad_norm": 3.3179502487182617, "learning_rate": 1.9979999502286583e-05, "loss": 0.7122, "step": 1933 }, { "epoch": 0.05, "grad_norm": 3.219709634780884, "learning_rate": 1.997994699065973e-05, "loss": 0.7197, "step": 1934 }, { "epoch": 0.05, "grad_norm": 1.5561671257019043, "learning_rate": 1.9979894410257357e-05, "loss": 0.7122, "step": 1935 }, { "epoch": 0.05, "grad_norm": 2.9557297229766846, "learning_rate": 1.997984176107982e-05, "loss": 0.8002, "step": 1936 }, { "epoch": 0.05, "grad_norm": 2.599368095397949, "learning_rate": 1.9979789043127483e-05, "loss": 0.7301, "step": 1937 }, { "epoch": 0.05, "grad_norm": 3.5339059829711914, "learning_rate": 1.997973625640071e-05, "loss": 0.6847, "step": 1938 }, { "epoch": 0.05, "grad_norm": 2.6740505695343018, "learning_rate": 1.9979683400899862e-05, "loss": 0.7311, "step": 1939 }, { "epoch": 0.05, "grad_norm": 2.3325765132904053, "learning_rate": 1.997963047662531e-05, "loss": 0.9218, "step": 1940 }, { "epoch": 0.05, "grad_norm": 6.582281589508057, "learning_rate": 1.9979577483577413e-05, "loss": 0.9814, "step": 1941 }, { "epoch": 0.05, "grad_norm": 2.0360970497131348, "learning_rate": 1.997952442175654e-05, "loss": 0.7846, "step": 1942 }, { "epoch": 0.05, "grad_norm": 2.236194610595703, "learning_rate": 1.997947129116305e-05, "loss": 0.6851, "step": 1943 }, { "epoch": 0.05, "grad_norm": 3.1435070037841797, "learning_rate": 1.9979418091797317e-05, "loss": 0.7163, "step": 1944 }, { "epoch": 0.05, "grad_norm": 2.0420570373535156, "learning_rate": 1.9979364823659702e-05, "loss": 0.6754, "step": 1945 }, { "epoch": 0.05, "grad_norm": 2.283759832382202, "learning_rate": 1.9979311486750577e-05, "loss": 0.6942, "step": 1946 }, { "epoch": 0.05, "grad_norm": 2.6079835891723633, "learning_rate": 1.99792580810703e-05, "loss": 0.7435, "step": 1947 }, { "epoch": 0.05, "grad_norm": 2.257375955581665, "learning_rate": 1.9979204606619255e-05, "loss": 0.7581, "step": 1948 }, { "epoch": 0.05, "grad_norm": 2.3352317810058594, "learning_rate": 1.9979151063397796e-05, "loss": 0.6259, "step": 1949 }, { "epoch": 0.05, "grad_norm": 2.1744134426116943, "learning_rate": 1.9979097451406302e-05, "loss": 0.6379, "step": 1950 }, { "epoch": 0.05, "grad_norm": 1.9327045679092407, "learning_rate": 1.997904377064513e-05, "loss": 0.5994, "step": 1951 }, { "epoch": 0.05, "grad_norm": 2.597590684890747, "learning_rate": 1.9978990021114667e-05, "loss": 0.7095, "step": 1952 }, { "epoch": 0.05, "grad_norm": 2.427577018737793, "learning_rate": 1.9978936202815268e-05, "loss": 0.7618, "step": 1953 }, { "epoch": 0.05, "grad_norm": 2.9074771404266357, "learning_rate": 1.9978882315747313e-05, "loss": 0.6696, "step": 1954 }, { "epoch": 0.05, "grad_norm": 1.7623041868209839, "learning_rate": 1.997882835991117e-05, "loss": 0.7025, "step": 1955 }, { "epoch": 0.05, "grad_norm": 2.0161514282226562, "learning_rate": 1.997877433530721e-05, "loss": 0.6774, "step": 1956 }, { "epoch": 0.05, "grad_norm": 2.248384952545166, "learning_rate": 1.997872024193581e-05, "loss": 0.7965, "step": 1957 }, { "epoch": 0.05, "grad_norm": 1.7760809659957886, "learning_rate": 1.9978666079797333e-05, "loss": 0.6923, "step": 1958 }, { "epoch": 0.05, "grad_norm": 2.5560805797576904, "learning_rate": 1.9978611848892163e-05, "loss": 0.5724, "step": 1959 }, { "epoch": 0.05, "grad_norm": 3.199814796447754, "learning_rate": 1.9978557549220667e-05, "loss": 0.7701, "step": 1960 }, { "epoch": 0.05, "grad_norm": 2.84617280960083, "learning_rate": 1.9978503180783225e-05, "loss": 0.5897, "step": 1961 }, { "epoch": 0.05, "grad_norm": 2.0078322887420654, "learning_rate": 1.9978448743580205e-05, "loss": 0.7681, "step": 1962 }, { "epoch": 0.05, "grad_norm": 1.940298080444336, "learning_rate": 1.9978394237611988e-05, "loss": 0.6843, "step": 1963 }, { "epoch": 0.05, "grad_norm": 2.412320852279663, "learning_rate": 1.9978339662878943e-05, "loss": 0.8428, "step": 1964 }, { "epoch": 0.05, "grad_norm": 1.8678613901138306, "learning_rate": 1.9978285019381453e-05, "loss": 0.7077, "step": 1965 }, { "epoch": 0.05, "grad_norm": 2.015195608139038, "learning_rate": 1.997823030711989e-05, "loss": 0.7157, "step": 1966 }, { "epoch": 0.05, "grad_norm": 2.995091199874878, "learning_rate": 1.9978175526094633e-05, "loss": 0.6229, "step": 1967 }, { "epoch": 0.05, "grad_norm": 2.9318971633911133, "learning_rate": 1.997812067630606e-05, "loss": 0.8274, "step": 1968 }, { "epoch": 0.05, "grad_norm": 2.2479143142700195, "learning_rate": 1.997806575775455e-05, "loss": 0.7865, "step": 1969 }, { "epoch": 0.05, "grad_norm": 5.605006217956543, "learning_rate": 1.9978010770440476e-05, "loss": 0.6001, "step": 1970 }, { "epoch": 0.05, "grad_norm": 2.4155211448669434, "learning_rate": 1.9977955714364224e-05, "loss": 0.8158, "step": 1971 }, { "epoch": 0.05, "grad_norm": 2.517526149749756, "learning_rate": 1.9977900589526167e-05, "loss": 0.8163, "step": 1972 }, { "epoch": 0.05, "grad_norm": 2.1490530967712402, "learning_rate": 1.997784539592669e-05, "loss": 0.7085, "step": 1973 }, { "epoch": 0.05, "grad_norm": 3.1670777797698975, "learning_rate": 1.997779013356617e-05, "loss": 0.9204, "step": 1974 }, { "epoch": 0.05, "grad_norm": 2.772595167160034, "learning_rate": 1.997773480244499e-05, "loss": 0.671, "step": 1975 }, { "epoch": 0.05, "grad_norm": 4.430738925933838, "learning_rate": 1.997767940256353e-05, "loss": 0.925, "step": 1976 }, { "epoch": 0.05, "grad_norm": 2.7219066619873047, "learning_rate": 1.997762393392217e-05, "loss": 0.7511, "step": 1977 }, { "epoch": 0.05, "grad_norm": 2.882463216781616, "learning_rate": 1.9977568396521295e-05, "loss": 0.7413, "step": 1978 }, { "epoch": 0.05, "grad_norm": 3.9446861743927, "learning_rate": 1.9977512790361292e-05, "loss": 0.7804, "step": 1979 }, { "epoch": 0.05, "grad_norm": 3.36254620552063, "learning_rate": 1.9977457115442532e-05, "loss": 0.7897, "step": 1980 }, { "epoch": 0.05, "grad_norm": 2.086482286453247, "learning_rate": 1.9977401371765412e-05, "loss": 0.6964, "step": 1981 }, { "epoch": 0.05, "grad_norm": 2.374141216278076, "learning_rate": 1.9977345559330308e-05, "loss": 0.7813, "step": 1982 }, { "epoch": 0.05, "grad_norm": 2.2510879039764404, "learning_rate": 1.9977289678137606e-05, "loss": 0.8197, "step": 1983 }, { "epoch": 0.05, "grad_norm": 2.327105760574341, "learning_rate": 1.9977233728187692e-05, "loss": 0.5993, "step": 1984 }, { "epoch": 0.05, "grad_norm": 3.131993293762207, "learning_rate": 1.9977177709480952e-05, "loss": 0.7338, "step": 1985 }, { "epoch": 0.05, "grad_norm": 2.826542377471924, "learning_rate": 1.9977121622017774e-05, "loss": 0.8069, "step": 1986 }, { "epoch": 0.05, "grad_norm": 1.9384353160858154, "learning_rate": 1.997706546579854e-05, "loss": 0.6743, "step": 1987 }, { "epoch": 0.05, "grad_norm": 2.513165235519409, "learning_rate": 1.9977009240823637e-05, "loss": 0.6707, "step": 1988 }, { "epoch": 0.05, "grad_norm": 3.0457913875579834, "learning_rate": 1.9976952947093455e-05, "loss": 0.7819, "step": 1989 }, { "epoch": 0.05, "grad_norm": 3.22605562210083, "learning_rate": 1.9976896584608382e-05, "loss": 0.708, "step": 1990 }, { "epoch": 0.05, "grad_norm": 3.8047382831573486, "learning_rate": 1.9976840153368808e-05, "loss": 0.8105, "step": 1991 }, { "epoch": 0.05, "grad_norm": 2.365035057067871, "learning_rate": 1.997678365337512e-05, "loss": 0.8823, "step": 1992 }, { "epoch": 0.05, "grad_norm": 2.173185348510742, "learning_rate": 1.9976727084627704e-05, "loss": 0.774, "step": 1993 }, { "epoch": 0.05, "grad_norm": 5.911550998687744, "learning_rate": 1.9976670447126954e-05, "loss": 0.7405, "step": 1994 }, { "epoch": 0.05, "grad_norm": 2.065998077392578, "learning_rate": 1.9976613740873258e-05, "loss": 0.6725, "step": 1995 }, { "epoch": 0.05, "grad_norm": 2.715533971786499, "learning_rate": 1.997655696586701e-05, "loss": 0.6892, "step": 1996 }, { "epoch": 0.05, "grad_norm": 2.0544848442077637, "learning_rate": 1.9976500122108596e-05, "loss": 0.7585, "step": 1997 }, { "epoch": 0.05, "grad_norm": 4.488911151885986, "learning_rate": 1.9976443209598416e-05, "loss": 0.8844, "step": 1998 }, { "epoch": 0.05, "grad_norm": 1.8391250371932983, "learning_rate": 1.9976386228336856e-05, "loss": 0.6531, "step": 1999 }, { "epoch": 0.05, "grad_norm": 2.258517026901245, "learning_rate": 1.9976329178324308e-05, "loss": 0.8043, "step": 2000 }, { "epoch": 0.05, "grad_norm": 3.014747381210327, "learning_rate": 1.997627205956117e-05, "loss": 0.6155, "step": 2001 }, { "epoch": 0.05, "grad_norm": 2.512629747390747, "learning_rate": 1.9976214872047828e-05, "loss": 0.6126, "step": 2002 }, { "epoch": 0.05, "grad_norm": 4.784884929656982, "learning_rate": 1.997615761578468e-05, "loss": 0.7769, "step": 2003 }, { "epoch": 0.05, "grad_norm": 2.683345317840576, "learning_rate": 1.9976100290772126e-05, "loss": 0.7018, "step": 2004 }, { "epoch": 0.05, "grad_norm": 4.546337127685547, "learning_rate": 1.9976042897010556e-05, "loss": 0.7161, "step": 2005 }, { "epoch": 0.05, "grad_norm": 1.510151982307434, "learning_rate": 1.9975985434500365e-05, "loss": 0.7427, "step": 2006 }, { "epoch": 0.05, "grad_norm": 2.6156368255615234, "learning_rate": 1.9975927903241952e-05, "loss": 0.7189, "step": 2007 }, { "epoch": 0.05, "grad_norm": 2.1224188804626465, "learning_rate": 1.9975870303235706e-05, "loss": 0.6954, "step": 2008 }, { "epoch": 0.05, "grad_norm": 4.855563640594482, "learning_rate": 1.9975812634482032e-05, "loss": 0.7197, "step": 2009 }, { "epoch": 0.05, "grad_norm": 3.8908514976501465, "learning_rate": 1.997575489698133e-05, "loss": 0.811, "step": 2010 }, { "epoch": 0.05, "grad_norm": 5.087156295776367, "learning_rate": 1.9975697090733984e-05, "loss": 0.9198, "step": 2011 }, { "epoch": 0.05, "grad_norm": 5.811931133270264, "learning_rate": 1.9975639215740405e-05, "loss": 0.9581, "step": 2012 }, { "epoch": 0.05, "grad_norm": 4.288698673248291, "learning_rate": 1.997558127200099e-05, "loss": 0.7162, "step": 2013 }, { "epoch": 0.05, "grad_norm": 2.3925182819366455, "learning_rate": 1.997552325951613e-05, "loss": 0.7943, "step": 2014 }, { "epoch": 0.05, "grad_norm": 5.334035873413086, "learning_rate": 1.997546517828624e-05, "loss": 0.8192, "step": 2015 }, { "epoch": 0.05, "grad_norm": 2.8675248622894287, "learning_rate": 1.99754070283117e-05, "loss": 0.835, "step": 2016 }, { "epoch": 0.05, "grad_norm": 3.4370462894439697, "learning_rate": 1.9975348809592932e-05, "loss": 0.8639, "step": 2017 }, { "epoch": 0.05, "grad_norm": 3.02303147315979, "learning_rate": 1.997529052213032e-05, "loss": 0.8041, "step": 2018 }, { "epoch": 0.05, "grad_norm": 1.914177417755127, "learning_rate": 1.997523216592428e-05, "loss": 0.8915, "step": 2019 }, { "epoch": 0.05, "grad_norm": 2.7155418395996094, "learning_rate": 1.99751737409752e-05, "loss": 0.7749, "step": 2020 }, { "epoch": 0.05, "grad_norm": 4.087367534637451, "learning_rate": 1.9975115247283492e-05, "loss": 0.778, "step": 2021 }, { "epoch": 0.05, "grad_norm": 3.8763325214385986, "learning_rate": 1.9975056684849558e-05, "loss": 0.8979, "step": 2022 }, { "epoch": 0.05, "grad_norm": 3.201260566711426, "learning_rate": 1.99749980536738e-05, "loss": 0.7051, "step": 2023 }, { "epoch": 0.05, "grad_norm": 1.936992883682251, "learning_rate": 1.9974939353756623e-05, "loss": 0.8645, "step": 2024 }, { "epoch": 0.05, "grad_norm": 4.983245372772217, "learning_rate": 1.997488058509843e-05, "loss": 0.913, "step": 2025 }, { "epoch": 0.05, "grad_norm": 4.484483242034912, "learning_rate": 1.9974821747699623e-05, "loss": 0.704, "step": 2026 }, { "epoch": 0.05, "grad_norm": 5.998348236083984, "learning_rate": 1.9974762841560616e-05, "loss": 0.7474, "step": 2027 }, { "epoch": 0.05, "grad_norm": 6.056666851043701, "learning_rate": 1.997470386668181e-05, "loss": 0.7379, "step": 2028 }, { "epoch": 0.05, "grad_norm": 2.3744843006134033, "learning_rate": 1.9974644823063613e-05, "loss": 0.7552, "step": 2029 }, { "epoch": 0.05, "grad_norm": 1.9507763385772705, "learning_rate": 1.9974585710706426e-05, "loss": 0.6568, "step": 2030 }, { "epoch": 0.05, "grad_norm": 3.747042179107666, "learning_rate": 1.997452652961066e-05, "loss": 0.8735, "step": 2031 }, { "epoch": 0.05, "grad_norm": 2.5173487663269043, "learning_rate": 1.997446727977673e-05, "loss": 0.6597, "step": 2032 }, { "epoch": 0.05, "grad_norm": 1.6172277927398682, "learning_rate": 1.9974407961205035e-05, "loss": 0.6594, "step": 2033 }, { "epoch": 0.05, "grad_norm": 2.523944139480591, "learning_rate": 1.9974348573895987e-05, "loss": 0.7206, "step": 2034 }, { "epoch": 0.05, "grad_norm": 1.8002293109893799, "learning_rate": 1.9974289117849994e-05, "loss": 0.7985, "step": 2035 }, { "epoch": 0.05, "grad_norm": 4.298697471618652, "learning_rate": 1.997422959306747e-05, "loss": 0.7306, "step": 2036 }, { "epoch": 0.05, "grad_norm": 2.1637418270111084, "learning_rate": 1.9974169999548817e-05, "loss": 0.8323, "step": 2037 }, { "epoch": 0.05, "grad_norm": 2.3529279232025146, "learning_rate": 1.9974110337294455e-05, "loss": 0.6868, "step": 2038 }, { "epoch": 0.05, "grad_norm": 2.405493974685669, "learning_rate": 1.997405060630479e-05, "loss": 0.6709, "step": 2039 }, { "epoch": 0.05, "grad_norm": 3.9416987895965576, "learning_rate": 1.997399080658023e-05, "loss": 0.7154, "step": 2040 }, { "epoch": 0.05, "grad_norm": 2.6185264587402344, "learning_rate": 1.9973930938121197e-05, "loss": 0.7064, "step": 2041 }, { "epoch": 0.05, "grad_norm": 5.089044570922852, "learning_rate": 1.99738710009281e-05, "loss": 0.8431, "step": 2042 }, { "epoch": 0.05, "grad_norm": 4.939505100250244, "learning_rate": 1.9973810995001345e-05, "loss": 0.6776, "step": 2043 }, { "epoch": 0.05, "grad_norm": 3.992128610610962, "learning_rate": 1.9973750920341353e-05, "loss": 0.8212, "step": 2044 }, { "epoch": 0.05, "grad_norm": 2.0228240489959717, "learning_rate": 1.9973690776948537e-05, "loss": 0.6641, "step": 2045 }, { "epoch": 0.05, "grad_norm": 2.443035840988159, "learning_rate": 1.997363056482331e-05, "loss": 0.6572, "step": 2046 }, { "epoch": 0.05, "grad_norm": 2.9974253177642822, "learning_rate": 1.9973570283966084e-05, "loss": 0.7238, "step": 2047 }, { "epoch": 0.05, "grad_norm": 2.030402183532715, "learning_rate": 1.9973509934377283e-05, "loss": 0.665, "step": 2048 }, { "epoch": 0.05, "grad_norm": 2.159552812576294, "learning_rate": 1.997344951605731e-05, "loss": 0.8599, "step": 2049 }, { "epoch": 0.05, "grad_norm": 6.204172134399414, "learning_rate": 1.9973389029006597e-05, "loss": 0.7954, "step": 2050 }, { "epoch": 0.05, "grad_norm": 3.8589627742767334, "learning_rate": 1.9973328473225545e-05, "loss": 0.698, "step": 2051 }, { "epoch": 0.05, "grad_norm": 6.85826301574707, "learning_rate": 1.9973267848714586e-05, "loss": 0.6931, "step": 2052 }, { "epoch": 0.05, "grad_norm": 1.6653510332107544, "learning_rate": 1.9973207155474127e-05, "loss": 0.7518, "step": 2053 }, { "epoch": 0.05, "grad_norm": 3.744321823120117, "learning_rate": 1.997314639350459e-05, "loss": 0.93, "step": 2054 }, { "epoch": 0.05, "grad_norm": 2.087768077850342, "learning_rate": 1.9973085562806396e-05, "loss": 0.7241, "step": 2055 }, { "epoch": 0.05, "grad_norm": 1.915062427520752, "learning_rate": 1.997302466337996e-05, "loss": 0.8123, "step": 2056 }, { "epoch": 0.05, "grad_norm": 3.0220227241516113, "learning_rate": 1.9972963695225706e-05, "loss": 0.7991, "step": 2057 }, { "epoch": 0.05, "grad_norm": 8.064908027648926, "learning_rate": 1.9972902658344048e-05, "loss": 0.8753, "step": 2058 }, { "epoch": 0.05, "grad_norm": 2.5235395431518555, "learning_rate": 1.9972841552735414e-05, "loss": 0.8679, "step": 2059 }, { "epoch": 0.05, "grad_norm": 6.86114501953125, "learning_rate": 1.997278037840022e-05, "loss": 0.8899, "step": 2060 }, { "epoch": 0.05, "grad_norm": 2.090247869491577, "learning_rate": 1.997271913533889e-05, "loss": 0.5183, "step": 2061 }, { "epoch": 0.05, "grad_norm": 2.9835965633392334, "learning_rate": 1.9972657823551844e-05, "loss": 0.9647, "step": 2062 }, { "epoch": 0.05, "grad_norm": 1.7535595893859863, "learning_rate": 1.9972596443039504e-05, "loss": 0.6536, "step": 2063 }, { "epoch": 0.05, "grad_norm": 2.494112730026245, "learning_rate": 1.99725349938023e-05, "loss": 0.6464, "step": 2064 }, { "epoch": 0.05, "grad_norm": 3.525038957595825, "learning_rate": 1.9972473475840645e-05, "loss": 0.7346, "step": 2065 }, { "epoch": 0.05, "grad_norm": 2.519399404525757, "learning_rate": 1.997241188915497e-05, "loss": 0.7381, "step": 2066 }, { "epoch": 0.05, "grad_norm": 2.2609691619873047, "learning_rate": 1.99723502337457e-05, "loss": 0.653, "step": 2067 }, { "epoch": 0.05, "grad_norm": 1.741136074066162, "learning_rate": 1.9972288509613255e-05, "loss": 0.5794, "step": 2068 }, { "epoch": 0.05, "grad_norm": 2.360426664352417, "learning_rate": 1.9972226716758063e-05, "loss": 0.7789, "step": 2069 }, { "epoch": 0.05, "grad_norm": 1.8664145469665527, "learning_rate": 1.997216485518055e-05, "loss": 0.6952, "step": 2070 }, { "epoch": 0.05, "grad_norm": 2.2481300830841064, "learning_rate": 1.997210292488114e-05, "loss": 0.7297, "step": 2071 }, { "epoch": 0.05, "grad_norm": 2.9577088356018066, "learning_rate": 1.9972040925860267e-05, "loss": 0.7432, "step": 2072 }, { "epoch": 0.05, "grad_norm": 1.7317324876785278, "learning_rate": 1.9971978858118346e-05, "loss": 0.7705, "step": 2073 }, { "epoch": 0.05, "grad_norm": 1.6022226810455322, "learning_rate": 1.9971916721655816e-05, "loss": 0.581, "step": 2074 }, { "epoch": 0.05, "grad_norm": 3.2721540927886963, "learning_rate": 1.99718545164731e-05, "loss": 0.6765, "step": 2075 }, { "epoch": 0.05, "grad_norm": 2.524531364440918, "learning_rate": 1.9971792242570628e-05, "loss": 0.7607, "step": 2076 }, { "epoch": 0.05, "grad_norm": 3.621087074279785, "learning_rate": 1.9971729899948827e-05, "loss": 0.6267, "step": 2077 }, { "epoch": 0.05, "grad_norm": 4.983328342437744, "learning_rate": 1.997166748860813e-05, "loss": 0.6896, "step": 2078 }, { "epoch": 0.05, "grad_norm": 3.13061785697937, "learning_rate": 1.9971605008548965e-05, "loss": 0.8687, "step": 2079 }, { "epoch": 0.05, "grad_norm": 2.9115986824035645, "learning_rate": 1.9971542459771766e-05, "loss": 0.8043, "step": 2080 }, { "epoch": 0.05, "grad_norm": 2.5462324619293213, "learning_rate": 1.9971479842276955e-05, "loss": 0.8029, "step": 2081 }, { "epoch": 0.05, "grad_norm": 2.295285224914551, "learning_rate": 1.9971417156064973e-05, "loss": 0.6217, "step": 2082 }, { "epoch": 0.05, "grad_norm": 2.447617530822754, "learning_rate": 1.9971354401136248e-05, "loss": 0.7624, "step": 2083 }, { "epoch": 0.05, "grad_norm": 2.379493474960327, "learning_rate": 1.9971291577491216e-05, "loss": 0.6513, "step": 2084 }, { "epoch": 0.05, "grad_norm": 2.6920387744903564, "learning_rate": 1.9971228685130307e-05, "loss": 0.6388, "step": 2085 }, { "epoch": 0.05, "grad_norm": 3.4660239219665527, "learning_rate": 1.997116572405395e-05, "loss": 0.7365, "step": 2086 }, { "epoch": 0.05, "grad_norm": 3.0295310020446777, "learning_rate": 1.9971102694262584e-05, "loss": 0.7621, "step": 2087 }, { "epoch": 0.05, "grad_norm": 3.60492205619812, "learning_rate": 1.9971039595756645e-05, "loss": 0.6679, "step": 2088 }, { "epoch": 0.05, "grad_norm": 6.233969211578369, "learning_rate": 1.9970976428536564e-05, "loss": 0.6062, "step": 2089 }, { "epoch": 0.05, "grad_norm": 2.802532911300659, "learning_rate": 1.997091319260278e-05, "loss": 0.7391, "step": 2090 }, { "epoch": 0.05, "grad_norm": 5.572067737579346, "learning_rate": 1.9970849887955724e-05, "loss": 0.9459, "step": 2091 }, { "epoch": 0.05, "grad_norm": 3.4158713817596436, "learning_rate": 1.9970786514595836e-05, "loss": 0.7596, "step": 2092 }, { "epoch": 0.05, "grad_norm": 2.9954910278320312, "learning_rate": 1.9970723072523552e-05, "loss": 0.653, "step": 2093 }, { "epoch": 0.05, "grad_norm": 4.015544414520264, "learning_rate": 1.997065956173931e-05, "loss": 0.7039, "step": 2094 }, { "epoch": 0.05, "grad_norm": 5.098118305206299, "learning_rate": 1.9970595982243544e-05, "loss": 0.6557, "step": 2095 }, { "epoch": 0.05, "grad_norm": 4.729681491851807, "learning_rate": 1.9970532334036698e-05, "loss": 0.8149, "step": 2096 }, { "epoch": 0.05, "grad_norm": 3.0555551052093506, "learning_rate": 1.9970468617119203e-05, "loss": 0.8179, "step": 2097 }, { "epoch": 0.05, "grad_norm": 1.9157624244689941, "learning_rate": 1.9970404831491504e-05, "loss": 0.7115, "step": 2098 }, { "epoch": 0.05, "grad_norm": 2.9537603855133057, "learning_rate": 1.997034097715404e-05, "loss": 0.8236, "step": 2099 }, { "epoch": 0.05, "grad_norm": 2.1878175735473633, "learning_rate": 1.9970277054107246e-05, "loss": 0.7593, "step": 2100 }, { "epoch": 0.05, "grad_norm": 2.0355372428894043, "learning_rate": 1.9970213062351572e-05, "loss": 0.628, "step": 2101 }, { "epoch": 0.05, "grad_norm": 2.780663251876831, "learning_rate": 1.997014900188745e-05, "loss": 0.6152, "step": 2102 }, { "epoch": 0.05, "grad_norm": 3.1128153800964355, "learning_rate": 1.997008487271533e-05, "loss": 0.7073, "step": 2103 }, { "epoch": 0.05, "grad_norm": 2.8482892513275146, "learning_rate": 1.9970020674835643e-05, "loss": 0.6669, "step": 2104 }, { "epoch": 0.05, "grad_norm": 1.6008884906768799, "learning_rate": 1.9969956408248837e-05, "loss": 0.6325, "step": 2105 }, { "epoch": 0.05, "grad_norm": 4.416617393493652, "learning_rate": 1.9969892072955356e-05, "loss": 0.8895, "step": 2106 }, { "epoch": 0.05, "grad_norm": 9.166812896728516, "learning_rate": 1.9969827668955644e-05, "loss": 0.9177, "step": 2107 }, { "epoch": 0.05, "grad_norm": 2.7861833572387695, "learning_rate": 1.9969763196250146e-05, "loss": 0.701, "step": 2108 }, { "epoch": 0.05, "grad_norm": 3.860072374343872, "learning_rate": 1.99696986548393e-05, "loss": 0.7379, "step": 2109 }, { "epoch": 0.05, "grad_norm": 12.947519302368164, "learning_rate": 1.9969634044723555e-05, "loss": 0.7652, "step": 2110 }, { "epoch": 0.05, "grad_norm": 2.860522508621216, "learning_rate": 1.9969569365903357e-05, "loss": 0.7144, "step": 2111 }, { "epoch": 0.05, "grad_norm": 2.7761013507843018, "learning_rate": 1.9969504618379148e-05, "loss": 0.7685, "step": 2112 }, { "epoch": 0.05, "grad_norm": 4.934789657592773, "learning_rate": 1.996943980215138e-05, "loss": 0.7283, "step": 2113 }, { "epoch": 0.05, "grad_norm": 2.1555068492889404, "learning_rate": 1.9969374917220494e-05, "loss": 0.8114, "step": 2114 }, { "epoch": 0.05, "grad_norm": 5.42287540435791, "learning_rate": 1.9969309963586937e-05, "loss": 0.9177, "step": 2115 }, { "epoch": 0.05, "grad_norm": 4.375769138336182, "learning_rate": 1.9969244941251165e-05, "loss": 0.737, "step": 2116 }, { "epoch": 0.05, "grad_norm": 3.7762203216552734, "learning_rate": 1.9969179850213618e-05, "loss": 0.9774, "step": 2117 }, { "epoch": 0.05, "grad_norm": 2.012444257736206, "learning_rate": 1.9969114690474743e-05, "loss": 0.7234, "step": 2118 }, { "epoch": 0.05, "grad_norm": 3.471444606781006, "learning_rate": 1.9969049462034998e-05, "loss": 0.7353, "step": 2119 }, { "epoch": 0.05, "grad_norm": 3.2935619354248047, "learning_rate": 1.9968984164894825e-05, "loss": 0.8754, "step": 2120 }, { "epoch": 0.05, "grad_norm": 3.0101828575134277, "learning_rate": 1.9968918799054674e-05, "loss": 0.6711, "step": 2121 }, { "epoch": 0.05, "grad_norm": 2.8929266929626465, "learning_rate": 1.9968853364515e-05, "loss": 0.672, "step": 2122 }, { "epoch": 0.05, "grad_norm": 3.6209397315979004, "learning_rate": 1.9968787861276252e-05, "loss": 0.7042, "step": 2123 }, { "epoch": 0.05, "grad_norm": 1.9653172492980957, "learning_rate": 1.9968722289338883e-05, "loss": 0.6212, "step": 2124 }, { "epoch": 0.05, "grad_norm": 1.9451581239700317, "learning_rate": 1.9968656648703337e-05, "loss": 0.7354, "step": 2125 }, { "epoch": 0.05, "grad_norm": 4.669367790222168, "learning_rate": 1.996859093937008e-05, "loss": 0.7223, "step": 2126 }, { "epoch": 0.05, "grad_norm": 2.9155185222625732, "learning_rate": 1.9968525161339548e-05, "loss": 0.8392, "step": 2127 }, { "epoch": 0.05, "grad_norm": 1.5655584335327148, "learning_rate": 1.9968459314612212e-05, "loss": 0.6105, "step": 2128 }, { "epoch": 0.05, "grad_norm": 2.3601458072662354, "learning_rate": 1.996839339918851e-05, "loss": 0.7053, "step": 2129 }, { "epoch": 0.05, "grad_norm": 3.365131139755249, "learning_rate": 1.996832741506891e-05, "loss": 0.6668, "step": 2130 }, { "epoch": 0.05, "grad_norm": 3.916684627532959, "learning_rate": 1.9968261362253857e-05, "loss": 0.9238, "step": 2131 }, { "epoch": 0.05, "grad_norm": 2.257948398590088, "learning_rate": 1.996819524074381e-05, "loss": 0.7608, "step": 2132 }, { "epoch": 0.05, "grad_norm": 2.0214900970458984, "learning_rate": 1.9968129050539222e-05, "loss": 0.6733, "step": 2133 }, { "epoch": 0.05, "grad_norm": 2.23523211479187, "learning_rate": 1.9968062791640556e-05, "loss": 0.6719, "step": 2134 }, { "epoch": 0.05, "grad_norm": 2.9583818912506104, "learning_rate": 1.9967996464048255e-05, "loss": 0.718, "step": 2135 }, { "epoch": 0.05, "grad_norm": 2.222913980484009, "learning_rate": 1.9967930067762793e-05, "loss": 0.5924, "step": 2136 }, { "epoch": 0.05, "grad_norm": 2.027592420578003, "learning_rate": 1.9967863602784615e-05, "loss": 0.7186, "step": 2137 }, { "epoch": 0.05, "grad_norm": 2.519784927368164, "learning_rate": 1.996779706911419e-05, "loss": 0.722, "step": 2138 }, { "epoch": 0.05, "grad_norm": 2.767310619354248, "learning_rate": 1.9967730466751963e-05, "loss": 0.8676, "step": 2139 }, { "epoch": 0.05, "grad_norm": 3.0953550338745117, "learning_rate": 1.99676637956984e-05, "loss": 0.8054, "step": 2140 }, { "epoch": 0.05, "grad_norm": 3.2614002227783203, "learning_rate": 1.9967597055953963e-05, "loss": 0.7193, "step": 2141 }, { "epoch": 0.05, "grad_norm": 2.998023509979248, "learning_rate": 1.9967530247519107e-05, "loss": 0.7566, "step": 2142 }, { "epoch": 0.05, "grad_norm": 4.4813947677612305, "learning_rate": 1.99674633703943e-05, "loss": 0.6976, "step": 2143 }, { "epoch": 0.05, "grad_norm": 3.4918739795684814, "learning_rate": 1.9967396424579995e-05, "loss": 0.7214, "step": 2144 }, { "epoch": 0.05, "grad_norm": 5.447615623474121, "learning_rate": 1.9967329410076653e-05, "loss": 0.8666, "step": 2145 }, { "epoch": 0.06, "grad_norm": 2.846780776977539, "learning_rate": 1.9967262326884743e-05, "loss": 0.7752, "step": 2146 }, { "epoch": 0.06, "grad_norm": 2.342198371887207, "learning_rate": 1.9967195175004722e-05, "loss": 0.6652, "step": 2147 }, { "epoch": 0.06, "grad_norm": 3.1996679306030273, "learning_rate": 1.9967127954437055e-05, "loss": 0.7936, "step": 2148 }, { "epoch": 0.06, "grad_norm": 2.784518241882324, "learning_rate": 1.99670606651822e-05, "loss": 0.6417, "step": 2149 }, { "epoch": 0.06, "grad_norm": 3.807720184326172, "learning_rate": 1.9966993307240627e-05, "loss": 0.9456, "step": 2150 }, { "epoch": 0.06, "grad_norm": 6.036256790161133, "learning_rate": 1.9966925880612798e-05, "loss": 0.7041, "step": 2151 }, { "epoch": 0.06, "grad_norm": 2.384542942047119, "learning_rate": 1.996685838529918e-05, "loss": 0.8118, "step": 2152 }, { "epoch": 0.06, "grad_norm": 1.5192525386810303, "learning_rate": 1.996679082130023e-05, "loss": 0.623, "step": 2153 }, { "epoch": 0.06, "grad_norm": 4.465844631195068, "learning_rate": 1.9966723188616425e-05, "loss": 0.817, "step": 2154 }, { "epoch": 0.06, "grad_norm": 5.242680072784424, "learning_rate": 1.9966655487248222e-05, "loss": 0.8205, "step": 2155 }, { "epoch": 0.06, "grad_norm": 2.9474642276763916, "learning_rate": 1.9966587717196092e-05, "loss": 0.843, "step": 2156 }, { "epoch": 0.06, "grad_norm": 5.134483337402344, "learning_rate": 1.9966519878460504e-05, "loss": 0.6615, "step": 2157 }, { "epoch": 0.06, "grad_norm": 2.625196933746338, "learning_rate": 1.9966451971041917e-05, "loss": 0.7059, "step": 2158 }, { "epoch": 0.06, "grad_norm": 3.147404193878174, "learning_rate": 1.996638399494081e-05, "loss": 0.7513, "step": 2159 }, { "epoch": 0.06, "grad_norm": 1.563179612159729, "learning_rate": 1.9966315950157643e-05, "loss": 0.6848, "step": 2160 }, { "epoch": 0.06, "grad_norm": 2.6598117351531982, "learning_rate": 1.9966247836692888e-05, "loss": 0.7351, "step": 2161 }, { "epoch": 0.06, "grad_norm": 1.7122092247009277, "learning_rate": 1.996617965454702e-05, "loss": 0.6676, "step": 2162 }, { "epoch": 0.06, "grad_norm": 2.221074104309082, "learning_rate": 1.9966111403720492e-05, "loss": 0.7233, "step": 2163 }, { "epoch": 0.06, "grad_norm": 1.7111846208572388, "learning_rate": 1.9966043084213794e-05, "loss": 0.7315, "step": 2164 }, { "epoch": 0.06, "grad_norm": 2.6931893825531006, "learning_rate": 1.9965974696027387e-05, "loss": 0.8075, "step": 2165 }, { "epoch": 0.06, "grad_norm": 2.435307502746582, "learning_rate": 1.9965906239161743e-05, "loss": 0.8703, "step": 2166 }, { "epoch": 0.06, "grad_norm": 2.280108690261841, "learning_rate": 1.9965837713617332e-05, "loss": 0.7313, "step": 2167 }, { "epoch": 0.06, "grad_norm": 5.879705429077148, "learning_rate": 1.9965769119394633e-05, "loss": 0.8705, "step": 2168 }, { "epoch": 0.06, "grad_norm": 1.9088587760925293, "learning_rate": 1.996570045649411e-05, "loss": 0.7196, "step": 2169 }, { "epoch": 0.06, "grad_norm": 3.511796474456787, "learning_rate": 1.9965631724916243e-05, "loss": 0.6552, "step": 2170 }, { "epoch": 0.06, "grad_norm": 2.539371967315674, "learning_rate": 1.9965562924661504e-05, "loss": 0.8693, "step": 2171 }, { "epoch": 0.06, "grad_norm": 5.177274227142334, "learning_rate": 1.9965494055730364e-05, "loss": 0.9242, "step": 2172 }, { "epoch": 0.06, "grad_norm": 1.735933542251587, "learning_rate": 1.9965425118123304e-05, "loss": 0.6737, "step": 2173 }, { "epoch": 0.06, "grad_norm": 2.9896392822265625, "learning_rate": 1.9965356111840787e-05, "loss": 0.7907, "step": 2174 }, { "epoch": 0.06, "grad_norm": 3.0356082916259766, "learning_rate": 1.9965287036883302e-05, "loss": 0.7342, "step": 2175 }, { "epoch": 0.06, "grad_norm": 3.4082813262939453, "learning_rate": 1.996521789325132e-05, "loss": 0.9916, "step": 2176 }, { "epoch": 0.06, "grad_norm": 3.2020394802093506, "learning_rate": 1.9965148680945315e-05, "loss": 0.8455, "step": 2177 }, { "epoch": 0.06, "grad_norm": 2.292915105819702, "learning_rate": 1.996507939996577e-05, "loss": 0.6928, "step": 2178 }, { "epoch": 0.06, "grad_norm": 2.5260744094848633, "learning_rate": 1.9965010050313154e-05, "loss": 0.7961, "step": 2179 }, { "epoch": 0.06, "grad_norm": 2.8242270946502686, "learning_rate": 1.996494063198795e-05, "loss": 0.6951, "step": 2180 }, { "epoch": 0.06, "grad_norm": 2.391810417175293, "learning_rate": 1.9964871144990638e-05, "loss": 0.7431, "step": 2181 }, { "epoch": 0.06, "grad_norm": 5.3040032386779785, "learning_rate": 1.9964801589321694e-05, "loss": 0.7295, "step": 2182 }, { "epoch": 0.06, "grad_norm": 3.4682424068450928, "learning_rate": 1.9964731964981598e-05, "loss": 0.851, "step": 2183 }, { "epoch": 0.06, "grad_norm": 3.065107583999634, "learning_rate": 1.9964662271970826e-05, "loss": 0.4929, "step": 2184 }, { "epoch": 0.06, "grad_norm": 3.0611090660095215, "learning_rate": 1.9964592510289866e-05, "loss": 0.5834, "step": 2185 }, { "epoch": 0.06, "grad_norm": 1.7483341693878174, "learning_rate": 1.9964522679939193e-05, "loss": 0.7289, "step": 2186 }, { "epoch": 0.06, "grad_norm": 2.1410582065582275, "learning_rate": 1.9964452780919292e-05, "loss": 0.6987, "step": 2187 }, { "epoch": 0.06, "grad_norm": 2.806553840637207, "learning_rate": 1.996438281323064e-05, "loss": 0.7695, "step": 2188 }, { "epoch": 0.06, "grad_norm": 2.079364776611328, "learning_rate": 1.9964312776873724e-05, "loss": 0.6939, "step": 2189 }, { "epoch": 0.06, "grad_norm": 2.9931628704071045, "learning_rate": 1.9964242671849027e-05, "loss": 0.6479, "step": 2190 }, { "epoch": 0.06, "grad_norm": 1.7766848802566528, "learning_rate": 1.9964172498157025e-05, "loss": 0.7565, "step": 2191 }, { "epoch": 0.06, "grad_norm": 3.55253267288208, "learning_rate": 1.9964102255798207e-05, "loss": 0.6561, "step": 2192 }, { "epoch": 0.06, "grad_norm": 6.66431999206543, "learning_rate": 1.9964031944773057e-05, "loss": 0.7423, "step": 2193 }, { "epoch": 0.06, "grad_norm": 1.9905874729156494, "learning_rate": 1.996396156508206e-05, "loss": 0.7567, "step": 2194 }, { "epoch": 0.06, "grad_norm": 2.1321983337402344, "learning_rate": 1.9963891116725696e-05, "loss": 0.7086, "step": 2195 }, { "epoch": 0.06, "grad_norm": 4.11445426940918, "learning_rate": 1.9963820599704458e-05, "loss": 0.9322, "step": 2196 }, { "epoch": 0.06, "grad_norm": 1.785121202468872, "learning_rate": 1.996375001401883e-05, "loss": 0.7598, "step": 2197 }, { "epoch": 0.06, "grad_norm": 1.8415274620056152, "learning_rate": 1.996367935966929e-05, "loss": 0.7094, "step": 2198 }, { "epoch": 0.06, "grad_norm": 2.199052333831787, "learning_rate": 1.996360863665634e-05, "loss": 0.7891, "step": 2199 }, { "epoch": 0.06, "grad_norm": 3.5308499336242676, "learning_rate": 1.9963537844980455e-05, "loss": 0.8453, "step": 2200 }, { "epoch": 0.06, "grad_norm": 1.5148507356643677, "learning_rate": 1.9963466984642123e-05, "loss": 0.6247, "step": 2201 }, { "epoch": 0.06, "grad_norm": 2.914342164993286, "learning_rate": 1.996339605564184e-05, "loss": 0.7187, "step": 2202 }, { "epoch": 0.06, "grad_norm": 2.8655526638031006, "learning_rate": 1.996332505798009e-05, "loss": 0.7926, "step": 2203 }, { "epoch": 0.06, "grad_norm": 3.8763816356658936, "learning_rate": 1.996325399165737e-05, "loss": 0.7925, "step": 2204 }, { "epoch": 0.06, "grad_norm": 2.909029483795166, "learning_rate": 1.9963182856674154e-05, "loss": 0.7769, "step": 2205 }, { "epoch": 0.06, "grad_norm": 3.639935255050659, "learning_rate": 1.9963111653030946e-05, "loss": 0.6353, "step": 2206 }, { "epoch": 0.06, "grad_norm": 3.3721065521240234, "learning_rate": 1.9963040380728233e-05, "loss": 0.6728, "step": 2207 }, { "epoch": 0.06, "grad_norm": 2.623098850250244, "learning_rate": 1.99629690397665e-05, "loss": 0.8113, "step": 2208 }, { "epoch": 0.06, "grad_norm": 2.445988655090332, "learning_rate": 1.9962897630146244e-05, "loss": 0.7414, "step": 2209 }, { "epoch": 0.06, "grad_norm": 3.018946647644043, "learning_rate": 1.996282615186796e-05, "loss": 0.8015, "step": 2210 }, { "epoch": 0.06, "grad_norm": 3.9201345443725586, "learning_rate": 1.9962754604932136e-05, "loss": 0.6845, "step": 2211 }, { "epoch": 0.06, "grad_norm": 4.613283634185791, "learning_rate": 1.996268298933927e-05, "loss": 0.7557, "step": 2212 }, { "epoch": 0.06, "grad_norm": 3.1008384227752686, "learning_rate": 1.9962611305089847e-05, "loss": 0.6678, "step": 2213 }, { "epoch": 0.06, "grad_norm": 2.9284510612487793, "learning_rate": 1.9962539552184367e-05, "loss": 0.6667, "step": 2214 }, { "epoch": 0.06, "grad_norm": 3.608513116836548, "learning_rate": 1.9962467730623326e-05, "loss": 0.6812, "step": 2215 }, { "epoch": 0.06, "grad_norm": 2.398261547088623, "learning_rate": 1.9962395840407215e-05, "loss": 0.6761, "step": 2216 }, { "epoch": 0.06, "grad_norm": 2.273740530014038, "learning_rate": 1.996232388153653e-05, "loss": 0.5981, "step": 2217 }, { "epoch": 0.06, "grad_norm": 3.3450326919555664, "learning_rate": 1.996225185401177e-05, "loss": 0.7074, "step": 2218 }, { "epoch": 0.06, "grad_norm": 3.7721664905548096, "learning_rate": 1.9962179757833428e-05, "loss": 0.6856, "step": 2219 }, { "epoch": 0.06, "grad_norm": 1.5235986709594727, "learning_rate": 1.9962107593002002e-05, "loss": 0.6149, "step": 2220 }, { "epoch": 0.06, "grad_norm": 2.5627121925354004, "learning_rate": 1.996203535951799e-05, "loss": 0.8511, "step": 2221 }, { "epoch": 0.06, "grad_norm": 2.8425662517547607, "learning_rate": 1.9961963057381886e-05, "loss": 0.9202, "step": 2222 }, { "epoch": 0.06, "grad_norm": 3.904031276702881, "learning_rate": 1.9961890686594192e-05, "loss": 0.7422, "step": 2223 }, { "epoch": 0.06, "grad_norm": 2.0565617084503174, "learning_rate": 1.996181824715541e-05, "loss": 0.7565, "step": 2224 }, { "epoch": 0.06, "grad_norm": 8.10700511932373, "learning_rate": 1.996174573906603e-05, "loss": 0.9526, "step": 2225 }, { "epoch": 0.06, "grad_norm": 3.4045684337615967, "learning_rate": 1.996167316232656e-05, "loss": 0.563, "step": 2226 }, { "epoch": 0.06, "grad_norm": 2.1259889602661133, "learning_rate": 1.9961600516937498e-05, "loss": 0.6569, "step": 2227 }, { "epoch": 0.06, "grad_norm": 7.134080410003662, "learning_rate": 1.996152780289934e-05, "loss": 0.715, "step": 2228 }, { "epoch": 0.06, "grad_norm": 3.3680520057678223, "learning_rate": 1.9961455020212597e-05, "loss": 0.9373, "step": 2229 }, { "epoch": 0.06, "grad_norm": 7.916531085968018, "learning_rate": 1.9961382168877762e-05, "loss": 0.6688, "step": 2230 }, { "epoch": 0.06, "grad_norm": 3.744206666946411, "learning_rate": 1.9961309248895335e-05, "loss": 0.7403, "step": 2231 }, { "epoch": 0.06, "grad_norm": 4.014562606811523, "learning_rate": 1.9961236260265825e-05, "loss": 0.907, "step": 2232 }, { "epoch": 0.06, "grad_norm": 3.2238125801086426, "learning_rate": 1.9961163202989735e-05, "loss": 0.6993, "step": 2233 }, { "epoch": 0.06, "grad_norm": 2.666090250015259, "learning_rate": 1.9961090077067567e-05, "loss": 0.7463, "step": 2234 }, { "epoch": 0.06, "grad_norm": 6.736996173858643, "learning_rate": 1.996101688249982e-05, "loss": 0.634, "step": 2235 }, { "epoch": 0.06, "grad_norm": 3.3112266063690186, "learning_rate": 1.996094361928701e-05, "loss": 0.5804, "step": 2236 }, { "epoch": 0.06, "grad_norm": 2.4719536304473877, "learning_rate": 1.9960870287429628e-05, "loss": 0.761, "step": 2237 }, { "epoch": 0.06, "grad_norm": 7.248157024383545, "learning_rate": 1.996079688692819e-05, "loss": 0.906, "step": 2238 }, { "epoch": 0.06, "grad_norm": 3.163032054901123, "learning_rate": 1.9960723417783195e-05, "loss": 0.7422, "step": 2239 }, { "epoch": 0.06, "grad_norm": 5.278548717498779, "learning_rate": 1.9960649879995154e-05, "loss": 0.8616, "step": 2240 }, { "epoch": 0.06, "grad_norm": 2.2277650833129883, "learning_rate": 1.996057627356457e-05, "loss": 0.6964, "step": 2241 }, { "epoch": 0.06, "grad_norm": 1.7754124402999878, "learning_rate": 1.9960502598491954e-05, "loss": 0.7734, "step": 2242 }, { "epoch": 0.06, "grad_norm": 3.0894527435302734, "learning_rate": 1.9960428854777813e-05, "loss": 0.8964, "step": 2243 }, { "epoch": 0.06, "grad_norm": 2.7891077995300293, "learning_rate": 1.996035504242265e-05, "loss": 0.7588, "step": 2244 }, { "epoch": 0.06, "grad_norm": 1.7048135995864868, "learning_rate": 1.996028116142698e-05, "loss": 0.5743, "step": 2245 }, { "epoch": 0.06, "grad_norm": 1.6575360298156738, "learning_rate": 1.996020721179131e-05, "loss": 0.7299, "step": 2246 }, { "epoch": 0.06, "grad_norm": 2.634276866912842, "learning_rate": 1.9960133193516148e-05, "loss": 0.5986, "step": 2247 }, { "epoch": 0.06, "grad_norm": 2.5449256896972656, "learning_rate": 1.9960059106602007e-05, "loss": 0.7998, "step": 2248 }, { "epoch": 0.06, "grad_norm": 2.141967535018921, "learning_rate": 1.9959984951049394e-05, "loss": 0.5858, "step": 2249 }, { "epoch": 0.06, "grad_norm": 3.2979092597961426, "learning_rate": 1.995991072685883e-05, "loss": 0.9028, "step": 2250 }, { "epoch": 0.06, "grad_norm": 1.9036024808883667, "learning_rate": 1.995983643403081e-05, "loss": 0.7841, "step": 2251 }, { "epoch": 0.06, "grad_norm": 5.14810848236084, "learning_rate": 1.995976207256586e-05, "loss": 1.0378, "step": 2252 }, { "epoch": 0.06, "grad_norm": 2.5368664264678955, "learning_rate": 1.9959687642464484e-05, "loss": 0.7199, "step": 2253 }, { "epoch": 0.06, "grad_norm": 3.06445050239563, "learning_rate": 1.99596131437272e-05, "loss": 0.7509, "step": 2254 }, { "epoch": 0.06, "grad_norm": 4.335337162017822, "learning_rate": 1.9959538576354517e-05, "loss": 0.7618, "step": 2255 }, { "epoch": 0.06, "grad_norm": 2.0808050632476807, "learning_rate": 1.995946394034695e-05, "loss": 0.6604, "step": 2256 }, { "epoch": 0.06, "grad_norm": 2.7774999141693115, "learning_rate": 1.995938923570502e-05, "loss": 0.6339, "step": 2257 }, { "epoch": 0.06, "grad_norm": 6.001291275024414, "learning_rate": 1.9959314462429234e-05, "loss": 0.8885, "step": 2258 }, { "epoch": 0.06, "grad_norm": 2.3756327629089355, "learning_rate": 1.995923962052011e-05, "loss": 0.6134, "step": 2259 }, { "epoch": 0.06, "grad_norm": 2.353649854660034, "learning_rate": 1.9959164709978163e-05, "loss": 0.6352, "step": 2260 }, { "epoch": 0.06, "grad_norm": 2.320728302001953, "learning_rate": 1.995908973080391e-05, "loss": 0.8011, "step": 2261 }, { "epoch": 0.06, "grad_norm": 3.199187994003296, "learning_rate": 1.995901468299787e-05, "loss": 0.7543, "step": 2262 }, { "epoch": 0.06, "grad_norm": 2.324686050415039, "learning_rate": 1.9958939566560555e-05, "loss": 0.6391, "step": 2263 }, { "epoch": 0.06, "grad_norm": 1.8424845933914185, "learning_rate": 1.9958864381492483e-05, "loss": 0.6151, "step": 2264 }, { "epoch": 0.06, "grad_norm": 2.4927375316619873, "learning_rate": 1.9958789127794178e-05, "loss": 0.7831, "step": 2265 }, { "epoch": 0.06, "grad_norm": 2.841484785079956, "learning_rate": 1.9958713805466155e-05, "loss": 0.6529, "step": 2266 }, { "epoch": 0.06, "grad_norm": 4.993950843811035, "learning_rate": 1.9958638414508932e-05, "loss": 1.1777, "step": 2267 }, { "epoch": 0.06, "grad_norm": 3.022834300994873, "learning_rate": 1.995856295492303e-05, "loss": 0.5663, "step": 2268 }, { "epoch": 0.06, "grad_norm": 2.3713653087615967, "learning_rate": 1.9958487426708964e-05, "loss": 0.7989, "step": 2269 }, { "epoch": 0.06, "grad_norm": 2.7191262245178223, "learning_rate": 1.9958411829867268e-05, "loss": 0.7693, "step": 2270 }, { "epoch": 0.06, "grad_norm": 3.1402831077575684, "learning_rate": 1.995833616439845e-05, "loss": 0.6604, "step": 2271 }, { "epoch": 0.06, "grad_norm": 1.240810751914978, "learning_rate": 1.9958260430303036e-05, "loss": 0.6108, "step": 2272 }, { "epoch": 0.06, "grad_norm": 3.6473922729492188, "learning_rate": 1.9958184627581545e-05, "loss": 0.6707, "step": 2273 }, { "epoch": 0.06, "grad_norm": 3.6715288162231445, "learning_rate": 1.9958108756234505e-05, "loss": 0.8325, "step": 2274 }, { "epoch": 0.06, "grad_norm": 2.0899484157562256, "learning_rate": 1.9958032816262434e-05, "loss": 0.6478, "step": 2275 }, { "epoch": 0.06, "grad_norm": 1.8193011283874512, "learning_rate": 1.995795680766586e-05, "loss": 0.6725, "step": 2276 }, { "epoch": 0.06, "grad_norm": 1.9852404594421387, "learning_rate": 1.99578807304453e-05, "loss": 0.6459, "step": 2277 }, { "epoch": 0.06, "grad_norm": 3.613426923751831, "learning_rate": 1.995780458460128e-05, "loss": 0.7565, "step": 2278 }, { "epoch": 0.06, "grad_norm": 1.97367262840271, "learning_rate": 1.9957728370134337e-05, "loss": 0.6913, "step": 2279 }, { "epoch": 0.06, "grad_norm": 2.8101773262023926, "learning_rate": 1.9957652087044977e-05, "loss": 0.7305, "step": 2280 }, { "epoch": 0.06, "grad_norm": 2.5015430450439453, "learning_rate": 1.9957575735333735e-05, "loss": 0.6927, "step": 2281 }, { "epoch": 0.06, "grad_norm": 2.0317554473876953, "learning_rate": 1.9957499315001142e-05, "loss": 0.6983, "step": 2282 }, { "epoch": 0.06, "grad_norm": 1.4176815748214722, "learning_rate": 1.9957422826047716e-05, "loss": 0.5613, "step": 2283 }, { "epoch": 0.06, "grad_norm": 2.3035383224487305, "learning_rate": 1.995734626847399e-05, "loss": 0.6474, "step": 2284 }, { "epoch": 0.06, "grad_norm": 1.7113434076309204, "learning_rate": 1.9957269642280487e-05, "loss": 0.6011, "step": 2285 }, { "epoch": 0.06, "grad_norm": 2.6437807083129883, "learning_rate": 1.9957192947467742e-05, "loss": 0.6774, "step": 2286 }, { "epoch": 0.06, "grad_norm": 2.875784158706665, "learning_rate": 1.9957116184036274e-05, "loss": 0.6294, "step": 2287 }, { "epoch": 0.06, "grad_norm": 2.1974191665649414, "learning_rate": 1.9957039351986617e-05, "loss": 0.7393, "step": 2288 }, { "epoch": 0.06, "grad_norm": 2.627906322479248, "learning_rate": 1.9956962451319302e-05, "loss": 0.815, "step": 2289 }, { "epoch": 0.06, "grad_norm": 2.9566173553466797, "learning_rate": 1.9956885482034857e-05, "loss": 0.6682, "step": 2290 }, { "epoch": 0.06, "grad_norm": 2.223282814025879, "learning_rate": 1.9956808444133815e-05, "loss": 0.7327, "step": 2291 }, { "epoch": 0.06, "grad_norm": 2.604702949523926, "learning_rate": 1.9956731337616704e-05, "loss": 0.6985, "step": 2292 }, { "epoch": 0.06, "grad_norm": 4.538128852844238, "learning_rate": 1.9956654162484056e-05, "loss": 0.8526, "step": 2293 }, { "epoch": 0.06, "grad_norm": 2.331190347671509, "learning_rate": 1.99565769187364e-05, "loss": 0.6818, "step": 2294 }, { "epoch": 0.06, "grad_norm": 3.2483866214752197, "learning_rate": 1.9956499606374273e-05, "loss": 0.884, "step": 2295 }, { "epoch": 0.06, "grad_norm": 2.914353847503662, "learning_rate": 1.9956422225398206e-05, "loss": 0.6967, "step": 2296 }, { "epoch": 0.06, "grad_norm": 2.194352865219116, "learning_rate": 1.9956344775808733e-05, "loss": 0.6019, "step": 2297 }, { "epoch": 0.06, "grad_norm": 2.8887858390808105, "learning_rate": 1.9956267257606386e-05, "loss": 0.7987, "step": 2298 }, { "epoch": 0.06, "grad_norm": 3.4244465827941895, "learning_rate": 1.99561896707917e-05, "loss": 0.8339, "step": 2299 }, { "epoch": 0.06, "grad_norm": 2.2355797290802, "learning_rate": 1.9956112015365208e-05, "loss": 0.7716, "step": 2300 }, { "epoch": 0.06, "grad_norm": 3.0995426177978516, "learning_rate": 1.995603429132745e-05, "loss": 0.7921, "step": 2301 }, { "epoch": 0.06, "grad_norm": 2.936863899230957, "learning_rate": 1.9955956498678958e-05, "loss": 0.8805, "step": 2302 }, { "epoch": 0.06, "grad_norm": 2.1877498626708984, "learning_rate": 1.9955878637420268e-05, "loss": 0.7195, "step": 2303 }, { "epoch": 0.06, "grad_norm": 3.407780647277832, "learning_rate": 1.995580070755192e-05, "loss": 0.8752, "step": 2304 }, { "epoch": 0.06, "grad_norm": 3.2149851322174072, "learning_rate": 1.9955722709074448e-05, "loss": 0.7197, "step": 2305 }, { "epoch": 0.06, "grad_norm": 2.022023916244507, "learning_rate": 1.9955644641988385e-05, "loss": 0.6869, "step": 2306 }, { "epoch": 0.06, "grad_norm": 2.9007818698883057, "learning_rate": 1.995556650629428e-05, "loss": 0.911, "step": 2307 }, { "epoch": 0.06, "grad_norm": 2.2576825618743896, "learning_rate": 1.9955488301992663e-05, "loss": 0.8127, "step": 2308 }, { "epoch": 0.06, "grad_norm": 3.9807655811309814, "learning_rate": 1.995541002908407e-05, "loss": 0.7409, "step": 2309 }, { "epoch": 0.06, "grad_norm": 7.3030781745910645, "learning_rate": 1.9955331687569054e-05, "loss": 0.943, "step": 2310 }, { "epoch": 0.06, "grad_norm": 3.4336354732513428, "learning_rate": 1.9955253277448142e-05, "loss": 0.7286, "step": 2311 }, { "epoch": 0.06, "grad_norm": 2.300720691680908, "learning_rate": 1.9955174798721878e-05, "loss": 0.6128, "step": 2312 }, { "epoch": 0.06, "grad_norm": 3.0316169261932373, "learning_rate": 1.9955096251390807e-05, "loss": 0.8252, "step": 2313 }, { "epoch": 0.06, "grad_norm": 2.46040940284729, "learning_rate": 1.995501763545547e-05, "loss": 0.4672, "step": 2314 }, { "epoch": 0.06, "grad_norm": 2.43619441986084, "learning_rate": 1.9954938950916397e-05, "loss": 0.6804, "step": 2315 }, { "epoch": 0.06, "grad_norm": 4.287526607513428, "learning_rate": 1.9954860197774148e-05, "loss": 0.6925, "step": 2316 }, { "epoch": 0.06, "grad_norm": 5.427709579467773, "learning_rate": 1.995478137602925e-05, "loss": 0.7012, "step": 2317 }, { "epoch": 0.06, "grad_norm": 1.863039493560791, "learning_rate": 1.9954702485682254e-05, "loss": 0.6497, "step": 2318 }, { "epoch": 0.06, "grad_norm": 2.2654871940612793, "learning_rate": 1.9954623526733706e-05, "loss": 0.6685, "step": 2319 }, { "epoch": 0.06, "grad_norm": 2.68735408782959, "learning_rate": 1.9954544499184143e-05, "loss": 0.7099, "step": 2320 }, { "epoch": 0.06, "grad_norm": 1.8410006761550903, "learning_rate": 1.9954465403034117e-05, "loss": 0.5927, "step": 2321 }, { "epoch": 0.06, "grad_norm": 1.7100282907485962, "learning_rate": 1.9954386238284166e-05, "loss": 0.7717, "step": 2322 }, { "epoch": 0.06, "grad_norm": 2.2519404888153076, "learning_rate": 1.995430700493484e-05, "loss": 0.7251, "step": 2323 }, { "epoch": 0.06, "grad_norm": 1.80549156665802, "learning_rate": 1.9954227702986687e-05, "loss": 0.7876, "step": 2324 }, { "epoch": 0.06, "grad_norm": 2.887202262878418, "learning_rate": 1.995414833244025e-05, "loss": 0.6701, "step": 2325 }, { "epoch": 0.06, "grad_norm": 3.517364263534546, "learning_rate": 1.995406889329607e-05, "loss": 0.8894, "step": 2326 }, { "epoch": 0.06, "grad_norm": 3.4601240158081055, "learning_rate": 1.9953989385554706e-05, "loss": 0.7195, "step": 2327 }, { "epoch": 0.06, "grad_norm": 2.1957640647888184, "learning_rate": 1.9953909809216704e-05, "loss": 0.6003, "step": 2328 }, { "epoch": 0.06, "grad_norm": 2.2030107975006104, "learning_rate": 1.9953830164282603e-05, "loss": 0.7056, "step": 2329 }, { "epoch": 0.06, "grad_norm": 2.648569345474243, "learning_rate": 1.9953750450752962e-05, "loss": 0.6074, "step": 2330 }, { "epoch": 0.06, "grad_norm": 6.455957889556885, "learning_rate": 1.9953670668628323e-05, "loss": 0.776, "step": 2331 }, { "epoch": 0.06, "grad_norm": 4.61039924621582, "learning_rate": 1.995359081790924e-05, "loss": 0.8087, "step": 2332 }, { "epoch": 0.06, "grad_norm": 2.3007051944732666, "learning_rate": 1.9953510898596266e-05, "loss": 0.8155, "step": 2333 }, { "epoch": 0.06, "grad_norm": 2.5672762393951416, "learning_rate": 1.995343091068995e-05, "loss": 0.6866, "step": 2334 }, { "epoch": 0.06, "grad_norm": 2.833310604095459, "learning_rate": 1.9953350854190834e-05, "loss": 0.673, "step": 2335 }, { "epoch": 0.06, "grad_norm": 3.49888277053833, "learning_rate": 1.995327072909948e-05, "loss": 0.9385, "step": 2336 }, { "epoch": 0.06, "grad_norm": 2.8169236183166504, "learning_rate": 1.995319053541644e-05, "loss": 0.6732, "step": 2337 }, { "epoch": 0.06, "grad_norm": 2.3091933727264404, "learning_rate": 1.995311027314226e-05, "loss": 0.6674, "step": 2338 }, { "epoch": 0.06, "grad_norm": 2.39778733253479, "learning_rate": 1.9953029942277502e-05, "loss": 0.8052, "step": 2339 }, { "epoch": 0.06, "grad_norm": 4.2159857749938965, "learning_rate": 1.9952949542822715e-05, "loss": 0.7287, "step": 2340 }, { "epoch": 0.06, "grad_norm": 2.056656837463379, "learning_rate": 1.995286907477845e-05, "loss": 0.6036, "step": 2341 }, { "epoch": 0.06, "grad_norm": 3.5817954540252686, "learning_rate": 1.9952788538145263e-05, "loss": 0.6865, "step": 2342 }, { "epoch": 0.06, "grad_norm": 6.5241265296936035, "learning_rate": 1.9952707932923713e-05, "loss": 0.6088, "step": 2343 }, { "epoch": 0.06, "grad_norm": 3.1529722213745117, "learning_rate": 1.995262725911435e-05, "loss": 0.774, "step": 2344 }, { "epoch": 0.06, "grad_norm": 2.1951658725738525, "learning_rate": 1.9952546516717736e-05, "loss": 0.6709, "step": 2345 }, { "epoch": 0.06, "grad_norm": 1.6389331817626953, "learning_rate": 1.9952465705734425e-05, "loss": 0.6339, "step": 2346 }, { "epoch": 0.06, "grad_norm": 6.184557914733887, "learning_rate": 1.9952384826164968e-05, "loss": 0.7216, "step": 2347 }, { "epoch": 0.06, "grad_norm": 1.5064438581466675, "learning_rate": 1.9952303878009932e-05, "loss": 0.7017, "step": 2348 }, { "epoch": 0.06, "grad_norm": 3.9263784885406494, "learning_rate": 1.995222286126987e-05, "loss": 0.9454, "step": 2349 }, { "epoch": 0.06, "grad_norm": 2.7889158725738525, "learning_rate": 1.9952141775945338e-05, "loss": 0.7872, "step": 2350 }, { "epoch": 0.06, "grad_norm": 1.7605570554733276, "learning_rate": 1.99520606220369e-05, "loss": 0.6592, "step": 2351 }, { "epoch": 0.06, "grad_norm": 2.221808433532715, "learning_rate": 1.995197939954511e-05, "loss": 0.671, "step": 2352 }, { "epoch": 0.06, "grad_norm": 2.9537389278411865, "learning_rate": 1.995189810847053e-05, "loss": 0.7834, "step": 2353 }, { "epoch": 0.06, "grad_norm": 3.0028676986694336, "learning_rate": 1.9951816748813725e-05, "loss": 0.8311, "step": 2354 }, { "epoch": 0.06, "grad_norm": 1.933286190032959, "learning_rate": 1.9951735320575248e-05, "loss": 0.7379, "step": 2355 }, { "epoch": 0.06, "grad_norm": 2.8799548149108887, "learning_rate": 1.9951653823755664e-05, "loss": 0.7647, "step": 2356 }, { "epoch": 0.06, "grad_norm": 2.493959665298462, "learning_rate": 1.9951572258355532e-05, "loss": 0.5922, "step": 2357 }, { "epoch": 0.06, "grad_norm": 2.7651283740997314, "learning_rate": 1.995149062437542e-05, "loss": 0.7844, "step": 2358 }, { "epoch": 0.06, "grad_norm": 1.687620759010315, "learning_rate": 1.9951408921815886e-05, "loss": 0.7508, "step": 2359 }, { "epoch": 0.06, "grad_norm": 1.6557756662368774, "learning_rate": 1.9951327150677492e-05, "loss": 0.5835, "step": 2360 }, { "epoch": 0.06, "grad_norm": 2.4315195083618164, "learning_rate": 1.9951245310960804e-05, "loss": 0.734, "step": 2361 }, { "epoch": 0.06, "grad_norm": 2.535369634628296, "learning_rate": 1.9951163402666387e-05, "loss": 0.7065, "step": 2362 }, { "epoch": 0.06, "grad_norm": 2.5538387298583984, "learning_rate": 1.9951081425794803e-05, "loss": 0.8617, "step": 2363 }, { "epoch": 0.06, "grad_norm": 2.019934892654419, "learning_rate": 1.9950999380346616e-05, "loss": 0.5984, "step": 2364 }, { "epoch": 0.06, "grad_norm": 2.1904184818267822, "learning_rate": 1.9950917266322394e-05, "loss": 0.7416, "step": 2365 }, { "epoch": 0.06, "grad_norm": 1.9821857213974, "learning_rate": 1.9950835083722703e-05, "loss": 0.596, "step": 2366 }, { "epoch": 0.06, "grad_norm": 2.4073777198791504, "learning_rate": 1.995075283254811e-05, "loss": 0.746, "step": 2367 }, { "epoch": 0.06, "grad_norm": 2.0004210472106934, "learning_rate": 1.9950670512799174e-05, "loss": 0.6126, "step": 2368 }, { "epoch": 0.06, "grad_norm": 2.5965487957000732, "learning_rate": 1.9950588124476474e-05, "loss": 0.661, "step": 2369 }, { "epoch": 0.06, "grad_norm": 2.418602705001831, "learning_rate": 1.995050566758057e-05, "loss": 0.6409, "step": 2370 }, { "epoch": 0.06, "grad_norm": 2.538280487060547, "learning_rate": 1.995042314211203e-05, "loss": 0.6469, "step": 2371 }, { "epoch": 0.06, "grad_norm": 5.641152381896973, "learning_rate": 1.995034054807143e-05, "loss": 0.9032, "step": 2372 }, { "epoch": 0.06, "grad_norm": 2.054100513458252, "learning_rate": 1.9950257885459332e-05, "loss": 0.6237, "step": 2373 }, { "epoch": 0.06, "grad_norm": 1.8349865674972534, "learning_rate": 1.9950175154276307e-05, "loss": 0.7323, "step": 2374 }, { "epoch": 0.06, "grad_norm": 4.864741325378418, "learning_rate": 1.9950092354522928e-05, "loss": 0.7176, "step": 2375 }, { "epoch": 0.06, "grad_norm": 2.1387686729431152, "learning_rate": 1.9950009486199766e-05, "loss": 0.6861, "step": 2376 }, { "epoch": 0.06, "grad_norm": 2.115692377090454, "learning_rate": 1.9949926549307386e-05, "loss": 0.6884, "step": 2377 }, { "epoch": 0.06, "grad_norm": 2.6532435417175293, "learning_rate": 1.9949843543846365e-05, "loss": 0.62, "step": 2378 }, { "epoch": 0.06, "grad_norm": 2.2527503967285156, "learning_rate": 1.9949760469817274e-05, "loss": 0.7652, "step": 2379 }, { "epoch": 0.06, "grad_norm": 2.6233561038970947, "learning_rate": 1.9949677327220683e-05, "loss": 0.7755, "step": 2380 }, { "epoch": 0.06, "grad_norm": 3.233673095703125, "learning_rate": 1.9949594116057168e-05, "loss": 0.7341, "step": 2381 }, { "epoch": 0.06, "grad_norm": 3.153005838394165, "learning_rate": 1.99495108363273e-05, "loss": 0.8479, "step": 2382 }, { "epoch": 0.06, "grad_norm": 7.620964527130127, "learning_rate": 1.9949427488031652e-05, "loss": 0.9599, "step": 2383 }, { "epoch": 0.06, "grad_norm": 2.2568769454956055, "learning_rate": 1.9949344071170808e-05, "loss": 0.5787, "step": 2384 }, { "epoch": 0.06, "grad_norm": 2.7235305309295654, "learning_rate": 1.994926058574533e-05, "loss": 0.6882, "step": 2385 }, { "epoch": 0.06, "grad_norm": 2.801189422607422, "learning_rate": 1.99491770317558e-05, "loss": 0.7842, "step": 2386 }, { "epoch": 0.06, "grad_norm": 2.0345585346221924, "learning_rate": 1.9949093409202792e-05, "loss": 0.7658, "step": 2387 }, { "epoch": 0.06, "grad_norm": 3.5789268016815186, "learning_rate": 1.9949009718086885e-05, "loss": 0.7755, "step": 2388 }, { "epoch": 0.06, "grad_norm": 2.1033987998962402, "learning_rate": 1.9948925958408652e-05, "loss": 0.6491, "step": 2389 }, { "epoch": 0.06, "grad_norm": 1.5828909873962402, "learning_rate": 1.9948842130168672e-05, "loss": 0.681, "step": 2390 }, { "epoch": 0.06, "grad_norm": 2.9708549976348877, "learning_rate": 1.994875823336752e-05, "loss": 0.7732, "step": 2391 }, { "epoch": 0.06, "grad_norm": 1.7113765478134155, "learning_rate": 1.994867426800578e-05, "loss": 0.585, "step": 2392 }, { "epoch": 0.06, "grad_norm": 6.577815055847168, "learning_rate": 1.9948590234084026e-05, "loss": 0.8696, "step": 2393 }, { "epoch": 0.06, "grad_norm": 2.2585058212280273, "learning_rate": 1.9948506131602842e-05, "loss": 0.9107, "step": 2394 }, { "epoch": 0.06, "grad_norm": 2.5608654022216797, "learning_rate": 1.9948421960562797e-05, "loss": 0.6712, "step": 2395 }, { "epoch": 0.06, "grad_norm": 1.9129266738891602, "learning_rate": 1.9948337720964485e-05, "loss": 0.7602, "step": 2396 }, { "epoch": 0.06, "grad_norm": 3.9200081825256348, "learning_rate": 1.9948253412808474e-05, "loss": 0.8877, "step": 2397 }, { "epoch": 0.06, "grad_norm": 1.7785781621932983, "learning_rate": 1.9948169036095355e-05, "loss": 0.701, "step": 2398 }, { "epoch": 0.06, "grad_norm": 2.198957920074463, "learning_rate": 1.9948084590825704e-05, "loss": 0.7374, "step": 2399 }, { "epoch": 0.06, "grad_norm": 2.5368125438690186, "learning_rate": 1.9948000077000102e-05, "loss": 0.7049, "step": 2400 }, { "epoch": 0.06, "grad_norm": 1.6522786617279053, "learning_rate": 1.9947915494619133e-05, "loss": 0.76, "step": 2401 }, { "epoch": 0.06, "grad_norm": 3.0312342643737793, "learning_rate": 1.9947830843683384e-05, "loss": 0.7255, "step": 2402 }, { "epoch": 0.06, "grad_norm": 2.5335559844970703, "learning_rate": 1.994774612419343e-05, "loss": 0.823, "step": 2403 }, { "epoch": 0.06, "grad_norm": 2.948986291885376, "learning_rate": 1.9947661336149862e-05, "loss": 0.5841, "step": 2404 }, { "epoch": 0.06, "grad_norm": 3.218923807144165, "learning_rate": 1.9947576479553263e-05, "loss": 0.6682, "step": 2405 }, { "epoch": 0.06, "grad_norm": 4.213892936706543, "learning_rate": 1.9947491554404213e-05, "loss": 0.8874, "step": 2406 }, { "epoch": 0.06, "grad_norm": 3.2552640438079834, "learning_rate": 1.9947406560703303e-05, "loss": 0.8102, "step": 2407 }, { "epoch": 0.06, "grad_norm": 2.5016794204711914, "learning_rate": 1.994732149845112e-05, "loss": 0.7022, "step": 2408 }, { "epoch": 0.06, "grad_norm": 2.649989366531372, "learning_rate": 1.9947236367648243e-05, "loss": 0.748, "step": 2409 }, { "epoch": 0.06, "grad_norm": 3.73020339012146, "learning_rate": 1.9947151168295262e-05, "loss": 0.8035, "step": 2410 }, { "epoch": 0.06, "grad_norm": 1.7471237182617188, "learning_rate": 1.9947065900392765e-05, "loss": 0.6858, "step": 2411 }, { "epoch": 0.06, "grad_norm": 1.9450513124465942, "learning_rate": 1.9946980563941342e-05, "loss": 0.6373, "step": 2412 }, { "epoch": 0.06, "grad_norm": 3.920990228652954, "learning_rate": 1.9946895158941575e-05, "loss": 0.6399, "step": 2413 }, { "epoch": 0.06, "grad_norm": 3.6320579051971436, "learning_rate": 1.9946809685394055e-05, "loss": 0.6446, "step": 2414 }, { "epoch": 0.06, "grad_norm": 3.185983419418335, "learning_rate": 1.9946724143299373e-05, "loss": 0.7572, "step": 2415 }, { "epoch": 0.06, "grad_norm": 4.07523775100708, "learning_rate": 1.994663853265812e-05, "loss": 0.7244, "step": 2416 }, { "epoch": 0.06, "grad_norm": 2.735629081726074, "learning_rate": 1.994655285347088e-05, "loss": 0.939, "step": 2417 }, { "epoch": 0.06, "grad_norm": 4.569522857666016, "learning_rate": 1.9946467105738246e-05, "loss": 0.7492, "step": 2418 }, { "epoch": 0.06, "grad_norm": 3.286886692047119, "learning_rate": 1.9946381289460815e-05, "loss": 0.649, "step": 2419 }, { "epoch": 0.06, "grad_norm": 2.0498037338256836, "learning_rate": 1.9946295404639167e-05, "loss": 0.7499, "step": 2420 }, { "epoch": 0.06, "grad_norm": 2.2775087356567383, "learning_rate": 1.9946209451273906e-05, "loss": 0.7361, "step": 2421 }, { "epoch": 0.06, "grad_norm": 3.0457828044891357, "learning_rate": 1.9946123429365614e-05, "loss": 0.7484, "step": 2422 }, { "epoch": 0.06, "grad_norm": 2.4847958087921143, "learning_rate": 1.9946037338914887e-05, "loss": 0.7588, "step": 2423 }, { "epoch": 0.06, "grad_norm": 2.1186187267303467, "learning_rate": 1.9945951179922325e-05, "loss": 0.7138, "step": 2424 }, { "epoch": 0.06, "grad_norm": 2.5759048461914062, "learning_rate": 1.994586495238851e-05, "loss": 0.7182, "step": 2425 }, { "epoch": 0.06, "grad_norm": 4.638031482696533, "learning_rate": 1.9945778656314044e-05, "loss": 0.74, "step": 2426 }, { "epoch": 0.06, "grad_norm": 2.208004951477051, "learning_rate": 1.9945692291699522e-05, "loss": 0.7042, "step": 2427 }, { "epoch": 0.06, "grad_norm": 3.9868972301483154, "learning_rate": 1.994560585854554e-05, "loss": 0.8512, "step": 2428 }, { "epoch": 0.06, "grad_norm": 3.0264458656311035, "learning_rate": 1.9945519356852686e-05, "loss": 0.81, "step": 2429 }, { "epoch": 0.06, "grad_norm": 2.300863265991211, "learning_rate": 1.994543278662156e-05, "loss": 0.6907, "step": 2430 }, { "epoch": 0.06, "grad_norm": 2.3356564044952393, "learning_rate": 1.994534614785276e-05, "loss": 0.7551, "step": 2431 }, { "epoch": 0.06, "grad_norm": 2.3661372661590576, "learning_rate": 1.9945259440546886e-05, "loss": 0.6095, "step": 2432 }, { "epoch": 0.06, "grad_norm": 10.00365924835205, "learning_rate": 1.994517266470453e-05, "loss": 0.7718, "step": 2433 }, { "epoch": 0.06, "grad_norm": 5.770806789398193, "learning_rate": 1.9945085820326292e-05, "loss": 0.7691, "step": 2434 }, { "epoch": 0.06, "grad_norm": 3.446976661682129, "learning_rate": 1.994499890741277e-05, "loss": 0.8648, "step": 2435 }, { "epoch": 0.06, "grad_norm": 3.0949110984802246, "learning_rate": 1.9944911925964566e-05, "loss": 0.9615, "step": 2436 }, { "epoch": 0.06, "grad_norm": 2.8853511810302734, "learning_rate": 1.994482487598228e-05, "loss": 0.7604, "step": 2437 }, { "epoch": 0.06, "grad_norm": 3.193606376647949, "learning_rate": 1.9944737757466505e-05, "loss": 0.6672, "step": 2438 }, { "epoch": 0.06, "grad_norm": 2.7203195095062256, "learning_rate": 1.9944650570417844e-05, "loss": 0.7837, "step": 2439 }, { "epoch": 0.06, "grad_norm": 3.4490103721618652, "learning_rate": 1.99445633148369e-05, "loss": 0.7719, "step": 2440 }, { "epoch": 0.06, "grad_norm": 2.656219959259033, "learning_rate": 1.994447599072427e-05, "loss": 0.8443, "step": 2441 }, { "epoch": 0.06, "grad_norm": 5.769047737121582, "learning_rate": 1.9944388598080564e-05, "loss": 0.6409, "step": 2442 }, { "epoch": 0.06, "grad_norm": 3.2435052394866943, "learning_rate": 1.994430113690638e-05, "loss": 0.7032, "step": 2443 }, { "epoch": 0.06, "grad_norm": 1.7228294610977173, "learning_rate": 1.9944213607202318e-05, "loss": 0.5844, "step": 2444 }, { "epoch": 0.06, "grad_norm": 4.024240970611572, "learning_rate": 1.9944126008968983e-05, "loss": 0.6708, "step": 2445 }, { "epoch": 0.06, "grad_norm": 3.57795786857605, "learning_rate": 1.994403834220698e-05, "loss": 0.6733, "step": 2446 }, { "epoch": 0.06, "grad_norm": 5.111224174499512, "learning_rate": 1.9943950606916912e-05, "loss": 0.9176, "step": 2447 }, { "epoch": 0.06, "grad_norm": 2.659780979156494, "learning_rate": 1.9943862803099385e-05, "loss": 0.7099, "step": 2448 }, { "epoch": 0.06, "grad_norm": 3.3739776611328125, "learning_rate": 1.9943774930755e-05, "loss": 0.7562, "step": 2449 }, { "epoch": 0.06, "grad_norm": 3.8183693885803223, "learning_rate": 1.9943686989884368e-05, "loss": 0.8317, "step": 2450 }, { "epoch": 0.06, "grad_norm": 3.7511043548583984, "learning_rate": 1.9943598980488092e-05, "loss": 0.9544, "step": 2451 }, { "epoch": 0.06, "grad_norm": 2.8030788898468018, "learning_rate": 1.994351090256678e-05, "loss": 0.6723, "step": 2452 }, { "epoch": 0.06, "grad_norm": 2.2471213340759277, "learning_rate": 1.9943422756121036e-05, "loss": 0.7661, "step": 2453 }, { "epoch": 0.06, "grad_norm": 2.68841552734375, "learning_rate": 1.994333454115147e-05, "loss": 0.7528, "step": 2454 }, { "epoch": 0.06, "grad_norm": 5.763272762298584, "learning_rate": 1.994324625765869e-05, "loss": 0.9984, "step": 2455 }, { "epoch": 0.06, "grad_norm": 3.1187057495117188, "learning_rate": 1.9943157905643302e-05, "loss": 0.6839, "step": 2456 }, { "epoch": 0.06, "grad_norm": 2.27107310295105, "learning_rate": 1.994306948510592e-05, "loss": 0.7948, "step": 2457 }, { "epoch": 0.06, "grad_norm": 2.2840473651885986, "learning_rate": 1.9942980996047145e-05, "loss": 0.7779, "step": 2458 }, { "epoch": 0.06, "grad_norm": 3.6049728393554688, "learning_rate": 1.9942892438467594e-05, "loss": 0.7085, "step": 2459 }, { "epoch": 0.06, "grad_norm": 3.293050765991211, "learning_rate": 1.9942803812367876e-05, "loss": 0.696, "step": 2460 }, { "epoch": 0.06, "grad_norm": 2.2653298377990723, "learning_rate": 1.99427151177486e-05, "loss": 0.721, "step": 2461 }, { "epoch": 0.06, "grad_norm": 3.0385923385620117, "learning_rate": 1.9942626354610383e-05, "loss": 0.6615, "step": 2462 }, { "epoch": 0.06, "grad_norm": 4.4889750480651855, "learning_rate": 1.9942537522953824e-05, "loss": 0.6573, "step": 2463 }, { "epoch": 0.06, "grad_norm": 2.782235860824585, "learning_rate": 1.9942448622779547e-05, "loss": 0.6033, "step": 2464 }, { "epoch": 0.06, "grad_norm": 2.8511126041412354, "learning_rate": 1.9942359654088158e-05, "loss": 0.7069, "step": 2465 }, { "epoch": 0.06, "grad_norm": 6.2184882164001465, "learning_rate": 1.9942270616880274e-05, "loss": 0.6817, "step": 2466 }, { "epoch": 0.06, "grad_norm": 2.7184197902679443, "learning_rate": 1.9942181511156508e-05, "loss": 0.7082, "step": 2467 }, { "epoch": 0.06, "grad_norm": 4.174422740936279, "learning_rate": 1.9942092336917473e-05, "loss": 0.6829, "step": 2468 }, { "epoch": 0.06, "grad_norm": 8.043590545654297, "learning_rate": 1.9942003094163783e-05, "loss": 0.6351, "step": 2469 }, { "epoch": 0.06, "grad_norm": 3.6734135150909424, "learning_rate": 1.9941913782896055e-05, "loss": 0.7249, "step": 2470 }, { "epoch": 0.06, "grad_norm": 3.0150146484375, "learning_rate": 1.9941824403114904e-05, "loss": 0.6971, "step": 2471 }, { "epoch": 0.06, "grad_norm": 3.0401108264923096, "learning_rate": 1.9941734954820945e-05, "loss": 0.6086, "step": 2472 }, { "epoch": 0.06, "grad_norm": 2.7109227180480957, "learning_rate": 1.9941645438014793e-05, "loss": 0.7632, "step": 2473 }, { "epoch": 0.06, "grad_norm": 8.33030891418457, "learning_rate": 1.994155585269707e-05, "loss": 0.9619, "step": 2474 }, { "epoch": 0.06, "grad_norm": 7.2615647315979, "learning_rate": 1.9941466198868387e-05, "loss": 0.7644, "step": 2475 }, { "epoch": 0.06, "grad_norm": 4.85188627243042, "learning_rate": 1.9941376476529365e-05, "loss": 0.8377, "step": 2476 }, { "epoch": 0.06, "grad_norm": 1.90773344039917, "learning_rate": 1.994128668568062e-05, "loss": 0.901, "step": 2477 }, { "epoch": 0.06, "grad_norm": 2.4321653842926025, "learning_rate": 1.9941196826322775e-05, "loss": 0.5628, "step": 2478 }, { "epoch": 0.06, "grad_norm": 3.174741268157959, "learning_rate": 1.9941106898456447e-05, "loss": 0.7437, "step": 2479 }, { "epoch": 0.06, "grad_norm": 2.6596338748931885, "learning_rate": 1.994101690208226e-05, "loss": 0.6274, "step": 2480 }, { "epoch": 0.06, "grad_norm": 3.987035036087036, "learning_rate": 1.9940926837200825e-05, "loss": 0.6839, "step": 2481 }, { "epoch": 0.06, "grad_norm": 2.039179801940918, "learning_rate": 1.994083670381277e-05, "loss": 0.7068, "step": 2482 }, { "epoch": 0.06, "grad_norm": 2.3077099323272705, "learning_rate": 1.994074650191871e-05, "loss": 0.8645, "step": 2483 }, { "epoch": 0.06, "grad_norm": 2.3548812866210938, "learning_rate": 1.9940656231519275e-05, "loss": 0.6812, "step": 2484 }, { "epoch": 0.06, "grad_norm": 3.225813388824463, "learning_rate": 1.994056589261508e-05, "loss": 0.7381, "step": 2485 }, { "epoch": 0.06, "grad_norm": 5.566400051116943, "learning_rate": 1.9940475485206755e-05, "loss": 0.6938, "step": 2486 }, { "epoch": 0.06, "grad_norm": 5.213603973388672, "learning_rate": 1.994038500929491e-05, "loss": 0.7611, "step": 2487 }, { "epoch": 0.06, "grad_norm": 4.497725009918213, "learning_rate": 1.9940294464880182e-05, "loss": 0.6998, "step": 2488 }, { "epoch": 0.06, "grad_norm": 2.513249158859253, "learning_rate": 1.9940203851963188e-05, "loss": 0.7594, "step": 2489 }, { "epoch": 0.06, "grad_norm": 8.908708572387695, "learning_rate": 1.9940113170544556e-05, "loss": 0.6751, "step": 2490 }, { "epoch": 0.06, "grad_norm": 2.90143084526062, "learning_rate": 1.9940022420624905e-05, "loss": 0.7881, "step": 2491 }, { "epoch": 0.06, "grad_norm": 1.963226079940796, "learning_rate": 1.993993160220487e-05, "loss": 0.7033, "step": 2492 }, { "epoch": 0.06, "grad_norm": 2.8650739192962646, "learning_rate": 1.993984071528507e-05, "loss": 0.5796, "step": 2493 }, { "epoch": 0.06, "grad_norm": 5.922427177429199, "learning_rate": 1.993974975986613e-05, "loss": 0.8156, "step": 2494 }, { "epoch": 0.06, "grad_norm": 8.207282066345215, "learning_rate": 1.9939658735948683e-05, "loss": 0.7018, "step": 2495 }, { "epoch": 0.06, "grad_norm": 3.6943247318267822, "learning_rate": 1.9939567643533347e-05, "loss": 0.7318, "step": 2496 }, { "epoch": 0.06, "grad_norm": 2.2241861820220947, "learning_rate": 1.993947648262076e-05, "loss": 0.8172, "step": 2497 }, { "epoch": 0.06, "grad_norm": 3.144862174987793, "learning_rate": 1.993938525321154e-05, "loss": 0.7391, "step": 2498 }, { "epoch": 0.06, "grad_norm": 4.229213714599609, "learning_rate": 1.9939293955306328e-05, "loss": 0.6796, "step": 2499 }, { "epoch": 0.06, "grad_norm": 5.927068710327148, "learning_rate": 1.9939202588905748e-05, "loss": 0.8321, "step": 2500 }, { "epoch": 0.06, "grad_norm": 4.861433506011963, "learning_rate": 1.9939111154010424e-05, "loss": 0.7759, "step": 2501 }, { "epoch": 0.06, "grad_norm": 2.179077625274658, "learning_rate": 1.993901965062099e-05, "loss": 0.9911, "step": 2502 }, { "epoch": 0.06, "grad_norm": 6.799010753631592, "learning_rate": 1.993892807873808e-05, "loss": 0.7349, "step": 2503 }, { "epoch": 0.06, "grad_norm": 1.8412669897079468, "learning_rate": 1.9938836438362316e-05, "loss": 0.7302, "step": 2504 }, { "epoch": 0.06, "grad_norm": 5.740142345428467, "learning_rate": 1.993874472949434e-05, "loss": 0.6937, "step": 2505 }, { "epoch": 0.06, "grad_norm": 4.625904560089111, "learning_rate": 1.993865295213478e-05, "loss": 0.5943, "step": 2506 }, { "epoch": 0.06, "grad_norm": 2.8335695266723633, "learning_rate": 1.9938561106284264e-05, "loss": 0.6942, "step": 2507 }, { "epoch": 0.06, "grad_norm": 4.377411365509033, "learning_rate": 1.9938469191943433e-05, "loss": 0.7694, "step": 2508 }, { "epoch": 0.06, "grad_norm": 4.504324436187744, "learning_rate": 1.9938377209112915e-05, "loss": 0.5405, "step": 2509 }, { "epoch": 0.06, "grad_norm": 2.7257578372955322, "learning_rate": 1.993828515779334e-05, "loss": 0.541, "step": 2510 }, { "epoch": 0.06, "grad_norm": 3.317387580871582, "learning_rate": 1.9938193037985358e-05, "loss": 0.6757, "step": 2511 }, { "epoch": 0.06, "grad_norm": 2.515160083770752, "learning_rate": 1.9938100849689586e-05, "loss": 0.7844, "step": 2512 }, { "epoch": 0.06, "grad_norm": 2.513071298599243, "learning_rate": 1.9938008592906668e-05, "loss": 0.7192, "step": 2513 }, { "epoch": 0.06, "grad_norm": 1.8802330493927002, "learning_rate": 1.993791626763724e-05, "loss": 0.776, "step": 2514 }, { "epoch": 0.06, "grad_norm": 1.8828850984573364, "learning_rate": 1.9937823873881934e-05, "loss": 0.615, "step": 2515 }, { "epoch": 0.06, "grad_norm": 2.059156656265259, "learning_rate": 1.993773141164139e-05, "loss": 0.7552, "step": 2516 }, { "epoch": 0.06, "grad_norm": 3.066408395767212, "learning_rate": 1.9937638880916245e-05, "loss": 0.803, "step": 2517 }, { "epoch": 0.06, "grad_norm": 3.4653799533843994, "learning_rate": 1.993754628170714e-05, "loss": 0.6626, "step": 2518 }, { "epoch": 0.06, "grad_norm": 2.289865493774414, "learning_rate": 1.9937453614014703e-05, "loss": 0.7198, "step": 2519 }, { "epoch": 0.06, "grad_norm": 3.1558589935302734, "learning_rate": 1.9937360877839583e-05, "loss": 0.7611, "step": 2520 }, { "epoch": 0.06, "grad_norm": 5.698667526245117, "learning_rate": 1.9937268073182416e-05, "loss": 0.6025, "step": 2521 }, { "epoch": 0.06, "grad_norm": 3.051450729370117, "learning_rate": 1.9937175200043837e-05, "loss": 0.7244, "step": 2522 }, { "epoch": 0.06, "grad_norm": 2.017712116241455, "learning_rate": 1.993708225842449e-05, "loss": 0.6189, "step": 2523 }, { "epoch": 0.06, "grad_norm": 3.0140130519866943, "learning_rate": 1.9936989248325017e-05, "loss": 0.7458, "step": 2524 }, { "epoch": 0.06, "grad_norm": 2.3239705562591553, "learning_rate": 1.9936896169746058e-05, "loss": 0.8029, "step": 2525 }, { "epoch": 0.06, "grad_norm": 4.178267002105713, "learning_rate": 1.9936803022688252e-05, "loss": 0.9504, "step": 2526 }, { "epoch": 0.06, "grad_norm": 2.7974958419799805, "learning_rate": 1.9936709807152244e-05, "loss": 0.6824, "step": 2527 }, { "epoch": 0.06, "grad_norm": 7.420337200164795, "learning_rate": 1.9936616523138673e-05, "loss": 0.7044, "step": 2528 }, { "epoch": 0.06, "grad_norm": 2.2573931217193604, "learning_rate": 1.9936523170648185e-05, "loss": 0.6866, "step": 2529 }, { "epoch": 0.06, "grad_norm": 3.272597312927246, "learning_rate": 1.993642974968142e-05, "loss": 0.6573, "step": 2530 }, { "epoch": 0.06, "grad_norm": 2.7380731105804443, "learning_rate": 1.9936336260239027e-05, "loss": 0.9095, "step": 2531 }, { "epoch": 0.06, "grad_norm": 2.3669209480285645, "learning_rate": 1.9936242702321643e-05, "loss": 0.8829, "step": 2532 }, { "epoch": 0.06, "grad_norm": 5.508926868438721, "learning_rate": 1.9936149075929922e-05, "loss": 0.9246, "step": 2533 }, { "epoch": 0.06, "grad_norm": 2.4229419231414795, "learning_rate": 1.99360553810645e-05, "loss": 0.7687, "step": 2534 }, { "epoch": 0.06, "grad_norm": 1.9139080047607422, "learning_rate": 1.9935961617726028e-05, "loss": 0.5849, "step": 2535 }, { "epoch": 0.06, "grad_norm": 3.101032257080078, "learning_rate": 1.993586778591515e-05, "loss": 0.5936, "step": 2536 }, { "epoch": 0.07, "grad_norm": 5.311905860900879, "learning_rate": 1.9935773885632516e-05, "loss": 0.6327, "step": 2537 }, { "epoch": 0.07, "grad_norm": 3.9463531970977783, "learning_rate": 1.993567991687877e-05, "loss": 0.647, "step": 2538 }, { "epoch": 0.07, "grad_norm": 2.782684326171875, "learning_rate": 1.9935585879654556e-05, "loss": 0.7549, "step": 2539 }, { "epoch": 0.07, "grad_norm": 1.6985679864883423, "learning_rate": 1.9935491773960534e-05, "loss": 0.6094, "step": 2540 }, { "epoch": 0.07, "grad_norm": 7.661222457885742, "learning_rate": 1.993539759979734e-05, "loss": 0.8239, "step": 2541 }, { "epoch": 0.07, "grad_norm": 3.368295192718506, "learning_rate": 1.993530335716563e-05, "loss": 0.566, "step": 2542 }, { "epoch": 0.07, "grad_norm": 2.2895967960357666, "learning_rate": 1.9935209046066046e-05, "loss": 0.7018, "step": 2543 }, { "epoch": 0.07, "grad_norm": 3.3316307067871094, "learning_rate": 1.993511466649925e-05, "loss": 0.6009, "step": 2544 }, { "epoch": 0.07, "grad_norm": 10.016035079956055, "learning_rate": 1.993502021846588e-05, "loss": 0.8064, "step": 2545 }, { "epoch": 0.07, "grad_norm": 4.260888576507568, "learning_rate": 1.9934925701966598e-05, "loss": 0.7439, "step": 2546 }, { "epoch": 0.07, "grad_norm": 1.9654557704925537, "learning_rate": 1.9934831117002042e-05, "loss": 0.5834, "step": 2547 }, { "epoch": 0.07, "grad_norm": 2.8074066638946533, "learning_rate": 1.9934736463572878e-05, "loss": 0.7096, "step": 2548 }, { "epoch": 0.07, "grad_norm": 3.0015268325805664, "learning_rate": 1.993464174167975e-05, "loss": 0.7572, "step": 2549 }, { "epoch": 0.07, "grad_norm": 3.0347583293914795, "learning_rate": 1.9934546951323313e-05, "loss": 0.7406, "step": 2550 }, { "epoch": 0.07, "grad_norm": 2.9904112815856934, "learning_rate": 1.993445209250422e-05, "loss": 0.7996, "step": 2551 }, { "epoch": 0.07, "grad_norm": 2.1436243057250977, "learning_rate": 1.993435716522312e-05, "loss": 0.6313, "step": 2552 }, { "epoch": 0.07, "grad_norm": 3.8930864334106445, "learning_rate": 1.993426216948068e-05, "loss": 0.7851, "step": 2553 }, { "epoch": 0.07, "grad_norm": 9.2115478515625, "learning_rate": 1.993416710527754e-05, "loss": 0.8989, "step": 2554 }, { "epoch": 0.07, "grad_norm": 2.653883218765259, "learning_rate": 1.9934071972614367e-05, "loss": 0.6364, "step": 2555 }, { "epoch": 0.07, "grad_norm": 2.8982951641082764, "learning_rate": 1.9933976771491808e-05, "loss": 0.8038, "step": 2556 }, { "epoch": 0.07, "grad_norm": 2.5078940391540527, "learning_rate": 1.9933881501910522e-05, "loss": 0.6717, "step": 2557 }, { "epoch": 0.07, "grad_norm": 4.174463272094727, "learning_rate": 1.9933786163871165e-05, "loss": 0.845, "step": 2558 }, { "epoch": 0.07, "grad_norm": 7.602718830108643, "learning_rate": 1.9933690757374396e-05, "loss": 0.9909, "step": 2559 }, { "epoch": 0.07, "grad_norm": 3.2883481979370117, "learning_rate": 1.993359528242087e-05, "loss": 0.6896, "step": 2560 }, { "epoch": 0.07, "grad_norm": 2.142765998840332, "learning_rate": 1.9933499739011248e-05, "loss": 0.7479, "step": 2561 }, { "epoch": 0.07, "grad_norm": 2.723686695098877, "learning_rate": 1.9933404127146187e-05, "loss": 0.7337, "step": 2562 }, { "epoch": 0.07, "grad_norm": 7.358110427856445, "learning_rate": 1.9933308446826346e-05, "loss": 0.8415, "step": 2563 }, { "epoch": 0.07, "grad_norm": 1.6591601371765137, "learning_rate": 1.993321269805238e-05, "loss": 0.6156, "step": 2564 }, { "epoch": 0.07, "grad_norm": 2.0170936584472656, "learning_rate": 1.9933116880824955e-05, "loss": 0.7473, "step": 2565 }, { "epoch": 0.07, "grad_norm": 17.277772903442383, "learning_rate": 1.993302099514473e-05, "loss": 0.6535, "step": 2566 }, { "epoch": 0.07, "grad_norm": 4.854493141174316, "learning_rate": 1.993292504101236e-05, "loss": 0.709, "step": 2567 }, { "epoch": 0.07, "grad_norm": 3.6763598918914795, "learning_rate": 1.993282901842852e-05, "loss": 0.8145, "step": 2568 }, { "epoch": 0.07, "grad_norm": 6.532716274261475, "learning_rate": 1.9932732927393857e-05, "loss": 0.5869, "step": 2569 }, { "epoch": 0.07, "grad_norm": 2.412264823913574, "learning_rate": 1.9932636767909043e-05, "loss": 0.7833, "step": 2570 }, { "epoch": 0.07, "grad_norm": 3.5764617919921875, "learning_rate": 1.993254053997473e-05, "loss": 0.9109, "step": 2571 }, { "epoch": 0.07, "grad_norm": 7.773634433746338, "learning_rate": 1.9932444243591595e-05, "loss": 0.8326, "step": 2572 }, { "epoch": 0.07, "grad_norm": 2.509474515914917, "learning_rate": 1.9932347878760288e-05, "loss": 0.7403, "step": 2573 }, { "epoch": 0.07, "grad_norm": 3.714622735977173, "learning_rate": 1.9932251445481483e-05, "loss": 0.7844, "step": 2574 }, { "epoch": 0.07, "grad_norm": 2.325124979019165, "learning_rate": 1.993215494375584e-05, "loss": 0.6021, "step": 2575 }, { "epoch": 0.07, "grad_norm": 3.863987445831299, "learning_rate": 1.9932058373584024e-05, "loss": 0.57, "step": 2576 }, { "epoch": 0.07, "grad_norm": 2.3399782180786133, "learning_rate": 1.9931961734966705e-05, "loss": 0.6892, "step": 2577 }, { "epoch": 0.07, "grad_norm": 4.701816082000732, "learning_rate": 1.9931865027904544e-05, "loss": 0.989, "step": 2578 }, { "epoch": 0.07, "grad_norm": 2.750256299972534, "learning_rate": 1.993176825239821e-05, "loss": 0.7692, "step": 2579 }, { "epoch": 0.07, "grad_norm": 2.1890106201171875, "learning_rate": 1.993167140844837e-05, "loss": 0.644, "step": 2580 }, { "epoch": 0.07, "grad_norm": 2.424506187438965, "learning_rate": 1.9931574496055687e-05, "loss": 0.7934, "step": 2581 }, { "epoch": 0.07, "grad_norm": 4.383705139160156, "learning_rate": 1.9931477515220832e-05, "loss": 0.7347, "step": 2582 }, { "epoch": 0.07, "grad_norm": 1.5422128438949585, "learning_rate": 1.9931380465944475e-05, "loss": 0.6837, "step": 2583 }, { "epoch": 0.07, "grad_norm": 3.77180552482605, "learning_rate": 1.9931283348227283e-05, "loss": 0.678, "step": 2584 }, { "epoch": 0.07, "grad_norm": 2.4584858417510986, "learning_rate": 1.9931186162069927e-05, "loss": 0.592, "step": 2585 }, { "epoch": 0.07, "grad_norm": 4.618023872375488, "learning_rate": 1.993108890747307e-05, "loss": 0.7605, "step": 2586 }, { "epoch": 0.07, "grad_norm": 4.823701858520508, "learning_rate": 1.9930991584437397e-05, "loss": 0.682, "step": 2587 }, { "epoch": 0.07, "grad_norm": 3.251664876937866, "learning_rate": 1.9930894192963565e-05, "loss": 0.7603, "step": 2588 }, { "epoch": 0.07, "grad_norm": 58.4820442199707, "learning_rate": 1.9930796733052245e-05, "loss": 0.7501, "step": 2589 }, { "epoch": 0.07, "grad_norm": 2.739393711090088, "learning_rate": 1.993069920470412e-05, "loss": 0.6876, "step": 2590 }, { "epoch": 0.07, "grad_norm": 3.1272904872894287, "learning_rate": 1.993060160791985e-05, "loss": 0.8192, "step": 2591 }, { "epoch": 0.07, "grad_norm": 3.4573402404785156, "learning_rate": 1.993050394270011e-05, "loss": 0.8924, "step": 2592 }, { "epoch": 0.07, "grad_norm": 2.0529916286468506, "learning_rate": 1.9930406209045585e-05, "loss": 0.601, "step": 2593 }, { "epoch": 0.07, "grad_norm": 2.237569808959961, "learning_rate": 1.9930308406956932e-05, "loss": 0.7099, "step": 2594 }, { "epoch": 0.07, "grad_norm": 3.0152928829193115, "learning_rate": 1.9930210536434837e-05, "loss": 0.6746, "step": 2595 }, { "epoch": 0.07, "grad_norm": 4.337177753448486, "learning_rate": 1.993011259747997e-05, "loss": 0.627, "step": 2596 }, { "epoch": 0.07, "grad_norm": 1.4625706672668457, "learning_rate": 1.9930014590093e-05, "loss": 0.6438, "step": 2597 }, { "epoch": 0.07, "grad_norm": 2.1466751098632812, "learning_rate": 1.992991651427461e-05, "loss": 0.606, "step": 2598 }, { "epoch": 0.07, "grad_norm": 2.6840720176696777, "learning_rate": 1.992981837002548e-05, "loss": 0.6154, "step": 2599 }, { "epoch": 0.07, "grad_norm": 12.186302185058594, "learning_rate": 1.992972015734627e-05, "loss": 0.8425, "step": 2600 }, { "epoch": 0.07, "grad_norm": 4.693686008453369, "learning_rate": 1.9929621876237675e-05, "loss": 0.6798, "step": 2601 }, { "epoch": 0.07, "grad_norm": 1.7770256996154785, "learning_rate": 1.9929523526700363e-05, "loss": 0.6084, "step": 2602 }, { "epoch": 0.07, "grad_norm": 3.9757797718048096, "learning_rate": 1.9929425108735013e-05, "loss": 0.7578, "step": 2603 }, { "epoch": 0.07, "grad_norm": 12.019624710083008, "learning_rate": 1.9929326622342303e-05, "loss": 0.669, "step": 2604 }, { "epoch": 0.07, "grad_norm": 1.7840183973312378, "learning_rate": 1.992922806752291e-05, "loss": 0.6934, "step": 2605 }, { "epoch": 0.07, "grad_norm": 2.897294759750366, "learning_rate": 1.9929129444277517e-05, "loss": 0.776, "step": 2606 }, { "epoch": 0.07, "grad_norm": 1.7048765420913696, "learning_rate": 1.9929030752606802e-05, "loss": 0.685, "step": 2607 }, { "epoch": 0.07, "grad_norm": 1.3745665550231934, "learning_rate": 1.9928931992511445e-05, "loss": 0.6612, "step": 2608 }, { "epoch": 0.07, "grad_norm": 4.940317153930664, "learning_rate": 1.9928833163992126e-05, "loss": 0.6932, "step": 2609 }, { "epoch": 0.07, "grad_norm": 5.381643295288086, "learning_rate": 1.992873426704953e-05, "loss": 0.8231, "step": 2610 }, { "epoch": 0.07, "grad_norm": 3.1320362091064453, "learning_rate": 1.9928635301684333e-05, "loss": 0.6958, "step": 2611 }, { "epoch": 0.07, "grad_norm": 8.441234588623047, "learning_rate": 1.9928536267897218e-05, "loss": 0.6275, "step": 2612 }, { "epoch": 0.07, "grad_norm": 2.3333115577697754, "learning_rate": 1.9928437165688872e-05, "loss": 0.6614, "step": 2613 }, { "epoch": 0.07, "grad_norm": 2.4898929595947266, "learning_rate": 1.9928337995059973e-05, "loss": 0.7763, "step": 2614 }, { "epoch": 0.07, "grad_norm": 3.4936909675598145, "learning_rate": 1.9928238756011208e-05, "loss": 0.8945, "step": 2615 }, { "epoch": 0.07, "grad_norm": 2.2310333251953125, "learning_rate": 1.9928139448543255e-05, "loss": 0.6948, "step": 2616 }, { "epoch": 0.07, "grad_norm": 8.206375122070312, "learning_rate": 1.99280400726568e-05, "loss": 0.8167, "step": 2617 }, { "epoch": 0.07, "grad_norm": 1.725716471672058, "learning_rate": 1.992794062835254e-05, "loss": 0.6626, "step": 2618 }, { "epoch": 0.07, "grad_norm": 2.8798344135284424, "learning_rate": 1.9927841115631142e-05, "loss": 0.9414, "step": 2619 }, { "epoch": 0.07, "grad_norm": 3.2506802082061768, "learning_rate": 1.9927741534493305e-05, "loss": 0.6832, "step": 2620 }, { "epoch": 0.07, "grad_norm": 2.4287192821502686, "learning_rate": 1.992764188493971e-05, "loss": 0.7902, "step": 2621 }, { "epoch": 0.07, "grad_norm": 3.4040098190307617, "learning_rate": 1.992754216697104e-05, "loss": 0.7639, "step": 2622 }, { "epoch": 0.07, "grad_norm": 3.967573404312134, "learning_rate": 1.992744238058799e-05, "loss": 0.7748, "step": 2623 }, { "epoch": 0.07, "grad_norm": 2.005758047103882, "learning_rate": 1.9927342525791243e-05, "loss": 0.8558, "step": 2624 }, { "epoch": 0.07, "grad_norm": 3.7151122093200684, "learning_rate": 1.992724260258149e-05, "loss": 0.7124, "step": 2625 }, { "epoch": 0.07, "grad_norm": 2.7562546730041504, "learning_rate": 1.992714261095942e-05, "loss": 0.6717, "step": 2626 }, { "epoch": 0.07, "grad_norm": 3.3448069095611572, "learning_rate": 1.9927042550925715e-05, "loss": 0.728, "step": 2627 }, { "epoch": 0.07, "grad_norm": 4.594882011413574, "learning_rate": 1.9926942422481073e-05, "loss": 0.8915, "step": 2628 }, { "epoch": 0.07, "grad_norm": 2.093379020690918, "learning_rate": 1.9926842225626177e-05, "loss": 0.6979, "step": 2629 }, { "epoch": 0.07, "grad_norm": 1.941071629524231, "learning_rate": 1.9926741960361723e-05, "loss": 0.4978, "step": 2630 }, { "epoch": 0.07, "grad_norm": 3.4687817096710205, "learning_rate": 1.99266416266884e-05, "loss": 0.6817, "step": 2631 }, { "epoch": 0.07, "grad_norm": 1.9234296083450317, "learning_rate": 1.9926541224606902e-05, "loss": 0.6754, "step": 2632 }, { "epoch": 0.07, "grad_norm": 3.352550506591797, "learning_rate": 1.9926440754117914e-05, "loss": 0.7981, "step": 2633 }, { "epoch": 0.07, "grad_norm": 1.9471195936203003, "learning_rate": 1.9926340215222136e-05, "loss": 0.6683, "step": 2634 }, { "epoch": 0.07, "grad_norm": 2.140028476715088, "learning_rate": 1.9926239607920255e-05, "loss": 0.6989, "step": 2635 }, { "epoch": 0.07, "grad_norm": 3.5853545665740967, "learning_rate": 1.992613893221297e-05, "loss": 0.7631, "step": 2636 }, { "epoch": 0.07, "grad_norm": 2.5502634048461914, "learning_rate": 1.992603818810097e-05, "loss": 0.8734, "step": 2637 }, { "epoch": 0.07, "grad_norm": 2.588787317276001, "learning_rate": 1.992593737558495e-05, "loss": 0.6338, "step": 2638 }, { "epoch": 0.07, "grad_norm": 1.6373447179794312, "learning_rate": 1.9925836494665605e-05, "loss": 0.8238, "step": 2639 }, { "epoch": 0.07, "grad_norm": 2.266181707382202, "learning_rate": 1.992573554534363e-05, "loss": 0.6038, "step": 2640 }, { "epoch": 0.07, "grad_norm": 2.8210031986236572, "learning_rate": 1.9925634527619727e-05, "loss": 0.6074, "step": 2641 }, { "epoch": 0.07, "grad_norm": 3.6492598056793213, "learning_rate": 1.9925533441494583e-05, "loss": 0.8445, "step": 2642 }, { "epoch": 0.07, "grad_norm": 4.062193870544434, "learning_rate": 1.99254322869689e-05, "loss": 0.5658, "step": 2643 }, { "epoch": 0.07, "grad_norm": 1.8014872074127197, "learning_rate": 1.992533106404337e-05, "loss": 0.6407, "step": 2644 }, { "epoch": 0.07, "grad_norm": 3.6239678859710693, "learning_rate": 1.9925229772718698e-05, "loss": 0.71, "step": 2645 }, { "epoch": 0.07, "grad_norm": 3.3686673641204834, "learning_rate": 1.9925128412995576e-05, "loss": 0.6262, "step": 2646 }, { "epoch": 0.07, "grad_norm": 2.4340133666992188, "learning_rate": 1.9925026984874706e-05, "loss": 0.6916, "step": 2647 }, { "epoch": 0.07, "grad_norm": 2.311218500137329, "learning_rate": 1.9924925488356783e-05, "loss": 0.6192, "step": 2648 }, { "epoch": 0.07, "grad_norm": 2.086158275604248, "learning_rate": 1.992482392344251e-05, "loss": 0.7414, "step": 2649 }, { "epoch": 0.07, "grad_norm": 2.132683277130127, "learning_rate": 1.992472229013259e-05, "loss": 0.7173, "step": 2650 }, { "epoch": 0.07, "grad_norm": 2.6748266220092773, "learning_rate": 1.9924620588427714e-05, "loss": 0.8103, "step": 2651 }, { "epoch": 0.07, "grad_norm": 1.9914910793304443, "learning_rate": 1.9924518818328588e-05, "loss": 0.633, "step": 2652 }, { "epoch": 0.07, "grad_norm": 2.570279121398926, "learning_rate": 1.9924416979835918e-05, "loss": 0.6874, "step": 2653 }, { "epoch": 0.07, "grad_norm": 1.8298041820526123, "learning_rate": 1.9924315072950397e-05, "loss": 0.6861, "step": 2654 }, { "epoch": 0.07, "grad_norm": 2.6408584117889404, "learning_rate": 1.9924213097672735e-05, "loss": 0.7923, "step": 2655 }, { "epoch": 0.07, "grad_norm": 3.3133206367492676, "learning_rate": 1.9924111054003627e-05, "loss": 0.7502, "step": 2656 }, { "epoch": 0.07, "grad_norm": 3.60516357421875, "learning_rate": 1.9924008941943785e-05, "loss": 0.5954, "step": 2657 }, { "epoch": 0.07, "grad_norm": 2.5832509994506836, "learning_rate": 1.9923906761493906e-05, "loss": 0.7582, "step": 2658 }, { "epoch": 0.07, "grad_norm": 2.054396390914917, "learning_rate": 1.9923804512654695e-05, "loss": 0.5595, "step": 2659 }, { "epoch": 0.07, "grad_norm": 3.2416880130767822, "learning_rate": 1.992370219542686e-05, "loss": 0.6592, "step": 2660 }, { "epoch": 0.07, "grad_norm": 2.23398756980896, "learning_rate": 1.9923599809811105e-05, "loss": 0.7178, "step": 2661 }, { "epoch": 0.07, "grad_norm": 1.8315004110336304, "learning_rate": 1.992349735580813e-05, "loss": 0.7943, "step": 2662 }, { "epoch": 0.07, "grad_norm": 2.380690097808838, "learning_rate": 1.992339483341865e-05, "loss": 0.6869, "step": 2663 }, { "epoch": 0.07, "grad_norm": 2.9036405086517334, "learning_rate": 1.992329224264337e-05, "loss": 0.6657, "step": 2664 }, { "epoch": 0.07, "grad_norm": 2.4888195991516113, "learning_rate": 1.9923189583482992e-05, "loss": 0.9588, "step": 2665 }, { "epoch": 0.07, "grad_norm": 3.0635929107666016, "learning_rate": 1.9923086855938222e-05, "loss": 0.5627, "step": 2666 }, { "epoch": 0.07, "grad_norm": 7.805595874786377, "learning_rate": 1.992298406000978e-05, "loss": 0.8989, "step": 2667 }, { "epoch": 0.07, "grad_norm": 3.5294880867004395, "learning_rate": 1.9922881195698357e-05, "loss": 0.5524, "step": 2668 }, { "epoch": 0.07, "grad_norm": 4.243929386138916, "learning_rate": 1.9922778263004674e-05, "loss": 0.9748, "step": 2669 }, { "epoch": 0.07, "grad_norm": 2.8067784309387207, "learning_rate": 1.9922675261929438e-05, "loss": 0.7165, "step": 2670 }, { "epoch": 0.07, "grad_norm": 4.601934909820557, "learning_rate": 1.9922572192473358e-05, "loss": 0.778, "step": 2671 }, { "epoch": 0.07, "grad_norm": 2.2854390144348145, "learning_rate": 1.9922469054637145e-05, "loss": 0.7038, "step": 2672 }, { "epoch": 0.07, "grad_norm": 3.420557975769043, "learning_rate": 1.992236584842151e-05, "loss": 0.8016, "step": 2673 }, { "epoch": 0.07, "grad_norm": 1.8526049852371216, "learning_rate": 1.992226257382716e-05, "loss": 0.7389, "step": 2674 }, { "epoch": 0.07, "grad_norm": 3.133056402206421, "learning_rate": 1.9922159230854813e-05, "loss": 0.6714, "step": 2675 }, { "epoch": 0.07, "grad_norm": 4.023059844970703, "learning_rate": 1.9922055819505177e-05, "loss": 0.7811, "step": 2676 }, { "epoch": 0.07, "grad_norm": 2.617286205291748, "learning_rate": 1.9921952339778965e-05, "loss": 0.705, "step": 2677 }, { "epoch": 0.07, "grad_norm": 2.4954442977905273, "learning_rate": 1.9921848791676895e-05, "loss": 0.7171, "step": 2678 }, { "epoch": 0.07, "grad_norm": 4.745936393737793, "learning_rate": 1.9921745175199674e-05, "loss": 0.6008, "step": 2679 }, { "epoch": 0.07, "grad_norm": 1.801533579826355, "learning_rate": 1.9921641490348018e-05, "loss": 0.5898, "step": 2680 }, { "epoch": 0.07, "grad_norm": 3.130239963531494, "learning_rate": 1.9921537737122643e-05, "loss": 0.8374, "step": 2681 }, { "epoch": 0.07, "grad_norm": 3.008125066757202, "learning_rate": 1.992143391552426e-05, "loss": 0.7683, "step": 2682 }, { "epoch": 0.07, "grad_norm": 2.2720677852630615, "learning_rate": 1.9921330025553592e-05, "loss": 0.6863, "step": 2683 }, { "epoch": 0.07, "grad_norm": 2.7159321308135986, "learning_rate": 1.992122606721135e-05, "loss": 0.9234, "step": 2684 }, { "epoch": 0.07, "grad_norm": 3.6399874687194824, "learning_rate": 1.9921122040498253e-05, "loss": 0.9518, "step": 2685 }, { "epoch": 0.07, "grad_norm": 2.91123628616333, "learning_rate": 1.9921017945415012e-05, "loss": 0.6827, "step": 2686 }, { "epoch": 0.07, "grad_norm": 3.4558255672454834, "learning_rate": 1.992091378196235e-05, "loss": 0.803, "step": 2687 }, { "epoch": 0.07, "grad_norm": 5.919217586517334, "learning_rate": 1.9920809550140983e-05, "loss": 0.7607, "step": 2688 }, { "epoch": 0.07, "grad_norm": 6.80197811126709, "learning_rate": 1.992070524995163e-05, "loss": 0.7856, "step": 2689 }, { "epoch": 0.07, "grad_norm": 2.6422154903411865, "learning_rate": 1.9920600881395007e-05, "loss": 0.7377, "step": 2690 }, { "epoch": 0.07, "grad_norm": 1.7122890949249268, "learning_rate": 1.992049644447184e-05, "loss": 0.7736, "step": 2691 }, { "epoch": 0.07, "grad_norm": 3.5374703407287598, "learning_rate": 1.992039193918284e-05, "loss": 0.8089, "step": 2692 }, { "epoch": 0.07, "grad_norm": 2.354123115539551, "learning_rate": 1.9920287365528734e-05, "loss": 0.7259, "step": 2693 }, { "epoch": 0.07, "grad_norm": 5.049844264984131, "learning_rate": 1.992018272351024e-05, "loss": 0.7672, "step": 2694 }, { "epoch": 0.07, "grad_norm": 1.9573636054992676, "learning_rate": 1.9920078013128075e-05, "loss": 0.5311, "step": 2695 }, { "epoch": 0.07, "grad_norm": 2.7521653175354004, "learning_rate": 1.9919973234382966e-05, "loss": 0.6068, "step": 2696 }, { "epoch": 0.07, "grad_norm": 7.535379409790039, "learning_rate": 1.991986838727564e-05, "loss": 0.8077, "step": 2697 }, { "epoch": 0.07, "grad_norm": 3.687446355819702, "learning_rate": 1.9919763471806805e-05, "loss": 0.8129, "step": 2698 }, { "epoch": 0.07, "grad_norm": 2.717881441116333, "learning_rate": 1.99196584879772e-05, "loss": 0.6775, "step": 2699 }, { "epoch": 0.07, "grad_norm": 2.819561243057251, "learning_rate": 1.9919553435787534e-05, "loss": 0.9505, "step": 2700 }, { "epoch": 0.07, "grad_norm": 2.50069260597229, "learning_rate": 1.991944831523854e-05, "loss": 0.7424, "step": 2701 }, { "epoch": 0.07, "grad_norm": 2.1099517345428467, "learning_rate": 1.9919343126330944e-05, "loss": 0.6626, "step": 2702 }, { "epoch": 0.07, "grad_norm": 3.5922744274139404, "learning_rate": 1.9919237869065462e-05, "loss": 0.8254, "step": 2703 }, { "epoch": 0.07, "grad_norm": 2.453644037246704, "learning_rate": 1.991913254344283e-05, "loss": 0.7233, "step": 2704 }, { "epoch": 0.07, "grad_norm": 1.4749802350997925, "learning_rate": 1.9919027149463762e-05, "loss": 0.7361, "step": 2705 }, { "epoch": 0.07, "grad_norm": 2.3439338207244873, "learning_rate": 1.9918921687128994e-05, "loss": 0.696, "step": 2706 }, { "epoch": 0.07, "grad_norm": 1.4896268844604492, "learning_rate": 1.991881615643925e-05, "loss": 0.674, "step": 2707 }, { "epoch": 0.07, "grad_norm": 4.021912574768066, "learning_rate": 1.9918710557395252e-05, "loss": 0.7095, "step": 2708 }, { "epoch": 0.07, "grad_norm": 3.1013753414154053, "learning_rate": 1.991860488999774e-05, "loss": 0.7395, "step": 2709 }, { "epoch": 0.07, "grad_norm": 3.6027748584747314, "learning_rate": 1.991849915424743e-05, "loss": 0.9199, "step": 2710 }, { "epoch": 0.07, "grad_norm": 2.0007822513580322, "learning_rate": 1.9918393350145056e-05, "loss": 0.6895, "step": 2711 }, { "epoch": 0.07, "grad_norm": 3.603168249130249, "learning_rate": 1.9918287477691345e-05, "loss": 0.8045, "step": 2712 }, { "epoch": 0.07, "grad_norm": 2.377903699874878, "learning_rate": 1.991818153688703e-05, "loss": 0.5611, "step": 2713 }, { "epoch": 0.07, "grad_norm": 2.839629888534546, "learning_rate": 1.991807552773284e-05, "loss": 0.7396, "step": 2714 }, { "epoch": 0.07, "grad_norm": 3.1986703872680664, "learning_rate": 1.99179694502295e-05, "loss": 0.7432, "step": 2715 }, { "epoch": 0.07, "grad_norm": 2.4335060119628906, "learning_rate": 1.991786330437775e-05, "loss": 0.6216, "step": 2716 }, { "epoch": 0.07, "grad_norm": 1.9443970918655396, "learning_rate": 1.9917757090178312e-05, "loss": 0.7309, "step": 2717 }, { "epoch": 0.07, "grad_norm": 2.2005183696746826, "learning_rate": 1.9917650807631926e-05, "loss": 0.6584, "step": 2718 }, { "epoch": 0.07, "grad_norm": 4.47362756729126, "learning_rate": 1.9917544456739322e-05, "loss": 0.601, "step": 2719 }, { "epoch": 0.07, "grad_norm": 3.512800455093384, "learning_rate": 1.9917438037501236e-05, "loss": 0.5676, "step": 2720 }, { "epoch": 0.07, "grad_norm": 2.2122268676757812, "learning_rate": 1.9917331549918395e-05, "loss": 0.5267, "step": 2721 }, { "epoch": 0.07, "grad_norm": 4.105897426605225, "learning_rate": 1.9917224993991536e-05, "loss": 0.76, "step": 2722 }, { "epoch": 0.07, "grad_norm": 3.0025007724761963, "learning_rate": 1.991711836972139e-05, "loss": 0.8096, "step": 2723 }, { "epoch": 0.07, "grad_norm": 2.66589617729187, "learning_rate": 1.9917011677108698e-05, "loss": 0.6695, "step": 2724 }, { "epoch": 0.07, "grad_norm": 2.8579514026641846, "learning_rate": 1.991690491615419e-05, "loss": 0.8009, "step": 2725 }, { "epoch": 0.07, "grad_norm": 1.7445483207702637, "learning_rate": 1.9916798086858604e-05, "loss": 0.6375, "step": 2726 }, { "epoch": 0.07, "grad_norm": 2.4045510292053223, "learning_rate": 1.9916691189222675e-05, "loss": 0.6519, "step": 2727 }, { "epoch": 0.07, "grad_norm": 3.0678021907806396, "learning_rate": 1.9916584223247142e-05, "loss": 0.6592, "step": 2728 }, { "epoch": 0.07, "grad_norm": 12.281411170959473, "learning_rate": 1.9916477188932744e-05, "loss": 0.64, "step": 2729 }, { "epoch": 0.07, "grad_norm": 2.292351484298706, "learning_rate": 1.9916370086280212e-05, "loss": 0.6191, "step": 2730 }, { "epoch": 0.07, "grad_norm": 4.283313274383545, "learning_rate": 1.9916262915290285e-05, "loss": 0.8078, "step": 2731 }, { "epoch": 0.07, "grad_norm": 2.812544822692871, "learning_rate": 1.9916155675963708e-05, "loss": 0.6976, "step": 2732 }, { "epoch": 0.07, "grad_norm": 1.7064974308013916, "learning_rate": 1.9916048368301216e-05, "loss": 0.6302, "step": 2733 }, { "epoch": 0.07, "grad_norm": 2.137535572052002, "learning_rate": 1.9915940992303543e-05, "loss": 0.6569, "step": 2734 }, { "epoch": 0.07, "grad_norm": 3.496914863586426, "learning_rate": 1.991583354797144e-05, "loss": 0.7831, "step": 2735 }, { "epoch": 0.07, "grad_norm": 4.310373306274414, "learning_rate": 1.9915726035305642e-05, "loss": 0.6909, "step": 2736 }, { "epoch": 0.07, "grad_norm": 3.5648770332336426, "learning_rate": 1.9915618454306886e-05, "loss": 0.6277, "step": 2737 }, { "epoch": 0.07, "grad_norm": 4.862259864807129, "learning_rate": 1.991551080497592e-05, "loss": 0.7806, "step": 2738 }, { "epoch": 0.07, "grad_norm": 1.5937026739120483, "learning_rate": 1.991540308731348e-05, "loss": 0.5468, "step": 2739 }, { "epoch": 0.07, "grad_norm": 3.660315990447998, "learning_rate": 1.9915295301320318e-05, "loss": 0.6309, "step": 2740 }, { "epoch": 0.07, "grad_norm": 6.080138683319092, "learning_rate": 1.9915187446997165e-05, "loss": 0.7462, "step": 2741 }, { "epoch": 0.07, "grad_norm": 7.3351149559021, "learning_rate": 1.9915079524344773e-05, "loss": 0.6204, "step": 2742 }, { "epoch": 0.07, "grad_norm": 5.359007835388184, "learning_rate": 1.9914971533363876e-05, "loss": 0.8201, "step": 2743 }, { "epoch": 0.07, "grad_norm": 6.281933307647705, "learning_rate": 1.991486347405523e-05, "loss": 0.6463, "step": 2744 }, { "epoch": 0.07, "grad_norm": 4.3224382400512695, "learning_rate": 1.9914755346419573e-05, "loss": 1.0331, "step": 2745 }, { "epoch": 0.07, "grad_norm": 2.54239559173584, "learning_rate": 1.991464715045765e-05, "loss": 0.5478, "step": 2746 }, { "epoch": 0.07, "grad_norm": 4.601893901824951, "learning_rate": 1.9914538886170207e-05, "loss": 0.6236, "step": 2747 }, { "epoch": 0.07, "grad_norm": 3.036365270614624, "learning_rate": 1.9914430553557992e-05, "loss": 0.6596, "step": 2748 }, { "epoch": 0.07, "grad_norm": 4.268160820007324, "learning_rate": 1.991432215262175e-05, "loss": 0.7174, "step": 2749 }, { "epoch": 0.07, "grad_norm": 2.4787895679473877, "learning_rate": 1.991421368336223e-05, "loss": 0.7807, "step": 2750 }, { "epoch": 0.07, "grad_norm": 5.767643451690674, "learning_rate": 1.9914105145780177e-05, "loss": 0.744, "step": 2751 }, { "epoch": 0.07, "grad_norm": 2.778719663619995, "learning_rate": 1.991399653987634e-05, "loss": 0.9796, "step": 2752 }, { "epoch": 0.07, "grad_norm": 4.850071907043457, "learning_rate": 1.9913887865651467e-05, "loss": 0.8641, "step": 2753 }, { "epoch": 0.07, "grad_norm": 4.874581813812256, "learning_rate": 1.991377912310631e-05, "loss": 0.7232, "step": 2754 }, { "epoch": 0.07, "grad_norm": 2.317408800125122, "learning_rate": 1.9913670312241613e-05, "loss": 0.5847, "step": 2755 }, { "epoch": 0.07, "grad_norm": 3.4938719272613525, "learning_rate": 1.991356143305813e-05, "loss": 0.7177, "step": 2756 }, { "epoch": 0.07, "grad_norm": 3.1860854625701904, "learning_rate": 1.991345248555661e-05, "loss": 0.664, "step": 2757 }, { "epoch": 0.07, "grad_norm": 4.091212749481201, "learning_rate": 1.9913343469737803e-05, "loss": 0.805, "step": 2758 }, { "epoch": 0.07, "grad_norm": 2.464816093444824, "learning_rate": 1.991323438560246e-05, "loss": 0.6365, "step": 2759 }, { "epoch": 0.07, "grad_norm": 4.133347988128662, "learning_rate": 1.9913125233151334e-05, "loss": 0.5909, "step": 2760 }, { "epoch": 0.07, "grad_norm": 1.9703177213668823, "learning_rate": 1.9913016012385178e-05, "loss": 0.6364, "step": 2761 }, { "epoch": 0.07, "grad_norm": 2.8527228832244873, "learning_rate": 1.9912906723304744e-05, "loss": 0.7793, "step": 2762 }, { "epoch": 0.07, "grad_norm": 3.128164529800415, "learning_rate": 1.991279736591078e-05, "loss": 0.7397, "step": 2763 }, { "epoch": 0.07, "grad_norm": 6.112663745880127, "learning_rate": 1.991268794020405e-05, "loss": 0.738, "step": 2764 }, { "epoch": 0.07, "grad_norm": 4.970628261566162, "learning_rate": 1.99125784461853e-05, "loss": 0.8291, "step": 2765 }, { "epoch": 0.07, "grad_norm": 3.118668556213379, "learning_rate": 1.9912468883855288e-05, "loss": 0.5634, "step": 2766 }, { "epoch": 0.07, "grad_norm": 1.925061583518982, "learning_rate": 1.9912359253214767e-05, "loss": 0.6597, "step": 2767 }, { "epoch": 0.07, "grad_norm": 6.626150131225586, "learning_rate": 1.9912249554264496e-05, "loss": 0.7291, "step": 2768 }, { "epoch": 0.07, "grad_norm": 6.656291961669922, "learning_rate": 1.9912139787005225e-05, "loss": 0.8201, "step": 2769 }, { "epoch": 0.07, "grad_norm": 3.319899082183838, "learning_rate": 1.9912029951437718e-05, "loss": 0.6973, "step": 2770 }, { "epoch": 0.07, "grad_norm": 2.4654860496520996, "learning_rate": 1.9911920047562726e-05, "loss": 0.5381, "step": 2771 }, { "epoch": 0.07, "grad_norm": 3.3767271041870117, "learning_rate": 1.9911810075381008e-05, "loss": 0.6805, "step": 2772 }, { "epoch": 0.07, "grad_norm": 2.6993680000305176, "learning_rate": 1.9911700034893324e-05, "loss": 0.5935, "step": 2773 }, { "epoch": 0.07, "grad_norm": 2.0810720920562744, "learning_rate": 1.9911589926100428e-05, "loss": 0.5964, "step": 2774 }, { "epoch": 0.07, "grad_norm": 3.661789894104004, "learning_rate": 1.991147974900308e-05, "loss": 0.7177, "step": 2775 }, { "epoch": 0.07, "grad_norm": 1.9818311929702759, "learning_rate": 1.9911369503602045e-05, "loss": 0.7135, "step": 2776 }, { "epoch": 0.07, "grad_norm": 3.173699378967285, "learning_rate": 1.9911259189898076e-05, "loss": 0.6699, "step": 2777 }, { "epoch": 0.07, "grad_norm": 2.6314187049865723, "learning_rate": 1.9911148807891932e-05, "loss": 0.7381, "step": 2778 }, { "epoch": 0.07, "grad_norm": 3.221109628677368, "learning_rate": 1.9911038357584385e-05, "loss": 0.6764, "step": 2779 }, { "epoch": 0.07, "grad_norm": 3.3929498195648193, "learning_rate": 1.9910927838976183e-05, "loss": 0.8296, "step": 2780 }, { "epoch": 0.07, "grad_norm": 3.2591710090637207, "learning_rate": 1.9910817252068093e-05, "loss": 0.8859, "step": 2781 }, { "epoch": 0.07, "grad_norm": 3.5004630088806152, "learning_rate": 1.9910706596860877e-05, "loss": 0.8388, "step": 2782 }, { "epoch": 0.07, "grad_norm": 1.8814479112625122, "learning_rate": 1.99105958733553e-05, "loss": 0.7764, "step": 2783 }, { "epoch": 0.07, "grad_norm": 3.298189401626587, "learning_rate": 1.9910485081552123e-05, "loss": 0.6913, "step": 2784 }, { "epoch": 0.07, "grad_norm": 3.0215671062469482, "learning_rate": 1.991037422145211e-05, "loss": 0.7703, "step": 2785 }, { "epoch": 0.07, "grad_norm": 3.4167940616607666, "learning_rate": 1.991026329305602e-05, "loss": 0.8115, "step": 2786 }, { "epoch": 0.07, "grad_norm": 4.375882625579834, "learning_rate": 1.9910152296364624e-05, "loss": 0.5916, "step": 2787 }, { "epoch": 0.07, "grad_norm": 2.170888662338257, "learning_rate": 1.9910041231378684e-05, "loss": 0.7574, "step": 2788 }, { "epoch": 0.07, "grad_norm": 2.1768746376037598, "learning_rate": 1.9909930098098968e-05, "loss": 0.783, "step": 2789 }, { "epoch": 0.07, "grad_norm": 3.1603240966796875, "learning_rate": 1.9909818896526234e-05, "loss": 0.6697, "step": 2790 }, { "epoch": 0.07, "grad_norm": 2.223787307739258, "learning_rate": 1.990970762666126e-05, "loss": 0.8592, "step": 2791 }, { "epoch": 0.07, "grad_norm": 2.051734447479248, "learning_rate": 1.9909596288504805e-05, "loss": 0.8259, "step": 2792 }, { "epoch": 0.07, "grad_norm": 8.446090698242188, "learning_rate": 1.9909484882057637e-05, "loss": 0.7614, "step": 2793 }, { "epoch": 0.07, "grad_norm": 1.7187000513076782, "learning_rate": 1.990937340732053e-05, "loss": 0.727, "step": 2794 }, { "epoch": 0.07, "grad_norm": 3.673809289932251, "learning_rate": 1.990926186429424e-05, "loss": 0.7957, "step": 2795 }, { "epoch": 0.07, "grad_norm": 4.447478294372559, "learning_rate": 1.9909150252979545e-05, "loss": 0.706, "step": 2796 }, { "epoch": 0.07, "grad_norm": 6.0304083824157715, "learning_rate": 1.990903857337721e-05, "loss": 0.6637, "step": 2797 }, { "epoch": 0.07, "grad_norm": 2.4423208236694336, "learning_rate": 1.9908926825488013e-05, "loss": 0.8668, "step": 2798 }, { "epoch": 0.07, "grad_norm": 2.33661150932312, "learning_rate": 1.9908815009312713e-05, "loss": 0.7711, "step": 2799 }, { "epoch": 0.07, "grad_norm": 2.008676290512085, "learning_rate": 1.9908703124852087e-05, "loss": 0.6518, "step": 2800 }, { "epoch": 0.07, "grad_norm": 2.4211528301239014, "learning_rate": 1.9908591172106907e-05, "loss": 0.8516, "step": 2801 }, { "epoch": 0.07, "grad_norm": 1.9721709489822388, "learning_rate": 1.990847915107794e-05, "loss": 0.7238, "step": 2802 }, { "epoch": 0.07, "grad_norm": 2.7559280395507812, "learning_rate": 1.9908367061765955e-05, "loss": 0.8345, "step": 2803 }, { "epoch": 0.07, "grad_norm": 2.5821170806884766, "learning_rate": 1.9908254904171735e-05, "loss": 0.8335, "step": 2804 }, { "epoch": 0.07, "grad_norm": 3.5199801921844482, "learning_rate": 1.9908142678296044e-05, "loss": 0.6555, "step": 2805 }, { "epoch": 0.07, "grad_norm": 1.8498811721801758, "learning_rate": 1.990803038413966e-05, "loss": 0.7098, "step": 2806 }, { "epoch": 0.07, "grad_norm": 3.6095967292785645, "learning_rate": 1.9907918021703355e-05, "loss": 0.8842, "step": 2807 }, { "epoch": 0.07, "grad_norm": 3.918186664581299, "learning_rate": 1.99078055909879e-05, "loss": 0.6809, "step": 2808 }, { "epoch": 0.07, "grad_norm": 2.036627769470215, "learning_rate": 1.9907693091994077e-05, "loss": 0.7474, "step": 2809 }, { "epoch": 0.07, "grad_norm": 2.268547773361206, "learning_rate": 1.990758052472266e-05, "loss": 0.7057, "step": 2810 }, { "epoch": 0.07, "grad_norm": 2.6013638973236084, "learning_rate": 1.990746788917442e-05, "loss": 0.8475, "step": 2811 }, { "epoch": 0.07, "grad_norm": 2.446967124938965, "learning_rate": 1.9907355185350137e-05, "loss": 0.7457, "step": 2812 }, { "epoch": 0.07, "grad_norm": 1.8896796703338623, "learning_rate": 1.9907242413250586e-05, "loss": 0.7613, "step": 2813 }, { "epoch": 0.07, "grad_norm": 2.684051990509033, "learning_rate": 1.9907129572876545e-05, "loss": 0.709, "step": 2814 }, { "epoch": 0.07, "grad_norm": 2.595398187637329, "learning_rate": 1.990701666422879e-05, "loss": 0.8024, "step": 2815 }, { "epoch": 0.07, "grad_norm": 2.8956847190856934, "learning_rate": 1.9906903687308103e-05, "loss": 0.7925, "step": 2816 }, { "epoch": 0.07, "grad_norm": 2.3764846324920654, "learning_rate": 1.9906790642115257e-05, "loss": 0.7011, "step": 2817 }, { "epoch": 0.07, "grad_norm": 2.862488031387329, "learning_rate": 1.9906677528651034e-05, "loss": 0.5346, "step": 2818 }, { "epoch": 0.07, "grad_norm": 1.440959095954895, "learning_rate": 1.9906564346916216e-05, "loss": 0.6819, "step": 2819 }, { "epoch": 0.07, "grad_norm": 1.7729039192199707, "learning_rate": 1.990645109691158e-05, "loss": 0.7883, "step": 2820 }, { "epoch": 0.07, "grad_norm": 5.985317230224609, "learning_rate": 1.9906337778637904e-05, "loss": 0.9889, "step": 2821 }, { "epoch": 0.07, "grad_norm": 3.6731014251708984, "learning_rate": 1.9906224392095974e-05, "loss": 0.7576, "step": 2822 }, { "epoch": 0.07, "grad_norm": 3.036224603652954, "learning_rate": 1.9906110937286567e-05, "loss": 0.7377, "step": 2823 }, { "epoch": 0.07, "grad_norm": 3.494694471359253, "learning_rate": 1.9905997414210472e-05, "loss": 0.8119, "step": 2824 }, { "epoch": 0.07, "grad_norm": 2.1827151775360107, "learning_rate": 1.990588382286846e-05, "loss": 0.7007, "step": 2825 }, { "epoch": 0.07, "grad_norm": 3.9268088340759277, "learning_rate": 1.9905770163261328e-05, "loss": 0.6972, "step": 2826 }, { "epoch": 0.07, "grad_norm": 2.294647693634033, "learning_rate": 1.9905656435389847e-05, "loss": 0.689, "step": 2827 }, { "epoch": 0.07, "grad_norm": 1.8254796266555786, "learning_rate": 1.9905542639254802e-05, "loss": 0.6444, "step": 2828 }, { "epoch": 0.07, "grad_norm": 2.131334066390991, "learning_rate": 1.9905428774856986e-05, "loss": 0.752, "step": 2829 }, { "epoch": 0.07, "grad_norm": 2.0256235599517822, "learning_rate": 1.9905314842197174e-05, "loss": 0.6805, "step": 2830 }, { "epoch": 0.07, "grad_norm": 2.3748691082000732, "learning_rate": 1.9905200841276156e-05, "loss": 0.7099, "step": 2831 }, { "epoch": 0.07, "grad_norm": 1.9127994775772095, "learning_rate": 1.9905086772094718e-05, "loss": 0.809, "step": 2832 }, { "epoch": 0.07, "grad_norm": 1.4380073547363281, "learning_rate": 1.9904972634653646e-05, "loss": 0.66, "step": 2833 }, { "epoch": 0.07, "grad_norm": 1.8959839344024658, "learning_rate": 1.9904858428953725e-05, "loss": 0.6454, "step": 2834 }, { "epoch": 0.07, "grad_norm": 1.8000462055206299, "learning_rate": 1.990474415499574e-05, "loss": 0.8516, "step": 2835 }, { "epoch": 0.07, "grad_norm": 5.097937107086182, "learning_rate": 1.990462981278048e-05, "loss": 0.8301, "step": 2836 }, { "epoch": 0.07, "grad_norm": 1.6775304079055786, "learning_rate": 1.990451540230874e-05, "loss": 0.5577, "step": 2837 }, { "epoch": 0.07, "grad_norm": 4.866496562957764, "learning_rate": 1.99044009235813e-05, "loss": 0.6558, "step": 2838 }, { "epoch": 0.07, "grad_norm": 3.188138961791992, "learning_rate": 1.990428637659895e-05, "loss": 0.8109, "step": 2839 }, { "epoch": 0.07, "grad_norm": 1.7412893772125244, "learning_rate": 1.9904171761362482e-05, "loss": 0.7907, "step": 2840 }, { "epoch": 0.07, "grad_norm": 2.4932007789611816, "learning_rate": 1.9904057077872686e-05, "loss": 0.627, "step": 2841 }, { "epoch": 0.07, "grad_norm": 2.9008865356445312, "learning_rate": 1.9903942326130347e-05, "loss": 0.7587, "step": 2842 }, { "epoch": 0.07, "grad_norm": 4.718896389007568, "learning_rate": 1.9903827506136262e-05, "loss": 0.7137, "step": 2843 }, { "epoch": 0.07, "grad_norm": 3.092336893081665, "learning_rate": 1.990371261789122e-05, "loss": 0.6614, "step": 2844 }, { "epoch": 0.07, "grad_norm": 2.8064827919006348, "learning_rate": 1.990359766139601e-05, "loss": 0.6595, "step": 2845 }, { "epoch": 0.07, "grad_norm": 2.080075740814209, "learning_rate": 1.990348263665143e-05, "loss": 0.7345, "step": 2846 }, { "epoch": 0.07, "grad_norm": 2.1424810886383057, "learning_rate": 1.990336754365827e-05, "loss": 0.7352, "step": 2847 }, { "epoch": 0.07, "grad_norm": 2.451749086380005, "learning_rate": 1.9903252382417322e-05, "loss": 0.8134, "step": 2848 }, { "epoch": 0.07, "grad_norm": 2.739621162414551, "learning_rate": 1.990313715292938e-05, "loss": 0.6841, "step": 2849 }, { "epoch": 0.07, "grad_norm": 3.928182601928711, "learning_rate": 1.9903021855195238e-05, "loss": 0.7976, "step": 2850 }, { "epoch": 0.07, "grad_norm": 3.3453330993652344, "learning_rate": 1.9902906489215695e-05, "loss": 0.7254, "step": 2851 }, { "epoch": 0.07, "grad_norm": 2.4998373985290527, "learning_rate": 1.9902791054991538e-05, "loss": 0.7405, "step": 2852 }, { "epoch": 0.07, "grad_norm": 1.8237794637680054, "learning_rate": 1.9902675552523566e-05, "loss": 0.7331, "step": 2853 }, { "epoch": 0.07, "grad_norm": 2.0931472778320312, "learning_rate": 1.990255998181258e-05, "loss": 0.7337, "step": 2854 }, { "epoch": 0.07, "grad_norm": 2.3852531909942627, "learning_rate": 1.990244434285937e-05, "loss": 0.813, "step": 2855 }, { "epoch": 0.07, "grad_norm": 2.0628292560577393, "learning_rate": 1.9902328635664733e-05, "loss": 0.6012, "step": 2856 }, { "epoch": 0.07, "grad_norm": 3.5610883235931396, "learning_rate": 1.990221286022947e-05, "loss": 0.6808, "step": 2857 }, { "epoch": 0.07, "grad_norm": 1.9198195934295654, "learning_rate": 1.9902097016554377e-05, "loss": 0.6691, "step": 2858 }, { "epoch": 0.07, "grad_norm": 3.076622724533081, "learning_rate": 1.990198110464025e-05, "loss": 0.6947, "step": 2859 }, { "epoch": 0.07, "grad_norm": 1.947942852973938, "learning_rate": 1.990186512448789e-05, "loss": 0.6787, "step": 2860 }, { "epoch": 0.07, "grad_norm": 2.4071028232574463, "learning_rate": 1.99017490760981e-05, "loss": 0.6176, "step": 2861 }, { "epoch": 0.07, "grad_norm": 2.438347578048706, "learning_rate": 1.9901632959471675e-05, "loss": 0.3934, "step": 2862 }, { "epoch": 0.07, "grad_norm": 2.8790442943573, "learning_rate": 1.9901516774609417e-05, "loss": 0.718, "step": 2863 }, { "epoch": 0.07, "grad_norm": 3.427029609680176, "learning_rate": 1.9901400521512127e-05, "loss": 0.8278, "step": 2864 }, { "epoch": 0.07, "grad_norm": 3.269233465194702, "learning_rate": 1.9901284200180602e-05, "loss": 0.7947, "step": 2865 }, { "epoch": 0.07, "grad_norm": 1.9261562824249268, "learning_rate": 1.9901167810615648e-05, "loss": 0.7134, "step": 2866 }, { "epoch": 0.07, "grad_norm": 2.063408851623535, "learning_rate": 1.9901051352818067e-05, "loss": 0.6313, "step": 2867 }, { "epoch": 0.07, "grad_norm": 1.980548620223999, "learning_rate": 1.9900934826788664e-05, "loss": 0.6543, "step": 2868 }, { "epoch": 0.07, "grad_norm": 1.8513609170913696, "learning_rate": 1.990081823252823e-05, "loss": 0.6817, "step": 2869 }, { "epoch": 0.07, "grad_norm": 2.5323281288146973, "learning_rate": 1.9900701570037585e-05, "loss": 0.5997, "step": 2870 }, { "epoch": 0.07, "grad_norm": 8.496514320373535, "learning_rate": 1.990058483931752e-05, "loss": 0.7341, "step": 2871 }, { "epoch": 0.07, "grad_norm": 2.2440078258514404, "learning_rate": 1.9900468040368848e-05, "loss": 0.8011, "step": 2872 }, { "epoch": 0.07, "grad_norm": 2.3875620365142822, "learning_rate": 1.990035117319237e-05, "loss": 0.8551, "step": 2873 }, { "epoch": 0.07, "grad_norm": 3.01613187789917, "learning_rate": 1.990023423778889e-05, "loss": 0.6041, "step": 2874 }, { "epoch": 0.07, "grad_norm": 1.8356523513793945, "learning_rate": 1.9900117234159215e-05, "loss": 0.6741, "step": 2875 }, { "epoch": 0.07, "grad_norm": 4.408482551574707, "learning_rate": 1.9900000162304153e-05, "loss": 0.724, "step": 2876 }, { "epoch": 0.07, "grad_norm": 4.632290363311768, "learning_rate": 1.9899883022224507e-05, "loss": 0.7127, "step": 2877 }, { "epoch": 0.07, "grad_norm": 2.9476418495178223, "learning_rate": 1.9899765813921094e-05, "loss": 0.8112, "step": 2878 }, { "epoch": 0.07, "grad_norm": 2.762498378753662, "learning_rate": 1.9899648537394707e-05, "loss": 0.7026, "step": 2879 }, { "epoch": 0.07, "grad_norm": 7.141941070556641, "learning_rate": 1.9899531192646164e-05, "loss": 0.6034, "step": 2880 }, { "epoch": 0.07, "grad_norm": 3.3232972621917725, "learning_rate": 1.9899413779676274e-05, "loss": 0.6682, "step": 2881 }, { "epoch": 0.07, "grad_norm": 2.8437206745147705, "learning_rate": 1.9899296298485842e-05, "loss": 0.9303, "step": 2882 }, { "epoch": 0.07, "grad_norm": 3.2367470264434814, "learning_rate": 1.989917874907568e-05, "loss": 0.7623, "step": 2883 }, { "epoch": 0.07, "grad_norm": 2.668931484222412, "learning_rate": 1.9899061131446595e-05, "loss": 0.7408, "step": 2884 }, { "epoch": 0.07, "grad_norm": 1.904051423072815, "learning_rate": 1.9898943445599403e-05, "loss": 0.6743, "step": 2885 }, { "epoch": 0.07, "grad_norm": 1.9687668085098267, "learning_rate": 1.9898825691534908e-05, "loss": 0.7203, "step": 2886 }, { "epoch": 0.07, "grad_norm": 2.859360456466675, "learning_rate": 1.989870786925393e-05, "loss": 0.739, "step": 2887 }, { "epoch": 0.07, "grad_norm": 1.5064555406570435, "learning_rate": 1.9898589978757273e-05, "loss": 0.7512, "step": 2888 }, { "epoch": 0.07, "grad_norm": 2.78078031539917, "learning_rate": 1.9898472020045756e-05, "loss": 0.6916, "step": 2889 }, { "epoch": 0.07, "grad_norm": 3.729357957839966, "learning_rate": 1.9898353993120185e-05, "loss": 0.5251, "step": 2890 }, { "epoch": 0.07, "grad_norm": 2.364595651626587, "learning_rate": 1.989823589798138e-05, "loss": 0.6999, "step": 2891 }, { "epoch": 0.07, "grad_norm": 2.4788379669189453, "learning_rate": 1.9898117734630152e-05, "loss": 0.7766, "step": 2892 }, { "epoch": 0.07, "grad_norm": 1.895538330078125, "learning_rate": 1.9897999503067313e-05, "loss": 0.7391, "step": 2893 }, { "epoch": 0.07, "grad_norm": 4.024960041046143, "learning_rate": 1.989788120329368e-05, "loss": 0.921, "step": 2894 }, { "epoch": 0.07, "grad_norm": 3.5197553634643555, "learning_rate": 1.989776283531007e-05, "loss": 0.749, "step": 2895 }, { "epoch": 0.07, "grad_norm": 6.63744592666626, "learning_rate": 1.9897644399117294e-05, "loss": 0.5666, "step": 2896 }, { "epoch": 0.07, "grad_norm": 2.2967309951782227, "learning_rate": 1.989752589471617e-05, "loss": 0.6192, "step": 2897 }, { "epoch": 0.07, "grad_norm": 4.110851764678955, "learning_rate": 1.9897407322107524e-05, "loss": 0.8837, "step": 2898 }, { "epoch": 0.07, "grad_norm": 3.4627697467803955, "learning_rate": 1.9897288681292158e-05, "loss": 0.7627, "step": 2899 }, { "epoch": 0.07, "grad_norm": 4.734696865081787, "learning_rate": 1.9897169972270897e-05, "loss": 0.8249, "step": 2900 }, { "epoch": 0.07, "grad_norm": 3.7039668560028076, "learning_rate": 1.989705119504456e-05, "loss": 0.562, "step": 2901 }, { "epoch": 0.07, "grad_norm": 3.3183021545410156, "learning_rate": 1.9896932349613964e-05, "loss": 0.8201, "step": 2902 }, { "epoch": 0.07, "grad_norm": 3.4815027713775635, "learning_rate": 1.9896813435979928e-05, "loss": 0.7496, "step": 2903 }, { "epoch": 0.07, "grad_norm": 3.268310308456421, "learning_rate": 1.989669445414327e-05, "loss": 0.9842, "step": 2904 }, { "epoch": 0.07, "grad_norm": 3.606419324874878, "learning_rate": 1.9896575404104813e-05, "loss": 0.7352, "step": 2905 }, { "epoch": 0.07, "grad_norm": 2.69417142868042, "learning_rate": 1.9896456285865378e-05, "loss": 0.6626, "step": 2906 }, { "epoch": 0.07, "grad_norm": 3.5862979888916016, "learning_rate": 1.9896337099425784e-05, "loss": 0.8025, "step": 2907 }, { "epoch": 0.07, "grad_norm": 11.820014953613281, "learning_rate": 1.9896217844786847e-05, "loss": 0.814, "step": 2908 }, { "epoch": 0.07, "grad_norm": 2.378913402557373, "learning_rate": 1.98960985219494e-05, "loss": 0.7699, "step": 2909 }, { "epoch": 0.07, "grad_norm": 2.824965238571167, "learning_rate": 1.9895979130914254e-05, "loss": 0.7421, "step": 2910 }, { "epoch": 0.07, "grad_norm": 2.1336605548858643, "learning_rate": 1.9895859671682238e-05, "loss": 0.8205, "step": 2911 }, { "epoch": 0.07, "grad_norm": 1.8471781015396118, "learning_rate": 1.9895740144254176e-05, "loss": 0.7186, "step": 2912 }, { "epoch": 0.07, "grad_norm": 1.7350682020187378, "learning_rate": 1.989562054863089e-05, "loss": 0.7475, "step": 2913 }, { "epoch": 0.07, "grad_norm": 1.9872069358825684, "learning_rate": 1.9895500884813205e-05, "loss": 0.6917, "step": 2914 }, { "epoch": 0.07, "grad_norm": 3.141120195388794, "learning_rate": 1.9895381152801946e-05, "loss": 0.5923, "step": 2915 }, { "epoch": 0.07, "grad_norm": 1.6411590576171875, "learning_rate": 1.989526135259793e-05, "loss": 0.541, "step": 2916 }, { "epoch": 0.07, "grad_norm": 2.9392173290252686, "learning_rate": 1.9895141484201997e-05, "loss": 0.628, "step": 2917 }, { "epoch": 0.07, "grad_norm": 1.6379958391189575, "learning_rate": 1.989502154761496e-05, "loss": 0.7375, "step": 2918 }, { "epoch": 0.07, "grad_norm": 2.0844531059265137, "learning_rate": 1.9894901542837657e-05, "loss": 0.559, "step": 2919 }, { "epoch": 0.07, "grad_norm": 2.7134463787078857, "learning_rate": 1.9894781469870907e-05, "loss": 0.5631, "step": 2920 }, { "epoch": 0.07, "grad_norm": 2.291231393814087, "learning_rate": 1.9894661328715538e-05, "loss": 0.638, "step": 2921 }, { "epoch": 0.07, "grad_norm": 2.1733343601226807, "learning_rate": 1.989454111937238e-05, "loss": 0.6718, "step": 2922 }, { "epoch": 0.07, "grad_norm": 2.575606346130371, "learning_rate": 1.9894420841842263e-05, "loss": 0.7773, "step": 2923 }, { "epoch": 0.07, "grad_norm": 2.7613751888275146, "learning_rate": 1.9894300496126012e-05, "loss": 0.7709, "step": 2924 }, { "epoch": 0.07, "grad_norm": 4.913426876068115, "learning_rate": 1.9894180082224458e-05, "loss": 0.8733, "step": 2925 }, { "epoch": 0.07, "grad_norm": 4.622008800506592, "learning_rate": 1.9894059600138433e-05, "loss": 0.7713, "step": 2926 }, { "epoch": 0.08, "grad_norm": 1.685867190361023, "learning_rate": 1.9893939049868766e-05, "loss": 0.5517, "step": 2927 }, { "epoch": 0.08, "grad_norm": 3.9256832599639893, "learning_rate": 1.989381843141628e-05, "loss": 0.837, "step": 2928 }, { "epoch": 0.08, "grad_norm": 2.5447263717651367, "learning_rate": 1.989369774478182e-05, "loss": 0.611, "step": 2929 }, { "epoch": 0.08, "grad_norm": 2.4925413131713867, "learning_rate": 1.989357698996621e-05, "loss": 0.6236, "step": 2930 }, { "epoch": 0.08, "grad_norm": 4.682148456573486, "learning_rate": 1.989345616697028e-05, "loss": 0.8692, "step": 2931 }, { "epoch": 0.08, "grad_norm": 3.9268529415130615, "learning_rate": 1.989333527579487e-05, "loss": 0.6058, "step": 2932 }, { "epoch": 0.08, "grad_norm": 1.964194416999817, "learning_rate": 1.9893214316440805e-05, "loss": 0.5453, "step": 2933 }, { "epoch": 0.08, "grad_norm": 2.7128958702087402, "learning_rate": 1.9893093288908923e-05, "loss": 0.7464, "step": 2934 }, { "epoch": 0.08, "grad_norm": 2.4907922744750977, "learning_rate": 1.989297219320006e-05, "loss": 0.7727, "step": 2935 }, { "epoch": 0.08, "grad_norm": 2.9271786212921143, "learning_rate": 1.989285102931505e-05, "loss": 0.7705, "step": 2936 }, { "epoch": 0.08, "grad_norm": 2.103714942932129, "learning_rate": 1.989272979725472e-05, "loss": 0.7763, "step": 2937 }, { "epoch": 0.08, "grad_norm": 1.8133702278137207, "learning_rate": 1.9892608497019918e-05, "loss": 0.6376, "step": 2938 }, { "epoch": 0.08, "grad_norm": 3.2463223934173584, "learning_rate": 1.9892487128611468e-05, "loss": 0.7256, "step": 2939 }, { "epoch": 0.08, "grad_norm": 2.311018705368042, "learning_rate": 1.9892365692030214e-05, "loss": 0.6077, "step": 2940 }, { "epoch": 0.08, "grad_norm": 3.1832005977630615, "learning_rate": 1.989224418727699e-05, "loss": 0.8436, "step": 2941 }, { "epoch": 0.08, "grad_norm": 2.47763729095459, "learning_rate": 1.9892122614352633e-05, "loss": 0.803, "step": 2942 }, { "epoch": 0.08, "grad_norm": 2.811426877975464, "learning_rate": 1.9892000973257982e-05, "loss": 0.7916, "step": 2943 }, { "epoch": 0.08, "grad_norm": 2.655888557434082, "learning_rate": 1.989187926399388e-05, "loss": 0.9018, "step": 2944 }, { "epoch": 0.08, "grad_norm": 1.827528715133667, "learning_rate": 1.9891757486561153e-05, "loss": 0.7418, "step": 2945 }, { "epoch": 0.08, "grad_norm": 2.656888246536255, "learning_rate": 1.9891635640960653e-05, "loss": 0.6537, "step": 2946 }, { "epoch": 0.08, "grad_norm": 2.2643423080444336, "learning_rate": 1.9891513727193215e-05, "loss": 0.586, "step": 2947 }, { "epoch": 0.08, "grad_norm": 3.893169641494751, "learning_rate": 1.9891391745259675e-05, "loss": 0.7468, "step": 2948 }, { "epoch": 0.08, "grad_norm": 5.113545894622803, "learning_rate": 1.989126969516088e-05, "loss": 0.8736, "step": 2949 }, { "epoch": 0.08, "grad_norm": 3.192667245864868, "learning_rate": 1.989114757689767e-05, "loss": 0.6788, "step": 2950 }, { "epoch": 0.08, "grad_norm": 1.7042943239212036, "learning_rate": 1.9891025390470884e-05, "loss": 0.6978, "step": 2951 }, { "epoch": 0.08, "grad_norm": 3.305562973022461, "learning_rate": 1.9890903135881365e-05, "loss": 0.7429, "step": 2952 }, { "epoch": 0.08, "grad_norm": 2.5969345569610596, "learning_rate": 1.9890780813129953e-05, "loss": 0.713, "step": 2953 }, { "epoch": 0.08, "grad_norm": 1.22757887840271, "learning_rate": 1.9890658422217495e-05, "loss": 0.6481, "step": 2954 }, { "epoch": 0.08, "grad_norm": 1.7751884460449219, "learning_rate": 1.9890535963144835e-05, "loss": 0.7701, "step": 2955 }, { "epoch": 0.08, "grad_norm": 4.092086315155029, "learning_rate": 1.9890413435912815e-05, "loss": 0.615, "step": 2956 }, { "epoch": 0.08, "grad_norm": 1.6938121318817139, "learning_rate": 1.989029084052228e-05, "loss": 0.7922, "step": 2957 }, { "epoch": 0.08, "grad_norm": 4.284241676330566, "learning_rate": 1.989016817697407e-05, "loss": 0.6568, "step": 2958 }, { "epoch": 0.08, "grad_norm": 1.8699216842651367, "learning_rate": 1.9890045445269036e-05, "loss": 0.6012, "step": 2959 }, { "epoch": 0.08, "grad_norm": 2.2140915393829346, "learning_rate": 1.9889922645408027e-05, "loss": 0.5987, "step": 2960 }, { "epoch": 0.08, "grad_norm": 7.032151699066162, "learning_rate": 1.988979977739188e-05, "loss": 0.8575, "step": 2961 }, { "epoch": 0.08, "grad_norm": 4.496265888214111, "learning_rate": 1.9889676841221445e-05, "loss": 0.7342, "step": 2962 }, { "epoch": 0.08, "grad_norm": 2.222378730773926, "learning_rate": 1.9889553836897576e-05, "loss": 0.6596, "step": 2963 }, { "epoch": 0.08, "grad_norm": 2.874319553375244, "learning_rate": 1.988943076442111e-05, "loss": 0.7018, "step": 2964 }, { "epoch": 0.08, "grad_norm": 1.9075486660003662, "learning_rate": 1.9889307623792903e-05, "loss": 0.7584, "step": 2965 }, { "epoch": 0.08, "grad_norm": 1.915212869644165, "learning_rate": 1.98891844150138e-05, "loss": 0.6481, "step": 2966 }, { "epoch": 0.08, "grad_norm": 2.0903849601745605, "learning_rate": 1.9889061138084652e-05, "loss": 0.7573, "step": 2967 }, { "epoch": 0.08, "grad_norm": 1.920596718788147, "learning_rate": 1.988893779300631e-05, "loss": 0.7748, "step": 2968 }, { "epoch": 0.08, "grad_norm": 2.1657400131225586, "learning_rate": 1.9888814379779616e-05, "loss": 0.8159, "step": 2969 }, { "epoch": 0.08, "grad_norm": 3.8451833724975586, "learning_rate": 1.988869089840543e-05, "loss": 0.7199, "step": 2970 }, { "epoch": 0.08, "grad_norm": 4.960944652557373, "learning_rate": 1.9888567348884598e-05, "loss": 0.6922, "step": 2971 }, { "epoch": 0.08, "grad_norm": 2.1016714572906494, "learning_rate": 1.988844373121797e-05, "loss": 0.7269, "step": 2972 }, { "epoch": 0.08, "grad_norm": 1.8693976402282715, "learning_rate": 1.9888320045406403e-05, "loss": 0.6133, "step": 2973 }, { "epoch": 0.08, "grad_norm": 1.3094700574874878, "learning_rate": 1.9888196291450745e-05, "loss": 0.5822, "step": 2974 }, { "epoch": 0.08, "grad_norm": 2.926244020462036, "learning_rate": 1.9888072469351853e-05, "loss": 0.7752, "step": 2975 }, { "epoch": 0.08, "grad_norm": 2.014394760131836, "learning_rate": 1.9887948579110576e-05, "loss": 0.6004, "step": 2976 }, { "epoch": 0.08, "grad_norm": 2.95682954788208, "learning_rate": 1.988782462072777e-05, "loss": 0.8458, "step": 2977 }, { "epoch": 0.08, "grad_norm": 2.663829803466797, "learning_rate": 1.9887700594204288e-05, "loss": 0.7352, "step": 2978 }, { "epoch": 0.08, "grad_norm": 5.236591339111328, "learning_rate": 1.9887576499540988e-05, "loss": 0.7848, "step": 2979 }, { "epoch": 0.08, "grad_norm": 3.023317813873291, "learning_rate": 1.9887452336738718e-05, "loss": 0.742, "step": 2980 }, { "epoch": 0.08, "grad_norm": 2.496326208114624, "learning_rate": 1.9887328105798344e-05, "loss": 0.6041, "step": 2981 }, { "epoch": 0.08, "grad_norm": 4.0182905197143555, "learning_rate": 1.988720380672071e-05, "loss": 0.6561, "step": 2982 }, { "epoch": 0.08, "grad_norm": 2.656071662902832, "learning_rate": 1.9887079439506688e-05, "loss": 0.5983, "step": 2983 }, { "epoch": 0.08, "grad_norm": 2.9658782482147217, "learning_rate": 1.988695500415712e-05, "loss": 0.6407, "step": 2984 }, { "epoch": 0.08, "grad_norm": 3.514892339706421, "learning_rate": 1.988683050067287e-05, "loss": 0.8004, "step": 2985 }, { "epoch": 0.08, "grad_norm": 3.394869804382324, "learning_rate": 1.98867059290548e-05, "loss": 0.7523, "step": 2986 }, { "epoch": 0.08, "grad_norm": 4.934231758117676, "learning_rate": 1.988658128930376e-05, "loss": 0.7092, "step": 2987 }, { "epoch": 0.08, "grad_norm": 4.33647346496582, "learning_rate": 1.9886456581420613e-05, "loss": 0.9071, "step": 2988 }, { "epoch": 0.08, "grad_norm": 2.2935702800750732, "learning_rate": 1.988633180540622e-05, "loss": 0.8714, "step": 2989 }, { "epoch": 0.08, "grad_norm": 6.53270959854126, "learning_rate": 1.9886206961261438e-05, "loss": 0.7389, "step": 2990 }, { "epoch": 0.08, "grad_norm": 2.2877769470214844, "learning_rate": 1.988608204898713e-05, "loss": 0.6268, "step": 2991 }, { "epoch": 0.08, "grad_norm": 3.127795934677124, "learning_rate": 1.9885957068584158e-05, "loss": 0.6528, "step": 2992 }, { "epoch": 0.08, "grad_norm": 2.076326608657837, "learning_rate": 1.9885832020053376e-05, "loss": 0.6318, "step": 2993 }, { "epoch": 0.08, "grad_norm": 1.7446199655532837, "learning_rate": 1.9885706903395653e-05, "loss": 0.673, "step": 2994 }, { "epoch": 0.08, "grad_norm": 3.9652600288391113, "learning_rate": 1.988558171861185e-05, "loss": 0.6602, "step": 2995 }, { "epoch": 0.08, "grad_norm": 2.304093837738037, "learning_rate": 1.9885456465702825e-05, "loss": 0.7556, "step": 2996 }, { "epoch": 0.08, "grad_norm": 4.876405239105225, "learning_rate": 1.9885331144669445e-05, "loss": 0.7779, "step": 2997 }, { "epoch": 0.08, "grad_norm": 2.9419050216674805, "learning_rate": 1.9885205755512576e-05, "loss": 0.6326, "step": 2998 }, { "epoch": 0.08, "grad_norm": 3.2753050327301025, "learning_rate": 1.988508029823308e-05, "loss": 0.6098, "step": 2999 }, { "epoch": 0.08, "grad_norm": 6.486987590789795, "learning_rate": 1.988495477283182e-05, "loss": 1.015, "step": 3000 }, { "epoch": 0.08, "grad_norm": 1.9554799795150757, "learning_rate": 1.988482917930966e-05, "loss": 0.6462, "step": 3001 }, { "epoch": 0.08, "grad_norm": 1.937889575958252, "learning_rate": 1.988470351766747e-05, "loss": 0.7389, "step": 3002 }, { "epoch": 0.08, "grad_norm": 3.667128562927246, "learning_rate": 1.988457778790611e-05, "loss": 0.8021, "step": 3003 }, { "epoch": 0.08, "grad_norm": 1.9961977005004883, "learning_rate": 1.9884451990026456e-05, "loss": 0.555, "step": 3004 }, { "epoch": 0.08, "grad_norm": 2.884176731109619, "learning_rate": 1.9884326124029364e-05, "loss": 0.7571, "step": 3005 }, { "epoch": 0.08, "grad_norm": 3.7120673656463623, "learning_rate": 1.988420018991571e-05, "loss": 0.801, "step": 3006 }, { "epoch": 0.08, "grad_norm": 1.2724837064743042, "learning_rate": 1.9884074187686354e-05, "loss": 0.5642, "step": 3007 }, { "epoch": 0.08, "grad_norm": 2.03200101852417, "learning_rate": 1.988394811734217e-05, "loss": 0.6685, "step": 3008 }, { "epoch": 0.08, "grad_norm": 1.9331413507461548, "learning_rate": 1.988382197888403e-05, "loss": 0.7702, "step": 3009 }, { "epoch": 0.08, "grad_norm": 1.8925185203552246, "learning_rate": 1.988369577231279e-05, "loss": 0.5416, "step": 3010 }, { "epoch": 0.08, "grad_norm": 3.895334005355835, "learning_rate": 1.9883569497629335e-05, "loss": 0.558, "step": 3011 }, { "epoch": 0.08, "grad_norm": 4.906906604766846, "learning_rate": 1.9883443154834524e-05, "loss": 0.7311, "step": 3012 }, { "epoch": 0.08, "grad_norm": 1.9155488014221191, "learning_rate": 1.9883316743929234e-05, "loss": 0.5702, "step": 3013 }, { "epoch": 0.08, "grad_norm": 2.2136144638061523, "learning_rate": 1.9883190264914337e-05, "loss": 0.7025, "step": 3014 }, { "epoch": 0.08, "grad_norm": 2.6449673175811768, "learning_rate": 1.9883063717790697e-05, "loss": 0.7966, "step": 3015 }, { "epoch": 0.08, "grad_norm": 4.663353443145752, "learning_rate": 1.9882937102559193e-05, "loss": 0.6714, "step": 3016 }, { "epoch": 0.08, "grad_norm": 7.6222825050354, "learning_rate": 1.9882810419220698e-05, "loss": 0.7522, "step": 3017 }, { "epoch": 0.08, "grad_norm": 4.896763324737549, "learning_rate": 1.988268366777608e-05, "loss": 0.6358, "step": 3018 }, { "epoch": 0.08, "grad_norm": 5.3655829429626465, "learning_rate": 1.9882556848226215e-05, "loss": 0.69, "step": 3019 }, { "epoch": 0.08, "grad_norm": 4.33710241317749, "learning_rate": 1.988242996057198e-05, "loss": 0.7465, "step": 3020 }, { "epoch": 0.08, "grad_norm": 2.1963021755218506, "learning_rate": 1.9882303004814243e-05, "loss": 0.7987, "step": 3021 }, { "epoch": 0.08, "grad_norm": 2.271552801132202, "learning_rate": 1.9882175980953883e-05, "loss": 0.7637, "step": 3022 }, { "epoch": 0.08, "grad_norm": 3.8838629722595215, "learning_rate": 1.9882048888991776e-05, "loss": 0.8504, "step": 3023 }, { "epoch": 0.08, "grad_norm": 1.6046831607818604, "learning_rate": 1.9881921728928793e-05, "loss": 0.6268, "step": 3024 }, { "epoch": 0.08, "grad_norm": 3.643979549407959, "learning_rate": 1.988179450076582e-05, "loss": 0.8233, "step": 3025 }, { "epoch": 0.08, "grad_norm": 5.622528076171875, "learning_rate": 1.9881667204503724e-05, "loss": 0.7615, "step": 3026 }, { "epoch": 0.08, "grad_norm": 7.791062831878662, "learning_rate": 1.9881539840143387e-05, "loss": 0.6569, "step": 3027 }, { "epoch": 0.08, "grad_norm": 11.001097679138184, "learning_rate": 1.9881412407685684e-05, "loss": 0.8783, "step": 3028 }, { "epoch": 0.08, "grad_norm": 2.5902774333953857, "learning_rate": 1.9881284907131495e-05, "loss": 0.6891, "step": 3029 }, { "epoch": 0.08, "grad_norm": 6.035061836242676, "learning_rate": 1.98811573384817e-05, "loss": 0.9795, "step": 3030 }, { "epoch": 0.08, "grad_norm": 5.400858402252197, "learning_rate": 1.9881029701737175e-05, "loss": 0.6877, "step": 3031 }, { "epoch": 0.08, "grad_norm": 2.639946699142456, "learning_rate": 1.9880901996898802e-05, "loss": 0.7079, "step": 3032 }, { "epoch": 0.08, "grad_norm": 1.5186221599578857, "learning_rate": 1.988077422396746e-05, "loss": 0.7523, "step": 3033 }, { "epoch": 0.08, "grad_norm": 2.8578238487243652, "learning_rate": 1.9880646382944032e-05, "loss": 0.7416, "step": 3034 }, { "epoch": 0.08, "grad_norm": 3.5421361923217773, "learning_rate": 1.9880518473829394e-05, "loss": 0.6838, "step": 3035 }, { "epoch": 0.08, "grad_norm": 4.0931477546691895, "learning_rate": 1.9880390496624432e-05, "loss": 0.7219, "step": 3036 }, { "epoch": 0.08, "grad_norm": 3.091343641281128, "learning_rate": 1.9880262451330023e-05, "loss": 0.8325, "step": 3037 }, { "epoch": 0.08, "grad_norm": 6.815834999084473, "learning_rate": 1.9880134337947057e-05, "loss": 0.8377, "step": 3038 }, { "epoch": 0.08, "grad_norm": 2.6842429637908936, "learning_rate": 1.9880006156476407e-05, "loss": 0.7221, "step": 3039 }, { "epoch": 0.08, "grad_norm": 2.7341203689575195, "learning_rate": 1.9879877906918966e-05, "loss": 0.7253, "step": 3040 }, { "epoch": 0.08, "grad_norm": 3.008744955062866, "learning_rate": 1.9879749589275613e-05, "loss": 0.7777, "step": 3041 }, { "epoch": 0.08, "grad_norm": 5.427276134490967, "learning_rate": 1.987962120354723e-05, "loss": 0.7281, "step": 3042 }, { "epoch": 0.08, "grad_norm": 3.037583589553833, "learning_rate": 1.987949274973471e-05, "loss": 0.5377, "step": 3043 }, { "epoch": 0.08, "grad_norm": 1.9458738565444946, "learning_rate": 1.987936422783893e-05, "loss": 0.652, "step": 3044 }, { "epoch": 0.08, "grad_norm": 3.453925371170044, "learning_rate": 1.987923563786078e-05, "loss": 0.6256, "step": 3045 }, { "epoch": 0.08, "grad_norm": 1.651785969734192, "learning_rate": 1.987910697980114e-05, "loss": 0.7154, "step": 3046 }, { "epoch": 0.08, "grad_norm": 3.9363627433776855, "learning_rate": 1.9878978253660906e-05, "loss": 0.7349, "step": 3047 }, { "epoch": 0.08, "grad_norm": 1.6127504110336304, "learning_rate": 1.9878849459440958e-05, "loss": 0.5182, "step": 3048 }, { "epoch": 0.08, "grad_norm": 1.7723798751831055, "learning_rate": 1.9878720597142186e-05, "loss": 0.7537, "step": 3049 }, { "epoch": 0.08, "grad_norm": 3.00041127204895, "learning_rate": 1.987859166676548e-05, "loss": 0.7445, "step": 3050 }, { "epoch": 0.08, "grad_norm": 2.246265411376953, "learning_rate": 1.9878462668311725e-05, "loss": 0.707, "step": 3051 }, { "epoch": 0.08, "grad_norm": 3.150043487548828, "learning_rate": 1.9878333601781814e-05, "loss": 0.6873, "step": 3052 }, { "epoch": 0.08, "grad_norm": 2.544139862060547, "learning_rate": 1.987820446717663e-05, "loss": 0.6805, "step": 3053 }, { "epoch": 0.08, "grad_norm": 4.378101348876953, "learning_rate": 1.9878075264497073e-05, "loss": 0.7202, "step": 3054 }, { "epoch": 0.08, "grad_norm": 3.988105535507202, "learning_rate": 1.9877945993744022e-05, "loss": 0.6952, "step": 3055 }, { "epoch": 0.08, "grad_norm": 1.7603529691696167, "learning_rate": 1.9877816654918375e-05, "loss": 0.6431, "step": 3056 }, { "epoch": 0.08, "grad_norm": 12.192733764648438, "learning_rate": 1.987768724802102e-05, "loss": 0.5749, "step": 3057 }, { "epoch": 0.08, "grad_norm": 4.260084629058838, "learning_rate": 1.9877557773052852e-05, "loss": 0.677, "step": 3058 }, { "epoch": 0.08, "grad_norm": 2.861468553543091, "learning_rate": 1.987742823001476e-05, "loss": 0.7589, "step": 3059 }, { "epoch": 0.08, "grad_norm": 4.727591514587402, "learning_rate": 1.987729861890764e-05, "loss": 0.7574, "step": 3060 }, { "epoch": 0.08, "grad_norm": 1.59665846824646, "learning_rate": 1.987716893973238e-05, "loss": 0.6113, "step": 3061 }, { "epoch": 0.08, "grad_norm": 7.451632022857666, "learning_rate": 1.987703919248988e-05, "loss": 0.809, "step": 3062 }, { "epoch": 0.08, "grad_norm": 4.42319917678833, "learning_rate": 1.987690937718103e-05, "loss": 0.7094, "step": 3063 }, { "epoch": 0.08, "grad_norm": 1.9147047996520996, "learning_rate": 1.987677949380673e-05, "loss": 0.7136, "step": 3064 }, { "epoch": 0.08, "grad_norm": 4.959279537200928, "learning_rate": 1.9876649542367867e-05, "loss": 0.7419, "step": 3065 }, { "epoch": 0.08, "grad_norm": 5.086423873901367, "learning_rate": 1.987651952286534e-05, "loss": 0.5865, "step": 3066 }, { "epoch": 0.08, "grad_norm": 4.232515335083008, "learning_rate": 1.987638943530005e-05, "loss": 0.8494, "step": 3067 }, { "epoch": 0.08, "grad_norm": 4.173007488250732, "learning_rate": 1.9876259279672884e-05, "loss": 0.8153, "step": 3068 }, { "epoch": 0.08, "grad_norm": 4.647392749786377, "learning_rate": 1.9876129055984745e-05, "loss": 0.7524, "step": 3069 }, { "epoch": 0.08, "grad_norm": 3.613356113433838, "learning_rate": 1.987599876423653e-05, "loss": 0.7155, "step": 3070 }, { "epoch": 0.08, "grad_norm": 3.27821946144104, "learning_rate": 1.9875868404429135e-05, "loss": 0.9134, "step": 3071 }, { "epoch": 0.08, "grad_norm": 2.1297006607055664, "learning_rate": 1.987573797656346e-05, "loss": 0.6776, "step": 3072 }, { "epoch": 0.08, "grad_norm": 2.892521858215332, "learning_rate": 1.9875607480640405e-05, "loss": 0.5819, "step": 3073 }, { "epoch": 0.08, "grad_norm": 1.6532882452011108, "learning_rate": 1.9875476916660866e-05, "loss": 0.761, "step": 3074 }, { "epoch": 0.08, "grad_norm": 4.382393836975098, "learning_rate": 1.987534628462575e-05, "loss": 0.6597, "step": 3075 }, { "epoch": 0.08, "grad_norm": 4.147754669189453, "learning_rate": 1.9875215584535943e-05, "loss": 0.57, "step": 3076 }, { "epoch": 0.08, "grad_norm": 7.012301445007324, "learning_rate": 1.987508481639236e-05, "loss": 0.7687, "step": 3077 }, { "epoch": 0.08, "grad_norm": 2.8973336219787598, "learning_rate": 1.987495398019589e-05, "loss": 0.679, "step": 3078 }, { "epoch": 0.08, "grad_norm": 2.7573115825653076, "learning_rate": 1.987482307594745e-05, "loss": 0.6748, "step": 3079 }, { "epoch": 0.08, "grad_norm": 2.573770523071289, "learning_rate": 1.9874692103647928e-05, "loss": 0.6877, "step": 3080 }, { "epoch": 0.08, "grad_norm": 4.100937366485596, "learning_rate": 1.9874561063298235e-05, "loss": 0.8722, "step": 3081 }, { "epoch": 0.08, "grad_norm": 2.5023467540740967, "learning_rate": 1.987442995489927e-05, "loss": 0.5894, "step": 3082 }, { "epoch": 0.08, "grad_norm": 2.125194549560547, "learning_rate": 1.9874298778451934e-05, "loss": 0.6603, "step": 3083 }, { "epoch": 0.08, "grad_norm": 3.190599203109741, "learning_rate": 1.987416753395714e-05, "loss": 0.5394, "step": 3084 }, { "epoch": 0.08, "grad_norm": 2.027311086654663, "learning_rate": 1.9874036221415783e-05, "loss": 0.605, "step": 3085 }, { "epoch": 0.08, "grad_norm": 4.371392726898193, "learning_rate": 1.9873904840828774e-05, "loss": 0.718, "step": 3086 }, { "epoch": 0.08, "grad_norm": 3.0602798461914062, "learning_rate": 1.9873773392197012e-05, "loss": 0.623, "step": 3087 }, { "epoch": 0.08, "grad_norm": 2.6768059730529785, "learning_rate": 1.987364187552141e-05, "loss": 0.8742, "step": 3088 }, { "epoch": 0.08, "grad_norm": 2.728339195251465, "learning_rate": 1.9873510290802877e-05, "loss": 0.7498, "step": 3089 }, { "epoch": 0.08, "grad_norm": 2.794273614883423, "learning_rate": 1.9873378638042308e-05, "loss": 0.6997, "step": 3090 }, { "epoch": 0.08, "grad_norm": 1.7353962659835815, "learning_rate": 1.9873246917240615e-05, "loss": 0.5426, "step": 3091 }, { "epoch": 0.08, "grad_norm": 3.3966283798217773, "learning_rate": 1.987311512839871e-05, "loss": 0.7955, "step": 3092 }, { "epoch": 0.08, "grad_norm": 1.88619863986969, "learning_rate": 1.98729832715175e-05, "loss": 0.6031, "step": 3093 }, { "epoch": 0.08, "grad_norm": 4.410421848297119, "learning_rate": 1.987285134659789e-05, "loss": 0.5434, "step": 3094 }, { "epoch": 0.08, "grad_norm": 2.7236359119415283, "learning_rate": 1.987271935364079e-05, "loss": 0.626, "step": 3095 }, { "epoch": 0.08, "grad_norm": 2.901855945587158, "learning_rate": 1.9872587292647113e-05, "loss": 0.7432, "step": 3096 }, { "epoch": 0.08, "grad_norm": 2.761552572250366, "learning_rate": 1.9872455163617767e-05, "loss": 0.6822, "step": 3097 }, { "epoch": 0.08, "grad_norm": 4.4229817390441895, "learning_rate": 1.987232296655366e-05, "loss": 0.7065, "step": 3098 }, { "epoch": 0.08, "grad_norm": 2.8472723960876465, "learning_rate": 1.987219070145571e-05, "loss": 0.7477, "step": 3099 }, { "epoch": 0.08, "grad_norm": 5.9690141677856445, "learning_rate": 1.987205836832482e-05, "loss": 0.603, "step": 3100 }, { "epoch": 0.08, "grad_norm": 2.510420322418213, "learning_rate": 1.9871925967161908e-05, "loss": 0.7196, "step": 3101 }, { "epoch": 0.08, "grad_norm": 3.3925883769989014, "learning_rate": 1.9871793497967884e-05, "loss": 0.8381, "step": 3102 }, { "epoch": 0.08, "grad_norm": 4.418654441833496, "learning_rate": 1.987166096074366e-05, "loss": 0.7853, "step": 3103 }, { "epoch": 0.08, "grad_norm": 4.920774459838867, "learning_rate": 1.987152835549015e-05, "loss": 0.7701, "step": 3104 }, { "epoch": 0.08, "grad_norm": 1.743406057357788, "learning_rate": 1.987139568220827e-05, "loss": 0.6918, "step": 3105 }, { "epoch": 0.08, "grad_norm": 3.055921792984009, "learning_rate": 1.9871262940898933e-05, "loss": 0.7417, "step": 3106 }, { "epoch": 0.08, "grad_norm": 3.302107572555542, "learning_rate": 1.9871130131563054e-05, "loss": 0.7429, "step": 3107 }, { "epoch": 0.08, "grad_norm": 4.1404523849487305, "learning_rate": 1.987099725420155e-05, "loss": 0.8429, "step": 3108 }, { "epoch": 0.08, "grad_norm": 1.9883760213851929, "learning_rate": 1.987086430881533e-05, "loss": 0.6802, "step": 3109 }, { "epoch": 0.08, "grad_norm": 14.110407829284668, "learning_rate": 1.9870731295405313e-05, "loss": 0.6449, "step": 3110 }, { "epoch": 0.08, "grad_norm": 2.276869058609009, "learning_rate": 1.9870598213972422e-05, "loss": 0.585, "step": 3111 }, { "epoch": 0.08, "grad_norm": 2.588303804397583, "learning_rate": 1.987046506451757e-05, "loss": 0.7693, "step": 3112 }, { "epoch": 0.08, "grad_norm": 3.8202505111694336, "learning_rate": 1.987033184704167e-05, "loss": 0.6005, "step": 3113 }, { "epoch": 0.08, "grad_norm": 3.028907299041748, "learning_rate": 1.987019856154565e-05, "loss": 0.6482, "step": 3114 }, { "epoch": 0.08, "grad_norm": 2.0171315670013428, "learning_rate": 1.9870065208030417e-05, "loss": 0.6634, "step": 3115 }, { "epoch": 0.08, "grad_norm": 1.6481108665466309, "learning_rate": 1.9869931786496897e-05, "loss": 0.6852, "step": 3116 }, { "epoch": 0.08, "grad_norm": 7.635830879211426, "learning_rate": 1.986979829694601e-05, "loss": 0.7305, "step": 3117 }, { "epoch": 0.08, "grad_norm": 5.532427787780762, "learning_rate": 1.986966473937867e-05, "loss": 0.762, "step": 3118 }, { "epoch": 0.08, "grad_norm": 3.6576108932495117, "learning_rate": 1.9869531113795805e-05, "loss": 0.7845, "step": 3119 }, { "epoch": 0.08, "grad_norm": 1.5670086145401, "learning_rate": 1.9869397420198332e-05, "loss": 0.5747, "step": 3120 }, { "epoch": 0.08, "grad_norm": 2.324112892150879, "learning_rate": 1.986926365858717e-05, "loss": 0.6053, "step": 3121 }, { "epoch": 0.08, "grad_norm": 7.709425449371338, "learning_rate": 1.9869129828963246e-05, "loss": 0.8812, "step": 3122 }, { "epoch": 0.08, "grad_norm": 1.8545438051223755, "learning_rate": 1.9868995931327476e-05, "loss": 0.6845, "step": 3123 }, { "epoch": 0.08, "grad_norm": 3.4807116985321045, "learning_rate": 1.986886196568079e-05, "loss": 0.6436, "step": 3124 }, { "epoch": 0.08, "grad_norm": 5.1895432472229, "learning_rate": 1.9868727932024107e-05, "loss": 0.8677, "step": 3125 }, { "epoch": 0.08, "grad_norm": 2.734673261642456, "learning_rate": 1.986859383035835e-05, "loss": 0.6563, "step": 3126 }, { "epoch": 0.08, "grad_norm": 1.7905992269515991, "learning_rate": 1.9868459660684444e-05, "loss": 0.7016, "step": 3127 }, { "epoch": 0.08, "grad_norm": 8.396180152893066, "learning_rate": 1.9868325423003317e-05, "loss": 0.65, "step": 3128 }, { "epoch": 0.08, "grad_norm": 2.374302625656128, "learning_rate": 1.9868191117315885e-05, "loss": 0.6541, "step": 3129 }, { "epoch": 0.08, "grad_norm": 2.110358238220215, "learning_rate": 1.9868056743623086e-05, "loss": 0.7801, "step": 3130 }, { "epoch": 0.08, "grad_norm": 2.5979855060577393, "learning_rate": 1.9867922301925836e-05, "loss": 0.5919, "step": 3131 }, { "epoch": 0.08, "grad_norm": 3.7982115745544434, "learning_rate": 1.9867787792225068e-05, "loss": 0.9319, "step": 3132 }, { "epoch": 0.08, "grad_norm": 2.0135996341705322, "learning_rate": 1.9867653214521707e-05, "loss": 0.6808, "step": 3133 }, { "epoch": 0.08, "grad_norm": 3.93825101852417, "learning_rate": 1.9867518568816677e-05, "loss": 0.8582, "step": 3134 }, { "epoch": 0.08, "grad_norm": 3.2209506034851074, "learning_rate": 1.9867383855110906e-05, "loss": 0.7669, "step": 3135 }, { "epoch": 0.08, "grad_norm": 2.708124876022339, "learning_rate": 1.9867249073405324e-05, "loss": 0.7905, "step": 3136 }, { "epoch": 0.08, "grad_norm": 2.5746910572052, "learning_rate": 1.9867114223700866e-05, "loss": 0.6547, "step": 3137 }, { "epoch": 0.08, "grad_norm": 3.1677966117858887, "learning_rate": 1.9866979305998452e-05, "loss": 0.6546, "step": 3138 }, { "epoch": 0.08, "grad_norm": 2.196012020111084, "learning_rate": 1.986684432029902e-05, "loss": 0.6314, "step": 3139 }, { "epoch": 0.08, "grad_norm": 2.066495418548584, "learning_rate": 1.9866709266603493e-05, "loss": 0.6137, "step": 3140 }, { "epoch": 0.08, "grad_norm": 3.390033483505249, "learning_rate": 1.98665741449128e-05, "loss": 0.6694, "step": 3141 }, { "epoch": 0.08, "grad_norm": 2.8280954360961914, "learning_rate": 1.9866438955227885e-05, "loss": 0.7967, "step": 3142 }, { "epoch": 0.08, "grad_norm": 2.835831880569458, "learning_rate": 1.986630369754967e-05, "loss": 0.734, "step": 3143 }, { "epoch": 0.08, "grad_norm": 2.5641510486602783, "learning_rate": 1.9866168371879087e-05, "loss": 0.7363, "step": 3144 }, { "epoch": 0.08, "grad_norm": 1.6573940515518188, "learning_rate": 1.9866032978217067e-05, "loss": 0.6577, "step": 3145 }, { "epoch": 0.08, "grad_norm": 3.59851336479187, "learning_rate": 1.9865897516564548e-05, "loss": 0.7246, "step": 3146 }, { "epoch": 0.08, "grad_norm": 4.0221076011657715, "learning_rate": 1.9865761986922463e-05, "loss": 0.7269, "step": 3147 }, { "epoch": 0.08, "grad_norm": 4.322096824645996, "learning_rate": 1.9865626389291744e-05, "loss": 0.8717, "step": 3148 }, { "epoch": 0.08, "grad_norm": 2.435234308242798, "learning_rate": 1.986549072367333e-05, "loss": 0.7731, "step": 3149 }, { "epoch": 0.08, "grad_norm": 3.284761667251587, "learning_rate": 1.9865354990068147e-05, "loss": 0.7826, "step": 3150 }, { "epoch": 0.08, "grad_norm": 2.011756420135498, "learning_rate": 1.9865219188477133e-05, "loss": 0.7425, "step": 3151 }, { "epoch": 0.08, "grad_norm": 3.895484685897827, "learning_rate": 1.9865083318901234e-05, "loss": 0.7794, "step": 3152 }, { "epoch": 0.08, "grad_norm": 2.4014365673065186, "learning_rate": 1.9864947381341374e-05, "loss": 0.6477, "step": 3153 }, { "epoch": 0.08, "grad_norm": 5.073541641235352, "learning_rate": 1.9864811375798493e-05, "loss": 0.8061, "step": 3154 }, { "epoch": 0.08, "grad_norm": 3.934823513031006, "learning_rate": 1.9864675302273534e-05, "loss": 0.9068, "step": 3155 }, { "epoch": 0.08, "grad_norm": 1.7375648021697998, "learning_rate": 1.9864539160767425e-05, "loss": 0.7197, "step": 3156 }, { "epoch": 0.08, "grad_norm": 1.8219470977783203, "learning_rate": 1.9864402951281115e-05, "loss": 0.8334, "step": 3157 }, { "epoch": 0.08, "grad_norm": 1.8925135135650635, "learning_rate": 1.9864266673815535e-05, "loss": 0.6138, "step": 3158 }, { "epoch": 0.08, "grad_norm": 3.809359312057495, "learning_rate": 1.9864130328371628e-05, "loss": 0.8554, "step": 3159 }, { "epoch": 0.08, "grad_norm": 2.3538033962249756, "learning_rate": 1.986399391495033e-05, "loss": 0.5884, "step": 3160 }, { "epoch": 0.08, "grad_norm": 2.4922702312469482, "learning_rate": 1.986385743355258e-05, "loss": 0.7085, "step": 3161 }, { "epoch": 0.08, "grad_norm": 3.732750654220581, "learning_rate": 1.986372088417933e-05, "loss": 0.6975, "step": 3162 }, { "epoch": 0.08, "grad_norm": 2.037566900253296, "learning_rate": 1.9863584266831506e-05, "loss": 0.5684, "step": 3163 }, { "epoch": 0.08, "grad_norm": 2.02215838432312, "learning_rate": 1.986344758151006e-05, "loss": 0.7397, "step": 3164 }, { "epoch": 0.08, "grad_norm": 1.879148006439209, "learning_rate": 1.9863310828215928e-05, "loss": 0.7956, "step": 3165 }, { "epoch": 0.08, "grad_norm": 2.5411148071289062, "learning_rate": 1.9863174006950053e-05, "loss": 0.6854, "step": 3166 }, { "epoch": 0.08, "grad_norm": 2.504011869430542, "learning_rate": 1.986303711771338e-05, "loss": 0.696, "step": 3167 }, { "epoch": 0.08, "grad_norm": 2.987281084060669, "learning_rate": 1.9862900160506854e-05, "loss": 0.6865, "step": 3168 }, { "epoch": 0.08, "grad_norm": 3.8995447158813477, "learning_rate": 1.9862763135331414e-05, "loss": 0.8109, "step": 3169 }, { "epoch": 0.08, "grad_norm": 2.604299783706665, "learning_rate": 1.986262604218801e-05, "loss": 0.6259, "step": 3170 }, { "epoch": 0.08, "grad_norm": 5.070635795593262, "learning_rate": 1.9862488881077582e-05, "loss": 0.8004, "step": 3171 }, { "epoch": 0.08, "grad_norm": 2.288987159729004, "learning_rate": 1.9862351652001078e-05, "loss": 0.5617, "step": 3172 }, { "epoch": 0.08, "grad_norm": 2.9646353721618652, "learning_rate": 1.986221435495944e-05, "loss": 0.776, "step": 3173 }, { "epoch": 0.08, "grad_norm": 1.7473034858703613, "learning_rate": 1.9862076989953618e-05, "loss": 0.6693, "step": 3174 }, { "epoch": 0.08, "grad_norm": 1.860681414604187, "learning_rate": 1.9861939556984557e-05, "loss": 0.6827, "step": 3175 }, { "epoch": 0.08, "grad_norm": 1.6520893573760986, "learning_rate": 1.9861802056053204e-05, "loss": 0.5817, "step": 3176 }, { "epoch": 0.08, "grad_norm": 2.0837109088897705, "learning_rate": 1.986166448716051e-05, "loss": 0.6048, "step": 3177 }, { "epoch": 0.08, "grad_norm": 3.1855196952819824, "learning_rate": 1.9861526850307418e-05, "loss": 0.7416, "step": 3178 }, { "epoch": 0.08, "grad_norm": 2.4177587032318115, "learning_rate": 1.986138914549488e-05, "loss": 0.8163, "step": 3179 }, { "epoch": 0.08, "grad_norm": 2.2149667739868164, "learning_rate": 1.986125137272384e-05, "loss": 0.6443, "step": 3180 }, { "epoch": 0.08, "grad_norm": 2.200345754623413, "learning_rate": 1.9861113531995252e-05, "loss": 0.6422, "step": 3181 }, { "epoch": 0.08, "grad_norm": 2.516690969467163, "learning_rate": 1.9860975623310065e-05, "loss": 0.589, "step": 3182 }, { "epoch": 0.08, "grad_norm": 3.468860149383545, "learning_rate": 1.986083764666923e-05, "loss": 0.9129, "step": 3183 }, { "epoch": 0.08, "grad_norm": 4.000942707061768, "learning_rate": 1.9860699602073697e-05, "loss": 0.8184, "step": 3184 }, { "epoch": 0.08, "grad_norm": 3.437809705734253, "learning_rate": 1.986056148952442e-05, "loss": 0.7655, "step": 3185 }, { "epoch": 0.08, "grad_norm": 1.529163122177124, "learning_rate": 1.9860423309022346e-05, "loss": 0.8385, "step": 3186 }, { "epoch": 0.08, "grad_norm": 3.7542858123779297, "learning_rate": 1.986028506056843e-05, "loss": 0.8307, "step": 3187 }, { "epoch": 0.08, "grad_norm": 8.41721248626709, "learning_rate": 1.986014674416362e-05, "loss": 0.7826, "step": 3188 }, { "epoch": 0.08, "grad_norm": 5.463390350341797, "learning_rate": 1.986000835980888e-05, "loss": 0.6941, "step": 3189 }, { "epoch": 0.08, "grad_norm": 2.250303030014038, "learning_rate": 1.9859869907505155e-05, "loss": 0.818, "step": 3190 }, { "epoch": 0.08, "grad_norm": 3.165074110031128, "learning_rate": 1.9859731387253398e-05, "loss": 0.6965, "step": 3191 }, { "epoch": 0.08, "grad_norm": 1.9489293098449707, "learning_rate": 1.985959279905457e-05, "loss": 0.5455, "step": 3192 }, { "epoch": 0.08, "grad_norm": 1.9690901041030884, "learning_rate": 1.9859454142909623e-05, "loss": 0.5788, "step": 3193 }, { "epoch": 0.08, "grad_norm": 2.1143109798431396, "learning_rate": 1.985931541881951e-05, "loss": 0.6222, "step": 3194 }, { "epoch": 0.08, "grad_norm": 3.9193522930145264, "learning_rate": 1.985917662678519e-05, "loss": 0.7335, "step": 3195 }, { "epoch": 0.08, "grad_norm": 3.556344747543335, "learning_rate": 1.985903776680762e-05, "loss": 0.6387, "step": 3196 }, { "epoch": 0.08, "grad_norm": 2.7369649410247803, "learning_rate": 1.9858898838887755e-05, "loss": 0.6293, "step": 3197 }, { "epoch": 0.08, "grad_norm": 2.7820887565612793, "learning_rate": 1.9858759843026554e-05, "loss": 0.6516, "step": 3198 }, { "epoch": 0.08, "grad_norm": 4.61041784286499, "learning_rate": 1.9858620779224973e-05, "loss": 0.8047, "step": 3199 }, { "epoch": 0.08, "grad_norm": 5.761265754699707, "learning_rate": 1.985848164748397e-05, "loss": 0.6942, "step": 3200 }, { "epoch": 0.08, "grad_norm": 1.61125910282135, "learning_rate": 1.9858342447804507e-05, "loss": 0.781, "step": 3201 }, { "epoch": 0.08, "grad_norm": 4.769045829772949, "learning_rate": 1.9858203180187544e-05, "loss": 0.7745, "step": 3202 }, { "epoch": 0.08, "grad_norm": 3.926440477371216, "learning_rate": 1.9858063844634036e-05, "loss": 0.8034, "step": 3203 }, { "epoch": 0.08, "grad_norm": 4.836987018585205, "learning_rate": 1.985792444114494e-05, "loss": 0.8053, "step": 3204 }, { "epoch": 0.08, "grad_norm": 2.2770626544952393, "learning_rate": 1.985778496972123e-05, "loss": 0.6507, "step": 3205 }, { "epoch": 0.08, "grad_norm": 3.1246225833892822, "learning_rate": 1.985764543036386e-05, "loss": 0.7463, "step": 3206 }, { "epoch": 0.08, "grad_norm": 2.740036964416504, "learning_rate": 1.9857505823073785e-05, "loss": 0.4928, "step": 3207 }, { "epoch": 0.08, "grad_norm": 1.9352071285247803, "learning_rate": 1.9857366147851974e-05, "loss": 0.7073, "step": 3208 }, { "epoch": 0.08, "grad_norm": 3.345824718475342, "learning_rate": 1.985722640469939e-05, "loss": 0.7564, "step": 3209 }, { "epoch": 0.08, "grad_norm": 2.4453113079071045, "learning_rate": 1.9857086593616995e-05, "loss": 0.6397, "step": 3210 }, { "epoch": 0.08, "grad_norm": 2.6606204509735107, "learning_rate": 1.985694671460575e-05, "loss": 0.7494, "step": 3211 }, { "epoch": 0.08, "grad_norm": 4.533125400543213, "learning_rate": 1.9856806767666623e-05, "loss": 0.7413, "step": 3212 }, { "epoch": 0.08, "grad_norm": 2.3614397048950195, "learning_rate": 1.9856666752800575e-05, "loss": 0.6777, "step": 3213 }, { "epoch": 0.08, "grad_norm": 2.5814530849456787, "learning_rate": 1.9856526670008574e-05, "loss": 0.6288, "step": 3214 }, { "epoch": 0.08, "grad_norm": 4.72600793838501, "learning_rate": 1.985638651929158e-05, "loss": 0.6851, "step": 3215 }, { "epoch": 0.08, "grad_norm": 5.023351192474365, "learning_rate": 1.9856246300650567e-05, "loss": 0.7305, "step": 3216 }, { "epoch": 0.08, "grad_norm": 3.3932464122772217, "learning_rate": 1.9856106014086494e-05, "loss": 0.7134, "step": 3217 }, { "epoch": 0.08, "grad_norm": 2.5432868003845215, "learning_rate": 1.9855965659600333e-05, "loss": 0.7167, "step": 3218 }, { "epoch": 0.08, "grad_norm": 4.292895317077637, "learning_rate": 1.985582523719305e-05, "loss": 0.7342, "step": 3219 }, { "epoch": 0.08, "grad_norm": 3.1767899990081787, "learning_rate": 1.9855684746865606e-05, "loss": 0.7195, "step": 3220 }, { "epoch": 0.08, "grad_norm": 5.796153545379639, "learning_rate": 1.9855544188618977e-05, "loss": 0.7585, "step": 3221 }, { "epoch": 0.08, "grad_norm": 2.6671597957611084, "learning_rate": 1.985540356245413e-05, "loss": 0.6955, "step": 3222 }, { "epoch": 0.08, "grad_norm": 1.1740775108337402, "learning_rate": 1.9855262868372037e-05, "loss": 0.5055, "step": 3223 }, { "epoch": 0.08, "grad_norm": 4.637459754943848, "learning_rate": 1.985512210637366e-05, "loss": 0.763, "step": 3224 }, { "epoch": 0.08, "grad_norm": 5.825387954711914, "learning_rate": 1.9854981276459974e-05, "loss": 0.7802, "step": 3225 }, { "epoch": 0.08, "grad_norm": 2.623659610748291, "learning_rate": 1.9854840378631945e-05, "loss": 0.6594, "step": 3226 }, { "epoch": 0.08, "grad_norm": 4.162414073944092, "learning_rate": 1.985469941289055e-05, "loss": 0.7041, "step": 3227 }, { "epoch": 0.08, "grad_norm": 5.3021626472473145, "learning_rate": 1.985455837923676e-05, "loss": 0.8265, "step": 3228 }, { "epoch": 0.08, "grad_norm": 2.3485448360443115, "learning_rate": 1.9854417277671543e-05, "loss": 0.6227, "step": 3229 }, { "epoch": 0.08, "grad_norm": 1.5257688760757446, "learning_rate": 1.985427610819587e-05, "loss": 0.5824, "step": 3230 }, { "epoch": 0.08, "grad_norm": 3.5038156509399414, "learning_rate": 1.9854134870810724e-05, "loss": 0.7762, "step": 3231 }, { "epoch": 0.08, "grad_norm": 5.174821376800537, "learning_rate": 1.9853993565517066e-05, "loss": 0.74, "step": 3232 }, { "epoch": 0.08, "grad_norm": 2.5358293056488037, "learning_rate": 1.9853852192315878e-05, "loss": 0.6306, "step": 3233 }, { "epoch": 0.08, "grad_norm": 3.036583662033081, "learning_rate": 1.985371075120813e-05, "loss": 0.7826, "step": 3234 }, { "epoch": 0.08, "grad_norm": 5.272675037384033, "learning_rate": 1.98535692421948e-05, "loss": 0.7937, "step": 3235 }, { "epoch": 0.08, "grad_norm": 3.2599449157714844, "learning_rate": 1.985342766527686e-05, "loss": 0.669, "step": 3236 }, { "epoch": 0.08, "grad_norm": 1.6385581493377686, "learning_rate": 1.9853286020455287e-05, "loss": 0.6709, "step": 3237 }, { "epoch": 0.08, "grad_norm": 2.389573335647583, "learning_rate": 1.985314430773106e-05, "loss": 0.6624, "step": 3238 }, { "epoch": 0.08, "grad_norm": 1.6207140684127808, "learning_rate": 1.985300252710515e-05, "loss": 0.5683, "step": 3239 }, { "epoch": 0.08, "grad_norm": 3.0640251636505127, "learning_rate": 1.9852860678578536e-05, "loss": 0.7986, "step": 3240 }, { "epoch": 0.08, "grad_norm": 2.6464123725891113, "learning_rate": 1.98527187621522e-05, "loss": 0.653, "step": 3241 }, { "epoch": 0.08, "grad_norm": 1.6869547367095947, "learning_rate": 1.9852576777827114e-05, "loss": 0.7444, "step": 3242 }, { "epoch": 0.08, "grad_norm": 2.3590052127838135, "learning_rate": 1.985243472560426e-05, "loss": 0.6302, "step": 3243 }, { "epoch": 0.08, "grad_norm": 2.9985427856445312, "learning_rate": 1.985229260548461e-05, "loss": 0.6814, "step": 3244 }, { "epoch": 0.08, "grad_norm": 2.400186538696289, "learning_rate": 1.9852150417469156e-05, "loss": 0.7366, "step": 3245 }, { "epoch": 0.08, "grad_norm": 4.582605361938477, "learning_rate": 1.9852008161558867e-05, "loss": 0.6834, "step": 3246 }, { "epoch": 0.08, "grad_norm": 2.2284903526306152, "learning_rate": 1.985186583775473e-05, "loss": 0.6258, "step": 3247 }, { "epoch": 0.08, "grad_norm": 5.049607276916504, "learning_rate": 1.9851723446057726e-05, "loss": 0.5796, "step": 3248 }, { "epoch": 0.08, "grad_norm": 3.3487443923950195, "learning_rate": 1.9851580986468828e-05, "loss": 0.704, "step": 3249 }, { "epoch": 0.08, "grad_norm": 1.7634568214416504, "learning_rate": 1.9851438458989026e-05, "loss": 0.8051, "step": 3250 }, { "epoch": 0.08, "grad_norm": 1.6280956268310547, "learning_rate": 1.9851295863619296e-05, "loss": 0.7543, "step": 3251 }, { "epoch": 0.08, "grad_norm": 2.6609606742858887, "learning_rate": 1.9851153200360628e-05, "loss": 0.7935, "step": 3252 }, { "epoch": 0.08, "grad_norm": 6.16087007522583, "learning_rate": 1.9851010469214e-05, "loss": 0.6923, "step": 3253 }, { "epoch": 0.08, "grad_norm": 2.0806312561035156, "learning_rate": 1.98508676701804e-05, "loss": 0.689, "step": 3254 }, { "epoch": 0.08, "grad_norm": 2.0198047161102295, "learning_rate": 1.98507248032608e-05, "loss": 0.8247, "step": 3255 }, { "epoch": 0.08, "grad_norm": 2.0408408641815186, "learning_rate": 1.98505818684562e-05, "loss": 0.6801, "step": 3256 }, { "epoch": 0.08, "grad_norm": 6.441802978515625, "learning_rate": 1.985043886576758e-05, "loss": 0.9616, "step": 3257 }, { "epoch": 0.08, "grad_norm": 4.648197650909424, "learning_rate": 1.985029579519592e-05, "loss": 0.6357, "step": 3258 }, { "epoch": 0.08, "grad_norm": 7.884597301483154, "learning_rate": 1.9850152656742212e-05, "loss": 0.6656, "step": 3259 }, { "epoch": 0.08, "grad_norm": 1.7920011281967163, "learning_rate": 1.9850009450407438e-05, "loss": 0.718, "step": 3260 }, { "epoch": 0.08, "grad_norm": 3.732034683227539, "learning_rate": 1.9849866176192592e-05, "loss": 0.8203, "step": 3261 }, { "epoch": 0.08, "grad_norm": 3.338360071182251, "learning_rate": 1.9849722834098653e-05, "loss": 0.6396, "step": 3262 }, { "epoch": 0.08, "grad_norm": 1.2523037195205688, "learning_rate": 1.9849579424126615e-05, "loss": 0.5011, "step": 3263 }, { "epoch": 0.08, "grad_norm": 7.1889424324035645, "learning_rate": 1.9849435946277462e-05, "loss": 0.6313, "step": 3264 }, { "epoch": 0.08, "grad_norm": 4.455082893371582, "learning_rate": 1.9849292400552182e-05, "loss": 0.7157, "step": 3265 }, { "epoch": 0.08, "grad_norm": 2.98291015625, "learning_rate": 1.984914878695177e-05, "loss": 0.7391, "step": 3266 }, { "epoch": 0.08, "grad_norm": 2.754225015640259, "learning_rate": 1.984900510547721e-05, "loss": 0.7482, "step": 3267 }, { "epoch": 0.08, "grad_norm": 8.46441650390625, "learning_rate": 1.98488613561295e-05, "loss": 0.803, "step": 3268 }, { "epoch": 0.08, "grad_norm": 7.00941276550293, "learning_rate": 1.984871753890962e-05, "loss": 0.8811, "step": 3269 }, { "epoch": 0.08, "grad_norm": 2.1903717517852783, "learning_rate": 1.9848573653818567e-05, "loss": 0.5839, "step": 3270 }, { "epoch": 0.08, "grad_norm": 5.504976749420166, "learning_rate": 1.984842970085733e-05, "loss": 0.7496, "step": 3271 }, { "epoch": 0.08, "grad_norm": 2.7304067611694336, "learning_rate": 1.984828568002691e-05, "loss": 0.7277, "step": 3272 }, { "epoch": 0.08, "grad_norm": 2.047081470489502, "learning_rate": 1.9848141591328286e-05, "loss": 0.6381, "step": 3273 }, { "epoch": 0.08, "grad_norm": 3.4483275413513184, "learning_rate": 1.9847997434762458e-05, "loss": 0.7118, "step": 3274 }, { "epoch": 0.08, "grad_norm": 3.186671733856201, "learning_rate": 1.9847853210330422e-05, "loss": 0.6431, "step": 3275 }, { "epoch": 0.08, "grad_norm": 3.479276180267334, "learning_rate": 1.9847708918033165e-05, "loss": 0.4952, "step": 3276 }, { "epoch": 0.08, "grad_norm": 2.555614709854126, "learning_rate": 1.9847564557871685e-05, "loss": 0.6953, "step": 3277 }, { "epoch": 0.08, "grad_norm": 1.7938586473464966, "learning_rate": 1.984742012984698e-05, "loss": 0.6306, "step": 3278 }, { "epoch": 0.08, "grad_norm": 5.440009117126465, "learning_rate": 1.984727563396004e-05, "loss": 0.7607, "step": 3279 }, { "epoch": 0.08, "grad_norm": 2.366476535797119, "learning_rate": 1.9847131070211865e-05, "loss": 0.6475, "step": 3280 }, { "epoch": 0.08, "grad_norm": 3.4527957439422607, "learning_rate": 1.984698643860345e-05, "loss": 0.8596, "step": 3281 }, { "epoch": 0.08, "grad_norm": 1.9438018798828125, "learning_rate": 1.984684173913579e-05, "loss": 0.5691, "step": 3282 }, { "epoch": 0.08, "grad_norm": 4.085956573486328, "learning_rate": 1.984669697180988e-05, "loss": 0.9446, "step": 3283 }, { "epoch": 0.08, "grad_norm": 2.8966593742370605, "learning_rate": 1.9846552136626722e-05, "loss": 0.6659, "step": 3284 }, { "epoch": 0.08, "grad_norm": 3.235954523086548, "learning_rate": 1.9846407233587315e-05, "loss": 0.6963, "step": 3285 }, { "epoch": 0.08, "grad_norm": 2.796238422393799, "learning_rate": 1.984626226269265e-05, "loss": 0.6887, "step": 3286 }, { "epoch": 0.08, "grad_norm": 4.530532360076904, "learning_rate": 1.984611722394374e-05, "loss": 0.7521, "step": 3287 }, { "epoch": 0.08, "grad_norm": 4.043076515197754, "learning_rate": 1.984597211734157e-05, "loss": 0.6507, "step": 3288 }, { "epoch": 0.08, "grad_norm": 2.6922309398651123, "learning_rate": 1.984582694288715e-05, "loss": 0.7144, "step": 3289 }, { "epoch": 0.08, "grad_norm": 1.8010544776916504, "learning_rate": 1.9845681700581473e-05, "loss": 0.7498, "step": 3290 }, { "epoch": 0.08, "grad_norm": 2.2448792457580566, "learning_rate": 1.9845536390425546e-05, "loss": 0.7685, "step": 3291 }, { "epoch": 0.08, "grad_norm": 2.3501150608062744, "learning_rate": 1.9845391012420366e-05, "loss": 0.5609, "step": 3292 }, { "epoch": 0.08, "grad_norm": 2.5099377632141113, "learning_rate": 1.984524556656694e-05, "loss": 0.6415, "step": 3293 }, { "epoch": 0.08, "grad_norm": 1.7621952295303345, "learning_rate": 1.984510005286626e-05, "loss": 0.7823, "step": 3294 }, { "epoch": 0.08, "grad_norm": 2.548919200897217, "learning_rate": 1.9844954471319342e-05, "loss": 0.7644, "step": 3295 }, { "epoch": 0.08, "grad_norm": 2.173417806625366, "learning_rate": 1.984480882192718e-05, "loss": 0.7706, "step": 3296 }, { "epoch": 0.08, "grad_norm": 3.0355892181396484, "learning_rate": 1.984466310469078e-05, "loss": 0.7738, "step": 3297 }, { "epoch": 0.08, "grad_norm": 4.19245719909668, "learning_rate": 1.9844517319611148e-05, "loss": 0.6345, "step": 3298 }, { "epoch": 0.08, "grad_norm": 2.4438095092773438, "learning_rate": 1.984437146668929e-05, "loss": 0.7127, "step": 3299 }, { "epoch": 0.08, "grad_norm": 3.505793571472168, "learning_rate": 1.9844225545926206e-05, "loss": 0.7897, "step": 3300 }, { "epoch": 0.08, "grad_norm": 2.947836399078369, "learning_rate": 1.9844079557322903e-05, "loss": 0.7652, "step": 3301 }, { "epoch": 0.08, "grad_norm": 1.5492815971374512, "learning_rate": 1.9843933500880392e-05, "loss": 0.6757, "step": 3302 }, { "epoch": 0.08, "grad_norm": 3.8192074298858643, "learning_rate": 1.9843787376599674e-05, "loss": 0.7261, "step": 3303 }, { "epoch": 0.08, "grad_norm": 2.1575326919555664, "learning_rate": 1.9843641184481756e-05, "loss": 0.762, "step": 3304 }, { "epoch": 0.08, "grad_norm": 5.299115180969238, "learning_rate": 1.984349492452765e-05, "loss": 0.6421, "step": 3305 }, { "epoch": 0.08, "grad_norm": 1.8312174081802368, "learning_rate": 1.9843348596738362e-05, "loss": 0.6254, "step": 3306 }, { "epoch": 0.08, "grad_norm": 5.439855098724365, "learning_rate": 1.98432022011149e-05, "loss": 0.7731, "step": 3307 }, { "epoch": 0.08, "grad_norm": 2.8182191848754883, "learning_rate": 1.984305573765827e-05, "loss": 0.7219, "step": 3308 }, { "epoch": 0.08, "grad_norm": 2.416120767593384, "learning_rate": 1.984290920636948e-05, "loss": 0.6557, "step": 3309 }, { "epoch": 0.08, "grad_norm": 5.730042934417725, "learning_rate": 1.984276260724955e-05, "loss": 0.9025, "step": 3310 }, { "epoch": 0.08, "grad_norm": 3.412628412246704, "learning_rate": 1.9842615940299484e-05, "loss": 0.6854, "step": 3311 }, { "epoch": 0.08, "grad_norm": 1.5808030366897583, "learning_rate": 1.984246920552029e-05, "loss": 0.5134, "step": 3312 }, { "epoch": 0.08, "grad_norm": 1.8525174856185913, "learning_rate": 1.9842322402912985e-05, "loss": 0.6645, "step": 3313 }, { "epoch": 0.08, "grad_norm": 3.0312082767486572, "learning_rate": 1.9842175532478575e-05, "loss": 0.649, "step": 3314 }, { "epoch": 0.08, "grad_norm": 2.6055753231048584, "learning_rate": 1.9842028594218076e-05, "loss": 0.7489, "step": 3315 }, { "epoch": 0.08, "grad_norm": 3.6630172729492188, "learning_rate": 1.9841881588132497e-05, "loss": 0.642, "step": 3316 }, { "epoch": 0.09, "grad_norm": 1.9826854467391968, "learning_rate": 1.9841734514222853e-05, "loss": 0.8474, "step": 3317 }, { "epoch": 0.09, "grad_norm": 2.557730197906494, "learning_rate": 1.984158737249016e-05, "loss": 0.6864, "step": 3318 }, { "epoch": 0.09, "grad_norm": 2.3257532119750977, "learning_rate": 1.9841440162935433e-05, "loss": 0.662, "step": 3319 }, { "epoch": 0.09, "grad_norm": 2.365769147872925, "learning_rate": 1.984129288555968e-05, "loss": 0.74, "step": 3320 }, { "epoch": 0.09, "grad_norm": 1.8152152299880981, "learning_rate": 1.984114554036392e-05, "loss": 0.5492, "step": 3321 }, { "epoch": 0.09, "grad_norm": 2.7444677352905273, "learning_rate": 1.9840998127349164e-05, "loss": 0.7487, "step": 3322 }, { "epoch": 0.09, "grad_norm": 2.4883663654327393, "learning_rate": 1.9840850646516435e-05, "loss": 0.5985, "step": 3323 }, { "epoch": 0.09, "grad_norm": 6.605597019195557, "learning_rate": 1.984070309786675e-05, "loss": 0.8738, "step": 3324 }, { "epoch": 0.09, "grad_norm": 1.9408541917800903, "learning_rate": 1.9840555481401114e-05, "loss": 0.8307, "step": 3325 }, { "epoch": 0.09, "grad_norm": 3.3203794956207275, "learning_rate": 1.9840407797120555e-05, "loss": 0.7735, "step": 3326 }, { "epoch": 0.09, "grad_norm": 2.996128797531128, "learning_rate": 1.9840260045026087e-05, "loss": 0.6455, "step": 3327 }, { "epoch": 0.09, "grad_norm": 4.38912296295166, "learning_rate": 1.9840112225118734e-05, "loss": 0.7057, "step": 3328 }, { "epoch": 0.09, "grad_norm": 2.254375696182251, "learning_rate": 1.9839964337399503e-05, "loss": 0.7092, "step": 3329 }, { "epoch": 0.09, "grad_norm": 2.63303279876709, "learning_rate": 1.9839816381869422e-05, "loss": 0.6668, "step": 3330 }, { "epoch": 0.09, "grad_norm": 3.7605197429656982, "learning_rate": 1.9839668358529512e-05, "loss": 0.6401, "step": 3331 }, { "epoch": 0.09, "grad_norm": 4.1005706787109375, "learning_rate": 1.9839520267380787e-05, "loss": 0.6721, "step": 3332 }, { "epoch": 0.09, "grad_norm": 1.556363582611084, "learning_rate": 1.9839372108424268e-05, "loss": 0.7312, "step": 3333 }, { "epoch": 0.09, "grad_norm": 3.0149354934692383, "learning_rate": 1.9839223881660983e-05, "loss": 0.7523, "step": 3334 }, { "epoch": 0.09, "grad_norm": 2.8381831645965576, "learning_rate": 1.9839075587091945e-05, "loss": 0.5128, "step": 3335 }, { "epoch": 0.09, "grad_norm": 3.179211378097534, "learning_rate": 1.983892722471818e-05, "loss": 0.7423, "step": 3336 }, { "epoch": 0.09, "grad_norm": 3.807175636291504, "learning_rate": 1.983877879454071e-05, "loss": 0.6499, "step": 3337 }, { "epoch": 0.09, "grad_norm": 1.9096148014068604, "learning_rate": 1.983863029656056e-05, "loss": 0.7407, "step": 3338 }, { "epoch": 0.09, "grad_norm": 1.982957124710083, "learning_rate": 1.9838481730778747e-05, "loss": 0.6452, "step": 3339 }, { "epoch": 0.09, "grad_norm": 2.50842547416687, "learning_rate": 1.9838333097196302e-05, "loss": 0.7146, "step": 3340 }, { "epoch": 0.09, "grad_norm": 3.4873645305633545, "learning_rate": 1.9838184395814246e-05, "loss": 0.8129, "step": 3341 }, { "epoch": 0.09, "grad_norm": 3.280482769012451, "learning_rate": 1.9838035626633603e-05, "loss": 0.6487, "step": 3342 }, { "epoch": 0.09, "grad_norm": 2.019181489944458, "learning_rate": 1.98378867896554e-05, "loss": 0.6104, "step": 3343 }, { "epoch": 0.09, "grad_norm": 1.7604209184646606, "learning_rate": 1.9837737884880662e-05, "loss": 0.5542, "step": 3344 }, { "epoch": 0.09, "grad_norm": 2.4034371376037598, "learning_rate": 1.9837588912310417e-05, "loss": 0.7442, "step": 3345 }, { "epoch": 0.09, "grad_norm": 1.864544153213501, "learning_rate": 1.9837439871945688e-05, "loss": 0.6371, "step": 3346 }, { "epoch": 0.09, "grad_norm": 3.104482889175415, "learning_rate": 1.9837290763787505e-05, "loss": 0.7633, "step": 3347 }, { "epoch": 0.09, "grad_norm": 1.8695390224456787, "learning_rate": 1.9837141587836894e-05, "loss": 0.6296, "step": 3348 }, { "epoch": 0.09, "grad_norm": 3.151803970336914, "learning_rate": 1.983699234409488e-05, "loss": 0.6604, "step": 3349 }, { "epoch": 0.09, "grad_norm": 1.7268906831741333, "learning_rate": 1.9836843032562498e-05, "loss": 0.5934, "step": 3350 }, { "epoch": 0.09, "grad_norm": 5.288184642791748, "learning_rate": 1.9836693653240777e-05, "loss": 0.7881, "step": 3351 }, { "epoch": 0.09, "grad_norm": 8.177361488342285, "learning_rate": 1.983654420613074e-05, "loss": 0.6843, "step": 3352 }, { "epoch": 0.09, "grad_norm": 1.7183820009231567, "learning_rate": 1.983639469123342e-05, "loss": 0.8137, "step": 3353 }, { "epoch": 0.09, "grad_norm": 2.9870083332061768, "learning_rate": 1.983624510854985e-05, "loss": 0.6984, "step": 3354 }, { "epoch": 0.09, "grad_norm": 2.5040414333343506, "learning_rate": 1.9836095458081053e-05, "loss": 0.8885, "step": 3355 }, { "epoch": 0.09, "grad_norm": 3.7921910285949707, "learning_rate": 1.983594573982807e-05, "loss": 0.7445, "step": 3356 }, { "epoch": 0.09, "grad_norm": 3.4180963039398193, "learning_rate": 1.983579595379193e-05, "loss": 0.7048, "step": 3357 }, { "epoch": 0.09, "grad_norm": 1.8293405771255493, "learning_rate": 1.983564609997366e-05, "loss": 0.7045, "step": 3358 }, { "epoch": 0.09, "grad_norm": 1.4257326126098633, "learning_rate": 1.98354961783743e-05, "loss": 0.6128, "step": 3359 }, { "epoch": 0.09, "grad_norm": 1.4060065746307373, "learning_rate": 1.9835346188994876e-05, "loss": 0.5757, "step": 3360 }, { "epoch": 0.09, "grad_norm": 1.468679666519165, "learning_rate": 1.9835196131836428e-05, "loss": 0.5537, "step": 3361 }, { "epoch": 0.09, "grad_norm": 3.7029175758361816, "learning_rate": 1.9835046006899986e-05, "loss": 0.8778, "step": 3362 }, { "epoch": 0.09, "grad_norm": 2.978402614593506, "learning_rate": 1.9834895814186587e-05, "loss": 0.6886, "step": 3363 }, { "epoch": 0.09, "grad_norm": 2.4657742977142334, "learning_rate": 1.983474555369726e-05, "loss": 0.7087, "step": 3364 }, { "epoch": 0.09, "grad_norm": 1.9443461894989014, "learning_rate": 1.983459522543305e-05, "loss": 0.6788, "step": 3365 }, { "epoch": 0.09, "grad_norm": 2.8676869869232178, "learning_rate": 1.983444482939499e-05, "loss": 0.8133, "step": 3366 }, { "epoch": 0.09, "grad_norm": 1.5216870307922363, "learning_rate": 1.9834294365584114e-05, "loss": 0.6392, "step": 3367 }, { "epoch": 0.09, "grad_norm": 2.708369016647339, "learning_rate": 1.983414383400146e-05, "loss": 0.6081, "step": 3368 }, { "epoch": 0.09, "grad_norm": 3.9886908531188965, "learning_rate": 1.983399323464806e-05, "loss": 0.8661, "step": 3369 }, { "epoch": 0.09, "grad_norm": 1.9565385580062866, "learning_rate": 1.983384256752496e-05, "loss": 0.6414, "step": 3370 }, { "epoch": 0.09, "grad_norm": 5.871531009674072, "learning_rate": 1.9833691832633196e-05, "loss": 0.902, "step": 3371 }, { "epoch": 0.09, "grad_norm": 2.02944016456604, "learning_rate": 1.9833541029973807e-05, "loss": 0.8021, "step": 3372 }, { "epoch": 0.09, "grad_norm": 4.763683319091797, "learning_rate": 1.983339015954783e-05, "loss": 0.8004, "step": 3373 }, { "epoch": 0.09, "grad_norm": 7.583322525024414, "learning_rate": 1.9833239221356308e-05, "loss": 0.7513, "step": 3374 }, { "epoch": 0.09, "grad_norm": 4.8588080406188965, "learning_rate": 1.9833088215400277e-05, "loss": 0.7752, "step": 3375 }, { "epoch": 0.09, "grad_norm": 3.160402297973633, "learning_rate": 1.9832937141680778e-05, "loss": 0.7098, "step": 3376 }, { "epoch": 0.09, "grad_norm": 2.7598586082458496, "learning_rate": 1.9832786000198856e-05, "loss": 0.7357, "step": 3377 }, { "epoch": 0.09, "grad_norm": 4.999602794647217, "learning_rate": 1.9832634790955548e-05, "loss": 0.7675, "step": 3378 }, { "epoch": 0.09, "grad_norm": 1.596071720123291, "learning_rate": 1.9832483513951905e-05, "loss": 0.6039, "step": 3379 }, { "epoch": 0.09, "grad_norm": 1.9804991483688354, "learning_rate": 1.9832332169188957e-05, "loss": 0.7274, "step": 3380 }, { "epoch": 0.09, "grad_norm": 1.801308512687683, "learning_rate": 1.9832180756667757e-05, "loss": 0.8878, "step": 3381 }, { "epoch": 0.09, "grad_norm": 3.1864845752716064, "learning_rate": 1.983202927638934e-05, "loss": 0.8132, "step": 3382 }, { "epoch": 0.09, "grad_norm": 2.19781756401062, "learning_rate": 1.9831877728354757e-05, "loss": 0.5753, "step": 3383 }, { "epoch": 0.09, "grad_norm": 2.817389488220215, "learning_rate": 1.9831726112565047e-05, "loss": 0.6598, "step": 3384 }, { "epoch": 0.09, "grad_norm": 3.0416462421417236, "learning_rate": 1.983157442902126e-05, "loss": 0.7952, "step": 3385 }, { "epoch": 0.09, "grad_norm": 2.735614061355591, "learning_rate": 1.983142267772444e-05, "loss": 0.6849, "step": 3386 }, { "epoch": 0.09, "grad_norm": 2.4274933338165283, "learning_rate": 1.983127085867563e-05, "loss": 0.7743, "step": 3387 }, { "epoch": 0.09, "grad_norm": 1.8618119955062866, "learning_rate": 1.9831118971875876e-05, "loss": 0.7307, "step": 3388 }, { "epoch": 0.09, "grad_norm": 1.5461459159851074, "learning_rate": 1.9830967017326232e-05, "loss": 0.6739, "step": 3389 }, { "epoch": 0.09, "grad_norm": 2.3463001251220703, "learning_rate": 1.9830814995027735e-05, "loss": 0.6189, "step": 3390 }, { "epoch": 0.09, "grad_norm": 3.1642017364501953, "learning_rate": 1.9830662904981438e-05, "loss": 0.6265, "step": 3391 }, { "epoch": 0.09, "grad_norm": 3.782036781311035, "learning_rate": 1.983051074718839e-05, "loss": 0.5866, "step": 3392 }, { "epoch": 0.09, "grad_norm": 1.9489576816558838, "learning_rate": 1.9830358521649636e-05, "loss": 0.5811, "step": 3393 }, { "epoch": 0.09, "grad_norm": 2.580606698989868, "learning_rate": 1.9830206228366227e-05, "loss": 0.5977, "step": 3394 }, { "epoch": 0.09, "grad_norm": 2.876088857650757, "learning_rate": 1.9830053867339214e-05, "loss": 0.81, "step": 3395 }, { "epoch": 0.09, "grad_norm": 3.506316900253296, "learning_rate": 1.982990143856965e-05, "loss": 0.6812, "step": 3396 }, { "epoch": 0.09, "grad_norm": 2.931328058242798, "learning_rate": 1.9829748942058574e-05, "loss": 0.6482, "step": 3397 }, { "epoch": 0.09, "grad_norm": 1.6937769651412964, "learning_rate": 1.9829596377807045e-05, "loss": 0.6961, "step": 3398 }, { "epoch": 0.09, "grad_norm": 2.5014078617095947, "learning_rate": 1.9829443745816116e-05, "loss": 0.7124, "step": 3399 }, { "epoch": 0.09, "grad_norm": 1.1601014137268066, "learning_rate": 1.9829291046086834e-05, "loss": 0.6217, "step": 3400 }, { "epoch": 0.09, "grad_norm": 2.7544679641723633, "learning_rate": 1.9829138278620253e-05, "loss": 0.683, "step": 3401 }, { "epoch": 0.09, "grad_norm": 4.4234442710876465, "learning_rate": 1.982898544341743e-05, "loss": 0.5911, "step": 3402 }, { "epoch": 0.09, "grad_norm": 3.751584529876709, "learning_rate": 1.982883254047941e-05, "loss": 0.6819, "step": 3403 }, { "epoch": 0.09, "grad_norm": 3.1353938579559326, "learning_rate": 1.9828679569807254e-05, "loss": 0.6147, "step": 3404 }, { "epoch": 0.09, "grad_norm": 3.860879898071289, "learning_rate": 1.9828526531402016e-05, "loss": 0.7625, "step": 3405 }, { "epoch": 0.09, "grad_norm": 4.2835588455200195, "learning_rate": 1.982837342526474e-05, "loss": 0.6975, "step": 3406 }, { "epoch": 0.09, "grad_norm": 3.0749528408050537, "learning_rate": 1.9828220251396498e-05, "loss": 0.826, "step": 3407 }, { "epoch": 0.09, "grad_norm": 3.08791184425354, "learning_rate": 1.9828067009798332e-05, "loss": 0.6944, "step": 3408 }, { "epoch": 0.09, "grad_norm": 3.7691731452941895, "learning_rate": 1.9827913700471303e-05, "loss": 0.78, "step": 3409 }, { "epoch": 0.09, "grad_norm": 2.7631425857543945, "learning_rate": 1.982776032341647e-05, "loss": 0.6904, "step": 3410 }, { "epoch": 0.09, "grad_norm": 1.6746933460235596, "learning_rate": 1.9827606878634882e-05, "loss": 0.6801, "step": 3411 }, { "epoch": 0.09, "grad_norm": 4.0402655601501465, "learning_rate": 1.9827453366127604e-05, "loss": 0.8318, "step": 3412 }, { "epoch": 0.09, "grad_norm": 5.805769920349121, "learning_rate": 1.9827299785895694e-05, "loss": 0.7435, "step": 3413 }, { "epoch": 0.09, "grad_norm": 1.9972721338272095, "learning_rate": 1.9827146137940204e-05, "loss": 0.7943, "step": 3414 }, { "epoch": 0.09, "grad_norm": 2.4830219745635986, "learning_rate": 1.98269924222622e-05, "loss": 0.8849, "step": 3415 }, { "epoch": 0.09, "grad_norm": 2.6820802688598633, "learning_rate": 1.9826838638862737e-05, "loss": 0.6639, "step": 3416 }, { "epoch": 0.09, "grad_norm": 3.794405698776245, "learning_rate": 1.9826684787742878e-05, "loss": 0.7441, "step": 3417 }, { "epoch": 0.09, "grad_norm": 2.611732006072998, "learning_rate": 1.982653086890368e-05, "loss": 0.7165, "step": 3418 }, { "epoch": 0.09, "grad_norm": 4.213864326477051, "learning_rate": 1.98263768823462e-05, "loss": 0.6609, "step": 3419 }, { "epoch": 0.09, "grad_norm": 2.0129542350769043, "learning_rate": 1.9826222828071512e-05, "loss": 0.728, "step": 3420 }, { "epoch": 0.09, "grad_norm": 1.8907328844070435, "learning_rate": 1.9826068706080663e-05, "loss": 0.7304, "step": 3421 }, { "epoch": 0.09, "grad_norm": 2.4174184799194336, "learning_rate": 1.9825914516374728e-05, "loss": 0.7639, "step": 3422 }, { "epoch": 0.09, "grad_norm": 2.0507075786590576, "learning_rate": 1.9825760258954757e-05, "loss": 0.7455, "step": 3423 }, { "epoch": 0.09, "grad_norm": 2.1413867473602295, "learning_rate": 1.9825605933821826e-05, "loss": 0.6181, "step": 3424 }, { "epoch": 0.09, "grad_norm": 3.9787981510162354, "learning_rate": 1.9825451540976987e-05, "loss": 0.7834, "step": 3425 }, { "epoch": 0.09, "grad_norm": 2.113201856613159, "learning_rate": 1.982529708042131e-05, "loss": 0.5688, "step": 3426 }, { "epoch": 0.09, "grad_norm": 2.0468785762786865, "learning_rate": 1.9825142552155858e-05, "loss": 0.5418, "step": 3427 }, { "epoch": 0.09, "grad_norm": 1.9277924299240112, "learning_rate": 1.98249879561817e-05, "loss": 0.5386, "step": 3428 }, { "epoch": 0.09, "grad_norm": 8.093246459960938, "learning_rate": 1.9824833292499895e-05, "loss": 0.6975, "step": 3429 }, { "epoch": 0.09, "grad_norm": 4.01707649230957, "learning_rate": 1.982467856111151e-05, "loss": 0.8047, "step": 3430 }, { "epoch": 0.09, "grad_norm": 1.3730778694152832, "learning_rate": 1.9824523762017615e-05, "loss": 0.5312, "step": 3431 }, { "epoch": 0.09, "grad_norm": 4.749396800994873, "learning_rate": 1.982436889521927e-05, "loss": 0.7432, "step": 3432 }, { "epoch": 0.09, "grad_norm": 4.058465480804443, "learning_rate": 1.9824213960717553e-05, "loss": 0.5436, "step": 3433 }, { "epoch": 0.09, "grad_norm": 1.3306399583816528, "learning_rate": 1.9824058958513522e-05, "loss": 0.6895, "step": 3434 }, { "epoch": 0.09, "grad_norm": 2.2976999282836914, "learning_rate": 1.982390388860825e-05, "loss": 0.5316, "step": 3435 }, { "epoch": 0.09, "grad_norm": 3.1783831119537354, "learning_rate": 1.9823748751002805e-05, "loss": 0.727, "step": 3436 }, { "epoch": 0.09, "grad_norm": 2.8292391300201416, "learning_rate": 1.9823593545698254e-05, "loss": 0.411, "step": 3437 }, { "epoch": 0.09, "grad_norm": 3.051582098007202, "learning_rate": 1.982343827269567e-05, "loss": 0.6553, "step": 3438 }, { "epoch": 0.09, "grad_norm": 2.4074440002441406, "learning_rate": 1.9823282931996123e-05, "loss": 0.6039, "step": 3439 }, { "epoch": 0.09, "grad_norm": 1.3884276151657104, "learning_rate": 1.9823127523600674e-05, "loss": 0.69, "step": 3440 }, { "epoch": 0.09, "grad_norm": 1.5610312223434448, "learning_rate": 1.9822972047510407e-05, "loss": 0.6887, "step": 3441 }, { "epoch": 0.09, "grad_norm": 3.3881120681762695, "learning_rate": 1.982281650372639e-05, "loss": 0.5905, "step": 3442 }, { "epoch": 0.09, "grad_norm": 2.440692901611328, "learning_rate": 1.982266089224969e-05, "loss": 0.6899, "step": 3443 }, { "epoch": 0.09, "grad_norm": 3.4462945461273193, "learning_rate": 1.9822505213081383e-05, "loss": 0.8991, "step": 3444 }, { "epoch": 0.09, "grad_norm": 1.8623319864273071, "learning_rate": 1.982234946622254e-05, "loss": 0.6012, "step": 3445 }, { "epoch": 0.09, "grad_norm": 3.216818332672119, "learning_rate": 1.9822193651674236e-05, "loss": 0.6445, "step": 3446 }, { "epoch": 0.09, "grad_norm": 4.581181526184082, "learning_rate": 1.9822037769437545e-05, "loss": 0.833, "step": 3447 }, { "epoch": 0.09, "grad_norm": 3.851276397705078, "learning_rate": 1.982188181951354e-05, "loss": 0.6007, "step": 3448 }, { "epoch": 0.09, "grad_norm": 4.466125965118408, "learning_rate": 1.9821725801903295e-05, "loss": 0.7528, "step": 3449 }, { "epoch": 0.09, "grad_norm": 2.3069300651550293, "learning_rate": 1.9821569716607887e-05, "loss": 0.5985, "step": 3450 }, { "epoch": 0.09, "grad_norm": 2.28023099899292, "learning_rate": 1.9821413563628393e-05, "loss": 0.6672, "step": 3451 }, { "epoch": 0.09, "grad_norm": 3.732853412628174, "learning_rate": 1.9821257342965885e-05, "loss": 0.7449, "step": 3452 }, { "epoch": 0.09, "grad_norm": 1.7202922105789185, "learning_rate": 1.982110105462144e-05, "loss": 0.5427, "step": 3453 }, { "epoch": 0.09, "grad_norm": 2.6227149963378906, "learning_rate": 1.982094469859614e-05, "loss": 0.6882, "step": 3454 }, { "epoch": 0.09, "grad_norm": 4.769813537597656, "learning_rate": 1.9820788274891057e-05, "loss": 0.5845, "step": 3455 }, { "epoch": 0.09, "grad_norm": 2.6354708671569824, "learning_rate": 1.9820631783507275e-05, "loss": 0.5926, "step": 3456 }, { "epoch": 0.09, "grad_norm": 4.940619945526123, "learning_rate": 1.9820475224445865e-05, "loss": 0.7541, "step": 3457 }, { "epoch": 0.09, "grad_norm": 3.5594482421875, "learning_rate": 1.982031859770791e-05, "loss": 0.7, "step": 3458 }, { "epoch": 0.09, "grad_norm": 3.737926483154297, "learning_rate": 1.9820161903294492e-05, "loss": 0.7939, "step": 3459 }, { "epoch": 0.09, "grad_norm": 2.5148065090179443, "learning_rate": 1.9820005141206688e-05, "loss": 0.7778, "step": 3460 }, { "epoch": 0.09, "grad_norm": 3.5010626316070557, "learning_rate": 1.9819848311445576e-05, "loss": 0.4988, "step": 3461 }, { "epoch": 0.09, "grad_norm": 1.8814277648925781, "learning_rate": 1.9819691414012237e-05, "loss": 0.7045, "step": 3462 }, { "epoch": 0.09, "grad_norm": 2.467611789703369, "learning_rate": 1.9819534448907755e-05, "loss": 0.7696, "step": 3463 }, { "epoch": 0.09, "grad_norm": 2.5975608825683594, "learning_rate": 1.9819377416133212e-05, "loss": 0.8454, "step": 3464 }, { "epoch": 0.09, "grad_norm": 2.8819570541381836, "learning_rate": 1.9819220315689687e-05, "loss": 0.8404, "step": 3465 }, { "epoch": 0.09, "grad_norm": 2.8995232582092285, "learning_rate": 1.981906314757827e-05, "loss": 0.7927, "step": 3466 }, { "epoch": 0.09, "grad_norm": 6.7108073234558105, "learning_rate": 1.9818905911800032e-05, "loss": 0.9289, "step": 3467 }, { "epoch": 0.09, "grad_norm": 3.4771885871887207, "learning_rate": 1.981874860835607e-05, "loss": 0.7955, "step": 3468 }, { "epoch": 0.09, "grad_norm": 2.4282567501068115, "learning_rate": 1.9818591237247453e-05, "loss": 0.7153, "step": 3469 }, { "epoch": 0.09, "grad_norm": 3.5181920528411865, "learning_rate": 1.981843379847528e-05, "loss": 0.5838, "step": 3470 }, { "epoch": 0.09, "grad_norm": 4.1179986000061035, "learning_rate": 1.9818276292040627e-05, "loss": 0.6496, "step": 3471 }, { "epoch": 0.09, "grad_norm": 2.65122389793396, "learning_rate": 1.9818118717944586e-05, "loss": 0.5742, "step": 3472 }, { "epoch": 0.09, "grad_norm": 2.6002752780914307, "learning_rate": 1.9817961076188234e-05, "loss": 0.8516, "step": 3473 }, { "epoch": 0.09, "grad_norm": 2.539045572280884, "learning_rate": 1.981780336677267e-05, "loss": 0.5246, "step": 3474 }, { "epoch": 0.09, "grad_norm": 2.0816423892974854, "learning_rate": 1.9817645589698966e-05, "loss": 0.6991, "step": 3475 }, { "epoch": 0.09, "grad_norm": 4.933387756347656, "learning_rate": 1.9817487744968217e-05, "loss": 0.6545, "step": 3476 }, { "epoch": 0.09, "grad_norm": 2.406367778778076, "learning_rate": 1.9817329832581515e-05, "loss": 0.6454, "step": 3477 }, { "epoch": 0.09, "grad_norm": 2.704960823059082, "learning_rate": 1.9817171852539937e-05, "loss": 0.9529, "step": 3478 }, { "epoch": 0.09, "grad_norm": 3.3383677005767822, "learning_rate": 1.9817013804844583e-05, "loss": 0.74, "step": 3479 }, { "epoch": 0.09, "grad_norm": 4.507795333862305, "learning_rate": 1.981685568949654e-05, "loss": 0.6192, "step": 3480 }, { "epoch": 0.09, "grad_norm": 2.5435285568237305, "learning_rate": 1.981669750649689e-05, "loss": 0.6313, "step": 3481 }, { "epoch": 0.09, "grad_norm": 2.7171103954315186, "learning_rate": 1.981653925584673e-05, "loss": 0.7075, "step": 3482 }, { "epoch": 0.09, "grad_norm": 4.04151725769043, "learning_rate": 1.981638093754715e-05, "loss": 0.7105, "step": 3483 }, { "epoch": 0.09, "grad_norm": 3.3046982288360596, "learning_rate": 1.9816222551599238e-05, "loss": 0.6819, "step": 3484 }, { "epoch": 0.09, "grad_norm": 1.8407896757125854, "learning_rate": 1.981606409800409e-05, "loss": 0.8351, "step": 3485 }, { "epoch": 0.09, "grad_norm": 2.291853666305542, "learning_rate": 1.9815905576762794e-05, "loss": 0.5421, "step": 3486 }, { "epoch": 0.09, "grad_norm": 2.8983657360076904, "learning_rate": 1.9815746987876444e-05, "loss": 0.6188, "step": 3487 }, { "epoch": 0.09, "grad_norm": 4.1627044677734375, "learning_rate": 1.981558833134613e-05, "loss": 0.6586, "step": 3488 }, { "epoch": 0.09, "grad_norm": 2.3874974250793457, "learning_rate": 1.9815429607172952e-05, "loss": 0.9103, "step": 3489 }, { "epoch": 0.09, "grad_norm": 1.7249884605407715, "learning_rate": 1.9815270815357995e-05, "loss": 0.7362, "step": 3490 }, { "epoch": 0.09, "grad_norm": 4.755847930908203, "learning_rate": 1.9815111955902362e-05, "loss": 0.7934, "step": 3491 }, { "epoch": 0.09, "grad_norm": 1.9274078607559204, "learning_rate": 1.9814953028807144e-05, "loss": 0.6151, "step": 3492 }, { "epoch": 0.09, "grad_norm": 3.3586487770080566, "learning_rate": 1.9814794034073434e-05, "loss": 0.7873, "step": 3493 }, { "epoch": 0.09, "grad_norm": 1.6730966567993164, "learning_rate": 1.9814634971702327e-05, "loss": 0.5925, "step": 3494 }, { "epoch": 0.09, "grad_norm": 1.883857011795044, "learning_rate": 1.981447584169493e-05, "loss": 0.721, "step": 3495 }, { "epoch": 0.09, "grad_norm": 2.0338516235351562, "learning_rate": 1.9814316644052325e-05, "loss": 0.6994, "step": 3496 }, { "epoch": 0.09, "grad_norm": 2.8503198623657227, "learning_rate": 1.9814157378775617e-05, "loss": 0.6851, "step": 3497 }, { "epoch": 0.09, "grad_norm": 3.5859994888305664, "learning_rate": 1.9813998045865904e-05, "loss": 0.7415, "step": 3498 }, { "epoch": 0.09, "grad_norm": 2.117021083831787, "learning_rate": 1.9813838645324283e-05, "loss": 0.7407, "step": 3499 }, { "epoch": 0.09, "grad_norm": 1.944482445716858, "learning_rate": 1.981367917715185e-05, "loss": 0.6299, "step": 3500 }, { "epoch": 0.09, "grad_norm": 2.019237756729126, "learning_rate": 1.9813519641349703e-05, "loss": 0.6588, "step": 3501 }, { "epoch": 0.09, "grad_norm": 3.0202298164367676, "learning_rate": 1.9813360037918947e-05, "loss": 0.631, "step": 3502 }, { "epoch": 0.09, "grad_norm": 2.9083218574523926, "learning_rate": 1.9813200366860678e-05, "loss": 0.7185, "step": 3503 }, { "epoch": 0.09, "grad_norm": 4.806107044219971, "learning_rate": 1.9813040628176e-05, "loss": 0.9496, "step": 3504 }, { "epoch": 0.09, "grad_norm": 4.6447529792785645, "learning_rate": 1.9812880821866006e-05, "loss": 0.6495, "step": 3505 }, { "epoch": 0.09, "grad_norm": 3.137087821960449, "learning_rate": 1.9812720947931806e-05, "loss": 0.5387, "step": 3506 }, { "epoch": 0.09, "grad_norm": 1.3567148447036743, "learning_rate": 1.9812561006374496e-05, "loss": 0.5231, "step": 3507 }, { "epoch": 0.09, "grad_norm": 3.4249343872070312, "learning_rate": 1.9812400997195186e-05, "loss": 0.6369, "step": 3508 }, { "epoch": 0.09, "grad_norm": 2.4970574378967285, "learning_rate": 1.9812240920394968e-05, "loss": 0.7726, "step": 3509 }, { "epoch": 0.09, "grad_norm": 2.155930280685425, "learning_rate": 1.9812080775974952e-05, "loss": 0.7207, "step": 3510 }, { "epoch": 0.09, "grad_norm": 1.9198347330093384, "learning_rate": 1.9811920563936238e-05, "loss": 0.6082, "step": 3511 }, { "epoch": 0.09, "grad_norm": 2.7567200660705566, "learning_rate": 1.9811760284279934e-05, "loss": 0.6948, "step": 3512 }, { "epoch": 0.09, "grad_norm": 2.30106520652771, "learning_rate": 1.981159993700714e-05, "loss": 0.8004, "step": 3513 }, { "epoch": 0.09, "grad_norm": 3.003326654434204, "learning_rate": 1.9811439522118967e-05, "loss": 0.6845, "step": 3514 }, { "epoch": 0.09, "grad_norm": 2.1142525672912598, "learning_rate": 1.9811279039616514e-05, "loss": 0.6394, "step": 3515 }, { "epoch": 0.09, "grad_norm": 3.12025785446167, "learning_rate": 1.9811118489500892e-05, "loss": 0.5648, "step": 3516 }, { "epoch": 0.09, "grad_norm": 2.5874264240264893, "learning_rate": 1.9810957871773204e-05, "loss": 0.6277, "step": 3517 }, { "epoch": 0.09, "grad_norm": 2.0816400051116943, "learning_rate": 1.981079718643456e-05, "loss": 0.5741, "step": 3518 }, { "epoch": 0.09, "grad_norm": 1.5976372957229614, "learning_rate": 1.9810636433486063e-05, "loss": 0.7307, "step": 3519 }, { "epoch": 0.09, "grad_norm": 1.71770441532135, "learning_rate": 1.9810475612928824e-05, "loss": 0.7542, "step": 3520 }, { "epoch": 0.09, "grad_norm": 3.7291436195373535, "learning_rate": 1.9810314724763952e-05, "loss": 0.8175, "step": 3521 }, { "epoch": 0.09, "grad_norm": 7.670107364654541, "learning_rate": 1.9810153768992553e-05, "loss": 0.7179, "step": 3522 }, { "epoch": 0.09, "grad_norm": 3.5285043716430664, "learning_rate": 1.9809992745615737e-05, "loss": 0.6371, "step": 3523 }, { "epoch": 0.09, "grad_norm": 2.369152784347534, "learning_rate": 1.9809831654634613e-05, "loss": 0.7872, "step": 3524 }, { "epoch": 0.09, "grad_norm": 1.6569017171859741, "learning_rate": 1.9809670496050296e-05, "loss": 0.6771, "step": 3525 }, { "epoch": 0.09, "grad_norm": 1.5938645601272583, "learning_rate": 1.9809509269863893e-05, "loss": 0.7473, "step": 3526 }, { "epoch": 0.09, "grad_norm": 2.929499387741089, "learning_rate": 1.980934797607651e-05, "loss": 0.7207, "step": 3527 }, { "epoch": 0.09, "grad_norm": 4.333584785461426, "learning_rate": 1.9809186614689267e-05, "loss": 0.7379, "step": 3528 }, { "epoch": 0.09, "grad_norm": 1.578668475151062, "learning_rate": 1.9809025185703278e-05, "loss": 0.7307, "step": 3529 }, { "epoch": 0.09, "grad_norm": 2.8544576168060303, "learning_rate": 1.980886368911964e-05, "loss": 0.6993, "step": 3530 }, { "epoch": 0.09, "grad_norm": 2.5866408348083496, "learning_rate": 1.9808702124939482e-05, "loss": 0.6439, "step": 3531 }, { "epoch": 0.09, "grad_norm": 2.3117287158966064, "learning_rate": 1.980854049316391e-05, "loss": 0.7394, "step": 3532 }, { "epoch": 0.09, "grad_norm": 2.3578803539276123, "learning_rate": 1.980837879379404e-05, "loss": 0.7038, "step": 3533 }, { "epoch": 0.09, "grad_norm": 3.1157305240631104, "learning_rate": 1.9808217026830986e-05, "loss": 0.6806, "step": 3534 }, { "epoch": 0.09, "grad_norm": 6.691228866577148, "learning_rate": 1.980805519227586e-05, "loss": 0.757, "step": 3535 }, { "epoch": 0.09, "grad_norm": 5.204026222229004, "learning_rate": 1.9807893290129782e-05, "loss": 0.6528, "step": 3536 }, { "epoch": 0.09, "grad_norm": 2.733452796936035, "learning_rate": 1.9807731320393863e-05, "loss": 0.6469, "step": 3537 }, { "epoch": 0.09, "grad_norm": 2.4258759021759033, "learning_rate": 1.9807569283069224e-05, "loss": 0.7557, "step": 3538 }, { "epoch": 0.09, "grad_norm": 2.2116754055023193, "learning_rate": 1.9807407178156977e-05, "loss": 0.6678, "step": 3539 }, { "epoch": 0.09, "grad_norm": 2.6585769653320312, "learning_rate": 1.9807245005658243e-05, "loss": 0.7651, "step": 3540 }, { "epoch": 0.09, "grad_norm": 1.9619336128234863, "learning_rate": 1.980708276557414e-05, "loss": 0.6172, "step": 3541 }, { "epoch": 0.09, "grad_norm": 1.9266513586044312, "learning_rate": 1.980692045790578e-05, "loss": 0.5516, "step": 3542 }, { "epoch": 0.09, "grad_norm": 10.116819381713867, "learning_rate": 1.9806758082654288e-05, "loss": 0.9397, "step": 3543 }, { "epoch": 0.09, "grad_norm": 8.461087226867676, "learning_rate": 1.9806595639820778e-05, "loss": 0.7267, "step": 3544 }, { "epoch": 0.09, "grad_norm": 2.4235470294952393, "learning_rate": 1.9806433129406374e-05, "loss": 0.651, "step": 3545 }, { "epoch": 0.09, "grad_norm": 2.4022529125213623, "learning_rate": 1.9806270551412192e-05, "loss": 0.5358, "step": 3546 }, { "epoch": 0.09, "grad_norm": 3.1357977390289307, "learning_rate": 1.9806107905839357e-05, "loss": 0.6587, "step": 3547 }, { "epoch": 0.09, "grad_norm": 3.426973819732666, "learning_rate": 1.9805945192688983e-05, "loss": 0.8469, "step": 3548 }, { "epoch": 0.09, "grad_norm": 1.7149099111557007, "learning_rate": 1.98057824119622e-05, "loss": 0.5974, "step": 3549 }, { "epoch": 0.09, "grad_norm": 3.800971746444702, "learning_rate": 1.9805619563660122e-05, "loss": 0.735, "step": 3550 }, { "epoch": 0.09, "grad_norm": 1.731825828552246, "learning_rate": 1.9805456647783878e-05, "loss": 0.6817, "step": 3551 }, { "epoch": 0.09, "grad_norm": 2.979997396469116, "learning_rate": 1.9805293664334584e-05, "loss": 0.832, "step": 3552 }, { "epoch": 0.09, "grad_norm": 4.1213531494140625, "learning_rate": 1.980513061331337e-05, "loss": 0.7994, "step": 3553 }, { "epoch": 0.09, "grad_norm": 3.6648852825164795, "learning_rate": 1.980496749472135e-05, "loss": 0.6675, "step": 3554 }, { "epoch": 0.09, "grad_norm": 2.162998914718628, "learning_rate": 1.9804804308559657e-05, "loss": 0.4368, "step": 3555 }, { "epoch": 0.09, "grad_norm": 5.128551959991455, "learning_rate": 1.980464105482941e-05, "loss": 0.7368, "step": 3556 }, { "epoch": 0.09, "grad_norm": 3.9627344608306885, "learning_rate": 1.9804477733531746e-05, "loss": 0.7834, "step": 3557 }, { "epoch": 0.09, "grad_norm": 2.991243362426758, "learning_rate": 1.9804314344667773e-05, "loss": 0.7843, "step": 3558 }, { "epoch": 0.09, "grad_norm": 2.240318775177002, "learning_rate": 1.9804150888238626e-05, "loss": 0.6391, "step": 3559 }, { "epoch": 0.09, "grad_norm": 1.6944538354873657, "learning_rate": 1.9803987364245435e-05, "loss": 0.7684, "step": 3560 }, { "epoch": 0.09, "grad_norm": 2.04437518119812, "learning_rate": 1.9803823772689317e-05, "loss": 0.6524, "step": 3561 }, { "epoch": 0.09, "grad_norm": 1.8658866882324219, "learning_rate": 1.980366011357141e-05, "loss": 0.6856, "step": 3562 }, { "epoch": 0.09, "grad_norm": 4.962900638580322, "learning_rate": 1.9803496386892832e-05, "loss": 0.8139, "step": 3563 }, { "epoch": 0.09, "grad_norm": 2.676975727081299, "learning_rate": 1.9803332592654718e-05, "loss": 0.8084, "step": 3564 }, { "epoch": 0.09, "grad_norm": 3.7615833282470703, "learning_rate": 1.9803168730858197e-05, "loss": 0.7213, "step": 3565 }, { "epoch": 0.09, "grad_norm": 2.035322427749634, "learning_rate": 1.9803004801504392e-05, "loss": 0.6345, "step": 3566 }, { "epoch": 0.09, "grad_norm": 3.0605547428131104, "learning_rate": 1.9802840804594437e-05, "loss": 0.6628, "step": 3567 }, { "epoch": 0.09, "grad_norm": 1.4246065616607666, "learning_rate": 1.9802676740129465e-05, "loss": 0.6973, "step": 3568 }, { "epoch": 0.09, "grad_norm": 1.3645861148834229, "learning_rate": 1.9802512608110603e-05, "loss": 0.5821, "step": 3569 }, { "epoch": 0.09, "grad_norm": 2.966585397720337, "learning_rate": 1.980234840853898e-05, "loss": 0.7854, "step": 3570 }, { "epoch": 0.09, "grad_norm": 2.587765693664551, "learning_rate": 1.9802184141415733e-05, "loss": 0.4511, "step": 3571 }, { "epoch": 0.09, "grad_norm": 2.743319511413574, "learning_rate": 1.980201980674199e-05, "loss": 0.7939, "step": 3572 }, { "epoch": 0.09, "grad_norm": 3.059462547302246, "learning_rate": 1.9801855404518884e-05, "loss": 0.7221, "step": 3573 }, { "epoch": 0.09, "grad_norm": 2.6450743675231934, "learning_rate": 1.9801690934747552e-05, "loss": 0.7182, "step": 3574 }, { "epoch": 0.09, "grad_norm": 2.808248996734619, "learning_rate": 1.980152639742912e-05, "loss": 0.8342, "step": 3575 }, { "epoch": 0.09, "grad_norm": 4.8344197273254395, "learning_rate": 1.9801361792564727e-05, "loss": 0.6352, "step": 3576 }, { "epoch": 0.09, "grad_norm": 2.337958812713623, "learning_rate": 1.9801197120155508e-05, "loss": 0.6576, "step": 3577 }, { "epoch": 0.09, "grad_norm": 2.142192840576172, "learning_rate": 1.9801032380202597e-05, "loss": 0.7974, "step": 3578 }, { "epoch": 0.09, "grad_norm": 3.031870126724243, "learning_rate": 1.9800867572707126e-05, "loss": 0.6772, "step": 3579 }, { "epoch": 0.09, "grad_norm": 1.693376898765564, "learning_rate": 1.9800702697670234e-05, "loss": 0.5813, "step": 3580 }, { "epoch": 0.09, "grad_norm": 4.9811530113220215, "learning_rate": 1.9800537755093056e-05, "loss": 0.8045, "step": 3581 }, { "epoch": 0.09, "grad_norm": 1.8887197971343994, "learning_rate": 1.980037274497673e-05, "loss": 0.7068, "step": 3582 }, { "epoch": 0.09, "grad_norm": 2.4646928310394287, "learning_rate": 1.980020766732239e-05, "loss": 0.7599, "step": 3583 }, { "epoch": 0.09, "grad_norm": 3.377668619155884, "learning_rate": 1.9800042522131182e-05, "loss": 0.683, "step": 3584 }, { "epoch": 0.09, "grad_norm": 1.8646751642227173, "learning_rate": 1.9799877309404233e-05, "loss": 0.678, "step": 3585 }, { "epoch": 0.09, "grad_norm": 2.056684970855713, "learning_rate": 1.9799712029142686e-05, "loss": 0.7822, "step": 3586 }, { "epoch": 0.09, "grad_norm": 3.1467418670654297, "learning_rate": 1.9799546681347683e-05, "loss": 0.8749, "step": 3587 }, { "epoch": 0.09, "grad_norm": 1.5564868450164795, "learning_rate": 1.979938126602036e-05, "loss": 0.6832, "step": 3588 }, { "epoch": 0.09, "grad_norm": 1.7471821308135986, "learning_rate": 1.9799215783161855e-05, "loss": 0.6102, "step": 3589 }, { "epoch": 0.09, "grad_norm": 2.684237003326416, "learning_rate": 1.9799050232773312e-05, "loss": 0.5713, "step": 3590 }, { "epoch": 0.09, "grad_norm": 4.352978229522705, "learning_rate": 1.9798884614855874e-05, "loss": 0.6717, "step": 3591 }, { "epoch": 0.09, "grad_norm": 6.225577354431152, "learning_rate": 1.979871892941068e-05, "loss": 0.7404, "step": 3592 }, { "epoch": 0.09, "grad_norm": 2.209972381591797, "learning_rate": 1.979855317643887e-05, "loss": 0.5484, "step": 3593 }, { "epoch": 0.09, "grad_norm": 2.71620774269104, "learning_rate": 1.9798387355941586e-05, "loss": 0.6342, "step": 3594 }, { "epoch": 0.09, "grad_norm": 7.611282825469971, "learning_rate": 1.9798221467919974e-05, "loss": 0.9378, "step": 3595 }, { "epoch": 0.09, "grad_norm": 3.4470009803771973, "learning_rate": 1.9798055512375173e-05, "loss": 0.6984, "step": 3596 }, { "epoch": 0.09, "grad_norm": 2.97340989112854, "learning_rate": 1.9797889489308334e-05, "loss": 0.6899, "step": 3597 }, { "epoch": 0.09, "grad_norm": 5.871802806854248, "learning_rate": 1.9797723398720592e-05, "loss": 0.847, "step": 3598 }, { "epoch": 0.09, "grad_norm": 2.965672492980957, "learning_rate": 1.9797557240613096e-05, "loss": 0.7384, "step": 3599 }, { "epoch": 0.09, "grad_norm": 3.515221357345581, "learning_rate": 1.9797391014986994e-05, "loss": 0.6215, "step": 3600 }, { "epoch": 0.09, "grad_norm": 2.1181275844573975, "learning_rate": 1.9797224721843426e-05, "loss": 0.5967, "step": 3601 }, { "epoch": 0.09, "grad_norm": 1.8138649463653564, "learning_rate": 1.979705836118354e-05, "loss": 0.6749, "step": 3602 }, { "epoch": 0.09, "grad_norm": 3.227959394454956, "learning_rate": 1.9796891933008484e-05, "loss": 0.7141, "step": 3603 }, { "epoch": 0.09, "grad_norm": 1.6604877710342407, "learning_rate": 1.9796725437319406e-05, "loss": 0.7215, "step": 3604 }, { "epoch": 0.09, "grad_norm": 2.289172887802124, "learning_rate": 1.9796558874117447e-05, "loss": 0.5707, "step": 3605 }, { "epoch": 0.09, "grad_norm": 2.839447498321533, "learning_rate": 1.9796392243403764e-05, "loss": 0.6037, "step": 3606 }, { "epoch": 0.09, "grad_norm": 2.0498085021972656, "learning_rate": 1.9796225545179496e-05, "loss": 0.74, "step": 3607 }, { "epoch": 0.09, "grad_norm": 5.689173698425293, "learning_rate": 1.9796058779445797e-05, "loss": 0.758, "step": 3608 }, { "epoch": 0.09, "grad_norm": 2.686577558517456, "learning_rate": 1.9795891946203815e-05, "loss": 0.6767, "step": 3609 }, { "epoch": 0.09, "grad_norm": 3.917630672454834, "learning_rate": 1.97957250454547e-05, "loss": 0.6152, "step": 3610 }, { "epoch": 0.09, "grad_norm": 2.91410231590271, "learning_rate": 1.9795558077199605e-05, "loss": 0.6745, "step": 3611 }, { "epoch": 0.09, "grad_norm": 3.540804386138916, "learning_rate": 1.9795391041439676e-05, "loss": 0.7755, "step": 3612 }, { "epoch": 0.09, "grad_norm": 1.9219746589660645, "learning_rate": 1.9795223938176063e-05, "loss": 0.6141, "step": 3613 }, { "epoch": 0.09, "grad_norm": 2.3849282264709473, "learning_rate": 1.979505676740993e-05, "loss": 0.762, "step": 3614 }, { "epoch": 0.09, "grad_norm": 2.5394511222839355, "learning_rate": 1.979488952914241e-05, "loss": 0.7507, "step": 3615 }, { "epoch": 0.09, "grad_norm": 2.2473411560058594, "learning_rate": 1.9794722223374666e-05, "loss": 0.6008, "step": 3616 }, { "epoch": 0.09, "grad_norm": 2.034081220626831, "learning_rate": 1.9794554850107856e-05, "loss": 0.5782, "step": 3617 }, { "epoch": 0.09, "grad_norm": 4.167763710021973, "learning_rate": 1.9794387409343125e-05, "loss": 0.5687, "step": 3618 }, { "epoch": 0.09, "grad_norm": 6.491054058074951, "learning_rate": 1.9794219901081625e-05, "loss": 0.6329, "step": 3619 }, { "epoch": 0.09, "grad_norm": 1.5472617149353027, "learning_rate": 1.979405232532452e-05, "loss": 0.7173, "step": 3620 }, { "epoch": 0.09, "grad_norm": 3.9513909816741943, "learning_rate": 1.979388468207296e-05, "loss": 0.6675, "step": 3621 }, { "epoch": 0.09, "grad_norm": 3.3635356426239014, "learning_rate": 1.9793716971328094e-05, "loss": 0.6977, "step": 3622 }, { "epoch": 0.09, "grad_norm": 2.460665702819824, "learning_rate": 1.9793549193091088e-05, "loss": 0.6149, "step": 3623 }, { "epoch": 0.09, "grad_norm": 6.760932445526123, "learning_rate": 1.9793381347363093e-05, "loss": 0.9621, "step": 3624 }, { "epoch": 0.09, "grad_norm": 2.4022178649902344, "learning_rate": 1.9793213434145268e-05, "loss": 0.6349, "step": 3625 }, { "epoch": 0.09, "grad_norm": 2.0508577823638916, "learning_rate": 1.9793045453438768e-05, "loss": 0.5635, "step": 3626 }, { "epoch": 0.09, "grad_norm": 3.1025164127349854, "learning_rate": 1.9792877405244752e-05, "loss": 0.6524, "step": 3627 }, { "epoch": 0.09, "grad_norm": 3.2485413551330566, "learning_rate": 1.9792709289564375e-05, "loss": 0.6355, "step": 3628 }, { "epoch": 0.09, "grad_norm": 2.8424594402313232, "learning_rate": 1.9792541106398796e-05, "loss": 0.6242, "step": 3629 }, { "epoch": 0.09, "grad_norm": 2.039126396179199, "learning_rate": 1.979237285574918e-05, "loss": 0.5537, "step": 3630 }, { "epoch": 0.09, "grad_norm": 2.7812845706939697, "learning_rate": 1.9792204537616685e-05, "loss": 0.7694, "step": 3631 }, { "epoch": 0.09, "grad_norm": 4.2711920738220215, "learning_rate": 1.9792036152002466e-05, "loss": 0.6268, "step": 3632 }, { "epoch": 0.09, "grad_norm": 1.8943814039230347, "learning_rate": 1.979186769890768e-05, "loss": 0.7785, "step": 3633 }, { "epoch": 0.09, "grad_norm": 2.716374635696411, "learning_rate": 1.97916991783335e-05, "loss": 0.6929, "step": 3634 }, { "epoch": 0.09, "grad_norm": 4.912908554077148, "learning_rate": 1.979153059028108e-05, "loss": 0.7211, "step": 3635 }, { "epoch": 0.09, "grad_norm": 3.2588343620300293, "learning_rate": 1.9791361934751583e-05, "loss": 0.7847, "step": 3636 }, { "epoch": 0.09, "grad_norm": 2.564096212387085, "learning_rate": 1.979119321174617e-05, "loss": 0.585, "step": 3637 }, { "epoch": 0.09, "grad_norm": 2.2500784397125244, "learning_rate": 1.9791024421266005e-05, "loss": 0.6671, "step": 3638 }, { "epoch": 0.09, "grad_norm": 2.9561402797698975, "learning_rate": 1.9790855563312252e-05, "loss": 0.6064, "step": 3639 }, { "epoch": 0.09, "grad_norm": 1.6248421669006348, "learning_rate": 1.979068663788607e-05, "loss": 0.5754, "step": 3640 }, { "epoch": 0.09, "grad_norm": 2.2400474548339844, "learning_rate": 1.9790517644988633e-05, "loss": 0.5947, "step": 3641 }, { "epoch": 0.09, "grad_norm": 3.1445891857147217, "learning_rate": 1.9790348584621094e-05, "loss": 0.7509, "step": 3642 }, { "epoch": 0.09, "grad_norm": 8.608214378356934, "learning_rate": 1.9790179456784626e-05, "loss": 0.639, "step": 3643 }, { "epoch": 0.09, "grad_norm": 3.5967183113098145, "learning_rate": 1.979001026148039e-05, "loss": 0.7891, "step": 3644 }, { "epoch": 0.09, "grad_norm": 2.3173813819885254, "learning_rate": 1.9789840998709556e-05, "loss": 0.6961, "step": 3645 }, { "epoch": 0.09, "grad_norm": 1.8242279291152954, "learning_rate": 1.9789671668473288e-05, "loss": 0.7596, "step": 3646 }, { "epoch": 0.09, "grad_norm": 1.8471242189407349, "learning_rate": 1.9789502270772757e-05, "loss": 0.7336, "step": 3647 }, { "epoch": 0.09, "grad_norm": 1.9194815158843994, "learning_rate": 1.978933280560912e-05, "loss": 0.8457, "step": 3648 }, { "epoch": 0.09, "grad_norm": 4.454719543457031, "learning_rate": 1.9789163272983556e-05, "loss": 0.6294, "step": 3649 }, { "epoch": 0.09, "grad_norm": 4.900404930114746, "learning_rate": 1.9788993672897224e-05, "loss": 0.8643, "step": 3650 }, { "epoch": 0.09, "grad_norm": 4.424870014190674, "learning_rate": 1.9788824005351305e-05, "loss": 0.5718, "step": 3651 }, { "epoch": 0.09, "grad_norm": 2.3639204502105713, "learning_rate": 1.9788654270346957e-05, "loss": 0.7408, "step": 3652 }, { "epoch": 0.09, "grad_norm": 5.773767948150635, "learning_rate": 1.9788484467885355e-05, "loss": 0.9117, "step": 3653 }, { "epoch": 0.09, "grad_norm": 17.08587074279785, "learning_rate": 1.9788314597967663e-05, "loss": 0.7424, "step": 3654 }, { "epoch": 0.09, "grad_norm": 2.116546630859375, "learning_rate": 1.978814466059506e-05, "loss": 0.6606, "step": 3655 }, { "epoch": 0.09, "grad_norm": 2.1134567260742188, "learning_rate": 1.9787974655768713e-05, "loss": 0.651, "step": 3656 }, { "epoch": 0.09, "grad_norm": 2.3812193870544434, "learning_rate": 1.9787804583489794e-05, "loss": 0.8849, "step": 3657 }, { "epoch": 0.09, "grad_norm": 2.0675010681152344, "learning_rate": 1.9787634443759477e-05, "loss": 0.6515, "step": 3658 }, { "epoch": 0.09, "grad_norm": 1.9211548566818237, "learning_rate": 1.978746423657893e-05, "loss": 0.5744, "step": 3659 }, { "epoch": 0.09, "grad_norm": 3.5195367336273193, "learning_rate": 1.978729396194933e-05, "loss": 0.8085, "step": 3660 }, { "epoch": 0.09, "grad_norm": 2.0913901329040527, "learning_rate": 1.978712361987185e-05, "loss": 0.4822, "step": 3661 }, { "epoch": 0.09, "grad_norm": 6.303021430969238, "learning_rate": 1.9786953210347663e-05, "loss": 0.832, "step": 3662 }, { "epoch": 0.09, "grad_norm": 3.5976877212524414, "learning_rate": 1.9786782733377937e-05, "loss": 0.8329, "step": 3663 }, { "epoch": 0.09, "grad_norm": 4.264325141906738, "learning_rate": 1.978661218896386e-05, "loss": 0.7171, "step": 3664 }, { "epoch": 0.09, "grad_norm": 7.451259613037109, "learning_rate": 1.97864415771066e-05, "loss": 0.6848, "step": 3665 }, { "epoch": 0.09, "grad_norm": 4.5677900314331055, "learning_rate": 1.978627089780733e-05, "loss": 0.7171, "step": 3666 }, { "epoch": 0.09, "grad_norm": 4.124577045440674, "learning_rate": 1.978610015106723e-05, "loss": 0.7714, "step": 3667 }, { "epoch": 0.09, "grad_norm": 2.3892266750335693, "learning_rate": 1.9785929336887476e-05, "loss": 0.7002, "step": 3668 }, { "epoch": 0.09, "grad_norm": 7.881112575531006, "learning_rate": 1.9785758455269246e-05, "loss": 0.9394, "step": 3669 }, { "epoch": 0.09, "grad_norm": 2.5173349380493164, "learning_rate": 1.9785587506213717e-05, "loss": 0.7101, "step": 3670 }, { "epoch": 0.09, "grad_norm": 2.69927716255188, "learning_rate": 1.9785416489722065e-05, "loss": 0.8829, "step": 3671 }, { "epoch": 0.09, "grad_norm": 3.75071382522583, "learning_rate": 1.978524540579547e-05, "loss": 0.7186, "step": 3672 }, { "epoch": 0.09, "grad_norm": 4.746037006378174, "learning_rate": 1.9785074254435118e-05, "loss": 0.6241, "step": 3673 }, { "epoch": 0.09, "grad_norm": 3.482137680053711, "learning_rate": 1.9784903035642173e-05, "loss": 0.6868, "step": 3674 }, { "epoch": 0.09, "grad_norm": 2.7367851734161377, "learning_rate": 1.978473174941783e-05, "loss": 0.6878, "step": 3675 }, { "epoch": 0.09, "grad_norm": 4.925342559814453, "learning_rate": 1.978456039576326e-05, "loss": 0.9826, "step": 3676 }, { "epoch": 0.09, "grad_norm": 5.119863510131836, "learning_rate": 1.9784388974679647e-05, "loss": 0.6484, "step": 3677 }, { "epoch": 0.09, "grad_norm": 2.586134433746338, "learning_rate": 1.9784217486168173e-05, "loss": 0.5788, "step": 3678 }, { "epoch": 0.09, "grad_norm": 2.1083028316497803, "learning_rate": 1.978404593023002e-05, "loss": 0.7127, "step": 3679 }, { "epoch": 0.09, "grad_norm": 2.3109874725341797, "learning_rate": 1.978387430686637e-05, "loss": 0.7393, "step": 3680 }, { "epoch": 0.09, "grad_norm": 1.5786620378494263, "learning_rate": 1.9783702616078403e-05, "loss": 0.5401, "step": 3681 }, { "epoch": 0.09, "grad_norm": 2.0369858741760254, "learning_rate": 1.9783530857867305e-05, "loss": 0.6734, "step": 3682 }, { "epoch": 0.09, "grad_norm": 1.3977221250534058, "learning_rate": 1.9783359032234258e-05, "loss": 0.6114, "step": 3683 }, { "epoch": 0.09, "grad_norm": 2.5097732543945312, "learning_rate": 1.9783187139180447e-05, "loss": 0.777, "step": 3684 }, { "epoch": 0.09, "grad_norm": 2.0024821758270264, "learning_rate": 1.9783015178707056e-05, "loss": 0.7975, "step": 3685 }, { "epoch": 0.09, "grad_norm": 2.714777708053589, "learning_rate": 1.9782843150815277e-05, "loss": 0.7209, "step": 3686 }, { "epoch": 0.09, "grad_norm": 2.6281168460845947, "learning_rate": 1.9782671055506283e-05, "loss": 0.6424, "step": 3687 }, { "epoch": 0.09, "grad_norm": 1.7702592611312866, "learning_rate": 1.9782498892781268e-05, "loss": 0.7273, "step": 3688 }, { "epoch": 0.09, "grad_norm": 1.6002368927001953, "learning_rate": 1.9782326662641416e-05, "loss": 0.6337, "step": 3689 }, { "epoch": 0.09, "grad_norm": 2.7358744144439697, "learning_rate": 1.9782154365087913e-05, "loss": 0.6304, "step": 3690 }, { "epoch": 0.09, "grad_norm": 2.6613094806671143, "learning_rate": 1.978198200012195e-05, "loss": 0.7601, "step": 3691 }, { "epoch": 0.09, "grad_norm": 2.675396680831909, "learning_rate": 1.9781809567744713e-05, "loss": 0.8104, "step": 3692 }, { "epoch": 0.09, "grad_norm": 2.2691102027893066, "learning_rate": 1.9781637067957387e-05, "loss": 0.6825, "step": 3693 }, { "epoch": 0.09, "grad_norm": 3.630368709564209, "learning_rate": 1.9781464500761167e-05, "loss": 0.6406, "step": 3694 }, { "epoch": 0.09, "grad_norm": 1.3653287887573242, "learning_rate": 1.978129186615724e-05, "loss": 0.4645, "step": 3695 }, { "epoch": 0.09, "grad_norm": 1.755251169204712, "learning_rate": 1.978111916414679e-05, "loss": 0.7589, "step": 3696 }, { "epoch": 0.09, "grad_norm": 1.9695838689804077, "learning_rate": 1.978094639473102e-05, "loss": 0.603, "step": 3697 }, { "epoch": 0.09, "grad_norm": 1.9988163709640503, "learning_rate": 1.9780773557911103e-05, "loss": 0.6835, "step": 3698 }, { "epoch": 0.09, "grad_norm": 1.9464234113693237, "learning_rate": 1.9780600653688243e-05, "loss": 0.6004, "step": 3699 }, { "epoch": 0.09, "grad_norm": 4.606352806091309, "learning_rate": 1.978042768206363e-05, "loss": 0.7525, "step": 3700 }, { "epoch": 0.09, "grad_norm": 1.7550139427185059, "learning_rate": 1.9780254643038454e-05, "loss": 0.5747, "step": 3701 }, { "epoch": 0.09, "grad_norm": 2.322150945663452, "learning_rate": 1.9780081536613904e-05, "loss": 0.7718, "step": 3702 }, { "epoch": 0.09, "grad_norm": 4.212844371795654, "learning_rate": 1.977990836279118e-05, "loss": 0.7976, "step": 3703 }, { "epoch": 0.09, "grad_norm": 5.075280666351318, "learning_rate": 1.977973512157147e-05, "loss": 0.7756, "step": 3704 }, { "epoch": 0.09, "grad_norm": 2.5154523849487305, "learning_rate": 1.977956181295597e-05, "loss": 0.7756, "step": 3705 }, { "epoch": 0.09, "grad_norm": 1.5252504348754883, "learning_rate": 1.9779388436945876e-05, "loss": 0.6349, "step": 3706 }, { "epoch": 0.1, "grad_norm": 4.387324810028076, "learning_rate": 1.977921499354238e-05, "loss": 0.7661, "step": 3707 }, { "epoch": 0.1, "grad_norm": 1.684133529663086, "learning_rate": 1.9779041482746676e-05, "loss": 0.4839, "step": 3708 }, { "epoch": 0.1, "grad_norm": 14.341387748718262, "learning_rate": 1.9778867904559963e-05, "loss": 0.474, "step": 3709 }, { "epoch": 0.1, "grad_norm": 3.300868272781372, "learning_rate": 1.9778694258983435e-05, "loss": 0.7243, "step": 3710 }, { "epoch": 0.1, "grad_norm": 3.5044329166412354, "learning_rate": 1.9778520546018294e-05, "loss": 0.8634, "step": 3711 }, { "epoch": 0.1, "grad_norm": 4.437572002410889, "learning_rate": 1.9778346765665728e-05, "loss": 0.9471, "step": 3712 }, { "epoch": 0.1, "grad_norm": 2.626920223236084, "learning_rate": 1.9778172917926942e-05, "loss": 0.8196, "step": 3713 }, { "epoch": 0.1, "grad_norm": 6.553318500518799, "learning_rate": 1.9777999002803134e-05, "loss": 0.7257, "step": 3714 }, { "epoch": 0.1, "grad_norm": 5.566270351409912, "learning_rate": 1.9777825020295496e-05, "loss": 0.6769, "step": 3715 }, { "epoch": 0.1, "grad_norm": 2.5761091709136963, "learning_rate": 1.9777650970405233e-05, "loss": 0.6442, "step": 3716 }, { "epoch": 0.1, "grad_norm": 1.4395278692245483, "learning_rate": 1.977747685313354e-05, "loss": 0.5733, "step": 3717 }, { "epoch": 0.1, "grad_norm": 3.2615575790405273, "learning_rate": 1.977730266848162e-05, "loss": 0.5716, "step": 3718 }, { "epoch": 0.1, "grad_norm": 1.7068283557891846, "learning_rate": 1.9777128416450675e-05, "loss": 0.6128, "step": 3719 }, { "epoch": 0.1, "grad_norm": 3.0385146141052246, "learning_rate": 1.97769540970419e-05, "loss": 0.7642, "step": 3720 }, { "epoch": 0.1, "grad_norm": 10.2214994430542, "learning_rate": 1.9776779710256502e-05, "loss": 0.6413, "step": 3721 }, { "epoch": 0.1, "grad_norm": 3.3121840953826904, "learning_rate": 1.9776605256095682e-05, "loss": 0.8015, "step": 3722 }, { "epoch": 0.1, "grad_norm": 3.9510679244995117, "learning_rate": 1.9776430734560638e-05, "loss": 0.8674, "step": 3723 }, { "epoch": 0.1, "grad_norm": 1.6112561225891113, "learning_rate": 1.9776256145652576e-05, "loss": 0.6612, "step": 3724 }, { "epoch": 0.1, "grad_norm": 2.6107630729675293, "learning_rate": 1.9776081489372702e-05, "loss": 0.6895, "step": 3725 }, { "epoch": 0.1, "grad_norm": 4.193789005279541, "learning_rate": 1.9775906765722214e-05, "loss": 0.8459, "step": 3726 }, { "epoch": 0.1, "grad_norm": 2.0884549617767334, "learning_rate": 1.977573197470232e-05, "loss": 0.593, "step": 3727 }, { "epoch": 0.1, "grad_norm": 2.150440216064453, "learning_rate": 1.977555711631422e-05, "loss": 0.7962, "step": 3728 }, { "epoch": 0.1, "grad_norm": 1.9481768608093262, "learning_rate": 1.977538219055912e-05, "loss": 0.6966, "step": 3729 }, { "epoch": 0.1, "grad_norm": 3.1002228260040283, "learning_rate": 1.9775207197438233e-05, "loss": 0.7851, "step": 3730 }, { "epoch": 0.1, "grad_norm": 3.875974416732788, "learning_rate": 1.977503213695276e-05, "loss": 0.748, "step": 3731 }, { "epoch": 0.1, "grad_norm": 2.327380657196045, "learning_rate": 1.9774857009103902e-05, "loss": 0.7449, "step": 3732 }, { "epoch": 0.1, "grad_norm": 3.182288646697998, "learning_rate": 1.977468181389287e-05, "loss": 0.7892, "step": 3733 }, { "epoch": 0.1, "grad_norm": 6.7776408195495605, "learning_rate": 1.9774506551320875e-05, "loss": 0.8645, "step": 3734 }, { "epoch": 0.1, "grad_norm": 1.514453411102295, "learning_rate": 1.9774331221389123e-05, "loss": 0.5639, "step": 3735 }, { "epoch": 0.1, "grad_norm": 4.260493755340576, "learning_rate": 1.977415582409882e-05, "loss": 0.8822, "step": 3736 }, { "epoch": 0.1, "grad_norm": 5.0987348556518555, "learning_rate": 1.9773980359451173e-05, "loss": 0.6051, "step": 3737 }, { "epoch": 0.1, "grad_norm": 2.558436870574951, "learning_rate": 1.97738048274474e-05, "loss": 0.6129, "step": 3738 }, { "epoch": 0.1, "grad_norm": 3.774620771408081, "learning_rate": 1.97736292280887e-05, "loss": 0.641, "step": 3739 }, { "epoch": 0.1, "grad_norm": 2.4504029750823975, "learning_rate": 1.9773453561376284e-05, "loss": 0.5902, "step": 3740 }, { "epoch": 0.1, "grad_norm": 1.9352848529815674, "learning_rate": 1.9773277827311374e-05, "loss": 0.8002, "step": 3741 }, { "epoch": 0.1, "grad_norm": 7.893134117126465, "learning_rate": 1.977310202589517e-05, "loss": 0.7581, "step": 3742 }, { "epoch": 0.1, "grad_norm": 2.5748867988586426, "learning_rate": 1.9772926157128887e-05, "loss": 0.7249, "step": 3743 }, { "epoch": 0.1, "grad_norm": 2.805023193359375, "learning_rate": 1.977275022101374e-05, "loss": 0.7572, "step": 3744 }, { "epoch": 0.1, "grad_norm": 3.077838182449341, "learning_rate": 1.9772574217550932e-05, "loss": 0.73, "step": 3745 }, { "epoch": 0.1, "grad_norm": 1.7854974269866943, "learning_rate": 1.9772398146741687e-05, "loss": 0.8133, "step": 3746 }, { "epoch": 0.1, "grad_norm": 2.384275197982788, "learning_rate": 1.977222200858721e-05, "loss": 0.6412, "step": 3747 }, { "epoch": 0.1, "grad_norm": 1.918235182762146, "learning_rate": 1.977204580308872e-05, "loss": 0.5909, "step": 3748 }, { "epoch": 0.1, "grad_norm": 4.19413423538208, "learning_rate": 1.9771869530247434e-05, "loss": 0.6717, "step": 3749 }, { "epoch": 0.1, "grad_norm": 3.1880083084106445, "learning_rate": 1.9771693190064557e-05, "loss": 0.7723, "step": 3750 }, { "epoch": 0.1, "grad_norm": 2.4977779388427734, "learning_rate": 1.9771516782541315e-05, "loss": 0.7494, "step": 3751 }, { "epoch": 0.1, "grad_norm": 6.0619425773620605, "learning_rate": 1.9771340307678912e-05, "loss": 0.7242, "step": 3752 }, { "epoch": 0.1, "grad_norm": 1.6969496011734009, "learning_rate": 1.9771163765478572e-05, "loss": 0.6436, "step": 3753 }, { "epoch": 0.1, "grad_norm": 1.7010798454284668, "learning_rate": 1.9770987155941515e-05, "loss": 0.5811, "step": 3754 }, { "epoch": 0.1, "grad_norm": 1.112704873085022, "learning_rate": 1.9770810479068948e-05, "loss": 0.678, "step": 3755 }, { "epoch": 0.1, "grad_norm": 2.9593541622161865, "learning_rate": 1.9770633734862096e-05, "loss": 0.8531, "step": 3756 }, { "epoch": 0.1, "grad_norm": 1.4298908710479736, "learning_rate": 1.9770456923322175e-05, "loss": 0.7696, "step": 3757 }, { "epoch": 0.1, "grad_norm": 4.116416931152344, "learning_rate": 1.9770280044450403e-05, "loss": 0.71, "step": 3758 }, { "epoch": 0.1, "grad_norm": 2.1184682846069336, "learning_rate": 1.9770103098247996e-05, "loss": 0.7658, "step": 3759 }, { "epoch": 0.1, "grad_norm": 3.657360315322876, "learning_rate": 1.9769926084716183e-05, "loss": 0.7995, "step": 3760 }, { "epoch": 0.1, "grad_norm": 2.765277624130249, "learning_rate": 1.9769749003856174e-05, "loss": 0.7455, "step": 3761 }, { "epoch": 0.1, "grad_norm": 2.425952434539795, "learning_rate": 1.9769571855669192e-05, "loss": 0.6087, "step": 3762 }, { "epoch": 0.1, "grad_norm": 2.359302282333374, "learning_rate": 1.9769394640156457e-05, "loss": 0.7479, "step": 3763 }, { "epoch": 0.1, "grad_norm": 3.603044033050537, "learning_rate": 1.97692173573192e-05, "loss": 0.7791, "step": 3764 }, { "epoch": 0.1, "grad_norm": 3.452112913131714, "learning_rate": 1.9769040007158627e-05, "loss": 0.7775, "step": 3765 }, { "epoch": 0.1, "grad_norm": 3.4754316806793213, "learning_rate": 1.976886258967597e-05, "loss": 0.6097, "step": 3766 }, { "epoch": 0.1, "grad_norm": 2.1308443546295166, "learning_rate": 1.976868510487245e-05, "loss": 0.6764, "step": 3767 }, { "epoch": 0.1, "grad_norm": 3.520550012588501, "learning_rate": 1.9768507552749288e-05, "loss": 0.7031, "step": 3768 }, { "epoch": 0.1, "grad_norm": 1.6719006299972534, "learning_rate": 1.976832993330771e-05, "loss": 0.5341, "step": 3769 }, { "epoch": 0.1, "grad_norm": 1.9528754949569702, "learning_rate": 1.9768152246548936e-05, "loss": 0.6668, "step": 3770 }, { "epoch": 0.1, "grad_norm": 2.19242262840271, "learning_rate": 1.9767974492474202e-05, "loss": 0.5523, "step": 3771 }, { "epoch": 0.1, "grad_norm": 2.0839078426361084, "learning_rate": 1.976779667108472e-05, "loss": 0.8183, "step": 3772 }, { "epoch": 0.1, "grad_norm": 2.411297559738159, "learning_rate": 1.9767618782381717e-05, "loss": 0.7234, "step": 3773 }, { "epoch": 0.1, "grad_norm": 2.4524152278900146, "learning_rate": 1.9767440826366423e-05, "loss": 0.7719, "step": 3774 }, { "epoch": 0.1, "grad_norm": 3.3538143634796143, "learning_rate": 1.9767262803040068e-05, "loss": 0.7353, "step": 3775 }, { "epoch": 0.1, "grad_norm": 5.595300674438477, "learning_rate": 1.976708471240387e-05, "loss": 0.7656, "step": 3776 }, { "epoch": 0.1, "grad_norm": 1.7122774124145508, "learning_rate": 1.976690655445906e-05, "loss": 0.6676, "step": 3777 }, { "epoch": 0.1, "grad_norm": 3.80176043510437, "learning_rate": 1.9766728329206868e-05, "loss": 0.947, "step": 3778 }, { "epoch": 0.1, "grad_norm": 4.720219612121582, "learning_rate": 1.976655003664852e-05, "loss": 0.6467, "step": 3779 }, { "epoch": 0.1, "grad_norm": 3.5145065784454346, "learning_rate": 1.9766371676785246e-05, "loss": 0.6855, "step": 3780 }, { "epoch": 0.1, "grad_norm": 2.059924364089966, "learning_rate": 1.9766193249618273e-05, "loss": 0.6533, "step": 3781 }, { "epoch": 0.1, "grad_norm": 3.3720126152038574, "learning_rate": 1.976601475514883e-05, "loss": 0.7042, "step": 3782 }, { "epoch": 0.1, "grad_norm": 1.795085072517395, "learning_rate": 1.9765836193378152e-05, "loss": 0.6139, "step": 3783 }, { "epoch": 0.1, "grad_norm": 4.334336757659912, "learning_rate": 1.9765657564307466e-05, "loss": 0.5352, "step": 3784 }, { "epoch": 0.1, "grad_norm": 2.4708385467529297, "learning_rate": 1.9765478867938004e-05, "loss": 0.6984, "step": 3785 }, { "epoch": 0.1, "grad_norm": 3.1381285190582275, "learning_rate": 1.9765300104270998e-05, "loss": 0.7023, "step": 3786 }, { "epoch": 0.1, "grad_norm": 4.879907608032227, "learning_rate": 1.9765121273307675e-05, "loss": 0.5355, "step": 3787 }, { "epoch": 0.1, "grad_norm": 4.009403228759766, "learning_rate": 1.976494237504927e-05, "loss": 0.7341, "step": 3788 }, { "epoch": 0.1, "grad_norm": 3.314135789871216, "learning_rate": 1.9764763409497024e-05, "loss": 0.8298, "step": 3789 }, { "epoch": 0.1, "grad_norm": 1.5808477401733398, "learning_rate": 1.976458437665216e-05, "loss": 0.4922, "step": 3790 }, { "epoch": 0.1, "grad_norm": 2.269423246383667, "learning_rate": 1.9764405276515912e-05, "loss": 0.8178, "step": 3791 }, { "epoch": 0.1, "grad_norm": 2.350625514984131, "learning_rate": 1.9764226109089523e-05, "loss": 0.762, "step": 3792 }, { "epoch": 0.1, "grad_norm": 3.510725259780884, "learning_rate": 1.9764046874374217e-05, "loss": 0.8557, "step": 3793 }, { "epoch": 0.1, "grad_norm": 5.5189619064331055, "learning_rate": 1.9763867572371237e-05, "loss": 0.6402, "step": 3794 }, { "epoch": 0.1, "grad_norm": 4.130826473236084, "learning_rate": 1.9763688203081816e-05, "loss": 0.8646, "step": 3795 }, { "epoch": 0.1, "grad_norm": 2.1878745555877686, "learning_rate": 1.976350876650719e-05, "loss": 0.717, "step": 3796 }, { "epoch": 0.1, "grad_norm": 1.4574503898620605, "learning_rate": 1.9763329262648593e-05, "loss": 0.5932, "step": 3797 }, { "epoch": 0.1, "grad_norm": 2.921914577484131, "learning_rate": 1.9763149691507267e-05, "loss": 0.8715, "step": 3798 }, { "epoch": 0.1, "grad_norm": 1.7809659242630005, "learning_rate": 1.976297005308445e-05, "loss": 0.8124, "step": 3799 }, { "epoch": 0.1, "grad_norm": 3.049260139465332, "learning_rate": 1.9762790347381374e-05, "loss": 0.7614, "step": 3800 }, { "epoch": 0.1, "grad_norm": 1.3269264698028564, "learning_rate": 1.9762610574399283e-05, "loss": 0.5524, "step": 3801 }, { "epoch": 0.1, "grad_norm": 2.181140661239624, "learning_rate": 1.9762430734139408e-05, "loss": 0.7152, "step": 3802 }, { "epoch": 0.1, "grad_norm": 2.8198564052581787, "learning_rate": 1.9762250826602996e-05, "loss": 0.6637, "step": 3803 }, { "epoch": 0.1, "grad_norm": 5.880499839782715, "learning_rate": 1.9762070851791287e-05, "loss": 0.861, "step": 3804 }, { "epoch": 0.1, "grad_norm": 1.3666554689407349, "learning_rate": 1.9761890809705514e-05, "loss": 0.679, "step": 3805 }, { "epoch": 0.1, "grad_norm": 3.448847770690918, "learning_rate": 1.9761710700346927e-05, "loss": 0.8022, "step": 3806 }, { "epoch": 0.1, "grad_norm": 2.032137870788574, "learning_rate": 1.9761530523716764e-05, "loss": 0.6006, "step": 3807 }, { "epoch": 0.1, "grad_norm": 2.77824330329895, "learning_rate": 1.9761350279816264e-05, "loss": 0.8753, "step": 3808 }, { "epoch": 0.1, "grad_norm": 2.387544631958008, "learning_rate": 1.9761169968646667e-05, "loss": 0.7955, "step": 3809 }, { "epoch": 0.1, "grad_norm": 2.7971627712249756, "learning_rate": 1.9760989590209223e-05, "loss": 0.7488, "step": 3810 }, { "epoch": 0.1, "grad_norm": 1.7405281066894531, "learning_rate": 1.976080914450517e-05, "loss": 0.8008, "step": 3811 }, { "epoch": 0.1, "grad_norm": 1.8202518224716187, "learning_rate": 1.9760628631535753e-05, "loss": 0.6013, "step": 3812 }, { "epoch": 0.1, "grad_norm": 2.142772912979126, "learning_rate": 1.9760448051302213e-05, "loss": 0.8944, "step": 3813 }, { "epoch": 0.1, "grad_norm": 1.9446707963943481, "learning_rate": 1.97602674038058e-05, "loss": 0.7861, "step": 3814 }, { "epoch": 0.1, "grad_norm": 3.9709153175354004, "learning_rate": 1.9760086689047756e-05, "loss": 0.5447, "step": 3815 }, { "epoch": 0.1, "grad_norm": 2.4079554080963135, "learning_rate": 1.9759905907029325e-05, "loss": 0.7685, "step": 3816 }, { "epoch": 0.1, "grad_norm": 2.8411688804626465, "learning_rate": 1.9759725057751755e-05, "loss": 0.5713, "step": 3817 }, { "epoch": 0.1, "grad_norm": 2.608982801437378, "learning_rate": 1.9759544141216293e-05, "loss": 0.8611, "step": 3818 }, { "epoch": 0.1, "grad_norm": 4.637069225311279, "learning_rate": 1.975936315742418e-05, "loss": 0.6756, "step": 3819 }, { "epoch": 0.1, "grad_norm": 3.309748649597168, "learning_rate": 1.9759182106376672e-05, "loss": 0.5142, "step": 3820 }, { "epoch": 0.1, "grad_norm": 1.5848225355148315, "learning_rate": 1.9759000988075014e-05, "loss": 0.6567, "step": 3821 }, { "epoch": 0.1, "grad_norm": 3.4308934211730957, "learning_rate": 1.9758819802520444e-05, "loss": 0.8045, "step": 3822 }, { "epoch": 0.1, "grad_norm": 4.851306438446045, "learning_rate": 1.9758638549714227e-05, "loss": 0.7703, "step": 3823 }, { "epoch": 0.1, "grad_norm": 3.1525607109069824, "learning_rate": 1.9758457229657598e-05, "loss": 0.5457, "step": 3824 }, { "epoch": 0.1, "grad_norm": 2.0883092880249023, "learning_rate": 1.9758275842351818e-05, "loss": 0.5894, "step": 3825 }, { "epoch": 0.1, "grad_norm": 4.893929958343506, "learning_rate": 1.975809438779813e-05, "loss": 0.716, "step": 3826 }, { "epoch": 0.1, "grad_norm": 3.3251876831054688, "learning_rate": 1.9757912865997787e-05, "loss": 0.8015, "step": 3827 }, { "epoch": 0.1, "grad_norm": 4.133260726928711, "learning_rate": 1.9757731276952035e-05, "loss": 0.6275, "step": 3828 }, { "epoch": 0.1, "grad_norm": 1.759742259979248, "learning_rate": 1.9757549620662134e-05, "loss": 0.6629, "step": 3829 }, { "epoch": 0.1, "grad_norm": 3.802257776260376, "learning_rate": 1.975736789712933e-05, "loss": 0.8889, "step": 3830 }, { "epoch": 0.1, "grad_norm": 2.344365358352661, "learning_rate": 1.9757186106354876e-05, "loss": 0.731, "step": 3831 }, { "epoch": 0.1, "grad_norm": 4.738372802734375, "learning_rate": 1.9757004248340026e-05, "loss": 0.7167, "step": 3832 }, { "epoch": 0.1, "grad_norm": 2.327578067779541, "learning_rate": 1.975682232308603e-05, "loss": 0.6324, "step": 3833 }, { "epoch": 0.1, "grad_norm": 2.8867380619049072, "learning_rate": 1.9756640330594146e-05, "loss": 0.5444, "step": 3834 }, { "epoch": 0.1, "grad_norm": 1.7627451419830322, "learning_rate": 1.975645827086563e-05, "loss": 0.507, "step": 3835 }, { "epoch": 0.1, "grad_norm": 2.2396788597106934, "learning_rate": 1.975627614390173e-05, "loss": 0.6619, "step": 3836 }, { "epoch": 0.1, "grad_norm": 2.1138808727264404, "learning_rate": 1.9756093949703704e-05, "loss": 0.5157, "step": 3837 }, { "epoch": 0.1, "grad_norm": 2.048713207244873, "learning_rate": 1.9755911688272808e-05, "loss": 0.7508, "step": 3838 }, { "epoch": 0.1, "grad_norm": 2.1210873126983643, "learning_rate": 1.9755729359610298e-05, "loss": 0.7467, "step": 3839 }, { "epoch": 0.1, "grad_norm": 7.910731792449951, "learning_rate": 1.9755546963717435e-05, "loss": 0.6765, "step": 3840 }, { "epoch": 0.1, "grad_norm": 3.374452590942383, "learning_rate": 1.9755364500595464e-05, "loss": 0.6735, "step": 3841 }, { "epoch": 0.1, "grad_norm": 2.84521222114563, "learning_rate": 1.9755181970245655e-05, "loss": 0.649, "step": 3842 }, { "epoch": 0.1, "grad_norm": 1.9288859367370605, "learning_rate": 1.9754999372669256e-05, "loss": 0.6072, "step": 3843 }, { "epoch": 0.1, "grad_norm": 3.119913339614868, "learning_rate": 1.975481670786753e-05, "loss": 0.7234, "step": 3844 }, { "epoch": 0.1, "grad_norm": 2.9773001670837402, "learning_rate": 1.975463397584174e-05, "loss": 0.7575, "step": 3845 }, { "epoch": 0.1, "grad_norm": 3.328770637512207, "learning_rate": 1.975445117659314e-05, "loss": 0.8174, "step": 3846 }, { "epoch": 0.1, "grad_norm": 2.930724859237671, "learning_rate": 1.975426831012299e-05, "loss": 0.8176, "step": 3847 }, { "epoch": 0.1, "grad_norm": 4.394259452819824, "learning_rate": 1.975408537643255e-05, "loss": 0.7163, "step": 3848 }, { "epoch": 0.1, "grad_norm": 3.8370819091796875, "learning_rate": 1.9753902375523085e-05, "loss": 0.709, "step": 3849 }, { "epoch": 0.1, "grad_norm": 3.1382853984832764, "learning_rate": 1.975371930739585e-05, "loss": 0.7329, "step": 3850 }, { "epoch": 0.1, "grad_norm": 2.7986347675323486, "learning_rate": 1.9753536172052112e-05, "loss": 0.7004, "step": 3851 }, { "epoch": 0.1, "grad_norm": 3.9658355712890625, "learning_rate": 1.975335296949313e-05, "loss": 0.9254, "step": 3852 }, { "epoch": 0.1, "grad_norm": 3.272822141647339, "learning_rate": 1.9753169699720166e-05, "loss": 0.5744, "step": 3853 }, { "epoch": 0.1, "grad_norm": 2.5953168869018555, "learning_rate": 1.9752986362734484e-05, "loss": 0.7027, "step": 3854 }, { "epoch": 0.1, "grad_norm": 2.3955304622650146, "learning_rate": 1.9752802958537347e-05, "loss": 0.7232, "step": 3855 }, { "epoch": 0.1, "grad_norm": 2.611337423324585, "learning_rate": 1.975261948713002e-05, "loss": 0.6823, "step": 3856 }, { "epoch": 0.1, "grad_norm": 3.6472294330596924, "learning_rate": 1.9752435948513766e-05, "loss": 0.6562, "step": 3857 }, { "epoch": 0.1, "grad_norm": 2.00016188621521, "learning_rate": 1.975225234268985e-05, "loss": 0.6684, "step": 3858 }, { "epoch": 0.1, "grad_norm": 3.808656692504883, "learning_rate": 1.9752068669659538e-05, "loss": 0.9032, "step": 3859 }, { "epoch": 0.1, "grad_norm": 3.2186598777770996, "learning_rate": 1.97518849294241e-05, "loss": 0.6203, "step": 3860 }, { "epoch": 0.1, "grad_norm": 2.6260416507720947, "learning_rate": 1.975170112198479e-05, "loss": 0.5692, "step": 3861 }, { "epoch": 0.1, "grad_norm": 1.793998122215271, "learning_rate": 1.975151724734289e-05, "loss": 0.8436, "step": 3862 }, { "epoch": 0.1, "grad_norm": 2.313992738723755, "learning_rate": 1.9751333305499653e-05, "loss": 0.5801, "step": 3863 }, { "epoch": 0.1, "grad_norm": 2.0262975692749023, "learning_rate": 1.9751149296456357e-05, "loss": 0.6097, "step": 3864 }, { "epoch": 0.1, "grad_norm": 2.6899352073669434, "learning_rate": 1.9750965220214267e-05, "loss": 0.7769, "step": 3865 }, { "epoch": 0.1, "grad_norm": 4.176825046539307, "learning_rate": 1.9750781076774648e-05, "loss": 0.7204, "step": 3866 }, { "epoch": 0.1, "grad_norm": 2.8305909633636475, "learning_rate": 1.9750596866138774e-05, "loss": 0.7446, "step": 3867 }, { "epoch": 0.1, "grad_norm": 3.199101209640503, "learning_rate": 1.9750412588307912e-05, "loss": 0.8713, "step": 3868 }, { "epoch": 0.1, "grad_norm": 3.0905332565307617, "learning_rate": 1.9750228243283332e-05, "loss": 0.7975, "step": 3869 }, { "epoch": 0.1, "grad_norm": 2.1368789672851562, "learning_rate": 1.9750043831066305e-05, "loss": 0.6998, "step": 3870 }, { "epoch": 0.1, "grad_norm": 1.7425352334976196, "learning_rate": 1.97498593516581e-05, "loss": 0.6808, "step": 3871 }, { "epoch": 0.1, "grad_norm": 1.3528472185134888, "learning_rate": 1.9749674805059992e-05, "loss": 0.6288, "step": 3872 }, { "epoch": 0.1, "grad_norm": 1.4329776763916016, "learning_rate": 1.974949019127325e-05, "loss": 0.5961, "step": 3873 }, { "epoch": 0.1, "grad_norm": 2.8065690994262695, "learning_rate": 1.9749305510299147e-05, "loss": 0.6903, "step": 3874 }, { "epoch": 0.1, "grad_norm": 2.9638373851776123, "learning_rate": 1.9749120762138957e-05, "loss": 0.6343, "step": 3875 }, { "epoch": 0.1, "grad_norm": 2.233637809753418, "learning_rate": 1.9748935946793946e-05, "loss": 0.714, "step": 3876 }, { "epoch": 0.1, "grad_norm": 3.1954355239868164, "learning_rate": 1.97487510642654e-05, "loss": 0.5057, "step": 3877 }, { "epoch": 0.1, "grad_norm": 3.665555953979492, "learning_rate": 1.9748566114554582e-05, "loss": 0.795, "step": 3878 }, { "epoch": 0.1, "grad_norm": 10.47835636138916, "learning_rate": 1.9748381097662774e-05, "loss": 0.6104, "step": 3879 }, { "epoch": 0.1, "grad_norm": 3.2074646949768066, "learning_rate": 1.9748196013591244e-05, "loss": 0.747, "step": 3880 }, { "epoch": 0.1, "grad_norm": 1.236518383026123, "learning_rate": 1.9748010862341274e-05, "loss": 0.6449, "step": 3881 }, { "epoch": 0.1, "grad_norm": 3.1908252239227295, "learning_rate": 1.9747825643914137e-05, "loss": 0.8207, "step": 3882 }, { "epoch": 0.1, "grad_norm": 2.5374560356140137, "learning_rate": 1.974764035831111e-05, "loss": 0.7391, "step": 3883 }, { "epoch": 0.1, "grad_norm": 2.3758561611175537, "learning_rate": 1.974745500553347e-05, "loss": 0.6424, "step": 3884 }, { "epoch": 0.1, "grad_norm": 2.1065287590026855, "learning_rate": 1.9747269585582494e-05, "loss": 0.5694, "step": 3885 }, { "epoch": 0.1, "grad_norm": 2.9700872898101807, "learning_rate": 1.9747084098459458e-05, "loss": 0.8535, "step": 3886 }, { "epoch": 0.1, "grad_norm": 2.8981547355651855, "learning_rate": 1.9746898544165644e-05, "loss": 0.7296, "step": 3887 }, { "epoch": 0.1, "grad_norm": 2.969095230102539, "learning_rate": 1.9746712922702332e-05, "loss": 0.8732, "step": 3888 }, { "epoch": 0.1, "grad_norm": 3.3196628093719482, "learning_rate": 1.9746527234070794e-05, "loss": 0.6963, "step": 3889 }, { "epoch": 0.1, "grad_norm": 6.956338405609131, "learning_rate": 1.9746341478272315e-05, "loss": 0.7753, "step": 3890 }, { "epoch": 0.1, "grad_norm": 2.5240414142608643, "learning_rate": 1.9746155655308172e-05, "loss": 0.6525, "step": 3891 }, { "epoch": 0.1, "grad_norm": 1.7923215627670288, "learning_rate": 1.974596976517965e-05, "loss": 0.5759, "step": 3892 }, { "epoch": 0.1, "grad_norm": 4.438896179199219, "learning_rate": 1.9745783807888024e-05, "loss": 0.6924, "step": 3893 }, { "epoch": 0.1, "grad_norm": 1.4189687967300415, "learning_rate": 1.974559778343458e-05, "loss": 0.6259, "step": 3894 }, { "epoch": 0.1, "grad_norm": 1.9712247848510742, "learning_rate": 1.9745411691820603e-05, "loss": 0.696, "step": 3895 }, { "epoch": 0.1, "grad_norm": 5.385493278503418, "learning_rate": 1.9745225533047368e-05, "loss": 0.629, "step": 3896 }, { "epoch": 0.1, "grad_norm": 4.296065330505371, "learning_rate": 1.9745039307116157e-05, "loss": 0.6098, "step": 3897 }, { "epoch": 0.1, "grad_norm": 2.9838979244232178, "learning_rate": 1.9744853014028264e-05, "loss": 0.7878, "step": 3898 }, { "epoch": 0.1, "grad_norm": 1.7015835046768188, "learning_rate": 1.9744666653784967e-05, "loss": 0.5771, "step": 3899 }, { "epoch": 0.1, "grad_norm": 1.6592589616775513, "learning_rate": 1.9744480226387542e-05, "loss": 0.7091, "step": 3900 }, { "epoch": 0.1, "grad_norm": 2.197913646697998, "learning_rate": 1.9744293731837287e-05, "loss": 0.5056, "step": 3901 }, { "epoch": 0.1, "grad_norm": 2.0640411376953125, "learning_rate": 1.974410717013548e-05, "loss": 0.7077, "step": 3902 }, { "epoch": 0.1, "grad_norm": 2.172715187072754, "learning_rate": 1.974392054128341e-05, "loss": 0.6562, "step": 3903 }, { "epoch": 0.1, "grad_norm": 6.325788974761963, "learning_rate": 1.974373384528236e-05, "loss": 0.7722, "step": 3904 }, { "epoch": 0.1, "grad_norm": 2.931295394897461, "learning_rate": 1.9743547082133617e-05, "loss": 0.6423, "step": 3905 }, { "epoch": 0.1, "grad_norm": 2.12328839302063, "learning_rate": 1.9743360251838472e-05, "loss": 0.7385, "step": 3906 }, { "epoch": 0.1, "grad_norm": 1.7860006093978882, "learning_rate": 1.9743173354398206e-05, "loss": 0.805, "step": 3907 }, { "epoch": 0.1, "grad_norm": 2.5959041118621826, "learning_rate": 1.9742986389814112e-05, "loss": 0.6845, "step": 3908 }, { "epoch": 0.1, "grad_norm": 1.5868370532989502, "learning_rate": 1.974279935808748e-05, "loss": 0.6392, "step": 3909 }, { "epoch": 0.1, "grad_norm": 4.384357452392578, "learning_rate": 1.9742612259219593e-05, "loss": 0.8427, "step": 3910 }, { "epoch": 0.1, "grad_norm": 3.448927640914917, "learning_rate": 1.974242509321174e-05, "loss": 0.7172, "step": 3911 }, { "epoch": 0.1, "grad_norm": 2.4886722564697266, "learning_rate": 1.974223786006522e-05, "loss": 0.7258, "step": 3912 }, { "epoch": 0.1, "grad_norm": 3.275496244430542, "learning_rate": 1.9742050559781315e-05, "loss": 0.7325, "step": 3913 }, { "epoch": 0.1, "grad_norm": 2.590466260910034, "learning_rate": 1.9741863192361318e-05, "loss": 0.694, "step": 3914 }, { "epoch": 0.1, "grad_norm": 3.493586301803589, "learning_rate": 1.9741675757806516e-05, "loss": 0.7617, "step": 3915 }, { "epoch": 0.1, "grad_norm": 2.2239511013031006, "learning_rate": 1.9741488256118214e-05, "loss": 0.7659, "step": 3916 }, { "epoch": 0.1, "grad_norm": 5.004641532897949, "learning_rate": 1.974130068729769e-05, "loss": 0.5956, "step": 3917 }, { "epoch": 0.1, "grad_norm": 2.554819107055664, "learning_rate": 1.9741113051346238e-05, "loss": 0.6173, "step": 3918 }, { "epoch": 0.1, "grad_norm": 3.08271861076355, "learning_rate": 1.974092534826516e-05, "loss": 0.824, "step": 3919 }, { "epoch": 0.1, "grad_norm": 3.330681800842285, "learning_rate": 1.974073757805574e-05, "loss": 0.7592, "step": 3920 }, { "epoch": 0.1, "grad_norm": 2.6385080814361572, "learning_rate": 1.9740549740719283e-05, "loss": 0.7809, "step": 3921 }, { "epoch": 0.1, "grad_norm": 1.5190621614456177, "learning_rate": 1.974036183625707e-05, "loss": 0.655, "step": 3922 }, { "epoch": 0.1, "grad_norm": 2.155566453933716, "learning_rate": 1.974017386467041e-05, "loss": 1.0416, "step": 3923 }, { "epoch": 0.1, "grad_norm": 3.403512477874756, "learning_rate": 1.9739985825960587e-05, "loss": 0.6992, "step": 3924 }, { "epoch": 0.1, "grad_norm": 1.4824187755584717, "learning_rate": 1.9739797720128898e-05, "loss": 0.5368, "step": 3925 }, { "epoch": 0.1, "grad_norm": 7.038959980010986, "learning_rate": 1.9739609547176644e-05, "loss": 0.7551, "step": 3926 }, { "epoch": 0.1, "grad_norm": 3.2504541873931885, "learning_rate": 1.9739421307105124e-05, "loss": 0.7093, "step": 3927 }, { "epoch": 0.1, "grad_norm": 3.715324640274048, "learning_rate": 1.9739232999915628e-05, "loss": 0.7617, "step": 3928 }, { "epoch": 0.1, "grad_norm": 2.0719082355499268, "learning_rate": 1.9739044625609457e-05, "loss": 0.6814, "step": 3929 }, { "epoch": 0.1, "grad_norm": 5.151337623596191, "learning_rate": 1.9738856184187912e-05, "loss": 0.8658, "step": 3930 }, { "epoch": 0.1, "grad_norm": 2.947289228439331, "learning_rate": 1.9738667675652284e-05, "loss": 0.8594, "step": 3931 }, { "epoch": 0.1, "grad_norm": 2.219696044921875, "learning_rate": 1.973847910000388e-05, "loss": 0.7, "step": 3932 }, { "epoch": 0.1, "grad_norm": 1.900689959526062, "learning_rate": 1.9738290457243994e-05, "loss": 0.6462, "step": 3933 }, { "epoch": 0.1, "grad_norm": 3.589691638946533, "learning_rate": 1.973810174737393e-05, "loss": 0.7576, "step": 3934 }, { "epoch": 0.1, "grad_norm": 2.0521786212921143, "learning_rate": 1.9737912970394987e-05, "loss": 0.739, "step": 3935 }, { "epoch": 0.1, "grad_norm": 1.8514130115509033, "learning_rate": 1.9737724126308467e-05, "loss": 0.7069, "step": 3936 }, { "epoch": 0.1, "grad_norm": 4.233641624450684, "learning_rate": 1.973753521511567e-05, "loss": 0.7687, "step": 3937 }, { "epoch": 0.1, "grad_norm": 2.5437638759613037, "learning_rate": 1.9737346236817894e-05, "loss": 0.5989, "step": 3938 }, { "epoch": 0.1, "grad_norm": 2.2885169982910156, "learning_rate": 1.973715719141645e-05, "loss": 0.8217, "step": 3939 }, { "epoch": 0.1, "grad_norm": 3.0021588802337646, "learning_rate": 1.9736968078912632e-05, "loss": 0.7186, "step": 3940 }, { "epoch": 0.1, "grad_norm": 1.9369269609451294, "learning_rate": 1.973677889930775e-05, "loss": 0.7532, "step": 3941 }, { "epoch": 0.1, "grad_norm": 2.825920343399048, "learning_rate": 1.9736589652603104e-05, "loss": 0.7093, "step": 3942 }, { "epoch": 0.1, "grad_norm": 2.927112102508545, "learning_rate": 1.97364003388e-05, "loss": 0.5678, "step": 3943 }, { "epoch": 0.1, "grad_norm": 2.8200480937957764, "learning_rate": 1.973621095789974e-05, "loss": 0.6003, "step": 3944 }, { "epoch": 0.1, "grad_norm": 2.11453914642334, "learning_rate": 1.9736021509903632e-05, "loss": 0.6362, "step": 3945 }, { "epoch": 0.1, "grad_norm": 2.957794427871704, "learning_rate": 1.9735831994812983e-05, "loss": 0.8236, "step": 3946 }, { "epoch": 0.1, "grad_norm": 1.936596155166626, "learning_rate": 1.9735642412629094e-05, "loss": 0.7623, "step": 3947 }, { "epoch": 0.1, "grad_norm": 2.7163846492767334, "learning_rate": 1.9735452763353275e-05, "loss": 0.5603, "step": 3948 }, { "epoch": 0.1, "grad_norm": 1.8383333683013916, "learning_rate": 1.973526304698683e-05, "loss": 0.7523, "step": 3949 }, { "epoch": 0.1, "grad_norm": 1.9408495426177979, "learning_rate": 1.9735073263531072e-05, "loss": 0.6531, "step": 3950 }, { "epoch": 0.1, "grad_norm": 3.8548192977905273, "learning_rate": 1.9734883412987305e-05, "loss": 0.6098, "step": 3951 }, { "epoch": 0.1, "grad_norm": 2.9188218116760254, "learning_rate": 1.9734693495356835e-05, "loss": 0.8367, "step": 3952 }, { "epoch": 0.1, "grad_norm": 2.9741976261138916, "learning_rate": 1.9734503510640972e-05, "loss": 0.6613, "step": 3953 }, { "epoch": 0.1, "grad_norm": 1.600257396697998, "learning_rate": 1.973431345884103e-05, "loss": 0.743, "step": 3954 }, { "epoch": 0.1, "grad_norm": 3.682265520095825, "learning_rate": 1.9734123339958314e-05, "loss": 0.7031, "step": 3955 }, { "epoch": 0.1, "grad_norm": 2.0233285427093506, "learning_rate": 1.9733933153994135e-05, "loss": 0.7475, "step": 3956 }, { "epoch": 0.1, "grad_norm": 6.612287521362305, "learning_rate": 1.9733742900949805e-05, "loss": 0.725, "step": 3957 }, { "epoch": 0.1, "grad_norm": 3.2696516513824463, "learning_rate": 1.9733552580826635e-05, "loss": 0.7105, "step": 3958 }, { "epoch": 0.1, "grad_norm": 3.197657346725464, "learning_rate": 1.9733362193625934e-05, "loss": 0.6212, "step": 3959 }, { "epoch": 0.1, "grad_norm": 2.883387804031372, "learning_rate": 1.9733171739349015e-05, "loss": 0.6265, "step": 3960 }, { "epoch": 0.1, "grad_norm": 4.106778144836426, "learning_rate": 1.9732981217997197e-05, "loss": 0.8303, "step": 3961 }, { "epoch": 0.1, "grad_norm": 1.9788885116577148, "learning_rate": 1.973279062957178e-05, "loss": 0.7091, "step": 3962 }, { "epoch": 0.1, "grad_norm": 2.500328540802002, "learning_rate": 1.9732599974074088e-05, "loss": 0.7046, "step": 3963 }, { "epoch": 0.1, "grad_norm": 2.237546443939209, "learning_rate": 1.973240925150543e-05, "loss": 0.6335, "step": 3964 }, { "epoch": 0.1, "grad_norm": 2.629906177520752, "learning_rate": 1.9732218461867127e-05, "loss": 0.69, "step": 3965 }, { "epoch": 0.1, "grad_norm": 2.2895846366882324, "learning_rate": 1.9732027605160484e-05, "loss": 0.5992, "step": 3966 }, { "epoch": 0.1, "grad_norm": 3.9543795585632324, "learning_rate": 1.9731836681386823e-05, "loss": 0.6914, "step": 3967 }, { "epoch": 0.1, "grad_norm": 2.772897958755493, "learning_rate": 1.9731645690547458e-05, "loss": 0.7602, "step": 3968 }, { "epoch": 0.1, "grad_norm": 1.9082956314086914, "learning_rate": 1.9731454632643704e-05, "loss": 0.5394, "step": 3969 }, { "epoch": 0.1, "grad_norm": 1.7237434387207031, "learning_rate": 1.9731263507676875e-05, "loss": 0.5033, "step": 3970 }, { "epoch": 0.1, "grad_norm": 5.013640880584717, "learning_rate": 1.97310723156483e-05, "loss": 0.847, "step": 3971 }, { "epoch": 0.1, "grad_norm": 2.5958011150360107, "learning_rate": 1.9730881056559282e-05, "loss": 0.6229, "step": 3972 }, { "epoch": 0.1, "grad_norm": 3.3266658782958984, "learning_rate": 1.9730689730411144e-05, "loss": 0.6346, "step": 3973 }, { "epoch": 0.1, "grad_norm": 4.964326858520508, "learning_rate": 1.973049833720521e-05, "loss": 0.8786, "step": 3974 }, { "epoch": 0.1, "grad_norm": 1.8793022632598877, "learning_rate": 1.9730306876942795e-05, "loss": 0.6821, "step": 3975 }, { "epoch": 0.1, "grad_norm": 2.6725659370422363, "learning_rate": 1.9730115349625216e-05, "loss": 0.6716, "step": 3976 }, { "epoch": 0.1, "grad_norm": 6.2654242515563965, "learning_rate": 1.9729923755253795e-05, "loss": 0.8652, "step": 3977 }, { "epoch": 0.1, "grad_norm": 2.9075961112976074, "learning_rate": 1.9729732093829854e-05, "loss": 0.8315, "step": 3978 }, { "epoch": 0.1, "grad_norm": 2.6459767818450928, "learning_rate": 1.972954036535471e-05, "loss": 0.7466, "step": 3979 }, { "epoch": 0.1, "grad_norm": 3.348707675933838, "learning_rate": 1.9729348569829688e-05, "loss": 0.6824, "step": 3980 }, { "epoch": 0.1, "grad_norm": 1.3872613906860352, "learning_rate": 1.9729156707256106e-05, "loss": 0.6753, "step": 3981 }, { "epoch": 0.1, "grad_norm": 2.731372594833374, "learning_rate": 1.9728964777635292e-05, "loss": 0.6713, "step": 3982 }, { "epoch": 0.1, "grad_norm": 1.894671082496643, "learning_rate": 1.9728772780968565e-05, "loss": 0.7055, "step": 3983 }, { "epoch": 0.1, "grad_norm": 2.3177859783172607, "learning_rate": 1.9728580717257245e-05, "loss": 0.7071, "step": 3984 }, { "epoch": 0.1, "grad_norm": 1.7900586128234863, "learning_rate": 1.972838858650266e-05, "loss": 0.7123, "step": 3985 }, { "epoch": 0.1, "grad_norm": 3.7914462089538574, "learning_rate": 1.9728196388706134e-05, "loss": 0.6642, "step": 3986 }, { "epoch": 0.1, "grad_norm": 2.6741321086883545, "learning_rate": 1.9728004123868993e-05, "loss": 0.6795, "step": 3987 }, { "epoch": 0.1, "grad_norm": 2.550591230392456, "learning_rate": 1.9727811791992554e-05, "loss": 0.6493, "step": 3988 }, { "epoch": 0.1, "grad_norm": 2.066324234008789, "learning_rate": 1.972761939307815e-05, "loss": 0.6449, "step": 3989 }, { "epoch": 0.1, "grad_norm": 3.273397207260132, "learning_rate": 1.9727426927127103e-05, "loss": 0.853, "step": 3990 }, { "epoch": 0.1, "grad_norm": 1.9938338994979858, "learning_rate": 1.9727234394140744e-05, "loss": 0.6425, "step": 3991 }, { "epoch": 0.1, "grad_norm": 3.1094753742218018, "learning_rate": 1.9727041794120393e-05, "loss": 0.8674, "step": 3992 }, { "epoch": 0.1, "grad_norm": 2.253220319747925, "learning_rate": 1.9726849127067383e-05, "loss": 0.7475, "step": 3993 }, { "epoch": 0.1, "grad_norm": 2.682002544403076, "learning_rate": 1.972665639298304e-05, "loss": 0.7822, "step": 3994 }, { "epoch": 0.1, "grad_norm": 1.5874054431915283, "learning_rate": 1.9726463591868694e-05, "loss": 0.693, "step": 3995 }, { "epoch": 0.1, "grad_norm": 3.8304553031921387, "learning_rate": 1.9726270723725668e-05, "loss": 0.4724, "step": 3996 }, { "epoch": 0.1, "grad_norm": 1.5127933025360107, "learning_rate": 1.9726077788555298e-05, "loss": 0.61, "step": 3997 }, { "epoch": 0.1, "grad_norm": 4.945430278778076, "learning_rate": 1.972588478635891e-05, "loss": 0.7252, "step": 3998 }, { "epoch": 0.1, "grad_norm": 2.1100571155548096, "learning_rate": 1.9725691717137834e-05, "loss": 0.7009, "step": 3999 }, { "epoch": 0.1, "grad_norm": 3.1434497833251953, "learning_rate": 1.97254985808934e-05, "loss": 0.6945, "step": 4000 }, { "epoch": 0.1, "grad_norm": 2.711383581161499, "learning_rate": 1.972530537762694e-05, "loss": 0.6575, "step": 4001 }, { "epoch": 0.1, "grad_norm": 2.195675849914551, "learning_rate": 1.9725112107339784e-05, "loss": 0.6994, "step": 4002 }, { "epoch": 0.1, "grad_norm": 3.272498607635498, "learning_rate": 1.972491877003327e-05, "loss": 0.4847, "step": 4003 }, { "epoch": 0.1, "grad_norm": 2.3016281127929688, "learning_rate": 1.972472536570872e-05, "loss": 0.6693, "step": 4004 }, { "epoch": 0.1, "grad_norm": 4.14322566986084, "learning_rate": 1.9724531894367475e-05, "loss": 0.8417, "step": 4005 }, { "epoch": 0.1, "grad_norm": 5.032844066619873, "learning_rate": 1.9724338356010866e-05, "loss": 0.7249, "step": 4006 }, { "epoch": 0.1, "grad_norm": 2.661494255065918, "learning_rate": 1.9724144750640225e-05, "loss": 0.5345, "step": 4007 }, { "epoch": 0.1, "grad_norm": 1.8871468305587769, "learning_rate": 1.9723951078256892e-05, "loss": 0.6886, "step": 4008 }, { "epoch": 0.1, "grad_norm": 2.234135627746582, "learning_rate": 1.9723757338862193e-05, "loss": 0.5746, "step": 4009 }, { "epoch": 0.1, "grad_norm": 4.5151214599609375, "learning_rate": 1.972356353245747e-05, "loss": 0.6432, "step": 4010 }, { "epoch": 0.1, "grad_norm": 1.8920552730560303, "learning_rate": 1.9723369659044054e-05, "loss": 0.7908, "step": 4011 }, { "epoch": 0.1, "grad_norm": 1.6795060634613037, "learning_rate": 1.972317571862328e-05, "loss": 0.7164, "step": 4012 }, { "epoch": 0.1, "grad_norm": 3.1223487854003906, "learning_rate": 1.9722981711196495e-05, "loss": 0.5407, "step": 4013 }, { "epoch": 0.1, "grad_norm": 2.8986048698425293, "learning_rate": 1.9722787636765022e-05, "loss": 0.8384, "step": 4014 }, { "epoch": 0.1, "grad_norm": 1.6366288661956787, "learning_rate": 1.972259349533021e-05, "loss": 0.7652, "step": 4015 }, { "epoch": 0.1, "grad_norm": 3.009355306625366, "learning_rate": 1.9722399286893393e-05, "loss": 0.6892, "step": 4016 }, { "epoch": 0.1, "grad_norm": 1.6687067747116089, "learning_rate": 1.9722205011455906e-05, "loss": 0.5698, "step": 4017 }, { "epoch": 0.1, "grad_norm": 1.8150850534439087, "learning_rate": 1.9722010669019086e-05, "loss": 0.5427, "step": 4018 }, { "epoch": 0.1, "grad_norm": 5.24580192565918, "learning_rate": 1.9721816259584284e-05, "loss": 0.741, "step": 4019 }, { "epoch": 0.1, "grad_norm": 4.378663063049316, "learning_rate": 1.9721621783152828e-05, "loss": 0.7417, "step": 4020 }, { "epoch": 0.1, "grad_norm": 2.965306520462036, "learning_rate": 1.9721427239726063e-05, "loss": 0.6498, "step": 4021 }, { "epoch": 0.1, "grad_norm": 2.7934417724609375, "learning_rate": 1.9721232629305328e-05, "loss": 0.5734, "step": 4022 }, { "epoch": 0.1, "grad_norm": 1.7092183828353882, "learning_rate": 1.9721037951891965e-05, "loss": 0.6882, "step": 4023 }, { "epoch": 0.1, "grad_norm": 1.8125215768814087, "learning_rate": 1.9720843207487317e-05, "loss": 0.705, "step": 4024 }, { "epoch": 0.1, "grad_norm": 1.9831798076629639, "learning_rate": 1.9720648396092725e-05, "loss": 0.4613, "step": 4025 }, { "epoch": 0.1, "grad_norm": 2.875751495361328, "learning_rate": 1.972045351770953e-05, "loss": 0.6597, "step": 4026 }, { "epoch": 0.1, "grad_norm": 1.2334742546081543, "learning_rate": 1.9720258572339078e-05, "loss": 0.4739, "step": 4027 }, { "epoch": 0.1, "grad_norm": 3.707437038421631, "learning_rate": 1.972006355998271e-05, "loss": 0.7094, "step": 4028 }, { "epoch": 0.1, "grad_norm": 1.9270637035369873, "learning_rate": 1.971986848064177e-05, "loss": 0.7258, "step": 4029 }, { "epoch": 0.1, "grad_norm": 2.989208698272705, "learning_rate": 1.9719673334317605e-05, "loss": 0.5786, "step": 4030 }, { "epoch": 0.1, "grad_norm": 8.597994804382324, "learning_rate": 1.9719478121011558e-05, "loss": 0.7593, "step": 4031 }, { "epoch": 0.1, "grad_norm": 4.458554744720459, "learning_rate": 1.9719282840724974e-05, "loss": 0.6691, "step": 4032 }, { "epoch": 0.1, "grad_norm": 3.614805221557617, "learning_rate": 1.9719087493459196e-05, "loss": 0.7856, "step": 4033 }, { "epoch": 0.1, "grad_norm": 4.188426494598389, "learning_rate": 1.9718892079215575e-05, "loss": 0.6773, "step": 4034 }, { "epoch": 0.1, "grad_norm": 3.824040174484253, "learning_rate": 1.9718696597995456e-05, "loss": 0.6375, "step": 4035 }, { "epoch": 0.1, "grad_norm": 2.5744619369506836, "learning_rate": 1.9718501049800184e-05, "loss": 0.6439, "step": 4036 }, { "epoch": 0.1, "grad_norm": 1.8717542886734009, "learning_rate": 1.9718305434631112e-05, "loss": 0.7219, "step": 4037 }, { "epoch": 0.1, "grad_norm": 1.8678935766220093, "learning_rate": 1.9718109752489584e-05, "loss": 0.6414, "step": 4038 }, { "epoch": 0.1, "grad_norm": 4.28572940826416, "learning_rate": 1.971791400337695e-05, "loss": 0.5561, "step": 4039 }, { "epoch": 0.1, "grad_norm": 2.3883957862854004, "learning_rate": 1.971771818729455e-05, "loss": 0.7759, "step": 4040 }, { "epoch": 0.1, "grad_norm": 3.9840710163116455, "learning_rate": 1.971752230424375e-05, "loss": 0.8711, "step": 4041 }, { "epoch": 0.1, "grad_norm": 4.536350727081299, "learning_rate": 1.9717326354225888e-05, "loss": 0.7016, "step": 4042 }, { "epoch": 0.1, "grad_norm": 4.281323432922363, "learning_rate": 1.971713033724232e-05, "loss": 0.6365, "step": 4043 }, { "epoch": 0.1, "grad_norm": 3.586674213409424, "learning_rate": 1.9716934253294392e-05, "loss": 0.563, "step": 4044 }, { "epoch": 0.1, "grad_norm": 3.168001890182495, "learning_rate": 1.971673810238346e-05, "loss": 0.7393, "step": 4045 }, { "epoch": 0.1, "grad_norm": 4.150044918060303, "learning_rate": 1.9716541884510874e-05, "loss": 0.8034, "step": 4046 }, { "epoch": 0.1, "grad_norm": 2.0944604873657227, "learning_rate": 1.9716345599677984e-05, "loss": 0.7314, "step": 4047 }, { "epoch": 0.1, "grad_norm": 4.934842109680176, "learning_rate": 1.9716149247886143e-05, "loss": 0.7757, "step": 4048 }, { "epoch": 0.1, "grad_norm": 1.862615942955017, "learning_rate": 1.971595282913671e-05, "loss": 0.6018, "step": 4049 }, { "epoch": 0.1, "grad_norm": 2.509838104248047, "learning_rate": 1.971575634343103e-05, "loss": 0.666, "step": 4050 }, { "epoch": 0.1, "grad_norm": 1.9102989435195923, "learning_rate": 1.9715559790770467e-05, "loss": 0.7474, "step": 4051 }, { "epoch": 0.1, "grad_norm": 3.9877309799194336, "learning_rate": 1.9715363171156366e-05, "loss": 0.6909, "step": 4052 }, { "epoch": 0.1, "grad_norm": 6.873552322387695, "learning_rate": 1.9715166484590084e-05, "loss": 0.8044, "step": 4053 }, { "epoch": 0.1, "grad_norm": 3.579244613647461, "learning_rate": 1.971496973107298e-05, "loss": 0.9099, "step": 4054 }, { "epoch": 0.1, "grad_norm": 3.9489693641662598, "learning_rate": 1.971477291060641e-05, "loss": 0.7995, "step": 4055 }, { "epoch": 0.1, "grad_norm": 2.8056106567382812, "learning_rate": 1.971457602319173e-05, "loss": 0.6867, "step": 4056 }, { "epoch": 0.1, "grad_norm": 1.5216494798660278, "learning_rate": 1.971437906883029e-05, "loss": 0.6012, "step": 4057 }, { "epoch": 0.1, "grad_norm": 3.441359758377075, "learning_rate": 1.9714182047523454e-05, "loss": 0.6424, "step": 4058 }, { "epoch": 0.1, "grad_norm": 2.728665590286255, "learning_rate": 1.971398495927258e-05, "loss": 0.8025, "step": 4059 }, { "epoch": 0.1, "grad_norm": 3.370229721069336, "learning_rate": 1.9713787804079024e-05, "loss": 0.6469, "step": 4060 }, { "epoch": 0.1, "grad_norm": 1.648259162902832, "learning_rate": 1.9713590581944144e-05, "loss": 0.6513, "step": 4061 }, { "epoch": 0.1, "grad_norm": 4.866837501525879, "learning_rate": 1.9713393292869303e-05, "loss": 0.7203, "step": 4062 }, { "epoch": 0.1, "grad_norm": 3.3057336807250977, "learning_rate": 1.9713195936855854e-05, "loss": 0.703, "step": 4063 }, { "epoch": 0.1, "grad_norm": 2.81947660446167, "learning_rate": 1.9712998513905166e-05, "loss": 0.6488, "step": 4064 }, { "epoch": 0.1, "grad_norm": 2.4732308387756348, "learning_rate": 1.9712801024018593e-05, "loss": 0.6124, "step": 4065 }, { "epoch": 0.1, "grad_norm": 3.656447410583496, "learning_rate": 1.9712603467197495e-05, "loss": 0.6902, "step": 4066 }, { "epoch": 0.1, "grad_norm": 3.8044943809509277, "learning_rate": 1.971240584344324e-05, "loss": 0.7313, "step": 4067 }, { "epoch": 0.1, "grad_norm": 4.048802375793457, "learning_rate": 1.9712208152757184e-05, "loss": 0.7161, "step": 4068 }, { "epoch": 0.1, "grad_norm": 2.8038618564605713, "learning_rate": 1.9712010395140688e-05, "loss": 0.6685, "step": 4069 }, { "epoch": 0.1, "grad_norm": 2.81788969039917, "learning_rate": 1.971181257059512e-05, "loss": 0.7883, "step": 4070 }, { "epoch": 0.1, "grad_norm": 4.7270636558532715, "learning_rate": 1.9711614679121843e-05, "loss": 0.7099, "step": 4071 }, { "epoch": 0.1, "grad_norm": 1.9133728742599487, "learning_rate": 1.9711416720722218e-05, "loss": 0.4972, "step": 4072 }, { "epoch": 0.1, "grad_norm": 1.8554328680038452, "learning_rate": 1.9711218695397615e-05, "loss": 0.5869, "step": 4073 }, { "epoch": 0.1, "grad_norm": 4.354671478271484, "learning_rate": 1.9711020603149386e-05, "loss": 0.7249, "step": 4074 }, { "epoch": 0.1, "grad_norm": 2.8956289291381836, "learning_rate": 1.9710822443978907e-05, "loss": 0.6461, "step": 4075 }, { "epoch": 0.1, "grad_norm": 4.698858261108398, "learning_rate": 1.9710624217887543e-05, "loss": 0.8379, "step": 4076 }, { "epoch": 0.1, "grad_norm": 3.0990145206451416, "learning_rate": 1.9710425924876656e-05, "loss": 0.7444, "step": 4077 }, { "epoch": 0.1, "grad_norm": 6.127426624298096, "learning_rate": 1.971022756494761e-05, "loss": 0.67, "step": 4078 }, { "epoch": 0.1, "grad_norm": 2.3037571907043457, "learning_rate": 1.9710029138101782e-05, "loss": 0.6953, "step": 4079 }, { "epoch": 0.1, "grad_norm": 2.483466863632202, "learning_rate": 1.9709830644340532e-05, "loss": 0.6363, "step": 4080 }, { "epoch": 0.1, "grad_norm": 5.953220367431641, "learning_rate": 1.970963208366523e-05, "loss": 0.7562, "step": 4081 }, { "epoch": 0.1, "grad_norm": 2.364445209503174, "learning_rate": 1.9709433456077245e-05, "loss": 0.558, "step": 4082 }, { "epoch": 0.1, "grad_norm": 2.3411202430725098, "learning_rate": 1.9709234761577942e-05, "loss": 0.6606, "step": 4083 }, { "epoch": 0.1, "grad_norm": 3.413886070251465, "learning_rate": 1.9709036000168694e-05, "loss": 0.6567, "step": 4084 }, { "epoch": 0.1, "grad_norm": 2.7552437782287598, "learning_rate": 1.9708837171850867e-05, "loss": 0.7203, "step": 4085 }, { "epoch": 0.1, "grad_norm": 2.6353254318237305, "learning_rate": 1.970863827662584e-05, "loss": 0.7324, "step": 4086 }, { "epoch": 0.1, "grad_norm": 3.6842129230499268, "learning_rate": 1.970843931449497e-05, "loss": 0.8373, "step": 4087 }, { "epoch": 0.1, "grad_norm": 5.474079608917236, "learning_rate": 1.9708240285459642e-05, "loss": 0.5627, "step": 4088 }, { "epoch": 0.1, "grad_norm": 3.1550261974334717, "learning_rate": 1.970804118952122e-05, "loss": 0.5185, "step": 4089 }, { "epoch": 0.1, "grad_norm": 2.3955938816070557, "learning_rate": 1.9707842026681073e-05, "loss": 0.6786, "step": 4090 }, { "epoch": 0.1, "grad_norm": 2.4568116664886475, "learning_rate": 1.9707642796940583e-05, "loss": 0.4636, "step": 4091 }, { "epoch": 0.1, "grad_norm": 1.699596881866455, "learning_rate": 1.9707443500301116e-05, "loss": 0.5389, "step": 4092 }, { "epoch": 0.1, "grad_norm": 1.7212368249893188, "learning_rate": 1.9707244136764047e-05, "loss": 0.7026, "step": 4093 }, { "epoch": 0.1, "grad_norm": 2.246952772140503, "learning_rate": 1.9707044706330753e-05, "loss": 0.5936, "step": 4094 }, { "epoch": 0.1, "grad_norm": 2.4397387504577637, "learning_rate": 1.97068452090026e-05, "loss": 0.638, "step": 4095 }, { "epoch": 0.1, "grad_norm": 7.2826313972473145, "learning_rate": 1.970664564478097e-05, "loss": 0.7395, "step": 4096 }, { "epoch": 0.11, "grad_norm": 4.338316917419434, "learning_rate": 1.9706446013667236e-05, "loss": 0.6967, "step": 4097 }, { "epoch": 0.11, "grad_norm": 1.972777009010315, "learning_rate": 1.970624631566278e-05, "loss": 0.6358, "step": 4098 }, { "epoch": 0.11, "grad_norm": 1.6112796068191528, "learning_rate": 1.9706046550768967e-05, "loss": 0.7339, "step": 4099 }, { "epoch": 0.11, "grad_norm": 2.2933590412139893, "learning_rate": 1.970584671898718e-05, "loss": 0.4671, "step": 4100 }, { "epoch": 0.11, "grad_norm": 4.7284345626831055, "learning_rate": 1.9705646820318794e-05, "loss": 0.7479, "step": 4101 }, { "epoch": 0.11, "grad_norm": 2.586829662322998, "learning_rate": 1.970544685476519e-05, "loss": 0.7717, "step": 4102 }, { "epoch": 0.11, "grad_norm": 2.865419387817383, "learning_rate": 1.9705246822327745e-05, "loss": 0.7189, "step": 4103 }, { "epoch": 0.11, "grad_norm": 5.037824630737305, "learning_rate": 1.9705046723007833e-05, "loss": 0.5776, "step": 4104 }, { "epoch": 0.11, "grad_norm": 5.004678249359131, "learning_rate": 1.970484655680684e-05, "loss": 0.5942, "step": 4105 }, { "epoch": 0.11, "grad_norm": 2.271672010421753, "learning_rate": 1.9704646323726137e-05, "loss": 0.5673, "step": 4106 }, { "epoch": 0.11, "grad_norm": 3.6926610469818115, "learning_rate": 1.970444602376711e-05, "loss": 0.7746, "step": 4107 }, { "epoch": 0.11, "grad_norm": 5.455587863922119, "learning_rate": 1.970424565693114e-05, "loss": 0.6664, "step": 4108 }, { "epoch": 0.11, "grad_norm": 3.3543853759765625, "learning_rate": 1.9704045223219605e-05, "loss": 0.5743, "step": 4109 }, { "epoch": 0.11, "grad_norm": 2.153477907180786, "learning_rate": 1.9703844722633887e-05, "loss": 0.6108, "step": 4110 }, { "epoch": 0.11, "grad_norm": 3.8000266551971436, "learning_rate": 1.9703644155175368e-05, "loss": 0.7848, "step": 4111 }, { "epoch": 0.11, "grad_norm": 5.400299549102783, "learning_rate": 1.9703443520845427e-05, "loss": 0.6348, "step": 4112 }, { "epoch": 0.11, "grad_norm": 1.7451590299606323, "learning_rate": 1.9703242819645454e-05, "loss": 0.6487, "step": 4113 }, { "epoch": 0.11, "grad_norm": 3.1526310443878174, "learning_rate": 1.9703042051576825e-05, "loss": 0.6493, "step": 4114 }, { "epoch": 0.11, "grad_norm": 3.6865592002868652, "learning_rate": 1.9702841216640927e-05, "loss": 0.5969, "step": 4115 }, { "epoch": 0.11, "grad_norm": 3.7558753490448, "learning_rate": 1.9702640314839144e-05, "loss": 0.713, "step": 4116 }, { "epoch": 0.11, "grad_norm": 5.004479885101318, "learning_rate": 1.9702439346172858e-05, "loss": 0.7232, "step": 4117 }, { "epoch": 0.11, "grad_norm": 4.116197109222412, "learning_rate": 1.9702238310643458e-05, "loss": 0.5956, "step": 4118 }, { "epoch": 0.11, "grad_norm": 1.1669089794158936, "learning_rate": 1.9702037208252323e-05, "loss": 0.6045, "step": 4119 }, { "epoch": 0.11, "grad_norm": 2.9721784591674805, "learning_rate": 1.970183603900085e-05, "loss": 0.6598, "step": 4120 }, { "epoch": 0.11, "grad_norm": 4.2296271324157715, "learning_rate": 1.970163480289041e-05, "loss": 0.8725, "step": 4121 }, { "epoch": 0.11, "grad_norm": 4.0802226066589355, "learning_rate": 1.97014334999224e-05, "loss": 0.6858, "step": 4122 }, { "epoch": 0.11, "grad_norm": 4.028998851776123, "learning_rate": 1.970123213009821e-05, "loss": 0.6511, "step": 4123 }, { "epoch": 0.11, "grad_norm": 5.319946765899658, "learning_rate": 1.970103069341922e-05, "loss": 0.6391, "step": 4124 }, { "epoch": 0.11, "grad_norm": 1.8616019487380981, "learning_rate": 1.9700829189886822e-05, "loss": 0.7772, "step": 4125 }, { "epoch": 0.11, "grad_norm": 7.206233978271484, "learning_rate": 1.9700627619502404e-05, "loss": 0.8207, "step": 4126 }, { "epoch": 0.11, "grad_norm": 1.8674864768981934, "learning_rate": 1.970042598226735e-05, "loss": 0.7313, "step": 4127 }, { "epoch": 0.11, "grad_norm": 3.552091121673584, "learning_rate": 1.9700224278183066e-05, "loss": 0.6212, "step": 4128 }, { "epoch": 0.11, "grad_norm": 5.074785232543945, "learning_rate": 1.970002250725092e-05, "loss": 0.6854, "step": 4129 }, { "epoch": 0.11, "grad_norm": 4.387091636657715, "learning_rate": 1.969982066947232e-05, "loss": 0.6707, "step": 4130 }, { "epoch": 0.11, "grad_norm": 5.321166515350342, "learning_rate": 1.9699618764848643e-05, "loss": 0.6635, "step": 4131 }, { "epoch": 0.11, "grad_norm": 3.5580735206604004, "learning_rate": 1.969941679338129e-05, "loss": 0.7023, "step": 4132 }, { "epoch": 0.11, "grad_norm": 1.3792494535446167, "learning_rate": 1.9699214755071652e-05, "loss": 0.609, "step": 4133 }, { "epoch": 0.11, "grad_norm": 2.638838768005371, "learning_rate": 1.969901264992112e-05, "loss": 0.8561, "step": 4134 }, { "epoch": 0.11, "grad_norm": 2.206033706665039, "learning_rate": 1.9698810477931086e-05, "loss": 0.8171, "step": 4135 }, { "epoch": 0.11, "grad_norm": 2.275934934616089, "learning_rate": 1.969860823910294e-05, "loss": 0.7837, "step": 4136 }, { "epoch": 0.11, "grad_norm": 3.043637990951538, "learning_rate": 1.9698405933438085e-05, "loss": 0.711, "step": 4137 }, { "epoch": 0.11, "grad_norm": 3.235962390899658, "learning_rate": 1.9698203560937905e-05, "loss": 0.6304, "step": 4138 }, { "epoch": 0.11, "grad_norm": 2.5285582542419434, "learning_rate": 1.96980011216038e-05, "loss": 0.6164, "step": 4139 }, { "epoch": 0.11, "grad_norm": 10.888628005981445, "learning_rate": 1.9697798615437166e-05, "loss": 0.7632, "step": 4140 }, { "epoch": 0.11, "grad_norm": 3.4006216526031494, "learning_rate": 1.969759604243939e-05, "loss": 0.7141, "step": 4141 }, { "epoch": 0.11, "grad_norm": 3.4012279510498047, "learning_rate": 1.9697393402611883e-05, "loss": 0.7935, "step": 4142 }, { "epoch": 0.11, "grad_norm": 2.281538248062134, "learning_rate": 1.9697190695956033e-05, "loss": 0.7799, "step": 4143 }, { "epoch": 0.11, "grad_norm": 3.4174346923828125, "learning_rate": 1.969698792247323e-05, "loss": 0.7311, "step": 4144 }, { "epoch": 0.11, "grad_norm": 2.38203763961792, "learning_rate": 1.9696785082164886e-05, "loss": 0.7543, "step": 4145 }, { "epoch": 0.11, "grad_norm": 5.971510887145996, "learning_rate": 1.9696582175032387e-05, "loss": 0.8125, "step": 4146 }, { "epoch": 0.11, "grad_norm": 13.715375900268555, "learning_rate": 1.9696379201077137e-05, "loss": 0.6208, "step": 4147 }, { "epoch": 0.11, "grad_norm": 1.8463890552520752, "learning_rate": 1.9696176160300532e-05, "loss": 0.7728, "step": 4148 }, { "epoch": 0.11, "grad_norm": 3.7501797676086426, "learning_rate": 1.9695973052703974e-05, "loss": 0.8025, "step": 4149 }, { "epoch": 0.11, "grad_norm": 3.0588362216949463, "learning_rate": 1.9695769878288863e-05, "loss": 0.6123, "step": 4150 }, { "epoch": 0.11, "grad_norm": 2.068484306335449, "learning_rate": 1.9695566637056592e-05, "loss": 0.6686, "step": 4151 }, { "epoch": 0.11, "grad_norm": 3.034721612930298, "learning_rate": 1.9695363329008576e-05, "loss": 0.6537, "step": 4152 }, { "epoch": 0.11, "grad_norm": 1.1303614377975464, "learning_rate": 1.9695159954146198e-05, "loss": 0.6959, "step": 4153 }, { "epoch": 0.11, "grad_norm": 2.699148178100586, "learning_rate": 1.9694956512470874e-05, "loss": 0.6846, "step": 4154 }, { "epoch": 0.11, "grad_norm": 3.0620830059051514, "learning_rate": 1.9694753003984e-05, "loss": 0.6796, "step": 4155 }, { "epoch": 0.11, "grad_norm": 4.240923881530762, "learning_rate": 1.9694549428686976e-05, "loss": 0.6081, "step": 4156 }, { "epoch": 0.11, "grad_norm": 2.4785196781158447, "learning_rate": 1.9694345786581214e-05, "loss": 0.5609, "step": 4157 }, { "epoch": 0.11, "grad_norm": 4.256080150604248, "learning_rate": 1.9694142077668107e-05, "loss": 0.6245, "step": 4158 }, { "epoch": 0.11, "grad_norm": 5.425586700439453, "learning_rate": 1.9693938301949064e-05, "loss": 0.7842, "step": 4159 }, { "epoch": 0.11, "grad_norm": 3.471041440963745, "learning_rate": 1.9693734459425487e-05, "loss": 0.5079, "step": 4160 }, { "epoch": 0.11, "grad_norm": 1.6116364002227783, "learning_rate": 1.9693530550098784e-05, "loss": 0.6984, "step": 4161 }, { "epoch": 0.11, "grad_norm": 2.7026660442352295, "learning_rate": 1.969332657397036e-05, "loss": 0.7579, "step": 4162 }, { "epoch": 0.11, "grad_norm": 5.151602268218994, "learning_rate": 1.9693122531041617e-05, "loss": 0.8944, "step": 4163 }, { "epoch": 0.11, "grad_norm": 3.078126907348633, "learning_rate": 1.9692918421313966e-05, "loss": 0.7302, "step": 4164 }, { "epoch": 0.11, "grad_norm": 2.8142223358154297, "learning_rate": 1.9692714244788805e-05, "loss": 0.7405, "step": 4165 }, { "epoch": 0.11, "grad_norm": 3.1002163887023926, "learning_rate": 1.969251000146755e-05, "loss": 0.7615, "step": 4166 }, { "epoch": 0.11, "grad_norm": 3.3753857612609863, "learning_rate": 1.969230569135161e-05, "loss": 0.6096, "step": 4167 }, { "epoch": 0.11, "grad_norm": 7.106903076171875, "learning_rate": 1.969210131444238e-05, "loss": 1.204, "step": 4168 }, { "epoch": 0.11, "grad_norm": 2.90958833694458, "learning_rate": 1.9691896870741284e-05, "loss": 0.6173, "step": 4169 }, { "epoch": 0.11, "grad_norm": 1.7631144523620605, "learning_rate": 1.9691692360249724e-05, "loss": 0.7081, "step": 4170 }, { "epoch": 0.11, "grad_norm": 10.71125602722168, "learning_rate": 1.9691487782969105e-05, "loss": 0.7074, "step": 4171 }, { "epoch": 0.11, "grad_norm": 3.267561435699463, "learning_rate": 1.9691283138900843e-05, "loss": 0.7491, "step": 4172 }, { "epoch": 0.11, "grad_norm": 3.8953332901000977, "learning_rate": 1.9691078428046347e-05, "loss": 0.7998, "step": 4173 }, { "epoch": 0.11, "grad_norm": 3.2086100578308105, "learning_rate": 1.9690873650407026e-05, "loss": 0.5889, "step": 4174 }, { "epoch": 0.11, "grad_norm": 2.498873710632324, "learning_rate": 1.969066880598429e-05, "loss": 0.7108, "step": 4175 }, { "epoch": 0.11, "grad_norm": 2.76517915725708, "learning_rate": 1.9690463894779557e-05, "loss": 0.593, "step": 4176 }, { "epoch": 0.11, "grad_norm": 2.6677093505859375, "learning_rate": 1.969025891679423e-05, "loss": 0.6194, "step": 4177 }, { "epoch": 0.11, "grad_norm": 3.046868085861206, "learning_rate": 1.9690053872029734e-05, "loss": 0.7513, "step": 4178 }, { "epoch": 0.11, "grad_norm": 2.9007277488708496, "learning_rate": 1.9689848760487468e-05, "loss": 0.7417, "step": 4179 }, { "epoch": 0.11, "grad_norm": 12.329561233520508, "learning_rate": 1.9689643582168855e-05, "loss": 0.922, "step": 4180 }, { "epoch": 0.11, "grad_norm": 2.066021203994751, "learning_rate": 1.9689438337075307e-05, "loss": 0.5295, "step": 4181 }, { "epoch": 0.11, "grad_norm": 1.803053379058838, "learning_rate": 1.9689233025208234e-05, "loss": 0.6591, "step": 4182 }, { "epoch": 0.11, "grad_norm": 4.920092582702637, "learning_rate": 1.9689027646569056e-05, "loss": 0.7197, "step": 4183 }, { "epoch": 0.11, "grad_norm": 3.2922301292419434, "learning_rate": 1.968882220115919e-05, "loss": 0.6929, "step": 4184 }, { "epoch": 0.11, "grad_norm": 3.6082963943481445, "learning_rate": 1.9688616688980042e-05, "loss": 0.9516, "step": 4185 }, { "epoch": 0.11, "grad_norm": 2.060332775115967, "learning_rate": 1.968841111003304e-05, "loss": 0.7665, "step": 4186 }, { "epoch": 0.11, "grad_norm": 3.184582233428955, "learning_rate": 1.9688205464319595e-05, "loss": 0.6586, "step": 4187 }, { "epoch": 0.11, "grad_norm": 2.9543421268463135, "learning_rate": 1.968799975184112e-05, "loss": 0.5901, "step": 4188 }, { "epoch": 0.11, "grad_norm": 4.484618663787842, "learning_rate": 1.968779397259904e-05, "loss": 0.835, "step": 4189 }, { "epoch": 0.11, "grad_norm": 6.6704487800598145, "learning_rate": 1.9687588126594774e-05, "loss": 0.7269, "step": 4190 }, { "epoch": 0.11, "grad_norm": 1.576866626739502, "learning_rate": 1.9687382213829732e-05, "loss": 0.5678, "step": 4191 }, { "epoch": 0.11, "grad_norm": 2.9685986042022705, "learning_rate": 1.9687176234305344e-05, "loss": 0.6094, "step": 4192 }, { "epoch": 0.11, "grad_norm": 1.3839969635009766, "learning_rate": 1.968697018802302e-05, "loss": 0.8619, "step": 4193 }, { "epoch": 0.11, "grad_norm": 3.1712334156036377, "learning_rate": 1.9686764074984182e-05, "loss": 0.7238, "step": 4194 }, { "epoch": 0.11, "grad_norm": 5.567575454711914, "learning_rate": 1.968655789519025e-05, "loss": 0.69, "step": 4195 }, { "epoch": 0.11, "grad_norm": 2.587761878967285, "learning_rate": 1.968635164864265e-05, "loss": 0.6386, "step": 4196 }, { "epoch": 0.11, "grad_norm": 1.934404969215393, "learning_rate": 1.96861453353428e-05, "loss": 0.6436, "step": 4197 }, { "epoch": 0.11, "grad_norm": 3.0908260345458984, "learning_rate": 1.968593895529212e-05, "loss": 0.5971, "step": 4198 }, { "epoch": 0.11, "grad_norm": 2.8024444580078125, "learning_rate": 1.9685732508492036e-05, "loss": 0.5554, "step": 4199 }, { "epoch": 0.11, "grad_norm": 2.6289756298065186, "learning_rate": 1.9685525994943966e-05, "loss": 0.7798, "step": 4200 }, { "epoch": 0.11, "grad_norm": 4.311968803405762, "learning_rate": 1.968531941464934e-05, "loss": 0.6709, "step": 4201 }, { "epoch": 0.11, "grad_norm": 2.9035274982452393, "learning_rate": 1.9685112767609577e-05, "loss": 0.5451, "step": 4202 }, { "epoch": 0.11, "grad_norm": 2.2753164768218994, "learning_rate": 1.96849060538261e-05, "loss": 0.7854, "step": 4203 }, { "epoch": 0.11, "grad_norm": 2.0973241329193115, "learning_rate": 1.9684699273300332e-05, "loss": 0.6615, "step": 4204 }, { "epoch": 0.11, "grad_norm": 1.8214889764785767, "learning_rate": 1.9684492426033706e-05, "loss": 0.7166, "step": 4205 }, { "epoch": 0.11, "grad_norm": 2.6792454719543457, "learning_rate": 1.9684285512027642e-05, "loss": 0.5864, "step": 4206 }, { "epoch": 0.11, "grad_norm": 2.625304937362671, "learning_rate": 1.9684078531283564e-05, "loss": 0.619, "step": 4207 }, { "epoch": 0.11, "grad_norm": 6.7401838302612305, "learning_rate": 1.9683871483802902e-05, "loss": 0.7598, "step": 4208 }, { "epoch": 0.11, "grad_norm": 5.426627159118652, "learning_rate": 1.9683664369587083e-05, "loss": 0.639, "step": 4209 }, { "epoch": 0.11, "grad_norm": 3.646230697631836, "learning_rate": 1.9683457188637532e-05, "loss": 0.4889, "step": 4210 }, { "epoch": 0.11, "grad_norm": 2.4806504249572754, "learning_rate": 1.9683249940955676e-05, "loss": 0.5897, "step": 4211 }, { "epoch": 0.11, "grad_norm": 3.947981357574463, "learning_rate": 1.968304262654295e-05, "loss": 0.7231, "step": 4212 }, { "epoch": 0.11, "grad_norm": 5.4723801612854, "learning_rate": 1.9682835245400772e-05, "loss": 0.7584, "step": 4213 }, { "epoch": 0.11, "grad_norm": 3.577291965484619, "learning_rate": 1.968262779753058e-05, "loss": 0.8048, "step": 4214 }, { "epoch": 0.11, "grad_norm": 2.841780662536621, "learning_rate": 1.96824202829338e-05, "loss": 0.7422, "step": 4215 }, { "epoch": 0.11, "grad_norm": 2.271703004837036, "learning_rate": 1.9682212701611863e-05, "loss": 0.6913, "step": 4216 }, { "epoch": 0.11, "grad_norm": 2.796031951904297, "learning_rate": 1.9682005053566197e-05, "loss": 0.6169, "step": 4217 }, { "epoch": 0.11, "grad_norm": 9.403924942016602, "learning_rate": 1.9681797338798238e-05, "loss": 0.6358, "step": 4218 }, { "epoch": 0.11, "grad_norm": 4.633848667144775, "learning_rate": 1.968158955730941e-05, "loss": 0.6187, "step": 4219 }, { "epoch": 0.11, "grad_norm": 7.851351261138916, "learning_rate": 1.9681381709101152e-05, "loss": 0.8199, "step": 4220 }, { "epoch": 0.11, "grad_norm": 1.8649131059646606, "learning_rate": 1.9681173794174896e-05, "loss": 0.7049, "step": 4221 }, { "epoch": 0.11, "grad_norm": 2.399641752243042, "learning_rate": 1.968096581253207e-05, "loss": 0.8218, "step": 4222 }, { "epoch": 0.11, "grad_norm": 2.972395181655884, "learning_rate": 1.968075776417411e-05, "loss": 0.7873, "step": 4223 }, { "epoch": 0.11, "grad_norm": 1.8695183992385864, "learning_rate": 1.968054964910245e-05, "loss": 0.5386, "step": 4224 }, { "epoch": 0.11, "grad_norm": 1.5560061931610107, "learning_rate": 1.9680341467318523e-05, "loss": 0.5725, "step": 4225 }, { "epoch": 0.11, "grad_norm": 2.300448179244995, "learning_rate": 1.9680133218823764e-05, "loss": 0.6109, "step": 4226 }, { "epoch": 0.11, "grad_norm": 2.0067689418792725, "learning_rate": 1.967992490361961e-05, "loss": 0.6358, "step": 4227 }, { "epoch": 0.11, "grad_norm": 2.789213180541992, "learning_rate": 1.9679716521707495e-05, "loss": 0.5486, "step": 4228 }, { "epoch": 0.11, "grad_norm": 1.8527997732162476, "learning_rate": 1.9679508073088852e-05, "loss": 0.6197, "step": 4229 }, { "epoch": 0.11, "grad_norm": 3.45650577545166, "learning_rate": 1.9679299557765125e-05, "loss": 0.6487, "step": 4230 }, { "epoch": 0.11, "grad_norm": 2.492748260498047, "learning_rate": 1.9679090975737746e-05, "loss": 0.5526, "step": 4231 }, { "epoch": 0.11, "grad_norm": 2.9348838329315186, "learning_rate": 1.967888232700815e-05, "loss": 0.6512, "step": 4232 }, { "epoch": 0.11, "grad_norm": 2.9472150802612305, "learning_rate": 1.9678673611577783e-05, "loss": 0.7679, "step": 4233 }, { "epoch": 0.11, "grad_norm": 1.6883625984191895, "learning_rate": 1.9678464829448072e-05, "loss": 0.6096, "step": 4234 }, { "epoch": 0.11, "grad_norm": 5.733981609344482, "learning_rate": 1.9678255980620463e-05, "loss": 0.6318, "step": 4235 }, { "epoch": 0.11, "grad_norm": 3.9257006645202637, "learning_rate": 1.96780470650964e-05, "loss": 0.6807, "step": 4236 }, { "epoch": 0.11, "grad_norm": 4.955016136169434, "learning_rate": 1.967783808287731e-05, "loss": 0.7193, "step": 4237 }, { "epoch": 0.11, "grad_norm": 2.7313079833984375, "learning_rate": 1.9677629033964644e-05, "loss": 0.7477, "step": 4238 }, { "epoch": 0.11, "grad_norm": 5.523954391479492, "learning_rate": 1.9677419918359838e-05, "loss": 0.735, "step": 4239 }, { "epoch": 0.11, "grad_norm": 2.5343737602233887, "learning_rate": 1.9677210736064332e-05, "loss": 0.8661, "step": 4240 }, { "epoch": 0.11, "grad_norm": 5.333656311035156, "learning_rate": 1.9677001487079575e-05, "loss": 0.8267, "step": 4241 }, { "epoch": 0.11, "grad_norm": 3.37276291847229, "learning_rate": 1.9676792171407e-05, "loss": 0.8106, "step": 4242 }, { "epoch": 0.11, "grad_norm": 3.678267002105713, "learning_rate": 1.9676582789048048e-05, "loss": 0.8403, "step": 4243 }, { "epoch": 0.11, "grad_norm": 7.133005619049072, "learning_rate": 1.9676373340004175e-05, "loss": 0.7025, "step": 4244 }, { "epoch": 0.11, "grad_norm": 2.5520834922790527, "learning_rate": 1.9676163824276813e-05, "loss": 0.7122, "step": 4245 }, { "epoch": 0.11, "grad_norm": 2.6701207160949707, "learning_rate": 1.967595424186741e-05, "loss": 0.5746, "step": 4246 }, { "epoch": 0.11, "grad_norm": 1.706444501876831, "learning_rate": 1.9675744592777406e-05, "loss": 0.6307, "step": 4247 }, { "epoch": 0.11, "grad_norm": 2.4120960235595703, "learning_rate": 1.9675534877008252e-05, "loss": 0.6071, "step": 4248 }, { "epoch": 0.11, "grad_norm": 15.614742279052734, "learning_rate": 1.967532509456139e-05, "loss": 0.6555, "step": 4249 }, { "epoch": 0.11, "grad_norm": 4.36005973815918, "learning_rate": 1.9675115245438264e-05, "loss": 0.792, "step": 4250 }, { "epoch": 0.11, "grad_norm": 1.5779789686203003, "learning_rate": 1.9674905329640324e-05, "loss": 0.5418, "step": 4251 }, { "epoch": 0.11, "grad_norm": 4.2776198387146, "learning_rate": 1.9674695347169016e-05, "loss": 0.5816, "step": 4252 }, { "epoch": 0.11, "grad_norm": 3.736964225769043, "learning_rate": 1.9674485298025786e-05, "loss": 0.6489, "step": 4253 }, { "epoch": 0.11, "grad_norm": 2.553410530090332, "learning_rate": 1.9674275182212078e-05, "loss": 0.76, "step": 4254 }, { "epoch": 0.11, "grad_norm": 1.8670858144760132, "learning_rate": 1.967406499972935e-05, "loss": 0.6618, "step": 4255 }, { "epoch": 0.11, "grad_norm": 5.331273555755615, "learning_rate": 1.9673854750579034e-05, "loss": 0.8307, "step": 4256 }, { "epoch": 0.11, "grad_norm": 1.5574204921722412, "learning_rate": 1.9673644434762596e-05, "loss": 0.5739, "step": 4257 }, { "epoch": 0.11, "grad_norm": 2.1363072395324707, "learning_rate": 1.9673434052281476e-05, "loss": 0.7676, "step": 4258 }, { "epoch": 0.11, "grad_norm": 3.385284900665283, "learning_rate": 1.9673223603137125e-05, "loss": 0.6711, "step": 4259 }, { "epoch": 0.11, "grad_norm": 8.695706367492676, "learning_rate": 1.9673013087330996e-05, "loss": 0.7497, "step": 4260 }, { "epoch": 0.11, "grad_norm": 4.0218634605407715, "learning_rate": 1.967280250486454e-05, "loss": 0.8303, "step": 4261 }, { "epoch": 0.11, "grad_norm": 1.4107780456542969, "learning_rate": 1.9672591855739203e-05, "loss": 0.6724, "step": 4262 }, { "epoch": 0.11, "grad_norm": 1.8223968744277954, "learning_rate": 1.967238113995644e-05, "loss": 0.5891, "step": 4263 }, { "epoch": 0.11, "grad_norm": 5.555389881134033, "learning_rate": 1.9672170357517705e-05, "loss": 0.6894, "step": 4264 }, { "epoch": 0.11, "grad_norm": 3.987265110015869, "learning_rate": 1.9671959508424447e-05, "loss": 0.9722, "step": 4265 }, { "epoch": 0.11, "grad_norm": 1.79398512840271, "learning_rate": 1.967174859267812e-05, "loss": 0.6126, "step": 4266 }, { "epoch": 0.11, "grad_norm": 2.5307819843292236, "learning_rate": 1.967153761028018e-05, "loss": 0.6857, "step": 4267 }, { "epoch": 0.11, "grad_norm": 2.333670139312744, "learning_rate": 1.9671326561232074e-05, "loss": 0.7014, "step": 4268 }, { "epoch": 0.11, "grad_norm": 1.779754638671875, "learning_rate": 1.9671115445535268e-05, "loss": 0.6497, "step": 4269 }, { "epoch": 0.11, "grad_norm": 4.040135383605957, "learning_rate": 1.9670904263191208e-05, "loss": 0.7003, "step": 4270 }, { "epoch": 0.11, "grad_norm": 2.55936336517334, "learning_rate": 1.9670693014201353e-05, "loss": 0.5473, "step": 4271 }, { "epoch": 0.11, "grad_norm": 4.866360664367676, "learning_rate": 1.9670481698567152e-05, "loss": 0.7113, "step": 4272 }, { "epoch": 0.11, "grad_norm": 2.874042272567749, "learning_rate": 1.9670270316290073e-05, "loss": 0.7048, "step": 4273 }, { "epoch": 0.11, "grad_norm": 2.646005153656006, "learning_rate": 1.9670058867371562e-05, "loss": 0.5804, "step": 4274 }, { "epoch": 0.11, "grad_norm": 2.101881504058838, "learning_rate": 1.9669847351813086e-05, "loss": 0.6491, "step": 4275 }, { "epoch": 0.11, "grad_norm": 2.3219029903411865, "learning_rate": 1.9669635769616092e-05, "loss": 0.6544, "step": 4276 }, { "epoch": 0.11, "grad_norm": 2.751339912414551, "learning_rate": 1.9669424120782047e-05, "loss": 0.5601, "step": 4277 }, { "epoch": 0.11, "grad_norm": 2.0039734840393066, "learning_rate": 1.96692124053124e-05, "loss": 0.7788, "step": 4278 }, { "epoch": 0.11, "grad_norm": 2.3368422985076904, "learning_rate": 1.9669000623208622e-05, "loss": 0.7597, "step": 4279 }, { "epoch": 0.11, "grad_norm": 4.598141193389893, "learning_rate": 1.9668788774472162e-05, "loss": 0.7075, "step": 4280 }, { "epoch": 0.11, "grad_norm": 2.1972317695617676, "learning_rate": 1.966857685910449e-05, "loss": 0.7552, "step": 4281 }, { "epoch": 0.11, "grad_norm": 2.098792314529419, "learning_rate": 1.9668364877107056e-05, "loss": 0.6887, "step": 4282 }, { "epoch": 0.11, "grad_norm": 3.535984992980957, "learning_rate": 1.9668152828481328e-05, "loss": 0.8539, "step": 4283 }, { "epoch": 0.11, "grad_norm": 2.212461233139038, "learning_rate": 1.9667940713228763e-05, "loss": 0.6749, "step": 4284 }, { "epoch": 0.11, "grad_norm": 2.536010980606079, "learning_rate": 1.9667728531350823e-05, "loss": 0.5927, "step": 4285 }, { "epoch": 0.11, "grad_norm": 3.7456982135772705, "learning_rate": 1.9667516282848975e-05, "loss": 0.8691, "step": 4286 }, { "epoch": 0.11, "grad_norm": 2.0478599071502686, "learning_rate": 1.9667303967724682e-05, "loss": 0.6293, "step": 4287 }, { "epoch": 0.11, "grad_norm": 2.215064525604248, "learning_rate": 1.9667091585979398e-05, "loss": 0.68, "step": 4288 }, { "epoch": 0.11, "grad_norm": 4.016119003295898, "learning_rate": 1.9666879137614592e-05, "loss": 0.639, "step": 4289 }, { "epoch": 0.11, "grad_norm": 5.988341331481934, "learning_rate": 1.966666662263173e-05, "loss": 0.7065, "step": 4290 }, { "epoch": 0.11, "grad_norm": 3.3974132537841797, "learning_rate": 1.966645404103227e-05, "loss": 0.715, "step": 4291 }, { "epoch": 0.11, "grad_norm": 3.4280874729156494, "learning_rate": 1.966624139281769e-05, "loss": 0.7306, "step": 4292 }, { "epoch": 0.11, "grad_norm": 2.7259905338287354, "learning_rate": 1.9666028677989445e-05, "loss": 0.7799, "step": 4293 }, { "epoch": 0.11, "grad_norm": 2.022321939468384, "learning_rate": 1.9665815896549e-05, "loss": 0.6292, "step": 4294 }, { "epoch": 0.11, "grad_norm": 2.6831226348876953, "learning_rate": 1.966560304849783e-05, "loss": 0.6662, "step": 4295 }, { "epoch": 0.11, "grad_norm": 8.29903507232666, "learning_rate": 1.966539013383739e-05, "loss": 0.6414, "step": 4296 }, { "epoch": 0.11, "grad_norm": 6.7163519859313965, "learning_rate": 1.9665177152569156e-05, "loss": 0.9286, "step": 4297 }, { "epoch": 0.11, "grad_norm": 3.050553321838379, "learning_rate": 1.9664964104694594e-05, "loss": 0.8258, "step": 4298 }, { "epoch": 0.11, "grad_norm": 4.821875095367432, "learning_rate": 1.966475099021517e-05, "loss": 0.6274, "step": 4299 }, { "epoch": 0.11, "grad_norm": 2.511084794998169, "learning_rate": 1.9664537809132355e-05, "loss": 0.8529, "step": 4300 }, { "epoch": 0.11, "grad_norm": 2.3767359256744385, "learning_rate": 1.966432456144762e-05, "loss": 0.7791, "step": 4301 }, { "epoch": 0.11, "grad_norm": 2.596581220626831, "learning_rate": 1.966411124716243e-05, "loss": 0.5418, "step": 4302 }, { "epoch": 0.11, "grad_norm": 4.291410446166992, "learning_rate": 1.9663897866278256e-05, "loss": 0.5097, "step": 4303 }, { "epoch": 0.11, "grad_norm": 1.7939105033874512, "learning_rate": 1.9663684418796568e-05, "loss": 0.6166, "step": 4304 }, { "epoch": 0.11, "grad_norm": 2.2315773963928223, "learning_rate": 1.966347090471884e-05, "loss": 0.6358, "step": 4305 }, { "epoch": 0.11, "grad_norm": 2.2041704654693604, "learning_rate": 1.966325732404654e-05, "loss": 0.6052, "step": 4306 }, { "epoch": 0.11, "grad_norm": 12.230769157409668, "learning_rate": 1.9663043676781143e-05, "loss": 0.6964, "step": 4307 }, { "epoch": 0.11, "grad_norm": 3.3682167530059814, "learning_rate": 1.966282996292412e-05, "loss": 0.7877, "step": 4308 }, { "epoch": 0.11, "grad_norm": 3.1667935848236084, "learning_rate": 1.9662616182476946e-05, "loss": 0.7333, "step": 4309 }, { "epoch": 0.11, "grad_norm": 14.472387313842773, "learning_rate": 1.9662402335441092e-05, "loss": 0.6619, "step": 4310 }, { "epoch": 0.11, "grad_norm": 1.694502353668213, "learning_rate": 1.9662188421818027e-05, "loss": 0.6386, "step": 4311 }, { "epoch": 0.11, "grad_norm": 4.914583683013916, "learning_rate": 1.966197444160923e-05, "loss": 0.6528, "step": 4312 }, { "epoch": 0.11, "grad_norm": 2.4852969646453857, "learning_rate": 1.966176039481618e-05, "loss": 0.6085, "step": 4313 }, { "epoch": 0.11, "grad_norm": 1.6584008932113647, "learning_rate": 1.9661546281440345e-05, "loss": 0.6628, "step": 4314 }, { "epoch": 0.11, "grad_norm": 4.640936851501465, "learning_rate": 1.9661332101483202e-05, "loss": 0.6854, "step": 4315 }, { "epoch": 0.11, "grad_norm": 2.0399975776672363, "learning_rate": 1.966111785494623e-05, "loss": 0.6596, "step": 4316 }, { "epoch": 0.11, "grad_norm": 3.2553329467773438, "learning_rate": 1.96609035418309e-05, "loss": 0.6822, "step": 4317 }, { "epoch": 0.11, "grad_norm": 4.019149303436279, "learning_rate": 1.9660689162138697e-05, "loss": 0.6975, "step": 4318 }, { "epoch": 0.11, "grad_norm": 1.4935623407363892, "learning_rate": 1.9660474715871093e-05, "loss": 0.6463, "step": 4319 }, { "epoch": 0.11, "grad_norm": 4.476993560791016, "learning_rate": 1.966026020302956e-05, "loss": 0.7708, "step": 4320 }, { "epoch": 0.11, "grad_norm": 4.001997947692871, "learning_rate": 1.966004562361559e-05, "loss": 0.7233, "step": 4321 }, { "epoch": 0.11, "grad_norm": 1.4715555906295776, "learning_rate": 1.9659830977630655e-05, "loss": 0.723, "step": 4322 }, { "epoch": 0.11, "grad_norm": 2.0041215419769287, "learning_rate": 1.9659616265076228e-05, "loss": 0.6804, "step": 4323 }, { "epoch": 0.11, "grad_norm": 15.816032409667969, "learning_rate": 1.96594014859538e-05, "loss": 0.7901, "step": 4324 }, { "epoch": 0.11, "grad_norm": 2.1027956008911133, "learning_rate": 1.9659186640264844e-05, "loss": 0.6448, "step": 4325 }, { "epoch": 0.11, "grad_norm": 2.413262367248535, "learning_rate": 1.9658971728010842e-05, "loss": 0.6861, "step": 4326 }, { "epoch": 0.11, "grad_norm": 4.961363315582275, "learning_rate": 1.9658756749193275e-05, "loss": 0.7686, "step": 4327 }, { "epoch": 0.11, "grad_norm": 2.4972219467163086, "learning_rate": 1.9658541703813622e-05, "loss": 0.596, "step": 4328 }, { "epoch": 0.11, "grad_norm": 3.9757416248321533, "learning_rate": 1.965832659187337e-05, "loss": 0.7237, "step": 4329 }, { "epoch": 0.11, "grad_norm": 2.10103178024292, "learning_rate": 1.9658111413374e-05, "loss": 0.7303, "step": 4330 }, { "epoch": 0.11, "grad_norm": 2.3690226078033447, "learning_rate": 1.9657896168316993e-05, "loss": 0.6566, "step": 4331 }, { "epoch": 0.11, "grad_norm": 3.193624496459961, "learning_rate": 1.9657680856703833e-05, "loss": 0.7508, "step": 4332 }, { "epoch": 0.11, "grad_norm": 2.3773033618927, "learning_rate": 1.9657465478536007e-05, "loss": 0.8047, "step": 4333 }, { "epoch": 0.11, "grad_norm": 6.329593658447266, "learning_rate": 1.965725003381499e-05, "loss": 0.6193, "step": 4334 }, { "epoch": 0.11, "grad_norm": 4.343924045562744, "learning_rate": 1.965703452254228e-05, "loss": 0.7887, "step": 4335 }, { "epoch": 0.11, "grad_norm": 4.348198890686035, "learning_rate": 1.965681894471935e-05, "loss": 0.6694, "step": 4336 }, { "epoch": 0.11, "grad_norm": 4.315205097198486, "learning_rate": 1.9656603300347694e-05, "loss": 0.7399, "step": 4337 }, { "epoch": 0.11, "grad_norm": 2.524627208709717, "learning_rate": 1.9656387589428796e-05, "loss": 0.7256, "step": 4338 }, { "epoch": 0.11, "grad_norm": 1.698374629020691, "learning_rate": 1.9656171811964137e-05, "loss": 0.7414, "step": 4339 }, { "epoch": 0.11, "grad_norm": 3.660254716873169, "learning_rate": 1.965595596795521e-05, "loss": 0.7263, "step": 4340 }, { "epoch": 0.11, "grad_norm": 3.1125705242156982, "learning_rate": 1.9655740057403502e-05, "loss": 0.6699, "step": 4341 }, { "epoch": 0.11, "grad_norm": 2.6167078018188477, "learning_rate": 1.96555240803105e-05, "loss": 0.7139, "step": 4342 }, { "epoch": 0.11, "grad_norm": 3.327359914779663, "learning_rate": 1.9655308036677695e-05, "loss": 0.6375, "step": 4343 }, { "epoch": 0.11, "grad_norm": 1.7101995944976807, "learning_rate": 1.965509192650657e-05, "loss": 0.7277, "step": 4344 }, { "epoch": 0.11, "grad_norm": 1.637998342514038, "learning_rate": 1.9654875749798615e-05, "loss": 0.6758, "step": 4345 }, { "epoch": 0.11, "grad_norm": 2.627838134765625, "learning_rate": 1.9654659506555326e-05, "loss": 0.7365, "step": 4346 }, { "epoch": 0.11, "grad_norm": 3.490302085876465, "learning_rate": 1.965444319677819e-05, "loss": 0.814, "step": 4347 }, { "epoch": 0.11, "grad_norm": 8.356999397277832, "learning_rate": 1.9654226820468693e-05, "loss": 0.5687, "step": 4348 }, { "epoch": 0.11, "grad_norm": 3.5577025413513184, "learning_rate": 1.965401037762833e-05, "loss": 0.8713, "step": 4349 }, { "epoch": 0.11, "grad_norm": 2.4235501289367676, "learning_rate": 1.9653793868258596e-05, "loss": 0.6351, "step": 4350 }, { "epoch": 0.11, "grad_norm": 2.4986467361450195, "learning_rate": 1.965357729236098e-05, "loss": 0.6003, "step": 4351 }, { "epoch": 0.11, "grad_norm": 3.9938039779663086, "learning_rate": 1.965336064993697e-05, "loss": 0.5809, "step": 4352 }, { "epoch": 0.11, "grad_norm": 4.507776737213135, "learning_rate": 1.9653143940988065e-05, "loss": 0.7201, "step": 4353 }, { "epoch": 0.11, "grad_norm": 3.041529655456543, "learning_rate": 1.9652927165515757e-05, "loss": 0.9052, "step": 4354 }, { "epoch": 0.11, "grad_norm": 2.2818009853363037, "learning_rate": 1.9652710323521544e-05, "loss": 0.6573, "step": 4355 }, { "epoch": 0.11, "grad_norm": 1.8732517957687378, "learning_rate": 1.965249341500691e-05, "loss": 0.7039, "step": 4356 }, { "epoch": 0.11, "grad_norm": 2.2492480278015137, "learning_rate": 1.9652276439973357e-05, "loss": 0.6936, "step": 4357 }, { "epoch": 0.11, "grad_norm": 4.35518741607666, "learning_rate": 1.965205939842238e-05, "loss": 0.6873, "step": 4358 }, { "epoch": 0.11, "grad_norm": 2.443159818649292, "learning_rate": 1.9651842290355473e-05, "loss": 0.5866, "step": 4359 }, { "epoch": 0.11, "grad_norm": 2.3221731185913086, "learning_rate": 1.9651625115774132e-05, "loss": 0.6116, "step": 4360 }, { "epoch": 0.11, "grad_norm": 4.164247989654541, "learning_rate": 1.9651407874679857e-05, "loss": 0.733, "step": 4361 }, { "epoch": 0.11, "grad_norm": 1.8471864461898804, "learning_rate": 1.965119056707414e-05, "loss": 0.6279, "step": 4362 }, { "epoch": 0.11, "grad_norm": 2.4221842288970947, "learning_rate": 1.965097319295848e-05, "loss": 0.7878, "step": 4363 }, { "epoch": 0.11, "grad_norm": 7.360451698303223, "learning_rate": 1.965075575233438e-05, "loss": 0.6761, "step": 4364 }, { "epoch": 0.11, "grad_norm": 2.3977110385894775, "learning_rate": 1.9650538245203334e-05, "loss": 0.804, "step": 4365 }, { "epoch": 0.11, "grad_norm": 7.1648945808410645, "learning_rate": 1.9650320671566842e-05, "loss": 0.665, "step": 4366 }, { "epoch": 0.11, "grad_norm": 2.4574763774871826, "learning_rate": 1.96501030314264e-05, "loss": 0.6038, "step": 4367 }, { "epoch": 0.11, "grad_norm": 4.114452838897705, "learning_rate": 1.9649885324783513e-05, "loss": 0.7594, "step": 4368 }, { "epoch": 0.11, "grad_norm": 4.2516398429870605, "learning_rate": 1.9649667551639676e-05, "loss": 0.8638, "step": 4369 }, { "epoch": 0.11, "grad_norm": 3.0503437519073486, "learning_rate": 1.9649449711996396e-05, "loss": 0.7786, "step": 4370 }, { "epoch": 0.11, "grad_norm": 5.827682971954346, "learning_rate": 1.964923180585517e-05, "loss": 0.7524, "step": 4371 }, { "epoch": 0.11, "grad_norm": 4.592159748077393, "learning_rate": 1.9649013833217498e-05, "loss": 0.7248, "step": 4372 }, { "epoch": 0.11, "grad_norm": 2.9354615211486816, "learning_rate": 1.9648795794084888e-05, "loss": 0.6326, "step": 4373 }, { "epoch": 0.11, "grad_norm": 1.7770228385925293, "learning_rate": 1.964857768845884e-05, "loss": 0.6179, "step": 4374 }, { "epoch": 0.11, "grad_norm": 3.5690109729766846, "learning_rate": 1.9648359516340854e-05, "loss": 0.679, "step": 4375 }, { "epoch": 0.11, "grad_norm": 2.0758934020996094, "learning_rate": 1.9648141277732435e-05, "loss": 0.616, "step": 4376 }, { "epoch": 0.11, "grad_norm": 2.7091050148010254, "learning_rate": 1.964792297263509e-05, "loss": 0.6564, "step": 4377 }, { "epoch": 0.11, "grad_norm": 1.6048355102539062, "learning_rate": 1.9647704601050323e-05, "loss": 0.6611, "step": 4378 }, { "epoch": 0.11, "grad_norm": 4.4322991371154785, "learning_rate": 1.964748616297963e-05, "loss": 0.8257, "step": 4379 }, { "epoch": 0.11, "grad_norm": 2.033315658569336, "learning_rate": 1.9647267658424528e-05, "loss": 0.6086, "step": 4380 }, { "epoch": 0.11, "grad_norm": 4.225358009338379, "learning_rate": 1.9647049087386523e-05, "loss": 0.7443, "step": 4381 }, { "epoch": 0.11, "grad_norm": 2.7696313858032227, "learning_rate": 1.9646830449867108e-05, "loss": 0.6627, "step": 4382 }, { "epoch": 0.11, "grad_norm": 1.7042120695114136, "learning_rate": 1.9646611745867804e-05, "loss": 0.5721, "step": 4383 }, { "epoch": 0.11, "grad_norm": 2.762432336807251, "learning_rate": 1.964639297539011e-05, "loss": 0.5571, "step": 4384 }, { "epoch": 0.11, "grad_norm": 1.7623769044876099, "learning_rate": 1.964617413843554e-05, "loss": 0.481, "step": 4385 }, { "epoch": 0.11, "grad_norm": 1.5203490257263184, "learning_rate": 1.9645955235005596e-05, "loss": 0.8674, "step": 4386 }, { "epoch": 0.11, "grad_norm": 2.338115692138672, "learning_rate": 1.9645736265101786e-05, "loss": 0.616, "step": 4387 }, { "epoch": 0.11, "grad_norm": 2.8227221965789795, "learning_rate": 1.9645517228725625e-05, "loss": 0.6258, "step": 4388 }, { "epoch": 0.11, "grad_norm": 2.093872308731079, "learning_rate": 1.9645298125878618e-05, "loss": 0.6758, "step": 4389 }, { "epoch": 0.11, "grad_norm": 4.887840270996094, "learning_rate": 1.9645078956562272e-05, "loss": 0.8073, "step": 4390 }, { "epoch": 0.11, "grad_norm": 1.7449266910552979, "learning_rate": 1.9644859720778106e-05, "loss": 0.713, "step": 4391 }, { "epoch": 0.11, "grad_norm": 3.6342687606811523, "learning_rate": 1.9644640418527627e-05, "loss": 0.7842, "step": 4392 }, { "epoch": 0.11, "grad_norm": 2.932856798171997, "learning_rate": 1.9644421049812346e-05, "loss": 0.7748, "step": 4393 }, { "epoch": 0.11, "grad_norm": 1.3970729112625122, "learning_rate": 1.964420161463377e-05, "loss": 0.5994, "step": 4394 }, { "epoch": 0.11, "grad_norm": 2.6021828651428223, "learning_rate": 1.9643982112993417e-05, "loss": 0.7293, "step": 4395 }, { "epoch": 0.11, "grad_norm": 3.215541124343872, "learning_rate": 1.96437625448928e-05, "loss": 0.6867, "step": 4396 }, { "epoch": 0.11, "grad_norm": 1.5204522609710693, "learning_rate": 1.964354291033343e-05, "loss": 0.6608, "step": 4397 }, { "epoch": 0.11, "grad_norm": 9.472814559936523, "learning_rate": 1.9643323209316822e-05, "loss": 0.5344, "step": 4398 }, { "epoch": 0.11, "grad_norm": 2.123823642730713, "learning_rate": 1.9643103441844488e-05, "loss": 0.6061, "step": 4399 }, { "epoch": 0.11, "grad_norm": 4.722005844116211, "learning_rate": 1.9642883607917943e-05, "loss": 0.7814, "step": 4400 }, { "epoch": 0.11, "grad_norm": 3.360553026199341, "learning_rate": 1.9642663707538703e-05, "loss": 0.7512, "step": 4401 }, { "epoch": 0.11, "grad_norm": 7.6100993156433105, "learning_rate": 1.964244374070828e-05, "loss": 0.6958, "step": 4402 }, { "epoch": 0.11, "grad_norm": 3.1106412410736084, "learning_rate": 1.9642223707428195e-05, "loss": 0.5629, "step": 4403 }, { "epoch": 0.11, "grad_norm": 1.3532590866088867, "learning_rate": 1.9642003607699964e-05, "loss": 0.5023, "step": 4404 }, { "epoch": 0.11, "grad_norm": 1.3505090475082397, "learning_rate": 1.96417834415251e-05, "loss": 0.5664, "step": 4405 }, { "epoch": 0.11, "grad_norm": 2.4209189414978027, "learning_rate": 1.9641563208905122e-05, "loss": 0.6915, "step": 4406 }, { "epoch": 0.11, "grad_norm": 7.503271102905273, "learning_rate": 1.9641342909841548e-05, "loss": 0.5654, "step": 4407 }, { "epoch": 0.11, "grad_norm": 3.205448627471924, "learning_rate": 1.9641122544335895e-05, "loss": 0.7111, "step": 4408 }, { "epoch": 0.11, "grad_norm": 1.7905633449554443, "learning_rate": 1.9640902112389684e-05, "loss": 0.5263, "step": 4409 }, { "epoch": 0.11, "grad_norm": 3.7446320056915283, "learning_rate": 1.964068161400443e-05, "loss": 0.821, "step": 4410 }, { "epoch": 0.11, "grad_norm": 3.297971248626709, "learning_rate": 1.964046104918166e-05, "loss": 0.6348, "step": 4411 }, { "epoch": 0.11, "grad_norm": 3.764949083328247, "learning_rate": 1.9640240417922886e-05, "loss": 0.7866, "step": 4412 }, { "epoch": 0.11, "grad_norm": 2.8975095748901367, "learning_rate": 1.9640019720229632e-05, "loss": 0.6522, "step": 4413 }, { "epoch": 0.11, "grad_norm": 2.87766695022583, "learning_rate": 1.9639798956103417e-05, "loss": 0.7632, "step": 4414 }, { "epoch": 0.11, "grad_norm": 4.93872594833374, "learning_rate": 1.9639578125545767e-05, "loss": 0.6952, "step": 4415 }, { "epoch": 0.11, "grad_norm": 2.2629892826080322, "learning_rate": 1.96393572285582e-05, "loss": 0.7803, "step": 4416 }, { "epoch": 0.11, "grad_norm": 2.933464765548706, "learning_rate": 1.9639136265142236e-05, "loss": 0.7957, "step": 4417 }, { "epoch": 0.11, "grad_norm": 2.730969190597534, "learning_rate": 1.9638915235299404e-05, "loss": 0.6598, "step": 4418 }, { "epoch": 0.11, "grad_norm": 7.87911319732666, "learning_rate": 1.963869413903122e-05, "loss": 0.7341, "step": 4419 }, { "epoch": 0.11, "grad_norm": 1.813786506652832, "learning_rate": 1.9638472976339217e-05, "loss": 0.6604, "step": 4420 }, { "epoch": 0.11, "grad_norm": 2.491192579269409, "learning_rate": 1.963825174722491e-05, "loss": 0.5525, "step": 4421 }, { "epoch": 0.11, "grad_norm": 4.0143256187438965, "learning_rate": 1.963803045168983e-05, "loss": 0.6806, "step": 4422 }, { "epoch": 0.11, "grad_norm": 3.107372999191284, "learning_rate": 1.96378090897355e-05, "loss": 0.8314, "step": 4423 }, { "epoch": 0.11, "grad_norm": 2.4441561698913574, "learning_rate": 1.963758766136344e-05, "loss": 0.4753, "step": 4424 }, { "epoch": 0.11, "grad_norm": 1.8922899961471558, "learning_rate": 1.9637366166575185e-05, "loss": 0.7446, "step": 4425 }, { "epoch": 0.11, "grad_norm": 1.9517844915390015, "learning_rate": 1.9637144605372254e-05, "loss": 0.6488, "step": 4426 }, { "epoch": 0.11, "grad_norm": 2.7375898361206055, "learning_rate": 1.963692297775618e-05, "loss": 0.6783, "step": 4427 }, { "epoch": 0.11, "grad_norm": 7.657415390014648, "learning_rate": 1.9636701283728486e-05, "loss": 0.6826, "step": 4428 }, { "epoch": 0.11, "grad_norm": 2.7768781185150146, "learning_rate": 1.96364795232907e-05, "loss": 0.5479, "step": 4429 }, { "epoch": 0.11, "grad_norm": 2.6031136512756348, "learning_rate": 1.9636257696444353e-05, "loss": 0.7239, "step": 4430 }, { "epoch": 0.11, "grad_norm": 2.8015177249908447, "learning_rate": 1.963603580319097e-05, "loss": 0.535, "step": 4431 }, { "epoch": 0.11, "grad_norm": 1.7473455667495728, "learning_rate": 1.9635813843532087e-05, "loss": 0.7902, "step": 4432 }, { "epoch": 0.11, "grad_norm": 1.8237465620040894, "learning_rate": 1.9635591817469225e-05, "loss": 0.4904, "step": 4433 }, { "epoch": 0.11, "grad_norm": 4.811523914337158, "learning_rate": 1.963536972500392e-05, "loss": 0.7907, "step": 4434 }, { "epoch": 0.11, "grad_norm": 4.266890525817871, "learning_rate": 1.9635147566137696e-05, "loss": 0.8356, "step": 4435 }, { "epoch": 0.11, "grad_norm": 2.6140847206115723, "learning_rate": 1.963492534087209e-05, "loss": 0.5785, "step": 4436 }, { "epoch": 0.11, "grad_norm": 1.8826124668121338, "learning_rate": 1.9634703049208635e-05, "loss": 0.7113, "step": 4437 }, { "epoch": 0.11, "grad_norm": 2.7595248222351074, "learning_rate": 1.9634480691148858e-05, "loss": 0.6333, "step": 4438 }, { "epoch": 0.11, "grad_norm": 2.120838165283203, "learning_rate": 1.963425826669429e-05, "loss": 0.6079, "step": 4439 }, { "epoch": 0.11, "grad_norm": 5.988911151885986, "learning_rate": 1.9634035775846473e-05, "loss": 0.549, "step": 4440 }, { "epoch": 0.11, "grad_norm": 3.1569321155548096, "learning_rate": 1.963381321860693e-05, "loss": 0.5698, "step": 4441 }, { "epoch": 0.11, "grad_norm": 1.7359193563461304, "learning_rate": 1.96335905949772e-05, "loss": 0.6154, "step": 4442 }, { "epoch": 0.11, "grad_norm": 3.240949869155884, "learning_rate": 1.9633367904958814e-05, "loss": 0.7231, "step": 4443 }, { "epoch": 0.11, "grad_norm": 2.634248971939087, "learning_rate": 1.963314514855331e-05, "loss": 0.7625, "step": 4444 }, { "epoch": 0.11, "grad_norm": 1.9995274543762207, "learning_rate": 1.9632922325762224e-05, "loss": 0.6085, "step": 4445 }, { "epoch": 0.11, "grad_norm": 2.6439547538757324, "learning_rate": 1.9632699436587086e-05, "loss": 0.7474, "step": 4446 }, { "epoch": 0.11, "grad_norm": 6.703827381134033, "learning_rate": 1.9632476481029438e-05, "loss": 0.7891, "step": 4447 }, { "epoch": 0.11, "grad_norm": 4.2870965003967285, "learning_rate": 1.963225345909081e-05, "loss": 0.6579, "step": 4448 }, { "epoch": 0.11, "grad_norm": 2.7160160541534424, "learning_rate": 1.9632030370772745e-05, "loss": 0.816, "step": 4449 }, { "epoch": 0.11, "grad_norm": 3.5660476684570312, "learning_rate": 1.963180721607678e-05, "loss": 0.6908, "step": 4450 }, { "epoch": 0.11, "grad_norm": 1.8197983503341675, "learning_rate": 1.9631583995004447e-05, "loss": 0.675, "step": 4451 }, { "epoch": 0.11, "grad_norm": 2.784877061843872, "learning_rate": 1.963136070755729e-05, "loss": 0.8115, "step": 4452 }, { "epoch": 0.11, "grad_norm": 3.5277650356292725, "learning_rate": 1.963113735373685e-05, "loss": 0.7074, "step": 4453 }, { "epoch": 0.11, "grad_norm": 2.322417974472046, "learning_rate": 1.9630913933544658e-05, "loss": 0.6142, "step": 4454 }, { "epoch": 0.11, "grad_norm": 4.885661602020264, "learning_rate": 1.9630690446982255e-05, "loss": 0.729, "step": 4455 }, { "epoch": 0.11, "grad_norm": 2.6492812633514404, "learning_rate": 1.9630466894051192e-05, "loss": 0.7147, "step": 4456 }, { "epoch": 0.11, "grad_norm": 2.7997899055480957, "learning_rate": 1.9630243274752997e-05, "loss": 0.6073, "step": 4457 }, { "epoch": 0.11, "grad_norm": 1.6912742853164673, "learning_rate": 1.9630019589089214e-05, "loss": 0.6634, "step": 4458 }, { "epoch": 0.11, "grad_norm": 3.0029706954956055, "learning_rate": 1.962979583706139e-05, "loss": 0.7715, "step": 4459 }, { "epoch": 0.11, "grad_norm": 2.6219794750213623, "learning_rate": 1.9629572018671062e-05, "loss": 0.7278, "step": 4460 }, { "epoch": 0.11, "grad_norm": 3.159829616546631, "learning_rate": 1.962934813391977e-05, "loss": 0.7009, "step": 4461 }, { "epoch": 0.11, "grad_norm": 3.1302037239074707, "learning_rate": 1.9629124182809064e-05, "loss": 0.5872, "step": 4462 }, { "epoch": 0.11, "grad_norm": 4.810798168182373, "learning_rate": 1.9628900165340483e-05, "loss": 0.7649, "step": 4463 }, { "epoch": 0.11, "grad_norm": 2.992616891860962, "learning_rate": 1.9628676081515573e-05, "loss": 0.7775, "step": 4464 }, { "epoch": 0.11, "grad_norm": 3.145634651184082, "learning_rate": 1.9628451931335876e-05, "loss": 0.726, "step": 4465 }, { "epoch": 0.11, "grad_norm": 2.1033122539520264, "learning_rate": 1.9628227714802935e-05, "loss": 0.5215, "step": 4466 }, { "epoch": 0.11, "grad_norm": 1.8891178369522095, "learning_rate": 1.96280034319183e-05, "loss": 0.6987, "step": 4467 }, { "epoch": 0.11, "grad_norm": 2.728214740753174, "learning_rate": 1.9627779082683512e-05, "loss": 0.761, "step": 4468 }, { "epoch": 0.11, "grad_norm": 2.8791232109069824, "learning_rate": 1.9627554667100123e-05, "loss": 0.6705, "step": 4469 }, { "epoch": 0.11, "grad_norm": 2.200054883956909, "learning_rate": 1.9627330185169672e-05, "loss": 0.7035, "step": 4470 }, { "epoch": 0.11, "grad_norm": 2.0709164142608643, "learning_rate": 1.9627105636893713e-05, "loss": 0.7102, "step": 4471 }, { "epoch": 0.11, "grad_norm": 2.8742785453796387, "learning_rate": 1.962688102227379e-05, "loss": 0.6206, "step": 4472 }, { "epoch": 0.11, "grad_norm": 8.461350440979004, "learning_rate": 1.962665634131145e-05, "loss": 0.6474, "step": 4473 }, { "epoch": 0.11, "grad_norm": 3.9046976566314697, "learning_rate": 1.962643159400824e-05, "loss": 0.6293, "step": 4474 }, { "epoch": 0.11, "grad_norm": 2.387834310531616, "learning_rate": 1.9626206780365716e-05, "loss": 0.6659, "step": 4475 }, { "epoch": 0.11, "grad_norm": 1.306858777999878, "learning_rate": 1.962598190038542e-05, "loss": 0.5538, "step": 4476 }, { "epoch": 0.11, "grad_norm": 2.410552740097046, "learning_rate": 1.9625756954068907e-05, "loss": 0.6786, "step": 4477 }, { "epoch": 0.11, "grad_norm": 2.346235752105713, "learning_rate": 1.9625531941417722e-05, "loss": 0.5882, "step": 4478 }, { "epoch": 0.11, "grad_norm": 2.7398481369018555, "learning_rate": 1.962530686243342e-05, "loss": 0.5874, "step": 4479 }, { "epoch": 0.11, "grad_norm": 1.7973649501800537, "learning_rate": 1.962508171711755e-05, "loss": 0.6865, "step": 4480 }, { "epoch": 0.11, "grad_norm": 2.4074671268463135, "learning_rate": 1.962485650547166e-05, "loss": 0.6436, "step": 4481 }, { "epoch": 0.11, "grad_norm": 1.430604100227356, "learning_rate": 1.962463122749731e-05, "loss": 0.6487, "step": 4482 }, { "epoch": 0.11, "grad_norm": 5.40153169631958, "learning_rate": 1.9624405883196048e-05, "loss": 0.8046, "step": 4483 }, { "epoch": 0.11, "grad_norm": 2.5997843742370605, "learning_rate": 1.9624180472569425e-05, "loss": 0.6118, "step": 4484 }, { "epoch": 0.11, "grad_norm": 2.6417646408081055, "learning_rate": 1.9623954995619e-05, "loss": 0.6278, "step": 4485 }, { "epoch": 0.11, "grad_norm": 4.904764175415039, "learning_rate": 1.962372945234632e-05, "loss": 0.7579, "step": 4486 }, { "epoch": 0.12, "grad_norm": 2.6190054416656494, "learning_rate": 1.962350384275295e-05, "loss": 0.6888, "step": 4487 }, { "epoch": 0.12, "grad_norm": 2.475085735321045, "learning_rate": 1.962327816684043e-05, "loss": 0.7864, "step": 4488 }, { "epoch": 0.12, "grad_norm": 1.596673607826233, "learning_rate": 1.9623052424610322e-05, "loss": 0.7476, "step": 4489 }, { "epoch": 0.12, "grad_norm": 2.0590453147888184, "learning_rate": 1.962282661606419e-05, "loss": 0.6009, "step": 4490 }, { "epoch": 0.12, "grad_norm": 2.0750832557678223, "learning_rate": 1.9622600741203574e-05, "loss": 0.7381, "step": 4491 }, { "epoch": 0.12, "grad_norm": 3.511776924133301, "learning_rate": 1.9622374800030045e-05, "loss": 0.6047, "step": 4492 }, { "epoch": 0.12, "grad_norm": 5.2954230308532715, "learning_rate": 1.9622148792545152e-05, "loss": 0.6935, "step": 4493 }, { "epoch": 0.12, "grad_norm": 1.5687748193740845, "learning_rate": 1.9621922718750457e-05, "loss": 0.7385, "step": 4494 }, { "epoch": 0.12, "grad_norm": 1.5717207193374634, "learning_rate": 1.962169657864751e-05, "loss": 0.603, "step": 4495 }, { "epoch": 0.12, "grad_norm": 3.3062679767608643, "learning_rate": 1.962147037223788e-05, "loss": 0.671, "step": 4496 }, { "epoch": 0.12, "grad_norm": 2.2748801708221436, "learning_rate": 1.962124409952312e-05, "loss": 0.5377, "step": 4497 }, { "epoch": 0.12, "grad_norm": 1.836353063583374, "learning_rate": 1.9621017760504792e-05, "loss": 0.5184, "step": 4498 }, { "epoch": 0.12, "grad_norm": 3.1853554248809814, "learning_rate": 1.962079135518445e-05, "loss": 0.68, "step": 4499 }, { "epoch": 0.12, "grad_norm": 4.404201507568359, "learning_rate": 1.9620564883563664e-05, "loss": 0.6211, "step": 4500 }, { "epoch": 0.12, "grad_norm": 2.7916953563690186, "learning_rate": 1.9620338345643984e-05, "loss": 0.7284, "step": 4501 }, { "epoch": 0.12, "grad_norm": 1.6310451030731201, "learning_rate": 1.9620111741426982e-05, "loss": 0.5083, "step": 4502 }, { "epoch": 0.12, "grad_norm": 5.99937629699707, "learning_rate": 1.961988507091421e-05, "loss": 0.8379, "step": 4503 }, { "epoch": 0.12, "grad_norm": 6.5200371742248535, "learning_rate": 1.9619658334107233e-05, "loss": 0.6387, "step": 4504 }, { "epoch": 0.12, "grad_norm": 2.2626309394836426, "learning_rate": 1.9619431531007613e-05, "loss": 1.0082, "step": 4505 }, { "epoch": 0.12, "grad_norm": 2.917039155960083, "learning_rate": 1.9619204661616918e-05, "loss": 0.7268, "step": 4506 }, { "epoch": 0.12, "grad_norm": 1.8124936819076538, "learning_rate": 1.9618977725936706e-05, "loss": 0.7117, "step": 4507 }, { "epoch": 0.12, "grad_norm": 1.9642959833145142, "learning_rate": 1.9618750723968546e-05, "loss": 0.7143, "step": 4508 }, { "epoch": 0.12, "grad_norm": 2.386688470840454, "learning_rate": 1.9618523655713993e-05, "loss": 0.8265, "step": 4509 }, { "epoch": 0.12, "grad_norm": 3.6667683124542236, "learning_rate": 1.9618296521174623e-05, "loss": 0.7037, "step": 4510 }, { "epoch": 0.12, "grad_norm": 2.8021905422210693, "learning_rate": 1.9618069320351997e-05, "loss": 0.6661, "step": 4511 }, { "epoch": 0.12, "grad_norm": 2.0519397258758545, "learning_rate": 1.9617842053247675e-05, "loss": 0.7378, "step": 4512 }, { "epoch": 0.12, "grad_norm": 2.122846841812134, "learning_rate": 1.961761471986323e-05, "loss": 0.7343, "step": 4513 }, { "epoch": 0.12, "grad_norm": 4.160940647125244, "learning_rate": 1.961738732020023e-05, "loss": 0.723, "step": 4514 }, { "epoch": 0.12, "grad_norm": 1.6979166269302368, "learning_rate": 1.9617159854260236e-05, "loss": 0.6705, "step": 4515 }, { "epoch": 0.12, "grad_norm": 9.835819244384766, "learning_rate": 1.9616932322044816e-05, "loss": 0.8053, "step": 4516 }, { "epoch": 0.12, "grad_norm": 2.8374197483062744, "learning_rate": 1.9616704723555543e-05, "loss": 0.6011, "step": 4517 }, { "epoch": 0.12, "grad_norm": 4.550479888916016, "learning_rate": 1.9616477058793985e-05, "loss": 0.7277, "step": 4518 }, { "epoch": 0.12, "grad_norm": 4.680670261383057, "learning_rate": 1.9616249327761704e-05, "loss": 0.7052, "step": 4519 }, { "epoch": 0.12, "grad_norm": 3.04242205619812, "learning_rate": 1.9616021530460276e-05, "loss": 0.6935, "step": 4520 }, { "epoch": 0.12, "grad_norm": 7.0370402336120605, "learning_rate": 1.9615793666891273e-05, "loss": 0.7794, "step": 4521 }, { "epoch": 0.12, "grad_norm": 1.9917656183242798, "learning_rate": 1.9615565737056256e-05, "loss": 0.7548, "step": 4522 }, { "epoch": 0.12, "grad_norm": 4.272830486297607, "learning_rate": 1.9615337740956805e-05, "loss": 0.7401, "step": 4523 }, { "epoch": 0.12, "grad_norm": 5.087112903594971, "learning_rate": 1.9615109678594485e-05, "loss": 0.6626, "step": 4524 }, { "epoch": 0.12, "grad_norm": 2.029435157775879, "learning_rate": 1.961488154997087e-05, "loss": 0.6259, "step": 4525 }, { "epoch": 0.12, "grad_norm": 1.135124683380127, "learning_rate": 1.961465335508753e-05, "loss": 0.5357, "step": 4526 }, { "epoch": 0.12, "grad_norm": 3.1668827533721924, "learning_rate": 1.9614425093946042e-05, "loss": 0.5669, "step": 4527 }, { "epoch": 0.12, "grad_norm": 2.5046796798706055, "learning_rate": 1.9614196766547977e-05, "loss": 0.6219, "step": 4528 }, { "epoch": 0.12, "grad_norm": 2.9683210849761963, "learning_rate": 1.9613968372894906e-05, "loss": 0.7701, "step": 4529 }, { "epoch": 0.12, "grad_norm": 1.6414051055908203, "learning_rate": 1.9613739912988406e-05, "loss": 0.5756, "step": 4530 }, { "epoch": 0.12, "grad_norm": 7.443824768066406, "learning_rate": 1.961351138683005e-05, "loss": 0.9245, "step": 4531 }, { "epoch": 0.12, "grad_norm": 2.3416364192962646, "learning_rate": 1.961328279442142e-05, "loss": 0.6864, "step": 4532 }, { "epoch": 0.12, "grad_norm": 2.2254416942596436, "learning_rate": 1.9613054135764075e-05, "loss": 0.6823, "step": 4533 }, { "epoch": 0.12, "grad_norm": 2.108536958694458, "learning_rate": 1.9612825410859607e-05, "loss": 0.5801, "step": 4534 }, { "epoch": 0.12, "grad_norm": 1.3955384492874146, "learning_rate": 1.9612596619709584e-05, "loss": 0.5097, "step": 4535 }, { "epoch": 0.12, "grad_norm": 2.2956173419952393, "learning_rate": 1.961236776231558e-05, "loss": 0.877, "step": 4536 }, { "epoch": 0.12, "grad_norm": 2.559645175933838, "learning_rate": 1.961213883867918e-05, "loss": 0.7026, "step": 4537 }, { "epoch": 0.12, "grad_norm": 5.505665302276611, "learning_rate": 1.961190984880196e-05, "loss": 0.7541, "step": 4538 }, { "epoch": 0.12, "grad_norm": 2.8177855014801025, "learning_rate": 1.9611680792685493e-05, "loss": 0.5432, "step": 4539 }, { "epoch": 0.12, "grad_norm": 3.3592262268066406, "learning_rate": 1.961145167033136e-05, "loss": 0.5323, "step": 4540 }, { "epoch": 0.12, "grad_norm": 3.3286147117614746, "learning_rate": 1.9611222481741142e-05, "loss": 0.6928, "step": 4541 }, { "epoch": 0.12, "grad_norm": 2.2448060512542725, "learning_rate": 1.961099322691642e-05, "loss": 0.5996, "step": 4542 }, { "epoch": 0.12, "grad_norm": 2.4485316276550293, "learning_rate": 1.9610763905858766e-05, "loss": 0.8589, "step": 4543 }, { "epoch": 0.12, "grad_norm": 4.471728801727295, "learning_rate": 1.9610534518569767e-05, "loss": 0.8639, "step": 4544 }, { "epoch": 0.12, "grad_norm": 4.5138373374938965, "learning_rate": 1.9610305065051003e-05, "loss": 0.7115, "step": 4545 }, { "epoch": 0.12, "grad_norm": 2.089393138885498, "learning_rate": 1.9610075545304052e-05, "loss": 0.5934, "step": 4546 }, { "epoch": 0.12, "grad_norm": 3.355408191680908, "learning_rate": 1.96098459593305e-05, "loss": 0.6176, "step": 4547 }, { "epoch": 0.12, "grad_norm": 2.1352994441986084, "learning_rate": 1.9609616307131927e-05, "loss": 0.6998, "step": 4548 }, { "epoch": 0.12, "grad_norm": 8.895397186279297, "learning_rate": 1.960938658870992e-05, "loss": 0.7562, "step": 4549 }, { "epoch": 0.12, "grad_norm": 3.45345401763916, "learning_rate": 1.960915680406605e-05, "loss": 0.6887, "step": 4550 }, { "epoch": 0.12, "grad_norm": 2.198214530944824, "learning_rate": 1.9608926953201913e-05, "loss": 0.8289, "step": 4551 }, { "epoch": 0.12, "grad_norm": 3.8970184326171875, "learning_rate": 1.9608697036119087e-05, "loss": 0.5414, "step": 4552 }, { "epoch": 0.12, "grad_norm": 4.006371974945068, "learning_rate": 1.960846705281916e-05, "loss": 0.7652, "step": 4553 }, { "epoch": 0.12, "grad_norm": 3.8477513790130615, "learning_rate": 1.960823700330371e-05, "loss": 0.6848, "step": 4554 }, { "epoch": 0.12, "grad_norm": 3.3795907497406006, "learning_rate": 1.960800688757433e-05, "loss": 0.6649, "step": 4555 }, { "epoch": 0.12, "grad_norm": 3.743251085281372, "learning_rate": 1.96077767056326e-05, "loss": 0.7494, "step": 4556 }, { "epoch": 0.12, "grad_norm": 2.026014566421509, "learning_rate": 1.9607546457480114e-05, "loss": 0.5226, "step": 4557 }, { "epoch": 0.12, "grad_norm": 3.0899109840393066, "learning_rate": 1.9607316143118453e-05, "loss": 0.5055, "step": 4558 }, { "epoch": 0.12, "grad_norm": 2.0889878273010254, "learning_rate": 1.9607085762549203e-05, "loss": 0.6341, "step": 4559 }, { "epoch": 0.12, "grad_norm": 2.8926985263824463, "learning_rate": 1.9606855315773954e-05, "loss": 0.567, "step": 4560 }, { "epoch": 0.12, "grad_norm": 3.476759433746338, "learning_rate": 1.9606624802794295e-05, "loss": 0.624, "step": 4561 }, { "epoch": 0.12, "grad_norm": 4.060877799987793, "learning_rate": 1.960639422361181e-05, "loss": 0.5065, "step": 4562 }, { "epoch": 0.12, "grad_norm": 4.222362518310547, "learning_rate": 1.9606163578228093e-05, "loss": 0.8936, "step": 4563 }, { "epoch": 0.12, "grad_norm": 1.7555204629898071, "learning_rate": 1.9605932866644734e-05, "loss": 0.4752, "step": 4564 }, { "epoch": 0.12, "grad_norm": 3.051138162612915, "learning_rate": 1.9605702088863317e-05, "loss": 0.8796, "step": 4565 }, { "epoch": 0.12, "grad_norm": 1.5507030487060547, "learning_rate": 1.9605471244885443e-05, "loss": 0.6235, "step": 4566 }, { "epoch": 0.12, "grad_norm": 1.9289909601211548, "learning_rate": 1.960524033471269e-05, "loss": 0.6515, "step": 4567 }, { "epoch": 0.12, "grad_norm": 2.0761609077453613, "learning_rate": 1.9605009358346657e-05, "loss": 0.6007, "step": 4568 }, { "epoch": 0.12, "grad_norm": 3.9098470211029053, "learning_rate": 1.960477831578893e-05, "loss": 0.5566, "step": 4569 }, { "epoch": 0.12, "grad_norm": 2.390329122543335, "learning_rate": 1.960454720704111e-05, "loss": 0.8566, "step": 4570 }, { "epoch": 0.12, "grad_norm": 2.522676944732666, "learning_rate": 1.9604316032104783e-05, "loss": 0.6473, "step": 4571 }, { "epoch": 0.12, "grad_norm": 2.0752298831939697, "learning_rate": 1.9604084790981542e-05, "loss": 0.6257, "step": 4572 }, { "epoch": 0.12, "grad_norm": 2.4006943702697754, "learning_rate": 1.960385348367299e-05, "loss": 0.6888, "step": 4573 }, { "epoch": 0.12, "grad_norm": 2.7181460857391357, "learning_rate": 1.9603622110180707e-05, "loss": 0.734, "step": 4574 }, { "epoch": 0.12, "grad_norm": 2.227017402648926, "learning_rate": 1.9603390670506296e-05, "loss": 0.6155, "step": 4575 }, { "epoch": 0.12, "grad_norm": 4.009254455566406, "learning_rate": 1.9603159164651347e-05, "loss": 0.7071, "step": 4576 }, { "epoch": 0.12, "grad_norm": 2.5765700340270996, "learning_rate": 1.960292759261746e-05, "loss": 0.6452, "step": 4577 }, { "epoch": 0.12, "grad_norm": 2.1232857704162598, "learning_rate": 1.960269595440623e-05, "loss": 0.6964, "step": 4578 }, { "epoch": 0.12, "grad_norm": 3.314711332321167, "learning_rate": 1.9602464250019253e-05, "loss": 0.6873, "step": 4579 }, { "epoch": 0.12, "grad_norm": 2.1259007453918457, "learning_rate": 1.9602232479458125e-05, "loss": 0.7385, "step": 4580 }, { "epoch": 0.12, "grad_norm": 3.69623064994812, "learning_rate": 1.9602000642724443e-05, "loss": 0.5165, "step": 4581 }, { "epoch": 0.12, "grad_norm": 2.7664859294891357, "learning_rate": 1.9601768739819805e-05, "loss": 0.6292, "step": 4582 }, { "epoch": 0.12, "grad_norm": 4.182775974273682, "learning_rate": 1.960153677074581e-05, "loss": 0.6243, "step": 4583 }, { "epoch": 0.12, "grad_norm": 2.4056546688079834, "learning_rate": 1.9601304735504057e-05, "loss": 0.8178, "step": 4584 }, { "epoch": 0.12, "grad_norm": 4.618160724639893, "learning_rate": 1.960107263409614e-05, "loss": 0.785, "step": 4585 }, { "epoch": 0.12, "grad_norm": 1.9365400075912476, "learning_rate": 1.9600840466523667e-05, "loss": 0.6799, "step": 4586 }, { "epoch": 0.12, "grad_norm": 2.6784698963165283, "learning_rate": 1.960060823278823e-05, "loss": 0.7673, "step": 4587 }, { "epoch": 0.12, "grad_norm": 3.2639856338500977, "learning_rate": 1.960037593289143e-05, "loss": 0.6419, "step": 4588 }, { "epoch": 0.12, "grad_norm": 2.1635282039642334, "learning_rate": 1.960014356683488e-05, "loss": 0.6955, "step": 4589 }, { "epoch": 0.12, "grad_norm": 3.1052424907684326, "learning_rate": 1.9599911134620165e-05, "loss": 0.7878, "step": 4590 }, { "epoch": 0.12, "grad_norm": 1.2827727794647217, "learning_rate": 1.9599678636248894e-05, "loss": 0.6143, "step": 4591 }, { "epoch": 0.12, "grad_norm": 2.714958906173706, "learning_rate": 1.959944607172267e-05, "loss": 0.7145, "step": 4592 }, { "epoch": 0.12, "grad_norm": 2.9507651329040527, "learning_rate": 1.9599213441043092e-05, "loss": 0.6229, "step": 4593 }, { "epoch": 0.12, "grad_norm": 2.2146646976470947, "learning_rate": 1.959898074421177e-05, "loss": 0.7291, "step": 4594 }, { "epoch": 0.12, "grad_norm": 2.746481418609619, "learning_rate": 1.9598747981230303e-05, "loss": 0.607, "step": 4595 }, { "epoch": 0.12, "grad_norm": 2.425132989883423, "learning_rate": 1.9598515152100293e-05, "loss": 0.7921, "step": 4596 }, { "epoch": 0.12, "grad_norm": 2.2647039890289307, "learning_rate": 1.9598282256823347e-05, "loss": 0.7806, "step": 4597 }, { "epoch": 0.12, "grad_norm": 4.122949600219727, "learning_rate": 1.9598049295401072e-05, "loss": 0.5789, "step": 4598 }, { "epoch": 0.12, "grad_norm": 6.43050479888916, "learning_rate": 1.959781626783507e-05, "loss": 0.8658, "step": 4599 }, { "epoch": 0.12, "grad_norm": 3.707318067550659, "learning_rate": 1.959758317412695e-05, "loss": 0.6026, "step": 4600 }, { "epoch": 0.12, "grad_norm": 2.0460894107818604, "learning_rate": 1.9597350014278312e-05, "loss": 0.636, "step": 4601 }, { "epoch": 0.12, "grad_norm": 3.540980577468872, "learning_rate": 1.959711678829077e-05, "loss": 0.7322, "step": 4602 }, { "epoch": 0.12, "grad_norm": 3.9130656719207764, "learning_rate": 1.959688349616593e-05, "loss": 0.7408, "step": 4603 }, { "epoch": 0.12, "grad_norm": 1.6519542932510376, "learning_rate": 1.95966501379054e-05, "loss": 0.6869, "step": 4604 }, { "epoch": 0.12, "grad_norm": 2.3246257305145264, "learning_rate": 1.959641671351078e-05, "loss": 0.6252, "step": 4605 }, { "epoch": 0.12, "grad_norm": 2.6762189865112305, "learning_rate": 1.959618322298369e-05, "loss": 0.7242, "step": 4606 }, { "epoch": 0.12, "grad_norm": 1.5840553045272827, "learning_rate": 1.9595949666325733e-05, "loss": 0.6231, "step": 4607 }, { "epoch": 0.12, "grad_norm": 4.295938968658447, "learning_rate": 1.959571604353852e-05, "loss": 0.6638, "step": 4608 }, { "epoch": 0.12, "grad_norm": 2.2105183601379395, "learning_rate": 1.9595482354623662e-05, "loss": 0.7337, "step": 4609 }, { "epoch": 0.12, "grad_norm": 2.1552865505218506, "learning_rate": 1.9595248599582764e-05, "loss": 0.7707, "step": 4610 }, { "epoch": 0.12, "grad_norm": 7.193402290344238, "learning_rate": 1.9595014778417444e-05, "loss": 0.5899, "step": 4611 }, { "epoch": 0.12, "grad_norm": 1.6837801933288574, "learning_rate": 1.959478089112931e-05, "loss": 0.6835, "step": 4612 }, { "epoch": 0.12, "grad_norm": 5.647593021392822, "learning_rate": 1.9594546937719973e-05, "loss": 0.6847, "step": 4613 }, { "epoch": 0.12, "grad_norm": 1.973013162612915, "learning_rate": 1.959431291819105e-05, "loss": 0.6948, "step": 4614 }, { "epoch": 0.12, "grad_norm": 1.759953498840332, "learning_rate": 1.9594078832544146e-05, "loss": 0.6153, "step": 4615 }, { "epoch": 0.12, "grad_norm": 2.467972755432129, "learning_rate": 1.959384468078088e-05, "loss": 0.6325, "step": 4616 }, { "epoch": 0.12, "grad_norm": 3.1250851154327393, "learning_rate": 1.9593610462902864e-05, "loss": 0.665, "step": 4617 }, { "epoch": 0.12, "grad_norm": 2.1981279850006104, "learning_rate": 1.959337617891171e-05, "loss": 0.6952, "step": 4618 }, { "epoch": 0.12, "grad_norm": 1.9975180625915527, "learning_rate": 1.9593141828809036e-05, "loss": 0.6132, "step": 4619 }, { "epoch": 0.12, "grad_norm": 2.3810079097747803, "learning_rate": 1.9592907412596456e-05, "loss": 0.6968, "step": 4620 }, { "epoch": 0.12, "grad_norm": 4.800151348114014, "learning_rate": 1.9592672930275584e-05, "loss": 0.8547, "step": 4621 }, { "epoch": 0.12, "grad_norm": 2.61891508102417, "learning_rate": 1.959243838184804e-05, "loss": 0.7329, "step": 4622 }, { "epoch": 0.12, "grad_norm": 3.4298477172851562, "learning_rate": 1.959220376731543e-05, "loss": 0.7899, "step": 4623 }, { "epoch": 0.12, "grad_norm": 2.1889092922210693, "learning_rate": 1.9591969086679385e-05, "loss": 0.6583, "step": 4624 }, { "epoch": 0.12, "grad_norm": 1.6786693334579468, "learning_rate": 1.9591734339941512e-05, "loss": 0.7162, "step": 4625 }, { "epoch": 0.12, "grad_norm": 2.2818918228149414, "learning_rate": 1.9591499527103434e-05, "loss": 0.7204, "step": 4626 }, { "epoch": 0.12, "grad_norm": 2.814523935317993, "learning_rate": 1.9591264648166767e-05, "loss": 0.6213, "step": 4627 }, { "epoch": 0.12, "grad_norm": 1.7563834190368652, "learning_rate": 1.9591029703133127e-05, "loss": 0.7176, "step": 4628 }, { "epoch": 0.12, "grad_norm": 1.9516782760620117, "learning_rate": 1.959079469200414e-05, "loss": 0.636, "step": 4629 }, { "epoch": 0.12, "grad_norm": 3.4363183975219727, "learning_rate": 1.959055961478142e-05, "loss": 0.6753, "step": 4630 }, { "epoch": 0.12, "grad_norm": 3.0345921516418457, "learning_rate": 1.9590324471466585e-05, "loss": 0.7498, "step": 4631 }, { "epoch": 0.12, "grad_norm": 3.2106873989105225, "learning_rate": 1.9590089262061264e-05, "loss": 0.8913, "step": 4632 }, { "epoch": 0.12, "grad_norm": 3.6937179565429688, "learning_rate": 1.958985398656707e-05, "loss": 0.6703, "step": 4633 }, { "epoch": 0.12, "grad_norm": 3.749925374984741, "learning_rate": 1.958961864498563e-05, "loss": 0.614, "step": 4634 }, { "epoch": 0.12, "grad_norm": 1.5899673700332642, "learning_rate": 1.958938323731856e-05, "loss": 0.6697, "step": 4635 }, { "epoch": 0.12, "grad_norm": 2.2369396686553955, "learning_rate": 1.958914776356749e-05, "loss": 0.6995, "step": 4636 }, { "epoch": 0.12, "grad_norm": 3.903858184814453, "learning_rate": 1.9588912223734033e-05, "loss": 0.6869, "step": 4637 }, { "epoch": 0.12, "grad_norm": 2.461900472640991, "learning_rate": 1.958867661781982e-05, "loss": 0.629, "step": 4638 }, { "epoch": 0.12, "grad_norm": 5.721465110778809, "learning_rate": 1.958844094582647e-05, "loss": 0.733, "step": 4639 }, { "epoch": 0.12, "grad_norm": 1.7680894136428833, "learning_rate": 1.958820520775561e-05, "loss": 0.6402, "step": 4640 }, { "epoch": 0.12, "grad_norm": 2.2280094623565674, "learning_rate": 1.9587969403608863e-05, "loss": 0.6682, "step": 4641 }, { "epoch": 0.12, "grad_norm": 8.531926155090332, "learning_rate": 1.9587733533387857e-05, "loss": 1.0099, "step": 4642 }, { "epoch": 0.12, "grad_norm": 2.320384979248047, "learning_rate": 1.9587497597094213e-05, "loss": 0.6102, "step": 4643 }, { "epoch": 0.12, "grad_norm": 2.552060127258301, "learning_rate": 1.958726159472956e-05, "loss": 0.5861, "step": 4644 }, { "epoch": 0.12, "grad_norm": 2.5764424800872803, "learning_rate": 1.958702552629552e-05, "loss": 0.5064, "step": 4645 }, { "epoch": 0.12, "grad_norm": 3.6829020977020264, "learning_rate": 1.9586789391793728e-05, "loss": 0.6635, "step": 4646 }, { "epoch": 0.12, "grad_norm": 2.094911813735962, "learning_rate": 1.9586553191225804e-05, "loss": 0.606, "step": 4647 }, { "epoch": 0.12, "grad_norm": 2.423067569732666, "learning_rate": 1.958631692459338e-05, "loss": 0.7258, "step": 4648 }, { "epoch": 0.12, "grad_norm": 3.7710211277008057, "learning_rate": 1.9586080591898083e-05, "loss": 0.7637, "step": 4649 }, { "epoch": 0.12, "grad_norm": 2.1620442867279053, "learning_rate": 1.958584419314154e-05, "loss": 0.6311, "step": 4650 }, { "epoch": 0.12, "grad_norm": 3.321507453918457, "learning_rate": 1.9585607728325376e-05, "loss": 0.62, "step": 4651 }, { "epoch": 0.12, "grad_norm": 4.68124532699585, "learning_rate": 1.958537119745123e-05, "loss": 0.7343, "step": 4652 }, { "epoch": 0.12, "grad_norm": 2.1179497241973877, "learning_rate": 1.958513460052073e-05, "loss": 0.6513, "step": 4653 }, { "epoch": 0.12, "grad_norm": 2.2442262172698975, "learning_rate": 1.9584897937535502e-05, "loss": 0.6733, "step": 4654 }, { "epoch": 0.12, "grad_norm": 1.8272273540496826, "learning_rate": 1.958466120849718e-05, "loss": 0.7191, "step": 4655 }, { "epoch": 0.12, "grad_norm": 5.673636436462402, "learning_rate": 1.9584424413407394e-05, "loss": 0.8383, "step": 4656 }, { "epoch": 0.12, "grad_norm": 3.9981226921081543, "learning_rate": 1.9584187552267778e-05, "loss": 0.6911, "step": 4657 }, { "epoch": 0.12, "grad_norm": 8.122071266174316, "learning_rate": 1.958395062507996e-05, "loss": 0.5612, "step": 4658 }, { "epoch": 0.12, "grad_norm": 1.400197148323059, "learning_rate": 1.958371363184558e-05, "loss": 0.8598, "step": 4659 }, { "epoch": 0.12, "grad_norm": 3.2587268352508545, "learning_rate": 1.9583476572566263e-05, "loss": 0.8345, "step": 4660 }, { "epoch": 0.12, "grad_norm": 8.000726699829102, "learning_rate": 1.9583239447243648e-05, "loss": 0.6559, "step": 4661 }, { "epoch": 0.12, "grad_norm": 1.0006815195083618, "learning_rate": 1.9583002255879367e-05, "loss": 0.6077, "step": 4662 }, { "epoch": 0.12, "grad_norm": 3.2813632488250732, "learning_rate": 1.9582764998475055e-05, "loss": 0.6888, "step": 4663 }, { "epoch": 0.12, "grad_norm": 4.028326988220215, "learning_rate": 1.9582527675032348e-05, "loss": 0.9758, "step": 4664 }, { "epoch": 0.12, "grad_norm": 5.1704912185668945, "learning_rate": 1.958229028555288e-05, "loss": 0.834, "step": 4665 }, { "epoch": 0.12, "grad_norm": 2.31321120262146, "learning_rate": 1.9582052830038284e-05, "loss": 0.7632, "step": 4666 }, { "epoch": 0.12, "grad_norm": 1.2332996129989624, "learning_rate": 1.9581815308490206e-05, "loss": 0.5335, "step": 4667 }, { "epoch": 0.12, "grad_norm": 3.0008633136749268, "learning_rate": 1.9581577720910273e-05, "loss": 0.7679, "step": 4668 }, { "epoch": 0.12, "grad_norm": 3.172516345977783, "learning_rate": 1.9581340067300126e-05, "loss": 0.7802, "step": 4669 }, { "epoch": 0.12, "grad_norm": 2.197058916091919, "learning_rate": 1.9581102347661404e-05, "loss": 0.7028, "step": 4670 }, { "epoch": 0.12, "grad_norm": 3.109066963195801, "learning_rate": 1.9580864561995745e-05, "loss": 0.7969, "step": 4671 }, { "epoch": 0.12, "grad_norm": 3.9971768856048584, "learning_rate": 1.9580626710304785e-05, "loss": 0.704, "step": 4672 }, { "epoch": 0.12, "grad_norm": 3.190124273300171, "learning_rate": 1.9580388792590166e-05, "loss": 0.6065, "step": 4673 }, { "epoch": 0.12, "grad_norm": 2.454331398010254, "learning_rate": 1.9580150808853526e-05, "loss": 0.6304, "step": 4674 }, { "epoch": 0.12, "grad_norm": 1.592105746269226, "learning_rate": 1.9579912759096506e-05, "loss": 0.7276, "step": 4675 }, { "epoch": 0.12, "grad_norm": 1.9695961475372314, "learning_rate": 1.9579674643320745e-05, "loss": 0.6734, "step": 4676 }, { "epoch": 0.12, "grad_norm": 5.532238960266113, "learning_rate": 1.9579436461527886e-05, "loss": 0.7305, "step": 4677 }, { "epoch": 0.12, "grad_norm": 2.014498233795166, "learning_rate": 1.9579198213719567e-05, "loss": 0.5393, "step": 4678 }, { "epoch": 0.12, "grad_norm": 3.052093982696533, "learning_rate": 1.9578959899897434e-05, "loss": 0.6107, "step": 4679 }, { "epoch": 0.12, "grad_norm": 3.2539052963256836, "learning_rate": 1.9578721520063126e-05, "loss": 0.6367, "step": 4680 }, { "epoch": 0.12, "grad_norm": 2.472942590713501, "learning_rate": 1.957848307421829e-05, "loss": 0.8149, "step": 4681 }, { "epoch": 0.12, "grad_norm": 2.563289165496826, "learning_rate": 1.9578244562364563e-05, "loss": 0.5548, "step": 4682 }, { "epoch": 0.12, "grad_norm": 2.7064459323883057, "learning_rate": 1.9578005984503594e-05, "loss": 0.6103, "step": 4683 }, { "epoch": 0.12, "grad_norm": 2.9731688499450684, "learning_rate": 1.9577767340637028e-05, "loss": 0.6645, "step": 4684 }, { "epoch": 0.12, "grad_norm": 3.0355989933013916, "learning_rate": 1.9577528630766503e-05, "loss": 0.7246, "step": 4685 }, { "epoch": 0.12, "grad_norm": 2.15592098236084, "learning_rate": 1.957728985489367e-05, "loss": 0.6103, "step": 4686 }, { "epoch": 0.12, "grad_norm": 1.6872832775115967, "learning_rate": 1.957705101302017e-05, "loss": 0.7915, "step": 4687 }, { "epoch": 0.12, "grad_norm": 3.775085687637329, "learning_rate": 1.9576812105147653e-05, "loss": 0.7189, "step": 4688 }, { "epoch": 0.12, "grad_norm": 1.7583101987838745, "learning_rate": 1.957657313127776e-05, "loss": 0.7188, "step": 4689 }, { "epoch": 0.12, "grad_norm": 4.280486583709717, "learning_rate": 1.957633409141215e-05, "loss": 0.6883, "step": 4690 }, { "epoch": 0.12, "grad_norm": 2.3571412563323975, "learning_rate": 1.9576094985552454e-05, "loss": 0.6535, "step": 4691 }, { "epoch": 0.12, "grad_norm": 2.218325614929199, "learning_rate": 1.957585581370033e-05, "loss": 0.5747, "step": 4692 }, { "epoch": 0.12, "grad_norm": 4.193375587463379, "learning_rate": 1.9575616575857426e-05, "loss": 0.6707, "step": 4693 }, { "epoch": 0.12, "grad_norm": 2.3148889541625977, "learning_rate": 1.9575377272025386e-05, "loss": 0.6729, "step": 4694 }, { "epoch": 0.12, "grad_norm": 3.90397572517395, "learning_rate": 1.957513790220586e-05, "loss": 0.5918, "step": 4695 }, { "epoch": 0.12, "grad_norm": 1.8891433477401733, "learning_rate": 1.9574898466400504e-05, "loss": 0.8041, "step": 4696 }, { "epoch": 0.12, "grad_norm": 2.9196808338165283, "learning_rate": 1.9574658964610962e-05, "loss": 0.7562, "step": 4697 }, { "epoch": 0.12, "grad_norm": 3.624354839324951, "learning_rate": 1.9574419396838885e-05, "loss": 0.6125, "step": 4698 }, { "epoch": 0.12, "grad_norm": 2.948660135269165, "learning_rate": 1.9574179763085923e-05, "loss": 0.6347, "step": 4699 }, { "epoch": 0.12, "grad_norm": 3.5373804569244385, "learning_rate": 1.9573940063353733e-05, "loss": 0.7281, "step": 4700 }, { "epoch": 0.12, "grad_norm": 1.8610094785690308, "learning_rate": 1.957370029764396e-05, "loss": 0.6073, "step": 4701 }, { "epoch": 0.12, "grad_norm": 1.8458774089813232, "learning_rate": 1.9573460465958258e-05, "loss": 0.6303, "step": 4702 }, { "epoch": 0.12, "grad_norm": 2.7483432292938232, "learning_rate": 1.9573220568298286e-05, "loss": 0.7537, "step": 4703 }, { "epoch": 0.12, "grad_norm": 1.7320013046264648, "learning_rate": 1.957298060466569e-05, "loss": 0.7145, "step": 4704 }, { "epoch": 0.12, "grad_norm": 5.652550220489502, "learning_rate": 1.9572740575062122e-05, "loss": 0.9495, "step": 4705 }, { "epoch": 0.12, "grad_norm": 1.9236239194869995, "learning_rate": 1.9572500479489247e-05, "loss": 0.6617, "step": 4706 }, { "epoch": 0.12, "grad_norm": 1.4283370971679688, "learning_rate": 1.9572260317948707e-05, "loss": 0.7213, "step": 4707 }, { "epoch": 0.12, "grad_norm": 3.6821773052215576, "learning_rate": 1.9572020090442166e-05, "loss": 0.8539, "step": 4708 }, { "epoch": 0.12, "grad_norm": 2.75419545173645, "learning_rate": 1.9571779796971278e-05, "loss": 0.7849, "step": 4709 }, { "epoch": 0.12, "grad_norm": 2.2920148372650146, "learning_rate": 1.9571539437537695e-05, "loss": 0.6151, "step": 4710 }, { "epoch": 0.12, "grad_norm": 1.8916280269622803, "learning_rate": 1.9571299012143075e-05, "loss": 0.7228, "step": 4711 }, { "epoch": 0.12, "grad_norm": 2.714918851852417, "learning_rate": 1.9571058520789075e-05, "loss": 0.676, "step": 4712 }, { "epoch": 0.12, "grad_norm": 7.199004650115967, "learning_rate": 1.957081796347735e-05, "loss": 0.7629, "step": 4713 }, { "epoch": 0.12, "grad_norm": 3.7512264251708984, "learning_rate": 1.9570577340209566e-05, "loss": 0.6402, "step": 4714 }, { "epoch": 0.12, "grad_norm": 2.9792444705963135, "learning_rate": 1.9570336650987375e-05, "loss": 0.6414, "step": 4715 }, { "epoch": 0.12, "grad_norm": 1.9099313020706177, "learning_rate": 1.9570095895812437e-05, "loss": 0.7853, "step": 4716 }, { "epoch": 0.12, "grad_norm": 2.3387112617492676, "learning_rate": 1.9569855074686406e-05, "loss": 0.6385, "step": 4717 }, { "epoch": 0.12, "grad_norm": 2.4242515563964844, "learning_rate": 1.9569614187610952e-05, "loss": 0.5971, "step": 4718 }, { "epoch": 0.12, "grad_norm": 3.0989973545074463, "learning_rate": 1.9569373234587725e-05, "loss": 0.7595, "step": 4719 }, { "epoch": 0.12, "grad_norm": 3.191791296005249, "learning_rate": 1.9569132215618393e-05, "loss": 0.5898, "step": 4720 }, { "epoch": 0.12, "grad_norm": 3.9112396240234375, "learning_rate": 1.956889113070461e-05, "loss": 0.6732, "step": 4721 }, { "epoch": 0.12, "grad_norm": 3.295942544937134, "learning_rate": 1.9568649979848044e-05, "loss": 0.6238, "step": 4722 }, { "epoch": 0.12, "grad_norm": 2.889249324798584, "learning_rate": 1.956840876305035e-05, "loss": 0.6023, "step": 4723 }, { "epoch": 0.12, "grad_norm": 3.8685688972473145, "learning_rate": 1.95681674803132e-05, "loss": 0.6499, "step": 4724 }, { "epoch": 0.12, "grad_norm": 2.3939409255981445, "learning_rate": 1.9567926131638248e-05, "loss": 0.7087, "step": 4725 }, { "epoch": 0.12, "grad_norm": 2.411904811859131, "learning_rate": 1.956768471702716e-05, "loss": 0.6157, "step": 4726 }, { "epoch": 0.12, "grad_norm": 2.14278244972229, "learning_rate": 1.95674432364816e-05, "loss": 0.8057, "step": 4727 }, { "epoch": 0.12, "grad_norm": 4.658954620361328, "learning_rate": 1.9567201690003234e-05, "loss": 0.614, "step": 4728 }, { "epoch": 0.12, "grad_norm": 2.525562047958374, "learning_rate": 1.9566960077593725e-05, "loss": 0.6995, "step": 4729 }, { "epoch": 0.12, "grad_norm": 2.0014395713806152, "learning_rate": 1.9566718399254732e-05, "loss": 0.657, "step": 4730 }, { "epoch": 0.12, "grad_norm": 2.9508273601531982, "learning_rate": 1.9566476654987933e-05, "loss": 0.6547, "step": 4731 }, { "epoch": 0.12, "grad_norm": 1.9074808359146118, "learning_rate": 1.9566234844794983e-05, "loss": 0.7859, "step": 4732 }, { "epoch": 0.12, "grad_norm": 2.8845298290252686, "learning_rate": 1.9565992968677555e-05, "loss": 0.6173, "step": 4733 }, { "epoch": 0.12, "grad_norm": 2.43133282661438, "learning_rate": 1.9565751026637313e-05, "loss": 0.7238, "step": 4734 }, { "epoch": 0.12, "grad_norm": 5.759819507598877, "learning_rate": 1.9565509018675923e-05, "loss": 0.7227, "step": 4735 }, { "epoch": 0.12, "grad_norm": 2.14996075630188, "learning_rate": 1.9565266944795055e-05, "loss": 0.5332, "step": 4736 }, { "epoch": 0.12, "grad_norm": 2.185988664627075, "learning_rate": 1.9565024804996378e-05, "loss": 0.6974, "step": 4737 }, { "epoch": 0.12, "grad_norm": 3.257355213165283, "learning_rate": 1.9564782599281557e-05, "loss": 0.744, "step": 4738 }, { "epoch": 0.12, "grad_norm": 3.539450168609619, "learning_rate": 1.9564540327652264e-05, "loss": 0.7572, "step": 4739 }, { "epoch": 0.12, "grad_norm": 2.048161029815674, "learning_rate": 1.956429799011017e-05, "loss": 0.5666, "step": 4740 }, { "epoch": 0.12, "grad_norm": 2.8104264736175537, "learning_rate": 1.9564055586656945e-05, "loss": 0.7095, "step": 4741 }, { "epoch": 0.12, "grad_norm": 1.775496244430542, "learning_rate": 1.9563813117294256e-05, "loss": 0.7293, "step": 4742 }, { "epoch": 0.12, "grad_norm": 3.0771026611328125, "learning_rate": 1.956357058202377e-05, "loss": 0.6821, "step": 4743 }, { "epoch": 0.12, "grad_norm": 3.728097438812256, "learning_rate": 1.9563327980847168e-05, "loss": 0.7088, "step": 4744 }, { "epoch": 0.12, "grad_norm": 2.4493155479431152, "learning_rate": 1.956308531376612e-05, "loss": 0.7018, "step": 4745 }, { "epoch": 0.12, "grad_norm": 2.479581594467163, "learning_rate": 1.9562842580782293e-05, "loss": 0.5433, "step": 4746 }, { "epoch": 0.12, "grad_norm": 3.1621146202087402, "learning_rate": 1.9562599781897366e-05, "loss": 0.7414, "step": 4747 }, { "epoch": 0.12, "grad_norm": 2.367248296737671, "learning_rate": 1.9562356917113008e-05, "loss": 0.6212, "step": 4748 }, { "epoch": 0.12, "grad_norm": 2.4512405395507812, "learning_rate": 1.9562113986430893e-05, "loss": 0.5077, "step": 4749 }, { "epoch": 0.12, "grad_norm": 2.0472404956817627, "learning_rate": 1.9561870989852695e-05, "loss": 0.6658, "step": 4750 }, { "epoch": 0.12, "grad_norm": 1.9173771142959595, "learning_rate": 1.956162792738009e-05, "loss": 0.5868, "step": 4751 }, { "epoch": 0.12, "grad_norm": 2.269308567047119, "learning_rate": 1.9561384799014753e-05, "loss": 0.6426, "step": 4752 }, { "epoch": 0.12, "grad_norm": 2.5565407276153564, "learning_rate": 1.956114160475836e-05, "loss": 0.8119, "step": 4753 }, { "epoch": 0.12, "grad_norm": 4.236024856567383, "learning_rate": 1.9560898344612585e-05, "loss": 0.7606, "step": 4754 }, { "epoch": 0.12, "grad_norm": 2.9816462993621826, "learning_rate": 1.9560655018579106e-05, "loss": 0.6497, "step": 4755 }, { "epoch": 0.12, "grad_norm": 2.220799684524536, "learning_rate": 1.95604116266596e-05, "loss": 0.7647, "step": 4756 }, { "epoch": 0.12, "grad_norm": 1.8026823997497559, "learning_rate": 1.956016816885574e-05, "loss": 0.6303, "step": 4757 }, { "epoch": 0.12, "grad_norm": 2.966709613800049, "learning_rate": 1.9559924645169212e-05, "loss": 0.5308, "step": 4758 }, { "epoch": 0.12, "grad_norm": 1.4996910095214844, "learning_rate": 1.9559681055601685e-05, "loss": 0.8094, "step": 4759 }, { "epoch": 0.12, "grad_norm": 2.205540418624878, "learning_rate": 1.9559437400154847e-05, "loss": 0.4959, "step": 4760 }, { "epoch": 0.12, "grad_norm": 2.640190601348877, "learning_rate": 1.955919367883037e-05, "loss": 0.7602, "step": 4761 }, { "epoch": 0.12, "grad_norm": 2.650235652923584, "learning_rate": 1.9558949891629934e-05, "loss": 0.8626, "step": 4762 }, { "epoch": 0.12, "grad_norm": 3.7810683250427246, "learning_rate": 1.955870603855522e-05, "loss": 0.8206, "step": 4763 }, { "epoch": 0.12, "grad_norm": 4.6010942459106445, "learning_rate": 1.955846211960791e-05, "loss": 0.8531, "step": 4764 }, { "epoch": 0.12, "grad_norm": 1.8295323848724365, "learning_rate": 1.9558218134789686e-05, "loss": 0.452, "step": 4765 }, { "epoch": 0.12, "grad_norm": 2.895488977432251, "learning_rate": 1.9557974084102227e-05, "loss": 0.673, "step": 4766 }, { "epoch": 0.12, "grad_norm": 5.4631757736206055, "learning_rate": 1.9557729967547216e-05, "loss": 0.6223, "step": 4767 }, { "epoch": 0.12, "grad_norm": 2.646094560623169, "learning_rate": 1.9557485785126334e-05, "loss": 0.8309, "step": 4768 }, { "epoch": 0.12, "grad_norm": 2.962327241897583, "learning_rate": 1.9557241536841268e-05, "loss": 0.629, "step": 4769 }, { "epoch": 0.12, "grad_norm": 2.776297092437744, "learning_rate": 1.9556997222693694e-05, "loss": 0.7147, "step": 4770 }, { "epoch": 0.12, "grad_norm": 2.493351936340332, "learning_rate": 1.95567528426853e-05, "loss": 0.7276, "step": 4771 }, { "epoch": 0.12, "grad_norm": 1.9629405736923218, "learning_rate": 1.955650839681777e-05, "loss": 0.5595, "step": 4772 }, { "epoch": 0.12, "grad_norm": 2.6635148525238037, "learning_rate": 1.9556263885092787e-05, "loss": 0.6732, "step": 4773 }, { "epoch": 0.12, "grad_norm": 1.7352397441864014, "learning_rate": 1.955601930751204e-05, "loss": 0.6173, "step": 4774 }, { "epoch": 0.12, "grad_norm": 1.7387468814849854, "learning_rate": 1.955577466407721e-05, "loss": 0.6838, "step": 4775 }, { "epoch": 0.12, "grad_norm": 2.8074333667755127, "learning_rate": 1.9555529954789984e-05, "loss": 0.8223, "step": 4776 }, { "epoch": 0.12, "grad_norm": 2.1040329933166504, "learning_rate": 1.9555285179652047e-05, "loss": 0.6309, "step": 4777 }, { "epoch": 0.12, "grad_norm": 1.3049492835998535, "learning_rate": 1.955504033866509e-05, "loss": 0.5745, "step": 4778 }, { "epoch": 0.12, "grad_norm": 2.2057430744171143, "learning_rate": 1.95547954318308e-05, "loss": 0.69, "step": 4779 }, { "epoch": 0.12, "grad_norm": 2.6327340602874756, "learning_rate": 1.9554550459150862e-05, "loss": 0.7088, "step": 4780 }, { "epoch": 0.12, "grad_norm": 4.0368852615356445, "learning_rate": 1.9554305420626964e-05, "loss": 0.6216, "step": 4781 }, { "epoch": 0.12, "grad_norm": 1.3339110612869263, "learning_rate": 1.9554060316260798e-05, "loss": 0.7799, "step": 4782 }, { "epoch": 0.12, "grad_norm": 2.3929603099823, "learning_rate": 1.9553815146054047e-05, "loss": 0.7743, "step": 4783 }, { "epoch": 0.12, "grad_norm": 1.6899861097335815, "learning_rate": 1.9553569910008404e-05, "loss": 0.6325, "step": 4784 }, { "epoch": 0.12, "grad_norm": 3.554774045944214, "learning_rate": 1.955332460812556e-05, "loss": 0.6591, "step": 4785 }, { "epoch": 0.12, "grad_norm": 2.932450532913208, "learning_rate": 1.9553079240407205e-05, "loss": 0.4735, "step": 4786 }, { "epoch": 0.12, "grad_norm": 3.111440420150757, "learning_rate": 1.9552833806855034e-05, "loss": 0.7698, "step": 4787 }, { "epoch": 0.12, "grad_norm": 2.7170116901397705, "learning_rate": 1.9552588307470727e-05, "loss": 0.5662, "step": 4788 }, { "epoch": 0.12, "grad_norm": 4.727442741394043, "learning_rate": 1.955234274225599e-05, "loss": 0.5508, "step": 4789 }, { "epoch": 0.12, "grad_norm": 4.377284049987793, "learning_rate": 1.9552097111212506e-05, "loss": 0.6062, "step": 4790 }, { "epoch": 0.12, "grad_norm": 3.28171443939209, "learning_rate": 1.9551851414341966e-05, "loss": 0.7523, "step": 4791 }, { "epoch": 0.12, "grad_norm": 1.8163772821426392, "learning_rate": 1.955160565164607e-05, "loss": 0.6163, "step": 4792 }, { "epoch": 0.12, "grad_norm": 4.390381813049316, "learning_rate": 1.9551359823126508e-05, "loss": 0.8726, "step": 4793 }, { "epoch": 0.12, "grad_norm": 2.342662811279297, "learning_rate": 1.9551113928784975e-05, "loss": 0.8112, "step": 4794 }, { "epoch": 0.12, "grad_norm": 1.6306636333465576, "learning_rate": 1.9550867968623167e-05, "loss": 0.6187, "step": 4795 }, { "epoch": 0.12, "grad_norm": 3.7502989768981934, "learning_rate": 1.9550621942642777e-05, "loss": 0.7476, "step": 4796 }, { "epoch": 0.12, "grad_norm": 5.098234176635742, "learning_rate": 1.9550375850845497e-05, "loss": 0.7643, "step": 4797 }, { "epoch": 0.12, "grad_norm": 2.4763667583465576, "learning_rate": 1.9550129693233032e-05, "loss": 0.5901, "step": 4798 }, { "epoch": 0.12, "grad_norm": 1.4908933639526367, "learning_rate": 1.954988346980707e-05, "loss": 0.782, "step": 4799 }, { "epoch": 0.12, "grad_norm": 3.1929502487182617, "learning_rate": 1.9549637180569313e-05, "loss": 0.8869, "step": 4800 }, { "epoch": 0.12, "grad_norm": 2.802107810974121, "learning_rate": 1.9549390825521453e-05, "loss": 0.8186, "step": 4801 }, { "epoch": 0.12, "grad_norm": 1.8100448846817017, "learning_rate": 1.9549144404665196e-05, "loss": 0.6221, "step": 4802 }, { "epoch": 0.12, "grad_norm": 1.7993369102478027, "learning_rate": 1.954889791800223e-05, "loss": 0.6221, "step": 4803 }, { "epoch": 0.12, "grad_norm": 2.1346590518951416, "learning_rate": 1.954865136553426e-05, "loss": 0.6767, "step": 4804 }, { "epoch": 0.12, "grad_norm": 4.115543842315674, "learning_rate": 1.9548404747262985e-05, "loss": 0.7114, "step": 4805 }, { "epoch": 0.12, "grad_norm": 3.4280927181243896, "learning_rate": 1.9548158063190103e-05, "loss": 0.7005, "step": 4806 }, { "epoch": 0.12, "grad_norm": 2.3386147022247314, "learning_rate": 1.9547911313317318e-05, "loss": 0.7727, "step": 4807 }, { "epoch": 0.12, "grad_norm": 2.2781059741973877, "learning_rate": 1.9547664497646318e-05, "loss": 0.7059, "step": 4808 }, { "epoch": 0.12, "grad_norm": 3.223841667175293, "learning_rate": 1.9547417616178822e-05, "loss": 0.6438, "step": 4809 }, { "epoch": 0.12, "grad_norm": 1.820256233215332, "learning_rate": 1.9547170668916514e-05, "loss": 0.8237, "step": 4810 }, { "epoch": 0.12, "grad_norm": 2.001460552215576, "learning_rate": 1.954692365586111e-05, "loss": 0.7408, "step": 4811 }, { "epoch": 0.12, "grad_norm": 3.1410892009735107, "learning_rate": 1.9546676577014305e-05, "loss": 0.7484, "step": 4812 }, { "epoch": 0.12, "grad_norm": 2.791201114654541, "learning_rate": 1.95464294323778e-05, "loss": 0.7809, "step": 4813 }, { "epoch": 0.12, "grad_norm": 1.8680546283721924, "learning_rate": 1.95461822219533e-05, "loss": 0.6778, "step": 4814 }, { "epoch": 0.12, "grad_norm": 2.4502768516540527, "learning_rate": 1.9545934945742516e-05, "loss": 0.7607, "step": 4815 }, { "epoch": 0.12, "grad_norm": 2.8161604404449463, "learning_rate": 1.954568760374714e-05, "loss": 0.7924, "step": 4816 }, { "epoch": 0.12, "grad_norm": 1.4290201663970947, "learning_rate": 1.9545440195968886e-05, "loss": 0.4986, "step": 4817 }, { "epoch": 0.12, "grad_norm": 4.700461387634277, "learning_rate": 1.9545192722409452e-05, "loss": 0.8246, "step": 4818 }, { "epoch": 0.12, "grad_norm": 2.366804361343384, "learning_rate": 1.9544945183070552e-05, "loss": 0.7337, "step": 4819 }, { "epoch": 0.12, "grad_norm": 3.0494120121002197, "learning_rate": 1.954469757795388e-05, "loss": 0.7089, "step": 4820 }, { "epoch": 0.12, "grad_norm": 1.8449867963790894, "learning_rate": 1.9544449907061152e-05, "loss": 0.653, "step": 4821 }, { "epoch": 0.12, "grad_norm": 2.4843811988830566, "learning_rate": 1.9544202170394074e-05, "loss": 0.8, "step": 4822 }, { "epoch": 0.12, "grad_norm": 4.520292282104492, "learning_rate": 1.9543954367954347e-05, "loss": 0.6547, "step": 4823 }, { "epoch": 0.12, "grad_norm": 2.5224356651306152, "learning_rate": 1.9543706499743683e-05, "loss": 0.6418, "step": 4824 }, { "epoch": 0.12, "grad_norm": 2.1251680850982666, "learning_rate": 1.954345856576379e-05, "loss": 0.6971, "step": 4825 }, { "epoch": 0.12, "grad_norm": 4.099669933319092, "learning_rate": 1.9543210566016377e-05, "loss": 0.8526, "step": 4826 }, { "epoch": 0.12, "grad_norm": 1.6249644756317139, "learning_rate": 1.9542962500503154e-05, "loss": 0.7441, "step": 4827 }, { "epoch": 0.12, "grad_norm": 1.8888274431228638, "learning_rate": 1.9542714369225826e-05, "loss": 0.6845, "step": 4828 }, { "epoch": 0.12, "grad_norm": 2.4701955318450928, "learning_rate": 1.9542466172186106e-05, "loss": 0.5973, "step": 4829 }, { "epoch": 0.12, "grad_norm": 1.9017224311828613, "learning_rate": 1.954221790938571e-05, "loss": 0.6859, "step": 4830 }, { "epoch": 0.12, "grad_norm": 2.2013587951660156, "learning_rate": 1.9541969580826334e-05, "loss": 0.6206, "step": 4831 }, { "epoch": 0.12, "grad_norm": 2.3003058433532715, "learning_rate": 1.9541721186509703e-05, "loss": 0.7096, "step": 4832 }, { "epoch": 0.12, "grad_norm": 2.170931577682495, "learning_rate": 1.9541472726437525e-05, "loss": 0.4668, "step": 4833 }, { "epoch": 0.12, "grad_norm": 2.579230546951294, "learning_rate": 1.954122420061151e-05, "loss": 0.6791, "step": 4834 }, { "epoch": 0.12, "grad_norm": 2.3612020015716553, "learning_rate": 1.9540975609033375e-05, "loss": 0.7233, "step": 4835 }, { "epoch": 0.12, "grad_norm": 2.625307321548462, "learning_rate": 1.9540726951704825e-05, "loss": 0.7202, "step": 4836 }, { "epoch": 0.12, "grad_norm": 3.88307523727417, "learning_rate": 1.9540478228627586e-05, "loss": 0.5959, "step": 4837 }, { "epoch": 0.12, "grad_norm": 2.655641555786133, "learning_rate": 1.954022943980336e-05, "loss": 0.5975, "step": 4838 }, { "epoch": 0.12, "grad_norm": 2.400418758392334, "learning_rate": 1.953998058523387e-05, "loss": 0.6426, "step": 4839 }, { "epoch": 0.12, "grad_norm": 4.627746105194092, "learning_rate": 1.9539731664920823e-05, "loss": 0.6848, "step": 4840 }, { "epoch": 0.12, "grad_norm": 4.799234867095947, "learning_rate": 1.953948267886594e-05, "loss": 0.6478, "step": 4841 }, { "epoch": 0.12, "grad_norm": 2.5800886154174805, "learning_rate": 1.953923362707094e-05, "loss": 0.6666, "step": 4842 }, { "epoch": 0.12, "grad_norm": 1.7102513313293457, "learning_rate": 1.9538984509537528e-05, "loss": 0.7195, "step": 4843 }, { "epoch": 0.12, "grad_norm": 2.285367727279663, "learning_rate": 1.9538735326267434e-05, "loss": 0.734, "step": 4844 }, { "epoch": 0.12, "grad_norm": 2.663081645965576, "learning_rate": 1.9538486077262365e-05, "loss": 0.6173, "step": 4845 }, { "epoch": 0.12, "grad_norm": 3.918076753616333, "learning_rate": 1.9538236762524044e-05, "loss": 0.629, "step": 4846 }, { "epoch": 0.12, "grad_norm": 4.0898966789245605, "learning_rate": 1.9537987382054187e-05, "loss": 0.7297, "step": 4847 }, { "epoch": 0.12, "grad_norm": 2.1335103511810303, "learning_rate": 1.9537737935854515e-05, "loss": 0.6494, "step": 4848 }, { "epoch": 0.12, "grad_norm": 5.402776718139648, "learning_rate": 1.9537488423926744e-05, "loss": 0.7047, "step": 4849 }, { "epoch": 0.12, "grad_norm": 2.236722230911255, "learning_rate": 1.9537238846272593e-05, "loss": 0.6648, "step": 4850 }, { "epoch": 0.12, "grad_norm": 1.846611738204956, "learning_rate": 1.953698920289379e-05, "loss": 0.7018, "step": 4851 }, { "epoch": 0.12, "grad_norm": 3.068157434463501, "learning_rate": 1.953673949379204e-05, "loss": 0.66, "step": 4852 }, { "epoch": 0.12, "grad_norm": 1.665274739265442, "learning_rate": 1.9536489718969082e-05, "loss": 0.6414, "step": 4853 }, { "epoch": 0.12, "grad_norm": 1.2095201015472412, "learning_rate": 1.9536239878426625e-05, "loss": 0.7378, "step": 4854 }, { "epoch": 0.12, "grad_norm": 4.111462593078613, "learning_rate": 1.9535989972166392e-05, "loss": 0.7441, "step": 4855 }, { "epoch": 0.12, "grad_norm": 3.5381956100463867, "learning_rate": 1.9535740000190107e-05, "loss": 0.6742, "step": 4856 }, { "epoch": 0.12, "grad_norm": 1.658524751663208, "learning_rate": 1.9535489962499496e-05, "loss": 0.5891, "step": 4857 }, { "epoch": 0.12, "grad_norm": 3.3748152256011963, "learning_rate": 1.9535239859096274e-05, "loss": 0.7351, "step": 4858 }, { "epoch": 0.12, "grad_norm": 3.600984573364258, "learning_rate": 1.9534989689982175e-05, "loss": 0.7903, "step": 4859 }, { "epoch": 0.12, "grad_norm": 5.507456302642822, "learning_rate": 1.9534739455158915e-05, "loss": 0.8579, "step": 4860 }, { "epoch": 0.12, "grad_norm": 1.8711885213851929, "learning_rate": 1.9534489154628223e-05, "loss": 0.7701, "step": 4861 }, { "epoch": 0.12, "grad_norm": 1.5320323705673218, "learning_rate": 1.953423878839182e-05, "loss": 0.6729, "step": 4862 }, { "epoch": 0.12, "grad_norm": 2.679086685180664, "learning_rate": 1.9533988356451434e-05, "loss": 0.6388, "step": 4863 }, { "epoch": 0.12, "grad_norm": 3.4811477661132812, "learning_rate": 1.953373785880879e-05, "loss": 0.6922, "step": 4864 }, { "epoch": 0.12, "grad_norm": 2.955779790878296, "learning_rate": 1.953348729546561e-05, "loss": 0.7115, "step": 4865 }, { "epoch": 0.12, "grad_norm": 1.3204096555709839, "learning_rate": 1.9533236666423632e-05, "loss": 0.5075, "step": 4866 }, { "epoch": 0.12, "grad_norm": 3.288696765899658, "learning_rate": 1.9532985971684572e-05, "loss": 0.7713, "step": 4867 }, { "epoch": 0.12, "grad_norm": 3.044541120529175, "learning_rate": 1.9532735211250165e-05, "loss": 0.76, "step": 4868 }, { "epoch": 0.12, "grad_norm": 2.098116397857666, "learning_rate": 1.9532484385122133e-05, "loss": 0.6304, "step": 4869 }, { "epoch": 0.12, "grad_norm": 4.5967302322387695, "learning_rate": 1.953223349330221e-05, "loss": 0.7459, "step": 4870 }, { "epoch": 0.12, "grad_norm": 2.089197874069214, "learning_rate": 1.953198253579212e-05, "loss": 0.7181, "step": 4871 }, { "epoch": 0.12, "grad_norm": 2.591716766357422, "learning_rate": 1.9531731512593595e-05, "loss": 0.7342, "step": 4872 }, { "epoch": 0.12, "grad_norm": 2.2780773639678955, "learning_rate": 1.9531480423708367e-05, "loss": 0.6151, "step": 4873 }, { "epoch": 0.12, "grad_norm": 2.0792605876922607, "learning_rate": 1.9531229269138163e-05, "loss": 0.7828, "step": 4874 }, { "epoch": 0.12, "grad_norm": 2.3837461471557617, "learning_rate": 1.9530978048884714e-05, "loss": 0.7413, "step": 4875 }, { "epoch": 0.12, "grad_norm": 2.0469021797180176, "learning_rate": 1.9530726762949756e-05, "loss": 0.5736, "step": 4876 }, { "epoch": 0.13, "grad_norm": 2.5544159412384033, "learning_rate": 1.9530475411335012e-05, "loss": 0.5106, "step": 4877 }, { "epoch": 0.13, "grad_norm": 1.8519407510757446, "learning_rate": 1.953022399404222e-05, "loss": 0.7526, "step": 4878 }, { "epoch": 0.13, "grad_norm": 3.3324790000915527, "learning_rate": 1.9529972511073115e-05, "loss": 0.7705, "step": 4879 }, { "epoch": 0.13, "grad_norm": 3.0735020637512207, "learning_rate": 1.9529720962429425e-05, "loss": 0.7485, "step": 4880 }, { "epoch": 0.13, "grad_norm": 2.3971481323242188, "learning_rate": 1.9529469348112884e-05, "loss": 0.6131, "step": 4881 }, { "epoch": 0.13, "grad_norm": 2.125561475753784, "learning_rate": 1.952921766812523e-05, "loss": 0.6508, "step": 4882 }, { "epoch": 0.13, "grad_norm": 2.458040714263916, "learning_rate": 1.9528965922468194e-05, "loss": 0.7886, "step": 4883 }, { "epoch": 0.13, "grad_norm": 1.9669123888015747, "learning_rate": 1.9528714111143506e-05, "loss": 0.7814, "step": 4884 }, { "epoch": 0.13, "grad_norm": 3.2165093421936035, "learning_rate": 1.9528462234152914e-05, "loss": 0.6332, "step": 4885 }, { "epoch": 0.13, "grad_norm": 2.025905132293701, "learning_rate": 1.9528210291498143e-05, "loss": 0.6434, "step": 4886 }, { "epoch": 0.13, "grad_norm": 2.8672640323638916, "learning_rate": 1.9527958283180934e-05, "loss": 0.6097, "step": 4887 }, { "epoch": 0.13, "grad_norm": 4.030189514160156, "learning_rate": 1.9527706209203026e-05, "loss": 0.7391, "step": 4888 }, { "epoch": 0.13, "grad_norm": 2.709576368331909, "learning_rate": 1.9527454069566148e-05, "loss": 0.7042, "step": 4889 }, { "epoch": 0.13, "grad_norm": 3.5226001739501953, "learning_rate": 1.9527201864272044e-05, "loss": 0.6811, "step": 4890 }, { "epoch": 0.13, "grad_norm": 7.261837959289551, "learning_rate": 1.9526949593322448e-05, "loss": 0.6398, "step": 4891 }, { "epoch": 0.13, "grad_norm": 4.069058895111084, "learning_rate": 1.9526697256719104e-05, "loss": 0.9221, "step": 4892 }, { "epoch": 0.13, "grad_norm": 6.845010280609131, "learning_rate": 1.9526444854463747e-05, "loss": 0.6209, "step": 4893 }, { "epoch": 0.13, "grad_norm": 2.110802173614502, "learning_rate": 1.9526192386558116e-05, "loss": 0.4805, "step": 4894 }, { "epoch": 0.13, "grad_norm": 2.9690773487091064, "learning_rate": 1.9525939853003952e-05, "loss": 0.9239, "step": 4895 }, { "epoch": 0.13, "grad_norm": 2.1574151515960693, "learning_rate": 1.9525687253802997e-05, "loss": 0.529, "step": 4896 }, { "epoch": 0.13, "grad_norm": 2.4414279460906982, "learning_rate": 1.9525434588956988e-05, "loss": 0.5755, "step": 4897 }, { "epoch": 0.13, "grad_norm": 1.5815014839172363, "learning_rate": 1.952518185846767e-05, "loss": 0.548, "step": 4898 }, { "epoch": 0.13, "grad_norm": 4.082975387573242, "learning_rate": 1.9524929062336783e-05, "loss": 0.778, "step": 4899 }, { "epoch": 0.13, "grad_norm": 1.8117820024490356, "learning_rate": 1.9524676200566066e-05, "loss": 0.5403, "step": 4900 }, { "epoch": 0.13, "grad_norm": 4.13193416595459, "learning_rate": 1.952442327315727e-05, "loss": 0.6699, "step": 4901 }, { "epoch": 0.13, "grad_norm": 1.7379651069641113, "learning_rate": 1.952417028011213e-05, "loss": 0.6607, "step": 4902 }, { "epoch": 0.13, "grad_norm": 1.771628499031067, "learning_rate": 1.9523917221432388e-05, "loss": 0.6211, "step": 4903 }, { "epoch": 0.13, "grad_norm": 3.892784833908081, "learning_rate": 1.9523664097119797e-05, "loss": 0.5892, "step": 4904 }, { "epoch": 0.13, "grad_norm": 2.8985648155212402, "learning_rate": 1.9523410907176096e-05, "loss": 0.7952, "step": 4905 }, { "epoch": 0.13, "grad_norm": 1.9367070198059082, "learning_rate": 1.9523157651603028e-05, "loss": 0.7479, "step": 4906 }, { "epoch": 0.13, "grad_norm": 1.7515268325805664, "learning_rate": 1.952290433040234e-05, "loss": 0.5154, "step": 4907 }, { "epoch": 0.13, "grad_norm": 1.7452073097229004, "learning_rate": 1.952265094357578e-05, "loss": 0.7647, "step": 4908 }, { "epoch": 0.13, "grad_norm": 4.070102691650391, "learning_rate": 1.9522397491125095e-05, "loss": 0.6664, "step": 4909 }, { "epoch": 0.13, "grad_norm": 2.9726953506469727, "learning_rate": 1.9522143973052024e-05, "loss": 0.5236, "step": 4910 }, { "epoch": 0.13, "grad_norm": 1.9670724868774414, "learning_rate": 1.952189038935832e-05, "loss": 0.5442, "step": 4911 }, { "epoch": 0.13, "grad_norm": 3.703822612762451, "learning_rate": 1.952163674004573e-05, "loss": 0.8982, "step": 4912 }, { "epoch": 0.13, "grad_norm": 6.082249641418457, "learning_rate": 1.9521383025116007e-05, "loss": 0.7125, "step": 4913 }, { "epoch": 0.13, "grad_norm": 2.7660627365112305, "learning_rate": 1.9521129244570888e-05, "loss": 0.78, "step": 4914 }, { "epoch": 0.13, "grad_norm": 5.281984329223633, "learning_rate": 1.9520875398412135e-05, "loss": 0.5913, "step": 4915 }, { "epoch": 0.13, "grad_norm": 2.3945064544677734, "learning_rate": 1.9520621486641484e-05, "loss": 0.582, "step": 4916 }, { "epoch": 0.13, "grad_norm": 2.0771687030792236, "learning_rate": 1.952036750926069e-05, "loss": 0.5452, "step": 4917 }, { "epoch": 0.13, "grad_norm": 3.008681297302246, "learning_rate": 1.9520113466271507e-05, "loss": 0.7911, "step": 4918 }, { "epoch": 0.13, "grad_norm": 1.4219194650650024, "learning_rate": 1.9519859357675685e-05, "loss": 0.6392, "step": 4919 }, { "epoch": 0.13, "grad_norm": 6.128592014312744, "learning_rate": 1.951960518347497e-05, "loss": 0.6701, "step": 4920 }, { "epoch": 0.13, "grad_norm": 5.512735843658447, "learning_rate": 1.9519350943671117e-05, "loss": 0.6703, "step": 4921 }, { "epoch": 0.13, "grad_norm": 2.5065040588378906, "learning_rate": 1.9519096638265882e-05, "loss": 0.7222, "step": 4922 }, { "epoch": 0.13, "grad_norm": 2.5021049976348877, "learning_rate": 1.9518842267261008e-05, "loss": 0.685, "step": 4923 }, { "epoch": 0.13, "grad_norm": 4.586691856384277, "learning_rate": 1.9518587830658256e-05, "loss": 0.6657, "step": 4924 }, { "epoch": 0.13, "grad_norm": 7.095287322998047, "learning_rate": 1.951833332845938e-05, "loss": 0.8162, "step": 4925 }, { "epoch": 0.13, "grad_norm": 4.886776924133301, "learning_rate": 1.9518078760666126e-05, "loss": 0.7805, "step": 4926 }, { "epoch": 0.13, "grad_norm": 2.6173806190490723, "learning_rate": 1.9517824127280253e-05, "loss": 0.7243, "step": 4927 }, { "epoch": 0.13, "grad_norm": 1.7518538236618042, "learning_rate": 1.9517569428303517e-05, "loss": 0.7681, "step": 4928 }, { "epoch": 0.13, "grad_norm": 1.844370722770691, "learning_rate": 1.9517314663737676e-05, "loss": 0.607, "step": 4929 }, { "epoch": 0.13, "grad_norm": 1.7749121189117432, "learning_rate": 1.9517059833584474e-05, "loss": 0.7421, "step": 4930 }, { "epoch": 0.13, "grad_norm": 1.461841106414795, "learning_rate": 1.951680493784568e-05, "loss": 0.6939, "step": 4931 }, { "epoch": 0.13, "grad_norm": 2.1247072219848633, "learning_rate": 1.9516549976523044e-05, "loss": 0.6395, "step": 4932 }, { "epoch": 0.13, "grad_norm": 2.4358572959899902, "learning_rate": 1.9516294949618326e-05, "loss": 0.7975, "step": 4933 }, { "epoch": 0.13, "grad_norm": 2.1908140182495117, "learning_rate": 1.951603985713328e-05, "loss": 0.5776, "step": 4934 }, { "epoch": 0.13, "grad_norm": 1.928451418876648, "learning_rate": 1.9515784699069668e-05, "loss": 0.6455, "step": 4935 }, { "epoch": 0.13, "grad_norm": 2.8718080520629883, "learning_rate": 1.951552947542924e-05, "loss": 0.7821, "step": 4936 }, { "epoch": 0.13, "grad_norm": 2.263214588165283, "learning_rate": 1.9515274186213768e-05, "loss": 0.8139, "step": 4937 }, { "epoch": 0.13, "grad_norm": 2.0654244422912598, "learning_rate": 1.9515018831425004e-05, "loss": 0.7286, "step": 4938 }, { "epoch": 0.13, "grad_norm": 2.63958477973938, "learning_rate": 1.9514763411064707e-05, "loss": 0.7479, "step": 4939 }, { "epoch": 0.13, "grad_norm": 2.3708858489990234, "learning_rate": 1.9514507925134637e-05, "loss": 0.5805, "step": 4940 }, { "epoch": 0.13, "grad_norm": 2.006476879119873, "learning_rate": 1.9514252373636556e-05, "loss": 0.6465, "step": 4941 }, { "epoch": 0.13, "grad_norm": 1.943933367729187, "learning_rate": 1.9513996756572226e-05, "loss": 0.8022, "step": 4942 }, { "epoch": 0.13, "grad_norm": 3.8649022579193115, "learning_rate": 1.951374107394341e-05, "loss": 0.5765, "step": 4943 }, { "epoch": 0.13, "grad_norm": 1.7865042686462402, "learning_rate": 1.951348532575186e-05, "loss": 0.7201, "step": 4944 }, { "epoch": 0.13, "grad_norm": 2.365227222442627, "learning_rate": 1.9513229511999356e-05, "loss": 0.5842, "step": 4945 }, { "epoch": 0.13, "grad_norm": 1.9711568355560303, "learning_rate": 1.9512973632687645e-05, "loss": 0.7373, "step": 4946 }, { "epoch": 0.13, "grad_norm": 1.180811882019043, "learning_rate": 1.9512717687818495e-05, "loss": 0.5188, "step": 4947 }, { "epoch": 0.13, "grad_norm": 2.1888420581817627, "learning_rate": 1.9512461677393678e-05, "loss": 0.5534, "step": 4948 }, { "epoch": 0.13, "grad_norm": 2.65262508392334, "learning_rate": 1.9512205601414944e-05, "loss": 0.6662, "step": 4949 }, { "epoch": 0.13, "grad_norm": 5.705424785614014, "learning_rate": 1.951194945988407e-05, "loss": 0.6959, "step": 4950 }, { "epoch": 0.13, "grad_norm": 2.7078728675842285, "learning_rate": 1.9511693252802813e-05, "loss": 0.5882, "step": 4951 }, { "epoch": 0.13, "grad_norm": 5.059498310089111, "learning_rate": 1.9511436980172943e-05, "loss": 1.0075, "step": 4952 }, { "epoch": 0.13, "grad_norm": 2.5293655395507812, "learning_rate": 1.9511180641996228e-05, "loss": 0.6719, "step": 4953 }, { "epoch": 0.13, "grad_norm": 1.7493489980697632, "learning_rate": 1.9510924238274426e-05, "loss": 0.5801, "step": 4954 }, { "epoch": 0.13, "grad_norm": 1.9424090385437012, "learning_rate": 1.9510667769009316e-05, "loss": 0.5937, "step": 4955 }, { "epoch": 0.13, "grad_norm": 1.8780561685562134, "learning_rate": 1.9510411234202654e-05, "loss": 0.688, "step": 4956 }, { "epoch": 0.13, "grad_norm": 1.1435695886611938, "learning_rate": 1.9510154633856214e-05, "loss": 0.6195, "step": 4957 }, { "epoch": 0.13, "grad_norm": 1.4819875955581665, "learning_rate": 1.9509897967971762e-05, "loss": 0.6845, "step": 4958 }, { "epoch": 0.13, "grad_norm": 1.4379037618637085, "learning_rate": 1.950964123655107e-05, "loss": 0.5979, "step": 4959 }, { "epoch": 0.13, "grad_norm": 1.9974433183670044, "learning_rate": 1.9509384439595906e-05, "loss": 0.7021, "step": 4960 }, { "epoch": 0.13, "grad_norm": 3.7345592975616455, "learning_rate": 1.950912757710804e-05, "loss": 0.7099, "step": 4961 }, { "epoch": 0.13, "grad_norm": 2.5510354042053223, "learning_rate": 1.9508870649089235e-05, "loss": 0.7129, "step": 4962 }, { "epoch": 0.13, "grad_norm": 2.3999032974243164, "learning_rate": 1.9508613655541272e-05, "loss": 0.6836, "step": 4963 }, { "epoch": 0.13, "grad_norm": 3.2491321563720703, "learning_rate": 1.9508356596465915e-05, "loss": 0.6584, "step": 4964 }, { "epoch": 0.13, "grad_norm": 3.149028778076172, "learning_rate": 1.950809947186494e-05, "loss": 0.964, "step": 4965 }, { "epoch": 0.13, "grad_norm": 2.036486864089966, "learning_rate": 1.9507842281740117e-05, "loss": 0.8076, "step": 4966 }, { "epoch": 0.13, "grad_norm": 3.435595989227295, "learning_rate": 1.9507585026093218e-05, "loss": 0.8133, "step": 4967 }, { "epoch": 0.13, "grad_norm": 1.600956678390503, "learning_rate": 1.9507327704926015e-05, "loss": 0.6351, "step": 4968 }, { "epoch": 0.13, "grad_norm": 3.495864152908325, "learning_rate": 1.9507070318240283e-05, "loss": 0.5847, "step": 4969 }, { "epoch": 0.13, "grad_norm": 3.9481513500213623, "learning_rate": 1.95068128660378e-05, "loss": 0.7898, "step": 4970 }, { "epoch": 0.13, "grad_norm": 2.414294719696045, "learning_rate": 1.950655534832033e-05, "loss": 0.7851, "step": 4971 }, { "epoch": 0.13, "grad_norm": 2.74180269241333, "learning_rate": 1.9506297765089654e-05, "loss": 0.5616, "step": 4972 }, { "epoch": 0.13, "grad_norm": 2.346163034439087, "learning_rate": 1.9506040116347546e-05, "loss": 0.6129, "step": 4973 }, { "epoch": 0.13, "grad_norm": 3.005779266357422, "learning_rate": 1.9505782402095782e-05, "loss": 0.8115, "step": 4974 }, { "epoch": 0.13, "grad_norm": 2.064502477645874, "learning_rate": 1.9505524622336137e-05, "loss": 0.5179, "step": 4975 }, { "epoch": 0.13, "grad_norm": 3.705564260482788, "learning_rate": 1.9505266777070392e-05, "loss": 0.8628, "step": 4976 }, { "epoch": 0.13, "grad_norm": 3.100627899169922, "learning_rate": 1.9505008866300317e-05, "loss": 0.6256, "step": 4977 }, { "epoch": 0.13, "grad_norm": 2.0449469089508057, "learning_rate": 1.9504750890027692e-05, "loss": 0.664, "step": 4978 }, { "epoch": 0.13, "grad_norm": 1.9472635984420776, "learning_rate": 1.95044928482543e-05, "loss": 0.6286, "step": 4979 }, { "epoch": 0.13, "grad_norm": 2.7373509407043457, "learning_rate": 1.950423474098191e-05, "loss": 0.6976, "step": 4980 }, { "epoch": 0.13, "grad_norm": 2.4920713901519775, "learning_rate": 1.9503976568212306e-05, "loss": 0.7182, "step": 4981 }, { "epoch": 0.13, "grad_norm": 2.4042110443115234, "learning_rate": 1.950371832994727e-05, "loss": 0.5791, "step": 4982 }, { "epoch": 0.13, "grad_norm": 2.402083396911621, "learning_rate": 1.9503460026188573e-05, "loss": 0.7097, "step": 4983 }, { "epoch": 0.13, "grad_norm": 3.2365288734436035, "learning_rate": 1.9503201656938e-05, "loss": 0.6968, "step": 4984 }, { "epoch": 0.13, "grad_norm": 2.5147933959960938, "learning_rate": 1.9502943222197335e-05, "loss": 0.7464, "step": 4985 }, { "epoch": 0.13, "grad_norm": 3.867194175720215, "learning_rate": 1.9502684721968355e-05, "loss": 0.5016, "step": 4986 }, { "epoch": 0.13, "grad_norm": 2.5619256496429443, "learning_rate": 1.950242615625284e-05, "loss": 0.5347, "step": 4987 }, { "epoch": 0.13, "grad_norm": 3.1903557777404785, "learning_rate": 1.950216752505258e-05, "loss": 0.8014, "step": 4988 }, { "epoch": 0.13, "grad_norm": 3.6531548500061035, "learning_rate": 1.9501908828369344e-05, "loss": 0.5582, "step": 4989 }, { "epoch": 0.13, "grad_norm": 2.548973560333252, "learning_rate": 1.950165006620493e-05, "loss": 0.6967, "step": 4990 }, { "epoch": 0.13, "grad_norm": 1.6868075132369995, "learning_rate": 1.9501391238561105e-05, "loss": 0.637, "step": 4991 }, { "epoch": 0.13, "grad_norm": 2.1863341331481934, "learning_rate": 1.9501132345439663e-05, "loss": 0.6752, "step": 4992 }, { "epoch": 0.13, "grad_norm": 1.4955989122390747, "learning_rate": 1.950087338684239e-05, "loss": 0.6628, "step": 4993 }, { "epoch": 0.13, "grad_norm": 2.087116003036499, "learning_rate": 1.950061436277106e-05, "loss": 0.5964, "step": 4994 }, { "epoch": 0.13, "grad_norm": 2.648648262023926, "learning_rate": 1.9500355273227473e-05, "loss": 0.578, "step": 4995 }, { "epoch": 0.13, "grad_norm": 3.1205079555511475, "learning_rate": 1.95000961182134e-05, "loss": 0.6345, "step": 4996 }, { "epoch": 0.13, "grad_norm": 1.5206047296524048, "learning_rate": 1.949983689773063e-05, "loss": 0.638, "step": 4997 }, { "epoch": 0.13, "grad_norm": 2.415454387664795, "learning_rate": 1.9499577611780963e-05, "loss": 0.7692, "step": 4998 }, { "epoch": 0.13, "grad_norm": 2.8959052562713623, "learning_rate": 1.949931826036617e-05, "loss": 0.6524, "step": 4999 }, { "epoch": 0.13, "grad_norm": 3.428938865661621, "learning_rate": 1.9499058843488043e-05, "loss": 0.8915, "step": 5000 }, { "epoch": 0.13, "grad_norm": 3.3543853759765625, "learning_rate": 1.949879936114837e-05, "loss": 0.5701, "step": 5001 }, { "epoch": 0.13, "grad_norm": 3.0819091796875, "learning_rate": 1.949853981334894e-05, "loss": 0.7805, "step": 5002 }, { "epoch": 0.13, "grad_norm": 1.321841835975647, "learning_rate": 1.9498280200091542e-05, "loss": 0.6361, "step": 5003 }, { "epoch": 0.13, "grad_norm": 2.8669190406799316, "learning_rate": 1.949802052137796e-05, "loss": 0.8122, "step": 5004 }, { "epoch": 0.13, "grad_norm": 4.294612407684326, "learning_rate": 1.9497760777209994e-05, "loss": 0.6213, "step": 5005 }, { "epoch": 0.13, "grad_norm": 3.413925886154175, "learning_rate": 1.9497500967589423e-05, "loss": 0.6284, "step": 5006 }, { "epoch": 0.13, "grad_norm": 3.3711235523223877, "learning_rate": 1.9497241092518047e-05, "loss": 0.6817, "step": 5007 }, { "epoch": 0.13, "grad_norm": 1.6612508296966553, "learning_rate": 1.9496981151997648e-05, "loss": 0.7405, "step": 5008 }, { "epoch": 0.13, "grad_norm": 3.7325170040130615, "learning_rate": 1.9496721146030025e-05, "loss": 0.6666, "step": 5009 }, { "epoch": 0.13, "grad_norm": 2.245288848876953, "learning_rate": 1.9496461074616964e-05, "loss": 0.7785, "step": 5010 }, { "epoch": 0.13, "grad_norm": 1.338454246520996, "learning_rate": 1.949620093776026e-05, "loss": 0.4949, "step": 5011 }, { "epoch": 0.13, "grad_norm": 2.795217514038086, "learning_rate": 1.9495940735461703e-05, "loss": 0.5425, "step": 5012 }, { "epoch": 0.13, "grad_norm": 2.7536206245422363, "learning_rate": 1.949568046772309e-05, "loss": 0.6093, "step": 5013 }, { "epoch": 0.13, "grad_norm": 1.595487356185913, "learning_rate": 1.9495420134546218e-05, "loss": 0.8135, "step": 5014 }, { "epoch": 0.13, "grad_norm": 9.014505386352539, "learning_rate": 1.949515973593287e-05, "loss": 0.8577, "step": 5015 }, { "epoch": 0.13, "grad_norm": 1.2853413820266724, "learning_rate": 1.9494899271884846e-05, "loss": 0.5869, "step": 5016 }, { "epoch": 0.13, "grad_norm": 2.296638250350952, "learning_rate": 1.9494638742403943e-05, "loss": 0.6756, "step": 5017 }, { "epoch": 0.13, "grad_norm": 3.364254951477051, "learning_rate": 1.9494378147491957e-05, "loss": 0.5979, "step": 5018 }, { "epoch": 0.13, "grad_norm": 1.866726279258728, "learning_rate": 1.9494117487150678e-05, "loss": 0.6226, "step": 5019 }, { "epoch": 0.13, "grad_norm": 2.030132532119751, "learning_rate": 1.949385676138191e-05, "loss": 0.7687, "step": 5020 }, { "epoch": 0.13, "grad_norm": 2.1421453952789307, "learning_rate": 1.9493595970187444e-05, "loss": 0.4534, "step": 5021 }, { "epoch": 0.13, "grad_norm": 2.3793203830718994, "learning_rate": 1.9493335113569077e-05, "loss": 0.6218, "step": 5022 }, { "epoch": 0.13, "grad_norm": 3.578324556350708, "learning_rate": 1.9493074191528614e-05, "loss": 0.6834, "step": 5023 }, { "epoch": 0.13, "grad_norm": 1.990785837173462, "learning_rate": 1.9492813204067845e-05, "loss": 0.6663, "step": 5024 }, { "epoch": 0.13, "grad_norm": 2.158489942550659, "learning_rate": 1.949255215118857e-05, "loss": 0.6737, "step": 5025 }, { "epoch": 0.13, "grad_norm": 1.556146502494812, "learning_rate": 1.9492291032892588e-05, "loss": 0.5952, "step": 5026 }, { "epoch": 0.13, "grad_norm": 2.290347099304199, "learning_rate": 1.9492029849181706e-05, "loss": 0.6829, "step": 5027 }, { "epoch": 0.13, "grad_norm": 4.288005828857422, "learning_rate": 1.949176860005771e-05, "loss": 0.8599, "step": 5028 }, { "epoch": 0.13, "grad_norm": 1.8223659992218018, "learning_rate": 1.9491507285522417e-05, "loss": 0.6447, "step": 5029 }, { "epoch": 0.13, "grad_norm": 3.6291608810424805, "learning_rate": 1.9491245905577615e-05, "loss": 0.6438, "step": 5030 }, { "epoch": 0.13, "grad_norm": 1.6555227041244507, "learning_rate": 1.9490984460225103e-05, "loss": 0.6162, "step": 5031 }, { "epoch": 0.13, "grad_norm": 3.954423427581787, "learning_rate": 1.9490722949466695e-05, "loss": 0.6238, "step": 5032 }, { "epoch": 0.13, "grad_norm": 2.9936418533325195, "learning_rate": 1.9490461373304187e-05, "loss": 0.5589, "step": 5033 }, { "epoch": 0.13, "grad_norm": 3.4867169857025146, "learning_rate": 1.9490199731739382e-05, "loss": 0.811, "step": 5034 }, { "epoch": 0.13, "grad_norm": 1.5424212217330933, "learning_rate": 1.9489938024774082e-05, "loss": 0.6165, "step": 5035 }, { "epoch": 0.13, "grad_norm": 3.283320665359497, "learning_rate": 1.948967625241009e-05, "loss": 0.7727, "step": 5036 }, { "epoch": 0.13, "grad_norm": 4.495748043060303, "learning_rate": 1.9489414414649213e-05, "loss": 0.6942, "step": 5037 }, { "epoch": 0.13, "grad_norm": 3.3849198818206787, "learning_rate": 1.9489152511493254e-05, "loss": 0.7276, "step": 5038 }, { "epoch": 0.13, "grad_norm": 1.667833924293518, "learning_rate": 1.9488890542944014e-05, "loss": 0.7382, "step": 5039 }, { "epoch": 0.13, "grad_norm": 6.159302234649658, "learning_rate": 1.9488628509003306e-05, "loss": 0.6874, "step": 5040 }, { "epoch": 0.13, "grad_norm": 3.0125863552093506, "learning_rate": 1.948836640967293e-05, "loss": 0.7645, "step": 5041 }, { "epoch": 0.13, "grad_norm": 1.614789605140686, "learning_rate": 1.9488104244954696e-05, "loss": 0.7118, "step": 5042 }, { "epoch": 0.13, "grad_norm": 2.400193214416504, "learning_rate": 1.9487842014850407e-05, "loss": 0.7689, "step": 5043 }, { "epoch": 0.13, "grad_norm": 7.304494380950928, "learning_rate": 1.948757971936187e-05, "loss": 0.7978, "step": 5044 }, { "epoch": 0.13, "grad_norm": 3.9279770851135254, "learning_rate": 1.9487317358490895e-05, "loss": 0.6734, "step": 5045 }, { "epoch": 0.13, "grad_norm": 2.598076343536377, "learning_rate": 1.9487054932239287e-05, "loss": 0.6626, "step": 5046 }, { "epoch": 0.13, "grad_norm": 1.8992096185684204, "learning_rate": 1.9486792440608863e-05, "loss": 0.557, "step": 5047 }, { "epoch": 0.13, "grad_norm": 3.3763644695281982, "learning_rate": 1.9486529883601423e-05, "loss": 0.7407, "step": 5048 }, { "epoch": 0.13, "grad_norm": 3.319722890853882, "learning_rate": 1.9486267261218777e-05, "loss": 0.5641, "step": 5049 }, { "epoch": 0.13, "grad_norm": 2.786306381225586, "learning_rate": 1.948600457346274e-05, "loss": 0.7216, "step": 5050 }, { "epoch": 0.13, "grad_norm": 2.7526376247406006, "learning_rate": 1.9485741820335117e-05, "loss": 0.8201, "step": 5051 }, { "epoch": 0.13, "grad_norm": 1.8692078590393066, "learning_rate": 1.948547900183772e-05, "loss": 0.6084, "step": 5052 }, { "epoch": 0.13, "grad_norm": 5.892155170440674, "learning_rate": 1.9485216117972362e-05, "loss": 0.7581, "step": 5053 }, { "epoch": 0.13, "grad_norm": 1.7361352443695068, "learning_rate": 1.948495316874085e-05, "loss": 0.7419, "step": 5054 }, { "epoch": 0.13, "grad_norm": 1.4298714399337769, "learning_rate": 1.9484690154145007e-05, "loss": 0.7855, "step": 5055 }, { "epoch": 0.13, "grad_norm": 2.340099811553955, "learning_rate": 1.9484427074186635e-05, "loss": 0.7645, "step": 5056 }, { "epoch": 0.13, "grad_norm": 2.575169324874878, "learning_rate": 1.9484163928867552e-05, "loss": 0.7421, "step": 5057 }, { "epoch": 0.13, "grad_norm": 1.919022798538208, "learning_rate": 1.9483900718189568e-05, "loss": 0.7039, "step": 5058 }, { "epoch": 0.13, "grad_norm": 2.610550880432129, "learning_rate": 1.94836374421545e-05, "loss": 0.6705, "step": 5059 }, { "epoch": 0.13, "grad_norm": 2.183680295944214, "learning_rate": 1.948337410076416e-05, "loss": 0.7557, "step": 5060 }, { "epoch": 0.13, "grad_norm": 1.605452299118042, "learning_rate": 1.9483110694020363e-05, "loss": 0.5577, "step": 5061 }, { "epoch": 0.13, "grad_norm": 2.6870806217193604, "learning_rate": 1.9482847221924924e-05, "loss": 0.774, "step": 5062 }, { "epoch": 0.13, "grad_norm": 2.7742621898651123, "learning_rate": 1.948258368447966e-05, "loss": 0.7382, "step": 5063 }, { "epoch": 0.13, "grad_norm": 2.2727816104888916, "learning_rate": 1.9482320081686386e-05, "loss": 0.6179, "step": 5064 }, { "epoch": 0.13, "grad_norm": 1.5525424480438232, "learning_rate": 1.9482056413546924e-05, "loss": 0.6401, "step": 5065 }, { "epoch": 0.13, "grad_norm": 2.2021186351776123, "learning_rate": 1.9481792680063084e-05, "loss": 0.617, "step": 5066 }, { "epoch": 0.13, "grad_norm": 2.770754814147949, "learning_rate": 1.9481528881236683e-05, "loss": 0.6739, "step": 5067 }, { "epoch": 0.13, "grad_norm": 2.4587206840515137, "learning_rate": 1.9481265017069547e-05, "loss": 0.7253, "step": 5068 }, { "epoch": 0.13, "grad_norm": 3.6760499477386475, "learning_rate": 1.9481001087563486e-05, "loss": 0.5999, "step": 5069 }, { "epoch": 0.13, "grad_norm": 2.5890936851501465, "learning_rate": 1.9480737092720322e-05, "loss": 0.6605, "step": 5070 }, { "epoch": 0.13, "grad_norm": 2.535322904586792, "learning_rate": 1.9480473032541875e-05, "loss": 0.6339, "step": 5071 }, { "epoch": 0.13, "grad_norm": 1.399897813796997, "learning_rate": 1.9480208907029963e-05, "loss": 0.6615, "step": 5072 }, { "epoch": 0.13, "grad_norm": 1.965139627456665, "learning_rate": 1.947994471618641e-05, "loss": 0.7464, "step": 5073 }, { "epoch": 0.13, "grad_norm": 2.85653018951416, "learning_rate": 1.9479680460013034e-05, "loss": 0.7309, "step": 5074 }, { "epoch": 0.13, "grad_norm": 3.1929595470428467, "learning_rate": 1.9479416138511655e-05, "loss": 0.7581, "step": 5075 }, { "epoch": 0.13, "grad_norm": 3.3659708499908447, "learning_rate": 1.9479151751684092e-05, "loss": 0.5985, "step": 5076 }, { "epoch": 0.13, "grad_norm": 1.8183118104934692, "learning_rate": 1.9478887299532174e-05, "loss": 0.5729, "step": 5077 }, { "epoch": 0.13, "grad_norm": 2.019458532333374, "learning_rate": 1.947862278205772e-05, "loss": 0.5802, "step": 5078 }, { "epoch": 0.13, "grad_norm": 3.797212839126587, "learning_rate": 1.9478358199262554e-05, "loss": 0.7392, "step": 5079 }, { "epoch": 0.13, "grad_norm": 1.7188462018966675, "learning_rate": 1.9478093551148496e-05, "loss": 0.7177, "step": 5080 }, { "epoch": 0.13, "grad_norm": 2.5938801765441895, "learning_rate": 1.947782883771737e-05, "loss": 0.575, "step": 5081 }, { "epoch": 0.13, "grad_norm": 7.322521209716797, "learning_rate": 1.9477564058971006e-05, "loss": 0.7009, "step": 5082 }, { "epoch": 0.13, "grad_norm": 2.2801098823547363, "learning_rate": 1.9477299214911223e-05, "loss": 0.6828, "step": 5083 }, { "epoch": 0.13, "grad_norm": 2.302020311355591, "learning_rate": 1.9477034305539847e-05, "loss": 0.6512, "step": 5084 }, { "epoch": 0.13, "grad_norm": 2.627272129058838, "learning_rate": 1.9476769330858708e-05, "loss": 0.7321, "step": 5085 }, { "epoch": 0.13, "grad_norm": 2.739288330078125, "learning_rate": 1.9476504290869623e-05, "loss": 0.5038, "step": 5086 }, { "epoch": 0.13, "grad_norm": 4.589797019958496, "learning_rate": 1.947623918557443e-05, "loss": 0.7992, "step": 5087 }, { "epoch": 0.13, "grad_norm": 2.0735201835632324, "learning_rate": 1.947597401497495e-05, "loss": 0.6786, "step": 5088 }, { "epoch": 0.13, "grad_norm": 2.317246198654175, "learning_rate": 1.9475708779073006e-05, "loss": 0.7457, "step": 5089 }, { "epoch": 0.13, "grad_norm": 1.9546416997909546, "learning_rate": 1.9475443477870436e-05, "loss": 0.5493, "step": 5090 }, { "epoch": 0.13, "grad_norm": 2.3718366622924805, "learning_rate": 1.9475178111369057e-05, "loss": 0.7272, "step": 5091 }, { "epoch": 0.13, "grad_norm": 3.137364387512207, "learning_rate": 1.9474912679570707e-05, "loss": 0.7782, "step": 5092 }, { "epoch": 0.13, "grad_norm": 2.4993228912353516, "learning_rate": 1.947464718247721e-05, "loss": 0.8118, "step": 5093 }, { "epoch": 0.13, "grad_norm": 4.35783052444458, "learning_rate": 1.9474381620090397e-05, "loss": 0.6319, "step": 5094 }, { "epoch": 0.13, "grad_norm": 3.774235725402832, "learning_rate": 1.94741159924121e-05, "loss": 0.8212, "step": 5095 }, { "epoch": 0.13, "grad_norm": 1.7378952503204346, "learning_rate": 1.9473850299444145e-05, "loss": 0.7393, "step": 5096 }, { "epoch": 0.13, "grad_norm": 2.36686372756958, "learning_rate": 1.9473584541188367e-05, "loss": 0.6237, "step": 5097 }, { "epoch": 0.13, "grad_norm": 1.8435776233673096, "learning_rate": 1.9473318717646596e-05, "loss": 0.7294, "step": 5098 }, { "epoch": 0.13, "grad_norm": 3.694117307662964, "learning_rate": 1.9473052828820665e-05, "loss": 0.6409, "step": 5099 }, { "epoch": 0.13, "grad_norm": 7.497751235961914, "learning_rate": 1.9472786874712404e-05, "loss": 0.8064, "step": 5100 }, { "epoch": 0.13, "grad_norm": 4.349837303161621, "learning_rate": 1.947252085532365e-05, "loss": 0.7046, "step": 5101 }, { "epoch": 0.13, "grad_norm": 1.7658896446228027, "learning_rate": 1.947225477065623e-05, "loss": 0.5192, "step": 5102 }, { "epoch": 0.13, "grad_norm": 1.9501606225967407, "learning_rate": 1.9471988620711984e-05, "loss": 0.6017, "step": 5103 }, { "epoch": 0.13, "grad_norm": 9.091826438903809, "learning_rate": 1.947172240549274e-05, "loss": 0.6133, "step": 5104 }, { "epoch": 0.13, "grad_norm": 1.6574628353118896, "learning_rate": 1.947145612500034e-05, "loss": 0.6476, "step": 5105 }, { "epoch": 0.13, "grad_norm": 1.6859312057495117, "learning_rate": 1.9471189779236614e-05, "loss": 0.5975, "step": 5106 }, { "epoch": 0.13, "grad_norm": 1.5823434591293335, "learning_rate": 1.94709233682034e-05, "loss": 0.5666, "step": 5107 }, { "epoch": 0.13, "grad_norm": 2.022803783416748, "learning_rate": 1.9470656891902525e-05, "loss": 0.6401, "step": 5108 }, { "epoch": 0.13, "grad_norm": 3.0417723655700684, "learning_rate": 1.947039035033584e-05, "loss": 0.5776, "step": 5109 }, { "epoch": 0.13, "grad_norm": 2.9806900024414062, "learning_rate": 1.947012374350517e-05, "loss": 0.8314, "step": 5110 }, { "epoch": 0.13, "grad_norm": 4.331430912017822, "learning_rate": 1.9469857071412362e-05, "loss": 0.5588, "step": 5111 }, { "epoch": 0.13, "grad_norm": 1.8697888851165771, "learning_rate": 1.9469590334059244e-05, "loss": 0.6852, "step": 5112 }, { "epoch": 0.13, "grad_norm": 3.012700319290161, "learning_rate": 1.9469323531447658e-05, "loss": 0.7935, "step": 5113 }, { "epoch": 0.13, "grad_norm": 2.1307578086853027, "learning_rate": 1.9469056663579447e-05, "loss": 0.6388, "step": 5114 }, { "epoch": 0.13, "grad_norm": 2.6721765995025635, "learning_rate": 1.9468789730456445e-05, "loss": 0.6296, "step": 5115 }, { "epoch": 0.13, "grad_norm": 3.490863084793091, "learning_rate": 1.9468522732080495e-05, "loss": 0.4892, "step": 5116 }, { "epoch": 0.13, "grad_norm": 3.169567108154297, "learning_rate": 1.9468255668453433e-05, "loss": 0.6648, "step": 5117 }, { "epoch": 0.13, "grad_norm": 2.092607021331787, "learning_rate": 1.9467988539577102e-05, "loss": 0.486, "step": 5118 }, { "epoch": 0.13, "grad_norm": 1.5971064567565918, "learning_rate": 1.9467721345453342e-05, "loss": 0.5823, "step": 5119 }, { "epoch": 0.13, "grad_norm": 5.827827453613281, "learning_rate": 1.9467454086083996e-05, "loss": 0.6499, "step": 5120 }, { "epoch": 0.13, "grad_norm": 2.3616082668304443, "learning_rate": 1.9467186761470904e-05, "loss": 0.7217, "step": 5121 }, { "epoch": 0.13, "grad_norm": 8.3374662399292, "learning_rate": 1.946691937161591e-05, "loss": 0.7182, "step": 5122 }, { "epoch": 0.13, "grad_norm": 2.2871510982513428, "learning_rate": 1.9466651916520854e-05, "loss": 0.6268, "step": 5123 }, { "epoch": 0.13, "grad_norm": 1.7132136821746826, "learning_rate": 1.946638439618758e-05, "loss": 0.5403, "step": 5124 }, { "epoch": 0.13, "grad_norm": 2.35068941116333, "learning_rate": 1.946611681061793e-05, "loss": 0.6124, "step": 5125 }, { "epoch": 0.13, "grad_norm": 4.044684886932373, "learning_rate": 1.9465849159813755e-05, "loss": 0.7884, "step": 5126 }, { "epoch": 0.13, "grad_norm": 5.0199713706970215, "learning_rate": 1.9465581443776892e-05, "loss": 0.7333, "step": 5127 }, { "epoch": 0.13, "grad_norm": 4.564304351806641, "learning_rate": 1.9465313662509185e-05, "loss": 0.5953, "step": 5128 }, { "epoch": 0.13, "grad_norm": 4.335474014282227, "learning_rate": 1.9465045816012487e-05, "loss": 0.7108, "step": 5129 }, { "epoch": 0.13, "grad_norm": 3.0386674404144287, "learning_rate": 1.946477790428864e-05, "loss": 0.6655, "step": 5130 }, { "epoch": 0.13, "grad_norm": 1.659191370010376, "learning_rate": 1.946450992733949e-05, "loss": 0.658, "step": 5131 }, { "epoch": 0.13, "grad_norm": 3.722883462905884, "learning_rate": 1.946424188516688e-05, "loss": 0.8403, "step": 5132 }, { "epoch": 0.13, "grad_norm": 2.8947463035583496, "learning_rate": 1.9463973777772667e-05, "loss": 0.6155, "step": 5133 }, { "epoch": 0.13, "grad_norm": 1.2511941194534302, "learning_rate": 1.946370560515869e-05, "loss": 0.6488, "step": 5134 }, { "epoch": 0.13, "grad_norm": 5.031335353851318, "learning_rate": 1.94634373673268e-05, "loss": 0.6701, "step": 5135 }, { "epoch": 0.13, "grad_norm": 2.753093719482422, "learning_rate": 1.9463169064278846e-05, "loss": 0.6856, "step": 5136 }, { "epoch": 0.13, "grad_norm": 6.35104513168335, "learning_rate": 1.9462900696016673e-05, "loss": 0.7304, "step": 5137 }, { "epoch": 0.13, "grad_norm": 9.811905860900879, "learning_rate": 1.9462632262542134e-05, "loss": 0.8402, "step": 5138 }, { "epoch": 0.13, "grad_norm": 2.83373761177063, "learning_rate": 1.946236376385708e-05, "loss": 0.7518, "step": 5139 }, { "epoch": 0.13, "grad_norm": 10.401748657226562, "learning_rate": 1.946209519996336e-05, "loss": 0.9361, "step": 5140 }, { "epoch": 0.13, "grad_norm": 1.6132347583770752, "learning_rate": 1.9461826570862824e-05, "loss": 0.7764, "step": 5141 }, { "epoch": 0.13, "grad_norm": 1.4020367860794067, "learning_rate": 1.9461557876557323e-05, "loss": 0.6843, "step": 5142 }, { "epoch": 0.13, "grad_norm": 2.34993577003479, "learning_rate": 1.946128911704871e-05, "loss": 0.5021, "step": 5143 }, { "epoch": 0.13, "grad_norm": 3.468144178390503, "learning_rate": 1.9461020292338836e-05, "loss": 0.7119, "step": 5144 }, { "epoch": 0.13, "grad_norm": 5.905263900756836, "learning_rate": 1.9460751402429556e-05, "loss": 0.6855, "step": 5145 }, { "epoch": 0.13, "grad_norm": 3.0952060222625732, "learning_rate": 1.946048244732272e-05, "loss": 0.5693, "step": 5146 }, { "epoch": 0.13, "grad_norm": 2.011265277862549, "learning_rate": 1.9460213427020184e-05, "loss": 0.6632, "step": 5147 }, { "epoch": 0.13, "grad_norm": 4.782446384429932, "learning_rate": 1.9459944341523796e-05, "loss": 0.6529, "step": 5148 }, { "epoch": 0.13, "grad_norm": 2.34716796875, "learning_rate": 1.945967519083542e-05, "loss": 0.6703, "step": 5149 }, { "epoch": 0.13, "grad_norm": 10.570140838623047, "learning_rate": 1.9459405974956904e-05, "loss": 0.8995, "step": 5150 }, { "epoch": 0.13, "grad_norm": 1.5501466989517212, "learning_rate": 1.9459136693890105e-05, "loss": 0.6089, "step": 5151 }, { "epoch": 0.13, "grad_norm": 2.161881685256958, "learning_rate": 1.9458867347636878e-05, "loss": 0.541, "step": 5152 }, { "epoch": 0.13, "grad_norm": 3.3565568923950195, "learning_rate": 1.945859793619908e-05, "loss": 0.8069, "step": 5153 }, { "epoch": 0.13, "grad_norm": 5.938107967376709, "learning_rate": 1.945832845957857e-05, "loss": 0.6661, "step": 5154 }, { "epoch": 0.13, "grad_norm": 6.292395114898682, "learning_rate": 1.9458058917777198e-05, "loss": 0.6178, "step": 5155 }, { "epoch": 0.13, "grad_norm": 2.225590467453003, "learning_rate": 1.945778931079683e-05, "loss": 0.5114, "step": 5156 }, { "epoch": 0.13, "grad_norm": 1.7666512727737427, "learning_rate": 1.945751963863932e-05, "loss": 0.7534, "step": 5157 }, { "epoch": 0.13, "grad_norm": 4.236350059509277, "learning_rate": 1.9457249901306524e-05, "loss": 0.8086, "step": 5158 }, { "epoch": 0.13, "grad_norm": 2.188844680786133, "learning_rate": 1.9456980098800304e-05, "loss": 0.5105, "step": 5159 }, { "epoch": 0.13, "grad_norm": 1.8740092515945435, "learning_rate": 1.9456710231122518e-05, "loss": 0.7058, "step": 5160 }, { "epoch": 0.13, "grad_norm": 1.377822756767273, "learning_rate": 1.945644029827503e-05, "loss": 0.651, "step": 5161 }, { "epoch": 0.13, "grad_norm": 3.2987828254699707, "learning_rate": 1.9456170300259695e-05, "loss": 0.6647, "step": 5162 }, { "epoch": 0.13, "grad_norm": 2.3347251415252686, "learning_rate": 1.945590023707837e-05, "loss": 0.6973, "step": 5163 }, { "epoch": 0.13, "grad_norm": 6.156787872314453, "learning_rate": 1.9455630108732924e-05, "loss": 0.6178, "step": 5164 }, { "epoch": 0.13, "grad_norm": 2.270329236984253, "learning_rate": 1.945535991522522e-05, "loss": 0.6706, "step": 5165 }, { "epoch": 0.13, "grad_norm": 1.8961167335510254, "learning_rate": 1.9455089656557114e-05, "loss": 0.6606, "step": 5166 }, { "epoch": 0.13, "grad_norm": 4.176749229431152, "learning_rate": 1.945481933273047e-05, "loss": 0.8194, "step": 5167 }, { "epoch": 0.13, "grad_norm": 2.431116819381714, "learning_rate": 1.945454894374715e-05, "loss": 0.6755, "step": 5168 }, { "epoch": 0.13, "grad_norm": 2.2716495990753174, "learning_rate": 1.945427848960902e-05, "loss": 0.7232, "step": 5169 }, { "epoch": 0.13, "grad_norm": 2.3581089973449707, "learning_rate": 1.9454007970317942e-05, "loss": 0.8446, "step": 5170 }, { "epoch": 0.13, "grad_norm": 1.7768980264663696, "learning_rate": 1.945373738587578e-05, "loss": 0.6375, "step": 5171 }, { "epoch": 0.13, "grad_norm": 2.193082809448242, "learning_rate": 1.9453466736284402e-05, "loss": 0.6537, "step": 5172 }, { "epoch": 0.13, "grad_norm": 2.17549991607666, "learning_rate": 1.945319602154567e-05, "loss": 0.7185, "step": 5173 }, { "epoch": 0.13, "grad_norm": 4.156865119934082, "learning_rate": 1.9452925241661447e-05, "loss": 0.6465, "step": 5174 }, { "epoch": 0.13, "grad_norm": 3.7853376865386963, "learning_rate": 1.94526543966336e-05, "loss": 0.7032, "step": 5175 }, { "epoch": 0.13, "grad_norm": 5.8681182861328125, "learning_rate": 1.9452383486464005e-05, "loss": 0.7426, "step": 5176 }, { "epoch": 0.13, "grad_norm": 2.3824894428253174, "learning_rate": 1.9452112511154516e-05, "loss": 0.6339, "step": 5177 }, { "epoch": 0.13, "grad_norm": 5.884268283843994, "learning_rate": 1.9451841470707008e-05, "loss": 0.722, "step": 5178 }, { "epoch": 0.13, "grad_norm": 1.7716975212097168, "learning_rate": 1.9451570365123347e-05, "loss": 0.6844, "step": 5179 }, { "epoch": 0.13, "grad_norm": 4.234655857086182, "learning_rate": 1.9451299194405403e-05, "loss": 0.6197, "step": 5180 }, { "epoch": 0.13, "grad_norm": 4.9041337966918945, "learning_rate": 1.945102795855504e-05, "loss": 0.6713, "step": 5181 }, { "epoch": 0.13, "grad_norm": 2.215514659881592, "learning_rate": 1.945075665757413e-05, "loss": 0.5358, "step": 5182 }, { "epoch": 0.13, "grad_norm": 2.881380796432495, "learning_rate": 1.9450485291464544e-05, "loss": 0.7044, "step": 5183 }, { "epoch": 0.13, "grad_norm": 3.3452517986297607, "learning_rate": 1.945021386022815e-05, "loss": 0.5342, "step": 5184 }, { "epoch": 0.13, "grad_norm": 2.104635715484619, "learning_rate": 1.9449942363866817e-05, "loss": 0.7305, "step": 5185 }, { "epoch": 0.13, "grad_norm": 2.7058091163635254, "learning_rate": 1.9449670802382424e-05, "loss": 0.7123, "step": 5186 }, { "epoch": 0.13, "grad_norm": 1.56976318359375, "learning_rate": 1.944939917577683e-05, "loss": 0.5558, "step": 5187 }, { "epoch": 0.13, "grad_norm": 1.9331384897232056, "learning_rate": 1.944912748405192e-05, "loss": 0.7629, "step": 5188 }, { "epoch": 0.13, "grad_norm": 2.519886016845703, "learning_rate": 1.9448855727209555e-05, "loss": 0.7083, "step": 5189 }, { "epoch": 0.13, "grad_norm": 3.727675437927246, "learning_rate": 1.9448583905251618e-05, "loss": 0.6794, "step": 5190 }, { "epoch": 0.13, "grad_norm": 4.211310386657715, "learning_rate": 1.944831201817997e-05, "loss": 0.7522, "step": 5191 }, { "epoch": 0.13, "grad_norm": 6.324059963226318, "learning_rate": 1.9448040065996498e-05, "loss": 0.9112, "step": 5192 }, { "epoch": 0.13, "grad_norm": 2.539048194885254, "learning_rate": 1.9447768048703066e-05, "loss": 0.5734, "step": 5193 }, { "epoch": 0.13, "grad_norm": 3.134500026702881, "learning_rate": 1.9447495966301552e-05, "loss": 0.6227, "step": 5194 }, { "epoch": 0.13, "grad_norm": 2.4630911350250244, "learning_rate": 1.9447223818793833e-05, "loss": 0.6711, "step": 5195 }, { "epoch": 0.13, "grad_norm": 2.612381935119629, "learning_rate": 1.9446951606181783e-05, "loss": 0.6047, "step": 5196 }, { "epoch": 0.13, "grad_norm": 3.3399205207824707, "learning_rate": 1.9446679328467275e-05, "loss": 0.5989, "step": 5197 }, { "epoch": 0.13, "grad_norm": 3.129286527633667, "learning_rate": 1.944640698565219e-05, "loss": 0.7358, "step": 5198 }, { "epoch": 0.13, "grad_norm": 3.5801618099212646, "learning_rate": 1.94461345777384e-05, "loss": 0.8286, "step": 5199 }, { "epoch": 0.13, "grad_norm": 1.8898497819900513, "learning_rate": 1.944586210472779e-05, "loss": 0.8544, "step": 5200 }, { "epoch": 0.13, "grad_norm": 2.968341827392578, "learning_rate": 1.944558956662223e-05, "loss": 0.6456, "step": 5201 }, { "epoch": 0.13, "grad_norm": 2.6496291160583496, "learning_rate": 1.9445316963423604e-05, "loss": 0.7646, "step": 5202 }, { "epoch": 0.13, "grad_norm": 2.6555447578430176, "learning_rate": 1.9445044295133785e-05, "loss": 0.6975, "step": 5203 }, { "epoch": 0.13, "grad_norm": 1.8636456727981567, "learning_rate": 1.9444771561754654e-05, "loss": 0.7563, "step": 5204 }, { "epoch": 0.13, "grad_norm": 2.960513114929199, "learning_rate": 1.9444498763288092e-05, "loss": 0.7784, "step": 5205 }, { "epoch": 0.13, "grad_norm": 3.9684441089630127, "learning_rate": 1.944422589973598e-05, "loss": 0.8493, "step": 5206 }, { "epoch": 0.13, "grad_norm": 2.319242000579834, "learning_rate": 1.944395297110019e-05, "loss": 0.5698, "step": 5207 }, { "epoch": 0.13, "grad_norm": 2.4278616905212402, "learning_rate": 1.9443679977382618e-05, "loss": 0.6734, "step": 5208 }, { "epoch": 0.13, "grad_norm": 2.742941379547119, "learning_rate": 1.944340691858513e-05, "loss": 0.82, "step": 5209 }, { "epoch": 0.13, "grad_norm": 4.217016696929932, "learning_rate": 1.9443133794709622e-05, "loss": 0.7698, "step": 5210 }, { "epoch": 0.13, "grad_norm": 1.2250672578811646, "learning_rate": 1.944286060575796e-05, "loss": 0.6289, "step": 5211 }, { "epoch": 0.13, "grad_norm": 1.689251184463501, "learning_rate": 1.9442587351732044e-05, "loss": 0.659, "step": 5212 }, { "epoch": 0.13, "grad_norm": 2.147319793701172, "learning_rate": 1.9442314032633744e-05, "loss": 0.6251, "step": 5213 }, { "epoch": 0.13, "grad_norm": 1.801468849182129, "learning_rate": 1.9442040648464945e-05, "loss": 0.6929, "step": 5214 }, { "epoch": 0.13, "grad_norm": 2.9662046432495117, "learning_rate": 1.9441767199227537e-05, "loss": 0.6186, "step": 5215 }, { "epoch": 0.13, "grad_norm": 3.0807793140411377, "learning_rate": 1.9441493684923402e-05, "loss": 0.6447, "step": 5216 }, { "epoch": 0.13, "grad_norm": 2.871105670928955, "learning_rate": 1.944122010555442e-05, "loss": 0.637, "step": 5217 }, { "epoch": 0.13, "grad_norm": 2.330385208129883, "learning_rate": 1.9440946461122485e-05, "loss": 0.7854, "step": 5218 }, { "epoch": 0.13, "grad_norm": 2.389803886413574, "learning_rate": 1.9440672751629477e-05, "loss": 0.7813, "step": 5219 }, { "epoch": 0.13, "grad_norm": 2.159067392349243, "learning_rate": 1.9440398977077284e-05, "loss": 0.6808, "step": 5220 }, { "epoch": 0.13, "grad_norm": 3.7227795124053955, "learning_rate": 1.9440125137467795e-05, "loss": 0.6291, "step": 5221 }, { "epoch": 0.13, "grad_norm": 3.529275417327881, "learning_rate": 1.9439851232802888e-05, "loss": 0.7037, "step": 5222 }, { "epoch": 0.13, "grad_norm": 1.418704628944397, "learning_rate": 1.9439577263084462e-05, "loss": 0.6822, "step": 5223 }, { "epoch": 0.13, "grad_norm": 3.601703643798828, "learning_rate": 1.9439303228314397e-05, "loss": 0.7498, "step": 5224 }, { "epoch": 0.13, "grad_norm": 2.5875298976898193, "learning_rate": 1.9439029128494587e-05, "loss": 0.5928, "step": 5225 }, { "epoch": 0.13, "grad_norm": 2.022526502609253, "learning_rate": 1.943875496362692e-05, "loss": 0.6797, "step": 5226 }, { "epoch": 0.13, "grad_norm": 1.4337798357009888, "learning_rate": 1.9438480733713277e-05, "loss": 0.6377, "step": 5227 }, { "epoch": 0.13, "grad_norm": 5.894556045532227, "learning_rate": 1.943820643875556e-05, "loss": 0.395, "step": 5228 }, { "epoch": 0.13, "grad_norm": 2.04453444480896, "learning_rate": 1.9437932078755653e-05, "loss": 0.6048, "step": 5229 }, { "epoch": 0.13, "grad_norm": 2.0063014030456543, "learning_rate": 1.943765765371545e-05, "loss": 0.6496, "step": 5230 }, { "epoch": 0.13, "grad_norm": 2.1326379776000977, "learning_rate": 1.9437383163636833e-05, "loss": 0.7135, "step": 5231 }, { "epoch": 0.13, "grad_norm": 2.309650421142578, "learning_rate": 1.9437108608521706e-05, "loss": 0.7242, "step": 5232 }, { "epoch": 0.13, "grad_norm": 2.692883253097534, "learning_rate": 1.943683398837195e-05, "loss": 0.836, "step": 5233 }, { "epoch": 0.13, "grad_norm": 3.843273878097534, "learning_rate": 1.943655930318947e-05, "loss": 0.7298, "step": 5234 }, { "epoch": 0.13, "grad_norm": 2.5551106929779053, "learning_rate": 1.9436284552976147e-05, "loss": 0.7014, "step": 5235 }, { "epoch": 0.13, "grad_norm": 5.6993021965026855, "learning_rate": 1.9436009737733882e-05, "loss": 0.6916, "step": 5236 }, { "epoch": 0.13, "grad_norm": 2.62614107131958, "learning_rate": 1.9435734857464565e-05, "loss": 0.5272, "step": 5237 }, { "epoch": 0.13, "grad_norm": 2.6060144901275635, "learning_rate": 1.9435459912170094e-05, "loss": 0.6716, "step": 5238 }, { "epoch": 0.13, "grad_norm": 2.0079052448272705, "learning_rate": 1.9435184901852357e-05, "loss": 0.586, "step": 5239 }, { "epoch": 0.13, "grad_norm": 3.423676013946533, "learning_rate": 1.9434909826513256e-05, "loss": 0.65, "step": 5240 }, { "epoch": 0.13, "grad_norm": 3.1126344203948975, "learning_rate": 1.9434634686154684e-05, "loss": 0.7716, "step": 5241 }, { "epoch": 0.13, "grad_norm": 3.1744067668914795, "learning_rate": 1.9434359480778536e-05, "loss": 0.7229, "step": 5242 }, { "epoch": 0.13, "grad_norm": 2.213087558746338, "learning_rate": 1.943408421038671e-05, "loss": 0.74, "step": 5243 }, { "epoch": 0.13, "grad_norm": 2.882875442504883, "learning_rate": 1.9433808874981103e-05, "loss": 0.6647, "step": 5244 }, { "epoch": 0.13, "grad_norm": 3.8335816860198975, "learning_rate": 1.9433533474563612e-05, "loss": 0.6645, "step": 5245 }, { "epoch": 0.13, "grad_norm": 5.565515995025635, "learning_rate": 1.9433258009136136e-05, "loss": 0.8522, "step": 5246 }, { "epoch": 0.13, "grad_norm": 3.2351889610290527, "learning_rate": 1.943298247870057e-05, "loss": 0.7998, "step": 5247 }, { "epoch": 0.13, "grad_norm": 1.663283348083496, "learning_rate": 1.943270688325882e-05, "loss": 0.667, "step": 5248 }, { "epoch": 0.13, "grad_norm": 1.7843236923217773, "learning_rate": 1.9432431222812774e-05, "loss": 0.7312, "step": 5249 }, { "epoch": 0.13, "grad_norm": 2.954223155975342, "learning_rate": 1.9432155497364343e-05, "loss": 0.5902, "step": 5250 }, { "epoch": 0.13, "grad_norm": 3.230309247970581, "learning_rate": 1.943187970691542e-05, "loss": 0.7514, "step": 5251 }, { "epoch": 0.13, "grad_norm": 2.9718241691589355, "learning_rate": 1.9431603851467907e-05, "loss": 0.558, "step": 5252 }, { "epoch": 0.13, "grad_norm": 2.5220460891723633, "learning_rate": 1.9431327931023706e-05, "loss": 0.722, "step": 5253 }, { "epoch": 0.13, "grad_norm": 4.024313926696777, "learning_rate": 1.943105194558472e-05, "loss": 0.7699, "step": 5254 }, { "epoch": 0.13, "grad_norm": 4.604762077331543, "learning_rate": 1.943077589515285e-05, "loss": 0.6193, "step": 5255 }, { "epoch": 0.13, "grad_norm": 1.7864850759506226, "learning_rate": 1.9430499779729993e-05, "loss": 0.5111, "step": 5256 }, { "epoch": 0.13, "grad_norm": 3.152909517288208, "learning_rate": 1.943022359931806e-05, "loss": 0.8215, "step": 5257 }, { "epoch": 0.13, "grad_norm": 2.957254648208618, "learning_rate": 1.9429947353918948e-05, "loss": 0.866, "step": 5258 }, { "epoch": 0.13, "grad_norm": 1.8862546682357788, "learning_rate": 1.9429671043534565e-05, "loss": 0.6824, "step": 5259 }, { "epoch": 0.13, "grad_norm": 2.06235408782959, "learning_rate": 1.9429394668166813e-05, "loss": 0.4838, "step": 5260 }, { "epoch": 0.13, "grad_norm": 2.706326961517334, "learning_rate": 1.9429118227817598e-05, "loss": 0.6682, "step": 5261 }, { "epoch": 0.13, "grad_norm": 1.593981385231018, "learning_rate": 1.9428841722488822e-05, "loss": 0.7366, "step": 5262 }, { "epoch": 0.13, "grad_norm": 3.382420301437378, "learning_rate": 1.9428565152182398e-05, "loss": 0.6294, "step": 5263 }, { "epoch": 0.13, "grad_norm": 3.451953649520874, "learning_rate": 1.9428288516900222e-05, "loss": 0.7969, "step": 5264 }, { "epoch": 0.13, "grad_norm": 2.4000203609466553, "learning_rate": 1.9428011816644205e-05, "loss": 0.7657, "step": 5265 }, { "epoch": 0.13, "grad_norm": 3.696305274963379, "learning_rate": 1.9427735051416255e-05, "loss": 0.7404, "step": 5266 }, { "epoch": 0.13, "grad_norm": 2.8527936935424805, "learning_rate": 1.9427458221218275e-05, "loss": 0.7143, "step": 5267 }, { "epoch": 0.14, "grad_norm": 2.58967924118042, "learning_rate": 1.942718132605218e-05, "loss": 0.6917, "step": 5268 }, { "epoch": 0.14, "grad_norm": 3.9964511394500732, "learning_rate": 1.9426904365919873e-05, "loss": 0.7291, "step": 5269 }, { "epoch": 0.14, "grad_norm": 2.1303157806396484, "learning_rate": 1.9426627340823264e-05, "loss": 0.7247, "step": 5270 }, { "epoch": 0.14, "grad_norm": 3.6660988330841064, "learning_rate": 1.942635025076426e-05, "loss": 0.6999, "step": 5271 }, { "epoch": 0.14, "grad_norm": 1.6238924264907837, "learning_rate": 1.9426073095744775e-05, "loss": 0.6453, "step": 5272 }, { "epoch": 0.14, "grad_norm": 2.5699267387390137, "learning_rate": 1.9425795875766713e-05, "loss": 0.4558, "step": 5273 }, { "epoch": 0.14, "grad_norm": 1.7868160009384155, "learning_rate": 1.9425518590831988e-05, "loss": 0.6151, "step": 5274 }, { "epoch": 0.14, "grad_norm": 2.5820417404174805, "learning_rate": 1.942524124094251e-05, "loss": 0.6389, "step": 5275 }, { "epoch": 0.14, "grad_norm": 3.321347951889038, "learning_rate": 1.9424963826100195e-05, "loss": 0.8226, "step": 5276 }, { "epoch": 0.14, "grad_norm": 2.3509554862976074, "learning_rate": 1.942468634630695e-05, "loss": 0.6352, "step": 5277 }, { "epoch": 0.14, "grad_norm": 1.7527977228164673, "learning_rate": 1.9424408801564685e-05, "loss": 0.6181, "step": 5278 }, { "epoch": 0.14, "grad_norm": 2.736168384552002, "learning_rate": 1.9424131191875318e-05, "loss": 0.608, "step": 5279 }, { "epoch": 0.14, "grad_norm": 1.3049583435058594, "learning_rate": 1.9423853517240758e-05, "loss": 0.5438, "step": 5280 }, { "epoch": 0.14, "grad_norm": 1.7103708982467651, "learning_rate": 1.942357577766292e-05, "loss": 0.5878, "step": 5281 }, { "epoch": 0.14, "grad_norm": 2.697518825531006, "learning_rate": 1.942329797314372e-05, "loss": 0.7905, "step": 5282 }, { "epoch": 0.14, "grad_norm": 6.949844837188721, "learning_rate": 1.9423020103685068e-05, "loss": 0.7894, "step": 5283 }, { "epoch": 0.14, "grad_norm": 1.8744698762893677, "learning_rate": 1.942274216928888e-05, "loss": 0.8321, "step": 5284 }, { "epoch": 0.14, "grad_norm": 2.345616102218628, "learning_rate": 1.9422464169957077e-05, "loss": 0.58, "step": 5285 }, { "epoch": 0.14, "grad_norm": 1.939171314239502, "learning_rate": 1.9422186105691572e-05, "loss": 0.744, "step": 5286 }, { "epoch": 0.14, "grad_norm": 8.292525291442871, "learning_rate": 1.9421907976494276e-05, "loss": 0.6329, "step": 5287 }, { "epoch": 0.14, "grad_norm": 2.0299072265625, "learning_rate": 1.9421629782367115e-05, "loss": 0.7506, "step": 5288 }, { "epoch": 0.14, "grad_norm": 3.9437897205352783, "learning_rate": 1.9421351523311994e-05, "loss": 0.7166, "step": 5289 }, { "epoch": 0.14, "grad_norm": 3.2628183364868164, "learning_rate": 1.9421073199330843e-05, "loss": 0.7357, "step": 5290 }, { "epoch": 0.14, "grad_norm": 1.5462770462036133, "learning_rate": 1.9420794810425568e-05, "loss": 0.5942, "step": 5291 }, { "epoch": 0.14, "grad_norm": 1.4555143117904663, "learning_rate": 1.94205163565981e-05, "loss": 0.5051, "step": 5292 }, { "epoch": 0.14, "grad_norm": 3.2792391777038574, "learning_rate": 1.942023783785035e-05, "loss": 0.5126, "step": 5293 }, { "epoch": 0.14, "grad_norm": 2.64833927154541, "learning_rate": 1.941995925418424e-05, "loss": 0.8032, "step": 5294 }, { "epoch": 0.14, "grad_norm": 2.6089487075805664, "learning_rate": 1.9419680605601684e-05, "loss": 0.5983, "step": 5295 }, { "epoch": 0.14, "grad_norm": 1.542135238647461, "learning_rate": 1.9419401892104612e-05, "loss": 0.6056, "step": 5296 }, { "epoch": 0.14, "grad_norm": 2.22664213180542, "learning_rate": 1.941912311369494e-05, "loss": 0.7369, "step": 5297 }, { "epoch": 0.14, "grad_norm": 1.5748056173324585, "learning_rate": 1.941884427037459e-05, "loss": 0.592, "step": 5298 }, { "epoch": 0.14, "grad_norm": 1.841768503189087, "learning_rate": 1.941856536214548e-05, "loss": 0.7231, "step": 5299 }, { "epoch": 0.14, "grad_norm": 5.410403728485107, "learning_rate": 1.9418286389009532e-05, "loss": 0.6844, "step": 5300 }, { "epoch": 0.14, "grad_norm": 2.223372459411621, "learning_rate": 1.9418007350968676e-05, "loss": 0.6215, "step": 5301 }, { "epoch": 0.14, "grad_norm": 6.8035454750061035, "learning_rate": 1.9417728248024832e-05, "loss": 0.836, "step": 5302 }, { "epoch": 0.14, "grad_norm": 1.6654961109161377, "learning_rate": 1.941744908017992e-05, "loss": 0.6032, "step": 5303 }, { "epoch": 0.14, "grad_norm": 8.97557544708252, "learning_rate": 1.9417169847435863e-05, "loss": 0.7512, "step": 5304 }, { "epoch": 0.14, "grad_norm": 2.651185989379883, "learning_rate": 1.9416890549794588e-05, "loss": 0.7173, "step": 5305 }, { "epoch": 0.14, "grad_norm": 4.06797981262207, "learning_rate": 1.9416611187258022e-05, "loss": 0.5846, "step": 5306 }, { "epoch": 0.14, "grad_norm": 4.840145111083984, "learning_rate": 1.941633175982809e-05, "loss": 0.5581, "step": 5307 }, { "epoch": 0.14, "grad_norm": 6.830573558807373, "learning_rate": 1.941605226750671e-05, "loss": 0.7644, "step": 5308 }, { "epoch": 0.14, "grad_norm": 2.6360483169555664, "learning_rate": 1.9415772710295817e-05, "loss": 0.618, "step": 5309 }, { "epoch": 0.14, "grad_norm": 1.3093198537826538, "learning_rate": 1.941549308819733e-05, "loss": 0.6082, "step": 5310 }, { "epoch": 0.14, "grad_norm": 3.557614803314209, "learning_rate": 1.9415213401213187e-05, "loss": 0.8743, "step": 5311 }, { "epoch": 0.14, "grad_norm": 2.7445526123046875, "learning_rate": 1.9414933649345303e-05, "loss": 0.8014, "step": 5312 }, { "epoch": 0.14, "grad_norm": 6.4758687019348145, "learning_rate": 1.941465383259561e-05, "loss": 0.61, "step": 5313 }, { "epoch": 0.14, "grad_norm": 3.4579367637634277, "learning_rate": 1.9414373950966042e-05, "loss": 0.8399, "step": 5314 }, { "epoch": 0.14, "grad_norm": 1.4919811487197876, "learning_rate": 1.9414094004458525e-05, "loss": 0.5482, "step": 5315 }, { "epoch": 0.14, "grad_norm": 4.26968240737915, "learning_rate": 1.9413813993074983e-05, "loss": 0.7125, "step": 5316 }, { "epoch": 0.14, "grad_norm": 4.8741841316223145, "learning_rate": 1.941353391681735e-05, "loss": 0.573, "step": 5317 }, { "epoch": 0.14, "grad_norm": 1.3368486166000366, "learning_rate": 1.9413253775687556e-05, "loss": 0.6756, "step": 5318 }, { "epoch": 0.14, "grad_norm": 2.2055130004882812, "learning_rate": 1.9412973569687528e-05, "loss": 0.7086, "step": 5319 }, { "epoch": 0.14, "grad_norm": 1.5513581037521362, "learning_rate": 1.9412693298819203e-05, "loss": 0.5872, "step": 5320 }, { "epoch": 0.14, "grad_norm": 1.1602920293807983, "learning_rate": 1.9412412963084508e-05, "loss": 0.594, "step": 5321 }, { "epoch": 0.14, "grad_norm": 4.153432369232178, "learning_rate": 1.9412132562485375e-05, "loss": 0.4056, "step": 5322 }, { "epoch": 0.14, "grad_norm": 6.239410400390625, "learning_rate": 1.9411852097023743e-05, "loss": 0.8522, "step": 5323 }, { "epoch": 0.14, "grad_norm": 3.0613255500793457, "learning_rate": 1.9411571566701535e-05, "loss": 0.6225, "step": 5324 }, { "epoch": 0.14, "grad_norm": 4.704567909240723, "learning_rate": 1.941129097152069e-05, "loss": 0.6935, "step": 5325 }, { "epoch": 0.14, "grad_norm": 3.1525051593780518, "learning_rate": 1.941101031148314e-05, "loss": 0.7752, "step": 5326 }, { "epoch": 0.14, "grad_norm": 2.154378890991211, "learning_rate": 1.9410729586590817e-05, "loss": 0.6223, "step": 5327 }, { "epoch": 0.14, "grad_norm": 2.561223030090332, "learning_rate": 1.9410448796845663e-05, "loss": 0.5829, "step": 5328 }, { "epoch": 0.14, "grad_norm": 3.3909823894500732, "learning_rate": 1.9410167942249604e-05, "loss": 0.5902, "step": 5329 }, { "epoch": 0.14, "grad_norm": 2.642444133758545, "learning_rate": 1.9409887022804583e-05, "loss": 0.6993, "step": 5330 }, { "epoch": 0.14, "grad_norm": 2.4254508018493652, "learning_rate": 1.9409606038512533e-05, "loss": 0.733, "step": 5331 }, { "epoch": 0.14, "grad_norm": 2.2891337871551514, "learning_rate": 1.9409324989375387e-05, "loss": 0.7091, "step": 5332 }, { "epoch": 0.14, "grad_norm": 14.142940521240234, "learning_rate": 1.9409043875395084e-05, "loss": 0.9021, "step": 5333 }, { "epoch": 0.14, "grad_norm": 2.2161879539489746, "learning_rate": 1.9408762696573566e-05, "loss": 0.6785, "step": 5334 }, { "epoch": 0.14, "grad_norm": 2.153315305709839, "learning_rate": 1.940848145291276e-05, "loss": 0.4702, "step": 5335 }, { "epoch": 0.14, "grad_norm": 4.7748308181762695, "learning_rate": 1.9408200144414616e-05, "loss": 0.5601, "step": 5336 }, { "epoch": 0.14, "grad_norm": 2.6021761894226074, "learning_rate": 1.940791877108107e-05, "loss": 0.7405, "step": 5337 }, { "epoch": 0.14, "grad_norm": 3.4285969734191895, "learning_rate": 1.940763733291405e-05, "loss": 0.7294, "step": 5338 }, { "epoch": 0.14, "grad_norm": 3.4455511569976807, "learning_rate": 1.940735582991551e-05, "loss": 0.707, "step": 5339 }, { "epoch": 0.14, "grad_norm": 10.198298454284668, "learning_rate": 1.940707426208738e-05, "loss": 0.8331, "step": 5340 }, { "epoch": 0.14, "grad_norm": 1.8798584938049316, "learning_rate": 1.940679262943161e-05, "loss": 0.6702, "step": 5341 }, { "epoch": 0.14, "grad_norm": 1.8308039903640747, "learning_rate": 1.940651093195013e-05, "loss": 0.6569, "step": 5342 }, { "epoch": 0.14, "grad_norm": 2.094534397125244, "learning_rate": 1.940622916964489e-05, "loss": 0.7128, "step": 5343 }, { "epoch": 0.14, "grad_norm": 8.175469398498535, "learning_rate": 1.9405947342517825e-05, "loss": 0.6666, "step": 5344 }, { "epoch": 0.14, "grad_norm": 2.3884470462799072, "learning_rate": 1.9405665450570883e-05, "loss": 0.6175, "step": 5345 }, { "epoch": 0.14, "grad_norm": 1.5576248168945312, "learning_rate": 1.9405383493806004e-05, "loss": 0.5911, "step": 5346 }, { "epoch": 0.14, "grad_norm": 6.287469863891602, "learning_rate": 1.9405101472225128e-05, "loss": 0.8481, "step": 5347 }, { "epoch": 0.14, "grad_norm": 3.2587075233459473, "learning_rate": 1.9404819385830205e-05, "loss": 0.7232, "step": 5348 }, { "epoch": 0.14, "grad_norm": 2.7868127822875977, "learning_rate": 1.9404537234623174e-05, "loss": 0.8419, "step": 5349 }, { "epoch": 0.14, "grad_norm": 2.853168487548828, "learning_rate": 1.940425501860598e-05, "loss": 0.7946, "step": 5350 }, { "epoch": 0.14, "grad_norm": 5.903851509094238, "learning_rate": 1.9403972737780567e-05, "loss": 0.7428, "step": 5351 }, { "epoch": 0.14, "grad_norm": 2.2050697803497314, "learning_rate": 1.9403690392148886e-05, "loss": 0.6803, "step": 5352 }, { "epoch": 0.14, "grad_norm": 5.038975715637207, "learning_rate": 1.9403407981712877e-05, "loss": 0.621, "step": 5353 }, { "epoch": 0.14, "grad_norm": 3.5314643383026123, "learning_rate": 1.940312550647449e-05, "loss": 0.7671, "step": 5354 }, { "epoch": 0.14, "grad_norm": 9.301679611206055, "learning_rate": 1.9402842966435666e-05, "loss": 0.6632, "step": 5355 }, { "epoch": 0.14, "grad_norm": 2.079983949661255, "learning_rate": 1.940256036159836e-05, "loss": 0.8317, "step": 5356 }, { "epoch": 0.14, "grad_norm": 3.118751287460327, "learning_rate": 1.9402277691964514e-05, "loss": 0.6963, "step": 5357 }, { "epoch": 0.14, "grad_norm": 4.434342384338379, "learning_rate": 1.9401994957536075e-05, "loss": 0.8358, "step": 5358 }, { "epoch": 0.14, "grad_norm": 1.3184539079666138, "learning_rate": 1.9401712158314995e-05, "loss": 0.7273, "step": 5359 }, { "epoch": 0.14, "grad_norm": 1.8195720911026, "learning_rate": 1.940142929430322e-05, "loss": 0.5287, "step": 5360 }, { "epoch": 0.14, "grad_norm": 1.9839297533035278, "learning_rate": 1.9401146365502705e-05, "loss": 0.5143, "step": 5361 }, { "epoch": 0.14, "grad_norm": 8.515040397644043, "learning_rate": 1.9400863371915387e-05, "loss": 0.7284, "step": 5362 }, { "epoch": 0.14, "grad_norm": 3.1176366806030273, "learning_rate": 1.9400580313543232e-05, "loss": 0.7929, "step": 5363 }, { "epoch": 0.14, "grad_norm": 3.0074944496154785, "learning_rate": 1.9400297190388185e-05, "loss": 0.8122, "step": 5364 }, { "epoch": 0.14, "grad_norm": 3.626669406890869, "learning_rate": 1.940001400245219e-05, "loss": 0.5316, "step": 5365 }, { "epoch": 0.14, "grad_norm": 2.9482946395874023, "learning_rate": 1.939973074973721e-05, "loss": 0.8111, "step": 5366 }, { "epoch": 0.14, "grad_norm": 1.3971192836761475, "learning_rate": 1.9399447432245187e-05, "loss": 0.5471, "step": 5367 }, { "epoch": 0.14, "grad_norm": 5.5186614990234375, "learning_rate": 1.9399164049978075e-05, "loss": 0.6735, "step": 5368 }, { "epoch": 0.14, "grad_norm": 2.2967946529388428, "learning_rate": 1.939888060293784e-05, "loss": 0.6106, "step": 5369 }, { "epoch": 0.14, "grad_norm": 3.8224575519561768, "learning_rate": 1.9398597091126413e-05, "loss": 0.6127, "step": 5370 }, { "epoch": 0.14, "grad_norm": 2.879871368408203, "learning_rate": 1.9398313514545765e-05, "loss": 0.7201, "step": 5371 }, { "epoch": 0.14, "grad_norm": 2.652099609375, "learning_rate": 1.9398029873197845e-05, "loss": 0.8123, "step": 5372 }, { "epoch": 0.14, "grad_norm": 2.5654428005218506, "learning_rate": 1.939774616708461e-05, "loss": 0.6229, "step": 5373 }, { "epoch": 0.14, "grad_norm": 1.7108979225158691, "learning_rate": 1.9397462396208007e-05, "loss": 0.6632, "step": 5374 }, { "epoch": 0.14, "grad_norm": 1.8124275207519531, "learning_rate": 1.939717856057e-05, "loss": 0.8073, "step": 5375 }, { "epoch": 0.14, "grad_norm": 3.6204288005828857, "learning_rate": 1.9396894660172543e-05, "loss": 0.5737, "step": 5376 }, { "epoch": 0.14, "grad_norm": 3.370917558670044, "learning_rate": 1.939661069501759e-05, "loss": 0.7839, "step": 5377 }, { "epoch": 0.14, "grad_norm": 2.6211562156677246, "learning_rate": 1.93963266651071e-05, "loss": 0.7288, "step": 5378 }, { "epoch": 0.14, "grad_norm": 3.2981715202331543, "learning_rate": 1.939604257044303e-05, "loss": 0.7553, "step": 5379 }, { "epoch": 0.14, "grad_norm": 6.107547283172607, "learning_rate": 1.939575841102734e-05, "loss": 0.7384, "step": 5380 }, { "epoch": 0.14, "grad_norm": 2.0325162410736084, "learning_rate": 1.9395474186861983e-05, "loss": 0.583, "step": 5381 }, { "epoch": 0.14, "grad_norm": 2.107905149459839, "learning_rate": 1.939518989794892e-05, "loss": 0.6749, "step": 5382 }, { "epoch": 0.14, "grad_norm": 2.351205587387085, "learning_rate": 1.9394905544290114e-05, "loss": 0.7413, "step": 5383 }, { "epoch": 0.14, "grad_norm": 3.5605664253234863, "learning_rate": 1.939462112588752e-05, "loss": 0.588, "step": 5384 }, { "epoch": 0.14, "grad_norm": 2.6310582160949707, "learning_rate": 1.93943366427431e-05, "loss": 0.8202, "step": 5385 }, { "epoch": 0.14, "grad_norm": 5.424671649932861, "learning_rate": 1.9394052094858816e-05, "loss": 0.7357, "step": 5386 }, { "epoch": 0.14, "grad_norm": 1.7461529970169067, "learning_rate": 1.9393767482236626e-05, "loss": 0.7314, "step": 5387 }, { "epoch": 0.14, "grad_norm": 3.578092098236084, "learning_rate": 1.939348280487849e-05, "loss": 0.5394, "step": 5388 }, { "epoch": 0.14, "grad_norm": 3.339916229248047, "learning_rate": 1.9393198062786372e-05, "loss": 0.6008, "step": 5389 }, { "epoch": 0.14, "grad_norm": 2.8721914291381836, "learning_rate": 1.9392913255962234e-05, "loss": 0.7564, "step": 5390 }, { "epoch": 0.14, "grad_norm": 1.317254900932312, "learning_rate": 1.9392628384408042e-05, "loss": 0.7171, "step": 5391 }, { "epoch": 0.14, "grad_norm": 3.1076934337615967, "learning_rate": 1.9392343448125754e-05, "loss": 0.7953, "step": 5392 }, { "epoch": 0.14, "grad_norm": 1.967822790145874, "learning_rate": 1.9392058447117338e-05, "loss": 0.6118, "step": 5393 }, { "epoch": 0.14, "grad_norm": 1.7806507349014282, "learning_rate": 1.9391773381384752e-05, "loss": 0.5679, "step": 5394 }, { "epoch": 0.14, "grad_norm": 3.400977849960327, "learning_rate": 1.9391488250929966e-05, "loss": 0.7359, "step": 5395 }, { "epoch": 0.14, "grad_norm": 2.4888851642608643, "learning_rate": 1.9391203055754944e-05, "loss": 0.7308, "step": 5396 }, { "epoch": 0.14, "grad_norm": 1.3961907625198364, "learning_rate": 1.939091779586165e-05, "loss": 0.594, "step": 5397 }, { "epoch": 0.14, "grad_norm": 1.5517995357513428, "learning_rate": 1.9390632471252053e-05, "loss": 0.6644, "step": 5398 }, { "epoch": 0.14, "grad_norm": 2.4512970447540283, "learning_rate": 1.9390347081928115e-05, "loss": 0.8434, "step": 5399 }, { "epoch": 0.14, "grad_norm": 1.6670812368392944, "learning_rate": 1.9390061627891804e-05, "loss": 0.5653, "step": 5400 }, { "epoch": 0.14, "grad_norm": 3.1388731002807617, "learning_rate": 1.9389776109145087e-05, "loss": 0.8279, "step": 5401 }, { "epoch": 0.14, "grad_norm": 2.973567247390747, "learning_rate": 1.9389490525689932e-05, "loss": 0.6385, "step": 5402 }, { "epoch": 0.14, "grad_norm": 3.872065305709839, "learning_rate": 1.938920487752831e-05, "loss": 0.6626, "step": 5403 }, { "epoch": 0.14, "grad_norm": 3.010761022567749, "learning_rate": 1.9388919164662187e-05, "loss": 0.5409, "step": 5404 }, { "epoch": 0.14, "grad_norm": 5.869908809661865, "learning_rate": 1.9388633387093528e-05, "loss": 0.6716, "step": 5405 }, { "epoch": 0.14, "grad_norm": 4.659322738647461, "learning_rate": 1.9388347544824308e-05, "loss": 0.7649, "step": 5406 }, { "epoch": 0.14, "grad_norm": 3.462399482727051, "learning_rate": 1.9388061637856496e-05, "loss": 0.6371, "step": 5407 }, { "epoch": 0.14, "grad_norm": 1.68644380569458, "learning_rate": 1.9387775666192063e-05, "loss": 0.7248, "step": 5408 }, { "epoch": 0.14, "grad_norm": 2.4479029178619385, "learning_rate": 1.9387489629832975e-05, "loss": 0.5067, "step": 5409 }, { "epoch": 0.14, "grad_norm": 3.0104496479034424, "learning_rate": 1.9387203528781206e-05, "loss": 0.6402, "step": 5410 }, { "epoch": 0.14, "grad_norm": 3.4115793704986572, "learning_rate": 1.938691736303873e-05, "loss": 0.784, "step": 5411 }, { "epoch": 0.14, "grad_norm": 4.79442024230957, "learning_rate": 1.9386631132607517e-05, "loss": 0.7242, "step": 5412 }, { "epoch": 0.14, "grad_norm": 2.0197489261627197, "learning_rate": 1.938634483748954e-05, "loss": 0.608, "step": 5413 }, { "epoch": 0.14, "grad_norm": 2.19429087638855, "learning_rate": 1.9386058477686767e-05, "loss": 0.7553, "step": 5414 }, { "epoch": 0.14, "grad_norm": 2.1279499530792236, "learning_rate": 1.938577205320118e-05, "loss": 0.901, "step": 5415 }, { "epoch": 0.14, "grad_norm": 3.4473447799682617, "learning_rate": 1.938548556403475e-05, "loss": 0.7259, "step": 5416 }, { "epoch": 0.14, "grad_norm": 3.597419261932373, "learning_rate": 1.9385199010189446e-05, "loss": 0.9019, "step": 5417 }, { "epoch": 0.14, "grad_norm": 2.457900285720825, "learning_rate": 1.9384912391667246e-05, "loss": 0.4925, "step": 5418 }, { "epoch": 0.14, "grad_norm": 1.9804495573043823, "learning_rate": 1.9384625708470133e-05, "loss": 0.563, "step": 5419 }, { "epoch": 0.14, "grad_norm": 3.883308172225952, "learning_rate": 1.938433896060007e-05, "loss": 0.8178, "step": 5420 }, { "epoch": 0.14, "grad_norm": 4.170117378234863, "learning_rate": 1.9384052148059037e-05, "loss": 0.6478, "step": 5421 }, { "epoch": 0.14, "grad_norm": 4.174603462219238, "learning_rate": 1.9383765270849017e-05, "loss": 0.8635, "step": 5422 }, { "epoch": 0.14, "grad_norm": 2.302020788192749, "learning_rate": 1.9383478328971982e-05, "loss": 0.7114, "step": 5423 }, { "epoch": 0.14, "grad_norm": 1.3446820974349976, "learning_rate": 1.9383191322429905e-05, "loss": 0.6891, "step": 5424 }, { "epoch": 0.14, "grad_norm": 4.383627414703369, "learning_rate": 1.9382904251224773e-05, "loss": 0.7106, "step": 5425 }, { "epoch": 0.14, "grad_norm": 2.6770222187042236, "learning_rate": 1.938261711535856e-05, "loss": 0.6947, "step": 5426 }, { "epoch": 0.14, "grad_norm": 1.7903891801834106, "learning_rate": 1.9382329914833244e-05, "loss": 0.7447, "step": 5427 }, { "epoch": 0.14, "grad_norm": 2.934584856033325, "learning_rate": 1.93820426496508e-05, "loss": 0.7592, "step": 5428 }, { "epoch": 0.14, "grad_norm": 2.178999900817871, "learning_rate": 1.9381755319813215e-05, "loss": 0.8299, "step": 5429 }, { "epoch": 0.14, "grad_norm": 3.3671677112579346, "learning_rate": 1.9381467925322468e-05, "loss": 0.7391, "step": 5430 }, { "epoch": 0.14, "grad_norm": 4.302799701690674, "learning_rate": 1.938118046618054e-05, "loss": 0.7351, "step": 5431 }, { "epoch": 0.14, "grad_norm": 2.255354404449463, "learning_rate": 1.9380892942389407e-05, "loss": 0.6381, "step": 5432 }, { "epoch": 0.14, "grad_norm": 2.4769845008850098, "learning_rate": 1.9380605353951053e-05, "loss": 0.7629, "step": 5433 }, { "epoch": 0.14, "grad_norm": 2.051973819732666, "learning_rate": 1.9380317700867463e-05, "loss": 0.6279, "step": 5434 }, { "epoch": 0.14, "grad_norm": 1.425116777420044, "learning_rate": 1.9380029983140615e-05, "loss": 0.5851, "step": 5435 }, { "epoch": 0.14, "grad_norm": 1.5321733951568604, "learning_rate": 1.9379742200772492e-05, "loss": 0.6574, "step": 5436 }, { "epoch": 0.14, "grad_norm": 1.7606761455535889, "learning_rate": 1.9379454353765083e-05, "loss": 0.5903, "step": 5437 }, { "epoch": 0.14, "grad_norm": 1.9977635145187378, "learning_rate": 1.9379166442120362e-05, "loss": 0.7139, "step": 5438 }, { "epoch": 0.14, "grad_norm": 4.565695762634277, "learning_rate": 1.9378878465840323e-05, "loss": 0.8153, "step": 5439 }, { "epoch": 0.14, "grad_norm": 1.3645641803741455, "learning_rate": 1.9378590424926943e-05, "loss": 0.6288, "step": 5440 }, { "epoch": 0.14, "grad_norm": 4.48105525970459, "learning_rate": 1.9378302319382212e-05, "loss": 0.7815, "step": 5441 }, { "epoch": 0.14, "grad_norm": 1.8143095970153809, "learning_rate": 1.937801414920811e-05, "loss": 0.6228, "step": 5442 }, { "epoch": 0.14, "grad_norm": 2.366985559463501, "learning_rate": 1.937772591440663e-05, "loss": 0.6828, "step": 5443 }, { "epoch": 0.14, "grad_norm": 2.0634753704071045, "learning_rate": 1.9377437614979753e-05, "loss": 0.7122, "step": 5444 }, { "epoch": 0.14, "grad_norm": 1.9575181007385254, "learning_rate": 1.937714925092947e-05, "loss": 0.7039, "step": 5445 }, { "epoch": 0.14, "grad_norm": 3.508840799331665, "learning_rate": 1.9376860822257763e-05, "loss": 0.7612, "step": 5446 }, { "epoch": 0.14, "grad_norm": 2.8350942134857178, "learning_rate": 1.9376572328966624e-05, "loss": 0.5935, "step": 5447 }, { "epoch": 0.14, "grad_norm": 1.9500648975372314, "learning_rate": 1.937628377105804e-05, "loss": 0.6509, "step": 5448 }, { "epoch": 0.14, "grad_norm": 2.3851687908172607, "learning_rate": 1.9375995148534e-05, "loss": 0.7875, "step": 5449 }, { "epoch": 0.14, "grad_norm": 1.8426889181137085, "learning_rate": 1.9375706461396486e-05, "loss": 0.5292, "step": 5450 }, { "epoch": 0.14, "grad_norm": 2.340606927871704, "learning_rate": 1.9375417709647494e-05, "loss": 0.5826, "step": 5451 }, { "epoch": 0.14, "grad_norm": 1.893078088760376, "learning_rate": 1.9375128893289017e-05, "loss": 0.6685, "step": 5452 }, { "epoch": 0.14, "grad_norm": 2.513791561126709, "learning_rate": 1.937484001232304e-05, "loss": 0.6387, "step": 5453 }, { "epoch": 0.14, "grad_norm": 3.231424570083618, "learning_rate": 1.9374551066751558e-05, "loss": 0.7703, "step": 5454 }, { "epoch": 0.14, "grad_norm": 4.484420299530029, "learning_rate": 1.9374262056576557e-05, "loss": 0.8992, "step": 5455 }, { "epoch": 0.14, "grad_norm": 2.638047695159912, "learning_rate": 1.9373972981800034e-05, "loss": 0.5247, "step": 5456 }, { "epoch": 0.14, "grad_norm": 3.692030429840088, "learning_rate": 1.9373683842423977e-05, "loss": 0.6818, "step": 5457 }, { "epoch": 0.14, "grad_norm": 2.5832290649414062, "learning_rate": 1.9373394638450383e-05, "loss": 0.6996, "step": 5458 }, { "epoch": 0.14, "grad_norm": 3.6954705715179443, "learning_rate": 1.937310536988124e-05, "loss": 0.8059, "step": 5459 }, { "epoch": 0.14, "grad_norm": 3.135542154312134, "learning_rate": 1.9372816036718542e-05, "loss": 0.7516, "step": 5460 }, { "epoch": 0.14, "grad_norm": 2.1784586906433105, "learning_rate": 1.9372526638964283e-05, "loss": 0.5783, "step": 5461 }, { "epoch": 0.14, "grad_norm": 2.7631120681762695, "learning_rate": 1.9372237176620463e-05, "loss": 0.5361, "step": 5462 }, { "epoch": 0.14, "grad_norm": 4.78092622756958, "learning_rate": 1.937194764968907e-05, "loss": 0.72, "step": 5463 }, { "epoch": 0.14, "grad_norm": 2.9935173988342285, "learning_rate": 1.9371658058172103e-05, "loss": 0.5911, "step": 5464 }, { "epoch": 0.14, "grad_norm": 2.0349841117858887, "learning_rate": 1.9371368402071556e-05, "loss": 0.843, "step": 5465 }, { "epoch": 0.14, "grad_norm": 4.111967086791992, "learning_rate": 1.9371078681389425e-05, "loss": 0.632, "step": 5466 }, { "epoch": 0.14, "grad_norm": 3.5574898719787598, "learning_rate": 1.9370788896127707e-05, "loss": 0.6699, "step": 5467 }, { "epoch": 0.14, "grad_norm": 3.4099180698394775, "learning_rate": 1.93704990462884e-05, "loss": 0.8416, "step": 5468 }, { "epoch": 0.14, "grad_norm": 3.4188356399536133, "learning_rate": 1.9370209131873503e-05, "loss": 0.5939, "step": 5469 }, { "epoch": 0.14, "grad_norm": 2.698984384536743, "learning_rate": 1.936991915288501e-05, "loss": 0.5942, "step": 5470 }, { "epoch": 0.14, "grad_norm": 2.5280513763427734, "learning_rate": 1.9369629109324917e-05, "loss": 0.6475, "step": 5471 }, { "epoch": 0.14, "grad_norm": 2.3960816860198975, "learning_rate": 1.9369339001195232e-05, "loss": 0.5293, "step": 5472 }, { "epoch": 0.14, "grad_norm": 1.6177988052368164, "learning_rate": 1.936904882849795e-05, "loss": 0.6241, "step": 5473 }, { "epoch": 0.14, "grad_norm": 1.7872809171676636, "learning_rate": 1.9368758591235064e-05, "loss": 0.6644, "step": 5474 }, { "epoch": 0.14, "grad_norm": 3.3947689533233643, "learning_rate": 1.9368468289408585e-05, "loss": 0.6364, "step": 5475 }, { "epoch": 0.14, "grad_norm": 3.9659647941589355, "learning_rate": 1.9368177923020502e-05, "loss": 0.5728, "step": 5476 }, { "epoch": 0.14, "grad_norm": 2.312032699584961, "learning_rate": 1.936788749207283e-05, "loss": 0.614, "step": 5477 }, { "epoch": 0.14, "grad_norm": 2.9542312622070312, "learning_rate": 1.9367596996567557e-05, "loss": 0.7985, "step": 5478 }, { "epoch": 0.14, "grad_norm": 2.1894564628601074, "learning_rate": 1.9367306436506696e-05, "loss": 0.5398, "step": 5479 }, { "epoch": 0.14, "grad_norm": 1.2833409309387207, "learning_rate": 1.936701581189224e-05, "loss": 0.6011, "step": 5480 }, { "epoch": 0.14, "grad_norm": 4.181057453155518, "learning_rate": 1.9366725122726198e-05, "loss": 0.768, "step": 5481 }, { "epoch": 0.14, "grad_norm": 2.308117151260376, "learning_rate": 1.936643436901057e-05, "loss": 0.6223, "step": 5482 }, { "epoch": 0.14, "grad_norm": 3.6570510864257812, "learning_rate": 1.936614355074736e-05, "loss": 0.7297, "step": 5483 }, { "epoch": 0.14, "grad_norm": 2.8552262783050537, "learning_rate": 1.9365852667938577e-05, "loss": 0.7332, "step": 5484 }, { "epoch": 0.14, "grad_norm": 2.354280471801758, "learning_rate": 1.9365561720586215e-05, "loss": 0.5328, "step": 5485 }, { "epoch": 0.14, "grad_norm": 1.8707510232925415, "learning_rate": 1.936527070869229e-05, "loss": 0.6095, "step": 5486 }, { "epoch": 0.14, "grad_norm": 2.5952212810516357, "learning_rate": 1.9364979632258807e-05, "loss": 0.7151, "step": 5487 }, { "epoch": 0.14, "grad_norm": 2.7690703868865967, "learning_rate": 1.9364688491287764e-05, "loss": 0.7257, "step": 5488 }, { "epoch": 0.14, "grad_norm": 4.823000907897949, "learning_rate": 1.936439728578117e-05, "loss": 0.6533, "step": 5489 }, { "epoch": 0.14, "grad_norm": 2.009798765182495, "learning_rate": 1.9364106015741036e-05, "loss": 0.6402, "step": 5490 }, { "epoch": 0.14, "grad_norm": 8.289822578430176, "learning_rate": 1.9363814681169365e-05, "loss": 0.8724, "step": 5491 }, { "epoch": 0.14, "grad_norm": 3.380174160003662, "learning_rate": 1.9363523282068163e-05, "loss": 0.7023, "step": 5492 }, { "epoch": 0.14, "grad_norm": 2.55946946144104, "learning_rate": 1.936323181843945e-05, "loss": 0.7151, "step": 5493 }, { "epoch": 0.14, "grad_norm": 4.116768836975098, "learning_rate": 1.9362940290285218e-05, "loss": 0.6831, "step": 5494 }, { "epoch": 0.14, "grad_norm": 2.5357561111450195, "learning_rate": 1.9362648697607483e-05, "loss": 0.77, "step": 5495 }, { "epoch": 0.14, "grad_norm": 2.5545594692230225, "learning_rate": 1.936235704040826e-05, "loss": 0.802, "step": 5496 }, { "epoch": 0.14, "grad_norm": 1.762019395828247, "learning_rate": 1.9362065318689553e-05, "loss": 0.6673, "step": 5497 }, { "epoch": 0.14, "grad_norm": 3.453075885772705, "learning_rate": 1.9361773532453375e-05, "loss": 0.7133, "step": 5498 }, { "epoch": 0.14, "grad_norm": 3.3591246604919434, "learning_rate": 1.9361481681701734e-05, "loss": 0.7914, "step": 5499 }, { "epoch": 0.14, "grad_norm": 1.8615385293960571, "learning_rate": 1.9361189766436638e-05, "loss": 0.6306, "step": 5500 }, { "epoch": 0.14, "grad_norm": 4.392769813537598, "learning_rate": 1.936089778666011e-05, "loss": 0.7623, "step": 5501 }, { "epoch": 0.14, "grad_norm": 3.6427974700927734, "learning_rate": 1.9360605742374155e-05, "loss": 0.7465, "step": 5502 }, { "epoch": 0.14, "grad_norm": 2.9220805168151855, "learning_rate": 1.9360313633580784e-05, "loss": 0.6492, "step": 5503 }, { "epoch": 0.14, "grad_norm": 2.4572722911834717, "learning_rate": 1.936002146028201e-05, "loss": 0.5271, "step": 5504 }, { "epoch": 0.14, "grad_norm": 4.466747760772705, "learning_rate": 1.9359729222479854e-05, "loss": 0.7828, "step": 5505 }, { "epoch": 0.14, "grad_norm": 2.7725772857666016, "learning_rate": 1.935943692017632e-05, "loss": 0.7777, "step": 5506 }, { "epoch": 0.14, "grad_norm": 1.5145102739334106, "learning_rate": 1.935914455337343e-05, "loss": 0.5345, "step": 5507 }, { "epoch": 0.14, "grad_norm": 2.235992908477783, "learning_rate": 1.9358852122073196e-05, "loss": 0.7822, "step": 5508 }, { "epoch": 0.14, "grad_norm": 3.6393377780914307, "learning_rate": 1.935855962627763e-05, "loss": 0.6875, "step": 5509 }, { "epoch": 0.14, "grad_norm": 3.0397770404815674, "learning_rate": 1.9358267065988753e-05, "loss": 0.7207, "step": 5510 }, { "epoch": 0.14, "grad_norm": 2.4819376468658447, "learning_rate": 1.9357974441208582e-05, "loss": 0.7954, "step": 5511 }, { "epoch": 0.14, "grad_norm": 2.792396068572998, "learning_rate": 1.9357681751939124e-05, "loss": 0.7228, "step": 5512 }, { "epoch": 0.14, "grad_norm": 6.720598220825195, "learning_rate": 1.93573889981824e-05, "loss": 0.7051, "step": 5513 }, { "epoch": 0.14, "grad_norm": 2.1436901092529297, "learning_rate": 1.9357096179940437e-05, "loss": 0.611, "step": 5514 }, { "epoch": 0.14, "grad_norm": 1.9302783012390137, "learning_rate": 1.9356803297215244e-05, "loss": 0.6498, "step": 5515 }, { "epoch": 0.14, "grad_norm": 7.1393938064575195, "learning_rate": 1.935651035000884e-05, "loss": 0.7137, "step": 5516 }, { "epoch": 0.14, "grad_norm": 2.2020180225372314, "learning_rate": 1.9356217338323248e-05, "loss": 0.6199, "step": 5517 }, { "epoch": 0.14, "grad_norm": 1.4387590885162354, "learning_rate": 1.9355924262160483e-05, "loss": 0.6611, "step": 5518 }, { "epoch": 0.14, "grad_norm": 4.461764335632324, "learning_rate": 1.9355631121522563e-05, "loss": 0.9017, "step": 5519 }, { "epoch": 0.14, "grad_norm": 2.479327440261841, "learning_rate": 1.9355337916411514e-05, "loss": 0.5104, "step": 5520 }, { "epoch": 0.14, "grad_norm": 2.1629600524902344, "learning_rate": 1.9355044646829352e-05, "loss": 0.6357, "step": 5521 }, { "epoch": 0.14, "grad_norm": 2.6012210845947266, "learning_rate": 1.9354751312778102e-05, "loss": 0.7226, "step": 5522 }, { "epoch": 0.14, "grad_norm": 2.118734359741211, "learning_rate": 1.9354457914259782e-05, "loss": 0.6175, "step": 5523 }, { "epoch": 0.14, "grad_norm": 3.059828519821167, "learning_rate": 1.9354164451276417e-05, "loss": 0.6135, "step": 5524 }, { "epoch": 0.14, "grad_norm": 1.7887641191482544, "learning_rate": 1.9353870923830022e-05, "loss": 0.7434, "step": 5525 }, { "epoch": 0.14, "grad_norm": 3.549168586730957, "learning_rate": 1.935357733192263e-05, "loss": 0.6671, "step": 5526 }, { "epoch": 0.14, "grad_norm": 6.773070335388184, "learning_rate": 1.9353283675556257e-05, "loss": 0.6343, "step": 5527 }, { "epoch": 0.14, "grad_norm": 7.832727909088135, "learning_rate": 1.935298995473293e-05, "loss": 0.728, "step": 5528 }, { "epoch": 0.14, "grad_norm": 4.17554235458374, "learning_rate": 1.9352696169454675e-05, "loss": 0.7775, "step": 5529 }, { "epoch": 0.14, "grad_norm": 3.5462353229522705, "learning_rate": 1.9352402319723513e-05, "loss": 0.5508, "step": 5530 }, { "epoch": 0.14, "grad_norm": 2.147876262664795, "learning_rate": 1.9352108405541468e-05, "loss": 0.6782, "step": 5531 }, { "epoch": 0.14, "grad_norm": 2.429924488067627, "learning_rate": 1.935181442691057e-05, "loss": 0.7218, "step": 5532 }, { "epoch": 0.14, "grad_norm": 6.23410701751709, "learning_rate": 1.935152038383284e-05, "loss": 0.852, "step": 5533 }, { "epoch": 0.14, "grad_norm": 4.283238410949707, "learning_rate": 1.9351226276310306e-05, "loss": 0.8202, "step": 5534 }, { "epoch": 0.14, "grad_norm": 5.638176441192627, "learning_rate": 1.9350932104344995e-05, "loss": 0.8257, "step": 5535 }, { "epoch": 0.14, "grad_norm": 3.7844839096069336, "learning_rate": 1.935063786793894e-05, "loss": 0.608, "step": 5536 }, { "epoch": 0.14, "grad_norm": 2.4085004329681396, "learning_rate": 1.935034356709416e-05, "loss": 0.7112, "step": 5537 }, { "epoch": 0.14, "grad_norm": 2.1756503582000732, "learning_rate": 1.9350049201812684e-05, "loss": 0.6109, "step": 5538 }, { "epoch": 0.14, "grad_norm": 2.8717238903045654, "learning_rate": 1.9349754772096545e-05, "loss": 0.6387, "step": 5539 }, { "epoch": 0.14, "grad_norm": 3.800931930541992, "learning_rate": 1.9349460277947772e-05, "loss": 0.7353, "step": 5540 }, { "epoch": 0.14, "grad_norm": 1.8721413612365723, "learning_rate": 1.934916571936839e-05, "loss": 0.653, "step": 5541 }, { "epoch": 0.14, "grad_norm": 3.5774199962615967, "learning_rate": 1.9348871096360434e-05, "loss": 0.8222, "step": 5542 }, { "epoch": 0.14, "grad_norm": 2.499563694000244, "learning_rate": 1.934857640892593e-05, "loss": 0.625, "step": 5543 }, { "epoch": 0.14, "grad_norm": 2.6530344486236572, "learning_rate": 1.934828165706691e-05, "loss": 0.8852, "step": 5544 }, { "epoch": 0.14, "grad_norm": 2.7920916080474854, "learning_rate": 1.9347986840785412e-05, "loss": 0.7096, "step": 5545 }, { "epoch": 0.14, "grad_norm": 2.5702497959136963, "learning_rate": 1.9347691960083453e-05, "loss": 0.6546, "step": 5546 }, { "epoch": 0.14, "grad_norm": 3.6669070720672607, "learning_rate": 1.934739701496308e-05, "loss": 0.6211, "step": 5547 }, { "epoch": 0.14, "grad_norm": 1.3921399116516113, "learning_rate": 1.934710200542632e-05, "loss": 0.6465, "step": 5548 }, { "epoch": 0.14, "grad_norm": 1.978554129600525, "learning_rate": 1.9346806931475202e-05, "loss": 0.6464, "step": 5549 }, { "epoch": 0.14, "grad_norm": 1.8672635555267334, "learning_rate": 1.9346511793111764e-05, "loss": 0.6551, "step": 5550 }, { "epoch": 0.14, "grad_norm": 3.982724666595459, "learning_rate": 1.934621659033804e-05, "loss": 0.7125, "step": 5551 }, { "epoch": 0.14, "grad_norm": 2.049579381942749, "learning_rate": 1.934592132315606e-05, "loss": 0.7272, "step": 5552 }, { "epoch": 0.14, "grad_norm": 2.4418587684631348, "learning_rate": 1.9345625991567866e-05, "loss": 0.5516, "step": 5553 }, { "epoch": 0.14, "grad_norm": 3.5333943367004395, "learning_rate": 1.9345330595575484e-05, "loss": 0.9554, "step": 5554 }, { "epoch": 0.14, "grad_norm": 1.908542275428772, "learning_rate": 1.934503513518096e-05, "loss": 0.6144, "step": 5555 }, { "epoch": 0.14, "grad_norm": 2.5048179626464844, "learning_rate": 1.934473961038632e-05, "loss": 0.7216, "step": 5556 }, { "epoch": 0.14, "grad_norm": 4.969273567199707, "learning_rate": 1.9344444021193612e-05, "loss": 0.9692, "step": 5557 }, { "epoch": 0.14, "grad_norm": 1.9174047708511353, "learning_rate": 1.9344148367604863e-05, "loss": 0.7906, "step": 5558 }, { "epoch": 0.14, "grad_norm": 5.429101943969727, "learning_rate": 1.9343852649622112e-05, "loss": 0.6895, "step": 5559 }, { "epoch": 0.14, "grad_norm": 3.081444025039673, "learning_rate": 1.93435568672474e-05, "loss": 0.6503, "step": 5560 }, { "epoch": 0.14, "grad_norm": 2.8984806537628174, "learning_rate": 1.9343261020482766e-05, "loss": 0.6622, "step": 5561 }, { "epoch": 0.14, "grad_norm": 1.8819022178649902, "learning_rate": 1.9342965109330248e-05, "loss": 0.6999, "step": 5562 }, { "epoch": 0.14, "grad_norm": 3.380150318145752, "learning_rate": 1.9342669133791882e-05, "loss": 0.8683, "step": 5563 }, { "epoch": 0.14, "grad_norm": 2.390700340270996, "learning_rate": 1.9342373093869708e-05, "loss": 0.7242, "step": 5564 }, { "epoch": 0.14, "grad_norm": 2.450279951095581, "learning_rate": 1.934207698956577e-05, "loss": 0.6488, "step": 5565 }, { "epoch": 0.14, "grad_norm": 1.759040355682373, "learning_rate": 1.9341780820882106e-05, "loss": 0.5378, "step": 5566 }, { "epoch": 0.14, "grad_norm": 2.8664700984954834, "learning_rate": 1.934148458782076e-05, "loss": 0.7319, "step": 5567 }, { "epoch": 0.14, "grad_norm": 2.0337300300598145, "learning_rate": 1.934118829038377e-05, "loss": 0.7194, "step": 5568 }, { "epoch": 0.14, "grad_norm": 1.7799402475357056, "learning_rate": 1.934089192857318e-05, "loss": 0.8038, "step": 5569 }, { "epoch": 0.14, "grad_norm": 2.0190999507904053, "learning_rate": 1.934059550239103e-05, "loss": 0.7647, "step": 5570 }, { "epoch": 0.14, "grad_norm": 2.545408248901367, "learning_rate": 1.9340299011839365e-05, "loss": 0.7763, "step": 5571 }, { "epoch": 0.14, "grad_norm": 2.721043348312378, "learning_rate": 1.9340002456920228e-05, "loss": 0.6864, "step": 5572 }, { "epoch": 0.14, "grad_norm": 3.500913143157959, "learning_rate": 1.933970583763566e-05, "loss": 0.6974, "step": 5573 }, { "epoch": 0.14, "grad_norm": 3.1067287921905518, "learning_rate": 1.9339409153987708e-05, "loss": 0.5618, "step": 5574 }, { "epoch": 0.14, "grad_norm": 2.040121555328369, "learning_rate": 1.9339112405978415e-05, "loss": 0.5817, "step": 5575 }, { "epoch": 0.14, "grad_norm": 4.067219257354736, "learning_rate": 1.9338815593609827e-05, "loss": 0.7264, "step": 5576 }, { "epoch": 0.14, "grad_norm": 1.394147515296936, "learning_rate": 1.933851871688399e-05, "loss": 0.6695, "step": 5577 }, { "epoch": 0.14, "grad_norm": 2.398707866668701, "learning_rate": 1.9338221775802946e-05, "loss": 0.6536, "step": 5578 }, { "epoch": 0.14, "grad_norm": 2.79201602935791, "learning_rate": 1.933792477036875e-05, "loss": 0.5976, "step": 5579 }, { "epoch": 0.14, "grad_norm": 4.583566188812256, "learning_rate": 1.9337627700583437e-05, "loss": 0.7629, "step": 5580 }, { "epoch": 0.14, "grad_norm": 2.80326247215271, "learning_rate": 1.9337330566449063e-05, "loss": 0.675, "step": 5581 }, { "epoch": 0.14, "grad_norm": 2.1040797233581543, "learning_rate": 1.9337033367967675e-05, "loss": 0.6433, "step": 5582 }, { "epoch": 0.14, "grad_norm": 1.9097142219543457, "learning_rate": 1.9336736105141318e-05, "loss": 0.5931, "step": 5583 }, { "epoch": 0.14, "grad_norm": 3.419839859008789, "learning_rate": 1.933643877797204e-05, "loss": 0.7233, "step": 5584 }, { "epoch": 0.14, "grad_norm": 2.1086795330047607, "learning_rate": 1.9336141386461894e-05, "loss": 0.6408, "step": 5585 }, { "epoch": 0.14, "grad_norm": 2.7393057346343994, "learning_rate": 1.9335843930612927e-05, "loss": 0.6626, "step": 5586 }, { "epoch": 0.14, "grad_norm": 2.2706186771392822, "learning_rate": 1.9335546410427186e-05, "loss": 0.6112, "step": 5587 }, { "epoch": 0.14, "grad_norm": 1.5867104530334473, "learning_rate": 1.9335248825906728e-05, "loss": 0.4839, "step": 5588 }, { "epoch": 0.14, "grad_norm": 1.6997580528259277, "learning_rate": 1.93349511770536e-05, "loss": 0.6593, "step": 5589 }, { "epoch": 0.14, "grad_norm": 1.477785587310791, "learning_rate": 1.933465346386985e-05, "loss": 0.5772, "step": 5590 }, { "epoch": 0.14, "grad_norm": 1.7829790115356445, "learning_rate": 1.9334355686357535e-05, "loss": 0.5241, "step": 5591 }, { "epoch": 0.14, "grad_norm": 3.6747243404388428, "learning_rate": 1.9334057844518704e-05, "loss": 0.8313, "step": 5592 }, { "epoch": 0.14, "grad_norm": 1.9443838596343994, "learning_rate": 1.933375993835541e-05, "loss": 0.5386, "step": 5593 }, { "epoch": 0.14, "grad_norm": 2.0436041355133057, "learning_rate": 1.933346196786971e-05, "loss": 0.7424, "step": 5594 }, { "epoch": 0.14, "grad_norm": 3.472824811935425, "learning_rate": 1.933316393306365e-05, "loss": 0.584, "step": 5595 }, { "epoch": 0.14, "grad_norm": 4.0067667961120605, "learning_rate": 1.9332865833939292e-05, "loss": 0.7794, "step": 5596 }, { "epoch": 0.14, "grad_norm": 3.1172382831573486, "learning_rate": 1.9332567670498682e-05, "loss": 0.6978, "step": 5597 }, { "epoch": 0.14, "grad_norm": 4.24906587600708, "learning_rate": 1.933226944274388e-05, "loss": 0.6602, "step": 5598 }, { "epoch": 0.14, "grad_norm": 3.114173173904419, "learning_rate": 1.933197115067694e-05, "loss": 0.6682, "step": 5599 }, { "epoch": 0.14, "grad_norm": 2.472043991088867, "learning_rate": 1.933167279429992e-05, "loss": 0.7894, "step": 5600 }, { "epoch": 0.14, "grad_norm": 2.8137612342834473, "learning_rate": 1.933137437361487e-05, "loss": 0.7562, "step": 5601 }, { "epoch": 0.14, "grad_norm": 3.1847336292266846, "learning_rate": 1.9331075888623854e-05, "loss": 0.6861, "step": 5602 }, { "epoch": 0.14, "grad_norm": 1.9231462478637695, "learning_rate": 1.9330777339328922e-05, "loss": 0.395, "step": 5603 }, { "epoch": 0.14, "grad_norm": 2.6063642501831055, "learning_rate": 1.933047872573214e-05, "loss": 0.825, "step": 5604 }, { "epoch": 0.14, "grad_norm": 2.20532488822937, "learning_rate": 1.9330180047835558e-05, "loss": 0.7395, "step": 5605 }, { "epoch": 0.14, "grad_norm": 2.7807106971740723, "learning_rate": 1.9329881305641235e-05, "loss": 0.4684, "step": 5606 }, { "epoch": 0.14, "grad_norm": 2.332834482192993, "learning_rate": 1.9329582499151233e-05, "loss": 0.6882, "step": 5607 }, { "epoch": 0.14, "grad_norm": 2.9608495235443115, "learning_rate": 1.932928362836761e-05, "loss": 0.777, "step": 5608 }, { "epoch": 0.14, "grad_norm": 1.6452628374099731, "learning_rate": 1.9328984693292426e-05, "loss": 0.7603, "step": 5609 }, { "epoch": 0.14, "grad_norm": 2.641505479812622, "learning_rate": 1.9328685693927736e-05, "loss": 0.7603, "step": 5610 }, { "epoch": 0.14, "grad_norm": 3.44281268119812, "learning_rate": 1.932838663027561e-05, "loss": 0.8916, "step": 5611 }, { "epoch": 0.14, "grad_norm": 3.004483938217163, "learning_rate": 1.9328087502338104e-05, "loss": 0.6958, "step": 5612 }, { "epoch": 0.14, "grad_norm": 2.5683059692382812, "learning_rate": 1.932778831011728e-05, "loss": 0.7898, "step": 5613 }, { "epoch": 0.14, "grad_norm": 2.3051254749298096, "learning_rate": 1.9327489053615194e-05, "loss": 0.8125, "step": 5614 }, { "epoch": 0.14, "grad_norm": 2.442858934402466, "learning_rate": 1.932718973283392e-05, "loss": 0.5937, "step": 5615 }, { "epoch": 0.14, "grad_norm": 4.016500473022461, "learning_rate": 1.932689034777551e-05, "loss": 0.7084, "step": 5616 }, { "epoch": 0.14, "grad_norm": 4.148168087005615, "learning_rate": 1.932659089844203e-05, "loss": 0.6945, "step": 5617 }, { "epoch": 0.14, "grad_norm": 4.030148029327393, "learning_rate": 1.932629138483555e-05, "loss": 0.737, "step": 5618 }, { "epoch": 0.14, "grad_norm": 2.7884440422058105, "learning_rate": 1.9325991806958128e-05, "loss": 0.6624, "step": 5619 }, { "epoch": 0.14, "grad_norm": 4.638529300689697, "learning_rate": 1.932569216481183e-05, "loss": 0.7964, "step": 5620 }, { "epoch": 0.14, "grad_norm": 2.160198211669922, "learning_rate": 1.9325392458398715e-05, "loss": 0.7528, "step": 5621 }, { "epoch": 0.14, "grad_norm": 2.563419818878174, "learning_rate": 1.932509268772086e-05, "loss": 0.5149, "step": 5622 }, { "epoch": 0.14, "grad_norm": 2.731300115585327, "learning_rate": 1.9324792852780323e-05, "loss": 0.6114, "step": 5623 }, { "epoch": 0.14, "grad_norm": 5.740233898162842, "learning_rate": 1.932449295357917e-05, "loss": 0.8697, "step": 5624 }, { "epoch": 0.14, "grad_norm": 3.271696090698242, "learning_rate": 1.9324192990119474e-05, "loss": 0.6991, "step": 5625 }, { "epoch": 0.14, "grad_norm": 2.1372997760772705, "learning_rate": 1.9323892962403295e-05, "loss": 0.764, "step": 5626 }, { "epoch": 0.14, "grad_norm": 4.581451416015625, "learning_rate": 1.9323592870432704e-05, "loss": 0.7585, "step": 5627 }, { "epoch": 0.14, "grad_norm": 1.8867582082748413, "learning_rate": 1.932329271420977e-05, "loss": 0.6423, "step": 5628 }, { "epoch": 0.14, "grad_norm": 1.7947258949279785, "learning_rate": 1.9322992493736558e-05, "loss": 0.6933, "step": 5629 }, { "epoch": 0.14, "grad_norm": 2.0729081630706787, "learning_rate": 1.932269220901514e-05, "loss": 0.7689, "step": 5630 }, { "epoch": 0.14, "grad_norm": 1.7210134267807007, "learning_rate": 1.9322391860047583e-05, "loss": 0.4796, "step": 5631 }, { "epoch": 0.14, "grad_norm": 2.329597234725952, "learning_rate": 1.932209144683596e-05, "loss": 0.7995, "step": 5632 }, { "epoch": 0.14, "grad_norm": 1.7780948877334595, "learning_rate": 1.9321790969382338e-05, "loss": 0.6354, "step": 5633 }, { "epoch": 0.14, "grad_norm": 2.502253532409668, "learning_rate": 1.9321490427688792e-05, "loss": 0.774, "step": 5634 }, { "epoch": 0.14, "grad_norm": 1.8155579566955566, "learning_rate": 1.9321189821757386e-05, "loss": 0.5968, "step": 5635 }, { "epoch": 0.14, "grad_norm": 1.5373562574386597, "learning_rate": 1.93208891515902e-05, "loss": 0.6689, "step": 5636 }, { "epoch": 0.14, "grad_norm": 1.4915814399719238, "learning_rate": 1.93205884171893e-05, "loss": 0.6931, "step": 5637 }, { "epoch": 0.14, "grad_norm": 1.533111810684204, "learning_rate": 1.932028761855676e-05, "loss": 0.646, "step": 5638 }, { "epoch": 0.14, "grad_norm": 2.4825003147125244, "learning_rate": 1.9319986755694657e-05, "loss": 0.6679, "step": 5639 }, { "epoch": 0.14, "grad_norm": 2.167708396911621, "learning_rate": 1.9319685828605054e-05, "loss": 0.8081, "step": 5640 }, { "epoch": 0.14, "grad_norm": 2.098334312438965, "learning_rate": 1.9319384837290035e-05, "loss": 0.5652, "step": 5641 }, { "epoch": 0.14, "grad_norm": 2.216139316558838, "learning_rate": 1.9319083781751674e-05, "loss": 0.6009, "step": 5642 }, { "epoch": 0.14, "grad_norm": 1.7641029357910156, "learning_rate": 1.9318782661992037e-05, "loss": 0.6806, "step": 5643 }, { "epoch": 0.14, "grad_norm": 1.4438915252685547, "learning_rate": 1.9318481478013207e-05, "loss": 0.6957, "step": 5644 }, { "epoch": 0.14, "grad_norm": 1.723944067955017, "learning_rate": 1.931818022981726e-05, "loss": 0.7578, "step": 5645 }, { "epoch": 0.14, "grad_norm": 2.0347588062286377, "learning_rate": 1.9317878917406264e-05, "loss": 0.6887, "step": 5646 }, { "epoch": 0.14, "grad_norm": 1.388226866722107, "learning_rate": 1.93175775407823e-05, "loss": 0.696, "step": 5647 }, { "epoch": 0.14, "grad_norm": 4.401010990142822, "learning_rate": 1.9317276099947448e-05, "loss": 0.6375, "step": 5648 }, { "epoch": 0.14, "grad_norm": 1.6968104839324951, "learning_rate": 1.9316974594903782e-05, "loss": 0.5437, "step": 5649 }, { "epoch": 0.14, "grad_norm": 4.522639751434326, "learning_rate": 1.9316673025653383e-05, "loss": 0.8863, "step": 5650 }, { "epoch": 0.14, "grad_norm": 2.0270161628723145, "learning_rate": 1.9316371392198326e-05, "loss": 0.6967, "step": 5651 }, { "epoch": 0.14, "grad_norm": 1.7539793252944946, "learning_rate": 1.931606969454069e-05, "loss": 0.6404, "step": 5652 }, { "epoch": 0.14, "grad_norm": 1.6114803552627563, "learning_rate": 1.9315767932682553e-05, "loss": 0.7492, "step": 5653 }, { "epoch": 0.14, "grad_norm": 4.941051006317139, "learning_rate": 1.9315466106625997e-05, "loss": 0.8318, "step": 5654 }, { "epoch": 0.14, "grad_norm": 2.4201793670654297, "learning_rate": 1.9315164216373102e-05, "loss": 0.6996, "step": 5655 }, { "epoch": 0.14, "grad_norm": 4.493096351623535, "learning_rate": 1.9314862261925945e-05, "loss": 0.6667, "step": 5656 }, { "epoch": 0.14, "grad_norm": 1.6736831665039062, "learning_rate": 1.9314560243286613e-05, "loss": 0.7258, "step": 5657 }, { "epoch": 0.15, "grad_norm": 4.080166816711426, "learning_rate": 1.9314258160457182e-05, "loss": 0.8042, "step": 5658 }, { "epoch": 0.15, "grad_norm": 3.169782876968384, "learning_rate": 1.9313956013439738e-05, "loss": 0.8364, "step": 5659 }, { "epoch": 0.15, "grad_norm": 3.0376720428466797, "learning_rate": 1.931365380223636e-05, "loss": 0.7039, "step": 5660 }, { "epoch": 0.15, "grad_norm": 2.133488416671753, "learning_rate": 1.9313351526849126e-05, "loss": 0.665, "step": 5661 }, { "epoch": 0.15, "grad_norm": 2.2740445137023926, "learning_rate": 1.931304918728013e-05, "loss": 0.6271, "step": 5662 }, { "epoch": 0.15, "grad_norm": 5.943812370300293, "learning_rate": 1.9312746783531448e-05, "loss": 1.0708, "step": 5663 }, { "epoch": 0.15, "grad_norm": 1.5032446384429932, "learning_rate": 1.931244431560517e-05, "loss": 0.6764, "step": 5664 }, { "epoch": 0.15, "grad_norm": 2.0689656734466553, "learning_rate": 1.9312141783503372e-05, "loss": 0.6434, "step": 5665 }, { "epoch": 0.15, "grad_norm": 2.032130241394043, "learning_rate": 1.9311839187228143e-05, "loss": 0.7307, "step": 5666 }, { "epoch": 0.15, "grad_norm": 2.297243356704712, "learning_rate": 1.931153652678157e-05, "loss": 0.7095, "step": 5667 }, { "epoch": 0.15, "grad_norm": 1.8480565547943115, "learning_rate": 1.9311233802165737e-05, "loss": 0.5477, "step": 5668 }, { "epoch": 0.15, "grad_norm": 2.9964921474456787, "learning_rate": 1.931093101338273e-05, "loss": 0.742, "step": 5669 }, { "epoch": 0.15, "grad_norm": 2.61457896232605, "learning_rate": 1.9310628160434638e-05, "loss": 0.6021, "step": 5670 }, { "epoch": 0.15, "grad_norm": 4.57523250579834, "learning_rate": 1.9310325243323545e-05, "loss": 0.6941, "step": 5671 }, { "epoch": 0.15, "grad_norm": 2.266371011734009, "learning_rate": 1.9310022262051536e-05, "loss": 0.6214, "step": 5672 }, { "epoch": 0.15, "grad_norm": 7.512096405029297, "learning_rate": 1.930971921662071e-05, "loss": 0.5975, "step": 5673 }, { "epoch": 0.15, "grad_norm": 7.333917140960693, "learning_rate": 1.9309416107033145e-05, "loss": 0.8109, "step": 5674 }, { "epoch": 0.15, "grad_norm": 4.2960357666015625, "learning_rate": 1.930911293329093e-05, "loss": 0.7743, "step": 5675 }, { "epoch": 0.15, "grad_norm": 2.951702356338501, "learning_rate": 1.930880969539616e-05, "loss": 0.651, "step": 5676 }, { "epoch": 0.15, "grad_norm": 0.825618326663971, "learning_rate": 1.9308506393350923e-05, "loss": 0.5846, "step": 5677 }, { "epoch": 0.15, "grad_norm": 4.080297946929932, "learning_rate": 1.9308203027157307e-05, "loss": 0.8725, "step": 5678 }, { "epoch": 0.15, "grad_norm": 1.339911937713623, "learning_rate": 1.9307899596817405e-05, "loss": 0.7477, "step": 5679 }, { "epoch": 0.15, "grad_norm": 1.6309912204742432, "learning_rate": 1.9307596102333303e-05, "loss": 0.7324, "step": 5680 }, { "epoch": 0.15, "grad_norm": 2.2065200805664062, "learning_rate": 1.93072925437071e-05, "loss": 0.5912, "step": 5681 }, { "epoch": 0.15, "grad_norm": 3.0705056190490723, "learning_rate": 1.9306988920940882e-05, "loss": 0.7973, "step": 5682 }, { "epoch": 0.15, "grad_norm": 2.22117018699646, "learning_rate": 1.9306685234036747e-05, "loss": 0.6186, "step": 5683 }, { "epoch": 0.15, "grad_norm": 2.319479465484619, "learning_rate": 1.930638148299678e-05, "loss": 0.7385, "step": 5684 }, { "epoch": 0.15, "grad_norm": 2.9732887744903564, "learning_rate": 1.930607766782308e-05, "loss": 0.6539, "step": 5685 }, { "epoch": 0.15, "grad_norm": 2.0955002307891846, "learning_rate": 1.930577378851774e-05, "loss": 0.8017, "step": 5686 }, { "epoch": 0.15, "grad_norm": 1.4967674016952515, "learning_rate": 1.9305469845082852e-05, "loss": 0.5919, "step": 5687 }, { "epoch": 0.15, "grad_norm": 1.4001024961471558, "learning_rate": 1.9305165837520517e-05, "loss": 0.5838, "step": 5688 }, { "epoch": 0.15, "grad_norm": 1.7498235702514648, "learning_rate": 1.930486176583282e-05, "loss": 0.7309, "step": 5689 }, { "epoch": 0.15, "grad_norm": 2.8443610668182373, "learning_rate": 1.9304557630021863e-05, "loss": 0.6955, "step": 5690 }, { "epoch": 0.15, "grad_norm": 2.703125238418579, "learning_rate": 1.930425343008974e-05, "loss": 0.8172, "step": 5691 }, { "epoch": 0.15, "grad_norm": 1.603468894958496, "learning_rate": 1.930394916603855e-05, "loss": 0.5624, "step": 5692 }, { "epoch": 0.15, "grad_norm": 3.1256189346313477, "learning_rate": 1.9303644837870385e-05, "loss": 0.7036, "step": 5693 }, { "epoch": 0.15, "grad_norm": 4.474739074707031, "learning_rate": 1.9303340445587345e-05, "loss": 0.5647, "step": 5694 }, { "epoch": 0.15, "grad_norm": 1.8379920721054077, "learning_rate": 1.930303598919153e-05, "loss": 0.4911, "step": 5695 }, { "epoch": 0.15, "grad_norm": 5.724305629730225, "learning_rate": 1.9302731468685033e-05, "loss": 0.7473, "step": 5696 }, { "epoch": 0.15, "grad_norm": 3.3725993633270264, "learning_rate": 1.930242688406996e-05, "loss": 0.6784, "step": 5697 }, { "epoch": 0.15, "grad_norm": 3.7578043937683105, "learning_rate": 1.9302122235348402e-05, "loss": 0.7238, "step": 5698 }, { "epoch": 0.15, "grad_norm": 1.5031431913375854, "learning_rate": 1.930181752252246e-05, "loss": 0.7006, "step": 5699 }, { "epoch": 0.15, "grad_norm": 2.9669411182403564, "learning_rate": 1.9301512745594237e-05, "loss": 0.7124, "step": 5700 }, { "epoch": 0.15, "grad_norm": 4.262080192565918, "learning_rate": 1.9301207904565834e-05, "loss": 0.5331, "step": 5701 }, { "epoch": 0.15, "grad_norm": 1.9133776426315308, "learning_rate": 1.9300902999439347e-05, "loss": 0.6783, "step": 5702 }, { "epoch": 0.15, "grad_norm": 2.9418399333953857, "learning_rate": 1.9300598030216878e-05, "loss": 0.7107, "step": 5703 }, { "epoch": 0.15, "grad_norm": 4.136821746826172, "learning_rate": 1.9300292996900537e-05, "loss": 0.7552, "step": 5704 }, { "epoch": 0.15, "grad_norm": 1.4681341648101807, "learning_rate": 1.9299987899492418e-05, "loss": 0.6711, "step": 5705 }, { "epoch": 0.15, "grad_norm": 3.204540491104126, "learning_rate": 1.929968273799462e-05, "loss": 0.4692, "step": 5706 }, { "epoch": 0.15, "grad_norm": 2.4175801277160645, "learning_rate": 1.9299377512409255e-05, "loss": 0.5536, "step": 5707 }, { "epoch": 0.15, "grad_norm": 5.689659118652344, "learning_rate": 1.9299072222738423e-05, "loss": 0.6168, "step": 5708 }, { "epoch": 0.15, "grad_norm": 2.6208367347717285, "learning_rate": 1.9298766868984225e-05, "loss": 0.732, "step": 5709 }, { "epoch": 0.15, "grad_norm": 3.389472484588623, "learning_rate": 1.929846145114877e-05, "loss": 0.6101, "step": 5710 }, { "epoch": 0.15, "grad_norm": 1.4244247674942017, "learning_rate": 1.929815596923416e-05, "loss": 0.6221, "step": 5711 }, { "epoch": 0.15, "grad_norm": 1.76738703250885, "learning_rate": 1.9297850423242503e-05, "loss": 0.6293, "step": 5712 }, { "epoch": 0.15, "grad_norm": 3.4592249393463135, "learning_rate": 1.92975448131759e-05, "loss": 0.684, "step": 5713 }, { "epoch": 0.15, "grad_norm": 2.5663537979125977, "learning_rate": 1.929723913903646e-05, "loss": 0.7998, "step": 5714 }, { "epoch": 0.15, "grad_norm": 1.171718716621399, "learning_rate": 1.9296933400826286e-05, "loss": 0.637, "step": 5715 }, { "epoch": 0.15, "grad_norm": 2.032400608062744, "learning_rate": 1.9296627598547492e-05, "loss": 0.8565, "step": 5716 }, { "epoch": 0.15, "grad_norm": 2.0115652084350586, "learning_rate": 1.9296321732202176e-05, "loss": 0.5688, "step": 5717 }, { "epoch": 0.15, "grad_norm": 2.4400391578674316, "learning_rate": 1.9296015801792457e-05, "loss": 0.664, "step": 5718 }, { "epoch": 0.15, "grad_norm": 5.194868087768555, "learning_rate": 1.9295709807320435e-05, "loss": 0.8319, "step": 5719 }, { "epoch": 0.15, "grad_norm": 1.4042922258377075, "learning_rate": 1.929540374878822e-05, "loss": 0.6773, "step": 5720 }, { "epoch": 0.15, "grad_norm": 2.6862542629241943, "learning_rate": 1.9295097626197925e-05, "loss": 0.8283, "step": 5721 }, { "epoch": 0.15, "grad_norm": 1.614692211151123, "learning_rate": 1.9294791439551653e-05, "loss": 0.5375, "step": 5722 }, { "epoch": 0.15, "grad_norm": 1.9366549253463745, "learning_rate": 1.9294485188851524e-05, "loss": 0.6871, "step": 5723 }, { "epoch": 0.15, "grad_norm": 2.1627988815307617, "learning_rate": 1.929417887409964e-05, "loss": 0.7665, "step": 5724 }, { "epoch": 0.15, "grad_norm": 2.5303337574005127, "learning_rate": 1.9293872495298113e-05, "loss": 0.7532, "step": 5725 }, { "epoch": 0.15, "grad_norm": 2.6062979698181152, "learning_rate": 1.9293566052449054e-05, "loss": 0.5728, "step": 5726 }, { "epoch": 0.15, "grad_norm": 1.5925660133361816, "learning_rate": 1.929325954555458e-05, "loss": 0.5722, "step": 5727 }, { "epoch": 0.15, "grad_norm": 2.637249231338501, "learning_rate": 1.9292952974616795e-05, "loss": 0.5398, "step": 5728 }, { "epoch": 0.15, "grad_norm": 7.049862384796143, "learning_rate": 1.9292646339637825e-05, "loss": 0.7979, "step": 5729 }, { "epoch": 0.15, "grad_norm": 1.4261877536773682, "learning_rate": 1.9292339640619768e-05, "loss": 0.5905, "step": 5730 }, { "epoch": 0.15, "grad_norm": 2.551703453063965, "learning_rate": 1.9292032877564746e-05, "loss": 0.7754, "step": 5731 }, { "epoch": 0.15, "grad_norm": 9.804439544677734, "learning_rate": 1.929172605047487e-05, "loss": 0.6349, "step": 5732 }, { "epoch": 0.15, "grad_norm": 8.003095626831055, "learning_rate": 1.929141915935226e-05, "loss": 1.0613, "step": 5733 }, { "epoch": 0.15, "grad_norm": 2.0089547634124756, "learning_rate": 1.9291112204199022e-05, "loss": 0.633, "step": 5734 }, { "epoch": 0.15, "grad_norm": 2.0637028217315674, "learning_rate": 1.9290805185017277e-05, "loss": 0.604, "step": 5735 }, { "epoch": 0.15, "grad_norm": 3.0076894760131836, "learning_rate": 1.929049810180914e-05, "loss": 0.575, "step": 5736 }, { "epoch": 0.15, "grad_norm": 2.825040340423584, "learning_rate": 1.9290190954576726e-05, "loss": 0.6274, "step": 5737 }, { "epoch": 0.15, "grad_norm": 2.972344160079956, "learning_rate": 1.9289883743322155e-05, "loss": 0.653, "step": 5738 }, { "epoch": 0.15, "grad_norm": 2.7297375202178955, "learning_rate": 1.928957646804754e-05, "loss": 0.908, "step": 5739 }, { "epoch": 0.15, "grad_norm": 1.4700217247009277, "learning_rate": 1.9289269128755e-05, "loss": 0.6505, "step": 5740 }, { "epoch": 0.15, "grad_norm": 3.4395995140075684, "learning_rate": 1.9288961725446654e-05, "loss": 0.7128, "step": 5741 }, { "epoch": 0.15, "grad_norm": 2.4236056804656982, "learning_rate": 1.928865425812462e-05, "loss": 0.6674, "step": 5742 }, { "epoch": 0.15, "grad_norm": 3.385986804962158, "learning_rate": 1.9288346726791015e-05, "loss": 0.5061, "step": 5743 }, { "epoch": 0.15, "grad_norm": 2.4416542053222656, "learning_rate": 1.9288039131447962e-05, "loss": 0.6526, "step": 5744 }, { "epoch": 0.15, "grad_norm": 2.259244918823242, "learning_rate": 1.9287731472097577e-05, "loss": 0.69, "step": 5745 }, { "epoch": 0.15, "grad_norm": 8.399290084838867, "learning_rate": 1.928742374874198e-05, "loss": 0.8474, "step": 5746 }, { "epoch": 0.15, "grad_norm": 2.3569467067718506, "learning_rate": 1.9287115961383298e-05, "loss": 0.6843, "step": 5747 }, { "epoch": 0.15, "grad_norm": 2.9720709323883057, "learning_rate": 1.9286808110023645e-05, "loss": 0.7356, "step": 5748 }, { "epoch": 0.15, "grad_norm": 2.92181134223938, "learning_rate": 1.9286500194665145e-05, "loss": 0.7062, "step": 5749 }, { "epoch": 0.15, "grad_norm": 2.759739637374878, "learning_rate": 1.9286192215309924e-05, "loss": 0.5965, "step": 5750 }, { "epoch": 0.15, "grad_norm": 1.3318798542022705, "learning_rate": 1.9285884171960093e-05, "loss": 0.7763, "step": 5751 }, { "epoch": 0.15, "grad_norm": 2.4931764602661133, "learning_rate": 1.9285576064617787e-05, "loss": 0.717, "step": 5752 }, { "epoch": 0.15, "grad_norm": 2.1064324378967285, "learning_rate": 1.9285267893285123e-05, "loss": 0.46, "step": 5753 }, { "epoch": 0.15, "grad_norm": 2.445204257965088, "learning_rate": 1.928495965796423e-05, "loss": 0.7423, "step": 5754 }, { "epoch": 0.15, "grad_norm": 2.15035080909729, "learning_rate": 1.9284651358657224e-05, "loss": 0.6648, "step": 5755 }, { "epoch": 0.15, "grad_norm": 5.82537841796875, "learning_rate": 1.9284342995366234e-05, "loss": 0.6711, "step": 5756 }, { "epoch": 0.15, "grad_norm": 2.9558303356170654, "learning_rate": 1.9284034568093387e-05, "loss": 0.7189, "step": 5757 }, { "epoch": 0.15, "grad_norm": 2.2085859775543213, "learning_rate": 1.9283726076840804e-05, "loss": 0.6271, "step": 5758 }, { "epoch": 0.15, "grad_norm": 4.289816856384277, "learning_rate": 1.9283417521610616e-05, "loss": 0.4499, "step": 5759 }, { "epoch": 0.15, "grad_norm": 4.556619167327881, "learning_rate": 1.9283108902404948e-05, "loss": 0.6146, "step": 5760 }, { "epoch": 0.15, "grad_norm": 1.8446013927459717, "learning_rate": 1.9282800219225924e-05, "loss": 0.8046, "step": 5761 }, { "epoch": 0.15, "grad_norm": 1.9934542179107666, "learning_rate": 1.9282491472075672e-05, "loss": 0.6544, "step": 5762 }, { "epoch": 0.15, "grad_norm": 3.191392183303833, "learning_rate": 1.928218266095632e-05, "loss": 0.6819, "step": 5763 }, { "epoch": 0.15, "grad_norm": 2.725581169128418, "learning_rate": 1.928187378587e-05, "loss": 0.6626, "step": 5764 }, { "epoch": 0.15, "grad_norm": 3.7523889541625977, "learning_rate": 1.9281564846818838e-05, "loss": 0.6287, "step": 5765 }, { "epoch": 0.15, "grad_norm": 3.0204617977142334, "learning_rate": 1.9281255843804957e-05, "loss": 0.6992, "step": 5766 }, { "epoch": 0.15, "grad_norm": 2.85583233833313, "learning_rate": 1.9280946776830497e-05, "loss": 0.6683, "step": 5767 }, { "epoch": 0.15, "grad_norm": 6.985625267028809, "learning_rate": 1.928063764589758e-05, "loss": 0.9039, "step": 5768 }, { "epoch": 0.15, "grad_norm": 8.09335994720459, "learning_rate": 1.928032845100834e-05, "loss": 0.713, "step": 5769 }, { "epoch": 0.15, "grad_norm": 1.856234073638916, "learning_rate": 1.9280019192164905e-05, "loss": 0.5962, "step": 5770 }, { "epoch": 0.15, "grad_norm": 2.7586450576782227, "learning_rate": 1.927970986936941e-05, "loss": 0.6838, "step": 5771 }, { "epoch": 0.15, "grad_norm": 9.373138427734375, "learning_rate": 1.927940048262398e-05, "loss": 0.6256, "step": 5772 }, { "epoch": 0.15, "grad_norm": 5.5653839111328125, "learning_rate": 1.927909103193076e-05, "loss": 0.8855, "step": 5773 }, { "epoch": 0.15, "grad_norm": 2.603893756866455, "learning_rate": 1.9278781517291868e-05, "loss": 0.7136, "step": 5774 }, { "epoch": 0.15, "grad_norm": 2.40399432182312, "learning_rate": 1.9278471938709447e-05, "loss": 0.6644, "step": 5775 }, { "epoch": 0.15, "grad_norm": 3.938128709793091, "learning_rate": 1.9278162296185626e-05, "loss": 0.6064, "step": 5776 }, { "epoch": 0.15, "grad_norm": 3.4752111434936523, "learning_rate": 1.9277852589722538e-05, "loss": 0.7298, "step": 5777 }, { "epoch": 0.15, "grad_norm": 1.9033424854278564, "learning_rate": 1.9277542819322316e-05, "loss": 0.6665, "step": 5778 }, { "epoch": 0.15, "grad_norm": 2.0095527172088623, "learning_rate": 1.9277232984987103e-05, "loss": 0.6786, "step": 5779 }, { "epoch": 0.15, "grad_norm": 2.940936803817749, "learning_rate": 1.9276923086719024e-05, "loss": 0.565, "step": 5780 }, { "epoch": 0.15, "grad_norm": 5.370674133300781, "learning_rate": 1.9276613124520223e-05, "loss": 0.8738, "step": 5781 }, { "epoch": 0.15, "grad_norm": 2.2223546504974365, "learning_rate": 1.9276303098392833e-05, "loss": 0.928, "step": 5782 }, { "epoch": 0.15, "grad_norm": 2.123347282409668, "learning_rate": 1.9275993008338986e-05, "loss": 0.7461, "step": 5783 }, { "epoch": 0.15, "grad_norm": 2.901925802230835, "learning_rate": 1.927568285436083e-05, "loss": 0.6097, "step": 5784 }, { "epoch": 0.15, "grad_norm": 3.609428882598877, "learning_rate": 1.9275372636460487e-05, "loss": 0.69, "step": 5785 }, { "epoch": 0.15, "grad_norm": 2.0332727432250977, "learning_rate": 1.927506235464011e-05, "loss": 0.538, "step": 5786 }, { "epoch": 0.15, "grad_norm": 4.2592949867248535, "learning_rate": 1.9274752008901828e-05, "loss": 0.7615, "step": 5787 }, { "epoch": 0.15, "grad_norm": 2.245239734649658, "learning_rate": 1.927444159924778e-05, "loss": 0.7384, "step": 5788 }, { "epoch": 0.15, "grad_norm": 1.9181498289108276, "learning_rate": 1.927413112568011e-05, "loss": 0.7333, "step": 5789 }, { "epoch": 0.15, "grad_norm": 2.284374237060547, "learning_rate": 1.9273820588200952e-05, "loss": 0.6152, "step": 5790 }, { "epoch": 0.15, "grad_norm": 2.3559980392456055, "learning_rate": 1.927350998681245e-05, "loss": 0.7427, "step": 5791 }, { "epoch": 0.15, "grad_norm": 1.5696017742156982, "learning_rate": 1.9273199321516743e-05, "loss": 0.7332, "step": 5792 }, { "epoch": 0.15, "grad_norm": 2.5209808349609375, "learning_rate": 1.9272888592315974e-05, "loss": 0.601, "step": 5793 }, { "epoch": 0.15, "grad_norm": 1.883272647857666, "learning_rate": 1.9272577799212282e-05, "loss": 0.7458, "step": 5794 }, { "epoch": 0.15, "grad_norm": 3.1140427589416504, "learning_rate": 1.927226694220781e-05, "loss": 0.6844, "step": 5795 }, { "epoch": 0.15, "grad_norm": 2.848994731903076, "learning_rate": 1.92719560213047e-05, "loss": 0.6038, "step": 5796 }, { "epoch": 0.15, "grad_norm": 2.6282107830047607, "learning_rate": 1.9271645036505094e-05, "loss": 0.7149, "step": 5797 }, { "epoch": 0.15, "grad_norm": 1.877632975578308, "learning_rate": 1.927133398781113e-05, "loss": 0.6048, "step": 5798 }, { "epoch": 0.15, "grad_norm": 3.2129013538360596, "learning_rate": 1.9271022875224964e-05, "loss": 0.6826, "step": 5799 }, { "epoch": 0.15, "grad_norm": 2.4308440685272217, "learning_rate": 1.9270711698748732e-05, "loss": 0.7821, "step": 5800 }, { "epoch": 0.15, "grad_norm": 2.3115522861480713, "learning_rate": 1.9270400458384576e-05, "loss": 0.6912, "step": 5801 }, { "epoch": 0.15, "grad_norm": 3.8335471153259277, "learning_rate": 1.9270089154134647e-05, "loss": 0.7356, "step": 5802 }, { "epoch": 0.15, "grad_norm": 1.8586804866790771, "learning_rate": 1.9269777786001086e-05, "loss": 0.5339, "step": 5803 }, { "epoch": 0.15, "grad_norm": 2.484095811843872, "learning_rate": 1.9269466353986042e-05, "loss": 0.6466, "step": 5804 }, { "epoch": 0.15, "grad_norm": 2.9560835361480713, "learning_rate": 1.926915485809166e-05, "loss": 0.8348, "step": 5805 }, { "epoch": 0.15, "grad_norm": 3.778874397277832, "learning_rate": 1.9268843298320083e-05, "loss": 0.8969, "step": 5806 }, { "epoch": 0.15, "grad_norm": 3.6425371170043945, "learning_rate": 1.9268531674673464e-05, "loss": 0.8167, "step": 5807 }, { "epoch": 0.15, "grad_norm": 2.9369518756866455, "learning_rate": 1.926821998715395e-05, "loss": 0.8169, "step": 5808 }, { "epoch": 0.15, "grad_norm": 3.4613373279571533, "learning_rate": 1.926790823576368e-05, "loss": 0.9494, "step": 5809 }, { "epoch": 0.15, "grad_norm": 3.606920003890991, "learning_rate": 1.9267596420504815e-05, "loss": 0.5498, "step": 5810 }, { "epoch": 0.15, "grad_norm": 2.074279308319092, "learning_rate": 1.9267284541379493e-05, "loss": 0.6882, "step": 5811 }, { "epoch": 0.15, "grad_norm": 2.4911611080169678, "learning_rate": 1.9266972598389872e-05, "loss": 0.698, "step": 5812 }, { "epoch": 0.15, "grad_norm": 1.2414404153823853, "learning_rate": 1.9266660591538097e-05, "loss": 0.6489, "step": 5813 }, { "epoch": 0.15, "grad_norm": 2.369462251663208, "learning_rate": 1.926634852082632e-05, "loss": 0.6334, "step": 5814 }, { "epoch": 0.15, "grad_norm": 1.7199259996414185, "learning_rate": 1.926603638625669e-05, "loss": 0.7117, "step": 5815 }, { "epoch": 0.15, "grad_norm": 2.201629877090454, "learning_rate": 1.9265724187831355e-05, "loss": 0.7911, "step": 5816 }, { "epoch": 0.15, "grad_norm": 2.077108144760132, "learning_rate": 1.9265411925552475e-05, "loss": 0.7107, "step": 5817 }, { "epoch": 0.15, "grad_norm": 3.5127408504486084, "learning_rate": 1.9265099599422195e-05, "loss": 0.8936, "step": 5818 }, { "epoch": 0.15, "grad_norm": 4.033448219299316, "learning_rate": 1.926478720944267e-05, "loss": 0.7505, "step": 5819 }, { "epoch": 0.15, "grad_norm": 2.185161590576172, "learning_rate": 1.9264474755616053e-05, "loss": 0.5046, "step": 5820 }, { "epoch": 0.15, "grad_norm": 3.434300184249878, "learning_rate": 1.9264162237944498e-05, "loss": 0.7241, "step": 5821 }, { "epoch": 0.15, "grad_norm": 2.5044901371002197, "learning_rate": 1.9263849656430152e-05, "loss": 0.6609, "step": 5822 }, { "epoch": 0.15, "grad_norm": 3.640737295150757, "learning_rate": 1.9263537011075177e-05, "loss": 0.6359, "step": 5823 }, { "epoch": 0.15, "grad_norm": 1.6479880809783936, "learning_rate": 1.9263224301881727e-05, "loss": 0.5346, "step": 5824 }, { "epoch": 0.15, "grad_norm": 2.6184067726135254, "learning_rate": 1.9262911528851954e-05, "loss": 0.646, "step": 5825 }, { "epoch": 0.15, "grad_norm": 3.0940303802490234, "learning_rate": 1.9262598691988014e-05, "loss": 0.7612, "step": 5826 }, { "epoch": 0.15, "grad_norm": 3.5092873573303223, "learning_rate": 1.9262285791292063e-05, "loss": 0.8696, "step": 5827 }, { "epoch": 0.15, "grad_norm": 4.993701934814453, "learning_rate": 1.9261972826766254e-05, "loss": 0.6255, "step": 5828 }, { "epoch": 0.15, "grad_norm": 2.370170831680298, "learning_rate": 1.926165979841275e-05, "loss": 0.6224, "step": 5829 }, { "epoch": 0.15, "grad_norm": 3.2239155769348145, "learning_rate": 1.9261346706233707e-05, "loss": 0.7296, "step": 5830 }, { "epoch": 0.15, "grad_norm": 4.838778972625732, "learning_rate": 1.926103355023128e-05, "loss": 0.703, "step": 5831 }, { "epoch": 0.15, "grad_norm": 2.1964287757873535, "learning_rate": 1.926072033040763e-05, "loss": 0.6989, "step": 5832 }, { "epoch": 0.15, "grad_norm": 3.353456497192383, "learning_rate": 1.9260407046764908e-05, "loss": 0.5605, "step": 5833 }, { "epoch": 0.15, "grad_norm": 2.0336036682128906, "learning_rate": 1.926009369930528e-05, "loss": 0.6061, "step": 5834 }, { "epoch": 0.15, "grad_norm": 1.961838722229004, "learning_rate": 1.9259780288030908e-05, "loss": 0.6006, "step": 5835 }, { "epoch": 0.15, "grad_norm": 2.5488991737365723, "learning_rate": 1.9259466812943945e-05, "loss": 0.6363, "step": 5836 }, { "epoch": 0.15, "grad_norm": 1.630836009979248, "learning_rate": 1.9259153274046555e-05, "loss": 0.5792, "step": 5837 }, { "epoch": 0.15, "grad_norm": 1.6299554109573364, "learning_rate": 1.92588396713409e-05, "loss": 0.5641, "step": 5838 }, { "epoch": 0.15, "grad_norm": 2.235926628112793, "learning_rate": 1.9258526004829135e-05, "loss": 0.651, "step": 5839 }, { "epoch": 0.15, "grad_norm": 3.031576633453369, "learning_rate": 1.9258212274513428e-05, "loss": 0.7294, "step": 5840 }, { "epoch": 0.15, "grad_norm": 2.4837093353271484, "learning_rate": 1.9257898480395937e-05, "loss": 0.8139, "step": 5841 }, { "epoch": 0.15, "grad_norm": 2.349698066711426, "learning_rate": 1.9257584622478827e-05, "loss": 0.6874, "step": 5842 }, { "epoch": 0.15, "grad_norm": 3.075827121734619, "learning_rate": 1.925727070076426e-05, "loss": 0.7811, "step": 5843 }, { "epoch": 0.15, "grad_norm": 2.28973650932312, "learning_rate": 1.92569567152544e-05, "loss": 0.6289, "step": 5844 }, { "epoch": 0.15, "grad_norm": 2.225123405456543, "learning_rate": 1.9256642665951407e-05, "loss": 0.6579, "step": 5845 }, { "epoch": 0.15, "grad_norm": 1.8834174871444702, "learning_rate": 1.925632855285745e-05, "loss": 0.4851, "step": 5846 }, { "epoch": 0.15, "grad_norm": 1.557398796081543, "learning_rate": 1.9256014375974694e-05, "loss": 0.5611, "step": 5847 }, { "epoch": 0.15, "grad_norm": 1.813207745552063, "learning_rate": 1.92557001353053e-05, "loss": 0.6939, "step": 5848 }, { "epoch": 0.15, "grad_norm": 4.974868297576904, "learning_rate": 1.9255385830851437e-05, "loss": 0.5499, "step": 5849 }, { "epoch": 0.15, "grad_norm": 1.742186188697815, "learning_rate": 1.9255071462615266e-05, "loss": 0.6204, "step": 5850 }, { "epoch": 0.15, "grad_norm": 1.7538764476776123, "learning_rate": 1.9254757030598963e-05, "loss": 0.6819, "step": 5851 }, { "epoch": 0.15, "grad_norm": 2.6468393802642822, "learning_rate": 1.9254442534804685e-05, "loss": 0.6213, "step": 5852 }, { "epoch": 0.15, "grad_norm": 1.348397970199585, "learning_rate": 1.9254127975234606e-05, "loss": 0.5997, "step": 5853 }, { "epoch": 0.15, "grad_norm": 3.501201629638672, "learning_rate": 1.9253813351890887e-05, "loss": 0.6961, "step": 5854 }, { "epoch": 0.15, "grad_norm": 1.2617741823196411, "learning_rate": 1.9253498664775705e-05, "loss": 0.7694, "step": 5855 }, { "epoch": 0.15, "grad_norm": 2.1364870071411133, "learning_rate": 1.925318391389122e-05, "loss": 0.6157, "step": 5856 }, { "epoch": 0.15, "grad_norm": 1.6725374460220337, "learning_rate": 1.9252869099239605e-05, "loss": 0.7598, "step": 5857 }, { "epoch": 0.15, "grad_norm": 1.8343881368637085, "learning_rate": 1.925255422082303e-05, "loss": 0.6626, "step": 5858 }, { "epoch": 0.15, "grad_norm": 3.7737905979156494, "learning_rate": 1.9252239278643667e-05, "loss": 0.5643, "step": 5859 }, { "epoch": 0.15, "grad_norm": 4.463397979736328, "learning_rate": 1.925192427270368e-05, "loss": 0.726, "step": 5860 }, { "epoch": 0.15, "grad_norm": 1.7886067628860474, "learning_rate": 1.9251609203005247e-05, "loss": 0.7907, "step": 5861 }, { "epoch": 0.15, "grad_norm": 2.6689252853393555, "learning_rate": 1.9251294069550536e-05, "loss": 0.9127, "step": 5862 }, { "epoch": 0.15, "grad_norm": 2.6962997913360596, "learning_rate": 1.925097887234171e-05, "loss": 0.594, "step": 5863 }, { "epoch": 0.15, "grad_norm": 2.094801664352417, "learning_rate": 1.925066361138096e-05, "loss": 0.6978, "step": 5864 }, { "epoch": 0.15, "grad_norm": 3.361966133117676, "learning_rate": 1.9250348286670443e-05, "loss": 0.8447, "step": 5865 }, { "epoch": 0.15, "grad_norm": 1.5449206829071045, "learning_rate": 1.925003289821234e-05, "loss": 0.7099, "step": 5866 }, { "epoch": 0.15, "grad_norm": 1.1961950063705444, "learning_rate": 1.924971744600882e-05, "loss": 0.6923, "step": 5867 }, { "epoch": 0.15, "grad_norm": 2.5560686588287354, "learning_rate": 1.924940193006206e-05, "loss": 0.6284, "step": 5868 }, { "epoch": 0.15, "grad_norm": 2.793299674987793, "learning_rate": 1.924908635037423e-05, "loss": 0.7354, "step": 5869 }, { "epoch": 0.15, "grad_norm": 2.6356234550476074, "learning_rate": 1.924877070694751e-05, "loss": 0.5845, "step": 5870 }, { "epoch": 0.15, "grad_norm": 1.8205440044403076, "learning_rate": 1.924845499978407e-05, "loss": 0.7239, "step": 5871 }, { "epoch": 0.15, "grad_norm": 5.430325984954834, "learning_rate": 1.9248139228886093e-05, "loss": 0.703, "step": 5872 }, { "epoch": 0.15, "grad_norm": 8.43315315246582, "learning_rate": 1.924782339425575e-05, "loss": 0.7435, "step": 5873 }, { "epoch": 0.15, "grad_norm": 3.373009204864502, "learning_rate": 1.9247507495895216e-05, "loss": 0.8724, "step": 5874 }, { "epoch": 0.15, "grad_norm": 2.443057060241699, "learning_rate": 1.924719153380667e-05, "loss": 0.7128, "step": 5875 }, { "epoch": 0.15, "grad_norm": 2.3153574466705322, "learning_rate": 1.9246875507992295e-05, "loss": 0.5255, "step": 5876 }, { "epoch": 0.15, "grad_norm": 2.855464220046997, "learning_rate": 1.9246559418454257e-05, "loss": 0.6079, "step": 5877 }, { "epoch": 0.15, "grad_norm": 1.6657490730285645, "learning_rate": 1.9246243265194743e-05, "loss": 0.603, "step": 5878 }, { "epoch": 0.15, "grad_norm": 1.8357378244400024, "learning_rate": 1.924592704821593e-05, "loss": 0.5887, "step": 5879 }, { "epoch": 0.15, "grad_norm": 1.1560864448547363, "learning_rate": 1.9245610767519994e-05, "loss": 0.5423, "step": 5880 }, { "epoch": 0.15, "grad_norm": 1.8855383396148682, "learning_rate": 1.9245294423109122e-05, "loss": 0.7568, "step": 5881 }, { "epoch": 0.15, "grad_norm": 1.3887779712677002, "learning_rate": 1.9244978014985486e-05, "loss": 0.6857, "step": 5882 }, { "epoch": 0.15, "grad_norm": 2.953850507736206, "learning_rate": 1.9244661543151267e-05, "loss": 0.7624, "step": 5883 }, { "epoch": 0.15, "grad_norm": 1.5594263076782227, "learning_rate": 1.9244345007608654e-05, "loss": 0.7276, "step": 5884 }, { "epoch": 0.15, "grad_norm": 2.143791437149048, "learning_rate": 1.924402840835982e-05, "loss": 0.661, "step": 5885 }, { "epoch": 0.15, "grad_norm": 2.167116403579712, "learning_rate": 1.924371174540695e-05, "loss": 0.6743, "step": 5886 }, { "epoch": 0.15, "grad_norm": 7.416129112243652, "learning_rate": 1.924339501875223e-05, "loss": 0.7823, "step": 5887 }, { "epoch": 0.15, "grad_norm": 1.5161375999450684, "learning_rate": 1.924307822839784e-05, "loss": 0.6919, "step": 5888 }, { "epoch": 0.15, "grad_norm": 3.106520891189575, "learning_rate": 1.924276137434595e-05, "loss": 0.8445, "step": 5889 }, { "epoch": 0.15, "grad_norm": 3.4906365871429443, "learning_rate": 1.924244445659877e-05, "loss": 0.7929, "step": 5890 }, { "epoch": 0.15, "grad_norm": 1.6704543828964233, "learning_rate": 1.924212747515846e-05, "loss": 0.5545, "step": 5891 }, { "epoch": 0.15, "grad_norm": 4.227437973022461, "learning_rate": 1.924181043002722e-05, "loss": 0.8927, "step": 5892 }, { "epoch": 0.15, "grad_norm": 1.3282749652862549, "learning_rate": 1.9241493321207223e-05, "loss": 0.7772, "step": 5893 }, { "epoch": 0.15, "grad_norm": 2.412787437438965, "learning_rate": 1.9241176148700665e-05, "loss": 0.717, "step": 5894 }, { "epoch": 0.15, "grad_norm": 3.33122181892395, "learning_rate": 1.924085891250972e-05, "loss": 0.7365, "step": 5895 }, { "epoch": 0.15, "grad_norm": 7.233210563659668, "learning_rate": 1.9240541612636588e-05, "loss": 0.7949, "step": 5896 }, { "epoch": 0.15, "grad_norm": 5.731998443603516, "learning_rate": 1.9240224249083447e-05, "loss": 0.7924, "step": 5897 }, { "epoch": 0.15, "grad_norm": 5.976964473724365, "learning_rate": 1.9239906821852482e-05, "loss": 0.7247, "step": 5898 }, { "epoch": 0.15, "grad_norm": 4.458568572998047, "learning_rate": 1.923958933094589e-05, "loss": 0.7188, "step": 5899 }, { "epoch": 0.15, "grad_norm": 1.5542877912521362, "learning_rate": 1.923927177636585e-05, "loss": 0.6621, "step": 5900 }, { "epoch": 0.15, "grad_norm": 4.659394264221191, "learning_rate": 1.9238954158114557e-05, "loss": 0.6839, "step": 5901 }, { "epoch": 0.15, "grad_norm": 7.611391544342041, "learning_rate": 1.923863647619419e-05, "loss": 0.864, "step": 5902 }, { "epoch": 0.15, "grad_norm": 3.605551242828369, "learning_rate": 1.923831873060695e-05, "loss": 0.8669, "step": 5903 }, { "epoch": 0.15, "grad_norm": 6.175504684448242, "learning_rate": 1.923800092135502e-05, "loss": 1.0401, "step": 5904 }, { "epoch": 0.15, "grad_norm": 5.355963230133057, "learning_rate": 1.923768304844059e-05, "loss": 0.7031, "step": 5905 }, { "epoch": 0.15, "grad_norm": 6.91444206237793, "learning_rate": 1.9237365111865853e-05, "loss": 0.665, "step": 5906 }, { "epoch": 0.15, "grad_norm": 1.8908582925796509, "learning_rate": 1.9237047111632997e-05, "loss": 0.7029, "step": 5907 }, { "epoch": 0.15, "grad_norm": 2.0257527828216553, "learning_rate": 1.923672904774422e-05, "loss": 0.6192, "step": 5908 }, { "epoch": 0.15, "grad_norm": 1.5398415327072144, "learning_rate": 1.9236410920201708e-05, "loss": 0.7741, "step": 5909 }, { "epoch": 0.15, "grad_norm": 1.7586604356765747, "learning_rate": 1.9236092729007653e-05, "loss": 0.7035, "step": 5910 }, { "epoch": 0.15, "grad_norm": 2.83748722076416, "learning_rate": 1.923577447416425e-05, "loss": 0.5479, "step": 5911 }, { "epoch": 0.15, "grad_norm": 2.420220136642456, "learning_rate": 1.9235456155673694e-05, "loss": 0.7789, "step": 5912 }, { "epoch": 0.15, "grad_norm": 2.59588360786438, "learning_rate": 1.9235137773538178e-05, "loss": 0.779, "step": 5913 }, { "epoch": 0.15, "grad_norm": 4.709530353546143, "learning_rate": 1.923481932775989e-05, "loss": 0.7713, "step": 5914 }, { "epoch": 0.15, "grad_norm": 2.1116268634796143, "learning_rate": 1.923450081834103e-05, "loss": 0.6329, "step": 5915 }, { "epoch": 0.15, "grad_norm": 1.6746019124984741, "learning_rate": 1.9234182245283794e-05, "loss": 0.4384, "step": 5916 }, { "epoch": 0.15, "grad_norm": 5.403308868408203, "learning_rate": 1.9233863608590372e-05, "loss": 0.7048, "step": 5917 }, { "epoch": 0.15, "grad_norm": 3.262394666671753, "learning_rate": 1.9233544908262965e-05, "loss": 0.7435, "step": 5918 }, { "epoch": 0.15, "grad_norm": 3.2327539920806885, "learning_rate": 1.923322614430377e-05, "loss": 0.7985, "step": 5919 }, { "epoch": 0.15, "grad_norm": 2.067087173461914, "learning_rate": 1.923290731671498e-05, "loss": 0.6448, "step": 5920 }, { "epoch": 0.15, "grad_norm": 3.6199357509613037, "learning_rate": 1.923258842549879e-05, "loss": 0.7657, "step": 5921 }, { "epoch": 0.15, "grad_norm": 2.4620144367218018, "learning_rate": 1.923226947065741e-05, "loss": 0.7655, "step": 5922 }, { "epoch": 0.15, "grad_norm": 1.684470772743225, "learning_rate": 1.923195045219302e-05, "loss": 0.6686, "step": 5923 }, { "epoch": 0.15, "grad_norm": 1.4364277124404907, "learning_rate": 1.923163137010783e-05, "loss": 0.64, "step": 5924 }, { "epoch": 0.15, "grad_norm": 2.647247552871704, "learning_rate": 1.923131222440404e-05, "loss": 0.7124, "step": 5925 }, { "epoch": 0.15, "grad_norm": 1.466498613357544, "learning_rate": 1.9230993015083838e-05, "loss": 0.5631, "step": 5926 }, { "epoch": 0.15, "grad_norm": 4.77100944519043, "learning_rate": 1.9230673742149436e-05, "loss": 1.0263, "step": 5927 }, { "epoch": 0.15, "grad_norm": 2.3101303577423096, "learning_rate": 1.923035440560303e-05, "loss": 0.6755, "step": 5928 }, { "epoch": 0.15, "grad_norm": 1.9315307140350342, "learning_rate": 1.9230035005446825e-05, "loss": 0.655, "step": 5929 }, { "epoch": 0.15, "grad_norm": 1.3309437036514282, "learning_rate": 1.9229715541683012e-05, "loss": 0.608, "step": 5930 }, { "epoch": 0.15, "grad_norm": 6.546064376831055, "learning_rate": 1.9229396014313798e-05, "loss": 0.7597, "step": 5931 }, { "epoch": 0.15, "grad_norm": 3.5943665504455566, "learning_rate": 1.9229076423341388e-05, "loss": 0.5785, "step": 5932 }, { "epoch": 0.15, "grad_norm": 1.944143295288086, "learning_rate": 1.922875676876798e-05, "loss": 0.7133, "step": 5933 }, { "epoch": 0.15, "grad_norm": 6.516128063201904, "learning_rate": 1.922843705059578e-05, "loss": 0.7342, "step": 5934 }, { "epoch": 0.15, "grad_norm": 6.72327995300293, "learning_rate": 1.922811726882699e-05, "loss": 0.8662, "step": 5935 }, { "epoch": 0.15, "grad_norm": 1.5954095125198364, "learning_rate": 1.922779742346381e-05, "loss": 0.8144, "step": 5936 }, { "epoch": 0.15, "grad_norm": 2.0185844898223877, "learning_rate": 1.922747751450845e-05, "loss": 0.7238, "step": 5937 }, { "epoch": 0.15, "grad_norm": 1.8929635286331177, "learning_rate": 1.922715754196311e-05, "loss": 0.6516, "step": 5938 }, { "epoch": 0.15, "grad_norm": 2.2974467277526855, "learning_rate": 1.9226837505830002e-05, "loss": 0.62, "step": 5939 }, { "epoch": 0.15, "grad_norm": 2.0708024501800537, "learning_rate": 1.922651740611132e-05, "loss": 0.6982, "step": 5940 }, { "epoch": 0.15, "grad_norm": 3.910330057144165, "learning_rate": 1.9226197242809283e-05, "loss": 0.7387, "step": 5941 }, { "epoch": 0.15, "grad_norm": 2.0004327297210693, "learning_rate": 1.922587701592609e-05, "loss": 0.728, "step": 5942 }, { "epoch": 0.15, "grad_norm": 3.850847005844116, "learning_rate": 1.9225556725463947e-05, "loss": 0.6949, "step": 5943 }, { "epoch": 0.15, "grad_norm": 1.8323383331298828, "learning_rate": 1.922523637142506e-05, "loss": 0.7958, "step": 5944 }, { "epoch": 0.15, "grad_norm": 2.616349220275879, "learning_rate": 1.9224915953811646e-05, "loss": 0.662, "step": 5945 }, { "epoch": 0.15, "grad_norm": 1.3061957359313965, "learning_rate": 1.9224595472625904e-05, "loss": 0.592, "step": 5946 }, { "epoch": 0.15, "grad_norm": 2.2706258296966553, "learning_rate": 1.9224274927870045e-05, "loss": 0.6174, "step": 5947 }, { "epoch": 0.15, "grad_norm": 4.4292988777160645, "learning_rate": 1.922395431954628e-05, "loss": 0.6962, "step": 5948 }, { "epoch": 0.15, "grad_norm": 1.0073658227920532, "learning_rate": 1.9223633647656818e-05, "loss": 0.5377, "step": 5949 }, { "epoch": 0.15, "grad_norm": 3.270669460296631, "learning_rate": 1.9223312912203864e-05, "loss": 0.6973, "step": 5950 }, { "epoch": 0.15, "grad_norm": 3.143756628036499, "learning_rate": 1.9222992113189633e-05, "loss": 0.854, "step": 5951 }, { "epoch": 0.15, "grad_norm": 2.2013092041015625, "learning_rate": 1.9222671250616336e-05, "loss": 0.6662, "step": 5952 }, { "epoch": 0.15, "grad_norm": 1.9405454397201538, "learning_rate": 1.9222350324486183e-05, "loss": 0.5645, "step": 5953 }, { "epoch": 0.15, "grad_norm": 4.787731647491455, "learning_rate": 1.9222029334801386e-05, "loss": 0.7294, "step": 5954 }, { "epoch": 0.15, "grad_norm": 2.153313159942627, "learning_rate": 1.9221708281564154e-05, "loss": 0.7675, "step": 5955 }, { "epoch": 0.15, "grad_norm": 3.998573064804077, "learning_rate": 1.9221387164776707e-05, "loss": 0.725, "step": 5956 }, { "epoch": 0.15, "grad_norm": 2.4950737953186035, "learning_rate": 1.922106598444125e-05, "loss": 0.6247, "step": 5957 }, { "epoch": 0.15, "grad_norm": 2.7576279640197754, "learning_rate": 1.922074474056e-05, "loss": 0.7751, "step": 5958 }, { "epoch": 0.15, "grad_norm": 2.244786262512207, "learning_rate": 1.922042343313517e-05, "loss": 0.6134, "step": 5959 }, { "epoch": 0.15, "grad_norm": 2.287876844406128, "learning_rate": 1.9220102062168974e-05, "loss": 0.5761, "step": 5960 }, { "epoch": 0.15, "grad_norm": 2.559286594390869, "learning_rate": 1.9219780627663625e-05, "loss": 0.6648, "step": 5961 }, { "epoch": 0.15, "grad_norm": 2.1373369693756104, "learning_rate": 1.9219459129621346e-05, "loss": 0.4406, "step": 5962 }, { "epoch": 0.15, "grad_norm": 3.150550365447998, "learning_rate": 1.9219137568044345e-05, "loss": 0.6411, "step": 5963 }, { "epoch": 0.15, "grad_norm": 2.5700855255126953, "learning_rate": 1.9218815942934838e-05, "loss": 0.7443, "step": 5964 }, { "epoch": 0.15, "grad_norm": 3.58666729927063, "learning_rate": 1.9218494254295045e-05, "loss": 0.8147, "step": 5965 }, { "epoch": 0.15, "grad_norm": 5.602112770080566, "learning_rate": 1.921817250212718e-05, "loss": 0.5987, "step": 5966 }, { "epoch": 0.15, "grad_norm": 2.6939570903778076, "learning_rate": 1.9217850686433462e-05, "loss": 0.665, "step": 5967 }, { "epoch": 0.15, "grad_norm": 1.5419992208480835, "learning_rate": 1.9217528807216107e-05, "loss": 0.6819, "step": 5968 }, { "epoch": 0.15, "grad_norm": 2.2085533142089844, "learning_rate": 1.9217206864477336e-05, "loss": 0.7141, "step": 5969 }, { "epoch": 0.15, "grad_norm": 2.361227035522461, "learning_rate": 1.9216884858219368e-05, "loss": 0.6976, "step": 5970 }, { "epoch": 0.15, "grad_norm": 2.552791118621826, "learning_rate": 1.9216562788444415e-05, "loss": 0.7165, "step": 5971 }, { "epoch": 0.15, "grad_norm": 2.2848503589630127, "learning_rate": 1.9216240655154706e-05, "loss": 0.6127, "step": 5972 }, { "epoch": 0.15, "grad_norm": 1.8968254327774048, "learning_rate": 1.9215918458352455e-05, "loss": 0.6727, "step": 5973 }, { "epoch": 0.15, "grad_norm": 1.8589128255844116, "learning_rate": 1.9215596198039886e-05, "loss": 0.6737, "step": 5974 }, { "epoch": 0.15, "grad_norm": 2.322059392929077, "learning_rate": 1.9215273874219217e-05, "loss": 0.6903, "step": 5975 }, { "epoch": 0.15, "grad_norm": 1.6849035024642944, "learning_rate": 1.921495148689267e-05, "loss": 0.6303, "step": 5976 }, { "epoch": 0.15, "grad_norm": 2.8593828678131104, "learning_rate": 1.9214629036062465e-05, "loss": 0.6882, "step": 5977 }, { "epoch": 0.15, "grad_norm": 3.568908929824829, "learning_rate": 1.9214306521730825e-05, "loss": 0.5418, "step": 5978 }, { "epoch": 0.15, "grad_norm": 2.0592567920684814, "learning_rate": 1.9213983943899977e-05, "loss": 0.7219, "step": 5979 }, { "epoch": 0.15, "grad_norm": 3.0984060764312744, "learning_rate": 1.9213661302572138e-05, "loss": 0.8049, "step": 5980 }, { "epoch": 0.15, "grad_norm": 4.138469219207764, "learning_rate": 1.9213338597749533e-05, "loss": 0.6741, "step": 5981 }, { "epoch": 0.15, "grad_norm": 2.3337388038635254, "learning_rate": 1.9213015829434393e-05, "loss": 0.7296, "step": 5982 }, { "epoch": 0.15, "grad_norm": 2.053410530090332, "learning_rate": 1.921269299762893e-05, "loss": 0.7165, "step": 5983 }, { "epoch": 0.15, "grad_norm": 3.19777512550354, "learning_rate": 1.9212370102335374e-05, "loss": 0.7293, "step": 5984 }, { "epoch": 0.15, "grad_norm": 2.5591022968292236, "learning_rate": 1.9212047143555954e-05, "loss": 0.7737, "step": 5985 }, { "epoch": 0.15, "grad_norm": 3.2177083492279053, "learning_rate": 1.9211724121292895e-05, "loss": 0.7577, "step": 5986 }, { "epoch": 0.15, "grad_norm": 2.7310385704040527, "learning_rate": 1.9211401035548417e-05, "loss": 0.5339, "step": 5987 }, { "epoch": 0.15, "grad_norm": 2.3396360874176025, "learning_rate": 1.921107788632475e-05, "loss": 0.5489, "step": 5988 }, { "epoch": 0.15, "grad_norm": 1.6488922834396362, "learning_rate": 1.921075467362412e-05, "loss": 0.7305, "step": 5989 }, { "epoch": 0.15, "grad_norm": 2.9726758003234863, "learning_rate": 1.921043139744876e-05, "loss": 0.6742, "step": 5990 }, { "epoch": 0.15, "grad_norm": 2.6051745414733887, "learning_rate": 1.921010805780089e-05, "loss": 0.7143, "step": 5991 }, { "epoch": 0.15, "grad_norm": 4.312603950500488, "learning_rate": 1.9209784654682742e-05, "loss": 0.6565, "step": 5992 }, { "epoch": 0.15, "grad_norm": 2.4610211849212646, "learning_rate": 1.9209461188096544e-05, "loss": 0.6843, "step": 5993 }, { "epoch": 0.15, "grad_norm": 2.5799202919006348, "learning_rate": 1.920913765804453e-05, "loss": 0.7528, "step": 5994 }, { "epoch": 0.15, "grad_norm": 2.5649874210357666, "learning_rate": 1.920881406452892e-05, "loss": 0.6807, "step": 5995 }, { "epoch": 0.15, "grad_norm": 2.7533042430877686, "learning_rate": 1.920849040755195e-05, "loss": 0.6212, "step": 5996 }, { "epoch": 0.15, "grad_norm": 1.7523839473724365, "learning_rate": 1.920816668711585e-05, "loss": 0.7241, "step": 5997 }, { "epoch": 0.15, "grad_norm": 2.413360834121704, "learning_rate": 1.9207842903222854e-05, "loss": 0.7543, "step": 5998 }, { "epoch": 0.15, "grad_norm": 3.239305019378662, "learning_rate": 1.9207519055875183e-05, "loss": 0.6243, "step": 5999 }, { "epoch": 0.15, "grad_norm": 3.0049023628234863, "learning_rate": 1.9207195145075078e-05, "loss": 0.6798, "step": 6000 }, { "epoch": 0.15, "grad_norm": 2.7297143936157227, "learning_rate": 1.920687117082477e-05, "loss": 0.5956, "step": 6001 }, { "epoch": 0.15, "grad_norm": 4.631251335144043, "learning_rate": 1.920654713312649e-05, "loss": 0.4934, "step": 6002 }, { "epoch": 0.15, "grad_norm": 1.8201876878738403, "learning_rate": 1.920622303198247e-05, "loss": 0.6165, "step": 6003 }, { "epoch": 0.15, "grad_norm": 1.332058310508728, "learning_rate": 1.9205898867394945e-05, "loss": 0.443, "step": 6004 }, { "epoch": 0.15, "grad_norm": 3.9631776809692383, "learning_rate": 1.920557463936615e-05, "loss": 0.7953, "step": 6005 }, { "epoch": 0.15, "grad_norm": 3.436424493789673, "learning_rate": 1.9205250347898316e-05, "loss": 0.8599, "step": 6006 }, { "epoch": 0.15, "grad_norm": 1.823623538017273, "learning_rate": 1.920492599299368e-05, "loss": 0.6938, "step": 6007 }, { "epoch": 0.15, "grad_norm": 1.5218349695205688, "learning_rate": 1.920460157465448e-05, "loss": 0.5619, "step": 6008 }, { "epoch": 0.15, "grad_norm": 4.035768032073975, "learning_rate": 1.9204277092882945e-05, "loss": 0.7578, "step": 6009 }, { "epoch": 0.15, "grad_norm": 2.3774218559265137, "learning_rate": 1.920395254768132e-05, "loss": 0.7391, "step": 6010 }, { "epoch": 0.15, "grad_norm": 4.608096599578857, "learning_rate": 1.920362793905183e-05, "loss": 0.6856, "step": 6011 }, { "epoch": 0.15, "grad_norm": 2.0140066146850586, "learning_rate": 1.9203303266996723e-05, "loss": 0.5757, "step": 6012 }, { "epoch": 0.15, "grad_norm": 2.025719404220581, "learning_rate": 1.920297853151823e-05, "loss": 0.6311, "step": 6013 }, { "epoch": 0.15, "grad_norm": 3.2167742252349854, "learning_rate": 1.9202653732618593e-05, "loss": 0.6209, "step": 6014 }, { "epoch": 0.15, "grad_norm": 1.203831672668457, "learning_rate": 1.9202328870300045e-05, "loss": 0.6044, "step": 6015 }, { "epoch": 0.15, "grad_norm": 1.3606641292572021, "learning_rate": 1.9202003944564832e-05, "loss": 0.6681, "step": 6016 }, { "epoch": 0.15, "grad_norm": 2.268615484237671, "learning_rate": 1.9201678955415186e-05, "loss": 0.6747, "step": 6017 }, { "epoch": 0.15, "grad_norm": 1.8429105281829834, "learning_rate": 1.920135390285335e-05, "loss": 0.6667, "step": 6018 }, { "epoch": 0.15, "grad_norm": 2.737787961959839, "learning_rate": 1.9201028786881564e-05, "loss": 0.4729, "step": 6019 }, { "epoch": 0.15, "grad_norm": 1.621401309967041, "learning_rate": 1.920070360750207e-05, "loss": 0.5606, "step": 6020 }, { "epoch": 0.15, "grad_norm": 1.6225119829177856, "learning_rate": 1.9200378364717106e-05, "loss": 0.6423, "step": 6021 }, { "epoch": 0.15, "grad_norm": 4.518871307373047, "learning_rate": 1.9200053058528914e-05, "loss": 0.7716, "step": 6022 }, { "epoch": 0.15, "grad_norm": 4.789550304412842, "learning_rate": 1.9199727688939735e-05, "loss": 0.6632, "step": 6023 }, { "epoch": 0.15, "grad_norm": 1.5508825778961182, "learning_rate": 1.9199402255951815e-05, "loss": 0.561, "step": 6024 }, { "epoch": 0.15, "grad_norm": 1.5577064752578735, "learning_rate": 1.9199076759567395e-05, "loss": 0.5194, "step": 6025 }, { "epoch": 0.15, "grad_norm": 3.0139694213867188, "learning_rate": 1.9198751199788715e-05, "loss": 0.7602, "step": 6026 }, { "epoch": 0.15, "grad_norm": 1.8634463548660278, "learning_rate": 1.9198425576618022e-05, "loss": 0.4911, "step": 6027 }, { "epoch": 0.15, "grad_norm": 4.156155109405518, "learning_rate": 1.919809989005756e-05, "loss": 0.8252, "step": 6028 }, { "epoch": 0.15, "grad_norm": 7.229860305786133, "learning_rate": 1.919777414010957e-05, "loss": 0.7158, "step": 6029 }, { "epoch": 0.15, "grad_norm": 2.3389925956726074, "learning_rate": 1.9197448326776304e-05, "loss": 0.7243, "step": 6030 }, { "epoch": 0.15, "grad_norm": 2.426750421524048, "learning_rate": 1.9197122450059996e-05, "loss": 0.7681, "step": 6031 }, { "epoch": 0.15, "grad_norm": 1.654168725013733, "learning_rate": 1.9196796509962902e-05, "loss": 0.6294, "step": 6032 }, { "epoch": 0.15, "grad_norm": 1.4243587255477905, "learning_rate": 1.9196470506487264e-05, "loss": 0.6129, "step": 6033 }, { "epoch": 0.15, "grad_norm": 2.248953342437744, "learning_rate": 1.919614443963533e-05, "loss": 0.6973, "step": 6034 }, { "epoch": 0.15, "grad_norm": 4.997608661651611, "learning_rate": 1.9195818309409344e-05, "loss": 0.8852, "step": 6035 }, { "epoch": 0.15, "grad_norm": 8.241135597229004, "learning_rate": 1.9195492115811553e-05, "loss": 0.8406, "step": 6036 }, { "epoch": 0.15, "grad_norm": 2.557126998901367, "learning_rate": 1.9195165858844215e-05, "loss": 0.6865, "step": 6037 }, { "epoch": 0.15, "grad_norm": 4.698651313781738, "learning_rate": 1.9194839538509566e-05, "loss": 0.764, "step": 6038 }, { "epoch": 0.15, "grad_norm": 2.422532081604004, "learning_rate": 1.919451315480986e-05, "loss": 0.6034, "step": 6039 }, { "epoch": 0.15, "grad_norm": 1.3105218410491943, "learning_rate": 1.9194186707747346e-05, "loss": 0.6618, "step": 6040 }, { "epoch": 0.15, "grad_norm": 1.9965424537658691, "learning_rate": 1.9193860197324274e-05, "loss": 0.6269, "step": 6041 }, { "epoch": 0.15, "grad_norm": 2.455275297164917, "learning_rate": 1.9193533623542896e-05, "loss": 0.51, "step": 6042 }, { "epoch": 0.15, "grad_norm": 2.1981985569000244, "learning_rate": 1.9193206986405454e-05, "loss": 0.7017, "step": 6043 }, { "epoch": 0.15, "grad_norm": 1.766464352607727, "learning_rate": 1.9192880285914212e-05, "loss": 0.5673, "step": 6044 }, { "epoch": 0.15, "grad_norm": 1.45173180103302, "learning_rate": 1.919255352207141e-05, "loss": 0.587, "step": 6045 }, { "epoch": 0.15, "grad_norm": 2.287740707397461, "learning_rate": 1.9192226694879304e-05, "loss": 0.6387, "step": 6046 }, { "epoch": 0.15, "grad_norm": 3.2701220512390137, "learning_rate": 1.919189980434015e-05, "loss": 0.5947, "step": 6047 }, { "epoch": 0.16, "grad_norm": 1.7413817644119263, "learning_rate": 1.9191572850456198e-05, "loss": 0.6464, "step": 6048 }, { "epoch": 0.16, "grad_norm": 3.9588968753814697, "learning_rate": 1.9191245833229698e-05, "loss": 0.7142, "step": 6049 }, { "epoch": 0.16, "grad_norm": 2.465494155883789, "learning_rate": 1.9190918752662908e-05, "loss": 0.6316, "step": 6050 }, { "epoch": 0.16, "grad_norm": 5.827014923095703, "learning_rate": 1.9190591608758077e-05, "loss": 0.5681, "step": 6051 }, { "epoch": 0.16, "grad_norm": 3.340298891067505, "learning_rate": 1.919026440151747e-05, "loss": 0.5885, "step": 6052 }, { "epoch": 0.16, "grad_norm": 3.042391300201416, "learning_rate": 1.9189937130943327e-05, "loss": 0.7029, "step": 6053 }, { "epoch": 0.16, "grad_norm": 3.4763076305389404, "learning_rate": 1.9189609797037913e-05, "loss": 0.8784, "step": 6054 }, { "epoch": 0.16, "grad_norm": 2.9874353408813477, "learning_rate": 1.9189282399803485e-05, "loss": 0.6394, "step": 6055 }, { "epoch": 0.16, "grad_norm": 3.471243381500244, "learning_rate": 1.9188954939242292e-05, "loss": 0.6357, "step": 6056 }, { "epoch": 0.16, "grad_norm": 4.9295477867126465, "learning_rate": 1.9188627415356597e-05, "loss": 0.7662, "step": 6057 }, { "epoch": 0.16, "grad_norm": 2.031080484390259, "learning_rate": 1.9188299828148654e-05, "loss": 0.6076, "step": 6058 }, { "epoch": 0.16, "grad_norm": 1.804558277130127, "learning_rate": 1.9187972177620722e-05, "loss": 0.6569, "step": 6059 }, { "epoch": 0.16, "grad_norm": 2.654466152191162, "learning_rate": 1.9187644463775058e-05, "loss": 0.5777, "step": 6060 }, { "epoch": 0.16, "grad_norm": 3.838542938232422, "learning_rate": 1.9187316686613918e-05, "loss": 0.7346, "step": 6061 }, { "epoch": 0.16, "grad_norm": 2.1835262775421143, "learning_rate": 1.9186988846139566e-05, "loss": 0.6226, "step": 6062 }, { "epoch": 0.16, "grad_norm": 1.548496127128601, "learning_rate": 1.918666094235426e-05, "loss": 0.5845, "step": 6063 }, { "epoch": 0.16, "grad_norm": 3.3071775436401367, "learning_rate": 1.9186332975260255e-05, "loss": 0.5718, "step": 6064 }, { "epoch": 0.16, "grad_norm": 5.3783793449401855, "learning_rate": 1.9186004944859815e-05, "loss": 0.7007, "step": 6065 }, { "epoch": 0.16, "grad_norm": 3.2071187496185303, "learning_rate": 1.91856768511552e-05, "loss": 0.6612, "step": 6066 }, { "epoch": 0.16, "grad_norm": 3.7555835247039795, "learning_rate": 1.9185348694148673e-05, "loss": 0.7343, "step": 6067 }, { "epoch": 0.16, "grad_norm": 1.7124561071395874, "learning_rate": 1.9185020473842494e-05, "loss": 0.4612, "step": 6068 }, { "epoch": 0.16, "grad_norm": 2.7328524589538574, "learning_rate": 1.9184692190238922e-05, "loss": 0.5772, "step": 6069 }, { "epoch": 0.16, "grad_norm": 2.0698368549346924, "learning_rate": 1.9184363843340223e-05, "loss": 0.5261, "step": 6070 }, { "epoch": 0.16, "grad_norm": 3.0164599418640137, "learning_rate": 1.918403543314866e-05, "loss": 0.809, "step": 6071 }, { "epoch": 0.16, "grad_norm": 4.463143825531006, "learning_rate": 1.9183706959666493e-05, "loss": 0.7786, "step": 6072 }, { "epoch": 0.16, "grad_norm": 1.6312404870986938, "learning_rate": 1.9183378422895988e-05, "loss": 0.6337, "step": 6073 }, { "epoch": 0.16, "grad_norm": 3.2148277759552, "learning_rate": 1.918304982283941e-05, "loss": 0.6696, "step": 6074 }, { "epoch": 0.16, "grad_norm": 1.6709957122802734, "learning_rate": 1.9182721159499017e-05, "loss": 0.6027, "step": 6075 }, { "epoch": 0.16, "grad_norm": 1.6947497129440308, "learning_rate": 1.9182392432877085e-05, "loss": 0.6013, "step": 6076 }, { "epoch": 0.16, "grad_norm": 3.571720600128174, "learning_rate": 1.918206364297587e-05, "loss": 0.7915, "step": 6077 }, { "epoch": 0.16, "grad_norm": 4.822144508361816, "learning_rate": 1.9181734789797643e-05, "loss": 0.7899, "step": 6078 }, { "epoch": 0.16, "grad_norm": 2.345621109008789, "learning_rate": 1.9181405873344666e-05, "loss": 0.6807, "step": 6079 }, { "epoch": 0.16, "grad_norm": 1.7000744342803955, "learning_rate": 1.918107689361921e-05, "loss": 0.4879, "step": 6080 }, { "epoch": 0.16, "grad_norm": 2.9054439067840576, "learning_rate": 1.9180747850623536e-05, "loss": 0.6605, "step": 6081 }, { "epoch": 0.16, "grad_norm": 2.8999507427215576, "learning_rate": 1.9180418744359923e-05, "loss": 0.6928, "step": 6082 }, { "epoch": 0.16, "grad_norm": 2.713977336883545, "learning_rate": 1.918008957483063e-05, "loss": 0.6505, "step": 6083 }, { "epoch": 0.16, "grad_norm": 3.3775196075439453, "learning_rate": 1.917976034203792e-05, "loss": 0.7074, "step": 6084 }, { "epoch": 0.16, "grad_norm": 1.5467678308486938, "learning_rate": 1.9179431045984073e-05, "loss": 0.5851, "step": 6085 }, { "epoch": 0.16, "grad_norm": 4.010659694671631, "learning_rate": 1.9179101686671355e-05, "loss": 0.6068, "step": 6086 }, { "epoch": 0.16, "grad_norm": 4.394802570343018, "learning_rate": 1.9178772264102037e-05, "loss": 0.7832, "step": 6087 }, { "epoch": 0.16, "grad_norm": 2.829263210296631, "learning_rate": 1.9178442778278386e-05, "loss": 0.603, "step": 6088 }, { "epoch": 0.16, "grad_norm": 1.61531662940979, "learning_rate": 1.9178113229202674e-05, "loss": 0.6933, "step": 6089 }, { "epoch": 0.16, "grad_norm": 2.4262614250183105, "learning_rate": 1.917778361687717e-05, "loss": 0.575, "step": 6090 }, { "epoch": 0.16, "grad_norm": 3.1143088340759277, "learning_rate": 1.917745394130415e-05, "loss": 0.7914, "step": 6091 }, { "epoch": 0.16, "grad_norm": 2.2711188793182373, "learning_rate": 1.917712420248588e-05, "loss": 0.7961, "step": 6092 }, { "epoch": 0.16, "grad_norm": 4.780198574066162, "learning_rate": 1.9176794400424637e-05, "loss": 0.7052, "step": 6093 }, { "epoch": 0.16, "grad_norm": 1.7135584354400635, "learning_rate": 1.9176464535122693e-05, "loss": 0.7773, "step": 6094 }, { "epoch": 0.16, "grad_norm": 3.3944661617279053, "learning_rate": 1.917613460658232e-05, "loss": 0.8009, "step": 6095 }, { "epoch": 0.16, "grad_norm": 2.947654962539673, "learning_rate": 1.9175804614805795e-05, "loss": 0.5266, "step": 6096 }, { "epoch": 0.16, "grad_norm": 1.2781375646591187, "learning_rate": 1.9175474559795386e-05, "loss": 0.4929, "step": 6097 }, { "epoch": 0.16, "grad_norm": 1.350405216217041, "learning_rate": 1.9175144441553372e-05, "loss": 0.6683, "step": 6098 }, { "epoch": 0.16, "grad_norm": 2.3464224338531494, "learning_rate": 1.9174814260082025e-05, "loss": 0.6004, "step": 6099 }, { "epoch": 0.16, "grad_norm": 3.1325528621673584, "learning_rate": 1.9174484015383626e-05, "loss": 0.7371, "step": 6100 }, { "epoch": 0.16, "grad_norm": 2.556135892868042, "learning_rate": 1.917415370746045e-05, "loss": 0.4971, "step": 6101 }, { "epoch": 0.16, "grad_norm": 1.5443192720413208, "learning_rate": 1.9173823336314763e-05, "loss": 0.6454, "step": 6102 }, { "epoch": 0.16, "grad_norm": 3.560363531112671, "learning_rate": 1.917349290194885e-05, "loss": 0.6512, "step": 6103 }, { "epoch": 0.16, "grad_norm": 2.4587600231170654, "learning_rate": 1.917316240436499e-05, "loss": 0.6293, "step": 6104 }, { "epoch": 0.16, "grad_norm": 2.648897409439087, "learning_rate": 1.9172831843565457e-05, "loss": 0.8104, "step": 6105 }, { "epoch": 0.16, "grad_norm": 3.6607139110565186, "learning_rate": 1.917250121955253e-05, "loss": 0.751, "step": 6106 }, { "epoch": 0.16, "grad_norm": 1.9699510335922241, "learning_rate": 1.9172170532328488e-05, "loss": 0.6842, "step": 6107 }, { "epoch": 0.16, "grad_norm": 1.8778802156448364, "learning_rate": 1.9171839781895607e-05, "loss": 0.6385, "step": 6108 }, { "epoch": 0.16, "grad_norm": 1.8614120483398438, "learning_rate": 1.9171508968256172e-05, "loss": 0.5596, "step": 6109 }, { "epoch": 0.16, "grad_norm": 2.915113925933838, "learning_rate": 1.917117809141246e-05, "loss": 0.7682, "step": 6110 }, { "epoch": 0.16, "grad_norm": 1.3469759225845337, "learning_rate": 1.9170847151366744e-05, "loss": 0.7557, "step": 6111 }, { "epoch": 0.16, "grad_norm": 2.2466375827789307, "learning_rate": 1.9170516148121315e-05, "loss": 0.8109, "step": 6112 }, { "epoch": 0.16, "grad_norm": 2.3787381649017334, "learning_rate": 1.9170185081678452e-05, "loss": 0.663, "step": 6113 }, { "epoch": 0.16, "grad_norm": 2.0145998001098633, "learning_rate": 1.9169853952040434e-05, "loss": 0.4976, "step": 6114 }, { "epoch": 0.16, "grad_norm": 1.8861290216445923, "learning_rate": 1.916952275920954e-05, "loss": 0.575, "step": 6115 }, { "epoch": 0.16, "grad_norm": 3.7507619857788086, "learning_rate": 1.9169191503188062e-05, "loss": 0.5981, "step": 6116 }, { "epoch": 0.16, "grad_norm": 2.7204573154449463, "learning_rate": 1.9168860183978273e-05, "loss": 0.7407, "step": 6117 }, { "epoch": 0.16, "grad_norm": 3.018360137939453, "learning_rate": 1.916852880158246e-05, "loss": 0.6416, "step": 6118 }, { "epoch": 0.16, "grad_norm": 2.4763782024383545, "learning_rate": 1.9168197356002905e-05, "loss": 0.6723, "step": 6119 }, { "epoch": 0.16, "grad_norm": 2.5104494094848633, "learning_rate": 1.9167865847241898e-05, "loss": 0.6061, "step": 6120 }, { "epoch": 0.16, "grad_norm": 2.314147472381592, "learning_rate": 1.9167534275301715e-05, "loss": 0.708, "step": 6121 }, { "epoch": 0.16, "grad_norm": 7.750453948974609, "learning_rate": 1.916720264018465e-05, "loss": 0.7729, "step": 6122 }, { "epoch": 0.16, "grad_norm": 3.095123529434204, "learning_rate": 1.9166870941892983e-05, "loss": 0.4976, "step": 6123 }, { "epoch": 0.16, "grad_norm": 2.058631181716919, "learning_rate": 1.9166539180429e-05, "loss": 0.661, "step": 6124 }, { "epoch": 0.16, "grad_norm": 3.1028289794921875, "learning_rate": 1.9166207355794982e-05, "loss": 0.7776, "step": 6125 }, { "epoch": 0.16, "grad_norm": 2.085803747177124, "learning_rate": 1.916587546799323e-05, "loss": 0.5843, "step": 6126 }, { "epoch": 0.16, "grad_norm": 2.96162486076355, "learning_rate": 1.916554351702602e-05, "loss": 0.5627, "step": 6127 }, { "epoch": 0.16, "grad_norm": 1.9338973760604858, "learning_rate": 1.916521150289564e-05, "loss": 0.6716, "step": 6128 }, { "epoch": 0.16, "grad_norm": 1.9098447561264038, "learning_rate": 1.9164879425604382e-05, "loss": 0.545, "step": 6129 }, { "epoch": 0.16, "grad_norm": 6.1589250564575195, "learning_rate": 1.9164547285154534e-05, "loss": 0.43, "step": 6130 }, { "epoch": 0.16, "grad_norm": 1.7552568912506104, "learning_rate": 1.9164215081548385e-05, "loss": 0.5568, "step": 6131 }, { "epoch": 0.16, "grad_norm": 4.627854347229004, "learning_rate": 1.916388281478822e-05, "loss": 0.5988, "step": 6132 }, { "epoch": 0.16, "grad_norm": 1.509608268737793, "learning_rate": 1.9163550484876334e-05, "loss": 0.6274, "step": 6133 }, { "epoch": 0.16, "grad_norm": 4.206636905670166, "learning_rate": 1.9163218091815016e-05, "loss": 0.6762, "step": 6134 }, { "epoch": 0.16, "grad_norm": 1.6255944967269897, "learning_rate": 1.9162885635606556e-05, "loss": 0.5073, "step": 6135 }, { "epoch": 0.16, "grad_norm": 2.5800461769104004, "learning_rate": 1.916255311625324e-05, "loss": 0.6616, "step": 6136 }, { "epoch": 0.16, "grad_norm": 1.3863486051559448, "learning_rate": 1.9162220533757367e-05, "loss": 0.609, "step": 6137 }, { "epoch": 0.16, "grad_norm": 2.289501667022705, "learning_rate": 1.916188788812123e-05, "loss": 0.8699, "step": 6138 }, { "epoch": 0.16, "grad_norm": 1.6001276969909668, "learning_rate": 1.9161555179347114e-05, "loss": 0.7432, "step": 6139 }, { "epoch": 0.16, "grad_norm": 5.73637580871582, "learning_rate": 1.9161222407437317e-05, "loss": 0.681, "step": 6140 }, { "epoch": 0.16, "grad_norm": 1.9923005104064941, "learning_rate": 1.916088957239413e-05, "loss": 0.5651, "step": 6141 }, { "epoch": 0.16, "grad_norm": 1.3404721021652222, "learning_rate": 1.916055667421985e-05, "loss": 0.5951, "step": 6142 }, { "epoch": 0.16, "grad_norm": 7.682086944580078, "learning_rate": 1.9160223712916762e-05, "loss": 0.7046, "step": 6143 }, { "epoch": 0.16, "grad_norm": 1.399095058441162, "learning_rate": 1.9159890688487172e-05, "loss": 0.821, "step": 6144 }, { "epoch": 0.16, "grad_norm": 2.0155460834503174, "learning_rate": 1.915955760093337e-05, "loss": 0.7194, "step": 6145 }, { "epoch": 0.16, "grad_norm": 6.935630798339844, "learning_rate": 1.9159224450257653e-05, "loss": 0.7424, "step": 6146 }, { "epoch": 0.16, "grad_norm": 4.18574333190918, "learning_rate": 1.9158891236462314e-05, "loss": 0.7796, "step": 6147 }, { "epoch": 0.16, "grad_norm": 1.7639479637145996, "learning_rate": 1.915855795954965e-05, "loss": 0.7705, "step": 6148 }, { "epoch": 0.16, "grad_norm": 2.8199353218078613, "learning_rate": 1.915822461952196e-05, "loss": 0.7702, "step": 6149 }, { "epoch": 0.16, "grad_norm": 1.4306514263153076, "learning_rate": 1.9157891216381536e-05, "loss": 0.6549, "step": 6150 }, { "epoch": 0.16, "grad_norm": 6.392928600311279, "learning_rate": 1.9157557750130684e-05, "loss": 0.4201, "step": 6151 }, { "epoch": 0.16, "grad_norm": 2.3459675312042236, "learning_rate": 1.9157224220771696e-05, "loss": 0.7901, "step": 6152 }, { "epoch": 0.16, "grad_norm": 2.0096585750579834, "learning_rate": 1.915689062830687e-05, "loss": 0.6158, "step": 6153 }, { "epoch": 0.16, "grad_norm": 3.237637519836426, "learning_rate": 1.915655697273851e-05, "loss": 0.706, "step": 6154 }, { "epoch": 0.16, "grad_norm": 2.6333768367767334, "learning_rate": 1.915622325406891e-05, "loss": 0.7154, "step": 6155 }, { "epoch": 0.16, "grad_norm": 1.8560360670089722, "learning_rate": 1.915588947230037e-05, "loss": 0.5237, "step": 6156 }, { "epoch": 0.16, "grad_norm": 2.177675724029541, "learning_rate": 1.9155555627435194e-05, "loss": 0.7183, "step": 6157 }, { "epoch": 0.16, "grad_norm": 4.196212291717529, "learning_rate": 1.915522171947568e-05, "loss": 0.632, "step": 6158 }, { "epoch": 0.16, "grad_norm": 2.5244431495666504, "learning_rate": 1.9154887748424132e-05, "loss": 0.6116, "step": 6159 }, { "epoch": 0.16, "grad_norm": 3.0174050331115723, "learning_rate": 1.915455371428285e-05, "loss": 0.6156, "step": 6160 }, { "epoch": 0.16, "grad_norm": 3.2766025066375732, "learning_rate": 1.915421961705413e-05, "loss": 0.6194, "step": 6161 }, { "epoch": 0.16, "grad_norm": 3.1248316764831543, "learning_rate": 1.9153885456740286e-05, "loss": 0.6906, "step": 6162 }, { "epoch": 0.16, "grad_norm": 3.0659730434417725, "learning_rate": 1.915355123334361e-05, "loss": 0.5224, "step": 6163 }, { "epoch": 0.16, "grad_norm": 4.006773471832275, "learning_rate": 1.9153216946866407e-05, "loss": 0.4643, "step": 6164 }, { "epoch": 0.16, "grad_norm": 3.6037511825561523, "learning_rate": 1.915288259731099e-05, "loss": 0.7885, "step": 6165 }, { "epoch": 0.16, "grad_norm": 1.7996703386306763, "learning_rate": 1.9152548184679652e-05, "loss": 0.645, "step": 6166 }, { "epoch": 0.16, "grad_norm": 2.0473625659942627, "learning_rate": 1.9152213708974707e-05, "loss": 0.7112, "step": 6167 }, { "epoch": 0.16, "grad_norm": 2.404524564743042, "learning_rate": 1.915187917019845e-05, "loss": 0.8126, "step": 6168 }, { "epoch": 0.16, "grad_norm": 5.2766947746276855, "learning_rate": 1.9151544568353194e-05, "loss": 0.7221, "step": 6169 }, { "epoch": 0.16, "grad_norm": 1.8216121196746826, "learning_rate": 1.915120990344124e-05, "loss": 0.6446, "step": 6170 }, { "epoch": 0.16, "grad_norm": 5.837532997131348, "learning_rate": 1.91508751754649e-05, "loss": 0.5679, "step": 6171 }, { "epoch": 0.16, "grad_norm": 1.7215052843093872, "learning_rate": 1.9150540384426474e-05, "loss": 0.5895, "step": 6172 }, { "epoch": 0.16, "grad_norm": 1.2828179597854614, "learning_rate": 1.9150205530328276e-05, "loss": 0.5555, "step": 6173 }, { "epoch": 0.16, "grad_norm": 3.5005910396575928, "learning_rate": 1.9149870613172608e-05, "loss": 0.7479, "step": 6174 }, { "epoch": 0.16, "grad_norm": 1.93776273727417, "learning_rate": 1.914953563296178e-05, "loss": 0.6285, "step": 6175 }, { "epoch": 0.16, "grad_norm": 2.5118908882141113, "learning_rate": 1.9149200589698103e-05, "loss": 0.6495, "step": 6176 }, { "epoch": 0.16, "grad_norm": 2.2914724349975586, "learning_rate": 1.914886548338388e-05, "loss": 0.6625, "step": 6177 }, { "epoch": 0.16, "grad_norm": 2.5004525184631348, "learning_rate": 1.914853031402143e-05, "loss": 0.7535, "step": 6178 }, { "epoch": 0.16, "grad_norm": 1.3620795011520386, "learning_rate": 1.914819508161305e-05, "loss": 0.6504, "step": 6179 }, { "epoch": 0.16, "grad_norm": 1.3646416664123535, "learning_rate": 1.914785978616106e-05, "loss": 0.6889, "step": 6180 }, { "epoch": 0.16, "grad_norm": 1.6164677143096924, "learning_rate": 1.9147524427667774e-05, "loss": 0.5681, "step": 6181 }, { "epoch": 0.16, "grad_norm": 6.064964294433594, "learning_rate": 1.914718900613549e-05, "loss": 0.6028, "step": 6182 }, { "epoch": 0.16, "grad_norm": 4.6345014572143555, "learning_rate": 1.9146853521566523e-05, "loss": 0.4366, "step": 6183 }, { "epoch": 0.16, "grad_norm": 1.8229550123214722, "learning_rate": 1.9146517973963193e-05, "loss": 0.7853, "step": 6184 }, { "epoch": 0.16, "grad_norm": 3.1150121688842773, "learning_rate": 1.9146182363327807e-05, "loss": 0.7492, "step": 6185 }, { "epoch": 0.16, "grad_norm": 5.0613837242126465, "learning_rate": 1.9145846689662674e-05, "loss": 0.6116, "step": 6186 }, { "epoch": 0.16, "grad_norm": 5.287619113922119, "learning_rate": 1.9145510952970117e-05, "loss": 0.6767, "step": 6187 }, { "epoch": 0.16, "grad_norm": 1.7558560371398926, "learning_rate": 1.914517515325244e-05, "loss": 0.501, "step": 6188 }, { "epoch": 0.16, "grad_norm": 4.728367805480957, "learning_rate": 1.914483929051197e-05, "loss": 0.7318, "step": 6189 }, { "epoch": 0.16, "grad_norm": 1.6091158390045166, "learning_rate": 1.9144503364751002e-05, "loss": 0.6447, "step": 6190 }, { "epoch": 0.16, "grad_norm": 1.6449576616287231, "learning_rate": 1.9144167375971866e-05, "loss": 0.5368, "step": 6191 }, { "epoch": 0.16, "grad_norm": 3.381500244140625, "learning_rate": 1.9143831324176876e-05, "loss": 0.7339, "step": 6192 }, { "epoch": 0.16, "grad_norm": 2.436518430709839, "learning_rate": 1.9143495209368344e-05, "loss": 0.7649, "step": 6193 }, { "epoch": 0.16, "grad_norm": 3.947063684463501, "learning_rate": 1.9143159031548587e-05, "loss": 0.8259, "step": 6194 }, { "epoch": 0.16, "grad_norm": 2.1848597526550293, "learning_rate": 1.9142822790719923e-05, "loss": 0.669, "step": 6195 }, { "epoch": 0.16, "grad_norm": 2.7853357791900635, "learning_rate": 1.9142486486884665e-05, "loss": 0.626, "step": 6196 }, { "epoch": 0.16, "grad_norm": 5.6829938888549805, "learning_rate": 1.914215012004514e-05, "loss": 0.6755, "step": 6197 }, { "epoch": 0.16, "grad_norm": 2.5451016426086426, "learning_rate": 1.9141813690203654e-05, "loss": 0.7352, "step": 6198 }, { "epoch": 0.16, "grad_norm": 2.701709032058716, "learning_rate": 1.9141477197362536e-05, "loss": 0.7176, "step": 6199 }, { "epoch": 0.16, "grad_norm": 1.489142656326294, "learning_rate": 1.9141140641524097e-05, "loss": 0.6637, "step": 6200 }, { "epoch": 0.16, "grad_norm": 4.047367572784424, "learning_rate": 1.914080402269066e-05, "loss": 0.5617, "step": 6201 }, { "epoch": 0.16, "grad_norm": 1.654918909072876, "learning_rate": 1.9140467340864548e-05, "loss": 0.5969, "step": 6202 }, { "epoch": 0.16, "grad_norm": 1.3053290843963623, "learning_rate": 1.9140130596048075e-05, "loss": 0.7329, "step": 6203 }, { "epoch": 0.16, "grad_norm": 3.7709755897521973, "learning_rate": 1.9139793788243565e-05, "loss": 0.6166, "step": 6204 }, { "epoch": 0.16, "grad_norm": 3.475980281829834, "learning_rate": 1.9139456917453337e-05, "loss": 0.6522, "step": 6205 }, { "epoch": 0.16, "grad_norm": 3.335871696472168, "learning_rate": 1.9139119983679717e-05, "loss": 0.7934, "step": 6206 }, { "epoch": 0.16, "grad_norm": 5.5090179443359375, "learning_rate": 1.913878298692502e-05, "loss": 0.4975, "step": 6207 }, { "epoch": 0.16, "grad_norm": 4.498958110809326, "learning_rate": 1.9138445927191574e-05, "loss": 0.5608, "step": 6208 }, { "epoch": 0.16, "grad_norm": 2.849074602127075, "learning_rate": 1.91381088044817e-05, "loss": 0.5902, "step": 6209 }, { "epoch": 0.16, "grad_norm": 4.338355541229248, "learning_rate": 1.913777161879772e-05, "loss": 0.7192, "step": 6210 }, { "epoch": 0.16, "grad_norm": 1.694929599761963, "learning_rate": 1.9137434370141963e-05, "loss": 0.6856, "step": 6211 }, { "epoch": 0.16, "grad_norm": 2.353097438812256, "learning_rate": 1.9137097058516745e-05, "loss": 0.6002, "step": 6212 }, { "epoch": 0.16, "grad_norm": 3.5436458587646484, "learning_rate": 1.9136759683924397e-05, "loss": 0.4583, "step": 6213 }, { "epoch": 0.16, "grad_norm": 1.5153948068618774, "learning_rate": 1.913642224636724e-05, "loss": 0.631, "step": 6214 }, { "epoch": 0.16, "grad_norm": 2.1178486347198486, "learning_rate": 1.9136084745847602e-05, "loss": 0.5963, "step": 6215 }, { "epoch": 0.16, "grad_norm": 1.5256558656692505, "learning_rate": 1.9135747182367805e-05, "loss": 0.6409, "step": 6216 }, { "epoch": 0.16, "grad_norm": 4.17116641998291, "learning_rate": 1.913540955593018e-05, "loss": 0.6273, "step": 6217 }, { "epoch": 0.16, "grad_norm": 4.988790035247803, "learning_rate": 1.9135071866537053e-05, "loss": 0.6884, "step": 6218 }, { "epoch": 0.16, "grad_norm": 8.150060653686523, "learning_rate": 1.9134734114190746e-05, "loss": 0.7295, "step": 6219 }, { "epoch": 0.16, "grad_norm": 6.377823829650879, "learning_rate": 1.9134396298893594e-05, "loss": 0.768, "step": 6220 }, { "epoch": 0.16, "grad_norm": 1.830429196357727, "learning_rate": 1.9134058420647924e-05, "loss": 0.6242, "step": 6221 }, { "epoch": 0.16, "grad_norm": 4.558141231536865, "learning_rate": 1.913372047945606e-05, "loss": 0.7739, "step": 6222 }, { "epoch": 0.16, "grad_norm": 4.130598545074463, "learning_rate": 1.9133382475320327e-05, "loss": 0.5229, "step": 6223 }, { "epoch": 0.16, "grad_norm": 3.999408721923828, "learning_rate": 1.9133044408243065e-05, "loss": 0.5661, "step": 6224 }, { "epoch": 0.16, "grad_norm": 1.4563095569610596, "learning_rate": 1.91327062782266e-05, "loss": 0.6827, "step": 6225 }, { "epoch": 0.16, "grad_norm": 3.002302646636963, "learning_rate": 1.9132368085273262e-05, "loss": 0.6032, "step": 6226 }, { "epoch": 0.16, "grad_norm": 2.2791831493377686, "learning_rate": 1.9132029829385373e-05, "loss": 0.6024, "step": 6227 }, { "epoch": 0.16, "grad_norm": 2.220942497253418, "learning_rate": 1.913169151056528e-05, "loss": 0.4494, "step": 6228 }, { "epoch": 0.16, "grad_norm": 2.3654191493988037, "learning_rate": 1.9131353128815303e-05, "loss": 0.6567, "step": 6229 }, { "epoch": 0.16, "grad_norm": 1.400920033454895, "learning_rate": 1.913101468413778e-05, "loss": 0.6493, "step": 6230 }, { "epoch": 0.16, "grad_norm": 1.711255431175232, "learning_rate": 1.9130676176535033e-05, "loss": 0.6641, "step": 6231 }, { "epoch": 0.16, "grad_norm": 2.0525317192077637, "learning_rate": 1.913033760600941e-05, "loss": 0.6726, "step": 6232 }, { "epoch": 0.16, "grad_norm": 4.081098556518555, "learning_rate": 1.9129998972563233e-05, "loss": 0.5897, "step": 6233 }, { "epoch": 0.16, "grad_norm": 1.4882540702819824, "learning_rate": 1.9129660276198838e-05, "loss": 0.6433, "step": 6234 }, { "epoch": 0.16, "grad_norm": 3.821732759475708, "learning_rate": 1.912932151691856e-05, "loss": 0.5988, "step": 6235 }, { "epoch": 0.16, "grad_norm": 4.044562816619873, "learning_rate": 1.9128982694724738e-05, "loss": 0.7676, "step": 6236 }, { "epoch": 0.16, "grad_norm": 2.3967134952545166, "learning_rate": 1.91286438096197e-05, "loss": 0.6885, "step": 6237 }, { "epoch": 0.16, "grad_norm": 1.8640044927597046, "learning_rate": 1.9128304861605785e-05, "loss": 0.6096, "step": 6238 }, { "epoch": 0.16, "grad_norm": 2.24436616897583, "learning_rate": 1.9127965850685328e-05, "loss": 0.6841, "step": 6239 }, { "epoch": 0.16, "grad_norm": 16.92270851135254, "learning_rate": 1.9127626776860663e-05, "loss": 0.7211, "step": 6240 }, { "epoch": 0.16, "grad_norm": 3.6699025630950928, "learning_rate": 1.9127287640134132e-05, "loss": 0.8019, "step": 6241 }, { "epoch": 0.16, "grad_norm": 6.005701541900635, "learning_rate": 1.9126948440508066e-05, "loss": 0.6453, "step": 6242 }, { "epoch": 0.16, "grad_norm": 1.5817691087722778, "learning_rate": 1.9126609177984806e-05, "loss": 0.7232, "step": 6243 }, { "epoch": 0.16, "grad_norm": 4.736435890197754, "learning_rate": 1.9126269852566687e-05, "loss": 0.836, "step": 6244 }, { "epoch": 0.16, "grad_norm": 1.6528213024139404, "learning_rate": 1.9125930464256056e-05, "loss": 0.6393, "step": 6245 }, { "epoch": 0.16, "grad_norm": 4.11285400390625, "learning_rate": 1.912559101305524e-05, "loss": 0.7534, "step": 6246 }, { "epoch": 0.16, "grad_norm": 1.9218389987945557, "learning_rate": 1.9125251498966588e-05, "loss": 0.5592, "step": 6247 }, { "epoch": 0.16, "grad_norm": 10.097025871276855, "learning_rate": 1.9124911921992433e-05, "loss": 0.6394, "step": 6248 }, { "epoch": 0.16, "grad_norm": 2.5152597427368164, "learning_rate": 1.9124572282135118e-05, "loss": 0.6501, "step": 6249 }, { "epoch": 0.16, "grad_norm": 2.2473363876342773, "learning_rate": 1.9124232579396987e-05, "loss": 0.6603, "step": 6250 }, { "epoch": 0.16, "grad_norm": 3.073511838912964, "learning_rate": 1.9123892813780375e-05, "loss": 0.5978, "step": 6251 }, { "epoch": 0.16, "grad_norm": 1.4658770561218262, "learning_rate": 1.9123552985287623e-05, "loss": 0.5942, "step": 6252 }, { "epoch": 0.16, "grad_norm": 1.955250859260559, "learning_rate": 1.912321309392108e-05, "loss": 0.5428, "step": 6253 }, { "epoch": 0.16, "grad_norm": 1.3393117189407349, "learning_rate": 1.912287313968308e-05, "loss": 0.5853, "step": 6254 }, { "epoch": 0.16, "grad_norm": 1.1977766752243042, "learning_rate": 1.9122533122575973e-05, "loss": 0.8067, "step": 6255 }, { "epoch": 0.16, "grad_norm": 1.6532349586486816, "learning_rate": 1.91221930426021e-05, "loss": 0.6506, "step": 6256 }, { "epoch": 0.16, "grad_norm": 2.4046242237091064, "learning_rate": 1.9121852899763804e-05, "loss": 0.7555, "step": 6257 }, { "epoch": 0.16, "grad_norm": 2.152237892150879, "learning_rate": 1.9121512694063424e-05, "loss": 0.5437, "step": 6258 }, { "epoch": 0.16, "grad_norm": 5.477294445037842, "learning_rate": 1.9121172425503314e-05, "loss": 0.6315, "step": 6259 }, { "epoch": 0.16, "grad_norm": 2.893641948699951, "learning_rate": 1.9120832094085813e-05, "loss": 0.5458, "step": 6260 }, { "epoch": 0.16, "grad_norm": 1.843888282775879, "learning_rate": 1.9120491699813267e-05, "loss": 0.6196, "step": 6261 }, { "epoch": 0.16, "grad_norm": 1.4579198360443115, "learning_rate": 1.912015124268802e-05, "loss": 0.6627, "step": 6262 }, { "epoch": 0.16, "grad_norm": 16.090831756591797, "learning_rate": 1.9119810722712424e-05, "loss": 0.6791, "step": 6263 }, { "epoch": 0.16, "grad_norm": 2.331868886947632, "learning_rate": 1.911947013988882e-05, "loss": 0.5593, "step": 6264 }, { "epoch": 0.16, "grad_norm": 1.6348164081573486, "learning_rate": 1.9119129494219558e-05, "loss": 0.7526, "step": 6265 }, { "epoch": 0.16, "grad_norm": 1.3956849575042725, "learning_rate": 1.9118788785706984e-05, "loss": 0.6819, "step": 6266 }, { "epoch": 0.16, "grad_norm": 1.83368980884552, "learning_rate": 1.911844801435345e-05, "loss": 0.6334, "step": 6267 }, { "epoch": 0.16, "grad_norm": 4.553959846496582, "learning_rate": 1.9118107180161297e-05, "loss": 1.1608, "step": 6268 }, { "epoch": 0.16, "grad_norm": 2.487908124923706, "learning_rate": 1.911776628313288e-05, "loss": 0.7793, "step": 6269 }, { "epoch": 0.16, "grad_norm": 11.238476753234863, "learning_rate": 1.9117425323270544e-05, "loss": 0.5905, "step": 6270 }, { "epoch": 0.16, "grad_norm": 2.3271195888519287, "learning_rate": 1.911708430057664e-05, "loss": 0.5689, "step": 6271 }, { "epoch": 0.16, "grad_norm": 1.4183984994888306, "learning_rate": 1.9116743215053524e-05, "loss": 0.6828, "step": 6272 }, { "epoch": 0.16, "grad_norm": 1.1707868576049805, "learning_rate": 1.9116402066703537e-05, "loss": 0.4938, "step": 6273 }, { "epoch": 0.16, "grad_norm": 3.4963858127593994, "learning_rate": 1.9116060855529035e-05, "loss": 0.5574, "step": 6274 }, { "epoch": 0.16, "grad_norm": 2.6960067749023438, "learning_rate": 1.911571958153237e-05, "loss": 0.6508, "step": 6275 }, { "epoch": 0.16, "grad_norm": 1.6473408937454224, "learning_rate": 1.9115378244715896e-05, "loss": 0.5196, "step": 6276 }, { "epoch": 0.16, "grad_norm": 3.363816976547241, "learning_rate": 1.9115036845081957e-05, "loss": 0.6794, "step": 6277 }, { "epoch": 0.16, "grad_norm": 1.7596312761306763, "learning_rate": 1.9114695382632912e-05, "loss": 0.6123, "step": 6278 }, { "epoch": 0.16, "grad_norm": 1.7036038637161255, "learning_rate": 1.9114353857371112e-05, "loss": 0.6314, "step": 6279 }, { "epoch": 0.16, "grad_norm": 3.71856427192688, "learning_rate": 1.9114012269298913e-05, "loss": 0.5631, "step": 6280 }, { "epoch": 0.16, "grad_norm": 1.5084835290908813, "learning_rate": 1.9113670618418664e-05, "loss": 0.5733, "step": 6281 }, { "epoch": 0.16, "grad_norm": 3.3581039905548096, "learning_rate": 1.911332890473273e-05, "loss": 0.6503, "step": 6282 }, { "epoch": 0.16, "grad_norm": 3.245743751525879, "learning_rate": 1.911298712824345e-05, "loss": 0.709, "step": 6283 }, { "epoch": 0.16, "grad_norm": 2.605309009552002, "learning_rate": 1.9112645288953195e-05, "loss": 0.6944, "step": 6284 }, { "epoch": 0.16, "grad_norm": 5.892098903656006, "learning_rate": 1.911230338686431e-05, "loss": 0.8149, "step": 6285 }, { "epoch": 0.16, "grad_norm": 2.5629522800445557, "learning_rate": 1.9111961421979156e-05, "loss": 0.9007, "step": 6286 }, { "epoch": 0.16, "grad_norm": 3.8546042442321777, "learning_rate": 1.911161939430009e-05, "loss": 0.6093, "step": 6287 }, { "epoch": 0.16, "grad_norm": 1.709256887435913, "learning_rate": 1.9111277303829466e-05, "loss": 0.8197, "step": 6288 }, { "epoch": 0.16, "grad_norm": 1.9006229639053345, "learning_rate": 1.9110935150569643e-05, "loss": 0.7734, "step": 6289 }, { "epoch": 0.16, "grad_norm": 1.3045026063919067, "learning_rate": 1.911059293452298e-05, "loss": 0.601, "step": 6290 }, { "epoch": 0.16, "grad_norm": 4.626048564910889, "learning_rate": 1.9110250655691832e-05, "loss": 0.713, "step": 6291 }, { "epoch": 0.16, "grad_norm": 4.224141597747803, "learning_rate": 1.9109908314078563e-05, "loss": 0.6624, "step": 6292 }, { "epoch": 0.16, "grad_norm": 4.118167400360107, "learning_rate": 1.910956590968553e-05, "loss": 0.9501, "step": 6293 }, { "epoch": 0.16, "grad_norm": 1.2788140773773193, "learning_rate": 1.910922344251509e-05, "loss": 0.5529, "step": 6294 }, { "epoch": 0.16, "grad_norm": 2.079282283782959, "learning_rate": 1.9108880912569602e-05, "loss": 0.5249, "step": 6295 }, { "epoch": 0.16, "grad_norm": 2.6889495849609375, "learning_rate": 1.9108538319851434e-05, "loss": 0.6694, "step": 6296 }, { "epoch": 0.16, "grad_norm": 1.813663125038147, "learning_rate": 1.910819566436294e-05, "loss": 0.6316, "step": 6297 }, { "epoch": 0.16, "grad_norm": 7.092612266540527, "learning_rate": 1.9107852946106487e-05, "loss": 0.8058, "step": 6298 }, { "epoch": 0.16, "grad_norm": 1.932944893836975, "learning_rate": 1.910751016508443e-05, "loss": 0.6242, "step": 6299 }, { "epoch": 0.16, "grad_norm": 6.176270484924316, "learning_rate": 1.9107167321299138e-05, "loss": 0.6322, "step": 6300 }, { "epoch": 0.16, "grad_norm": 3.9980294704437256, "learning_rate": 1.910682441475297e-05, "loss": 0.8489, "step": 6301 }, { "epoch": 0.16, "grad_norm": 1.7519540786743164, "learning_rate": 1.910648144544829e-05, "loss": 0.5682, "step": 6302 }, { "epoch": 0.16, "grad_norm": 2.8685483932495117, "learning_rate": 1.9106138413387458e-05, "loss": 0.7105, "step": 6303 }, { "epoch": 0.16, "grad_norm": 1.9932804107666016, "learning_rate": 1.910579531857284e-05, "loss": 0.6203, "step": 6304 }, { "epoch": 0.16, "grad_norm": 2.6398894786834717, "learning_rate": 1.9105452161006805e-05, "loss": 0.5574, "step": 6305 }, { "epoch": 0.16, "grad_norm": 2.887639045715332, "learning_rate": 1.9105108940691714e-05, "loss": 0.6629, "step": 6306 }, { "epoch": 0.16, "grad_norm": 8.79641342163086, "learning_rate": 1.910476565762993e-05, "loss": 0.793, "step": 6307 }, { "epoch": 0.16, "grad_norm": 2.5397114753723145, "learning_rate": 1.9104422311823826e-05, "loss": 0.6769, "step": 6308 }, { "epoch": 0.16, "grad_norm": 3.2032833099365234, "learning_rate": 1.910407890327576e-05, "loss": 0.7207, "step": 6309 }, { "epoch": 0.16, "grad_norm": 1.8096857070922852, "learning_rate": 1.9103735431988105e-05, "loss": 0.7522, "step": 6310 }, { "epoch": 0.16, "grad_norm": 1.9209082126617432, "learning_rate": 1.910339189796322e-05, "loss": 0.6878, "step": 6311 }, { "epoch": 0.16, "grad_norm": 2.458472967147827, "learning_rate": 1.910304830120348e-05, "loss": 0.6686, "step": 6312 }, { "epoch": 0.16, "grad_norm": 1.6659936904907227, "learning_rate": 1.910270464171125e-05, "loss": 0.652, "step": 6313 }, { "epoch": 0.16, "grad_norm": 3.205881357192993, "learning_rate": 1.9102360919488897e-05, "loss": 0.7108, "step": 6314 }, { "epoch": 0.16, "grad_norm": 2.6382617950439453, "learning_rate": 1.9102017134538794e-05, "loss": 0.6483, "step": 6315 }, { "epoch": 0.16, "grad_norm": 2.0142080783843994, "learning_rate": 1.9101673286863306e-05, "loss": 0.566, "step": 6316 }, { "epoch": 0.16, "grad_norm": 2.9718806743621826, "learning_rate": 1.9101329376464805e-05, "loss": 0.8051, "step": 6317 }, { "epoch": 0.16, "grad_norm": 7.0476531982421875, "learning_rate": 1.910098540334566e-05, "loss": 0.6088, "step": 6318 }, { "epoch": 0.16, "grad_norm": 2.3710427284240723, "learning_rate": 1.910064136750824e-05, "loss": 0.7129, "step": 6319 }, { "epoch": 0.16, "grad_norm": 2.2434499263763428, "learning_rate": 1.910029726895492e-05, "loss": 0.697, "step": 6320 }, { "epoch": 0.16, "grad_norm": 1.78174889087677, "learning_rate": 1.9099953107688064e-05, "loss": 0.6215, "step": 6321 }, { "epoch": 0.16, "grad_norm": 2.46591854095459, "learning_rate": 1.909960888371005e-05, "loss": 0.5782, "step": 6322 }, { "epoch": 0.16, "grad_norm": 2.060804843902588, "learning_rate": 1.909926459702325e-05, "loss": 0.7641, "step": 6323 }, { "epoch": 0.16, "grad_norm": 1.5002944469451904, "learning_rate": 1.9098920247630034e-05, "loss": 0.7173, "step": 6324 }, { "epoch": 0.16, "grad_norm": 2.1244964599609375, "learning_rate": 1.9098575835532776e-05, "loss": 0.6834, "step": 6325 }, { "epoch": 0.16, "grad_norm": 1.4605942964553833, "learning_rate": 1.909823136073385e-05, "loss": 0.5103, "step": 6326 }, { "epoch": 0.16, "grad_norm": 5.361940860748291, "learning_rate": 1.909788682323563e-05, "loss": 0.5471, "step": 6327 }, { "epoch": 0.16, "grad_norm": 1.7359243631362915, "learning_rate": 1.909754222304049e-05, "loss": 0.5142, "step": 6328 }, { "epoch": 0.16, "grad_norm": 1.9242247343063354, "learning_rate": 1.9097197560150805e-05, "loss": 0.7318, "step": 6329 }, { "epoch": 0.16, "grad_norm": 3.3060269355773926, "learning_rate": 1.9096852834568948e-05, "loss": 0.7835, "step": 6330 }, { "epoch": 0.16, "grad_norm": 3.210538387298584, "learning_rate": 1.9096508046297297e-05, "loss": 0.9118, "step": 6331 }, { "epoch": 0.16, "grad_norm": 1.9646146297454834, "learning_rate": 1.909616319533823e-05, "loss": 0.5796, "step": 6332 }, { "epoch": 0.16, "grad_norm": 1.3119901418685913, "learning_rate": 1.909581828169412e-05, "loss": 0.5839, "step": 6333 }, { "epoch": 0.16, "grad_norm": 2.9099338054656982, "learning_rate": 1.909547330536734e-05, "loss": 0.5973, "step": 6334 }, { "epoch": 0.16, "grad_norm": 1.4641588926315308, "learning_rate": 1.9095128266360277e-05, "loss": 0.6975, "step": 6335 }, { "epoch": 0.16, "grad_norm": 1.388763427734375, "learning_rate": 1.9094783164675306e-05, "loss": 0.7003, "step": 6336 }, { "epoch": 0.16, "grad_norm": 3.0400524139404297, "learning_rate": 1.90944380003148e-05, "loss": 0.6363, "step": 6337 }, { "epoch": 0.16, "grad_norm": 2.846916675567627, "learning_rate": 1.909409277328114e-05, "loss": 0.7119, "step": 6338 }, { "epoch": 0.16, "grad_norm": 4.210483551025391, "learning_rate": 1.909374748357671e-05, "loss": 0.62, "step": 6339 }, { "epoch": 0.16, "grad_norm": 1.6720811128616333, "learning_rate": 1.9093402131203884e-05, "loss": 0.6893, "step": 6340 }, { "epoch": 0.16, "grad_norm": 2.56330943107605, "learning_rate": 1.909305671616504e-05, "loss": 0.7097, "step": 6341 }, { "epoch": 0.16, "grad_norm": 1.417970895767212, "learning_rate": 1.9092711238462564e-05, "loss": 0.445, "step": 6342 }, { "epoch": 0.16, "grad_norm": 1.7880610227584839, "learning_rate": 1.9092365698098837e-05, "loss": 0.6374, "step": 6343 }, { "epoch": 0.16, "grad_norm": 7.432562828063965, "learning_rate": 1.9092020095076237e-05, "loss": 0.5986, "step": 6344 }, { "epoch": 0.16, "grad_norm": 2.4958736896514893, "learning_rate": 1.9091674429397147e-05, "loss": 0.8666, "step": 6345 }, { "epoch": 0.16, "grad_norm": 2.2529101371765137, "learning_rate": 1.9091328701063947e-05, "loss": 0.5901, "step": 6346 }, { "epoch": 0.16, "grad_norm": 2.675943613052368, "learning_rate": 1.909098291007902e-05, "loss": 0.7458, "step": 6347 }, { "epoch": 0.16, "grad_norm": 2.2936458587646484, "learning_rate": 1.9090637056444755e-05, "loss": 0.6638, "step": 6348 }, { "epoch": 0.16, "grad_norm": 1.418609857559204, "learning_rate": 1.9090291140163527e-05, "loss": 0.6074, "step": 6349 }, { "epoch": 0.16, "grad_norm": 1.7898029088974, "learning_rate": 1.9089945161237727e-05, "loss": 0.6091, "step": 6350 }, { "epoch": 0.16, "grad_norm": 4.399179935455322, "learning_rate": 1.908959911966973e-05, "loss": 0.588, "step": 6351 }, { "epoch": 0.16, "grad_norm": 1.4309502840042114, "learning_rate": 1.9089253015461934e-05, "loss": 0.6811, "step": 6352 }, { "epoch": 0.16, "grad_norm": 2.60357928276062, "learning_rate": 1.9088906848616712e-05, "loss": 0.8571, "step": 6353 }, { "epoch": 0.16, "grad_norm": 4.0564703941345215, "learning_rate": 1.9088560619136455e-05, "loss": 0.6175, "step": 6354 }, { "epoch": 0.16, "grad_norm": 3.1443469524383545, "learning_rate": 1.9088214327023547e-05, "loss": 0.5944, "step": 6355 }, { "epoch": 0.16, "grad_norm": 3.1372172832489014, "learning_rate": 1.908786797228038e-05, "loss": 0.648, "step": 6356 }, { "epoch": 0.16, "grad_norm": 1.7313083410263062, "learning_rate": 1.9087521554909333e-05, "loss": 0.7873, "step": 6357 }, { "epoch": 0.16, "grad_norm": 1.8604867458343506, "learning_rate": 1.9087175074912797e-05, "loss": 0.5757, "step": 6358 }, { "epoch": 0.16, "grad_norm": 6.213621139526367, "learning_rate": 1.908682853229316e-05, "loss": 0.6735, "step": 6359 }, { "epoch": 0.16, "grad_norm": 2.2430295944213867, "learning_rate": 1.908648192705281e-05, "loss": 0.8443, "step": 6360 }, { "epoch": 0.16, "grad_norm": 5.8428754806518555, "learning_rate": 1.9086135259194134e-05, "loss": 0.6977, "step": 6361 }, { "epoch": 0.16, "grad_norm": 2.651334285736084, "learning_rate": 1.908578852871952e-05, "loss": 0.6604, "step": 6362 }, { "epoch": 0.16, "grad_norm": 2.6669723987579346, "learning_rate": 1.9085441735631365e-05, "loss": 0.5748, "step": 6363 }, { "epoch": 0.16, "grad_norm": 5.271778106689453, "learning_rate": 1.908509487993205e-05, "loss": 0.6705, "step": 6364 }, { "epoch": 0.16, "grad_norm": 5.928548812866211, "learning_rate": 1.908474796162397e-05, "loss": 0.8028, "step": 6365 }, { "epoch": 0.16, "grad_norm": 1.791134238243103, "learning_rate": 1.9084400980709515e-05, "loss": 0.5421, "step": 6366 }, { "epoch": 0.16, "grad_norm": 1.2815042734146118, "learning_rate": 1.9084053937191077e-05, "loss": 0.6278, "step": 6367 }, { "epoch": 0.16, "grad_norm": 4.451802730560303, "learning_rate": 1.9083706831071046e-05, "loss": 0.8571, "step": 6368 }, { "epoch": 0.16, "grad_norm": 1.5004462003707886, "learning_rate": 1.908335966235181e-05, "loss": 0.5738, "step": 6369 }, { "epoch": 0.16, "grad_norm": 1.1402523517608643, "learning_rate": 1.908301243103577e-05, "loss": 0.6323, "step": 6370 }, { "epoch": 0.16, "grad_norm": 2.8276617527008057, "learning_rate": 1.9082665137125317e-05, "loss": 0.6497, "step": 6371 }, { "epoch": 0.16, "grad_norm": 2.0727500915527344, "learning_rate": 1.908231778062284e-05, "loss": 0.8593, "step": 6372 }, { "epoch": 0.16, "grad_norm": 4.809029579162598, "learning_rate": 1.908197036153073e-05, "loss": 0.7979, "step": 6373 }, { "epoch": 0.16, "grad_norm": 3.092418909072876, "learning_rate": 1.9081622879851395e-05, "loss": 0.5886, "step": 6374 }, { "epoch": 0.16, "grad_norm": 3.7328073978424072, "learning_rate": 1.908127533558721e-05, "loss": 0.7691, "step": 6375 }, { "epoch": 0.16, "grad_norm": 2.7353434562683105, "learning_rate": 1.908092772874059e-05, "loss": 0.6466, "step": 6376 }, { "epoch": 0.16, "grad_norm": 2.092987060546875, "learning_rate": 1.908058005931392e-05, "loss": 0.5233, "step": 6377 }, { "epoch": 0.16, "grad_norm": 1.725599765777588, "learning_rate": 1.9080232327309592e-05, "loss": 0.5835, "step": 6378 }, { "epoch": 0.16, "grad_norm": 1.9284899234771729, "learning_rate": 1.907988453273001e-05, "loss": 0.684, "step": 6379 }, { "epoch": 0.16, "grad_norm": 1.8353157043457031, "learning_rate": 1.9079536675577572e-05, "loss": 0.6357, "step": 6380 }, { "epoch": 0.16, "grad_norm": 3.8261516094207764, "learning_rate": 1.907918875585467e-05, "loss": 0.7419, "step": 6381 }, { "epoch": 0.16, "grad_norm": 2.185563802719116, "learning_rate": 1.9078840773563702e-05, "loss": 0.7616, "step": 6382 }, { "epoch": 0.16, "grad_norm": 3.5521790981292725, "learning_rate": 1.9078492728707068e-05, "loss": 0.8659, "step": 6383 }, { "epoch": 0.16, "grad_norm": 2.1655101776123047, "learning_rate": 1.9078144621287164e-05, "loss": 0.6111, "step": 6384 }, { "epoch": 0.16, "grad_norm": 2.531934976577759, "learning_rate": 1.9077796451306393e-05, "loss": 0.6774, "step": 6385 }, { "epoch": 0.16, "grad_norm": 3.9815287590026855, "learning_rate": 1.907744821876715e-05, "loss": 0.7337, "step": 6386 }, { "epoch": 0.16, "grad_norm": 2.053504705429077, "learning_rate": 1.907709992367184e-05, "loss": 0.8157, "step": 6387 }, { "epoch": 0.16, "grad_norm": 2.2042298316955566, "learning_rate": 1.9076751566022858e-05, "loss": 0.6316, "step": 6388 }, { "epoch": 0.16, "grad_norm": 3.0458779335021973, "learning_rate": 1.9076403145822608e-05, "loss": 0.7114, "step": 6389 }, { "epoch": 0.16, "grad_norm": 1.5407792329788208, "learning_rate": 1.9076054663073487e-05, "loss": 0.6828, "step": 6390 }, { "epoch": 0.16, "grad_norm": 2.327946901321411, "learning_rate": 1.90757061177779e-05, "loss": 0.7361, "step": 6391 }, { "epoch": 0.16, "grad_norm": 1.9103585481643677, "learning_rate": 1.907535750993825e-05, "loss": 0.6934, "step": 6392 }, { "epoch": 0.16, "grad_norm": 1.6714317798614502, "learning_rate": 1.9075008839556934e-05, "loss": 0.429, "step": 6393 }, { "epoch": 0.16, "grad_norm": 2.839262008666992, "learning_rate": 1.9074660106636362e-05, "loss": 0.5333, "step": 6394 }, { "epoch": 0.16, "grad_norm": 4.173665523529053, "learning_rate": 1.9074311311178934e-05, "loss": 0.817, "step": 6395 }, { "epoch": 0.16, "grad_norm": 2.415757417678833, "learning_rate": 1.9073962453187055e-05, "loss": 0.7767, "step": 6396 }, { "epoch": 0.16, "grad_norm": 1.5108519792556763, "learning_rate": 1.9073613532663125e-05, "loss": 0.6863, "step": 6397 }, { "epoch": 0.16, "grad_norm": 1.5921568870544434, "learning_rate": 1.9073264549609553e-05, "loss": 0.51, "step": 6398 }, { "epoch": 0.16, "grad_norm": 2.2667243480682373, "learning_rate": 1.907291550402874e-05, "loss": 0.7465, "step": 6399 }, { "epoch": 0.16, "grad_norm": 2.4699530601501465, "learning_rate": 1.9072566395923093e-05, "loss": 0.689, "step": 6400 }, { "epoch": 0.16, "grad_norm": 1.453254222869873, "learning_rate": 1.907221722529502e-05, "loss": 0.646, "step": 6401 }, { "epoch": 0.16, "grad_norm": 1.827350378036499, "learning_rate": 1.9071867992146926e-05, "loss": 0.6738, "step": 6402 }, { "epoch": 0.16, "grad_norm": 3.1139237880706787, "learning_rate": 1.9071518696481214e-05, "loss": 0.7288, "step": 6403 }, { "epoch": 0.16, "grad_norm": 1.815110445022583, "learning_rate": 1.90711693383003e-05, "loss": 0.5381, "step": 6404 }, { "epoch": 0.16, "grad_norm": 2.8041269779205322, "learning_rate": 1.9070819917606583e-05, "loss": 0.6998, "step": 6405 }, { "epoch": 0.16, "grad_norm": 3.0719621181488037, "learning_rate": 1.9070470434402474e-05, "loss": 0.7286, "step": 6406 }, { "epoch": 0.16, "grad_norm": 2.962573766708374, "learning_rate": 1.907012088869038e-05, "loss": 0.5601, "step": 6407 }, { "epoch": 0.16, "grad_norm": 3.6364500522613525, "learning_rate": 1.9069771280472713e-05, "loss": 0.7423, "step": 6408 }, { "epoch": 0.16, "grad_norm": 3.0627949237823486, "learning_rate": 1.9069421609751878e-05, "loss": 0.5576, "step": 6409 }, { "epoch": 0.16, "grad_norm": 2.220853567123413, "learning_rate": 1.906907187653029e-05, "loss": 0.7419, "step": 6410 }, { "epoch": 0.16, "grad_norm": 2.3805606365203857, "learning_rate": 1.9068722080810358e-05, "loss": 0.575, "step": 6411 }, { "epoch": 0.16, "grad_norm": 2.9083340167999268, "learning_rate": 1.9068372222594487e-05, "loss": 0.7194, "step": 6412 }, { "epoch": 0.16, "grad_norm": 1.4977375268936157, "learning_rate": 1.9068022301885095e-05, "loss": 0.5694, "step": 6413 }, { "epoch": 0.16, "grad_norm": 2.4121835231781006, "learning_rate": 1.906767231868459e-05, "loss": 0.7399, "step": 6414 }, { "epoch": 0.16, "grad_norm": 2.067592144012451, "learning_rate": 1.9067322272995385e-05, "loss": 0.5656, "step": 6415 }, { "epoch": 0.16, "grad_norm": 3.4013853073120117, "learning_rate": 1.9066972164819888e-05, "loss": 0.7156, "step": 6416 }, { "epoch": 0.16, "grad_norm": 2.3988215923309326, "learning_rate": 1.9066621994160516e-05, "loss": 0.543, "step": 6417 }, { "epoch": 0.16, "grad_norm": 4.0164923667907715, "learning_rate": 1.906627176101968e-05, "loss": 0.68, "step": 6418 }, { "epoch": 0.16, "grad_norm": 3.1015822887420654, "learning_rate": 1.90659214653998e-05, "loss": 0.6766, "step": 6419 }, { "epoch": 0.16, "grad_norm": 2.92453670501709, "learning_rate": 1.9065571107303286e-05, "loss": 0.7441, "step": 6420 }, { "epoch": 0.16, "grad_norm": 3.6255557537078857, "learning_rate": 1.9065220686732548e-05, "loss": 0.59, "step": 6421 }, { "epoch": 0.16, "grad_norm": 1.6601972579956055, "learning_rate": 1.9064870203690003e-05, "loss": 0.74, "step": 6422 }, { "epoch": 0.16, "grad_norm": 1.6706516742706299, "learning_rate": 1.906451965817807e-05, "loss": 0.559, "step": 6423 }, { "epoch": 0.16, "grad_norm": 4.605011940002441, "learning_rate": 1.906416905019916e-05, "loss": 0.8033, "step": 6424 }, { "epoch": 0.16, "grad_norm": 2.612328290939331, "learning_rate": 1.9063818379755693e-05, "loss": 0.6946, "step": 6425 }, { "epoch": 0.16, "grad_norm": 3.701296806335449, "learning_rate": 1.9063467646850084e-05, "loss": 0.5466, "step": 6426 }, { "epoch": 0.16, "grad_norm": 5.415468692779541, "learning_rate": 1.906311685148475e-05, "loss": 0.4904, "step": 6427 }, { "epoch": 0.16, "grad_norm": 1.7036875486373901, "learning_rate": 1.906276599366211e-05, "loss": 0.6279, "step": 6428 }, { "epoch": 0.16, "grad_norm": 1.61092209815979, "learning_rate": 1.9062415073384576e-05, "loss": 0.6501, "step": 6429 }, { "epoch": 0.16, "grad_norm": 7.338366508483887, "learning_rate": 1.906206409065458e-05, "loss": 0.7157, "step": 6430 }, { "epoch": 0.16, "grad_norm": 1.8877410888671875, "learning_rate": 1.9061713045474522e-05, "loss": 0.5452, "step": 6431 }, { "epoch": 0.16, "grad_norm": 6.570652484893799, "learning_rate": 1.9061361937846836e-05, "loss": 0.6912, "step": 6432 }, { "epoch": 0.16, "grad_norm": 2.85945463180542, "learning_rate": 1.9061010767773936e-05, "loss": 0.9073, "step": 6433 }, { "epoch": 0.16, "grad_norm": 2.446697473526001, "learning_rate": 1.9060659535258242e-05, "loss": 0.7146, "step": 6434 }, { "epoch": 0.16, "grad_norm": 5.349118709564209, "learning_rate": 1.9060308240302173e-05, "loss": 0.6641, "step": 6435 }, { "epoch": 0.16, "grad_norm": 3.3763997554779053, "learning_rate": 1.9059956882908154e-05, "loss": 0.5894, "step": 6436 }, { "epoch": 0.16, "grad_norm": 1.8868170976638794, "learning_rate": 1.9059605463078604e-05, "loss": 0.4855, "step": 6437 }, { "epoch": 0.17, "grad_norm": 1.7757997512817383, "learning_rate": 1.9059253980815947e-05, "loss": 0.6804, "step": 6438 }, { "epoch": 0.17, "grad_norm": 4.531579971313477, "learning_rate": 1.90589024361226e-05, "loss": 0.7188, "step": 6439 }, { "epoch": 0.17, "grad_norm": 2.1832029819488525, "learning_rate": 1.905855082900099e-05, "loss": 0.6328, "step": 6440 }, { "epoch": 0.17, "grad_norm": 6.530206203460693, "learning_rate": 1.905819915945354e-05, "loss": 0.7981, "step": 6441 }, { "epoch": 0.17, "grad_norm": 5.9279937744140625, "learning_rate": 1.905784742748267e-05, "loss": 0.7674, "step": 6442 }, { "epoch": 0.17, "grad_norm": 1.5456712245941162, "learning_rate": 1.9057495633090807e-05, "loss": 0.6835, "step": 6443 }, { "epoch": 0.17, "grad_norm": 3.4307749271392822, "learning_rate": 1.9057143776280375e-05, "loss": 0.8671, "step": 6444 }, { "epoch": 0.17, "grad_norm": 2.3820838928222656, "learning_rate": 1.9056791857053797e-05, "loss": 0.7163, "step": 6445 }, { "epoch": 0.17, "grad_norm": 2.3727867603302, "learning_rate": 1.90564398754135e-05, "loss": 0.51, "step": 6446 }, { "epoch": 0.17, "grad_norm": 1.433310866355896, "learning_rate": 1.9056087831361913e-05, "loss": 0.6694, "step": 6447 }, { "epoch": 0.17, "grad_norm": 3.3376142978668213, "learning_rate": 1.9055735724901452e-05, "loss": 0.7114, "step": 6448 }, { "epoch": 0.17, "grad_norm": 2.340588331222534, "learning_rate": 1.9055383556034552e-05, "loss": 0.6385, "step": 6449 }, { "epoch": 0.17, "grad_norm": 4.3751349449157715, "learning_rate": 1.905503132476364e-05, "loss": 0.7212, "step": 6450 }, { "epoch": 0.17, "grad_norm": 6.515434265136719, "learning_rate": 1.905467903109114e-05, "loss": 0.746, "step": 6451 }, { "epoch": 0.17, "grad_norm": 1.9070836305618286, "learning_rate": 1.9054326675019478e-05, "loss": 0.7917, "step": 6452 }, { "epoch": 0.17, "grad_norm": 2.113795518875122, "learning_rate": 1.905397425655109e-05, "loss": 0.5406, "step": 6453 }, { "epoch": 0.17, "grad_norm": 2.2156505584716797, "learning_rate": 1.9053621775688396e-05, "loss": 0.7227, "step": 6454 }, { "epoch": 0.17, "grad_norm": 1.412925362586975, "learning_rate": 1.9053269232433828e-05, "loss": 0.5763, "step": 6455 }, { "epoch": 0.17, "grad_norm": 6.269386291503906, "learning_rate": 1.9052916626789818e-05, "loss": 0.6187, "step": 6456 }, { "epoch": 0.17, "grad_norm": 1.934743881225586, "learning_rate": 1.9052563958758795e-05, "loss": 0.5296, "step": 6457 }, { "epoch": 0.17, "grad_norm": 1.4145848751068115, "learning_rate": 1.905221122834319e-05, "loss": 0.781, "step": 6458 }, { "epoch": 0.17, "grad_norm": 6.15929651260376, "learning_rate": 1.905185843554543e-05, "loss": 0.7239, "step": 6459 }, { "epoch": 0.17, "grad_norm": 2.5828113555908203, "learning_rate": 1.9051505580367947e-05, "loss": 0.6962, "step": 6460 }, { "epoch": 0.17, "grad_norm": 2.0098931789398193, "learning_rate": 1.9051152662813178e-05, "loss": 0.5528, "step": 6461 }, { "epoch": 0.17, "grad_norm": 2.40568208694458, "learning_rate": 1.9050799682883547e-05, "loss": 0.5053, "step": 6462 }, { "epoch": 0.17, "grad_norm": 2.2105143070220947, "learning_rate": 1.9050446640581496e-05, "loss": 0.4471, "step": 6463 }, { "epoch": 0.17, "grad_norm": 2.6319632530212402, "learning_rate": 1.9050093535909448e-05, "loss": 0.5676, "step": 6464 }, { "epoch": 0.17, "grad_norm": 3.4599573612213135, "learning_rate": 1.9049740368869846e-05, "loss": 0.6381, "step": 6465 }, { "epoch": 0.17, "grad_norm": 2.4986720085144043, "learning_rate": 1.9049387139465115e-05, "loss": 0.6231, "step": 6466 }, { "epoch": 0.17, "grad_norm": 1.2249181270599365, "learning_rate": 1.9049033847697696e-05, "loss": 0.7711, "step": 6467 }, { "epoch": 0.17, "grad_norm": 6.139021396636963, "learning_rate": 1.9048680493570016e-05, "loss": 0.6576, "step": 6468 }, { "epoch": 0.17, "grad_norm": 1.555309534072876, "learning_rate": 1.9048327077084517e-05, "loss": 0.5695, "step": 6469 }, { "epoch": 0.17, "grad_norm": 4.864535331726074, "learning_rate": 1.9047973598243637e-05, "loss": 1.1452, "step": 6470 }, { "epoch": 0.17, "grad_norm": 4.61484432220459, "learning_rate": 1.9047620057049805e-05, "loss": 0.7651, "step": 6471 }, { "epoch": 0.17, "grad_norm": 1.7987890243530273, "learning_rate": 1.904726645350546e-05, "loss": 0.6895, "step": 6472 }, { "epoch": 0.17, "grad_norm": 2.2271153926849365, "learning_rate": 1.9046912787613035e-05, "loss": 0.4717, "step": 6473 }, { "epoch": 0.17, "grad_norm": 1.7115898132324219, "learning_rate": 1.9046559059374973e-05, "loss": 0.6954, "step": 6474 }, { "epoch": 0.17, "grad_norm": 1.4396382570266724, "learning_rate": 1.9046205268793713e-05, "loss": 0.3833, "step": 6475 }, { "epoch": 0.17, "grad_norm": 3.9863173961639404, "learning_rate": 1.9045851415871686e-05, "loss": 0.6784, "step": 6476 }, { "epoch": 0.17, "grad_norm": 4.2018938064575195, "learning_rate": 1.9045497500611333e-05, "loss": 0.5654, "step": 6477 }, { "epoch": 0.17, "grad_norm": 1.4073113203048706, "learning_rate": 1.90451435230151e-05, "loss": 0.5296, "step": 6478 }, { "epoch": 0.17, "grad_norm": 2.1606051921844482, "learning_rate": 1.9044789483085415e-05, "loss": 0.6554, "step": 6479 }, { "epoch": 0.17, "grad_norm": 1.8022832870483398, "learning_rate": 1.9044435380824725e-05, "loss": 0.5989, "step": 6480 }, { "epoch": 0.17, "grad_norm": 1.8451400995254517, "learning_rate": 1.904408121623547e-05, "loss": 0.6318, "step": 6481 }, { "epoch": 0.17, "grad_norm": 3.3538315296173096, "learning_rate": 1.904372698932009e-05, "loss": 0.7121, "step": 6482 }, { "epoch": 0.17, "grad_norm": 1.9270358085632324, "learning_rate": 1.9043372700081023e-05, "loss": 0.63, "step": 6483 }, { "epoch": 0.17, "grad_norm": 2.0723066329956055, "learning_rate": 1.904301834852071e-05, "loss": 0.7568, "step": 6484 }, { "epoch": 0.17, "grad_norm": 2.47829532623291, "learning_rate": 1.90426639346416e-05, "loss": 0.6489, "step": 6485 }, { "epoch": 0.17, "grad_norm": 1.5972199440002441, "learning_rate": 1.904230945844613e-05, "loss": 0.6395, "step": 6486 }, { "epoch": 0.17, "grad_norm": 5.823526859283447, "learning_rate": 1.9041954919936743e-05, "loss": 0.6766, "step": 6487 }, { "epoch": 0.17, "grad_norm": 3.105440139770508, "learning_rate": 1.9041600319115886e-05, "loss": 0.773, "step": 6488 }, { "epoch": 0.17, "grad_norm": 2.3246941566467285, "learning_rate": 1.9041245655986e-05, "loss": 0.609, "step": 6489 }, { "epoch": 0.17, "grad_norm": 1.9785239696502686, "learning_rate": 1.904089093054953e-05, "loss": 0.661, "step": 6490 }, { "epoch": 0.17, "grad_norm": 4.129680633544922, "learning_rate": 1.9040536142808918e-05, "loss": 0.8843, "step": 6491 }, { "epoch": 0.17, "grad_norm": 3.8095123767852783, "learning_rate": 1.9040181292766614e-05, "loss": 0.7692, "step": 6492 }, { "epoch": 0.17, "grad_norm": 2.0204291343688965, "learning_rate": 1.9039826380425056e-05, "loss": 0.7263, "step": 6493 }, { "epoch": 0.17, "grad_norm": 3.1199028491973877, "learning_rate": 1.9039471405786692e-05, "loss": 0.7168, "step": 6494 }, { "epoch": 0.17, "grad_norm": 1.8728746175765991, "learning_rate": 1.9039116368853977e-05, "loss": 0.6381, "step": 6495 }, { "epoch": 0.17, "grad_norm": 2.0471343994140625, "learning_rate": 1.9038761269629345e-05, "loss": 0.6566, "step": 6496 }, { "epoch": 0.17, "grad_norm": 3.3178350925445557, "learning_rate": 1.9038406108115252e-05, "loss": 0.6429, "step": 6497 }, { "epoch": 0.17, "grad_norm": 4.037414073944092, "learning_rate": 1.903805088431414e-05, "loss": 0.6396, "step": 6498 }, { "epoch": 0.17, "grad_norm": 4.680878639221191, "learning_rate": 1.9037695598228466e-05, "loss": 0.997, "step": 6499 }, { "epoch": 0.17, "grad_norm": 2.059652805328369, "learning_rate": 1.9037340249860666e-05, "loss": 0.7511, "step": 6500 }, { "epoch": 0.17, "grad_norm": 2.6820969581604004, "learning_rate": 1.9036984839213194e-05, "loss": 0.6641, "step": 6501 }, { "epoch": 0.17, "grad_norm": 2.605119228363037, "learning_rate": 1.9036629366288504e-05, "loss": 0.5917, "step": 6502 }, { "epoch": 0.17, "grad_norm": 2.720407009124756, "learning_rate": 1.903627383108904e-05, "loss": 0.8038, "step": 6503 }, { "epoch": 0.17, "grad_norm": 3.2325117588043213, "learning_rate": 1.9035918233617255e-05, "loss": 0.7231, "step": 6504 }, { "epoch": 0.17, "grad_norm": 4.771612644195557, "learning_rate": 1.9035562573875597e-05, "loss": 0.7024, "step": 6505 }, { "epoch": 0.17, "grad_norm": 1.515174150466919, "learning_rate": 1.903520685186652e-05, "loss": 0.4378, "step": 6506 }, { "epoch": 0.17, "grad_norm": 2.9359993934631348, "learning_rate": 1.9034851067592473e-05, "loss": 0.6963, "step": 6507 }, { "epoch": 0.17, "grad_norm": 2.3100080490112305, "learning_rate": 1.9034495221055906e-05, "loss": 0.7213, "step": 6508 }, { "epoch": 0.17, "grad_norm": 2.067746162414551, "learning_rate": 1.9034139312259277e-05, "loss": 0.616, "step": 6509 }, { "epoch": 0.17, "grad_norm": 4.130148887634277, "learning_rate": 1.9033783341205035e-05, "loss": 0.7542, "step": 6510 }, { "epoch": 0.17, "grad_norm": 1.417451024055481, "learning_rate": 1.9033427307895633e-05, "loss": 0.5169, "step": 6511 }, { "epoch": 0.17, "grad_norm": 1.7350001335144043, "learning_rate": 1.9033071212333528e-05, "loss": 0.6413, "step": 6512 }, { "epoch": 0.17, "grad_norm": 4.283254623413086, "learning_rate": 1.903271505452117e-05, "loss": 0.617, "step": 6513 }, { "epoch": 0.17, "grad_norm": 2.6177515983581543, "learning_rate": 1.9032358834461013e-05, "loss": 0.6407, "step": 6514 }, { "epoch": 0.17, "grad_norm": 1.9684029817581177, "learning_rate": 1.9032002552155515e-05, "loss": 0.5267, "step": 6515 }, { "epoch": 0.17, "grad_norm": 4.166814804077148, "learning_rate": 1.903164620760713e-05, "loss": 0.7391, "step": 6516 }, { "epoch": 0.17, "grad_norm": 2.0527572631835938, "learning_rate": 1.9031289800818313e-05, "loss": 0.5175, "step": 6517 }, { "epoch": 0.17, "grad_norm": 1.7382841110229492, "learning_rate": 1.903093333179152e-05, "loss": 0.5524, "step": 6518 }, { "epoch": 0.17, "grad_norm": 1.8080445528030396, "learning_rate": 1.903057680052921e-05, "loss": 0.7106, "step": 6519 }, { "epoch": 0.17, "grad_norm": 4.020391464233398, "learning_rate": 1.9030220207033837e-05, "loss": 0.6958, "step": 6520 }, { "epoch": 0.17, "grad_norm": 2.3181960582733154, "learning_rate": 1.902986355130786e-05, "loss": 0.7701, "step": 6521 }, { "epoch": 0.17, "grad_norm": 5.308746814727783, "learning_rate": 1.902950683335374e-05, "loss": 0.7131, "step": 6522 }, { "epoch": 0.17, "grad_norm": 3.942937135696411, "learning_rate": 1.9029150053173927e-05, "loss": 0.6111, "step": 6523 }, { "epoch": 0.17, "grad_norm": 2.6320676803588867, "learning_rate": 1.9028793210770885e-05, "loss": 0.7774, "step": 6524 }, { "epoch": 0.17, "grad_norm": 1.842320203781128, "learning_rate": 1.9028436306147076e-05, "loss": 0.6506, "step": 6525 }, { "epoch": 0.17, "grad_norm": 2.1085805892944336, "learning_rate": 1.9028079339304954e-05, "loss": 0.8332, "step": 6526 }, { "epoch": 0.17, "grad_norm": 4.416774272918701, "learning_rate": 1.902772231024698e-05, "loss": 0.563, "step": 6527 }, { "epoch": 0.17, "grad_norm": 3.1560468673706055, "learning_rate": 1.9027365218975616e-05, "loss": 0.7212, "step": 6528 }, { "epoch": 0.17, "grad_norm": 3.098175287246704, "learning_rate": 1.902700806549332e-05, "loss": 0.6489, "step": 6529 }, { "epoch": 0.17, "grad_norm": 2.0314812660217285, "learning_rate": 1.902665084980256e-05, "loss": 0.7117, "step": 6530 }, { "epoch": 0.17, "grad_norm": 2.7827494144439697, "learning_rate": 1.9026293571905792e-05, "loss": 0.6562, "step": 6531 }, { "epoch": 0.17, "grad_norm": 2.7717761993408203, "learning_rate": 1.9025936231805483e-05, "loss": 0.7323, "step": 6532 }, { "epoch": 0.17, "grad_norm": 1.5453263521194458, "learning_rate": 1.9025578829504088e-05, "loss": 0.7211, "step": 6533 }, { "epoch": 0.17, "grad_norm": 2.2236690521240234, "learning_rate": 1.9025221365004074e-05, "loss": 0.7348, "step": 6534 }, { "epoch": 0.17, "grad_norm": 4.055694580078125, "learning_rate": 1.9024863838307905e-05, "loss": 0.8446, "step": 6535 }, { "epoch": 0.17, "grad_norm": 3.7939867973327637, "learning_rate": 1.9024506249418047e-05, "loss": 0.7175, "step": 6536 }, { "epoch": 0.17, "grad_norm": 1.298556923866272, "learning_rate": 1.902414859833696e-05, "loss": 0.7388, "step": 6537 }, { "epoch": 0.17, "grad_norm": 4.018649578094482, "learning_rate": 1.902379088506711e-05, "loss": 0.7221, "step": 6538 }, { "epoch": 0.17, "grad_norm": 2.0091753005981445, "learning_rate": 1.902343310961096e-05, "loss": 0.4883, "step": 6539 }, { "epoch": 0.17, "grad_norm": 3.595776081085205, "learning_rate": 1.9023075271970986e-05, "loss": 0.7029, "step": 6540 }, { "epoch": 0.17, "grad_norm": 1.8617221117019653, "learning_rate": 1.902271737214964e-05, "loss": 0.5462, "step": 6541 }, { "epoch": 0.17, "grad_norm": 1.6760187149047852, "learning_rate": 1.9022359410149396e-05, "loss": 0.5213, "step": 6542 }, { "epoch": 0.17, "grad_norm": 3.2430975437164307, "learning_rate": 1.902200138597272e-05, "loss": 0.7719, "step": 6543 }, { "epoch": 0.17, "grad_norm": 1.254563570022583, "learning_rate": 1.9021643299622074e-05, "loss": 0.6101, "step": 6544 }, { "epoch": 0.17, "grad_norm": 2.373990297317505, "learning_rate": 1.9021285151099933e-05, "loss": 0.6069, "step": 6545 }, { "epoch": 0.17, "grad_norm": 2.233898401260376, "learning_rate": 1.9020926940408762e-05, "loss": 0.7793, "step": 6546 }, { "epoch": 0.17, "grad_norm": 1.7040679454803467, "learning_rate": 1.9020568667551034e-05, "loss": 0.7586, "step": 6547 }, { "epoch": 0.17, "grad_norm": 2.043668031692505, "learning_rate": 1.902021033252921e-05, "loss": 0.6154, "step": 6548 }, { "epoch": 0.17, "grad_norm": 7.033199787139893, "learning_rate": 1.9019851935345764e-05, "loss": 0.6691, "step": 6549 }, { "epoch": 0.17, "grad_norm": 2.275033950805664, "learning_rate": 1.901949347600317e-05, "loss": 0.6535, "step": 6550 }, { "epoch": 0.17, "grad_norm": 4.801924228668213, "learning_rate": 1.9019134954503886e-05, "loss": 0.7158, "step": 6551 }, { "epoch": 0.17, "grad_norm": 2.6898367404937744, "learning_rate": 1.9018776370850394e-05, "loss": 0.6226, "step": 6552 }, { "epoch": 0.17, "grad_norm": 2.234067678451538, "learning_rate": 1.9018417725045164e-05, "loss": 0.689, "step": 6553 }, { "epoch": 0.17, "grad_norm": 3.254101514816284, "learning_rate": 1.901805901709066e-05, "loss": 0.7401, "step": 6554 }, { "epoch": 0.17, "grad_norm": 3.0397276878356934, "learning_rate": 1.901770024698936e-05, "loss": 0.6276, "step": 6555 }, { "epoch": 0.17, "grad_norm": 2.2042922973632812, "learning_rate": 1.9017341414743737e-05, "loss": 0.6766, "step": 6556 }, { "epoch": 0.17, "grad_norm": 6.209243297576904, "learning_rate": 1.901698252035626e-05, "loss": 0.6432, "step": 6557 }, { "epoch": 0.17, "grad_norm": 2.403803586959839, "learning_rate": 1.901662356382941e-05, "loss": 0.678, "step": 6558 }, { "epoch": 0.17, "grad_norm": 1.7128280401229858, "learning_rate": 1.901626454516565e-05, "loss": 0.5899, "step": 6559 }, { "epoch": 0.17, "grad_norm": 1.6278403997421265, "learning_rate": 1.901590546436746e-05, "loss": 0.7503, "step": 6560 }, { "epoch": 0.17, "grad_norm": 2.00605845451355, "learning_rate": 1.9015546321437315e-05, "loss": 0.5798, "step": 6561 }, { "epoch": 0.17, "grad_norm": 2.045109987258911, "learning_rate": 1.9015187116377687e-05, "loss": 0.6873, "step": 6562 }, { "epoch": 0.17, "grad_norm": 2.820913791656494, "learning_rate": 1.9014827849191055e-05, "loss": 0.7916, "step": 6563 }, { "epoch": 0.17, "grad_norm": 1.668270230293274, "learning_rate": 1.9014468519879893e-05, "loss": 0.6078, "step": 6564 }, { "epoch": 0.17, "grad_norm": 8.582554817199707, "learning_rate": 1.9014109128446678e-05, "loss": 0.9481, "step": 6565 }, { "epoch": 0.17, "grad_norm": 2.291738748550415, "learning_rate": 1.9013749674893885e-05, "loss": 0.5741, "step": 6566 }, { "epoch": 0.17, "grad_norm": 7.087491989135742, "learning_rate": 1.9013390159223992e-05, "loss": 0.6932, "step": 6567 }, { "epoch": 0.17, "grad_norm": 3.019026517868042, "learning_rate": 1.901303058143948e-05, "loss": 0.7161, "step": 6568 }, { "epoch": 0.17, "grad_norm": 3.2260477542877197, "learning_rate": 1.901267094154282e-05, "loss": 0.7821, "step": 6569 }, { "epoch": 0.17, "grad_norm": 4.304914951324463, "learning_rate": 1.9012311239536494e-05, "loss": 0.6693, "step": 6570 }, { "epoch": 0.17, "grad_norm": 1.7464388608932495, "learning_rate": 1.9011951475422984e-05, "loss": 0.7065, "step": 6571 }, { "epoch": 0.17, "grad_norm": 1.3174887895584106, "learning_rate": 1.9011591649204765e-05, "loss": 0.6627, "step": 6572 }, { "epoch": 0.17, "grad_norm": 3.3246045112609863, "learning_rate": 1.9011231760884316e-05, "loss": 0.6656, "step": 6573 }, { "epoch": 0.17, "grad_norm": 4.09409761428833, "learning_rate": 1.901087181046412e-05, "loss": 0.6907, "step": 6574 }, { "epoch": 0.17, "grad_norm": 1.8674262762069702, "learning_rate": 1.9010511797946657e-05, "loss": 0.5238, "step": 6575 }, { "epoch": 0.17, "grad_norm": 2.1127123832702637, "learning_rate": 1.9010151723334413e-05, "loss": 0.8161, "step": 6576 }, { "epoch": 0.17, "grad_norm": 1.869115948677063, "learning_rate": 1.9009791586629857e-05, "loss": 0.5998, "step": 6577 }, { "epoch": 0.17, "grad_norm": 2.1742677688598633, "learning_rate": 1.9009431387835478e-05, "loss": 0.6022, "step": 6578 }, { "epoch": 0.17, "grad_norm": 1.665730595588684, "learning_rate": 1.900907112695376e-05, "loss": 0.5266, "step": 6579 }, { "epoch": 0.17, "grad_norm": 1.8774605989456177, "learning_rate": 1.9008710803987184e-05, "loss": 0.6934, "step": 6580 }, { "epoch": 0.17, "grad_norm": 2.38580584526062, "learning_rate": 1.9008350418938236e-05, "loss": 0.5956, "step": 6581 }, { "epoch": 0.17, "grad_norm": 3.4586660861968994, "learning_rate": 1.9007989971809393e-05, "loss": 0.693, "step": 6582 }, { "epoch": 0.17, "grad_norm": 2.6418395042419434, "learning_rate": 1.900762946260314e-05, "loss": 0.5071, "step": 6583 }, { "epoch": 0.17, "grad_norm": 2.2565581798553467, "learning_rate": 1.9007268891321968e-05, "loss": 0.7182, "step": 6584 }, { "epoch": 0.17, "grad_norm": 2.5975327491760254, "learning_rate": 1.9006908257968358e-05, "loss": 0.6602, "step": 6585 }, { "epoch": 0.17, "grad_norm": 2.170893430709839, "learning_rate": 1.9006547562544793e-05, "loss": 0.5795, "step": 6586 }, { "epoch": 0.17, "grad_norm": 2.9477832317352295, "learning_rate": 1.900618680505376e-05, "loss": 0.6707, "step": 6587 }, { "epoch": 0.17, "grad_norm": 3.33235239982605, "learning_rate": 1.9005825985497747e-05, "loss": 0.7484, "step": 6588 }, { "epoch": 0.17, "grad_norm": 3.23490834236145, "learning_rate": 1.900546510387924e-05, "loss": 0.7294, "step": 6589 }, { "epoch": 0.17, "grad_norm": 1.3343983888626099, "learning_rate": 1.9005104160200723e-05, "loss": 0.621, "step": 6590 }, { "epoch": 0.17, "grad_norm": 2.2329940795898438, "learning_rate": 1.900474315446469e-05, "loss": 0.5752, "step": 6591 }, { "epoch": 0.17, "grad_norm": 2.553973436355591, "learning_rate": 1.900438208667362e-05, "loss": 0.5927, "step": 6592 }, { "epoch": 0.17, "grad_norm": 2.5187454223632812, "learning_rate": 1.900402095683001e-05, "loss": 0.7206, "step": 6593 }, { "epoch": 0.17, "grad_norm": 6.287361145019531, "learning_rate": 1.900365976493634e-05, "loss": 0.6068, "step": 6594 }, { "epoch": 0.17, "grad_norm": 1.84746515750885, "learning_rate": 1.900329851099511e-05, "loss": 0.7226, "step": 6595 }, { "epoch": 0.17, "grad_norm": 2.5805556774139404, "learning_rate": 1.9002937195008796e-05, "loss": 0.5974, "step": 6596 }, { "epoch": 0.17, "grad_norm": 2.5284316539764404, "learning_rate": 1.9002575816979898e-05, "loss": 0.8896, "step": 6597 }, { "epoch": 0.17, "grad_norm": 2.501291513442993, "learning_rate": 1.9002214376910902e-05, "loss": 0.7104, "step": 6598 }, { "epoch": 0.17, "grad_norm": 2.783294677734375, "learning_rate": 1.9001852874804302e-05, "loss": 0.7417, "step": 6599 }, { "epoch": 0.17, "grad_norm": 2.3983306884765625, "learning_rate": 1.9001491310662587e-05, "loss": 0.6598, "step": 6600 }, { "epoch": 0.17, "grad_norm": 1.8974107503890991, "learning_rate": 1.900112968448825e-05, "loss": 0.6109, "step": 6601 }, { "epoch": 0.17, "grad_norm": 3.1281542778015137, "learning_rate": 1.900076799628378e-05, "loss": 0.6235, "step": 6602 }, { "epoch": 0.17, "grad_norm": 1.592514157295227, "learning_rate": 1.9000406246051675e-05, "loss": 0.568, "step": 6603 }, { "epoch": 0.17, "grad_norm": 2.038922071456909, "learning_rate": 1.9000044433794424e-05, "loss": 0.6328, "step": 6604 }, { "epoch": 0.17, "grad_norm": 3.7503855228424072, "learning_rate": 1.8999682559514516e-05, "loss": 0.7616, "step": 6605 }, { "epoch": 0.17, "grad_norm": 1.3753399848937988, "learning_rate": 1.8999320623214456e-05, "loss": 0.5576, "step": 6606 }, { "epoch": 0.17, "grad_norm": 2.8307456970214844, "learning_rate": 1.899895862489673e-05, "loss": 0.775, "step": 6607 }, { "epoch": 0.17, "grad_norm": 1.3541048765182495, "learning_rate": 1.8998596564563835e-05, "loss": 0.7835, "step": 6608 }, { "epoch": 0.17, "grad_norm": 3.213709831237793, "learning_rate": 1.8998234442218267e-05, "loss": 0.7723, "step": 6609 }, { "epoch": 0.17, "grad_norm": 2.6263954639434814, "learning_rate": 1.899787225786252e-05, "loss": 0.5785, "step": 6610 }, { "epoch": 0.17, "grad_norm": 2.432288408279419, "learning_rate": 1.899751001149909e-05, "loss": 0.7712, "step": 6611 }, { "epoch": 0.17, "grad_norm": 1.6684083938598633, "learning_rate": 1.8997147703130472e-05, "loss": 0.6121, "step": 6612 }, { "epoch": 0.17, "grad_norm": 4.491273403167725, "learning_rate": 1.8996785332759167e-05, "loss": 0.4989, "step": 6613 }, { "epoch": 0.17, "grad_norm": 1.9975730180740356, "learning_rate": 1.899642290038767e-05, "loss": 0.5412, "step": 6614 }, { "epoch": 0.17, "grad_norm": 1.9993209838867188, "learning_rate": 1.8996060406018477e-05, "loss": 0.716, "step": 6615 }, { "epoch": 0.17, "grad_norm": 4.881052017211914, "learning_rate": 1.8995697849654088e-05, "loss": 0.6892, "step": 6616 }, { "epoch": 0.17, "grad_norm": 1.8966609239578247, "learning_rate": 1.8995335231297e-05, "loss": 0.6774, "step": 6617 }, { "epoch": 0.17, "grad_norm": 2.0608582496643066, "learning_rate": 1.8994972550949715e-05, "loss": 0.6171, "step": 6618 }, { "epoch": 0.17, "grad_norm": 1.2746747732162476, "learning_rate": 1.899460980861473e-05, "loss": 0.5255, "step": 6619 }, { "epoch": 0.17, "grad_norm": 3.746758222579956, "learning_rate": 1.8994247004294542e-05, "loss": 0.581, "step": 6620 }, { "epoch": 0.17, "grad_norm": 2.288069486618042, "learning_rate": 1.899388413799166e-05, "loss": 0.6072, "step": 6621 }, { "epoch": 0.17, "grad_norm": 1.995316505432129, "learning_rate": 1.8993521209708577e-05, "loss": 0.8781, "step": 6622 }, { "epoch": 0.17, "grad_norm": 1.4412965774536133, "learning_rate": 1.8993158219447797e-05, "loss": 0.6055, "step": 6623 }, { "epoch": 0.17, "grad_norm": 4.827112197875977, "learning_rate": 1.8992795167211817e-05, "loss": 0.7909, "step": 6624 }, { "epoch": 0.17, "grad_norm": 2.5636820793151855, "learning_rate": 1.8992432053003143e-05, "loss": 0.7597, "step": 6625 }, { "epoch": 0.17, "grad_norm": 1.8933531045913696, "learning_rate": 1.8992068876824278e-05, "loss": 0.5707, "step": 6626 }, { "epoch": 0.17, "grad_norm": 1.8796665668487549, "learning_rate": 1.8991705638677723e-05, "loss": 0.4338, "step": 6627 }, { "epoch": 0.17, "grad_norm": 4.64209508895874, "learning_rate": 1.8991342338565985e-05, "loss": 0.6763, "step": 6628 }, { "epoch": 0.17, "grad_norm": 2.1660444736480713, "learning_rate": 1.8990978976491564e-05, "loss": 0.631, "step": 6629 }, { "epoch": 0.17, "grad_norm": 2.236419439315796, "learning_rate": 1.8990615552456964e-05, "loss": 0.7572, "step": 6630 }, { "epoch": 0.17, "grad_norm": 2.6979453563690186, "learning_rate": 1.899025206646469e-05, "loss": 0.8001, "step": 6631 }, { "epoch": 0.17, "grad_norm": 3.1145761013031006, "learning_rate": 1.8989888518517243e-05, "loss": 0.6282, "step": 6632 }, { "epoch": 0.17, "grad_norm": 2.4770150184631348, "learning_rate": 1.8989524908617136e-05, "loss": 0.6806, "step": 6633 }, { "epoch": 0.17, "grad_norm": 2.568605899810791, "learning_rate": 1.8989161236766872e-05, "loss": 0.6132, "step": 6634 }, { "epoch": 0.17, "grad_norm": 1.3836966753005981, "learning_rate": 1.8988797502968956e-05, "loss": 0.681, "step": 6635 }, { "epoch": 0.17, "grad_norm": 2.5868327617645264, "learning_rate": 1.898843370722589e-05, "loss": 0.6612, "step": 6636 }, { "epoch": 0.17, "grad_norm": 3.283088445663452, "learning_rate": 1.898806984954019e-05, "loss": 0.7649, "step": 6637 }, { "epoch": 0.17, "grad_norm": 1.8854109048843384, "learning_rate": 1.898770592991436e-05, "loss": 0.7149, "step": 6638 }, { "epoch": 0.17, "grad_norm": 3.733574867248535, "learning_rate": 1.8987341948350904e-05, "loss": 0.6125, "step": 6639 }, { "epoch": 0.17, "grad_norm": 12.339569091796875, "learning_rate": 1.8986977904852338e-05, "loss": 0.8776, "step": 6640 }, { "epoch": 0.17, "grad_norm": 1.2948534488677979, "learning_rate": 1.8986613799421165e-05, "loss": 0.5233, "step": 6641 }, { "epoch": 0.17, "grad_norm": 2.5355756282806396, "learning_rate": 1.8986249632059897e-05, "loss": 0.7902, "step": 6642 }, { "epoch": 0.17, "grad_norm": 4.3349809646606445, "learning_rate": 1.8985885402771037e-05, "loss": 0.6343, "step": 6643 }, { "epoch": 0.17, "grad_norm": 2.1561193466186523, "learning_rate": 1.8985521111557106e-05, "loss": 0.8146, "step": 6644 }, { "epoch": 0.17, "grad_norm": 3.8595781326293945, "learning_rate": 1.8985156758420608e-05, "loss": 0.5856, "step": 6645 }, { "epoch": 0.17, "grad_norm": 1.924428939819336, "learning_rate": 1.8984792343364053e-05, "loss": 0.5591, "step": 6646 }, { "epoch": 0.17, "grad_norm": 1.9372130632400513, "learning_rate": 1.8984427866389952e-05, "loss": 0.5268, "step": 6647 }, { "epoch": 0.17, "grad_norm": 2.1902222633361816, "learning_rate": 1.8984063327500822e-05, "loss": 0.5532, "step": 6648 }, { "epoch": 0.17, "grad_norm": 3.0420162677764893, "learning_rate": 1.898369872669917e-05, "loss": 0.6227, "step": 6649 }, { "epoch": 0.17, "grad_norm": 7.42888879776001, "learning_rate": 1.8983334063987513e-05, "loss": 0.7391, "step": 6650 }, { "epoch": 0.17, "grad_norm": 4.232893466949463, "learning_rate": 1.8982969339368358e-05, "loss": 0.5284, "step": 6651 }, { "epoch": 0.17, "grad_norm": 2.781567096710205, "learning_rate": 1.8982604552844223e-05, "loss": 0.7248, "step": 6652 }, { "epoch": 0.17, "grad_norm": 6.624143123626709, "learning_rate": 1.8982239704417623e-05, "loss": 0.5778, "step": 6653 }, { "epoch": 0.17, "grad_norm": 3.609942674636841, "learning_rate": 1.8981874794091068e-05, "loss": 0.685, "step": 6654 }, { "epoch": 0.17, "grad_norm": 2.1325347423553467, "learning_rate": 1.8981509821867076e-05, "loss": 0.7549, "step": 6655 }, { "epoch": 0.17, "grad_norm": 3.0991950035095215, "learning_rate": 1.898114478774816e-05, "loss": 0.625, "step": 6656 }, { "epoch": 0.17, "grad_norm": 1.4886860847473145, "learning_rate": 1.8980779691736838e-05, "loss": 0.7803, "step": 6657 }, { "epoch": 0.17, "grad_norm": 3.1500802040100098, "learning_rate": 1.8980414533835624e-05, "loss": 0.7744, "step": 6658 }, { "epoch": 0.17, "grad_norm": 1.8737072944641113, "learning_rate": 1.8980049314047035e-05, "loss": 0.5787, "step": 6659 }, { "epoch": 0.17, "grad_norm": 2.239539623260498, "learning_rate": 1.897968403237359e-05, "loss": 0.7314, "step": 6660 }, { "epoch": 0.17, "grad_norm": 2.547081708908081, "learning_rate": 1.8979318688817803e-05, "loss": 0.8122, "step": 6661 }, { "epoch": 0.17, "grad_norm": 4.44359016418457, "learning_rate": 1.897895328338219e-05, "loss": 0.624, "step": 6662 }, { "epoch": 0.17, "grad_norm": 6.157943248748779, "learning_rate": 1.8978587816069278e-05, "loss": 0.6191, "step": 6663 }, { "epoch": 0.17, "grad_norm": 2.0214359760284424, "learning_rate": 1.8978222286881573e-05, "loss": 0.6587, "step": 6664 }, { "epoch": 0.17, "grad_norm": 1.6465940475463867, "learning_rate": 1.8977856695821604e-05, "loss": 0.6141, "step": 6665 }, { "epoch": 0.17, "grad_norm": 4.199982643127441, "learning_rate": 1.8977491042891886e-05, "loss": 0.6183, "step": 6666 }, { "epoch": 0.17, "grad_norm": 4.27625846862793, "learning_rate": 1.8977125328094943e-05, "loss": 0.7509, "step": 6667 }, { "epoch": 0.17, "grad_norm": 1.2441315650939941, "learning_rate": 1.897675955143329e-05, "loss": 0.6135, "step": 6668 }, { "epoch": 0.17, "grad_norm": 1.5022203922271729, "learning_rate": 1.897639371290945e-05, "loss": 0.6242, "step": 6669 }, { "epoch": 0.17, "grad_norm": 3.3232228755950928, "learning_rate": 1.8976027812525943e-05, "loss": 0.6306, "step": 6670 }, { "epoch": 0.17, "grad_norm": 2.6332898139953613, "learning_rate": 1.897566185028529e-05, "loss": 0.6045, "step": 6671 }, { "epoch": 0.17, "grad_norm": 3.4117753505706787, "learning_rate": 1.897529582619002e-05, "loss": 0.5591, "step": 6672 }, { "epoch": 0.17, "grad_norm": 3.2234387397766113, "learning_rate": 1.8974929740242648e-05, "loss": 0.655, "step": 6673 }, { "epoch": 0.17, "grad_norm": 3.013026475906372, "learning_rate": 1.8974563592445697e-05, "loss": 0.8956, "step": 6674 }, { "epoch": 0.17, "grad_norm": 4.148579120635986, "learning_rate": 1.8974197382801694e-05, "loss": 0.615, "step": 6675 }, { "epoch": 0.17, "grad_norm": 2.224668502807617, "learning_rate": 1.897383111131316e-05, "loss": 0.6952, "step": 6676 }, { "epoch": 0.17, "grad_norm": 3.3865623474121094, "learning_rate": 1.8973464777982617e-05, "loss": 0.6232, "step": 6677 }, { "epoch": 0.17, "grad_norm": 3.5218708515167236, "learning_rate": 1.8973098382812594e-05, "loss": 0.781, "step": 6678 }, { "epoch": 0.17, "grad_norm": 3.272336721420288, "learning_rate": 1.8972731925805616e-05, "loss": 0.6988, "step": 6679 }, { "epoch": 0.17, "grad_norm": 2.131532669067383, "learning_rate": 1.897236540696421e-05, "loss": 0.5847, "step": 6680 }, { "epoch": 0.17, "grad_norm": 4.200900077819824, "learning_rate": 1.897199882629089e-05, "loss": 0.6365, "step": 6681 }, { "epoch": 0.17, "grad_norm": 3.342313289642334, "learning_rate": 1.89716321837882e-05, "loss": 0.8689, "step": 6682 }, { "epoch": 0.17, "grad_norm": 1.7806740999221802, "learning_rate": 1.8971265479458653e-05, "loss": 0.5513, "step": 6683 }, { "epoch": 0.17, "grad_norm": 3.1154327392578125, "learning_rate": 1.897089871330478e-05, "loss": 0.7392, "step": 6684 }, { "epoch": 0.17, "grad_norm": 6.3069047927856445, "learning_rate": 1.8970531885329108e-05, "loss": 0.5769, "step": 6685 }, { "epoch": 0.17, "grad_norm": 2.1653828620910645, "learning_rate": 1.897016499553417e-05, "loss": 0.6053, "step": 6686 }, { "epoch": 0.17, "grad_norm": 1.4657400846481323, "learning_rate": 1.896979804392249e-05, "loss": 0.7035, "step": 6687 }, { "epoch": 0.17, "grad_norm": 1.523435115814209, "learning_rate": 1.8969431030496594e-05, "loss": 0.7222, "step": 6688 }, { "epoch": 0.17, "grad_norm": 1.7735967636108398, "learning_rate": 1.8969063955259018e-05, "loss": 0.7213, "step": 6689 }, { "epoch": 0.17, "grad_norm": 2.4749879837036133, "learning_rate": 1.8968696818212288e-05, "loss": 0.8218, "step": 6690 }, { "epoch": 0.17, "grad_norm": 2.9085028171539307, "learning_rate": 1.8968329619358933e-05, "loss": 0.768, "step": 6691 }, { "epoch": 0.17, "grad_norm": 2.326151132583618, "learning_rate": 1.896796235870148e-05, "loss": 0.5854, "step": 6692 }, { "epoch": 0.17, "grad_norm": 2.312037706375122, "learning_rate": 1.896759503624247e-05, "loss": 0.5705, "step": 6693 }, { "epoch": 0.17, "grad_norm": 2.3896350860595703, "learning_rate": 1.8967227651984432e-05, "loss": 0.5355, "step": 6694 }, { "epoch": 0.17, "grad_norm": 2.8483619689941406, "learning_rate": 1.8966860205929888e-05, "loss": 0.4404, "step": 6695 }, { "epoch": 0.17, "grad_norm": 5.211879730224609, "learning_rate": 1.896649269808138e-05, "loss": 0.6227, "step": 6696 }, { "epoch": 0.17, "grad_norm": 2.8401143550872803, "learning_rate": 1.8966125128441436e-05, "loss": 0.6942, "step": 6697 }, { "epoch": 0.17, "grad_norm": 2.6305058002471924, "learning_rate": 1.8965757497012594e-05, "loss": 0.7887, "step": 6698 }, { "epoch": 0.17, "grad_norm": 1.5619244575500488, "learning_rate": 1.896538980379738e-05, "loss": 0.6197, "step": 6699 }, { "epoch": 0.17, "grad_norm": 5.177735328674316, "learning_rate": 1.8965022048798336e-05, "loss": 0.6879, "step": 6700 }, { "epoch": 0.17, "grad_norm": 2.048140048980713, "learning_rate": 1.896465423201799e-05, "loss": 0.8448, "step": 6701 }, { "epoch": 0.17, "grad_norm": 1.9180835485458374, "learning_rate": 1.8964286353458882e-05, "loss": 0.6104, "step": 6702 }, { "epoch": 0.17, "grad_norm": 1.826891541481018, "learning_rate": 1.896391841312354e-05, "loss": 0.5433, "step": 6703 }, { "epoch": 0.17, "grad_norm": 3.1029064655303955, "learning_rate": 1.8963550411014507e-05, "loss": 0.7064, "step": 6704 }, { "epoch": 0.17, "grad_norm": 3.6849937438964844, "learning_rate": 1.8963182347134318e-05, "loss": 0.744, "step": 6705 }, { "epoch": 0.17, "grad_norm": 2.4586544036865234, "learning_rate": 1.8962814221485505e-05, "loss": 0.6476, "step": 6706 }, { "epoch": 0.17, "grad_norm": 2.5642778873443604, "learning_rate": 1.896244603407061e-05, "loss": 0.5837, "step": 6707 }, { "epoch": 0.17, "grad_norm": 2.8514509201049805, "learning_rate": 1.896207778489216e-05, "loss": 0.5523, "step": 6708 }, { "epoch": 0.17, "grad_norm": 1.3521554470062256, "learning_rate": 1.8961709473952705e-05, "loss": 0.6299, "step": 6709 }, { "epoch": 0.17, "grad_norm": 1.5877290964126587, "learning_rate": 1.896134110125478e-05, "loss": 0.7148, "step": 6710 }, { "epoch": 0.17, "grad_norm": 2.4552972316741943, "learning_rate": 1.896097266680092e-05, "loss": 0.6113, "step": 6711 }, { "epoch": 0.17, "grad_norm": 1.8029255867004395, "learning_rate": 1.896060417059367e-05, "loss": 0.5998, "step": 6712 }, { "epoch": 0.17, "grad_norm": 4.49073600769043, "learning_rate": 1.8960235612635564e-05, "loss": 0.9231, "step": 6713 }, { "epoch": 0.17, "grad_norm": 1.8041764497756958, "learning_rate": 1.8959866992929143e-05, "loss": 0.6589, "step": 6714 }, { "epoch": 0.17, "grad_norm": 1.9480736255645752, "learning_rate": 1.8959498311476944e-05, "loss": 0.74, "step": 6715 }, { "epoch": 0.17, "grad_norm": 4.928186893463135, "learning_rate": 1.8959129568281515e-05, "loss": 0.7769, "step": 6716 }, { "epoch": 0.17, "grad_norm": 3.350118637084961, "learning_rate": 1.8958760763345397e-05, "loss": 0.6445, "step": 6717 }, { "epoch": 0.17, "grad_norm": 1.4078117609024048, "learning_rate": 1.8958391896671124e-05, "loss": 0.5817, "step": 6718 }, { "epoch": 0.17, "grad_norm": 3.374600887298584, "learning_rate": 1.8958022968261242e-05, "loss": 0.6078, "step": 6719 }, { "epoch": 0.17, "grad_norm": 2.5234060287475586, "learning_rate": 1.8957653978118297e-05, "loss": 0.6163, "step": 6720 }, { "epoch": 0.17, "grad_norm": 3.362232208251953, "learning_rate": 1.8957284926244824e-05, "loss": 0.6861, "step": 6721 }, { "epoch": 0.17, "grad_norm": 3.7763547897338867, "learning_rate": 1.8956915812643375e-05, "loss": 0.7685, "step": 6722 }, { "epoch": 0.17, "grad_norm": 1.783625841140747, "learning_rate": 1.8956546637316486e-05, "loss": 0.639, "step": 6723 }, { "epoch": 0.17, "grad_norm": 2.5507900714874268, "learning_rate": 1.895617740026671e-05, "loss": 0.6766, "step": 6724 }, { "epoch": 0.17, "grad_norm": 4.7423095703125, "learning_rate": 1.895580810149658e-05, "loss": 0.7244, "step": 6725 }, { "epoch": 0.17, "grad_norm": 1.5510982275009155, "learning_rate": 1.895543874100865e-05, "loss": 0.5747, "step": 6726 }, { "epoch": 0.17, "grad_norm": 2.4489314556121826, "learning_rate": 1.895506931880546e-05, "loss": 0.7552, "step": 6727 }, { "epoch": 0.17, "grad_norm": 2.432443618774414, "learning_rate": 1.895469983488956e-05, "loss": 0.7144, "step": 6728 }, { "epoch": 0.17, "grad_norm": 2.2129976749420166, "learning_rate": 1.8954330289263498e-05, "loss": 0.7686, "step": 6729 }, { "epoch": 0.17, "grad_norm": 3.9740428924560547, "learning_rate": 1.8953960681929816e-05, "loss": 0.7209, "step": 6730 }, { "epoch": 0.17, "grad_norm": 2.984694004058838, "learning_rate": 1.895359101289106e-05, "loss": 0.4145, "step": 6731 }, { "epoch": 0.17, "grad_norm": 2.532179832458496, "learning_rate": 1.895322128214978e-05, "loss": 0.9131, "step": 6732 }, { "epoch": 0.17, "grad_norm": 2.9563889503479004, "learning_rate": 1.8952851489708526e-05, "loss": 0.621, "step": 6733 }, { "epoch": 0.17, "grad_norm": 2.9365575313568115, "learning_rate": 1.895248163556984e-05, "loss": 0.6581, "step": 6734 }, { "epoch": 0.17, "grad_norm": 2.21455717086792, "learning_rate": 1.895211171973628e-05, "loss": 0.8185, "step": 6735 }, { "epoch": 0.17, "grad_norm": 2.266838788986206, "learning_rate": 1.895174174221039e-05, "loss": 0.5076, "step": 6736 }, { "epoch": 0.17, "grad_norm": 2.5591471195220947, "learning_rate": 1.8951371702994717e-05, "loss": 0.644, "step": 6737 }, { "epoch": 0.17, "grad_norm": 1.7466049194335938, "learning_rate": 1.8951001602091813e-05, "loss": 0.4761, "step": 6738 }, { "epoch": 0.17, "grad_norm": 1.9390724897384644, "learning_rate": 1.8950631439504233e-05, "loss": 0.4954, "step": 6739 }, { "epoch": 0.17, "grad_norm": 1.775019884109497, "learning_rate": 1.8950261215234525e-05, "loss": 0.7648, "step": 6740 }, { "epoch": 0.17, "grad_norm": 4.050777435302734, "learning_rate": 1.8949890929285236e-05, "loss": 0.6312, "step": 6741 }, { "epoch": 0.17, "grad_norm": 2.9985337257385254, "learning_rate": 1.8949520581658928e-05, "loss": 0.6898, "step": 6742 }, { "epoch": 0.17, "grad_norm": 2.0632879734039307, "learning_rate": 1.8949150172358138e-05, "loss": 0.6214, "step": 6743 }, { "epoch": 0.17, "grad_norm": 2.0924363136291504, "learning_rate": 1.8948779701385435e-05, "loss": 0.7359, "step": 6744 }, { "epoch": 0.17, "grad_norm": 1.4310141801834106, "learning_rate": 1.894840916874336e-05, "loss": 0.6301, "step": 6745 }, { "epoch": 0.17, "grad_norm": 1.017709493637085, "learning_rate": 1.8948038574434475e-05, "loss": 0.5932, "step": 6746 }, { "epoch": 0.17, "grad_norm": 1.5931707620620728, "learning_rate": 1.8947667918461326e-05, "loss": 0.5656, "step": 6747 }, { "epoch": 0.17, "grad_norm": 2.943446159362793, "learning_rate": 1.8947297200826473e-05, "loss": 0.6576, "step": 6748 }, { "epoch": 0.17, "grad_norm": 2.578556776046753, "learning_rate": 1.894692642153247e-05, "loss": 0.7814, "step": 6749 }, { "epoch": 0.17, "grad_norm": 1.327722191810608, "learning_rate": 1.8946555580581867e-05, "loss": 0.5403, "step": 6750 }, { "epoch": 0.17, "grad_norm": 2.773562431335449, "learning_rate": 1.8946184677977227e-05, "loss": 0.5701, "step": 6751 }, { "epoch": 0.17, "grad_norm": 2.2911338806152344, "learning_rate": 1.89458137137211e-05, "loss": 0.6196, "step": 6752 }, { "epoch": 0.17, "grad_norm": 3.3812289237976074, "learning_rate": 1.894544268781605e-05, "loss": 0.5301, "step": 6753 }, { "epoch": 0.17, "grad_norm": 1.533324956893921, "learning_rate": 1.8945071600264623e-05, "loss": 0.5101, "step": 6754 }, { "epoch": 0.17, "grad_norm": 1.952677607536316, "learning_rate": 1.8944700451069387e-05, "loss": 0.7162, "step": 6755 }, { "epoch": 0.17, "grad_norm": 2.720183849334717, "learning_rate": 1.8944329240232893e-05, "loss": 0.7562, "step": 6756 }, { "epoch": 0.17, "grad_norm": 4.475295066833496, "learning_rate": 1.8943957967757703e-05, "loss": 0.724, "step": 6757 }, { "epoch": 0.17, "grad_norm": 2.901031494140625, "learning_rate": 1.8943586633646372e-05, "loss": 0.7121, "step": 6758 }, { "epoch": 0.17, "grad_norm": 2.7438809871673584, "learning_rate": 1.894321523790146e-05, "loss": 0.636, "step": 6759 }, { "epoch": 0.17, "grad_norm": 2.299551486968994, "learning_rate": 1.894284378052553e-05, "loss": 0.7735, "step": 6760 }, { "epoch": 0.17, "grad_norm": 4.3258538246154785, "learning_rate": 1.8942472261521138e-05, "loss": 0.6898, "step": 6761 }, { "epoch": 0.17, "grad_norm": 4.200240612030029, "learning_rate": 1.8942100680890846e-05, "loss": 0.8168, "step": 6762 }, { "epoch": 0.17, "grad_norm": 2.2899322509765625, "learning_rate": 1.8941729038637216e-05, "loss": 0.656, "step": 6763 }, { "epoch": 0.17, "grad_norm": 2.910022497177124, "learning_rate": 1.8941357334762803e-05, "loss": 0.8057, "step": 6764 }, { "epoch": 0.17, "grad_norm": 4.5300493240356445, "learning_rate": 1.8940985569270172e-05, "loss": 0.6591, "step": 6765 }, { "epoch": 0.17, "grad_norm": 2.075037717819214, "learning_rate": 1.894061374216189e-05, "loss": 0.5491, "step": 6766 }, { "epoch": 0.17, "grad_norm": 2.140152931213379, "learning_rate": 1.8940241853440512e-05, "loss": 0.6651, "step": 6767 }, { "epoch": 0.17, "grad_norm": 2.1084322929382324, "learning_rate": 1.8939869903108605e-05, "loss": 0.6756, "step": 6768 }, { "epoch": 0.17, "grad_norm": 2.439047336578369, "learning_rate": 1.8939497891168733e-05, "loss": 0.7426, "step": 6769 }, { "epoch": 0.17, "grad_norm": 2.0848941802978516, "learning_rate": 1.8939125817623457e-05, "loss": 0.713, "step": 6770 }, { "epoch": 0.17, "grad_norm": 3.7036144733428955, "learning_rate": 1.893875368247534e-05, "loss": 0.652, "step": 6771 }, { "epoch": 0.17, "grad_norm": 4.527890682220459, "learning_rate": 1.893838148572695e-05, "loss": 0.7878, "step": 6772 }, { "epoch": 0.17, "grad_norm": 1.8600255250930786, "learning_rate": 1.893800922738085e-05, "loss": 0.6567, "step": 6773 }, { "epoch": 0.17, "grad_norm": 3.1728155612945557, "learning_rate": 1.893763690743961e-05, "loss": 0.6853, "step": 6774 }, { "epoch": 0.17, "grad_norm": 1.8455415964126587, "learning_rate": 1.8937264525905786e-05, "loss": 0.6015, "step": 6775 }, { "epoch": 0.17, "grad_norm": 3.9346671104431152, "learning_rate": 1.893689208278195e-05, "loss": 0.8129, "step": 6776 }, { "epoch": 0.17, "grad_norm": 2.16418719291687, "learning_rate": 1.893651957807067e-05, "loss": 0.5359, "step": 6777 }, { "epoch": 0.17, "grad_norm": 1.3683794736862183, "learning_rate": 1.8936147011774512e-05, "loss": 0.696, "step": 6778 }, { "epoch": 0.17, "grad_norm": 4.071057319641113, "learning_rate": 1.893577438389604e-05, "loss": 0.583, "step": 6779 }, { "epoch": 0.17, "grad_norm": 3.9092555046081543, "learning_rate": 1.893540169443783e-05, "loss": 0.6507, "step": 6780 }, { "epoch": 0.17, "grad_norm": 4.822513580322266, "learning_rate": 1.893502894340244e-05, "loss": 0.6085, "step": 6781 }, { "epoch": 0.17, "grad_norm": 3.1490368843078613, "learning_rate": 1.8934656130792448e-05, "loss": 0.5326, "step": 6782 }, { "epoch": 0.17, "grad_norm": 2.118100881576538, "learning_rate": 1.893428325661042e-05, "loss": 0.6957, "step": 6783 }, { "epoch": 0.17, "grad_norm": 2.527818202972412, "learning_rate": 1.8933910320858922e-05, "loss": 0.751, "step": 6784 }, { "epoch": 0.17, "grad_norm": 4.340505123138428, "learning_rate": 1.893353732354053e-05, "loss": 0.5324, "step": 6785 }, { "epoch": 0.17, "grad_norm": 1.3240447044372559, "learning_rate": 1.8933164264657808e-05, "loss": 0.4905, "step": 6786 }, { "epoch": 0.17, "grad_norm": 5.016998291015625, "learning_rate": 1.893279114421333e-05, "loss": 0.9382, "step": 6787 }, { "epoch": 0.17, "grad_norm": 2.368586778640747, "learning_rate": 1.893241796220967e-05, "loss": 0.628, "step": 6788 }, { "epoch": 0.17, "grad_norm": 1.7079503536224365, "learning_rate": 1.8932044718649395e-05, "loss": 0.7763, "step": 6789 }, { "epoch": 0.17, "grad_norm": 2.3647379875183105, "learning_rate": 1.8931671413535082e-05, "loss": 0.4454, "step": 6790 }, { "epoch": 0.17, "grad_norm": 5.178879737854004, "learning_rate": 1.89312980468693e-05, "loss": 0.9485, "step": 6791 }, { "epoch": 0.17, "grad_norm": 1.4642763137817383, "learning_rate": 1.8930924618654624e-05, "loss": 0.5038, "step": 6792 }, { "epoch": 0.17, "grad_norm": 2.6468567848205566, "learning_rate": 1.8930551128893624e-05, "loss": 0.8066, "step": 6793 }, { "epoch": 0.17, "grad_norm": 4.33207368850708, "learning_rate": 1.8930177577588878e-05, "loss": 0.5821, "step": 6794 }, { "epoch": 0.17, "grad_norm": 2.4796459674835205, "learning_rate": 1.892980396474296e-05, "loss": 0.5681, "step": 6795 }, { "epoch": 0.17, "grad_norm": 3.0004756450653076, "learning_rate": 1.892943029035844e-05, "loss": 0.7737, "step": 6796 }, { "epoch": 0.17, "grad_norm": 1.45949125289917, "learning_rate": 1.89290565544379e-05, "loss": 0.6314, "step": 6797 }, { "epoch": 0.17, "grad_norm": 1.8992269039154053, "learning_rate": 1.892868275698391e-05, "loss": 0.6763, "step": 6798 }, { "epoch": 0.17, "grad_norm": 3.701446056365967, "learning_rate": 1.8928308897999047e-05, "loss": 0.6308, "step": 6799 }, { "epoch": 0.17, "grad_norm": 2.686225414276123, "learning_rate": 1.892793497748589e-05, "loss": 0.7312, "step": 6800 }, { "epoch": 0.17, "grad_norm": 5.090061187744141, "learning_rate": 1.8927560995447013e-05, "loss": 0.6363, "step": 6801 }, { "epoch": 0.17, "grad_norm": 3.908444881439209, "learning_rate": 1.8927186951884997e-05, "loss": 0.6904, "step": 6802 }, { "epoch": 0.17, "grad_norm": 3.425057888031006, "learning_rate": 1.8926812846802415e-05, "loss": 0.7127, "step": 6803 }, { "epoch": 0.17, "grad_norm": 2.020098924636841, "learning_rate": 1.8926438680201846e-05, "loss": 0.6294, "step": 6804 }, { "epoch": 0.17, "grad_norm": 4.033981800079346, "learning_rate": 1.892606445208587e-05, "loss": 0.7424, "step": 6805 }, { "epoch": 0.17, "grad_norm": 3.901740550994873, "learning_rate": 1.892569016245707e-05, "loss": 0.6745, "step": 6806 }, { "epoch": 0.17, "grad_norm": 2.148331880569458, "learning_rate": 1.8925315811318014e-05, "loss": 0.6683, "step": 6807 }, { "epoch": 0.17, "grad_norm": 1.6284221410751343, "learning_rate": 1.8924941398671294e-05, "loss": 0.6252, "step": 6808 }, { "epoch": 0.17, "grad_norm": 2.1541481018066406, "learning_rate": 1.8924566924519486e-05, "loss": 0.4919, "step": 6809 }, { "epoch": 0.17, "grad_norm": 1.4615370035171509, "learning_rate": 1.8924192388865168e-05, "loss": 0.6361, "step": 6810 }, { "epoch": 0.17, "grad_norm": 1.9135491847991943, "learning_rate": 1.892381779171092e-05, "loss": 0.7209, "step": 6811 }, { "epoch": 0.17, "grad_norm": 1.6367746591567993, "learning_rate": 1.892344313305933e-05, "loss": 0.736, "step": 6812 }, { "epoch": 0.17, "grad_norm": 1.3669100999832153, "learning_rate": 1.8923068412912974e-05, "loss": 0.6621, "step": 6813 }, { "epoch": 0.17, "grad_norm": 1.7281641960144043, "learning_rate": 1.892269363127444e-05, "loss": 0.6353, "step": 6814 }, { "epoch": 0.17, "grad_norm": 1.7907304763793945, "learning_rate": 1.8922318788146305e-05, "loss": 0.6148, "step": 6815 }, { "epoch": 0.17, "grad_norm": 1.785261631011963, "learning_rate": 1.8921943883531153e-05, "loss": 0.7251, "step": 6816 }, { "epoch": 0.17, "grad_norm": 2.622429847717285, "learning_rate": 1.892156891743157e-05, "loss": 0.7189, "step": 6817 }, { "epoch": 0.17, "grad_norm": 2.1851425170898438, "learning_rate": 1.892119388985014e-05, "loss": 0.5583, "step": 6818 }, { "epoch": 0.17, "grad_norm": 2.9637272357940674, "learning_rate": 1.8920818800789447e-05, "loss": 0.5377, "step": 6819 }, { "epoch": 0.17, "grad_norm": 1.8880890607833862, "learning_rate": 1.8920443650252073e-05, "loss": 0.5967, "step": 6820 }, { "epoch": 0.17, "grad_norm": 1.4495265483856201, "learning_rate": 1.8920068438240608e-05, "loss": 0.6249, "step": 6821 }, { "epoch": 0.17, "grad_norm": 2.7276341915130615, "learning_rate": 1.8919693164757634e-05, "loss": 0.5301, "step": 6822 }, { "epoch": 0.17, "grad_norm": 2.479097366333008, "learning_rate": 1.891931782980574e-05, "loss": 0.545, "step": 6823 }, { "epoch": 0.17, "grad_norm": 4.087993144989014, "learning_rate": 1.891894243338751e-05, "loss": 0.6913, "step": 6824 }, { "epoch": 0.17, "grad_norm": 1.624500036239624, "learning_rate": 1.8918566975505534e-05, "loss": 0.7815, "step": 6825 }, { "epoch": 0.17, "grad_norm": 1.828986406326294, "learning_rate": 1.8918191456162394e-05, "loss": 0.68, "step": 6826 }, { "epoch": 0.17, "grad_norm": 1.9675512313842773, "learning_rate": 1.8917815875360685e-05, "loss": 0.6032, "step": 6827 }, { "epoch": 0.18, "grad_norm": 2.5465245246887207, "learning_rate": 1.8917440233102988e-05, "loss": 0.6729, "step": 6828 }, { "epoch": 0.18, "grad_norm": 3.0066535472869873, "learning_rate": 1.89170645293919e-05, "loss": 0.7682, "step": 6829 }, { "epoch": 0.18, "grad_norm": 2.3743395805358887, "learning_rate": 1.8916688764230003e-05, "loss": 0.6003, "step": 6830 }, { "epoch": 0.18, "grad_norm": 3.107645034790039, "learning_rate": 1.891631293761989e-05, "loss": 0.5202, "step": 6831 }, { "epoch": 0.18, "grad_norm": 6.336981773376465, "learning_rate": 1.8915937049564147e-05, "loss": 0.6131, "step": 6832 }, { "epoch": 0.18, "grad_norm": 2.206523895263672, "learning_rate": 1.8915561100065368e-05, "loss": 0.6384, "step": 6833 }, { "epoch": 0.18, "grad_norm": 2.795966148376465, "learning_rate": 1.8915185089126145e-05, "loss": 0.5522, "step": 6834 }, { "epoch": 0.18, "grad_norm": 3.6553728580474854, "learning_rate": 1.891480901674907e-05, "loss": 0.7921, "step": 6835 }, { "epoch": 0.18, "grad_norm": 2.501502513885498, "learning_rate": 1.891443288293673e-05, "loss": 0.7079, "step": 6836 }, { "epoch": 0.18, "grad_norm": 2.1386775970458984, "learning_rate": 1.8914056687691717e-05, "loss": 0.6107, "step": 6837 }, { "epoch": 0.18, "grad_norm": 2.3838794231414795, "learning_rate": 1.891368043101663e-05, "loss": 0.7269, "step": 6838 }, { "epoch": 0.18, "grad_norm": 2.538062572479248, "learning_rate": 1.891330411291405e-05, "loss": 0.5734, "step": 6839 }, { "epoch": 0.18, "grad_norm": 1.3884599208831787, "learning_rate": 1.8912927733386584e-05, "loss": 0.5751, "step": 6840 }, { "epoch": 0.18, "grad_norm": 3.6726319789886475, "learning_rate": 1.891255129243682e-05, "loss": 0.6345, "step": 6841 }, { "epoch": 0.18, "grad_norm": 2.7742044925689697, "learning_rate": 1.891217479006735e-05, "loss": 0.6424, "step": 6842 }, { "epoch": 0.18, "grad_norm": 1.3449196815490723, "learning_rate": 1.891179822628077e-05, "loss": 0.5747, "step": 6843 }, { "epoch": 0.18, "grad_norm": 2.3490147590637207, "learning_rate": 1.891142160107968e-05, "loss": 0.5008, "step": 6844 }, { "epoch": 0.18, "grad_norm": 1.4372568130493164, "learning_rate": 1.8911044914466666e-05, "loss": 0.5855, "step": 6845 }, { "epoch": 0.18, "grad_norm": 3.2226884365081787, "learning_rate": 1.891066816644433e-05, "loss": 0.6809, "step": 6846 }, { "epoch": 0.18, "grad_norm": 9.263650894165039, "learning_rate": 1.8910291357015266e-05, "loss": 0.6793, "step": 6847 }, { "epoch": 0.18, "grad_norm": 3.1795382499694824, "learning_rate": 1.8909914486182073e-05, "loss": 0.6241, "step": 6848 }, { "epoch": 0.18, "grad_norm": 2.7295374870300293, "learning_rate": 1.890953755394735e-05, "loss": 0.5973, "step": 6849 }, { "epoch": 0.18, "grad_norm": 1.8940999507904053, "learning_rate": 1.890916056031369e-05, "loss": 0.6546, "step": 6850 }, { "epoch": 0.18, "grad_norm": 5.151452541351318, "learning_rate": 1.890878350528369e-05, "loss": 0.7171, "step": 6851 }, { "epoch": 0.18, "grad_norm": 4.009024620056152, "learning_rate": 1.890840638885995e-05, "loss": 0.6162, "step": 6852 }, { "epoch": 0.18, "grad_norm": 3.1892828941345215, "learning_rate": 1.8908029211045078e-05, "loss": 0.6387, "step": 6853 }, { "epoch": 0.18, "grad_norm": 3.001573324203491, "learning_rate": 1.890765197184166e-05, "loss": 0.8978, "step": 6854 }, { "epoch": 0.18, "grad_norm": 1.9460982084274292, "learning_rate": 1.89072746712523e-05, "loss": 0.6689, "step": 6855 }, { "epoch": 0.18, "grad_norm": 2.359464645385742, "learning_rate": 1.8906897309279602e-05, "loss": 0.5725, "step": 6856 }, { "epoch": 0.18, "grad_norm": 2.5622055530548096, "learning_rate": 1.890651988592616e-05, "loss": 0.6484, "step": 6857 }, { "epoch": 0.18, "grad_norm": 4.565415859222412, "learning_rate": 1.890614240119458e-05, "loss": 0.7058, "step": 6858 }, { "epoch": 0.18, "grad_norm": 2.4940364360809326, "learning_rate": 1.8905764855087464e-05, "loss": 0.4144, "step": 6859 }, { "epoch": 0.18, "grad_norm": 4.29351806640625, "learning_rate": 1.890538724760741e-05, "loss": 0.7821, "step": 6860 }, { "epoch": 0.18, "grad_norm": 3.0671474933624268, "learning_rate": 1.8905009578757024e-05, "loss": 0.5435, "step": 6861 }, { "epoch": 0.18, "grad_norm": 2.3069605827331543, "learning_rate": 1.8904631848538904e-05, "loss": 0.5968, "step": 6862 }, { "epoch": 0.18, "grad_norm": 2.7088072299957275, "learning_rate": 1.8904254056955656e-05, "loss": 0.701, "step": 6863 }, { "epoch": 0.18, "grad_norm": 1.5247999429702759, "learning_rate": 1.8903876204009886e-05, "loss": 0.5931, "step": 6864 }, { "epoch": 0.18, "grad_norm": 3.465364456176758, "learning_rate": 1.890349828970419e-05, "loss": 0.7852, "step": 6865 }, { "epoch": 0.18, "grad_norm": 4.155904293060303, "learning_rate": 1.8903120314041182e-05, "loss": 0.7472, "step": 6866 }, { "epoch": 0.18, "grad_norm": 2.407978057861328, "learning_rate": 1.8902742277023463e-05, "loss": 0.7684, "step": 6867 }, { "epoch": 0.18, "grad_norm": 1.8580766916275024, "learning_rate": 1.8902364178653633e-05, "loss": 0.5048, "step": 6868 }, { "epoch": 0.18, "grad_norm": 1.8213435411453247, "learning_rate": 1.8901986018934305e-05, "loss": 0.6879, "step": 6869 }, { "epoch": 0.18, "grad_norm": 1.7784587144851685, "learning_rate": 1.8901607797868083e-05, "loss": 0.6157, "step": 6870 }, { "epoch": 0.18, "grad_norm": 1.9390615224838257, "learning_rate": 1.890122951545757e-05, "loss": 0.6798, "step": 6871 }, { "epoch": 0.18, "grad_norm": 3.5501785278320312, "learning_rate": 1.890085117170538e-05, "loss": 0.6382, "step": 6872 }, { "epoch": 0.18, "grad_norm": 2.169100522994995, "learning_rate": 1.8900472766614108e-05, "loss": 0.638, "step": 6873 }, { "epoch": 0.18, "grad_norm": 1.9914076328277588, "learning_rate": 1.8900094300186375e-05, "loss": 0.7231, "step": 6874 }, { "epoch": 0.18, "grad_norm": 1.5474839210510254, "learning_rate": 1.8899715772424783e-05, "loss": 0.6273, "step": 6875 }, { "epoch": 0.18, "grad_norm": 1.8810534477233887, "learning_rate": 1.889933718333194e-05, "loss": 0.5961, "step": 6876 }, { "epoch": 0.18, "grad_norm": 2.1316661834716797, "learning_rate": 1.889895853291046e-05, "loss": 0.5299, "step": 6877 }, { "epoch": 0.18, "grad_norm": 2.5972354412078857, "learning_rate": 1.8898579821162943e-05, "loss": 0.6276, "step": 6878 }, { "epoch": 0.18, "grad_norm": 4.925384044647217, "learning_rate": 1.8898201048092007e-05, "loss": 0.6734, "step": 6879 }, { "epoch": 0.18, "grad_norm": 2.338610887527466, "learning_rate": 1.8897822213700264e-05, "loss": 0.5763, "step": 6880 }, { "epoch": 0.18, "grad_norm": 2.618967056274414, "learning_rate": 1.8897443317990315e-05, "loss": 0.5415, "step": 6881 }, { "epoch": 0.18, "grad_norm": 2.0070433616638184, "learning_rate": 1.889706436096478e-05, "loss": 0.6973, "step": 6882 }, { "epoch": 0.18, "grad_norm": 1.635716199874878, "learning_rate": 1.889668534262626e-05, "loss": 0.5963, "step": 6883 }, { "epoch": 0.18, "grad_norm": 1.7926521301269531, "learning_rate": 1.889630626297738e-05, "loss": 0.6376, "step": 6884 }, { "epoch": 0.18, "grad_norm": 4.156677722930908, "learning_rate": 1.8895927122020748e-05, "loss": 0.6626, "step": 6885 }, { "epoch": 0.18, "grad_norm": 3.2099769115448, "learning_rate": 1.8895547919758973e-05, "loss": 0.6578, "step": 6886 }, { "epoch": 0.18, "grad_norm": 2.165163040161133, "learning_rate": 1.889516865619467e-05, "loss": 0.5919, "step": 6887 }, { "epoch": 0.18, "grad_norm": 1.2652658224105835, "learning_rate": 1.8894789331330453e-05, "loss": 0.4764, "step": 6888 }, { "epoch": 0.18, "grad_norm": 2.2797954082489014, "learning_rate": 1.889440994516894e-05, "loss": 0.6598, "step": 6889 }, { "epoch": 0.18, "grad_norm": 2.0305774211883545, "learning_rate": 1.8894030497712736e-05, "loss": 0.7842, "step": 6890 }, { "epoch": 0.18, "grad_norm": 1.5291388034820557, "learning_rate": 1.8893650988964466e-05, "loss": 0.5815, "step": 6891 }, { "epoch": 0.18, "grad_norm": 1.7481091022491455, "learning_rate": 1.889327141892674e-05, "loss": 0.4771, "step": 6892 }, { "epoch": 0.18, "grad_norm": 1.0108165740966797, "learning_rate": 1.8892891787602173e-05, "loss": 0.6013, "step": 6893 }, { "epoch": 0.18, "grad_norm": 6.990487098693848, "learning_rate": 1.889251209499338e-05, "loss": 0.5026, "step": 6894 }, { "epoch": 0.18, "grad_norm": 1.2616549730300903, "learning_rate": 1.8892132341102986e-05, "loss": 0.622, "step": 6895 }, { "epoch": 0.18, "grad_norm": 1.5124467611312866, "learning_rate": 1.88917525259336e-05, "loss": 0.6774, "step": 6896 }, { "epoch": 0.18, "grad_norm": 1.581620454788208, "learning_rate": 1.8891372649487842e-05, "loss": 0.5951, "step": 6897 }, { "epoch": 0.18, "grad_norm": 1.7247252464294434, "learning_rate": 1.889099271176833e-05, "loss": 0.6289, "step": 6898 }, { "epoch": 0.18, "grad_norm": 5.001400947570801, "learning_rate": 1.8890612712777684e-05, "loss": 0.9582, "step": 6899 }, { "epoch": 0.18, "grad_norm": 1.9855918884277344, "learning_rate": 1.8890232652518517e-05, "loss": 0.69, "step": 6900 }, { "epoch": 0.18, "grad_norm": 1.8299198150634766, "learning_rate": 1.8889852530993454e-05, "loss": 0.6288, "step": 6901 }, { "epoch": 0.18, "grad_norm": 3.1597235202789307, "learning_rate": 1.8889472348205117e-05, "loss": 0.8229, "step": 6902 }, { "epoch": 0.18, "grad_norm": 1.9018930196762085, "learning_rate": 1.8889092104156114e-05, "loss": 0.5965, "step": 6903 }, { "epoch": 0.18, "grad_norm": 1.7309292554855347, "learning_rate": 1.8888711798849076e-05, "loss": 0.6059, "step": 6904 }, { "epoch": 0.18, "grad_norm": 3.449871778488159, "learning_rate": 1.888833143228662e-05, "loss": 0.5764, "step": 6905 }, { "epoch": 0.18, "grad_norm": 2.5005953311920166, "learning_rate": 1.888795100447137e-05, "loss": 0.6397, "step": 6906 }, { "epoch": 0.18, "grad_norm": 2.2154221534729004, "learning_rate": 1.8887570515405945e-05, "loss": 0.6852, "step": 6907 }, { "epoch": 0.18, "grad_norm": 4.079555988311768, "learning_rate": 1.8887189965092966e-05, "loss": 0.6094, "step": 6908 }, { "epoch": 0.18, "grad_norm": 2.1597843170166016, "learning_rate": 1.8886809353535057e-05, "loss": 0.5439, "step": 6909 }, { "epoch": 0.18, "grad_norm": 1.5983113050460815, "learning_rate": 1.8886428680734846e-05, "loss": 0.5756, "step": 6910 }, { "epoch": 0.18, "grad_norm": 1.2668179273605347, "learning_rate": 1.888604794669495e-05, "loss": 0.5361, "step": 6911 }, { "epoch": 0.18, "grad_norm": 3.261906385421753, "learning_rate": 1.888566715141799e-05, "loss": 0.613, "step": 6912 }, { "epoch": 0.18, "grad_norm": 9.973724365234375, "learning_rate": 1.8885286294906598e-05, "loss": 0.8051, "step": 6913 }, { "epoch": 0.18, "grad_norm": 2.1815574169158936, "learning_rate": 1.888490537716339e-05, "loss": 0.7139, "step": 6914 }, { "epoch": 0.18, "grad_norm": 2.1716861724853516, "learning_rate": 1.8884524398191003e-05, "loss": 0.784, "step": 6915 }, { "epoch": 0.18, "grad_norm": 2.9123849868774414, "learning_rate": 1.888414335799205e-05, "loss": 0.5687, "step": 6916 }, { "epoch": 0.18, "grad_norm": 1.8946187496185303, "learning_rate": 1.8883762256569166e-05, "loss": 0.6759, "step": 6917 }, { "epoch": 0.18, "grad_norm": 6.8803935050964355, "learning_rate": 1.8883381093924974e-05, "loss": 0.564, "step": 6918 }, { "epoch": 0.18, "grad_norm": 2.197770118713379, "learning_rate": 1.88829998700621e-05, "loss": 0.5216, "step": 6919 }, { "epoch": 0.18, "grad_norm": 1.5553340911865234, "learning_rate": 1.888261858498317e-05, "loss": 0.7704, "step": 6920 }, { "epoch": 0.18, "grad_norm": 2.1592395305633545, "learning_rate": 1.8882237238690816e-05, "loss": 0.8404, "step": 6921 }, { "epoch": 0.18, "grad_norm": 1.7550297975540161, "learning_rate": 1.8881855831187664e-05, "loss": 0.6557, "step": 6922 }, { "epoch": 0.18, "grad_norm": 1.788972020149231, "learning_rate": 1.8881474362476336e-05, "loss": 0.6339, "step": 6923 }, { "epoch": 0.18, "grad_norm": 4.286524772644043, "learning_rate": 1.8881092832559474e-05, "loss": 0.6509, "step": 6924 }, { "epoch": 0.18, "grad_norm": 1.843530535697937, "learning_rate": 1.8880711241439695e-05, "loss": 0.6094, "step": 6925 }, { "epoch": 0.18, "grad_norm": 3.017632007598877, "learning_rate": 1.8880329589119633e-05, "loss": 0.6198, "step": 6926 }, { "epoch": 0.18, "grad_norm": 1.9032353162765503, "learning_rate": 1.8879947875601922e-05, "loss": 0.6879, "step": 6927 }, { "epoch": 0.18, "grad_norm": 1.467015027999878, "learning_rate": 1.8879566100889188e-05, "loss": 0.5328, "step": 6928 }, { "epoch": 0.18, "grad_norm": 1.869411587715149, "learning_rate": 1.8879184264984063e-05, "loss": 0.5307, "step": 6929 }, { "epoch": 0.18, "grad_norm": 7.094916820526123, "learning_rate": 1.887880236788918e-05, "loss": 0.6754, "step": 6930 }, { "epoch": 0.18, "grad_norm": 2.3370554447174072, "learning_rate": 1.8878420409607166e-05, "loss": 0.7158, "step": 6931 }, { "epoch": 0.18, "grad_norm": 1.7855212688446045, "learning_rate": 1.8878038390140657e-05, "loss": 0.6861, "step": 6932 }, { "epoch": 0.18, "grad_norm": 2.7510733604431152, "learning_rate": 1.8877656309492287e-05, "loss": 0.6736, "step": 6933 }, { "epoch": 0.18, "grad_norm": 3.6158626079559326, "learning_rate": 1.8877274167664685e-05, "loss": 0.7658, "step": 6934 }, { "epoch": 0.18, "grad_norm": 1.6803607940673828, "learning_rate": 1.8876891964660488e-05, "loss": 0.6384, "step": 6935 }, { "epoch": 0.18, "grad_norm": 1.8552333116531372, "learning_rate": 1.887650970048233e-05, "loss": 0.6759, "step": 6936 }, { "epoch": 0.18, "grad_norm": 4.559374809265137, "learning_rate": 1.887612737513284e-05, "loss": 0.6946, "step": 6937 }, { "epoch": 0.18, "grad_norm": 1.7101472616195679, "learning_rate": 1.8875744988614664e-05, "loss": 0.6619, "step": 6938 }, { "epoch": 0.18, "grad_norm": 4.227534770965576, "learning_rate": 1.887536254093042e-05, "loss": 0.7947, "step": 6939 }, { "epoch": 0.18, "grad_norm": 3.495814085006714, "learning_rate": 1.8874980032082762e-05, "loss": 0.6733, "step": 6940 }, { "epoch": 0.18, "grad_norm": 5.12890100479126, "learning_rate": 1.8874597462074314e-05, "loss": 0.6854, "step": 6941 }, { "epoch": 0.18, "grad_norm": 3.833350896835327, "learning_rate": 1.8874214830907715e-05, "loss": 0.6, "step": 6942 }, { "epoch": 0.18, "grad_norm": 1.248268485069275, "learning_rate": 1.8873832138585605e-05, "loss": 0.6291, "step": 6943 }, { "epoch": 0.18, "grad_norm": 6.853400230407715, "learning_rate": 1.8873449385110616e-05, "loss": 0.7616, "step": 6944 }, { "epoch": 0.18, "grad_norm": 2.3596255779266357, "learning_rate": 1.8873066570485388e-05, "loss": 0.7206, "step": 6945 }, { "epoch": 0.18, "grad_norm": 1.739287257194519, "learning_rate": 1.8872683694712567e-05, "loss": 0.591, "step": 6946 }, { "epoch": 0.18, "grad_norm": 2.109755277633667, "learning_rate": 1.8872300757794776e-05, "loss": 0.7472, "step": 6947 }, { "epoch": 0.18, "grad_norm": 1.9628822803497314, "learning_rate": 1.8871917759734667e-05, "loss": 0.5338, "step": 6948 }, { "epoch": 0.18, "grad_norm": 1.5729297399520874, "learning_rate": 1.8871534700534876e-05, "loss": 0.6921, "step": 6949 }, { "epoch": 0.18, "grad_norm": 2.6244776248931885, "learning_rate": 1.887115158019804e-05, "loss": 0.5922, "step": 6950 }, { "epoch": 0.18, "grad_norm": 2.4242217540740967, "learning_rate": 1.8870768398726796e-05, "loss": 0.5763, "step": 6951 }, { "epoch": 0.18, "grad_norm": 3.2783989906311035, "learning_rate": 1.8870385156123792e-05, "loss": 0.725, "step": 6952 }, { "epoch": 0.18, "grad_norm": 3.361985683441162, "learning_rate": 1.8870001852391667e-05, "loss": 0.5587, "step": 6953 }, { "epoch": 0.18, "grad_norm": 2.030291795730591, "learning_rate": 1.8869618487533064e-05, "loss": 0.6053, "step": 6954 }, { "epoch": 0.18, "grad_norm": 3.458559274673462, "learning_rate": 1.886923506155062e-05, "loss": 0.7118, "step": 6955 }, { "epoch": 0.18, "grad_norm": 2.8792245388031006, "learning_rate": 1.886885157444698e-05, "loss": 0.7145, "step": 6956 }, { "epoch": 0.18, "grad_norm": 3.4449431896209717, "learning_rate": 1.886846802622479e-05, "loss": 0.6612, "step": 6957 }, { "epoch": 0.18, "grad_norm": 5.379483222961426, "learning_rate": 1.886808441688669e-05, "loss": 0.5361, "step": 6958 }, { "epoch": 0.18, "grad_norm": 1.761633038520813, "learning_rate": 1.886770074643532e-05, "loss": 0.6609, "step": 6959 }, { "epoch": 0.18, "grad_norm": 1.8968994617462158, "learning_rate": 1.886731701487333e-05, "loss": 0.6968, "step": 6960 }, { "epoch": 0.18, "grad_norm": 1.557165265083313, "learning_rate": 1.886693322220336e-05, "loss": 0.5931, "step": 6961 }, { "epoch": 0.18, "grad_norm": 1.8934545516967773, "learning_rate": 1.886654936842806e-05, "loss": 0.5038, "step": 6962 }, { "epoch": 0.18, "grad_norm": 1.2003443241119385, "learning_rate": 1.8866165453550073e-05, "loss": 0.5761, "step": 6963 }, { "epoch": 0.18, "grad_norm": 7.237086296081543, "learning_rate": 1.886578147757204e-05, "loss": 0.8787, "step": 6964 }, { "epoch": 0.18, "grad_norm": 2.6351468563079834, "learning_rate": 1.886539744049661e-05, "loss": 0.5819, "step": 6965 }, { "epoch": 0.18, "grad_norm": 2.7041149139404297, "learning_rate": 1.8865013342326436e-05, "loss": 0.7818, "step": 6966 }, { "epoch": 0.18, "grad_norm": 2.4337430000305176, "learning_rate": 1.8864629183064157e-05, "loss": 0.6931, "step": 6967 }, { "epoch": 0.18, "grad_norm": 4.3224639892578125, "learning_rate": 1.8864244962712425e-05, "loss": 0.8215, "step": 6968 }, { "epoch": 0.18, "grad_norm": 2.3739991188049316, "learning_rate": 1.8863860681273883e-05, "loss": 0.5464, "step": 6969 }, { "epoch": 0.18, "grad_norm": 1.6858665943145752, "learning_rate": 1.8863476338751184e-05, "loss": 0.6677, "step": 6970 }, { "epoch": 0.18, "grad_norm": 1.409415602684021, "learning_rate": 1.886309193514697e-05, "loss": 0.6988, "step": 6971 }, { "epoch": 0.18, "grad_norm": 3.4959561824798584, "learning_rate": 1.8862707470463902e-05, "loss": 0.8296, "step": 6972 }, { "epoch": 0.18, "grad_norm": 5.706629753112793, "learning_rate": 1.8862322944704615e-05, "loss": 0.7074, "step": 6973 }, { "epoch": 0.18, "grad_norm": 1.9837589263916016, "learning_rate": 1.8861938357871773e-05, "loss": 0.5941, "step": 6974 }, { "epoch": 0.18, "grad_norm": 1.6028517484664917, "learning_rate": 1.8861553709968014e-05, "loss": 0.6991, "step": 6975 }, { "epoch": 0.18, "grad_norm": 1.9434573650360107, "learning_rate": 1.8861169000996e-05, "loss": 0.5195, "step": 6976 }, { "epoch": 0.18, "grad_norm": 2.514036178588867, "learning_rate": 1.886078423095837e-05, "loss": 0.6968, "step": 6977 }, { "epoch": 0.18, "grad_norm": 1.5446069240570068, "learning_rate": 1.8860399399857783e-05, "loss": 0.7367, "step": 6978 }, { "epoch": 0.18, "grad_norm": 4.049993515014648, "learning_rate": 1.8860014507696896e-05, "loss": 0.7075, "step": 6979 }, { "epoch": 0.18, "grad_norm": 2.691239356994629, "learning_rate": 1.885962955447835e-05, "loss": 0.7134, "step": 6980 }, { "epoch": 0.18, "grad_norm": 1.8292630910873413, "learning_rate": 1.88592445402048e-05, "loss": 0.7338, "step": 6981 }, { "epoch": 0.18, "grad_norm": 1.76642906665802, "learning_rate": 1.885885946487891e-05, "loss": 0.5352, "step": 6982 }, { "epoch": 0.18, "grad_norm": 1.4968565702438354, "learning_rate": 1.8858474328503322e-05, "loss": 0.6031, "step": 6983 }, { "epoch": 0.18, "grad_norm": 2.692919969558716, "learning_rate": 1.8858089131080698e-05, "loss": 0.7003, "step": 6984 }, { "epoch": 0.18, "grad_norm": 1.4122029542922974, "learning_rate": 1.8857703872613683e-05, "loss": 0.5753, "step": 6985 }, { "epoch": 0.18, "grad_norm": 1.922975778579712, "learning_rate": 1.8857318553104945e-05, "loss": 0.6406, "step": 6986 }, { "epoch": 0.18, "grad_norm": 6.434741020202637, "learning_rate": 1.8856933172557128e-05, "loss": 0.796, "step": 6987 }, { "epoch": 0.18, "grad_norm": 6.277744293212891, "learning_rate": 1.8856547730972893e-05, "loss": 0.6243, "step": 6988 }, { "epoch": 0.18, "grad_norm": 1.6966465711593628, "learning_rate": 1.8856162228354896e-05, "loss": 0.6635, "step": 6989 }, { "epoch": 0.18, "grad_norm": 1.5572075843811035, "learning_rate": 1.885577666470579e-05, "loss": 0.5181, "step": 6990 }, { "epoch": 0.18, "grad_norm": 1.4922593832015991, "learning_rate": 1.8855391040028237e-05, "loss": 0.535, "step": 6991 }, { "epoch": 0.18, "grad_norm": 2.6889967918395996, "learning_rate": 1.8855005354324892e-05, "loss": 0.7958, "step": 6992 }, { "epoch": 0.18, "grad_norm": 4.644407749176025, "learning_rate": 1.8854619607598413e-05, "loss": 0.6825, "step": 6993 }, { "epoch": 0.18, "grad_norm": 1.8558317422866821, "learning_rate": 1.885423379985146e-05, "loss": 0.7575, "step": 6994 }, { "epoch": 0.18, "grad_norm": 3.443253993988037, "learning_rate": 1.8853847931086687e-05, "loss": 0.5835, "step": 6995 }, { "epoch": 0.18, "grad_norm": 1.3159524202346802, "learning_rate": 1.8853462001306763e-05, "loss": 0.7057, "step": 6996 }, { "epoch": 0.18, "grad_norm": 2.064189910888672, "learning_rate": 1.8853076010514337e-05, "loss": 0.5574, "step": 6997 }, { "epoch": 0.18, "grad_norm": 1.60892653465271, "learning_rate": 1.8852689958712073e-05, "loss": 0.6909, "step": 6998 }, { "epoch": 0.18, "grad_norm": 2.061678886413574, "learning_rate": 1.8852303845902633e-05, "loss": 0.7035, "step": 6999 }, { "epoch": 0.18, "grad_norm": 2.6059021949768066, "learning_rate": 1.8851917672088675e-05, "loss": 0.6396, "step": 7000 }, { "epoch": 0.18, "grad_norm": 1.5631499290466309, "learning_rate": 1.885153143727286e-05, "loss": 0.6991, "step": 7001 }, { "epoch": 0.18, "grad_norm": 3.229142427444458, "learning_rate": 1.8851145141457853e-05, "loss": 0.6312, "step": 7002 }, { "epoch": 0.18, "grad_norm": 5.406753063201904, "learning_rate": 1.885075878464632e-05, "loss": 0.7801, "step": 7003 }, { "epoch": 0.18, "grad_norm": 2.9348766803741455, "learning_rate": 1.885037236684091e-05, "loss": 0.5107, "step": 7004 }, { "epoch": 0.18, "grad_norm": 2.2807669639587402, "learning_rate": 1.8849985888044297e-05, "loss": 0.6147, "step": 7005 }, { "epoch": 0.18, "grad_norm": 1.4980217218399048, "learning_rate": 1.8849599348259145e-05, "loss": 0.6936, "step": 7006 }, { "epoch": 0.18, "grad_norm": 2.572844982147217, "learning_rate": 1.884921274748811e-05, "loss": 0.709, "step": 7007 }, { "epoch": 0.18, "grad_norm": 2.1256368160247803, "learning_rate": 1.884882608573386e-05, "loss": 0.7752, "step": 7008 }, { "epoch": 0.18, "grad_norm": 2.067915678024292, "learning_rate": 1.8848439362999064e-05, "loss": 0.5982, "step": 7009 }, { "epoch": 0.18, "grad_norm": 2.8747875690460205, "learning_rate": 1.8848052579286377e-05, "loss": 0.5919, "step": 7010 }, { "epoch": 0.18, "grad_norm": 2.3123559951782227, "learning_rate": 1.8847665734598475e-05, "loss": 0.7095, "step": 7011 }, { "epoch": 0.18, "grad_norm": 2.7738771438598633, "learning_rate": 1.8847278828938018e-05, "loss": 0.7024, "step": 7012 }, { "epoch": 0.18, "grad_norm": 1.9039063453674316, "learning_rate": 1.8846891862307675e-05, "loss": 0.7695, "step": 7013 }, { "epoch": 0.18, "grad_norm": 1.9041597843170166, "learning_rate": 1.884650483471011e-05, "loss": 0.6688, "step": 7014 }, { "epoch": 0.18, "grad_norm": 3.3156471252441406, "learning_rate": 1.884611774614799e-05, "loss": 0.598, "step": 7015 }, { "epoch": 0.18, "grad_norm": 2.425046920776367, "learning_rate": 1.8845730596623987e-05, "loss": 0.7573, "step": 7016 }, { "epoch": 0.18, "grad_norm": 2.2514147758483887, "learning_rate": 1.8845343386140767e-05, "loss": 0.544, "step": 7017 }, { "epoch": 0.18, "grad_norm": 1.2014422416687012, "learning_rate": 1.8844956114700993e-05, "loss": 0.6447, "step": 7018 }, { "epoch": 0.18, "grad_norm": 2.3324437141418457, "learning_rate": 1.8844568782307343e-05, "loss": 0.631, "step": 7019 }, { "epoch": 0.18, "grad_norm": 5.058338642120361, "learning_rate": 1.8844181388962482e-05, "loss": 0.6966, "step": 7020 }, { "epoch": 0.18, "grad_norm": 2.464299440383911, "learning_rate": 1.8843793934669078e-05, "loss": 0.6758, "step": 7021 }, { "epoch": 0.18, "grad_norm": 8.547632217407227, "learning_rate": 1.88434064194298e-05, "loss": 0.7037, "step": 7022 }, { "epoch": 0.18, "grad_norm": 2.75186824798584, "learning_rate": 1.884301884324732e-05, "loss": 0.504, "step": 7023 }, { "epoch": 0.18, "grad_norm": 2.029172897338867, "learning_rate": 1.8842631206124315e-05, "loss": 0.6044, "step": 7024 }, { "epoch": 0.18, "grad_norm": 1.5031981468200684, "learning_rate": 1.884224350806345e-05, "loss": 0.6921, "step": 7025 }, { "epoch": 0.18, "grad_norm": 2.4084720611572266, "learning_rate": 1.8841855749067394e-05, "loss": 0.7137, "step": 7026 }, { "epoch": 0.18, "grad_norm": 2.526651382446289, "learning_rate": 1.8841467929138824e-05, "loss": 0.9172, "step": 7027 }, { "epoch": 0.18, "grad_norm": 7.470376014709473, "learning_rate": 1.8841080048280414e-05, "loss": 0.6844, "step": 7028 }, { "epoch": 0.18, "grad_norm": 2.7690236568450928, "learning_rate": 1.8840692106494836e-05, "loss": 0.7919, "step": 7029 }, { "epoch": 0.18, "grad_norm": 1.746815800666809, "learning_rate": 1.8840304103784762e-05, "loss": 0.5976, "step": 7030 }, { "epoch": 0.18, "grad_norm": 1.548338770866394, "learning_rate": 1.8839916040152864e-05, "loss": 0.5768, "step": 7031 }, { "epoch": 0.18, "grad_norm": 1.5735039710998535, "learning_rate": 1.8839527915601818e-05, "loss": 0.5573, "step": 7032 }, { "epoch": 0.18, "grad_norm": 2.2759337425231934, "learning_rate": 1.88391397301343e-05, "loss": 0.4495, "step": 7033 }, { "epoch": 0.18, "grad_norm": 1.7022181749343872, "learning_rate": 1.8838751483752987e-05, "loss": 0.7012, "step": 7034 }, { "epoch": 0.18, "grad_norm": 9.597500801086426, "learning_rate": 1.883836317646055e-05, "loss": 0.9408, "step": 7035 }, { "epoch": 0.18, "grad_norm": 1.6989185810089111, "learning_rate": 1.8837974808259666e-05, "loss": 0.6126, "step": 7036 }, { "epoch": 0.18, "grad_norm": 1.451682686805725, "learning_rate": 1.8837586379153014e-05, "loss": 0.5501, "step": 7037 }, { "epoch": 0.18, "grad_norm": 3.37680721282959, "learning_rate": 1.8837197889143266e-05, "loss": 0.6196, "step": 7038 }, { "epoch": 0.18, "grad_norm": 2.702983856201172, "learning_rate": 1.8836809338233104e-05, "loss": 0.7545, "step": 7039 }, { "epoch": 0.18, "grad_norm": 3.1602323055267334, "learning_rate": 1.8836420726425203e-05, "loss": 0.8318, "step": 7040 }, { "epoch": 0.18, "grad_norm": 3.165003776550293, "learning_rate": 1.8836032053722243e-05, "loss": 0.631, "step": 7041 }, { "epoch": 0.18, "grad_norm": 2.0305373668670654, "learning_rate": 1.88356433201269e-05, "loss": 0.7301, "step": 7042 }, { "epoch": 0.18, "grad_norm": 2.3399553298950195, "learning_rate": 1.8835254525641854e-05, "loss": 0.67, "step": 7043 }, { "epoch": 0.18, "grad_norm": 1.9427553415298462, "learning_rate": 1.8834865670269788e-05, "loss": 0.7877, "step": 7044 }, { "epoch": 0.18, "grad_norm": 3.3237082958221436, "learning_rate": 1.8834476754013376e-05, "loss": 0.599, "step": 7045 }, { "epoch": 0.18, "grad_norm": 2.4108848571777344, "learning_rate": 1.88340877768753e-05, "loss": 0.6637, "step": 7046 }, { "epoch": 0.18, "grad_norm": 1.5440186262130737, "learning_rate": 1.8833698738858244e-05, "loss": 0.5548, "step": 7047 }, { "epoch": 0.18, "grad_norm": 15.91136360168457, "learning_rate": 1.8833309639964884e-05, "loss": 0.696, "step": 7048 }, { "epoch": 0.18, "grad_norm": 1.4119359254837036, "learning_rate": 1.88329204801979e-05, "loss": 0.6774, "step": 7049 }, { "epoch": 0.18, "grad_norm": 3.0161314010620117, "learning_rate": 1.8832531259559987e-05, "loss": 0.6523, "step": 7050 }, { "epoch": 0.18, "grad_norm": 1.8757946491241455, "learning_rate": 1.8832141978053807e-05, "loss": 0.5985, "step": 7051 }, { "epoch": 0.18, "grad_norm": 3.0036697387695312, "learning_rate": 1.8831752635682062e-05, "loss": 0.6855, "step": 7052 }, { "epoch": 0.18, "grad_norm": 3.2807464599609375, "learning_rate": 1.8831363232447423e-05, "loss": 0.6661, "step": 7053 }, { "epoch": 0.18, "grad_norm": 1.182355523109436, "learning_rate": 1.8830973768352578e-05, "loss": 0.6279, "step": 7054 }, { "epoch": 0.18, "grad_norm": 1.4036049842834473, "learning_rate": 1.8830584243400208e-05, "loss": 0.8058, "step": 7055 }, { "epoch": 0.18, "grad_norm": 1.9529963731765747, "learning_rate": 1.8830194657592997e-05, "loss": 0.7262, "step": 7056 }, { "epoch": 0.18, "grad_norm": 1.8439189195632935, "learning_rate": 1.8829805010933638e-05, "loss": 0.6812, "step": 7057 }, { "epoch": 0.18, "grad_norm": 1.9221885204315186, "learning_rate": 1.8829415303424804e-05, "loss": 0.5798, "step": 7058 }, { "epoch": 0.18, "grad_norm": 1.5296473503112793, "learning_rate": 1.8829025535069192e-05, "loss": 0.6889, "step": 7059 }, { "epoch": 0.18, "grad_norm": 2.080380916595459, "learning_rate": 1.8828635705869483e-05, "loss": 0.5412, "step": 7060 }, { "epoch": 0.18, "grad_norm": 1.3507658243179321, "learning_rate": 1.8828245815828362e-05, "loss": 0.6438, "step": 7061 }, { "epoch": 0.18, "grad_norm": 1.8375884294509888, "learning_rate": 1.8827855864948517e-05, "loss": 0.7166, "step": 7062 }, { "epoch": 0.18, "grad_norm": 1.8484315872192383, "learning_rate": 1.8827465853232635e-05, "loss": 0.5541, "step": 7063 }, { "epoch": 0.18, "grad_norm": 1.172337293624878, "learning_rate": 1.8827075780683403e-05, "loss": 0.7162, "step": 7064 }, { "epoch": 0.18, "grad_norm": 6.632951259613037, "learning_rate": 1.8826685647303513e-05, "loss": 0.6871, "step": 7065 }, { "epoch": 0.18, "grad_norm": 2.9566328525543213, "learning_rate": 1.882629545309565e-05, "loss": 0.8879, "step": 7066 }, { "epoch": 0.18, "grad_norm": 2.0210158824920654, "learning_rate": 1.8825905198062506e-05, "loss": 0.6102, "step": 7067 }, { "epoch": 0.18, "grad_norm": 2.7635364532470703, "learning_rate": 1.8825514882206766e-05, "loss": 0.6158, "step": 7068 }, { "epoch": 0.18, "grad_norm": 1.7631278038024902, "learning_rate": 1.8825124505531123e-05, "loss": 0.647, "step": 7069 }, { "epoch": 0.18, "grad_norm": 3.3310110569000244, "learning_rate": 1.8824734068038266e-05, "loss": 0.5895, "step": 7070 }, { "epoch": 0.18, "grad_norm": 3.612362861633301, "learning_rate": 1.8824343569730884e-05, "loss": 0.6423, "step": 7071 }, { "epoch": 0.18, "grad_norm": 5.785120487213135, "learning_rate": 1.8823953010611676e-05, "loss": 0.7774, "step": 7072 }, { "epoch": 0.18, "grad_norm": 2.351191997528076, "learning_rate": 1.882356239068332e-05, "loss": 0.6631, "step": 7073 }, { "epoch": 0.18, "grad_norm": 3.269443988800049, "learning_rate": 1.8823171709948522e-05, "loss": 0.5947, "step": 7074 }, { "epoch": 0.18, "grad_norm": 1.8216049671173096, "learning_rate": 1.8822780968409967e-05, "loss": 0.613, "step": 7075 }, { "epoch": 0.18, "grad_norm": 3.6106317043304443, "learning_rate": 1.8822390166070344e-05, "loss": 0.4723, "step": 7076 }, { "epoch": 0.18, "grad_norm": 2.1772561073303223, "learning_rate": 1.8821999302932356e-05, "loss": 0.4854, "step": 7077 }, { "epoch": 0.18, "grad_norm": 3.4348928928375244, "learning_rate": 1.8821608378998687e-05, "loss": 0.6098, "step": 7078 }, { "epoch": 0.18, "grad_norm": 12.269613265991211, "learning_rate": 1.8821217394272036e-05, "loss": 0.6368, "step": 7079 }, { "epoch": 0.18, "grad_norm": 1.9735616445541382, "learning_rate": 1.88208263487551e-05, "loss": 0.7615, "step": 7080 }, { "epoch": 0.18, "grad_norm": 1.2865262031555176, "learning_rate": 1.8820435242450566e-05, "loss": 0.5564, "step": 7081 }, { "epoch": 0.18, "grad_norm": 1.768738865852356, "learning_rate": 1.8820044075361138e-05, "loss": 0.4111, "step": 7082 }, { "epoch": 0.18, "grad_norm": 1.4001693725585938, "learning_rate": 1.8819652847489506e-05, "loss": 0.5253, "step": 7083 }, { "epoch": 0.18, "grad_norm": 8.448812484741211, "learning_rate": 1.8819261558838367e-05, "loss": 0.5964, "step": 7084 }, { "epoch": 0.18, "grad_norm": 1.2148640155792236, "learning_rate": 1.8818870209410422e-05, "loss": 0.6117, "step": 7085 }, { "epoch": 0.18, "grad_norm": 6.039222240447998, "learning_rate": 1.881847879920836e-05, "loss": 0.8809, "step": 7086 }, { "epoch": 0.18, "grad_norm": 2.93327260017395, "learning_rate": 1.8818087328234882e-05, "loss": 0.6867, "step": 7087 }, { "epoch": 0.18, "grad_norm": 2.0776731967926025, "learning_rate": 1.881769579649269e-05, "loss": 0.5565, "step": 7088 }, { "epoch": 0.18, "grad_norm": 3.1204092502593994, "learning_rate": 1.8817304203984475e-05, "loss": 0.6648, "step": 7089 }, { "epoch": 0.18, "grad_norm": 2.2731404304504395, "learning_rate": 1.881691255071294e-05, "loss": 0.7545, "step": 7090 }, { "epoch": 0.18, "grad_norm": 2.0242373943328857, "learning_rate": 1.881652083668078e-05, "loss": 0.6254, "step": 7091 }, { "epoch": 0.18, "grad_norm": 1.22919762134552, "learning_rate": 1.8816129061890702e-05, "loss": 0.6294, "step": 7092 }, { "epoch": 0.18, "grad_norm": 3.8402397632598877, "learning_rate": 1.88157372263454e-05, "loss": 0.622, "step": 7093 }, { "epoch": 0.18, "grad_norm": 3.8748526573181152, "learning_rate": 1.8815345330047576e-05, "loss": 0.7495, "step": 7094 }, { "epoch": 0.18, "grad_norm": 2.598315477371216, "learning_rate": 1.881495337299993e-05, "loss": 0.7153, "step": 7095 }, { "epoch": 0.18, "grad_norm": 3.8085803985595703, "learning_rate": 1.881456135520516e-05, "loss": 0.6251, "step": 7096 }, { "epoch": 0.18, "grad_norm": 1.667300820350647, "learning_rate": 1.8814169276665977e-05, "loss": 0.6192, "step": 7097 }, { "epoch": 0.18, "grad_norm": 4.112527370452881, "learning_rate": 1.881377713738507e-05, "loss": 0.6742, "step": 7098 }, { "epoch": 0.18, "grad_norm": 2.8411595821380615, "learning_rate": 1.8813384937365157e-05, "loss": 0.6014, "step": 7099 }, { "epoch": 0.18, "grad_norm": 3.4013900756835938, "learning_rate": 1.8812992676608925e-05, "loss": 0.6609, "step": 7100 }, { "epoch": 0.18, "grad_norm": 3.4535815715789795, "learning_rate": 1.881260035511909e-05, "loss": 0.397, "step": 7101 }, { "epoch": 0.18, "grad_norm": 1.60044264793396, "learning_rate": 1.8812207972898345e-05, "loss": 0.6638, "step": 7102 }, { "epoch": 0.18, "grad_norm": 3.224581241607666, "learning_rate": 1.88118155299494e-05, "loss": 0.538, "step": 7103 }, { "epoch": 0.18, "grad_norm": 1.6304396390914917, "learning_rate": 1.881142302627496e-05, "loss": 0.6718, "step": 7104 }, { "epoch": 0.18, "grad_norm": 1.2704269886016846, "learning_rate": 1.8811030461877727e-05, "loss": 0.7669, "step": 7105 }, { "epoch": 0.18, "grad_norm": 1.4599701166152954, "learning_rate": 1.8810637836760406e-05, "loss": 0.4768, "step": 7106 }, { "epoch": 0.18, "grad_norm": 5.172676086425781, "learning_rate": 1.881024515092571e-05, "loss": 0.5079, "step": 7107 }, { "epoch": 0.18, "grad_norm": 2.2903966903686523, "learning_rate": 1.8809852404376337e-05, "loss": 0.6277, "step": 7108 }, { "epoch": 0.18, "grad_norm": 2.654711961746216, "learning_rate": 1.8809459597114993e-05, "loss": 0.5816, "step": 7109 }, { "epoch": 0.18, "grad_norm": 2.6419150829315186, "learning_rate": 1.8809066729144392e-05, "loss": 0.7185, "step": 7110 }, { "epoch": 0.18, "grad_norm": 1.8990998268127441, "learning_rate": 1.8808673800467238e-05, "loss": 0.6862, "step": 7111 }, { "epoch": 0.18, "grad_norm": 4.52529764175415, "learning_rate": 1.8808280811086234e-05, "loss": 0.6037, "step": 7112 }, { "epoch": 0.18, "grad_norm": 3.021843433380127, "learning_rate": 1.8807887761004094e-05, "loss": 0.5129, "step": 7113 }, { "epoch": 0.18, "grad_norm": 2.3498473167419434, "learning_rate": 1.8807494650223527e-05, "loss": 0.7933, "step": 7114 }, { "epoch": 0.18, "grad_norm": 3.4580066204071045, "learning_rate": 1.880710147874724e-05, "loss": 0.6291, "step": 7115 }, { "epoch": 0.18, "grad_norm": 3.258012056350708, "learning_rate": 1.880670824657794e-05, "loss": 0.6213, "step": 7116 }, { "epoch": 0.18, "grad_norm": 1.5580867528915405, "learning_rate": 1.8806314953718344e-05, "loss": 0.6402, "step": 7117 }, { "epoch": 0.18, "grad_norm": 1.6349207162857056, "learning_rate": 1.8805921600171158e-05, "loss": 0.6696, "step": 7118 }, { "epoch": 0.18, "grad_norm": 4.440701484680176, "learning_rate": 1.8805528185939092e-05, "loss": 0.5936, "step": 7119 }, { "epoch": 0.18, "grad_norm": 13.005300521850586, "learning_rate": 1.8805134711024854e-05, "loss": 0.6306, "step": 7120 }, { "epoch": 0.18, "grad_norm": 2.43143892288208, "learning_rate": 1.8804741175431164e-05, "loss": 0.7318, "step": 7121 }, { "epoch": 0.18, "grad_norm": 1.8731893301010132, "learning_rate": 1.8804347579160723e-05, "loss": 0.6321, "step": 7122 }, { "epoch": 0.18, "grad_norm": 2.622314453125, "learning_rate": 1.8803953922216255e-05, "loss": 0.6151, "step": 7123 }, { "epoch": 0.18, "grad_norm": 3.4856090545654297, "learning_rate": 1.8803560204600467e-05, "loss": 0.7051, "step": 7124 }, { "epoch": 0.18, "grad_norm": 3.222376585006714, "learning_rate": 1.8803166426316074e-05, "loss": 0.6621, "step": 7125 }, { "epoch": 0.18, "grad_norm": 2.0158987045288086, "learning_rate": 1.880277258736579e-05, "loss": 0.7152, "step": 7126 }, { "epoch": 0.18, "grad_norm": 4.758250713348389, "learning_rate": 1.8802378687752326e-05, "loss": 0.7294, "step": 7127 }, { "epoch": 0.18, "grad_norm": 1.4672266244888306, "learning_rate": 1.8801984727478397e-05, "loss": 0.408, "step": 7128 }, { "epoch": 0.18, "grad_norm": 2.1574273109436035, "learning_rate": 1.8801590706546718e-05, "loss": 0.5446, "step": 7129 }, { "epoch": 0.18, "grad_norm": 0.8756296634674072, "learning_rate": 1.880119662496001e-05, "loss": 0.5515, "step": 7130 }, { "epoch": 0.18, "grad_norm": 6.84871244430542, "learning_rate": 1.880080248272098e-05, "loss": 0.5968, "step": 7131 }, { "epoch": 0.18, "grad_norm": 2.8510019779205322, "learning_rate": 1.8800408279832353e-05, "loss": 0.6541, "step": 7132 }, { "epoch": 0.18, "grad_norm": 2.475616931915283, "learning_rate": 1.880001401629684e-05, "loss": 0.7192, "step": 7133 }, { "epoch": 0.18, "grad_norm": 3.110597610473633, "learning_rate": 1.8799619692117158e-05, "loss": 0.7519, "step": 7134 }, { "epoch": 0.18, "grad_norm": 2.534210205078125, "learning_rate": 1.8799225307296024e-05, "loss": 0.5473, "step": 7135 }, { "epoch": 0.18, "grad_norm": 3.1768789291381836, "learning_rate": 1.8798830861836158e-05, "loss": 0.6482, "step": 7136 }, { "epoch": 0.18, "grad_norm": 1.988564133644104, "learning_rate": 1.879843635574028e-05, "loss": 0.4549, "step": 7137 }, { "epoch": 0.18, "grad_norm": 2.054069995880127, "learning_rate": 1.87980417890111e-05, "loss": 0.6294, "step": 7138 }, { "epoch": 0.18, "grad_norm": 2.4163568019866943, "learning_rate": 1.8797647161651352e-05, "loss": 0.6988, "step": 7139 }, { "epoch": 0.18, "grad_norm": 2.840513229370117, "learning_rate": 1.8797252473663744e-05, "loss": 0.6359, "step": 7140 }, { "epoch": 0.18, "grad_norm": 3.182309627532959, "learning_rate": 1.8796857725050996e-05, "loss": 0.6801, "step": 7141 }, { "epoch": 0.18, "grad_norm": 2.6961140632629395, "learning_rate": 1.8796462915815832e-05, "loss": 0.58, "step": 7142 }, { "epoch": 0.18, "grad_norm": 3.0528180599212646, "learning_rate": 1.8796068045960974e-05, "loss": 0.7318, "step": 7143 }, { "epoch": 0.18, "grad_norm": 2.159916400909424, "learning_rate": 1.879567311548914e-05, "loss": 0.6988, "step": 7144 }, { "epoch": 0.18, "grad_norm": 3.618906259536743, "learning_rate": 1.8795278124403057e-05, "loss": 0.6618, "step": 7145 }, { "epoch": 0.18, "grad_norm": 5.305885314941406, "learning_rate": 1.8794883072705438e-05, "loss": 0.753, "step": 7146 }, { "epoch": 0.18, "grad_norm": 2.1078953742980957, "learning_rate": 1.8794487960399012e-05, "loss": 0.6788, "step": 7147 }, { "epoch": 0.18, "grad_norm": 1.8703590631484985, "learning_rate": 1.87940927874865e-05, "loss": 0.5466, "step": 7148 }, { "epoch": 0.18, "grad_norm": 3.81264066696167, "learning_rate": 1.8793697553970624e-05, "loss": 0.6158, "step": 7149 }, { "epoch": 0.18, "grad_norm": 2.8847954273223877, "learning_rate": 1.879330225985411e-05, "loss": 0.6781, "step": 7150 }, { "epoch": 0.18, "grad_norm": 2.044072389602661, "learning_rate": 1.8792906905139686e-05, "loss": 0.6454, "step": 7151 }, { "epoch": 0.18, "grad_norm": 2.15262508392334, "learning_rate": 1.8792511489830067e-05, "loss": 0.6289, "step": 7152 }, { "epoch": 0.18, "grad_norm": 1.048783302307129, "learning_rate": 1.8792116013927983e-05, "loss": 0.4707, "step": 7153 }, { "epoch": 0.18, "grad_norm": 3.2114009857177734, "learning_rate": 1.8791720477436158e-05, "loss": 0.6262, "step": 7154 }, { "epoch": 0.18, "grad_norm": 1.9486242532730103, "learning_rate": 1.8791324880357324e-05, "loss": 0.6777, "step": 7155 }, { "epoch": 0.18, "grad_norm": 2.771759510040283, "learning_rate": 1.8790929222694197e-05, "loss": 0.8516, "step": 7156 }, { "epoch": 0.18, "grad_norm": 3.007333755493164, "learning_rate": 1.8790533504449512e-05, "loss": 0.7171, "step": 7157 }, { "epoch": 0.18, "grad_norm": 2.1460940837860107, "learning_rate": 1.879013772562599e-05, "loss": 0.6315, "step": 7158 }, { "epoch": 0.18, "grad_norm": 2.6518545150756836, "learning_rate": 1.8789741886226364e-05, "loss": 0.6746, "step": 7159 }, { "epoch": 0.18, "grad_norm": 3.888805866241455, "learning_rate": 1.878934598625336e-05, "loss": 0.6615, "step": 7160 }, { "epoch": 0.18, "grad_norm": 1.986065149307251, "learning_rate": 1.8788950025709702e-05, "loss": 0.66, "step": 7161 }, { "epoch": 0.18, "grad_norm": 2.0954036712646484, "learning_rate": 1.8788554004598124e-05, "loss": 0.6426, "step": 7162 }, { "epoch": 0.18, "grad_norm": 2.106424331665039, "learning_rate": 1.8788157922921353e-05, "loss": 0.6301, "step": 7163 }, { "epoch": 0.18, "grad_norm": 2.627645969390869, "learning_rate": 1.878776178068212e-05, "loss": 0.7873, "step": 7164 }, { "epoch": 0.18, "grad_norm": 2.511326789855957, "learning_rate": 1.8787365577883153e-05, "loss": 0.7735, "step": 7165 }, { "epoch": 0.18, "grad_norm": 1.2682385444641113, "learning_rate": 1.8786969314527182e-05, "loss": 0.7097, "step": 7166 }, { "epoch": 0.18, "grad_norm": 1.7372835874557495, "learning_rate": 1.878657299061694e-05, "loss": 0.5593, "step": 7167 }, { "epoch": 0.18, "grad_norm": 1.9876846075057983, "learning_rate": 1.8786176606155155e-05, "loss": 0.621, "step": 7168 }, { "epoch": 0.18, "grad_norm": 1.7744486331939697, "learning_rate": 1.8785780161144563e-05, "loss": 0.6555, "step": 7169 }, { "epoch": 0.18, "grad_norm": 3.9105031490325928, "learning_rate": 1.8785383655587894e-05, "loss": 0.8738, "step": 7170 }, { "epoch": 0.18, "grad_norm": 2.316488265991211, "learning_rate": 1.8784987089487884e-05, "loss": 0.7819, "step": 7171 }, { "epoch": 0.18, "grad_norm": 2.666544198989868, "learning_rate": 1.8784590462847255e-05, "loss": 0.5773, "step": 7172 }, { "epoch": 0.18, "grad_norm": 2.197589635848999, "learning_rate": 1.8784193775668754e-05, "loss": 0.6191, "step": 7173 }, { "epoch": 0.18, "grad_norm": 3.9862887859344482, "learning_rate": 1.8783797027955102e-05, "loss": 0.6322, "step": 7174 }, { "epoch": 0.18, "grad_norm": 4.07805871963501, "learning_rate": 1.8783400219709044e-05, "loss": 0.6664, "step": 7175 }, { "epoch": 0.18, "grad_norm": 3.9648423194885254, "learning_rate": 1.878300335093331e-05, "loss": 0.6029, "step": 7176 }, { "epoch": 0.18, "grad_norm": 1.5765032768249512, "learning_rate": 1.8782606421630635e-05, "loss": 0.6668, "step": 7177 }, { "epoch": 0.18, "grad_norm": 1.7147200107574463, "learning_rate": 1.878220943180375e-05, "loss": 0.5981, "step": 7178 }, { "epoch": 0.18, "grad_norm": 3.930269956588745, "learning_rate": 1.87818123814554e-05, "loss": 0.7804, "step": 7179 }, { "epoch": 0.18, "grad_norm": 1.0487830638885498, "learning_rate": 1.878141527058831e-05, "loss": 0.4977, "step": 7180 }, { "epoch": 0.18, "grad_norm": 1.8847140073776245, "learning_rate": 1.8781018099205232e-05, "loss": 0.5515, "step": 7181 }, { "epoch": 0.18, "grad_norm": 1.56197190284729, "learning_rate": 1.878062086730889e-05, "loss": 0.5556, "step": 7182 }, { "epoch": 0.18, "grad_norm": 2.1682755947113037, "learning_rate": 1.8780223574902024e-05, "loss": 0.6684, "step": 7183 }, { "epoch": 0.18, "grad_norm": 1.6041877269744873, "learning_rate": 1.8779826221987373e-05, "loss": 0.5561, "step": 7184 }, { "epoch": 0.18, "grad_norm": 2.712358236312866, "learning_rate": 1.8779428808567675e-05, "loss": 0.7615, "step": 7185 }, { "epoch": 0.18, "grad_norm": 4.785501956939697, "learning_rate": 1.8779031334645674e-05, "loss": 0.8187, "step": 7186 }, { "epoch": 0.18, "grad_norm": 1.4801892042160034, "learning_rate": 1.87786338002241e-05, "loss": 0.7391, "step": 7187 }, { "epoch": 0.18, "grad_norm": 4.139397621154785, "learning_rate": 1.8778236205305703e-05, "loss": 0.7049, "step": 7188 }, { "epoch": 0.18, "grad_norm": 3.9249377250671387, "learning_rate": 1.8777838549893212e-05, "loss": 0.6069, "step": 7189 }, { "epoch": 0.18, "grad_norm": 2.2758326530456543, "learning_rate": 1.8777440833989375e-05, "loss": 0.5866, "step": 7190 }, { "epoch": 0.18, "grad_norm": 1.5452709197998047, "learning_rate": 1.877704305759693e-05, "loss": 0.6542, "step": 7191 }, { "epoch": 0.18, "grad_norm": 0.9335570335388184, "learning_rate": 1.8776645220718618e-05, "loss": 0.5742, "step": 7192 }, { "epoch": 0.18, "grad_norm": 2.3085904121398926, "learning_rate": 1.877624732335718e-05, "loss": 0.7029, "step": 7193 }, { "epoch": 0.18, "grad_norm": 1.2616307735443115, "learning_rate": 1.877584936551536e-05, "loss": 0.6109, "step": 7194 }, { "epoch": 0.18, "grad_norm": 1.667138695716858, "learning_rate": 1.8775451347195897e-05, "loss": 0.5314, "step": 7195 }, { "epoch": 0.18, "grad_norm": 3.180379629135132, "learning_rate": 1.877505326840154e-05, "loss": 0.6756, "step": 7196 }, { "epoch": 0.18, "grad_norm": 1.9153155088424683, "learning_rate": 1.877465512913503e-05, "loss": 0.6664, "step": 7197 }, { "epoch": 0.18, "grad_norm": 2.13322114944458, "learning_rate": 1.8774256929399106e-05, "loss": 0.6048, "step": 7198 }, { "epoch": 0.18, "grad_norm": 2.711308717727661, "learning_rate": 1.8773858669196518e-05, "loss": 0.6081, "step": 7199 }, { "epoch": 0.18, "grad_norm": 2.09928297996521, "learning_rate": 1.877346034853001e-05, "loss": 0.6069, "step": 7200 }, { "epoch": 0.18, "grad_norm": 2.7066545486450195, "learning_rate": 1.877306196740232e-05, "loss": 0.6703, "step": 7201 }, { "epoch": 0.18, "grad_norm": 1.7659536600112915, "learning_rate": 1.8772663525816203e-05, "loss": 0.5925, "step": 7202 }, { "epoch": 0.18, "grad_norm": 1.5562002658843994, "learning_rate": 1.87722650237744e-05, "loss": 0.6521, "step": 7203 }, { "epoch": 0.18, "grad_norm": 2.375951051712036, "learning_rate": 1.8771866461279658e-05, "loss": 0.8564, "step": 7204 }, { "epoch": 0.18, "grad_norm": 1.4002963304519653, "learning_rate": 1.8771467838334723e-05, "loss": 0.6399, "step": 7205 }, { "epoch": 0.18, "grad_norm": 1.472355842590332, "learning_rate": 1.8771069154942342e-05, "loss": 0.634, "step": 7206 }, { "epoch": 0.18, "grad_norm": 1.9222042560577393, "learning_rate": 1.8770670411105263e-05, "loss": 0.6474, "step": 7207 }, { "epoch": 0.18, "grad_norm": 3.7697079181671143, "learning_rate": 1.8770271606826234e-05, "loss": 0.6482, "step": 7208 }, { "epoch": 0.18, "grad_norm": 2.1398377418518066, "learning_rate": 1.8769872742108003e-05, "loss": 0.6576, "step": 7209 }, { "epoch": 0.18, "grad_norm": 4.276154041290283, "learning_rate": 1.876947381695332e-05, "loss": 0.8004, "step": 7210 }, { "epoch": 0.18, "grad_norm": 2.366408109664917, "learning_rate": 1.876907483136493e-05, "loss": 0.7734, "step": 7211 }, { "epoch": 0.18, "grad_norm": 6.4287333488464355, "learning_rate": 1.8768675785345588e-05, "loss": 0.8871, "step": 7212 }, { "epoch": 0.18, "grad_norm": 13.204826354980469, "learning_rate": 1.876827667889804e-05, "loss": 0.6668, "step": 7213 }, { "epoch": 0.18, "grad_norm": 1.0760935544967651, "learning_rate": 1.8767877512025038e-05, "loss": 0.5934, "step": 7214 }, { "epoch": 0.18, "grad_norm": 1.2127310037612915, "learning_rate": 1.876747828472933e-05, "loss": 0.6697, "step": 7215 }, { "epoch": 0.18, "grad_norm": 2.2215020656585693, "learning_rate": 1.8767078997013675e-05, "loss": 0.6772, "step": 7216 }, { "epoch": 0.18, "grad_norm": 1.344001293182373, "learning_rate": 1.8766679648880818e-05, "loss": 0.7098, "step": 7217 }, { "epoch": 0.19, "grad_norm": 2.1605679988861084, "learning_rate": 1.8766280240333512e-05, "loss": 0.5556, "step": 7218 }, { "epoch": 0.19, "grad_norm": 2.4430360794067383, "learning_rate": 1.8765880771374512e-05, "loss": 0.6545, "step": 7219 }, { "epoch": 0.19, "grad_norm": 3.5192384719848633, "learning_rate": 1.8765481242006566e-05, "loss": 0.8766, "step": 7220 }, { "epoch": 0.19, "grad_norm": 6.72195291519165, "learning_rate": 1.8765081652232434e-05, "loss": 0.7899, "step": 7221 }, { "epoch": 0.19, "grad_norm": 3.117241621017456, "learning_rate": 1.8764682002054862e-05, "loss": 0.6218, "step": 7222 }, { "epoch": 0.19, "grad_norm": 2.0059304237365723, "learning_rate": 1.876428229147661e-05, "loss": 0.5686, "step": 7223 }, { "epoch": 0.19, "grad_norm": 1.9158867597579956, "learning_rate": 1.876388252050043e-05, "loss": 0.7285, "step": 7224 }, { "epoch": 0.19, "grad_norm": 2.1666996479034424, "learning_rate": 1.876348268912908e-05, "loss": 0.5952, "step": 7225 }, { "epoch": 0.19, "grad_norm": 3.0020360946655273, "learning_rate": 1.8763082797365314e-05, "loss": 0.7408, "step": 7226 }, { "epoch": 0.19, "grad_norm": 4.272555828094482, "learning_rate": 1.876268284521188e-05, "loss": 0.7091, "step": 7227 }, { "epoch": 0.19, "grad_norm": 2.219752073287964, "learning_rate": 1.8762282832671547e-05, "loss": 0.5609, "step": 7228 }, { "epoch": 0.19, "grad_norm": 6.011653423309326, "learning_rate": 1.8761882759747064e-05, "loss": 0.8412, "step": 7229 }, { "epoch": 0.19, "grad_norm": 2.2918803691864014, "learning_rate": 1.876148262644119e-05, "loss": 0.8103, "step": 7230 }, { "epoch": 0.19, "grad_norm": 4.167177677154541, "learning_rate": 1.8761082432756685e-05, "loss": 0.779, "step": 7231 }, { "epoch": 0.19, "grad_norm": 1.9383373260498047, "learning_rate": 1.87606821786963e-05, "loss": 0.5446, "step": 7232 }, { "epoch": 0.19, "grad_norm": 3.204982280731201, "learning_rate": 1.8760281864262795e-05, "loss": 0.7161, "step": 7233 }, { "epoch": 0.19, "grad_norm": 3.0709643363952637, "learning_rate": 1.875988148945894e-05, "loss": 0.628, "step": 7234 }, { "epoch": 0.19, "grad_norm": 1.5724810361862183, "learning_rate": 1.8759481054287477e-05, "loss": 0.5587, "step": 7235 }, { "epoch": 0.19, "grad_norm": 4.139406204223633, "learning_rate": 1.8759080558751176e-05, "loss": 0.6679, "step": 7236 }, { "epoch": 0.19, "grad_norm": 1.3077729940414429, "learning_rate": 1.8758680002852796e-05, "loss": 0.5596, "step": 7237 }, { "epoch": 0.19, "grad_norm": 3.698976993560791, "learning_rate": 1.8758279386595096e-05, "loss": 0.6634, "step": 7238 }, { "epoch": 0.19, "grad_norm": 1.8721429109573364, "learning_rate": 1.875787870998084e-05, "loss": 0.7541, "step": 7239 }, { "epoch": 0.19, "grad_norm": 2.429199695587158, "learning_rate": 1.8757477973012783e-05, "loss": 0.747, "step": 7240 }, { "epoch": 0.19, "grad_norm": 1.7988839149475098, "learning_rate": 1.875707717569369e-05, "loss": 0.5869, "step": 7241 }, { "epoch": 0.19, "grad_norm": 1.5033079385757446, "learning_rate": 1.875667631802632e-05, "loss": 0.5728, "step": 7242 }, { "epoch": 0.19, "grad_norm": 2.3027775287628174, "learning_rate": 1.875627540001344e-05, "loss": 0.6077, "step": 7243 }, { "epoch": 0.19, "grad_norm": 1.4648630619049072, "learning_rate": 1.8755874421657812e-05, "loss": 0.6598, "step": 7244 }, { "epoch": 0.19, "grad_norm": 1.8353091478347778, "learning_rate": 1.87554733829622e-05, "loss": 0.6966, "step": 7245 }, { "epoch": 0.19, "grad_norm": 1.8556714057922363, "learning_rate": 1.8755072283929367e-05, "loss": 0.489, "step": 7246 }, { "epoch": 0.19, "grad_norm": 6.138103485107422, "learning_rate": 1.8754671124562072e-05, "loss": 0.8049, "step": 7247 }, { "epoch": 0.19, "grad_norm": 4.792807102203369, "learning_rate": 1.8754269904863084e-05, "loss": 0.5827, "step": 7248 }, { "epoch": 0.19, "grad_norm": 1.343267560005188, "learning_rate": 1.875386862483517e-05, "loss": 0.5995, "step": 7249 }, { "epoch": 0.19, "grad_norm": 2.6484248638153076, "learning_rate": 1.8753467284481093e-05, "loss": 0.579, "step": 7250 }, { "epoch": 0.19, "grad_norm": 5.250120162963867, "learning_rate": 1.875306588380362e-05, "loss": 0.7972, "step": 7251 }, { "epoch": 0.19, "grad_norm": 1.7696950435638428, "learning_rate": 1.8752664422805514e-05, "loss": 0.6558, "step": 7252 }, { "epoch": 0.19, "grad_norm": 1.4612312316894531, "learning_rate": 1.8752262901489545e-05, "loss": 0.6303, "step": 7253 }, { "epoch": 0.19, "grad_norm": 3.4205477237701416, "learning_rate": 1.875186131985848e-05, "loss": 0.6109, "step": 7254 }, { "epoch": 0.19, "grad_norm": 2.5353891849517822, "learning_rate": 1.8751459677915084e-05, "loss": 0.7085, "step": 7255 }, { "epoch": 0.19, "grad_norm": 2.9945898056030273, "learning_rate": 1.8751057975662124e-05, "loss": 0.6794, "step": 7256 }, { "epoch": 0.19, "grad_norm": 3.7723002433776855, "learning_rate": 1.8750656213102373e-05, "loss": 0.775, "step": 7257 }, { "epoch": 0.19, "grad_norm": 3.3400697708129883, "learning_rate": 1.8750254390238595e-05, "loss": 0.6027, "step": 7258 }, { "epoch": 0.19, "grad_norm": 2.644916296005249, "learning_rate": 1.8749852507073564e-05, "loss": 0.6621, "step": 7259 }, { "epoch": 0.19, "grad_norm": 1.9118566513061523, "learning_rate": 1.8749450563610042e-05, "loss": 0.7771, "step": 7260 }, { "epoch": 0.19, "grad_norm": 2.744609832763672, "learning_rate": 1.8749048559850805e-05, "loss": 0.5178, "step": 7261 }, { "epoch": 0.19, "grad_norm": 1.2493077516555786, "learning_rate": 1.8748646495798624e-05, "loss": 0.4591, "step": 7262 }, { "epoch": 0.19, "grad_norm": 1.9420102834701538, "learning_rate": 1.8748244371456272e-05, "loss": 0.6865, "step": 7263 }, { "epoch": 0.19, "grad_norm": 2.890761137008667, "learning_rate": 1.8747842186826512e-05, "loss": 0.7207, "step": 7264 }, { "epoch": 0.19, "grad_norm": 1.959105372428894, "learning_rate": 1.874743994191212e-05, "loss": 0.8426, "step": 7265 }, { "epoch": 0.19, "grad_norm": 2.879701852798462, "learning_rate": 1.8747037636715862e-05, "loss": 0.8308, "step": 7266 }, { "epoch": 0.19, "grad_norm": 2.349020004272461, "learning_rate": 1.8746635271240523e-05, "loss": 0.6268, "step": 7267 }, { "epoch": 0.19, "grad_norm": 3.8601841926574707, "learning_rate": 1.8746232845488868e-05, "loss": 0.7015, "step": 7268 }, { "epoch": 0.19, "grad_norm": 3.596781015396118, "learning_rate": 1.8745830359463667e-05, "loss": 0.6541, "step": 7269 }, { "epoch": 0.19, "grad_norm": 2.935681104660034, "learning_rate": 1.87454278131677e-05, "loss": 0.7056, "step": 7270 }, { "epoch": 0.19, "grad_norm": 1.7878628969192505, "learning_rate": 1.8745025206603742e-05, "loss": 0.43, "step": 7271 }, { "epoch": 0.19, "grad_norm": 1.9540865421295166, "learning_rate": 1.874462253977456e-05, "loss": 0.456, "step": 7272 }, { "epoch": 0.19, "grad_norm": 2.4527816772460938, "learning_rate": 1.8744219812682937e-05, "loss": 0.7235, "step": 7273 }, { "epoch": 0.19, "grad_norm": 2.281949758529663, "learning_rate": 1.8743817025331643e-05, "loss": 0.6154, "step": 7274 }, { "epoch": 0.19, "grad_norm": 2.196316957473755, "learning_rate": 1.8743414177723456e-05, "loss": 0.6461, "step": 7275 }, { "epoch": 0.19, "grad_norm": 2.2146153450012207, "learning_rate": 1.874301126986115e-05, "loss": 0.7361, "step": 7276 }, { "epoch": 0.19, "grad_norm": 1.6438655853271484, "learning_rate": 1.8742608301747506e-05, "loss": 0.7174, "step": 7277 }, { "epoch": 0.19, "grad_norm": 2.1113779544830322, "learning_rate": 1.8742205273385297e-05, "loss": 0.6507, "step": 7278 }, { "epoch": 0.19, "grad_norm": 2.100083112716675, "learning_rate": 1.8741802184777303e-05, "loss": 0.7358, "step": 7279 }, { "epoch": 0.19, "grad_norm": 1.707686424255371, "learning_rate": 1.8741399035926296e-05, "loss": 0.7516, "step": 7280 }, { "epoch": 0.19, "grad_norm": 4.891973972320557, "learning_rate": 1.8740995826835062e-05, "loss": 0.5958, "step": 7281 }, { "epoch": 0.19, "grad_norm": 4.159295082092285, "learning_rate": 1.8740592557506376e-05, "loss": 0.7246, "step": 7282 }, { "epoch": 0.19, "grad_norm": 1.9094281196594238, "learning_rate": 1.874018922794302e-05, "loss": 0.5508, "step": 7283 }, { "epoch": 0.19, "grad_norm": 3.363654613494873, "learning_rate": 1.8739785838147767e-05, "loss": 0.6066, "step": 7284 }, { "epoch": 0.19, "grad_norm": 1.7939348220825195, "learning_rate": 1.87393823881234e-05, "loss": 0.6216, "step": 7285 }, { "epoch": 0.19, "grad_norm": 2.0349271297454834, "learning_rate": 1.8738978877872703e-05, "loss": 0.5039, "step": 7286 }, { "epoch": 0.19, "grad_norm": 3.911621570587158, "learning_rate": 1.8738575307398454e-05, "loss": 0.5791, "step": 7287 }, { "epoch": 0.19, "grad_norm": 2.2373404502868652, "learning_rate": 1.8738171676703434e-05, "loss": 0.7255, "step": 7288 }, { "epoch": 0.19, "grad_norm": 2.0674660205841064, "learning_rate": 1.8737767985790424e-05, "loss": 0.6324, "step": 7289 }, { "epoch": 0.19, "grad_norm": 1.5836447477340698, "learning_rate": 1.8737364234662207e-05, "loss": 0.5962, "step": 7290 }, { "epoch": 0.19, "grad_norm": 1.6262468099594116, "learning_rate": 1.8736960423321566e-05, "loss": 0.5587, "step": 7291 }, { "epoch": 0.19, "grad_norm": 5.197154998779297, "learning_rate": 1.873655655177128e-05, "loss": 0.7698, "step": 7292 }, { "epoch": 0.19, "grad_norm": 1.6017168760299683, "learning_rate": 1.8736152620014135e-05, "loss": 0.6735, "step": 7293 }, { "epoch": 0.19, "grad_norm": 2.0250508785247803, "learning_rate": 1.873574862805292e-05, "loss": 0.6893, "step": 7294 }, { "epoch": 0.19, "grad_norm": 2.600827693939209, "learning_rate": 1.873534457589041e-05, "loss": 0.7522, "step": 7295 }, { "epoch": 0.19, "grad_norm": 2.4309158325195312, "learning_rate": 1.873494046352939e-05, "loss": 0.6839, "step": 7296 }, { "epoch": 0.19, "grad_norm": 3.2417166233062744, "learning_rate": 1.8734536290972653e-05, "loss": 0.5321, "step": 7297 }, { "epoch": 0.19, "grad_norm": 7.2805304527282715, "learning_rate": 1.8734132058222976e-05, "loss": 0.6525, "step": 7298 }, { "epoch": 0.19, "grad_norm": 1.582105040550232, "learning_rate": 1.8733727765283155e-05, "loss": 0.6966, "step": 7299 }, { "epoch": 0.19, "grad_norm": 4.453383922576904, "learning_rate": 1.8733323412155965e-05, "loss": 0.9412, "step": 7300 }, { "epoch": 0.19, "grad_norm": 2.1452996730804443, "learning_rate": 1.8732918998844197e-05, "loss": 0.5859, "step": 7301 }, { "epoch": 0.19, "grad_norm": 1.1868934631347656, "learning_rate": 1.8732514525350636e-05, "loss": 0.5132, "step": 7302 }, { "epoch": 0.19, "grad_norm": 3.357819080352783, "learning_rate": 1.873210999167807e-05, "loss": 0.8444, "step": 7303 }, { "epoch": 0.19, "grad_norm": 4.8963422775268555, "learning_rate": 1.873170539782929e-05, "loss": 0.6391, "step": 7304 }, { "epoch": 0.19, "grad_norm": 1.7678821086883545, "learning_rate": 1.8731300743807086e-05, "loss": 0.5847, "step": 7305 }, { "epoch": 0.19, "grad_norm": 1.885353446006775, "learning_rate": 1.873089602961424e-05, "loss": 0.5843, "step": 7306 }, { "epoch": 0.19, "grad_norm": 3.3379745483398438, "learning_rate": 1.8730491255253543e-05, "loss": 0.7102, "step": 7307 }, { "epoch": 0.19, "grad_norm": 3.412285566329956, "learning_rate": 1.8730086420727784e-05, "loss": 0.6448, "step": 7308 }, { "epoch": 0.19, "grad_norm": 6.574073314666748, "learning_rate": 1.8729681526039755e-05, "loss": 0.8983, "step": 7309 }, { "epoch": 0.19, "grad_norm": 2.7433485984802246, "learning_rate": 1.8729276571192248e-05, "loss": 0.6618, "step": 7310 }, { "epoch": 0.19, "grad_norm": 2.0773584842681885, "learning_rate": 1.8728871556188047e-05, "loss": 0.7536, "step": 7311 }, { "epoch": 0.19, "grad_norm": 1.9501498937606812, "learning_rate": 1.8728466481029952e-05, "loss": 0.6747, "step": 7312 }, { "epoch": 0.19, "grad_norm": 3.6058571338653564, "learning_rate": 1.8728061345720747e-05, "loss": 0.66, "step": 7313 }, { "epoch": 0.19, "grad_norm": 7.694303512573242, "learning_rate": 1.8727656150263228e-05, "loss": 0.5682, "step": 7314 }, { "epoch": 0.19, "grad_norm": 1.341855525970459, "learning_rate": 1.8727250894660183e-05, "loss": 0.6047, "step": 7315 }, { "epoch": 0.19, "grad_norm": 2.790252923965454, "learning_rate": 1.8726845578914414e-05, "loss": 0.667, "step": 7316 }, { "epoch": 0.19, "grad_norm": 5.366899490356445, "learning_rate": 1.8726440203028702e-05, "loss": 0.4999, "step": 7317 }, { "epoch": 0.19, "grad_norm": 1.7161844968795776, "learning_rate": 1.8726034767005847e-05, "loss": 0.609, "step": 7318 }, { "epoch": 0.19, "grad_norm": 3.5388333797454834, "learning_rate": 1.8725629270848643e-05, "loss": 0.7887, "step": 7319 }, { "epoch": 0.19, "grad_norm": 2.979109764099121, "learning_rate": 1.872522371455989e-05, "loss": 0.5284, "step": 7320 }, { "epoch": 0.19, "grad_norm": 2.3394434452056885, "learning_rate": 1.872481809814237e-05, "loss": 0.6039, "step": 7321 }, { "epoch": 0.19, "grad_norm": 1.2044880390167236, "learning_rate": 1.872441242159889e-05, "loss": 0.6318, "step": 7322 }, { "epoch": 0.19, "grad_norm": 2.0411953926086426, "learning_rate": 1.8724006684932236e-05, "loss": 0.5937, "step": 7323 }, { "epoch": 0.19, "grad_norm": 1.6449267864227295, "learning_rate": 1.872360088814521e-05, "loss": 0.5743, "step": 7324 }, { "epoch": 0.19, "grad_norm": 1.8970746994018555, "learning_rate": 1.8723195031240608e-05, "loss": 0.6471, "step": 7325 }, { "epoch": 0.19, "grad_norm": 2.684201717376709, "learning_rate": 1.8722789114221227e-05, "loss": 0.7551, "step": 7326 }, { "epoch": 0.19, "grad_norm": 5.2185516357421875, "learning_rate": 1.8722383137089862e-05, "loss": 0.7234, "step": 7327 }, { "epoch": 0.19, "grad_norm": 2.6142537593841553, "learning_rate": 1.8721977099849316e-05, "loss": 0.5184, "step": 7328 }, { "epoch": 0.19, "grad_norm": 3.239607572555542, "learning_rate": 1.872157100250238e-05, "loss": 0.7426, "step": 7329 }, { "epoch": 0.19, "grad_norm": 2.013148069381714, "learning_rate": 1.8721164845051857e-05, "loss": 0.6616, "step": 7330 }, { "epoch": 0.19, "grad_norm": 2.3610875606536865, "learning_rate": 1.8720758627500544e-05, "loss": 0.7469, "step": 7331 }, { "epoch": 0.19, "grad_norm": 2.4440507888793945, "learning_rate": 1.8720352349851245e-05, "loss": 0.5969, "step": 7332 }, { "epoch": 0.19, "grad_norm": 3.168031930923462, "learning_rate": 1.8719946012106756e-05, "loss": 0.5047, "step": 7333 }, { "epoch": 0.19, "grad_norm": 4.173730850219727, "learning_rate": 1.8719539614269875e-05, "loss": 0.8973, "step": 7334 }, { "epoch": 0.19, "grad_norm": 1.5769625902175903, "learning_rate": 1.8719133156343402e-05, "loss": 0.6363, "step": 7335 }, { "epoch": 0.19, "grad_norm": 2.249145030975342, "learning_rate": 1.8718726638330147e-05, "loss": 0.6043, "step": 7336 }, { "epoch": 0.19, "grad_norm": 4.410606861114502, "learning_rate": 1.8718320060232904e-05, "loss": 0.7596, "step": 7337 }, { "epoch": 0.19, "grad_norm": 1.6526261568069458, "learning_rate": 1.8717913422054476e-05, "loss": 0.7014, "step": 7338 }, { "epoch": 0.19, "grad_norm": 1.6153442859649658, "learning_rate": 1.8717506723797668e-05, "loss": 0.5964, "step": 7339 }, { "epoch": 0.19, "grad_norm": 3.393540382385254, "learning_rate": 1.8717099965465278e-05, "loss": 0.741, "step": 7340 }, { "epoch": 0.19, "grad_norm": 1.3302357196807861, "learning_rate": 1.871669314706011e-05, "loss": 0.5538, "step": 7341 }, { "epoch": 0.19, "grad_norm": 1.719645619392395, "learning_rate": 1.871628626858497e-05, "loss": 0.6476, "step": 7342 }, { "epoch": 0.19, "grad_norm": 1.6250598430633545, "learning_rate": 1.871587933004266e-05, "loss": 0.6608, "step": 7343 }, { "epoch": 0.19, "grad_norm": 2.033881425857544, "learning_rate": 1.8715472331435994e-05, "loss": 0.532, "step": 7344 }, { "epoch": 0.19, "grad_norm": 1.7014002799987793, "learning_rate": 1.8715065272767757e-05, "loss": 0.5565, "step": 7345 }, { "epoch": 0.19, "grad_norm": 3.244906425476074, "learning_rate": 1.8714658154040776e-05, "loss": 0.7095, "step": 7346 }, { "epoch": 0.19, "grad_norm": 1.7785478830337524, "learning_rate": 1.871425097525784e-05, "loss": 0.594, "step": 7347 }, { "epoch": 0.19, "grad_norm": 1.7301652431488037, "learning_rate": 1.871384373642176e-05, "loss": 0.7012, "step": 7348 }, { "epoch": 0.19, "grad_norm": 1.683398723602295, "learning_rate": 1.8713436437535345e-05, "loss": 0.6589, "step": 7349 }, { "epoch": 0.19, "grad_norm": 1.716964840888977, "learning_rate": 1.8713029078601403e-05, "loss": 0.7268, "step": 7350 }, { "epoch": 0.19, "grad_norm": 2.274503231048584, "learning_rate": 1.8712621659622735e-05, "loss": 0.5762, "step": 7351 }, { "epoch": 0.19, "grad_norm": 1.4155091047286987, "learning_rate": 1.8712214180602154e-05, "loss": 0.597, "step": 7352 }, { "epoch": 0.19, "grad_norm": 4.284697532653809, "learning_rate": 1.8711806641542465e-05, "loss": 0.7805, "step": 7353 }, { "epoch": 0.19, "grad_norm": 2.1455934047698975, "learning_rate": 1.871139904244648e-05, "loss": 0.6164, "step": 7354 }, { "epoch": 0.19, "grad_norm": 3.848687171936035, "learning_rate": 1.8710991383317004e-05, "loss": 0.813, "step": 7355 }, { "epoch": 0.19, "grad_norm": 4.4906511306762695, "learning_rate": 1.871058366415685e-05, "loss": 0.4709, "step": 7356 }, { "epoch": 0.19, "grad_norm": 1.7966828346252441, "learning_rate": 1.8710175884968826e-05, "loss": 0.5534, "step": 7357 }, { "epoch": 0.19, "grad_norm": 2.8396401405334473, "learning_rate": 1.870976804575574e-05, "loss": 0.6306, "step": 7358 }, { "epoch": 0.19, "grad_norm": 1.8551535606384277, "learning_rate": 1.8709360146520404e-05, "loss": 0.6085, "step": 7359 }, { "epoch": 0.19, "grad_norm": 3.2314422130584717, "learning_rate": 1.870895218726563e-05, "loss": 0.7276, "step": 7360 }, { "epoch": 0.19, "grad_norm": 3.5212578773498535, "learning_rate": 1.8708544167994232e-05, "loss": 0.5252, "step": 7361 }, { "epoch": 0.19, "grad_norm": 1.5469766855239868, "learning_rate": 1.870813608870902e-05, "loss": 0.6016, "step": 7362 }, { "epoch": 0.19, "grad_norm": 2.5291929244995117, "learning_rate": 1.87077279494128e-05, "loss": 0.5746, "step": 7363 }, { "epoch": 0.19, "grad_norm": 2.0438523292541504, "learning_rate": 1.8707319750108392e-05, "loss": 0.7007, "step": 7364 }, { "epoch": 0.19, "grad_norm": 1.9327000379562378, "learning_rate": 1.8706911490798607e-05, "loss": 0.6691, "step": 7365 }, { "epoch": 0.19, "grad_norm": 1.9916877746582031, "learning_rate": 1.8706503171486258e-05, "loss": 0.621, "step": 7366 }, { "epoch": 0.19, "grad_norm": 1.8980978727340698, "learning_rate": 1.870609479217416e-05, "loss": 0.5429, "step": 7367 }, { "epoch": 0.19, "grad_norm": 1.635015606880188, "learning_rate": 1.8705686352865125e-05, "loss": 0.4955, "step": 7368 }, { "epoch": 0.19, "grad_norm": 2.2301058769226074, "learning_rate": 1.870527785356197e-05, "loss": 0.7448, "step": 7369 }, { "epoch": 0.19, "grad_norm": 1.2888414859771729, "learning_rate": 1.870486929426751e-05, "loss": 0.5463, "step": 7370 }, { "epoch": 0.19, "grad_norm": 3.3243653774261475, "learning_rate": 1.8704460674984558e-05, "loss": 0.6856, "step": 7371 }, { "epoch": 0.19, "grad_norm": 1.8381015062332153, "learning_rate": 1.870405199571593e-05, "loss": 0.7159, "step": 7372 }, { "epoch": 0.19, "grad_norm": 3.375481128692627, "learning_rate": 1.870364325646445e-05, "loss": 0.7522, "step": 7373 }, { "epoch": 0.19, "grad_norm": 3.918231248855591, "learning_rate": 1.8703234457232922e-05, "loss": 0.6284, "step": 7374 }, { "epoch": 0.19, "grad_norm": 4.336569309234619, "learning_rate": 1.8702825598024175e-05, "loss": 0.7018, "step": 7375 }, { "epoch": 0.19, "grad_norm": 4.420961856842041, "learning_rate": 1.870241667884102e-05, "loss": 0.6355, "step": 7376 }, { "epoch": 0.19, "grad_norm": 1.6153268814086914, "learning_rate": 1.8702007699686274e-05, "loss": 0.6793, "step": 7377 }, { "epoch": 0.19, "grad_norm": 7.487771987915039, "learning_rate": 1.8701598660562762e-05, "loss": 0.6992, "step": 7378 }, { "epoch": 0.19, "grad_norm": 3.424513578414917, "learning_rate": 1.8701189561473295e-05, "loss": 0.5746, "step": 7379 }, { "epoch": 0.19, "grad_norm": 2.2718913555145264, "learning_rate": 1.8700780402420702e-05, "loss": 0.5464, "step": 7380 }, { "epoch": 0.19, "grad_norm": 5.548966407775879, "learning_rate": 1.8700371183407792e-05, "loss": 0.6727, "step": 7381 }, { "epoch": 0.19, "grad_norm": 2.227325201034546, "learning_rate": 1.869996190443739e-05, "loss": 0.6332, "step": 7382 }, { "epoch": 0.19, "grad_norm": 9.949682235717773, "learning_rate": 1.869955256551232e-05, "loss": 0.7211, "step": 7383 }, { "epoch": 0.19, "grad_norm": 2.1214728355407715, "learning_rate": 1.8699143166635395e-05, "loss": 0.6469, "step": 7384 }, { "epoch": 0.19, "grad_norm": 1.5197153091430664, "learning_rate": 1.8698733707809443e-05, "loss": 0.6431, "step": 7385 }, { "epoch": 0.19, "grad_norm": 1.782021164894104, "learning_rate": 1.8698324189037285e-05, "loss": 0.5359, "step": 7386 }, { "epoch": 0.19, "grad_norm": 3.7672269344329834, "learning_rate": 1.8697914610321737e-05, "loss": 0.6633, "step": 7387 }, { "epoch": 0.19, "grad_norm": 3.3171937465667725, "learning_rate": 1.869750497166563e-05, "loss": 0.595, "step": 7388 }, { "epoch": 0.19, "grad_norm": 1.6010544300079346, "learning_rate": 1.8697095273071785e-05, "loss": 0.7915, "step": 7389 }, { "epoch": 0.19, "grad_norm": 2.3889551162719727, "learning_rate": 1.8696685514543017e-05, "loss": 0.5575, "step": 7390 }, { "epoch": 0.19, "grad_norm": 1.4346908330917358, "learning_rate": 1.869627569608216e-05, "loss": 0.563, "step": 7391 }, { "epoch": 0.19, "grad_norm": 7.423793315887451, "learning_rate": 1.8695865817692034e-05, "loss": 0.8568, "step": 7392 }, { "epoch": 0.19, "grad_norm": 2.9470674991607666, "learning_rate": 1.8695455879375465e-05, "loss": 0.4515, "step": 7393 }, { "epoch": 0.19, "grad_norm": 2.349997043609619, "learning_rate": 1.8695045881135276e-05, "loss": 0.8057, "step": 7394 }, { "epoch": 0.19, "grad_norm": 2.012960433959961, "learning_rate": 1.8694635822974292e-05, "loss": 0.717, "step": 7395 }, { "epoch": 0.19, "grad_norm": 1.2026910781860352, "learning_rate": 1.869422570489534e-05, "loss": 0.5492, "step": 7396 }, { "epoch": 0.19, "grad_norm": 1.714475393295288, "learning_rate": 1.869381552690125e-05, "loss": 0.5509, "step": 7397 }, { "epoch": 0.19, "grad_norm": 3.0629448890686035, "learning_rate": 1.8693405288994845e-05, "loss": 0.6152, "step": 7398 }, { "epoch": 0.19, "grad_norm": 2.4126362800598145, "learning_rate": 1.869299499117895e-05, "loss": 0.5003, "step": 7399 }, { "epoch": 0.19, "grad_norm": 5.675660610198975, "learning_rate": 1.8692584633456396e-05, "loss": 0.7082, "step": 7400 }, { "epoch": 0.19, "grad_norm": 3.2308788299560547, "learning_rate": 1.869217421583001e-05, "loss": 0.4707, "step": 7401 }, { "epoch": 0.19, "grad_norm": 4.309744358062744, "learning_rate": 1.8691763738302623e-05, "loss": 0.7278, "step": 7402 }, { "epoch": 0.19, "grad_norm": 4.785208225250244, "learning_rate": 1.869135320087706e-05, "loss": 0.6106, "step": 7403 }, { "epoch": 0.19, "grad_norm": 4.517434120178223, "learning_rate": 1.8690942603556148e-05, "loss": 0.5972, "step": 7404 }, { "epoch": 0.19, "grad_norm": 3.7380762100219727, "learning_rate": 1.869053194634272e-05, "loss": 0.6914, "step": 7405 }, { "epoch": 0.19, "grad_norm": 3.0229532718658447, "learning_rate": 1.8690121229239608e-05, "loss": 0.7478, "step": 7406 }, { "epoch": 0.19, "grad_norm": 3.935929298400879, "learning_rate": 1.868971045224964e-05, "loss": 0.7518, "step": 7407 }, { "epoch": 0.19, "grad_norm": 3.363041877746582, "learning_rate": 1.8689299615375648e-05, "loss": 0.626, "step": 7408 }, { "epoch": 0.19, "grad_norm": 3.0320396423339844, "learning_rate": 1.868888871862046e-05, "loss": 0.5588, "step": 7409 }, { "epoch": 0.19, "grad_norm": 1.2577052116394043, "learning_rate": 1.868847776198691e-05, "loss": 0.6168, "step": 7410 }, { "epoch": 0.19, "grad_norm": 2.795644760131836, "learning_rate": 1.868806674547783e-05, "loss": 0.5494, "step": 7411 }, { "epoch": 0.19, "grad_norm": 4.421703815460205, "learning_rate": 1.8687655669096053e-05, "loss": 0.7455, "step": 7412 }, { "epoch": 0.19, "grad_norm": 3.536407232284546, "learning_rate": 1.8687244532844412e-05, "loss": 0.716, "step": 7413 }, { "epoch": 0.19, "grad_norm": 2.1289777755737305, "learning_rate": 1.868683333672574e-05, "loss": 0.6798, "step": 7414 }, { "epoch": 0.19, "grad_norm": 1.863594651222229, "learning_rate": 1.8686422080742868e-05, "loss": 0.5695, "step": 7415 }, { "epoch": 0.19, "grad_norm": 1.9563668966293335, "learning_rate": 1.868601076489863e-05, "loss": 0.6691, "step": 7416 }, { "epoch": 0.19, "grad_norm": 1.8327032327651978, "learning_rate": 1.8685599389195868e-05, "loss": 0.5917, "step": 7417 }, { "epoch": 0.19, "grad_norm": 5.406789779663086, "learning_rate": 1.868518795363741e-05, "loss": 0.6459, "step": 7418 }, { "epoch": 0.19, "grad_norm": 2.1075363159179688, "learning_rate": 1.8684776458226094e-05, "loss": 0.8578, "step": 7419 }, { "epoch": 0.19, "grad_norm": 1.9263533353805542, "learning_rate": 1.8684364902964754e-05, "loss": 0.5662, "step": 7420 }, { "epoch": 0.19, "grad_norm": 3.6163175106048584, "learning_rate": 1.8683953287856226e-05, "loss": 0.6755, "step": 7421 }, { "epoch": 0.19, "grad_norm": 1.6285656690597534, "learning_rate": 1.8683541612903348e-05, "loss": 0.5802, "step": 7422 }, { "epoch": 0.19, "grad_norm": 6.548663139343262, "learning_rate": 1.868312987810896e-05, "loss": 0.715, "step": 7423 }, { "epoch": 0.19, "grad_norm": 1.5612674951553345, "learning_rate": 1.8682718083475892e-05, "loss": 0.5877, "step": 7424 }, { "epoch": 0.19, "grad_norm": 1.6848423480987549, "learning_rate": 1.8682306229006987e-05, "loss": 0.6122, "step": 7425 }, { "epoch": 0.19, "grad_norm": 3.4653666019439697, "learning_rate": 1.8681894314705085e-05, "loss": 0.7195, "step": 7426 }, { "epoch": 0.19, "grad_norm": 2.708226442337036, "learning_rate": 1.868148234057302e-05, "loss": 0.8095, "step": 7427 }, { "epoch": 0.19, "grad_norm": 2.2021067142486572, "learning_rate": 1.8681070306613632e-05, "loss": 0.6581, "step": 7428 }, { "epoch": 0.19, "grad_norm": 1.4124897718429565, "learning_rate": 1.8680658212829765e-05, "loss": 0.7298, "step": 7429 }, { "epoch": 0.19, "grad_norm": 1.4329249858856201, "learning_rate": 1.868024605922425e-05, "loss": 0.6078, "step": 7430 }, { "epoch": 0.19, "grad_norm": 1.7693339586257935, "learning_rate": 1.8679833845799936e-05, "loss": 0.6311, "step": 7431 }, { "epoch": 0.19, "grad_norm": 2.705239772796631, "learning_rate": 1.867942157255966e-05, "loss": 0.6847, "step": 7432 }, { "epoch": 0.19, "grad_norm": 2.010329246520996, "learning_rate": 1.8679009239506264e-05, "loss": 0.521, "step": 7433 }, { "epoch": 0.19, "grad_norm": 4.0693559646606445, "learning_rate": 1.8678596846642584e-05, "loss": 0.6527, "step": 7434 }, { "epoch": 0.19, "grad_norm": 2.1469690799713135, "learning_rate": 1.8678184393971476e-05, "loss": 0.6162, "step": 7435 }, { "epoch": 0.19, "grad_norm": 1.1125068664550781, "learning_rate": 1.8677771881495767e-05, "loss": 0.6201, "step": 7436 }, { "epoch": 0.19, "grad_norm": 1.454188346862793, "learning_rate": 1.8677359309218305e-05, "loss": 0.5388, "step": 7437 }, { "epoch": 0.19, "grad_norm": 1.6755988597869873, "learning_rate": 1.867694667714194e-05, "loss": 0.6373, "step": 7438 }, { "epoch": 0.19, "grad_norm": 2.294477939605713, "learning_rate": 1.8676533985269504e-05, "loss": 0.6591, "step": 7439 }, { "epoch": 0.19, "grad_norm": 1.442877173423767, "learning_rate": 1.867612123360385e-05, "loss": 0.5564, "step": 7440 }, { "epoch": 0.19, "grad_norm": 2.2979393005371094, "learning_rate": 1.8675708422147817e-05, "loss": 0.4706, "step": 7441 }, { "epoch": 0.19, "grad_norm": 1.5322452783584595, "learning_rate": 1.8675295550904256e-05, "loss": 0.603, "step": 7442 }, { "epoch": 0.19, "grad_norm": 3.555809259414673, "learning_rate": 1.8674882619876008e-05, "loss": 0.7079, "step": 7443 }, { "epoch": 0.19, "grad_norm": 1.299848198890686, "learning_rate": 1.8674469629065916e-05, "loss": 0.6297, "step": 7444 }, { "epoch": 0.19, "grad_norm": 3.969923496246338, "learning_rate": 1.8674056578476832e-05, "loss": 0.7396, "step": 7445 }, { "epoch": 0.19, "grad_norm": 1.2454434633255005, "learning_rate": 1.8673643468111597e-05, "loss": 0.6184, "step": 7446 }, { "epoch": 0.19, "grad_norm": 1.6825255155563354, "learning_rate": 1.8673230297973063e-05, "loss": 0.6461, "step": 7447 }, { "epoch": 0.19, "grad_norm": 3.7788684368133545, "learning_rate": 1.8672817068064073e-05, "loss": 0.5892, "step": 7448 }, { "epoch": 0.19, "grad_norm": 2.024231195449829, "learning_rate": 1.867240377838748e-05, "loss": 0.6455, "step": 7449 }, { "epoch": 0.19, "grad_norm": 3.4545669555664062, "learning_rate": 1.8671990428946124e-05, "loss": 0.7571, "step": 7450 }, { "epoch": 0.19, "grad_norm": 6.9876251220703125, "learning_rate": 1.867157701974286e-05, "loss": 0.5604, "step": 7451 }, { "epoch": 0.19, "grad_norm": 1.805560827255249, "learning_rate": 1.8671163550780535e-05, "loss": 0.5472, "step": 7452 }, { "epoch": 0.19, "grad_norm": 1.8587855100631714, "learning_rate": 1.8670750022062e-05, "loss": 0.5731, "step": 7453 }, { "epoch": 0.19, "grad_norm": 4.355788230895996, "learning_rate": 1.8670336433590103e-05, "loss": 0.6859, "step": 7454 }, { "epoch": 0.19, "grad_norm": 2.9397671222686768, "learning_rate": 1.8669922785367694e-05, "loss": 0.6929, "step": 7455 }, { "epoch": 0.19, "grad_norm": 1.3089474439620972, "learning_rate": 1.866950907739762e-05, "loss": 0.6452, "step": 7456 }, { "epoch": 0.19, "grad_norm": 2.699101686477661, "learning_rate": 1.8669095309682744e-05, "loss": 0.6527, "step": 7457 }, { "epoch": 0.19, "grad_norm": 2.035660743713379, "learning_rate": 1.8668681482225907e-05, "loss": 0.5332, "step": 7458 }, { "epoch": 0.19, "grad_norm": 2.165457010269165, "learning_rate": 1.866826759502996e-05, "loss": 0.6794, "step": 7459 }, { "epoch": 0.19, "grad_norm": 1.5836747884750366, "learning_rate": 1.866785364809776e-05, "loss": 0.6388, "step": 7460 }, { "epoch": 0.19, "grad_norm": 2.219403028488159, "learning_rate": 1.8667439641432162e-05, "loss": 0.6894, "step": 7461 }, { "epoch": 0.19, "grad_norm": 2.415252923965454, "learning_rate": 1.8667025575036012e-05, "loss": 0.7102, "step": 7462 }, { "epoch": 0.19, "grad_norm": 6.344020843505859, "learning_rate": 1.8666611448912167e-05, "loss": 0.5769, "step": 7463 }, { "epoch": 0.19, "grad_norm": 1.6399872303009033, "learning_rate": 1.8666197263063483e-05, "loss": 0.5868, "step": 7464 }, { "epoch": 0.19, "grad_norm": 2.561347723007202, "learning_rate": 1.866578301749281e-05, "loss": 0.7277, "step": 7465 }, { "epoch": 0.19, "grad_norm": 1.9276580810546875, "learning_rate": 1.8665368712203003e-05, "loss": 0.5979, "step": 7466 }, { "epoch": 0.19, "grad_norm": 3.7126715183258057, "learning_rate": 1.866495434719692e-05, "loss": 0.5838, "step": 7467 }, { "epoch": 0.19, "grad_norm": 3.741424322128296, "learning_rate": 1.8664539922477417e-05, "loss": 0.7917, "step": 7468 }, { "epoch": 0.19, "grad_norm": 1.2471113204956055, "learning_rate": 1.866412543804735e-05, "loss": 0.6378, "step": 7469 }, { "epoch": 0.19, "grad_norm": 3.5463194847106934, "learning_rate": 1.866371089390957e-05, "loss": 0.6872, "step": 7470 }, { "epoch": 0.19, "grad_norm": 2.907618284225464, "learning_rate": 1.866329629006694e-05, "loss": 0.5054, "step": 7471 }, { "epoch": 0.19, "grad_norm": 2.7025763988494873, "learning_rate": 1.8662881626522314e-05, "loss": 0.6532, "step": 7472 }, { "epoch": 0.19, "grad_norm": 4.300128936767578, "learning_rate": 1.8662466903278554e-05, "loss": 0.7055, "step": 7473 }, { "epoch": 0.19, "grad_norm": 6.101241588592529, "learning_rate": 1.866205212033851e-05, "loss": 0.7626, "step": 7474 }, { "epoch": 0.19, "grad_norm": 2.124988555908203, "learning_rate": 1.8661637277705046e-05, "loss": 0.7631, "step": 7475 }, { "epoch": 0.19, "grad_norm": 4.111902236938477, "learning_rate": 1.866122237538102e-05, "loss": 0.4784, "step": 7476 }, { "epoch": 0.19, "grad_norm": 1.988675832748413, "learning_rate": 1.8660807413369292e-05, "loss": 0.579, "step": 7477 }, { "epoch": 0.19, "grad_norm": 1.9153352975845337, "learning_rate": 1.8660392391672717e-05, "loss": 0.6688, "step": 7478 }, { "epoch": 0.19, "grad_norm": 1.8919657468795776, "learning_rate": 1.865997731029416e-05, "loss": 0.6963, "step": 7479 }, { "epoch": 0.19, "grad_norm": 2.1464171409606934, "learning_rate": 1.865956216923648e-05, "loss": 0.6672, "step": 7480 }, { "epoch": 0.19, "grad_norm": 2.0354502201080322, "learning_rate": 1.865914696850254e-05, "loss": 0.8368, "step": 7481 }, { "epoch": 0.19, "grad_norm": 2.847604513168335, "learning_rate": 1.8658731708095196e-05, "loss": 0.5095, "step": 7482 }, { "epoch": 0.19, "grad_norm": 2.7193408012390137, "learning_rate": 1.8658316388017317e-05, "loss": 0.6846, "step": 7483 }, { "epoch": 0.19, "grad_norm": 3.4995787143707275, "learning_rate": 1.865790100827176e-05, "loss": 0.9024, "step": 7484 }, { "epoch": 0.19, "grad_norm": 4.129681587219238, "learning_rate": 1.8657485568861386e-05, "loss": 0.7649, "step": 7485 }, { "epoch": 0.19, "grad_norm": 1.4854058027267456, "learning_rate": 1.8657070069789066e-05, "loss": 0.6339, "step": 7486 }, { "epoch": 0.19, "grad_norm": 3.05918025970459, "learning_rate": 1.8656654511057652e-05, "loss": 0.7029, "step": 7487 }, { "epoch": 0.19, "grad_norm": 3.0880091190338135, "learning_rate": 1.865623889267002e-05, "loss": 0.5464, "step": 7488 }, { "epoch": 0.19, "grad_norm": 2.043661117553711, "learning_rate": 1.865582321462902e-05, "loss": 0.6346, "step": 7489 }, { "epoch": 0.19, "grad_norm": 1.9412906169891357, "learning_rate": 1.865540747693753e-05, "loss": 0.658, "step": 7490 }, { "epoch": 0.19, "grad_norm": 3.3050177097320557, "learning_rate": 1.865499167959841e-05, "loss": 0.7632, "step": 7491 }, { "epoch": 0.19, "grad_norm": 2.2266881465911865, "learning_rate": 1.8654575822614523e-05, "loss": 0.597, "step": 7492 }, { "epoch": 0.19, "grad_norm": 1.9961121082305908, "learning_rate": 1.8654159905988734e-05, "loss": 0.6532, "step": 7493 }, { "epoch": 0.19, "grad_norm": 1.6821945905685425, "learning_rate": 1.8653743929723917e-05, "loss": 0.5724, "step": 7494 }, { "epoch": 0.19, "grad_norm": 2.084296703338623, "learning_rate": 1.865332789382293e-05, "loss": 0.6701, "step": 7495 }, { "epoch": 0.19, "grad_norm": 2.0311005115509033, "learning_rate": 1.8652911798288646e-05, "loss": 0.685, "step": 7496 }, { "epoch": 0.19, "grad_norm": 2.495459794998169, "learning_rate": 1.865249564312393e-05, "loss": 0.6226, "step": 7497 }, { "epoch": 0.19, "grad_norm": 3.208566904067993, "learning_rate": 1.865207942833165e-05, "loss": 0.5789, "step": 7498 }, { "epoch": 0.19, "grad_norm": 3.302349805831909, "learning_rate": 1.865166315391467e-05, "loss": 0.6454, "step": 7499 }, { "epoch": 0.19, "grad_norm": 1.6938422918319702, "learning_rate": 1.865124681987587e-05, "loss": 0.5785, "step": 7500 }, { "epoch": 0.19, "grad_norm": 2.252074956893921, "learning_rate": 1.865083042621811e-05, "loss": 0.5756, "step": 7501 }, { "epoch": 0.19, "grad_norm": 2.7713420391082764, "learning_rate": 1.8650413972944262e-05, "loss": 0.7118, "step": 7502 }, { "epoch": 0.19, "grad_norm": 2.7674849033355713, "learning_rate": 1.864999746005719e-05, "loss": 0.6266, "step": 7503 }, { "epoch": 0.19, "grad_norm": 1.8053840398788452, "learning_rate": 1.8649580887559778e-05, "loss": 0.5172, "step": 7504 }, { "epoch": 0.19, "grad_norm": 2.5054821968078613, "learning_rate": 1.8649164255454884e-05, "loss": 0.6409, "step": 7505 }, { "epoch": 0.19, "grad_norm": 1.9807015657424927, "learning_rate": 1.8648747563745385e-05, "loss": 0.7001, "step": 7506 }, { "epoch": 0.19, "grad_norm": 3.5872504711151123, "learning_rate": 1.8648330812434153e-05, "loss": 0.5702, "step": 7507 }, { "epoch": 0.19, "grad_norm": 1.7265657186508179, "learning_rate": 1.8647914001524056e-05, "loss": 0.6392, "step": 7508 }, { "epoch": 0.19, "grad_norm": 1.6757566928863525, "learning_rate": 1.864749713101797e-05, "loss": 0.8069, "step": 7509 }, { "epoch": 0.19, "grad_norm": 1.728572964668274, "learning_rate": 1.8647080200918765e-05, "loss": 0.6631, "step": 7510 }, { "epoch": 0.19, "grad_norm": 2.607778787612915, "learning_rate": 1.864666321122932e-05, "loss": 0.5436, "step": 7511 }, { "epoch": 0.19, "grad_norm": 2.071241855621338, "learning_rate": 1.86462461619525e-05, "loss": 0.7854, "step": 7512 }, { "epoch": 0.19, "grad_norm": 1.4208322763442993, "learning_rate": 1.8645829053091188e-05, "loss": 0.6761, "step": 7513 }, { "epoch": 0.19, "grad_norm": 1.3639222383499146, "learning_rate": 1.8645411884648247e-05, "loss": 0.5333, "step": 7514 }, { "epoch": 0.19, "grad_norm": 2.5827035903930664, "learning_rate": 1.8644994656626563e-05, "loss": 0.5737, "step": 7515 }, { "epoch": 0.19, "grad_norm": 2.28916335105896, "learning_rate": 1.8644577369029007e-05, "loss": 0.5811, "step": 7516 }, { "epoch": 0.19, "grad_norm": 2.045320510864258, "learning_rate": 1.864416002185846e-05, "loss": 0.5914, "step": 7517 }, { "epoch": 0.19, "grad_norm": 3.085832357406616, "learning_rate": 1.8643742615117787e-05, "loss": 0.684, "step": 7518 }, { "epoch": 0.19, "grad_norm": 1.5713330507278442, "learning_rate": 1.8643325148809872e-05, "loss": 0.5919, "step": 7519 }, { "epoch": 0.19, "grad_norm": 1.7612581253051758, "learning_rate": 1.864290762293759e-05, "loss": 0.5889, "step": 7520 }, { "epoch": 0.19, "grad_norm": 1.143903374671936, "learning_rate": 1.8642490037503816e-05, "loss": 0.6038, "step": 7521 }, { "epoch": 0.19, "grad_norm": 1.4461172819137573, "learning_rate": 1.8642072392511435e-05, "loss": 0.4916, "step": 7522 }, { "epoch": 0.19, "grad_norm": 3.244994878768921, "learning_rate": 1.864165468796332e-05, "loss": 0.7402, "step": 7523 }, { "epoch": 0.19, "grad_norm": 1.8701999187469482, "learning_rate": 1.8641236923862348e-05, "loss": 0.6289, "step": 7524 }, { "epoch": 0.19, "grad_norm": 5.610455513000488, "learning_rate": 1.86408191002114e-05, "loss": 0.5703, "step": 7525 }, { "epoch": 0.19, "grad_norm": 3.21724271774292, "learning_rate": 1.8640401217013356e-05, "loss": 0.7495, "step": 7526 }, { "epoch": 0.19, "grad_norm": 2.3891701698303223, "learning_rate": 1.8639983274271096e-05, "loss": 0.9244, "step": 7527 }, { "epoch": 0.19, "grad_norm": 2.2279186248779297, "learning_rate": 1.8639565271987498e-05, "loss": 0.7009, "step": 7528 }, { "epoch": 0.19, "grad_norm": 1.5417741537094116, "learning_rate": 1.8639147210165447e-05, "loss": 0.7716, "step": 7529 }, { "epoch": 0.19, "grad_norm": 3.618762969970703, "learning_rate": 1.8638729088807817e-05, "loss": 0.7477, "step": 7530 }, { "epoch": 0.19, "grad_norm": 1.7656410932540894, "learning_rate": 1.8638310907917497e-05, "loss": 0.6102, "step": 7531 }, { "epoch": 0.19, "grad_norm": 4.286649703979492, "learning_rate": 1.8637892667497363e-05, "loss": 0.6936, "step": 7532 }, { "epoch": 0.19, "grad_norm": 1.362486720085144, "learning_rate": 1.86374743675503e-05, "loss": 0.6493, "step": 7533 }, { "epoch": 0.19, "grad_norm": 3.801704168319702, "learning_rate": 1.863705600807919e-05, "loss": 0.7356, "step": 7534 }, { "epoch": 0.19, "grad_norm": 1.7428938150405884, "learning_rate": 1.863663758908692e-05, "loss": 0.6842, "step": 7535 }, { "epoch": 0.19, "grad_norm": 3.2932581901550293, "learning_rate": 1.8636219110576365e-05, "loss": 0.5116, "step": 7536 }, { "epoch": 0.19, "grad_norm": 1.6010615825653076, "learning_rate": 1.8635800572550415e-05, "loss": 0.7494, "step": 7537 }, { "epoch": 0.19, "grad_norm": 1.2395648956298828, "learning_rate": 1.8635381975011954e-05, "loss": 0.5929, "step": 7538 }, { "epoch": 0.19, "grad_norm": 3.106560707092285, "learning_rate": 1.8634963317963865e-05, "loss": 0.4639, "step": 7539 }, { "epoch": 0.19, "grad_norm": 4.207903861999512, "learning_rate": 1.8634544601409035e-05, "loss": 0.566, "step": 7540 }, { "epoch": 0.19, "grad_norm": 1.2522512674331665, "learning_rate": 1.8634125825350347e-05, "loss": 0.6727, "step": 7541 }, { "epoch": 0.19, "grad_norm": 1.3768385648727417, "learning_rate": 1.8633706989790688e-05, "loss": 0.5818, "step": 7542 }, { "epoch": 0.19, "grad_norm": 2.114074468612671, "learning_rate": 1.8633288094732948e-05, "loss": 0.699, "step": 7543 }, { "epoch": 0.19, "grad_norm": 1.709625005722046, "learning_rate": 1.8632869140180007e-05, "loss": 0.5053, "step": 7544 }, { "epoch": 0.19, "grad_norm": 2.309117078781128, "learning_rate": 1.8632450126134754e-05, "loss": 0.6702, "step": 7545 }, { "epoch": 0.19, "grad_norm": 1.8607959747314453, "learning_rate": 1.8632031052600084e-05, "loss": 0.6305, "step": 7546 }, { "epoch": 0.19, "grad_norm": 3.5829246044158936, "learning_rate": 1.8631611919578873e-05, "loss": 0.6462, "step": 7547 }, { "epoch": 0.19, "grad_norm": 3.836595296859741, "learning_rate": 1.863119272707402e-05, "loss": 0.8848, "step": 7548 }, { "epoch": 0.19, "grad_norm": 1.9422389268875122, "learning_rate": 1.8630773475088407e-05, "loss": 0.6672, "step": 7549 }, { "epoch": 0.19, "grad_norm": 3.8049376010894775, "learning_rate": 1.8630354163624927e-05, "loss": 0.6296, "step": 7550 }, { "epoch": 0.19, "grad_norm": 3.316168785095215, "learning_rate": 1.8629934792686465e-05, "loss": 0.7551, "step": 7551 }, { "epoch": 0.19, "grad_norm": 4.142681121826172, "learning_rate": 1.862951536227592e-05, "loss": 0.7868, "step": 7552 }, { "epoch": 0.19, "grad_norm": 1.5243761539459229, "learning_rate": 1.8629095872396174e-05, "loss": 0.6242, "step": 7553 }, { "epoch": 0.19, "grad_norm": 1.7848091125488281, "learning_rate": 1.862867632305012e-05, "loss": 0.6548, "step": 7554 }, { "epoch": 0.19, "grad_norm": 4.637393474578857, "learning_rate": 1.862825671424065e-05, "loss": 0.8156, "step": 7555 }, { "epoch": 0.19, "grad_norm": 2.0899083614349365, "learning_rate": 1.8627837045970653e-05, "loss": 0.6441, "step": 7556 }, { "epoch": 0.19, "grad_norm": 1.043655514717102, "learning_rate": 1.8627417318243025e-05, "loss": 0.5116, "step": 7557 }, { "epoch": 0.19, "grad_norm": 2.929236650466919, "learning_rate": 1.862699753106066e-05, "loss": 0.8989, "step": 7558 }, { "epoch": 0.19, "grad_norm": 1.1722464561462402, "learning_rate": 1.8626577684426446e-05, "loss": 0.5443, "step": 7559 }, { "epoch": 0.19, "grad_norm": 2.84369158744812, "learning_rate": 1.862615777834328e-05, "loss": 0.6487, "step": 7560 }, { "epoch": 0.19, "grad_norm": 3.3830885887145996, "learning_rate": 1.862573781281405e-05, "loss": 0.7498, "step": 7561 }, { "epoch": 0.19, "grad_norm": 1.4107853174209595, "learning_rate": 1.862531778784166e-05, "loss": 0.6825, "step": 7562 }, { "epoch": 0.19, "grad_norm": 3.571927309036255, "learning_rate": 1.8624897703428994e-05, "loss": 0.5628, "step": 7563 }, { "epoch": 0.19, "grad_norm": 1.8233355283737183, "learning_rate": 1.8624477559578952e-05, "loss": 0.5463, "step": 7564 }, { "epoch": 0.19, "grad_norm": 1.5540529489517212, "learning_rate": 1.862405735629443e-05, "loss": 0.5496, "step": 7565 }, { "epoch": 0.19, "grad_norm": 2.3444790840148926, "learning_rate": 1.8623637093578323e-05, "loss": 0.6966, "step": 7566 }, { "epoch": 0.19, "grad_norm": 3.0119409561157227, "learning_rate": 1.8623216771433527e-05, "loss": 0.5842, "step": 7567 }, { "epoch": 0.19, "grad_norm": 1.2681182622909546, "learning_rate": 1.862279638986294e-05, "loss": 0.5754, "step": 7568 }, { "epoch": 0.19, "grad_norm": 2.4040493965148926, "learning_rate": 1.8622375948869457e-05, "loss": 0.4716, "step": 7569 }, { "epoch": 0.19, "grad_norm": 2.7913119792938232, "learning_rate": 1.8621955448455976e-05, "loss": 0.6228, "step": 7570 }, { "epoch": 0.19, "grad_norm": 2.0913333892822266, "learning_rate": 1.8621534888625393e-05, "loss": 0.6995, "step": 7571 }, { "epoch": 0.19, "grad_norm": 4.294766902923584, "learning_rate": 1.8621114269380613e-05, "loss": 0.8266, "step": 7572 }, { "epoch": 0.19, "grad_norm": 3.1539011001586914, "learning_rate": 1.8620693590724525e-05, "loss": 0.6279, "step": 7573 }, { "epoch": 0.19, "grad_norm": 1.3721294403076172, "learning_rate": 1.8620272852660033e-05, "loss": 0.507, "step": 7574 }, { "epoch": 0.19, "grad_norm": 1.3322066068649292, "learning_rate": 1.861985205519004e-05, "loss": 0.5432, "step": 7575 }, { "epoch": 0.19, "grad_norm": 5.977782249450684, "learning_rate": 1.8619431198317438e-05, "loss": 0.6266, "step": 7576 }, { "epoch": 0.19, "grad_norm": 3.1210527420043945, "learning_rate": 1.8619010282045132e-05, "loss": 0.7652, "step": 7577 }, { "epoch": 0.19, "grad_norm": 2.7745766639709473, "learning_rate": 1.8618589306376023e-05, "loss": 0.5877, "step": 7578 }, { "epoch": 0.19, "grad_norm": 3.7104570865631104, "learning_rate": 1.861816827131301e-05, "loss": 0.6939, "step": 7579 }, { "epoch": 0.19, "grad_norm": 4.8350629806518555, "learning_rate": 1.8617747176859004e-05, "loss": 0.7022, "step": 7580 }, { "epoch": 0.19, "grad_norm": 5.557226181030273, "learning_rate": 1.8617326023016887e-05, "loss": 0.6803, "step": 7581 }, { "epoch": 0.19, "grad_norm": 2.1510629653930664, "learning_rate": 1.8616904809789578e-05, "loss": 0.5919, "step": 7582 }, { "epoch": 0.19, "grad_norm": 2.7305853366851807, "learning_rate": 1.8616483537179974e-05, "loss": 0.704, "step": 7583 }, { "epoch": 0.19, "grad_norm": 2.4523251056671143, "learning_rate": 1.8616062205190983e-05, "loss": 0.811, "step": 7584 }, { "epoch": 0.19, "grad_norm": 1.9472075700759888, "learning_rate": 1.86156408138255e-05, "loss": 0.6176, "step": 7585 }, { "epoch": 0.19, "grad_norm": 4.742920398712158, "learning_rate": 1.8615219363086436e-05, "loss": 0.7718, "step": 7586 }, { "epoch": 0.19, "grad_norm": 2.3074963092803955, "learning_rate": 1.8614797852976688e-05, "loss": 0.6959, "step": 7587 }, { "epoch": 0.19, "grad_norm": 4.402862548828125, "learning_rate": 1.861437628349917e-05, "loss": 0.8291, "step": 7588 }, { "epoch": 0.19, "grad_norm": 1.6029070615768433, "learning_rate": 1.8613954654656782e-05, "loss": 0.6042, "step": 7589 }, { "epoch": 0.19, "grad_norm": 1.5863126516342163, "learning_rate": 1.8613532966452428e-05, "loss": 0.7287, "step": 7590 }, { "epoch": 0.19, "grad_norm": 2.7875401973724365, "learning_rate": 1.861311121888902e-05, "loss": 0.8617, "step": 7591 }, { "epoch": 0.19, "grad_norm": 5.386903762817383, "learning_rate": 1.861268941196946e-05, "loss": 0.5799, "step": 7592 }, { "epoch": 0.19, "grad_norm": 1.9871467351913452, "learning_rate": 1.861226754569665e-05, "loss": 0.7379, "step": 7593 }, { "epoch": 0.19, "grad_norm": 3.821908950805664, "learning_rate": 1.8611845620073508e-05, "loss": 0.6726, "step": 7594 }, { "epoch": 0.19, "grad_norm": 1.6643673181533813, "learning_rate": 1.8611423635102938e-05, "loss": 0.5881, "step": 7595 }, { "epoch": 0.19, "grad_norm": 3.718322277069092, "learning_rate": 1.8611001590787843e-05, "loss": 0.7464, "step": 7596 }, { "epoch": 0.19, "grad_norm": 2.059697389602661, "learning_rate": 1.8610579487131135e-05, "loss": 0.6814, "step": 7597 }, { "epoch": 0.19, "grad_norm": 4.959624767303467, "learning_rate": 1.8610157324135723e-05, "loss": 0.7739, "step": 7598 }, { "epoch": 0.19, "grad_norm": 1.8423467874526978, "learning_rate": 1.860973510180452e-05, "loss": 0.5203, "step": 7599 }, { "epoch": 0.19, "grad_norm": 2.2957868576049805, "learning_rate": 1.8609312820140428e-05, "loss": 0.7544, "step": 7600 }, { "epoch": 0.19, "grad_norm": 1.9332537651062012, "learning_rate": 1.8608890479146362e-05, "loss": 0.7061, "step": 7601 }, { "epoch": 0.19, "grad_norm": 4.365057945251465, "learning_rate": 1.8608468078825234e-05, "loss": 0.7337, "step": 7602 }, { "epoch": 0.19, "grad_norm": 2.5922176837921143, "learning_rate": 1.8608045619179948e-05, "loss": 0.6537, "step": 7603 }, { "epoch": 0.19, "grad_norm": 2.568535327911377, "learning_rate": 1.860762310021342e-05, "loss": 0.6167, "step": 7604 }, { "epoch": 0.19, "grad_norm": 2.974966049194336, "learning_rate": 1.8607200521928567e-05, "loss": 0.6366, "step": 7605 }, { "epoch": 0.19, "grad_norm": 1.7381340265274048, "learning_rate": 1.860677788432829e-05, "loss": 0.6455, "step": 7606 }, { "epoch": 0.19, "grad_norm": 2.547079563140869, "learning_rate": 1.8606355187415505e-05, "loss": 0.6609, "step": 7607 }, { "epoch": 0.19, "grad_norm": 2.3120551109313965, "learning_rate": 1.8605932431193135e-05, "loss": 0.5708, "step": 7608 }, { "epoch": 0.2, "grad_norm": 1.8323994874954224, "learning_rate": 1.860550961566408e-05, "loss": 0.6988, "step": 7609 }, { "epoch": 0.2, "grad_norm": 2.394041061401367, "learning_rate": 1.860508674083126e-05, "loss": 0.7264, "step": 7610 }, { "epoch": 0.2, "grad_norm": 3.0618650913238525, "learning_rate": 1.8604663806697588e-05, "loss": 0.6399, "step": 7611 }, { "epoch": 0.2, "grad_norm": 2.472430944442749, "learning_rate": 1.8604240813265982e-05, "loss": 0.6703, "step": 7612 }, { "epoch": 0.2, "grad_norm": 3.721266269683838, "learning_rate": 1.860381776053935e-05, "loss": 0.745, "step": 7613 }, { "epoch": 0.2, "grad_norm": 3.7671456336975098, "learning_rate": 1.8603394648520614e-05, "loss": 0.6723, "step": 7614 }, { "epoch": 0.2, "grad_norm": 1.3661909103393555, "learning_rate": 1.860297147721269e-05, "loss": 0.6346, "step": 7615 }, { "epoch": 0.2, "grad_norm": 2.001636028289795, "learning_rate": 1.8602548246618486e-05, "loss": 0.6783, "step": 7616 }, { "epoch": 0.2, "grad_norm": 2.0581512451171875, "learning_rate": 1.8602124956740924e-05, "loss": 0.6727, "step": 7617 }, { "epoch": 0.2, "grad_norm": 5.959755897521973, "learning_rate": 1.860170160758292e-05, "loss": 0.4881, "step": 7618 }, { "epoch": 0.2, "grad_norm": 3.433171272277832, "learning_rate": 1.86012781991474e-05, "loss": 0.6699, "step": 7619 }, { "epoch": 0.2, "grad_norm": 4.172399520874023, "learning_rate": 1.860085473143727e-05, "loss": 0.8339, "step": 7620 }, { "epoch": 0.2, "grad_norm": 1.575188398361206, "learning_rate": 1.8600431204455455e-05, "loss": 0.5858, "step": 7621 }, { "epoch": 0.2, "grad_norm": 2.3379688262939453, "learning_rate": 1.860000761820487e-05, "loss": 0.7419, "step": 7622 }, { "epoch": 0.2, "grad_norm": 3.3168439865112305, "learning_rate": 1.8599583972688436e-05, "loss": 0.7701, "step": 7623 }, { "epoch": 0.2, "grad_norm": 2.742971181869507, "learning_rate": 1.859916026790907e-05, "loss": 0.7365, "step": 7624 }, { "epoch": 0.2, "grad_norm": 4.110212326049805, "learning_rate": 1.8598736503869696e-05, "loss": 0.8339, "step": 7625 }, { "epoch": 0.2, "grad_norm": 2.8261263370513916, "learning_rate": 1.859831268057323e-05, "loss": 0.5578, "step": 7626 }, { "epoch": 0.2, "grad_norm": 1.8935831785202026, "learning_rate": 1.8597888798022595e-05, "loss": 0.4844, "step": 7627 }, { "epoch": 0.2, "grad_norm": 2.294311761856079, "learning_rate": 1.8597464856220718e-05, "loss": 0.6915, "step": 7628 }, { "epoch": 0.2, "grad_norm": 3.0664682388305664, "learning_rate": 1.859704085517051e-05, "loss": 0.738, "step": 7629 }, { "epoch": 0.2, "grad_norm": 2.757157564163208, "learning_rate": 1.85966167948749e-05, "loss": 0.7064, "step": 7630 }, { "epoch": 0.2, "grad_norm": 1.882514476776123, "learning_rate": 1.8596192675336805e-05, "loss": 0.6906, "step": 7631 }, { "epoch": 0.2, "grad_norm": 1.7417083978652954, "learning_rate": 1.859576849655915e-05, "loss": 0.5766, "step": 7632 }, { "epoch": 0.2, "grad_norm": 1.986027479171753, "learning_rate": 1.8595344258544862e-05, "loss": 0.7819, "step": 7633 }, { "epoch": 0.2, "grad_norm": 2.3375155925750732, "learning_rate": 1.8594919961296857e-05, "loss": 0.8125, "step": 7634 }, { "epoch": 0.2, "grad_norm": 1.682627558708191, "learning_rate": 1.859449560481807e-05, "loss": 0.683, "step": 7635 }, { "epoch": 0.2, "grad_norm": 2.0122945308685303, "learning_rate": 1.8594071189111414e-05, "loss": 0.705, "step": 7636 }, { "epoch": 0.2, "grad_norm": 2.1620736122131348, "learning_rate": 1.859364671417982e-05, "loss": 0.6602, "step": 7637 }, { "epoch": 0.2, "grad_norm": 2.3994009494781494, "learning_rate": 1.859322218002621e-05, "loss": 0.5738, "step": 7638 }, { "epoch": 0.2, "grad_norm": 9.06663703918457, "learning_rate": 1.8592797586653516e-05, "loss": 0.7043, "step": 7639 }, { "epoch": 0.2, "grad_norm": 1.9766647815704346, "learning_rate": 1.8592372934064656e-05, "loss": 0.6599, "step": 7640 }, { "epoch": 0.2, "grad_norm": 2.797567844390869, "learning_rate": 1.8591948222262562e-05, "loss": 0.6904, "step": 7641 }, { "epoch": 0.2, "grad_norm": 2.558986186981201, "learning_rate": 1.8591523451250158e-05, "loss": 0.5423, "step": 7642 }, { "epoch": 0.2, "grad_norm": 8.599542617797852, "learning_rate": 1.859109862103037e-05, "loss": 0.8521, "step": 7643 }, { "epoch": 0.2, "grad_norm": 1.4642622470855713, "learning_rate": 1.8590673731606132e-05, "loss": 0.6614, "step": 7644 }, { "epoch": 0.2, "grad_norm": 2.684248924255371, "learning_rate": 1.8590248782980368e-05, "loss": 0.8028, "step": 7645 }, { "epoch": 0.2, "grad_norm": 2.0841476917266846, "learning_rate": 1.8589823775156004e-05, "loss": 0.6573, "step": 7646 }, { "epoch": 0.2, "grad_norm": 2.0719048976898193, "learning_rate": 1.858939870813597e-05, "loss": 0.5958, "step": 7647 }, { "epoch": 0.2, "grad_norm": 4.555496692657471, "learning_rate": 1.85889735819232e-05, "loss": 0.5414, "step": 7648 }, { "epoch": 0.2, "grad_norm": 3.28816556930542, "learning_rate": 1.8588548396520617e-05, "loss": 0.7417, "step": 7649 }, { "epoch": 0.2, "grad_norm": 2.2116506099700928, "learning_rate": 1.8588123151931153e-05, "loss": 0.5857, "step": 7650 }, { "epoch": 0.2, "grad_norm": 2.54009747505188, "learning_rate": 1.8587697848157743e-05, "loss": 0.6536, "step": 7651 }, { "epoch": 0.2, "grad_norm": 5.553451061248779, "learning_rate": 1.8587272485203312e-05, "loss": 0.7054, "step": 7652 }, { "epoch": 0.2, "grad_norm": 1.992669701576233, "learning_rate": 1.85868470630708e-05, "loss": 0.6878, "step": 7653 }, { "epoch": 0.2, "grad_norm": 2.48675799369812, "learning_rate": 1.858642158176313e-05, "loss": 0.8219, "step": 7654 }, { "epoch": 0.2, "grad_norm": 2.1660494804382324, "learning_rate": 1.8585996041283234e-05, "loss": 0.6918, "step": 7655 }, { "epoch": 0.2, "grad_norm": 1.6726818084716797, "learning_rate": 1.858557044163405e-05, "loss": 0.6696, "step": 7656 }, { "epoch": 0.2, "grad_norm": 3.1130964756011963, "learning_rate": 1.858514478281851e-05, "loss": 0.571, "step": 7657 }, { "epoch": 0.2, "grad_norm": 2.2316370010375977, "learning_rate": 1.858471906483954e-05, "loss": 0.7028, "step": 7658 }, { "epoch": 0.2, "grad_norm": 3.610657215118408, "learning_rate": 1.8584293287700082e-05, "loss": 0.8266, "step": 7659 }, { "epoch": 0.2, "grad_norm": 1.605196475982666, "learning_rate": 1.8583867451403068e-05, "loss": 0.5822, "step": 7660 }, { "epoch": 0.2, "grad_norm": 3.0932815074920654, "learning_rate": 1.8583441555951435e-05, "loss": 0.5194, "step": 7661 }, { "epoch": 0.2, "grad_norm": 2.6909430027008057, "learning_rate": 1.8583015601348116e-05, "loss": 0.6088, "step": 7662 }, { "epoch": 0.2, "grad_norm": 2.7693376541137695, "learning_rate": 1.8582589587596043e-05, "loss": 0.719, "step": 7663 }, { "epoch": 0.2, "grad_norm": 13.764195442199707, "learning_rate": 1.8582163514698153e-05, "loss": 0.5696, "step": 7664 }, { "epoch": 0.2, "grad_norm": 2.7365832328796387, "learning_rate": 1.8581737382657386e-05, "loss": 0.7607, "step": 7665 }, { "epoch": 0.2, "grad_norm": 2.597712993621826, "learning_rate": 1.8581311191476678e-05, "loss": 0.5361, "step": 7666 }, { "epoch": 0.2, "grad_norm": 5.976609706878662, "learning_rate": 1.858088494115896e-05, "loss": 0.6147, "step": 7667 }, { "epoch": 0.2, "grad_norm": 1.553207516670227, "learning_rate": 1.858045863170718e-05, "loss": 0.58, "step": 7668 }, { "epoch": 0.2, "grad_norm": 2.3182356357574463, "learning_rate": 1.858003226312427e-05, "loss": 0.8476, "step": 7669 }, { "epoch": 0.2, "grad_norm": 6.593744277954102, "learning_rate": 1.8579605835413167e-05, "loss": 0.4559, "step": 7670 }, { "epoch": 0.2, "grad_norm": 1.9780852794647217, "learning_rate": 1.8579179348576808e-05, "loss": 0.6309, "step": 7671 }, { "epoch": 0.2, "grad_norm": 1.868240237236023, "learning_rate": 1.8578752802618135e-05, "loss": 0.5942, "step": 7672 }, { "epoch": 0.2, "grad_norm": 1.98539137840271, "learning_rate": 1.857832619754009e-05, "loss": 0.5672, "step": 7673 }, { "epoch": 0.2, "grad_norm": 1.7755833864212036, "learning_rate": 1.857789953334561e-05, "loss": 0.5757, "step": 7674 }, { "epoch": 0.2, "grad_norm": 1.599911093711853, "learning_rate": 1.8577472810037634e-05, "loss": 0.6346, "step": 7675 }, { "epoch": 0.2, "grad_norm": 2.3409628868103027, "learning_rate": 1.8577046027619103e-05, "loss": 0.601, "step": 7676 }, { "epoch": 0.2, "grad_norm": 4.949123382568359, "learning_rate": 1.8576619186092962e-05, "loss": 0.7486, "step": 7677 }, { "epoch": 0.2, "grad_norm": 2.494757652282715, "learning_rate": 1.857619228546215e-05, "loss": 0.7333, "step": 7678 }, { "epoch": 0.2, "grad_norm": 2.413163661956787, "learning_rate": 1.857576532572961e-05, "loss": 0.7301, "step": 7679 }, { "epoch": 0.2, "grad_norm": 9.263509750366211, "learning_rate": 1.8575338306898277e-05, "loss": 0.6398, "step": 7680 }, { "epoch": 0.2, "grad_norm": 3.53200626373291, "learning_rate": 1.8574911228971106e-05, "loss": 0.7229, "step": 7681 }, { "epoch": 0.2, "grad_norm": 4.608739376068115, "learning_rate": 1.857448409195103e-05, "loss": 0.7526, "step": 7682 }, { "epoch": 0.2, "grad_norm": 2.222576379776001, "learning_rate": 1.8574056895841e-05, "loss": 0.6871, "step": 7683 }, { "epoch": 0.2, "grad_norm": 4.823070049285889, "learning_rate": 1.8573629640643958e-05, "loss": 0.6765, "step": 7684 }, { "epoch": 0.2, "grad_norm": 7.651839733123779, "learning_rate": 1.8573202326362844e-05, "loss": 0.7315, "step": 7685 }, { "epoch": 0.2, "grad_norm": 1.7553387880325317, "learning_rate": 1.8572774953000606e-05, "loss": 0.5526, "step": 7686 }, { "epoch": 0.2, "grad_norm": 1.5565240383148193, "learning_rate": 1.857234752056019e-05, "loss": 0.6352, "step": 7687 }, { "epoch": 0.2, "grad_norm": 3.2733724117279053, "learning_rate": 1.857192002904454e-05, "loss": 0.724, "step": 7688 }, { "epoch": 0.2, "grad_norm": 3.596262216567993, "learning_rate": 1.85714924784566e-05, "loss": 0.7451, "step": 7689 }, { "epoch": 0.2, "grad_norm": 1.029693841934204, "learning_rate": 1.8571064868799323e-05, "loss": 0.4711, "step": 7690 }, { "epoch": 0.2, "grad_norm": 1.519773244857788, "learning_rate": 1.857063720007565e-05, "loss": 0.6128, "step": 7691 }, { "epoch": 0.2, "grad_norm": 2.551575183868408, "learning_rate": 1.857020947228853e-05, "loss": 0.8193, "step": 7692 }, { "epoch": 0.2, "grad_norm": 2.4350595474243164, "learning_rate": 1.8569781685440912e-05, "loss": 0.5753, "step": 7693 }, { "epoch": 0.2, "grad_norm": 8.380209922790527, "learning_rate": 1.856935383953574e-05, "loss": 0.6342, "step": 7694 }, { "epoch": 0.2, "grad_norm": 2.5720932483673096, "learning_rate": 1.8568925934575967e-05, "loss": 0.5572, "step": 7695 }, { "epoch": 0.2, "grad_norm": 1.3428319692611694, "learning_rate": 1.856849797056454e-05, "loss": 0.7424, "step": 7696 }, { "epoch": 0.2, "grad_norm": 4.054581165313721, "learning_rate": 1.856806994750441e-05, "loss": 0.7222, "step": 7697 }, { "epoch": 0.2, "grad_norm": 2.1777279376983643, "learning_rate": 1.8567641865398523e-05, "loss": 0.5937, "step": 7698 }, { "epoch": 0.2, "grad_norm": 2.317876100540161, "learning_rate": 1.856721372424983e-05, "loss": 0.658, "step": 7699 }, { "epoch": 0.2, "grad_norm": 3.7679786682128906, "learning_rate": 1.8566785524061282e-05, "loss": 0.8964, "step": 7700 }, { "epoch": 0.2, "grad_norm": 1.6451926231384277, "learning_rate": 1.8566357264835833e-05, "loss": 0.7522, "step": 7701 }, { "epoch": 0.2, "grad_norm": 2.1918039321899414, "learning_rate": 1.8565928946576426e-05, "loss": 0.5591, "step": 7702 }, { "epoch": 0.2, "grad_norm": 4.479883670806885, "learning_rate": 1.8565500569286026e-05, "loss": 0.6209, "step": 7703 }, { "epoch": 0.2, "grad_norm": 3.0243616104125977, "learning_rate": 1.8565072132967574e-05, "loss": 0.6418, "step": 7704 }, { "epoch": 0.2, "grad_norm": 2.378431797027588, "learning_rate": 1.8564643637624023e-05, "loss": 0.6449, "step": 7705 }, { "epoch": 0.2, "grad_norm": 3.1884512901306152, "learning_rate": 1.8564215083258336e-05, "loss": 0.5751, "step": 7706 }, { "epoch": 0.2, "grad_norm": 3.17800235748291, "learning_rate": 1.8563786469873454e-05, "loss": 0.8201, "step": 7707 }, { "epoch": 0.2, "grad_norm": 3.337146520614624, "learning_rate": 1.8563357797472336e-05, "loss": 0.6097, "step": 7708 }, { "epoch": 0.2, "grad_norm": 2.0967209339141846, "learning_rate": 1.856292906605794e-05, "loss": 0.6954, "step": 7709 }, { "epoch": 0.2, "grad_norm": 1.902197003364563, "learning_rate": 1.8562500275633212e-05, "loss": 0.5267, "step": 7710 }, { "epoch": 0.2, "grad_norm": 1.3017797470092773, "learning_rate": 1.8562071426201114e-05, "loss": 0.6394, "step": 7711 }, { "epoch": 0.2, "grad_norm": 4.2119059562683105, "learning_rate": 1.85616425177646e-05, "loss": 0.6092, "step": 7712 }, { "epoch": 0.2, "grad_norm": 2.816441774368286, "learning_rate": 1.856121355032662e-05, "loss": 0.6468, "step": 7713 }, { "epoch": 0.2, "grad_norm": 3.5764997005462646, "learning_rate": 1.8560784523890142e-05, "loss": 0.8071, "step": 7714 }, { "epoch": 0.2, "grad_norm": 4.007608413696289, "learning_rate": 1.8560355438458107e-05, "loss": 0.5357, "step": 7715 }, { "epoch": 0.2, "grad_norm": 2.6843159198760986, "learning_rate": 1.8559926294033487e-05, "loss": 0.6318, "step": 7716 }, { "epoch": 0.2, "grad_norm": 5.2849345207214355, "learning_rate": 1.855949709061923e-05, "loss": 0.7112, "step": 7717 }, { "epoch": 0.2, "grad_norm": 1.9368098974227905, "learning_rate": 1.85590678282183e-05, "loss": 0.6391, "step": 7718 }, { "epoch": 0.2, "grad_norm": 3.59625244140625, "learning_rate": 1.8558638506833646e-05, "loss": 0.688, "step": 7719 }, { "epoch": 0.2, "grad_norm": 1.8951469659805298, "learning_rate": 1.8558209126468235e-05, "loss": 0.5094, "step": 7720 }, { "epoch": 0.2, "grad_norm": 1.6731010675430298, "learning_rate": 1.8557779687125026e-05, "loss": 0.6991, "step": 7721 }, { "epoch": 0.2, "grad_norm": 1.5678298473358154, "learning_rate": 1.8557350188806974e-05, "loss": 0.5645, "step": 7722 }, { "epoch": 0.2, "grad_norm": 4.295379161834717, "learning_rate": 1.8556920631517042e-05, "loss": 0.6025, "step": 7723 }, { "epoch": 0.2, "grad_norm": 2.8966691493988037, "learning_rate": 1.8556491015258187e-05, "loss": 0.4845, "step": 7724 }, { "epoch": 0.2, "grad_norm": 1.834052324295044, "learning_rate": 1.8556061340033374e-05, "loss": 0.6928, "step": 7725 }, { "epoch": 0.2, "grad_norm": 2.2618017196655273, "learning_rate": 1.8555631605845557e-05, "loss": 0.5994, "step": 7726 }, { "epoch": 0.2, "grad_norm": 2.680736541748047, "learning_rate": 1.855520181269771e-05, "loss": 0.7311, "step": 7727 }, { "epoch": 0.2, "grad_norm": 2.301048994064331, "learning_rate": 1.855477196059278e-05, "loss": 0.5998, "step": 7728 }, { "epoch": 0.2, "grad_norm": 2.72625732421875, "learning_rate": 1.855434204953374e-05, "loss": 0.5441, "step": 7729 }, { "epoch": 0.2, "grad_norm": 1.7825263738632202, "learning_rate": 1.8553912079523547e-05, "loss": 0.6789, "step": 7730 }, { "epoch": 0.2, "grad_norm": 2.350198268890381, "learning_rate": 1.8553482050565165e-05, "loss": 0.7165, "step": 7731 }, { "epoch": 0.2, "grad_norm": 2.3202757835388184, "learning_rate": 1.855305196266156e-05, "loss": 0.5918, "step": 7732 }, { "epoch": 0.2, "grad_norm": 3.620058536529541, "learning_rate": 1.8552621815815697e-05, "loss": 0.575, "step": 7733 }, { "epoch": 0.2, "grad_norm": 1.6502838134765625, "learning_rate": 1.8552191610030534e-05, "loss": 0.5986, "step": 7734 }, { "epoch": 0.2, "grad_norm": 9.59402084350586, "learning_rate": 1.855176134530904e-05, "loss": 0.6241, "step": 7735 }, { "epoch": 0.2, "grad_norm": 2.4847829341888428, "learning_rate": 1.8551331021654178e-05, "loss": 0.5034, "step": 7736 }, { "epoch": 0.2, "grad_norm": 2.515289068222046, "learning_rate": 1.8550900639068922e-05, "loss": 0.7291, "step": 7737 }, { "epoch": 0.2, "grad_norm": 1.7034426927566528, "learning_rate": 1.8550470197556224e-05, "loss": 0.683, "step": 7738 }, { "epoch": 0.2, "grad_norm": 5.17063570022583, "learning_rate": 1.855003969711906e-05, "loss": 0.8439, "step": 7739 }, { "epoch": 0.2, "grad_norm": 2.610980749130249, "learning_rate": 1.8549609137760396e-05, "loss": 0.7253, "step": 7740 }, { "epoch": 0.2, "grad_norm": 2.3620827198028564, "learning_rate": 1.8549178519483194e-05, "loss": 0.7183, "step": 7741 }, { "epoch": 0.2, "grad_norm": 2.9403350353240967, "learning_rate": 1.8548747842290423e-05, "loss": 0.5939, "step": 7742 }, { "epoch": 0.2, "grad_norm": 2.0649707317352295, "learning_rate": 1.854831710618506e-05, "loss": 0.6833, "step": 7743 }, { "epoch": 0.2, "grad_norm": 1.3635133504867554, "learning_rate": 1.854788631117006e-05, "loss": 0.5653, "step": 7744 }, { "epoch": 0.2, "grad_norm": 16.1840763092041, "learning_rate": 1.85474554572484e-05, "loss": 0.7954, "step": 7745 }, { "epoch": 0.2, "grad_norm": 4.1624979972839355, "learning_rate": 1.8547024544423044e-05, "loss": 0.6155, "step": 7746 }, { "epoch": 0.2, "grad_norm": 1.5136876106262207, "learning_rate": 1.854659357269697e-05, "loss": 0.6874, "step": 7747 }, { "epoch": 0.2, "grad_norm": 3.0401761531829834, "learning_rate": 1.854616254207314e-05, "loss": 0.5895, "step": 7748 }, { "epoch": 0.2, "grad_norm": 3.542893171310425, "learning_rate": 1.8545731452554524e-05, "loss": 0.6113, "step": 7749 }, { "epoch": 0.2, "grad_norm": 2.302415609359741, "learning_rate": 1.8545300304144097e-05, "loss": 0.7288, "step": 7750 }, { "epoch": 0.2, "grad_norm": 2.914829969406128, "learning_rate": 1.8544869096844832e-05, "loss": 0.8145, "step": 7751 }, { "epoch": 0.2, "grad_norm": 2.8972575664520264, "learning_rate": 1.8544437830659698e-05, "loss": 0.6292, "step": 7752 }, { "epoch": 0.2, "grad_norm": 1.4331234693527222, "learning_rate": 1.8544006505591663e-05, "loss": 0.7368, "step": 7753 }, { "epoch": 0.2, "grad_norm": 1.693312168121338, "learning_rate": 1.8543575121643704e-05, "loss": 0.6512, "step": 7754 }, { "epoch": 0.2, "grad_norm": 6.661121368408203, "learning_rate": 1.8543143678818797e-05, "loss": 0.7812, "step": 7755 }, { "epoch": 0.2, "grad_norm": 1.223249912261963, "learning_rate": 1.8542712177119906e-05, "loss": 0.7145, "step": 7756 }, { "epoch": 0.2, "grad_norm": 3.0415713787078857, "learning_rate": 1.8542280616550013e-05, "loss": 0.5767, "step": 7757 }, { "epoch": 0.2, "grad_norm": 2.580721855163574, "learning_rate": 1.8541848997112085e-05, "loss": 0.6399, "step": 7758 }, { "epoch": 0.2, "grad_norm": 5.401332855224609, "learning_rate": 1.85414173188091e-05, "loss": 0.8512, "step": 7759 }, { "epoch": 0.2, "grad_norm": 2.849465847015381, "learning_rate": 1.8540985581644037e-05, "loss": 0.6458, "step": 7760 }, { "epoch": 0.2, "grad_norm": 1.78216552734375, "learning_rate": 1.8540553785619865e-05, "loss": 0.7172, "step": 7761 }, { "epoch": 0.2, "grad_norm": 2.411984443664551, "learning_rate": 1.8540121930739565e-05, "loss": 0.504, "step": 7762 }, { "epoch": 0.2, "grad_norm": 2.8923234939575195, "learning_rate": 1.8539690017006108e-05, "loss": 0.8023, "step": 7763 }, { "epoch": 0.2, "grad_norm": 2.8418736457824707, "learning_rate": 1.8539258044422472e-05, "loss": 0.6533, "step": 7764 }, { "epoch": 0.2, "grad_norm": 2.299938917160034, "learning_rate": 1.8538826012991636e-05, "loss": 0.6909, "step": 7765 }, { "epoch": 0.2, "grad_norm": 3.6437559127807617, "learning_rate": 1.8538393922716574e-05, "loss": 0.7404, "step": 7766 }, { "epoch": 0.2, "grad_norm": 1.6601388454437256, "learning_rate": 1.8537961773600267e-05, "loss": 0.5775, "step": 7767 }, { "epoch": 0.2, "grad_norm": 2.745185375213623, "learning_rate": 1.8537529565645688e-05, "loss": 0.5504, "step": 7768 }, { "epoch": 0.2, "grad_norm": 2.5325679779052734, "learning_rate": 1.853709729885582e-05, "loss": 0.5628, "step": 7769 }, { "epoch": 0.2, "grad_norm": 3.8845889568328857, "learning_rate": 1.8536664973233646e-05, "loss": 0.6376, "step": 7770 }, { "epoch": 0.2, "grad_norm": 1.8225784301757812, "learning_rate": 1.8536232588782134e-05, "loss": 0.6617, "step": 7771 }, { "epoch": 0.2, "grad_norm": 2.4923853874206543, "learning_rate": 1.8535800145504278e-05, "loss": 0.6214, "step": 7772 }, { "epoch": 0.2, "grad_norm": 2.2686715126037598, "learning_rate": 1.853536764340304e-05, "loss": 0.6445, "step": 7773 }, { "epoch": 0.2, "grad_norm": 1.9784386157989502, "learning_rate": 1.853493508248142e-05, "loss": 0.6046, "step": 7774 }, { "epoch": 0.2, "grad_norm": 2.320573568344116, "learning_rate": 1.8534502462742382e-05, "loss": 0.6646, "step": 7775 }, { "epoch": 0.2, "grad_norm": 1.3710713386535645, "learning_rate": 1.8534069784188918e-05, "loss": 0.6001, "step": 7776 }, { "epoch": 0.2, "grad_norm": 2.7873544692993164, "learning_rate": 1.8533637046824008e-05, "loss": 1.0301, "step": 7777 }, { "epoch": 0.2, "grad_norm": 2.004909038543701, "learning_rate": 1.853320425065063e-05, "loss": 0.683, "step": 7778 }, { "epoch": 0.2, "grad_norm": 2.5980892181396484, "learning_rate": 1.8532771395671774e-05, "loss": 0.6417, "step": 7779 }, { "epoch": 0.2, "grad_norm": 4.942553520202637, "learning_rate": 1.8532338481890413e-05, "loss": 0.7115, "step": 7780 }, { "epoch": 0.2, "grad_norm": 7.884330749511719, "learning_rate": 1.853190550930954e-05, "loss": 0.638, "step": 7781 }, { "epoch": 0.2, "grad_norm": 3.3296196460723877, "learning_rate": 1.853147247793213e-05, "loss": 0.5593, "step": 7782 }, { "epoch": 0.2, "grad_norm": 4.272146224975586, "learning_rate": 1.8531039387761176e-05, "loss": 0.6665, "step": 7783 }, { "epoch": 0.2, "grad_norm": 1.5220311880111694, "learning_rate": 1.8530606238799657e-05, "loss": 0.7994, "step": 7784 }, { "epoch": 0.2, "grad_norm": 2.125854253768921, "learning_rate": 1.8530173031050558e-05, "loss": 0.6748, "step": 7785 }, { "epoch": 0.2, "grad_norm": 2.074005603790283, "learning_rate": 1.8529739764516868e-05, "loss": 0.6245, "step": 7786 }, { "epoch": 0.2, "grad_norm": 2.362959623336792, "learning_rate": 1.8529306439201568e-05, "loss": 0.8156, "step": 7787 }, { "epoch": 0.2, "grad_norm": 3.511392831802368, "learning_rate": 1.852887305510765e-05, "loss": 0.6769, "step": 7788 }, { "epoch": 0.2, "grad_norm": 2.806205987930298, "learning_rate": 1.852843961223809e-05, "loss": 0.6807, "step": 7789 }, { "epoch": 0.2, "grad_norm": 2.0949015617370605, "learning_rate": 1.852800611059589e-05, "loss": 0.8133, "step": 7790 }, { "epoch": 0.2, "grad_norm": 2.0716638565063477, "learning_rate": 1.8527572550184026e-05, "loss": 0.3904, "step": 7791 }, { "epoch": 0.2, "grad_norm": 1.4998642206192017, "learning_rate": 1.8527138931005488e-05, "loss": 0.6929, "step": 7792 }, { "epoch": 0.2, "grad_norm": 1.4729233980178833, "learning_rate": 1.8526705253063267e-05, "loss": 0.4569, "step": 7793 }, { "epoch": 0.2, "grad_norm": 2.2003166675567627, "learning_rate": 1.8526271516360354e-05, "loss": 0.6637, "step": 7794 }, { "epoch": 0.2, "grad_norm": 1.2972630262374878, "learning_rate": 1.8525837720899732e-05, "loss": 0.7881, "step": 7795 }, { "epoch": 0.2, "grad_norm": 1.863594651222229, "learning_rate": 1.852540386668439e-05, "loss": 0.594, "step": 7796 }, { "epoch": 0.2, "grad_norm": 2.194869041442871, "learning_rate": 1.8524969953717328e-05, "loss": 0.7195, "step": 7797 }, { "epoch": 0.2, "grad_norm": 2.3807356357574463, "learning_rate": 1.8524535982001523e-05, "loss": 0.7242, "step": 7798 }, { "epoch": 0.2, "grad_norm": 3.4148623943328857, "learning_rate": 1.852410195153997e-05, "loss": 0.7237, "step": 7799 }, { "epoch": 0.2, "grad_norm": 17.208959579467773, "learning_rate": 1.8523667862335668e-05, "loss": 0.6823, "step": 7800 }, { "epoch": 0.2, "grad_norm": 3.5263619422912598, "learning_rate": 1.85232337143916e-05, "loss": 0.5223, "step": 7801 }, { "epoch": 0.2, "grad_norm": 4.491160869598389, "learning_rate": 1.8522799507710758e-05, "loss": 0.7627, "step": 7802 }, { "epoch": 0.2, "grad_norm": 4.460641384124756, "learning_rate": 1.8522365242296138e-05, "loss": 0.6494, "step": 7803 }, { "epoch": 0.2, "grad_norm": 2.312826156616211, "learning_rate": 1.852193091815073e-05, "loss": 0.8308, "step": 7804 }, { "epoch": 0.2, "grad_norm": 5.352192401885986, "learning_rate": 1.852149653527753e-05, "loss": 0.7524, "step": 7805 }, { "epoch": 0.2, "grad_norm": 4.467291355133057, "learning_rate": 1.8521062093679524e-05, "loss": 0.7601, "step": 7806 }, { "epoch": 0.2, "grad_norm": 2.873903274536133, "learning_rate": 1.8520627593359716e-05, "loss": 0.6049, "step": 7807 }, { "epoch": 0.2, "grad_norm": 1.5979887247085571, "learning_rate": 1.8520193034321098e-05, "loss": 0.6591, "step": 7808 }, { "epoch": 0.2, "grad_norm": 4.250797748565674, "learning_rate": 1.851975841656666e-05, "loss": 0.7579, "step": 7809 }, { "epoch": 0.2, "grad_norm": 2.6596145629882812, "learning_rate": 1.85193237400994e-05, "loss": 0.5501, "step": 7810 }, { "epoch": 0.2, "grad_norm": 3.106509208679199, "learning_rate": 1.8518889004922315e-05, "loss": 0.5857, "step": 7811 }, { "epoch": 0.2, "grad_norm": 2.3112549781799316, "learning_rate": 1.8518454211038394e-05, "loss": 0.5544, "step": 7812 }, { "epoch": 0.2, "grad_norm": 3.368283748626709, "learning_rate": 1.851801935845064e-05, "loss": 0.485, "step": 7813 }, { "epoch": 0.2, "grad_norm": 2.534905433654785, "learning_rate": 1.8517584447162052e-05, "loss": 0.6995, "step": 7814 }, { "epoch": 0.2, "grad_norm": 3.3332934379577637, "learning_rate": 1.851714947717562e-05, "loss": 0.6503, "step": 7815 }, { "epoch": 0.2, "grad_norm": 1.8837467432022095, "learning_rate": 1.8516714448494344e-05, "loss": 0.5353, "step": 7816 }, { "epoch": 0.2, "grad_norm": 1.8742363452911377, "learning_rate": 1.8516279361121225e-05, "loss": 0.5845, "step": 7817 }, { "epoch": 0.2, "grad_norm": 2.6529018878936768, "learning_rate": 1.8515844215059257e-05, "loss": 0.7678, "step": 7818 }, { "epoch": 0.2, "grad_norm": 8.491800308227539, "learning_rate": 1.8515409010311444e-05, "loss": 0.855, "step": 7819 }, { "epoch": 0.2, "grad_norm": 1.7188284397125244, "learning_rate": 1.851497374688078e-05, "loss": 0.6432, "step": 7820 }, { "epoch": 0.2, "grad_norm": 2.267421245574951, "learning_rate": 1.8514538424770266e-05, "loss": 0.7869, "step": 7821 }, { "epoch": 0.2, "grad_norm": 1.124752163887024, "learning_rate": 1.8514103043982902e-05, "loss": 0.7631, "step": 7822 }, { "epoch": 0.2, "grad_norm": 2.4744672775268555, "learning_rate": 1.8513667604521694e-05, "loss": 0.5937, "step": 7823 }, { "epoch": 0.2, "grad_norm": 1.8486905097961426, "learning_rate": 1.851323210638963e-05, "loss": 0.67, "step": 7824 }, { "epoch": 0.2, "grad_norm": 1.119856357574463, "learning_rate": 1.8512796549589724e-05, "loss": 0.5712, "step": 7825 }, { "epoch": 0.2, "grad_norm": 1.5012824535369873, "learning_rate": 1.851236093412497e-05, "loss": 0.5288, "step": 7826 }, { "epoch": 0.2, "grad_norm": 3.1333792209625244, "learning_rate": 1.8511925259998376e-05, "loss": 0.6865, "step": 7827 }, { "epoch": 0.2, "grad_norm": 2.357808828353882, "learning_rate": 1.8511489527212938e-05, "loss": 0.5415, "step": 7828 }, { "epoch": 0.2, "grad_norm": 5.327604293823242, "learning_rate": 1.8511053735771664e-05, "loss": 0.6302, "step": 7829 }, { "epoch": 0.2, "grad_norm": 1.9096051454544067, "learning_rate": 1.851061788567755e-05, "loss": 0.6612, "step": 7830 }, { "epoch": 0.2, "grad_norm": 2.1641554832458496, "learning_rate": 1.8510181976933607e-05, "loss": 0.6142, "step": 7831 }, { "epoch": 0.2, "grad_norm": 1.4593632221221924, "learning_rate": 1.850974600954284e-05, "loss": 0.5176, "step": 7832 }, { "epoch": 0.2, "grad_norm": 4.386693477630615, "learning_rate": 1.8509309983508245e-05, "loss": 0.6881, "step": 7833 }, { "epoch": 0.2, "grad_norm": 6.280194282531738, "learning_rate": 1.850887389883283e-05, "loss": 0.633, "step": 7834 }, { "epoch": 0.2, "grad_norm": 1.9774296283721924, "learning_rate": 1.8508437755519608e-05, "loss": 0.6414, "step": 7835 }, { "epoch": 0.2, "grad_norm": 1.848745346069336, "learning_rate": 1.8508001553571575e-05, "loss": 0.6276, "step": 7836 }, { "epoch": 0.2, "grad_norm": 2.3577539920806885, "learning_rate": 1.8507565292991742e-05, "loss": 0.6148, "step": 7837 }, { "epoch": 0.2, "grad_norm": 4.732737064361572, "learning_rate": 1.850712897378311e-05, "loss": 0.4838, "step": 7838 }, { "epoch": 0.2, "grad_norm": 7.339036464691162, "learning_rate": 1.8506692595948697e-05, "loss": 0.7304, "step": 7839 }, { "epoch": 0.2, "grad_norm": 3.0040361881256104, "learning_rate": 1.8506256159491495e-05, "loss": 0.6337, "step": 7840 }, { "epoch": 0.2, "grad_norm": 2.123551845550537, "learning_rate": 1.8505819664414524e-05, "loss": 0.6044, "step": 7841 }, { "epoch": 0.2, "grad_norm": 3.0658886432647705, "learning_rate": 1.850538311072079e-05, "loss": 0.5455, "step": 7842 }, { "epoch": 0.2, "grad_norm": 1.9009768962860107, "learning_rate": 1.8504946498413297e-05, "loss": 0.6439, "step": 7843 }, { "epoch": 0.2, "grad_norm": 2.1886675357818604, "learning_rate": 1.8504509827495055e-05, "loss": 0.5757, "step": 7844 }, { "epoch": 0.2, "grad_norm": 2.098512649536133, "learning_rate": 1.8504073097969075e-05, "loss": 0.6972, "step": 7845 }, { "epoch": 0.2, "grad_norm": 1.9533342123031616, "learning_rate": 1.8503636309838366e-05, "loss": 0.664, "step": 7846 }, { "epoch": 0.2, "grad_norm": 1.8628342151641846, "learning_rate": 1.850319946310594e-05, "loss": 0.6622, "step": 7847 }, { "epoch": 0.2, "grad_norm": 0.9451366662979126, "learning_rate": 1.8502762557774802e-05, "loss": 0.5916, "step": 7848 }, { "epoch": 0.2, "grad_norm": 1.9809398651123047, "learning_rate": 1.8502325593847968e-05, "loss": 0.634, "step": 7849 }, { "epoch": 0.2, "grad_norm": 2.1018741130828857, "learning_rate": 1.8501888571328446e-05, "loss": 0.5873, "step": 7850 }, { "epoch": 0.2, "grad_norm": 4.005948543548584, "learning_rate": 1.8501451490219253e-05, "loss": 0.6479, "step": 7851 }, { "epoch": 0.2, "grad_norm": 2.493964672088623, "learning_rate": 1.8501014350523395e-05, "loss": 0.5398, "step": 7852 }, { "epoch": 0.2, "grad_norm": 2.83648681640625, "learning_rate": 1.8500577152243887e-05, "loss": 0.6909, "step": 7853 }, { "epoch": 0.2, "grad_norm": 2.9586005210876465, "learning_rate": 1.8500139895383742e-05, "loss": 0.7052, "step": 7854 }, { "epoch": 0.2, "grad_norm": 2.9476499557495117, "learning_rate": 1.8499702579945973e-05, "loss": 0.5628, "step": 7855 }, { "epoch": 0.2, "grad_norm": 2.580373525619507, "learning_rate": 1.8499265205933596e-05, "loss": 0.5358, "step": 7856 }, { "epoch": 0.2, "grad_norm": 3.350524663925171, "learning_rate": 1.8498827773349623e-05, "loss": 0.6735, "step": 7857 }, { "epoch": 0.2, "grad_norm": 1.9423260688781738, "learning_rate": 1.8498390282197065e-05, "loss": 0.8102, "step": 7858 }, { "epoch": 0.2, "grad_norm": 1.989113688468933, "learning_rate": 1.849795273247894e-05, "loss": 0.6726, "step": 7859 }, { "epoch": 0.2, "grad_norm": 4.0338053703308105, "learning_rate": 1.8497515124198267e-05, "loss": 0.4843, "step": 7860 }, { "epoch": 0.2, "grad_norm": 3.1751248836517334, "learning_rate": 1.8497077457358055e-05, "loss": 0.5991, "step": 7861 }, { "epoch": 0.2, "grad_norm": 1.6946134567260742, "learning_rate": 1.8496639731961328e-05, "loss": 0.595, "step": 7862 }, { "epoch": 0.2, "grad_norm": 2.408334732055664, "learning_rate": 1.84962019480111e-05, "loss": 0.6875, "step": 7863 }, { "epoch": 0.2, "grad_norm": 1.9724786281585693, "learning_rate": 1.849576410551038e-05, "loss": 0.6291, "step": 7864 }, { "epoch": 0.2, "grad_norm": 2.445969343185425, "learning_rate": 1.8495326204462193e-05, "loss": 0.6922, "step": 7865 }, { "epoch": 0.2, "grad_norm": 1.4875818490982056, "learning_rate": 1.8494888244869553e-05, "loss": 0.6442, "step": 7866 }, { "epoch": 0.2, "grad_norm": 5.527311325073242, "learning_rate": 1.8494450226735484e-05, "loss": 0.6428, "step": 7867 }, { "epoch": 0.2, "grad_norm": 4.444491863250732, "learning_rate": 1.8494012150063e-05, "loss": 0.5912, "step": 7868 }, { "epoch": 0.2, "grad_norm": 3.1221706867218018, "learning_rate": 1.849357401485512e-05, "loss": 0.4981, "step": 7869 }, { "epoch": 0.2, "grad_norm": 3.9974374771118164, "learning_rate": 1.8493135821114864e-05, "loss": 0.583, "step": 7870 }, { "epoch": 0.2, "grad_norm": 1.2845443487167358, "learning_rate": 1.8492697568845252e-05, "loss": 0.6054, "step": 7871 }, { "epoch": 0.2, "grad_norm": 1.8520208597183228, "learning_rate": 1.8492259258049303e-05, "loss": 0.6732, "step": 7872 }, { "epoch": 0.2, "grad_norm": 2.1569063663482666, "learning_rate": 1.849182088873004e-05, "loss": 0.6602, "step": 7873 }, { "epoch": 0.2, "grad_norm": 3.1360700130462646, "learning_rate": 1.8491382460890482e-05, "loss": 0.5942, "step": 7874 }, { "epoch": 0.2, "grad_norm": 2.956223726272583, "learning_rate": 1.8490943974533653e-05, "loss": 0.8492, "step": 7875 }, { "epoch": 0.2, "grad_norm": 3.310903549194336, "learning_rate": 1.849050542966257e-05, "loss": 0.6225, "step": 7876 }, { "epoch": 0.2, "grad_norm": 3.9783012866973877, "learning_rate": 1.849006682628026e-05, "loss": 0.5598, "step": 7877 }, { "epoch": 0.2, "grad_norm": 2.331946849822998, "learning_rate": 1.8489628164389742e-05, "loss": 0.5973, "step": 7878 }, { "epoch": 0.2, "grad_norm": 2.4128665924072266, "learning_rate": 1.848918944399404e-05, "loss": 0.5768, "step": 7879 }, { "epoch": 0.2, "grad_norm": 6.330771446228027, "learning_rate": 1.8488750665096177e-05, "loss": 0.8835, "step": 7880 }, { "epoch": 0.2, "grad_norm": 1.1569420099258423, "learning_rate": 1.848831182769918e-05, "loss": 0.5779, "step": 7881 }, { "epoch": 0.2, "grad_norm": 2.457939624786377, "learning_rate": 1.8487872931806068e-05, "loss": 0.7468, "step": 7882 }, { "epoch": 0.2, "grad_norm": 4.655061721801758, "learning_rate": 1.848743397741987e-05, "loss": 0.7362, "step": 7883 }, { "epoch": 0.2, "grad_norm": 1.7381962537765503, "learning_rate": 1.8486994964543608e-05, "loss": 0.7827, "step": 7884 }, { "epoch": 0.2, "grad_norm": 2.841024398803711, "learning_rate": 1.8486555893180312e-05, "loss": 0.6617, "step": 7885 }, { "epoch": 0.2, "grad_norm": 2.354830741882324, "learning_rate": 1.8486116763333003e-05, "loss": 0.7076, "step": 7886 }, { "epoch": 0.2, "grad_norm": 2.4994778633117676, "learning_rate": 1.8485677575004707e-05, "loss": 0.6812, "step": 7887 }, { "epoch": 0.2, "grad_norm": 3.94828462600708, "learning_rate": 1.8485238328198454e-05, "loss": 0.7498, "step": 7888 }, { "epoch": 0.2, "grad_norm": 5.425349712371826, "learning_rate": 1.848479902291727e-05, "loss": 0.6763, "step": 7889 }, { "epoch": 0.2, "grad_norm": 4.289998531341553, "learning_rate": 1.848435965916418e-05, "loss": 0.5026, "step": 7890 }, { "epoch": 0.2, "grad_norm": 1.5562963485717773, "learning_rate": 1.8483920236942216e-05, "loss": 0.5588, "step": 7891 }, { "epoch": 0.2, "grad_norm": 3.1967649459838867, "learning_rate": 1.84834807562544e-05, "loss": 0.718, "step": 7892 }, { "epoch": 0.2, "grad_norm": 5.445417881011963, "learning_rate": 1.8483041217103768e-05, "loss": 0.7352, "step": 7893 }, { "epoch": 0.2, "grad_norm": 1.779735803604126, "learning_rate": 1.8482601619493342e-05, "loss": 0.8092, "step": 7894 }, { "epoch": 0.2, "grad_norm": 2.153996229171753, "learning_rate": 1.8482161963426158e-05, "loss": 0.7171, "step": 7895 }, { "epoch": 0.2, "grad_norm": 6.568982124328613, "learning_rate": 1.848172224890524e-05, "loss": 0.6211, "step": 7896 }, { "epoch": 0.2, "grad_norm": 2.987053632736206, "learning_rate": 1.8481282475933623e-05, "loss": 0.5525, "step": 7897 }, { "epoch": 0.2, "grad_norm": 2.136659622192383, "learning_rate": 1.8480842644514338e-05, "loss": 0.5786, "step": 7898 }, { "epoch": 0.2, "grad_norm": 3.6523325443267822, "learning_rate": 1.848040275465041e-05, "loss": 0.7312, "step": 7899 }, { "epoch": 0.2, "grad_norm": 2.060258626937866, "learning_rate": 1.8479962806344875e-05, "loss": 0.6357, "step": 7900 }, { "epoch": 0.2, "grad_norm": 0.9365465044975281, "learning_rate": 1.8479522799600764e-05, "loss": 0.5449, "step": 7901 }, { "epoch": 0.2, "grad_norm": 2.7179789543151855, "learning_rate": 1.847908273442111e-05, "loss": 0.682, "step": 7902 }, { "epoch": 0.2, "grad_norm": 2.26103138923645, "learning_rate": 1.8478642610808944e-05, "loss": 0.7524, "step": 7903 }, { "epoch": 0.2, "grad_norm": 1.9565366506576538, "learning_rate": 1.8478202428767303e-05, "loss": 0.6712, "step": 7904 }, { "epoch": 0.2, "grad_norm": 3.6805288791656494, "learning_rate": 1.8477762188299216e-05, "loss": 0.5732, "step": 7905 }, { "epoch": 0.2, "grad_norm": 3.256537914276123, "learning_rate": 1.8477321889407713e-05, "loss": 0.8096, "step": 7906 }, { "epoch": 0.2, "grad_norm": 3.050623893737793, "learning_rate": 1.8476881532095842e-05, "loss": 0.5898, "step": 7907 }, { "epoch": 0.2, "grad_norm": 2.761159896850586, "learning_rate": 1.847644111636662e-05, "loss": 0.5967, "step": 7908 }, { "epoch": 0.2, "grad_norm": 1.4796723127365112, "learning_rate": 1.84760006422231e-05, "loss": 0.549, "step": 7909 }, { "epoch": 0.2, "grad_norm": 1.9524253606796265, "learning_rate": 1.8475560109668304e-05, "loss": 0.6401, "step": 7910 }, { "epoch": 0.2, "grad_norm": 1.4946715831756592, "learning_rate": 1.8475119518705275e-05, "loss": 0.5565, "step": 7911 }, { "epoch": 0.2, "grad_norm": 2.782512903213501, "learning_rate": 1.847467886933704e-05, "loss": 0.5045, "step": 7912 }, { "epoch": 0.2, "grad_norm": 1.7283262014389038, "learning_rate": 1.847423816156665e-05, "loss": 0.6193, "step": 7913 }, { "epoch": 0.2, "grad_norm": 3.051145553588867, "learning_rate": 1.8473797395397132e-05, "loss": 0.5924, "step": 7914 }, { "epoch": 0.2, "grad_norm": 4.266486644744873, "learning_rate": 1.8473356570831527e-05, "loss": 0.8775, "step": 7915 }, { "epoch": 0.2, "grad_norm": 2.9642622470855713, "learning_rate": 1.847291568787287e-05, "loss": 0.8491, "step": 7916 }, { "epoch": 0.2, "grad_norm": 1.9615291357040405, "learning_rate": 1.8472474746524206e-05, "loss": 0.5751, "step": 7917 }, { "epoch": 0.2, "grad_norm": 2.5967609882354736, "learning_rate": 1.8472033746788565e-05, "loss": 0.6188, "step": 7918 }, { "epoch": 0.2, "grad_norm": 2.544330596923828, "learning_rate": 1.847159268866899e-05, "loss": 0.6384, "step": 7919 }, { "epoch": 0.2, "grad_norm": 3.261744976043701, "learning_rate": 1.8471151572168523e-05, "loss": 0.5674, "step": 7920 }, { "epoch": 0.2, "grad_norm": 3.2025814056396484, "learning_rate": 1.8470710397290196e-05, "loss": 0.644, "step": 7921 }, { "epoch": 0.2, "grad_norm": 4.7180256843566895, "learning_rate": 1.847026916403706e-05, "loss": 0.7307, "step": 7922 }, { "epoch": 0.2, "grad_norm": 3.6401827335357666, "learning_rate": 1.846982787241215e-05, "loss": 0.7317, "step": 7923 }, { "epoch": 0.2, "grad_norm": 2.83547043800354, "learning_rate": 1.8469386522418505e-05, "loss": 0.4382, "step": 7924 }, { "epoch": 0.2, "grad_norm": 1.7994462251663208, "learning_rate": 1.846894511405917e-05, "loss": 0.5792, "step": 7925 }, { "epoch": 0.2, "grad_norm": 1.575857400894165, "learning_rate": 1.8468503647337187e-05, "loss": 0.6843, "step": 7926 }, { "epoch": 0.2, "grad_norm": 2.2008416652679443, "learning_rate": 1.84680621222556e-05, "loss": 0.6119, "step": 7927 }, { "epoch": 0.2, "grad_norm": 3.116795063018799, "learning_rate": 1.8467620538817445e-05, "loss": 0.6672, "step": 7928 }, { "epoch": 0.2, "grad_norm": 1.647064208984375, "learning_rate": 1.846717889702577e-05, "loss": 0.6955, "step": 7929 }, { "epoch": 0.2, "grad_norm": 2.780143976211548, "learning_rate": 1.8466737196883615e-05, "loss": 0.7383, "step": 7930 }, { "epoch": 0.2, "grad_norm": 6.822174549102783, "learning_rate": 1.846629543839403e-05, "loss": 0.8393, "step": 7931 }, { "epoch": 0.2, "grad_norm": 3.1203203201293945, "learning_rate": 1.8465853621560055e-05, "loss": 0.5951, "step": 7932 }, { "epoch": 0.2, "grad_norm": 1.9637084007263184, "learning_rate": 1.846541174638474e-05, "loss": 0.892, "step": 7933 }, { "epoch": 0.2, "grad_norm": 2.5893664360046387, "learning_rate": 1.8464969812871118e-05, "loss": 0.6824, "step": 7934 }, { "epoch": 0.2, "grad_norm": 2.6242198944091797, "learning_rate": 1.8464527821022246e-05, "loss": 0.4727, "step": 7935 }, { "epoch": 0.2, "grad_norm": 8.415660858154297, "learning_rate": 1.8464085770841165e-05, "loss": 0.9661, "step": 7936 }, { "epoch": 0.2, "grad_norm": 5.652241230010986, "learning_rate": 1.8463643662330926e-05, "loss": 0.6099, "step": 7937 }, { "epoch": 0.2, "grad_norm": 2.5390074253082275, "learning_rate": 1.8463201495494568e-05, "loss": 0.499, "step": 7938 }, { "epoch": 0.2, "grad_norm": 4.134559154510498, "learning_rate": 1.8462759270335146e-05, "loss": 0.6773, "step": 7939 }, { "epoch": 0.2, "grad_norm": 2.3443551063537598, "learning_rate": 1.8462316986855703e-05, "loss": 0.5281, "step": 7940 }, { "epoch": 0.2, "grad_norm": 1.5117392539978027, "learning_rate": 1.8461874645059285e-05, "loss": 0.4899, "step": 7941 }, { "epoch": 0.2, "grad_norm": 4.440179347991943, "learning_rate": 1.8461432244948945e-05, "loss": 0.6951, "step": 7942 }, { "epoch": 0.2, "grad_norm": 2.7648019790649414, "learning_rate": 1.8460989786527732e-05, "loss": 0.8481, "step": 7943 }, { "epoch": 0.2, "grad_norm": 3.8628456592559814, "learning_rate": 1.846054726979869e-05, "loss": 0.8701, "step": 7944 }, { "epoch": 0.2, "grad_norm": 2.831799268722534, "learning_rate": 1.8460104694764874e-05, "loss": 0.7576, "step": 7945 }, { "epoch": 0.2, "grad_norm": 2.6499454975128174, "learning_rate": 1.8459662061429328e-05, "loss": 0.6694, "step": 7946 }, { "epoch": 0.2, "grad_norm": 1.7911032438278198, "learning_rate": 1.8459219369795113e-05, "loss": 0.5482, "step": 7947 }, { "epoch": 0.2, "grad_norm": 1.4863214492797852, "learning_rate": 1.8458776619865265e-05, "loss": 0.6715, "step": 7948 }, { "epoch": 0.2, "grad_norm": 1.932727336883545, "learning_rate": 1.8458333811642846e-05, "loss": 0.6342, "step": 7949 }, { "epoch": 0.2, "grad_norm": 2.4437248706817627, "learning_rate": 1.8457890945130907e-05, "loss": 0.5902, "step": 7950 }, { "epoch": 0.2, "grad_norm": 3.0399677753448486, "learning_rate": 1.8457448020332492e-05, "loss": 0.6009, "step": 7951 }, { "epoch": 0.2, "grad_norm": 2.571762800216675, "learning_rate": 1.8457005037250662e-05, "loss": 0.5583, "step": 7952 }, { "epoch": 0.2, "grad_norm": 2.218446731567383, "learning_rate": 1.8456561995888466e-05, "loss": 0.7331, "step": 7953 }, { "epoch": 0.2, "grad_norm": 2.44217848777771, "learning_rate": 1.8456118896248957e-05, "loss": 0.5489, "step": 7954 }, { "epoch": 0.2, "grad_norm": 2.187404155731201, "learning_rate": 1.845567573833519e-05, "loss": 0.6007, "step": 7955 }, { "epoch": 0.2, "grad_norm": 3.5303971767425537, "learning_rate": 1.8455232522150218e-05, "loss": 0.5394, "step": 7956 }, { "epoch": 0.2, "grad_norm": 2.204181671142578, "learning_rate": 1.8454789247697097e-05, "loss": 0.6332, "step": 7957 }, { "epoch": 0.2, "grad_norm": 4.490728855133057, "learning_rate": 1.8454345914978878e-05, "loss": 0.6746, "step": 7958 }, { "epoch": 0.2, "grad_norm": 2.558870553970337, "learning_rate": 1.8453902523998622e-05, "loss": 0.6179, "step": 7959 }, { "epoch": 0.2, "grad_norm": 2.308133125305176, "learning_rate": 1.8453459074759375e-05, "loss": 0.7004, "step": 7960 }, { "epoch": 0.2, "grad_norm": 1.664868712425232, "learning_rate": 1.8453015567264204e-05, "loss": 0.5477, "step": 7961 }, { "epoch": 0.2, "grad_norm": 2.262462615966797, "learning_rate": 1.845257200151616e-05, "loss": 0.6423, "step": 7962 }, { "epoch": 0.2, "grad_norm": 5.801109313964844, "learning_rate": 1.8452128377518298e-05, "loss": 0.7229, "step": 7963 }, { "epoch": 0.2, "grad_norm": 2.283085584640503, "learning_rate": 1.845168469527368e-05, "loss": 0.8121, "step": 7964 }, { "epoch": 0.2, "grad_norm": 3.5343081951141357, "learning_rate": 1.845124095478536e-05, "loss": 0.8044, "step": 7965 }, { "epoch": 0.2, "grad_norm": 1.869653344154358, "learning_rate": 1.8450797156056396e-05, "loss": 0.5968, "step": 7966 }, { "epoch": 0.2, "grad_norm": 2.3516275882720947, "learning_rate": 1.8450353299089848e-05, "loss": 0.594, "step": 7967 }, { "epoch": 0.2, "grad_norm": 2.7756779193878174, "learning_rate": 1.8449909383888775e-05, "loss": 0.7232, "step": 7968 }, { "epoch": 0.2, "grad_norm": 4.346944808959961, "learning_rate": 1.8449465410456233e-05, "loss": 0.7975, "step": 7969 }, { "epoch": 0.2, "grad_norm": 3.384105682373047, "learning_rate": 1.8449021378795285e-05, "loss": 0.7565, "step": 7970 }, { "epoch": 0.2, "grad_norm": 3.475403070449829, "learning_rate": 1.8448577288908988e-05, "loss": 0.6883, "step": 7971 }, { "epoch": 0.2, "grad_norm": 1.966576337814331, "learning_rate": 1.8448133140800407e-05, "loss": 0.6243, "step": 7972 }, { "epoch": 0.2, "grad_norm": 2.1026194095611572, "learning_rate": 1.8447688934472602e-05, "loss": 0.7198, "step": 7973 }, { "epoch": 0.2, "grad_norm": 1.3046109676361084, "learning_rate": 1.8447244669928626e-05, "loss": 0.6634, "step": 7974 }, { "epoch": 0.2, "grad_norm": 2.5786499977111816, "learning_rate": 1.844680034717155e-05, "loss": 0.6759, "step": 7975 }, { "epoch": 0.2, "grad_norm": 2.858863592147827, "learning_rate": 1.8446355966204432e-05, "loss": 0.5498, "step": 7976 }, { "epoch": 0.2, "grad_norm": 1.9936891794204712, "learning_rate": 1.8445911527030336e-05, "loss": 0.5515, "step": 7977 }, { "epoch": 0.2, "grad_norm": 2.680873394012451, "learning_rate": 1.8445467029652328e-05, "loss": 0.5609, "step": 7978 }, { "epoch": 0.2, "grad_norm": 1.4386756420135498, "learning_rate": 1.8445022474073462e-05, "loss": 0.4969, "step": 7979 }, { "epoch": 0.2, "grad_norm": 3.575418710708618, "learning_rate": 1.844457786029681e-05, "loss": 0.5283, "step": 7980 }, { "epoch": 0.2, "grad_norm": 1.7631202936172485, "learning_rate": 1.8444133188325428e-05, "loss": 0.4722, "step": 7981 }, { "epoch": 0.2, "grad_norm": 1.747603178024292, "learning_rate": 1.844368845816239e-05, "loss": 0.6152, "step": 7982 }, { "epoch": 0.2, "grad_norm": 1.541398286819458, "learning_rate": 1.8443243669810752e-05, "loss": 0.7079, "step": 7983 }, { "epoch": 0.2, "grad_norm": 3.9083614349365234, "learning_rate": 1.8442798823273585e-05, "loss": 0.6515, "step": 7984 }, { "epoch": 0.2, "grad_norm": 1.1719014644622803, "learning_rate": 1.8442353918553956e-05, "loss": 0.6066, "step": 7985 }, { "epoch": 0.2, "grad_norm": 1.519655466079712, "learning_rate": 1.8441908955654923e-05, "loss": 0.632, "step": 7986 }, { "epoch": 0.2, "grad_norm": 2.8779592514038086, "learning_rate": 1.844146393457956e-05, "loss": 0.6008, "step": 7987 }, { "epoch": 0.2, "grad_norm": 2.3826005458831787, "learning_rate": 1.844101885533093e-05, "loss": 0.6512, "step": 7988 }, { "epoch": 0.2, "grad_norm": 6.815363883972168, "learning_rate": 1.8440573717912102e-05, "loss": 0.5784, "step": 7989 }, { "epoch": 0.2, "grad_norm": 2.0781047344207764, "learning_rate": 1.8440128522326143e-05, "loss": 0.4272, "step": 7990 }, { "epoch": 0.2, "grad_norm": 3.747539758682251, "learning_rate": 1.843968326857612e-05, "loss": 0.7164, "step": 7991 }, { "epoch": 0.2, "grad_norm": 3.5717341899871826, "learning_rate": 1.8439237956665102e-05, "loss": 0.599, "step": 7992 }, { "epoch": 0.2, "grad_norm": 5.434354305267334, "learning_rate": 1.8438792586596156e-05, "loss": 0.765, "step": 7993 }, { "epoch": 0.2, "grad_norm": 2.132244110107422, "learning_rate": 1.8438347158372357e-05, "loss": 0.6157, "step": 7994 }, { "epoch": 0.2, "grad_norm": 2.286396026611328, "learning_rate": 1.8437901671996768e-05, "loss": 0.6698, "step": 7995 }, { "epoch": 0.2, "grad_norm": 2.305915355682373, "learning_rate": 1.8437456127472466e-05, "loss": 0.6428, "step": 7996 }, { "epoch": 0.2, "grad_norm": 3.604464292526245, "learning_rate": 1.8437010524802512e-05, "loss": 0.7427, "step": 7997 }, { "epoch": 0.2, "grad_norm": 3.1191868782043457, "learning_rate": 1.843656486398999e-05, "loss": 0.503, "step": 7998 }, { "epoch": 0.21, "grad_norm": 5.169461727142334, "learning_rate": 1.8436119145037953e-05, "loss": 0.7488, "step": 7999 }, { "epoch": 0.21, "grad_norm": 2.963498592376709, "learning_rate": 1.843567336794949e-05, "loss": 0.6554, "step": 8000 }, { "epoch": 0.21, "grad_norm": 4.147454261779785, "learning_rate": 1.8435227532727664e-05, "loss": 0.6893, "step": 8001 }, { "epoch": 0.21, "grad_norm": 2.8529558181762695, "learning_rate": 1.843478163937555e-05, "loss": 0.8226, "step": 8002 }, { "epoch": 0.21, "grad_norm": 3.448747158050537, "learning_rate": 1.843433568789622e-05, "loss": 0.6858, "step": 8003 }, { "epoch": 0.21, "grad_norm": 2.6137208938598633, "learning_rate": 1.8433889678292745e-05, "loss": 0.5554, "step": 8004 }, { "epoch": 0.21, "grad_norm": 3.3505964279174805, "learning_rate": 1.8433443610568202e-05, "loss": 0.7661, "step": 8005 }, { "epoch": 0.21, "grad_norm": 3.1776297092437744, "learning_rate": 1.8432997484725666e-05, "loss": 0.7852, "step": 8006 }, { "epoch": 0.21, "grad_norm": 1.8465152978897095, "learning_rate": 1.843255130076821e-05, "loss": 0.54, "step": 8007 }, { "epoch": 0.21, "grad_norm": 2.527334690093994, "learning_rate": 1.8432105058698905e-05, "loss": 0.8716, "step": 8008 }, { "epoch": 0.21, "grad_norm": 1.976277232170105, "learning_rate": 1.843165875852083e-05, "loss": 0.494, "step": 8009 }, { "epoch": 0.21, "grad_norm": 2.0738701820373535, "learning_rate": 1.8431212400237064e-05, "loss": 0.5264, "step": 8010 }, { "epoch": 0.21, "grad_norm": 3.0922467708587646, "learning_rate": 1.8430765983850677e-05, "loss": 0.6345, "step": 8011 }, { "epoch": 0.21, "grad_norm": 1.4281498193740845, "learning_rate": 1.8430319509364744e-05, "loss": 0.4342, "step": 8012 }, { "epoch": 0.21, "grad_norm": 2.1661179065704346, "learning_rate": 1.842987297678235e-05, "loss": 0.5053, "step": 8013 }, { "epoch": 0.21, "grad_norm": 1.9519811868667603, "learning_rate": 1.8429426386106567e-05, "loss": 0.6544, "step": 8014 }, { "epoch": 0.21, "grad_norm": 2.3822407722473145, "learning_rate": 1.842897973734047e-05, "loss": 0.7503, "step": 8015 }, { "epoch": 0.21, "grad_norm": 3.8120615482330322, "learning_rate": 1.8428533030487147e-05, "loss": 0.7008, "step": 8016 }, { "epoch": 0.21, "grad_norm": 1.3966180086135864, "learning_rate": 1.8428086265549663e-05, "loss": 0.6297, "step": 8017 }, { "epoch": 0.21, "grad_norm": 2.3221118450164795, "learning_rate": 1.8427639442531106e-05, "loss": 0.7753, "step": 8018 }, { "epoch": 0.21, "grad_norm": 1.4479351043701172, "learning_rate": 1.8427192561434552e-05, "loss": 0.7525, "step": 8019 }, { "epoch": 0.21, "grad_norm": 2.877714157104492, "learning_rate": 1.842674562226308e-05, "loss": 0.7345, "step": 8020 }, { "epoch": 0.21, "grad_norm": 2.4789090156555176, "learning_rate": 1.8426298625019775e-05, "loss": 0.6285, "step": 8021 }, { "epoch": 0.21, "grad_norm": 3.505060911178589, "learning_rate": 1.842585156970771e-05, "loss": 0.7301, "step": 8022 }, { "epoch": 0.21, "grad_norm": 1.5120683908462524, "learning_rate": 1.8425404456329973e-05, "loss": 0.6387, "step": 8023 }, { "epoch": 0.21, "grad_norm": 2.0859222412109375, "learning_rate": 1.8424957284889638e-05, "loss": 0.7102, "step": 8024 }, { "epoch": 0.21, "grad_norm": 1.7461285591125488, "learning_rate": 1.84245100553898e-05, "loss": 0.6105, "step": 8025 }, { "epoch": 0.21, "grad_norm": 1.7917507886886597, "learning_rate": 1.8424062767833518e-05, "loss": 0.7385, "step": 8026 }, { "epoch": 0.21, "grad_norm": 1.8241084814071655, "learning_rate": 1.8423615422223897e-05, "loss": 0.6407, "step": 8027 }, { "epoch": 0.21, "grad_norm": 2.0105271339416504, "learning_rate": 1.8423168018564007e-05, "loss": 0.6347, "step": 8028 }, { "epoch": 0.21, "grad_norm": 1.7412300109863281, "learning_rate": 1.8422720556856936e-05, "loss": 0.7518, "step": 8029 }, { "epoch": 0.21, "grad_norm": 1.467585563659668, "learning_rate": 1.8422273037105767e-05, "loss": 0.6925, "step": 8030 }, { "epoch": 0.21, "grad_norm": 1.6524522304534912, "learning_rate": 1.8421825459313582e-05, "loss": 0.7095, "step": 8031 }, { "epoch": 0.21, "grad_norm": 2.3074913024902344, "learning_rate": 1.842137782348347e-05, "loss": 0.506, "step": 8032 }, { "epoch": 0.21, "grad_norm": 1.9272772073745728, "learning_rate": 1.842093012961851e-05, "loss": 0.6493, "step": 8033 }, { "epoch": 0.21, "grad_norm": 1.588416576385498, "learning_rate": 1.842048237772179e-05, "loss": 0.5692, "step": 8034 }, { "epoch": 0.21, "grad_norm": 2.344454050064087, "learning_rate": 1.84200345677964e-05, "loss": 0.779, "step": 8035 }, { "epoch": 0.21, "grad_norm": 1.8635245561599731, "learning_rate": 1.841958669984542e-05, "loss": 0.5333, "step": 8036 }, { "epoch": 0.21, "grad_norm": 1.265173316001892, "learning_rate": 1.8419138773871935e-05, "loss": 0.7049, "step": 8037 }, { "epoch": 0.21, "grad_norm": 2.2975940704345703, "learning_rate": 1.8418690789879036e-05, "loss": 0.6551, "step": 8038 }, { "epoch": 0.21, "grad_norm": 3.8202438354492188, "learning_rate": 1.841824274786981e-05, "loss": 0.711, "step": 8039 }, { "epoch": 0.21, "grad_norm": 2.396714687347412, "learning_rate": 1.8417794647847344e-05, "loss": 0.5917, "step": 8040 }, { "epoch": 0.21, "grad_norm": 2.234633684158325, "learning_rate": 1.8417346489814728e-05, "loss": 0.6914, "step": 8041 }, { "epoch": 0.21, "grad_norm": 5.925871849060059, "learning_rate": 1.8416898273775046e-05, "loss": 0.5531, "step": 8042 }, { "epoch": 0.21, "grad_norm": 2.241934299468994, "learning_rate": 1.8416449999731388e-05, "loss": 0.6036, "step": 8043 }, { "epoch": 0.21, "grad_norm": 2.4025487899780273, "learning_rate": 1.8416001667686844e-05, "loss": 0.6416, "step": 8044 }, { "epoch": 0.21, "grad_norm": 5.133525848388672, "learning_rate": 1.8415553277644507e-05, "loss": 0.7633, "step": 8045 }, { "epoch": 0.21, "grad_norm": 3.54101824760437, "learning_rate": 1.841510482960746e-05, "loss": 0.8129, "step": 8046 }, { "epoch": 0.21, "grad_norm": 1.5540934801101685, "learning_rate": 1.8414656323578805e-05, "loss": 0.6146, "step": 8047 }, { "epoch": 0.21, "grad_norm": 1.9033911228179932, "learning_rate": 1.8414207759561616e-05, "loss": 0.7282, "step": 8048 }, { "epoch": 0.21, "grad_norm": 1.5401959419250488, "learning_rate": 1.8413759137559e-05, "loss": 0.4923, "step": 8049 }, { "epoch": 0.21, "grad_norm": 4.166677951812744, "learning_rate": 1.8413310457574038e-05, "loss": 0.7674, "step": 8050 }, { "epoch": 0.21, "grad_norm": 2.3500452041625977, "learning_rate": 1.8412861719609828e-05, "loss": 0.5591, "step": 8051 }, { "epoch": 0.21, "grad_norm": 5.482210159301758, "learning_rate": 1.841241292366946e-05, "loss": 0.6306, "step": 8052 }, { "epoch": 0.21, "grad_norm": 3.249082326889038, "learning_rate": 1.8411964069756027e-05, "loss": 0.5542, "step": 8053 }, { "epoch": 0.21, "grad_norm": 3.848322629928589, "learning_rate": 1.8411515157872622e-05, "loss": 0.5819, "step": 8054 }, { "epoch": 0.21, "grad_norm": 1.98776113986969, "learning_rate": 1.8411066188022345e-05, "loss": 0.5586, "step": 8055 }, { "epoch": 0.21, "grad_norm": 3.1996238231658936, "learning_rate": 1.841061716020828e-05, "loss": 0.5606, "step": 8056 }, { "epoch": 0.21, "grad_norm": 1.1785354614257812, "learning_rate": 1.841016807443352e-05, "loss": 0.6684, "step": 8057 }, { "epoch": 0.21, "grad_norm": 1.1273791790008545, "learning_rate": 1.840971893070117e-05, "loss": 0.5503, "step": 8058 }, { "epoch": 0.21, "grad_norm": 2.0669078826904297, "learning_rate": 1.8409269729014324e-05, "loss": 0.5091, "step": 8059 }, { "epoch": 0.21, "grad_norm": 1.4911595582962036, "learning_rate": 1.840882046937607e-05, "loss": 0.5413, "step": 8060 }, { "epoch": 0.21, "grad_norm": 1.8956809043884277, "learning_rate": 1.840837115178951e-05, "loss": 0.5944, "step": 8061 }, { "epoch": 0.21, "grad_norm": 5.403730869293213, "learning_rate": 1.840792177625774e-05, "loss": 0.7926, "step": 8062 }, { "epoch": 0.21, "grad_norm": 1.6610333919525146, "learning_rate": 1.8407472342783852e-05, "loss": 0.5836, "step": 8063 }, { "epoch": 0.21, "grad_norm": 1.7910702228546143, "learning_rate": 1.8407022851370947e-05, "loss": 0.5023, "step": 8064 }, { "epoch": 0.21, "grad_norm": 1.7542380094528198, "learning_rate": 1.8406573302022127e-05, "loss": 0.6157, "step": 8065 }, { "epoch": 0.21, "grad_norm": 2.153505325317383, "learning_rate": 1.8406123694740482e-05, "loss": 0.6197, "step": 8066 }, { "epoch": 0.21, "grad_norm": 1.8961281776428223, "learning_rate": 1.840567402952911e-05, "loss": 0.6751, "step": 8067 }, { "epoch": 0.21, "grad_norm": 1.5738526582717896, "learning_rate": 1.840522430639112e-05, "loss": 0.6489, "step": 8068 }, { "epoch": 0.21, "grad_norm": 2.2032525539398193, "learning_rate": 1.8404774525329603e-05, "loss": 0.8024, "step": 8069 }, { "epoch": 0.21, "grad_norm": 2.8360421657562256, "learning_rate": 1.8404324686347658e-05, "loss": 0.5918, "step": 8070 }, { "epoch": 0.21, "grad_norm": 2.324103593826294, "learning_rate": 1.840387478944839e-05, "loss": 0.6066, "step": 8071 }, { "epoch": 0.21, "grad_norm": 1.5854476690292358, "learning_rate": 1.8403424834634895e-05, "loss": 0.6138, "step": 8072 }, { "epoch": 0.21, "grad_norm": 2.638038158416748, "learning_rate": 1.8402974821910277e-05, "loss": 0.7159, "step": 8073 }, { "epoch": 0.21, "grad_norm": 5.124494552612305, "learning_rate": 1.8402524751277635e-05, "loss": 0.5403, "step": 8074 }, { "epoch": 0.21, "grad_norm": 1.5494577884674072, "learning_rate": 1.8402074622740073e-05, "loss": 0.5808, "step": 8075 }, { "epoch": 0.21, "grad_norm": 3.795124053955078, "learning_rate": 1.8401624436300687e-05, "loss": 0.7262, "step": 8076 }, { "epoch": 0.21, "grad_norm": 1.9503625631332397, "learning_rate": 1.8401174191962587e-05, "loss": 0.7118, "step": 8077 }, { "epoch": 0.21, "grad_norm": 1.337626576423645, "learning_rate": 1.8400723889728874e-05, "loss": 0.5117, "step": 8078 }, { "epoch": 0.21, "grad_norm": 3.437300682067871, "learning_rate": 1.8400273529602648e-05, "loss": 0.6657, "step": 8079 }, { "epoch": 0.21, "grad_norm": 2.7656476497650146, "learning_rate": 1.8399823111587012e-05, "loss": 0.5929, "step": 8080 }, { "epoch": 0.21, "grad_norm": 1.7483696937561035, "learning_rate": 1.8399372635685076e-05, "loss": 0.6234, "step": 8081 }, { "epoch": 0.21, "grad_norm": 1.0841392278671265, "learning_rate": 1.839892210189994e-05, "loss": 0.5606, "step": 8082 }, { "epoch": 0.21, "grad_norm": 1.3062069416046143, "learning_rate": 1.839847151023471e-05, "loss": 0.6082, "step": 8083 }, { "epoch": 0.21, "grad_norm": 2.418919086456299, "learning_rate": 1.839802086069249e-05, "loss": 0.7711, "step": 8084 }, { "epoch": 0.21, "grad_norm": 4.030369758605957, "learning_rate": 1.8397570153276385e-05, "loss": 0.6112, "step": 8085 }, { "epoch": 0.21, "grad_norm": 3.113548755645752, "learning_rate": 1.8397119387989505e-05, "loss": 0.5692, "step": 8086 }, { "epoch": 0.21, "grad_norm": 1.688241720199585, "learning_rate": 1.839666856483495e-05, "loss": 0.6722, "step": 8087 }, { "epoch": 0.21, "grad_norm": 2.188044786453247, "learning_rate": 1.8396217683815836e-05, "loss": 0.7514, "step": 8088 }, { "epoch": 0.21, "grad_norm": 1.6411925554275513, "learning_rate": 1.8395766744935263e-05, "loss": 0.4985, "step": 8089 }, { "epoch": 0.21, "grad_norm": 2.4182910919189453, "learning_rate": 1.839531574819634e-05, "loss": 0.7366, "step": 8090 }, { "epoch": 0.21, "grad_norm": 2.9422082901000977, "learning_rate": 1.839486469360217e-05, "loss": 0.673, "step": 8091 }, { "epoch": 0.21, "grad_norm": 3.4618098735809326, "learning_rate": 1.8394413581155875e-05, "loss": 0.4818, "step": 8092 }, { "epoch": 0.21, "grad_norm": 3.0024759769439697, "learning_rate": 1.839396241086055e-05, "loss": 0.5893, "step": 8093 }, { "epoch": 0.21, "grad_norm": 2.302924394607544, "learning_rate": 1.839351118271931e-05, "loss": 0.6049, "step": 8094 }, { "epoch": 0.21, "grad_norm": 2.310570001602173, "learning_rate": 1.8393059896735268e-05, "loss": 0.6541, "step": 8095 }, { "epoch": 0.21, "grad_norm": 1.6436960697174072, "learning_rate": 1.8392608552911526e-05, "loss": 0.7967, "step": 8096 }, { "epoch": 0.21, "grad_norm": 2.088999032974243, "learning_rate": 1.83921571512512e-05, "loss": 0.6589, "step": 8097 }, { "epoch": 0.21, "grad_norm": 4.976990222930908, "learning_rate": 1.83917056917574e-05, "loss": 0.8887, "step": 8098 }, { "epoch": 0.21, "grad_norm": 1.5645146369934082, "learning_rate": 1.8391254174433237e-05, "loss": 0.6773, "step": 8099 }, { "epoch": 0.21, "grad_norm": 2.899562120437622, "learning_rate": 1.8390802599281823e-05, "loss": 0.5067, "step": 8100 }, { "epoch": 0.21, "grad_norm": 2.2557425498962402, "learning_rate": 1.8390350966306268e-05, "loss": 0.7125, "step": 8101 }, { "epoch": 0.21, "grad_norm": 3.003685712814331, "learning_rate": 1.8389899275509683e-05, "loss": 0.9218, "step": 8102 }, { "epoch": 0.21, "grad_norm": 2.8511908054351807, "learning_rate": 1.8389447526895187e-05, "loss": 0.7841, "step": 8103 }, { "epoch": 0.21, "grad_norm": 1.6831200122833252, "learning_rate": 1.8388995720465885e-05, "loss": 0.7092, "step": 8104 }, { "epoch": 0.21, "grad_norm": 2.513835906982422, "learning_rate": 1.83885438562249e-05, "loss": 0.6636, "step": 8105 }, { "epoch": 0.21, "grad_norm": 3.024359941482544, "learning_rate": 1.8388091934175337e-05, "loss": 0.636, "step": 8106 }, { "epoch": 0.21, "grad_norm": 2.247724771499634, "learning_rate": 1.838763995432032e-05, "loss": 0.6846, "step": 8107 }, { "epoch": 0.21, "grad_norm": 1.7158781290054321, "learning_rate": 1.838718791666295e-05, "loss": 0.5147, "step": 8108 }, { "epoch": 0.21, "grad_norm": 2.7705421447753906, "learning_rate": 1.8386735821206354e-05, "loss": 0.6857, "step": 8109 }, { "epoch": 0.21, "grad_norm": 4.694155693054199, "learning_rate": 1.8386283667953645e-05, "loss": 0.7153, "step": 8110 }, { "epoch": 0.21, "grad_norm": 2.25935697555542, "learning_rate": 1.8385831456907936e-05, "loss": 0.6638, "step": 8111 }, { "epoch": 0.21, "grad_norm": 4.722159385681152, "learning_rate": 1.8385379188072347e-05, "loss": 0.6987, "step": 8112 }, { "epoch": 0.21, "grad_norm": 1.785338282585144, "learning_rate": 1.8384926861449992e-05, "loss": 0.7555, "step": 8113 }, { "epoch": 0.21, "grad_norm": 3.232877731323242, "learning_rate": 1.838447447704399e-05, "loss": 0.4988, "step": 8114 }, { "epoch": 0.21, "grad_norm": 7.971562385559082, "learning_rate": 1.8384022034857455e-05, "loss": 0.7976, "step": 8115 }, { "epoch": 0.21, "grad_norm": 3.0315303802490234, "learning_rate": 1.838356953489351e-05, "loss": 0.6611, "step": 8116 }, { "epoch": 0.21, "grad_norm": 2.19521427154541, "learning_rate": 1.8383116977155267e-05, "loss": 0.551, "step": 8117 }, { "epoch": 0.21, "grad_norm": 3.4974441528320312, "learning_rate": 1.8382664361645854e-05, "loss": 0.7739, "step": 8118 }, { "epoch": 0.21, "grad_norm": 2.3895370960235596, "learning_rate": 1.838221168836838e-05, "loss": 0.5463, "step": 8119 }, { "epoch": 0.21, "grad_norm": 4.895572185516357, "learning_rate": 1.8381758957325974e-05, "loss": 0.6997, "step": 8120 }, { "epoch": 0.21, "grad_norm": 4.016380310058594, "learning_rate": 1.8381306168521748e-05, "loss": 0.7635, "step": 8121 }, { "epoch": 0.21, "grad_norm": 3.050143241882324, "learning_rate": 1.8380853321958826e-05, "loss": 0.6926, "step": 8122 }, { "epoch": 0.21, "grad_norm": 2.01338791847229, "learning_rate": 1.8380400417640326e-05, "loss": 0.6268, "step": 8123 }, { "epoch": 0.21, "grad_norm": 2.5343501567840576, "learning_rate": 1.8379947455569375e-05, "loss": 0.8399, "step": 8124 }, { "epoch": 0.21, "grad_norm": 2.8885788917541504, "learning_rate": 1.837949443574909e-05, "loss": 0.7205, "step": 8125 }, { "epoch": 0.21, "grad_norm": 1.675404667854309, "learning_rate": 1.8379041358182593e-05, "loss": 0.6167, "step": 8126 }, { "epoch": 0.21, "grad_norm": 1.8621714115142822, "learning_rate": 1.8378588222873008e-05, "loss": 0.6281, "step": 8127 }, { "epoch": 0.21, "grad_norm": 1.8341268301010132, "learning_rate": 1.8378135029823456e-05, "loss": 0.6236, "step": 8128 }, { "epoch": 0.21, "grad_norm": 3.588951587677002, "learning_rate": 1.8377681779037062e-05, "loss": 0.587, "step": 8129 }, { "epoch": 0.21, "grad_norm": 3.464176654815674, "learning_rate": 1.8377228470516947e-05, "loss": 0.7154, "step": 8130 }, { "epoch": 0.21, "grad_norm": 3.1113147735595703, "learning_rate": 1.8376775104266237e-05, "loss": 0.7085, "step": 8131 }, { "epoch": 0.21, "grad_norm": 1.557812213897705, "learning_rate": 1.8376321680288057e-05, "loss": 0.7058, "step": 8132 }, { "epoch": 0.21, "grad_norm": 2.672067165374756, "learning_rate": 1.837586819858553e-05, "loss": 0.612, "step": 8133 }, { "epoch": 0.21, "grad_norm": 1.5380923748016357, "learning_rate": 1.837541465916178e-05, "loss": 0.5645, "step": 8134 }, { "epoch": 0.21, "grad_norm": 6.350584506988525, "learning_rate": 1.8374961062019938e-05, "loss": 0.6839, "step": 8135 }, { "epoch": 0.21, "grad_norm": 1.6676403284072876, "learning_rate": 1.837450740716312e-05, "loss": 0.6236, "step": 8136 }, { "epoch": 0.21, "grad_norm": 2.383315324783325, "learning_rate": 1.8374053694594466e-05, "loss": 0.6869, "step": 8137 }, { "epoch": 0.21, "grad_norm": 2.1140694618225098, "learning_rate": 1.837359992431709e-05, "loss": 0.5985, "step": 8138 }, { "epoch": 0.21, "grad_norm": 2.0709245204925537, "learning_rate": 1.8373146096334126e-05, "loss": 0.5951, "step": 8139 }, { "epoch": 0.21, "grad_norm": 2.032954454421997, "learning_rate": 1.8372692210648697e-05, "loss": 0.6631, "step": 8140 }, { "epoch": 0.21, "grad_norm": 4.021633625030518, "learning_rate": 1.8372238267263934e-05, "loss": 0.566, "step": 8141 }, { "epoch": 0.21, "grad_norm": 2.0050675868988037, "learning_rate": 1.837178426618297e-05, "loss": 0.5934, "step": 8142 }, { "epoch": 0.21, "grad_norm": 1.9912238121032715, "learning_rate": 1.8371330207408923e-05, "loss": 0.5576, "step": 8143 }, { "epoch": 0.21, "grad_norm": 1.6453206539154053, "learning_rate": 1.837087609094493e-05, "loss": 0.7019, "step": 8144 }, { "epoch": 0.21, "grad_norm": 2.103929042816162, "learning_rate": 1.8370421916794117e-05, "loss": 0.4992, "step": 8145 }, { "epoch": 0.21, "grad_norm": 1.701769232749939, "learning_rate": 1.836996768495961e-05, "loss": 0.6656, "step": 8146 }, { "epoch": 0.21, "grad_norm": 1.8057835102081299, "learning_rate": 1.8369513395444555e-05, "loss": 0.6142, "step": 8147 }, { "epoch": 0.21, "grad_norm": 2.216707468032837, "learning_rate": 1.8369059048252063e-05, "loss": 0.5757, "step": 8148 }, { "epoch": 0.21, "grad_norm": 2.633493661880493, "learning_rate": 1.8368604643385278e-05, "loss": 0.6664, "step": 8149 }, { "epoch": 0.21, "grad_norm": 3.154982566833496, "learning_rate": 1.836815018084733e-05, "loss": 0.7445, "step": 8150 }, { "epoch": 0.21, "grad_norm": 2.9197115898132324, "learning_rate": 1.836769566064134e-05, "loss": 0.7078, "step": 8151 }, { "epoch": 0.21, "grad_norm": 1.5954326391220093, "learning_rate": 1.836724108277046e-05, "loss": 0.4825, "step": 8152 }, { "epoch": 0.21, "grad_norm": 1.874630331993103, "learning_rate": 1.8366786447237804e-05, "loss": 0.6469, "step": 8153 }, { "epoch": 0.21, "grad_norm": 3.032667398452759, "learning_rate": 1.8366331754046513e-05, "loss": 0.581, "step": 8154 }, { "epoch": 0.21, "grad_norm": 4.208440780639648, "learning_rate": 1.8365877003199718e-05, "loss": 0.6078, "step": 8155 }, { "epoch": 0.21, "grad_norm": 2.4645018577575684, "learning_rate": 1.8365422194700557e-05, "loss": 0.674, "step": 8156 }, { "epoch": 0.21, "grad_norm": 4.600740909576416, "learning_rate": 1.8364967328552165e-05, "loss": 0.6101, "step": 8157 }, { "epoch": 0.21, "grad_norm": 3.2821059226989746, "learning_rate": 1.8364512404757668e-05, "loss": 0.6552, "step": 8158 }, { "epoch": 0.21, "grad_norm": 2.9480841159820557, "learning_rate": 1.836405742332021e-05, "loss": 0.5975, "step": 8159 }, { "epoch": 0.21, "grad_norm": 3.3979406356811523, "learning_rate": 1.836360238424292e-05, "loss": 0.5381, "step": 8160 }, { "epoch": 0.21, "grad_norm": 2.3131942749023438, "learning_rate": 1.836314728752894e-05, "loss": 0.6536, "step": 8161 }, { "epoch": 0.21, "grad_norm": 2.095970869064331, "learning_rate": 1.8362692133181404e-05, "loss": 0.5257, "step": 8162 }, { "epoch": 0.21, "grad_norm": 1.9018299579620361, "learning_rate": 1.8362236921203445e-05, "loss": 0.497, "step": 8163 }, { "epoch": 0.21, "grad_norm": 1.3548606634140015, "learning_rate": 1.8361781651598203e-05, "loss": 0.5529, "step": 8164 }, { "epoch": 0.21, "grad_norm": 2.28381085395813, "learning_rate": 1.8361326324368815e-05, "loss": 0.5926, "step": 8165 }, { "epoch": 0.21, "grad_norm": 5.227548599243164, "learning_rate": 1.8360870939518422e-05, "loss": 0.5622, "step": 8166 }, { "epoch": 0.21, "grad_norm": 1.5750404596328735, "learning_rate": 1.8360415497050157e-05, "loss": 0.6099, "step": 8167 }, { "epoch": 0.21, "grad_norm": 1.6717053651809692, "learning_rate": 1.835995999696716e-05, "loss": 0.6108, "step": 8168 }, { "epoch": 0.21, "grad_norm": 4.2691144943237305, "learning_rate": 1.8359504439272574e-05, "loss": 0.6943, "step": 8169 }, { "epoch": 0.21, "grad_norm": 1.4630038738250732, "learning_rate": 1.835904882396953e-05, "loss": 0.6815, "step": 8170 }, { "epoch": 0.21, "grad_norm": 1.9515687227249146, "learning_rate": 1.835859315106118e-05, "loss": 0.7003, "step": 8171 }, { "epoch": 0.21, "grad_norm": 1.885182499885559, "learning_rate": 1.8358137420550653e-05, "loss": 0.8611, "step": 8172 }, { "epoch": 0.21, "grad_norm": 4.621291637420654, "learning_rate": 1.8357681632441094e-05, "loss": 0.7525, "step": 8173 }, { "epoch": 0.21, "grad_norm": 1.9527807235717773, "learning_rate": 1.8357225786735643e-05, "loss": 0.5626, "step": 8174 }, { "epoch": 0.21, "grad_norm": 7.353684902191162, "learning_rate": 1.8356769883437446e-05, "loss": 0.7906, "step": 8175 }, { "epoch": 0.21, "grad_norm": 1.6003303527832031, "learning_rate": 1.835631392254964e-05, "loss": 0.5845, "step": 8176 }, { "epoch": 0.21, "grad_norm": 1.799054503440857, "learning_rate": 1.835585790407537e-05, "loss": 0.6723, "step": 8177 }, { "epoch": 0.21, "grad_norm": 11.071527481079102, "learning_rate": 1.8355401828017772e-05, "loss": 0.881, "step": 8178 }, { "epoch": 0.21, "grad_norm": 1.6279006004333496, "learning_rate": 1.8354945694379998e-05, "loss": 0.6091, "step": 8179 }, { "epoch": 0.21, "grad_norm": 1.40259850025177, "learning_rate": 1.8354489503165186e-05, "loss": 0.5954, "step": 8180 }, { "epoch": 0.21, "grad_norm": 1.7169362306594849, "learning_rate": 1.8354033254376484e-05, "loss": 0.6296, "step": 8181 }, { "epoch": 0.21, "grad_norm": 4.546257019042969, "learning_rate": 1.835357694801703e-05, "loss": 0.8687, "step": 8182 }, { "epoch": 0.21, "grad_norm": 2.8857359886169434, "learning_rate": 1.8353120584089973e-05, "loss": 0.8513, "step": 8183 }, { "epoch": 0.21, "grad_norm": 2.32002854347229, "learning_rate": 1.8352664162598457e-05, "loss": 0.6642, "step": 8184 }, { "epoch": 0.21, "grad_norm": 2.4947211742401123, "learning_rate": 1.8352207683545623e-05, "loss": 0.6701, "step": 8185 }, { "epoch": 0.21, "grad_norm": 1.6427313089370728, "learning_rate": 1.8351751146934628e-05, "loss": 0.7341, "step": 8186 }, { "epoch": 0.21, "grad_norm": 2.4226882457733154, "learning_rate": 1.8351294552768608e-05, "loss": 0.8331, "step": 8187 }, { "epoch": 0.21, "grad_norm": 8.226033210754395, "learning_rate": 1.8350837901050713e-05, "loss": 0.5406, "step": 8188 }, { "epoch": 0.21, "grad_norm": 1.7822198867797852, "learning_rate": 1.835038119178409e-05, "loss": 0.6562, "step": 8189 }, { "epoch": 0.21, "grad_norm": 6.148187160491943, "learning_rate": 1.8349924424971884e-05, "loss": 0.9336, "step": 8190 }, { "epoch": 0.21, "grad_norm": 4.503020763397217, "learning_rate": 1.8349467600617247e-05, "loss": 0.8306, "step": 8191 }, { "epoch": 0.21, "grad_norm": 3.1241250038146973, "learning_rate": 1.8349010718723326e-05, "loss": 0.695, "step": 8192 }, { "epoch": 0.21, "grad_norm": 2.8261027336120605, "learning_rate": 1.8348553779293267e-05, "loss": 0.6062, "step": 8193 }, { "epoch": 0.21, "grad_norm": 1.5859252214431763, "learning_rate": 1.834809678233022e-05, "loss": 0.569, "step": 8194 }, { "epoch": 0.21, "grad_norm": 2.5425708293914795, "learning_rate": 1.8347639727837333e-05, "loss": 0.602, "step": 8195 }, { "epoch": 0.21, "grad_norm": 1.9644662141799927, "learning_rate": 1.834718261581776e-05, "loss": 0.6259, "step": 8196 }, { "epoch": 0.21, "grad_norm": 2.5766732692718506, "learning_rate": 1.834672544627465e-05, "loss": 0.799, "step": 8197 }, { "epoch": 0.21, "grad_norm": 2.8705286979675293, "learning_rate": 1.834626821921115e-05, "loss": 0.7215, "step": 8198 }, { "epoch": 0.21, "grad_norm": 1.401002049446106, "learning_rate": 1.8345810934630412e-05, "loss": 0.6908, "step": 8199 }, { "epoch": 0.21, "grad_norm": 2.6062798500061035, "learning_rate": 1.834535359253559e-05, "loss": 0.5551, "step": 8200 }, { "epoch": 0.21, "grad_norm": 4.916840076446533, "learning_rate": 1.8344896192929833e-05, "loss": 0.6874, "step": 8201 }, { "epoch": 0.21, "grad_norm": 3.7235872745513916, "learning_rate": 1.8344438735816295e-05, "loss": 0.8171, "step": 8202 }, { "epoch": 0.21, "grad_norm": 2.527486562728882, "learning_rate": 1.8343981221198128e-05, "loss": 0.6154, "step": 8203 }, { "epoch": 0.21, "grad_norm": 3.365513324737549, "learning_rate": 1.8343523649078486e-05, "loss": 0.7209, "step": 8204 }, { "epoch": 0.21, "grad_norm": 2.595902919769287, "learning_rate": 1.8343066019460515e-05, "loss": 0.7054, "step": 8205 }, { "epoch": 0.21, "grad_norm": 2.0800814628601074, "learning_rate": 1.834260833234738e-05, "loss": 0.5985, "step": 8206 }, { "epoch": 0.21, "grad_norm": 6.839323043823242, "learning_rate": 1.834215058774223e-05, "loss": 0.6925, "step": 8207 }, { "epoch": 0.21, "grad_norm": 6.089657783508301, "learning_rate": 1.8341692785648218e-05, "loss": 0.7293, "step": 8208 }, { "epoch": 0.21, "grad_norm": 2.1209166049957275, "learning_rate": 1.83412349260685e-05, "loss": 0.6299, "step": 8209 }, { "epoch": 0.21, "grad_norm": 2.2436444759368896, "learning_rate": 1.8340777009006233e-05, "loss": 0.5025, "step": 8210 }, { "epoch": 0.21, "grad_norm": 1.593696117401123, "learning_rate": 1.8340319034464568e-05, "loss": 0.6086, "step": 8211 }, { "epoch": 0.21, "grad_norm": 2.606574296951294, "learning_rate": 1.8339861002446665e-05, "loss": 0.6438, "step": 8212 }, { "epoch": 0.21, "grad_norm": 3.486875057220459, "learning_rate": 1.833940291295568e-05, "loss": 0.6185, "step": 8213 }, { "epoch": 0.21, "grad_norm": 3.0721797943115234, "learning_rate": 1.833894476599477e-05, "loss": 0.7461, "step": 8214 }, { "epoch": 0.21, "grad_norm": 2.535513401031494, "learning_rate": 1.833848656156709e-05, "loss": 0.6135, "step": 8215 }, { "epoch": 0.21, "grad_norm": 0.9457362294197083, "learning_rate": 1.83380282996758e-05, "loss": 0.5506, "step": 8216 }, { "epoch": 0.21, "grad_norm": 1.265581488609314, "learning_rate": 1.8337569980324058e-05, "loss": 0.5319, "step": 8217 }, { "epoch": 0.21, "grad_norm": 1.3799585103988647, "learning_rate": 1.8337111603515025e-05, "loss": 0.6365, "step": 8218 }, { "epoch": 0.21, "grad_norm": 4.118235111236572, "learning_rate": 1.833665316925185e-05, "loss": 0.701, "step": 8219 }, { "epoch": 0.21, "grad_norm": 4.031488418579102, "learning_rate": 1.8336194677537703e-05, "loss": 0.7042, "step": 8220 }, { "epoch": 0.21, "grad_norm": 2.2478833198547363, "learning_rate": 1.833573612837574e-05, "loss": 0.7243, "step": 8221 }, { "epoch": 0.21, "grad_norm": 1.2420270442962646, "learning_rate": 1.8335277521769122e-05, "loss": 0.5292, "step": 8222 }, { "epoch": 0.21, "grad_norm": 1.761700987815857, "learning_rate": 1.8334818857721e-05, "loss": 0.567, "step": 8223 }, { "epoch": 0.21, "grad_norm": 2.2279610633850098, "learning_rate": 1.8334360136234552e-05, "loss": 0.5403, "step": 8224 }, { "epoch": 0.21, "grad_norm": 1.3572667837142944, "learning_rate": 1.8333901357312928e-05, "loss": 0.6375, "step": 8225 }, { "epoch": 0.21, "grad_norm": 1.700724720954895, "learning_rate": 1.833344252095929e-05, "loss": 0.5462, "step": 8226 }, { "epoch": 0.21, "grad_norm": 1.7499547004699707, "learning_rate": 1.83329836271768e-05, "loss": 0.5774, "step": 8227 }, { "epoch": 0.21, "grad_norm": 1.800826072692871, "learning_rate": 1.8332524675968626e-05, "loss": 0.7086, "step": 8228 }, { "epoch": 0.21, "grad_norm": 2.044349193572998, "learning_rate": 1.8332065667337927e-05, "loss": 0.6397, "step": 8229 }, { "epoch": 0.21, "grad_norm": 3.7251217365264893, "learning_rate": 1.8331606601287865e-05, "loss": 0.6936, "step": 8230 }, { "epoch": 0.21, "grad_norm": 1.6238542795181274, "learning_rate": 1.8331147477821602e-05, "loss": 0.5455, "step": 8231 }, { "epoch": 0.21, "grad_norm": 3.7999870777130127, "learning_rate": 1.833068829694231e-05, "loss": 0.6317, "step": 8232 }, { "epoch": 0.21, "grad_norm": 14.484721183776855, "learning_rate": 1.833022905865314e-05, "loss": 0.7504, "step": 8233 }, { "epoch": 0.21, "grad_norm": 2.623053550720215, "learning_rate": 1.8329769762957274e-05, "loss": 0.676, "step": 8234 }, { "epoch": 0.21, "grad_norm": 2.1268670558929443, "learning_rate": 1.8329310409857862e-05, "loss": 0.6449, "step": 8235 }, { "epoch": 0.21, "grad_norm": 1.655460000038147, "learning_rate": 1.832885099935808e-05, "loss": 0.6113, "step": 8236 }, { "epoch": 0.21, "grad_norm": 1.5751975774765015, "learning_rate": 1.8328391531461086e-05, "loss": 0.7041, "step": 8237 }, { "epoch": 0.21, "grad_norm": 2.410632610321045, "learning_rate": 1.832793200617005e-05, "loss": 0.7312, "step": 8238 }, { "epoch": 0.21, "grad_norm": 2.4043996334075928, "learning_rate": 1.832747242348814e-05, "loss": 0.6503, "step": 8239 }, { "epoch": 0.21, "grad_norm": 3.066246747970581, "learning_rate": 1.8327012783418522e-05, "loss": 0.6076, "step": 8240 }, { "epoch": 0.21, "grad_norm": 2.538736343383789, "learning_rate": 1.8326553085964364e-05, "loss": 0.5798, "step": 8241 }, { "epoch": 0.21, "grad_norm": 4.313669681549072, "learning_rate": 1.832609333112883e-05, "loss": 0.6458, "step": 8242 }, { "epoch": 0.21, "grad_norm": 4.216272830963135, "learning_rate": 1.8325633518915097e-05, "loss": 0.6768, "step": 8243 }, { "epoch": 0.21, "grad_norm": 6.940773010253906, "learning_rate": 1.8325173649326324e-05, "loss": 0.528, "step": 8244 }, { "epoch": 0.21, "grad_norm": 2.7980639934539795, "learning_rate": 1.832471372236569e-05, "loss": 0.6038, "step": 8245 }, { "epoch": 0.21, "grad_norm": 3.8514716625213623, "learning_rate": 1.8324253738036353e-05, "loss": 0.6487, "step": 8246 }, { "epoch": 0.21, "grad_norm": 1.448279619216919, "learning_rate": 1.8323793696341493e-05, "loss": 0.5451, "step": 8247 }, { "epoch": 0.21, "grad_norm": 2.0549261569976807, "learning_rate": 1.8323333597284275e-05, "loss": 0.6478, "step": 8248 }, { "epoch": 0.21, "grad_norm": 4.208417892456055, "learning_rate": 1.832287344086787e-05, "loss": 0.606, "step": 8249 }, { "epoch": 0.21, "grad_norm": 2.5646913051605225, "learning_rate": 1.8322413227095454e-05, "loss": 0.5594, "step": 8250 }, { "epoch": 0.21, "grad_norm": 4.3678364753723145, "learning_rate": 1.8321952955970187e-05, "loss": 0.6079, "step": 8251 }, { "epoch": 0.21, "grad_norm": 4.89299201965332, "learning_rate": 1.8321492627495257e-05, "loss": 0.6724, "step": 8252 }, { "epoch": 0.21, "grad_norm": 2.493562698364258, "learning_rate": 1.832103224167382e-05, "loss": 0.7126, "step": 8253 }, { "epoch": 0.21, "grad_norm": 2.373847484588623, "learning_rate": 1.8320571798509065e-05, "loss": 0.5932, "step": 8254 }, { "epoch": 0.21, "grad_norm": 2.7161567211151123, "learning_rate": 1.832011129800415e-05, "loss": 0.614, "step": 8255 }, { "epoch": 0.21, "grad_norm": 1.781050205230713, "learning_rate": 1.831965074016226e-05, "loss": 0.5261, "step": 8256 }, { "epoch": 0.21, "grad_norm": 5.73952579498291, "learning_rate": 1.831919012498656e-05, "loss": 0.6175, "step": 8257 }, { "epoch": 0.21, "grad_norm": 4.531005382537842, "learning_rate": 1.831872945248023e-05, "loss": 0.8576, "step": 8258 }, { "epoch": 0.21, "grad_norm": 2.233891725540161, "learning_rate": 1.8318268722646443e-05, "loss": 0.6707, "step": 8259 }, { "epoch": 0.21, "grad_norm": 9.335075378417969, "learning_rate": 1.8317807935488373e-05, "loss": 0.5588, "step": 8260 }, { "epoch": 0.21, "grad_norm": 2.078280210494995, "learning_rate": 1.8317347091009198e-05, "loss": 0.6178, "step": 8261 }, { "epoch": 0.21, "grad_norm": 6.109729766845703, "learning_rate": 1.8316886189212092e-05, "loss": 0.6849, "step": 8262 }, { "epoch": 0.21, "grad_norm": 1.9234882593154907, "learning_rate": 1.8316425230100232e-05, "loss": 0.6385, "step": 8263 }, { "epoch": 0.21, "grad_norm": 1.549436092376709, "learning_rate": 1.8315964213676794e-05, "loss": 0.5205, "step": 8264 }, { "epoch": 0.21, "grad_norm": 1.1764882802963257, "learning_rate": 1.8315503139944953e-05, "loss": 0.4787, "step": 8265 }, { "epoch": 0.21, "grad_norm": 2.9038009643554688, "learning_rate": 1.831504200890789e-05, "loss": 0.6322, "step": 8266 }, { "epoch": 0.21, "grad_norm": 6.005995273590088, "learning_rate": 1.8314580820568783e-05, "loss": 0.6419, "step": 8267 }, { "epoch": 0.21, "grad_norm": 3.692776918411255, "learning_rate": 1.831411957493081e-05, "loss": 0.7587, "step": 8268 }, { "epoch": 0.21, "grad_norm": 3.394237756729126, "learning_rate": 1.8313658271997145e-05, "loss": 0.6005, "step": 8269 }, { "epoch": 0.21, "grad_norm": 1.6834626197814941, "learning_rate": 1.8313196911770973e-05, "loss": 0.7777, "step": 8270 }, { "epoch": 0.21, "grad_norm": 2.016603946685791, "learning_rate": 1.8312735494255465e-05, "loss": 0.6501, "step": 8271 }, { "epoch": 0.21, "grad_norm": 1.7526683807373047, "learning_rate": 1.8312274019453812e-05, "loss": 0.5743, "step": 8272 }, { "epoch": 0.21, "grad_norm": 5.228272914886475, "learning_rate": 1.8311812487369188e-05, "loss": 0.734, "step": 8273 }, { "epoch": 0.21, "grad_norm": 1.7865153551101685, "learning_rate": 1.8311350898004773e-05, "loss": 0.5807, "step": 8274 }, { "epoch": 0.21, "grad_norm": 2.376377820968628, "learning_rate": 1.831088925136375e-05, "loss": 0.4428, "step": 8275 }, { "epoch": 0.21, "grad_norm": 1.7709320783615112, "learning_rate": 1.83104275474493e-05, "loss": 0.5653, "step": 8276 }, { "epoch": 0.21, "grad_norm": 1.4358948469161987, "learning_rate": 1.8309965786264605e-05, "loss": 0.6728, "step": 8277 }, { "epoch": 0.21, "grad_norm": 2.84454607963562, "learning_rate": 1.8309503967812845e-05, "loss": 0.6713, "step": 8278 }, { "epoch": 0.21, "grad_norm": 2.0394153594970703, "learning_rate": 1.8309042092097207e-05, "loss": 0.6298, "step": 8279 }, { "epoch": 0.21, "grad_norm": 2.995723247528076, "learning_rate": 1.8308580159120867e-05, "loss": 0.594, "step": 8280 }, { "epoch": 0.21, "grad_norm": 1.6722633838653564, "learning_rate": 1.8308118168887014e-05, "loss": 0.5402, "step": 8281 }, { "epoch": 0.21, "grad_norm": 1.8591665029525757, "learning_rate": 1.8307656121398833e-05, "loss": 0.7314, "step": 8282 }, { "epoch": 0.21, "grad_norm": 3.074052095413208, "learning_rate": 1.83071940166595e-05, "loss": 0.7118, "step": 8283 }, { "epoch": 0.21, "grad_norm": 7.759222984313965, "learning_rate": 1.8306731854672212e-05, "loss": 0.8115, "step": 8284 }, { "epoch": 0.21, "grad_norm": 4.217955589294434, "learning_rate": 1.8306269635440142e-05, "loss": 0.8201, "step": 8285 }, { "epoch": 0.21, "grad_norm": 3.947227716445923, "learning_rate": 1.830580735896648e-05, "loss": 0.7599, "step": 8286 }, { "epoch": 0.21, "grad_norm": 2.90714693069458, "learning_rate": 1.8305345025254415e-05, "loss": 0.8126, "step": 8287 }, { "epoch": 0.21, "grad_norm": 1.8237625360488892, "learning_rate": 1.830488263430713e-05, "loss": 0.7133, "step": 8288 }, { "epoch": 0.21, "grad_norm": 2.2501397132873535, "learning_rate": 1.8304420186127808e-05, "loss": 0.5829, "step": 8289 }, { "epoch": 0.21, "grad_norm": 2.108651638031006, "learning_rate": 1.8303957680719643e-05, "loss": 0.7352, "step": 8290 }, { "epoch": 0.21, "grad_norm": 2.0579071044921875, "learning_rate": 1.8303495118085813e-05, "loss": 0.7096, "step": 8291 }, { "epoch": 0.21, "grad_norm": 3.114206314086914, "learning_rate": 1.8303032498229514e-05, "loss": 0.7063, "step": 8292 }, { "epoch": 0.21, "grad_norm": 1.3810417652130127, "learning_rate": 1.8302569821153934e-05, "loss": 0.6369, "step": 8293 }, { "epoch": 0.21, "grad_norm": 4.274394512176514, "learning_rate": 1.830210708686226e-05, "loss": 0.6334, "step": 8294 }, { "epoch": 0.21, "grad_norm": 1.7897652387619019, "learning_rate": 1.8301644295357676e-05, "loss": 0.5263, "step": 8295 }, { "epoch": 0.21, "grad_norm": 4.15877103805542, "learning_rate": 1.830118144664338e-05, "loss": 0.827, "step": 8296 }, { "epoch": 0.21, "grad_norm": 2.620633125305176, "learning_rate": 1.8300718540722552e-05, "loss": 0.8311, "step": 8297 }, { "epoch": 0.21, "grad_norm": 2.710864782333374, "learning_rate": 1.830025557759839e-05, "loss": 0.7263, "step": 8298 }, { "epoch": 0.21, "grad_norm": 1.6353245973587036, "learning_rate": 1.8299792557274078e-05, "loss": 0.5642, "step": 8299 }, { "epoch": 0.21, "grad_norm": 3.4737393856048584, "learning_rate": 1.8299329479752813e-05, "loss": 0.5304, "step": 8300 }, { "epoch": 0.21, "grad_norm": 2.2370963096618652, "learning_rate": 1.8298866345037782e-05, "loss": 0.6392, "step": 8301 }, { "epoch": 0.21, "grad_norm": 1.8428043127059937, "learning_rate": 1.829840315313218e-05, "loss": 0.7648, "step": 8302 }, { "epoch": 0.21, "grad_norm": 2.601043462753296, "learning_rate": 1.829793990403919e-05, "loss": 0.6689, "step": 8303 }, { "epoch": 0.21, "grad_norm": 4.828835964202881, "learning_rate": 1.829747659776202e-05, "loss": 0.6885, "step": 8304 }, { "epoch": 0.21, "grad_norm": 3.966377019882202, "learning_rate": 1.829701323430385e-05, "loss": 0.7374, "step": 8305 }, { "epoch": 0.21, "grad_norm": 3.0224528312683105, "learning_rate": 1.829654981366788e-05, "loss": 0.6158, "step": 8306 }, { "epoch": 0.21, "grad_norm": 1.7704590559005737, "learning_rate": 1.8296086335857296e-05, "loss": 0.6774, "step": 8307 }, { "epoch": 0.21, "grad_norm": 1.2242796421051025, "learning_rate": 1.8295622800875305e-05, "loss": 0.4785, "step": 8308 }, { "epoch": 0.21, "grad_norm": 4.479025840759277, "learning_rate": 1.8295159208725085e-05, "loss": 0.6969, "step": 8309 }, { "epoch": 0.21, "grad_norm": 2.344524383544922, "learning_rate": 1.8294695559409845e-05, "loss": 0.7619, "step": 8310 }, { "epoch": 0.21, "grad_norm": 1.9603543281555176, "learning_rate": 1.8294231852932772e-05, "loss": 0.6651, "step": 8311 }, { "epoch": 0.21, "grad_norm": 1.5899802446365356, "learning_rate": 1.8293768089297063e-05, "loss": 0.499, "step": 8312 }, { "epoch": 0.21, "grad_norm": 2.73765230178833, "learning_rate": 1.829330426850592e-05, "loss": 0.7697, "step": 8313 }, { "epoch": 0.21, "grad_norm": 9.003142356872559, "learning_rate": 1.829284039056253e-05, "loss": 0.7395, "step": 8314 }, { "epoch": 0.21, "grad_norm": 1.879367470741272, "learning_rate": 1.8292376455470096e-05, "loss": 0.6122, "step": 8315 }, { "epoch": 0.21, "grad_norm": 2.1636476516723633, "learning_rate": 1.8291912463231812e-05, "loss": 0.6614, "step": 8316 }, { "epoch": 0.21, "grad_norm": 1.3902093172073364, "learning_rate": 1.829144841385088e-05, "loss": 0.6143, "step": 8317 }, { "epoch": 0.21, "grad_norm": 2.4320220947265625, "learning_rate": 1.829098430733049e-05, "loss": 0.6824, "step": 8318 }, { "epoch": 0.21, "grad_norm": 3.9594998359680176, "learning_rate": 1.8290520143673846e-05, "loss": 0.7564, "step": 8319 }, { "epoch": 0.21, "grad_norm": 3.132300853729248, "learning_rate": 1.8290055922884148e-05, "loss": 0.4801, "step": 8320 }, { "epoch": 0.21, "grad_norm": 2.5730953216552734, "learning_rate": 1.8289591644964592e-05, "loss": 0.507, "step": 8321 }, { "epoch": 0.21, "grad_norm": 8.6979398727417, "learning_rate": 1.828912730991838e-05, "loss": 0.6796, "step": 8322 }, { "epoch": 0.21, "grad_norm": 3.9844727516174316, "learning_rate": 1.8288662917748705e-05, "loss": 0.7797, "step": 8323 }, { "epoch": 0.21, "grad_norm": 3.315526008605957, "learning_rate": 1.8288198468458778e-05, "loss": 0.7143, "step": 8324 }, { "epoch": 0.21, "grad_norm": 3.214887857437134, "learning_rate": 1.828773396205179e-05, "loss": 0.6224, "step": 8325 }, { "epoch": 0.21, "grad_norm": 1.4834949970245361, "learning_rate": 1.828726939853095e-05, "loss": 0.4637, "step": 8326 }, { "epoch": 0.21, "grad_norm": 1.9520002603530884, "learning_rate": 1.8286804777899453e-05, "loss": 0.5386, "step": 8327 }, { "epoch": 0.21, "grad_norm": 2.6882758140563965, "learning_rate": 1.828634010016051e-05, "loss": 0.7084, "step": 8328 }, { "epoch": 0.21, "grad_norm": 3.7198615074157715, "learning_rate": 1.828587536531731e-05, "loss": 0.5705, "step": 8329 }, { "epoch": 0.21, "grad_norm": 2.490760087966919, "learning_rate": 1.8285410573373065e-05, "loss": 0.689, "step": 8330 }, { "epoch": 0.21, "grad_norm": 5.086747646331787, "learning_rate": 1.8284945724330977e-05, "loss": 0.6218, "step": 8331 }, { "epoch": 0.21, "grad_norm": 1.7176684141159058, "learning_rate": 1.8284480818194246e-05, "loss": 0.7544, "step": 8332 }, { "epoch": 0.21, "grad_norm": 2.1147420406341553, "learning_rate": 1.8284015854966076e-05, "loss": 0.6038, "step": 8333 }, { "epoch": 0.21, "grad_norm": 1.7408320903778076, "learning_rate": 1.8283550834649677e-05, "loss": 0.5342, "step": 8334 }, { "epoch": 0.21, "grad_norm": 2.234997034072876, "learning_rate": 1.828308575724825e-05, "loss": 0.5882, "step": 8335 }, { "epoch": 0.21, "grad_norm": 1.9546133279800415, "learning_rate": 1.8282620622765e-05, "loss": 0.6524, "step": 8336 }, { "epoch": 0.21, "grad_norm": 8.439465522766113, "learning_rate": 1.828215543120313e-05, "loss": 0.6652, "step": 8337 }, { "epoch": 0.21, "grad_norm": 5.1394171714782715, "learning_rate": 1.8281690182565854e-05, "loss": 0.6382, "step": 8338 }, { "epoch": 0.21, "grad_norm": 1.289999008178711, "learning_rate": 1.8281224876856363e-05, "loss": 0.5892, "step": 8339 }, { "epoch": 0.21, "grad_norm": 3.603278160095215, "learning_rate": 1.828075951407788e-05, "loss": 0.5514, "step": 8340 }, { "epoch": 0.21, "grad_norm": 3.4547955989837646, "learning_rate": 1.82802940942336e-05, "loss": 0.6186, "step": 8341 }, { "epoch": 0.21, "grad_norm": 2.4597511291503906, "learning_rate": 1.827982861732674e-05, "loss": 0.6112, "step": 8342 }, { "epoch": 0.21, "grad_norm": 2.5979506969451904, "learning_rate": 1.8279363083360502e-05, "loss": 0.6032, "step": 8343 }, { "epoch": 0.21, "grad_norm": 1.3590117692947388, "learning_rate": 1.827889749233809e-05, "loss": 0.6409, "step": 8344 }, { "epoch": 0.21, "grad_norm": 5.376216411590576, "learning_rate": 1.827843184426273e-05, "loss": 0.7539, "step": 8345 }, { "epoch": 0.21, "grad_norm": 2.5480265617370605, "learning_rate": 1.8277966139137607e-05, "loss": 0.5649, "step": 8346 }, { "epoch": 0.21, "grad_norm": 1.7903724908828735, "learning_rate": 1.8277500376965944e-05, "loss": 0.5306, "step": 8347 }, { "epoch": 0.21, "grad_norm": 2.239001989364624, "learning_rate": 1.8277034557750953e-05, "loss": 0.6142, "step": 8348 }, { "epoch": 0.21, "grad_norm": 3.308128595352173, "learning_rate": 1.8276568681495838e-05, "loss": 0.8088, "step": 8349 }, { "epoch": 0.21, "grad_norm": 2.530836582183838, "learning_rate": 1.827610274820381e-05, "loss": 0.7379, "step": 8350 }, { "epoch": 0.21, "grad_norm": 4.162795066833496, "learning_rate": 1.8275636757878085e-05, "loss": 0.9116, "step": 8351 }, { "epoch": 0.21, "grad_norm": 4.002975940704346, "learning_rate": 1.8275170710521867e-05, "loss": 0.6787, "step": 8352 }, { "epoch": 0.21, "grad_norm": 1.7319711446762085, "learning_rate": 1.8274704606138374e-05, "loss": 0.6855, "step": 8353 }, { "epoch": 0.21, "grad_norm": 1.997715950012207, "learning_rate": 1.8274238444730813e-05, "loss": 0.6654, "step": 8354 }, { "epoch": 0.21, "grad_norm": 3.052243947982788, "learning_rate": 1.82737722263024e-05, "loss": 0.6187, "step": 8355 }, { "epoch": 0.21, "grad_norm": 2.4894237518310547, "learning_rate": 1.8273305950856348e-05, "loss": 0.5974, "step": 8356 }, { "epoch": 0.21, "grad_norm": 2.745992422103882, "learning_rate": 1.8272839618395867e-05, "loss": 0.602, "step": 8357 }, { "epoch": 0.21, "grad_norm": 2.3211593627929688, "learning_rate": 1.8272373228924177e-05, "loss": 0.5938, "step": 8358 }, { "epoch": 0.21, "grad_norm": 2.0037944316864014, "learning_rate": 1.8271906782444483e-05, "loss": 0.4049, "step": 8359 }, { "epoch": 0.21, "grad_norm": 2.884705066680908, "learning_rate": 1.827144027896001e-05, "loss": 0.6295, "step": 8360 }, { "epoch": 0.21, "grad_norm": 1.7827550172805786, "learning_rate": 1.8270973718473964e-05, "loss": 0.7392, "step": 8361 }, { "epoch": 0.21, "grad_norm": 2.33205509185791, "learning_rate": 1.827050710098956e-05, "loss": 0.8175, "step": 8362 }, { "epoch": 0.21, "grad_norm": 1.5626980066299438, "learning_rate": 1.827004042651002e-05, "loss": 0.496, "step": 8363 }, { "epoch": 0.21, "grad_norm": 1.2819873094558716, "learning_rate": 1.8269573695038558e-05, "loss": 0.6968, "step": 8364 }, { "epoch": 0.21, "grad_norm": 2.178386926651001, "learning_rate": 1.8269106906578392e-05, "loss": 0.4763, "step": 8365 }, { "epoch": 0.21, "grad_norm": 3.219590425491333, "learning_rate": 1.8268640061132732e-05, "loss": 0.6598, "step": 8366 }, { "epoch": 0.21, "grad_norm": 1.6693487167358398, "learning_rate": 1.82681731587048e-05, "loss": 0.5975, "step": 8367 }, { "epoch": 0.21, "grad_norm": 1.3102874755859375, "learning_rate": 1.8267706199297817e-05, "loss": 0.566, "step": 8368 }, { "epoch": 0.21, "grad_norm": 1.3268462419509888, "learning_rate": 1.8267239182914993e-05, "loss": 0.545, "step": 8369 }, { "epoch": 0.21, "grad_norm": 1.255797266960144, "learning_rate": 1.826677210955955e-05, "loss": 0.5473, "step": 8370 }, { "epoch": 0.21, "grad_norm": 1.3975807428359985, "learning_rate": 1.826630497923471e-05, "loss": 0.7287, "step": 8371 }, { "epoch": 0.21, "grad_norm": 1.7709583044052124, "learning_rate": 1.826583779194369e-05, "loss": 0.6068, "step": 8372 }, { "epoch": 0.21, "grad_norm": 1.9852021932601929, "learning_rate": 1.8265370547689706e-05, "loss": 0.5945, "step": 8373 }, { "epoch": 0.21, "grad_norm": 1.6274642944335938, "learning_rate": 1.8264903246475983e-05, "loss": 0.6811, "step": 8374 }, { "epoch": 0.21, "grad_norm": 1.2884634733200073, "learning_rate": 1.826443588830574e-05, "loss": 0.5343, "step": 8375 }, { "epoch": 0.21, "grad_norm": 2.553131341934204, "learning_rate": 1.8263968473182195e-05, "loss": 0.6032, "step": 8376 }, { "epoch": 0.21, "grad_norm": 1.8230489492416382, "learning_rate": 1.826350100110857e-05, "loss": 0.6099, "step": 8377 }, { "epoch": 0.21, "grad_norm": 1.7534947395324707, "learning_rate": 1.8263033472088092e-05, "loss": 0.727, "step": 8378 }, { "epoch": 0.21, "grad_norm": 1.9830517768859863, "learning_rate": 1.826256588612398e-05, "loss": 0.5882, "step": 8379 }, { "epoch": 0.21, "grad_norm": 2.2124152183532715, "learning_rate": 1.826209824321945e-05, "loss": 0.7165, "step": 8380 }, { "epoch": 0.21, "grad_norm": 2.1172897815704346, "learning_rate": 1.8261630543377734e-05, "loss": 0.7769, "step": 8381 }, { "epoch": 0.21, "grad_norm": 3.3690176010131836, "learning_rate": 1.8261162786602046e-05, "loss": 0.6405, "step": 8382 }, { "epoch": 0.21, "grad_norm": 3.592454195022583, "learning_rate": 1.8260694972895616e-05, "loss": 0.6553, "step": 8383 }, { "epoch": 0.21, "grad_norm": 1.9848607778549194, "learning_rate": 1.8260227102261667e-05, "loss": 0.6926, "step": 8384 }, { "epoch": 0.21, "grad_norm": 7.198005199432373, "learning_rate": 1.825975917470342e-05, "loss": 0.7224, "step": 8385 }, { "epoch": 0.21, "grad_norm": 4.540231704711914, "learning_rate": 1.8259291190224107e-05, "loss": 0.5867, "step": 8386 }, { "epoch": 0.21, "grad_norm": 1.624380111694336, "learning_rate": 1.8258823148826943e-05, "loss": 0.4505, "step": 8387 }, { "epoch": 0.21, "grad_norm": 1.3393045663833618, "learning_rate": 1.8258355050515163e-05, "loss": 0.6937, "step": 8388 }, { "epoch": 0.22, "grad_norm": 2.549915313720703, "learning_rate": 1.8257886895291984e-05, "loss": 0.4449, "step": 8389 }, { "epoch": 0.22, "grad_norm": 2.9384920597076416, "learning_rate": 1.825741868316064e-05, "loss": 0.7049, "step": 8390 }, { "epoch": 0.22, "grad_norm": 2.5668866634368896, "learning_rate": 1.8256950414124354e-05, "loss": 0.8366, "step": 8391 }, { "epoch": 0.22, "grad_norm": 1.5884933471679688, "learning_rate": 1.8256482088186353e-05, "loss": 0.6318, "step": 8392 }, { "epoch": 0.22, "grad_norm": 1.7551487684249878, "learning_rate": 1.8256013705349863e-05, "loss": 0.6404, "step": 8393 }, { "epoch": 0.22, "grad_norm": 1.8057838678359985, "learning_rate": 1.8255545265618112e-05, "loss": 0.7126, "step": 8394 }, { "epoch": 0.22, "grad_norm": 2.0510001182556152, "learning_rate": 1.8255076768994333e-05, "loss": 0.553, "step": 8395 }, { "epoch": 0.22, "grad_norm": 3.056610584259033, "learning_rate": 1.825460821548175e-05, "loss": 0.6493, "step": 8396 }, { "epoch": 0.22, "grad_norm": 2.9838900566101074, "learning_rate": 1.8254139605083595e-05, "loss": 0.789, "step": 8397 }, { "epoch": 0.22, "grad_norm": 1.5209991931915283, "learning_rate": 1.8253670937803094e-05, "loss": 0.5878, "step": 8398 }, { "epoch": 0.22, "grad_norm": 1.3193916082382202, "learning_rate": 1.825320221364348e-05, "loss": 0.6616, "step": 8399 }, { "epoch": 0.22, "grad_norm": 1.5768593549728394, "learning_rate": 1.825273343260798e-05, "loss": 0.6685, "step": 8400 }, { "epoch": 0.22, "grad_norm": 2.387847423553467, "learning_rate": 1.8252264594699826e-05, "loss": 0.7019, "step": 8401 }, { "epoch": 0.22, "grad_norm": 1.638632893562317, "learning_rate": 1.8251795699922247e-05, "loss": 0.654, "step": 8402 }, { "epoch": 0.22, "grad_norm": 1.3626315593719482, "learning_rate": 1.825132674827848e-05, "loss": 0.6316, "step": 8403 }, { "epoch": 0.22, "grad_norm": 3.2179858684539795, "learning_rate": 1.825085773977175e-05, "loss": 0.6948, "step": 8404 }, { "epoch": 0.22, "grad_norm": 1.6677464246749878, "learning_rate": 1.8250388674405296e-05, "loss": 0.6834, "step": 8405 }, { "epoch": 0.22, "grad_norm": 2.205409288406372, "learning_rate": 1.8249919552182342e-05, "loss": 0.7005, "step": 8406 }, { "epoch": 0.22, "grad_norm": 1.4132168292999268, "learning_rate": 1.8249450373106126e-05, "loss": 0.6597, "step": 8407 }, { "epoch": 0.22, "grad_norm": 3.350004196166992, "learning_rate": 1.8248981137179885e-05, "loss": 0.7386, "step": 8408 }, { "epoch": 0.22, "grad_norm": 3.897073984146118, "learning_rate": 1.8248511844406844e-05, "loss": 0.6869, "step": 8409 }, { "epoch": 0.22, "grad_norm": 1.8106093406677246, "learning_rate": 1.8248042494790244e-05, "loss": 0.7246, "step": 8410 }, { "epoch": 0.22, "grad_norm": 1.4642821550369263, "learning_rate": 1.824757308833332e-05, "loss": 0.5655, "step": 8411 }, { "epoch": 0.22, "grad_norm": 2.6178436279296875, "learning_rate": 1.8247103625039298e-05, "loss": 0.644, "step": 8412 }, { "epoch": 0.22, "grad_norm": 3.391728162765503, "learning_rate": 1.8246634104911423e-05, "loss": 0.4554, "step": 8413 }, { "epoch": 0.22, "grad_norm": 1.1721129417419434, "learning_rate": 1.8246164527952925e-05, "loss": 0.622, "step": 8414 }, { "epoch": 0.22, "grad_norm": 3.841431140899658, "learning_rate": 1.824569489416704e-05, "loss": 0.6111, "step": 8415 }, { "epoch": 0.22, "grad_norm": 3.3176686763763428, "learning_rate": 1.824522520355701e-05, "loss": 0.7429, "step": 8416 }, { "epoch": 0.22, "grad_norm": 1.6730329990386963, "learning_rate": 1.824475545612607e-05, "loss": 0.7511, "step": 8417 }, { "epoch": 0.22, "grad_norm": 3.7387964725494385, "learning_rate": 1.824428565187745e-05, "loss": 0.7686, "step": 8418 }, { "epoch": 0.22, "grad_norm": 3.5270872116088867, "learning_rate": 1.8243815790814396e-05, "loss": 0.4923, "step": 8419 }, { "epoch": 0.22, "grad_norm": 1.7223455905914307, "learning_rate": 1.8243345872940143e-05, "loss": 0.6891, "step": 8420 }, { "epoch": 0.22, "grad_norm": 1.905340552330017, "learning_rate": 1.8242875898257928e-05, "loss": 0.6197, "step": 8421 }, { "epoch": 0.22, "grad_norm": 2.0180063247680664, "learning_rate": 1.824240586677099e-05, "loss": 0.7393, "step": 8422 }, { "epoch": 0.22, "grad_norm": 3.0467381477355957, "learning_rate": 1.8241935778482572e-05, "loss": 0.6371, "step": 8423 }, { "epoch": 0.22, "grad_norm": 2.998142719268799, "learning_rate": 1.8241465633395906e-05, "loss": 0.66, "step": 8424 }, { "epoch": 0.22, "grad_norm": 1.9288369417190552, "learning_rate": 1.824099543151424e-05, "loss": 0.806, "step": 8425 }, { "epoch": 0.22, "grad_norm": 4.476876735687256, "learning_rate": 1.824052517284081e-05, "loss": 0.6867, "step": 8426 }, { "epoch": 0.22, "grad_norm": 1.384581446647644, "learning_rate": 1.8240054857378858e-05, "loss": 0.6375, "step": 8427 }, { "epoch": 0.22, "grad_norm": 2.425679922103882, "learning_rate": 1.8239584485131626e-05, "loss": 0.6739, "step": 8428 }, { "epoch": 0.22, "grad_norm": 2.574753999710083, "learning_rate": 1.823911405610235e-05, "loss": 0.5799, "step": 8429 }, { "epoch": 0.22, "grad_norm": 2.229804754257202, "learning_rate": 1.8238643570294283e-05, "loss": 0.5424, "step": 8430 }, { "epoch": 0.22, "grad_norm": 1.193227767944336, "learning_rate": 1.8238173027710656e-05, "loss": 0.6762, "step": 8431 }, { "epoch": 0.22, "grad_norm": 2.1942131519317627, "learning_rate": 1.8237702428354718e-05, "loss": 0.8424, "step": 8432 }, { "epoch": 0.22, "grad_norm": 3.4256339073181152, "learning_rate": 1.823723177222971e-05, "loss": 0.709, "step": 8433 }, { "epoch": 0.22, "grad_norm": 5.057536602020264, "learning_rate": 1.8236761059338874e-05, "loss": 0.5849, "step": 8434 }, { "epoch": 0.22, "grad_norm": 1.66573965549469, "learning_rate": 1.8236290289685457e-05, "loss": 0.559, "step": 8435 }, { "epoch": 0.22, "grad_norm": 1.4089076519012451, "learning_rate": 1.8235819463272705e-05, "loss": 0.616, "step": 8436 }, { "epoch": 0.22, "grad_norm": 2.129732847213745, "learning_rate": 1.8235348580103856e-05, "loss": 0.6511, "step": 8437 }, { "epoch": 0.22, "grad_norm": 5.134948253631592, "learning_rate": 1.8234877640182156e-05, "loss": 0.7017, "step": 8438 }, { "epoch": 0.22, "grad_norm": 1.7576631307601929, "learning_rate": 1.8234406643510855e-05, "loss": 0.5089, "step": 8439 }, { "epoch": 0.22, "grad_norm": 2.338667392730713, "learning_rate": 1.82339355900932e-05, "loss": 0.7377, "step": 8440 }, { "epoch": 0.22, "grad_norm": 2.467339515686035, "learning_rate": 1.823346447993243e-05, "loss": 0.6636, "step": 8441 }, { "epoch": 0.22, "grad_norm": 10.069986343383789, "learning_rate": 1.8232993313031797e-05, "loss": 0.6068, "step": 8442 }, { "epoch": 0.22, "grad_norm": 1.4534279108047485, "learning_rate": 1.8232522089394548e-05, "loss": 0.6096, "step": 8443 }, { "epoch": 0.22, "grad_norm": 2.2638089656829834, "learning_rate": 1.8232050809023927e-05, "loss": 0.7172, "step": 8444 }, { "epoch": 0.22, "grad_norm": 3.854961395263672, "learning_rate": 1.8231579471923186e-05, "loss": 0.6905, "step": 8445 }, { "epoch": 0.22, "grad_norm": 2.7667500972747803, "learning_rate": 1.823110807809557e-05, "loss": 0.6771, "step": 8446 }, { "epoch": 0.22, "grad_norm": 5.700901031494141, "learning_rate": 1.823063662754433e-05, "loss": 0.702, "step": 8447 }, { "epoch": 0.22, "grad_norm": 1.97844660282135, "learning_rate": 1.8230165120272707e-05, "loss": 0.4535, "step": 8448 }, { "epoch": 0.22, "grad_norm": 2.188307762145996, "learning_rate": 1.8229693556283962e-05, "loss": 0.7055, "step": 8449 }, { "epoch": 0.22, "grad_norm": 1.262181282043457, "learning_rate": 1.8229221935581337e-05, "loss": 0.6504, "step": 8450 }, { "epoch": 0.22, "grad_norm": 2.5005979537963867, "learning_rate": 1.8228750258168087e-05, "loss": 0.5733, "step": 8451 }, { "epoch": 0.22, "grad_norm": 1.8426637649536133, "learning_rate": 1.822827852404746e-05, "loss": 0.5612, "step": 8452 }, { "epoch": 0.22, "grad_norm": 1.8359147310256958, "learning_rate": 1.8227806733222705e-05, "loss": 0.5027, "step": 8453 }, { "epoch": 0.22, "grad_norm": 3.6896378993988037, "learning_rate": 1.8227334885697074e-05, "loss": 0.6446, "step": 8454 }, { "epoch": 0.22, "grad_norm": 3.291165351867676, "learning_rate": 1.822686298147382e-05, "loss": 0.6274, "step": 8455 }, { "epoch": 0.22, "grad_norm": 3.124242067337036, "learning_rate": 1.8226391020556196e-05, "loss": 0.5914, "step": 8456 }, { "epoch": 0.22, "grad_norm": 2.195500612258911, "learning_rate": 1.8225919002947458e-05, "loss": 0.5977, "step": 8457 }, { "epoch": 0.22, "grad_norm": 2.52994441986084, "learning_rate": 1.822544692865085e-05, "loss": 0.5966, "step": 8458 }, { "epoch": 0.22, "grad_norm": 1.3426218032836914, "learning_rate": 1.822497479766963e-05, "loss": 0.5416, "step": 8459 }, { "epoch": 0.22, "grad_norm": 2.8451144695281982, "learning_rate": 1.822450261000705e-05, "loss": 0.5404, "step": 8460 }, { "epoch": 0.22, "grad_norm": 2.3764615058898926, "learning_rate": 1.8224030365666365e-05, "loss": 0.6025, "step": 8461 }, { "epoch": 0.22, "grad_norm": 3.4648332595825195, "learning_rate": 1.822355806465083e-05, "loss": 0.8031, "step": 8462 }, { "epoch": 0.22, "grad_norm": 5.140274524688721, "learning_rate": 1.8223085706963704e-05, "loss": 0.6622, "step": 8463 }, { "epoch": 0.22, "grad_norm": 2.111680746078491, "learning_rate": 1.8222613292608233e-05, "loss": 0.628, "step": 8464 }, { "epoch": 0.22, "grad_norm": 1.7719273567199707, "learning_rate": 1.8222140821587677e-05, "loss": 0.6244, "step": 8465 }, { "epoch": 0.22, "grad_norm": 2.0842459201812744, "learning_rate": 1.8221668293905292e-05, "loss": 0.6615, "step": 8466 }, { "epoch": 0.22, "grad_norm": 1.8673107624053955, "learning_rate": 1.8221195709564335e-05, "loss": 0.5138, "step": 8467 }, { "epoch": 0.22, "grad_norm": 5.211010932922363, "learning_rate": 1.8220723068568062e-05, "loss": 0.7224, "step": 8468 }, { "epoch": 0.22, "grad_norm": 2.276667833328247, "learning_rate": 1.822025037091973e-05, "loss": 0.6061, "step": 8469 }, { "epoch": 0.22, "grad_norm": 5.011968612670898, "learning_rate": 1.82197776166226e-05, "loss": 0.6735, "step": 8470 }, { "epoch": 0.22, "grad_norm": 5.697561264038086, "learning_rate": 1.8219304805679926e-05, "loss": 0.6222, "step": 8471 }, { "epoch": 0.22, "grad_norm": 1.7761116027832031, "learning_rate": 1.8218831938094963e-05, "loss": 0.5908, "step": 8472 }, { "epoch": 0.22, "grad_norm": 2.7536003589630127, "learning_rate": 1.8218359013870977e-05, "loss": 0.6046, "step": 8473 }, { "epoch": 0.22, "grad_norm": 1.8334071636199951, "learning_rate": 1.8217886033011224e-05, "loss": 0.7232, "step": 8474 }, { "epoch": 0.22, "grad_norm": 1.8930550813674927, "learning_rate": 1.8217412995518964e-05, "loss": 0.5643, "step": 8475 }, { "epoch": 0.22, "grad_norm": 4.927357196807861, "learning_rate": 1.8216939901397458e-05, "loss": 0.6323, "step": 8476 }, { "epoch": 0.22, "grad_norm": 8.784130096435547, "learning_rate": 1.821646675064996e-05, "loss": 0.9114, "step": 8477 }, { "epoch": 0.22, "grad_norm": 1.5420821905136108, "learning_rate": 1.8215993543279737e-05, "loss": 0.5916, "step": 8478 }, { "epoch": 0.22, "grad_norm": 1.6029584407806396, "learning_rate": 1.821552027929005e-05, "loss": 0.6618, "step": 8479 }, { "epoch": 0.22, "grad_norm": 2.290858268737793, "learning_rate": 1.8215046958684155e-05, "loss": 0.6096, "step": 8480 }, { "epoch": 0.22, "grad_norm": 4.812405109405518, "learning_rate": 1.821457358146532e-05, "loss": 0.5604, "step": 8481 }, { "epoch": 0.22, "grad_norm": 1.5822962522506714, "learning_rate": 1.8214100147636805e-05, "loss": 0.8762, "step": 8482 }, { "epoch": 0.22, "grad_norm": 2.4811911582946777, "learning_rate": 1.821362665720187e-05, "loss": 0.5876, "step": 8483 }, { "epoch": 0.22, "grad_norm": 2.804121971130371, "learning_rate": 1.8213153110163782e-05, "loss": 0.4926, "step": 8484 }, { "epoch": 0.22, "grad_norm": 2.7976443767547607, "learning_rate": 1.8212679506525806e-05, "loss": 0.7371, "step": 8485 }, { "epoch": 0.22, "grad_norm": 8.618741989135742, "learning_rate": 1.82122058462912e-05, "loss": 0.6624, "step": 8486 }, { "epoch": 0.22, "grad_norm": 5.650725841522217, "learning_rate": 1.8211732129463228e-05, "loss": 0.673, "step": 8487 }, { "epoch": 0.22, "grad_norm": 2.0161843299865723, "learning_rate": 1.8211258356045164e-05, "loss": 0.6897, "step": 8488 }, { "epoch": 0.22, "grad_norm": 3.730337619781494, "learning_rate": 1.821078452604026e-05, "loss": 0.5462, "step": 8489 }, { "epoch": 0.22, "grad_norm": 2.3532655239105225, "learning_rate": 1.821031063945179e-05, "loss": 0.6021, "step": 8490 }, { "epoch": 0.22, "grad_norm": 3.2564752101898193, "learning_rate": 1.8209836696283016e-05, "loss": 0.505, "step": 8491 }, { "epoch": 0.22, "grad_norm": 1.3442984819412231, "learning_rate": 1.8209362696537207e-05, "loss": 0.603, "step": 8492 }, { "epoch": 0.22, "grad_norm": 1.4288188219070435, "learning_rate": 1.8208888640217627e-05, "loss": 0.5838, "step": 8493 }, { "epoch": 0.22, "grad_norm": 3.326314687728882, "learning_rate": 1.8208414527327542e-05, "loss": 0.6102, "step": 8494 }, { "epoch": 0.22, "grad_norm": 1.4429867267608643, "learning_rate": 1.8207940357870223e-05, "loss": 0.5306, "step": 8495 }, { "epoch": 0.22, "grad_norm": 1.8757554292678833, "learning_rate": 1.820746613184894e-05, "loss": 0.6472, "step": 8496 }, { "epoch": 0.22, "grad_norm": 1.52638578414917, "learning_rate": 1.820699184926695e-05, "loss": 0.6458, "step": 8497 }, { "epoch": 0.22, "grad_norm": 1.9824203252792358, "learning_rate": 1.8206517510127533e-05, "loss": 0.5676, "step": 8498 }, { "epoch": 0.22, "grad_norm": 3.202571153640747, "learning_rate": 1.820604311443395e-05, "loss": 0.6682, "step": 8499 }, { "epoch": 0.22, "grad_norm": 3.0093767642974854, "learning_rate": 1.8205568662189473e-05, "loss": 0.7383, "step": 8500 }, { "epoch": 0.22, "grad_norm": 2.967588424682617, "learning_rate": 1.8205094153397375e-05, "loss": 0.6488, "step": 8501 }, { "epoch": 0.22, "grad_norm": 3.675550699234009, "learning_rate": 1.820461958806092e-05, "loss": 0.844, "step": 8502 }, { "epoch": 0.22, "grad_norm": 1.1514610052108765, "learning_rate": 1.8204144966183386e-05, "loss": 0.5572, "step": 8503 }, { "epoch": 0.22, "grad_norm": 6.316755771636963, "learning_rate": 1.8203670287768034e-05, "loss": 0.6533, "step": 8504 }, { "epoch": 0.22, "grad_norm": 2.2112913131713867, "learning_rate": 1.8203195552818145e-05, "loss": 0.6886, "step": 8505 }, { "epoch": 0.22, "grad_norm": 3.6524105072021484, "learning_rate": 1.8202720761336983e-05, "loss": 0.5465, "step": 8506 }, { "epoch": 0.22, "grad_norm": 1.7229270935058594, "learning_rate": 1.8202245913327824e-05, "loss": 0.6804, "step": 8507 }, { "epoch": 0.22, "grad_norm": 1.353389859199524, "learning_rate": 1.8201771008793936e-05, "loss": 0.5845, "step": 8508 }, { "epoch": 0.22, "grad_norm": 1.8555408716201782, "learning_rate": 1.82012960477386e-05, "loss": 0.6942, "step": 8509 }, { "epoch": 0.22, "grad_norm": 6.909871578216553, "learning_rate": 1.820082103016508e-05, "loss": 0.6937, "step": 8510 }, { "epoch": 0.22, "grad_norm": 1.3641740083694458, "learning_rate": 1.8200345956076658e-05, "loss": 0.517, "step": 8511 }, { "epoch": 0.22, "grad_norm": 2.838630437850952, "learning_rate": 1.8199870825476602e-05, "loss": 0.6255, "step": 8512 }, { "epoch": 0.22, "grad_norm": 1.6601896286010742, "learning_rate": 1.819939563836819e-05, "loss": 0.4523, "step": 8513 }, { "epoch": 0.22, "grad_norm": 2.4505860805511475, "learning_rate": 1.819892039475469e-05, "loss": 0.5084, "step": 8514 }, { "epoch": 0.22, "grad_norm": 2.4367029666900635, "learning_rate": 1.8198445094639386e-05, "loss": 0.5815, "step": 8515 }, { "epoch": 0.22, "grad_norm": 4.71992826461792, "learning_rate": 1.8197969738025546e-05, "loss": 0.6915, "step": 8516 }, { "epoch": 0.22, "grad_norm": 5.012837886810303, "learning_rate": 1.8197494324916448e-05, "loss": 0.8949, "step": 8517 }, { "epoch": 0.22, "grad_norm": 1.4128313064575195, "learning_rate": 1.8197018855315372e-05, "loss": 0.5592, "step": 8518 }, { "epoch": 0.22, "grad_norm": 3.152337074279785, "learning_rate": 1.8196543329225595e-05, "loss": 0.6467, "step": 8519 }, { "epoch": 0.22, "grad_norm": 1.967156171798706, "learning_rate": 1.8196067746650385e-05, "loss": 0.6425, "step": 8520 }, { "epoch": 0.22, "grad_norm": 3.828430652618408, "learning_rate": 1.8195592107593026e-05, "loss": 1.025, "step": 8521 }, { "epoch": 0.22, "grad_norm": 4.3594865798950195, "learning_rate": 1.8195116412056797e-05, "loss": 0.6724, "step": 8522 }, { "epoch": 0.22, "grad_norm": 2.279496192932129, "learning_rate": 1.8194640660044976e-05, "loss": 0.5663, "step": 8523 }, { "epoch": 0.22, "grad_norm": 2.3718907833099365, "learning_rate": 1.8194164851560837e-05, "loss": 0.6304, "step": 8524 }, { "epoch": 0.22, "grad_norm": 1.5339653491973877, "learning_rate": 1.8193688986607662e-05, "loss": 0.6787, "step": 8525 }, { "epoch": 0.22, "grad_norm": 1.3544965982437134, "learning_rate": 1.8193213065188732e-05, "loss": 0.6016, "step": 8526 }, { "epoch": 0.22, "grad_norm": 2.659604072570801, "learning_rate": 1.8192737087307325e-05, "loss": 0.6299, "step": 8527 }, { "epoch": 0.22, "grad_norm": 3.056713581085205, "learning_rate": 1.819226105296672e-05, "loss": 0.6241, "step": 8528 }, { "epoch": 0.22, "grad_norm": 2.183180570602417, "learning_rate": 1.81917849621702e-05, "loss": 0.6893, "step": 8529 }, { "epoch": 0.22, "grad_norm": 3.100398540496826, "learning_rate": 1.8191308814921042e-05, "loss": 0.7373, "step": 8530 }, { "epoch": 0.22, "grad_norm": 4.524762153625488, "learning_rate": 1.8190832611222532e-05, "loss": 0.6826, "step": 8531 }, { "epoch": 0.22, "grad_norm": 1.4779213666915894, "learning_rate": 1.819035635107795e-05, "loss": 0.537, "step": 8532 }, { "epoch": 0.22, "grad_norm": 2.063326120376587, "learning_rate": 1.8189880034490577e-05, "loss": 0.6187, "step": 8533 }, { "epoch": 0.22, "grad_norm": 3.906102180480957, "learning_rate": 1.81894036614637e-05, "loss": 0.6068, "step": 8534 }, { "epoch": 0.22, "grad_norm": 1.3340953588485718, "learning_rate": 1.818892723200059e-05, "loss": 0.7756, "step": 8535 }, { "epoch": 0.22, "grad_norm": 3.4105355739593506, "learning_rate": 1.8188450746104544e-05, "loss": 0.5627, "step": 8536 }, { "epoch": 0.22, "grad_norm": 2.634746551513672, "learning_rate": 1.818797420377884e-05, "loss": 0.7441, "step": 8537 }, { "epoch": 0.22, "grad_norm": 3.2682902812957764, "learning_rate": 1.818749760502676e-05, "loss": 0.6155, "step": 8538 }, { "epoch": 0.22, "grad_norm": 3.4944491386413574, "learning_rate": 1.8187020949851592e-05, "loss": 0.5977, "step": 8539 }, { "epoch": 0.22, "grad_norm": 2.1130549907684326, "learning_rate": 1.818654423825662e-05, "loss": 0.6722, "step": 8540 }, { "epoch": 0.22, "grad_norm": 2.699674606323242, "learning_rate": 1.8186067470245125e-05, "loss": 0.6469, "step": 8541 }, { "epoch": 0.22, "grad_norm": 1.3617057800292969, "learning_rate": 1.8185590645820396e-05, "loss": 0.7226, "step": 8542 }, { "epoch": 0.22, "grad_norm": 2.007894992828369, "learning_rate": 1.818511376498572e-05, "loss": 0.5526, "step": 8543 }, { "epoch": 0.22, "grad_norm": 3.79952073097229, "learning_rate": 1.8184636827744385e-05, "loss": 0.5674, "step": 8544 }, { "epoch": 0.22, "grad_norm": 2.9072132110595703, "learning_rate": 1.818415983409967e-05, "loss": 0.4726, "step": 8545 }, { "epoch": 0.22, "grad_norm": 0.9220971465110779, "learning_rate": 1.8183682784054873e-05, "loss": 0.4907, "step": 8546 }, { "epoch": 0.22, "grad_norm": 5.764665126800537, "learning_rate": 1.8183205677613272e-05, "loss": 0.6712, "step": 8547 }, { "epoch": 0.22, "grad_norm": 5.255614280700684, "learning_rate": 1.818272851477816e-05, "loss": 0.5901, "step": 8548 }, { "epoch": 0.22, "grad_norm": 1.8945657014846802, "learning_rate": 1.818225129555282e-05, "loss": 0.6936, "step": 8549 }, { "epoch": 0.22, "grad_norm": 2.9556329250335693, "learning_rate": 1.818177401994055e-05, "loss": 0.4356, "step": 8550 }, { "epoch": 0.22, "grad_norm": 5.914109230041504, "learning_rate": 1.8181296687944633e-05, "loss": 0.4945, "step": 8551 }, { "epoch": 0.22, "grad_norm": 3.295355796813965, "learning_rate": 1.818081929956836e-05, "loss": 0.7252, "step": 8552 }, { "epoch": 0.22, "grad_norm": 3.3145294189453125, "learning_rate": 1.8180341854815017e-05, "loss": 0.538, "step": 8553 }, { "epoch": 0.22, "grad_norm": 4.442361831665039, "learning_rate": 1.81798643536879e-05, "loss": 0.5997, "step": 8554 }, { "epoch": 0.22, "grad_norm": 1.30026113986969, "learning_rate": 1.8179386796190295e-05, "loss": 0.6414, "step": 8555 }, { "epoch": 0.22, "grad_norm": 1.3981316089630127, "learning_rate": 1.8178909182325496e-05, "loss": 0.5262, "step": 8556 }, { "epoch": 0.22, "grad_norm": 2.010317087173462, "learning_rate": 1.817843151209679e-05, "loss": 0.674, "step": 8557 }, { "epoch": 0.22, "grad_norm": 3.0848264694213867, "learning_rate": 1.8177953785507475e-05, "loss": 0.5848, "step": 8558 }, { "epoch": 0.22, "grad_norm": 1.343836784362793, "learning_rate": 1.8177476002560843e-05, "loss": 0.6374, "step": 8559 }, { "epoch": 0.22, "grad_norm": 4.529226779937744, "learning_rate": 1.8176998163260183e-05, "loss": 0.7738, "step": 8560 }, { "epoch": 0.22, "grad_norm": 2.156400680541992, "learning_rate": 1.8176520267608787e-05, "loss": 0.6042, "step": 8561 }, { "epoch": 0.22, "grad_norm": 9.861776351928711, "learning_rate": 1.817604231560995e-05, "loss": 0.6612, "step": 8562 }, { "epoch": 0.22, "grad_norm": 2.1352226734161377, "learning_rate": 1.8175564307266966e-05, "loss": 0.5095, "step": 8563 }, { "epoch": 0.22, "grad_norm": 2.7144110202789307, "learning_rate": 1.817508624258313e-05, "loss": 0.6294, "step": 8564 }, { "epoch": 0.22, "grad_norm": 2.389765501022339, "learning_rate": 1.8174608121561736e-05, "loss": 0.6887, "step": 8565 }, { "epoch": 0.22, "grad_norm": 1.402280330657959, "learning_rate": 1.8174129944206077e-05, "loss": 0.5305, "step": 8566 }, { "epoch": 0.22, "grad_norm": 2.111801862716675, "learning_rate": 1.8173651710519454e-05, "loss": 0.8019, "step": 8567 }, { "epoch": 0.22, "grad_norm": 2.2697761058807373, "learning_rate": 1.8173173420505153e-05, "loss": 0.7211, "step": 8568 }, { "epoch": 0.22, "grad_norm": 3.885540723800659, "learning_rate": 1.817269507416648e-05, "loss": 0.6296, "step": 8569 }, { "epoch": 0.22, "grad_norm": 3.428314685821533, "learning_rate": 1.8172216671506727e-05, "loss": 0.6853, "step": 8570 }, { "epoch": 0.22, "grad_norm": 2.04079008102417, "learning_rate": 1.817173821252919e-05, "loss": 0.5097, "step": 8571 }, { "epoch": 0.22, "grad_norm": 2.1292998790740967, "learning_rate": 1.8171259697237164e-05, "loss": 0.5234, "step": 8572 }, { "epoch": 0.22, "grad_norm": 12.167033195495605, "learning_rate": 1.8170781125633956e-05, "loss": 0.9432, "step": 8573 }, { "epoch": 0.22, "grad_norm": 2.87286114692688, "learning_rate": 1.8170302497722855e-05, "loss": 0.5805, "step": 8574 }, { "epoch": 0.22, "grad_norm": 2.0027847290039062, "learning_rate": 1.816982381350716e-05, "loss": 0.6971, "step": 8575 }, { "epoch": 0.22, "grad_norm": 4.783571243286133, "learning_rate": 1.8169345072990173e-05, "loss": 0.7038, "step": 8576 }, { "epoch": 0.22, "grad_norm": 2.43921160697937, "learning_rate": 1.8168866276175196e-05, "loss": 0.7234, "step": 8577 }, { "epoch": 0.22, "grad_norm": 5.149572849273682, "learning_rate": 1.816838742306552e-05, "loss": 0.8741, "step": 8578 }, { "epoch": 0.22, "grad_norm": 1.8546830415725708, "learning_rate": 1.8167908513664453e-05, "loss": 0.6273, "step": 8579 }, { "epoch": 0.22, "grad_norm": 5.093642711639404, "learning_rate": 1.816742954797529e-05, "loss": 0.7046, "step": 8580 }, { "epoch": 0.22, "grad_norm": 3.9966232776641846, "learning_rate": 1.8166950526001335e-05, "loss": 0.6533, "step": 8581 }, { "epoch": 0.22, "grad_norm": 2.1645584106445312, "learning_rate": 1.816647144774589e-05, "loss": 0.7412, "step": 8582 }, { "epoch": 0.22, "grad_norm": 2.2217462062835693, "learning_rate": 1.8165992313212252e-05, "loss": 0.7089, "step": 8583 }, { "epoch": 0.22, "grad_norm": 1.3868341445922852, "learning_rate": 1.8165513122403725e-05, "loss": 0.6129, "step": 8584 }, { "epoch": 0.22, "grad_norm": 3.6560451984405518, "learning_rate": 1.816503387532361e-05, "loss": 0.6174, "step": 8585 }, { "epoch": 0.22, "grad_norm": 3.587047815322876, "learning_rate": 1.8164554571975216e-05, "loss": 0.7349, "step": 8586 }, { "epoch": 0.22, "grad_norm": 1.0890010595321655, "learning_rate": 1.816407521236184e-05, "loss": 0.5917, "step": 8587 }, { "epoch": 0.22, "grad_norm": 1.1744720935821533, "learning_rate": 1.8163595796486784e-05, "loss": 0.5068, "step": 8588 }, { "epoch": 0.22, "grad_norm": 2.2206177711486816, "learning_rate": 1.8163116324353357e-05, "loss": 0.6049, "step": 8589 }, { "epoch": 0.22, "grad_norm": 5.527002811431885, "learning_rate": 1.816263679596486e-05, "loss": 0.8326, "step": 8590 }, { "epoch": 0.22, "grad_norm": 1.6327919960021973, "learning_rate": 1.81621572113246e-05, "loss": 0.7133, "step": 8591 }, { "epoch": 0.22, "grad_norm": 2.4605178833007812, "learning_rate": 1.8161677570435876e-05, "loss": 0.5545, "step": 8592 }, { "epoch": 0.22, "grad_norm": 2.7244369983673096, "learning_rate": 1.8161197873302e-05, "loss": 0.5475, "step": 8593 }, { "epoch": 0.22, "grad_norm": 3.7116312980651855, "learning_rate": 1.816071811992628e-05, "loss": 0.5224, "step": 8594 }, { "epoch": 0.22, "grad_norm": 2.437884569168091, "learning_rate": 1.8160238310312014e-05, "loss": 0.7559, "step": 8595 }, { "epoch": 0.22, "grad_norm": 2.373587131500244, "learning_rate": 1.815975844446251e-05, "loss": 0.6592, "step": 8596 }, { "epoch": 0.22, "grad_norm": 2.609264612197876, "learning_rate": 1.8159278522381082e-05, "loss": 0.5872, "step": 8597 }, { "epoch": 0.22, "grad_norm": 1.329264521598816, "learning_rate": 1.815879854407103e-05, "loss": 0.5757, "step": 8598 }, { "epoch": 0.22, "grad_norm": 2.195051670074463, "learning_rate": 1.8158318509535666e-05, "loss": 0.719, "step": 8599 }, { "epoch": 0.22, "grad_norm": 1.7502405643463135, "learning_rate": 1.8157838418778295e-05, "loss": 0.7313, "step": 8600 }, { "epoch": 0.22, "grad_norm": 3.162808656692505, "learning_rate": 1.815735827180223e-05, "loss": 0.6952, "step": 8601 }, { "epoch": 0.22, "grad_norm": 2.208620071411133, "learning_rate": 1.8156878068610772e-05, "loss": 0.5722, "step": 8602 }, { "epoch": 0.22, "grad_norm": 2.7753682136535645, "learning_rate": 1.8156397809207237e-05, "loss": 0.7474, "step": 8603 }, { "epoch": 0.22, "grad_norm": 7.382412433624268, "learning_rate": 1.815591749359493e-05, "loss": 0.721, "step": 8604 }, { "epoch": 0.22, "grad_norm": 3.1746678352355957, "learning_rate": 1.8155437121777168e-05, "loss": 0.5209, "step": 8605 }, { "epoch": 0.22, "grad_norm": 4.241663455963135, "learning_rate": 1.8154956693757254e-05, "loss": 0.4712, "step": 8606 }, { "epoch": 0.22, "grad_norm": 2.195554733276367, "learning_rate": 1.8154476209538504e-05, "loss": 0.7954, "step": 8607 }, { "epoch": 0.22, "grad_norm": 5.089507579803467, "learning_rate": 1.8153995669124226e-05, "loss": 0.7167, "step": 8608 }, { "epoch": 0.22, "grad_norm": 1.7949941158294678, "learning_rate": 1.8153515072517732e-05, "loss": 0.5669, "step": 8609 }, { "epoch": 0.22, "grad_norm": 2.3689794540405273, "learning_rate": 1.815303441972234e-05, "loss": 0.5812, "step": 8610 }, { "epoch": 0.22, "grad_norm": 2.8061575889587402, "learning_rate": 1.815255371074135e-05, "loss": 0.7299, "step": 8611 }, { "epoch": 0.22, "grad_norm": 2.624396562576294, "learning_rate": 1.8152072945578084e-05, "loss": 0.7274, "step": 8612 }, { "epoch": 0.22, "grad_norm": 1.4397895336151123, "learning_rate": 1.8151592124235856e-05, "loss": 0.5434, "step": 8613 }, { "epoch": 0.22, "grad_norm": 2.4281344413757324, "learning_rate": 1.815111124671797e-05, "loss": 0.7803, "step": 8614 }, { "epoch": 0.22, "grad_norm": 2.234269380569458, "learning_rate": 1.8150630313027752e-05, "loss": 0.7067, "step": 8615 }, { "epoch": 0.22, "grad_norm": 2.9691162109375, "learning_rate": 1.8150149323168506e-05, "loss": 0.5918, "step": 8616 }, { "epoch": 0.22, "grad_norm": 2.939177989959717, "learning_rate": 1.8149668277143555e-05, "loss": 0.8063, "step": 8617 }, { "epoch": 0.22, "grad_norm": 3.5101191997528076, "learning_rate": 1.814918717495621e-05, "loss": 0.8869, "step": 8618 }, { "epoch": 0.22, "grad_norm": 2.7541046142578125, "learning_rate": 1.814870601660979e-05, "loss": 0.6368, "step": 8619 }, { "epoch": 0.22, "grad_norm": 2.6405436992645264, "learning_rate": 1.8148224802107603e-05, "loss": 0.5689, "step": 8620 }, { "epoch": 0.22, "grad_norm": 3.200620174407959, "learning_rate": 1.8147743531452967e-05, "loss": 0.6868, "step": 8621 }, { "epoch": 0.22, "grad_norm": 5.720776081085205, "learning_rate": 1.8147262204649205e-05, "loss": 0.417, "step": 8622 }, { "epoch": 0.22, "grad_norm": 1.9866905212402344, "learning_rate": 1.814678082169963e-05, "loss": 0.6328, "step": 8623 }, { "epoch": 0.22, "grad_norm": 1.621677279472351, "learning_rate": 1.8146299382607564e-05, "loss": 0.5508, "step": 8624 }, { "epoch": 0.22, "grad_norm": 2.323079824447632, "learning_rate": 1.8145817887376315e-05, "loss": 0.6562, "step": 8625 }, { "epoch": 0.22, "grad_norm": 1.8770846128463745, "learning_rate": 1.814533633600921e-05, "loss": 0.6182, "step": 8626 }, { "epoch": 0.22, "grad_norm": 1.7363839149475098, "learning_rate": 1.8144854728509562e-05, "loss": 0.7751, "step": 8627 }, { "epoch": 0.22, "grad_norm": 4.141340255737305, "learning_rate": 1.8144373064880696e-05, "loss": 0.6634, "step": 8628 }, { "epoch": 0.22, "grad_norm": 1.3596138954162598, "learning_rate": 1.8143891345125925e-05, "loss": 0.5882, "step": 8629 }, { "epoch": 0.22, "grad_norm": 1.7964986562728882, "learning_rate": 1.814340956924857e-05, "loss": 0.5556, "step": 8630 }, { "epoch": 0.22, "grad_norm": 2.645995855331421, "learning_rate": 1.8142927737251957e-05, "loss": 0.8078, "step": 8631 }, { "epoch": 0.22, "grad_norm": 2.1240217685699463, "learning_rate": 1.8142445849139397e-05, "loss": 0.6748, "step": 8632 }, { "epoch": 0.22, "grad_norm": 1.7461532354354858, "learning_rate": 1.814196390491422e-05, "loss": 0.4722, "step": 8633 }, { "epoch": 0.22, "grad_norm": 2.606341600418091, "learning_rate": 1.814148190457974e-05, "loss": 0.6686, "step": 8634 }, { "epoch": 0.22, "grad_norm": 8.278312683105469, "learning_rate": 1.814099984813928e-05, "loss": 0.8296, "step": 8635 }, { "epoch": 0.22, "grad_norm": 3.088981866836548, "learning_rate": 1.8140517735596168e-05, "loss": 0.6033, "step": 8636 }, { "epoch": 0.22, "grad_norm": 1.7191277742385864, "learning_rate": 1.814003556695372e-05, "loss": 0.7311, "step": 8637 }, { "epoch": 0.22, "grad_norm": 1.571533203125, "learning_rate": 1.8139553342215263e-05, "loss": 0.6558, "step": 8638 }, { "epoch": 0.22, "grad_norm": 1.999139666557312, "learning_rate": 1.8139071061384116e-05, "loss": 0.6015, "step": 8639 }, { "epoch": 0.22, "grad_norm": 3.3851053714752197, "learning_rate": 1.8138588724463604e-05, "loss": 0.7868, "step": 8640 }, { "epoch": 0.22, "grad_norm": 4.7256035804748535, "learning_rate": 1.8138106331457055e-05, "loss": 0.7684, "step": 8641 }, { "epoch": 0.22, "grad_norm": 1.508854627609253, "learning_rate": 1.8137623882367784e-05, "loss": 0.5801, "step": 8642 }, { "epoch": 0.22, "grad_norm": 2.518364906311035, "learning_rate": 1.8137141377199128e-05, "loss": 0.809, "step": 8643 }, { "epoch": 0.22, "grad_norm": 5.2882161140441895, "learning_rate": 1.81366588159544e-05, "loss": 0.66, "step": 8644 }, { "epoch": 0.22, "grad_norm": 1.9721320867538452, "learning_rate": 1.813617619863694e-05, "loss": 0.6395, "step": 8645 }, { "epoch": 0.22, "grad_norm": 5.3656158447265625, "learning_rate": 1.813569352525006e-05, "loss": 0.6684, "step": 8646 }, { "epoch": 0.22, "grad_norm": 4.490046977996826, "learning_rate": 1.8135210795797093e-05, "loss": 0.6872, "step": 8647 }, { "epoch": 0.22, "grad_norm": 1.4345804452896118, "learning_rate": 1.813472801028136e-05, "loss": 0.6165, "step": 8648 }, { "epoch": 0.22, "grad_norm": 1.8201271295547485, "learning_rate": 1.8134245168706198e-05, "loss": 0.7247, "step": 8649 }, { "epoch": 0.22, "grad_norm": 2.4616153240203857, "learning_rate": 1.8133762271074923e-05, "loss": 0.6977, "step": 8650 }, { "epoch": 0.22, "grad_norm": 1.9571869373321533, "learning_rate": 1.8133279317390873e-05, "loss": 0.655, "step": 8651 }, { "epoch": 0.22, "grad_norm": 1.6167309284210205, "learning_rate": 1.8132796307657372e-05, "loss": 0.7364, "step": 8652 }, { "epoch": 0.22, "grad_norm": 1.4237191677093506, "learning_rate": 1.8132313241877747e-05, "loss": 0.5196, "step": 8653 }, { "epoch": 0.22, "grad_norm": 2.5065605640411377, "learning_rate": 1.8131830120055326e-05, "loss": 0.5222, "step": 8654 }, { "epoch": 0.22, "grad_norm": 1.2737269401550293, "learning_rate": 1.8131346942193443e-05, "loss": 0.5966, "step": 8655 }, { "epoch": 0.22, "grad_norm": 1.1319477558135986, "learning_rate": 1.8130863708295427e-05, "loss": 0.5856, "step": 8656 }, { "epoch": 0.22, "grad_norm": 3.1517441272735596, "learning_rate": 1.8130380418364603e-05, "loss": 0.6202, "step": 8657 }, { "epoch": 0.22, "grad_norm": 8.410287857055664, "learning_rate": 1.8129897072404308e-05, "loss": 0.4842, "step": 8658 }, { "epoch": 0.22, "grad_norm": 1.33747136592865, "learning_rate": 1.812941367041787e-05, "loss": 0.6244, "step": 8659 }, { "epoch": 0.22, "grad_norm": 2.826043128967285, "learning_rate": 1.812893021240862e-05, "loss": 0.6685, "step": 8660 }, { "epoch": 0.22, "grad_norm": 1.3436076641082764, "learning_rate": 1.8128446698379888e-05, "loss": 0.5163, "step": 8661 }, { "epoch": 0.22, "grad_norm": 2.4045374393463135, "learning_rate": 1.812796312833501e-05, "loss": 0.5155, "step": 8662 }, { "epoch": 0.22, "grad_norm": 3.0309510231018066, "learning_rate": 1.8127479502277315e-05, "loss": 0.7275, "step": 8663 }, { "epoch": 0.22, "grad_norm": 2.9817912578582764, "learning_rate": 1.8126995820210142e-05, "loss": 0.6727, "step": 8664 }, { "epoch": 0.22, "grad_norm": 1.4751020669937134, "learning_rate": 1.8126512082136815e-05, "loss": 0.7092, "step": 8665 }, { "epoch": 0.22, "grad_norm": 1.7373294830322266, "learning_rate": 1.8126028288060675e-05, "loss": 0.6499, "step": 8666 }, { "epoch": 0.22, "grad_norm": 1.6638579368591309, "learning_rate": 1.812554443798505e-05, "loss": 0.6571, "step": 8667 }, { "epoch": 0.22, "grad_norm": 1.5229159593582153, "learning_rate": 1.8125060531913283e-05, "loss": 0.651, "step": 8668 }, { "epoch": 0.22, "grad_norm": 3.772766351699829, "learning_rate": 1.81245765698487e-05, "loss": 0.8395, "step": 8669 }, { "epoch": 0.22, "grad_norm": 5.153385162353516, "learning_rate": 1.812409255179464e-05, "loss": 0.4909, "step": 8670 }, { "epoch": 0.22, "grad_norm": 2.9190878868103027, "learning_rate": 1.812360847775444e-05, "loss": 0.5726, "step": 8671 }, { "epoch": 0.22, "grad_norm": 5.443135738372803, "learning_rate": 1.8123124347731428e-05, "loss": 0.6366, "step": 8672 }, { "epoch": 0.22, "grad_norm": 1.758710265159607, "learning_rate": 1.8122640161728952e-05, "loss": 0.7008, "step": 8673 }, { "epoch": 0.22, "grad_norm": 1.8399370908737183, "learning_rate": 1.812215591975034e-05, "loss": 0.6773, "step": 8674 }, { "epoch": 0.22, "grad_norm": 2.193899393081665, "learning_rate": 1.812167162179894e-05, "loss": 0.6133, "step": 8675 }, { "epoch": 0.22, "grad_norm": 2.963590383529663, "learning_rate": 1.8121187267878072e-05, "loss": 0.5532, "step": 8676 }, { "epoch": 0.22, "grad_norm": 2.7776284217834473, "learning_rate": 1.8120702857991087e-05, "loss": 0.5912, "step": 8677 }, { "epoch": 0.22, "grad_norm": 2.9906091690063477, "learning_rate": 1.8120218392141322e-05, "loss": 0.5826, "step": 8678 }, { "epoch": 0.22, "grad_norm": 2.8522238731384277, "learning_rate": 1.811973387033211e-05, "loss": 0.573, "step": 8679 }, { "epoch": 0.22, "grad_norm": 9.965924263000488, "learning_rate": 1.8119249292566798e-05, "loss": 0.6885, "step": 8680 }, { "epoch": 0.22, "grad_norm": 1.356058120727539, "learning_rate": 1.8118764658848716e-05, "loss": 0.6048, "step": 8681 }, { "epoch": 0.22, "grad_norm": 2.3502235412597656, "learning_rate": 1.8118279969181212e-05, "loss": 0.7248, "step": 8682 }, { "epoch": 0.22, "grad_norm": 2.4239540100097656, "learning_rate": 1.8117795223567622e-05, "loss": 0.5825, "step": 8683 }, { "epoch": 0.22, "grad_norm": 2.0223164558410645, "learning_rate": 1.8117310422011283e-05, "loss": 0.6301, "step": 8684 }, { "epoch": 0.22, "grad_norm": 1.707425594329834, "learning_rate": 1.8116825564515547e-05, "loss": 0.5544, "step": 8685 }, { "epoch": 0.22, "grad_norm": 1.6475375890731812, "learning_rate": 1.8116340651083744e-05, "loss": 0.5517, "step": 8686 }, { "epoch": 0.22, "grad_norm": 1.519912600517273, "learning_rate": 1.8115855681719225e-05, "loss": 0.6066, "step": 8687 }, { "epoch": 0.22, "grad_norm": 2.3539206981658936, "learning_rate": 1.8115370656425324e-05, "loss": 0.7789, "step": 8688 }, { "epoch": 0.22, "grad_norm": 3.5155038833618164, "learning_rate": 1.811488557520539e-05, "loss": 0.7017, "step": 8689 }, { "epoch": 0.22, "grad_norm": 1.6922508478164673, "learning_rate": 1.811440043806276e-05, "loss": 0.6775, "step": 8690 }, { "epoch": 0.22, "grad_norm": 3.001211166381836, "learning_rate": 1.811391524500078e-05, "loss": 0.5233, "step": 8691 }, { "epoch": 0.22, "grad_norm": 1.1485943794250488, "learning_rate": 1.8113429996022795e-05, "loss": 0.4705, "step": 8692 }, { "epoch": 0.22, "grad_norm": 2.3362059593200684, "learning_rate": 1.8112944691132147e-05, "loss": 0.5058, "step": 8693 }, { "epoch": 0.22, "grad_norm": 1.7151288986206055, "learning_rate": 1.8112459330332185e-05, "loss": 0.6162, "step": 8694 }, { "epoch": 0.22, "grad_norm": 5.400396823883057, "learning_rate": 1.8111973913626245e-05, "loss": 0.7168, "step": 8695 }, { "epoch": 0.22, "grad_norm": 4.392353534698486, "learning_rate": 1.811148844101768e-05, "loss": 0.5046, "step": 8696 }, { "epoch": 0.22, "grad_norm": 5.519993305206299, "learning_rate": 1.8111002912509833e-05, "loss": 0.5813, "step": 8697 }, { "epoch": 0.22, "grad_norm": 3.350741386413574, "learning_rate": 1.8110517328106048e-05, "loss": 0.6753, "step": 8698 }, { "epoch": 0.22, "grad_norm": 2.656252145767212, "learning_rate": 1.8110031687809675e-05, "loss": 0.661, "step": 8699 }, { "epoch": 0.22, "grad_norm": 1.394494652748108, "learning_rate": 1.8109545991624056e-05, "loss": 0.5269, "step": 8700 }, { "epoch": 0.22, "grad_norm": 1.2115925550460815, "learning_rate": 1.8109060239552542e-05, "loss": 0.6663, "step": 8701 }, { "epoch": 0.22, "grad_norm": 1.5367356538772583, "learning_rate": 1.810857443159848e-05, "loss": 0.6818, "step": 8702 }, { "epoch": 0.22, "grad_norm": 3.3073275089263916, "learning_rate": 1.810808856776522e-05, "loss": 0.7636, "step": 8703 }, { "epoch": 0.22, "grad_norm": 5.524086952209473, "learning_rate": 1.8107602648056104e-05, "loss": 0.709, "step": 8704 }, { "epoch": 0.22, "grad_norm": 3.9512288570404053, "learning_rate": 1.8107116672474485e-05, "loss": 0.72, "step": 8705 }, { "epoch": 0.22, "grad_norm": 4.334006309509277, "learning_rate": 1.8106630641023712e-05, "loss": 0.7578, "step": 8706 }, { "epoch": 0.22, "grad_norm": 2.8899083137512207, "learning_rate": 1.8106144553707135e-05, "loss": 0.4327, "step": 8707 }, { "epoch": 0.22, "grad_norm": 1.5346660614013672, "learning_rate": 1.8105658410528098e-05, "loss": 0.5168, "step": 8708 }, { "epoch": 0.22, "grad_norm": 1.8741930723190308, "learning_rate": 1.8105172211489962e-05, "loss": 0.5437, "step": 8709 }, { "epoch": 0.22, "grad_norm": 3.649352550506592, "learning_rate": 1.8104685956596065e-05, "loss": 0.6955, "step": 8710 }, { "epoch": 0.22, "grad_norm": 1.6351392269134521, "learning_rate": 1.8104199645849767e-05, "loss": 0.7271, "step": 8711 }, { "epoch": 0.22, "grad_norm": 1.627163052558899, "learning_rate": 1.8103713279254416e-05, "loss": 0.681, "step": 8712 }, { "epoch": 0.22, "grad_norm": 2.8841075897216797, "learning_rate": 1.8103226856813367e-05, "loss": 0.6989, "step": 8713 }, { "epoch": 0.22, "grad_norm": 4.552351474761963, "learning_rate": 1.8102740378529966e-05, "loss": 0.7204, "step": 8714 }, { "epoch": 0.22, "grad_norm": 1.4862749576568604, "learning_rate": 1.810225384440757e-05, "loss": 0.6628, "step": 8715 }, { "epoch": 0.22, "grad_norm": 2.9188926219940186, "learning_rate": 1.810176725444953e-05, "loss": 0.6239, "step": 8716 }, { "epoch": 0.22, "grad_norm": 1.5993098020553589, "learning_rate": 1.81012806086592e-05, "loss": 0.6109, "step": 8717 }, { "epoch": 0.22, "grad_norm": 1.7511664628982544, "learning_rate": 1.8100793907039938e-05, "loss": 0.5112, "step": 8718 }, { "epoch": 0.22, "grad_norm": 3.093410015106201, "learning_rate": 1.810030714959509e-05, "loss": 0.6182, "step": 8719 }, { "epoch": 0.22, "grad_norm": 5.792956352233887, "learning_rate": 1.8099820336328015e-05, "loss": 0.5995, "step": 8720 }, { "epoch": 0.22, "grad_norm": 2.2234702110290527, "learning_rate": 1.8099333467242068e-05, "loss": 0.592, "step": 8721 }, { "epoch": 0.22, "grad_norm": 1.5942518711090088, "learning_rate": 1.8098846542340603e-05, "loss": 0.7741, "step": 8722 }, { "epoch": 0.22, "grad_norm": 2.370410680770874, "learning_rate": 1.8098359561626975e-05, "loss": 0.5966, "step": 8723 }, { "epoch": 0.22, "grad_norm": 1.2985873222351074, "learning_rate": 1.8097872525104542e-05, "loss": 0.6078, "step": 8724 }, { "epoch": 0.22, "grad_norm": 3.4539952278137207, "learning_rate": 1.8097385432776657e-05, "loss": 0.5666, "step": 8725 }, { "epoch": 0.22, "grad_norm": 1.9809770584106445, "learning_rate": 1.8096898284646683e-05, "loss": 0.6716, "step": 8726 }, { "epoch": 0.22, "grad_norm": 1.1805169582366943, "learning_rate": 1.809641108071797e-05, "loss": 0.4386, "step": 8727 }, { "epoch": 0.22, "grad_norm": 3.6096417903900146, "learning_rate": 1.8095923820993877e-05, "loss": 0.5337, "step": 8728 }, { "epoch": 0.22, "grad_norm": 2.3626303672790527, "learning_rate": 1.8095436505477765e-05, "loss": 0.5989, "step": 8729 }, { "epoch": 0.22, "grad_norm": 2.0373008251190186, "learning_rate": 1.809494913417299e-05, "loss": 0.6777, "step": 8730 }, { "epoch": 0.22, "grad_norm": 2.225841760635376, "learning_rate": 1.8094461707082912e-05, "loss": 0.5659, "step": 8731 }, { "epoch": 0.22, "grad_norm": 4.38824987411499, "learning_rate": 1.8093974224210887e-05, "loss": 0.9094, "step": 8732 }, { "epoch": 0.22, "grad_norm": 1.9692699909210205, "learning_rate": 1.809348668556028e-05, "loss": 0.5818, "step": 8733 }, { "epoch": 0.22, "grad_norm": 1.3578664064407349, "learning_rate": 1.8092999091134448e-05, "loss": 0.5911, "step": 8734 }, { "epoch": 0.22, "grad_norm": 1.7747005224227905, "learning_rate": 1.809251144093675e-05, "loss": 0.5967, "step": 8735 }, { "epoch": 0.22, "grad_norm": 1.270699143409729, "learning_rate": 1.8092023734970544e-05, "loss": 0.7063, "step": 8736 }, { "epoch": 0.22, "grad_norm": 1.754321813583374, "learning_rate": 1.80915359732392e-05, "loss": 0.6951, "step": 8737 }, { "epoch": 0.22, "grad_norm": 3.7867069244384766, "learning_rate": 1.809104815574607e-05, "loss": 0.5746, "step": 8738 }, { "epoch": 0.22, "grad_norm": 6.215571403503418, "learning_rate": 1.8090560282494517e-05, "loss": 0.51, "step": 8739 }, { "epoch": 0.22, "grad_norm": 6.663272857666016, "learning_rate": 1.809007235348791e-05, "loss": 0.6748, "step": 8740 }, { "epoch": 0.22, "grad_norm": 4.357447624206543, "learning_rate": 1.8089584368729607e-05, "loss": 0.5, "step": 8741 }, { "epoch": 0.22, "grad_norm": 2.4091074466705322, "learning_rate": 1.8089096328222968e-05, "loss": 0.7276, "step": 8742 }, { "epoch": 0.22, "grad_norm": 2.374882459640503, "learning_rate": 1.808860823197136e-05, "loss": 0.6256, "step": 8743 }, { "epoch": 0.22, "grad_norm": 3.4445412158966064, "learning_rate": 1.8088120079978147e-05, "loss": 0.6907, "step": 8744 }, { "epoch": 0.22, "grad_norm": 3.269411087036133, "learning_rate": 1.8087631872246693e-05, "loss": 0.7144, "step": 8745 }, { "epoch": 0.22, "grad_norm": 1.8545970916748047, "learning_rate": 1.808714360878036e-05, "loss": 0.4701, "step": 8746 }, { "epoch": 0.22, "grad_norm": 1.238303303718567, "learning_rate": 1.8086655289582514e-05, "loss": 0.7552, "step": 8747 }, { "epoch": 0.22, "grad_norm": 11.941033363342285, "learning_rate": 1.808616691465652e-05, "loss": 0.7098, "step": 8748 }, { "epoch": 0.22, "grad_norm": 3.1012516021728516, "learning_rate": 1.8085678484005744e-05, "loss": 0.6866, "step": 8749 }, { "epoch": 0.22, "grad_norm": 2.0386810302734375, "learning_rate": 1.8085189997633553e-05, "loss": 0.5803, "step": 8750 }, { "epoch": 0.22, "grad_norm": 2.6590137481689453, "learning_rate": 1.808470145554331e-05, "loss": 0.4654, "step": 8751 }, { "epoch": 0.22, "grad_norm": 2.0255706310272217, "learning_rate": 1.8084212857738385e-05, "loss": 0.7626, "step": 8752 }, { "epoch": 0.22, "grad_norm": 3.155400037765503, "learning_rate": 1.8083724204222144e-05, "loss": 0.6837, "step": 8753 }, { "epoch": 0.22, "grad_norm": 1.8962997198104858, "learning_rate": 1.8083235494997952e-05, "loss": 0.4899, "step": 8754 }, { "epoch": 0.22, "grad_norm": 1.5229284763336182, "learning_rate": 1.8082746730069184e-05, "loss": 0.7338, "step": 8755 }, { "epoch": 0.22, "grad_norm": 2.174527168273926, "learning_rate": 1.80822579094392e-05, "loss": 0.5213, "step": 8756 }, { "epoch": 0.22, "grad_norm": 2.460428237915039, "learning_rate": 1.8081769033111374e-05, "loss": 0.7932, "step": 8757 }, { "epoch": 0.22, "grad_norm": 1.974332332611084, "learning_rate": 1.808128010108907e-05, "loss": 0.7534, "step": 8758 }, { "epoch": 0.22, "grad_norm": 2.041858196258545, "learning_rate": 1.8080791113375665e-05, "loss": 0.5695, "step": 8759 }, { "epoch": 0.22, "grad_norm": 4.448564529418945, "learning_rate": 1.8080302069974523e-05, "loss": 0.7306, "step": 8760 }, { "epoch": 0.22, "grad_norm": 2.855091094970703, "learning_rate": 1.8079812970889017e-05, "loss": 0.6698, "step": 8761 }, { "epoch": 0.22, "grad_norm": 2.0415236949920654, "learning_rate": 1.8079323816122514e-05, "loss": 0.6921, "step": 8762 }, { "epoch": 0.22, "grad_norm": 4.19081974029541, "learning_rate": 1.807883460567839e-05, "loss": 0.6215, "step": 8763 }, { "epoch": 0.22, "grad_norm": 2.93719744682312, "learning_rate": 1.807834533956001e-05, "loss": 0.57, "step": 8764 }, { "epoch": 0.22, "grad_norm": 3.133052349090576, "learning_rate": 1.807785601777075e-05, "loss": 0.8121, "step": 8765 }, { "epoch": 0.22, "grad_norm": 6.604334831237793, "learning_rate": 1.807736664031398e-05, "loss": 0.6222, "step": 8766 }, { "epoch": 0.22, "grad_norm": 1.941945195198059, "learning_rate": 1.8076877207193078e-05, "loss": 0.6923, "step": 8767 }, { "epoch": 0.22, "grad_norm": 7.82371187210083, "learning_rate": 1.807638771841141e-05, "loss": 0.607, "step": 8768 }, { "epoch": 0.22, "grad_norm": 1.865185260772705, "learning_rate": 1.8075898173972353e-05, "loss": 0.6419, "step": 8769 }, { "epoch": 0.22, "grad_norm": 2.9841179847717285, "learning_rate": 1.807540857387928e-05, "loss": 0.6498, "step": 8770 }, { "epoch": 0.22, "grad_norm": 2.219961643218994, "learning_rate": 1.8074918918135564e-05, "loss": 0.6442, "step": 8771 }, { "epoch": 0.22, "grad_norm": 3.096851110458374, "learning_rate": 1.8074429206744578e-05, "loss": 0.7298, "step": 8772 }, { "epoch": 0.22, "grad_norm": 2.1271514892578125, "learning_rate": 1.8073939439709704e-05, "loss": 0.7907, "step": 8773 }, { "epoch": 0.22, "grad_norm": 1.3539817333221436, "learning_rate": 1.8073449617034306e-05, "loss": 0.4832, "step": 8774 }, { "epoch": 0.22, "grad_norm": 1.9945870637893677, "learning_rate": 1.807295973872177e-05, "loss": 0.6405, "step": 8775 }, { "epoch": 0.22, "grad_norm": 3.4204299449920654, "learning_rate": 1.807246980477546e-05, "loss": 0.5825, "step": 8776 }, { "epoch": 0.22, "grad_norm": 2.006401777267456, "learning_rate": 1.807197981519877e-05, "loss": 0.5966, "step": 8777 }, { "epoch": 0.22, "grad_norm": 1.5429809093475342, "learning_rate": 1.807148976999506e-05, "loss": 0.5661, "step": 8778 }, { "epoch": 0.23, "grad_norm": 3.175764560699463, "learning_rate": 1.8070999669167715e-05, "loss": 0.6476, "step": 8779 }, { "epoch": 0.23, "grad_norm": 3.681694269180298, "learning_rate": 1.8070509512720112e-05, "loss": 0.641, "step": 8780 }, { "epoch": 0.23, "grad_norm": 1.3788928985595703, "learning_rate": 1.8070019300655625e-05, "loss": 0.5666, "step": 8781 }, { "epoch": 0.23, "grad_norm": 3.098050355911255, "learning_rate": 1.806952903297764e-05, "loss": 0.5337, "step": 8782 }, { "epoch": 0.23, "grad_norm": 3.2506208419799805, "learning_rate": 1.8069038709689526e-05, "loss": 0.5231, "step": 8783 }, { "epoch": 0.23, "grad_norm": 1.9595515727996826, "learning_rate": 1.806854833079467e-05, "loss": 0.756, "step": 8784 }, { "epoch": 0.23, "grad_norm": 4.812494277954102, "learning_rate": 1.8068057896296444e-05, "loss": 0.9372, "step": 8785 }, { "epoch": 0.23, "grad_norm": 1.3596768379211426, "learning_rate": 1.8067567406198234e-05, "loss": 0.4833, "step": 8786 }, { "epoch": 0.23, "grad_norm": 1.9374090433120728, "learning_rate": 1.806707686050342e-05, "loss": 0.7773, "step": 8787 }, { "epoch": 0.23, "grad_norm": 1.7150733470916748, "learning_rate": 1.8066586259215374e-05, "loss": 0.4642, "step": 8788 }, { "epoch": 0.23, "grad_norm": 2.7151336669921875, "learning_rate": 1.806609560233749e-05, "loss": 0.5224, "step": 8789 }, { "epoch": 0.23, "grad_norm": 1.5798794031143188, "learning_rate": 1.806560488987314e-05, "loss": 0.6516, "step": 8790 }, { "epoch": 0.23, "grad_norm": 1.980668067932129, "learning_rate": 1.8065114121825712e-05, "loss": 0.6348, "step": 8791 }, { "epoch": 0.23, "grad_norm": 3.5780889987945557, "learning_rate": 1.806462329819858e-05, "loss": 0.7033, "step": 8792 }, { "epoch": 0.23, "grad_norm": 4.411059856414795, "learning_rate": 1.8064132418995133e-05, "loss": 0.7206, "step": 8793 }, { "epoch": 0.23, "grad_norm": 2.1752984523773193, "learning_rate": 1.806364148421875e-05, "loss": 0.8595, "step": 8794 }, { "epoch": 0.23, "grad_norm": 1.3192917108535767, "learning_rate": 1.806315049387282e-05, "loss": 0.4997, "step": 8795 }, { "epoch": 0.23, "grad_norm": 5.0263776779174805, "learning_rate": 1.8062659447960716e-05, "loss": 0.6753, "step": 8796 }, { "epoch": 0.23, "grad_norm": 2.458444356918335, "learning_rate": 1.8062168346485834e-05, "loss": 0.6834, "step": 8797 }, { "epoch": 0.23, "grad_norm": 5.452530860900879, "learning_rate": 1.806167718945155e-05, "loss": 0.7192, "step": 8798 }, { "epoch": 0.23, "grad_norm": 1.4840956926345825, "learning_rate": 1.8061185976861253e-05, "loss": 0.6678, "step": 8799 }, { "epoch": 0.23, "grad_norm": 6.218702793121338, "learning_rate": 1.8060694708718327e-05, "loss": 0.6241, "step": 8800 }, { "epoch": 0.23, "grad_norm": 3.923994302749634, "learning_rate": 1.8060203385026157e-05, "loss": 0.7565, "step": 8801 }, { "epoch": 0.23, "grad_norm": 3.5650992393493652, "learning_rate": 1.805971200578813e-05, "loss": 0.7914, "step": 8802 }, { "epoch": 0.23, "grad_norm": 1.796839952468872, "learning_rate": 1.8059220571007633e-05, "loss": 0.6036, "step": 8803 }, { "epoch": 0.23, "grad_norm": 1.8321415185928345, "learning_rate": 1.8058729080688047e-05, "loss": 0.6178, "step": 8804 }, { "epoch": 0.23, "grad_norm": 3.32720947265625, "learning_rate": 1.8058237534832765e-05, "loss": 0.8048, "step": 8805 }, { "epoch": 0.23, "grad_norm": 2.5838558673858643, "learning_rate": 1.8057745933445173e-05, "loss": 0.6873, "step": 8806 }, { "epoch": 0.23, "grad_norm": 2.437432289123535, "learning_rate": 1.8057254276528656e-05, "loss": 0.7803, "step": 8807 }, { "epoch": 0.23, "grad_norm": 3.1138858795166016, "learning_rate": 1.8056762564086608e-05, "loss": 0.6036, "step": 8808 }, { "epoch": 0.23, "grad_norm": 1.7239938974380493, "learning_rate": 1.805627079612241e-05, "loss": 0.6271, "step": 8809 }, { "epoch": 0.23, "grad_norm": 2.603965997695923, "learning_rate": 1.805577897263946e-05, "loss": 0.6899, "step": 8810 }, { "epoch": 0.23, "grad_norm": 1.3203542232513428, "learning_rate": 1.805528709364114e-05, "loss": 0.6098, "step": 8811 }, { "epoch": 0.23, "grad_norm": 1.9327462911605835, "learning_rate": 1.805479515913084e-05, "loss": 0.6709, "step": 8812 }, { "epoch": 0.23, "grad_norm": 3.68636155128479, "learning_rate": 1.8054303169111957e-05, "loss": 0.7306, "step": 8813 }, { "epoch": 0.23, "grad_norm": 1.3596256971359253, "learning_rate": 1.8053811123587874e-05, "loss": 0.6767, "step": 8814 }, { "epoch": 0.23, "grad_norm": 1.9609546661376953, "learning_rate": 1.8053319022561987e-05, "loss": 0.7889, "step": 8815 }, { "epoch": 0.23, "grad_norm": 4.790369510650635, "learning_rate": 1.8052826866037682e-05, "loss": 0.6919, "step": 8816 }, { "epoch": 0.23, "grad_norm": 1.7236920595169067, "learning_rate": 1.8052334654018353e-05, "loss": 0.5657, "step": 8817 }, { "epoch": 0.23, "grad_norm": 1.8025381565093994, "learning_rate": 1.8051842386507396e-05, "loss": 0.6938, "step": 8818 }, { "epoch": 0.23, "grad_norm": 1.9210076332092285, "learning_rate": 1.8051350063508197e-05, "loss": 0.6028, "step": 8819 }, { "epoch": 0.23, "grad_norm": 3.1071345806121826, "learning_rate": 1.8050857685024153e-05, "loss": 0.6728, "step": 8820 }, { "epoch": 0.23, "grad_norm": 2.109236478805542, "learning_rate": 1.8050365251058655e-05, "loss": 0.6624, "step": 8821 }, { "epoch": 0.23, "grad_norm": 5.000871181488037, "learning_rate": 1.80498727616151e-05, "loss": 0.7266, "step": 8822 }, { "epoch": 0.23, "grad_norm": 3.2605369091033936, "learning_rate": 1.8049380216696875e-05, "loss": 0.8013, "step": 8823 }, { "epoch": 0.23, "grad_norm": 3.774970054626465, "learning_rate": 1.8048887616307383e-05, "loss": 0.6923, "step": 8824 }, { "epoch": 0.23, "grad_norm": 3.6429171562194824, "learning_rate": 1.804839496045001e-05, "loss": 0.8115, "step": 8825 }, { "epoch": 0.23, "grad_norm": 1.6056119203567505, "learning_rate": 1.8047902249128157e-05, "loss": 0.5685, "step": 8826 }, { "epoch": 0.23, "grad_norm": 1.5540963411331177, "learning_rate": 1.804740948234522e-05, "loss": 0.5743, "step": 8827 }, { "epoch": 0.23, "grad_norm": 1.1449111700057983, "learning_rate": 1.8046916660104592e-05, "loss": 0.5607, "step": 8828 }, { "epoch": 0.23, "grad_norm": 4.419830322265625, "learning_rate": 1.804642378240967e-05, "loss": 0.6816, "step": 8829 }, { "epoch": 0.23, "grad_norm": 3.7400598526000977, "learning_rate": 1.8045930849263847e-05, "loss": 0.6958, "step": 8830 }, { "epoch": 0.23, "grad_norm": 3.817214012145996, "learning_rate": 1.8045437860670526e-05, "loss": 0.5244, "step": 8831 }, { "epoch": 0.23, "grad_norm": 1.9373841285705566, "learning_rate": 1.8044944816633105e-05, "loss": 0.4977, "step": 8832 }, { "epoch": 0.23, "grad_norm": 1.4926574230194092, "learning_rate": 1.8044451717154976e-05, "loss": 0.7182, "step": 8833 }, { "epoch": 0.23, "grad_norm": 1.2123944759368896, "learning_rate": 1.8043958562239542e-05, "loss": 0.5519, "step": 8834 }, { "epoch": 0.23, "grad_norm": 7.464956760406494, "learning_rate": 1.8043465351890194e-05, "loss": 0.4725, "step": 8835 }, { "epoch": 0.23, "grad_norm": 5.139125823974609, "learning_rate": 1.804297208611034e-05, "loss": 0.7198, "step": 8836 }, { "epoch": 0.23, "grad_norm": 3.8158743381500244, "learning_rate": 1.8042478764903374e-05, "loss": 1.0131, "step": 8837 }, { "epoch": 0.23, "grad_norm": 3.9805281162261963, "learning_rate": 1.80419853882727e-05, "loss": 0.5304, "step": 8838 }, { "epoch": 0.23, "grad_norm": 1.6516540050506592, "learning_rate": 1.8041491956221713e-05, "loss": 0.6109, "step": 8839 }, { "epoch": 0.23, "grad_norm": 2.151865005493164, "learning_rate": 1.8040998468753813e-05, "loss": 0.6875, "step": 8840 }, { "epoch": 0.23, "grad_norm": 1.491803526878357, "learning_rate": 1.8040504925872408e-05, "loss": 0.568, "step": 8841 }, { "epoch": 0.23, "grad_norm": 2.764995574951172, "learning_rate": 1.8040011327580892e-05, "loss": 0.6249, "step": 8842 }, { "epoch": 0.23, "grad_norm": 5.2447123527526855, "learning_rate": 1.803951767388267e-05, "loss": 0.6346, "step": 8843 }, { "epoch": 0.23, "grad_norm": 4.667759895324707, "learning_rate": 1.8039023964781143e-05, "loss": 0.7782, "step": 8844 }, { "epoch": 0.23, "grad_norm": 4.631060600280762, "learning_rate": 1.8038530200279713e-05, "loss": 0.5575, "step": 8845 }, { "epoch": 0.23, "grad_norm": 2.4704396724700928, "learning_rate": 1.8038036380381782e-05, "loss": 0.7192, "step": 8846 }, { "epoch": 0.23, "grad_norm": 1.7254167795181274, "learning_rate": 1.803754250509076e-05, "loss": 0.607, "step": 8847 }, { "epoch": 0.23, "grad_norm": 3.2018823623657227, "learning_rate": 1.8037048574410038e-05, "loss": 0.6212, "step": 8848 }, { "epoch": 0.23, "grad_norm": 1.2684472799301147, "learning_rate": 1.803655458834303e-05, "loss": 0.6033, "step": 8849 }, { "epoch": 0.23, "grad_norm": 1.2469847202301025, "learning_rate": 1.8036060546893133e-05, "loss": 0.655, "step": 8850 }, { "epoch": 0.23, "grad_norm": 2.6073625087738037, "learning_rate": 1.803556645006376e-05, "loss": 0.5757, "step": 8851 }, { "epoch": 0.23, "grad_norm": 3.86525559425354, "learning_rate": 1.8035072297858312e-05, "loss": 0.7615, "step": 8852 }, { "epoch": 0.23, "grad_norm": 2.9732391834259033, "learning_rate": 1.8034578090280192e-05, "loss": 0.8352, "step": 8853 }, { "epoch": 0.23, "grad_norm": 1.8273497819900513, "learning_rate": 1.8034083827332805e-05, "loss": 0.6174, "step": 8854 }, { "epoch": 0.23, "grad_norm": 3.421457290649414, "learning_rate": 1.8033589509019563e-05, "loss": 0.7337, "step": 8855 }, { "epoch": 0.23, "grad_norm": 2.1161997318267822, "learning_rate": 1.803309513534387e-05, "loss": 0.4364, "step": 8856 }, { "epoch": 0.23, "grad_norm": 1.5152103900909424, "learning_rate": 1.803260070630913e-05, "loss": 0.6828, "step": 8857 }, { "epoch": 0.23, "grad_norm": 3.760737180709839, "learning_rate": 1.803210622191875e-05, "loss": 0.6121, "step": 8858 }, { "epoch": 0.23, "grad_norm": 7.475980281829834, "learning_rate": 1.8031611682176143e-05, "loss": 0.5121, "step": 8859 }, { "epoch": 0.23, "grad_norm": 2.770859479904175, "learning_rate": 1.8031117087084715e-05, "loss": 0.7192, "step": 8860 }, { "epoch": 0.23, "grad_norm": 1.6013083457946777, "learning_rate": 1.8030622436647876e-05, "loss": 0.4958, "step": 8861 }, { "epoch": 0.23, "grad_norm": 1.3384648561477661, "learning_rate": 1.803012773086903e-05, "loss": 0.6528, "step": 8862 }, { "epoch": 0.23, "grad_norm": 3.2021021842956543, "learning_rate": 1.8029632969751587e-05, "loss": 0.6499, "step": 8863 }, { "epoch": 0.23, "grad_norm": 1.6120306253433228, "learning_rate": 1.802913815329896e-05, "loss": 0.5816, "step": 8864 }, { "epoch": 0.23, "grad_norm": 1.67583429813385, "learning_rate": 1.8028643281514554e-05, "loss": 0.5131, "step": 8865 }, { "epoch": 0.23, "grad_norm": 2.9119699001312256, "learning_rate": 1.802814835440179e-05, "loss": 0.5912, "step": 8866 }, { "epoch": 0.23, "grad_norm": 1.8001042604446411, "learning_rate": 1.8027653371964065e-05, "loss": 0.7807, "step": 8867 }, { "epoch": 0.23, "grad_norm": 2.3452811241149902, "learning_rate": 1.80271583342048e-05, "loss": 0.5782, "step": 8868 }, { "epoch": 0.23, "grad_norm": 2.645545482635498, "learning_rate": 1.8026663241127397e-05, "loss": 0.7211, "step": 8869 }, { "epoch": 0.23, "grad_norm": 1.6561588048934937, "learning_rate": 1.8026168092735277e-05, "loss": 0.7619, "step": 8870 }, { "epoch": 0.23, "grad_norm": 2.225212812423706, "learning_rate": 1.8025672889031852e-05, "loss": 0.4791, "step": 8871 }, { "epoch": 0.23, "grad_norm": 1.8777129650115967, "learning_rate": 1.8025177630020533e-05, "loss": 0.6477, "step": 8872 }, { "epoch": 0.23, "grad_norm": 3.8290464878082275, "learning_rate": 1.8024682315704725e-05, "loss": 0.7711, "step": 8873 }, { "epoch": 0.23, "grad_norm": 1.933906078338623, "learning_rate": 1.802418694608785e-05, "loss": 0.5766, "step": 8874 }, { "epoch": 0.23, "grad_norm": 4.209635257720947, "learning_rate": 1.802369152117332e-05, "loss": 0.6441, "step": 8875 }, { "epoch": 0.23, "grad_norm": 1.898047685623169, "learning_rate": 1.8023196040964556e-05, "loss": 0.615, "step": 8876 }, { "epoch": 0.23, "grad_norm": 2.8082945346832275, "learning_rate": 1.802270050546496e-05, "loss": 0.6142, "step": 8877 }, { "epoch": 0.23, "grad_norm": 3.4828224182128906, "learning_rate": 1.8022204914677953e-05, "loss": 0.5894, "step": 8878 }, { "epoch": 0.23, "grad_norm": 3.406036138534546, "learning_rate": 1.8021709268606946e-05, "loss": 0.8379, "step": 8879 }, { "epoch": 0.23, "grad_norm": 3.255469560623169, "learning_rate": 1.802121356725536e-05, "loss": 0.6721, "step": 8880 }, { "epoch": 0.23, "grad_norm": 2.5104105472564697, "learning_rate": 1.8020717810626612e-05, "loss": 0.6287, "step": 8881 }, { "epoch": 0.23, "grad_norm": 2.405677556991577, "learning_rate": 1.8020221998724115e-05, "loss": 0.442, "step": 8882 }, { "epoch": 0.23, "grad_norm": 2.4775402545928955, "learning_rate": 1.801972613155129e-05, "loss": 0.6464, "step": 8883 }, { "epoch": 0.23, "grad_norm": 2.3564846515655518, "learning_rate": 1.8019230209111547e-05, "loss": 0.3938, "step": 8884 }, { "epoch": 0.23, "grad_norm": 2.942178726196289, "learning_rate": 1.8018734231408307e-05, "loss": 0.7022, "step": 8885 }, { "epoch": 0.23, "grad_norm": 1.9770028591156006, "learning_rate": 1.801823819844499e-05, "loss": 0.6809, "step": 8886 }, { "epoch": 0.23, "grad_norm": 3.092752695083618, "learning_rate": 1.8017742110225014e-05, "loss": 0.7332, "step": 8887 }, { "epoch": 0.23, "grad_norm": 4.29902982711792, "learning_rate": 1.8017245966751792e-05, "loss": 0.5925, "step": 8888 }, { "epoch": 0.23, "grad_norm": 2.2286486625671387, "learning_rate": 1.8016749768028755e-05, "loss": 0.6215, "step": 8889 }, { "epoch": 0.23, "grad_norm": 4.092479228973389, "learning_rate": 1.801625351405931e-05, "loss": 0.789, "step": 8890 }, { "epoch": 0.23, "grad_norm": 1.6191773414611816, "learning_rate": 1.8015757204846882e-05, "loss": 0.6152, "step": 8891 }, { "epoch": 0.23, "grad_norm": 1.4559162855148315, "learning_rate": 1.8015260840394893e-05, "loss": 0.5593, "step": 8892 }, { "epoch": 0.23, "grad_norm": 3.303001642227173, "learning_rate": 1.8014764420706764e-05, "loss": 0.5927, "step": 8893 }, { "epoch": 0.23, "grad_norm": 2.1771602630615234, "learning_rate": 1.801426794578591e-05, "loss": 0.6996, "step": 8894 }, { "epoch": 0.23, "grad_norm": 2.0018374919891357, "learning_rate": 1.8013771415635758e-05, "loss": 0.6874, "step": 8895 }, { "epoch": 0.23, "grad_norm": 3.3573415279388428, "learning_rate": 1.8013274830259728e-05, "loss": 0.7008, "step": 8896 }, { "epoch": 0.23, "grad_norm": 4.151729583740234, "learning_rate": 1.8012778189661242e-05, "loss": 0.645, "step": 8897 }, { "epoch": 0.23, "grad_norm": 3.417710065841675, "learning_rate": 1.8012281493843727e-05, "loss": 0.5358, "step": 8898 }, { "epoch": 0.23, "grad_norm": 1.5363129377365112, "learning_rate": 1.8011784742810597e-05, "loss": 0.5771, "step": 8899 }, { "epoch": 0.23, "grad_norm": 1.6917022466659546, "learning_rate": 1.801128793656528e-05, "loss": 0.5434, "step": 8900 }, { "epoch": 0.23, "grad_norm": 2.87111759185791, "learning_rate": 1.80107910751112e-05, "loss": 0.7105, "step": 8901 }, { "epoch": 0.23, "grad_norm": 2.676971435546875, "learning_rate": 1.8010294158451783e-05, "loss": 0.4651, "step": 8902 }, { "epoch": 0.23, "grad_norm": 6.027674198150635, "learning_rate": 1.800979718659045e-05, "loss": 0.6355, "step": 8903 }, { "epoch": 0.23, "grad_norm": 1.3067210912704468, "learning_rate": 1.8009300159530626e-05, "loss": 0.6902, "step": 8904 }, { "epoch": 0.23, "grad_norm": 2.2568607330322266, "learning_rate": 1.8008803077275742e-05, "loss": 0.6013, "step": 8905 }, { "epoch": 0.23, "grad_norm": 1.979512333869934, "learning_rate": 1.800830593982921e-05, "loss": 0.5955, "step": 8906 }, { "epoch": 0.23, "grad_norm": 1.9875580072402954, "learning_rate": 1.8007808747194474e-05, "loss": 0.6348, "step": 8907 }, { "epoch": 0.23, "grad_norm": 1.5940858125686646, "learning_rate": 1.8007311499374945e-05, "loss": 0.7641, "step": 8908 }, { "epoch": 0.23, "grad_norm": 1.7294013500213623, "learning_rate": 1.800681419637406e-05, "loss": 0.6706, "step": 8909 }, { "epoch": 0.23, "grad_norm": 1.7459492683410645, "learning_rate": 1.8006316838195237e-05, "loss": 0.6735, "step": 8910 }, { "epoch": 0.23, "grad_norm": 1.424030065536499, "learning_rate": 1.800581942484191e-05, "loss": 0.5519, "step": 8911 }, { "epoch": 0.23, "grad_norm": 2.7071518898010254, "learning_rate": 1.8005321956317507e-05, "loss": 0.6814, "step": 8912 }, { "epoch": 0.23, "grad_norm": 1.4631869792938232, "learning_rate": 1.8004824432625456e-05, "loss": 0.6202, "step": 8913 }, { "epoch": 0.23, "grad_norm": 2.161748170852661, "learning_rate": 1.800432685376918e-05, "loss": 0.6544, "step": 8914 }, { "epoch": 0.23, "grad_norm": 3.3497118949890137, "learning_rate": 1.8003829219752115e-05, "loss": 0.759, "step": 8915 }, { "epoch": 0.23, "grad_norm": 1.1730633974075317, "learning_rate": 1.8003331530577687e-05, "loss": 0.3607, "step": 8916 }, { "epoch": 0.23, "grad_norm": 5.641363143920898, "learning_rate": 1.8002833786249324e-05, "loss": 0.8319, "step": 8917 }, { "epoch": 0.23, "grad_norm": 1.7481645345687866, "learning_rate": 1.8002335986770464e-05, "loss": 0.6104, "step": 8918 }, { "epoch": 0.23, "grad_norm": 2.514664888381958, "learning_rate": 1.8001838132144524e-05, "loss": 0.5016, "step": 8919 }, { "epoch": 0.23, "grad_norm": 2.8291432857513428, "learning_rate": 1.800134022237495e-05, "loss": 0.727, "step": 8920 }, { "epoch": 0.23, "grad_norm": 3.6655871868133545, "learning_rate": 1.800084225746516e-05, "loss": 0.6302, "step": 8921 }, { "epoch": 0.23, "grad_norm": 4.066927909851074, "learning_rate": 1.8000344237418597e-05, "loss": 0.881, "step": 8922 }, { "epoch": 0.23, "grad_norm": 3.906256675720215, "learning_rate": 1.7999846162238684e-05, "loss": 0.7391, "step": 8923 }, { "epoch": 0.23, "grad_norm": 1.7001034021377563, "learning_rate": 1.799934803192886e-05, "loss": 0.443, "step": 8924 }, { "epoch": 0.23, "grad_norm": 1.9166145324707031, "learning_rate": 1.7998849846492556e-05, "loss": 0.5457, "step": 8925 }, { "epoch": 0.23, "grad_norm": 3.382948160171509, "learning_rate": 1.79983516059332e-05, "loss": 0.8118, "step": 8926 }, { "epoch": 0.23, "grad_norm": 2.491576671600342, "learning_rate": 1.799785331025423e-05, "loss": 0.7387, "step": 8927 }, { "epoch": 0.23, "grad_norm": 1.6547943353652954, "learning_rate": 1.7997354959459087e-05, "loss": 0.6473, "step": 8928 }, { "epoch": 0.23, "grad_norm": 4.5517048835754395, "learning_rate": 1.7996856553551193e-05, "loss": 0.5532, "step": 8929 }, { "epoch": 0.23, "grad_norm": 4.3086724281311035, "learning_rate": 1.7996358092533987e-05, "loss": 0.6407, "step": 8930 }, { "epoch": 0.23, "grad_norm": 3.7912425994873047, "learning_rate": 1.7995859576410904e-05, "loss": 0.5273, "step": 8931 }, { "epoch": 0.23, "grad_norm": 1.3012593984603882, "learning_rate": 1.7995361005185384e-05, "loss": 0.4578, "step": 8932 }, { "epoch": 0.23, "grad_norm": 2.2998456954956055, "learning_rate": 1.7994862378860855e-05, "loss": 0.7969, "step": 8933 }, { "epoch": 0.23, "grad_norm": 2.164616346359253, "learning_rate": 1.799436369744076e-05, "loss": 0.748, "step": 8934 }, { "epoch": 0.23, "grad_norm": 3.306209087371826, "learning_rate": 1.7993864960928533e-05, "loss": 0.8003, "step": 8935 }, { "epoch": 0.23, "grad_norm": 1.787575364112854, "learning_rate": 1.799336616932761e-05, "loss": 0.446, "step": 8936 }, { "epoch": 0.23, "grad_norm": 2.434898853302002, "learning_rate": 1.7992867322641428e-05, "loss": 0.6107, "step": 8937 }, { "epoch": 0.23, "grad_norm": 2.35217547416687, "learning_rate": 1.7992368420873426e-05, "loss": 0.4674, "step": 8938 }, { "epoch": 0.23, "grad_norm": 1.8601328134536743, "learning_rate": 1.7991869464027046e-05, "loss": 0.5762, "step": 8939 }, { "epoch": 0.23, "grad_norm": 3.0946290493011475, "learning_rate": 1.799137045210572e-05, "loss": 0.5326, "step": 8940 }, { "epoch": 0.23, "grad_norm": 1.8523163795471191, "learning_rate": 1.799087138511289e-05, "loss": 0.7059, "step": 8941 }, { "epoch": 0.23, "grad_norm": 1.1793785095214844, "learning_rate": 1.7990372263051994e-05, "loss": 0.5496, "step": 8942 }, { "epoch": 0.23, "grad_norm": 2.8737571239471436, "learning_rate": 1.7989873085926473e-05, "loss": 0.5864, "step": 8943 }, { "epoch": 0.23, "grad_norm": 2.0279366970062256, "learning_rate": 1.7989373853739765e-05, "loss": 0.6467, "step": 8944 }, { "epoch": 0.23, "grad_norm": 1.4100148677825928, "learning_rate": 1.7988874566495312e-05, "loss": 0.5969, "step": 8945 }, { "epoch": 0.23, "grad_norm": 1.560861349105835, "learning_rate": 1.798837522419656e-05, "loss": 0.6563, "step": 8946 }, { "epoch": 0.23, "grad_norm": 2.0336904525756836, "learning_rate": 1.798787582684694e-05, "loss": 0.5783, "step": 8947 }, { "epoch": 0.23, "grad_norm": 1.1543875932693481, "learning_rate": 1.79873763744499e-05, "loss": 0.4949, "step": 8948 }, { "epoch": 0.23, "grad_norm": 1.887725591659546, "learning_rate": 1.7986876867008874e-05, "loss": 0.6086, "step": 8949 }, { "epoch": 0.23, "grad_norm": 1.730411171913147, "learning_rate": 1.7986377304527316e-05, "loss": 0.6567, "step": 8950 }, { "epoch": 0.23, "grad_norm": 9.450427055358887, "learning_rate": 1.7985877687008664e-05, "loss": 0.6851, "step": 8951 }, { "epoch": 0.23, "grad_norm": 1.3477903604507446, "learning_rate": 1.7985378014456357e-05, "loss": 0.561, "step": 8952 }, { "epoch": 0.23, "grad_norm": 3.726262092590332, "learning_rate": 1.7984878286873845e-05, "loss": 0.757, "step": 8953 }, { "epoch": 0.23, "grad_norm": 3.081353187561035, "learning_rate": 1.7984378504264568e-05, "loss": 0.6691, "step": 8954 }, { "epoch": 0.23, "grad_norm": 1.8500113487243652, "learning_rate": 1.7983878666631967e-05, "loss": 0.5999, "step": 8955 }, { "epoch": 0.23, "grad_norm": 3.215548038482666, "learning_rate": 1.7983378773979494e-05, "loss": 0.63, "step": 8956 }, { "epoch": 0.23, "grad_norm": 7.312432289123535, "learning_rate": 1.7982878826310588e-05, "loss": 0.7888, "step": 8957 }, { "epoch": 0.23, "grad_norm": 1.318045735359192, "learning_rate": 1.7982378823628697e-05, "loss": 0.5769, "step": 8958 }, { "epoch": 0.23, "grad_norm": 1.035984754562378, "learning_rate": 1.798187876593727e-05, "loss": 0.4968, "step": 8959 }, { "epoch": 0.23, "grad_norm": 3.2880420684814453, "learning_rate": 1.7981378653239744e-05, "loss": 0.5541, "step": 8960 }, { "epoch": 0.23, "grad_norm": 1.8261781930923462, "learning_rate": 1.7980878485539573e-05, "loss": 0.5675, "step": 8961 }, { "epoch": 0.23, "grad_norm": 2.0643882751464844, "learning_rate": 1.79803782628402e-05, "loss": 0.6786, "step": 8962 }, { "epoch": 0.23, "grad_norm": 3.635141611099243, "learning_rate": 1.7979877985145075e-05, "loss": 0.526, "step": 8963 }, { "epoch": 0.23, "grad_norm": 2.5374014377593994, "learning_rate": 1.7979377652457644e-05, "loss": 0.7016, "step": 8964 }, { "epoch": 0.23, "grad_norm": 6.321733474731445, "learning_rate": 1.7978877264781356e-05, "loss": 0.5328, "step": 8965 }, { "epoch": 0.23, "grad_norm": 1.1614845991134644, "learning_rate": 1.797837682211966e-05, "loss": 0.5824, "step": 8966 }, { "epoch": 0.23, "grad_norm": 3.2577145099639893, "learning_rate": 1.7977876324476e-05, "loss": 0.7046, "step": 8967 }, { "epoch": 0.23, "grad_norm": 1.188374400138855, "learning_rate": 1.797737577185383e-05, "loss": 0.6359, "step": 8968 }, { "epoch": 0.23, "grad_norm": 5.93253755569458, "learning_rate": 1.79768751642566e-05, "loss": 0.7133, "step": 8969 }, { "epoch": 0.23, "grad_norm": 3.378464698791504, "learning_rate": 1.7976374501687757e-05, "loss": 0.6785, "step": 8970 }, { "epoch": 0.23, "grad_norm": 1.6999694108963013, "learning_rate": 1.797587378415075e-05, "loss": 0.7266, "step": 8971 }, { "epoch": 0.23, "grad_norm": 5.311555862426758, "learning_rate": 1.797537301164903e-05, "loss": 0.5779, "step": 8972 }, { "epoch": 0.23, "grad_norm": 5.177865028381348, "learning_rate": 1.7974872184186054e-05, "loss": 0.6514, "step": 8973 }, { "epoch": 0.23, "grad_norm": 2.5822863578796387, "learning_rate": 1.7974371301765268e-05, "loss": 0.6786, "step": 8974 }, { "epoch": 0.23, "grad_norm": 2.8645217418670654, "learning_rate": 1.7973870364390127e-05, "loss": 0.709, "step": 8975 }, { "epoch": 0.23, "grad_norm": 1.5252677202224731, "learning_rate": 1.797336937206408e-05, "loss": 0.5681, "step": 8976 }, { "epoch": 0.23, "grad_norm": 7.168221950531006, "learning_rate": 1.797286832479058e-05, "loss": 0.7202, "step": 8977 }, { "epoch": 0.23, "grad_norm": 2.3159289360046387, "learning_rate": 1.797236722257308e-05, "loss": 0.7072, "step": 8978 }, { "epoch": 0.23, "grad_norm": 1.544420838356018, "learning_rate": 1.7971866065415033e-05, "loss": 0.5307, "step": 8979 }, { "epoch": 0.23, "grad_norm": 3.25044846534729, "learning_rate": 1.7971364853319894e-05, "loss": 0.7767, "step": 8980 }, { "epoch": 0.23, "grad_norm": 3.1434593200683594, "learning_rate": 1.797086358629112e-05, "loss": 0.5969, "step": 8981 }, { "epoch": 0.23, "grad_norm": 1.3606301546096802, "learning_rate": 1.7970362264332158e-05, "loss": 0.6611, "step": 8982 }, { "epoch": 0.23, "grad_norm": 1.7763971090316772, "learning_rate": 1.7969860887446467e-05, "loss": 0.5983, "step": 8983 }, { "epoch": 0.23, "grad_norm": 3.5505576133728027, "learning_rate": 1.7969359455637504e-05, "loss": 0.8604, "step": 8984 }, { "epoch": 0.23, "grad_norm": 1.7995986938476562, "learning_rate": 1.7968857968908722e-05, "loss": 0.684, "step": 8985 }, { "epoch": 0.23, "grad_norm": 2.695006847381592, "learning_rate": 1.7968356427263577e-05, "loss": 0.6731, "step": 8986 }, { "epoch": 0.23, "grad_norm": 1.688001036643982, "learning_rate": 1.7967854830705525e-05, "loss": 0.6173, "step": 8987 }, { "epoch": 0.23, "grad_norm": 4.023952484130859, "learning_rate": 1.7967353179238025e-05, "loss": 0.7471, "step": 8988 }, { "epoch": 0.23, "grad_norm": 2.0494189262390137, "learning_rate": 1.796685147286453e-05, "loss": 0.6179, "step": 8989 }, { "epoch": 0.23, "grad_norm": 1.8136966228485107, "learning_rate": 1.7966349711588505e-05, "loss": 0.5017, "step": 8990 }, { "epoch": 0.23, "grad_norm": 2.438906669616699, "learning_rate": 1.7965847895413398e-05, "loss": 0.5891, "step": 8991 }, { "epoch": 0.23, "grad_norm": 4.1189985275268555, "learning_rate": 1.7965346024342675e-05, "loss": 0.604, "step": 8992 }, { "epoch": 0.23, "grad_norm": 1.9597399234771729, "learning_rate": 1.796484409837979e-05, "loss": 0.6585, "step": 8993 }, { "epoch": 0.23, "grad_norm": 2.449875831604004, "learning_rate": 1.7964342117528198e-05, "loss": 0.7127, "step": 8994 }, { "epoch": 0.23, "grad_norm": 3.104732036590576, "learning_rate": 1.796384008179137e-05, "loss": 0.733, "step": 8995 }, { "epoch": 0.23, "grad_norm": 3.4807441234588623, "learning_rate": 1.7963337991172756e-05, "loss": 0.7533, "step": 8996 }, { "epoch": 0.23, "grad_norm": 1.695345401763916, "learning_rate": 1.7962835845675827e-05, "loss": 0.5754, "step": 8997 }, { "epoch": 0.23, "grad_norm": 2.0627031326293945, "learning_rate": 1.7962333645304027e-05, "loss": 0.661, "step": 8998 }, { "epoch": 0.23, "grad_norm": 2.287517547607422, "learning_rate": 1.7961831390060833e-05, "loss": 0.6877, "step": 8999 }, { "epoch": 0.23, "grad_norm": 2.0179593563079834, "learning_rate": 1.7961329079949698e-05, "loss": 0.5276, "step": 9000 }, { "epoch": 0.23, "grad_norm": 1.186020851135254, "learning_rate": 1.7960826714974084e-05, "loss": 0.5055, "step": 9001 }, { "epoch": 0.23, "grad_norm": 1.377249836921692, "learning_rate": 1.796032429513745e-05, "loss": 0.6052, "step": 9002 }, { "epoch": 0.23, "grad_norm": 2.936405897140503, "learning_rate": 1.7959821820443263e-05, "loss": 0.6265, "step": 9003 }, { "epoch": 0.23, "grad_norm": 3.300489664077759, "learning_rate": 1.7959319290894988e-05, "loss": 0.8471, "step": 9004 }, { "epoch": 0.23, "grad_norm": 1.7272388935089111, "learning_rate": 1.7958816706496085e-05, "loss": 0.6043, "step": 9005 }, { "epoch": 0.23, "grad_norm": 1.8199342489242554, "learning_rate": 1.7958314067250017e-05, "loss": 0.5651, "step": 9006 }, { "epoch": 0.23, "grad_norm": 2.6186892986297607, "learning_rate": 1.7957811373160246e-05, "loss": 0.6303, "step": 9007 }, { "epoch": 0.23, "grad_norm": 1.7273229360580444, "learning_rate": 1.795730862423024e-05, "loss": 0.6047, "step": 9008 }, { "epoch": 0.23, "grad_norm": 1.5460091829299927, "learning_rate": 1.7956805820463463e-05, "loss": 0.621, "step": 9009 }, { "epoch": 0.23, "grad_norm": 3.648761034011841, "learning_rate": 1.795630296186338e-05, "loss": 0.7237, "step": 9010 }, { "epoch": 0.23, "grad_norm": 4.226509094238281, "learning_rate": 1.795580004843345e-05, "loss": 0.6658, "step": 9011 }, { "epoch": 0.23, "grad_norm": 1.294284701347351, "learning_rate": 1.795529708017715e-05, "loss": 0.5241, "step": 9012 }, { "epoch": 0.23, "grad_norm": 3.3726532459259033, "learning_rate": 1.7954794057097942e-05, "loss": 0.5618, "step": 9013 }, { "epoch": 0.23, "grad_norm": 2.3007724285125732, "learning_rate": 1.7954290979199287e-05, "loss": 0.6153, "step": 9014 }, { "epoch": 0.23, "grad_norm": 2.9386179447174072, "learning_rate": 1.7953787846484658e-05, "loss": 0.7707, "step": 9015 }, { "epoch": 0.23, "grad_norm": 1.0692301988601685, "learning_rate": 1.795328465895752e-05, "loss": 0.5004, "step": 9016 }, { "epoch": 0.23, "grad_norm": 1.2552664279937744, "learning_rate": 1.7952781416621337e-05, "loss": 0.5787, "step": 9017 }, { "epoch": 0.23, "grad_norm": 2.313915729522705, "learning_rate": 1.7952278119479585e-05, "loss": 0.9114, "step": 9018 }, { "epoch": 0.23, "grad_norm": 4.404982566833496, "learning_rate": 1.795177476753573e-05, "loss": 0.6173, "step": 9019 }, { "epoch": 0.23, "grad_norm": 1.805902123451233, "learning_rate": 1.7951271360793233e-05, "loss": 0.4729, "step": 9020 }, { "epoch": 0.23, "grad_norm": 2.5417206287384033, "learning_rate": 1.7950767899255575e-05, "loss": 0.5948, "step": 9021 }, { "epoch": 0.23, "grad_norm": 1.6518360376358032, "learning_rate": 1.795026438292622e-05, "loss": 0.6496, "step": 9022 }, { "epoch": 0.23, "grad_norm": 2.660179615020752, "learning_rate": 1.7949760811808634e-05, "loss": 0.6682, "step": 9023 }, { "epoch": 0.23, "grad_norm": 4.54086971282959, "learning_rate": 1.794925718590629e-05, "loss": 0.6529, "step": 9024 }, { "epoch": 0.23, "grad_norm": 4.389313220977783, "learning_rate": 1.7948753505222666e-05, "loss": 0.9946, "step": 9025 }, { "epoch": 0.23, "grad_norm": 2.181762218475342, "learning_rate": 1.794824976976122e-05, "loss": 0.555, "step": 9026 }, { "epoch": 0.23, "grad_norm": 1.2389942407608032, "learning_rate": 1.7947745979525435e-05, "loss": 0.6501, "step": 9027 }, { "epoch": 0.23, "grad_norm": 3.429267644882202, "learning_rate": 1.7947242134518778e-05, "loss": 0.5971, "step": 9028 }, { "epoch": 0.23, "grad_norm": 1.7511872053146362, "learning_rate": 1.7946738234744715e-05, "loss": 0.5633, "step": 9029 }, { "epoch": 0.23, "grad_norm": 2.227987051010132, "learning_rate": 1.794623428020673e-05, "loss": 0.5789, "step": 9030 }, { "epoch": 0.23, "grad_norm": 3.063225746154785, "learning_rate": 1.794573027090829e-05, "loss": 0.5545, "step": 9031 }, { "epoch": 0.23, "grad_norm": 2.947918653488159, "learning_rate": 1.794522620685287e-05, "loss": 0.552, "step": 9032 }, { "epoch": 0.23, "grad_norm": 2.654167890548706, "learning_rate": 1.794472208804394e-05, "loss": 0.6943, "step": 9033 }, { "epoch": 0.23, "grad_norm": 2.363572120666504, "learning_rate": 1.794421791448498e-05, "loss": 0.8519, "step": 9034 }, { "epoch": 0.23, "grad_norm": 2.087756633758545, "learning_rate": 1.7943713686179457e-05, "loss": 0.6394, "step": 9035 }, { "epoch": 0.23, "grad_norm": 4.1938581466674805, "learning_rate": 1.7943209403130853e-05, "loss": 0.6186, "step": 9036 }, { "epoch": 0.23, "grad_norm": 1.785717487335205, "learning_rate": 1.794270506534264e-05, "loss": 0.6701, "step": 9037 }, { "epoch": 0.23, "grad_norm": 2.3243815898895264, "learning_rate": 1.794220067281829e-05, "loss": 0.6721, "step": 9038 }, { "epoch": 0.23, "grad_norm": 1.7793939113616943, "learning_rate": 1.7941696225561286e-05, "loss": 0.627, "step": 9039 }, { "epoch": 0.23, "grad_norm": 3.1116697788238525, "learning_rate": 1.79411917235751e-05, "loss": 0.698, "step": 9040 }, { "epoch": 0.23, "grad_norm": 4.148991584777832, "learning_rate": 1.7940687166863214e-05, "loss": 0.5355, "step": 9041 }, { "epoch": 0.23, "grad_norm": 2.080498456954956, "learning_rate": 1.7940182555429094e-05, "loss": 0.5666, "step": 9042 }, { "epoch": 0.23, "grad_norm": 1.1980620622634888, "learning_rate": 1.793967788927623e-05, "loss": 0.6787, "step": 9043 }, { "epoch": 0.23, "grad_norm": 2.156118154525757, "learning_rate": 1.7939173168408092e-05, "loss": 0.6201, "step": 9044 }, { "epoch": 0.23, "grad_norm": 3.6118881702423096, "learning_rate": 1.793866839282816e-05, "loss": 0.7105, "step": 9045 }, { "epoch": 0.23, "grad_norm": 5.718747138977051, "learning_rate": 1.7938163562539913e-05, "loss": 0.6302, "step": 9046 }, { "epoch": 0.23, "grad_norm": 3.9864251613616943, "learning_rate": 1.793765867754683e-05, "loss": 0.5908, "step": 9047 }, { "epoch": 0.23, "grad_norm": 2.6164040565490723, "learning_rate": 1.7937153737852392e-05, "loss": 0.5969, "step": 9048 }, { "epoch": 0.23, "grad_norm": 4.866814613342285, "learning_rate": 1.7936648743460074e-05, "loss": 0.5889, "step": 9049 }, { "epoch": 0.23, "grad_norm": 6.16686487197876, "learning_rate": 1.793614369437336e-05, "loss": 0.6524, "step": 9050 }, { "epoch": 0.23, "grad_norm": 2.3389432430267334, "learning_rate": 1.793563859059573e-05, "loss": 0.7153, "step": 9051 }, { "epoch": 0.23, "grad_norm": 1.8941415548324585, "learning_rate": 1.793513343213067e-05, "loss": 0.6604, "step": 9052 }, { "epoch": 0.23, "grad_norm": 3.1691462993621826, "learning_rate": 1.793462821898165e-05, "loss": 0.8035, "step": 9053 }, { "epoch": 0.23, "grad_norm": 1.7716950178146362, "learning_rate": 1.7934122951152157e-05, "loss": 0.4968, "step": 9054 }, { "epoch": 0.23, "grad_norm": 1.7265042066574097, "learning_rate": 1.7933617628645675e-05, "loss": 0.6642, "step": 9055 }, { "epoch": 0.23, "grad_norm": 2.0204250812530518, "learning_rate": 1.7933112251465683e-05, "loss": 0.6736, "step": 9056 }, { "epoch": 0.23, "grad_norm": 1.5157309770584106, "learning_rate": 1.7932606819615668e-05, "loss": 0.4824, "step": 9057 }, { "epoch": 0.23, "grad_norm": 2.8311047554016113, "learning_rate": 1.7932101333099108e-05, "loss": 0.7478, "step": 9058 }, { "epoch": 0.23, "grad_norm": 1.9015921354293823, "learning_rate": 1.793159579191949e-05, "loss": 0.5486, "step": 9059 }, { "epoch": 0.23, "grad_norm": 3.323012113571167, "learning_rate": 1.79310901960803e-05, "loss": 0.7358, "step": 9060 }, { "epoch": 0.23, "grad_norm": 3.98583722114563, "learning_rate": 1.7930584545585014e-05, "loss": 0.5521, "step": 9061 }, { "epoch": 0.23, "grad_norm": 1.4865076541900635, "learning_rate": 1.7930078840437125e-05, "loss": 0.6743, "step": 9062 }, { "epoch": 0.23, "grad_norm": 1.5841808319091797, "learning_rate": 1.7929573080640113e-05, "loss": 0.5457, "step": 9063 }, { "epoch": 0.23, "grad_norm": 2.053973436355591, "learning_rate": 1.7929067266197465e-05, "loss": 0.6875, "step": 9064 }, { "epoch": 0.23, "grad_norm": 2.9870829582214355, "learning_rate": 1.7928561397112667e-05, "loss": 0.6396, "step": 9065 }, { "epoch": 0.23, "grad_norm": 2.2051467895507812, "learning_rate": 1.7928055473389207e-05, "loss": 0.7458, "step": 9066 }, { "epoch": 0.23, "grad_norm": 4.902453422546387, "learning_rate": 1.792754949503057e-05, "loss": 0.6507, "step": 9067 }, { "epoch": 0.23, "grad_norm": 1.7315409183502197, "learning_rate": 1.7927043462040237e-05, "loss": 0.6608, "step": 9068 }, { "epoch": 0.23, "grad_norm": 1.951375126838684, "learning_rate": 1.7926537374421705e-05, "loss": 0.5481, "step": 9069 }, { "epoch": 0.23, "grad_norm": 1.451897144317627, "learning_rate": 1.792603123217846e-05, "loss": 0.5923, "step": 9070 }, { "epoch": 0.23, "grad_norm": 1.2146471738815308, "learning_rate": 1.7925525035313984e-05, "loss": 0.5613, "step": 9071 }, { "epoch": 0.23, "grad_norm": 3.333237648010254, "learning_rate": 1.792501878383177e-05, "loss": 0.8768, "step": 9072 }, { "epoch": 0.23, "grad_norm": 3.8435144424438477, "learning_rate": 1.7924512477735304e-05, "loss": 0.6376, "step": 9073 }, { "epoch": 0.23, "grad_norm": 1.7755091190338135, "learning_rate": 1.7924006117028077e-05, "loss": 0.6581, "step": 9074 }, { "epoch": 0.23, "grad_norm": 3.1513803005218506, "learning_rate": 1.792349970171358e-05, "loss": 0.6516, "step": 9075 }, { "epoch": 0.23, "grad_norm": 7.125844478607178, "learning_rate": 1.7922993231795298e-05, "loss": 0.8815, "step": 9076 }, { "epoch": 0.23, "grad_norm": 1.7430421113967896, "learning_rate": 1.7922486707276727e-05, "loss": 0.8024, "step": 9077 }, { "epoch": 0.23, "grad_norm": 3.8701865673065186, "learning_rate": 1.7921980128161354e-05, "loss": 0.7799, "step": 9078 }, { "epoch": 0.23, "grad_norm": 2.4705870151519775, "learning_rate": 1.792147349445267e-05, "loss": 0.5939, "step": 9079 }, { "epoch": 0.23, "grad_norm": 2.4999189376831055, "learning_rate": 1.792096680615417e-05, "loss": 0.6304, "step": 9080 }, { "epoch": 0.23, "grad_norm": 1.8981517553329468, "learning_rate": 1.7920460063269342e-05, "loss": 0.6093, "step": 9081 }, { "epoch": 0.23, "grad_norm": 3.7704756259918213, "learning_rate": 1.791995326580168e-05, "loss": 0.8192, "step": 9082 }, { "epoch": 0.23, "grad_norm": 1.8236218690872192, "learning_rate": 1.7919446413754675e-05, "loss": 0.5825, "step": 9083 }, { "epoch": 0.23, "grad_norm": 3.1979987621307373, "learning_rate": 1.791893950713182e-05, "loss": 0.8049, "step": 9084 }, { "epoch": 0.23, "grad_norm": 4.039605617523193, "learning_rate": 1.791843254593661e-05, "loss": 0.7469, "step": 9085 }, { "epoch": 0.23, "grad_norm": 7.356955051422119, "learning_rate": 1.7917925530172536e-05, "loss": 0.5773, "step": 9086 }, { "epoch": 0.23, "grad_norm": 4.154190540313721, "learning_rate": 1.7917418459843096e-05, "loss": 0.7672, "step": 9087 }, { "epoch": 0.23, "grad_norm": 1.65265953540802, "learning_rate": 1.7916911334951784e-05, "loss": 0.661, "step": 9088 }, { "epoch": 0.23, "grad_norm": 5.228870391845703, "learning_rate": 1.791640415550209e-05, "loss": 0.6947, "step": 9089 }, { "epoch": 0.23, "grad_norm": 3.4350552558898926, "learning_rate": 1.7915896921497512e-05, "loss": 0.9329, "step": 9090 }, { "epoch": 0.23, "grad_norm": 2.8320822715759277, "learning_rate": 1.7915389632941546e-05, "loss": 0.8166, "step": 9091 }, { "epoch": 0.23, "grad_norm": 2.4907946586608887, "learning_rate": 1.7914882289837687e-05, "loss": 0.6501, "step": 9092 }, { "epoch": 0.23, "grad_norm": 1.4696874618530273, "learning_rate": 1.7914374892189434e-05, "loss": 0.649, "step": 9093 }, { "epoch": 0.23, "grad_norm": 1.5132474899291992, "learning_rate": 1.791386744000028e-05, "loss": 0.5959, "step": 9094 }, { "epoch": 0.23, "grad_norm": 2.5163376331329346, "learning_rate": 1.7913359933273727e-05, "loss": 0.7072, "step": 9095 }, { "epoch": 0.23, "grad_norm": 3.0967233180999756, "learning_rate": 1.7912852372013267e-05, "loss": 0.5004, "step": 9096 }, { "epoch": 0.23, "grad_norm": 1.2581983804702759, "learning_rate": 1.7912344756222394e-05, "loss": 0.587, "step": 9097 }, { "epoch": 0.23, "grad_norm": 1.6157290935516357, "learning_rate": 1.791183708590462e-05, "loss": 0.5623, "step": 9098 }, { "epoch": 0.23, "grad_norm": 4.479727268218994, "learning_rate": 1.791132936106343e-05, "loss": 0.6711, "step": 9099 }, { "epoch": 0.23, "grad_norm": 3.225022792816162, "learning_rate": 1.7910821581702332e-05, "loss": 0.6742, "step": 9100 }, { "epoch": 0.23, "grad_norm": 2.578556537628174, "learning_rate": 1.791031374782482e-05, "loss": 0.7054, "step": 9101 }, { "epoch": 0.23, "grad_norm": 2.410327196121216, "learning_rate": 1.7909805859434393e-05, "loss": 0.6812, "step": 9102 }, { "epoch": 0.23, "grad_norm": 3.6301751136779785, "learning_rate": 1.790929791653456e-05, "loss": 0.6411, "step": 9103 }, { "epoch": 0.23, "grad_norm": 1.4023966789245605, "learning_rate": 1.7908789919128807e-05, "loss": 0.5972, "step": 9104 }, { "epoch": 0.23, "grad_norm": 2.0391287803649902, "learning_rate": 1.790828186722065e-05, "loss": 0.5991, "step": 9105 }, { "epoch": 0.23, "grad_norm": 1.76224684715271, "learning_rate": 1.790777376081358e-05, "loss": 0.756, "step": 9106 }, { "epoch": 0.23, "grad_norm": 1.4626530408859253, "learning_rate": 1.79072655999111e-05, "loss": 0.712, "step": 9107 }, { "epoch": 0.23, "grad_norm": 6.680670738220215, "learning_rate": 1.7906757384516714e-05, "loss": 0.8091, "step": 9108 }, { "epoch": 0.23, "grad_norm": 1.5509085655212402, "learning_rate": 1.790624911463392e-05, "loss": 0.5713, "step": 9109 }, { "epoch": 0.23, "grad_norm": 2.8166027069091797, "learning_rate": 1.7905740790266232e-05, "loss": 0.8098, "step": 9110 }, { "epoch": 0.23, "grad_norm": 1.5276248455047607, "learning_rate": 1.790523241141714e-05, "loss": 0.5702, "step": 9111 }, { "epoch": 0.23, "grad_norm": 1.772040843963623, "learning_rate": 1.7904723978090155e-05, "loss": 0.6459, "step": 9112 }, { "epoch": 0.23, "grad_norm": 2.762025833129883, "learning_rate": 1.790421549028878e-05, "loss": 0.623, "step": 9113 }, { "epoch": 0.23, "grad_norm": 2.8273544311523438, "learning_rate": 1.7903706948016512e-05, "loss": 0.7268, "step": 9114 }, { "epoch": 0.23, "grad_norm": 5.083693981170654, "learning_rate": 1.790319835127687e-05, "loss": 0.5883, "step": 9115 }, { "epoch": 0.23, "grad_norm": 2.445117473602295, "learning_rate": 1.7902689700073345e-05, "loss": 0.5646, "step": 9116 }, { "epoch": 0.23, "grad_norm": 2.0675408840179443, "learning_rate": 1.7902180994409444e-05, "loss": 0.6527, "step": 9117 }, { "epoch": 0.23, "grad_norm": 1.667643427848816, "learning_rate": 1.790167223428868e-05, "loss": 0.7652, "step": 9118 }, { "epoch": 0.23, "grad_norm": 1.322239637374878, "learning_rate": 1.7901163419714556e-05, "loss": 0.4548, "step": 9119 }, { "epoch": 0.23, "grad_norm": 1.1826972961425781, "learning_rate": 1.7900654550690577e-05, "loss": 0.5288, "step": 9120 }, { "epoch": 0.23, "grad_norm": 2.0235486030578613, "learning_rate": 1.790014562722025e-05, "loss": 0.7551, "step": 9121 }, { "epoch": 0.23, "grad_norm": 1.8499115705490112, "learning_rate": 1.7899636649307084e-05, "loss": 0.5383, "step": 9122 }, { "epoch": 0.23, "grad_norm": 2.0523569583892822, "learning_rate": 1.7899127616954584e-05, "loss": 0.7195, "step": 9123 }, { "epoch": 0.23, "grad_norm": 2.1086318492889404, "learning_rate": 1.789861853016626e-05, "loss": 0.464, "step": 9124 }, { "epoch": 0.23, "grad_norm": 1.6854379177093506, "learning_rate": 1.789810938894562e-05, "loss": 0.5233, "step": 9125 }, { "epoch": 0.23, "grad_norm": 3.9245035648345947, "learning_rate": 1.789760019329617e-05, "loss": 0.6983, "step": 9126 }, { "epoch": 0.23, "grad_norm": 2.685480833053589, "learning_rate": 1.7897090943221424e-05, "loss": 0.6064, "step": 9127 }, { "epoch": 0.23, "grad_norm": 2.053105354309082, "learning_rate": 1.7896581638724884e-05, "loss": 0.6491, "step": 9128 }, { "epoch": 0.23, "grad_norm": 1.8912971019744873, "learning_rate": 1.789607227981007e-05, "loss": 0.6272, "step": 9129 }, { "epoch": 0.23, "grad_norm": 1.296743392944336, "learning_rate": 1.7895562866480483e-05, "loss": 0.7381, "step": 9130 }, { "epoch": 0.23, "grad_norm": 7.109858512878418, "learning_rate": 1.789505339873964e-05, "loss": 0.6803, "step": 9131 }, { "epoch": 0.23, "grad_norm": 5.004048824310303, "learning_rate": 1.7894543876591047e-05, "loss": 0.6783, "step": 9132 }, { "epoch": 0.23, "grad_norm": 4.25340461730957, "learning_rate": 1.7894034300038218e-05, "loss": 0.6144, "step": 9133 }, { "epoch": 0.23, "grad_norm": 5.9358015060424805, "learning_rate": 1.7893524669084664e-05, "loss": 0.5788, "step": 9134 }, { "epoch": 0.23, "grad_norm": 2.370267391204834, "learning_rate": 1.78930149837339e-05, "loss": 0.7133, "step": 9135 }, { "epoch": 0.23, "grad_norm": 2.739766836166382, "learning_rate": 1.7892505243989432e-05, "loss": 0.6232, "step": 9136 }, { "epoch": 0.23, "grad_norm": 8.03809928894043, "learning_rate": 1.7891995449854774e-05, "loss": 0.6362, "step": 9137 }, { "epoch": 0.23, "grad_norm": 2.3558080196380615, "learning_rate": 1.789148560133345e-05, "loss": 0.6562, "step": 9138 }, { "epoch": 0.23, "grad_norm": 2.846651315689087, "learning_rate": 1.7890975698428955e-05, "loss": 0.6803, "step": 9139 }, { "epoch": 0.23, "grad_norm": 2.064177989959717, "learning_rate": 1.7890465741144818e-05, "loss": 0.6454, "step": 9140 }, { "epoch": 0.23, "grad_norm": 2.3879587650299072, "learning_rate": 1.788995572948455e-05, "loss": 0.6428, "step": 9141 }, { "epoch": 0.23, "grad_norm": 2.2474684715270996, "learning_rate": 1.7889445663451663e-05, "loss": 0.6396, "step": 9142 }, { "epoch": 0.23, "grad_norm": 7.982139587402344, "learning_rate": 1.788893554304967e-05, "loss": 0.6485, "step": 9143 }, { "epoch": 0.23, "grad_norm": 2.445805788040161, "learning_rate": 1.7888425368282094e-05, "loss": 0.5944, "step": 9144 }, { "epoch": 0.23, "grad_norm": 1.4226115942001343, "learning_rate": 1.7887915139152446e-05, "loss": 0.6605, "step": 9145 }, { "epoch": 0.23, "grad_norm": 6.296281814575195, "learning_rate": 1.788740485566424e-05, "loss": 0.6299, "step": 9146 }, { "epoch": 0.23, "grad_norm": 5.7774529457092285, "learning_rate": 1.7886894517820995e-05, "loss": 0.6529, "step": 9147 }, { "epoch": 0.23, "grad_norm": 8.547567367553711, "learning_rate": 1.788638412562623e-05, "loss": 0.5511, "step": 9148 }, { "epoch": 0.23, "grad_norm": 4.330964088439941, "learning_rate": 1.788587367908346e-05, "loss": 0.7571, "step": 9149 }, { "epoch": 0.23, "grad_norm": 1.6749476194381714, "learning_rate": 1.78853631781962e-05, "loss": 0.5489, "step": 9150 }, { "epoch": 0.23, "grad_norm": 1.4032951593399048, "learning_rate": 1.788485262296798e-05, "loss": 0.6517, "step": 9151 }, { "epoch": 0.23, "grad_norm": 1.8505311012268066, "learning_rate": 1.78843420134023e-05, "loss": 0.661, "step": 9152 }, { "epoch": 0.23, "grad_norm": 3.30087947845459, "learning_rate": 1.7883831349502692e-05, "loss": 0.5478, "step": 9153 }, { "epoch": 0.23, "grad_norm": 2.4269449710845947, "learning_rate": 1.7883320631272673e-05, "loss": 0.6843, "step": 9154 }, { "epoch": 0.23, "grad_norm": 2.160521984100342, "learning_rate": 1.7882809858715756e-05, "loss": 0.6542, "step": 9155 }, { "epoch": 0.23, "grad_norm": 1.2262731790542603, "learning_rate": 1.788229903183547e-05, "loss": 0.5542, "step": 9156 }, { "epoch": 0.23, "grad_norm": 3.243149518966675, "learning_rate": 1.7881788150635332e-05, "loss": 0.6835, "step": 9157 }, { "epoch": 0.23, "grad_norm": 3.750504732131958, "learning_rate": 1.788127721511886e-05, "loss": 0.6422, "step": 9158 }, { "epoch": 0.23, "grad_norm": 4.028286933898926, "learning_rate": 1.7880766225289575e-05, "loss": 0.7894, "step": 9159 }, { "epoch": 0.23, "grad_norm": 6.479123592376709, "learning_rate": 1.7880255181151002e-05, "loss": 0.6447, "step": 9160 }, { "epoch": 0.23, "grad_norm": 2.7620248794555664, "learning_rate": 1.787974408270666e-05, "loss": 0.7332, "step": 9161 }, { "epoch": 0.23, "grad_norm": 2.2224836349487305, "learning_rate": 1.7879232929960072e-05, "loss": 0.6988, "step": 9162 }, { "epoch": 0.23, "grad_norm": 5.344337463378906, "learning_rate": 1.7878721722914766e-05, "loss": 0.6523, "step": 9163 }, { "epoch": 0.23, "grad_norm": 1.9695022106170654, "learning_rate": 1.7878210461574255e-05, "loss": 0.6518, "step": 9164 }, { "epoch": 0.23, "grad_norm": 3.768338680267334, "learning_rate": 1.787769914594207e-05, "loss": 0.5663, "step": 9165 }, { "epoch": 0.23, "grad_norm": 1.3618242740631104, "learning_rate": 1.787718777602173e-05, "loss": 0.5647, "step": 9166 }, { "epoch": 0.23, "grad_norm": 1.7902923822402954, "learning_rate": 1.787667635181676e-05, "loss": 0.7153, "step": 9167 }, { "epoch": 0.23, "grad_norm": 2.1861369609832764, "learning_rate": 1.7876164873330686e-05, "loss": 0.6955, "step": 9168 }, { "epoch": 0.24, "grad_norm": 3.391732692718506, "learning_rate": 1.7875653340567032e-05, "loss": 0.8034, "step": 9169 }, { "epoch": 0.24, "grad_norm": 1.669554352760315, "learning_rate": 1.7875141753529322e-05, "loss": 0.5814, "step": 9170 }, { "epoch": 0.24, "grad_norm": 2.395139455795288, "learning_rate": 1.7874630112221087e-05, "loss": 0.7123, "step": 9171 }, { "epoch": 0.24, "grad_norm": 1.4611936807632446, "learning_rate": 1.7874118416645845e-05, "loss": 0.5611, "step": 9172 }, { "epoch": 0.24, "grad_norm": 2.2064149379730225, "learning_rate": 1.7873606666807126e-05, "loss": 0.6892, "step": 9173 }, { "epoch": 0.24, "grad_norm": 6.972620010375977, "learning_rate": 1.7873094862708454e-05, "loss": 0.6502, "step": 9174 }, { "epoch": 0.24, "grad_norm": 2.774920701980591, "learning_rate": 1.7872583004353362e-05, "loss": 0.5813, "step": 9175 }, { "epoch": 0.24, "grad_norm": 2.5472524166107178, "learning_rate": 1.7872071091745376e-05, "loss": 0.672, "step": 9176 }, { "epoch": 0.24, "grad_norm": 1.9907158613204956, "learning_rate": 1.787155912488802e-05, "loss": 0.6907, "step": 9177 }, { "epoch": 0.24, "grad_norm": 3.335487127304077, "learning_rate": 1.7871047103784823e-05, "loss": 0.4994, "step": 9178 }, { "epoch": 0.24, "grad_norm": 4.678742408752441, "learning_rate": 1.7870535028439314e-05, "loss": 0.7647, "step": 9179 }, { "epoch": 0.24, "grad_norm": 1.1485899686813354, "learning_rate": 1.7870022898855023e-05, "loss": 0.6722, "step": 9180 }, { "epoch": 0.24, "grad_norm": 3.00990629196167, "learning_rate": 1.7869510715035476e-05, "loss": 0.6106, "step": 9181 }, { "epoch": 0.24, "grad_norm": 1.954485297203064, "learning_rate": 1.786899847698421e-05, "loss": 0.6539, "step": 9182 }, { "epoch": 0.24, "grad_norm": 2.977904796600342, "learning_rate": 1.7868486184704745e-05, "loss": 0.5764, "step": 9183 }, { "epoch": 0.24, "grad_norm": 4.407736301422119, "learning_rate": 1.786797383820062e-05, "loss": 0.7018, "step": 9184 }, { "epoch": 0.24, "grad_norm": 2.326526641845703, "learning_rate": 1.786746143747536e-05, "loss": 0.676, "step": 9185 }, { "epoch": 0.24, "grad_norm": 2.587873697280884, "learning_rate": 1.7866948982532504e-05, "loss": 0.7378, "step": 9186 }, { "epoch": 0.24, "grad_norm": 2.302243232727051, "learning_rate": 1.7866436473375575e-05, "loss": 0.536, "step": 9187 }, { "epoch": 0.24, "grad_norm": 3.254720449447632, "learning_rate": 1.7865923910008106e-05, "loss": 0.6402, "step": 9188 }, { "epoch": 0.24, "grad_norm": 3.928011894226074, "learning_rate": 1.7865411292433636e-05, "loss": 0.5057, "step": 9189 }, { "epoch": 0.24, "grad_norm": 2.503880739212036, "learning_rate": 1.7864898620655688e-05, "loss": 0.7816, "step": 9190 }, { "epoch": 0.24, "grad_norm": 4.065121173858643, "learning_rate": 1.7864385894677802e-05, "loss": 0.6977, "step": 9191 }, { "epoch": 0.24, "grad_norm": 1.489056944847107, "learning_rate": 1.786387311450351e-05, "loss": 0.6557, "step": 9192 }, { "epoch": 0.24, "grad_norm": 2.122208833694458, "learning_rate": 1.7863360280136347e-05, "loss": 0.7012, "step": 9193 }, { "epoch": 0.24, "grad_norm": 2.9701874256134033, "learning_rate": 1.786284739157984e-05, "loss": 0.6404, "step": 9194 }, { "epoch": 0.24, "grad_norm": 2.1524736881256104, "learning_rate": 1.7862334448837534e-05, "loss": 0.6979, "step": 9195 }, { "epoch": 0.24, "grad_norm": 2.6843225955963135, "learning_rate": 1.786182145191295e-05, "loss": 0.5072, "step": 9196 }, { "epoch": 0.24, "grad_norm": 3.1081345081329346, "learning_rate": 1.786130840080964e-05, "loss": 0.6069, "step": 9197 }, { "epoch": 0.24, "grad_norm": 2.4050538539886475, "learning_rate": 1.786079529553113e-05, "loss": 0.6047, "step": 9198 }, { "epoch": 0.24, "grad_norm": 1.4052503108978271, "learning_rate": 1.786028213608096e-05, "loss": 0.4637, "step": 9199 }, { "epoch": 0.24, "grad_norm": 1.6720218658447266, "learning_rate": 1.7859768922462658e-05, "loss": 0.5275, "step": 9200 }, { "epoch": 0.24, "grad_norm": 5.115659236907959, "learning_rate": 1.7859255654679768e-05, "loss": 0.8051, "step": 9201 }, { "epoch": 0.24, "grad_norm": 9.562615394592285, "learning_rate": 1.7858742332735826e-05, "loss": 0.6078, "step": 9202 }, { "epoch": 0.24, "grad_norm": 2.5090208053588867, "learning_rate": 1.785822895663437e-05, "loss": 0.6417, "step": 9203 }, { "epoch": 0.24, "grad_norm": 3.3618991374969482, "learning_rate": 1.785771552637894e-05, "loss": 0.6982, "step": 9204 }, { "epoch": 0.24, "grad_norm": 3.3310341835021973, "learning_rate": 1.785720204197307e-05, "loss": 0.5259, "step": 9205 }, { "epoch": 0.24, "grad_norm": 2.3750839233398438, "learning_rate": 1.7856688503420295e-05, "loss": 0.5879, "step": 9206 }, { "epoch": 0.24, "grad_norm": 3.071544647216797, "learning_rate": 1.7856174910724162e-05, "loss": 0.7889, "step": 9207 }, { "epoch": 0.24, "grad_norm": 2.0379629135131836, "learning_rate": 1.7855661263888208e-05, "loss": 0.5131, "step": 9208 }, { "epoch": 0.24, "grad_norm": 3.8104753494262695, "learning_rate": 1.7855147562915972e-05, "loss": 0.5292, "step": 9209 }, { "epoch": 0.24, "grad_norm": 1.719731330871582, "learning_rate": 1.7854633807810992e-05, "loss": 0.6349, "step": 9210 }, { "epoch": 0.24, "grad_norm": 2.760530948638916, "learning_rate": 1.7854119998576812e-05, "loss": 0.5328, "step": 9211 }, { "epoch": 0.24, "grad_norm": 7.397510528564453, "learning_rate": 1.7853606135216973e-05, "loss": 0.6256, "step": 9212 }, { "epoch": 0.24, "grad_norm": 2.6951560974121094, "learning_rate": 1.785309221773501e-05, "loss": 0.4668, "step": 9213 }, { "epoch": 0.24, "grad_norm": 1.7667438983917236, "learning_rate": 1.785257824613447e-05, "loss": 0.588, "step": 9214 }, { "epoch": 0.24, "grad_norm": 1.4742313623428345, "learning_rate": 1.78520642204189e-05, "loss": 0.6075, "step": 9215 }, { "epoch": 0.24, "grad_norm": 1.723486304283142, "learning_rate": 1.7851550140591833e-05, "loss": 0.6534, "step": 9216 }, { "epoch": 0.24, "grad_norm": 2.3569979667663574, "learning_rate": 1.7851036006656816e-05, "loss": 0.6616, "step": 9217 }, { "epoch": 0.24, "grad_norm": 2.0427980422973633, "learning_rate": 1.785052181861739e-05, "loss": 0.6534, "step": 9218 }, { "epoch": 0.24, "grad_norm": 6.316742420196533, "learning_rate": 1.78500075764771e-05, "loss": 0.4753, "step": 9219 }, { "epoch": 0.24, "grad_norm": 2.435148239135742, "learning_rate": 1.784949328023949e-05, "loss": 0.3528, "step": 9220 }, { "epoch": 0.24, "grad_norm": 2.3454229831695557, "learning_rate": 1.7848978929908102e-05, "loss": 0.4284, "step": 9221 }, { "epoch": 0.24, "grad_norm": 2.216942071914673, "learning_rate": 1.7848464525486486e-05, "loss": 0.6588, "step": 9222 }, { "epoch": 0.24, "grad_norm": 2.239288330078125, "learning_rate": 1.784795006697818e-05, "loss": 0.6917, "step": 9223 }, { "epoch": 0.24, "grad_norm": 2.993194103240967, "learning_rate": 1.7847435554386737e-05, "loss": 0.5725, "step": 9224 }, { "epoch": 0.24, "grad_norm": 3.796314239501953, "learning_rate": 1.7846920987715696e-05, "loss": 0.8015, "step": 9225 }, { "epoch": 0.24, "grad_norm": 2.115596055984497, "learning_rate": 1.7846406366968604e-05, "loss": 0.7673, "step": 9226 }, { "epoch": 0.24, "grad_norm": 1.621299147605896, "learning_rate": 1.784589169214901e-05, "loss": 0.5967, "step": 9227 }, { "epoch": 0.24, "grad_norm": 2.416105031967163, "learning_rate": 1.784537696326046e-05, "loss": 0.8127, "step": 9228 }, { "epoch": 0.24, "grad_norm": 2.367687940597534, "learning_rate": 1.78448621803065e-05, "loss": 0.6624, "step": 9229 }, { "epoch": 0.24, "grad_norm": 1.7930335998535156, "learning_rate": 1.784434734329068e-05, "loss": 0.6004, "step": 9230 }, { "epoch": 0.24, "grad_norm": 1.3677515983581543, "learning_rate": 1.7843832452216546e-05, "loss": 0.6188, "step": 9231 }, { "epoch": 0.24, "grad_norm": 3.8541970252990723, "learning_rate": 1.7843317507087647e-05, "loss": 0.6212, "step": 9232 }, { "epoch": 0.24, "grad_norm": 2.7557766437530518, "learning_rate": 1.7842802507907532e-05, "loss": 0.694, "step": 9233 }, { "epoch": 0.24, "grad_norm": 1.722644567489624, "learning_rate": 1.784228745467975e-05, "loss": 0.5984, "step": 9234 }, { "epoch": 0.24, "grad_norm": 3.1128804683685303, "learning_rate": 1.7841772347407845e-05, "loss": 0.7205, "step": 9235 }, { "epoch": 0.24, "grad_norm": 3.5775959491729736, "learning_rate": 1.7841257186095372e-05, "loss": 0.6628, "step": 9236 }, { "epoch": 0.24, "grad_norm": 4.872361660003662, "learning_rate": 1.7840741970745886e-05, "loss": 0.5324, "step": 9237 }, { "epoch": 0.24, "grad_norm": 1.523320198059082, "learning_rate": 1.784022670136293e-05, "loss": 0.6109, "step": 9238 }, { "epoch": 0.24, "grad_norm": 2.2329728603363037, "learning_rate": 1.7839711377950056e-05, "loss": 0.6065, "step": 9239 }, { "epoch": 0.24, "grad_norm": 4.471942901611328, "learning_rate": 1.7839196000510818e-05, "loss": 0.6832, "step": 9240 }, { "epoch": 0.24, "grad_norm": 4.900966644287109, "learning_rate": 1.7838680569048766e-05, "loss": 0.6375, "step": 9241 }, { "epoch": 0.24, "grad_norm": 1.6913976669311523, "learning_rate": 1.7838165083567453e-05, "loss": 0.6169, "step": 9242 }, { "epoch": 0.24, "grad_norm": 2.9591805934906006, "learning_rate": 1.783764954407043e-05, "loss": 0.5937, "step": 9243 }, { "epoch": 0.24, "grad_norm": 2.0612058639526367, "learning_rate": 1.783713395056125e-05, "loss": 0.6882, "step": 9244 }, { "epoch": 0.24, "grad_norm": 3.365233898162842, "learning_rate": 1.7836618303043466e-05, "loss": 0.7712, "step": 9245 }, { "epoch": 0.24, "grad_norm": 1.7378554344177246, "learning_rate": 1.7836102601520636e-05, "loss": 0.7169, "step": 9246 }, { "epoch": 0.24, "grad_norm": 1.2536261081695557, "learning_rate": 1.7835586845996305e-05, "loss": 0.6092, "step": 9247 }, { "epoch": 0.24, "grad_norm": 1.9221161603927612, "learning_rate": 1.7835071036474034e-05, "loss": 0.5874, "step": 9248 }, { "epoch": 0.24, "grad_norm": 1.7860182523727417, "learning_rate": 1.7834555172957376e-05, "loss": 0.7673, "step": 9249 }, { "epoch": 0.24, "grad_norm": 5.584598064422607, "learning_rate": 1.7834039255449886e-05, "loss": 0.5566, "step": 9250 }, { "epoch": 0.24, "grad_norm": 1.3020501136779785, "learning_rate": 1.783352328395512e-05, "loss": 0.631, "step": 9251 }, { "epoch": 0.24, "grad_norm": 2.3490140438079834, "learning_rate": 1.7833007258476633e-05, "loss": 0.6373, "step": 9252 }, { "epoch": 0.24, "grad_norm": 1.527245283126831, "learning_rate": 1.7832491179017982e-05, "loss": 0.5956, "step": 9253 }, { "epoch": 0.24, "grad_norm": 2.0199952125549316, "learning_rate": 1.783197504558272e-05, "loss": 0.7069, "step": 9254 }, { "epoch": 0.24, "grad_norm": 2.5952396392822266, "learning_rate": 1.7831458858174407e-05, "loss": 0.6062, "step": 9255 }, { "epoch": 0.24, "grad_norm": 3.002183675765991, "learning_rate": 1.78309426167966e-05, "loss": 0.7782, "step": 9256 }, { "epoch": 0.24, "grad_norm": 3.538182258605957, "learning_rate": 1.7830426321452858e-05, "loss": 0.5507, "step": 9257 }, { "epoch": 0.24, "grad_norm": 7.146656036376953, "learning_rate": 1.7829909972146735e-05, "loss": 0.6806, "step": 9258 }, { "epoch": 0.24, "grad_norm": 6.044074535369873, "learning_rate": 1.7829393568881795e-05, "loss": 0.9426, "step": 9259 }, { "epoch": 0.24, "grad_norm": 3.024007558822632, "learning_rate": 1.782887711166159e-05, "loss": 0.6685, "step": 9260 }, { "epoch": 0.24, "grad_norm": 1.9975236654281616, "learning_rate": 1.7828360600489684e-05, "loss": 0.4896, "step": 9261 }, { "epoch": 0.24, "grad_norm": 8.11415958404541, "learning_rate": 1.7827844035369635e-05, "loss": 0.6375, "step": 9262 }, { "epoch": 0.24, "grad_norm": 1.1727747917175293, "learning_rate": 1.7827327416305003e-05, "loss": 0.4424, "step": 9263 }, { "epoch": 0.24, "grad_norm": 2.6334292888641357, "learning_rate": 1.7826810743299347e-05, "loss": 0.6209, "step": 9264 }, { "epoch": 0.24, "grad_norm": 2.736419916152954, "learning_rate": 1.782629401635623e-05, "loss": 0.6905, "step": 9265 }, { "epoch": 0.24, "grad_norm": 7.448407173156738, "learning_rate": 1.782577723547921e-05, "loss": 0.7018, "step": 9266 }, { "epoch": 0.24, "grad_norm": 1.4562904834747314, "learning_rate": 1.782526040067185e-05, "loss": 0.5434, "step": 9267 }, { "epoch": 0.24, "grad_norm": 1.3509747982025146, "learning_rate": 1.7824743511937718e-05, "loss": 0.4939, "step": 9268 }, { "epoch": 0.24, "grad_norm": 2.2523066997528076, "learning_rate": 1.782422656928036e-05, "loss": 0.7056, "step": 9269 }, { "epoch": 0.24, "grad_norm": 9.205890655517578, "learning_rate": 1.7823709572703355e-05, "loss": 0.5555, "step": 9270 }, { "epoch": 0.24, "grad_norm": 2.222215414047241, "learning_rate": 1.7823192522210256e-05, "loss": 0.7832, "step": 9271 }, { "epoch": 0.24, "grad_norm": 3.532104969024658, "learning_rate": 1.782267541780463e-05, "loss": 0.7599, "step": 9272 }, { "epoch": 0.24, "grad_norm": 2.4136147499084473, "learning_rate": 1.782215825949004e-05, "loss": 0.5523, "step": 9273 }, { "epoch": 0.24, "grad_norm": 3.6288137435913086, "learning_rate": 1.7821641047270047e-05, "loss": 0.6613, "step": 9274 }, { "epoch": 0.24, "grad_norm": 3.163947105407715, "learning_rate": 1.7821123781148218e-05, "loss": 0.6321, "step": 9275 }, { "epoch": 0.24, "grad_norm": 1.351043462753296, "learning_rate": 1.782060646112812e-05, "loss": 0.5479, "step": 9276 }, { "epoch": 0.24, "grad_norm": 3.1504030227661133, "learning_rate": 1.7820089087213313e-05, "loss": 0.7385, "step": 9277 }, { "epoch": 0.24, "grad_norm": 1.922908902168274, "learning_rate": 1.7819571659407372e-05, "loss": 0.7767, "step": 9278 }, { "epoch": 0.24, "grad_norm": 3.932962656021118, "learning_rate": 1.781905417771385e-05, "loss": 0.6618, "step": 9279 }, { "epoch": 0.24, "grad_norm": 1.770397663116455, "learning_rate": 1.781853664213632e-05, "loss": 0.6875, "step": 9280 }, { "epoch": 0.24, "grad_norm": 3.3043527603149414, "learning_rate": 1.7818019052678348e-05, "loss": 0.6627, "step": 9281 }, { "epoch": 0.24, "grad_norm": 1.6554840803146362, "learning_rate": 1.7817501409343498e-05, "loss": 0.7159, "step": 9282 }, { "epoch": 0.24, "grad_norm": 3.467419385910034, "learning_rate": 1.7816983712135343e-05, "loss": 0.574, "step": 9283 }, { "epoch": 0.24, "grad_norm": 3.5232722759246826, "learning_rate": 1.7816465961057445e-05, "loss": 0.8525, "step": 9284 }, { "epoch": 0.24, "grad_norm": 10.40895938873291, "learning_rate": 1.7815948156113378e-05, "loss": 0.6884, "step": 9285 }, { "epoch": 0.24, "grad_norm": 1.49386465549469, "learning_rate": 1.7815430297306702e-05, "loss": 0.7795, "step": 9286 }, { "epoch": 0.24, "grad_norm": 1.5705676078796387, "learning_rate": 1.7814912384640993e-05, "loss": 0.7083, "step": 9287 }, { "epoch": 0.24, "grad_norm": 2.0392367839813232, "learning_rate": 1.7814394418119817e-05, "loss": 0.5553, "step": 9288 }, { "epoch": 0.24, "grad_norm": 2.3064308166503906, "learning_rate": 1.7813876397746746e-05, "loss": 0.5874, "step": 9289 }, { "epoch": 0.24, "grad_norm": 2.301750659942627, "learning_rate": 1.7813358323525345e-05, "loss": 0.5432, "step": 9290 }, { "epoch": 0.24, "grad_norm": 1.3081728219985962, "learning_rate": 1.781284019545919e-05, "loss": 0.5458, "step": 9291 }, { "epoch": 0.24, "grad_norm": 3.0635986328125, "learning_rate": 1.781232201355185e-05, "loss": 0.6, "step": 9292 }, { "epoch": 0.24, "grad_norm": 2.611443281173706, "learning_rate": 1.7811803777806892e-05, "loss": 0.5495, "step": 9293 }, { "epoch": 0.24, "grad_norm": 2.0101888179779053, "learning_rate": 1.781128548822789e-05, "loss": 0.5686, "step": 9294 }, { "epoch": 0.24, "grad_norm": 3.234459400177002, "learning_rate": 1.7810767144818414e-05, "loss": 0.6189, "step": 9295 }, { "epoch": 0.24, "grad_norm": 3.3960790634155273, "learning_rate": 1.7810248747582042e-05, "loss": 0.5698, "step": 9296 }, { "epoch": 0.24, "grad_norm": 2.001265287399292, "learning_rate": 1.7809730296522344e-05, "loss": 0.5263, "step": 9297 }, { "epoch": 0.24, "grad_norm": 2.9797873497009277, "learning_rate": 1.7809211791642887e-05, "loss": 0.7429, "step": 9298 }, { "epoch": 0.24, "grad_norm": 2.1122114658355713, "learning_rate": 1.780869323294725e-05, "loss": 0.4791, "step": 9299 }, { "epoch": 0.24, "grad_norm": 2.214966297149658, "learning_rate": 1.7808174620439005e-05, "loss": 0.6994, "step": 9300 }, { "epoch": 0.24, "grad_norm": 1.678858757019043, "learning_rate": 1.7807655954121726e-05, "loss": 0.5334, "step": 9301 }, { "epoch": 0.24, "grad_norm": 1.9062895774841309, "learning_rate": 1.7807137233998986e-05, "loss": 0.6936, "step": 9302 }, { "epoch": 0.24, "grad_norm": 2.0187456607818604, "learning_rate": 1.7806618460074364e-05, "loss": 0.7696, "step": 9303 }, { "epoch": 0.24, "grad_norm": 1.5045878887176514, "learning_rate": 1.7806099632351433e-05, "loss": 0.5271, "step": 9304 }, { "epoch": 0.24, "grad_norm": 3.13249135017395, "learning_rate": 1.7805580750833765e-05, "loss": 0.7061, "step": 9305 }, { "epoch": 0.24, "grad_norm": 2.5061306953430176, "learning_rate": 1.780506181552494e-05, "loss": 0.48, "step": 9306 }, { "epoch": 0.24, "grad_norm": 1.297822117805481, "learning_rate": 1.7804542826428532e-05, "loss": 0.6547, "step": 9307 }, { "epoch": 0.24, "grad_norm": 1.7790697813034058, "learning_rate": 1.7804023783548116e-05, "loss": 0.5387, "step": 9308 }, { "epoch": 0.24, "grad_norm": 1.6058592796325684, "learning_rate": 1.7803504686887275e-05, "loss": 0.6171, "step": 9309 }, { "epoch": 0.24, "grad_norm": 5.952277183532715, "learning_rate": 1.7802985536449582e-05, "loss": 0.9692, "step": 9310 }, { "epoch": 0.24, "grad_norm": 2.9588797092437744, "learning_rate": 1.780246633223861e-05, "loss": 0.7186, "step": 9311 }, { "epoch": 0.24, "grad_norm": 2.818887233734131, "learning_rate": 1.7801947074257947e-05, "loss": 0.7411, "step": 9312 }, { "epoch": 0.24, "grad_norm": 1.9998100996017456, "learning_rate": 1.7801427762511164e-05, "loss": 0.5364, "step": 9313 }, { "epoch": 0.24, "grad_norm": 2.0255367755889893, "learning_rate": 1.7800908397001845e-05, "loss": 0.6824, "step": 9314 }, { "epoch": 0.24, "grad_norm": 1.6908278465270996, "learning_rate": 1.7800388977733564e-05, "loss": 0.7586, "step": 9315 }, { "epoch": 0.24, "grad_norm": 2.8157479763031006, "learning_rate": 1.77998695047099e-05, "loss": 0.6119, "step": 9316 }, { "epoch": 0.24, "grad_norm": 4.718708038330078, "learning_rate": 1.779934997793444e-05, "loss": 0.8388, "step": 9317 }, { "epoch": 0.24, "grad_norm": 2.9167017936706543, "learning_rate": 1.779883039741076e-05, "loss": 0.6237, "step": 9318 }, { "epoch": 0.24, "grad_norm": 2.8452672958374023, "learning_rate": 1.7798310763142438e-05, "loss": 0.6773, "step": 9319 }, { "epoch": 0.24, "grad_norm": 2.24849796295166, "learning_rate": 1.779779107513306e-05, "loss": 0.6941, "step": 9320 }, { "epoch": 0.24, "grad_norm": 4.395511150360107, "learning_rate": 1.7797271333386205e-05, "loss": 0.7461, "step": 9321 }, { "epoch": 0.24, "grad_norm": 2.2504937648773193, "learning_rate": 1.7796751537905456e-05, "loss": 0.6602, "step": 9322 }, { "epoch": 0.24, "grad_norm": 1.5461056232452393, "learning_rate": 1.779623168869439e-05, "loss": 0.5474, "step": 9323 }, { "epoch": 0.24, "grad_norm": 2.7523040771484375, "learning_rate": 1.779571178575659e-05, "loss": 0.5748, "step": 9324 }, { "epoch": 0.24, "grad_norm": 1.7018086910247803, "learning_rate": 1.779519182909565e-05, "loss": 0.7124, "step": 9325 }, { "epoch": 0.24, "grad_norm": 5.720916271209717, "learning_rate": 1.7794671818715143e-05, "loss": 0.6375, "step": 9326 }, { "epoch": 0.24, "grad_norm": 1.517728328704834, "learning_rate": 1.7794151754618653e-05, "loss": 0.6688, "step": 9327 }, { "epoch": 0.24, "grad_norm": 7.07667875289917, "learning_rate": 1.7793631636809768e-05, "loss": 0.5781, "step": 9328 }, { "epoch": 0.24, "grad_norm": 1.2101006507873535, "learning_rate": 1.779311146529207e-05, "loss": 0.6358, "step": 9329 }, { "epoch": 0.24, "grad_norm": 2.2637343406677246, "learning_rate": 1.7792591240069143e-05, "loss": 0.5512, "step": 9330 }, { "epoch": 0.24, "grad_norm": 1.1770182847976685, "learning_rate": 1.7792070961144575e-05, "loss": 0.6049, "step": 9331 }, { "epoch": 0.24, "grad_norm": 8.14243221282959, "learning_rate": 1.7791550628521944e-05, "loss": 0.5285, "step": 9332 }, { "epoch": 0.24, "grad_norm": 3.2069337368011475, "learning_rate": 1.7791030242204845e-05, "loss": 0.7066, "step": 9333 }, { "epoch": 0.24, "grad_norm": 1.2712329626083374, "learning_rate": 1.7790509802196864e-05, "loss": 0.7259, "step": 9334 }, { "epoch": 0.24, "grad_norm": 3.055979013442993, "learning_rate": 1.7789989308501577e-05, "loss": 0.4641, "step": 9335 }, { "epoch": 0.24, "grad_norm": 7.411863803863525, "learning_rate": 1.7789468761122582e-05, "loss": 0.6528, "step": 9336 }, { "epoch": 0.24, "grad_norm": 3.4879517555236816, "learning_rate": 1.778894816006346e-05, "loss": 0.5931, "step": 9337 }, { "epoch": 0.24, "grad_norm": 1.2612900733947754, "learning_rate": 1.7788427505327803e-05, "loss": 0.5081, "step": 9338 }, { "epoch": 0.24, "grad_norm": 2.615292549133301, "learning_rate": 1.7787906796919194e-05, "loss": 0.6297, "step": 9339 }, { "epoch": 0.24, "grad_norm": 3.3472530841827393, "learning_rate": 1.7787386034841227e-05, "loss": 0.8897, "step": 9340 }, { "epoch": 0.24, "grad_norm": 1.4301059246063232, "learning_rate": 1.7786865219097484e-05, "loss": 0.558, "step": 9341 }, { "epoch": 0.24, "grad_norm": 3.5752365589141846, "learning_rate": 1.7786344349691562e-05, "loss": 0.5453, "step": 9342 }, { "epoch": 0.24, "grad_norm": 1.6702078580856323, "learning_rate": 1.7785823426627048e-05, "loss": 0.6636, "step": 9343 }, { "epoch": 0.24, "grad_norm": 6.0419511795043945, "learning_rate": 1.7785302449907526e-05, "loss": 0.6002, "step": 9344 }, { "epoch": 0.24, "grad_norm": 2.0294692516326904, "learning_rate": 1.7784781419536592e-05, "loss": 0.7202, "step": 9345 }, { "epoch": 0.24, "grad_norm": 2.0299036502838135, "learning_rate": 1.7784260335517837e-05, "loss": 0.6348, "step": 9346 }, { "epoch": 0.24, "grad_norm": 1.9177438020706177, "learning_rate": 1.778373919785485e-05, "loss": 0.6051, "step": 9347 }, { "epoch": 0.24, "grad_norm": 5.354192733764648, "learning_rate": 1.778321800655122e-05, "loss": 0.8502, "step": 9348 }, { "epoch": 0.24, "grad_norm": 1.6541402339935303, "learning_rate": 1.7782696761610543e-05, "loss": 0.6833, "step": 9349 }, { "epoch": 0.24, "grad_norm": 3.9733357429504395, "learning_rate": 1.778217546303641e-05, "loss": 0.4794, "step": 9350 }, { "epoch": 0.24, "grad_norm": 1.7324174642562866, "learning_rate": 1.7781654110832415e-05, "loss": 0.7274, "step": 9351 }, { "epoch": 0.24, "grad_norm": 13.170720100402832, "learning_rate": 1.7781132705002143e-05, "loss": 0.5401, "step": 9352 }, { "epoch": 0.24, "grad_norm": 2.915571689605713, "learning_rate": 1.77806112455492e-05, "loss": 0.8186, "step": 9353 }, { "epoch": 0.24, "grad_norm": 2.0169270038604736, "learning_rate": 1.7780089732477168e-05, "loss": 0.7075, "step": 9354 }, { "epoch": 0.24, "grad_norm": 2.709076166152954, "learning_rate": 1.7779568165789648e-05, "loss": 0.5966, "step": 9355 }, { "epoch": 0.24, "grad_norm": 3.1264007091522217, "learning_rate": 1.7779046545490227e-05, "loss": 0.6206, "step": 9356 }, { "epoch": 0.24, "grad_norm": 1.7280431985855103, "learning_rate": 1.7778524871582508e-05, "loss": 0.6815, "step": 9357 }, { "epoch": 0.24, "grad_norm": 1.3678709268569946, "learning_rate": 1.7778003144070085e-05, "loss": 0.7516, "step": 9358 }, { "epoch": 0.24, "grad_norm": 7.282217502593994, "learning_rate": 1.7777481362956547e-05, "loss": 0.6543, "step": 9359 }, { "epoch": 0.24, "grad_norm": 1.7181386947631836, "learning_rate": 1.7776959528245498e-05, "loss": 0.5822, "step": 9360 }, { "epoch": 0.24, "grad_norm": 3.8700008392333984, "learning_rate": 1.7776437639940526e-05, "loss": 0.7582, "step": 9361 }, { "epoch": 0.24, "grad_norm": 3.1598358154296875, "learning_rate": 1.777591569804523e-05, "loss": 0.7351, "step": 9362 }, { "epoch": 0.24, "grad_norm": 1.6285666227340698, "learning_rate": 1.777539370256321e-05, "loss": 0.6007, "step": 9363 }, { "epoch": 0.24, "grad_norm": 1.5011826753616333, "learning_rate": 1.7774871653498064e-05, "loss": 0.6419, "step": 9364 }, { "epoch": 0.24, "grad_norm": 3.3693506717681885, "learning_rate": 1.7774349550853384e-05, "loss": 0.6361, "step": 9365 }, { "epoch": 0.24, "grad_norm": 2.557547092437744, "learning_rate": 1.7773827394632775e-05, "loss": 0.6942, "step": 9366 }, { "epoch": 0.24, "grad_norm": 2.651364803314209, "learning_rate": 1.777330518483983e-05, "loss": 0.5579, "step": 9367 }, { "epoch": 0.24, "grad_norm": 9.522406578063965, "learning_rate": 1.777278292147815e-05, "loss": 0.578, "step": 9368 }, { "epoch": 0.24, "grad_norm": 7.136559963226318, "learning_rate": 1.7772260604551328e-05, "loss": 0.7098, "step": 9369 }, { "epoch": 0.24, "grad_norm": 1.14838445186615, "learning_rate": 1.7771738234062974e-05, "loss": 0.6651, "step": 9370 }, { "epoch": 0.24, "grad_norm": 2.4108407497406006, "learning_rate": 1.777121581001668e-05, "loss": 0.6463, "step": 9371 }, { "epoch": 0.24, "grad_norm": 2.5458109378814697, "learning_rate": 1.777069333241605e-05, "loss": 0.6986, "step": 9372 }, { "epoch": 0.24, "grad_norm": 4.496621131896973, "learning_rate": 1.7770170801264687e-05, "loss": 0.6328, "step": 9373 }, { "epoch": 0.24, "grad_norm": 1.3427590131759644, "learning_rate": 1.7769648216566187e-05, "loss": 0.5892, "step": 9374 }, { "epoch": 0.24, "grad_norm": 13.594757080078125, "learning_rate": 1.776912557832415e-05, "loss": 0.5166, "step": 9375 }, { "epoch": 0.24, "grad_norm": 2.234416961669922, "learning_rate": 1.7768602886542183e-05, "loss": 0.6738, "step": 9376 }, { "epoch": 0.24, "grad_norm": 3.850216865539551, "learning_rate": 1.7768080141223884e-05, "loss": 0.6823, "step": 9377 }, { "epoch": 0.24, "grad_norm": 2.7064127922058105, "learning_rate": 1.776755734237286e-05, "loss": 0.844, "step": 9378 }, { "epoch": 0.24, "grad_norm": 5.712628364562988, "learning_rate": 1.7767034489992708e-05, "loss": 0.5009, "step": 9379 }, { "epoch": 0.24, "grad_norm": 2.0805115699768066, "learning_rate": 1.7766511584087032e-05, "loss": 0.5653, "step": 9380 }, { "epoch": 0.24, "grad_norm": 2.1349375247955322, "learning_rate": 1.7765988624659442e-05, "loss": 0.4652, "step": 9381 }, { "epoch": 0.24, "grad_norm": 2.1476705074310303, "learning_rate": 1.7765465611713536e-05, "loss": 0.6895, "step": 9382 }, { "epoch": 0.24, "grad_norm": 5.934951305389404, "learning_rate": 1.776494254525292e-05, "loss": 0.7225, "step": 9383 }, { "epoch": 0.24, "grad_norm": 5.330917835235596, "learning_rate": 1.77644194252812e-05, "loss": 0.7505, "step": 9384 }, { "epoch": 0.24, "grad_norm": 1.7296042442321777, "learning_rate": 1.7763896251801974e-05, "loss": 0.6631, "step": 9385 }, { "epoch": 0.24, "grad_norm": 1.575953483581543, "learning_rate": 1.7763373024818858e-05, "loss": 0.5532, "step": 9386 }, { "epoch": 0.24, "grad_norm": 3.9117090702056885, "learning_rate": 1.776284974433545e-05, "loss": 0.7907, "step": 9387 }, { "epoch": 0.24, "grad_norm": 1.2447429895401, "learning_rate": 1.7762326410355363e-05, "loss": 0.6523, "step": 9388 }, { "epoch": 0.24, "grad_norm": 1.8703312873840332, "learning_rate": 1.7761803022882197e-05, "loss": 0.5713, "step": 9389 }, { "epoch": 0.24, "grad_norm": 3.5096914768218994, "learning_rate": 1.7761279581919558e-05, "loss": 0.9327, "step": 9390 }, { "epoch": 0.24, "grad_norm": 2.9922006130218506, "learning_rate": 1.7760756087471057e-05, "loss": 0.7622, "step": 9391 }, { "epoch": 0.24, "grad_norm": 3.157895803451538, "learning_rate": 1.77602325395403e-05, "loss": 0.6951, "step": 9392 }, { "epoch": 0.24, "grad_norm": 2.372471332550049, "learning_rate": 1.77597089381309e-05, "loss": 0.515, "step": 9393 }, { "epoch": 0.24, "grad_norm": 2.819004774093628, "learning_rate": 1.775918528324646e-05, "loss": 0.7347, "step": 9394 }, { "epoch": 0.24, "grad_norm": 2.770548105239868, "learning_rate": 1.7758661574890585e-05, "loss": 0.5413, "step": 9395 }, { "epoch": 0.24, "grad_norm": 1.2579636573791504, "learning_rate": 1.7758137813066892e-05, "loss": 0.6888, "step": 9396 }, { "epoch": 0.24, "grad_norm": 1.6211296319961548, "learning_rate": 1.7757613997778987e-05, "loss": 0.5942, "step": 9397 }, { "epoch": 0.24, "grad_norm": 2.4336256980895996, "learning_rate": 1.7757090129030483e-05, "loss": 0.6589, "step": 9398 }, { "epoch": 0.24, "grad_norm": 1.560250997543335, "learning_rate": 1.7756566206824987e-05, "loss": 0.7229, "step": 9399 }, { "epoch": 0.24, "grad_norm": 2.3086259365081787, "learning_rate": 1.7756042231166105e-05, "loss": 0.6605, "step": 9400 }, { "epoch": 0.24, "grad_norm": 3.236816644668579, "learning_rate": 1.775551820205746e-05, "loss": 0.616, "step": 9401 }, { "epoch": 0.24, "grad_norm": 3.600428819656372, "learning_rate": 1.775499411950265e-05, "loss": 0.7542, "step": 9402 }, { "epoch": 0.24, "grad_norm": 2.2897493839263916, "learning_rate": 1.7754469983505292e-05, "loss": 0.5004, "step": 9403 }, { "epoch": 0.24, "grad_norm": 1.438506841659546, "learning_rate": 1.7753945794069003e-05, "loss": 0.6149, "step": 9404 }, { "epoch": 0.24, "grad_norm": 1.7961875200271606, "learning_rate": 1.7753421551197386e-05, "loss": 0.3533, "step": 9405 }, { "epoch": 0.24, "grad_norm": 2.9685628414154053, "learning_rate": 1.7752897254894063e-05, "loss": 0.5811, "step": 9406 }, { "epoch": 0.24, "grad_norm": 2.391780376434326, "learning_rate": 1.7752372905162643e-05, "loss": 0.6566, "step": 9407 }, { "epoch": 0.24, "grad_norm": 3.2482893466949463, "learning_rate": 1.7751848502006737e-05, "loss": 0.7422, "step": 9408 }, { "epoch": 0.24, "grad_norm": 1.1971046924591064, "learning_rate": 1.7751324045429965e-05, "loss": 0.4632, "step": 9409 }, { "epoch": 0.24, "grad_norm": 2.148435115814209, "learning_rate": 1.7750799535435933e-05, "loss": 0.6904, "step": 9410 }, { "epoch": 0.24, "grad_norm": 1.5528901815414429, "learning_rate": 1.7750274972028265e-05, "loss": 0.6487, "step": 9411 }, { "epoch": 0.24, "grad_norm": 1.990151286125183, "learning_rate": 1.774975035521057e-05, "loss": 0.5371, "step": 9412 }, { "epoch": 0.24, "grad_norm": 3.222468614578247, "learning_rate": 1.7749225684986462e-05, "loss": 0.4339, "step": 9413 }, { "epoch": 0.24, "grad_norm": 2.423647403717041, "learning_rate": 1.774870096135956e-05, "loss": 0.569, "step": 9414 }, { "epoch": 0.24, "grad_norm": 2.0590999126434326, "learning_rate": 1.774817618433348e-05, "loss": 0.5999, "step": 9415 }, { "epoch": 0.24, "grad_norm": 1.8741743564605713, "learning_rate": 1.7747651353911838e-05, "loss": 0.7684, "step": 9416 }, { "epoch": 0.24, "grad_norm": 4.225874900817871, "learning_rate": 1.7747126470098247e-05, "loss": 0.8845, "step": 9417 }, { "epoch": 0.24, "grad_norm": 1.741803765296936, "learning_rate": 1.774660153289633e-05, "loss": 0.6835, "step": 9418 }, { "epoch": 0.24, "grad_norm": 3.0247559547424316, "learning_rate": 1.7746076542309704e-05, "loss": 0.5749, "step": 9419 }, { "epoch": 0.24, "grad_norm": 2.2148520946502686, "learning_rate": 1.774555149834198e-05, "loss": 0.6853, "step": 9420 }, { "epoch": 0.24, "grad_norm": 4.446715354919434, "learning_rate": 1.7745026400996786e-05, "loss": 0.4457, "step": 9421 }, { "epoch": 0.24, "grad_norm": 1.4235769510269165, "learning_rate": 1.7744501250277736e-05, "loss": 0.6251, "step": 9422 }, { "epoch": 0.24, "grad_norm": 2.4052999019622803, "learning_rate": 1.7743976046188445e-05, "loss": 0.5271, "step": 9423 }, { "epoch": 0.24, "grad_norm": 1.5350258350372314, "learning_rate": 1.774345078873254e-05, "loss": 0.4422, "step": 9424 }, { "epoch": 0.24, "grad_norm": 1.6879712343215942, "learning_rate": 1.7742925477913635e-05, "loss": 0.5495, "step": 9425 }, { "epoch": 0.24, "grad_norm": 2.4345266819000244, "learning_rate": 1.7742400113735354e-05, "loss": 0.8002, "step": 9426 }, { "epoch": 0.24, "grad_norm": 1.7224267721176147, "learning_rate": 1.7741874696201312e-05, "loss": 0.7139, "step": 9427 }, { "epoch": 0.24, "grad_norm": 1.6196008920669556, "learning_rate": 1.7741349225315138e-05, "loss": 0.4652, "step": 9428 }, { "epoch": 0.24, "grad_norm": 1.9410253763198853, "learning_rate": 1.7740823701080447e-05, "loss": 0.7278, "step": 9429 }, { "epoch": 0.24, "grad_norm": 2.084336757659912, "learning_rate": 1.774029812350086e-05, "loss": 0.5576, "step": 9430 }, { "epoch": 0.24, "grad_norm": 1.742354393005371, "learning_rate": 1.7739772492580006e-05, "loss": 0.7085, "step": 9431 }, { "epoch": 0.24, "grad_norm": 8.392521858215332, "learning_rate": 1.77392468083215e-05, "loss": 0.4793, "step": 9432 }, { "epoch": 0.24, "grad_norm": 3.270397186279297, "learning_rate": 1.7738721070728966e-05, "loss": 0.91, "step": 9433 }, { "epoch": 0.24, "grad_norm": 1.3570539951324463, "learning_rate": 1.773819527980603e-05, "loss": 0.6973, "step": 9434 }, { "epoch": 0.24, "grad_norm": 2.1487882137298584, "learning_rate": 1.773766943555631e-05, "loss": 0.5249, "step": 9435 }, { "epoch": 0.24, "grad_norm": 4.532017707824707, "learning_rate": 1.773714353798344e-05, "loss": 0.7444, "step": 9436 }, { "epoch": 0.24, "grad_norm": 2.642568349838257, "learning_rate": 1.7736617587091033e-05, "loss": 0.6192, "step": 9437 }, { "epoch": 0.24, "grad_norm": 2.575930595397949, "learning_rate": 1.773609158288272e-05, "loss": 0.6551, "step": 9438 }, { "epoch": 0.24, "grad_norm": 4.846686363220215, "learning_rate": 1.7735565525362127e-05, "loss": 0.8754, "step": 9439 }, { "epoch": 0.24, "grad_norm": 1.5760866403579712, "learning_rate": 1.773503941453287e-05, "loss": 0.6282, "step": 9440 }, { "epoch": 0.24, "grad_norm": 2.2709262371063232, "learning_rate": 1.7734513250398586e-05, "loss": 0.5933, "step": 9441 }, { "epoch": 0.24, "grad_norm": 1.6080665588378906, "learning_rate": 1.7733987032962895e-05, "loss": 0.5599, "step": 9442 }, { "epoch": 0.24, "grad_norm": 4.469772815704346, "learning_rate": 1.7733460762229424e-05, "loss": 0.7662, "step": 9443 }, { "epoch": 0.24, "grad_norm": 1.4784144163131714, "learning_rate": 1.7732934438201796e-05, "loss": 0.5952, "step": 9444 }, { "epoch": 0.24, "grad_norm": 2.469660997390747, "learning_rate": 1.773240806088365e-05, "loss": 0.6851, "step": 9445 }, { "epoch": 0.24, "grad_norm": 1.246623158454895, "learning_rate": 1.77318816302786e-05, "loss": 0.6329, "step": 9446 }, { "epoch": 0.24, "grad_norm": 1.6200759410858154, "learning_rate": 1.773135514639028e-05, "loss": 0.6128, "step": 9447 }, { "epoch": 0.24, "grad_norm": 1.4518779516220093, "learning_rate": 1.7730828609222318e-05, "loss": 0.708, "step": 9448 }, { "epoch": 0.24, "grad_norm": 3.625976800918579, "learning_rate": 1.7730302018778345e-05, "loss": 0.576, "step": 9449 }, { "epoch": 0.24, "grad_norm": 2.119231939315796, "learning_rate": 1.7729775375061984e-05, "loss": 0.7748, "step": 9450 }, { "epoch": 0.24, "grad_norm": 1.4124140739440918, "learning_rate": 1.772924867807687e-05, "loss": 0.4354, "step": 9451 }, { "epoch": 0.24, "grad_norm": 1.3982760906219482, "learning_rate": 1.7728721927826625e-05, "loss": 0.4693, "step": 9452 }, { "epoch": 0.24, "grad_norm": 3.268909215927124, "learning_rate": 1.772819512431489e-05, "loss": 0.6308, "step": 9453 }, { "epoch": 0.24, "grad_norm": 1.409113883972168, "learning_rate": 1.7727668267545282e-05, "loss": 0.554, "step": 9454 }, { "epoch": 0.24, "grad_norm": 5.017343044281006, "learning_rate": 1.772714135752145e-05, "loss": 0.6757, "step": 9455 }, { "epoch": 0.24, "grad_norm": 3.1775898933410645, "learning_rate": 1.7726614394247005e-05, "loss": 0.6775, "step": 9456 }, { "epoch": 0.24, "grad_norm": 1.6787959337234497, "learning_rate": 1.7726087377725595e-05, "loss": 0.481, "step": 9457 }, { "epoch": 0.24, "grad_norm": 9.755888938903809, "learning_rate": 1.772556030796084e-05, "loss": 0.6217, "step": 9458 }, { "epoch": 0.24, "grad_norm": 2.393767833709717, "learning_rate": 1.772503318495638e-05, "loss": 0.5775, "step": 9459 }, { "epoch": 0.24, "grad_norm": 3.6615712642669678, "learning_rate": 1.7724506008715843e-05, "loss": 0.6963, "step": 9460 }, { "epoch": 0.24, "grad_norm": 2.6272566318511963, "learning_rate": 1.7723978779242862e-05, "loss": 0.6482, "step": 9461 }, { "epoch": 0.24, "grad_norm": 1.2203127145767212, "learning_rate": 1.7723451496541075e-05, "loss": 0.4935, "step": 9462 }, { "epoch": 0.24, "grad_norm": 2.028154134750366, "learning_rate": 1.772292416061411e-05, "loss": 0.6752, "step": 9463 }, { "epoch": 0.24, "grad_norm": 1.4458750486373901, "learning_rate": 1.7722396771465605e-05, "loss": 0.5655, "step": 9464 }, { "epoch": 0.24, "grad_norm": 13.333402633666992, "learning_rate": 1.7721869329099194e-05, "loss": 0.8077, "step": 9465 }, { "epoch": 0.24, "grad_norm": 5.002239227294922, "learning_rate": 1.772134183351851e-05, "loss": 0.7491, "step": 9466 }, { "epoch": 0.24, "grad_norm": 3.0572056770324707, "learning_rate": 1.7720814284727187e-05, "loss": 0.6782, "step": 9467 }, { "epoch": 0.24, "grad_norm": 1.5870258808135986, "learning_rate": 1.7720286682728866e-05, "loss": 0.6614, "step": 9468 }, { "epoch": 0.24, "grad_norm": 1.7387315034866333, "learning_rate": 1.771975902752718e-05, "loss": 0.6633, "step": 9469 }, { "epoch": 0.24, "grad_norm": 2.731499195098877, "learning_rate": 1.771923131912576e-05, "loss": 0.8068, "step": 9470 }, { "epoch": 0.24, "grad_norm": 1.5675837993621826, "learning_rate": 1.7718703557528255e-05, "loss": 0.5306, "step": 9471 }, { "epoch": 0.24, "grad_norm": 1.9080358743667603, "learning_rate": 1.7718175742738286e-05, "loss": 0.5152, "step": 9472 }, { "epoch": 0.24, "grad_norm": 1.371494174003601, "learning_rate": 1.7717647874759503e-05, "loss": 0.5642, "step": 9473 }, { "epoch": 0.24, "grad_norm": 1.8827974796295166, "learning_rate": 1.7717119953595542e-05, "loss": 0.7444, "step": 9474 }, { "epoch": 0.24, "grad_norm": 4.329324722290039, "learning_rate": 1.7716591979250035e-05, "loss": 0.6829, "step": 9475 }, { "epoch": 0.24, "grad_norm": 2.229275941848755, "learning_rate": 1.7716063951726626e-05, "loss": 0.5657, "step": 9476 }, { "epoch": 0.24, "grad_norm": 1.5855480432510376, "learning_rate": 1.771553587102895e-05, "loss": 0.47, "step": 9477 }, { "epoch": 0.24, "grad_norm": 2.827461004257202, "learning_rate": 1.771500773716065e-05, "loss": 0.67, "step": 9478 }, { "epoch": 0.24, "grad_norm": 1.8742473125457764, "learning_rate": 1.7714479550125362e-05, "loss": 0.6497, "step": 9479 }, { "epoch": 0.24, "grad_norm": 4.626952648162842, "learning_rate": 1.771395130992673e-05, "loss": 0.6504, "step": 9480 }, { "epoch": 0.24, "grad_norm": 1.5530381202697754, "learning_rate": 1.771342301656839e-05, "loss": 0.4992, "step": 9481 }, { "epoch": 0.24, "grad_norm": 1.2569653987884521, "learning_rate": 1.7712894670053983e-05, "loss": 0.6379, "step": 9482 }, { "epoch": 0.24, "grad_norm": 4.5371832847595215, "learning_rate": 1.7712366270387154e-05, "loss": 0.5872, "step": 9483 }, { "epoch": 0.24, "grad_norm": 4.425838470458984, "learning_rate": 1.7711837817571545e-05, "loss": 0.8524, "step": 9484 }, { "epoch": 0.24, "grad_norm": 2.987036943435669, "learning_rate": 1.771130931161079e-05, "loss": 0.6669, "step": 9485 }, { "epoch": 0.24, "grad_norm": 2.176266670227051, "learning_rate": 1.7710780752508538e-05, "loss": 0.5534, "step": 9486 }, { "epoch": 0.24, "grad_norm": 1.936485767364502, "learning_rate": 1.771025214026843e-05, "loss": 0.7171, "step": 9487 }, { "epoch": 0.24, "grad_norm": 4.02123498916626, "learning_rate": 1.7709723474894108e-05, "loss": 0.565, "step": 9488 }, { "epoch": 0.24, "grad_norm": 1.759577751159668, "learning_rate": 1.7709194756389214e-05, "loss": 0.6387, "step": 9489 }, { "epoch": 0.24, "grad_norm": 1.4338080883026123, "learning_rate": 1.770866598475739e-05, "loss": 0.5976, "step": 9490 }, { "epoch": 0.24, "grad_norm": 2.938694953918457, "learning_rate": 1.770813716000229e-05, "loss": 0.6357, "step": 9491 }, { "epoch": 0.24, "grad_norm": 1.4459773302078247, "learning_rate": 1.7707608282127547e-05, "loss": 0.4049, "step": 9492 }, { "epoch": 0.24, "grad_norm": 1.7097262144088745, "learning_rate": 1.7707079351136815e-05, "loss": 0.7119, "step": 9493 }, { "epoch": 0.24, "grad_norm": 1.7969858646392822, "learning_rate": 1.770655036703373e-05, "loss": 0.645, "step": 9494 }, { "epoch": 0.24, "grad_norm": 1.3360646963119507, "learning_rate": 1.770602132982194e-05, "loss": 0.4785, "step": 9495 }, { "epoch": 0.24, "grad_norm": 1.9896900653839111, "learning_rate": 1.7705492239505098e-05, "loss": 0.5792, "step": 9496 }, { "epoch": 0.24, "grad_norm": 1.4318069219589233, "learning_rate": 1.770496309608684e-05, "loss": 0.4738, "step": 9497 }, { "epoch": 0.24, "grad_norm": 2.2925782203674316, "learning_rate": 1.770443389957082e-05, "loss": 0.6056, "step": 9498 }, { "epoch": 0.24, "grad_norm": 1.974080204963684, "learning_rate": 1.770390464996068e-05, "loss": 0.473, "step": 9499 }, { "epoch": 0.24, "grad_norm": 2.9980885982513428, "learning_rate": 1.7703375347260068e-05, "loss": 0.5164, "step": 9500 }, { "epoch": 0.24, "grad_norm": 4.5273847579956055, "learning_rate": 1.7702845991472633e-05, "loss": 0.6163, "step": 9501 }, { "epoch": 0.24, "grad_norm": 2.2800302505493164, "learning_rate": 1.7702316582602026e-05, "loss": 0.6496, "step": 9502 }, { "epoch": 0.24, "grad_norm": 1.322041392326355, "learning_rate": 1.7701787120651893e-05, "loss": 0.5187, "step": 9503 }, { "epoch": 0.24, "grad_norm": 1.5586726665496826, "learning_rate": 1.770125760562588e-05, "loss": 0.7354, "step": 9504 }, { "epoch": 0.24, "grad_norm": 8.431245803833008, "learning_rate": 1.7700728037527633e-05, "loss": 0.547, "step": 9505 }, { "epoch": 0.24, "grad_norm": 1.767133116722107, "learning_rate": 1.7700198416360815e-05, "loss": 0.5999, "step": 9506 }, { "epoch": 0.24, "grad_norm": 3.647972822189331, "learning_rate": 1.769966874212906e-05, "loss": 0.5658, "step": 9507 }, { "epoch": 0.24, "grad_norm": 1.8186391592025757, "learning_rate": 1.7699139014836027e-05, "loss": 0.6319, "step": 9508 }, { "epoch": 0.24, "grad_norm": 3.393876552581787, "learning_rate": 1.769860923448537e-05, "loss": 0.6066, "step": 9509 }, { "epoch": 0.24, "grad_norm": 2.554492473602295, "learning_rate": 1.769807940108073e-05, "loss": 0.6264, "step": 9510 }, { "epoch": 0.24, "grad_norm": 8.408620834350586, "learning_rate": 1.7697549514625765e-05, "loss": 0.6464, "step": 9511 }, { "epoch": 0.24, "grad_norm": 1.4949851036071777, "learning_rate": 1.7697019575124125e-05, "loss": 0.7237, "step": 9512 }, { "epoch": 0.24, "grad_norm": 2.5965652465820312, "learning_rate": 1.769648958257946e-05, "loss": 0.5548, "step": 9513 }, { "epoch": 0.24, "grad_norm": 2.4522228240966797, "learning_rate": 1.7695959536995425e-05, "loss": 0.6814, "step": 9514 }, { "epoch": 0.24, "grad_norm": 3.265191078186035, "learning_rate": 1.7695429438375674e-05, "loss": 0.7371, "step": 9515 }, { "epoch": 0.24, "grad_norm": 2.336859941482544, "learning_rate": 1.7694899286723857e-05, "loss": 0.612, "step": 9516 }, { "epoch": 0.24, "grad_norm": 2.005110025405884, "learning_rate": 1.7694369082043628e-05, "loss": 0.6867, "step": 9517 }, { "epoch": 0.24, "grad_norm": 3.2822017669677734, "learning_rate": 1.769383882433864e-05, "loss": 0.6443, "step": 9518 }, { "epoch": 0.24, "grad_norm": 2.570802688598633, "learning_rate": 1.769330851361255e-05, "loss": 0.7252, "step": 9519 }, { "epoch": 0.24, "grad_norm": 3.0797815322875977, "learning_rate": 1.7692778149869013e-05, "loss": 0.4894, "step": 9520 }, { "epoch": 0.24, "grad_norm": 2.174121141433716, "learning_rate": 1.769224773311168e-05, "loss": 0.6268, "step": 9521 }, { "epoch": 0.24, "grad_norm": 3.470153570175171, "learning_rate": 1.769171726334421e-05, "loss": 0.5327, "step": 9522 }, { "epoch": 0.24, "grad_norm": 2.4521069526672363, "learning_rate": 1.7691186740570253e-05, "loss": 0.7596, "step": 9523 }, { "epoch": 0.24, "grad_norm": 3.496925115585327, "learning_rate": 1.769065616479347e-05, "loss": 0.705, "step": 9524 }, { "epoch": 0.24, "grad_norm": 5.117413520812988, "learning_rate": 1.769012553601752e-05, "loss": 0.7481, "step": 9525 }, { "epoch": 0.24, "grad_norm": 2.096299409866333, "learning_rate": 1.7689594854246054e-05, "loss": 0.6386, "step": 9526 }, { "epoch": 0.24, "grad_norm": 5.942174911499023, "learning_rate": 1.768906411948273e-05, "loss": 0.7281, "step": 9527 }, { "epoch": 0.24, "grad_norm": 6.944858551025391, "learning_rate": 1.768853333173121e-05, "loss": 0.6047, "step": 9528 }, { "epoch": 0.24, "grad_norm": 1.9096531867980957, "learning_rate": 1.7688002490995146e-05, "loss": 0.4052, "step": 9529 }, { "epoch": 0.24, "grad_norm": 1.5304291248321533, "learning_rate": 1.7687471597278202e-05, "loss": 0.6554, "step": 9530 }, { "epoch": 0.24, "grad_norm": 1.5659189224243164, "learning_rate": 1.768694065058403e-05, "loss": 0.5698, "step": 9531 }, { "epoch": 0.24, "grad_norm": 1.9523123502731323, "learning_rate": 1.7686409650916294e-05, "loss": 0.7471, "step": 9532 }, { "epoch": 0.24, "grad_norm": 2.386838912963867, "learning_rate": 1.7685878598278652e-05, "loss": 0.5085, "step": 9533 }, { "epoch": 0.24, "grad_norm": 1.307648777961731, "learning_rate": 1.7685347492674762e-05, "loss": 0.6347, "step": 9534 }, { "epoch": 0.24, "grad_norm": 2.9831643104553223, "learning_rate": 1.7684816334108288e-05, "loss": 0.6382, "step": 9535 }, { "epoch": 0.24, "grad_norm": 3.7467243671417236, "learning_rate": 1.7684285122582884e-05, "loss": 0.6698, "step": 9536 }, { "epoch": 0.24, "grad_norm": 1.5291863679885864, "learning_rate": 1.768375385810222e-05, "loss": 0.6557, "step": 9537 }, { "epoch": 0.24, "grad_norm": 6.211865425109863, "learning_rate": 1.7683222540669947e-05, "loss": 0.5566, "step": 9538 }, { "epoch": 0.24, "grad_norm": 1.6225895881652832, "learning_rate": 1.7682691170289734e-05, "loss": 0.6675, "step": 9539 }, { "epoch": 0.24, "grad_norm": 2.848137617111206, "learning_rate": 1.7682159746965243e-05, "loss": 0.6339, "step": 9540 }, { "epoch": 0.24, "grad_norm": 1.4770256280899048, "learning_rate": 1.7681628270700126e-05, "loss": 0.6815, "step": 9541 }, { "epoch": 0.24, "grad_norm": 4.8392720222473145, "learning_rate": 1.7681096741498058e-05, "loss": 0.8265, "step": 9542 }, { "epoch": 0.24, "grad_norm": 2.1816182136535645, "learning_rate": 1.7680565159362695e-05, "loss": 0.5329, "step": 9543 }, { "epoch": 0.24, "grad_norm": 3.9692225456237793, "learning_rate": 1.7680033524297704e-05, "loss": 0.5578, "step": 9544 }, { "epoch": 0.24, "grad_norm": 4.6135945320129395, "learning_rate": 1.7679501836306747e-05, "loss": 0.5738, "step": 9545 }, { "epoch": 0.24, "grad_norm": 4.27571964263916, "learning_rate": 1.7678970095393487e-05, "loss": 0.7213, "step": 9546 }, { "epoch": 0.24, "grad_norm": 2.4582345485687256, "learning_rate": 1.767843830156159e-05, "loss": 0.7339, "step": 9547 }, { "epoch": 0.24, "grad_norm": 1.94883394241333, "learning_rate": 1.767790645481472e-05, "loss": 0.6408, "step": 9548 }, { "epoch": 0.24, "grad_norm": 1.3333264589309692, "learning_rate": 1.767737455515654e-05, "loss": 0.546, "step": 9549 }, { "epoch": 0.24, "grad_norm": 3.4672839641571045, "learning_rate": 1.7676842602590723e-05, "loss": 0.7636, "step": 9550 }, { "epoch": 0.24, "grad_norm": 1.786867380142212, "learning_rate": 1.767631059712093e-05, "loss": 0.5697, "step": 9551 }, { "epoch": 0.24, "grad_norm": 1.8795751333236694, "learning_rate": 1.7675778538750825e-05, "loss": 0.5166, "step": 9552 }, { "epoch": 0.24, "grad_norm": 8.312231063842773, "learning_rate": 1.7675246427484073e-05, "loss": 0.5787, "step": 9553 }, { "epoch": 0.24, "grad_norm": 3.891474723815918, "learning_rate": 1.767471426332435e-05, "loss": 0.7017, "step": 9554 }, { "epoch": 0.24, "grad_norm": 1.6536787748336792, "learning_rate": 1.7674182046275316e-05, "loss": 0.4946, "step": 9555 }, { "epoch": 0.24, "grad_norm": 1.9268996715545654, "learning_rate": 1.767364977634064e-05, "loss": 0.6027, "step": 9556 }, { "epoch": 0.24, "grad_norm": 2.508183240890503, "learning_rate": 1.767311745352399e-05, "loss": 0.5051, "step": 9557 }, { "epoch": 0.24, "grad_norm": 2.19588303565979, "learning_rate": 1.767258507782904e-05, "loss": 0.5193, "step": 9558 }, { "epoch": 0.25, "grad_norm": 3.427844762802124, "learning_rate": 1.7672052649259446e-05, "loss": 0.7525, "step": 9559 }, { "epoch": 0.25, "grad_norm": 1.9931596517562866, "learning_rate": 1.767152016781889e-05, "loss": 0.7178, "step": 9560 }, { "epoch": 0.25, "grad_norm": 1.5496952533721924, "learning_rate": 1.7670987633511037e-05, "loss": 0.6314, "step": 9561 }, { "epoch": 0.25, "grad_norm": 1.1836636066436768, "learning_rate": 1.767045504633956e-05, "loss": 0.3656, "step": 9562 }, { "epoch": 0.25, "grad_norm": 2.604348659515381, "learning_rate": 1.766992240630812e-05, "loss": 0.7949, "step": 9563 }, { "epoch": 0.25, "grad_norm": 4.214980125427246, "learning_rate": 1.7669389713420395e-05, "loss": 0.6885, "step": 9564 }, { "epoch": 0.25, "grad_norm": 3.350666046142578, "learning_rate": 1.7668856967680054e-05, "loss": 0.6998, "step": 9565 }, { "epoch": 0.25, "grad_norm": 2.248655319213867, "learning_rate": 1.7668324169090772e-05, "loss": 0.7495, "step": 9566 }, { "epoch": 0.25, "grad_norm": 3.7620139122009277, "learning_rate": 1.7667791317656216e-05, "loss": 0.7333, "step": 9567 }, { "epoch": 0.25, "grad_norm": 2.8153960704803467, "learning_rate": 1.766725841338006e-05, "loss": 0.6555, "step": 9568 }, { "epoch": 0.25, "grad_norm": 1.127126932144165, "learning_rate": 1.7666725456265972e-05, "loss": 0.598, "step": 9569 }, { "epoch": 0.25, "grad_norm": 2.6291794776916504, "learning_rate": 1.7666192446317633e-05, "loss": 0.6826, "step": 9570 }, { "epoch": 0.25, "grad_norm": 1.7877036333084106, "learning_rate": 1.766565938353871e-05, "loss": 0.644, "step": 9571 }, { "epoch": 0.25, "grad_norm": 2.1576366424560547, "learning_rate": 1.766512626793288e-05, "loss": 0.6406, "step": 9572 }, { "epoch": 0.25, "grad_norm": 1.9105334281921387, "learning_rate": 1.7664593099503818e-05, "loss": 0.4806, "step": 9573 }, { "epoch": 0.25, "grad_norm": 3.2031795978546143, "learning_rate": 1.7664059878255192e-05, "loss": 0.8142, "step": 9574 }, { "epoch": 0.25, "grad_norm": 7.081739902496338, "learning_rate": 1.766352660419068e-05, "loss": 0.778, "step": 9575 }, { "epoch": 0.25, "grad_norm": 8.699617385864258, "learning_rate": 1.766299327731396e-05, "loss": 0.8052, "step": 9576 }, { "epoch": 0.25, "grad_norm": 2.507035255432129, "learning_rate": 1.7662459897628702e-05, "loss": 0.6915, "step": 9577 }, { "epoch": 0.25, "grad_norm": 1.868115782737732, "learning_rate": 1.7661926465138587e-05, "loss": 0.5235, "step": 9578 }, { "epoch": 0.25, "grad_norm": 1.294724941253662, "learning_rate": 1.7661392979847288e-05, "loss": 0.6162, "step": 9579 }, { "epoch": 0.25, "grad_norm": 2.369260787963867, "learning_rate": 1.7660859441758484e-05, "loss": 0.6013, "step": 9580 }, { "epoch": 0.25, "grad_norm": 6.552789211273193, "learning_rate": 1.7660325850875844e-05, "loss": 0.6992, "step": 9581 }, { "epoch": 0.25, "grad_norm": 2.212308168411255, "learning_rate": 1.7659792207203054e-05, "loss": 0.6638, "step": 9582 }, { "epoch": 0.25, "grad_norm": 1.959112286567688, "learning_rate": 1.7659258510743788e-05, "loss": 0.5428, "step": 9583 }, { "epoch": 0.25, "grad_norm": 2.655686378479004, "learning_rate": 1.7658724761501725e-05, "loss": 0.7218, "step": 9584 }, { "epoch": 0.25, "grad_norm": 2.640202045440674, "learning_rate": 1.765819095948054e-05, "loss": 0.6721, "step": 9585 }, { "epoch": 0.25, "grad_norm": 5.049601078033447, "learning_rate": 1.765765710468392e-05, "loss": 0.6049, "step": 9586 }, { "epoch": 0.25, "grad_norm": 2.474353075027466, "learning_rate": 1.765712319711553e-05, "loss": 0.6208, "step": 9587 }, { "epoch": 0.25, "grad_norm": 2.0631918907165527, "learning_rate": 1.7656589236779064e-05, "loss": 0.714, "step": 9588 }, { "epoch": 0.25, "grad_norm": 1.2613390684127808, "learning_rate": 1.7656055223678195e-05, "loss": 0.6794, "step": 9589 }, { "epoch": 0.25, "grad_norm": 1.6560910940170288, "learning_rate": 1.7655521157816602e-05, "loss": 0.5505, "step": 9590 }, { "epoch": 0.25, "grad_norm": 2.76505708694458, "learning_rate": 1.7654987039197968e-05, "loss": 0.5626, "step": 9591 }, { "epoch": 0.25, "grad_norm": 1.8109709024429321, "learning_rate": 1.765445286782597e-05, "loss": 0.7948, "step": 9592 }, { "epoch": 0.25, "grad_norm": 4.8274970054626465, "learning_rate": 1.7653918643704294e-05, "loss": 0.7446, "step": 9593 }, { "epoch": 0.25, "grad_norm": 2.432859420776367, "learning_rate": 1.7653384366836616e-05, "loss": 0.6479, "step": 9594 }, { "epoch": 0.25, "grad_norm": 3.5746347904205322, "learning_rate": 1.765285003722663e-05, "loss": 0.6127, "step": 9595 }, { "epoch": 0.25, "grad_norm": 2.782848358154297, "learning_rate": 1.7652315654878e-05, "loss": 0.6082, "step": 9596 }, { "epoch": 0.25, "grad_norm": 1.2871731519699097, "learning_rate": 1.7651781219794422e-05, "loss": 0.5238, "step": 9597 }, { "epoch": 0.25, "grad_norm": 1.7073291540145874, "learning_rate": 1.7651246731979575e-05, "loss": 0.6335, "step": 9598 }, { "epoch": 0.25, "grad_norm": 2.465909957885742, "learning_rate": 1.7650712191437144e-05, "loss": 0.6161, "step": 9599 }, { "epoch": 0.25, "grad_norm": 3.322902202606201, "learning_rate": 1.7650177598170808e-05, "loss": 0.5995, "step": 9600 }, { "epoch": 0.25, "grad_norm": 1.671634316444397, "learning_rate": 1.7649642952184257e-05, "loss": 0.7463, "step": 9601 }, { "epoch": 0.25, "grad_norm": 1.8190938234329224, "learning_rate": 1.764910825348117e-05, "loss": 0.6154, "step": 9602 }, { "epoch": 0.25, "grad_norm": 1.7566123008728027, "learning_rate": 1.7648573502065236e-05, "loss": 0.597, "step": 9603 }, { "epoch": 0.25, "grad_norm": 2.567823886871338, "learning_rate": 1.764803869794014e-05, "loss": 0.6562, "step": 9604 }, { "epoch": 0.25, "grad_norm": 2.09151029586792, "learning_rate": 1.7647503841109563e-05, "loss": 0.7586, "step": 9605 }, { "epoch": 0.25, "grad_norm": 2.722559690475464, "learning_rate": 1.7646968931577198e-05, "loss": 0.7174, "step": 9606 }, { "epoch": 0.25, "grad_norm": 2.060361623764038, "learning_rate": 1.7646433969346725e-05, "loss": 0.554, "step": 9607 }, { "epoch": 0.25, "grad_norm": 2.0415329933166504, "learning_rate": 1.7645898954421835e-05, "loss": 0.6691, "step": 9608 }, { "epoch": 0.25, "grad_norm": 4.237059116363525, "learning_rate": 1.7645363886806214e-05, "loss": 0.7614, "step": 9609 }, { "epoch": 0.25, "grad_norm": 3.8054404258728027, "learning_rate": 1.7644828766503544e-05, "loss": 0.613, "step": 9610 }, { "epoch": 0.25, "grad_norm": 6.9452433586120605, "learning_rate": 1.7644293593517518e-05, "loss": 0.8597, "step": 9611 }, { "epoch": 0.25, "grad_norm": 1.6356443166732788, "learning_rate": 1.7643758367851824e-05, "loss": 0.6941, "step": 9612 }, { "epoch": 0.25, "grad_norm": 3.2721049785614014, "learning_rate": 1.764322308951015e-05, "loss": 0.7325, "step": 9613 }, { "epoch": 0.25, "grad_norm": 4.505667686462402, "learning_rate": 1.7642687758496185e-05, "loss": 0.7148, "step": 9614 }, { "epoch": 0.25, "grad_norm": 1.4709112644195557, "learning_rate": 1.7642152374813618e-05, "loss": 0.622, "step": 9615 }, { "epoch": 0.25, "grad_norm": 1.4169087409973145, "learning_rate": 1.7641616938466137e-05, "loss": 0.689, "step": 9616 }, { "epoch": 0.25, "grad_norm": 4.616573810577393, "learning_rate": 1.7641081449457435e-05, "loss": 0.631, "step": 9617 }, { "epoch": 0.25, "grad_norm": 2.2690608501434326, "learning_rate": 1.7640545907791198e-05, "loss": 0.6334, "step": 9618 }, { "epoch": 0.25, "grad_norm": 2.526447057723999, "learning_rate": 1.764001031347112e-05, "loss": 0.6396, "step": 9619 }, { "epoch": 0.25, "grad_norm": 1.5583395957946777, "learning_rate": 1.763947466650089e-05, "loss": 0.6887, "step": 9620 }, { "epoch": 0.25, "grad_norm": 4.900195121765137, "learning_rate": 1.76389389668842e-05, "loss": 0.5561, "step": 9621 }, { "epoch": 0.25, "grad_norm": 2.4896633625030518, "learning_rate": 1.763840321462474e-05, "loss": 0.6021, "step": 9622 }, { "epoch": 0.25, "grad_norm": 6.87863302230835, "learning_rate": 1.7637867409726207e-05, "loss": 0.6755, "step": 9623 }, { "epoch": 0.25, "grad_norm": 1.6584815979003906, "learning_rate": 1.763733155219229e-05, "loss": 0.5703, "step": 9624 }, { "epoch": 0.25, "grad_norm": 2.8900487422943115, "learning_rate": 1.763679564202668e-05, "loss": 0.739, "step": 9625 }, { "epoch": 0.25, "grad_norm": 1.8069249391555786, "learning_rate": 1.7636259679233078e-05, "loss": 0.5434, "step": 9626 }, { "epoch": 0.25, "grad_norm": 3.435744285583496, "learning_rate": 1.7635723663815168e-05, "loss": 0.9682, "step": 9627 }, { "epoch": 0.25, "grad_norm": 2.7161903381347656, "learning_rate": 1.7635187595776647e-05, "loss": 0.6632, "step": 9628 }, { "epoch": 0.25, "grad_norm": 1.7398765087127686, "learning_rate": 1.763465147512121e-05, "loss": 0.4991, "step": 9629 }, { "epoch": 0.25, "grad_norm": 2.1828670501708984, "learning_rate": 1.7634115301852553e-05, "loss": 0.5928, "step": 9630 }, { "epoch": 0.25, "grad_norm": 2.5152077674865723, "learning_rate": 1.763357907597437e-05, "loss": 0.5086, "step": 9631 }, { "epoch": 0.25, "grad_norm": 2.0701072216033936, "learning_rate": 1.7633042797490353e-05, "loss": 0.8543, "step": 9632 }, { "epoch": 0.25, "grad_norm": 2.315105438232422, "learning_rate": 1.7632506466404202e-05, "loss": 0.7701, "step": 9633 }, { "epoch": 0.25, "grad_norm": 2.3836727142333984, "learning_rate": 1.7631970082719613e-05, "loss": 0.5273, "step": 9634 }, { "epoch": 0.25, "grad_norm": 1.5911675691604614, "learning_rate": 1.763143364644028e-05, "loss": 0.5232, "step": 9635 }, { "epoch": 0.25, "grad_norm": 3.261362075805664, "learning_rate": 1.7630897157569903e-05, "loss": 0.6955, "step": 9636 }, { "epoch": 0.25, "grad_norm": 2.384732961654663, "learning_rate": 1.7630360616112173e-05, "loss": 0.6708, "step": 9637 }, { "epoch": 0.25, "grad_norm": 1.47037672996521, "learning_rate": 1.7629824022070793e-05, "loss": 0.5345, "step": 9638 }, { "epoch": 0.25, "grad_norm": 1.8042707443237305, "learning_rate": 1.7629287375449456e-05, "loss": 0.5941, "step": 9639 }, { "epoch": 0.25, "grad_norm": 3.3656599521636963, "learning_rate": 1.762875067625187e-05, "loss": 0.6853, "step": 9640 }, { "epoch": 0.25, "grad_norm": 4.851570129394531, "learning_rate": 1.7628213924481724e-05, "loss": 0.6179, "step": 9641 }, { "epoch": 0.25, "grad_norm": 3.333760976791382, "learning_rate": 1.7627677120142724e-05, "loss": 0.6453, "step": 9642 }, { "epoch": 0.25, "grad_norm": 4.426587104797363, "learning_rate": 1.762714026323856e-05, "loss": 0.6042, "step": 9643 }, { "epoch": 0.25, "grad_norm": 1.8944305181503296, "learning_rate": 1.762660335377294e-05, "loss": 0.6971, "step": 9644 }, { "epoch": 0.25, "grad_norm": 1.4227250814437866, "learning_rate": 1.762606639174956e-05, "loss": 0.5635, "step": 9645 }, { "epoch": 0.25, "grad_norm": 1.9814385175704956, "learning_rate": 1.762552937717212e-05, "loss": 0.575, "step": 9646 }, { "epoch": 0.25, "grad_norm": 1.7879598140716553, "learning_rate": 1.7624992310044328e-05, "loss": 0.7724, "step": 9647 }, { "epoch": 0.25, "grad_norm": 1.2942814826965332, "learning_rate": 1.7624455190369873e-05, "loss": 0.5829, "step": 9648 }, { "epoch": 0.25, "grad_norm": 3.5588979721069336, "learning_rate": 1.7623918018152467e-05, "loss": 0.7177, "step": 9649 }, { "epoch": 0.25, "grad_norm": 3.133338689804077, "learning_rate": 1.7623380793395806e-05, "loss": 0.719, "step": 9650 }, { "epoch": 0.25, "grad_norm": 2.7876193523406982, "learning_rate": 1.7622843516103593e-05, "loss": 0.6377, "step": 9651 }, { "epoch": 0.25, "grad_norm": 2.6910223960876465, "learning_rate": 1.7622306186279535e-05, "loss": 0.6024, "step": 9652 }, { "epoch": 0.25, "grad_norm": 2.712491273880005, "learning_rate": 1.7621768803927333e-05, "loss": 0.8964, "step": 9653 }, { "epoch": 0.25, "grad_norm": 2.2853362560272217, "learning_rate": 1.7621231369050684e-05, "loss": 0.5472, "step": 9654 }, { "epoch": 0.25, "grad_norm": 1.592787742614746, "learning_rate": 1.7620693881653298e-05, "loss": 0.6128, "step": 9655 }, { "epoch": 0.25, "grad_norm": 1.2845557928085327, "learning_rate": 1.7620156341738876e-05, "loss": 0.4881, "step": 9656 }, { "epoch": 0.25, "grad_norm": 2.882955312728882, "learning_rate": 1.7619618749311128e-05, "loss": 0.5596, "step": 9657 }, { "epoch": 0.25, "grad_norm": 1.5204098224639893, "learning_rate": 1.761908110437375e-05, "loss": 0.5819, "step": 9658 }, { "epoch": 0.25, "grad_norm": 5.211949348449707, "learning_rate": 1.7618543406930456e-05, "loss": 0.7704, "step": 9659 }, { "epoch": 0.25, "grad_norm": 2.3701694011688232, "learning_rate": 1.7618005656984948e-05, "loss": 0.559, "step": 9660 }, { "epoch": 0.25, "grad_norm": 1.4561856985092163, "learning_rate": 1.7617467854540927e-05, "loss": 0.6127, "step": 9661 }, { "epoch": 0.25, "grad_norm": 1.7624471187591553, "learning_rate": 1.7616929999602104e-05, "loss": 0.6121, "step": 9662 }, { "epoch": 0.25, "grad_norm": 1.5200563669204712, "learning_rate": 1.761639209217219e-05, "loss": 0.5744, "step": 9663 }, { "epoch": 0.25, "grad_norm": 1.717706561088562, "learning_rate": 1.761585413225488e-05, "loss": 0.5236, "step": 9664 }, { "epoch": 0.25, "grad_norm": 2.265301465988159, "learning_rate": 1.7615316119853892e-05, "loss": 0.6083, "step": 9665 }, { "epoch": 0.25, "grad_norm": 1.9420620203018188, "learning_rate": 1.761477805497293e-05, "loss": 0.5162, "step": 9666 }, { "epoch": 0.25, "grad_norm": 1.5066719055175781, "learning_rate": 1.76142399376157e-05, "loss": 0.7466, "step": 9667 }, { "epoch": 0.25, "grad_norm": 4.105382919311523, "learning_rate": 1.761370176778591e-05, "loss": 0.7828, "step": 9668 }, { "epoch": 0.25, "grad_norm": 3.041171073913574, "learning_rate": 1.761316354548727e-05, "loss": 0.7054, "step": 9669 }, { "epoch": 0.25, "grad_norm": 2.814929246902466, "learning_rate": 1.7612625270723497e-05, "loss": 0.7111, "step": 9670 }, { "epoch": 0.25, "grad_norm": 2.190103769302368, "learning_rate": 1.761208694349829e-05, "loss": 0.6604, "step": 9671 }, { "epoch": 0.25, "grad_norm": 5.291024208068848, "learning_rate": 1.761154856381536e-05, "loss": 0.5973, "step": 9672 }, { "epoch": 0.25, "grad_norm": 1.4461091756820679, "learning_rate": 1.7611010131678423e-05, "loss": 0.5658, "step": 9673 }, { "epoch": 0.25, "grad_norm": 2.213996171951294, "learning_rate": 1.761047164709118e-05, "loss": 0.5443, "step": 9674 }, { "epoch": 0.25, "grad_norm": 2.5046136379241943, "learning_rate": 1.760993311005735e-05, "loss": 0.5753, "step": 9675 }, { "epoch": 0.25, "grad_norm": 4.027712345123291, "learning_rate": 1.7609394520580644e-05, "loss": 0.744, "step": 9676 }, { "epoch": 0.25, "grad_norm": 2.462570905685425, "learning_rate": 1.7608855878664773e-05, "loss": 0.6565, "step": 9677 }, { "epoch": 0.25, "grad_norm": 6.527665615081787, "learning_rate": 1.760831718431345e-05, "loss": 0.5635, "step": 9678 }, { "epoch": 0.25, "grad_norm": 3.4382991790771484, "learning_rate": 1.7607778437530377e-05, "loss": 0.5411, "step": 9679 }, { "epoch": 0.25, "grad_norm": 4.291146755218506, "learning_rate": 1.760723963831928e-05, "loss": 0.7613, "step": 9680 }, { "epoch": 0.25, "grad_norm": 1.7091410160064697, "learning_rate": 1.7606700786683864e-05, "loss": 0.647, "step": 9681 }, { "epoch": 0.25, "grad_norm": 1.3762435913085938, "learning_rate": 1.7606161882627848e-05, "loss": 0.6628, "step": 9682 }, { "epoch": 0.25, "grad_norm": 1.423012375831604, "learning_rate": 1.760562292615494e-05, "loss": 0.5369, "step": 9683 }, { "epoch": 0.25, "grad_norm": 1.5558123588562012, "learning_rate": 1.760508391726886e-05, "loss": 0.5844, "step": 9684 }, { "epoch": 0.25, "grad_norm": 2.079545736312866, "learning_rate": 1.7604544855973317e-05, "loss": 0.6751, "step": 9685 }, { "epoch": 0.25, "grad_norm": 3.654571771621704, "learning_rate": 1.760400574227203e-05, "loss": 0.5885, "step": 9686 }, { "epoch": 0.25, "grad_norm": 1.265043020248413, "learning_rate": 1.760346657616871e-05, "loss": 0.5958, "step": 9687 }, { "epoch": 0.25, "grad_norm": 1.0689537525177002, "learning_rate": 1.760292735766708e-05, "loss": 0.5487, "step": 9688 }, { "epoch": 0.25, "grad_norm": 4.152801990509033, "learning_rate": 1.7602388086770848e-05, "loss": 0.6968, "step": 9689 }, { "epoch": 0.25, "grad_norm": 3.8339173793792725, "learning_rate": 1.7601848763483734e-05, "loss": 0.6539, "step": 9690 }, { "epoch": 0.25, "grad_norm": 4.4790263175964355, "learning_rate": 1.760130938780946e-05, "loss": 0.4396, "step": 9691 }, { "epoch": 0.25, "grad_norm": 1.5125808715820312, "learning_rate": 1.760076995975173e-05, "loss": 0.5637, "step": 9692 }, { "epoch": 0.25, "grad_norm": 1.7061543464660645, "learning_rate": 1.7600230479314268e-05, "loss": 0.6248, "step": 9693 }, { "epoch": 0.25, "grad_norm": 1.5281442403793335, "learning_rate": 1.7599690946500798e-05, "loss": 0.4496, "step": 9694 }, { "epoch": 0.25, "grad_norm": 1.1618839502334595, "learning_rate": 1.759915136131503e-05, "loss": 0.4244, "step": 9695 }, { "epoch": 0.25, "grad_norm": 2.099388599395752, "learning_rate": 1.7598611723760683e-05, "loss": 0.4985, "step": 9696 }, { "epoch": 0.25, "grad_norm": 2.1483423709869385, "learning_rate": 1.759807203384148e-05, "loss": 0.7832, "step": 9697 }, { "epoch": 0.25, "grad_norm": 1.5431846380233765, "learning_rate": 1.7597532291561137e-05, "loss": 0.7158, "step": 9698 }, { "epoch": 0.25, "grad_norm": 3.820178270339966, "learning_rate": 1.7596992496923377e-05, "loss": 0.7348, "step": 9699 }, { "epoch": 0.25, "grad_norm": 3.2459936141967773, "learning_rate": 1.7596452649931916e-05, "loss": 0.5932, "step": 9700 }, { "epoch": 0.25, "grad_norm": 4.057492733001709, "learning_rate": 1.7595912750590475e-05, "loss": 0.6384, "step": 9701 }, { "epoch": 0.25, "grad_norm": 1.554243564605713, "learning_rate": 1.7595372798902778e-05, "loss": 0.5244, "step": 9702 }, { "epoch": 0.25, "grad_norm": 4.194108963012695, "learning_rate": 1.7594832794872544e-05, "loss": 0.6338, "step": 9703 }, { "epoch": 0.25, "grad_norm": 2.4483020305633545, "learning_rate": 1.759429273850349e-05, "loss": 0.6843, "step": 9704 }, { "epoch": 0.25, "grad_norm": 1.6527520418167114, "learning_rate": 1.7593752629799343e-05, "loss": 0.6064, "step": 9705 }, { "epoch": 0.25, "grad_norm": 1.8007080554962158, "learning_rate": 1.7593212468763824e-05, "loss": 0.6523, "step": 9706 }, { "epoch": 0.25, "grad_norm": 2.8173439502716064, "learning_rate": 1.759267225540066e-05, "loss": 1.0111, "step": 9707 }, { "epoch": 0.25, "grad_norm": 2.4261157512664795, "learning_rate": 1.7592131989713562e-05, "loss": 0.5568, "step": 9708 }, { "epoch": 0.25, "grad_norm": 7.670602798461914, "learning_rate": 1.7591591671706263e-05, "loss": 0.9339, "step": 9709 }, { "epoch": 0.25, "grad_norm": 2.5370664596557617, "learning_rate": 1.7591051301382486e-05, "loss": 0.6664, "step": 9710 }, { "epoch": 0.25, "grad_norm": 2.4925811290740967, "learning_rate": 1.7590510878745947e-05, "loss": 0.4761, "step": 9711 }, { "epoch": 0.25, "grad_norm": 2.145570993423462, "learning_rate": 1.758997040380038e-05, "loss": 0.7049, "step": 9712 }, { "epoch": 0.25, "grad_norm": 1.5741819143295288, "learning_rate": 1.7589429876549505e-05, "loss": 0.5507, "step": 9713 }, { "epoch": 0.25, "grad_norm": 1.4395579099655151, "learning_rate": 1.7588889296997046e-05, "loss": 0.7054, "step": 9714 }, { "epoch": 0.25, "grad_norm": 2.5125317573547363, "learning_rate": 1.7588348665146734e-05, "loss": 0.569, "step": 9715 }, { "epoch": 0.25, "grad_norm": 2.5586860179901123, "learning_rate": 1.7587807981002284e-05, "loss": 0.6057, "step": 9716 }, { "epoch": 0.25, "grad_norm": 1.6715867519378662, "learning_rate": 1.7587267244567434e-05, "loss": 0.6532, "step": 9717 }, { "epoch": 0.25, "grad_norm": 1.8116130828857422, "learning_rate": 1.7586726455845902e-05, "loss": 0.7461, "step": 9718 }, { "epoch": 0.25, "grad_norm": 1.2540414333343506, "learning_rate": 1.7586185614841416e-05, "loss": 0.5348, "step": 9719 }, { "epoch": 0.25, "grad_norm": 2.81624698638916, "learning_rate": 1.7585644721557708e-05, "loss": 0.5957, "step": 9720 }, { "epoch": 0.25, "grad_norm": 5.939542293548584, "learning_rate": 1.7585103775998505e-05, "loss": 0.6218, "step": 9721 }, { "epoch": 0.25, "grad_norm": 2.65388822555542, "learning_rate": 1.7584562778167526e-05, "loss": 0.4817, "step": 9722 }, { "epoch": 0.25, "grad_norm": 3.556330919265747, "learning_rate": 1.758402172806851e-05, "loss": 0.5527, "step": 9723 }, { "epoch": 0.25, "grad_norm": 2.4615631103515625, "learning_rate": 1.758348062570518e-05, "loss": 0.7613, "step": 9724 }, { "epoch": 0.25, "grad_norm": 2.6629302501678467, "learning_rate": 1.7582939471081264e-05, "loss": 0.7975, "step": 9725 }, { "epoch": 0.25, "grad_norm": 3.130756378173828, "learning_rate": 1.7582398264200497e-05, "loss": 0.6815, "step": 9726 }, { "epoch": 0.25, "grad_norm": 1.6753754615783691, "learning_rate": 1.75818570050666e-05, "loss": 0.6463, "step": 9727 }, { "epoch": 0.25, "grad_norm": 2.974756956100464, "learning_rate": 1.758131569368331e-05, "loss": 0.5139, "step": 9728 }, { "epoch": 0.25, "grad_norm": 2.8461475372314453, "learning_rate": 1.7580774330054355e-05, "loss": 0.5491, "step": 9729 }, { "epoch": 0.25, "grad_norm": 3.729017972946167, "learning_rate": 1.7580232914183465e-05, "loss": 0.7676, "step": 9730 }, { "epoch": 0.25, "grad_norm": 2.2764484882354736, "learning_rate": 1.7579691446074373e-05, "loss": 0.7486, "step": 9731 }, { "epoch": 0.25, "grad_norm": 1.6929543018341064, "learning_rate": 1.7579149925730812e-05, "loss": 0.6363, "step": 9732 }, { "epoch": 0.25, "grad_norm": 2.158984899520874, "learning_rate": 1.757860835315651e-05, "loss": 0.614, "step": 9733 }, { "epoch": 0.25, "grad_norm": 3.172084331512451, "learning_rate": 1.7578066728355202e-05, "loss": 0.7862, "step": 9734 }, { "epoch": 0.25, "grad_norm": 2.468202829360962, "learning_rate": 1.7577525051330617e-05, "loss": 0.5843, "step": 9735 }, { "epoch": 0.25, "grad_norm": 1.5476150512695312, "learning_rate": 1.757698332208649e-05, "loss": 0.4593, "step": 9736 }, { "epoch": 0.25, "grad_norm": 3.4195518493652344, "learning_rate": 1.7576441540626553e-05, "loss": 0.6604, "step": 9737 }, { "epoch": 0.25, "grad_norm": 1.548403024673462, "learning_rate": 1.757589970695454e-05, "loss": 0.7077, "step": 9738 }, { "epoch": 0.25, "grad_norm": 1.359089732170105, "learning_rate": 1.7575357821074193e-05, "loss": 0.5217, "step": 9739 }, { "epoch": 0.25, "grad_norm": 2.445627212524414, "learning_rate": 1.7574815882989232e-05, "loss": 0.7153, "step": 9740 }, { "epoch": 0.25, "grad_norm": 3.6121811866760254, "learning_rate": 1.7574273892703403e-05, "loss": 0.6093, "step": 9741 }, { "epoch": 0.25, "grad_norm": 3.447815418243408, "learning_rate": 1.7573731850220436e-05, "loss": 0.6672, "step": 9742 }, { "epoch": 0.25, "grad_norm": 1.5607140064239502, "learning_rate": 1.7573189755544064e-05, "loss": 0.7201, "step": 9743 }, { "epoch": 0.25, "grad_norm": 2.126500368118286, "learning_rate": 1.7572647608678032e-05, "loss": 0.7583, "step": 9744 }, { "epoch": 0.25, "grad_norm": 6.939602851867676, "learning_rate": 1.7572105409626066e-05, "loss": 0.4486, "step": 9745 }, { "epoch": 0.25, "grad_norm": 4.1458024978637695, "learning_rate": 1.757156315839191e-05, "loss": 0.6782, "step": 9746 }, { "epoch": 0.25, "grad_norm": 3.6606431007385254, "learning_rate": 1.7571020854979295e-05, "loss": 0.7077, "step": 9747 }, { "epoch": 0.25, "grad_norm": 1.6545605659484863, "learning_rate": 1.757047849939196e-05, "loss": 0.6655, "step": 9748 }, { "epoch": 0.25, "grad_norm": 10.202613830566406, "learning_rate": 1.7569936091633645e-05, "loss": 0.6139, "step": 9749 }, { "epoch": 0.25, "grad_norm": 1.6929179430007935, "learning_rate": 1.7569393631708088e-05, "loss": 0.6159, "step": 9750 }, { "epoch": 0.25, "grad_norm": 6.959954261779785, "learning_rate": 1.7568851119619023e-05, "loss": 0.4969, "step": 9751 }, { "epoch": 0.25, "grad_norm": 2.045686721801758, "learning_rate": 1.7568308555370194e-05, "loss": 0.7056, "step": 9752 }, { "epoch": 0.25, "grad_norm": 0.9913466572761536, "learning_rate": 1.7567765938965332e-05, "loss": 0.621, "step": 9753 }, { "epoch": 0.25, "grad_norm": 3.9578068256378174, "learning_rate": 1.7567223270408187e-05, "loss": 0.588, "step": 9754 }, { "epoch": 0.25, "grad_norm": 1.7892097234725952, "learning_rate": 1.7566680549702493e-05, "loss": 0.6133, "step": 9755 }, { "epoch": 0.25, "grad_norm": 1.796612024307251, "learning_rate": 1.7566137776851987e-05, "loss": 0.596, "step": 9756 }, { "epoch": 0.25, "grad_norm": 3.0374205112457275, "learning_rate": 1.7565594951860417e-05, "loss": 0.61, "step": 9757 }, { "epoch": 0.25, "grad_norm": 2.364790439605713, "learning_rate": 1.756505207473152e-05, "loss": 0.5549, "step": 9758 }, { "epoch": 0.25, "grad_norm": 2.0878536701202393, "learning_rate": 1.7564509145469034e-05, "loss": 0.6018, "step": 9759 }, { "epoch": 0.25, "grad_norm": 2.8340184688568115, "learning_rate": 1.7563966164076703e-05, "loss": 0.5994, "step": 9760 }, { "epoch": 0.25, "grad_norm": 2.9085865020751953, "learning_rate": 1.7563423130558272e-05, "loss": 0.5916, "step": 9761 }, { "epoch": 0.25, "grad_norm": 1.525180459022522, "learning_rate": 1.7562880044917476e-05, "loss": 0.4731, "step": 9762 }, { "epoch": 0.25, "grad_norm": 1.8185651302337646, "learning_rate": 1.7562336907158068e-05, "loss": 0.8174, "step": 9763 }, { "epoch": 0.25, "grad_norm": 1.8269058465957642, "learning_rate": 1.7561793717283782e-05, "loss": 0.5275, "step": 9764 }, { "epoch": 0.25, "grad_norm": 1.154345154762268, "learning_rate": 1.7561250475298366e-05, "loss": 0.7088, "step": 9765 }, { "epoch": 0.25, "grad_norm": 2.8102867603302, "learning_rate": 1.7560707181205557e-05, "loss": 0.6318, "step": 9766 }, { "epoch": 0.25, "grad_norm": 3.973569393157959, "learning_rate": 1.756016383500911e-05, "loss": 0.8055, "step": 9767 }, { "epoch": 0.25, "grad_norm": 1.5769132375717163, "learning_rate": 1.7559620436712764e-05, "loss": 0.7278, "step": 9768 }, { "epoch": 0.25, "grad_norm": 2.196420431137085, "learning_rate": 1.7559076986320262e-05, "loss": 0.5763, "step": 9769 }, { "epoch": 0.25, "grad_norm": 1.526571273803711, "learning_rate": 1.7558533483835348e-05, "loss": 0.5204, "step": 9770 }, { "epoch": 0.25, "grad_norm": 2.8375582695007324, "learning_rate": 1.755798992926177e-05, "loss": 0.6563, "step": 9771 }, { "epoch": 0.25, "grad_norm": 1.5815887451171875, "learning_rate": 1.7557446322603277e-05, "loss": 0.5762, "step": 9772 }, { "epoch": 0.25, "grad_norm": 1.8677705526351929, "learning_rate": 1.755690266386361e-05, "loss": 0.6472, "step": 9773 }, { "epoch": 0.25, "grad_norm": 1.8318119049072266, "learning_rate": 1.755635895304652e-05, "loss": 0.5716, "step": 9774 }, { "epoch": 0.25, "grad_norm": 1.3265751600265503, "learning_rate": 1.755581519015575e-05, "loss": 0.6856, "step": 9775 }, { "epoch": 0.25, "grad_norm": 1.4583325386047363, "learning_rate": 1.7555271375195045e-05, "loss": 0.5481, "step": 9776 }, { "epoch": 0.25, "grad_norm": 1.2866321802139282, "learning_rate": 1.755472750816816e-05, "loss": 0.6291, "step": 9777 }, { "epoch": 0.25, "grad_norm": 1.6526345014572144, "learning_rate": 1.755418358907884e-05, "loss": 0.7161, "step": 9778 }, { "epoch": 0.25, "grad_norm": 2.8791663646698, "learning_rate": 1.7553639617930834e-05, "loss": 0.7897, "step": 9779 }, { "epoch": 0.25, "grad_norm": 5.227235794067383, "learning_rate": 1.7553095594727886e-05, "loss": 0.5637, "step": 9780 }, { "epoch": 0.25, "grad_norm": 1.7758208513259888, "learning_rate": 1.755255151947375e-05, "loss": 0.5795, "step": 9781 }, { "epoch": 0.25, "grad_norm": 2.0895071029663086, "learning_rate": 1.7552007392172175e-05, "loss": 0.4997, "step": 9782 }, { "epoch": 0.25, "grad_norm": 3.4214870929718018, "learning_rate": 1.7551463212826906e-05, "loss": 0.7067, "step": 9783 }, { "epoch": 0.25, "grad_norm": 1.6935633420944214, "learning_rate": 1.75509189814417e-05, "loss": 0.5685, "step": 9784 }, { "epoch": 0.25, "grad_norm": 1.6125764846801758, "learning_rate": 1.7550374698020305e-05, "loss": 0.7217, "step": 9785 }, { "epoch": 0.25, "grad_norm": 1.6604912281036377, "learning_rate": 1.7549830362566468e-05, "loss": 0.6056, "step": 9786 }, { "epoch": 0.25, "grad_norm": 2.5379207134246826, "learning_rate": 1.7549285975083946e-05, "loss": 0.7095, "step": 9787 }, { "epoch": 0.25, "grad_norm": 2.47900128364563, "learning_rate": 1.754874153557649e-05, "loss": 0.4831, "step": 9788 }, { "epoch": 0.25, "grad_norm": 4.237349033355713, "learning_rate": 1.7548197044047842e-05, "loss": 0.5988, "step": 9789 }, { "epoch": 0.25, "grad_norm": 2.0699336528778076, "learning_rate": 1.754765250050177e-05, "loss": 0.5673, "step": 9790 }, { "epoch": 0.25, "grad_norm": 5.769728660583496, "learning_rate": 1.7547107904942014e-05, "loss": 0.8705, "step": 9791 }, { "epoch": 0.25, "grad_norm": 1.9828276634216309, "learning_rate": 1.7546563257372334e-05, "loss": 0.6968, "step": 9792 }, { "epoch": 0.25, "grad_norm": 3.9477062225341797, "learning_rate": 1.7546018557796482e-05, "loss": 0.6868, "step": 9793 }, { "epoch": 0.25, "grad_norm": 1.9171669483184814, "learning_rate": 1.754547380621821e-05, "loss": 0.5178, "step": 9794 }, { "epoch": 0.25, "grad_norm": 4.221776962280273, "learning_rate": 1.7544929002641274e-05, "loss": 0.62, "step": 9795 }, { "epoch": 0.25, "grad_norm": 7.602507591247559, "learning_rate": 1.7544384147069427e-05, "loss": 0.9007, "step": 9796 }, { "epoch": 0.25, "grad_norm": 3.9985501766204834, "learning_rate": 1.7543839239506423e-05, "loss": 0.7421, "step": 9797 }, { "epoch": 0.25, "grad_norm": 1.4693623781204224, "learning_rate": 1.754329427995602e-05, "loss": 0.6802, "step": 9798 }, { "epoch": 0.25, "grad_norm": 4.293375492095947, "learning_rate": 1.754274926842197e-05, "loss": 0.7672, "step": 9799 }, { "epoch": 0.25, "grad_norm": 3.6606686115264893, "learning_rate": 1.7542204204908035e-05, "loss": 0.5822, "step": 9800 }, { "epoch": 0.25, "grad_norm": 5.755372047424316, "learning_rate": 1.7541659089417964e-05, "loss": 0.6658, "step": 9801 }, { "epoch": 0.25, "grad_norm": 2.669382333755493, "learning_rate": 1.7541113921955518e-05, "loss": 0.7148, "step": 9802 }, { "epoch": 0.25, "grad_norm": 1.9456450939178467, "learning_rate": 1.7540568702524453e-05, "loss": 0.6627, "step": 9803 }, { "epoch": 0.25, "grad_norm": 1.5310102701187134, "learning_rate": 1.7540023431128523e-05, "loss": 0.5835, "step": 9804 }, { "epoch": 0.25, "grad_norm": 1.8024617433547974, "learning_rate": 1.7539478107771492e-05, "loss": 0.7529, "step": 9805 }, { "epoch": 0.25, "grad_norm": 3.4683947563171387, "learning_rate": 1.753893273245711e-05, "loss": 0.7942, "step": 9806 }, { "epoch": 0.25, "grad_norm": 2.8734512329101562, "learning_rate": 1.7538387305189145e-05, "loss": 0.6109, "step": 9807 }, { "epoch": 0.25, "grad_norm": 3.1980812549591064, "learning_rate": 1.753784182597135e-05, "loss": 0.6821, "step": 9808 }, { "epoch": 0.25, "grad_norm": 2.227355480194092, "learning_rate": 1.753729629480748e-05, "loss": 0.7566, "step": 9809 }, { "epoch": 0.25, "grad_norm": 1.313779354095459, "learning_rate": 1.7536750711701305e-05, "loss": 0.5247, "step": 9810 }, { "epoch": 0.25, "grad_norm": 2.586393356323242, "learning_rate": 1.7536205076656577e-05, "loss": 0.7611, "step": 9811 }, { "epoch": 0.25, "grad_norm": 6.717231273651123, "learning_rate": 1.753565938967706e-05, "loss": 0.6303, "step": 9812 }, { "epoch": 0.25, "grad_norm": 2.379399299621582, "learning_rate": 1.753511365076651e-05, "loss": 0.7175, "step": 9813 }, { "epoch": 0.25, "grad_norm": 2.372673988342285, "learning_rate": 1.753456785992869e-05, "loss": 0.671, "step": 9814 }, { "epoch": 0.25, "grad_norm": 1.3022841215133667, "learning_rate": 1.7534022017167368e-05, "loss": 0.6555, "step": 9815 }, { "epoch": 0.25, "grad_norm": 2.0260915756225586, "learning_rate": 1.7533476122486296e-05, "loss": 0.4993, "step": 9816 }, { "epoch": 0.25, "grad_norm": 3.0676214694976807, "learning_rate": 1.7532930175889236e-05, "loss": 0.548, "step": 9817 }, { "epoch": 0.25, "grad_norm": 2.1812241077423096, "learning_rate": 1.7532384177379956e-05, "loss": 0.5654, "step": 9818 }, { "epoch": 0.25, "grad_norm": 2.489346742630005, "learning_rate": 1.753183812696222e-05, "loss": 0.6622, "step": 9819 }, { "epoch": 0.25, "grad_norm": 2.4943718910217285, "learning_rate": 1.7531292024639786e-05, "loss": 0.6333, "step": 9820 }, { "epoch": 0.25, "grad_norm": 1.2891607284545898, "learning_rate": 1.7530745870416418e-05, "loss": 0.5982, "step": 9821 }, { "epoch": 0.25, "grad_norm": 1.6321643590927124, "learning_rate": 1.7530199664295883e-05, "loss": 0.6515, "step": 9822 }, { "epoch": 0.25, "grad_norm": 1.6260541677474976, "learning_rate": 1.752965340628194e-05, "loss": 0.6365, "step": 9823 }, { "epoch": 0.25, "grad_norm": 1.512483835220337, "learning_rate": 1.7529107096378353e-05, "loss": 0.5699, "step": 9824 }, { "epoch": 0.25, "grad_norm": 5.940677642822266, "learning_rate": 1.7528560734588897e-05, "loss": 0.957, "step": 9825 }, { "epoch": 0.25, "grad_norm": 2.425854444503784, "learning_rate": 1.7528014320917325e-05, "loss": 0.5064, "step": 9826 }, { "epoch": 0.25, "grad_norm": 1.8807259798049927, "learning_rate": 1.7527467855367414e-05, "loss": 0.6909, "step": 9827 }, { "epoch": 0.25, "grad_norm": 2.4522953033447266, "learning_rate": 1.752692133794292e-05, "loss": 0.8191, "step": 9828 }, { "epoch": 0.25, "grad_norm": 2.6418652534484863, "learning_rate": 1.752637476864761e-05, "loss": 0.6669, "step": 9829 }, { "epoch": 0.25, "grad_norm": 1.6875723600387573, "learning_rate": 1.752582814748526e-05, "loss": 0.5324, "step": 9830 }, { "epoch": 0.25, "grad_norm": 1.6333187818527222, "learning_rate": 1.7525281474459625e-05, "loss": 0.5745, "step": 9831 }, { "epoch": 0.25, "grad_norm": 3.0091772079467773, "learning_rate": 1.752473474957448e-05, "loss": 0.441, "step": 9832 }, { "epoch": 0.25, "grad_norm": 1.158387541770935, "learning_rate": 1.752418797283359e-05, "loss": 0.4699, "step": 9833 }, { "epoch": 0.25, "grad_norm": 5.258322238922119, "learning_rate": 1.7523641144240727e-05, "loss": 0.684, "step": 9834 }, { "epoch": 0.25, "grad_norm": 2.2402710914611816, "learning_rate": 1.7523094263799655e-05, "loss": 0.5993, "step": 9835 }, { "epoch": 0.25, "grad_norm": 2.2548768520355225, "learning_rate": 1.7522547331514142e-05, "loss": 0.5808, "step": 9836 }, { "epoch": 0.25, "grad_norm": 1.5007444620132446, "learning_rate": 1.7522000347387957e-05, "loss": 0.5626, "step": 9837 }, { "epoch": 0.25, "grad_norm": 3.260747194290161, "learning_rate": 1.7521453311424873e-05, "loss": 0.5868, "step": 9838 }, { "epoch": 0.25, "grad_norm": 2.540231943130493, "learning_rate": 1.752090622362866e-05, "loss": 0.572, "step": 9839 }, { "epoch": 0.25, "grad_norm": 2.7502238750457764, "learning_rate": 1.7520359084003087e-05, "loss": 0.5318, "step": 9840 }, { "epoch": 0.25, "grad_norm": 1.5656421184539795, "learning_rate": 1.7519811892551923e-05, "loss": 0.7192, "step": 9841 }, { "epoch": 0.25, "grad_norm": 2.6331052780151367, "learning_rate": 1.7519264649278938e-05, "loss": 0.769, "step": 9842 }, { "epoch": 0.25, "grad_norm": 1.6129655838012695, "learning_rate": 1.7518717354187906e-05, "loss": 0.6639, "step": 9843 }, { "epoch": 0.25, "grad_norm": 3.074397325515747, "learning_rate": 1.75181700072826e-05, "loss": 0.6148, "step": 9844 }, { "epoch": 0.25, "grad_norm": 2.6211190223693848, "learning_rate": 1.7517622608566786e-05, "loss": 0.65, "step": 9845 }, { "epoch": 0.25, "grad_norm": 1.9917529821395874, "learning_rate": 1.7517075158044242e-05, "loss": 0.7194, "step": 9846 }, { "epoch": 0.25, "grad_norm": 2.963860034942627, "learning_rate": 1.751652765571874e-05, "loss": 0.7117, "step": 9847 }, { "epoch": 0.25, "grad_norm": 5.0228047370910645, "learning_rate": 1.751598010159405e-05, "loss": 0.522, "step": 9848 }, { "epoch": 0.25, "grad_norm": 2.4547133445739746, "learning_rate": 1.7515432495673946e-05, "loss": 0.5149, "step": 9849 }, { "epoch": 0.25, "grad_norm": 1.5568517446517944, "learning_rate": 1.7514884837962204e-05, "loss": 0.6396, "step": 9850 }, { "epoch": 0.25, "grad_norm": 1.6041772365570068, "learning_rate": 1.7514337128462596e-05, "loss": 0.7143, "step": 9851 }, { "epoch": 0.25, "grad_norm": 3.923522710800171, "learning_rate": 1.75137893671789e-05, "loss": 0.6766, "step": 9852 }, { "epoch": 0.25, "grad_norm": 1.9659364223480225, "learning_rate": 1.7513241554114886e-05, "loss": 0.6123, "step": 9853 }, { "epoch": 0.25, "grad_norm": 3.539652109146118, "learning_rate": 1.7512693689274335e-05, "loss": 0.6283, "step": 9854 }, { "epoch": 0.25, "grad_norm": 1.4603769779205322, "learning_rate": 1.7512145772661016e-05, "loss": 0.6001, "step": 9855 }, { "epoch": 0.25, "grad_norm": 1.1597274541854858, "learning_rate": 1.7511597804278707e-05, "loss": 0.6097, "step": 9856 }, { "epoch": 0.25, "grad_norm": 2.58349347114563, "learning_rate": 1.751104978413119e-05, "loss": 0.5653, "step": 9857 }, { "epoch": 0.25, "grad_norm": 1.3019455671310425, "learning_rate": 1.751050171222223e-05, "loss": 0.6871, "step": 9858 }, { "epoch": 0.25, "grad_norm": 1.4124925136566162, "learning_rate": 1.7509953588555614e-05, "loss": 0.84, "step": 9859 }, { "epoch": 0.25, "grad_norm": 1.9576846361160278, "learning_rate": 1.7509405413135115e-05, "loss": 0.7071, "step": 9860 }, { "epoch": 0.25, "grad_norm": 4.626882076263428, "learning_rate": 1.7508857185964515e-05, "loss": 0.6361, "step": 9861 }, { "epoch": 0.25, "grad_norm": 1.8818999528884888, "learning_rate": 1.7508308907047588e-05, "loss": 0.7182, "step": 9862 }, { "epoch": 0.25, "grad_norm": 1.6118731498718262, "learning_rate": 1.7507760576388112e-05, "loss": 0.5725, "step": 9863 }, { "epoch": 0.25, "grad_norm": 1.47458815574646, "learning_rate": 1.7507212193989868e-05, "loss": 0.5417, "step": 9864 }, { "epoch": 0.25, "grad_norm": 4.315226078033447, "learning_rate": 1.750666375985663e-05, "loss": 0.4923, "step": 9865 }, { "epoch": 0.25, "grad_norm": 1.0930447578430176, "learning_rate": 1.7506115273992183e-05, "loss": 0.6146, "step": 9866 }, { "epoch": 0.25, "grad_norm": 1.4345396757125854, "learning_rate": 1.750556673640031e-05, "loss": 0.6515, "step": 9867 }, { "epoch": 0.25, "grad_norm": 3.9107353687286377, "learning_rate": 1.7505018147084783e-05, "loss": 0.7207, "step": 9868 }, { "epoch": 0.25, "grad_norm": 8.99720287322998, "learning_rate": 1.7504469506049385e-05, "loss": 0.4974, "step": 9869 }, { "epoch": 0.25, "grad_norm": 4.312400817871094, "learning_rate": 1.75039208132979e-05, "loss": 0.6505, "step": 9870 }, { "epoch": 0.25, "grad_norm": 2.330885887145996, "learning_rate": 1.7503372068834105e-05, "loss": 0.4743, "step": 9871 }, { "epoch": 0.25, "grad_norm": 1.1010922193527222, "learning_rate": 1.7502823272661787e-05, "loss": 0.5001, "step": 9872 }, { "epoch": 0.25, "grad_norm": 2.535017251968384, "learning_rate": 1.7502274424784725e-05, "loss": 0.622, "step": 9873 }, { "epoch": 0.25, "grad_norm": 1.78798246383667, "learning_rate": 1.75017255252067e-05, "loss": 0.7311, "step": 9874 }, { "epoch": 0.25, "grad_norm": 1.8930141925811768, "learning_rate": 1.7501176573931492e-05, "loss": 0.5512, "step": 9875 }, { "epoch": 0.25, "grad_norm": 1.554344892501831, "learning_rate": 1.750062757096289e-05, "loss": 0.6652, "step": 9876 }, { "epoch": 0.25, "grad_norm": 1.8576061725616455, "learning_rate": 1.7500078516304676e-05, "loss": 0.6805, "step": 9877 }, { "epoch": 0.25, "grad_norm": 2.0566837787628174, "learning_rate": 1.7499529409960633e-05, "loss": 0.6338, "step": 9878 }, { "epoch": 0.25, "grad_norm": 5.373683929443359, "learning_rate": 1.7498980251934546e-05, "loss": 0.753, "step": 9879 }, { "epoch": 0.25, "grad_norm": 1.6231805086135864, "learning_rate": 1.7498431042230196e-05, "loss": 0.5866, "step": 9880 }, { "epoch": 0.25, "grad_norm": 12.266666412353516, "learning_rate": 1.749788178085137e-05, "loss": 0.6149, "step": 9881 }, { "epoch": 0.25, "grad_norm": 1.9386354684829712, "learning_rate": 1.7497332467801857e-05, "loss": 0.4748, "step": 9882 }, { "epoch": 0.25, "grad_norm": 2.0442569255828857, "learning_rate": 1.7496783103085436e-05, "loss": 0.5833, "step": 9883 }, { "epoch": 0.25, "grad_norm": 1.7595170736312866, "learning_rate": 1.7496233686705894e-05, "loss": 0.6104, "step": 9884 }, { "epoch": 0.25, "grad_norm": 2.1789016723632812, "learning_rate": 1.7495684218667023e-05, "loss": 0.6443, "step": 9885 }, { "epoch": 0.25, "grad_norm": 4.848660469055176, "learning_rate": 1.7495134698972602e-05, "loss": 0.6584, "step": 9886 }, { "epoch": 0.25, "grad_norm": 2.890136241912842, "learning_rate": 1.7494585127626424e-05, "loss": 0.767, "step": 9887 }, { "epoch": 0.25, "grad_norm": 1.956856608390808, "learning_rate": 1.7494035504632277e-05, "loss": 0.7672, "step": 9888 }, { "epoch": 0.25, "grad_norm": 4.366715908050537, "learning_rate": 1.749348582999394e-05, "loss": 0.611, "step": 9889 }, { "epoch": 0.25, "grad_norm": 1.570462942123413, "learning_rate": 1.749293610371521e-05, "loss": 0.5459, "step": 9890 }, { "epoch": 0.25, "grad_norm": 3.4174015522003174, "learning_rate": 1.7492386325799867e-05, "loss": 0.571, "step": 9891 }, { "epoch": 0.25, "grad_norm": 1.3070539236068726, "learning_rate": 1.749183649625171e-05, "loss": 0.522, "step": 9892 }, { "epoch": 0.25, "grad_norm": 2.835355758666992, "learning_rate": 1.749128661507452e-05, "loss": 0.7311, "step": 9893 }, { "epoch": 0.25, "grad_norm": 1.9659099578857422, "learning_rate": 1.749073668227209e-05, "loss": 0.5305, "step": 9894 }, { "epoch": 0.25, "grad_norm": 2.9879672527313232, "learning_rate": 1.749018669784821e-05, "loss": 0.8587, "step": 9895 }, { "epoch": 0.25, "grad_norm": 1.9037131071090698, "learning_rate": 1.7489636661806667e-05, "loss": 0.4867, "step": 9896 }, { "epoch": 0.25, "grad_norm": 2.1398351192474365, "learning_rate": 1.7489086574151254e-05, "loss": 0.669, "step": 9897 }, { "epoch": 0.25, "grad_norm": 2.075637102127075, "learning_rate": 1.7488536434885762e-05, "loss": 0.6132, "step": 9898 }, { "epoch": 0.25, "grad_norm": 4.562466144561768, "learning_rate": 1.748798624401398e-05, "loss": 0.5686, "step": 9899 }, { "epoch": 0.25, "grad_norm": 10.2160062789917, "learning_rate": 1.7487436001539702e-05, "loss": 0.682, "step": 9900 }, { "epoch": 0.25, "grad_norm": 3.404834032058716, "learning_rate": 1.7486885707466725e-05, "loss": 0.7703, "step": 9901 }, { "epoch": 0.25, "grad_norm": 3.2296671867370605, "learning_rate": 1.748633536179883e-05, "loss": 0.622, "step": 9902 }, { "epoch": 0.25, "grad_norm": 1.793083667755127, "learning_rate": 1.7485784964539813e-05, "loss": 0.6448, "step": 9903 }, { "epoch": 0.25, "grad_norm": 1.9727805852890015, "learning_rate": 1.748523451569347e-05, "loss": 0.5145, "step": 9904 }, { "epoch": 0.25, "grad_norm": 2.582596778869629, "learning_rate": 1.7484684015263596e-05, "loss": 0.642, "step": 9905 }, { "epoch": 0.25, "grad_norm": 1.2257025241851807, "learning_rate": 1.7484133463253982e-05, "loss": 0.643, "step": 9906 }, { "epoch": 0.25, "grad_norm": 2.004868984222412, "learning_rate": 1.748358285966842e-05, "loss": 0.6432, "step": 9907 }, { "epoch": 0.25, "grad_norm": 5.99680233001709, "learning_rate": 1.7483032204510707e-05, "loss": 0.7429, "step": 9908 }, { "epoch": 0.25, "grad_norm": 2.157910108566284, "learning_rate": 1.748248149778464e-05, "loss": 0.8231, "step": 9909 }, { "epoch": 0.25, "grad_norm": 1.8221683502197266, "learning_rate": 1.7481930739494005e-05, "loss": 0.7582, "step": 9910 }, { "epoch": 0.25, "grad_norm": 3.000682830810547, "learning_rate": 1.748137992964261e-05, "loss": 0.4689, "step": 9911 }, { "epoch": 0.25, "grad_norm": 1.9332857131958008, "learning_rate": 1.7480829068234243e-05, "loss": 0.6499, "step": 9912 }, { "epoch": 0.25, "grad_norm": 1.4317948818206787, "learning_rate": 1.7480278155272702e-05, "loss": 0.6493, "step": 9913 }, { "epoch": 0.25, "grad_norm": 1.49766206741333, "learning_rate": 1.7479727190761786e-05, "loss": 0.4578, "step": 9914 }, { "epoch": 0.25, "grad_norm": 2.2873003482818604, "learning_rate": 1.7479176174705287e-05, "loss": 0.7472, "step": 9915 }, { "epoch": 0.25, "grad_norm": 3.249175548553467, "learning_rate": 1.7478625107107002e-05, "loss": 0.6784, "step": 9916 }, { "epoch": 0.25, "grad_norm": 2.9121639728546143, "learning_rate": 1.7478073987970736e-05, "loss": 0.7385, "step": 9917 }, { "epoch": 0.25, "grad_norm": 1.4123427867889404, "learning_rate": 1.747752281730028e-05, "loss": 0.5736, "step": 9918 }, { "epoch": 0.25, "grad_norm": 1.2340495586395264, "learning_rate": 1.7476971595099436e-05, "loss": 0.5679, "step": 9919 }, { "epoch": 0.25, "grad_norm": 2.7228705883026123, "learning_rate": 1.7476420321372e-05, "loss": 0.5338, "step": 9920 }, { "epoch": 0.25, "grad_norm": 1.6816760301589966, "learning_rate": 1.7475868996121773e-05, "loss": 0.6121, "step": 9921 }, { "epoch": 0.25, "grad_norm": 2.7617430686950684, "learning_rate": 1.7475317619352553e-05, "loss": 0.6947, "step": 9922 }, { "epoch": 0.25, "grad_norm": 2.391611337661743, "learning_rate": 1.747476619106814e-05, "loss": 0.5838, "step": 9923 }, { "epoch": 0.25, "grad_norm": 3.2068119049072266, "learning_rate": 1.7474214711272334e-05, "loss": 0.6162, "step": 9924 }, { "epoch": 0.25, "grad_norm": 2.2732861042022705, "learning_rate": 1.747366317996894e-05, "loss": 0.5707, "step": 9925 }, { "epoch": 0.25, "grad_norm": 2.2704715728759766, "learning_rate": 1.747311159716175e-05, "loss": 0.4653, "step": 9926 }, { "epoch": 0.25, "grad_norm": 1.140160083770752, "learning_rate": 1.7472559962854573e-05, "loss": 0.5641, "step": 9927 }, { "epoch": 0.25, "grad_norm": 2.5304393768310547, "learning_rate": 1.7472008277051206e-05, "loss": 0.4998, "step": 9928 }, { "epoch": 0.25, "grad_norm": 2.9109864234924316, "learning_rate": 1.7471456539755452e-05, "loss": 0.6588, "step": 9929 }, { "epoch": 0.25, "grad_norm": 5.337095260620117, "learning_rate": 1.7470904750971113e-05, "loss": 0.5672, "step": 9930 }, { "epoch": 0.25, "grad_norm": 1.4279265403747559, "learning_rate": 1.7470352910701995e-05, "loss": 0.5347, "step": 9931 }, { "epoch": 0.25, "grad_norm": 2.6209988594055176, "learning_rate": 1.7469801018951894e-05, "loss": 0.7045, "step": 9932 }, { "epoch": 0.25, "grad_norm": 2.41233229637146, "learning_rate": 1.7469249075724623e-05, "loss": 0.5798, "step": 9933 }, { "epoch": 0.25, "grad_norm": 5.127735614776611, "learning_rate": 1.7468697081023976e-05, "loss": 0.5738, "step": 9934 }, { "epoch": 0.25, "grad_norm": 1.4556249380111694, "learning_rate": 1.7468145034853762e-05, "loss": 0.653, "step": 9935 }, { "epoch": 0.25, "grad_norm": 2.550210952758789, "learning_rate": 1.7467592937217782e-05, "loss": 0.6262, "step": 9936 }, { "epoch": 0.25, "grad_norm": 2.351405620574951, "learning_rate": 1.7467040788119847e-05, "loss": 0.57, "step": 9937 }, { "epoch": 0.25, "grad_norm": 3.2509877681732178, "learning_rate": 1.7466488587563754e-05, "loss": 0.6075, "step": 9938 }, { "epoch": 0.25, "grad_norm": 2.1311113834381104, "learning_rate": 1.746593633555332e-05, "loss": 0.6117, "step": 9939 }, { "epoch": 0.25, "grad_norm": 3.0342776775360107, "learning_rate": 1.7465384032092334e-05, "loss": 0.831, "step": 9940 }, { "epoch": 0.25, "grad_norm": 2.1186773777008057, "learning_rate": 1.7464831677184617e-05, "loss": 0.6474, "step": 9941 }, { "epoch": 0.25, "grad_norm": 2.610185384750366, "learning_rate": 1.746427927083397e-05, "loss": 0.5723, "step": 9942 }, { "epoch": 0.25, "grad_norm": 3.974600076675415, "learning_rate": 1.7463726813044196e-05, "loss": 0.542, "step": 9943 }, { "epoch": 0.25, "grad_norm": 2.9317805767059326, "learning_rate": 1.7463174303819112e-05, "loss": 0.6168, "step": 9944 }, { "epoch": 0.25, "grad_norm": 1.797041416168213, "learning_rate": 1.7462621743162512e-05, "loss": 0.7676, "step": 9945 }, { "epoch": 0.25, "grad_norm": 1.495540976524353, "learning_rate": 1.7462069131078215e-05, "loss": 0.6573, "step": 9946 }, { "epoch": 0.25, "grad_norm": 1.1729527711868286, "learning_rate": 1.7461516467570026e-05, "loss": 0.5182, "step": 9947 }, { "epoch": 0.25, "grad_norm": 10.754365921020508, "learning_rate": 1.7460963752641754e-05, "loss": 0.5957, "step": 9948 }, { "epoch": 0.25, "grad_norm": 1.9397799968719482, "learning_rate": 1.746041098629721e-05, "loss": 0.6506, "step": 9949 }, { "epoch": 0.26, "grad_norm": 3.9158337116241455, "learning_rate": 1.7459858168540194e-05, "loss": 0.6322, "step": 9950 }, { "epoch": 0.26, "grad_norm": 2.4644246101379395, "learning_rate": 1.7459305299374526e-05, "loss": 0.6413, "step": 9951 }, { "epoch": 0.26, "grad_norm": 1.3682442903518677, "learning_rate": 1.745875237880401e-05, "loss": 0.614, "step": 9952 }, { "epoch": 0.26, "grad_norm": 4.599488258361816, "learning_rate": 1.745819940683246e-05, "loss": 0.7619, "step": 9953 }, { "epoch": 0.26, "grad_norm": 1.9956459999084473, "learning_rate": 1.7457646383463687e-05, "loss": 0.6803, "step": 9954 }, { "epoch": 0.26, "grad_norm": 1.1162993907928467, "learning_rate": 1.74570933087015e-05, "loss": 0.6119, "step": 9955 }, { "epoch": 0.26, "grad_norm": 3.70212984085083, "learning_rate": 1.745654018254971e-05, "loss": 0.5736, "step": 9956 }, { "epoch": 0.26, "grad_norm": 3.8595995903015137, "learning_rate": 1.745598700501213e-05, "loss": 0.5783, "step": 9957 }, { "epoch": 0.26, "grad_norm": 3.1913342475891113, "learning_rate": 1.7455433776092572e-05, "loss": 0.7587, "step": 9958 }, { "epoch": 0.26, "grad_norm": 1.3870292901992798, "learning_rate": 1.745488049579485e-05, "loss": 0.5949, "step": 9959 }, { "epoch": 0.26, "grad_norm": 1.1512086391448975, "learning_rate": 1.745432716412277e-05, "loss": 0.5263, "step": 9960 }, { "epoch": 0.26, "grad_norm": 2.7726328372955322, "learning_rate": 1.7453773781080158e-05, "loss": 0.7289, "step": 9961 }, { "epoch": 0.26, "grad_norm": 4.862994194030762, "learning_rate": 1.745322034667081e-05, "loss": 0.6596, "step": 9962 }, { "epoch": 0.26, "grad_norm": 1.82870352268219, "learning_rate": 1.7452666860898558e-05, "loss": 0.6789, "step": 9963 }, { "epoch": 0.26, "grad_norm": 2.1735260486602783, "learning_rate": 1.7452113323767208e-05, "loss": 0.5518, "step": 9964 }, { "epoch": 0.26, "grad_norm": 3.6769490242004395, "learning_rate": 1.745155973528057e-05, "loss": 0.7788, "step": 9965 }, { "epoch": 0.26, "grad_norm": 2.0733795166015625, "learning_rate": 1.745100609544247e-05, "loss": 0.6047, "step": 9966 }, { "epoch": 0.26, "grad_norm": 6.014875888824463, "learning_rate": 1.7450452404256713e-05, "loss": 0.7503, "step": 9967 }, { "epoch": 0.26, "grad_norm": 2.969193696975708, "learning_rate": 1.7449898661727117e-05, "loss": 0.649, "step": 9968 }, { "epoch": 0.26, "grad_norm": 2.5807764530181885, "learning_rate": 1.7449344867857502e-05, "loss": 0.6929, "step": 9969 }, { "epoch": 0.26, "grad_norm": 2.5598268508911133, "learning_rate": 1.7448791022651684e-05, "loss": 0.6287, "step": 9970 }, { "epoch": 0.26, "grad_norm": 1.8469921350479126, "learning_rate": 1.7448237126113476e-05, "loss": 0.7136, "step": 9971 }, { "epoch": 0.26, "grad_norm": 2.8560168743133545, "learning_rate": 1.74476831782467e-05, "loss": 0.6636, "step": 9972 }, { "epoch": 0.26, "grad_norm": 4.522398948669434, "learning_rate": 1.744712917905517e-05, "loss": 0.6431, "step": 9973 }, { "epoch": 0.26, "grad_norm": 1.756506323814392, "learning_rate": 1.7446575128542703e-05, "loss": 0.5543, "step": 9974 }, { "epoch": 0.26, "grad_norm": 3.5706894397735596, "learning_rate": 1.7446021026713118e-05, "loss": 0.4436, "step": 9975 }, { "epoch": 0.26, "grad_norm": 1.5831820964813232, "learning_rate": 1.7445466873570237e-05, "loss": 0.578, "step": 9976 }, { "epoch": 0.26, "grad_norm": 1.8891501426696777, "learning_rate": 1.744491266911787e-05, "loss": 0.4463, "step": 9977 }, { "epoch": 0.26, "grad_norm": 1.1934746503829956, "learning_rate": 1.7444358413359848e-05, "loss": 0.6633, "step": 9978 }, { "epoch": 0.26, "grad_norm": 1.736739158630371, "learning_rate": 1.7443804106299984e-05, "loss": 0.607, "step": 9979 }, { "epoch": 0.26, "grad_norm": 2.507986545562744, "learning_rate": 1.7443249747942097e-05, "loss": 0.6616, "step": 9980 }, { "epoch": 0.26, "grad_norm": 1.8848165273666382, "learning_rate": 1.744269533829001e-05, "loss": 0.6258, "step": 9981 }, { "epoch": 0.26, "grad_norm": 4.1684184074401855, "learning_rate": 1.744214087734754e-05, "loss": 0.7442, "step": 9982 }, { "epoch": 0.26, "grad_norm": 1.3902119398117065, "learning_rate": 1.7441586365118513e-05, "loss": 0.5526, "step": 9983 }, { "epoch": 0.26, "grad_norm": 2.3328194618225098, "learning_rate": 1.744103180160675e-05, "loss": 0.5516, "step": 9984 }, { "epoch": 0.26, "grad_norm": 4.886640548706055, "learning_rate": 1.7440477186816068e-05, "loss": 0.7338, "step": 9985 }, { "epoch": 0.26, "grad_norm": 1.9759138822555542, "learning_rate": 1.7439922520750295e-05, "loss": 0.6639, "step": 9986 }, { "epoch": 0.26, "grad_norm": 1.6423348188400269, "learning_rate": 1.743936780341325e-05, "loss": 0.6162, "step": 9987 }, { "epoch": 0.26, "grad_norm": 4.30892276763916, "learning_rate": 1.743881303480875e-05, "loss": 0.6685, "step": 9988 }, { "epoch": 0.26, "grad_norm": 1.4753329753875732, "learning_rate": 1.7438258214940628e-05, "loss": 0.6943, "step": 9989 }, { "epoch": 0.26, "grad_norm": 4.1193084716796875, "learning_rate": 1.7437703343812705e-05, "loss": 0.8003, "step": 9990 }, { "epoch": 0.26, "grad_norm": 1.5446542501449585, "learning_rate": 1.7437148421428802e-05, "loss": 0.6139, "step": 9991 }, { "epoch": 0.26, "grad_norm": 1.5915310382843018, "learning_rate": 1.7436593447792742e-05, "loss": 0.5669, "step": 9992 }, { "epoch": 0.26, "grad_norm": 1.986153244972229, "learning_rate": 1.7436038422908356e-05, "loss": 0.6403, "step": 9993 }, { "epoch": 0.26, "grad_norm": 3.0125155448913574, "learning_rate": 1.7435483346779466e-05, "loss": 0.6494, "step": 9994 }, { "epoch": 0.26, "grad_norm": 1.4849059581756592, "learning_rate": 1.743492821940989e-05, "loss": 0.628, "step": 9995 }, { "epoch": 0.26, "grad_norm": 2.102524757385254, "learning_rate": 1.7434373040803464e-05, "loss": 0.6909, "step": 9996 }, { "epoch": 0.26, "grad_norm": 1.9892293214797974, "learning_rate": 1.743381781096401e-05, "loss": 0.6089, "step": 9997 }, { "epoch": 0.26, "grad_norm": 1.9696030616760254, "learning_rate": 1.7433262529895352e-05, "loss": 0.5746, "step": 9998 }, { "epoch": 0.26, "grad_norm": 2.0521655082702637, "learning_rate": 1.743270719760132e-05, "loss": 0.7032, "step": 9999 }, { "epoch": 0.26, "grad_norm": 5.778636932373047, "learning_rate": 1.7432151814085742e-05, "loss": 0.6176, "step": 10000 }, { "epoch": 0.26, "grad_norm": 2.590379238128662, "learning_rate": 1.743159637935244e-05, "loss": 0.635, "step": 10001 }, { "epoch": 0.26, "grad_norm": 1.7260181903839111, "learning_rate": 1.7431040893405246e-05, "loss": 0.5869, "step": 10002 }, { "epoch": 0.26, "grad_norm": 2.5280284881591797, "learning_rate": 1.7430485356247984e-05, "loss": 0.7886, "step": 10003 }, { "epoch": 0.26, "grad_norm": 1.239031434059143, "learning_rate": 1.742992976788449e-05, "loss": 0.6016, "step": 10004 }, { "epoch": 0.26, "grad_norm": 2.2839195728302, "learning_rate": 1.7429374128318585e-05, "loss": 0.4882, "step": 10005 }, { "epoch": 0.26, "grad_norm": 1.9311527013778687, "learning_rate": 1.7428818437554104e-05, "loss": 0.7261, "step": 10006 }, { "epoch": 0.26, "grad_norm": 5.582841873168945, "learning_rate": 1.7428262695594867e-05, "loss": 0.7314, "step": 10007 }, { "epoch": 0.26, "grad_norm": 2.077343463897705, "learning_rate": 1.7427706902444717e-05, "loss": 0.6318, "step": 10008 }, { "epoch": 0.26, "grad_norm": 2.7736122608184814, "learning_rate": 1.7427151058107474e-05, "loss": 0.5899, "step": 10009 }, { "epoch": 0.26, "grad_norm": 1.8002499341964722, "learning_rate": 1.742659516258697e-05, "loss": 0.6666, "step": 10010 }, { "epoch": 0.26, "grad_norm": 5.006195545196533, "learning_rate": 1.7426039215887044e-05, "loss": 0.7062, "step": 10011 }, { "epoch": 0.26, "grad_norm": 2.7512874603271484, "learning_rate": 1.742548321801152e-05, "loss": 0.766, "step": 10012 }, { "epoch": 0.26, "grad_norm": 1.7522438764572144, "learning_rate": 1.742492716896423e-05, "loss": 0.6483, "step": 10013 }, { "epoch": 0.26, "grad_norm": 2.924441337585449, "learning_rate": 1.7424371068749005e-05, "loss": 0.5264, "step": 10014 }, { "epoch": 0.26, "grad_norm": 2.66499924659729, "learning_rate": 1.7423814917369678e-05, "loss": 0.5783, "step": 10015 }, { "epoch": 0.26, "grad_norm": 2.809025287628174, "learning_rate": 1.7423258714830086e-05, "loss": 0.5934, "step": 10016 }, { "epoch": 0.26, "grad_norm": 1.1487040519714355, "learning_rate": 1.7422702461134058e-05, "loss": 0.5075, "step": 10017 }, { "epoch": 0.26, "grad_norm": 1.6919937133789062, "learning_rate": 1.7422146156285425e-05, "loss": 0.4612, "step": 10018 }, { "epoch": 0.26, "grad_norm": 3.2656073570251465, "learning_rate": 1.7421589800288027e-05, "loss": 0.5965, "step": 10019 }, { "epoch": 0.26, "grad_norm": 2.3258869647979736, "learning_rate": 1.7421033393145692e-05, "loss": 0.534, "step": 10020 }, { "epoch": 0.26, "grad_norm": 2.1335222721099854, "learning_rate": 1.742047693486226e-05, "loss": 0.7313, "step": 10021 }, { "epoch": 0.26, "grad_norm": 2.195535182952881, "learning_rate": 1.741992042544156e-05, "loss": 0.5974, "step": 10022 }, { "epoch": 0.26, "grad_norm": 3.293560028076172, "learning_rate": 1.7419363864887436e-05, "loss": 0.6156, "step": 10023 }, { "epoch": 0.26, "grad_norm": 5.504318714141846, "learning_rate": 1.741880725320371e-05, "loss": 0.7189, "step": 10024 }, { "epoch": 0.26, "grad_norm": 1.6229654550552368, "learning_rate": 1.741825059039423e-05, "loss": 0.6508, "step": 10025 }, { "epoch": 0.26, "grad_norm": 3.762342929840088, "learning_rate": 1.7417693876462828e-05, "loss": 0.7839, "step": 10026 }, { "epoch": 0.26, "grad_norm": 2.1699013710021973, "learning_rate": 1.7417137111413338e-05, "loss": 0.6614, "step": 10027 }, { "epoch": 0.26, "grad_norm": 2.6639134883880615, "learning_rate": 1.74165802952496e-05, "loss": 0.6888, "step": 10028 }, { "epoch": 0.26, "grad_norm": 2.205469846725464, "learning_rate": 1.7416023427975452e-05, "loss": 0.5965, "step": 10029 }, { "epoch": 0.26, "grad_norm": 3.5807409286499023, "learning_rate": 1.7415466509594726e-05, "loss": 0.6876, "step": 10030 }, { "epoch": 0.26, "grad_norm": 2.1913976669311523, "learning_rate": 1.7414909540111263e-05, "loss": 0.7256, "step": 10031 }, { "epoch": 0.26, "grad_norm": 1.8423322439193726, "learning_rate": 1.7414352519528904e-05, "loss": 0.5462, "step": 10032 }, { "epoch": 0.26, "grad_norm": 2.393606185913086, "learning_rate": 1.741379544785149e-05, "loss": 0.4946, "step": 10033 }, { "epoch": 0.26, "grad_norm": 1.7657864093780518, "learning_rate": 1.7413238325082848e-05, "loss": 0.6962, "step": 10034 }, { "epoch": 0.26, "grad_norm": 3.86562180519104, "learning_rate": 1.7412681151226828e-05, "loss": 0.6441, "step": 10035 }, { "epoch": 0.26, "grad_norm": 2.062845468521118, "learning_rate": 1.7412123926287267e-05, "loss": 0.6843, "step": 10036 }, { "epoch": 0.26, "grad_norm": 3.6449532508850098, "learning_rate": 1.7411566650268005e-05, "loss": 0.4061, "step": 10037 }, { "epoch": 0.26, "grad_norm": 4.4690961837768555, "learning_rate": 1.741100932317288e-05, "loss": 0.6913, "step": 10038 }, { "epoch": 0.26, "grad_norm": 2.0342206954956055, "learning_rate": 1.7410451945005736e-05, "loss": 0.7252, "step": 10039 }, { "epoch": 0.26, "grad_norm": 2.0400447845458984, "learning_rate": 1.7409894515770414e-05, "loss": 0.567, "step": 10040 }, { "epoch": 0.26, "grad_norm": 1.9737262725830078, "learning_rate": 1.7409337035470756e-05, "loss": 0.5377, "step": 10041 }, { "epoch": 0.26, "grad_norm": 2.892583131790161, "learning_rate": 1.74087795041106e-05, "loss": 0.5932, "step": 10042 }, { "epoch": 0.26, "grad_norm": 1.5157983303070068, "learning_rate": 1.740822192169379e-05, "loss": 0.4792, "step": 10043 }, { "epoch": 0.26, "grad_norm": 1.6259324550628662, "learning_rate": 1.7407664288224168e-05, "loss": 0.6007, "step": 10044 }, { "epoch": 0.26, "grad_norm": 4.945371150970459, "learning_rate": 1.7407106603705577e-05, "loss": 0.5259, "step": 10045 }, { "epoch": 0.26, "grad_norm": 1.6461677551269531, "learning_rate": 1.740654886814186e-05, "loss": 0.6047, "step": 10046 }, { "epoch": 0.26, "grad_norm": 2.4601473808288574, "learning_rate": 1.7405991081536866e-05, "loss": 0.738, "step": 10047 }, { "epoch": 0.26, "grad_norm": 2.2377140522003174, "learning_rate": 1.740543324389443e-05, "loss": 0.7333, "step": 10048 }, { "epoch": 0.26, "grad_norm": 1.7473663091659546, "learning_rate": 1.7404875355218406e-05, "loss": 0.6201, "step": 10049 }, { "epoch": 0.26, "grad_norm": 1.871813178062439, "learning_rate": 1.7404317415512627e-05, "loss": 0.7047, "step": 10050 }, { "epoch": 0.26, "grad_norm": 1.7144724130630493, "learning_rate": 1.740375942478095e-05, "loss": 0.3891, "step": 10051 }, { "epoch": 0.26, "grad_norm": 2.694654703140259, "learning_rate": 1.740320138302721e-05, "loss": 0.7593, "step": 10052 }, { "epoch": 0.26, "grad_norm": 2.5884549617767334, "learning_rate": 1.740264329025526e-05, "loss": 0.4906, "step": 10053 }, { "epoch": 0.26, "grad_norm": 5.370244979858398, "learning_rate": 1.740208514646894e-05, "loss": 0.6195, "step": 10054 }, { "epoch": 0.26, "grad_norm": 2.5098159313201904, "learning_rate": 1.74015269516721e-05, "loss": 0.5947, "step": 10055 }, { "epoch": 0.26, "grad_norm": 2.053490161895752, "learning_rate": 1.7400968705868592e-05, "loss": 0.6473, "step": 10056 }, { "epoch": 0.26, "grad_norm": 1.815464973449707, "learning_rate": 1.7400410409062254e-05, "loss": 0.6308, "step": 10057 }, { "epoch": 0.26, "grad_norm": 1.8330867290496826, "learning_rate": 1.7399852061256936e-05, "loss": 0.6582, "step": 10058 }, { "epoch": 0.26, "grad_norm": 2.499525308609009, "learning_rate": 1.7399293662456488e-05, "loss": 0.5722, "step": 10059 }, { "epoch": 0.26, "grad_norm": 1.6301190853118896, "learning_rate": 1.7398735212664758e-05, "loss": 0.7078, "step": 10060 }, { "epoch": 0.26, "grad_norm": 1.4837092161178589, "learning_rate": 1.739817671188559e-05, "loss": 0.4646, "step": 10061 }, { "epoch": 0.26, "grad_norm": 2.074052333831787, "learning_rate": 1.7397618160122837e-05, "loss": 0.7538, "step": 10062 }, { "epoch": 0.26, "grad_norm": 2.9606473445892334, "learning_rate": 1.7397059557380345e-05, "loss": 0.7339, "step": 10063 }, { "epoch": 0.26, "grad_norm": 2.821077346801758, "learning_rate": 1.739650090366197e-05, "loss": 0.6459, "step": 10064 }, { "epoch": 0.26, "grad_norm": 2.4268081188201904, "learning_rate": 1.7395942198971555e-05, "loss": 0.7802, "step": 10065 }, { "epoch": 0.26, "grad_norm": 2.644041061401367, "learning_rate": 1.7395383443312955e-05, "loss": 0.6052, "step": 10066 }, { "epoch": 0.26, "grad_norm": 7.708565711975098, "learning_rate": 1.739482463669002e-05, "loss": 0.6825, "step": 10067 }, { "epoch": 0.26, "grad_norm": 1.818186640739441, "learning_rate": 1.7394265779106595e-05, "loss": 0.5495, "step": 10068 }, { "epoch": 0.26, "grad_norm": 2.139979362487793, "learning_rate": 1.7393706870566537e-05, "loss": 0.5073, "step": 10069 }, { "epoch": 0.26, "grad_norm": 2.0476491451263428, "learning_rate": 1.7393147911073696e-05, "loss": 0.4728, "step": 10070 }, { "epoch": 0.26, "grad_norm": 2.9498398303985596, "learning_rate": 1.7392588900631926e-05, "loss": 0.7722, "step": 10071 }, { "epoch": 0.26, "grad_norm": 6.461981773376465, "learning_rate": 1.739202983924508e-05, "loss": 0.834, "step": 10072 }, { "epoch": 0.26, "grad_norm": 5.443655490875244, "learning_rate": 1.7391470726917003e-05, "loss": 0.7007, "step": 10073 }, { "epoch": 0.26, "grad_norm": 1.2261614799499512, "learning_rate": 1.739091156365156e-05, "loss": 0.4311, "step": 10074 }, { "epoch": 0.26, "grad_norm": 1.2336629629135132, "learning_rate": 1.739035234945259e-05, "loss": 0.5904, "step": 10075 }, { "epoch": 0.26, "grad_norm": 2.2836313247680664, "learning_rate": 1.738979308432396e-05, "loss": 0.7156, "step": 10076 }, { "epoch": 0.26, "grad_norm": 6.128686904907227, "learning_rate": 1.7389233768269518e-05, "loss": 0.649, "step": 10077 }, { "epoch": 0.26, "grad_norm": 3.168813705444336, "learning_rate": 1.738867440129312e-05, "loss": 0.711, "step": 10078 }, { "epoch": 0.26, "grad_norm": 3.5213842391967773, "learning_rate": 1.738811498339862e-05, "loss": 0.6169, "step": 10079 }, { "epoch": 0.26, "grad_norm": 2.0131595134735107, "learning_rate": 1.7387555514589872e-05, "loss": 0.5104, "step": 10080 }, { "epoch": 0.26, "grad_norm": 1.6621172428131104, "learning_rate": 1.7386995994870735e-05, "loss": 0.5447, "step": 10081 }, { "epoch": 0.26, "grad_norm": 5.109004020690918, "learning_rate": 1.738643642424506e-05, "loss": 0.5944, "step": 10082 }, { "epoch": 0.26, "grad_norm": 1.4723730087280273, "learning_rate": 1.7385876802716705e-05, "loss": 0.7262, "step": 10083 }, { "epoch": 0.26, "grad_norm": 4.187794208526611, "learning_rate": 1.7385317130289528e-05, "loss": 0.8746, "step": 10084 }, { "epoch": 0.26, "grad_norm": 1.9598079919815063, "learning_rate": 1.7384757406967387e-05, "loss": 0.5617, "step": 10085 }, { "epoch": 0.26, "grad_norm": 3.805753707885742, "learning_rate": 1.7384197632754137e-05, "loss": 0.717, "step": 10086 }, { "epoch": 0.26, "grad_norm": 2.9076550006866455, "learning_rate": 1.7383637807653635e-05, "loss": 0.7562, "step": 10087 }, { "epoch": 0.26, "grad_norm": 2.2335121631622314, "learning_rate": 1.738307793166974e-05, "loss": 0.8469, "step": 10088 }, { "epoch": 0.26, "grad_norm": 2.201817750930786, "learning_rate": 1.7382518004806313e-05, "loss": 0.7627, "step": 10089 }, { "epoch": 0.26, "grad_norm": 3.1313414573669434, "learning_rate": 1.7381958027067205e-05, "loss": 0.8033, "step": 10090 }, { "epoch": 0.26, "grad_norm": 2.843266010284424, "learning_rate": 1.7381397998456283e-05, "loss": 0.8134, "step": 10091 }, { "epoch": 0.26, "grad_norm": 3.148411750793457, "learning_rate": 1.7380837918977404e-05, "loss": 0.7014, "step": 10092 }, { "epoch": 0.26, "grad_norm": 1.6858148574829102, "learning_rate": 1.738027778863443e-05, "loss": 0.5853, "step": 10093 }, { "epoch": 0.26, "grad_norm": 1.4759225845336914, "learning_rate": 1.7379717607431215e-05, "loss": 0.6533, "step": 10094 }, { "epoch": 0.26, "grad_norm": 1.511685848236084, "learning_rate": 1.7379157375371623e-05, "loss": 0.7321, "step": 10095 }, { "epoch": 0.26, "grad_norm": 1.5611088275909424, "learning_rate": 1.7378597092459513e-05, "loss": 0.6847, "step": 10096 }, { "epoch": 0.26, "grad_norm": 1.5042551755905151, "learning_rate": 1.7378036758698748e-05, "loss": 0.4983, "step": 10097 }, { "epoch": 0.26, "grad_norm": 1.4387280941009521, "learning_rate": 1.737747637409319e-05, "loss": 0.5719, "step": 10098 }, { "epoch": 0.26, "grad_norm": 4.389552593231201, "learning_rate": 1.73769159386467e-05, "loss": 0.665, "step": 10099 }, { "epoch": 0.26, "grad_norm": 1.6655521392822266, "learning_rate": 1.7376355452363138e-05, "loss": 0.6441, "step": 10100 }, { "epoch": 0.26, "grad_norm": 2.006105661392212, "learning_rate": 1.7375794915246368e-05, "loss": 0.7728, "step": 10101 }, { "epoch": 0.26, "grad_norm": 2.4350664615631104, "learning_rate": 1.7375234327300255e-05, "loss": 0.622, "step": 10102 }, { "epoch": 0.26, "grad_norm": 2.9274163246154785, "learning_rate": 1.7374673688528662e-05, "loss": 0.8026, "step": 10103 }, { "epoch": 0.26, "grad_norm": 2.081226110458374, "learning_rate": 1.737411299893545e-05, "loss": 0.6264, "step": 10104 }, { "epoch": 0.26, "grad_norm": 5.049841403961182, "learning_rate": 1.7373552258524486e-05, "loss": 0.7552, "step": 10105 }, { "epoch": 0.26, "grad_norm": 2.0170669555664062, "learning_rate": 1.737299146729963e-05, "loss": 0.53, "step": 10106 }, { "epoch": 0.26, "grad_norm": 1.0976063013076782, "learning_rate": 1.737243062526475e-05, "loss": 0.629, "step": 10107 }, { "epoch": 0.26, "grad_norm": 1.406195878982544, "learning_rate": 1.7371869732423708e-05, "loss": 0.6662, "step": 10108 }, { "epoch": 0.26, "grad_norm": 4.351249694824219, "learning_rate": 1.7371308788780373e-05, "loss": 0.6189, "step": 10109 }, { "epoch": 0.26, "grad_norm": 1.5760215520858765, "learning_rate": 1.737074779433861e-05, "loss": 0.5576, "step": 10110 }, { "epoch": 0.26, "grad_norm": 6.99795389175415, "learning_rate": 1.737018674910228e-05, "loss": 0.6429, "step": 10111 }, { "epoch": 0.26, "grad_norm": 1.4392733573913574, "learning_rate": 1.736962565307526e-05, "loss": 0.6036, "step": 10112 }, { "epoch": 0.26, "grad_norm": 4.231525421142578, "learning_rate": 1.7369064506261405e-05, "loss": 0.6125, "step": 10113 }, { "epoch": 0.26, "grad_norm": 1.955366611480713, "learning_rate": 1.736850330866459e-05, "loss": 0.6427, "step": 10114 }, { "epoch": 0.26, "grad_norm": 5.283190727233887, "learning_rate": 1.7367942060288676e-05, "loss": 0.628, "step": 10115 }, { "epoch": 0.26, "grad_norm": 2.7455334663391113, "learning_rate": 1.7367380761137536e-05, "loss": 0.7875, "step": 10116 }, { "epoch": 0.26, "grad_norm": 1.9434659481048584, "learning_rate": 1.7366819411215034e-05, "loss": 0.6419, "step": 10117 }, { "epoch": 0.26, "grad_norm": 2.4526124000549316, "learning_rate": 1.7366258010525047e-05, "loss": 0.6394, "step": 10118 }, { "epoch": 0.26, "grad_norm": 2.420668601989746, "learning_rate": 1.7365696559071433e-05, "loss": 0.7331, "step": 10119 }, { "epoch": 0.26, "grad_norm": 4.948244571685791, "learning_rate": 1.7365135056858066e-05, "loss": 0.6189, "step": 10120 }, { "epoch": 0.26, "grad_norm": 1.2206610441207886, "learning_rate": 1.7364573503888817e-05, "loss": 0.447, "step": 10121 }, { "epoch": 0.26, "grad_norm": 2.5376391410827637, "learning_rate": 1.7364011900167553e-05, "loss": 0.6635, "step": 10122 }, { "epoch": 0.26, "grad_norm": 2.6323657035827637, "learning_rate": 1.7363450245698146e-05, "loss": 0.7723, "step": 10123 }, { "epoch": 0.26, "grad_norm": 3.5725200176239014, "learning_rate": 1.7362888540484466e-05, "loss": 0.9837, "step": 10124 }, { "epoch": 0.26, "grad_norm": 2.4141201972961426, "learning_rate": 1.7362326784530385e-05, "loss": 0.6235, "step": 10125 }, { "epoch": 0.26, "grad_norm": 2.219419479370117, "learning_rate": 1.7361764977839773e-05, "loss": 0.6799, "step": 10126 }, { "epoch": 0.26, "grad_norm": 1.6786702871322632, "learning_rate": 1.73612031204165e-05, "loss": 0.6026, "step": 10127 }, { "epoch": 0.26, "grad_norm": 2.1693813800811768, "learning_rate": 1.736064121226444e-05, "loss": 0.8163, "step": 10128 }, { "epoch": 0.26, "grad_norm": 3.5997731685638428, "learning_rate": 1.7360079253387466e-05, "loss": 0.672, "step": 10129 }, { "epoch": 0.26, "grad_norm": 1.995333194732666, "learning_rate": 1.7359517243789454e-05, "loss": 0.6285, "step": 10130 }, { "epoch": 0.26, "grad_norm": 1.431782603263855, "learning_rate": 1.7358955183474266e-05, "loss": 0.5006, "step": 10131 }, { "epoch": 0.26, "grad_norm": 2.4336674213409424, "learning_rate": 1.7358393072445787e-05, "loss": 0.5741, "step": 10132 }, { "epoch": 0.26, "grad_norm": 2.105346441268921, "learning_rate": 1.7357830910707884e-05, "loss": 0.7841, "step": 10133 }, { "epoch": 0.26, "grad_norm": 1.9370735883712769, "learning_rate": 1.735726869826443e-05, "loss": 0.7016, "step": 10134 }, { "epoch": 0.26, "grad_norm": 1.920952320098877, "learning_rate": 1.735670643511931e-05, "loss": 0.6117, "step": 10135 }, { "epoch": 0.26, "grad_norm": 1.369114637374878, "learning_rate": 1.7356144121276387e-05, "loss": 0.5926, "step": 10136 }, { "epoch": 0.26, "grad_norm": 1.259385347366333, "learning_rate": 1.735558175673954e-05, "loss": 0.5599, "step": 10137 }, { "epoch": 0.26, "grad_norm": 4.357100009918213, "learning_rate": 1.7355019341512643e-05, "loss": 0.7657, "step": 10138 }, { "epoch": 0.26, "grad_norm": 1.7307485342025757, "learning_rate": 1.7354456875599578e-05, "loss": 0.6651, "step": 10139 }, { "epoch": 0.26, "grad_norm": 1.6528569459915161, "learning_rate": 1.7353894359004212e-05, "loss": 0.6339, "step": 10140 }, { "epoch": 0.26, "grad_norm": 1.2963056564331055, "learning_rate": 1.735333179173043e-05, "loss": 0.4347, "step": 10141 }, { "epoch": 0.26, "grad_norm": 4.476770401000977, "learning_rate": 1.7352769173782104e-05, "loss": 0.6534, "step": 10142 }, { "epoch": 0.26, "grad_norm": 3.2952752113342285, "learning_rate": 1.735220650516311e-05, "loss": 0.7447, "step": 10143 }, { "epoch": 0.26, "grad_norm": 2.145141839981079, "learning_rate": 1.735164378587733e-05, "loss": 0.7079, "step": 10144 }, { "epoch": 0.26, "grad_norm": 2.972435474395752, "learning_rate": 1.735108101592864e-05, "loss": 0.4931, "step": 10145 }, { "epoch": 0.26, "grad_norm": 1.6339246034622192, "learning_rate": 1.7350518195320917e-05, "loss": 0.5494, "step": 10146 }, { "epoch": 0.26, "grad_norm": 6.28958797454834, "learning_rate": 1.734995532405804e-05, "loss": 0.6871, "step": 10147 }, { "epoch": 0.26, "grad_norm": 1.599104881286621, "learning_rate": 1.7349392402143893e-05, "loss": 0.4631, "step": 10148 }, { "epoch": 0.26, "grad_norm": 1.9597042798995972, "learning_rate": 1.7348829429582346e-05, "loss": 0.4992, "step": 10149 }, { "epoch": 0.26, "grad_norm": 2.394493341445923, "learning_rate": 1.7348266406377285e-05, "loss": 0.6627, "step": 10150 }, { "epoch": 0.26, "grad_norm": 2.9686081409454346, "learning_rate": 1.734770333253259e-05, "loss": 0.6177, "step": 10151 }, { "epoch": 0.26, "grad_norm": 2.5411505699157715, "learning_rate": 1.7347140208052137e-05, "loss": 0.7382, "step": 10152 }, { "epoch": 0.26, "grad_norm": 1.8392826318740845, "learning_rate": 1.734657703293981e-05, "loss": 0.536, "step": 10153 }, { "epoch": 0.26, "grad_norm": 2.4263975620269775, "learning_rate": 1.7346013807199493e-05, "loss": 0.7189, "step": 10154 }, { "epoch": 0.26, "grad_norm": 2.576688528060913, "learning_rate": 1.7345450530835065e-05, "loss": 0.6238, "step": 10155 }, { "epoch": 0.26, "grad_norm": 1.909888505935669, "learning_rate": 1.7344887203850403e-05, "loss": 0.5465, "step": 10156 }, { "epoch": 0.26, "grad_norm": 1.5030395984649658, "learning_rate": 1.7344323826249394e-05, "loss": 0.7233, "step": 10157 }, { "epoch": 0.26, "grad_norm": 8.861769676208496, "learning_rate": 1.734376039803592e-05, "loss": 0.7279, "step": 10158 }, { "epoch": 0.26, "grad_norm": 1.5915939807891846, "learning_rate": 1.734319691921386e-05, "loss": 0.6688, "step": 10159 }, { "epoch": 0.26, "grad_norm": 3.833913564682007, "learning_rate": 1.7342633389787102e-05, "loss": 0.6738, "step": 10160 }, { "epoch": 0.26, "grad_norm": 4.0991997718811035, "learning_rate": 1.734206980975953e-05, "loss": 0.6775, "step": 10161 }, { "epoch": 0.26, "grad_norm": 1.4033360481262207, "learning_rate": 1.7341506179135023e-05, "loss": 0.6033, "step": 10162 }, { "epoch": 0.26, "grad_norm": 1.5696570873260498, "learning_rate": 1.734094249791747e-05, "loss": 0.5461, "step": 10163 }, { "epoch": 0.26, "grad_norm": 1.9212511777877808, "learning_rate": 1.734037876611075e-05, "loss": 0.6013, "step": 10164 }, { "epoch": 0.26, "grad_norm": 4.385760307312012, "learning_rate": 1.7339814983718753e-05, "loss": 0.7894, "step": 10165 }, { "epoch": 0.26, "grad_norm": 1.862751841545105, "learning_rate": 1.7339251150745364e-05, "loss": 0.6628, "step": 10166 }, { "epoch": 0.26, "grad_norm": 1.231728434562683, "learning_rate": 1.7338687267194468e-05, "loss": 0.6812, "step": 10167 }, { "epoch": 0.26, "grad_norm": 1.7107231616973877, "learning_rate": 1.7338123333069945e-05, "loss": 0.5593, "step": 10168 }, { "epoch": 0.26, "grad_norm": 4.83525276184082, "learning_rate": 1.733755934837569e-05, "loss": 0.8209, "step": 10169 }, { "epoch": 0.26, "grad_norm": 1.3203836679458618, "learning_rate": 1.7336995313115585e-05, "loss": 0.4898, "step": 10170 }, { "epoch": 0.26, "grad_norm": 2.2658612728118896, "learning_rate": 1.7336431227293516e-05, "loss": 0.6892, "step": 10171 }, { "epoch": 0.26, "grad_norm": 3.1677517890930176, "learning_rate": 1.7335867090913375e-05, "loss": 0.5861, "step": 10172 }, { "epoch": 0.26, "grad_norm": 3.821669101715088, "learning_rate": 1.7335302903979045e-05, "loss": 0.5408, "step": 10173 }, { "epoch": 0.26, "grad_norm": 1.7560372352600098, "learning_rate": 1.7334738666494416e-05, "loss": 0.6524, "step": 10174 }, { "epoch": 0.26, "grad_norm": 1.7388572692871094, "learning_rate": 1.7334174378463377e-05, "loss": 0.3854, "step": 10175 }, { "epoch": 0.26, "grad_norm": 1.1884832382202148, "learning_rate": 1.7333610039889817e-05, "loss": 0.4217, "step": 10176 }, { "epoch": 0.26, "grad_norm": 1.7355083227157593, "learning_rate": 1.7333045650777624e-05, "loss": 0.6993, "step": 10177 }, { "epoch": 0.26, "grad_norm": 2.3698863983154297, "learning_rate": 1.733248121113069e-05, "loss": 0.5786, "step": 10178 }, { "epoch": 0.26, "grad_norm": 2.9275460243225098, "learning_rate": 1.7331916720952896e-05, "loss": 0.5706, "step": 10179 }, { "epoch": 0.26, "grad_norm": 2.343144416809082, "learning_rate": 1.733135218024814e-05, "loss": 0.6922, "step": 10180 }, { "epoch": 0.26, "grad_norm": 1.1709399223327637, "learning_rate": 1.733078758902031e-05, "loss": 0.7436, "step": 10181 }, { "epoch": 0.26, "grad_norm": 2.3415281772613525, "learning_rate": 1.7330222947273302e-05, "loss": 0.6356, "step": 10182 }, { "epoch": 0.26, "grad_norm": 1.9711722135543823, "learning_rate": 1.7329658255010998e-05, "loss": 0.5641, "step": 10183 }, { "epoch": 0.26, "grad_norm": 3.874161958694458, "learning_rate": 1.7329093512237297e-05, "loss": 0.79, "step": 10184 }, { "epoch": 0.26, "grad_norm": 1.7272558212280273, "learning_rate": 1.732852871895609e-05, "loss": 0.5932, "step": 10185 }, { "epoch": 0.26, "grad_norm": 3.4521307945251465, "learning_rate": 1.732796387517126e-05, "loss": 0.7625, "step": 10186 }, { "epoch": 0.26, "grad_norm": 2.1425163745880127, "learning_rate": 1.7327398980886715e-05, "loss": 0.5374, "step": 10187 }, { "epoch": 0.26, "grad_norm": 1.9913228750228882, "learning_rate": 1.7326834036106336e-05, "loss": 0.4899, "step": 10188 }, { "epoch": 0.26, "grad_norm": 2.4862563610076904, "learning_rate": 1.7326269040834017e-05, "loss": 0.6732, "step": 10189 }, { "epoch": 0.26, "grad_norm": 1.8630378246307373, "learning_rate": 1.732570399507366e-05, "loss": 0.6376, "step": 10190 }, { "epoch": 0.26, "grad_norm": 2.9669406414031982, "learning_rate": 1.732513889882915e-05, "loss": 0.6385, "step": 10191 }, { "epoch": 0.26, "grad_norm": 2.2110002040863037, "learning_rate": 1.732457375210439e-05, "loss": 0.5316, "step": 10192 }, { "epoch": 0.26, "grad_norm": 1.583518385887146, "learning_rate": 1.7324008554903265e-05, "loss": 0.6227, "step": 10193 }, { "epoch": 0.26, "grad_norm": 2.5194289684295654, "learning_rate": 1.7323443307229674e-05, "loss": 0.5896, "step": 10194 }, { "epoch": 0.26, "grad_norm": 2.2527737617492676, "learning_rate": 1.7322878009087515e-05, "loss": 0.6701, "step": 10195 }, { "epoch": 0.26, "grad_norm": 2.286233425140381, "learning_rate": 1.7322312660480678e-05, "loss": 0.7125, "step": 10196 }, { "epoch": 0.26, "grad_norm": 3.154726266860962, "learning_rate": 1.7321747261413066e-05, "loss": 0.5008, "step": 10197 }, { "epoch": 0.26, "grad_norm": 3.1191506385803223, "learning_rate": 1.732118181188857e-05, "loss": 0.718, "step": 10198 }, { "epoch": 0.26, "grad_norm": 4.274800777435303, "learning_rate": 1.732061631191109e-05, "loss": 0.6752, "step": 10199 }, { "epoch": 0.26, "grad_norm": 4.824596881866455, "learning_rate": 1.732005076148452e-05, "loss": 0.6491, "step": 10200 }, { "epoch": 0.26, "grad_norm": 3.257488250732422, "learning_rate": 1.7319485160612763e-05, "loss": 0.4755, "step": 10201 }, { "epoch": 0.26, "grad_norm": 7.6511993408203125, "learning_rate": 1.7318919509299705e-05, "loss": 0.7264, "step": 10202 }, { "epoch": 0.26, "grad_norm": 5.7909979820251465, "learning_rate": 1.7318353807549257e-05, "loss": 0.6801, "step": 10203 }, { "epoch": 0.26, "grad_norm": 2.261113405227661, "learning_rate": 1.7317788055365312e-05, "loss": 0.7067, "step": 10204 }, { "epoch": 0.26, "grad_norm": 1.769057035446167, "learning_rate": 1.731722225275177e-05, "loss": 0.682, "step": 10205 }, { "epoch": 0.26, "grad_norm": 2.223801612854004, "learning_rate": 1.731665639971253e-05, "loss": 0.5571, "step": 10206 }, { "epoch": 0.26, "grad_norm": 1.8604322671890259, "learning_rate": 1.731609049625149e-05, "loss": 0.6159, "step": 10207 }, { "epoch": 0.26, "grad_norm": 4.080758094787598, "learning_rate": 1.731552454237255e-05, "loss": 0.6156, "step": 10208 }, { "epoch": 0.26, "grad_norm": 1.3449586629867554, "learning_rate": 1.731495853807961e-05, "loss": 0.7112, "step": 10209 }, { "epoch": 0.26, "grad_norm": 2.509826421737671, "learning_rate": 1.7314392483376573e-05, "loss": 0.6906, "step": 10210 }, { "epoch": 0.26, "grad_norm": 1.0163793563842773, "learning_rate": 1.7313826378267338e-05, "loss": 0.5366, "step": 10211 }, { "epoch": 0.26, "grad_norm": 3.8800573348999023, "learning_rate": 1.7313260222755804e-05, "loss": 0.6621, "step": 10212 }, { "epoch": 0.26, "grad_norm": 5.563163757324219, "learning_rate": 1.7312694016845877e-05, "loss": 0.786, "step": 10213 }, { "epoch": 0.26, "grad_norm": 4.347696781158447, "learning_rate": 1.7312127760541458e-05, "loss": 0.6482, "step": 10214 }, { "epoch": 0.26, "grad_norm": 2.9542667865753174, "learning_rate": 1.731156145384645e-05, "loss": 0.6229, "step": 10215 }, { "epoch": 0.26, "grad_norm": 1.3014228343963623, "learning_rate": 1.7310995096764753e-05, "loss": 0.5807, "step": 10216 }, { "epoch": 0.26, "grad_norm": 3.193472146987915, "learning_rate": 1.731042868930027e-05, "loss": 0.4809, "step": 10217 }, { "epoch": 0.26, "grad_norm": 1.2259036302566528, "learning_rate": 1.7309862231456907e-05, "loss": 0.5165, "step": 10218 }, { "epoch": 0.26, "grad_norm": 1.9944959878921509, "learning_rate": 1.730929572323856e-05, "loss": 0.6397, "step": 10219 }, { "epoch": 0.26, "grad_norm": 3.989450216293335, "learning_rate": 1.7308729164649146e-05, "loss": 0.5599, "step": 10220 }, { "epoch": 0.26, "grad_norm": 1.678340196609497, "learning_rate": 1.730816255569256e-05, "loss": 0.5094, "step": 10221 }, { "epoch": 0.26, "grad_norm": 3.6149158477783203, "learning_rate": 1.7307595896372708e-05, "loss": 0.7209, "step": 10222 }, { "epoch": 0.26, "grad_norm": 3.2734341621398926, "learning_rate": 1.7307029186693497e-05, "loss": 0.6277, "step": 10223 }, { "epoch": 0.26, "grad_norm": 3.943052053451538, "learning_rate": 1.730646242665883e-05, "loss": 0.5584, "step": 10224 }, { "epoch": 0.26, "grad_norm": 2.9052417278289795, "learning_rate": 1.730589561627262e-05, "loss": 0.5703, "step": 10225 }, { "epoch": 0.26, "grad_norm": 2.711911201477051, "learning_rate": 1.7305328755538764e-05, "loss": 0.7801, "step": 10226 }, { "epoch": 0.26, "grad_norm": 1.2269035577774048, "learning_rate": 1.730476184446117e-05, "loss": 0.6086, "step": 10227 }, { "epoch": 0.26, "grad_norm": 2.0451998710632324, "learning_rate": 1.7304194883043746e-05, "loss": 0.6135, "step": 10228 }, { "epoch": 0.26, "grad_norm": 3.590580463409424, "learning_rate": 1.7303627871290404e-05, "loss": 0.5573, "step": 10229 }, { "epoch": 0.26, "grad_norm": 1.5643284320831299, "learning_rate": 1.7303060809205046e-05, "loss": 0.598, "step": 10230 }, { "epoch": 0.26, "grad_norm": 2.406212568283081, "learning_rate": 1.730249369679158e-05, "loss": 0.6482, "step": 10231 }, { "epoch": 0.26, "grad_norm": 4.41058349609375, "learning_rate": 1.7301926534053916e-05, "loss": 0.569, "step": 10232 }, { "epoch": 0.26, "grad_norm": 2.9271185398101807, "learning_rate": 1.730135932099596e-05, "loss": 0.7464, "step": 10233 }, { "epoch": 0.26, "grad_norm": 1.2154897451400757, "learning_rate": 1.7300792057621626e-05, "loss": 0.6236, "step": 10234 }, { "epoch": 0.26, "grad_norm": 3.485732316970825, "learning_rate": 1.730022474393482e-05, "loss": 0.7122, "step": 10235 }, { "epoch": 0.26, "grad_norm": 3.1397838592529297, "learning_rate": 1.7299657379939446e-05, "loss": 0.7119, "step": 10236 }, { "epoch": 0.26, "grad_norm": 3.3031773567199707, "learning_rate": 1.7299089965639425e-05, "loss": 0.6853, "step": 10237 }, { "epoch": 0.26, "grad_norm": 1.9287554025650024, "learning_rate": 1.7298522501038658e-05, "loss": 0.5857, "step": 10238 }, { "epoch": 0.26, "grad_norm": 1.4100337028503418, "learning_rate": 1.7297954986141063e-05, "loss": 0.536, "step": 10239 }, { "epoch": 0.26, "grad_norm": 1.2987579107284546, "learning_rate": 1.7297387420950546e-05, "loss": 0.5673, "step": 10240 }, { "epoch": 0.26, "grad_norm": 2.8492088317871094, "learning_rate": 1.7296819805471018e-05, "loss": 0.691, "step": 10241 }, { "epoch": 0.26, "grad_norm": 2.315200090408325, "learning_rate": 1.7296252139706397e-05, "loss": 0.5771, "step": 10242 }, { "epoch": 0.26, "grad_norm": 3.374311923980713, "learning_rate": 1.7295684423660585e-05, "loss": 0.6477, "step": 10243 }, { "epoch": 0.26, "grad_norm": 1.2501713037490845, "learning_rate": 1.72951166573375e-05, "loss": 0.4841, "step": 10244 }, { "epoch": 0.26, "grad_norm": 2.0747857093811035, "learning_rate": 1.7294548840741054e-05, "loss": 0.5411, "step": 10245 }, { "epoch": 0.26, "grad_norm": 1.4542196989059448, "learning_rate": 1.729398097387516e-05, "loss": 0.5996, "step": 10246 }, { "epoch": 0.26, "grad_norm": 3.6235311031341553, "learning_rate": 1.7293413056743735e-05, "loss": 0.7761, "step": 10247 }, { "epoch": 0.26, "grad_norm": 2.2843997478485107, "learning_rate": 1.7292845089350688e-05, "loss": 0.6566, "step": 10248 }, { "epoch": 0.26, "grad_norm": 2.038015127182007, "learning_rate": 1.7292277071699934e-05, "loss": 0.6885, "step": 10249 }, { "epoch": 0.26, "grad_norm": 2.064058303833008, "learning_rate": 1.7291709003795386e-05, "loss": 0.6382, "step": 10250 }, { "epoch": 0.26, "grad_norm": 1.864570140838623, "learning_rate": 1.7291140885640964e-05, "loss": 0.4414, "step": 10251 }, { "epoch": 0.26, "grad_norm": 1.3409219980239868, "learning_rate": 1.7290572717240578e-05, "loss": 0.5195, "step": 10252 }, { "epoch": 0.26, "grad_norm": 1.2847214937210083, "learning_rate": 1.7290004498598145e-05, "loss": 0.716, "step": 10253 }, { "epoch": 0.26, "grad_norm": 3.5803592205047607, "learning_rate": 1.728943622971758e-05, "loss": 0.5545, "step": 10254 }, { "epoch": 0.26, "grad_norm": 1.8623491525650024, "learning_rate": 1.7288867910602803e-05, "loss": 0.6232, "step": 10255 }, { "epoch": 0.26, "grad_norm": 1.5073652267456055, "learning_rate": 1.728829954125773e-05, "loss": 0.4946, "step": 10256 }, { "epoch": 0.26, "grad_norm": 1.8561064004898071, "learning_rate": 1.728773112168627e-05, "loss": 0.7102, "step": 10257 }, { "epoch": 0.26, "grad_norm": 3.6469526290893555, "learning_rate": 1.7287162651892347e-05, "loss": 0.5468, "step": 10258 }, { "epoch": 0.26, "grad_norm": 1.3460458517074585, "learning_rate": 1.728659413187988e-05, "loss": 0.4829, "step": 10259 }, { "epoch": 0.26, "grad_norm": 2.646253824234009, "learning_rate": 1.728602556165278e-05, "loss": 0.6613, "step": 10260 }, { "epoch": 0.26, "grad_norm": 1.7186713218688965, "learning_rate": 1.728545694121497e-05, "loss": 0.5049, "step": 10261 }, { "epoch": 0.26, "grad_norm": 1.4573149681091309, "learning_rate": 1.728488827057037e-05, "loss": 0.497, "step": 10262 }, { "epoch": 0.26, "grad_norm": 5.066043853759766, "learning_rate": 1.7284319549722894e-05, "loss": 0.6166, "step": 10263 }, { "epoch": 0.26, "grad_norm": 6.54914665222168, "learning_rate": 1.728375077867647e-05, "loss": 0.6465, "step": 10264 }, { "epoch": 0.26, "grad_norm": 2.123493194580078, "learning_rate": 1.7283181957435007e-05, "loss": 0.5662, "step": 10265 }, { "epoch": 0.26, "grad_norm": 1.770580768585205, "learning_rate": 1.728261308600243e-05, "loss": 0.597, "step": 10266 }, { "epoch": 0.26, "grad_norm": 3.7044858932495117, "learning_rate": 1.728204416438266e-05, "loss": 0.6692, "step": 10267 }, { "epoch": 0.26, "grad_norm": 2.610661268234253, "learning_rate": 1.7281475192579614e-05, "loss": 0.6649, "step": 10268 }, { "epoch": 0.26, "grad_norm": 5.433135509490967, "learning_rate": 1.7280906170597222e-05, "loss": 0.5266, "step": 10269 }, { "epoch": 0.26, "grad_norm": 1.4571646451950073, "learning_rate": 1.7280337098439393e-05, "loss": 0.473, "step": 10270 }, { "epoch": 0.26, "grad_norm": 1.7933160066604614, "learning_rate": 1.727976797611006e-05, "loss": 0.756, "step": 10271 }, { "epoch": 0.26, "grad_norm": 2.3413212299346924, "learning_rate": 1.7279198803613134e-05, "loss": 0.5674, "step": 10272 }, { "epoch": 0.26, "grad_norm": 1.8053297996520996, "learning_rate": 1.7278629580952543e-05, "loss": 0.5178, "step": 10273 }, { "epoch": 0.26, "grad_norm": 1.536693811416626, "learning_rate": 1.7278060308132213e-05, "loss": 0.6003, "step": 10274 }, { "epoch": 0.26, "grad_norm": 1.7307865619659424, "learning_rate": 1.7277490985156063e-05, "loss": 0.6155, "step": 10275 }, { "epoch": 0.26, "grad_norm": 1.0787756443023682, "learning_rate": 1.7276921612028018e-05, "loss": 0.5446, "step": 10276 }, { "epoch": 0.26, "grad_norm": 1.7457126379013062, "learning_rate": 1.7276352188752e-05, "loss": 0.7116, "step": 10277 }, { "epoch": 0.26, "grad_norm": 2.1356420516967773, "learning_rate": 1.7275782715331933e-05, "loss": 0.7697, "step": 10278 }, { "epoch": 0.26, "grad_norm": 1.7533235549926758, "learning_rate": 1.7275213191771745e-05, "loss": 0.5449, "step": 10279 }, { "epoch": 0.26, "grad_norm": 2.3126816749572754, "learning_rate": 1.727464361807536e-05, "loss": 0.5739, "step": 10280 }, { "epoch": 0.26, "grad_norm": 1.9536362886428833, "learning_rate": 1.7274073994246698e-05, "loss": 0.6221, "step": 10281 }, { "epoch": 0.26, "grad_norm": 5.4080586433410645, "learning_rate": 1.727350432028969e-05, "loss": 0.756, "step": 10282 }, { "epoch": 0.26, "grad_norm": 3.3186416625976562, "learning_rate": 1.7272934596208258e-05, "loss": 0.5911, "step": 10283 }, { "epoch": 0.26, "grad_norm": 2.126943349838257, "learning_rate": 1.727236482200633e-05, "loss": 0.6846, "step": 10284 }, { "epoch": 0.26, "grad_norm": 3.400374174118042, "learning_rate": 1.7271794997687833e-05, "loss": 0.4993, "step": 10285 }, { "epoch": 0.26, "grad_norm": 3.3520476818084717, "learning_rate": 1.7271225123256697e-05, "loss": 0.5857, "step": 10286 }, { "epoch": 0.26, "grad_norm": 3.8683483600616455, "learning_rate": 1.727065519871684e-05, "loss": 0.5839, "step": 10287 }, { "epoch": 0.26, "grad_norm": 2.038339853286743, "learning_rate": 1.72700852240722e-05, "loss": 0.651, "step": 10288 }, { "epoch": 0.26, "grad_norm": 4.056258201599121, "learning_rate": 1.72695151993267e-05, "loss": 0.9698, "step": 10289 }, { "epoch": 0.26, "grad_norm": 4.490356922149658, "learning_rate": 1.7268945124484268e-05, "loss": 0.6942, "step": 10290 }, { "epoch": 0.26, "grad_norm": 5.178760528564453, "learning_rate": 1.726837499954883e-05, "loss": 0.577, "step": 10291 }, { "epoch": 0.26, "grad_norm": 1.5472418069839478, "learning_rate": 1.726780482452432e-05, "loss": 0.5776, "step": 10292 }, { "epoch": 0.26, "grad_norm": 4.199975490570068, "learning_rate": 1.7267234599414665e-05, "loss": 0.5948, "step": 10293 }, { "epoch": 0.26, "grad_norm": 1.8295713663101196, "learning_rate": 1.7266664324223795e-05, "loss": 0.6887, "step": 10294 }, { "epoch": 0.26, "grad_norm": 1.382850170135498, "learning_rate": 1.7266093998955636e-05, "loss": 0.5849, "step": 10295 }, { "epoch": 0.26, "grad_norm": 1.529787540435791, "learning_rate": 1.726552362361413e-05, "loss": 0.5094, "step": 10296 }, { "epoch": 0.26, "grad_norm": 5.2612199783325195, "learning_rate": 1.7264953198203195e-05, "loss": 0.5719, "step": 10297 }, { "epoch": 0.26, "grad_norm": 1.1815778017044067, "learning_rate": 1.7264382722726767e-05, "loss": 0.6464, "step": 10298 }, { "epoch": 0.26, "grad_norm": 1.9640772342681885, "learning_rate": 1.7263812197188777e-05, "loss": 0.6281, "step": 10299 }, { "epoch": 0.26, "grad_norm": 4.982004165649414, "learning_rate": 1.726324162159316e-05, "loss": 0.6751, "step": 10300 }, { "epoch": 0.26, "grad_norm": 1.2512803077697754, "learning_rate": 1.726267099594384e-05, "loss": 0.6008, "step": 10301 }, { "epoch": 0.26, "grad_norm": 2.0482823848724365, "learning_rate": 1.726210032024476e-05, "loss": 0.58, "step": 10302 }, { "epoch": 0.26, "grad_norm": 4.208841800689697, "learning_rate": 1.7261529594499842e-05, "loss": 0.7266, "step": 10303 }, { "epoch": 0.26, "grad_norm": 2.284182071685791, "learning_rate": 1.726095881871303e-05, "loss": 0.7374, "step": 10304 }, { "epoch": 0.26, "grad_norm": 3.268721342086792, "learning_rate": 1.7260387992888246e-05, "loss": 0.7985, "step": 10305 }, { "epoch": 0.26, "grad_norm": 2.979248046875, "learning_rate": 1.725981711702943e-05, "loss": 0.5187, "step": 10306 }, { "epoch": 0.26, "grad_norm": 1.4816519021987915, "learning_rate": 1.7259246191140515e-05, "loss": 0.5353, "step": 10307 }, { "epoch": 0.26, "grad_norm": 1.8622829914093018, "learning_rate": 1.7258675215225437e-05, "loss": 0.5374, "step": 10308 }, { "epoch": 0.26, "grad_norm": 4.089668273925781, "learning_rate": 1.725810418928813e-05, "loss": 0.5447, "step": 10309 }, { "epoch": 0.26, "grad_norm": 1.6213163137435913, "learning_rate": 1.7257533113332533e-05, "loss": 0.6074, "step": 10310 }, { "epoch": 0.26, "grad_norm": 2.1973042488098145, "learning_rate": 1.725696198736257e-05, "loss": 0.5222, "step": 10311 }, { "epoch": 0.26, "grad_norm": 2.557537794113159, "learning_rate": 1.725639081138219e-05, "loss": 0.6775, "step": 10312 }, { "epoch": 0.26, "grad_norm": 2.8722565174102783, "learning_rate": 1.725581958539532e-05, "loss": 0.6034, "step": 10313 }, { "epoch": 0.26, "grad_norm": 4.223282814025879, "learning_rate": 1.7255248309405904e-05, "loss": 0.6416, "step": 10314 }, { "epoch": 0.26, "grad_norm": 2.127154588699341, "learning_rate": 1.725467698341787e-05, "loss": 0.5528, "step": 10315 }, { "epoch": 0.26, "grad_norm": 2.0246245861053467, "learning_rate": 1.7254105607435162e-05, "loss": 0.58, "step": 10316 }, { "epoch": 0.26, "grad_norm": 1.3304435014724731, "learning_rate": 1.7253534181461718e-05, "loss": 0.4392, "step": 10317 }, { "epoch": 0.26, "grad_norm": 1.5445669889450073, "learning_rate": 1.7252962705501472e-05, "loss": 0.6388, "step": 10318 }, { "epoch": 0.26, "grad_norm": 2.7040483951568604, "learning_rate": 1.725239117955836e-05, "loss": 0.4984, "step": 10319 }, { "epoch": 0.26, "grad_norm": 1.9701664447784424, "learning_rate": 1.7251819603636327e-05, "loss": 0.6031, "step": 10320 }, { "epoch": 0.26, "grad_norm": 8.071246147155762, "learning_rate": 1.7251247977739313e-05, "loss": 0.6512, "step": 10321 }, { "epoch": 0.26, "grad_norm": 1.7383474111557007, "learning_rate": 1.7250676301871248e-05, "loss": 0.5928, "step": 10322 }, { "epoch": 0.26, "grad_norm": 2.0548207759857178, "learning_rate": 1.7250104576036083e-05, "loss": 0.5306, "step": 10323 }, { "epoch": 0.26, "grad_norm": 3.2122321128845215, "learning_rate": 1.7249532800237747e-05, "loss": 0.8073, "step": 10324 }, { "epoch": 0.26, "grad_norm": 1.4795387983322144, "learning_rate": 1.724896097448019e-05, "loss": 0.537, "step": 10325 }, { "epoch": 0.26, "grad_norm": 1.2287613153457642, "learning_rate": 1.7248389098767344e-05, "loss": 0.6723, "step": 10326 }, { "epoch": 0.26, "grad_norm": 4.066767692565918, "learning_rate": 1.7247817173103154e-05, "loss": 0.7192, "step": 10327 }, { "epoch": 0.26, "grad_norm": 4.639942169189453, "learning_rate": 1.7247245197491566e-05, "loss": 0.8196, "step": 10328 }, { "epoch": 0.26, "grad_norm": 3.4065380096435547, "learning_rate": 1.7246673171936516e-05, "loss": 0.6041, "step": 10329 }, { "epoch": 0.26, "grad_norm": 1.5217689275741577, "learning_rate": 1.7246101096441945e-05, "loss": 0.5324, "step": 10330 }, { "epoch": 0.26, "grad_norm": 1.6625148057937622, "learning_rate": 1.7245528971011797e-05, "loss": 0.7469, "step": 10331 }, { "epoch": 0.26, "grad_norm": 3.151412010192871, "learning_rate": 1.724495679565002e-05, "loss": 0.5765, "step": 10332 }, { "epoch": 0.26, "grad_norm": 1.6900821924209595, "learning_rate": 1.7244384570360548e-05, "loss": 0.5472, "step": 10333 }, { "epoch": 0.26, "grad_norm": 2.274270534515381, "learning_rate": 1.724381229514733e-05, "loss": 0.6507, "step": 10334 }, { "epoch": 0.26, "grad_norm": 1.18596351146698, "learning_rate": 1.724323997001431e-05, "loss": 0.6779, "step": 10335 }, { "epoch": 0.26, "grad_norm": 1.8900316953659058, "learning_rate": 1.724266759496543e-05, "loss": 0.7249, "step": 10336 }, { "epoch": 0.26, "grad_norm": 3.535909414291382, "learning_rate": 1.7242095170004637e-05, "loss": 0.6291, "step": 10337 }, { "epoch": 0.26, "grad_norm": 2.1621835231781006, "learning_rate": 1.724152269513587e-05, "loss": 0.6748, "step": 10338 }, { "epoch": 0.26, "grad_norm": 2.023466110229492, "learning_rate": 1.7240950170363082e-05, "loss": 0.5935, "step": 10339 }, { "epoch": 0.27, "grad_norm": 1.204762578010559, "learning_rate": 1.7240377595690214e-05, "loss": 0.5988, "step": 10340 }, { "epoch": 0.27, "grad_norm": 1.3727853298187256, "learning_rate": 1.723980497112121e-05, "loss": 0.5295, "step": 10341 }, { "epoch": 0.27, "grad_norm": 1.4725282192230225, "learning_rate": 1.723923229666002e-05, "loss": 0.5746, "step": 10342 }, { "epoch": 0.27, "grad_norm": 3.3628368377685547, "learning_rate": 1.723865957231059e-05, "loss": 0.5283, "step": 10343 }, { "epoch": 0.27, "grad_norm": 2.589390277862549, "learning_rate": 1.7238086798076864e-05, "loss": 0.679, "step": 10344 }, { "epoch": 0.27, "grad_norm": 5.776382923126221, "learning_rate": 1.7237513973962794e-05, "loss": 0.5986, "step": 10345 }, { "epoch": 0.27, "grad_norm": 1.9732416868209839, "learning_rate": 1.7236941099972324e-05, "loss": 0.7413, "step": 10346 }, { "epoch": 0.27, "grad_norm": 2.3407304286956787, "learning_rate": 1.72363681761094e-05, "loss": 0.7751, "step": 10347 }, { "epoch": 0.27, "grad_norm": 1.3974379301071167, "learning_rate": 1.7235795202377975e-05, "loss": 0.3639, "step": 10348 }, { "epoch": 0.27, "grad_norm": 2.0458381175994873, "learning_rate": 1.7235222178781995e-05, "loss": 0.7462, "step": 10349 }, { "epoch": 0.27, "grad_norm": 1.2415361404418945, "learning_rate": 1.723464910532541e-05, "loss": 0.4887, "step": 10350 }, { "epoch": 0.27, "grad_norm": 2.971076488494873, "learning_rate": 1.7234075982012167e-05, "loss": 0.7133, "step": 10351 }, { "epoch": 0.27, "grad_norm": 1.388187050819397, "learning_rate": 1.723350280884622e-05, "loss": 0.5464, "step": 10352 }, { "epoch": 0.27, "grad_norm": 3.370326519012451, "learning_rate": 1.7232929585831515e-05, "loss": 0.5398, "step": 10353 }, { "epoch": 0.27, "grad_norm": 2.431964635848999, "learning_rate": 1.7232356312972004e-05, "loss": 0.6232, "step": 10354 }, { "epoch": 0.27, "grad_norm": 1.350730538368225, "learning_rate": 1.7231782990271634e-05, "loss": 0.694, "step": 10355 }, { "epoch": 0.27, "grad_norm": 2.150667190551758, "learning_rate": 1.7231209617734362e-05, "loss": 0.5328, "step": 10356 }, { "epoch": 0.27, "grad_norm": 12.000198364257812, "learning_rate": 1.7230636195364137e-05, "loss": 0.4743, "step": 10357 }, { "epoch": 0.27, "grad_norm": 2.3375442028045654, "learning_rate": 1.7230062723164908e-05, "loss": 0.5981, "step": 10358 }, { "epoch": 0.27, "grad_norm": 1.1999237537384033, "learning_rate": 1.722948920114063e-05, "loss": 0.5685, "step": 10359 }, { "epoch": 0.27, "grad_norm": 1.2515963315963745, "learning_rate": 1.7228915629295256e-05, "loss": 0.442, "step": 10360 }, { "epoch": 0.27, "grad_norm": 4.013225078582764, "learning_rate": 1.7228342007632733e-05, "loss": 0.5733, "step": 10361 }, { "epoch": 0.27, "grad_norm": 1.4263441562652588, "learning_rate": 1.722776833615702e-05, "loss": 0.6146, "step": 10362 }, { "epoch": 0.27, "grad_norm": 1.8915776014328003, "learning_rate": 1.722719461487207e-05, "loss": 0.6738, "step": 10363 }, { "epoch": 0.27, "grad_norm": 4.701291084289551, "learning_rate": 1.7226620843781835e-05, "loss": 0.7164, "step": 10364 }, { "epoch": 0.27, "grad_norm": 2.7951536178588867, "learning_rate": 1.722604702289027e-05, "loss": 0.4885, "step": 10365 }, { "epoch": 0.27, "grad_norm": 1.941246747970581, "learning_rate": 1.7225473152201327e-05, "loss": 0.6392, "step": 10366 }, { "epoch": 0.27, "grad_norm": 4.447939395904541, "learning_rate": 1.7224899231718964e-05, "loss": 0.7694, "step": 10367 }, { "epoch": 0.27, "grad_norm": 2.475933790206909, "learning_rate": 1.7224325261447133e-05, "loss": 0.6716, "step": 10368 }, { "epoch": 0.27, "grad_norm": 5.9991021156311035, "learning_rate": 1.7223751241389792e-05, "loss": 0.6613, "step": 10369 }, { "epoch": 0.27, "grad_norm": 2.6852006912231445, "learning_rate": 1.7223177171550892e-05, "loss": 0.705, "step": 10370 }, { "epoch": 0.27, "grad_norm": 2.62589430809021, "learning_rate": 1.72226030519344e-05, "loss": 0.6065, "step": 10371 }, { "epoch": 0.27, "grad_norm": 3.3719584941864014, "learning_rate": 1.722202888254426e-05, "loss": 0.7615, "step": 10372 }, { "epoch": 0.27, "grad_norm": 1.518291711807251, "learning_rate": 1.722145466338444e-05, "loss": 0.4805, "step": 10373 }, { "epoch": 0.27, "grad_norm": 0.9727563261985779, "learning_rate": 1.7220880394458885e-05, "loss": 0.4523, "step": 10374 }, { "epoch": 0.27, "grad_norm": 4.337921619415283, "learning_rate": 1.722030607577156e-05, "loss": 0.6764, "step": 10375 }, { "epoch": 0.27, "grad_norm": 3.3893914222717285, "learning_rate": 1.7219731707326425e-05, "loss": 0.5155, "step": 10376 }, { "epoch": 0.27, "grad_norm": 1.5982078313827515, "learning_rate": 1.7219157289127434e-05, "loss": 0.528, "step": 10377 }, { "epoch": 0.27, "grad_norm": 2.7870190143585205, "learning_rate": 1.7218582821178543e-05, "loss": 0.6987, "step": 10378 }, { "epoch": 0.27, "grad_norm": 3.229400634765625, "learning_rate": 1.721800830348372e-05, "loss": 0.6061, "step": 10379 }, { "epoch": 0.27, "grad_norm": 5.2472429275512695, "learning_rate": 1.7217433736046914e-05, "loss": 0.6254, "step": 10380 }, { "epoch": 0.27, "grad_norm": 1.0821138620376587, "learning_rate": 1.7216859118872095e-05, "loss": 0.5317, "step": 10381 }, { "epoch": 0.27, "grad_norm": 1.7271555662155151, "learning_rate": 1.7216284451963207e-05, "loss": 0.5759, "step": 10382 }, { "epoch": 0.27, "grad_norm": 1.3456761837005615, "learning_rate": 1.721570973532423e-05, "loss": 0.5844, "step": 10383 }, { "epoch": 0.27, "grad_norm": 1.5696345567703247, "learning_rate": 1.7215134968959112e-05, "loss": 0.5622, "step": 10384 }, { "epoch": 0.27, "grad_norm": 1.7491257190704346, "learning_rate": 1.7214560152871812e-05, "loss": 0.687, "step": 10385 }, { "epoch": 0.27, "grad_norm": 1.9229645729064941, "learning_rate": 1.72139852870663e-05, "loss": 0.5981, "step": 10386 }, { "epoch": 0.27, "grad_norm": 1.4585776329040527, "learning_rate": 1.7213410371546534e-05, "loss": 0.6612, "step": 10387 }, { "epoch": 0.27, "grad_norm": 4.2399773597717285, "learning_rate": 1.7212835406316478e-05, "loss": 0.7042, "step": 10388 }, { "epoch": 0.27, "grad_norm": 1.5399255752563477, "learning_rate": 1.7212260391380086e-05, "loss": 0.5507, "step": 10389 }, { "epoch": 0.27, "grad_norm": 2.457958936691284, "learning_rate": 1.721168532674133e-05, "loss": 0.6354, "step": 10390 }, { "epoch": 0.27, "grad_norm": 1.0319502353668213, "learning_rate": 1.7211110212404173e-05, "loss": 0.6653, "step": 10391 }, { "epoch": 0.27, "grad_norm": 1.589028000831604, "learning_rate": 1.721053504837257e-05, "loss": 0.5765, "step": 10392 }, { "epoch": 0.27, "grad_norm": 2.821570873260498, "learning_rate": 1.720995983465049e-05, "loss": 0.6163, "step": 10393 }, { "epoch": 0.27, "grad_norm": 6.739777088165283, "learning_rate": 1.7209384571241897e-05, "loss": 0.6071, "step": 10394 }, { "epoch": 0.27, "grad_norm": 2.342973232269287, "learning_rate": 1.7208809258150755e-05, "loss": 0.5782, "step": 10395 }, { "epoch": 0.27, "grad_norm": 3.839987277984619, "learning_rate": 1.7208233895381028e-05, "loss": 0.6626, "step": 10396 }, { "epoch": 0.27, "grad_norm": 3.087172269821167, "learning_rate": 1.720765848293668e-05, "loss": 0.5191, "step": 10397 }, { "epoch": 0.27, "grad_norm": 2.592932939529419, "learning_rate": 1.7207083020821683e-05, "loss": 0.5803, "step": 10398 }, { "epoch": 0.27, "grad_norm": 1.378783106803894, "learning_rate": 1.7206507509039996e-05, "loss": 0.5431, "step": 10399 }, { "epoch": 0.27, "grad_norm": 5.8270392417907715, "learning_rate": 1.7205931947595584e-05, "loss": 0.802, "step": 10400 }, { "epoch": 0.27, "grad_norm": 2.7027957439422607, "learning_rate": 1.720535633649242e-05, "loss": 0.7358, "step": 10401 }, { "epoch": 0.27, "grad_norm": 4.527614593505859, "learning_rate": 1.7204780675734466e-05, "loss": 0.58, "step": 10402 }, { "epoch": 0.27, "grad_norm": 1.6848350763320923, "learning_rate": 1.7204204965325687e-05, "loss": 0.6203, "step": 10403 }, { "epoch": 0.27, "grad_norm": 4.219266891479492, "learning_rate": 1.7203629205270054e-05, "loss": 0.725, "step": 10404 }, { "epoch": 0.27, "grad_norm": 1.4948339462280273, "learning_rate": 1.7203053395571538e-05, "loss": 0.6991, "step": 10405 }, { "epoch": 0.27, "grad_norm": 2.467477798461914, "learning_rate": 1.7202477536234103e-05, "loss": 0.6614, "step": 10406 }, { "epoch": 0.27, "grad_norm": 3.4709219932556152, "learning_rate": 1.7201901627261716e-05, "loss": 0.5297, "step": 10407 }, { "epoch": 0.27, "grad_norm": 2.3296332359313965, "learning_rate": 1.7201325668658348e-05, "loss": 0.7452, "step": 10408 }, { "epoch": 0.27, "grad_norm": 1.3200055360794067, "learning_rate": 1.7200749660427967e-05, "loss": 0.5365, "step": 10409 }, { "epoch": 0.27, "grad_norm": 1.5064334869384766, "learning_rate": 1.7200173602574544e-05, "loss": 0.5303, "step": 10410 }, { "epoch": 0.27, "grad_norm": 7.903009414672852, "learning_rate": 1.7199597495102046e-05, "loss": 0.6305, "step": 10411 }, { "epoch": 0.27, "grad_norm": 1.9094072580337524, "learning_rate": 1.719902133801445e-05, "loss": 0.6096, "step": 10412 }, { "epoch": 0.27, "grad_norm": 6.153435707092285, "learning_rate": 1.719844513131572e-05, "loss": 0.8073, "step": 10413 }, { "epoch": 0.27, "grad_norm": 2.489394187927246, "learning_rate": 1.7197868875009828e-05, "loss": 0.681, "step": 10414 }, { "epoch": 0.27, "grad_norm": 4.105792045593262, "learning_rate": 1.7197292569100743e-05, "loss": 0.5526, "step": 10415 }, { "epoch": 0.27, "grad_norm": 1.6898813247680664, "learning_rate": 1.7196716213592444e-05, "loss": 0.5982, "step": 10416 }, { "epoch": 0.27, "grad_norm": 2.706157684326172, "learning_rate": 1.7196139808488896e-05, "loss": 0.6012, "step": 10417 }, { "epoch": 0.27, "grad_norm": 5.578977108001709, "learning_rate": 1.7195563353794075e-05, "loss": 0.7498, "step": 10418 }, { "epoch": 0.27, "grad_norm": 3.4835824966430664, "learning_rate": 1.719498684951195e-05, "loss": 0.5515, "step": 10419 }, { "epoch": 0.27, "grad_norm": 1.526913046836853, "learning_rate": 1.7194410295646496e-05, "loss": 0.5348, "step": 10420 }, { "epoch": 0.27, "grad_norm": 2.727038621902466, "learning_rate": 1.719383369220169e-05, "loss": 0.4824, "step": 10421 }, { "epoch": 0.27, "grad_norm": 3.1308882236480713, "learning_rate": 1.7193257039181497e-05, "loss": 0.6784, "step": 10422 }, { "epoch": 0.27, "grad_norm": 1.6273607015609741, "learning_rate": 1.71926803365899e-05, "loss": 0.5375, "step": 10423 }, { "epoch": 0.27, "grad_norm": 2.029088020324707, "learning_rate": 1.7192103584430866e-05, "loss": 0.6601, "step": 10424 }, { "epoch": 0.27, "grad_norm": 1.4108965396881104, "learning_rate": 1.719152678270837e-05, "loss": 0.6108, "step": 10425 }, { "epoch": 0.27, "grad_norm": 1.7622039318084717, "learning_rate": 1.7190949931426396e-05, "loss": 0.6135, "step": 10426 }, { "epoch": 0.27, "grad_norm": 4.674098014831543, "learning_rate": 1.719037303058891e-05, "loss": 0.5507, "step": 10427 }, { "epoch": 0.27, "grad_norm": 2.4893558025360107, "learning_rate": 1.718979608019989e-05, "loss": 0.7473, "step": 10428 }, { "epoch": 0.27, "grad_norm": 1.4901381731033325, "learning_rate": 1.7189219080263314e-05, "loss": 0.59, "step": 10429 }, { "epoch": 0.27, "grad_norm": 4.052511692047119, "learning_rate": 1.7188642030783154e-05, "loss": 0.6629, "step": 10430 }, { "epoch": 0.27, "grad_norm": 2.5459868907928467, "learning_rate": 1.718806493176339e-05, "loss": 0.6833, "step": 10431 }, { "epoch": 0.27, "grad_norm": 1.1876094341278076, "learning_rate": 1.7187487783208e-05, "loss": 0.693, "step": 10432 }, { "epoch": 0.27, "grad_norm": 2.2595038414001465, "learning_rate": 1.718691058512096e-05, "loss": 0.5083, "step": 10433 }, { "epoch": 0.27, "grad_norm": 2.199469804763794, "learning_rate": 1.7186333337506246e-05, "loss": 0.3855, "step": 10434 }, { "epoch": 0.27, "grad_norm": 1.3651084899902344, "learning_rate": 1.718575604036784e-05, "loss": 0.6832, "step": 10435 }, { "epoch": 0.27, "grad_norm": 4.800659656524658, "learning_rate": 1.7185178693709717e-05, "loss": 0.574, "step": 10436 }, { "epoch": 0.27, "grad_norm": 1.7174155712127686, "learning_rate": 1.7184601297535858e-05, "loss": 0.5806, "step": 10437 }, { "epoch": 0.27, "grad_norm": 1.2038943767547607, "learning_rate": 1.7184023851850237e-05, "loss": 0.5234, "step": 10438 }, { "epoch": 0.27, "grad_norm": 1.9361424446105957, "learning_rate": 1.7183446356656837e-05, "loss": 0.6407, "step": 10439 }, { "epoch": 0.27, "grad_norm": 5.634932518005371, "learning_rate": 1.7182868811959637e-05, "loss": 0.7463, "step": 10440 }, { "epoch": 0.27, "grad_norm": 1.7635661363601685, "learning_rate": 1.7182291217762625e-05, "loss": 0.6647, "step": 10441 }, { "epoch": 0.27, "grad_norm": 2.744920492172241, "learning_rate": 1.7181713574069767e-05, "loss": 0.6105, "step": 10442 }, { "epoch": 0.27, "grad_norm": 7.797251224517822, "learning_rate": 1.7181135880885052e-05, "loss": 0.5213, "step": 10443 }, { "epoch": 0.27, "grad_norm": 1.6492900848388672, "learning_rate": 1.7180558138212465e-05, "loss": 0.6555, "step": 10444 }, { "epoch": 0.27, "grad_norm": 1.8926087617874146, "learning_rate": 1.7179980346055978e-05, "loss": 0.7003, "step": 10445 }, { "epoch": 0.27, "grad_norm": 2.2690114974975586, "learning_rate": 1.717940250441958e-05, "loss": 0.738, "step": 10446 }, { "epoch": 0.27, "grad_norm": 1.1286585330963135, "learning_rate": 1.7178824613307248e-05, "loss": 0.542, "step": 10447 }, { "epoch": 0.27, "grad_norm": 1.2356127500534058, "learning_rate": 1.717824667272297e-05, "loss": 0.6043, "step": 10448 }, { "epoch": 0.27, "grad_norm": 0.9155341386795044, "learning_rate": 1.7177668682670723e-05, "loss": 0.4479, "step": 10449 }, { "epoch": 0.27, "grad_norm": 2.523710250854492, "learning_rate": 1.7177090643154493e-05, "loss": 0.6788, "step": 10450 }, { "epoch": 0.27, "grad_norm": 2.10634446144104, "learning_rate": 1.7176512554178265e-05, "loss": 0.6551, "step": 10451 }, { "epoch": 0.27, "grad_norm": 2.8996899127960205, "learning_rate": 1.717593441574602e-05, "loss": 0.6462, "step": 10452 }, { "epoch": 0.27, "grad_norm": 2.2792093753814697, "learning_rate": 1.7175356227861744e-05, "loss": 0.6953, "step": 10453 }, { "epoch": 0.27, "grad_norm": 3.096442461013794, "learning_rate": 1.717477799052942e-05, "loss": 0.5568, "step": 10454 }, { "epoch": 0.27, "grad_norm": 2.740790843963623, "learning_rate": 1.7174199703753034e-05, "loss": 0.5751, "step": 10455 }, { "epoch": 0.27, "grad_norm": 1.3758773803710938, "learning_rate": 1.7173621367536573e-05, "loss": 0.4492, "step": 10456 }, { "epoch": 0.27, "grad_norm": 3.388047218322754, "learning_rate": 1.7173042981884016e-05, "loss": 0.8551, "step": 10457 }, { "epoch": 0.27, "grad_norm": 5.6148858070373535, "learning_rate": 1.7172464546799358e-05, "loss": 0.5648, "step": 10458 }, { "epoch": 0.27, "grad_norm": 3.3248727321624756, "learning_rate": 1.7171886062286577e-05, "loss": 0.5466, "step": 10459 }, { "epoch": 0.27, "grad_norm": 3.4242069721221924, "learning_rate": 1.7171307528349664e-05, "loss": 0.6906, "step": 10460 }, { "epoch": 0.27, "grad_norm": 3.6039295196533203, "learning_rate": 1.7170728944992607e-05, "loss": 0.7321, "step": 10461 }, { "epoch": 0.27, "grad_norm": 4.1047515869140625, "learning_rate": 1.717015031221939e-05, "loss": 0.6589, "step": 10462 }, { "epoch": 0.27, "grad_norm": 1.8873437643051147, "learning_rate": 1.7169571630034002e-05, "loss": 0.6381, "step": 10463 }, { "epoch": 0.27, "grad_norm": 1.6754176616668701, "learning_rate": 1.716899289844043e-05, "loss": 0.4534, "step": 10464 }, { "epoch": 0.27, "grad_norm": 3.505943536758423, "learning_rate": 1.7168414117442665e-05, "loss": 0.7947, "step": 10465 }, { "epoch": 0.27, "grad_norm": 4.115198612213135, "learning_rate": 1.716783528704469e-05, "loss": 0.8531, "step": 10466 }, { "epoch": 0.27, "grad_norm": 3.0588767528533936, "learning_rate": 1.7167256407250497e-05, "loss": 0.7722, "step": 10467 }, { "epoch": 0.27, "grad_norm": 1.6988847255706787, "learning_rate": 1.716667747806408e-05, "loss": 0.6135, "step": 10468 }, { "epoch": 0.27, "grad_norm": 1.8093992471694946, "learning_rate": 1.7166098499489423e-05, "loss": 0.5863, "step": 10469 }, { "epoch": 0.27, "grad_norm": 1.4422802925109863, "learning_rate": 1.7165519471530515e-05, "loss": 0.5975, "step": 10470 }, { "epoch": 0.27, "grad_norm": 1.7004985809326172, "learning_rate": 1.716494039419135e-05, "loss": 0.6294, "step": 10471 }, { "epoch": 0.27, "grad_norm": 2.7716152667999268, "learning_rate": 1.716436126747592e-05, "loss": 0.5881, "step": 10472 }, { "epoch": 0.27, "grad_norm": 5.698907375335693, "learning_rate": 1.716378209138821e-05, "loss": 0.6225, "step": 10473 }, { "epoch": 0.27, "grad_norm": 1.3406811952590942, "learning_rate": 1.7163202865932215e-05, "loss": 0.5396, "step": 10474 }, { "epoch": 0.27, "grad_norm": 1.3931673765182495, "learning_rate": 1.7162623591111927e-05, "loss": 0.6234, "step": 10475 }, { "epoch": 0.27, "grad_norm": 2.895897626876831, "learning_rate": 1.7162044266931337e-05, "loss": 0.765, "step": 10476 }, { "epoch": 0.27, "grad_norm": 1.3248695135116577, "learning_rate": 1.7161464893394438e-05, "loss": 0.583, "step": 10477 }, { "epoch": 0.27, "grad_norm": 1.5677300691604614, "learning_rate": 1.716088547050522e-05, "loss": 0.588, "step": 10478 }, { "epoch": 0.27, "grad_norm": 6.373230457305908, "learning_rate": 1.716030599826768e-05, "loss": 0.6348, "step": 10479 }, { "epoch": 0.27, "grad_norm": 3.0452024936676025, "learning_rate": 1.7159726476685808e-05, "loss": 0.6596, "step": 10480 }, { "epoch": 0.27, "grad_norm": 2.9644088745117188, "learning_rate": 1.7159146905763605e-05, "loss": 0.6571, "step": 10481 }, { "epoch": 0.27, "grad_norm": 10.19273567199707, "learning_rate": 1.7158567285505054e-05, "loss": 0.7052, "step": 10482 }, { "epoch": 0.27, "grad_norm": 1.732442021369934, "learning_rate": 1.7157987615914155e-05, "loss": 0.6488, "step": 10483 }, { "epoch": 0.27, "grad_norm": 1.5682631731033325, "learning_rate": 1.7157407896994903e-05, "loss": 0.7694, "step": 10484 }, { "epoch": 0.27, "grad_norm": 2.754690408706665, "learning_rate": 1.7156828128751293e-05, "loss": 0.6968, "step": 10485 }, { "epoch": 0.27, "grad_norm": 1.9027048349380493, "learning_rate": 1.715624831118732e-05, "loss": 0.5952, "step": 10486 }, { "epoch": 0.27, "grad_norm": 2.451554775238037, "learning_rate": 1.715566844430698e-05, "loss": 0.5173, "step": 10487 }, { "epoch": 0.27, "grad_norm": 2.4628028869628906, "learning_rate": 1.7155088528114267e-05, "loss": 0.5979, "step": 10488 }, { "epoch": 0.27, "grad_norm": 1.7834326028823853, "learning_rate": 1.715450856261318e-05, "loss": 0.7126, "step": 10489 }, { "epoch": 0.27, "grad_norm": 1.8994197845458984, "learning_rate": 1.7153928547807715e-05, "loss": 0.6286, "step": 10490 }, { "epoch": 0.27, "grad_norm": 1.7406526803970337, "learning_rate": 1.715334848370187e-05, "loss": 0.6765, "step": 10491 }, { "epoch": 0.27, "grad_norm": 1.7539279460906982, "learning_rate": 1.715276837029964e-05, "loss": 0.4637, "step": 10492 }, { "epoch": 0.27, "grad_norm": 3.2005839347839355, "learning_rate": 1.7152188207605026e-05, "loss": 0.7406, "step": 10493 }, { "epoch": 0.27, "grad_norm": 1.8368433713912964, "learning_rate": 1.715160799562202e-05, "loss": 0.6901, "step": 10494 }, { "epoch": 0.27, "grad_norm": 3.1635358333587646, "learning_rate": 1.7151027734354627e-05, "loss": 0.5333, "step": 10495 }, { "epoch": 0.27, "grad_norm": 6.514796257019043, "learning_rate": 1.7150447423806846e-05, "loss": 0.7693, "step": 10496 }, { "epoch": 0.27, "grad_norm": 4.5121355056762695, "learning_rate": 1.7149867063982673e-05, "loss": 0.5424, "step": 10497 }, { "epoch": 0.27, "grad_norm": 4.276540756225586, "learning_rate": 1.7149286654886105e-05, "loss": 0.7613, "step": 10498 }, { "epoch": 0.27, "grad_norm": 1.0976669788360596, "learning_rate": 1.714870619652115e-05, "loss": 0.5641, "step": 10499 }, { "epoch": 0.27, "grad_norm": 2.8727266788482666, "learning_rate": 1.71481256888918e-05, "loss": 0.7178, "step": 10500 }, { "epoch": 0.27, "grad_norm": 2.1444778442382812, "learning_rate": 1.7147545132002056e-05, "loss": 0.6857, "step": 10501 }, { "epoch": 0.27, "grad_norm": 1.1664303541183472, "learning_rate": 1.7146964525855928e-05, "loss": 0.6281, "step": 10502 }, { "epoch": 0.27, "grad_norm": 1.826616883277893, "learning_rate": 1.7146383870457402e-05, "loss": 0.6546, "step": 10503 }, { "epoch": 0.27, "grad_norm": 1.8672350645065308, "learning_rate": 1.7145803165810497e-05, "loss": 0.6286, "step": 10504 }, { "epoch": 0.27, "grad_norm": 1.5941745042800903, "learning_rate": 1.7145222411919203e-05, "loss": 0.6326, "step": 10505 }, { "epoch": 0.27, "grad_norm": 4.649797439575195, "learning_rate": 1.7144641608787523e-05, "loss": 0.7859, "step": 10506 }, { "epoch": 0.27, "grad_norm": 4.265768527984619, "learning_rate": 1.7144060756419465e-05, "loss": 0.6508, "step": 10507 }, { "epoch": 0.27, "grad_norm": 1.6429574489593506, "learning_rate": 1.7143479854819025e-05, "loss": 0.6567, "step": 10508 }, { "epoch": 0.27, "grad_norm": 3.283254384994507, "learning_rate": 1.7142898903990213e-05, "loss": 0.7263, "step": 10509 }, { "epoch": 0.27, "grad_norm": 1.278393030166626, "learning_rate": 1.7142317903937027e-05, "loss": 0.5687, "step": 10510 }, { "epoch": 0.27, "grad_norm": 2.0107123851776123, "learning_rate": 1.7141736854663475e-05, "loss": 0.5824, "step": 10511 }, { "epoch": 0.27, "grad_norm": 1.0870623588562012, "learning_rate": 1.7141155756173557e-05, "loss": 0.6432, "step": 10512 }, { "epoch": 0.27, "grad_norm": 4.882471084594727, "learning_rate": 1.7140574608471283e-05, "loss": 0.4626, "step": 10513 }, { "epoch": 0.27, "grad_norm": 5.355022430419922, "learning_rate": 1.7139993411560654e-05, "loss": 0.6811, "step": 10514 }, { "epoch": 0.27, "grad_norm": 3.367053270339966, "learning_rate": 1.7139412165445674e-05, "loss": 0.531, "step": 10515 }, { "epoch": 0.27, "grad_norm": 3.2618651390075684, "learning_rate": 1.7138830870130355e-05, "loss": 0.5635, "step": 10516 }, { "epoch": 0.27, "grad_norm": 1.2263333797454834, "learning_rate": 1.7138249525618695e-05, "loss": 0.4815, "step": 10517 }, { "epoch": 0.27, "grad_norm": 1.9347307682037354, "learning_rate": 1.7137668131914705e-05, "loss": 0.6588, "step": 10518 }, { "epoch": 0.27, "grad_norm": 2.6831259727478027, "learning_rate": 1.713708668902239e-05, "loss": 0.6714, "step": 10519 }, { "epoch": 0.27, "grad_norm": 2.6219544410705566, "learning_rate": 1.713650519694576e-05, "loss": 0.5737, "step": 10520 }, { "epoch": 0.27, "grad_norm": 1.973292589187622, "learning_rate": 1.7135923655688817e-05, "loss": 0.7297, "step": 10521 }, { "epoch": 0.27, "grad_norm": 1.6172285079956055, "learning_rate": 1.7135342065255574e-05, "loss": 0.5302, "step": 10522 }, { "epoch": 0.27, "grad_norm": 5.131592750549316, "learning_rate": 1.7134760425650036e-05, "loss": 0.7116, "step": 10523 }, { "epoch": 0.27, "grad_norm": 1.9748961925506592, "learning_rate": 1.7134178736876207e-05, "loss": 0.6575, "step": 10524 }, { "epoch": 0.27, "grad_norm": 4.131565570831299, "learning_rate": 1.7133596998938105e-05, "loss": 0.5041, "step": 10525 }, { "epoch": 0.27, "grad_norm": 1.2216655015945435, "learning_rate": 1.7133015211839733e-05, "loss": 0.6802, "step": 10526 }, { "epoch": 0.27, "grad_norm": 3.268169403076172, "learning_rate": 1.71324333755851e-05, "loss": 0.6247, "step": 10527 }, { "epoch": 0.27, "grad_norm": 1.6808568239212036, "learning_rate": 1.713185149017822e-05, "loss": 0.4423, "step": 10528 }, { "epoch": 0.27, "grad_norm": 1.3346638679504395, "learning_rate": 1.71312695556231e-05, "loss": 0.5206, "step": 10529 }, { "epoch": 0.27, "grad_norm": 1.4683501720428467, "learning_rate": 1.713068757192375e-05, "loss": 0.4731, "step": 10530 }, { "epoch": 0.27, "grad_norm": 1.8211398124694824, "learning_rate": 1.713010553908418e-05, "loss": 0.6223, "step": 10531 }, { "epoch": 0.27, "grad_norm": 0.8719409108161926, "learning_rate": 1.7129523457108403e-05, "loss": 0.437, "step": 10532 }, { "epoch": 0.27, "grad_norm": 1.5432178974151611, "learning_rate": 1.712894132600043e-05, "loss": 0.5461, "step": 10533 }, { "epoch": 0.27, "grad_norm": 4.925620079040527, "learning_rate": 1.712835914576427e-05, "loss": 0.6787, "step": 10534 }, { "epoch": 0.27, "grad_norm": 3.2563304901123047, "learning_rate": 1.712777691640394e-05, "loss": 0.5834, "step": 10535 }, { "epoch": 0.27, "grad_norm": 2.3917999267578125, "learning_rate": 1.712719463792345e-05, "loss": 0.405, "step": 10536 }, { "epoch": 0.27, "grad_norm": 1.410229206085205, "learning_rate": 1.712661231032681e-05, "loss": 0.6168, "step": 10537 }, { "epoch": 0.27, "grad_norm": 1.93365478515625, "learning_rate": 1.7126029933618035e-05, "loss": 0.6539, "step": 10538 }, { "epoch": 0.27, "grad_norm": 3.4441115856170654, "learning_rate": 1.712544750780114e-05, "loss": 0.8168, "step": 10539 }, { "epoch": 0.27, "grad_norm": 2.054760217666626, "learning_rate": 1.712486503288014e-05, "loss": 0.5242, "step": 10540 }, { "epoch": 0.27, "grad_norm": 1.9230034351348877, "learning_rate": 1.7124282508859043e-05, "loss": 0.8389, "step": 10541 }, { "epoch": 0.27, "grad_norm": 2.2377753257751465, "learning_rate": 1.7123699935741868e-05, "loss": 0.568, "step": 10542 }, { "epoch": 0.27, "grad_norm": 1.2256460189819336, "learning_rate": 1.7123117313532625e-05, "loss": 0.4056, "step": 10543 }, { "epoch": 0.27, "grad_norm": 3.9668898582458496, "learning_rate": 1.7122534642235336e-05, "loss": 0.767, "step": 10544 }, { "epoch": 0.27, "grad_norm": 3.419273853302002, "learning_rate": 1.7121951921854013e-05, "loss": 0.6667, "step": 10545 }, { "epoch": 0.27, "grad_norm": 3.282280206680298, "learning_rate": 1.7121369152392673e-05, "loss": 0.5653, "step": 10546 }, { "epoch": 0.27, "grad_norm": 2.1181397438049316, "learning_rate": 1.712078633385533e-05, "loss": 0.5398, "step": 10547 }, { "epoch": 0.27, "grad_norm": 1.3388288021087646, "learning_rate": 1.7120203466246e-05, "loss": 0.5487, "step": 10548 }, { "epoch": 0.27, "grad_norm": 6.227156639099121, "learning_rate": 1.7119620549568704e-05, "loss": 0.6448, "step": 10549 }, { "epoch": 0.27, "grad_norm": 2.6501119136810303, "learning_rate": 1.7119037583827455e-05, "loss": 0.6076, "step": 10550 }, { "epoch": 0.27, "grad_norm": 1.3342951536178589, "learning_rate": 1.7118454569026268e-05, "loss": 0.5433, "step": 10551 }, { "epoch": 0.27, "grad_norm": 2.7843918800354004, "learning_rate": 1.711787150516917e-05, "loss": 0.5785, "step": 10552 }, { "epoch": 0.27, "grad_norm": 3.2406108379364014, "learning_rate": 1.711728839226017e-05, "loss": 0.6738, "step": 10553 }, { "epoch": 0.27, "grad_norm": 1.219913363456726, "learning_rate": 1.7116705230303287e-05, "loss": 0.5554, "step": 10554 }, { "epoch": 0.27, "grad_norm": 2.671708345413208, "learning_rate": 1.7116122019302546e-05, "loss": 0.5726, "step": 10555 }, { "epoch": 0.27, "grad_norm": 1.7949217557907104, "learning_rate": 1.7115538759261966e-05, "loss": 0.6946, "step": 10556 }, { "epoch": 0.27, "grad_norm": 3.752833843231201, "learning_rate": 1.7114955450185558e-05, "loss": 0.6406, "step": 10557 }, { "epoch": 0.27, "grad_norm": 1.7003308534622192, "learning_rate": 1.711437209207735e-05, "loss": 0.6531, "step": 10558 }, { "epoch": 0.27, "grad_norm": 3.957108497619629, "learning_rate": 1.711378868494136e-05, "loss": 0.6693, "step": 10559 }, { "epoch": 0.27, "grad_norm": 1.932533860206604, "learning_rate": 1.7113205228781608e-05, "loss": 0.6925, "step": 10560 }, { "epoch": 0.27, "grad_norm": 2.2894606590270996, "learning_rate": 1.711262172360211e-05, "loss": 0.7346, "step": 10561 }, { "epoch": 0.27, "grad_norm": 2.047576904296875, "learning_rate": 1.7112038169406897e-05, "loss": 0.6302, "step": 10562 }, { "epoch": 0.27, "grad_norm": 1.2487149238586426, "learning_rate": 1.711145456619998e-05, "loss": 0.6823, "step": 10563 }, { "epoch": 0.27, "grad_norm": 4.0070319175720215, "learning_rate": 1.711087091398539e-05, "loss": 0.4675, "step": 10564 }, { "epoch": 0.27, "grad_norm": 4.251082420349121, "learning_rate": 1.7110287212767143e-05, "loss": 0.5465, "step": 10565 }, { "epoch": 0.27, "grad_norm": 1.3584078550338745, "learning_rate": 1.7109703462549264e-05, "loss": 0.5891, "step": 10566 }, { "epoch": 0.27, "grad_norm": 2.623624324798584, "learning_rate": 1.7109119663335777e-05, "loss": 0.6394, "step": 10567 }, { "epoch": 0.27, "grad_norm": 2.0767645835876465, "learning_rate": 1.71085358151307e-05, "loss": 0.5544, "step": 10568 }, { "epoch": 0.27, "grad_norm": 1.5719801187515259, "learning_rate": 1.7107951917938067e-05, "loss": 0.7038, "step": 10569 }, { "epoch": 0.27, "grad_norm": 2.531994342803955, "learning_rate": 1.710736797176189e-05, "loss": 0.5235, "step": 10570 }, { "epoch": 0.27, "grad_norm": 1.8548409938812256, "learning_rate": 1.7106783976606197e-05, "loss": 0.548, "step": 10571 }, { "epoch": 0.27, "grad_norm": 3.143843412399292, "learning_rate": 1.7106199932475017e-05, "loss": 0.6263, "step": 10572 }, { "epoch": 0.27, "grad_norm": 1.6218676567077637, "learning_rate": 1.710561583937237e-05, "loss": 0.6283, "step": 10573 }, { "epoch": 0.27, "grad_norm": 3.5354831218719482, "learning_rate": 1.7105031697302284e-05, "loss": 0.5833, "step": 10574 }, { "epoch": 0.27, "grad_norm": 2.3065168857574463, "learning_rate": 1.710444750626878e-05, "loss": 0.5187, "step": 10575 }, { "epoch": 0.27, "grad_norm": 1.420287847518921, "learning_rate": 1.7103863266275892e-05, "loss": 0.5617, "step": 10576 }, { "epoch": 0.27, "grad_norm": 5.639750003814697, "learning_rate": 1.710327897732764e-05, "loss": 0.6796, "step": 10577 }, { "epoch": 0.27, "grad_norm": 5.8313374519348145, "learning_rate": 1.710269463942805e-05, "loss": 0.6976, "step": 10578 }, { "epoch": 0.27, "grad_norm": 1.8245881795883179, "learning_rate": 1.7102110252581153e-05, "loss": 0.6687, "step": 10579 }, { "epoch": 0.27, "grad_norm": 5.150444030761719, "learning_rate": 1.7101525816790975e-05, "loss": 0.5695, "step": 10580 }, { "epoch": 0.27, "grad_norm": 2.7693517208099365, "learning_rate": 1.710094133206154e-05, "loss": 0.7801, "step": 10581 }, { "epoch": 0.27, "grad_norm": 2.363312244415283, "learning_rate": 1.7100356798396877e-05, "loss": 0.519, "step": 10582 }, { "epoch": 0.27, "grad_norm": 3.7362942695617676, "learning_rate": 1.7099772215801022e-05, "loss": 0.713, "step": 10583 }, { "epoch": 0.27, "grad_norm": 1.5109182596206665, "learning_rate": 1.7099187584277992e-05, "loss": 0.5847, "step": 10584 }, { "epoch": 0.27, "grad_norm": 4.573208332061768, "learning_rate": 1.7098602903831824e-05, "loss": 0.6452, "step": 10585 }, { "epoch": 0.27, "grad_norm": 2.3685402870178223, "learning_rate": 1.709801817446654e-05, "loss": 0.7227, "step": 10586 }, { "epoch": 0.27, "grad_norm": 3.9202325344085693, "learning_rate": 1.709743339618618e-05, "loss": 0.661, "step": 10587 }, { "epoch": 0.27, "grad_norm": 2.014533281326294, "learning_rate": 1.7096848568994765e-05, "loss": 0.5226, "step": 10588 }, { "epoch": 0.27, "grad_norm": 3.6923294067382812, "learning_rate": 1.7096263692896332e-05, "loss": 0.69, "step": 10589 }, { "epoch": 0.27, "grad_norm": 8.694920539855957, "learning_rate": 1.7095678767894902e-05, "loss": 0.5547, "step": 10590 }, { "epoch": 0.27, "grad_norm": 1.3678159713745117, "learning_rate": 1.7095093793994518e-05, "loss": 0.6751, "step": 10591 }, { "epoch": 0.27, "grad_norm": 1.8255432844161987, "learning_rate": 1.70945087711992e-05, "loss": 0.5149, "step": 10592 }, { "epoch": 0.27, "grad_norm": 3.0875606536865234, "learning_rate": 1.709392369951299e-05, "loss": 0.6973, "step": 10593 }, { "epoch": 0.27, "grad_norm": 1.5586268901824951, "learning_rate": 1.709333857893991e-05, "loss": 0.5728, "step": 10594 }, { "epoch": 0.27, "grad_norm": 1.5627448558807373, "learning_rate": 1.7092753409484e-05, "loss": 0.4377, "step": 10595 }, { "epoch": 0.27, "grad_norm": 1.0987004041671753, "learning_rate": 1.7092168191149286e-05, "loss": 0.5845, "step": 10596 }, { "epoch": 0.27, "grad_norm": 3.5979504585266113, "learning_rate": 1.7091582923939807e-05, "loss": 0.6698, "step": 10597 }, { "epoch": 0.27, "grad_norm": 8.205560684204102, "learning_rate": 1.7090997607859593e-05, "loss": 0.6572, "step": 10598 }, { "epoch": 0.27, "grad_norm": 6.75533390045166, "learning_rate": 1.709041224291268e-05, "loss": 0.4756, "step": 10599 }, { "epoch": 0.27, "grad_norm": 3.4672188758850098, "learning_rate": 1.70898268291031e-05, "loss": 0.5937, "step": 10600 }, { "epoch": 0.27, "grad_norm": 1.8124738931655884, "learning_rate": 1.7089241366434887e-05, "loss": 0.7006, "step": 10601 }, { "epoch": 0.27, "grad_norm": 1.8371126651763916, "learning_rate": 1.708865585491208e-05, "loss": 0.5757, "step": 10602 }, { "epoch": 0.27, "grad_norm": 1.688338279724121, "learning_rate": 1.7088070294538707e-05, "loss": 0.5591, "step": 10603 }, { "epoch": 0.27, "grad_norm": 1.583387017250061, "learning_rate": 1.7087484685318804e-05, "loss": 0.7135, "step": 10604 }, { "epoch": 0.27, "grad_norm": 3.936267852783203, "learning_rate": 1.7086899027256414e-05, "loss": 0.4877, "step": 10605 }, { "epoch": 0.27, "grad_norm": 1.843657374382019, "learning_rate": 1.7086313320355567e-05, "loss": 0.6257, "step": 10606 }, { "epoch": 0.27, "grad_norm": 1.8324943780899048, "learning_rate": 1.70857275646203e-05, "loss": 0.5883, "step": 10607 }, { "epoch": 0.27, "grad_norm": 3.9377901554107666, "learning_rate": 1.708514176005465e-05, "loss": 0.7341, "step": 10608 }, { "epoch": 0.27, "grad_norm": 3.162061929702759, "learning_rate": 1.7084555906662653e-05, "loss": 0.7198, "step": 10609 }, { "epoch": 0.27, "grad_norm": 4.524088382720947, "learning_rate": 1.7083970004448352e-05, "loss": 0.7091, "step": 10610 }, { "epoch": 0.27, "grad_norm": 2.4873852729797363, "learning_rate": 1.7083384053415777e-05, "loss": 0.8412, "step": 10611 }, { "epoch": 0.27, "grad_norm": 1.3890224695205688, "learning_rate": 1.708279805356897e-05, "loss": 0.5792, "step": 10612 }, { "epoch": 0.27, "grad_norm": 3.3586626052856445, "learning_rate": 1.7082212004911968e-05, "loss": 0.6863, "step": 10613 }, { "epoch": 0.27, "grad_norm": 2.069211006164551, "learning_rate": 1.708162590744881e-05, "loss": 0.5731, "step": 10614 }, { "epoch": 0.27, "grad_norm": 2.711285352706909, "learning_rate": 1.7081039761183537e-05, "loss": 0.6616, "step": 10615 }, { "epoch": 0.27, "grad_norm": 1.4303909540176392, "learning_rate": 1.7080453566120188e-05, "loss": 0.7296, "step": 10616 }, { "epoch": 0.27, "grad_norm": 2.1513657569885254, "learning_rate": 1.7079867322262797e-05, "loss": 0.679, "step": 10617 }, { "epoch": 0.27, "grad_norm": 1.9680403470993042, "learning_rate": 1.707928102961541e-05, "loss": 0.5496, "step": 10618 }, { "epoch": 0.27, "grad_norm": 1.7912137508392334, "learning_rate": 1.707869468818207e-05, "loss": 0.554, "step": 10619 }, { "epoch": 0.27, "grad_norm": 3.72456431388855, "learning_rate": 1.707810829796681e-05, "loss": 0.5759, "step": 10620 }, { "epoch": 0.27, "grad_norm": 1.7878648042678833, "learning_rate": 1.7077521858973677e-05, "loss": 0.6007, "step": 10621 }, { "epoch": 0.27, "grad_norm": 2.8009681701660156, "learning_rate": 1.7076935371206705e-05, "loss": 0.6421, "step": 10622 }, { "epoch": 0.27, "grad_norm": 2.894098997116089, "learning_rate": 1.7076348834669945e-05, "loss": 0.5156, "step": 10623 }, { "epoch": 0.27, "grad_norm": 4.555731296539307, "learning_rate": 1.707576224936743e-05, "loss": 0.6617, "step": 10624 }, { "epoch": 0.27, "grad_norm": 2.66739821434021, "learning_rate": 1.7075175615303212e-05, "loss": 0.6123, "step": 10625 }, { "epoch": 0.27, "grad_norm": 5.379076957702637, "learning_rate": 1.7074588932481327e-05, "loss": 0.6501, "step": 10626 }, { "epoch": 0.27, "grad_norm": 1.9986768960952759, "learning_rate": 1.707400220090582e-05, "loss": 0.6072, "step": 10627 }, { "epoch": 0.27, "grad_norm": 3.302137613296509, "learning_rate": 1.707341542058073e-05, "loss": 0.6146, "step": 10628 }, { "epoch": 0.27, "grad_norm": 1.5162503719329834, "learning_rate": 1.707282859151011e-05, "loss": 0.526, "step": 10629 }, { "epoch": 0.27, "grad_norm": 4.8826212882995605, "learning_rate": 1.7072241713697996e-05, "loss": 0.7921, "step": 10630 }, { "epoch": 0.27, "grad_norm": 1.3975393772125244, "learning_rate": 1.7071654787148438e-05, "loss": 0.6194, "step": 10631 }, { "epoch": 0.27, "grad_norm": 1.2841098308563232, "learning_rate": 1.7071067811865477e-05, "loss": 0.578, "step": 10632 }, { "epoch": 0.27, "grad_norm": 2.5940632820129395, "learning_rate": 1.7070480787853158e-05, "loss": 0.5437, "step": 10633 }, { "epoch": 0.27, "grad_norm": 2.7303524017333984, "learning_rate": 1.706989371511553e-05, "loss": 0.5711, "step": 10634 }, { "epoch": 0.27, "grad_norm": 9.362373352050781, "learning_rate": 1.7069306593656633e-05, "loss": 0.568, "step": 10635 }, { "epoch": 0.27, "grad_norm": 4.013896465301514, "learning_rate": 1.706871942348052e-05, "loss": 0.7525, "step": 10636 }, { "epoch": 0.27, "grad_norm": 2.1391801834106445, "learning_rate": 1.706813220459123e-05, "loss": 0.7472, "step": 10637 }, { "epoch": 0.27, "grad_norm": 1.4478394985198975, "learning_rate": 1.7067544936992816e-05, "loss": 0.7114, "step": 10638 }, { "epoch": 0.27, "grad_norm": 5.673261642456055, "learning_rate": 1.706695762068932e-05, "loss": 0.8807, "step": 10639 }, { "epoch": 0.27, "grad_norm": 1.7702707052230835, "learning_rate": 1.7066370255684797e-05, "loss": 0.7235, "step": 10640 }, { "epoch": 0.27, "grad_norm": 3.911405324935913, "learning_rate": 1.7065782841983285e-05, "loss": 0.7805, "step": 10641 }, { "epoch": 0.27, "grad_norm": 1.395450472831726, "learning_rate": 1.7065195379588837e-05, "loss": 0.5371, "step": 10642 }, { "epoch": 0.27, "grad_norm": 2.5085575580596924, "learning_rate": 1.7064607868505502e-05, "loss": 0.716, "step": 10643 }, { "epoch": 0.27, "grad_norm": 1.9342467784881592, "learning_rate": 1.706402030873733e-05, "loss": 0.5212, "step": 10644 }, { "epoch": 0.27, "grad_norm": 1.8748741149902344, "learning_rate": 1.7063432700288366e-05, "loss": 0.8108, "step": 10645 }, { "epoch": 0.27, "grad_norm": 9.614623069763184, "learning_rate": 1.7062845043162663e-05, "loss": 0.5647, "step": 10646 }, { "epoch": 0.27, "grad_norm": 4.493422031402588, "learning_rate": 1.7062257337364268e-05, "loss": 0.6386, "step": 10647 }, { "epoch": 0.27, "grad_norm": 3.5461533069610596, "learning_rate": 1.7061669582897234e-05, "loss": 0.5284, "step": 10648 }, { "epoch": 0.27, "grad_norm": 3.107541799545288, "learning_rate": 1.706108177976561e-05, "loss": 0.5962, "step": 10649 }, { "epoch": 0.27, "grad_norm": 2.852168083190918, "learning_rate": 1.706049392797344e-05, "loss": 0.7126, "step": 10650 }, { "epoch": 0.27, "grad_norm": 3.2908244132995605, "learning_rate": 1.705990602752479e-05, "loss": 0.6437, "step": 10651 }, { "epoch": 0.27, "grad_norm": 2.4570724964141846, "learning_rate": 1.7059318078423698e-05, "loss": 0.7208, "step": 10652 }, { "epoch": 0.27, "grad_norm": 2.8558733463287354, "learning_rate": 1.7058730080674225e-05, "loss": 0.5933, "step": 10653 }, { "epoch": 0.27, "grad_norm": 1.5574601888656616, "learning_rate": 1.7058142034280413e-05, "loss": 0.5702, "step": 10654 }, { "epoch": 0.27, "grad_norm": 1.660065770149231, "learning_rate": 1.7057553939246326e-05, "loss": 0.5808, "step": 10655 }, { "epoch": 0.27, "grad_norm": 2.1377601623535156, "learning_rate": 1.705696579557601e-05, "loss": 0.5257, "step": 10656 }, { "epoch": 0.27, "grad_norm": 2.283003568649292, "learning_rate": 1.705637760327352e-05, "loss": 0.7901, "step": 10657 }, { "epoch": 0.27, "grad_norm": 1.7981910705566406, "learning_rate": 1.7055789362342907e-05, "loss": 0.4168, "step": 10658 }, { "epoch": 0.27, "grad_norm": 1.1972088813781738, "learning_rate": 1.7055201072788226e-05, "loss": 0.5984, "step": 10659 }, { "epoch": 0.27, "grad_norm": 1.7986067533493042, "learning_rate": 1.7054612734613532e-05, "loss": 0.6434, "step": 10660 }, { "epoch": 0.27, "grad_norm": 3.05114483833313, "learning_rate": 1.705402434782288e-05, "loss": 0.4558, "step": 10661 }, { "epoch": 0.27, "grad_norm": 6.324521541595459, "learning_rate": 1.7053435912420327e-05, "loss": 0.6348, "step": 10662 }, { "epoch": 0.27, "grad_norm": 1.4326553344726562, "learning_rate": 1.705284742840992e-05, "loss": 0.5959, "step": 10663 }, { "epoch": 0.27, "grad_norm": 2.6587886810302734, "learning_rate": 1.7052258895795724e-05, "loss": 0.6025, "step": 10664 }, { "epoch": 0.27, "grad_norm": 2.293978452682495, "learning_rate": 1.7051670314581788e-05, "loss": 0.6593, "step": 10665 }, { "epoch": 0.27, "grad_norm": 1.788012981414795, "learning_rate": 1.705108168477217e-05, "loss": 0.5389, "step": 10666 }, { "epoch": 0.27, "grad_norm": 1.3887284994125366, "learning_rate": 1.7050493006370927e-05, "loss": 0.6117, "step": 10667 }, { "epoch": 0.27, "grad_norm": 1.2112215757369995, "learning_rate": 1.7049904279382117e-05, "loss": 0.6364, "step": 10668 }, { "epoch": 0.27, "grad_norm": 1.7147841453552246, "learning_rate": 1.7049315503809797e-05, "loss": 0.689, "step": 10669 }, { "epoch": 0.27, "grad_norm": 1.235589623451233, "learning_rate": 1.704872667965802e-05, "loss": 0.4342, "step": 10670 }, { "epoch": 0.27, "grad_norm": 3.814969539642334, "learning_rate": 1.7048137806930848e-05, "loss": 0.6456, "step": 10671 }, { "epoch": 0.27, "grad_norm": 11.351566314697266, "learning_rate": 1.704754888563234e-05, "loss": 0.8687, "step": 10672 }, { "epoch": 0.27, "grad_norm": 2.29015851020813, "learning_rate": 1.7046959915766553e-05, "loss": 0.5293, "step": 10673 }, { "epoch": 0.27, "grad_norm": 1.556877851486206, "learning_rate": 1.7046370897337544e-05, "loss": 0.5826, "step": 10674 }, { "epoch": 0.27, "grad_norm": 1.4870713949203491, "learning_rate": 1.7045781830349373e-05, "loss": 0.6129, "step": 10675 }, { "epoch": 0.27, "grad_norm": 6.598711967468262, "learning_rate": 1.7045192714806102e-05, "loss": 0.6168, "step": 10676 }, { "epoch": 0.27, "grad_norm": 1.9732266664505005, "learning_rate": 1.704460355071179e-05, "loss": 0.5711, "step": 10677 }, { "epoch": 0.27, "grad_norm": 1.5091989040374756, "learning_rate": 1.7044014338070493e-05, "loss": 0.6418, "step": 10678 }, { "epoch": 0.27, "grad_norm": 3.792818307876587, "learning_rate": 1.7043425076886277e-05, "loss": 0.604, "step": 10679 }, { "epoch": 0.27, "grad_norm": 4.07708215713501, "learning_rate": 1.70428357671632e-05, "loss": 0.6088, "step": 10680 }, { "epoch": 0.27, "grad_norm": 1.915000557899475, "learning_rate": 1.704224640890532e-05, "loss": 0.6852, "step": 10681 }, { "epoch": 0.27, "grad_norm": 1.541056752204895, "learning_rate": 1.7041657002116707e-05, "loss": 0.5726, "step": 10682 }, { "epoch": 0.27, "grad_norm": 3.3108835220336914, "learning_rate": 1.7041067546801416e-05, "loss": 0.7946, "step": 10683 }, { "epoch": 0.27, "grad_norm": 2.2182092666625977, "learning_rate": 1.7040478042963514e-05, "loss": 0.5392, "step": 10684 }, { "epoch": 0.27, "grad_norm": 2.7746949195861816, "learning_rate": 1.7039888490607056e-05, "loss": 0.4086, "step": 10685 }, { "epoch": 0.27, "grad_norm": 2.545170307159424, "learning_rate": 1.7039298889736112e-05, "loss": 0.6433, "step": 10686 }, { "epoch": 0.27, "grad_norm": 1.3798372745513916, "learning_rate": 1.703870924035474e-05, "loss": 0.6756, "step": 10687 }, { "epoch": 0.27, "grad_norm": 4.350090503692627, "learning_rate": 1.7038119542467012e-05, "loss": 0.7283, "step": 10688 }, { "epoch": 0.27, "grad_norm": 5.193234920501709, "learning_rate": 1.703752979607698e-05, "loss": 0.6861, "step": 10689 }, { "epoch": 0.27, "grad_norm": 2.0357208251953125, "learning_rate": 1.7036940001188718e-05, "loss": 0.7196, "step": 10690 }, { "epoch": 0.27, "grad_norm": 2.23648738861084, "learning_rate": 1.7036350157806282e-05, "loss": 0.6713, "step": 10691 }, { "epoch": 0.27, "grad_norm": 2.190101385116577, "learning_rate": 1.7035760265933747e-05, "loss": 0.55, "step": 10692 }, { "epoch": 0.27, "grad_norm": 1.297954797744751, "learning_rate": 1.703517032557517e-05, "loss": 0.7172, "step": 10693 }, { "epoch": 0.27, "grad_norm": 2.1590988636016846, "learning_rate": 1.7034580336734617e-05, "loss": 0.5345, "step": 10694 }, { "epoch": 0.27, "grad_norm": 2.0124640464782715, "learning_rate": 1.703399029941616e-05, "loss": 0.7346, "step": 10695 }, { "epoch": 0.27, "grad_norm": 2.198265790939331, "learning_rate": 1.703340021362386e-05, "loss": 0.6777, "step": 10696 }, { "epoch": 0.27, "grad_norm": 1.8568625450134277, "learning_rate": 1.703281007936178e-05, "loss": 0.7325, "step": 10697 }, { "epoch": 0.27, "grad_norm": 1.5353436470031738, "learning_rate": 1.7032219896633995e-05, "loss": 0.7176, "step": 10698 }, { "epoch": 0.27, "grad_norm": 2.8651442527770996, "learning_rate": 1.7031629665444568e-05, "loss": 0.602, "step": 10699 }, { "epoch": 0.27, "grad_norm": 6.258308410644531, "learning_rate": 1.7031039385797568e-05, "loss": 0.6734, "step": 10700 }, { "epoch": 0.27, "grad_norm": 1.5201542377471924, "learning_rate": 1.703044905769706e-05, "loss": 0.5734, "step": 10701 }, { "epoch": 0.27, "grad_norm": 1.627387285232544, "learning_rate": 1.7029858681147114e-05, "loss": 0.5441, "step": 10702 }, { "epoch": 0.27, "grad_norm": 4.918879985809326, "learning_rate": 1.70292682561518e-05, "loss": 0.727, "step": 10703 }, { "epoch": 0.27, "grad_norm": 6.11279296875, "learning_rate": 1.7028677782715187e-05, "loss": 0.7065, "step": 10704 }, { "epoch": 0.27, "grad_norm": 4.841457366943359, "learning_rate": 1.7028087260841338e-05, "loss": 0.5372, "step": 10705 }, { "epoch": 0.27, "grad_norm": 1.8555607795715332, "learning_rate": 1.7027496690534332e-05, "loss": 0.689, "step": 10706 }, { "epoch": 0.27, "grad_norm": 1.6846733093261719, "learning_rate": 1.702690607179823e-05, "loss": 0.5663, "step": 10707 }, { "epoch": 0.27, "grad_norm": 3.5357837677001953, "learning_rate": 1.702631540463711e-05, "loss": 0.7616, "step": 10708 }, { "epoch": 0.27, "grad_norm": 1.5365636348724365, "learning_rate": 1.7025724689055036e-05, "loss": 0.6398, "step": 10709 }, { "epoch": 0.27, "grad_norm": 1.8572068214416504, "learning_rate": 1.702513392505608e-05, "loss": 0.4439, "step": 10710 }, { "epoch": 0.27, "grad_norm": 6.214900970458984, "learning_rate": 1.7024543112644317e-05, "loss": 0.56, "step": 10711 }, { "epoch": 0.27, "grad_norm": 1.2544978857040405, "learning_rate": 1.7023952251823814e-05, "loss": 0.6429, "step": 10712 }, { "epoch": 0.27, "grad_norm": 1.8782873153686523, "learning_rate": 1.7023361342598647e-05, "loss": 0.6037, "step": 10713 }, { "epoch": 0.27, "grad_norm": 27.61713218688965, "learning_rate": 1.7022770384972887e-05, "loss": 0.681, "step": 10714 }, { "epoch": 0.27, "grad_norm": 7.121687889099121, "learning_rate": 1.7022179378950605e-05, "loss": 0.735, "step": 10715 }, { "epoch": 0.27, "grad_norm": 1.969274640083313, "learning_rate": 1.7021588324535873e-05, "loss": 0.6524, "step": 10716 }, { "epoch": 0.27, "grad_norm": 2.7021756172180176, "learning_rate": 1.7020997221732765e-05, "loss": 0.4862, "step": 10717 }, { "epoch": 0.27, "grad_norm": 1.4888508319854736, "learning_rate": 1.702040607054536e-05, "loss": 0.6741, "step": 10718 }, { "epoch": 0.27, "grad_norm": 2.076514482498169, "learning_rate": 1.701981487097772e-05, "loss": 0.6726, "step": 10719 }, { "epoch": 0.27, "grad_norm": 2.8230271339416504, "learning_rate": 1.701922362303393e-05, "loss": 0.5484, "step": 10720 }, { "epoch": 0.27, "grad_norm": 1.1255412101745605, "learning_rate": 1.701863232671806e-05, "loss": 0.4653, "step": 10721 }, { "epoch": 0.27, "grad_norm": 3.72204327583313, "learning_rate": 1.7018040982034185e-05, "loss": 0.5011, "step": 10722 }, { "epoch": 0.27, "grad_norm": 2.1110408306121826, "learning_rate": 1.7017449588986384e-05, "loss": 0.7125, "step": 10723 }, { "epoch": 0.27, "grad_norm": 7.069624423980713, "learning_rate": 1.7016858147578728e-05, "loss": 0.6125, "step": 10724 }, { "epoch": 0.27, "grad_norm": 4.470709800720215, "learning_rate": 1.7016266657815293e-05, "loss": 0.7028, "step": 10725 }, { "epoch": 0.27, "grad_norm": 2.202679395675659, "learning_rate": 1.7015675119700154e-05, "loss": 0.6016, "step": 10726 }, { "epoch": 0.27, "grad_norm": 1.791212797164917, "learning_rate": 1.7015083533237394e-05, "loss": 0.5813, "step": 10727 }, { "epoch": 0.27, "grad_norm": 1.1806539297103882, "learning_rate": 1.7014491898431083e-05, "loss": 0.5539, "step": 10728 }, { "epoch": 0.27, "grad_norm": 6.1942291259765625, "learning_rate": 1.70139002152853e-05, "loss": 0.6836, "step": 10729 }, { "epoch": 0.28, "grad_norm": 1.7336041927337646, "learning_rate": 1.7013308483804125e-05, "loss": 0.5714, "step": 10730 }, { "epoch": 0.28, "grad_norm": 3.062162160873413, "learning_rate": 1.701271670399163e-05, "loss": 0.7153, "step": 10731 }, { "epoch": 0.28, "grad_norm": 2.5266058444976807, "learning_rate": 1.7012124875851907e-05, "loss": 0.524, "step": 10732 }, { "epoch": 0.28, "grad_norm": 3.10891056060791, "learning_rate": 1.7011532999389016e-05, "loss": 0.5792, "step": 10733 }, { "epoch": 0.28, "grad_norm": 7.542961120605469, "learning_rate": 1.7010941074607047e-05, "loss": 0.6221, "step": 10734 }, { "epoch": 0.28, "grad_norm": 6.656444072723389, "learning_rate": 1.7010349101510078e-05, "loss": 0.8112, "step": 10735 }, { "epoch": 0.28, "grad_norm": 2.7026031017303467, "learning_rate": 1.7009757080102188e-05, "loss": 0.6418, "step": 10736 }, { "epoch": 0.28, "grad_norm": 1.813892126083374, "learning_rate": 1.7009165010387454e-05, "loss": 0.5769, "step": 10737 }, { "epoch": 0.28, "grad_norm": 2.8207156658172607, "learning_rate": 1.7008572892369957e-05, "loss": 0.4727, "step": 10738 }, { "epoch": 0.28, "grad_norm": 2.367443323135376, "learning_rate": 1.7007980726053782e-05, "loss": 0.6884, "step": 10739 }, { "epoch": 0.28, "grad_norm": 2.6157724857330322, "learning_rate": 1.7007388511443006e-05, "loss": 0.7834, "step": 10740 }, { "epoch": 0.28, "grad_norm": 2.1090188026428223, "learning_rate": 1.700679624854171e-05, "loss": 0.6984, "step": 10741 }, { "epoch": 0.28, "grad_norm": 4.456668853759766, "learning_rate": 1.7006203937353976e-05, "loss": 0.7084, "step": 10742 }, { "epoch": 0.28, "grad_norm": 2.3970766067504883, "learning_rate": 1.7005611577883887e-05, "loss": 0.6305, "step": 10743 }, { "epoch": 0.28, "grad_norm": 3.520932197570801, "learning_rate": 1.7005019170135525e-05, "loss": 0.6648, "step": 10744 }, { "epoch": 0.28, "grad_norm": 6.537268161773682, "learning_rate": 1.700442671411297e-05, "loss": 0.7976, "step": 10745 }, { "epoch": 0.28, "grad_norm": 6.715425968170166, "learning_rate": 1.7003834209820308e-05, "loss": 0.7618, "step": 10746 }, { "epoch": 0.28, "grad_norm": 2.033916473388672, "learning_rate": 1.700324165726162e-05, "loss": 0.6924, "step": 10747 }, { "epoch": 0.28, "grad_norm": 4.470858097076416, "learning_rate": 1.7002649056440993e-05, "loss": 0.7175, "step": 10748 }, { "epoch": 0.28, "grad_norm": 3.1824045181274414, "learning_rate": 1.700205640736251e-05, "loss": 0.6557, "step": 10749 }, { "epoch": 0.28, "grad_norm": 1.5566248893737793, "learning_rate": 1.7001463710030245e-05, "loss": 0.5639, "step": 10750 }, { "epoch": 0.28, "grad_norm": 1.4320528507232666, "learning_rate": 1.70008709644483e-05, "loss": 0.6892, "step": 10751 }, { "epoch": 0.28, "grad_norm": 1.1039689779281616, "learning_rate": 1.7000278170620744e-05, "loss": 0.4767, "step": 10752 }, { "epoch": 0.28, "grad_norm": 1.178141713142395, "learning_rate": 1.699968532855167e-05, "loss": 0.6664, "step": 10753 }, { "epoch": 0.28, "grad_norm": 2.384977340698242, "learning_rate": 1.6999092438245163e-05, "loss": 0.5898, "step": 10754 }, { "epoch": 0.28, "grad_norm": 2.9100165367126465, "learning_rate": 1.699849949970531e-05, "loss": 0.4888, "step": 10755 }, { "epoch": 0.28, "grad_norm": 1.4687747955322266, "learning_rate": 1.6997906512936192e-05, "loss": 0.603, "step": 10756 }, { "epoch": 0.28, "grad_norm": 6.897499084472656, "learning_rate": 1.6997313477941904e-05, "loss": 0.5824, "step": 10757 }, { "epoch": 0.28, "grad_norm": 4.647179126739502, "learning_rate": 1.6996720394726526e-05, "loss": 0.6386, "step": 10758 }, { "epoch": 0.28, "grad_norm": 3.6385457515716553, "learning_rate": 1.6996127263294142e-05, "loss": 0.6011, "step": 10759 }, { "epoch": 0.28, "grad_norm": 0.8748733997344971, "learning_rate": 1.6995534083648847e-05, "loss": 0.4676, "step": 10760 }, { "epoch": 0.28, "grad_norm": 2.2234983444213867, "learning_rate": 1.699494085579473e-05, "loss": 0.5955, "step": 10761 }, { "epoch": 0.28, "grad_norm": 2.424417734146118, "learning_rate": 1.6994347579735867e-05, "loss": 0.6581, "step": 10762 }, { "epoch": 0.28, "grad_norm": 1.4225330352783203, "learning_rate": 1.6993754255476362e-05, "loss": 0.6412, "step": 10763 }, { "epoch": 0.28, "grad_norm": 6.01723051071167, "learning_rate": 1.6993160883020293e-05, "loss": 0.7278, "step": 10764 }, { "epoch": 0.28, "grad_norm": 1.2307335138320923, "learning_rate": 1.6992567462371756e-05, "loss": 0.5565, "step": 10765 }, { "epoch": 0.28, "grad_norm": 3.8433244228363037, "learning_rate": 1.6991973993534836e-05, "loss": 0.6055, "step": 10766 }, { "epoch": 0.28, "grad_norm": 2.0895376205444336, "learning_rate": 1.699138047651362e-05, "loss": 0.7739, "step": 10767 }, { "epoch": 0.28, "grad_norm": 4.096584796905518, "learning_rate": 1.6990786911312206e-05, "loss": 0.6197, "step": 10768 }, { "epoch": 0.28, "grad_norm": 2.6140809059143066, "learning_rate": 1.699019329793468e-05, "loss": 0.5784, "step": 10769 }, { "epoch": 0.28, "grad_norm": 5.594974994659424, "learning_rate": 1.6989599636385135e-05, "loss": 0.6607, "step": 10770 }, { "epoch": 0.28, "grad_norm": 1.5376198291778564, "learning_rate": 1.698900592666766e-05, "loss": 0.4332, "step": 10771 }, { "epoch": 0.28, "grad_norm": 1.8946200609207153, "learning_rate": 1.6988412168786346e-05, "loss": 0.6626, "step": 10772 }, { "epoch": 0.28, "grad_norm": 1.9213242530822754, "learning_rate": 1.6987818362745288e-05, "loss": 0.8008, "step": 10773 }, { "epoch": 0.28, "grad_norm": 2.5726051330566406, "learning_rate": 1.698722450854857e-05, "loss": 0.513, "step": 10774 }, { "epoch": 0.28, "grad_norm": 3.487917184829712, "learning_rate": 1.6986630606200297e-05, "loss": 0.5174, "step": 10775 }, { "epoch": 0.28, "grad_norm": 2.394672155380249, "learning_rate": 1.698603665570455e-05, "loss": 0.6921, "step": 10776 }, { "epoch": 0.28, "grad_norm": 1.4064757823944092, "learning_rate": 1.6985442657065433e-05, "loss": 0.5346, "step": 10777 }, { "epoch": 0.28, "grad_norm": 2.141967535018921, "learning_rate": 1.6984848610287034e-05, "loss": 0.7158, "step": 10778 }, { "epoch": 0.28, "grad_norm": 5.018703460693359, "learning_rate": 1.6984254515373443e-05, "loss": 0.657, "step": 10779 }, { "epoch": 0.28, "grad_norm": 3.717656373977661, "learning_rate": 1.6983660372328757e-05, "loss": 0.8235, "step": 10780 }, { "epoch": 0.28, "grad_norm": 5.398063659667969, "learning_rate": 1.698306618115707e-05, "loss": 0.7466, "step": 10781 }, { "epoch": 0.28, "grad_norm": 1.500860333442688, "learning_rate": 1.6982471941862483e-05, "loss": 0.5739, "step": 10782 }, { "epoch": 0.28, "grad_norm": 1.2587785720825195, "learning_rate": 1.6981877654449085e-05, "loss": 0.5542, "step": 10783 }, { "epoch": 0.28, "grad_norm": 5.985598564147949, "learning_rate": 1.6981283318920972e-05, "loss": 0.6153, "step": 10784 }, { "epoch": 0.28, "grad_norm": 3.4026105403900146, "learning_rate": 1.6980688935282242e-05, "loss": 0.483, "step": 10785 }, { "epoch": 0.28, "grad_norm": 2.018738031387329, "learning_rate": 1.6980094503536988e-05, "loss": 0.677, "step": 10786 }, { "epoch": 0.28, "grad_norm": 2.4420008659362793, "learning_rate": 1.6979500023689305e-05, "loss": 0.7873, "step": 10787 }, { "epoch": 0.28, "grad_norm": 2.1418862342834473, "learning_rate": 1.69789054957433e-05, "loss": 0.6807, "step": 10788 }, { "epoch": 0.28, "grad_norm": 2.2742884159088135, "learning_rate": 1.6978310919703055e-05, "loss": 0.5991, "step": 10789 }, { "epoch": 0.28, "grad_norm": 5.830355644226074, "learning_rate": 1.697771629557268e-05, "loss": 0.5679, "step": 10790 }, { "epoch": 0.28, "grad_norm": 3.779797315597534, "learning_rate": 1.6977121623356267e-05, "loss": 0.6716, "step": 10791 }, { "epoch": 0.28, "grad_norm": 11.428433418273926, "learning_rate": 1.6976526903057916e-05, "loss": 0.6341, "step": 10792 }, { "epoch": 0.28, "grad_norm": 3.468357801437378, "learning_rate": 1.6975932134681724e-05, "loss": 0.6061, "step": 10793 }, { "epoch": 0.28, "grad_norm": 2.703011989593506, "learning_rate": 1.697533731823179e-05, "loss": 0.6274, "step": 10794 }, { "epoch": 0.28, "grad_norm": 2.1641433238983154, "learning_rate": 1.6974742453712212e-05, "loss": 0.6728, "step": 10795 }, { "epoch": 0.28, "grad_norm": 6.205154895782471, "learning_rate": 1.6974147541127096e-05, "loss": 0.6825, "step": 10796 }, { "epoch": 0.28, "grad_norm": 1.6800001859664917, "learning_rate": 1.6973552580480534e-05, "loss": 0.5741, "step": 10797 }, { "epoch": 0.28, "grad_norm": 2.971198797225952, "learning_rate": 1.697295757177663e-05, "loss": 0.5481, "step": 10798 }, { "epoch": 0.28, "grad_norm": 2.826079845428467, "learning_rate": 1.697236251501948e-05, "loss": 0.5599, "step": 10799 }, { "epoch": 0.28, "grad_norm": 1.3554011583328247, "learning_rate": 1.697176741021319e-05, "loss": 0.5749, "step": 10800 }, { "epoch": 0.28, "grad_norm": 1.444543719291687, "learning_rate": 1.6971172257361858e-05, "loss": 0.629, "step": 10801 }, { "epoch": 0.28, "grad_norm": 2.544130563735962, "learning_rate": 1.697057705646959e-05, "loss": 0.7006, "step": 10802 }, { "epoch": 0.28, "grad_norm": 4.472865581512451, "learning_rate": 1.696998180754048e-05, "loss": 0.5158, "step": 10803 }, { "epoch": 0.28, "grad_norm": 3.24280047416687, "learning_rate": 1.6969386510578635e-05, "loss": 0.5172, "step": 10804 }, { "epoch": 0.28, "grad_norm": 2.268848419189453, "learning_rate": 1.6968791165588156e-05, "loss": 0.7619, "step": 10805 }, { "epoch": 0.28, "grad_norm": 3.550945520401001, "learning_rate": 1.696819577257315e-05, "loss": 0.7534, "step": 10806 }, { "epoch": 0.28, "grad_norm": 2.115163564682007, "learning_rate": 1.6967600331537715e-05, "loss": 0.7945, "step": 10807 }, { "epoch": 0.28, "grad_norm": 8.629799842834473, "learning_rate": 1.6967004842485955e-05, "loss": 0.7264, "step": 10808 }, { "epoch": 0.28, "grad_norm": 2.480994701385498, "learning_rate": 1.6966409305421977e-05, "loss": 0.6932, "step": 10809 }, { "epoch": 0.28, "grad_norm": 3.0773186683654785, "learning_rate": 1.6965813720349878e-05, "loss": 0.6051, "step": 10810 }, { "epoch": 0.28, "grad_norm": 1.6788169145584106, "learning_rate": 1.6965218087273773e-05, "loss": 0.5277, "step": 10811 }, { "epoch": 0.28, "grad_norm": 8.327675819396973, "learning_rate": 1.696462240619776e-05, "loss": 0.8073, "step": 10812 }, { "epoch": 0.28, "grad_norm": 1.6762549877166748, "learning_rate": 1.696402667712594e-05, "loss": 0.5736, "step": 10813 }, { "epoch": 0.28, "grad_norm": 1.2973052263259888, "learning_rate": 1.696343090006243e-05, "loss": 0.7045, "step": 10814 }, { "epoch": 0.28, "grad_norm": 2.9765825271606445, "learning_rate": 1.6962835075011326e-05, "loss": 0.7496, "step": 10815 }, { "epoch": 0.28, "grad_norm": 1.1933958530426025, "learning_rate": 1.696223920197674e-05, "loss": 0.5229, "step": 10816 }, { "epoch": 0.28, "grad_norm": 1.864619493484497, "learning_rate": 1.6961643280962768e-05, "loss": 0.5639, "step": 10817 }, { "epoch": 0.28, "grad_norm": 1.7903364896774292, "learning_rate": 1.696104731197353e-05, "loss": 0.5675, "step": 10818 }, { "epoch": 0.28, "grad_norm": 4.683073997497559, "learning_rate": 1.6960451295013126e-05, "loss": 0.5615, "step": 10819 }, { "epoch": 0.28, "grad_norm": 4.5072126388549805, "learning_rate": 1.6959855230085665e-05, "loss": 0.7624, "step": 10820 }, { "epoch": 0.28, "grad_norm": 4.289661407470703, "learning_rate": 1.6959259117195258e-05, "loss": 0.7898, "step": 10821 }, { "epoch": 0.28, "grad_norm": 5.150883674621582, "learning_rate": 1.6958662956346005e-05, "loss": 0.4479, "step": 10822 }, { "epoch": 0.28, "grad_norm": 1.6610223054885864, "learning_rate": 1.6958066747542016e-05, "loss": 0.6124, "step": 10823 }, { "epoch": 0.28, "grad_norm": 3.4415407180786133, "learning_rate": 1.6957470490787406e-05, "loss": 0.7588, "step": 10824 }, { "epoch": 0.28, "grad_norm": 7.004415988922119, "learning_rate": 1.6956874186086283e-05, "loss": 0.5902, "step": 10825 }, { "epoch": 0.28, "grad_norm": 4.6181488037109375, "learning_rate": 1.695627783344275e-05, "loss": 0.5521, "step": 10826 }, { "epoch": 0.28, "grad_norm": 1.6878764629364014, "learning_rate": 1.6955681432860923e-05, "loss": 0.5478, "step": 10827 }, { "epoch": 0.28, "grad_norm": 3.4969937801361084, "learning_rate": 1.695508498434491e-05, "loss": 0.6052, "step": 10828 }, { "epoch": 0.28, "grad_norm": 1.9428821802139282, "learning_rate": 1.6954488487898813e-05, "loss": 0.5887, "step": 10829 }, { "epoch": 0.28, "grad_norm": 2.7610318660736084, "learning_rate": 1.695389194352676e-05, "loss": 0.5951, "step": 10830 }, { "epoch": 0.28, "grad_norm": 2.816288709640503, "learning_rate": 1.695329535123285e-05, "loss": 0.5952, "step": 10831 }, { "epoch": 0.28, "grad_norm": 3.2661561965942383, "learning_rate": 1.6952698711021197e-05, "loss": 0.6145, "step": 10832 }, { "epoch": 0.28, "grad_norm": 2.123478412628174, "learning_rate": 1.695210202289591e-05, "loss": 0.6326, "step": 10833 }, { "epoch": 0.28, "grad_norm": 1.6815804243087769, "learning_rate": 1.6951505286861107e-05, "loss": 0.6523, "step": 10834 }, { "epoch": 0.28, "grad_norm": 1.6590276956558228, "learning_rate": 1.6950908502920892e-05, "loss": 0.5815, "step": 10835 }, { "epoch": 0.28, "grad_norm": 1.2806845903396606, "learning_rate": 1.695031167107939e-05, "loss": 0.5046, "step": 10836 }, { "epoch": 0.28, "grad_norm": 3.260067939758301, "learning_rate": 1.69497147913407e-05, "loss": 0.4848, "step": 10837 }, { "epoch": 0.28, "grad_norm": 3.903700113296509, "learning_rate": 1.6949117863708946e-05, "loss": 0.6676, "step": 10838 }, { "epoch": 0.28, "grad_norm": 2.4397926330566406, "learning_rate": 1.6948520888188235e-05, "loss": 0.6631, "step": 10839 }, { "epoch": 0.28, "grad_norm": 1.810437798500061, "learning_rate": 1.6947923864782686e-05, "loss": 0.6704, "step": 10840 }, { "epoch": 0.28, "grad_norm": 1.9162505865097046, "learning_rate": 1.6947326793496407e-05, "loss": 0.5802, "step": 10841 }, { "epoch": 0.28, "grad_norm": 2.4916396141052246, "learning_rate": 1.6946729674333518e-05, "loss": 0.6495, "step": 10842 }, { "epoch": 0.28, "grad_norm": 2.7513113021850586, "learning_rate": 1.6946132507298133e-05, "loss": 0.7744, "step": 10843 }, { "epoch": 0.28, "grad_norm": 1.4830684661865234, "learning_rate": 1.694553529239437e-05, "loss": 0.5082, "step": 10844 }, { "epoch": 0.28, "grad_norm": 1.998164176940918, "learning_rate": 1.6944938029626338e-05, "loss": 0.5703, "step": 10845 }, { "epoch": 0.28, "grad_norm": 1.37984299659729, "learning_rate": 1.6944340718998157e-05, "loss": 0.5974, "step": 10846 }, { "epoch": 0.28, "grad_norm": 2.014810085296631, "learning_rate": 1.6943743360513942e-05, "loss": 0.7223, "step": 10847 }, { "epoch": 0.28, "grad_norm": 1.3687782287597656, "learning_rate": 1.6943145954177808e-05, "loss": 0.6663, "step": 10848 }, { "epoch": 0.28, "grad_norm": 5.269414901733398, "learning_rate": 1.694254849999388e-05, "loss": 0.5823, "step": 10849 }, { "epoch": 0.28, "grad_norm": 2.646909713745117, "learning_rate": 1.6941950997966264e-05, "loss": 0.6558, "step": 10850 }, { "epoch": 0.28, "grad_norm": 2.547654628753662, "learning_rate": 1.6941353448099086e-05, "loss": 0.6106, "step": 10851 }, { "epoch": 0.28, "grad_norm": 2.066507339477539, "learning_rate": 1.694075585039646e-05, "loss": 0.8065, "step": 10852 }, { "epoch": 0.28, "grad_norm": 2.311461925506592, "learning_rate": 1.6940158204862507e-05, "loss": 0.6056, "step": 10853 }, { "epoch": 0.28, "grad_norm": 2.0036330223083496, "learning_rate": 1.693956051150134e-05, "loss": 0.6542, "step": 10854 }, { "epoch": 0.28, "grad_norm": 1.2703782320022583, "learning_rate": 1.6938962770317086e-05, "loss": 0.4505, "step": 10855 }, { "epoch": 0.28, "grad_norm": 3.7602548599243164, "learning_rate": 1.6938364981313858e-05, "loss": 0.8001, "step": 10856 }, { "epoch": 0.28, "grad_norm": 1.7228012084960938, "learning_rate": 1.693776714449578e-05, "loss": 0.6136, "step": 10857 }, { "epoch": 0.28, "grad_norm": 2.161360025405884, "learning_rate": 1.6937169259866967e-05, "loss": 0.7396, "step": 10858 }, { "epoch": 0.28, "grad_norm": 1.77908456325531, "learning_rate": 1.6936571327431543e-05, "loss": 0.6005, "step": 10859 }, { "epoch": 0.28, "grad_norm": 9.479955673217773, "learning_rate": 1.6935973347193626e-05, "loss": 0.7102, "step": 10860 }, { "epoch": 0.28, "grad_norm": 1.0491681098937988, "learning_rate": 1.6935375319157338e-05, "loss": 0.5488, "step": 10861 }, { "epoch": 0.28, "grad_norm": 2.998624563217163, "learning_rate": 1.6934777243326802e-05, "loss": 0.5742, "step": 10862 }, { "epoch": 0.28, "grad_norm": 1.2197275161743164, "learning_rate": 1.693417911970614e-05, "loss": 0.5979, "step": 10863 }, { "epoch": 0.28, "grad_norm": 2.348191261291504, "learning_rate": 1.693358094829947e-05, "loss": 0.7478, "step": 10864 }, { "epoch": 0.28, "grad_norm": 1.3103623390197754, "learning_rate": 1.6932982729110914e-05, "loss": 0.4339, "step": 10865 }, { "epoch": 0.28, "grad_norm": 2.5147716999053955, "learning_rate": 1.69323844621446e-05, "loss": 0.6571, "step": 10866 }, { "epoch": 0.28, "grad_norm": 2.190718650817871, "learning_rate": 1.6931786147404646e-05, "loss": 0.6166, "step": 10867 }, { "epoch": 0.28, "grad_norm": 3.3631324768066406, "learning_rate": 1.6931187784895175e-05, "loss": 0.7717, "step": 10868 }, { "epoch": 0.28, "grad_norm": 2.4286975860595703, "learning_rate": 1.6930589374620318e-05, "loss": 0.647, "step": 10869 }, { "epoch": 0.28, "grad_norm": 3.3246378898620605, "learning_rate": 1.6929990916584186e-05, "loss": 0.5924, "step": 10870 }, { "epoch": 0.28, "grad_norm": 1.5458980798721313, "learning_rate": 1.6929392410790914e-05, "loss": 0.5979, "step": 10871 }, { "epoch": 0.28, "grad_norm": 2.458245277404785, "learning_rate": 1.6928793857244624e-05, "loss": 0.7867, "step": 10872 }, { "epoch": 0.28, "grad_norm": 2.0373220443725586, "learning_rate": 1.692819525594944e-05, "loss": 0.5851, "step": 10873 }, { "epoch": 0.28, "grad_norm": 1.8689017295837402, "learning_rate": 1.6927596606909487e-05, "loss": 0.6581, "step": 10874 }, { "epoch": 0.28, "grad_norm": 2.578498601913452, "learning_rate": 1.692699791012889e-05, "loss": 0.6021, "step": 10875 }, { "epoch": 0.28, "grad_norm": 1.7820261716842651, "learning_rate": 1.6926399165611775e-05, "loss": 0.4843, "step": 10876 }, { "epoch": 0.28, "grad_norm": 1.2880208492279053, "learning_rate": 1.6925800373362267e-05, "loss": 0.5664, "step": 10877 }, { "epoch": 0.28, "grad_norm": 3.550764799118042, "learning_rate": 1.6925201533384495e-05, "loss": 0.7693, "step": 10878 }, { "epoch": 0.28, "grad_norm": 4.641972541809082, "learning_rate": 1.6924602645682586e-05, "loss": 0.6717, "step": 10879 }, { "epoch": 0.28, "grad_norm": 1.3050540685653687, "learning_rate": 1.6924003710260664e-05, "loss": 0.4721, "step": 10880 }, { "epoch": 0.28, "grad_norm": 1.627431869506836, "learning_rate": 1.6923404727122862e-05, "loss": 0.4622, "step": 10881 }, { "epoch": 0.28, "grad_norm": 9.167265892028809, "learning_rate": 1.6922805696273304e-05, "loss": 0.7062, "step": 10882 }, { "epoch": 0.28, "grad_norm": 1.9085475206375122, "learning_rate": 1.6922206617716116e-05, "loss": 0.6005, "step": 10883 }, { "epoch": 0.28, "grad_norm": 1.7639875411987305, "learning_rate": 1.692160749145543e-05, "loss": 0.7099, "step": 10884 }, { "epoch": 0.28, "grad_norm": 1.549432635307312, "learning_rate": 1.6921008317495374e-05, "loss": 0.534, "step": 10885 }, { "epoch": 0.28, "grad_norm": 5.784168720245361, "learning_rate": 1.6920409095840077e-05, "loss": 0.6275, "step": 10886 }, { "epoch": 0.28, "grad_norm": 3.260542392730713, "learning_rate": 1.6919809826493664e-05, "loss": 0.5839, "step": 10887 }, { "epoch": 0.28, "grad_norm": 1.3801440000534058, "learning_rate": 1.6919210509460275e-05, "loss": 0.5122, "step": 10888 }, { "epoch": 0.28, "grad_norm": 1.5150550603866577, "learning_rate": 1.6918611144744034e-05, "loss": 0.632, "step": 10889 }, { "epoch": 0.28, "grad_norm": 1.9240219593048096, "learning_rate": 1.691801173234907e-05, "loss": 0.6571, "step": 10890 }, { "epoch": 0.28, "grad_norm": 1.625636100769043, "learning_rate": 1.691741227227951e-05, "loss": 0.5883, "step": 10891 }, { "epoch": 0.28, "grad_norm": 6.165071487426758, "learning_rate": 1.6916812764539497e-05, "loss": 0.5097, "step": 10892 }, { "epoch": 0.28, "grad_norm": 1.570717453956604, "learning_rate": 1.6916213209133158e-05, "loss": 0.487, "step": 10893 }, { "epoch": 0.28, "grad_norm": 2.340297222137451, "learning_rate": 1.6915613606064617e-05, "loss": 0.6778, "step": 10894 }, { "epoch": 0.28, "grad_norm": 1.658130407333374, "learning_rate": 1.6915013955338014e-05, "loss": 0.6896, "step": 10895 }, { "epoch": 0.28, "grad_norm": 1.7677679061889648, "learning_rate": 1.691441425695748e-05, "loss": 0.5164, "step": 10896 }, { "epoch": 0.28, "grad_norm": 1.4755178689956665, "learning_rate": 1.6913814510927145e-05, "loss": 0.617, "step": 10897 }, { "epoch": 0.28, "grad_norm": 2.5655972957611084, "learning_rate": 1.691321471725115e-05, "loss": 0.6649, "step": 10898 }, { "epoch": 0.28, "grad_norm": 6.713710784912109, "learning_rate": 1.6912614875933616e-05, "loss": 0.8332, "step": 10899 }, { "epoch": 0.28, "grad_norm": 3.2883784770965576, "learning_rate": 1.6912014986978686e-05, "loss": 0.5389, "step": 10900 }, { "epoch": 0.28, "grad_norm": 2.0627832412719727, "learning_rate": 1.6911415050390488e-05, "loss": 0.6731, "step": 10901 }, { "epoch": 0.28, "grad_norm": 1.7621151208877563, "learning_rate": 1.6910815066173167e-05, "loss": 0.5814, "step": 10902 }, { "epoch": 0.28, "grad_norm": 1.9599906206130981, "learning_rate": 1.6910215034330842e-05, "loss": 0.5741, "step": 10903 }, { "epoch": 0.28, "grad_norm": 4.0344343185424805, "learning_rate": 1.690961495486766e-05, "loss": 0.6427, "step": 10904 }, { "epoch": 0.28, "grad_norm": 1.8596857786178589, "learning_rate": 1.6909014827787754e-05, "loss": 0.5993, "step": 10905 }, { "epoch": 0.28, "grad_norm": 1.5042845010757446, "learning_rate": 1.690841465309526e-05, "loss": 0.694, "step": 10906 }, { "epoch": 0.28, "grad_norm": 3.9714200496673584, "learning_rate": 1.690781443079431e-05, "loss": 0.7841, "step": 10907 }, { "epoch": 0.28, "grad_norm": 4.517660617828369, "learning_rate": 1.690721416088904e-05, "loss": 0.7118, "step": 10908 }, { "epoch": 0.28, "grad_norm": 1.5331956148147583, "learning_rate": 1.690661384338359e-05, "loss": 0.695, "step": 10909 }, { "epoch": 0.28, "grad_norm": 2.6161088943481445, "learning_rate": 1.6906013478282103e-05, "loss": 0.6654, "step": 10910 }, { "epoch": 0.28, "grad_norm": 1.3171485662460327, "learning_rate": 1.6905413065588703e-05, "loss": 0.6133, "step": 10911 }, { "epoch": 0.28, "grad_norm": 1.9232187271118164, "learning_rate": 1.690481260530754e-05, "loss": 0.6142, "step": 10912 }, { "epoch": 0.28, "grad_norm": 2.3896126747131348, "learning_rate": 1.6904212097442744e-05, "loss": 0.4013, "step": 10913 }, { "epoch": 0.28, "grad_norm": 1.8099452257156372, "learning_rate": 1.690361154199845e-05, "loss": 0.6316, "step": 10914 }, { "epoch": 0.28, "grad_norm": 1.2539795637130737, "learning_rate": 1.6903010938978812e-05, "loss": 0.5328, "step": 10915 }, { "epoch": 0.28, "grad_norm": 2.8730170726776123, "learning_rate": 1.6902410288387956e-05, "loss": 0.5555, "step": 10916 }, { "epoch": 0.28, "grad_norm": 5.728988170623779, "learning_rate": 1.6901809590230026e-05, "loss": 0.8414, "step": 10917 }, { "epoch": 0.28, "grad_norm": 1.4348945617675781, "learning_rate": 1.6901208844509157e-05, "loss": 0.7492, "step": 10918 }, { "epoch": 0.28, "grad_norm": 2.138533353805542, "learning_rate": 1.6900608051229495e-05, "loss": 0.6389, "step": 10919 }, { "epoch": 0.28, "grad_norm": 1.5419220924377441, "learning_rate": 1.6900007210395178e-05, "loss": 0.591, "step": 10920 }, { "epoch": 0.28, "grad_norm": 1.5666126012802124, "learning_rate": 1.689940632201034e-05, "loss": 0.5321, "step": 10921 }, { "epoch": 0.28, "grad_norm": 1.8261244297027588, "learning_rate": 1.6898805386079137e-05, "loss": 0.6913, "step": 10922 }, { "epoch": 0.28, "grad_norm": 1.8462164402008057, "learning_rate": 1.68982044026057e-05, "loss": 0.5267, "step": 10923 }, { "epoch": 0.28, "grad_norm": 1.6011152267456055, "learning_rate": 1.6897603371594168e-05, "loss": 0.4645, "step": 10924 }, { "epoch": 0.28, "grad_norm": 4.374702453613281, "learning_rate": 1.6897002293048693e-05, "loss": 0.7265, "step": 10925 }, { "epoch": 0.28, "grad_norm": 2.511444568634033, "learning_rate": 1.6896401166973405e-05, "loss": 0.6745, "step": 10926 }, { "epoch": 0.28, "grad_norm": 1.8352378606796265, "learning_rate": 1.6895799993372457e-05, "loss": 0.7444, "step": 10927 }, { "epoch": 0.28, "grad_norm": 1.6925947666168213, "learning_rate": 1.6895198772249986e-05, "loss": 0.5823, "step": 10928 }, { "epoch": 0.28, "grad_norm": 2.0441112518310547, "learning_rate": 1.6894597503610138e-05, "loss": 0.6403, "step": 10929 }, { "epoch": 0.28, "grad_norm": 3.287843942642212, "learning_rate": 1.6893996187457055e-05, "loss": 0.6648, "step": 10930 }, { "epoch": 0.28, "grad_norm": 5.293215751647949, "learning_rate": 1.689339482379488e-05, "loss": 0.7386, "step": 10931 }, { "epoch": 0.28, "grad_norm": 2.1208441257476807, "learning_rate": 1.689279341262776e-05, "loss": 0.4684, "step": 10932 }, { "epoch": 0.28, "grad_norm": 3.0820088386535645, "learning_rate": 1.689219195395984e-05, "loss": 0.813, "step": 10933 }, { "epoch": 0.28, "grad_norm": 1.2108029127120972, "learning_rate": 1.6891590447795262e-05, "loss": 0.5043, "step": 10934 }, { "epoch": 0.28, "grad_norm": 1.946577548980713, "learning_rate": 1.6890988894138173e-05, "loss": 0.5159, "step": 10935 }, { "epoch": 0.28, "grad_norm": 2.1280617713928223, "learning_rate": 1.689038729299272e-05, "loss": 0.6822, "step": 10936 }, { "epoch": 0.28, "grad_norm": 2.167210578918457, "learning_rate": 1.6889785644363043e-05, "loss": 0.5323, "step": 10937 }, { "epoch": 0.28, "grad_norm": 2.6707053184509277, "learning_rate": 1.688918394825329e-05, "loss": 0.6884, "step": 10938 }, { "epoch": 0.28, "grad_norm": 2.9960784912109375, "learning_rate": 1.6888582204667613e-05, "loss": 0.7192, "step": 10939 }, { "epoch": 0.28, "grad_norm": 2.939870834350586, "learning_rate": 1.6887980413610156e-05, "loss": 0.6069, "step": 10940 }, { "epoch": 0.28, "grad_norm": 2.0118138790130615, "learning_rate": 1.6887378575085064e-05, "loss": 0.6753, "step": 10941 }, { "epoch": 0.28, "grad_norm": 1.6949955224990845, "learning_rate": 1.6886776689096485e-05, "loss": 0.765, "step": 10942 }, { "epoch": 0.28, "grad_norm": 4.547423839569092, "learning_rate": 1.6886174755648568e-05, "loss": 0.6305, "step": 10943 }, { "epoch": 0.28, "grad_norm": 6.965216636657715, "learning_rate": 1.688557277474546e-05, "loss": 0.9613, "step": 10944 }, { "epoch": 0.28, "grad_norm": 1.400702953338623, "learning_rate": 1.688497074639131e-05, "loss": 0.5834, "step": 10945 }, { "epoch": 0.28, "grad_norm": 1.6018853187561035, "learning_rate": 1.6884368670590268e-05, "loss": 0.599, "step": 10946 }, { "epoch": 0.28, "grad_norm": 2.7633581161499023, "learning_rate": 1.6883766547346486e-05, "loss": 0.7632, "step": 10947 }, { "epoch": 0.28, "grad_norm": 1.5078942775726318, "learning_rate": 1.6883164376664106e-05, "loss": 0.6474, "step": 10948 }, { "epoch": 0.28, "grad_norm": 2.373656988143921, "learning_rate": 1.6882562158547284e-05, "loss": 0.6523, "step": 10949 }, { "epoch": 0.28, "grad_norm": 2.593855381011963, "learning_rate": 1.6881959893000165e-05, "loss": 0.6398, "step": 10950 }, { "epoch": 0.28, "grad_norm": 1.2524527311325073, "learning_rate": 1.6881357580026905e-05, "loss": 0.4975, "step": 10951 }, { "epoch": 0.28, "grad_norm": 2.7222046852111816, "learning_rate": 1.6880755219631648e-05, "loss": 0.6242, "step": 10952 }, { "epoch": 0.28, "grad_norm": 1.1467251777648926, "learning_rate": 1.6880152811818552e-05, "loss": 0.6011, "step": 10953 }, { "epoch": 0.28, "grad_norm": 2.9669721126556396, "learning_rate": 1.6879550356591764e-05, "loss": 0.6076, "step": 10954 }, { "epoch": 0.28, "grad_norm": 1.5335168838500977, "learning_rate": 1.687894785395544e-05, "loss": 0.6399, "step": 10955 }, { "epoch": 0.28, "grad_norm": 3.5186808109283447, "learning_rate": 1.6878345303913724e-05, "loss": 0.5117, "step": 10956 }, { "epoch": 0.28, "grad_norm": 4.518438339233398, "learning_rate": 1.6877742706470778e-05, "loss": 0.5748, "step": 10957 }, { "epoch": 0.28, "grad_norm": 2.384849786758423, "learning_rate": 1.6877140061630748e-05, "loss": 0.648, "step": 10958 }, { "epoch": 0.28, "grad_norm": 3.326352834701538, "learning_rate": 1.687653736939779e-05, "loss": 0.485, "step": 10959 }, { "epoch": 0.28, "grad_norm": 1.9944418668746948, "learning_rate": 1.6875934629776056e-05, "loss": 0.6567, "step": 10960 }, { "epoch": 0.28, "grad_norm": 5.458002090454102, "learning_rate": 1.68753318427697e-05, "loss": 0.7633, "step": 10961 }, { "epoch": 0.28, "grad_norm": 1.1635507345199585, "learning_rate": 1.687472900838288e-05, "loss": 0.6242, "step": 10962 }, { "epoch": 0.28, "grad_norm": 2.197392702102661, "learning_rate": 1.6874126126619743e-05, "loss": 0.6528, "step": 10963 }, { "epoch": 0.28, "grad_norm": 1.741608738899231, "learning_rate": 1.687352319748445e-05, "loss": 0.5982, "step": 10964 }, { "epoch": 0.28, "grad_norm": 1.472342848777771, "learning_rate": 1.6872920220981152e-05, "loss": 0.6672, "step": 10965 }, { "epoch": 0.28, "grad_norm": 1.9244662523269653, "learning_rate": 1.6872317197114007e-05, "loss": 0.5823, "step": 10966 }, { "epoch": 0.28, "grad_norm": 1.7030134201049805, "learning_rate": 1.687171412588717e-05, "loss": 0.6269, "step": 10967 }, { "epoch": 0.28, "grad_norm": 1.3176536560058594, "learning_rate": 1.6871111007304794e-05, "loss": 0.476, "step": 10968 }, { "epoch": 0.28, "grad_norm": 7.054920673370361, "learning_rate": 1.6870507841371036e-05, "loss": 0.6515, "step": 10969 }, { "epoch": 0.28, "grad_norm": 5.440568923950195, "learning_rate": 1.686990462809006e-05, "loss": 0.6552, "step": 10970 }, { "epoch": 0.28, "grad_norm": 2.96760892868042, "learning_rate": 1.686930136746601e-05, "loss": 0.6941, "step": 10971 }, { "epoch": 0.28, "grad_norm": 1.948385238647461, "learning_rate": 1.686869805950306e-05, "loss": 0.5265, "step": 10972 }, { "epoch": 0.28, "grad_norm": 1.7709249258041382, "learning_rate": 1.6868094704205353e-05, "loss": 0.5357, "step": 10973 }, { "epoch": 0.28, "grad_norm": 2.1582088470458984, "learning_rate": 1.686749130157705e-05, "loss": 0.6152, "step": 10974 }, { "epoch": 0.28, "grad_norm": 1.730655550956726, "learning_rate": 1.6866887851622315e-05, "loss": 0.5955, "step": 10975 }, { "epoch": 0.28, "grad_norm": 2.124433755874634, "learning_rate": 1.6866284354345304e-05, "loss": 0.6131, "step": 10976 }, { "epoch": 0.28, "grad_norm": 2.1536505222320557, "learning_rate": 1.6865680809750174e-05, "loss": 0.6259, "step": 10977 }, { "epoch": 0.28, "grad_norm": 1.5223511457443237, "learning_rate": 1.6865077217841085e-05, "loss": 0.6363, "step": 10978 }, { "epoch": 0.28, "grad_norm": 1.5056662559509277, "learning_rate": 1.68644735786222e-05, "loss": 0.6401, "step": 10979 }, { "epoch": 0.28, "grad_norm": 1.8901387453079224, "learning_rate": 1.686386989209767e-05, "loss": 0.6155, "step": 10980 }, { "epoch": 0.28, "grad_norm": 3.6860835552215576, "learning_rate": 1.6863266158271662e-05, "loss": 0.6287, "step": 10981 }, { "epoch": 0.28, "grad_norm": 1.657810091972351, "learning_rate": 1.6862662377148338e-05, "loss": 0.7084, "step": 10982 }, { "epoch": 0.28, "grad_norm": 2.336592435836792, "learning_rate": 1.6862058548731854e-05, "loss": 0.6847, "step": 10983 }, { "epoch": 0.28, "grad_norm": 3.456237316131592, "learning_rate": 1.6861454673026378e-05, "loss": 0.6826, "step": 10984 }, { "epoch": 0.28, "grad_norm": 3.3617360591888428, "learning_rate": 1.6860850750036063e-05, "loss": 0.6343, "step": 10985 }, { "epoch": 0.28, "grad_norm": 3.2932240962982178, "learning_rate": 1.6860246779765078e-05, "loss": 0.6797, "step": 10986 }, { "epoch": 0.28, "grad_norm": 1.839593529701233, "learning_rate": 1.685964276221758e-05, "loss": 0.5423, "step": 10987 }, { "epoch": 0.28, "grad_norm": 4.789241313934326, "learning_rate": 1.6859038697397733e-05, "loss": 0.6325, "step": 10988 }, { "epoch": 0.28, "grad_norm": 1.5610665082931519, "learning_rate": 1.6858434585309705e-05, "loss": 0.781, "step": 10989 }, { "epoch": 0.28, "grad_norm": 2.6555378437042236, "learning_rate": 1.685783042595765e-05, "loss": 0.6415, "step": 10990 }, { "epoch": 0.28, "grad_norm": 3.331904649734497, "learning_rate": 1.685722621934574e-05, "loss": 0.7022, "step": 10991 }, { "epoch": 0.28, "grad_norm": 3.571112632751465, "learning_rate": 1.685662196547813e-05, "loss": 0.516, "step": 10992 }, { "epoch": 0.28, "grad_norm": 1.862865686416626, "learning_rate": 1.6856017664358992e-05, "loss": 0.5203, "step": 10993 }, { "epoch": 0.28, "grad_norm": 1.6313005685806274, "learning_rate": 1.685541331599249e-05, "loss": 0.6296, "step": 10994 }, { "epoch": 0.28, "grad_norm": 1.3731398582458496, "learning_rate": 1.685480892038278e-05, "loss": 0.709, "step": 10995 }, { "epoch": 0.28, "grad_norm": 3.816211223602295, "learning_rate": 1.685420447753404e-05, "loss": 0.9282, "step": 10996 }, { "epoch": 0.28, "grad_norm": 2.176335096359253, "learning_rate": 1.685359998745043e-05, "loss": 0.6553, "step": 10997 }, { "epoch": 0.28, "grad_norm": 1.6526153087615967, "learning_rate": 1.685299545013611e-05, "loss": 0.6538, "step": 10998 }, { "epoch": 0.28, "grad_norm": 3.955660820007324, "learning_rate": 1.685239086559525e-05, "loss": 0.6889, "step": 10999 }, { "epoch": 0.28, "grad_norm": 1.4671131372451782, "learning_rate": 1.6851786233832022e-05, "loss": 0.4551, "step": 11000 }, { "epoch": 0.28, "grad_norm": 2.8812344074249268, "learning_rate": 1.6851181554850586e-05, "loss": 0.5933, "step": 11001 }, { "epoch": 0.28, "grad_norm": 3.3868372440338135, "learning_rate": 1.685057682865511e-05, "loss": 0.6313, "step": 11002 }, { "epoch": 0.28, "grad_norm": 4.845898151397705, "learning_rate": 1.6849972055249763e-05, "loss": 0.6258, "step": 11003 }, { "epoch": 0.28, "grad_norm": 1.3422398567199707, "learning_rate": 1.6849367234638712e-05, "loss": 0.5419, "step": 11004 }, { "epoch": 0.28, "grad_norm": 6.564964771270752, "learning_rate": 1.6848762366826124e-05, "loss": 0.8355, "step": 11005 }, { "epoch": 0.28, "grad_norm": 2.040454864501953, "learning_rate": 1.684815745181617e-05, "loss": 0.4651, "step": 11006 }, { "epoch": 0.28, "grad_norm": 1.9865642786026, "learning_rate": 1.684755248961302e-05, "loss": 0.6244, "step": 11007 }, { "epoch": 0.28, "grad_norm": 4.474790573120117, "learning_rate": 1.6846947480220835e-05, "loss": 0.5939, "step": 11008 }, { "epoch": 0.28, "grad_norm": 3.213005304336548, "learning_rate": 1.6846342423643793e-05, "loss": 0.5862, "step": 11009 }, { "epoch": 0.28, "grad_norm": 1.102514624595642, "learning_rate": 1.6845737319886056e-05, "loss": 0.4666, "step": 11010 }, { "epoch": 0.28, "grad_norm": 1.3571629524230957, "learning_rate": 1.6845132168951804e-05, "loss": 0.5764, "step": 11011 }, { "epoch": 0.28, "grad_norm": 2.8432765007019043, "learning_rate": 1.6844526970845202e-05, "loss": 0.572, "step": 11012 }, { "epoch": 0.28, "grad_norm": 1.060225486755371, "learning_rate": 1.6843921725570415e-05, "loss": 0.5686, "step": 11013 }, { "epoch": 0.28, "grad_norm": 2.843191146850586, "learning_rate": 1.684331643313162e-05, "loss": 0.8171, "step": 11014 }, { "epoch": 0.28, "grad_norm": 2.6839258670806885, "learning_rate": 1.6842711093532994e-05, "loss": 0.669, "step": 11015 }, { "epoch": 0.28, "grad_norm": 1.5912284851074219, "learning_rate": 1.6842105706778697e-05, "loss": 0.6621, "step": 11016 }, { "epoch": 0.28, "grad_norm": 1.5125675201416016, "learning_rate": 1.6841500272872905e-05, "loss": 0.5735, "step": 11017 }, { "epoch": 0.28, "grad_norm": 2.777566909790039, "learning_rate": 1.6840894791819793e-05, "loss": 0.6753, "step": 11018 }, { "epoch": 0.28, "grad_norm": 2.7363743782043457, "learning_rate": 1.6840289263623533e-05, "loss": 0.6944, "step": 11019 }, { "epoch": 0.28, "grad_norm": 1.4226511716842651, "learning_rate": 1.6839683688288294e-05, "loss": 0.6396, "step": 11020 }, { "epoch": 0.28, "grad_norm": 1.9082502126693726, "learning_rate": 1.6839078065818257e-05, "loss": 0.716, "step": 11021 }, { "epoch": 0.28, "grad_norm": 2.208155632019043, "learning_rate": 1.6838472396217586e-05, "loss": 0.715, "step": 11022 }, { "epoch": 0.28, "grad_norm": 4.758241653442383, "learning_rate": 1.683786667949046e-05, "loss": 0.5018, "step": 11023 }, { "epoch": 0.28, "grad_norm": 1.437580943107605, "learning_rate": 1.6837260915641057e-05, "loss": 0.5407, "step": 11024 }, { "epoch": 0.28, "grad_norm": 5.831695079803467, "learning_rate": 1.6836655104673544e-05, "loss": 0.6716, "step": 11025 }, { "epoch": 0.28, "grad_norm": 2.2156741619110107, "learning_rate": 1.68360492465921e-05, "loss": 0.8007, "step": 11026 }, { "epoch": 0.28, "grad_norm": 2.2017173767089844, "learning_rate": 1.6835443341400897e-05, "loss": 0.7261, "step": 11027 }, { "epoch": 0.28, "grad_norm": 2.887012481689453, "learning_rate": 1.6834837389104118e-05, "loss": 0.7484, "step": 11028 }, { "epoch": 0.28, "grad_norm": 4.451372146606445, "learning_rate": 1.683423138970593e-05, "loss": 0.6542, "step": 11029 }, { "epoch": 0.28, "grad_norm": 3.158816337585449, "learning_rate": 1.6833625343210512e-05, "loss": 0.6299, "step": 11030 }, { "epoch": 0.28, "grad_norm": 2.5664169788360596, "learning_rate": 1.6833019249622044e-05, "loss": 0.7263, "step": 11031 }, { "epoch": 0.28, "grad_norm": 1.5118590593338013, "learning_rate": 1.68324131089447e-05, "loss": 0.4367, "step": 11032 }, { "epoch": 0.28, "grad_norm": 5.0500664710998535, "learning_rate": 1.683180692118266e-05, "loss": 0.5927, "step": 11033 }, { "epoch": 0.28, "grad_norm": 4.361425876617432, "learning_rate": 1.6831200686340093e-05, "loss": 0.7692, "step": 11034 }, { "epoch": 0.28, "grad_norm": 3.760572671890259, "learning_rate": 1.6830594404421184e-05, "loss": 0.6473, "step": 11035 }, { "epoch": 0.28, "grad_norm": 5.378133296966553, "learning_rate": 1.6829988075430114e-05, "loss": 0.6089, "step": 11036 }, { "epoch": 0.28, "grad_norm": 1.5828508138656616, "learning_rate": 1.6829381699371055e-05, "loss": 0.5896, "step": 11037 }, { "epoch": 0.28, "grad_norm": 2.06729793548584, "learning_rate": 1.6828775276248187e-05, "loss": 0.7381, "step": 11038 }, { "epoch": 0.28, "grad_norm": 2.0476815700531006, "learning_rate": 1.682816880606569e-05, "loss": 0.6745, "step": 11039 }, { "epoch": 0.28, "grad_norm": 2.566610336303711, "learning_rate": 1.6827562288827746e-05, "loss": 0.7513, "step": 11040 }, { "epoch": 0.28, "grad_norm": 3.4631171226501465, "learning_rate": 1.6826955724538526e-05, "loss": 0.754, "step": 11041 }, { "epoch": 0.28, "grad_norm": 1.9834072589874268, "learning_rate": 1.6826349113202222e-05, "loss": 0.6124, "step": 11042 }, { "epoch": 0.28, "grad_norm": 2.4367475509643555, "learning_rate": 1.682574245482301e-05, "loss": 0.5143, "step": 11043 }, { "epoch": 0.28, "grad_norm": 3.383195400238037, "learning_rate": 1.6825135749405066e-05, "loss": 0.7401, "step": 11044 }, { "epoch": 0.28, "grad_norm": 2.6650421619415283, "learning_rate": 1.6824528996952573e-05, "loss": 0.4671, "step": 11045 }, { "epoch": 0.28, "grad_norm": 1.5689092874526978, "learning_rate": 1.6823922197469716e-05, "loss": 0.5813, "step": 11046 }, { "epoch": 0.28, "grad_norm": 2.104921579360962, "learning_rate": 1.6823315350960675e-05, "loss": 0.6218, "step": 11047 }, { "epoch": 0.28, "grad_norm": 2.365699529647827, "learning_rate": 1.682270845742963e-05, "loss": 0.4885, "step": 11048 }, { "epoch": 0.28, "grad_norm": 1.3143874406814575, "learning_rate": 1.6822101516880764e-05, "loss": 0.5783, "step": 11049 }, { "epoch": 0.28, "grad_norm": 4.1621575355529785, "learning_rate": 1.6821494529318263e-05, "loss": 0.6404, "step": 11050 }, { "epoch": 0.28, "grad_norm": 1.3691766262054443, "learning_rate": 1.6820887494746305e-05, "loss": 0.5103, "step": 11051 }, { "epoch": 0.28, "grad_norm": 2.042384624481201, "learning_rate": 1.6820280413169076e-05, "loss": 0.6213, "step": 11052 }, { "epoch": 0.28, "grad_norm": 9.338765144348145, "learning_rate": 1.681967328459076e-05, "loss": 0.7721, "step": 11053 }, { "epoch": 0.28, "grad_norm": 3.216869354248047, "learning_rate": 1.681906610901554e-05, "loss": 0.6948, "step": 11054 }, { "epoch": 0.28, "grad_norm": 1.4202603101730347, "learning_rate": 1.6818458886447603e-05, "loss": 0.5919, "step": 11055 }, { "epoch": 0.28, "grad_norm": 1.743761420249939, "learning_rate": 1.6817851616891127e-05, "loss": 0.5622, "step": 11056 }, { "epoch": 0.28, "grad_norm": 1.9474178552627563, "learning_rate": 1.6817244300350303e-05, "loss": 0.6358, "step": 11057 }, { "epoch": 0.28, "grad_norm": 2.1571333408355713, "learning_rate": 1.6816636936829315e-05, "loss": 0.5647, "step": 11058 }, { "epoch": 0.28, "grad_norm": 5.7905707359313965, "learning_rate": 1.681602952633235e-05, "loss": 0.5875, "step": 11059 }, { "epoch": 0.28, "grad_norm": 3.6667189598083496, "learning_rate": 1.6815422068863587e-05, "loss": 0.5437, "step": 11060 }, { "epoch": 0.28, "grad_norm": 1.3876442909240723, "learning_rate": 1.6814814564427217e-05, "loss": 0.679, "step": 11061 }, { "epoch": 0.28, "grad_norm": 2.7105345726013184, "learning_rate": 1.681420701302743e-05, "loss": 0.892, "step": 11062 }, { "epoch": 0.28, "grad_norm": 1.4523544311523438, "learning_rate": 1.6813599414668406e-05, "loss": 0.7379, "step": 11063 }, { "epoch": 0.28, "grad_norm": 2.2784926891326904, "learning_rate": 1.681299176935434e-05, "loss": 0.5867, "step": 11064 }, { "epoch": 0.28, "grad_norm": 2.709933280944824, "learning_rate": 1.681238407708941e-05, "loss": 0.6822, "step": 11065 }, { "epoch": 0.28, "grad_norm": 3.3376693725585938, "learning_rate": 1.6811776337877812e-05, "loss": 0.4899, "step": 11066 }, { "epoch": 0.28, "grad_norm": 1.7528927326202393, "learning_rate": 1.6811168551723728e-05, "loss": 0.709, "step": 11067 }, { "epoch": 0.28, "grad_norm": 4.023425579071045, "learning_rate": 1.6810560718631353e-05, "loss": 0.5078, "step": 11068 }, { "epoch": 0.28, "grad_norm": 1.1449335813522339, "learning_rate": 1.680995283860487e-05, "loss": 0.5892, "step": 11069 }, { "epoch": 0.28, "grad_norm": 1.7579665184020996, "learning_rate": 1.680934491164847e-05, "loss": 0.7523, "step": 11070 }, { "epoch": 0.28, "grad_norm": 3.0197956562042236, "learning_rate": 1.680873693776635e-05, "loss": 0.7201, "step": 11071 }, { "epoch": 0.28, "grad_norm": 2.3567488193511963, "learning_rate": 1.6808128916962685e-05, "loss": 0.6489, "step": 11072 }, { "epoch": 0.28, "grad_norm": 2.2568373680114746, "learning_rate": 1.6807520849241673e-05, "loss": 0.6254, "step": 11073 }, { "epoch": 0.28, "grad_norm": 4.73289680480957, "learning_rate": 1.680691273460751e-05, "loss": 0.6224, "step": 11074 }, { "epoch": 0.28, "grad_norm": 2.7016820907592773, "learning_rate": 1.6806304573064375e-05, "loss": 0.5855, "step": 11075 }, { "epoch": 0.28, "grad_norm": 1.4249217510223389, "learning_rate": 1.680569636461647e-05, "loss": 0.651, "step": 11076 }, { "epoch": 0.28, "grad_norm": 2.663259744644165, "learning_rate": 1.6805088109267975e-05, "loss": 0.5231, "step": 11077 }, { "epoch": 0.28, "grad_norm": 2.0789177417755127, "learning_rate": 1.6804479807023093e-05, "loss": 0.5415, "step": 11078 }, { "epoch": 0.28, "grad_norm": 2.301401376724243, "learning_rate": 1.680387145788601e-05, "loss": 0.64, "step": 11079 }, { "epoch": 0.28, "grad_norm": 2.050736904144287, "learning_rate": 1.6803263061860918e-05, "loss": 0.5919, "step": 11080 }, { "epoch": 0.28, "grad_norm": 1.5251331329345703, "learning_rate": 1.6802654618952015e-05, "loss": 0.6038, "step": 11081 }, { "epoch": 0.28, "grad_norm": 1.2004282474517822, "learning_rate": 1.6802046129163487e-05, "loss": 0.6152, "step": 11082 }, { "epoch": 0.28, "grad_norm": 4.221023082733154, "learning_rate": 1.6801437592499532e-05, "loss": 0.6879, "step": 11083 }, { "epoch": 0.28, "grad_norm": 1.454351544380188, "learning_rate": 1.6800829008964342e-05, "loss": 0.5624, "step": 11084 }, { "epoch": 0.28, "grad_norm": 1.7862268686294556, "learning_rate": 1.6800220378562108e-05, "loss": 0.6117, "step": 11085 }, { "epoch": 0.28, "grad_norm": 3.692277669906616, "learning_rate": 1.6799611701297033e-05, "loss": 0.6096, "step": 11086 }, { "epoch": 0.28, "grad_norm": 2.7336809635162354, "learning_rate": 1.6799002977173303e-05, "loss": 0.6992, "step": 11087 }, { "epoch": 0.28, "grad_norm": 1.155901551246643, "learning_rate": 1.6798394206195116e-05, "loss": 0.5211, "step": 11088 }, { "epoch": 0.28, "grad_norm": 7.582627773284912, "learning_rate": 1.679778538836667e-05, "loss": 0.789, "step": 11089 }, { "epoch": 0.28, "grad_norm": 2.1344592571258545, "learning_rate": 1.6797176523692153e-05, "loss": 0.5661, "step": 11090 }, { "epoch": 0.28, "grad_norm": 1.6332205533981323, "learning_rate": 1.6796567612175767e-05, "loss": 0.5642, "step": 11091 }, { "epoch": 0.28, "grad_norm": 2.6714327335357666, "learning_rate": 1.6795958653821708e-05, "loss": 0.6717, "step": 11092 }, { "epoch": 0.28, "grad_norm": 2.0948450565338135, "learning_rate": 1.679534964863417e-05, "loss": 0.6379, "step": 11093 }, { "epoch": 0.28, "grad_norm": 1.7776954174041748, "learning_rate": 1.6794740596617356e-05, "loss": 0.6532, "step": 11094 }, { "epoch": 0.28, "grad_norm": 1.9295908212661743, "learning_rate": 1.6794131497775456e-05, "loss": 0.5421, "step": 11095 }, { "epoch": 0.28, "grad_norm": 2.020644426345825, "learning_rate": 1.679352235211267e-05, "loss": 0.8571, "step": 11096 }, { "epoch": 0.28, "grad_norm": 2.18082857131958, "learning_rate": 1.6792913159633197e-05, "loss": 0.6902, "step": 11097 }, { "epoch": 0.28, "grad_norm": 2.3233189582824707, "learning_rate": 1.6792303920341237e-05, "loss": 0.6186, "step": 11098 }, { "epoch": 0.28, "grad_norm": 8.097027778625488, "learning_rate": 1.679169463424098e-05, "loss": 0.7142, "step": 11099 }, { "epoch": 0.28, "grad_norm": 0.9326288104057312, "learning_rate": 1.679108530133663e-05, "loss": 0.519, "step": 11100 }, { "epoch": 0.28, "grad_norm": 2.3867907524108887, "learning_rate": 1.6790475921632392e-05, "loss": 0.5452, "step": 11101 }, { "epoch": 0.28, "grad_norm": 1.3263816833496094, "learning_rate": 1.6789866495132456e-05, "loss": 0.5872, "step": 11102 }, { "epoch": 0.28, "grad_norm": 2.4027626514434814, "learning_rate": 1.678925702184103e-05, "loss": 0.683, "step": 11103 }, { "epoch": 0.28, "grad_norm": 2.8507487773895264, "learning_rate": 1.6788647501762307e-05, "loss": 0.6701, "step": 11104 }, { "epoch": 0.28, "grad_norm": 1.8998732566833496, "learning_rate": 1.6788037934900493e-05, "loss": 0.7286, "step": 11105 }, { "epoch": 0.28, "grad_norm": 1.5532668828964233, "learning_rate": 1.678742832125978e-05, "loss": 0.6898, "step": 11106 }, { "epoch": 0.28, "grad_norm": 2.0446290969848633, "learning_rate": 1.678681866084438e-05, "loss": 0.6342, "step": 11107 }, { "epoch": 0.28, "grad_norm": 1.3239754438400269, "learning_rate": 1.6786208953658492e-05, "loss": 0.5938, "step": 11108 }, { "epoch": 0.28, "grad_norm": 2.3735485076904297, "learning_rate": 1.678559919970631e-05, "loss": 0.689, "step": 11109 }, { "epoch": 0.28, "grad_norm": 5.420017719268799, "learning_rate": 1.6784989398992047e-05, "loss": 0.717, "step": 11110 }, { "epoch": 0.28, "grad_norm": 1.8452959060668945, "learning_rate": 1.6784379551519897e-05, "loss": 0.6021, "step": 11111 }, { "epoch": 0.28, "grad_norm": 2.3677868843078613, "learning_rate": 1.6783769657294066e-05, "loss": 0.6313, "step": 11112 }, { "epoch": 0.28, "grad_norm": 5.570645332336426, "learning_rate": 1.6783159716318755e-05, "loss": 0.6015, "step": 11113 }, { "epoch": 0.28, "grad_norm": 1.3409855365753174, "learning_rate": 1.6782549728598166e-05, "loss": 0.7041, "step": 11114 }, { "epoch": 0.28, "grad_norm": 2.1169519424438477, "learning_rate": 1.678193969413651e-05, "loss": 0.7069, "step": 11115 }, { "epoch": 0.28, "grad_norm": 1.3195717334747314, "learning_rate": 1.6781329612937988e-05, "loss": 0.5596, "step": 11116 }, { "epoch": 0.28, "grad_norm": 1.0869615077972412, "learning_rate": 1.67807194850068e-05, "loss": 0.5616, "step": 11117 }, { "epoch": 0.28, "grad_norm": 3.5237839221954346, "learning_rate": 1.6780109310347156e-05, "loss": 0.7364, "step": 11118 }, { "epoch": 0.28, "grad_norm": 3.279524564743042, "learning_rate": 1.6779499088963255e-05, "loss": 0.8075, "step": 11119 }, { "epoch": 0.29, "grad_norm": 1.187976598739624, "learning_rate": 1.6778888820859304e-05, "loss": 0.6095, "step": 11120 }, { "epoch": 0.29, "grad_norm": 2.503873586654663, "learning_rate": 1.6778278506039517e-05, "loss": 0.6517, "step": 11121 }, { "epoch": 0.29, "grad_norm": 5.760505199432373, "learning_rate": 1.677766814450809e-05, "loss": 0.6885, "step": 11122 }, { "epoch": 0.29, "grad_norm": 2.1194608211517334, "learning_rate": 1.677705773626923e-05, "loss": 0.6192, "step": 11123 }, { "epoch": 0.29, "grad_norm": 2.9668662548065186, "learning_rate": 1.6776447281327146e-05, "loss": 0.6911, "step": 11124 }, { "epoch": 0.29, "grad_norm": 10.569475173950195, "learning_rate": 1.6775836779686047e-05, "loss": 0.5292, "step": 11125 }, { "epoch": 0.29, "grad_norm": 3.3061184883117676, "learning_rate": 1.6775226231350135e-05, "loss": 0.6816, "step": 11126 }, { "epoch": 0.29, "grad_norm": 1.7545034885406494, "learning_rate": 1.6774615636323623e-05, "loss": 0.7681, "step": 11127 }, { "epoch": 0.29, "grad_norm": 1.3916881084442139, "learning_rate": 1.6774004994610715e-05, "loss": 0.4655, "step": 11128 }, { "epoch": 0.29, "grad_norm": 2.9890358448028564, "learning_rate": 1.677339430621562e-05, "loss": 0.6424, "step": 11129 }, { "epoch": 0.29, "grad_norm": 4.53801155090332, "learning_rate": 1.677278357114255e-05, "loss": 0.6088, "step": 11130 }, { "epoch": 0.29, "grad_norm": 1.640574336051941, "learning_rate": 1.6772172789395708e-05, "loss": 0.672, "step": 11131 }, { "epoch": 0.29, "grad_norm": 1.4008768796920776, "learning_rate": 1.6771561960979306e-05, "loss": 0.6555, "step": 11132 }, { "epoch": 0.29, "grad_norm": 2.2345757484436035, "learning_rate": 1.6770951085897552e-05, "loss": 0.7464, "step": 11133 }, { "epoch": 0.29, "grad_norm": 1.9782543182373047, "learning_rate": 1.677034016415466e-05, "loss": 0.6579, "step": 11134 }, { "epoch": 0.29, "grad_norm": 1.664148211479187, "learning_rate": 1.6769729195754836e-05, "loss": 0.5965, "step": 11135 }, { "epoch": 0.29, "grad_norm": 3.0573415756225586, "learning_rate": 1.676911818070229e-05, "loss": 0.5014, "step": 11136 }, { "epoch": 0.29, "grad_norm": 1.9934667348861694, "learning_rate": 1.6768507119001237e-05, "loss": 0.698, "step": 11137 }, { "epoch": 0.29, "grad_norm": 1.3180601596832275, "learning_rate": 1.6767896010655883e-05, "loss": 0.5982, "step": 11138 }, { "epoch": 0.29, "grad_norm": 2.8723225593566895, "learning_rate": 1.676728485567044e-05, "loss": 0.7704, "step": 11139 }, { "epoch": 0.29, "grad_norm": 2.410160541534424, "learning_rate": 1.6766673654049127e-05, "loss": 0.5834, "step": 11140 }, { "epoch": 0.29, "grad_norm": 1.1963545083999634, "learning_rate": 1.676606240579615e-05, "loss": 0.5767, "step": 11141 }, { "epoch": 0.29, "grad_norm": 2.1440846920013428, "learning_rate": 1.676545111091572e-05, "loss": 0.7163, "step": 11142 }, { "epoch": 0.29, "grad_norm": 2.0572891235351562, "learning_rate": 1.6764839769412047e-05, "loss": 0.7519, "step": 11143 }, { "epoch": 0.29, "grad_norm": 1.7243704795837402, "learning_rate": 1.6764228381289355e-05, "loss": 0.4601, "step": 11144 }, { "epoch": 0.29, "grad_norm": 1.9534801244735718, "learning_rate": 1.6763616946551845e-05, "loss": 0.695, "step": 11145 }, { "epoch": 0.29, "grad_norm": 2.158245086669922, "learning_rate": 1.6763005465203738e-05, "loss": 0.6573, "step": 11146 }, { "epoch": 0.29, "grad_norm": 1.8345459699630737, "learning_rate": 1.676239393724925e-05, "loss": 0.587, "step": 11147 }, { "epoch": 0.29, "grad_norm": 3.187530755996704, "learning_rate": 1.6761782362692586e-05, "loss": 0.6935, "step": 11148 }, { "epoch": 0.29, "grad_norm": 3.4782650470733643, "learning_rate": 1.676117074153797e-05, "loss": 0.6471, "step": 11149 }, { "epoch": 0.29, "grad_norm": 1.4308995008468628, "learning_rate": 1.6760559073789612e-05, "loss": 0.6114, "step": 11150 }, { "epoch": 0.29, "grad_norm": 2.8692986965179443, "learning_rate": 1.675994735945173e-05, "loss": 0.7222, "step": 11151 }, { "epoch": 0.29, "grad_norm": 1.8979381322860718, "learning_rate": 1.6759335598528537e-05, "loss": 0.538, "step": 11152 }, { "epoch": 0.29, "grad_norm": 2.3455605506896973, "learning_rate": 1.6758723791024248e-05, "loss": 0.6517, "step": 11153 }, { "epoch": 0.29, "grad_norm": 2.4121792316436768, "learning_rate": 1.675811193694308e-05, "loss": 0.5857, "step": 11154 }, { "epoch": 0.29, "grad_norm": 1.339431643486023, "learning_rate": 1.6757500036289253e-05, "loss": 0.5636, "step": 11155 }, { "epoch": 0.29, "grad_norm": 4.041143894195557, "learning_rate": 1.675688808906698e-05, "loss": 0.7088, "step": 11156 }, { "epoch": 0.29, "grad_norm": 3.5188381671905518, "learning_rate": 1.675627609528048e-05, "loss": 0.8174, "step": 11157 }, { "epoch": 0.29, "grad_norm": 2.096395254135132, "learning_rate": 1.675566405493397e-05, "loss": 0.5363, "step": 11158 }, { "epoch": 0.29, "grad_norm": 5.242836952209473, "learning_rate": 1.6755051968031666e-05, "loss": 0.6135, "step": 11159 }, { "epoch": 0.29, "grad_norm": 1.2868661880493164, "learning_rate": 1.675443983457779e-05, "loss": 0.6349, "step": 11160 }, { "epoch": 0.29, "grad_norm": 1.5822771787643433, "learning_rate": 1.6753827654576554e-05, "loss": 0.5792, "step": 11161 }, { "epoch": 0.29, "grad_norm": 5.079410552978516, "learning_rate": 1.6753215428032188e-05, "loss": 0.6312, "step": 11162 }, { "epoch": 0.29, "grad_norm": 1.302024245262146, "learning_rate": 1.67526031549489e-05, "loss": 0.602, "step": 11163 }, { "epoch": 0.29, "grad_norm": 2.8459956645965576, "learning_rate": 1.6751990835330914e-05, "loss": 0.5588, "step": 11164 }, { "epoch": 0.29, "grad_norm": 1.9708372354507446, "learning_rate": 1.675137846918245e-05, "loss": 0.509, "step": 11165 }, { "epoch": 0.29, "grad_norm": 4.599279403686523, "learning_rate": 1.6750766056507722e-05, "loss": 0.5048, "step": 11166 }, { "epoch": 0.29, "grad_norm": 1.4184216260910034, "learning_rate": 1.675015359731096e-05, "loss": 0.5927, "step": 11167 }, { "epoch": 0.29, "grad_norm": 1.7753552198410034, "learning_rate": 1.674954109159638e-05, "loss": 0.5984, "step": 11168 }, { "epoch": 0.29, "grad_norm": 3.2778310775756836, "learning_rate": 1.6748928539368204e-05, "loss": 0.6589, "step": 11169 }, { "epoch": 0.29, "grad_norm": 1.3990072011947632, "learning_rate": 1.674831594063065e-05, "loss": 0.5538, "step": 11170 }, { "epoch": 0.29, "grad_norm": 1.5845293998718262, "learning_rate": 1.6747703295387948e-05, "loss": 0.6227, "step": 11171 }, { "epoch": 0.29, "grad_norm": 1.3018608093261719, "learning_rate": 1.674709060364431e-05, "loss": 0.5144, "step": 11172 }, { "epoch": 0.29, "grad_norm": 2.379643201828003, "learning_rate": 1.674647786540396e-05, "loss": 0.5907, "step": 11173 }, { "epoch": 0.29, "grad_norm": 2.970851421356201, "learning_rate": 1.6745865080671126e-05, "loss": 0.6295, "step": 11174 }, { "epoch": 0.29, "grad_norm": 2.221458673477173, "learning_rate": 1.6745252249450026e-05, "loss": 0.5169, "step": 11175 }, { "epoch": 0.29, "grad_norm": 3.067138910293579, "learning_rate": 1.674463937174489e-05, "loss": 0.6387, "step": 11176 }, { "epoch": 0.29, "grad_norm": 2.966604232788086, "learning_rate": 1.6744026447559933e-05, "loss": 0.7237, "step": 11177 }, { "epoch": 0.29, "grad_norm": 4.073104381561279, "learning_rate": 1.6743413476899385e-05, "loss": 0.6862, "step": 11178 }, { "epoch": 0.29, "grad_norm": 1.2084555625915527, "learning_rate": 1.6742800459767466e-05, "loss": 0.5394, "step": 11179 }, { "epoch": 0.29, "grad_norm": 1.961933970451355, "learning_rate": 1.6742187396168407e-05, "loss": 0.6427, "step": 11180 }, { "epoch": 0.29, "grad_norm": 2.509282350540161, "learning_rate": 1.6741574286106423e-05, "loss": 0.5735, "step": 11181 }, { "epoch": 0.29, "grad_norm": 3.5302772521972656, "learning_rate": 1.6740961129585746e-05, "loss": 0.5319, "step": 11182 }, { "epoch": 0.29, "grad_norm": 1.4743295907974243, "learning_rate": 1.67403479266106e-05, "loss": 0.5298, "step": 11183 }, { "epoch": 0.29, "grad_norm": 6.276654243469238, "learning_rate": 1.673973467718521e-05, "loss": 0.6399, "step": 11184 }, { "epoch": 0.29, "grad_norm": 2.7607779502868652, "learning_rate": 1.6739121381313804e-05, "loss": 0.5462, "step": 11185 }, { "epoch": 0.29, "grad_norm": 1.5409988164901733, "learning_rate": 1.6738508039000606e-05, "loss": 0.6067, "step": 11186 }, { "epoch": 0.29, "grad_norm": 2.18050217628479, "learning_rate": 1.6737894650249848e-05, "loss": 0.4435, "step": 11187 }, { "epoch": 0.29, "grad_norm": 1.903620719909668, "learning_rate": 1.673728121506575e-05, "loss": 0.6782, "step": 11188 }, { "epoch": 0.29, "grad_norm": 1.544513463973999, "learning_rate": 1.673666773345254e-05, "loss": 0.627, "step": 11189 }, { "epoch": 0.29, "grad_norm": 1.1931182146072388, "learning_rate": 1.6736054205414453e-05, "loss": 0.5277, "step": 11190 }, { "epoch": 0.29, "grad_norm": 4.4770002365112305, "learning_rate": 1.673544063095571e-05, "loss": 0.6254, "step": 11191 }, { "epoch": 0.29, "grad_norm": 6.529090881347656, "learning_rate": 1.6734827010080544e-05, "loss": 0.5516, "step": 11192 }, { "epoch": 0.29, "grad_norm": 2.1045773029327393, "learning_rate": 1.673421334279318e-05, "loss": 0.5878, "step": 11193 }, { "epoch": 0.29, "grad_norm": 6.03900671005249, "learning_rate": 1.6733599629097847e-05, "loss": 0.7363, "step": 11194 }, { "epoch": 0.29, "grad_norm": 1.759706974029541, "learning_rate": 1.6732985868998778e-05, "loss": 0.5264, "step": 11195 }, { "epoch": 0.29, "grad_norm": 1.9951726198196411, "learning_rate": 1.6732372062500202e-05, "loss": 0.605, "step": 11196 }, { "epoch": 0.29, "grad_norm": 2.214421272277832, "learning_rate": 1.6731758209606343e-05, "loss": 0.6344, "step": 11197 }, { "epoch": 0.29, "grad_norm": 6.503337860107422, "learning_rate": 1.673114431032144e-05, "loss": 0.7934, "step": 11198 }, { "epoch": 0.29, "grad_norm": 3.8589982986450195, "learning_rate": 1.6730530364649714e-05, "loss": 0.5581, "step": 11199 }, { "epoch": 0.29, "grad_norm": 3.722320318222046, "learning_rate": 1.6729916372595405e-05, "loss": 0.7426, "step": 11200 }, { "epoch": 0.29, "grad_norm": 3.616715669631958, "learning_rate": 1.672930233416274e-05, "loss": 0.6042, "step": 11201 }, { "epoch": 0.29, "grad_norm": 2.0751020908355713, "learning_rate": 1.6728688249355954e-05, "loss": 0.5909, "step": 11202 }, { "epoch": 0.29, "grad_norm": 7.091705799102783, "learning_rate": 1.6728074118179272e-05, "loss": 0.7509, "step": 11203 }, { "epoch": 0.29, "grad_norm": 2.787886619567871, "learning_rate": 1.6727459940636932e-05, "loss": 0.6541, "step": 11204 }, { "epoch": 0.29, "grad_norm": 4.111745357513428, "learning_rate": 1.672684571673316e-05, "loss": 0.7511, "step": 11205 }, { "epoch": 0.29, "grad_norm": 2.654224157333374, "learning_rate": 1.6726231446472202e-05, "loss": 0.6229, "step": 11206 }, { "epoch": 0.29, "grad_norm": 2.4527153968811035, "learning_rate": 1.6725617129858277e-05, "loss": 0.671, "step": 11207 }, { "epoch": 0.29, "grad_norm": 2.731377363204956, "learning_rate": 1.6725002766895626e-05, "loss": 0.611, "step": 11208 }, { "epoch": 0.29, "grad_norm": 1.7827128171920776, "learning_rate": 1.672438835758848e-05, "loss": 0.6741, "step": 11209 }, { "epoch": 0.29, "grad_norm": 1.8232128620147705, "learning_rate": 1.6723773901941072e-05, "loss": 0.6505, "step": 11210 }, { "epoch": 0.29, "grad_norm": 4.132362365722656, "learning_rate": 1.6723159399957642e-05, "loss": 0.4679, "step": 11211 }, { "epoch": 0.29, "grad_norm": 3.2294325828552246, "learning_rate": 1.6722544851642424e-05, "loss": 0.4715, "step": 11212 }, { "epoch": 0.29, "grad_norm": 2.9333226680755615, "learning_rate": 1.6721930256999644e-05, "loss": 0.574, "step": 11213 }, { "epoch": 0.29, "grad_norm": 1.2376737594604492, "learning_rate": 1.672131561603355e-05, "loss": 0.6274, "step": 11214 }, { "epoch": 0.29, "grad_norm": 3.2496676445007324, "learning_rate": 1.6720700928748364e-05, "loss": 0.8442, "step": 11215 }, { "epoch": 0.29, "grad_norm": 2.4595699310302734, "learning_rate": 1.672008619514834e-05, "loss": 0.7947, "step": 11216 }, { "epoch": 0.29, "grad_norm": 2.6457509994506836, "learning_rate": 1.6719471415237694e-05, "loss": 0.5799, "step": 11217 }, { "epoch": 0.29, "grad_norm": 1.334443211555481, "learning_rate": 1.6718856589020677e-05, "loss": 0.6886, "step": 11218 }, { "epoch": 0.29, "grad_norm": 3.7142763137817383, "learning_rate": 1.671824171650152e-05, "loss": 0.6497, "step": 11219 }, { "epoch": 0.29, "grad_norm": 1.5007141828536987, "learning_rate": 1.6717626797684464e-05, "loss": 0.6355, "step": 11220 }, { "epoch": 0.29, "grad_norm": 3.721649169921875, "learning_rate": 1.6717011832573742e-05, "loss": 0.5425, "step": 11221 }, { "epoch": 0.29, "grad_norm": 1.884374737739563, "learning_rate": 1.6716396821173594e-05, "loss": 0.5998, "step": 11222 }, { "epoch": 0.29, "grad_norm": 1.7827092409133911, "learning_rate": 1.6715781763488263e-05, "loss": 0.4062, "step": 11223 }, { "epoch": 0.29, "grad_norm": 3.3371148109436035, "learning_rate": 1.671516665952198e-05, "loss": 0.6468, "step": 11224 }, { "epoch": 0.29, "grad_norm": 2.252776861190796, "learning_rate": 1.6714551509278987e-05, "loss": 0.5739, "step": 11225 }, { "epoch": 0.29, "grad_norm": 2.322197914123535, "learning_rate": 1.6713936312763525e-05, "loss": 0.7057, "step": 11226 }, { "epoch": 0.29, "grad_norm": 5.76985502243042, "learning_rate": 1.671332106997983e-05, "loss": 0.6403, "step": 11227 }, { "epoch": 0.29, "grad_norm": 1.7808518409729004, "learning_rate": 1.6712705780932147e-05, "loss": 0.5856, "step": 11228 }, { "epoch": 0.29, "grad_norm": 3.0046679973602295, "learning_rate": 1.6712090445624713e-05, "loss": 0.5649, "step": 11229 }, { "epoch": 0.29, "grad_norm": 1.880547046661377, "learning_rate": 1.6711475064061765e-05, "loss": 0.7019, "step": 11230 }, { "epoch": 0.29, "grad_norm": 1.8500808477401733, "learning_rate": 1.6710859636247552e-05, "loss": 0.5228, "step": 11231 }, { "epoch": 0.29, "grad_norm": 2.8295910358428955, "learning_rate": 1.671024416218631e-05, "loss": 0.5764, "step": 11232 }, { "epoch": 0.29, "grad_norm": 1.8268779516220093, "learning_rate": 1.6709628641882276e-05, "loss": 0.7485, "step": 11233 }, { "epoch": 0.29, "grad_norm": 2.933643102645874, "learning_rate": 1.6709013075339702e-05, "loss": 0.6711, "step": 11234 }, { "epoch": 0.29, "grad_norm": 1.3696483373641968, "learning_rate": 1.670839746256282e-05, "loss": 0.7018, "step": 11235 }, { "epoch": 0.29, "grad_norm": 2.1624534130096436, "learning_rate": 1.6707781803555882e-05, "loss": 0.6493, "step": 11236 }, { "epoch": 0.29, "grad_norm": 1.6847338676452637, "learning_rate": 1.6707166098323126e-05, "loss": 0.59, "step": 11237 }, { "epoch": 0.29, "grad_norm": 2.4154112339019775, "learning_rate": 1.6706550346868794e-05, "loss": 0.7058, "step": 11238 }, { "epoch": 0.29, "grad_norm": 3.4563393592834473, "learning_rate": 1.670593454919713e-05, "loss": 0.7469, "step": 11239 }, { "epoch": 0.29, "grad_norm": 2.207810401916504, "learning_rate": 1.6705318705312375e-05, "loss": 0.6607, "step": 11240 }, { "epoch": 0.29, "grad_norm": 2.553089141845703, "learning_rate": 1.6704702815218778e-05, "loss": 0.5245, "step": 11241 }, { "epoch": 0.29, "grad_norm": 5.4182209968566895, "learning_rate": 1.6704086878920583e-05, "loss": 0.7146, "step": 11242 }, { "epoch": 0.29, "grad_norm": 1.941465139389038, "learning_rate": 1.6703470896422037e-05, "loss": 0.5864, "step": 11243 }, { "epoch": 0.29, "grad_norm": 1.5951772928237915, "learning_rate": 1.6702854867727375e-05, "loss": 0.662, "step": 11244 }, { "epoch": 0.29, "grad_norm": 1.425964593887329, "learning_rate": 1.670223879284085e-05, "loss": 0.6128, "step": 11245 }, { "epoch": 0.29, "grad_norm": 1.363441824913025, "learning_rate": 1.6701622671766706e-05, "loss": 0.6382, "step": 11246 }, { "epoch": 0.29, "grad_norm": 1.279674768447876, "learning_rate": 1.670100650450919e-05, "loss": 0.4809, "step": 11247 }, { "epoch": 0.29, "grad_norm": 2.380164384841919, "learning_rate": 1.6700390291072545e-05, "loss": 0.5682, "step": 11248 }, { "epoch": 0.29, "grad_norm": 2.326573133468628, "learning_rate": 1.6699774031461024e-05, "loss": 0.6937, "step": 11249 }, { "epoch": 0.29, "grad_norm": 2.9116361141204834, "learning_rate": 1.669915772567887e-05, "loss": 0.7158, "step": 11250 }, { "epoch": 0.29, "grad_norm": 4.799156665802002, "learning_rate": 1.669854137373032e-05, "loss": 0.6754, "step": 11251 }, { "epoch": 0.29, "grad_norm": 2.0406949520111084, "learning_rate": 1.669792497561964e-05, "loss": 0.6104, "step": 11252 }, { "epoch": 0.29, "grad_norm": 3.629533290863037, "learning_rate": 1.6697308531351068e-05, "loss": 0.7543, "step": 11253 }, { "epoch": 0.29, "grad_norm": 1.4992804527282715, "learning_rate": 1.6696692040928853e-05, "loss": 0.6266, "step": 11254 }, { "epoch": 0.29, "grad_norm": 2.0679683685302734, "learning_rate": 1.669607550435724e-05, "loss": 0.7873, "step": 11255 }, { "epoch": 0.29, "grad_norm": 1.2004165649414062, "learning_rate": 1.6695458921640485e-05, "loss": 0.6228, "step": 11256 }, { "epoch": 0.29, "grad_norm": 1.4294981956481934, "learning_rate": 1.6694842292782833e-05, "loss": 0.5843, "step": 11257 }, { "epoch": 0.29, "grad_norm": 1.566979169845581, "learning_rate": 1.6694225617788534e-05, "loss": 0.5775, "step": 11258 }, { "epoch": 0.29, "grad_norm": 1.9206204414367676, "learning_rate": 1.669360889666184e-05, "loss": 0.5339, "step": 11259 }, { "epoch": 0.29, "grad_norm": 4.086844444274902, "learning_rate": 1.6692992129406996e-05, "loss": 0.7929, "step": 11260 }, { "epoch": 0.29, "grad_norm": 4.953901290893555, "learning_rate": 1.6692375316028255e-05, "loss": 0.6994, "step": 11261 }, { "epoch": 0.29, "grad_norm": 2.7191390991210938, "learning_rate": 1.669175845652987e-05, "loss": 0.5491, "step": 11262 }, { "epoch": 0.29, "grad_norm": 1.5022742748260498, "learning_rate": 1.669114155091609e-05, "loss": 0.492, "step": 11263 }, { "epoch": 0.29, "grad_norm": 1.5354238748550415, "learning_rate": 1.6690524599191162e-05, "loss": 0.5649, "step": 11264 }, { "epoch": 0.29, "grad_norm": 1.5575149059295654, "learning_rate": 1.6689907601359345e-05, "loss": 0.5908, "step": 11265 }, { "epoch": 0.29, "grad_norm": 2.7692854404449463, "learning_rate": 1.668929055742489e-05, "loss": 0.6651, "step": 11266 }, { "epoch": 0.29, "grad_norm": 4.323894023895264, "learning_rate": 1.6688673467392043e-05, "loss": 0.5729, "step": 11267 }, { "epoch": 0.29, "grad_norm": 1.899975299835205, "learning_rate": 1.6688056331265065e-05, "loss": 0.5703, "step": 11268 }, { "epoch": 0.29, "grad_norm": 4.013859748840332, "learning_rate": 1.66874391490482e-05, "loss": 0.7275, "step": 11269 }, { "epoch": 0.29, "grad_norm": 3.4255871772766113, "learning_rate": 1.6686821920745707e-05, "loss": 0.6471, "step": 11270 }, { "epoch": 0.29, "grad_norm": 3.519984722137451, "learning_rate": 1.668620464636184e-05, "loss": 0.5415, "step": 11271 }, { "epoch": 0.29, "grad_norm": 3.059934616088867, "learning_rate": 1.668558732590085e-05, "loss": 0.5617, "step": 11272 }, { "epoch": 0.29, "grad_norm": 2.1304047107696533, "learning_rate": 1.668496995936699e-05, "loss": 0.6029, "step": 11273 }, { "epoch": 0.29, "grad_norm": 3.6968743801116943, "learning_rate": 1.6684352546764522e-05, "loss": 0.4752, "step": 11274 }, { "epoch": 0.29, "grad_norm": 2.432567596435547, "learning_rate": 1.6683735088097694e-05, "loss": 0.6193, "step": 11275 }, { "epoch": 0.29, "grad_norm": 2.9816811084747314, "learning_rate": 1.6683117583370764e-05, "loss": 0.5044, "step": 11276 }, { "epoch": 0.29, "grad_norm": 1.2995262145996094, "learning_rate": 1.668250003258798e-05, "loss": 0.4362, "step": 11277 }, { "epoch": 0.29, "grad_norm": 1.5082118511199951, "learning_rate": 1.668188243575361e-05, "loss": 0.7086, "step": 11278 }, { "epoch": 0.29, "grad_norm": 1.5187318325042725, "learning_rate": 1.6681264792871906e-05, "loss": 0.636, "step": 11279 }, { "epoch": 0.29, "grad_norm": 2.5922691822052, "learning_rate": 1.668064710394712e-05, "loss": 0.5996, "step": 11280 }, { "epoch": 0.29, "grad_norm": 4.176223278045654, "learning_rate": 1.668002936898351e-05, "loss": 0.5814, "step": 11281 }, { "epoch": 0.29, "grad_norm": 4.267077445983887, "learning_rate": 1.6679411587985337e-05, "loss": 0.5686, "step": 11282 }, { "epoch": 0.29, "grad_norm": 4.1384596824646, "learning_rate": 1.6678793760956854e-05, "loss": 0.6541, "step": 11283 }, { "epoch": 0.29, "grad_norm": 2.8023648262023926, "learning_rate": 1.667817588790232e-05, "loss": 0.6475, "step": 11284 }, { "epoch": 0.29, "grad_norm": 1.6676743030548096, "learning_rate": 1.6677557968825995e-05, "loss": 0.6038, "step": 11285 }, { "epoch": 0.29, "grad_norm": 1.5975295305252075, "learning_rate": 1.6676940003732135e-05, "loss": 0.495, "step": 11286 }, { "epoch": 0.29, "grad_norm": 4.059474945068359, "learning_rate": 1.6676321992625e-05, "loss": 0.6921, "step": 11287 }, { "epoch": 0.29, "grad_norm": 1.9099615812301636, "learning_rate": 1.6675703935508847e-05, "loss": 0.4856, "step": 11288 }, { "epoch": 0.29, "grad_norm": 1.3613076210021973, "learning_rate": 1.6675085832387935e-05, "loss": 0.6033, "step": 11289 }, { "epoch": 0.29, "grad_norm": 3.619499683380127, "learning_rate": 1.667446768326653e-05, "loss": 0.6536, "step": 11290 }, { "epoch": 0.29, "grad_norm": 2.084435224533081, "learning_rate": 1.6673849488148882e-05, "loss": 0.7545, "step": 11291 }, { "epoch": 0.29, "grad_norm": 3.39788556098938, "learning_rate": 1.667323124703926e-05, "loss": 0.6295, "step": 11292 }, { "epoch": 0.29, "grad_norm": 1.677337408065796, "learning_rate": 1.667261295994192e-05, "loss": 0.7158, "step": 11293 }, { "epoch": 0.29, "grad_norm": 1.61282217502594, "learning_rate": 1.6671994626861125e-05, "loss": 0.5543, "step": 11294 }, { "epoch": 0.29, "grad_norm": 1.7648720741271973, "learning_rate": 1.667137624780113e-05, "loss": 0.4628, "step": 11295 }, { "epoch": 0.29, "grad_norm": 3.2770063877105713, "learning_rate": 1.6670757822766208e-05, "loss": 0.7145, "step": 11296 }, { "epoch": 0.29, "grad_norm": 2.1388540267944336, "learning_rate": 1.6670139351760608e-05, "loss": 0.6884, "step": 11297 }, { "epoch": 0.29, "grad_norm": 1.9741408824920654, "learning_rate": 1.6669520834788603e-05, "loss": 0.5186, "step": 11298 }, { "epoch": 0.29, "grad_norm": 3.7775652408599854, "learning_rate": 1.6668902271854443e-05, "loss": 0.683, "step": 11299 }, { "epoch": 0.29, "grad_norm": 5.8257575035095215, "learning_rate": 1.666828366296241e-05, "loss": 0.6745, "step": 11300 }, { "epoch": 0.29, "grad_norm": 4.179060935974121, "learning_rate": 1.6667665008116745e-05, "loss": 0.6314, "step": 11301 }, { "epoch": 0.29, "grad_norm": 1.9750841856002808, "learning_rate": 1.6667046307321724e-05, "loss": 0.5819, "step": 11302 }, { "epoch": 0.29, "grad_norm": 1.8566020727157593, "learning_rate": 1.666642756058161e-05, "loss": 0.7261, "step": 11303 }, { "epoch": 0.29, "grad_norm": 2.3231887817382812, "learning_rate": 1.6665808767900667e-05, "loss": 0.6786, "step": 11304 }, { "epoch": 0.29, "grad_norm": 3.2751529216766357, "learning_rate": 1.6665189929283158e-05, "loss": 0.6696, "step": 11305 }, { "epoch": 0.29, "grad_norm": 2.525628089904785, "learning_rate": 1.6664571044733345e-05, "loss": 0.5788, "step": 11306 }, { "epoch": 0.29, "grad_norm": 1.931883454322815, "learning_rate": 1.66639521142555e-05, "loss": 0.541, "step": 11307 }, { "epoch": 0.29, "grad_norm": 2.564189910888672, "learning_rate": 1.6663333137853877e-05, "loss": 0.6307, "step": 11308 }, { "epoch": 0.29, "grad_norm": 1.8484283685684204, "learning_rate": 1.6662714115532753e-05, "loss": 0.6593, "step": 11309 }, { "epoch": 0.29, "grad_norm": 1.1476327180862427, "learning_rate": 1.6662095047296386e-05, "loss": 0.7091, "step": 11310 }, { "epoch": 0.29, "grad_norm": 2.1728808879852295, "learning_rate": 1.6661475933149047e-05, "loss": 0.7665, "step": 11311 }, { "epoch": 0.29, "grad_norm": 1.1446253061294556, "learning_rate": 1.6660856773095e-05, "loss": 0.5888, "step": 11312 }, { "epoch": 0.29, "grad_norm": 3.5395584106445312, "learning_rate": 1.6660237567138514e-05, "loss": 0.6226, "step": 11313 }, { "epoch": 0.29, "grad_norm": 1.8421565294265747, "learning_rate": 1.6659618315283855e-05, "loss": 0.6309, "step": 11314 }, { "epoch": 0.29, "grad_norm": 3.6069600582122803, "learning_rate": 1.665899901753529e-05, "loss": 0.7554, "step": 11315 }, { "epoch": 0.29, "grad_norm": 1.7742087841033936, "learning_rate": 1.665837967389709e-05, "loss": 0.6237, "step": 11316 }, { "epoch": 0.29, "grad_norm": 1.8666282892227173, "learning_rate": 1.665776028437352e-05, "loss": 0.6834, "step": 11317 }, { "epoch": 0.29, "grad_norm": 2.4912519454956055, "learning_rate": 1.6657140848968847e-05, "loss": 0.5327, "step": 11318 }, { "epoch": 0.29, "grad_norm": 2.394655704498291, "learning_rate": 1.665652136768734e-05, "loss": 0.6495, "step": 11319 }, { "epoch": 0.29, "grad_norm": 2.5976696014404297, "learning_rate": 1.6655901840533274e-05, "loss": 0.6412, "step": 11320 }, { "epoch": 0.29, "grad_norm": 3.549325942993164, "learning_rate": 1.665528226751091e-05, "loss": 0.5631, "step": 11321 }, { "epoch": 0.29, "grad_norm": 5.1848673820495605, "learning_rate": 1.6654662648624523e-05, "loss": 0.681, "step": 11322 }, { "epoch": 0.29, "grad_norm": 1.697845220565796, "learning_rate": 1.6654042983878386e-05, "loss": 0.5562, "step": 11323 }, { "epoch": 0.29, "grad_norm": 1.6113395690917969, "learning_rate": 1.665342327327676e-05, "loss": 0.4655, "step": 11324 }, { "epoch": 0.29, "grad_norm": 3.5676403045654297, "learning_rate": 1.6652803516823925e-05, "loss": 0.5337, "step": 11325 }, { "epoch": 0.29, "grad_norm": 2.348439931869507, "learning_rate": 1.6652183714524143e-05, "loss": 0.7162, "step": 11326 }, { "epoch": 0.29, "grad_norm": 1.3412151336669922, "learning_rate": 1.6651563866381694e-05, "loss": 0.5745, "step": 11327 }, { "epoch": 0.29, "grad_norm": 1.6057168245315552, "learning_rate": 1.6650943972400845e-05, "loss": 0.7135, "step": 11328 }, { "epoch": 0.29, "grad_norm": 2.3069908618927, "learning_rate": 1.6650324032585867e-05, "loss": 0.6446, "step": 11329 }, { "epoch": 0.29, "grad_norm": 3.141021490097046, "learning_rate": 1.6649704046941036e-05, "loss": 0.5652, "step": 11330 }, { "epoch": 0.29, "grad_norm": 1.8230901956558228, "learning_rate": 1.6649084015470622e-05, "loss": 0.6411, "step": 11331 }, { "epoch": 0.29, "grad_norm": 2.4380059242248535, "learning_rate": 1.6648463938178897e-05, "loss": 0.5697, "step": 11332 }, { "epoch": 0.29, "grad_norm": 0.9441443085670471, "learning_rate": 1.6647843815070136e-05, "loss": 0.626, "step": 11333 }, { "epoch": 0.29, "grad_norm": 1.6792316436767578, "learning_rate": 1.6647223646148613e-05, "loss": 0.5808, "step": 11334 }, { "epoch": 0.29, "grad_norm": 4.516804218292236, "learning_rate": 1.6646603431418602e-05, "loss": 0.7389, "step": 11335 }, { "epoch": 0.29, "grad_norm": 1.3581585884094238, "learning_rate": 1.6645983170884374e-05, "loss": 0.5113, "step": 11336 }, { "epoch": 0.29, "grad_norm": 2.441450834274292, "learning_rate": 1.6645362864550206e-05, "loss": 0.6343, "step": 11337 }, { "epoch": 0.29, "grad_norm": 3.672649621963501, "learning_rate": 1.6644742512420372e-05, "loss": 0.5697, "step": 11338 }, { "epoch": 0.29, "grad_norm": 3.038767099380493, "learning_rate": 1.6644122114499145e-05, "loss": 0.7196, "step": 11339 }, { "epoch": 0.29, "grad_norm": 3.2480814456939697, "learning_rate": 1.6643501670790806e-05, "loss": 0.6349, "step": 11340 }, { "epoch": 0.29, "grad_norm": 3.5752251148223877, "learning_rate": 1.6642881181299627e-05, "loss": 0.6975, "step": 11341 }, { "epoch": 0.29, "grad_norm": 2.3835415840148926, "learning_rate": 1.6642260646029883e-05, "loss": 0.6017, "step": 11342 }, { "epoch": 0.29, "grad_norm": 4.6506123542785645, "learning_rate": 1.6641640064985855e-05, "loss": 0.7628, "step": 11343 }, { "epoch": 0.29, "grad_norm": 2.2584972381591797, "learning_rate": 1.664101943817181e-05, "loss": 0.6003, "step": 11344 }, { "epoch": 0.29, "grad_norm": 4.470996379852295, "learning_rate": 1.6640398765592036e-05, "loss": 0.586, "step": 11345 }, { "epoch": 0.29, "grad_norm": 1.3634194135665894, "learning_rate": 1.6639778047250804e-05, "loss": 0.5389, "step": 11346 }, { "epoch": 0.29, "grad_norm": 3.3834967613220215, "learning_rate": 1.6639157283152395e-05, "loss": 0.6539, "step": 11347 }, { "epoch": 0.29, "grad_norm": 1.7807339429855347, "learning_rate": 1.6638536473301084e-05, "loss": 0.7675, "step": 11348 }, { "epoch": 0.29, "grad_norm": 3.304539442062378, "learning_rate": 1.663791561770115e-05, "loss": 0.6762, "step": 11349 }, { "epoch": 0.29, "grad_norm": 2.5214405059814453, "learning_rate": 1.6637294716356873e-05, "loss": 0.6092, "step": 11350 }, { "epoch": 0.29, "grad_norm": 1.551829218864441, "learning_rate": 1.663667376927253e-05, "loss": 0.6797, "step": 11351 }, { "epoch": 0.29, "grad_norm": 1.901551604270935, "learning_rate": 1.66360527764524e-05, "loss": 0.5554, "step": 11352 }, { "epoch": 0.29, "grad_norm": 4.092691898345947, "learning_rate": 1.6635431737900763e-05, "loss": 0.558, "step": 11353 }, { "epoch": 0.29, "grad_norm": 1.4053840637207031, "learning_rate": 1.66348106536219e-05, "loss": 0.6912, "step": 11354 }, { "epoch": 0.29, "grad_norm": 3.644650936126709, "learning_rate": 1.6634189523620095e-05, "loss": 0.7994, "step": 11355 }, { "epoch": 0.29, "grad_norm": 1.1222891807556152, "learning_rate": 1.663356834789962e-05, "loss": 0.6761, "step": 11356 }, { "epoch": 0.29, "grad_norm": 1.5699535608291626, "learning_rate": 1.6632947126464756e-05, "loss": 0.5438, "step": 11357 }, { "epoch": 0.29, "grad_norm": 1.707487940788269, "learning_rate": 1.6632325859319792e-05, "loss": 0.583, "step": 11358 }, { "epoch": 0.29, "grad_norm": 4.85951566696167, "learning_rate": 1.6631704546469004e-05, "loss": 0.7158, "step": 11359 }, { "epoch": 0.29, "grad_norm": 2.8640451431274414, "learning_rate": 1.6631083187916676e-05, "loss": 0.5384, "step": 11360 }, { "epoch": 0.29, "grad_norm": 6.034692287445068, "learning_rate": 1.6630461783667086e-05, "loss": 0.7889, "step": 11361 }, { "epoch": 0.29, "grad_norm": 1.1070852279663086, "learning_rate": 1.662984033372452e-05, "loss": 0.5612, "step": 11362 }, { "epoch": 0.29, "grad_norm": 3.172151803970337, "learning_rate": 1.6629218838093262e-05, "loss": 0.97, "step": 11363 }, { "epoch": 0.29, "grad_norm": 1.7104905843734741, "learning_rate": 1.662859729677759e-05, "loss": 0.6357, "step": 11364 }, { "epoch": 0.29, "grad_norm": 2.4674463272094727, "learning_rate": 1.662797570978179e-05, "loss": 0.6769, "step": 11365 }, { "epoch": 0.29, "grad_norm": 2.242136240005493, "learning_rate": 1.6627354077110146e-05, "loss": 0.6309, "step": 11366 }, { "epoch": 0.29, "grad_norm": 2.1341981887817383, "learning_rate": 1.6626732398766942e-05, "loss": 0.8501, "step": 11367 }, { "epoch": 0.29, "grad_norm": 2.5297718048095703, "learning_rate": 1.6626110674756462e-05, "loss": 0.5574, "step": 11368 }, { "epoch": 0.29, "grad_norm": 3.4736273288726807, "learning_rate": 1.662548890508299e-05, "loss": 0.7501, "step": 11369 }, { "epoch": 0.29, "grad_norm": 1.6935949325561523, "learning_rate": 1.662486708975081e-05, "loss": 0.6237, "step": 11370 }, { "epoch": 0.29, "grad_norm": 2.3879876136779785, "learning_rate": 1.6624245228764208e-05, "loss": 0.5845, "step": 11371 }, { "epoch": 0.29, "grad_norm": 2.427591323852539, "learning_rate": 1.662362332212747e-05, "loss": 0.8956, "step": 11372 }, { "epoch": 0.29, "grad_norm": 2.185727596282959, "learning_rate": 1.662300136984488e-05, "loss": 0.5927, "step": 11373 }, { "epoch": 0.29, "grad_norm": 1.9175472259521484, "learning_rate": 1.662237937192073e-05, "loss": 0.627, "step": 11374 }, { "epoch": 0.29, "grad_norm": 2.1981120109558105, "learning_rate": 1.6621757328359298e-05, "loss": 0.7953, "step": 11375 }, { "epoch": 0.29, "grad_norm": 1.9342012405395508, "learning_rate": 1.6621135239164877e-05, "loss": 0.6454, "step": 11376 }, { "epoch": 0.29, "grad_norm": 2.3299758434295654, "learning_rate": 1.662051310434175e-05, "loss": 0.8873, "step": 11377 }, { "epoch": 0.29, "grad_norm": 1.4362432956695557, "learning_rate": 1.6619890923894207e-05, "loss": 0.6148, "step": 11378 }, { "epoch": 0.29, "grad_norm": 1.4010624885559082, "learning_rate": 1.6619268697826533e-05, "loss": 0.6643, "step": 11379 }, { "epoch": 0.29, "grad_norm": 5.473193168640137, "learning_rate": 1.661864642614302e-05, "loss": 0.6976, "step": 11380 }, { "epoch": 0.29, "grad_norm": 1.2287423610687256, "learning_rate": 1.6618024108847953e-05, "loss": 0.554, "step": 11381 }, { "epoch": 0.29, "grad_norm": 1.5488369464874268, "learning_rate": 1.6617401745945624e-05, "loss": 0.5721, "step": 11382 }, { "epoch": 0.29, "grad_norm": 3.228294849395752, "learning_rate": 1.6616779337440317e-05, "loss": 0.6379, "step": 11383 }, { "epoch": 0.29, "grad_norm": 2.416879415512085, "learning_rate": 1.6616156883336324e-05, "loss": 0.621, "step": 11384 }, { "epoch": 0.29, "grad_norm": 1.574447751045227, "learning_rate": 1.6615534383637936e-05, "loss": 0.7238, "step": 11385 }, { "epoch": 0.29, "grad_norm": 2.7957229614257812, "learning_rate": 1.661491183834944e-05, "loss": 0.4721, "step": 11386 }, { "epoch": 0.29, "grad_norm": 1.89987313747406, "learning_rate": 1.6614289247475127e-05, "loss": 0.6341, "step": 11387 }, { "epoch": 0.29, "grad_norm": 2.433258533477783, "learning_rate": 1.661366661101929e-05, "loss": 0.5099, "step": 11388 }, { "epoch": 0.29, "grad_norm": 1.7736036777496338, "learning_rate": 1.6613043928986217e-05, "loss": 0.7195, "step": 11389 }, { "epoch": 0.29, "grad_norm": 4.286211967468262, "learning_rate": 1.6612421201380197e-05, "loss": 0.6661, "step": 11390 }, { "epoch": 0.29, "grad_norm": 7.015773773193359, "learning_rate": 1.6611798428205526e-05, "loss": 0.6485, "step": 11391 }, { "epoch": 0.29, "grad_norm": 2.223832607269287, "learning_rate": 1.6611175609466497e-05, "loss": 0.55, "step": 11392 }, { "epoch": 0.29, "grad_norm": 2.3516340255737305, "learning_rate": 1.6610552745167398e-05, "loss": 0.6853, "step": 11393 }, { "epoch": 0.29, "grad_norm": 2.4791007041931152, "learning_rate": 1.6609929835312523e-05, "loss": 0.6326, "step": 11394 }, { "epoch": 0.29, "grad_norm": 1.732067584991455, "learning_rate": 1.660930687990616e-05, "loss": 0.6621, "step": 11395 }, { "epoch": 0.29, "grad_norm": 2.5774476528167725, "learning_rate": 1.660868387895261e-05, "loss": 0.6313, "step": 11396 }, { "epoch": 0.29, "grad_norm": 1.4944181442260742, "learning_rate": 1.6608060832456162e-05, "loss": 0.5067, "step": 11397 }, { "epoch": 0.29, "grad_norm": 1.1736198663711548, "learning_rate": 1.6607437740421112e-05, "loss": 0.5262, "step": 11398 }, { "epoch": 0.29, "grad_norm": 2.2463266849517822, "learning_rate": 1.6606814602851753e-05, "loss": 0.5536, "step": 11399 }, { "epoch": 0.29, "grad_norm": 2.075936794281006, "learning_rate": 1.6606191419752377e-05, "loss": 0.7397, "step": 11400 }, { "epoch": 0.29, "grad_norm": 2.3771209716796875, "learning_rate": 1.6605568191127277e-05, "loss": 0.6303, "step": 11401 }, { "epoch": 0.29, "grad_norm": 2.4381425380706787, "learning_rate": 1.6604944916980753e-05, "loss": 0.6212, "step": 11402 }, { "epoch": 0.29, "grad_norm": 2.862445116043091, "learning_rate": 1.6604321597317098e-05, "loss": 0.6619, "step": 11403 }, { "epoch": 0.29, "grad_norm": 1.668596863746643, "learning_rate": 1.6603698232140614e-05, "loss": 0.7128, "step": 11404 }, { "epoch": 0.29, "grad_norm": 1.1258230209350586, "learning_rate": 1.6603074821455584e-05, "loss": 0.6426, "step": 11405 }, { "epoch": 0.29, "grad_norm": 3.615846872329712, "learning_rate": 1.660245136526631e-05, "loss": 0.7534, "step": 11406 }, { "epoch": 0.29, "grad_norm": 3.93790602684021, "learning_rate": 1.6601827863577095e-05, "loss": 0.5782, "step": 11407 }, { "epoch": 0.29, "grad_norm": 1.5137523412704468, "learning_rate": 1.6601204316392224e-05, "loss": 0.5694, "step": 11408 }, { "epoch": 0.29, "grad_norm": 2.885103225708008, "learning_rate": 1.6600580723716002e-05, "loss": 0.717, "step": 11409 }, { "epoch": 0.29, "grad_norm": 2.347958564758301, "learning_rate": 1.6599957085552726e-05, "loss": 0.6383, "step": 11410 }, { "epoch": 0.29, "grad_norm": 2.590782880783081, "learning_rate": 1.6599333401906694e-05, "loss": 0.6394, "step": 11411 }, { "epoch": 0.29, "grad_norm": 1.3582593202590942, "learning_rate": 1.65987096727822e-05, "loss": 0.5453, "step": 11412 }, { "epoch": 0.29, "grad_norm": 4.3477983474731445, "learning_rate": 1.659808589818354e-05, "loss": 0.7186, "step": 11413 }, { "epoch": 0.29, "grad_norm": 2.6956875324249268, "learning_rate": 1.6597462078115026e-05, "loss": 0.7932, "step": 11414 }, { "epoch": 0.29, "grad_norm": 1.161657452583313, "learning_rate": 1.6596838212580944e-05, "loss": 0.5493, "step": 11415 }, { "epoch": 0.29, "grad_norm": 2.7069759368896484, "learning_rate": 1.6596214301585597e-05, "loss": 0.8209, "step": 11416 }, { "epoch": 0.29, "grad_norm": 2.005682945251465, "learning_rate": 1.6595590345133285e-05, "loss": 0.6904, "step": 11417 }, { "epoch": 0.29, "grad_norm": 1.0899840593338013, "learning_rate": 1.659496634322831e-05, "loss": 0.5924, "step": 11418 }, { "epoch": 0.29, "grad_norm": 2.453228235244751, "learning_rate": 1.6594342295874968e-05, "loss": 0.6444, "step": 11419 }, { "epoch": 0.29, "grad_norm": 2.6505355834960938, "learning_rate": 1.659371820307756e-05, "loss": 0.8403, "step": 11420 }, { "epoch": 0.29, "grad_norm": 1.816224455833435, "learning_rate": 1.6593094064840395e-05, "loss": 0.7204, "step": 11421 }, { "epoch": 0.29, "grad_norm": 1.5981076955795288, "learning_rate": 1.6592469881167764e-05, "loss": 0.6057, "step": 11422 }, { "epoch": 0.29, "grad_norm": 3.658647298812866, "learning_rate": 1.6591845652063977e-05, "loss": 0.8545, "step": 11423 }, { "epoch": 0.29, "grad_norm": 5.296632766723633, "learning_rate": 1.6591221377533324e-05, "loss": 0.6443, "step": 11424 }, { "epoch": 0.29, "grad_norm": 1.466497540473938, "learning_rate": 1.659059705758012e-05, "loss": 0.6587, "step": 11425 }, { "epoch": 0.29, "grad_norm": 2.819079875946045, "learning_rate": 1.658997269220866e-05, "loss": 0.6628, "step": 11426 }, { "epoch": 0.29, "grad_norm": 8.529704093933105, "learning_rate": 1.6589348281423246e-05, "loss": 0.6236, "step": 11427 }, { "epoch": 0.29, "grad_norm": 1.89150071144104, "learning_rate": 1.6588723825228187e-05, "loss": 0.582, "step": 11428 }, { "epoch": 0.29, "grad_norm": 1.627579927444458, "learning_rate": 1.658809932362778e-05, "loss": 0.5867, "step": 11429 }, { "epoch": 0.29, "grad_norm": 1.6986016035079956, "learning_rate": 1.658747477662633e-05, "loss": 0.6536, "step": 11430 }, { "epoch": 0.29, "grad_norm": 1.486151933670044, "learning_rate": 1.6586850184228147e-05, "loss": 0.5692, "step": 11431 }, { "epoch": 0.29, "grad_norm": 4.1885199546813965, "learning_rate": 1.6586225546437528e-05, "loss": 0.6195, "step": 11432 }, { "epoch": 0.29, "grad_norm": 1.9832748174667358, "learning_rate": 1.658560086325878e-05, "loss": 0.4654, "step": 11433 }, { "epoch": 0.29, "grad_norm": 3.228545665740967, "learning_rate": 1.658497613469621e-05, "loss": 0.6847, "step": 11434 }, { "epoch": 0.29, "grad_norm": 1.680097222328186, "learning_rate": 1.6584351360754123e-05, "loss": 0.7525, "step": 11435 }, { "epoch": 0.29, "grad_norm": 3.52180814743042, "learning_rate": 1.658372654143682e-05, "loss": 0.7444, "step": 11436 }, { "epoch": 0.29, "grad_norm": 1.624953031539917, "learning_rate": 1.658310167674861e-05, "loss": 0.6336, "step": 11437 }, { "epoch": 0.29, "grad_norm": 1.3795974254608154, "learning_rate": 1.6582476766693804e-05, "loss": 0.569, "step": 11438 }, { "epoch": 0.29, "grad_norm": 1.77335524559021, "learning_rate": 1.65818518112767e-05, "loss": 0.5325, "step": 11439 }, { "epoch": 0.29, "grad_norm": 1.7794520854949951, "learning_rate": 1.6581226810501606e-05, "loss": 0.623, "step": 11440 }, { "epoch": 0.29, "grad_norm": 1.7918611764907837, "learning_rate": 1.6580601764372836e-05, "loss": 0.5932, "step": 11441 }, { "epoch": 0.29, "grad_norm": 1.7102584838867188, "learning_rate": 1.657997667289469e-05, "loss": 0.6664, "step": 11442 }, { "epoch": 0.29, "grad_norm": 2.1595089435577393, "learning_rate": 1.657935153607148e-05, "loss": 0.6312, "step": 11443 }, { "epoch": 0.29, "grad_norm": 9.486306190490723, "learning_rate": 1.657872635390751e-05, "loss": 0.6552, "step": 11444 }, { "epoch": 0.29, "grad_norm": 2.043332099914551, "learning_rate": 1.6578101126407097e-05, "loss": 0.5569, "step": 11445 }, { "epoch": 0.29, "grad_norm": 5.100113868713379, "learning_rate": 1.657747585357454e-05, "loss": 0.7144, "step": 11446 }, { "epoch": 0.29, "grad_norm": 1.849700689315796, "learning_rate": 1.657685053541415e-05, "loss": 0.5676, "step": 11447 }, { "epoch": 0.29, "grad_norm": 2.9065027236938477, "learning_rate": 1.657622517193024e-05, "loss": 0.5795, "step": 11448 }, { "epoch": 0.29, "grad_norm": 1.897972583770752, "learning_rate": 1.657559976312712e-05, "loss": 0.4397, "step": 11449 }, { "epoch": 0.29, "grad_norm": 4.912369251251221, "learning_rate": 1.6574974309009097e-05, "loss": 0.6037, "step": 11450 }, { "epoch": 0.29, "grad_norm": 3.868896007537842, "learning_rate": 1.6574348809580477e-05, "loss": 0.5006, "step": 11451 }, { "epoch": 0.29, "grad_norm": 2.998396873474121, "learning_rate": 1.657372326484558e-05, "loss": 0.6268, "step": 11452 }, { "epoch": 0.29, "grad_norm": 1.2716865539550781, "learning_rate": 1.657309767480871e-05, "loss": 0.5056, "step": 11453 }, { "epoch": 0.29, "grad_norm": 1.5715707540512085, "learning_rate": 1.6572472039474183e-05, "loss": 0.6061, "step": 11454 }, { "epoch": 0.29, "grad_norm": 1.7968252897262573, "learning_rate": 1.6571846358846306e-05, "loss": 0.5767, "step": 11455 }, { "epoch": 0.29, "grad_norm": 1.8094593286514282, "learning_rate": 1.6571220632929394e-05, "loss": 0.5673, "step": 11456 }, { "epoch": 0.29, "grad_norm": 1.7955100536346436, "learning_rate": 1.6570594861727756e-05, "loss": 0.6618, "step": 11457 }, { "epoch": 0.29, "grad_norm": 1.960807204246521, "learning_rate": 1.656996904524571e-05, "loss": 0.6326, "step": 11458 }, { "epoch": 0.29, "grad_norm": 2.2917990684509277, "learning_rate": 1.6569343183487562e-05, "loss": 0.6058, "step": 11459 }, { "epoch": 0.29, "grad_norm": 2.188477039337158, "learning_rate": 1.6568717276457628e-05, "loss": 0.7452, "step": 11460 }, { "epoch": 0.29, "grad_norm": 2.477511405944824, "learning_rate": 1.6568091324160224e-05, "loss": 0.6455, "step": 11461 }, { "epoch": 0.29, "grad_norm": 2.933988332748413, "learning_rate": 1.6567465326599656e-05, "loss": 0.4747, "step": 11462 }, { "epoch": 0.29, "grad_norm": 6.197417736053467, "learning_rate": 1.6566839283780244e-05, "loss": 0.7622, "step": 11463 }, { "epoch": 0.29, "grad_norm": 1.146791696548462, "learning_rate": 1.6566213195706305e-05, "loss": 0.5127, "step": 11464 }, { "epoch": 0.29, "grad_norm": 1.972271203994751, "learning_rate": 1.6565587062382148e-05, "loss": 0.6986, "step": 11465 }, { "epoch": 0.29, "grad_norm": 3.597435235977173, "learning_rate": 1.6564960883812092e-05, "loss": 0.7318, "step": 11466 }, { "epoch": 0.29, "grad_norm": 3.3245770931243896, "learning_rate": 1.656433466000045e-05, "loss": 0.859, "step": 11467 }, { "epoch": 0.29, "grad_norm": 1.1532002687454224, "learning_rate": 1.6563708390951534e-05, "loss": 0.6521, "step": 11468 }, { "epoch": 0.29, "grad_norm": 4.842721939086914, "learning_rate": 1.6563082076669666e-05, "loss": 0.7302, "step": 11469 }, { "epoch": 0.29, "grad_norm": 3.4793283939361572, "learning_rate": 1.6562455717159157e-05, "loss": 0.6146, "step": 11470 }, { "epoch": 0.29, "grad_norm": 1.8019245862960815, "learning_rate": 1.656182931242433e-05, "loss": 0.6014, "step": 11471 }, { "epoch": 0.29, "grad_norm": 1.3782705068588257, "learning_rate": 1.6561202862469495e-05, "loss": 0.5522, "step": 11472 }, { "epoch": 0.29, "grad_norm": 5.357417583465576, "learning_rate": 1.656057636729897e-05, "loss": 0.5531, "step": 11473 }, { "epoch": 0.29, "grad_norm": 2.7797248363494873, "learning_rate": 1.655994982691708e-05, "loss": 0.673, "step": 11474 }, { "epoch": 0.29, "grad_norm": 2.8190414905548096, "learning_rate": 1.6559323241328135e-05, "loss": 0.5601, "step": 11475 }, { "epoch": 0.29, "grad_norm": 1.600687026977539, "learning_rate": 1.6558696610536453e-05, "loss": 0.593, "step": 11476 }, { "epoch": 0.29, "grad_norm": 1.0359101295471191, "learning_rate": 1.6558069934546357e-05, "loss": 0.5464, "step": 11477 }, { "epoch": 0.29, "grad_norm": 5.5800652503967285, "learning_rate": 1.6557443213362164e-05, "loss": 0.6498, "step": 11478 }, { "epoch": 0.29, "grad_norm": 2.2516446113586426, "learning_rate": 1.655681644698819e-05, "loss": 0.7206, "step": 11479 }, { "epoch": 0.29, "grad_norm": 18.93696403503418, "learning_rate": 1.6556189635428758e-05, "loss": 0.736, "step": 11480 }, { "epoch": 0.29, "grad_norm": 1.487596869468689, "learning_rate": 1.6555562778688187e-05, "loss": 0.5548, "step": 11481 }, { "epoch": 0.29, "grad_norm": 2.1332201957702637, "learning_rate": 1.6554935876770792e-05, "loss": 0.6913, "step": 11482 }, { "epoch": 0.29, "grad_norm": 2.27455735206604, "learning_rate": 1.65543089296809e-05, "loss": 0.5534, "step": 11483 }, { "epoch": 0.29, "grad_norm": 1.920148253440857, "learning_rate": 1.655368193742283e-05, "loss": 0.5235, "step": 11484 }, { "epoch": 0.29, "grad_norm": 5.735466957092285, "learning_rate": 1.65530549000009e-05, "loss": 0.4597, "step": 11485 }, { "epoch": 0.29, "grad_norm": 6.726390361785889, "learning_rate": 1.655242781741943e-05, "loss": 0.5934, "step": 11486 }, { "epoch": 0.29, "grad_norm": 1.789284348487854, "learning_rate": 1.6551800689682747e-05, "loss": 0.6131, "step": 11487 }, { "epoch": 0.29, "grad_norm": 3.2637810707092285, "learning_rate": 1.655117351679517e-05, "loss": 0.7314, "step": 11488 }, { "epoch": 0.29, "grad_norm": 3.497579574584961, "learning_rate": 1.655054629876102e-05, "loss": 0.6329, "step": 11489 }, { "epoch": 0.29, "grad_norm": 1.8875831365585327, "learning_rate": 1.6549919035584617e-05, "loss": 0.8019, "step": 11490 }, { "epoch": 0.29, "grad_norm": 3.3725433349609375, "learning_rate": 1.6549291727270294e-05, "loss": 0.6277, "step": 11491 }, { "epoch": 0.29, "grad_norm": 1.0312585830688477, "learning_rate": 1.6548664373822363e-05, "loss": 0.6099, "step": 11492 }, { "epoch": 0.29, "grad_norm": 2.5749244689941406, "learning_rate": 1.6548036975245153e-05, "loss": 0.6459, "step": 11493 }, { "epoch": 0.29, "grad_norm": 1.1819729804992676, "learning_rate": 1.6547409531542985e-05, "loss": 0.5841, "step": 11494 }, { "epoch": 0.29, "grad_norm": 5.819862365722656, "learning_rate": 1.654678204272018e-05, "loss": 0.6385, "step": 11495 }, { "epoch": 0.29, "grad_norm": 2.996002435684204, "learning_rate": 1.6546154508781075e-05, "loss": 0.5706, "step": 11496 }, { "epoch": 0.29, "grad_norm": 2.6448192596435547, "learning_rate": 1.654552692972998e-05, "loss": 0.6755, "step": 11497 }, { "epoch": 0.29, "grad_norm": 3.9393150806427, "learning_rate": 1.6544899305571227e-05, "loss": 0.7223, "step": 11498 }, { "epoch": 0.29, "grad_norm": 1.0689451694488525, "learning_rate": 1.654427163630914e-05, "loss": 0.4649, "step": 11499 }, { "epoch": 0.29, "grad_norm": 2.107792854309082, "learning_rate": 1.6543643921948044e-05, "loss": 0.6594, "step": 11500 }, { "epoch": 0.29, "grad_norm": 2.5039243698120117, "learning_rate": 1.6543016162492267e-05, "loss": 0.5585, "step": 11501 }, { "epoch": 0.29, "grad_norm": 1.1678773164749146, "learning_rate": 1.6542388357946132e-05, "loss": 0.5005, "step": 11502 }, { "epoch": 0.29, "grad_norm": 5.689382553100586, "learning_rate": 1.6541760508313967e-05, "loss": 0.6259, "step": 11503 }, { "epoch": 0.29, "grad_norm": 1.4486039876937866, "learning_rate": 1.6541132613600094e-05, "loss": 0.4289, "step": 11504 }, { "epoch": 0.29, "grad_norm": 1.4488062858581543, "learning_rate": 1.654050467380885e-05, "loss": 0.6649, "step": 11505 }, { "epoch": 0.29, "grad_norm": 3.4007248878479004, "learning_rate": 1.6539876688944556e-05, "loss": 0.8114, "step": 11506 }, { "epoch": 0.29, "grad_norm": 1.297590732574463, "learning_rate": 1.653924865901154e-05, "loss": 0.5106, "step": 11507 }, { "epoch": 0.29, "grad_norm": 2.504452705383301, "learning_rate": 1.6538620584014132e-05, "loss": 0.5747, "step": 11508 }, { "epoch": 0.29, "grad_norm": 2.4272067546844482, "learning_rate": 1.6537992463956657e-05, "loss": 0.631, "step": 11509 }, { "epoch": 0.3, "grad_norm": 3.05191969871521, "learning_rate": 1.6537364298843446e-05, "loss": 0.6232, "step": 11510 }, { "epoch": 0.3, "grad_norm": 1.723707914352417, "learning_rate": 1.653673608867883e-05, "loss": 0.7526, "step": 11511 }, { "epoch": 0.3, "grad_norm": 2.9027493000030518, "learning_rate": 1.653610783346713e-05, "loss": 0.6212, "step": 11512 }, { "epoch": 0.3, "grad_norm": 1.8721410036087036, "learning_rate": 1.6535479533212685e-05, "loss": 0.643, "step": 11513 }, { "epoch": 0.3, "grad_norm": 1.9501726627349854, "learning_rate": 1.6534851187919822e-05, "loss": 0.822, "step": 11514 }, { "epoch": 0.3, "grad_norm": 2.1599020957946777, "learning_rate": 1.653422279759287e-05, "loss": 0.5843, "step": 11515 }, { "epoch": 0.3, "grad_norm": 1.5882750749588013, "learning_rate": 1.6533594362236155e-05, "loss": 0.6213, "step": 11516 }, { "epoch": 0.3, "grad_norm": 1.4887559413909912, "learning_rate": 1.6532965881854014e-05, "loss": 0.7351, "step": 11517 }, { "epoch": 0.3, "grad_norm": 3.6606802940368652, "learning_rate": 1.6532337356450782e-05, "loss": 0.4619, "step": 11518 }, { "epoch": 0.3, "grad_norm": 3.223412036895752, "learning_rate": 1.653170878603078e-05, "loss": 0.5518, "step": 11519 }, { "epoch": 0.3, "grad_norm": 1.4932302236557007, "learning_rate": 1.6531080170598345e-05, "loss": 0.5587, "step": 11520 }, { "epoch": 0.3, "grad_norm": 2.312469005584717, "learning_rate": 1.6530451510157813e-05, "loss": 0.7857, "step": 11521 }, { "epoch": 0.3, "grad_norm": 5.547374725341797, "learning_rate": 1.652982280471351e-05, "loss": 0.6534, "step": 11522 }, { "epoch": 0.3, "grad_norm": 2.9262545108795166, "learning_rate": 1.6529194054269765e-05, "loss": 0.5495, "step": 11523 }, { "epoch": 0.3, "grad_norm": 2.3077540397644043, "learning_rate": 1.652856525883092e-05, "loss": 0.6281, "step": 11524 }, { "epoch": 0.3, "grad_norm": 1.8123520612716675, "learning_rate": 1.652793641840131e-05, "loss": 0.6264, "step": 11525 }, { "epoch": 0.3, "grad_norm": 1.7799150943756104, "learning_rate": 1.652730753298526e-05, "loss": 0.6481, "step": 11526 }, { "epoch": 0.3, "grad_norm": 3.8443477153778076, "learning_rate": 1.6526678602587105e-05, "loss": 0.6127, "step": 11527 }, { "epoch": 0.3, "grad_norm": 1.7213135957717896, "learning_rate": 1.6526049627211182e-05, "loss": 0.5439, "step": 11528 }, { "epoch": 0.3, "grad_norm": 6.076050758361816, "learning_rate": 1.6525420606861828e-05, "loss": 0.6848, "step": 11529 }, { "epoch": 0.3, "grad_norm": 1.8886080980300903, "learning_rate": 1.6524791541543373e-05, "loss": 0.6663, "step": 11530 }, { "epoch": 0.3, "grad_norm": 4.866276741027832, "learning_rate": 1.6524162431260153e-05, "loss": 0.7357, "step": 11531 }, { "epoch": 0.3, "grad_norm": 2.1251380443573, "learning_rate": 1.6523533276016506e-05, "loss": 0.6865, "step": 11532 }, { "epoch": 0.3, "grad_norm": 2.175652503967285, "learning_rate": 1.6522904075816765e-05, "loss": 0.6225, "step": 11533 }, { "epoch": 0.3, "grad_norm": 2.307422637939453, "learning_rate": 1.6522274830665267e-05, "loss": 0.6112, "step": 11534 }, { "epoch": 0.3, "grad_norm": 1.800922155380249, "learning_rate": 1.652164554056635e-05, "loss": 0.6279, "step": 11535 }, { "epoch": 0.3, "grad_norm": 2.7886881828308105, "learning_rate": 1.652101620552435e-05, "loss": 0.6142, "step": 11536 }, { "epoch": 0.3, "grad_norm": 7.834133625030518, "learning_rate": 1.6520386825543603e-05, "loss": 0.6504, "step": 11537 }, { "epoch": 0.3, "grad_norm": 1.7898648977279663, "learning_rate": 1.6519757400628444e-05, "loss": 0.7601, "step": 11538 }, { "epoch": 0.3, "grad_norm": 2.956199884414673, "learning_rate": 1.6519127930783218e-05, "loss": 0.9247, "step": 11539 }, { "epoch": 0.3, "grad_norm": 2.557957172393799, "learning_rate": 1.6518498416012252e-05, "loss": 0.7286, "step": 11540 }, { "epoch": 0.3, "grad_norm": 2.5707318782806396, "learning_rate": 1.6517868856319893e-05, "loss": 0.6673, "step": 11541 }, { "epoch": 0.3, "grad_norm": 1.6910091638565063, "learning_rate": 1.651723925171048e-05, "loss": 0.5085, "step": 11542 }, { "epoch": 0.3, "grad_norm": 4.594368934631348, "learning_rate": 1.651660960218834e-05, "loss": 0.642, "step": 11543 }, { "epoch": 0.3, "grad_norm": 2.02455997467041, "learning_rate": 1.6515979907757825e-05, "loss": 0.6401, "step": 11544 }, { "epoch": 0.3, "grad_norm": 2.7216668128967285, "learning_rate": 1.6515350168423273e-05, "loss": 0.5356, "step": 11545 }, { "epoch": 0.3, "grad_norm": 5.721945762634277, "learning_rate": 1.651472038418902e-05, "loss": 0.5569, "step": 11546 }, { "epoch": 0.3, "grad_norm": 1.737474799156189, "learning_rate": 1.6514090555059404e-05, "loss": 0.6546, "step": 11547 }, { "epoch": 0.3, "grad_norm": 1.2992651462554932, "learning_rate": 1.6513460681038773e-05, "loss": 0.5831, "step": 11548 }, { "epoch": 0.3, "grad_norm": 2.2708394527435303, "learning_rate": 1.651283076213146e-05, "loss": 0.6138, "step": 11549 }, { "epoch": 0.3, "grad_norm": 2.000930070877075, "learning_rate": 1.651220079834181e-05, "loss": 0.6478, "step": 11550 }, { "epoch": 0.3, "grad_norm": 4.476296424865723, "learning_rate": 1.6511570789674164e-05, "loss": 0.6545, "step": 11551 }, { "epoch": 0.3, "grad_norm": 5.731906414031982, "learning_rate": 1.6510940736132862e-05, "loss": 0.5215, "step": 11552 }, { "epoch": 0.3, "grad_norm": 1.8942326307296753, "learning_rate": 1.6510310637722247e-05, "loss": 0.7013, "step": 11553 }, { "epoch": 0.3, "grad_norm": 0.9134144186973572, "learning_rate": 1.650968049444666e-05, "loss": 0.5956, "step": 11554 }, { "epoch": 0.3, "grad_norm": 6.800982475280762, "learning_rate": 1.650905030631045e-05, "loss": 0.7136, "step": 11555 }, { "epoch": 0.3, "grad_norm": 2.783541440963745, "learning_rate": 1.650842007331795e-05, "loss": 0.6349, "step": 11556 }, { "epoch": 0.3, "grad_norm": 1.6824266910552979, "learning_rate": 1.6507789795473508e-05, "loss": 0.584, "step": 11557 }, { "epoch": 0.3, "grad_norm": 3.3225760459899902, "learning_rate": 1.650715947278147e-05, "loss": 0.6934, "step": 11558 }, { "epoch": 0.3, "grad_norm": 1.3267369270324707, "learning_rate": 1.6506529105246176e-05, "loss": 0.7887, "step": 11559 }, { "epoch": 0.3, "grad_norm": 3.974595308303833, "learning_rate": 1.650589869287197e-05, "loss": 0.4866, "step": 11560 }, { "epoch": 0.3, "grad_norm": 3.991346836090088, "learning_rate": 1.65052682356632e-05, "loss": 0.7705, "step": 11561 }, { "epoch": 0.3, "grad_norm": 3.8521387577056885, "learning_rate": 1.6504637733624206e-05, "loss": 0.4876, "step": 11562 }, { "epoch": 0.3, "grad_norm": 1.4215091466903687, "learning_rate": 1.6504007186759336e-05, "loss": 0.6156, "step": 11563 }, { "epoch": 0.3, "grad_norm": 1.5363770723342896, "learning_rate": 1.6503376595072934e-05, "loss": 0.5818, "step": 11564 }, { "epoch": 0.3, "grad_norm": 1.4132241010665894, "learning_rate": 1.6502745958569345e-05, "loss": 0.5564, "step": 11565 }, { "epoch": 0.3, "grad_norm": 1.6453913450241089, "learning_rate": 1.650211527725292e-05, "loss": 0.6254, "step": 11566 }, { "epoch": 0.3, "grad_norm": 1.1597743034362793, "learning_rate": 1.6501484551128e-05, "loss": 0.5937, "step": 11567 }, { "epoch": 0.3, "grad_norm": 6.805324077606201, "learning_rate": 1.6500853780198932e-05, "loss": 0.5952, "step": 11568 }, { "epoch": 0.3, "grad_norm": 3.4335060119628906, "learning_rate": 1.6500222964470066e-05, "loss": 0.6571, "step": 11569 }, { "epoch": 0.3, "grad_norm": 1.3856110572814941, "learning_rate": 1.6499592103945744e-05, "loss": 0.5643, "step": 11570 }, { "epoch": 0.3, "grad_norm": 3.61714243888855, "learning_rate": 1.649896119863032e-05, "loss": 0.585, "step": 11571 }, { "epoch": 0.3, "grad_norm": 1.1855807304382324, "learning_rate": 1.6498330248528137e-05, "loss": 0.6262, "step": 11572 }, { "epoch": 0.3, "grad_norm": 1.7275049686431885, "learning_rate": 1.6497699253643545e-05, "loss": 0.6614, "step": 11573 }, { "epoch": 0.3, "grad_norm": 1.3845518827438354, "learning_rate": 1.6497068213980893e-05, "loss": 0.6247, "step": 11574 }, { "epoch": 0.3, "grad_norm": 1.8189938068389893, "learning_rate": 1.6496437129544526e-05, "loss": 0.7325, "step": 11575 }, { "epoch": 0.3, "grad_norm": 2.1420671939849854, "learning_rate": 1.6495806000338798e-05, "loss": 0.566, "step": 11576 }, { "epoch": 0.3, "grad_norm": 1.9393731355667114, "learning_rate": 1.6495174826368057e-05, "loss": 0.7226, "step": 11577 }, { "epoch": 0.3, "grad_norm": 1.1115461587905884, "learning_rate": 1.649454360763665e-05, "loss": 0.5368, "step": 11578 }, { "epoch": 0.3, "grad_norm": 2.766563653945923, "learning_rate": 1.6493912344148925e-05, "loss": 0.716, "step": 11579 }, { "epoch": 0.3, "grad_norm": 7.102710723876953, "learning_rate": 1.649328103590924e-05, "loss": 0.6832, "step": 11580 }, { "epoch": 0.3, "grad_norm": 3.9084882736206055, "learning_rate": 1.6492649682921943e-05, "loss": 0.594, "step": 11581 }, { "epoch": 0.3, "grad_norm": 1.3754456043243408, "learning_rate": 1.6492018285191382e-05, "loss": 0.7527, "step": 11582 }, { "epoch": 0.3, "grad_norm": 1.5126842260360718, "learning_rate": 1.649138684272191e-05, "loss": 0.5204, "step": 11583 }, { "epoch": 0.3, "grad_norm": 6.494123935699463, "learning_rate": 1.6490755355517876e-05, "loss": 0.4958, "step": 11584 }, { "epoch": 0.3, "grad_norm": 2.088346481323242, "learning_rate": 1.6490123823583636e-05, "loss": 0.7197, "step": 11585 }, { "epoch": 0.3, "grad_norm": 7.177193641662598, "learning_rate": 1.648949224692354e-05, "loss": 0.8025, "step": 11586 }, { "epoch": 0.3, "grad_norm": 1.4933840036392212, "learning_rate": 1.648886062554194e-05, "loss": 0.5064, "step": 11587 }, { "epoch": 0.3, "grad_norm": 1.1824973821640015, "learning_rate": 1.648822895944319e-05, "loss": 0.6199, "step": 11588 }, { "epoch": 0.3, "grad_norm": 3.4435572624206543, "learning_rate": 1.6487597248631644e-05, "loss": 0.9711, "step": 11589 }, { "epoch": 0.3, "grad_norm": 1.3504801988601685, "learning_rate": 1.6486965493111652e-05, "loss": 0.5334, "step": 11590 }, { "epoch": 0.3, "grad_norm": 1.8401808738708496, "learning_rate": 1.648633369288757e-05, "loss": 0.5347, "step": 11591 }, { "epoch": 0.3, "grad_norm": 4.436821460723877, "learning_rate": 1.6485701847963748e-05, "loss": 0.5199, "step": 11592 }, { "epoch": 0.3, "grad_norm": 3.8174877166748047, "learning_rate": 1.6485069958344546e-05, "loss": 0.7231, "step": 11593 }, { "epoch": 0.3, "grad_norm": 1.452487826347351, "learning_rate": 1.6484438024034317e-05, "loss": 0.6472, "step": 11594 }, { "epoch": 0.3, "grad_norm": 1.8432565927505493, "learning_rate": 1.6483806045037415e-05, "loss": 0.6406, "step": 11595 }, { "epoch": 0.3, "grad_norm": 3.067645788192749, "learning_rate": 1.6483174021358194e-05, "loss": 0.5625, "step": 11596 }, { "epoch": 0.3, "grad_norm": 3.08855938911438, "learning_rate": 1.6482541953001006e-05, "loss": 0.7249, "step": 11597 }, { "epoch": 0.3, "grad_norm": 1.6811386346817017, "learning_rate": 1.648190983997022e-05, "loss": 0.685, "step": 11598 }, { "epoch": 0.3, "grad_norm": 1.209220290184021, "learning_rate": 1.648127768227018e-05, "loss": 0.5966, "step": 11599 }, { "epoch": 0.3, "grad_norm": 1.916377305984497, "learning_rate": 1.648064547990525e-05, "loss": 0.6588, "step": 11600 }, { "epoch": 0.3, "grad_norm": 2.6314356327056885, "learning_rate": 1.6480013232879776e-05, "loss": 0.4827, "step": 11601 }, { "epoch": 0.3, "grad_norm": 1.7227718830108643, "learning_rate": 1.6479380941198127e-05, "loss": 0.684, "step": 11602 }, { "epoch": 0.3, "grad_norm": 1.5606937408447266, "learning_rate": 1.6478748604864652e-05, "loss": 0.7087, "step": 11603 }, { "epoch": 0.3, "grad_norm": 1.9824812412261963, "learning_rate": 1.6478116223883714e-05, "loss": 0.5111, "step": 11604 }, { "epoch": 0.3, "grad_norm": 2.7380523681640625, "learning_rate": 1.6477483798259668e-05, "loss": 0.4849, "step": 11605 }, { "epoch": 0.3, "grad_norm": 1.95579993724823, "learning_rate": 1.6476851327996874e-05, "loss": 0.5567, "step": 11606 }, { "epoch": 0.3, "grad_norm": 2.5589897632598877, "learning_rate": 1.6476218813099692e-05, "loss": 0.5388, "step": 11607 }, { "epoch": 0.3, "grad_norm": 1.8721303939819336, "learning_rate": 1.6475586253572476e-05, "loss": 0.6501, "step": 11608 }, { "epoch": 0.3, "grad_norm": 1.3375439643859863, "learning_rate": 1.6474953649419587e-05, "loss": 0.6878, "step": 11609 }, { "epoch": 0.3, "grad_norm": 1.523701548576355, "learning_rate": 1.6474321000645388e-05, "loss": 0.5423, "step": 11610 }, { "epoch": 0.3, "grad_norm": 2.23972225189209, "learning_rate": 1.6473688307254234e-05, "loss": 0.5216, "step": 11611 }, { "epoch": 0.3, "grad_norm": 2.9228198528289795, "learning_rate": 1.6473055569250488e-05, "loss": 0.5873, "step": 11612 }, { "epoch": 0.3, "grad_norm": 3.2158918380737305, "learning_rate": 1.647242278663851e-05, "loss": 0.7222, "step": 11613 }, { "epoch": 0.3, "grad_norm": 1.219130039215088, "learning_rate": 1.6471789959422663e-05, "loss": 0.5885, "step": 11614 }, { "epoch": 0.3, "grad_norm": 3.727339267730713, "learning_rate": 1.64711570876073e-05, "loss": 0.6383, "step": 11615 }, { "epoch": 0.3, "grad_norm": 1.2178279161453247, "learning_rate": 1.647052417119679e-05, "loss": 0.6776, "step": 11616 }, { "epoch": 0.3, "grad_norm": 1.6202489137649536, "learning_rate": 1.6469891210195497e-05, "loss": 0.5685, "step": 11617 }, { "epoch": 0.3, "grad_norm": 2.060044050216675, "learning_rate": 1.6469258204607775e-05, "loss": 0.5208, "step": 11618 }, { "epoch": 0.3, "grad_norm": 3.195406436920166, "learning_rate": 1.6468625154437988e-05, "loss": 0.6809, "step": 11619 }, { "epoch": 0.3, "grad_norm": 3.826895236968994, "learning_rate": 1.6467992059690504e-05, "loss": 0.634, "step": 11620 }, { "epoch": 0.3, "grad_norm": 3.385608196258545, "learning_rate": 1.646735892036968e-05, "loss": 0.5724, "step": 11621 }, { "epoch": 0.3, "grad_norm": 2.108858346939087, "learning_rate": 1.6466725736479882e-05, "loss": 0.5812, "step": 11622 }, { "epoch": 0.3, "grad_norm": 2.7423603534698486, "learning_rate": 1.6466092508025475e-05, "loss": 0.6168, "step": 11623 }, { "epoch": 0.3, "grad_norm": 1.2037320137023926, "learning_rate": 1.6465459235010817e-05, "loss": 0.5605, "step": 11624 }, { "epoch": 0.3, "grad_norm": 1.229257583618164, "learning_rate": 1.6464825917440278e-05, "loss": 0.6216, "step": 11625 }, { "epoch": 0.3, "grad_norm": 1.624822735786438, "learning_rate": 1.646419255531822e-05, "loss": 0.666, "step": 11626 }, { "epoch": 0.3, "grad_norm": 1.9889308214187622, "learning_rate": 1.646355914864901e-05, "loss": 0.6972, "step": 11627 }, { "epoch": 0.3, "grad_norm": 2.919508695602417, "learning_rate": 1.646292569743701e-05, "loss": 0.7333, "step": 11628 }, { "epoch": 0.3, "grad_norm": 2.275745153427124, "learning_rate": 1.6462292201686586e-05, "loss": 0.5887, "step": 11629 }, { "epoch": 0.3, "grad_norm": 1.6555694341659546, "learning_rate": 1.6461658661402106e-05, "loss": 0.5645, "step": 11630 }, { "epoch": 0.3, "grad_norm": 2.3553102016448975, "learning_rate": 1.6461025076587932e-05, "loss": 0.6373, "step": 11631 }, { "epoch": 0.3, "grad_norm": 1.8266569375991821, "learning_rate": 1.646039144724843e-05, "loss": 0.7244, "step": 11632 }, { "epoch": 0.3, "grad_norm": 2.916696071624756, "learning_rate": 1.6459757773387973e-05, "loss": 0.4684, "step": 11633 }, { "epoch": 0.3, "grad_norm": 4.985363006591797, "learning_rate": 1.6459124055010923e-05, "loss": 0.7981, "step": 11634 }, { "epoch": 0.3, "grad_norm": 2.1746022701263428, "learning_rate": 1.645849029212165e-05, "loss": 0.641, "step": 11635 }, { "epoch": 0.3, "grad_norm": 2.2082953453063965, "learning_rate": 1.6457856484724514e-05, "loss": 0.522, "step": 11636 }, { "epoch": 0.3, "grad_norm": 8.729731559753418, "learning_rate": 1.6457222632823894e-05, "loss": 0.5864, "step": 11637 }, { "epoch": 0.3, "grad_norm": 2.9790468215942383, "learning_rate": 1.645658873642415e-05, "loss": 0.5599, "step": 11638 }, { "epoch": 0.3, "grad_norm": 1.3772774934768677, "learning_rate": 1.645595479552965e-05, "loss": 0.4991, "step": 11639 }, { "epoch": 0.3, "grad_norm": 5.00169563293457, "learning_rate": 1.645532081014477e-05, "loss": 0.5094, "step": 11640 }, { "epoch": 0.3, "grad_norm": 2.0480306148529053, "learning_rate": 1.6454686780273875e-05, "loss": 0.3484, "step": 11641 }, { "epoch": 0.3, "grad_norm": 2.0594964027404785, "learning_rate": 1.6454052705921332e-05, "loss": 0.6774, "step": 11642 }, { "epoch": 0.3, "grad_norm": 1.8686821460723877, "learning_rate": 1.6453418587091512e-05, "loss": 0.6244, "step": 11643 }, { "epoch": 0.3, "grad_norm": 1.497650146484375, "learning_rate": 1.6452784423788787e-05, "loss": 0.6363, "step": 11644 }, { "epoch": 0.3, "grad_norm": 1.7163952589035034, "learning_rate": 1.6452150216017523e-05, "loss": 0.6237, "step": 11645 }, { "epoch": 0.3, "grad_norm": 3.526158571243286, "learning_rate": 1.6451515963782095e-05, "loss": 0.7736, "step": 11646 }, { "epoch": 0.3, "grad_norm": 2.0742218494415283, "learning_rate": 1.6450881667086873e-05, "loss": 0.7074, "step": 11647 }, { "epoch": 0.3, "grad_norm": 2.207805633544922, "learning_rate": 1.6450247325936227e-05, "loss": 0.5725, "step": 11648 }, { "epoch": 0.3, "grad_norm": 2.1657164096832275, "learning_rate": 1.644961294033453e-05, "loss": 0.6462, "step": 11649 }, { "epoch": 0.3, "grad_norm": 3.648599863052368, "learning_rate": 1.644897851028615e-05, "loss": 0.7955, "step": 11650 }, { "epoch": 0.3, "grad_norm": 3.11792254447937, "learning_rate": 1.6448344035795463e-05, "loss": 0.7088, "step": 11651 }, { "epoch": 0.3, "grad_norm": 2.4690449237823486, "learning_rate": 1.644770951686684e-05, "loss": 0.7369, "step": 11652 }, { "epoch": 0.3, "grad_norm": 5.45159387588501, "learning_rate": 1.6447074953504656e-05, "loss": 0.6198, "step": 11653 }, { "epoch": 0.3, "grad_norm": 2.2596380710601807, "learning_rate": 1.644644034571328e-05, "loss": 0.7014, "step": 11654 }, { "epoch": 0.3, "grad_norm": 5.334100723266602, "learning_rate": 1.6445805693497087e-05, "loss": 0.5745, "step": 11655 }, { "epoch": 0.3, "grad_norm": 1.18357253074646, "learning_rate": 1.644517099686045e-05, "loss": 0.5896, "step": 11656 }, { "epoch": 0.3, "grad_norm": 1.8383549451828003, "learning_rate": 1.6444536255807745e-05, "loss": 0.5543, "step": 11657 }, { "epoch": 0.3, "grad_norm": 1.8044781684875488, "learning_rate": 1.6443901470343348e-05, "loss": 0.7405, "step": 11658 }, { "epoch": 0.3, "grad_norm": 1.4924925565719604, "learning_rate": 1.6443266640471626e-05, "loss": 0.6187, "step": 11659 }, { "epoch": 0.3, "grad_norm": 3.1879451274871826, "learning_rate": 1.644263176619696e-05, "loss": 0.7238, "step": 11660 }, { "epoch": 0.3, "grad_norm": 1.9200994968414307, "learning_rate": 1.6441996847523723e-05, "loss": 0.41, "step": 11661 }, { "epoch": 0.3, "grad_norm": 7.302661895751953, "learning_rate": 1.644136188445629e-05, "loss": 0.6571, "step": 11662 }, { "epoch": 0.3, "grad_norm": 2.304457902908325, "learning_rate": 1.644072687699904e-05, "loss": 0.4339, "step": 11663 }, { "epoch": 0.3, "grad_norm": 9.015588760375977, "learning_rate": 1.6440091825156347e-05, "loss": 0.5355, "step": 11664 }, { "epoch": 0.3, "grad_norm": 1.8222484588623047, "learning_rate": 1.6439456728932584e-05, "loss": 0.6672, "step": 11665 }, { "epoch": 0.3, "grad_norm": 5.565004825592041, "learning_rate": 1.6438821588332134e-05, "loss": 0.5667, "step": 11666 }, { "epoch": 0.3, "grad_norm": 1.5412161350250244, "learning_rate": 1.643818640335937e-05, "loss": 0.5898, "step": 11667 }, { "epoch": 0.3, "grad_norm": 2.3948323726654053, "learning_rate": 1.643755117401867e-05, "loss": 0.5062, "step": 11668 }, { "epoch": 0.3, "grad_norm": 2.2341432571411133, "learning_rate": 1.6436915900314412e-05, "loss": 0.7682, "step": 11669 }, { "epoch": 0.3, "grad_norm": 3.8529999256134033, "learning_rate": 1.6436280582250972e-05, "loss": 0.6097, "step": 11670 }, { "epoch": 0.3, "grad_norm": 1.938759207725525, "learning_rate": 1.6435645219832733e-05, "loss": 0.533, "step": 11671 }, { "epoch": 0.3, "grad_norm": 3.10710072517395, "learning_rate": 1.6435009813064065e-05, "loss": 0.8563, "step": 11672 }, { "epoch": 0.3, "grad_norm": 3.1585378646850586, "learning_rate": 1.643437436194936e-05, "loss": 0.546, "step": 11673 }, { "epoch": 0.3, "grad_norm": 3.9888744354248047, "learning_rate": 1.6433738866492983e-05, "loss": 0.661, "step": 11674 }, { "epoch": 0.3, "grad_norm": 7.331356048583984, "learning_rate": 1.6433103326699322e-05, "loss": 0.6064, "step": 11675 }, { "epoch": 0.3, "grad_norm": 1.178468942642212, "learning_rate": 1.6432467742572757e-05, "loss": 0.641, "step": 11676 }, { "epoch": 0.3, "grad_norm": 2.1386559009552, "learning_rate": 1.643183211411766e-05, "loss": 0.6327, "step": 11677 }, { "epoch": 0.3, "grad_norm": 1.315485954284668, "learning_rate": 1.6431196441338418e-05, "loss": 0.5738, "step": 11678 }, { "epoch": 0.3, "grad_norm": 1.2405762672424316, "learning_rate": 1.6430560724239417e-05, "loss": 0.605, "step": 11679 }, { "epoch": 0.3, "grad_norm": 2.555060863494873, "learning_rate": 1.6429924962825027e-05, "loss": 0.531, "step": 11680 }, { "epoch": 0.3, "grad_norm": 2.179098129272461, "learning_rate": 1.6429289157099636e-05, "loss": 0.5378, "step": 11681 }, { "epoch": 0.3, "grad_norm": 2.0636813640594482, "learning_rate": 1.642865330706762e-05, "loss": 0.62, "step": 11682 }, { "epoch": 0.3, "grad_norm": 6.14339542388916, "learning_rate": 1.6428017412733367e-05, "loss": 0.5805, "step": 11683 }, { "epoch": 0.3, "grad_norm": 12.432439804077148, "learning_rate": 1.642738147410125e-05, "loss": 0.7517, "step": 11684 }, { "epoch": 0.3, "grad_norm": 2.3514418601989746, "learning_rate": 1.6426745491175666e-05, "loss": 0.6657, "step": 11685 }, { "epoch": 0.3, "grad_norm": 1.3905303478240967, "learning_rate": 1.642610946396099e-05, "loss": 0.6277, "step": 11686 }, { "epoch": 0.3, "grad_norm": 1.7267537117004395, "learning_rate": 1.64254733924616e-05, "loss": 0.6364, "step": 11687 }, { "epoch": 0.3, "grad_norm": 1.368512749671936, "learning_rate": 1.6424837276681883e-05, "loss": 0.4897, "step": 11688 }, { "epoch": 0.3, "grad_norm": 4.748495578765869, "learning_rate": 1.642420111662623e-05, "loss": 0.7558, "step": 11689 }, { "epoch": 0.3, "grad_norm": 2.885364055633545, "learning_rate": 1.6423564912299016e-05, "loss": 0.7019, "step": 11690 }, { "epoch": 0.3, "grad_norm": 1.7089864015579224, "learning_rate": 1.642292866370463e-05, "loss": 0.5372, "step": 11691 }, { "epoch": 0.3, "grad_norm": 3.5084667205810547, "learning_rate": 1.6422292370847452e-05, "loss": 0.5904, "step": 11692 }, { "epoch": 0.3, "grad_norm": 1.84794020652771, "learning_rate": 1.642165603373187e-05, "loss": 0.5678, "step": 11693 }, { "epoch": 0.3, "grad_norm": 2.772716760635376, "learning_rate": 1.6421019652362268e-05, "loss": 0.913, "step": 11694 }, { "epoch": 0.3, "grad_norm": 2.580380439758301, "learning_rate": 1.642038322674304e-05, "loss": 0.692, "step": 11695 }, { "epoch": 0.3, "grad_norm": 2.18426513671875, "learning_rate": 1.6419746756878556e-05, "loss": 0.5537, "step": 11696 }, { "epoch": 0.3, "grad_norm": 7.316044330596924, "learning_rate": 1.6419110242773216e-05, "loss": 0.622, "step": 11697 }, { "epoch": 0.3, "grad_norm": 2.2209179401397705, "learning_rate": 1.64184736844314e-05, "loss": 0.6883, "step": 11698 }, { "epoch": 0.3, "grad_norm": 3.5253026485443115, "learning_rate": 1.6417837081857493e-05, "loss": 0.7445, "step": 11699 }, { "epoch": 0.3, "grad_norm": 1.6470673084259033, "learning_rate": 1.6417200435055884e-05, "loss": 0.5498, "step": 11700 }, { "epoch": 0.3, "grad_norm": 1.594724178314209, "learning_rate": 1.6416563744030966e-05, "loss": 0.6007, "step": 11701 }, { "epoch": 0.3, "grad_norm": 1.256798267364502, "learning_rate": 1.641592700878712e-05, "loss": 0.4699, "step": 11702 }, { "epoch": 0.3, "grad_norm": 1.234454870223999, "learning_rate": 1.6415290229328737e-05, "loss": 0.463, "step": 11703 }, { "epoch": 0.3, "grad_norm": 1.4825855493545532, "learning_rate": 1.6414653405660205e-05, "loss": 0.5765, "step": 11704 }, { "epoch": 0.3, "grad_norm": 0.9718382358551025, "learning_rate": 1.6414016537785907e-05, "loss": 0.4386, "step": 11705 }, { "epoch": 0.3, "grad_norm": 5.728033542633057, "learning_rate": 1.641337962571024e-05, "loss": 0.7093, "step": 11706 }, { "epoch": 0.3, "grad_norm": 1.1393095254898071, "learning_rate": 1.641274266943759e-05, "loss": 0.5672, "step": 11707 }, { "epoch": 0.3, "grad_norm": 2.2990050315856934, "learning_rate": 1.6412105668972346e-05, "loss": 0.5977, "step": 11708 }, { "epoch": 0.3, "grad_norm": 1.6666529178619385, "learning_rate": 1.64114686243189e-05, "loss": 0.6569, "step": 11709 }, { "epoch": 0.3, "grad_norm": 2.7081429958343506, "learning_rate": 1.6410831535481636e-05, "loss": 0.4614, "step": 11710 }, { "epoch": 0.3, "grad_norm": 2.293396472930908, "learning_rate": 1.6410194402464955e-05, "loss": 0.6464, "step": 11711 }, { "epoch": 0.3, "grad_norm": 1.249561071395874, "learning_rate": 1.6409557225273232e-05, "loss": 0.6683, "step": 11712 }, { "epoch": 0.3, "grad_norm": 2.0434465408325195, "learning_rate": 1.6408920003910876e-05, "loss": 0.5526, "step": 11713 }, { "epoch": 0.3, "grad_norm": 4.433396816253662, "learning_rate": 1.640828273838227e-05, "loss": 0.6034, "step": 11714 }, { "epoch": 0.3, "grad_norm": 2.3790225982666016, "learning_rate": 1.64076454286918e-05, "loss": 0.7229, "step": 11715 }, { "epoch": 0.3, "grad_norm": 2.674889087677002, "learning_rate": 1.6407008074843868e-05, "loss": 0.5786, "step": 11716 }, { "epoch": 0.3, "grad_norm": 2.8851890563964844, "learning_rate": 1.6406370676842856e-05, "loss": 0.4338, "step": 11717 }, { "epoch": 0.3, "grad_norm": 3.132861614227295, "learning_rate": 1.6405733234693166e-05, "loss": 0.7949, "step": 11718 }, { "epoch": 0.3, "grad_norm": 1.0339645147323608, "learning_rate": 1.6405095748399186e-05, "loss": 0.5133, "step": 11719 }, { "epoch": 0.3, "grad_norm": 1.6136271953582764, "learning_rate": 1.640445821796531e-05, "loss": 0.5275, "step": 11720 }, { "epoch": 0.3, "grad_norm": 5.6236348152160645, "learning_rate": 1.6403820643395935e-05, "loss": 0.7559, "step": 11721 }, { "epoch": 0.3, "grad_norm": 2.3175270557403564, "learning_rate": 1.6403183024695448e-05, "loss": 0.5687, "step": 11722 }, { "epoch": 0.3, "grad_norm": 1.7263134717941284, "learning_rate": 1.640254536186825e-05, "loss": 0.6228, "step": 11723 }, { "epoch": 0.3, "grad_norm": 3.4100310802459717, "learning_rate": 1.6401907654918728e-05, "loss": 0.7167, "step": 11724 }, { "epoch": 0.3, "grad_norm": 1.2059038877487183, "learning_rate": 1.6401269903851284e-05, "loss": 0.5312, "step": 11725 }, { "epoch": 0.3, "grad_norm": 1.212461233139038, "learning_rate": 1.6400632108670303e-05, "loss": 0.6858, "step": 11726 }, { "epoch": 0.3, "grad_norm": 1.8381597995758057, "learning_rate": 1.6399994269380193e-05, "loss": 0.6175, "step": 11727 }, { "epoch": 0.3, "grad_norm": 1.4788063764572144, "learning_rate": 1.6399356385985342e-05, "loss": 0.6243, "step": 11728 }, { "epoch": 0.3, "grad_norm": 1.975494146347046, "learning_rate": 1.6398718458490148e-05, "loss": 0.6544, "step": 11729 }, { "epoch": 0.3, "grad_norm": 1.124221682548523, "learning_rate": 1.6398080486899e-05, "loss": 0.5476, "step": 11730 }, { "epoch": 0.3, "grad_norm": 1.4053959846496582, "learning_rate": 1.639744247121631e-05, "loss": 0.6018, "step": 11731 }, { "epoch": 0.3, "grad_norm": 2.848297595977783, "learning_rate": 1.639680441144646e-05, "loss": 0.6762, "step": 11732 }, { "epoch": 0.3, "grad_norm": 1.9371966123580933, "learning_rate": 1.6396166307593856e-05, "loss": 0.5947, "step": 11733 }, { "epoch": 0.3, "grad_norm": 2.711455821990967, "learning_rate": 1.639552815966289e-05, "loss": 0.5646, "step": 11734 }, { "epoch": 0.3, "grad_norm": 1.489627718925476, "learning_rate": 1.6394889967657962e-05, "loss": 0.6022, "step": 11735 }, { "epoch": 0.3, "grad_norm": 2.4192440509796143, "learning_rate": 1.639425173158347e-05, "loss": 0.5412, "step": 11736 }, { "epoch": 0.3, "grad_norm": 5.435688018798828, "learning_rate": 1.6393613451443815e-05, "loss": 0.7478, "step": 11737 }, { "epoch": 0.3, "grad_norm": 1.573888897895813, "learning_rate": 1.6392975127243392e-05, "loss": 0.5853, "step": 11738 }, { "epoch": 0.3, "grad_norm": 3.819941759109497, "learning_rate": 1.63923367589866e-05, "loss": 0.6925, "step": 11739 }, { "epoch": 0.3, "grad_norm": 5.103796005249023, "learning_rate": 1.6391698346677838e-05, "loss": 0.51, "step": 11740 }, { "epoch": 0.3, "grad_norm": 3.613492012023926, "learning_rate": 1.639105989032151e-05, "loss": 0.5278, "step": 11741 }, { "epoch": 0.3, "grad_norm": 3.4347546100616455, "learning_rate": 1.6390421389922007e-05, "loss": 0.5827, "step": 11742 }, { "epoch": 0.3, "grad_norm": 1.3269959688186646, "learning_rate": 1.6389782845483742e-05, "loss": 0.5406, "step": 11743 }, { "epoch": 0.3, "grad_norm": 4.370487213134766, "learning_rate": 1.6389144257011103e-05, "loss": 0.8458, "step": 11744 }, { "epoch": 0.3, "grad_norm": 0.9424536824226379, "learning_rate": 1.6388505624508496e-05, "loss": 0.5235, "step": 11745 }, { "epoch": 0.3, "grad_norm": 1.2168786525726318, "learning_rate": 1.638786694798032e-05, "loss": 0.5996, "step": 11746 }, { "epoch": 0.3, "grad_norm": 2.0705649852752686, "learning_rate": 1.6387228227430983e-05, "loss": 0.6369, "step": 11747 }, { "epoch": 0.3, "grad_norm": 3.553541660308838, "learning_rate": 1.638658946286488e-05, "loss": 0.5901, "step": 11748 }, { "epoch": 0.3, "grad_norm": 1.6138650178909302, "learning_rate": 1.6385950654286414e-05, "loss": 0.6494, "step": 11749 }, { "epoch": 0.3, "grad_norm": 8.398167610168457, "learning_rate": 1.6385311801699992e-05, "loss": 0.6692, "step": 11750 }, { "epoch": 0.3, "grad_norm": 2.7750213146209717, "learning_rate": 1.6384672905110008e-05, "loss": 0.7301, "step": 11751 }, { "epoch": 0.3, "grad_norm": 2.0561461448669434, "learning_rate": 1.638403396452087e-05, "loss": 0.5191, "step": 11752 }, { "epoch": 0.3, "grad_norm": 2.043973445892334, "learning_rate": 1.6383394979936984e-05, "loss": 0.6122, "step": 11753 }, { "epoch": 0.3, "grad_norm": 2.8035569190979004, "learning_rate": 1.6382755951362746e-05, "loss": 0.6398, "step": 11754 }, { "epoch": 0.3, "grad_norm": 1.6566981077194214, "learning_rate": 1.638211687880257e-05, "loss": 0.44, "step": 11755 }, { "epoch": 0.3, "grad_norm": 2.649451494216919, "learning_rate": 1.638147776226085e-05, "loss": 0.6415, "step": 11756 }, { "epoch": 0.3, "grad_norm": 1.2396018505096436, "learning_rate": 1.6380838601741995e-05, "loss": 0.508, "step": 11757 }, { "epoch": 0.3, "grad_norm": 3.43183970451355, "learning_rate": 1.638019939725041e-05, "loss": 0.6726, "step": 11758 }, { "epoch": 0.3, "grad_norm": 2.0237748622894287, "learning_rate": 1.6379560148790497e-05, "loss": 0.6332, "step": 11759 }, { "epoch": 0.3, "grad_norm": 1.3911551237106323, "learning_rate": 1.6378920856366662e-05, "loss": 0.5348, "step": 11760 }, { "epoch": 0.3, "grad_norm": 1.679558277130127, "learning_rate": 1.6378281519983318e-05, "loss": 0.4575, "step": 11761 }, { "epoch": 0.3, "grad_norm": 12.548291206359863, "learning_rate": 1.637764213964486e-05, "loss": 0.729, "step": 11762 }, { "epoch": 0.3, "grad_norm": 2.2118031978607178, "learning_rate": 1.6377002715355704e-05, "loss": 0.601, "step": 11763 }, { "epoch": 0.3, "grad_norm": 1.7060383558273315, "learning_rate": 1.6376363247120246e-05, "loss": 0.6093, "step": 11764 }, { "epoch": 0.3, "grad_norm": 1.963748812675476, "learning_rate": 1.63757237349429e-05, "loss": 0.4985, "step": 11765 }, { "epoch": 0.3, "grad_norm": 3.0550010204315186, "learning_rate": 1.6375084178828077e-05, "loss": 0.6227, "step": 11766 }, { "epoch": 0.3, "grad_norm": 3.8234872817993164, "learning_rate": 1.637444457878017e-05, "loss": 0.6422, "step": 11767 }, { "epoch": 0.3, "grad_norm": 2.884186267852783, "learning_rate": 1.6373804934803602e-05, "loss": 0.8121, "step": 11768 }, { "epoch": 0.3, "grad_norm": 2.5544798374176025, "learning_rate": 1.6373165246902774e-05, "loss": 0.7195, "step": 11769 }, { "epoch": 0.3, "grad_norm": 1.3080765008926392, "learning_rate": 1.6372525515082095e-05, "loss": 0.6263, "step": 11770 }, { "epoch": 0.3, "grad_norm": 2.524259090423584, "learning_rate": 1.6371885739345973e-05, "loss": 0.6924, "step": 11771 }, { "epoch": 0.3, "grad_norm": 1.769919514656067, "learning_rate": 1.637124591969882e-05, "loss": 0.5666, "step": 11772 }, { "epoch": 0.3, "grad_norm": 1.969050645828247, "learning_rate": 1.637060605614504e-05, "loss": 0.5975, "step": 11773 }, { "epoch": 0.3, "grad_norm": 1.426308274269104, "learning_rate": 1.6369966148689046e-05, "loss": 0.6834, "step": 11774 }, { "epoch": 0.3, "grad_norm": 1.6277614831924438, "learning_rate": 1.6369326197335248e-05, "loss": 0.6742, "step": 11775 }, { "epoch": 0.3, "grad_norm": 4.335145473480225, "learning_rate": 1.6368686202088054e-05, "loss": 0.5043, "step": 11776 }, { "epoch": 0.3, "grad_norm": 1.695891261100769, "learning_rate": 1.636804616295188e-05, "loss": 0.6837, "step": 11777 }, { "epoch": 0.3, "grad_norm": 1.6596572399139404, "learning_rate": 1.636740607993113e-05, "loss": 0.7258, "step": 11778 }, { "epoch": 0.3, "grad_norm": 1.536129355430603, "learning_rate": 1.6366765953030218e-05, "loss": 0.459, "step": 11779 }, { "epoch": 0.3, "grad_norm": 1.0974764823913574, "learning_rate": 1.6366125782253552e-05, "loss": 0.5722, "step": 11780 }, { "epoch": 0.3, "grad_norm": 2.9975767135620117, "learning_rate": 1.636548556760555e-05, "loss": 0.6725, "step": 11781 }, { "epoch": 0.3, "grad_norm": 2.7860336303710938, "learning_rate": 1.636484530909062e-05, "loss": 0.4916, "step": 11782 }, { "epoch": 0.3, "grad_norm": 1.8112105131149292, "learning_rate": 1.6364205006713175e-05, "loss": 0.7088, "step": 11783 }, { "epoch": 0.3, "grad_norm": 2.2304062843322754, "learning_rate": 1.636356466047763e-05, "loss": 0.5481, "step": 11784 }, { "epoch": 0.3, "grad_norm": 2.122654914855957, "learning_rate": 1.636292427038839e-05, "loss": 0.7512, "step": 11785 }, { "epoch": 0.3, "grad_norm": 6.0105061531066895, "learning_rate": 1.6362283836449878e-05, "loss": 0.6347, "step": 11786 }, { "epoch": 0.3, "grad_norm": 2.000537157058716, "learning_rate": 1.63616433586665e-05, "loss": 0.8256, "step": 11787 }, { "epoch": 0.3, "grad_norm": 2.5928659439086914, "learning_rate": 1.6361002837042675e-05, "loss": 0.6179, "step": 11788 }, { "epoch": 0.3, "grad_norm": 3.0360734462738037, "learning_rate": 1.6360362271582815e-05, "loss": 0.7481, "step": 11789 }, { "epoch": 0.3, "grad_norm": 2.739013433456421, "learning_rate": 1.635972166229133e-05, "loss": 0.6002, "step": 11790 }, { "epoch": 0.3, "grad_norm": 1.6811270713806152, "learning_rate": 1.6359081009172644e-05, "loss": 0.6593, "step": 11791 }, { "epoch": 0.3, "grad_norm": 1.3390270471572876, "learning_rate": 1.6358440312231163e-05, "loss": 0.5804, "step": 11792 }, { "epoch": 0.3, "grad_norm": 7.737635612487793, "learning_rate": 1.635779957147131e-05, "loss": 0.5672, "step": 11793 }, { "epoch": 0.3, "grad_norm": 2.1917800903320312, "learning_rate": 1.6357158786897492e-05, "loss": 0.5588, "step": 11794 }, { "epoch": 0.3, "grad_norm": 1.516959309577942, "learning_rate": 1.6356517958514133e-05, "loss": 0.5436, "step": 11795 }, { "epoch": 0.3, "grad_norm": 2.105937957763672, "learning_rate": 1.6355877086325645e-05, "loss": 0.5027, "step": 11796 }, { "epoch": 0.3, "grad_norm": 2.81191349029541, "learning_rate": 1.6355236170336446e-05, "loss": 0.7602, "step": 11797 }, { "epoch": 0.3, "grad_norm": 1.3592603206634521, "learning_rate": 1.635459521055095e-05, "loss": 0.6157, "step": 11798 }, { "epoch": 0.3, "grad_norm": 2.8435256481170654, "learning_rate": 1.635395420697358e-05, "loss": 0.7858, "step": 11799 }, { "epoch": 0.3, "grad_norm": 3.461099147796631, "learning_rate": 1.6353313159608743e-05, "loss": 0.666, "step": 11800 }, { "epoch": 0.3, "grad_norm": 2.3415701389312744, "learning_rate": 1.635267206846087e-05, "loss": 0.5635, "step": 11801 }, { "epoch": 0.3, "grad_norm": 2.4257233142852783, "learning_rate": 1.6352030933534368e-05, "loss": 0.5274, "step": 11802 }, { "epoch": 0.3, "grad_norm": 1.5910086631774902, "learning_rate": 1.6351389754833662e-05, "loss": 0.4621, "step": 11803 }, { "epoch": 0.3, "grad_norm": 1.709827184677124, "learning_rate": 1.635074853236317e-05, "loss": 0.5556, "step": 11804 }, { "epoch": 0.3, "grad_norm": 1.6961981058120728, "learning_rate": 1.6350107266127303e-05, "loss": 0.6423, "step": 11805 }, { "epoch": 0.3, "grad_norm": 1.8427841663360596, "learning_rate": 1.634946595613049e-05, "loss": 0.5217, "step": 11806 }, { "epoch": 0.3, "grad_norm": 7.262327671051025, "learning_rate": 1.6348824602377144e-05, "loss": 0.6854, "step": 11807 }, { "epoch": 0.3, "grad_norm": 3.546261787414551, "learning_rate": 1.634818320487169e-05, "loss": 0.6655, "step": 11808 }, { "epoch": 0.3, "grad_norm": 2.121721029281616, "learning_rate": 1.6347541763618546e-05, "loss": 0.6239, "step": 11809 }, { "epoch": 0.3, "grad_norm": 4.748379230499268, "learning_rate": 1.634690027862213e-05, "loss": 0.7807, "step": 11810 }, { "epoch": 0.3, "grad_norm": 3.665874719619751, "learning_rate": 1.634625874988687e-05, "loss": 0.4494, "step": 11811 }, { "epoch": 0.3, "grad_norm": 4.476574897766113, "learning_rate": 1.6345617177417178e-05, "loss": 0.4909, "step": 11812 }, { "epoch": 0.3, "grad_norm": 2.5849456787109375, "learning_rate": 1.6344975561217477e-05, "loss": 0.646, "step": 11813 }, { "epoch": 0.3, "grad_norm": 1.7977386713027954, "learning_rate": 1.6344333901292193e-05, "loss": 0.6294, "step": 11814 }, { "epoch": 0.3, "grad_norm": 1.3426101207733154, "learning_rate": 1.6343692197645745e-05, "loss": 0.6696, "step": 11815 }, { "epoch": 0.3, "grad_norm": 1.5040340423583984, "learning_rate": 1.6343050450282558e-05, "loss": 0.54, "step": 11816 }, { "epoch": 0.3, "grad_norm": 5.307180881500244, "learning_rate": 1.634240865920705e-05, "loss": 0.7725, "step": 11817 }, { "epoch": 0.3, "grad_norm": 3.6527435779571533, "learning_rate": 1.6341766824423646e-05, "loss": 0.557, "step": 11818 }, { "epoch": 0.3, "grad_norm": 2.514686346054077, "learning_rate": 1.6341124945936774e-05, "loss": 0.6685, "step": 11819 }, { "epoch": 0.3, "grad_norm": 3.4998114109039307, "learning_rate": 1.6340483023750845e-05, "loss": 0.6094, "step": 11820 }, { "epoch": 0.3, "grad_norm": 2.004885673522949, "learning_rate": 1.6339841057870295e-05, "loss": 0.5995, "step": 11821 }, { "epoch": 0.3, "grad_norm": 1.557108759880066, "learning_rate": 1.6339199048299545e-05, "loss": 0.7478, "step": 11822 }, { "epoch": 0.3, "grad_norm": 4.59208345413208, "learning_rate": 1.6338556995043016e-05, "loss": 0.8498, "step": 11823 }, { "epoch": 0.3, "grad_norm": 1.7817513942718506, "learning_rate": 1.6337914898105134e-05, "loss": 0.5406, "step": 11824 }, { "epoch": 0.3, "grad_norm": 3.584899425506592, "learning_rate": 1.6337272757490327e-05, "loss": 0.7194, "step": 11825 }, { "epoch": 0.3, "grad_norm": 2.4327502250671387, "learning_rate": 1.6336630573203015e-05, "loss": 0.8583, "step": 11826 }, { "epoch": 0.3, "grad_norm": 3.0796985626220703, "learning_rate": 1.6335988345247625e-05, "loss": 0.5422, "step": 11827 }, { "epoch": 0.3, "grad_norm": 2.5054759979248047, "learning_rate": 1.6335346073628585e-05, "loss": 0.4787, "step": 11828 }, { "epoch": 0.3, "grad_norm": 1.366664171218872, "learning_rate": 1.6334703758350322e-05, "loss": 0.5669, "step": 11829 }, { "epoch": 0.3, "grad_norm": 1.6831697225570679, "learning_rate": 1.6334061399417258e-05, "loss": 0.5703, "step": 11830 }, { "epoch": 0.3, "grad_norm": 2.347181797027588, "learning_rate": 1.6333418996833822e-05, "loss": 0.5144, "step": 11831 }, { "epoch": 0.3, "grad_norm": 2.736344337463379, "learning_rate": 1.6332776550604445e-05, "loss": 0.5552, "step": 11832 }, { "epoch": 0.3, "grad_norm": 3.3345181941986084, "learning_rate": 1.6332134060733545e-05, "loss": 0.6763, "step": 11833 }, { "epoch": 0.3, "grad_norm": 1.714705228805542, "learning_rate": 1.633149152722556e-05, "loss": 0.5154, "step": 11834 }, { "epoch": 0.3, "grad_norm": 5.944699764251709, "learning_rate": 1.6330848950084915e-05, "loss": 0.577, "step": 11835 }, { "epoch": 0.3, "grad_norm": 2.6652917861938477, "learning_rate": 1.6330206329316033e-05, "loss": 0.6904, "step": 11836 }, { "epoch": 0.3, "grad_norm": 2.6926472187042236, "learning_rate": 1.6329563664923346e-05, "loss": 0.5923, "step": 11837 }, { "epoch": 0.3, "grad_norm": 2.0278308391571045, "learning_rate": 1.6328920956911284e-05, "loss": 0.5417, "step": 11838 }, { "epoch": 0.3, "grad_norm": 4.160927772521973, "learning_rate": 1.6328278205284273e-05, "loss": 0.5763, "step": 11839 }, { "epoch": 0.3, "grad_norm": 6.1239094734191895, "learning_rate": 1.6327635410046747e-05, "loss": 0.826, "step": 11840 }, { "epoch": 0.3, "grad_norm": 4.3637285232543945, "learning_rate": 1.6326992571203132e-05, "loss": 0.7111, "step": 11841 }, { "epoch": 0.3, "grad_norm": 1.7500239610671997, "learning_rate": 1.632634968875786e-05, "loss": 0.5916, "step": 11842 }, { "epoch": 0.3, "grad_norm": 1.2253044843673706, "learning_rate": 1.632570676271536e-05, "loss": 0.5368, "step": 11843 }, { "epoch": 0.3, "grad_norm": 6.210618019104004, "learning_rate": 1.632506379308006e-05, "loss": 0.6674, "step": 11844 }, { "epoch": 0.3, "grad_norm": 3.1295437812805176, "learning_rate": 1.6324420779856398e-05, "loss": 0.5861, "step": 11845 }, { "epoch": 0.3, "grad_norm": 3.6612839698791504, "learning_rate": 1.6323777723048803e-05, "loss": 0.5072, "step": 11846 }, { "epoch": 0.3, "grad_norm": 1.205467700958252, "learning_rate": 1.6323134622661698e-05, "loss": 0.6361, "step": 11847 }, { "epoch": 0.3, "grad_norm": 1.7498645782470703, "learning_rate": 1.6322491478699525e-05, "loss": 0.5175, "step": 11848 }, { "epoch": 0.3, "grad_norm": 1.2472875118255615, "learning_rate": 1.6321848291166717e-05, "loss": 0.5866, "step": 11849 }, { "epoch": 0.3, "grad_norm": 1.674576759338379, "learning_rate": 1.63212050600677e-05, "loss": 0.5827, "step": 11850 }, { "epoch": 0.3, "grad_norm": 4.181934356689453, "learning_rate": 1.6320561785406907e-05, "loss": 0.8864, "step": 11851 }, { "epoch": 0.3, "grad_norm": 1.5917158126831055, "learning_rate": 1.6319918467188776e-05, "loss": 0.6245, "step": 11852 }, { "epoch": 0.3, "grad_norm": 1.633948564529419, "learning_rate": 1.6319275105417736e-05, "loss": 0.4986, "step": 11853 }, { "epoch": 0.3, "grad_norm": 2.6908600330352783, "learning_rate": 1.631863170009822e-05, "loss": 0.393, "step": 11854 }, { "epoch": 0.3, "grad_norm": 3.1588165760040283, "learning_rate": 1.6317988251234667e-05, "loss": 0.6357, "step": 11855 }, { "epoch": 0.3, "grad_norm": 3.2454209327697754, "learning_rate": 1.6317344758831506e-05, "loss": 0.7896, "step": 11856 }, { "epoch": 0.3, "grad_norm": 1.1609411239624023, "learning_rate": 1.6316701222893178e-05, "loss": 0.5371, "step": 11857 }, { "epoch": 0.3, "grad_norm": 2.439882516860962, "learning_rate": 1.631605764342411e-05, "loss": 0.7656, "step": 11858 }, { "epoch": 0.3, "grad_norm": 1.235517144203186, "learning_rate": 1.6315414020428742e-05, "loss": 0.5845, "step": 11859 }, { "epoch": 0.3, "grad_norm": 2.3758020401000977, "learning_rate": 1.631477035391151e-05, "loss": 0.6259, "step": 11860 }, { "epoch": 0.3, "grad_norm": 2.317230463027954, "learning_rate": 1.6314126643876846e-05, "loss": 0.6204, "step": 11861 }, { "epoch": 0.3, "grad_norm": 2.9561100006103516, "learning_rate": 1.6313482890329185e-05, "loss": 0.6339, "step": 11862 }, { "epoch": 0.3, "grad_norm": 3.4948294162750244, "learning_rate": 1.6312839093272972e-05, "loss": 0.641, "step": 11863 }, { "epoch": 0.3, "grad_norm": 3.1046218872070312, "learning_rate": 1.6312195252712638e-05, "loss": 0.5581, "step": 11864 }, { "epoch": 0.3, "grad_norm": 2.4908037185668945, "learning_rate": 1.6311551368652616e-05, "loss": 0.5848, "step": 11865 }, { "epoch": 0.3, "grad_norm": 2.4456305503845215, "learning_rate": 1.631090744109735e-05, "loss": 0.5763, "step": 11866 }, { "epoch": 0.3, "grad_norm": 1.3728423118591309, "learning_rate": 1.6310263470051272e-05, "loss": 0.589, "step": 11867 }, { "epoch": 0.3, "grad_norm": 1.6479824781417847, "learning_rate": 1.6309619455518824e-05, "loss": 0.7337, "step": 11868 }, { "epoch": 0.3, "grad_norm": 2.189573287963867, "learning_rate": 1.6308975397504445e-05, "loss": 0.7559, "step": 11869 }, { "epoch": 0.3, "grad_norm": 4.209466934204102, "learning_rate": 1.630833129601257e-05, "loss": 0.7098, "step": 11870 }, { "epoch": 0.3, "grad_norm": 1.8412739038467407, "learning_rate": 1.630768715104764e-05, "loss": 0.6667, "step": 11871 }, { "epoch": 0.3, "grad_norm": 2.9571726322174072, "learning_rate": 1.630704296261409e-05, "loss": 0.6446, "step": 11872 }, { "epoch": 0.3, "grad_norm": 1.1812880039215088, "learning_rate": 1.6306398730716365e-05, "loss": 0.6175, "step": 11873 }, { "epoch": 0.3, "grad_norm": 3.622969150543213, "learning_rate": 1.63057544553589e-05, "loss": 0.7169, "step": 11874 }, { "epoch": 0.3, "grad_norm": 2.1268091201782227, "learning_rate": 1.6305110136546138e-05, "loss": 0.6735, "step": 11875 }, { "epoch": 0.3, "grad_norm": 1.7710654735565186, "learning_rate": 1.630446577428252e-05, "loss": 0.6134, "step": 11876 }, { "epoch": 0.3, "grad_norm": 2.488582134246826, "learning_rate": 1.6303821368572484e-05, "loss": 0.7735, "step": 11877 }, { "epoch": 0.3, "grad_norm": 2.4422290325164795, "learning_rate": 1.6303176919420472e-05, "loss": 0.6024, "step": 11878 }, { "epoch": 0.3, "grad_norm": 2.759056329727173, "learning_rate": 1.6302532426830923e-05, "loss": 0.5852, "step": 11879 }, { "epoch": 0.3, "grad_norm": 1.7085903882980347, "learning_rate": 1.630188789080828e-05, "loss": 0.6135, "step": 11880 }, { "epoch": 0.3, "grad_norm": 2.1503500938415527, "learning_rate": 1.6301243311356985e-05, "loss": 0.6167, "step": 11881 }, { "epoch": 0.3, "grad_norm": 2.3409523963928223, "learning_rate": 1.630059868848148e-05, "loss": 0.5942, "step": 11882 }, { "epoch": 0.3, "grad_norm": 7.934074878692627, "learning_rate": 1.629995402218621e-05, "loss": 0.5311, "step": 11883 }, { "epoch": 0.3, "grad_norm": 1.4910224676132202, "learning_rate": 1.6299309312475607e-05, "loss": 0.5001, "step": 11884 }, { "epoch": 0.3, "grad_norm": 2.157400608062744, "learning_rate": 1.629866455935413e-05, "loss": 0.5472, "step": 11885 }, { "epoch": 0.3, "grad_norm": 1.4160836935043335, "learning_rate": 1.629801976282621e-05, "loss": 0.5396, "step": 11886 }, { "epoch": 0.3, "grad_norm": 1.6194988489151, "learning_rate": 1.6297374922896298e-05, "loss": 0.5605, "step": 11887 }, { "epoch": 0.3, "grad_norm": 1.3464421033859253, "learning_rate": 1.629673003956883e-05, "loss": 0.4795, "step": 11888 }, { "epoch": 0.3, "grad_norm": 2.2801029682159424, "learning_rate": 1.629608511284826e-05, "loss": 0.6708, "step": 11889 }, { "epoch": 0.3, "grad_norm": 2.6693317890167236, "learning_rate": 1.6295440142739024e-05, "loss": 0.5657, "step": 11890 }, { "epoch": 0.3, "grad_norm": 4.120190143585205, "learning_rate": 1.6294795129245567e-05, "loss": 0.7414, "step": 11891 }, { "epoch": 0.3, "grad_norm": 5.583128452301025, "learning_rate": 1.629415007237234e-05, "loss": 0.6225, "step": 11892 }, { "epoch": 0.3, "grad_norm": 3.63925838470459, "learning_rate": 1.6293504972123786e-05, "loss": 0.4869, "step": 11893 }, { "epoch": 0.3, "grad_norm": 5.048389434814453, "learning_rate": 1.6292859828504348e-05, "loss": 0.8025, "step": 11894 }, { "epoch": 0.3, "grad_norm": 2.127305269241333, "learning_rate": 1.6292214641518473e-05, "loss": 0.5841, "step": 11895 }, { "epoch": 0.3, "grad_norm": 1.6224265098571777, "learning_rate": 1.629156941117061e-05, "loss": 0.65, "step": 11896 }, { "epoch": 0.3, "grad_norm": 1.1694812774658203, "learning_rate": 1.6290924137465203e-05, "loss": 0.6885, "step": 11897 }, { "epoch": 0.3, "grad_norm": 3.553508996963501, "learning_rate": 1.6290278820406696e-05, "loss": 0.6118, "step": 11898 }, { "epoch": 0.3, "grad_norm": 1.80997633934021, "learning_rate": 1.6289633459999542e-05, "loss": 0.5579, "step": 11899 }, { "epoch": 0.31, "grad_norm": 2.9769859313964844, "learning_rate": 1.6288988056248185e-05, "loss": 0.5402, "step": 11900 }, { "epoch": 0.31, "grad_norm": 1.4483894109725952, "learning_rate": 1.6288342609157072e-05, "loss": 0.5892, "step": 11901 }, { "epoch": 0.31, "grad_norm": 1.9221529960632324, "learning_rate": 1.6287697118730656e-05, "loss": 0.861, "step": 11902 }, { "epoch": 0.31, "grad_norm": 5.773678779602051, "learning_rate": 1.628705158497338e-05, "loss": 0.6549, "step": 11903 }, { "epoch": 0.31, "grad_norm": 1.334010362625122, "learning_rate": 1.6286406007889692e-05, "loss": 0.6617, "step": 11904 }, { "epoch": 0.31, "grad_norm": 1.904037356376648, "learning_rate": 1.6285760387484048e-05, "loss": 0.5445, "step": 11905 }, { "epoch": 0.31, "grad_norm": 4.179773807525635, "learning_rate": 1.628511472376089e-05, "loss": 0.5586, "step": 11906 }, { "epoch": 0.31, "grad_norm": 1.4057718515396118, "learning_rate": 1.628446901672467e-05, "loss": 0.5725, "step": 11907 }, { "epoch": 0.31, "grad_norm": 1.2063056230545044, "learning_rate": 1.628382326637984e-05, "loss": 0.6235, "step": 11908 }, { "epoch": 0.31, "grad_norm": 1.8246315717697144, "learning_rate": 1.6283177472730844e-05, "loss": 0.5861, "step": 11909 }, { "epoch": 0.31, "grad_norm": 3.5166590213775635, "learning_rate": 1.628253163578214e-05, "loss": 0.6877, "step": 11910 }, { "epoch": 0.31, "grad_norm": 2.485921621322632, "learning_rate": 1.6281885755538173e-05, "loss": 0.6294, "step": 11911 }, { "epoch": 0.31, "grad_norm": 3.6962320804595947, "learning_rate": 1.6281239832003397e-05, "loss": 0.7277, "step": 11912 }, { "epoch": 0.31, "grad_norm": 4.1081767082214355, "learning_rate": 1.628059386518226e-05, "loss": 0.7264, "step": 11913 }, { "epoch": 0.31, "grad_norm": 2.52054762840271, "learning_rate": 1.627994785507922e-05, "loss": 0.6434, "step": 11914 }, { "epoch": 0.31, "grad_norm": 1.8173511028289795, "learning_rate": 1.6279301801698722e-05, "loss": 0.4507, "step": 11915 }, { "epoch": 0.31, "grad_norm": 2.553868055343628, "learning_rate": 1.627865570504522e-05, "loss": 0.5806, "step": 11916 }, { "epoch": 0.31, "grad_norm": 1.9657888412475586, "learning_rate": 1.627800956512317e-05, "loss": 0.5868, "step": 11917 }, { "epoch": 0.31, "grad_norm": 1.8690407276153564, "learning_rate": 1.6277363381937022e-05, "loss": 0.6993, "step": 11918 }, { "epoch": 0.31, "grad_norm": 5.952195644378662, "learning_rate": 1.6276717155491227e-05, "loss": 0.5949, "step": 11919 }, { "epoch": 0.31, "grad_norm": 3.182751417160034, "learning_rate": 1.6276070885790244e-05, "loss": 0.6755, "step": 11920 }, { "epoch": 0.31, "grad_norm": 2.157205581665039, "learning_rate": 1.6275424572838522e-05, "loss": 0.6513, "step": 11921 }, { "epoch": 0.31, "grad_norm": 1.5318267345428467, "learning_rate": 1.6274778216640513e-05, "loss": 0.4747, "step": 11922 }, { "epoch": 0.31, "grad_norm": 3.257749080657959, "learning_rate": 1.6274131817200683e-05, "loss": 0.7823, "step": 11923 }, { "epoch": 0.31, "grad_norm": 1.968763828277588, "learning_rate": 1.6273485374523473e-05, "loss": 0.6827, "step": 11924 }, { "epoch": 0.31, "grad_norm": 2.858565092086792, "learning_rate": 1.6272838888613342e-05, "loss": 0.7593, "step": 11925 }, { "epoch": 0.31, "grad_norm": 1.9580819606781006, "learning_rate": 1.6272192359474746e-05, "loss": 0.5941, "step": 11926 }, { "epoch": 0.31, "grad_norm": 4.665679454803467, "learning_rate": 1.6271545787112147e-05, "loss": 0.6155, "step": 11927 }, { "epoch": 0.31, "grad_norm": 1.9184019565582275, "learning_rate": 1.6270899171529987e-05, "loss": 0.5255, "step": 11928 }, { "epoch": 0.31, "grad_norm": 2.2942111492156982, "learning_rate": 1.6270252512732737e-05, "loss": 0.5095, "step": 11929 }, { "epoch": 0.31, "grad_norm": 3.1451056003570557, "learning_rate": 1.626960581072484e-05, "loss": 0.6671, "step": 11930 }, { "epoch": 0.31, "grad_norm": 3.4012606143951416, "learning_rate": 1.626895906551076e-05, "loss": 0.7355, "step": 11931 }, { "epoch": 0.31, "grad_norm": 1.717358112335205, "learning_rate": 1.6268312277094954e-05, "loss": 0.6775, "step": 11932 }, { "epoch": 0.31, "grad_norm": 5.224094867706299, "learning_rate": 1.626766544548188e-05, "loss": 0.4756, "step": 11933 }, { "epoch": 0.31, "grad_norm": 1.6750003099441528, "learning_rate": 1.626701857067599e-05, "loss": 0.792, "step": 11934 }, { "epoch": 0.31, "grad_norm": 4.601729869842529, "learning_rate": 1.6266371652681744e-05, "loss": 0.6929, "step": 11935 }, { "epoch": 0.31, "grad_norm": 3.6562280654907227, "learning_rate": 1.6265724691503605e-05, "loss": 0.5935, "step": 11936 }, { "epoch": 0.31, "grad_norm": 3.1714797019958496, "learning_rate": 1.6265077687146024e-05, "loss": 0.5996, "step": 11937 }, { "epoch": 0.31, "grad_norm": 4.577271461486816, "learning_rate": 1.626443063961347e-05, "loss": 0.6407, "step": 11938 }, { "epoch": 0.31, "grad_norm": 2.103888750076294, "learning_rate": 1.6263783548910393e-05, "loss": 0.6909, "step": 11939 }, { "epoch": 0.31, "grad_norm": 5.31804084777832, "learning_rate": 1.626313641504125e-05, "loss": 0.5289, "step": 11940 }, { "epoch": 0.31, "grad_norm": 2.9871718883514404, "learning_rate": 1.6262489238010514e-05, "loss": 0.5942, "step": 11941 }, { "epoch": 0.31, "grad_norm": 1.2708089351654053, "learning_rate": 1.626184201782263e-05, "loss": 0.5363, "step": 11942 }, { "epoch": 0.31, "grad_norm": 4.917883396148682, "learning_rate": 1.6261194754482068e-05, "loss": 0.8265, "step": 11943 }, { "epoch": 0.31, "grad_norm": 3.742032289505005, "learning_rate": 1.6260547447993286e-05, "loss": 0.7491, "step": 11944 }, { "epoch": 0.31, "grad_norm": 4.554431438446045, "learning_rate": 1.6259900098360742e-05, "loss": 0.8042, "step": 11945 }, { "epoch": 0.31, "grad_norm": 1.8010550737380981, "learning_rate": 1.62592527055889e-05, "loss": 0.6368, "step": 11946 }, { "epoch": 0.31, "grad_norm": 2.9211225509643555, "learning_rate": 1.6258605269682223e-05, "loss": 0.5184, "step": 11947 }, { "epoch": 0.31, "grad_norm": 6.505197048187256, "learning_rate": 1.6257957790645166e-05, "loss": 0.6583, "step": 11948 }, { "epoch": 0.31, "grad_norm": 1.2139217853546143, "learning_rate": 1.6257310268482202e-05, "loss": 0.5273, "step": 11949 }, { "epoch": 0.31, "grad_norm": 2.313157081604004, "learning_rate": 1.625666270319778e-05, "loss": 0.6143, "step": 11950 }, { "epoch": 0.31, "grad_norm": 1.5775525569915771, "learning_rate": 1.6256015094796373e-05, "loss": 0.6686, "step": 11951 }, { "epoch": 0.31, "grad_norm": 3.0258352756500244, "learning_rate": 1.625536744328244e-05, "loss": 0.7059, "step": 11952 }, { "epoch": 0.31, "grad_norm": 3.853569269180298, "learning_rate": 1.6254719748660446e-05, "loss": 0.8057, "step": 11953 }, { "epoch": 0.31, "grad_norm": 1.7763923406600952, "learning_rate": 1.625407201093485e-05, "loss": 0.527, "step": 11954 }, { "epoch": 0.31, "grad_norm": 2.2537193298339844, "learning_rate": 1.6253424230110122e-05, "loss": 0.5039, "step": 11955 }, { "epoch": 0.31, "grad_norm": 3.4265289306640625, "learning_rate": 1.6252776406190722e-05, "loss": 0.6485, "step": 11956 }, { "epoch": 0.31, "grad_norm": 1.545055627822876, "learning_rate": 1.6252128539181116e-05, "loss": 0.4151, "step": 11957 }, { "epoch": 0.31, "grad_norm": 2.6942169666290283, "learning_rate": 1.6251480629085766e-05, "loss": 0.7157, "step": 11958 }, { "epoch": 0.31, "grad_norm": 2.5757317543029785, "learning_rate": 1.6250832675909142e-05, "loss": 0.494, "step": 11959 }, { "epoch": 0.31, "grad_norm": 6.062894344329834, "learning_rate": 1.62501846796557e-05, "loss": 0.6673, "step": 11960 }, { "epoch": 0.31, "grad_norm": 1.5416338443756104, "learning_rate": 1.624953664032992e-05, "loss": 0.6378, "step": 11961 }, { "epoch": 0.31, "grad_norm": 1.8814961910247803, "learning_rate": 1.6248888557936255e-05, "loss": 0.5179, "step": 11962 }, { "epoch": 0.31, "grad_norm": 1.80117928981781, "learning_rate": 1.6248240432479177e-05, "loss": 0.5096, "step": 11963 }, { "epoch": 0.31, "grad_norm": 2.167180061340332, "learning_rate": 1.6247592263963152e-05, "loss": 0.5958, "step": 11964 }, { "epoch": 0.31, "grad_norm": 5.410120010375977, "learning_rate": 1.624694405239265e-05, "loss": 0.5134, "step": 11965 }, { "epoch": 0.31, "grad_norm": 2.500455141067505, "learning_rate": 1.6246295797772127e-05, "loss": 0.7512, "step": 11966 }, { "epoch": 0.31, "grad_norm": 1.268049955368042, "learning_rate": 1.624564750010606e-05, "loss": 0.4857, "step": 11967 }, { "epoch": 0.31, "grad_norm": 2.995009422302246, "learning_rate": 1.6244999159398917e-05, "loss": 0.6418, "step": 11968 }, { "epoch": 0.31, "grad_norm": 5.207587242126465, "learning_rate": 1.624435077565516e-05, "loss": 0.6565, "step": 11969 }, { "epoch": 0.31, "grad_norm": 2.9135677814483643, "learning_rate": 1.6243702348879265e-05, "loss": 0.6765, "step": 11970 }, { "epoch": 0.31, "grad_norm": 1.4957172870635986, "learning_rate": 1.624305387907569e-05, "loss": 0.6793, "step": 11971 }, { "epoch": 0.31, "grad_norm": 1.7587445974349976, "learning_rate": 1.6242405366248914e-05, "loss": 0.7295, "step": 11972 }, { "epoch": 0.31, "grad_norm": 1.3804835081100464, "learning_rate": 1.62417568104034e-05, "loss": 0.4605, "step": 11973 }, { "epoch": 0.31, "grad_norm": 5.286097526550293, "learning_rate": 1.6241108211543624e-05, "loss": 0.541, "step": 11974 }, { "epoch": 0.31, "grad_norm": 2.3939263820648193, "learning_rate": 1.6240459569674048e-05, "loss": 0.5821, "step": 11975 }, { "epoch": 0.31, "grad_norm": 1.5938396453857422, "learning_rate": 1.6239810884799142e-05, "loss": 0.5441, "step": 11976 }, { "epoch": 0.31, "grad_norm": 1.111342430114746, "learning_rate": 1.6239162156923385e-05, "loss": 0.5144, "step": 11977 }, { "epoch": 0.31, "grad_norm": 1.733772873878479, "learning_rate": 1.623851338605124e-05, "loss": 0.556, "step": 11978 }, { "epoch": 0.31, "grad_norm": 1.8740341663360596, "learning_rate": 1.6237864572187182e-05, "loss": 0.6679, "step": 11979 }, { "epoch": 0.31, "grad_norm": 2.136655807495117, "learning_rate": 1.6237215715335677e-05, "loss": 0.7466, "step": 11980 }, { "epoch": 0.31, "grad_norm": 1.2624623775482178, "learning_rate": 1.6236566815501204e-05, "loss": 0.3601, "step": 11981 }, { "epoch": 0.31, "grad_norm": 1.911818027496338, "learning_rate": 1.6235917872688225e-05, "loss": 0.5417, "step": 11982 }, { "epoch": 0.31, "grad_norm": 2.348215341567993, "learning_rate": 1.6235268886901222e-05, "loss": 0.6251, "step": 11983 }, { "epoch": 0.31, "grad_norm": 8.706378936767578, "learning_rate": 1.623461985814466e-05, "loss": 0.7176, "step": 11984 }, { "epoch": 0.31, "grad_norm": 2.359001636505127, "learning_rate": 1.623397078642302e-05, "loss": 0.6702, "step": 11985 }, { "epoch": 0.31, "grad_norm": 3.593644618988037, "learning_rate": 1.6233321671740766e-05, "loss": 0.6147, "step": 11986 }, { "epoch": 0.31, "grad_norm": 1.8053404092788696, "learning_rate": 1.623267251410238e-05, "loss": 0.6936, "step": 11987 }, { "epoch": 0.31, "grad_norm": 1.170911431312561, "learning_rate": 1.6232023313512324e-05, "loss": 0.6657, "step": 11988 }, { "epoch": 0.31, "grad_norm": 2.8289966583251953, "learning_rate": 1.623137406997508e-05, "loss": 0.6916, "step": 11989 }, { "epoch": 0.31, "grad_norm": 2.0186102390289307, "learning_rate": 1.6230724783495125e-05, "loss": 0.5272, "step": 11990 }, { "epoch": 0.31, "grad_norm": 4.661313056945801, "learning_rate": 1.623007545407693e-05, "loss": 0.5886, "step": 11991 }, { "epoch": 0.31, "grad_norm": 3.375739336013794, "learning_rate": 1.6229426081724963e-05, "loss": 0.6477, "step": 11992 }, { "epoch": 0.31, "grad_norm": 1.8787285089492798, "learning_rate": 1.6228776666443707e-05, "loss": 0.4923, "step": 11993 }, { "epoch": 0.31, "grad_norm": 3.8998255729675293, "learning_rate": 1.6228127208237638e-05, "loss": 0.5977, "step": 11994 }, { "epoch": 0.31, "grad_norm": 3.3642497062683105, "learning_rate": 1.6227477707111226e-05, "loss": 0.5348, "step": 11995 }, { "epoch": 0.31, "grad_norm": 3.4948689937591553, "learning_rate": 1.6226828163068952e-05, "loss": 0.4549, "step": 11996 }, { "epoch": 0.31, "grad_norm": 2.610199213027954, "learning_rate": 1.622617857611529e-05, "loss": 0.5164, "step": 11997 }, { "epoch": 0.31, "grad_norm": 1.4774612188339233, "learning_rate": 1.6225528946254718e-05, "loss": 0.4642, "step": 11998 }, { "epoch": 0.31, "grad_norm": 1.8161780834197998, "learning_rate": 1.6224879273491713e-05, "loss": 0.6517, "step": 11999 }, { "epoch": 0.31, "grad_norm": 1.3740156888961792, "learning_rate": 1.6224229557830748e-05, "loss": 0.6041, "step": 12000 }, { "epoch": 0.31, "grad_norm": 2.5180795192718506, "learning_rate": 1.6223579799276302e-05, "loss": 0.592, "step": 12001 }, { "epoch": 0.31, "grad_norm": 2.1493418216705322, "learning_rate": 1.6222929997832858e-05, "loss": 0.7357, "step": 12002 }, { "epoch": 0.31, "grad_norm": 8.911270141601562, "learning_rate": 1.622228015350489e-05, "loss": 0.6645, "step": 12003 }, { "epoch": 0.31, "grad_norm": 2.8687686920166016, "learning_rate": 1.6221630266296875e-05, "loss": 0.8061, "step": 12004 }, { "epoch": 0.31, "grad_norm": 2.1160428524017334, "learning_rate": 1.6220980336213294e-05, "loss": 0.603, "step": 12005 }, { "epoch": 0.31, "grad_norm": 5.030689716339111, "learning_rate": 1.622033036325862e-05, "loss": 0.7668, "step": 12006 }, { "epoch": 0.31, "grad_norm": 6.173713684082031, "learning_rate": 1.621968034743734e-05, "loss": 0.6151, "step": 12007 }, { "epoch": 0.31, "grad_norm": 1.2669191360473633, "learning_rate": 1.6219030288753935e-05, "loss": 0.5537, "step": 12008 }, { "epoch": 0.31, "grad_norm": 2.811063766479492, "learning_rate": 1.6218380187212878e-05, "loss": 0.5538, "step": 12009 }, { "epoch": 0.31, "grad_norm": 2.578545093536377, "learning_rate": 1.6217730042818648e-05, "loss": 0.7057, "step": 12010 }, { "epoch": 0.31, "grad_norm": 5.21694278717041, "learning_rate": 1.6217079855575732e-05, "loss": 0.7522, "step": 12011 }, { "epoch": 0.31, "grad_norm": 1.975155234336853, "learning_rate": 1.621642962548861e-05, "loss": 0.6262, "step": 12012 }, { "epoch": 0.31, "grad_norm": 1.7626382112503052, "learning_rate": 1.6215779352561754e-05, "loss": 0.5827, "step": 12013 }, { "epoch": 0.31, "grad_norm": 1.7791976928710938, "learning_rate": 1.6215129036799657e-05, "loss": 0.5699, "step": 12014 }, { "epoch": 0.31, "grad_norm": 2.516771078109741, "learning_rate": 1.6214478678206794e-05, "loss": 0.6397, "step": 12015 }, { "epoch": 0.31, "grad_norm": 3.748993396759033, "learning_rate": 1.6213828276787647e-05, "loss": 0.7974, "step": 12016 }, { "epoch": 0.31, "grad_norm": 1.618190884590149, "learning_rate": 1.62131778325467e-05, "loss": 0.4512, "step": 12017 }, { "epoch": 0.31, "grad_norm": 3.0884580612182617, "learning_rate": 1.6212527345488434e-05, "loss": 0.5555, "step": 12018 }, { "epoch": 0.31, "grad_norm": 4.53738260269165, "learning_rate": 1.6211876815617336e-05, "loss": 0.6547, "step": 12019 }, { "epoch": 0.31, "grad_norm": 2.0658304691314697, "learning_rate": 1.621122624293788e-05, "loss": 0.502, "step": 12020 }, { "epoch": 0.31, "grad_norm": 1.5835777521133423, "learning_rate": 1.6210575627454562e-05, "loss": 0.4456, "step": 12021 }, { "epoch": 0.31, "grad_norm": 1.697807788848877, "learning_rate": 1.6209924969171854e-05, "loss": 0.5264, "step": 12022 }, { "epoch": 0.31, "grad_norm": 1.7715312242507935, "learning_rate": 1.620927426809425e-05, "loss": 0.7375, "step": 12023 }, { "epoch": 0.31, "grad_norm": 1.4181387424468994, "learning_rate": 1.620862352422622e-05, "loss": 0.4758, "step": 12024 }, { "epoch": 0.31, "grad_norm": 5.727085590362549, "learning_rate": 1.6207972737572263e-05, "loss": 0.5495, "step": 12025 }, { "epoch": 0.31, "grad_norm": 2.5256359577178955, "learning_rate": 1.6207321908136858e-05, "loss": 0.5605, "step": 12026 }, { "epoch": 0.31, "grad_norm": 1.9445806741714478, "learning_rate": 1.620667103592449e-05, "loss": 0.6083, "step": 12027 }, { "epoch": 0.31, "grad_norm": 1.8272298574447632, "learning_rate": 1.6206020120939644e-05, "loss": 0.52, "step": 12028 }, { "epoch": 0.31, "grad_norm": 1.9870514869689941, "learning_rate": 1.6205369163186807e-05, "loss": 0.5469, "step": 12029 }, { "epoch": 0.31, "grad_norm": 7.273355484008789, "learning_rate": 1.6204718162670462e-05, "loss": 0.8018, "step": 12030 }, { "epoch": 0.31, "grad_norm": 7.716556072235107, "learning_rate": 1.62040671193951e-05, "loss": 0.6734, "step": 12031 }, { "epoch": 0.31, "grad_norm": 1.5487585067749023, "learning_rate": 1.6203416033365206e-05, "loss": 0.5441, "step": 12032 }, { "epoch": 0.31, "grad_norm": 2.324526071548462, "learning_rate": 1.6202764904585263e-05, "loss": 0.4327, "step": 12033 }, { "epoch": 0.31, "grad_norm": 1.9475034475326538, "learning_rate": 1.6202113733059762e-05, "loss": 0.4332, "step": 12034 }, { "epoch": 0.31, "grad_norm": 5.357659339904785, "learning_rate": 1.620146251879319e-05, "loss": 0.6952, "step": 12035 }, { "epoch": 0.31, "grad_norm": 3.1996877193450928, "learning_rate": 1.6200811261790036e-05, "loss": 0.7182, "step": 12036 }, { "epoch": 0.31, "grad_norm": 1.4605549573898315, "learning_rate": 1.6200159962054786e-05, "loss": 0.5455, "step": 12037 }, { "epoch": 0.31, "grad_norm": 3.0292327404022217, "learning_rate": 1.6199508619591927e-05, "loss": 0.6971, "step": 12038 }, { "epoch": 0.31, "grad_norm": 3.1759278774261475, "learning_rate": 1.619885723440595e-05, "loss": 0.6456, "step": 12039 }, { "epoch": 0.31, "grad_norm": 1.8884090185165405, "learning_rate": 1.6198205806501347e-05, "loss": 0.6291, "step": 12040 }, { "epoch": 0.31, "grad_norm": 1.5767290592193604, "learning_rate": 1.61975543358826e-05, "loss": 0.5357, "step": 12041 }, { "epoch": 0.31, "grad_norm": 3.329169273376465, "learning_rate": 1.6196902822554203e-05, "loss": 0.5555, "step": 12042 }, { "epoch": 0.31, "grad_norm": 2.242349147796631, "learning_rate": 1.6196251266520647e-05, "loss": 0.6966, "step": 12043 }, { "epoch": 0.31, "grad_norm": 2.0585412979125977, "learning_rate": 1.6195599667786415e-05, "loss": 0.636, "step": 12044 }, { "epoch": 0.31, "grad_norm": 9.81251049041748, "learning_rate": 1.6194948026356007e-05, "loss": 0.6746, "step": 12045 }, { "epoch": 0.31, "grad_norm": 1.5836461782455444, "learning_rate": 1.6194296342233905e-05, "loss": 0.5471, "step": 12046 }, { "epoch": 0.31, "grad_norm": 1.509856939315796, "learning_rate": 1.619364461542461e-05, "loss": 0.5292, "step": 12047 }, { "epoch": 0.31, "grad_norm": 1.5589454174041748, "learning_rate": 1.6192992845932604e-05, "loss": 0.6188, "step": 12048 }, { "epoch": 0.31, "grad_norm": 1.8974252939224243, "learning_rate": 1.6192341033762384e-05, "loss": 0.6197, "step": 12049 }, { "epoch": 0.31, "grad_norm": 2.7836334705352783, "learning_rate": 1.619168917891844e-05, "loss": 0.5972, "step": 12050 }, { "epoch": 0.31, "grad_norm": 6.323564052581787, "learning_rate": 1.6191037281405263e-05, "loss": 0.4876, "step": 12051 }, { "epoch": 0.31, "grad_norm": 1.8970855474472046, "learning_rate": 1.6190385341227347e-05, "loss": 0.5098, "step": 12052 }, { "epoch": 0.31, "grad_norm": 1.6019287109375, "learning_rate": 1.618973335838918e-05, "loss": 0.6093, "step": 12053 }, { "epoch": 0.31, "grad_norm": 1.6544933319091797, "learning_rate": 1.6189081332895267e-05, "loss": 0.6035, "step": 12054 }, { "epoch": 0.31, "grad_norm": 1.6104204654693604, "learning_rate": 1.618842926475009e-05, "loss": 0.7676, "step": 12055 }, { "epoch": 0.31, "grad_norm": 1.4328093528747559, "learning_rate": 1.618777715395815e-05, "loss": 0.5687, "step": 12056 }, { "epoch": 0.31, "grad_norm": 1.5074540376663208, "learning_rate": 1.6187125000523932e-05, "loss": 0.59, "step": 12057 }, { "epoch": 0.31, "grad_norm": 19.622806549072266, "learning_rate": 1.6186472804451935e-05, "loss": 0.6497, "step": 12058 }, { "epoch": 0.31, "grad_norm": 1.7767153978347778, "learning_rate": 1.618582056574666e-05, "loss": 0.6726, "step": 12059 }, { "epoch": 0.31, "grad_norm": 1.582058072090149, "learning_rate": 1.6185168284412594e-05, "loss": 0.5811, "step": 12060 }, { "epoch": 0.31, "grad_norm": 2.3880507946014404, "learning_rate": 1.618451596045423e-05, "loss": 0.5891, "step": 12061 }, { "epoch": 0.31, "grad_norm": 2.0096707344055176, "learning_rate": 1.618386359387607e-05, "loss": 0.7338, "step": 12062 }, { "epoch": 0.31, "grad_norm": 1.5687283277511597, "learning_rate": 1.618321118468261e-05, "loss": 0.5642, "step": 12063 }, { "epoch": 0.31, "grad_norm": 1.8648484945297241, "learning_rate": 1.6182558732878342e-05, "loss": 0.5039, "step": 12064 }, { "epoch": 0.31, "grad_norm": 3.5338375568389893, "learning_rate": 1.6181906238467762e-05, "loss": 0.7581, "step": 12065 }, { "epoch": 0.31, "grad_norm": 1.3875479698181152, "learning_rate": 1.618125370145537e-05, "loss": 0.4512, "step": 12066 }, { "epoch": 0.31, "grad_norm": 2.5771305561065674, "learning_rate": 1.6180601121845657e-05, "loss": 0.5447, "step": 12067 }, { "epoch": 0.31, "grad_norm": 1.732740044593811, "learning_rate": 1.6179948499643128e-05, "loss": 0.6689, "step": 12068 }, { "epoch": 0.31, "grad_norm": 4.845135688781738, "learning_rate": 1.6179295834852277e-05, "loss": 0.5844, "step": 12069 }, { "epoch": 0.31, "grad_norm": 2.821824789047241, "learning_rate": 1.6178643127477597e-05, "loss": 0.7779, "step": 12070 }, { "epoch": 0.31, "grad_norm": 1.7621753215789795, "learning_rate": 1.6177990377523593e-05, "loss": 0.4234, "step": 12071 }, { "epoch": 0.31, "grad_norm": 2.0164482593536377, "learning_rate": 1.617733758499476e-05, "loss": 0.5684, "step": 12072 }, { "epoch": 0.31, "grad_norm": 3.0880253314971924, "learning_rate": 1.61766847498956e-05, "loss": 0.8215, "step": 12073 }, { "epoch": 0.31, "grad_norm": 1.5570975542068481, "learning_rate": 1.6176031872230606e-05, "loss": 0.591, "step": 12074 }, { "epoch": 0.31, "grad_norm": 10.149227142333984, "learning_rate": 1.617537895200428e-05, "loss": 0.6891, "step": 12075 }, { "epoch": 0.31, "grad_norm": 2.2929532527923584, "learning_rate": 1.6174725989221124e-05, "loss": 0.6527, "step": 12076 }, { "epoch": 0.31, "grad_norm": 15.632678985595703, "learning_rate": 1.617407298388564e-05, "loss": 0.6234, "step": 12077 }, { "epoch": 0.31, "grad_norm": 1.5943639278411865, "learning_rate": 1.6173419936002315e-05, "loss": 0.5477, "step": 12078 }, { "epoch": 0.31, "grad_norm": 1.2197355031967163, "learning_rate": 1.6172766845575663e-05, "loss": 0.724, "step": 12079 }, { "epoch": 0.31, "grad_norm": 1.5948718786239624, "learning_rate": 1.6172113712610183e-05, "loss": 0.6164, "step": 12080 }, { "epoch": 0.31, "grad_norm": 2.277378797531128, "learning_rate": 1.6171460537110365e-05, "loss": 0.6263, "step": 12081 }, { "epoch": 0.31, "grad_norm": 3.0627167224884033, "learning_rate": 1.6170807319080727e-05, "loss": 0.6183, "step": 12082 }, { "epoch": 0.31, "grad_norm": 5.604319095611572, "learning_rate": 1.6170154058525756e-05, "loss": 0.6656, "step": 12083 }, { "epoch": 0.31, "grad_norm": 10.920841217041016, "learning_rate": 1.6169500755449964e-05, "loss": 0.6233, "step": 12084 }, { "epoch": 0.31, "grad_norm": 4.1349616050720215, "learning_rate": 1.6168847409857846e-05, "loss": 0.8184, "step": 12085 }, { "epoch": 0.31, "grad_norm": 2.4483554363250732, "learning_rate": 1.6168194021753906e-05, "loss": 0.6365, "step": 12086 }, { "epoch": 0.31, "grad_norm": 1.8252432346343994, "learning_rate": 1.616754059114265e-05, "loss": 0.5539, "step": 12087 }, { "epoch": 0.31, "grad_norm": 2.4796504974365234, "learning_rate": 1.616688711802858e-05, "loss": 0.6958, "step": 12088 }, { "epoch": 0.31, "grad_norm": 2.808635950088501, "learning_rate": 1.6166233602416196e-05, "loss": 0.4904, "step": 12089 }, { "epoch": 0.31, "grad_norm": 1.7761714458465576, "learning_rate": 1.6165580044310006e-05, "loss": 0.6066, "step": 12090 }, { "epoch": 0.31, "grad_norm": 3.388312816619873, "learning_rate": 1.616492644371451e-05, "loss": 0.8131, "step": 12091 }, { "epoch": 0.31, "grad_norm": 2.179400682449341, "learning_rate": 1.6164272800634218e-05, "loss": 0.5883, "step": 12092 }, { "epoch": 0.31, "grad_norm": 3.682020664215088, "learning_rate": 1.6163619115073625e-05, "loss": 0.6341, "step": 12093 }, { "epoch": 0.31, "grad_norm": 1.7500441074371338, "learning_rate": 1.6162965387037247e-05, "loss": 0.5463, "step": 12094 }, { "epoch": 0.31, "grad_norm": 1.436561942100525, "learning_rate": 1.616231161652958e-05, "loss": 0.4954, "step": 12095 }, { "epoch": 0.31, "grad_norm": 1.5689265727996826, "learning_rate": 1.6161657803555133e-05, "loss": 0.53, "step": 12096 }, { "epoch": 0.31, "grad_norm": 3.3153445720672607, "learning_rate": 1.6161003948118413e-05, "loss": 0.6373, "step": 12097 }, { "epoch": 0.31, "grad_norm": 1.523423671722412, "learning_rate": 1.6160350050223923e-05, "loss": 0.6571, "step": 12098 }, { "epoch": 0.31, "grad_norm": 4.454174041748047, "learning_rate": 1.615969610987617e-05, "loss": 0.6187, "step": 12099 }, { "epoch": 0.31, "grad_norm": 2.048727035522461, "learning_rate": 1.6159042127079666e-05, "loss": 0.5212, "step": 12100 }, { "epoch": 0.31, "grad_norm": 5.5302510261535645, "learning_rate": 1.6158388101838908e-05, "loss": 0.8158, "step": 12101 }, { "epoch": 0.31, "grad_norm": 4.907585620880127, "learning_rate": 1.6157734034158412e-05, "loss": 0.6198, "step": 12102 }, { "epoch": 0.31, "grad_norm": 1.0800641775131226, "learning_rate": 1.615707992404268e-05, "loss": 0.5631, "step": 12103 }, { "epoch": 0.31, "grad_norm": 1.0901378393173218, "learning_rate": 1.615642577149622e-05, "loss": 0.5635, "step": 12104 }, { "epoch": 0.31, "grad_norm": 1.389396071434021, "learning_rate": 1.6155771576523546e-05, "loss": 0.645, "step": 12105 }, { "epoch": 0.31, "grad_norm": 1.2452127933502197, "learning_rate": 1.6155117339129156e-05, "loss": 0.5508, "step": 12106 }, { "epoch": 0.31, "grad_norm": 2.171837091445923, "learning_rate": 1.615446305931757e-05, "loss": 0.7542, "step": 12107 }, { "epoch": 0.31, "grad_norm": 1.7032135725021362, "learning_rate": 1.6153808737093285e-05, "loss": 0.5628, "step": 12108 }, { "epoch": 0.31, "grad_norm": 2.508772850036621, "learning_rate": 1.615315437246082e-05, "loss": 0.6837, "step": 12109 }, { "epoch": 0.31, "grad_norm": 1.2552541494369507, "learning_rate": 1.6152499965424683e-05, "loss": 0.6459, "step": 12110 }, { "epoch": 0.31, "grad_norm": 2.4140665531158447, "learning_rate": 1.6151845515989378e-05, "loss": 0.7226, "step": 12111 }, { "epoch": 0.31, "grad_norm": 1.1368703842163086, "learning_rate": 1.615119102415942e-05, "loss": 0.695, "step": 12112 }, { "epoch": 0.31, "grad_norm": 2.060713529586792, "learning_rate": 1.6150536489939316e-05, "loss": 0.5868, "step": 12113 }, { "epoch": 0.31, "grad_norm": 4.547066688537598, "learning_rate": 1.614988191333358e-05, "loss": 0.86, "step": 12114 }, { "epoch": 0.31, "grad_norm": 1.4361966848373413, "learning_rate": 1.6149227294346722e-05, "loss": 0.5537, "step": 12115 }, { "epoch": 0.31, "grad_norm": 2.1685802936553955, "learning_rate": 1.6148572632983252e-05, "loss": 0.5987, "step": 12116 }, { "epoch": 0.31, "grad_norm": 1.572309970855713, "learning_rate": 1.6147917929247684e-05, "loss": 0.5539, "step": 12117 }, { "epoch": 0.31, "grad_norm": 1.4798455238342285, "learning_rate": 1.6147263183144528e-05, "loss": 0.4886, "step": 12118 }, { "epoch": 0.31, "grad_norm": 1.8346456289291382, "learning_rate": 1.6146608394678296e-05, "loss": 0.6895, "step": 12119 }, { "epoch": 0.31, "grad_norm": 2.050485372543335, "learning_rate": 1.61459535638535e-05, "loss": 0.6031, "step": 12120 }, { "epoch": 0.31, "grad_norm": 1.9597562551498413, "learning_rate": 1.6145298690674654e-05, "loss": 0.6155, "step": 12121 }, { "epoch": 0.31, "grad_norm": 2.02703857421875, "learning_rate": 1.614464377514627e-05, "loss": 0.7139, "step": 12122 }, { "epoch": 0.31, "grad_norm": 2.3256778717041016, "learning_rate": 1.614398881727286e-05, "loss": 0.7773, "step": 12123 }, { "epoch": 0.31, "grad_norm": 2.2996926307678223, "learning_rate": 1.614333381705894e-05, "loss": 0.6319, "step": 12124 }, { "epoch": 0.31, "grad_norm": 1.5333577394485474, "learning_rate": 1.6142678774509024e-05, "loss": 0.6522, "step": 12125 }, { "epoch": 0.31, "grad_norm": 1.5565398931503296, "learning_rate": 1.6142023689627624e-05, "loss": 0.5827, "step": 12126 }, { "epoch": 0.31, "grad_norm": 1.1774828433990479, "learning_rate": 1.614136856241926e-05, "loss": 0.5876, "step": 12127 }, { "epoch": 0.31, "grad_norm": 2.8388259410858154, "learning_rate": 1.6140713392888435e-05, "loss": 0.5531, "step": 12128 }, { "epoch": 0.31, "grad_norm": 4.04442024230957, "learning_rate": 1.6140058181039676e-05, "loss": 0.6036, "step": 12129 }, { "epoch": 0.31, "grad_norm": 4.092720031738281, "learning_rate": 1.613940292687749e-05, "loss": 0.7693, "step": 12130 }, { "epoch": 0.31, "grad_norm": 2.4123375415802, "learning_rate": 1.6138747630406396e-05, "loss": 0.6277, "step": 12131 }, { "epoch": 0.31, "grad_norm": 1.9462798833847046, "learning_rate": 1.6138092291630915e-05, "loss": 0.6617, "step": 12132 }, { "epoch": 0.31, "grad_norm": 1.5427316427230835, "learning_rate": 1.6137436910555554e-05, "loss": 0.7176, "step": 12133 }, { "epoch": 0.31, "grad_norm": 2.434813976287842, "learning_rate": 1.6136781487184836e-05, "loss": 0.6121, "step": 12134 }, { "epoch": 0.31, "grad_norm": 1.8218015432357788, "learning_rate": 1.6136126021523274e-05, "loss": 0.4775, "step": 12135 }, { "epoch": 0.31, "grad_norm": 8.119818687438965, "learning_rate": 1.6135470513575388e-05, "loss": 0.8255, "step": 12136 }, { "epoch": 0.31, "grad_norm": 3.1812572479248047, "learning_rate": 1.6134814963345694e-05, "loss": 0.7852, "step": 12137 }, { "epoch": 0.31, "grad_norm": 2.033525228500366, "learning_rate": 1.613415937083871e-05, "loss": 0.5755, "step": 12138 }, { "epoch": 0.31, "grad_norm": 2.7294089794158936, "learning_rate": 1.6133503736058948e-05, "loss": 0.6404, "step": 12139 }, { "epoch": 0.31, "grad_norm": 2.3575446605682373, "learning_rate": 1.6132848059010936e-05, "loss": 0.694, "step": 12140 }, { "epoch": 0.31, "grad_norm": 3.263251781463623, "learning_rate": 1.613219233969919e-05, "loss": 0.387, "step": 12141 }, { "epoch": 0.31, "grad_norm": 1.8932108879089355, "learning_rate": 1.6131536578128225e-05, "loss": 0.487, "step": 12142 }, { "epoch": 0.31, "grad_norm": 1.148364543914795, "learning_rate": 1.6130880774302565e-05, "loss": 0.7222, "step": 12143 }, { "epoch": 0.31, "grad_norm": 1.3400883674621582, "learning_rate": 1.613022492822672e-05, "loss": 0.4608, "step": 12144 }, { "epoch": 0.31, "grad_norm": 1.8358176946640015, "learning_rate": 1.612956903990522e-05, "loss": 0.5355, "step": 12145 }, { "epoch": 0.31, "grad_norm": 3.325577735900879, "learning_rate": 1.6128913109342585e-05, "loss": 0.7603, "step": 12146 }, { "epoch": 0.31, "grad_norm": 2.4348268508911133, "learning_rate": 1.6128257136543326e-05, "loss": 0.7432, "step": 12147 }, { "epoch": 0.31, "grad_norm": 3.9855120182037354, "learning_rate": 1.6127601121511973e-05, "loss": 0.8363, "step": 12148 }, { "epoch": 0.31, "grad_norm": 1.4962579011917114, "learning_rate": 1.612694506425304e-05, "loss": 0.6642, "step": 12149 }, { "epoch": 0.31, "grad_norm": 2.559966564178467, "learning_rate": 1.612628896477105e-05, "loss": 0.5477, "step": 12150 }, { "epoch": 0.31, "grad_norm": 2.7160589694976807, "learning_rate": 1.612563282307053e-05, "loss": 0.6385, "step": 12151 }, { "epoch": 0.31, "grad_norm": 1.5840582847595215, "learning_rate": 1.6124976639155988e-05, "loss": 0.541, "step": 12152 }, { "epoch": 0.31, "grad_norm": 2.1701712608337402, "learning_rate": 1.6124320413031963e-05, "loss": 0.6168, "step": 12153 }, { "epoch": 0.31, "grad_norm": 2.2027337551116943, "learning_rate": 1.6123664144702966e-05, "loss": 0.5796, "step": 12154 }, { "epoch": 0.31, "grad_norm": 2.8794543743133545, "learning_rate": 1.6123007834173524e-05, "loss": 0.7183, "step": 12155 }, { "epoch": 0.31, "grad_norm": 2.750795841217041, "learning_rate": 1.612235148144816e-05, "loss": 0.5341, "step": 12156 }, { "epoch": 0.31, "grad_norm": 2.102222204208374, "learning_rate": 1.6121695086531393e-05, "loss": 0.5796, "step": 12157 }, { "epoch": 0.31, "grad_norm": 1.624824047088623, "learning_rate": 1.612103864942775e-05, "loss": 0.5304, "step": 12158 }, { "epoch": 0.31, "grad_norm": 8.04460334777832, "learning_rate": 1.6120382170141756e-05, "loss": 0.7804, "step": 12159 }, { "epoch": 0.31, "grad_norm": 1.2999581098556519, "learning_rate": 1.6119725648677935e-05, "loss": 0.5823, "step": 12160 }, { "epoch": 0.31, "grad_norm": 4.129842281341553, "learning_rate": 1.6119069085040808e-05, "loss": 0.6617, "step": 12161 }, { "epoch": 0.31, "grad_norm": 8.169415473937988, "learning_rate": 1.6118412479234898e-05, "loss": 0.583, "step": 12162 }, { "epoch": 0.31, "grad_norm": 2.8198208808898926, "learning_rate": 1.6117755831264738e-05, "loss": 0.6459, "step": 12163 }, { "epoch": 0.31, "grad_norm": 2.110302448272705, "learning_rate": 1.6117099141134848e-05, "loss": 0.6228, "step": 12164 }, { "epoch": 0.31, "grad_norm": 2.75071382522583, "learning_rate": 1.6116442408849755e-05, "loss": 0.6164, "step": 12165 }, { "epoch": 0.31, "grad_norm": 2.0414700508117676, "learning_rate": 1.6115785634413977e-05, "loss": 0.5401, "step": 12166 }, { "epoch": 0.31, "grad_norm": 1.7307393550872803, "learning_rate": 1.611512881783205e-05, "loss": 0.6567, "step": 12167 }, { "epoch": 0.31, "grad_norm": 2.0049967765808105, "learning_rate": 1.6114471959108498e-05, "loss": 0.7416, "step": 12168 }, { "epoch": 0.31, "grad_norm": 3.2621450424194336, "learning_rate": 1.6113815058247846e-05, "loss": 0.7039, "step": 12169 }, { "epoch": 0.31, "grad_norm": 1.5465258359909058, "learning_rate": 1.6113158115254623e-05, "loss": 0.6006, "step": 12170 }, { "epoch": 0.31, "grad_norm": 1.8934391736984253, "learning_rate": 1.6112501130133353e-05, "loss": 0.4823, "step": 12171 }, { "epoch": 0.31, "grad_norm": 1.3335554599761963, "learning_rate": 1.6111844102888568e-05, "loss": 0.6079, "step": 12172 }, { "epoch": 0.31, "grad_norm": 1.647479772567749, "learning_rate": 1.6111187033524792e-05, "loss": 0.547, "step": 12173 }, { "epoch": 0.31, "grad_norm": 2.219078540802002, "learning_rate": 1.6110529922046555e-05, "loss": 0.61, "step": 12174 }, { "epoch": 0.31, "grad_norm": 1.4198888540267944, "learning_rate": 1.6109872768458383e-05, "loss": 0.6124, "step": 12175 }, { "epoch": 0.31, "grad_norm": 10.620206832885742, "learning_rate": 1.610921557276481e-05, "loss": 0.6031, "step": 12176 }, { "epoch": 0.31, "grad_norm": 1.4653785228729248, "learning_rate": 1.6108558334970355e-05, "loss": 0.5926, "step": 12177 }, { "epoch": 0.31, "grad_norm": 6.085151195526123, "learning_rate": 1.610790105507956e-05, "loss": 0.6789, "step": 12178 }, { "epoch": 0.31, "grad_norm": 1.8300495147705078, "learning_rate": 1.6107243733096944e-05, "loss": 0.5626, "step": 12179 }, { "epoch": 0.31, "grad_norm": 1.9317967891693115, "learning_rate": 1.6106586369027045e-05, "loss": 0.5778, "step": 12180 }, { "epoch": 0.31, "grad_norm": 1.8149418830871582, "learning_rate": 1.6105928962874387e-05, "loss": 0.6467, "step": 12181 }, { "epoch": 0.31, "grad_norm": 2.82161021232605, "learning_rate": 1.6105271514643504e-05, "loss": 0.6021, "step": 12182 }, { "epoch": 0.31, "grad_norm": 1.6472219228744507, "learning_rate": 1.6104614024338926e-05, "loss": 0.4208, "step": 12183 }, { "epoch": 0.31, "grad_norm": 5.364432334899902, "learning_rate": 1.610395649196518e-05, "loss": 0.5955, "step": 12184 }, { "epoch": 0.31, "grad_norm": 1.2216789722442627, "learning_rate": 1.61032989175268e-05, "loss": 0.523, "step": 12185 }, { "epoch": 0.31, "grad_norm": 2.0476698875427246, "learning_rate": 1.6102641301028323e-05, "loss": 0.7673, "step": 12186 }, { "epoch": 0.31, "grad_norm": 3.194027900695801, "learning_rate": 1.6101983642474273e-05, "loss": 0.6585, "step": 12187 }, { "epoch": 0.31, "grad_norm": 4.643954753875732, "learning_rate": 1.6101325941869185e-05, "loss": 0.7183, "step": 12188 }, { "epoch": 0.31, "grad_norm": 1.7242625951766968, "learning_rate": 1.6100668199217598e-05, "loss": 0.525, "step": 12189 }, { "epoch": 0.31, "grad_norm": 6.089824199676514, "learning_rate": 1.610001041452403e-05, "loss": 0.5942, "step": 12190 }, { "epoch": 0.31, "grad_norm": 7.2827558517456055, "learning_rate": 1.6099352587793027e-05, "loss": 0.4957, "step": 12191 }, { "epoch": 0.31, "grad_norm": 4.266315460205078, "learning_rate": 1.6098694719029117e-05, "loss": 0.5432, "step": 12192 }, { "epoch": 0.31, "grad_norm": 4.784697532653809, "learning_rate": 1.6098036808236835e-05, "loss": 0.73, "step": 12193 }, { "epoch": 0.31, "grad_norm": 1.878753423690796, "learning_rate": 1.6097378855420717e-05, "loss": 0.6116, "step": 12194 }, { "epoch": 0.31, "grad_norm": 1.8567583560943604, "learning_rate": 1.609672086058529e-05, "loss": 0.5118, "step": 12195 }, { "epoch": 0.31, "grad_norm": 1.323791742324829, "learning_rate": 1.6096062823735095e-05, "loss": 0.5967, "step": 12196 }, { "epoch": 0.31, "grad_norm": 7.004481792449951, "learning_rate": 1.6095404744874668e-05, "loss": 0.5517, "step": 12197 }, { "epoch": 0.31, "grad_norm": 2.1748929023742676, "learning_rate": 1.6094746624008538e-05, "loss": 0.5275, "step": 12198 }, { "epoch": 0.31, "grad_norm": 2.653493881225586, "learning_rate": 1.6094088461141243e-05, "loss": 0.6318, "step": 12199 }, { "epoch": 0.31, "grad_norm": 2.0956296920776367, "learning_rate": 1.609343025627732e-05, "loss": 0.5474, "step": 12200 }, { "epoch": 0.31, "grad_norm": 1.5493491888046265, "learning_rate": 1.6092772009421303e-05, "loss": 0.6053, "step": 12201 }, { "epoch": 0.31, "grad_norm": 2.4595186710357666, "learning_rate": 1.6092113720577732e-05, "loss": 0.5926, "step": 12202 }, { "epoch": 0.31, "grad_norm": 2.853574514389038, "learning_rate": 1.609145538975114e-05, "loss": 0.6829, "step": 12203 }, { "epoch": 0.31, "grad_norm": 1.610432505607605, "learning_rate": 1.609079701694606e-05, "loss": 0.4893, "step": 12204 }, { "epoch": 0.31, "grad_norm": 2.43687105178833, "learning_rate": 1.609013860216704e-05, "loss": 0.6377, "step": 12205 }, { "epoch": 0.31, "grad_norm": 1.932060718536377, "learning_rate": 1.6089480145418607e-05, "loss": 0.6387, "step": 12206 }, { "epoch": 0.31, "grad_norm": 2.313138961791992, "learning_rate": 1.6088821646705303e-05, "loss": 0.5693, "step": 12207 }, { "epoch": 0.31, "grad_norm": 1.7883094549179077, "learning_rate": 1.6088163106031664e-05, "loss": 0.5375, "step": 12208 }, { "epoch": 0.31, "grad_norm": 1.661623477935791, "learning_rate": 1.6087504523402233e-05, "loss": 0.7461, "step": 12209 }, { "epoch": 0.31, "grad_norm": 3.271143674850464, "learning_rate": 1.608684589882154e-05, "loss": 0.6579, "step": 12210 }, { "epoch": 0.31, "grad_norm": 2.71244740486145, "learning_rate": 1.6086187232294137e-05, "loss": 0.7283, "step": 12211 }, { "epoch": 0.31, "grad_norm": 2.2911155223846436, "learning_rate": 1.6085528523824548e-05, "loss": 0.5893, "step": 12212 }, { "epoch": 0.31, "grad_norm": 2.2608931064605713, "learning_rate": 1.6084869773417325e-05, "loss": 0.5899, "step": 12213 }, { "epoch": 0.31, "grad_norm": 1.360839605331421, "learning_rate": 1.6084210981077e-05, "loss": 0.5781, "step": 12214 }, { "epoch": 0.31, "grad_norm": 1.7688393592834473, "learning_rate": 1.6083552146808118e-05, "loss": 0.7261, "step": 12215 }, { "epoch": 0.31, "grad_norm": 2.4056828022003174, "learning_rate": 1.6082893270615215e-05, "loss": 0.5645, "step": 12216 }, { "epoch": 0.31, "grad_norm": 2.594947338104248, "learning_rate": 1.6082234352502834e-05, "loss": 0.718, "step": 12217 }, { "epoch": 0.31, "grad_norm": 2.9640843868255615, "learning_rate": 1.608157539247551e-05, "loss": 0.9061, "step": 12218 }, { "epoch": 0.31, "grad_norm": 3.233132839202881, "learning_rate": 1.6080916390537792e-05, "loss": 0.73, "step": 12219 }, { "epoch": 0.31, "grad_norm": 4.556036472320557, "learning_rate": 1.6080257346694222e-05, "loss": 0.7872, "step": 12220 }, { "epoch": 0.31, "grad_norm": 1.7660380601882935, "learning_rate": 1.6079598260949335e-05, "loss": 0.7235, "step": 12221 }, { "epoch": 0.31, "grad_norm": 3.9549407958984375, "learning_rate": 1.6078939133307678e-05, "loss": 0.6812, "step": 12222 }, { "epoch": 0.31, "grad_norm": 1.1130192279815674, "learning_rate": 1.6078279963773794e-05, "loss": 0.6356, "step": 12223 }, { "epoch": 0.31, "grad_norm": 1.936341643333435, "learning_rate": 1.607762075235222e-05, "loss": 0.7082, "step": 12224 }, { "epoch": 0.31, "grad_norm": 19.13199806213379, "learning_rate": 1.60769614990475e-05, "loss": 0.4973, "step": 12225 }, { "epoch": 0.31, "grad_norm": 12.863142967224121, "learning_rate": 1.6076302203864184e-05, "loss": 0.6448, "step": 12226 }, { "epoch": 0.31, "grad_norm": 6.135601043701172, "learning_rate": 1.6075642866806807e-05, "loss": 0.6749, "step": 12227 }, { "epoch": 0.31, "grad_norm": 2.2195920944213867, "learning_rate": 1.607498348787992e-05, "loss": 0.5702, "step": 12228 }, { "epoch": 0.31, "grad_norm": 2.3549745082855225, "learning_rate": 1.607432406708806e-05, "loss": 0.6454, "step": 12229 }, { "epoch": 0.31, "grad_norm": 2.233395576477051, "learning_rate": 1.6073664604435775e-05, "loss": 0.6533, "step": 12230 }, { "epoch": 0.31, "grad_norm": 1.2472604513168335, "learning_rate": 1.607300509992761e-05, "loss": 0.5577, "step": 12231 }, { "epoch": 0.31, "grad_norm": 5.091346740722656, "learning_rate": 1.6072345553568115e-05, "loss": 0.5552, "step": 12232 }, { "epoch": 0.31, "grad_norm": 2.1769328117370605, "learning_rate": 1.607168596536182e-05, "loss": 0.6249, "step": 12233 }, { "epoch": 0.31, "grad_norm": 4.0329270362854, "learning_rate": 1.6071026335313286e-05, "loss": 0.5462, "step": 12234 }, { "epoch": 0.31, "grad_norm": 1.1712002754211426, "learning_rate": 1.607036666342705e-05, "loss": 0.4973, "step": 12235 }, { "epoch": 0.31, "grad_norm": 2.6762828826904297, "learning_rate": 1.6069706949707665e-05, "loss": 0.5096, "step": 12236 }, { "epoch": 0.31, "grad_norm": 6.198853969573975, "learning_rate": 1.6069047194159667e-05, "loss": 0.6988, "step": 12237 }, { "epoch": 0.31, "grad_norm": 4.778260231018066, "learning_rate": 1.606838739678761e-05, "loss": 0.5218, "step": 12238 }, { "epoch": 0.31, "grad_norm": 2.9943227767944336, "learning_rate": 1.6067727557596042e-05, "loss": 0.5925, "step": 12239 }, { "epoch": 0.31, "grad_norm": 2.322355270385742, "learning_rate": 1.606706767658951e-05, "loss": 0.5536, "step": 12240 }, { "epoch": 0.31, "grad_norm": 1.5061664581298828, "learning_rate": 1.6066407753772555e-05, "loss": 0.5632, "step": 12241 }, { "epoch": 0.31, "grad_norm": 1.713728666305542, "learning_rate": 1.606574778914973e-05, "loss": 0.6996, "step": 12242 }, { "epoch": 0.31, "grad_norm": 3.646390914916992, "learning_rate": 1.6065087782725583e-05, "loss": 0.7645, "step": 12243 }, { "epoch": 0.31, "grad_norm": 1.0319128036499023, "learning_rate": 1.606442773450466e-05, "loss": 0.5993, "step": 12244 }, { "epoch": 0.31, "grad_norm": 4.987636566162109, "learning_rate": 1.6063767644491513e-05, "loss": 0.5815, "step": 12245 }, { "epoch": 0.31, "grad_norm": 4.5949201583862305, "learning_rate": 1.6063107512690687e-05, "loss": 0.7496, "step": 12246 }, { "epoch": 0.31, "grad_norm": 1.6954454183578491, "learning_rate": 1.6062447339106736e-05, "loss": 0.7261, "step": 12247 }, { "epoch": 0.31, "grad_norm": 4.268557071685791, "learning_rate": 1.6061787123744202e-05, "loss": 0.5442, "step": 12248 }, { "epoch": 0.31, "grad_norm": 1.7138265371322632, "learning_rate": 1.6061126866607644e-05, "loss": 0.5833, "step": 12249 }, { "epoch": 0.31, "grad_norm": 8.151646614074707, "learning_rate": 1.6060466567701607e-05, "loss": 0.7013, "step": 12250 }, { "epoch": 0.31, "grad_norm": 8.323813438415527, "learning_rate": 1.6059806227030642e-05, "loss": 0.5571, "step": 12251 }, { "epoch": 0.31, "grad_norm": 1.3472594022750854, "learning_rate": 1.6059145844599296e-05, "loss": 0.667, "step": 12252 }, { "epoch": 0.31, "grad_norm": 2.106187343597412, "learning_rate": 1.605848542041213e-05, "loss": 0.5993, "step": 12253 }, { "epoch": 0.31, "grad_norm": 1.5635929107666016, "learning_rate": 1.6057824954473682e-05, "loss": 0.6724, "step": 12254 }, { "epoch": 0.31, "grad_norm": 3.6058974266052246, "learning_rate": 1.6057164446788515e-05, "loss": 0.5125, "step": 12255 }, { "epoch": 0.31, "grad_norm": 1.4849679470062256, "learning_rate": 1.6056503897361174e-05, "loss": 0.4563, "step": 12256 }, { "epoch": 0.31, "grad_norm": 2.5900609493255615, "learning_rate": 1.6055843306196214e-05, "loss": 0.8004, "step": 12257 }, { "epoch": 0.31, "grad_norm": 1.7349623441696167, "learning_rate": 1.6055182673298185e-05, "loss": 0.7999, "step": 12258 }, { "epoch": 0.31, "grad_norm": 1.2883538007736206, "learning_rate": 1.6054521998671643e-05, "loss": 0.6326, "step": 12259 }, { "epoch": 0.31, "grad_norm": 1.6073858737945557, "learning_rate": 1.605386128232114e-05, "loss": 0.7552, "step": 12260 }, { "epoch": 0.31, "grad_norm": 1.87626051902771, "learning_rate": 1.6053200524251223e-05, "loss": 0.6162, "step": 12261 }, { "epoch": 0.31, "grad_norm": 5.793854236602783, "learning_rate": 1.605253972446646e-05, "loss": 0.8479, "step": 12262 }, { "epoch": 0.31, "grad_norm": 2.685803174972534, "learning_rate": 1.605187888297139e-05, "loss": 0.6055, "step": 12263 }, { "epoch": 0.31, "grad_norm": 1.5936007499694824, "learning_rate": 1.605121799977057e-05, "loss": 0.4504, "step": 12264 }, { "epoch": 0.31, "grad_norm": 1.1670098304748535, "learning_rate": 1.6050557074868565e-05, "loss": 0.4041, "step": 12265 }, { "epoch": 0.31, "grad_norm": 3.712526798248291, "learning_rate": 1.6049896108269916e-05, "loss": 0.5865, "step": 12266 }, { "epoch": 0.31, "grad_norm": 9.779470443725586, "learning_rate": 1.6049235099979186e-05, "loss": 0.6714, "step": 12267 }, { "epoch": 0.31, "grad_norm": 2.637948989868164, "learning_rate": 1.604857405000093e-05, "loss": 0.7135, "step": 12268 }, { "epoch": 0.31, "grad_norm": 1.5508090257644653, "learning_rate": 1.60479129583397e-05, "loss": 0.619, "step": 12269 }, { "epoch": 0.31, "grad_norm": 3.329019546508789, "learning_rate": 1.6047251825000056e-05, "loss": 0.7169, "step": 12270 }, { "epoch": 0.31, "grad_norm": 2.709306001663208, "learning_rate": 1.604659064998655e-05, "loss": 0.8104, "step": 12271 }, { "epoch": 0.31, "grad_norm": 5.02471923828125, "learning_rate": 1.6045929433303737e-05, "loss": 0.7511, "step": 12272 }, { "epoch": 0.31, "grad_norm": 3.0483689308166504, "learning_rate": 1.604526817495618e-05, "loss": 0.9173, "step": 12273 }, { "epoch": 0.31, "grad_norm": 1.6355938911437988, "learning_rate": 1.604460687494843e-05, "loss": 0.4203, "step": 12274 }, { "epoch": 0.31, "grad_norm": 4.263720989227295, "learning_rate": 1.604394553328505e-05, "loss": 0.6023, "step": 12275 }, { "epoch": 0.31, "grad_norm": 1.8265427350997925, "learning_rate": 1.6043284149970594e-05, "loss": 0.5995, "step": 12276 }, { "epoch": 0.31, "grad_norm": 1.8944371938705444, "learning_rate": 1.604262272500962e-05, "loss": 0.5549, "step": 12277 }, { "epoch": 0.31, "grad_norm": 1.241002082824707, "learning_rate": 1.6041961258406686e-05, "loss": 0.5256, "step": 12278 }, { "epoch": 0.31, "grad_norm": 1.0173680782318115, "learning_rate": 1.604129975016635e-05, "loss": 0.6412, "step": 12279 }, { "epoch": 0.31, "grad_norm": 8.637712478637695, "learning_rate": 1.6040638200293176e-05, "loss": 0.6017, "step": 12280 }, { "epoch": 0.31, "grad_norm": 2.9101107120513916, "learning_rate": 1.603997660879171e-05, "loss": 0.5063, "step": 12281 }, { "epoch": 0.31, "grad_norm": 1.7283415794372559, "learning_rate": 1.6039314975666527e-05, "loss": 0.6531, "step": 12282 }, { "epoch": 0.31, "grad_norm": 2.311213731765747, "learning_rate": 1.603865330092218e-05, "loss": 0.58, "step": 12283 }, { "epoch": 0.31, "grad_norm": 3.3592422008514404, "learning_rate": 1.6037991584563224e-05, "loss": 0.5064, "step": 12284 }, { "epoch": 0.31, "grad_norm": 1.2894840240478516, "learning_rate": 1.6037329826594228e-05, "loss": 0.5731, "step": 12285 }, { "epoch": 0.31, "grad_norm": 9.221358299255371, "learning_rate": 1.6036668027019745e-05, "loss": 0.5804, "step": 12286 }, { "epoch": 0.31, "grad_norm": 2.8524436950683594, "learning_rate": 1.6036006185844338e-05, "loss": 0.6397, "step": 12287 }, { "epoch": 0.31, "grad_norm": 3.7938296794891357, "learning_rate": 1.603534430307257e-05, "loss": 0.6452, "step": 12288 }, { "epoch": 0.31, "grad_norm": 11.102874755859375, "learning_rate": 1.6034682378708998e-05, "loss": 0.945, "step": 12289 }, { "epoch": 0.31, "grad_norm": 1.3783639669418335, "learning_rate": 1.603402041275819e-05, "loss": 0.5447, "step": 12290 }, { "epoch": 0.32, "grad_norm": 2.48193097114563, "learning_rate": 1.6033358405224705e-05, "loss": 0.5245, "step": 12291 }, { "epoch": 0.32, "grad_norm": 2.544668197631836, "learning_rate": 1.60326963561131e-05, "loss": 0.5669, "step": 12292 }, { "epoch": 0.32, "grad_norm": 6.604290962219238, "learning_rate": 1.6032034265427947e-05, "loss": 0.5758, "step": 12293 }, { "epoch": 0.32, "grad_norm": 3.068098306655884, "learning_rate": 1.60313721331738e-05, "loss": 0.5178, "step": 12294 }, { "epoch": 0.32, "grad_norm": 1.4248167276382446, "learning_rate": 1.6030709959355225e-05, "loss": 0.4451, "step": 12295 }, { "epoch": 0.32, "grad_norm": 3.3901498317718506, "learning_rate": 1.603004774397679e-05, "loss": 0.6761, "step": 12296 }, { "epoch": 0.32, "grad_norm": 5.202439308166504, "learning_rate": 1.6029385487043052e-05, "loss": 0.7566, "step": 12297 }, { "epoch": 0.32, "grad_norm": 1.527434229850769, "learning_rate": 1.6028723188558574e-05, "loss": 0.6992, "step": 12298 }, { "epoch": 0.32, "grad_norm": 2.888139486312866, "learning_rate": 1.6028060848527927e-05, "loss": 0.8007, "step": 12299 }, { "epoch": 0.32, "grad_norm": 4.786111831665039, "learning_rate": 1.6027398466955674e-05, "loss": 0.5341, "step": 12300 }, { "epoch": 0.32, "grad_norm": 1.2750906944274902, "learning_rate": 1.6026736043846375e-05, "loss": 0.5882, "step": 12301 }, { "epoch": 0.32, "grad_norm": 2.44881534576416, "learning_rate": 1.6026073579204594e-05, "loss": 0.6463, "step": 12302 }, { "epoch": 0.32, "grad_norm": 2.1974940299987793, "learning_rate": 1.6025411073034902e-05, "loss": 0.6155, "step": 12303 }, { "epoch": 0.32, "grad_norm": 1.3195998668670654, "learning_rate": 1.6024748525341866e-05, "loss": 0.6058, "step": 12304 }, { "epoch": 0.32, "grad_norm": 1.9763262271881104, "learning_rate": 1.6024085936130046e-05, "loss": 0.6453, "step": 12305 }, { "epoch": 0.32, "grad_norm": 1.6443709135055542, "learning_rate": 1.602342330540401e-05, "loss": 0.5821, "step": 12306 }, { "epoch": 0.32, "grad_norm": 1.0316356420516968, "learning_rate": 1.6022760633168325e-05, "loss": 0.583, "step": 12307 }, { "epoch": 0.32, "grad_norm": 1.6242456436157227, "learning_rate": 1.6022097919427558e-05, "loss": 0.6629, "step": 12308 }, { "epoch": 0.32, "grad_norm": 1.0958945751190186, "learning_rate": 1.6021435164186276e-05, "loss": 0.4145, "step": 12309 }, { "epoch": 0.32, "grad_norm": 2.4250288009643555, "learning_rate": 1.6020772367449043e-05, "loss": 0.5889, "step": 12310 }, { "epoch": 0.32, "grad_norm": 1.807237148284912, "learning_rate": 1.602010952922043e-05, "loss": 0.6078, "step": 12311 }, { "epoch": 0.32, "grad_norm": 1.7965140342712402, "learning_rate": 1.6019446649505007e-05, "loss": 0.6116, "step": 12312 }, { "epoch": 0.32, "grad_norm": 6.4144110679626465, "learning_rate": 1.6018783728307338e-05, "loss": 0.8599, "step": 12313 }, { "epoch": 0.32, "grad_norm": 1.816601276397705, "learning_rate": 1.6018120765631993e-05, "loss": 0.6197, "step": 12314 }, { "epoch": 0.32, "grad_norm": 1.5566805601119995, "learning_rate": 1.6017457761483544e-05, "loss": 0.7675, "step": 12315 }, { "epoch": 0.32, "grad_norm": 1.1958450078964233, "learning_rate": 1.6016794715866554e-05, "loss": 0.5098, "step": 12316 }, { "epoch": 0.32, "grad_norm": 1.6705472469329834, "learning_rate": 1.6016131628785593e-05, "loss": 0.7178, "step": 12317 }, { "epoch": 0.32, "grad_norm": 1.6339268684387207, "learning_rate": 1.6015468500245233e-05, "loss": 0.4542, "step": 12318 }, { "epoch": 0.32, "grad_norm": 1.8907392024993896, "learning_rate": 1.6014805330250047e-05, "loss": 0.6047, "step": 12319 }, { "epoch": 0.32, "grad_norm": 7.756803512573242, "learning_rate": 1.6014142118804595e-05, "loss": 0.5408, "step": 12320 }, { "epoch": 0.32, "grad_norm": 1.3105417490005493, "learning_rate": 1.601347886591346e-05, "loss": 0.4847, "step": 12321 }, { "epoch": 0.32, "grad_norm": 3.3110458850860596, "learning_rate": 1.6012815571581205e-05, "loss": 0.5436, "step": 12322 }, { "epoch": 0.32, "grad_norm": 2.1657278537750244, "learning_rate": 1.60121522358124e-05, "loss": 0.5832, "step": 12323 }, { "epoch": 0.32, "grad_norm": 5.710997581481934, "learning_rate": 1.601148885861162e-05, "loss": 0.7722, "step": 12324 }, { "epoch": 0.32, "grad_norm": 2.3266613483428955, "learning_rate": 1.601082543998344e-05, "loss": 0.7954, "step": 12325 }, { "epoch": 0.32, "grad_norm": 11.878747940063477, "learning_rate": 1.6010161979932425e-05, "loss": 0.6586, "step": 12326 }, { "epoch": 0.32, "grad_norm": 1.6874890327453613, "learning_rate": 1.600949847846315e-05, "loss": 0.6282, "step": 12327 }, { "epoch": 0.32, "grad_norm": 1.3783973455429077, "learning_rate": 1.600883493558018e-05, "loss": 0.5922, "step": 12328 }, { "epoch": 0.32, "grad_norm": 2.3039960861206055, "learning_rate": 1.6008171351288105e-05, "loss": 0.5531, "step": 12329 }, { "epoch": 0.32, "grad_norm": 2.425647735595703, "learning_rate": 1.6007507725591484e-05, "loss": 0.6084, "step": 12330 }, { "epoch": 0.32, "grad_norm": 1.4667917490005493, "learning_rate": 1.6006844058494894e-05, "loss": 0.7245, "step": 12331 }, { "epoch": 0.32, "grad_norm": 1.7428466081619263, "learning_rate": 1.6006180350002906e-05, "loss": 0.7003, "step": 12332 }, { "epoch": 0.32, "grad_norm": 1.401991605758667, "learning_rate": 1.60055166001201e-05, "loss": 0.5383, "step": 12333 }, { "epoch": 0.32, "grad_norm": 1.3577018976211548, "learning_rate": 1.6004852808851045e-05, "loss": 0.5856, "step": 12334 }, { "epoch": 0.32, "grad_norm": 2.631143808364868, "learning_rate": 1.6004188976200317e-05, "loss": 0.6319, "step": 12335 }, { "epoch": 0.32, "grad_norm": 3.9778223037719727, "learning_rate": 1.600352510217249e-05, "loss": 0.5795, "step": 12336 }, { "epoch": 0.32, "grad_norm": 1.7736961841583252, "learning_rate": 1.600286118677214e-05, "loss": 0.6099, "step": 12337 }, { "epoch": 0.32, "grad_norm": 1.2396939992904663, "learning_rate": 1.6002197230003844e-05, "loss": 0.536, "step": 12338 }, { "epoch": 0.32, "grad_norm": 1.3291162252426147, "learning_rate": 1.6001533231872175e-05, "loss": 0.5497, "step": 12339 }, { "epoch": 0.32, "grad_norm": 1.1783863306045532, "learning_rate": 1.6000869192381707e-05, "loss": 0.597, "step": 12340 }, { "epoch": 0.32, "grad_norm": 1.2061151266098022, "learning_rate": 1.6000205111537024e-05, "loss": 0.5557, "step": 12341 }, { "epoch": 0.32, "grad_norm": 2.8301784992218018, "learning_rate": 1.5999540989342693e-05, "loss": 0.7216, "step": 12342 }, { "epoch": 0.32, "grad_norm": 1.8393032550811768, "learning_rate": 1.5998876825803294e-05, "loss": 0.5645, "step": 12343 }, { "epoch": 0.32, "grad_norm": 2.1318817138671875, "learning_rate": 1.599821262092341e-05, "loss": 0.6627, "step": 12344 }, { "epoch": 0.32, "grad_norm": 1.530228614807129, "learning_rate": 1.5997548374707608e-05, "loss": 0.6015, "step": 12345 }, { "epoch": 0.32, "grad_norm": 7.166741371154785, "learning_rate": 1.599688408716047e-05, "loss": 0.728, "step": 12346 }, { "epoch": 0.32, "grad_norm": 1.9765044450759888, "learning_rate": 1.5996219758286574e-05, "loss": 0.6133, "step": 12347 }, { "epoch": 0.32, "grad_norm": 1.0963172912597656, "learning_rate": 1.5995555388090503e-05, "loss": 0.5402, "step": 12348 }, { "epoch": 0.32, "grad_norm": 1.518776297569275, "learning_rate": 1.5994890976576832e-05, "loss": 0.5788, "step": 12349 }, { "epoch": 0.32, "grad_norm": 12.942167282104492, "learning_rate": 1.5994226523750132e-05, "loss": 0.7092, "step": 12350 }, { "epoch": 0.32, "grad_norm": 2.2490122318267822, "learning_rate": 1.5993562029614994e-05, "loss": 0.7047, "step": 12351 }, { "epoch": 0.32, "grad_norm": 5.256515979766846, "learning_rate": 1.5992897494175988e-05, "loss": 0.5212, "step": 12352 }, { "epoch": 0.32, "grad_norm": 3.0510470867156982, "learning_rate": 1.5992232917437706e-05, "loss": 0.5895, "step": 12353 }, { "epoch": 0.32, "grad_norm": 1.7322492599487305, "learning_rate": 1.599156829940471e-05, "loss": 0.5687, "step": 12354 }, { "epoch": 0.32, "grad_norm": 9.40259838104248, "learning_rate": 1.5990903640081593e-05, "loss": 0.6502, "step": 12355 }, { "epoch": 0.32, "grad_norm": 2.861058473587036, "learning_rate": 1.5990238939472935e-05, "loss": 0.6198, "step": 12356 }, { "epoch": 0.32, "grad_norm": 5.24873685836792, "learning_rate": 1.598957419758331e-05, "loss": 0.7289, "step": 12357 }, { "epoch": 0.32, "grad_norm": 2.4698739051818848, "learning_rate": 1.5988909414417303e-05, "loss": 0.7353, "step": 12358 }, { "epoch": 0.32, "grad_norm": 3.2349135875701904, "learning_rate": 1.5988244589979496e-05, "loss": 0.6797, "step": 12359 }, { "epoch": 0.32, "grad_norm": 4.323082447052002, "learning_rate": 1.5987579724274466e-05, "loss": 0.6924, "step": 12360 }, { "epoch": 0.32, "grad_norm": 4.621750354766846, "learning_rate": 1.5986914817306803e-05, "loss": 0.4393, "step": 12361 }, { "epoch": 0.32, "grad_norm": 1.6639354228973389, "learning_rate": 1.5986249869081083e-05, "loss": 0.6527, "step": 12362 }, { "epoch": 0.32, "grad_norm": 2.126845121383667, "learning_rate": 1.598558487960189e-05, "loss": 0.4373, "step": 12363 }, { "epoch": 0.32, "grad_norm": 3.527930974960327, "learning_rate": 1.5984919848873806e-05, "loss": 0.6307, "step": 12364 }, { "epoch": 0.32, "grad_norm": 2.873732328414917, "learning_rate": 1.5984254776901414e-05, "loss": 0.5496, "step": 12365 }, { "epoch": 0.32, "grad_norm": 1.8122221231460571, "learning_rate": 1.59835896636893e-05, "loss": 0.6021, "step": 12366 }, { "epoch": 0.32, "grad_norm": 2.9995107650756836, "learning_rate": 1.598292450924204e-05, "loss": 0.5601, "step": 12367 }, { "epoch": 0.32, "grad_norm": 2.9048256874084473, "learning_rate": 1.598225931356423e-05, "loss": 0.6532, "step": 12368 }, { "epoch": 0.32, "grad_norm": 3.5411477088928223, "learning_rate": 1.598159407666044e-05, "loss": 0.6573, "step": 12369 }, { "epoch": 0.32, "grad_norm": 1.7483558654785156, "learning_rate": 1.5980928798535267e-05, "loss": 0.6232, "step": 12370 }, { "epoch": 0.32, "grad_norm": 3.476478099822998, "learning_rate": 1.598026347919329e-05, "loss": 0.9005, "step": 12371 }, { "epoch": 0.32, "grad_norm": 1.597926139831543, "learning_rate": 1.5979598118639094e-05, "loss": 0.5911, "step": 12372 }, { "epoch": 0.32, "grad_norm": 1.230365514755249, "learning_rate": 1.5978932716877263e-05, "loss": 0.474, "step": 12373 }, { "epoch": 0.32, "grad_norm": 1.8255751132965088, "learning_rate": 1.5978267273912385e-05, "loss": 0.5986, "step": 12374 }, { "epoch": 0.32, "grad_norm": 1.5586192607879639, "learning_rate": 1.5977601789749045e-05, "loss": 0.5612, "step": 12375 }, { "epoch": 0.32, "grad_norm": 6.241675853729248, "learning_rate": 1.5976936264391825e-05, "loss": 0.5447, "step": 12376 }, { "epoch": 0.32, "grad_norm": 3.4410691261291504, "learning_rate": 1.5976270697845318e-05, "loss": 0.5807, "step": 12377 }, { "epoch": 0.32, "grad_norm": 2.2238001823425293, "learning_rate": 1.597560509011411e-05, "loss": 0.7377, "step": 12378 }, { "epoch": 0.32, "grad_norm": 3.62542462348938, "learning_rate": 1.5974939441202784e-05, "loss": 0.6238, "step": 12379 }, { "epoch": 0.32, "grad_norm": 3.6916093826293945, "learning_rate": 1.5974273751115932e-05, "loss": 0.5915, "step": 12380 }, { "epoch": 0.32, "grad_norm": 1.7829439640045166, "learning_rate": 1.5973608019858136e-05, "loss": 0.6494, "step": 12381 }, { "epoch": 0.32, "grad_norm": 1.456761360168457, "learning_rate": 1.597294224743399e-05, "loss": 0.6226, "step": 12382 }, { "epoch": 0.32, "grad_norm": 1.9319686889648438, "learning_rate": 1.5972276433848074e-05, "loss": 0.6551, "step": 12383 }, { "epoch": 0.32, "grad_norm": 6.392002105712891, "learning_rate": 1.5971610579104983e-05, "loss": 0.5826, "step": 12384 }, { "epoch": 0.32, "grad_norm": 1.1645469665527344, "learning_rate": 1.5970944683209305e-05, "loss": 0.4616, "step": 12385 }, { "epoch": 0.32, "grad_norm": 1.2497438192367554, "learning_rate": 1.5970278746165623e-05, "loss": 0.5411, "step": 12386 }, { "epoch": 0.32, "grad_norm": 2.3936285972595215, "learning_rate": 1.5969612767978535e-05, "loss": 0.5335, "step": 12387 }, { "epoch": 0.32, "grad_norm": 1.14206862449646, "learning_rate": 1.5968946748652623e-05, "loss": 0.6398, "step": 12388 }, { "epoch": 0.32, "grad_norm": 7.900638103485107, "learning_rate": 1.5968280688192483e-05, "loss": 0.6568, "step": 12389 }, { "epoch": 0.32, "grad_norm": 1.9395641088485718, "learning_rate": 1.59676145866027e-05, "loss": 0.6932, "step": 12390 }, { "epoch": 0.32, "grad_norm": 1.3332704305648804, "learning_rate": 1.596694844388787e-05, "loss": 0.5613, "step": 12391 }, { "epoch": 0.32, "grad_norm": 7.1894450187683105, "learning_rate": 1.596628226005258e-05, "loss": 0.7044, "step": 12392 }, { "epoch": 0.32, "grad_norm": 1.782941222190857, "learning_rate": 1.596561603510142e-05, "loss": 0.7135, "step": 12393 }, { "epoch": 0.32, "grad_norm": 1.0926460027694702, "learning_rate": 1.5964949769038978e-05, "loss": 0.5314, "step": 12394 }, { "epoch": 0.32, "grad_norm": 1.2635440826416016, "learning_rate": 1.5964283461869854e-05, "loss": 0.5434, "step": 12395 }, { "epoch": 0.32, "grad_norm": 2.242626190185547, "learning_rate": 1.5963617113598635e-05, "loss": 0.6207, "step": 12396 }, { "epoch": 0.32, "grad_norm": 1.1838476657867432, "learning_rate": 1.5962950724229913e-05, "loss": 0.4743, "step": 12397 }, { "epoch": 0.32, "grad_norm": 4.34743070602417, "learning_rate": 1.596228429376828e-05, "loss": 0.5633, "step": 12398 }, { "epoch": 0.32, "grad_norm": 3.735081911087036, "learning_rate": 1.596161782221833e-05, "loss": 0.7734, "step": 12399 }, { "epoch": 0.32, "grad_norm": 2.310397148132324, "learning_rate": 1.5960951309584655e-05, "loss": 0.6597, "step": 12400 }, { "epoch": 0.32, "grad_norm": 2.3164875507354736, "learning_rate": 1.596028475587185e-05, "loss": 0.6868, "step": 12401 }, { "epoch": 0.32, "grad_norm": 1.5332742929458618, "learning_rate": 1.5959618161084506e-05, "loss": 0.6037, "step": 12402 }, { "epoch": 0.32, "grad_norm": 2.9372289180755615, "learning_rate": 1.5958951525227218e-05, "loss": 0.7023, "step": 12403 }, { "epoch": 0.32, "grad_norm": 1.882468342781067, "learning_rate": 1.5958284848304583e-05, "loss": 0.617, "step": 12404 }, { "epoch": 0.32, "grad_norm": 4.917450428009033, "learning_rate": 1.5957618130321186e-05, "loss": 0.4891, "step": 12405 }, { "epoch": 0.32, "grad_norm": 1.9182586669921875, "learning_rate": 1.595695137128163e-05, "loss": 0.5435, "step": 12406 }, { "epoch": 0.32, "grad_norm": 4.564478397369385, "learning_rate": 1.5956284571190513e-05, "loss": 0.4802, "step": 12407 }, { "epoch": 0.32, "grad_norm": 3.650383949279785, "learning_rate": 1.595561773005242e-05, "loss": 0.5202, "step": 12408 }, { "epoch": 0.32, "grad_norm": 1.9750741720199585, "learning_rate": 1.5954950847871952e-05, "loss": 0.6963, "step": 12409 }, { "epoch": 0.32, "grad_norm": 1.9208650588989258, "learning_rate": 1.5954283924653702e-05, "loss": 0.5999, "step": 12410 }, { "epoch": 0.32, "grad_norm": 2.221813678741455, "learning_rate": 1.5953616960402273e-05, "loss": 0.5226, "step": 12411 }, { "epoch": 0.32, "grad_norm": 4.234320163726807, "learning_rate": 1.595294995512225e-05, "loss": 0.5967, "step": 12412 }, { "epoch": 0.32, "grad_norm": 2.276228904724121, "learning_rate": 1.595228290881824e-05, "loss": 0.6938, "step": 12413 }, { "epoch": 0.32, "grad_norm": 3.4044182300567627, "learning_rate": 1.5951615821494835e-05, "loss": 0.7136, "step": 12414 }, { "epoch": 0.32, "grad_norm": 5.964245319366455, "learning_rate": 1.5950948693156634e-05, "loss": 0.7323, "step": 12415 }, { "epoch": 0.32, "grad_norm": 1.646161437034607, "learning_rate": 1.5950281523808234e-05, "loss": 0.686, "step": 12416 }, { "epoch": 0.32, "grad_norm": 2.174104928970337, "learning_rate": 1.5949614313454228e-05, "loss": 0.6716, "step": 12417 }, { "epoch": 0.32, "grad_norm": 2.058062791824341, "learning_rate": 1.5948947062099222e-05, "loss": 0.5927, "step": 12418 }, { "epoch": 0.32, "grad_norm": 2.93247389793396, "learning_rate": 1.5948279769747808e-05, "loss": 0.6279, "step": 12419 }, { "epoch": 0.32, "grad_norm": 1.8561248779296875, "learning_rate": 1.5947612436404587e-05, "loss": 0.7479, "step": 12420 }, { "epoch": 0.32, "grad_norm": 2.1393706798553467, "learning_rate": 1.594694506207416e-05, "loss": 0.621, "step": 12421 }, { "epoch": 0.32, "grad_norm": 12.643290519714355, "learning_rate": 1.5946277646761125e-05, "loss": 0.8269, "step": 12422 }, { "epoch": 0.32, "grad_norm": 1.5234673023223877, "learning_rate": 1.5945610190470072e-05, "loss": 0.431, "step": 12423 }, { "epoch": 0.32, "grad_norm": 5.729968547821045, "learning_rate": 1.5944942693205617e-05, "loss": 0.5672, "step": 12424 }, { "epoch": 0.32, "grad_norm": 5.5838704109191895, "learning_rate": 1.594427515497235e-05, "loss": 0.5313, "step": 12425 }, { "epoch": 0.32, "grad_norm": 1.1776785850524902, "learning_rate": 1.594360757577487e-05, "loss": 0.6302, "step": 12426 }, { "epoch": 0.32, "grad_norm": 2.432687997817993, "learning_rate": 1.5942939955617783e-05, "loss": 0.7809, "step": 12427 }, { "epoch": 0.32, "grad_norm": 4.4375457763671875, "learning_rate": 1.594227229450569e-05, "loss": 0.4528, "step": 12428 }, { "epoch": 0.32, "grad_norm": 1.8229459524154663, "learning_rate": 1.5941604592443186e-05, "loss": 0.5596, "step": 12429 }, { "epoch": 0.32, "grad_norm": 2.719372272491455, "learning_rate": 1.594093684943488e-05, "loss": 0.659, "step": 12430 }, { "epoch": 0.32, "grad_norm": 1.9477450847625732, "learning_rate": 1.5940269065485366e-05, "loss": 0.6914, "step": 12431 }, { "epoch": 0.32, "grad_norm": 2.0511722564697266, "learning_rate": 1.5939601240599253e-05, "loss": 0.7657, "step": 12432 }, { "epoch": 0.32, "grad_norm": 1.7385144233703613, "learning_rate": 1.5938933374781136e-05, "loss": 0.5508, "step": 12433 }, { "epoch": 0.32, "grad_norm": 1.5137301683425903, "learning_rate": 1.5938265468035625e-05, "loss": 0.6336, "step": 12434 }, { "epoch": 0.32, "grad_norm": 1.5781389474868774, "learning_rate": 1.5937597520367315e-05, "loss": 0.5918, "step": 12435 }, { "epoch": 0.32, "grad_norm": 1.4678040742874146, "learning_rate": 1.593692953178082e-05, "loss": 0.5516, "step": 12436 }, { "epoch": 0.32, "grad_norm": 2.0224545001983643, "learning_rate": 1.593626150228073e-05, "loss": 0.5844, "step": 12437 }, { "epoch": 0.32, "grad_norm": 2.9077436923980713, "learning_rate": 1.593559343187166e-05, "loss": 0.6004, "step": 12438 }, { "epoch": 0.32, "grad_norm": 2.2343697547912598, "learning_rate": 1.5934925320558203e-05, "loss": 0.613, "step": 12439 }, { "epoch": 0.32, "grad_norm": 1.481019139289856, "learning_rate": 1.5934257168344976e-05, "loss": 0.53, "step": 12440 }, { "epoch": 0.32, "grad_norm": 2.4310693740844727, "learning_rate": 1.5933588975236574e-05, "loss": 0.7908, "step": 12441 }, { "epoch": 0.32, "grad_norm": 5.7153825759887695, "learning_rate": 1.5932920741237606e-05, "loss": 0.5738, "step": 12442 }, { "epoch": 0.32, "grad_norm": 1.7106072902679443, "learning_rate": 1.593225246635267e-05, "loss": 0.595, "step": 12443 }, { "epoch": 0.32, "grad_norm": 1.3885055780410767, "learning_rate": 1.5931584150586383e-05, "loss": 0.5772, "step": 12444 }, { "epoch": 0.32, "grad_norm": 2.2625997066497803, "learning_rate": 1.5930915793943343e-05, "loss": 0.6133, "step": 12445 }, { "epoch": 0.32, "grad_norm": 3.159632682800293, "learning_rate": 1.5930247396428157e-05, "loss": 0.4836, "step": 12446 }, { "epoch": 0.32, "grad_norm": 3.7245936393737793, "learning_rate": 1.592957895804543e-05, "loss": 0.699, "step": 12447 }, { "epoch": 0.32, "grad_norm": 2.089524030685425, "learning_rate": 1.5928910478799776e-05, "loss": 0.7158, "step": 12448 }, { "epoch": 0.32, "grad_norm": 2.5012764930725098, "learning_rate": 1.5928241958695793e-05, "loss": 0.4215, "step": 12449 }, { "epoch": 0.32, "grad_norm": 1.1800358295440674, "learning_rate": 1.592757339773809e-05, "loss": 0.6428, "step": 12450 }, { "epoch": 0.32, "grad_norm": 3.049177885055542, "learning_rate": 1.5926904795931273e-05, "loss": 0.7188, "step": 12451 }, { "epoch": 0.32, "grad_norm": 2.209660768508911, "learning_rate": 1.5926236153279955e-05, "loss": 0.7543, "step": 12452 }, { "epoch": 0.32, "grad_norm": 1.3678922653198242, "learning_rate": 1.592556746978874e-05, "loss": 0.606, "step": 12453 }, { "epoch": 0.32, "grad_norm": 2.092646598815918, "learning_rate": 1.5924898745462237e-05, "loss": 0.7888, "step": 12454 }, { "epoch": 0.32, "grad_norm": 2.4704227447509766, "learning_rate": 1.5924229980305052e-05, "loss": 0.6972, "step": 12455 }, { "epoch": 0.32, "grad_norm": 2.0763444900512695, "learning_rate": 1.5923561174321797e-05, "loss": 0.5218, "step": 12456 }, { "epoch": 0.32, "grad_norm": 1.7526313066482544, "learning_rate": 1.5922892327517084e-05, "loss": 0.6368, "step": 12457 }, { "epoch": 0.32, "grad_norm": 1.5867445468902588, "learning_rate": 1.5922223439895514e-05, "loss": 0.5711, "step": 12458 }, { "epoch": 0.32, "grad_norm": 1.8172332048416138, "learning_rate": 1.59215545114617e-05, "loss": 0.5213, "step": 12459 }, { "epoch": 0.32, "grad_norm": 2.6808886528015137, "learning_rate": 1.5920885542220255e-05, "loss": 0.7323, "step": 12460 }, { "epoch": 0.32, "grad_norm": 3.772981643676758, "learning_rate": 1.5920216532175785e-05, "loss": 0.5104, "step": 12461 }, { "epoch": 0.32, "grad_norm": 4.311972618103027, "learning_rate": 1.5919547481332904e-05, "loss": 0.6848, "step": 12462 }, { "epoch": 0.32, "grad_norm": 2.3346261978149414, "learning_rate": 1.591887838969622e-05, "loss": 0.5453, "step": 12463 }, { "epoch": 0.32, "grad_norm": 10.76407241821289, "learning_rate": 1.5918209257270344e-05, "loss": 0.6437, "step": 12464 }, { "epoch": 0.32, "grad_norm": 1.681143879890442, "learning_rate": 1.591754008405989e-05, "loss": 0.5532, "step": 12465 }, { "epoch": 0.32, "grad_norm": 1.7003129720687866, "learning_rate": 1.5916870870069466e-05, "loss": 0.4623, "step": 12466 }, { "epoch": 0.32, "grad_norm": 1.7136274576187134, "learning_rate": 1.5916201615303684e-05, "loss": 0.6578, "step": 12467 }, { "epoch": 0.32, "grad_norm": 1.8392335176467896, "learning_rate": 1.591553231976716e-05, "loss": 0.6465, "step": 12468 }, { "epoch": 0.32, "grad_norm": 8.211278915405273, "learning_rate": 1.5914862983464502e-05, "loss": 0.6335, "step": 12469 }, { "epoch": 0.32, "grad_norm": 11.630391120910645, "learning_rate": 1.5914193606400325e-05, "loss": 0.601, "step": 12470 }, { "epoch": 0.32, "grad_norm": 1.56362783908844, "learning_rate": 1.5913524188579243e-05, "loss": 0.7325, "step": 12471 }, { "epoch": 0.32, "grad_norm": 6.9648661613464355, "learning_rate": 1.5912854730005864e-05, "loss": 0.7384, "step": 12472 }, { "epoch": 0.32, "grad_norm": 3.5021135807037354, "learning_rate": 1.5912185230684804e-05, "loss": 0.4957, "step": 12473 }, { "epoch": 0.32, "grad_norm": 1.770880103111267, "learning_rate": 1.5911515690620683e-05, "loss": 0.5756, "step": 12474 }, { "epoch": 0.32, "grad_norm": 1.4672966003417969, "learning_rate": 1.5910846109818106e-05, "loss": 0.4763, "step": 12475 }, { "epoch": 0.32, "grad_norm": 1.566848635673523, "learning_rate": 1.5910176488281694e-05, "loss": 0.5692, "step": 12476 }, { "epoch": 0.32, "grad_norm": 1.976302981376648, "learning_rate": 1.5909506826016055e-05, "loss": 0.6931, "step": 12477 }, { "epoch": 0.32, "grad_norm": 2.423440933227539, "learning_rate": 1.590883712302581e-05, "loss": 0.4636, "step": 12478 }, { "epoch": 0.32, "grad_norm": 1.8540494441986084, "learning_rate": 1.590816737931557e-05, "loss": 0.6357, "step": 12479 }, { "epoch": 0.32, "grad_norm": 1.8569473028182983, "learning_rate": 1.5907497594889956e-05, "loss": 0.6887, "step": 12480 }, { "epoch": 0.32, "grad_norm": 1.9834827184677124, "learning_rate": 1.590682776975358e-05, "loss": 0.4831, "step": 12481 }, { "epoch": 0.32, "grad_norm": 4.04394006729126, "learning_rate": 1.5906157903911056e-05, "loss": 0.5945, "step": 12482 }, { "epoch": 0.32, "grad_norm": 2.17950177192688, "learning_rate": 1.5905487997367e-05, "loss": 0.4855, "step": 12483 }, { "epoch": 0.32, "grad_norm": 3.3260657787323, "learning_rate": 1.5904818050126034e-05, "loss": 0.647, "step": 12484 }, { "epoch": 0.32, "grad_norm": 7.916005611419678, "learning_rate": 1.590414806219277e-05, "loss": 0.5688, "step": 12485 }, { "epoch": 0.32, "grad_norm": 1.84055757522583, "learning_rate": 1.590347803357183e-05, "loss": 0.5069, "step": 12486 }, { "epoch": 0.32, "grad_norm": 1.2359869480133057, "learning_rate": 1.590280796426783e-05, "loss": 0.5466, "step": 12487 }, { "epoch": 0.32, "grad_norm": 2.3184826374053955, "learning_rate": 1.590213785428538e-05, "loss": 0.7384, "step": 12488 }, { "epoch": 0.32, "grad_norm": 1.7534866333007812, "learning_rate": 1.5901467703629107e-05, "loss": 0.5019, "step": 12489 }, { "epoch": 0.32, "grad_norm": 1.6328623294830322, "learning_rate": 1.5900797512303623e-05, "loss": 0.6331, "step": 12490 }, { "epoch": 0.32, "grad_norm": 3.0598456859588623, "learning_rate": 1.5900127280313554e-05, "loss": 0.6915, "step": 12491 }, { "epoch": 0.32, "grad_norm": 1.628247857093811, "learning_rate": 1.5899457007663515e-05, "loss": 0.5893, "step": 12492 }, { "epoch": 0.32, "grad_norm": 1.3107402324676514, "learning_rate": 1.589878669435812e-05, "loss": 0.4749, "step": 12493 }, { "epoch": 0.32, "grad_norm": 1.6876094341278076, "learning_rate": 1.5898116340401996e-05, "loss": 0.6661, "step": 12494 }, { "epoch": 0.32, "grad_norm": 6.154672145843506, "learning_rate": 1.589744594579976e-05, "loss": 0.6538, "step": 12495 }, { "epoch": 0.32, "grad_norm": 1.2867281436920166, "learning_rate": 1.589677551055603e-05, "loss": 0.634, "step": 12496 }, { "epoch": 0.32, "grad_norm": 2.570765495300293, "learning_rate": 1.589610503467543e-05, "loss": 0.6616, "step": 12497 }, { "epoch": 0.32, "grad_norm": 1.5611398220062256, "learning_rate": 1.5895434518162576e-05, "loss": 0.5221, "step": 12498 }, { "epoch": 0.32, "grad_norm": 1.6210291385650635, "learning_rate": 1.5894763961022096e-05, "loss": 0.6233, "step": 12499 }, { "epoch": 0.32, "grad_norm": 1.5429357290267944, "learning_rate": 1.5894093363258602e-05, "loss": 0.5163, "step": 12500 }, { "epoch": 0.32, "grad_norm": 0.9076051115989685, "learning_rate": 1.589342272487672e-05, "loss": 0.4902, "step": 12501 }, { "epoch": 0.32, "grad_norm": 1.3071790933609009, "learning_rate": 1.589275204588107e-05, "loss": 0.5898, "step": 12502 }, { "epoch": 0.32, "grad_norm": 3.1539018154144287, "learning_rate": 1.589208132627628e-05, "loss": 0.7003, "step": 12503 }, { "epoch": 0.32, "grad_norm": 8.366153717041016, "learning_rate": 1.5891410566066964e-05, "loss": 0.588, "step": 12504 }, { "epoch": 0.32, "grad_norm": 1.3685226440429688, "learning_rate": 1.5890739765257747e-05, "loss": 0.4899, "step": 12505 }, { "epoch": 0.32, "grad_norm": 3.2384626865386963, "learning_rate": 1.5890068923853253e-05, "loss": 0.6684, "step": 12506 }, { "epoch": 0.32, "grad_norm": 5.782877445220947, "learning_rate": 1.5889398041858107e-05, "loss": 0.4222, "step": 12507 }, { "epoch": 0.32, "grad_norm": 3.6022844314575195, "learning_rate": 1.5888727119276926e-05, "loss": 0.7852, "step": 12508 }, { "epoch": 0.32, "grad_norm": 1.6334105730056763, "learning_rate": 1.5888056156114336e-05, "loss": 0.57, "step": 12509 }, { "epoch": 0.32, "grad_norm": 1.5494014024734497, "learning_rate": 1.5887385152374968e-05, "loss": 0.6128, "step": 12510 }, { "epoch": 0.32, "grad_norm": 4.0030131340026855, "learning_rate": 1.588671410806344e-05, "loss": 0.8482, "step": 12511 }, { "epoch": 0.32, "grad_norm": 1.509390115737915, "learning_rate": 1.5886043023184374e-05, "loss": 0.6577, "step": 12512 }, { "epoch": 0.32, "grad_norm": 1.4061470031738281, "learning_rate": 1.5885371897742397e-05, "loss": 0.5355, "step": 12513 }, { "epoch": 0.32, "grad_norm": 1.7006608247756958, "learning_rate": 1.5884700731742135e-05, "loss": 0.5222, "step": 12514 }, { "epoch": 0.32, "grad_norm": 2.3004043102264404, "learning_rate": 1.5884029525188215e-05, "loss": 0.6326, "step": 12515 }, { "epoch": 0.32, "grad_norm": 4.683338642120361, "learning_rate": 1.5883358278085256e-05, "loss": 0.7204, "step": 12516 }, { "epoch": 0.32, "grad_norm": 4.643656253814697, "learning_rate": 1.588268699043789e-05, "loss": 0.6149, "step": 12517 }, { "epoch": 0.32, "grad_norm": 5.305634021759033, "learning_rate": 1.5882015662250744e-05, "loss": 0.6792, "step": 12518 }, { "epoch": 0.32, "grad_norm": 2.0209450721740723, "learning_rate": 1.5881344293528436e-05, "loss": 0.5808, "step": 12519 }, { "epoch": 0.32, "grad_norm": 1.3449331521987915, "learning_rate": 1.5880672884275602e-05, "loss": 0.4493, "step": 12520 }, { "epoch": 0.32, "grad_norm": 1.34049654006958, "learning_rate": 1.5880001434496863e-05, "loss": 0.6428, "step": 12521 }, { "epoch": 0.32, "grad_norm": 1.3930625915527344, "learning_rate": 1.5879329944196847e-05, "loss": 0.4771, "step": 12522 }, { "epoch": 0.32, "grad_norm": 1.2546958923339844, "learning_rate": 1.587865841338019e-05, "loss": 0.6816, "step": 12523 }, { "epoch": 0.32, "grad_norm": 1.4570716619491577, "learning_rate": 1.5877986842051504e-05, "loss": 0.4396, "step": 12524 }, { "epoch": 0.32, "grad_norm": 2.3685407638549805, "learning_rate": 1.587731523021543e-05, "loss": 0.5736, "step": 12525 }, { "epoch": 0.32, "grad_norm": 1.9803085327148438, "learning_rate": 1.587664357787659e-05, "loss": 0.7193, "step": 12526 }, { "epoch": 0.32, "grad_norm": 1.6071770191192627, "learning_rate": 1.5875971885039616e-05, "loss": 0.6618, "step": 12527 }, { "epoch": 0.32, "grad_norm": 4.689986228942871, "learning_rate": 1.5875300151709137e-05, "loss": 0.6756, "step": 12528 }, { "epoch": 0.32, "grad_norm": 3.58418345451355, "learning_rate": 1.587462837788978e-05, "loss": 0.6621, "step": 12529 }, { "epoch": 0.32, "grad_norm": 7.1453423500061035, "learning_rate": 1.5873956563586172e-05, "loss": 0.8898, "step": 12530 }, { "epoch": 0.32, "grad_norm": 2.2028894424438477, "learning_rate": 1.5873284708802946e-05, "loss": 0.6273, "step": 12531 }, { "epoch": 0.32, "grad_norm": 1.7988312244415283, "learning_rate": 1.5872612813544735e-05, "loss": 0.4872, "step": 12532 }, { "epoch": 0.32, "grad_norm": 1.9378833770751953, "learning_rate": 1.5871940877816163e-05, "loss": 0.5238, "step": 12533 }, { "epoch": 0.32, "grad_norm": 2.0909628868103027, "learning_rate": 1.5871268901621867e-05, "loss": 0.6541, "step": 12534 }, { "epoch": 0.32, "grad_norm": 4.663445949554443, "learning_rate": 1.587059688496647e-05, "loss": 0.5746, "step": 12535 }, { "epoch": 0.32, "grad_norm": 1.5943177938461304, "learning_rate": 1.586992482785461e-05, "loss": 0.6031, "step": 12536 }, { "epoch": 0.32, "grad_norm": 4.429296016693115, "learning_rate": 1.5869252730290915e-05, "loss": 0.7246, "step": 12537 }, { "epoch": 0.32, "grad_norm": 1.9616740942001343, "learning_rate": 1.5868580592280017e-05, "loss": 0.5848, "step": 12538 }, { "epoch": 0.32, "grad_norm": 2.9537620544433594, "learning_rate": 1.5867908413826552e-05, "loss": 0.6064, "step": 12539 }, { "epoch": 0.32, "grad_norm": 1.6090000867843628, "learning_rate": 1.5867236194935144e-05, "loss": 0.5382, "step": 12540 }, { "epoch": 0.32, "grad_norm": 4.652350902557373, "learning_rate": 1.5866563935610435e-05, "loss": 0.7078, "step": 12541 }, { "epoch": 0.32, "grad_norm": 1.6214148998260498, "learning_rate": 1.586589163585705e-05, "loss": 0.4964, "step": 12542 }, { "epoch": 0.32, "grad_norm": 6.840658187866211, "learning_rate": 1.5865219295679626e-05, "loss": 0.6769, "step": 12543 }, { "epoch": 0.32, "grad_norm": 3.5288636684417725, "learning_rate": 1.5864546915082798e-05, "loss": 0.4642, "step": 12544 }, { "epoch": 0.32, "grad_norm": 1.9121636152267456, "learning_rate": 1.5863874494071196e-05, "loss": 0.585, "step": 12545 }, { "epoch": 0.32, "grad_norm": 2.0926601886749268, "learning_rate": 1.586320203264945e-05, "loss": 0.5654, "step": 12546 }, { "epoch": 0.32, "grad_norm": 3.046048164367676, "learning_rate": 1.5862529530822204e-05, "loss": 0.6286, "step": 12547 }, { "epoch": 0.32, "grad_norm": 3.0066733360290527, "learning_rate": 1.5861856988594085e-05, "loss": 0.583, "step": 12548 }, { "epoch": 0.32, "grad_norm": 1.5194861888885498, "learning_rate": 1.5861184405969735e-05, "loss": 0.6442, "step": 12549 }, { "epoch": 0.32, "grad_norm": 1.6736575365066528, "learning_rate": 1.5860511782953782e-05, "loss": 0.6529, "step": 12550 }, { "epoch": 0.32, "grad_norm": 3.6687920093536377, "learning_rate": 1.5859839119550864e-05, "loss": 0.5409, "step": 12551 }, { "epoch": 0.32, "grad_norm": 2.408423662185669, "learning_rate": 1.5859166415765613e-05, "loss": 0.6426, "step": 12552 }, { "epoch": 0.32, "grad_norm": 2.683910608291626, "learning_rate": 1.5858493671602674e-05, "loss": 0.6699, "step": 12553 }, { "epoch": 0.32, "grad_norm": 1.979111909866333, "learning_rate": 1.585782088706667e-05, "loss": 0.6265, "step": 12554 }, { "epoch": 0.32, "grad_norm": 1.490401268005371, "learning_rate": 1.585714806216225e-05, "loss": 0.5309, "step": 12555 }, { "epoch": 0.32, "grad_norm": 1.5514827966690063, "learning_rate": 1.5856475196894043e-05, "loss": 0.5891, "step": 12556 }, { "epoch": 0.32, "grad_norm": 1.741098403930664, "learning_rate": 1.5855802291266692e-05, "loss": 0.613, "step": 12557 }, { "epoch": 0.32, "grad_norm": 3.0864098072052, "learning_rate": 1.585512934528483e-05, "loss": 0.6058, "step": 12558 }, { "epoch": 0.32, "grad_norm": 1.8407474756240845, "learning_rate": 1.585445635895309e-05, "loss": 0.5097, "step": 12559 }, { "epoch": 0.32, "grad_norm": 3.2087807655334473, "learning_rate": 1.5853783332276118e-05, "loss": 0.4206, "step": 12560 }, { "epoch": 0.32, "grad_norm": 3.4016273021698, "learning_rate": 1.585311026525855e-05, "loss": 0.5467, "step": 12561 }, { "epoch": 0.32, "grad_norm": 3.7812817096710205, "learning_rate": 1.5852437157905023e-05, "loss": 0.5533, "step": 12562 }, { "epoch": 0.32, "grad_norm": 4.3539958000183105, "learning_rate": 1.5851764010220175e-05, "loss": 0.5229, "step": 12563 }, { "epoch": 0.32, "grad_norm": 1.2058109045028687, "learning_rate": 1.5851090822208645e-05, "loss": 0.4847, "step": 12564 }, { "epoch": 0.32, "grad_norm": 1.3339570760726929, "learning_rate": 1.5850417593875074e-05, "loss": 0.6149, "step": 12565 }, { "epoch": 0.32, "grad_norm": 4.13124942779541, "learning_rate": 1.5849744325224103e-05, "loss": 0.6699, "step": 12566 }, { "epoch": 0.32, "grad_norm": 1.5267963409423828, "learning_rate": 1.5849071016260364e-05, "loss": 0.674, "step": 12567 }, { "epoch": 0.32, "grad_norm": 3.6556124687194824, "learning_rate": 1.5848397666988506e-05, "loss": 0.4802, "step": 12568 }, { "epoch": 0.32, "grad_norm": 3.5248377323150635, "learning_rate": 1.5847724277413166e-05, "loss": 0.7531, "step": 12569 }, { "epoch": 0.32, "grad_norm": 0.9821199774742126, "learning_rate": 1.5847050847538984e-05, "loss": 0.5416, "step": 12570 }, { "epoch": 0.32, "grad_norm": 3.800936698913574, "learning_rate": 1.5846377377370598e-05, "loss": 0.632, "step": 12571 }, { "epoch": 0.32, "grad_norm": 1.6888911724090576, "learning_rate": 1.5845703866912656e-05, "loss": 0.7048, "step": 12572 }, { "epoch": 0.32, "grad_norm": 0.9594559073448181, "learning_rate": 1.5845030316169793e-05, "loss": 0.5349, "step": 12573 }, { "epoch": 0.32, "grad_norm": 3.765692710876465, "learning_rate": 1.5844356725146654e-05, "loss": 0.7775, "step": 12574 }, { "epoch": 0.32, "grad_norm": 4.810887336730957, "learning_rate": 1.584368309384788e-05, "loss": 0.7193, "step": 12575 }, { "epoch": 0.32, "grad_norm": 3.8456292152404785, "learning_rate": 1.5843009422278113e-05, "loss": 0.6184, "step": 12576 }, { "epoch": 0.32, "grad_norm": 2.3034956455230713, "learning_rate": 1.5842335710441998e-05, "loss": 0.5892, "step": 12577 }, { "epoch": 0.32, "grad_norm": 1.7650705575942993, "learning_rate": 1.5841661958344174e-05, "loss": 0.675, "step": 12578 }, { "epoch": 0.32, "grad_norm": 3.231234073638916, "learning_rate": 1.584098816598929e-05, "loss": 0.6893, "step": 12579 }, { "epoch": 0.32, "grad_norm": 3.636598587036133, "learning_rate": 1.584031433338198e-05, "loss": 0.6048, "step": 12580 }, { "epoch": 0.32, "grad_norm": 1.847156286239624, "learning_rate": 1.5839640460526897e-05, "loss": 0.5956, "step": 12581 }, { "epoch": 0.32, "grad_norm": 6.6911749839782715, "learning_rate": 1.5838966547428675e-05, "loss": 0.6931, "step": 12582 }, { "epoch": 0.32, "grad_norm": 4.5585784912109375, "learning_rate": 1.5838292594091967e-05, "loss": 0.4876, "step": 12583 }, { "epoch": 0.32, "grad_norm": 2.1203320026397705, "learning_rate": 1.5837618600521418e-05, "loss": 0.3597, "step": 12584 }, { "epoch": 0.32, "grad_norm": 1.3858774900436401, "learning_rate": 1.5836944566721665e-05, "loss": 0.5895, "step": 12585 }, { "epoch": 0.32, "grad_norm": 1.4488993883132935, "learning_rate": 1.583627049269736e-05, "loss": 0.5356, "step": 12586 }, { "epoch": 0.32, "grad_norm": 2.1988613605499268, "learning_rate": 1.5835596378453142e-05, "loss": 0.7051, "step": 12587 }, { "epoch": 0.32, "grad_norm": 2.179922103881836, "learning_rate": 1.583492222399366e-05, "loss": 0.6178, "step": 12588 }, { "epoch": 0.32, "grad_norm": 1.6827747821807861, "learning_rate": 1.5834248029323562e-05, "loss": 0.6818, "step": 12589 }, { "epoch": 0.32, "grad_norm": 1.2742748260498047, "learning_rate": 1.583357379444749e-05, "loss": 0.663, "step": 12590 }, { "epoch": 0.32, "grad_norm": 1.2992029190063477, "learning_rate": 1.5832899519370096e-05, "loss": 0.7058, "step": 12591 }, { "epoch": 0.32, "grad_norm": 2.141538143157959, "learning_rate": 1.583222520409602e-05, "loss": 0.6018, "step": 12592 }, { "epoch": 0.32, "grad_norm": 1.8071953058242798, "learning_rate": 1.5831550848629915e-05, "loss": 0.4087, "step": 12593 }, { "epoch": 0.32, "grad_norm": 3.2502939701080322, "learning_rate": 1.5830876452976424e-05, "loss": 0.6036, "step": 12594 }, { "epoch": 0.32, "grad_norm": 2.7621257305145264, "learning_rate": 1.583020201714019e-05, "loss": 0.6785, "step": 12595 }, { "epoch": 0.32, "grad_norm": 2.8340916633605957, "learning_rate": 1.5829527541125876e-05, "loss": 0.6787, "step": 12596 }, { "epoch": 0.32, "grad_norm": 2.4656500816345215, "learning_rate": 1.5828853024938115e-05, "loss": 0.5678, "step": 12597 }, { "epoch": 0.32, "grad_norm": 4.084838390350342, "learning_rate": 1.5828178468581566e-05, "loss": 0.7101, "step": 12598 }, { "epoch": 0.32, "grad_norm": 7.7480692863464355, "learning_rate": 1.582750387206087e-05, "loss": 0.7385, "step": 12599 }, { "epoch": 0.32, "grad_norm": 2.368424654006958, "learning_rate": 1.5826829235380673e-05, "loss": 0.5956, "step": 12600 }, { "epoch": 0.32, "grad_norm": 3.309735059738159, "learning_rate": 1.5826154558545636e-05, "loss": 0.6807, "step": 12601 }, { "epoch": 0.32, "grad_norm": 1.5340510606765747, "learning_rate": 1.58254798415604e-05, "loss": 0.5549, "step": 12602 }, { "epoch": 0.32, "grad_norm": 2.755220890045166, "learning_rate": 1.5824805084429617e-05, "loss": 0.8328, "step": 12603 }, { "epoch": 0.32, "grad_norm": 2.102245330810547, "learning_rate": 1.5824130287157937e-05, "loss": 0.6385, "step": 12604 }, { "epoch": 0.32, "grad_norm": 2.872880220413208, "learning_rate": 1.5823455449750008e-05, "loss": 0.5721, "step": 12605 }, { "epoch": 0.32, "grad_norm": 2.0460472106933594, "learning_rate": 1.5822780572210487e-05, "loss": 0.6123, "step": 12606 }, { "epoch": 0.32, "grad_norm": 2.313480854034424, "learning_rate": 1.582210565454402e-05, "loss": 0.7743, "step": 12607 }, { "epoch": 0.32, "grad_norm": 3.1204326152801514, "learning_rate": 1.5821430696755258e-05, "loss": 0.5798, "step": 12608 }, { "epoch": 0.32, "grad_norm": 2.8353638648986816, "learning_rate": 1.582075569884885e-05, "loss": 0.701, "step": 12609 }, { "epoch": 0.32, "grad_norm": 1.4955044984817505, "learning_rate": 1.5820080660829454e-05, "loss": 0.549, "step": 12610 }, { "epoch": 0.32, "grad_norm": 1.322974681854248, "learning_rate": 1.5819405582701717e-05, "loss": 0.5289, "step": 12611 }, { "epoch": 0.32, "grad_norm": 6.441142559051514, "learning_rate": 1.5818730464470293e-05, "loss": 0.7712, "step": 12612 }, { "epoch": 0.32, "grad_norm": 3.8613104820251465, "learning_rate": 1.5818055306139835e-05, "loss": 0.6388, "step": 12613 }, { "epoch": 0.32, "grad_norm": 1.8013215065002441, "learning_rate": 1.5817380107714995e-05, "loss": 0.6319, "step": 12614 }, { "epoch": 0.32, "grad_norm": 1.984802007675171, "learning_rate": 1.5816704869200424e-05, "loss": 0.5313, "step": 12615 }, { "epoch": 0.32, "grad_norm": 2.514677047729492, "learning_rate": 1.581602959060078e-05, "loss": 0.5414, "step": 12616 }, { "epoch": 0.32, "grad_norm": 1.9412943124771118, "learning_rate": 1.5815354271920712e-05, "loss": 0.7105, "step": 12617 }, { "epoch": 0.32, "grad_norm": 1.238586187362671, "learning_rate": 1.5814678913164878e-05, "loss": 0.6615, "step": 12618 }, { "epoch": 0.32, "grad_norm": 1.6696715354919434, "learning_rate": 1.581400351433793e-05, "loss": 0.5963, "step": 12619 }, { "epoch": 0.32, "grad_norm": 1.2334643602371216, "learning_rate": 1.5813328075444523e-05, "loss": 0.4317, "step": 12620 }, { "epoch": 0.32, "grad_norm": 3.3435111045837402, "learning_rate": 1.581265259648931e-05, "loss": 0.6198, "step": 12621 }, { "epoch": 0.32, "grad_norm": 1.53399658203125, "learning_rate": 1.581197707747695e-05, "loss": 0.5183, "step": 12622 }, { "epoch": 0.32, "grad_norm": 1.7239999771118164, "learning_rate": 1.581130151841209e-05, "loss": 0.516, "step": 12623 }, { "epoch": 0.32, "grad_norm": 1.9127534627914429, "learning_rate": 1.5810625919299392e-05, "loss": 0.5764, "step": 12624 }, { "epoch": 0.32, "grad_norm": 1.961220383644104, "learning_rate": 1.5809950280143512e-05, "loss": 0.451, "step": 12625 }, { "epoch": 0.32, "grad_norm": 3.122753858566284, "learning_rate": 1.5809274600949107e-05, "loss": 0.5518, "step": 12626 }, { "epoch": 0.32, "grad_norm": 1.823480486869812, "learning_rate": 1.5808598881720828e-05, "loss": 0.6462, "step": 12627 }, { "epoch": 0.32, "grad_norm": 6.168002128601074, "learning_rate": 1.5807923122463337e-05, "loss": 0.6161, "step": 12628 }, { "epoch": 0.32, "grad_norm": 1.4016659259796143, "learning_rate": 1.5807247323181286e-05, "loss": 0.5859, "step": 12629 }, { "epoch": 0.32, "grad_norm": 5.233405590057373, "learning_rate": 1.5806571483879338e-05, "loss": 0.8353, "step": 12630 }, { "epoch": 0.32, "grad_norm": 3.2541143894195557, "learning_rate": 1.5805895604562146e-05, "loss": 0.5667, "step": 12631 }, { "epoch": 0.32, "grad_norm": 4.550004005432129, "learning_rate": 1.5805219685234368e-05, "loss": 0.6479, "step": 12632 }, { "epoch": 0.32, "grad_norm": 7.792737007141113, "learning_rate": 1.5804543725900665e-05, "loss": 0.6349, "step": 12633 }, { "epoch": 0.32, "grad_norm": 4.417478084564209, "learning_rate": 1.5803867726565695e-05, "loss": 0.4945, "step": 12634 }, { "epoch": 0.32, "grad_norm": 1.4749042987823486, "learning_rate": 1.5803191687234114e-05, "loss": 0.7305, "step": 12635 }, { "epoch": 0.32, "grad_norm": 1.97218918800354, "learning_rate": 1.580251560791058e-05, "loss": 0.4764, "step": 12636 }, { "epoch": 0.32, "grad_norm": 1.6887503862380981, "learning_rate": 1.5801839488599756e-05, "loss": 0.435, "step": 12637 }, { "epoch": 0.32, "grad_norm": 2.7057945728302, "learning_rate": 1.5801163329306297e-05, "loss": 0.6074, "step": 12638 }, { "epoch": 0.32, "grad_norm": 1.7893904447555542, "learning_rate": 1.5800487130034865e-05, "loss": 0.6215, "step": 12639 }, { "epoch": 0.32, "grad_norm": 1.3396284580230713, "learning_rate": 1.5799810890790122e-05, "loss": 0.6069, "step": 12640 }, { "epoch": 0.32, "grad_norm": 2.5557668209075928, "learning_rate": 1.5799134611576726e-05, "loss": 0.4602, "step": 12641 }, { "epoch": 0.32, "grad_norm": 1.7208800315856934, "learning_rate": 1.5798458292399334e-05, "loss": 0.5598, "step": 12642 }, { "epoch": 0.32, "grad_norm": 6.980539321899414, "learning_rate": 1.5797781933262613e-05, "loss": 0.9306, "step": 12643 }, { "epoch": 0.32, "grad_norm": 2.9780924320220947, "learning_rate": 1.5797105534171223e-05, "loss": 0.701, "step": 12644 }, { "epoch": 0.32, "grad_norm": 2.1038951873779297, "learning_rate": 1.5796429095129822e-05, "loss": 0.7094, "step": 12645 }, { "epoch": 0.32, "grad_norm": 1.9485081434249878, "learning_rate": 1.5795752616143073e-05, "loss": 0.5774, "step": 12646 }, { "epoch": 0.32, "grad_norm": 1.145407795906067, "learning_rate": 1.579507609721564e-05, "loss": 0.5478, "step": 12647 }, { "epoch": 0.32, "grad_norm": 2.0489413738250732, "learning_rate": 1.5794399538352182e-05, "loss": 0.5987, "step": 12648 }, { "epoch": 0.32, "grad_norm": 1.5057666301727295, "learning_rate": 1.5793722939557362e-05, "loss": 0.7096, "step": 12649 }, { "epoch": 0.32, "grad_norm": 3.680757522583008, "learning_rate": 1.5793046300835842e-05, "loss": 0.6964, "step": 12650 }, { "epoch": 0.32, "grad_norm": 1.7538385391235352, "learning_rate": 1.5792369622192293e-05, "loss": 0.6557, "step": 12651 }, { "epoch": 0.32, "grad_norm": 3.5647172927856445, "learning_rate": 1.5791692903631365e-05, "loss": 0.4562, "step": 12652 }, { "epoch": 0.32, "grad_norm": 1.7067687511444092, "learning_rate": 1.5791016145157732e-05, "loss": 0.6225, "step": 12653 }, { "epoch": 0.32, "grad_norm": 2.0399887561798096, "learning_rate": 1.579033934677605e-05, "loss": 0.7514, "step": 12654 }, { "epoch": 0.32, "grad_norm": 1.9090884923934937, "learning_rate": 1.5789662508490993e-05, "loss": 0.7264, "step": 12655 }, { "epoch": 0.32, "grad_norm": 2.669849395751953, "learning_rate": 1.5788985630307214e-05, "loss": 0.5803, "step": 12656 }, { "epoch": 0.32, "grad_norm": 1.4713000059127808, "learning_rate": 1.5788308712229386e-05, "loss": 0.5402, "step": 12657 }, { "epoch": 0.32, "grad_norm": 4.870156288146973, "learning_rate": 1.5787631754262168e-05, "loss": 0.6897, "step": 12658 }, { "epoch": 0.32, "grad_norm": 3.2359371185302734, "learning_rate": 1.5786954756410235e-05, "loss": 0.7303, "step": 12659 }, { "epoch": 0.32, "grad_norm": 3.2483623027801514, "learning_rate": 1.578627771867824e-05, "loss": 0.7174, "step": 12660 }, { "epoch": 0.32, "grad_norm": 7.968489646911621, "learning_rate": 1.5785600641070852e-05, "loss": 0.6736, "step": 12661 }, { "epoch": 0.32, "grad_norm": 2.010122776031494, "learning_rate": 1.578492352359274e-05, "loss": 0.6674, "step": 12662 }, { "epoch": 0.32, "grad_norm": 1.469411849975586, "learning_rate": 1.5784246366248576e-05, "loss": 0.5811, "step": 12663 }, { "epoch": 0.32, "grad_norm": 7.6222944259643555, "learning_rate": 1.5783569169043013e-05, "loss": 0.6318, "step": 12664 }, { "epoch": 0.32, "grad_norm": 5.980518341064453, "learning_rate": 1.578289193198073e-05, "loss": 0.6529, "step": 12665 }, { "epoch": 0.32, "grad_norm": 2.9501423835754395, "learning_rate": 1.578221465506639e-05, "loss": 0.5983, "step": 12666 }, { "epoch": 0.32, "grad_norm": 1.9725064039230347, "learning_rate": 1.5781537338304653e-05, "loss": 0.6693, "step": 12667 }, { "epoch": 0.32, "grad_norm": 1.4653809070587158, "learning_rate": 1.5780859981700193e-05, "loss": 0.4984, "step": 12668 }, { "epoch": 0.32, "grad_norm": 2.3527305126190186, "learning_rate": 1.5780182585257682e-05, "loss": 0.7095, "step": 12669 }, { "epoch": 0.32, "grad_norm": 3.054666519165039, "learning_rate": 1.5779505148981783e-05, "loss": 0.5674, "step": 12670 }, { "epoch": 0.32, "grad_norm": 16.059648513793945, "learning_rate": 1.577882767287717e-05, "loss": 0.5372, "step": 12671 }, { "epoch": 0.32, "grad_norm": 1.8257540464401245, "learning_rate": 1.57781501569485e-05, "loss": 0.5732, "step": 12672 }, { "epoch": 0.32, "grad_norm": 2.9176182746887207, "learning_rate": 1.577747260120045e-05, "loss": 0.5604, "step": 12673 }, { "epoch": 0.32, "grad_norm": 1.364650011062622, "learning_rate": 1.577679500563769e-05, "loss": 0.5864, "step": 12674 }, { "epoch": 0.32, "grad_norm": 1.4442931413650513, "learning_rate": 1.577611737026489e-05, "loss": 0.6986, "step": 12675 }, { "epoch": 0.32, "grad_norm": 2.5052883625030518, "learning_rate": 1.5775439695086717e-05, "loss": 0.6024, "step": 12676 }, { "epoch": 0.32, "grad_norm": 3.7003817558288574, "learning_rate": 1.577476198010784e-05, "loss": 0.4275, "step": 12677 }, { "epoch": 0.32, "grad_norm": 11.448741912841797, "learning_rate": 1.5774084225332937e-05, "loss": 0.5758, "step": 12678 }, { "epoch": 0.32, "grad_norm": 1.9983911514282227, "learning_rate": 1.577340643076667e-05, "loss": 0.6365, "step": 12679 }, { "epoch": 0.32, "grad_norm": 1.896275281906128, "learning_rate": 1.5772728596413712e-05, "loss": 0.617, "step": 12680 }, { "epoch": 0.33, "grad_norm": 2.427440881729126, "learning_rate": 1.5772050722278737e-05, "loss": 0.6731, "step": 12681 }, { "epoch": 0.33, "grad_norm": 1.8110681772232056, "learning_rate": 1.577137280836641e-05, "loss": 0.634, "step": 12682 }, { "epoch": 0.33, "grad_norm": 4.700080871582031, "learning_rate": 1.5770694854681416e-05, "loss": 0.7227, "step": 12683 }, { "epoch": 0.33, "grad_norm": 1.6874052286148071, "learning_rate": 1.5770016861228413e-05, "loss": 0.5342, "step": 12684 }, { "epoch": 0.33, "grad_norm": 2.9135398864746094, "learning_rate": 1.5769338828012076e-05, "loss": 0.6598, "step": 12685 }, { "epoch": 0.33, "grad_norm": 2.1695590019226074, "learning_rate": 1.5768660755037086e-05, "loss": 0.6862, "step": 12686 }, { "epoch": 0.33, "grad_norm": 4.335602760314941, "learning_rate": 1.576798264230811e-05, "loss": 0.7135, "step": 12687 }, { "epoch": 0.33, "grad_norm": 2.563256025314331, "learning_rate": 1.576730448982982e-05, "loss": 0.7714, "step": 12688 }, { "epoch": 0.33, "grad_norm": 1.9101592302322388, "learning_rate": 1.5766626297606893e-05, "loss": 0.4958, "step": 12689 }, { "epoch": 0.33, "grad_norm": 2.4164154529571533, "learning_rate": 1.5765948065643995e-05, "loss": 0.5824, "step": 12690 }, { "epoch": 0.33, "grad_norm": 3.0910215377807617, "learning_rate": 1.576526979394581e-05, "loss": 0.448, "step": 12691 }, { "epoch": 0.33, "grad_norm": 1.5404165983200073, "learning_rate": 1.5764591482517008e-05, "loss": 0.5707, "step": 12692 }, { "epoch": 0.33, "grad_norm": 1.5173428058624268, "learning_rate": 1.576391313136226e-05, "loss": 0.6625, "step": 12693 }, { "epoch": 0.33, "grad_norm": 0.9253403544425964, "learning_rate": 1.5763234740486246e-05, "loss": 0.4681, "step": 12694 }, { "epoch": 0.33, "grad_norm": 2.1188998222351074, "learning_rate": 1.5762556309893638e-05, "loss": 0.5455, "step": 12695 }, { "epoch": 0.33, "grad_norm": 10.260692596435547, "learning_rate": 1.5761877839589113e-05, "loss": 0.5977, "step": 12696 }, { "epoch": 0.33, "grad_norm": 1.2286884784698486, "learning_rate": 1.5761199329577347e-05, "loss": 0.3244, "step": 12697 }, { "epoch": 0.33, "grad_norm": 2.640275716781616, "learning_rate": 1.5760520779863015e-05, "loss": 0.7902, "step": 12698 }, { "epoch": 0.33, "grad_norm": 1.7675621509552002, "learning_rate": 1.575984219045079e-05, "loss": 0.7385, "step": 12699 }, { "epoch": 0.33, "grad_norm": 1.5960443019866943, "learning_rate": 1.575916356134535e-05, "loss": 0.5012, "step": 12700 }, { "epoch": 0.33, "grad_norm": 1.1496344804763794, "learning_rate": 1.5758484892551378e-05, "loss": 0.5746, "step": 12701 }, { "epoch": 0.33, "grad_norm": 2.037233352661133, "learning_rate": 1.5757806184073544e-05, "loss": 0.7041, "step": 12702 }, { "epoch": 0.33, "grad_norm": 1.7412947416305542, "learning_rate": 1.5757127435916527e-05, "loss": 0.6561, "step": 12703 }, { "epoch": 0.33, "grad_norm": 3.3083674907684326, "learning_rate": 1.5756448648085005e-05, "loss": 0.6796, "step": 12704 }, { "epoch": 0.33, "grad_norm": 1.343428134918213, "learning_rate": 1.5755769820583653e-05, "loss": 0.6561, "step": 12705 }, { "epoch": 0.33, "grad_norm": 3.2542691230773926, "learning_rate": 1.5755090953417155e-05, "loss": 0.7042, "step": 12706 }, { "epoch": 0.33, "grad_norm": 1.3300927877426147, "learning_rate": 1.5754412046590186e-05, "loss": 0.5095, "step": 12707 }, { "epoch": 0.33, "grad_norm": 1.6797151565551758, "learning_rate": 1.575373310010742e-05, "loss": 0.6679, "step": 12708 }, { "epoch": 0.33, "grad_norm": 2.3736000061035156, "learning_rate": 1.5753054113973542e-05, "loss": 0.7131, "step": 12709 }, { "epoch": 0.33, "grad_norm": 1.9237232208251953, "learning_rate": 1.5752375088193233e-05, "loss": 0.6295, "step": 12710 }, { "epoch": 0.33, "grad_norm": 1.4102611541748047, "learning_rate": 1.5751696022771166e-05, "loss": 0.5033, "step": 12711 }, { "epoch": 0.33, "grad_norm": 1.4713540077209473, "learning_rate": 1.5751016917712022e-05, "loss": 0.4976, "step": 12712 }, { "epoch": 0.33, "grad_norm": 2.0276596546173096, "learning_rate": 1.5750337773020488e-05, "loss": 0.6755, "step": 12713 }, { "epoch": 0.33, "grad_norm": 4.23100471496582, "learning_rate": 1.5749658588701235e-05, "loss": 0.8063, "step": 12714 }, { "epoch": 0.33, "grad_norm": 1.760824203491211, "learning_rate": 1.5748979364758947e-05, "loss": 0.7242, "step": 12715 }, { "epoch": 0.33, "grad_norm": 1.4959261417388916, "learning_rate": 1.5748300101198306e-05, "loss": 0.5521, "step": 12716 }, { "epoch": 0.33, "grad_norm": 2.5291271209716797, "learning_rate": 1.5747620798023993e-05, "loss": 0.6417, "step": 12717 }, { "epoch": 0.33, "grad_norm": 2.7932419776916504, "learning_rate": 1.574694145524069e-05, "loss": 0.6001, "step": 12718 }, { "epoch": 0.33, "grad_norm": 2.5996837615966797, "learning_rate": 1.574626207285307e-05, "loss": 0.6913, "step": 12719 }, { "epoch": 0.33, "grad_norm": 3.561202049255371, "learning_rate": 1.5745582650865827e-05, "loss": 0.68, "step": 12720 }, { "epoch": 0.33, "grad_norm": 2.1842808723449707, "learning_rate": 1.5744903189283638e-05, "loss": 0.5527, "step": 12721 }, { "epoch": 0.33, "grad_norm": 3.761488199234009, "learning_rate": 1.5744223688111183e-05, "loss": 0.6161, "step": 12722 }, { "epoch": 0.33, "grad_norm": 4.011020660400391, "learning_rate": 1.574354414735315e-05, "loss": 0.6315, "step": 12723 }, { "epoch": 0.33, "grad_norm": 1.4814434051513672, "learning_rate": 1.574286456701422e-05, "loss": 0.6086, "step": 12724 }, { "epoch": 0.33, "grad_norm": 1.600799560546875, "learning_rate": 1.5742184947099074e-05, "loss": 0.521, "step": 12725 }, { "epoch": 0.33, "grad_norm": 2.020781993865967, "learning_rate": 1.5741505287612397e-05, "loss": 0.7788, "step": 12726 }, { "epoch": 0.33, "grad_norm": 4.238119602203369, "learning_rate": 1.5740825588558873e-05, "loss": 0.6131, "step": 12727 }, { "epoch": 0.33, "grad_norm": 1.8744226694107056, "learning_rate": 1.5740145849943185e-05, "loss": 0.4521, "step": 12728 }, { "epoch": 0.33, "grad_norm": 2.3016135692596436, "learning_rate": 1.573946607177002e-05, "loss": 0.5352, "step": 12729 }, { "epoch": 0.33, "grad_norm": 1.2938177585601807, "learning_rate": 1.5738786254044057e-05, "loss": 0.5172, "step": 12730 }, { "epoch": 0.33, "grad_norm": 2.049766778945923, "learning_rate": 1.5738106396769986e-05, "loss": 0.4906, "step": 12731 }, { "epoch": 0.33, "grad_norm": 1.7245241403579712, "learning_rate": 1.5737426499952494e-05, "loss": 0.6639, "step": 12732 }, { "epoch": 0.33, "grad_norm": 4.14170503616333, "learning_rate": 1.573674656359626e-05, "loss": 0.6151, "step": 12733 }, { "epoch": 0.33, "grad_norm": 2.0206525325775146, "learning_rate": 1.5736066587705974e-05, "loss": 0.5683, "step": 12734 }, { "epoch": 0.33, "grad_norm": 1.3154816627502441, "learning_rate": 1.573538657228632e-05, "loss": 0.6496, "step": 12735 }, { "epoch": 0.33, "grad_norm": 5.295160293579102, "learning_rate": 1.5734706517341988e-05, "loss": 0.3947, "step": 12736 }, { "epoch": 0.33, "grad_norm": 3.8777811527252197, "learning_rate": 1.5734026422877655e-05, "loss": 0.8078, "step": 12737 }, { "epoch": 0.33, "grad_norm": 3.508802890777588, "learning_rate": 1.573334628889802e-05, "loss": 0.5583, "step": 12738 }, { "epoch": 0.33, "grad_norm": 2.8836963176727295, "learning_rate": 1.573266611540776e-05, "loss": 0.5921, "step": 12739 }, { "epoch": 0.33, "grad_norm": 2.2234044075012207, "learning_rate": 1.5731985902411572e-05, "loss": 0.5669, "step": 12740 }, { "epoch": 0.33, "grad_norm": 1.7047785520553589, "learning_rate": 1.5731305649914137e-05, "loss": 0.5474, "step": 12741 }, { "epoch": 0.33, "grad_norm": 2.49116849899292, "learning_rate": 1.573062535792014e-05, "loss": 0.6495, "step": 12742 }, { "epoch": 0.33, "grad_norm": 9.472586631774902, "learning_rate": 1.572994502643428e-05, "loss": 0.6768, "step": 12743 }, { "epoch": 0.33, "grad_norm": 2.9912939071655273, "learning_rate": 1.5729264655461234e-05, "loss": 0.6257, "step": 12744 }, { "epoch": 0.33, "grad_norm": 1.3243821859359741, "learning_rate": 1.5728584245005696e-05, "loss": 0.5551, "step": 12745 }, { "epoch": 0.33, "grad_norm": 1.9570074081420898, "learning_rate": 1.5727903795072355e-05, "loss": 0.5293, "step": 12746 }, { "epoch": 0.33, "grad_norm": 6.087452411651611, "learning_rate": 1.57272233056659e-05, "loss": 0.5835, "step": 12747 }, { "epoch": 0.33, "grad_norm": 1.6005048751831055, "learning_rate": 1.572654277679102e-05, "loss": 0.6968, "step": 12748 }, { "epoch": 0.33, "grad_norm": 3.0277960300445557, "learning_rate": 1.5725862208452407e-05, "loss": 0.5977, "step": 12749 }, { "epoch": 0.33, "grad_norm": 2.243123769760132, "learning_rate": 1.5725181600654744e-05, "loss": 0.8087, "step": 12750 }, { "epoch": 0.33, "grad_norm": 1.4538706541061401, "learning_rate": 1.572450095340273e-05, "loss": 0.5918, "step": 12751 }, { "epoch": 0.33, "grad_norm": 7.161552906036377, "learning_rate": 1.572382026670105e-05, "loss": 0.664, "step": 12752 }, { "epoch": 0.33, "grad_norm": 0.9863607883453369, "learning_rate": 1.57231395405544e-05, "loss": 0.5815, "step": 12753 }, { "epoch": 0.33, "grad_norm": 2.269611358642578, "learning_rate": 1.5722458774967468e-05, "loss": 0.6292, "step": 12754 }, { "epoch": 0.33, "grad_norm": 1.6664161682128906, "learning_rate": 1.5721777969944942e-05, "loss": 0.3982, "step": 12755 }, { "epoch": 0.33, "grad_norm": 1.1833938360214233, "learning_rate": 1.572109712549152e-05, "loss": 0.5007, "step": 12756 }, { "epoch": 0.33, "grad_norm": 1.3367338180541992, "learning_rate": 1.572041624161189e-05, "loss": 0.5289, "step": 12757 }, { "epoch": 0.33, "grad_norm": 4.315981388092041, "learning_rate": 1.5719735318310743e-05, "loss": 0.5955, "step": 12758 }, { "epoch": 0.33, "grad_norm": 2.4865379333496094, "learning_rate": 1.5719054355592774e-05, "loss": 0.5923, "step": 12759 }, { "epoch": 0.33, "grad_norm": 8.273918151855469, "learning_rate": 1.5718373353462676e-05, "loss": 0.8482, "step": 12760 }, { "epoch": 0.33, "grad_norm": 4.109305381774902, "learning_rate": 1.5717692311925143e-05, "loss": 0.6617, "step": 12761 }, { "epoch": 0.33, "grad_norm": 1.2945191860198975, "learning_rate": 1.5717011230984867e-05, "loss": 0.6031, "step": 12762 }, { "epoch": 0.33, "grad_norm": 1.3934130668640137, "learning_rate": 1.571633011064654e-05, "loss": 0.4848, "step": 12763 }, { "epoch": 0.33, "grad_norm": 3.8858160972595215, "learning_rate": 1.5715648950914857e-05, "loss": 0.7667, "step": 12764 }, { "epoch": 0.33, "grad_norm": 1.5811336040496826, "learning_rate": 1.5714967751794512e-05, "loss": 0.4736, "step": 12765 }, { "epoch": 0.33, "grad_norm": 1.9385355710983276, "learning_rate": 1.57142865132902e-05, "loss": 0.6132, "step": 12766 }, { "epoch": 0.33, "grad_norm": 2.578542709350586, "learning_rate": 1.5713605235406616e-05, "loss": 0.5962, "step": 12767 }, { "epoch": 0.33, "grad_norm": 3.5506439208984375, "learning_rate": 1.5712923918148454e-05, "loss": 0.5925, "step": 12768 }, { "epoch": 0.33, "grad_norm": 2.0018911361694336, "learning_rate": 1.571224256152041e-05, "loss": 0.5701, "step": 12769 }, { "epoch": 0.33, "grad_norm": 1.3073081970214844, "learning_rate": 1.571156116552718e-05, "loss": 0.5339, "step": 12770 }, { "epoch": 0.33, "grad_norm": 1.5794813632965088, "learning_rate": 1.5710879730173458e-05, "loss": 0.5479, "step": 12771 }, { "epoch": 0.33, "grad_norm": 1.69619619846344, "learning_rate": 1.571019825546394e-05, "loss": 0.6765, "step": 12772 }, { "epoch": 0.33, "grad_norm": 2.415372371673584, "learning_rate": 1.5709516741403324e-05, "loss": 0.7314, "step": 12773 }, { "epoch": 0.33, "grad_norm": 3.4053237438201904, "learning_rate": 1.5708835187996303e-05, "loss": 0.5374, "step": 12774 }, { "epoch": 0.33, "grad_norm": 2.9158337116241455, "learning_rate": 1.570815359524758e-05, "loss": 0.763, "step": 12775 }, { "epoch": 0.33, "grad_norm": 1.754248857498169, "learning_rate": 1.5707471963161848e-05, "loss": 0.6364, "step": 12776 }, { "epoch": 0.33, "grad_norm": 6.80690336227417, "learning_rate": 1.5706790291743805e-05, "loss": 0.5465, "step": 12777 }, { "epoch": 0.33, "grad_norm": 4.137683868408203, "learning_rate": 1.5706108580998147e-05, "loss": 0.6986, "step": 12778 }, { "epoch": 0.33, "grad_norm": 4.379408359527588, "learning_rate": 1.5705426830929575e-05, "loss": 0.5726, "step": 12779 }, { "epoch": 0.33, "grad_norm": 3.083343505859375, "learning_rate": 1.5704745041542787e-05, "loss": 0.6051, "step": 12780 }, { "epoch": 0.33, "grad_norm": 1.4097155332565308, "learning_rate": 1.5704063212842476e-05, "loss": 0.5836, "step": 12781 }, { "epoch": 0.33, "grad_norm": 2.142711877822876, "learning_rate": 1.5703381344833348e-05, "loss": 0.6206, "step": 12782 }, { "epoch": 0.33, "grad_norm": 3.6348867416381836, "learning_rate": 1.57026994375201e-05, "loss": 0.8124, "step": 12783 }, { "epoch": 0.33, "grad_norm": 1.5101741552352905, "learning_rate": 1.5702017490907428e-05, "loss": 0.6653, "step": 12784 }, { "epoch": 0.33, "grad_norm": 1.5368783473968506, "learning_rate": 1.5701335505000038e-05, "loss": 0.5594, "step": 12785 }, { "epoch": 0.33, "grad_norm": 3.654733657836914, "learning_rate": 1.570065347980262e-05, "loss": 0.579, "step": 12786 }, { "epoch": 0.33, "grad_norm": 3.408918857574463, "learning_rate": 1.5699971415319884e-05, "loss": 0.7103, "step": 12787 }, { "epoch": 0.33, "grad_norm": 2.404196262359619, "learning_rate": 1.5699289311556523e-05, "loss": 0.5438, "step": 12788 }, { "epoch": 0.33, "grad_norm": 1.561713457107544, "learning_rate": 1.569860716851724e-05, "loss": 0.4503, "step": 12789 }, { "epoch": 0.33, "grad_norm": 1.8098868131637573, "learning_rate": 1.569792498620674e-05, "loss": 0.3856, "step": 12790 }, { "epoch": 0.33, "grad_norm": 2.181748867034912, "learning_rate": 1.5697242764629717e-05, "loss": 0.727, "step": 12791 }, { "epoch": 0.33, "grad_norm": 1.8731266260147095, "learning_rate": 1.5696560503790882e-05, "loss": 0.7002, "step": 12792 }, { "epoch": 0.33, "grad_norm": 0.9934712052345276, "learning_rate": 1.5695878203694927e-05, "loss": 0.656, "step": 12793 }, { "epoch": 0.33, "grad_norm": 1.645633339881897, "learning_rate": 1.5695195864346556e-05, "loss": 0.6589, "step": 12794 }, { "epoch": 0.33, "grad_norm": 1.9492391347885132, "learning_rate": 1.5694513485750477e-05, "loss": 0.6221, "step": 12795 }, { "epoch": 0.33, "grad_norm": 2.099902629852295, "learning_rate": 1.569383106791139e-05, "loss": 0.706, "step": 12796 }, { "epoch": 0.33, "grad_norm": 3.618643283843994, "learning_rate": 1.5693148610833993e-05, "loss": 0.565, "step": 12797 }, { "epoch": 0.33, "grad_norm": 2.038299798965454, "learning_rate": 1.5692466114522994e-05, "loss": 0.5794, "step": 12798 }, { "epoch": 0.33, "grad_norm": 3.727334976196289, "learning_rate": 1.5691783578983094e-05, "loss": 0.7532, "step": 12799 }, { "epoch": 0.33, "grad_norm": 2.1221141815185547, "learning_rate": 1.5691101004218996e-05, "loss": 0.6585, "step": 12800 }, { "epoch": 0.33, "grad_norm": 1.9316142797470093, "learning_rate": 1.5690418390235408e-05, "loss": 0.5067, "step": 12801 }, { "epoch": 0.33, "grad_norm": 5.0487518310546875, "learning_rate": 1.568973573703703e-05, "loss": 0.6625, "step": 12802 }, { "epoch": 0.33, "grad_norm": 5.24063777923584, "learning_rate": 1.5689053044628567e-05, "loss": 0.5478, "step": 12803 }, { "epoch": 0.33, "grad_norm": 3.1734609603881836, "learning_rate": 1.5688370313014725e-05, "loss": 0.6475, "step": 12804 }, { "epoch": 0.33, "grad_norm": 3.807529926300049, "learning_rate": 1.5687687542200213e-05, "loss": 0.8101, "step": 12805 }, { "epoch": 0.33, "grad_norm": 4.417123794555664, "learning_rate": 1.568700473218973e-05, "loss": 0.5753, "step": 12806 }, { "epoch": 0.33, "grad_norm": 1.460317850112915, "learning_rate": 1.568632188298798e-05, "loss": 0.5949, "step": 12807 }, { "epoch": 0.33, "grad_norm": 1.5645707845687866, "learning_rate": 1.568563899459967e-05, "loss": 0.491, "step": 12808 }, { "epoch": 0.33, "grad_norm": 1.198575735092163, "learning_rate": 1.5684956067029515e-05, "loss": 0.459, "step": 12809 }, { "epoch": 0.33, "grad_norm": 2.411193370819092, "learning_rate": 1.5684273100282214e-05, "loss": 0.5697, "step": 12810 }, { "epoch": 0.33, "grad_norm": 2.164581775665283, "learning_rate": 1.568359009436247e-05, "loss": 0.6469, "step": 12811 }, { "epoch": 0.33, "grad_norm": 2.8217010498046875, "learning_rate": 1.5682907049274994e-05, "loss": 0.6111, "step": 12812 }, { "epoch": 0.33, "grad_norm": 5.295314788818359, "learning_rate": 1.5682223965024498e-05, "loss": 0.751, "step": 12813 }, { "epoch": 0.33, "grad_norm": 4.122061729431152, "learning_rate": 1.568154084161568e-05, "loss": 0.4856, "step": 12814 }, { "epoch": 0.33, "grad_norm": 1.7020701169967651, "learning_rate": 1.5680857679053253e-05, "loss": 0.4965, "step": 12815 }, { "epoch": 0.33, "grad_norm": 3.606821298599243, "learning_rate": 1.5680174477341923e-05, "loss": 0.7552, "step": 12816 }, { "epoch": 0.33, "grad_norm": 2.0492210388183594, "learning_rate": 1.5679491236486398e-05, "loss": 0.5895, "step": 12817 }, { "epoch": 0.33, "grad_norm": 3.166077136993408, "learning_rate": 1.567880795649139e-05, "loss": 0.59, "step": 12818 }, { "epoch": 0.33, "grad_norm": 1.9426518678665161, "learning_rate": 1.5678124637361604e-05, "loss": 0.5736, "step": 12819 }, { "epoch": 0.33, "grad_norm": 1.7797319889068604, "learning_rate": 1.5677441279101753e-05, "loss": 0.6803, "step": 12820 }, { "epoch": 0.33, "grad_norm": 1.710426688194275, "learning_rate": 1.567675788171654e-05, "loss": 0.6636, "step": 12821 }, { "epoch": 0.33, "grad_norm": 1.74079430103302, "learning_rate": 1.567607444521068e-05, "loss": 0.4387, "step": 12822 }, { "epoch": 0.33, "grad_norm": 1.6934233903884888, "learning_rate": 1.5675390969588882e-05, "loss": 0.4706, "step": 12823 }, { "epoch": 0.33, "grad_norm": 2.0848278999328613, "learning_rate": 1.5674707454855852e-05, "loss": 0.5497, "step": 12824 }, { "epoch": 0.33, "grad_norm": 1.508697748184204, "learning_rate": 1.5674023901016307e-05, "loss": 0.627, "step": 12825 }, { "epoch": 0.33, "grad_norm": 1.829694390296936, "learning_rate": 1.567334030807495e-05, "loss": 0.5497, "step": 12826 }, { "epoch": 0.33, "grad_norm": 2.9564850330352783, "learning_rate": 1.56726566760365e-05, "loss": 0.6671, "step": 12827 }, { "epoch": 0.33, "grad_norm": 2.630146026611328, "learning_rate": 1.5671973004905664e-05, "loss": 0.467, "step": 12828 }, { "epoch": 0.33, "grad_norm": 1.4400707483291626, "learning_rate": 1.567128929468715e-05, "loss": 0.6009, "step": 12829 }, { "epoch": 0.33, "grad_norm": 1.7736507654190063, "learning_rate": 1.5670605545385676e-05, "loss": 0.5264, "step": 12830 }, { "epoch": 0.33, "grad_norm": 2.327453374862671, "learning_rate": 1.566992175700595e-05, "loss": 0.6201, "step": 12831 }, { "epoch": 0.33, "grad_norm": 1.8180981874465942, "learning_rate": 1.5669237929552686e-05, "loss": 0.6217, "step": 12832 }, { "epoch": 0.33, "grad_norm": 2.1880507469177246, "learning_rate": 1.5668554063030597e-05, "loss": 0.4852, "step": 12833 }, { "epoch": 0.33, "grad_norm": 2.1057522296905518, "learning_rate": 1.5667870157444395e-05, "loss": 0.667, "step": 12834 }, { "epoch": 0.33, "grad_norm": 1.5081478357315063, "learning_rate": 1.566718621279879e-05, "loss": 0.5544, "step": 12835 }, { "epoch": 0.33, "grad_norm": 1.485237717628479, "learning_rate": 1.56665022290985e-05, "loss": 0.4993, "step": 12836 }, { "epoch": 0.33, "grad_norm": 2.5206682682037354, "learning_rate": 1.5665818206348235e-05, "loss": 0.6274, "step": 12837 }, { "epoch": 0.33, "grad_norm": 3.3452858924865723, "learning_rate": 1.566513414455271e-05, "loss": 0.5922, "step": 12838 }, { "epoch": 0.33, "grad_norm": 3.144334077835083, "learning_rate": 1.5664450043716643e-05, "loss": 0.5786, "step": 12839 }, { "epoch": 0.33, "grad_norm": 1.944707989692688, "learning_rate": 1.5663765903844743e-05, "loss": 0.7716, "step": 12840 }, { "epoch": 0.33, "grad_norm": 2.8521153926849365, "learning_rate": 1.5663081724941727e-05, "loss": 0.6867, "step": 12841 }, { "epoch": 0.33, "grad_norm": 4.1992340087890625, "learning_rate": 1.566239750701231e-05, "loss": 0.5932, "step": 12842 }, { "epoch": 0.33, "grad_norm": 2.30029559135437, "learning_rate": 1.5661713250061207e-05, "loss": 0.5956, "step": 12843 }, { "epoch": 0.33, "grad_norm": 1.3532283306121826, "learning_rate": 1.5661028954093132e-05, "loss": 0.6259, "step": 12844 }, { "epoch": 0.33, "grad_norm": 1.4362891912460327, "learning_rate": 1.56603446191128e-05, "loss": 0.6435, "step": 12845 }, { "epoch": 0.33, "grad_norm": 1.3623095750808716, "learning_rate": 1.5659660245124934e-05, "loss": 0.4289, "step": 12846 }, { "epoch": 0.33, "grad_norm": 1.4592074155807495, "learning_rate": 1.5658975832134243e-05, "loss": 0.5196, "step": 12847 }, { "epoch": 0.33, "grad_norm": 1.4625232219696045, "learning_rate": 1.5658291380145447e-05, "loss": 0.5341, "step": 12848 }, { "epoch": 0.33, "grad_norm": 2.251621961593628, "learning_rate": 1.5657606889163262e-05, "loss": 0.6755, "step": 12849 }, { "epoch": 0.33, "grad_norm": 1.5252971649169922, "learning_rate": 1.5656922359192405e-05, "loss": 0.6182, "step": 12850 }, { "epoch": 0.33, "grad_norm": 1.5867702960968018, "learning_rate": 1.5656237790237588e-05, "loss": 0.5718, "step": 12851 }, { "epoch": 0.33, "grad_norm": 2.724691390991211, "learning_rate": 1.565555318230354e-05, "loss": 0.618, "step": 12852 }, { "epoch": 0.33, "grad_norm": 1.9053958654403687, "learning_rate": 1.5654868535394968e-05, "loss": 0.5465, "step": 12853 }, { "epoch": 0.33, "grad_norm": 1.2512692213058472, "learning_rate": 1.5654183849516597e-05, "loss": 0.4551, "step": 12854 }, { "epoch": 0.33, "grad_norm": 2.3802125453948975, "learning_rate": 1.5653499124673145e-05, "loss": 0.7175, "step": 12855 }, { "epoch": 0.33, "grad_norm": 3.206427812576294, "learning_rate": 1.5652814360869326e-05, "loss": 0.4593, "step": 12856 }, { "epoch": 0.33, "grad_norm": 1.2787781953811646, "learning_rate": 1.5652129558109865e-05, "loss": 0.5945, "step": 12857 }, { "epoch": 0.33, "grad_norm": 21.374584197998047, "learning_rate": 1.5651444716399475e-05, "loss": 0.6273, "step": 12858 }, { "epoch": 0.33, "grad_norm": 1.2225117683410645, "learning_rate": 1.5650759835742883e-05, "loss": 0.6023, "step": 12859 }, { "epoch": 0.33, "grad_norm": 3.4902029037475586, "learning_rate": 1.5650074916144798e-05, "loss": 0.6165, "step": 12860 }, { "epoch": 0.33, "grad_norm": 2.6088006496429443, "learning_rate": 1.5649389957609947e-05, "loss": 0.7907, "step": 12861 }, { "epoch": 0.33, "grad_norm": 1.7616006135940552, "learning_rate": 1.5648704960143056e-05, "loss": 0.6388, "step": 12862 }, { "epoch": 0.33, "grad_norm": 2.1646811962127686, "learning_rate": 1.5648019923748835e-05, "loss": 0.5358, "step": 12863 }, { "epoch": 0.33, "grad_norm": 1.561295509338379, "learning_rate": 1.564733484843201e-05, "loss": 0.5996, "step": 12864 }, { "epoch": 0.33, "grad_norm": 2.7712039947509766, "learning_rate": 1.5646649734197297e-05, "loss": 0.6242, "step": 12865 }, { "epoch": 0.33, "grad_norm": 1.1228628158569336, "learning_rate": 1.564596458104943e-05, "loss": 0.4847, "step": 12866 }, { "epoch": 0.33, "grad_norm": 1.9507807493209839, "learning_rate": 1.5645279388993115e-05, "loss": 0.7333, "step": 12867 }, { "epoch": 0.33, "grad_norm": 4.965750694274902, "learning_rate": 1.5644594158033085e-05, "loss": 0.6583, "step": 12868 }, { "epoch": 0.33, "grad_norm": 2.348227024078369, "learning_rate": 1.564390888817406e-05, "loss": 0.6465, "step": 12869 }, { "epoch": 0.33, "grad_norm": 2.1365818977355957, "learning_rate": 1.5643223579420754e-05, "loss": 0.7246, "step": 12870 }, { "epoch": 0.33, "grad_norm": 1.520372748374939, "learning_rate": 1.56425382317779e-05, "loss": 0.5036, "step": 12871 }, { "epoch": 0.33, "grad_norm": 3.2416114807128906, "learning_rate": 1.564185284525022e-05, "loss": 0.6878, "step": 12872 }, { "epoch": 0.33, "grad_norm": 3.4132237434387207, "learning_rate": 1.5641167419842433e-05, "loss": 0.6518, "step": 12873 }, { "epoch": 0.33, "grad_norm": 1.996930718421936, "learning_rate": 1.5640481955559262e-05, "loss": 0.5967, "step": 12874 }, { "epoch": 0.33, "grad_norm": 13.973556518554688, "learning_rate": 1.5639796452405435e-05, "loss": 0.5337, "step": 12875 }, { "epoch": 0.33, "grad_norm": 2.521690607070923, "learning_rate": 1.5639110910385677e-05, "loss": 0.7193, "step": 12876 }, { "epoch": 0.33, "grad_norm": 2.7154898643493652, "learning_rate": 1.5638425329504708e-05, "loss": 0.527, "step": 12877 }, { "epoch": 0.33, "grad_norm": 8.737675666809082, "learning_rate": 1.563773970976725e-05, "loss": 0.802, "step": 12878 }, { "epoch": 0.33, "grad_norm": 1.5115280151367188, "learning_rate": 1.5637054051178036e-05, "loss": 0.6469, "step": 12879 }, { "epoch": 0.33, "grad_norm": 4.784313201904297, "learning_rate": 1.5636368353741783e-05, "loss": 0.6151, "step": 12880 }, { "epoch": 0.33, "grad_norm": 1.3935109376907349, "learning_rate": 1.5635682617463225e-05, "loss": 0.5473, "step": 12881 }, { "epoch": 0.33, "grad_norm": 1.5285418033599854, "learning_rate": 1.563499684234708e-05, "loss": 0.6589, "step": 12882 }, { "epoch": 0.33, "grad_norm": 2.0490806102752686, "learning_rate": 1.5634311028398078e-05, "loss": 0.6671, "step": 12883 }, { "epoch": 0.33, "grad_norm": 2.3112852573394775, "learning_rate": 1.5633625175620946e-05, "loss": 0.4217, "step": 12884 }, { "epoch": 0.33, "grad_norm": 1.7292314767837524, "learning_rate": 1.5632939284020404e-05, "loss": 0.5836, "step": 12885 }, { "epoch": 0.33, "grad_norm": 2.5095794200897217, "learning_rate": 1.563225335360119e-05, "loss": 0.7569, "step": 12886 }, { "epoch": 0.33, "grad_norm": 1.4802385568618774, "learning_rate": 1.5631567384368018e-05, "loss": 0.6193, "step": 12887 }, { "epoch": 0.33, "grad_norm": 1.3959071636199951, "learning_rate": 1.5630881376325626e-05, "loss": 0.5531, "step": 12888 }, { "epoch": 0.33, "grad_norm": 2.303248882293701, "learning_rate": 1.5630195329478738e-05, "loss": 0.7163, "step": 12889 }, { "epoch": 0.33, "grad_norm": 2.140582323074341, "learning_rate": 1.5629509243832076e-05, "loss": 0.552, "step": 12890 }, { "epoch": 0.33, "grad_norm": 2.59382963180542, "learning_rate": 1.5628823119390376e-05, "loss": 0.7407, "step": 12891 }, { "epoch": 0.33, "grad_norm": 2.488114356994629, "learning_rate": 1.5628136956158366e-05, "loss": 0.5135, "step": 12892 }, { "epoch": 0.33, "grad_norm": 4.101167678833008, "learning_rate": 1.5627450754140764e-05, "loss": 0.3717, "step": 12893 }, { "epoch": 0.33, "grad_norm": 1.2158316373825073, "learning_rate": 1.5626764513342318e-05, "loss": 0.4831, "step": 12894 }, { "epoch": 0.33, "grad_norm": 1.1647648811340332, "learning_rate": 1.562607823376774e-05, "loss": 0.5876, "step": 12895 }, { "epoch": 0.33, "grad_norm": 1.3166204690933228, "learning_rate": 1.5625391915421764e-05, "loss": 0.5071, "step": 12896 }, { "epoch": 0.33, "grad_norm": 1.8388558626174927, "learning_rate": 1.5624705558309123e-05, "loss": 0.508, "step": 12897 }, { "epoch": 0.33, "grad_norm": 2.97263765335083, "learning_rate": 1.5624019162434546e-05, "loss": 0.6046, "step": 12898 }, { "epoch": 0.33, "grad_norm": 4.350203514099121, "learning_rate": 1.562333272780276e-05, "loss": 0.6415, "step": 12899 }, { "epoch": 0.33, "grad_norm": 3.410933017730713, "learning_rate": 1.56226462544185e-05, "loss": 0.7433, "step": 12900 }, { "epoch": 0.33, "grad_norm": 1.9174832105636597, "learning_rate": 1.562195974228649e-05, "loss": 0.6483, "step": 12901 }, { "epoch": 0.33, "grad_norm": 2.0345630645751953, "learning_rate": 1.5621273191411474e-05, "loss": 0.5915, "step": 12902 }, { "epoch": 0.33, "grad_norm": 4.475225448608398, "learning_rate": 1.5620586601798165e-05, "loss": 0.58, "step": 12903 }, { "epoch": 0.33, "grad_norm": 3.148383617401123, "learning_rate": 1.561989997345131e-05, "loss": 0.6277, "step": 12904 }, { "epoch": 0.33, "grad_norm": 3.351184129714966, "learning_rate": 1.5619213306375635e-05, "loss": 0.6944, "step": 12905 }, { "epoch": 0.33, "grad_norm": 1.516305923461914, "learning_rate": 1.561852660057587e-05, "loss": 0.4531, "step": 12906 }, { "epoch": 0.33, "grad_norm": 5.624637126922607, "learning_rate": 1.561783985605675e-05, "loss": 0.4644, "step": 12907 }, { "epoch": 0.33, "grad_norm": 2.3907697200775146, "learning_rate": 1.561715307282301e-05, "loss": 0.7265, "step": 12908 }, { "epoch": 0.33, "grad_norm": 5.0726237297058105, "learning_rate": 1.5616466250879377e-05, "loss": 0.6331, "step": 12909 }, { "epoch": 0.33, "grad_norm": 3.168137788772583, "learning_rate": 1.561577939023059e-05, "loss": 0.5956, "step": 12910 }, { "epoch": 0.33, "grad_norm": 1.215067982673645, "learning_rate": 1.5615092490881377e-05, "loss": 0.5778, "step": 12911 }, { "epoch": 0.33, "grad_norm": 3.3904027938842773, "learning_rate": 1.5614405552836478e-05, "loss": 0.5427, "step": 12912 }, { "epoch": 0.33, "grad_norm": 2.167128562927246, "learning_rate": 1.561371857610062e-05, "loss": 0.7559, "step": 12913 }, { "epoch": 0.33, "grad_norm": 4.612728595733643, "learning_rate": 1.561303156067854e-05, "loss": 0.5385, "step": 12914 }, { "epoch": 0.33, "grad_norm": 2.533318519592285, "learning_rate": 1.5612344506574976e-05, "loss": 0.6574, "step": 12915 }, { "epoch": 0.33, "grad_norm": 4.631585597991943, "learning_rate": 1.5611657413794657e-05, "loss": 0.7069, "step": 12916 }, { "epoch": 0.33, "grad_norm": 2.263791084289551, "learning_rate": 1.561097028234232e-05, "loss": 0.564, "step": 12917 }, { "epoch": 0.33, "grad_norm": 6.436921119689941, "learning_rate": 1.5610283112222703e-05, "loss": 0.6484, "step": 12918 }, { "epoch": 0.33, "grad_norm": 1.788208246231079, "learning_rate": 1.560959590344054e-05, "loss": 0.602, "step": 12919 }, { "epoch": 0.33, "grad_norm": 2.304252862930298, "learning_rate": 1.5608908656000564e-05, "loss": 0.4944, "step": 12920 }, { "epoch": 0.33, "grad_norm": 3.9104416370391846, "learning_rate": 1.5608221369907516e-05, "loss": 0.5105, "step": 12921 }, { "epoch": 0.33, "grad_norm": 1.3697909116744995, "learning_rate": 1.5607534045166127e-05, "loss": 0.6657, "step": 12922 }, { "epoch": 0.33, "grad_norm": 3.421454668045044, "learning_rate": 1.560684668178114e-05, "loss": 0.5695, "step": 12923 }, { "epoch": 0.33, "grad_norm": 1.6867643594741821, "learning_rate": 1.5606159279757287e-05, "loss": 0.5195, "step": 12924 }, { "epoch": 0.33, "grad_norm": 5.000154495239258, "learning_rate": 1.5605471839099304e-05, "loss": 0.5784, "step": 12925 }, { "epoch": 0.33, "grad_norm": 1.1259877681732178, "learning_rate": 1.560478435981193e-05, "loss": 0.5432, "step": 12926 }, { "epoch": 0.33, "grad_norm": 1.5862390995025635, "learning_rate": 1.5604096841899908e-05, "loss": 0.6062, "step": 12927 }, { "epoch": 0.33, "grad_norm": 6.8421196937561035, "learning_rate": 1.560340928536797e-05, "loss": 0.5389, "step": 12928 }, { "epoch": 0.33, "grad_norm": 1.290454626083374, "learning_rate": 1.5602721690220853e-05, "loss": 0.613, "step": 12929 }, { "epoch": 0.33, "grad_norm": 2.4110238552093506, "learning_rate": 1.56020340564633e-05, "loss": 0.6368, "step": 12930 }, { "epoch": 0.33, "grad_norm": 1.816820502281189, "learning_rate": 1.5601346384100048e-05, "loss": 0.6344, "step": 12931 }, { "epoch": 0.33, "grad_norm": 1.1691887378692627, "learning_rate": 1.5600658673135836e-05, "loss": 0.5687, "step": 12932 }, { "epoch": 0.33, "grad_norm": 1.9036647081375122, "learning_rate": 1.55999709235754e-05, "loss": 0.5867, "step": 12933 }, { "epoch": 0.33, "grad_norm": 1.7968007326126099, "learning_rate": 1.5599283135423487e-05, "loss": 0.6532, "step": 12934 }, { "epoch": 0.33, "grad_norm": 3.490269422531128, "learning_rate": 1.559859530868483e-05, "loss": 0.5812, "step": 12935 }, { "epoch": 0.33, "grad_norm": 2.2026307582855225, "learning_rate": 1.5597907443364174e-05, "loss": 0.7042, "step": 12936 }, { "epoch": 0.33, "grad_norm": 3.8231279850006104, "learning_rate": 1.5597219539466254e-05, "loss": 0.6625, "step": 12937 }, { "epoch": 0.33, "grad_norm": 6.8201799392700195, "learning_rate": 1.5596531596995815e-05, "loss": 0.7267, "step": 12938 }, { "epoch": 0.33, "grad_norm": 5.964277267456055, "learning_rate": 1.5595843615957594e-05, "loss": 0.5524, "step": 12939 }, { "epoch": 0.33, "grad_norm": 3.84161639213562, "learning_rate": 1.5595155596356338e-05, "loss": 0.8123, "step": 12940 }, { "epoch": 0.33, "grad_norm": 1.6659836769104004, "learning_rate": 1.5594467538196784e-05, "loss": 0.5649, "step": 12941 }, { "epoch": 0.33, "grad_norm": 1.5136268138885498, "learning_rate": 1.559377944148367e-05, "loss": 0.5528, "step": 12942 }, { "epoch": 0.33, "grad_norm": 1.5963423252105713, "learning_rate": 1.5593091306221747e-05, "loss": 0.4762, "step": 12943 }, { "epoch": 0.33, "grad_norm": 1.3328757286071777, "learning_rate": 1.5592403132415754e-05, "loss": 0.6409, "step": 12944 }, { "epoch": 0.33, "grad_norm": 1.6550081968307495, "learning_rate": 1.559171492007043e-05, "loss": 0.8059, "step": 12945 }, { "epoch": 0.33, "grad_norm": 3.6466479301452637, "learning_rate": 1.5591026669190518e-05, "loss": 0.7778, "step": 12946 }, { "epoch": 0.33, "grad_norm": 1.5985547304153442, "learning_rate": 1.5590338379780764e-05, "loss": 0.594, "step": 12947 }, { "epoch": 0.33, "grad_norm": 1.6455774307250977, "learning_rate": 1.558965005184591e-05, "loss": 0.7753, "step": 12948 }, { "epoch": 0.33, "grad_norm": 10.487652778625488, "learning_rate": 1.55889616853907e-05, "loss": 0.5765, "step": 12949 }, { "epoch": 0.33, "grad_norm": 1.6108222007751465, "learning_rate": 1.558827328041988e-05, "loss": 0.7961, "step": 12950 }, { "epoch": 0.33, "grad_norm": 1.989035725593567, "learning_rate": 1.5587584836938184e-05, "loss": 0.6179, "step": 12951 }, { "epoch": 0.33, "grad_norm": 2.270947217941284, "learning_rate": 1.558689635495037e-05, "loss": 0.6944, "step": 12952 }, { "epoch": 0.33, "grad_norm": 2.2160918712615967, "learning_rate": 1.5586207834461178e-05, "loss": 0.644, "step": 12953 }, { "epoch": 0.33, "grad_norm": 2.0054540634155273, "learning_rate": 1.5585519275475345e-05, "loss": 0.6924, "step": 12954 }, { "epoch": 0.33, "grad_norm": 1.5119227170944214, "learning_rate": 1.5584830677997628e-05, "loss": 0.5568, "step": 12955 }, { "epoch": 0.33, "grad_norm": 1.3496787548065186, "learning_rate": 1.5584142042032763e-05, "loss": 0.612, "step": 12956 }, { "epoch": 0.33, "grad_norm": 3.2594995498657227, "learning_rate": 1.55834533675855e-05, "loss": 0.6653, "step": 12957 }, { "epoch": 0.33, "grad_norm": 1.6467225551605225, "learning_rate": 1.5582764654660588e-05, "loss": 0.4933, "step": 12958 }, { "epoch": 0.33, "grad_norm": 1.088258981704712, "learning_rate": 1.5582075903262765e-05, "loss": 0.5014, "step": 12959 }, { "epoch": 0.33, "grad_norm": 7.138655185699463, "learning_rate": 1.5581387113396784e-05, "loss": 0.6033, "step": 12960 }, { "epoch": 0.33, "grad_norm": 3.1907315254211426, "learning_rate": 1.558069828506739e-05, "loss": 0.6744, "step": 12961 }, { "epoch": 0.33, "grad_norm": 3.5963218212127686, "learning_rate": 1.5580009418279324e-05, "loss": 0.6576, "step": 12962 }, { "epoch": 0.33, "grad_norm": 2.482311248779297, "learning_rate": 1.5579320513037346e-05, "loss": 0.6086, "step": 12963 }, { "epoch": 0.33, "grad_norm": 4.484996795654297, "learning_rate": 1.5578631569346194e-05, "loss": 0.7473, "step": 12964 }, { "epoch": 0.33, "grad_norm": 5.899898052215576, "learning_rate": 1.5577942587210616e-05, "loss": 0.6562, "step": 12965 }, { "epoch": 0.33, "grad_norm": 1.063705563545227, "learning_rate": 1.5577253566635364e-05, "loss": 0.4096, "step": 12966 }, { "epoch": 0.33, "grad_norm": 1.1281527280807495, "learning_rate": 1.5576564507625186e-05, "loss": 0.6001, "step": 12967 }, { "epoch": 0.33, "grad_norm": 1.6435017585754395, "learning_rate": 1.5575875410184825e-05, "loss": 0.4565, "step": 12968 }, { "epoch": 0.33, "grad_norm": 1.3068864345550537, "learning_rate": 1.5575186274319038e-05, "loss": 0.4985, "step": 12969 }, { "epoch": 0.33, "grad_norm": 1.4915038347244263, "learning_rate": 1.5574497100032565e-05, "loss": 0.6219, "step": 12970 }, { "epoch": 0.33, "grad_norm": 2.053819417953491, "learning_rate": 1.5573807887330163e-05, "loss": 0.7814, "step": 12971 }, { "epoch": 0.33, "grad_norm": 2.6176133155822754, "learning_rate": 1.5573118636216578e-05, "loss": 0.6558, "step": 12972 }, { "epoch": 0.33, "grad_norm": 1.9687517881393433, "learning_rate": 1.557242934669656e-05, "loss": 0.5522, "step": 12973 }, { "epoch": 0.33, "grad_norm": 3.682448387145996, "learning_rate": 1.5571740018774863e-05, "loss": 0.5117, "step": 12974 }, { "epoch": 0.33, "grad_norm": 1.7191261053085327, "learning_rate": 1.5571050652456232e-05, "loss": 0.6646, "step": 12975 }, { "epoch": 0.33, "grad_norm": 1.2780208587646484, "learning_rate": 1.5570361247745423e-05, "loss": 0.4948, "step": 12976 }, { "epoch": 0.33, "grad_norm": 3.8409173488616943, "learning_rate": 1.5569671804647183e-05, "loss": 0.8261, "step": 12977 }, { "epoch": 0.33, "grad_norm": 2.0802671909332275, "learning_rate": 1.5568982323166258e-05, "loss": 0.4859, "step": 12978 }, { "epoch": 0.33, "grad_norm": 1.2935949563980103, "learning_rate": 1.5568292803307415e-05, "loss": 0.6877, "step": 12979 }, { "epoch": 0.33, "grad_norm": 1.436801791191101, "learning_rate": 1.5567603245075388e-05, "loss": 0.4868, "step": 12980 }, { "epoch": 0.33, "grad_norm": 1.444520354270935, "learning_rate": 1.556691364847494e-05, "loss": 0.6239, "step": 12981 }, { "epoch": 0.33, "grad_norm": 4.105180740356445, "learning_rate": 1.5566224013510822e-05, "loss": 0.6969, "step": 12982 }, { "epoch": 0.33, "grad_norm": 1.4635844230651855, "learning_rate": 1.5565534340187786e-05, "loss": 0.6311, "step": 12983 }, { "epoch": 0.33, "grad_norm": 1.4358488321304321, "learning_rate": 1.5564844628510584e-05, "loss": 0.5027, "step": 12984 }, { "epoch": 0.33, "grad_norm": 3.4084250926971436, "learning_rate": 1.556415487848397e-05, "loss": 0.5993, "step": 12985 }, { "epoch": 0.33, "grad_norm": 1.6436740159988403, "learning_rate": 1.556346509011269e-05, "loss": 0.6947, "step": 12986 }, { "epoch": 0.33, "grad_norm": 2.400696039199829, "learning_rate": 1.5562775263401508e-05, "loss": 0.5348, "step": 12987 }, { "epoch": 0.33, "grad_norm": 9.725274085998535, "learning_rate": 1.5562085398355177e-05, "loss": 0.6788, "step": 12988 }, { "epoch": 0.33, "grad_norm": 1.4619518518447876, "learning_rate": 1.556139549497844e-05, "loss": 0.6844, "step": 12989 }, { "epoch": 0.33, "grad_norm": 3.0909128189086914, "learning_rate": 1.5560705553276065e-05, "loss": 0.6284, "step": 12990 }, { "epoch": 0.33, "grad_norm": 1.4353357553482056, "learning_rate": 1.55600155732528e-05, "loss": 0.5493, "step": 12991 }, { "epoch": 0.33, "grad_norm": 4.112400054931641, "learning_rate": 1.5559325554913402e-05, "loss": 0.6745, "step": 12992 }, { "epoch": 0.33, "grad_norm": 1.4842015504837036, "learning_rate": 1.555863549826262e-05, "loss": 0.5882, "step": 12993 }, { "epoch": 0.33, "grad_norm": 3.654085874557495, "learning_rate": 1.555794540330522e-05, "loss": 0.7636, "step": 12994 }, { "epoch": 0.33, "grad_norm": 1.7804641723632812, "learning_rate": 1.555725527004595e-05, "loss": 0.5848, "step": 12995 }, { "epoch": 0.33, "grad_norm": 1.5606709718704224, "learning_rate": 1.555656509848957e-05, "loss": 0.6383, "step": 12996 }, { "epoch": 0.33, "grad_norm": 2.970205545425415, "learning_rate": 1.5555874888640832e-05, "loss": 0.6185, "step": 12997 }, { "epoch": 0.33, "grad_norm": 4.104419231414795, "learning_rate": 1.5555184640504494e-05, "loss": 0.7755, "step": 12998 }, { "epoch": 0.33, "grad_norm": 1.746667742729187, "learning_rate": 1.5554494354085317e-05, "loss": 0.6434, "step": 12999 }, { "epoch": 0.33, "grad_norm": 2.672060489654541, "learning_rate": 1.555380402938805e-05, "loss": 0.6319, "step": 13000 }, { "epoch": 0.33, "grad_norm": 1.5294079780578613, "learning_rate": 1.555311366641746e-05, "loss": 0.532, "step": 13001 }, { "epoch": 0.33, "grad_norm": 2.0492734909057617, "learning_rate": 1.5552423265178297e-05, "loss": 0.7164, "step": 13002 }, { "epoch": 0.33, "grad_norm": 1.8395780324935913, "learning_rate": 1.555173282567532e-05, "loss": 0.6184, "step": 13003 }, { "epoch": 0.33, "grad_norm": 4.851170539855957, "learning_rate": 1.5551042347913286e-05, "loss": 0.7066, "step": 13004 }, { "epoch": 0.33, "grad_norm": 2.7051472663879395, "learning_rate": 1.5550351831896962e-05, "loss": 0.6627, "step": 13005 }, { "epoch": 0.33, "grad_norm": 1.6914554834365845, "learning_rate": 1.55496612776311e-05, "loss": 0.5348, "step": 13006 }, { "epoch": 0.33, "grad_norm": 1.9026689529418945, "learning_rate": 1.5548970685120452e-05, "loss": 0.6153, "step": 13007 }, { "epoch": 0.33, "grad_norm": 2.61772084236145, "learning_rate": 1.5548280054369788e-05, "loss": 0.7036, "step": 13008 }, { "epoch": 0.33, "grad_norm": 1.2808449268341064, "learning_rate": 1.5547589385383867e-05, "loss": 0.6761, "step": 13009 }, { "epoch": 0.33, "grad_norm": 2.405487537384033, "learning_rate": 1.5546898678167442e-05, "loss": 0.7118, "step": 13010 }, { "epoch": 0.33, "grad_norm": 3.8724615573883057, "learning_rate": 1.5546207932725278e-05, "loss": 0.7013, "step": 13011 }, { "epoch": 0.33, "grad_norm": 2.3160147666931152, "learning_rate": 1.554551714906213e-05, "loss": 0.6708, "step": 13012 }, { "epoch": 0.33, "grad_norm": 1.608890414237976, "learning_rate": 1.554482632718276e-05, "loss": 0.6129, "step": 13013 }, { "epoch": 0.33, "grad_norm": 2.7606208324432373, "learning_rate": 1.554413546709194e-05, "loss": 0.5699, "step": 13014 }, { "epoch": 0.33, "grad_norm": 2.057400703430176, "learning_rate": 1.5543444568794416e-05, "loss": 0.733, "step": 13015 }, { "epoch": 0.33, "grad_norm": 1.9443950653076172, "learning_rate": 1.554275363229495e-05, "loss": 0.4401, "step": 13016 }, { "epoch": 0.33, "grad_norm": 2.050853967666626, "learning_rate": 1.5542062657598315e-05, "loss": 0.691, "step": 13017 }, { "epoch": 0.33, "grad_norm": 1.421044111251831, "learning_rate": 1.5541371644709266e-05, "loss": 0.6836, "step": 13018 }, { "epoch": 0.33, "grad_norm": 2.4806854724884033, "learning_rate": 1.5540680593632562e-05, "loss": 0.5483, "step": 13019 }, { "epoch": 0.33, "grad_norm": 3.6780147552490234, "learning_rate": 1.553998950437297e-05, "loss": 0.5401, "step": 13020 }, { "epoch": 0.33, "grad_norm": 4.7826972007751465, "learning_rate": 1.553929837693525e-05, "loss": 0.5003, "step": 13021 }, { "epoch": 0.33, "grad_norm": 1.3190455436706543, "learning_rate": 1.5538607211324163e-05, "loss": 0.6279, "step": 13022 }, { "epoch": 0.33, "grad_norm": 1.9506915807724, "learning_rate": 1.553791600754448e-05, "loss": 0.4713, "step": 13023 }, { "epoch": 0.33, "grad_norm": 1.6977050304412842, "learning_rate": 1.5537224765600955e-05, "loss": 0.5374, "step": 13024 }, { "epoch": 0.33, "grad_norm": 3.525792121887207, "learning_rate": 1.553653348549836e-05, "loss": 0.5288, "step": 13025 }, { "epoch": 0.33, "grad_norm": 1.792995572090149, "learning_rate": 1.5535842167241454e-05, "loss": 0.6553, "step": 13026 }, { "epoch": 0.33, "grad_norm": 3.5857431888580322, "learning_rate": 1.5535150810834997e-05, "loss": 0.6476, "step": 13027 }, { "epoch": 0.33, "grad_norm": 5.447162628173828, "learning_rate": 1.5534459416283764e-05, "loss": 0.6403, "step": 13028 }, { "epoch": 0.33, "grad_norm": 2.5522875785827637, "learning_rate": 1.553376798359251e-05, "loss": 0.6862, "step": 13029 }, { "epoch": 0.33, "grad_norm": 2.0896079540252686, "learning_rate": 1.5533076512766004e-05, "loss": 0.7371, "step": 13030 }, { "epoch": 0.33, "grad_norm": 3.5984134674072266, "learning_rate": 1.5532385003809012e-05, "loss": 0.7402, "step": 13031 }, { "epoch": 0.33, "grad_norm": 1.7484915256500244, "learning_rate": 1.5531693456726298e-05, "loss": 0.6219, "step": 13032 }, { "epoch": 0.33, "grad_norm": 2.614987850189209, "learning_rate": 1.553100187152263e-05, "loss": 0.7915, "step": 13033 }, { "epoch": 0.33, "grad_norm": 2.364398956298828, "learning_rate": 1.5530310248202768e-05, "loss": 0.7371, "step": 13034 }, { "epoch": 0.33, "grad_norm": 2.015162467956543, "learning_rate": 1.5529618586771483e-05, "loss": 0.6466, "step": 13035 }, { "epoch": 0.33, "grad_norm": 3.2171363830566406, "learning_rate": 1.5528926887233542e-05, "loss": 0.5559, "step": 13036 }, { "epoch": 0.33, "grad_norm": 1.9438891410827637, "learning_rate": 1.552823514959371e-05, "loss": 0.6697, "step": 13037 }, { "epoch": 0.33, "grad_norm": 1.4454354047775269, "learning_rate": 1.5527543373856756e-05, "loss": 0.5594, "step": 13038 }, { "epoch": 0.33, "grad_norm": 3.3020987510681152, "learning_rate": 1.5526851560027446e-05, "loss": 0.6583, "step": 13039 }, { "epoch": 0.33, "grad_norm": 2.887625217437744, "learning_rate": 1.5526159708110547e-05, "loss": 0.6503, "step": 13040 }, { "epoch": 0.33, "grad_norm": 2.8664801120758057, "learning_rate": 1.5525467818110823e-05, "loss": 0.6775, "step": 13041 }, { "epoch": 0.33, "grad_norm": 1.7633076906204224, "learning_rate": 1.552477589003305e-05, "loss": 0.6149, "step": 13042 }, { "epoch": 0.33, "grad_norm": 1.3658722639083862, "learning_rate": 1.5524083923881993e-05, "loss": 0.6042, "step": 13043 }, { "epoch": 0.33, "grad_norm": 2.081821918487549, "learning_rate": 1.5523391919662417e-05, "loss": 0.7038, "step": 13044 }, { "epoch": 0.33, "grad_norm": 1.4993146657943726, "learning_rate": 1.5522699877379095e-05, "loss": 0.555, "step": 13045 }, { "epoch": 0.33, "grad_norm": 1.4050045013427734, "learning_rate": 1.55220077970368e-05, "loss": 0.524, "step": 13046 }, { "epoch": 0.33, "grad_norm": 1.3536596298217773, "learning_rate": 1.5521315678640295e-05, "loss": 0.5921, "step": 13047 }, { "epoch": 0.33, "grad_norm": 3.7510063648223877, "learning_rate": 1.5520623522194346e-05, "loss": 0.5177, "step": 13048 }, { "epoch": 0.33, "grad_norm": 1.471580982208252, "learning_rate": 1.5519931327703734e-05, "loss": 0.4728, "step": 13049 }, { "epoch": 0.33, "grad_norm": 1.3721165657043457, "learning_rate": 1.551923909517322e-05, "loss": 0.462, "step": 13050 }, { "epoch": 0.33, "grad_norm": 2.2519545555114746, "learning_rate": 1.5518546824607577e-05, "loss": 0.6813, "step": 13051 }, { "epoch": 0.33, "grad_norm": 2.892481803894043, "learning_rate": 1.5517854516011577e-05, "loss": 0.7711, "step": 13052 }, { "epoch": 0.33, "grad_norm": 1.483802080154419, "learning_rate": 1.551716216938999e-05, "loss": 0.5398, "step": 13053 }, { "epoch": 0.33, "grad_norm": 3.439973831176758, "learning_rate": 1.551646978474759e-05, "loss": 0.6442, "step": 13054 }, { "epoch": 0.33, "grad_norm": 1.5814943313598633, "learning_rate": 1.5515777362089145e-05, "loss": 0.5793, "step": 13055 }, { "epoch": 0.33, "grad_norm": 2.549743890762329, "learning_rate": 1.5515084901419425e-05, "loss": 0.7061, "step": 13056 }, { "epoch": 0.33, "grad_norm": 2.6976680755615234, "learning_rate": 1.5514392402743207e-05, "loss": 0.4997, "step": 13057 }, { "epoch": 0.33, "grad_norm": 2.581995964050293, "learning_rate": 1.551369986606526e-05, "loss": 0.8692, "step": 13058 }, { "epoch": 0.33, "grad_norm": 1.3705949783325195, "learning_rate": 1.5513007291390363e-05, "loss": 0.4539, "step": 13059 }, { "epoch": 0.33, "grad_norm": 5.772825241088867, "learning_rate": 1.5512314678723278e-05, "loss": 0.631, "step": 13060 }, { "epoch": 0.33, "grad_norm": 5.717283248901367, "learning_rate": 1.5511622028068784e-05, "loss": 0.7758, "step": 13061 }, { "epoch": 0.33, "grad_norm": 2.114914894104004, "learning_rate": 1.5510929339431652e-05, "loss": 0.64, "step": 13062 }, { "epoch": 0.33, "grad_norm": 1.5398350954055786, "learning_rate": 1.5510236612816663e-05, "loss": 0.6756, "step": 13063 }, { "epoch": 0.33, "grad_norm": 1.4488089084625244, "learning_rate": 1.550954384822858e-05, "loss": 0.5292, "step": 13064 }, { "epoch": 0.33, "grad_norm": 1.463700771331787, "learning_rate": 1.5508851045672185e-05, "loss": 0.6052, "step": 13065 }, { "epoch": 0.33, "grad_norm": 1.8424201011657715, "learning_rate": 1.5508158205152247e-05, "loss": 0.7066, "step": 13066 }, { "epoch": 0.33, "grad_norm": 1.532387137413025, "learning_rate": 1.5507465326673544e-05, "loss": 0.4321, "step": 13067 }, { "epoch": 0.33, "grad_norm": 2.8983473777770996, "learning_rate": 1.550677241024085e-05, "loss": 0.7351, "step": 13068 }, { "epoch": 0.33, "grad_norm": 1.1515998840332031, "learning_rate": 1.5506079455858942e-05, "loss": 0.5853, "step": 13069 }, { "epoch": 0.33, "grad_norm": 2.12955641746521, "learning_rate": 1.550538646353259e-05, "loss": 0.4856, "step": 13070 }, { "epoch": 0.34, "grad_norm": 1.258371353149414, "learning_rate": 1.5504693433266577e-05, "loss": 0.5709, "step": 13071 }, { "epoch": 0.34, "grad_norm": 2.0650675296783447, "learning_rate": 1.5504000365065674e-05, "loss": 0.5462, "step": 13072 }, { "epoch": 0.34, "grad_norm": 1.0552476644515991, "learning_rate": 1.5503307258934662e-05, "loss": 0.4701, "step": 13073 }, { "epoch": 0.34, "grad_norm": 1.504506230354309, "learning_rate": 1.550261411487831e-05, "loss": 0.7572, "step": 13074 }, { "epoch": 0.34, "grad_norm": 1.780427098274231, "learning_rate": 1.5501920932901396e-05, "loss": 0.4896, "step": 13075 }, { "epoch": 0.34, "grad_norm": 1.586448073387146, "learning_rate": 1.5501227713008703e-05, "loss": 0.6094, "step": 13076 }, { "epoch": 0.34, "grad_norm": 3.3704545497894287, "learning_rate": 1.5500534455205004e-05, "loss": 0.5497, "step": 13077 }, { "epoch": 0.34, "grad_norm": 1.9997131824493408, "learning_rate": 1.549984115949508e-05, "loss": 0.5287, "step": 13078 }, { "epoch": 0.34, "grad_norm": 1.409042239189148, "learning_rate": 1.5499147825883704e-05, "loss": 0.6175, "step": 13079 }, { "epoch": 0.34, "grad_norm": 3.3191423416137695, "learning_rate": 1.5498454454375654e-05, "loss": 0.4946, "step": 13080 }, { "epoch": 0.34, "grad_norm": 8.545674324035645, "learning_rate": 1.549776104497571e-05, "loss": 0.5456, "step": 13081 }, { "epoch": 0.34, "grad_norm": 1.7271028757095337, "learning_rate": 1.5497067597688658e-05, "loss": 0.6627, "step": 13082 }, { "epoch": 0.34, "grad_norm": 1.7145882844924927, "learning_rate": 1.5496374112519262e-05, "loss": 0.611, "step": 13083 }, { "epoch": 0.34, "grad_norm": 2.926914691925049, "learning_rate": 1.5495680589472312e-05, "loss": 0.7434, "step": 13084 }, { "epoch": 0.34, "grad_norm": 4.779566287994385, "learning_rate": 1.5494987028552584e-05, "loss": 0.6361, "step": 13085 }, { "epoch": 0.34, "grad_norm": 2.4566235542297363, "learning_rate": 1.549429342976486e-05, "loss": 0.5828, "step": 13086 }, { "epoch": 0.34, "grad_norm": 1.9867079257965088, "learning_rate": 1.5493599793113914e-05, "loss": 0.7245, "step": 13087 }, { "epoch": 0.34, "grad_norm": 3.2556381225585938, "learning_rate": 1.549290611860453e-05, "loss": 0.5749, "step": 13088 }, { "epoch": 0.34, "grad_norm": 2.153374195098877, "learning_rate": 1.549221240624149e-05, "loss": 0.5504, "step": 13089 }, { "epoch": 0.34, "grad_norm": 1.5527764558792114, "learning_rate": 1.549151865602957e-05, "loss": 0.4458, "step": 13090 }, { "epoch": 0.34, "grad_norm": 6.530335426330566, "learning_rate": 1.5490824867973553e-05, "loss": 0.6865, "step": 13091 }, { "epoch": 0.34, "grad_norm": 1.5267211198806763, "learning_rate": 1.5490131042078226e-05, "loss": 0.4629, "step": 13092 }, { "epoch": 0.34, "grad_norm": 4.206407070159912, "learning_rate": 1.548943717834836e-05, "loss": 0.7608, "step": 13093 }, { "epoch": 0.34, "grad_norm": 1.6890735626220703, "learning_rate": 1.5488743276788745e-05, "loss": 0.6203, "step": 13094 }, { "epoch": 0.34, "grad_norm": 1.530720829963684, "learning_rate": 1.5488049337404155e-05, "loss": 0.5119, "step": 13095 }, { "epoch": 0.34, "grad_norm": 2.631683111190796, "learning_rate": 1.5487355360199382e-05, "loss": 0.6834, "step": 13096 }, { "epoch": 0.34, "grad_norm": 2.4778988361358643, "learning_rate": 1.54866613451792e-05, "loss": 0.535, "step": 13097 }, { "epoch": 0.34, "grad_norm": 5.931501865386963, "learning_rate": 1.5485967292348397e-05, "loss": 0.5885, "step": 13098 }, { "epoch": 0.34, "grad_norm": 2.524890422821045, "learning_rate": 1.5485273201711754e-05, "loss": 0.5726, "step": 13099 }, { "epoch": 0.34, "grad_norm": 2.634843111038208, "learning_rate": 1.5484579073274052e-05, "loss": 0.5949, "step": 13100 }, { "epoch": 0.34, "grad_norm": 1.5738580226898193, "learning_rate": 1.548388490704008e-05, "loss": 0.4807, "step": 13101 }, { "epoch": 0.34, "grad_norm": 2.617979049682617, "learning_rate": 1.5483190703014616e-05, "loss": 0.6504, "step": 13102 }, { "epoch": 0.34, "grad_norm": 5.920228481292725, "learning_rate": 1.548249646120245e-05, "loss": 0.5747, "step": 13103 }, { "epoch": 0.34, "grad_norm": 2.1241660118103027, "learning_rate": 1.548180218160836e-05, "loss": 0.5061, "step": 13104 }, { "epoch": 0.34, "grad_norm": 1.6521388292312622, "learning_rate": 1.5481107864237134e-05, "loss": 0.4627, "step": 13105 }, { "epoch": 0.34, "grad_norm": 3.319242477416992, "learning_rate": 1.5480413509093555e-05, "loss": 0.649, "step": 13106 }, { "epoch": 0.34, "grad_norm": 1.647267460823059, "learning_rate": 1.547971911618241e-05, "loss": 0.6382, "step": 13107 }, { "epoch": 0.34, "grad_norm": 4.562036991119385, "learning_rate": 1.547902468550848e-05, "loss": 0.6008, "step": 13108 }, { "epoch": 0.34, "grad_norm": 2.0302648544311523, "learning_rate": 1.547833021707656e-05, "loss": 0.7004, "step": 13109 }, { "epoch": 0.34, "grad_norm": 5.055891036987305, "learning_rate": 1.547763571089143e-05, "loss": 0.9095, "step": 13110 }, { "epoch": 0.34, "grad_norm": 1.686977505683899, "learning_rate": 1.5476941166957874e-05, "loss": 0.5531, "step": 13111 }, { "epoch": 0.34, "grad_norm": 3.3196728229522705, "learning_rate": 1.547624658528068e-05, "loss": 0.644, "step": 13112 }, { "epoch": 0.34, "grad_norm": 1.86263108253479, "learning_rate": 1.5475551965864633e-05, "loss": 0.5592, "step": 13113 }, { "epoch": 0.34, "grad_norm": 2.656693935394287, "learning_rate": 1.5474857308714526e-05, "loss": 0.6691, "step": 13114 }, { "epoch": 0.34, "grad_norm": 1.8247121572494507, "learning_rate": 1.547416261383514e-05, "loss": 0.5916, "step": 13115 }, { "epoch": 0.34, "grad_norm": 1.3460490703582764, "learning_rate": 1.5473467881231265e-05, "loss": 0.5934, "step": 13116 }, { "epoch": 0.34, "grad_norm": 6.122802257537842, "learning_rate": 1.5472773110907687e-05, "loss": 0.5584, "step": 13117 }, { "epoch": 0.34, "grad_norm": 2.243006706237793, "learning_rate": 1.5472078302869197e-05, "loss": 0.696, "step": 13118 }, { "epoch": 0.34, "grad_norm": 3.4870717525482178, "learning_rate": 1.547138345712058e-05, "loss": 0.588, "step": 13119 }, { "epoch": 0.34, "grad_norm": 1.1455585956573486, "learning_rate": 1.5470688573666628e-05, "loss": 0.6667, "step": 13120 }, { "epoch": 0.34, "grad_norm": 2.166229486465454, "learning_rate": 1.5469993652512126e-05, "loss": 0.5648, "step": 13121 }, { "epoch": 0.34, "grad_norm": 1.746660828590393, "learning_rate": 1.5469298693661863e-05, "loss": 0.5025, "step": 13122 }, { "epoch": 0.34, "grad_norm": 1.3513659238815308, "learning_rate": 1.546860369712063e-05, "loss": 0.4939, "step": 13123 }, { "epoch": 0.34, "grad_norm": 0.9375208616256714, "learning_rate": 1.5467908662893216e-05, "loss": 0.6074, "step": 13124 }, { "epoch": 0.34, "grad_norm": 3.2467129230499268, "learning_rate": 1.546721359098441e-05, "loss": 0.5969, "step": 13125 }, { "epoch": 0.34, "grad_norm": 3.466539144515991, "learning_rate": 1.5466518481399003e-05, "loss": 0.575, "step": 13126 }, { "epoch": 0.34, "grad_norm": 1.150790810585022, "learning_rate": 1.546582333414179e-05, "loss": 0.6437, "step": 13127 }, { "epoch": 0.34, "grad_norm": 1.5086947679519653, "learning_rate": 1.546512814921755e-05, "loss": 0.6302, "step": 13128 }, { "epoch": 0.34, "grad_norm": 2.318868398666382, "learning_rate": 1.546443292663108e-05, "loss": 0.7554, "step": 13129 }, { "epoch": 0.34, "grad_norm": 2.5206098556518555, "learning_rate": 1.5463737666387176e-05, "loss": 0.4524, "step": 13130 }, { "epoch": 0.34, "grad_norm": 4.840446949005127, "learning_rate": 1.5463042368490624e-05, "loss": 0.6993, "step": 13131 }, { "epoch": 0.34, "grad_norm": 1.3663297891616821, "learning_rate": 1.5462347032946215e-05, "loss": 0.6163, "step": 13132 }, { "epoch": 0.34, "grad_norm": 1.4703242778778076, "learning_rate": 1.5461651659758744e-05, "loss": 0.5118, "step": 13133 }, { "epoch": 0.34, "grad_norm": 2.078141927719116, "learning_rate": 1.5460956248932998e-05, "loss": 0.5815, "step": 13134 }, { "epoch": 0.34, "grad_norm": 1.6951687335968018, "learning_rate": 1.546026080047377e-05, "loss": 0.5363, "step": 13135 }, { "epoch": 0.34, "grad_norm": 6.605808734893799, "learning_rate": 1.545956531438586e-05, "loss": 1.077, "step": 13136 }, { "epoch": 0.34, "grad_norm": 3.2481181621551514, "learning_rate": 1.5458869790674052e-05, "loss": 0.6776, "step": 13137 }, { "epoch": 0.34, "grad_norm": 1.4065155982971191, "learning_rate": 1.545817422934315e-05, "loss": 0.5256, "step": 13138 }, { "epoch": 0.34, "grad_norm": 1.3163647651672363, "learning_rate": 1.5457478630397934e-05, "loss": 0.4657, "step": 13139 }, { "epoch": 0.34, "grad_norm": 1.527330756187439, "learning_rate": 1.5456782993843206e-05, "loss": 0.5842, "step": 13140 }, { "epoch": 0.34, "grad_norm": 7.306219100952148, "learning_rate": 1.5456087319683757e-05, "loss": 0.6418, "step": 13141 }, { "epoch": 0.34, "grad_norm": 2.0437259674072266, "learning_rate": 1.5455391607924382e-05, "loss": 0.673, "step": 13142 }, { "epoch": 0.34, "grad_norm": 2.1383025646209717, "learning_rate": 1.5454695858569877e-05, "loss": 0.6923, "step": 13143 }, { "epoch": 0.34, "grad_norm": 3.593646287918091, "learning_rate": 1.5454000071625036e-05, "loss": 0.4027, "step": 13144 }, { "epoch": 0.34, "grad_norm": 1.7044826745986938, "learning_rate": 1.545330424709465e-05, "loss": 0.5917, "step": 13145 }, { "epoch": 0.34, "grad_norm": 2.1398491859436035, "learning_rate": 1.545260838498352e-05, "loss": 0.4449, "step": 13146 }, { "epoch": 0.34, "grad_norm": 1.653067708015442, "learning_rate": 1.5451912485296437e-05, "loss": 0.66, "step": 13147 }, { "epoch": 0.34, "grad_norm": 1.4912457466125488, "learning_rate": 1.5451216548038197e-05, "loss": 0.6912, "step": 13148 }, { "epoch": 0.34, "grad_norm": 1.8446574211120605, "learning_rate": 1.5450520573213598e-05, "loss": 0.5811, "step": 13149 }, { "epoch": 0.34, "grad_norm": 1.6737388372421265, "learning_rate": 1.5449824560827438e-05, "loss": 0.6001, "step": 13150 }, { "epoch": 0.34, "grad_norm": 2.1114659309387207, "learning_rate": 1.544912851088451e-05, "loss": 0.6547, "step": 13151 }, { "epoch": 0.34, "grad_norm": 1.1710679531097412, "learning_rate": 1.544843242338961e-05, "loss": 0.4254, "step": 13152 }, { "epoch": 0.34, "grad_norm": 1.6087019443511963, "learning_rate": 1.544773629834754e-05, "loss": 0.5674, "step": 13153 }, { "epoch": 0.34, "grad_norm": 1.8484952449798584, "learning_rate": 1.544704013576309e-05, "loss": 0.564, "step": 13154 }, { "epoch": 0.34, "grad_norm": 1.9138274192810059, "learning_rate": 1.5446343935641066e-05, "loss": 0.5626, "step": 13155 }, { "epoch": 0.34, "grad_norm": 3.0259814262390137, "learning_rate": 1.5445647697986257e-05, "loss": 0.6346, "step": 13156 }, { "epoch": 0.34, "grad_norm": 1.8525991439819336, "learning_rate": 1.5444951422803466e-05, "loss": 0.6021, "step": 13157 }, { "epoch": 0.34, "grad_norm": 1.7876052856445312, "learning_rate": 1.5444255110097492e-05, "loss": 0.584, "step": 13158 }, { "epoch": 0.34, "grad_norm": 2.0861899852752686, "learning_rate": 1.5443558759873132e-05, "loss": 0.5041, "step": 13159 }, { "epoch": 0.34, "grad_norm": 1.3452239036560059, "learning_rate": 1.5442862372135188e-05, "loss": 0.5921, "step": 13160 }, { "epoch": 0.34, "grad_norm": 1.545202612876892, "learning_rate": 1.5442165946888455e-05, "loss": 0.5112, "step": 13161 }, { "epoch": 0.34, "grad_norm": 2.0681424140930176, "learning_rate": 1.544146948413773e-05, "loss": 0.6783, "step": 13162 }, { "epoch": 0.34, "grad_norm": 2.8772518634796143, "learning_rate": 1.5440772983887816e-05, "loss": 0.7356, "step": 13163 }, { "epoch": 0.34, "grad_norm": 1.4227254390716553, "learning_rate": 1.5440076446143515e-05, "loss": 0.5997, "step": 13164 }, { "epoch": 0.34, "grad_norm": 2.1859211921691895, "learning_rate": 1.5439379870909626e-05, "loss": 0.441, "step": 13165 }, { "epoch": 0.34, "grad_norm": 1.5046604871749878, "learning_rate": 1.543868325819095e-05, "loss": 0.5667, "step": 13166 }, { "epoch": 0.34, "grad_norm": 1.9213535785675049, "learning_rate": 1.543798660799228e-05, "loss": 0.7421, "step": 13167 }, { "epoch": 0.34, "grad_norm": 4.063724040985107, "learning_rate": 1.5437289920318423e-05, "loss": 0.7608, "step": 13168 }, { "epoch": 0.34, "grad_norm": 3.623558521270752, "learning_rate": 1.5436593195174184e-05, "loss": 0.7635, "step": 13169 }, { "epoch": 0.34, "grad_norm": 1.3708416223526, "learning_rate": 1.5435896432564356e-05, "loss": 0.4935, "step": 13170 }, { "epoch": 0.34, "grad_norm": 2.8233513832092285, "learning_rate": 1.543519963249375e-05, "loss": 0.6417, "step": 13171 }, { "epoch": 0.34, "grad_norm": 2.0604324340820312, "learning_rate": 1.543450279496716e-05, "loss": 0.7902, "step": 13172 }, { "epoch": 0.34, "grad_norm": 2.036893367767334, "learning_rate": 1.5433805919989394e-05, "loss": 0.4981, "step": 13173 }, { "epoch": 0.34, "grad_norm": 1.418429970741272, "learning_rate": 1.5433109007565245e-05, "loss": 0.6175, "step": 13174 }, { "epoch": 0.34, "grad_norm": 2.029829978942871, "learning_rate": 1.543241205769953e-05, "loss": 0.4356, "step": 13175 }, { "epoch": 0.34, "grad_norm": 10.2883939743042, "learning_rate": 1.5431715070397038e-05, "loss": 0.4682, "step": 13176 }, { "epoch": 0.34, "grad_norm": 2.8103344440460205, "learning_rate": 1.543101804566258e-05, "loss": 0.7105, "step": 13177 }, { "epoch": 0.34, "grad_norm": 5.313912868499756, "learning_rate": 1.5430320983500958e-05, "loss": 0.6928, "step": 13178 }, { "epoch": 0.34, "grad_norm": 2.435340404510498, "learning_rate": 1.5429623883916975e-05, "loss": 0.6102, "step": 13179 }, { "epoch": 0.34, "grad_norm": 1.9582961797714233, "learning_rate": 1.5428926746915437e-05, "loss": 0.6047, "step": 13180 }, { "epoch": 0.34, "grad_norm": 4.435967922210693, "learning_rate": 1.5428229572501145e-05, "loss": 0.857, "step": 13181 }, { "epoch": 0.34, "grad_norm": 3.750485897064209, "learning_rate": 1.5427532360678906e-05, "loss": 0.6188, "step": 13182 }, { "epoch": 0.34, "grad_norm": 1.963905692100525, "learning_rate": 1.5426835111453522e-05, "loss": 0.6657, "step": 13183 }, { "epoch": 0.34, "grad_norm": 7.017673015594482, "learning_rate": 1.5426137824829804e-05, "loss": 0.6433, "step": 13184 }, { "epoch": 0.34, "grad_norm": 2.9552385807037354, "learning_rate": 1.542544050081255e-05, "loss": 0.6693, "step": 13185 }, { "epoch": 0.34, "grad_norm": 7.494712829589844, "learning_rate": 1.542474313940657e-05, "loss": 0.7203, "step": 13186 }, { "epoch": 0.34, "grad_norm": 2.5270376205444336, "learning_rate": 1.5424045740616667e-05, "loss": 0.6813, "step": 13187 }, { "epoch": 0.34, "grad_norm": 6.987388610839844, "learning_rate": 1.542334830444765e-05, "loss": 0.4833, "step": 13188 }, { "epoch": 0.34, "grad_norm": 1.1991266012191772, "learning_rate": 1.5422650830904322e-05, "loss": 0.4421, "step": 13189 }, { "epoch": 0.34, "grad_norm": 1.0652518272399902, "learning_rate": 1.542195331999149e-05, "loss": 0.4652, "step": 13190 }, { "epoch": 0.34, "grad_norm": 4.023861408233643, "learning_rate": 1.5421255771713966e-05, "loss": 0.4718, "step": 13191 }, { "epoch": 0.34, "grad_norm": 2.0861663818359375, "learning_rate": 1.542055818607655e-05, "loss": 0.7848, "step": 13192 }, { "epoch": 0.34, "grad_norm": 1.6176692247390747, "learning_rate": 1.5419860563084056e-05, "loss": 0.565, "step": 13193 }, { "epoch": 0.34, "grad_norm": 1.4428082704544067, "learning_rate": 1.5419162902741287e-05, "loss": 0.4715, "step": 13194 }, { "epoch": 0.34, "grad_norm": 1.1913623809814453, "learning_rate": 1.5418465205053052e-05, "loss": 0.5105, "step": 13195 }, { "epoch": 0.34, "grad_norm": 2.6677658557891846, "learning_rate": 1.5417767470024156e-05, "loss": 0.6076, "step": 13196 }, { "epoch": 0.34, "grad_norm": 2.7020926475524902, "learning_rate": 1.5417069697659416e-05, "loss": 0.6035, "step": 13197 }, { "epoch": 0.34, "grad_norm": 1.631761908531189, "learning_rate": 1.5416371887963627e-05, "loss": 0.7043, "step": 13198 }, { "epoch": 0.34, "grad_norm": 1.411582589149475, "learning_rate": 1.5415674040941613e-05, "loss": 0.5579, "step": 13199 }, { "epoch": 0.34, "grad_norm": 3.079556465148926, "learning_rate": 1.5414976156598172e-05, "loss": 0.4625, "step": 13200 }, { "epoch": 0.34, "grad_norm": 2.598194122314453, "learning_rate": 1.541427823493812e-05, "loss": 0.6628, "step": 13201 }, { "epoch": 0.34, "grad_norm": 4.150028705596924, "learning_rate": 1.541358027596626e-05, "loss": 0.8345, "step": 13202 }, { "epoch": 0.34, "grad_norm": 1.632123589515686, "learning_rate": 1.541288227968741e-05, "loss": 0.7894, "step": 13203 }, { "epoch": 0.34, "grad_norm": 7.761053085327148, "learning_rate": 1.5412184246106376e-05, "loss": 0.7008, "step": 13204 }, { "epoch": 0.34, "grad_norm": 4.429832935333252, "learning_rate": 1.5411486175227967e-05, "loss": 0.5307, "step": 13205 }, { "epoch": 0.34, "grad_norm": 1.231958270072937, "learning_rate": 1.5410788067056995e-05, "loss": 0.5927, "step": 13206 }, { "epoch": 0.34, "grad_norm": 1.566246747970581, "learning_rate": 1.541008992159827e-05, "loss": 0.5439, "step": 13207 }, { "epoch": 0.34, "grad_norm": 3.7125163078308105, "learning_rate": 1.5409391738856605e-05, "loss": 0.768, "step": 13208 }, { "epoch": 0.34, "grad_norm": 3.5711047649383545, "learning_rate": 1.5408693518836813e-05, "loss": 0.6704, "step": 13209 }, { "epoch": 0.34, "grad_norm": 3.464869737625122, "learning_rate": 1.54079952615437e-05, "loss": 0.6363, "step": 13210 }, { "epoch": 0.34, "grad_norm": 2.35671329498291, "learning_rate": 1.5407296966982082e-05, "loss": 0.6997, "step": 13211 }, { "epoch": 0.34, "grad_norm": 3.3757050037384033, "learning_rate": 1.540659863515677e-05, "loss": 0.5896, "step": 13212 }, { "epoch": 0.34, "grad_norm": 2.7986598014831543, "learning_rate": 1.5405900266072582e-05, "loss": 0.6551, "step": 13213 }, { "epoch": 0.34, "grad_norm": 1.7142668962478638, "learning_rate": 1.5405201859734324e-05, "loss": 0.6168, "step": 13214 }, { "epoch": 0.34, "grad_norm": 2.792526960372925, "learning_rate": 1.5404503416146805e-05, "loss": 0.5077, "step": 13215 }, { "epoch": 0.34, "grad_norm": 2.8508849143981934, "learning_rate": 1.5403804935314845e-05, "loss": 0.5288, "step": 13216 }, { "epoch": 0.34, "grad_norm": 2.3736441135406494, "learning_rate": 1.540310641724326e-05, "loss": 0.5394, "step": 13217 }, { "epoch": 0.34, "grad_norm": 2.004067897796631, "learning_rate": 1.540240786193686e-05, "loss": 0.7539, "step": 13218 }, { "epoch": 0.34, "grad_norm": 6.858737468719482, "learning_rate": 1.540170926940046e-05, "loss": 0.6531, "step": 13219 }, { "epoch": 0.34, "grad_norm": 1.0927388668060303, "learning_rate": 1.540101063963887e-05, "loss": 0.445, "step": 13220 }, { "epoch": 0.34, "grad_norm": 1.812938928604126, "learning_rate": 1.540031197265691e-05, "loss": 0.6705, "step": 13221 }, { "epoch": 0.34, "grad_norm": 2.7813186645507812, "learning_rate": 1.539961326845939e-05, "loss": 0.7801, "step": 13222 }, { "epoch": 0.34, "grad_norm": 2.6336238384246826, "learning_rate": 1.539891452705113e-05, "loss": 0.6388, "step": 13223 }, { "epoch": 0.34, "grad_norm": 9.161930084228516, "learning_rate": 1.5398215748436946e-05, "loss": 0.6066, "step": 13224 }, { "epoch": 0.34, "grad_norm": 5.260777950286865, "learning_rate": 1.5397516932621645e-05, "loss": 0.6105, "step": 13225 }, { "epoch": 0.34, "grad_norm": 1.9237266778945923, "learning_rate": 1.5396818079610052e-05, "loss": 0.54, "step": 13226 }, { "epoch": 0.34, "grad_norm": 3.567767381668091, "learning_rate": 1.5396119189406977e-05, "loss": 0.5722, "step": 13227 }, { "epoch": 0.34, "grad_norm": 1.7302602529525757, "learning_rate": 1.539542026201724e-05, "loss": 0.5786, "step": 13228 }, { "epoch": 0.34, "grad_norm": 1.0336899757385254, "learning_rate": 1.5394721297445656e-05, "loss": 0.5304, "step": 13229 }, { "epoch": 0.34, "grad_norm": 1.4335954189300537, "learning_rate": 1.5394022295697042e-05, "loss": 0.6196, "step": 13230 }, { "epoch": 0.34, "grad_norm": 3.801811933517456, "learning_rate": 1.5393323256776215e-05, "loss": 0.8114, "step": 13231 }, { "epoch": 0.34, "grad_norm": 1.862492561340332, "learning_rate": 1.5392624180687992e-05, "loss": 0.6393, "step": 13232 }, { "epoch": 0.34, "grad_norm": 3.0565037727355957, "learning_rate": 1.5391925067437195e-05, "loss": 0.748, "step": 13233 }, { "epoch": 0.34, "grad_norm": 1.4128204584121704, "learning_rate": 1.5391225917028634e-05, "loss": 0.6396, "step": 13234 }, { "epoch": 0.34, "grad_norm": 4.521236419677734, "learning_rate": 1.539052672946713e-05, "loss": 0.7498, "step": 13235 }, { "epoch": 0.34, "grad_norm": 1.7100706100463867, "learning_rate": 1.5389827504757508e-05, "loss": 0.5224, "step": 13236 }, { "epoch": 0.34, "grad_norm": 2.1455986499786377, "learning_rate": 1.5389128242904575e-05, "loss": 0.472, "step": 13237 }, { "epoch": 0.34, "grad_norm": 4.647315979003906, "learning_rate": 1.538842894391316e-05, "loss": 0.7063, "step": 13238 }, { "epoch": 0.34, "grad_norm": 1.2261301279067993, "learning_rate": 1.5387729607788076e-05, "loss": 0.5679, "step": 13239 }, { "epoch": 0.34, "grad_norm": 2.254058361053467, "learning_rate": 1.5387030234534145e-05, "loss": 0.466, "step": 13240 }, { "epoch": 0.34, "grad_norm": 2.4466943740844727, "learning_rate": 1.5386330824156187e-05, "loss": 0.7218, "step": 13241 }, { "epoch": 0.34, "grad_norm": 1.9276000261306763, "learning_rate": 1.538563137665902e-05, "loss": 0.5556, "step": 13242 }, { "epoch": 0.34, "grad_norm": 2.1102306842803955, "learning_rate": 1.5384931892047467e-05, "loss": 0.7129, "step": 13243 }, { "epoch": 0.34, "grad_norm": 6.824401378631592, "learning_rate": 1.5384232370326344e-05, "loss": 0.6038, "step": 13244 }, { "epoch": 0.34, "grad_norm": 2.8657407760620117, "learning_rate": 1.5383532811500478e-05, "loss": 0.5196, "step": 13245 }, { "epoch": 0.34, "grad_norm": 1.9106855392456055, "learning_rate": 1.538283321557468e-05, "loss": 0.5701, "step": 13246 }, { "epoch": 0.34, "grad_norm": 1.3243016004562378, "learning_rate": 1.538213358255378e-05, "loss": 0.5509, "step": 13247 }, { "epoch": 0.34, "grad_norm": 2.41038179397583, "learning_rate": 1.53814339124426e-05, "loss": 0.7504, "step": 13248 }, { "epoch": 0.34, "grad_norm": 1.9976677894592285, "learning_rate": 1.5380734205245952e-05, "loss": 0.5878, "step": 13249 }, { "epoch": 0.34, "grad_norm": 1.2824980020523071, "learning_rate": 1.538003446096867e-05, "loss": 0.5688, "step": 13250 }, { "epoch": 0.34, "grad_norm": 1.385142207145691, "learning_rate": 1.537933467961557e-05, "loss": 0.6157, "step": 13251 }, { "epoch": 0.34, "grad_norm": 1.4566564559936523, "learning_rate": 1.537863486119147e-05, "loss": 0.6013, "step": 13252 }, { "epoch": 0.34, "grad_norm": 3.1391191482543945, "learning_rate": 1.5377935005701202e-05, "loss": 0.4643, "step": 13253 }, { "epoch": 0.34, "grad_norm": 1.1603864431381226, "learning_rate": 1.5377235113149583e-05, "loss": 0.5461, "step": 13254 }, { "epoch": 0.34, "grad_norm": 1.497918963432312, "learning_rate": 1.5376535183541445e-05, "loss": 0.6849, "step": 13255 }, { "epoch": 0.34, "grad_norm": 2.0381250381469727, "learning_rate": 1.5375835216881595e-05, "loss": 0.5928, "step": 13256 }, { "epoch": 0.34, "grad_norm": 2.041135787963867, "learning_rate": 1.537513521317487e-05, "loss": 0.6732, "step": 13257 }, { "epoch": 0.34, "grad_norm": 1.6495897769927979, "learning_rate": 1.537443517242609e-05, "loss": 0.4854, "step": 13258 }, { "epoch": 0.34, "grad_norm": 1.213229775428772, "learning_rate": 1.5373735094640076e-05, "loss": 0.6082, "step": 13259 }, { "epoch": 0.34, "grad_norm": 2.27652907371521, "learning_rate": 1.537303497982166e-05, "loss": 0.5818, "step": 13260 }, { "epoch": 0.34, "grad_norm": 3.751728057861328, "learning_rate": 1.537233482797566e-05, "loss": 0.8235, "step": 13261 }, { "epoch": 0.34, "grad_norm": 2.15096116065979, "learning_rate": 1.5371634639106908e-05, "loss": 0.7476, "step": 13262 }, { "epoch": 0.34, "grad_norm": 1.7812834978103638, "learning_rate": 1.537093441322022e-05, "loss": 0.5179, "step": 13263 }, { "epoch": 0.34, "grad_norm": 1.7259613275527954, "learning_rate": 1.5370234150320427e-05, "loss": 0.7015, "step": 13264 }, { "epoch": 0.34, "grad_norm": 1.707206130027771, "learning_rate": 1.5369533850412357e-05, "loss": 0.6126, "step": 13265 }, { "epoch": 0.34, "grad_norm": 3.817842721939087, "learning_rate": 1.536883351350083e-05, "loss": 0.4668, "step": 13266 }, { "epoch": 0.34, "grad_norm": 5.948449611663818, "learning_rate": 1.5368133139590677e-05, "loss": 0.5016, "step": 13267 }, { "epoch": 0.34, "grad_norm": 1.7006767988204956, "learning_rate": 1.5367432728686722e-05, "loss": 0.6342, "step": 13268 }, { "epoch": 0.34, "grad_norm": 1.6615098714828491, "learning_rate": 1.5366732280793794e-05, "loss": 0.5239, "step": 13269 }, { "epoch": 0.34, "grad_norm": 1.6382871866226196, "learning_rate": 1.536603179591672e-05, "loss": 0.7322, "step": 13270 }, { "epoch": 0.34, "grad_norm": 3.4809319972991943, "learning_rate": 1.5365331274060324e-05, "loss": 0.5629, "step": 13271 }, { "epoch": 0.34, "grad_norm": 1.6410998106002808, "learning_rate": 1.5364630715229437e-05, "loss": 0.6204, "step": 13272 }, { "epoch": 0.34, "grad_norm": 1.7989825010299683, "learning_rate": 1.5363930119428883e-05, "loss": 0.5668, "step": 13273 }, { "epoch": 0.34, "grad_norm": 7.437666893005371, "learning_rate": 1.5363229486663496e-05, "loss": 0.7401, "step": 13274 }, { "epoch": 0.34, "grad_norm": 1.2381128072738647, "learning_rate": 1.5362528816938098e-05, "loss": 0.496, "step": 13275 }, { "epoch": 0.34, "grad_norm": 1.922656774520874, "learning_rate": 1.5361828110257522e-05, "loss": 0.5164, "step": 13276 }, { "epoch": 0.34, "grad_norm": 1.66756272315979, "learning_rate": 1.5361127366626595e-05, "loss": 0.7131, "step": 13277 }, { "epoch": 0.34, "grad_norm": 1.2791827917099, "learning_rate": 1.5360426586050147e-05, "loss": 0.4901, "step": 13278 }, { "epoch": 0.34, "grad_norm": 2.709564447402954, "learning_rate": 1.5359725768533006e-05, "loss": 0.5937, "step": 13279 }, { "epoch": 0.34, "grad_norm": 4.041281700134277, "learning_rate": 1.535902491408e-05, "loss": 0.6814, "step": 13280 }, { "epoch": 0.34, "grad_norm": 2.83798885345459, "learning_rate": 1.5358324022695964e-05, "loss": 0.7469, "step": 13281 }, { "epoch": 0.34, "grad_norm": 1.8706974983215332, "learning_rate": 1.5357623094385728e-05, "loss": 0.6651, "step": 13282 }, { "epoch": 0.34, "grad_norm": 2.046572208404541, "learning_rate": 1.5356922129154116e-05, "loss": 0.6845, "step": 13283 }, { "epoch": 0.34, "grad_norm": 4.431583404541016, "learning_rate": 1.5356221127005965e-05, "loss": 0.7235, "step": 13284 }, { "epoch": 0.34, "grad_norm": 1.9462194442749023, "learning_rate": 1.53555200879461e-05, "loss": 0.6456, "step": 13285 }, { "epoch": 0.34, "grad_norm": 1.579107642173767, "learning_rate": 1.5354819011979356e-05, "loss": 0.6517, "step": 13286 }, { "epoch": 0.34, "grad_norm": 1.929875373840332, "learning_rate": 1.5354117899110565e-05, "loss": 0.5664, "step": 13287 }, { "epoch": 0.34, "grad_norm": 4.1879401206970215, "learning_rate": 1.5353416749344554e-05, "loss": 0.6115, "step": 13288 }, { "epoch": 0.34, "grad_norm": 4.911621570587158, "learning_rate": 1.535271556268616e-05, "loss": 0.6571, "step": 13289 }, { "epoch": 0.34, "grad_norm": 2.5366008281707764, "learning_rate": 1.535201433914021e-05, "loss": 0.4957, "step": 13290 }, { "epoch": 0.34, "grad_norm": 2.6154534816741943, "learning_rate": 1.535131307871155e-05, "loss": 0.6862, "step": 13291 }, { "epoch": 0.34, "grad_norm": 8.178511619567871, "learning_rate": 1.5350611781404992e-05, "loss": 0.6932, "step": 13292 }, { "epoch": 0.34, "grad_norm": 1.668879747390747, "learning_rate": 1.534991044722538e-05, "loss": 0.5288, "step": 13293 }, { "epoch": 0.34, "grad_norm": 2.1711978912353516, "learning_rate": 1.534920907617755e-05, "loss": 0.6165, "step": 13294 }, { "epoch": 0.34, "grad_norm": 1.7586604356765747, "learning_rate": 1.534850766826633e-05, "loss": 0.6706, "step": 13295 }, { "epoch": 0.34, "grad_norm": 1.8044737577438354, "learning_rate": 1.5347806223496554e-05, "loss": 0.5689, "step": 13296 }, { "epoch": 0.34, "grad_norm": 2.891021966934204, "learning_rate": 1.534710474187306e-05, "loss": 0.66, "step": 13297 }, { "epoch": 0.34, "grad_norm": 1.9001864194869995, "learning_rate": 1.5346403223400682e-05, "loss": 0.3914, "step": 13298 }, { "epoch": 0.34, "grad_norm": 1.3998111486434937, "learning_rate": 1.5345701668084244e-05, "loss": 0.6724, "step": 13299 }, { "epoch": 0.34, "grad_norm": 3.5607335567474365, "learning_rate": 1.5345000075928595e-05, "loss": 0.3964, "step": 13300 }, { "epoch": 0.34, "grad_norm": 1.842088222503662, "learning_rate": 1.5344298446938564e-05, "loss": 0.6043, "step": 13301 }, { "epoch": 0.34, "grad_norm": 2.5261974334716797, "learning_rate": 1.534359678111898e-05, "loss": 0.7022, "step": 13302 }, { "epoch": 0.34, "grad_norm": 1.2064085006713867, "learning_rate": 1.534289507847469e-05, "loss": 0.4754, "step": 13303 }, { "epoch": 0.34, "grad_norm": 2.091339588165283, "learning_rate": 1.5342193339010522e-05, "loss": 0.5497, "step": 13304 }, { "epoch": 0.34, "grad_norm": 1.787903904914856, "learning_rate": 1.5341491562731313e-05, "loss": 0.6002, "step": 13305 }, { "epoch": 0.34, "grad_norm": 1.3107224702835083, "learning_rate": 1.53407897496419e-05, "loss": 0.6162, "step": 13306 }, { "epoch": 0.34, "grad_norm": 1.9914133548736572, "learning_rate": 1.534008789974712e-05, "loss": 0.71, "step": 13307 }, { "epoch": 0.34, "grad_norm": 1.458149790763855, "learning_rate": 1.5339386013051808e-05, "loss": 0.5989, "step": 13308 }, { "epoch": 0.34, "grad_norm": 1.4056249856948853, "learning_rate": 1.5338684089560804e-05, "loss": 0.716, "step": 13309 }, { "epoch": 0.34, "grad_norm": 2.1833882331848145, "learning_rate": 1.533798212927894e-05, "loss": 0.4729, "step": 13310 }, { "epoch": 0.34, "grad_norm": 3.0361814498901367, "learning_rate": 1.533728013221106e-05, "loss": 0.6864, "step": 13311 }, { "epoch": 0.34, "grad_norm": 1.269518494606018, "learning_rate": 1.5336578098361997e-05, "loss": 0.5952, "step": 13312 }, { "epoch": 0.34, "grad_norm": 1.2350399494171143, "learning_rate": 1.5335876027736593e-05, "loss": 0.5167, "step": 13313 }, { "epoch": 0.34, "grad_norm": 1.799892544746399, "learning_rate": 1.5335173920339683e-05, "loss": 0.6716, "step": 13314 }, { "epoch": 0.34, "grad_norm": 3.2137579917907715, "learning_rate": 1.5334471776176104e-05, "loss": 0.6969, "step": 13315 }, { "epoch": 0.34, "grad_norm": 2.7208995819091797, "learning_rate": 1.5333769595250697e-05, "loss": 0.5568, "step": 13316 }, { "epoch": 0.34, "grad_norm": 1.4087729454040527, "learning_rate": 1.53330673775683e-05, "loss": 0.4953, "step": 13317 }, { "epoch": 0.34, "grad_norm": 4.503134727478027, "learning_rate": 1.533236512313376e-05, "loss": 0.7164, "step": 13318 }, { "epoch": 0.34, "grad_norm": 0.8625106811523438, "learning_rate": 1.5331662831951902e-05, "loss": 0.4977, "step": 13319 }, { "epoch": 0.34, "grad_norm": 1.2412779331207275, "learning_rate": 1.533096050402758e-05, "loss": 0.5229, "step": 13320 }, { "epoch": 0.34, "grad_norm": 2.087719440460205, "learning_rate": 1.533025813936562e-05, "loss": 0.6149, "step": 13321 }, { "epoch": 0.34, "grad_norm": 2.2480621337890625, "learning_rate": 1.5329555737970876e-05, "loss": 0.5911, "step": 13322 }, { "epoch": 0.34, "grad_norm": 1.0555503368377686, "learning_rate": 1.5328853299848182e-05, "loss": 0.6292, "step": 13323 }, { "epoch": 0.34, "grad_norm": 9.737192153930664, "learning_rate": 1.5328150825002372e-05, "loss": 0.7805, "step": 13324 }, { "epoch": 0.34, "grad_norm": 1.7212762832641602, "learning_rate": 1.53274483134383e-05, "loss": 0.5453, "step": 13325 }, { "epoch": 0.34, "grad_norm": 1.271376371383667, "learning_rate": 1.53267457651608e-05, "loss": 0.645, "step": 13326 }, { "epoch": 0.34, "grad_norm": 1.302796721458435, "learning_rate": 1.5326043180174716e-05, "loss": 0.6001, "step": 13327 }, { "epoch": 0.34, "grad_norm": 3.0215349197387695, "learning_rate": 1.5325340558484885e-05, "loss": 0.6831, "step": 13328 }, { "epoch": 0.34, "grad_norm": 1.8146039247512817, "learning_rate": 1.5324637900096156e-05, "loss": 0.6312, "step": 13329 }, { "epoch": 0.34, "grad_norm": 1.8840092420578003, "learning_rate": 1.5323935205013368e-05, "loss": 0.495, "step": 13330 }, { "epoch": 0.34, "grad_norm": 1.0109466314315796, "learning_rate": 1.5323232473241363e-05, "loss": 0.672, "step": 13331 }, { "epoch": 0.34, "grad_norm": 6.265796661376953, "learning_rate": 1.5322529704784982e-05, "loss": 0.7135, "step": 13332 }, { "epoch": 0.34, "grad_norm": 1.5387587547302246, "learning_rate": 1.5321826899649073e-05, "loss": 0.6157, "step": 13333 }, { "epoch": 0.34, "grad_norm": 2.364398241043091, "learning_rate": 1.5321124057838473e-05, "loss": 0.8041, "step": 13334 }, { "epoch": 0.34, "grad_norm": 1.1170231103897095, "learning_rate": 1.532042117935803e-05, "loss": 0.5359, "step": 13335 }, { "epoch": 0.34, "grad_norm": 3.0553717613220215, "learning_rate": 1.531971826421259e-05, "loss": 0.6942, "step": 13336 }, { "epoch": 0.34, "grad_norm": 1.3785710334777832, "learning_rate": 1.5319015312406992e-05, "loss": 0.5945, "step": 13337 }, { "epoch": 0.34, "grad_norm": 1.9808343648910522, "learning_rate": 1.5318312323946082e-05, "loss": 0.5173, "step": 13338 }, { "epoch": 0.34, "grad_norm": 1.5939531326293945, "learning_rate": 1.5317609298834706e-05, "loss": 0.5916, "step": 13339 }, { "epoch": 0.34, "grad_norm": 1.9716440439224243, "learning_rate": 1.5316906237077707e-05, "loss": 0.5848, "step": 13340 }, { "epoch": 0.34, "grad_norm": 0.9975973963737488, "learning_rate": 1.531620313867993e-05, "loss": 0.6472, "step": 13341 }, { "epoch": 0.34, "grad_norm": 2.192453145980835, "learning_rate": 1.531550000364622e-05, "loss": 0.5888, "step": 13342 }, { "epoch": 0.34, "grad_norm": 1.8064073324203491, "learning_rate": 1.5314796831981426e-05, "loss": 0.5963, "step": 13343 }, { "epoch": 0.34, "grad_norm": 1.4710609912872314, "learning_rate": 1.531409362369039e-05, "loss": 0.5731, "step": 13344 }, { "epoch": 0.34, "grad_norm": 2.1130549907684326, "learning_rate": 1.5313390378777956e-05, "loss": 0.6541, "step": 13345 }, { "epoch": 0.34, "grad_norm": 1.6545618772506714, "learning_rate": 1.531268709724898e-05, "loss": 0.6086, "step": 13346 }, { "epoch": 0.34, "grad_norm": 1.2671018838882446, "learning_rate": 1.5311983779108296e-05, "loss": 0.5467, "step": 13347 }, { "epoch": 0.34, "grad_norm": 1.9226983785629272, "learning_rate": 1.5311280424360758e-05, "loss": 0.7606, "step": 13348 }, { "epoch": 0.34, "grad_norm": 3.994778871536255, "learning_rate": 1.5310577033011218e-05, "loss": 0.4721, "step": 13349 }, { "epoch": 0.34, "grad_norm": 1.5696182250976562, "learning_rate": 1.5309873605064515e-05, "loss": 0.6104, "step": 13350 }, { "epoch": 0.34, "grad_norm": 1.5948275327682495, "learning_rate": 1.5309170140525497e-05, "loss": 0.6466, "step": 13351 }, { "epoch": 0.34, "grad_norm": 1.9647624492645264, "learning_rate": 1.5308466639399017e-05, "loss": 0.6422, "step": 13352 }, { "epoch": 0.34, "grad_norm": 2.2372353076934814, "learning_rate": 1.5307763101689915e-05, "loss": 0.6621, "step": 13353 }, { "epoch": 0.34, "grad_norm": 7.273383140563965, "learning_rate": 1.5307059527403048e-05, "loss": 0.5395, "step": 13354 }, { "epoch": 0.34, "grad_norm": 5.904451370239258, "learning_rate": 1.530635591654326e-05, "loss": 0.8027, "step": 13355 }, { "epoch": 0.34, "grad_norm": 5.52869176864624, "learning_rate": 1.5305652269115402e-05, "loss": 0.633, "step": 13356 }, { "epoch": 0.34, "grad_norm": 2.593489408493042, "learning_rate": 1.530494858512432e-05, "loss": 0.564, "step": 13357 }, { "epoch": 0.34, "grad_norm": 1.6238571405410767, "learning_rate": 1.5304244864574866e-05, "loss": 0.662, "step": 13358 }, { "epoch": 0.34, "grad_norm": 1.6569968461990356, "learning_rate": 1.530354110747189e-05, "loss": 0.6295, "step": 13359 }, { "epoch": 0.34, "grad_norm": 1.4654150009155273, "learning_rate": 1.5302837313820242e-05, "loss": 0.498, "step": 13360 }, { "epoch": 0.34, "grad_norm": 2.0394372940063477, "learning_rate": 1.5302133483624765e-05, "loss": 0.6281, "step": 13361 }, { "epoch": 0.34, "grad_norm": 1.298003077507019, "learning_rate": 1.5301429616890323e-05, "loss": 0.5829, "step": 13362 }, { "epoch": 0.34, "grad_norm": 4.218740940093994, "learning_rate": 1.5300725713621754e-05, "loss": 0.5416, "step": 13363 }, { "epoch": 0.34, "grad_norm": 2.2803258895874023, "learning_rate": 1.5300021773823913e-05, "loss": 0.5879, "step": 13364 }, { "epoch": 0.34, "grad_norm": 6.017627239227295, "learning_rate": 1.5299317797501653e-05, "loss": 0.784, "step": 13365 }, { "epoch": 0.34, "grad_norm": 1.6123390197753906, "learning_rate": 1.5298613784659825e-05, "loss": 0.6028, "step": 13366 }, { "epoch": 0.34, "grad_norm": 2.89026141166687, "learning_rate": 1.5297909735303277e-05, "loss": 0.6402, "step": 13367 }, { "epoch": 0.34, "grad_norm": 1.991970419883728, "learning_rate": 1.5297205649436864e-05, "loss": 0.4957, "step": 13368 }, { "epoch": 0.34, "grad_norm": 1.2426575422286987, "learning_rate": 1.5296501527065443e-05, "loss": 0.4493, "step": 13369 }, { "epoch": 0.34, "grad_norm": 1.290966510772705, "learning_rate": 1.5295797368193857e-05, "loss": 0.5938, "step": 13370 }, { "epoch": 0.34, "grad_norm": 1.1406292915344238, "learning_rate": 1.5295093172826962e-05, "loss": 0.6451, "step": 13371 }, { "epoch": 0.34, "grad_norm": 1.5690773725509644, "learning_rate": 1.5294388940969614e-05, "loss": 0.4324, "step": 13372 }, { "epoch": 0.34, "grad_norm": 2.510558605194092, "learning_rate": 1.529368467262666e-05, "loss": 0.8171, "step": 13373 }, { "epoch": 0.34, "grad_norm": 4.704306602478027, "learning_rate": 1.5292980367802962e-05, "loss": 0.5545, "step": 13374 }, { "epoch": 0.34, "grad_norm": 8.284334182739258, "learning_rate": 1.529227602650336e-05, "loss": 0.7216, "step": 13375 }, { "epoch": 0.34, "grad_norm": 1.4502094984054565, "learning_rate": 1.5291571648732727e-05, "loss": 0.6123, "step": 13376 }, { "epoch": 0.34, "grad_norm": 2.492959976196289, "learning_rate": 1.52908672344959e-05, "loss": 0.5688, "step": 13377 }, { "epoch": 0.34, "grad_norm": 1.5354011058807373, "learning_rate": 1.529016278379774e-05, "loss": 0.5043, "step": 13378 }, { "epoch": 0.34, "grad_norm": 3.777681827545166, "learning_rate": 1.5289458296643103e-05, "loss": 0.6373, "step": 13379 }, { "epoch": 0.34, "grad_norm": 1.328966736793518, "learning_rate": 1.5288753773036847e-05, "loss": 0.4901, "step": 13380 }, { "epoch": 0.34, "grad_norm": 1.2897645235061646, "learning_rate": 1.5288049212983816e-05, "loss": 0.4458, "step": 13381 }, { "epoch": 0.34, "grad_norm": 1.5285658836364746, "learning_rate": 1.5287344616488877e-05, "loss": 0.4775, "step": 13382 }, { "epoch": 0.34, "grad_norm": 1.6227694749832153, "learning_rate": 1.5286639983556878e-05, "loss": 0.5687, "step": 13383 }, { "epoch": 0.34, "grad_norm": 4.197868824005127, "learning_rate": 1.528593531419268e-05, "loss": 0.6519, "step": 13384 }, { "epoch": 0.34, "grad_norm": 1.7833431959152222, "learning_rate": 1.5285230608401133e-05, "loss": 0.7698, "step": 13385 }, { "epoch": 0.34, "grad_norm": 3.6578080654144287, "learning_rate": 1.5284525866187104e-05, "loss": 0.505, "step": 13386 }, { "epoch": 0.34, "grad_norm": 3.6460161209106445, "learning_rate": 1.5283821087555436e-05, "loss": 0.7544, "step": 13387 }, { "epoch": 0.34, "grad_norm": 6.434605598449707, "learning_rate": 1.5283116272510997e-05, "loss": 0.8167, "step": 13388 }, { "epoch": 0.34, "grad_norm": 4.716455459594727, "learning_rate": 1.5282411421058636e-05, "loss": 0.5109, "step": 13389 }, { "epoch": 0.34, "grad_norm": 1.459567904472351, "learning_rate": 1.5281706533203215e-05, "loss": 0.5162, "step": 13390 }, { "epoch": 0.34, "grad_norm": 3.391406297683716, "learning_rate": 1.528100160894959e-05, "loss": 0.825, "step": 13391 }, { "epoch": 0.34, "grad_norm": 1.6272343397140503, "learning_rate": 1.5280296648302622e-05, "loss": 0.5148, "step": 13392 }, { "epoch": 0.34, "grad_norm": 3.1096339225769043, "learning_rate": 1.527959165126717e-05, "loss": 0.5115, "step": 13393 }, { "epoch": 0.34, "grad_norm": 1.5721209049224854, "learning_rate": 1.5278886617848084e-05, "loss": 0.5664, "step": 13394 }, { "epoch": 0.34, "grad_norm": 2.572235345840454, "learning_rate": 1.527818154805023e-05, "loss": 0.7966, "step": 13395 }, { "epoch": 0.34, "grad_norm": 1.5485548973083496, "learning_rate": 1.5277476441878466e-05, "loss": 0.6041, "step": 13396 }, { "epoch": 0.34, "grad_norm": 2.973018169403076, "learning_rate": 1.5276771299337645e-05, "loss": 0.5312, "step": 13397 }, { "epoch": 0.34, "grad_norm": 1.479256510734558, "learning_rate": 1.5276066120432637e-05, "loss": 0.5434, "step": 13398 }, { "epoch": 0.34, "grad_norm": 3.0968034267425537, "learning_rate": 1.527536090516829e-05, "loss": 0.497, "step": 13399 }, { "epoch": 0.34, "grad_norm": 1.5940852165222168, "learning_rate": 1.527465565354948e-05, "loss": 0.7087, "step": 13400 }, { "epoch": 0.34, "grad_norm": 2.407010316848755, "learning_rate": 1.527395036558105e-05, "loss": 0.5811, "step": 13401 }, { "epoch": 0.34, "grad_norm": 2.3980553150177, "learning_rate": 1.5273245041267867e-05, "loss": 0.6438, "step": 13402 }, { "epoch": 0.34, "grad_norm": 2.2360520362854004, "learning_rate": 1.5272539680614795e-05, "loss": 0.531, "step": 13403 }, { "epoch": 0.34, "grad_norm": 1.3771039247512817, "learning_rate": 1.527183428362669e-05, "loss": 0.5938, "step": 13404 }, { "epoch": 0.34, "grad_norm": 2.129213809967041, "learning_rate": 1.5271128850308417e-05, "loss": 0.66, "step": 13405 }, { "epoch": 0.34, "grad_norm": 10.52962875366211, "learning_rate": 1.5270423380664835e-05, "loss": 0.6646, "step": 13406 }, { "epoch": 0.34, "grad_norm": 1.5217710733413696, "learning_rate": 1.5269717874700804e-05, "loss": 0.5595, "step": 13407 }, { "epoch": 0.34, "grad_norm": 2.103313684463501, "learning_rate": 1.5269012332421192e-05, "loss": 0.647, "step": 13408 }, { "epoch": 0.34, "grad_norm": 2.5997936725616455, "learning_rate": 1.5268306753830857e-05, "loss": 0.6043, "step": 13409 }, { "epoch": 0.34, "grad_norm": 1.2844667434692383, "learning_rate": 1.526760113893466e-05, "loss": 0.5886, "step": 13410 }, { "epoch": 0.34, "grad_norm": 1.149275779724121, "learning_rate": 1.5266895487737462e-05, "loss": 0.5182, "step": 13411 }, { "epoch": 0.34, "grad_norm": 3.399808406829834, "learning_rate": 1.5266189800244134e-05, "loss": 0.775, "step": 13412 }, { "epoch": 0.34, "grad_norm": 1.285339593887329, "learning_rate": 1.526548407645953e-05, "loss": 0.5801, "step": 13413 }, { "epoch": 0.34, "grad_norm": 1.644626498222351, "learning_rate": 1.5264778316388523e-05, "loss": 0.5663, "step": 13414 }, { "epoch": 0.34, "grad_norm": 1.6682590246200562, "learning_rate": 1.526407252003597e-05, "loss": 0.6037, "step": 13415 }, { "epoch": 0.34, "grad_norm": 1.319893479347229, "learning_rate": 1.5263366687406736e-05, "loss": 0.5951, "step": 13416 }, { "epoch": 0.34, "grad_norm": 4.847728252410889, "learning_rate": 1.5262660818505687e-05, "loss": 0.7505, "step": 13417 }, { "epoch": 0.34, "grad_norm": 2.4857001304626465, "learning_rate": 1.5261954913337685e-05, "loss": 0.6107, "step": 13418 }, { "epoch": 0.34, "grad_norm": 1.6531895399093628, "learning_rate": 1.5261248971907593e-05, "loss": 0.6967, "step": 13419 }, { "epoch": 0.34, "grad_norm": 1.8008688688278198, "learning_rate": 1.526054299422028e-05, "loss": 0.4902, "step": 13420 }, { "epoch": 0.34, "grad_norm": 2.256088972091675, "learning_rate": 1.5259836980280613e-05, "loss": 0.6499, "step": 13421 }, { "epoch": 0.34, "grad_norm": 2.2659342288970947, "learning_rate": 1.525913093009345e-05, "loss": 0.7818, "step": 13422 }, { "epoch": 0.34, "grad_norm": 1.0584195852279663, "learning_rate": 1.5258424843663662e-05, "loss": 0.6065, "step": 13423 }, { "epoch": 0.34, "grad_norm": 1.9020715951919556, "learning_rate": 1.5257718720996116e-05, "loss": 0.6286, "step": 13424 }, { "epoch": 0.34, "grad_norm": 1.495686650276184, "learning_rate": 1.5257012562095673e-05, "loss": 0.5827, "step": 13425 }, { "epoch": 0.34, "grad_norm": 2.1279184818267822, "learning_rate": 1.52563063669672e-05, "loss": 0.6984, "step": 13426 }, { "epoch": 0.34, "grad_norm": 2.2970337867736816, "learning_rate": 1.5255600135615569e-05, "loss": 0.6036, "step": 13427 }, { "epoch": 0.34, "grad_norm": 3.13857102394104, "learning_rate": 1.5254893868045644e-05, "loss": 0.4895, "step": 13428 }, { "epoch": 0.34, "grad_norm": 4.452589988708496, "learning_rate": 1.525418756426229e-05, "loss": 0.8032, "step": 13429 }, { "epoch": 0.34, "grad_norm": 1.4458060264587402, "learning_rate": 1.5253481224270379e-05, "loss": 0.6631, "step": 13430 }, { "epoch": 0.34, "grad_norm": 2.883857250213623, "learning_rate": 1.5252774848074774e-05, "loss": 0.6866, "step": 13431 }, { "epoch": 0.34, "grad_norm": 3.4727749824523926, "learning_rate": 1.5252068435680344e-05, "loss": 0.7276, "step": 13432 }, { "epoch": 0.34, "grad_norm": 2.340893030166626, "learning_rate": 1.5251361987091962e-05, "loss": 0.5653, "step": 13433 }, { "epoch": 0.34, "grad_norm": 5.996244430541992, "learning_rate": 1.5250655502314492e-05, "loss": 0.7283, "step": 13434 }, { "epoch": 0.34, "grad_norm": 1.361446499824524, "learning_rate": 1.5249948981352799e-05, "loss": 0.4568, "step": 13435 }, { "epoch": 0.34, "grad_norm": 4.66246223449707, "learning_rate": 1.5249242424211758e-05, "loss": 0.8535, "step": 13436 }, { "epoch": 0.34, "grad_norm": 1.823090672492981, "learning_rate": 1.5248535830896237e-05, "loss": 0.396, "step": 13437 }, { "epoch": 0.34, "grad_norm": 1.1502639055252075, "learning_rate": 1.5247829201411102e-05, "loss": 0.4964, "step": 13438 }, { "epoch": 0.34, "grad_norm": 4.525057792663574, "learning_rate": 1.5247122535761228e-05, "loss": 0.4973, "step": 13439 }, { "epoch": 0.34, "grad_norm": 1.489331841468811, "learning_rate": 1.5246415833951478e-05, "loss": 0.4787, "step": 13440 }, { "epoch": 0.34, "grad_norm": 2.843923330307007, "learning_rate": 1.524570909598673e-05, "loss": 0.7447, "step": 13441 }, { "epoch": 0.34, "grad_norm": 1.1356863975524902, "learning_rate": 1.5245002321871848e-05, "loss": 0.4126, "step": 13442 }, { "epoch": 0.34, "grad_norm": 1.130581259727478, "learning_rate": 1.5244295511611707e-05, "loss": 0.5023, "step": 13443 }, { "epoch": 0.34, "grad_norm": 2.101263999938965, "learning_rate": 1.5243588665211177e-05, "loss": 0.5299, "step": 13444 }, { "epoch": 0.34, "grad_norm": 2.127185344696045, "learning_rate": 1.5242881782675127e-05, "loss": 0.6282, "step": 13445 }, { "epoch": 0.34, "grad_norm": 2.1415650844573975, "learning_rate": 1.524217486400843e-05, "loss": 0.5887, "step": 13446 }, { "epoch": 0.34, "grad_norm": 1.843063235282898, "learning_rate": 1.5241467909215956e-05, "loss": 0.782, "step": 13447 }, { "epoch": 0.34, "grad_norm": 2.471693277359009, "learning_rate": 1.5240760918302581e-05, "loss": 0.5403, "step": 13448 }, { "epoch": 0.34, "grad_norm": 2.9708778858184814, "learning_rate": 1.5240053891273173e-05, "loss": 0.6095, "step": 13449 }, { "epoch": 0.34, "grad_norm": 9.407404899597168, "learning_rate": 1.5239346828132604e-05, "loss": 0.7935, "step": 13450 }, { "epoch": 0.34, "grad_norm": 3.3174936771392822, "learning_rate": 1.5238639728885751e-05, "loss": 0.6803, "step": 13451 }, { "epoch": 0.34, "grad_norm": 2.2474770545959473, "learning_rate": 1.5237932593537482e-05, "loss": 0.7166, "step": 13452 }, { "epoch": 0.34, "grad_norm": 2.5339086055755615, "learning_rate": 1.5237225422092674e-05, "loss": 0.5103, "step": 13453 }, { "epoch": 0.34, "grad_norm": 5.847801685333252, "learning_rate": 1.5236518214556198e-05, "loss": 0.5348, "step": 13454 }, { "epoch": 0.34, "grad_norm": 1.633160948753357, "learning_rate": 1.5235810970932927e-05, "loss": 0.5619, "step": 13455 }, { "epoch": 0.34, "grad_norm": 2.135730028152466, "learning_rate": 1.5235103691227737e-05, "loss": 0.6209, "step": 13456 }, { "epoch": 0.34, "grad_norm": 5.2884626388549805, "learning_rate": 1.5234396375445502e-05, "loss": 0.4782, "step": 13457 }, { "epoch": 0.34, "grad_norm": 5.817503929138184, "learning_rate": 1.5233689023591099e-05, "loss": 0.8889, "step": 13458 }, { "epoch": 0.34, "grad_norm": 1.5186628103256226, "learning_rate": 1.5232981635669393e-05, "loss": 0.5257, "step": 13459 }, { "epoch": 0.34, "grad_norm": 1.7407689094543457, "learning_rate": 1.5232274211685269e-05, "loss": 0.6067, "step": 13460 }, { "epoch": 0.35, "grad_norm": 2.658386707305908, "learning_rate": 1.5231566751643599e-05, "loss": 0.6875, "step": 13461 }, { "epoch": 0.35, "grad_norm": 1.8437286615371704, "learning_rate": 1.5230859255549254e-05, "loss": 0.6865, "step": 13462 }, { "epoch": 0.35, "grad_norm": 3.190730571746826, "learning_rate": 1.5230151723407116e-05, "loss": 0.7057, "step": 13463 }, { "epoch": 0.35, "grad_norm": 1.7556458711624146, "learning_rate": 1.522944415522206e-05, "loss": 0.5152, "step": 13464 }, { "epoch": 0.35, "grad_norm": 1.5758404731750488, "learning_rate": 1.5228736550998958e-05, "loss": 0.5953, "step": 13465 }, { "epoch": 0.35, "grad_norm": 2.304776668548584, "learning_rate": 1.5228028910742687e-05, "loss": 0.5775, "step": 13466 }, { "epoch": 0.35, "grad_norm": 1.3052769899368286, "learning_rate": 1.5227321234458128e-05, "loss": 0.4111, "step": 13467 }, { "epoch": 0.35, "grad_norm": 9.099185943603516, "learning_rate": 1.5226613522150155e-05, "loss": 0.615, "step": 13468 }, { "epoch": 0.35, "grad_norm": 3.152153253555298, "learning_rate": 1.5225905773823644e-05, "loss": 0.6157, "step": 13469 }, { "epoch": 0.35, "grad_norm": 3.540105104446411, "learning_rate": 1.5225197989483475e-05, "loss": 0.7035, "step": 13470 }, { "epoch": 0.35, "grad_norm": 2.187995195388794, "learning_rate": 1.5224490169134525e-05, "loss": 0.7365, "step": 13471 }, { "epoch": 0.35, "grad_norm": 1.6483567953109741, "learning_rate": 1.5223782312781668e-05, "loss": 0.6881, "step": 13472 }, { "epoch": 0.35, "grad_norm": 1.9031169414520264, "learning_rate": 1.5223074420429788e-05, "loss": 0.6949, "step": 13473 }, { "epoch": 0.35, "grad_norm": 1.4664061069488525, "learning_rate": 1.522236649208376e-05, "loss": 0.4705, "step": 13474 }, { "epoch": 0.35, "grad_norm": 2.7800612449645996, "learning_rate": 1.5221658527748462e-05, "loss": 0.5668, "step": 13475 }, { "epoch": 0.35, "grad_norm": 7.21087646484375, "learning_rate": 1.5220950527428775e-05, "loss": 0.7628, "step": 13476 }, { "epoch": 0.35, "grad_norm": 1.6105573177337646, "learning_rate": 1.5220242491129576e-05, "loss": 0.4168, "step": 13477 }, { "epoch": 0.35, "grad_norm": 6.1611762046813965, "learning_rate": 1.5219534418855746e-05, "loss": 0.5416, "step": 13478 }, { "epoch": 0.35, "grad_norm": 1.6931990385055542, "learning_rate": 1.5218826310612164e-05, "loss": 0.7425, "step": 13479 }, { "epoch": 0.35, "grad_norm": 5.58427619934082, "learning_rate": 1.521811816640371e-05, "loss": 0.7602, "step": 13480 }, { "epoch": 0.35, "grad_norm": 3.504167079925537, "learning_rate": 1.5217409986235266e-05, "loss": 0.5732, "step": 13481 }, { "epoch": 0.35, "grad_norm": 8.895421981811523, "learning_rate": 1.5216701770111707e-05, "loss": 0.6524, "step": 13482 }, { "epoch": 0.35, "grad_norm": 5.530327796936035, "learning_rate": 1.5215993518037921e-05, "loss": 0.6475, "step": 13483 }, { "epoch": 0.35, "grad_norm": 2.457277297973633, "learning_rate": 1.5215285230018779e-05, "loss": 0.5903, "step": 13484 }, { "epoch": 0.35, "grad_norm": 1.0955103635787964, "learning_rate": 1.5214576906059173e-05, "loss": 0.4621, "step": 13485 }, { "epoch": 0.35, "grad_norm": 2.3145127296447754, "learning_rate": 1.5213868546163974e-05, "loss": 0.6518, "step": 13486 }, { "epoch": 0.35, "grad_norm": 3.468510150909424, "learning_rate": 1.5213160150338073e-05, "loss": 0.6914, "step": 13487 }, { "epoch": 0.35, "grad_norm": 4.098588466644287, "learning_rate": 1.5212451718586345e-05, "loss": 0.5812, "step": 13488 }, { "epoch": 0.35, "grad_norm": 1.3667198419570923, "learning_rate": 1.5211743250913674e-05, "loss": 0.5762, "step": 13489 }, { "epoch": 0.35, "grad_norm": 1.590782880783081, "learning_rate": 1.5211034747324947e-05, "loss": 0.5675, "step": 13490 }, { "epoch": 0.35, "grad_norm": 1.7433115243911743, "learning_rate": 1.521032620782504e-05, "loss": 0.5547, "step": 13491 }, { "epoch": 0.35, "grad_norm": 1.52081298828125, "learning_rate": 1.5209617632418838e-05, "loss": 0.6355, "step": 13492 }, { "epoch": 0.35, "grad_norm": 2.1214020252227783, "learning_rate": 1.5208909021111225e-05, "loss": 0.4877, "step": 13493 }, { "epoch": 0.35, "grad_norm": 1.7082587480545044, "learning_rate": 1.520820037390708e-05, "loss": 0.5206, "step": 13494 }, { "epoch": 0.35, "grad_norm": 2.117903470993042, "learning_rate": 1.5207491690811293e-05, "loss": 0.6577, "step": 13495 }, { "epoch": 0.35, "grad_norm": 1.5657991170883179, "learning_rate": 1.5206782971828746e-05, "loss": 0.7283, "step": 13496 }, { "epoch": 0.35, "grad_norm": 1.5775686502456665, "learning_rate": 1.5206074216964322e-05, "loss": 0.6333, "step": 13497 }, { "epoch": 0.35, "grad_norm": 3.2838170528411865, "learning_rate": 1.5205365426222903e-05, "loss": 0.7945, "step": 13498 }, { "epoch": 0.35, "grad_norm": 1.4639695882797241, "learning_rate": 1.5204656599609375e-05, "loss": 0.6743, "step": 13499 }, { "epoch": 0.35, "grad_norm": 2.0050408840179443, "learning_rate": 1.5203947737128625e-05, "loss": 0.614, "step": 13500 }, { "epoch": 0.35, "grad_norm": 1.4666746854782104, "learning_rate": 1.520323883878554e-05, "loss": 0.5801, "step": 13501 }, { "epoch": 0.35, "grad_norm": 2.0846526622772217, "learning_rate": 1.5202529904584999e-05, "loss": 0.6799, "step": 13502 }, { "epoch": 0.35, "grad_norm": 0.9819992184638977, "learning_rate": 1.520182093453189e-05, "loss": 0.506, "step": 13503 }, { "epoch": 0.35, "grad_norm": 1.8242837190628052, "learning_rate": 1.52011119286311e-05, "loss": 0.5463, "step": 13504 }, { "epoch": 0.35, "grad_norm": 3.7645418643951416, "learning_rate": 1.520040288688751e-05, "loss": 0.5921, "step": 13505 }, { "epoch": 0.35, "grad_norm": 3.29282283782959, "learning_rate": 1.5199693809306015e-05, "loss": 0.5315, "step": 13506 }, { "epoch": 0.35, "grad_norm": 6.571748733520508, "learning_rate": 1.51989846958915e-05, "loss": 0.9803, "step": 13507 }, { "epoch": 0.35, "grad_norm": 4.029139041900635, "learning_rate": 1.5198275546648841e-05, "loss": 0.7007, "step": 13508 }, { "epoch": 0.35, "grad_norm": 1.5997544527053833, "learning_rate": 1.5197566361582941e-05, "loss": 0.496, "step": 13509 }, { "epoch": 0.35, "grad_norm": 2.209906816482544, "learning_rate": 1.5196857140698674e-05, "loss": 0.752, "step": 13510 }, { "epoch": 0.35, "grad_norm": 1.6348865032196045, "learning_rate": 1.5196147884000931e-05, "loss": 0.5951, "step": 13511 }, { "epoch": 0.35, "grad_norm": 1.4074512720108032, "learning_rate": 1.5195438591494607e-05, "loss": 0.5489, "step": 13512 }, { "epoch": 0.35, "grad_norm": 2.0466158390045166, "learning_rate": 1.519472926318458e-05, "loss": 0.5712, "step": 13513 }, { "epoch": 0.35, "grad_norm": 1.6245293617248535, "learning_rate": 1.5194019899075745e-05, "loss": 0.6281, "step": 13514 }, { "epoch": 0.35, "grad_norm": 2.09096097946167, "learning_rate": 1.5193310499172987e-05, "loss": 0.7413, "step": 13515 }, { "epoch": 0.35, "grad_norm": 2.8651933670043945, "learning_rate": 1.5192601063481196e-05, "loss": 0.5208, "step": 13516 }, { "epoch": 0.35, "grad_norm": 1.6515300273895264, "learning_rate": 1.5191891592005262e-05, "loss": 0.4717, "step": 13517 }, { "epoch": 0.35, "grad_norm": 1.6023980379104614, "learning_rate": 1.519118208475007e-05, "loss": 0.6204, "step": 13518 }, { "epoch": 0.35, "grad_norm": 1.5082759857177734, "learning_rate": 1.5190472541720514e-05, "loss": 0.6139, "step": 13519 }, { "epoch": 0.35, "grad_norm": 1.4776314496994019, "learning_rate": 1.5189762962921482e-05, "loss": 0.6259, "step": 13520 }, { "epoch": 0.35, "grad_norm": 3.5877296924591064, "learning_rate": 1.5189053348357864e-05, "loss": 0.6348, "step": 13521 }, { "epoch": 0.35, "grad_norm": 1.199491024017334, "learning_rate": 1.518834369803455e-05, "loss": 0.5102, "step": 13522 }, { "epoch": 0.35, "grad_norm": 1.4029113054275513, "learning_rate": 1.5187634011956432e-05, "loss": 0.5104, "step": 13523 }, { "epoch": 0.35, "grad_norm": 4.0783467292785645, "learning_rate": 1.5186924290128403e-05, "loss": 0.5605, "step": 13524 }, { "epoch": 0.35, "grad_norm": 2.3745055198669434, "learning_rate": 1.5186214532555344e-05, "loss": 0.7302, "step": 13525 }, { "epoch": 0.35, "grad_norm": 2.4835240840911865, "learning_rate": 1.5185504739242157e-05, "loss": 0.5612, "step": 13526 }, { "epoch": 0.35, "grad_norm": 1.8604007959365845, "learning_rate": 1.5184794910193725e-05, "loss": 0.6841, "step": 13527 }, { "epoch": 0.35, "grad_norm": 1.8055323362350464, "learning_rate": 1.5184085045414949e-05, "loss": 0.517, "step": 13528 }, { "epoch": 0.35, "grad_norm": 3.2404708862304688, "learning_rate": 1.5183375144910713e-05, "loss": 0.5299, "step": 13529 }, { "epoch": 0.35, "grad_norm": 1.3614983558654785, "learning_rate": 1.5182665208685912e-05, "loss": 0.5526, "step": 13530 }, { "epoch": 0.35, "grad_norm": 3.7569241523742676, "learning_rate": 1.5181955236745438e-05, "loss": 0.5423, "step": 13531 }, { "epoch": 0.35, "grad_norm": 2.0085227489471436, "learning_rate": 1.5181245229094185e-05, "loss": 0.5474, "step": 13532 }, { "epoch": 0.35, "grad_norm": 2.2952399253845215, "learning_rate": 1.5180535185737046e-05, "loss": 0.7284, "step": 13533 }, { "epoch": 0.35, "grad_norm": 5.459627628326416, "learning_rate": 1.5179825106678912e-05, "loss": 0.6747, "step": 13534 }, { "epoch": 0.35, "grad_norm": 3.656330108642578, "learning_rate": 1.5179114991924677e-05, "loss": 0.5808, "step": 13535 }, { "epoch": 0.35, "grad_norm": 1.571057915687561, "learning_rate": 1.5178404841479238e-05, "loss": 0.6158, "step": 13536 }, { "epoch": 0.35, "grad_norm": 2.266223669052124, "learning_rate": 1.5177694655347483e-05, "loss": 0.8046, "step": 13537 }, { "epoch": 0.35, "grad_norm": 4.284971714019775, "learning_rate": 1.5176984433534312e-05, "loss": 0.7352, "step": 13538 }, { "epoch": 0.35, "grad_norm": 1.1151773929595947, "learning_rate": 1.5176274176044617e-05, "loss": 0.5839, "step": 13539 }, { "epoch": 0.35, "grad_norm": 4.371710300445557, "learning_rate": 1.5175563882883291e-05, "loss": 0.6608, "step": 13540 }, { "epoch": 0.35, "grad_norm": 3.9120287895202637, "learning_rate": 1.517485355405523e-05, "loss": 0.573, "step": 13541 }, { "epoch": 0.35, "grad_norm": 1.5403168201446533, "learning_rate": 1.5174143189565328e-05, "loss": 0.6081, "step": 13542 }, { "epoch": 0.35, "grad_norm": 1.3457270860671997, "learning_rate": 1.5173432789418488e-05, "loss": 0.7111, "step": 13543 }, { "epoch": 0.35, "grad_norm": 1.7935006618499756, "learning_rate": 1.517272235361959e-05, "loss": 0.5907, "step": 13544 }, { "epoch": 0.35, "grad_norm": 3.559011220932007, "learning_rate": 1.5172011882173547e-05, "loss": 0.697, "step": 13545 }, { "epoch": 0.35, "grad_norm": 8.704092979431152, "learning_rate": 1.5171301375085245e-05, "loss": 0.4236, "step": 13546 }, { "epoch": 0.35, "grad_norm": 1.49691641330719, "learning_rate": 1.5170590832359585e-05, "loss": 0.6448, "step": 13547 }, { "epoch": 0.35, "grad_norm": 3.1454710960388184, "learning_rate": 1.516988025400146e-05, "loss": 0.7354, "step": 13548 }, { "epoch": 0.35, "grad_norm": 1.582517385482788, "learning_rate": 1.5169169640015766e-05, "loss": 0.5263, "step": 13549 }, { "epoch": 0.35, "grad_norm": 5.053560256958008, "learning_rate": 1.5168458990407406e-05, "loss": 0.474, "step": 13550 }, { "epoch": 0.35, "grad_norm": 2.0511884689331055, "learning_rate": 1.5167748305181272e-05, "loss": 0.3965, "step": 13551 }, { "epoch": 0.35, "grad_norm": 1.5437546968460083, "learning_rate": 1.5167037584342262e-05, "loss": 0.5856, "step": 13552 }, { "epoch": 0.35, "grad_norm": 2.10245418548584, "learning_rate": 1.5166326827895277e-05, "loss": 0.5508, "step": 13553 }, { "epoch": 0.35, "grad_norm": 3.355990171432495, "learning_rate": 1.5165616035845213e-05, "loss": 0.5521, "step": 13554 }, { "epoch": 0.35, "grad_norm": 8.078605651855469, "learning_rate": 1.516490520819697e-05, "loss": 0.7037, "step": 13555 }, { "epoch": 0.35, "grad_norm": 2.282670497894287, "learning_rate": 1.5164194344955442e-05, "loss": 0.6159, "step": 13556 }, { "epoch": 0.35, "grad_norm": 5.560230731964111, "learning_rate": 1.5163483446125533e-05, "loss": 0.5446, "step": 13557 }, { "epoch": 0.35, "grad_norm": 1.8540246486663818, "learning_rate": 1.5162772511712138e-05, "loss": 0.5985, "step": 13558 }, { "epoch": 0.35, "grad_norm": 2.5791404247283936, "learning_rate": 1.516206154172016e-05, "loss": 0.6756, "step": 13559 }, { "epoch": 0.35, "grad_norm": 6.143213272094727, "learning_rate": 1.5161350536154498e-05, "loss": 0.5289, "step": 13560 }, { "epoch": 0.35, "grad_norm": 1.78495454788208, "learning_rate": 1.5160639495020051e-05, "loss": 0.571, "step": 13561 }, { "epoch": 0.35, "grad_norm": 2.829251289367676, "learning_rate": 1.5159928418321719e-05, "loss": 0.6009, "step": 13562 }, { "epoch": 0.35, "grad_norm": 3.521047592163086, "learning_rate": 1.51592173060644e-05, "loss": 0.5306, "step": 13563 }, { "epoch": 0.35, "grad_norm": 1.4661933183670044, "learning_rate": 1.5158506158252997e-05, "loss": 0.6422, "step": 13564 }, { "epoch": 0.35, "grad_norm": 1.5611906051635742, "learning_rate": 1.5157794974892412e-05, "loss": 0.4256, "step": 13565 }, { "epoch": 0.35, "grad_norm": 2.769909381866455, "learning_rate": 1.5157083755987544e-05, "loss": 0.6362, "step": 13566 }, { "epoch": 0.35, "grad_norm": 2.686379909515381, "learning_rate": 1.5156372501543293e-05, "loss": 0.6687, "step": 13567 }, { "epoch": 0.35, "grad_norm": 5.295518398284912, "learning_rate": 1.5155661211564562e-05, "loss": 0.7807, "step": 13568 }, { "epoch": 0.35, "grad_norm": 1.2161377668380737, "learning_rate": 1.5154949886056259e-05, "loss": 0.5621, "step": 13569 }, { "epoch": 0.35, "grad_norm": 1.3129123449325562, "learning_rate": 1.5154238525023273e-05, "loss": 0.4985, "step": 13570 }, { "epoch": 0.35, "grad_norm": 2.805983066558838, "learning_rate": 1.5153527128470517e-05, "loss": 0.5969, "step": 13571 }, { "epoch": 0.35, "grad_norm": 4.383635520935059, "learning_rate": 1.5152815696402889e-05, "loss": 0.7278, "step": 13572 }, { "epoch": 0.35, "grad_norm": 1.3761523962020874, "learning_rate": 1.5152104228825293e-05, "loss": 0.5955, "step": 13573 }, { "epoch": 0.35, "grad_norm": 2.0187032222747803, "learning_rate": 1.515139272574263e-05, "loss": 0.7117, "step": 13574 }, { "epoch": 0.35, "grad_norm": 1.9271882772445679, "learning_rate": 1.5150681187159806e-05, "loss": 0.6991, "step": 13575 }, { "epoch": 0.35, "grad_norm": 2.6519057750701904, "learning_rate": 1.5149969613081723e-05, "loss": 0.4748, "step": 13576 }, { "epoch": 0.35, "grad_norm": 2.6756625175476074, "learning_rate": 1.5149258003513285e-05, "loss": 0.7765, "step": 13577 }, { "epoch": 0.35, "grad_norm": 2.3335866928100586, "learning_rate": 1.5148546358459395e-05, "loss": 0.4229, "step": 13578 }, { "epoch": 0.35, "grad_norm": 4.453612804412842, "learning_rate": 1.514783467792496e-05, "loss": 0.5952, "step": 13579 }, { "epoch": 0.35, "grad_norm": 6.787145137786865, "learning_rate": 1.5147122961914882e-05, "loss": 0.436, "step": 13580 }, { "epoch": 0.35, "grad_norm": 1.5725712776184082, "learning_rate": 1.5146411210434066e-05, "loss": 0.5888, "step": 13581 }, { "epoch": 0.35, "grad_norm": 5.909660339355469, "learning_rate": 1.5145699423487417e-05, "loss": 0.6346, "step": 13582 }, { "epoch": 0.35, "grad_norm": 5.02834939956665, "learning_rate": 1.5144987601079839e-05, "loss": 0.5667, "step": 13583 }, { "epoch": 0.35, "grad_norm": 6.769103050231934, "learning_rate": 1.514427574321624e-05, "loss": 0.565, "step": 13584 }, { "epoch": 0.35, "grad_norm": 3.3233065605163574, "learning_rate": 1.5143563849901527e-05, "loss": 0.6192, "step": 13585 }, { "epoch": 0.35, "grad_norm": 2.9824161529541016, "learning_rate": 1.51428519211406e-05, "loss": 0.6324, "step": 13586 }, { "epoch": 0.35, "grad_norm": 2.559293508529663, "learning_rate": 1.514213995693837e-05, "loss": 0.6543, "step": 13587 }, { "epoch": 0.35, "grad_norm": 2.408048391342163, "learning_rate": 1.5141427957299741e-05, "loss": 0.611, "step": 13588 }, { "epoch": 0.35, "grad_norm": 3.4869601726531982, "learning_rate": 1.5140715922229624e-05, "loss": 0.6459, "step": 13589 }, { "epoch": 0.35, "grad_norm": 1.506824254989624, "learning_rate": 1.5140003851732919e-05, "loss": 0.544, "step": 13590 }, { "epoch": 0.35, "grad_norm": 1.0700703859329224, "learning_rate": 1.5139291745814537e-05, "loss": 0.62, "step": 13591 }, { "epoch": 0.35, "grad_norm": 3.0919487476348877, "learning_rate": 1.5138579604479385e-05, "loss": 0.5857, "step": 13592 }, { "epoch": 0.35, "grad_norm": 1.611306071281433, "learning_rate": 1.5137867427732371e-05, "loss": 0.6638, "step": 13593 }, { "epoch": 0.35, "grad_norm": 1.3570876121520996, "learning_rate": 1.5137155215578404e-05, "loss": 0.4525, "step": 13594 }, { "epoch": 0.35, "grad_norm": 2.071051597595215, "learning_rate": 1.5136442968022391e-05, "loss": 0.431, "step": 13595 }, { "epoch": 0.35, "grad_norm": 2.2696001529693604, "learning_rate": 1.5135730685069238e-05, "loss": 0.4647, "step": 13596 }, { "epoch": 0.35, "grad_norm": 1.987532377243042, "learning_rate": 1.5135018366723857e-05, "loss": 0.5903, "step": 13597 }, { "epoch": 0.35, "grad_norm": 2.1800124645233154, "learning_rate": 1.5134306012991156e-05, "loss": 0.5937, "step": 13598 }, { "epoch": 0.35, "grad_norm": 1.948583722114563, "learning_rate": 1.5133593623876043e-05, "loss": 0.5261, "step": 13599 }, { "epoch": 0.35, "grad_norm": 1.767971396446228, "learning_rate": 1.5132881199383426e-05, "loss": 0.6381, "step": 13600 }, { "epoch": 0.35, "grad_norm": 1.952566146850586, "learning_rate": 1.513216873951822e-05, "loss": 0.7112, "step": 13601 }, { "epoch": 0.35, "grad_norm": 1.7091083526611328, "learning_rate": 1.5131456244285329e-05, "loss": 0.6502, "step": 13602 }, { "epoch": 0.35, "grad_norm": 1.1761953830718994, "learning_rate": 1.5130743713689668e-05, "loss": 0.4286, "step": 13603 }, { "epoch": 0.35, "grad_norm": 1.9999619722366333, "learning_rate": 1.5130031147736147e-05, "loss": 0.4653, "step": 13604 }, { "epoch": 0.35, "grad_norm": 0.9170989394187927, "learning_rate": 1.5129318546429667e-05, "loss": 0.5094, "step": 13605 }, { "epoch": 0.35, "grad_norm": 1.3826900720596313, "learning_rate": 1.512860590977515e-05, "loss": 0.5923, "step": 13606 }, { "epoch": 0.35, "grad_norm": 1.0096547603607178, "learning_rate": 1.5127893237777502e-05, "loss": 0.4171, "step": 13607 }, { "epoch": 0.35, "grad_norm": 1.0257459878921509, "learning_rate": 1.5127180530441637e-05, "loss": 0.6393, "step": 13608 }, { "epoch": 0.35, "grad_norm": 1.569482445716858, "learning_rate": 1.5126467787772465e-05, "loss": 0.6138, "step": 13609 }, { "epoch": 0.35, "grad_norm": 2.197840929031372, "learning_rate": 1.5125755009774896e-05, "loss": 0.5595, "step": 13610 }, { "epoch": 0.35, "grad_norm": 3.1300442218780518, "learning_rate": 1.5125042196453846e-05, "loss": 0.643, "step": 13611 }, { "epoch": 0.35, "grad_norm": 1.5188021659851074, "learning_rate": 1.5124329347814223e-05, "loss": 0.5018, "step": 13612 }, { "epoch": 0.35, "grad_norm": 2.2414495944976807, "learning_rate": 1.5123616463860943e-05, "loss": 0.6351, "step": 13613 }, { "epoch": 0.35, "grad_norm": 1.5553747415542603, "learning_rate": 1.5122903544598917e-05, "loss": 0.6964, "step": 13614 }, { "epoch": 0.35, "grad_norm": 1.2662469148635864, "learning_rate": 1.512219059003306e-05, "loss": 0.3883, "step": 13615 }, { "epoch": 0.35, "grad_norm": 1.767914056777954, "learning_rate": 1.512147760016828e-05, "loss": 0.6501, "step": 13616 }, { "epoch": 0.35, "grad_norm": 1.9519480466842651, "learning_rate": 1.5120764575009494e-05, "loss": 0.5419, "step": 13617 }, { "epoch": 0.35, "grad_norm": 1.357221245765686, "learning_rate": 1.5120051514561618e-05, "loss": 0.5268, "step": 13618 }, { "epoch": 0.35, "grad_norm": 1.1756616830825806, "learning_rate": 1.5119338418829563e-05, "loss": 0.5126, "step": 13619 }, { "epoch": 0.35, "grad_norm": 1.984980821609497, "learning_rate": 1.5118625287818244e-05, "loss": 0.5954, "step": 13620 }, { "epoch": 0.35, "grad_norm": 1.411769151687622, "learning_rate": 1.5117912121532576e-05, "loss": 0.7051, "step": 13621 }, { "epoch": 0.35, "grad_norm": 2.199227809906006, "learning_rate": 1.511719891997747e-05, "loss": 0.659, "step": 13622 }, { "epoch": 0.35, "grad_norm": 2.275895833969116, "learning_rate": 1.511648568315785e-05, "loss": 0.5875, "step": 13623 }, { "epoch": 0.35, "grad_norm": 1.5863968133926392, "learning_rate": 1.511577241107862e-05, "loss": 0.7024, "step": 13624 }, { "epoch": 0.35, "grad_norm": 1.332092046737671, "learning_rate": 1.5115059103744702e-05, "loss": 0.6379, "step": 13625 }, { "epoch": 0.35, "grad_norm": 1.2494266033172607, "learning_rate": 1.5114345761161011e-05, "loss": 0.4414, "step": 13626 }, { "epoch": 0.35, "grad_norm": 1.3488233089447021, "learning_rate": 1.511363238333246e-05, "loss": 0.5047, "step": 13627 }, { "epoch": 0.35, "grad_norm": 2.1360485553741455, "learning_rate": 1.511291897026397e-05, "loss": 0.5504, "step": 13628 }, { "epoch": 0.35, "grad_norm": 1.7790558338165283, "learning_rate": 1.5112205521960453e-05, "loss": 0.5517, "step": 13629 }, { "epoch": 0.35, "grad_norm": 1.365431785583496, "learning_rate": 1.511149203842683e-05, "loss": 0.5871, "step": 13630 }, { "epoch": 0.35, "grad_norm": 10.908469200134277, "learning_rate": 1.511077851966801e-05, "loss": 0.4786, "step": 13631 }, { "epoch": 0.35, "grad_norm": 9.857041358947754, "learning_rate": 1.511006496568892e-05, "loss": 0.4796, "step": 13632 }, { "epoch": 0.35, "grad_norm": 3.7384183406829834, "learning_rate": 1.510935137649447e-05, "loss": 0.5685, "step": 13633 }, { "epoch": 0.35, "grad_norm": 4.910064220428467, "learning_rate": 1.510863775208958e-05, "loss": 0.8255, "step": 13634 }, { "epoch": 0.35, "grad_norm": 1.4536347389221191, "learning_rate": 1.5107924092479173e-05, "loss": 0.5063, "step": 13635 }, { "epoch": 0.35, "grad_norm": 3.6234793663024902, "learning_rate": 1.5107210397668158e-05, "loss": 0.6971, "step": 13636 }, { "epoch": 0.35, "grad_norm": 3.44966983795166, "learning_rate": 1.5106496667661458e-05, "loss": 0.5087, "step": 13637 }, { "epoch": 0.35, "grad_norm": 1.7454276084899902, "learning_rate": 1.5105782902463992e-05, "loss": 0.6036, "step": 13638 }, { "epoch": 0.35, "grad_norm": 2.928253173828125, "learning_rate": 1.5105069102080676e-05, "loss": 0.5144, "step": 13639 }, { "epoch": 0.35, "grad_norm": 1.392703890800476, "learning_rate": 1.5104355266516434e-05, "loss": 0.4531, "step": 13640 }, { "epoch": 0.35, "grad_norm": 1.5589635372161865, "learning_rate": 1.5103641395776181e-05, "loss": 0.516, "step": 13641 }, { "epoch": 0.35, "grad_norm": 1.887239694595337, "learning_rate": 1.5102927489864838e-05, "loss": 0.663, "step": 13642 }, { "epoch": 0.35, "grad_norm": 12.172735214233398, "learning_rate": 1.5102213548787324e-05, "loss": 0.8159, "step": 13643 }, { "epoch": 0.35, "grad_norm": 1.6293349266052246, "learning_rate": 1.5101499572548561e-05, "loss": 0.4715, "step": 13644 }, { "epoch": 0.35, "grad_norm": 0.9635195732116699, "learning_rate": 1.5100785561153468e-05, "loss": 0.6095, "step": 13645 }, { "epoch": 0.35, "grad_norm": 1.5793287754058838, "learning_rate": 1.5100071514606965e-05, "loss": 0.7266, "step": 13646 }, { "epoch": 0.35, "grad_norm": 1.494734287261963, "learning_rate": 1.5099357432913974e-05, "loss": 0.6484, "step": 13647 }, { "epoch": 0.35, "grad_norm": 2.655867099761963, "learning_rate": 1.5098643316079413e-05, "loss": 0.572, "step": 13648 }, { "epoch": 0.35, "grad_norm": 1.5294382572174072, "learning_rate": 1.509792916410821e-05, "loss": 0.5477, "step": 13649 }, { "epoch": 0.35, "grad_norm": 2.349259376525879, "learning_rate": 1.509721497700528e-05, "loss": 0.6675, "step": 13650 }, { "epoch": 0.35, "grad_norm": 7.6919145584106445, "learning_rate": 1.5096500754775542e-05, "loss": 0.6754, "step": 13651 }, { "epoch": 0.35, "grad_norm": 1.391735553741455, "learning_rate": 1.5095786497423927e-05, "loss": 0.4842, "step": 13652 }, { "epoch": 0.35, "grad_norm": 4.628031253814697, "learning_rate": 1.5095072204955355e-05, "loss": 0.4775, "step": 13653 }, { "epoch": 0.35, "grad_norm": 1.3802030086517334, "learning_rate": 1.5094357877374743e-05, "loss": 0.5576, "step": 13654 }, { "epoch": 0.35, "grad_norm": 5.8165178298950195, "learning_rate": 1.5093643514687017e-05, "loss": 0.5333, "step": 13655 }, { "epoch": 0.35, "grad_norm": 1.6145555973052979, "learning_rate": 1.5092929116897101e-05, "loss": 0.5519, "step": 13656 }, { "epoch": 0.35, "grad_norm": 4.361815452575684, "learning_rate": 1.5092214684009921e-05, "loss": 0.522, "step": 13657 }, { "epoch": 0.35, "grad_norm": 5.76760196685791, "learning_rate": 1.5091500216030391e-05, "loss": 0.7309, "step": 13658 }, { "epoch": 0.35, "grad_norm": 1.068487286567688, "learning_rate": 1.5090785712963443e-05, "loss": 0.578, "step": 13659 }, { "epoch": 0.35, "grad_norm": 3.2181060314178467, "learning_rate": 1.5090071174813996e-05, "loss": 0.5896, "step": 13660 }, { "epoch": 0.35, "grad_norm": 1.0205872058868408, "learning_rate": 1.5089356601586978e-05, "loss": 0.6298, "step": 13661 }, { "epoch": 0.35, "grad_norm": 1.654451847076416, "learning_rate": 1.5088641993287314e-05, "loss": 0.5723, "step": 13662 }, { "epoch": 0.35, "grad_norm": 1.9509488344192505, "learning_rate": 1.5087927349919923e-05, "loss": 0.8077, "step": 13663 }, { "epoch": 0.35, "grad_norm": 3.89854097366333, "learning_rate": 1.5087212671489732e-05, "loss": 0.6847, "step": 13664 }, { "epoch": 0.35, "grad_norm": 4.286295413970947, "learning_rate": 1.5086497958001672e-05, "loss": 0.7522, "step": 13665 }, { "epoch": 0.35, "grad_norm": 2.6356544494628906, "learning_rate": 1.5085783209460662e-05, "loss": 0.5205, "step": 13666 }, { "epoch": 0.35, "grad_norm": 1.650984525680542, "learning_rate": 1.5085068425871628e-05, "loss": 0.7164, "step": 13667 }, { "epoch": 0.35, "grad_norm": 3.2968316078186035, "learning_rate": 1.5084353607239498e-05, "loss": 0.5459, "step": 13668 }, { "epoch": 0.35, "grad_norm": 2.7890279293060303, "learning_rate": 1.5083638753569197e-05, "loss": 0.6641, "step": 13669 }, { "epoch": 0.35, "grad_norm": 2.7100882530212402, "learning_rate": 1.5082923864865651e-05, "loss": 0.5738, "step": 13670 }, { "epoch": 0.35, "grad_norm": 3.2869515419006348, "learning_rate": 1.508220894113379e-05, "loss": 0.69, "step": 13671 }, { "epoch": 0.35, "grad_norm": 1.2281410694122314, "learning_rate": 1.5081493982378535e-05, "loss": 0.5828, "step": 13672 }, { "epoch": 0.35, "grad_norm": 1.6280242204666138, "learning_rate": 1.5080778988604814e-05, "loss": 0.5798, "step": 13673 }, { "epoch": 0.35, "grad_norm": 2.2647533416748047, "learning_rate": 1.5080063959817561e-05, "loss": 0.716, "step": 13674 }, { "epoch": 0.35, "grad_norm": 1.686936378479004, "learning_rate": 1.5079348896021695e-05, "loss": 0.5499, "step": 13675 }, { "epoch": 0.35, "grad_norm": 1.7281876802444458, "learning_rate": 1.507863379722215e-05, "loss": 0.5261, "step": 13676 }, { "epoch": 0.35, "grad_norm": 1.3363926410675049, "learning_rate": 1.5077918663423852e-05, "loss": 0.5068, "step": 13677 }, { "epoch": 0.35, "grad_norm": 1.6572870016098022, "learning_rate": 1.5077203494631727e-05, "loss": 0.5293, "step": 13678 }, { "epoch": 0.35, "grad_norm": 3.8823623657226562, "learning_rate": 1.5076488290850707e-05, "loss": 0.7035, "step": 13679 }, { "epoch": 0.35, "grad_norm": 2.5728707313537598, "learning_rate": 1.5075773052085718e-05, "loss": 0.5772, "step": 13680 }, { "epoch": 0.35, "grad_norm": 4.733125686645508, "learning_rate": 1.507505777834169e-05, "loss": 0.5697, "step": 13681 }, { "epoch": 0.35, "grad_norm": 4.507107257843018, "learning_rate": 1.507434246962355e-05, "loss": 0.6482, "step": 13682 }, { "epoch": 0.35, "grad_norm": 6.211649417877197, "learning_rate": 1.5073627125936235e-05, "loss": 0.7497, "step": 13683 }, { "epoch": 0.35, "grad_norm": 17.789798736572266, "learning_rate": 1.5072911747284666e-05, "loss": 0.5051, "step": 13684 }, { "epoch": 0.35, "grad_norm": 0.9927811622619629, "learning_rate": 1.5072196333673776e-05, "loss": 0.3938, "step": 13685 }, { "epoch": 0.35, "grad_norm": 2.056241035461426, "learning_rate": 1.5071480885108498e-05, "loss": 0.5604, "step": 13686 }, { "epoch": 0.35, "grad_norm": 2.296924114227295, "learning_rate": 1.5070765401593757e-05, "loss": 0.5096, "step": 13687 }, { "epoch": 0.35, "grad_norm": 1.6504840850830078, "learning_rate": 1.507004988313449e-05, "loss": 0.6037, "step": 13688 }, { "epoch": 0.35, "grad_norm": 1.9014101028442383, "learning_rate": 1.5069334329735621e-05, "loss": 0.6401, "step": 13689 }, { "epoch": 0.35, "grad_norm": 1.17475163936615, "learning_rate": 1.5068618741402088e-05, "loss": 0.5119, "step": 13690 }, { "epoch": 0.35, "grad_norm": 1.8438048362731934, "learning_rate": 1.5067903118138816e-05, "loss": 0.5781, "step": 13691 }, { "epoch": 0.35, "grad_norm": 2.180042028427124, "learning_rate": 1.506718745995074e-05, "loss": 0.5733, "step": 13692 }, { "epoch": 0.35, "grad_norm": 1.9603345394134521, "learning_rate": 1.5066471766842794e-05, "loss": 0.5701, "step": 13693 }, { "epoch": 0.35, "grad_norm": 1.7292062044143677, "learning_rate": 1.5065756038819905e-05, "loss": 0.4465, "step": 13694 }, { "epoch": 0.35, "grad_norm": 2.0005552768707275, "learning_rate": 1.5065040275887013e-05, "loss": 0.665, "step": 13695 }, { "epoch": 0.35, "grad_norm": 1.1371691226959229, "learning_rate": 1.5064324478049039e-05, "loss": 0.6438, "step": 13696 }, { "epoch": 0.35, "grad_norm": 1.3073936700820923, "learning_rate": 1.5063608645310925e-05, "loss": 0.42, "step": 13697 }, { "epoch": 0.35, "grad_norm": 12.124367713928223, "learning_rate": 1.5062892777677602e-05, "loss": 0.5809, "step": 13698 }, { "epoch": 0.35, "grad_norm": 4.301356792449951, "learning_rate": 1.5062176875154001e-05, "loss": 0.5816, "step": 13699 }, { "epoch": 0.35, "grad_norm": 1.8661059141159058, "learning_rate": 1.5061460937745064e-05, "loss": 0.6343, "step": 13700 }, { "epoch": 0.35, "grad_norm": 14.870474815368652, "learning_rate": 1.5060744965455709e-05, "loss": 0.8864, "step": 13701 }, { "epoch": 0.35, "grad_norm": 2.173157215118408, "learning_rate": 1.5060028958290884e-05, "loss": 0.5111, "step": 13702 }, { "epoch": 0.35, "grad_norm": 1.2973254919052124, "learning_rate": 1.5059312916255517e-05, "loss": 0.6193, "step": 13703 }, { "epoch": 0.35, "grad_norm": 1.3674622774124146, "learning_rate": 1.5058596839354545e-05, "loss": 0.5812, "step": 13704 }, { "epoch": 0.35, "grad_norm": 1.577201008796692, "learning_rate": 1.5057880727592901e-05, "loss": 0.7059, "step": 13705 }, { "epoch": 0.35, "grad_norm": 1.7188670635223389, "learning_rate": 1.5057164580975521e-05, "loss": 0.5645, "step": 13706 }, { "epoch": 0.35, "grad_norm": 2.1190638542175293, "learning_rate": 1.5056448399507342e-05, "loss": 0.6606, "step": 13707 }, { "epoch": 0.35, "grad_norm": 2.0446360111236572, "learning_rate": 1.5055732183193294e-05, "loss": 0.7528, "step": 13708 }, { "epoch": 0.35, "grad_norm": 2.349375009536743, "learning_rate": 1.5055015932038318e-05, "loss": 0.5922, "step": 13709 }, { "epoch": 0.35, "grad_norm": 2.239682674407959, "learning_rate": 1.5054299646047348e-05, "loss": 0.5927, "step": 13710 }, { "epoch": 0.35, "grad_norm": 2.9769880771636963, "learning_rate": 1.5053583325225316e-05, "loss": 0.7132, "step": 13711 }, { "epoch": 0.35, "grad_norm": 3.8858251571655273, "learning_rate": 1.505286696957717e-05, "loss": 0.5902, "step": 13712 }, { "epoch": 0.35, "grad_norm": 1.5852632522583008, "learning_rate": 1.5052150579107833e-05, "loss": 0.5939, "step": 13713 }, { "epoch": 0.35, "grad_norm": 1.6319918632507324, "learning_rate": 1.5051434153822251e-05, "loss": 0.6052, "step": 13714 }, { "epoch": 0.35, "grad_norm": 1.5741130113601685, "learning_rate": 1.505071769372536e-05, "loss": 0.498, "step": 13715 }, { "epoch": 0.35, "grad_norm": 3.150402784347534, "learning_rate": 1.5050001198822093e-05, "loss": 0.6103, "step": 13716 }, { "epoch": 0.35, "grad_norm": 3.538811206817627, "learning_rate": 1.5049284669117391e-05, "loss": 0.6107, "step": 13717 }, { "epoch": 0.35, "grad_norm": 1.9760143756866455, "learning_rate": 1.5048568104616195e-05, "loss": 0.5438, "step": 13718 }, { "epoch": 0.35, "grad_norm": 1.0956299304962158, "learning_rate": 1.5047851505323437e-05, "loss": 0.6045, "step": 13719 }, { "epoch": 0.35, "grad_norm": 3.7881596088409424, "learning_rate": 1.5047134871244056e-05, "loss": 0.8069, "step": 13720 }, { "epoch": 0.35, "grad_norm": 4.509947299957275, "learning_rate": 1.5046418202382993e-05, "loss": 0.6784, "step": 13721 }, { "epoch": 0.35, "grad_norm": 1.916195034980774, "learning_rate": 1.5045701498745188e-05, "loss": 0.6304, "step": 13722 }, { "epoch": 0.35, "grad_norm": 4.227390289306641, "learning_rate": 1.5044984760335575e-05, "loss": 0.6722, "step": 13723 }, { "epoch": 0.35, "grad_norm": 0.9712246060371399, "learning_rate": 1.5044267987159101e-05, "loss": 0.5438, "step": 13724 }, { "epoch": 0.35, "grad_norm": 1.696484923362732, "learning_rate": 1.5043551179220698e-05, "loss": 0.6594, "step": 13725 }, { "epoch": 0.35, "grad_norm": 1.6428276300430298, "learning_rate": 1.5042834336525311e-05, "loss": 0.4741, "step": 13726 }, { "epoch": 0.35, "grad_norm": 1.8945075273513794, "learning_rate": 1.5042117459077879e-05, "loss": 0.7521, "step": 13727 }, { "epoch": 0.35, "grad_norm": 3.5886073112487793, "learning_rate": 1.5041400546883337e-05, "loss": 0.7532, "step": 13728 }, { "epoch": 0.35, "grad_norm": 2.019339084625244, "learning_rate": 1.5040683599946633e-05, "loss": 0.6198, "step": 13729 }, { "epoch": 0.35, "grad_norm": 1.9403083324432373, "learning_rate": 1.5039966618272703e-05, "loss": 0.4627, "step": 13730 }, { "epoch": 0.35, "grad_norm": 5.46642541885376, "learning_rate": 1.503924960186649e-05, "loss": 0.733, "step": 13731 }, { "epoch": 0.35, "grad_norm": 1.1985217332839966, "learning_rate": 1.5038532550732936e-05, "loss": 0.4636, "step": 13732 }, { "epoch": 0.35, "grad_norm": 1.9709736108779907, "learning_rate": 1.5037815464876979e-05, "loss": 0.7474, "step": 13733 }, { "epoch": 0.35, "grad_norm": 2.398771286010742, "learning_rate": 1.5037098344303562e-05, "loss": 0.6735, "step": 13734 }, { "epoch": 0.35, "grad_norm": 2.3875572681427, "learning_rate": 1.503638118901763e-05, "loss": 0.68, "step": 13735 }, { "epoch": 0.35, "grad_norm": 2.4506733417510986, "learning_rate": 1.5035663999024124e-05, "loss": 0.5642, "step": 13736 }, { "epoch": 0.35, "grad_norm": 2.401681661605835, "learning_rate": 1.5034946774327982e-05, "loss": 0.4235, "step": 13737 }, { "epoch": 0.35, "grad_norm": 3.5289297103881836, "learning_rate": 1.5034229514934153e-05, "loss": 0.9091, "step": 13738 }, { "epoch": 0.35, "grad_norm": 9.522298812866211, "learning_rate": 1.5033512220847577e-05, "loss": 0.6043, "step": 13739 }, { "epoch": 0.35, "grad_norm": 4.971579074859619, "learning_rate": 1.5032794892073195e-05, "loss": 0.5361, "step": 13740 }, { "epoch": 0.35, "grad_norm": 1.955124855041504, "learning_rate": 1.503207752861595e-05, "loss": 0.6048, "step": 13741 }, { "epoch": 0.35, "grad_norm": 1.1765573024749756, "learning_rate": 1.5031360130480796e-05, "loss": 0.62, "step": 13742 }, { "epoch": 0.35, "grad_norm": 1.3872162103652954, "learning_rate": 1.5030642697672664e-05, "loss": 0.565, "step": 13743 }, { "epoch": 0.35, "grad_norm": 3.1043927669525146, "learning_rate": 1.5029925230196504e-05, "loss": 0.4869, "step": 13744 }, { "epoch": 0.35, "grad_norm": 3.827413320541382, "learning_rate": 1.5029207728057259e-05, "loss": 0.707, "step": 13745 }, { "epoch": 0.35, "grad_norm": 2.3431997299194336, "learning_rate": 1.5028490191259875e-05, "loss": 0.556, "step": 13746 }, { "epoch": 0.35, "grad_norm": 3.5388023853302, "learning_rate": 1.5027772619809295e-05, "loss": 0.5499, "step": 13747 }, { "epoch": 0.35, "grad_norm": 5.701773166656494, "learning_rate": 1.5027055013710464e-05, "loss": 0.6452, "step": 13748 }, { "epoch": 0.35, "grad_norm": 1.9705897569656372, "learning_rate": 1.5026337372968329e-05, "loss": 0.6033, "step": 13749 }, { "epoch": 0.35, "grad_norm": 2.958832025527954, "learning_rate": 1.5025619697587837e-05, "loss": 0.6405, "step": 13750 }, { "epoch": 0.35, "grad_norm": 2.0467820167541504, "learning_rate": 1.5024901987573928e-05, "loss": 0.6295, "step": 13751 }, { "epoch": 0.35, "grad_norm": 1.5958894491195679, "learning_rate": 1.5024184242931554e-05, "loss": 0.6642, "step": 13752 }, { "epoch": 0.35, "grad_norm": 7.038437843322754, "learning_rate": 1.5023466463665655e-05, "loss": 0.7245, "step": 13753 }, { "epoch": 0.35, "grad_norm": 1.5122758150100708, "learning_rate": 1.5022748649781185e-05, "loss": 0.5299, "step": 13754 }, { "epoch": 0.35, "grad_norm": 4.331114768981934, "learning_rate": 1.5022030801283085e-05, "loss": 0.7462, "step": 13755 }, { "epoch": 0.35, "grad_norm": 2.2692182064056396, "learning_rate": 1.5021312918176306e-05, "loss": 0.6361, "step": 13756 }, { "epoch": 0.35, "grad_norm": 1.331224799156189, "learning_rate": 1.502059500046579e-05, "loss": 0.6005, "step": 13757 }, { "epoch": 0.35, "grad_norm": 1.6788930892944336, "learning_rate": 1.5019877048156488e-05, "loss": 0.4817, "step": 13758 }, { "epoch": 0.35, "grad_norm": 5.924993991851807, "learning_rate": 1.5019159061253348e-05, "loss": 0.676, "step": 13759 }, { "epoch": 0.35, "grad_norm": 1.6052242517471313, "learning_rate": 1.5018441039761318e-05, "loss": 0.5602, "step": 13760 }, { "epoch": 0.35, "grad_norm": 2.749051809310913, "learning_rate": 1.5017722983685346e-05, "loss": 0.6832, "step": 13761 }, { "epoch": 0.35, "grad_norm": 1.6921820640563965, "learning_rate": 1.5017004893030378e-05, "loss": 0.5819, "step": 13762 }, { "epoch": 0.35, "grad_norm": 2.075366497039795, "learning_rate": 1.5016286767801362e-05, "loss": 0.656, "step": 13763 }, { "epoch": 0.35, "grad_norm": 2.1607258319854736, "learning_rate": 1.5015568608003253e-05, "loss": 0.762, "step": 13764 }, { "epoch": 0.35, "grad_norm": 2.1240365505218506, "learning_rate": 1.5014850413640993e-05, "loss": 0.591, "step": 13765 }, { "epoch": 0.35, "grad_norm": 1.4365085363388062, "learning_rate": 1.5014132184719539e-05, "loss": 0.6456, "step": 13766 }, { "epoch": 0.35, "grad_norm": 1.2426279783248901, "learning_rate": 1.5013413921243831e-05, "loss": 0.5549, "step": 13767 }, { "epoch": 0.35, "grad_norm": 2.129244804382324, "learning_rate": 1.5012695623218827e-05, "loss": 0.7868, "step": 13768 }, { "epoch": 0.35, "grad_norm": 3.642812728881836, "learning_rate": 1.5011977290649476e-05, "loss": 0.6838, "step": 13769 }, { "epoch": 0.35, "grad_norm": 1.1025124788284302, "learning_rate": 1.5011258923540722e-05, "loss": 0.5649, "step": 13770 }, { "epoch": 0.35, "grad_norm": 2.4944777488708496, "learning_rate": 1.5010540521897522e-05, "loss": 0.4804, "step": 13771 }, { "epoch": 0.35, "grad_norm": 1.8022711277008057, "learning_rate": 1.5009822085724823e-05, "loss": 0.6505, "step": 13772 }, { "epoch": 0.35, "grad_norm": 3.3618645668029785, "learning_rate": 1.500910361502758e-05, "loss": 0.5838, "step": 13773 }, { "epoch": 0.35, "grad_norm": 1.4491444826126099, "learning_rate": 1.5008385109810741e-05, "loss": 0.7293, "step": 13774 }, { "epoch": 0.35, "grad_norm": 1.7723220586776733, "learning_rate": 1.500766657007926e-05, "loss": 0.6055, "step": 13775 }, { "epoch": 0.35, "grad_norm": 2.3874194622039795, "learning_rate": 1.5006947995838083e-05, "loss": 0.6423, "step": 13776 }, { "epoch": 0.35, "grad_norm": 1.3443665504455566, "learning_rate": 1.5006229387092171e-05, "loss": 0.669, "step": 13777 }, { "epoch": 0.35, "grad_norm": 5.41630220413208, "learning_rate": 1.5005510743846468e-05, "loss": 0.4792, "step": 13778 }, { "epoch": 0.35, "grad_norm": 1.4235764741897583, "learning_rate": 1.5004792066105931e-05, "loss": 0.5141, "step": 13779 }, { "epoch": 0.35, "grad_norm": 5.854018211364746, "learning_rate": 1.5004073353875512e-05, "loss": 0.6201, "step": 13780 }, { "epoch": 0.35, "grad_norm": 1.8970227241516113, "learning_rate": 1.5003354607160159e-05, "loss": 0.6478, "step": 13781 }, { "epoch": 0.35, "grad_norm": 4.158437252044678, "learning_rate": 1.5002635825964832e-05, "loss": 0.5799, "step": 13782 }, { "epoch": 0.35, "grad_norm": 2.777076244354248, "learning_rate": 1.5001917010294484e-05, "loss": 0.726, "step": 13783 }, { "epoch": 0.35, "grad_norm": 3.688098430633545, "learning_rate": 1.5001198160154067e-05, "loss": 0.6861, "step": 13784 }, { "epoch": 0.35, "grad_norm": 2.048609733581543, "learning_rate": 1.5000479275548531e-05, "loss": 0.458, "step": 13785 }, { "epoch": 0.35, "grad_norm": 6.804433345794678, "learning_rate": 1.4999760356482836e-05, "loss": 0.6881, "step": 13786 }, { "epoch": 0.35, "grad_norm": 1.7802928686141968, "learning_rate": 1.4999041402961934e-05, "loss": 0.6305, "step": 13787 }, { "epoch": 0.35, "grad_norm": 2.329103946685791, "learning_rate": 1.4998322414990776e-05, "loss": 0.6308, "step": 13788 }, { "epoch": 0.35, "grad_norm": 1.4557565450668335, "learning_rate": 1.4997603392574323e-05, "loss": 0.4566, "step": 13789 }, { "epoch": 0.35, "grad_norm": 1.8377505540847778, "learning_rate": 1.4996884335717526e-05, "loss": 0.68, "step": 13790 }, { "epoch": 0.35, "grad_norm": 2.4995687007904053, "learning_rate": 1.499616524442534e-05, "loss": 0.5983, "step": 13791 }, { "epoch": 0.35, "grad_norm": 1.457815170288086, "learning_rate": 1.4995446118702726e-05, "loss": 0.5971, "step": 13792 }, { "epoch": 0.35, "grad_norm": 2.1847987174987793, "learning_rate": 1.4994726958554633e-05, "loss": 0.6092, "step": 13793 }, { "epoch": 0.35, "grad_norm": 2.405059576034546, "learning_rate": 1.499400776398602e-05, "loss": 0.5217, "step": 13794 }, { "epoch": 0.35, "grad_norm": 1.2878367900848389, "learning_rate": 1.4993288535001844e-05, "loss": 0.4578, "step": 13795 }, { "epoch": 0.35, "grad_norm": 1.7872956991195679, "learning_rate": 1.499256927160706e-05, "loss": 0.6727, "step": 13796 }, { "epoch": 0.35, "grad_norm": 2.1837716102600098, "learning_rate": 1.4991849973806627e-05, "loss": 0.6208, "step": 13797 }, { "epoch": 0.35, "grad_norm": 1.2106125354766846, "learning_rate": 1.4991130641605497e-05, "loss": 0.5781, "step": 13798 }, { "epoch": 0.35, "grad_norm": 1.8383381366729736, "learning_rate": 1.4990411275008635e-05, "loss": 0.5809, "step": 13799 }, { "epoch": 0.35, "grad_norm": 2.2169029712677, "learning_rate": 1.4989691874020986e-05, "loss": 0.6623, "step": 13800 }, { "epoch": 0.35, "grad_norm": 2.0751125812530518, "learning_rate": 1.498897243864752e-05, "loss": 0.5515, "step": 13801 }, { "epoch": 0.35, "grad_norm": 1.414849042892456, "learning_rate": 1.4988252968893191e-05, "loss": 0.5563, "step": 13802 }, { "epoch": 0.35, "grad_norm": 1.7681523561477661, "learning_rate": 1.4987533464762955e-05, "loss": 0.6246, "step": 13803 }, { "epoch": 0.35, "grad_norm": 4.98410701751709, "learning_rate": 1.4986813926261774e-05, "loss": 0.6134, "step": 13804 }, { "epoch": 0.35, "grad_norm": 1.6376813650131226, "learning_rate": 1.4986094353394603e-05, "loss": 0.6484, "step": 13805 }, { "epoch": 0.35, "grad_norm": 5.436532974243164, "learning_rate": 1.4985374746166402e-05, "loss": 0.5555, "step": 13806 }, { "epoch": 0.35, "grad_norm": 1.0602099895477295, "learning_rate": 1.498465510458213e-05, "loss": 0.4251, "step": 13807 }, { "epoch": 0.35, "grad_norm": 2.23392391204834, "learning_rate": 1.4983935428646747e-05, "loss": 0.6324, "step": 13808 }, { "epoch": 0.35, "grad_norm": 1.3638025522232056, "learning_rate": 1.4983215718365216e-05, "loss": 0.6486, "step": 13809 }, { "epoch": 0.35, "grad_norm": 2.2998461723327637, "learning_rate": 1.4982495973742489e-05, "loss": 0.6609, "step": 13810 }, { "epoch": 0.35, "grad_norm": 1.4225854873657227, "learning_rate": 1.4981776194783531e-05, "loss": 0.4421, "step": 13811 }, { "epoch": 0.35, "grad_norm": 1.1542768478393555, "learning_rate": 1.4981056381493301e-05, "loss": 0.6239, "step": 13812 }, { "epoch": 0.35, "grad_norm": 6.579796314239502, "learning_rate": 1.4980336533876759e-05, "loss": 0.7702, "step": 13813 }, { "epoch": 0.35, "grad_norm": 3.01888370513916, "learning_rate": 1.497961665193887e-05, "loss": 0.621, "step": 13814 }, { "epoch": 0.35, "grad_norm": 4.57466459274292, "learning_rate": 1.4978896735684587e-05, "loss": 0.599, "step": 13815 }, { "epoch": 0.35, "grad_norm": 1.635222315788269, "learning_rate": 1.4978176785118879e-05, "loss": 0.4928, "step": 13816 }, { "epoch": 0.35, "grad_norm": 7.652211666107178, "learning_rate": 1.4977456800246701e-05, "loss": 0.6897, "step": 13817 }, { "epoch": 0.35, "grad_norm": 1.2588013410568237, "learning_rate": 1.497673678107302e-05, "loss": 0.5097, "step": 13818 }, { "epoch": 0.35, "grad_norm": 4.677975654602051, "learning_rate": 1.4976016727602795e-05, "loss": 0.6865, "step": 13819 }, { "epoch": 0.35, "grad_norm": 1.6055692434310913, "learning_rate": 1.497529663984099e-05, "loss": 0.6635, "step": 13820 }, { "epoch": 0.35, "grad_norm": 2.3635363578796387, "learning_rate": 1.4974576517792567e-05, "loss": 0.6241, "step": 13821 }, { "epoch": 0.35, "grad_norm": 1.902205467224121, "learning_rate": 1.4973856361462486e-05, "loss": 0.4893, "step": 13822 }, { "epoch": 0.35, "grad_norm": 5.46039342880249, "learning_rate": 1.4973136170855713e-05, "loss": 0.5459, "step": 13823 }, { "epoch": 0.35, "grad_norm": 1.1368557214736938, "learning_rate": 1.497241594597721e-05, "loss": 0.5307, "step": 13824 }, { "epoch": 0.35, "grad_norm": 1.5592094659805298, "learning_rate": 1.497169568683194e-05, "loss": 0.5657, "step": 13825 }, { "epoch": 0.35, "grad_norm": 1.5201542377471924, "learning_rate": 1.4970975393424867e-05, "loss": 0.6837, "step": 13826 }, { "epoch": 0.35, "grad_norm": 1.4163745641708374, "learning_rate": 1.4970255065760954e-05, "loss": 0.671, "step": 13827 }, { "epoch": 0.35, "grad_norm": 2.16692852973938, "learning_rate": 1.4969534703845168e-05, "loss": 0.6179, "step": 13828 }, { "epoch": 0.35, "grad_norm": 3.3432323932647705, "learning_rate": 1.4968814307682466e-05, "loss": 0.7406, "step": 13829 }, { "epoch": 0.35, "grad_norm": 2.215320348739624, "learning_rate": 1.4968093877277823e-05, "loss": 0.7938, "step": 13830 }, { "epoch": 0.35, "grad_norm": 2.441012382507324, "learning_rate": 1.4967373412636198e-05, "loss": 0.8334, "step": 13831 }, { "epoch": 0.35, "grad_norm": 3.1931838989257812, "learning_rate": 1.4966652913762552e-05, "loss": 0.574, "step": 13832 }, { "epoch": 0.35, "grad_norm": 5.661154270172119, "learning_rate": 1.4965932380661859e-05, "loss": 0.6347, "step": 13833 }, { "epoch": 0.35, "grad_norm": 1.4199249744415283, "learning_rate": 1.4965211813339075e-05, "loss": 0.4957, "step": 13834 }, { "epoch": 0.35, "grad_norm": 2.0689713954925537, "learning_rate": 1.4964491211799173e-05, "loss": 0.6818, "step": 13835 }, { "epoch": 0.35, "grad_norm": 2.3928725719451904, "learning_rate": 1.4963770576047116e-05, "loss": 0.5176, "step": 13836 }, { "epoch": 0.35, "grad_norm": 1.3841118812561035, "learning_rate": 1.4963049906087872e-05, "loss": 0.7223, "step": 13837 }, { "epoch": 0.35, "grad_norm": 2.37807035446167, "learning_rate": 1.4962329201926407e-05, "loss": 0.6596, "step": 13838 }, { "epoch": 0.35, "grad_norm": 2.032899856567383, "learning_rate": 1.4961608463567686e-05, "loss": 0.6405, "step": 13839 }, { "epoch": 0.35, "grad_norm": 2.583997964859009, "learning_rate": 1.4960887691016677e-05, "loss": 0.6452, "step": 13840 }, { "epoch": 0.35, "grad_norm": 3.1184473037719727, "learning_rate": 1.4960166884278344e-05, "loss": 0.6843, "step": 13841 }, { "epoch": 0.35, "grad_norm": 2.7345235347747803, "learning_rate": 1.4959446043357657e-05, "loss": 0.7516, "step": 13842 }, { "epoch": 0.35, "grad_norm": 1.3730789422988892, "learning_rate": 1.495872516825959e-05, "loss": 0.527, "step": 13843 }, { "epoch": 0.35, "grad_norm": 1.4398324489593506, "learning_rate": 1.4958004258989096e-05, "loss": 0.5771, "step": 13844 }, { "epoch": 0.35, "grad_norm": 7.422095775604248, "learning_rate": 1.4957283315551159e-05, "loss": 0.5043, "step": 13845 }, { "epoch": 0.35, "grad_norm": 9.526698112487793, "learning_rate": 1.4956562337950734e-05, "loss": 0.6353, "step": 13846 }, { "epoch": 0.35, "grad_norm": 1.435989499092102, "learning_rate": 1.4955841326192802e-05, "loss": 0.6436, "step": 13847 }, { "epoch": 0.35, "grad_norm": 1.306895136833191, "learning_rate": 1.495512028028232e-05, "loss": 0.4539, "step": 13848 }, { "epoch": 0.35, "grad_norm": 3.5160605907440186, "learning_rate": 1.4954399200224263e-05, "loss": 0.548, "step": 13849 }, { "epoch": 0.35, "grad_norm": 2.129255771636963, "learning_rate": 1.49536780860236e-05, "loss": 0.5234, "step": 13850 }, { "epoch": 0.36, "grad_norm": 2.9459388256073, "learning_rate": 1.4952956937685299e-05, "loss": 0.4682, "step": 13851 }, { "epoch": 0.36, "grad_norm": 2.4785373210906982, "learning_rate": 1.495223575521433e-05, "loss": 0.5748, "step": 13852 }, { "epoch": 0.36, "grad_norm": 1.4998985528945923, "learning_rate": 1.4951514538615665e-05, "loss": 0.6397, "step": 13853 }, { "epoch": 0.36, "grad_norm": 2.7181267738342285, "learning_rate": 1.4950793287894274e-05, "loss": 0.6156, "step": 13854 }, { "epoch": 0.36, "grad_norm": 3.1759355068206787, "learning_rate": 1.4950072003055125e-05, "loss": 0.6036, "step": 13855 }, { "epoch": 0.36, "grad_norm": 5.696474075317383, "learning_rate": 1.4949350684103188e-05, "loss": 1.0694, "step": 13856 }, { "epoch": 0.36, "grad_norm": 1.646299958229065, "learning_rate": 1.4948629331043436e-05, "loss": 0.6408, "step": 13857 }, { "epoch": 0.36, "grad_norm": 3.2047486305236816, "learning_rate": 1.494790794388084e-05, "loss": 0.6635, "step": 13858 }, { "epoch": 0.36, "grad_norm": 1.9291597604751587, "learning_rate": 1.4947186522620372e-05, "loss": 0.6, "step": 13859 }, { "epoch": 0.36, "grad_norm": 2.301521062850952, "learning_rate": 1.4946465067267002e-05, "loss": 0.7409, "step": 13860 }, { "epoch": 0.36, "grad_norm": 1.1538230180740356, "learning_rate": 1.4945743577825703e-05, "loss": 0.5578, "step": 13861 }, { "epoch": 0.36, "grad_norm": 2.0322704315185547, "learning_rate": 1.4945022054301443e-05, "loss": 0.5723, "step": 13862 }, { "epoch": 0.36, "grad_norm": 1.9574156999588013, "learning_rate": 1.4944300496699202e-05, "loss": 0.7385, "step": 13863 }, { "epoch": 0.36, "grad_norm": 1.053905725479126, "learning_rate": 1.4943578905023945e-05, "loss": 0.4276, "step": 13864 }, { "epoch": 0.36, "grad_norm": 1.3042280673980713, "learning_rate": 1.4942857279280652e-05, "loss": 0.7369, "step": 13865 }, { "epoch": 0.36, "grad_norm": 7.785228729248047, "learning_rate": 1.494213561947429e-05, "loss": 0.6404, "step": 13866 }, { "epoch": 0.36, "grad_norm": 1.118720531463623, "learning_rate": 1.4941413925609833e-05, "loss": 0.4579, "step": 13867 }, { "epoch": 0.36, "grad_norm": 2.157336711883545, "learning_rate": 1.4940692197692257e-05, "loss": 0.6111, "step": 13868 }, { "epoch": 0.36, "grad_norm": 1.3490632772445679, "learning_rate": 1.4939970435726534e-05, "loss": 0.5339, "step": 13869 }, { "epoch": 0.36, "grad_norm": 2.0974373817443848, "learning_rate": 1.4939248639717641e-05, "loss": 0.5733, "step": 13870 }, { "epoch": 0.36, "grad_norm": 1.2718312740325928, "learning_rate": 1.4938526809670546e-05, "loss": 0.6035, "step": 13871 }, { "epoch": 0.36, "grad_norm": 9.6221923828125, "learning_rate": 1.4937804945590229e-05, "loss": 0.8222, "step": 13872 }, { "epoch": 0.36, "grad_norm": 1.8534880876541138, "learning_rate": 1.4937083047481662e-05, "loss": 0.6271, "step": 13873 }, { "epoch": 0.36, "grad_norm": 1.5247491598129272, "learning_rate": 1.4936361115349817e-05, "loss": 0.589, "step": 13874 }, { "epoch": 0.36, "grad_norm": 1.0973992347717285, "learning_rate": 1.4935639149199678e-05, "loss": 0.6749, "step": 13875 }, { "epoch": 0.36, "grad_norm": 1.695894718170166, "learning_rate": 1.493491714903621e-05, "loss": 0.5997, "step": 13876 }, { "epoch": 0.36, "grad_norm": 2.8577911853790283, "learning_rate": 1.4934195114864398e-05, "loss": 0.6833, "step": 13877 }, { "epoch": 0.36, "grad_norm": 2.1993508338928223, "learning_rate": 1.4933473046689211e-05, "loss": 0.537, "step": 13878 }, { "epoch": 0.36, "grad_norm": 5.962558269500732, "learning_rate": 1.4932750944515628e-05, "loss": 0.5682, "step": 13879 }, { "epoch": 0.36, "grad_norm": 1.1479729413986206, "learning_rate": 1.4932028808348622e-05, "loss": 0.6032, "step": 13880 }, { "epoch": 0.36, "grad_norm": 1.8890337944030762, "learning_rate": 1.4931306638193175e-05, "loss": 0.6219, "step": 13881 }, { "epoch": 0.36, "grad_norm": 1.994386911392212, "learning_rate": 1.493058443405426e-05, "loss": 0.6042, "step": 13882 }, { "epoch": 0.36, "grad_norm": 1.255079984664917, "learning_rate": 1.4929862195936852e-05, "loss": 0.5926, "step": 13883 }, { "epoch": 0.36, "grad_norm": 2.6026079654693604, "learning_rate": 1.4929139923845936e-05, "loss": 0.5742, "step": 13884 }, { "epoch": 0.36, "grad_norm": 2.9684946537017822, "learning_rate": 1.4928417617786481e-05, "loss": 0.6484, "step": 13885 }, { "epoch": 0.36, "grad_norm": 4.307115077972412, "learning_rate": 1.492769527776347e-05, "loss": 0.697, "step": 13886 }, { "epoch": 0.36, "grad_norm": 3.981201171875, "learning_rate": 1.4926972903781878e-05, "loss": 0.5722, "step": 13887 }, { "epoch": 0.36, "grad_norm": 3.261427640914917, "learning_rate": 1.4926250495846683e-05, "loss": 0.5656, "step": 13888 }, { "epoch": 0.36, "grad_norm": 1.3550896644592285, "learning_rate": 1.4925528053962868e-05, "loss": 0.5015, "step": 13889 }, { "epoch": 0.36, "grad_norm": 1.1371846199035645, "learning_rate": 1.4924805578135404e-05, "loss": 0.5481, "step": 13890 }, { "epoch": 0.36, "grad_norm": 3.3919031620025635, "learning_rate": 1.4924083068369278e-05, "loss": 0.8251, "step": 13891 }, { "epoch": 0.36, "grad_norm": 2.486701011657715, "learning_rate": 1.4923360524669464e-05, "loss": 0.7643, "step": 13892 }, { "epoch": 0.36, "grad_norm": 3.008769989013672, "learning_rate": 1.492263794704094e-05, "loss": 0.6661, "step": 13893 }, { "epoch": 0.36, "grad_norm": 1.2651931047439575, "learning_rate": 1.4921915335488691e-05, "loss": 0.5449, "step": 13894 }, { "epoch": 0.36, "grad_norm": 1.8154351711273193, "learning_rate": 1.492119269001769e-05, "loss": 0.6232, "step": 13895 }, { "epoch": 0.36, "grad_norm": 2.443509101867676, "learning_rate": 1.4920470010632927e-05, "loss": 0.6667, "step": 13896 }, { "epoch": 0.36, "grad_norm": 1.3359287977218628, "learning_rate": 1.4919747297339368e-05, "loss": 0.5805, "step": 13897 }, { "epoch": 0.36, "grad_norm": 4.667123317718506, "learning_rate": 1.4919024550142005e-05, "loss": 0.6388, "step": 13898 }, { "epoch": 0.36, "grad_norm": 2.448413372039795, "learning_rate": 1.4918301769045818e-05, "loss": 0.5679, "step": 13899 }, { "epoch": 0.36, "grad_norm": 1.6727341413497925, "learning_rate": 1.4917578954055783e-05, "loss": 0.4932, "step": 13900 }, { "epoch": 0.36, "grad_norm": 1.8848899602890015, "learning_rate": 1.4916856105176885e-05, "loss": 0.5239, "step": 13901 }, { "epoch": 0.36, "grad_norm": 2.5038294792175293, "learning_rate": 1.4916133222414099e-05, "loss": 0.5728, "step": 13902 }, { "epoch": 0.36, "grad_norm": 0.9586470723152161, "learning_rate": 1.4915410305772416e-05, "loss": 0.5721, "step": 13903 }, { "epoch": 0.36, "grad_norm": 1.4934836626052856, "learning_rate": 1.4914687355256812e-05, "loss": 0.7129, "step": 13904 }, { "epoch": 0.36, "grad_norm": 1.2415069341659546, "learning_rate": 1.491396437087227e-05, "loss": 0.473, "step": 13905 }, { "epoch": 0.36, "grad_norm": 1.155272126197815, "learning_rate": 1.4913241352623775e-05, "loss": 0.6617, "step": 13906 }, { "epoch": 0.36, "grad_norm": 2.652710199356079, "learning_rate": 1.4912518300516303e-05, "loss": 0.6483, "step": 13907 }, { "epoch": 0.36, "grad_norm": 2.371758222579956, "learning_rate": 1.4911795214554845e-05, "loss": 0.6133, "step": 13908 }, { "epoch": 0.36, "grad_norm": 2.0568511486053467, "learning_rate": 1.491107209474438e-05, "loss": 0.6558, "step": 13909 }, { "epoch": 0.36, "grad_norm": 1.887809157371521, "learning_rate": 1.491034894108989e-05, "loss": 0.65, "step": 13910 }, { "epoch": 0.36, "grad_norm": 1.8045037984848022, "learning_rate": 1.4909625753596362e-05, "loss": 0.6495, "step": 13911 }, { "epoch": 0.36, "grad_norm": 1.807007908821106, "learning_rate": 1.4908902532268775e-05, "loss": 0.5983, "step": 13912 }, { "epoch": 0.36, "grad_norm": 5.086552619934082, "learning_rate": 1.4908179277112117e-05, "loss": 0.5772, "step": 13913 }, { "epoch": 0.36, "grad_norm": 1.4281314611434937, "learning_rate": 1.4907455988131371e-05, "loss": 0.581, "step": 13914 }, { "epoch": 0.36, "grad_norm": 1.6644487380981445, "learning_rate": 1.4906732665331522e-05, "loss": 0.5469, "step": 13915 }, { "epoch": 0.36, "grad_norm": 2.3206050395965576, "learning_rate": 1.4906009308717555e-05, "loss": 0.7511, "step": 13916 }, { "epoch": 0.36, "grad_norm": 1.462540864944458, "learning_rate": 1.4905285918294454e-05, "loss": 0.5523, "step": 13917 }, { "epoch": 0.36, "grad_norm": 1.1101728677749634, "learning_rate": 1.4904562494067204e-05, "loss": 0.4402, "step": 13918 }, { "epoch": 0.36, "grad_norm": 1.7644894123077393, "learning_rate": 1.4903839036040787e-05, "loss": 0.5053, "step": 13919 }, { "epoch": 0.36, "grad_norm": 1.3186664581298828, "learning_rate": 1.4903115544220199e-05, "loss": 0.539, "step": 13920 }, { "epoch": 0.36, "grad_norm": 1.1329259872436523, "learning_rate": 1.4902392018610415e-05, "loss": 0.5618, "step": 13921 }, { "epoch": 0.36, "grad_norm": 3.973264217376709, "learning_rate": 1.4901668459216425e-05, "loss": 0.5724, "step": 13922 }, { "epoch": 0.36, "grad_norm": 3.189720392227173, "learning_rate": 1.4900944866043216e-05, "loss": 0.6931, "step": 13923 }, { "epoch": 0.36, "grad_norm": 3.07317852973938, "learning_rate": 1.4900221239095774e-05, "loss": 0.6663, "step": 13924 }, { "epoch": 0.36, "grad_norm": 1.257609248161316, "learning_rate": 1.4899497578379088e-05, "loss": 0.4345, "step": 13925 }, { "epoch": 0.36, "grad_norm": 5.0359673500061035, "learning_rate": 1.4898773883898138e-05, "loss": 0.9017, "step": 13926 }, { "epoch": 0.36, "grad_norm": 2.587127923965454, "learning_rate": 1.4898050155657919e-05, "loss": 0.633, "step": 13927 }, { "epoch": 0.36, "grad_norm": 2.396758556365967, "learning_rate": 1.4897326393663419e-05, "loss": 0.5885, "step": 13928 }, { "epoch": 0.36, "grad_norm": 2.1465559005737305, "learning_rate": 1.4896602597919617e-05, "loss": 0.6705, "step": 13929 }, { "epoch": 0.36, "grad_norm": 1.410786509513855, "learning_rate": 1.4895878768431509e-05, "loss": 0.4548, "step": 13930 }, { "epoch": 0.36, "grad_norm": 3.393584728240967, "learning_rate": 1.4895154905204079e-05, "loss": 0.7936, "step": 13931 }, { "epoch": 0.36, "grad_norm": 1.6469205617904663, "learning_rate": 1.4894431008242319e-05, "loss": 0.4331, "step": 13932 }, { "epoch": 0.36, "grad_norm": 1.281748652458191, "learning_rate": 1.4893707077551213e-05, "loss": 0.5485, "step": 13933 }, { "epoch": 0.36, "grad_norm": 2.120039939880371, "learning_rate": 1.4892983113135753e-05, "loss": 0.6288, "step": 13934 }, { "epoch": 0.36, "grad_norm": 1.6450319290161133, "learning_rate": 1.4892259115000927e-05, "loss": 0.5562, "step": 13935 }, { "epoch": 0.36, "grad_norm": 1.908953309059143, "learning_rate": 1.4891535083151727e-05, "loss": 0.4691, "step": 13936 }, { "epoch": 0.36, "grad_norm": 5.406191349029541, "learning_rate": 1.4890811017593138e-05, "loss": 0.7965, "step": 13937 }, { "epoch": 0.36, "grad_norm": 1.3880788087844849, "learning_rate": 1.4890086918330153e-05, "loss": 0.6283, "step": 13938 }, { "epoch": 0.36, "grad_norm": 1.594313144683838, "learning_rate": 1.488936278536776e-05, "loss": 0.5044, "step": 13939 }, { "epoch": 0.36, "grad_norm": 1.7327731847763062, "learning_rate": 1.4888638618710953e-05, "loss": 0.6252, "step": 13940 }, { "epoch": 0.36, "grad_norm": 2.944477081298828, "learning_rate": 1.488791441836472e-05, "loss": 0.6033, "step": 13941 }, { "epoch": 0.36, "grad_norm": 6.223230838775635, "learning_rate": 1.488719018433405e-05, "loss": 0.6387, "step": 13942 }, { "epoch": 0.36, "grad_norm": 2.608806848526001, "learning_rate": 1.4886465916623936e-05, "loss": 0.5145, "step": 13943 }, { "epoch": 0.36, "grad_norm": 3.843341588973999, "learning_rate": 1.4885741615239368e-05, "loss": 0.7111, "step": 13944 }, { "epoch": 0.36, "grad_norm": 1.9162003993988037, "learning_rate": 1.4885017280185338e-05, "loss": 0.7126, "step": 13945 }, { "epoch": 0.36, "grad_norm": 3.6117870807647705, "learning_rate": 1.488429291146684e-05, "loss": 0.6921, "step": 13946 }, { "epoch": 0.36, "grad_norm": 1.3628880977630615, "learning_rate": 1.4883568509088867e-05, "loss": 0.4993, "step": 13947 }, { "epoch": 0.36, "grad_norm": 1.7120840549468994, "learning_rate": 1.4882844073056402e-05, "loss": 0.6642, "step": 13948 }, { "epoch": 0.36, "grad_norm": 3.335571527481079, "learning_rate": 1.4882119603374445e-05, "loss": 0.5635, "step": 13949 }, { "epoch": 0.36, "grad_norm": 2.244950294494629, "learning_rate": 1.4881395100047989e-05, "loss": 0.4844, "step": 13950 }, { "epoch": 0.36, "grad_norm": 1.164033055305481, "learning_rate": 1.4880670563082022e-05, "loss": 0.5915, "step": 13951 }, { "epoch": 0.36, "grad_norm": 3.103984832763672, "learning_rate": 1.4879945992481541e-05, "loss": 0.5987, "step": 13952 }, { "epoch": 0.36, "grad_norm": 1.5042566061019897, "learning_rate": 1.487922138825154e-05, "loss": 0.5597, "step": 13953 }, { "epoch": 0.36, "grad_norm": 2.529763698577881, "learning_rate": 1.4878496750397007e-05, "loss": 0.5199, "step": 13954 }, { "epoch": 0.36, "grad_norm": 2.1431586742401123, "learning_rate": 1.487777207892294e-05, "loss": 0.727, "step": 13955 }, { "epoch": 0.36, "grad_norm": 1.8856922388076782, "learning_rate": 1.4877047373834334e-05, "loss": 0.5902, "step": 13956 }, { "epoch": 0.36, "grad_norm": 3.2134242057800293, "learning_rate": 1.4876322635136182e-05, "loss": 0.4332, "step": 13957 }, { "epoch": 0.36, "grad_norm": 1.2959175109863281, "learning_rate": 1.4875597862833475e-05, "loss": 0.5631, "step": 13958 }, { "epoch": 0.36, "grad_norm": 1.7885737419128418, "learning_rate": 1.4874873056931214e-05, "loss": 0.5464, "step": 13959 }, { "epoch": 0.36, "grad_norm": 10.182140350341797, "learning_rate": 1.487414821743439e-05, "loss": 0.7438, "step": 13960 }, { "epoch": 0.36, "grad_norm": 3.426248073577881, "learning_rate": 1.4873423344347999e-05, "loss": 0.7742, "step": 13961 }, { "epoch": 0.36, "grad_norm": 4.151254177093506, "learning_rate": 1.4872698437677034e-05, "loss": 0.7461, "step": 13962 }, { "epoch": 0.36, "grad_norm": 1.52980637550354, "learning_rate": 1.4871973497426492e-05, "loss": 0.6745, "step": 13963 }, { "epoch": 0.36, "grad_norm": 5.17825984954834, "learning_rate": 1.4871248523601374e-05, "loss": 0.7989, "step": 13964 }, { "epoch": 0.36, "grad_norm": 7.549812316894531, "learning_rate": 1.4870523516206668e-05, "loss": 0.5855, "step": 13965 }, { "epoch": 0.36, "grad_norm": 2.385622501373291, "learning_rate": 1.4869798475247375e-05, "loss": 0.5219, "step": 13966 }, { "epoch": 0.36, "grad_norm": 2.3792717456817627, "learning_rate": 1.4869073400728491e-05, "loss": 0.8231, "step": 13967 }, { "epoch": 0.36, "grad_norm": 1.5753642320632935, "learning_rate": 1.4868348292655012e-05, "loss": 0.6571, "step": 13968 }, { "epoch": 0.36, "grad_norm": 1.9892139434814453, "learning_rate": 1.4867623151031935e-05, "loss": 0.5465, "step": 13969 }, { "epoch": 0.36, "grad_norm": 1.751270055770874, "learning_rate": 1.4866897975864256e-05, "loss": 0.6292, "step": 13970 }, { "epoch": 0.36, "grad_norm": 2.050032615661621, "learning_rate": 1.4866172767156975e-05, "loss": 0.6808, "step": 13971 }, { "epoch": 0.36, "grad_norm": 1.580414891242981, "learning_rate": 1.486544752491509e-05, "loss": 0.6999, "step": 13972 }, { "epoch": 0.36, "grad_norm": 1.0280195474624634, "learning_rate": 1.4864722249143595e-05, "loss": 0.6839, "step": 13973 }, { "epoch": 0.36, "grad_norm": 2.5319128036499023, "learning_rate": 1.4863996939847492e-05, "loss": 0.6592, "step": 13974 }, { "epoch": 0.36, "grad_norm": 2.141706943511963, "learning_rate": 1.4863271597031777e-05, "loss": 0.531, "step": 13975 }, { "epoch": 0.36, "grad_norm": 2.0240085124969482, "learning_rate": 1.486254622070145e-05, "loss": 0.5236, "step": 13976 }, { "epoch": 0.36, "grad_norm": 2.719207286834717, "learning_rate": 1.4861820810861508e-05, "loss": 0.818, "step": 13977 }, { "epoch": 0.36, "grad_norm": 0.9928183555603027, "learning_rate": 1.4861095367516954e-05, "loss": 0.6196, "step": 13978 }, { "epoch": 0.36, "grad_norm": 3.3238372802734375, "learning_rate": 1.4860369890672784e-05, "loss": 0.6665, "step": 13979 }, { "epoch": 0.36, "grad_norm": 1.4045132398605347, "learning_rate": 1.4859644380333995e-05, "loss": 0.6757, "step": 13980 }, { "epoch": 0.36, "grad_norm": 3.848299026489258, "learning_rate": 1.4858918836505595e-05, "loss": 0.7035, "step": 13981 }, { "epoch": 0.36, "grad_norm": 2.4122731685638428, "learning_rate": 1.4858193259192577e-05, "loss": 0.7951, "step": 13982 }, { "epoch": 0.36, "grad_norm": 1.2870471477508545, "learning_rate": 1.4857467648399941e-05, "loss": 0.5718, "step": 13983 }, { "epoch": 0.36, "grad_norm": 4.818072319030762, "learning_rate": 1.4856742004132694e-05, "loss": 0.6792, "step": 13984 }, { "epoch": 0.36, "grad_norm": 2.1281800270080566, "learning_rate": 1.485601632639583e-05, "loss": 0.6666, "step": 13985 }, { "epoch": 0.36, "grad_norm": 1.881445050239563, "learning_rate": 1.4855290615194354e-05, "loss": 0.6463, "step": 13986 }, { "epoch": 0.36, "grad_norm": 2.861515522003174, "learning_rate": 1.4854564870533263e-05, "loss": 0.619, "step": 13987 }, { "epoch": 0.36, "grad_norm": 3.9343349933624268, "learning_rate": 1.4853839092417564e-05, "loss": 0.6278, "step": 13988 }, { "epoch": 0.36, "grad_norm": 3.9993972778320312, "learning_rate": 1.4853113280852252e-05, "loss": 0.5557, "step": 13989 }, { "epoch": 0.36, "grad_norm": 5.159435272216797, "learning_rate": 1.4852387435842332e-05, "loss": 0.7256, "step": 13990 }, { "epoch": 0.36, "grad_norm": 5.82498025894165, "learning_rate": 1.485166155739281e-05, "loss": 0.65, "step": 13991 }, { "epoch": 0.36, "grad_norm": 1.480995535850525, "learning_rate": 1.4850935645508682e-05, "loss": 0.4801, "step": 13992 }, { "epoch": 0.36, "grad_norm": 3.1136839389801025, "learning_rate": 1.4850209700194955e-05, "loss": 0.5065, "step": 13993 }, { "epoch": 0.36, "grad_norm": 7.693317413330078, "learning_rate": 1.4849483721456627e-05, "loss": 0.5876, "step": 13994 }, { "epoch": 0.36, "grad_norm": 2.9398090839385986, "learning_rate": 1.4848757709298705e-05, "loss": 0.6874, "step": 13995 }, { "epoch": 0.36, "grad_norm": 5.154563903808594, "learning_rate": 1.4848031663726192e-05, "loss": 0.5808, "step": 13996 }, { "epoch": 0.36, "grad_norm": 1.7588690519332886, "learning_rate": 1.4847305584744092e-05, "loss": 0.421, "step": 13997 }, { "epoch": 0.36, "grad_norm": 1.659283995628357, "learning_rate": 1.4846579472357405e-05, "loss": 0.6293, "step": 13998 }, { "epoch": 0.36, "grad_norm": 1.7159384489059448, "learning_rate": 1.4845853326571138e-05, "loss": 0.5424, "step": 13999 }, { "epoch": 0.36, "grad_norm": 1.544070839881897, "learning_rate": 1.4845127147390294e-05, "loss": 0.566, "step": 14000 }, { "epoch": 0.36, "grad_norm": 1.9811067581176758, "learning_rate": 1.4844400934819879e-05, "loss": 0.4588, "step": 14001 }, { "epoch": 0.36, "grad_norm": 1.6734857559204102, "learning_rate": 1.4843674688864895e-05, "loss": 0.4788, "step": 14002 }, { "epoch": 0.36, "grad_norm": 1.4203944206237793, "learning_rate": 1.4842948409530348e-05, "loss": 0.5445, "step": 14003 }, { "epoch": 0.36, "grad_norm": 3.134204626083374, "learning_rate": 1.4842222096821243e-05, "loss": 0.4602, "step": 14004 }, { "epoch": 0.36, "grad_norm": 8.913202285766602, "learning_rate": 1.4841495750742589e-05, "loss": 0.7033, "step": 14005 }, { "epoch": 0.36, "grad_norm": 1.1512137651443481, "learning_rate": 1.4840769371299383e-05, "loss": 0.4534, "step": 14006 }, { "epoch": 0.36, "grad_norm": 3.4100241661071777, "learning_rate": 1.4840042958496639e-05, "loss": 0.781, "step": 14007 }, { "epoch": 0.36, "grad_norm": 1.1980630159378052, "learning_rate": 1.483931651233936e-05, "loss": 0.5305, "step": 14008 }, { "epoch": 0.36, "grad_norm": 1.1416339874267578, "learning_rate": 1.4838590032832549e-05, "loss": 0.5507, "step": 14009 }, { "epoch": 0.36, "grad_norm": 1.489907145500183, "learning_rate": 1.483786351998122e-05, "loss": 0.4852, "step": 14010 }, { "epoch": 0.36, "grad_norm": 2.889436960220337, "learning_rate": 1.483713697379037e-05, "loss": 0.6929, "step": 14011 }, { "epoch": 0.36, "grad_norm": 3.2859880924224854, "learning_rate": 1.4836410394265012e-05, "loss": 0.7042, "step": 14012 }, { "epoch": 0.36, "grad_norm": 1.6479551792144775, "learning_rate": 1.4835683781410155e-05, "loss": 0.6116, "step": 14013 }, { "epoch": 0.36, "grad_norm": 2.1203365325927734, "learning_rate": 1.48349571352308e-05, "loss": 0.5402, "step": 14014 }, { "epoch": 0.36, "grad_norm": 3.982942581176758, "learning_rate": 1.4834230455731962e-05, "loss": 0.7487, "step": 14015 }, { "epoch": 0.36, "grad_norm": 2.08416485786438, "learning_rate": 1.483350374291864e-05, "loss": 0.6139, "step": 14016 }, { "epoch": 0.36, "grad_norm": 2.9621939659118652, "learning_rate": 1.4832776996795852e-05, "loss": 0.6079, "step": 14017 }, { "epoch": 0.36, "grad_norm": 3.6969447135925293, "learning_rate": 1.4832050217368598e-05, "loss": 0.5557, "step": 14018 }, { "epoch": 0.36, "grad_norm": 6.517960548400879, "learning_rate": 1.4831323404641889e-05, "loss": 0.6531, "step": 14019 }, { "epoch": 0.36, "grad_norm": 1.449552297592163, "learning_rate": 1.4830596558620738e-05, "loss": 0.5469, "step": 14020 }, { "epoch": 0.36, "grad_norm": 1.6001015901565552, "learning_rate": 1.4829869679310148e-05, "loss": 0.6306, "step": 14021 }, { "epoch": 0.36, "grad_norm": 4.287232398986816, "learning_rate": 1.482914276671513e-05, "loss": 0.6931, "step": 14022 }, { "epoch": 0.36, "grad_norm": 1.6020350456237793, "learning_rate": 1.4828415820840694e-05, "loss": 0.5054, "step": 14023 }, { "epoch": 0.36, "grad_norm": 2.2807719707489014, "learning_rate": 1.482768884169185e-05, "loss": 0.6509, "step": 14024 }, { "epoch": 0.36, "grad_norm": 3.2078371047973633, "learning_rate": 1.482696182927361e-05, "loss": 0.4979, "step": 14025 }, { "epoch": 0.36, "grad_norm": 1.280246376991272, "learning_rate": 1.4826234783590978e-05, "loss": 0.4939, "step": 14026 }, { "epoch": 0.36, "grad_norm": 1.9765746593475342, "learning_rate": 1.4825507704648972e-05, "loss": 0.4819, "step": 14027 }, { "epoch": 0.36, "grad_norm": 2.665311336517334, "learning_rate": 1.4824780592452595e-05, "loss": 0.6795, "step": 14028 }, { "epoch": 0.36, "grad_norm": 1.6904748678207397, "learning_rate": 1.4824053447006863e-05, "loss": 0.6185, "step": 14029 }, { "epoch": 0.36, "grad_norm": 1.651886224746704, "learning_rate": 1.4823326268316785e-05, "loss": 0.5729, "step": 14030 }, { "epoch": 0.36, "grad_norm": 1.9636576175689697, "learning_rate": 1.482259905638737e-05, "loss": 0.747, "step": 14031 }, { "epoch": 0.36, "grad_norm": 5.202960014343262, "learning_rate": 1.4821871811223639e-05, "loss": 0.7211, "step": 14032 }, { "epoch": 0.36, "grad_norm": 2.4843204021453857, "learning_rate": 1.482114453283059e-05, "loss": 0.6345, "step": 14033 }, { "epoch": 0.36, "grad_norm": 2.1578562259674072, "learning_rate": 1.4820417221213247e-05, "loss": 0.6448, "step": 14034 }, { "epoch": 0.36, "grad_norm": 2.261892795562744, "learning_rate": 1.4819689876376613e-05, "loss": 0.5232, "step": 14035 }, { "epoch": 0.36, "grad_norm": 1.5319632291793823, "learning_rate": 1.4818962498325706e-05, "loss": 0.4844, "step": 14036 }, { "epoch": 0.36, "grad_norm": 8.868600845336914, "learning_rate": 1.4818235087065538e-05, "loss": 0.6321, "step": 14037 }, { "epoch": 0.36, "grad_norm": 1.2349581718444824, "learning_rate": 1.481750764260112e-05, "loss": 0.6012, "step": 14038 }, { "epoch": 0.36, "grad_norm": 4.886639595031738, "learning_rate": 1.4816780164937466e-05, "loss": 0.7756, "step": 14039 }, { "epoch": 0.36, "grad_norm": 3.5781564712524414, "learning_rate": 1.4816052654079586e-05, "loss": 0.6803, "step": 14040 }, { "epoch": 0.36, "grad_norm": 1.2993241548538208, "learning_rate": 1.4815325110032504e-05, "loss": 0.7338, "step": 14041 }, { "epoch": 0.36, "grad_norm": 3.2213289737701416, "learning_rate": 1.481459753280122e-05, "loss": 0.7083, "step": 14042 }, { "epoch": 0.36, "grad_norm": 5.817113399505615, "learning_rate": 1.4813869922390757e-05, "loss": 0.4616, "step": 14043 }, { "epoch": 0.36, "grad_norm": 1.3472682237625122, "learning_rate": 1.481314227880613e-05, "loss": 0.5721, "step": 14044 }, { "epoch": 0.36, "grad_norm": 2.0596303939819336, "learning_rate": 1.4812414602052346e-05, "loss": 0.4138, "step": 14045 }, { "epoch": 0.36, "grad_norm": 2.508457660675049, "learning_rate": 1.4811686892134427e-05, "loss": 0.5923, "step": 14046 }, { "epoch": 0.36, "grad_norm": 8.690642356872559, "learning_rate": 1.4810959149057382e-05, "loss": 0.5617, "step": 14047 }, { "epoch": 0.36, "grad_norm": 1.421862006187439, "learning_rate": 1.481023137282623e-05, "loss": 0.644, "step": 14048 }, { "epoch": 0.36, "grad_norm": 2.151214122772217, "learning_rate": 1.4809503563445985e-05, "loss": 0.6058, "step": 14049 }, { "epoch": 0.36, "grad_norm": 1.4289740324020386, "learning_rate": 1.4808775720921664e-05, "loss": 0.5819, "step": 14050 }, { "epoch": 0.36, "grad_norm": 3.5937163829803467, "learning_rate": 1.4808047845258284e-05, "loss": 0.5514, "step": 14051 }, { "epoch": 0.36, "grad_norm": 2.807468891143799, "learning_rate": 1.4807319936460856e-05, "loss": 0.5388, "step": 14052 }, { "epoch": 0.36, "grad_norm": 1.596737265586853, "learning_rate": 1.4806591994534398e-05, "loss": 0.4973, "step": 14053 }, { "epoch": 0.36, "grad_norm": 1.0479031801223755, "learning_rate": 1.4805864019483931e-05, "loss": 0.6392, "step": 14054 }, { "epoch": 0.36, "grad_norm": 8.88907241821289, "learning_rate": 1.4805136011314466e-05, "loss": 0.6812, "step": 14055 }, { "epoch": 0.36, "grad_norm": 1.1986221075057983, "learning_rate": 1.4804407970031028e-05, "loss": 0.4735, "step": 14056 }, { "epoch": 0.36, "grad_norm": 2.0148963928222656, "learning_rate": 1.4803679895638623e-05, "loss": 0.7538, "step": 14057 }, { "epoch": 0.36, "grad_norm": 5.465276718139648, "learning_rate": 1.4802951788142277e-05, "loss": 0.635, "step": 14058 }, { "epoch": 0.36, "grad_norm": 1.6739258766174316, "learning_rate": 1.4802223647547003e-05, "loss": 0.6161, "step": 14059 }, { "epoch": 0.36, "grad_norm": 2.0968127250671387, "learning_rate": 1.480149547385782e-05, "loss": 0.4733, "step": 14060 }, { "epoch": 0.36, "grad_norm": 1.456518292427063, "learning_rate": 1.4800767267079748e-05, "loss": 0.4564, "step": 14061 }, { "epoch": 0.36, "grad_norm": 1.5784133672714233, "learning_rate": 1.4800039027217805e-05, "loss": 0.4804, "step": 14062 }, { "epoch": 0.36, "grad_norm": 2.212686777114868, "learning_rate": 1.4799310754277008e-05, "loss": 0.6407, "step": 14063 }, { "epoch": 0.36, "grad_norm": 1.2242341041564941, "learning_rate": 1.4798582448262375e-05, "loss": 0.5763, "step": 14064 }, { "epoch": 0.36, "grad_norm": 1.059946894645691, "learning_rate": 1.4797854109178929e-05, "loss": 0.5833, "step": 14065 }, { "epoch": 0.36, "grad_norm": 1.9002529382705688, "learning_rate": 1.4797125737031686e-05, "loss": 0.561, "step": 14066 }, { "epoch": 0.36, "grad_norm": 1.3360224962234497, "learning_rate": 1.4796397331825665e-05, "loss": 0.5378, "step": 14067 }, { "epoch": 0.36, "grad_norm": 1.232279658317566, "learning_rate": 1.4795668893565889e-05, "loss": 0.5566, "step": 14068 }, { "epoch": 0.36, "grad_norm": 2.110405206680298, "learning_rate": 1.4794940422257373e-05, "loss": 0.6487, "step": 14069 }, { "epoch": 0.36, "grad_norm": 1.5398186445236206, "learning_rate": 1.4794211917905144e-05, "loss": 0.486, "step": 14070 }, { "epoch": 0.36, "grad_norm": 1.7067815065383911, "learning_rate": 1.4793483380514215e-05, "loss": 0.711, "step": 14071 }, { "epoch": 0.36, "grad_norm": 1.1527758836746216, "learning_rate": 1.4792754810089611e-05, "loss": 0.4989, "step": 14072 }, { "epoch": 0.36, "grad_norm": 2.2670035362243652, "learning_rate": 1.4792026206636353e-05, "loss": 0.6651, "step": 14073 }, { "epoch": 0.36, "grad_norm": 5.194073677062988, "learning_rate": 1.479129757015946e-05, "loss": 0.92, "step": 14074 }, { "epoch": 0.36, "grad_norm": 6.456017971038818, "learning_rate": 1.4790568900663958e-05, "loss": 0.7748, "step": 14075 }, { "epoch": 0.36, "grad_norm": 2.263718366622925, "learning_rate": 1.4789840198154859e-05, "loss": 0.723, "step": 14076 }, { "epoch": 0.36, "grad_norm": 1.6785352230072021, "learning_rate": 1.4789111462637194e-05, "loss": 0.6624, "step": 14077 }, { "epoch": 0.36, "grad_norm": 3.948035717010498, "learning_rate": 1.4788382694115982e-05, "loss": 0.7551, "step": 14078 }, { "epoch": 0.36, "grad_norm": 1.51953125, "learning_rate": 1.4787653892596243e-05, "loss": 0.6137, "step": 14079 }, { "epoch": 0.36, "grad_norm": 1.7709784507751465, "learning_rate": 1.4786925058083001e-05, "loss": 0.5002, "step": 14080 }, { "epoch": 0.36, "grad_norm": 2.8593497276306152, "learning_rate": 1.4786196190581281e-05, "loss": 0.5519, "step": 14081 }, { "epoch": 0.36, "grad_norm": 3.3236024379730225, "learning_rate": 1.4785467290096103e-05, "loss": 0.6378, "step": 14082 }, { "epoch": 0.36, "grad_norm": 1.5777429342269897, "learning_rate": 1.478473835663249e-05, "loss": 0.6521, "step": 14083 }, { "epoch": 0.36, "grad_norm": 8.705463409423828, "learning_rate": 1.4784009390195467e-05, "loss": 0.436, "step": 14084 }, { "epoch": 0.36, "grad_norm": 1.7943453788757324, "learning_rate": 1.4783280390790058e-05, "loss": 0.5559, "step": 14085 }, { "epoch": 0.36, "grad_norm": 2.94044828414917, "learning_rate": 1.4782551358421283e-05, "loss": 0.7356, "step": 14086 }, { "epoch": 0.36, "grad_norm": 2.545276403427124, "learning_rate": 1.4781822293094172e-05, "loss": 0.7413, "step": 14087 }, { "epoch": 0.36, "grad_norm": 2.387153387069702, "learning_rate": 1.4781093194813743e-05, "loss": 0.4607, "step": 14088 }, { "epoch": 0.36, "grad_norm": 1.2567706108093262, "learning_rate": 1.4780364063585027e-05, "loss": 0.5999, "step": 14089 }, { "epoch": 0.36, "grad_norm": 1.9730229377746582, "learning_rate": 1.4779634899413044e-05, "loss": 0.6118, "step": 14090 }, { "epoch": 0.36, "grad_norm": 5.839680194854736, "learning_rate": 1.4778905702302817e-05, "loss": 0.591, "step": 14091 }, { "epoch": 0.36, "grad_norm": 1.1602416038513184, "learning_rate": 1.4778176472259377e-05, "loss": 0.4725, "step": 14092 }, { "epoch": 0.36, "grad_norm": 1.2252377271652222, "learning_rate": 1.4777447209287747e-05, "loss": 0.6748, "step": 14093 }, { "epoch": 0.36, "grad_norm": 1.9313247203826904, "learning_rate": 1.4776717913392951e-05, "loss": 0.6308, "step": 14094 }, { "epoch": 0.36, "grad_norm": 1.7729288339614868, "learning_rate": 1.4775988584580017e-05, "loss": 0.7171, "step": 14095 }, { "epoch": 0.36, "grad_norm": 2.644183874130249, "learning_rate": 1.477525922285397e-05, "loss": 0.6287, "step": 14096 }, { "epoch": 0.36, "grad_norm": 8.15859603881836, "learning_rate": 1.4774529828219837e-05, "loss": 0.475, "step": 14097 }, { "epoch": 0.36, "grad_norm": 7.6537041664123535, "learning_rate": 1.4773800400682643e-05, "loss": 0.5641, "step": 14098 }, { "epoch": 0.36, "grad_norm": 1.799765706062317, "learning_rate": 1.4773070940247416e-05, "loss": 0.4421, "step": 14099 }, { "epoch": 0.36, "grad_norm": 2.4812612533569336, "learning_rate": 1.4772341446919186e-05, "loss": 0.6993, "step": 14100 }, { "epoch": 0.36, "grad_norm": 1.5965900421142578, "learning_rate": 1.4771611920702973e-05, "loss": 0.536, "step": 14101 }, { "epoch": 0.36, "grad_norm": 3.009260416030884, "learning_rate": 1.4770882361603808e-05, "loss": 0.5643, "step": 14102 }, { "epoch": 0.36, "grad_norm": 2.2478933334350586, "learning_rate": 1.4770152769626723e-05, "loss": 0.5536, "step": 14103 }, { "epoch": 0.36, "grad_norm": 1.715314507484436, "learning_rate": 1.4769423144776737e-05, "loss": 0.6598, "step": 14104 }, { "epoch": 0.36, "grad_norm": 1.6483172178268433, "learning_rate": 1.4768693487058887e-05, "loss": 0.4617, "step": 14105 }, { "epoch": 0.36, "grad_norm": 1.3477610349655151, "learning_rate": 1.4767963796478198e-05, "loss": 0.5287, "step": 14106 }, { "epoch": 0.36, "grad_norm": 1.7618095874786377, "learning_rate": 1.4767234073039696e-05, "loss": 0.6719, "step": 14107 }, { "epoch": 0.36, "grad_norm": 3.8773202896118164, "learning_rate": 1.4766504316748412e-05, "loss": 0.6357, "step": 14108 }, { "epoch": 0.36, "grad_norm": 1.8686528205871582, "learning_rate": 1.4765774527609375e-05, "loss": 0.4626, "step": 14109 }, { "epoch": 0.36, "grad_norm": 2.462307929992676, "learning_rate": 1.4765044705627614e-05, "loss": 0.7613, "step": 14110 }, { "epoch": 0.36, "grad_norm": 4.494183540344238, "learning_rate": 1.476431485080816e-05, "loss": 0.4721, "step": 14111 }, { "epoch": 0.36, "grad_norm": 1.999914526939392, "learning_rate": 1.476358496315604e-05, "loss": 0.7134, "step": 14112 }, { "epoch": 0.36, "grad_norm": 3.726734161376953, "learning_rate": 1.4762855042676285e-05, "loss": 0.582, "step": 14113 }, { "epoch": 0.36, "grad_norm": 4.790481090545654, "learning_rate": 1.4762125089373926e-05, "loss": 0.6952, "step": 14114 }, { "epoch": 0.36, "grad_norm": 2.2253968715667725, "learning_rate": 1.4761395103253993e-05, "loss": 0.6723, "step": 14115 }, { "epoch": 0.36, "grad_norm": 1.8653490543365479, "learning_rate": 1.4760665084321515e-05, "loss": 0.6318, "step": 14116 }, { "epoch": 0.36, "grad_norm": 5.200174331665039, "learning_rate": 1.4759935032581525e-05, "loss": 0.6435, "step": 14117 }, { "epoch": 0.36, "grad_norm": 3.57498836517334, "learning_rate": 1.4759204948039051e-05, "loss": 0.4127, "step": 14118 }, { "epoch": 0.36, "grad_norm": 1.0959972143173218, "learning_rate": 1.4758474830699131e-05, "loss": 0.5434, "step": 14119 }, { "epoch": 0.36, "grad_norm": 1.3245631456375122, "learning_rate": 1.475774468056679e-05, "loss": 0.5699, "step": 14120 }, { "epoch": 0.36, "grad_norm": 1.7671422958374023, "learning_rate": 1.475701449764706e-05, "loss": 0.7378, "step": 14121 }, { "epoch": 0.36, "grad_norm": 1.8930423259735107, "learning_rate": 1.4756284281944977e-05, "loss": 0.5766, "step": 14122 }, { "epoch": 0.36, "grad_norm": 1.5281590223312378, "learning_rate": 1.4755554033465571e-05, "loss": 0.4557, "step": 14123 }, { "epoch": 0.36, "grad_norm": 1.3527849912643433, "learning_rate": 1.4754823752213874e-05, "loss": 0.5465, "step": 14124 }, { "epoch": 0.36, "grad_norm": 3.9479928016662598, "learning_rate": 1.4754093438194917e-05, "loss": 0.8613, "step": 14125 }, { "epoch": 0.36, "grad_norm": 1.6730130910873413, "learning_rate": 1.475336309141374e-05, "loss": 0.4825, "step": 14126 }, { "epoch": 0.36, "grad_norm": 1.6804827451705933, "learning_rate": 1.4752632711875365e-05, "loss": 0.5672, "step": 14127 }, { "epoch": 0.36, "grad_norm": 6.754593372344971, "learning_rate": 1.4751902299584832e-05, "loss": 0.8464, "step": 14128 }, { "epoch": 0.36, "grad_norm": 1.3166872262954712, "learning_rate": 1.4751171854547177e-05, "loss": 0.6209, "step": 14129 }, { "epoch": 0.36, "grad_norm": 1.2997872829437256, "learning_rate": 1.4750441376767428e-05, "loss": 0.454, "step": 14130 }, { "epoch": 0.36, "grad_norm": 1.2902977466583252, "learning_rate": 1.4749710866250623e-05, "loss": 0.6145, "step": 14131 }, { "epoch": 0.36, "grad_norm": 1.9142310619354248, "learning_rate": 1.4748980323001794e-05, "loss": 0.5421, "step": 14132 }, { "epoch": 0.36, "grad_norm": 1.679448127746582, "learning_rate": 1.4748249747025976e-05, "loss": 0.4514, "step": 14133 }, { "epoch": 0.36, "grad_norm": 9.789088249206543, "learning_rate": 1.4747519138328206e-05, "loss": 0.6064, "step": 14134 }, { "epoch": 0.36, "grad_norm": 3.2729713916778564, "learning_rate": 1.4746788496913513e-05, "loss": 0.7606, "step": 14135 }, { "epoch": 0.36, "grad_norm": 2.0574820041656494, "learning_rate": 1.474605782278694e-05, "loss": 0.6715, "step": 14136 }, { "epoch": 0.36, "grad_norm": 4.126162528991699, "learning_rate": 1.4745327115953516e-05, "loss": 0.3473, "step": 14137 }, { "epoch": 0.36, "grad_norm": 1.4063053131103516, "learning_rate": 1.4744596376418279e-05, "loss": 0.3949, "step": 14138 }, { "epoch": 0.36, "grad_norm": 1.9097919464111328, "learning_rate": 1.4743865604186266e-05, "loss": 0.6873, "step": 14139 }, { "epoch": 0.36, "grad_norm": 4.191584587097168, "learning_rate": 1.4743134799262509e-05, "loss": 0.6646, "step": 14140 }, { "epoch": 0.36, "grad_norm": 2.9752397537231445, "learning_rate": 1.4742403961652048e-05, "loss": 0.8493, "step": 14141 }, { "epoch": 0.36, "grad_norm": 1.6466177701950073, "learning_rate": 1.4741673091359918e-05, "loss": 0.5288, "step": 14142 }, { "epoch": 0.36, "grad_norm": 1.7205485105514526, "learning_rate": 1.4740942188391159e-05, "loss": 0.5669, "step": 14143 }, { "epoch": 0.36, "grad_norm": 1.7830791473388672, "learning_rate": 1.47402112527508e-05, "loss": 0.4937, "step": 14144 }, { "epoch": 0.36, "grad_norm": 3.25113844871521, "learning_rate": 1.4739480284443886e-05, "loss": 0.6588, "step": 14145 }, { "epoch": 0.36, "grad_norm": 3.0805435180664062, "learning_rate": 1.4738749283475452e-05, "loss": 0.5964, "step": 14146 }, { "epoch": 0.36, "grad_norm": 3.668905258178711, "learning_rate": 1.4738018249850534e-05, "loss": 0.5789, "step": 14147 }, { "epoch": 0.36, "grad_norm": 1.311683177947998, "learning_rate": 1.473728718357417e-05, "loss": 0.7247, "step": 14148 }, { "epoch": 0.36, "grad_norm": 1.981155276298523, "learning_rate": 1.47365560846514e-05, "loss": 0.6299, "step": 14149 }, { "epoch": 0.36, "grad_norm": 3.0959835052490234, "learning_rate": 1.4735824953087265e-05, "loss": 0.4871, "step": 14150 }, { "epoch": 0.36, "grad_norm": 9.869626998901367, "learning_rate": 1.4735093788886797e-05, "loss": 0.7009, "step": 14151 }, { "epoch": 0.36, "grad_norm": 3.0501389503479004, "learning_rate": 1.4734362592055034e-05, "loss": 0.4854, "step": 14152 }, { "epoch": 0.36, "grad_norm": 1.2876739501953125, "learning_rate": 1.4733631362597023e-05, "loss": 0.6395, "step": 14153 }, { "epoch": 0.36, "grad_norm": 2.0437960624694824, "learning_rate": 1.4732900100517795e-05, "loss": 0.5891, "step": 14154 }, { "epoch": 0.36, "grad_norm": 1.2215766906738281, "learning_rate": 1.4732168805822396e-05, "loss": 0.5036, "step": 14155 }, { "epoch": 0.36, "grad_norm": 1.7040036916732788, "learning_rate": 1.4731437478515862e-05, "loss": 0.5897, "step": 14156 }, { "epoch": 0.36, "grad_norm": 2.311458110809326, "learning_rate": 1.4730706118603231e-05, "loss": 0.5954, "step": 14157 }, { "epoch": 0.36, "grad_norm": 1.5826373100280762, "learning_rate": 1.4729974726089548e-05, "loss": 0.7563, "step": 14158 }, { "epoch": 0.36, "grad_norm": 1.2677847146987915, "learning_rate": 1.472924330097985e-05, "loss": 0.634, "step": 14159 }, { "epoch": 0.36, "grad_norm": 1.5735341310501099, "learning_rate": 1.472851184327918e-05, "loss": 0.619, "step": 14160 }, { "epoch": 0.36, "grad_norm": 1.2519906759262085, "learning_rate": 1.4727780352992575e-05, "loss": 0.4225, "step": 14161 }, { "epoch": 0.36, "grad_norm": 1.8240165710449219, "learning_rate": 1.4727048830125081e-05, "loss": 0.5505, "step": 14162 }, { "epoch": 0.36, "grad_norm": 8.859983444213867, "learning_rate": 1.4726317274681734e-05, "loss": 0.4694, "step": 14163 }, { "epoch": 0.36, "grad_norm": 3.4625251293182373, "learning_rate": 1.4725585686667576e-05, "loss": 0.5213, "step": 14164 }, { "epoch": 0.36, "grad_norm": 1.1882551908493042, "learning_rate": 1.4724854066087655e-05, "loss": 0.448, "step": 14165 }, { "epoch": 0.36, "grad_norm": 2.163527727127075, "learning_rate": 1.4724122412947004e-05, "loss": 0.7778, "step": 14166 }, { "epoch": 0.36, "grad_norm": 2.4699647426605225, "learning_rate": 1.4723390727250672e-05, "loss": 0.7535, "step": 14167 }, { "epoch": 0.36, "grad_norm": 1.2724729776382446, "learning_rate": 1.4722659009003698e-05, "loss": 0.6817, "step": 14168 }, { "epoch": 0.36, "grad_norm": 9.87271785736084, "learning_rate": 1.4721927258211122e-05, "loss": 0.6373, "step": 14169 }, { "epoch": 0.36, "grad_norm": 2.691328287124634, "learning_rate": 1.4721195474877994e-05, "loss": 0.554, "step": 14170 }, { "epoch": 0.36, "grad_norm": 1.2412660121917725, "learning_rate": 1.4720463659009352e-05, "loss": 0.5302, "step": 14171 }, { "epoch": 0.36, "grad_norm": 1.7094182968139648, "learning_rate": 1.471973181061024e-05, "loss": 0.5043, "step": 14172 }, { "epoch": 0.36, "grad_norm": 5.9112067222595215, "learning_rate": 1.47189999296857e-05, "loss": 0.616, "step": 14173 }, { "epoch": 0.36, "grad_norm": 1.1470921039581299, "learning_rate": 1.4718268016240778e-05, "loss": 0.5838, "step": 14174 }, { "epoch": 0.36, "grad_norm": 1.1717349290847778, "learning_rate": 1.4717536070280519e-05, "loss": 0.6467, "step": 14175 }, { "epoch": 0.36, "grad_norm": 2.146026372909546, "learning_rate": 1.4716804091809964e-05, "loss": 0.6339, "step": 14176 }, { "epoch": 0.36, "grad_norm": 1.1119123697280884, "learning_rate": 1.4716072080834158e-05, "loss": 0.503, "step": 14177 }, { "epoch": 0.36, "grad_norm": 2.6157031059265137, "learning_rate": 1.4715340037358145e-05, "loss": 0.5793, "step": 14178 }, { "epoch": 0.36, "grad_norm": 2.9414703845977783, "learning_rate": 1.4714607961386975e-05, "loss": 0.6905, "step": 14179 }, { "epoch": 0.36, "grad_norm": 4.4087910652160645, "learning_rate": 1.4713875852925686e-05, "loss": 0.6554, "step": 14180 }, { "epoch": 0.36, "grad_norm": 2.1888973712921143, "learning_rate": 1.4713143711979325e-05, "loss": 0.5678, "step": 14181 }, { "epoch": 0.36, "grad_norm": 16.801589965820312, "learning_rate": 1.4712411538552942e-05, "loss": 0.7066, "step": 14182 }, { "epoch": 0.36, "grad_norm": 3.3933968544006348, "learning_rate": 1.4711679332651576e-05, "loss": 0.7416, "step": 14183 }, { "epoch": 0.36, "grad_norm": 1.3966037034988403, "learning_rate": 1.471094709428028e-05, "loss": 0.4752, "step": 14184 }, { "epoch": 0.36, "grad_norm": 4.106174945831299, "learning_rate": 1.4710214823444093e-05, "loss": 0.5869, "step": 14185 }, { "epoch": 0.36, "grad_norm": 3.509279727935791, "learning_rate": 1.4709482520148064e-05, "loss": 0.6356, "step": 14186 }, { "epoch": 0.36, "grad_norm": 1.1784967184066772, "learning_rate": 1.4708750184397245e-05, "loss": 0.4344, "step": 14187 }, { "epoch": 0.36, "grad_norm": 1.4613134860992432, "learning_rate": 1.4708017816196674e-05, "loss": 0.553, "step": 14188 }, { "epoch": 0.36, "grad_norm": 6.058172702789307, "learning_rate": 1.4707285415551404e-05, "loss": 0.691, "step": 14189 }, { "epoch": 0.36, "grad_norm": 12.629839897155762, "learning_rate": 1.470655298246648e-05, "loss": 0.8333, "step": 14190 }, { "epoch": 0.36, "grad_norm": 1.1363344192504883, "learning_rate": 1.4705820516946949e-05, "loss": 0.4722, "step": 14191 }, { "epoch": 0.36, "grad_norm": 1.305466890335083, "learning_rate": 1.470508801899786e-05, "loss": 0.6266, "step": 14192 }, { "epoch": 0.36, "grad_norm": 2.2188777923583984, "learning_rate": 1.470435548862426e-05, "loss": 0.644, "step": 14193 }, { "epoch": 0.36, "grad_norm": 3.0253193378448486, "learning_rate": 1.4703622925831199e-05, "loss": 0.8488, "step": 14194 }, { "epoch": 0.36, "grad_norm": 1.762291669845581, "learning_rate": 1.4702890330623721e-05, "loss": 0.4507, "step": 14195 }, { "epoch": 0.36, "grad_norm": 2.0949323177337646, "learning_rate": 1.470215770300688e-05, "loss": 0.5916, "step": 14196 }, { "epoch": 0.36, "grad_norm": 3.6589512825012207, "learning_rate": 1.4701425042985719e-05, "loss": 0.7017, "step": 14197 }, { "epoch": 0.36, "grad_norm": 3.6328132152557373, "learning_rate": 1.4700692350565293e-05, "loss": 0.7522, "step": 14198 }, { "epoch": 0.36, "grad_norm": 2.0158417224884033, "learning_rate": 1.4699959625750651e-05, "loss": 0.6126, "step": 14199 }, { "epoch": 0.36, "grad_norm": 2.735769748687744, "learning_rate": 1.4699226868546836e-05, "loss": 0.6858, "step": 14200 }, { "epoch": 0.36, "grad_norm": 1.93779718875885, "learning_rate": 1.4698494078958906e-05, "loss": 0.7414, "step": 14201 }, { "epoch": 0.36, "grad_norm": 4.014461040496826, "learning_rate": 1.4697761256991902e-05, "loss": 0.7431, "step": 14202 }, { "epoch": 0.36, "grad_norm": 1.4509437084197998, "learning_rate": 1.469702840265088e-05, "loss": 0.5086, "step": 14203 }, { "epoch": 0.36, "grad_norm": 4.643726348876953, "learning_rate": 1.469629551594089e-05, "loss": 0.6934, "step": 14204 }, { "epoch": 0.36, "grad_norm": 2.723365306854248, "learning_rate": 1.4695562596866982e-05, "loss": 0.7412, "step": 14205 }, { "epoch": 0.36, "grad_norm": 2.660944938659668, "learning_rate": 1.4694829645434206e-05, "loss": 0.5198, "step": 14206 }, { "epoch": 0.36, "grad_norm": 1.258374810218811, "learning_rate": 1.4694096661647614e-05, "loss": 0.6199, "step": 14207 }, { "epoch": 0.36, "grad_norm": 3.255686044692993, "learning_rate": 1.4693363645512257e-05, "loss": 0.6854, "step": 14208 }, { "epoch": 0.36, "grad_norm": 1.428959846496582, "learning_rate": 1.4692630597033186e-05, "loss": 0.3742, "step": 14209 }, { "epoch": 0.36, "grad_norm": 1.99720299243927, "learning_rate": 1.4691897516215452e-05, "loss": 0.59, "step": 14210 }, { "epoch": 0.36, "grad_norm": 1.1951913833618164, "learning_rate": 1.4691164403064113e-05, "loss": 0.5499, "step": 14211 }, { "epoch": 0.36, "grad_norm": 3.578252077102661, "learning_rate": 1.4690431257584212e-05, "loss": 0.774, "step": 14212 }, { "epoch": 0.36, "grad_norm": 1.5206830501556396, "learning_rate": 1.4689698079780806e-05, "loss": 0.6339, "step": 14213 }, { "epoch": 0.36, "grad_norm": 2.9407553672790527, "learning_rate": 1.4688964869658948e-05, "loss": 0.6546, "step": 14214 }, { "epoch": 0.36, "grad_norm": 2.077378749847412, "learning_rate": 1.468823162722369e-05, "loss": 0.6944, "step": 14215 }, { "epoch": 0.36, "grad_norm": 5.629208087921143, "learning_rate": 1.4687498352480085e-05, "loss": 0.5134, "step": 14216 }, { "epoch": 0.36, "grad_norm": 2.01000714302063, "learning_rate": 1.4686765045433186e-05, "loss": 0.7591, "step": 14217 }, { "epoch": 0.36, "grad_norm": 2.7691738605499268, "learning_rate": 1.4686031706088048e-05, "loss": 0.5085, "step": 14218 }, { "epoch": 0.36, "grad_norm": 1.3690941333770752, "learning_rate": 1.468529833444972e-05, "loss": 0.5248, "step": 14219 }, { "epoch": 0.36, "grad_norm": 1.7459789514541626, "learning_rate": 1.4684564930523262e-05, "loss": 0.5097, "step": 14220 }, { "epoch": 0.36, "grad_norm": 1.587292194366455, "learning_rate": 1.4683831494313727e-05, "loss": 0.6008, "step": 14221 }, { "epoch": 0.36, "grad_norm": 1.8232250213623047, "learning_rate": 1.4683098025826166e-05, "loss": 0.6951, "step": 14222 }, { "epoch": 0.36, "grad_norm": 1.5256788730621338, "learning_rate": 1.4682364525065636e-05, "loss": 0.6513, "step": 14223 }, { "epoch": 0.36, "grad_norm": 1.6708871126174927, "learning_rate": 1.4681630992037192e-05, "loss": 0.6843, "step": 14224 }, { "epoch": 0.36, "grad_norm": 1.8130462169647217, "learning_rate": 1.4680897426745885e-05, "loss": 0.5649, "step": 14225 }, { "epoch": 0.36, "grad_norm": 10.9724760055542, "learning_rate": 1.4680163829196779e-05, "loss": 0.7997, "step": 14226 }, { "epoch": 0.36, "grad_norm": 2.5980772972106934, "learning_rate": 1.4679430199394921e-05, "loss": 0.5497, "step": 14227 }, { "epoch": 0.36, "grad_norm": 7.464146137237549, "learning_rate": 1.4678696537345372e-05, "loss": 0.5806, "step": 14228 }, { "epoch": 0.36, "grad_norm": 2.7152090072631836, "learning_rate": 1.4677962843053183e-05, "loss": 0.6809, "step": 14229 }, { "epoch": 0.36, "grad_norm": 1.149048924446106, "learning_rate": 1.4677229116523417e-05, "loss": 0.452, "step": 14230 }, { "epoch": 0.36, "grad_norm": 1.1423377990722656, "learning_rate": 1.4676495357761125e-05, "loss": 0.5114, "step": 14231 }, { "epoch": 0.36, "grad_norm": 5.311104774475098, "learning_rate": 1.4675761566771362e-05, "loss": 0.671, "step": 14232 }, { "epoch": 0.36, "grad_norm": 1.3963837623596191, "learning_rate": 1.4675027743559192e-05, "loss": 0.6537, "step": 14233 }, { "epoch": 0.36, "grad_norm": 0.9558618664741516, "learning_rate": 1.4674293888129665e-05, "loss": 0.5451, "step": 14234 }, { "epoch": 0.36, "grad_norm": 2.158512830734253, "learning_rate": 1.4673560000487844e-05, "loss": 0.5957, "step": 14235 }, { "epoch": 0.36, "grad_norm": 1.9027454853057861, "learning_rate": 1.4672826080638782e-05, "loss": 0.6925, "step": 14236 }, { "epoch": 0.36, "grad_norm": 2.4257473945617676, "learning_rate": 1.467209212858754e-05, "loss": 0.5148, "step": 14237 }, { "epoch": 0.36, "grad_norm": 5.407060623168945, "learning_rate": 1.4671358144339171e-05, "loss": 0.6667, "step": 14238 }, { "epoch": 0.36, "grad_norm": 3.179095506668091, "learning_rate": 1.4670624127898738e-05, "loss": 0.6349, "step": 14239 }, { "epoch": 0.36, "grad_norm": 2.4158425331115723, "learning_rate": 1.46698900792713e-05, "loss": 0.5043, "step": 14240 }, { "epoch": 0.37, "grad_norm": 1.431351661682129, "learning_rate": 1.466915599846191e-05, "loss": 0.5896, "step": 14241 }, { "epoch": 0.37, "grad_norm": 1.975041151046753, "learning_rate": 1.4668421885475632e-05, "loss": 0.5226, "step": 14242 }, { "epoch": 0.37, "grad_norm": 2.331836700439453, "learning_rate": 1.4667687740317524e-05, "loss": 0.6187, "step": 14243 }, { "epoch": 0.37, "grad_norm": 2.096893548965454, "learning_rate": 1.4666953562992646e-05, "loss": 0.7744, "step": 14244 }, { "epoch": 0.37, "grad_norm": 1.8130306005477905, "learning_rate": 1.4666219353506052e-05, "loss": 0.5405, "step": 14245 }, { "epoch": 0.37, "grad_norm": 3.027647018432617, "learning_rate": 1.466548511186281e-05, "loss": 0.6084, "step": 14246 }, { "epoch": 0.37, "grad_norm": 1.4549336433410645, "learning_rate": 1.4664750838067973e-05, "loss": 0.5095, "step": 14247 }, { "epoch": 0.37, "grad_norm": 5.028558254241943, "learning_rate": 1.4664016532126603e-05, "loss": 0.7087, "step": 14248 }, { "epoch": 0.37, "grad_norm": 1.0690264701843262, "learning_rate": 1.4663282194043764e-05, "loss": 0.6137, "step": 14249 }, { "epoch": 0.37, "grad_norm": 1.9412152767181396, "learning_rate": 1.4662547823824513e-05, "loss": 0.4831, "step": 14250 }, { "epoch": 0.37, "grad_norm": 2.0801355838775635, "learning_rate": 1.4661813421473911e-05, "loss": 0.6236, "step": 14251 }, { "epoch": 0.37, "grad_norm": 1.243910312652588, "learning_rate": 1.4661078986997021e-05, "loss": 0.5865, "step": 14252 }, { "epoch": 0.37, "grad_norm": 1.6098893880844116, "learning_rate": 1.46603445203989e-05, "loss": 0.6544, "step": 14253 }, { "epoch": 0.37, "grad_norm": 4.260434627532959, "learning_rate": 1.4659610021684616e-05, "loss": 0.5657, "step": 14254 }, { "epoch": 0.37, "grad_norm": 1.3603019714355469, "learning_rate": 1.4658875490859226e-05, "loss": 0.5894, "step": 14255 }, { "epoch": 0.37, "grad_norm": 3.1578586101531982, "learning_rate": 1.4658140927927793e-05, "loss": 0.6387, "step": 14256 }, { "epoch": 0.37, "grad_norm": 1.4155490398406982, "learning_rate": 1.4657406332895381e-05, "loss": 0.5217, "step": 14257 }, { "epoch": 0.37, "grad_norm": 2.284116268157959, "learning_rate": 1.4656671705767048e-05, "loss": 0.6125, "step": 14258 }, { "epoch": 0.37, "grad_norm": 1.2906036376953125, "learning_rate": 1.4655937046547861e-05, "loss": 0.565, "step": 14259 }, { "epoch": 0.37, "grad_norm": 3.515072822570801, "learning_rate": 1.4655202355242879e-05, "loss": 0.5885, "step": 14260 }, { "epoch": 0.37, "grad_norm": 1.590355396270752, "learning_rate": 1.4654467631857168e-05, "loss": 0.5329, "step": 14261 }, { "epoch": 0.37, "grad_norm": 2.0440924167633057, "learning_rate": 1.4653732876395792e-05, "loss": 0.6151, "step": 14262 }, { "epoch": 0.37, "grad_norm": 1.312130093574524, "learning_rate": 1.465299808886381e-05, "loss": 0.476, "step": 14263 }, { "epoch": 0.37, "grad_norm": 3.6958181858062744, "learning_rate": 1.4652263269266291e-05, "loss": 0.7448, "step": 14264 }, { "epoch": 0.37, "grad_norm": 3.0373423099517822, "learning_rate": 1.4651528417608294e-05, "loss": 0.5887, "step": 14265 }, { "epoch": 0.37, "grad_norm": 1.2481311559677124, "learning_rate": 1.4650793533894887e-05, "loss": 0.6131, "step": 14266 }, { "epoch": 0.37, "grad_norm": 11.131312370300293, "learning_rate": 1.4650058618131134e-05, "loss": 0.6245, "step": 14267 }, { "epoch": 0.37, "grad_norm": 1.2072091102600098, "learning_rate": 1.4649323670322095e-05, "loss": 0.4984, "step": 14268 }, { "epoch": 0.37, "grad_norm": 9.566783905029297, "learning_rate": 1.464858869047284e-05, "loss": 0.7194, "step": 14269 }, { "epoch": 0.37, "grad_norm": 8.624589920043945, "learning_rate": 1.4647853678588434e-05, "loss": 0.8635, "step": 14270 }, { "epoch": 0.37, "grad_norm": 4.319787979125977, "learning_rate": 1.464711863467394e-05, "loss": 0.478, "step": 14271 }, { "epoch": 0.37, "grad_norm": 3.1890311241149902, "learning_rate": 1.4646383558734422e-05, "loss": 0.4606, "step": 14272 }, { "epoch": 0.37, "grad_norm": 2.1903018951416016, "learning_rate": 1.4645648450774948e-05, "loss": 0.6566, "step": 14273 }, { "epoch": 0.37, "grad_norm": 2.852025270462036, "learning_rate": 1.4644913310800583e-05, "loss": 0.6665, "step": 14274 }, { "epoch": 0.37, "grad_norm": 2.517836809158325, "learning_rate": 1.4644178138816395e-05, "loss": 0.5642, "step": 14275 }, { "epoch": 0.37, "grad_norm": 2.285202980041504, "learning_rate": 1.4643442934827449e-05, "loss": 0.6065, "step": 14276 }, { "epoch": 0.37, "grad_norm": 1.89871346950531, "learning_rate": 1.4642707698838811e-05, "loss": 0.6241, "step": 14277 }, { "epoch": 0.37, "grad_norm": 5.940661430358887, "learning_rate": 1.4641972430855548e-05, "loss": 0.6417, "step": 14278 }, { "epoch": 0.37, "grad_norm": 3.2766969203948975, "learning_rate": 1.464123713088273e-05, "loss": 0.5366, "step": 14279 }, { "epoch": 0.37, "grad_norm": 5.989433288574219, "learning_rate": 1.4640501798925419e-05, "loss": 0.8022, "step": 14280 }, { "epoch": 0.37, "grad_norm": 1.8094468116760254, "learning_rate": 1.4639766434988687e-05, "loss": 0.489, "step": 14281 }, { "epoch": 0.37, "grad_norm": 1.185073971748352, "learning_rate": 1.46390310390776e-05, "loss": 0.7018, "step": 14282 }, { "epoch": 0.37, "grad_norm": 1.6239136457443237, "learning_rate": 1.4638295611197224e-05, "loss": 0.3686, "step": 14283 }, { "epoch": 0.37, "grad_norm": 1.1919835805892944, "learning_rate": 1.463756015135263e-05, "loss": 0.4447, "step": 14284 }, { "epoch": 0.37, "grad_norm": 3.090466260910034, "learning_rate": 1.4636824659548883e-05, "loss": 0.4532, "step": 14285 }, { "epoch": 0.37, "grad_norm": 1.3889678716659546, "learning_rate": 1.4636089135791054e-05, "loss": 0.5091, "step": 14286 }, { "epoch": 0.37, "grad_norm": 2.6652073860168457, "learning_rate": 1.463535358008421e-05, "loss": 0.6329, "step": 14287 }, { "epoch": 0.37, "grad_norm": 1.6635886430740356, "learning_rate": 1.4634617992433426e-05, "loss": 0.6178, "step": 14288 }, { "epoch": 0.37, "grad_norm": 1.107001543045044, "learning_rate": 1.4633882372843764e-05, "loss": 0.4951, "step": 14289 }, { "epoch": 0.37, "grad_norm": 3.2226922512054443, "learning_rate": 1.4633146721320295e-05, "loss": 0.7038, "step": 14290 }, { "epoch": 0.37, "grad_norm": 6.976506233215332, "learning_rate": 1.4632411037868091e-05, "loss": 0.8621, "step": 14291 }, { "epoch": 0.37, "grad_norm": 2.9674527645111084, "learning_rate": 1.463167532249222e-05, "loss": 0.7004, "step": 14292 }, { "epoch": 0.37, "grad_norm": 2.047036647796631, "learning_rate": 1.4630939575197753e-05, "loss": 0.5098, "step": 14293 }, { "epoch": 0.37, "grad_norm": 2.6412129402160645, "learning_rate": 1.4630203795989758e-05, "loss": 0.6102, "step": 14294 }, { "epoch": 0.37, "grad_norm": 5.277761936187744, "learning_rate": 1.462946798487331e-05, "loss": 0.644, "step": 14295 }, { "epoch": 0.37, "grad_norm": 2.3911032676696777, "learning_rate": 1.4628732141853475e-05, "loss": 0.4335, "step": 14296 }, { "epoch": 0.37, "grad_norm": 1.61957848072052, "learning_rate": 1.4627996266935327e-05, "loss": 0.6414, "step": 14297 }, { "epoch": 0.37, "grad_norm": 2.3871285915374756, "learning_rate": 1.4627260360123939e-05, "loss": 0.6516, "step": 14298 }, { "epoch": 0.37, "grad_norm": 1.281335711479187, "learning_rate": 1.4626524421424375e-05, "loss": 0.5368, "step": 14299 }, { "epoch": 0.37, "grad_norm": 1.309417963027954, "learning_rate": 1.4625788450841713e-05, "loss": 0.4685, "step": 14300 }, { "epoch": 0.37, "grad_norm": 1.870274543762207, "learning_rate": 1.4625052448381024e-05, "loss": 0.6561, "step": 14301 }, { "epoch": 0.37, "grad_norm": 1.1149556636810303, "learning_rate": 1.4624316414047379e-05, "loss": 0.5992, "step": 14302 }, { "epoch": 0.37, "grad_norm": 1.1710447072982788, "learning_rate": 1.4623580347845852e-05, "loss": 0.5877, "step": 14303 }, { "epoch": 0.37, "grad_norm": 3.0072052478790283, "learning_rate": 1.4622844249781512e-05, "loss": 0.5092, "step": 14304 }, { "epoch": 0.37, "grad_norm": 2.9359333515167236, "learning_rate": 1.4622108119859435e-05, "loss": 0.8671, "step": 14305 }, { "epoch": 0.37, "grad_norm": 3.1119322776794434, "learning_rate": 1.4621371958084692e-05, "loss": 0.5526, "step": 14306 }, { "epoch": 0.37, "grad_norm": 1.6204931735992432, "learning_rate": 1.4620635764462354e-05, "loss": 0.5998, "step": 14307 }, { "epoch": 0.37, "grad_norm": 1.267539381980896, "learning_rate": 1.4619899538997503e-05, "loss": 0.4603, "step": 14308 }, { "epoch": 0.37, "grad_norm": 1.5339351892471313, "learning_rate": 1.4619163281695204e-05, "loss": 0.6543, "step": 14309 }, { "epoch": 0.37, "grad_norm": 2.2983009815216064, "learning_rate": 1.4618426992560534e-05, "loss": 0.4852, "step": 14310 }, { "epoch": 0.37, "grad_norm": 2.908719062805176, "learning_rate": 1.4617690671598566e-05, "loss": 0.6499, "step": 14311 }, { "epoch": 0.37, "grad_norm": 1.8622701168060303, "learning_rate": 1.4616954318814375e-05, "loss": 0.5696, "step": 14312 }, { "epoch": 0.37, "grad_norm": 1.288753867149353, "learning_rate": 1.4616217934213038e-05, "loss": 0.5436, "step": 14313 }, { "epoch": 0.37, "grad_norm": 8.246845245361328, "learning_rate": 1.4615481517799624e-05, "loss": 0.5872, "step": 14314 }, { "epoch": 0.37, "grad_norm": 1.4299365282058716, "learning_rate": 1.4614745069579212e-05, "loss": 0.6201, "step": 14315 }, { "epoch": 0.37, "grad_norm": 2.6785788536071777, "learning_rate": 1.4614008589556876e-05, "loss": 0.5718, "step": 14316 }, { "epoch": 0.37, "grad_norm": 2.0359532833099365, "learning_rate": 1.4613272077737695e-05, "loss": 0.6327, "step": 14317 }, { "epoch": 0.37, "grad_norm": 4.082106590270996, "learning_rate": 1.4612535534126737e-05, "loss": 0.5665, "step": 14318 }, { "epoch": 0.37, "grad_norm": 3.812955141067505, "learning_rate": 1.4611798958729082e-05, "loss": 0.5572, "step": 14319 }, { "epoch": 0.37, "grad_norm": 2.8831489086151123, "learning_rate": 1.461106235154981e-05, "loss": 0.7497, "step": 14320 }, { "epoch": 0.37, "grad_norm": 1.4164927005767822, "learning_rate": 1.4610325712593988e-05, "loss": 0.4956, "step": 14321 }, { "epoch": 0.37, "grad_norm": 2.174906015396118, "learning_rate": 1.4609589041866704e-05, "loss": 0.4913, "step": 14322 }, { "epoch": 0.37, "grad_norm": 1.6551649570465088, "learning_rate": 1.4608852339373022e-05, "loss": 0.56, "step": 14323 }, { "epoch": 0.37, "grad_norm": 2.130603075027466, "learning_rate": 1.460811560511803e-05, "loss": 0.7756, "step": 14324 }, { "epoch": 0.37, "grad_norm": 1.2032407522201538, "learning_rate": 1.4607378839106799e-05, "loss": 0.4491, "step": 14325 }, { "epoch": 0.37, "grad_norm": 2.8822290897369385, "learning_rate": 1.4606642041344405e-05, "loss": 0.5007, "step": 14326 }, { "epoch": 0.37, "grad_norm": 1.407515525817871, "learning_rate": 1.460590521183593e-05, "loss": 0.5957, "step": 14327 }, { "epoch": 0.37, "grad_norm": 2.935715913772583, "learning_rate": 1.460516835058645e-05, "loss": 0.4777, "step": 14328 }, { "epoch": 0.37, "grad_norm": 2.585655450820923, "learning_rate": 1.4604431457601046e-05, "loss": 0.6588, "step": 14329 }, { "epoch": 0.37, "grad_norm": 2.6899659633636475, "learning_rate": 1.4603694532884789e-05, "loss": 0.5764, "step": 14330 }, { "epoch": 0.37, "grad_norm": 1.365592122077942, "learning_rate": 1.4602957576442763e-05, "loss": 0.6879, "step": 14331 }, { "epoch": 0.37, "grad_norm": 3.624375343322754, "learning_rate": 1.4602220588280047e-05, "loss": 0.7887, "step": 14332 }, { "epoch": 0.37, "grad_norm": 1.2057641744613647, "learning_rate": 1.4601483568401716e-05, "loss": 0.4614, "step": 14333 }, { "epoch": 0.37, "grad_norm": 4.148049354553223, "learning_rate": 1.460074651681285e-05, "loss": 0.5982, "step": 14334 }, { "epoch": 0.37, "grad_norm": 4.337004661560059, "learning_rate": 1.4600009433518532e-05, "loss": 0.6973, "step": 14335 }, { "epoch": 0.37, "grad_norm": 2.523155450820923, "learning_rate": 1.4599272318523836e-05, "loss": 0.638, "step": 14336 }, { "epoch": 0.37, "grad_norm": 2.328591823577881, "learning_rate": 1.459853517183385e-05, "loss": 0.6649, "step": 14337 }, { "epoch": 0.37, "grad_norm": 2.5117812156677246, "learning_rate": 1.4597797993453644e-05, "loss": 0.6006, "step": 14338 }, { "epoch": 0.37, "grad_norm": 1.3090136051177979, "learning_rate": 1.4597060783388305e-05, "loss": 0.7663, "step": 14339 }, { "epoch": 0.37, "grad_norm": 1.1808427572250366, "learning_rate": 1.459632354164291e-05, "loss": 0.56, "step": 14340 }, { "epoch": 0.37, "grad_norm": 2.8731930255889893, "learning_rate": 1.4595586268222539e-05, "loss": 0.7974, "step": 14341 }, { "epoch": 0.37, "grad_norm": 2.35013747215271, "learning_rate": 1.459484896313228e-05, "loss": 0.6563, "step": 14342 }, { "epoch": 0.37, "grad_norm": 2.7193448543548584, "learning_rate": 1.4594111626377203e-05, "loss": 0.6021, "step": 14343 }, { "epoch": 0.37, "grad_norm": 2.0289323329925537, "learning_rate": 1.4593374257962399e-05, "loss": 0.6548, "step": 14344 }, { "epoch": 0.37, "grad_norm": 1.6917192935943604, "learning_rate": 1.4592636857892943e-05, "loss": 0.6112, "step": 14345 }, { "epoch": 0.37, "grad_norm": 4.90592622756958, "learning_rate": 1.4591899426173922e-05, "loss": 0.6026, "step": 14346 }, { "epoch": 0.37, "grad_norm": 3.06241512298584, "learning_rate": 1.4591161962810412e-05, "loss": 0.6044, "step": 14347 }, { "epoch": 0.37, "grad_norm": 2.5679163932800293, "learning_rate": 1.45904244678075e-05, "loss": 0.5365, "step": 14348 }, { "epoch": 0.37, "grad_norm": 1.0857117176055908, "learning_rate": 1.4589686941170267e-05, "loss": 0.4442, "step": 14349 }, { "epoch": 0.37, "grad_norm": 1.6145695447921753, "learning_rate": 1.4588949382903793e-05, "loss": 0.5279, "step": 14350 }, { "epoch": 0.37, "grad_norm": 3.074439764022827, "learning_rate": 1.4588211793013166e-05, "loss": 0.594, "step": 14351 }, { "epoch": 0.37, "grad_norm": 1.4226903915405273, "learning_rate": 1.4587474171503461e-05, "loss": 0.6116, "step": 14352 }, { "epoch": 0.37, "grad_norm": 2.288388252258301, "learning_rate": 1.458673651837977e-05, "loss": 0.601, "step": 14353 }, { "epoch": 0.37, "grad_norm": 2.1097421646118164, "learning_rate": 1.4585998833647172e-05, "loss": 0.7047, "step": 14354 }, { "epoch": 0.37, "grad_norm": 2.119128465652466, "learning_rate": 1.4585261117310752e-05, "loss": 0.6479, "step": 14355 }, { "epoch": 0.37, "grad_norm": 2.175915241241455, "learning_rate": 1.4584523369375592e-05, "loss": 0.5078, "step": 14356 }, { "epoch": 0.37, "grad_norm": 3.7978835105895996, "learning_rate": 1.4583785589846776e-05, "loss": 0.5935, "step": 14357 }, { "epoch": 0.37, "grad_norm": 3.905061721801758, "learning_rate": 1.458304777872939e-05, "loss": 0.5909, "step": 14358 }, { "epoch": 0.37, "grad_norm": 2.4315168857574463, "learning_rate": 1.4582309936028521e-05, "loss": 0.6658, "step": 14359 }, { "epoch": 0.37, "grad_norm": 2.041565179824829, "learning_rate": 1.4581572061749247e-05, "loss": 0.5592, "step": 14360 }, { "epoch": 0.37, "grad_norm": 1.4311275482177734, "learning_rate": 1.4580834155896662e-05, "loss": 0.5302, "step": 14361 }, { "epoch": 0.37, "grad_norm": 2.199899673461914, "learning_rate": 1.4580096218475839e-05, "loss": 0.4779, "step": 14362 }, { "epoch": 0.37, "grad_norm": 2.987818956375122, "learning_rate": 1.4579358249491872e-05, "loss": 0.5692, "step": 14363 }, { "epoch": 0.37, "grad_norm": 3.6228721141815186, "learning_rate": 1.457862024894985e-05, "loss": 0.6134, "step": 14364 }, { "epoch": 0.37, "grad_norm": 2.101550817489624, "learning_rate": 1.457788221685485e-05, "loss": 0.5172, "step": 14365 }, { "epoch": 0.37, "grad_norm": 1.7246508598327637, "learning_rate": 1.4577144153211963e-05, "loss": 0.4355, "step": 14366 }, { "epoch": 0.37, "grad_norm": 1.4649771451950073, "learning_rate": 1.4576406058026273e-05, "loss": 0.628, "step": 14367 }, { "epoch": 0.37, "grad_norm": 1.614510416984558, "learning_rate": 1.4575667931302871e-05, "loss": 0.6296, "step": 14368 }, { "epoch": 0.37, "grad_norm": 1.5223519802093506, "learning_rate": 1.4574929773046836e-05, "loss": 0.5509, "step": 14369 }, { "epoch": 0.37, "grad_norm": 1.888992428779602, "learning_rate": 1.457419158326326e-05, "loss": 0.6527, "step": 14370 }, { "epoch": 0.37, "grad_norm": 1.9759153127670288, "learning_rate": 1.4573453361957231e-05, "loss": 0.5551, "step": 14371 }, { "epoch": 0.37, "grad_norm": 2.7308363914489746, "learning_rate": 1.4572715109133832e-05, "loss": 0.5674, "step": 14372 }, { "epoch": 0.37, "grad_norm": 6.750986099243164, "learning_rate": 1.4571976824798158e-05, "loss": 0.7753, "step": 14373 }, { "epoch": 0.37, "grad_norm": 3.001340627670288, "learning_rate": 1.4571238508955287e-05, "loss": 0.7905, "step": 14374 }, { "epoch": 0.37, "grad_norm": 1.6922160387039185, "learning_rate": 1.4570500161610314e-05, "loss": 0.6532, "step": 14375 }, { "epoch": 0.37, "grad_norm": 3.4274964332580566, "learning_rate": 1.4569761782768328e-05, "loss": 0.6054, "step": 14376 }, { "epoch": 0.37, "grad_norm": 3.001183032989502, "learning_rate": 1.4569023372434412e-05, "loss": 0.6586, "step": 14377 }, { "epoch": 0.37, "grad_norm": 2.5431723594665527, "learning_rate": 1.456828493061366e-05, "loss": 0.6604, "step": 14378 }, { "epoch": 0.37, "grad_norm": 1.0835011005401611, "learning_rate": 1.4567546457311155e-05, "loss": 0.5276, "step": 14379 }, { "epoch": 0.37, "grad_norm": 1.3519471883773804, "learning_rate": 1.4566807952531992e-05, "loss": 0.6047, "step": 14380 }, { "epoch": 0.37, "grad_norm": 0.9433081150054932, "learning_rate": 1.4566069416281257e-05, "loss": 0.4592, "step": 14381 }, { "epoch": 0.37, "grad_norm": 1.898582100868225, "learning_rate": 1.4565330848564039e-05, "loss": 0.5459, "step": 14382 }, { "epoch": 0.37, "grad_norm": 1.5955617427825928, "learning_rate": 1.4564592249385432e-05, "loss": 0.5261, "step": 14383 }, { "epoch": 0.37, "grad_norm": 1.3799651861190796, "learning_rate": 1.4563853618750518e-05, "loss": 0.5923, "step": 14384 }, { "epoch": 0.37, "grad_norm": 5.045045852661133, "learning_rate": 1.4563114956664397e-05, "loss": 0.5705, "step": 14385 }, { "epoch": 0.37, "grad_norm": 2.1138014793395996, "learning_rate": 1.4562376263132151e-05, "loss": 0.6429, "step": 14386 }, { "epoch": 0.37, "grad_norm": 2.3849329948425293, "learning_rate": 1.4561637538158877e-05, "loss": 0.4659, "step": 14387 }, { "epoch": 0.37, "grad_norm": 3.524075746536255, "learning_rate": 1.4560898781749661e-05, "loss": 0.8461, "step": 14388 }, { "epoch": 0.37, "grad_norm": 1.8508039712905884, "learning_rate": 1.4560159993909596e-05, "loss": 0.6818, "step": 14389 }, { "epoch": 0.37, "grad_norm": 2.8600475788116455, "learning_rate": 1.4559421174643778e-05, "loss": 0.6398, "step": 14390 }, { "epoch": 0.37, "grad_norm": 4.227744102478027, "learning_rate": 1.4558682323957287e-05, "loss": 0.6524, "step": 14391 }, { "epoch": 0.37, "grad_norm": 1.198222041130066, "learning_rate": 1.4557943441855227e-05, "loss": 0.5693, "step": 14392 }, { "epoch": 0.37, "grad_norm": 1.1513211727142334, "learning_rate": 1.4557204528342679e-05, "loss": 0.5577, "step": 14393 }, { "epoch": 0.37, "grad_norm": 3.0384750366210938, "learning_rate": 1.4556465583424743e-05, "loss": 0.6189, "step": 14394 }, { "epoch": 0.37, "grad_norm": 9.966412544250488, "learning_rate": 1.4555726607106511e-05, "loss": 0.4891, "step": 14395 }, { "epoch": 0.37, "grad_norm": 4.996164321899414, "learning_rate": 1.4554987599393072e-05, "loss": 0.813, "step": 14396 }, { "epoch": 0.37, "grad_norm": 5.426745414733887, "learning_rate": 1.455424856028952e-05, "loss": 0.5692, "step": 14397 }, { "epoch": 0.37, "grad_norm": 1.4888018369674683, "learning_rate": 1.4553509489800947e-05, "loss": 0.5563, "step": 14398 }, { "epoch": 0.37, "grad_norm": 2.9670495986938477, "learning_rate": 1.4552770387932447e-05, "loss": 0.5685, "step": 14399 }, { "epoch": 0.37, "grad_norm": 6.079531192779541, "learning_rate": 1.4552031254689116e-05, "loss": 0.5256, "step": 14400 }, { "epoch": 0.37, "grad_norm": 2.539985179901123, "learning_rate": 1.4551292090076044e-05, "loss": 0.5274, "step": 14401 }, { "epoch": 0.37, "grad_norm": 3.1864092350006104, "learning_rate": 1.455055289409833e-05, "loss": 0.4955, "step": 14402 }, { "epoch": 0.37, "grad_norm": 1.2800219058990479, "learning_rate": 1.454981366676106e-05, "loss": 0.4925, "step": 14403 }, { "epoch": 0.37, "grad_norm": 1.7860848903656006, "learning_rate": 1.4549074408069332e-05, "loss": 0.685, "step": 14404 }, { "epoch": 0.37, "grad_norm": 2.0340542793273926, "learning_rate": 1.4548335118028246e-05, "loss": 0.6841, "step": 14405 }, { "epoch": 0.37, "grad_norm": 2.276862859725952, "learning_rate": 1.4547595796642887e-05, "loss": 0.5429, "step": 14406 }, { "epoch": 0.37, "grad_norm": 1.6854923963546753, "learning_rate": 1.4546856443918357e-05, "loss": 0.513, "step": 14407 }, { "epoch": 0.37, "grad_norm": 1.4504749774932861, "learning_rate": 1.4546117059859748e-05, "loss": 0.6284, "step": 14408 }, { "epoch": 0.37, "grad_norm": 2.023970603942871, "learning_rate": 1.4545377644472159e-05, "loss": 0.6792, "step": 14409 }, { "epoch": 0.37, "grad_norm": 1.4726343154907227, "learning_rate": 1.454463819776068e-05, "loss": 0.6128, "step": 14410 }, { "epoch": 0.37, "grad_norm": 1.6607900857925415, "learning_rate": 1.454389871973041e-05, "loss": 0.5906, "step": 14411 }, { "epoch": 0.37, "grad_norm": 1.5777686834335327, "learning_rate": 1.4543159210386448e-05, "loss": 0.6916, "step": 14412 }, { "epoch": 0.37, "grad_norm": 2.092525005340576, "learning_rate": 1.4542419669733883e-05, "loss": 0.6082, "step": 14413 }, { "epoch": 0.37, "grad_norm": 1.082255482673645, "learning_rate": 1.454168009777782e-05, "loss": 0.5526, "step": 14414 }, { "epoch": 0.37, "grad_norm": 1.7474902868270874, "learning_rate": 1.4540940494523348e-05, "loss": 0.5858, "step": 14415 }, { "epoch": 0.37, "grad_norm": 1.5017794370651245, "learning_rate": 1.4540200859975567e-05, "loss": 0.6631, "step": 14416 }, { "epoch": 0.37, "grad_norm": 1.65192449092865, "learning_rate": 1.4539461194139574e-05, "loss": 0.6146, "step": 14417 }, { "epoch": 0.37, "grad_norm": 1.9327991008758545, "learning_rate": 1.4538721497020464e-05, "loss": 0.6543, "step": 14418 }, { "epoch": 0.37, "grad_norm": 1.8657866716384888, "learning_rate": 1.4537981768623345e-05, "loss": 0.7344, "step": 14419 }, { "epoch": 0.37, "grad_norm": 1.1993634700775146, "learning_rate": 1.4537242008953298e-05, "loss": 0.5287, "step": 14420 }, { "epoch": 0.37, "grad_norm": 1.3401888608932495, "learning_rate": 1.4536502218015436e-05, "loss": 0.507, "step": 14421 }, { "epoch": 0.37, "grad_norm": 6.777328968048096, "learning_rate": 1.4535762395814846e-05, "loss": 0.7828, "step": 14422 }, { "epoch": 0.37, "grad_norm": 1.4553427696228027, "learning_rate": 1.4535022542356632e-05, "loss": 0.6778, "step": 14423 }, { "epoch": 0.37, "grad_norm": 3.331129312515259, "learning_rate": 1.4534282657645896e-05, "loss": 0.6463, "step": 14424 }, { "epoch": 0.37, "grad_norm": 1.4465738534927368, "learning_rate": 1.4533542741687728e-05, "loss": 0.6398, "step": 14425 }, { "epoch": 0.37, "grad_norm": 1.1337504386901855, "learning_rate": 1.4532802794487236e-05, "loss": 0.5961, "step": 14426 }, { "epoch": 0.37, "grad_norm": 1.6157745122909546, "learning_rate": 1.453206281604951e-05, "loss": 0.6565, "step": 14427 }, { "epoch": 0.37, "grad_norm": 5.428047180175781, "learning_rate": 1.4531322806379655e-05, "loss": 0.6662, "step": 14428 }, { "epoch": 0.37, "grad_norm": 2.4968864917755127, "learning_rate": 1.4530582765482775e-05, "loss": 0.4972, "step": 14429 }, { "epoch": 0.37, "grad_norm": 3.4907960891723633, "learning_rate": 1.452984269336396e-05, "loss": 0.6714, "step": 14430 }, { "epoch": 0.37, "grad_norm": 4.959827899932861, "learning_rate": 1.452910259002832e-05, "loss": 0.5577, "step": 14431 }, { "epoch": 0.37, "grad_norm": 1.3202050924301147, "learning_rate": 1.4528362455480945e-05, "loss": 0.4507, "step": 14432 }, { "epoch": 0.37, "grad_norm": 1.3579994440078735, "learning_rate": 1.4527622289726943e-05, "loss": 0.5959, "step": 14433 }, { "epoch": 0.37, "grad_norm": 1.5046106576919556, "learning_rate": 1.4526882092771413e-05, "loss": 0.6332, "step": 14434 }, { "epoch": 0.37, "grad_norm": 1.239635944366455, "learning_rate": 1.4526141864619454e-05, "loss": 0.5926, "step": 14435 }, { "epoch": 0.37, "grad_norm": 1.9030907154083252, "learning_rate": 1.4525401605276172e-05, "loss": 0.4167, "step": 14436 }, { "epoch": 0.37, "grad_norm": 2.1184468269348145, "learning_rate": 1.4524661314746662e-05, "loss": 0.5135, "step": 14437 }, { "epoch": 0.37, "grad_norm": 4.843777179718018, "learning_rate": 1.4523920993036033e-05, "loss": 0.5873, "step": 14438 }, { "epoch": 0.37, "grad_norm": 1.7887576818466187, "learning_rate": 1.4523180640149379e-05, "loss": 0.5726, "step": 14439 }, { "epoch": 0.37, "grad_norm": 1.2480144500732422, "learning_rate": 1.4522440256091807e-05, "loss": 0.544, "step": 14440 }, { "epoch": 0.37, "grad_norm": 2.748384952545166, "learning_rate": 1.452169984086842e-05, "loss": 0.6441, "step": 14441 }, { "epoch": 0.37, "grad_norm": 1.2764108180999756, "learning_rate": 1.4520959394484316e-05, "loss": 0.698, "step": 14442 }, { "epoch": 0.37, "grad_norm": 1.3215378522872925, "learning_rate": 1.4520218916944602e-05, "loss": 0.5474, "step": 14443 }, { "epoch": 0.37, "grad_norm": 2.236241102218628, "learning_rate": 1.4519478408254375e-05, "loss": 0.5976, "step": 14444 }, { "epoch": 0.37, "grad_norm": 1.1947182416915894, "learning_rate": 1.4518737868418749e-05, "loss": 0.4314, "step": 14445 }, { "epoch": 0.37, "grad_norm": 1.7532709836959839, "learning_rate": 1.4517997297442815e-05, "loss": 0.6779, "step": 14446 }, { "epoch": 0.37, "grad_norm": 2.703322410583496, "learning_rate": 1.4517256695331683e-05, "loss": 0.5559, "step": 14447 }, { "epoch": 0.37, "grad_norm": 2.708092212677002, "learning_rate": 1.4516516062090457e-05, "loss": 0.5445, "step": 14448 }, { "epoch": 0.37, "grad_norm": 3.3992385864257812, "learning_rate": 1.4515775397724238e-05, "loss": 0.6429, "step": 14449 }, { "epoch": 0.37, "grad_norm": 1.2734222412109375, "learning_rate": 1.4515034702238136e-05, "loss": 0.6464, "step": 14450 }, { "epoch": 0.37, "grad_norm": 2.835582971572876, "learning_rate": 1.4514293975637248e-05, "loss": 0.6526, "step": 14451 }, { "epoch": 0.37, "grad_norm": 2.034572124481201, "learning_rate": 1.451355321792668e-05, "loss": 0.6165, "step": 14452 }, { "epoch": 0.37, "grad_norm": 0.9858352541923523, "learning_rate": 1.4512812429111547e-05, "loss": 0.5635, "step": 14453 }, { "epoch": 0.37, "grad_norm": 1.1317955255508423, "learning_rate": 1.4512071609196941e-05, "loss": 0.6502, "step": 14454 }, { "epoch": 0.37, "grad_norm": 2.805765390396118, "learning_rate": 1.4511330758187971e-05, "loss": 0.7564, "step": 14455 }, { "epoch": 0.37, "grad_norm": 3.6344072818756104, "learning_rate": 1.4510589876089746e-05, "loss": 0.628, "step": 14456 }, { "epoch": 0.37, "grad_norm": 1.3002089262008667, "learning_rate": 1.4509848962907369e-05, "loss": 0.5928, "step": 14457 }, { "epoch": 0.37, "grad_norm": 1.489351749420166, "learning_rate": 1.4509108018645946e-05, "loss": 0.4891, "step": 14458 }, { "epoch": 0.37, "grad_norm": 1.4161607027053833, "learning_rate": 1.4508367043310583e-05, "loss": 0.6481, "step": 14459 }, { "epoch": 0.37, "grad_norm": 2.2576332092285156, "learning_rate": 1.4507626036906389e-05, "loss": 0.6045, "step": 14460 }, { "epoch": 0.37, "grad_norm": 1.1899303197860718, "learning_rate": 1.4506884999438464e-05, "loss": 0.4983, "step": 14461 }, { "epoch": 0.37, "grad_norm": 1.2864729166030884, "learning_rate": 1.450614393091192e-05, "loss": 0.4299, "step": 14462 }, { "epoch": 0.37, "grad_norm": 2.9522061347961426, "learning_rate": 1.4505402831331866e-05, "loss": 0.6107, "step": 14463 }, { "epoch": 0.37, "grad_norm": 3.15826678276062, "learning_rate": 1.4504661700703405e-05, "loss": 0.6623, "step": 14464 }, { "epoch": 0.37, "grad_norm": 2.1561450958251953, "learning_rate": 1.4503920539031645e-05, "loss": 0.6458, "step": 14465 }, { "epoch": 0.37, "grad_norm": 1.2668670415878296, "learning_rate": 1.4503179346321694e-05, "loss": 0.5405, "step": 14466 }, { "epoch": 0.37, "grad_norm": 2.1139180660247803, "learning_rate": 1.4502438122578658e-05, "loss": 0.7687, "step": 14467 }, { "epoch": 0.37, "grad_norm": 1.82655668258667, "learning_rate": 1.4501696867807653e-05, "loss": 0.6122, "step": 14468 }, { "epoch": 0.37, "grad_norm": 3.1675474643707275, "learning_rate": 1.4500955582013778e-05, "loss": 0.4907, "step": 14469 }, { "epoch": 0.37, "grad_norm": 1.7467408180236816, "learning_rate": 1.4500214265202145e-05, "loss": 0.6452, "step": 14470 }, { "epoch": 0.37, "grad_norm": 1.9271395206451416, "learning_rate": 1.4499472917377864e-05, "loss": 0.7082, "step": 14471 }, { "epoch": 0.37, "grad_norm": 1.9762417078018188, "learning_rate": 1.449873153854604e-05, "loss": 0.5405, "step": 14472 }, { "epoch": 0.37, "grad_norm": 5.09847354888916, "learning_rate": 1.4497990128711786e-05, "loss": 0.6559, "step": 14473 }, { "epoch": 0.37, "grad_norm": 1.1630831956863403, "learning_rate": 1.449724868788021e-05, "loss": 0.5899, "step": 14474 }, { "epoch": 0.37, "grad_norm": 5.672711372375488, "learning_rate": 1.449650721605642e-05, "loss": 0.7392, "step": 14475 }, { "epoch": 0.37, "grad_norm": 1.3595126867294312, "learning_rate": 1.4495765713245532e-05, "loss": 0.5277, "step": 14476 }, { "epoch": 0.37, "grad_norm": 2.1676957607269287, "learning_rate": 1.4495024179452646e-05, "loss": 0.6221, "step": 14477 }, { "epoch": 0.37, "grad_norm": 1.812345266342163, "learning_rate": 1.4494282614682879e-05, "loss": 0.6851, "step": 14478 }, { "epoch": 0.37, "grad_norm": 3.8617985248565674, "learning_rate": 1.4493541018941339e-05, "loss": 0.5708, "step": 14479 }, { "epoch": 0.37, "grad_norm": 2.853773832321167, "learning_rate": 1.4492799392233141e-05, "loss": 0.6154, "step": 14480 }, { "epoch": 0.37, "grad_norm": 1.1399060487747192, "learning_rate": 1.4492057734563388e-05, "loss": 0.6065, "step": 14481 }, { "epoch": 0.37, "grad_norm": 1.312404990196228, "learning_rate": 1.4491316045937199e-05, "loss": 0.5351, "step": 14482 }, { "epoch": 0.37, "grad_norm": 2.7990758419036865, "learning_rate": 1.4490574326359679e-05, "loss": 0.7178, "step": 14483 }, { "epoch": 0.37, "grad_norm": 2.0371313095092773, "learning_rate": 1.4489832575835941e-05, "loss": 0.7669, "step": 14484 }, { "epoch": 0.37, "grad_norm": 1.7484996318817139, "learning_rate": 1.4489090794371099e-05, "loss": 0.7202, "step": 14485 }, { "epoch": 0.37, "grad_norm": 1.3708800077438354, "learning_rate": 1.4488348981970264e-05, "loss": 0.5702, "step": 14486 }, { "epoch": 0.37, "grad_norm": 1.4326171875, "learning_rate": 1.4487607138638549e-05, "loss": 0.4815, "step": 14487 }, { "epoch": 0.37, "grad_norm": 2.255093812942505, "learning_rate": 1.4486865264381063e-05, "loss": 0.6888, "step": 14488 }, { "epoch": 0.37, "grad_norm": 1.3438513278961182, "learning_rate": 1.4486123359202922e-05, "loss": 0.6068, "step": 14489 }, { "epoch": 0.37, "grad_norm": 3.138925075531006, "learning_rate": 1.4485381423109235e-05, "loss": 0.6362, "step": 14490 }, { "epoch": 0.37, "grad_norm": 1.141091227531433, "learning_rate": 1.4484639456105116e-05, "loss": 0.6409, "step": 14491 }, { "epoch": 0.37, "grad_norm": 1.3468233346939087, "learning_rate": 1.4483897458195683e-05, "loss": 0.4262, "step": 14492 }, { "epoch": 0.37, "grad_norm": 0.9628923535346985, "learning_rate": 1.4483155429386045e-05, "loss": 0.4402, "step": 14493 }, { "epoch": 0.37, "grad_norm": 1.8034101724624634, "learning_rate": 1.4482413369681317e-05, "loss": 0.6053, "step": 14494 }, { "epoch": 0.37, "grad_norm": 1.1181176900863647, "learning_rate": 1.448167127908661e-05, "loss": 0.535, "step": 14495 }, { "epoch": 0.37, "grad_norm": 4.360827445983887, "learning_rate": 1.4480929157607043e-05, "loss": 0.6192, "step": 14496 }, { "epoch": 0.37, "grad_norm": 4.506994724273682, "learning_rate": 1.4480187005247725e-05, "loss": 0.658, "step": 14497 }, { "epoch": 0.37, "grad_norm": 2.5090603828430176, "learning_rate": 1.4479444822013774e-05, "loss": 0.6408, "step": 14498 }, { "epoch": 0.37, "grad_norm": 2.0135247707366943, "learning_rate": 1.4478702607910303e-05, "loss": 0.6198, "step": 14499 }, { "epoch": 0.37, "grad_norm": 2.873476505279541, "learning_rate": 1.4477960362942427e-05, "loss": 0.6329, "step": 14500 }, { "epoch": 0.37, "grad_norm": 1.8671780824661255, "learning_rate": 1.4477218087115264e-05, "loss": 0.6899, "step": 14501 }, { "epoch": 0.37, "grad_norm": 1.8495529890060425, "learning_rate": 1.4476475780433922e-05, "loss": 0.5761, "step": 14502 }, { "epoch": 0.37, "grad_norm": 1.4514471292495728, "learning_rate": 1.4475733442903526e-05, "loss": 0.6647, "step": 14503 }, { "epoch": 0.37, "grad_norm": 1.7871134281158447, "learning_rate": 1.4474991074529185e-05, "loss": 0.7394, "step": 14504 }, { "epoch": 0.37, "grad_norm": 1.4872798919677734, "learning_rate": 1.4474248675316017e-05, "loss": 0.6865, "step": 14505 }, { "epoch": 0.37, "grad_norm": 1.259018063545227, "learning_rate": 1.447350624526914e-05, "loss": 0.389, "step": 14506 }, { "epoch": 0.37, "grad_norm": 1.8301937580108643, "learning_rate": 1.4472763784393667e-05, "loss": 0.6257, "step": 14507 }, { "epoch": 0.37, "grad_norm": 1.644848108291626, "learning_rate": 1.4472021292694714e-05, "loss": 0.4744, "step": 14508 }, { "epoch": 0.37, "grad_norm": 5.990988731384277, "learning_rate": 1.4471278770177404e-05, "loss": 0.4746, "step": 14509 }, { "epoch": 0.37, "grad_norm": 1.4632012844085693, "learning_rate": 1.4470536216846846e-05, "loss": 0.6729, "step": 14510 }, { "epoch": 0.37, "grad_norm": 3.8585920333862305, "learning_rate": 1.4469793632708164e-05, "loss": 0.5863, "step": 14511 }, { "epoch": 0.37, "grad_norm": 1.557419776916504, "learning_rate": 1.4469051017766471e-05, "loss": 0.5058, "step": 14512 }, { "epoch": 0.37, "grad_norm": 3.703326940536499, "learning_rate": 1.4468308372026888e-05, "loss": 0.6721, "step": 14513 }, { "epoch": 0.37, "grad_norm": 10.513847351074219, "learning_rate": 1.4467565695494527e-05, "loss": 0.6296, "step": 14514 }, { "epoch": 0.37, "grad_norm": 1.2081409692764282, "learning_rate": 1.4466822988174512e-05, "loss": 0.4586, "step": 14515 }, { "epoch": 0.37, "grad_norm": 3.970215320587158, "learning_rate": 1.4466080250071962e-05, "loss": 0.6296, "step": 14516 }, { "epoch": 0.37, "grad_norm": 3.0312747955322266, "learning_rate": 1.4465337481191987e-05, "loss": 0.8146, "step": 14517 }, { "epoch": 0.37, "grad_norm": 2.6077678203582764, "learning_rate": 1.4464594681539717e-05, "loss": 0.8114, "step": 14518 }, { "epoch": 0.37, "grad_norm": 3.7202906608581543, "learning_rate": 1.4463851851120262e-05, "loss": 0.6599, "step": 14519 }, { "epoch": 0.37, "grad_norm": 3.831348419189453, "learning_rate": 1.4463108989938746e-05, "loss": 0.5803, "step": 14520 }, { "epoch": 0.37, "grad_norm": 1.617753267288208, "learning_rate": 1.4462366098000287e-05, "loss": 0.709, "step": 14521 }, { "epoch": 0.37, "grad_norm": 6.410208225250244, "learning_rate": 1.4461623175310004e-05, "loss": 0.7337, "step": 14522 }, { "epoch": 0.37, "grad_norm": 1.2265655994415283, "learning_rate": 1.4460880221873019e-05, "loss": 0.5887, "step": 14523 }, { "epoch": 0.37, "grad_norm": 1.6237133741378784, "learning_rate": 1.4460137237694447e-05, "loss": 0.4875, "step": 14524 }, { "epoch": 0.37, "grad_norm": 1.5515615940093994, "learning_rate": 1.4459394222779412e-05, "loss": 0.6158, "step": 14525 }, { "epoch": 0.37, "grad_norm": 0.9476109147071838, "learning_rate": 1.4458651177133036e-05, "loss": 0.5693, "step": 14526 }, { "epoch": 0.37, "grad_norm": 1.5711162090301514, "learning_rate": 1.4457908100760433e-05, "loss": 0.7125, "step": 14527 }, { "epoch": 0.37, "grad_norm": 2.4060680866241455, "learning_rate": 1.4457164993666732e-05, "loss": 0.5188, "step": 14528 }, { "epoch": 0.37, "grad_norm": 1.5100226402282715, "learning_rate": 1.4456421855857049e-05, "loss": 0.5193, "step": 14529 }, { "epoch": 0.37, "grad_norm": 4.348368167877197, "learning_rate": 1.4455678687336507e-05, "loss": 0.6768, "step": 14530 }, { "epoch": 0.37, "grad_norm": 4.4780049324035645, "learning_rate": 1.4454935488110224e-05, "loss": 0.6104, "step": 14531 }, { "epoch": 0.37, "grad_norm": 5.227052688598633, "learning_rate": 1.4454192258183326e-05, "loss": 0.7972, "step": 14532 }, { "epoch": 0.37, "grad_norm": 1.7854938507080078, "learning_rate": 1.4453448997560937e-05, "loss": 0.6912, "step": 14533 }, { "epoch": 0.37, "grad_norm": 1.8372913599014282, "learning_rate": 1.4452705706248172e-05, "loss": 0.6772, "step": 14534 }, { "epoch": 0.37, "grad_norm": 4.6935200691223145, "learning_rate": 1.4451962384250155e-05, "loss": 0.5088, "step": 14535 }, { "epoch": 0.37, "grad_norm": 1.951927900314331, "learning_rate": 1.4451219031572013e-05, "loss": 0.5355, "step": 14536 }, { "epoch": 0.37, "grad_norm": 1.5387117862701416, "learning_rate": 1.4450475648218865e-05, "loss": 0.6014, "step": 14537 }, { "epoch": 0.37, "grad_norm": 1.7274303436279297, "learning_rate": 1.4449732234195834e-05, "loss": 0.6758, "step": 14538 }, { "epoch": 0.37, "grad_norm": 1.846318006515503, "learning_rate": 1.4448988789508045e-05, "loss": 0.5424, "step": 14539 }, { "epoch": 0.37, "grad_norm": 1.6119548082351685, "learning_rate": 1.4448245314160624e-05, "loss": 0.5396, "step": 14540 }, { "epoch": 0.37, "grad_norm": 1.7174111604690552, "learning_rate": 1.4447501808158686e-05, "loss": 0.5255, "step": 14541 }, { "epoch": 0.37, "grad_norm": 2.0077881813049316, "learning_rate": 1.4446758271507362e-05, "loss": 0.6319, "step": 14542 }, { "epoch": 0.37, "grad_norm": 3.090672254562378, "learning_rate": 1.4446014704211773e-05, "loss": 0.4972, "step": 14543 }, { "epoch": 0.37, "grad_norm": 1.4767037630081177, "learning_rate": 1.4445271106277042e-05, "loss": 0.5524, "step": 14544 }, { "epoch": 0.37, "grad_norm": 2.0456089973449707, "learning_rate": 1.4444527477708299e-05, "loss": 0.5792, "step": 14545 }, { "epoch": 0.37, "grad_norm": 1.3427242040634155, "learning_rate": 1.4443783818510662e-05, "loss": 0.6124, "step": 14546 }, { "epoch": 0.37, "grad_norm": 3.053628444671631, "learning_rate": 1.4443040128689262e-05, "loss": 0.606, "step": 14547 }, { "epoch": 0.37, "grad_norm": 6.422877788543701, "learning_rate": 1.4442296408249216e-05, "loss": 0.6817, "step": 14548 }, { "epoch": 0.37, "grad_norm": 1.957809567451477, "learning_rate": 1.4441552657195657e-05, "loss": 0.6025, "step": 14549 }, { "epoch": 0.37, "grad_norm": 1.7390345335006714, "learning_rate": 1.4440808875533708e-05, "loss": 0.4344, "step": 14550 }, { "epoch": 0.37, "grad_norm": 1.7509064674377441, "learning_rate": 1.4440065063268494e-05, "loss": 0.5802, "step": 14551 }, { "epoch": 0.37, "grad_norm": 1.376242995262146, "learning_rate": 1.443932122040514e-05, "loss": 0.5855, "step": 14552 }, { "epoch": 0.37, "grad_norm": 1.557560682296753, "learning_rate": 1.4438577346948774e-05, "loss": 0.5504, "step": 14553 }, { "epoch": 0.37, "grad_norm": 6.037519931793213, "learning_rate": 1.443783344290452e-05, "loss": 0.7104, "step": 14554 }, { "epoch": 0.37, "grad_norm": 2.087944984436035, "learning_rate": 1.4437089508277508e-05, "loss": 0.5168, "step": 14555 }, { "epoch": 0.37, "grad_norm": 2.0129969120025635, "learning_rate": 1.4436345543072863e-05, "loss": 0.5079, "step": 14556 }, { "epoch": 0.37, "grad_norm": 1.4602642059326172, "learning_rate": 1.4435601547295713e-05, "loss": 0.4652, "step": 14557 }, { "epoch": 0.37, "grad_norm": 3.009509801864624, "learning_rate": 1.4434857520951177e-05, "loss": 0.5831, "step": 14558 }, { "epoch": 0.37, "grad_norm": 2.1024837493896484, "learning_rate": 1.4434113464044397e-05, "loss": 0.7152, "step": 14559 }, { "epoch": 0.37, "grad_norm": 1.6811609268188477, "learning_rate": 1.4433369376580489e-05, "loss": 0.7286, "step": 14560 }, { "epoch": 0.37, "grad_norm": 3.4442384243011475, "learning_rate": 1.4432625258564583e-05, "loss": 0.6519, "step": 14561 }, { "epoch": 0.37, "grad_norm": 5.479091167449951, "learning_rate": 1.4431881110001814e-05, "loss": 0.5983, "step": 14562 }, { "epoch": 0.37, "grad_norm": 1.7065446376800537, "learning_rate": 1.44311369308973e-05, "loss": 0.7152, "step": 14563 }, { "epoch": 0.37, "grad_norm": 2.6030092239379883, "learning_rate": 1.4430392721256176e-05, "loss": 0.8402, "step": 14564 }, { "epoch": 0.37, "grad_norm": 3.393018960952759, "learning_rate": 1.442964848108357e-05, "loss": 0.7574, "step": 14565 }, { "epoch": 0.37, "grad_norm": 2.19092059135437, "learning_rate": 1.442890421038461e-05, "loss": 0.499, "step": 14566 }, { "epoch": 0.37, "grad_norm": 1.2095494270324707, "learning_rate": 1.4428159909164421e-05, "loss": 0.6661, "step": 14567 }, { "epoch": 0.37, "grad_norm": 4.216270446777344, "learning_rate": 1.4427415577428136e-05, "loss": 0.4985, "step": 14568 }, { "epoch": 0.37, "grad_norm": 4.82743501663208, "learning_rate": 1.4426671215180888e-05, "loss": 0.5521, "step": 14569 }, { "epoch": 0.37, "grad_norm": 3.2885220050811768, "learning_rate": 1.44259268224278e-05, "loss": 0.6313, "step": 14570 }, { "epoch": 0.37, "grad_norm": 3.711209535598755, "learning_rate": 1.442518239917401e-05, "loss": 0.6514, "step": 14571 }, { "epoch": 0.37, "grad_norm": 1.4043220281600952, "learning_rate": 1.4424437945424638e-05, "loss": 0.5915, "step": 14572 }, { "epoch": 0.37, "grad_norm": 4.247926235198975, "learning_rate": 1.442369346118482e-05, "loss": 0.6129, "step": 14573 }, { "epoch": 0.37, "grad_norm": 2.859294891357422, "learning_rate": 1.4422948946459687e-05, "loss": 0.7147, "step": 14574 }, { "epoch": 0.37, "grad_norm": 1.616597294807434, "learning_rate": 1.4422204401254369e-05, "loss": 0.511, "step": 14575 }, { "epoch": 0.37, "grad_norm": 1.563197374343872, "learning_rate": 1.4421459825573996e-05, "loss": 0.5988, "step": 14576 }, { "epoch": 0.37, "grad_norm": 1.9116061925888062, "learning_rate": 1.4420715219423702e-05, "loss": 0.6068, "step": 14577 }, { "epoch": 0.37, "grad_norm": 3.162911891937256, "learning_rate": 1.4419970582808613e-05, "loss": 0.5426, "step": 14578 }, { "epoch": 0.37, "grad_norm": 2.711165189743042, "learning_rate": 1.4419225915733866e-05, "loss": 0.4823, "step": 14579 }, { "epoch": 0.37, "grad_norm": 2.3145172595977783, "learning_rate": 1.4418481218204587e-05, "loss": 0.5248, "step": 14580 }, { "epoch": 0.37, "grad_norm": 8.594217300415039, "learning_rate": 1.4417736490225913e-05, "loss": 0.7079, "step": 14581 }, { "epoch": 0.37, "grad_norm": 1.1455191373825073, "learning_rate": 1.4416991731802976e-05, "loss": 0.7408, "step": 14582 }, { "epoch": 0.37, "grad_norm": 1.8194057941436768, "learning_rate": 1.4416246942940904e-05, "loss": 0.6401, "step": 14583 }, { "epoch": 0.37, "grad_norm": 1.493476152420044, "learning_rate": 1.4415502123644835e-05, "loss": 0.6207, "step": 14584 }, { "epoch": 0.37, "grad_norm": 3.338937997817993, "learning_rate": 1.44147572739199e-05, "loss": 0.8638, "step": 14585 }, { "epoch": 0.37, "grad_norm": 3.5741586685180664, "learning_rate": 1.441401239377123e-05, "loss": 0.5831, "step": 14586 }, { "epoch": 0.37, "grad_norm": 4.352028846740723, "learning_rate": 1.4413267483203961e-05, "loss": 0.8318, "step": 14587 }, { "epoch": 0.37, "grad_norm": 1.026899814605713, "learning_rate": 1.4412522542223221e-05, "loss": 0.5179, "step": 14588 }, { "epoch": 0.37, "grad_norm": 1.642122507095337, "learning_rate": 1.4411777570834153e-05, "loss": 0.5204, "step": 14589 }, { "epoch": 0.37, "grad_norm": 2.933143377304077, "learning_rate": 1.4411032569041883e-05, "loss": 0.4623, "step": 14590 }, { "epoch": 0.37, "grad_norm": 4.1586480140686035, "learning_rate": 1.441028753685155e-05, "loss": 0.6325, "step": 14591 }, { "epoch": 0.37, "grad_norm": 4.428946018218994, "learning_rate": 1.4409542474268285e-05, "loss": 0.6288, "step": 14592 }, { "epoch": 0.37, "grad_norm": 1.6596187353134155, "learning_rate": 1.4408797381297221e-05, "loss": 0.5247, "step": 14593 }, { "epoch": 0.37, "grad_norm": 2.507481575012207, "learning_rate": 1.4408052257943502e-05, "loss": 0.5384, "step": 14594 }, { "epoch": 0.37, "grad_norm": 2.308917999267578, "learning_rate": 1.440730710421225e-05, "loss": 0.5566, "step": 14595 }, { "epoch": 0.37, "grad_norm": 2.9291889667510986, "learning_rate": 1.4406561920108608e-05, "loss": 0.626, "step": 14596 }, { "epoch": 0.37, "grad_norm": 4.986640453338623, "learning_rate": 1.4405816705637713e-05, "loss": 0.7226, "step": 14597 }, { "epoch": 0.37, "grad_norm": 1.8690707683563232, "learning_rate": 1.4405071460804695e-05, "loss": 0.68, "step": 14598 }, { "epoch": 0.37, "grad_norm": 1.4516774415969849, "learning_rate": 1.4404326185614688e-05, "loss": 0.6565, "step": 14599 }, { "epoch": 0.37, "grad_norm": 8.238450050354004, "learning_rate": 1.4403580880072835e-05, "loss": 0.5131, "step": 14600 }, { "epoch": 0.37, "grad_norm": 1.5463229417800903, "learning_rate": 1.4402835544184273e-05, "loss": 0.5696, "step": 14601 }, { "epoch": 0.37, "grad_norm": 2.665125846862793, "learning_rate": 1.440209017795413e-05, "loss": 0.6365, "step": 14602 }, { "epoch": 0.37, "grad_norm": 2.0660789012908936, "learning_rate": 1.4401344781387549e-05, "loss": 0.6222, "step": 14603 }, { "epoch": 0.37, "grad_norm": 2.144773244857788, "learning_rate": 1.4400599354489661e-05, "loss": 0.7472, "step": 14604 }, { "epoch": 0.37, "grad_norm": 1.231695294380188, "learning_rate": 1.4399853897265612e-05, "loss": 0.5825, "step": 14605 }, { "epoch": 0.37, "grad_norm": 2.0460736751556396, "learning_rate": 1.4399108409720532e-05, "loss": 0.5046, "step": 14606 }, { "epoch": 0.37, "grad_norm": 4.256237030029297, "learning_rate": 1.439836289185956e-05, "loss": 0.6359, "step": 14607 }, { "epoch": 0.37, "grad_norm": 3.4058666229248047, "learning_rate": 1.4397617343687837e-05, "loss": 0.5718, "step": 14608 }, { "epoch": 0.37, "grad_norm": 2.070565700531006, "learning_rate": 1.4396871765210496e-05, "loss": 0.5887, "step": 14609 }, { "epoch": 0.37, "grad_norm": 4.734989643096924, "learning_rate": 1.439612615643268e-05, "loss": 0.5521, "step": 14610 }, { "epoch": 0.37, "grad_norm": 1.8086551427841187, "learning_rate": 1.4395380517359521e-05, "loss": 0.5573, "step": 14611 }, { "epoch": 0.37, "grad_norm": 6.211372375488281, "learning_rate": 1.439463484799616e-05, "loss": 0.7287, "step": 14612 }, { "epoch": 0.37, "grad_norm": 1.6355818510055542, "learning_rate": 1.4393889148347739e-05, "loss": 0.5881, "step": 14613 }, { "epoch": 0.37, "grad_norm": 1.4294379949569702, "learning_rate": 1.4393143418419396e-05, "loss": 0.5911, "step": 14614 }, { "epoch": 0.37, "grad_norm": 1.3826199769973755, "learning_rate": 1.4392397658216269e-05, "loss": 0.4637, "step": 14615 }, { "epoch": 0.37, "grad_norm": 2.639881134033203, "learning_rate": 1.4391651867743493e-05, "loss": 0.6856, "step": 14616 }, { "epoch": 0.37, "grad_norm": 2.7796711921691895, "learning_rate": 1.439090604700621e-05, "loss": 0.5194, "step": 14617 }, { "epoch": 0.37, "grad_norm": 1.8109095096588135, "learning_rate": 1.4390160196009565e-05, "loss": 0.5554, "step": 14618 }, { "epoch": 0.37, "grad_norm": 2.6508655548095703, "learning_rate": 1.4389414314758693e-05, "loss": 0.6277, "step": 14619 }, { "epoch": 0.37, "grad_norm": 2.271878957748413, "learning_rate": 1.4388668403258738e-05, "loss": 0.6367, "step": 14620 }, { "epoch": 0.37, "grad_norm": 1.7369641065597534, "learning_rate": 1.4387922461514834e-05, "loss": 0.6771, "step": 14621 }, { "epoch": 0.37, "grad_norm": 1.3611313104629517, "learning_rate": 1.4387176489532129e-05, "loss": 0.6822, "step": 14622 }, { "epoch": 0.37, "grad_norm": 5.958657741546631, "learning_rate": 1.4386430487315756e-05, "loss": 0.5737, "step": 14623 }, { "epoch": 0.37, "grad_norm": 2.268470048904419, "learning_rate": 1.4385684454870862e-05, "loss": 0.4253, "step": 14624 }, { "epoch": 0.37, "grad_norm": 1.2439740896224976, "learning_rate": 1.4384938392202586e-05, "loss": 0.5811, "step": 14625 }, { "epoch": 0.37, "grad_norm": 1.6955715417861938, "learning_rate": 1.4384192299316067e-05, "loss": 0.6106, "step": 14626 }, { "epoch": 0.37, "grad_norm": 2.7288882732391357, "learning_rate": 1.438344617621645e-05, "loss": 0.5062, "step": 14627 }, { "epoch": 0.37, "grad_norm": 3.0768063068389893, "learning_rate": 1.4382700022908877e-05, "loss": 0.5282, "step": 14628 }, { "epoch": 0.37, "grad_norm": 2.0766608715057373, "learning_rate": 1.4381953839398489e-05, "loss": 0.8421, "step": 14629 }, { "epoch": 0.37, "grad_norm": 1.3886350393295288, "learning_rate": 1.4381207625690427e-05, "loss": 0.6175, "step": 14630 }, { "epoch": 0.38, "grad_norm": 1.5827115774154663, "learning_rate": 1.4380461381789837e-05, "loss": 0.4113, "step": 14631 }, { "epoch": 0.38, "grad_norm": 3.4349801540374756, "learning_rate": 1.4379715107701857e-05, "loss": 0.6692, "step": 14632 }, { "epoch": 0.38, "grad_norm": 2.081644058227539, "learning_rate": 1.4378968803431631e-05, "loss": 0.5271, "step": 14633 }, { "epoch": 0.38, "grad_norm": 1.9876787662506104, "learning_rate": 1.4378222468984309e-05, "loss": 0.5963, "step": 14634 }, { "epoch": 0.38, "grad_norm": 1.9895700216293335, "learning_rate": 1.4377476104365022e-05, "loss": 0.6018, "step": 14635 }, { "epoch": 0.38, "grad_norm": 3.055487871170044, "learning_rate": 1.437672970957892e-05, "loss": 0.4822, "step": 14636 }, { "epoch": 0.38, "grad_norm": 1.6727359294891357, "learning_rate": 1.4375983284631152e-05, "loss": 0.6507, "step": 14637 }, { "epoch": 0.38, "grad_norm": 2.2415475845336914, "learning_rate": 1.4375236829526852e-05, "loss": 0.6461, "step": 14638 }, { "epoch": 0.38, "grad_norm": 1.7936303615570068, "learning_rate": 1.437449034427117e-05, "loss": 0.5731, "step": 14639 }, { "epoch": 0.38, "grad_norm": 1.4080084562301636, "learning_rate": 1.437374382886925e-05, "loss": 0.467, "step": 14640 }, { "epoch": 0.38, "grad_norm": 6.104716777801514, "learning_rate": 1.437299728332623e-05, "loss": 0.5141, "step": 14641 }, { "epoch": 0.38, "grad_norm": 1.6096512079238892, "learning_rate": 1.4372250707647268e-05, "loss": 0.5375, "step": 14642 }, { "epoch": 0.38, "grad_norm": 3.972968101501465, "learning_rate": 1.4371504101837496e-05, "loss": 0.4802, "step": 14643 }, { "epoch": 0.38, "grad_norm": 2.071200132369995, "learning_rate": 1.4370757465902067e-05, "loss": 0.5378, "step": 14644 }, { "epoch": 0.38, "grad_norm": 4.307863712310791, "learning_rate": 1.4370010799846121e-05, "loss": 0.5455, "step": 14645 }, { "epoch": 0.38, "grad_norm": 1.7680028676986694, "learning_rate": 1.4369264103674806e-05, "loss": 0.6501, "step": 14646 }, { "epoch": 0.38, "grad_norm": 0.9953347444534302, "learning_rate": 1.436851737739327e-05, "loss": 0.5767, "step": 14647 }, { "epoch": 0.38, "grad_norm": 0.942155659198761, "learning_rate": 1.4367770621006653e-05, "loss": 0.3816, "step": 14648 }, { "epoch": 0.38, "grad_norm": 1.5148614645004272, "learning_rate": 1.436702383452011e-05, "loss": 0.5798, "step": 14649 }, { "epoch": 0.38, "grad_norm": 1.2329216003417969, "learning_rate": 1.436627701793878e-05, "loss": 0.5755, "step": 14650 }, { "epoch": 0.38, "grad_norm": 1.5729109048843384, "learning_rate": 1.4365530171267814e-05, "loss": 0.5342, "step": 14651 }, { "epoch": 0.38, "grad_norm": 1.7368310689926147, "learning_rate": 1.4364783294512351e-05, "loss": 0.5305, "step": 14652 }, { "epoch": 0.38, "grad_norm": 1.7547123432159424, "learning_rate": 1.436403638767755e-05, "loss": 0.6817, "step": 14653 }, { "epoch": 0.38, "grad_norm": 1.8061652183532715, "learning_rate": 1.436328945076855e-05, "loss": 0.6244, "step": 14654 }, { "epoch": 0.38, "grad_norm": 2.3678171634674072, "learning_rate": 1.4362542483790498e-05, "loss": 0.6371, "step": 14655 }, { "epoch": 0.38, "grad_norm": 1.2165491580963135, "learning_rate": 1.4361795486748547e-05, "loss": 0.4852, "step": 14656 }, { "epoch": 0.38, "grad_norm": 1.7998578548431396, "learning_rate": 1.436104845964784e-05, "loss": 0.5291, "step": 14657 }, { "epoch": 0.38, "grad_norm": 0.8817337155342102, "learning_rate": 1.4360301402493526e-05, "loss": 0.4042, "step": 14658 }, { "epoch": 0.38, "grad_norm": 1.6493310928344727, "learning_rate": 1.4359554315290759e-05, "loss": 0.6289, "step": 14659 }, { "epoch": 0.38, "grad_norm": 4.106941223144531, "learning_rate": 1.4358807198044678e-05, "loss": 0.6611, "step": 14660 }, { "epoch": 0.38, "grad_norm": 1.304315209388733, "learning_rate": 1.4358060050760438e-05, "loss": 0.4155, "step": 14661 }, { "epoch": 0.38, "grad_norm": 1.6087374687194824, "learning_rate": 1.4357312873443186e-05, "loss": 0.6122, "step": 14662 }, { "epoch": 0.38, "grad_norm": 1.5831010341644287, "learning_rate": 1.435656566609807e-05, "loss": 0.6449, "step": 14663 }, { "epoch": 0.38, "grad_norm": 5.265295028686523, "learning_rate": 1.4355818428730241e-05, "loss": 0.676, "step": 14664 }, { "epoch": 0.38, "grad_norm": 1.9699116945266724, "learning_rate": 1.4355071161344846e-05, "loss": 0.7562, "step": 14665 }, { "epoch": 0.38, "grad_norm": 2.135575532913208, "learning_rate": 1.4354323863947042e-05, "loss": 0.7028, "step": 14666 }, { "epoch": 0.38, "grad_norm": 1.3309217691421509, "learning_rate": 1.435357653654197e-05, "loss": 0.5356, "step": 14667 }, { "epoch": 0.38, "grad_norm": 0.870218813419342, "learning_rate": 1.4352829179134787e-05, "loss": 0.5555, "step": 14668 }, { "epoch": 0.38, "grad_norm": 2.188068151473999, "learning_rate": 1.4352081791730634e-05, "loss": 0.5016, "step": 14669 }, { "epoch": 0.38, "grad_norm": 6.542667865753174, "learning_rate": 1.4351334374334672e-05, "loss": 0.5996, "step": 14670 }, { "epoch": 0.38, "grad_norm": 3.850053310394287, "learning_rate": 1.4350586926952045e-05, "loss": 0.7512, "step": 14671 }, { "epoch": 0.38, "grad_norm": 1.2341049909591675, "learning_rate": 1.4349839449587908e-05, "loss": 0.4396, "step": 14672 }, { "epoch": 0.38, "grad_norm": 3.8236477375030518, "learning_rate": 1.4349091942247411e-05, "loss": 0.5717, "step": 14673 }, { "epoch": 0.38, "grad_norm": 8.740253448486328, "learning_rate": 1.4348344404935703e-05, "loss": 0.6265, "step": 14674 }, { "epoch": 0.38, "grad_norm": 1.5386924743652344, "learning_rate": 1.434759683765794e-05, "loss": 0.5147, "step": 14675 }, { "epoch": 0.38, "grad_norm": 3.0745229721069336, "learning_rate": 1.4346849240419267e-05, "loss": 0.9147, "step": 14676 }, { "epoch": 0.38, "grad_norm": 3.8446366786956787, "learning_rate": 1.4346101613224841e-05, "loss": 0.7792, "step": 14677 }, { "epoch": 0.38, "grad_norm": 1.7805882692337036, "learning_rate": 1.4345353956079814e-05, "loss": 0.5883, "step": 14678 }, { "epoch": 0.38, "grad_norm": 6.901033401489258, "learning_rate": 1.4344606268989339e-05, "loss": 0.6307, "step": 14679 }, { "epoch": 0.38, "grad_norm": 1.176007866859436, "learning_rate": 1.4343858551958564e-05, "loss": 0.4724, "step": 14680 }, { "epoch": 0.38, "grad_norm": 1.8699634075164795, "learning_rate": 1.4343110804992648e-05, "loss": 0.553, "step": 14681 }, { "epoch": 0.38, "grad_norm": 2.3583180904388428, "learning_rate": 1.4342363028096737e-05, "loss": 0.4983, "step": 14682 }, { "epoch": 0.38, "grad_norm": 2.8331844806671143, "learning_rate": 1.4341615221275993e-05, "loss": 0.7728, "step": 14683 }, { "epoch": 0.38, "grad_norm": 1.6676748991012573, "learning_rate": 1.434086738453556e-05, "loss": 0.6434, "step": 14684 }, { "epoch": 0.38, "grad_norm": 2.055725336074829, "learning_rate": 1.43401195178806e-05, "loss": 0.5294, "step": 14685 }, { "epoch": 0.38, "grad_norm": 1.3221758604049683, "learning_rate": 1.433937162131626e-05, "loss": 0.5649, "step": 14686 }, { "epoch": 0.38, "grad_norm": 1.0064196586608887, "learning_rate": 1.43386236948477e-05, "loss": 0.6204, "step": 14687 }, { "epoch": 0.38, "grad_norm": 1.6433438062667847, "learning_rate": 1.433787573848007e-05, "loss": 0.5541, "step": 14688 }, { "epoch": 0.38, "grad_norm": 8.315400123596191, "learning_rate": 1.4337127752218525e-05, "loss": 0.5211, "step": 14689 }, { "epoch": 0.38, "grad_norm": 1.625427007675171, "learning_rate": 1.433637973606822e-05, "loss": 0.6736, "step": 14690 }, { "epoch": 0.38, "grad_norm": 6.744244575500488, "learning_rate": 1.433563169003431e-05, "loss": 0.5575, "step": 14691 }, { "epoch": 0.38, "grad_norm": 3.355811834335327, "learning_rate": 1.4334883614121956e-05, "loss": 0.6733, "step": 14692 }, { "epoch": 0.38, "grad_norm": 1.160235047340393, "learning_rate": 1.43341355083363e-05, "loss": 0.5694, "step": 14693 }, { "epoch": 0.38, "grad_norm": 1.094104528427124, "learning_rate": 1.4333387372682508e-05, "loss": 0.6433, "step": 14694 }, { "epoch": 0.38, "grad_norm": 1.3923325538635254, "learning_rate": 1.4332639207165733e-05, "loss": 0.6658, "step": 14695 }, { "epoch": 0.38, "grad_norm": 1.6550042629241943, "learning_rate": 1.4331891011791132e-05, "loss": 0.4988, "step": 14696 }, { "epoch": 0.38, "grad_norm": 1.881067156791687, "learning_rate": 1.4331142786563857e-05, "loss": 0.6405, "step": 14697 }, { "epoch": 0.38, "grad_norm": 1.916646122932434, "learning_rate": 1.4330394531489069e-05, "loss": 0.6416, "step": 14698 }, { "epoch": 0.38, "grad_norm": 2.211714029312134, "learning_rate": 1.432964624657192e-05, "loss": 0.5333, "step": 14699 }, { "epoch": 0.38, "grad_norm": 7.446270942687988, "learning_rate": 1.4328897931817572e-05, "loss": 0.6711, "step": 14700 }, { "epoch": 0.38, "grad_norm": 3.6598260402679443, "learning_rate": 1.4328149587231178e-05, "loss": 0.5548, "step": 14701 }, { "epoch": 0.38, "grad_norm": 1.5150970220565796, "learning_rate": 1.4327401212817898e-05, "loss": 0.5762, "step": 14702 }, { "epoch": 0.38, "grad_norm": 2.949552059173584, "learning_rate": 1.4326652808582884e-05, "loss": 0.5319, "step": 14703 }, { "epoch": 0.38, "grad_norm": 1.6532297134399414, "learning_rate": 1.4325904374531298e-05, "loss": 0.7577, "step": 14704 }, { "epoch": 0.38, "grad_norm": 2.4135470390319824, "learning_rate": 1.4325155910668298e-05, "loss": 0.5965, "step": 14705 }, { "epoch": 0.38, "grad_norm": 1.3890328407287598, "learning_rate": 1.432440741699904e-05, "loss": 0.7074, "step": 14706 }, { "epoch": 0.38, "grad_norm": 1.7942111492156982, "learning_rate": 1.4323658893528685e-05, "loss": 0.6843, "step": 14707 }, { "epoch": 0.38, "grad_norm": 2.0496020317077637, "learning_rate": 1.4322910340262387e-05, "loss": 0.7132, "step": 14708 }, { "epoch": 0.38, "grad_norm": 1.8233437538146973, "learning_rate": 1.4322161757205306e-05, "loss": 0.594, "step": 14709 }, { "epoch": 0.38, "grad_norm": 3.4999969005584717, "learning_rate": 1.4321413144362606e-05, "loss": 0.4897, "step": 14710 }, { "epoch": 0.38, "grad_norm": 4.954614639282227, "learning_rate": 1.432066450173944e-05, "loss": 0.7545, "step": 14711 }, { "epoch": 0.38, "grad_norm": 1.642087697982788, "learning_rate": 1.4319915829340968e-05, "loss": 0.662, "step": 14712 }, { "epoch": 0.38, "grad_norm": 2.9181206226348877, "learning_rate": 1.4319167127172346e-05, "loss": 0.6765, "step": 14713 }, { "epoch": 0.38, "grad_norm": 1.7444554567337036, "learning_rate": 1.4318418395238742e-05, "loss": 0.6605, "step": 14714 }, { "epoch": 0.38, "grad_norm": 1.9814823865890503, "learning_rate": 1.431766963354531e-05, "loss": 0.566, "step": 14715 }, { "epoch": 0.38, "grad_norm": 3.550999879837036, "learning_rate": 1.4316920842097213e-05, "loss": 0.6948, "step": 14716 }, { "epoch": 0.38, "grad_norm": 1.4358464479446411, "learning_rate": 1.431617202089961e-05, "loss": 0.7019, "step": 14717 }, { "epoch": 0.38, "grad_norm": 2.2679636478424072, "learning_rate": 1.431542316995766e-05, "loss": 0.5286, "step": 14718 }, { "epoch": 0.38, "grad_norm": 1.667065978050232, "learning_rate": 1.4314674289276526e-05, "loss": 0.6209, "step": 14719 }, { "epoch": 0.38, "grad_norm": 1.5989723205566406, "learning_rate": 1.4313925378861366e-05, "loss": 0.5928, "step": 14720 }, { "epoch": 0.38, "grad_norm": 1.373589277267456, "learning_rate": 1.4313176438717342e-05, "loss": 0.5282, "step": 14721 }, { "epoch": 0.38, "grad_norm": 2.0114080905914307, "learning_rate": 1.4312427468849619e-05, "loss": 0.5516, "step": 14722 }, { "epoch": 0.38, "grad_norm": 3.5560531616210938, "learning_rate": 1.4311678469263353e-05, "loss": 0.7171, "step": 14723 }, { "epoch": 0.38, "grad_norm": 1.2224830389022827, "learning_rate": 1.4310929439963709e-05, "loss": 0.6385, "step": 14724 }, { "epoch": 0.38, "grad_norm": 1.0868734121322632, "learning_rate": 1.4310180380955846e-05, "loss": 0.6283, "step": 14725 }, { "epoch": 0.38, "grad_norm": 2.069687604904175, "learning_rate": 1.4309431292244926e-05, "loss": 0.6317, "step": 14726 }, { "epoch": 0.38, "grad_norm": 1.3961968421936035, "learning_rate": 1.4308682173836118e-05, "loss": 0.5776, "step": 14727 }, { "epoch": 0.38, "grad_norm": 2.309708595275879, "learning_rate": 1.4307933025734575e-05, "loss": 0.597, "step": 14728 }, { "epoch": 0.38, "grad_norm": 2.5927600860595703, "learning_rate": 1.4307183847945467e-05, "loss": 0.6728, "step": 14729 }, { "epoch": 0.38, "grad_norm": 3.3670780658721924, "learning_rate": 1.4306434640473952e-05, "loss": 0.6392, "step": 14730 }, { "epoch": 0.38, "grad_norm": 3.3152284622192383, "learning_rate": 1.4305685403325199e-05, "loss": 0.6956, "step": 14731 }, { "epoch": 0.38, "grad_norm": 5.154952526092529, "learning_rate": 1.4304936136504361e-05, "loss": 0.7148, "step": 14732 }, { "epoch": 0.38, "grad_norm": 3.6134793758392334, "learning_rate": 1.4304186840016612e-05, "loss": 0.5003, "step": 14733 }, { "epoch": 0.38, "grad_norm": 5.362034797668457, "learning_rate": 1.4303437513867109e-05, "loss": 0.5816, "step": 14734 }, { "epoch": 0.38, "grad_norm": 9.203394889831543, "learning_rate": 1.430268815806102e-05, "loss": 0.4337, "step": 14735 }, { "epoch": 0.38, "grad_norm": 4.195966720581055, "learning_rate": 1.4301938772603508e-05, "loss": 0.6468, "step": 14736 }, { "epoch": 0.38, "grad_norm": 1.6820110082626343, "learning_rate": 1.4301189357499737e-05, "loss": 0.5624, "step": 14737 }, { "epoch": 0.38, "grad_norm": 1.5879679918289185, "learning_rate": 1.4300439912754867e-05, "loss": 0.6621, "step": 14738 }, { "epoch": 0.38, "grad_norm": 2.198585033416748, "learning_rate": 1.4299690438374073e-05, "loss": 0.5837, "step": 14739 }, { "epoch": 0.38, "grad_norm": 1.5862836837768555, "learning_rate": 1.4298940934362509e-05, "loss": 0.5882, "step": 14740 }, { "epoch": 0.38, "grad_norm": 2.295781135559082, "learning_rate": 1.4298191400725348e-05, "loss": 0.6579, "step": 14741 }, { "epoch": 0.38, "grad_norm": 2.3202967643737793, "learning_rate": 1.4297441837467752e-05, "loss": 0.6299, "step": 14742 }, { "epoch": 0.38, "grad_norm": 8.260100364685059, "learning_rate": 1.4296692244594885e-05, "loss": 0.742, "step": 14743 }, { "epoch": 0.38, "grad_norm": 3.786003589630127, "learning_rate": 1.4295942622111913e-05, "loss": 0.5486, "step": 14744 }, { "epoch": 0.38, "grad_norm": 1.4111498594284058, "learning_rate": 1.4295192970024006e-05, "loss": 0.6604, "step": 14745 }, { "epoch": 0.38, "grad_norm": 1.3713634014129639, "learning_rate": 1.429444328833633e-05, "loss": 0.5932, "step": 14746 }, { "epoch": 0.38, "grad_norm": 1.597030758857727, "learning_rate": 1.4293693577054044e-05, "loss": 0.7188, "step": 14747 }, { "epoch": 0.38, "grad_norm": 1.639214038848877, "learning_rate": 1.4292943836182324e-05, "loss": 0.4808, "step": 14748 }, { "epoch": 0.38, "grad_norm": 1.4896835088729858, "learning_rate": 1.4292194065726328e-05, "loss": 0.4672, "step": 14749 }, { "epoch": 0.38, "grad_norm": 1.7274810075759888, "learning_rate": 1.4291444265691227e-05, "loss": 0.6339, "step": 14750 }, { "epoch": 0.38, "grad_norm": 1.5870989561080933, "learning_rate": 1.4290694436082193e-05, "loss": 0.5806, "step": 14751 }, { "epoch": 0.38, "grad_norm": 4.299439907073975, "learning_rate": 1.4289944576904385e-05, "loss": 0.5064, "step": 14752 }, { "epoch": 0.38, "grad_norm": 5.552547454833984, "learning_rate": 1.4289194688162976e-05, "loss": 0.5375, "step": 14753 }, { "epoch": 0.38, "grad_norm": 1.3730361461639404, "learning_rate": 1.428844476986313e-05, "loss": 0.6464, "step": 14754 }, { "epoch": 0.38, "grad_norm": 1.1943814754486084, "learning_rate": 1.428769482201002e-05, "loss": 0.608, "step": 14755 }, { "epoch": 0.38, "grad_norm": 3.4174232482910156, "learning_rate": 1.4286944844608807e-05, "loss": 0.721, "step": 14756 }, { "epoch": 0.38, "grad_norm": 4.0895094871521, "learning_rate": 1.4286194837664664e-05, "loss": 0.7025, "step": 14757 }, { "epoch": 0.38, "grad_norm": 7.283816337585449, "learning_rate": 1.4285444801182763e-05, "loss": 0.6443, "step": 14758 }, { "epoch": 0.38, "grad_norm": 1.25505793094635, "learning_rate": 1.4284694735168265e-05, "loss": 0.542, "step": 14759 }, { "epoch": 0.38, "grad_norm": 1.6440438032150269, "learning_rate": 1.4283944639626347e-05, "loss": 0.3208, "step": 14760 }, { "epoch": 0.38, "grad_norm": 1.2635409832000732, "learning_rate": 1.4283194514562169e-05, "loss": 0.5812, "step": 14761 }, { "epoch": 0.38, "grad_norm": 1.444451093673706, "learning_rate": 1.4282444359980904e-05, "loss": 0.6114, "step": 14762 }, { "epoch": 0.38, "grad_norm": 1.6251438856124878, "learning_rate": 1.4281694175887731e-05, "loss": 0.5892, "step": 14763 }, { "epoch": 0.38, "grad_norm": 5.511345863342285, "learning_rate": 1.4280943962287806e-05, "loss": 0.7769, "step": 14764 }, { "epoch": 0.38, "grad_norm": 4.03713321685791, "learning_rate": 1.4280193719186306e-05, "loss": 0.6112, "step": 14765 }, { "epoch": 0.38, "grad_norm": 4.841307163238525, "learning_rate": 1.42794434465884e-05, "loss": 0.6956, "step": 14766 }, { "epoch": 0.38, "grad_norm": 1.3696845769882202, "learning_rate": 1.4278693144499256e-05, "loss": 0.5587, "step": 14767 }, { "epoch": 0.38, "grad_norm": 1.4329084157943726, "learning_rate": 1.4277942812924051e-05, "loss": 0.6336, "step": 14768 }, { "epoch": 0.38, "grad_norm": 6.839156150817871, "learning_rate": 1.427719245186795e-05, "loss": 0.5722, "step": 14769 }, { "epoch": 0.38, "grad_norm": 1.1797025203704834, "learning_rate": 1.4276442061336127e-05, "loss": 0.5837, "step": 14770 }, { "epoch": 0.38, "grad_norm": 2.8130314350128174, "learning_rate": 1.4275691641333749e-05, "loss": 0.6264, "step": 14771 }, { "epoch": 0.38, "grad_norm": 1.6789993047714233, "learning_rate": 1.4274941191865995e-05, "loss": 0.5341, "step": 14772 }, { "epoch": 0.38, "grad_norm": 1.5023411512374878, "learning_rate": 1.4274190712938027e-05, "loss": 0.4736, "step": 14773 }, { "epoch": 0.38, "grad_norm": 5.834114074707031, "learning_rate": 1.4273440204555025e-05, "loss": 0.6769, "step": 14774 }, { "epoch": 0.38, "grad_norm": 5.413857936859131, "learning_rate": 1.4272689666722157e-05, "loss": 0.534, "step": 14775 }, { "epoch": 0.38, "grad_norm": 4.299098491668701, "learning_rate": 1.4271939099444597e-05, "loss": 0.758, "step": 14776 }, { "epoch": 0.38, "grad_norm": 2.266758441925049, "learning_rate": 1.4271188502727516e-05, "loss": 0.5803, "step": 14777 }, { "epoch": 0.38, "grad_norm": 3.129791736602783, "learning_rate": 1.4270437876576087e-05, "loss": 0.6985, "step": 14778 }, { "epoch": 0.38, "grad_norm": 1.9963910579681396, "learning_rate": 1.4269687220995483e-05, "loss": 0.6623, "step": 14779 }, { "epoch": 0.38, "grad_norm": 2.152637243270874, "learning_rate": 1.4268936535990879e-05, "loss": 0.5683, "step": 14780 }, { "epoch": 0.38, "grad_norm": 1.8109564781188965, "learning_rate": 1.4268185821567441e-05, "loss": 0.4527, "step": 14781 }, { "epoch": 0.38, "grad_norm": 2.523221492767334, "learning_rate": 1.4267435077730354e-05, "loss": 0.5627, "step": 14782 }, { "epoch": 0.38, "grad_norm": 1.5275943279266357, "learning_rate": 1.4266684304484783e-05, "loss": 0.5359, "step": 14783 }, { "epoch": 0.38, "grad_norm": 3.081599473953247, "learning_rate": 1.4265933501835907e-05, "loss": 0.4622, "step": 14784 }, { "epoch": 0.38, "grad_norm": 3.8186938762664795, "learning_rate": 1.4265182669788895e-05, "loss": 0.637, "step": 14785 }, { "epoch": 0.38, "grad_norm": 2.167973041534424, "learning_rate": 1.4264431808348922e-05, "loss": 0.6183, "step": 14786 }, { "epoch": 0.38, "grad_norm": 1.8913360834121704, "learning_rate": 1.4263680917521168e-05, "loss": 0.6464, "step": 14787 }, { "epoch": 0.38, "grad_norm": 1.476022720336914, "learning_rate": 1.42629299973108e-05, "loss": 0.5324, "step": 14788 }, { "epoch": 0.38, "grad_norm": 3.8791985511779785, "learning_rate": 1.4262179047723e-05, "loss": 0.6256, "step": 14789 }, { "epoch": 0.38, "grad_norm": 2.914440393447876, "learning_rate": 1.4261428068762936e-05, "loss": 0.9598, "step": 14790 }, { "epoch": 0.38, "grad_norm": 2.809147834777832, "learning_rate": 1.426067706043579e-05, "loss": 0.5797, "step": 14791 }, { "epoch": 0.38, "grad_norm": 1.430897831916809, "learning_rate": 1.4259926022746734e-05, "loss": 0.5623, "step": 14792 }, { "epoch": 0.38, "grad_norm": 2.5207974910736084, "learning_rate": 1.4259174955700945e-05, "loss": 0.5989, "step": 14793 }, { "epoch": 0.38, "grad_norm": 1.2216655015945435, "learning_rate": 1.4258423859303599e-05, "loss": 0.6085, "step": 14794 }, { "epoch": 0.38, "grad_norm": 1.0659871101379395, "learning_rate": 1.4257672733559866e-05, "loss": 0.5718, "step": 14795 }, { "epoch": 0.38, "grad_norm": 1.6643083095550537, "learning_rate": 1.4256921578474934e-05, "loss": 0.6684, "step": 14796 }, { "epoch": 0.38, "grad_norm": 7.379184246063232, "learning_rate": 1.4256170394053968e-05, "loss": 0.4771, "step": 14797 }, { "epoch": 0.38, "grad_norm": 3.9394381046295166, "learning_rate": 1.425541918030215e-05, "loss": 0.7922, "step": 14798 }, { "epoch": 0.38, "grad_norm": 2.252941131591797, "learning_rate": 1.425466793722466e-05, "loss": 0.5181, "step": 14799 }, { "epoch": 0.38, "grad_norm": 1.6597965955734253, "learning_rate": 1.4253916664826668e-05, "loss": 0.6704, "step": 14800 }, { "epoch": 0.38, "grad_norm": 2.900120258331299, "learning_rate": 1.4253165363113357e-05, "loss": 0.93, "step": 14801 }, { "epoch": 0.38, "grad_norm": 3.912307024002075, "learning_rate": 1.42524140320899e-05, "loss": 0.8189, "step": 14802 }, { "epoch": 0.38, "grad_norm": 13.07928466796875, "learning_rate": 1.4251662671761478e-05, "loss": 0.5421, "step": 14803 }, { "epoch": 0.38, "grad_norm": 2.4093332290649414, "learning_rate": 1.4250911282133272e-05, "loss": 0.72, "step": 14804 }, { "epoch": 0.38, "grad_norm": 1.366398572921753, "learning_rate": 1.425015986321045e-05, "loss": 0.5654, "step": 14805 }, { "epoch": 0.38, "grad_norm": 1.924418568611145, "learning_rate": 1.4249408414998202e-05, "loss": 0.5812, "step": 14806 }, { "epoch": 0.38, "grad_norm": 2.056469202041626, "learning_rate": 1.42486569375017e-05, "loss": 0.6274, "step": 14807 }, { "epoch": 0.38, "grad_norm": 1.729279637336731, "learning_rate": 1.4247905430726122e-05, "loss": 0.6124, "step": 14808 }, { "epoch": 0.38, "grad_norm": 1.365814447402954, "learning_rate": 1.424715389467665e-05, "loss": 0.6812, "step": 14809 }, { "epoch": 0.38, "grad_norm": 2.452338933944702, "learning_rate": 1.4246402329358457e-05, "loss": 0.6173, "step": 14810 }, { "epoch": 0.38, "grad_norm": 2.4517529010772705, "learning_rate": 1.4245650734776733e-05, "loss": 0.6497, "step": 14811 }, { "epoch": 0.38, "grad_norm": 2.44378399848938, "learning_rate": 1.4244899110936647e-05, "loss": 0.5785, "step": 14812 }, { "epoch": 0.38, "grad_norm": 1.295514702796936, "learning_rate": 1.4244147457843387e-05, "loss": 0.5118, "step": 14813 }, { "epoch": 0.38, "grad_norm": 3.785256862640381, "learning_rate": 1.4243395775502124e-05, "loss": 0.5753, "step": 14814 }, { "epoch": 0.38, "grad_norm": 1.3258998394012451, "learning_rate": 1.4242644063918048e-05, "loss": 0.6997, "step": 14815 }, { "epoch": 0.38, "grad_norm": 3.3070383071899414, "learning_rate": 1.4241892323096333e-05, "loss": 0.6822, "step": 14816 }, { "epoch": 0.38, "grad_norm": 1.9682680368423462, "learning_rate": 1.4241140553042163e-05, "loss": 0.6169, "step": 14817 }, { "epoch": 0.38, "grad_norm": 1.9363700151443481, "learning_rate": 1.4240388753760714e-05, "loss": 0.6331, "step": 14818 }, { "epoch": 0.38, "grad_norm": 1.1118237972259521, "learning_rate": 1.4239636925257172e-05, "loss": 0.5602, "step": 14819 }, { "epoch": 0.38, "grad_norm": 1.4018893241882324, "learning_rate": 1.4238885067536714e-05, "loss": 0.3845, "step": 14820 }, { "epoch": 0.38, "grad_norm": 5.890771865844727, "learning_rate": 1.4238133180604525e-05, "loss": 0.7006, "step": 14821 }, { "epoch": 0.38, "grad_norm": 2.0188965797424316, "learning_rate": 1.4237381264465783e-05, "loss": 0.7472, "step": 14822 }, { "epoch": 0.38, "grad_norm": 2.475736141204834, "learning_rate": 1.4236629319125673e-05, "loss": 0.4344, "step": 14823 }, { "epoch": 0.38, "grad_norm": 1.9369492530822754, "learning_rate": 1.4235877344589375e-05, "loss": 0.5448, "step": 14824 }, { "epoch": 0.38, "grad_norm": 2.689723014831543, "learning_rate": 1.423512534086207e-05, "loss": 0.6658, "step": 14825 }, { "epoch": 0.38, "grad_norm": 1.5328714847564697, "learning_rate": 1.4234373307948945e-05, "loss": 0.5076, "step": 14826 }, { "epoch": 0.38, "grad_norm": 2.319683074951172, "learning_rate": 1.4233621245855177e-05, "loss": 0.6528, "step": 14827 }, { "epoch": 0.38, "grad_norm": 4.277766227722168, "learning_rate": 1.4232869154585953e-05, "loss": 0.6994, "step": 14828 }, { "epoch": 0.38, "grad_norm": 6.281398773193359, "learning_rate": 1.4232117034146453e-05, "loss": 0.6458, "step": 14829 }, { "epoch": 0.38, "grad_norm": 2.3894591331481934, "learning_rate": 1.423136488454186e-05, "loss": 0.563, "step": 14830 }, { "epoch": 0.38, "grad_norm": 2.0889759063720703, "learning_rate": 1.423061270577736e-05, "loss": 0.6367, "step": 14831 }, { "epoch": 0.38, "grad_norm": 3.11320161819458, "learning_rate": 1.4229860497858136e-05, "loss": 0.6065, "step": 14832 }, { "epoch": 0.38, "grad_norm": 1.527089238166809, "learning_rate": 1.422910826078937e-05, "loss": 0.5756, "step": 14833 }, { "epoch": 0.38, "grad_norm": 1.3064292669296265, "learning_rate": 1.4228355994576247e-05, "loss": 0.5573, "step": 14834 }, { "epoch": 0.38, "grad_norm": 3.158177137374878, "learning_rate": 1.4227603699223949e-05, "loss": 0.6925, "step": 14835 }, { "epoch": 0.38, "grad_norm": 4.191962242126465, "learning_rate": 1.4226851374737668e-05, "loss": 0.4587, "step": 14836 }, { "epoch": 0.38, "grad_norm": 2.5247926712036133, "learning_rate": 1.4226099021122577e-05, "loss": 0.7958, "step": 14837 }, { "epoch": 0.38, "grad_norm": 1.4204930067062378, "learning_rate": 1.422534663838387e-05, "loss": 0.5273, "step": 14838 }, { "epoch": 0.38, "grad_norm": 0.8832296133041382, "learning_rate": 1.4224594226526727e-05, "loss": 0.6183, "step": 14839 }, { "epoch": 0.38, "grad_norm": 1.307875633239746, "learning_rate": 1.4223841785556336e-05, "loss": 0.4694, "step": 14840 }, { "epoch": 0.38, "grad_norm": 4.670359134674072, "learning_rate": 1.4223089315477878e-05, "loss": 0.8715, "step": 14841 }, { "epoch": 0.38, "grad_norm": 2.39727783203125, "learning_rate": 1.4222336816296545e-05, "loss": 0.8088, "step": 14842 }, { "epoch": 0.38, "grad_norm": 1.4161810874938965, "learning_rate": 1.4221584288017518e-05, "loss": 0.6152, "step": 14843 }, { "epoch": 0.38, "grad_norm": 3.5451014041900635, "learning_rate": 1.4220831730645985e-05, "loss": 0.7309, "step": 14844 }, { "epoch": 0.38, "grad_norm": 2.6835861206054688, "learning_rate": 1.4220079144187131e-05, "loss": 0.6787, "step": 14845 }, { "epoch": 0.38, "grad_norm": 1.5990632772445679, "learning_rate": 1.4219326528646143e-05, "loss": 0.5486, "step": 14846 }, { "epoch": 0.38, "grad_norm": 2.9468741416931152, "learning_rate": 1.4218573884028206e-05, "loss": 0.6635, "step": 14847 }, { "epoch": 0.38, "grad_norm": 1.7998987436294556, "learning_rate": 1.421782121033851e-05, "loss": 0.5725, "step": 14848 }, { "epoch": 0.38, "grad_norm": 1.7150038480758667, "learning_rate": 1.421706850758224e-05, "loss": 0.6264, "step": 14849 }, { "epoch": 0.38, "grad_norm": 5.431746482849121, "learning_rate": 1.4216315775764583e-05, "loss": 0.5173, "step": 14850 }, { "epoch": 0.38, "grad_norm": 4.43544864654541, "learning_rate": 1.4215563014890726e-05, "loss": 0.5095, "step": 14851 }, { "epoch": 0.38, "grad_norm": 1.413008689880371, "learning_rate": 1.4214810224965858e-05, "loss": 0.5759, "step": 14852 }, { "epoch": 0.38, "grad_norm": 2.063532829284668, "learning_rate": 1.4214057405995166e-05, "loss": 0.8766, "step": 14853 }, { "epoch": 0.38, "grad_norm": 2.89839768409729, "learning_rate": 1.4213304557983836e-05, "loss": 0.5108, "step": 14854 }, { "epoch": 0.38, "grad_norm": 0.9926307797431946, "learning_rate": 1.4212551680937062e-05, "loss": 0.6095, "step": 14855 }, { "epoch": 0.38, "grad_norm": 3.5600900650024414, "learning_rate": 1.4211798774860024e-05, "loss": 0.8155, "step": 14856 }, { "epoch": 0.38, "grad_norm": 1.7302119731903076, "learning_rate": 1.4211045839757918e-05, "loss": 0.6213, "step": 14857 }, { "epoch": 0.38, "grad_norm": 1.5421578884124756, "learning_rate": 1.421029287563593e-05, "loss": 0.5123, "step": 14858 }, { "epoch": 0.38, "grad_norm": 1.3529367446899414, "learning_rate": 1.4209539882499243e-05, "loss": 0.5728, "step": 14859 }, { "epoch": 0.38, "grad_norm": 1.6353954076766968, "learning_rate": 1.4208786860353059e-05, "loss": 0.5498, "step": 14860 }, { "epoch": 0.38, "grad_norm": 2.221114158630371, "learning_rate": 1.4208033809202557e-05, "loss": 0.4683, "step": 14861 }, { "epoch": 0.38, "grad_norm": 1.5378280878067017, "learning_rate": 1.420728072905293e-05, "loss": 0.492, "step": 14862 }, { "epoch": 0.38, "grad_norm": 1.2200947999954224, "learning_rate": 1.4206527619909368e-05, "loss": 0.6902, "step": 14863 }, { "epoch": 0.38, "grad_norm": 1.1547685861587524, "learning_rate": 1.420577448177706e-05, "loss": 0.5722, "step": 14864 }, { "epoch": 0.38, "grad_norm": 5.031322956085205, "learning_rate": 1.4205021314661196e-05, "loss": 0.5731, "step": 14865 }, { "epoch": 0.38, "grad_norm": 2.919814348220825, "learning_rate": 1.4204268118566967e-05, "loss": 0.6224, "step": 14866 }, { "epoch": 0.38, "grad_norm": 2.517185688018799, "learning_rate": 1.4203514893499564e-05, "loss": 0.5969, "step": 14867 }, { "epoch": 0.38, "grad_norm": 3.575982093811035, "learning_rate": 1.4202761639464178e-05, "loss": 0.726, "step": 14868 }, { "epoch": 0.38, "grad_norm": 1.7752227783203125, "learning_rate": 1.4202008356466e-05, "loss": 0.4901, "step": 14869 }, { "epoch": 0.38, "grad_norm": 1.4243454933166504, "learning_rate": 1.4201255044510215e-05, "loss": 0.5152, "step": 14870 }, { "epoch": 0.38, "grad_norm": 1.4696906805038452, "learning_rate": 1.4200501703602022e-05, "loss": 0.5232, "step": 14871 }, { "epoch": 0.38, "grad_norm": 1.573981523513794, "learning_rate": 1.4199748333746618e-05, "loss": 0.6307, "step": 14872 }, { "epoch": 0.38, "grad_norm": 1.175271987915039, "learning_rate": 1.4198994934949178e-05, "loss": 0.5095, "step": 14873 }, { "epoch": 0.38, "grad_norm": 3.066477060317993, "learning_rate": 1.4198241507214907e-05, "loss": 0.834, "step": 14874 }, { "epoch": 0.38, "grad_norm": 1.9403481483459473, "learning_rate": 1.4197488050548991e-05, "loss": 0.6088, "step": 14875 }, { "epoch": 0.38, "grad_norm": 3.596290349960327, "learning_rate": 1.4196734564956624e-05, "loss": 0.5535, "step": 14876 }, { "epoch": 0.38, "grad_norm": 4.155620098114014, "learning_rate": 1.4195981050443001e-05, "loss": 0.513, "step": 14877 }, { "epoch": 0.38, "grad_norm": 2.074655294418335, "learning_rate": 1.4195227507013312e-05, "loss": 0.7894, "step": 14878 }, { "epoch": 0.38, "grad_norm": 10.406558990478516, "learning_rate": 1.4194473934672752e-05, "loss": 0.7049, "step": 14879 }, { "epoch": 0.38, "grad_norm": 1.6333515644073486, "learning_rate": 1.4193720333426511e-05, "loss": 0.6405, "step": 14880 }, { "epoch": 0.38, "grad_norm": 1.3903828859329224, "learning_rate": 1.4192966703279787e-05, "loss": 0.5869, "step": 14881 }, { "epoch": 0.38, "grad_norm": 4.754318714141846, "learning_rate": 1.4192213044237768e-05, "loss": 0.6836, "step": 14882 }, { "epoch": 0.38, "grad_norm": 1.5963060855865479, "learning_rate": 1.419145935630565e-05, "loss": 0.5318, "step": 14883 }, { "epoch": 0.38, "grad_norm": 1.4049428701400757, "learning_rate": 1.4190705639488628e-05, "loss": 0.6091, "step": 14884 }, { "epoch": 0.38, "grad_norm": 2.0520267486572266, "learning_rate": 1.4189951893791897e-05, "loss": 0.6837, "step": 14885 }, { "epoch": 0.38, "grad_norm": 1.850041151046753, "learning_rate": 1.418919811922065e-05, "loss": 0.7972, "step": 14886 }, { "epoch": 0.38, "grad_norm": 1.5511391162872314, "learning_rate": 1.4188444315780081e-05, "loss": 0.4647, "step": 14887 }, { "epoch": 0.38, "grad_norm": 2.4963557720184326, "learning_rate": 1.4187690483475385e-05, "loss": 0.7852, "step": 14888 }, { "epoch": 0.38, "grad_norm": 7.633752346038818, "learning_rate": 1.4186936622311757e-05, "loss": 0.5469, "step": 14889 }, { "epoch": 0.38, "grad_norm": 2.443563938140869, "learning_rate": 1.4186182732294392e-05, "loss": 0.7222, "step": 14890 }, { "epoch": 0.38, "grad_norm": 1.6060363054275513, "learning_rate": 1.4185428813428489e-05, "loss": 0.3391, "step": 14891 }, { "epoch": 0.38, "grad_norm": 1.9688048362731934, "learning_rate": 1.4184674865719235e-05, "loss": 0.7541, "step": 14892 }, { "epoch": 0.38, "grad_norm": 1.3771792650222778, "learning_rate": 1.4183920889171833e-05, "loss": 0.5578, "step": 14893 }, { "epoch": 0.38, "grad_norm": 1.2066240310668945, "learning_rate": 1.4183166883791477e-05, "loss": 0.5054, "step": 14894 }, { "epoch": 0.38, "grad_norm": 1.815363883972168, "learning_rate": 1.4182412849583363e-05, "loss": 0.6221, "step": 14895 }, { "epoch": 0.38, "grad_norm": 3.451394557952881, "learning_rate": 1.4181658786552688e-05, "loss": 0.6211, "step": 14896 }, { "epoch": 0.38, "grad_norm": 1.501732587814331, "learning_rate": 1.4180904694704645e-05, "loss": 0.7285, "step": 14897 }, { "epoch": 0.38, "grad_norm": 1.9180880784988403, "learning_rate": 1.4180150574044439e-05, "loss": 0.6011, "step": 14898 }, { "epoch": 0.38, "grad_norm": 2.5380990505218506, "learning_rate": 1.4179396424577255e-05, "loss": 0.5406, "step": 14899 }, { "epoch": 0.38, "grad_norm": 1.552822232246399, "learning_rate": 1.4178642246308299e-05, "loss": 0.5564, "step": 14900 }, { "epoch": 0.38, "grad_norm": 1.4448552131652832, "learning_rate": 1.4177888039242768e-05, "loss": 0.6422, "step": 14901 }, { "epoch": 0.38, "grad_norm": 1.7358136177062988, "learning_rate": 1.4177133803385857e-05, "loss": 0.5876, "step": 14902 }, { "epoch": 0.38, "grad_norm": 4.789478778839111, "learning_rate": 1.4176379538742761e-05, "loss": 0.6667, "step": 14903 }, { "epoch": 0.38, "grad_norm": 5.2544660568237305, "learning_rate": 1.4175625245318685e-05, "loss": 0.6224, "step": 14904 }, { "epoch": 0.38, "grad_norm": 1.5374400615692139, "learning_rate": 1.4174870923118822e-05, "loss": 0.4953, "step": 14905 }, { "epoch": 0.38, "grad_norm": 3.3914575576782227, "learning_rate": 1.4174116572148368e-05, "loss": 0.5274, "step": 14906 }, { "epoch": 0.38, "grad_norm": 1.4612805843353271, "learning_rate": 1.417336219241253e-05, "loss": 0.6975, "step": 14907 }, { "epoch": 0.38, "grad_norm": 4.0225677490234375, "learning_rate": 1.4172607783916499e-05, "loss": 0.5322, "step": 14908 }, { "epoch": 0.38, "grad_norm": 1.8446418046951294, "learning_rate": 1.4171853346665476e-05, "loss": 0.7323, "step": 14909 }, { "epoch": 0.38, "grad_norm": 7.3882269859313965, "learning_rate": 1.4171098880664665e-05, "loss": 0.9344, "step": 14910 }, { "epoch": 0.38, "grad_norm": 2.9028892517089844, "learning_rate": 1.4170344385919257e-05, "loss": 0.5211, "step": 14911 }, { "epoch": 0.38, "grad_norm": 2.3922860622406006, "learning_rate": 1.4169589862434455e-05, "loss": 0.7158, "step": 14912 }, { "epoch": 0.38, "grad_norm": 1.2785512208938599, "learning_rate": 1.4168835310215462e-05, "loss": 0.6098, "step": 14913 }, { "epoch": 0.38, "grad_norm": 1.5945050716400146, "learning_rate": 1.4168080729267472e-05, "loss": 0.5278, "step": 14914 }, { "epoch": 0.38, "grad_norm": 1.5838232040405273, "learning_rate": 1.4167326119595692e-05, "loss": 0.5322, "step": 14915 }, { "epoch": 0.38, "grad_norm": 1.5833362340927124, "learning_rate": 1.4166571481205318e-05, "loss": 0.5774, "step": 14916 }, { "epoch": 0.38, "grad_norm": 4.027886867523193, "learning_rate": 1.416581681410155e-05, "loss": 0.93, "step": 14917 }, { "epoch": 0.38, "grad_norm": 3.4600610733032227, "learning_rate": 1.416506211828959e-05, "loss": 0.5685, "step": 14918 }, { "epoch": 0.38, "grad_norm": 4.211681365966797, "learning_rate": 1.4164307393774636e-05, "loss": 0.7186, "step": 14919 }, { "epoch": 0.38, "grad_norm": 1.2600160837173462, "learning_rate": 1.4163552640561895e-05, "loss": 0.497, "step": 14920 }, { "epoch": 0.38, "grad_norm": 2.8999743461608887, "learning_rate": 1.4162797858656565e-05, "loss": 0.7735, "step": 14921 }, { "epoch": 0.38, "grad_norm": 1.6980829238891602, "learning_rate": 1.4162043048063846e-05, "loss": 0.5721, "step": 14922 }, { "epoch": 0.38, "grad_norm": 2.3342959880828857, "learning_rate": 1.416128820878894e-05, "loss": 0.5413, "step": 14923 }, { "epoch": 0.38, "grad_norm": 5.632952690124512, "learning_rate": 1.4160533340837052e-05, "loss": 0.6595, "step": 14924 }, { "epoch": 0.38, "grad_norm": 1.2781871557235718, "learning_rate": 1.4159778444213384e-05, "loss": 0.5681, "step": 14925 }, { "epoch": 0.38, "grad_norm": 1.0891823768615723, "learning_rate": 1.4159023518923132e-05, "loss": 0.491, "step": 14926 }, { "epoch": 0.38, "grad_norm": 2.484989643096924, "learning_rate": 1.4158268564971506e-05, "loss": 0.8393, "step": 14927 }, { "epoch": 0.38, "grad_norm": 1.253894329071045, "learning_rate": 1.4157513582363705e-05, "loss": 0.5686, "step": 14928 }, { "epoch": 0.38, "grad_norm": 3.1623995304107666, "learning_rate": 1.415675857110493e-05, "loss": 0.5962, "step": 14929 }, { "epoch": 0.38, "grad_norm": 1.9074665307998657, "learning_rate": 1.415600353120039e-05, "loss": 0.6627, "step": 14930 }, { "epoch": 0.38, "grad_norm": 1.4561951160430908, "learning_rate": 1.4155248462655284e-05, "loss": 0.5107, "step": 14931 }, { "epoch": 0.38, "grad_norm": 1.306305170059204, "learning_rate": 1.4154493365474815e-05, "loss": 0.537, "step": 14932 }, { "epoch": 0.38, "grad_norm": 2.668015241622925, "learning_rate": 1.4153738239664188e-05, "loss": 0.6216, "step": 14933 }, { "epoch": 0.38, "grad_norm": 3.0838377475738525, "learning_rate": 1.415298308522861e-05, "loss": 0.7237, "step": 14934 }, { "epoch": 0.38, "grad_norm": 1.9044314622879028, "learning_rate": 1.4152227902173276e-05, "loss": 0.4727, "step": 14935 }, { "epoch": 0.38, "grad_norm": 2.3331997394561768, "learning_rate": 1.4151472690503398e-05, "loss": 0.6979, "step": 14936 }, { "epoch": 0.38, "grad_norm": 1.422038197517395, "learning_rate": 1.4150717450224181e-05, "loss": 0.5709, "step": 14937 }, { "epoch": 0.38, "grad_norm": 4.75294828414917, "learning_rate": 1.4149962181340824e-05, "loss": 0.5629, "step": 14938 }, { "epoch": 0.38, "grad_norm": 4.305881023406982, "learning_rate": 1.4149206883858538e-05, "loss": 0.8111, "step": 14939 }, { "epoch": 0.38, "grad_norm": 1.8209317922592163, "learning_rate": 1.4148451557782523e-05, "loss": 0.5544, "step": 14940 }, { "epoch": 0.38, "grad_norm": 1.9343510866165161, "learning_rate": 1.4147696203117985e-05, "loss": 0.5724, "step": 14941 }, { "epoch": 0.38, "grad_norm": 4.273874282836914, "learning_rate": 1.4146940819870132e-05, "loss": 0.648, "step": 14942 }, { "epoch": 0.38, "grad_norm": 3.6828644275665283, "learning_rate": 1.4146185408044169e-05, "loss": 0.5865, "step": 14943 }, { "epoch": 0.38, "grad_norm": 1.5652140378952026, "learning_rate": 1.4145429967645299e-05, "loss": 0.6892, "step": 14944 }, { "epoch": 0.38, "grad_norm": 1.7820826768875122, "learning_rate": 1.4144674498678732e-05, "loss": 0.5324, "step": 14945 }, { "epoch": 0.38, "grad_norm": 1.7979457378387451, "learning_rate": 1.4143919001149672e-05, "loss": 0.4789, "step": 14946 }, { "epoch": 0.38, "grad_norm": 3.3189613819122314, "learning_rate": 1.4143163475063323e-05, "loss": 0.7586, "step": 14947 }, { "epoch": 0.38, "grad_norm": 2.977640390396118, "learning_rate": 1.4142407920424896e-05, "loss": 0.5735, "step": 14948 }, { "epoch": 0.38, "grad_norm": 3.5511374473571777, "learning_rate": 1.41416523372396e-05, "loss": 0.636, "step": 14949 }, { "epoch": 0.38, "grad_norm": 1.7720178365707397, "learning_rate": 1.4140896725512633e-05, "loss": 0.5654, "step": 14950 }, { "epoch": 0.38, "grad_norm": 2.324324607849121, "learning_rate": 1.4140141085249208e-05, "loss": 0.7329, "step": 14951 }, { "epoch": 0.38, "grad_norm": 1.6818145513534546, "learning_rate": 1.4139385416454534e-05, "loss": 0.7324, "step": 14952 }, { "epoch": 0.38, "grad_norm": 5.61033296585083, "learning_rate": 1.4138629719133815e-05, "loss": 0.8867, "step": 14953 }, { "epoch": 0.38, "grad_norm": 1.5449657440185547, "learning_rate": 1.413787399329226e-05, "loss": 0.6316, "step": 14954 }, { "epoch": 0.38, "grad_norm": 1.518373966217041, "learning_rate": 1.4137118238935077e-05, "loss": 0.6067, "step": 14955 }, { "epoch": 0.38, "grad_norm": 2.125415563583374, "learning_rate": 1.4136362456067474e-05, "loss": 0.7618, "step": 14956 }, { "epoch": 0.38, "grad_norm": 3.302447557449341, "learning_rate": 1.413560664469466e-05, "loss": 0.6341, "step": 14957 }, { "epoch": 0.38, "grad_norm": 2.6526436805725098, "learning_rate": 1.4134850804821842e-05, "loss": 0.5424, "step": 14958 }, { "epoch": 0.38, "grad_norm": 2.4818801879882812, "learning_rate": 1.4134094936454235e-05, "loss": 0.6948, "step": 14959 }, { "epoch": 0.38, "grad_norm": 1.6843827962875366, "learning_rate": 1.4133339039597038e-05, "loss": 0.494, "step": 14960 }, { "epoch": 0.38, "grad_norm": 1.6083705425262451, "learning_rate": 1.4132583114255467e-05, "loss": 0.6508, "step": 14961 }, { "epoch": 0.38, "grad_norm": 1.841225504875183, "learning_rate": 1.413182716043473e-05, "loss": 0.6446, "step": 14962 }, { "epoch": 0.38, "grad_norm": 3.9753828048706055, "learning_rate": 1.4131071178140034e-05, "loss": 0.6711, "step": 14963 }, { "epoch": 0.38, "grad_norm": 1.4837126731872559, "learning_rate": 1.4130315167376594e-05, "loss": 0.5314, "step": 14964 }, { "epoch": 0.38, "grad_norm": 1.3291348218917847, "learning_rate": 1.4129559128149612e-05, "loss": 0.5991, "step": 14965 }, { "epoch": 0.38, "grad_norm": 2.3076491355895996, "learning_rate": 1.412880306046431e-05, "loss": 0.6806, "step": 14966 }, { "epoch": 0.38, "grad_norm": 1.5520490407943726, "learning_rate": 1.4128046964325886e-05, "loss": 0.5982, "step": 14967 }, { "epoch": 0.38, "grad_norm": 1.9905954599380493, "learning_rate": 1.4127290839739555e-05, "loss": 0.5063, "step": 14968 }, { "epoch": 0.38, "grad_norm": 1.3425772190093994, "learning_rate": 1.4126534686710531e-05, "loss": 0.5832, "step": 14969 }, { "epoch": 0.38, "grad_norm": 2.597954273223877, "learning_rate": 1.412577850524402e-05, "loss": 0.7386, "step": 14970 }, { "epoch": 0.38, "grad_norm": 4.313017845153809, "learning_rate": 1.4125022295345242e-05, "loss": 0.7143, "step": 14971 }, { "epoch": 0.38, "grad_norm": 1.275840401649475, "learning_rate": 1.4124266057019395e-05, "loss": 0.3805, "step": 14972 }, { "epoch": 0.38, "grad_norm": 2.6006758213043213, "learning_rate": 1.4123509790271702e-05, "loss": 0.5686, "step": 14973 }, { "epoch": 0.38, "grad_norm": 1.761931300163269, "learning_rate": 1.4122753495107366e-05, "loss": 0.5139, "step": 14974 }, { "epoch": 0.38, "grad_norm": 1.7239775657653809, "learning_rate": 1.4121997171531605e-05, "loss": 0.4842, "step": 14975 }, { "epoch": 0.38, "grad_norm": 2.630769968032837, "learning_rate": 1.412124081954963e-05, "loss": 0.6392, "step": 14976 }, { "epoch": 0.38, "grad_norm": 3.2822062969207764, "learning_rate": 1.412048443916665e-05, "loss": 0.6298, "step": 14977 }, { "epoch": 0.38, "grad_norm": 2.543527364730835, "learning_rate": 1.4119728030387884e-05, "loss": 0.6384, "step": 14978 }, { "epoch": 0.38, "grad_norm": 2.9215452671051025, "learning_rate": 1.4118971593218537e-05, "loss": 0.6138, "step": 14979 }, { "epoch": 0.38, "grad_norm": 3.053981304168701, "learning_rate": 1.4118215127663827e-05, "loss": 0.674, "step": 14980 }, { "epoch": 0.38, "grad_norm": 2.5489253997802734, "learning_rate": 1.4117458633728967e-05, "loss": 0.6106, "step": 14981 }, { "epoch": 0.38, "grad_norm": 4.02583646774292, "learning_rate": 1.4116702111419167e-05, "loss": 0.5044, "step": 14982 }, { "epoch": 0.38, "grad_norm": 1.9263211488723755, "learning_rate": 1.4115945560739643e-05, "loss": 0.5159, "step": 14983 }, { "epoch": 0.38, "grad_norm": 2.688812017440796, "learning_rate": 1.411518898169561e-05, "loss": 0.5888, "step": 14984 }, { "epoch": 0.38, "grad_norm": 2.5301146507263184, "learning_rate": 1.4114432374292278e-05, "loss": 0.6446, "step": 14985 }, { "epoch": 0.38, "grad_norm": 2.5314595699310303, "learning_rate": 1.4113675738534862e-05, "loss": 0.684, "step": 14986 }, { "epoch": 0.38, "grad_norm": 1.7280324697494507, "learning_rate": 1.4112919074428577e-05, "loss": 0.6362, "step": 14987 }, { "epoch": 0.38, "grad_norm": 1.8177025318145752, "learning_rate": 1.4112162381978643e-05, "loss": 0.8253, "step": 14988 }, { "epoch": 0.38, "grad_norm": 4.246464252471924, "learning_rate": 1.4111405661190263e-05, "loss": 0.585, "step": 14989 }, { "epoch": 0.38, "grad_norm": 1.0378299951553345, "learning_rate": 1.4110648912068663e-05, "loss": 0.4256, "step": 14990 }, { "epoch": 0.38, "grad_norm": 1.6212049722671509, "learning_rate": 1.410989213461905e-05, "loss": 0.6741, "step": 14991 }, { "epoch": 0.38, "grad_norm": 1.2220643758773804, "learning_rate": 1.4109135328846641e-05, "loss": 0.4328, "step": 14992 }, { "epoch": 0.38, "grad_norm": 1.479352593421936, "learning_rate": 1.4108378494756658e-05, "loss": 0.6253, "step": 14993 }, { "epoch": 0.38, "grad_norm": 1.5710746049880981, "learning_rate": 1.4107621632354306e-05, "loss": 0.5598, "step": 14994 }, { "epoch": 0.38, "grad_norm": 1.2615885734558105, "learning_rate": 1.4106864741644812e-05, "loss": 0.5175, "step": 14995 }, { "epoch": 0.38, "grad_norm": 1.3483632802963257, "learning_rate": 1.4106107822633383e-05, "loss": 0.5765, "step": 14996 }, { "epoch": 0.38, "grad_norm": 2.8677921295166016, "learning_rate": 1.4105350875325239e-05, "loss": 0.7026, "step": 14997 }, { "epoch": 0.38, "grad_norm": 1.242006778717041, "learning_rate": 1.4104593899725598e-05, "loss": 0.5377, "step": 14998 }, { "epoch": 0.38, "grad_norm": 1.5376579761505127, "learning_rate": 1.4103836895839674e-05, "loss": 0.6417, "step": 14999 }, { "epoch": 0.38, "grad_norm": 2.1548240184783936, "learning_rate": 1.4103079863672682e-05, "loss": 0.5966, "step": 15000 }, { "epoch": 0.38, "grad_norm": 1.7148966789245605, "learning_rate": 1.4102322803229842e-05, "loss": 0.5733, "step": 15001 }, { "epoch": 0.38, "grad_norm": 1.3759477138519287, "learning_rate": 1.4101565714516374e-05, "loss": 0.5318, "step": 15002 }, { "epoch": 0.38, "grad_norm": 1.5896652936935425, "learning_rate": 1.4100808597537488e-05, "loss": 0.6515, "step": 15003 }, { "epoch": 0.38, "grad_norm": 1.4760725498199463, "learning_rate": 1.4100051452298406e-05, "loss": 0.5851, "step": 15004 }, { "epoch": 0.38, "grad_norm": 1.1348577737808228, "learning_rate": 1.4099294278804348e-05, "loss": 0.5909, "step": 15005 }, { "epoch": 0.38, "grad_norm": 1.9007837772369385, "learning_rate": 1.4098537077060525e-05, "loss": 0.4212, "step": 15006 }, { "epoch": 0.38, "grad_norm": 9.047467231750488, "learning_rate": 1.4097779847072163e-05, "loss": 0.8436, "step": 15007 }, { "epoch": 0.38, "grad_norm": 4.87790060043335, "learning_rate": 1.4097022588844475e-05, "loss": 0.529, "step": 15008 }, { "epoch": 0.38, "grad_norm": 5.335225582122803, "learning_rate": 1.4096265302382682e-05, "loss": 0.6265, "step": 15009 }, { "epoch": 0.38, "grad_norm": 2.094888210296631, "learning_rate": 1.4095507987692e-05, "loss": 0.6193, "step": 15010 }, { "epoch": 0.38, "grad_norm": 1.490254282951355, "learning_rate": 1.4094750644777654e-05, "loss": 0.6346, "step": 15011 }, { "epoch": 0.38, "grad_norm": 1.2680039405822754, "learning_rate": 1.4093993273644858e-05, "loss": 0.7113, "step": 15012 }, { "epoch": 0.38, "grad_norm": 1.5530959367752075, "learning_rate": 1.409323587429883e-05, "loss": 0.5018, "step": 15013 }, { "epoch": 0.38, "grad_norm": 2.596402645111084, "learning_rate": 1.4092478446744795e-05, "loss": 0.5296, "step": 15014 }, { "epoch": 0.38, "grad_norm": 2.7502129077911377, "learning_rate": 1.4091720990987968e-05, "loss": 0.5695, "step": 15015 }, { "epoch": 0.38, "grad_norm": 2.368155002593994, "learning_rate": 1.4090963507033568e-05, "loss": 0.494, "step": 15016 }, { "epoch": 0.38, "grad_norm": 1.3973968029022217, "learning_rate": 1.4090205994886824e-05, "loss": 0.4553, "step": 15017 }, { "epoch": 0.38, "grad_norm": 1.2655824422836304, "learning_rate": 1.4089448454552944e-05, "loss": 0.6208, "step": 15018 }, { "epoch": 0.38, "grad_norm": 2.7107672691345215, "learning_rate": 1.4088690886037158e-05, "loss": 0.6454, "step": 15019 }, { "epoch": 0.38, "grad_norm": 3.0089211463928223, "learning_rate": 1.4087933289344682e-05, "loss": 0.7183, "step": 15020 }, { "epoch": 0.38, "grad_norm": 2.91573166847229, "learning_rate": 1.4087175664480736e-05, "loss": 0.5019, "step": 15021 }, { "epoch": 0.39, "grad_norm": 2.8621859550476074, "learning_rate": 1.4086418011450546e-05, "loss": 0.5339, "step": 15022 }, { "epoch": 0.39, "grad_norm": 2.77771258354187, "learning_rate": 1.4085660330259332e-05, "loss": 0.4884, "step": 15023 }, { "epoch": 0.39, "grad_norm": 1.5250402688980103, "learning_rate": 1.4084902620912311e-05, "loss": 0.6296, "step": 15024 }, { "epoch": 0.39, "grad_norm": 1.5570495128631592, "learning_rate": 1.4084144883414708e-05, "loss": 0.6182, "step": 15025 }, { "epoch": 0.39, "grad_norm": 1.724247694015503, "learning_rate": 1.4083387117771745e-05, "loss": 0.6271, "step": 15026 }, { "epoch": 0.39, "grad_norm": 2.5472755432128906, "learning_rate": 1.4082629323988641e-05, "loss": 0.5924, "step": 15027 }, { "epoch": 0.39, "grad_norm": 1.577997088432312, "learning_rate": 1.4081871502070623e-05, "loss": 0.588, "step": 15028 }, { "epoch": 0.39, "grad_norm": 1.4543570280075073, "learning_rate": 1.4081113652022914e-05, "loss": 0.4745, "step": 15029 }, { "epoch": 0.39, "grad_norm": 2.5356056690216064, "learning_rate": 1.4080355773850727e-05, "loss": 0.8298, "step": 15030 }, { "epoch": 0.39, "grad_norm": 3.579282760620117, "learning_rate": 1.4079597867559298e-05, "loss": 0.5277, "step": 15031 }, { "epoch": 0.39, "grad_norm": 2.037172317504883, "learning_rate": 1.4078839933153838e-05, "loss": 0.4495, "step": 15032 }, { "epoch": 0.39, "grad_norm": 1.8570433855056763, "learning_rate": 1.4078081970639579e-05, "loss": 0.4248, "step": 15033 }, { "epoch": 0.39, "grad_norm": 3.9460387229919434, "learning_rate": 1.4077323980021743e-05, "loss": 0.553, "step": 15034 }, { "epoch": 0.39, "grad_norm": 1.6338988542556763, "learning_rate": 1.4076565961305547e-05, "loss": 0.5916, "step": 15035 }, { "epoch": 0.39, "grad_norm": 1.697953462600708, "learning_rate": 1.4075807914496222e-05, "loss": 0.6087, "step": 15036 }, { "epoch": 0.39, "grad_norm": 1.0362417697906494, "learning_rate": 1.4075049839598989e-05, "loss": 0.6134, "step": 15037 }, { "epoch": 0.39, "grad_norm": 1.9145451784133911, "learning_rate": 1.4074291736619072e-05, "loss": 0.5589, "step": 15038 }, { "epoch": 0.39, "grad_norm": 1.7579747438430786, "learning_rate": 1.4073533605561698e-05, "loss": 0.6234, "step": 15039 }, { "epoch": 0.39, "grad_norm": 2.9520726203918457, "learning_rate": 1.4072775446432088e-05, "loss": 0.6358, "step": 15040 }, { "epoch": 0.39, "grad_norm": 1.1147300004959106, "learning_rate": 1.4072017259235469e-05, "loss": 0.6892, "step": 15041 }, { "epoch": 0.39, "grad_norm": 2.7410471439361572, "learning_rate": 1.4071259043977066e-05, "loss": 0.6013, "step": 15042 }, { "epoch": 0.39, "grad_norm": 1.9401038885116577, "learning_rate": 1.4070500800662103e-05, "loss": 0.5051, "step": 15043 }, { "epoch": 0.39, "grad_norm": 5.306087970733643, "learning_rate": 1.4069742529295805e-05, "loss": 0.5245, "step": 15044 }, { "epoch": 0.39, "grad_norm": 2.027326822280884, "learning_rate": 1.4068984229883398e-05, "loss": 0.7394, "step": 15045 }, { "epoch": 0.39, "grad_norm": 1.7851595878601074, "learning_rate": 1.406822590243011e-05, "loss": 0.5453, "step": 15046 }, { "epoch": 0.39, "grad_norm": 4.808313369750977, "learning_rate": 1.4067467546941165e-05, "loss": 0.5295, "step": 15047 }, { "epoch": 0.39, "grad_norm": 2.5202691555023193, "learning_rate": 1.4066709163421788e-05, "loss": 0.7094, "step": 15048 }, { "epoch": 0.39, "grad_norm": 1.7565383911132812, "learning_rate": 1.4065950751877207e-05, "loss": 0.4984, "step": 15049 }, { "epoch": 0.39, "grad_norm": 1.2686961889266968, "learning_rate": 1.4065192312312645e-05, "loss": 0.5512, "step": 15050 }, { "epoch": 0.39, "grad_norm": 1.7446216344833374, "learning_rate": 1.4064433844733336e-05, "loss": 0.4504, "step": 15051 }, { "epoch": 0.39, "grad_norm": 3.1676251888275146, "learning_rate": 1.4063675349144501e-05, "loss": 0.5494, "step": 15052 }, { "epoch": 0.39, "grad_norm": 5.7338104248046875, "learning_rate": 1.406291682555137e-05, "loss": 0.5004, "step": 15053 }, { "epoch": 0.39, "grad_norm": 2.686840534210205, "learning_rate": 1.4062158273959164e-05, "loss": 0.6372, "step": 15054 }, { "epoch": 0.39, "grad_norm": 2.8522348403930664, "learning_rate": 1.4061399694373119e-05, "loss": 0.5974, "step": 15055 }, { "epoch": 0.39, "grad_norm": 2.0577375888824463, "learning_rate": 1.4060641086798459e-05, "loss": 0.5757, "step": 15056 }, { "epoch": 0.39, "grad_norm": 1.4190469980239868, "learning_rate": 1.4059882451240409e-05, "loss": 0.5468, "step": 15057 }, { "epoch": 0.39, "grad_norm": 2.0100257396698, "learning_rate": 1.4059123787704203e-05, "loss": 0.6388, "step": 15058 }, { "epoch": 0.39, "grad_norm": 3.3245480060577393, "learning_rate": 1.4058365096195064e-05, "loss": 0.6371, "step": 15059 }, { "epoch": 0.39, "grad_norm": 1.4833611249923706, "learning_rate": 1.4057606376718224e-05, "loss": 0.6302, "step": 15060 }, { "epoch": 0.39, "grad_norm": 1.7437901496887207, "learning_rate": 1.405684762927891e-05, "loss": 0.5158, "step": 15061 }, { "epoch": 0.39, "grad_norm": 1.9394235610961914, "learning_rate": 1.405608885388235e-05, "loss": 0.7246, "step": 15062 }, { "epoch": 0.39, "grad_norm": 1.5953155755996704, "learning_rate": 1.4055330050533775e-05, "loss": 0.5869, "step": 15063 }, { "epoch": 0.39, "grad_norm": 1.7149313688278198, "learning_rate": 1.4054571219238412e-05, "loss": 0.5241, "step": 15064 }, { "epoch": 0.39, "grad_norm": 1.9641680717468262, "learning_rate": 1.4053812360001492e-05, "loss": 0.5805, "step": 15065 }, { "epoch": 0.39, "grad_norm": 1.608120322227478, "learning_rate": 1.4053053472828244e-05, "loss": 0.5799, "step": 15066 }, { "epoch": 0.39, "grad_norm": 4.705008029937744, "learning_rate": 1.4052294557723895e-05, "loss": 0.6023, "step": 15067 }, { "epoch": 0.39, "grad_norm": 1.4219582080841064, "learning_rate": 1.4051535614693683e-05, "loss": 0.4973, "step": 15068 }, { "epoch": 0.39, "grad_norm": 3.9484355449676514, "learning_rate": 1.405077664374283e-05, "loss": 0.6265, "step": 15069 }, { "epoch": 0.39, "grad_norm": 1.8537191152572632, "learning_rate": 1.405001764487657e-05, "loss": 0.5013, "step": 15070 }, { "epoch": 0.39, "grad_norm": 1.7515538930892944, "learning_rate": 1.4049258618100133e-05, "loss": 0.645, "step": 15071 }, { "epoch": 0.39, "grad_norm": 3.4163177013397217, "learning_rate": 1.4048499563418747e-05, "loss": 0.8478, "step": 15072 }, { "epoch": 0.39, "grad_norm": 2.9166319370269775, "learning_rate": 1.404774048083765e-05, "loss": 0.5664, "step": 15073 }, { "epoch": 0.39, "grad_norm": 2.9101691246032715, "learning_rate": 1.4046981370362065e-05, "loss": 0.5315, "step": 15074 }, { "epoch": 0.39, "grad_norm": 1.6698311567306519, "learning_rate": 1.4046222231997232e-05, "loss": 0.6028, "step": 15075 }, { "epoch": 0.39, "grad_norm": 4.184365749359131, "learning_rate": 1.4045463065748371e-05, "loss": 0.6037, "step": 15076 }, { "epoch": 0.39, "grad_norm": 1.4538156986236572, "learning_rate": 1.4044703871620722e-05, "loss": 0.6132, "step": 15077 }, { "epoch": 0.39, "grad_norm": 1.426515817642212, "learning_rate": 1.4043944649619516e-05, "loss": 0.5522, "step": 15078 }, { "epoch": 0.39, "grad_norm": 2.973036289215088, "learning_rate": 1.4043185399749986e-05, "loss": 0.6667, "step": 15079 }, { "epoch": 0.39, "grad_norm": 1.5864143371582031, "learning_rate": 1.404242612201736e-05, "loss": 0.5548, "step": 15080 }, { "epoch": 0.39, "grad_norm": 1.9739168882369995, "learning_rate": 1.4041666816426874e-05, "loss": 0.4418, "step": 15081 }, { "epoch": 0.39, "grad_norm": 1.5297783613204956, "learning_rate": 1.4040907482983759e-05, "loss": 0.4242, "step": 15082 }, { "epoch": 0.39, "grad_norm": 1.5475598573684692, "learning_rate": 1.4040148121693247e-05, "loss": 0.6451, "step": 15083 }, { "epoch": 0.39, "grad_norm": 5.61276912689209, "learning_rate": 1.4039388732560573e-05, "loss": 0.6626, "step": 15084 }, { "epoch": 0.39, "grad_norm": 2.7631094455718994, "learning_rate": 1.4038629315590972e-05, "loss": 0.5779, "step": 15085 }, { "epoch": 0.39, "grad_norm": 2.630650043487549, "learning_rate": 1.4037869870789674e-05, "loss": 0.5748, "step": 15086 }, { "epoch": 0.39, "grad_norm": 1.4012683629989624, "learning_rate": 1.4037110398161913e-05, "loss": 0.5566, "step": 15087 }, { "epoch": 0.39, "grad_norm": 3.5590147972106934, "learning_rate": 1.4036350897712922e-05, "loss": 0.6614, "step": 15088 }, { "epoch": 0.39, "grad_norm": 2.696326732635498, "learning_rate": 1.4035591369447938e-05, "loss": 0.6513, "step": 15089 }, { "epoch": 0.39, "grad_norm": 1.8457754850387573, "learning_rate": 1.4034831813372195e-05, "loss": 0.4757, "step": 15090 }, { "epoch": 0.39, "grad_norm": 3.0799546241760254, "learning_rate": 1.4034072229490922e-05, "loss": 0.6152, "step": 15091 }, { "epoch": 0.39, "grad_norm": 1.1495540142059326, "learning_rate": 1.4033312617809363e-05, "loss": 0.3565, "step": 15092 }, { "epoch": 0.39, "grad_norm": 3.522641658782959, "learning_rate": 1.4032552978332745e-05, "loss": 0.7582, "step": 15093 }, { "epoch": 0.39, "grad_norm": 1.6926374435424805, "learning_rate": 1.4031793311066307e-05, "loss": 0.677, "step": 15094 }, { "epoch": 0.39, "grad_norm": 1.293939232826233, "learning_rate": 1.4031033616015279e-05, "loss": 0.6104, "step": 15095 }, { "epoch": 0.39, "grad_norm": 1.6177016496658325, "learning_rate": 1.40302738931849e-05, "loss": 0.7608, "step": 15096 }, { "epoch": 0.39, "grad_norm": 4.595571041107178, "learning_rate": 1.4029514142580409e-05, "loss": 0.6598, "step": 15097 }, { "epoch": 0.39, "grad_norm": 1.290344476699829, "learning_rate": 1.4028754364207036e-05, "loss": 0.3649, "step": 15098 }, { "epoch": 0.39, "grad_norm": 1.9236174821853638, "learning_rate": 1.402799455807002e-05, "loss": 0.5702, "step": 15099 }, { "epoch": 0.39, "grad_norm": 1.5579363107681274, "learning_rate": 1.4027234724174594e-05, "loss": 0.4568, "step": 15100 }, { "epoch": 0.39, "grad_norm": 1.8838682174682617, "learning_rate": 1.4026474862525996e-05, "loss": 0.5793, "step": 15101 }, { "epoch": 0.39, "grad_norm": 1.8332579135894775, "learning_rate": 1.4025714973129467e-05, "loss": 0.4186, "step": 15102 }, { "epoch": 0.39, "grad_norm": 1.7497797012329102, "learning_rate": 1.4024955055990237e-05, "loss": 0.4934, "step": 15103 }, { "epoch": 0.39, "grad_norm": 1.3322174549102783, "learning_rate": 1.4024195111113548e-05, "loss": 0.6713, "step": 15104 }, { "epoch": 0.39, "grad_norm": 1.0476789474487305, "learning_rate": 1.4023435138504631e-05, "loss": 0.4691, "step": 15105 }, { "epoch": 0.39, "grad_norm": 4.662284851074219, "learning_rate": 1.4022675138168726e-05, "loss": 0.4178, "step": 15106 }, { "epoch": 0.39, "grad_norm": 1.1331738233566284, "learning_rate": 1.4021915110111074e-05, "loss": 0.4427, "step": 15107 }, { "epoch": 0.39, "grad_norm": 1.589394211769104, "learning_rate": 1.402115505433691e-05, "loss": 0.4804, "step": 15108 }, { "epoch": 0.39, "grad_norm": 1.5102072954177856, "learning_rate": 1.4020394970851472e-05, "loss": 0.4025, "step": 15109 }, { "epoch": 0.39, "grad_norm": 1.0790046453475952, "learning_rate": 1.4019634859659996e-05, "loss": 0.5168, "step": 15110 }, { "epoch": 0.39, "grad_norm": 1.2087594270706177, "learning_rate": 1.4018874720767724e-05, "loss": 0.5948, "step": 15111 }, { "epoch": 0.39, "grad_norm": 1.5388723611831665, "learning_rate": 1.4018114554179888e-05, "loss": 0.5382, "step": 15112 }, { "epoch": 0.39, "grad_norm": 4.702392101287842, "learning_rate": 1.4017354359901734e-05, "loss": 0.6081, "step": 15113 }, { "epoch": 0.39, "grad_norm": 7.734903812408447, "learning_rate": 1.40165941379385e-05, "loss": 0.7327, "step": 15114 }, { "epoch": 0.39, "grad_norm": 1.3822963237762451, "learning_rate": 1.4015833888295418e-05, "loss": 0.474, "step": 15115 }, { "epoch": 0.39, "grad_norm": 1.6912702322006226, "learning_rate": 1.4015073610977737e-05, "loss": 0.5493, "step": 15116 }, { "epoch": 0.39, "grad_norm": 6.28038215637207, "learning_rate": 1.4014313305990687e-05, "loss": 0.799, "step": 15117 }, { "epoch": 0.39, "grad_norm": 2.355067729949951, "learning_rate": 1.401355297333951e-05, "loss": 0.5824, "step": 15118 }, { "epoch": 0.39, "grad_norm": 1.3617329597473145, "learning_rate": 1.4012792613029454e-05, "loss": 0.587, "step": 15119 }, { "epoch": 0.39, "grad_norm": 1.6685699224472046, "learning_rate": 1.4012032225065748e-05, "loss": 0.5495, "step": 15120 }, { "epoch": 0.39, "grad_norm": 1.303682804107666, "learning_rate": 1.401127180945364e-05, "loss": 0.4754, "step": 15121 }, { "epoch": 0.39, "grad_norm": 8.555214881896973, "learning_rate": 1.401051136619836e-05, "loss": 0.7039, "step": 15122 }, { "epoch": 0.39, "grad_norm": 1.2451303005218506, "learning_rate": 1.4009750895305163e-05, "loss": 0.4681, "step": 15123 }, { "epoch": 0.39, "grad_norm": 3.724341630935669, "learning_rate": 1.4008990396779276e-05, "loss": 0.7371, "step": 15124 }, { "epoch": 0.39, "grad_norm": 3.3702447414398193, "learning_rate": 1.4008229870625949e-05, "loss": 0.6789, "step": 15125 }, { "epoch": 0.39, "grad_norm": 2.6353886127471924, "learning_rate": 1.4007469316850419e-05, "loss": 0.5993, "step": 15126 }, { "epoch": 0.39, "grad_norm": 1.7747670412063599, "learning_rate": 1.4006708735457928e-05, "loss": 0.6742, "step": 15127 }, { "epoch": 0.39, "grad_norm": 2.2452392578125, "learning_rate": 1.4005948126453719e-05, "loss": 0.7405, "step": 15128 }, { "epoch": 0.39, "grad_norm": 1.2754029035568237, "learning_rate": 1.400518748984303e-05, "loss": 0.7045, "step": 15129 }, { "epoch": 0.39, "grad_norm": 1.3889875411987305, "learning_rate": 1.4004426825631108e-05, "loss": 0.5885, "step": 15130 }, { "epoch": 0.39, "grad_norm": 1.3010847568511963, "learning_rate": 1.400366613382319e-05, "loss": 0.5962, "step": 15131 }, { "epoch": 0.39, "grad_norm": 1.9525409936904907, "learning_rate": 1.4002905414424522e-05, "loss": 0.6282, "step": 15132 }, { "epoch": 0.39, "grad_norm": 4.522443771362305, "learning_rate": 1.4002144667440344e-05, "loss": 0.6501, "step": 15133 }, { "epoch": 0.39, "grad_norm": 1.5898407697677612, "learning_rate": 1.40013838928759e-05, "loss": 0.6034, "step": 15134 }, { "epoch": 0.39, "grad_norm": 2.0896170139312744, "learning_rate": 1.4000623090736432e-05, "loss": 0.4789, "step": 15135 }, { "epoch": 0.39, "grad_norm": 1.3692106008529663, "learning_rate": 1.3999862261027181e-05, "loss": 0.6625, "step": 15136 }, { "epoch": 0.39, "grad_norm": 3.1875622272491455, "learning_rate": 1.3999101403753393e-05, "loss": 0.7347, "step": 15137 }, { "epoch": 0.39, "grad_norm": 1.1097180843353271, "learning_rate": 1.3998340518920312e-05, "loss": 0.7018, "step": 15138 }, { "epoch": 0.39, "grad_norm": 1.0658129453659058, "learning_rate": 1.399757960653318e-05, "loss": 0.5088, "step": 15139 }, { "epoch": 0.39, "grad_norm": 1.7289834022521973, "learning_rate": 1.399681866659724e-05, "loss": 0.6548, "step": 15140 }, { "epoch": 0.39, "grad_norm": 3.2338802814483643, "learning_rate": 1.3996057699117737e-05, "loss": 0.5648, "step": 15141 }, { "epoch": 0.39, "grad_norm": 1.5302091836929321, "learning_rate": 1.3995296704099914e-05, "loss": 0.5493, "step": 15142 }, { "epoch": 0.39, "grad_norm": 1.7689357995986938, "learning_rate": 1.3994535681549017e-05, "loss": 0.7627, "step": 15143 }, { "epoch": 0.39, "grad_norm": 1.261378288269043, "learning_rate": 1.3993774631470291e-05, "loss": 0.5521, "step": 15144 }, { "epoch": 0.39, "grad_norm": 5.050626277923584, "learning_rate": 1.399301355386898e-05, "loss": 0.463, "step": 15145 }, { "epoch": 0.39, "grad_norm": 4.624445915222168, "learning_rate": 1.3992252448750326e-05, "loss": 0.5118, "step": 15146 }, { "epoch": 0.39, "grad_norm": 1.9480053186416626, "learning_rate": 1.3991491316119576e-05, "loss": 0.3182, "step": 15147 }, { "epoch": 0.39, "grad_norm": 9.127930641174316, "learning_rate": 1.3990730155981975e-05, "loss": 0.6147, "step": 15148 }, { "epoch": 0.39, "grad_norm": 2.213041305541992, "learning_rate": 1.398996896834277e-05, "loss": 0.7247, "step": 15149 }, { "epoch": 0.39, "grad_norm": 1.5038342475891113, "learning_rate": 1.3989207753207208e-05, "loss": 0.6571, "step": 15150 }, { "epoch": 0.39, "grad_norm": 10.276046752929688, "learning_rate": 1.398844651058053e-05, "loss": 0.6171, "step": 15151 }, { "epoch": 0.39, "grad_norm": 1.846408486366272, "learning_rate": 1.3987685240467985e-05, "loss": 0.538, "step": 15152 }, { "epoch": 0.39, "grad_norm": 1.7760621309280396, "learning_rate": 1.398692394287482e-05, "loss": 0.6656, "step": 15153 }, { "epoch": 0.39, "grad_norm": 0.9796525239944458, "learning_rate": 1.3986162617806277e-05, "loss": 0.5523, "step": 15154 }, { "epoch": 0.39, "grad_norm": 3.136653184890747, "learning_rate": 1.3985401265267608e-05, "loss": 0.659, "step": 15155 }, { "epoch": 0.39, "grad_norm": 1.8855555057525635, "learning_rate": 1.3984639885264055e-05, "loss": 0.5877, "step": 15156 }, { "epoch": 0.39, "grad_norm": 2.121649742126465, "learning_rate": 1.398387847780087e-05, "loss": 0.4766, "step": 15157 }, { "epoch": 0.39, "grad_norm": 1.7951687574386597, "learning_rate": 1.3983117042883296e-05, "loss": 0.581, "step": 15158 }, { "epoch": 0.39, "grad_norm": 1.914878487586975, "learning_rate": 1.398235558051658e-05, "loss": 0.552, "step": 15159 }, { "epoch": 0.39, "grad_norm": 4.031264781951904, "learning_rate": 1.3981594090705977e-05, "loss": 0.7114, "step": 15160 }, { "epoch": 0.39, "grad_norm": 2.005831241607666, "learning_rate": 1.3980832573456722e-05, "loss": 0.5891, "step": 15161 }, { "epoch": 0.39, "grad_norm": 1.301867961883545, "learning_rate": 1.3980071028774076e-05, "loss": 0.6253, "step": 15162 }, { "epoch": 0.39, "grad_norm": 1.77849543094635, "learning_rate": 1.3979309456663276e-05, "loss": 0.6086, "step": 15163 }, { "epoch": 0.39, "grad_norm": 1.6269561052322388, "learning_rate": 1.3978547857129579e-05, "loss": 0.5106, "step": 15164 }, { "epoch": 0.39, "grad_norm": 2.8653178215026855, "learning_rate": 1.3977786230178227e-05, "loss": 0.6497, "step": 15165 }, { "epoch": 0.39, "grad_norm": 2.9958109855651855, "learning_rate": 1.3977024575814472e-05, "loss": 0.4976, "step": 15166 }, { "epoch": 0.39, "grad_norm": 1.9126672744750977, "learning_rate": 1.3976262894043564e-05, "loss": 0.5159, "step": 15167 }, { "epoch": 0.39, "grad_norm": 2.2013132572174072, "learning_rate": 1.3975501184870748e-05, "loss": 0.4971, "step": 15168 }, { "epoch": 0.39, "grad_norm": 2.438230276107788, "learning_rate": 1.3974739448301278e-05, "loss": 0.473, "step": 15169 }, { "epoch": 0.39, "grad_norm": 13.252216339111328, "learning_rate": 1.3973977684340399e-05, "loss": 0.5867, "step": 15170 }, { "epoch": 0.39, "grad_norm": 1.8705817461013794, "learning_rate": 1.3973215892993363e-05, "loss": 0.5371, "step": 15171 }, { "epoch": 0.39, "grad_norm": 1.1037317514419556, "learning_rate": 1.3972454074265421e-05, "loss": 0.4884, "step": 15172 }, { "epoch": 0.39, "grad_norm": 1.847749948501587, "learning_rate": 1.3971692228161818e-05, "loss": 0.6127, "step": 15173 }, { "epoch": 0.39, "grad_norm": 1.3122681379318237, "learning_rate": 1.397093035468781e-05, "loss": 0.5922, "step": 15174 }, { "epoch": 0.39, "grad_norm": 1.5217324495315552, "learning_rate": 1.3970168453848642e-05, "loss": 0.5669, "step": 15175 }, { "epoch": 0.39, "grad_norm": 1.7228457927703857, "learning_rate": 1.3969406525649572e-05, "loss": 0.5874, "step": 15176 }, { "epoch": 0.39, "grad_norm": 1.121617078781128, "learning_rate": 1.3968644570095841e-05, "loss": 0.5162, "step": 15177 }, { "epoch": 0.39, "grad_norm": 1.8249568939208984, "learning_rate": 1.3967882587192707e-05, "loss": 0.6883, "step": 15178 }, { "epoch": 0.39, "grad_norm": 2.2100541591644287, "learning_rate": 1.3967120576945422e-05, "loss": 0.5147, "step": 15179 }, { "epoch": 0.39, "grad_norm": 2.1667392253875732, "learning_rate": 1.3966358539359228e-05, "loss": 0.4903, "step": 15180 }, { "epoch": 0.39, "grad_norm": 4.5661420822143555, "learning_rate": 1.396559647443939e-05, "loss": 0.8849, "step": 15181 }, { "epoch": 0.39, "grad_norm": 1.144660472869873, "learning_rate": 1.3964834382191147e-05, "loss": 0.506, "step": 15182 }, { "epoch": 0.39, "grad_norm": 3.8295247554779053, "learning_rate": 1.3964072262619759e-05, "loss": 0.8716, "step": 15183 }, { "epoch": 0.39, "grad_norm": 1.1872835159301758, "learning_rate": 1.3963310115730475e-05, "loss": 0.5309, "step": 15184 }, { "epoch": 0.39, "grad_norm": 3.808804512023926, "learning_rate": 1.3962547941528548e-05, "loss": 0.6883, "step": 15185 }, { "epoch": 0.39, "grad_norm": 1.0569051504135132, "learning_rate": 1.396178574001923e-05, "loss": 0.598, "step": 15186 }, { "epoch": 0.39, "grad_norm": 1.3345911502838135, "learning_rate": 1.3961023511207772e-05, "loss": 0.6716, "step": 15187 }, { "epoch": 0.39, "grad_norm": 2.2649903297424316, "learning_rate": 1.396026125509943e-05, "loss": 0.611, "step": 15188 }, { "epoch": 0.39, "grad_norm": 2.290210723876953, "learning_rate": 1.3959498971699457e-05, "loss": 0.5794, "step": 15189 }, { "epoch": 0.39, "grad_norm": 1.1987556219100952, "learning_rate": 1.3958736661013102e-05, "loss": 0.3972, "step": 15190 }, { "epoch": 0.39, "grad_norm": 1.5180590152740479, "learning_rate": 1.3957974323045621e-05, "loss": 0.4613, "step": 15191 }, { "epoch": 0.39, "grad_norm": 8.275405883789062, "learning_rate": 1.3957211957802267e-05, "loss": 0.816, "step": 15192 }, { "epoch": 0.39, "grad_norm": 10.841330528259277, "learning_rate": 1.3956449565288296e-05, "loss": 0.3862, "step": 15193 }, { "epoch": 0.39, "grad_norm": 3.841200351715088, "learning_rate": 1.3955687145508963e-05, "loss": 0.6342, "step": 15194 }, { "epoch": 0.39, "grad_norm": 1.5911892652511597, "learning_rate": 1.3954924698469516e-05, "loss": 0.6983, "step": 15195 }, { "epoch": 0.39, "grad_norm": 2.1177797317504883, "learning_rate": 1.3954162224175213e-05, "loss": 0.5541, "step": 15196 }, { "epoch": 0.39, "grad_norm": 3.986431837081909, "learning_rate": 1.3953399722631309e-05, "loss": 0.5903, "step": 15197 }, { "epoch": 0.39, "grad_norm": 1.3166583776474, "learning_rate": 1.3952637193843057e-05, "loss": 0.5361, "step": 15198 }, { "epoch": 0.39, "grad_norm": 2.669613838195801, "learning_rate": 1.3951874637815714e-05, "loss": 0.6463, "step": 15199 }, { "epoch": 0.39, "grad_norm": 2.0479061603546143, "learning_rate": 1.3951112054554531e-05, "loss": 0.5902, "step": 15200 }, { "epoch": 0.39, "grad_norm": 5.243528842926025, "learning_rate": 1.395034944406477e-05, "loss": 0.6239, "step": 15201 }, { "epoch": 0.39, "grad_norm": 1.2023595571517944, "learning_rate": 1.394958680635168e-05, "loss": 0.5612, "step": 15202 }, { "epoch": 0.39, "grad_norm": 1.741879940032959, "learning_rate": 1.3948824141420521e-05, "loss": 0.4533, "step": 15203 }, { "epoch": 0.39, "grad_norm": 3.2995638847351074, "learning_rate": 1.3948061449276544e-05, "loss": 0.7666, "step": 15204 }, { "epoch": 0.39, "grad_norm": 6.124861240386963, "learning_rate": 1.394729872992501e-05, "loss": 0.8161, "step": 15205 }, { "epoch": 0.39, "grad_norm": 1.6872934103012085, "learning_rate": 1.3946535983371172e-05, "loss": 0.5826, "step": 15206 }, { "epoch": 0.39, "grad_norm": 1.7657341957092285, "learning_rate": 1.3945773209620287e-05, "loss": 0.3768, "step": 15207 }, { "epoch": 0.39, "grad_norm": 9.356586456298828, "learning_rate": 1.3945010408677615e-05, "loss": 0.9253, "step": 15208 }, { "epoch": 0.39, "grad_norm": 1.4826140403747559, "learning_rate": 1.3944247580548406e-05, "loss": 0.5721, "step": 15209 }, { "epoch": 0.39, "grad_norm": 1.979324460029602, "learning_rate": 1.3943484725237923e-05, "loss": 0.5807, "step": 15210 }, { "epoch": 0.39, "grad_norm": 1.573412537574768, "learning_rate": 1.3942721842751421e-05, "loss": 0.6092, "step": 15211 }, { "epoch": 0.39, "grad_norm": 1.9312448501586914, "learning_rate": 1.3941958933094158e-05, "loss": 0.4931, "step": 15212 }, { "epoch": 0.39, "grad_norm": 1.4552887678146362, "learning_rate": 1.3941195996271388e-05, "loss": 0.4899, "step": 15213 }, { "epoch": 0.39, "grad_norm": 2.660227060317993, "learning_rate": 1.3940433032288372e-05, "loss": 0.5069, "step": 15214 }, { "epoch": 0.39, "grad_norm": 1.918265461921692, "learning_rate": 1.3939670041150368e-05, "loss": 0.6203, "step": 15215 }, { "epoch": 0.39, "grad_norm": 3.761140823364258, "learning_rate": 1.3938907022862633e-05, "loss": 0.8249, "step": 15216 }, { "epoch": 0.39, "grad_norm": 2.835735321044922, "learning_rate": 1.3938143977430424e-05, "loss": 0.566, "step": 15217 }, { "epoch": 0.39, "grad_norm": 1.531447410583496, "learning_rate": 1.3937380904859004e-05, "loss": 0.5395, "step": 15218 }, { "epoch": 0.39, "grad_norm": 2.805220603942871, "learning_rate": 1.3936617805153625e-05, "loss": 0.7094, "step": 15219 }, { "epoch": 0.39, "grad_norm": 6.792532444000244, "learning_rate": 1.3935854678319552e-05, "loss": 0.5774, "step": 15220 }, { "epoch": 0.39, "grad_norm": 2.6733615398406982, "learning_rate": 1.393509152436204e-05, "loss": 0.5827, "step": 15221 }, { "epoch": 0.39, "grad_norm": 4.1058268547058105, "learning_rate": 1.3934328343286348e-05, "loss": 0.5319, "step": 15222 }, { "epoch": 0.39, "grad_norm": 5.861111164093018, "learning_rate": 1.393356513509774e-05, "loss": 0.7123, "step": 15223 }, { "epoch": 0.39, "grad_norm": 1.5931636095046997, "learning_rate": 1.3932801899801471e-05, "loss": 0.614, "step": 15224 }, { "epoch": 0.39, "grad_norm": 8.187414169311523, "learning_rate": 1.3932038637402803e-05, "loss": 0.6176, "step": 15225 }, { "epoch": 0.39, "grad_norm": 6.392857074737549, "learning_rate": 1.3931275347906992e-05, "loss": 0.6741, "step": 15226 }, { "epoch": 0.39, "grad_norm": 4.360495567321777, "learning_rate": 1.3930512031319303e-05, "loss": 0.4986, "step": 15227 }, { "epoch": 0.39, "grad_norm": 7.38381290435791, "learning_rate": 1.3929748687644997e-05, "loss": 0.5004, "step": 15228 }, { "epoch": 0.39, "grad_norm": 1.2540183067321777, "learning_rate": 1.3928985316889327e-05, "loss": 0.5405, "step": 15229 }, { "epoch": 0.39, "grad_norm": 2.0192489624023438, "learning_rate": 1.3928221919057562e-05, "loss": 0.5353, "step": 15230 }, { "epoch": 0.39, "grad_norm": 1.1665011644363403, "learning_rate": 1.392745849415496e-05, "loss": 0.6431, "step": 15231 }, { "epoch": 0.39, "grad_norm": 3.196648120880127, "learning_rate": 1.3926695042186782e-05, "loss": 0.6374, "step": 15232 }, { "epoch": 0.39, "grad_norm": 9.89175033569336, "learning_rate": 1.3925931563158286e-05, "loss": 0.6176, "step": 15233 }, { "epoch": 0.39, "grad_norm": 2.89506459236145, "learning_rate": 1.3925168057074736e-05, "loss": 0.7085, "step": 15234 }, { "epoch": 0.39, "grad_norm": 1.3268245458602905, "learning_rate": 1.3924404523941397e-05, "loss": 0.6407, "step": 15235 }, { "epoch": 0.39, "grad_norm": 5.300000190734863, "learning_rate": 1.3923640963763524e-05, "loss": 0.7628, "step": 15236 }, { "epoch": 0.39, "grad_norm": 2.049121618270874, "learning_rate": 1.3922877376546385e-05, "loss": 0.5585, "step": 15237 }, { "epoch": 0.39, "grad_norm": 1.7631521224975586, "learning_rate": 1.3922113762295237e-05, "loss": 0.7479, "step": 15238 }, { "epoch": 0.39, "grad_norm": 3.439523458480835, "learning_rate": 1.3921350121015347e-05, "loss": 0.5847, "step": 15239 }, { "epoch": 0.39, "grad_norm": 1.4661588668823242, "learning_rate": 1.3920586452711977e-05, "loss": 0.5528, "step": 15240 }, { "epoch": 0.39, "grad_norm": 4.1847429275512695, "learning_rate": 1.3919822757390387e-05, "loss": 0.63, "step": 15241 }, { "epoch": 0.39, "grad_norm": 1.4050084352493286, "learning_rate": 1.3919059035055842e-05, "loss": 0.5261, "step": 15242 }, { "epoch": 0.39, "grad_norm": 1.9284052848815918, "learning_rate": 1.3918295285713601e-05, "loss": 0.4984, "step": 15243 }, { "epoch": 0.39, "grad_norm": 2.0916688442230225, "learning_rate": 1.3917531509368936e-05, "loss": 0.6513, "step": 15244 }, { "epoch": 0.39, "grad_norm": 1.7718755006790161, "learning_rate": 1.3916767706027102e-05, "loss": 0.655, "step": 15245 }, { "epoch": 0.39, "grad_norm": 3.145393133163452, "learning_rate": 1.3916003875693363e-05, "loss": 0.605, "step": 15246 }, { "epoch": 0.39, "grad_norm": 1.5036131143569946, "learning_rate": 1.3915240018372991e-05, "loss": 0.4802, "step": 15247 }, { "epoch": 0.39, "grad_norm": 2.8125522136688232, "learning_rate": 1.391447613407124e-05, "loss": 0.4705, "step": 15248 }, { "epoch": 0.39, "grad_norm": 1.2534940242767334, "learning_rate": 1.3913712222793384e-05, "loss": 0.5009, "step": 15249 }, { "epoch": 0.39, "grad_norm": 1.3370274305343628, "learning_rate": 1.3912948284544676e-05, "loss": 0.602, "step": 15250 }, { "epoch": 0.39, "grad_norm": 2.5744810104370117, "learning_rate": 1.3912184319330391e-05, "loss": 0.5408, "step": 15251 }, { "epoch": 0.39, "grad_norm": 2.041125535964966, "learning_rate": 1.3911420327155786e-05, "loss": 0.5788, "step": 15252 }, { "epoch": 0.39, "grad_norm": 5.865996360778809, "learning_rate": 1.3910656308026133e-05, "loss": 0.5518, "step": 15253 }, { "epoch": 0.39, "grad_norm": 1.0147762298583984, "learning_rate": 1.3909892261946694e-05, "loss": 0.5365, "step": 15254 }, { "epoch": 0.39, "grad_norm": 1.32412850856781, "learning_rate": 1.3909128188922732e-05, "loss": 0.5387, "step": 15255 }, { "epoch": 0.39, "grad_norm": 2.1254003047943115, "learning_rate": 1.3908364088959515e-05, "loss": 0.631, "step": 15256 }, { "epoch": 0.39, "grad_norm": 5.454556941986084, "learning_rate": 1.3907599962062308e-05, "loss": 0.6034, "step": 15257 }, { "epoch": 0.39, "grad_norm": 2.8201498985290527, "learning_rate": 1.3906835808236376e-05, "loss": 0.5483, "step": 15258 }, { "epoch": 0.39, "grad_norm": 2.4978888034820557, "learning_rate": 1.3906071627486987e-05, "loss": 0.6158, "step": 15259 }, { "epoch": 0.39, "grad_norm": 3.014086961746216, "learning_rate": 1.3905307419819407e-05, "loss": 0.5993, "step": 15260 }, { "epoch": 0.39, "grad_norm": 2.1698532104492188, "learning_rate": 1.3904543185238904e-05, "loss": 0.7689, "step": 15261 }, { "epoch": 0.39, "grad_norm": 1.4819436073303223, "learning_rate": 1.390377892375074e-05, "loss": 0.6727, "step": 15262 }, { "epoch": 0.39, "grad_norm": 2.2122480869293213, "learning_rate": 1.3903014635360182e-05, "loss": 0.6374, "step": 15263 }, { "epoch": 0.39, "grad_norm": 2.2889058589935303, "learning_rate": 1.3902250320072501e-05, "loss": 0.6551, "step": 15264 }, { "epoch": 0.39, "grad_norm": 4.677048206329346, "learning_rate": 1.3901485977892964e-05, "loss": 0.6284, "step": 15265 }, { "epoch": 0.39, "grad_norm": 1.4867377281188965, "learning_rate": 1.3900721608826835e-05, "loss": 0.5429, "step": 15266 }, { "epoch": 0.39, "grad_norm": 2.967585563659668, "learning_rate": 1.3899957212879383e-05, "loss": 0.5617, "step": 15267 }, { "epoch": 0.39, "grad_norm": 1.1609647274017334, "learning_rate": 1.389919279005588e-05, "loss": 0.4658, "step": 15268 }, { "epoch": 0.39, "grad_norm": 7.602931022644043, "learning_rate": 1.3898428340361584e-05, "loss": 0.6682, "step": 15269 }, { "epoch": 0.39, "grad_norm": 2.112884998321533, "learning_rate": 1.3897663863801772e-05, "loss": 0.6641, "step": 15270 }, { "epoch": 0.39, "grad_norm": 1.9903099536895752, "learning_rate": 1.3896899360381712e-05, "loss": 0.6479, "step": 15271 }, { "epoch": 0.39, "grad_norm": 2.021345615386963, "learning_rate": 1.3896134830106664e-05, "loss": 0.6119, "step": 15272 }, { "epoch": 0.39, "grad_norm": 1.3801085948944092, "learning_rate": 1.3895370272981909e-05, "loss": 0.4132, "step": 15273 }, { "epoch": 0.39, "grad_norm": 2.973754405975342, "learning_rate": 1.3894605689012706e-05, "loss": 0.4927, "step": 15274 }, { "epoch": 0.39, "grad_norm": 1.6046600341796875, "learning_rate": 1.3893841078204327e-05, "loss": 0.4661, "step": 15275 }, { "epoch": 0.39, "grad_norm": 2.5161616802215576, "learning_rate": 1.3893076440562043e-05, "loss": 0.4873, "step": 15276 }, { "epoch": 0.39, "grad_norm": 1.2179133892059326, "learning_rate": 1.3892311776091122e-05, "loss": 0.5084, "step": 15277 }, { "epoch": 0.39, "grad_norm": 1.3526272773742676, "learning_rate": 1.3891547084796832e-05, "loss": 0.4984, "step": 15278 }, { "epoch": 0.39, "grad_norm": 1.1640119552612305, "learning_rate": 1.3890782366684446e-05, "loss": 0.7133, "step": 15279 }, { "epoch": 0.39, "grad_norm": 1.5310169458389282, "learning_rate": 1.3890017621759231e-05, "loss": 0.708, "step": 15280 }, { "epoch": 0.39, "grad_norm": 3.4791829586029053, "learning_rate": 1.388925285002646e-05, "loss": 0.5404, "step": 15281 }, { "epoch": 0.39, "grad_norm": 1.643827199935913, "learning_rate": 1.38884880514914e-05, "loss": 0.5477, "step": 15282 }, { "epoch": 0.39, "grad_norm": 1.2314444780349731, "learning_rate": 1.3887723226159327e-05, "loss": 0.5822, "step": 15283 }, { "epoch": 0.39, "grad_norm": 1.404565453529358, "learning_rate": 1.3886958374035504e-05, "loss": 0.5233, "step": 15284 }, { "epoch": 0.39, "grad_norm": 1.23952317237854, "learning_rate": 1.3886193495125208e-05, "loss": 0.5322, "step": 15285 }, { "epoch": 0.39, "grad_norm": 1.6268364191055298, "learning_rate": 1.3885428589433707e-05, "loss": 0.6529, "step": 15286 }, { "epoch": 0.39, "grad_norm": 1.2462459802627563, "learning_rate": 1.3884663656966274e-05, "loss": 0.5479, "step": 15287 }, { "epoch": 0.39, "grad_norm": 3.920146942138672, "learning_rate": 1.388389869772818e-05, "loss": 0.5576, "step": 15288 }, { "epoch": 0.39, "grad_norm": 1.6085069179534912, "learning_rate": 1.3883133711724695e-05, "loss": 0.5675, "step": 15289 }, { "epoch": 0.39, "grad_norm": 3.5154244899749756, "learning_rate": 1.3882368698961094e-05, "loss": 0.6131, "step": 15290 }, { "epoch": 0.39, "grad_norm": 3.14924955368042, "learning_rate": 1.3881603659442643e-05, "loss": 0.6852, "step": 15291 }, { "epoch": 0.39, "grad_norm": 1.6562854051589966, "learning_rate": 1.3880838593174622e-05, "loss": 0.6846, "step": 15292 }, { "epoch": 0.39, "grad_norm": 1.7119849920272827, "learning_rate": 1.3880073500162299e-05, "loss": 0.661, "step": 15293 }, { "epoch": 0.39, "grad_norm": 2.4297752380371094, "learning_rate": 1.3879308380410948e-05, "loss": 0.8067, "step": 15294 }, { "epoch": 0.39, "grad_norm": 1.273003101348877, "learning_rate": 1.3878543233925838e-05, "loss": 0.6175, "step": 15295 }, { "epoch": 0.39, "grad_norm": 4.640974521636963, "learning_rate": 1.3877778060712246e-05, "loss": 0.6672, "step": 15296 }, { "epoch": 0.39, "grad_norm": 4.489790916442871, "learning_rate": 1.3877012860775448e-05, "loss": 0.5399, "step": 15297 }, { "epoch": 0.39, "grad_norm": 1.5801701545715332, "learning_rate": 1.3876247634120707e-05, "loss": 0.6289, "step": 15298 }, { "epoch": 0.39, "grad_norm": 3.89886474609375, "learning_rate": 1.3875482380753304e-05, "loss": 0.5672, "step": 15299 }, { "epoch": 0.39, "grad_norm": 1.4059593677520752, "learning_rate": 1.3874717100678515e-05, "loss": 0.4762, "step": 15300 }, { "epoch": 0.39, "grad_norm": 0.9922407269477844, "learning_rate": 1.3873951793901605e-05, "loss": 0.3293, "step": 15301 }, { "epoch": 0.39, "grad_norm": 2.2805817127227783, "learning_rate": 1.3873186460427857e-05, "loss": 0.7143, "step": 15302 }, { "epoch": 0.39, "grad_norm": 1.4627703428268433, "learning_rate": 1.3872421100262542e-05, "loss": 0.4758, "step": 15303 }, { "epoch": 0.39, "grad_norm": 2.8633878231048584, "learning_rate": 1.3871655713410928e-05, "loss": 0.6027, "step": 15304 }, { "epoch": 0.39, "grad_norm": 1.638963222503662, "learning_rate": 1.38708902998783e-05, "loss": 0.5693, "step": 15305 }, { "epoch": 0.39, "grad_norm": 4.143208980560303, "learning_rate": 1.3870124859669927e-05, "loss": 0.5976, "step": 15306 }, { "epoch": 0.39, "grad_norm": 1.7031406164169312, "learning_rate": 1.3869359392791081e-05, "loss": 0.4893, "step": 15307 }, { "epoch": 0.39, "grad_norm": 2.0292887687683105, "learning_rate": 1.3868593899247047e-05, "loss": 0.5382, "step": 15308 }, { "epoch": 0.39, "grad_norm": 2.5798697471618652, "learning_rate": 1.3867828379043091e-05, "loss": 0.6937, "step": 15309 }, { "epoch": 0.39, "grad_norm": 1.393897294998169, "learning_rate": 1.3867062832184494e-05, "loss": 0.5188, "step": 15310 }, { "epoch": 0.39, "grad_norm": 8.562321662902832, "learning_rate": 1.3866297258676528e-05, "loss": 0.62, "step": 15311 }, { "epoch": 0.39, "grad_norm": 4.880002975463867, "learning_rate": 1.3865531658524472e-05, "loss": 0.4364, "step": 15312 }, { "epoch": 0.39, "grad_norm": 1.3570852279663086, "learning_rate": 1.3864766031733598e-05, "loss": 0.5142, "step": 15313 }, { "epoch": 0.39, "grad_norm": 1.5446337461471558, "learning_rate": 1.3864000378309183e-05, "loss": 0.4821, "step": 15314 }, { "epoch": 0.39, "grad_norm": 3.405254602432251, "learning_rate": 1.3863234698256509e-05, "loss": 0.4107, "step": 15315 }, { "epoch": 0.39, "grad_norm": 1.6717575788497925, "learning_rate": 1.3862468991580848e-05, "loss": 0.452, "step": 15316 }, { "epoch": 0.39, "grad_norm": 1.7358672618865967, "learning_rate": 1.3861703258287477e-05, "loss": 0.5879, "step": 15317 }, { "epoch": 0.39, "grad_norm": 2.170966863632202, "learning_rate": 1.3860937498381673e-05, "loss": 0.6979, "step": 15318 }, { "epoch": 0.39, "grad_norm": 2.113518476486206, "learning_rate": 1.386017171186871e-05, "loss": 0.5338, "step": 15319 }, { "epoch": 0.39, "grad_norm": 1.695681095123291, "learning_rate": 1.3859405898753875e-05, "loss": 0.4161, "step": 15320 }, { "epoch": 0.39, "grad_norm": 15.43096923828125, "learning_rate": 1.3858640059042434e-05, "loss": 0.6663, "step": 15321 }, { "epoch": 0.39, "grad_norm": 1.7248446941375732, "learning_rate": 1.3857874192739673e-05, "loss": 0.6178, "step": 15322 }, { "epoch": 0.39, "grad_norm": 2.5573413372039795, "learning_rate": 1.3857108299850866e-05, "loss": 0.6384, "step": 15323 }, { "epoch": 0.39, "grad_norm": 1.544743537902832, "learning_rate": 1.3856342380381293e-05, "loss": 0.4734, "step": 15324 }, { "epoch": 0.39, "grad_norm": 1.1123075485229492, "learning_rate": 1.3855576434336228e-05, "loss": 0.5815, "step": 15325 }, { "epoch": 0.39, "grad_norm": 3.129929780960083, "learning_rate": 1.3854810461720955e-05, "loss": 0.5756, "step": 15326 }, { "epoch": 0.39, "grad_norm": 8.36301040649414, "learning_rate": 1.385404446254075e-05, "loss": 0.7027, "step": 15327 }, { "epoch": 0.39, "grad_norm": 14.247393608093262, "learning_rate": 1.3853278436800893e-05, "loss": 0.5839, "step": 15328 }, { "epoch": 0.39, "grad_norm": 1.501931071281433, "learning_rate": 1.385251238450666e-05, "loss": 0.6766, "step": 15329 }, { "epoch": 0.39, "grad_norm": 3.9907820224761963, "learning_rate": 1.3851746305663332e-05, "loss": 0.6808, "step": 15330 }, { "epoch": 0.39, "grad_norm": 3.12174129486084, "learning_rate": 1.3850980200276187e-05, "loss": 0.7374, "step": 15331 }, { "epoch": 0.39, "grad_norm": 1.85196852684021, "learning_rate": 1.3850214068350509e-05, "loss": 0.4039, "step": 15332 }, { "epoch": 0.39, "grad_norm": 1.8202404975891113, "learning_rate": 1.3849447909891574e-05, "loss": 0.6742, "step": 15333 }, { "epoch": 0.39, "grad_norm": 5.898043155670166, "learning_rate": 1.384868172490466e-05, "loss": 0.6791, "step": 15334 }, { "epoch": 0.39, "grad_norm": 1.6340067386627197, "learning_rate": 1.3847915513395051e-05, "loss": 0.5574, "step": 15335 }, { "epoch": 0.39, "grad_norm": 1.466979742050171, "learning_rate": 1.3847149275368028e-05, "loss": 0.4668, "step": 15336 }, { "epoch": 0.39, "grad_norm": 2.0264811515808105, "learning_rate": 1.3846383010828866e-05, "loss": 0.653, "step": 15337 }, { "epoch": 0.39, "grad_norm": 6.3295159339904785, "learning_rate": 1.3845616719782848e-05, "loss": 0.5313, "step": 15338 }, { "epoch": 0.39, "grad_norm": 3.1477973461151123, "learning_rate": 1.3844850402235262e-05, "loss": 0.6421, "step": 15339 }, { "epoch": 0.39, "grad_norm": 1.882934331893921, "learning_rate": 1.3844084058191377e-05, "loss": 0.5882, "step": 15340 }, { "epoch": 0.39, "grad_norm": 2.344290018081665, "learning_rate": 1.3843317687656481e-05, "loss": 0.7311, "step": 15341 }, { "epoch": 0.39, "grad_norm": 1.5504812002182007, "learning_rate": 1.3842551290635854e-05, "loss": 0.4934, "step": 15342 }, { "epoch": 0.39, "grad_norm": 2.9699950218200684, "learning_rate": 1.3841784867134775e-05, "loss": 0.6252, "step": 15343 }, { "epoch": 0.39, "grad_norm": 2.07372784614563, "learning_rate": 1.3841018417158534e-05, "loss": 0.708, "step": 15344 }, { "epoch": 0.39, "grad_norm": 1.510957956314087, "learning_rate": 1.3840251940712404e-05, "loss": 0.4478, "step": 15345 }, { "epoch": 0.39, "grad_norm": 2.8396432399749756, "learning_rate": 1.383948543780167e-05, "loss": 0.519, "step": 15346 }, { "epoch": 0.39, "grad_norm": 3.3086442947387695, "learning_rate": 1.3838718908431617e-05, "loss": 0.7306, "step": 15347 }, { "epoch": 0.39, "grad_norm": 1.5639592409133911, "learning_rate": 1.383795235260752e-05, "loss": 0.5403, "step": 15348 }, { "epoch": 0.39, "grad_norm": 2.3470919132232666, "learning_rate": 1.383718577033467e-05, "loss": 0.5122, "step": 15349 }, { "epoch": 0.39, "grad_norm": 1.2714390754699707, "learning_rate": 1.3836419161618346e-05, "loss": 0.5734, "step": 15350 }, { "epoch": 0.39, "grad_norm": 3.1740095615386963, "learning_rate": 1.3835652526463834e-05, "loss": 0.4332, "step": 15351 }, { "epoch": 0.39, "grad_norm": 1.2652937173843384, "learning_rate": 1.383488586487641e-05, "loss": 0.6064, "step": 15352 }, { "epoch": 0.39, "grad_norm": 1.3153088092803955, "learning_rate": 1.3834119176861368e-05, "loss": 0.5533, "step": 15353 }, { "epoch": 0.39, "grad_norm": 2.5452449321746826, "learning_rate": 1.3833352462423979e-05, "loss": 0.5956, "step": 15354 }, { "epoch": 0.39, "grad_norm": 1.5789709091186523, "learning_rate": 1.3832585721569535e-05, "loss": 0.6528, "step": 15355 }, { "epoch": 0.39, "grad_norm": 1.8972831964492798, "learning_rate": 1.383181895430332e-05, "loss": 0.5955, "step": 15356 }, { "epoch": 0.39, "grad_norm": 1.7154093980789185, "learning_rate": 1.3831052160630614e-05, "loss": 0.663, "step": 15357 }, { "epoch": 0.39, "grad_norm": 1.7095640897750854, "learning_rate": 1.3830285340556706e-05, "loss": 0.5939, "step": 15358 }, { "epoch": 0.39, "grad_norm": 3.9135076999664307, "learning_rate": 1.3829518494086876e-05, "loss": 0.6149, "step": 15359 }, { "epoch": 0.39, "grad_norm": 1.2863348722457886, "learning_rate": 1.3828751621226411e-05, "loss": 0.6045, "step": 15360 }, { "epoch": 0.39, "grad_norm": 2.529458522796631, "learning_rate": 1.3827984721980596e-05, "loss": 0.6748, "step": 15361 }, { "epoch": 0.39, "grad_norm": 1.107298493385315, "learning_rate": 1.3827217796354712e-05, "loss": 0.3207, "step": 15362 }, { "epoch": 0.39, "grad_norm": 1.4759867191314697, "learning_rate": 1.3826450844354052e-05, "loss": 0.509, "step": 15363 }, { "epoch": 0.39, "grad_norm": 2.0767195224761963, "learning_rate": 1.3825683865983894e-05, "loss": 0.4919, "step": 15364 }, { "epoch": 0.39, "grad_norm": 4.728708267211914, "learning_rate": 1.3824916861249527e-05, "loss": 0.5411, "step": 15365 }, { "epoch": 0.39, "grad_norm": 5.244126796722412, "learning_rate": 1.3824149830156235e-05, "loss": 0.6578, "step": 15366 }, { "epoch": 0.39, "grad_norm": 0.9627252817153931, "learning_rate": 1.3823382772709306e-05, "loss": 0.3596, "step": 15367 }, { "epoch": 0.39, "grad_norm": 1.464294672012329, "learning_rate": 1.3822615688914025e-05, "loss": 0.5396, "step": 15368 }, { "epoch": 0.39, "grad_norm": 1.355384349822998, "learning_rate": 1.3821848578775678e-05, "loss": 0.5992, "step": 15369 }, { "epoch": 0.39, "grad_norm": 1.080865740776062, "learning_rate": 1.3821081442299553e-05, "loss": 0.5812, "step": 15370 }, { "epoch": 0.39, "grad_norm": 1.6294915676116943, "learning_rate": 1.3820314279490934e-05, "loss": 0.7328, "step": 15371 }, { "epoch": 0.39, "grad_norm": 1.6098288297653198, "learning_rate": 1.3819547090355107e-05, "loss": 0.482, "step": 15372 }, { "epoch": 0.39, "grad_norm": 3.7975783348083496, "learning_rate": 1.3818779874897366e-05, "loss": 0.6409, "step": 15373 }, { "epoch": 0.39, "grad_norm": 0.914949893951416, "learning_rate": 1.3818012633122991e-05, "loss": 0.4654, "step": 15374 }, { "epoch": 0.39, "grad_norm": 1.7259941101074219, "learning_rate": 1.381724536503727e-05, "loss": 0.5793, "step": 15375 }, { "epoch": 0.39, "grad_norm": 1.558815360069275, "learning_rate": 1.3816478070645494e-05, "loss": 0.6915, "step": 15376 }, { "epoch": 0.39, "grad_norm": 2.9548215866088867, "learning_rate": 1.381571074995295e-05, "loss": 0.5964, "step": 15377 }, { "epoch": 0.39, "grad_norm": 2.208798408508301, "learning_rate": 1.3814943402964921e-05, "loss": 0.6683, "step": 15378 }, { "epoch": 0.39, "grad_norm": 1.5535008907318115, "learning_rate": 1.38141760296867e-05, "loss": 0.5313, "step": 15379 }, { "epoch": 0.39, "grad_norm": 1.5074975490570068, "learning_rate": 1.3813408630123577e-05, "loss": 0.6255, "step": 15380 }, { "epoch": 0.39, "grad_norm": 1.987703561782837, "learning_rate": 1.3812641204280834e-05, "loss": 0.8593, "step": 15381 }, { "epoch": 0.39, "grad_norm": 2.0705020427703857, "learning_rate": 1.381187375216377e-05, "loss": 0.7611, "step": 15382 }, { "epoch": 0.39, "grad_norm": 1.2390007972717285, "learning_rate": 1.381110627377766e-05, "loss": 0.4952, "step": 15383 }, { "epoch": 0.39, "grad_norm": 1.6648939847946167, "learning_rate": 1.38103387691278e-05, "loss": 0.5654, "step": 15384 }, { "epoch": 0.39, "grad_norm": 3.485429048538208, "learning_rate": 1.3809571238219481e-05, "loss": 0.6246, "step": 15385 }, { "epoch": 0.39, "grad_norm": 7.713693618774414, "learning_rate": 1.380880368105799e-05, "loss": 0.725, "step": 15386 }, { "epoch": 0.39, "grad_norm": 2.1114261150360107, "learning_rate": 1.3808036097648619e-05, "loss": 0.7321, "step": 15387 }, { "epoch": 0.39, "grad_norm": 8.351716995239258, "learning_rate": 1.3807268487996652e-05, "loss": 0.8401, "step": 15388 }, { "epoch": 0.39, "grad_norm": 1.3064144849777222, "learning_rate": 1.3806500852107385e-05, "loss": 0.4643, "step": 15389 }, { "epoch": 0.39, "grad_norm": 1.74192214012146, "learning_rate": 1.3805733189986106e-05, "loss": 0.5809, "step": 15390 }, { "epoch": 0.39, "grad_norm": 2.2286908626556396, "learning_rate": 1.3804965501638104e-05, "loss": 0.4236, "step": 15391 }, { "epoch": 0.39, "grad_norm": 2.9032504558563232, "learning_rate": 1.380419778706867e-05, "loss": 0.577, "step": 15392 }, { "epoch": 0.39, "grad_norm": 1.7377928495407104, "learning_rate": 1.3803430046283094e-05, "loss": 0.6466, "step": 15393 }, { "epoch": 0.39, "grad_norm": 2.3114094734191895, "learning_rate": 1.380266227928667e-05, "loss": 0.6094, "step": 15394 }, { "epoch": 0.39, "grad_norm": 4.183095455169678, "learning_rate": 1.3801894486084684e-05, "loss": 0.7649, "step": 15395 }, { "epoch": 0.39, "grad_norm": 1.6115078926086426, "learning_rate": 1.3801126666682431e-05, "loss": 0.5353, "step": 15396 }, { "epoch": 0.39, "grad_norm": 1.9746278524398804, "learning_rate": 1.3800358821085202e-05, "loss": 0.5286, "step": 15397 }, { "epoch": 0.39, "grad_norm": 1.9010024070739746, "learning_rate": 1.3799590949298288e-05, "loss": 0.6852, "step": 15398 }, { "epoch": 0.39, "grad_norm": 2.2975704669952393, "learning_rate": 1.379882305132698e-05, "loss": 0.5864, "step": 15399 }, { "epoch": 0.39, "grad_norm": 2.8565948009490967, "learning_rate": 1.3798055127176569e-05, "loss": 0.5135, "step": 15400 }, { "epoch": 0.39, "grad_norm": 3.2574615478515625, "learning_rate": 1.3797287176852348e-05, "loss": 0.5914, "step": 15401 }, { "epoch": 0.39, "grad_norm": 2.026301622390747, "learning_rate": 1.379651920035961e-05, "loss": 0.5937, "step": 15402 }, { "epoch": 0.39, "grad_norm": 3.476242780685425, "learning_rate": 1.3795751197703646e-05, "loss": 0.6867, "step": 15403 }, { "epoch": 0.39, "grad_norm": 1.4006296396255493, "learning_rate": 1.3794983168889751e-05, "loss": 0.6313, "step": 15404 }, { "epoch": 0.39, "grad_norm": 6.385976791381836, "learning_rate": 1.3794215113923218e-05, "loss": 0.5942, "step": 15405 }, { "epoch": 0.39, "grad_norm": 2.0406084060668945, "learning_rate": 1.3793447032809334e-05, "loss": 0.6242, "step": 15406 }, { "epoch": 0.39, "grad_norm": 4.4571027755737305, "learning_rate": 1.3792678925553397e-05, "loss": 0.6837, "step": 15407 }, { "epoch": 0.39, "grad_norm": 1.6578549146652222, "learning_rate": 1.3791910792160701e-05, "loss": 0.6339, "step": 15408 }, { "epoch": 0.39, "grad_norm": 1.3699456453323364, "learning_rate": 1.379114263263654e-05, "loss": 0.5989, "step": 15409 }, { "epoch": 0.39, "grad_norm": 2.309267282485962, "learning_rate": 1.3790374446986203e-05, "loss": 0.5796, "step": 15410 }, { "epoch": 0.39, "grad_norm": 6.47341775894165, "learning_rate": 1.3789606235214987e-05, "loss": 0.5769, "step": 15411 }, { "epoch": 0.4, "grad_norm": 2.1580312252044678, "learning_rate": 1.3788837997328187e-05, "loss": 0.6939, "step": 15412 }, { "epoch": 0.4, "grad_norm": 3.3057892322540283, "learning_rate": 1.3788069733331093e-05, "loss": 0.7196, "step": 15413 }, { "epoch": 0.4, "grad_norm": 1.6356654167175293, "learning_rate": 1.3787301443229006e-05, "loss": 0.6113, "step": 15414 }, { "epoch": 0.4, "grad_norm": 2.5284078121185303, "learning_rate": 1.3786533127027215e-05, "loss": 0.5915, "step": 15415 }, { "epoch": 0.4, "grad_norm": 3.0113627910614014, "learning_rate": 1.3785764784731017e-05, "loss": 0.6347, "step": 15416 }, { "epoch": 0.4, "grad_norm": 2.2484474182128906, "learning_rate": 1.3784996416345706e-05, "loss": 0.7249, "step": 15417 }, { "epoch": 0.4, "grad_norm": 3.6010019779205322, "learning_rate": 1.378422802187658e-05, "loss": 0.5879, "step": 15418 }, { "epoch": 0.4, "grad_norm": 6.331574440002441, "learning_rate": 1.3783459601328927e-05, "loss": 0.5854, "step": 15419 }, { "epoch": 0.4, "grad_norm": 2.2291066646575928, "learning_rate": 1.3782691154708052e-05, "loss": 0.6927, "step": 15420 }, { "epoch": 0.4, "grad_norm": 6.723401069641113, "learning_rate": 1.3781922682019246e-05, "loss": 0.6391, "step": 15421 }, { "epoch": 0.4, "grad_norm": 1.3566367626190186, "learning_rate": 1.3781154183267803e-05, "loss": 0.5737, "step": 15422 }, { "epoch": 0.4, "grad_norm": 1.4875463247299194, "learning_rate": 1.378038565845902e-05, "loss": 0.4689, "step": 15423 }, { "epoch": 0.4, "grad_norm": 2.753479480743408, "learning_rate": 1.3779617107598198e-05, "loss": 0.4484, "step": 15424 }, { "epoch": 0.4, "grad_norm": 7.601710319519043, "learning_rate": 1.3778848530690625e-05, "loss": 0.5995, "step": 15425 }, { "epoch": 0.4, "grad_norm": 4.770264148712158, "learning_rate": 1.3778079927741606e-05, "loss": 0.6009, "step": 15426 }, { "epoch": 0.4, "grad_norm": 2.082383632659912, "learning_rate": 1.3777311298756431e-05, "loss": 0.5668, "step": 15427 }, { "epoch": 0.4, "grad_norm": 2.8922417163848877, "learning_rate": 1.37765426437404e-05, "loss": 0.5711, "step": 15428 }, { "epoch": 0.4, "grad_norm": 1.9697672128677368, "learning_rate": 1.377577396269881e-05, "loss": 0.6179, "step": 15429 }, { "epoch": 0.4, "grad_norm": 2.2524821758270264, "learning_rate": 1.3775005255636957e-05, "loss": 0.5571, "step": 15430 }, { "epoch": 0.4, "grad_norm": 1.1701616048812866, "learning_rate": 1.3774236522560144e-05, "loss": 0.5238, "step": 15431 }, { "epoch": 0.4, "grad_norm": 2.2574081420898438, "learning_rate": 1.377346776347366e-05, "loss": 0.608, "step": 15432 }, { "epoch": 0.4, "grad_norm": 1.8570681810379028, "learning_rate": 1.3772698978382808e-05, "loss": 0.638, "step": 15433 }, { "epoch": 0.4, "grad_norm": 2.2378218173980713, "learning_rate": 1.3771930167292885e-05, "loss": 0.5695, "step": 15434 }, { "epoch": 0.4, "grad_norm": 2.1268773078918457, "learning_rate": 1.3771161330209185e-05, "loss": 0.6877, "step": 15435 }, { "epoch": 0.4, "grad_norm": 3.3070082664489746, "learning_rate": 1.3770392467137016e-05, "loss": 0.5729, "step": 15436 }, { "epoch": 0.4, "grad_norm": 2.618093490600586, "learning_rate": 1.376962357808167e-05, "loss": 0.6114, "step": 15437 }, { "epoch": 0.4, "grad_norm": 1.0272109508514404, "learning_rate": 1.3768854663048445e-05, "loss": 0.4848, "step": 15438 }, { "epoch": 0.4, "grad_norm": 4.150496482849121, "learning_rate": 1.3768085722042641e-05, "loss": 0.6076, "step": 15439 }, { "epoch": 0.4, "grad_norm": 2.7888782024383545, "learning_rate": 1.3767316755069557e-05, "loss": 0.6358, "step": 15440 }, { "epoch": 0.4, "grad_norm": 2.9774255752563477, "learning_rate": 1.3766547762134497e-05, "loss": 0.7314, "step": 15441 }, { "epoch": 0.4, "grad_norm": 1.3688626289367676, "learning_rate": 1.3765778743242752e-05, "loss": 0.4657, "step": 15442 }, { "epoch": 0.4, "grad_norm": 3.087477922439575, "learning_rate": 1.376500969839963e-05, "loss": 0.5165, "step": 15443 }, { "epoch": 0.4, "grad_norm": 1.2709529399871826, "learning_rate": 1.3764240627610423e-05, "loss": 0.5415, "step": 15444 }, { "epoch": 0.4, "grad_norm": 1.635675072669983, "learning_rate": 1.3763471530880436e-05, "loss": 0.6354, "step": 15445 }, { "epoch": 0.4, "grad_norm": 2.0753281116485596, "learning_rate": 1.3762702408214966e-05, "loss": 0.4535, "step": 15446 }, { "epoch": 0.4, "grad_norm": 1.4194551706314087, "learning_rate": 1.3761933259619315e-05, "loss": 0.6782, "step": 15447 }, { "epoch": 0.4, "grad_norm": 2.1382369995117188, "learning_rate": 1.3761164085098786e-05, "loss": 0.4896, "step": 15448 }, { "epoch": 0.4, "grad_norm": 1.3593140840530396, "learning_rate": 1.3760394884658674e-05, "loss": 0.5722, "step": 15449 }, { "epoch": 0.4, "grad_norm": 6.293449878692627, "learning_rate": 1.3759625658304287e-05, "loss": 0.7332, "step": 15450 }, { "epoch": 0.4, "grad_norm": 1.3429865837097168, "learning_rate": 1.375885640604092e-05, "loss": 0.5071, "step": 15451 }, { "epoch": 0.4, "grad_norm": 1.3991212844848633, "learning_rate": 1.3758087127873873e-05, "loss": 0.5703, "step": 15452 }, { "epoch": 0.4, "grad_norm": 1.282594919204712, "learning_rate": 1.3757317823808455e-05, "loss": 0.6476, "step": 15453 }, { "epoch": 0.4, "grad_norm": 1.4182465076446533, "learning_rate": 1.3756548493849962e-05, "loss": 0.4951, "step": 15454 }, { "epoch": 0.4, "grad_norm": 2.3201136589050293, "learning_rate": 1.3755779138003696e-05, "loss": 0.7006, "step": 15455 }, { "epoch": 0.4, "grad_norm": 3.032426357269287, "learning_rate": 1.375500975627496e-05, "loss": 0.4927, "step": 15456 }, { "epoch": 0.4, "grad_norm": 3.6500868797302246, "learning_rate": 1.3754240348669055e-05, "loss": 0.682, "step": 15457 }, { "epoch": 0.4, "grad_norm": 4.060730934143066, "learning_rate": 1.3753470915191286e-05, "loss": 0.5864, "step": 15458 }, { "epoch": 0.4, "grad_norm": 8.9636869430542, "learning_rate": 1.3752701455846951e-05, "loss": 0.6155, "step": 15459 }, { "epoch": 0.4, "grad_norm": 2.5290846824645996, "learning_rate": 1.3751931970641359e-05, "loss": 0.5728, "step": 15460 }, { "epoch": 0.4, "grad_norm": 2.397291898727417, "learning_rate": 1.3751162459579805e-05, "loss": 0.6031, "step": 15461 }, { "epoch": 0.4, "grad_norm": 2.9952731132507324, "learning_rate": 1.3750392922667602e-05, "loss": 0.5766, "step": 15462 }, { "epoch": 0.4, "grad_norm": 3.156604290008545, "learning_rate": 1.374962335991004e-05, "loss": 0.5446, "step": 15463 }, { "epoch": 0.4, "grad_norm": 2.9486045837402344, "learning_rate": 1.3748853771312433e-05, "loss": 0.7227, "step": 15464 }, { "epoch": 0.4, "grad_norm": 1.7501496076583862, "learning_rate": 1.3748084156880081e-05, "loss": 0.6602, "step": 15465 }, { "epoch": 0.4, "grad_norm": 3.497953414916992, "learning_rate": 1.374731451661829e-05, "loss": 0.634, "step": 15466 }, { "epoch": 0.4, "grad_norm": 1.9250614643096924, "learning_rate": 1.3746544850532359e-05, "loss": 0.5469, "step": 15467 }, { "epoch": 0.4, "grad_norm": 1.1366549730300903, "learning_rate": 1.3745775158627595e-05, "loss": 0.4698, "step": 15468 }, { "epoch": 0.4, "grad_norm": 1.1547406911849976, "learning_rate": 1.37450054409093e-05, "loss": 0.4903, "step": 15469 }, { "epoch": 0.4, "grad_norm": 1.3539170026779175, "learning_rate": 1.3744235697382783e-05, "loss": 0.4883, "step": 15470 }, { "epoch": 0.4, "grad_norm": 1.0638847351074219, "learning_rate": 1.3743465928053345e-05, "loss": 0.41, "step": 15471 }, { "epoch": 0.4, "grad_norm": 2.4258928298950195, "learning_rate": 1.3742696132926292e-05, "loss": 0.5734, "step": 15472 }, { "epoch": 0.4, "grad_norm": 1.7939157485961914, "learning_rate": 1.374192631200693e-05, "loss": 0.6785, "step": 15473 }, { "epoch": 0.4, "grad_norm": 1.6497653722763062, "learning_rate": 1.3741156465300563e-05, "loss": 0.4974, "step": 15474 }, { "epoch": 0.4, "grad_norm": 1.5547940731048584, "learning_rate": 1.3740386592812493e-05, "loss": 0.5761, "step": 15475 }, { "epoch": 0.4, "grad_norm": 7.890730381011963, "learning_rate": 1.3739616694548028e-05, "loss": 0.6031, "step": 15476 }, { "epoch": 0.4, "grad_norm": 1.1873884201049805, "learning_rate": 1.3738846770512477e-05, "loss": 0.578, "step": 15477 }, { "epoch": 0.4, "grad_norm": 3.9296059608459473, "learning_rate": 1.3738076820711142e-05, "loss": 0.5157, "step": 15478 }, { "epoch": 0.4, "grad_norm": 1.1713839769363403, "learning_rate": 1.3737306845149332e-05, "loss": 0.5997, "step": 15479 }, { "epoch": 0.4, "grad_norm": 2.2686705589294434, "learning_rate": 1.3736536843832348e-05, "loss": 0.7124, "step": 15480 }, { "epoch": 0.4, "grad_norm": 1.0161629915237427, "learning_rate": 1.37357668167655e-05, "loss": 0.6076, "step": 15481 }, { "epoch": 0.4, "grad_norm": 4.142569541931152, "learning_rate": 1.3734996763954098e-05, "loss": 0.5915, "step": 15482 }, { "epoch": 0.4, "grad_norm": 1.4399478435516357, "learning_rate": 1.373422668540344e-05, "loss": 0.4801, "step": 15483 }, { "epoch": 0.4, "grad_norm": 1.4632054567337036, "learning_rate": 1.3733456581118842e-05, "loss": 0.5525, "step": 15484 }, { "epoch": 0.4, "grad_norm": 2.83301043510437, "learning_rate": 1.3732686451105604e-05, "loss": 0.511, "step": 15485 }, { "epoch": 0.4, "grad_norm": 2.7813565731048584, "learning_rate": 1.3731916295369037e-05, "loss": 0.6229, "step": 15486 }, { "epoch": 0.4, "grad_norm": 3.2323362827301025, "learning_rate": 1.3731146113914446e-05, "loss": 0.6662, "step": 15487 }, { "epoch": 0.4, "grad_norm": 1.2484678030014038, "learning_rate": 1.3730375906747142e-05, "loss": 0.596, "step": 15488 }, { "epoch": 0.4, "grad_norm": 1.8587464094161987, "learning_rate": 1.372960567387243e-05, "loss": 0.4693, "step": 15489 }, { "epoch": 0.4, "grad_norm": 3.4088075160980225, "learning_rate": 1.3728835415295618e-05, "loss": 0.5083, "step": 15490 }, { "epoch": 0.4, "grad_norm": 1.4229624271392822, "learning_rate": 1.372806513102202e-05, "loss": 0.4186, "step": 15491 }, { "epoch": 0.4, "grad_norm": 1.943289875984192, "learning_rate": 1.3727294821056934e-05, "loss": 0.5689, "step": 15492 }, { "epoch": 0.4, "grad_norm": 3.6497349739074707, "learning_rate": 1.3726524485405675e-05, "loss": 0.6535, "step": 15493 }, { "epoch": 0.4, "grad_norm": 2.0742738246917725, "learning_rate": 1.372575412407355e-05, "loss": 0.7447, "step": 15494 }, { "epoch": 0.4, "grad_norm": 5.687325477600098, "learning_rate": 1.3724983737065868e-05, "loss": 0.4688, "step": 15495 }, { "epoch": 0.4, "grad_norm": 2.0757806301116943, "learning_rate": 1.372421332438794e-05, "loss": 0.663, "step": 15496 }, { "epoch": 0.4, "grad_norm": 1.707013726234436, "learning_rate": 1.3723442886045074e-05, "loss": 0.5133, "step": 15497 }, { "epoch": 0.4, "grad_norm": 4.695838928222656, "learning_rate": 1.3722672422042577e-05, "loss": 0.5618, "step": 15498 }, { "epoch": 0.4, "grad_norm": 1.6590176820755005, "learning_rate": 1.3721901932385761e-05, "loss": 0.4671, "step": 15499 }, { "epoch": 0.4, "grad_norm": 3.389583110809326, "learning_rate": 1.3721131417079936e-05, "loss": 0.7851, "step": 15500 }, { "epoch": 0.4, "grad_norm": 2.2727878093719482, "learning_rate": 1.3720360876130412e-05, "loss": 0.5601, "step": 15501 }, { "epoch": 0.4, "grad_norm": 2.885211229324341, "learning_rate": 1.3719590309542496e-05, "loss": 0.7223, "step": 15502 }, { "epoch": 0.4, "grad_norm": 4.034945011138916, "learning_rate": 1.3718819717321503e-05, "loss": 0.6493, "step": 15503 }, { "epoch": 0.4, "grad_norm": 2.479008436203003, "learning_rate": 1.3718049099472736e-05, "loss": 0.71, "step": 15504 }, { "epoch": 0.4, "grad_norm": 2.2024827003479004, "learning_rate": 1.3717278456001515e-05, "loss": 0.696, "step": 15505 }, { "epoch": 0.4, "grad_norm": 1.275251030921936, "learning_rate": 1.3716507786913146e-05, "loss": 0.6539, "step": 15506 }, { "epoch": 0.4, "grad_norm": 3.203855514526367, "learning_rate": 1.3715737092212939e-05, "loss": 0.694, "step": 15507 }, { "epoch": 0.4, "grad_norm": 1.3963649272918701, "learning_rate": 1.371496637190621e-05, "loss": 0.5069, "step": 15508 }, { "epoch": 0.4, "grad_norm": 3.681234359741211, "learning_rate": 1.371419562599826e-05, "loss": 0.5799, "step": 15509 }, { "epoch": 0.4, "grad_norm": 2.1670351028442383, "learning_rate": 1.371342485449441e-05, "loss": 0.5946, "step": 15510 }, { "epoch": 0.4, "grad_norm": 1.3429886102676392, "learning_rate": 1.371265405739997e-05, "loss": 0.4581, "step": 15511 }, { "epoch": 0.4, "grad_norm": 1.8742382526397705, "learning_rate": 1.3711883234720252e-05, "loss": 0.4667, "step": 15512 }, { "epoch": 0.4, "grad_norm": 1.7179749011993408, "learning_rate": 1.3711112386460565e-05, "loss": 0.5778, "step": 15513 }, { "epoch": 0.4, "grad_norm": 1.9634921550750732, "learning_rate": 1.3710341512626222e-05, "loss": 0.7244, "step": 15514 }, { "epoch": 0.4, "grad_norm": 2.1626875400543213, "learning_rate": 1.3709570613222538e-05, "loss": 0.6886, "step": 15515 }, { "epoch": 0.4, "grad_norm": 1.6688421964645386, "learning_rate": 1.3708799688254823e-05, "loss": 0.5963, "step": 15516 }, { "epoch": 0.4, "grad_norm": 13.472213745117188, "learning_rate": 1.3708028737728392e-05, "loss": 0.6628, "step": 15517 }, { "epoch": 0.4, "grad_norm": 1.571786642074585, "learning_rate": 1.3707257761648556e-05, "loss": 0.6689, "step": 15518 }, { "epoch": 0.4, "grad_norm": 3.599804401397705, "learning_rate": 1.3706486760020627e-05, "loss": 0.5271, "step": 15519 }, { "epoch": 0.4, "grad_norm": 1.145873785018921, "learning_rate": 1.3705715732849922e-05, "loss": 0.5068, "step": 15520 }, { "epoch": 0.4, "grad_norm": 4.473372459411621, "learning_rate": 1.3704944680141751e-05, "loss": 0.5703, "step": 15521 }, { "epoch": 0.4, "grad_norm": 5.0715813636779785, "learning_rate": 1.3704173601901429e-05, "loss": 0.5543, "step": 15522 }, { "epoch": 0.4, "grad_norm": 1.372568130493164, "learning_rate": 1.3703402498134271e-05, "loss": 0.5104, "step": 15523 }, { "epoch": 0.4, "grad_norm": 1.1415531635284424, "learning_rate": 1.3702631368845589e-05, "loss": 0.5939, "step": 15524 }, { "epoch": 0.4, "grad_norm": 1.2877296209335327, "learning_rate": 1.3701860214040698e-05, "loss": 0.5176, "step": 15525 }, { "epoch": 0.4, "grad_norm": 1.660592794418335, "learning_rate": 1.370108903372491e-05, "loss": 0.572, "step": 15526 }, { "epoch": 0.4, "grad_norm": 1.2832804918289185, "learning_rate": 1.3700317827903546e-05, "loss": 0.5012, "step": 15527 }, { "epoch": 0.4, "grad_norm": 3.4170448780059814, "learning_rate": 1.3699546596581912e-05, "loss": 0.663, "step": 15528 }, { "epoch": 0.4, "grad_norm": 2.231900691986084, "learning_rate": 1.3698775339765329e-05, "loss": 0.5995, "step": 15529 }, { "epoch": 0.4, "grad_norm": 3.97442626953125, "learning_rate": 1.3698004057459111e-05, "loss": 0.7565, "step": 15530 }, { "epoch": 0.4, "grad_norm": 3.077069044113159, "learning_rate": 1.3697232749668569e-05, "loss": 0.551, "step": 15531 }, { "epoch": 0.4, "grad_norm": 1.9114810228347778, "learning_rate": 1.3696461416399027e-05, "loss": 0.4989, "step": 15532 }, { "epoch": 0.4, "grad_norm": 1.8316651582717896, "learning_rate": 1.369569005765579e-05, "loss": 0.7476, "step": 15533 }, { "epoch": 0.4, "grad_norm": 27.865678787231445, "learning_rate": 1.3694918673444183e-05, "loss": 0.6235, "step": 15534 }, { "epoch": 0.4, "grad_norm": 2.105618476867676, "learning_rate": 1.3694147263769516e-05, "loss": 0.5471, "step": 15535 }, { "epoch": 0.4, "grad_norm": 3.368812084197998, "learning_rate": 1.3693375828637106e-05, "loss": 0.7328, "step": 15536 }, { "epoch": 0.4, "grad_norm": 1.417842149734497, "learning_rate": 1.3692604368052274e-05, "loss": 0.5712, "step": 15537 }, { "epoch": 0.4, "grad_norm": 2.0924715995788574, "learning_rate": 1.3691832882020328e-05, "loss": 0.7867, "step": 15538 }, { "epoch": 0.4, "grad_norm": 1.4641938209533691, "learning_rate": 1.3691061370546592e-05, "loss": 0.6037, "step": 15539 }, { "epoch": 0.4, "grad_norm": 1.0885332822799683, "learning_rate": 1.3690289833636379e-05, "loss": 0.6047, "step": 15540 }, { "epoch": 0.4, "grad_norm": 1.2595999240875244, "learning_rate": 1.3689518271295006e-05, "loss": 0.6616, "step": 15541 }, { "epoch": 0.4, "grad_norm": 2.1808881759643555, "learning_rate": 1.3688746683527793e-05, "loss": 0.62, "step": 15542 }, { "epoch": 0.4, "grad_norm": 3.566267490386963, "learning_rate": 1.3687975070340054e-05, "loss": 0.531, "step": 15543 }, { "epoch": 0.4, "grad_norm": 3.516234874725342, "learning_rate": 1.3687203431737111e-05, "loss": 0.4349, "step": 15544 }, { "epoch": 0.4, "grad_norm": 1.0824971199035645, "learning_rate": 1.3686431767724275e-05, "loss": 0.404, "step": 15545 }, { "epoch": 0.4, "grad_norm": 1.5938994884490967, "learning_rate": 1.3685660078306869e-05, "loss": 0.6156, "step": 15546 }, { "epoch": 0.4, "grad_norm": 1.7626765966415405, "learning_rate": 1.368488836349021e-05, "loss": 0.4952, "step": 15547 }, { "epoch": 0.4, "grad_norm": 1.6178083419799805, "learning_rate": 1.3684116623279611e-05, "loss": 0.7168, "step": 15548 }, { "epoch": 0.4, "grad_norm": 1.7465664148330688, "learning_rate": 1.3683344857680399e-05, "loss": 0.5783, "step": 15549 }, { "epoch": 0.4, "grad_norm": 2.19920015335083, "learning_rate": 1.3682573066697886e-05, "loss": 0.5783, "step": 15550 }, { "epoch": 0.4, "grad_norm": 1.631003499031067, "learning_rate": 1.3681801250337394e-05, "loss": 0.6117, "step": 15551 }, { "epoch": 0.4, "grad_norm": 1.8684704303741455, "learning_rate": 1.3681029408604243e-05, "loss": 0.6313, "step": 15552 }, { "epoch": 0.4, "grad_norm": 1.4004909992218018, "learning_rate": 1.3680257541503748e-05, "loss": 0.5155, "step": 15553 }, { "epoch": 0.4, "grad_norm": 1.5564658641815186, "learning_rate": 1.3679485649041231e-05, "loss": 0.603, "step": 15554 }, { "epoch": 0.4, "grad_norm": 2.3297204971313477, "learning_rate": 1.3678713731222009e-05, "loss": 0.6639, "step": 15555 }, { "epoch": 0.4, "grad_norm": 2.6696455478668213, "learning_rate": 1.3677941788051403e-05, "loss": 0.5314, "step": 15556 }, { "epoch": 0.4, "grad_norm": 1.4234434366226196, "learning_rate": 1.3677169819534734e-05, "loss": 0.4797, "step": 15557 }, { "epoch": 0.4, "grad_norm": 1.7099179029464722, "learning_rate": 1.367639782567732e-05, "loss": 0.6797, "step": 15558 }, { "epoch": 0.4, "grad_norm": 1.6993955373764038, "learning_rate": 1.3675625806484486e-05, "loss": 0.6217, "step": 15559 }, { "epoch": 0.4, "grad_norm": 2.2003347873687744, "learning_rate": 1.3674853761961542e-05, "loss": 0.5672, "step": 15560 }, { "epoch": 0.4, "grad_norm": 2.3532555103302, "learning_rate": 1.3674081692113816e-05, "loss": 0.5045, "step": 15561 }, { "epoch": 0.4, "grad_norm": 2.0391950607299805, "learning_rate": 1.3673309596946632e-05, "loss": 0.7813, "step": 15562 }, { "epoch": 0.4, "grad_norm": 1.280802607536316, "learning_rate": 1.3672537476465299e-05, "loss": 0.4418, "step": 15563 }, { "epoch": 0.4, "grad_norm": 1.2779241800308228, "learning_rate": 1.367176533067515e-05, "loss": 0.5892, "step": 15564 }, { "epoch": 0.4, "grad_norm": 1.5758129358291626, "learning_rate": 1.3670993159581501e-05, "loss": 0.5957, "step": 15565 }, { "epoch": 0.4, "grad_norm": 1.2825547456741333, "learning_rate": 1.3670220963189673e-05, "loss": 0.5865, "step": 15566 }, { "epoch": 0.4, "grad_norm": 3.716403007507324, "learning_rate": 1.3669448741504986e-05, "loss": 0.6807, "step": 15567 }, { "epoch": 0.4, "grad_norm": 2.9779040813446045, "learning_rate": 1.3668676494532764e-05, "loss": 0.6486, "step": 15568 }, { "epoch": 0.4, "grad_norm": 4.526107311248779, "learning_rate": 1.3667904222278333e-05, "loss": 0.6916, "step": 15569 }, { "epoch": 0.4, "grad_norm": 1.884314775466919, "learning_rate": 1.3667131924747005e-05, "loss": 0.6678, "step": 15570 }, { "epoch": 0.4, "grad_norm": 4.374455451965332, "learning_rate": 1.3666359601944111e-05, "loss": 0.7408, "step": 15571 }, { "epoch": 0.4, "grad_norm": 2.100527048110962, "learning_rate": 1.3665587253874968e-05, "loss": 0.6468, "step": 15572 }, { "epoch": 0.4, "grad_norm": 2.4917922019958496, "learning_rate": 1.3664814880544901e-05, "loss": 0.7197, "step": 15573 }, { "epoch": 0.4, "grad_norm": 3.7425525188446045, "learning_rate": 1.3664042481959233e-05, "loss": 0.6263, "step": 15574 }, { "epoch": 0.4, "grad_norm": 4.865002155303955, "learning_rate": 1.3663270058123285e-05, "loss": 0.5955, "step": 15575 }, { "epoch": 0.4, "grad_norm": 1.4122344255447388, "learning_rate": 1.3662497609042385e-05, "loss": 0.7229, "step": 15576 }, { "epoch": 0.4, "grad_norm": 2.4226441383361816, "learning_rate": 1.3661725134721849e-05, "loss": 0.7329, "step": 15577 }, { "epoch": 0.4, "grad_norm": 1.1122524738311768, "learning_rate": 1.3660952635167002e-05, "loss": 0.5201, "step": 15578 }, { "epoch": 0.4, "grad_norm": 3.7881197929382324, "learning_rate": 1.3660180110383174e-05, "loss": 0.533, "step": 15579 }, { "epoch": 0.4, "grad_norm": 1.4681894779205322, "learning_rate": 1.3659407560375682e-05, "loss": 0.5084, "step": 15580 }, { "epoch": 0.4, "grad_norm": 2.740950584411621, "learning_rate": 1.3658634985149853e-05, "loss": 0.6997, "step": 15581 }, { "epoch": 0.4, "grad_norm": 1.5000513792037964, "learning_rate": 1.3657862384711007e-05, "loss": 0.6352, "step": 15582 }, { "epoch": 0.4, "grad_norm": 1.932830810546875, "learning_rate": 1.3657089759064476e-05, "loss": 0.5623, "step": 15583 }, { "epoch": 0.4, "grad_norm": 1.420534610748291, "learning_rate": 1.3656317108215577e-05, "loss": 0.4666, "step": 15584 }, { "epoch": 0.4, "grad_norm": 1.7608795166015625, "learning_rate": 1.3655544432169639e-05, "loss": 0.7121, "step": 15585 }, { "epoch": 0.4, "grad_norm": 3.5824801921844482, "learning_rate": 1.3654771730931987e-05, "loss": 0.6265, "step": 15586 }, { "epoch": 0.4, "grad_norm": 1.5979619026184082, "learning_rate": 1.365399900450794e-05, "loss": 0.5199, "step": 15587 }, { "epoch": 0.4, "grad_norm": 1.8363032341003418, "learning_rate": 1.3653226252902831e-05, "loss": 0.5877, "step": 15588 }, { "epoch": 0.4, "grad_norm": 1.8998138904571533, "learning_rate": 1.365245347612198e-05, "loss": 0.5898, "step": 15589 }, { "epoch": 0.4, "grad_norm": 1.1428179740905762, "learning_rate": 1.3651680674170714e-05, "loss": 0.5461, "step": 15590 }, { "epoch": 0.4, "grad_norm": 1.3461610078811646, "learning_rate": 1.3650907847054363e-05, "loss": 0.4709, "step": 15591 }, { "epoch": 0.4, "grad_norm": 1.247357964515686, "learning_rate": 1.3650134994778246e-05, "loss": 0.4374, "step": 15592 }, { "epoch": 0.4, "grad_norm": 1.1460219621658325, "learning_rate": 1.3649362117347693e-05, "loss": 0.5173, "step": 15593 }, { "epoch": 0.4, "grad_norm": 1.3969186544418335, "learning_rate": 1.3648589214768026e-05, "loss": 0.4552, "step": 15594 }, { "epoch": 0.4, "grad_norm": 2.2372894287109375, "learning_rate": 1.3647816287044579e-05, "loss": 0.5209, "step": 15595 }, { "epoch": 0.4, "grad_norm": 2.2315399646759033, "learning_rate": 1.3647043334182671e-05, "loss": 0.7101, "step": 15596 }, { "epoch": 0.4, "grad_norm": 2.5018420219421387, "learning_rate": 1.364627035618763e-05, "loss": 0.4531, "step": 15597 }, { "epoch": 0.4, "grad_norm": 1.6322351694107056, "learning_rate": 1.3645497353064788e-05, "loss": 0.6775, "step": 15598 }, { "epoch": 0.4, "grad_norm": 2.1539499759674072, "learning_rate": 1.3644724324819467e-05, "loss": 0.4856, "step": 15599 }, { "epoch": 0.4, "grad_norm": 2.019493818283081, "learning_rate": 1.3643951271456996e-05, "loss": 0.5608, "step": 15600 }, { "epoch": 0.4, "grad_norm": 2.8803913593292236, "learning_rate": 1.3643178192982703e-05, "loss": 0.5776, "step": 15601 }, { "epoch": 0.4, "grad_norm": 1.3477832078933716, "learning_rate": 1.364240508940191e-05, "loss": 0.5709, "step": 15602 }, { "epoch": 0.4, "grad_norm": 1.8021820783615112, "learning_rate": 1.3641631960719956e-05, "loss": 0.7135, "step": 15603 }, { "epoch": 0.4, "grad_norm": 2.046433448791504, "learning_rate": 1.3640858806942158e-05, "loss": 0.5554, "step": 15604 }, { "epoch": 0.4, "grad_norm": 2.8813745975494385, "learning_rate": 1.3640085628073854e-05, "loss": 0.6432, "step": 15605 }, { "epoch": 0.4, "grad_norm": 1.090194582939148, "learning_rate": 1.3639312424120362e-05, "loss": 0.4799, "step": 15606 }, { "epoch": 0.4, "grad_norm": 2.2797255516052246, "learning_rate": 1.3638539195087017e-05, "loss": 0.5728, "step": 15607 }, { "epoch": 0.4, "grad_norm": 1.5242207050323486, "learning_rate": 1.3637765940979144e-05, "loss": 0.6107, "step": 15608 }, { "epoch": 0.4, "grad_norm": 1.5841882228851318, "learning_rate": 1.3636992661802073e-05, "loss": 0.5783, "step": 15609 }, { "epoch": 0.4, "grad_norm": 1.8420650959014893, "learning_rate": 1.3636219357561137e-05, "loss": 0.6227, "step": 15610 }, { "epoch": 0.4, "grad_norm": 1.6405227184295654, "learning_rate": 1.3635446028261658e-05, "loss": 0.6149, "step": 15611 }, { "epoch": 0.4, "grad_norm": 1.8221521377563477, "learning_rate": 1.3634672673908973e-05, "loss": 0.566, "step": 15612 }, { "epoch": 0.4, "grad_norm": 0.9661979079246521, "learning_rate": 1.3633899294508406e-05, "loss": 0.4726, "step": 15613 }, { "epoch": 0.4, "grad_norm": 3.2955143451690674, "learning_rate": 1.3633125890065286e-05, "loss": 0.6269, "step": 15614 }, { "epoch": 0.4, "grad_norm": 4.217745304107666, "learning_rate": 1.3632352460584948e-05, "loss": 0.7896, "step": 15615 }, { "epoch": 0.4, "grad_norm": 2.665327548980713, "learning_rate": 1.3631579006072716e-05, "loss": 0.6483, "step": 15616 }, { "epoch": 0.4, "grad_norm": 1.42611563205719, "learning_rate": 1.3630805526533925e-05, "loss": 0.4645, "step": 15617 }, { "epoch": 0.4, "grad_norm": 2.9320497512817383, "learning_rate": 1.3630032021973902e-05, "loss": 0.5811, "step": 15618 }, { "epoch": 0.4, "grad_norm": 2.1693575382232666, "learning_rate": 1.3629258492397981e-05, "loss": 0.6978, "step": 15619 }, { "epoch": 0.4, "grad_norm": 2.559314012527466, "learning_rate": 1.3628484937811489e-05, "loss": 0.5011, "step": 15620 }, { "epoch": 0.4, "grad_norm": 4.588726997375488, "learning_rate": 1.3627711358219758e-05, "loss": 0.6683, "step": 15621 }, { "epoch": 0.4, "grad_norm": 1.3520400524139404, "learning_rate": 1.362693775362812e-05, "loss": 0.6548, "step": 15622 }, { "epoch": 0.4, "grad_norm": 2.211261749267578, "learning_rate": 1.3626164124041905e-05, "loss": 0.6198, "step": 15623 }, { "epoch": 0.4, "grad_norm": 1.3842899799346924, "learning_rate": 1.3625390469466448e-05, "loss": 0.564, "step": 15624 }, { "epoch": 0.4, "grad_norm": 3.9642181396484375, "learning_rate": 1.3624616789907075e-05, "loss": 0.7677, "step": 15625 }, { "epoch": 0.4, "grad_norm": 3.151729106903076, "learning_rate": 1.362384308536912e-05, "loss": 0.6118, "step": 15626 }, { "epoch": 0.4, "grad_norm": 1.302481770515442, "learning_rate": 1.3623069355857916e-05, "loss": 0.4862, "step": 15627 }, { "epoch": 0.4, "grad_norm": 2.4007694721221924, "learning_rate": 1.3622295601378794e-05, "loss": 0.4169, "step": 15628 }, { "epoch": 0.4, "grad_norm": 2.8863790035247803, "learning_rate": 1.3621521821937086e-05, "loss": 0.6581, "step": 15629 }, { "epoch": 0.4, "grad_norm": 1.4181489944458008, "learning_rate": 1.3620748017538125e-05, "loss": 0.5804, "step": 15630 }, { "epoch": 0.4, "grad_norm": 1.2755320072174072, "learning_rate": 1.3619974188187243e-05, "loss": 0.4407, "step": 15631 }, { "epoch": 0.4, "grad_norm": 1.4867911338806152, "learning_rate": 1.3619200333889778e-05, "loss": 0.5953, "step": 15632 }, { "epoch": 0.4, "grad_norm": 2.0400688648223877, "learning_rate": 1.3618426454651053e-05, "loss": 0.5287, "step": 15633 }, { "epoch": 0.4, "grad_norm": 12.119502067565918, "learning_rate": 1.3617652550476408e-05, "loss": 0.6207, "step": 15634 }, { "epoch": 0.4, "grad_norm": 7.416453838348389, "learning_rate": 1.3616878621371173e-05, "loss": 0.7349, "step": 15635 }, { "epoch": 0.4, "grad_norm": 1.7376635074615479, "learning_rate": 1.3616104667340687e-05, "loss": 0.5574, "step": 15636 }, { "epoch": 0.4, "grad_norm": 1.057898998260498, "learning_rate": 1.3615330688390273e-05, "loss": 0.4115, "step": 15637 }, { "epoch": 0.4, "grad_norm": 1.7987898588180542, "learning_rate": 1.3614556684525275e-05, "loss": 0.63, "step": 15638 }, { "epoch": 0.4, "grad_norm": 11.123028755187988, "learning_rate": 1.3613782655751025e-05, "loss": 0.7708, "step": 15639 }, { "epoch": 0.4, "grad_norm": 5.255756855010986, "learning_rate": 1.361300860207285e-05, "loss": 0.5884, "step": 15640 }, { "epoch": 0.4, "grad_norm": 1.3285777568817139, "learning_rate": 1.3612234523496095e-05, "loss": 0.6679, "step": 15641 }, { "epoch": 0.4, "grad_norm": 4.491518020629883, "learning_rate": 1.3611460420026085e-05, "loss": 0.5931, "step": 15642 }, { "epoch": 0.4, "grad_norm": 1.0907515287399292, "learning_rate": 1.361068629166816e-05, "loss": 0.422, "step": 15643 }, { "epoch": 0.4, "grad_norm": 0.991107702255249, "learning_rate": 1.3609912138427656e-05, "loss": 0.4984, "step": 15644 }, { "epoch": 0.4, "grad_norm": 7.910868167877197, "learning_rate": 1.3609137960309902e-05, "loss": 0.6298, "step": 15645 }, { "epoch": 0.4, "grad_norm": 3.642287492752075, "learning_rate": 1.360836375732024e-05, "loss": 0.5929, "step": 15646 }, { "epoch": 0.4, "grad_norm": 1.7000149488449097, "learning_rate": 1.3607589529464e-05, "loss": 0.6911, "step": 15647 }, { "epoch": 0.4, "grad_norm": 1.7791328430175781, "learning_rate": 1.360681527674652e-05, "loss": 0.5063, "step": 15648 }, { "epoch": 0.4, "grad_norm": 1.8870623111724854, "learning_rate": 1.3606040999173135e-05, "loss": 0.6691, "step": 15649 }, { "epoch": 0.4, "grad_norm": 1.4260433912277222, "learning_rate": 1.3605266696749179e-05, "loss": 0.5642, "step": 15650 }, { "epoch": 0.4, "grad_norm": 1.0937628746032715, "learning_rate": 1.3604492369479993e-05, "loss": 0.5284, "step": 15651 }, { "epoch": 0.4, "grad_norm": 2.1703295707702637, "learning_rate": 1.3603718017370908e-05, "loss": 0.6775, "step": 15652 }, { "epoch": 0.4, "grad_norm": 3.1171047687530518, "learning_rate": 1.3602943640427263e-05, "loss": 0.6653, "step": 15653 }, { "epoch": 0.4, "grad_norm": 10.92061710357666, "learning_rate": 1.3602169238654396e-05, "loss": 0.7587, "step": 15654 }, { "epoch": 0.4, "grad_norm": 4.840504169464111, "learning_rate": 1.360139481205764e-05, "loss": 0.7675, "step": 15655 }, { "epoch": 0.4, "grad_norm": 1.4446779489517212, "learning_rate": 1.3600620360642334e-05, "loss": 0.6435, "step": 15656 }, { "epoch": 0.4, "grad_norm": 5.844417572021484, "learning_rate": 1.3599845884413814e-05, "loss": 0.6939, "step": 15657 }, { "epoch": 0.4, "grad_norm": 1.5643306970596313, "learning_rate": 1.3599071383377421e-05, "loss": 0.5943, "step": 15658 }, { "epoch": 0.4, "grad_norm": 1.247908592224121, "learning_rate": 1.3598296857538484e-05, "loss": 0.5224, "step": 15659 }, { "epoch": 0.4, "grad_norm": 1.425697922706604, "learning_rate": 1.359752230690235e-05, "loss": 0.5175, "step": 15660 }, { "epoch": 0.4, "grad_norm": 4.735794544219971, "learning_rate": 1.359674773147435e-05, "loss": 0.6497, "step": 15661 }, { "epoch": 0.4, "grad_norm": 2.7578299045562744, "learning_rate": 1.3595973131259823e-05, "loss": 0.381, "step": 15662 }, { "epoch": 0.4, "grad_norm": 1.359764575958252, "learning_rate": 1.3595198506264111e-05, "loss": 0.4688, "step": 15663 }, { "epoch": 0.4, "grad_norm": 1.598961353302002, "learning_rate": 1.359442385649255e-05, "loss": 0.5123, "step": 15664 }, { "epoch": 0.4, "grad_norm": 1.5014903545379639, "learning_rate": 1.3593649181950477e-05, "loss": 0.6165, "step": 15665 }, { "epoch": 0.4, "grad_norm": 1.5097174644470215, "learning_rate": 1.3592874482643233e-05, "loss": 0.464, "step": 15666 }, { "epoch": 0.4, "grad_norm": 1.114217758178711, "learning_rate": 1.3592099758576152e-05, "loss": 0.7175, "step": 15667 }, { "epoch": 0.4, "grad_norm": 8.231127738952637, "learning_rate": 1.3591325009754579e-05, "loss": 0.6116, "step": 15668 }, { "epoch": 0.4, "grad_norm": 5.528635025024414, "learning_rate": 1.3590550236183848e-05, "loss": 0.6988, "step": 15669 }, { "epoch": 0.4, "grad_norm": 1.3217225074768066, "learning_rate": 1.3589775437869301e-05, "loss": 0.4616, "step": 15670 }, { "epoch": 0.4, "grad_norm": 2.685457468032837, "learning_rate": 1.3589000614816281e-05, "loss": 0.6075, "step": 15671 }, { "epoch": 0.4, "grad_norm": 2.2164337635040283, "learning_rate": 1.358822576703012e-05, "loss": 0.6681, "step": 15672 }, { "epoch": 0.4, "grad_norm": 8.078534126281738, "learning_rate": 1.3587450894516164e-05, "loss": 0.5219, "step": 15673 }, { "epoch": 0.4, "grad_norm": 1.452295184135437, "learning_rate": 1.3586675997279746e-05, "loss": 0.5968, "step": 15674 }, { "epoch": 0.4, "grad_norm": 1.4633985757827759, "learning_rate": 1.3585901075326214e-05, "loss": 0.4573, "step": 15675 }, { "epoch": 0.4, "grad_norm": 13.131553649902344, "learning_rate": 1.3585126128660902e-05, "loss": 0.6778, "step": 15676 }, { "epoch": 0.4, "grad_norm": 4.929814338684082, "learning_rate": 1.3584351157289152e-05, "loss": 0.7636, "step": 15677 }, { "epoch": 0.4, "grad_norm": 1.6069858074188232, "learning_rate": 1.358357616121631e-05, "loss": 0.6252, "step": 15678 }, { "epoch": 0.4, "grad_norm": 2.4211385250091553, "learning_rate": 1.358280114044771e-05, "loss": 0.6161, "step": 15679 }, { "epoch": 0.4, "grad_norm": 2.162330150604248, "learning_rate": 1.3582026094988698e-05, "loss": 0.5338, "step": 15680 }, { "epoch": 0.4, "grad_norm": 2.659693717956543, "learning_rate": 1.3581251024844609e-05, "loss": 0.4953, "step": 15681 }, { "epoch": 0.4, "grad_norm": 1.7498337030410767, "learning_rate": 1.3580475930020788e-05, "loss": 0.5859, "step": 15682 }, { "epoch": 0.4, "grad_norm": 4.6014204025268555, "learning_rate": 1.357970081052258e-05, "loss": 0.6928, "step": 15683 }, { "epoch": 0.4, "grad_norm": 6.572803020477295, "learning_rate": 1.357892566635532e-05, "loss": 0.6908, "step": 15684 }, { "epoch": 0.4, "grad_norm": 2.533374786376953, "learning_rate": 1.3578150497524356e-05, "loss": 0.6935, "step": 15685 }, { "epoch": 0.4, "grad_norm": 2.1545000076293945, "learning_rate": 1.3577375304035024e-05, "loss": 0.6308, "step": 15686 }, { "epoch": 0.4, "grad_norm": 2.40315842628479, "learning_rate": 1.3576600085892671e-05, "loss": 0.4721, "step": 15687 }, { "epoch": 0.4, "grad_norm": 1.7332003116607666, "learning_rate": 1.3575824843102633e-05, "loss": 0.5823, "step": 15688 }, { "epoch": 0.4, "grad_norm": 1.7141544818878174, "learning_rate": 1.3575049575670261e-05, "loss": 0.5925, "step": 15689 }, { "epoch": 0.4, "grad_norm": 1.252874732017517, "learning_rate": 1.3574274283600892e-05, "loss": 0.4277, "step": 15690 }, { "epoch": 0.4, "grad_norm": 2.4816062450408936, "learning_rate": 1.3573498966899869e-05, "loss": 0.5726, "step": 15691 }, { "epoch": 0.4, "grad_norm": 1.713512659072876, "learning_rate": 1.3572723625572538e-05, "loss": 0.6314, "step": 15692 }, { "epoch": 0.4, "grad_norm": 3.3389761447906494, "learning_rate": 1.3571948259624239e-05, "loss": 0.6638, "step": 15693 }, { "epoch": 0.4, "grad_norm": 3.9228262901306152, "learning_rate": 1.3571172869060317e-05, "loss": 0.8287, "step": 15694 }, { "epoch": 0.4, "grad_norm": 1.2792006731033325, "learning_rate": 1.3570397453886116e-05, "loss": 0.5638, "step": 15695 }, { "epoch": 0.4, "grad_norm": 6.175705432891846, "learning_rate": 1.3569622014106978e-05, "loss": 0.7807, "step": 15696 }, { "epoch": 0.4, "grad_norm": 3.4354517459869385, "learning_rate": 1.3568846549728249e-05, "loss": 0.665, "step": 15697 }, { "epoch": 0.4, "grad_norm": 2.276092529296875, "learning_rate": 1.356807106075527e-05, "loss": 0.7609, "step": 15698 }, { "epoch": 0.4, "grad_norm": 1.8087818622589111, "learning_rate": 1.3567295547193388e-05, "loss": 0.6204, "step": 15699 }, { "epoch": 0.4, "grad_norm": 1.396772861480713, "learning_rate": 1.3566520009047948e-05, "loss": 0.5271, "step": 15700 }, { "epoch": 0.4, "grad_norm": 3.1825156211853027, "learning_rate": 1.356574444632429e-05, "loss": 0.6925, "step": 15701 }, { "epoch": 0.4, "grad_norm": 2.6692276000976562, "learning_rate": 1.3564968859027763e-05, "loss": 0.6497, "step": 15702 }, { "epoch": 0.4, "grad_norm": 4.467495441436768, "learning_rate": 1.3564193247163711e-05, "loss": 0.5398, "step": 15703 }, { "epoch": 0.4, "grad_norm": 1.1779828071594238, "learning_rate": 1.3563417610737477e-05, "loss": 0.4401, "step": 15704 }, { "epoch": 0.4, "grad_norm": 2.430833101272583, "learning_rate": 1.3562641949754406e-05, "loss": 0.4709, "step": 15705 }, { "epoch": 0.4, "grad_norm": 1.5713529586791992, "learning_rate": 1.3561866264219848e-05, "loss": 0.61, "step": 15706 }, { "epoch": 0.4, "grad_norm": 1.2814114093780518, "learning_rate": 1.3561090554139145e-05, "loss": 0.5125, "step": 15707 }, { "epoch": 0.4, "grad_norm": 3.7340312004089355, "learning_rate": 1.3560314819517642e-05, "loss": 0.676, "step": 15708 }, { "epoch": 0.4, "grad_norm": 2.9478132724761963, "learning_rate": 1.3559539060360687e-05, "loss": 0.6197, "step": 15709 }, { "epoch": 0.4, "grad_norm": 1.5770440101623535, "learning_rate": 1.3558763276673624e-05, "loss": 0.4024, "step": 15710 }, { "epoch": 0.4, "grad_norm": 1.513055443763733, "learning_rate": 1.35579874684618e-05, "loss": 0.6973, "step": 15711 }, { "epoch": 0.4, "grad_norm": 3.0641047954559326, "learning_rate": 1.3557211635730565e-05, "loss": 0.6407, "step": 15712 }, { "epoch": 0.4, "grad_norm": 1.248029112815857, "learning_rate": 1.3556435778485258e-05, "loss": 0.5613, "step": 15713 }, { "epoch": 0.4, "grad_norm": 2.6671507358551025, "learning_rate": 1.3555659896731232e-05, "loss": 0.6804, "step": 15714 }, { "epoch": 0.4, "grad_norm": 1.7520185708999634, "learning_rate": 1.3554883990473833e-05, "loss": 0.5452, "step": 15715 }, { "epoch": 0.4, "grad_norm": 1.2634081840515137, "learning_rate": 1.3554108059718404e-05, "loss": 0.6224, "step": 15716 }, { "epoch": 0.4, "grad_norm": 1.4875521659851074, "learning_rate": 1.3553332104470298e-05, "loss": 0.5074, "step": 15717 }, { "epoch": 0.4, "grad_norm": 1.7843616008758545, "learning_rate": 1.3552556124734859e-05, "loss": 0.6592, "step": 15718 }, { "epoch": 0.4, "grad_norm": 8.58126449584961, "learning_rate": 1.3551780120517435e-05, "loss": 0.6356, "step": 15719 }, { "epoch": 0.4, "grad_norm": 1.9265179634094238, "learning_rate": 1.3551004091823373e-05, "loss": 0.64, "step": 15720 }, { "epoch": 0.4, "grad_norm": 1.9630908966064453, "learning_rate": 1.3550228038658024e-05, "loss": 0.5659, "step": 15721 }, { "epoch": 0.4, "grad_norm": 1.4126225709915161, "learning_rate": 1.354945196102673e-05, "loss": 0.6458, "step": 15722 }, { "epoch": 0.4, "grad_norm": 1.921600341796875, "learning_rate": 1.3548675858934846e-05, "loss": 0.4305, "step": 15723 }, { "epoch": 0.4, "grad_norm": 3.441143274307251, "learning_rate": 1.3547899732387717e-05, "loss": 0.5519, "step": 15724 }, { "epoch": 0.4, "grad_norm": 1.660589575767517, "learning_rate": 1.3547123581390692e-05, "loss": 0.686, "step": 15725 }, { "epoch": 0.4, "grad_norm": 1.2674411535263062, "learning_rate": 1.3546347405949118e-05, "loss": 0.556, "step": 15726 }, { "epoch": 0.4, "grad_norm": 1.5465584993362427, "learning_rate": 1.3545571206068346e-05, "loss": 0.5686, "step": 15727 }, { "epoch": 0.4, "grad_norm": 8.008051872253418, "learning_rate": 1.3544794981753729e-05, "loss": 0.4668, "step": 15728 }, { "epoch": 0.4, "grad_norm": 3.5836081504821777, "learning_rate": 1.3544018733010607e-05, "loss": 0.7067, "step": 15729 }, { "epoch": 0.4, "grad_norm": 1.5427298545837402, "learning_rate": 1.3543242459844337e-05, "loss": 0.5613, "step": 15730 }, { "epoch": 0.4, "grad_norm": 1.635801911354065, "learning_rate": 1.3542466162260265e-05, "loss": 0.572, "step": 15731 }, { "epoch": 0.4, "grad_norm": 1.5013844966888428, "learning_rate": 1.3541689840263744e-05, "loss": 0.647, "step": 15732 }, { "epoch": 0.4, "grad_norm": 1.4200021028518677, "learning_rate": 1.3540913493860119e-05, "loss": 0.4815, "step": 15733 }, { "epoch": 0.4, "grad_norm": 1.1375987529754639, "learning_rate": 1.3540137123054745e-05, "loss": 0.5178, "step": 15734 }, { "epoch": 0.4, "grad_norm": 2.8741297721862793, "learning_rate": 1.3539360727852965e-05, "loss": 0.524, "step": 15735 }, { "epoch": 0.4, "grad_norm": 2.721475839614868, "learning_rate": 1.353858430826014e-05, "loss": 0.5538, "step": 15736 }, { "epoch": 0.4, "grad_norm": 5.217652320861816, "learning_rate": 1.3537807864281613e-05, "loss": 0.7456, "step": 15737 }, { "epoch": 0.4, "grad_norm": 3.161757469177246, "learning_rate": 1.353703139592274e-05, "loss": 0.7273, "step": 15738 }, { "epoch": 0.4, "grad_norm": 1.7710483074188232, "learning_rate": 1.3536254903188865e-05, "loss": 0.6629, "step": 15739 }, { "epoch": 0.4, "grad_norm": 2.055464267730713, "learning_rate": 1.3535478386085343e-05, "loss": 0.7106, "step": 15740 }, { "epoch": 0.4, "grad_norm": 0.935679018497467, "learning_rate": 1.3534701844617528e-05, "loss": 0.623, "step": 15741 }, { "epoch": 0.4, "grad_norm": 1.8480364084243774, "learning_rate": 1.3533925278790767e-05, "loss": 0.4645, "step": 15742 }, { "epoch": 0.4, "grad_norm": 1.2103960514068604, "learning_rate": 1.3533148688610414e-05, "loss": 0.4138, "step": 15743 }, { "epoch": 0.4, "grad_norm": 3.9506053924560547, "learning_rate": 1.353237207408182e-05, "loss": 0.5323, "step": 15744 }, { "epoch": 0.4, "grad_norm": 4.231690406799316, "learning_rate": 1.3531595435210333e-05, "loss": 0.5375, "step": 15745 }, { "epoch": 0.4, "grad_norm": 3.5148346424102783, "learning_rate": 1.3530818772001312e-05, "loss": 0.5796, "step": 15746 }, { "epoch": 0.4, "grad_norm": 3.098144769668579, "learning_rate": 1.3530042084460107e-05, "loss": 0.5968, "step": 15747 }, { "epoch": 0.4, "grad_norm": 4.947483062744141, "learning_rate": 1.352926537259207e-05, "loss": 0.7577, "step": 15748 }, { "epoch": 0.4, "grad_norm": 2.554100751876831, "learning_rate": 1.3528488636402553e-05, "loss": 0.5735, "step": 15749 }, { "epoch": 0.4, "grad_norm": 2.7371280193328857, "learning_rate": 1.3527711875896907e-05, "loss": 0.6169, "step": 15750 }, { "epoch": 0.4, "grad_norm": 1.5454357862472534, "learning_rate": 1.3526935091080489e-05, "loss": 0.6052, "step": 15751 }, { "epoch": 0.4, "grad_norm": 3.651742696762085, "learning_rate": 1.352615828195865e-05, "loss": 0.6655, "step": 15752 }, { "epoch": 0.4, "grad_norm": 6.668135643005371, "learning_rate": 1.3525381448536745e-05, "loss": 0.5038, "step": 15753 }, { "epoch": 0.4, "grad_norm": 3.30110764503479, "learning_rate": 1.3524604590820123e-05, "loss": 0.672, "step": 15754 }, { "epoch": 0.4, "grad_norm": 1.6270986795425415, "learning_rate": 1.3523827708814143e-05, "loss": 0.6234, "step": 15755 }, { "epoch": 0.4, "grad_norm": 4.296581745147705, "learning_rate": 1.3523050802524154e-05, "loss": 0.7251, "step": 15756 }, { "epoch": 0.4, "grad_norm": 1.5003061294555664, "learning_rate": 1.3522273871955513e-05, "loss": 0.4653, "step": 15757 }, { "epoch": 0.4, "grad_norm": 1.8699922561645508, "learning_rate": 1.3521496917113575e-05, "loss": 0.4562, "step": 15758 }, { "epoch": 0.4, "grad_norm": 1.8667643070220947, "learning_rate": 1.352071993800369e-05, "loss": 0.7011, "step": 15759 }, { "epoch": 0.4, "grad_norm": 1.4639263153076172, "learning_rate": 1.3519942934631218e-05, "loss": 0.4794, "step": 15760 }, { "epoch": 0.4, "grad_norm": 3.5159714221954346, "learning_rate": 1.3519165907001508e-05, "loss": 0.5717, "step": 15761 }, { "epoch": 0.4, "grad_norm": 1.0376136302947998, "learning_rate": 1.351838885511992e-05, "loss": 0.5182, "step": 15762 }, { "epoch": 0.4, "grad_norm": 4.993588447570801, "learning_rate": 1.3517611778991805e-05, "loss": 0.7651, "step": 15763 }, { "epoch": 0.4, "grad_norm": 4.088494777679443, "learning_rate": 1.3516834678622516e-05, "loss": 0.7833, "step": 15764 }, { "epoch": 0.4, "grad_norm": 1.2580093145370483, "learning_rate": 1.3516057554017418e-05, "loss": 0.7407, "step": 15765 }, { "epoch": 0.4, "grad_norm": 2.9926295280456543, "learning_rate": 1.3515280405181859e-05, "loss": 0.7929, "step": 15766 }, { "epoch": 0.4, "grad_norm": 1.9016667604446411, "learning_rate": 1.3514503232121196e-05, "loss": 0.5271, "step": 15767 }, { "epoch": 0.4, "grad_norm": 2.8657658100128174, "learning_rate": 1.3513726034840784e-05, "loss": 0.7129, "step": 15768 }, { "epoch": 0.4, "grad_norm": 1.60527765750885, "learning_rate": 1.3512948813345981e-05, "loss": 0.6003, "step": 15769 }, { "epoch": 0.4, "grad_norm": 1.7834289073944092, "learning_rate": 1.3512171567642138e-05, "loss": 0.6737, "step": 15770 }, { "epoch": 0.4, "grad_norm": 1.716599464416504, "learning_rate": 1.3511394297734616e-05, "loss": 0.4466, "step": 15771 }, { "epoch": 0.4, "grad_norm": 2.2607879638671875, "learning_rate": 1.3510617003628774e-05, "loss": 0.608, "step": 15772 }, { "epoch": 0.4, "grad_norm": 2.0411505699157715, "learning_rate": 1.3509839685329962e-05, "loss": 0.5451, "step": 15773 }, { "epoch": 0.4, "grad_norm": 1.4420956373214722, "learning_rate": 1.3509062342843541e-05, "loss": 0.5666, "step": 15774 }, { "epoch": 0.4, "grad_norm": 4.702355861663818, "learning_rate": 1.3508284976174869e-05, "loss": 0.7856, "step": 15775 }, { "epoch": 0.4, "grad_norm": 1.3843919038772583, "learning_rate": 1.3507507585329297e-05, "loss": 0.6105, "step": 15776 }, { "epoch": 0.4, "grad_norm": 1.7308733463287354, "learning_rate": 1.350673017031219e-05, "loss": 0.4861, "step": 15777 }, { "epoch": 0.4, "grad_norm": 3.7403414249420166, "learning_rate": 1.35059527311289e-05, "loss": 0.4967, "step": 15778 }, { "epoch": 0.4, "grad_norm": 2.7025651931762695, "learning_rate": 1.3505175267784786e-05, "loss": 0.4779, "step": 15779 }, { "epoch": 0.4, "grad_norm": 5.469259738922119, "learning_rate": 1.3504397780285209e-05, "loss": 0.7482, "step": 15780 }, { "epoch": 0.4, "grad_norm": 4.285213947296143, "learning_rate": 1.3503620268635522e-05, "loss": 0.6129, "step": 15781 }, { "epoch": 0.4, "grad_norm": 1.8023669719696045, "learning_rate": 1.3502842732841085e-05, "loss": 0.6455, "step": 15782 }, { "epoch": 0.4, "grad_norm": 1.1225755214691162, "learning_rate": 1.3502065172907255e-05, "loss": 0.4635, "step": 15783 }, { "epoch": 0.4, "grad_norm": 1.3060123920440674, "learning_rate": 1.3501287588839395e-05, "loss": 0.5195, "step": 15784 }, { "epoch": 0.4, "grad_norm": 2.0502943992614746, "learning_rate": 1.3500509980642859e-05, "loss": 0.5463, "step": 15785 }, { "epoch": 0.4, "grad_norm": 1.5798313617706299, "learning_rate": 1.349973234832301e-05, "loss": 0.7513, "step": 15786 }, { "epoch": 0.4, "grad_norm": 1.6720836162567139, "learning_rate": 1.3498954691885202e-05, "loss": 0.6457, "step": 15787 }, { "epoch": 0.4, "grad_norm": 2.3670191764831543, "learning_rate": 1.34981770113348e-05, "loss": 0.5942, "step": 15788 }, { "epoch": 0.4, "grad_norm": 1.8054779767990112, "learning_rate": 1.3497399306677157e-05, "loss": 0.6216, "step": 15789 }, { "epoch": 0.4, "grad_norm": 3.0201094150543213, "learning_rate": 1.3496621577917635e-05, "loss": 0.5718, "step": 15790 }, { "epoch": 0.4, "grad_norm": 1.0586782693862915, "learning_rate": 1.3495843825061593e-05, "loss": 0.4554, "step": 15791 }, { "epoch": 0.4, "grad_norm": 2.50754976272583, "learning_rate": 1.3495066048114393e-05, "loss": 0.6556, "step": 15792 }, { "epoch": 0.4, "grad_norm": 1.784696102142334, "learning_rate": 1.3494288247081392e-05, "loss": 0.5915, "step": 15793 }, { "epoch": 0.4, "grad_norm": 4.673630237579346, "learning_rate": 1.3493510421967956e-05, "loss": 0.5856, "step": 15794 }, { "epoch": 0.4, "grad_norm": 1.543756127357483, "learning_rate": 1.3492732572779437e-05, "loss": 0.6837, "step": 15795 }, { "epoch": 0.4, "grad_norm": 3.0059101581573486, "learning_rate": 1.3491954699521204e-05, "loss": 0.6677, "step": 15796 }, { "epoch": 0.4, "grad_norm": 2.6237494945526123, "learning_rate": 1.3491176802198607e-05, "loss": 0.6514, "step": 15797 }, { "epoch": 0.4, "grad_norm": 1.3650026321411133, "learning_rate": 1.3490398880817016e-05, "loss": 0.7086, "step": 15798 }, { "epoch": 0.4, "grad_norm": 2.068767547607422, "learning_rate": 1.3489620935381789e-05, "loss": 0.6893, "step": 15799 }, { "epoch": 0.4, "grad_norm": 2.4703874588012695, "learning_rate": 1.3488842965898284e-05, "loss": 0.5525, "step": 15800 }, { "epoch": 0.4, "grad_norm": 1.9046642780303955, "learning_rate": 1.3488064972371869e-05, "loss": 0.6925, "step": 15801 }, { "epoch": 0.41, "grad_norm": 1.8001011610031128, "learning_rate": 1.3487286954807899e-05, "loss": 0.5975, "step": 15802 }, { "epoch": 0.41, "grad_norm": 1.3207658529281616, "learning_rate": 1.348650891321174e-05, "loss": 0.4491, "step": 15803 }, { "epoch": 0.41, "grad_norm": 4.756546497344971, "learning_rate": 1.3485730847588752e-05, "loss": 0.5635, "step": 15804 }, { "epoch": 0.41, "grad_norm": 1.7923476696014404, "learning_rate": 1.3484952757944294e-05, "loss": 0.5714, "step": 15805 }, { "epoch": 0.41, "grad_norm": 2.1593177318573, "learning_rate": 1.3484174644283737e-05, "loss": 0.6443, "step": 15806 }, { "epoch": 0.41, "grad_norm": 1.3009214401245117, "learning_rate": 1.3483396506612431e-05, "loss": 0.6142, "step": 15807 }, { "epoch": 0.41, "grad_norm": 1.3555903434753418, "learning_rate": 1.3482618344935748e-05, "loss": 0.5662, "step": 15808 }, { "epoch": 0.41, "grad_norm": 1.2244234085083008, "learning_rate": 1.3481840159259045e-05, "loss": 0.6212, "step": 15809 }, { "epoch": 0.41, "grad_norm": 1.3271372318267822, "learning_rate": 1.3481061949587689e-05, "loss": 0.6009, "step": 15810 }, { "epoch": 0.41, "grad_norm": 2.820220947265625, "learning_rate": 1.3480283715927044e-05, "loss": 0.8567, "step": 15811 }, { "epoch": 0.41, "grad_norm": 1.3617490530014038, "learning_rate": 1.3479505458282466e-05, "loss": 0.5953, "step": 15812 }, { "epoch": 0.41, "grad_norm": 3.0881214141845703, "learning_rate": 1.3478727176659323e-05, "loss": 0.4933, "step": 15813 }, { "epoch": 0.41, "grad_norm": 1.993706226348877, "learning_rate": 1.3477948871062979e-05, "loss": 0.5153, "step": 15814 }, { "epoch": 0.41, "grad_norm": 2.062217950820923, "learning_rate": 1.3477170541498795e-05, "loss": 0.5825, "step": 15815 }, { "epoch": 0.41, "grad_norm": 1.2657647132873535, "learning_rate": 1.3476392187972139e-05, "loss": 0.6315, "step": 15816 }, { "epoch": 0.41, "grad_norm": 5.773327350616455, "learning_rate": 1.3475613810488369e-05, "loss": 0.6182, "step": 15817 }, { "epoch": 0.41, "grad_norm": 1.3095691204071045, "learning_rate": 1.3474835409052853e-05, "loss": 0.522, "step": 15818 }, { "epoch": 0.41, "grad_norm": 1.3665462732315063, "learning_rate": 1.3474056983670954e-05, "loss": 0.5194, "step": 15819 }, { "epoch": 0.41, "grad_norm": 2.4542434215545654, "learning_rate": 1.3473278534348038e-05, "loss": 0.5941, "step": 15820 }, { "epoch": 0.41, "grad_norm": 1.9561220407485962, "learning_rate": 1.3472500061089468e-05, "loss": 0.6572, "step": 15821 }, { "epoch": 0.41, "grad_norm": 1.9266339540481567, "learning_rate": 1.3471721563900608e-05, "loss": 0.6833, "step": 15822 }, { "epoch": 0.41, "grad_norm": 2.076110363006592, "learning_rate": 1.3470943042786829e-05, "loss": 0.5804, "step": 15823 }, { "epoch": 0.41, "grad_norm": 1.4268555641174316, "learning_rate": 1.3470164497753485e-05, "loss": 0.5445, "step": 15824 }, { "epoch": 0.41, "grad_norm": 1.7428464889526367, "learning_rate": 1.3469385928805951e-05, "loss": 0.5943, "step": 15825 }, { "epoch": 0.41, "grad_norm": 2.1971287727355957, "learning_rate": 1.3468607335949588e-05, "loss": 0.5399, "step": 15826 }, { "epoch": 0.41, "grad_norm": 1.4546080827713013, "learning_rate": 1.3467828719189761e-05, "loss": 0.6236, "step": 15827 }, { "epoch": 0.41, "grad_norm": 2.872335195541382, "learning_rate": 1.346705007853184e-05, "loss": 0.4195, "step": 15828 }, { "epoch": 0.41, "grad_norm": 2.4468941688537598, "learning_rate": 1.3466271413981187e-05, "loss": 0.4963, "step": 15829 }, { "epoch": 0.41, "grad_norm": 1.9341801404953003, "learning_rate": 1.346549272554317e-05, "loss": 0.7234, "step": 15830 }, { "epoch": 0.41, "grad_norm": 2.0347275733947754, "learning_rate": 1.3464714013223152e-05, "loss": 0.6363, "step": 15831 }, { "epoch": 0.41, "grad_norm": 2.501561164855957, "learning_rate": 1.34639352770265e-05, "loss": 0.5731, "step": 15832 }, { "epoch": 0.41, "grad_norm": 1.3097484111785889, "learning_rate": 1.3463156516958588e-05, "loss": 0.4462, "step": 15833 }, { "epoch": 0.41, "grad_norm": 1.9642773866653442, "learning_rate": 1.3462377733024775e-05, "loss": 0.7088, "step": 15834 }, { "epoch": 0.41, "grad_norm": 1.4073355197906494, "learning_rate": 1.346159892523043e-05, "loss": 0.5292, "step": 15835 }, { "epoch": 0.41, "grad_norm": 1.993153691291809, "learning_rate": 1.3460820093580918e-05, "loss": 0.6341, "step": 15836 }, { "epoch": 0.41, "grad_norm": 1.759588360786438, "learning_rate": 1.3460041238081612e-05, "loss": 0.574, "step": 15837 }, { "epoch": 0.41, "grad_norm": 3.013927459716797, "learning_rate": 1.345926235873787e-05, "loss": 0.5319, "step": 15838 }, { "epoch": 0.41, "grad_norm": 3.7947113513946533, "learning_rate": 1.3458483455555066e-05, "loss": 0.7071, "step": 15839 }, { "epoch": 0.41, "grad_norm": 1.4648913145065308, "learning_rate": 1.3457704528538573e-05, "loss": 0.5635, "step": 15840 }, { "epoch": 0.41, "grad_norm": 1.749659538269043, "learning_rate": 1.3456925577693746e-05, "loss": 0.5976, "step": 15841 }, { "epoch": 0.41, "grad_norm": 3.1729629039764404, "learning_rate": 1.3456146603025966e-05, "loss": 0.6086, "step": 15842 }, { "epoch": 0.41, "grad_norm": 2.0313897132873535, "learning_rate": 1.345536760454059e-05, "loss": 0.7122, "step": 15843 }, { "epoch": 0.41, "grad_norm": 1.3962594270706177, "learning_rate": 1.3454588582242991e-05, "loss": 0.5145, "step": 15844 }, { "epoch": 0.41, "grad_norm": 2.2367684841156006, "learning_rate": 1.3453809536138543e-05, "loss": 0.5857, "step": 15845 }, { "epoch": 0.41, "grad_norm": 7.815845489501953, "learning_rate": 1.3453030466232605e-05, "loss": 0.4774, "step": 15846 }, { "epoch": 0.41, "grad_norm": 1.9273561239242554, "learning_rate": 1.3452251372530554e-05, "loss": 0.6855, "step": 15847 }, { "epoch": 0.41, "grad_norm": 3.153085470199585, "learning_rate": 1.3451472255037751e-05, "loss": 0.6369, "step": 15848 }, { "epoch": 0.41, "grad_norm": 1.4557949304580688, "learning_rate": 1.3450693113759575e-05, "loss": 0.5636, "step": 15849 }, { "epoch": 0.41, "grad_norm": 1.7732348442077637, "learning_rate": 1.3449913948701386e-05, "loss": 0.5138, "step": 15850 }, { "epoch": 0.41, "grad_norm": 4.2821044921875, "learning_rate": 1.3449134759868557e-05, "loss": 0.6127, "step": 15851 }, { "epoch": 0.41, "grad_norm": 2.131361484527588, "learning_rate": 1.3448355547266461e-05, "loss": 0.5199, "step": 15852 }, { "epoch": 0.41, "grad_norm": 1.3254402875900269, "learning_rate": 1.3447576310900461e-05, "loss": 0.5174, "step": 15853 }, { "epoch": 0.41, "grad_norm": 2.6815645694732666, "learning_rate": 1.3446797050775935e-05, "loss": 0.5495, "step": 15854 }, { "epoch": 0.41, "grad_norm": 2.4793596267700195, "learning_rate": 1.3446017766898248e-05, "loss": 0.5498, "step": 15855 }, { "epoch": 0.41, "grad_norm": 1.3176369667053223, "learning_rate": 1.3445238459272769e-05, "loss": 0.6573, "step": 15856 }, { "epoch": 0.41, "grad_norm": 1.6532894372940063, "learning_rate": 1.3444459127904876e-05, "loss": 0.5099, "step": 15857 }, { "epoch": 0.41, "grad_norm": 1.356216311454773, "learning_rate": 1.344367977279993e-05, "loss": 0.4928, "step": 15858 }, { "epoch": 0.41, "grad_norm": 5.318270206451416, "learning_rate": 1.3442900393963312e-05, "loss": 0.7206, "step": 15859 }, { "epoch": 0.41, "grad_norm": 2.7154061794281006, "learning_rate": 1.3442120991400383e-05, "loss": 0.7327, "step": 15860 }, { "epoch": 0.41, "grad_norm": 1.244502305984497, "learning_rate": 1.344134156511652e-05, "loss": 0.5013, "step": 15861 }, { "epoch": 0.41, "grad_norm": 4.179072380065918, "learning_rate": 1.3440562115117093e-05, "loss": 0.5558, "step": 15862 }, { "epoch": 0.41, "grad_norm": 2.348961353302002, "learning_rate": 1.3439782641407473e-05, "loss": 0.808, "step": 15863 }, { "epoch": 0.41, "grad_norm": 1.4211913347244263, "learning_rate": 1.3439003143993034e-05, "loss": 0.6296, "step": 15864 }, { "epoch": 0.41, "grad_norm": 1.9255133867263794, "learning_rate": 1.3438223622879143e-05, "loss": 0.5, "step": 15865 }, { "epoch": 0.41, "grad_norm": 3.274182081222534, "learning_rate": 1.3437444078071177e-05, "loss": 0.6075, "step": 15866 }, { "epoch": 0.41, "grad_norm": 1.293744683265686, "learning_rate": 1.3436664509574506e-05, "loss": 0.6803, "step": 15867 }, { "epoch": 0.41, "grad_norm": 2.1167032718658447, "learning_rate": 1.3435884917394501e-05, "loss": 0.4726, "step": 15868 }, { "epoch": 0.41, "grad_norm": 4.2617902755737305, "learning_rate": 1.3435105301536538e-05, "loss": 0.5489, "step": 15869 }, { "epoch": 0.41, "grad_norm": 2.254730224609375, "learning_rate": 1.3434325662005986e-05, "loss": 0.5514, "step": 15870 }, { "epoch": 0.41, "grad_norm": 1.0286126136779785, "learning_rate": 1.343354599880822e-05, "loss": 0.4623, "step": 15871 }, { "epoch": 0.41, "grad_norm": 4.121782302856445, "learning_rate": 1.3432766311948609e-05, "loss": 0.6397, "step": 15872 }, { "epoch": 0.41, "grad_norm": 1.184482216835022, "learning_rate": 1.3431986601432532e-05, "loss": 0.6763, "step": 15873 }, { "epoch": 0.41, "grad_norm": 1.5190056562423706, "learning_rate": 1.3431206867265358e-05, "loss": 0.6732, "step": 15874 }, { "epoch": 0.41, "grad_norm": 1.1041254997253418, "learning_rate": 1.3430427109452464e-05, "loss": 0.45, "step": 15875 }, { "epoch": 0.41, "grad_norm": 2.819866180419922, "learning_rate": 1.342964732799922e-05, "loss": 0.5919, "step": 15876 }, { "epoch": 0.41, "grad_norm": 1.9218401908874512, "learning_rate": 1.3428867522911002e-05, "loss": 0.5877, "step": 15877 }, { "epoch": 0.41, "grad_norm": 5.4145283699035645, "learning_rate": 1.3428087694193184e-05, "loss": 0.6792, "step": 15878 }, { "epoch": 0.41, "grad_norm": 2.2912888526916504, "learning_rate": 1.3427307841851139e-05, "loss": 0.5341, "step": 15879 }, { "epoch": 0.41, "grad_norm": 1.6591472625732422, "learning_rate": 1.342652796589024e-05, "loss": 0.5499, "step": 15880 }, { "epoch": 0.41, "grad_norm": 3.296869993209839, "learning_rate": 1.3425748066315865e-05, "loss": 0.6412, "step": 15881 }, { "epoch": 0.41, "grad_norm": 3.398439407348633, "learning_rate": 1.3424968143133383e-05, "loss": 0.8019, "step": 15882 }, { "epoch": 0.41, "grad_norm": 1.7539039850234985, "learning_rate": 1.3424188196348178e-05, "loss": 0.7392, "step": 15883 }, { "epoch": 0.41, "grad_norm": 1.4464234113693237, "learning_rate": 1.3423408225965615e-05, "loss": 0.6385, "step": 15884 }, { "epoch": 0.41, "grad_norm": 1.4634054899215698, "learning_rate": 1.3422628231991075e-05, "loss": 0.4652, "step": 15885 }, { "epoch": 0.41, "grad_norm": 1.5376859903335571, "learning_rate": 1.3421848214429932e-05, "loss": 0.6045, "step": 15886 }, { "epoch": 0.41, "grad_norm": 3.113882064819336, "learning_rate": 1.3421068173287559e-05, "loss": 0.5613, "step": 15887 }, { "epoch": 0.41, "grad_norm": 1.3487093448638916, "learning_rate": 1.3420288108569335e-05, "loss": 0.6108, "step": 15888 }, { "epoch": 0.41, "grad_norm": 4.908705711364746, "learning_rate": 1.3419508020280635e-05, "loss": 0.6243, "step": 15889 }, { "epoch": 0.41, "grad_norm": 1.3172935247421265, "learning_rate": 1.3418727908426832e-05, "loss": 0.6438, "step": 15890 }, { "epoch": 0.41, "grad_norm": 1.452409029006958, "learning_rate": 1.3417947773013304e-05, "loss": 0.496, "step": 15891 }, { "epoch": 0.41, "grad_norm": 3.408289909362793, "learning_rate": 1.3417167614045428e-05, "loss": 0.5259, "step": 15892 }, { "epoch": 0.41, "grad_norm": 1.4750254154205322, "learning_rate": 1.341638743152858e-05, "loss": 0.6059, "step": 15893 }, { "epoch": 0.41, "grad_norm": 1.669284462928772, "learning_rate": 1.3415607225468138e-05, "loss": 0.6403, "step": 15894 }, { "epoch": 0.41, "grad_norm": 1.9469746351242065, "learning_rate": 1.3414826995869474e-05, "loss": 0.5429, "step": 15895 }, { "epoch": 0.41, "grad_norm": 2.298567771911621, "learning_rate": 1.3414046742737967e-05, "loss": 0.6151, "step": 15896 }, { "epoch": 0.41, "grad_norm": 2.9063398838043213, "learning_rate": 1.3413266466078998e-05, "loss": 0.5108, "step": 15897 }, { "epoch": 0.41, "grad_norm": 1.7761033773422241, "learning_rate": 1.341248616589794e-05, "loss": 0.6963, "step": 15898 }, { "epoch": 0.41, "grad_norm": 1.8728861808776855, "learning_rate": 1.341170584220017e-05, "loss": 0.5235, "step": 15899 }, { "epoch": 0.41, "grad_norm": 1.7729034423828125, "learning_rate": 1.341092549499107e-05, "loss": 0.5079, "step": 15900 }, { "epoch": 0.41, "grad_norm": 1.4707940816879272, "learning_rate": 1.3410145124276009e-05, "loss": 0.3376, "step": 15901 }, { "epoch": 0.41, "grad_norm": 1.8155394792556763, "learning_rate": 1.3409364730060374e-05, "loss": 0.6755, "step": 15902 }, { "epoch": 0.41, "grad_norm": 2.7684977054595947, "learning_rate": 1.3408584312349539e-05, "loss": 0.5947, "step": 15903 }, { "epoch": 0.41, "grad_norm": 1.8118667602539062, "learning_rate": 1.3407803871148879e-05, "loss": 0.6283, "step": 15904 }, { "epoch": 0.41, "grad_norm": 1.9717744588851929, "learning_rate": 1.3407023406463781e-05, "loss": 0.6548, "step": 15905 }, { "epoch": 0.41, "grad_norm": 2.0994083881378174, "learning_rate": 1.3406242918299615e-05, "loss": 0.5423, "step": 15906 }, { "epoch": 0.41, "grad_norm": 3.491710901260376, "learning_rate": 1.3405462406661765e-05, "loss": 0.6004, "step": 15907 }, { "epoch": 0.41, "grad_norm": 1.9537053108215332, "learning_rate": 1.3404681871555605e-05, "loss": 0.4843, "step": 15908 }, { "epoch": 0.41, "grad_norm": 1.190871000289917, "learning_rate": 1.3403901312986517e-05, "loss": 0.4922, "step": 15909 }, { "epoch": 0.41, "grad_norm": 2.4943602085113525, "learning_rate": 1.3403120730959884e-05, "loss": 0.6056, "step": 15910 }, { "epoch": 0.41, "grad_norm": 1.6179076433181763, "learning_rate": 1.3402340125481077e-05, "loss": 0.5524, "step": 15911 }, { "epoch": 0.41, "grad_norm": 2.704195261001587, "learning_rate": 1.340155949655548e-05, "loss": 0.5638, "step": 15912 }, { "epoch": 0.41, "grad_norm": 1.8023384809494019, "learning_rate": 1.3400778844188472e-05, "loss": 0.541, "step": 15913 }, { "epoch": 0.41, "grad_norm": 1.3506278991699219, "learning_rate": 1.339999816838543e-05, "loss": 0.5701, "step": 15914 }, { "epoch": 0.41, "grad_norm": 1.9250848293304443, "learning_rate": 1.3399217469151741e-05, "loss": 0.7227, "step": 15915 }, { "epoch": 0.41, "grad_norm": 2.123187780380249, "learning_rate": 1.339843674649278e-05, "loss": 0.469, "step": 15916 }, { "epoch": 0.41, "grad_norm": 2.484771966934204, "learning_rate": 1.3397656000413928e-05, "loss": 0.4984, "step": 15917 }, { "epoch": 0.41, "grad_norm": 3.470867872238159, "learning_rate": 1.3396875230920564e-05, "loss": 0.7702, "step": 15918 }, { "epoch": 0.41, "grad_norm": 1.8798686265945435, "learning_rate": 1.3396094438018068e-05, "loss": 0.4706, "step": 15919 }, { "epoch": 0.41, "grad_norm": 1.2748610973358154, "learning_rate": 1.3395313621711827e-05, "loss": 0.3933, "step": 15920 }, { "epoch": 0.41, "grad_norm": 1.3579362630844116, "learning_rate": 1.3394532782007217e-05, "loss": 0.557, "step": 15921 }, { "epoch": 0.41, "grad_norm": 1.9576281309127808, "learning_rate": 1.3393751918909618e-05, "loss": 0.5256, "step": 15922 }, { "epoch": 0.41, "grad_norm": 9.488140106201172, "learning_rate": 1.3392971032424415e-05, "loss": 0.7128, "step": 15923 }, { "epoch": 0.41, "grad_norm": 2.8979995250701904, "learning_rate": 1.3392190122556985e-05, "loss": 0.5739, "step": 15924 }, { "epoch": 0.41, "grad_norm": 1.7893593311309814, "learning_rate": 1.3391409189312714e-05, "loss": 0.7019, "step": 15925 }, { "epoch": 0.41, "grad_norm": 1.4332690238952637, "learning_rate": 1.3390628232696981e-05, "loss": 0.5973, "step": 15926 }, { "epoch": 0.41, "grad_norm": 3.193013906478882, "learning_rate": 1.3389847252715167e-05, "loss": 0.4838, "step": 15927 }, { "epoch": 0.41, "grad_norm": 1.3436086177825928, "learning_rate": 1.3389066249372658e-05, "loss": 0.5365, "step": 15928 }, { "epoch": 0.41, "grad_norm": 4.533938884735107, "learning_rate": 1.338828522267483e-05, "loss": 0.4839, "step": 15929 }, { "epoch": 0.41, "grad_norm": 2.886312961578369, "learning_rate": 1.3387504172627073e-05, "loss": 0.6561, "step": 15930 }, { "epoch": 0.41, "grad_norm": 4.306732654571533, "learning_rate": 1.3386723099234761e-05, "loss": 1.126, "step": 15931 }, { "epoch": 0.41, "grad_norm": 2.1693928241729736, "learning_rate": 1.3385942002503286e-05, "loss": 0.564, "step": 15932 }, { "epoch": 0.41, "grad_norm": 3.4889252185821533, "learning_rate": 1.3385160882438022e-05, "loss": 0.6226, "step": 15933 }, { "epoch": 0.41, "grad_norm": 1.3609040975570679, "learning_rate": 1.3384379739044357e-05, "loss": 0.5657, "step": 15934 }, { "epoch": 0.41, "grad_norm": 2.5545129776000977, "learning_rate": 1.3383598572327673e-05, "loss": 0.6337, "step": 15935 }, { "epoch": 0.41, "grad_norm": 5.3130784034729, "learning_rate": 1.3382817382293354e-05, "loss": 0.6854, "step": 15936 }, { "epoch": 0.41, "grad_norm": 2.6232078075408936, "learning_rate": 1.3382036168946786e-05, "loss": 0.6389, "step": 15937 }, { "epoch": 0.41, "grad_norm": 2.6946239471435547, "learning_rate": 1.3381254932293347e-05, "loss": 0.7264, "step": 15938 }, { "epoch": 0.41, "grad_norm": 1.251848816871643, "learning_rate": 1.338047367233842e-05, "loss": 0.4911, "step": 15939 }, { "epoch": 0.41, "grad_norm": 3.7826719284057617, "learning_rate": 1.3379692389087395e-05, "loss": 0.6046, "step": 15940 }, { "epoch": 0.41, "grad_norm": 5.750975608825684, "learning_rate": 1.3378911082545653e-05, "loss": 0.7028, "step": 15941 }, { "epoch": 0.41, "grad_norm": 1.7134819030761719, "learning_rate": 1.337812975271858e-05, "loss": 0.5045, "step": 15942 }, { "epoch": 0.41, "grad_norm": 5.759629726409912, "learning_rate": 1.3377348399611558e-05, "loss": 0.7748, "step": 15943 }, { "epoch": 0.41, "grad_norm": 1.6387439966201782, "learning_rate": 1.3376567023229977e-05, "loss": 0.6752, "step": 15944 }, { "epoch": 0.41, "grad_norm": 6.864534378051758, "learning_rate": 1.3375785623579209e-05, "loss": 0.6144, "step": 15945 }, { "epoch": 0.41, "grad_norm": 1.364580512046814, "learning_rate": 1.3375004200664654e-05, "loss": 0.5488, "step": 15946 }, { "epoch": 0.41, "grad_norm": 1.13759183883667, "learning_rate": 1.3374222754491687e-05, "loss": 0.5248, "step": 15947 }, { "epoch": 0.41, "grad_norm": 3.1617307662963867, "learning_rate": 1.3373441285065698e-05, "loss": 0.7148, "step": 15948 }, { "epoch": 0.41, "grad_norm": 2.5291309356689453, "learning_rate": 1.3372659792392071e-05, "loss": 0.6434, "step": 15949 }, { "epoch": 0.41, "grad_norm": 2.8586795330047607, "learning_rate": 1.3371878276476192e-05, "loss": 0.6951, "step": 15950 }, { "epoch": 0.41, "grad_norm": 2.846482515335083, "learning_rate": 1.3371096737323446e-05, "loss": 0.5828, "step": 15951 }, { "epoch": 0.41, "grad_norm": 1.4092991352081299, "learning_rate": 1.337031517493922e-05, "loss": 0.6806, "step": 15952 }, { "epoch": 0.41, "grad_norm": 5.877697467803955, "learning_rate": 1.3369533589328896e-05, "loss": 0.7749, "step": 15953 }, { "epoch": 0.41, "grad_norm": 1.808197021484375, "learning_rate": 1.3368751980497868e-05, "loss": 0.5652, "step": 15954 }, { "epoch": 0.41, "grad_norm": 2.929044246673584, "learning_rate": 1.3367970348451514e-05, "loss": 0.6612, "step": 15955 }, { "epoch": 0.41, "grad_norm": 2.028975248336792, "learning_rate": 1.3367188693195228e-05, "loss": 0.6444, "step": 15956 }, { "epoch": 0.41, "grad_norm": 1.5742592811584473, "learning_rate": 1.3366407014734391e-05, "loss": 0.4929, "step": 15957 }, { "epoch": 0.41, "grad_norm": 1.9108543395996094, "learning_rate": 1.336562531307439e-05, "loss": 0.7938, "step": 15958 }, { "epoch": 0.41, "grad_norm": 1.4993751049041748, "learning_rate": 1.3364843588220614e-05, "loss": 0.6239, "step": 15959 }, { "epoch": 0.41, "grad_norm": 1.2753753662109375, "learning_rate": 1.3364061840178453e-05, "loss": 0.6939, "step": 15960 }, { "epoch": 0.41, "grad_norm": 1.798119306564331, "learning_rate": 1.3363280068953291e-05, "loss": 0.6115, "step": 15961 }, { "epoch": 0.41, "grad_norm": 1.9898221492767334, "learning_rate": 1.3362498274550513e-05, "loss": 0.5245, "step": 15962 }, { "epoch": 0.41, "grad_norm": 1.450713038444519, "learning_rate": 1.3361716456975513e-05, "loss": 0.4168, "step": 15963 }, { "epoch": 0.41, "grad_norm": 2.4508049488067627, "learning_rate": 1.3360934616233671e-05, "loss": 0.5864, "step": 15964 }, { "epoch": 0.41, "grad_norm": 2.4011051654815674, "learning_rate": 1.3360152752330383e-05, "loss": 0.5724, "step": 15965 }, { "epoch": 0.41, "grad_norm": 1.812902569770813, "learning_rate": 1.3359370865271033e-05, "loss": 0.4809, "step": 15966 }, { "epoch": 0.41, "grad_norm": 1.7801216840744019, "learning_rate": 1.3358588955061007e-05, "loss": 0.6971, "step": 15967 }, { "epoch": 0.41, "grad_norm": 1.942370891571045, "learning_rate": 1.33578070217057e-05, "loss": 0.4852, "step": 15968 }, { "epoch": 0.41, "grad_norm": 4.689652919769287, "learning_rate": 1.3357025065210491e-05, "loss": 0.5945, "step": 15969 }, { "epoch": 0.41, "grad_norm": 1.575212836265564, "learning_rate": 1.335624308558078e-05, "loss": 0.6532, "step": 15970 }, { "epoch": 0.41, "grad_norm": 1.6057204008102417, "learning_rate": 1.3355461082821946e-05, "loss": 0.5344, "step": 15971 }, { "epoch": 0.41, "grad_norm": 1.6189799308776855, "learning_rate": 1.3354679056939381e-05, "loss": 0.686, "step": 15972 }, { "epoch": 0.41, "grad_norm": 1.6996381282806396, "learning_rate": 1.335389700793848e-05, "loss": 0.4641, "step": 15973 }, { "epoch": 0.41, "grad_norm": 1.221616506576538, "learning_rate": 1.3353114935824627e-05, "loss": 0.6974, "step": 15974 }, { "epoch": 0.41, "grad_norm": 5.764305114746094, "learning_rate": 1.3352332840603213e-05, "loss": 0.6894, "step": 15975 }, { "epoch": 0.41, "grad_norm": 5.42576265335083, "learning_rate": 1.3351550722279625e-05, "loss": 0.7359, "step": 15976 }, { "epoch": 0.41, "grad_norm": 1.464407205581665, "learning_rate": 1.3350768580859254e-05, "loss": 0.5499, "step": 15977 }, { "epoch": 0.41, "grad_norm": 6.390162944793701, "learning_rate": 1.3349986416347496e-05, "loss": 0.6347, "step": 15978 }, { "epoch": 0.41, "grad_norm": 1.8143538236618042, "learning_rate": 1.3349204228749733e-05, "loss": 0.5901, "step": 15979 }, { "epoch": 0.41, "grad_norm": 2.7414329051971436, "learning_rate": 1.334842201807136e-05, "loss": 0.7317, "step": 15980 }, { "epoch": 0.41, "grad_norm": 4.681162357330322, "learning_rate": 1.3347639784317763e-05, "loss": 0.6348, "step": 15981 }, { "epoch": 0.41, "grad_norm": 1.5833191871643066, "learning_rate": 1.3346857527494338e-05, "loss": 0.64, "step": 15982 }, { "epoch": 0.41, "grad_norm": 2.241137742996216, "learning_rate": 1.3346075247606476e-05, "loss": 0.435, "step": 15983 }, { "epoch": 0.41, "grad_norm": 2.693805694580078, "learning_rate": 1.3345292944659563e-05, "loss": 0.6646, "step": 15984 }, { "epoch": 0.41, "grad_norm": 1.631351351737976, "learning_rate": 1.3344510618658992e-05, "loss": 0.3786, "step": 15985 }, { "epoch": 0.41, "grad_norm": 1.5472357273101807, "learning_rate": 1.3343728269610157e-05, "loss": 0.6419, "step": 15986 }, { "epoch": 0.41, "grad_norm": 1.7016218900680542, "learning_rate": 1.3342945897518447e-05, "loss": 0.5115, "step": 15987 }, { "epoch": 0.41, "grad_norm": 2.0808212757110596, "learning_rate": 1.3342163502389254e-05, "loss": 0.5566, "step": 15988 }, { "epoch": 0.41, "grad_norm": 1.3101309537887573, "learning_rate": 1.3341381084227969e-05, "loss": 0.6087, "step": 15989 }, { "epoch": 0.41, "grad_norm": 1.5120898485183716, "learning_rate": 1.3340598643039985e-05, "loss": 0.6437, "step": 15990 }, { "epoch": 0.41, "grad_norm": 2.171032428741455, "learning_rate": 1.3339816178830694e-05, "loss": 0.7263, "step": 15991 }, { "epoch": 0.41, "grad_norm": 2.468714952468872, "learning_rate": 1.3339033691605489e-05, "loss": 0.6251, "step": 15992 }, { "epoch": 0.41, "grad_norm": 2.0202815532684326, "learning_rate": 1.3338251181369761e-05, "loss": 0.7283, "step": 15993 }, { "epoch": 0.41, "grad_norm": 8.141382217407227, "learning_rate": 1.3337468648128901e-05, "loss": 0.6428, "step": 15994 }, { "epoch": 0.41, "grad_norm": 1.3781739473342896, "learning_rate": 1.3336686091888308e-05, "loss": 0.7015, "step": 15995 }, { "epoch": 0.41, "grad_norm": 4.805251598358154, "learning_rate": 1.333590351265337e-05, "loss": 0.4201, "step": 15996 }, { "epoch": 0.41, "grad_norm": 2.3744261264801025, "learning_rate": 1.333512091042948e-05, "loss": 0.5657, "step": 15997 }, { "epoch": 0.41, "grad_norm": 1.4323716163635254, "learning_rate": 1.333433828522203e-05, "loss": 0.5964, "step": 15998 }, { "epoch": 0.41, "grad_norm": 3.7938036918640137, "learning_rate": 1.333355563703642e-05, "loss": 0.7516, "step": 15999 }, { "epoch": 0.41, "grad_norm": 1.2358684539794922, "learning_rate": 1.3332772965878033e-05, "loss": 0.5318, "step": 16000 }, { "epoch": 0.41, "grad_norm": 1.9992717504501343, "learning_rate": 1.333199027175227e-05, "loss": 0.54, "step": 16001 }, { "epoch": 0.41, "grad_norm": 1.5055185556411743, "learning_rate": 1.3331207554664527e-05, "loss": 0.4568, "step": 16002 }, { "epoch": 0.41, "grad_norm": 1.2933197021484375, "learning_rate": 1.3330424814620192e-05, "loss": 0.6356, "step": 16003 }, { "epoch": 0.41, "grad_norm": 1.8053683042526245, "learning_rate": 1.3329642051624663e-05, "loss": 0.5031, "step": 16004 }, { "epoch": 0.41, "grad_norm": 1.4208835363388062, "learning_rate": 1.3328859265683332e-05, "loss": 0.6647, "step": 16005 }, { "epoch": 0.41, "grad_norm": 2.991731643676758, "learning_rate": 1.3328076456801594e-05, "loss": 0.5631, "step": 16006 }, { "epoch": 0.41, "grad_norm": 2.028536558151245, "learning_rate": 1.3327293624984845e-05, "loss": 0.5649, "step": 16007 }, { "epoch": 0.41, "grad_norm": 1.4263523817062378, "learning_rate": 1.3326510770238476e-05, "loss": 0.6024, "step": 16008 }, { "epoch": 0.41, "grad_norm": 2.5293548107147217, "learning_rate": 1.3325727892567887e-05, "loss": 0.6211, "step": 16009 }, { "epoch": 0.41, "grad_norm": 4.561427593231201, "learning_rate": 1.332494499197847e-05, "loss": 0.6707, "step": 16010 }, { "epoch": 0.41, "grad_norm": 5.358569145202637, "learning_rate": 1.3324162068475624e-05, "loss": 0.6703, "step": 16011 }, { "epoch": 0.41, "grad_norm": 2.0410749912261963, "learning_rate": 1.3323379122064737e-05, "loss": 0.6839, "step": 16012 }, { "epoch": 0.41, "grad_norm": 1.351202368736267, "learning_rate": 1.332259615275121e-05, "loss": 0.5613, "step": 16013 }, { "epoch": 0.41, "grad_norm": 3.3788928985595703, "learning_rate": 1.332181316054044e-05, "loss": 0.908, "step": 16014 }, { "epoch": 0.41, "grad_norm": 1.1537474393844604, "learning_rate": 1.3321030145437819e-05, "loss": 0.5648, "step": 16015 }, { "epoch": 0.41, "grad_norm": 2.9199092388153076, "learning_rate": 1.3320247107448747e-05, "loss": 0.5775, "step": 16016 }, { "epoch": 0.41, "grad_norm": 1.6368757486343384, "learning_rate": 1.3319464046578614e-05, "loss": 0.6187, "step": 16017 }, { "epoch": 0.41, "grad_norm": 2.0814459323883057, "learning_rate": 1.3318680962832823e-05, "loss": 0.5348, "step": 16018 }, { "epoch": 0.41, "grad_norm": 1.575429916381836, "learning_rate": 1.3317897856216768e-05, "loss": 0.3726, "step": 16019 }, { "epoch": 0.41, "grad_norm": 2.313809633255005, "learning_rate": 1.3317114726735842e-05, "loss": 0.688, "step": 16020 }, { "epoch": 0.41, "grad_norm": 2.6691393852233887, "learning_rate": 1.3316331574395449e-05, "loss": 0.6034, "step": 16021 }, { "epoch": 0.41, "grad_norm": 1.8012968301773071, "learning_rate": 1.3315548399200981e-05, "loss": 0.7293, "step": 16022 }, { "epoch": 0.41, "grad_norm": 1.512121319770813, "learning_rate": 1.3314765201157836e-05, "loss": 0.4368, "step": 16023 }, { "epoch": 0.41, "grad_norm": 3.02445125579834, "learning_rate": 1.3313981980271412e-05, "loss": 0.6182, "step": 16024 }, { "epoch": 0.41, "grad_norm": 2.404648542404175, "learning_rate": 1.3313198736547106e-05, "loss": 0.5006, "step": 16025 }, { "epoch": 0.41, "grad_norm": 1.7532446384429932, "learning_rate": 1.3312415469990318e-05, "loss": 0.6541, "step": 16026 }, { "epoch": 0.41, "grad_norm": 1.8561888933181763, "learning_rate": 1.3311632180606443e-05, "loss": 0.5613, "step": 16027 }, { "epoch": 0.41, "grad_norm": 1.359741449356079, "learning_rate": 1.3310848868400879e-05, "loss": 0.5979, "step": 16028 }, { "epoch": 0.41, "grad_norm": 1.4359291791915894, "learning_rate": 1.3310065533379024e-05, "loss": 0.6454, "step": 16029 }, { "epoch": 0.41, "grad_norm": 3.376323699951172, "learning_rate": 1.3309282175546278e-05, "loss": 0.7042, "step": 16030 }, { "epoch": 0.41, "grad_norm": 1.0751937627792358, "learning_rate": 1.330849879490804e-05, "loss": 0.4691, "step": 16031 }, { "epoch": 0.41, "grad_norm": 1.3563450574874878, "learning_rate": 1.3307715391469703e-05, "loss": 0.529, "step": 16032 }, { "epoch": 0.41, "grad_norm": 1.1683567762374878, "learning_rate": 1.330693196523667e-05, "loss": 0.4889, "step": 16033 }, { "epoch": 0.41, "grad_norm": 1.734403133392334, "learning_rate": 1.330614851621434e-05, "loss": 0.6785, "step": 16034 }, { "epoch": 0.41, "grad_norm": 2.2584877014160156, "learning_rate": 1.3305365044408114e-05, "loss": 0.5151, "step": 16035 }, { "epoch": 0.41, "grad_norm": 1.3670620918273926, "learning_rate": 1.3304581549823388e-05, "loss": 0.6092, "step": 16036 }, { "epoch": 0.41, "grad_norm": 3.4935476779937744, "learning_rate": 1.330379803246556e-05, "loss": 0.7463, "step": 16037 }, { "epoch": 0.41, "grad_norm": 1.5263774394989014, "learning_rate": 1.3303014492340035e-05, "loss": 0.6344, "step": 16038 }, { "epoch": 0.41, "grad_norm": 3.666187047958374, "learning_rate": 1.3302230929452207e-05, "loss": 0.6084, "step": 16039 }, { "epoch": 0.41, "grad_norm": 1.8883187770843506, "learning_rate": 1.3301447343807476e-05, "loss": 0.7004, "step": 16040 }, { "epoch": 0.41, "grad_norm": 1.3794745206832886, "learning_rate": 1.3300663735411247e-05, "loss": 0.4485, "step": 16041 }, { "epoch": 0.41, "grad_norm": 1.9200260639190674, "learning_rate": 1.3299880104268917e-05, "loss": 0.5796, "step": 16042 }, { "epoch": 0.41, "grad_norm": 1.6270442008972168, "learning_rate": 1.3299096450385887e-05, "loss": 0.5943, "step": 16043 }, { "epoch": 0.41, "grad_norm": 2.364752769470215, "learning_rate": 1.3298312773767554e-05, "loss": 0.592, "step": 16044 }, { "epoch": 0.41, "grad_norm": 2.808499336242676, "learning_rate": 1.3297529074419322e-05, "loss": 0.5314, "step": 16045 }, { "epoch": 0.41, "grad_norm": 2.448990821838379, "learning_rate": 1.3296745352346596e-05, "loss": 0.6118, "step": 16046 }, { "epoch": 0.41, "grad_norm": 3.0820558071136475, "learning_rate": 1.3295961607554766e-05, "loss": 0.6667, "step": 16047 }, { "epoch": 0.41, "grad_norm": 1.3576935529708862, "learning_rate": 1.3295177840049245e-05, "loss": 0.7013, "step": 16048 }, { "epoch": 0.41, "grad_norm": 3.0619049072265625, "learning_rate": 1.3294394049835424e-05, "loss": 0.5191, "step": 16049 }, { "epoch": 0.41, "grad_norm": 1.7028100490570068, "learning_rate": 1.329361023691871e-05, "loss": 0.5627, "step": 16050 }, { "epoch": 0.41, "grad_norm": 1.8249613046646118, "learning_rate": 1.3292826401304505e-05, "loss": 0.5339, "step": 16051 }, { "epoch": 0.41, "grad_norm": 5.570461750030518, "learning_rate": 1.3292042542998208e-05, "loss": 0.8089, "step": 16052 }, { "epoch": 0.41, "grad_norm": 3.1421988010406494, "learning_rate": 1.3291258662005225e-05, "loss": 0.4889, "step": 16053 }, { "epoch": 0.41, "grad_norm": 1.0020256042480469, "learning_rate": 1.3290474758330952e-05, "loss": 0.6003, "step": 16054 }, { "epoch": 0.41, "grad_norm": 1.5401488542556763, "learning_rate": 1.3289690831980794e-05, "loss": 0.7681, "step": 16055 }, { "epoch": 0.41, "grad_norm": 2.4938347339630127, "learning_rate": 1.3288906882960155e-05, "loss": 0.6753, "step": 16056 }, { "epoch": 0.41, "grad_norm": 1.948067545890808, "learning_rate": 1.3288122911274434e-05, "loss": 0.6246, "step": 16057 }, { "epoch": 0.41, "grad_norm": 2.338862895965576, "learning_rate": 1.328733891692904e-05, "loss": 0.6705, "step": 16058 }, { "epoch": 0.41, "grad_norm": 1.3997087478637695, "learning_rate": 1.3286554899929367e-05, "loss": 0.4893, "step": 16059 }, { "epoch": 0.41, "grad_norm": 2.1589996814727783, "learning_rate": 1.3285770860280826e-05, "loss": 0.7513, "step": 16060 }, { "epoch": 0.41, "grad_norm": 1.6790111064910889, "learning_rate": 1.3284986797988815e-05, "loss": 0.6865, "step": 16061 }, { "epoch": 0.41, "grad_norm": 1.231642246246338, "learning_rate": 1.3284202713058736e-05, "loss": 0.6698, "step": 16062 }, { "epoch": 0.41, "grad_norm": 2.6479568481445312, "learning_rate": 1.3283418605496e-05, "loss": 0.6622, "step": 16063 }, { "epoch": 0.41, "grad_norm": 3.4903781414031982, "learning_rate": 1.3282634475306003e-05, "loss": 0.5305, "step": 16064 }, { "epoch": 0.41, "grad_norm": 3.0463380813598633, "learning_rate": 1.3281850322494151e-05, "loss": 0.6361, "step": 16065 }, { "epoch": 0.41, "grad_norm": 1.643062949180603, "learning_rate": 1.328106614706585e-05, "loss": 0.675, "step": 16066 }, { "epoch": 0.41, "grad_norm": 1.2958741188049316, "learning_rate": 1.3280281949026504e-05, "loss": 0.4396, "step": 16067 }, { "epoch": 0.41, "grad_norm": 1.6406655311584473, "learning_rate": 1.3279497728381511e-05, "loss": 0.5157, "step": 16068 }, { "epoch": 0.41, "grad_norm": 3.0321977138519287, "learning_rate": 1.3278713485136283e-05, "loss": 0.6469, "step": 16069 }, { "epoch": 0.41, "grad_norm": 2.5128700733184814, "learning_rate": 1.3277929219296223e-05, "loss": 0.6488, "step": 16070 }, { "epoch": 0.41, "grad_norm": 3.727640151977539, "learning_rate": 1.3277144930866733e-05, "loss": 0.5248, "step": 16071 }, { "epoch": 0.41, "grad_norm": 2.569368362426758, "learning_rate": 1.327636061985322e-05, "loss": 0.5401, "step": 16072 }, { "epoch": 0.41, "grad_norm": 2.4119114875793457, "learning_rate": 1.3275576286261085e-05, "loss": 0.5486, "step": 16073 }, { "epoch": 0.41, "grad_norm": 2.865710973739624, "learning_rate": 1.3274791930095736e-05, "loss": 0.5513, "step": 16074 }, { "epoch": 0.41, "grad_norm": 1.2973591089248657, "learning_rate": 1.3274007551362583e-05, "loss": 0.4871, "step": 16075 }, { "epoch": 0.41, "grad_norm": 1.1753708124160767, "learning_rate": 1.3273223150067023e-05, "loss": 0.5087, "step": 16076 }, { "epoch": 0.41, "grad_norm": 2.8955705165863037, "learning_rate": 1.3272438726214467e-05, "loss": 0.44, "step": 16077 }, { "epoch": 0.41, "grad_norm": 1.5581110715866089, "learning_rate": 1.327165427981032e-05, "loss": 0.515, "step": 16078 }, { "epoch": 0.41, "grad_norm": 7.810736656188965, "learning_rate": 1.3270869810859986e-05, "loss": 0.6204, "step": 16079 }, { "epoch": 0.41, "grad_norm": 6.467879295349121, "learning_rate": 1.3270085319368871e-05, "loss": 0.6763, "step": 16080 }, { "epoch": 0.41, "grad_norm": 3.2830393314361572, "learning_rate": 1.3269300805342384e-05, "loss": 0.5771, "step": 16081 }, { "epoch": 0.41, "grad_norm": 3.503375768661499, "learning_rate": 1.326851626878593e-05, "loss": 0.6581, "step": 16082 }, { "epoch": 0.41, "grad_norm": 1.5230767726898193, "learning_rate": 1.3267731709704913e-05, "loss": 0.622, "step": 16083 }, { "epoch": 0.41, "grad_norm": 1.5706244707107544, "learning_rate": 1.3266947128104746e-05, "loss": 0.5155, "step": 16084 }, { "epoch": 0.41, "grad_norm": 1.596053957939148, "learning_rate": 1.3266162523990828e-05, "loss": 0.5766, "step": 16085 }, { "epoch": 0.41, "grad_norm": 2.54414701461792, "learning_rate": 1.3265377897368572e-05, "loss": 0.5187, "step": 16086 }, { "epoch": 0.41, "grad_norm": 5.712452411651611, "learning_rate": 1.3264593248243384e-05, "loss": 0.6485, "step": 16087 }, { "epoch": 0.41, "grad_norm": 1.5004175901412964, "learning_rate": 1.3263808576620667e-05, "loss": 0.582, "step": 16088 }, { "epoch": 0.41, "grad_norm": 2.0042624473571777, "learning_rate": 1.3263023882505836e-05, "loss": 0.556, "step": 16089 }, { "epoch": 0.41, "grad_norm": 4.131579399108887, "learning_rate": 1.3262239165904291e-05, "loss": 0.7288, "step": 16090 }, { "epoch": 0.41, "grad_norm": 4.526731491088867, "learning_rate": 1.3261454426821445e-05, "loss": 0.6499, "step": 16091 }, { "epoch": 0.41, "grad_norm": 4.13587760925293, "learning_rate": 1.3260669665262704e-05, "loss": 0.4859, "step": 16092 }, { "epoch": 0.41, "grad_norm": 1.8835065364837646, "learning_rate": 1.3259884881233476e-05, "loss": 0.5951, "step": 16093 }, { "epoch": 0.41, "grad_norm": 1.3285399675369263, "learning_rate": 1.325910007473917e-05, "loss": 0.5699, "step": 16094 }, { "epoch": 0.41, "grad_norm": 3.841641426086426, "learning_rate": 1.3258315245785194e-05, "loss": 0.7867, "step": 16095 }, { "epoch": 0.41, "grad_norm": 3.9612491130828857, "learning_rate": 1.3257530394376956e-05, "loss": 0.678, "step": 16096 }, { "epoch": 0.41, "grad_norm": 3.1382012367248535, "learning_rate": 1.3256745520519862e-05, "loss": 0.6523, "step": 16097 }, { "epoch": 0.41, "grad_norm": 1.7593599557876587, "learning_rate": 1.3255960624219327e-05, "loss": 0.7896, "step": 16098 }, { "epoch": 0.41, "grad_norm": 6.4238786697387695, "learning_rate": 1.3255175705480758e-05, "loss": 0.5395, "step": 16099 }, { "epoch": 0.41, "grad_norm": 3.320403814315796, "learning_rate": 1.3254390764309562e-05, "loss": 0.6405, "step": 16100 }, { "epoch": 0.41, "grad_norm": 2.599518299102783, "learning_rate": 1.325360580071115e-05, "loss": 0.5827, "step": 16101 }, { "epoch": 0.41, "grad_norm": 1.4568867683410645, "learning_rate": 1.325282081469093e-05, "loss": 0.5384, "step": 16102 }, { "epoch": 0.41, "grad_norm": 1.3029563426971436, "learning_rate": 1.3252035806254311e-05, "loss": 0.4715, "step": 16103 }, { "epoch": 0.41, "grad_norm": 1.1070849895477295, "learning_rate": 1.3251250775406707e-05, "loss": 0.4843, "step": 16104 }, { "epoch": 0.41, "grad_norm": 1.6286479234695435, "learning_rate": 1.3250465722153525e-05, "loss": 0.5334, "step": 16105 }, { "epoch": 0.41, "grad_norm": 3.0944089889526367, "learning_rate": 1.3249680646500175e-05, "loss": 0.7393, "step": 16106 }, { "epoch": 0.41, "grad_norm": 1.634320616722107, "learning_rate": 1.3248895548452066e-05, "loss": 0.6592, "step": 16107 }, { "epoch": 0.41, "grad_norm": 1.4112471342086792, "learning_rate": 1.3248110428014613e-05, "loss": 0.5802, "step": 16108 }, { "epoch": 0.41, "grad_norm": 1.839905023574829, "learning_rate": 1.3247325285193223e-05, "loss": 0.5204, "step": 16109 }, { "epoch": 0.41, "grad_norm": 3.771667957305908, "learning_rate": 1.3246540119993303e-05, "loss": 0.544, "step": 16110 }, { "epoch": 0.41, "grad_norm": 1.5015854835510254, "learning_rate": 1.3245754932420274e-05, "loss": 0.6132, "step": 16111 }, { "epoch": 0.41, "grad_norm": 9.803589820861816, "learning_rate": 1.3244969722479537e-05, "loss": 0.8189, "step": 16112 }, { "epoch": 0.41, "grad_norm": 2.7473742961883545, "learning_rate": 1.324418449017651e-05, "loss": 0.5621, "step": 16113 }, { "epoch": 0.41, "grad_norm": 1.9982572793960571, "learning_rate": 1.3243399235516601e-05, "loss": 0.5178, "step": 16114 }, { "epoch": 0.41, "grad_norm": 2.324026346206665, "learning_rate": 1.3242613958505222e-05, "loss": 0.5133, "step": 16115 }, { "epoch": 0.41, "grad_norm": 2.690854549407959, "learning_rate": 1.3241828659147784e-05, "loss": 0.6084, "step": 16116 }, { "epoch": 0.41, "grad_norm": 1.101768136024475, "learning_rate": 1.32410433374497e-05, "loss": 0.5618, "step": 16117 }, { "epoch": 0.41, "grad_norm": 1.4508708715438843, "learning_rate": 1.3240257993416383e-05, "loss": 0.5504, "step": 16118 }, { "epoch": 0.41, "grad_norm": 2.183729410171509, "learning_rate": 1.3239472627053242e-05, "loss": 0.4576, "step": 16119 }, { "epoch": 0.41, "grad_norm": 4.430200099945068, "learning_rate": 1.3238687238365693e-05, "loss": 0.4764, "step": 16120 }, { "epoch": 0.41, "grad_norm": 1.704955816268921, "learning_rate": 1.3237901827359144e-05, "loss": 0.5631, "step": 16121 }, { "epoch": 0.41, "grad_norm": 2.796640396118164, "learning_rate": 1.3237116394039008e-05, "loss": 0.6579, "step": 16122 }, { "epoch": 0.41, "grad_norm": 1.2110320329666138, "learning_rate": 1.3236330938410703e-05, "loss": 0.5696, "step": 16123 }, { "epoch": 0.41, "grad_norm": 3.1039111614227295, "learning_rate": 1.323554546047964e-05, "loss": 0.5878, "step": 16124 }, { "epoch": 0.41, "grad_norm": 1.987860083580017, "learning_rate": 1.3234759960251227e-05, "loss": 0.6986, "step": 16125 }, { "epoch": 0.41, "grad_norm": 4.925377368927002, "learning_rate": 1.323397443773088e-05, "loss": 0.6335, "step": 16126 }, { "epoch": 0.41, "grad_norm": 1.5108921527862549, "learning_rate": 1.3233188892924016e-05, "loss": 0.5337, "step": 16127 }, { "epoch": 0.41, "grad_norm": 1.9267674684524536, "learning_rate": 1.3232403325836045e-05, "loss": 0.7212, "step": 16128 }, { "epoch": 0.41, "grad_norm": 1.1602721214294434, "learning_rate": 1.323161773647238e-05, "loss": 0.5065, "step": 16129 }, { "epoch": 0.41, "grad_norm": 1.4938021898269653, "learning_rate": 1.3230832124838436e-05, "loss": 0.4783, "step": 16130 }, { "epoch": 0.41, "grad_norm": 2.586374282836914, "learning_rate": 1.3230046490939625e-05, "loss": 0.7703, "step": 16131 }, { "epoch": 0.41, "grad_norm": 1.5239399671554565, "learning_rate": 1.3229260834781367e-05, "loss": 0.5838, "step": 16132 }, { "epoch": 0.41, "grad_norm": 2.287982702255249, "learning_rate": 1.3228475156369069e-05, "loss": 0.6079, "step": 16133 }, { "epoch": 0.41, "grad_norm": 2.5731866359710693, "learning_rate": 1.3227689455708147e-05, "loss": 0.4856, "step": 16134 }, { "epoch": 0.41, "grad_norm": 1.4659148454666138, "learning_rate": 1.3226903732804022e-05, "loss": 0.508, "step": 16135 }, { "epoch": 0.41, "grad_norm": 1.5255889892578125, "learning_rate": 1.3226117987662099e-05, "loss": 0.6185, "step": 16136 }, { "epoch": 0.41, "grad_norm": 3.1196277141571045, "learning_rate": 1.3225332220287804e-05, "loss": 0.657, "step": 16137 }, { "epoch": 0.41, "grad_norm": 1.5447591543197632, "learning_rate": 1.322454643068654e-05, "loss": 0.7267, "step": 16138 }, { "epoch": 0.41, "grad_norm": 2.4321463108062744, "learning_rate": 1.3223760618863728e-05, "loss": 0.8668, "step": 16139 }, { "epoch": 0.41, "grad_norm": 2.4577553272247314, "learning_rate": 1.3222974784824787e-05, "loss": 0.4608, "step": 16140 }, { "epoch": 0.41, "grad_norm": 2.3533709049224854, "learning_rate": 1.3222188928575124e-05, "loss": 0.594, "step": 16141 }, { "epoch": 0.41, "grad_norm": 2.279690742492676, "learning_rate": 1.3221403050120163e-05, "loss": 0.6876, "step": 16142 }, { "epoch": 0.41, "grad_norm": 1.4255380630493164, "learning_rate": 1.3220617149465316e-05, "loss": 0.5017, "step": 16143 }, { "epoch": 0.41, "grad_norm": 1.854986548423767, "learning_rate": 1.3219831226615996e-05, "loss": 0.5441, "step": 16144 }, { "epoch": 0.41, "grad_norm": 2.0969438552856445, "learning_rate": 1.3219045281577624e-05, "loss": 0.5586, "step": 16145 }, { "epoch": 0.41, "grad_norm": 4.736588478088379, "learning_rate": 1.3218259314355612e-05, "loss": 0.7547, "step": 16146 }, { "epoch": 0.41, "grad_norm": 1.2904951572418213, "learning_rate": 1.3217473324955384e-05, "loss": 0.6435, "step": 16147 }, { "epoch": 0.41, "grad_norm": 1.4629333019256592, "learning_rate": 1.3216687313382347e-05, "loss": 0.5141, "step": 16148 }, { "epoch": 0.41, "grad_norm": 2.6318469047546387, "learning_rate": 1.3215901279641925e-05, "loss": 0.5114, "step": 16149 }, { "epoch": 0.41, "grad_norm": 1.617210865020752, "learning_rate": 1.3215115223739527e-05, "loss": 0.7113, "step": 16150 }, { "epoch": 0.41, "grad_norm": 2.123544931411743, "learning_rate": 1.321432914568058e-05, "loss": 0.6507, "step": 16151 }, { "epoch": 0.41, "grad_norm": 1.06902015209198, "learning_rate": 1.3213543045470495e-05, "loss": 0.4546, "step": 16152 }, { "epoch": 0.41, "grad_norm": 1.4942080974578857, "learning_rate": 1.3212756923114686e-05, "loss": 0.5644, "step": 16153 }, { "epoch": 0.41, "grad_norm": 1.2321419715881348, "learning_rate": 1.3211970778618578e-05, "loss": 0.4678, "step": 16154 }, { "epoch": 0.41, "grad_norm": 1.1457439661026, "learning_rate": 1.3211184611987587e-05, "loss": 0.6375, "step": 16155 }, { "epoch": 0.41, "grad_norm": 4.021166801452637, "learning_rate": 1.3210398423227126e-05, "loss": 0.6751, "step": 16156 }, { "epoch": 0.41, "grad_norm": 2.7949278354644775, "learning_rate": 1.320961221234262e-05, "loss": 0.451, "step": 16157 }, { "epoch": 0.41, "grad_norm": 1.915594220161438, "learning_rate": 1.320882597933948e-05, "loss": 0.4828, "step": 16158 }, { "epoch": 0.41, "grad_norm": 1.6224533319473267, "learning_rate": 1.3208039724223126e-05, "loss": 0.5498, "step": 16159 }, { "epoch": 0.41, "grad_norm": 2.4049746990203857, "learning_rate": 1.320725344699898e-05, "loss": 0.7053, "step": 16160 }, { "epoch": 0.41, "grad_norm": 3.297454357147217, "learning_rate": 1.3206467147672455e-05, "loss": 0.6907, "step": 16161 }, { "epoch": 0.41, "grad_norm": 2.6086666584014893, "learning_rate": 1.320568082624898e-05, "loss": 0.6531, "step": 16162 }, { "epoch": 0.41, "grad_norm": 1.4714765548706055, "learning_rate": 1.3204894482733958e-05, "loss": 0.5135, "step": 16163 }, { "epoch": 0.41, "grad_norm": 1.2955379486083984, "learning_rate": 1.3204108117132823e-05, "loss": 0.585, "step": 16164 }, { "epoch": 0.41, "grad_norm": 4.14393424987793, "learning_rate": 1.3203321729450985e-05, "loss": 0.4657, "step": 16165 }, { "epoch": 0.41, "grad_norm": 0.8482614755630493, "learning_rate": 1.3202535319693865e-05, "loss": 0.4617, "step": 16166 }, { "epoch": 0.41, "grad_norm": 1.7620056867599487, "learning_rate": 1.3201748887866888e-05, "loss": 0.5988, "step": 16167 }, { "epoch": 0.41, "grad_norm": 3.1668500900268555, "learning_rate": 1.3200962433975466e-05, "loss": 0.598, "step": 16168 }, { "epoch": 0.41, "grad_norm": 3.401304006576538, "learning_rate": 1.3200175958025025e-05, "loss": 0.5432, "step": 16169 }, { "epoch": 0.41, "grad_norm": 1.6750472784042358, "learning_rate": 1.3199389460020977e-05, "loss": 0.5669, "step": 16170 }, { "epoch": 0.41, "grad_norm": 2.115166187286377, "learning_rate": 1.319860293996875e-05, "loss": 0.4852, "step": 16171 }, { "epoch": 0.41, "grad_norm": 1.8764091730117798, "learning_rate": 1.3197816397873762e-05, "loss": 0.5834, "step": 16172 }, { "epoch": 0.41, "grad_norm": 1.662965178489685, "learning_rate": 1.319702983374143e-05, "loss": 0.5742, "step": 16173 }, { "epoch": 0.41, "grad_norm": 2.8504037857055664, "learning_rate": 1.3196243247577178e-05, "loss": 0.5573, "step": 16174 }, { "epoch": 0.41, "grad_norm": 8.266140937805176, "learning_rate": 1.3195456639386426e-05, "loss": 0.8117, "step": 16175 }, { "epoch": 0.41, "grad_norm": 2.7659106254577637, "learning_rate": 1.3194670009174596e-05, "loss": 0.6727, "step": 16176 }, { "epoch": 0.41, "grad_norm": 1.8746641874313354, "learning_rate": 1.3193883356947103e-05, "loss": 0.6069, "step": 16177 }, { "epoch": 0.41, "grad_norm": 1.591013789176941, "learning_rate": 1.3193096682709374e-05, "loss": 0.606, "step": 16178 }, { "epoch": 0.41, "grad_norm": 14.474678993225098, "learning_rate": 1.319230998646683e-05, "loss": 0.631, "step": 16179 }, { "epoch": 0.41, "grad_norm": 1.4755722284317017, "learning_rate": 1.319152326822489e-05, "loss": 0.5642, "step": 16180 }, { "epoch": 0.41, "grad_norm": 5.095833778381348, "learning_rate": 1.3190736527988981e-05, "loss": 0.5062, "step": 16181 }, { "epoch": 0.41, "grad_norm": 1.310636281967163, "learning_rate": 1.3189949765764516e-05, "loss": 0.4284, "step": 16182 }, { "epoch": 0.41, "grad_norm": 3.166369676589966, "learning_rate": 1.318916298155692e-05, "loss": 0.5463, "step": 16183 }, { "epoch": 0.41, "grad_norm": 1.9478983879089355, "learning_rate": 1.3188376175371622e-05, "loss": 0.4184, "step": 16184 }, { "epoch": 0.41, "grad_norm": 2.5970230102539062, "learning_rate": 1.3187589347214034e-05, "loss": 0.5725, "step": 16185 }, { "epoch": 0.41, "grad_norm": 1.7422007322311401, "learning_rate": 1.3186802497089585e-05, "loss": 0.5862, "step": 16186 }, { "epoch": 0.41, "grad_norm": 1.1706048250198364, "learning_rate": 1.3186015625003692e-05, "loss": 0.6331, "step": 16187 }, { "epoch": 0.41, "grad_norm": 1.5001599788665771, "learning_rate": 1.3185228730961785e-05, "loss": 0.5749, "step": 16188 }, { "epoch": 0.41, "grad_norm": 3.14815092086792, "learning_rate": 1.3184441814969282e-05, "loss": 0.6032, "step": 16189 }, { "epoch": 0.41, "grad_norm": 2.393554210662842, "learning_rate": 1.3183654877031605e-05, "loss": 0.7763, "step": 16190 }, { "epoch": 0.41, "grad_norm": 1.099797248840332, "learning_rate": 1.318286791715418e-05, "loss": 0.5015, "step": 16191 }, { "epoch": 0.42, "grad_norm": 4.809372425079346, "learning_rate": 1.3182080935342428e-05, "loss": 0.6892, "step": 16192 }, { "epoch": 0.42, "grad_norm": 4.163505554199219, "learning_rate": 1.3181293931601775e-05, "loss": 0.7459, "step": 16193 }, { "epoch": 0.42, "grad_norm": 2.4683351516723633, "learning_rate": 1.318050690593764e-05, "loss": 0.5799, "step": 16194 }, { "epoch": 0.42, "grad_norm": 9.795742988586426, "learning_rate": 1.3179719858355452e-05, "loss": 0.6175, "step": 16195 }, { "epoch": 0.42, "grad_norm": 4.3710222244262695, "learning_rate": 1.3178932788860633e-05, "loss": 0.6698, "step": 16196 }, { "epoch": 0.42, "grad_norm": 1.5585381984710693, "learning_rate": 1.3178145697458604e-05, "loss": 0.595, "step": 16197 }, { "epoch": 0.42, "grad_norm": 1.3920998573303223, "learning_rate": 1.3177358584154795e-05, "loss": 0.5202, "step": 16198 }, { "epoch": 0.42, "grad_norm": 2.9452614784240723, "learning_rate": 1.3176571448954626e-05, "loss": 0.6186, "step": 16199 }, { "epoch": 0.42, "grad_norm": 1.2719331979751587, "learning_rate": 1.317578429186352e-05, "loss": 0.4993, "step": 16200 }, { "epoch": 0.42, "grad_norm": 1.1433268785476685, "learning_rate": 1.3174997112886905e-05, "loss": 0.5855, "step": 16201 }, { "epoch": 0.42, "grad_norm": 1.7636820077896118, "learning_rate": 1.3174209912030203e-05, "loss": 0.6357, "step": 16202 }, { "epoch": 0.42, "grad_norm": 1.7776355743408203, "learning_rate": 1.3173422689298843e-05, "loss": 0.4424, "step": 16203 }, { "epoch": 0.42, "grad_norm": 1.521438717842102, "learning_rate": 1.3172635444698246e-05, "loss": 0.6443, "step": 16204 }, { "epoch": 0.42, "grad_norm": 1.1858474016189575, "learning_rate": 1.317184817823384e-05, "loss": 0.5443, "step": 16205 }, { "epoch": 0.42, "grad_norm": 1.5279752016067505, "learning_rate": 1.3171060889911046e-05, "loss": 0.5266, "step": 16206 }, { "epoch": 0.42, "grad_norm": 2.648381233215332, "learning_rate": 1.3170273579735295e-05, "loss": 0.5747, "step": 16207 }, { "epoch": 0.42, "grad_norm": 1.781592607498169, "learning_rate": 1.3169486247712014e-05, "loss": 0.641, "step": 16208 }, { "epoch": 0.42, "grad_norm": 4.9508771896362305, "learning_rate": 1.316869889384662e-05, "loss": 0.5436, "step": 16209 }, { "epoch": 0.42, "grad_norm": 4.584483623504639, "learning_rate": 1.3167911518144546e-05, "loss": 0.7074, "step": 16210 }, { "epoch": 0.42, "grad_norm": 1.3020179271697998, "learning_rate": 1.3167124120611214e-05, "loss": 0.5317, "step": 16211 }, { "epoch": 0.42, "grad_norm": 8.267867088317871, "learning_rate": 1.3166336701252053e-05, "loss": 0.4832, "step": 16212 }, { "epoch": 0.42, "grad_norm": 4.553502082824707, "learning_rate": 1.3165549260072492e-05, "loss": 0.5864, "step": 16213 }, { "epoch": 0.42, "grad_norm": 3.9295456409454346, "learning_rate": 1.3164761797077949e-05, "loss": 0.5454, "step": 16214 }, { "epoch": 0.42, "grad_norm": 2.4100561141967773, "learning_rate": 1.3163974312273861e-05, "loss": 0.7599, "step": 16215 }, { "epoch": 0.42, "grad_norm": 1.2086093425750732, "learning_rate": 1.3163186805665647e-05, "loss": 0.6628, "step": 16216 }, { "epoch": 0.42, "grad_norm": 2.3540449142456055, "learning_rate": 1.3162399277258739e-05, "loss": 0.5706, "step": 16217 }, { "epoch": 0.42, "grad_norm": 1.336026906967163, "learning_rate": 1.3161611727058562e-05, "loss": 0.4821, "step": 16218 }, { "epoch": 0.42, "grad_norm": 8.176534652709961, "learning_rate": 1.316082415507054e-05, "loss": 0.7726, "step": 16219 }, { "epoch": 0.42, "grad_norm": 5.5949835777282715, "learning_rate": 1.3160036561300104e-05, "loss": 0.6796, "step": 16220 }, { "epoch": 0.42, "grad_norm": 1.59907865524292, "learning_rate": 1.3159248945752683e-05, "loss": 0.5857, "step": 16221 }, { "epoch": 0.42, "grad_norm": 0.8628712296485901, "learning_rate": 1.3158461308433705e-05, "loss": 0.4684, "step": 16222 }, { "epoch": 0.42, "grad_norm": 1.7662100791931152, "learning_rate": 1.3157673649348592e-05, "loss": 0.5115, "step": 16223 }, { "epoch": 0.42, "grad_norm": 1.786263346672058, "learning_rate": 1.315688596850278e-05, "loss": 0.6167, "step": 16224 }, { "epoch": 0.42, "grad_norm": 4.317847728729248, "learning_rate": 1.3156098265901691e-05, "loss": 0.775, "step": 16225 }, { "epoch": 0.42, "grad_norm": 1.9075977802276611, "learning_rate": 1.3155310541550754e-05, "loss": 0.586, "step": 16226 }, { "epoch": 0.42, "grad_norm": 1.703416109085083, "learning_rate": 1.3154522795455403e-05, "loss": 0.5378, "step": 16227 }, { "epoch": 0.42, "grad_norm": 4.688594341278076, "learning_rate": 1.315373502762106e-05, "loss": 0.7052, "step": 16228 }, { "epoch": 0.42, "grad_norm": 3.0070033073425293, "learning_rate": 1.315294723805316e-05, "loss": 0.5452, "step": 16229 }, { "epoch": 0.42, "grad_norm": 1.7358543872833252, "learning_rate": 1.3152159426757122e-05, "loss": 0.5641, "step": 16230 }, { "epoch": 0.42, "grad_norm": 1.5907976627349854, "learning_rate": 1.3151371593738386e-05, "loss": 0.6162, "step": 16231 }, { "epoch": 0.42, "grad_norm": 1.7586323022842407, "learning_rate": 1.3150583739002378e-05, "loss": 0.5384, "step": 16232 }, { "epoch": 0.42, "grad_norm": 12.152508735656738, "learning_rate": 1.3149795862554525e-05, "loss": 0.6598, "step": 16233 }, { "epoch": 0.42, "grad_norm": 1.1223900318145752, "learning_rate": 1.3149007964400258e-05, "loss": 0.4609, "step": 16234 }, { "epoch": 0.42, "grad_norm": 1.8094991445541382, "learning_rate": 1.3148220044545004e-05, "loss": 0.6268, "step": 16235 }, { "epoch": 0.42, "grad_norm": 2.161234140396118, "learning_rate": 1.3147432102994197e-05, "loss": 0.6466, "step": 16236 }, { "epoch": 0.42, "grad_norm": 1.8900865316390991, "learning_rate": 1.3146644139753266e-05, "loss": 0.4802, "step": 16237 }, { "epoch": 0.42, "grad_norm": 1.6635425090789795, "learning_rate": 1.3145856154827642e-05, "loss": 0.6668, "step": 16238 }, { "epoch": 0.42, "grad_norm": 1.4934052228927612, "learning_rate": 1.3145068148222753e-05, "loss": 0.5234, "step": 16239 }, { "epoch": 0.42, "grad_norm": 1.6953959465026855, "learning_rate": 1.3144280119944027e-05, "loss": 0.6132, "step": 16240 }, { "epoch": 0.42, "grad_norm": 1.1231848001480103, "learning_rate": 1.3143492069996902e-05, "loss": 0.5526, "step": 16241 }, { "epoch": 0.42, "grad_norm": 2.7183499336242676, "learning_rate": 1.3142703998386802e-05, "loss": 0.5347, "step": 16242 }, { "epoch": 0.42, "grad_norm": 4.286718368530273, "learning_rate": 1.314191590511916e-05, "loss": 0.5646, "step": 16243 }, { "epoch": 0.42, "grad_norm": 12.278230667114258, "learning_rate": 1.3141127790199409e-05, "loss": 0.6923, "step": 16244 }, { "epoch": 0.42, "grad_norm": 4.876790523529053, "learning_rate": 1.3140339653632976e-05, "loss": 0.7236, "step": 16245 }, { "epoch": 0.42, "grad_norm": 3.2305030822753906, "learning_rate": 1.31395514954253e-05, "loss": 0.5453, "step": 16246 }, { "epoch": 0.42, "grad_norm": 17.4434757232666, "learning_rate": 1.3138763315581804e-05, "loss": 0.622, "step": 16247 }, { "epoch": 0.42, "grad_norm": 2.2135603427886963, "learning_rate": 1.3137975114107923e-05, "loss": 0.5077, "step": 16248 }, { "epoch": 0.42, "grad_norm": 6.687023162841797, "learning_rate": 1.3137186891009092e-05, "loss": 0.6425, "step": 16249 }, { "epoch": 0.42, "grad_norm": 3.6503140926361084, "learning_rate": 1.3136398646290738e-05, "loss": 0.5638, "step": 16250 }, { "epoch": 0.42, "grad_norm": 1.8186476230621338, "learning_rate": 1.3135610379958296e-05, "loss": 0.6091, "step": 16251 }, { "epoch": 0.42, "grad_norm": 3.7551450729370117, "learning_rate": 1.3134822092017194e-05, "loss": 0.7649, "step": 16252 }, { "epoch": 0.42, "grad_norm": 1.8252311944961548, "learning_rate": 1.3134033782472872e-05, "loss": 0.5664, "step": 16253 }, { "epoch": 0.42, "grad_norm": 2.6700632572174072, "learning_rate": 1.3133245451330757e-05, "loss": 0.5526, "step": 16254 }, { "epoch": 0.42, "grad_norm": 2.475712299346924, "learning_rate": 1.3132457098596279e-05, "loss": 0.45, "step": 16255 }, { "epoch": 0.42, "grad_norm": 1.5215204954147339, "learning_rate": 1.313166872427488e-05, "loss": 0.669, "step": 16256 }, { "epoch": 0.42, "grad_norm": 3.715805768966675, "learning_rate": 1.3130880328371984e-05, "loss": 0.5197, "step": 16257 }, { "epoch": 0.42, "grad_norm": 2.1242213249206543, "learning_rate": 1.313009191089303e-05, "loss": 0.5572, "step": 16258 }, { "epoch": 0.42, "grad_norm": 3.865299701690674, "learning_rate": 1.3129303471843447e-05, "loss": 0.6292, "step": 16259 }, { "epoch": 0.42, "grad_norm": 1.857379674911499, "learning_rate": 1.3128515011228671e-05, "loss": 0.6591, "step": 16260 }, { "epoch": 0.42, "grad_norm": 1.6561522483825684, "learning_rate": 1.3127726529054137e-05, "loss": 0.6029, "step": 16261 }, { "epoch": 0.42, "grad_norm": 2.9215691089630127, "learning_rate": 1.3126938025325276e-05, "loss": 0.676, "step": 16262 }, { "epoch": 0.42, "grad_norm": 7.971997261047363, "learning_rate": 1.3126149500047522e-05, "loss": 0.6955, "step": 16263 }, { "epoch": 0.42, "grad_norm": 1.5188264846801758, "learning_rate": 1.312536095322631e-05, "loss": 0.4977, "step": 16264 }, { "epoch": 0.42, "grad_norm": 1.7947238683700562, "learning_rate": 1.3124572384867071e-05, "loss": 0.5767, "step": 16265 }, { "epoch": 0.42, "grad_norm": 1.2954341173171997, "learning_rate": 1.3123783794975247e-05, "loss": 0.524, "step": 16266 }, { "epoch": 0.42, "grad_norm": 3.120145082473755, "learning_rate": 1.3122995183556264e-05, "loss": 0.4582, "step": 16267 }, { "epoch": 0.42, "grad_norm": 4.01954460144043, "learning_rate": 1.3122206550615564e-05, "loss": 0.9442, "step": 16268 }, { "epoch": 0.42, "grad_norm": 1.750563621520996, "learning_rate": 1.3121417896158576e-05, "loss": 0.3869, "step": 16269 }, { "epoch": 0.42, "grad_norm": 3.445791721343994, "learning_rate": 1.3120629220190736e-05, "loss": 0.6801, "step": 16270 }, { "epoch": 0.42, "grad_norm": 1.6553128957748413, "learning_rate": 1.3119840522717481e-05, "loss": 0.6326, "step": 16271 }, { "epoch": 0.42, "grad_norm": 2.589062213897705, "learning_rate": 1.3119051803744243e-05, "loss": 0.842, "step": 16272 }, { "epoch": 0.42, "grad_norm": 1.0034743547439575, "learning_rate": 1.3118263063276466e-05, "loss": 0.589, "step": 16273 }, { "epoch": 0.42, "grad_norm": 2.947714328765869, "learning_rate": 1.3117474301319572e-05, "loss": 0.549, "step": 16274 }, { "epoch": 0.42, "grad_norm": 1.8933076858520508, "learning_rate": 1.3116685517879007e-05, "loss": 0.5246, "step": 16275 }, { "epoch": 0.42, "grad_norm": 1.783159852027893, "learning_rate": 1.3115896712960203e-05, "loss": 0.453, "step": 16276 }, { "epoch": 0.42, "grad_norm": 7.094086170196533, "learning_rate": 1.3115107886568596e-05, "loss": 0.5303, "step": 16277 }, { "epoch": 0.42, "grad_norm": 2.005812168121338, "learning_rate": 1.3114319038709624e-05, "loss": 0.6921, "step": 16278 }, { "epoch": 0.42, "grad_norm": 2.1410343647003174, "learning_rate": 1.311353016938872e-05, "loss": 0.5343, "step": 16279 }, { "epoch": 0.42, "grad_norm": 2.0531258583068848, "learning_rate": 1.3112741278611321e-05, "loss": 0.4293, "step": 16280 }, { "epoch": 0.42, "grad_norm": 3.2976181507110596, "learning_rate": 1.311195236638287e-05, "loss": 0.6233, "step": 16281 }, { "epoch": 0.42, "grad_norm": 1.0344983339309692, "learning_rate": 1.3111163432708792e-05, "loss": 0.6256, "step": 16282 }, { "epoch": 0.42, "grad_norm": 1.5931589603424072, "learning_rate": 1.3110374477594534e-05, "loss": 0.6797, "step": 16283 }, { "epoch": 0.42, "grad_norm": 1.803612470626831, "learning_rate": 1.3109585501045528e-05, "loss": 0.5817, "step": 16284 }, { "epoch": 0.42, "grad_norm": 2.4794163703918457, "learning_rate": 1.3108796503067214e-05, "loss": 0.5836, "step": 16285 }, { "epoch": 0.42, "grad_norm": 13.303414344787598, "learning_rate": 1.3108007483665027e-05, "loss": 0.576, "step": 16286 }, { "epoch": 0.42, "grad_norm": 1.4270384311676025, "learning_rate": 1.3107218442844403e-05, "loss": 0.589, "step": 16287 }, { "epoch": 0.42, "grad_norm": 1.3548904657363892, "learning_rate": 1.3106429380610783e-05, "loss": 0.5573, "step": 16288 }, { "epoch": 0.42, "grad_norm": 2.3187780380249023, "learning_rate": 1.3105640296969604e-05, "loss": 0.5173, "step": 16289 }, { "epoch": 0.42, "grad_norm": 3.6539833545684814, "learning_rate": 1.3104851191926302e-05, "loss": 0.6024, "step": 16290 }, { "epoch": 0.42, "grad_norm": 2.479363441467285, "learning_rate": 1.3104062065486316e-05, "loss": 0.7883, "step": 16291 }, { "epoch": 0.42, "grad_norm": 2.0715348720550537, "learning_rate": 1.3103272917655085e-05, "loss": 0.5886, "step": 16292 }, { "epoch": 0.42, "grad_norm": 1.4645254611968994, "learning_rate": 1.3102483748438048e-05, "loss": 0.5227, "step": 16293 }, { "epoch": 0.42, "grad_norm": 1.3467122316360474, "learning_rate": 1.3101694557840641e-05, "loss": 0.3421, "step": 16294 }, { "epoch": 0.42, "grad_norm": 1.9806610345840454, "learning_rate": 1.3100905345868305e-05, "loss": 0.5987, "step": 16295 }, { "epoch": 0.42, "grad_norm": 5.882818222045898, "learning_rate": 1.3100116112526476e-05, "loss": 0.6649, "step": 16296 }, { "epoch": 0.42, "grad_norm": 1.1301517486572266, "learning_rate": 1.3099326857820596e-05, "loss": 0.5985, "step": 16297 }, { "epoch": 0.42, "grad_norm": 2.153564214706421, "learning_rate": 1.3098537581756099e-05, "loss": 0.5808, "step": 16298 }, { "epoch": 0.42, "grad_norm": 6.173220634460449, "learning_rate": 1.309774828433843e-05, "loss": 0.5684, "step": 16299 }, { "epoch": 0.42, "grad_norm": 3.354975938796997, "learning_rate": 1.3096958965573028e-05, "loss": 0.8261, "step": 16300 }, { "epoch": 0.42, "grad_norm": 2.66292667388916, "learning_rate": 1.3096169625465327e-05, "loss": 0.5692, "step": 16301 }, { "epoch": 0.42, "grad_norm": 2.7233760356903076, "learning_rate": 1.3095380264020773e-05, "loss": 0.62, "step": 16302 }, { "epoch": 0.42, "grad_norm": 1.6478924751281738, "learning_rate": 1.3094590881244802e-05, "loss": 0.4941, "step": 16303 }, { "epoch": 0.42, "grad_norm": 1.7341208457946777, "learning_rate": 1.3093801477142854e-05, "loss": 0.6541, "step": 16304 }, { "epoch": 0.42, "grad_norm": 1.6860694885253906, "learning_rate": 1.3093012051720371e-05, "loss": 0.6565, "step": 16305 }, { "epoch": 0.42, "grad_norm": 3.666900157928467, "learning_rate": 1.309222260498279e-05, "loss": 0.5157, "step": 16306 }, { "epoch": 0.42, "grad_norm": 2.8194093704223633, "learning_rate": 1.3091433136935556e-05, "loss": 0.555, "step": 16307 }, { "epoch": 0.42, "grad_norm": 2.008096694946289, "learning_rate": 1.3090643647584106e-05, "loss": 0.4562, "step": 16308 }, { "epoch": 0.42, "grad_norm": 1.891950011253357, "learning_rate": 1.3089854136933885e-05, "loss": 0.5926, "step": 16309 }, { "epoch": 0.42, "grad_norm": 2.777510166168213, "learning_rate": 1.3089064604990324e-05, "loss": 0.6749, "step": 16310 }, { "epoch": 0.42, "grad_norm": 1.4548755884170532, "learning_rate": 1.3088275051758875e-05, "loss": 0.5447, "step": 16311 }, { "epoch": 0.42, "grad_norm": 2.018092155456543, "learning_rate": 1.3087485477244975e-05, "loss": 0.5078, "step": 16312 }, { "epoch": 0.42, "grad_norm": 2.117189407348633, "learning_rate": 1.3086695881454062e-05, "loss": 0.5746, "step": 16313 }, { "epoch": 0.42, "grad_norm": 1.3146321773529053, "learning_rate": 1.3085906264391581e-05, "loss": 0.5309, "step": 16314 }, { "epoch": 0.42, "grad_norm": 3.217212677001953, "learning_rate": 1.3085116626062973e-05, "loss": 0.5258, "step": 16315 }, { "epoch": 0.42, "grad_norm": 1.7791410684585571, "learning_rate": 1.3084326966473679e-05, "loss": 0.5612, "step": 16316 }, { "epoch": 0.42, "grad_norm": 2.739351272583008, "learning_rate": 1.3083537285629145e-05, "loss": 0.5323, "step": 16317 }, { "epoch": 0.42, "grad_norm": 1.7178394794464111, "learning_rate": 1.3082747583534805e-05, "loss": 0.5652, "step": 16318 }, { "epoch": 0.42, "grad_norm": 4.416489601135254, "learning_rate": 1.3081957860196109e-05, "loss": 0.8266, "step": 16319 }, { "epoch": 0.42, "grad_norm": 1.40420663356781, "learning_rate": 1.308116811561849e-05, "loss": 0.5595, "step": 16320 }, { "epoch": 0.42, "grad_norm": 2.275568962097168, "learning_rate": 1.3080378349807401e-05, "loss": 0.4998, "step": 16321 }, { "epoch": 0.42, "grad_norm": 1.2401591539382935, "learning_rate": 1.307958856276828e-05, "loss": 0.5894, "step": 16322 }, { "epoch": 0.42, "grad_norm": 2.232248544692993, "learning_rate": 1.3078798754506565e-05, "loss": 0.4922, "step": 16323 }, { "epoch": 0.42, "grad_norm": 7.212932586669922, "learning_rate": 1.3078008925027708e-05, "loss": 0.8633, "step": 16324 }, { "epoch": 0.42, "grad_norm": 1.3533506393432617, "learning_rate": 1.3077219074337144e-05, "loss": 0.5649, "step": 16325 }, { "epoch": 0.42, "grad_norm": 1.655330777168274, "learning_rate": 1.3076429202440322e-05, "loss": 0.6373, "step": 16326 }, { "epoch": 0.42, "grad_norm": 3.028419017791748, "learning_rate": 1.307563930934268e-05, "loss": 0.4106, "step": 16327 }, { "epoch": 0.42, "grad_norm": 1.9378691911697388, "learning_rate": 1.3074849395049664e-05, "loss": 0.7737, "step": 16328 }, { "epoch": 0.42, "grad_norm": 1.4357290267944336, "learning_rate": 1.3074059459566723e-05, "loss": 0.6719, "step": 16329 }, { "epoch": 0.42, "grad_norm": 1.4245021343231201, "learning_rate": 1.3073269502899291e-05, "loss": 0.5255, "step": 16330 }, { "epoch": 0.42, "grad_norm": 4.367660999298096, "learning_rate": 1.3072479525052817e-05, "loss": 0.4855, "step": 16331 }, { "epoch": 0.42, "grad_norm": 3.1578595638275146, "learning_rate": 1.3071689526032745e-05, "loss": 0.5267, "step": 16332 }, { "epoch": 0.42, "grad_norm": 2.275942325592041, "learning_rate": 1.3070899505844516e-05, "loss": 0.5103, "step": 16333 }, { "epoch": 0.42, "grad_norm": 1.3210946321487427, "learning_rate": 1.3070109464493583e-05, "loss": 0.462, "step": 16334 }, { "epoch": 0.42, "grad_norm": 2.392756938934326, "learning_rate": 1.3069319401985378e-05, "loss": 0.5601, "step": 16335 }, { "epoch": 0.42, "grad_norm": 2.657609462738037, "learning_rate": 1.3068529318325357e-05, "loss": 0.6642, "step": 16336 }, { "epoch": 0.42, "grad_norm": 2.4578025341033936, "learning_rate": 1.3067739213518958e-05, "loss": 0.3693, "step": 16337 }, { "epoch": 0.42, "grad_norm": 1.2646501064300537, "learning_rate": 1.3066949087571627e-05, "loss": 0.4792, "step": 16338 }, { "epoch": 0.42, "grad_norm": 6.590734958648682, "learning_rate": 1.3066158940488812e-05, "loss": 0.6563, "step": 16339 }, { "epoch": 0.42, "grad_norm": 1.3784797191619873, "learning_rate": 1.306536877227595e-05, "loss": 0.537, "step": 16340 }, { "epoch": 0.42, "grad_norm": 1.8677444458007812, "learning_rate": 1.3064578582938498e-05, "loss": 0.5582, "step": 16341 }, { "epoch": 0.42, "grad_norm": 5.875698089599609, "learning_rate": 1.3063788372481894e-05, "loss": 0.5973, "step": 16342 }, { "epoch": 0.42, "grad_norm": 2.764163017272949, "learning_rate": 1.3062998140911588e-05, "loss": 0.5281, "step": 16343 }, { "epoch": 0.42, "grad_norm": 2.109558343887329, "learning_rate": 1.3062207888233019e-05, "loss": 0.5878, "step": 16344 }, { "epoch": 0.42, "grad_norm": 9.199172973632812, "learning_rate": 1.3061417614451637e-05, "loss": 0.5854, "step": 16345 }, { "epoch": 0.42, "grad_norm": 3.095756769180298, "learning_rate": 1.3060627319572892e-05, "loss": 0.5677, "step": 16346 }, { "epoch": 0.42, "grad_norm": 2.0166661739349365, "learning_rate": 1.3059837003602222e-05, "loss": 0.4675, "step": 16347 }, { "epoch": 0.42, "grad_norm": 2.9386775493621826, "learning_rate": 1.305904666654508e-05, "loss": 0.8012, "step": 16348 }, { "epoch": 0.42, "grad_norm": 2.3441359996795654, "learning_rate": 1.3058256308406912e-05, "loss": 0.6796, "step": 16349 }, { "epoch": 0.42, "grad_norm": 5.624656677246094, "learning_rate": 1.305746592919316e-05, "loss": 0.7768, "step": 16350 }, { "epoch": 0.42, "grad_norm": 1.9801703691482544, "learning_rate": 1.3056675528909275e-05, "loss": 0.5331, "step": 16351 }, { "epoch": 0.42, "grad_norm": 1.399544596672058, "learning_rate": 1.3055885107560701e-05, "loss": 0.4573, "step": 16352 }, { "epoch": 0.42, "grad_norm": 1.6935501098632812, "learning_rate": 1.305509466515289e-05, "loss": 0.5492, "step": 16353 }, { "epoch": 0.42, "grad_norm": 1.2778044939041138, "learning_rate": 1.3054304201691283e-05, "loss": 0.5932, "step": 16354 }, { "epoch": 0.42, "grad_norm": 1.4252315759658813, "learning_rate": 1.3053513717181335e-05, "loss": 0.6667, "step": 16355 }, { "epoch": 0.42, "grad_norm": 2.375966787338257, "learning_rate": 1.3052723211628484e-05, "loss": 0.6813, "step": 16356 }, { "epoch": 0.42, "grad_norm": 4.115445613861084, "learning_rate": 1.3051932685038183e-05, "loss": 0.6664, "step": 16357 }, { "epoch": 0.42, "grad_norm": 3.8878092765808105, "learning_rate": 1.3051142137415882e-05, "loss": 0.6049, "step": 16358 }, { "epoch": 0.42, "grad_norm": 2.1219475269317627, "learning_rate": 1.305035156876702e-05, "loss": 0.5821, "step": 16359 }, { "epoch": 0.42, "grad_norm": 1.519579291343689, "learning_rate": 1.304956097909706e-05, "loss": 0.606, "step": 16360 }, { "epoch": 0.42, "grad_norm": 1.4561878442764282, "learning_rate": 1.3048770368411436e-05, "loss": 0.5896, "step": 16361 }, { "epoch": 0.42, "grad_norm": 1.4039175510406494, "learning_rate": 1.3047979736715605e-05, "loss": 0.6035, "step": 16362 }, { "epoch": 0.42, "grad_norm": 1.4289090633392334, "learning_rate": 1.304718908401501e-05, "loss": 0.5148, "step": 16363 }, { "epoch": 0.42, "grad_norm": 1.3608800172805786, "learning_rate": 1.3046398410315105e-05, "loss": 0.5203, "step": 16364 }, { "epoch": 0.42, "grad_norm": 1.6413449048995972, "learning_rate": 1.3045607715621334e-05, "loss": 0.5158, "step": 16365 }, { "epoch": 0.42, "grad_norm": 5.262510299682617, "learning_rate": 1.304481699993915e-05, "loss": 0.6165, "step": 16366 }, { "epoch": 0.42, "grad_norm": 1.5489850044250488, "learning_rate": 1.3044026263273998e-05, "loss": 0.6637, "step": 16367 }, { "epoch": 0.42, "grad_norm": 1.5400856733322144, "learning_rate": 1.304323550563133e-05, "loss": 0.5282, "step": 16368 }, { "epoch": 0.42, "grad_norm": 2.098081111907959, "learning_rate": 1.3042444727016594e-05, "loss": 0.613, "step": 16369 }, { "epoch": 0.42, "grad_norm": 1.4110504388809204, "learning_rate": 1.3041653927435243e-05, "loss": 0.5486, "step": 16370 }, { "epoch": 0.42, "grad_norm": 2.08384108543396, "learning_rate": 1.304086310689272e-05, "loss": 0.7057, "step": 16371 }, { "epoch": 0.42, "grad_norm": 1.2771751880645752, "learning_rate": 1.3040072265394485e-05, "loss": 0.5385, "step": 16372 }, { "epoch": 0.42, "grad_norm": 2.607670783996582, "learning_rate": 1.3039281402945977e-05, "loss": 0.6146, "step": 16373 }, { "epoch": 0.42, "grad_norm": 4.163516998291016, "learning_rate": 1.303849051955265e-05, "loss": 0.5702, "step": 16374 }, { "epoch": 0.42, "grad_norm": 1.7324107885360718, "learning_rate": 1.3037699615219961e-05, "loss": 0.6601, "step": 16375 }, { "epoch": 0.42, "grad_norm": 1.4156997203826904, "learning_rate": 1.303690868995335e-05, "loss": 0.5485, "step": 16376 }, { "epoch": 0.42, "grad_norm": 2.090888261795044, "learning_rate": 1.3036117743758276e-05, "loss": 0.6566, "step": 16377 }, { "epoch": 0.42, "grad_norm": 1.7557703256607056, "learning_rate": 1.3035326776640183e-05, "loss": 0.6157, "step": 16378 }, { "epoch": 0.42, "grad_norm": 1.8649346828460693, "learning_rate": 1.3034535788604526e-05, "loss": 0.6267, "step": 16379 }, { "epoch": 0.42, "grad_norm": 2.9458065032958984, "learning_rate": 1.3033744779656754e-05, "loss": 0.5564, "step": 16380 }, { "epoch": 0.42, "grad_norm": 3.159885883331299, "learning_rate": 1.3032953749802317e-05, "loss": 0.5706, "step": 16381 }, { "epoch": 0.42, "grad_norm": 2.4620063304901123, "learning_rate": 1.3032162699046674e-05, "loss": 0.4599, "step": 16382 }, { "epoch": 0.42, "grad_norm": 3.2421135902404785, "learning_rate": 1.3031371627395265e-05, "loss": 0.5763, "step": 16383 }, { "epoch": 0.42, "grad_norm": 1.1561423540115356, "learning_rate": 1.3030580534853553e-05, "loss": 0.4366, "step": 16384 }, { "epoch": 0.42, "grad_norm": 5.688292026519775, "learning_rate": 1.3029789421426977e-05, "loss": 0.7788, "step": 16385 }, { "epoch": 0.42, "grad_norm": 2.179468870162964, "learning_rate": 1.3028998287120999e-05, "loss": 0.5489, "step": 16386 }, { "epoch": 0.42, "grad_norm": 0.8975594639778137, "learning_rate": 1.3028207131941071e-05, "loss": 0.5295, "step": 16387 }, { "epoch": 0.42, "grad_norm": 1.6980129480361938, "learning_rate": 1.3027415955892637e-05, "loss": 0.5758, "step": 16388 }, { "epoch": 0.42, "grad_norm": 1.58530592918396, "learning_rate": 1.3026624758981159e-05, "loss": 0.5273, "step": 16389 }, { "epoch": 0.42, "grad_norm": 1.6725882291793823, "learning_rate": 1.3025833541212081e-05, "loss": 0.6491, "step": 16390 }, { "epoch": 0.42, "grad_norm": 1.8484001159667969, "learning_rate": 1.302504230259086e-05, "loss": 0.5794, "step": 16391 }, { "epoch": 0.42, "grad_norm": 3.8919968605041504, "learning_rate": 1.3024251043122948e-05, "loss": 0.711, "step": 16392 }, { "epoch": 0.42, "grad_norm": 16.57990264892578, "learning_rate": 1.3023459762813797e-05, "loss": 0.5715, "step": 16393 }, { "epoch": 0.42, "grad_norm": 1.594642996788025, "learning_rate": 1.3022668461668866e-05, "loss": 0.7033, "step": 16394 }, { "epoch": 0.42, "grad_norm": 3.353050708770752, "learning_rate": 1.3021877139693597e-05, "loss": 0.6407, "step": 16395 }, { "epoch": 0.42, "grad_norm": 6.139402866363525, "learning_rate": 1.302108579689345e-05, "loss": 0.702, "step": 16396 }, { "epoch": 0.42, "grad_norm": 2.822692632675171, "learning_rate": 1.3020294433273881e-05, "loss": 0.6639, "step": 16397 }, { "epoch": 0.42, "grad_norm": 1.9631783962249756, "learning_rate": 1.3019503048840338e-05, "loss": 0.6467, "step": 16398 }, { "epoch": 0.42, "grad_norm": 1.6875513792037964, "learning_rate": 1.301871164359828e-05, "loss": 0.483, "step": 16399 }, { "epoch": 0.42, "grad_norm": 7.264743804931641, "learning_rate": 1.3017920217553153e-05, "loss": 0.7046, "step": 16400 }, { "epoch": 0.42, "grad_norm": 0.8238412141799927, "learning_rate": 1.3017128770710416e-05, "loss": 0.3838, "step": 16401 }, { "epoch": 0.42, "grad_norm": 2.107973098754883, "learning_rate": 1.3016337303075526e-05, "loss": 0.5462, "step": 16402 }, { "epoch": 0.42, "grad_norm": 8.594632148742676, "learning_rate": 1.3015545814653931e-05, "loss": 0.546, "step": 16403 }, { "epoch": 0.42, "grad_norm": 5.2596259117126465, "learning_rate": 1.3014754305451094e-05, "loss": 0.7593, "step": 16404 }, { "epoch": 0.42, "grad_norm": 6.661108016967773, "learning_rate": 1.3013962775472462e-05, "loss": 0.6159, "step": 16405 }, { "epoch": 0.42, "grad_norm": 1.4671963453292847, "learning_rate": 1.3013171224723492e-05, "loss": 0.4541, "step": 16406 }, { "epoch": 0.42, "grad_norm": 3.2231602668762207, "learning_rate": 1.3012379653209637e-05, "loss": 0.5393, "step": 16407 }, { "epoch": 0.42, "grad_norm": 4.275794506072998, "learning_rate": 1.3011588060936355e-05, "loss": 0.6475, "step": 16408 }, { "epoch": 0.42, "grad_norm": 6.971919536590576, "learning_rate": 1.3010796447909101e-05, "loss": 0.7027, "step": 16409 }, { "epoch": 0.42, "grad_norm": 7.727889537811279, "learning_rate": 1.301000481413333e-05, "loss": 0.5892, "step": 16410 }, { "epoch": 0.42, "grad_norm": 1.6469645500183105, "learning_rate": 1.3009213159614497e-05, "loss": 0.5541, "step": 16411 }, { "epoch": 0.42, "grad_norm": 1.9699255228042603, "learning_rate": 1.3008421484358055e-05, "loss": 0.4889, "step": 16412 }, { "epoch": 0.42, "grad_norm": 3.2964365482330322, "learning_rate": 1.3007629788369465e-05, "loss": 0.6892, "step": 16413 }, { "epoch": 0.42, "grad_norm": 1.5588200092315674, "learning_rate": 1.3006838071654179e-05, "loss": 0.6828, "step": 16414 }, { "epoch": 0.42, "grad_norm": 6.501062393188477, "learning_rate": 1.3006046334217652e-05, "loss": 0.5838, "step": 16415 }, { "epoch": 0.42, "grad_norm": 1.247026801109314, "learning_rate": 1.3005254576065347e-05, "loss": 0.5203, "step": 16416 }, { "epoch": 0.42, "grad_norm": 3.693265199661255, "learning_rate": 1.3004462797202712e-05, "loss": 0.7045, "step": 16417 }, { "epoch": 0.42, "grad_norm": 1.7875581979751587, "learning_rate": 1.3003670997635209e-05, "loss": 0.7138, "step": 16418 }, { "epoch": 0.42, "grad_norm": 2.1333065032958984, "learning_rate": 1.300287917736829e-05, "loss": 0.5209, "step": 16419 }, { "epoch": 0.42, "grad_norm": 1.541386604309082, "learning_rate": 1.3002087336407415e-05, "loss": 0.651, "step": 16420 }, { "epoch": 0.42, "grad_norm": 1.636492371559143, "learning_rate": 1.3001295474758042e-05, "loss": 0.5178, "step": 16421 }, { "epoch": 0.42, "grad_norm": 3.833594799041748, "learning_rate": 1.3000503592425625e-05, "loss": 0.7786, "step": 16422 }, { "epoch": 0.42, "grad_norm": 1.3807955980300903, "learning_rate": 1.2999711689415624e-05, "loss": 0.5735, "step": 16423 }, { "epoch": 0.42, "grad_norm": 2.381274938583374, "learning_rate": 1.299891976573349e-05, "loss": 0.5444, "step": 16424 }, { "epoch": 0.42, "grad_norm": 1.709566593170166, "learning_rate": 1.299812782138469e-05, "loss": 0.5735, "step": 16425 }, { "epoch": 0.42, "grad_norm": 1.4813753366470337, "learning_rate": 1.2997335856374676e-05, "loss": 0.5251, "step": 16426 }, { "epoch": 0.42, "grad_norm": 3.3211052417755127, "learning_rate": 1.2996543870708905e-05, "loss": 0.7394, "step": 16427 }, { "epoch": 0.42, "grad_norm": 1.3933963775634766, "learning_rate": 1.2995751864392836e-05, "loss": 0.5798, "step": 16428 }, { "epoch": 0.42, "grad_norm": 2.2410411834716797, "learning_rate": 1.2994959837431927e-05, "loss": 0.5823, "step": 16429 }, { "epoch": 0.42, "grad_norm": 3.8929691314697266, "learning_rate": 1.2994167789831638e-05, "loss": 0.5866, "step": 16430 }, { "epoch": 0.42, "grad_norm": 1.6538773775100708, "learning_rate": 1.2993375721597424e-05, "loss": 0.5144, "step": 16431 }, { "epoch": 0.42, "grad_norm": 7.449024200439453, "learning_rate": 1.2992583632734745e-05, "loss": 0.5839, "step": 16432 }, { "epoch": 0.42, "grad_norm": 1.7631756067276, "learning_rate": 1.2991791523249062e-05, "loss": 0.7135, "step": 16433 }, { "epoch": 0.42, "grad_norm": 1.5548192262649536, "learning_rate": 1.2990999393145828e-05, "loss": 0.5236, "step": 16434 }, { "epoch": 0.42, "grad_norm": 1.2214183807373047, "learning_rate": 1.299020724243051e-05, "loss": 0.6173, "step": 16435 }, { "epoch": 0.42, "grad_norm": 9.458608627319336, "learning_rate": 1.2989415071108557e-05, "loss": 0.7121, "step": 16436 }, { "epoch": 0.42, "grad_norm": 5.875394344329834, "learning_rate": 1.2988622879185437e-05, "loss": 0.5869, "step": 16437 }, { "epoch": 0.42, "grad_norm": 3.2253103256225586, "learning_rate": 1.2987830666666605e-05, "loss": 0.5917, "step": 16438 }, { "epoch": 0.42, "grad_norm": 5.871885776519775, "learning_rate": 1.2987038433557522e-05, "loss": 0.504, "step": 16439 }, { "epoch": 0.42, "grad_norm": 1.3662934303283691, "learning_rate": 1.2986246179863647e-05, "loss": 0.539, "step": 16440 }, { "epoch": 0.42, "grad_norm": 1.504287600517273, "learning_rate": 1.2985453905590437e-05, "loss": 0.6428, "step": 16441 }, { "epoch": 0.42, "grad_norm": 1.452409267425537, "learning_rate": 1.2984661610743357e-05, "loss": 0.7335, "step": 16442 }, { "epoch": 0.42, "grad_norm": 10.451252937316895, "learning_rate": 1.2983869295327862e-05, "loss": 0.5592, "step": 16443 }, { "epoch": 0.42, "grad_norm": 1.8737714290618896, "learning_rate": 1.2983076959349415e-05, "loss": 0.5321, "step": 16444 }, { "epoch": 0.42, "grad_norm": 3.18271803855896, "learning_rate": 1.2982284602813478e-05, "loss": 0.444, "step": 16445 }, { "epoch": 0.42, "grad_norm": 1.411750078201294, "learning_rate": 1.2981492225725508e-05, "loss": 0.41, "step": 16446 }, { "epoch": 0.42, "grad_norm": 4.907325267791748, "learning_rate": 1.2980699828090966e-05, "loss": 0.7585, "step": 16447 }, { "epoch": 0.42, "grad_norm": 1.6061314344406128, "learning_rate": 1.2979907409915313e-05, "loss": 0.6367, "step": 16448 }, { "epoch": 0.42, "grad_norm": 4.396011829376221, "learning_rate": 1.297911497120401e-05, "loss": 0.6776, "step": 16449 }, { "epoch": 0.42, "grad_norm": 0.9499678015708923, "learning_rate": 1.297832251196252e-05, "loss": 0.5966, "step": 16450 }, { "epoch": 0.42, "grad_norm": 1.3346762657165527, "learning_rate": 1.2977530032196301e-05, "loss": 0.3499, "step": 16451 }, { "epoch": 0.42, "grad_norm": 4.378175735473633, "learning_rate": 1.2976737531910818e-05, "loss": 0.482, "step": 16452 }, { "epoch": 0.42, "grad_norm": 1.399383544921875, "learning_rate": 1.2975945011111526e-05, "loss": 0.5644, "step": 16453 }, { "epoch": 0.42, "grad_norm": 3.8575010299682617, "learning_rate": 1.2975152469803893e-05, "loss": 0.6248, "step": 16454 }, { "epoch": 0.42, "grad_norm": 1.3015916347503662, "learning_rate": 1.2974359907993378e-05, "loss": 0.6533, "step": 16455 }, { "epoch": 0.42, "grad_norm": 4.815296649932861, "learning_rate": 1.2973567325685442e-05, "loss": 0.5642, "step": 16456 }, { "epoch": 0.42, "grad_norm": 2.6845712661743164, "learning_rate": 1.2972774722885552e-05, "loss": 0.5326, "step": 16457 }, { "epoch": 0.42, "grad_norm": 1.0389777421951294, "learning_rate": 1.2971982099599165e-05, "loss": 0.4936, "step": 16458 }, { "epoch": 0.42, "grad_norm": 1.958808183670044, "learning_rate": 1.2971189455831743e-05, "loss": 0.6614, "step": 16459 }, { "epoch": 0.42, "grad_norm": 2.9525320529937744, "learning_rate": 1.297039679158875e-05, "loss": 0.6989, "step": 16460 }, { "epoch": 0.42, "grad_norm": 2.966569662094116, "learning_rate": 1.2969604106875649e-05, "loss": 0.5944, "step": 16461 }, { "epoch": 0.42, "grad_norm": 1.993523120880127, "learning_rate": 1.2968811401697905e-05, "loss": 0.5501, "step": 16462 }, { "epoch": 0.42, "grad_norm": 4.693492412567139, "learning_rate": 1.2968018676060974e-05, "loss": 0.7623, "step": 16463 }, { "epoch": 0.42, "grad_norm": 1.2025537490844727, "learning_rate": 1.2967225929970324e-05, "loss": 0.5007, "step": 16464 }, { "epoch": 0.42, "grad_norm": 10.461287498474121, "learning_rate": 1.2966433163431418e-05, "loss": 0.4739, "step": 16465 }, { "epoch": 0.42, "grad_norm": 2.8007540702819824, "learning_rate": 1.2965640376449715e-05, "loss": 0.6668, "step": 16466 }, { "epoch": 0.42, "grad_norm": 1.5736809968948364, "learning_rate": 1.2964847569030688e-05, "loss": 0.5589, "step": 16467 }, { "epoch": 0.42, "grad_norm": 5.606605529785156, "learning_rate": 1.2964054741179788e-05, "loss": 0.5388, "step": 16468 }, { "epoch": 0.42, "grad_norm": 3.5669593811035156, "learning_rate": 1.296326189290249e-05, "loss": 0.6027, "step": 16469 }, { "epoch": 0.42, "grad_norm": 2.286740303039551, "learning_rate": 1.2962469024204248e-05, "loss": 0.6131, "step": 16470 }, { "epoch": 0.42, "grad_norm": 3.057234525680542, "learning_rate": 1.2961676135090535e-05, "loss": 0.607, "step": 16471 }, { "epoch": 0.42, "grad_norm": 1.9944682121276855, "learning_rate": 1.2960883225566807e-05, "loss": 0.5549, "step": 16472 }, { "epoch": 0.42, "grad_norm": 1.9877910614013672, "learning_rate": 1.2960090295638535e-05, "loss": 0.5874, "step": 16473 }, { "epoch": 0.42, "grad_norm": 2.014671564102173, "learning_rate": 1.2959297345311178e-05, "loss": 0.5506, "step": 16474 }, { "epoch": 0.42, "grad_norm": 1.7974536418914795, "learning_rate": 1.2958504374590205e-05, "loss": 0.6773, "step": 16475 }, { "epoch": 0.42, "grad_norm": 2.7395975589752197, "learning_rate": 1.2957711383481078e-05, "loss": 0.5075, "step": 16476 }, { "epoch": 0.42, "grad_norm": 1.6006108522415161, "learning_rate": 1.295691837198926e-05, "loss": 0.5222, "step": 16477 }, { "epoch": 0.42, "grad_norm": 1.3321635723114014, "learning_rate": 1.295612534012022e-05, "loss": 0.5123, "step": 16478 }, { "epoch": 0.42, "grad_norm": 1.5301955938339233, "learning_rate": 1.2955332287879423e-05, "loss": 0.5364, "step": 16479 }, { "epoch": 0.42, "grad_norm": 1.278406023979187, "learning_rate": 1.295453921527233e-05, "loss": 0.5789, "step": 16480 }, { "epoch": 0.42, "grad_norm": 1.6627469062805176, "learning_rate": 1.2953746122304414e-05, "loss": 0.6209, "step": 16481 }, { "epoch": 0.42, "grad_norm": 1.4550046920776367, "learning_rate": 1.2952953008981129e-05, "loss": 0.4954, "step": 16482 }, { "epoch": 0.42, "grad_norm": 3.253964424133301, "learning_rate": 1.2952159875307953e-05, "loss": 0.6779, "step": 16483 }, { "epoch": 0.42, "grad_norm": 1.4924179315567017, "learning_rate": 1.295136672129034e-05, "loss": 0.5242, "step": 16484 }, { "epoch": 0.42, "grad_norm": 3.5327670574188232, "learning_rate": 1.2950573546933764e-05, "loss": 0.6479, "step": 16485 }, { "epoch": 0.42, "grad_norm": 4.8134002685546875, "learning_rate": 1.294978035224369e-05, "loss": 0.7199, "step": 16486 }, { "epoch": 0.42, "grad_norm": 1.5802305936813354, "learning_rate": 1.2948987137225582e-05, "loss": 0.482, "step": 16487 }, { "epoch": 0.42, "grad_norm": 1.3156452178955078, "learning_rate": 1.2948193901884909e-05, "loss": 0.5101, "step": 16488 }, { "epoch": 0.42, "grad_norm": 3.918843984603882, "learning_rate": 1.2947400646227133e-05, "loss": 0.6976, "step": 16489 }, { "epoch": 0.42, "grad_norm": 2.4189584255218506, "learning_rate": 1.2946607370257725e-05, "loss": 0.5645, "step": 16490 }, { "epoch": 0.42, "grad_norm": 3.0592477321624756, "learning_rate": 1.294581407398215e-05, "loss": 0.7279, "step": 16491 }, { "epoch": 0.42, "grad_norm": 1.559117317199707, "learning_rate": 1.2945020757405876e-05, "loss": 0.4603, "step": 16492 }, { "epoch": 0.42, "grad_norm": 1.6235285997390747, "learning_rate": 1.294422742053437e-05, "loss": 0.5818, "step": 16493 }, { "epoch": 0.42, "grad_norm": 5.446388244628906, "learning_rate": 1.2943434063373094e-05, "loss": 0.5196, "step": 16494 }, { "epoch": 0.42, "grad_norm": 2.093860149383545, "learning_rate": 1.2942640685927523e-05, "loss": 0.656, "step": 16495 }, { "epoch": 0.42, "grad_norm": 3.0900683403015137, "learning_rate": 1.2941847288203122e-05, "loss": 0.625, "step": 16496 }, { "epoch": 0.42, "grad_norm": 4.164198398590088, "learning_rate": 1.2941053870205355e-05, "loss": 0.5708, "step": 16497 }, { "epoch": 0.42, "grad_norm": 1.4088869094848633, "learning_rate": 1.2940260431939696e-05, "loss": 0.506, "step": 16498 }, { "epoch": 0.42, "grad_norm": 1.4478954076766968, "learning_rate": 1.2939466973411607e-05, "loss": 0.5657, "step": 16499 }, { "epoch": 0.42, "grad_norm": 1.5502722263336182, "learning_rate": 1.293867349462656e-05, "loss": 0.6441, "step": 16500 }, { "epoch": 0.42, "grad_norm": 1.8548818826675415, "learning_rate": 1.2937879995590018e-05, "loss": 0.6875, "step": 16501 }, { "epoch": 0.42, "grad_norm": 1.2099794149398804, "learning_rate": 1.2937086476307457e-05, "loss": 0.5264, "step": 16502 }, { "epoch": 0.42, "grad_norm": 1.1325547695159912, "learning_rate": 1.2936292936784338e-05, "loss": 0.6602, "step": 16503 }, { "epoch": 0.42, "grad_norm": 1.1393121480941772, "learning_rate": 1.2935499377026134e-05, "loss": 0.5858, "step": 16504 }, { "epoch": 0.42, "grad_norm": 1.9028667211532593, "learning_rate": 1.2934705797038314e-05, "loss": 0.4997, "step": 16505 }, { "epoch": 0.42, "grad_norm": 1.8954142332077026, "learning_rate": 1.2933912196826343e-05, "loss": 0.7159, "step": 16506 }, { "epoch": 0.42, "grad_norm": 1.8184964656829834, "learning_rate": 1.2933118576395693e-05, "loss": 0.701, "step": 16507 }, { "epoch": 0.42, "grad_norm": 1.276716947555542, "learning_rate": 1.2932324935751834e-05, "loss": 0.6135, "step": 16508 }, { "epoch": 0.42, "grad_norm": 2.612785816192627, "learning_rate": 1.2931531274900233e-05, "loss": 0.7006, "step": 16509 }, { "epoch": 0.42, "grad_norm": 1.5764278173446655, "learning_rate": 1.293073759384636e-05, "loss": 0.4241, "step": 16510 }, { "epoch": 0.42, "grad_norm": 2.2723143100738525, "learning_rate": 1.2929943892595684e-05, "loss": 0.4755, "step": 16511 }, { "epoch": 0.42, "grad_norm": 1.8139755725860596, "learning_rate": 1.292915017115368e-05, "loss": 0.6418, "step": 16512 }, { "epoch": 0.42, "grad_norm": 5.1180500984191895, "learning_rate": 1.2928356429525808e-05, "loss": 0.5325, "step": 16513 }, { "epoch": 0.42, "grad_norm": 1.7318577766418457, "learning_rate": 1.2927562667717544e-05, "loss": 0.4459, "step": 16514 }, { "epoch": 0.42, "grad_norm": 1.977126955986023, "learning_rate": 1.2926768885734362e-05, "loss": 0.6082, "step": 16515 }, { "epoch": 0.42, "grad_norm": 1.697320818901062, "learning_rate": 1.2925975083581723e-05, "loss": 0.6609, "step": 16516 }, { "epoch": 0.42, "grad_norm": 2.1848368644714355, "learning_rate": 1.2925181261265101e-05, "loss": 0.573, "step": 16517 }, { "epoch": 0.42, "grad_norm": 1.2337654829025269, "learning_rate": 1.2924387418789973e-05, "loss": 0.6107, "step": 16518 }, { "epoch": 0.42, "grad_norm": 1.06984281539917, "learning_rate": 1.2923593556161798e-05, "loss": 0.504, "step": 16519 }, { "epoch": 0.42, "grad_norm": 2.1858596801757812, "learning_rate": 1.2922799673386057e-05, "loss": 0.6048, "step": 16520 }, { "epoch": 0.42, "grad_norm": 2.079763650894165, "learning_rate": 1.2922005770468214e-05, "loss": 0.6788, "step": 16521 }, { "epoch": 0.42, "grad_norm": 3.8371381759643555, "learning_rate": 1.2921211847413744e-05, "loss": 0.6221, "step": 16522 }, { "epoch": 0.42, "grad_norm": 1.5666396617889404, "learning_rate": 1.292041790422812e-05, "loss": 0.4481, "step": 16523 }, { "epoch": 0.42, "grad_norm": 3.3150265216827393, "learning_rate": 1.2919623940916806e-05, "loss": 0.6799, "step": 16524 }, { "epoch": 0.42, "grad_norm": 2.1582272052764893, "learning_rate": 1.2918829957485283e-05, "loss": 0.6237, "step": 16525 }, { "epoch": 0.42, "grad_norm": 1.153334617614746, "learning_rate": 1.2918035953939015e-05, "loss": 0.5916, "step": 16526 }, { "epoch": 0.42, "grad_norm": 1.3155601024627686, "learning_rate": 1.2917241930283477e-05, "loss": 0.5992, "step": 16527 }, { "epoch": 0.42, "grad_norm": 2.3115074634552, "learning_rate": 1.291644788652414e-05, "loss": 0.5614, "step": 16528 }, { "epoch": 0.42, "grad_norm": 1.2742546796798706, "learning_rate": 1.2915653822666476e-05, "loss": 0.5232, "step": 16529 }, { "epoch": 0.42, "grad_norm": 4.016646862030029, "learning_rate": 1.291485973871596e-05, "loss": 0.7002, "step": 16530 }, { "epoch": 0.42, "grad_norm": 1.218124508857727, "learning_rate": 1.2914065634678057e-05, "loss": 0.7167, "step": 16531 }, { "epoch": 0.42, "grad_norm": 1.4815168380737305, "learning_rate": 1.291327151055825e-05, "loss": 0.6981, "step": 16532 }, { "epoch": 0.42, "grad_norm": 2.916264772415161, "learning_rate": 1.2912477366362001e-05, "loss": 0.4757, "step": 16533 }, { "epoch": 0.42, "grad_norm": 1.730326771736145, "learning_rate": 1.2911683202094791e-05, "loss": 0.6698, "step": 16534 }, { "epoch": 0.42, "grad_norm": 1.6665420532226562, "learning_rate": 1.2910889017762091e-05, "loss": 0.5952, "step": 16535 }, { "epoch": 0.42, "grad_norm": 1.5643508434295654, "learning_rate": 1.291009481336937e-05, "loss": 0.641, "step": 16536 }, { "epoch": 0.42, "grad_norm": 4.623631000518799, "learning_rate": 1.2909300588922105e-05, "loss": 0.7113, "step": 16537 }, { "epoch": 0.42, "grad_norm": 1.0519235134124756, "learning_rate": 1.2908506344425766e-05, "loss": 0.6697, "step": 16538 }, { "epoch": 0.42, "grad_norm": 1.2457115650177002, "learning_rate": 1.2907712079885832e-05, "loss": 0.5318, "step": 16539 }, { "epoch": 0.42, "grad_norm": 3.5373780727386475, "learning_rate": 1.2906917795307772e-05, "loss": 0.4754, "step": 16540 }, { "epoch": 0.42, "grad_norm": 2.078728437423706, "learning_rate": 1.290612349069706e-05, "loss": 0.7832, "step": 16541 }, { "epoch": 0.42, "grad_norm": 1.589813470840454, "learning_rate": 1.2905329166059172e-05, "loss": 0.5181, "step": 16542 }, { "epoch": 0.42, "grad_norm": 3.0801758766174316, "learning_rate": 1.2904534821399578e-05, "loss": 0.7288, "step": 16543 }, { "epoch": 0.42, "grad_norm": 1.5793888568878174, "learning_rate": 1.290374045672376e-05, "loss": 0.6227, "step": 16544 }, { "epoch": 0.42, "grad_norm": 1.4465020895004272, "learning_rate": 1.2902946072037181e-05, "loss": 0.6536, "step": 16545 }, { "epoch": 0.42, "grad_norm": 4.114627361297607, "learning_rate": 1.2902151667345323e-05, "loss": 0.8273, "step": 16546 }, { "epoch": 0.42, "grad_norm": 3.3886189460754395, "learning_rate": 1.2901357242653662e-05, "loss": 0.696, "step": 16547 }, { "epoch": 0.42, "grad_norm": 2.7470080852508545, "learning_rate": 1.2900562797967667e-05, "loss": 0.5432, "step": 16548 }, { "epoch": 0.42, "grad_norm": 1.4706093072891235, "learning_rate": 1.2899768333292817e-05, "loss": 0.5286, "step": 16549 }, { "epoch": 0.42, "grad_norm": 1.3608494997024536, "learning_rate": 1.2898973848634584e-05, "loss": 0.624, "step": 16550 }, { "epoch": 0.42, "grad_norm": 1.5578910112380981, "learning_rate": 1.2898179343998447e-05, "loss": 0.5755, "step": 16551 }, { "epoch": 0.42, "grad_norm": 3.55315899848938, "learning_rate": 1.2897384819389877e-05, "loss": 0.7347, "step": 16552 }, { "epoch": 0.42, "grad_norm": 1.6317143440246582, "learning_rate": 1.289659027481435e-05, "loss": 0.7182, "step": 16553 }, { "epoch": 0.42, "grad_norm": 4.464321136474609, "learning_rate": 1.2895795710277346e-05, "loss": 0.719, "step": 16554 }, { "epoch": 0.42, "grad_norm": 2.217531681060791, "learning_rate": 1.2895001125784334e-05, "loss": 0.6787, "step": 16555 }, { "epoch": 0.42, "grad_norm": 1.0339425802230835, "learning_rate": 1.2894206521340796e-05, "loss": 0.4427, "step": 16556 }, { "epoch": 0.42, "grad_norm": 1.7627332210540771, "learning_rate": 1.2893411896952201e-05, "loss": 0.6881, "step": 16557 }, { "epoch": 0.42, "grad_norm": 1.891730546951294, "learning_rate": 1.2892617252624032e-05, "loss": 0.5617, "step": 16558 }, { "epoch": 0.42, "grad_norm": 1.5391156673431396, "learning_rate": 1.2891822588361763e-05, "loss": 0.5348, "step": 16559 }, { "epoch": 0.42, "grad_norm": 2.202636480331421, "learning_rate": 1.2891027904170867e-05, "loss": 0.6501, "step": 16560 }, { "epoch": 0.42, "grad_norm": 1.3955556154251099, "learning_rate": 1.2890233200056823e-05, "loss": 0.6824, "step": 16561 }, { "epoch": 0.42, "grad_norm": 5.230324745178223, "learning_rate": 1.288943847602511e-05, "loss": 0.5317, "step": 16562 }, { "epoch": 0.42, "grad_norm": 2.5454368591308594, "learning_rate": 1.2888643732081198e-05, "loss": 0.5668, "step": 16563 }, { "epoch": 0.42, "grad_norm": 2.6084561347961426, "learning_rate": 1.2887848968230574e-05, "loss": 0.7282, "step": 16564 }, { "epoch": 0.42, "grad_norm": 2.381516933441162, "learning_rate": 1.2887054184478704e-05, "loss": 0.7429, "step": 16565 }, { "epoch": 0.42, "grad_norm": 1.06074059009552, "learning_rate": 1.2886259380831077e-05, "loss": 0.5207, "step": 16566 }, { "epoch": 0.42, "grad_norm": 2.1242878437042236, "learning_rate": 1.2885464557293156e-05, "loss": 0.5671, "step": 16567 }, { "epoch": 0.42, "grad_norm": 1.733526587486267, "learning_rate": 1.2884669713870432e-05, "loss": 0.613, "step": 16568 }, { "epoch": 0.42, "grad_norm": 1.678318738937378, "learning_rate": 1.2883874850568374e-05, "loss": 0.4784, "step": 16569 }, { "epoch": 0.42, "grad_norm": 1.3097106218338013, "learning_rate": 1.2883079967392463e-05, "loss": 0.5502, "step": 16570 }, { "epoch": 0.42, "grad_norm": 3.9631059169769287, "learning_rate": 1.2882285064348174e-05, "loss": 0.6735, "step": 16571 }, { "epoch": 0.42, "grad_norm": 1.802281141281128, "learning_rate": 1.2881490141440992e-05, "loss": 0.5176, "step": 16572 }, { "epoch": 0.42, "grad_norm": 1.4185304641723633, "learning_rate": 1.2880695198676385e-05, "loss": 0.513, "step": 16573 }, { "epoch": 0.42, "grad_norm": 1.2554651498794556, "learning_rate": 1.287990023605984e-05, "loss": 0.5668, "step": 16574 }, { "epoch": 0.42, "grad_norm": 6.428916931152344, "learning_rate": 1.2879105253596828e-05, "loss": 0.5007, "step": 16575 }, { "epoch": 0.42, "grad_norm": 3.315066337585449, "learning_rate": 1.2878310251292836e-05, "loss": 0.6364, "step": 16576 }, { "epoch": 0.42, "grad_norm": 1.8299766778945923, "learning_rate": 1.2877515229153337e-05, "loss": 0.6602, "step": 16577 }, { "epoch": 0.42, "grad_norm": 2.39741849899292, "learning_rate": 1.2876720187183812e-05, "loss": 0.7675, "step": 16578 }, { "epoch": 0.42, "grad_norm": 1.0999313592910767, "learning_rate": 1.2875925125389734e-05, "loss": 0.5321, "step": 16579 }, { "epoch": 0.42, "grad_norm": 2.796231508255005, "learning_rate": 1.287513004377659e-05, "loss": 0.5423, "step": 16580 }, { "epoch": 0.42, "grad_norm": 2.5516655445098877, "learning_rate": 1.2874334942349856e-05, "loss": 0.7268, "step": 16581 }, { "epoch": 0.43, "grad_norm": 1.7347626686096191, "learning_rate": 1.2873539821115011e-05, "loss": 0.6777, "step": 16582 }, { "epoch": 0.43, "grad_norm": 1.3821210861206055, "learning_rate": 1.2872744680077536e-05, "loss": 0.6534, "step": 16583 }, { "epoch": 0.43, "grad_norm": 1.8937972784042358, "learning_rate": 1.2871949519242908e-05, "loss": 0.4766, "step": 16584 }, { "epoch": 0.43, "grad_norm": 1.85288405418396, "learning_rate": 1.2871154338616609e-05, "loss": 0.5327, "step": 16585 }, { "epoch": 0.43, "grad_norm": 2.4753048419952393, "learning_rate": 1.2870359138204117e-05, "loss": 0.5884, "step": 16586 }, { "epoch": 0.43, "grad_norm": 2.0297927856445312, "learning_rate": 1.2869563918010915e-05, "loss": 0.6287, "step": 16587 }, { "epoch": 0.43, "grad_norm": 2.172173023223877, "learning_rate": 1.286876867804248e-05, "loss": 0.6133, "step": 16588 }, { "epoch": 0.43, "grad_norm": 1.7194544076919556, "learning_rate": 1.2867973418304294e-05, "loss": 0.6524, "step": 16589 }, { "epoch": 0.43, "grad_norm": 3.5067107677459717, "learning_rate": 1.286717813880184e-05, "loss": 0.7288, "step": 16590 }, { "epoch": 0.43, "grad_norm": 1.293526291847229, "learning_rate": 1.286638283954059e-05, "loss": 0.5142, "step": 16591 }, { "epoch": 0.43, "grad_norm": 7.650509357452393, "learning_rate": 1.2865587520526034e-05, "loss": 0.8338, "step": 16592 }, { "epoch": 0.43, "grad_norm": 1.7420027256011963, "learning_rate": 1.2864792181763647e-05, "loss": 0.3034, "step": 16593 }, { "epoch": 0.43, "grad_norm": 6.51984167098999, "learning_rate": 1.2863996823258913e-05, "loss": 0.605, "step": 16594 }, { "epoch": 0.43, "grad_norm": 10.814027786254883, "learning_rate": 1.2863201445017314e-05, "loss": 0.548, "step": 16595 }, { "epoch": 0.43, "grad_norm": 1.8397624492645264, "learning_rate": 1.2862406047044327e-05, "loss": 0.6838, "step": 16596 }, { "epoch": 0.43, "grad_norm": 1.577732801437378, "learning_rate": 1.2861610629345437e-05, "loss": 0.7189, "step": 16597 }, { "epoch": 0.43, "grad_norm": 1.5898860692977905, "learning_rate": 1.2860815191926126e-05, "loss": 0.6602, "step": 16598 }, { "epoch": 0.43, "grad_norm": 1.224122405052185, "learning_rate": 1.286001973479187e-05, "loss": 0.5913, "step": 16599 }, { "epoch": 0.43, "grad_norm": 1.27490234375, "learning_rate": 1.285922425794816e-05, "loss": 0.464, "step": 16600 }, { "epoch": 0.43, "grad_norm": 2.268604278564453, "learning_rate": 1.2858428761400468e-05, "loss": 0.6178, "step": 16601 }, { "epoch": 0.43, "grad_norm": 3.534475564956665, "learning_rate": 1.2857633245154283e-05, "loss": 0.565, "step": 16602 }, { "epoch": 0.43, "grad_norm": 1.519977331161499, "learning_rate": 1.2856837709215084e-05, "loss": 0.5847, "step": 16603 }, { "epoch": 0.43, "grad_norm": 2.2172021865844727, "learning_rate": 1.2856042153588358e-05, "loss": 0.6765, "step": 16604 }, { "epoch": 0.43, "grad_norm": 2.8812758922576904, "learning_rate": 1.2855246578279579e-05, "loss": 0.5439, "step": 16605 }, { "epoch": 0.43, "grad_norm": 2.0976572036743164, "learning_rate": 1.2854450983294235e-05, "loss": 0.5236, "step": 16606 }, { "epoch": 0.43, "grad_norm": 2.782022714614868, "learning_rate": 1.2853655368637813e-05, "loss": 0.5394, "step": 16607 }, { "epoch": 0.43, "grad_norm": 4.053925037384033, "learning_rate": 1.2852859734315786e-05, "loss": 0.5729, "step": 16608 }, { "epoch": 0.43, "grad_norm": 1.5667091608047485, "learning_rate": 1.2852064080333643e-05, "loss": 0.5419, "step": 16609 }, { "epoch": 0.43, "grad_norm": 7.703134536743164, "learning_rate": 1.2851268406696867e-05, "loss": 0.6595, "step": 16610 }, { "epoch": 0.43, "grad_norm": 3.1236536502838135, "learning_rate": 1.285047271341094e-05, "loss": 0.6667, "step": 16611 }, { "epoch": 0.43, "grad_norm": 1.955863356590271, "learning_rate": 1.2849677000481347e-05, "loss": 0.6695, "step": 16612 }, { "epoch": 0.43, "grad_norm": 1.6920419931411743, "learning_rate": 1.2848881267913568e-05, "loss": 0.4641, "step": 16613 }, { "epoch": 0.43, "grad_norm": 2.571653127670288, "learning_rate": 1.2848085515713092e-05, "loss": 0.5696, "step": 16614 }, { "epoch": 0.43, "grad_norm": 1.55669367313385, "learning_rate": 1.2847289743885398e-05, "loss": 0.5989, "step": 16615 }, { "epoch": 0.43, "grad_norm": 1.2649176120758057, "learning_rate": 1.2846493952435973e-05, "loss": 0.476, "step": 16616 }, { "epoch": 0.43, "grad_norm": 1.824737787246704, "learning_rate": 1.28456981413703e-05, "loss": 0.4257, "step": 16617 }, { "epoch": 0.43, "grad_norm": 1.8419480323791504, "learning_rate": 1.2844902310693864e-05, "loss": 0.6666, "step": 16618 }, { "epoch": 0.43, "grad_norm": 1.6919794082641602, "learning_rate": 1.2844106460412147e-05, "loss": 0.6504, "step": 16619 }, { "epoch": 0.43, "grad_norm": 2.602710723876953, "learning_rate": 1.2843310590530636e-05, "loss": 0.7585, "step": 16620 }, { "epoch": 0.43, "grad_norm": 3.583986520767212, "learning_rate": 1.2842514701054816e-05, "loss": 0.8744, "step": 16621 }, { "epoch": 0.43, "grad_norm": 1.2396029233932495, "learning_rate": 1.2841718791990165e-05, "loss": 0.511, "step": 16622 }, { "epoch": 0.43, "grad_norm": 1.7863075733184814, "learning_rate": 1.2840922863342178e-05, "loss": 0.6479, "step": 16623 }, { "epoch": 0.43, "grad_norm": 1.965465784072876, "learning_rate": 1.2840126915116336e-05, "loss": 0.6718, "step": 16624 }, { "epoch": 0.43, "grad_norm": 1.3010683059692383, "learning_rate": 1.2839330947318122e-05, "loss": 0.714, "step": 16625 }, { "epoch": 0.43, "grad_norm": 1.383776068687439, "learning_rate": 1.2838534959953024e-05, "loss": 0.5266, "step": 16626 }, { "epoch": 0.43, "grad_norm": 3.538572072982788, "learning_rate": 1.2837738953026527e-05, "loss": 0.5157, "step": 16627 }, { "epoch": 0.43, "grad_norm": 3.8333427906036377, "learning_rate": 1.2836942926544112e-05, "loss": 0.5709, "step": 16628 }, { "epoch": 0.43, "grad_norm": 1.5796349048614502, "learning_rate": 1.2836146880511274e-05, "loss": 0.5814, "step": 16629 }, { "epoch": 0.43, "grad_norm": 1.2703750133514404, "learning_rate": 1.283535081493349e-05, "loss": 0.6049, "step": 16630 }, { "epoch": 0.43, "grad_norm": 1.867659568786621, "learning_rate": 1.283455472981625e-05, "loss": 0.6857, "step": 16631 }, { "epoch": 0.43, "grad_norm": 1.5565738677978516, "learning_rate": 1.2833758625165041e-05, "loss": 0.535, "step": 16632 }, { "epoch": 0.43, "grad_norm": 1.7933610677719116, "learning_rate": 1.2832962500985348e-05, "loss": 0.5897, "step": 16633 }, { "epoch": 0.43, "grad_norm": 1.3791215419769287, "learning_rate": 1.2832166357282655e-05, "loss": 0.481, "step": 16634 }, { "epoch": 0.43, "grad_norm": 5.115856647491455, "learning_rate": 1.2831370194062451e-05, "loss": 0.67, "step": 16635 }, { "epoch": 0.43, "grad_norm": 1.763083815574646, "learning_rate": 1.2830574011330225e-05, "loss": 0.3792, "step": 16636 }, { "epoch": 0.43, "grad_norm": 1.7076127529144287, "learning_rate": 1.282977780909146e-05, "loss": 0.3055, "step": 16637 }, { "epoch": 0.43, "grad_norm": 1.572444200515747, "learning_rate": 1.2828981587351646e-05, "loss": 0.6141, "step": 16638 }, { "epoch": 0.43, "grad_norm": 2.118840217590332, "learning_rate": 1.2828185346116266e-05, "loss": 0.6273, "step": 16639 }, { "epoch": 0.43, "grad_norm": 5.122152805328369, "learning_rate": 1.2827389085390811e-05, "loss": 0.7172, "step": 16640 }, { "epoch": 0.43, "grad_norm": 2.948509693145752, "learning_rate": 1.2826592805180766e-05, "loss": 0.5086, "step": 16641 }, { "epoch": 0.43, "grad_norm": 2.099081039428711, "learning_rate": 1.2825796505491617e-05, "loss": 0.6374, "step": 16642 }, { "epoch": 0.43, "grad_norm": 1.3427979946136475, "learning_rate": 1.2825000186328856e-05, "loss": 0.4636, "step": 16643 }, { "epoch": 0.43, "grad_norm": 1.8768278360366821, "learning_rate": 1.282420384769797e-05, "loss": 0.5459, "step": 16644 }, { "epoch": 0.43, "grad_norm": 2.053135395050049, "learning_rate": 1.282340748960444e-05, "loss": 0.5793, "step": 16645 }, { "epoch": 0.43, "grad_norm": 1.251033067703247, "learning_rate": 1.2822611112053767e-05, "loss": 0.5586, "step": 16646 }, { "epoch": 0.43, "grad_norm": 4.251291275024414, "learning_rate": 1.2821814715051426e-05, "loss": 0.7363, "step": 16647 }, { "epoch": 0.43, "grad_norm": 10.922865867614746, "learning_rate": 1.2821018298602911e-05, "loss": 0.5837, "step": 16648 }, { "epoch": 0.43, "grad_norm": 3.244126319885254, "learning_rate": 1.2820221862713713e-05, "loss": 0.6664, "step": 16649 }, { "epoch": 0.43, "grad_norm": 2.0175154209136963, "learning_rate": 1.2819425407389315e-05, "loss": 0.6702, "step": 16650 }, { "epoch": 0.43, "grad_norm": 4.788172245025635, "learning_rate": 1.2818628932635211e-05, "loss": 0.6122, "step": 16651 }, { "epoch": 0.43, "grad_norm": 1.5917335748672485, "learning_rate": 1.2817832438456885e-05, "loss": 0.5558, "step": 16652 }, { "epoch": 0.43, "grad_norm": 2.137197494506836, "learning_rate": 1.281703592485983e-05, "loss": 0.4011, "step": 16653 }, { "epoch": 0.43, "grad_norm": 3.3689427375793457, "learning_rate": 1.2816239391849531e-05, "loss": 0.5852, "step": 16654 }, { "epoch": 0.43, "grad_norm": 4.72491979598999, "learning_rate": 1.281544283943148e-05, "loss": 0.6855, "step": 16655 }, { "epoch": 0.43, "grad_norm": 3.1285691261291504, "learning_rate": 1.2814646267611171e-05, "loss": 0.5424, "step": 16656 }, { "epoch": 0.43, "grad_norm": 1.6703968048095703, "learning_rate": 1.281384967639408e-05, "loss": 0.4342, "step": 16657 }, { "epoch": 0.43, "grad_norm": 8.528141975402832, "learning_rate": 1.2813053065785713e-05, "loss": 0.6844, "step": 16658 }, { "epoch": 0.43, "grad_norm": 12.514361381530762, "learning_rate": 1.2812256435791545e-05, "loss": 0.6023, "step": 16659 }, { "epoch": 0.43, "grad_norm": 2.879523754119873, "learning_rate": 1.2811459786417076e-05, "loss": 0.6195, "step": 16660 }, { "epoch": 0.43, "grad_norm": 1.297322392463684, "learning_rate": 1.281066311766779e-05, "loss": 0.5595, "step": 16661 }, { "epoch": 0.43, "grad_norm": 1.3286994695663452, "learning_rate": 1.280986642954918e-05, "loss": 0.5606, "step": 16662 }, { "epoch": 0.43, "grad_norm": 2.450246572494507, "learning_rate": 1.2809069722066739e-05, "loss": 0.5483, "step": 16663 }, { "epoch": 0.43, "grad_norm": 1.4619263410568237, "learning_rate": 1.2808272995225951e-05, "loss": 0.4988, "step": 16664 }, { "epoch": 0.43, "grad_norm": 2.6464312076568604, "learning_rate": 1.2807476249032313e-05, "loss": 0.6176, "step": 16665 }, { "epoch": 0.43, "grad_norm": 1.4531482458114624, "learning_rate": 1.2806679483491309e-05, "loss": 0.5912, "step": 16666 }, { "epoch": 0.43, "grad_norm": 1.870166540145874, "learning_rate": 1.2805882698608434e-05, "loss": 0.4757, "step": 16667 }, { "epoch": 0.43, "grad_norm": 3.815843343734741, "learning_rate": 1.280508589438918e-05, "loss": 0.6255, "step": 16668 }, { "epoch": 0.43, "grad_norm": 7.550793170928955, "learning_rate": 1.2804289070839036e-05, "loss": 0.6317, "step": 16669 }, { "epoch": 0.43, "grad_norm": 1.248039722442627, "learning_rate": 1.2803492227963492e-05, "loss": 0.7126, "step": 16670 }, { "epoch": 0.43, "grad_norm": 2.043842077255249, "learning_rate": 1.280269536576804e-05, "loss": 0.7694, "step": 16671 }, { "epoch": 0.43, "grad_norm": 4.867486476898193, "learning_rate": 1.2801898484258177e-05, "loss": 0.431, "step": 16672 }, { "epoch": 0.43, "grad_norm": 2.4686567783355713, "learning_rate": 1.2801101583439385e-05, "loss": 0.6962, "step": 16673 }, { "epoch": 0.43, "grad_norm": 5.10963249206543, "learning_rate": 1.2800304663317165e-05, "loss": 0.706, "step": 16674 }, { "epoch": 0.43, "grad_norm": 2.0952298641204834, "learning_rate": 1.2799507723897002e-05, "loss": 0.5718, "step": 16675 }, { "epoch": 0.43, "grad_norm": 1.500890851020813, "learning_rate": 1.2798710765184393e-05, "loss": 0.6309, "step": 16676 }, { "epoch": 0.43, "grad_norm": 6.709853649139404, "learning_rate": 1.2797913787184825e-05, "loss": 0.4479, "step": 16677 }, { "epoch": 0.43, "grad_norm": 2.384244203567505, "learning_rate": 1.2797116789903794e-05, "loss": 0.6681, "step": 16678 }, { "epoch": 0.43, "grad_norm": 2.617202043533325, "learning_rate": 1.279631977334679e-05, "loss": 0.67, "step": 16679 }, { "epoch": 0.43, "grad_norm": 3.2827327251434326, "learning_rate": 1.2795522737519309e-05, "loss": 0.6556, "step": 16680 }, { "epoch": 0.43, "grad_norm": 2.016958475112915, "learning_rate": 1.279472568242684e-05, "loss": 0.7054, "step": 16681 }, { "epoch": 0.43, "grad_norm": 5.562905311584473, "learning_rate": 1.279392860807488e-05, "loss": 0.6498, "step": 16682 }, { "epoch": 0.43, "grad_norm": 1.107972264289856, "learning_rate": 1.2793131514468918e-05, "loss": 0.4119, "step": 16683 }, { "epoch": 0.43, "grad_norm": 2.645294666290283, "learning_rate": 1.2792334401614445e-05, "loss": 0.5571, "step": 16684 }, { "epoch": 0.43, "grad_norm": 1.1323248147964478, "learning_rate": 1.2791537269516962e-05, "loss": 0.5706, "step": 16685 }, { "epoch": 0.43, "grad_norm": 2.0112245082855225, "learning_rate": 1.2790740118181957e-05, "loss": 0.6353, "step": 16686 }, { "epoch": 0.43, "grad_norm": 12.455434799194336, "learning_rate": 1.2789942947614924e-05, "loss": 0.6092, "step": 16687 }, { "epoch": 0.43, "grad_norm": 1.4911506175994873, "learning_rate": 1.2789145757821358e-05, "loss": 0.5615, "step": 16688 }, { "epoch": 0.43, "grad_norm": 1.923095941543579, "learning_rate": 1.278834854880675e-05, "loss": 0.5086, "step": 16689 }, { "epoch": 0.43, "grad_norm": 1.2962276935577393, "learning_rate": 1.2787551320576597e-05, "loss": 0.7422, "step": 16690 }, { "epoch": 0.43, "grad_norm": 1.294237732887268, "learning_rate": 1.2786754073136389e-05, "loss": 0.4654, "step": 16691 }, { "epoch": 0.43, "grad_norm": 1.217273473739624, "learning_rate": 1.2785956806491627e-05, "loss": 0.5663, "step": 16692 }, { "epoch": 0.43, "grad_norm": 2.227999448776245, "learning_rate": 1.2785159520647797e-05, "loss": 0.6529, "step": 16693 }, { "epoch": 0.43, "grad_norm": 1.317864179611206, "learning_rate": 1.27843622156104e-05, "loss": 0.6184, "step": 16694 }, { "epoch": 0.43, "grad_norm": 1.4032858610153198, "learning_rate": 1.2783564891384928e-05, "loss": 0.6152, "step": 16695 }, { "epoch": 0.43, "grad_norm": 1.506764531135559, "learning_rate": 1.2782767547976873e-05, "loss": 0.6209, "step": 16696 }, { "epoch": 0.43, "grad_norm": 1.40048348903656, "learning_rate": 1.2781970185391734e-05, "loss": 0.5845, "step": 16697 }, { "epoch": 0.43, "grad_norm": 3.5676639080047607, "learning_rate": 1.2781172803635005e-05, "loss": 0.6878, "step": 16698 }, { "epoch": 0.43, "grad_norm": 1.4210076332092285, "learning_rate": 1.278037540271218e-05, "loss": 0.6301, "step": 16699 }, { "epoch": 0.43, "grad_norm": 1.9609729051589966, "learning_rate": 1.2779577982628752e-05, "loss": 0.5425, "step": 16700 }, { "epoch": 0.43, "grad_norm": 2.6057980060577393, "learning_rate": 1.2778780543390224e-05, "loss": 0.6864, "step": 16701 }, { "epoch": 0.43, "grad_norm": 1.0141059160232544, "learning_rate": 1.2777983085002081e-05, "loss": 0.4722, "step": 16702 }, { "epoch": 0.43, "grad_norm": 2.9157872200012207, "learning_rate": 1.2777185607469827e-05, "loss": 0.548, "step": 16703 }, { "epoch": 0.43, "grad_norm": 1.4488409757614136, "learning_rate": 1.2776388110798954e-05, "loss": 0.5861, "step": 16704 }, { "epoch": 0.43, "grad_norm": 1.7402536869049072, "learning_rate": 1.2775590594994958e-05, "loss": 0.6702, "step": 16705 }, { "epoch": 0.43, "grad_norm": 2.262789726257324, "learning_rate": 1.2774793060063338e-05, "loss": 0.5751, "step": 16706 }, { "epoch": 0.43, "grad_norm": 1.9680171012878418, "learning_rate": 1.2773995506009583e-05, "loss": 0.7057, "step": 16707 }, { "epoch": 0.43, "grad_norm": 1.5602984428405762, "learning_rate": 1.2773197932839194e-05, "loss": 0.6061, "step": 16708 }, { "epoch": 0.43, "grad_norm": 2.147613525390625, "learning_rate": 1.2772400340557672e-05, "loss": 0.6264, "step": 16709 }, { "epoch": 0.43, "grad_norm": 2.9368784427642822, "learning_rate": 1.2771602729170505e-05, "loss": 0.5264, "step": 16710 }, { "epoch": 0.43, "grad_norm": 6.745118141174316, "learning_rate": 1.2770805098683194e-05, "loss": 0.7934, "step": 16711 }, { "epoch": 0.43, "grad_norm": 1.1936360597610474, "learning_rate": 1.2770007449101233e-05, "loss": 0.5703, "step": 16712 }, { "epoch": 0.43, "grad_norm": 5.530300140380859, "learning_rate": 1.2769209780430126e-05, "loss": 0.6816, "step": 16713 }, { "epoch": 0.43, "grad_norm": 4.1232147216796875, "learning_rate": 1.276841209267536e-05, "loss": 0.4645, "step": 16714 }, { "epoch": 0.43, "grad_norm": 2.416714906692505, "learning_rate": 1.276761438584244e-05, "loss": 0.553, "step": 16715 }, { "epoch": 0.43, "grad_norm": 3.5600247383117676, "learning_rate": 1.276681665993686e-05, "loss": 0.5049, "step": 16716 }, { "epoch": 0.43, "grad_norm": 3.975569486618042, "learning_rate": 1.2766018914964116e-05, "loss": 0.6236, "step": 16717 }, { "epoch": 0.43, "grad_norm": 5.261063098907471, "learning_rate": 1.276522115092971e-05, "loss": 0.6848, "step": 16718 }, { "epoch": 0.43, "grad_norm": 3.699483871459961, "learning_rate": 1.2764423367839137e-05, "loss": 0.5274, "step": 16719 }, { "epoch": 0.43, "grad_norm": 4.176163673400879, "learning_rate": 1.2763625565697892e-05, "loss": 0.653, "step": 16720 }, { "epoch": 0.43, "grad_norm": 1.399303674697876, "learning_rate": 1.2762827744511479e-05, "loss": 0.5978, "step": 16721 }, { "epoch": 0.43, "grad_norm": 2.532498836517334, "learning_rate": 1.2762029904285388e-05, "loss": 0.5992, "step": 16722 }, { "epoch": 0.43, "grad_norm": 1.801389455795288, "learning_rate": 1.2761232045025129e-05, "loss": 0.6787, "step": 16723 }, { "epoch": 0.43, "grad_norm": 1.5087566375732422, "learning_rate": 1.2760434166736191e-05, "loss": 0.5956, "step": 16724 }, { "epoch": 0.43, "grad_norm": 3.155744791030884, "learning_rate": 1.2759636269424075e-05, "loss": 0.6421, "step": 16725 }, { "epoch": 0.43, "grad_norm": 2.3473145961761475, "learning_rate": 1.2758838353094278e-05, "loss": 0.483, "step": 16726 }, { "epoch": 0.43, "grad_norm": 2.407439708709717, "learning_rate": 1.2758040417752299e-05, "loss": 0.6025, "step": 16727 }, { "epoch": 0.43, "grad_norm": 1.4283839464187622, "learning_rate": 1.2757242463403643e-05, "loss": 0.4524, "step": 16728 }, { "epoch": 0.43, "grad_norm": 1.22639799118042, "learning_rate": 1.27564444900538e-05, "loss": 0.6587, "step": 16729 }, { "epoch": 0.43, "grad_norm": 1.2959296703338623, "learning_rate": 1.2755646497708275e-05, "loss": 0.7004, "step": 16730 }, { "epoch": 0.43, "grad_norm": 2.5121824741363525, "learning_rate": 1.2754848486372566e-05, "loss": 0.7478, "step": 16731 }, { "epoch": 0.43, "grad_norm": 3.378573417663574, "learning_rate": 1.2754050456052172e-05, "loss": 0.5713, "step": 16732 }, { "epoch": 0.43, "grad_norm": 4.977473258972168, "learning_rate": 1.2753252406752595e-05, "loss": 0.5836, "step": 16733 }, { "epoch": 0.43, "grad_norm": 2.7640697956085205, "learning_rate": 1.2752454338479327e-05, "loss": 0.5853, "step": 16734 }, { "epoch": 0.43, "grad_norm": 1.343299150466919, "learning_rate": 1.2751656251237877e-05, "loss": 0.5892, "step": 16735 }, { "epoch": 0.43, "grad_norm": 1.6983660459518433, "learning_rate": 1.2750858145033738e-05, "loss": 0.5979, "step": 16736 }, { "epoch": 0.43, "grad_norm": 1.6273407936096191, "learning_rate": 1.2750060019872415e-05, "loss": 0.5861, "step": 16737 }, { "epoch": 0.43, "grad_norm": 1.7746251821517944, "learning_rate": 1.2749261875759404e-05, "loss": 0.7176, "step": 16738 }, { "epoch": 0.43, "grad_norm": 2.2606210708618164, "learning_rate": 1.2748463712700207e-05, "loss": 0.5637, "step": 16739 }, { "epoch": 0.43, "grad_norm": 2.354536533355713, "learning_rate": 1.274766553070033e-05, "loss": 0.6129, "step": 16740 }, { "epoch": 0.43, "grad_norm": 6.197108745574951, "learning_rate": 1.2746867329765264e-05, "loss": 0.5673, "step": 16741 }, { "epoch": 0.43, "grad_norm": 6.758795738220215, "learning_rate": 1.2746069109900516e-05, "loss": 0.618, "step": 16742 }, { "epoch": 0.43, "grad_norm": 6.826289653778076, "learning_rate": 1.2745270871111584e-05, "loss": 0.7003, "step": 16743 }, { "epoch": 0.43, "grad_norm": 2.0805249214172363, "learning_rate": 1.2744472613403969e-05, "loss": 0.3473, "step": 16744 }, { "epoch": 0.43, "grad_norm": 2.5800740718841553, "learning_rate": 1.2743674336783173e-05, "loss": 0.6517, "step": 16745 }, { "epoch": 0.43, "grad_norm": 1.4547110795974731, "learning_rate": 1.27428760412547e-05, "loss": 0.6067, "step": 16746 }, { "epoch": 0.43, "grad_norm": 5.973377704620361, "learning_rate": 1.2742077726824047e-05, "loss": 0.6405, "step": 16747 }, { "epoch": 0.43, "grad_norm": 3.5638108253479004, "learning_rate": 1.2741279393496715e-05, "loss": 0.5566, "step": 16748 }, { "epoch": 0.43, "grad_norm": 1.9104958772659302, "learning_rate": 1.2740481041278208e-05, "loss": 0.6054, "step": 16749 }, { "epoch": 0.43, "grad_norm": 1.497982382774353, "learning_rate": 1.2739682670174032e-05, "loss": 0.5613, "step": 16750 }, { "epoch": 0.43, "grad_norm": 3.6171212196350098, "learning_rate": 1.2738884280189678e-05, "loss": 0.5542, "step": 16751 }, { "epoch": 0.43, "grad_norm": 1.7816828489303589, "learning_rate": 1.2738085871330661e-05, "loss": 0.5129, "step": 16752 }, { "epoch": 0.43, "grad_norm": 1.6075817346572876, "learning_rate": 1.2737287443602471e-05, "loss": 0.6046, "step": 16753 }, { "epoch": 0.43, "grad_norm": 2.3285763263702393, "learning_rate": 1.2736488997010618e-05, "loss": 0.5742, "step": 16754 }, { "epoch": 0.43, "grad_norm": 1.8352731466293335, "learning_rate": 1.27356905315606e-05, "loss": 0.542, "step": 16755 }, { "epoch": 0.43, "grad_norm": 2.1621110439300537, "learning_rate": 1.2734892047257924e-05, "loss": 0.5107, "step": 16756 }, { "epoch": 0.43, "grad_norm": 2.414959669113159, "learning_rate": 1.273409354410809e-05, "loss": 0.4806, "step": 16757 }, { "epoch": 0.43, "grad_norm": 1.8405102491378784, "learning_rate": 1.2733295022116601e-05, "loss": 0.68, "step": 16758 }, { "epoch": 0.43, "grad_norm": 3.387329578399658, "learning_rate": 1.2732496481288958e-05, "loss": 0.6927, "step": 16759 }, { "epoch": 0.43, "grad_norm": 2.4110262393951416, "learning_rate": 1.273169792163067e-05, "loss": 0.4774, "step": 16760 }, { "epoch": 0.43, "grad_norm": 1.4179127216339111, "learning_rate": 1.2730899343147232e-05, "loss": 0.56, "step": 16761 }, { "epoch": 0.43, "grad_norm": 1.350419044494629, "learning_rate": 1.2730100745844154e-05, "loss": 0.4089, "step": 16762 }, { "epoch": 0.43, "grad_norm": 1.023166537284851, "learning_rate": 1.2729302129726935e-05, "loss": 0.6629, "step": 16763 }, { "epoch": 0.43, "grad_norm": 2.8946146965026855, "learning_rate": 1.2728503494801081e-05, "loss": 0.6693, "step": 16764 }, { "epoch": 0.43, "grad_norm": 1.9334839582443237, "learning_rate": 1.2727704841072095e-05, "loss": 0.6199, "step": 16765 }, { "epoch": 0.43, "grad_norm": 2.0922813415527344, "learning_rate": 1.272690616854548e-05, "loss": 0.5873, "step": 16766 }, { "epoch": 0.43, "grad_norm": 5.057321071624756, "learning_rate": 1.2726107477226744e-05, "loss": 0.5784, "step": 16767 }, { "epoch": 0.43, "grad_norm": 1.4362766742706299, "learning_rate": 1.2725308767121384e-05, "loss": 0.6212, "step": 16768 }, { "epoch": 0.43, "grad_norm": 1.3562171459197998, "learning_rate": 1.2724510038234912e-05, "loss": 0.6658, "step": 16769 }, { "epoch": 0.43, "grad_norm": 3.2178103923797607, "learning_rate": 1.2723711290572826e-05, "loss": 0.5028, "step": 16770 }, { "epoch": 0.43, "grad_norm": 3.182048797607422, "learning_rate": 1.2722912524140634e-05, "loss": 0.4998, "step": 16771 }, { "epoch": 0.43, "grad_norm": 1.4292194843292236, "learning_rate": 1.272211373894384e-05, "loss": 0.6272, "step": 16772 }, { "epoch": 0.43, "grad_norm": 9.150818824768066, "learning_rate": 1.2721314934987947e-05, "loss": 0.6269, "step": 16773 }, { "epoch": 0.43, "grad_norm": 7.555466175079346, "learning_rate": 1.2720516112278463e-05, "loss": 0.565, "step": 16774 }, { "epoch": 0.43, "grad_norm": 2.0752248764038086, "learning_rate": 1.271971727082089e-05, "loss": 0.5721, "step": 16775 }, { "epoch": 0.43, "grad_norm": 3.816819429397583, "learning_rate": 1.2718918410620733e-05, "loss": 0.4761, "step": 16776 }, { "epoch": 0.43, "grad_norm": 1.8096438646316528, "learning_rate": 1.2718119531683502e-05, "loss": 0.4675, "step": 16777 }, { "epoch": 0.43, "grad_norm": 1.1476047039031982, "learning_rate": 1.2717320634014695e-05, "loss": 0.4937, "step": 16778 }, { "epoch": 0.43, "grad_norm": 1.3635038137435913, "learning_rate": 1.2716521717619825e-05, "loss": 0.5108, "step": 16779 }, { "epoch": 0.43, "grad_norm": 2.8061583042144775, "learning_rate": 1.271572278250439e-05, "loss": 0.5994, "step": 16780 }, { "epoch": 0.43, "grad_norm": 1.8306134939193726, "learning_rate": 1.2714923828673902e-05, "loss": 0.5902, "step": 16781 }, { "epoch": 0.43, "grad_norm": 1.631504774093628, "learning_rate": 1.2714124856133865e-05, "loss": 0.5299, "step": 16782 }, { "epoch": 0.43, "grad_norm": 2.2008135318756104, "learning_rate": 1.2713325864889783e-05, "loss": 0.6486, "step": 16783 }, { "epoch": 0.43, "grad_norm": 2.0370922088623047, "learning_rate": 1.2712526854947168e-05, "loss": 0.6661, "step": 16784 }, { "epoch": 0.43, "grad_norm": 5.786085605621338, "learning_rate": 1.2711727826311516e-05, "loss": 0.6187, "step": 16785 }, { "epoch": 0.43, "grad_norm": 2.8199973106384277, "learning_rate": 1.2710928778988345e-05, "loss": 0.6097, "step": 16786 }, { "epoch": 0.43, "grad_norm": 1.0745458602905273, "learning_rate": 1.2710129712983153e-05, "loss": 0.6523, "step": 16787 }, { "epoch": 0.43, "grad_norm": 2.615041732788086, "learning_rate": 1.2709330628301448e-05, "loss": 0.666, "step": 16788 }, { "epoch": 0.43, "grad_norm": 2.448601722717285, "learning_rate": 1.2708531524948742e-05, "loss": 0.5068, "step": 16789 }, { "epoch": 0.43, "grad_norm": 3.8677468299865723, "learning_rate": 1.2707732402930537e-05, "loss": 0.7202, "step": 16790 }, { "epoch": 0.43, "grad_norm": 1.232587218284607, "learning_rate": 1.2706933262252341e-05, "loss": 0.5439, "step": 16791 }, { "epoch": 0.43, "grad_norm": 1.7819585800170898, "learning_rate": 1.2706134102919663e-05, "loss": 0.6337, "step": 16792 }, { "epoch": 0.43, "grad_norm": 2.414506673812866, "learning_rate": 1.2705334924938006e-05, "loss": 0.4964, "step": 16793 }, { "epoch": 0.43, "grad_norm": 1.6377559900283813, "learning_rate": 1.2704535728312886e-05, "loss": 0.5511, "step": 16794 }, { "epoch": 0.43, "grad_norm": 3.6839370727539062, "learning_rate": 1.2703736513049801e-05, "loss": 0.7232, "step": 16795 }, { "epoch": 0.43, "grad_norm": 1.6169517040252686, "learning_rate": 1.2702937279154261e-05, "loss": 0.5503, "step": 16796 }, { "epoch": 0.43, "grad_norm": 1.2582132816314697, "learning_rate": 1.2702138026631779e-05, "loss": 0.4982, "step": 16797 }, { "epoch": 0.43, "grad_norm": 3.7563467025756836, "learning_rate": 1.2701338755487858e-05, "loss": 0.7875, "step": 16798 }, { "epoch": 0.43, "grad_norm": 1.1757087707519531, "learning_rate": 1.2700539465728006e-05, "loss": 0.4688, "step": 16799 }, { "epoch": 0.43, "grad_norm": 9.934027671813965, "learning_rate": 1.2699740157357735e-05, "loss": 0.5449, "step": 16800 }, { "epoch": 0.43, "grad_norm": 2.217522621154785, "learning_rate": 1.2698940830382549e-05, "loss": 0.649, "step": 16801 }, { "epoch": 0.43, "grad_norm": 1.2890675067901611, "learning_rate": 1.2698141484807959e-05, "loss": 0.541, "step": 16802 }, { "epoch": 0.43, "grad_norm": 2.5861828327178955, "learning_rate": 1.2697342120639475e-05, "loss": 0.6486, "step": 16803 }, { "epoch": 0.43, "grad_norm": 6.128487586975098, "learning_rate": 1.26965427378826e-05, "loss": 0.6428, "step": 16804 }, { "epoch": 0.43, "grad_norm": 1.7563724517822266, "learning_rate": 1.2695743336542848e-05, "loss": 0.599, "step": 16805 }, { "epoch": 0.43, "grad_norm": 1.5169408321380615, "learning_rate": 1.2694943916625728e-05, "loss": 0.6976, "step": 16806 }, { "epoch": 0.43, "grad_norm": 1.8902839422225952, "learning_rate": 1.2694144478136747e-05, "loss": 0.5191, "step": 16807 }, { "epoch": 0.43, "grad_norm": 1.1668390035629272, "learning_rate": 1.2693345021081416e-05, "loss": 0.5683, "step": 16808 }, { "epoch": 0.43, "grad_norm": 8.269783020019531, "learning_rate": 1.2692545545465241e-05, "loss": 0.4694, "step": 16809 }, { "epoch": 0.43, "grad_norm": 1.822746753692627, "learning_rate": 1.2691746051293737e-05, "loss": 0.4833, "step": 16810 }, { "epoch": 0.43, "grad_norm": 2.9213294982910156, "learning_rate": 1.2690946538572406e-05, "loss": 0.729, "step": 16811 }, { "epoch": 0.43, "grad_norm": 1.4403616189956665, "learning_rate": 1.2690147007306763e-05, "loss": 0.4917, "step": 16812 }, { "epoch": 0.43, "grad_norm": 3.5896542072296143, "learning_rate": 1.268934745750232e-05, "loss": 0.5354, "step": 16813 }, { "epoch": 0.43, "grad_norm": 2.537597417831421, "learning_rate": 1.2688547889164582e-05, "loss": 0.353, "step": 16814 }, { "epoch": 0.43, "grad_norm": 3.4477756023406982, "learning_rate": 1.268774830229906e-05, "loss": 0.6157, "step": 16815 }, { "epoch": 0.43, "grad_norm": 2.2453298568725586, "learning_rate": 1.2686948696911267e-05, "loss": 0.681, "step": 16816 }, { "epoch": 0.43, "grad_norm": 2.8160324096679688, "learning_rate": 1.2686149073006711e-05, "loss": 0.7809, "step": 16817 }, { "epoch": 0.43, "grad_norm": 1.9264945983886719, "learning_rate": 1.2685349430590902e-05, "loss": 0.6295, "step": 16818 }, { "epoch": 0.43, "grad_norm": 3.170347213745117, "learning_rate": 1.2684549769669352e-05, "loss": 0.6638, "step": 16819 }, { "epoch": 0.43, "grad_norm": 2.1644327640533447, "learning_rate": 1.2683750090247575e-05, "loss": 0.4549, "step": 16820 }, { "epoch": 0.43, "grad_norm": 2.2925822734832764, "learning_rate": 1.2682950392331076e-05, "loss": 0.6956, "step": 16821 }, { "epoch": 0.43, "grad_norm": 1.6003473997116089, "learning_rate": 1.2682150675925368e-05, "loss": 0.643, "step": 16822 }, { "epoch": 0.43, "grad_norm": 1.5531258583068848, "learning_rate": 1.2681350941035964e-05, "loss": 0.5553, "step": 16823 }, { "epoch": 0.43, "grad_norm": 1.8444252014160156, "learning_rate": 1.2680551187668374e-05, "loss": 0.554, "step": 16824 }, { "epoch": 0.43, "grad_norm": 1.4273344278335571, "learning_rate": 1.2679751415828107e-05, "loss": 0.5762, "step": 16825 }, { "epoch": 0.43, "grad_norm": 3.577955961227417, "learning_rate": 1.2678951625520676e-05, "loss": 0.5293, "step": 16826 }, { "epoch": 0.43, "grad_norm": 1.3845224380493164, "learning_rate": 1.2678151816751598e-05, "loss": 0.5762, "step": 16827 }, { "epoch": 0.43, "grad_norm": 11.569426536560059, "learning_rate": 1.2677351989526379e-05, "loss": 0.6365, "step": 16828 }, { "epoch": 0.43, "grad_norm": 1.4097131490707397, "learning_rate": 1.2676552143850528e-05, "loss": 0.5036, "step": 16829 }, { "epoch": 0.43, "grad_norm": 3.946824073791504, "learning_rate": 1.2675752279729568e-05, "loss": 0.5385, "step": 16830 }, { "epoch": 0.43, "grad_norm": 1.265987515449524, "learning_rate": 1.2674952397169e-05, "loss": 0.7169, "step": 16831 }, { "epoch": 0.43, "grad_norm": 1.6378419399261475, "learning_rate": 1.2674152496174342e-05, "loss": 0.6034, "step": 16832 }, { "epoch": 0.43, "grad_norm": 2.409187078475952, "learning_rate": 1.2673352576751103e-05, "loss": 0.7432, "step": 16833 }, { "epoch": 0.43, "grad_norm": 2.2478208541870117, "learning_rate": 1.26725526389048e-05, "loss": 0.5969, "step": 16834 }, { "epoch": 0.43, "grad_norm": 2.6045570373535156, "learning_rate": 1.267175268264094e-05, "loss": 0.6478, "step": 16835 }, { "epoch": 0.43, "grad_norm": 1.524465799331665, "learning_rate": 1.2670952707965043e-05, "loss": 0.4773, "step": 16836 }, { "epoch": 0.43, "grad_norm": 1.5424433946609497, "learning_rate": 1.2670152714882617e-05, "loss": 0.5471, "step": 16837 }, { "epoch": 0.43, "grad_norm": 1.6131151914596558, "learning_rate": 1.2669352703399173e-05, "loss": 0.6573, "step": 16838 }, { "epoch": 0.43, "grad_norm": 6.068085193634033, "learning_rate": 1.2668552673520233e-05, "loss": 0.8447, "step": 16839 }, { "epoch": 0.43, "grad_norm": 2.222717761993408, "learning_rate": 1.26677526252513e-05, "loss": 0.8144, "step": 16840 }, { "epoch": 0.43, "grad_norm": 1.2692450284957886, "learning_rate": 1.2666952558597891e-05, "loss": 0.5282, "step": 16841 }, { "epoch": 0.43, "grad_norm": 2.55199933052063, "learning_rate": 1.2666152473565526e-05, "loss": 0.5826, "step": 16842 }, { "epoch": 0.43, "grad_norm": 4.176991939544678, "learning_rate": 1.2665352370159708e-05, "loss": 0.609, "step": 16843 }, { "epoch": 0.43, "grad_norm": 3.711059808731079, "learning_rate": 1.266455224838596e-05, "loss": 0.4646, "step": 16844 }, { "epoch": 0.43, "grad_norm": 1.2745718955993652, "learning_rate": 1.266375210824979e-05, "loss": 0.6189, "step": 16845 }, { "epoch": 0.43, "grad_norm": 5.751780986785889, "learning_rate": 1.2662951949756712e-05, "loss": 0.6605, "step": 16846 }, { "epoch": 0.43, "grad_norm": 2.3041088581085205, "learning_rate": 1.2662151772912243e-05, "loss": 0.5804, "step": 16847 }, { "epoch": 0.43, "grad_norm": 2.003436326980591, "learning_rate": 1.26613515777219e-05, "loss": 0.6236, "step": 16848 }, { "epoch": 0.43, "grad_norm": 4.585877418518066, "learning_rate": 1.2660551364191189e-05, "loss": 0.6835, "step": 16849 }, { "epoch": 0.43, "grad_norm": 1.4201569557189941, "learning_rate": 1.2659751132325633e-05, "loss": 0.4448, "step": 16850 }, { "epoch": 0.43, "grad_norm": 1.4105808734893799, "learning_rate": 1.2658950882130744e-05, "loss": 0.5939, "step": 16851 }, { "epoch": 0.43, "grad_norm": 1.5146820545196533, "learning_rate": 1.2658150613612033e-05, "loss": 0.717, "step": 16852 }, { "epoch": 0.43, "grad_norm": 2.3259522914886475, "learning_rate": 1.2657350326775018e-05, "loss": 0.526, "step": 16853 }, { "epoch": 0.43, "grad_norm": 2.1836297512054443, "learning_rate": 1.2656550021625215e-05, "loss": 0.6761, "step": 16854 }, { "epoch": 0.43, "grad_norm": 4.320558071136475, "learning_rate": 1.2655749698168139e-05, "loss": 0.6319, "step": 16855 }, { "epoch": 0.43, "grad_norm": 3.290634870529175, "learning_rate": 1.2654949356409305e-05, "loss": 0.5517, "step": 16856 }, { "epoch": 0.43, "grad_norm": 1.688586711883545, "learning_rate": 1.2654148996354225e-05, "loss": 0.4992, "step": 16857 }, { "epoch": 0.43, "grad_norm": 1.2747390270233154, "learning_rate": 1.265334861800842e-05, "loss": 0.5248, "step": 16858 }, { "epoch": 0.43, "grad_norm": 2.039595365524292, "learning_rate": 1.2652548221377405e-05, "loss": 0.4934, "step": 16859 }, { "epoch": 0.43, "grad_norm": 2.4722509384155273, "learning_rate": 1.2651747806466693e-05, "loss": 0.622, "step": 16860 }, { "epoch": 0.43, "grad_norm": 2.834416151046753, "learning_rate": 1.2650947373281799e-05, "loss": 0.5211, "step": 16861 }, { "epoch": 0.43, "grad_norm": 3.4700167179107666, "learning_rate": 1.2650146921828244e-05, "loss": 0.5946, "step": 16862 }, { "epoch": 0.43, "grad_norm": 1.3588007688522339, "learning_rate": 1.2649346452111539e-05, "loss": 0.6359, "step": 16863 }, { "epoch": 0.43, "grad_norm": 1.8188986778259277, "learning_rate": 1.2648545964137204e-05, "loss": 0.501, "step": 16864 }, { "epoch": 0.43, "grad_norm": 1.789186954498291, "learning_rate": 1.2647745457910752e-05, "loss": 0.4974, "step": 16865 }, { "epoch": 0.43, "grad_norm": 1.8114248514175415, "learning_rate": 1.2646944933437705e-05, "loss": 0.4113, "step": 16866 }, { "epoch": 0.43, "grad_norm": 1.6702845096588135, "learning_rate": 1.2646144390723574e-05, "loss": 0.5581, "step": 16867 }, { "epoch": 0.43, "grad_norm": 1.0593141317367554, "learning_rate": 1.2645343829773882e-05, "loss": 0.3936, "step": 16868 }, { "epoch": 0.43, "grad_norm": 1.5751771926879883, "learning_rate": 1.2644543250594138e-05, "loss": 0.6159, "step": 16869 }, { "epoch": 0.43, "grad_norm": 1.5100681781768799, "learning_rate": 1.2643742653189866e-05, "loss": 0.6026, "step": 16870 }, { "epoch": 0.43, "grad_norm": 3.8493852615356445, "learning_rate": 1.2642942037566579e-05, "loss": 0.4132, "step": 16871 }, { "epoch": 0.43, "grad_norm": 2.704970121383667, "learning_rate": 1.2642141403729795e-05, "loss": 0.5786, "step": 16872 }, { "epoch": 0.43, "grad_norm": 1.487343668937683, "learning_rate": 1.2641340751685037e-05, "loss": 0.6026, "step": 16873 }, { "epoch": 0.43, "grad_norm": 5.379764556884766, "learning_rate": 1.2640540081437813e-05, "loss": 0.7038, "step": 16874 }, { "epoch": 0.43, "grad_norm": 5.42455530166626, "learning_rate": 1.263973939299365e-05, "loss": 0.5286, "step": 16875 }, { "epoch": 0.43, "grad_norm": 2.2196555137634277, "learning_rate": 1.2638938686358057e-05, "loss": 0.6604, "step": 16876 }, { "epoch": 0.43, "grad_norm": 1.472015380859375, "learning_rate": 1.2638137961536558e-05, "loss": 0.4946, "step": 16877 }, { "epoch": 0.43, "grad_norm": 2.4380991458892822, "learning_rate": 1.2637337218534673e-05, "loss": 0.6629, "step": 16878 }, { "epoch": 0.43, "grad_norm": 1.279319405555725, "learning_rate": 1.2636536457357915e-05, "loss": 0.617, "step": 16879 }, { "epoch": 0.43, "grad_norm": 4.696185111999512, "learning_rate": 1.2635735678011803e-05, "loss": 0.6779, "step": 16880 }, { "epoch": 0.43, "grad_norm": 1.4702575206756592, "learning_rate": 1.2634934880501856e-05, "loss": 0.5559, "step": 16881 }, { "epoch": 0.43, "grad_norm": 1.6026275157928467, "learning_rate": 1.2634134064833594e-05, "loss": 0.6258, "step": 16882 }, { "epoch": 0.43, "grad_norm": 1.592273235321045, "learning_rate": 1.2633333231012538e-05, "loss": 0.483, "step": 16883 }, { "epoch": 0.43, "grad_norm": 1.6969867944717407, "learning_rate": 1.26325323790442e-05, "loss": 0.4246, "step": 16884 }, { "epoch": 0.43, "grad_norm": 1.3722825050354004, "learning_rate": 1.2631731508934105e-05, "loss": 0.5742, "step": 16885 }, { "epoch": 0.43, "grad_norm": 1.5123059749603271, "learning_rate": 1.263093062068777e-05, "loss": 0.5628, "step": 16886 }, { "epoch": 0.43, "grad_norm": 2.1027579307556152, "learning_rate": 1.263012971431071e-05, "loss": 0.5137, "step": 16887 }, { "epoch": 0.43, "grad_norm": 3.0298333168029785, "learning_rate": 1.2629328789808453e-05, "loss": 0.6962, "step": 16888 }, { "epoch": 0.43, "grad_norm": 0.9854096174240112, "learning_rate": 1.2628527847186511e-05, "loss": 0.6456, "step": 16889 }, { "epoch": 0.43, "grad_norm": 2.252441883087158, "learning_rate": 1.2627726886450409e-05, "loss": 0.6384, "step": 16890 }, { "epoch": 0.43, "grad_norm": 1.331368088722229, "learning_rate": 1.262692590760566e-05, "loss": 0.4538, "step": 16891 }, { "epoch": 0.43, "grad_norm": 2.346601963043213, "learning_rate": 1.2626124910657791e-05, "loss": 0.6752, "step": 16892 }, { "epoch": 0.43, "grad_norm": 1.4984440803527832, "learning_rate": 1.262532389561232e-05, "loss": 0.5581, "step": 16893 }, { "epoch": 0.43, "grad_norm": 3.3646693229675293, "learning_rate": 1.2624522862474764e-05, "loss": 0.542, "step": 16894 }, { "epoch": 0.43, "grad_norm": 1.593062162399292, "learning_rate": 1.2623721811250648e-05, "loss": 0.715, "step": 16895 }, { "epoch": 0.43, "grad_norm": 11.275732040405273, "learning_rate": 1.2622920741945488e-05, "loss": 0.7639, "step": 16896 }, { "epoch": 0.43, "grad_norm": 3.8919248580932617, "learning_rate": 1.2622119654564804e-05, "loss": 0.7324, "step": 16897 }, { "epoch": 0.43, "grad_norm": 3.3868587017059326, "learning_rate": 1.262131854911412e-05, "loss": 0.8855, "step": 16898 }, { "epoch": 0.43, "grad_norm": 3.213451623916626, "learning_rate": 1.2620517425598957e-05, "loss": 0.5344, "step": 16899 }, { "epoch": 0.43, "grad_norm": 1.5739208459854126, "learning_rate": 1.2619716284024834e-05, "loss": 0.599, "step": 16900 }, { "epoch": 0.43, "grad_norm": 1.184219241142273, "learning_rate": 1.2618915124397271e-05, "loss": 0.4918, "step": 16901 }, { "epoch": 0.43, "grad_norm": 2.0429821014404297, "learning_rate": 1.2618113946721792e-05, "loss": 0.6207, "step": 16902 }, { "epoch": 0.43, "grad_norm": 3.1733055114746094, "learning_rate": 1.2617312751003915e-05, "loss": 0.601, "step": 16903 }, { "epoch": 0.43, "grad_norm": 3.4289963245391846, "learning_rate": 1.2616511537249163e-05, "loss": 0.683, "step": 16904 }, { "epoch": 0.43, "grad_norm": 3.4190163612365723, "learning_rate": 1.261571030546306e-05, "loss": 0.6008, "step": 16905 }, { "epoch": 0.43, "grad_norm": 1.880357027053833, "learning_rate": 1.2614909055651121e-05, "loss": 0.5338, "step": 16906 }, { "epoch": 0.43, "grad_norm": 2.9624578952789307, "learning_rate": 1.2614107787818874e-05, "loss": 0.5684, "step": 16907 }, { "epoch": 0.43, "grad_norm": 1.6842232942581177, "learning_rate": 1.2613306501971838e-05, "loss": 0.4809, "step": 16908 }, { "epoch": 0.43, "grad_norm": 13.345519065856934, "learning_rate": 1.2612505198115535e-05, "loss": 0.6005, "step": 16909 }, { "epoch": 0.43, "grad_norm": 2.214787006378174, "learning_rate": 1.2611703876255487e-05, "loss": 0.5894, "step": 16910 }, { "epoch": 0.43, "grad_norm": 1.3739992380142212, "learning_rate": 1.2610902536397216e-05, "loss": 0.6122, "step": 16911 }, { "epoch": 0.43, "grad_norm": 1.586862564086914, "learning_rate": 1.2610101178546249e-05, "loss": 0.5867, "step": 16912 }, { "epoch": 0.43, "grad_norm": 2.563532829284668, "learning_rate": 1.2609299802708101e-05, "loss": 0.5409, "step": 16913 }, { "epoch": 0.43, "grad_norm": 1.3034641742706299, "learning_rate": 1.2608498408888297e-05, "loss": 0.614, "step": 16914 }, { "epoch": 0.43, "grad_norm": 4.948727607727051, "learning_rate": 1.2607696997092364e-05, "loss": 0.7267, "step": 16915 }, { "epoch": 0.43, "grad_norm": 4.489602088928223, "learning_rate": 1.2606895567325821e-05, "loss": 0.5258, "step": 16916 }, { "epoch": 0.43, "grad_norm": 3.9775006771087646, "learning_rate": 1.2606094119594193e-05, "loss": 0.5818, "step": 16917 }, { "epoch": 0.43, "grad_norm": 1.1893303394317627, "learning_rate": 1.2605292653902999e-05, "loss": 0.4561, "step": 16918 }, { "epoch": 0.43, "grad_norm": 4.039992809295654, "learning_rate": 1.2604491170257766e-05, "loss": 0.5701, "step": 16919 }, { "epoch": 0.43, "grad_norm": 1.8272466659545898, "learning_rate": 1.2603689668664015e-05, "loss": 0.6927, "step": 16920 }, { "epoch": 0.43, "grad_norm": 4.45235538482666, "learning_rate": 1.260288814912727e-05, "loss": 0.6911, "step": 16921 }, { "epoch": 0.43, "grad_norm": 2.7399604320526123, "learning_rate": 1.2602086611653056e-05, "loss": 0.676, "step": 16922 }, { "epoch": 0.43, "grad_norm": 2.854980707168579, "learning_rate": 1.2601285056246897e-05, "loss": 0.7061, "step": 16923 }, { "epoch": 0.43, "grad_norm": 1.6937081813812256, "learning_rate": 1.2600483482914316e-05, "loss": 0.3956, "step": 16924 }, { "epoch": 0.43, "grad_norm": 1.6612677574157715, "learning_rate": 1.2599681891660835e-05, "loss": 0.5872, "step": 16925 }, { "epoch": 0.43, "grad_norm": 1.6446994543075562, "learning_rate": 1.2598880282491979e-05, "loss": 0.4929, "step": 16926 }, { "epoch": 0.43, "grad_norm": 3.5121190547943115, "learning_rate": 1.2598078655413275e-05, "loss": 0.5524, "step": 16927 }, { "epoch": 0.43, "grad_norm": 2.6616880893707275, "learning_rate": 1.2597277010430244e-05, "loss": 0.4724, "step": 16928 }, { "epoch": 0.43, "grad_norm": 2.3456382751464844, "learning_rate": 1.2596475347548413e-05, "loss": 0.6452, "step": 16929 }, { "epoch": 0.43, "grad_norm": 1.6171536445617676, "learning_rate": 1.2595673666773302e-05, "loss": 0.5824, "step": 16930 }, { "epoch": 0.43, "grad_norm": 2.0155274868011475, "learning_rate": 1.2594871968110445e-05, "loss": 0.4787, "step": 16931 }, { "epoch": 0.43, "grad_norm": 1.6614525318145752, "learning_rate": 1.2594070251565353e-05, "loss": 0.5768, "step": 16932 }, { "epoch": 0.43, "grad_norm": 4.018322944641113, "learning_rate": 1.2593268517143561e-05, "loss": 0.7291, "step": 16933 }, { "epoch": 0.43, "grad_norm": 2.3708784580230713, "learning_rate": 1.2592466764850594e-05, "loss": 0.6236, "step": 16934 }, { "epoch": 0.43, "grad_norm": 2.1197094917297363, "learning_rate": 1.2591664994691974e-05, "loss": 0.4442, "step": 16935 }, { "epoch": 0.43, "grad_norm": 2.867345094680786, "learning_rate": 1.2590863206673229e-05, "loss": 0.5556, "step": 16936 }, { "epoch": 0.43, "grad_norm": 1.9518485069274902, "learning_rate": 1.259006140079988e-05, "loss": 0.6073, "step": 16937 }, { "epoch": 0.43, "grad_norm": 3.5841751098632812, "learning_rate": 1.2589259577077453e-05, "loss": 0.6621, "step": 16938 }, { "epoch": 0.43, "grad_norm": 1.911251425743103, "learning_rate": 1.2588457735511479e-05, "loss": 0.6508, "step": 16939 }, { "epoch": 0.43, "grad_norm": 1.7592829465866089, "learning_rate": 1.2587655876107479e-05, "loss": 0.576, "step": 16940 }, { "epoch": 0.43, "grad_norm": 1.6130332946777344, "learning_rate": 1.2586853998870982e-05, "loss": 0.5339, "step": 16941 }, { "epoch": 0.43, "grad_norm": 2.7347655296325684, "learning_rate": 1.2586052103807511e-05, "loss": 0.5645, "step": 16942 }, { "epoch": 0.43, "grad_norm": 2.4361579418182373, "learning_rate": 1.2585250190922596e-05, "loss": 0.5657, "step": 16943 }, { "epoch": 0.43, "grad_norm": 1.4406832456588745, "learning_rate": 1.2584448260221758e-05, "loss": 0.5777, "step": 16944 }, { "epoch": 0.43, "grad_norm": 1.3925386667251587, "learning_rate": 1.2583646311710527e-05, "loss": 0.338, "step": 16945 }, { "epoch": 0.43, "grad_norm": 1.7130004167556763, "learning_rate": 1.258284434539443e-05, "loss": 0.562, "step": 16946 }, { "epoch": 0.43, "grad_norm": 1.3347893953323364, "learning_rate": 1.258204236127899e-05, "loss": 0.6017, "step": 16947 }, { "epoch": 0.43, "grad_norm": 6.6754937171936035, "learning_rate": 1.258124035936974e-05, "loss": 0.836, "step": 16948 }, { "epoch": 0.43, "grad_norm": 1.3707276582717896, "learning_rate": 1.2580438339672199e-05, "loss": 0.4294, "step": 16949 }, { "epoch": 0.43, "grad_norm": 2.187824249267578, "learning_rate": 1.25796363021919e-05, "loss": 0.6683, "step": 16950 }, { "epoch": 0.43, "grad_norm": 3.1684038639068604, "learning_rate": 1.2578834246934368e-05, "loss": 0.5973, "step": 16951 }, { "epoch": 0.43, "grad_norm": 1.5846621990203857, "learning_rate": 1.257803217390513e-05, "loss": 0.6434, "step": 16952 }, { "epoch": 0.43, "grad_norm": 1.7417738437652588, "learning_rate": 1.2577230083109717e-05, "loss": 0.6069, "step": 16953 }, { "epoch": 0.43, "grad_norm": 1.7590968608856201, "learning_rate": 1.2576427974553652e-05, "loss": 0.7015, "step": 16954 }, { "epoch": 0.43, "grad_norm": 5.684542179107666, "learning_rate": 1.2575625848242463e-05, "loss": 0.8549, "step": 16955 }, { "epoch": 0.43, "grad_norm": 3.11379337310791, "learning_rate": 1.2574823704181677e-05, "loss": 0.5032, "step": 16956 }, { "epoch": 0.43, "grad_norm": 2.327634811401367, "learning_rate": 1.2574021542376827e-05, "loss": 0.4817, "step": 16957 }, { "epoch": 0.43, "grad_norm": 1.1637779474258423, "learning_rate": 1.2573219362833438e-05, "loss": 0.4364, "step": 16958 }, { "epoch": 0.43, "grad_norm": 1.4919898509979248, "learning_rate": 1.2572417165557033e-05, "loss": 0.6567, "step": 16959 }, { "epoch": 0.43, "grad_norm": 1.436472773551941, "learning_rate": 1.257161495055315e-05, "loss": 0.691, "step": 16960 }, { "epoch": 0.43, "grad_norm": 1.6297036409378052, "learning_rate": 1.257081271782731e-05, "loss": 0.6285, "step": 16961 }, { "epoch": 0.43, "grad_norm": 4.469921588897705, "learning_rate": 1.2570010467385046e-05, "loss": 0.5739, "step": 16962 }, { "epoch": 0.43, "grad_norm": 4.920122146606445, "learning_rate": 1.2569208199231883e-05, "loss": 0.4715, "step": 16963 }, { "epoch": 0.43, "grad_norm": 6.947304725646973, "learning_rate": 1.2568405913373352e-05, "loss": 0.6004, "step": 16964 }, { "epoch": 0.43, "grad_norm": 2.298595666885376, "learning_rate": 1.256760360981498e-05, "loss": 0.6546, "step": 16965 }, { "epoch": 0.43, "grad_norm": 3.3815269470214844, "learning_rate": 1.2566801288562298e-05, "loss": 0.5309, "step": 16966 }, { "epoch": 0.43, "grad_norm": 2.496915340423584, "learning_rate": 1.2565998949620832e-05, "loss": 0.5251, "step": 16967 }, { "epoch": 0.43, "grad_norm": 1.0623329877853394, "learning_rate": 1.2565196592996117e-05, "loss": 0.4389, "step": 16968 }, { "epoch": 0.43, "grad_norm": 7.080456256866455, "learning_rate": 1.2564394218693675e-05, "loss": 0.5591, "step": 16969 }, { "epoch": 0.43, "grad_norm": 3.1051976680755615, "learning_rate": 1.2563591826719042e-05, "loss": 0.5669, "step": 16970 }, { "epoch": 0.43, "grad_norm": 5.221709728240967, "learning_rate": 1.2562789417077744e-05, "loss": 0.5855, "step": 16971 }, { "epoch": 0.44, "grad_norm": 7.431614398956299, "learning_rate": 1.2561986989775312e-05, "loss": 0.6466, "step": 16972 }, { "epoch": 0.44, "grad_norm": 1.510407567024231, "learning_rate": 1.2561184544817272e-05, "loss": 0.5429, "step": 16973 }, { "epoch": 0.44, "grad_norm": 1.2710833549499512, "learning_rate": 1.2560382082209159e-05, "loss": 0.5946, "step": 16974 }, { "epoch": 0.44, "grad_norm": 1.459912657737732, "learning_rate": 1.2559579601956503e-05, "loss": 0.5485, "step": 16975 }, { "epoch": 0.44, "grad_norm": 1.848650574684143, "learning_rate": 1.2558777104064833e-05, "loss": 0.547, "step": 16976 }, { "epoch": 0.44, "grad_norm": 1.7041794061660767, "learning_rate": 1.2557974588539676e-05, "loss": 0.6238, "step": 16977 }, { "epoch": 0.44, "grad_norm": 2.051802396774292, "learning_rate": 1.2557172055386567e-05, "loss": 0.5109, "step": 16978 }, { "epoch": 0.44, "grad_norm": 1.2193453311920166, "learning_rate": 1.2556369504611033e-05, "loss": 0.4818, "step": 16979 }, { "epoch": 0.44, "grad_norm": 9.85212230682373, "learning_rate": 1.2555566936218609e-05, "loss": 0.7772, "step": 16980 }, { "epoch": 0.44, "grad_norm": 3.4872870445251465, "learning_rate": 1.255476435021482e-05, "loss": 0.7605, "step": 16981 }, { "epoch": 0.44, "grad_norm": 1.2060097455978394, "learning_rate": 1.2553961746605203e-05, "loss": 0.5143, "step": 16982 }, { "epoch": 0.44, "grad_norm": 2.0768673419952393, "learning_rate": 1.2553159125395284e-05, "loss": 0.5619, "step": 16983 }, { "epoch": 0.44, "grad_norm": 3.0452563762664795, "learning_rate": 1.25523564865906e-05, "loss": 0.5514, "step": 16984 }, { "epoch": 0.44, "grad_norm": 3.819286346435547, "learning_rate": 1.2551553830196675e-05, "loss": 0.5082, "step": 16985 }, { "epoch": 0.44, "grad_norm": 1.3234705924987793, "learning_rate": 1.2550751156219043e-05, "loss": 0.5512, "step": 16986 }, { "epoch": 0.44, "grad_norm": 6.8269524574279785, "learning_rate": 1.2549948464663242e-05, "loss": 0.5686, "step": 16987 }, { "epoch": 0.44, "grad_norm": 1.228574514389038, "learning_rate": 1.2549145755534793e-05, "loss": 0.4823, "step": 16988 }, { "epoch": 0.44, "grad_norm": 2.4919023513793945, "learning_rate": 1.2548343028839236e-05, "loss": 0.571, "step": 16989 }, { "epoch": 0.44, "grad_norm": 1.9127224683761597, "learning_rate": 1.2547540284582099e-05, "loss": 0.5614, "step": 16990 }, { "epoch": 0.44, "grad_norm": 1.3425623178482056, "learning_rate": 1.2546737522768912e-05, "loss": 0.5877, "step": 16991 }, { "epoch": 0.44, "grad_norm": 3.1033172607421875, "learning_rate": 1.2545934743405215e-05, "loss": 0.6153, "step": 16992 }, { "epoch": 0.44, "grad_norm": 3.4793894290924072, "learning_rate": 1.254513194649653e-05, "loss": 0.7055, "step": 16993 }, { "epoch": 0.44, "grad_norm": 3.0004665851593018, "learning_rate": 1.25443291320484e-05, "loss": 0.592, "step": 16994 }, { "epoch": 0.44, "grad_norm": 1.409295916557312, "learning_rate": 1.2543526300066348e-05, "loss": 0.4827, "step": 16995 }, { "epoch": 0.44, "grad_norm": 3.1297531127929688, "learning_rate": 1.2542723450555916e-05, "loss": 0.6906, "step": 16996 }, { "epoch": 0.44, "grad_norm": 2.6822471618652344, "learning_rate": 1.2541920583522626e-05, "loss": 0.6283, "step": 16997 }, { "epoch": 0.44, "grad_norm": 2.2887723445892334, "learning_rate": 1.2541117698972019e-05, "loss": 0.5496, "step": 16998 }, { "epoch": 0.44, "grad_norm": 2.0641350746154785, "learning_rate": 1.2540314796909624e-05, "loss": 0.5451, "step": 16999 }, { "epoch": 0.44, "grad_norm": 1.3353568315505981, "learning_rate": 1.2539511877340978e-05, "loss": 0.5042, "step": 17000 }, { "epoch": 0.44, "grad_norm": 1.1932215690612793, "learning_rate": 1.253870894027161e-05, "loss": 0.5398, "step": 17001 }, { "epoch": 0.44, "grad_norm": 3.8574471473693848, "learning_rate": 1.2537905985707055e-05, "loss": 0.6194, "step": 17002 }, { "epoch": 0.44, "grad_norm": 3.7676329612731934, "learning_rate": 1.2537103013652845e-05, "loss": 0.5464, "step": 17003 }, { "epoch": 0.44, "grad_norm": 1.829761266708374, "learning_rate": 1.2536300024114515e-05, "loss": 0.4871, "step": 17004 }, { "epoch": 0.44, "grad_norm": 3.9399001598358154, "learning_rate": 1.25354970170976e-05, "loss": 0.5915, "step": 17005 }, { "epoch": 0.44, "grad_norm": 1.2512542009353638, "learning_rate": 1.2534693992607632e-05, "loss": 0.4416, "step": 17006 }, { "epoch": 0.44, "grad_norm": 2.5075297355651855, "learning_rate": 1.2533890950650147e-05, "loss": 0.7088, "step": 17007 }, { "epoch": 0.44, "grad_norm": 5.667497634887695, "learning_rate": 1.2533087891230676e-05, "loss": 0.545, "step": 17008 }, { "epoch": 0.44, "grad_norm": 1.4166860580444336, "learning_rate": 1.2532284814354756e-05, "loss": 0.6865, "step": 17009 }, { "epoch": 0.44, "grad_norm": 1.3035006523132324, "learning_rate": 1.2531481720027916e-05, "loss": 0.5285, "step": 17010 }, { "epoch": 0.44, "grad_norm": 3.0239734649658203, "learning_rate": 1.25306786082557e-05, "loss": 0.4913, "step": 17011 }, { "epoch": 0.44, "grad_norm": 3.292062759399414, "learning_rate": 1.2529875479043634e-05, "loss": 0.7337, "step": 17012 }, { "epoch": 0.44, "grad_norm": 3.2395026683807373, "learning_rate": 1.2529072332397253e-05, "loss": 0.667, "step": 17013 }, { "epoch": 0.44, "grad_norm": 2.3159005641937256, "learning_rate": 1.25282691683221e-05, "loss": 0.5606, "step": 17014 }, { "epoch": 0.44, "grad_norm": 2.134939193725586, "learning_rate": 1.25274659868237e-05, "loss": 0.4933, "step": 17015 }, { "epoch": 0.44, "grad_norm": 1.7097359895706177, "learning_rate": 1.2526662787907595e-05, "loss": 0.4429, "step": 17016 }, { "epoch": 0.44, "grad_norm": 1.239081859588623, "learning_rate": 1.2525859571579314e-05, "loss": 0.585, "step": 17017 }, { "epoch": 0.44, "grad_norm": 1.3341996669769287, "learning_rate": 1.25250563378444e-05, "loss": 0.4638, "step": 17018 }, { "epoch": 0.44, "grad_norm": 0.994642972946167, "learning_rate": 1.2524253086708382e-05, "loss": 0.5786, "step": 17019 }, { "epoch": 0.44, "grad_norm": 1.570459246635437, "learning_rate": 1.2523449818176796e-05, "loss": 0.5115, "step": 17020 }, { "epoch": 0.44, "grad_norm": 2.615109920501709, "learning_rate": 1.2522646532255182e-05, "loss": 0.6565, "step": 17021 }, { "epoch": 0.44, "grad_norm": 3.097867012023926, "learning_rate": 1.252184322894907e-05, "loss": 0.6026, "step": 17022 }, { "epoch": 0.44, "grad_norm": 2.6490743160247803, "learning_rate": 1.2521039908264006e-05, "loss": 0.5515, "step": 17023 }, { "epoch": 0.44, "grad_norm": 2.424243211746216, "learning_rate": 1.252023657020551e-05, "loss": 0.7303, "step": 17024 }, { "epoch": 0.44, "grad_norm": 2.2660322189331055, "learning_rate": 1.251943321477913e-05, "loss": 0.6601, "step": 17025 }, { "epoch": 0.44, "grad_norm": 1.375630497932434, "learning_rate": 1.2518629841990401e-05, "loss": 0.6995, "step": 17026 }, { "epoch": 0.44, "grad_norm": 4.0981669425964355, "learning_rate": 1.2517826451844857e-05, "loss": 0.5955, "step": 17027 }, { "epoch": 0.44, "grad_norm": 1.3797786235809326, "learning_rate": 1.2517023044348036e-05, "loss": 0.5563, "step": 17028 }, { "epoch": 0.44, "grad_norm": 3.137708902359009, "learning_rate": 1.251621961950547e-05, "loss": 0.5409, "step": 17029 }, { "epoch": 0.44, "grad_norm": 1.3303987979888916, "learning_rate": 1.25154161773227e-05, "loss": 0.4907, "step": 17030 }, { "epoch": 0.44, "grad_norm": 1.6570167541503906, "learning_rate": 1.2514612717805264e-05, "loss": 0.768, "step": 17031 }, { "epoch": 0.44, "grad_norm": 2.2012572288513184, "learning_rate": 1.2513809240958695e-05, "loss": 0.6593, "step": 17032 }, { "epoch": 0.44, "grad_norm": 1.3534055948257446, "learning_rate": 1.2513005746788533e-05, "loss": 0.6625, "step": 17033 }, { "epoch": 0.44, "grad_norm": 1.4832898378372192, "learning_rate": 1.2512202235300316e-05, "loss": 0.7481, "step": 17034 }, { "epoch": 0.44, "grad_norm": 1.0747456550598145, "learning_rate": 1.2511398706499576e-05, "loss": 0.5645, "step": 17035 }, { "epoch": 0.44, "grad_norm": 1.5034658908843994, "learning_rate": 1.2510595160391856e-05, "loss": 0.6271, "step": 17036 }, { "epoch": 0.44, "grad_norm": 1.1189558506011963, "learning_rate": 1.250979159698269e-05, "loss": 0.5295, "step": 17037 }, { "epoch": 0.44, "grad_norm": 3.644467830657959, "learning_rate": 1.2508988016277619e-05, "loss": 0.8406, "step": 17038 }, { "epoch": 0.44, "grad_norm": 4.039090633392334, "learning_rate": 1.2508184418282179e-05, "loss": 0.6595, "step": 17039 }, { "epoch": 0.44, "grad_norm": 3.200775384902954, "learning_rate": 1.2507380803001906e-05, "loss": 0.5624, "step": 17040 }, { "epoch": 0.44, "grad_norm": 1.3429484367370605, "learning_rate": 1.2506577170442342e-05, "loss": 0.5885, "step": 17041 }, { "epoch": 0.44, "grad_norm": 1.9052962064743042, "learning_rate": 1.2505773520609022e-05, "loss": 0.3831, "step": 17042 }, { "epoch": 0.44, "grad_norm": 1.9624770879745483, "learning_rate": 1.2504969853507484e-05, "loss": 0.6576, "step": 17043 }, { "epoch": 0.44, "grad_norm": 4.153110027313232, "learning_rate": 1.2504166169143269e-05, "loss": 0.4457, "step": 17044 }, { "epoch": 0.44, "grad_norm": 5.2695231437683105, "learning_rate": 1.2503362467521913e-05, "loss": 0.6597, "step": 17045 }, { "epoch": 0.44, "grad_norm": 1.622315764427185, "learning_rate": 1.2502558748648957e-05, "loss": 0.5338, "step": 17046 }, { "epoch": 0.44, "grad_norm": 1.6338237524032593, "learning_rate": 1.2501755012529934e-05, "loss": 0.4836, "step": 17047 }, { "epoch": 0.44, "grad_norm": 2.2220194339752197, "learning_rate": 1.2500951259170393e-05, "loss": 0.6531, "step": 17048 }, { "epoch": 0.44, "grad_norm": 1.4692445993423462, "learning_rate": 1.2500147488575864e-05, "loss": 0.5159, "step": 17049 }, { "epoch": 0.44, "grad_norm": 2.689429759979248, "learning_rate": 1.2499343700751892e-05, "loss": 0.6701, "step": 17050 }, { "epoch": 0.44, "grad_norm": 2.9743144512176514, "learning_rate": 1.249853989570401e-05, "loss": 0.5977, "step": 17051 }, { "epoch": 0.44, "grad_norm": 1.9729496240615845, "learning_rate": 1.2497736073437762e-05, "loss": 0.7427, "step": 17052 }, { "epoch": 0.44, "grad_norm": 4.440124988555908, "learning_rate": 1.2496932233958687e-05, "loss": 0.5849, "step": 17053 }, { "epoch": 0.44, "grad_norm": 1.0852550268173218, "learning_rate": 1.2496128377272322e-05, "loss": 0.4799, "step": 17054 }, { "epoch": 0.44, "grad_norm": 4.130334854125977, "learning_rate": 1.2495324503384213e-05, "loss": 0.5739, "step": 17055 }, { "epoch": 0.44, "grad_norm": 1.4647928476333618, "learning_rate": 1.2494520612299889e-05, "loss": 0.6232, "step": 17056 }, { "epoch": 0.44, "grad_norm": 1.1656467914581299, "learning_rate": 1.2493716704024901e-05, "loss": 0.534, "step": 17057 }, { "epoch": 0.44, "grad_norm": 6.455867767333984, "learning_rate": 1.249291277856478e-05, "loss": 0.5589, "step": 17058 }, { "epoch": 0.44, "grad_norm": 3.2179512977600098, "learning_rate": 1.2492108835925075e-05, "loss": 0.6344, "step": 17059 }, { "epoch": 0.44, "grad_norm": 1.4244471788406372, "learning_rate": 1.2491304876111318e-05, "loss": 0.6267, "step": 17060 }, { "epoch": 0.44, "grad_norm": 1.4487160444259644, "learning_rate": 1.2490500899129055e-05, "loss": 0.48, "step": 17061 }, { "epoch": 0.44, "grad_norm": 2.002260208129883, "learning_rate": 1.2489696904983824e-05, "loss": 0.634, "step": 17062 }, { "epoch": 0.44, "grad_norm": 4.761775970458984, "learning_rate": 1.2488892893681164e-05, "loss": 0.4762, "step": 17063 }, { "epoch": 0.44, "grad_norm": 2.8423995971679688, "learning_rate": 1.2488088865226622e-05, "loss": 0.5493, "step": 17064 }, { "epoch": 0.44, "grad_norm": 7.838754653930664, "learning_rate": 1.2487284819625732e-05, "loss": 0.6485, "step": 17065 }, { "epoch": 0.44, "grad_norm": 1.8321188688278198, "learning_rate": 1.2486480756884035e-05, "loss": 0.7151, "step": 17066 }, { "epoch": 0.44, "grad_norm": 8.748587608337402, "learning_rate": 1.2485676677007081e-05, "loss": 0.5781, "step": 17067 }, { "epoch": 0.44, "grad_norm": 2.5084149837493896, "learning_rate": 1.24848725800004e-05, "loss": 0.5713, "step": 17068 }, { "epoch": 0.44, "grad_norm": 1.373009204864502, "learning_rate": 1.2484068465869541e-05, "loss": 0.5673, "step": 17069 }, { "epoch": 0.44, "grad_norm": 1.4892618656158447, "learning_rate": 1.2483264334620042e-05, "loss": 0.4572, "step": 17070 }, { "epoch": 0.44, "grad_norm": 2.727383613586426, "learning_rate": 1.2482460186257446e-05, "loss": 0.7031, "step": 17071 }, { "epoch": 0.44, "grad_norm": 4.169290542602539, "learning_rate": 1.2481656020787294e-05, "loss": 0.8518, "step": 17072 }, { "epoch": 0.44, "grad_norm": 1.511796236038208, "learning_rate": 1.2480851838215125e-05, "loss": 0.4975, "step": 17073 }, { "epoch": 0.44, "grad_norm": 1.3225387334823608, "learning_rate": 1.2480047638546488e-05, "loss": 0.5638, "step": 17074 }, { "epoch": 0.44, "grad_norm": 2.553807020187378, "learning_rate": 1.2479243421786917e-05, "loss": 0.6247, "step": 17075 }, { "epoch": 0.44, "grad_norm": 1.1721748113632202, "learning_rate": 1.247843918794196e-05, "loss": 0.5954, "step": 17076 }, { "epoch": 0.44, "grad_norm": 5.058193683624268, "learning_rate": 1.2477634937017157e-05, "loss": 0.8005, "step": 17077 }, { "epoch": 0.44, "grad_norm": 1.72437584400177, "learning_rate": 1.247683066901805e-05, "loss": 0.6055, "step": 17078 }, { "epoch": 0.44, "grad_norm": 2.5522620677948, "learning_rate": 1.2476026383950184e-05, "loss": 0.4783, "step": 17079 }, { "epoch": 0.44, "grad_norm": 8.468464851379395, "learning_rate": 1.2475222081819096e-05, "loss": 0.6837, "step": 17080 }, { "epoch": 0.44, "grad_norm": 2.838604211807251, "learning_rate": 1.2474417762630338e-05, "loss": 0.5667, "step": 17081 }, { "epoch": 0.44, "grad_norm": 2.0088019371032715, "learning_rate": 1.2473613426389444e-05, "loss": 0.6955, "step": 17082 }, { "epoch": 0.44, "grad_norm": 9.0936279296875, "learning_rate": 1.2472809073101959e-05, "loss": 0.5613, "step": 17083 }, { "epoch": 0.44, "grad_norm": 1.0885648727416992, "learning_rate": 1.247200470277343e-05, "loss": 0.4496, "step": 17084 }, { "epoch": 0.44, "grad_norm": 1.3330097198486328, "learning_rate": 1.2471200315409394e-05, "loss": 0.6364, "step": 17085 }, { "epoch": 0.44, "grad_norm": 1.3234734535217285, "learning_rate": 1.2470395911015403e-05, "loss": 0.5328, "step": 17086 }, { "epoch": 0.44, "grad_norm": 3.335832357406616, "learning_rate": 1.2469591489596992e-05, "loss": 0.882, "step": 17087 }, { "epoch": 0.44, "grad_norm": 2.512791156768799, "learning_rate": 1.246878705115971e-05, "loss": 0.5785, "step": 17088 }, { "epoch": 0.44, "grad_norm": 1.3443853855133057, "learning_rate": 1.2467982595709099e-05, "loss": 0.6974, "step": 17089 }, { "epoch": 0.44, "grad_norm": 3.7757160663604736, "learning_rate": 1.2467178123250698e-05, "loss": 0.6275, "step": 17090 }, { "epoch": 0.44, "grad_norm": 1.1799715757369995, "learning_rate": 1.246637363379006e-05, "loss": 0.3972, "step": 17091 }, { "epoch": 0.44, "grad_norm": 1.6281630992889404, "learning_rate": 1.2465569127332725e-05, "loss": 0.5294, "step": 17092 }, { "epoch": 0.44, "grad_norm": 2.078270196914673, "learning_rate": 1.2464764603884235e-05, "loss": 0.5083, "step": 17093 }, { "epoch": 0.44, "grad_norm": 1.999647855758667, "learning_rate": 1.2463960063450134e-05, "loss": 0.5467, "step": 17094 }, { "epoch": 0.44, "grad_norm": 1.20880925655365, "learning_rate": 1.2463155506035968e-05, "loss": 0.6655, "step": 17095 }, { "epoch": 0.44, "grad_norm": 2.961014747619629, "learning_rate": 1.2462350931647288e-05, "loss": 0.8215, "step": 17096 }, { "epoch": 0.44, "grad_norm": 0.9853777885437012, "learning_rate": 1.2461546340289625e-05, "loss": 0.5429, "step": 17097 }, { "epoch": 0.44, "grad_norm": 3.608210563659668, "learning_rate": 1.2460741731968535e-05, "loss": 0.5336, "step": 17098 }, { "epoch": 0.44, "grad_norm": 6.054318904876709, "learning_rate": 1.2459937106689556e-05, "loss": 0.5235, "step": 17099 }, { "epoch": 0.44, "grad_norm": 1.5330265760421753, "learning_rate": 1.245913246445824e-05, "loss": 0.634, "step": 17100 }, { "epoch": 0.44, "grad_norm": 9.437564849853516, "learning_rate": 1.2458327805280126e-05, "loss": 0.699, "step": 17101 }, { "epoch": 0.44, "grad_norm": 1.1769660711288452, "learning_rate": 1.245752312916076e-05, "loss": 0.5882, "step": 17102 }, { "epoch": 0.44, "grad_norm": 1.3281477689743042, "learning_rate": 1.2456718436105692e-05, "loss": 0.4705, "step": 17103 }, { "epoch": 0.44, "grad_norm": 2.089891195297241, "learning_rate": 1.245591372612046e-05, "loss": 0.671, "step": 17104 }, { "epoch": 0.44, "grad_norm": 2.911588430404663, "learning_rate": 1.2455108999210616e-05, "loss": 0.5858, "step": 17105 }, { "epoch": 0.44, "grad_norm": 1.2799367904663086, "learning_rate": 1.2454304255381704e-05, "loss": 0.565, "step": 17106 }, { "epoch": 0.44, "grad_norm": 4.684200763702393, "learning_rate": 1.2453499494639265e-05, "loss": 0.4438, "step": 17107 }, { "epoch": 0.44, "grad_norm": 4.230319499969482, "learning_rate": 1.2452694716988853e-05, "loss": 0.6611, "step": 17108 }, { "epoch": 0.44, "grad_norm": 2.496291160583496, "learning_rate": 1.2451889922436008e-05, "loss": 0.6408, "step": 17109 }, { "epoch": 0.44, "grad_norm": 3.1633501052856445, "learning_rate": 1.245108511098628e-05, "loss": 0.4301, "step": 17110 }, { "epoch": 0.44, "grad_norm": 1.4206982851028442, "learning_rate": 1.2450280282645212e-05, "loss": 0.463, "step": 17111 }, { "epoch": 0.44, "grad_norm": 1.9241889715194702, "learning_rate": 1.2449475437418351e-05, "loss": 0.5859, "step": 17112 }, { "epoch": 0.44, "grad_norm": 2.015817880630493, "learning_rate": 1.2448670575311245e-05, "loss": 0.539, "step": 17113 }, { "epoch": 0.44, "grad_norm": 1.9734275341033936, "learning_rate": 1.2447865696329439e-05, "loss": 0.5213, "step": 17114 }, { "epoch": 0.44, "grad_norm": 1.0787086486816406, "learning_rate": 1.2447060800478484e-05, "loss": 0.5693, "step": 17115 }, { "epoch": 0.44, "grad_norm": 1.602540373802185, "learning_rate": 1.2446255887763919e-05, "loss": 0.5038, "step": 17116 }, { "epoch": 0.44, "grad_norm": 2.965991973876953, "learning_rate": 1.2445450958191296e-05, "loss": 0.5653, "step": 17117 }, { "epoch": 0.44, "grad_norm": 1.6995164155960083, "learning_rate": 1.2444646011766164e-05, "loss": 0.585, "step": 17118 }, { "epoch": 0.44, "grad_norm": 2.6391444206237793, "learning_rate": 1.2443841048494062e-05, "loss": 0.5341, "step": 17119 }, { "epoch": 0.44, "grad_norm": 2.164649486541748, "learning_rate": 1.2443036068380549e-05, "loss": 0.7785, "step": 17120 }, { "epoch": 0.44, "grad_norm": 1.3811848163604736, "learning_rate": 1.2442231071431163e-05, "loss": 0.3594, "step": 17121 }, { "epoch": 0.44, "grad_norm": 2.1381771564483643, "learning_rate": 1.2441426057651458e-05, "loss": 0.5511, "step": 17122 }, { "epoch": 0.44, "grad_norm": 2.199212074279785, "learning_rate": 1.2440621027046977e-05, "loss": 0.6333, "step": 17123 }, { "epoch": 0.44, "grad_norm": 2.036884069442749, "learning_rate": 1.2439815979623267e-05, "loss": 0.4412, "step": 17124 }, { "epoch": 0.44, "grad_norm": 2.6791322231292725, "learning_rate": 1.243901091538588e-05, "loss": 0.6915, "step": 17125 }, { "epoch": 0.44, "grad_norm": 3.150313377380371, "learning_rate": 1.2438205834340363e-05, "loss": 0.667, "step": 17126 }, { "epoch": 0.44, "grad_norm": 1.2299962043762207, "learning_rate": 1.243740073649226e-05, "loss": 0.5962, "step": 17127 }, { "epoch": 0.44, "grad_norm": 7.081182956695557, "learning_rate": 1.2436595621847126e-05, "loss": 0.4384, "step": 17128 }, { "epoch": 0.44, "grad_norm": 1.1427522897720337, "learning_rate": 1.2435790490410502e-05, "loss": 0.5174, "step": 17129 }, { "epoch": 0.44, "grad_norm": 3.0597195625305176, "learning_rate": 1.2434985342187944e-05, "loss": 0.6706, "step": 17130 }, { "epoch": 0.44, "grad_norm": 1.238927960395813, "learning_rate": 1.2434180177184995e-05, "loss": 0.4553, "step": 17131 }, { "epoch": 0.44, "grad_norm": 1.8646714687347412, "learning_rate": 1.2433374995407208e-05, "loss": 0.5799, "step": 17132 }, { "epoch": 0.44, "grad_norm": 2.7131166458129883, "learning_rate": 1.2432569796860125e-05, "loss": 0.6831, "step": 17133 }, { "epoch": 0.44, "grad_norm": 2.4342360496520996, "learning_rate": 1.2431764581549301e-05, "loss": 0.5534, "step": 17134 }, { "epoch": 0.44, "grad_norm": 1.0425653457641602, "learning_rate": 1.2430959349480285e-05, "loss": 0.7085, "step": 17135 }, { "epoch": 0.44, "grad_norm": 1.5714956521987915, "learning_rate": 1.2430154100658623e-05, "loss": 0.64, "step": 17136 }, { "epoch": 0.44, "grad_norm": 1.4580159187316895, "learning_rate": 1.2429348835089865e-05, "loss": 0.728, "step": 17137 }, { "epoch": 0.44, "grad_norm": 1.8937346935272217, "learning_rate": 1.2428543552779562e-05, "loss": 0.6334, "step": 17138 }, { "epoch": 0.44, "grad_norm": 6.770019054412842, "learning_rate": 1.2427738253733261e-05, "loss": 0.7212, "step": 17139 }, { "epoch": 0.44, "grad_norm": 2.2715601921081543, "learning_rate": 1.2426932937956515e-05, "loss": 0.6849, "step": 17140 }, { "epoch": 0.44, "grad_norm": 2.2100892066955566, "learning_rate": 1.2426127605454872e-05, "loss": 0.4879, "step": 17141 }, { "epoch": 0.44, "grad_norm": 1.9849658012390137, "learning_rate": 1.2425322256233883e-05, "loss": 0.5092, "step": 17142 }, { "epoch": 0.44, "grad_norm": 3.0329110622406006, "learning_rate": 1.2424516890299094e-05, "loss": 0.5717, "step": 17143 }, { "epoch": 0.44, "grad_norm": 5.630963325500488, "learning_rate": 1.2423711507656057e-05, "loss": 0.4233, "step": 17144 }, { "epoch": 0.44, "grad_norm": 3.9969899654388428, "learning_rate": 1.2422906108310326e-05, "loss": 0.4657, "step": 17145 }, { "epoch": 0.44, "grad_norm": 1.8602664470672607, "learning_rate": 1.2422100692267447e-05, "loss": 0.5928, "step": 17146 }, { "epoch": 0.44, "grad_norm": 1.5095875263214111, "learning_rate": 1.2421295259532972e-05, "loss": 0.7038, "step": 17147 }, { "epoch": 0.44, "grad_norm": 2.0634818077087402, "learning_rate": 1.2420489810112452e-05, "loss": 0.6437, "step": 17148 }, { "epoch": 0.44, "grad_norm": 1.445171594619751, "learning_rate": 1.2419684344011437e-05, "loss": 0.4452, "step": 17149 }, { "epoch": 0.44, "grad_norm": 2.7450554370880127, "learning_rate": 1.2418878861235478e-05, "loss": 0.513, "step": 17150 }, { "epoch": 0.44, "grad_norm": 3.8098738193511963, "learning_rate": 1.2418073361790122e-05, "loss": 0.4262, "step": 17151 }, { "epoch": 0.44, "grad_norm": 2.9910645484924316, "learning_rate": 1.2417267845680927e-05, "loss": 0.6138, "step": 17152 }, { "epoch": 0.44, "grad_norm": 3.849247932434082, "learning_rate": 1.241646231291344e-05, "loss": 0.6282, "step": 17153 }, { "epoch": 0.44, "grad_norm": 7.629355430603027, "learning_rate": 1.2415656763493213e-05, "loss": 0.6926, "step": 17154 }, { "epoch": 0.44, "grad_norm": 3.1975209712982178, "learning_rate": 1.2414851197425797e-05, "loss": 0.7099, "step": 17155 }, { "epoch": 0.44, "grad_norm": 1.9986637830734253, "learning_rate": 1.2414045614716741e-05, "loss": 0.6339, "step": 17156 }, { "epoch": 0.44, "grad_norm": 3.127572536468506, "learning_rate": 1.2413240015371604e-05, "loss": 0.6405, "step": 17157 }, { "epoch": 0.44, "grad_norm": 1.2392301559448242, "learning_rate": 1.241243439939593e-05, "loss": 0.4904, "step": 17158 }, { "epoch": 0.44, "grad_norm": 3.0797250270843506, "learning_rate": 1.2411628766795276e-05, "loss": 0.7444, "step": 17159 }, { "epoch": 0.44, "grad_norm": 2.5312936305999756, "learning_rate": 1.2410823117575191e-05, "loss": 0.554, "step": 17160 }, { "epoch": 0.44, "grad_norm": 1.679309606552124, "learning_rate": 1.2410017451741227e-05, "loss": 0.4847, "step": 17161 }, { "epoch": 0.44, "grad_norm": 2.0354673862457275, "learning_rate": 1.2409211769298934e-05, "loss": 0.5593, "step": 17162 }, { "epoch": 0.44, "grad_norm": 4.040026664733887, "learning_rate": 1.2408406070253871e-05, "loss": 0.5758, "step": 17163 }, { "epoch": 0.44, "grad_norm": 2.2095155715942383, "learning_rate": 1.2407600354611586e-05, "loss": 0.6783, "step": 17164 }, { "epoch": 0.44, "grad_norm": 1.3369163274765015, "learning_rate": 1.240679462237763e-05, "loss": 0.657, "step": 17165 }, { "epoch": 0.44, "grad_norm": 5.302651882171631, "learning_rate": 1.240598887355756e-05, "loss": 0.6796, "step": 17166 }, { "epoch": 0.44, "grad_norm": 1.494361400604248, "learning_rate": 1.2405183108156923e-05, "loss": 0.5928, "step": 17167 }, { "epoch": 0.44, "grad_norm": 1.791129231452942, "learning_rate": 1.2404377326181278e-05, "loss": 0.6202, "step": 17168 }, { "epoch": 0.44, "grad_norm": 2.986745834350586, "learning_rate": 1.2403571527636176e-05, "loss": 0.5987, "step": 17169 }, { "epoch": 0.44, "grad_norm": 1.6093074083328247, "learning_rate": 1.2402765712527166e-05, "loss": 0.5752, "step": 17170 }, { "epoch": 0.44, "grad_norm": 2.099299192428589, "learning_rate": 1.2401959880859808e-05, "loss": 0.6421, "step": 17171 }, { "epoch": 0.44, "grad_norm": 1.5342737436294556, "learning_rate": 1.2401154032639649e-05, "loss": 0.5354, "step": 17172 }, { "epoch": 0.44, "grad_norm": 1.8559824228286743, "learning_rate": 1.2400348167872245e-05, "loss": 0.4916, "step": 17173 }, { "epoch": 0.44, "grad_norm": 5.125316619873047, "learning_rate": 1.2399542286563149e-05, "loss": 0.3985, "step": 17174 }, { "epoch": 0.44, "grad_norm": 2.114711284637451, "learning_rate": 1.2398736388717916e-05, "loss": 0.8133, "step": 17175 }, { "epoch": 0.44, "grad_norm": 2.0595808029174805, "learning_rate": 1.2397930474342099e-05, "loss": 0.5945, "step": 17176 }, { "epoch": 0.44, "grad_norm": 1.7552160024642944, "learning_rate": 1.2397124543441251e-05, "loss": 0.5433, "step": 17177 }, { "epoch": 0.44, "grad_norm": 1.182126522064209, "learning_rate": 1.2396318596020928e-05, "loss": 0.5901, "step": 17178 }, { "epoch": 0.44, "grad_norm": 3.184168815612793, "learning_rate": 1.2395512632086681e-05, "loss": 0.8252, "step": 17179 }, { "epoch": 0.44, "grad_norm": 2.018235445022583, "learning_rate": 1.2394706651644066e-05, "loss": 0.6176, "step": 17180 }, { "epoch": 0.44, "grad_norm": 0.9693441390991211, "learning_rate": 1.2393900654698638e-05, "loss": 0.5051, "step": 17181 }, { "epoch": 0.44, "grad_norm": 0.89347904920578, "learning_rate": 1.2393094641255949e-05, "loss": 0.4698, "step": 17182 }, { "epoch": 0.44, "grad_norm": 1.545379400253296, "learning_rate": 1.239228861132156e-05, "loss": 0.4885, "step": 17183 }, { "epoch": 0.44, "grad_norm": 0.9378412365913391, "learning_rate": 1.2391482564901017e-05, "loss": 0.487, "step": 17184 }, { "epoch": 0.44, "grad_norm": 1.399121642112732, "learning_rate": 1.2390676501999881e-05, "loss": 0.5553, "step": 17185 }, { "epoch": 0.44, "grad_norm": 5.546231746673584, "learning_rate": 1.2389870422623702e-05, "loss": 0.9452, "step": 17186 }, { "epoch": 0.44, "grad_norm": 6.768205642700195, "learning_rate": 1.2389064326778038e-05, "loss": 0.4703, "step": 17187 }, { "epoch": 0.44, "grad_norm": 3.2524547576904297, "learning_rate": 1.2388258214468446e-05, "loss": 0.7262, "step": 17188 }, { "epoch": 0.44, "grad_norm": 1.698908805847168, "learning_rate": 1.2387452085700473e-05, "loss": 0.5923, "step": 17189 }, { "epoch": 0.44, "grad_norm": 15.361283302307129, "learning_rate": 1.2386645940479686e-05, "loss": 0.619, "step": 17190 }, { "epoch": 0.44, "grad_norm": 2.1123645305633545, "learning_rate": 1.2385839778811632e-05, "loss": 0.6345, "step": 17191 }, { "epoch": 0.44, "grad_norm": 4.23773193359375, "learning_rate": 1.2385033600701871e-05, "loss": 0.5596, "step": 17192 }, { "epoch": 0.44, "grad_norm": 7.156328201293945, "learning_rate": 1.2384227406155955e-05, "loss": 0.7248, "step": 17193 }, { "epoch": 0.44, "grad_norm": 2.283944606781006, "learning_rate": 1.2383421195179442e-05, "loss": 0.6618, "step": 17194 }, { "epoch": 0.44, "grad_norm": 2.8293299674987793, "learning_rate": 1.238261496777789e-05, "loss": 0.7152, "step": 17195 }, { "epoch": 0.44, "grad_norm": 1.2173986434936523, "learning_rate": 1.2381808723956847e-05, "loss": 0.2412, "step": 17196 }, { "epoch": 0.44, "grad_norm": 1.1173648834228516, "learning_rate": 1.2381002463721878e-05, "loss": 0.5717, "step": 17197 }, { "epoch": 0.44, "grad_norm": 1.3695176839828491, "learning_rate": 1.2380196187078537e-05, "loss": 0.483, "step": 17198 }, { "epoch": 0.44, "grad_norm": 6.928812503814697, "learning_rate": 1.2379389894032378e-05, "loss": 0.7414, "step": 17199 }, { "epoch": 0.44, "grad_norm": 2.044816017150879, "learning_rate": 1.237858358458896e-05, "loss": 0.6331, "step": 17200 }, { "epoch": 0.44, "grad_norm": 1.6645445823669434, "learning_rate": 1.2377777258753838e-05, "loss": 0.6727, "step": 17201 }, { "epoch": 0.44, "grad_norm": 7.434718132019043, "learning_rate": 1.237697091653257e-05, "loss": 0.5359, "step": 17202 }, { "epoch": 0.44, "grad_norm": 2.6878106594085693, "learning_rate": 1.2376164557930707e-05, "loss": 0.5179, "step": 17203 }, { "epoch": 0.44, "grad_norm": 1.3107717037200928, "learning_rate": 1.2375358182953814e-05, "loss": 0.5163, "step": 17204 }, { "epoch": 0.44, "grad_norm": 1.3333274126052856, "learning_rate": 1.2374551791607446e-05, "loss": 0.4576, "step": 17205 }, { "epoch": 0.44, "grad_norm": 1.3374027013778687, "learning_rate": 1.2373745383897156e-05, "loss": 0.5948, "step": 17206 }, { "epoch": 0.44, "grad_norm": 1.4724680185317993, "learning_rate": 1.2372938959828507e-05, "loss": 0.5453, "step": 17207 }, { "epoch": 0.44, "grad_norm": 1.3640528917312622, "learning_rate": 1.2372132519407051e-05, "loss": 0.6365, "step": 17208 }, { "epoch": 0.44, "grad_norm": 1.5874013900756836, "learning_rate": 1.237132606263835e-05, "loss": 0.5822, "step": 17209 }, { "epoch": 0.44, "grad_norm": 1.4711711406707764, "learning_rate": 1.2370519589527958e-05, "loss": 0.4734, "step": 17210 }, { "epoch": 0.44, "grad_norm": 1.1239155530929565, "learning_rate": 1.2369713100081435e-05, "loss": 0.5723, "step": 17211 }, { "epoch": 0.44, "grad_norm": 4.6323041915893555, "learning_rate": 1.236890659430434e-05, "loss": 0.6517, "step": 17212 }, { "epoch": 0.44, "grad_norm": 1.3167839050292969, "learning_rate": 1.2368100072202225e-05, "loss": 0.5066, "step": 17213 }, { "epoch": 0.44, "grad_norm": 1.3656408786773682, "learning_rate": 1.2367293533780655e-05, "loss": 0.7034, "step": 17214 }, { "epoch": 0.44, "grad_norm": 2.6143198013305664, "learning_rate": 1.2366486979045185e-05, "loss": 0.5574, "step": 17215 }, { "epoch": 0.44, "grad_norm": 1.5546356439590454, "learning_rate": 1.236568040800137e-05, "loss": 0.5598, "step": 17216 }, { "epoch": 0.44, "grad_norm": 1.5704227685928345, "learning_rate": 1.2364873820654776e-05, "loss": 0.4788, "step": 17217 }, { "epoch": 0.44, "grad_norm": 2.353602170944214, "learning_rate": 1.2364067217010958e-05, "loss": 0.5024, "step": 17218 }, { "epoch": 0.44, "grad_norm": 0.919535756111145, "learning_rate": 1.2363260597075472e-05, "loss": 0.5225, "step": 17219 }, { "epoch": 0.44, "grad_norm": 8.495712280273438, "learning_rate": 1.236245396085388e-05, "loss": 0.6072, "step": 17220 }, { "epoch": 0.44, "grad_norm": 6.340206623077393, "learning_rate": 1.2361647308351737e-05, "loss": 0.533, "step": 17221 }, { "epoch": 0.44, "grad_norm": 2.9488344192504883, "learning_rate": 1.2360840639574607e-05, "loss": 0.5483, "step": 17222 }, { "epoch": 0.44, "grad_norm": 1.8097130060195923, "learning_rate": 1.2360033954528044e-05, "loss": 0.5709, "step": 17223 }, { "epoch": 0.44, "grad_norm": 1.6745280027389526, "learning_rate": 1.2359227253217614e-05, "loss": 0.6123, "step": 17224 }, { "epoch": 0.44, "grad_norm": 2.4458673000335693, "learning_rate": 1.2358420535648867e-05, "loss": 0.6662, "step": 17225 }, { "epoch": 0.44, "grad_norm": 1.5284333229064941, "learning_rate": 1.235761380182737e-05, "loss": 0.5728, "step": 17226 }, { "epoch": 0.44, "grad_norm": 1.347859501838684, "learning_rate": 1.235680705175868e-05, "loss": 0.4148, "step": 17227 }, { "epoch": 0.44, "grad_norm": 1.7804309129714966, "learning_rate": 1.2356000285448355e-05, "loss": 0.4801, "step": 17228 }, { "epoch": 0.44, "grad_norm": 1.2559796571731567, "learning_rate": 1.2355193502901959e-05, "loss": 0.5886, "step": 17229 }, { "epoch": 0.44, "grad_norm": 3.1027204990386963, "learning_rate": 1.2354386704125045e-05, "loss": 0.5771, "step": 17230 }, { "epoch": 0.44, "grad_norm": 2.0023996829986572, "learning_rate": 1.2353579889123182e-05, "loss": 0.5882, "step": 17231 }, { "epoch": 0.44, "grad_norm": 1.0957441329956055, "learning_rate": 1.2352773057901921e-05, "loss": 0.6042, "step": 17232 }, { "epoch": 0.44, "grad_norm": 6.025760173797607, "learning_rate": 1.2351966210466827e-05, "loss": 0.4852, "step": 17233 }, { "epoch": 0.44, "grad_norm": 2.3411998748779297, "learning_rate": 1.235115934682346e-05, "loss": 0.6412, "step": 17234 }, { "epoch": 0.44, "grad_norm": 2.0461764335632324, "learning_rate": 1.235035246697738e-05, "loss": 0.5256, "step": 17235 }, { "epoch": 0.44, "grad_norm": 1.819067358970642, "learning_rate": 1.2349545570934148e-05, "loss": 0.4658, "step": 17236 }, { "epoch": 0.44, "grad_norm": 1.5684815645217896, "learning_rate": 1.2348738658699322e-05, "loss": 0.5766, "step": 17237 }, { "epoch": 0.44, "grad_norm": 2.2856478691101074, "learning_rate": 1.2347931730278467e-05, "loss": 0.5569, "step": 17238 }, { "epoch": 0.44, "grad_norm": 1.2991912364959717, "learning_rate": 1.234712478567714e-05, "loss": 0.6164, "step": 17239 }, { "epoch": 0.44, "grad_norm": 2.1807010173797607, "learning_rate": 1.2346317824900904e-05, "loss": 0.6399, "step": 17240 }, { "epoch": 0.44, "grad_norm": 1.6653469800949097, "learning_rate": 1.2345510847955322e-05, "loss": 0.6802, "step": 17241 }, { "epoch": 0.44, "grad_norm": 1.3907690048217773, "learning_rate": 1.2344703854845949e-05, "loss": 0.5345, "step": 17242 }, { "epoch": 0.44, "grad_norm": 1.5668162107467651, "learning_rate": 1.2343896845578356e-05, "loss": 0.7574, "step": 17243 }, { "epoch": 0.44, "grad_norm": 3.5431225299835205, "learning_rate": 1.2343089820158093e-05, "loss": 0.6163, "step": 17244 }, { "epoch": 0.44, "grad_norm": 2.39658522605896, "learning_rate": 1.2342282778590731e-05, "loss": 0.4615, "step": 17245 }, { "epoch": 0.44, "grad_norm": 2.106151819229126, "learning_rate": 1.2341475720881825e-05, "loss": 0.6392, "step": 17246 }, { "epoch": 0.44, "grad_norm": 4.5200982093811035, "learning_rate": 1.234066864703694e-05, "loss": 0.7262, "step": 17247 }, { "epoch": 0.44, "grad_norm": 1.6476324796676636, "learning_rate": 1.2339861557061635e-05, "loss": 0.5504, "step": 17248 }, { "epoch": 0.44, "grad_norm": 1.9792653322219849, "learning_rate": 1.2339054450961479e-05, "loss": 0.6259, "step": 17249 }, { "epoch": 0.44, "grad_norm": 2.2984373569488525, "learning_rate": 1.2338247328742023e-05, "loss": 0.7311, "step": 17250 }, { "epoch": 0.44, "grad_norm": 2.473529577255249, "learning_rate": 1.233744019040884e-05, "loss": 0.6396, "step": 17251 }, { "epoch": 0.44, "grad_norm": 2.8819844722747803, "learning_rate": 1.2336633035967487e-05, "loss": 0.4714, "step": 17252 }, { "epoch": 0.44, "grad_norm": 1.5200841426849365, "learning_rate": 1.2335825865423528e-05, "loss": 0.5071, "step": 17253 }, { "epoch": 0.44, "grad_norm": 0.8667179346084595, "learning_rate": 1.2335018678782523e-05, "loss": 0.47, "step": 17254 }, { "epoch": 0.44, "grad_norm": 2.1315033435821533, "learning_rate": 1.2334211476050036e-05, "loss": 0.719, "step": 17255 }, { "epoch": 0.44, "grad_norm": 1.3382984399795532, "learning_rate": 1.2333404257231631e-05, "loss": 0.5521, "step": 17256 }, { "epoch": 0.44, "grad_norm": 3.981220245361328, "learning_rate": 1.2332597022332869e-05, "loss": 0.614, "step": 17257 }, { "epoch": 0.44, "grad_norm": 1.5756930112838745, "learning_rate": 1.2331789771359314e-05, "loss": 0.5136, "step": 17258 }, { "epoch": 0.44, "grad_norm": 1.301391839981079, "learning_rate": 1.2330982504316529e-05, "loss": 0.5759, "step": 17259 }, { "epoch": 0.44, "grad_norm": 4.566233158111572, "learning_rate": 1.2330175221210077e-05, "loss": 0.6584, "step": 17260 }, { "epoch": 0.44, "grad_norm": 1.9075413942337036, "learning_rate": 1.2329367922045523e-05, "loss": 0.6467, "step": 17261 }, { "epoch": 0.44, "grad_norm": 3.851348400115967, "learning_rate": 1.2328560606828427e-05, "loss": 0.6971, "step": 17262 }, { "epoch": 0.44, "grad_norm": 4.718881607055664, "learning_rate": 1.2327753275564353e-05, "loss": 0.5785, "step": 17263 }, { "epoch": 0.44, "grad_norm": 1.464343547821045, "learning_rate": 1.2326945928258869e-05, "loss": 0.5855, "step": 17264 }, { "epoch": 0.44, "grad_norm": 1.5303210020065308, "learning_rate": 1.232613856491753e-05, "loss": 0.5879, "step": 17265 }, { "epoch": 0.44, "grad_norm": 4.054640293121338, "learning_rate": 1.2325331185545913e-05, "loss": 0.522, "step": 17266 }, { "epoch": 0.44, "grad_norm": 1.4130305051803589, "learning_rate": 1.232452379014957e-05, "loss": 0.5333, "step": 17267 }, { "epoch": 0.44, "grad_norm": 1.908880591392517, "learning_rate": 1.2323716378734071e-05, "loss": 0.6317, "step": 17268 }, { "epoch": 0.44, "grad_norm": 7.231569766998291, "learning_rate": 1.2322908951304977e-05, "loss": 0.6128, "step": 17269 }, { "epoch": 0.44, "grad_norm": 5.3611931800842285, "learning_rate": 1.2322101507867856e-05, "loss": 0.6839, "step": 17270 }, { "epoch": 0.44, "grad_norm": 1.8990426063537598, "learning_rate": 1.2321294048428269e-05, "loss": 0.6849, "step": 17271 }, { "epoch": 0.44, "grad_norm": 4.137402534484863, "learning_rate": 1.2320486572991782e-05, "loss": 0.6713, "step": 17272 }, { "epoch": 0.44, "grad_norm": 2.4371259212493896, "learning_rate": 1.231967908156396e-05, "loss": 0.7382, "step": 17273 }, { "epoch": 0.44, "grad_norm": 1.1569753885269165, "learning_rate": 1.2318871574150364e-05, "loss": 0.4952, "step": 17274 }, { "epoch": 0.44, "grad_norm": 1.476791501045227, "learning_rate": 1.2318064050756567e-05, "loss": 0.5495, "step": 17275 }, { "epoch": 0.44, "grad_norm": 1.015330195426941, "learning_rate": 1.2317256511388125e-05, "loss": 0.4977, "step": 17276 }, { "epoch": 0.44, "grad_norm": 1.7627809047698975, "learning_rate": 1.2316448956050606e-05, "loss": 0.4715, "step": 17277 }, { "epoch": 0.44, "grad_norm": 3.2105095386505127, "learning_rate": 1.231564138474958e-05, "loss": 0.5824, "step": 17278 }, { "epoch": 0.44, "grad_norm": 3.5231716632843018, "learning_rate": 1.2314833797490606e-05, "loss": 0.5246, "step": 17279 }, { "epoch": 0.44, "grad_norm": 1.3145015239715576, "learning_rate": 1.2314026194279253e-05, "loss": 0.5398, "step": 17280 }, { "epoch": 0.44, "grad_norm": 2.055086374282837, "learning_rate": 1.2313218575121084e-05, "loss": 0.8181, "step": 17281 }, { "epoch": 0.44, "grad_norm": 1.525063395500183, "learning_rate": 1.2312410940021669e-05, "loss": 0.5496, "step": 17282 }, { "epoch": 0.44, "grad_norm": 2.1885344982147217, "learning_rate": 1.2311603288986566e-05, "loss": 0.5358, "step": 17283 }, { "epoch": 0.44, "grad_norm": 1.3998597860336304, "learning_rate": 1.2310795622021347e-05, "loss": 0.6816, "step": 17284 }, { "epoch": 0.44, "grad_norm": 2.1586482524871826, "learning_rate": 1.230998793913158e-05, "loss": 0.5235, "step": 17285 }, { "epoch": 0.44, "grad_norm": 2.255944013595581, "learning_rate": 1.2309180240322825e-05, "loss": 0.6431, "step": 17286 }, { "epoch": 0.44, "grad_norm": 1.8331432342529297, "learning_rate": 1.230837252560065e-05, "loss": 0.4773, "step": 17287 }, { "epoch": 0.44, "grad_norm": 1.11565363407135, "learning_rate": 1.2307564794970622e-05, "loss": 0.5687, "step": 17288 }, { "epoch": 0.44, "grad_norm": 2.5134074687957764, "learning_rate": 1.2306757048438305e-05, "loss": 0.4769, "step": 17289 }, { "epoch": 0.44, "grad_norm": 1.6200406551361084, "learning_rate": 1.2305949286009271e-05, "loss": 0.7032, "step": 17290 }, { "epoch": 0.44, "grad_norm": 2.1779425144195557, "learning_rate": 1.230514150768908e-05, "loss": 0.4832, "step": 17291 }, { "epoch": 0.44, "grad_norm": 1.765859842300415, "learning_rate": 1.2304333713483306e-05, "loss": 0.5198, "step": 17292 }, { "epoch": 0.44, "grad_norm": 2.801226854324341, "learning_rate": 1.2303525903397508e-05, "loss": 0.4716, "step": 17293 }, { "epoch": 0.44, "grad_norm": 4.041502952575684, "learning_rate": 1.230271807743726e-05, "loss": 0.5835, "step": 17294 }, { "epoch": 0.44, "grad_norm": 2.1719744205474854, "learning_rate": 1.2301910235608124e-05, "loss": 0.6995, "step": 17295 }, { "epoch": 0.44, "grad_norm": 2.778644561767578, "learning_rate": 1.2301102377915667e-05, "loss": 0.6438, "step": 17296 }, { "epoch": 0.44, "grad_norm": 1.185989499092102, "learning_rate": 1.2300294504365462e-05, "loss": 0.546, "step": 17297 }, { "epoch": 0.44, "grad_norm": 1.6103250980377197, "learning_rate": 1.229948661496307e-05, "loss": 0.7058, "step": 17298 }, { "epoch": 0.44, "grad_norm": 3.285501480102539, "learning_rate": 1.2298678709714063e-05, "loss": 0.5777, "step": 17299 }, { "epoch": 0.44, "grad_norm": 2.005059003829956, "learning_rate": 1.2297870788624004e-05, "loss": 0.6834, "step": 17300 }, { "epoch": 0.44, "grad_norm": 1.5633459091186523, "learning_rate": 1.2297062851698463e-05, "loss": 0.5548, "step": 17301 }, { "epoch": 0.44, "grad_norm": 1.9722070693969727, "learning_rate": 1.229625489894301e-05, "loss": 0.6361, "step": 17302 }, { "epoch": 0.44, "grad_norm": 9.464615821838379, "learning_rate": 1.229544693036321e-05, "loss": 0.7292, "step": 17303 }, { "epoch": 0.44, "grad_norm": 1.7385351657867432, "learning_rate": 1.2294638945964632e-05, "loss": 0.4599, "step": 17304 }, { "epoch": 0.44, "grad_norm": 1.4474560022354126, "learning_rate": 1.2293830945752844e-05, "loss": 0.5144, "step": 17305 }, { "epoch": 0.44, "grad_norm": 2.291905641555786, "learning_rate": 1.2293022929733417e-05, "loss": 0.603, "step": 17306 }, { "epoch": 0.44, "grad_norm": 1.6264550685882568, "learning_rate": 1.2292214897911911e-05, "loss": 0.4402, "step": 17307 }, { "epoch": 0.44, "grad_norm": 2.289494752883911, "learning_rate": 1.2291406850293902e-05, "loss": 0.4817, "step": 17308 }, { "epoch": 0.44, "grad_norm": 5.206571578979492, "learning_rate": 1.229059878688496e-05, "loss": 0.605, "step": 17309 }, { "epoch": 0.44, "grad_norm": 1.3943637609481812, "learning_rate": 1.2289790707690645e-05, "loss": 0.5969, "step": 17310 }, { "epoch": 0.44, "grad_norm": 1.786413311958313, "learning_rate": 1.2288982612716535e-05, "loss": 0.5016, "step": 17311 }, { "epoch": 0.44, "grad_norm": 1.8241504430770874, "learning_rate": 1.2288174501968193e-05, "loss": 0.6103, "step": 17312 }, { "epoch": 0.44, "grad_norm": 1.9206010103225708, "learning_rate": 1.228736637545119e-05, "loss": 0.5045, "step": 17313 }, { "epoch": 0.44, "grad_norm": 1.4683948755264282, "learning_rate": 1.2286558233171098e-05, "loss": 0.6156, "step": 17314 }, { "epoch": 0.44, "grad_norm": 3.8706812858581543, "learning_rate": 1.2285750075133478e-05, "loss": 0.6724, "step": 17315 }, { "epoch": 0.44, "grad_norm": 2.2641212940216064, "learning_rate": 1.2284941901343909e-05, "loss": 0.7141, "step": 17316 }, { "epoch": 0.44, "grad_norm": 1.6705604791641235, "learning_rate": 1.2284133711807952e-05, "loss": 0.554, "step": 17317 }, { "epoch": 0.44, "grad_norm": 0.9333325624465942, "learning_rate": 1.2283325506531183e-05, "loss": 0.5424, "step": 17318 }, { "epoch": 0.44, "grad_norm": 1.2286237478256226, "learning_rate": 1.2282517285519172e-05, "loss": 0.6731, "step": 17319 }, { "epoch": 0.44, "grad_norm": 2.566866159439087, "learning_rate": 1.228170904877748e-05, "loss": 0.4312, "step": 17320 }, { "epoch": 0.44, "grad_norm": 1.7911893129348755, "learning_rate": 1.2280900796311687e-05, "loss": 0.5832, "step": 17321 }, { "epoch": 0.44, "grad_norm": 2.9601356983184814, "learning_rate": 1.2280092528127356e-05, "loss": 0.6021, "step": 17322 }, { "epoch": 0.44, "grad_norm": 3.63496994972229, "learning_rate": 1.2279284244230064e-05, "loss": 0.6562, "step": 17323 }, { "epoch": 0.44, "grad_norm": 4.439505577087402, "learning_rate": 1.2278475944625372e-05, "loss": 0.7216, "step": 17324 }, { "epoch": 0.44, "grad_norm": 1.6994504928588867, "learning_rate": 1.2277667629318857e-05, "loss": 0.5884, "step": 17325 }, { "epoch": 0.44, "grad_norm": 2.440394878387451, "learning_rate": 1.2276859298316089e-05, "loss": 0.5329, "step": 17326 }, { "epoch": 0.44, "grad_norm": 1.7301998138427734, "learning_rate": 1.2276050951622635e-05, "loss": 0.4494, "step": 17327 }, { "epoch": 0.44, "grad_norm": 1.4356857538223267, "learning_rate": 1.227524258924407e-05, "loss": 0.546, "step": 17328 }, { "epoch": 0.44, "grad_norm": 2.1557600498199463, "learning_rate": 1.2274434211185961e-05, "loss": 0.5468, "step": 17329 }, { "epoch": 0.44, "grad_norm": 3.3805031776428223, "learning_rate": 1.2273625817453878e-05, "loss": 0.7379, "step": 17330 }, { "epoch": 0.44, "grad_norm": 2.0029423236846924, "learning_rate": 1.22728174080534e-05, "loss": 0.6273, "step": 17331 }, { "epoch": 0.44, "grad_norm": 1.675944209098816, "learning_rate": 1.227200898299009e-05, "loss": 0.3946, "step": 17332 }, { "epoch": 0.44, "grad_norm": 3.8957245349884033, "learning_rate": 1.227120054226952e-05, "loss": 0.6277, "step": 17333 }, { "epoch": 0.44, "grad_norm": 1.130083680152893, "learning_rate": 1.2270392085897262e-05, "loss": 0.5409, "step": 17334 }, { "epoch": 0.44, "grad_norm": 2.7111032009124756, "learning_rate": 1.2269583613878892e-05, "loss": 0.7213, "step": 17335 }, { "epoch": 0.44, "grad_norm": 1.2039449214935303, "learning_rate": 1.2268775126219974e-05, "loss": 0.6386, "step": 17336 }, { "epoch": 0.44, "grad_norm": 2.362102508544922, "learning_rate": 1.2267966622926084e-05, "loss": 0.6012, "step": 17337 }, { "epoch": 0.44, "grad_norm": 4.143128395080566, "learning_rate": 1.2267158104002795e-05, "loss": 0.7082, "step": 17338 }, { "epoch": 0.44, "grad_norm": 1.6174222230911255, "learning_rate": 1.2266349569455675e-05, "loss": 0.5743, "step": 17339 }, { "epoch": 0.44, "grad_norm": 1.4252773523330688, "learning_rate": 1.22655410192903e-05, "loss": 0.4984, "step": 17340 }, { "epoch": 0.44, "grad_norm": 2.4915659427642822, "learning_rate": 1.2264732453512235e-05, "loss": 0.5555, "step": 17341 }, { "epoch": 0.44, "grad_norm": 3.5644664764404297, "learning_rate": 1.226392387212706e-05, "loss": 0.5019, "step": 17342 }, { "epoch": 0.44, "grad_norm": 2.2619524002075195, "learning_rate": 1.2263115275140342e-05, "loss": 0.76, "step": 17343 }, { "epoch": 0.44, "grad_norm": 1.5639787912368774, "learning_rate": 1.2262306662557658e-05, "loss": 0.4897, "step": 17344 }, { "epoch": 0.44, "grad_norm": 2.5181477069854736, "learning_rate": 1.2261498034384578e-05, "loss": 0.6647, "step": 17345 }, { "epoch": 0.44, "grad_norm": 2.3949122428894043, "learning_rate": 1.2260689390626671e-05, "loss": 0.5974, "step": 17346 }, { "epoch": 0.44, "grad_norm": 1.440424919128418, "learning_rate": 1.2259880731289518e-05, "loss": 0.556, "step": 17347 }, { "epoch": 0.44, "grad_norm": 1.654712438583374, "learning_rate": 1.225907205637868e-05, "loss": 0.6383, "step": 17348 }, { "epoch": 0.44, "grad_norm": 1.6486212015151978, "learning_rate": 1.225826336589974e-05, "loss": 0.5313, "step": 17349 }, { "epoch": 0.44, "grad_norm": 1.4549638032913208, "learning_rate": 1.225745465985827e-05, "loss": 0.5004, "step": 17350 }, { "epoch": 0.44, "grad_norm": 1.4444388151168823, "learning_rate": 1.2256645938259838e-05, "loss": 0.6379, "step": 17351 }, { "epoch": 0.44, "grad_norm": 5.387918472290039, "learning_rate": 1.2255837201110021e-05, "loss": 0.5265, "step": 17352 }, { "epoch": 0.44, "grad_norm": 2.3300304412841797, "learning_rate": 1.2255028448414392e-05, "loss": 0.64, "step": 17353 }, { "epoch": 0.44, "grad_norm": 4.041933536529541, "learning_rate": 1.2254219680178521e-05, "loss": 0.5871, "step": 17354 }, { "epoch": 0.44, "grad_norm": 1.6522119045257568, "learning_rate": 1.2253410896407986e-05, "loss": 0.5578, "step": 17355 }, { "epoch": 0.44, "grad_norm": 1.4774917364120483, "learning_rate": 1.2252602097108359e-05, "loss": 0.5885, "step": 17356 }, { "epoch": 0.44, "grad_norm": 2.4402153491973877, "learning_rate": 1.2251793282285213e-05, "loss": 0.5029, "step": 17357 }, { "epoch": 0.44, "grad_norm": 3.6111655235290527, "learning_rate": 1.2250984451944124e-05, "loss": 0.5421, "step": 17358 }, { "epoch": 0.44, "grad_norm": 5.276867389678955, "learning_rate": 1.2250175606090661e-05, "loss": 1.0105, "step": 17359 }, { "epoch": 0.44, "grad_norm": 1.3403606414794922, "learning_rate": 1.2249366744730404e-05, "loss": 0.48, "step": 17360 }, { "epoch": 0.44, "grad_norm": 0.9288330674171448, "learning_rate": 1.2248557867868925e-05, "loss": 0.5117, "step": 17361 }, { "epoch": 0.44, "grad_norm": 3.9831485748291016, "learning_rate": 1.2247748975511797e-05, "loss": 0.5374, "step": 17362 }, { "epoch": 0.45, "grad_norm": 2.238367795944214, "learning_rate": 1.2246940067664594e-05, "loss": 0.585, "step": 17363 }, { "epoch": 0.45, "grad_norm": 6.341908931732178, "learning_rate": 1.2246131144332896e-05, "loss": 0.5924, "step": 17364 }, { "epoch": 0.45, "grad_norm": 4.154472351074219, "learning_rate": 1.224532220552227e-05, "loss": 0.6926, "step": 17365 }, { "epoch": 0.45, "grad_norm": 2.1009442806243896, "learning_rate": 1.2244513251238295e-05, "loss": 0.5727, "step": 17366 }, { "epoch": 0.45, "grad_norm": 1.3029179573059082, "learning_rate": 1.2243704281486543e-05, "loss": 0.5078, "step": 17367 }, { "epoch": 0.45, "grad_norm": 1.245069146156311, "learning_rate": 1.2242895296272593e-05, "loss": 0.5576, "step": 17368 }, { "epoch": 0.45, "grad_norm": 1.7339431047439575, "learning_rate": 1.2242086295602014e-05, "loss": 0.4531, "step": 17369 }, { "epoch": 0.45, "grad_norm": 1.5051153898239136, "learning_rate": 1.224127727948039e-05, "loss": 0.5608, "step": 17370 }, { "epoch": 0.45, "grad_norm": 2.8342554569244385, "learning_rate": 1.2240468247913286e-05, "loss": 0.6405, "step": 17371 }, { "epoch": 0.45, "grad_norm": 1.6246166229248047, "learning_rate": 1.2239659200906286e-05, "loss": 0.4677, "step": 17372 }, { "epoch": 0.45, "grad_norm": 1.1233052015304565, "learning_rate": 1.2238850138464961e-05, "loss": 0.5024, "step": 17373 }, { "epoch": 0.45, "grad_norm": 2.3775794506073, "learning_rate": 1.2238041060594888e-05, "loss": 0.6894, "step": 17374 }, { "epoch": 0.45, "grad_norm": 3.5395572185516357, "learning_rate": 1.2237231967301641e-05, "loss": 0.5436, "step": 17375 }, { "epoch": 0.45, "grad_norm": 1.992332100868225, "learning_rate": 1.2236422858590795e-05, "loss": 0.5017, "step": 17376 }, { "epoch": 0.45, "grad_norm": 2.319758415222168, "learning_rate": 1.2235613734467933e-05, "loss": 0.472, "step": 17377 }, { "epoch": 0.45, "grad_norm": 1.9924914836883545, "learning_rate": 1.2234804594938621e-05, "loss": 0.5618, "step": 17378 }, { "epoch": 0.45, "grad_norm": 1.2021583318710327, "learning_rate": 1.2233995440008442e-05, "loss": 0.5293, "step": 17379 }, { "epoch": 0.45, "grad_norm": 0.9978770613670349, "learning_rate": 1.2233186269682968e-05, "loss": 0.5399, "step": 17380 }, { "epoch": 0.45, "grad_norm": 2.136005163192749, "learning_rate": 1.2232377083967779e-05, "loss": 0.6732, "step": 17381 }, { "epoch": 0.45, "grad_norm": 2.569675922393799, "learning_rate": 1.223156788286845e-05, "loss": 0.6068, "step": 17382 }, { "epoch": 0.45, "grad_norm": 4.419538497924805, "learning_rate": 1.2230758666390554e-05, "loss": 0.7756, "step": 17383 }, { "epoch": 0.45, "grad_norm": 1.5475378036499023, "learning_rate": 1.2229949434539676e-05, "loss": 0.6295, "step": 17384 }, { "epoch": 0.45, "grad_norm": 1.2990649938583374, "learning_rate": 1.2229140187321384e-05, "loss": 0.55, "step": 17385 }, { "epoch": 0.45, "grad_norm": 1.2309846878051758, "learning_rate": 1.2228330924741259e-05, "loss": 0.6306, "step": 17386 }, { "epoch": 0.45, "grad_norm": 1.9609458446502686, "learning_rate": 1.2227521646804875e-05, "loss": 0.5708, "step": 17387 }, { "epoch": 0.45, "grad_norm": 1.5226590633392334, "learning_rate": 1.2226712353517814e-05, "loss": 0.3889, "step": 17388 }, { "epoch": 0.45, "grad_norm": 1.6508346796035767, "learning_rate": 1.2225903044885652e-05, "loss": 0.6555, "step": 17389 }, { "epoch": 0.45, "grad_norm": 1.120137333869934, "learning_rate": 1.2225093720913961e-05, "loss": 0.4407, "step": 17390 }, { "epoch": 0.45, "grad_norm": 2.0962812900543213, "learning_rate": 1.2224284381608325e-05, "loss": 0.4211, "step": 17391 }, { "epoch": 0.45, "grad_norm": 1.9460755586624146, "learning_rate": 1.2223475026974316e-05, "loss": 0.7197, "step": 17392 }, { "epoch": 0.45, "grad_norm": 2.3218281269073486, "learning_rate": 1.2222665657017514e-05, "loss": 0.7503, "step": 17393 }, { "epoch": 0.45, "grad_norm": 1.8975430727005005, "learning_rate": 1.22218562717435e-05, "loss": 0.5128, "step": 17394 }, { "epoch": 0.45, "grad_norm": 6.856725692749023, "learning_rate": 1.2221046871157844e-05, "loss": 0.6256, "step": 17395 }, { "epoch": 0.45, "grad_norm": 1.118238091468811, "learning_rate": 1.2220237455266133e-05, "loss": 0.6209, "step": 17396 }, { "epoch": 0.45, "grad_norm": 3.8198537826538086, "learning_rate": 1.2219428024073935e-05, "loss": 0.514, "step": 17397 }, { "epoch": 0.45, "grad_norm": 3.2752535343170166, "learning_rate": 1.2218618577586838e-05, "loss": 0.579, "step": 17398 }, { "epoch": 0.45, "grad_norm": 3.3521487712860107, "learning_rate": 1.2217809115810415e-05, "loss": 0.7365, "step": 17399 }, { "epoch": 0.45, "grad_norm": 1.4071017503738403, "learning_rate": 1.2216999638750241e-05, "loss": 0.5271, "step": 17400 }, { "epoch": 0.45, "grad_norm": 1.573107361793518, "learning_rate": 1.2216190146411904e-05, "loss": 0.5953, "step": 17401 }, { "epoch": 0.45, "grad_norm": 2.1084353923797607, "learning_rate": 1.2215380638800973e-05, "loss": 0.5506, "step": 17402 }, { "epoch": 0.45, "grad_norm": 1.4534201622009277, "learning_rate": 1.2214571115923032e-05, "loss": 0.5591, "step": 17403 }, { "epoch": 0.45, "grad_norm": 2.1305158138275146, "learning_rate": 1.2213761577783658e-05, "loss": 0.6078, "step": 17404 }, { "epoch": 0.45, "grad_norm": 2.4372270107269287, "learning_rate": 1.2212952024388428e-05, "loss": 0.4844, "step": 17405 }, { "epoch": 0.45, "grad_norm": 2.0390312671661377, "learning_rate": 1.2212142455742927e-05, "loss": 0.6344, "step": 17406 }, { "epoch": 0.45, "grad_norm": 2.990902900695801, "learning_rate": 1.2211332871852727e-05, "loss": 0.6235, "step": 17407 }, { "epoch": 0.45, "grad_norm": 1.6453700065612793, "learning_rate": 1.2210523272723412e-05, "loss": 0.5311, "step": 17408 }, { "epoch": 0.45, "grad_norm": 1.392742395401001, "learning_rate": 1.2209713658360558e-05, "loss": 0.6121, "step": 17409 }, { "epoch": 0.45, "grad_norm": 3.9257988929748535, "learning_rate": 1.2208904028769745e-05, "loss": 0.4511, "step": 17410 }, { "epoch": 0.45, "grad_norm": 2.263209104537964, "learning_rate": 1.2208094383956555e-05, "loss": 0.6177, "step": 17411 }, { "epoch": 0.45, "grad_norm": 1.3540875911712646, "learning_rate": 1.2207284723926565e-05, "loss": 0.527, "step": 17412 }, { "epoch": 0.45, "grad_norm": 2.9033074378967285, "learning_rate": 1.2206475048685356e-05, "loss": 0.6581, "step": 17413 }, { "epoch": 0.45, "grad_norm": 2.707573890686035, "learning_rate": 1.2205665358238506e-05, "loss": 0.5796, "step": 17414 }, { "epoch": 0.45, "grad_norm": 4.181916236877441, "learning_rate": 1.2204855652591599e-05, "loss": 0.6988, "step": 17415 }, { "epoch": 0.45, "grad_norm": 1.6680989265441895, "learning_rate": 1.2204045931750208e-05, "loss": 0.5407, "step": 17416 }, { "epoch": 0.45, "grad_norm": 2.809622049331665, "learning_rate": 1.2203236195719919e-05, "loss": 0.78, "step": 17417 }, { "epoch": 0.45, "grad_norm": 4.382085800170898, "learning_rate": 1.220242644450631e-05, "loss": 0.3983, "step": 17418 }, { "epoch": 0.45, "grad_norm": 1.0928599834442139, "learning_rate": 1.2201616678114963e-05, "loss": 0.5278, "step": 17419 }, { "epoch": 0.45, "grad_norm": 2.8061928749084473, "learning_rate": 1.2200806896551455e-05, "loss": 0.6259, "step": 17420 }, { "epoch": 0.45, "grad_norm": 4.374377250671387, "learning_rate": 1.219999709982137e-05, "loss": 0.5594, "step": 17421 }, { "epoch": 0.45, "grad_norm": 1.2259162664413452, "learning_rate": 1.2199187287930285e-05, "loss": 0.5628, "step": 17422 }, { "epoch": 0.45, "grad_norm": 2.271282434463501, "learning_rate": 1.2198377460883788e-05, "loss": 0.3456, "step": 17423 }, { "epoch": 0.45, "grad_norm": 1.1763473749160767, "learning_rate": 1.2197567618687449e-05, "loss": 0.4925, "step": 17424 }, { "epoch": 0.45, "grad_norm": 5.070210933685303, "learning_rate": 1.2196757761346858e-05, "loss": 0.6843, "step": 17425 }, { "epoch": 0.45, "grad_norm": 1.1052544116973877, "learning_rate": 1.2195947888867593e-05, "loss": 0.4591, "step": 17426 }, { "epoch": 0.45, "grad_norm": 1.4534225463867188, "learning_rate": 1.2195138001255232e-05, "loss": 0.5298, "step": 17427 }, { "epoch": 0.45, "grad_norm": 0.8925514817237854, "learning_rate": 1.219432809851536e-05, "loss": 0.4864, "step": 17428 }, { "epoch": 0.45, "grad_norm": 1.018621563911438, "learning_rate": 1.219351818065356e-05, "loss": 0.5897, "step": 17429 }, { "epoch": 0.45, "grad_norm": 2.1501572132110596, "learning_rate": 1.2192708247675406e-05, "loss": 0.7767, "step": 17430 }, { "epoch": 0.45, "grad_norm": 1.3932549953460693, "learning_rate": 1.2191898299586487e-05, "loss": 0.5525, "step": 17431 }, { "epoch": 0.45, "grad_norm": 2.372044563293457, "learning_rate": 1.2191088336392386e-05, "loss": 0.6626, "step": 17432 }, { "epoch": 0.45, "grad_norm": 8.619837760925293, "learning_rate": 1.2190278358098673e-05, "loss": 0.442, "step": 17433 }, { "epoch": 0.45, "grad_norm": 2.0834438800811768, "learning_rate": 1.2189468364710942e-05, "loss": 0.656, "step": 17434 }, { "epoch": 0.45, "grad_norm": 1.7255337238311768, "learning_rate": 1.218865835623477e-05, "loss": 0.6301, "step": 17435 }, { "epoch": 0.45, "grad_norm": 1.129523515701294, "learning_rate": 1.2187848332675737e-05, "loss": 0.5398, "step": 17436 }, { "epoch": 0.45, "grad_norm": 4.135753154754639, "learning_rate": 1.2187038294039433e-05, "loss": 0.6033, "step": 17437 }, { "epoch": 0.45, "grad_norm": 1.3863509893417358, "learning_rate": 1.2186228240331433e-05, "loss": 0.5042, "step": 17438 }, { "epoch": 0.45, "grad_norm": 1.4404809474945068, "learning_rate": 1.2185418171557319e-05, "loss": 0.4437, "step": 17439 }, { "epoch": 0.45, "grad_norm": 3.9943201541900635, "learning_rate": 1.218460808772268e-05, "loss": 0.4398, "step": 17440 }, { "epoch": 0.45, "grad_norm": 1.4922524690628052, "learning_rate": 1.218379798883309e-05, "loss": 0.3563, "step": 17441 }, { "epoch": 0.45, "grad_norm": 1.7465343475341797, "learning_rate": 1.218298787489414e-05, "loss": 0.6363, "step": 17442 }, { "epoch": 0.45, "grad_norm": 6.916751861572266, "learning_rate": 1.2182177745911408e-05, "loss": 0.4994, "step": 17443 }, { "epoch": 0.45, "grad_norm": 1.2606889009475708, "learning_rate": 1.2181367601890479e-05, "loss": 0.6187, "step": 17444 }, { "epoch": 0.45, "grad_norm": 1.4610496759414673, "learning_rate": 1.2180557442836933e-05, "loss": 0.6746, "step": 17445 }, { "epoch": 0.45, "grad_norm": 1.136075735092163, "learning_rate": 1.2179747268756354e-05, "loss": 0.6254, "step": 17446 }, { "epoch": 0.45, "grad_norm": 0.9755748510360718, "learning_rate": 1.2178937079654329e-05, "loss": 0.4749, "step": 17447 }, { "epoch": 0.45, "grad_norm": 3.7815897464752197, "learning_rate": 1.2178126875536438e-05, "loss": 0.5962, "step": 17448 }, { "epoch": 0.45, "grad_norm": 5.527194976806641, "learning_rate": 1.2177316656408267e-05, "loss": 0.5721, "step": 17449 }, { "epoch": 0.45, "grad_norm": 1.3445260524749756, "learning_rate": 1.2176506422275393e-05, "loss": 0.7463, "step": 17450 }, { "epoch": 0.45, "grad_norm": 1.73329496383667, "learning_rate": 1.2175696173143406e-05, "loss": 0.5378, "step": 17451 }, { "epoch": 0.45, "grad_norm": 4.381777286529541, "learning_rate": 1.2174885909017888e-05, "loss": 0.6299, "step": 17452 }, { "epoch": 0.45, "grad_norm": 2.351444721221924, "learning_rate": 1.2174075629904422e-05, "loss": 0.6589, "step": 17453 }, { "epoch": 0.45, "grad_norm": 7.705101490020752, "learning_rate": 1.2173265335808597e-05, "loss": 0.7704, "step": 17454 }, { "epoch": 0.45, "grad_norm": 2.509133815765381, "learning_rate": 1.2172455026735988e-05, "loss": 0.5208, "step": 17455 }, { "epoch": 0.45, "grad_norm": 6.1228179931640625, "learning_rate": 1.2171644702692186e-05, "loss": 0.5123, "step": 17456 }, { "epoch": 0.45, "grad_norm": 2.083261251449585, "learning_rate": 1.2170834363682771e-05, "loss": 0.5251, "step": 17457 }, { "epoch": 0.45, "grad_norm": 1.402055025100708, "learning_rate": 1.2170024009713331e-05, "loss": 0.5933, "step": 17458 }, { "epoch": 0.45, "grad_norm": 5.479429721832275, "learning_rate": 1.216921364078945e-05, "loss": 0.5426, "step": 17459 }, { "epoch": 0.45, "grad_norm": 12.543302536010742, "learning_rate": 1.2168403256916708e-05, "loss": 0.6304, "step": 17460 }, { "epoch": 0.45, "grad_norm": 1.7576770782470703, "learning_rate": 1.2167592858100696e-05, "loss": 0.6176, "step": 17461 }, { "epoch": 0.45, "grad_norm": 3.3475124835968018, "learning_rate": 1.2166782444346993e-05, "loss": 0.6494, "step": 17462 }, { "epoch": 0.45, "grad_norm": 5.266508102416992, "learning_rate": 1.2165972015661189e-05, "loss": 0.4957, "step": 17463 }, { "epoch": 0.45, "grad_norm": 2.379098653793335, "learning_rate": 1.2165161572048866e-05, "loss": 0.4634, "step": 17464 }, { "epoch": 0.45, "grad_norm": 2.336078643798828, "learning_rate": 1.2164351113515611e-05, "loss": 0.6526, "step": 17465 }, { "epoch": 0.45, "grad_norm": 3.025150775909424, "learning_rate": 1.2163540640067007e-05, "loss": 0.7062, "step": 17466 }, { "epoch": 0.45, "grad_norm": 3.4316608905792236, "learning_rate": 1.2162730151708639e-05, "loss": 0.6427, "step": 17467 }, { "epoch": 0.45, "grad_norm": 1.2932771444320679, "learning_rate": 1.2161919648446096e-05, "loss": 0.5075, "step": 17468 }, { "epoch": 0.45, "grad_norm": 8.65092658996582, "learning_rate": 1.2161109130284959e-05, "loss": 0.5241, "step": 17469 }, { "epoch": 0.45, "grad_norm": 3.8202931880950928, "learning_rate": 1.2160298597230816e-05, "loss": 0.6466, "step": 17470 }, { "epoch": 0.45, "grad_norm": 0.9563695192337036, "learning_rate": 1.2159488049289252e-05, "loss": 0.5006, "step": 17471 }, { "epoch": 0.45, "grad_norm": 1.3146744966506958, "learning_rate": 1.2158677486465853e-05, "loss": 0.5322, "step": 17472 }, { "epoch": 0.45, "grad_norm": 1.4332557916641235, "learning_rate": 1.2157866908766207e-05, "loss": 0.6982, "step": 17473 }, { "epoch": 0.45, "grad_norm": 3.3934950828552246, "learning_rate": 1.2157056316195897e-05, "loss": 0.5439, "step": 17474 }, { "epoch": 0.45, "grad_norm": 1.7229005098342896, "learning_rate": 1.2156245708760508e-05, "loss": 0.5331, "step": 17475 }, { "epoch": 0.45, "grad_norm": 1.2489904165267944, "learning_rate": 1.215543508646563e-05, "loss": 0.5059, "step": 17476 }, { "epoch": 0.45, "grad_norm": 0.9532371163368225, "learning_rate": 1.2154624449316848e-05, "loss": 0.5023, "step": 17477 }, { "epoch": 0.45, "grad_norm": 3.6880030632019043, "learning_rate": 1.215381379731975e-05, "loss": 0.5806, "step": 17478 }, { "epoch": 0.45, "grad_norm": 1.480370283126831, "learning_rate": 1.2153003130479915e-05, "loss": 0.384, "step": 17479 }, { "epoch": 0.45, "grad_norm": 2.0847325325012207, "learning_rate": 1.2152192448802938e-05, "loss": 0.5896, "step": 17480 }, { "epoch": 0.45, "grad_norm": 6.383630752563477, "learning_rate": 1.2151381752294403e-05, "loss": 0.4861, "step": 17481 }, { "epoch": 0.45, "grad_norm": 2.63152813911438, "learning_rate": 1.2150571040959895e-05, "loss": 0.4411, "step": 17482 }, { "epoch": 0.45, "grad_norm": 1.5549379587173462, "learning_rate": 1.2149760314805007e-05, "loss": 0.6125, "step": 17483 }, { "epoch": 0.45, "grad_norm": 3.3834941387176514, "learning_rate": 1.2148949573835315e-05, "loss": 0.6065, "step": 17484 }, { "epoch": 0.45, "grad_norm": 0.9593064785003662, "learning_rate": 1.2148138818056418e-05, "loss": 0.5751, "step": 17485 }, { "epoch": 0.45, "grad_norm": 1.232901930809021, "learning_rate": 1.2147328047473896e-05, "loss": 0.5202, "step": 17486 }, { "epoch": 0.45, "grad_norm": 1.9945018291473389, "learning_rate": 1.2146517262093339e-05, "loss": 0.4885, "step": 17487 }, { "epoch": 0.45, "grad_norm": 1.4265893697738647, "learning_rate": 1.2145706461920332e-05, "loss": 0.6404, "step": 17488 }, { "epoch": 0.45, "grad_norm": 1.2472031116485596, "learning_rate": 1.2144895646960464e-05, "loss": 0.6307, "step": 17489 }, { "epoch": 0.45, "grad_norm": 1.1358613967895508, "learning_rate": 1.2144084817219325e-05, "loss": 0.597, "step": 17490 }, { "epoch": 0.45, "grad_norm": 2.4563040733337402, "learning_rate": 1.2143273972702498e-05, "loss": 0.5533, "step": 17491 }, { "epoch": 0.45, "grad_norm": 1.5318901538848877, "learning_rate": 1.2142463113415575e-05, "loss": 0.4842, "step": 17492 }, { "epoch": 0.45, "grad_norm": 8.731736183166504, "learning_rate": 1.2141652239364143e-05, "loss": 0.6345, "step": 17493 }, { "epoch": 0.45, "grad_norm": 8.389070510864258, "learning_rate": 1.2140841350553787e-05, "loss": 0.5589, "step": 17494 }, { "epoch": 0.45, "grad_norm": 2.1821234226226807, "learning_rate": 1.2140030446990097e-05, "loss": 0.3653, "step": 17495 }, { "epoch": 0.45, "grad_norm": 3.5638463497161865, "learning_rate": 1.2139219528678663e-05, "loss": 0.655, "step": 17496 }, { "epoch": 0.45, "grad_norm": 1.8279542922973633, "learning_rate": 1.213840859562507e-05, "loss": 0.6198, "step": 17497 }, { "epoch": 0.45, "grad_norm": 2.1556777954101562, "learning_rate": 1.2137597647834914e-05, "loss": 0.6313, "step": 17498 }, { "epoch": 0.45, "grad_norm": 1.348261833190918, "learning_rate": 1.2136786685313773e-05, "loss": 0.6452, "step": 17499 }, { "epoch": 0.45, "grad_norm": 2.030879259109497, "learning_rate": 1.2135975708067242e-05, "loss": 0.7377, "step": 17500 }, { "epoch": 0.45, "grad_norm": 2.088364839553833, "learning_rate": 1.2135164716100909e-05, "loss": 0.5567, "step": 17501 }, { "epoch": 0.45, "grad_norm": 1.1303991079330444, "learning_rate": 1.213435370942036e-05, "loss": 0.5485, "step": 17502 }, { "epoch": 0.45, "grad_norm": 1.5395461320877075, "learning_rate": 1.2133542688031188e-05, "loss": 0.628, "step": 17503 }, { "epoch": 0.45, "grad_norm": 1.5093154907226562, "learning_rate": 1.213273165193898e-05, "loss": 0.4534, "step": 17504 }, { "epoch": 0.45, "grad_norm": 1.493672251701355, "learning_rate": 1.2131920601149328e-05, "loss": 0.6195, "step": 17505 }, { "epoch": 0.45, "grad_norm": 1.9790421724319458, "learning_rate": 1.2131109535667813e-05, "loss": 0.5455, "step": 17506 }, { "epoch": 0.45, "grad_norm": 3.830094814300537, "learning_rate": 1.2130298455500034e-05, "loss": 0.6143, "step": 17507 }, { "epoch": 0.45, "grad_norm": 1.9619566202163696, "learning_rate": 1.2129487360651575e-05, "loss": 0.4418, "step": 17508 }, { "epoch": 0.45, "grad_norm": 1.3671255111694336, "learning_rate": 1.2128676251128027e-05, "loss": 0.5507, "step": 17509 }, { "epoch": 0.45, "grad_norm": 2.0324337482452393, "learning_rate": 1.212786512693498e-05, "loss": 0.7297, "step": 17510 }, { "epoch": 0.45, "grad_norm": 1.4276350736618042, "learning_rate": 1.2127053988078023e-05, "loss": 0.5743, "step": 17511 }, { "epoch": 0.45, "grad_norm": 1.4015629291534424, "learning_rate": 1.212624283456275e-05, "loss": 0.5399, "step": 17512 }, { "epoch": 0.45, "grad_norm": 8.050141334533691, "learning_rate": 1.212543166639474e-05, "loss": 0.4977, "step": 17513 }, { "epoch": 0.45, "grad_norm": 2.8399038314819336, "learning_rate": 1.2124620483579594e-05, "loss": 0.5914, "step": 17514 }, { "epoch": 0.45, "grad_norm": 1.1921125650405884, "learning_rate": 1.21238092861229e-05, "loss": 0.573, "step": 17515 }, { "epoch": 0.45, "grad_norm": 1.2201869487762451, "learning_rate": 1.2122998074030246e-05, "loss": 0.4175, "step": 17516 }, { "epoch": 0.45, "grad_norm": 1.7652246952056885, "learning_rate": 1.2122186847307222e-05, "loss": 0.5271, "step": 17517 }, { "epoch": 0.45, "grad_norm": 5.603321075439453, "learning_rate": 1.212137560595942e-05, "loss": 0.6069, "step": 17518 }, { "epoch": 0.45, "grad_norm": 3.129005193710327, "learning_rate": 1.2120564349992427e-05, "loss": 0.5587, "step": 17519 }, { "epoch": 0.45, "grad_norm": 1.1079522371292114, "learning_rate": 1.2119753079411843e-05, "loss": 0.5253, "step": 17520 }, { "epoch": 0.45, "grad_norm": 1.7691141366958618, "learning_rate": 1.2118941794223247e-05, "loss": 0.4995, "step": 17521 }, { "epoch": 0.45, "grad_norm": 1.2194974422454834, "learning_rate": 1.2118130494432241e-05, "loss": 0.4775, "step": 17522 }, { "epoch": 0.45, "grad_norm": 2.2663421630859375, "learning_rate": 1.2117319180044404e-05, "loss": 0.6837, "step": 17523 }, { "epoch": 0.45, "grad_norm": 3.8131580352783203, "learning_rate": 1.2116507851065338e-05, "loss": 0.5811, "step": 17524 }, { "epoch": 0.45, "grad_norm": 3.055122137069702, "learning_rate": 1.2115696507500628e-05, "loss": 0.5452, "step": 17525 }, { "epoch": 0.45, "grad_norm": 1.5519471168518066, "learning_rate": 1.2114885149355867e-05, "loss": 0.6992, "step": 17526 }, { "epoch": 0.45, "grad_norm": 3.3746984004974365, "learning_rate": 1.2114073776636646e-05, "loss": 0.5711, "step": 17527 }, { "epoch": 0.45, "grad_norm": 1.6158156394958496, "learning_rate": 1.2113262389348556e-05, "loss": 0.7156, "step": 17528 }, { "epoch": 0.45, "grad_norm": 1.5745513439178467, "learning_rate": 1.2112450987497193e-05, "loss": 0.5609, "step": 17529 }, { "epoch": 0.45, "grad_norm": 4.52299165725708, "learning_rate": 1.211163957108814e-05, "loss": 0.5645, "step": 17530 }, { "epoch": 0.45, "grad_norm": 5.89552116394043, "learning_rate": 1.2110828140126996e-05, "loss": 0.6388, "step": 17531 }, { "epoch": 0.45, "grad_norm": 1.374589204788208, "learning_rate": 1.2110016694619351e-05, "loss": 0.5254, "step": 17532 }, { "epoch": 0.45, "grad_norm": 1.0337976217269897, "learning_rate": 1.2109205234570795e-05, "loss": 0.4899, "step": 17533 }, { "epoch": 0.45, "grad_norm": 2.9724795818328857, "learning_rate": 1.2108393759986925e-05, "loss": 0.4079, "step": 17534 }, { "epoch": 0.45, "grad_norm": 1.9068489074707031, "learning_rate": 1.2107582270873327e-05, "loss": 0.699, "step": 17535 }, { "epoch": 0.45, "grad_norm": 2.175588607788086, "learning_rate": 1.2106770767235598e-05, "loss": 0.3965, "step": 17536 }, { "epoch": 0.45, "grad_norm": 3.283622980117798, "learning_rate": 1.2105959249079327e-05, "loss": 0.6982, "step": 17537 }, { "epoch": 0.45, "grad_norm": 6.092421054840088, "learning_rate": 1.2105147716410106e-05, "loss": 0.679, "step": 17538 }, { "epoch": 0.45, "grad_norm": 3.7917866706848145, "learning_rate": 1.2104336169233533e-05, "loss": 0.6436, "step": 17539 }, { "epoch": 0.45, "grad_norm": 9.3746919631958, "learning_rate": 1.2103524607555196e-05, "loss": 0.672, "step": 17540 }, { "epoch": 0.45, "grad_norm": 3.966094970703125, "learning_rate": 1.2102713031380688e-05, "loss": 0.7001, "step": 17541 }, { "epoch": 0.45, "grad_norm": 2.264829158782959, "learning_rate": 1.2101901440715604e-05, "loss": 0.7274, "step": 17542 }, { "epoch": 0.45, "grad_norm": 1.5237761735916138, "learning_rate": 1.2101089835565534e-05, "loss": 0.6096, "step": 17543 }, { "epoch": 0.45, "grad_norm": 2.2180159091949463, "learning_rate": 1.2100278215936076e-05, "loss": 0.6291, "step": 17544 }, { "epoch": 0.45, "grad_norm": 1.658153772354126, "learning_rate": 1.2099466581832817e-05, "loss": 0.6149, "step": 17545 }, { "epoch": 0.45, "grad_norm": 2.7358248233795166, "learning_rate": 1.2098654933261358e-05, "loss": 0.7127, "step": 17546 }, { "epoch": 0.45, "grad_norm": 3.2237226963043213, "learning_rate": 1.2097843270227284e-05, "loss": 0.5243, "step": 17547 }, { "epoch": 0.45, "grad_norm": 1.1732323169708252, "learning_rate": 1.209703159273619e-05, "loss": 0.6014, "step": 17548 }, { "epoch": 0.45, "grad_norm": 2.9844353199005127, "learning_rate": 1.2096219900793676e-05, "loss": 0.5002, "step": 17549 }, { "epoch": 0.45, "grad_norm": 1.1323750019073486, "learning_rate": 1.209540819440533e-05, "loss": 0.5498, "step": 17550 }, { "epoch": 0.45, "grad_norm": 1.9008890390396118, "learning_rate": 1.2094596473576748e-05, "loss": 0.5466, "step": 17551 }, { "epoch": 0.45, "grad_norm": 1.2764012813568115, "learning_rate": 1.2093784738313521e-05, "loss": 0.598, "step": 17552 }, { "epoch": 0.45, "grad_norm": 1.888081669807434, "learning_rate": 1.2092972988621248e-05, "loss": 0.7374, "step": 17553 }, { "epoch": 0.45, "grad_norm": 6.6505303382873535, "learning_rate": 1.2092161224505516e-05, "loss": 0.6632, "step": 17554 }, { "epoch": 0.45, "grad_norm": 6.109855651855469, "learning_rate": 1.2091349445971925e-05, "loss": 0.8445, "step": 17555 }, { "epoch": 0.45, "grad_norm": 3.403280258178711, "learning_rate": 1.2090537653026068e-05, "loss": 0.454, "step": 17556 }, { "epoch": 0.45, "grad_norm": 2.1767618656158447, "learning_rate": 1.208972584567354e-05, "loss": 0.7318, "step": 17557 }, { "epoch": 0.45, "grad_norm": 2.2384769916534424, "learning_rate": 1.2088914023919933e-05, "loss": 0.536, "step": 17558 }, { "epoch": 0.45, "grad_norm": 1.5112119913101196, "learning_rate": 1.2088102187770842e-05, "loss": 0.473, "step": 17559 }, { "epoch": 0.45, "grad_norm": 2.9959568977355957, "learning_rate": 1.2087290337231863e-05, "loss": 0.5592, "step": 17560 }, { "epoch": 0.45, "grad_norm": 1.9715898036956787, "learning_rate": 1.2086478472308594e-05, "loss": 0.7014, "step": 17561 }, { "epoch": 0.45, "grad_norm": 3.835667848587036, "learning_rate": 1.2085666593006622e-05, "loss": 0.5668, "step": 17562 }, { "epoch": 0.45, "grad_norm": 1.6890531778335571, "learning_rate": 1.2084854699331548e-05, "loss": 0.6167, "step": 17563 }, { "epoch": 0.45, "grad_norm": 1.08048415184021, "learning_rate": 1.2084042791288963e-05, "loss": 0.6018, "step": 17564 }, { "epoch": 0.45, "grad_norm": 1.0577017068862915, "learning_rate": 1.2083230868884468e-05, "loss": 0.5957, "step": 17565 }, { "epoch": 0.45, "grad_norm": 2.898385524749756, "learning_rate": 1.2082418932123652e-05, "loss": 0.5756, "step": 17566 }, { "epoch": 0.45, "grad_norm": 1.7898246049880981, "learning_rate": 1.2081606981012112e-05, "loss": 0.5319, "step": 17567 }, { "epoch": 0.45, "grad_norm": 1.7171820402145386, "learning_rate": 1.208079501555545e-05, "loss": 0.4078, "step": 17568 }, { "epoch": 0.45, "grad_norm": 1.2879655361175537, "learning_rate": 1.207998303575925e-05, "loss": 0.5814, "step": 17569 }, { "epoch": 0.45, "grad_norm": 3.827045440673828, "learning_rate": 1.2079171041629118e-05, "loss": 0.7438, "step": 17570 }, { "epoch": 0.45, "grad_norm": 2.015577793121338, "learning_rate": 1.2078359033170644e-05, "loss": 0.7752, "step": 17571 }, { "epoch": 0.45, "grad_norm": 3.677300214767456, "learning_rate": 1.2077547010389423e-05, "loss": 0.6923, "step": 17572 }, { "epoch": 0.45, "grad_norm": 3.7388393878936768, "learning_rate": 1.2076734973291055e-05, "loss": 0.5672, "step": 17573 }, { "epoch": 0.45, "grad_norm": 1.3191239833831787, "learning_rate": 1.2075922921881133e-05, "loss": 0.5058, "step": 17574 }, { "epoch": 0.45, "grad_norm": 1.2345079183578491, "learning_rate": 1.207511085616526e-05, "loss": 0.5421, "step": 17575 }, { "epoch": 0.45, "grad_norm": 2.3918731212615967, "learning_rate": 1.207429877614902e-05, "loss": 0.6246, "step": 17576 }, { "epoch": 0.45, "grad_norm": 3.698831796646118, "learning_rate": 1.207348668183802e-05, "loss": 0.6842, "step": 17577 }, { "epoch": 0.45, "grad_norm": 1.753461241722107, "learning_rate": 1.207267457323785e-05, "loss": 0.6721, "step": 17578 }, { "epoch": 0.45, "grad_norm": 1.0025968551635742, "learning_rate": 1.2071862450354111e-05, "loss": 0.519, "step": 17579 }, { "epoch": 0.45, "grad_norm": 2.9083688259124756, "learning_rate": 1.2071050313192396e-05, "loss": 0.6696, "step": 17580 }, { "epoch": 0.45, "grad_norm": 1.489710807800293, "learning_rate": 1.2070238161758303e-05, "loss": 0.4898, "step": 17581 }, { "epoch": 0.45, "grad_norm": 1.733089804649353, "learning_rate": 1.2069425996057434e-05, "loss": 0.5185, "step": 17582 }, { "epoch": 0.45, "grad_norm": 1.4813590049743652, "learning_rate": 1.2068613816095376e-05, "loss": 0.4086, "step": 17583 }, { "epoch": 0.45, "grad_norm": 1.7736395597457886, "learning_rate": 1.206780162187773e-05, "loss": 0.5179, "step": 17584 }, { "epoch": 0.45, "grad_norm": 6.044402122497559, "learning_rate": 1.20669894134101e-05, "loss": 0.5421, "step": 17585 }, { "epoch": 0.45, "grad_norm": 2.199504852294922, "learning_rate": 1.2066177190698074e-05, "loss": 0.7406, "step": 17586 }, { "epoch": 0.45, "grad_norm": 1.4421945810317993, "learning_rate": 1.2065364953747253e-05, "loss": 0.6747, "step": 17587 }, { "epoch": 0.45, "grad_norm": 3.9283337593078613, "learning_rate": 1.2064552702563235e-05, "loss": 0.8656, "step": 17588 }, { "epoch": 0.45, "grad_norm": 1.383339524269104, "learning_rate": 1.2063740437151616e-05, "loss": 0.6127, "step": 17589 }, { "epoch": 0.45, "grad_norm": 1.4903148412704468, "learning_rate": 1.2062928157517997e-05, "loss": 0.4334, "step": 17590 }, { "epoch": 0.45, "grad_norm": 3.9094583988189697, "learning_rate": 1.2062115863667972e-05, "loss": 0.7893, "step": 17591 }, { "epoch": 0.45, "grad_norm": 1.1822680234909058, "learning_rate": 1.2061303555607137e-05, "loss": 0.5063, "step": 17592 }, { "epoch": 0.45, "grad_norm": 1.1024365425109863, "learning_rate": 1.2060491233341097e-05, "loss": 0.6008, "step": 17593 }, { "epoch": 0.45, "grad_norm": 1.8666667938232422, "learning_rate": 1.2059678896875445e-05, "loss": 0.6023, "step": 17594 }, { "epoch": 0.45, "grad_norm": 1.7746286392211914, "learning_rate": 1.205886654621578e-05, "loss": 0.4953, "step": 17595 }, { "epoch": 0.45, "grad_norm": 2.901677370071411, "learning_rate": 1.2058054181367697e-05, "loss": 0.6203, "step": 17596 }, { "epoch": 0.45, "grad_norm": 6.684867858886719, "learning_rate": 1.2057241802336802e-05, "loss": 0.5035, "step": 17597 }, { "epoch": 0.45, "grad_norm": 1.830985188484192, "learning_rate": 1.2056429409128687e-05, "loss": 0.5656, "step": 17598 }, { "epoch": 0.45, "grad_norm": 1.841720461845398, "learning_rate": 1.2055617001748954e-05, "loss": 0.5325, "step": 17599 }, { "epoch": 0.45, "grad_norm": 2.3671510219573975, "learning_rate": 1.2054804580203196e-05, "loss": 0.56, "step": 17600 }, { "epoch": 0.45, "grad_norm": 1.4008373022079468, "learning_rate": 1.2053992144497019e-05, "loss": 0.5784, "step": 17601 }, { "epoch": 0.45, "grad_norm": 2.76888108253479, "learning_rate": 1.2053179694636019e-05, "loss": 0.7548, "step": 17602 }, { "epoch": 0.45, "grad_norm": 2.7933456897735596, "learning_rate": 1.2052367230625792e-05, "loss": 0.5577, "step": 17603 }, { "epoch": 0.45, "grad_norm": 4.137055397033691, "learning_rate": 1.2051554752471943e-05, "loss": 0.6585, "step": 17604 }, { "epoch": 0.45, "grad_norm": 2.4273998737335205, "learning_rate": 1.2050742260180066e-05, "loss": 0.639, "step": 17605 }, { "epoch": 0.45, "grad_norm": 5.610229015350342, "learning_rate": 1.2049929753755762e-05, "loss": 0.6116, "step": 17606 }, { "epoch": 0.45, "grad_norm": 3.621561288833618, "learning_rate": 1.2049117233204631e-05, "loss": 0.5647, "step": 17607 }, { "epoch": 0.45, "grad_norm": 8.523184776306152, "learning_rate": 1.204830469853227e-05, "loss": 0.483, "step": 17608 }, { "epoch": 0.45, "grad_norm": 1.6955231428146362, "learning_rate": 1.2047492149744279e-05, "loss": 0.4469, "step": 17609 }, { "epoch": 0.45, "grad_norm": 1.659441351890564, "learning_rate": 1.204667958684626e-05, "loss": 0.5415, "step": 17610 }, { "epoch": 0.45, "grad_norm": 2.2257769107818604, "learning_rate": 1.2045867009843808e-05, "loss": 0.7628, "step": 17611 }, { "epoch": 0.45, "grad_norm": 1.6144236326217651, "learning_rate": 1.204505441874253e-05, "loss": 0.5576, "step": 17612 }, { "epoch": 0.45, "grad_norm": 2.671600580215454, "learning_rate": 1.2044241813548018e-05, "loss": 0.7046, "step": 17613 }, { "epoch": 0.45, "grad_norm": 3.053039073944092, "learning_rate": 1.2043429194265882e-05, "loss": 0.6113, "step": 17614 }, { "epoch": 0.45, "grad_norm": 1.3815338611602783, "learning_rate": 1.204261656090171e-05, "loss": 0.4904, "step": 17615 }, { "epoch": 0.45, "grad_norm": 1.780409574508667, "learning_rate": 1.2041803913461108e-05, "loss": 0.6219, "step": 17616 }, { "epoch": 0.45, "grad_norm": 2.332376480102539, "learning_rate": 1.2040991251949678e-05, "loss": 0.5616, "step": 17617 }, { "epoch": 0.45, "grad_norm": 1.7135834693908691, "learning_rate": 1.2040178576373019e-05, "loss": 0.5386, "step": 17618 }, { "epoch": 0.45, "grad_norm": 1.6328811645507812, "learning_rate": 1.2039365886736732e-05, "loss": 0.676, "step": 17619 }, { "epoch": 0.45, "grad_norm": 2.858644723892212, "learning_rate": 1.2038553183046414e-05, "loss": 0.5555, "step": 17620 }, { "epoch": 0.45, "grad_norm": 7.1830363273620605, "learning_rate": 1.203774046530767e-05, "loss": 0.5335, "step": 17621 }, { "epoch": 0.45, "grad_norm": 9.788026809692383, "learning_rate": 1.2036927733526096e-05, "loss": 0.6938, "step": 17622 }, { "epoch": 0.45, "grad_norm": 1.0765342712402344, "learning_rate": 1.2036114987707296e-05, "loss": 0.3933, "step": 17623 }, { "epoch": 0.45, "grad_norm": 2.161569595336914, "learning_rate": 1.2035302227856876e-05, "loss": 0.6078, "step": 17624 }, { "epoch": 0.45, "grad_norm": 7.305125713348389, "learning_rate": 1.2034489453980427e-05, "loss": 0.6353, "step": 17625 }, { "epoch": 0.45, "grad_norm": 1.4637256860733032, "learning_rate": 1.2033676666083556e-05, "loss": 0.546, "step": 17626 }, { "epoch": 0.45, "grad_norm": 4.37380838394165, "learning_rate": 1.2032863864171863e-05, "loss": 0.5238, "step": 17627 }, { "epoch": 0.45, "grad_norm": 2.2167978286743164, "learning_rate": 1.203205104825095e-05, "loss": 0.6835, "step": 17628 }, { "epoch": 0.45, "grad_norm": 1.26824951171875, "learning_rate": 1.2031238218326418e-05, "loss": 0.6493, "step": 17629 }, { "epoch": 0.45, "grad_norm": 4.558446884155273, "learning_rate": 1.2030425374403866e-05, "loss": 0.8048, "step": 17630 }, { "epoch": 0.45, "grad_norm": 1.6943873167037964, "learning_rate": 1.2029612516488902e-05, "loss": 0.5472, "step": 17631 }, { "epoch": 0.45, "grad_norm": 1.3701971769332886, "learning_rate": 1.202879964458712e-05, "loss": 0.425, "step": 17632 }, { "epoch": 0.45, "grad_norm": 2.3877720832824707, "learning_rate": 1.2027986758704129e-05, "loss": 0.7325, "step": 17633 }, { "epoch": 0.45, "grad_norm": 3.131579875946045, "learning_rate": 1.2027173858845525e-05, "loss": 0.7536, "step": 17634 }, { "epoch": 0.45, "grad_norm": 1.7039260864257812, "learning_rate": 1.202636094501691e-05, "loss": 0.4571, "step": 17635 }, { "epoch": 0.45, "grad_norm": 2.5164339542388916, "learning_rate": 1.2025548017223893e-05, "loss": 0.5074, "step": 17636 }, { "epoch": 0.45, "grad_norm": 1.6821353435516357, "learning_rate": 1.202473507547207e-05, "loss": 0.5438, "step": 17637 }, { "epoch": 0.45, "grad_norm": 1.2673296928405762, "learning_rate": 1.2023922119767044e-05, "loss": 0.4799, "step": 17638 }, { "epoch": 0.45, "grad_norm": 1.8355499505996704, "learning_rate": 1.2023109150114421e-05, "loss": 0.4209, "step": 17639 }, { "epoch": 0.45, "grad_norm": 3.736347198486328, "learning_rate": 1.2022296166519795e-05, "loss": 0.5918, "step": 17640 }, { "epoch": 0.45, "grad_norm": 2.7115166187286377, "learning_rate": 1.202148316898878e-05, "loss": 0.8059, "step": 17641 }, { "epoch": 0.45, "grad_norm": 2.4895975589752197, "learning_rate": 1.202067015752697e-05, "loss": 0.5146, "step": 17642 }, { "epoch": 0.45, "grad_norm": 1.062934398651123, "learning_rate": 1.2019857132139975e-05, "loss": 0.5931, "step": 17643 }, { "epoch": 0.45, "grad_norm": 5.299081802368164, "learning_rate": 1.201904409283339e-05, "loss": 0.4279, "step": 17644 }, { "epoch": 0.45, "grad_norm": 6.454748153686523, "learning_rate": 1.2018231039612822e-05, "loss": 0.6962, "step": 17645 }, { "epoch": 0.45, "grad_norm": 1.5574911832809448, "learning_rate": 1.2017417972483874e-05, "loss": 0.6039, "step": 17646 }, { "epoch": 0.45, "grad_norm": 13.428184509277344, "learning_rate": 1.2016604891452147e-05, "loss": 0.6256, "step": 17647 }, { "epoch": 0.45, "grad_norm": 3.4004108905792236, "learning_rate": 1.201579179652325e-05, "loss": 0.6484, "step": 17648 }, { "epoch": 0.45, "grad_norm": 1.5326361656188965, "learning_rate": 1.2014978687702778e-05, "loss": 0.5772, "step": 17649 }, { "epoch": 0.45, "grad_norm": 5.038146495819092, "learning_rate": 1.2014165564996343e-05, "loss": 0.6123, "step": 17650 }, { "epoch": 0.45, "grad_norm": 5.007472991943359, "learning_rate": 1.2013352428409542e-05, "loss": 0.6643, "step": 17651 }, { "epoch": 0.45, "grad_norm": 1.0443837642669678, "learning_rate": 1.2012539277947979e-05, "loss": 0.5771, "step": 17652 }, { "epoch": 0.45, "grad_norm": 1.0091849565505981, "learning_rate": 1.2011726113617264e-05, "loss": 0.605, "step": 17653 }, { "epoch": 0.45, "grad_norm": 6.8849639892578125, "learning_rate": 1.2010912935422994e-05, "loss": 0.5898, "step": 17654 }, { "epoch": 0.45, "grad_norm": 1.5244636535644531, "learning_rate": 1.2010099743370776e-05, "loss": 0.5192, "step": 17655 }, { "epoch": 0.45, "grad_norm": 1.1729967594146729, "learning_rate": 1.2009286537466212e-05, "loss": 0.472, "step": 17656 }, { "epoch": 0.45, "grad_norm": 1.408828854560852, "learning_rate": 1.2008473317714911e-05, "loss": 0.5459, "step": 17657 }, { "epoch": 0.45, "grad_norm": 0.9613147974014282, "learning_rate": 1.200766008412247e-05, "loss": 0.4539, "step": 17658 }, { "epoch": 0.45, "grad_norm": 1.8575787544250488, "learning_rate": 1.2006846836694498e-05, "loss": 0.4863, "step": 17659 }, { "epoch": 0.45, "grad_norm": 1.4381462335586548, "learning_rate": 1.20060335754366e-05, "loss": 0.5251, "step": 17660 }, { "epoch": 0.45, "grad_norm": 2.3656563758850098, "learning_rate": 1.2005220300354376e-05, "loss": 0.7355, "step": 17661 }, { "epoch": 0.45, "grad_norm": 3.175527572631836, "learning_rate": 1.2004407011453438e-05, "loss": 0.6499, "step": 17662 }, { "epoch": 0.45, "grad_norm": 1.963624358177185, "learning_rate": 1.2003593708739383e-05, "loss": 0.6175, "step": 17663 }, { "epoch": 0.45, "grad_norm": 1.7959082126617432, "learning_rate": 1.2002780392217818e-05, "loss": 0.5606, "step": 17664 }, { "epoch": 0.45, "grad_norm": 3.308835506439209, "learning_rate": 1.200196706189435e-05, "loss": 0.6451, "step": 17665 }, { "epoch": 0.45, "grad_norm": 1.6643229722976685, "learning_rate": 1.2001153717774585e-05, "loss": 0.4681, "step": 17666 }, { "epoch": 0.45, "grad_norm": 1.7254210710525513, "learning_rate": 1.2000340359864125e-05, "loss": 0.64, "step": 17667 }, { "epoch": 0.45, "grad_norm": 4.82436466217041, "learning_rate": 1.1999526988168572e-05, "loss": 0.5794, "step": 17668 }, { "epoch": 0.45, "grad_norm": 3.4076108932495117, "learning_rate": 1.1998713602693539e-05, "loss": 0.5598, "step": 17669 }, { "epoch": 0.45, "grad_norm": 2.2007861137390137, "learning_rate": 1.1997900203444628e-05, "loss": 0.5242, "step": 17670 }, { "epoch": 0.45, "grad_norm": 1.3211694955825806, "learning_rate": 1.1997086790427442e-05, "loss": 0.29, "step": 17671 }, { "epoch": 0.45, "grad_norm": 2.0716049671173096, "learning_rate": 1.199627336364759e-05, "loss": 0.6003, "step": 17672 }, { "epoch": 0.45, "grad_norm": 3.098906993865967, "learning_rate": 1.1995459923110675e-05, "loss": 0.6388, "step": 17673 }, { "epoch": 0.45, "grad_norm": 1.7194746732711792, "learning_rate": 1.1994646468822304e-05, "loss": 0.5935, "step": 17674 }, { "epoch": 0.45, "grad_norm": 6.592508316040039, "learning_rate": 1.1993833000788082e-05, "loss": 0.5655, "step": 17675 }, { "epoch": 0.45, "grad_norm": 2.3287861347198486, "learning_rate": 1.1993019519013616e-05, "loss": 0.594, "step": 17676 }, { "epoch": 0.45, "grad_norm": 4.041775703430176, "learning_rate": 1.1992206023504514e-05, "loss": 0.544, "step": 17677 }, { "epoch": 0.45, "grad_norm": 5.4213175773620605, "learning_rate": 1.1991392514266377e-05, "loss": 0.4164, "step": 17678 }, { "epoch": 0.45, "grad_norm": 1.6240603923797607, "learning_rate": 1.1990578991304815e-05, "loss": 0.5951, "step": 17679 }, { "epoch": 0.45, "grad_norm": 5.804792881011963, "learning_rate": 1.1989765454625433e-05, "loss": 0.6229, "step": 17680 }, { "epoch": 0.45, "grad_norm": 12.633130073547363, "learning_rate": 1.1988951904233837e-05, "loss": 0.6756, "step": 17681 }, { "epoch": 0.45, "grad_norm": 2.2293736934661865, "learning_rate": 1.1988138340135635e-05, "loss": 0.6803, "step": 17682 }, { "epoch": 0.45, "grad_norm": 1.3966577053070068, "learning_rate": 1.1987324762336432e-05, "loss": 0.4061, "step": 17683 }, { "epoch": 0.45, "grad_norm": 1.509594440460205, "learning_rate": 1.1986511170841837e-05, "loss": 0.608, "step": 17684 }, { "epoch": 0.45, "grad_norm": 1.1785634756088257, "learning_rate": 1.1985697565657453e-05, "loss": 0.5694, "step": 17685 }, { "epoch": 0.45, "grad_norm": 1.6966993808746338, "learning_rate": 1.198488394678889e-05, "loss": 0.477, "step": 17686 }, { "epoch": 0.45, "grad_norm": 1.2203856706619263, "learning_rate": 1.1984070314241753e-05, "loss": 0.5431, "step": 17687 }, { "epoch": 0.45, "grad_norm": 2.8888416290283203, "learning_rate": 1.198325666802165e-05, "loss": 0.5926, "step": 17688 }, { "epoch": 0.45, "grad_norm": 5.014795303344727, "learning_rate": 1.198244300813419e-05, "loss": 0.6209, "step": 17689 }, { "epoch": 0.45, "grad_norm": 1.036415457725525, "learning_rate": 1.1981629334584975e-05, "loss": 0.4335, "step": 17690 }, { "epoch": 0.45, "grad_norm": 1.6301031112670898, "learning_rate": 1.198081564737962e-05, "loss": 0.6814, "step": 17691 }, { "epoch": 0.45, "grad_norm": 1.6798194646835327, "learning_rate": 1.1980001946523723e-05, "loss": 0.5035, "step": 17692 }, { "epoch": 0.45, "grad_norm": 1.2288764715194702, "learning_rate": 1.1979188232022898e-05, "loss": 0.6342, "step": 17693 }, { "epoch": 0.45, "grad_norm": 1.2368297576904297, "learning_rate": 1.1978374503882752e-05, "loss": 0.5296, "step": 17694 }, { "epoch": 0.45, "grad_norm": 3.934255599975586, "learning_rate": 1.1977560762108893e-05, "loss": 0.5633, "step": 17695 }, { "epoch": 0.45, "grad_norm": 12.142624855041504, "learning_rate": 1.1976747006706927e-05, "loss": 0.7758, "step": 17696 }, { "epoch": 0.45, "grad_norm": 1.4198558330535889, "learning_rate": 1.1975933237682461e-05, "loss": 0.5017, "step": 17697 }, { "epoch": 0.45, "grad_norm": 3.0041534900665283, "learning_rate": 1.1975119455041107e-05, "loss": 0.6282, "step": 17698 }, { "epoch": 0.45, "grad_norm": 4.728216648101807, "learning_rate": 1.1974305658788467e-05, "loss": 0.4544, "step": 17699 }, { "epoch": 0.45, "grad_norm": 4.878019332885742, "learning_rate": 1.1973491848930157e-05, "loss": 0.654, "step": 17700 }, { "epoch": 0.45, "grad_norm": 3.1937239170074463, "learning_rate": 1.1972678025471778e-05, "loss": 0.6169, "step": 17701 }, { "epoch": 0.45, "grad_norm": 4.800282955169678, "learning_rate": 1.1971864188418943e-05, "loss": 0.4058, "step": 17702 }, { "epoch": 0.45, "grad_norm": 3.1314027309417725, "learning_rate": 1.1971050337777259e-05, "loss": 0.5333, "step": 17703 }, { "epoch": 0.45, "grad_norm": 1.236596941947937, "learning_rate": 1.1970236473552332e-05, "loss": 0.3395, "step": 17704 }, { "epoch": 0.45, "grad_norm": 1.705036997795105, "learning_rate": 1.1969422595749776e-05, "loss": 0.6064, "step": 17705 }, { "epoch": 0.45, "grad_norm": 6.031052112579346, "learning_rate": 1.1968608704375195e-05, "loss": 0.6906, "step": 17706 }, { "epoch": 0.45, "grad_norm": 1.9168181419372559, "learning_rate": 1.19677947994342e-05, "loss": 0.5538, "step": 17707 }, { "epoch": 0.45, "grad_norm": 3.13973331451416, "learning_rate": 1.19669808809324e-05, "loss": 0.6675, "step": 17708 }, { "epoch": 0.45, "grad_norm": 1.5157488584518433, "learning_rate": 1.1966166948875402e-05, "loss": 0.5055, "step": 17709 }, { "epoch": 0.45, "grad_norm": 1.2018163204193115, "learning_rate": 1.1965353003268817e-05, "loss": 0.4777, "step": 17710 }, { "epoch": 0.45, "grad_norm": 2.3348636627197266, "learning_rate": 1.1964539044118257e-05, "loss": 0.4972, "step": 17711 }, { "epoch": 0.45, "grad_norm": 1.969760537147522, "learning_rate": 1.1963725071429323e-05, "loss": 0.6613, "step": 17712 }, { "epoch": 0.45, "grad_norm": 1.2196612358093262, "learning_rate": 1.1962911085207635e-05, "loss": 0.5883, "step": 17713 }, { "epoch": 0.45, "grad_norm": 2.1462857723236084, "learning_rate": 1.1962097085458794e-05, "loss": 0.637, "step": 17714 }, { "epoch": 0.45, "grad_norm": 3.7737035751342773, "learning_rate": 1.1961283072188414e-05, "loss": 0.61, "step": 17715 }, { "epoch": 0.45, "grad_norm": 2.866804599761963, "learning_rate": 1.1960469045402101e-05, "loss": 0.6313, "step": 17716 }, { "epoch": 0.45, "grad_norm": 1.0843721628189087, "learning_rate": 1.1959655005105468e-05, "loss": 0.5168, "step": 17717 }, { "epoch": 0.45, "grad_norm": 1.9898784160614014, "learning_rate": 1.1958840951304125e-05, "loss": 0.4861, "step": 17718 }, { "epoch": 0.45, "grad_norm": 7.245439052581787, "learning_rate": 1.195802688400368e-05, "loss": 0.6744, "step": 17719 }, { "epoch": 0.45, "grad_norm": 2.0553793907165527, "learning_rate": 1.1957212803209745e-05, "loss": 0.5722, "step": 17720 }, { "epoch": 0.45, "grad_norm": 3.009223699569702, "learning_rate": 1.1956398708927928e-05, "loss": 0.5938, "step": 17721 }, { "epoch": 0.45, "grad_norm": 2.629136562347412, "learning_rate": 1.1955584601163839e-05, "loss": 0.6714, "step": 17722 }, { "epoch": 0.45, "grad_norm": 1.630683422088623, "learning_rate": 1.195477047992309e-05, "loss": 0.4915, "step": 17723 }, { "epoch": 0.45, "grad_norm": 4.848593711853027, "learning_rate": 1.195395634521129e-05, "loss": 0.6059, "step": 17724 }, { "epoch": 0.45, "grad_norm": 1.3513113260269165, "learning_rate": 1.1953142197034055e-05, "loss": 0.5651, "step": 17725 }, { "epoch": 0.45, "grad_norm": 1.6698321104049683, "learning_rate": 1.1952328035396987e-05, "loss": 0.5588, "step": 17726 }, { "epoch": 0.45, "grad_norm": 1.2477729320526123, "learning_rate": 1.1951513860305704e-05, "loss": 0.636, "step": 17727 }, { "epoch": 0.45, "grad_norm": 2.775272846221924, "learning_rate": 1.1950699671765813e-05, "loss": 0.4841, "step": 17728 }, { "epoch": 0.45, "grad_norm": 1.8673312664031982, "learning_rate": 1.1949885469782922e-05, "loss": 0.5771, "step": 17729 }, { "epoch": 0.45, "grad_norm": 3.0510778427124023, "learning_rate": 1.194907125436265e-05, "loss": 0.6315, "step": 17730 }, { "epoch": 0.45, "grad_norm": 1.4879745244979858, "learning_rate": 1.1948257025510599e-05, "loss": 0.5876, "step": 17731 }, { "epoch": 0.45, "grad_norm": 3.180593252182007, "learning_rate": 1.1947442783232386e-05, "loss": 0.5469, "step": 17732 }, { "epoch": 0.45, "grad_norm": 1.5831847190856934, "learning_rate": 1.194662852753362e-05, "loss": 0.479, "step": 17733 }, { "epoch": 0.45, "grad_norm": 1.8084059953689575, "learning_rate": 1.1945814258419915e-05, "loss": 0.6372, "step": 17734 }, { "epoch": 0.45, "grad_norm": 3.0980610847473145, "learning_rate": 1.1944999975896881e-05, "loss": 0.706, "step": 17735 }, { "epoch": 0.45, "grad_norm": 3.0250964164733887, "learning_rate": 1.1944185679970129e-05, "loss": 0.5184, "step": 17736 }, { "epoch": 0.45, "grad_norm": 1.5356814861297607, "learning_rate": 1.1943371370645267e-05, "loss": 0.4762, "step": 17737 }, { "epoch": 0.45, "grad_norm": 1.305873155593872, "learning_rate": 1.1942557047927915e-05, "loss": 0.6351, "step": 17738 }, { "epoch": 0.45, "grad_norm": 1.1895793676376343, "learning_rate": 1.1941742711823676e-05, "loss": 0.4153, "step": 17739 }, { "epoch": 0.45, "grad_norm": 2.313321352005005, "learning_rate": 1.1940928362338172e-05, "loss": 0.6988, "step": 17740 }, { "epoch": 0.45, "grad_norm": 3.397651433944702, "learning_rate": 1.1940113999477004e-05, "loss": 0.6602, "step": 17741 }, { "epoch": 0.45, "grad_norm": 0.9577383399009705, "learning_rate": 1.1939299623245792e-05, "loss": 0.4805, "step": 17742 }, { "epoch": 0.45, "grad_norm": 1.3232687711715698, "learning_rate": 1.1938485233650144e-05, "loss": 0.5988, "step": 17743 }, { "epoch": 0.45, "grad_norm": 1.7351001501083374, "learning_rate": 1.1937670830695673e-05, "loss": 0.5537, "step": 17744 }, { "epoch": 0.45, "grad_norm": 1.8099794387817383, "learning_rate": 1.1936856414387993e-05, "loss": 0.5621, "step": 17745 }, { "epoch": 0.45, "grad_norm": 1.1031135320663452, "learning_rate": 1.1936041984732716e-05, "loss": 0.5173, "step": 17746 }, { "epoch": 0.45, "grad_norm": 1.1912517547607422, "learning_rate": 1.1935227541735455e-05, "loss": 0.5272, "step": 17747 }, { "epoch": 0.45, "grad_norm": 1.8616454601287842, "learning_rate": 1.1934413085401818e-05, "loss": 0.4659, "step": 17748 }, { "epoch": 0.45, "grad_norm": 1.5134755373001099, "learning_rate": 1.1933598615737423e-05, "loss": 0.6607, "step": 17749 }, { "epoch": 0.45, "grad_norm": 1.903611660003662, "learning_rate": 1.1932784132747883e-05, "loss": 0.6012, "step": 17750 }, { "epoch": 0.45, "grad_norm": 2.208348035812378, "learning_rate": 1.1931969636438807e-05, "loss": 0.6066, "step": 17751 }, { "epoch": 0.45, "grad_norm": 2.131432056427002, "learning_rate": 1.1931155126815812e-05, "loss": 0.2943, "step": 17752 }, { "epoch": 0.46, "grad_norm": 3.35199236869812, "learning_rate": 1.1930340603884507e-05, "loss": 0.5592, "step": 17753 }, { "epoch": 0.46, "grad_norm": 1.3328138589859009, "learning_rate": 1.192952606765051e-05, "loss": 0.4389, "step": 17754 }, { "epoch": 0.46, "grad_norm": 2.2675530910491943, "learning_rate": 1.1928711518119429e-05, "loss": 0.5072, "step": 17755 }, { "epoch": 0.46, "grad_norm": 1.4243125915527344, "learning_rate": 1.1927896955296879e-05, "loss": 0.5346, "step": 17756 }, { "epoch": 0.46, "grad_norm": 3.3491809368133545, "learning_rate": 1.1927082379188477e-05, "loss": 0.6106, "step": 17757 }, { "epoch": 0.46, "grad_norm": 1.7430354356765747, "learning_rate": 1.1926267789799833e-05, "loss": 0.5372, "step": 17758 }, { "epoch": 0.46, "grad_norm": 7.060868263244629, "learning_rate": 1.1925453187136562e-05, "loss": 0.6691, "step": 17759 }, { "epoch": 0.46, "grad_norm": 1.8718119859695435, "learning_rate": 1.1924638571204276e-05, "loss": 0.6553, "step": 17760 }, { "epoch": 0.46, "grad_norm": 1.8598562479019165, "learning_rate": 1.192382394200859e-05, "loss": 0.5197, "step": 17761 }, { "epoch": 0.46, "grad_norm": 1.1114497184753418, "learning_rate": 1.1923009299555122e-05, "loss": 0.5545, "step": 17762 }, { "epoch": 0.46, "grad_norm": 1.9928441047668457, "learning_rate": 1.1922194643849477e-05, "loss": 0.6067, "step": 17763 }, { "epoch": 0.46, "grad_norm": 1.2809851169586182, "learning_rate": 1.1921379974897278e-05, "loss": 0.503, "step": 17764 }, { "epoch": 0.46, "grad_norm": 1.5189554691314697, "learning_rate": 1.1920565292704132e-05, "loss": 0.5765, "step": 17765 }, { "epoch": 0.46, "grad_norm": 1.503542184829712, "learning_rate": 1.191975059727566e-05, "loss": 0.6433, "step": 17766 }, { "epoch": 0.46, "grad_norm": 1.8746370077133179, "learning_rate": 1.1918935888617472e-05, "loss": 0.6843, "step": 17767 }, { "epoch": 0.46, "grad_norm": 1.8291250467300415, "learning_rate": 1.191812116673518e-05, "loss": 0.5798, "step": 17768 }, { "epoch": 0.46, "grad_norm": 1.4795763492584229, "learning_rate": 1.1917306431634405e-05, "loss": 0.5274, "step": 17769 }, { "epoch": 0.46, "grad_norm": 1.167778491973877, "learning_rate": 1.191649168332076e-05, "loss": 0.4684, "step": 17770 }, { "epoch": 0.46, "grad_norm": 3.726303815841675, "learning_rate": 1.1915676921799857e-05, "loss": 0.5533, "step": 17771 }, { "epoch": 0.46, "grad_norm": 1.1061546802520752, "learning_rate": 1.1914862147077311e-05, "loss": 0.4167, "step": 17772 }, { "epoch": 0.46, "grad_norm": 1.5151814222335815, "learning_rate": 1.1914047359158738e-05, "loss": 0.6035, "step": 17773 }, { "epoch": 0.46, "grad_norm": 1.8308942317962646, "learning_rate": 1.1913232558049757e-05, "loss": 0.4028, "step": 17774 }, { "epoch": 0.46, "grad_norm": 1.5455431938171387, "learning_rate": 1.1912417743755973e-05, "loss": 0.4562, "step": 17775 }, { "epoch": 0.46, "grad_norm": 2.1082146167755127, "learning_rate": 1.1911602916283014e-05, "loss": 0.5265, "step": 17776 }, { "epoch": 0.46, "grad_norm": 1.4516291618347168, "learning_rate": 1.1910788075636483e-05, "loss": 0.5278, "step": 17777 }, { "epoch": 0.46, "grad_norm": 1.3280720710754395, "learning_rate": 1.1909973221822004e-05, "loss": 0.5505, "step": 17778 }, { "epoch": 0.46, "grad_norm": 3.1853201389312744, "learning_rate": 1.190915835484519e-05, "loss": 0.6279, "step": 17779 }, { "epoch": 0.46, "grad_norm": 1.587286353111267, "learning_rate": 1.1908343474711655e-05, "loss": 0.5656, "step": 17780 }, { "epoch": 0.46, "grad_norm": 1.4475055932998657, "learning_rate": 1.1907528581427017e-05, "loss": 0.6478, "step": 17781 }, { "epoch": 0.46, "grad_norm": 1.8040766716003418, "learning_rate": 1.1906713674996888e-05, "loss": 0.5101, "step": 17782 }, { "epoch": 0.46, "grad_norm": 6.902963638305664, "learning_rate": 1.1905898755426888e-05, "loss": 0.6195, "step": 17783 }, { "epoch": 0.46, "grad_norm": 1.3784115314483643, "learning_rate": 1.1905083822722631e-05, "loss": 0.6121, "step": 17784 }, { "epoch": 0.46, "grad_norm": 2.0898067951202393, "learning_rate": 1.190426887688973e-05, "loss": 0.7334, "step": 17785 }, { "epoch": 0.46, "grad_norm": 4.071809768676758, "learning_rate": 1.190345391793381e-05, "loss": 0.6726, "step": 17786 }, { "epoch": 0.46, "grad_norm": 1.5968060493469238, "learning_rate": 1.1902638945860478e-05, "loss": 0.6188, "step": 17787 }, { "epoch": 0.46, "grad_norm": 1.2274245023727417, "learning_rate": 1.1901823960675356e-05, "loss": 0.523, "step": 17788 }, { "epoch": 0.46, "grad_norm": 8.144371032714844, "learning_rate": 1.1901008962384056e-05, "loss": 0.6769, "step": 17789 }, { "epoch": 0.46, "grad_norm": 1.4911741018295288, "learning_rate": 1.1900193950992195e-05, "loss": 0.3793, "step": 17790 }, { "epoch": 0.46, "grad_norm": 1.3795804977416992, "learning_rate": 1.1899378926505393e-05, "loss": 0.6383, "step": 17791 }, { "epoch": 0.46, "grad_norm": 1.4632148742675781, "learning_rate": 1.1898563888929267e-05, "loss": 0.5176, "step": 17792 }, { "epoch": 0.46, "grad_norm": 1.8476910591125488, "learning_rate": 1.189774883826943e-05, "loss": 0.6715, "step": 17793 }, { "epoch": 0.46, "grad_norm": 4.143965244293213, "learning_rate": 1.1896933774531499e-05, "loss": 0.7534, "step": 17794 }, { "epoch": 0.46, "grad_norm": 2.0277929306030273, "learning_rate": 1.1896118697721093e-05, "loss": 0.6054, "step": 17795 }, { "epoch": 0.46, "grad_norm": 6.505453109741211, "learning_rate": 1.1895303607843829e-05, "loss": 0.6529, "step": 17796 }, { "epoch": 0.46, "grad_norm": 1.5312119722366333, "learning_rate": 1.189448850490532e-05, "loss": 0.4672, "step": 17797 }, { "epoch": 0.46, "grad_norm": 1.8485230207443237, "learning_rate": 1.189367338891119e-05, "loss": 0.6752, "step": 17798 }, { "epoch": 0.46, "grad_norm": 1.729090929031372, "learning_rate": 1.1892858259867051e-05, "loss": 0.5464, "step": 17799 }, { "epoch": 0.46, "grad_norm": 1.295793056488037, "learning_rate": 1.1892043117778525e-05, "loss": 0.4418, "step": 17800 }, { "epoch": 0.46, "grad_norm": 3.649008274078369, "learning_rate": 1.1891227962651221e-05, "loss": 0.6167, "step": 17801 }, { "epoch": 0.46, "grad_norm": 1.5236718654632568, "learning_rate": 1.1890412794490767e-05, "loss": 0.4088, "step": 17802 }, { "epoch": 0.46, "grad_norm": 5.5560150146484375, "learning_rate": 1.1889597613302775e-05, "loss": 0.7211, "step": 17803 }, { "epoch": 0.46, "grad_norm": 1.3305091857910156, "learning_rate": 1.1888782419092862e-05, "loss": 0.4637, "step": 17804 }, { "epoch": 0.46, "grad_norm": 1.477181077003479, "learning_rate": 1.1887967211866647e-05, "loss": 0.4753, "step": 17805 }, { "epoch": 0.46, "grad_norm": 2.4656007289886475, "learning_rate": 1.1887151991629748e-05, "loss": 0.5045, "step": 17806 }, { "epoch": 0.46, "grad_norm": 3.28825306892395, "learning_rate": 1.1886336758387787e-05, "loss": 0.6197, "step": 17807 }, { "epoch": 0.46, "grad_norm": 2.7619240283966064, "learning_rate": 1.1885521512146372e-05, "loss": 0.8786, "step": 17808 }, { "epoch": 0.46, "grad_norm": 1.217929482460022, "learning_rate": 1.1884706252911129e-05, "loss": 0.6463, "step": 17809 }, { "epoch": 0.46, "grad_norm": 2.3512072563171387, "learning_rate": 1.1883890980687678e-05, "loss": 0.5962, "step": 17810 }, { "epoch": 0.46, "grad_norm": 1.5511318445205688, "learning_rate": 1.1883075695481632e-05, "loss": 0.5827, "step": 17811 }, { "epoch": 0.46, "grad_norm": 1.1749521493911743, "learning_rate": 1.188226039729861e-05, "loss": 0.5106, "step": 17812 }, { "epoch": 0.46, "grad_norm": 2.806628704071045, "learning_rate": 1.1881445086144234e-05, "loss": 0.6965, "step": 17813 }, { "epoch": 0.46, "grad_norm": 1.0511364936828613, "learning_rate": 1.1880629762024118e-05, "loss": 0.5615, "step": 17814 }, { "epoch": 0.46, "grad_norm": 3.3537521362304688, "learning_rate": 1.1879814424943888e-05, "loss": 0.5055, "step": 17815 }, { "epoch": 0.46, "grad_norm": 2.5372860431671143, "learning_rate": 1.1878999074909153e-05, "loss": 0.723, "step": 17816 }, { "epoch": 0.46, "grad_norm": 1.4770267009735107, "learning_rate": 1.187818371192554e-05, "loss": 0.6478, "step": 17817 }, { "epoch": 0.46, "grad_norm": 1.695135474205017, "learning_rate": 1.1877368335998663e-05, "loss": 0.6416, "step": 17818 }, { "epoch": 0.46, "grad_norm": 2.2737627029418945, "learning_rate": 1.1876552947134143e-05, "loss": 0.6056, "step": 17819 }, { "epoch": 0.46, "grad_norm": 1.044570803642273, "learning_rate": 1.1875737545337601e-05, "loss": 0.5128, "step": 17820 }, { "epoch": 0.46, "grad_norm": 3.055492639541626, "learning_rate": 1.187492213061465e-05, "loss": 0.5377, "step": 17821 }, { "epoch": 0.46, "grad_norm": 1.89043390750885, "learning_rate": 1.1874106702970919e-05, "loss": 0.5772, "step": 17822 }, { "epoch": 0.46, "grad_norm": 6.4701690673828125, "learning_rate": 1.1873291262412017e-05, "loss": 0.5593, "step": 17823 }, { "epoch": 0.46, "grad_norm": 4.8763651847839355, "learning_rate": 1.1872475808943573e-05, "loss": 0.6007, "step": 17824 }, { "epoch": 0.46, "grad_norm": 2.6184606552124023, "learning_rate": 1.1871660342571199e-05, "loss": 0.6093, "step": 17825 }, { "epoch": 0.46, "grad_norm": 3.007477283477783, "learning_rate": 1.1870844863300519e-05, "loss": 0.5664, "step": 17826 }, { "epoch": 0.46, "grad_norm": 1.2040187120437622, "learning_rate": 1.1870029371137153e-05, "loss": 0.473, "step": 17827 }, { "epoch": 0.46, "grad_norm": 1.193939447402954, "learning_rate": 1.1869213866086715e-05, "loss": 0.5635, "step": 17828 }, { "epoch": 0.46, "grad_norm": 5.0811052322387695, "learning_rate": 1.1868398348154835e-05, "loss": 0.5716, "step": 17829 }, { "epoch": 0.46, "grad_norm": 1.56045663356781, "learning_rate": 1.1867582817347123e-05, "loss": 0.5598, "step": 17830 }, { "epoch": 0.46, "grad_norm": 5.703890323638916, "learning_rate": 1.1866767273669204e-05, "loss": 0.5519, "step": 17831 }, { "epoch": 0.46, "grad_norm": 1.821593999862671, "learning_rate": 1.18659517171267e-05, "loss": 0.6924, "step": 17832 }, { "epoch": 0.46, "grad_norm": 3.3522162437438965, "learning_rate": 1.1865136147725229e-05, "loss": 0.4658, "step": 17833 }, { "epoch": 0.46, "grad_norm": 3.0926358699798584, "learning_rate": 1.186432056547041e-05, "loss": 0.6483, "step": 17834 }, { "epoch": 0.46, "grad_norm": 3.443427801132202, "learning_rate": 1.1863504970367865e-05, "loss": 0.6532, "step": 17835 }, { "epoch": 0.46, "grad_norm": 1.9728492498397827, "learning_rate": 1.1862689362423217e-05, "loss": 0.66, "step": 17836 }, { "epoch": 0.46, "grad_norm": 1.9133853912353516, "learning_rate": 1.1861873741642083e-05, "loss": 0.4644, "step": 17837 }, { "epoch": 0.46, "grad_norm": 1.3755009174346924, "learning_rate": 1.1861058108030081e-05, "loss": 0.3945, "step": 17838 }, { "epoch": 0.46, "grad_norm": 3.1119790077209473, "learning_rate": 1.1860242461592841e-05, "loss": 0.5377, "step": 17839 }, { "epoch": 0.46, "grad_norm": 2.123953104019165, "learning_rate": 1.1859426802335976e-05, "loss": 0.5247, "step": 17840 }, { "epoch": 0.46, "grad_norm": 2.305036783218384, "learning_rate": 1.1858611130265113e-05, "loss": 0.5187, "step": 17841 }, { "epoch": 0.46, "grad_norm": 1.244546890258789, "learning_rate": 1.1857795445385865e-05, "loss": 0.5531, "step": 17842 }, { "epoch": 0.46, "grad_norm": 2.2993619441986084, "learning_rate": 1.1856979747703858e-05, "loss": 0.6566, "step": 17843 }, { "epoch": 0.46, "grad_norm": 2.2083590030670166, "learning_rate": 1.1856164037224718e-05, "loss": 0.6775, "step": 17844 }, { "epoch": 0.46, "grad_norm": 1.5161336660385132, "learning_rate": 1.1855348313954059e-05, "loss": 0.7531, "step": 17845 }, { "epoch": 0.46, "grad_norm": 1.6067613363265991, "learning_rate": 1.1854532577897504e-05, "loss": 0.5713, "step": 17846 }, { "epoch": 0.46, "grad_norm": 7.197810173034668, "learning_rate": 1.1853716829060675e-05, "loss": 0.5779, "step": 17847 }, { "epoch": 0.46, "grad_norm": 1.2827436923980713, "learning_rate": 1.1852901067449199e-05, "loss": 0.6269, "step": 17848 }, { "epoch": 0.46, "grad_norm": 2.2435667514801025, "learning_rate": 1.1852085293068689e-05, "loss": 0.6168, "step": 17849 }, { "epoch": 0.46, "grad_norm": 2.325796365737915, "learning_rate": 1.1851269505924771e-05, "loss": 0.5479, "step": 17850 }, { "epoch": 0.46, "grad_norm": 2.4887282848358154, "learning_rate": 1.1850453706023069e-05, "loss": 0.6385, "step": 17851 }, { "epoch": 0.46, "grad_norm": 2.9849588871002197, "learning_rate": 1.1849637893369199e-05, "loss": 0.4687, "step": 17852 }, { "epoch": 0.46, "grad_norm": 1.8931854963302612, "learning_rate": 1.1848822067968788e-05, "loss": 0.6012, "step": 17853 }, { "epoch": 0.46, "grad_norm": 1.5796144008636475, "learning_rate": 1.184800622982746e-05, "loss": 0.4532, "step": 17854 }, { "epoch": 0.46, "grad_norm": 13.661648750305176, "learning_rate": 1.184719037895083e-05, "loss": 0.7899, "step": 17855 }, { "epoch": 0.46, "grad_norm": 2.46246075630188, "learning_rate": 1.1846374515344528e-05, "loss": 0.5695, "step": 17856 }, { "epoch": 0.46, "grad_norm": 1.0392075777053833, "learning_rate": 1.1845558639014167e-05, "loss": 0.4585, "step": 17857 }, { "epoch": 0.46, "grad_norm": 5.911065101623535, "learning_rate": 1.1844742749965379e-05, "loss": 0.7457, "step": 17858 }, { "epoch": 0.46, "grad_norm": 2.4516587257385254, "learning_rate": 1.1843926848203785e-05, "loss": 0.6451, "step": 17859 }, { "epoch": 0.46, "grad_norm": 1.9733277559280396, "learning_rate": 1.1843110933735002e-05, "loss": 0.534, "step": 17860 }, { "epoch": 0.46, "grad_norm": 1.5362907648086548, "learning_rate": 1.184229500656466e-05, "loss": 0.5625, "step": 17861 }, { "epoch": 0.46, "grad_norm": 2.594696521759033, "learning_rate": 1.1841479066698375e-05, "loss": 0.557, "step": 17862 }, { "epoch": 0.46, "grad_norm": 1.668723702430725, "learning_rate": 1.1840663114141778e-05, "loss": 0.6113, "step": 17863 }, { "epoch": 0.46, "grad_norm": 3.1724932193756104, "learning_rate": 1.183984714890048e-05, "loss": 0.5522, "step": 17864 }, { "epoch": 0.46, "grad_norm": 1.2584916353225708, "learning_rate": 1.1839031170980115e-05, "loss": 0.6138, "step": 17865 }, { "epoch": 0.46, "grad_norm": 1.731391429901123, "learning_rate": 1.1838215180386304e-05, "loss": 0.7206, "step": 17866 }, { "epoch": 0.46, "grad_norm": 1.4880306720733643, "learning_rate": 1.1837399177124667e-05, "loss": 0.5257, "step": 17867 }, { "epoch": 0.46, "grad_norm": 1.962907075881958, "learning_rate": 1.183658316120083e-05, "loss": 0.6311, "step": 17868 }, { "epoch": 0.46, "grad_norm": 1.3696955442428589, "learning_rate": 1.1835767132620415e-05, "loss": 0.548, "step": 17869 }, { "epoch": 0.46, "grad_norm": 2.450826406478882, "learning_rate": 1.1834951091389046e-05, "loss": 0.7065, "step": 17870 }, { "epoch": 0.46, "grad_norm": 4.821959972381592, "learning_rate": 1.1834135037512348e-05, "loss": 0.6507, "step": 17871 }, { "epoch": 0.46, "grad_norm": 1.53432035446167, "learning_rate": 1.1833318970995945e-05, "loss": 0.5143, "step": 17872 }, { "epoch": 0.46, "grad_norm": 1.3614717721939087, "learning_rate": 1.1832502891845457e-05, "loss": 0.5657, "step": 17873 }, { "epoch": 0.46, "grad_norm": 1.428006887435913, "learning_rate": 1.1831686800066511e-05, "loss": 0.5539, "step": 17874 }, { "epoch": 0.46, "grad_norm": 1.8134738206863403, "learning_rate": 1.1830870695664731e-05, "loss": 0.5644, "step": 17875 }, { "epoch": 0.46, "grad_norm": 1.234241008758545, "learning_rate": 1.183005457864574e-05, "loss": 0.6133, "step": 17876 }, { "epoch": 0.46, "grad_norm": 4.143795013427734, "learning_rate": 1.1829238449015161e-05, "loss": 0.3546, "step": 17877 }, { "epoch": 0.46, "grad_norm": 3.1004223823547363, "learning_rate": 1.1828422306778622e-05, "loss": 0.3935, "step": 17878 }, { "epoch": 0.46, "grad_norm": 3.281497001647949, "learning_rate": 1.1827606151941744e-05, "loss": 0.4917, "step": 17879 }, { "epoch": 0.46, "grad_norm": 2.8450143337249756, "learning_rate": 1.1826789984510154e-05, "loss": 0.4566, "step": 17880 }, { "epoch": 0.46, "grad_norm": 1.723906397819519, "learning_rate": 1.1825973804489475e-05, "loss": 0.5674, "step": 17881 }, { "epoch": 0.46, "grad_norm": 2.2337281703948975, "learning_rate": 1.1825157611885328e-05, "loss": 0.6587, "step": 17882 }, { "epoch": 0.46, "grad_norm": 1.1554665565490723, "learning_rate": 1.1824341406703346e-05, "loss": 0.6676, "step": 17883 }, { "epoch": 0.46, "grad_norm": 1.0902031660079956, "learning_rate": 1.1823525188949147e-05, "loss": 0.6803, "step": 17884 }, { "epoch": 0.46, "grad_norm": 5.376710414886475, "learning_rate": 1.1822708958628359e-05, "loss": 0.6313, "step": 17885 }, { "epoch": 0.46, "grad_norm": 1.182405948638916, "learning_rate": 1.1821892715746606e-05, "loss": 0.4587, "step": 17886 }, { "epoch": 0.46, "grad_norm": 3.6802024841308594, "learning_rate": 1.1821076460309513e-05, "loss": 0.5098, "step": 17887 }, { "epoch": 0.46, "grad_norm": 3.3745038509368896, "learning_rate": 1.1820260192322703e-05, "loss": 0.6454, "step": 17888 }, { "epoch": 0.46, "grad_norm": 1.3271180391311646, "learning_rate": 1.1819443911791806e-05, "loss": 0.4099, "step": 17889 }, { "epoch": 0.46, "grad_norm": 3.46496319770813, "learning_rate": 1.1818627618722444e-05, "loss": 0.5603, "step": 17890 }, { "epoch": 0.46, "grad_norm": 11.438974380493164, "learning_rate": 1.1817811313120243e-05, "loss": 0.4523, "step": 17891 }, { "epoch": 0.46, "grad_norm": 1.5756181478500366, "learning_rate": 1.1816994994990829e-05, "loss": 0.6599, "step": 17892 }, { "epoch": 0.46, "grad_norm": 2.720703601837158, "learning_rate": 1.1816178664339826e-05, "loss": 0.72, "step": 17893 }, { "epoch": 0.46, "grad_norm": 3.5630838871002197, "learning_rate": 1.1815362321172859e-05, "loss": 0.6299, "step": 17894 }, { "epoch": 0.46, "grad_norm": 2.5856919288635254, "learning_rate": 1.1814545965495558e-05, "loss": 0.4693, "step": 17895 }, { "epoch": 0.46, "grad_norm": 1.44764244556427, "learning_rate": 1.1813729597313546e-05, "loss": 0.4757, "step": 17896 }, { "epoch": 0.46, "grad_norm": 4.463068962097168, "learning_rate": 1.181291321663245e-05, "loss": 0.494, "step": 17897 }, { "epoch": 0.46, "grad_norm": 4.048813343048096, "learning_rate": 1.1812096823457893e-05, "loss": 0.5331, "step": 17898 }, { "epoch": 0.46, "grad_norm": 1.4131627082824707, "learning_rate": 1.1811280417795503e-05, "loss": 0.5948, "step": 17899 }, { "epoch": 0.46, "grad_norm": 1.935199499130249, "learning_rate": 1.1810463999650907e-05, "loss": 0.4713, "step": 17900 }, { "epoch": 0.46, "grad_norm": 1.3057695627212524, "learning_rate": 1.1809647569029732e-05, "loss": 0.6473, "step": 17901 }, { "epoch": 0.46, "grad_norm": 5.7569684982299805, "learning_rate": 1.1808831125937603e-05, "loss": 0.5926, "step": 17902 }, { "epoch": 0.46, "grad_norm": 1.1411083936691284, "learning_rate": 1.1808014670380143e-05, "loss": 0.5272, "step": 17903 }, { "epoch": 0.46, "grad_norm": 1.552192211151123, "learning_rate": 1.1807198202362987e-05, "loss": 0.5569, "step": 17904 }, { "epoch": 0.46, "grad_norm": 0.9656416773796082, "learning_rate": 1.1806381721891751e-05, "loss": 0.6543, "step": 17905 }, { "epoch": 0.46, "grad_norm": 2.5527360439300537, "learning_rate": 1.180556522897207e-05, "loss": 0.5341, "step": 17906 }, { "epoch": 0.46, "grad_norm": 2.1870174407958984, "learning_rate": 1.1804748723609568e-05, "loss": 0.6839, "step": 17907 }, { "epoch": 0.46, "grad_norm": 9.79118537902832, "learning_rate": 1.180393220580987e-05, "loss": 0.6611, "step": 17908 }, { "epoch": 0.46, "grad_norm": 3.4840786457061768, "learning_rate": 1.1803115675578605e-05, "loss": 0.5389, "step": 17909 }, { "epoch": 0.46, "grad_norm": 1.2275466918945312, "learning_rate": 1.18022991329214e-05, "loss": 0.6551, "step": 17910 }, { "epoch": 0.46, "grad_norm": 3.490536689758301, "learning_rate": 1.1801482577843879e-05, "loss": 0.5778, "step": 17911 }, { "epoch": 0.46, "grad_norm": 1.7693971395492554, "learning_rate": 1.1800666010351675e-05, "loss": 0.5931, "step": 17912 }, { "epoch": 0.46, "grad_norm": 5.339860916137695, "learning_rate": 1.1799849430450411e-05, "loss": 0.565, "step": 17913 }, { "epoch": 0.46, "grad_norm": 2.307399272918701, "learning_rate": 1.179903283814572e-05, "loss": 0.5475, "step": 17914 }, { "epoch": 0.46, "grad_norm": 1.827591896057129, "learning_rate": 1.1798216233443217e-05, "loss": 0.7516, "step": 17915 }, { "epoch": 0.46, "grad_norm": 1.463416337966919, "learning_rate": 1.1797399616348546e-05, "loss": 0.6069, "step": 17916 }, { "epoch": 0.46, "grad_norm": 1.3121899366378784, "learning_rate": 1.179658298686732e-05, "loss": 0.5364, "step": 17917 }, { "epoch": 0.46, "grad_norm": 1.5079127550125122, "learning_rate": 1.1795766345005173e-05, "loss": 0.6269, "step": 17918 }, { "epoch": 0.46, "grad_norm": 1.8199114799499512, "learning_rate": 1.1794949690767737e-05, "loss": 0.6485, "step": 17919 }, { "epoch": 0.46, "grad_norm": 4.3227858543396, "learning_rate": 1.179413302416063e-05, "loss": 0.5918, "step": 17920 }, { "epoch": 0.46, "grad_norm": 3.5183682441711426, "learning_rate": 1.1793316345189489e-05, "loss": 0.3886, "step": 17921 }, { "epoch": 0.46, "grad_norm": 2.1313273906707764, "learning_rate": 1.1792499653859936e-05, "loss": 0.6694, "step": 17922 }, { "epoch": 0.46, "grad_norm": 2.0391323566436768, "learning_rate": 1.1791682950177603e-05, "loss": 0.5548, "step": 17923 }, { "epoch": 0.46, "grad_norm": 2.177809953689575, "learning_rate": 1.1790866234148117e-05, "loss": 0.4855, "step": 17924 }, { "epoch": 0.46, "grad_norm": 3.9422662258148193, "learning_rate": 1.1790049505777103e-05, "loss": 0.5339, "step": 17925 }, { "epoch": 0.46, "grad_norm": 1.521950364112854, "learning_rate": 1.1789232765070197e-05, "loss": 0.6891, "step": 17926 }, { "epoch": 0.46, "grad_norm": 4.696593761444092, "learning_rate": 1.1788416012033022e-05, "loss": 0.5124, "step": 17927 }, { "epoch": 0.46, "grad_norm": 1.4052696228027344, "learning_rate": 1.1787599246671206e-05, "loss": 0.6114, "step": 17928 }, { "epoch": 0.46, "grad_norm": 1.6442264318466187, "learning_rate": 1.1786782468990379e-05, "loss": 0.5782, "step": 17929 }, { "epoch": 0.46, "grad_norm": 0.9623590111732483, "learning_rate": 1.1785965678996169e-05, "loss": 0.486, "step": 17930 }, { "epoch": 0.46, "grad_norm": 1.2757513523101807, "learning_rate": 1.1785148876694208e-05, "loss": 0.4542, "step": 17931 }, { "epoch": 0.46, "grad_norm": 2.4936206340789795, "learning_rate": 1.178433206209012e-05, "loss": 0.6204, "step": 17932 }, { "epoch": 0.46, "grad_norm": 1.9229265451431274, "learning_rate": 1.1783515235189539e-05, "loss": 0.5601, "step": 17933 }, { "epoch": 0.46, "grad_norm": 1.3795082569122314, "learning_rate": 1.1782698395998089e-05, "loss": 0.5692, "step": 17934 }, { "epoch": 0.46, "grad_norm": 6.618528842926025, "learning_rate": 1.1781881544521403e-05, "loss": 0.7098, "step": 17935 }, { "epoch": 0.46, "grad_norm": 1.4269764423370361, "learning_rate": 1.178106468076511e-05, "loss": 0.5971, "step": 17936 }, { "epoch": 0.46, "grad_norm": 1.1142562627792358, "learning_rate": 1.1780247804734837e-05, "loss": 0.5174, "step": 17937 }, { "epoch": 0.46, "grad_norm": 1.6897794008255005, "learning_rate": 1.1779430916436215e-05, "loss": 0.7367, "step": 17938 }, { "epoch": 0.46, "grad_norm": 1.8393023014068604, "learning_rate": 1.1778614015874872e-05, "loss": 0.6577, "step": 17939 }, { "epoch": 0.46, "grad_norm": 1.9436064958572388, "learning_rate": 1.1777797103056438e-05, "loss": 0.3885, "step": 17940 }, { "epoch": 0.46, "grad_norm": 1.1934186220169067, "learning_rate": 1.1776980177986545e-05, "loss": 0.5442, "step": 17941 }, { "epoch": 0.46, "grad_norm": 1.8614836931228638, "learning_rate": 1.177616324067082e-05, "loss": 0.6528, "step": 17942 }, { "epoch": 0.46, "grad_norm": 3.3421173095703125, "learning_rate": 1.1775346291114895e-05, "loss": 0.7157, "step": 17943 }, { "epoch": 0.46, "grad_norm": 1.9488675594329834, "learning_rate": 1.1774529329324397e-05, "loss": 0.6597, "step": 17944 }, { "epoch": 0.46, "grad_norm": 2.5090203285217285, "learning_rate": 1.1773712355304961e-05, "loss": 0.6318, "step": 17945 }, { "epoch": 0.46, "grad_norm": 5.980227470397949, "learning_rate": 1.1772895369062211e-05, "loss": 0.6988, "step": 17946 }, { "epoch": 0.46, "grad_norm": 1.2391403913497925, "learning_rate": 1.177207837060178e-05, "loss": 0.6138, "step": 17947 }, { "epoch": 0.46, "grad_norm": 3.7948224544525146, "learning_rate": 1.17712613599293e-05, "loss": 0.7146, "step": 17948 }, { "epoch": 0.46, "grad_norm": 1.397072672843933, "learning_rate": 1.1770444337050397e-05, "loss": 0.6383, "step": 17949 }, { "epoch": 0.46, "grad_norm": 4.29863166809082, "learning_rate": 1.1769627301970707e-05, "loss": 0.6571, "step": 17950 }, { "epoch": 0.46, "grad_norm": 4.333876609802246, "learning_rate": 1.1768810254695855e-05, "loss": 0.6494, "step": 17951 }, { "epoch": 0.46, "grad_norm": 2.04732346534729, "learning_rate": 1.1767993195231473e-05, "loss": 0.7489, "step": 17952 }, { "epoch": 0.46, "grad_norm": 1.6821582317352295, "learning_rate": 1.1767176123583197e-05, "loss": 0.5138, "step": 17953 }, { "epoch": 0.46, "grad_norm": 1.6170870065689087, "learning_rate": 1.176635903975665e-05, "loss": 0.536, "step": 17954 }, { "epoch": 0.46, "grad_norm": 6.89355993270874, "learning_rate": 1.1765541943757467e-05, "loss": 0.6555, "step": 17955 }, { "epoch": 0.46, "grad_norm": 6.797183036804199, "learning_rate": 1.1764724835591277e-05, "loss": 0.7166, "step": 17956 }, { "epoch": 0.46, "grad_norm": 1.8503347635269165, "learning_rate": 1.1763907715263716e-05, "loss": 0.6699, "step": 17957 }, { "epoch": 0.46, "grad_norm": 2.771353006362915, "learning_rate": 1.1763090582780408e-05, "loss": 0.537, "step": 17958 }, { "epoch": 0.46, "grad_norm": 7.2541584968566895, "learning_rate": 1.1762273438146984e-05, "loss": 0.5041, "step": 17959 }, { "epoch": 0.46, "grad_norm": 1.8023130893707275, "learning_rate": 1.1761456281369085e-05, "loss": 0.5997, "step": 17960 }, { "epoch": 0.46, "grad_norm": 2.55609393119812, "learning_rate": 1.1760639112452332e-05, "loss": 0.5295, "step": 17961 }, { "epoch": 0.46, "grad_norm": 1.1617079973220825, "learning_rate": 1.1759821931402365e-05, "loss": 0.5371, "step": 17962 }, { "epoch": 0.46, "grad_norm": 1.683142066001892, "learning_rate": 1.1759004738224806e-05, "loss": 0.553, "step": 17963 }, { "epoch": 0.46, "grad_norm": 5.402130126953125, "learning_rate": 1.1758187532925292e-05, "loss": 0.6717, "step": 17964 }, { "epoch": 0.46, "grad_norm": 3.634558916091919, "learning_rate": 1.1757370315509458e-05, "loss": 0.7102, "step": 17965 }, { "epoch": 0.46, "grad_norm": 2.7022342681884766, "learning_rate": 1.1756553085982928e-05, "loss": 0.4751, "step": 17966 }, { "epoch": 0.46, "grad_norm": 1.373311996459961, "learning_rate": 1.175573584435134e-05, "loss": 0.465, "step": 17967 }, { "epoch": 0.46, "grad_norm": 1.6044962406158447, "learning_rate": 1.1754918590620322e-05, "loss": 0.5325, "step": 17968 }, { "epoch": 0.46, "grad_norm": 2.1543819904327393, "learning_rate": 1.1754101324795508e-05, "loss": 0.5696, "step": 17969 }, { "epoch": 0.46, "grad_norm": 1.4442369937896729, "learning_rate": 1.175328404688253e-05, "loss": 0.5683, "step": 17970 }, { "epoch": 0.46, "grad_norm": 1.017110824584961, "learning_rate": 1.175246675688702e-05, "loss": 0.4657, "step": 17971 }, { "epoch": 0.46, "grad_norm": 1.6107150316238403, "learning_rate": 1.175164945481461e-05, "loss": 0.6206, "step": 17972 }, { "epoch": 0.46, "grad_norm": 1.2710552215576172, "learning_rate": 1.1750832140670934e-05, "loss": 0.6483, "step": 17973 }, { "epoch": 0.46, "grad_norm": 2.145407199859619, "learning_rate": 1.175001481446162e-05, "loss": 0.7621, "step": 17974 }, { "epoch": 0.46, "grad_norm": 1.546135663986206, "learning_rate": 1.1749197476192305e-05, "loss": 0.4421, "step": 17975 }, { "epoch": 0.46, "grad_norm": 1.3755083084106445, "learning_rate": 1.174838012586862e-05, "loss": 0.6544, "step": 17976 }, { "epoch": 0.46, "grad_norm": 1.0840938091278076, "learning_rate": 1.1747562763496197e-05, "loss": 0.5228, "step": 17977 }, { "epoch": 0.46, "grad_norm": 2.5883607864379883, "learning_rate": 1.174674538908067e-05, "loss": 0.5516, "step": 17978 }, { "epoch": 0.46, "grad_norm": 5.257323265075684, "learning_rate": 1.174592800262767e-05, "loss": 0.5542, "step": 17979 }, { "epoch": 0.46, "grad_norm": 1.6505134105682373, "learning_rate": 1.174511060414283e-05, "loss": 0.6816, "step": 17980 }, { "epoch": 0.46, "grad_norm": 6.523122310638428, "learning_rate": 1.1744293193631785e-05, "loss": 0.6155, "step": 17981 }, { "epoch": 0.46, "grad_norm": 7.716966152191162, "learning_rate": 1.174347577110017e-05, "loss": 0.6643, "step": 17982 }, { "epoch": 0.46, "grad_norm": 2.1018261909484863, "learning_rate": 1.1742658336553614e-05, "loss": 0.4836, "step": 17983 }, { "epoch": 0.46, "grad_norm": 1.2625706195831299, "learning_rate": 1.1741840889997751e-05, "loss": 0.591, "step": 17984 }, { "epoch": 0.46, "grad_norm": 2.8715200424194336, "learning_rate": 1.1741023431438215e-05, "loss": 0.6466, "step": 17985 }, { "epoch": 0.46, "grad_norm": 1.8523638248443604, "learning_rate": 1.1740205960880637e-05, "loss": 0.6947, "step": 17986 }, { "epoch": 0.46, "grad_norm": 4.231201171875, "learning_rate": 1.1739388478330658e-05, "loss": 0.5143, "step": 17987 }, { "epoch": 0.46, "grad_norm": 4.389978885650635, "learning_rate": 1.1738570983793902e-05, "loss": 0.6755, "step": 17988 }, { "epoch": 0.46, "grad_norm": 6.576979160308838, "learning_rate": 1.1737753477276011e-05, "loss": 0.6696, "step": 17989 }, { "epoch": 0.46, "grad_norm": 1.2243928909301758, "learning_rate": 1.1736935958782609e-05, "loss": 0.585, "step": 17990 }, { "epoch": 0.46, "grad_norm": 1.80921471118927, "learning_rate": 1.1736118428319339e-05, "loss": 0.5584, "step": 17991 }, { "epoch": 0.46, "grad_norm": 1.208152413368225, "learning_rate": 1.1735300885891832e-05, "loss": 0.4793, "step": 17992 }, { "epoch": 0.46, "grad_norm": 1.0638935565948486, "learning_rate": 1.1734483331505722e-05, "loss": 0.4804, "step": 17993 }, { "epoch": 0.46, "grad_norm": 2.2802300453186035, "learning_rate": 1.1733665765166642e-05, "loss": 0.579, "step": 17994 }, { "epoch": 0.46, "grad_norm": 1.2942999601364136, "learning_rate": 1.1732848186880225e-05, "loss": 0.4007, "step": 17995 }, { "epoch": 0.46, "grad_norm": 14.645133018493652, "learning_rate": 1.1732030596652108e-05, "loss": 0.6715, "step": 17996 }, { "epoch": 0.46, "grad_norm": 6.195923805236816, "learning_rate": 1.1731212994487926e-05, "loss": 0.6462, "step": 17997 }, { "epoch": 0.46, "grad_norm": 4.023922920227051, "learning_rate": 1.173039538039331e-05, "loss": 0.7303, "step": 17998 }, { "epoch": 0.46, "grad_norm": 1.8875460624694824, "learning_rate": 1.1729577754373895e-05, "loss": 0.5979, "step": 17999 }, { "epoch": 0.46, "grad_norm": 3.915714979171753, "learning_rate": 1.1728760116435318e-05, "loss": 0.6434, "step": 18000 }, { "epoch": 0.46, "grad_norm": 2.293121099472046, "learning_rate": 1.1727942466583215e-05, "loss": 0.5517, "step": 18001 }, { "epoch": 0.46, "grad_norm": 3.1194040775299072, "learning_rate": 1.1727124804823214e-05, "loss": 0.5665, "step": 18002 }, { "epoch": 0.46, "grad_norm": 1.520574927330017, "learning_rate": 1.1726307131160954e-05, "loss": 0.5787, "step": 18003 }, { "epoch": 0.46, "grad_norm": 1.6259490251541138, "learning_rate": 1.1725489445602075e-05, "loss": 0.6111, "step": 18004 }, { "epoch": 0.46, "grad_norm": 3.3941166400909424, "learning_rate": 1.1724671748152202e-05, "loss": 0.5322, "step": 18005 }, { "epoch": 0.46, "grad_norm": 1.1856601238250732, "learning_rate": 1.1723854038816978e-05, "loss": 0.5482, "step": 18006 }, { "epoch": 0.46, "grad_norm": 12.093711853027344, "learning_rate": 1.172303631760203e-05, "loss": 0.6862, "step": 18007 }, { "epoch": 0.46, "grad_norm": 1.5204733610153198, "learning_rate": 1.1722218584513004e-05, "loss": 0.6205, "step": 18008 }, { "epoch": 0.46, "grad_norm": 1.760574221611023, "learning_rate": 1.1721400839555526e-05, "loss": 0.5559, "step": 18009 }, { "epoch": 0.46, "grad_norm": 2.0450282096862793, "learning_rate": 1.1720583082735233e-05, "loss": 0.5406, "step": 18010 }, { "epoch": 0.46, "grad_norm": 1.69071364402771, "learning_rate": 1.1719765314057767e-05, "loss": 0.4329, "step": 18011 }, { "epoch": 0.46, "grad_norm": 1.204704999923706, "learning_rate": 1.1718947533528758e-05, "loss": 0.4741, "step": 18012 }, { "epoch": 0.46, "grad_norm": 3.93829345703125, "learning_rate": 1.171812974115384e-05, "loss": 0.5957, "step": 18013 }, { "epoch": 0.46, "grad_norm": 1.0215959548950195, "learning_rate": 1.1717311936938653e-05, "loss": 0.4468, "step": 18014 }, { "epoch": 0.46, "grad_norm": 2.600661516189575, "learning_rate": 1.1716494120888831e-05, "loss": 0.6884, "step": 18015 }, { "epoch": 0.46, "grad_norm": 2.3900997638702393, "learning_rate": 1.171567629301001e-05, "loss": 0.6216, "step": 18016 }, { "epoch": 0.46, "grad_norm": 1.4745360612869263, "learning_rate": 1.1714858453307824e-05, "loss": 0.5716, "step": 18017 }, { "epoch": 0.46, "grad_norm": 1.325024127960205, "learning_rate": 1.1714040601787914e-05, "loss": 0.5735, "step": 18018 }, { "epoch": 0.46, "grad_norm": 2.830395460128784, "learning_rate": 1.1713222738455912e-05, "loss": 0.588, "step": 18019 }, { "epoch": 0.46, "grad_norm": 1.737605094909668, "learning_rate": 1.1712404863317453e-05, "loss": 0.6299, "step": 18020 }, { "epoch": 0.46, "grad_norm": 3.6366100311279297, "learning_rate": 1.171158697637818e-05, "loss": 0.7477, "step": 18021 }, { "epoch": 0.46, "grad_norm": 1.1294020414352417, "learning_rate": 1.171076907764372e-05, "loss": 0.51, "step": 18022 }, { "epoch": 0.46, "grad_norm": 1.5302894115447998, "learning_rate": 1.1709951167119718e-05, "loss": 0.6364, "step": 18023 }, { "epoch": 0.46, "grad_norm": 2.0494730472564697, "learning_rate": 1.1709133244811805e-05, "loss": 0.5288, "step": 18024 }, { "epoch": 0.46, "grad_norm": 1.2595895528793335, "learning_rate": 1.1708315310725623e-05, "loss": 0.4795, "step": 18025 }, { "epoch": 0.46, "grad_norm": 1.7020633220672607, "learning_rate": 1.17074973648668e-05, "loss": 0.5857, "step": 18026 }, { "epoch": 0.46, "grad_norm": 2.319105386734009, "learning_rate": 1.170667940724098e-05, "loss": 0.6985, "step": 18027 }, { "epoch": 0.46, "grad_norm": 1.6300275325775146, "learning_rate": 1.1705861437853798e-05, "loss": 0.6144, "step": 18028 }, { "epoch": 0.46, "grad_norm": 1.4631465673446655, "learning_rate": 1.170504345671089e-05, "loss": 0.5212, "step": 18029 }, { "epoch": 0.46, "grad_norm": 1.3566769361495972, "learning_rate": 1.1704225463817895e-05, "loss": 0.4178, "step": 18030 }, { "epoch": 0.46, "grad_norm": 1.014915943145752, "learning_rate": 1.1703407459180446e-05, "loss": 0.5725, "step": 18031 }, { "epoch": 0.46, "grad_norm": 4.476042747497559, "learning_rate": 1.1702589442804185e-05, "loss": 0.5878, "step": 18032 }, { "epoch": 0.46, "grad_norm": 1.6914767026901245, "learning_rate": 1.1701771414694748e-05, "loss": 0.4515, "step": 18033 }, { "epoch": 0.46, "grad_norm": 1.3290163278579712, "learning_rate": 1.1700953374857772e-05, "loss": 0.467, "step": 18034 }, { "epoch": 0.46, "grad_norm": 1.0632123947143555, "learning_rate": 1.1700135323298892e-05, "loss": 0.4272, "step": 18035 }, { "epoch": 0.46, "grad_norm": 1.0956827402114868, "learning_rate": 1.1699317260023748e-05, "loss": 0.5189, "step": 18036 }, { "epoch": 0.46, "grad_norm": 0.8204511404037476, "learning_rate": 1.1698499185037977e-05, "loss": 0.3697, "step": 18037 }, { "epoch": 0.46, "grad_norm": 1.43205988407135, "learning_rate": 1.1697681098347218e-05, "loss": 0.549, "step": 18038 }, { "epoch": 0.46, "grad_norm": 3.4541752338409424, "learning_rate": 1.1696862999957106e-05, "loss": 0.527, "step": 18039 }, { "epoch": 0.46, "grad_norm": 1.2050580978393555, "learning_rate": 1.1696044889873282e-05, "loss": 0.5728, "step": 18040 }, { "epoch": 0.46, "grad_norm": 1.4316643476486206, "learning_rate": 1.169522676810138e-05, "loss": 0.5946, "step": 18041 }, { "epoch": 0.46, "grad_norm": 3.8955211639404297, "learning_rate": 1.1694408634647042e-05, "loss": 0.534, "step": 18042 }, { "epoch": 0.46, "grad_norm": 3.739551305770874, "learning_rate": 1.1693590489515902e-05, "loss": 0.5374, "step": 18043 }, { "epoch": 0.46, "grad_norm": 5.905345916748047, "learning_rate": 1.16927723327136e-05, "loss": 0.5853, "step": 18044 }, { "epoch": 0.46, "grad_norm": 2.3778598308563232, "learning_rate": 1.1691954164245779e-05, "loss": 0.5201, "step": 18045 }, { "epoch": 0.46, "grad_norm": 3.1892027854919434, "learning_rate": 1.1691135984118067e-05, "loss": 0.754, "step": 18046 }, { "epoch": 0.46, "grad_norm": 3.6359682083129883, "learning_rate": 1.1690317792336115e-05, "loss": 0.4399, "step": 18047 }, { "epoch": 0.46, "grad_norm": 1.144094705581665, "learning_rate": 1.1689499588905548e-05, "loss": 0.512, "step": 18048 }, { "epoch": 0.46, "grad_norm": 2.3162970542907715, "learning_rate": 1.1688681373832016e-05, "loss": 0.6558, "step": 18049 }, { "epoch": 0.46, "grad_norm": 4.212455749511719, "learning_rate": 1.168786314712115e-05, "loss": 0.6157, "step": 18050 }, { "epoch": 0.46, "grad_norm": 1.4730348587036133, "learning_rate": 1.1687044908778593e-05, "loss": 0.5268, "step": 18051 }, { "epoch": 0.46, "grad_norm": 5.852015018463135, "learning_rate": 1.1686226658809985e-05, "loss": 0.4727, "step": 18052 }, { "epoch": 0.46, "grad_norm": 2.0411980152130127, "learning_rate": 1.1685408397220956e-05, "loss": 0.723, "step": 18053 }, { "epoch": 0.46, "grad_norm": 2.469576120376587, "learning_rate": 1.1684590124017156e-05, "loss": 0.463, "step": 18054 }, { "epoch": 0.46, "grad_norm": 4.061047077178955, "learning_rate": 1.1683771839204219e-05, "loss": 0.7332, "step": 18055 }, { "epoch": 0.46, "grad_norm": 1.4436637163162231, "learning_rate": 1.1682953542787782e-05, "loss": 0.5683, "step": 18056 }, { "epoch": 0.46, "grad_norm": 2.4132683277130127, "learning_rate": 1.1682135234773488e-05, "loss": 0.5226, "step": 18057 }, { "epoch": 0.46, "grad_norm": 2.038163661956787, "learning_rate": 1.1681316915166976e-05, "loss": 0.5676, "step": 18058 }, { "epoch": 0.46, "grad_norm": 1.642573595046997, "learning_rate": 1.1680498583973883e-05, "loss": 0.594, "step": 18059 }, { "epoch": 0.46, "grad_norm": 1.8212915658950806, "learning_rate": 1.1679680241199848e-05, "loss": 0.5471, "step": 18060 }, { "epoch": 0.46, "grad_norm": 1.5955679416656494, "learning_rate": 1.1678861886850512e-05, "loss": 0.5893, "step": 18061 }, { "epoch": 0.46, "grad_norm": 2.2609002590179443, "learning_rate": 1.1678043520931519e-05, "loss": 0.6578, "step": 18062 }, { "epoch": 0.46, "grad_norm": 4.871547698974609, "learning_rate": 1.1677225143448501e-05, "loss": 0.3708, "step": 18063 }, { "epoch": 0.46, "grad_norm": 2.591432809829712, "learning_rate": 1.1676406754407101e-05, "loss": 0.4204, "step": 18064 }, { "epoch": 0.46, "grad_norm": 2.530686616897583, "learning_rate": 1.167558835381296e-05, "loss": 0.5465, "step": 18065 }, { "epoch": 0.46, "grad_norm": 1.4257657527923584, "learning_rate": 1.1674769941671715e-05, "loss": 0.4782, "step": 18066 }, { "epoch": 0.46, "grad_norm": 19.811786651611328, "learning_rate": 1.1673951517989009e-05, "loss": 0.6878, "step": 18067 }, { "epoch": 0.46, "grad_norm": 5.51139497756958, "learning_rate": 1.1673133082770481e-05, "loss": 0.5007, "step": 18068 }, { "epoch": 0.46, "grad_norm": 3.916398048400879, "learning_rate": 1.1672314636021771e-05, "loss": 0.6537, "step": 18069 }, { "epoch": 0.46, "grad_norm": 1.0272008180618286, "learning_rate": 1.1671496177748518e-05, "loss": 0.4723, "step": 18070 }, { "epoch": 0.46, "grad_norm": 1.7093937397003174, "learning_rate": 1.1670677707956367e-05, "loss": 0.5619, "step": 18071 }, { "epoch": 0.46, "grad_norm": 1.5546542406082153, "learning_rate": 1.166985922665095e-05, "loss": 0.5183, "step": 18072 }, { "epoch": 0.46, "grad_norm": 2.8741259574890137, "learning_rate": 1.1669040733837913e-05, "loss": 0.6983, "step": 18073 }, { "epoch": 0.46, "grad_norm": 2.674593687057495, "learning_rate": 1.16682222295229e-05, "loss": 0.6102, "step": 18074 }, { "epoch": 0.46, "grad_norm": 1.141192078590393, "learning_rate": 1.1667403713711543e-05, "loss": 0.5039, "step": 18075 }, { "epoch": 0.46, "grad_norm": 1.8538111448287964, "learning_rate": 1.1666585186409488e-05, "loss": 0.561, "step": 18076 }, { "epoch": 0.46, "grad_norm": 10.48147964477539, "learning_rate": 1.1665766647622377e-05, "loss": 0.5517, "step": 18077 }, { "epoch": 0.46, "grad_norm": 1.6434357166290283, "learning_rate": 1.1664948097355848e-05, "loss": 0.5312, "step": 18078 }, { "epoch": 0.46, "grad_norm": 1.4989449977874756, "learning_rate": 1.166412953561554e-05, "loss": 0.4683, "step": 18079 }, { "epoch": 0.46, "grad_norm": 4.1221537590026855, "learning_rate": 1.16633109624071e-05, "loss": 0.6175, "step": 18080 }, { "epoch": 0.46, "grad_norm": 3.4387588500976562, "learning_rate": 1.1662492377736166e-05, "loss": 0.5429, "step": 18081 }, { "epoch": 0.46, "grad_norm": 2.0905160903930664, "learning_rate": 1.1661673781608376e-05, "loss": 0.5728, "step": 18082 }, { "epoch": 0.46, "grad_norm": 1.968144178390503, "learning_rate": 1.1660855174029377e-05, "loss": 0.5892, "step": 18083 }, { "epoch": 0.46, "grad_norm": 7.18565034866333, "learning_rate": 1.1660036555004809e-05, "loss": 0.529, "step": 18084 }, { "epoch": 0.46, "grad_norm": 2.1045634746551514, "learning_rate": 1.1659217924540307e-05, "loss": 0.3567, "step": 18085 }, { "epoch": 0.46, "grad_norm": 2.3754618167877197, "learning_rate": 1.1658399282641522e-05, "loss": 0.5321, "step": 18086 }, { "epoch": 0.46, "grad_norm": 3.2901010513305664, "learning_rate": 1.1657580629314089e-05, "loss": 0.6154, "step": 18087 }, { "epoch": 0.46, "grad_norm": 2.3969218730926514, "learning_rate": 1.1656761964563653e-05, "loss": 0.6801, "step": 18088 }, { "epoch": 0.46, "grad_norm": 1.4593909978866577, "learning_rate": 1.1655943288395852e-05, "loss": 0.6303, "step": 18089 }, { "epoch": 0.46, "grad_norm": 1.4132691621780396, "learning_rate": 1.1655124600816335e-05, "loss": 0.4231, "step": 18090 }, { "epoch": 0.46, "grad_norm": 1.3254239559173584, "learning_rate": 1.1654305901830734e-05, "loss": 0.6249, "step": 18091 }, { "epoch": 0.46, "grad_norm": 1.7356821298599243, "learning_rate": 1.1653487191444698e-05, "loss": 0.5254, "step": 18092 }, { "epoch": 0.46, "grad_norm": 2.2167301177978516, "learning_rate": 1.1652668469663867e-05, "loss": 0.4711, "step": 18093 }, { "epoch": 0.46, "grad_norm": 1.760366439819336, "learning_rate": 1.1651849736493884e-05, "loss": 0.6408, "step": 18094 }, { "epoch": 0.46, "grad_norm": 1.3617483377456665, "learning_rate": 1.1651030991940389e-05, "loss": 0.5507, "step": 18095 }, { "epoch": 0.46, "grad_norm": 1.745019555091858, "learning_rate": 1.1650212236009026e-05, "loss": 0.3945, "step": 18096 }, { "epoch": 0.46, "grad_norm": 1.4914543628692627, "learning_rate": 1.1649393468705439e-05, "loss": 0.3731, "step": 18097 }, { "epoch": 0.46, "grad_norm": 1.5605953931808472, "learning_rate": 1.1648574690035268e-05, "loss": 0.648, "step": 18098 }, { "epoch": 0.46, "grad_norm": 1.363195776939392, "learning_rate": 1.1647755900004152e-05, "loss": 0.3751, "step": 18099 }, { "epoch": 0.46, "grad_norm": 2.508049726486206, "learning_rate": 1.164693709861774e-05, "loss": 0.4066, "step": 18100 }, { "epoch": 0.46, "grad_norm": 1.6582765579223633, "learning_rate": 1.1646118285881673e-05, "loss": 0.5928, "step": 18101 }, { "epoch": 0.46, "grad_norm": 0.9769712686538696, "learning_rate": 1.1645299461801591e-05, "loss": 0.423, "step": 18102 }, { "epoch": 0.46, "grad_norm": 2.465663194656372, "learning_rate": 1.1644480626383143e-05, "loss": 0.5915, "step": 18103 }, { "epoch": 0.46, "grad_norm": 1.904605746269226, "learning_rate": 1.1643661779631966e-05, "loss": 0.6746, "step": 18104 }, { "epoch": 0.46, "grad_norm": 1.8926433324813843, "learning_rate": 1.1642842921553703e-05, "loss": 0.4086, "step": 18105 }, { "epoch": 0.46, "grad_norm": 2.9203367233276367, "learning_rate": 1.1642024052154e-05, "loss": 0.5277, "step": 18106 }, { "epoch": 0.46, "grad_norm": 2.2532527446746826, "learning_rate": 1.1641205171438496e-05, "loss": 0.6835, "step": 18107 }, { "epoch": 0.46, "grad_norm": 2.897913694381714, "learning_rate": 1.1640386279412842e-05, "loss": 0.6672, "step": 18108 }, { "epoch": 0.46, "grad_norm": 1.3407506942749023, "learning_rate": 1.1639567376082672e-05, "loss": 0.5169, "step": 18109 }, { "epoch": 0.46, "grad_norm": 1.1850864887237549, "learning_rate": 1.1638748461453635e-05, "loss": 0.5616, "step": 18110 }, { "epoch": 0.46, "grad_norm": 1.625575304031372, "learning_rate": 1.1637929535531374e-05, "loss": 0.5615, "step": 18111 }, { "epoch": 0.46, "grad_norm": 1.4025312662124634, "learning_rate": 1.163711059832153e-05, "loss": 0.6978, "step": 18112 }, { "epoch": 0.46, "grad_norm": 10.386712074279785, "learning_rate": 1.1636291649829751e-05, "loss": 0.6775, "step": 18113 }, { "epoch": 0.46, "grad_norm": 1.751840353012085, "learning_rate": 1.1635472690061675e-05, "loss": 0.5485, "step": 18114 }, { "epoch": 0.46, "grad_norm": 1.2144397497177124, "learning_rate": 1.1634653719022952e-05, "loss": 0.3391, "step": 18115 }, { "epoch": 0.46, "grad_norm": 1.2811388969421387, "learning_rate": 1.1633834736719218e-05, "loss": 0.6633, "step": 18116 }, { "epoch": 0.46, "grad_norm": 1.3143749237060547, "learning_rate": 1.1633015743156126e-05, "loss": 0.4951, "step": 18117 }, { "epoch": 0.46, "grad_norm": 2.8304953575134277, "learning_rate": 1.1632196738339314e-05, "loss": 0.5238, "step": 18118 }, { "epoch": 0.46, "grad_norm": 1.4085725545883179, "learning_rate": 1.1631377722274426e-05, "loss": 0.4799, "step": 18119 }, { "epoch": 0.46, "grad_norm": 1.7529710531234741, "learning_rate": 1.1630558694967111e-05, "loss": 0.5255, "step": 18120 }, { "epoch": 0.46, "grad_norm": 1.370566487312317, "learning_rate": 1.1629739656423007e-05, "loss": 0.5523, "step": 18121 }, { "epoch": 0.46, "grad_norm": 1.9242136478424072, "learning_rate": 1.1628920606647763e-05, "loss": 0.6826, "step": 18122 }, { "epoch": 0.46, "grad_norm": 6.879140377044678, "learning_rate": 1.162810154564702e-05, "loss": 0.6405, "step": 18123 }, { "epoch": 0.46, "grad_norm": 1.1258786916732788, "learning_rate": 1.1627282473426422e-05, "loss": 0.464, "step": 18124 }, { "epoch": 0.46, "grad_norm": 1.8931140899658203, "learning_rate": 1.162646338999162e-05, "loss": 0.6295, "step": 18125 }, { "epoch": 0.46, "grad_norm": 7.633251190185547, "learning_rate": 1.162564429534825e-05, "loss": 0.5954, "step": 18126 }, { "epoch": 0.46, "grad_norm": 7.502825736999512, "learning_rate": 1.1624825189501964e-05, "loss": 0.6439, "step": 18127 }, { "epoch": 0.46, "grad_norm": 5.7206645011901855, "learning_rate": 1.1624006072458402e-05, "loss": 0.6608, "step": 18128 }, { "epoch": 0.46, "grad_norm": 1.9275575876235962, "learning_rate": 1.1623186944223213e-05, "loss": 0.545, "step": 18129 }, { "epoch": 0.46, "grad_norm": 1.2352216243743896, "learning_rate": 1.1622367804802036e-05, "loss": 0.5062, "step": 18130 }, { "epoch": 0.46, "grad_norm": 1.520107388496399, "learning_rate": 1.1621548654200523e-05, "loss": 0.5502, "step": 18131 }, { "epoch": 0.46, "grad_norm": 1.3958650827407837, "learning_rate": 1.1620729492424315e-05, "loss": 0.5515, "step": 18132 }, { "epoch": 0.46, "grad_norm": 1.4861669540405273, "learning_rate": 1.1619910319479053e-05, "loss": 0.5809, "step": 18133 }, { "epoch": 0.46, "grad_norm": 2.2602903842926025, "learning_rate": 1.161909113537039e-05, "loss": 0.5882, "step": 18134 }, { "epoch": 0.46, "grad_norm": 3.2104153633117676, "learning_rate": 1.161827194010397e-05, "loss": 0.6172, "step": 18135 }, { "epoch": 0.46, "grad_norm": 1.5703976154327393, "learning_rate": 1.1617452733685433e-05, "loss": 0.5394, "step": 18136 }, { "epoch": 0.46, "grad_norm": 1.2877247333526611, "learning_rate": 1.1616633516120431e-05, "loss": 0.5614, "step": 18137 }, { "epoch": 0.46, "grad_norm": 5.73144006729126, "learning_rate": 1.1615814287414601e-05, "loss": 0.6425, "step": 18138 }, { "epoch": 0.46, "grad_norm": 1.5081382989883423, "learning_rate": 1.16149950475736e-05, "loss": 0.4997, "step": 18139 }, { "epoch": 0.46, "grad_norm": 1.675614356994629, "learning_rate": 1.1614175796603065e-05, "loss": 0.5452, "step": 18140 }, { "epoch": 0.46, "grad_norm": 1.6399648189544678, "learning_rate": 1.1613356534508645e-05, "loss": 0.6192, "step": 18141 }, { "epoch": 0.46, "grad_norm": 1.9600220918655396, "learning_rate": 1.1612537261295987e-05, "loss": 0.534, "step": 18142 }, { "epoch": 0.47, "grad_norm": 8.850170135498047, "learning_rate": 1.1611717976970731e-05, "loss": 0.7255, "step": 18143 }, { "epoch": 0.47, "grad_norm": 1.9184452295303345, "learning_rate": 1.1610898681538532e-05, "loss": 0.7183, "step": 18144 }, { "epoch": 0.47, "grad_norm": 1.4811080694198608, "learning_rate": 1.1610079375005028e-05, "loss": 0.5942, "step": 18145 }, { "epoch": 0.47, "grad_norm": 1.4795833826065063, "learning_rate": 1.1609260057375874e-05, "loss": 0.5381, "step": 18146 }, { "epoch": 0.47, "grad_norm": 1.8876181840896606, "learning_rate": 1.1608440728656705e-05, "loss": 0.5425, "step": 18147 }, { "epoch": 0.47, "grad_norm": 3.444551944732666, "learning_rate": 1.1607621388853171e-05, "loss": 0.583, "step": 18148 }, { "epoch": 0.47, "grad_norm": 8.607529640197754, "learning_rate": 1.1606802037970926e-05, "loss": 0.6855, "step": 18149 }, { "epoch": 0.47, "grad_norm": 5.170992374420166, "learning_rate": 1.1605982676015607e-05, "loss": 0.8046, "step": 18150 }, { "epoch": 0.47, "grad_norm": 2.609264612197876, "learning_rate": 1.160516330299287e-05, "loss": 0.6813, "step": 18151 }, { "epoch": 0.47, "grad_norm": 8.143667221069336, "learning_rate": 1.1604343918908349e-05, "loss": 0.5697, "step": 18152 }, { "epoch": 0.47, "grad_norm": 1.2683640718460083, "learning_rate": 1.1603524523767702e-05, "loss": 0.6676, "step": 18153 }, { "epoch": 0.47, "grad_norm": 1.2984638214111328, "learning_rate": 1.160270511757657e-05, "loss": 0.6908, "step": 18154 }, { "epoch": 0.47, "grad_norm": 4.253125190734863, "learning_rate": 1.1601885700340602e-05, "loss": 0.7521, "step": 18155 }, { "epoch": 0.47, "grad_norm": 1.9165762662887573, "learning_rate": 1.1601066272065445e-05, "loss": 0.6054, "step": 18156 }, { "epoch": 0.47, "grad_norm": 2.7847542762756348, "learning_rate": 1.1600246832756743e-05, "loss": 0.7623, "step": 18157 }, { "epoch": 0.47, "grad_norm": 1.3988887071609497, "learning_rate": 1.1599427382420148e-05, "loss": 0.4999, "step": 18158 }, { "epoch": 0.47, "grad_norm": 6.941284656524658, "learning_rate": 1.15986079210613e-05, "loss": 0.7817, "step": 18159 }, { "epoch": 0.47, "grad_norm": 1.1709904670715332, "learning_rate": 1.159778844868585e-05, "loss": 0.5934, "step": 18160 }, { "epoch": 0.47, "grad_norm": 1.2498987913131714, "learning_rate": 1.1596968965299453e-05, "loss": 0.4734, "step": 18161 }, { "epoch": 0.47, "grad_norm": 1.8366711139678955, "learning_rate": 1.1596149470907743e-05, "loss": 0.5279, "step": 18162 }, { "epoch": 0.47, "grad_norm": 1.4196569919586182, "learning_rate": 1.1595329965516378e-05, "loss": 0.6062, "step": 18163 }, { "epoch": 0.47, "grad_norm": 1.3156309127807617, "learning_rate": 1.1594510449130999e-05, "loss": 0.6289, "step": 18164 }, { "epoch": 0.47, "grad_norm": 1.7392228841781616, "learning_rate": 1.1593690921757252e-05, "loss": 0.5605, "step": 18165 }, { "epoch": 0.47, "grad_norm": 2.936016082763672, "learning_rate": 1.1592871383400794e-05, "loss": 0.5299, "step": 18166 }, { "epoch": 0.47, "grad_norm": 0.9415296316146851, "learning_rate": 1.1592051834067264e-05, "loss": 0.6156, "step": 18167 }, { "epoch": 0.47, "grad_norm": 1.4109529256820679, "learning_rate": 1.1591232273762317e-05, "loss": 0.5435, "step": 18168 }, { "epoch": 0.47, "grad_norm": 1.2068557739257812, "learning_rate": 1.1590412702491594e-05, "loss": 0.6564, "step": 18169 }, { "epoch": 0.47, "grad_norm": 5.911811828613281, "learning_rate": 1.1589593120260744e-05, "loss": 0.6307, "step": 18170 }, { "epoch": 0.47, "grad_norm": 2.130739450454712, "learning_rate": 1.1588773527075419e-05, "loss": 0.6157, "step": 18171 }, { "epoch": 0.47, "grad_norm": 4.356781482696533, "learning_rate": 1.1587953922941265e-05, "loss": 0.6737, "step": 18172 }, { "epoch": 0.47, "grad_norm": 2.721921443939209, "learning_rate": 1.158713430786393e-05, "loss": 0.4469, "step": 18173 }, { "epoch": 0.47, "grad_norm": 1.6208086013793945, "learning_rate": 1.1586314681849062e-05, "loss": 0.5855, "step": 18174 }, { "epoch": 0.47, "grad_norm": 3.6885271072387695, "learning_rate": 1.1585495044902313e-05, "loss": 0.533, "step": 18175 }, { "epoch": 0.47, "grad_norm": 2.934422016143799, "learning_rate": 1.1584675397029324e-05, "loss": 0.5752, "step": 18176 }, { "epoch": 0.47, "grad_norm": 8.626482009887695, "learning_rate": 1.1583855738235747e-05, "loss": 0.7636, "step": 18177 }, { "epoch": 0.47, "grad_norm": 1.4747971296310425, "learning_rate": 1.1583036068527236e-05, "loss": 0.5149, "step": 18178 }, { "epoch": 0.47, "grad_norm": 1.1495842933654785, "learning_rate": 1.158221638790943e-05, "loss": 0.4857, "step": 18179 }, { "epoch": 0.47, "grad_norm": 1.5232020616531372, "learning_rate": 1.1581396696387988e-05, "loss": 0.5506, "step": 18180 }, { "epoch": 0.47, "grad_norm": 1.850366473197937, "learning_rate": 1.158057699396855e-05, "loss": 0.5092, "step": 18181 }, { "epoch": 0.47, "grad_norm": 2.157912492752075, "learning_rate": 1.1579757280656767e-05, "loss": 0.6542, "step": 18182 }, { "epoch": 0.47, "grad_norm": 3.6051344871520996, "learning_rate": 1.1578937556458294e-05, "loss": 0.5837, "step": 18183 }, { "epoch": 0.47, "grad_norm": 1.6707887649536133, "learning_rate": 1.1578117821378772e-05, "loss": 0.3423, "step": 18184 }, { "epoch": 0.47, "grad_norm": 1.846657633781433, "learning_rate": 1.1577298075423855e-05, "loss": 0.6303, "step": 18185 }, { "epoch": 0.47, "grad_norm": 0.981820285320282, "learning_rate": 1.157647831859919e-05, "loss": 0.5063, "step": 18186 }, { "epoch": 0.47, "grad_norm": 1.6833381652832031, "learning_rate": 1.1575658550910427e-05, "loss": 0.601, "step": 18187 }, { "epoch": 0.47, "grad_norm": 2.2829043865203857, "learning_rate": 1.1574838772363216e-05, "loss": 0.7219, "step": 18188 }, { "epoch": 0.47, "grad_norm": 1.7163684368133545, "learning_rate": 1.1574018982963203e-05, "loss": 0.5412, "step": 18189 }, { "epoch": 0.47, "grad_norm": 1.8172792196273804, "learning_rate": 1.1573199182716042e-05, "loss": 0.6534, "step": 18190 }, { "epoch": 0.47, "grad_norm": 5.311891078948975, "learning_rate": 1.157237937162738e-05, "loss": 0.5657, "step": 18191 }, { "epoch": 0.47, "grad_norm": 1.1116045713424683, "learning_rate": 1.1571559549702869e-05, "loss": 0.5833, "step": 18192 }, { "epoch": 0.47, "grad_norm": 1.441240668296814, "learning_rate": 1.1570739716948154e-05, "loss": 0.5374, "step": 18193 }, { "epoch": 0.47, "grad_norm": 1.1424355506896973, "learning_rate": 1.1569919873368888e-05, "loss": 0.5363, "step": 18194 }, { "epoch": 0.47, "grad_norm": 5.860166072845459, "learning_rate": 1.1569100018970723e-05, "loss": 0.5552, "step": 18195 }, { "epoch": 0.47, "grad_norm": 4.92924690246582, "learning_rate": 1.1568280153759303e-05, "loss": 0.6623, "step": 18196 }, { "epoch": 0.47, "grad_norm": 1.6416033506393433, "learning_rate": 1.1567460277740285e-05, "loss": 0.57, "step": 18197 }, { "epoch": 0.47, "grad_norm": 1.7549278736114502, "learning_rate": 1.1566640390919315e-05, "loss": 0.6147, "step": 18198 }, { "epoch": 0.47, "grad_norm": 2.072962522506714, "learning_rate": 1.1565820493302044e-05, "loss": 0.5539, "step": 18199 }, { "epoch": 0.47, "grad_norm": 1.5257184505462646, "learning_rate": 1.156500058489412e-05, "loss": 0.5704, "step": 18200 }, { "epoch": 0.47, "grad_norm": 2.6399855613708496, "learning_rate": 1.1564180665701195e-05, "loss": 0.7696, "step": 18201 }, { "epoch": 0.47, "grad_norm": 1.2725765705108643, "learning_rate": 1.156336073572892e-05, "loss": 0.4539, "step": 18202 }, { "epoch": 0.47, "grad_norm": 1.416597604751587, "learning_rate": 1.1562540794982946e-05, "loss": 0.4874, "step": 18203 }, { "epoch": 0.47, "grad_norm": 8.294791221618652, "learning_rate": 1.1561720843468923e-05, "loss": 0.6573, "step": 18204 }, { "epoch": 0.47, "grad_norm": 1.618424415588379, "learning_rate": 1.15609008811925e-05, "loss": 0.644, "step": 18205 }, { "epoch": 0.47, "grad_norm": 1.558800458908081, "learning_rate": 1.1560080908159327e-05, "loss": 0.6778, "step": 18206 }, { "epoch": 0.47, "grad_norm": 2.1610825061798096, "learning_rate": 1.1559260924375062e-05, "loss": 0.6586, "step": 18207 }, { "epoch": 0.47, "grad_norm": 3.2150473594665527, "learning_rate": 1.1558440929845345e-05, "loss": 0.7269, "step": 18208 }, { "epoch": 0.47, "grad_norm": 5.347744464874268, "learning_rate": 1.1557620924575836e-05, "loss": 0.628, "step": 18209 }, { "epoch": 0.47, "grad_norm": 1.1444268226623535, "learning_rate": 1.1556800908572179e-05, "loss": 0.5204, "step": 18210 }, { "epoch": 0.47, "grad_norm": 2.646775245666504, "learning_rate": 1.155598088184003e-05, "loss": 0.57, "step": 18211 }, { "epoch": 0.47, "grad_norm": 5.127409934997559, "learning_rate": 1.1555160844385039e-05, "loss": 0.5577, "step": 18212 }, { "epoch": 0.47, "grad_norm": 1.7319303750991821, "learning_rate": 1.1554340796212856e-05, "loss": 0.5919, "step": 18213 }, { "epoch": 0.47, "grad_norm": 1.1825631856918335, "learning_rate": 1.1553520737329131e-05, "loss": 0.4356, "step": 18214 }, { "epoch": 0.47, "grad_norm": 1.9601211547851562, "learning_rate": 1.1552700667739519e-05, "loss": 0.576, "step": 18215 }, { "epoch": 0.47, "grad_norm": 4.454233169555664, "learning_rate": 1.1551880587449667e-05, "loss": 0.4199, "step": 18216 }, { "epoch": 0.47, "grad_norm": 1.1297498941421509, "learning_rate": 1.1551060496465232e-05, "loss": 0.5624, "step": 18217 }, { "epoch": 0.47, "grad_norm": 1.3412303924560547, "learning_rate": 1.1550240394791861e-05, "loss": 0.5662, "step": 18218 }, { "epoch": 0.47, "grad_norm": 3.3547298908233643, "learning_rate": 1.154942028243521e-05, "loss": 0.5808, "step": 18219 }, { "epoch": 0.47, "grad_norm": 1.5046513080596924, "learning_rate": 1.1548600159400923e-05, "loss": 0.6267, "step": 18220 }, { "epoch": 0.47, "grad_norm": 4.23620080947876, "learning_rate": 1.1547780025694656e-05, "loss": 0.5296, "step": 18221 }, { "epoch": 0.47, "grad_norm": 4.3581719398498535, "learning_rate": 1.1546959881322067e-05, "loss": 0.676, "step": 18222 }, { "epoch": 0.47, "grad_norm": 4.1381516456604, "learning_rate": 1.1546139726288798e-05, "loss": 0.5405, "step": 18223 }, { "epoch": 0.47, "grad_norm": 1.0829015970230103, "learning_rate": 1.1545319560600507e-05, "loss": 0.599, "step": 18224 }, { "epoch": 0.47, "grad_norm": 14.149874687194824, "learning_rate": 1.1544499384262844e-05, "loss": 0.5101, "step": 18225 }, { "epoch": 0.47, "grad_norm": 2.5606024265289307, "learning_rate": 1.1543679197281463e-05, "loss": 0.6503, "step": 18226 }, { "epoch": 0.47, "grad_norm": 2.3274850845336914, "learning_rate": 1.1542858999662014e-05, "loss": 0.6386, "step": 18227 }, { "epoch": 0.47, "grad_norm": 1.3083127737045288, "learning_rate": 1.154203879141015e-05, "loss": 0.5676, "step": 18228 }, { "epoch": 0.47, "grad_norm": 1.5482360124588013, "learning_rate": 1.1541218572531526e-05, "loss": 0.5125, "step": 18229 }, { "epoch": 0.47, "grad_norm": 2.5798373222351074, "learning_rate": 1.1540398343031788e-05, "loss": 0.4571, "step": 18230 }, { "epoch": 0.47, "grad_norm": 1.1830556392669678, "learning_rate": 1.1539578102916594e-05, "loss": 0.5496, "step": 18231 }, { "epoch": 0.47, "grad_norm": 4.295096397399902, "learning_rate": 1.1538757852191592e-05, "loss": 0.4213, "step": 18232 }, { "epoch": 0.47, "grad_norm": 1.7187261581420898, "learning_rate": 1.153793759086244e-05, "loss": 0.5452, "step": 18233 }, { "epoch": 0.47, "grad_norm": 3.654003381729126, "learning_rate": 1.1537117318934791e-05, "loss": 0.6547, "step": 18234 }, { "epoch": 0.47, "grad_norm": 1.4079301357269287, "learning_rate": 1.1536297036414292e-05, "loss": 0.5148, "step": 18235 }, { "epoch": 0.47, "grad_norm": 4.56131649017334, "learning_rate": 1.15354767433066e-05, "loss": 0.6665, "step": 18236 }, { "epoch": 0.47, "grad_norm": 1.4275834560394287, "learning_rate": 1.1534656439617367e-05, "loss": 0.6362, "step": 18237 }, { "epoch": 0.47, "grad_norm": 16.966732025146484, "learning_rate": 1.1533836125352246e-05, "loss": 0.6556, "step": 18238 }, { "epoch": 0.47, "grad_norm": 6.571176528930664, "learning_rate": 1.1533015800516888e-05, "loss": 1.0941, "step": 18239 }, { "epoch": 0.47, "grad_norm": 1.635695219039917, "learning_rate": 1.1532195465116951e-05, "loss": 0.5132, "step": 18240 }, { "epoch": 0.47, "grad_norm": 1.7732226848602295, "learning_rate": 1.1531375119158085e-05, "loss": 0.4368, "step": 18241 }, { "epoch": 0.47, "grad_norm": 1.3748023509979248, "learning_rate": 1.1530554762645942e-05, "loss": 0.4595, "step": 18242 }, { "epoch": 0.47, "grad_norm": 8.671783447265625, "learning_rate": 1.1529734395586181e-05, "loss": 0.512, "step": 18243 }, { "epoch": 0.47, "grad_norm": 3.7673239707946777, "learning_rate": 1.1528914017984446e-05, "loss": 0.4931, "step": 18244 }, { "epoch": 0.47, "grad_norm": 1.6330397129058838, "learning_rate": 1.15280936298464e-05, "loss": 0.6331, "step": 18245 }, { "epoch": 0.47, "grad_norm": 1.8341457843780518, "learning_rate": 1.1527273231177694e-05, "loss": 0.6296, "step": 18246 }, { "epoch": 0.47, "grad_norm": 1.0044419765472412, "learning_rate": 1.1526452821983978e-05, "loss": 0.4576, "step": 18247 }, { "epoch": 0.47, "grad_norm": 7.674452304840088, "learning_rate": 1.152563240227091e-05, "loss": 0.8626, "step": 18248 }, { "epoch": 0.47, "grad_norm": 2.7759296894073486, "learning_rate": 1.1524811972044138e-05, "loss": 0.7182, "step": 18249 }, { "epoch": 0.47, "grad_norm": 3.972261428833008, "learning_rate": 1.1523991531309321e-05, "loss": 0.6747, "step": 18250 }, { "epoch": 0.47, "grad_norm": 2.1106441020965576, "learning_rate": 1.1523171080072116e-05, "loss": 0.5609, "step": 18251 }, { "epoch": 0.47, "grad_norm": 1.4856072664260864, "learning_rate": 1.152235061833817e-05, "loss": 0.5616, "step": 18252 }, { "epoch": 0.47, "grad_norm": 1.3845397233963013, "learning_rate": 1.152153014611314e-05, "loss": 0.446, "step": 18253 }, { "epoch": 0.47, "grad_norm": 1.5256887674331665, "learning_rate": 1.152070966340268e-05, "loss": 0.5522, "step": 18254 }, { "epoch": 0.47, "grad_norm": 8.872364044189453, "learning_rate": 1.1519889170212446e-05, "loss": 0.5829, "step": 18255 }, { "epoch": 0.47, "grad_norm": 3.144350290298462, "learning_rate": 1.1519068666548087e-05, "loss": 0.8029, "step": 18256 }, { "epoch": 0.47, "grad_norm": 5.785163402557373, "learning_rate": 1.1518248152415263e-05, "loss": 0.6682, "step": 18257 }, { "epoch": 0.47, "grad_norm": 1.3231359720230103, "learning_rate": 1.1517427627819631e-05, "loss": 0.4686, "step": 18258 }, { "epoch": 0.47, "grad_norm": 1.2567293643951416, "learning_rate": 1.1516607092766833e-05, "loss": 0.5436, "step": 18259 }, { "epoch": 0.47, "grad_norm": 2.752561092376709, "learning_rate": 1.151578654726254e-05, "loss": 0.5499, "step": 18260 }, { "epoch": 0.47, "grad_norm": 1.021546721458435, "learning_rate": 1.1514965991312392e-05, "loss": 0.4035, "step": 18261 }, { "epoch": 0.47, "grad_norm": 2.041961431503296, "learning_rate": 1.1514145424922052e-05, "loss": 0.6027, "step": 18262 }, { "epoch": 0.47, "grad_norm": 2.808065414428711, "learning_rate": 1.1513324848097176e-05, "loss": 0.6791, "step": 18263 }, { "epoch": 0.47, "grad_norm": 2.5064918994903564, "learning_rate": 1.1512504260843411e-05, "loss": 0.8063, "step": 18264 }, { "epoch": 0.47, "grad_norm": 1.0991636514663696, "learning_rate": 1.1511683663166422e-05, "loss": 0.5057, "step": 18265 }, { "epoch": 0.47, "grad_norm": 3.4072177410125732, "learning_rate": 1.1510863055071855e-05, "loss": 0.5357, "step": 18266 }, { "epoch": 0.47, "grad_norm": 2.4009063243865967, "learning_rate": 1.1510042436565372e-05, "loss": 0.6275, "step": 18267 }, { "epoch": 0.47, "grad_norm": 1.5823544263839722, "learning_rate": 1.1509221807652623e-05, "loss": 0.5685, "step": 18268 }, { "epoch": 0.47, "grad_norm": 2.9168107509613037, "learning_rate": 1.1508401168339264e-05, "loss": 0.6709, "step": 18269 }, { "epoch": 0.47, "grad_norm": 3.979473829269409, "learning_rate": 1.1507580518630957e-05, "loss": 0.5156, "step": 18270 }, { "epoch": 0.47, "grad_norm": 2.471597671508789, "learning_rate": 1.1506759858533347e-05, "loss": 0.4413, "step": 18271 }, { "epoch": 0.47, "grad_norm": 1.5512537956237793, "learning_rate": 1.1505939188052098e-05, "loss": 0.7507, "step": 18272 }, { "epoch": 0.47, "grad_norm": 1.9081391096115112, "learning_rate": 1.1505118507192861e-05, "loss": 0.5265, "step": 18273 }, { "epoch": 0.47, "grad_norm": 1.4333975315093994, "learning_rate": 1.1504297815961291e-05, "loss": 0.5291, "step": 18274 }, { "epoch": 0.47, "grad_norm": 2.500753402709961, "learning_rate": 1.150347711436305e-05, "loss": 0.6665, "step": 18275 }, { "epoch": 0.47, "grad_norm": 1.6405597925186157, "learning_rate": 1.1502656402403785e-05, "loss": 0.4853, "step": 18276 }, { "epoch": 0.47, "grad_norm": 3.1897435188293457, "learning_rate": 1.150183568008916e-05, "loss": 0.5892, "step": 18277 }, { "epoch": 0.47, "grad_norm": 1.4290920495986938, "learning_rate": 1.1501014947424822e-05, "loss": 0.6878, "step": 18278 }, { "epoch": 0.47, "grad_norm": 1.7034621238708496, "learning_rate": 1.1500194204416437e-05, "loss": 0.7227, "step": 18279 }, { "epoch": 0.47, "grad_norm": 1.0805575847625732, "learning_rate": 1.1499373451069652e-05, "loss": 0.67, "step": 18280 }, { "epoch": 0.47, "grad_norm": 2.985032320022583, "learning_rate": 1.149855268739013e-05, "loss": 0.671, "step": 18281 }, { "epoch": 0.47, "grad_norm": 3.089989423751831, "learning_rate": 1.1497731913383525e-05, "loss": 0.7407, "step": 18282 }, { "epoch": 0.47, "grad_norm": 2.559587240219116, "learning_rate": 1.149691112905549e-05, "loss": 0.7541, "step": 18283 }, { "epoch": 0.47, "grad_norm": 1.0624322891235352, "learning_rate": 1.1496090334411687e-05, "loss": 0.6045, "step": 18284 }, { "epoch": 0.47, "grad_norm": 2.50020432472229, "learning_rate": 1.1495269529457766e-05, "loss": 0.5362, "step": 18285 }, { "epoch": 0.47, "grad_norm": 1.9479821920394897, "learning_rate": 1.1494448714199386e-05, "loss": 0.524, "step": 18286 }, { "epoch": 0.47, "grad_norm": 13.228917121887207, "learning_rate": 1.149362788864221e-05, "loss": 0.7541, "step": 18287 }, { "epoch": 0.47, "grad_norm": 3.6467721462249756, "learning_rate": 1.1492807052791883e-05, "loss": 0.6663, "step": 18288 }, { "epoch": 0.47, "grad_norm": 3.8348727226257324, "learning_rate": 1.1491986206654073e-05, "loss": 0.6579, "step": 18289 }, { "epoch": 0.47, "grad_norm": 1.1505662202835083, "learning_rate": 1.1491165350234427e-05, "loss": 0.5958, "step": 18290 }, { "epoch": 0.47, "grad_norm": 1.8532166481018066, "learning_rate": 1.1490344483538607e-05, "loss": 0.7416, "step": 18291 }, { "epoch": 0.47, "grad_norm": 1.2030783891677856, "learning_rate": 1.1489523606572273e-05, "loss": 0.7292, "step": 18292 }, { "epoch": 0.47, "grad_norm": 2.066948652267456, "learning_rate": 1.1488702719341073e-05, "loss": 0.5849, "step": 18293 }, { "epoch": 0.47, "grad_norm": 1.2327964305877686, "learning_rate": 1.1487881821850673e-05, "loss": 0.5115, "step": 18294 }, { "epoch": 0.47, "grad_norm": 1.459144949913025, "learning_rate": 1.1487060914106723e-05, "loss": 0.5294, "step": 18295 }, { "epoch": 0.47, "grad_norm": 1.4633084535598755, "learning_rate": 1.1486239996114888e-05, "loss": 0.3677, "step": 18296 }, { "epoch": 0.47, "grad_norm": 1.568238615989685, "learning_rate": 1.1485419067880816e-05, "loss": 0.511, "step": 18297 }, { "epoch": 0.47, "grad_norm": 1.3324611186981201, "learning_rate": 1.148459812941017e-05, "loss": 0.6525, "step": 18298 }, { "epoch": 0.47, "grad_norm": 1.2370673418045044, "learning_rate": 1.1483777180708608e-05, "loss": 0.5996, "step": 18299 }, { "epoch": 0.47, "grad_norm": 2.838826894760132, "learning_rate": 1.1482956221781785e-05, "loss": 0.3907, "step": 18300 }, { "epoch": 0.47, "grad_norm": 1.8843791484832764, "learning_rate": 1.148213525263536e-05, "loss": 0.4484, "step": 18301 }, { "epoch": 0.47, "grad_norm": 2.250002861022949, "learning_rate": 1.1481314273274988e-05, "loss": 0.6661, "step": 18302 }, { "epoch": 0.47, "grad_norm": 2.466843605041504, "learning_rate": 1.148049328370633e-05, "loss": 0.6333, "step": 18303 }, { "epoch": 0.47, "grad_norm": 1.5548129081726074, "learning_rate": 1.1479672283935044e-05, "loss": 0.574, "step": 18304 }, { "epoch": 0.47, "grad_norm": 2.570997476577759, "learning_rate": 1.1478851273966785e-05, "loss": 0.5606, "step": 18305 }, { "epoch": 0.47, "grad_norm": 1.1792060136795044, "learning_rate": 1.1478030253807214e-05, "loss": 0.602, "step": 18306 }, { "epoch": 0.47, "grad_norm": 4.840175628662109, "learning_rate": 1.1477209223461983e-05, "loss": 0.5537, "step": 18307 }, { "epoch": 0.47, "grad_norm": 2.011690855026245, "learning_rate": 1.1476388182936758e-05, "loss": 0.6498, "step": 18308 }, { "epoch": 0.47, "grad_norm": 1.3969138860702515, "learning_rate": 1.1475567132237188e-05, "loss": 0.5019, "step": 18309 }, { "epoch": 0.47, "grad_norm": 1.5901001691818237, "learning_rate": 1.147474607136894e-05, "loss": 0.4837, "step": 18310 }, { "epoch": 0.47, "grad_norm": 6.892782688140869, "learning_rate": 1.1473925000337668e-05, "loss": 0.7964, "step": 18311 }, { "epoch": 0.47, "grad_norm": 3.0977935791015625, "learning_rate": 1.1473103919149031e-05, "loss": 0.5044, "step": 18312 }, { "epoch": 0.47, "grad_norm": 2.4500088691711426, "learning_rate": 1.147228282780869e-05, "loss": 0.6465, "step": 18313 }, { "epoch": 0.47, "grad_norm": 1.8687818050384521, "learning_rate": 1.1471461726322297e-05, "loss": 0.4475, "step": 18314 }, { "epoch": 0.47, "grad_norm": 1.223544716835022, "learning_rate": 1.1470640614695514e-05, "loss": 0.4812, "step": 18315 }, { "epoch": 0.47, "grad_norm": 1.9322926998138428, "learning_rate": 1.1469819492934002e-05, "loss": 0.7191, "step": 18316 }, { "epoch": 0.47, "grad_norm": 2.5291740894317627, "learning_rate": 1.1468998361043417e-05, "loss": 0.6335, "step": 18317 }, { "epoch": 0.47, "grad_norm": 1.6078011989593506, "learning_rate": 1.146817721902942e-05, "loss": 0.5697, "step": 18318 }, { "epoch": 0.47, "grad_norm": 1.7869675159454346, "learning_rate": 1.1467356066897665e-05, "loss": 0.6659, "step": 18319 }, { "epoch": 0.47, "grad_norm": 1.7083224058151245, "learning_rate": 1.1466534904653816e-05, "loss": 0.5018, "step": 18320 }, { "epoch": 0.47, "grad_norm": 1.9754304885864258, "learning_rate": 1.1465713732303527e-05, "loss": 0.6334, "step": 18321 }, { "epoch": 0.47, "grad_norm": 4.590029239654541, "learning_rate": 1.1464892549852462e-05, "loss": 0.6192, "step": 18322 }, { "epoch": 0.47, "grad_norm": 3.527374505996704, "learning_rate": 1.1464071357306276e-05, "loss": 0.5313, "step": 18323 }, { "epoch": 0.47, "grad_norm": 1.893859624862671, "learning_rate": 1.1463250154670633e-05, "loss": 0.5997, "step": 18324 }, { "epoch": 0.47, "grad_norm": 1.6352328062057495, "learning_rate": 1.1462428941951189e-05, "loss": 0.6783, "step": 18325 }, { "epoch": 0.47, "grad_norm": 1.8458881378173828, "learning_rate": 1.14616077191536e-05, "loss": 0.604, "step": 18326 }, { "epoch": 0.47, "grad_norm": 1.8153787851333618, "learning_rate": 1.146078648628353e-05, "loss": 0.5713, "step": 18327 }, { "epoch": 0.47, "grad_norm": 1.3949164152145386, "learning_rate": 1.145996524334664e-05, "loss": 0.6693, "step": 18328 }, { "epoch": 0.47, "grad_norm": 2.7280685901641846, "learning_rate": 1.1459143990348585e-05, "loss": 0.8321, "step": 18329 }, { "epoch": 0.47, "grad_norm": 1.706913948059082, "learning_rate": 1.1458322727295028e-05, "loss": 0.682, "step": 18330 }, { "epoch": 0.47, "grad_norm": 1.193337082862854, "learning_rate": 1.1457501454191624e-05, "loss": 0.5193, "step": 18331 }, { "epoch": 0.47, "grad_norm": 1.478702187538147, "learning_rate": 1.1456680171044038e-05, "loss": 0.6008, "step": 18332 }, { "epoch": 0.47, "grad_norm": 6.761075496673584, "learning_rate": 1.1455858877857926e-05, "loss": 0.6705, "step": 18333 }, { "epoch": 0.47, "grad_norm": 2.076775312423706, "learning_rate": 1.145503757463895e-05, "loss": 0.485, "step": 18334 }, { "epoch": 0.47, "grad_norm": 1.5902366638183594, "learning_rate": 1.1454216261392772e-05, "loss": 0.5112, "step": 18335 }, { "epoch": 0.47, "grad_norm": 4.073602199554443, "learning_rate": 1.1453394938125044e-05, "loss": 0.6865, "step": 18336 }, { "epoch": 0.47, "grad_norm": 6.740777969360352, "learning_rate": 1.1452573604841432e-05, "loss": 0.6875, "step": 18337 }, { "epoch": 0.47, "grad_norm": 1.5431382656097412, "learning_rate": 1.1451752261547599e-05, "loss": 0.5037, "step": 18338 }, { "epoch": 0.47, "grad_norm": 2.288017511367798, "learning_rate": 1.1450930908249199e-05, "loss": 0.5358, "step": 18339 }, { "epoch": 0.47, "grad_norm": 2.9665281772613525, "learning_rate": 1.1450109544951895e-05, "loss": 0.6863, "step": 18340 }, { "epoch": 0.47, "grad_norm": 0.8782254457473755, "learning_rate": 1.1449288171661347e-05, "loss": 0.509, "step": 18341 }, { "epoch": 0.47, "grad_norm": 2.8081932067871094, "learning_rate": 1.1448466788383215e-05, "loss": 0.3857, "step": 18342 }, { "epoch": 0.47, "grad_norm": 2.0839931964874268, "learning_rate": 1.1447645395123162e-05, "loss": 0.6444, "step": 18343 }, { "epoch": 0.47, "grad_norm": 7.064395904541016, "learning_rate": 1.1446823991886842e-05, "loss": 0.7253, "step": 18344 }, { "epoch": 0.47, "grad_norm": 2.081650972366333, "learning_rate": 1.1446002578679925e-05, "loss": 0.4668, "step": 18345 }, { "epoch": 0.47, "grad_norm": 2.018667697906494, "learning_rate": 1.1445181155508063e-05, "loss": 0.521, "step": 18346 }, { "epoch": 0.47, "grad_norm": 3.475222110748291, "learning_rate": 1.1444359722376922e-05, "loss": 0.4815, "step": 18347 }, { "epoch": 0.47, "grad_norm": 1.424303650856018, "learning_rate": 1.144353827929216e-05, "loss": 0.4047, "step": 18348 }, { "epoch": 0.47, "grad_norm": 5.191776275634766, "learning_rate": 1.144271682625944e-05, "loss": 0.5519, "step": 18349 }, { "epoch": 0.47, "grad_norm": 2.545289993286133, "learning_rate": 1.1441895363284426e-05, "loss": 0.5929, "step": 18350 }, { "epoch": 0.47, "grad_norm": 1.185627818107605, "learning_rate": 1.144107389037277e-05, "loss": 0.4478, "step": 18351 }, { "epoch": 0.47, "grad_norm": 1.0952872037887573, "learning_rate": 1.144025240753014e-05, "loss": 0.446, "step": 18352 }, { "epoch": 0.47, "grad_norm": 2.632857322692871, "learning_rate": 1.1439430914762193e-05, "loss": 0.6958, "step": 18353 }, { "epoch": 0.47, "grad_norm": 1.6537103652954102, "learning_rate": 1.1438609412074592e-05, "loss": 0.4834, "step": 18354 }, { "epoch": 0.47, "grad_norm": 2.642991065979004, "learning_rate": 1.1437787899473003e-05, "loss": 0.5905, "step": 18355 }, { "epoch": 0.47, "grad_norm": 3.5703256130218506, "learning_rate": 1.1436966376963081e-05, "loss": 0.4949, "step": 18356 }, { "epoch": 0.47, "grad_norm": 1.3894566297531128, "learning_rate": 1.143614484455049e-05, "loss": 0.5359, "step": 18357 }, { "epoch": 0.47, "grad_norm": 1.4709467887878418, "learning_rate": 1.143532330224089e-05, "loss": 0.6009, "step": 18358 }, { "epoch": 0.47, "grad_norm": 3.696847915649414, "learning_rate": 1.1434501750039946e-05, "loss": 0.6518, "step": 18359 }, { "epoch": 0.47, "grad_norm": 1.88527512550354, "learning_rate": 1.1433680187953313e-05, "loss": 0.5144, "step": 18360 }, { "epoch": 0.47, "grad_norm": 2.6092169284820557, "learning_rate": 1.143285861598666e-05, "loss": 0.5169, "step": 18361 }, { "epoch": 0.47, "grad_norm": 2.450058698654175, "learning_rate": 1.1432037034145646e-05, "loss": 0.6192, "step": 18362 }, { "epoch": 0.47, "grad_norm": 2.6159658432006836, "learning_rate": 1.143121544243593e-05, "loss": 0.5009, "step": 18363 }, { "epoch": 0.47, "grad_norm": 2.0099072456359863, "learning_rate": 1.1430393840863179e-05, "loss": 0.5587, "step": 18364 }, { "epoch": 0.47, "grad_norm": 1.8148953914642334, "learning_rate": 1.142957222943305e-05, "loss": 0.7653, "step": 18365 }, { "epoch": 0.47, "grad_norm": 1.733198642730713, "learning_rate": 1.1428750608151207e-05, "loss": 0.5757, "step": 18366 }, { "epoch": 0.47, "grad_norm": 6.984241008758545, "learning_rate": 1.1427928977023315e-05, "loss": 0.5251, "step": 18367 }, { "epoch": 0.47, "grad_norm": 1.9798429012298584, "learning_rate": 1.1427107336055031e-05, "loss": 0.5503, "step": 18368 }, { "epoch": 0.47, "grad_norm": 1.8242117166519165, "learning_rate": 1.1426285685252023e-05, "loss": 0.6811, "step": 18369 }, { "epoch": 0.47, "grad_norm": 1.5028923749923706, "learning_rate": 1.1425464024619949e-05, "loss": 0.6888, "step": 18370 }, { "epoch": 0.47, "grad_norm": 2.2585651874542236, "learning_rate": 1.1424642354164468e-05, "loss": 0.695, "step": 18371 }, { "epoch": 0.47, "grad_norm": 1.5252071619033813, "learning_rate": 1.1423820673891253e-05, "loss": 0.5212, "step": 18372 }, { "epoch": 0.47, "grad_norm": 1.5731345415115356, "learning_rate": 1.1422998983805958e-05, "loss": 0.5338, "step": 18373 }, { "epoch": 0.47, "grad_norm": 7.192972660064697, "learning_rate": 1.1422177283914249e-05, "loss": 0.5137, "step": 18374 }, { "epoch": 0.47, "grad_norm": 5.272173881530762, "learning_rate": 1.1421355574221786e-05, "loss": 0.715, "step": 18375 }, { "epoch": 0.47, "grad_norm": 2.187084913253784, "learning_rate": 1.1420533854734236e-05, "loss": 0.5506, "step": 18376 }, { "epoch": 0.47, "grad_norm": 3.83848237991333, "learning_rate": 1.1419712125457256e-05, "loss": 0.6406, "step": 18377 }, { "epoch": 0.47, "grad_norm": 2.1012706756591797, "learning_rate": 1.1418890386396513e-05, "loss": 0.5598, "step": 18378 }, { "epoch": 0.47, "grad_norm": 1.0951496362686157, "learning_rate": 1.1418068637557668e-05, "loss": 0.5272, "step": 18379 }, { "epoch": 0.47, "grad_norm": 1.0746711492538452, "learning_rate": 1.1417246878946386e-05, "loss": 0.5488, "step": 18380 }, { "epoch": 0.47, "grad_norm": 1.4305038452148438, "learning_rate": 1.1416425110568331e-05, "loss": 0.5116, "step": 18381 }, { "epoch": 0.47, "grad_norm": 2.4655041694641113, "learning_rate": 1.1415603332429162e-05, "loss": 0.4803, "step": 18382 }, { "epoch": 0.47, "grad_norm": 1.8237805366516113, "learning_rate": 1.1414781544534543e-05, "loss": 0.6373, "step": 18383 }, { "epoch": 0.47, "grad_norm": 7.259571552276611, "learning_rate": 1.141395974689014e-05, "loss": 0.5077, "step": 18384 }, { "epoch": 0.47, "grad_norm": 4.29972505569458, "learning_rate": 1.1413137939501615e-05, "loss": 0.5362, "step": 18385 }, { "epoch": 0.47, "grad_norm": 1.8935060501098633, "learning_rate": 1.1412316122374632e-05, "loss": 0.5163, "step": 18386 }, { "epoch": 0.47, "grad_norm": 1.2364226579666138, "learning_rate": 1.1411494295514851e-05, "loss": 0.4654, "step": 18387 }, { "epoch": 0.47, "grad_norm": 1.3591232299804688, "learning_rate": 1.141067245892794e-05, "loss": 0.5636, "step": 18388 }, { "epoch": 0.47, "grad_norm": 2.8750932216644287, "learning_rate": 1.140985061261956e-05, "loss": 0.739, "step": 18389 }, { "epoch": 0.47, "grad_norm": 1.0908726453781128, "learning_rate": 1.1409028756595374e-05, "loss": 0.4215, "step": 18390 }, { "epoch": 0.47, "grad_norm": 3.9418838024139404, "learning_rate": 1.140820689086105e-05, "loss": 0.5891, "step": 18391 }, { "epoch": 0.47, "grad_norm": 1.0185718536376953, "learning_rate": 1.1407385015422245e-05, "loss": 0.4781, "step": 18392 }, { "epoch": 0.47, "grad_norm": 1.4226763248443604, "learning_rate": 1.1406563130284632e-05, "loss": 0.612, "step": 18393 }, { "epoch": 0.47, "grad_norm": 4.836442947387695, "learning_rate": 1.1405741235453864e-05, "loss": 0.6326, "step": 18394 }, { "epoch": 0.47, "grad_norm": 1.6103020906448364, "learning_rate": 1.1404919330935612e-05, "loss": 0.5651, "step": 18395 }, { "epoch": 0.47, "grad_norm": 1.301701307296753, "learning_rate": 1.140409741673554e-05, "loss": 0.606, "step": 18396 }, { "epoch": 0.47, "grad_norm": 6.647199630737305, "learning_rate": 1.1403275492859312e-05, "loss": 0.7664, "step": 18397 }, { "epoch": 0.47, "grad_norm": 3.734708786010742, "learning_rate": 1.140245355931259e-05, "loss": 0.5815, "step": 18398 }, { "epoch": 0.47, "grad_norm": 1.4172970056533813, "learning_rate": 1.1401631616101036e-05, "loss": 0.5573, "step": 18399 }, { "epoch": 0.47, "grad_norm": 1.5154609680175781, "learning_rate": 1.140080966323032e-05, "loss": 0.5694, "step": 18400 }, { "epoch": 0.47, "grad_norm": 2.4752421379089355, "learning_rate": 1.1399987700706105e-05, "loss": 0.482, "step": 18401 }, { "epoch": 0.47, "grad_norm": 1.441098690032959, "learning_rate": 1.1399165728534051e-05, "loss": 0.6792, "step": 18402 }, { "epoch": 0.47, "grad_norm": 12.516620635986328, "learning_rate": 1.1398343746719831e-05, "loss": 0.6714, "step": 18403 }, { "epoch": 0.47, "grad_norm": 5.223119735717773, "learning_rate": 1.13975217552691e-05, "loss": 0.5991, "step": 18404 }, { "epoch": 0.47, "grad_norm": 1.33162260055542, "learning_rate": 1.139669975418753e-05, "loss": 0.7243, "step": 18405 }, { "epoch": 0.47, "grad_norm": 4.639612197875977, "learning_rate": 1.1395877743480782e-05, "loss": 0.6074, "step": 18406 }, { "epoch": 0.47, "grad_norm": 2.5066583156585693, "learning_rate": 1.1395055723154518e-05, "loss": 0.5658, "step": 18407 }, { "epoch": 0.47, "grad_norm": 2.256397247314453, "learning_rate": 1.1394233693214412e-05, "loss": 0.4143, "step": 18408 }, { "epoch": 0.47, "grad_norm": 2.8753631114959717, "learning_rate": 1.1393411653666118e-05, "loss": 0.5399, "step": 18409 }, { "epoch": 0.47, "grad_norm": 1.2157608270645142, "learning_rate": 1.1392589604515312e-05, "loss": 0.4326, "step": 18410 }, { "epoch": 0.47, "grad_norm": 1.1795988082885742, "learning_rate": 1.139176754576765e-05, "loss": 0.6233, "step": 18411 }, { "epoch": 0.47, "grad_norm": 1.6674954891204834, "learning_rate": 1.1390945477428799e-05, "loss": 0.4569, "step": 18412 }, { "epoch": 0.47, "grad_norm": 1.214412808418274, "learning_rate": 1.1390123399504427e-05, "loss": 0.6407, "step": 18413 }, { "epoch": 0.47, "grad_norm": 1.3448420763015747, "learning_rate": 1.1389301312000199e-05, "loss": 0.4344, "step": 18414 }, { "epoch": 0.47, "grad_norm": 3.923797845840454, "learning_rate": 1.138847921492178e-05, "loss": 0.829, "step": 18415 }, { "epoch": 0.47, "grad_norm": 2.2625041007995605, "learning_rate": 1.138765710827483e-05, "loss": 0.5036, "step": 18416 }, { "epoch": 0.47, "grad_norm": 6.918336391448975, "learning_rate": 1.1386834992065024e-05, "loss": 0.6246, "step": 18417 }, { "epoch": 0.47, "grad_norm": 1.2825007438659668, "learning_rate": 1.1386012866298019e-05, "loss": 0.5229, "step": 18418 }, { "epoch": 0.47, "grad_norm": 4.373631000518799, "learning_rate": 1.1385190730979486e-05, "loss": 0.5504, "step": 18419 }, { "epoch": 0.47, "grad_norm": 1.0992401838302612, "learning_rate": 1.1384368586115091e-05, "loss": 0.5564, "step": 18420 }, { "epoch": 0.47, "grad_norm": 1.1421992778778076, "learning_rate": 1.1383546431710492e-05, "loss": 0.6677, "step": 18421 }, { "epoch": 0.47, "grad_norm": 1.355176568031311, "learning_rate": 1.1382724267771366e-05, "loss": 0.5834, "step": 18422 }, { "epoch": 0.47, "grad_norm": 2.1615419387817383, "learning_rate": 1.138190209430337e-05, "loss": 0.3976, "step": 18423 }, { "epoch": 0.47, "grad_norm": 5.694173812866211, "learning_rate": 1.1381079911312171e-05, "loss": 0.4633, "step": 18424 }, { "epoch": 0.47, "grad_norm": 2.11185622215271, "learning_rate": 1.138025771880344e-05, "loss": 0.5486, "step": 18425 }, { "epoch": 0.47, "grad_norm": 2.7953639030456543, "learning_rate": 1.137943551678284e-05, "loss": 0.5754, "step": 18426 }, { "epoch": 0.47, "grad_norm": 1.7192999124526978, "learning_rate": 1.1378613305256036e-05, "loss": 0.6206, "step": 18427 }, { "epoch": 0.47, "grad_norm": 1.6402337551116943, "learning_rate": 1.1377791084228695e-05, "loss": 0.4629, "step": 18428 }, { "epoch": 0.47, "grad_norm": 2.212249279022217, "learning_rate": 1.1376968853706485e-05, "loss": 0.5823, "step": 18429 }, { "epoch": 0.47, "grad_norm": 2.7530713081359863, "learning_rate": 1.1376146613695069e-05, "loss": 0.5427, "step": 18430 }, { "epoch": 0.47, "grad_norm": 3.468492269515991, "learning_rate": 1.1375324364200113e-05, "loss": 0.6432, "step": 18431 }, { "epoch": 0.47, "grad_norm": 11.354146003723145, "learning_rate": 1.1374502105227289e-05, "loss": 0.678, "step": 18432 }, { "epoch": 0.47, "grad_norm": 7.052667617797852, "learning_rate": 1.1373679836782258e-05, "loss": 0.557, "step": 18433 }, { "epoch": 0.47, "grad_norm": 6.274240493774414, "learning_rate": 1.1372857558870694e-05, "loss": 0.5146, "step": 18434 }, { "epoch": 0.47, "grad_norm": 2.2626819610595703, "learning_rate": 1.1372035271498252e-05, "loss": 0.6983, "step": 18435 }, { "epoch": 0.47, "grad_norm": 2.354966402053833, "learning_rate": 1.1371212974670607e-05, "loss": 0.497, "step": 18436 }, { "epoch": 0.47, "grad_norm": 6.458771228790283, "learning_rate": 1.1370390668393426e-05, "loss": 0.5367, "step": 18437 }, { "epoch": 0.47, "grad_norm": 1.8142036199569702, "learning_rate": 1.1369568352672369e-05, "loss": 0.5457, "step": 18438 }, { "epoch": 0.47, "grad_norm": 15.179179191589355, "learning_rate": 1.1368746027513112e-05, "loss": 0.5173, "step": 18439 }, { "epoch": 0.47, "grad_norm": 2.050123453140259, "learning_rate": 1.1367923692921313e-05, "loss": 0.7626, "step": 18440 }, { "epoch": 0.47, "grad_norm": 2.171574831008911, "learning_rate": 1.1367101348902645e-05, "loss": 0.67, "step": 18441 }, { "epoch": 0.47, "grad_norm": 13.048636436462402, "learning_rate": 1.1366278995462774e-05, "loss": 0.6843, "step": 18442 }, { "epoch": 0.47, "grad_norm": 1.4464385509490967, "learning_rate": 1.1365456632607364e-05, "loss": 0.5104, "step": 18443 }, { "epoch": 0.47, "grad_norm": 1.277248740196228, "learning_rate": 1.136463426034209e-05, "loss": 0.5805, "step": 18444 }, { "epoch": 0.47, "grad_norm": 3.561440944671631, "learning_rate": 1.1363811878672609e-05, "loss": 0.5921, "step": 18445 }, { "epoch": 0.47, "grad_norm": 2.313413619995117, "learning_rate": 1.1362989487604597e-05, "loss": 0.5602, "step": 18446 }, { "epoch": 0.47, "grad_norm": 2.9788496494293213, "learning_rate": 1.1362167087143714e-05, "loss": 0.5853, "step": 18447 }, { "epoch": 0.47, "grad_norm": 1.746537208557129, "learning_rate": 1.1361344677295632e-05, "loss": 0.6226, "step": 18448 }, { "epoch": 0.47, "grad_norm": 1.1222906112670898, "learning_rate": 1.136052225806602e-05, "loss": 0.5417, "step": 18449 }, { "epoch": 0.47, "grad_norm": 4.419636249542236, "learning_rate": 1.1359699829460542e-05, "loss": 0.682, "step": 18450 }, { "epoch": 0.47, "grad_norm": 1.5053256750106812, "learning_rate": 1.1358877391484868e-05, "loss": 0.524, "step": 18451 }, { "epoch": 0.47, "grad_norm": 1.401739239692688, "learning_rate": 1.1358054944144663e-05, "loss": 0.5845, "step": 18452 }, { "epoch": 0.47, "grad_norm": 2.0798490047454834, "learning_rate": 1.13572324874456e-05, "loss": 0.4426, "step": 18453 }, { "epoch": 0.47, "grad_norm": 2.3017678260803223, "learning_rate": 1.1356410021393341e-05, "loss": 0.4889, "step": 18454 }, { "epoch": 0.47, "grad_norm": 1.2483927011489868, "learning_rate": 1.1355587545993555e-05, "loss": 0.4905, "step": 18455 }, { "epoch": 0.47, "grad_norm": 4.303004741668701, "learning_rate": 1.1354765061251916e-05, "loss": 0.7128, "step": 18456 }, { "epoch": 0.47, "grad_norm": 2.867966413497925, "learning_rate": 1.1353942567174083e-05, "loss": 0.5823, "step": 18457 }, { "epoch": 0.47, "grad_norm": 2.6345298290252686, "learning_rate": 1.1353120063765727e-05, "loss": 0.6297, "step": 18458 }, { "epoch": 0.47, "grad_norm": 2.3636255264282227, "learning_rate": 1.1352297551032523e-05, "loss": 0.5649, "step": 18459 }, { "epoch": 0.47, "grad_norm": 1.3026771545410156, "learning_rate": 1.135147502898013e-05, "loss": 0.5149, "step": 18460 }, { "epoch": 0.47, "grad_norm": 4.865105628967285, "learning_rate": 1.1350652497614223e-05, "loss": 0.478, "step": 18461 }, { "epoch": 0.47, "grad_norm": 2.9881041049957275, "learning_rate": 1.1349829956940463e-05, "loss": 0.5567, "step": 18462 }, { "epoch": 0.47, "grad_norm": 2.0356853008270264, "learning_rate": 1.1349007406964525e-05, "loss": 0.5735, "step": 18463 }, { "epoch": 0.47, "grad_norm": 1.0946727991104126, "learning_rate": 1.1348184847692078e-05, "loss": 0.4036, "step": 18464 }, { "epoch": 0.47, "grad_norm": 2.166959047317505, "learning_rate": 1.1347362279128786e-05, "loss": 0.6097, "step": 18465 }, { "epoch": 0.47, "grad_norm": 0.8928899765014648, "learning_rate": 1.1346539701280321e-05, "loss": 0.5294, "step": 18466 }, { "epoch": 0.47, "grad_norm": 1.360944151878357, "learning_rate": 1.1345717114152349e-05, "loss": 0.5116, "step": 18467 }, { "epoch": 0.47, "grad_norm": 1.8668179512023926, "learning_rate": 1.134489451775054e-05, "loss": 0.5021, "step": 18468 }, { "epoch": 0.47, "grad_norm": 1.3700237274169922, "learning_rate": 1.1344071912080564e-05, "loss": 0.5616, "step": 18469 }, { "epoch": 0.47, "grad_norm": 1.3300762176513672, "learning_rate": 1.1343249297148086e-05, "loss": 0.6213, "step": 18470 }, { "epoch": 0.47, "grad_norm": 1.819089412689209, "learning_rate": 1.1342426672958782e-05, "loss": 0.4845, "step": 18471 }, { "epoch": 0.47, "grad_norm": 1.4807558059692383, "learning_rate": 1.1341604039518313e-05, "loss": 0.4724, "step": 18472 }, { "epoch": 0.47, "grad_norm": 8.550365447998047, "learning_rate": 1.1340781396832354e-05, "loss": 0.5129, "step": 18473 }, { "epoch": 0.47, "grad_norm": 1.7151000499725342, "learning_rate": 1.1339958744906571e-05, "loss": 0.562, "step": 18474 }, { "epoch": 0.47, "grad_norm": 1.8908835649490356, "learning_rate": 1.1339136083746633e-05, "loss": 0.5181, "step": 18475 }, { "epoch": 0.47, "grad_norm": 2.4268081188201904, "learning_rate": 1.1338313413358213e-05, "loss": 0.608, "step": 18476 }, { "epoch": 0.47, "grad_norm": 1.6586593389511108, "learning_rate": 1.1337490733746977e-05, "loss": 0.5784, "step": 18477 }, { "epoch": 0.47, "grad_norm": 1.6765754222869873, "learning_rate": 1.1336668044918598e-05, "loss": 0.5077, "step": 18478 }, { "epoch": 0.47, "grad_norm": 1.8662269115447998, "learning_rate": 1.1335845346878736e-05, "loss": 0.5175, "step": 18479 }, { "epoch": 0.47, "grad_norm": 5.634096145629883, "learning_rate": 1.133502263963307e-05, "loss": 0.6462, "step": 18480 }, { "epoch": 0.47, "grad_norm": 2.8453314304351807, "learning_rate": 1.133419992318727e-05, "loss": 0.5487, "step": 18481 }, { "epoch": 0.47, "grad_norm": 0.8921924233436584, "learning_rate": 1.1333377197546997e-05, "loss": 0.4531, "step": 18482 }, { "epoch": 0.47, "grad_norm": 3.3093812465667725, "learning_rate": 1.133255446271793e-05, "loss": 0.7042, "step": 18483 }, { "epoch": 0.47, "grad_norm": 7.587646007537842, "learning_rate": 1.1331731718705735e-05, "loss": 0.7328, "step": 18484 }, { "epoch": 0.47, "grad_norm": 1.130275845527649, "learning_rate": 1.133090896551608e-05, "loss": 0.4037, "step": 18485 }, { "epoch": 0.47, "grad_norm": 1.7752268314361572, "learning_rate": 1.1330086203154637e-05, "loss": 0.5738, "step": 18486 }, { "epoch": 0.47, "grad_norm": 1.5569250583648682, "learning_rate": 1.1329263431627074e-05, "loss": 0.6111, "step": 18487 }, { "epoch": 0.47, "grad_norm": 10.442520141601562, "learning_rate": 1.1328440650939066e-05, "loss": 0.7346, "step": 18488 }, { "epoch": 0.47, "grad_norm": 1.610697865486145, "learning_rate": 1.1327617861096276e-05, "loss": 0.574, "step": 18489 }, { "epoch": 0.47, "grad_norm": 2.6983509063720703, "learning_rate": 1.132679506210438e-05, "loss": 0.7942, "step": 18490 }, { "epoch": 0.47, "grad_norm": 1.5486698150634766, "learning_rate": 1.1325972253969044e-05, "loss": 0.4962, "step": 18491 }, { "epoch": 0.47, "grad_norm": 1.7036786079406738, "learning_rate": 1.1325149436695942e-05, "loss": 0.5472, "step": 18492 }, { "epoch": 0.47, "grad_norm": 4.238677978515625, "learning_rate": 1.1324326610290744e-05, "loss": 0.6253, "step": 18493 }, { "epoch": 0.47, "grad_norm": 3.2030770778656006, "learning_rate": 1.1323503774759116e-05, "loss": 0.5754, "step": 18494 }, { "epoch": 0.47, "grad_norm": 1.643284559249878, "learning_rate": 1.1322680930106733e-05, "loss": 0.5508, "step": 18495 }, { "epoch": 0.47, "grad_norm": 1.9241195917129517, "learning_rate": 1.1321858076339265e-05, "loss": 0.533, "step": 18496 }, { "epoch": 0.47, "grad_norm": 1.5897655487060547, "learning_rate": 1.1321035213462382e-05, "loss": 0.4846, "step": 18497 }, { "epoch": 0.47, "grad_norm": 4.0537109375, "learning_rate": 1.1320212341481751e-05, "loss": 0.5208, "step": 18498 }, { "epoch": 0.47, "grad_norm": 17.626924514770508, "learning_rate": 1.1319389460403048e-05, "loss": 0.6018, "step": 18499 }, { "epoch": 0.47, "grad_norm": 1.8413890600204468, "learning_rate": 1.1318566570231942e-05, "loss": 0.6558, "step": 18500 }, { "epoch": 0.47, "grad_norm": 13.767271041870117, "learning_rate": 1.1317743670974103e-05, "loss": 0.7201, "step": 18501 }, { "epoch": 0.47, "grad_norm": 14.503755569458008, "learning_rate": 1.1316920762635202e-05, "loss": 0.5474, "step": 18502 }, { "epoch": 0.47, "grad_norm": 1.9826573133468628, "learning_rate": 1.131609784522091e-05, "loss": 0.5501, "step": 18503 }, { "epoch": 0.47, "grad_norm": 4.625516891479492, "learning_rate": 1.1315274918736899e-05, "loss": 0.3831, "step": 18504 }, { "epoch": 0.47, "grad_norm": 2.874119281768799, "learning_rate": 1.131445198318884e-05, "loss": 0.5681, "step": 18505 }, { "epoch": 0.47, "grad_norm": 1.074669361114502, "learning_rate": 1.1313629038582406e-05, "loss": 0.6282, "step": 18506 }, { "epoch": 0.47, "grad_norm": 2.1705639362335205, "learning_rate": 1.1312806084923265e-05, "loss": 0.5628, "step": 18507 }, { "epoch": 0.47, "grad_norm": 1.5643409490585327, "learning_rate": 1.1311983122217084e-05, "loss": 0.6661, "step": 18508 }, { "epoch": 0.47, "grad_norm": 2.097992181777954, "learning_rate": 1.1311160150469547e-05, "loss": 0.5358, "step": 18509 }, { "epoch": 0.47, "grad_norm": 2.439589500427246, "learning_rate": 1.1310337169686314e-05, "loss": 0.7268, "step": 18510 }, { "epoch": 0.47, "grad_norm": 1.5993595123291016, "learning_rate": 1.130951417987306e-05, "loss": 0.639, "step": 18511 }, { "epoch": 0.47, "grad_norm": 1.6372146606445312, "learning_rate": 1.1308691181035458e-05, "loss": 0.5638, "step": 18512 }, { "epoch": 0.47, "grad_norm": 1.253866195678711, "learning_rate": 1.1307868173179179e-05, "loss": 0.4995, "step": 18513 }, { "epoch": 0.47, "grad_norm": 1.6287959814071655, "learning_rate": 1.1307045156309895e-05, "loss": 0.5361, "step": 18514 }, { "epoch": 0.47, "grad_norm": 1.7172592878341675, "learning_rate": 1.1306222130433276e-05, "loss": 0.5195, "step": 18515 }, { "epoch": 0.47, "grad_norm": 2.4169819355010986, "learning_rate": 1.1305399095554994e-05, "loss": 0.5959, "step": 18516 }, { "epoch": 0.47, "grad_norm": 3.580190896987915, "learning_rate": 1.1304576051680722e-05, "loss": 0.6252, "step": 18517 }, { "epoch": 0.47, "grad_norm": 1.269972562789917, "learning_rate": 1.130375299881613e-05, "loss": 0.4284, "step": 18518 }, { "epoch": 0.47, "grad_norm": 1.5859020948410034, "learning_rate": 1.1302929936966895e-05, "loss": 0.6511, "step": 18519 }, { "epoch": 0.47, "grad_norm": 1.6733574867248535, "learning_rate": 1.1302106866138685e-05, "loss": 0.6602, "step": 18520 }, { "epoch": 0.47, "grad_norm": 9.396330833435059, "learning_rate": 1.1301283786337172e-05, "loss": 0.4017, "step": 18521 }, { "epoch": 0.47, "grad_norm": 1.4348087310791016, "learning_rate": 1.1300460697568026e-05, "loss": 0.6567, "step": 18522 }, { "epoch": 0.47, "grad_norm": 1.6566816568374634, "learning_rate": 1.1299637599836923e-05, "loss": 0.4794, "step": 18523 }, { "epoch": 0.47, "grad_norm": 7.677512168884277, "learning_rate": 1.1298814493149537e-05, "loss": 0.6341, "step": 18524 }, { "epoch": 0.47, "grad_norm": 5.193102836608887, "learning_rate": 1.1297991377511534e-05, "loss": 0.6384, "step": 18525 }, { "epoch": 0.47, "grad_norm": 2.5643680095672607, "learning_rate": 1.1297168252928592e-05, "loss": 0.6168, "step": 18526 }, { "epoch": 0.47, "grad_norm": 1.401579737663269, "learning_rate": 1.129634511940638e-05, "loss": 0.7291, "step": 18527 }, { "epoch": 0.47, "grad_norm": 9.33685302734375, "learning_rate": 1.1295521976950572e-05, "loss": 0.6994, "step": 18528 }, { "epoch": 0.47, "grad_norm": 1.7659960985183716, "learning_rate": 1.129469882556684e-05, "loss": 0.5344, "step": 18529 }, { "epoch": 0.47, "grad_norm": 1.193927526473999, "learning_rate": 1.1293875665260858e-05, "loss": 0.6499, "step": 18530 }, { "epoch": 0.47, "grad_norm": 1.52060866355896, "learning_rate": 1.1293052496038299e-05, "loss": 0.5052, "step": 18531 }, { "epoch": 0.47, "grad_norm": 1.1523455381393433, "learning_rate": 1.1292229317904834e-05, "loss": 0.5288, "step": 18532 }, { "epoch": 0.48, "grad_norm": 2.768571615219116, "learning_rate": 1.1291406130866134e-05, "loss": 0.6076, "step": 18533 }, { "epoch": 0.48, "grad_norm": 1.9798606634140015, "learning_rate": 1.1290582934927878e-05, "loss": 0.6387, "step": 18534 }, { "epoch": 0.48, "grad_norm": 1.4761484861373901, "learning_rate": 1.1289759730095733e-05, "loss": 0.6243, "step": 18535 }, { "epoch": 0.48, "grad_norm": 2.473684310913086, "learning_rate": 1.1288936516375376e-05, "loss": 0.7365, "step": 18536 }, { "epoch": 0.48, "grad_norm": 2.839796304702759, "learning_rate": 1.1288113293772476e-05, "loss": 0.6643, "step": 18537 }, { "epoch": 0.48, "grad_norm": 1.7539936304092407, "learning_rate": 1.1287290062292711e-05, "loss": 0.5476, "step": 18538 }, { "epoch": 0.48, "grad_norm": 5.830857753753662, "learning_rate": 1.1286466821941749e-05, "loss": 0.6497, "step": 18539 }, { "epoch": 0.48, "grad_norm": 2.211050510406494, "learning_rate": 1.1285643572725266e-05, "loss": 0.5844, "step": 18540 }, { "epoch": 0.48, "grad_norm": 2.8286304473876953, "learning_rate": 1.1284820314648937e-05, "loss": 0.6914, "step": 18541 }, { "epoch": 0.48, "grad_norm": 4.8351006507873535, "learning_rate": 1.1283997047718432e-05, "loss": 0.6075, "step": 18542 }, { "epoch": 0.48, "grad_norm": 1.2757824659347534, "learning_rate": 1.1283173771939428e-05, "loss": 0.6084, "step": 18543 }, { "epoch": 0.48, "grad_norm": 2.247114658355713, "learning_rate": 1.1282350487317594e-05, "loss": 0.5289, "step": 18544 }, { "epoch": 0.48, "grad_norm": 2.052699327468872, "learning_rate": 1.1281527193858609e-05, "loss": 0.5095, "step": 18545 }, { "epoch": 0.48, "grad_norm": 4.336716651916504, "learning_rate": 1.128070389156814e-05, "loss": 0.5865, "step": 18546 }, { "epoch": 0.48, "grad_norm": 3.7239954471588135, "learning_rate": 1.1279880580451867e-05, "loss": 0.6099, "step": 18547 }, { "epoch": 0.48, "grad_norm": 1.6265708208084106, "learning_rate": 1.1279057260515463e-05, "loss": 0.6035, "step": 18548 }, { "epoch": 0.48, "grad_norm": 1.6666916608810425, "learning_rate": 1.1278233931764596e-05, "loss": 0.5512, "step": 18549 }, { "epoch": 0.48, "grad_norm": 1.5408622026443481, "learning_rate": 1.1277410594204948e-05, "loss": 0.6696, "step": 18550 }, { "epoch": 0.48, "grad_norm": 3.7556064128875732, "learning_rate": 1.1276587247842183e-05, "loss": 0.6996, "step": 18551 }, { "epoch": 0.48, "grad_norm": 0.9425685405731201, "learning_rate": 1.1275763892681984e-05, "loss": 0.5764, "step": 18552 }, { "epoch": 0.48, "grad_norm": 3.4099364280700684, "learning_rate": 1.1274940528730023e-05, "loss": 0.5594, "step": 18553 }, { "epoch": 0.48, "grad_norm": 1.2634515762329102, "learning_rate": 1.127411715599197e-05, "loss": 0.5571, "step": 18554 }, { "epoch": 0.48, "grad_norm": 2.7764225006103516, "learning_rate": 1.1273293774473506e-05, "loss": 0.6405, "step": 18555 }, { "epoch": 0.48, "grad_norm": 1.392107605934143, "learning_rate": 1.1272470384180296e-05, "loss": 0.5632, "step": 18556 }, { "epoch": 0.48, "grad_norm": 2.053830146789551, "learning_rate": 1.1271646985118023e-05, "loss": 0.5266, "step": 18557 }, { "epoch": 0.48, "grad_norm": 6.268481731414795, "learning_rate": 1.1270823577292358e-05, "loss": 0.715, "step": 18558 }, { "epoch": 0.48, "grad_norm": 1.5964401960372925, "learning_rate": 1.1270000160708974e-05, "loss": 0.567, "step": 18559 }, { "epoch": 0.48, "grad_norm": 3.48654842376709, "learning_rate": 1.1269176735373549e-05, "loss": 0.7276, "step": 18560 }, { "epoch": 0.48, "grad_norm": 1.3889201879501343, "learning_rate": 1.1268353301291752e-05, "loss": 0.5173, "step": 18561 }, { "epoch": 0.48, "grad_norm": 2.3933775424957275, "learning_rate": 1.1267529858469264e-05, "loss": 0.4687, "step": 18562 }, { "epoch": 0.48, "grad_norm": 1.8303565979003906, "learning_rate": 1.1266706406911755e-05, "loss": 0.5459, "step": 18563 }, { "epoch": 0.48, "grad_norm": 1.499028205871582, "learning_rate": 1.12658829466249e-05, "loss": 0.51, "step": 18564 }, { "epoch": 0.48, "grad_norm": 3.240095853805542, "learning_rate": 1.1265059477614377e-05, "loss": 0.7303, "step": 18565 }, { "epoch": 0.48, "grad_norm": 1.8397701978683472, "learning_rate": 1.1264235999885857e-05, "loss": 0.6057, "step": 18566 }, { "epoch": 0.48, "grad_norm": 0.9590002298355103, "learning_rate": 1.126341251344502e-05, "loss": 0.4215, "step": 18567 }, { "epoch": 0.48, "grad_norm": 7.2080979347229, "learning_rate": 1.1262589018297536e-05, "loss": 0.6122, "step": 18568 }, { "epoch": 0.48, "grad_norm": 2.870119094848633, "learning_rate": 1.1261765514449081e-05, "loss": 0.6008, "step": 18569 }, { "epoch": 0.48, "grad_norm": 1.7117260694503784, "learning_rate": 1.1260942001905334e-05, "loss": 0.4535, "step": 18570 }, { "epoch": 0.48, "grad_norm": 1.0537821054458618, "learning_rate": 1.1260118480671962e-05, "loss": 0.539, "step": 18571 }, { "epoch": 0.48, "grad_norm": 1.361855387687683, "learning_rate": 1.1259294950754647e-05, "loss": 0.4996, "step": 18572 }, { "epoch": 0.48, "grad_norm": 2.0068564414978027, "learning_rate": 1.1258471412159063e-05, "loss": 0.6386, "step": 18573 }, { "epoch": 0.48, "grad_norm": 3.478302240371704, "learning_rate": 1.1257647864890884e-05, "loss": 0.4642, "step": 18574 }, { "epoch": 0.48, "grad_norm": 2.2372469902038574, "learning_rate": 1.1256824308955787e-05, "loss": 0.5647, "step": 18575 }, { "epoch": 0.48, "grad_norm": 1.661097526550293, "learning_rate": 1.1256000744359446e-05, "loss": 0.4583, "step": 18576 }, { "epoch": 0.48, "grad_norm": 1.552451491355896, "learning_rate": 1.1255177171107539e-05, "loss": 0.602, "step": 18577 }, { "epoch": 0.48, "grad_norm": 4.051680564880371, "learning_rate": 1.1254353589205737e-05, "loss": 0.4724, "step": 18578 }, { "epoch": 0.48, "grad_norm": 1.1717673540115356, "learning_rate": 1.1253529998659718e-05, "loss": 0.4768, "step": 18579 }, { "epoch": 0.48, "grad_norm": 2.183007001876831, "learning_rate": 1.1252706399475159e-05, "loss": 0.5438, "step": 18580 }, { "epoch": 0.48, "grad_norm": 4.166205883026123, "learning_rate": 1.1251882791657734e-05, "loss": 0.5881, "step": 18581 }, { "epoch": 0.48, "grad_norm": 6.874096870422363, "learning_rate": 1.125105917521312e-05, "loss": 0.7936, "step": 18582 }, { "epoch": 0.48, "grad_norm": 1.97589111328125, "learning_rate": 1.1250235550146991e-05, "loss": 0.5671, "step": 18583 }, { "epoch": 0.48, "grad_norm": 3.5229079723358154, "learning_rate": 1.1249411916465023e-05, "loss": 0.6652, "step": 18584 }, { "epoch": 0.48, "grad_norm": 1.438352346420288, "learning_rate": 1.1248588274172896e-05, "loss": 0.3686, "step": 18585 }, { "epoch": 0.48, "grad_norm": 1.5218558311462402, "learning_rate": 1.1247764623276281e-05, "loss": 0.4293, "step": 18586 }, { "epoch": 0.48, "grad_norm": 2.2830052375793457, "learning_rate": 1.1246940963780857e-05, "loss": 0.7157, "step": 18587 }, { "epoch": 0.48, "grad_norm": 1.3438129425048828, "learning_rate": 1.1246117295692297e-05, "loss": 0.3325, "step": 18588 }, { "epoch": 0.48, "grad_norm": 1.466770052909851, "learning_rate": 1.1245293619016282e-05, "loss": 0.5945, "step": 18589 }, { "epoch": 0.48, "grad_norm": 2.020461082458496, "learning_rate": 1.1244469933758486e-05, "loss": 0.5236, "step": 18590 }, { "epoch": 0.48, "grad_norm": 6.556528568267822, "learning_rate": 1.1243646239924582e-05, "loss": 0.6725, "step": 18591 }, { "epoch": 0.48, "grad_norm": 2.2107901573181152, "learning_rate": 1.1242822537520253e-05, "loss": 0.6155, "step": 18592 }, { "epoch": 0.48, "grad_norm": 1.6045814752578735, "learning_rate": 1.124199882655117e-05, "loss": 0.5758, "step": 18593 }, { "epoch": 0.48, "grad_norm": 1.256772518157959, "learning_rate": 1.1241175107023013e-05, "loss": 0.5861, "step": 18594 }, { "epoch": 0.48, "grad_norm": 1.7870393991470337, "learning_rate": 1.1240351378941457e-05, "loss": 0.424, "step": 18595 }, { "epoch": 0.48, "grad_norm": 1.3885020017623901, "learning_rate": 1.1239527642312176e-05, "loss": 0.5524, "step": 18596 }, { "epoch": 0.48, "grad_norm": 2.639913558959961, "learning_rate": 1.123870389714085e-05, "loss": 0.6317, "step": 18597 }, { "epoch": 0.48, "grad_norm": 1.2175703048706055, "learning_rate": 1.1237880143433156e-05, "loss": 0.5097, "step": 18598 }, { "epoch": 0.48, "grad_norm": 1.967980146408081, "learning_rate": 1.1237056381194771e-05, "loss": 0.4875, "step": 18599 }, { "epoch": 0.48, "grad_norm": 1.2016364336013794, "learning_rate": 1.1236232610431368e-05, "loss": 0.6121, "step": 18600 }, { "epoch": 0.48, "grad_norm": 2.736543893814087, "learning_rate": 1.1235408831148626e-05, "loss": 0.5431, "step": 18601 }, { "epoch": 0.48, "grad_norm": 2.602231502532959, "learning_rate": 1.1234585043352223e-05, "loss": 0.8238, "step": 18602 }, { "epoch": 0.48, "grad_norm": 1.9148043394088745, "learning_rate": 1.123376124704784e-05, "loss": 0.6412, "step": 18603 }, { "epoch": 0.48, "grad_norm": 2.143319845199585, "learning_rate": 1.1232937442241145e-05, "loss": 0.7691, "step": 18604 }, { "epoch": 0.48, "grad_norm": 1.3508632183074951, "learning_rate": 1.1232113628937819e-05, "loss": 0.5237, "step": 18605 }, { "epoch": 0.48, "grad_norm": 2.5674726963043213, "learning_rate": 1.1231289807143544e-05, "loss": 0.4668, "step": 18606 }, { "epoch": 0.48, "grad_norm": 1.0845483541488647, "learning_rate": 1.1230465976863992e-05, "loss": 0.4568, "step": 18607 }, { "epoch": 0.48, "grad_norm": 2.5036675930023193, "learning_rate": 1.122964213810484e-05, "loss": 0.659, "step": 18608 }, { "epoch": 0.48, "grad_norm": 5.662391185760498, "learning_rate": 1.1228818290871767e-05, "loss": 0.6159, "step": 18609 }, { "epoch": 0.48, "grad_norm": 2.220984935760498, "learning_rate": 1.1227994435170451e-05, "loss": 0.708, "step": 18610 }, { "epoch": 0.48, "grad_norm": 4.1415510177612305, "learning_rate": 1.122717057100657e-05, "loss": 0.8818, "step": 18611 }, { "epoch": 0.48, "grad_norm": 1.511313796043396, "learning_rate": 1.1226346698385797e-05, "loss": 0.4019, "step": 18612 }, { "epoch": 0.48, "grad_norm": 1.9176779985427856, "learning_rate": 1.1225522817313813e-05, "loss": 0.5495, "step": 18613 }, { "epoch": 0.48, "grad_norm": 3.542841672897339, "learning_rate": 1.1224698927796301e-05, "loss": 0.6236, "step": 18614 }, { "epoch": 0.48, "grad_norm": 1.784082293510437, "learning_rate": 1.1223875029838929e-05, "loss": 0.3965, "step": 18615 }, { "epoch": 0.48, "grad_norm": 1.4967690706253052, "learning_rate": 1.1223051123447382e-05, "loss": 0.5875, "step": 18616 }, { "epoch": 0.48, "grad_norm": 1.4163419008255005, "learning_rate": 1.1222227208627335e-05, "loss": 0.554, "step": 18617 }, { "epoch": 0.48, "grad_norm": 2.4967358112335205, "learning_rate": 1.1221403285384464e-05, "loss": 0.5946, "step": 18618 }, { "epoch": 0.48, "grad_norm": 1.0675841569900513, "learning_rate": 1.1220579353724448e-05, "loss": 0.389, "step": 18619 }, { "epoch": 0.48, "grad_norm": 2.5336387157440186, "learning_rate": 1.1219755413652965e-05, "loss": 0.5657, "step": 18620 }, { "epoch": 0.48, "grad_norm": 1.6990509033203125, "learning_rate": 1.12189314651757e-05, "loss": 0.6276, "step": 18621 }, { "epoch": 0.48, "grad_norm": 1.6344777345657349, "learning_rate": 1.1218107508298322e-05, "loss": 0.5239, "step": 18622 }, { "epoch": 0.48, "grad_norm": 1.1414889097213745, "learning_rate": 1.121728354302651e-05, "loss": 0.551, "step": 18623 }, { "epoch": 0.48, "grad_norm": 4.314700126647949, "learning_rate": 1.1216459569365946e-05, "loss": 0.4965, "step": 18624 }, { "epoch": 0.48, "grad_norm": 1.2385514974594116, "learning_rate": 1.1215635587322307e-05, "loss": 0.3872, "step": 18625 }, { "epoch": 0.48, "grad_norm": 10.927678108215332, "learning_rate": 1.1214811596901273e-05, "loss": 0.6273, "step": 18626 }, { "epoch": 0.48, "grad_norm": 1.3749061822891235, "learning_rate": 1.121398759810852e-05, "loss": 0.6154, "step": 18627 }, { "epoch": 0.48, "grad_norm": 1.6003447771072388, "learning_rate": 1.1213163590949727e-05, "loss": 0.5508, "step": 18628 }, { "epoch": 0.48, "grad_norm": 1.747066855430603, "learning_rate": 1.121233957543057e-05, "loss": 0.6222, "step": 18629 }, { "epoch": 0.48, "grad_norm": 2.4637928009033203, "learning_rate": 1.1211515551556736e-05, "loss": 0.5964, "step": 18630 }, { "epoch": 0.48, "grad_norm": 1.4256932735443115, "learning_rate": 1.1210691519333892e-05, "loss": 0.7721, "step": 18631 }, { "epoch": 0.48, "grad_norm": 1.8536850214004517, "learning_rate": 1.1209867478767724e-05, "loss": 0.4266, "step": 18632 }, { "epoch": 0.48, "grad_norm": 3.2819981575012207, "learning_rate": 1.1209043429863913e-05, "loss": 0.5554, "step": 18633 }, { "epoch": 0.48, "grad_norm": 2.3144729137420654, "learning_rate": 1.1208219372628131e-05, "loss": 0.4975, "step": 18634 }, { "epoch": 0.48, "grad_norm": 2.128863573074341, "learning_rate": 1.1207395307066062e-05, "loss": 0.5959, "step": 18635 }, { "epoch": 0.48, "grad_norm": 1.2025121450424194, "learning_rate": 1.120657123318338e-05, "loss": 0.556, "step": 18636 }, { "epoch": 0.48, "grad_norm": 2.434929609298706, "learning_rate": 1.1205747150985771e-05, "loss": 0.6116, "step": 18637 }, { "epoch": 0.48, "grad_norm": 4.320565700531006, "learning_rate": 1.120492306047891e-05, "loss": 0.6512, "step": 18638 }, { "epoch": 0.48, "grad_norm": 1.4299153089523315, "learning_rate": 1.1204098961668473e-05, "loss": 0.4269, "step": 18639 }, { "epoch": 0.48, "grad_norm": 1.2583715915679932, "learning_rate": 1.1203274854560145e-05, "loss": 0.6188, "step": 18640 }, { "epoch": 0.48, "grad_norm": 5.205913543701172, "learning_rate": 1.12024507391596e-05, "loss": 0.6515, "step": 18641 }, { "epoch": 0.48, "grad_norm": 3.2803847789764404, "learning_rate": 1.1201626615472523e-05, "loss": 0.4389, "step": 18642 }, { "epoch": 0.48, "grad_norm": 6.288066864013672, "learning_rate": 1.1200802483504589e-05, "loss": 0.6372, "step": 18643 }, { "epoch": 0.48, "grad_norm": 2.68127179145813, "learning_rate": 1.119997834326148e-05, "loss": 0.669, "step": 18644 }, { "epoch": 0.48, "grad_norm": 5.647597312927246, "learning_rate": 1.1199154194748874e-05, "loss": 0.6261, "step": 18645 }, { "epoch": 0.48, "grad_norm": 5.965761184692383, "learning_rate": 1.1198330037972448e-05, "loss": 0.613, "step": 18646 }, { "epoch": 0.48, "grad_norm": 3.1468753814697266, "learning_rate": 1.1197505872937886e-05, "loss": 0.5838, "step": 18647 }, { "epoch": 0.48, "grad_norm": 1.5258219242095947, "learning_rate": 1.1196681699650865e-05, "loss": 0.5368, "step": 18648 }, { "epoch": 0.48, "grad_norm": 1.3560646772384644, "learning_rate": 1.1195857518117065e-05, "loss": 0.6496, "step": 18649 }, { "epoch": 0.48, "grad_norm": 2.462034225463867, "learning_rate": 1.119503332834217e-05, "loss": 0.7908, "step": 18650 }, { "epoch": 0.48, "grad_norm": 6.824061870574951, "learning_rate": 1.1194209130331852e-05, "loss": 0.6294, "step": 18651 }, { "epoch": 0.48, "grad_norm": 3.438675880432129, "learning_rate": 1.11933849240918e-05, "loss": 0.6795, "step": 18652 }, { "epoch": 0.48, "grad_norm": 1.6064141988754272, "learning_rate": 1.1192560709627682e-05, "loss": 0.6315, "step": 18653 }, { "epoch": 0.48, "grad_norm": 11.813928604125977, "learning_rate": 1.1191736486945187e-05, "loss": 0.7922, "step": 18654 }, { "epoch": 0.48, "grad_norm": 1.7344698905944824, "learning_rate": 1.1190912256049997e-05, "loss": 0.6746, "step": 18655 }, { "epoch": 0.48, "grad_norm": 1.4530583620071411, "learning_rate": 1.1190088016947785e-05, "loss": 0.4569, "step": 18656 }, { "epoch": 0.48, "grad_norm": 2.8785948753356934, "learning_rate": 1.1189263769644235e-05, "loss": 0.7672, "step": 18657 }, { "epoch": 0.48, "grad_norm": 3.4254655838012695, "learning_rate": 1.1188439514145024e-05, "loss": 0.5489, "step": 18658 }, { "epoch": 0.48, "grad_norm": 1.9579766988754272, "learning_rate": 1.1187615250455837e-05, "loss": 0.4591, "step": 18659 }, { "epoch": 0.48, "grad_norm": 2.672675609588623, "learning_rate": 1.1186790978582352e-05, "loss": 0.5871, "step": 18660 }, { "epoch": 0.48, "grad_norm": 1.6610840559005737, "learning_rate": 1.1185966698530246e-05, "loss": 0.6502, "step": 18661 }, { "epoch": 0.48, "grad_norm": 2.7327113151550293, "learning_rate": 1.1185142410305205e-05, "loss": 0.4841, "step": 18662 }, { "epoch": 0.48, "grad_norm": 1.546887755393982, "learning_rate": 1.1184318113912907e-05, "loss": 0.5683, "step": 18663 }, { "epoch": 0.48, "grad_norm": 2.7757766246795654, "learning_rate": 1.1183493809359034e-05, "loss": 0.7798, "step": 18664 }, { "epoch": 0.48, "grad_norm": 2.3617589473724365, "learning_rate": 1.1182669496649262e-05, "loss": 0.6286, "step": 18665 }, { "epoch": 0.48, "grad_norm": 1.6891824007034302, "learning_rate": 1.1181845175789274e-05, "loss": 0.68, "step": 18666 }, { "epoch": 0.48, "grad_norm": 1.5404000282287598, "learning_rate": 1.1181020846784756e-05, "loss": 0.6142, "step": 18667 }, { "epoch": 0.48, "grad_norm": 1.8097076416015625, "learning_rate": 1.1180196509641382e-05, "loss": 0.5431, "step": 18668 }, { "epoch": 0.48, "grad_norm": 1.454264760017395, "learning_rate": 1.1179372164364836e-05, "loss": 0.7724, "step": 18669 }, { "epoch": 0.48, "grad_norm": 1.9746674299240112, "learning_rate": 1.1178547810960797e-05, "loss": 0.4262, "step": 18670 }, { "epoch": 0.48, "grad_norm": 1.332016110420227, "learning_rate": 1.117772344943495e-05, "loss": 0.5668, "step": 18671 }, { "epoch": 0.48, "grad_norm": 1.3118144273757935, "learning_rate": 1.1176899079792969e-05, "loss": 0.6073, "step": 18672 }, { "epoch": 0.48, "grad_norm": 3.5607407093048096, "learning_rate": 1.1176074702040538e-05, "loss": 0.5912, "step": 18673 }, { "epoch": 0.48, "grad_norm": 3.0880959033966064, "learning_rate": 1.1175250316183343e-05, "loss": 0.5948, "step": 18674 }, { "epoch": 0.48, "grad_norm": 2.0353739261627197, "learning_rate": 1.117442592222706e-05, "loss": 0.6075, "step": 18675 }, { "epoch": 0.48, "grad_norm": 1.6301000118255615, "learning_rate": 1.117360152017737e-05, "loss": 0.6799, "step": 18676 }, { "epoch": 0.48, "grad_norm": 1.814629077911377, "learning_rate": 1.1172777110039956e-05, "loss": 0.5357, "step": 18677 }, { "epoch": 0.48, "grad_norm": 3.873854637145996, "learning_rate": 1.1171952691820498e-05, "loss": 0.5609, "step": 18678 }, { "epoch": 0.48, "grad_norm": 1.109114170074463, "learning_rate": 1.1171128265524682e-05, "loss": 0.453, "step": 18679 }, { "epoch": 0.48, "grad_norm": 1.4383872747421265, "learning_rate": 1.1170303831158183e-05, "loss": 0.5892, "step": 18680 }, { "epoch": 0.48, "grad_norm": 1.4419273138046265, "learning_rate": 1.1169479388726685e-05, "loss": 0.5457, "step": 18681 }, { "epoch": 0.48, "grad_norm": 1.2234913110733032, "learning_rate": 1.1168654938235869e-05, "loss": 0.6764, "step": 18682 }, { "epoch": 0.48, "grad_norm": 1.2951723337173462, "learning_rate": 1.1167830479691421e-05, "loss": 0.5197, "step": 18683 }, { "epoch": 0.48, "grad_norm": 4.558944225311279, "learning_rate": 1.1167006013099017e-05, "loss": 0.6102, "step": 18684 }, { "epoch": 0.48, "grad_norm": 1.4595898389816284, "learning_rate": 1.116618153846434e-05, "loss": 0.4399, "step": 18685 }, { "epoch": 0.48, "grad_norm": 1.5786315202713013, "learning_rate": 1.1165357055793073e-05, "loss": 0.7188, "step": 18686 }, { "epoch": 0.48, "grad_norm": 1.008203387260437, "learning_rate": 1.1164532565090898e-05, "loss": 0.5307, "step": 18687 }, { "epoch": 0.48, "grad_norm": 1.669103980064392, "learning_rate": 1.1163708066363496e-05, "loss": 0.4927, "step": 18688 }, { "epoch": 0.48, "grad_norm": 7.70032262802124, "learning_rate": 1.1162883559616548e-05, "loss": 0.6783, "step": 18689 }, { "epoch": 0.48, "grad_norm": 4.922123908996582, "learning_rate": 1.1162059044855738e-05, "loss": 0.6884, "step": 18690 }, { "epoch": 0.48, "grad_norm": 1.8197680711746216, "learning_rate": 1.1161234522086749e-05, "loss": 0.5633, "step": 18691 }, { "epoch": 0.48, "grad_norm": 2.5351221561431885, "learning_rate": 1.1160409991315257e-05, "loss": 0.5935, "step": 18692 }, { "epoch": 0.48, "grad_norm": 1.1726329326629639, "learning_rate": 1.1159585452546952e-05, "loss": 0.4859, "step": 18693 }, { "epoch": 0.48, "grad_norm": 2.8549129962921143, "learning_rate": 1.115876090578751e-05, "loss": 0.6022, "step": 18694 }, { "epoch": 0.48, "grad_norm": 3.1248528957366943, "learning_rate": 1.1157936351042617e-05, "loss": 0.6432, "step": 18695 }, { "epoch": 0.48, "grad_norm": 1.5504200458526611, "learning_rate": 1.1157111788317955e-05, "loss": 0.4413, "step": 18696 }, { "epoch": 0.48, "grad_norm": 1.7871828079223633, "learning_rate": 1.1156287217619206e-05, "loss": 0.6313, "step": 18697 }, { "epoch": 0.48, "grad_norm": 1.636344075202942, "learning_rate": 1.1155462638952051e-05, "loss": 0.6165, "step": 18698 }, { "epoch": 0.48, "grad_norm": 1.5706787109375, "learning_rate": 1.1154638052322176e-05, "loss": 0.6752, "step": 18699 }, { "epoch": 0.48, "grad_norm": 1.445340871810913, "learning_rate": 1.1153813457735257e-05, "loss": 0.4631, "step": 18700 }, { "epoch": 0.48, "grad_norm": 0.8531800508499146, "learning_rate": 1.1152988855196981e-05, "loss": 0.504, "step": 18701 }, { "epoch": 0.48, "grad_norm": 3.7604165077209473, "learning_rate": 1.115216424471303e-05, "loss": 0.6219, "step": 18702 }, { "epoch": 0.48, "grad_norm": 1.730316400527954, "learning_rate": 1.1151339626289091e-05, "loss": 0.4795, "step": 18703 }, { "epoch": 0.48, "grad_norm": 2.2158613204956055, "learning_rate": 1.1150514999930837e-05, "loss": 0.716, "step": 18704 }, { "epoch": 0.48, "grad_norm": 4.40336275100708, "learning_rate": 1.114969036564396e-05, "loss": 0.6145, "step": 18705 }, { "epoch": 0.48, "grad_norm": 6.275074005126953, "learning_rate": 1.1148865723434137e-05, "loss": 0.7628, "step": 18706 }, { "epoch": 0.48, "grad_norm": 1.1169887781143188, "learning_rate": 1.1148041073307054e-05, "loss": 0.5328, "step": 18707 }, { "epoch": 0.48, "grad_norm": 2.3101515769958496, "learning_rate": 1.1147216415268396e-05, "loss": 0.6627, "step": 18708 }, { "epoch": 0.48, "grad_norm": 1.8579082489013672, "learning_rate": 1.1146391749323839e-05, "loss": 0.6429, "step": 18709 }, { "epoch": 0.48, "grad_norm": 5.347056865692139, "learning_rate": 1.114556707547907e-05, "loss": 0.5376, "step": 18710 }, { "epoch": 0.48, "grad_norm": 2.1985597610473633, "learning_rate": 1.1144742393739776e-05, "loss": 0.6468, "step": 18711 }, { "epoch": 0.48, "grad_norm": 1.4841270446777344, "learning_rate": 1.1143917704111635e-05, "loss": 0.658, "step": 18712 }, { "epoch": 0.48, "grad_norm": 9.547553062438965, "learning_rate": 1.1143093006600334e-05, "loss": 0.6331, "step": 18713 }, { "epoch": 0.48, "grad_norm": 1.6750799417495728, "learning_rate": 1.1142268301211549e-05, "loss": 0.5743, "step": 18714 }, { "epoch": 0.48, "grad_norm": 3.7456700801849365, "learning_rate": 1.1141443587950975e-05, "loss": 0.5498, "step": 18715 }, { "epoch": 0.48, "grad_norm": 9.234150886535645, "learning_rate": 1.1140618866824284e-05, "loss": 0.7459, "step": 18716 }, { "epoch": 0.48, "grad_norm": 1.5917165279388428, "learning_rate": 1.1139794137837165e-05, "loss": 0.5515, "step": 18717 }, { "epoch": 0.48, "grad_norm": 2.238990306854248, "learning_rate": 1.1138969400995304e-05, "loss": 0.6416, "step": 18718 }, { "epoch": 0.48, "grad_norm": 10.561041831970215, "learning_rate": 1.1138144656304377e-05, "loss": 0.7972, "step": 18719 }, { "epoch": 0.48, "grad_norm": 1.3179043531417847, "learning_rate": 1.1137319903770077e-05, "loss": 0.5542, "step": 18720 }, { "epoch": 0.48, "grad_norm": 1.4307847023010254, "learning_rate": 1.1136495143398079e-05, "loss": 0.5723, "step": 18721 }, { "epoch": 0.48, "grad_norm": 1.4714126586914062, "learning_rate": 1.113567037519407e-05, "loss": 0.5186, "step": 18722 }, { "epoch": 0.48, "grad_norm": 2.150308132171631, "learning_rate": 1.1134845599163738e-05, "loss": 0.5742, "step": 18723 }, { "epoch": 0.48, "grad_norm": 1.1059454679489136, "learning_rate": 1.1134020815312761e-05, "loss": 0.5665, "step": 18724 }, { "epoch": 0.48, "grad_norm": 1.517101526260376, "learning_rate": 1.1133196023646827e-05, "loss": 0.5857, "step": 18725 }, { "epoch": 0.48, "grad_norm": 2.566884756088257, "learning_rate": 1.1132371224171614e-05, "loss": 0.4269, "step": 18726 }, { "epoch": 0.48, "grad_norm": 2.1945700645446777, "learning_rate": 1.1131546416892814e-05, "loss": 0.5921, "step": 18727 }, { "epoch": 0.48, "grad_norm": 1.6035743951797485, "learning_rate": 1.1130721601816103e-05, "loss": 0.5392, "step": 18728 }, { "epoch": 0.48, "grad_norm": 2.322699546813965, "learning_rate": 1.1129896778947169e-05, "loss": 0.5461, "step": 18729 }, { "epoch": 0.48, "grad_norm": 1.2985811233520508, "learning_rate": 1.1129071948291701e-05, "loss": 0.5166, "step": 18730 }, { "epoch": 0.48, "grad_norm": 2.6071977615356445, "learning_rate": 1.1128247109855374e-05, "loss": 0.5686, "step": 18731 }, { "epoch": 0.48, "grad_norm": 1.5741831064224243, "learning_rate": 1.1127422263643882e-05, "loss": 0.3224, "step": 18732 }, { "epoch": 0.48, "grad_norm": 2.670724868774414, "learning_rate": 1.1126597409662899e-05, "loss": 0.5959, "step": 18733 }, { "epoch": 0.48, "grad_norm": 4.380283355712891, "learning_rate": 1.1125772547918115e-05, "loss": 0.6903, "step": 18734 }, { "epoch": 0.48, "grad_norm": 1.868537187576294, "learning_rate": 1.1124947678415214e-05, "loss": 0.408, "step": 18735 }, { "epoch": 0.48, "grad_norm": 1.371938943862915, "learning_rate": 1.1124122801159882e-05, "loss": 0.5117, "step": 18736 }, { "epoch": 0.48, "grad_norm": 1.4966522455215454, "learning_rate": 1.1123297916157803e-05, "loss": 0.6828, "step": 18737 }, { "epoch": 0.48, "grad_norm": 1.1097155809402466, "learning_rate": 1.1122473023414657e-05, "loss": 0.5098, "step": 18738 }, { "epoch": 0.48, "grad_norm": 1.824204444885254, "learning_rate": 1.1121648122936134e-05, "loss": 0.6615, "step": 18739 }, { "epoch": 0.48, "grad_norm": 5.423099994659424, "learning_rate": 1.1120823214727916e-05, "loss": 0.5919, "step": 18740 }, { "epoch": 0.48, "grad_norm": 2.3229849338531494, "learning_rate": 1.1119998298795688e-05, "loss": 0.7234, "step": 18741 }, { "epoch": 0.48, "grad_norm": 14.385151863098145, "learning_rate": 1.1119173375145137e-05, "loss": 0.4824, "step": 18742 }, { "epoch": 0.48, "grad_norm": 3.5040123462677, "learning_rate": 1.1118348443781948e-05, "loss": 0.5875, "step": 18743 }, { "epoch": 0.48, "grad_norm": 4.241629600524902, "learning_rate": 1.1117523504711803e-05, "loss": 0.6972, "step": 18744 }, { "epoch": 0.48, "grad_norm": 1.977356195449829, "learning_rate": 1.1116698557940387e-05, "loss": 0.6379, "step": 18745 }, { "epoch": 0.48, "grad_norm": 2.3937320709228516, "learning_rate": 1.1115873603473384e-05, "loss": 0.6195, "step": 18746 }, { "epoch": 0.48, "grad_norm": 1.6008039712905884, "learning_rate": 1.1115048641316484e-05, "loss": 0.576, "step": 18747 }, { "epoch": 0.48, "grad_norm": 2.5609283447265625, "learning_rate": 1.1114223671475368e-05, "loss": 0.7757, "step": 18748 }, { "epoch": 0.48, "grad_norm": 6.014826774597168, "learning_rate": 1.1113398693955724e-05, "loss": 0.5654, "step": 18749 }, { "epoch": 0.48, "grad_norm": 2.547369956970215, "learning_rate": 1.1112573708763235e-05, "loss": 0.71, "step": 18750 }, { "epoch": 0.48, "grad_norm": 1.948851466178894, "learning_rate": 1.1111748715903587e-05, "loss": 0.651, "step": 18751 }, { "epoch": 0.48, "grad_norm": 1.548755407333374, "learning_rate": 1.1110923715382464e-05, "loss": 0.7361, "step": 18752 }, { "epoch": 0.48, "grad_norm": 1.5140557289123535, "learning_rate": 1.1110098707205555e-05, "loss": 0.4493, "step": 18753 }, { "epoch": 0.48, "grad_norm": 1.5099513530731201, "learning_rate": 1.110927369137854e-05, "loss": 0.4724, "step": 18754 }, { "epoch": 0.48, "grad_norm": 1.7276946306228638, "learning_rate": 1.110844866790711e-05, "loss": 0.6268, "step": 18755 }, { "epoch": 0.48, "grad_norm": 1.4559729099273682, "learning_rate": 1.1107623636796949e-05, "loss": 0.5541, "step": 18756 }, { "epoch": 0.48, "grad_norm": 1.6897581815719604, "learning_rate": 1.1106798598053741e-05, "loss": 0.5925, "step": 18757 }, { "epoch": 0.48, "grad_norm": 1.1764311790466309, "learning_rate": 1.1105973551683171e-05, "loss": 0.4436, "step": 18758 }, { "epoch": 0.48, "grad_norm": 2.1521875858306885, "learning_rate": 1.1105148497690928e-05, "loss": 0.8023, "step": 18759 }, { "epoch": 0.48, "grad_norm": 3.9064993858337402, "learning_rate": 1.1104323436082693e-05, "loss": 0.5522, "step": 18760 }, { "epoch": 0.48, "grad_norm": 0.9368126392364502, "learning_rate": 1.110349836686416e-05, "loss": 0.4652, "step": 18761 }, { "epoch": 0.48, "grad_norm": 1.8791011571884155, "learning_rate": 1.1102673290041004e-05, "loss": 0.4557, "step": 18762 }, { "epoch": 0.48, "grad_norm": 1.3187217712402344, "learning_rate": 1.1101848205618919e-05, "loss": 0.6906, "step": 18763 }, { "epoch": 0.48, "grad_norm": 1.3927245140075684, "learning_rate": 1.1101023113603588e-05, "loss": 0.5266, "step": 18764 }, { "epoch": 0.48, "grad_norm": 4.849286079406738, "learning_rate": 1.1100198014000698e-05, "loss": 0.5817, "step": 18765 }, { "epoch": 0.48, "grad_norm": 1.391323447227478, "learning_rate": 1.1099372906815936e-05, "loss": 0.5358, "step": 18766 }, { "epoch": 0.48, "grad_norm": 1.3788554668426514, "learning_rate": 1.1098547792054984e-05, "loss": 0.5787, "step": 18767 }, { "epoch": 0.48, "grad_norm": 2.070232391357422, "learning_rate": 1.1097722669723533e-05, "loss": 0.5084, "step": 18768 }, { "epoch": 0.48, "grad_norm": 2.152705430984497, "learning_rate": 1.1096897539827262e-05, "loss": 0.6106, "step": 18769 }, { "epoch": 0.48, "grad_norm": 7.433644771575928, "learning_rate": 1.1096072402371867e-05, "loss": 0.6508, "step": 18770 }, { "epoch": 0.48, "grad_norm": 6.160703182220459, "learning_rate": 1.1095247257363028e-05, "loss": 0.6904, "step": 18771 }, { "epoch": 0.48, "grad_norm": 15.716949462890625, "learning_rate": 1.1094422104806433e-05, "loss": 0.7304, "step": 18772 }, { "epoch": 0.48, "grad_norm": 1.8567982912063599, "learning_rate": 1.109359694470777e-05, "loss": 0.5782, "step": 18773 }, { "epoch": 0.48, "grad_norm": 1.5873686075210571, "learning_rate": 1.1092771777072722e-05, "loss": 0.5671, "step": 18774 }, { "epoch": 0.48, "grad_norm": 7.938536167144775, "learning_rate": 1.1091946601906978e-05, "loss": 0.5218, "step": 18775 }, { "epoch": 0.48, "grad_norm": 1.4965978860855103, "learning_rate": 1.1091121419216225e-05, "loss": 0.5515, "step": 18776 }, { "epoch": 0.48, "grad_norm": 4.194252967834473, "learning_rate": 1.1090296229006147e-05, "loss": 0.5938, "step": 18777 }, { "epoch": 0.48, "grad_norm": 3.6295764446258545, "learning_rate": 1.1089471031282435e-05, "loss": 0.6591, "step": 18778 }, { "epoch": 0.48, "grad_norm": 1.3355456590652466, "learning_rate": 1.108864582605077e-05, "loss": 0.4981, "step": 18779 }, { "epoch": 0.48, "grad_norm": 3.2310688495635986, "learning_rate": 1.1087820613316844e-05, "loss": 0.5017, "step": 18780 }, { "epoch": 0.48, "grad_norm": 1.193307638168335, "learning_rate": 1.1086995393086341e-05, "loss": 0.4608, "step": 18781 }, { "epoch": 0.48, "grad_norm": 2.0852842330932617, "learning_rate": 1.1086170165364948e-05, "loss": 0.4698, "step": 18782 }, { "epoch": 0.48, "grad_norm": 5.240453720092773, "learning_rate": 1.1085344930158354e-05, "loss": 0.6961, "step": 18783 }, { "epoch": 0.48, "grad_norm": 1.5745139122009277, "learning_rate": 1.1084519687472244e-05, "loss": 0.5878, "step": 18784 }, { "epoch": 0.48, "grad_norm": 2.7276344299316406, "learning_rate": 1.1083694437312306e-05, "loss": 0.4714, "step": 18785 }, { "epoch": 0.48, "grad_norm": 2.0136489868164062, "learning_rate": 1.1082869179684225e-05, "loss": 0.4975, "step": 18786 }, { "epoch": 0.48, "grad_norm": 0.8929166197776794, "learning_rate": 1.1082043914593692e-05, "loss": 0.5103, "step": 18787 }, { "epoch": 0.48, "grad_norm": 1.8004693984985352, "learning_rate": 1.1081218642046394e-05, "loss": 0.6638, "step": 18788 }, { "epoch": 0.48, "grad_norm": 1.6122509241104126, "learning_rate": 1.1080393362048012e-05, "loss": 0.4846, "step": 18789 }, { "epoch": 0.48, "grad_norm": 2.668403387069702, "learning_rate": 1.1079568074604242e-05, "loss": 0.4614, "step": 18790 }, { "epoch": 0.48, "grad_norm": 1.5131714344024658, "learning_rate": 1.1078742779720763e-05, "loss": 0.5742, "step": 18791 }, { "epoch": 0.48, "grad_norm": 1.0334547758102417, "learning_rate": 1.107791747740327e-05, "loss": 0.5192, "step": 18792 }, { "epoch": 0.48, "grad_norm": 1.4451658725738525, "learning_rate": 1.1077092167657444e-05, "loss": 0.5449, "step": 18793 }, { "epoch": 0.48, "grad_norm": 10.048670768737793, "learning_rate": 1.1076266850488977e-05, "loss": 0.607, "step": 18794 }, { "epoch": 0.48, "grad_norm": 5.188830852508545, "learning_rate": 1.1075441525903554e-05, "loss": 0.6153, "step": 18795 }, { "epoch": 0.48, "grad_norm": 1.355567455291748, "learning_rate": 1.1074616193906864e-05, "loss": 0.5935, "step": 18796 }, { "epoch": 0.48, "grad_norm": 1.5233852863311768, "learning_rate": 1.1073790854504596e-05, "loss": 0.4884, "step": 18797 }, { "epoch": 0.48, "grad_norm": 2.161283254623413, "learning_rate": 1.1072965507702436e-05, "loss": 0.6868, "step": 18798 }, { "epoch": 0.48, "grad_norm": 5.560903072357178, "learning_rate": 1.1072140153506068e-05, "loss": 0.5399, "step": 18799 }, { "epoch": 0.48, "grad_norm": 1.920973300933838, "learning_rate": 1.1071314791921187e-05, "loss": 0.5113, "step": 18800 }, { "epoch": 0.48, "grad_norm": 1.1327801942825317, "learning_rate": 1.1070489422953476e-05, "loss": 0.5378, "step": 18801 }, { "epoch": 0.48, "grad_norm": 2.425973892211914, "learning_rate": 1.1069664046608628e-05, "loss": 0.4956, "step": 18802 }, { "epoch": 0.48, "grad_norm": 1.447200894355774, "learning_rate": 1.1068838662892323e-05, "loss": 0.494, "step": 18803 }, { "epoch": 0.48, "grad_norm": 2.9728221893310547, "learning_rate": 1.1068013271810256e-05, "loss": 0.5746, "step": 18804 }, { "epoch": 0.48, "grad_norm": 1.5875319242477417, "learning_rate": 1.1067187873368112e-05, "loss": 0.5642, "step": 18805 }, { "epoch": 0.48, "grad_norm": 2.0655720233917236, "learning_rate": 1.1066362467571578e-05, "loss": 0.5404, "step": 18806 }, { "epoch": 0.48, "grad_norm": 1.3875657320022583, "learning_rate": 1.1065537054426345e-05, "loss": 0.5678, "step": 18807 }, { "epoch": 0.48, "grad_norm": 4.819038391113281, "learning_rate": 1.10647116339381e-05, "loss": 0.5661, "step": 18808 }, { "epoch": 0.48, "grad_norm": 1.7079607248306274, "learning_rate": 1.1063886206112533e-05, "loss": 0.6377, "step": 18809 }, { "epoch": 0.48, "grad_norm": 2.100101947784424, "learning_rate": 1.106306077095533e-05, "loss": 0.6539, "step": 18810 }, { "epoch": 0.48, "grad_norm": 1.4120008945465088, "learning_rate": 1.1062235328472177e-05, "loss": 0.5267, "step": 18811 }, { "epoch": 0.48, "grad_norm": 1.3111149072647095, "learning_rate": 1.1061409878668769e-05, "loss": 0.5045, "step": 18812 }, { "epoch": 0.48, "grad_norm": 8.155963897705078, "learning_rate": 1.1060584421550788e-05, "loss": 0.3975, "step": 18813 }, { "epoch": 0.48, "grad_norm": 3.033832550048828, "learning_rate": 1.1059758957123929e-05, "loss": 0.5587, "step": 18814 }, { "epoch": 0.48, "grad_norm": 1.7985442876815796, "learning_rate": 1.1058933485393874e-05, "loss": 0.6076, "step": 18815 }, { "epoch": 0.48, "grad_norm": 2.089369297027588, "learning_rate": 1.1058108006366315e-05, "loss": 0.5121, "step": 18816 }, { "epoch": 0.48, "grad_norm": 7.860580921173096, "learning_rate": 1.1057282520046944e-05, "loss": 0.7307, "step": 18817 }, { "epoch": 0.48, "grad_norm": 1.5423189401626587, "learning_rate": 1.1056457026441444e-05, "loss": 0.5927, "step": 18818 }, { "epoch": 0.48, "grad_norm": 1.725147008895874, "learning_rate": 1.1055631525555503e-05, "loss": 0.7315, "step": 18819 }, { "epoch": 0.48, "grad_norm": 1.1305838823318481, "learning_rate": 1.1054806017394817e-05, "loss": 0.5438, "step": 18820 }, { "epoch": 0.48, "grad_norm": 1.3551781177520752, "learning_rate": 1.1053980501965069e-05, "loss": 0.5888, "step": 18821 }, { "epoch": 0.48, "grad_norm": 4.3113861083984375, "learning_rate": 1.105315497927195e-05, "loss": 0.8036, "step": 18822 }, { "epoch": 0.48, "grad_norm": 3.600334882736206, "learning_rate": 1.1052329449321146e-05, "loss": 0.8096, "step": 18823 }, { "epoch": 0.48, "grad_norm": 2.0718493461608887, "learning_rate": 1.1051503912118353e-05, "loss": 0.663, "step": 18824 }, { "epoch": 0.48, "grad_norm": 1.34200119972229, "learning_rate": 1.1050678367669252e-05, "loss": 0.514, "step": 18825 }, { "epoch": 0.48, "grad_norm": 1.1099514961242676, "learning_rate": 1.1049852815979536e-05, "loss": 0.4897, "step": 18826 }, { "epoch": 0.48, "grad_norm": 4.63147497177124, "learning_rate": 1.1049027257054897e-05, "loss": 0.6901, "step": 18827 }, { "epoch": 0.48, "grad_norm": 4.086266040802002, "learning_rate": 1.1048201690901017e-05, "loss": 0.6859, "step": 18828 }, { "epoch": 0.48, "grad_norm": 1.986933708190918, "learning_rate": 1.1047376117523593e-05, "loss": 0.456, "step": 18829 }, { "epoch": 0.48, "grad_norm": 1.2962369918823242, "learning_rate": 1.1046550536928309e-05, "loss": 0.419, "step": 18830 }, { "epoch": 0.48, "grad_norm": 2.1872568130493164, "learning_rate": 1.1045724949120855e-05, "loss": 0.6382, "step": 18831 }, { "epoch": 0.48, "grad_norm": 1.778255581855774, "learning_rate": 1.1044899354106923e-05, "loss": 0.526, "step": 18832 }, { "epoch": 0.48, "grad_norm": 2.849259853363037, "learning_rate": 1.1044073751892202e-05, "loss": 0.6244, "step": 18833 }, { "epoch": 0.48, "grad_norm": 1.3913908004760742, "learning_rate": 1.1043248142482381e-05, "loss": 0.5475, "step": 18834 }, { "epoch": 0.48, "grad_norm": 4.148932933807373, "learning_rate": 1.1042422525883148e-05, "loss": 0.6003, "step": 18835 }, { "epoch": 0.48, "grad_norm": 18.873640060424805, "learning_rate": 1.1041596902100196e-05, "loss": 0.749, "step": 18836 }, { "epoch": 0.48, "grad_norm": 2.536893844604492, "learning_rate": 1.1040771271139207e-05, "loss": 0.7725, "step": 18837 }, { "epoch": 0.48, "grad_norm": 1.3197041749954224, "learning_rate": 1.1039945633005879e-05, "loss": 0.4871, "step": 18838 }, { "epoch": 0.48, "grad_norm": 2.409468173980713, "learning_rate": 1.1039119987705902e-05, "loss": 0.4798, "step": 18839 }, { "epoch": 0.48, "grad_norm": 2.406156301498413, "learning_rate": 1.103829433524496e-05, "loss": 0.578, "step": 18840 }, { "epoch": 0.48, "grad_norm": 2.0191404819488525, "learning_rate": 1.1037468675628749e-05, "loss": 0.4975, "step": 18841 }, { "epoch": 0.48, "grad_norm": 1.4197748899459839, "learning_rate": 1.1036643008862953e-05, "loss": 0.6021, "step": 18842 }, { "epoch": 0.48, "grad_norm": 1.4120972156524658, "learning_rate": 1.1035817334953263e-05, "loss": 0.5236, "step": 18843 }, { "epoch": 0.48, "grad_norm": 1.3553035259246826, "learning_rate": 1.1034991653905374e-05, "loss": 0.5746, "step": 18844 }, { "epoch": 0.48, "grad_norm": 1.4580708742141724, "learning_rate": 1.103416596572497e-05, "loss": 0.5966, "step": 18845 }, { "epoch": 0.48, "grad_norm": 1.8202247619628906, "learning_rate": 1.1033340270417748e-05, "loss": 0.6315, "step": 18846 }, { "epoch": 0.48, "grad_norm": 2.579742908477783, "learning_rate": 1.103251456798939e-05, "loss": 0.5835, "step": 18847 }, { "epoch": 0.48, "grad_norm": 5.099997043609619, "learning_rate": 1.1031688858445593e-05, "loss": 0.6499, "step": 18848 }, { "epoch": 0.48, "grad_norm": 1.0741121768951416, "learning_rate": 1.1030863141792042e-05, "loss": 0.4709, "step": 18849 }, { "epoch": 0.48, "grad_norm": 1.2516697645187378, "learning_rate": 1.103003741803443e-05, "loss": 0.4416, "step": 18850 }, { "epoch": 0.48, "grad_norm": 6.812499523162842, "learning_rate": 1.102921168717845e-05, "loss": 0.5776, "step": 18851 }, { "epoch": 0.48, "grad_norm": 4.375300884246826, "learning_rate": 1.1028385949229788e-05, "loss": 0.5313, "step": 18852 }, { "epoch": 0.48, "grad_norm": 1.5006017684936523, "learning_rate": 1.1027560204194136e-05, "loss": 0.5062, "step": 18853 }, { "epoch": 0.48, "grad_norm": 9.879478454589844, "learning_rate": 1.1026734452077183e-05, "loss": 0.5668, "step": 18854 }, { "epoch": 0.48, "grad_norm": 4.363272666931152, "learning_rate": 1.1025908692884624e-05, "loss": 0.6296, "step": 18855 }, { "epoch": 0.48, "grad_norm": 5.7833943367004395, "learning_rate": 1.1025082926622146e-05, "loss": 0.5615, "step": 18856 }, { "epoch": 0.48, "grad_norm": 2.9588611125946045, "learning_rate": 1.102425715329544e-05, "loss": 0.5397, "step": 18857 }, { "epoch": 0.48, "grad_norm": 1.4826643466949463, "learning_rate": 1.1023431372910199e-05, "loss": 0.6534, "step": 18858 }, { "epoch": 0.48, "grad_norm": 5.230888366699219, "learning_rate": 1.1022605585472108e-05, "loss": 0.5429, "step": 18859 }, { "epoch": 0.48, "grad_norm": 2.6425955295562744, "learning_rate": 1.1021779790986867e-05, "loss": 0.506, "step": 18860 }, { "epoch": 0.48, "grad_norm": 1.4265451431274414, "learning_rate": 1.1020953989460156e-05, "loss": 0.4533, "step": 18861 }, { "epoch": 0.48, "grad_norm": 1.6141924858093262, "learning_rate": 1.1020128180897674e-05, "loss": 0.6686, "step": 18862 }, { "epoch": 0.48, "grad_norm": 4.149303436279297, "learning_rate": 1.1019302365305112e-05, "loss": 0.7843, "step": 18863 }, { "epoch": 0.48, "grad_norm": 8.763094902038574, "learning_rate": 1.1018476542688155e-05, "loss": 0.6432, "step": 18864 }, { "epoch": 0.48, "grad_norm": 1.6112340688705444, "learning_rate": 1.10176507130525e-05, "loss": 0.5378, "step": 18865 }, { "epoch": 0.48, "grad_norm": 1.305306077003479, "learning_rate": 1.1016824876403833e-05, "loss": 0.5301, "step": 18866 }, { "epoch": 0.48, "grad_norm": 3.6103992462158203, "learning_rate": 1.1015999032747849e-05, "loss": 0.5019, "step": 18867 }, { "epoch": 0.48, "grad_norm": 2.4338741302490234, "learning_rate": 1.1015173182090239e-05, "loss": 0.6419, "step": 18868 }, { "epoch": 0.48, "grad_norm": 2.1528401374816895, "learning_rate": 1.1014347324436691e-05, "loss": 0.6261, "step": 18869 }, { "epoch": 0.48, "grad_norm": 1.6473891735076904, "learning_rate": 1.1013521459792902e-05, "loss": 0.3017, "step": 18870 }, { "epoch": 0.48, "grad_norm": 2.6151342391967773, "learning_rate": 1.1012695588164557e-05, "loss": 0.5985, "step": 18871 }, { "epoch": 0.48, "grad_norm": 5.980619430541992, "learning_rate": 1.1011869709557352e-05, "loss": 0.7254, "step": 18872 }, { "epoch": 0.48, "grad_norm": 1.842819094657898, "learning_rate": 1.1011043823976977e-05, "loss": 0.7397, "step": 18873 }, { "epoch": 0.48, "grad_norm": 2.4370193481445312, "learning_rate": 1.1010217931429121e-05, "loss": 0.4955, "step": 18874 }, { "epoch": 0.48, "grad_norm": 2.7741386890411377, "learning_rate": 1.100939203191948e-05, "loss": 0.5496, "step": 18875 }, { "epoch": 0.48, "grad_norm": 5.95932674407959, "learning_rate": 1.1008566125453742e-05, "loss": 0.6037, "step": 18876 }, { "epoch": 0.48, "grad_norm": 1.7648165225982666, "learning_rate": 1.10077402120376e-05, "loss": 0.497, "step": 18877 }, { "epoch": 0.48, "grad_norm": 1.728284239768982, "learning_rate": 1.1006914291676745e-05, "loss": 0.5596, "step": 18878 }, { "epoch": 0.48, "grad_norm": 2.84409761428833, "learning_rate": 1.1006088364376867e-05, "loss": 0.6106, "step": 18879 }, { "epoch": 0.48, "grad_norm": 5.283143043518066, "learning_rate": 1.1005262430143663e-05, "loss": 0.6652, "step": 18880 }, { "epoch": 0.48, "grad_norm": 1.4848065376281738, "learning_rate": 1.1004436488982824e-05, "loss": 0.5074, "step": 18881 }, { "epoch": 0.48, "grad_norm": 2.3926503658294678, "learning_rate": 1.1003610540900037e-05, "loss": 0.5566, "step": 18882 }, { "epoch": 0.48, "grad_norm": 1.0239332914352417, "learning_rate": 1.1002784585900996e-05, "loss": 0.5504, "step": 18883 }, { "epoch": 0.48, "grad_norm": 1.2160509824752808, "learning_rate": 1.1001958623991396e-05, "loss": 0.3616, "step": 18884 }, { "epoch": 0.48, "grad_norm": 2.2850124835968018, "learning_rate": 1.1001132655176925e-05, "loss": 0.5812, "step": 18885 }, { "epoch": 0.48, "grad_norm": 2.7454047203063965, "learning_rate": 1.1000306679463278e-05, "loss": 0.7083, "step": 18886 }, { "epoch": 0.48, "grad_norm": 1.288673758506775, "learning_rate": 1.0999480696856144e-05, "loss": 0.5631, "step": 18887 }, { "epoch": 0.48, "grad_norm": 2.520268201828003, "learning_rate": 1.0998654707361218e-05, "loss": 0.5724, "step": 18888 }, { "epoch": 0.48, "grad_norm": 1.529763102531433, "learning_rate": 1.0997828710984192e-05, "loss": 0.6398, "step": 18889 }, { "epoch": 0.48, "grad_norm": 3.0756893157958984, "learning_rate": 1.0997002707730758e-05, "loss": 0.6885, "step": 18890 }, { "epoch": 0.48, "grad_norm": 11.565506935119629, "learning_rate": 1.0996176697606604e-05, "loss": 0.6072, "step": 18891 }, { "epoch": 0.48, "grad_norm": 5.1797332763671875, "learning_rate": 1.0995350680617429e-05, "loss": 0.553, "step": 18892 }, { "epoch": 0.48, "grad_norm": 1.241424798965454, "learning_rate": 1.099452465676892e-05, "loss": 0.5803, "step": 18893 }, { "epoch": 0.48, "grad_norm": 1.3897607326507568, "learning_rate": 1.0993698626066775e-05, "loss": 0.5753, "step": 18894 }, { "epoch": 0.48, "grad_norm": 3.1289193630218506, "learning_rate": 1.0992872588516684e-05, "loss": 0.6244, "step": 18895 }, { "epoch": 0.48, "grad_norm": 2.2055747509002686, "learning_rate": 1.0992046544124335e-05, "loss": 0.6003, "step": 18896 }, { "epoch": 0.48, "grad_norm": 1.5898436307907104, "learning_rate": 1.0991220492895427e-05, "loss": 0.5389, "step": 18897 }, { "epoch": 0.48, "grad_norm": 1.4148451089859009, "learning_rate": 1.099039443483565e-05, "loss": 0.475, "step": 18898 }, { "epoch": 0.48, "grad_norm": 1.172820806503296, "learning_rate": 1.0989568369950696e-05, "loss": 0.4852, "step": 18899 }, { "epoch": 0.48, "grad_norm": 1.5761897563934326, "learning_rate": 1.0988742298246257e-05, "loss": 0.6375, "step": 18900 }, { "epoch": 0.48, "grad_norm": 1.2265827655792236, "learning_rate": 1.0987916219728032e-05, "loss": 0.3993, "step": 18901 }, { "epoch": 0.48, "grad_norm": 1.2365479469299316, "learning_rate": 1.0987090134401706e-05, "loss": 0.5462, "step": 18902 }, { "epoch": 0.48, "grad_norm": 1.575806975364685, "learning_rate": 1.0986264042272973e-05, "loss": 0.5464, "step": 18903 }, { "epoch": 0.48, "grad_norm": 2.200984001159668, "learning_rate": 1.0985437943347532e-05, "loss": 0.5918, "step": 18904 }, { "epoch": 0.48, "grad_norm": 9.86325454711914, "learning_rate": 1.098461183763107e-05, "loss": 0.5203, "step": 18905 }, { "epoch": 0.48, "grad_norm": 1.448836088180542, "learning_rate": 1.098378572512928e-05, "loss": 0.5816, "step": 18906 }, { "epoch": 0.48, "grad_norm": 1.5019556283950806, "learning_rate": 1.0982959605847858e-05, "loss": 0.5661, "step": 18907 }, { "epoch": 0.48, "grad_norm": 1.7604475021362305, "learning_rate": 1.0982133479792495e-05, "loss": 0.5846, "step": 18908 }, { "epoch": 0.48, "grad_norm": 3.4517199993133545, "learning_rate": 1.098130734696889e-05, "loss": 0.599, "step": 18909 }, { "epoch": 0.48, "grad_norm": 1.5671850442886353, "learning_rate": 1.0980481207382728e-05, "loss": 0.5365, "step": 18910 }, { "epoch": 0.48, "grad_norm": 3.183006763458252, "learning_rate": 1.0979655061039704e-05, "loss": 0.4617, "step": 18911 }, { "epoch": 0.48, "grad_norm": 2.714420795440674, "learning_rate": 1.0978828907945513e-05, "loss": 0.7269, "step": 18912 }, { "epoch": 0.48, "grad_norm": 2.329359769821167, "learning_rate": 1.0978002748105851e-05, "loss": 0.512, "step": 18913 }, { "epoch": 0.48, "grad_norm": 1.9454188346862793, "learning_rate": 1.0977176581526406e-05, "loss": 0.6313, "step": 18914 }, { "epoch": 0.48, "grad_norm": 1.7041397094726562, "learning_rate": 1.0976350408212874e-05, "loss": 0.7045, "step": 18915 }, { "epoch": 0.48, "grad_norm": 1.7447948455810547, "learning_rate": 1.0975524228170951e-05, "loss": 0.6683, "step": 18916 }, { "epoch": 0.48, "grad_norm": 3.9032504558563232, "learning_rate": 1.0974698041406324e-05, "loss": 0.6003, "step": 18917 }, { "epoch": 0.48, "grad_norm": 1.6508798599243164, "learning_rate": 1.0973871847924692e-05, "loss": 0.5913, "step": 18918 }, { "epoch": 0.48, "grad_norm": 1.6361585855484009, "learning_rate": 1.0973045647731747e-05, "loss": 0.4195, "step": 18919 }, { "epoch": 0.48, "grad_norm": 2.4542593955993652, "learning_rate": 1.0972219440833185e-05, "loss": 0.6536, "step": 18920 }, { "epoch": 0.48, "grad_norm": 4.654839038848877, "learning_rate": 1.0971393227234697e-05, "loss": 0.6876, "step": 18921 }, { "epoch": 0.48, "grad_norm": 1.2785452604293823, "learning_rate": 1.0970567006941975e-05, "loss": 0.4935, "step": 18922 }, { "epoch": 0.49, "grad_norm": 2.7713067531585693, "learning_rate": 1.0969740779960717e-05, "loss": 0.5349, "step": 18923 }, { "epoch": 0.49, "grad_norm": 1.4123305082321167, "learning_rate": 1.0968914546296612e-05, "loss": 0.4898, "step": 18924 }, { "epoch": 0.49, "grad_norm": 3.1781678199768066, "learning_rate": 1.0968088305955358e-05, "loss": 0.4901, "step": 18925 }, { "epoch": 0.49, "grad_norm": 3.791469097137451, "learning_rate": 1.0967262058942646e-05, "loss": 0.7228, "step": 18926 }, { "epoch": 0.49, "grad_norm": 2.2966694831848145, "learning_rate": 1.0966435805264175e-05, "loss": 0.4842, "step": 18927 }, { "epoch": 0.49, "grad_norm": 1.2504074573516846, "learning_rate": 1.0965609544925635e-05, "loss": 0.4348, "step": 18928 }, { "epoch": 0.49, "grad_norm": 5.698391437530518, "learning_rate": 1.0964783277932719e-05, "loss": 0.5212, "step": 18929 }, { "epoch": 0.49, "grad_norm": 4.868698596954346, "learning_rate": 1.0963957004291124e-05, "loss": 0.6514, "step": 18930 }, { "epoch": 0.49, "grad_norm": 1.5237071514129639, "learning_rate": 1.0963130724006542e-05, "loss": 0.7389, "step": 18931 }, { "epoch": 0.49, "grad_norm": 2.1938912868499756, "learning_rate": 1.0962304437084667e-05, "loss": 0.506, "step": 18932 }, { "epoch": 0.49, "grad_norm": 3.896686315536499, "learning_rate": 1.09614781435312e-05, "loss": 0.5817, "step": 18933 }, { "epoch": 0.49, "grad_norm": 1.3679324388504028, "learning_rate": 1.0960651843351821e-05, "loss": 0.5706, "step": 18934 }, { "epoch": 0.49, "grad_norm": 4.722874164581299, "learning_rate": 1.095982553655224e-05, "loss": 0.7137, "step": 18935 }, { "epoch": 0.49, "grad_norm": 2.8464062213897705, "learning_rate": 1.0958999223138143e-05, "loss": 0.5595, "step": 18936 }, { "epoch": 0.49, "grad_norm": 3.1347873210906982, "learning_rate": 1.0958172903115224e-05, "loss": 0.569, "step": 18937 }, { "epoch": 0.49, "grad_norm": 2.488938808441162, "learning_rate": 1.095734657648918e-05, "loss": 0.5438, "step": 18938 }, { "epoch": 0.49, "grad_norm": 1.128694772720337, "learning_rate": 1.0956520243265705e-05, "loss": 0.5746, "step": 18939 }, { "epoch": 0.49, "grad_norm": 4.810090065002441, "learning_rate": 1.0955693903450495e-05, "loss": 0.4781, "step": 18940 }, { "epoch": 0.49, "grad_norm": 1.7079548835754395, "learning_rate": 1.0954867557049243e-05, "loss": 0.5087, "step": 18941 }, { "epoch": 0.49, "grad_norm": 1.4852293729782104, "learning_rate": 1.095404120406764e-05, "loss": 0.5752, "step": 18942 }, { "epoch": 0.49, "grad_norm": 1.7904081344604492, "learning_rate": 1.0953214844511388e-05, "loss": 0.5097, "step": 18943 }, { "epoch": 0.49, "grad_norm": 1.5407835245132446, "learning_rate": 1.0952388478386178e-05, "loss": 0.4301, "step": 18944 }, { "epoch": 0.49, "grad_norm": 1.79794442653656, "learning_rate": 1.0951562105697704e-05, "loss": 0.6986, "step": 18945 }, { "epoch": 0.49, "grad_norm": 1.3384994268417358, "learning_rate": 1.0950735726451662e-05, "loss": 0.5544, "step": 18946 }, { "epoch": 0.49, "grad_norm": 1.3809096813201904, "learning_rate": 1.0949909340653744e-05, "loss": 0.5712, "step": 18947 }, { "epoch": 0.49, "grad_norm": 2.025580406188965, "learning_rate": 1.0949082948309654e-05, "loss": 0.5351, "step": 18948 }, { "epoch": 0.49, "grad_norm": 1.2451344728469849, "learning_rate": 1.0948256549425073e-05, "loss": 0.5074, "step": 18949 }, { "epoch": 0.49, "grad_norm": 4.755324363708496, "learning_rate": 1.0947430144005708e-05, "loss": 0.6222, "step": 18950 }, { "epoch": 0.49, "grad_norm": 2.803208827972412, "learning_rate": 1.0946603732057248e-05, "loss": 0.62, "step": 18951 }, { "epoch": 0.49, "grad_norm": 4.104332447052002, "learning_rate": 1.094577731358539e-05, "loss": 0.6226, "step": 18952 }, { "epoch": 0.49, "grad_norm": 4.880587577819824, "learning_rate": 1.0944950888595832e-05, "loss": 0.5705, "step": 18953 }, { "epoch": 0.49, "grad_norm": 2.7817435264587402, "learning_rate": 1.0944124457094262e-05, "loss": 0.549, "step": 18954 }, { "epoch": 0.49, "grad_norm": 2.6234493255615234, "learning_rate": 1.0943298019086383e-05, "loss": 0.6299, "step": 18955 }, { "epoch": 0.49, "grad_norm": 3.8470399379730225, "learning_rate": 1.0942471574577884e-05, "loss": 0.4777, "step": 18956 }, { "epoch": 0.49, "grad_norm": 1.295218586921692, "learning_rate": 1.0941645123574465e-05, "loss": 0.6105, "step": 18957 }, { "epoch": 0.49, "grad_norm": 1.3856816291809082, "learning_rate": 1.0940818666081818e-05, "loss": 0.4889, "step": 18958 }, { "epoch": 0.49, "grad_norm": 5.242255210876465, "learning_rate": 1.0939992202105637e-05, "loss": 0.4353, "step": 18959 }, { "epoch": 0.49, "grad_norm": 1.9637525081634521, "learning_rate": 1.0939165731651626e-05, "loss": 0.5714, "step": 18960 }, { "epoch": 0.49, "grad_norm": 8.551774978637695, "learning_rate": 1.0938339254725472e-05, "loss": 0.5395, "step": 18961 }, { "epoch": 0.49, "grad_norm": 3.274397373199463, "learning_rate": 1.0937512771332875e-05, "loss": 0.5461, "step": 18962 }, { "epoch": 0.49, "grad_norm": 2.5830655097961426, "learning_rate": 1.0936686281479528e-05, "loss": 0.6608, "step": 18963 }, { "epoch": 0.49, "grad_norm": 2.820732831954956, "learning_rate": 1.0935859785171125e-05, "loss": 0.5736, "step": 18964 }, { "epoch": 0.49, "grad_norm": 2.0999505519866943, "learning_rate": 1.0935033282413368e-05, "loss": 0.651, "step": 18965 }, { "epoch": 0.49, "grad_norm": 1.672593116760254, "learning_rate": 1.0934206773211946e-05, "loss": 0.4606, "step": 18966 }, { "epoch": 0.49, "grad_norm": 1.1196433305740356, "learning_rate": 1.0933380257572561e-05, "loss": 0.5122, "step": 18967 }, { "epoch": 0.49, "grad_norm": 2.0689210891723633, "learning_rate": 1.0932553735500905e-05, "loss": 0.5185, "step": 18968 }, { "epoch": 0.49, "grad_norm": 1.2519387006759644, "learning_rate": 1.0931727207002673e-05, "loss": 0.6166, "step": 18969 }, { "epoch": 0.49, "grad_norm": 1.6247652769088745, "learning_rate": 1.0930900672083561e-05, "loss": 0.5876, "step": 18970 }, { "epoch": 0.49, "grad_norm": 1.7085317373275757, "learning_rate": 1.0930074130749269e-05, "loss": 0.6328, "step": 18971 }, { "epoch": 0.49, "grad_norm": 1.6578787565231323, "learning_rate": 1.0929247583005492e-05, "loss": 0.6405, "step": 18972 }, { "epoch": 0.49, "grad_norm": 1.3869752883911133, "learning_rate": 1.092842102885792e-05, "loss": 0.6735, "step": 18973 }, { "epoch": 0.49, "grad_norm": 1.2828670740127563, "learning_rate": 1.0927594468312259e-05, "loss": 0.5154, "step": 18974 }, { "epoch": 0.49, "grad_norm": 3.2976934909820557, "learning_rate": 1.0926767901374195e-05, "loss": 0.6563, "step": 18975 }, { "epoch": 0.49, "grad_norm": 4.0656962394714355, "learning_rate": 1.092594132804943e-05, "loss": 0.6162, "step": 18976 }, { "epoch": 0.49, "grad_norm": 2.6213321685791016, "learning_rate": 1.0925114748343661e-05, "loss": 0.741, "step": 18977 }, { "epoch": 0.49, "grad_norm": 1.3496971130371094, "learning_rate": 1.092428816226258e-05, "loss": 0.5504, "step": 18978 }, { "epoch": 0.49, "grad_norm": 1.6184862852096558, "learning_rate": 1.0923461569811889e-05, "loss": 0.6595, "step": 18979 }, { "epoch": 0.49, "grad_norm": 10.818374633789062, "learning_rate": 1.0922634970997277e-05, "loss": 0.6074, "step": 18980 }, { "epoch": 0.49, "grad_norm": 1.2632354497909546, "learning_rate": 1.0921808365824446e-05, "loss": 0.3727, "step": 18981 }, { "epoch": 0.49, "grad_norm": 1.5091255903244019, "learning_rate": 1.0920981754299091e-05, "loss": 0.5172, "step": 18982 }, { "epoch": 0.49, "grad_norm": 1.352846384048462, "learning_rate": 1.0920155136426909e-05, "loss": 0.482, "step": 18983 }, { "epoch": 0.49, "grad_norm": 1.2942336797714233, "learning_rate": 1.0919328512213595e-05, "loss": 0.6229, "step": 18984 }, { "epoch": 0.49, "grad_norm": 1.5824724435806274, "learning_rate": 1.0918501881664846e-05, "loss": 0.5686, "step": 18985 }, { "epoch": 0.49, "grad_norm": 2.082981586456299, "learning_rate": 1.0917675244786361e-05, "loss": 0.6124, "step": 18986 }, { "epoch": 0.49, "grad_norm": 4.5099310874938965, "learning_rate": 1.0916848601583834e-05, "loss": 0.601, "step": 18987 }, { "epoch": 0.49, "grad_norm": 1.494356632232666, "learning_rate": 1.091602195206296e-05, "loss": 0.5902, "step": 18988 }, { "epoch": 0.49, "grad_norm": 2.181877374649048, "learning_rate": 1.0915195296229441e-05, "loss": 0.7433, "step": 18989 }, { "epoch": 0.49, "grad_norm": 3.866769552230835, "learning_rate": 1.091436863408897e-05, "loss": 0.6571, "step": 18990 }, { "epoch": 0.49, "grad_norm": 4.721578598022461, "learning_rate": 1.0913541965647247e-05, "loss": 0.6078, "step": 18991 }, { "epoch": 0.49, "grad_norm": 1.228157877922058, "learning_rate": 1.0912715290909963e-05, "loss": 0.5879, "step": 18992 }, { "epoch": 0.49, "grad_norm": 1.9062052965164185, "learning_rate": 1.0911888609882821e-05, "loss": 0.4881, "step": 18993 }, { "epoch": 0.49, "grad_norm": 1.6297348737716675, "learning_rate": 1.0911061922571514e-05, "loss": 0.6606, "step": 18994 }, { "epoch": 0.49, "grad_norm": 2.399911880493164, "learning_rate": 1.0910235228981742e-05, "loss": 0.4527, "step": 18995 }, { "epoch": 0.49, "grad_norm": 3.5175249576568604, "learning_rate": 1.0909408529119203e-05, "loss": 0.581, "step": 18996 }, { "epoch": 0.49, "grad_norm": 6.546579360961914, "learning_rate": 1.0908581822989587e-05, "loss": 0.7684, "step": 18997 }, { "epoch": 0.49, "grad_norm": 3.4926998615264893, "learning_rate": 1.09077551105986e-05, "loss": 0.5803, "step": 18998 }, { "epoch": 0.49, "grad_norm": 1.2811416387557983, "learning_rate": 1.0906928391951932e-05, "loss": 0.4866, "step": 18999 }, { "epoch": 0.49, "grad_norm": 4.1627421379089355, "learning_rate": 1.0906101667055285e-05, "loss": 0.66, "step": 19000 }, { "epoch": 0.49, "grad_norm": 1.1856422424316406, "learning_rate": 1.0905274935914353e-05, "loss": 0.5247, "step": 19001 }, { "epoch": 0.49, "grad_norm": 1.2489359378814697, "learning_rate": 1.0904448198534836e-05, "loss": 0.6161, "step": 19002 }, { "epoch": 0.49, "grad_norm": 1.4485076665878296, "learning_rate": 1.0903621454922433e-05, "loss": 0.6768, "step": 19003 }, { "epoch": 0.49, "grad_norm": 3.3209850788116455, "learning_rate": 1.0902794705082833e-05, "loss": 0.5131, "step": 19004 }, { "epoch": 0.49, "grad_norm": 1.3335456848144531, "learning_rate": 1.0901967949021741e-05, "loss": 0.4983, "step": 19005 }, { "epoch": 0.49, "grad_norm": 1.4612057209014893, "learning_rate": 1.0901141186744858e-05, "loss": 0.5867, "step": 19006 }, { "epoch": 0.49, "grad_norm": 1.527114748954773, "learning_rate": 1.0900314418257869e-05, "loss": 0.5157, "step": 19007 }, { "epoch": 0.49, "grad_norm": 3.715437889099121, "learning_rate": 1.0899487643566483e-05, "loss": 0.5885, "step": 19008 }, { "epoch": 0.49, "grad_norm": 1.0805943012237549, "learning_rate": 1.0898660862676389e-05, "loss": 0.5047, "step": 19009 }, { "epoch": 0.49, "grad_norm": 3.156160831451416, "learning_rate": 1.0897834075593295e-05, "loss": 0.6269, "step": 19010 }, { "epoch": 0.49, "grad_norm": 3.056774377822876, "learning_rate": 1.0897007282322886e-05, "loss": 0.4645, "step": 19011 }, { "epoch": 0.49, "grad_norm": 1.4885518550872803, "learning_rate": 1.0896180482870869e-05, "loss": 0.5863, "step": 19012 }, { "epoch": 0.49, "grad_norm": 1.93011474609375, "learning_rate": 1.0895353677242942e-05, "loss": 0.5502, "step": 19013 }, { "epoch": 0.49, "grad_norm": 1.4612858295440674, "learning_rate": 1.0894526865444797e-05, "loss": 0.6115, "step": 19014 }, { "epoch": 0.49, "grad_norm": 3.3525900840759277, "learning_rate": 1.0893700047482136e-05, "loss": 0.6358, "step": 19015 }, { "epoch": 0.49, "grad_norm": 1.381367802619934, "learning_rate": 1.0892873223360653e-05, "loss": 0.4734, "step": 19016 }, { "epoch": 0.49, "grad_norm": 1.2040863037109375, "learning_rate": 1.089204639308605e-05, "loss": 0.6102, "step": 19017 }, { "epoch": 0.49, "grad_norm": 6.404255390167236, "learning_rate": 1.0891219556664026e-05, "loss": 0.6744, "step": 19018 }, { "epoch": 0.49, "grad_norm": 1.2545090913772583, "learning_rate": 1.0890392714100276e-05, "loss": 0.6207, "step": 19019 }, { "epoch": 0.49, "grad_norm": 1.4543434381484985, "learning_rate": 1.0889565865400498e-05, "loss": 0.559, "step": 19020 }, { "epoch": 0.49, "grad_norm": 1.2886322736740112, "learning_rate": 1.088873901057039e-05, "loss": 0.5314, "step": 19021 }, { "epoch": 0.49, "grad_norm": 8.574499130249023, "learning_rate": 1.0887912149615654e-05, "loss": 0.67, "step": 19022 }, { "epoch": 0.49, "grad_norm": 1.270758867263794, "learning_rate": 1.0887085282541983e-05, "loss": 0.6494, "step": 19023 }, { "epoch": 0.49, "grad_norm": 1.2242058515548706, "learning_rate": 1.0886258409355077e-05, "loss": 0.6447, "step": 19024 }, { "epoch": 0.49, "grad_norm": 4.048243999481201, "learning_rate": 1.0885431530060637e-05, "loss": 0.6322, "step": 19025 }, { "epoch": 0.49, "grad_norm": 2.447324752807617, "learning_rate": 1.0884604644664355e-05, "loss": 0.6457, "step": 19026 }, { "epoch": 0.49, "grad_norm": 2.0060718059539795, "learning_rate": 1.088377775317194e-05, "loss": 0.5123, "step": 19027 }, { "epoch": 0.49, "grad_norm": 1.4677128791809082, "learning_rate": 1.0882950855589078e-05, "loss": 0.6555, "step": 19028 }, { "epoch": 0.49, "grad_norm": 1.5923720598220825, "learning_rate": 1.0882123951921474e-05, "loss": 0.6747, "step": 19029 }, { "epoch": 0.49, "grad_norm": 1.7344815731048584, "learning_rate": 1.0881297042174831e-05, "loss": 0.5719, "step": 19030 }, { "epoch": 0.49, "grad_norm": 1.363637089729309, "learning_rate": 1.0880470126354839e-05, "loss": 0.5992, "step": 19031 }, { "epoch": 0.49, "grad_norm": 3.238121271133423, "learning_rate": 1.08796432044672e-05, "loss": 0.5303, "step": 19032 }, { "epoch": 0.49, "grad_norm": 2.051079034805298, "learning_rate": 1.0878816276517614e-05, "loss": 0.5417, "step": 19033 }, { "epoch": 0.49, "grad_norm": 2.2215797901153564, "learning_rate": 1.0877989342511779e-05, "loss": 0.5855, "step": 19034 }, { "epoch": 0.49, "grad_norm": 1.9486842155456543, "learning_rate": 1.087716240245539e-05, "loss": 0.5036, "step": 19035 }, { "epoch": 0.49, "grad_norm": 1.184606671333313, "learning_rate": 1.087633545635415e-05, "loss": 0.5806, "step": 19036 }, { "epoch": 0.49, "grad_norm": 1.4480494260787964, "learning_rate": 1.0875508504213756e-05, "loss": 0.4556, "step": 19037 }, { "epoch": 0.49, "grad_norm": 1.4477108716964722, "learning_rate": 1.0874681546039909e-05, "loss": 0.6053, "step": 19038 }, { "epoch": 0.49, "grad_norm": 2.860971450805664, "learning_rate": 1.0873854581838307e-05, "loss": 0.7115, "step": 19039 }, { "epoch": 0.49, "grad_norm": 2.00500750541687, "learning_rate": 1.0873027611614645e-05, "loss": 0.6539, "step": 19040 }, { "epoch": 0.49, "grad_norm": 4.747631072998047, "learning_rate": 1.0872200635374627e-05, "loss": 0.6103, "step": 19041 }, { "epoch": 0.49, "grad_norm": 1.4510557651519775, "learning_rate": 1.0871373653123951e-05, "loss": 0.6289, "step": 19042 }, { "epoch": 0.49, "grad_norm": 4.236361980438232, "learning_rate": 1.0870546664868313e-05, "loss": 0.6467, "step": 19043 }, { "epoch": 0.49, "grad_norm": 6.526067733764648, "learning_rate": 1.0869719670613419e-05, "loss": 0.8667, "step": 19044 }, { "epoch": 0.49, "grad_norm": 3.5691170692443848, "learning_rate": 1.086889267036496e-05, "loss": 0.5774, "step": 19045 }, { "epoch": 0.49, "grad_norm": 8.456110954284668, "learning_rate": 1.0868065664128638e-05, "loss": 0.7304, "step": 19046 }, { "epoch": 0.49, "grad_norm": 1.4458019733428955, "learning_rate": 1.0867238651910152e-05, "loss": 0.4303, "step": 19047 }, { "epoch": 0.49, "grad_norm": 2.4339327812194824, "learning_rate": 1.0866411633715204e-05, "loss": 0.5779, "step": 19048 }, { "epoch": 0.49, "grad_norm": 1.9548349380493164, "learning_rate": 1.0865584609549492e-05, "loss": 0.6087, "step": 19049 }, { "epoch": 0.49, "grad_norm": 1.5315256118774414, "learning_rate": 1.0864757579418714e-05, "loss": 0.5319, "step": 19050 }, { "epoch": 0.49, "grad_norm": 1.1386948823928833, "learning_rate": 1.086393054332857e-05, "loss": 0.5884, "step": 19051 }, { "epoch": 0.49, "grad_norm": 1.8224925994873047, "learning_rate": 1.086310350128476e-05, "loss": 0.56, "step": 19052 }, { "epoch": 0.49, "grad_norm": 1.535575032234192, "learning_rate": 1.0862276453292982e-05, "loss": 0.5118, "step": 19053 }, { "epoch": 0.49, "grad_norm": 0.9659891128540039, "learning_rate": 1.0861449399358936e-05, "loss": 0.3559, "step": 19054 }, { "epoch": 0.49, "grad_norm": 2.3822529315948486, "learning_rate": 1.0860622339488323e-05, "loss": 0.5725, "step": 19055 }, { "epoch": 0.49, "grad_norm": 6.206979751586914, "learning_rate": 1.0859795273686842e-05, "loss": 0.7558, "step": 19056 }, { "epoch": 0.49, "grad_norm": 2.0964748859405518, "learning_rate": 1.085896820196019e-05, "loss": 0.4934, "step": 19057 }, { "epoch": 0.49, "grad_norm": 2.3344779014587402, "learning_rate": 1.085814112431407e-05, "loss": 0.7345, "step": 19058 }, { "epoch": 0.49, "grad_norm": 3.079927444458008, "learning_rate": 1.085731404075418e-05, "loss": 0.6309, "step": 19059 }, { "epoch": 0.49, "grad_norm": 0.9623987078666687, "learning_rate": 1.085648695128622e-05, "loss": 0.4642, "step": 19060 }, { "epoch": 0.49, "grad_norm": 3.689908981323242, "learning_rate": 1.0855659855915891e-05, "loss": 0.5304, "step": 19061 }, { "epoch": 0.49, "grad_norm": 1.8907852172851562, "learning_rate": 1.0854832754648896e-05, "loss": 0.6673, "step": 19062 }, { "epoch": 0.49, "grad_norm": 8.51570987701416, "learning_rate": 1.0854005647490925e-05, "loss": 0.563, "step": 19063 }, { "epoch": 0.49, "grad_norm": 1.719647765159607, "learning_rate": 1.0853178534447686e-05, "loss": 0.5028, "step": 19064 }, { "epoch": 0.49, "grad_norm": 2.2499258518218994, "learning_rate": 1.0852351415524876e-05, "loss": 0.4484, "step": 19065 }, { "epoch": 0.49, "grad_norm": 1.8713797330856323, "learning_rate": 1.0851524290728194e-05, "loss": 0.41, "step": 19066 }, { "epoch": 0.49, "grad_norm": 1.1016545295715332, "learning_rate": 1.0850697160063345e-05, "loss": 0.5264, "step": 19067 }, { "epoch": 0.49, "grad_norm": 1.22311270236969, "learning_rate": 1.084987002353602e-05, "loss": 0.4972, "step": 19068 }, { "epoch": 0.49, "grad_norm": 2.7791662216186523, "learning_rate": 1.084904288115193e-05, "loss": 0.5097, "step": 19069 }, { "epoch": 0.49, "grad_norm": 1.7483495473861694, "learning_rate": 1.0848215732916767e-05, "loss": 0.7024, "step": 19070 }, { "epoch": 0.49, "grad_norm": 1.5085724592208862, "learning_rate": 1.0847388578836235e-05, "loss": 0.5593, "step": 19071 }, { "epoch": 0.49, "grad_norm": 5.267498970031738, "learning_rate": 1.0846561418916033e-05, "loss": 0.532, "step": 19072 }, { "epoch": 0.49, "grad_norm": 1.5495325326919556, "learning_rate": 1.0845734253161861e-05, "loss": 0.4771, "step": 19073 }, { "epoch": 0.49, "grad_norm": 9.279023170471191, "learning_rate": 1.0844907081579423e-05, "loss": 0.5863, "step": 19074 }, { "epoch": 0.49, "grad_norm": 1.6739615201950073, "learning_rate": 1.0844079904174414e-05, "loss": 0.6372, "step": 19075 }, { "epoch": 0.49, "grad_norm": 1.2283780574798584, "learning_rate": 1.0843252720952537e-05, "loss": 0.5081, "step": 19076 }, { "epoch": 0.49, "grad_norm": 0.8714867830276489, "learning_rate": 1.084242553191949e-05, "loss": 0.5093, "step": 19077 }, { "epoch": 0.49, "grad_norm": 2.602617025375366, "learning_rate": 1.0841598337080979e-05, "loss": 0.596, "step": 19078 }, { "epoch": 0.49, "grad_norm": 6.311976432800293, "learning_rate": 1.0840771136442698e-05, "loss": 0.5444, "step": 19079 }, { "epoch": 0.49, "grad_norm": 1.5122311115264893, "learning_rate": 1.083994393001035e-05, "loss": 0.6034, "step": 19080 }, { "epoch": 0.49, "grad_norm": 1.7400799989700317, "learning_rate": 1.0839116717789641e-05, "loss": 0.3637, "step": 19081 }, { "epoch": 0.49, "grad_norm": 4.147833824157715, "learning_rate": 1.0838289499786264e-05, "loss": 0.4856, "step": 19082 }, { "epoch": 0.49, "grad_norm": 1.8033732175827026, "learning_rate": 1.0837462276005922e-05, "loss": 0.5614, "step": 19083 }, { "epoch": 0.49, "grad_norm": 2.524958848953247, "learning_rate": 1.0836635046454315e-05, "loss": 0.5463, "step": 19084 }, { "epoch": 0.49, "grad_norm": 8.213638305664062, "learning_rate": 1.0835807811137144e-05, "loss": 0.4875, "step": 19085 }, { "epoch": 0.49, "grad_norm": 1.2235122919082642, "learning_rate": 1.0834980570060115e-05, "loss": 0.6337, "step": 19086 }, { "epoch": 0.49, "grad_norm": 0.9976930618286133, "learning_rate": 1.0834153323228921e-05, "loss": 0.4857, "step": 19087 }, { "epoch": 0.49, "grad_norm": 2.648362398147583, "learning_rate": 1.0833326070649268e-05, "loss": 0.681, "step": 19088 }, { "epoch": 0.49, "grad_norm": 1.218864917755127, "learning_rate": 1.0832498812326854e-05, "loss": 0.5625, "step": 19089 }, { "epoch": 0.49, "grad_norm": 1.2660330533981323, "learning_rate": 1.0831671548267383e-05, "loss": 0.6311, "step": 19090 }, { "epoch": 0.49, "grad_norm": 0.9625077247619629, "learning_rate": 1.0830844278476554e-05, "loss": 0.5091, "step": 19091 }, { "epoch": 0.49, "grad_norm": 2.2958595752716064, "learning_rate": 1.0830017002960068e-05, "loss": 0.6727, "step": 19092 }, { "epoch": 0.49, "grad_norm": 1.443193793296814, "learning_rate": 1.0829189721723626e-05, "loss": 0.501, "step": 19093 }, { "epoch": 0.49, "grad_norm": 1.8911973237991333, "learning_rate": 1.0828362434772927e-05, "loss": 0.523, "step": 19094 }, { "epoch": 0.49, "grad_norm": 1.5068739652633667, "learning_rate": 1.0827535142113678e-05, "loss": 0.6412, "step": 19095 }, { "epoch": 0.49, "grad_norm": 4.357928276062012, "learning_rate": 1.0826707843751576e-05, "loss": 0.5826, "step": 19096 }, { "epoch": 0.49, "grad_norm": 1.1488288640975952, "learning_rate": 1.0825880539692322e-05, "loss": 0.5422, "step": 19097 }, { "epoch": 0.49, "grad_norm": 1.5843956470489502, "learning_rate": 1.0825053229941617e-05, "loss": 0.5137, "step": 19098 }, { "epoch": 0.49, "grad_norm": 2.8815627098083496, "learning_rate": 1.0824225914505166e-05, "loss": 0.6168, "step": 19099 }, { "epoch": 0.49, "grad_norm": 5.671812057495117, "learning_rate": 1.0823398593388667e-05, "loss": 0.398, "step": 19100 }, { "epoch": 0.49, "grad_norm": 1.0189452171325684, "learning_rate": 1.082257126659782e-05, "loss": 0.6498, "step": 19101 }, { "epoch": 0.49, "grad_norm": 1.3140450716018677, "learning_rate": 1.0821743934138334e-05, "loss": 0.5124, "step": 19102 }, { "epoch": 0.49, "grad_norm": 1.3345768451690674, "learning_rate": 1.0820916596015901e-05, "loss": 0.4309, "step": 19103 }, { "epoch": 0.49, "grad_norm": 1.7721002101898193, "learning_rate": 1.0820089252236225e-05, "loss": 0.6955, "step": 19104 }, { "epoch": 0.49, "grad_norm": 4.097582817077637, "learning_rate": 1.0819261902805013e-05, "loss": 0.6877, "step": 19105 }, { "epoch": 0.49, "grad_norm": 1.6213124990463257, "learning_rate": 1.0818434547727961e-05, "loss": 0.721, "step": 19106 }, { "epoch": 0.49, "grad_norm": 5.504312038421631, "learning_rate": 1.0817607187010775e-05, "loss": 0.4886, "step": 19107 }, { "epoch": 0.49, "grad_norm": 4.279885768890381, "learning_rate": 1.0816779820659149e-05, "loss": 0.6187, "step": 19108 }, { "epoch": 0.49, "grad_norm": 4.268616676330566, "learning_rate": 1.081595244867879e-05, "loss": 0.6209, "step": 19109 }, { "epoch": 0.49, "grad_norm": 1.519796371459961, "learning_rate": 1.0815125071075403e-05, "loss": 0.4828, "step": 19110 }, { "epoch": 0.49, "grad_norm": 4.458428859710693, "learning_rate": 1.0814297687854685e-05, "loss": 0.6662, "step": 19111 }, { "epoch": 0.49, "grad_norm": 3.250664710998535, "learning_rate": 1.081347029902234e-05, "loss": 0.5359, "step": 19112 }, { "epoch": 0.49, "grad_norm": 2.4175493717193604, "learning_rate": 1.0812642904584066e-05, "loss": 0.615, "step": 19113 }, { "epoch": 0.49, "grad_norm": 3.15889573097229, "learning_rate": 1.0811815504545566e-05, "loss": 0.4711, "step": 19114 }, { "epoch": 0.49, "grad_norm": 1.783204436302185, "learning_rate": 1.081098809891255e-05, "loss": 0.4988, "step": 19115 }, { "epoch": 0.49, "grad_norm": 2.2583413124084473, "learning_rate": 1.0810160687690708e-05, "loss": 0.6208, "step": 19116 }, { "epoch": 0.49, "grad_norm": 10.814515113830566, "learning_rate": 1.0809333270885751e-05, "loss": 0.4299, "step": 19117 }, { "epoch": 0.49, "grad_norm": 1.990960955619812, "learning_rate": 1.0808505848503375e-05, "loss": 0.5317, "step": 19118 }, { "epoch": 0.49, "grad_norm": 3.4639952182769775, "learning_rate": 1.0807678420549286e-05, "loss": 0.5286, "step": 19119 }, { "epoch": 0.49, "grad_norm": 2.4858040809631348, "learning_rate": 1.0806850987029182e-05, "loss": 0.4206, "step": 19120 }, { "epoch": 0.49, "grad_norm": 2.005183219909668, "learning_rate": 1.0806023547948772e-05, "loss": 0.592, "step": 19121 }, { "epoch": 0.49, "grad_norm": 4.294896602630615, "learning_rate": 1.0805196103313752e-05, "loss": 0.7412, "step": 19122 }, { "epoch": 0.49, "grad_norm": 1.5136128664016724, "learning_rate": 1.0804368653129826e-05, "loss": 0.702, "step": 19123 }, { "epoch": 0.49, "grad_norm": 1.8227065801620483, "learning_rate": 1.0803541197402698e-05, "loss": 0.4399, "step": 19124 }, { "epoch": 0.49, "grad_norm": 6.893460750579834, "learning_rate": 1.0802713736138069e-05, "loss": 0.5317, "step": 19125 }, { "epoch": 0.49, "grad_norm": 0.9813435673713684, "learning_rate": 1.080188626934164e-05, "loss": 0.5964, "step": 19126 }, { "epoch": 0.49, "grad_norm": 1.7024353742599487, "learning_rate": 1.0801058797019115e-05, "loss": 0.5206, "step": 19127 }, { "epoch": 0.49, "grad_norm": 5.5662102699279785, "learning_rate": 1.0800231319176196e-05, "loss": 0.4894, "step": 19128 }, { "epoch": 0.49, "grad_norm": 2.2130093574523926, "learning_rate": 1.0799403835818587e-05, "loss": 0.5483, "step": 19129 }, { "epoch": 0.49, "grad_norm": 2.916801691055298, "learning_rate": 1.0798576346951986e-05, "loss": 0.5332, "step": 19130 }, { "epoch": 0.49, "grad_norm": 3.1155266761779785, "learning_rate": 1.0797748852582102e-05, "loss": 0.6893, "step": 19131 }, { "epoch": 0.49, "grad_norm": 6.423600196838379, "learning_rate": 1.0796921352714632e-05, "loss": 0.7219, "step": 19132 }, { "epoch": 0.49, "grad_norm": 5.747305393218994, "learning_rate": 1.079609384735528e-05, "loss": 0.5798, "step": 19133 }, { "epoch": 0.49, "grad_norm": 6.46632719039917, "learning_rate": 1.0795266336509751e-05, "loss": 0.4862, "step": 19134 }, { "epoch": 0.49, "grad_norm": 1.4191988706588745, "learning_rate": 1.0794438820183743e-05, "loss": 0.6685, "step": 19135 }, { "epoch": 0.49, "grad_norm": 15.512869834899902, "learning_rate": 1.0793611298382966e-05, "loss": 0.625, "step": 19136 }, { "epoch": 0.49, "grad_norm": 1.825134515762329, "learning_rate": 1.0792783771113116e-05, "loss": 0.5228, "step": 19137 }, { "epoch": 0.49, "grad_norm": 3.7069876194000244, "learning_rate": 1.0791956238379899e-05, "loss": 0.5503, "step": 19138 }, { "epoch": 0.49, "grad_norm": 2.3235924243927, "learning_rate": 1.0791128700189017e-05, "loss": 0.5882, "step": 19139 }, { "epoch": 0.49, "grad_norm": 1.2914724349975586, "learning_rate": 1.0790301156546175e-05, "loss": 0.7172, "step": 19140 }, { "epoch": 0.49, "grad_norm": 1.9582228660583496, "learning_rate": 1.0789473607457073e-05, "loss": 0.5427, "step": 19141 }, { "epoch": 0.49, "grad_norm": 1.777395248413086, "learning_rate": 1.0788646052927412e-05, "loss": 0.5645, "step": 19142 }, { "epoch": 0.49, "grad_norm": 1.0824390649795532, "learning_rate": 1.07878184929629e-05, "loss": 0.5485, "step": 19143 }, { "epoch": 0.49, "grad_norm": 0.9725481271743774, "learning_rate": 1.078699092756924e-05, "loss": 0.6213, "step": 19144 }, { "epoch": 0.49, "grad_norm": 5.42785120010376, "learning_rate": 1.0786163356752128e-05, "loss": 0.5532, "step": 19145 }, { "epoch": 0.49, "grad_norm": 1.156838297843933, "learning_rate": 1.0785335780517277e-05, "loss": 0.5171, "step": 19146 }, { "epoch": 0.49, "grad_norm": 1.4089343547821045, "learning_rate": 1.078450819887038e-05, "loss": 0.5599, "step": 19147 }, { "epoch": 0.49, "grad_norm": 1.3117529153823853, "learning_rate": 1.0783680611817151e-05, "loss": 0.5192, "step": 19148 }, { "epoch": 0.49, "grad_norm": 1.6761823892593384, "learning_rate": 1.0782853019363284e-05, "loss": 0.4763, "step": 19149 }, { "epoch": 0.49, "grad_norm": 1.2434953451156616, "learning_rate": 1.0782025421514485e-05, "loss": 0.5963, "step": 19150 }, { "epoch": 0.49, "grad_norm": 1.2069815397262573, "learning_rate": 1.0781197818276463e-05, "loss": 0.4165, "step": 19151 }, { "epoch": 0.49, "grad_norm": 2.06063175201416, "learning_rate": 1.078037020965491e-05, "loss": 0.7604, "step": 19152 }, { "epoch": 0.49, "grad_norm": 2.428121328353882, "learning_rate": 1.0779542595655543e-05, "loss": 0.6813, "step": 19153 }, { "epoch": 0.49, "grad_norm": 1.7174696922302246, "learning_rate": 1.0778714976284053e-05, "loss": 0.5814, "step": 19154 }, { "epoch": 0.49, "grad_norm": 1.5067580938339233, "learning_rate": 1.0777887351546151e-05, "loss": 0.526, "step": 19155 }, { "epoch": 0.49, "grad_norm": 1.8133903741836548, "learning_rate": 1.0777059721447536e-05, "loss": 0.5206, "step": 19156 }, { "epoch": 0.49, "grad_norm": 1.741765022277832, "learning_rate": 1.0776232085993913e-05, "loss": 0.6724, "step": 19157 }, { "epoch": 0.49, "grad_norm": 1.7350577116012573, "learning_rate": 1.0775404445190989e-05, "loss": 0.5292, "step": 19158 }, { "epoch": 0.49, "grad_norm": 1.61814546585083, "learning_rate": 1.0774576799044463e-05, "loss": 0.6778, "step": 19159 }, { "epoch": 0.49, "grad_norm": 2.0635604858398438, "learning_rate": 1.0773749147560042e-05, "loss": 0.6504, "step": 19160 }, { "epoch": 0.49, "grad_norm": 3.16827392578125, "learning_rate": 1.0772921490743426e-05, "loss": 0.668, "step": 19161 }, { "epoch": 0.49, "grad_norm": 1.496447205543518, "learning_rate": 1.077209382860032e-05, "loss": 0.5698, "step": 19162 }, { "epoch": 0.49, "grad_norm": 1.662817358970642, "learning_rate": 1.0771266161136433e-05, "loss": 0.5403, "step": 19163 }, { "epoch": 0.49, "grad_norm": 5.529510974884033, "learning_rate": 1.0770438488357459e-05, "loss": 0.8128, "step": 19164 }, { "epoch": 0.49, "grad_norm": 10.873364448547363, "learning_rate": 1.076961081026911e-05, "loss": 0.6029, "step": 19165 }, { "epoch": 0.49, "grad_norm": 3.2750816345214844, "learning_rate": 1.0768783126877087e-05, "loss": 0.8959, "step": 19166 }, { "epoch": 0.49, "grad_norm": 3.3374738693237305, "learning_rate": 1.076795543818709e-05, "loss": 0.5886, "step": 19167 }, { "epoch": 0.49, "grad_norm": 2.563652753829956, "learning_rate": 1.076712774420483e-05, "loss": 0.5082, "step": 19168 }, { "epoch": 0.49, "grad_norm": 5.840484619140625, "learning_rate": 1.0766300044936006e-05, "loss": 0.4955, "step": 19169 }, { "epoch": 0.49, "grad_norm": 2.1221113204956055, "learning_rate": 1.0765472340386326e-05, "loss": 0.5501, "step": 19170 }, { "epoch": 0.49, "grad_norm": 1.2924937009811401, "learning_rate": 1.076464463056149e-05, "loss": 0.5548, "step": 19171 }, { "epoch": 0.49, "grad_norm": 2.530773162841797, "learning_rate": 1.0763816915467205e-05, "loss": 0.6589, "step": 19172 }, { "epoch": 0.49, "grad_norm": 1.936786413192749, "learning_rate": 1.076298919510917e-05, "loss": 0.5512, "step": 19173 }, { "epoch": 0.49, "grad_norm": 8.683175086975098, "learning_rate": 1.0762161469493095e-05, "loss": 0.58, "step": 19174 }, { "epoch": 0.49, "grad_norm": 15.811776161193848, "learning_rate": 1.0761333738624682e-05, "loss": 0.596, "step": 19175 }, { "epoch": 0.49, "grad_norm": 2.6414999961853027, "learning_rate": 1.0760506002509634e-05, "loss": 0.594, "step": 19176 }, { "epoch": 0.49, "grad_norm": 1.643568754196167, "learning_rate": 1.0759678261153659e-05, "loss": 0.5274, "step": 19177 }, { "epoch": 0.49, "grad_norm": 3.0067522525787354, "learning_rate": 1.0758850514562457e-05, "loss": 0.6033, "step": 19178 }, { "epoch": 0.49, "grad_norm": 2.5650901794433594, "learning_rate": 1.0758022762741734e-05, "loss": 0.5228, "step": 19179 }, { "epoch": 0.49, "grad_norm": 3.2086617946624756, "learning_rate": 1.0757195005697193e-05, "loss": 0.4188, "step": 19180 }, { "epoch": 0.49, "grad_norm": 1.4859437942504883, "learning_rate": 1.0756367243434542e-05, "loss": 0.4903, "step": 19181 }, { "epoch": 0.49, "grad_norm": 1.6417111158370972, "learning_rate": 1.0755539475959484e-05, "loss": 0.5579, "step": 19182 }, { "epoch": 0.49, "grad_norm": 3.560272216796875, "learning_rate": 1.075471170327772e-05, "loss": 0.5134, "step": 19183 }, { "epoch": 0.49, "grad_norm": 4.743326187133789, "learning_rate": 1.0753883925394961e-05, "loss": 0.4982, "step": 19184 }, { "epoch": 0.49, "grad_norm": 1.28582763671875, "learning_rate": 1.0753056142316904e-05, "loss": 0.4952, "step": 19185 }, { "epoch": 0.49, "grad_norm": 5.37310266494751, "learning_rate": 1.0752228354049259e-05, "loss": 0.8777, "step": 19186 }, { "epoch": 0.49, "grad_norm": 7.071609020233154, "learning_rate": 1.075140056059773e-05, "loss": 0.4896, "step": 19187 }, { "epoch": 0.49, "grad_norm": 1.7808893918991089, "learning_rate": 1.0750572761968017e-05, "loss": 0.5761, "step": 19188 }, { "epoch": 0.49, "grad_norm": 1.0166609287261963, "learning_rate": 1.0749744958165827e-05, "loss": 0.7416, "step": 19189 }, { "epoch": 0.49, "grad_norm": 3.8042991161346436, "learning_rate": 1.0748917149196869e-05, "loss": 0.6217, "step": 19190 }, { "epoch": 0.49, "grad_norm": 2.166426658630371, "learning_rate": 1.0748089335066843e-05, "loss": 0.6835, "step": 19191 }, { "epoch": 0.49, "grad_norm": 3.3898980617523193, "learning_rate": 1.0747261515781458e-05, "loss": 0.6226, "step": 19192 }, { "epoch": 0.49, "grad_norm": 1.590989589691162, "learning_rate": 1.0746433691346412e-05, "loss": 0.4921, "step": 19193 }, { "epoch": 0.49, "grad_norm": 2.4988675117492676, "learning_rate": 1.0745605861767417e-05, "loss": 0.6206, "step": 19194 }, { "epoch": 0.49, "grad_norm": 4.665926933288574, "learning_rate": 1.0744778027050174e-05, "loss": 0.4686, "step": 19195 }, { "epoch": 0.49, "grad_norm": 2.3789780139923096, "learning_rate": 1.0743950187200387e-05, "loss": 0.5206, "step": 19196 }, { "epoch": 0.49, "grad_norm": 1.832780122756958, "learning_rate": 1.0743122342223766e-05, "loss": 0.4831, "step": 19197 }, { "epoch": 0.49, "grad_norm": 1.2883052825927734, "learning_rate": 1.074229449212601e-05, "loss": 0.5814, "step": 19198 }, { "epoch": 0.49, "grad_norm": 1.228401780128479, "learning_rate": 1.0741466636912829e-05, "loss": 0.63, "step": 19199 }, { "epoch": 0.49, "grad_norm": 1.3604917526245117, "learning_rate": 1.0740638776589922e-05, "loss": 0.6358, "step": 19200 }, { "epoch": 0.49, "grad_norm": 3.391690969467163, "learning_rate": 1.0739810911163e-05, "loss": 0.5863, "step": 19201 }, { "epoch": 0.49, "grad_norm": 0.9048210978507996, "learning_rate": 1.0738983040637768e-05, "loss": 0.5387, "step": 19202 }, { "epoch": 0.49, "grad_norm": 1.4364217519760132, "learning_rate": 1.0738155165019926e-05, "loss": 0.6511, "step": 19203 }, { "epoch": 0.49, "grad_norm": 1.957162618637085, "learning_rate": 1.0737327284315186e-05, "loss": 0.6275, "step": 19204 }, { "epoch": 0.49, "grad_norm": 1.5158759355545044, "learning_rate": 1.0736499398529246e-05, "loss": 0.5158, "step": 19205 }, { "epoch": 0.49, "grad_norm": 6.736508846282959, "learning_rate": 1.0735671507667815e-05, "loss": 0.5731, "step": 19206 }, { "epoch": 0.49, "grad_norm": 1.2907607555389404, "learning_rate": 1.07348436117366e-05, "loss": 0.5042, "step": 19207 }, { "epoch": 0.49, "grad_norm": 2.016231060028076, "learning_rate": 1.0734015710741304e-05, "loss": 0.533, "step": 19208 }, { "epoch": 0.49, "grad_norm": 1.6183918714523315, "learning_rate": 1.0733187804687634e-05, "loss": 0.5607, "step": 19209 }, { "epoch": 0.49, "grad_norm": 1.4521024227142334, "learning_rate": 1.0732359893581292e-05, "loss": 0.5029, "step": 19210 }, { "epoch": 0.49, "grad_norm": 1.2807278633117676, "learning_rate": 1.073153197742799e-05, "loss": 0.5669, "step": 19211 }, { "epoch": 0.49, "grad_norm": 2.2462990283966064, "learning_rate": 1.0730704056233423e-05, "loss": 0.6503, "step": 19212 }, { "epoch": 0.49, "grad_norm": 1.2364963293075562, "learning_rate": 1.0729876130003304e-05, "loss": 0.5148, "step": 19213 }, { "epoch": 0.49, "grad_norm": 2.0886237621307373, "learning_rate": 1.0729048198743342e-05, "loss": 0.3703, "step": 19214 }, { "epoch": 0.49, "grad_norm": 2.277576208114624, "learning_rate": 1.0728220262459235e-05, "loss": 0.4566, "step": 19215 }, { "epoch": 0.49, "grad_norm": 1.1472777128219604, "learning_rate": 1.072739232115669e-05, "loss": 0.3405, "step": 19216 }, { "epoch": 0.49, "grad_norm": 4.032161712646484, "learning_rate": 1.0726564374841415e-05, "loss": 0.7552, "step": 19217 }, { "epoch": 0.49, "grad_norm": 1.7683496475219727, "learning_rate": 1.0725736423519114e-05, "loss": 0.6259, "step": 19218 }, { "epoch": 0.49, "grad_norm": 2.034696340560913, "learning_rate": 1.0724908467195496e-05, "loss": 0.7252, "step": 19219 }, { "epoch": 0.49, "grad_norm": 2.007319688796997, "learning_rate": 1.0724080505876263e-05, "loss": 0.6667, "step": 19220 }, { "epoch": 0.49, "grad_norm": 1.4635627269744873, "learning_rate": 1.0723252539567124e-05, "loss": 0.4774, "step": 19221 }, { "epoch": 0.49, "grad_norm": 1.720631718635559, "learning_rate": 1.0722424568273779e-05, "loss": 0.5025, "step": 19222 }, { "epoch": 0.49, "grad_norm": 16.440277099609375, "learning_rate": 1.0721596592001942e-05, "loss": 0.4738, "step": 19223 }, { "epoch": 0.49, "grad_norm": 1.2894692420959473, "learning_rate": 1.0720768610757311e-05, "loss": 0.6364, "step": 19224 }, { "epoch": 0.49, "grad_norm": 1.1160751581192017, "learning_rate": 1.0719940624545595e-05, "loss": 0.6141, "step": 19225 }, { "epoch": 0.49, "grad_norm": 1.5205519199371338, "learning_rate": 1.0719112633372503e-05, "loss": 0.4932, "step": 19226 }, { "epoch": 0.49, "grad_norm": 1.8458307981491089, "learning_rate": 1.0718284637243738e-05, "loss": 0.319, "step": 19227 }, { "epoch": 0.49, "grad_norm": 1.2627277374267578, "learning_rate": 1.071745663616501e-05, "loss": 0.3539, "step": 19228 }, { "epoch": 0.49, "grad_norm": 0.9833424091339111, "learning_rate": 1.0716628630142016e-05, "loss": 0.5787, "step": 19229 }, { "epoch": 0.49, "grad_norm": 1.7228660583496094, "learning_rate": 1.071580061918047e-05, "loss": 0.515, "step": 19230 }, { "epoch": 0.49, "grad_norm": 3.9248461723327637, "learning_rate": 1.0714972603286076e-05, "loss": 0.5045, "step": 19231 }, { "epoch": 0.49, "grad_norm": 4.157602787017822, "learning_rate": 1.0714144582464539e-05, "loss": 0.5987, "step": 19232 }, { "epoch": 0.49, "grad_norm": 1.4567359685897827, "learning_rate": 1.0713316556721566e-05, "loss": 0.5918, "step": 19233 }, { "epoch": 0.49, "grad_norm": 0.8762786388397217, "learning_rate": 1.0712488526062865e-05, "loss": 0.4807, "step": 19234 }, { "epoch": 0.49, "grad_norm": 2.7177891731262207, "learning_rate": 1.071166049049414e-05, "loss": 0.6441, "step": 19235 }, { "epoch": 0.49, "grad_norm": 1.581921935081482, "learning_rate": 1.0710832450021099e-05, "loss": 0.4577, "step": 19236 }, { "epoch": 0.49, "grad_norm": 3.6653356552124023, "learning_rate": 1.0710004404649445e-05, "loss": 0.5373, "step": 19237 }, { "epoch": 0.49, "grad_norm": 8.850972175598145, "learning_rate": 1.0709176354384889e-05, "loss": 0.5575, "step": 19238 }, { "epoch": 0.49, "grad_norm": 1.3947557210922241, "learning_rate": 1.0708348299233134e-05, "loss": 0.5086, "step": 19239 }, { "epoch": 0.49, "grad_norm": 1.2396694421768188, "learning_rate": 1.0707520239199889e-05, "loss": 0.5538, "step": 19240 }, { "epoch": 0.49, "grad_norm": 1.3605239391326904, "learning_rate": 1.0706692174290856e-05, "loss": 0.6279, "step": 19241 }, { "epoch": 0.49, "grad_norm": 11.010721206665039, "learning_rate": 1.0705864104511746e-05, "loss": 0.7047, "step": 19242 }, { "epoch": 0.49, "grad_norm": 1.3788686990737915, "learning_rate": 1.0705036029868266e-05, "loss": 0.5229, "step": 19243 }, { "epoch": 0.49, "grad_norm": 1.2088499069213867, "learning_rate": 1.070420795036612e-05, "loss": 0.5046, "step": 19244 }, { "epoch": 0.49, "grad_norm": 1.4455995559692383, "learning_rate": 1.0703379866011014e-05, "loss": 0.6344, "step": 19245 }, { "epoch": 0.49, "grad_norm": 14.651344299316406, "learning_rate": 1.0702551776808653e-05, "loss": 0.7465, "step": 19246 }, { "epoch": 0.49, "grad_norm": 3.05808424949646, "learning_rate": 1.070172368276475e-05, "loss": 0.5689, "step": 19247 }, { "epoch": 0.49, "grad_norm": 0.9423986673355103, "learning_rate": 1.0700895583885009e-05, "loss": 0.5407, "step": 19248 }, { "epoch": 0.49, "grad_norm": 7.6589674949646, "learning_rate": 1.0700067480175135e-05, "loss": 0.7015, "step": 19249 }, { "epoch": 0.49, "grad_norm": 1.176133155822754, "learning_rate": 1.0699239371640838e-05, "loss": 0.5575, "step": 19250 }, { "epoch": 0.49, "grad_norm": 1.3483976125717163, "learning_rate": 1.0698411258287817e-05, "loss": 0.5802, "step": 19251 }, { "epoch": 0.49, "grad_norm": 2.8800208568573, "learning_rate": 1.069758314012179e-05, "loss": 0.6065, "step": 19252 }, { "epoch": 0.49, "grad_norm": 1.952075719833374, "learning_rate": 1.0696755017148454e-05, "loss": 0.8182, "step": 19253 }, { "epoch": 0.49, "grad_norm": 1.7518188953399658, "learning_rate": 1.0695926889373522e-05, "loss": 0.5162, "step": 19254 }, { "epoch": 0.49, "grad_norm": 1.130595326423645, "learning_rate": 1.06950987568027e-05, "loss": 0.6723, "step": 19255 }, { "epoch": 0.49, "grad_norm": 1.17551851272583, "learning_rate": 1.0694270619441693e-05, "loss": 0.5, "step": 19256 }, { "epoch": 0.49, "grad_norm": 1.4754085540771484, "learning_rate": 1.069344247729621e-05, "loss": 0.5823, "step": 19257 }, { "epoch": 0.49, "grad_norm": 1.2858608961105347, "learning_rate": 1.0692614330371955e-05, "loss": 0.5766, "step": 19258 }, { "epoch": 0.49, "grad_norm": 1.7392624616622925, "learning_rate": 1.069178617867464e-05, "loss": 0.5247, "step": 19259 }, { "epoch": 0.49, "grad_norm": 7.989686489105225, "learning_rate": 1.0690958022209968e-05, "loss": 0.5795, "step": 19260 }, { "epoch": 0.49, "grad_norm": 1.3287907838821411, "learning_rate": 1.0690129860983645e-05, "loss": 0.4912, "step": 19261 }, { "epoch": 0.49, "grad_norm": 1.585999846458435, "learning_rate": 1.0689301695001384e-05, "loss": 0.6865, "step": 19262 }, { "epoch": 0.49, "grad_norm": 1.6689517498016357, "learning_rate": 1.0688473524268887e-05, "loss": 0.5395, "step": 19263 }, { "epoch": 0.49, "grad_norm": 8.575843811035156, "learning_rate": 1.0687645348791866e-05, "loss": 0.5808, "step": 19264 }, { "epoch": 0.49, "grad_norm": 1.3133078813552856, "learning_rate": 1.068681716857602e-05, "loss": 0.5809, "step": 19265 }, { "epoch": 0.49, "grad_norm": 3.9623231887817383, "learning_rate": 1.0685988983627065e-05, "loss": 0.4739, "step": 19266 }, { "epoch": 0.49, "grad_norm": 2.6160428524017334, "learning_rate": 1.0685160793950707e-05, "loss": 0.5485, "step": 19267 }, { "epoch": 0.49, "grad_norm": 2.4690160751342773, "learning_rate": 1.0684332599552647e-05, "loss": 0.6808, "step": 19268 }, { "epoch": 0.49, "grad_norm": 4.315406322479248, "learning_rate": 1.0683504400438598e-05, "loss": 0.813, "step": 19269 }, { "epoch": 0.49, "grad_norm": 1.879336953163147, "learning_rate": 1.0682676196614265e-05, "loss": 0.6547, "step": 19270 }, { "epoch": 0.49, "grad_norm": 10.497154235839844, "learning_rate": 1.0681847988085358e-05, "loss": 0.4137, "step": 19271 }, { "epoch": 0.49, "grad_norm": 3.659972906112671, "learning_rate": 1.0681019774857583e-05, "loss": 0.6377, "step": 19272 }, { "epoch": 0.49, "grad_norm": 1.2139232158660889, "learning_rate": 1.0680191556936648e-05, "loss": 0.4981, "step": 19273 }, { "epoch": 0.49, "grad_norm": 1.125913143157959, "learning_rate": 1.0679363334328262e-05, "loss": 0.5857, "step": 19274 }, { "epoch": 0.49, "grad_norm": 7.126436710357666, "learning_rate": 1.0678535107038126e-05, "loss": 0.6799, "step": 19275 }, { "epoch": 0.49, "grad_norm": 1.4454489946365356, "learning_rate": 1.0677706875071955e-05, "loss": 0.5702, "step": 19276 }, { "epoch": 0.49, "grad_norm": 2.5478711128234863, "learning_rate": 1.0676878638435455e-05, "loss": 0.5289, "step": 19277 }, { "epoch": 0.49, "grad_norm": 1.730250597000122, "learning_rate": 1.067605039713433e-05, "loss": 0.5867, "step": 19278 }, { "epoch": 0.49, "grad_norm": 2.6834239959716797, "learning_rate": 1.0675222151174294e-05, "loss": 0.6069, "step": 19279 }, { "epoch": 0.49, "grad_norm": 1.8187214136123657, "learning_rate": 1.0674393900561047e-05, "loss": 0.4417, "step": 19280 }, { "epoch": 0.49, "grad_norm": 1.4212526082992554, "learning_rate": 1.0673565645300304e-05, "loss": 0.707, "step": 19281 }, { "epoch": 0.49, "grad_norm": 1.6046688556671143, "learning_rate": 1.0672737385397768e-05, "loss": 0.6297, "step": 19282 }, { "epoch": 0.49, "grad_norm": 2.063547134399414, "learning_rate": 1.0671909120859147e-05, "loss": 0.5383, "step": 19283 }, { "epoch": 0.49, "grad_norm": 4.35076904296875, "learning_rate": 1.0671080851690157e-05, "loss": 0.489, "step": 19284 }, { "epoch": 0.49, "grad_norm": 2.7431445121765137, "learning_rate": 1.0670252577896492e-05, "loss": 0.6756, "step": 19285 }, { "epoch": 0.49, "grad_norm": 2.05886173248291, "learning_rate": 1.0669424299483873e-05, "loss": 0.6833, "step": 19286 }, { "epoch": 0.49, "grad_norm": 1.2798731327056885, "learning_rate": 1.0668596016457996e-05, "loss": 0.5111, "step": 19287 }, { "epoch": 0.49, "grad_norm": 3.3001108169555664, "learning_rate": 1.0667767728824581e-05, "loss": 0.4962, "step": 19288 }, { "epoch": 0.49, "grad_norm": 1.5856294631958008, "learning_rate": 1.0666939436589329e-05, "loss": 0.5585, "step": 19289 }, { "epoch": 0.49, "grad_norm": 1.6855396032333374, "learning_rate": 1.0666111139757949e-05, "loss": 0.6767, "step": 19290 }, { "epoch": 0.49, "grad_norm": 1.0179774761199951, "learning_rate": 1.0665282838336148e-05, "loss": 0.4959, "step": 19291 }, { "epoch": 0.49, "grad_norm": 4.81998348236084, "learning_rate": 1.0664454532329638e-05, "loss": 0.4877, "step": 19292 }, { "epoch": 0.49, "grad_norm": 2.2253036499023438, "learning_rate": 1.0663626221744125e-05, "loss": 0.6568, "step": 19293 }, { "epoch": 0.49, "grad_norm": 8.092674255371094, "learning_rate": 1.0662797906585314e-05, "loss": 0.698, "step": 19294 }, { "epoch": 0.49, "grad_norm": 1.4605121612548828, "learning_rate": 1.0661969586858918e-05, "loss": 0.6493, "step": 19295 }, { "epoch": 0.49, "grad_norm": 1.350434422492981, "learning_rate": 1.0661141262570645e-05, "loss": 0.4954, "step": 19296 }, { "epoch": 0.49, "grad_norm": 1.2726072072982788, "learning_rate": 1.0660312933726198e-05, "loss": 0.5118, "step": 19297 }, { "epoch": 0.49, "grad_norm": 1.6316344738006592, "learning_rate": 1.0659484600331294e-05, "loss": 0.4722, "step": 19298 }, { "epoch": 0.49, "grad_norm": 1.55950927734375, "learning_rate": 1.0658656262391633e-05, "loss": 0.433, "step": 19299 }, { "epoch": 0.49, "grad_norm": 1.6248677968978882, "learning_rate": 1.0657827919912926e-05, "loss": 0.5779, "step": 19300 }, { "epoch": 0.49, "grad_norm": 1.3116258382797241, "learning_rate": 1.0656999572900885e-05, "loss": 0.5096, "step": 19301 }, { "epoch": 0.49, "grad_norm": 1.7606775760650635, "learning_rate": 1.0656171221361214e-05, "loss": 0.4625, "step": 19302 }, { "epoch": 0.49, "grad_norm": 6.564388275146484, "learning_rate": 1.0655342865299625e-05, "loss": 0.6727, "step": 19303 }, { "epoch": 0.49, "grad_norm": 7.108134746551514, "learning_rate": 1.0654514504721823e-05, "loss": 0.3884, "step": 19304 }, { "epoch": 0.49, "grad_norm": 1.9941749572753906, "learning_rate": 1.0653686139633521e-05, "loss": 0.4972, "step": 19305 }, { "epoch": 0.49, "grad_norm": 1.345983862876892, "learning_rate": 1.0652857770040424e-05, "loss": 0.5001, "step": 19306 }, { "epoch": 0.49, "grad_norm": 1.4201370477676392, "learning_rate": 1.0652029395948238e-05, "loss": 0.4664, "step": 19307 }, { "epoch": 0.49, "grad_norm": 1.448178768157959, "learning_rate": 1.0651201017362678e-05, "loss": 0.6573, "step": 19308 }, { "epoch": 0.49, "grad_norm": 7.424077033996582, "learning_rate": 1.0650372634289447e-05, "loss": 0.7697, "step": 19309 }, { "epoch": 0.49, "grad_norm": 4.93006706237793, "learning_rate": 1.064954424673426e-05, "loss": 0.6936, "step": 19310 }, { "epoch": 0.49, "grad_norm": 1.6498277187347412, "learning_rate": 1.064871585470282e-05, "loss": 0.558, "step": 19311 }, { "epoch": 0.49, "grad_norm": 1.8818373680114746, "learning_rate": 1.0647887458200838e-05, "loss": 0.5801, "step": 19312 }, { "epoch": 0.5, "grad_norm": 1.3614449501037598, "learning_rate": 1.0647059057234024e-05, "loss": 0.6724, "step": 19313 }, { "epoch": 0.5, "grad_norm": 8.48884105682373, "learning_rate": 1.0646230651808086e-05, "loss": 0.4644, "step": 19314 }, { "epoch": 0.5, "grad_norm": 2.3169608116149902, "learning_rate": 1.0645402241928729e-05, "loss": 0.6162, "step": 19315 }, { "epoch": 0.5, "grad_norm": 1.4540083408355713, "learning_rate": 1.0644573827601667e-05, "loss": 0.5403, "step": 19316 }, { "epoch": 0.5, "grad_norm": 1.0216773748397827, "learning_rate": 1.0643745408832606e-05, "loss": 0.5025, "step": 19317 }, { "epoch": 0.5, "grad_norm": 4.033738136291504, "learning_rate": 1.0642916985627258e-05, "loss": 0.6719, "step": 19318 }, { "epoch": 0.5, "grad_norm": 4.448424816131592, "learning_rate": 1.0642088557991331e-05, "loss": 0.4993, "step": 19319 }, { "epoch": 0.5, "grad_norm": 2.357929229736328, "learning_rate": 1.0641260125930531e-05, "loss": 0.5833, "step": 19320 }, { "epoch": 0.5, "grad_norm": 2.3617074489593506, "learning_rate": 1.0640431689450567e-05, "loss": 0.5679, "step": 19321 }, { "epoch": 0.5, "grad_norm": 1.638343334197998, "learning_rate": 1.0639603248557153e-05, "loss": 0.5364, "step": 19322 }, { "epoch": 0.5, "grad_norm": 4.435923099517822, "learning_rate": 1.0638774803255995e-05, "loss": 0.6771, "step": 19323 }, { "epoch": 0.5, "grad_norm": 8.049835205078125, "learning_rate": 1.0637946353552802e-05, "loss": 0.6257, "step": 19324 }, { "epoch": 0.5, "grad_norm": 2.230746030807495, "learning_rate": 1.0637117899453287e-05, "loss": 0.6472, "step": 19325 }, { "epoch": 0.5, "grad_norm": 5.92332649230957, "learning_rate": 1.063628944096315e-05, "loss": 0.6464, "step": 19326 }, { "epoch": 0.5, "grad_norm": 2.239896535873413, "learning_rate": 1.0635460978088108e-05, "loss": 0.6663, "step": 19327 }, { "epoch": 0.5, "grad_norm": 3.398909568786621, "learning_rate": 1.0634632510833869e-05, "loss": 0.5495, "step": 19328 }, { "epoch": 0.5, "grad_norm": 1.5124733448028564, "learning_rate": 1.0633804039206142e-05, "loss": 0.4505, "step": 19329 }, { "epoch": 0.5, "grad_norm": 1.433836817741394, "learning_rate": 1.0632975563210634e-05, "loss": 0.6039, "step": 19330 }, { "epoch": 0.5, "grad_norm": 1.5222424268722534, "learning_rate": 1.0632147082853058e-05, "loss": 0.5116, "step": 19331 }, { "epoch": 0.5, "grad_norm": 2.824005126953125, "learning_rate": 1.063131859813912e-05, "loss": 0.7229, "step": 19332 }, { "epoch": 0.5, "grad_norm": 4.27531623840332, "learning_rate": 1.0630490109074532e-05, "loss": 0.4321, "step": 19333 }, { "epoch": 0.5, "grad_norm": 1.1667437553405762, "learning_rate": 1.0629661615664999e-05, "loss": 0.6008, "step": 19334 }, { "epoch": 0.5, "grad_norm": 1.7443101406097412, "learning_rate": 1.0628833117916237e-05, "loss": 0.498, "step": 19335 }, { "epoch": 0.5, "grad_norm": 1.5520801544189453, "learning_rate": 1.062800461583395e-05, "loss": 0.6148, "step": 19336 }, { "epoch": 0.5, "grad_norm": 2.6968398094177246, "learning_rate": 1.0627176109423853e-05, "loss": 0.511, "step": 19337 }, { "epoch": 0.5, "grad_norm": 6.603621006011963, "learning_rate": 1.0626347598691651e-05, "loss": 0.7403, "step": 19338 }, { "epoch": 0.5, "grad_norm": 4.886739730834961, "learning_rate": 1.0625519083643052e-05, "loss": 0.4672, "step": 19339 }, { "epoch": 0.5, "grad_norm": 1.4938055276870728, "learning_rate": 1.0624690564283772e-05, "loss": 0.5411, "step": 19340 }, { "epoch": 0.5, "grad_norm": 1.2766518592834473, "learning_rate": 1.0623862040619515e-05, "loss": 0.5277, "step": 19341 }, { "epoch": 0.5, "grad_norm": 1.5096383094787598, "learning_rate": 1.0623033512655995e-05, "loss": 0.6243, "step": 19342 }, { "epoch": 0.5, "grad_norm": 2.1267688274383545, "learning_rate": 1.0622204980398917e-05, "loss": 0.5581, "step": 19343 }, { "epoch": 0.5, "grad_norm": 3.257629871368408, "learning_rate": 1.0621376443853995e-05, "loss": 0.8577, "step": 19344 }, { "epoch": 0.5, "grad_norm": 1.4277299642562866, "learning_rate": 1.0620547903026936e-05, "loss": 0.5208, "step": 19345 }, { "epoch": 0.5, "grad_norm": 1.154600977897644, "learning_rate": 1.061971935792345e-05, "loss": 0.549, "step": 19346 }, { "epoch": 0.5, "grad_norm": 1.8683120012283325, "learning_rate": 1.0618890808549249e-05, "loss": 0.5599, "step": 19347 }, { "epoch": 0.5, "grad_norm": 1.2324271202087402, "learning_rate": 1.0618062254910043e-05, "loss": 0.471, "step": 19348 }, { "epoch": 0.5, "grad_norm": 1.6460729837417603, "learning_rate": 1.0617233697011538e-05, "loss": 0.4026, "step": 19349 }, { "epoch": 0.5, "grad_norm": 1.1802791357040405, "learning_rate": 1.0616405134859444e-05, "loss": 0.5472, "step": 19350 }, { "epoch": 0.5, "grad_norm": 1.970755934715271, "learning_rate": 1.0615576568459476e-05, "loss": 0.5081, "step": 19351 }, { "epoch": 0.5, "grad_norm": 1.3460084199905396, "learning_rate": 1.061474799781734e-05, "loss": 0.43, "step": 19352 }, { "epoch": 0.5, "grad_norm": 3.073889970779419, "learning_rate": 1.0613919422938748e-05, "loss": 0.5961, "step": 19353 }, { "epoch": 0.5, "grad_norm": 11.321761131286621, "learning_rate": 1.061309084382941e-05, "loss": 0.5191, "step": 19354 }, { "epoch": 0.5, "grad_norm": 3.128480911254883, "learning_rate": 1.0612262260495031e-05, "loss": 0.5758, "step": 19355 }, { "epoch": 0.5, "grad_norm": 3.004404067993164, "learning_rate": 1.0611433672941327e-05, "loss": 0.5373, "step": 19356 }, { "epoch": 0.5, "grad_norm": 10.51987361907959, "learning_rate": 1.0610605081174008e-05, "loss": 0.4819, "step": 19357 }, { "epoch": 0.5, "grad_norm": 1.305375099182129, "learning_rate": 1.0609776485198782e-05, "loss": 0.5303, "step": 19358 }, { "epoch": 0.5, "grad_norm": 1.590990662574768, "learning_rate": 1.060894788502136e-05, "loss": 0.5347, "step": 19359 }, { "epoch": 0.5, "grad_norm": 4.976981163024902, "learning_rate": 1.0608119280647448e-05, "loss": 0.6145, "step": 19360 }, { "epoch": 0.5, "grad_norm": 1.161802053451538, "learning_rate": 1.0607290672082765e-05, "loss": 0.5279, "step": 19361 }, { "epoch": 0.5, "grad_norm": 1.355643630027771, "learning_rate": 1.0606462059333013e-05, "loss": 0.6963, "step": 19362 }, { "epoch": 0.5, "grad_norm": 3.434065580368042, "learning_rate": 1.0605633442403905e-05, "loss": 0.6385, "step": 19363 }, { "epoch": 0.5, "grad_norm": 1.4509201049804688, "learning_rate": 1.0604804821301155e-05, "loss": 0.4658, "step": 19364 }, { "epoch": 0.5, "grad_norm": 2.3328135013580322, "learning_rate": 1.0603976196030468e-05, "loss": 0.5423, "step": 19365 }, { "epoch": 0.5, "grad_norm": 1.729595422744751, "learning_rate": 1.0603147566597557e-05, "loss": 0.5705, "step": 19366 }, { "epoch": 0.5, "grad_norm": 1.5442456007003784, "learning_rate": 1.060231893300813e-05, "loss": 0.4615, "step": 19367 }, { "epoch": 0.5, "grad_norm": 2.993117332458496, "learning_rate": 1.06014902952679e-05, "loss": 0.6209, "step": 19368 }, { "epoch": 0.5, "grad_norm": 1.9370195865631104, "learning_rate": 1.060066165338258e-05, "loss": 0.6605, "step": 19369 }, { "epoch": 0.5, "grad_norm": 1.2418663501739502, "learning_rate": 1.0599833007357874e-05, "loss": 0.6902, "step": 19370 }, { "epoch": 0.5, "grad_norm": 1.6500099897384644, "learning_rate": 1.0599004357199498e-05, "loss": 0.6092, "step": 19371 }, { "epoch": 0.5, "grad_norm": 1.081527829170227, "learning_rate": 1.0598175702913159e-05, "loss": 0.4823, "step": 19372 }, { "epoch": 0.5, "grad_norm": 1.4418179988861084, "learning_rate": 1.059734704450457e-05, "loss": 0.4246, "step": 19373 }, { "epoch": 0.5, "grad_norm": 1.6093437671661377, "learning_rate": 1.059651838197944e-05, "loss": 0.684, "step": 19374 }, { "epoch": 0.5, "grad_norm": 1.4138689041137695, "learning_rate": 1.0595689715343477e-05, "loss": 0.6387, "step": 19375 }, { "epoch": 0.5, "grad_norm": 2.347764015197754, "learning_rate": 1.0594861044602398e-05, "loss": 0.535, "step": 19376 }, { "epoch": 0.5, "grad_norm": 1.3354547023773193, "learning_rate": 1.059403236976191e-05, "loss": 0.4949, "step": 19377 }, { "epoch": 0.5, "grad_norm": 1.6072802543640137, "learning_rate": 1.0593203690827724e-05, "loss": 0.5643, "step": 19378 }, { "epoch": 0.5, "grad_norm": 1.6728934049606323, "learning_rate": 1.0592375007805551e-05, "loss": 0.553, "step": 19379 }, { "epoch": 0.5, "grad_norm": 8.219711303710938, "learning_rate": 1.05915463207011e-05, "loss": 0.4954, "step": 19380 }, { "epoch": 0.5, "grad_norm": 5.08738899230957, "learning_rate": 1.0590717629520087e-05, "loss": 0.6644, "step": 19381 }, { "epoch": 0.5, "grad_norm": 1.8725154399871826, "learning_rate": 1.0589888934268215e-05, "loss": 0.6074, "step": 19382 }, { "epoch": 0.5, "grad_norm": 4.25814962387085, "learning_rate": 1.05890602349512e-05, "loss": 0.4439, "step": 19383 }, { "epoch": 0.5, "grad_norm": 0.8829296231269836, "learning_rate": 1.0588231531574753e-05, "loss": 0.5439, "step": 19384 }, { "epoch": 0.5, "grad_norm": 2.622049331665039, "learning_rate": 1.0587402824144586e-05, "loss": 0.4657, "step": 19385 }, { "epoch": 0.5, "grad_norm": 5.044341564178467, "learning_rate": 1.0586574112666404e-05, "loss": 0.6248, "step": 19386 }, { "epoch": 0.5, "grad_norm": 1.2126801013946533, "learning_rate": 1.0585745397145923e-05, "loss": 0.4608, "step": 19387 }, { "epoch": 0.5, "grad_norm": 1.7766387462615967, "learning_rate": 1.0584916677588854e-05, "loss": 0.7015, "step": 19388 }, { "epoch": 0.5, "grad_norm": 1.6694167852401733, "learning_rate": 1.0584087954000905e-05, "loss": 0.4412, "step": 19389 }, { "epoch": 0.5, "grad_norm": 2.145120620727539, "learning_rate": 1.058325922638779e-05, "loss": 0.4717, "step": 19390 }, { "epoch": 0.5, "grad_norm": 1.9917597770690918, "learning_rate": 1.0582430494755218e-05, "loss": 0.577, "step": 19391 }, { "epoch": 0.5, "grad_norm": 3.0287563800811768, "learning_rate": 1.0581601759108901e-05, "loss": 0.4649, "step": 19392 }, { "epoch": 0.5, "grad_norm": 1.2769900560379028, "learning_rate": 1.0580773019454551e-05, "loss": 0.6268, "step": 19393 }, { "epoch": 0.5, "grad_norm": 1.5753360986709595, "learning_rate": 1.0579944275797877e-05, "loss": 0.6955, "step": 19394 }, { "epoch": 0.5, "grad_norm": 10.790263175964355, "learning_rate": 1.0579115528144592e-05, "loss": 0.5271, "step": 19395 }, { "epoch": 0.5, "grad_norm": 1.396219253540039, "learning_rate": 1.0578286776500405e-05, "loss": 0.453, "step": 19396 }, { "epoch": 0.5, "grad_norm": 6.4465651512146, "learning_rate": 1.0577458020871032e-05, "loss": 0.5316, "step": 19397 }, { "epoch": 0.5, "grad_norm": 1.5505599975585938, "learning_rate": 1.0576629261262182e-05, "loss": 0.5216, "step": 19398 }, { "epoch": 0.5, "grad_norm": 1.3408119678497314, "learning_rate": 1.0575800497679561e-05, "loss": 0.6501, "step": 19399 }, { "epoch": 0.5, "grad_norm": 1.3884291648864746, "learning_rate": 1.0574971730128887e-05, "loss": 0.6023, "step": 19400 }, { "epoch": 0.5, "grad_norm": 5.116919040679932, "learning_rate": 1.057414295861587e-05, "loss": 0.6695, "step": 19401 }, { "epoch": 0.5, "grad_norm": 1.9760226011276245, "learning_rate": 1.0573314183146219e-05, "loss": 0.6857, "step": 19402 }, { "epoch": 0.5, "grad_norm": 1.3462520837783813, "learning_rate": 1.0572485403725645e-05, "loss": 0.55, "step": 19403 }, { "epoch": 0.5, "grad_norm": 3.7824573516845703, "learning_rate": 1.0571656620359863e-05, "loss": 0.4783, "step": 19404 }, { "epoch": 0.5, "grad_norm": 1.2755110263824463, "learning_rate": 1.0570827833054584e-05, "loss": 0.4996, "step": 19405 }, { "epoch": 0.5, "grad_norm": 2.417684316635132, "learning_rate": 1.0569999041815517e-05, "loss": 0.6298, "step": 19406 }, { "epoch": 0.5, "grad_norm": 3.399846315383911, "learning_rate": 1.0569170246648378e-05, "loss": 0.6256, "step": 19407 }, { "epoch": 0.5, "grad_norm": 1.9962247610092163, "learning_rate": 1.056834144755887e-05, "loss": 0.6571, "step": 19408 }, { "epoch": 0.5, "grad_norm": 1.0629349946975708, "learning_rate": 1.056751264455271e-05, "loss": 0.5457, "step": 19409 }, { "epoch": 0.5, "grad_norm": 1.305538296699524, "learning_rate": 1.0566683837635613e-05, "loss": 0.4851, "step": 19410 }, { "epoch": 0.5, "grad_norm": 2.1217222213745117, "learning_rate": 1.0565855026813286e-05, "loss": 0.4841, "step": 19411 }, { "epoch": 0.5, "grad_norm": 4.440552234649658, "learning_rate": 1.0565026212091443e-05, "loss": 0.5809, "step": 19412 }, { "epoch": 0.5, "grad_norm": 1.8799067735671997, "learning_rate": 1.0564197393475791e-05, "loss": 0.6586, "step": 19413 }, { "epoch": 0.5, "grad_norm": 1.428390622138977, "learning_rate": 1.0563368570972049e-05, "loss": 0.6311, "step": 19414 }, { "epoch": 0.5, "grad_norm": 1.2521989345550537, "learning_rate": 1.0562539744585921e-05, "loss": 0.6553, "step": 19415 }, { "epoch": 0.5, "grad_norm": 2.0336575508117676, "learning_rate": 1.0561710914323121e-05, "loss": 0.6548, "step": 19416 }, { "epoch": 0.5, "grad_norm": 3.186979293823242, "learning_rate": 1.0560882080189368e-05, "loss": 0.675, "step": 19417 }, { "epoch": 0.5, "grad_norm": 1.8126038312911987, "learning_rate": 1.0560053242190363e-05, "loss": 0.4531, "step": 19418 }, { "epoch": 0.5, "grad_norm": 1.244179129600525, "learning_rate": 1.0559224400331826e-05, "loss": 0.6608, "step": 19419 }, { "epoch": 0.5, "grad_norm": 1.731042742729187, "learning_rate": 1.0558395554619464e-05, "loss": 0.5132, "step": 19420 }, { "epoch": 0.5, "grad_norm": 3.4233338832855225, "learning_rate": 1.0557566705058991e-05, "loss": 0.9361, "step": 19421 }, { "epoch": 0.5, "grad_norm": 1.292930006980896, "learning_rate": 1.055673785165612e-05, "loss": 0.6149, "step": 19422 }, { "epoch": 0.5, "grad_norm": 1.4481128454208374, "learning_rate": 1.055590899441656e-05, "loss": 0.6433, "step": 19423 }, { "epoch": 0.5, "grad_norm": 2.7129955291748047, "learning_rate": 1.0555080133346025e-05, "loss": 0.6515, "step": 19424 }, { "epoch": 0.5, "grad_norm": 1.6670820713043213, "learning_rate": 1.0554251268450226e-05, "loss": 0.6033, "step": 19425 }, { "epoch": 0.5, "grad_norm": 2.365732192993164, "learning_rate": 1.0553422399734878e-05, "loss": 0.6257, "step": 19426 }, { "epoch": 0.5, "grad_norm": 3.085658311843872, "learning_rate": 1.0552593527205687e-05, "loss": 0.7354, "step": 19427 }, { "epoch": 0.5, "grad_norm": 1.897318720817566, "learning_rate": 1.055176465086837e-05, "loss": 0.5347, "step": 19428 }, { "epoch": 0.5, "grad_norm": 1.3085976839065552, "learning_rate": 1.0550935770728641e-05, "loss": 0.4586, "step": 19429 }, { "epoch": 0.5, "grad_norm": 1.1834566593170166, "learning_rate": 1.0550106886792203e-05, "loss": 0.5182, "step": 19430 }, { "epoch": 0.5, "grad_norm": 1.717597246170044, "learning_rate": 1.0549277999064775e-05, "loss": 0.3383, "step": 19431 }, { "epoch": 0.5, "grad_norm": 2.1159827709198, "learning_rate": 1.0548449107552072e-05, "loss": 0.5917, "step": 19432 }, { "epoch": 0.5, "grad_norm": 4.035801410675049, "learning_rate": 1.0547620212259799e-05, "loss": 0.8682, "step": 19433 }, { "epoch": 0.5, "grad_norm": 1.2662874460220337, "learning_rate": 1.0546791313193674e-05, "loss": 0.6329, "step": 19434 }, { "epoch": 0.5, "grad_norm": 2.4006171226501465, "learning_rate": 1.0545962410359406e-05, "loss": 0.6459, "step": 19435 }, { "epoch": 0.5, "grad_norm": 1.6777387857437134, "learning_rate": 1.0545133503762703e-05, "loss": 0.6122, "step": 19436 }, { "epoch": 0.5, "grad_norm": 1.1151070594787598, "learning_rate": 1.054430459340929e-05, "loss": 0.5201, "step": 19437 }, { "epoch": 0.5, "grad_norm": 1.2794651985168457, "learning_rate": 1.0543475679304867e-05, "loss": 0.5263, "step": 19438 }, { "epoch": 0.5, "grad_norm": 6.56917142868042, "learning_rate": 1.0542646761455152e-05, "loss": 0.4621, "step": 19439 }, { "epoch": 0.5, "grad_norm": 1.5683037042617798, "learning_rate": 1.0541817839865857e-05, "loss": 0.5313, "step": 19440 }, { "epoch": 0.5, "grad_norm": 2.319486379623413, "learning_rate": 1.0540988914542696e-05, "loss": 0.6303, "step": 19441 }, { "epoch": 0.5, "grad_norm": 2.5006327629089355, "learning_rate": 1.0540159985491376e-05, "loss": 0.6555, "step": 19442 }, { "epoch": 0.5, "grad_norm": 1.1179319620132446, "learning_rate": 1.0539331052717612e-05, "loss": 0.582, "step": 19443 }, { "epoch": 0.5, "grad_norm": 1.6458985805511475, "learning_rate": 1.0538502116227121e-05, "loss": 0.445, "step": 19444 }, { "epoch": 0.5, "grad_norm": 1.0684919357299805, "learning_rate": 1.0537673176025608e-05, "loss": 0.4811, "step": 19445 }, { "epoch": 0.5, "grad_norm": 5.803920269012451, "learning_rate": 1.0536844232118792e-05, "loss": 0.8153, "step": 19446 }, { "epoch": 0.5, "grad_norm": 2.5980899333953857, "learning_rate": 1.0536015284512382e-05, "loss": 0.5583, "step": 19447 }, { "epoch": 0.5, "grad_norm": 6.9280571937561035, "learning_rate": 1.0535186333212091e-05, "loss": 0.6433, "step": 19448 }, { "epoch": 0.5, "grad_norm": 4.734993934631348, "learning_rate": 1.0534357378223632e-05, "loss": 0.6147, "step": 19449 }, { "epoch": 0.5, "grad_norm": 2.923602819442749, "learning_rate": 1.0533528419552719e-05, "loss": 0.6496, "step": 19450 }, { "epoch": 0.5, "grad_norm": 2.179603099822998, "learning_rate": 1.0532699457205062e-05, "loss": 0.5413, "step": 19451 }, { "epoch": 0.5, "grad_norm": 4.55269193649292, "learning_rate": 1.0531870491186376e-05, "loss": 0.5382, "step": 19452 }, { "epoch": 0.5, "grad_norm": 1.2998296022415161, "learning_rate": 1.0531041521502372e-05, "loss": 0.5432, "step": 19453 }, { "epoch": 0.5, "grad_norm": 4.966288089752197, "learning_rate": 1.0530212548158762e-05, "loss": 0.4323, "step": 19454 }, { "epoch": 0.5, "grad_norm": 3.4808037281036377, "learning_rate": 1.052938357116126e-05, "loss": 0.5414, "step": 19455 }, { "epoch": 0.5, "grad_norm": 4.650114059448242, "learning_rate": 1.0528554590515582e-05, "loss": 0.6838, "step": 19456 }, { "epoch": 0.5, "grad_norm": 2.5501649379730225, "learning_rate": 1.0527725606227438e-05, "loss": 0.5, "step": 19457 }, { "epoch": 0.5, "grad_norm": 0.864163339138031, "learning_rate": 1.052689661830254e-05, "loss": 0.4621, "step": 19458 }, { "epoch": 0.5, "grad_norm": 2.992358446121216, "learning_rate": 1.0526067626746599e-05, "loss": 0.5637, "step": 19459 }, { "epoch": 0.5, "grad_norm": 1.8742411136627197, "learning_rate": 1.052523863156533e-05, "loss": 0.5237, "step": 19460 }, { "epoch": 0.5, "grad_norm": 5.713834285736084, "learning_rate": 1.0524409632764452e-05, "loss": 0.7282, "step": 19461 }, { "epoch": 0.5, "grad_norm": 1.103487253189087, "learning_rate": 1.0523580630349669e-05, "loss": 0.5602, "step": 19462 }, { "epoch": 0.5, "grad_norm": 1.7809937000274658, "learning_rate": 1.0522751624326697e-05, "loss": 0.6507, "step": 19463 }, { "epoch": 0.5, "grad_norm": 1.7632310390472412, "learning_rate": 1.0521922614701248e-05, "loss": 0.6359, "step": 19464 }, { "epoch": 0.5, "grad_norm": 1.4629305601119995, "learning_rate": 1.0521093601479037e-05, "loss": 0.5714, "step": 19465 }, { "epoch": 0.5, "grad_norm": 3.2083518505096436, "learning_rate": 1.0520264584665777e-05, "loss": 0.6196, "step": 19466 }, { "epoch": 0.5, "grad_norm": 1.4961546659469604, "learning_rate": 1.051943556426718e-05, "loss": 0.6671, "step": 19467 }, { "epoch": 0.5, "grad_norm": 1.334776759147644, "learning_rate": 1.051860654028896e-05, "loss": 0.5086, "step": 19468 }, { "epoch": 0.5, "grad_norm": 1.6231392621994019, "learning_rate": 1.0517777512736829e-05, "loss": 0.6598, "step": 19469 }, { "epoch": 0.5, "grad_norm": 6.043583393096924, "learning_rate": 1.0516948481616502e-05, "loss": 0.5433, "step": 19470 }, { "epoch": 0.5, "grad_norm": 1.0848063230514526, "learning_rate": 1.0516119446933688e-05, "loss": 0.6227, "step": 19471 }, { "epoch": 0.5, "grad_norm": 1.333718180656433, "learning_rate": 1.0515290408694105e-05, "loss": 0.4243, "step": 19472 }, { "epoch": 0.5, "grad_norm": 1.332148551940918, "learning_rate": 1.0514461366903465e-05, "loss": 0.5558, "step": 19473 }, { "epoch": 0.5, "grad_norm": 2.140063524246216, "learning_rate": 1.0513632321567478e-05, "loss": 0.7361, "step": 19474 }, { "epoch": 0.5, "grad_norm": 1.9264650344848633, "learning_rate": 1.0512803272691863e-05, "loss": 0.6474, "step": 19475 }, { "epoch": 0.5, "grad_norm": 1.4271647930145264, "learning_rate": 1.0511974220282327e-05, "loss": 0.6754, "step": 19476 }, { "epoch": 0.5, "grad_norm": 1.9461034536361694, "learning_rate": 1.0511145164344587e-05, "loss": 0.5744, "step": 19477 }, { "epoch": 0.5, "grad_norm": 1.4750949144363403, "learning_rate": 1.0510316104884357e-05, "loss": 0.5378, "step": 19478 }, { "epoch": 0.5, "grad_norm": 0.9878882765769958, "learning_rate": 1.0509487041907347e-05, "loss": 0.421, "step": 19479 }, { "epoch": 0.5, "grad_norm": 2.13800048828125, "learning_rate": 1.0508657975419275e-05, "loss": 0.6606, "step": 19480 }, { "epoch": 0.5, "grad_norm": 1.5372920036315918, "learning_rate": 1.050782890542585e-05, "loss": 0.4967, "step": 19481 }, { "epoch": 0.5, "grad_norm": 1.4133251905441284, "learning_rate": 1.050699983193279e-05, "loss": 0.5789, "step": 19482 }, { "epoch": 0.5, "grad_norm": 1.2201162576675415, "learning_rate": 1.0506170754945801e-05, "loss": 0.3631, "step": 19483 }, { "epoch": 0.5, "grad_norm": 2.8725099563598633, "learning_rate": 1.0505341674470602e-05, "loss": 0.6807, "step": 19484 }, { "epoch": 0.5, "grad_norm": 2.044011116027832, "learning_rate": 1.0504512590512909e-05, "loss": 0.5305, "step": 19485 }, { "epoch": 0.5, "grad_norm": 1.3138362169265747, "learning_rate": 1.050368350307843e-05, "loss": 0.4791, "step": 19486 }, { "epoch": 0.5, "grad_norm": 1.6947447061538696, "learning_rate": 1.0502854412172881e-05, "loss": 0.5682, "step": 19487 }, { "epoch": 0.5, "grad_norm": 2.5289995670318604, "learning_rate": 1.0502025317801973e-05, "loss": 0.6092, "step": 19488 }, { "epoch": 0.5, "grad_norm": 10.680920600891113, "learning_rate": 1.0501196219971425e-05, "loss": 0.6342, "step": 19489 }, { "epoch": 0.5, "grad_norm": 1.1868408918380737, "learning_rate": 1.0500367118686948e-05, "loss": 0.3071, "step": 19490 }, { "epoch": 0.5, "grad_norm": 3.1213507652282715, "learning_rate": 1.0499538013954254e-05, "loss": 0.6851, "step": 19491 }, { "epoch": 0.5, "grad_norm": 2.0595364570617676, "learning_rate": 1.0498708905779058e-05, "loss": 0.7204, "step": 19492 }, { "epoch": 0.5, "grad_norm": 1.1540863513946533, "learning_rate": 1.0497879794167072e-05, "loss": 0.6001, "step": 19493 }, { "epoch": 0.5, "grad_norm": 1.6560419797897339, "learning_rate": 1.0497050679124015e-05, "loss": 0.67, "step": 19494 }, { "epoch": 0.5, "grad_norm": 3.6651201248168945, "learning_rate": 1.0496221560655592e-05, "loss": 0.5642, "step": 19495 }, { "epoch": 0.5, "grad_norm": 2.1308107376098633, "learning_rate": 1.0495392438767525e-05, "loss": 0.5899, "step": 19496 }, { "epoch": 0.5, "grad_norm": 3.8164925575256348, "learning_rate": 1.0494563313465525e-05, "loss": 0.6219, "step": 19497 }, { "epoch": 0.5, "grad_norm": 3.0151143074035645, "learning_rate": 1.0493734184755305e-05, "loss": 0.6284, "step": 19498 }, { "epoch": 0.5, "grad_norm": 1.3179078102111816, "learning_rate": 1.0492905052642579e-05, "loss": 0.6876, "step": 19499 }, { "epoch": 0.5, "grad_norm": 2.8066282272338867, "learning_rate": 1.049207591713306e-05, "loss": 0.4868, "step": 19500 }, { "epoch": 0.5, "grad_norm": 2.1784989833831787, "learning_rate": 1.0491246778232462e-05, "loss": 0.5984, "step": 19501 }, { "epoch": 0.5, "grad_norm": 2.165848970413208, "learning_rate": 1.0490417635946501e-05, "loss": 0.7001, "step": 19502 }, { "epoch": 0.5, "grad_norm": 1.4872627258300781, "learning_rate": 1.048958849028089e-05, "loss": 0.6135, "step": 19503 }, { "epoch": 0.5, "grad_norm": 1.6541179418563843, "learning_rate": 1.0488759341241344e-05, "loss": 0.5029, "step": 19504 }, { "epoch": 0.5, "grad_norm": 5.940282821655273, "learning_rate": 1.0487930188833575e-05, "loss": 0.7121, "step": 19505 }, { "epoch": 0.5, "grad_norm": 1.4774774312973022, "learning_rate": 1.0487101033063299e-05, "loss": 0.5804, "step": 19506 }, { "epoch": 0.5, "grad_norm": 1.5601714849472046, "learning_rate": 1.0486271873936225e-05, "loss": 0.4486, "step": 19507 }, { "epoch": 0.5, "grad_norm": 1.2082099914550781, "learning_rate": 1.0485442711458071e-05, "loss": 0.4624, "step": 19508 }, { "epoch": 0.5, "grad_norm": 1.2583640813827515, "learning_rate": 1.0484613545634554e-05, "loss": 0.5308, "step": 19509 }, { "epoch": 0.5, "grad_norm": 1.06446373462677, "learning_rate": 1.0483784376471382e-05, "loss": 0.5029, "step": 19510 }, { "epoch": 0.5, "grad_norm": 2.849331855773926, "learning_rate": 1.0482955203974275e-05, "loss": 0.5193, "step": 19511 }, { "epoch": 0.5, "grad_norm": 2.8944056034088135, "learning_rate": 1.0482126028148943e-05, "loss": 0.7704, "step": 19512 }, { "epoch": 0.5, "grad_norm": 1.9854849576950073, "learning_rate": 1.04812968490011e-05, "loss": 0.3663, "step": 19513 }, { "epoch": 0.5, "grad_norm": 1.5887812376022339, "learning_rate": 1.0480467666536462e-05, "loss": 0.5472, "step": 19514 }, { "epoch": 0.5, "grad_norm": 1.4314017295837402, "learning_rate": 1.0479638480760742e-05, "loss": 0.5136, "step": 19515 }, { "epoch": 0.5, "grad_norm": 1.6671570539474487, "learning_rate": 1.0478809291679656e-05, "loss": 0.5018, "step": 19516 }, { "epoch": 0.5, "grad_norm": 1.4156848192214966, "learning_rate": 1.0477980099298916e-05, "loss": 0.5369, "step": 19517 }, { "epoch": 0.5, "grad_norm": 1.6020677089691162, "learning_rate": 1.047715090362424e-05, "loss": 0.3979, "step": 19518 }, { "epoch": 0.5, "grad_norm": 1.2767237424850464, "learning_rate": 1.0476321704661338e-05, "loss": 0.5469, "step": 19519 }, { "epoch": 0.5, "grad_norm": 1.9482958316802979, "learning_rate": 1.0475492502415927e-05, "loss": 0.573, "step": 19520 }, { "epoch": 0.5, "grad_norm": 2.7476887702941895, "learning_rate": 1.0474663296893722e-05, "loss": 0.673, "step": 19521 }, { "epoch": 0.5, "grad_norm": 3.1377084255218506, "learning_rate": 1.0473834088100431e-05, "loss": 0.4943, "step": 19522 }, { "epoch": 0.5, "grad_norm": 1.4844303131103516, "learning_rate": 1.0473004876041776e-05, "loss": 0.4263, "step": 19523 }, { "epoch": 0.5, "grad_norm": 6.750805854797363, "learning_rate": 1.0472175660723469e-05, "loss": 0.4451, "step": 19524 }, { "epoch": 0.5, "grad_norm": 2.066286087036133, "learning_rate": 1.0471346442151222e-05, "loss": 0.6388, "step": 19525 }, { "epoch": 0.5, "grad_norm": 2.5560853481292725, "learning_rate": 1.0470517220330753e-05, "loss": 0.6173, "step": 19526 }, { "epoch": 0.5, "grad_norm": 2.17610239982605, "learning_rate": 1.0469687995267774e-05, "loss": 0.6248, "step": 19527 }, { "epoch": 0.5, "grad_norm": 1.4471782445907593, "learning_rate": 1.0468858766968002e-05, "loss": 0.5508, "step": 19528 }, { "epoch": 0.5, "grad_norm": 3.710747241973877, "learning_rate": 1.0468029535437149e-05, "loss": 0.6749, "step": 19529 }, { "epoch": 0.5, "grad_norm": 1.7186200618743896, "learning_rate": 1.0467200300680932e-05, "loss": 0.648, "step": 19530 }, { "epoch": 0.5, "grad_norm": 0.8518493175506592, "learning_rate": 1.046637106270506e-05, "loss": 0.5297, "step": 19531 }, { "epoch": 0.5, "grad_norm": 1.3044569492340088, "learning_rate": 1.0465541821515255e-05, "loss": 0.6053, "step": 19532 }, { "epoch": 0.5, "grad_norm": 1.3714678287506104, "learning_rate": 1.0464712577117228e-05, "loss": 0.5703, "step": 19533 }, { "epoch": 0.5, "grad_norm": 2.1236884593963623, "learning_rate": 1.0463883329516693e-05, "loss": 0.5866, "step": 19534 }, { "epoch": 0.5, "grad_norm": 1.6648545265197754, "learning_rate": 1.0463054078719368e-05, "loss": 0.3952, "step": 19535 }, { "epoch": 0.5, "grad_norm": 1.0564699172973633, "learning_rate": 1.0462224824730961e-05, "loss": 0.5023, "step": 19536 }, { "epoch": 0.5, "grad_norm": 1.8856984376907349, "learning_rate": 1.0461395567557194e-05, "loss": 0.5862, "step": 19537 }, { "epoch": 0.5, "grad_norm": 2.1449577808380127, "learning_rate": 1.0460566307203776e-05, "loss": 0.5435, "step": 19538 }, { "epoch": 0.5, "grad_norm": 6.393767833709717, "learning_rate": 1.0459737043676426e-05, "loss": 0.7781, "step": 19539 }, { "epoch": 0.5, "grad_norm": 1.4155702590942383, "learning_rate": 1.045890777698086e-05, "loss": 0.5134, "step": 19540 }, { "epoch": 0.5, "grad_norm": 3.376211404800415, "learning_rate": 1.0458078507122785e-05, "loss": 0.709, "step": 19541 }, { "epoch": 0.5, "grad_norm": 3.0326340198516846, "learning_rate": 1.0457249234107922e-05, "loss": 0.7751, "step": 19542 }, { "epoch": 0.5, "grad_norm": 2.5787229537963867, "learning_rate": 1.0456419957941985e-05, "loss": 0.6727, "step": 19543 }, { "epoch": 0.5, "grad_norm": 2.3712046146392822, "learning_rate": 1.045559067863069e-05, "loss": 0.8572, "step": 19544 }, { "epoch": 0.5, "grad_norm": 1.4673807621002197, "learning_rate": 1.045476139617975e-05, "loss": 0.5363, "step": 19545 }, { "epoch": 0.5, "grad_norm": 1.0130858421325684, "learning_rate": 1.0453932110594877e-05, "loss": 0.4936, "step": 19546 }, { "epoch": 0.5, "grad_norm": 1.5905770063400269, "learning_rate": 1.0453102821881793e-05, "loss": 0.4109, "step": 19547 }, { "epoch": 0.5, "grad_norm": 1.7570297718048096, "learning_rate": 1.045227353004621e-05, "loss": 0.561, "step": 19548 }, { "epoch": 0.5, "grad_norm": 3.1120963096618652, "learning_rate": 1.0451444235093837e-05, "loss": 0.6735, "step": 19549 }, { "epoch": 0.5, "grad_norm": 3.112205743789673, "learning_rate": 1.0450614937030399e-05, "loss": 0.5752, "step": 19550 }, { "epoch": 0.5, "grad_norm": 1.6580034494400024, "learning_rate": 1.04497856358616e-05, "loss": 0.5755, "step": 19551 }, { "epoch": 0.5, "grad_norm": 1.5394471883773804, "learning_rate": 1.0448956331593166e-05, "loss": 0.4512, "step": 19552 }, { "epoch": 0.5, "grad_norm": 1.7075698375701904, "learning_rate": 1.0448127024230805e-05, "loss": 0.3833, "step": 19553 }, { "epoch": 0.5, "grad_norm": 1.7307764291763306, "learning_rate": 1.0447297713780234e-05, "loss": 0.627, "step": 19554 }, { "epoch": 0.5, "grad_norm": 1.623255968093872, "learning_rate": 1.0446468400247169e-05, "loss": 0.5822, "step": 19555 }, { "epoch": 0.5, "grad_norm": 6.640224456787109, "learning_rate": 1.0445639083637323e-05, "loss": 0.5353, "step": 19556 }, { "epoch": 0.5, "grad_norm": 1.7038698196411133, "learning_rate": 1.0444809763956411e-05, "loss": 0.5778, "step": 19557 }, { "epoch": 0.5, "grad_norm": 3.0134754180908203, "learning_rate": 1.0443980441210154e-05, "loss": 0.5453, "step": 19558 }, { "epoch": 0.5, "grad_norm": 3.456806182861328, "learning_rate": 1.044315111540426e-05, "loss": 0.5371, "step": 19559 }, { "epoch": 0.5, "grad_norm": 2.722020149230957, "learning_rate": 1.0442321786544447e-05, "loss": 0.6665, "step": 19560 }, { "epoch": 0.5, "grad_norm": 1.4816030263900757, "learning_rate": 1.044149245463643e-05, "loss": 0.5094, "step": 19561 }, { "epoch": 0.5, "grad_norm": 1.807647705078125, "learning_rate": 1.0440663119685925e-05, "loss": 0.6293, "step": 19562 }, { "epoch": 0.5, "grad_norm": 1.1957675218582153, "learning_rate": 1.0439833781698648e-05, "loss": 0.4046, "step": 19563 }, { "epoch": 0.5, "grad_norm": 1.1882356405258179, "learning_rate": 1.043900444068031e-05, "loss": 0.4805, "step": 19564 }, { "epoch": 0.5, "grad_norm": 1.6068445444107056, "learning_rate": 1.0438175096636631e-05, "loss": 0.5192, "step": 19565 }, { "epoch": 0.5, "grad_norm": 1.3927011489868164, "learning_rate": 1.0437345749573325e-05, "loss": 0.4697, "step": 19566 }, { "epoch": 0.5, "grad_norm": 1.6234906911849976, "learning_rate": 1.0436516399496106e-05, "loss": 0.6824, "step": 19567 }, { "epoch": 0.5, "grad_norm": 1.37702476978302, "learning_rate": 1.043568704641069e-05, "loss": 0.5355, "step": 19568 }, { "epoch": 0.5, "grad_norm": 4.125162124633789, "learning_rate": 1.043485769032279e-05, "loss": 0.5993, "step": 19569 }, { "epoch": 0.5, "grad_norm": 1.3368186950683594, "learning_rate": 1.0434028331238128e-05, "loss": 0.4842, "step": 19570 }, { "epoch": 0.5, "grad_norm": 2.0930919647216797, "learning_rate": 1.0433198969162415e-05, "loss": 0.6297, "step": 19571 }, { "epoch": 0.5, "grad_norm": 1.3836150169372559, "learning_rate": 1.0432369604101368e-05, "loss": 0.4358, "step": 19572 }, { "epoch": 0.5, "grad_norm": 0.8951970338821411, "learning_rate": 1.0431540236060697e-05, "loss": 0.5987, "step": 19573 }, { "epoch": 0.5, "grad_norm": 2.8780739307403564, "learning_rate": 1.0430710865046125e-05, "loss": 0.5823, "step": 19574 }, { "epoch": 0.5, "grad_norm": 5.955351829528809, "learning_rate": 1.0429881491063363e-05, "loss": 0.652, "step": 19575 }, { "epoch": 0.5, "grad_norm": 1.394119143486023, "learning_rate": 1.0429052114118128e-05, "loss": 0.4723, "step": 19576 }, { "epoch": 0.5, "grad_norm": 2.6246542930603027, "learning_rate": 1.0428222734216137e-05, "loss": 0.4991, "step": 19577 }, { "epoch": 0.5, "grad_norm": 3.6362528800964355, "learning_rate": 1.0427393351363102e-05, "loss": 0.6839, "step": 19578 }, { "epoch": 0.5, "grad_norm": 1.541799783706665, "learning_rate": 1.0426563965564742e-05, "loss": 0.5155, "step": 19579 }, { "epoch": 0.5, "grad_norm": 6.666862964630127, "learning_rate": 1.042573457682677e-05, "loss": 0.7307, "step": 19580 }, { "epoch": 0.5, "grad_norm": 1.5098989009857178, "learning_rate": 1.0424905185154902e-05, "loss": 0.5301, "step": 19581 }, { "epoch": 0.5, "grad_norm": 1.9839932918548584, "learning_rate": 1.0424075790554858e-05, "loss": 0.6505, "step": 19582 }, { "epoch": 0.5, "grad_norm": 1.6245118379592896, "learning_rate": 1.0423246393032347e-05, "loss": 0.6661, "step": 19583 }, { "epoch": 0.5, "grad_norm": 1.7001206874847412, "learning_rate": 1.042241699259309e-05, "loss": 0.5517, "step": 19584 }, { "epoch": 0.5, "grad_norm": 1.5259511470794678, "learning_rate": 1.0421587589242796e-05, "loss": 0.5978, "step": 19585 }, { "epoch": 0.5, "grad_norm": 1.8768357038497925, "learning_rate": 1.0420758182987188e-05, "loss": 0.5464, "step": 19586 }, { "epoch": 0.5, "grad_norm": 1.1758695840835571, "learning_rate": 1.041992877383198e-05, "loss": 0.5574, "step": 19587 }, { "epoch": 0.5, "grad_norm": 2.412541389465332, "learning_rate": 1.0419099361782885e-05, "loss": 0.5295, "step": 19588 }, { "epoch": 0.5, "grad_norm": 1.356332540512085, "learning_rate": 1.0418269946845623e-05, "loss": 0.5603, "step": 19589 }, { "epoch": 0.5, "grad_norm": 5.005875587463379, "learning_rate": 1.0417440529025903e-05, "loss": 0.8014, "step": 19590 }, { "epoch": 0.5, "grad_norm": 1.6041151285171509, "learning_rate": 1.041661110832945e-05, "loss": 0.5331, "step": 19591 }, { "epoch": 0.5, "grad_norm": 1.4434086084365845, "learning_rate": 1.0415781684761973e-05, "loss": 0.6155, "step": 19592 }, { "epoch": 0.5, "grad_norm": 1.0746580362319946, "learning_rate": 1.0414952258329187e-05, "loss": 0.5343, "step": 19593 }, { "epoch": 0.5, "grad_norm": 5.719414710998535, "learning_rate": 1.0414122829036815e-05, "loss": 0.7885, "step": 19594 }, { "epoch": 0.5, "grad_norm": 1.028041124343872, "learning_rate": 1.0413293396890567e-05, "loss": 0.556, "step": 19595 }, { "epoch": 0.5, "grad_norm": 1.698433756828308, "learning_rate": 1.0412463961896162e-05, "loss": 0.4873, "step": 19596 }, { "epoch": 0.5, "grad_norm": 1.6505168676376343, "learning_rate": 1.0411634524059312e-05, "loss": 0.6233, "step": 19597 }, { "epoch": 0.5, "grad_norm": 1.7234389781951904, "learning_rate": 1.0410805083385736e-05, "loss": 0.5801, "step": 19598 }, { "epoch": 0.5, "grad_norm": 1.9415112733840942, "learning_rate": 1.0409975639881149e-05, "loss": 0.6686, "step": 19599 }, { "epoch": 0.5, "grad_norm": 1.4937162399291992, "learning_rate": 1.040914619355127e-05, "loss": 0.7546, "step": 19600 }, { "epoch": 0.5, "grad_norm": 1.6151484251022339, "learning_rate": 1.0408316744401813e-05, "loss": 0.5962, "step": 19601 }, { "epoch": 0.5, "grad_norm": 4.014051914215088, "learning_rate": 1.0407487292438487e-05, "loss": 0.6765, "step": 19602 }, { "epoch": 0.5, "grad_norm": 1.6063789129257202, "learning_rate": 1.0406657837667022e-05, "loss": 0.596, "step": 19603 }, { "epoch": 0.5, "grad_norm": 3.3270413875579834, "learning_rate": 1.040582838009312e-05, "loss": 0.6171, "step": 19604 }, { "epoch": 0.5, "grad_norm": 1.1146137714385986, "learning_rate": 1.0404998919722508e-05, "loss": 0.4952, "step": 19605 }, { "epoch": 0.5, "grad_norm": 2.4247934818267822, "learning_rate": 1.0404169456560899e-05, "loss": 0.5783, "step": 19606 }, { "epoch": 0.5, "grad_norm": 1.2438238859176636, "learning_rate": 1.0403339990614006e-05, "loss": 0.5529, "step": 19607 }, { "epoch": 0.5, "grad_norm": 1.07135009765625, "learning_rate": 1.0402510521887548e-05, "loss": 0.5721, "step": 19608 }, { "epoch": 0.5, "grad_norm": 1.324413537979126, "learning_rate": 1.040168105038724e-05, "loss": 0.5532, "step": 19609 }, { "epoch": 0.5, "grad_norm": 1.3740155696868896, "learning_rate": 1.0400851576118798e-05, "loss": 0.5006, "step": 19610 }, { "epoch": 0.5, "grad_norm": 1.8260289430618286, "learning_rate": 1.040002209908794e-05, "loss": 0.6351, "step": 19611 }, { "epoch": 0.5, "grad_norm": 1.4534012079238892, "learning_rate": 1.039919261930038e-05, "loss": 0.7261, "step": 19612 }, { "epoch": 0.5, "grad_norm": 4.706769943237305, "learning_rate": 1.0398363136761835e-05, "loss": 0.7752, "step": 19613 }, { "epoch": 0.5, "grad_norm": 1.5951155424118042, "learning_rate": 1.0397533651478022e-05, "loss": 0.6564, "step": 19614 }, { "epoch": 0.5, "grad_norm": 5.348352909088135, "learning_rate": 1.039670416345466e-05, "loss": 0.5958, "step": 19615 }, { "epoch": 0.5, "grad_norm": 7.747278690338135, "learning_rate": 1.0395874672697457e-05, "loss": 0.6255, "step": 19616 }, { "epoch": 0.5, "grad_norm": 0.9636504650115967, "learning_rate": 1.0395045179212135e-05, "loss": 0.3851, "step": 19617 }, { "epoch": 0.5, "grad_norm": 2.8889732360839844, "learning_rate": 1.0394215683004414e-05, "loss": 0.6537, "step": 19618 }, { "epoch": 0.5, "grad_norm": 1.7482378482818604, "learning_rate": 1.0393386184080002e-05, "loss": 0.5502, "step": 19619 }, { "epoch": 0.5, "grad_norm": 1.993099570274353, "learning_rate": 1.039255668244462e-05, "loss": 0.4897, "step": 19620 }, { "epoch": 0.5, "grad_norm": 3.061655282974243, "learning_rate": 1.0391727178103984e-05, "loss": 0.6117, "step": 19621 }, { "epoch": 0.5, "grad_norm": 4.553856372833252, "learning_rate": 1.0390897671063813e-05, "loss": 0.6892, "step": 19622 }, { "epoch": 0.5, "grad_norm": 1.8404241800308228, "learning_rate": 1.039006816132982e-05, "loss": 0.4986, "step": 19623 }, { "epoch": 0.5, "grad_norm": 5.303806781768799, "learning_rate": 1.038923864890772e-05, "loss": 0.5928, "step": 19624 }, { "epoch": 0.5, "grad_norm": 3.045565366744995, "learning_rate": 1.0388409133803235e-05, "loss": 0.6589, "step": 19625 }, { "epoch": 0.5, "grad_norm": 8.326787948608398, "learning_rate": 1.0387579616022076e-05, "loss": 0.7423, "step": 19626 }, { "epoch": 0.5, "grad_norm": 4.400457859039307, "learning_rate": 1.0386750095569962e-05, "loss": 0.543, "step": 19627 }, { "epoch": 0.5, "grad_norm": 1.4472641944885254, "learning_rate": 1.038592057245261e-05, "loss": 0.4681, "step": 19628 }, { "epoch": 0.5, "grad_norm": 2.320404291152954, "learning_rate": 1.0385091046675736e-05, "loss": 0.6314, "step": 19629 }, { "epoch": 0.5, "grad_norm": 3.3238658905029297, "learning_rate": 1.0384261518245059e-05, "loss": 0.6485, "step": 19630 }, { "epoch": 0.5, "grad_norm": 0.9708772301673889, "learning_rate": 1.0383431987166287e-05, "loss": 0.4812, "step": 19631 }, { "epoch": 0.5, "grad_norm": 1.6408413648605347, "learning_rate": 1.0382602453445148e-05, "loss": 0.4637, "step": 19632 }, { "epoch": 0.5, "grad_norm": 1.4073874950408936, "learning_rate": 1.0381772917087349e-05, "loss": 0.5713, "step": 19633 }, { "epoch": 0.5, "grad_norm": 1.1212788820266724, "learning_rate": 1.0380943378098611e-05, "loss": 0.4364, "step": 19634 }, { "epoch": 0.5, "grad_norm": 4.204575538635254, "learning_rate": 1.0380113836484656e-05, "loss": 0.5936, "step": 19635 }, { "epoch": 0.5, "grad_norm": 2.355576515197754, "learning_rate": 1.037928429225119e-05, "loss": 0.5602, "step": 19636 }, { "epoch": 0.5, "grad_norm": 1.4477945566177368, "learning_rate": 1.0378454745403938e-05, "loss": 0.7747, "step": 19637 }, { "epoch": 0.5, "grad_norm": 11.297886848449707, "learning_rate": 1.0377625195948611e-05, "loss": 0.7014, "step": 19638 }, { "epoch": 0.5, "grad_norm": 1.5459543466567993, "learning_rate": 1.037679564389093e-05, "loss": 0.6392, "step": 19639 }, { "epoch": 0.5, "grad_norm": 1.2342394590377808, "learning_rate": 1.037596608923661e-05, "loss": 0.5069, "step": 19640 }, { "epoch": 0.5, "grad_norm": 1.5023994445800781, "learning_rate": 1.0375136531991367e-05, "loss": 0.606, "step": 19641 }, { "epoch": 0.5, "grad_norm": 1.3293639421463013, "learning_rate": 1.0374306972160921e-05, "loss": 0.5314, "step": 19642 }, { "epoch": 0.5, "grad_norm": 3.2721824645996094, "learning_rate": 1.0373477409750986e-05, "loss": 0.4544, "step": 19643 }, { "epoch": 0.5, "grad_norm": 1.6542352437973022, "learning_rate": 1.0372647844767278e-05, "loss": 0.5458, "step": 19644 }, { "epoch": 0.5, "grad_norm": 1.3052128553390503, "learning_rate": 1.0371818277215515e-05, "loss": 0.4482, "step": 19645 }, { "epoch": 0.5, "grad_norm": 7.156485080718994, "learning_rate": 1.0370988707101412e-05, "loss": 0.5277, "step": 19646 }, { "epoch": 0.5, "grad_norm": 12.263742446899414, "learning_rate": 1.0370159134430692e-05, "loss": 0.5919, "step": 19647 }, { "epoch": 0.5, "grad_norm": 6.808625221252441, "learning_rate": 1.0369329559209067e-05, "loss": 0.7498, "step": 19648 }, { "epoch": 0.5, "grad_norm": 5.771021842956543, "learning_rate": 1.0368499981442254e-05, "loss": 0.4789, "step": 19649 }, { "epoch": 0.5, "grad_norm": 2.070857048034668, "learning_rate": 1.036767040113597e-05, "loss": 0.5913, "step": 19650 }, { "epoch": 0.5, "grad_norm": 3.749478816986084, "learning_rate": 1.0366840818295931e-05, "loss": 0.61, "step": 19651 }, { "epoch": 0.5, "grad_norm": 1.6428192853927612, "learning_rate": 1.0366011232927859e-05, "loss": 0.8347, "step": 19652 }, { "epoch": 0.5, "grad_norm": 1.7208203077316284, "learning_rate": 1.0365181645037468e-05, "loss": 0.5939, "step": 19653 }, { "epoch": 0.5, "grad_norm": 1.3838680982589722, "learning_rate": 1.0364352054630473e-05, "loss": 0.6531, "step": 19654 }, { "epoch": 0.5, "grad_norm": 2.9387528896331787, "learning_rate": 1.0363522461712591e-05, "loss": 0.3715, "step": 19655 }, { "epoch": 0.5, "grad_norm": 7.487239360809326, "learning_rate": 1.0362692866289543e-05, "loss": 0.5655, "step": 19656 }, { "epoch": 0.5, "grad_norm": 1.122998595237732, "learning_rate": 1.0361863268367043e-05, "loss": 0.5128, "step": 19657 }, { "epoch": 0.5, "grad_norm": 1.2289018630981445, "learning_rate": 1.0361033667950805e-05, "loss": 0.4975, "step": 19658 }, { "epoch": 0.5, "grad_norm": 3.1561241149902344, "learning_rate": 1.0360204065046556e-05, "loss": 0.6526, "step": 19659 }, { "epoch": 0.5, "grad_norm": 2.2526462078094482, "learning_rate": 1.0359374459660003e-05, "loss": 0.6499, "step": 19660 }, { "epoch": 0.5, "grad_norm": 1.6450210809707642, "learning_rate": 1.035854485179687e-05, "loss": 0.5514, "step": 19661 }, { "epoch": 0.5, "grad_norm": 4.873327732086182, "learning_rate": 1.0357715241462868e-05, "loss": 0.4792, "step": 19662 }, { "epoch": 0.5, "grad_norm": 7.53515100479126, "learning_rate": 1.0356885628663718e-05, "loss": 0.5844, "step": 19663 }, { "epoch": 0.5, "grad_norm": 1.1311390399932861, "learning_rate": 1.0356056013405139e-05, "loss": 0.4358, "step": 19664 }, { "epoch": 0.5, "grad_norm": 2.8554141521453857, "learning_rate": 1.035522639569284e-05, "loss": 0.5258, "step": 19665 }, { "epoch": 0.5, "grad_norm": 1.6226921081542969, "learning_rate": 1.035439677553255e-05, "loss": 0.6538, "step": 19666 }, { "epoch": 0.5, "grad_norm": 2.3303632736206055, "learning_rate": 1.0353567152929976e-05, "loss": 0.6361, "step": 19667 }, { "epoch": 0.5, "grad_norm": 2.76344895362854, "learning_rate": 1.0352737527890841e-05, "loss": 0.5346, "step": 19668 }, { "epoch": 0.5, "grad_norm": 3.363039016723633, "learning_rate": 1.0351907900420864e-05, "loss": 0.8529, "step": 19669 }, { "epoch": 0.5, "grad_norm": 1.2474888563156128, "learning_rate": 1.0351078270525753e-05, "loss": 0.4535, "step": 19670 }, { "epoch": 0.5, "grad_norm": 1.6144906282424927, "learning_rate": 1.0350248638211236e-05, "loss": 0.5995, "step": 19671 }, { "epoch": 0.5, "grad_norm": 1.395642638206482, "learning_rate": 1.0349419003483021e-05, "loss": 0.6361, "step": 19672 }, { "epoch": 0.5, "grad_norm": 1.2259680032730103, "learning_rate": 1.0348589366346831e-05, "loss": 0.67, "step": 19673 }, { "epoch": 0.5, "grad_norm": 3.842643976211548, "learning_rate": 1.0347759726808381e-05, "loss": 0.8085, "step": 19674 }, { "epoch": 0.5, "grad_norm": 1.3812272548675537, "learning_rate": 1.0346930084873392e-05, "loss": 0.3825, "step": 19675 }, { "epoch": 0.5, "grad_norm": 1.7834299802780151, "learning_rate": 1.034610044054758e-05, "loss": 0.5731, "step": 19676 }, { "epoch": 0.5, "grad_norm": 3.1764848232269287, "learning_rate": 1.0345270793836658e-05, "loss": 0.4806, "step": 19677 }, { "epoch": 0.5, "grad_norm": 1.3100866079330444, "learning_rate": 1.0344441144746346e-05, "loss": 0.6431, "step": 19678 }, { "epoch": 0.5, "grad_norm": 1.6252994537353516, "learning_rate": 1.0343611493282363e-05, "loss": 0.3868, "step": 19679 }, { "epoch": 0.5, "grad_norm": 1.6426148414611816, "learning_rate": 1.0342781839450427e-05, "loss": 0.5396, "step": 19680 }, { "epoch": 0.5, "grad_norm": 1.3224772214889526, "learning_rate": 1.0341952183256255e-05, "loss": 0.5247, "step": 19681 }, { "epoch": 0.5, "grad_norm": 2.1579885482788086, "learning_rate": 1.034112252470556e-05, "loss": 0.5135, "step": 19682 }, { "epoch": 0.5, "grad_norm": 1.5686430931091309, "learning_rate": 1.0340292863804067e-05, "loss": 0.3867, "step": 19683 }, { "epoch": 0.5, "grad_norm": 1.387138843536377, "learning_rate": 1.0339463200557485e-05, "loss": 0.6087, "step": 19684 }, { "epoch": 0.5, "grad_norm": 7.595003128051758, "learning_rate": 1.0338633534971537e-05, "loss": 0.4614, "step": 19685 }, { "epoch": 0.5, "grad_norm": 7.574713706970215, "learning_rate": 1.033780386705194e-05, "loss": 0.5453, "step": 19686 }, { "epoch": 0.5, "grad_norm": 1.9620798826217651, "learning_rate": 1.033697419680441e-05, "loss": 0.6514, "step": 19687 }, { "epoch": 0.5, "grad_norm": 1.47160804271698, "learning_rate": 1.0336144524234667e-05, "loss": 0.4415, "step": 19688 }, { "epoch": 0.5, "grad_norm": 2.0956737995147705, "learning_rate": 1.0335314849348428e-05, "loss": 0.6281, "step": 19689 }, { "epoch": 0.5, "grad_norm": 2.061447858810425, "learning_rate": 1.0334485172151408e-05, "loss": 0.6573, "step": 19690 }, { "epoch": 0.5, "grad_norm": 2.9258995056152344, "learning_rate": 1.0333655492649327e-05, "loss": 0.5664, "step": 19691 }, { "epoch": 0.5, "grad_norm": 1.5097432136535645, "learning_rate": 1.03328258108479e-05, "loss": 0.5294, "step": 19692 }, { "epoch": 0.5, "grad_norm": 1.7619307041168213, "learning_rate": 1.033199612675285e-05, "loss": 0.7401, "step": 19693 }, { "epoch": 0.5, "grad_norm": 4.800558090209961, "learning_rate": 1.0331166440369889e-05, "loss": 0.6244, "step": 19694 }, { "epoch": 0.5, "grad_norm": 3.272634267807007, "learning_rate": 1.0330336751704737e-05, "loss": 0.4576, "step": 19695 }, { "epoch": 0.5, "grad_norm": 1.1948281526565552, "learning_rate": 1.0329507060763114e-05, "loss": 0.4713, "step": 19696 }, { "epoch": 0.5, "grad_norm": 3.0499370098114014, "learning_rate": 1.0328677367550732e-05, "loss": 0.4683, "step": 19697 }, { "epoch": 0.5, "grad_norm": 1.8904980421066284, "learning_rate": 1.0327847672073315e-05, "loss": 0.6932, "step": 19698 }, { "epoch": 0.5, "grad_norm": 1.6296191215515137, "learning_rate": 1.0327017974336574e-05, "loss": 0.6929, "step": 19699 }, { "epoch": 0.5, "grad_norm": 1.640589952468872, "learning_rate": 1.0326188274346237e-05, "loss": 0.6607, "step": 19700 }, { "epoch": 0.5, "grad_norm": 1.5345216989517212, "learning_rate": 1.0325358572108009e-05, "loss": 0.6446, "step": 19701 }, { "epoch": 0.5, "grad_norm": 1.319790244102478, "learning_rate": 1.0324528867627614e-05, "loss": 0.6379, "step": 19702 }, { "epoch": 0.5, "grad_norm": 2.263279676437378, "learning_rate": 1.0323699160910776e-05, "loss": 0.5058, "step": 19703 }, { "epoch": 0.51, "grad_norm": 6.569403648376465, "learning_rate": 1.0322869451963202e-05, "loss": 0.6101, "step": 19704 }, { "epoch": 0.51, "grad_norm": 1.8959739208221436, "learning_rate": 1.0322039740790617e-05, "loss": 0.6943, "step": 19705 }, { "epoch": 0.51, "grad_norm": 9.07732105255127, "learning_rate": 1.0321210027398734e-05, "loss": 0.3264, "step": 19706 }, { "epoch": 0.51, "grad_norm": 1.4462878704071045, "learning_rate": 1.0320380311793274e-05, "loss": 0.551, "step": 19707 }, { "epoch": 0.51, "grad_norm": 1.4573945999145508, "learning_rate": 1.0319550593979954e-05, "loss": 0.4068, "step": 19708 }, { "epoch": 0.51, "grad_norm": 1.986980676651001, "learning_rate": 1.0318720873964493e-05, "loss": 0.7707, "step": 19709 }, { "epoch": 0.51, "grad_norm": 3.350681781768799, "learning_rate": 1.031789115175261e-05, "loss": 0.5531, "step": 19710 }, { "epoch": 0.51, "grad_norm": 1.481669306755066, "learning_rate": 1.0317061427350018e-05, "loss": 0.5603, "step": 19711 }, { "epoch": 0.51, "grad_norm": 1.3467062711715698, "learning_rate": 1.0316231700762439e-05, "loss": 0.5805, "step": 19712 }, { "epoch": 0.51, "grad_norm": 1.5875368118286133, "learning_rate": 1.0315401971995588e-05, "loss": 0.483, "step": 19713 }, { "epoch": 0.51, "grad_norm": 1.1047143936157227, "learning_rate": 1.0314572241055185e-05, "loss": 0.4809, "step": 19714 }, { "epoch": 0.51, "grad_norm": 8.048443794250488, "learning_rate": 1.0313742507946952e-05, "loss": 0.612, "step": 19715 }, { "epoch": 0.51, "grad_norm": 1.6211365461349487, "learning_rate": 1.0312912772676597e-05, "loss": 0.5606, "step": 19716 }, { "epoch": 0.51, "grad_norm": 1.471154808998108, "learning_rate": 1.0312083035249846e-05, "loss": 0.4667, "step": 19717 }, { "epoch": 0.51, "grad_norm": 6.901044845581055, "learning_rate": 1.0311253295672416e-05, "loss": 0.696, "step": 19718 }, { "epoch": 0.51, "grad_norm": 2.9447243213653564, "learning_rate": 1.0310423553950022e-05, "loss": 0.5645, "step": 19719 }, { "epoch": 0.51, "grad_norm": 2.578526020050049, "learning_rate": 1.0309593810088385e-05, "loss": 0.5855, "step": 19720 }, { "epoch": 0.51, "grad_norm": 1.2507842779159546, "learning_rate": 1.0308764064093221e-05, "loss": 0.4633, "step": 19721 }, { "epoch": 0.51, "grad_norm": 1.5538078546524048, "learning_rate": 1.0307934315970252e-05, "loss": 0.7311, "step": 19722 }, { "epoch": 0.51, "grad_norm": 5.703929901123047, "learning_rate": 1.030710456572519e-05, "loss": 0.6644, "step": 19723 }, { "epoch": 0.51, "grad_norm": 1.1577789783477783, "learning_rate": 1.0306274813363759e-05, "loss": 0.5397, "step": 19724 }, { "epoch": 0.51, "grad_norm": 0.9945943355560303, "learning_rate": 1.030544505889167e-05, "loss": 0.5618, "step": 19725 }, { "epoch": 0.51, "grad_norm": 1.0286425352096558, "learning_rate": 1.0304615302314648e-05, "loss": 0.3808, "step": 19726 }, { "epoch": 0.51, "grad_norm": 5.724275588989258, "learning_rate": 1.0303785543638409e-05, "loss": 0.6074, "step": 19727 }, { "epoch": 0.51, "grad_norm": 1.3699755668640137, "learning_rate": 1.0302955782868671e-05, "loss": 0.4294, "step": 19728 }, { "epoch": 0.51, "grad_norm": 1.2803906202316284, "learning_rate": 1.0302126020011154e-05, "loss": 0.548, "step": 19729 }, { "epoch": 0.51, "grad_norm": 1.6651700735092163, "learning_rate": 1.0301296255071571e-05, "loss": 0.6696, "step": 19730 }, { "epoch": 0.51, "grad_norm": 3.955711603164673, "learning_rate": 1.0300466488055642e-05, "loss": 0.5912, "step": 19731 }, { "epoch": 0.51, "grad_norm": 1.6809927225112915, "learning_rate": 1.0299636718969093e-05, "loss": 0.7057, "step": 19732 }, { "epoch": 0.51, "grad_norm": 10.039469718933105, "learning_rate": 1.029880694781763e-05, "loss": 0.629, "step": 19733 }, { "epoch": 0.51, "grad_norm": 4.094971656799316, "learning_rate": 1.0297977174606981e-05, "loss": 0.5585, "step": 19734 }, { "epoch": 0.51, "grad_norm": 1.9478888511657715, "learning_rate": 1.029714739934286e-05, "loss": 0.3319, "step": 19735 }, { "epoch": 0.51, "grad_norm": 1.3566272258758545, "learning_rate": 1.0296317622030984e-05, "loss": 0.5439, "step": 19736 }, { "epoch": 0.51, "grad_norm": 2.5303940773010254, "learning_rate": 1.0295487842677075e-05, "loss": 0.7541, "step": 19737 }, { "epoch": 0.51, "grad_norm": 1.627362608909607, "learning_rate": 1.0294658061286847e-05, "loss": 0.5739, "step": 19738 }, { "epoch": 0.51, "grad_norm": 1.3418002128601074, "learning_rate": 1.0293828277866022e-05, "loss": 0.5278, "step": 19739 }, { "epoch": 0.51, "grad_norm": 1.8407138586044312, "learning_rate": 1.029299849242032e-05, "loss": 0.6209, "step": 19740 }, { "epoch": 0.51, "grad_norm": 6.10495138168335, "learning_rate": 1.0292168704955454e-05, "loss": 0.7024, "step": 19741 }, { "epoch": 0.51, "grad_norm": 5.49205207824707, "learning_rate": 1.0291338915477143e-05, "loss": 0.5423, "step": 19742 }, { "epoch": 0.51, "grad_norm": 2.561116933822632, "learning_rate": 1.0290509123991107e-05, "loss": 0.5588, "step": 19743 }, { "epoch": 0.51, "grad_norm": 1.9620137214660645, "learning_rate": 1.0289679330503069e-05, "loss": 0.5315, "step": 19744 }, { "epoch": 0.51, "grad_norm": 3.9191200733184814, "learning_rate": 1.0288849535018738e-05, "loss": 0.533, "step": 19745 }, { "epoch": 0.51, "grad_norm": 2.8367581367492676, "learning_rate": 1.0288019737543841e-05, "loss": 0.6264, "step": 19746 }, { "epoch": 0.51, "grad_norm": 2.070018768310547, "learning_rate": 1.0287189938084092e-05, "loss": 0.4534, "step": 19747 }, { "epoch": 0.51, "grad_norm": 2.547961473464966, "learning_rate": 1.0286360136645211e-05, "loss": 0.7807, "step": 19748 }, { "epoch": 0.51, "grad_norm": 1.598892331123352, "learning_rate": 1.0285530333232916e-05, "loss": 0.5064, "step": 19749 }, { "epoch": 0.51, "grad_norm": 1.150806188583374, "learning_rate": 1.0284700527852925e-05, "loss": 0.4829, "step": 19750 }, { "epoch": 0.51, "grad_norm": 2.789151906967163, "learning_rate": 1.0283870720510956e-05, "loss": 0.7465, "step": 19751 }, { "epoch": 0.51, "grad_norm": 3.852510452270508, "learning_rate": 1.0283040911212728e-05, "loss": 0.489, "step": 19752 }, { "epoch": 0.51, "grad_norm": 1.6420822143554688, "learning_rate": 1.0282211099963962e-05, "loss": 0.5695, "step": 19753 }, { "epoch": 0.51, "grad_norm": 1.6432158946990967, "learning_rate": 1.0281381286770371e-05, "loss": 0.4981, "step": 19754 }, { "epoch": 0.51, "grad_norm": 1.860877275466919, "learning_rate": 1.0280551471637678e-05, "loss": 0.6136, "step": 19755 }, { "epoch": 0.51, "grad_norm": 5.86235237121582, "learning_rate": 1.0279721654571603e-05, "loss": 0.544, "step": 19756 }, { "epoch": 0.51, "grad_norm": 3.0325586795806885, "learning_rate": 1.027889183557786e-05, "loss": 0.3684, "step": 19757 }, { "epoch": 0.51, "grad_norm": 6.154959678649902, "learning_rate": 1.0278062014662171e-05, "loss": 0.5598, "step": 19758 }, { "epoch": 0.51, "grad_norm": 2.0237200260162354, "learning_rate": 1.0277232191830253e-05, "loss": 0.5381, "step": 19759 }, { "epoch": 0.51, "grad_norm": 2.6547293663024902, "learning_rate": 1.0276402367087824e-05, "loss": 0.5931, "step": 19760 }, { "epoch": 0.51, "grad_norm": 3.4245355129241943, "learning_rate": 1.0275572540440604e-05, "loss": 0.6512, "step": 19761 }, { "epoch": 0.51, "grad_norm": 2.005955457687378, "learning_rate": 1.027474271189431e-05, "loss": 0.5933, "step": 19762 }, { "epoch": 0.51, "grad_norm": 1.3442734479904175, "learning_rate": 1.0273912881454663e-05, "loss": 0.6211, "step": 19763 }, { "epoch": 0.51, "grad_norm": 2.2406930923461914, "learning_rate": 1.027308304912738e-05, "loss": 0.534, "step": 19764 }, { "epoch": 0.51, "grad_norm": 1.312870740890503, "learning_rate": 1.0272253214918184e-05, "loss": 0.5064, "step": 19765 }, { "epoch": 0.51, "grad_norm": 1.6823713779449463, "learning_rate": 1.0271423378832786e-05, "loss": 0.5578, "step": 19766 }, { "epoch": 0.51, "grad_norm": 1.474427342414856, "learning_rate": 1.0270593540876907e-05, "loss": 0.5131, "step": 19767 }, { "epoch": 0.51, "grad_norm": 1.0383816957473755, "learning_rate": 1.0269763701056272e-05, "loss": 0.4843, "step": 19768 }, { "epoch": 0.51, "grad_norm": 7.468932628631592, "learning_rate": 1.026893385937659e-05, "loss": 0.5907, "step": 19769 }, { "epoch": 0.51, "grad_norm": 7.2801690101623535, "learning_rate": 1.026810401584359e-05, "loss": 0.7095, "step": 19770 }, { "epoch": 0.51, "grad_norm": 3.1216342449188232, "learning_rate": 1.0267274170462981e-05, "loss": 0.6063, "step": 19771 }, { "epoch": 0.51, "grad_norm": 1.5390815734863281, "learning_rate": 1.0266444323240488e-05, "loss": 0.4997, "step": 19772 }, { "epoch": 0.51, "grad_norm": 3.7721943855285645, "learning_rate": 1.0265614474181831e-05, "loss": 0.7119, "step": 19773 }, { "epoch": 0.51, "grad_norm": 7.992249965667725, "learning_rate": 1.0264784623292721e-05, "loss": 0.5859, "step": 19774 }, { "epoch": 0.51, "grad_norm": 1.440327525138855, "learning_rate": 1.0263954770578886e-05, "loss": 0.5285, "step": 19775 }, { "epoch": 0.51, "grad_norm": 1.2878086566925049, "learning_rate": 1.0263124916046036e-05, "loss": 0.6226, "step": 19776 }, { "epoch": 0.51, "grad_norm": 4.558366775512695, "learning_rate": 1.02622950596999e-05, "loss": 0.3554, "step": 19777 }, { "epoch": 0.51, "grad_norm": 1.6736316680908203, "learning_rate": 1.0261465201546188e-05, "loss": 0.5112, "step": 19778 }, { "epoch": 0.51, "grad_norm": 1.402114748954773, "learning_rate": 1.026063534159062e-05, "loss": 0.6988, "step": 19779 }, { "epoch": 0.51, "grad_norm": 1.5635356903076172, "learning_rate": 1.025980547983892e-05, "loss": 0.6221, "step": 19780 }, { "epoch": 0.51, "grad_norm": 1.8008323907852173, "learning_rate": 1.0258975616296803e-05, "loss": 0.6351, "step": 19781 }, { "epoch": 0.51, "grad_norm": 3.3923439979553223, "learning_rate": 1.025814575096999e-05, "loss": 0.5327, "step": 19782 }, { "epoch": 0.51, "grad_norm": 5.556819915771484, "learning_rate": 1.0257315883864197e-05, "loss": 0.6717, "step": 19783 }, { "epoch": 0.51, "grad_norm": 1.4822156429290771, "learning_rate": 1.0256486014985145e-05, "loss": 0.4925, "step": 19784 }, { "epoch": 0.51, "grad_norm": 2.776303768157959, "learning_rate": 1.0255656144338553e-05, "loss": 0.461, "step": 19785 }, { "epoch": 0.51, "grad_norm": 1.0644757747650146, "learning_rate": 1.0254826271930138e-05, "loss": 0.6271, "step": 19786 }, { "epoch": 0.51, "grad_norm": 2.039188861846924, "learning_rate": 1.0253996397765625e-05, "loss": 0.6145, "step": 19787 }, { "epoch": 0.51, "grad_norm": 7.607957363128662, "learning_rate": 1.0253166521850723e-05, "loss": 0.7613, "step": 19788 }, { "epoch": 0.51, "grad_norm": 1.817333459854126, "learning_rate": 1.0252336644191159e-05, "loss": 0.7093, "step": 19789 }, { "epoch": 0.51, "grad_norm": 2.4312233924865723, "learning_rate": 1.0251506764792651e-05, "loss": 0.7314, "step": 19790 }, { "epoch": 0.51, "grad_norm": 4.182204723358154, "learning_rate": 1.0250676883660913e-05, "loss": 0.6971, "step": 19791 }, { "epoch": 0.51, "grad_norm": 1.8137378692626953, "learning_rate": 1.024984700080167e-05, "loss": 0.4667, "step": 19792 }, { "epoch": 0.51, "grad_norm": 2.0417063236236572, "learning_rate": 1.0249017116220636e-05, "loss": 0.6582, "step": 19793 }, { "epoch": 0.51, "grad_norm": 1.3638056516647339, "learning_rate": 1.0248187229923532e-05, "loss": 0.5623, "step": 19794 }, { "epoch": 0.51, "grad_norm": 1.3219902515411377, "learning_rate": 1.024735734191608e-05, "loss": 0.5122, "step": 19795 }, { "epoch": 0.51, "grad_norm": 11.504741668701172, "learning_rate": 1.0246527452203995e-05, "loss": 0.6522, "step": 19796 }, { "epoch": 0.51, "grad_norm": 1.5201587677001953, "learning_rate": 1.0245697560793e-05, "loss": 0.5379, "step": 19797 }, { "epoch": 0.51, "grad_norm": 1.9216225147247314, "learning_rate": 1.0244867667688809e-05, "loss": 0.6705, "step": 19798 }, { "epoch": 0.51, "grad_norm": 1.836022138595581, "learning_rate": 1.0244037772897144e-05, "loss": 0.6337, "step": 19799 }, { "epoch": 0.51, "grad_norm": 6.153585433959961, "learning_rate": 1.0243207876423726e-05, "loss": 0.7576, "step": 19800 }, { "epoch": 0.51, "grad_norm": 1.6999083757400513, "learning_rate": 1.0242377978274271e-05, "loss": 0.5988, "step": 19801 }, { "epoch": 0.51, "grad_norm": 2.6736083030700684, "learning_rate": 1.02415480784545e-05, "loss": 0.5438, "step": 19802 }, { "epoch": 0.51, "grad_norm": 1.3359482288360596, "learning_rate": 1.024071817697013e-05, "loss": 0.5425, "step": 19803 }, { "epoch": 0.51, "grad_norm": 3.060084819793701, "learning_rate": 1.0239888273826884e-05, "loss": 0.6464, "step": 19804 }, { "epoch": 0.51, "grad_norm": 1.9757137298583984, "learning_rate": 1.0239058369030475e-05, "loss": 0.6514, "step": 19805 }, { "epoch": 0.51, "grad_norm": 1.7847930192947388, "learning_rate": 1.0238228462586629e-05, "loss": 0.4937, "step": 19806 }, { "epoch": 0.51, "grad_norm": 1.3120249509811401, "learning_rate": 1.0237398554501062e-05, "loss": 0.5999, "step": 19807 }, { "epoch": 0.51, "grad_norm": 0.9268749952316284, "learning_rate": 1.023656864477949e-05, "loss": 0.5258, "step": 19808 }, { "epoch": 0.51, "grad_norm": 1.8437353372573853, "learning_rate": 1.023573873342764e-05, "loss": 0.5599, "step": 19809 }, { "epoch": 0.51, "grad_norm": 1.7691528797149658, "learning_rate": 1.0234908820451223e-05, "loss": 0.4728, "step": 19810 }, { "epoch": 0.51, "grad_norm": 0.9264657497406006, "learning_rate": 1.0234078905855962e-05, "loss": 0.4886, "step": 19811 }, { "epoch": 0.51, "grad_norm": 1.5208394527435303, "learning_rate": 1.023324898964758e-05, "loss": 0.6208, "step": 19812 }, { "epoch": 0.51, "grad_norm": 2.0532803535461426, "learning_rate": 1.023241907183179e-05, "loss": 0.6473, "step": 19813 }, { "epoch": 0.51, "grad_norm": 2.4412615299224854, "learning_rate": 1.0231589152414314e-05, "loss": 0.6669, "step": 19814 }, { "epoch": 0.51, "grad_norm": 1.2961690425872803, "learning_rate": 1.0230759231400871e-05, "loss": 0.4898, "step": 19815 }, { "epoch": 0.51, "grad_norm": 3.096179485321045, "learning_rate": 1.022992930879718e-05, "loss": 0.5561, "step": 19816 }, { "epoch": 0.51, "grad_norm": 1.2839678525924683, "learning_rate": 1.0229099384608963e-05, "loss": 0.6564, "step": 19817 }, { "epoch": 0.51, "grad_norm": 1.096511960029602, "learning_rate": 1.0228269458841934e-05, "loss": 0.5038, "step": 19818 }, { "epoch": 0.51, "grad_norm": 1.1549131870269775, "learning_rate": 1.0227439531501817e-05, "loss": 0.4364, "step": 19819 }, { "epoch": 0.51, "grad_norm": 1.5257515907287598, "learning_rate": 1.0226609602594329e-05, "loss": 0.6977, "step": 19820 }, { "epoch": 0.51, "grad_norm": 1.7480424642562866, "learning_rate": 1.0225779672125191e-05, "loss": 0.6365, "step": 19821 }, { "epoch": 0.51, "grad_norm": 3.2690670490264893, "learning_rate": 1.0224949740100118e-05, "loss": 0.5367, "step": 19822 }, { "epoch": 0.51, "grad_norm": 1.804132103919983, "learning_rate": 1.0224119806524834e-05, "loss": 0.5611, "step": 19823 }, { "epoch": 0.51, "grad_norm": 3.032165765762329, "learning_rate": 1.0223289871405062e-05, "loss": 0.5347, "step": 19824 }, { "epoch": 0.51, "grad_norm": 5.051044464111328, "learning_rate": 1.0222459934746511e-05, "loss": 0.6079, "step": 19825 }, { "epoch": 0.51, "grad_norm": 2.23610520362854, "learning_rate": 1.022162999655491e-05, "loss": 0.5908, "step": 19826 }, { "epoch": 0.51, "grad_norm": 4.6948418617248535, "learning_rate": 1.022080005683597e-05, "loss": 0.699, "step": 19827 }, { "epoch": 0.51, "grad_norm": 4.0377326011657715, "learning_rate": 1.0219970115595416e-05, "loss": 0.6361, "step": 19828 }, { "epoch": 0.51, "grad_norm": 1.35438871383667, "learning_rate": 1.0219140172838968e-05, "loss": 0.5386, "step": 19829 }, { "epoch": 0.51, "grad_norm": 3.9284372329711914, "learning_rate": 1.0218310228572341e-05, "loss": 0.6007, "step": 19830 }, { "epoch": 0.51, "grad_norm": 1.5917937755584717, "learning_rate": 1.021748028280126e-05, "loss": 0.5247, "step": 19831 }, { "epoch": 0.51, "grad_norm": 1.127899408340454, "learning_rate": 1.021665033553144e-05, "loss": 0.4705, "step": 19832 }, { "epoch": 0.51, "grad_norm": 1.8577361106872559, "learning_rate": 1.0215820386768602e-05, "loss": 0.7263, "step": 19833 }, { "epoch": 0.51, "grad_norm": 1.8079026937484741, "learning_rate": 1.0214990436518466e-05, "loss": 0.6219, "step": 19834 }, { "epoch": 0.51, "grad_norm": 1.5249887704849243, "learning_rate": 1.0214160484786751e-05, "loss": 0.5876, "step": 19835 }, { "epoch": 0.51, "grad_norm": 1.3835158348083496, "learning_rate": 1.0213330531579175e-05, "loss": 0.48, "step": 19836 }, { "epoch": 0.51, "grad_norm": 3.5222556591033936, "learning_rate": 1.0212500576901461e-05, "loss": 0.5975, "step": 19837 }, { "epoch": 0.51, "grad_norm": 1.352715015411377, "learning_rate": 1.0211670620759326e-05, "loss": 0.6397, "step": 19838 }, { "epoch": 0.51, "grad_norm": 2.18105149269104, "learning_rate": 1.0210840663158488e-05, "loss": 0.5367, "step": 19839 }, { "epoch": 0.51, "grad_norm": 1.4666411876678467, "learning_rate": 1.021001070410467e-05, "loss": 0.5799, "step": 19840 }, { "epoch": 0.51, "grad_norm": 3.2434439659118652, "learning_rate": 1.0209180743603591e-05, "loss": 0.5758, "step": 19841 }, { "epoch": 0.51, "grad_norm": 1.571868896484375, "learning_rate": 1.0208350781660969e-05, "loss": 0.6022, "step": 19842 }, { "epoch": 0.51, "grad_norm": 1.445256233215332, "learning_rate": 1.0207520818282526e-05, "loss": 0.6113, "step": 19843 }, { "epoch": 0.51, "grad_norm": 2.850219249725342, "learning_rate": 1.0206690853473977e-05, "loss": 0.5076, "step": 19844 }, { "epoch": 0.51, "grad_norm": 1.3458536863327026, "learning_rate": 1.0205860887241046e-05, "loss": 0.461, "step": 19845 }, { "epoch": 0.51, "grad_norm": 4.0958709716796875, "learning_rate": 1.020503091958945e-05, "loss": 0.6254, "step": 19846 }, { "epoch": 0.51, "grad_norm": 4.1949143409729, "learning_rate": 1.020420095052491e-05, "loss": 0.6246, "step": 19847 }, { "epoch": 0.51, "grad_norm": 3.9870128631591797, "learning_rate": 1.0203370980053149e-05, "loss": 0.5581, "step": 19848 }, { "epoch": 0.51, "grad_norm": 4.479071140289307, "learning_rate": 1.0202541008179878e-05, "loss": 0.7217, "step": 19849 }, { "epoch": 0.51, "grad_norm": 2.5902769565582275, "learning_rate": 1.0201711034910823e-05, "loss": 0.7282, "step": 19850 }, { "epoch": 0.51, "grad_norm": 2.3946053981781006, "learning_rate": 1.0200881060251704e-05, "loss": 0.4767, "step": 19851 }, { "epoch": 0.51, "grad_norm": 4.616271018981934, "learning_rate": 1.0200051084208236e-05, "loss": 0.7939, "step": 19852 }, { "epoch": 0.51, "grad_norm": 1.9372047185897827, "learning_rate": 1.0199221106786146e-05, "loss": 0.6051, "step": 19853 }, { "epoch": 0.51, "grad_norm": 2.4887664318084717, "learning_rate": 1.0198391127991145e-05, "loss": 0.5603, "step": 19854 }, { "epoch": 0.51, "grad_norm": 1.9012514352798462, "learning_rate": 1.019756114782896e-05, "loss": 0.5234, "step": 19855 }, { "epoch": 0.51, "grad_norm": 4.526191234588623, "learning_rate": 1.0196731166305305e-05, "loss": 0.5191, "step": 19856 }, { "epoch": 0.51, "grad_norm": 4.93612003326416, "learning_rate": 1.0195901183425904e-05, "loss": 0.6826, "step": 19857 }, { "epoch": 0.51, "grad_norm": 1.3417176008224487, "learning_rate": 1.0195071199196476e-05, "loss": 0.6208, "step": 19858 }, { "epoch": 0.51, "grad_norm": 1.7312226295471191, "learning_rate": 1.0194241213622738e-05, "loss": 0.4421, "step": 19859 }, { "epoch": 0.51, "grad_norm": 1.4898782968521118, "learning_rate": 1.0193411226710413e-05, "loss": 0.5749, "step": 19860 }, { "epoch": 0.51, "grad_norm": 3.816626787185669, "learning_rate": 1.0192581238465217e-05, "loss": 0.5763, "step": 19861 }, { "epoch": 0.51, "grad_norm": 3.2203166484832764, "learning_rate": 1.0191751248892877e-05, "loss": 0.7529, "step": 19862 }, { "epoch": 0.51, "grad_norm": 1.1476541757583618, "learning_rate": 1.0190921257999102e-05, "loss": 0.5305, "step": 19863 }, { "epoch": 0.51, "grad_norm": 10.595564842224121, "learning_rate": 1.0190091265789621e-05, "loss": 0.4865, "step": 19864 }, { "epoch": 0.51, "grad_norm": 3.275315284729004, "learning_rate": 1.018926127227015e-05, "loss": 0.5779, "step": 19865 }, { "epoch": 0.51, "grad_norm": 1.4156439304351807, "learning_rate": 1.018843127744641e-05, "loss": 0.6927, "step": 19866 }, { "epoch": 0.51, "grad_norm": 0.8853965401649475, "learning_rate": 1.0187601281324118e-05, "loss": 0.5711, "step": 19867 }, { "epoch": 0.51, "grad_norm": 1.2426509857177734, "learning_rate": 1.0186771283908996e-05, "loss": 0.6113, "step": 19868 }, { "epoch": 0.51, "grad_norm": 1.911157488822937, "learning_rate": 1.0185941285206765e-05, "loss": 0.5297, "step": 19869 }, { "epoch": 0.51, "grad_norm": 1.544844627380371, "learning_rate": 1.0185111285223144e-05, "loss": 0.5748, "step": 19870 }, { "epoch": 0.51, "grad_norm": 2.5003747940063477, "learning_rate": 1.0184281283963848e-05, "loss": 0.5737, "step": 19871 }, { "epoch": 0.51, "grad_norm": 1.5149180889129639, "learning_rate": 1.0183451281434607e-05, "loss": 0.6237, "step": 19872 }, { "epoch": 0.51, "grad_norm": 2.067075252532959, "learning_rate": 1.018262127764113e-05, "loss": 0.5087, "step": 19873 }, { "epoch": 0.51, "grad_norm": 1.299118161201477, "learning_rate": 1.0181791272589148e-05, "loss": 0.5826, "step": 19874 }, { "epoch": 0.51, "grad_norm": 2.4033327102661133, "learning_rate": 1.0180961266284368e-05, "loss": 0.5242, "step": 19875 }, { "epoch": 0.51, "grad_norm": 2.6021738052368164, "learning_rate": 1.018013125873252e-05, "loss": 0.5962, "step": 19876 }, { "epoch": 0.51, "grad_norm": 7.692070960998535, "learning_rate": 1.0179301249939321e-05, "loss": 0.4942, "step": 19877 }, { "epoch": 0.51, "grad_norm": 2.7420754432678223, "learning_rate": 1.017847123991049e-05, "loss": 0.5955, "step": 19878 }, { "epoch": 0.51, "grad_norm": 3.8705966472625732, "learning_rate": 1.0177641228651746e-05, "loss": 0.5857, "step": 19879 }, { "epoch": 0.51, "grad_norm": 1.952111005783081, "learning_rate": 1.0176811216168811e-05, "loss": 0.5109, "step": 19880 }, { "epoch": 0.51, "grad_norm": 6.649511337280273, "learning_rate": 1.0175981202467404e-05, "loss": 0.6502, "step": 19881 }, { "epoch": 0.51, "grad_norm": 6.375709056854248, "learning_rate": 1.0175151187553247e-05, "loss": 0.7588, "step": 19882 }, { "epoch": 0.51, "grad_norm": 3.9051263332366943, "learning_rate": 1.0174321171432056e-05, "loss": 0.5568, "step": 19883 }, { "epoch": 0.51, "grad_norm": 4.059036731719971, "learning_rate": 1.0173491154109556e-05, "loss": 0.5877, "step": 19884 }, { "epoch": 0.51, "grad_norm": 1.4279301166534424, "learning_rate": 1.0172661135591459e-05, "loss": 0.4534, "step": 19885 }, { "epoch": 0.51, "grad_norm": 1.62465238571167, "learning_rate": 1.0171831115883495e-05, "loss": 0.5171, "step": 19886 }, { "epoch": 0.51, "grad_norm": 1.0926862955093384, "learning_rate": 1.0171001094991375e-05, "loss": 0.3453, "step": 19887 }, { "epoch": 0.51, "grad_norm": 3.3044395446777344, "learning_rate": 1.0170171072920823e-05, "loss": 0.7025, "step": 19888 }, { "epoch": 0.51, "grad_norm": 3.0520243644714355, "learning_rate": 1.016934104967756e-05, "loss": 0.6611, "step": 19889 }, { "epoch": 0.51, "grad_norm": 2.330630302429199, "learning_rate": 1.0168511025267307e-05, "loss": 0.5872, "step": 19890 }, { "epoch": 0.51, "grad_norm": 1.2032719850540161, "learning_rate": 1.0167680999695779e-05, "loss": 0.5528, "step": 19891 }, { "epoch": 0.51, "grad_norm": 7.110833168029785, "learning_rate": 1.01668509729687e-05, "loss": 0.6778, "step": 19892 }, { "epoch": 0.51, "grad_norm": 1.3264405727386475, "learning_rate": 1.0166020945091789e-05, "loss": 0.5999, "step": 19893 }, { "epoch": 0.51, "grad_norm": 2.0544586181640625, "learning_rate": 1.0165190916070766e-05, "loss": 0.6176, "step": 19894 }, { "epoch": 0.51, "grad_norm": 1.906385064125061, "learning_rate": 1.016436088591135e-05, "loss": 0.5587, "step": 19895 }, { "epoch": 0.51, "grad_norm": 1.7009849548339844, "learning_rate": 1.0163530854619263e-05, "loss": 0.5551, "step": 19896 }, { "epoch": 0.51, "grad_norm": 3.5422418117523193, "learning_rate": 1.0162700822200225e-05, "loss": 0.7035, "step": 19897 }, { "epoch": 0.51, "grad_norm": 3.2531394958496094, "learning_rate": 1.0161870788659953e-05, "loss": 0.5562, "step": 19898 }, { "epoch": 0.51, "grad_norm": 1.3359994888305664, "learning_rate": 1.016104075400417e-05, "loss": 0.7959, "step": 19899 }, { "epoch": 0.51, "grad_norm": 1.1084010601043701, "learning_rate": 1.0160210718238595e-05, "loss": 0.5168, "step": 19900 }, { "epoch": 0.51, "grad_norm": 2.3258838653564453, "learning_rate": 1.015938068136895e-05, "loss": 0.6752, "step": 19901 }, { "epoch": 0.51, "grad_norm": 1.870177984237671, "learning_rate": 1.0158550643400953e-05, "loss": 0.4695, "step": 19902 }, { "epoch": 0.51, "grad_norm": 3.197051525115967, "learning_rate": 1.0157720604340324e-05, "loss": 0.5072, "step": 19903 }, { "epoch": 0.51, "grad_norm": 1.1219193935394287, "learning_rate": 1.0156890564192783e-05, "loss": 0.6198, "step": 19904 }, { "epoch": 0.51, "grad_norm": 1.7950273752212524, "learning_rate": 1.015606052296405e-05, "loss": 0.7142, "step": 19905 }, { "epoch": 0.51, "grad_norm": 1.639596939086914, "learning_rate": 1.0155230480659848e-05, "loss": 0.5508, "step": 19906 }, { "epoch": 0.51, "grad_norm": 2.356475353240967, "learning_rate": 1.0154400437285894e-05, "loss": 0.5436, "step": 19907 }, { "epoch": 0.51, "grad_norm": 1.572357416152954, "learning_rate": 1.0153570392847911e-05, "loss": 0.4556, "step": 19908 }, { "epoch": 0.51, "grad_norm": 1.3737640380859375, "learning_rate": 1.0152740347351615e-05, "loss": 0.4345, "step": 19909 }, { "epoch": 0.51, "grad_norm": 1.4634828567504883, "learning_rate": 1.015191030080273e-05, "loss": 0.4569, "step": 19910 }, { "epoch": 0.51, "grad_norm": 3.914067506790161, "learning_rate": 1.0151080253206975e-05, "loss": 0.5009, "step": 19911 }, { "epoch": 0.51, "grad_norm": 1.3991657495498657, "learning_rate": 1.015025020457007e-05, "loss": 0.4739, "step": 19912 }, { "epoch": 0.51, "grad_norm": 2.0662386417388916, "learning_rate": 1.0149420154897736e-05, "loss": 0.6172, "step": 19913 }, { "epoch": 0.51, "grad_norm": 2.8041324615478516, "learning_rate": 1.0148590104195689e-05, "loss": 0.6945, "step": 19914 }, { "epoch": 0.51, "grad_norm": 1.0963636636734009, "learning_rate": 1.0147760052469651e-05, "loss": 0.5678, "step": 19915 }, { "epoch": 0.51, "grad_norm": 1.634987473487854, "learning_rate": 1.0146929999725348e-05, "loss": 0.5849, "step": 19916 }, { "epoch": 0.51, "grad_norm": 3.8337295055389404, "learning_rate": 1.0146099945968493e-05, "loss": 0.6201, "step": 19917 }, { "epoch": 0.51, "grad_norm": 8.656442642211914, "learning_rate": 1.014526989120481e-05, "loss": 0.5033, "step": 19918 }, { "epoch": 0.51, "grad_norm": 7.133236408233643, "learning_rate": 1.0144439835440016e-05, "loss": 0.4341, "step": 19919 }, { "epoch": 0.51, "grad_norm": 6.006038665771484, "learning_rate": 1.0143609778679835e-05, "loss": 0.7461, "step": 19920 }, { "epoch": 0.51, "grad_norm": 2.5075013637542725, "learning_rate": 1.0142779720929987e-05, "loss": 0.6247, "step": 19921 }, { "epoch": 0.51, "grad_norm": 1.3246510028839111, "learning_rate": 1.0141949662196188e-05, "loss": 0.4953, "step": 19922 }, { "epoch": 0.51, "grad_norm": 2.5113632678985596, "learning_rate": 1.0141119602484167e-05, "loss": 0.3573, "step": 19923 }, { "epoch": 0.51, "grad_norm": 1.7988473176956177, "learning_rate": 1.0140289541799632e-05, "loss": 0.65, "step": 19924 }, { "epoch": 0.51, "grad_norm": 4.323080539703369, "learning_rate": 1.0139459480148313e-05, "loss": 0.6153, "step": 19925 }, { "epoch": 0.51, "grad_norm": 8.638983726501465, "learning_rate": 1.0138629417535925e-05, "loss": 0.6062, "step": 19926 }, { "epoch": 0.51, "grad_norm": 5.281078815460205, "learning_rate": 1.013779935396819e-05, "loss": 0.7276, "step": 19927 }, { "epoch": 0.51, "grad_norm": 1.3166033029556274, "learning_rate": 1.0136969289450831e-05, "loss": 0.5226, "step": 19928 }, { "epoch": 0.51, "grad_norm": 1.4909213781356812, "learning_rate": 1.0136139223989563e-05, "loss": 0.5377, "step": 19929 }, { "epoch": 0.51, "grad_norm": 2.5748393535614014, "learning_rate": 1.013530915759011e-05, "loss": 0.626, "step": 19930 }, { "epoch": 0.51, "grad_norm": 1.5302504301071167, "learning_rate": 1.0134479090258191e-05, "loss": 0.656, "step": 19931 }, { "epoch": 0.51, "grad_norm": 4.007969379425049, "learning_rate": 1.0133649021999526e-05, "loss": 0.666, "step": 19932 }, { "epoch": 0.51, "grad_norm": 3.6059350967407227, "learning_rate": 1.0132818952819839e-05, "loss": 0.5769, "step": 19933 }, { "epoch": 0.51, "grad_norm": 2.136653184890747, "learning_rate": 1.0131988882724843e-05, "loss": 0.3605, "step": 19934 }, { "epoch": 0.51, "grad_norm": 4.317100524902344, "learning_rate": 1.0131158811720265e-05, "loss": 0.5787, "step": 19935 }, { "epoch": 0.51, "grad_norm": 3.5404317378997803, "learning_rate": 1.0130328739811824e-05, "loss": 0.4944, "step": 19936 }, { "epoch": 0.51, "grad_norm": 2.374030828475952, "learning_rate": 1.0129498667005235e-05, "loss": 0.4531, "step": 19937 }, { "epoch": 0.51, "grad_norm": 2.2201743125915527, "learning_rate": 1.0128668593306226e-05, "loss": 0.5646, "step": 19938 }, { "epoch": 0.51, "grad_norm": 1.5350135564804077, "learning_rate": 1.0127838518720512e-05, "loss": 0.6601, "step": 19939 }, { "epoch": 0.51, "grad_norm": 1.6969491243362427, "learning_rate": 1.0127008443253819e-05, "loss": 0.5513, "step": 19940 }, { "epoch": 0.51, "grad_norm": 2.003692626953125, "learning_rate": 1.0126178366911858e-05, "loss": 0.6318, "step": 19941 }, { "epoch": 0.51, "grad_norm": 1.121381163597107, "learning_rate": 1.0125348289700361e-05, "loss": 0.5716, "step": 19942 }, { "epoch": 0.51, "grad_norm": 1.4570155143737793, "learning_rate": 1.0124518211625038e-05, "loss": 0.5214, "step": 19943 }, { "epoch": 0.51, "grad_norm": 2.8376965522766113, "learning_rate": 1.0123688132691614e-05, "loss": 0.6431, "step": 19944 }, { "epoch": 0.51, "grad_norm": 4.355597496032715, "learning_rate": 1.0122858052905811e-05, "loss": 0.5449, "step": 19945 }, { "epoch": 0.51, "grad_norm": 5.61566686630249, "learning_rate": 1.0122027972273344e-05, "loss": 0.6035, "step": 19946 }, { "epoch": 0.51, "grad_norm": 0.9832484722137451, "learning_rate": 1.0121197890799943e-05, "loss": 0.4164, "step": 19947 }, { "epoch": 0.51, "grad_norm": 2.6180026531219482, "learning_rate": 1.0120367808491319e-05, "loss": 0.732, "step": 19948 }, { "epoch": 0.51, "grad_norm": 1.485956072807312, "learning_rate": 1.0119537725353193e-05, "loss": 0.5953, "step": 19949 }, { "epoch": 0.51, "grad_norm": 1.8251450061798096, "learning_rate": 1.0118707641391292e-05, "loss": 0.5913, "step": 19950 }, { "epoch": 0.51, "grad_norm": 2.5500998497009277, "learning_rate": 1.0117877556611329e-05, "loss": 0.5121, "step": 19951 }, { "epoch": 0.51, "grad_norm": 8.689358711242676, "learning_rate": 1.0117047471019032e-05, "loss": 0.689, "step": 19952 }, { "epoch": 0.51, "grad_norm": 1.918915033340454, "learning_rate": 1.0116217384620116e-05, "loss": 0.7335, "step": 19953 }, { "epoch": 0.51, "grad_norm": 5.74259090423584, "learning_rate": 1.0115387297420301e-05, "loss": 0.6977, "step": 19954 }, { "epoch": 0.51, "grad_norm": 1.738466501235962, "learning_rate": 1.011455720942531e-05, "loss": 0.5496, "step": 19955 }, { "epoch": 0.51, "grad_norm": 4.3674092292785645, "learning_rate": 1.0113727120640861e-05, "loss": 0.4896, "step": 19956 }, { "epoch": 0.51, "grad_norm": 7.7018537521362305, "learning_rate": 1.011289703107268e-05, "loss": 0.6065, "step": 19957 }, { "epoch": 0.51, "grad_norm": 1.5633138418197632, "learning_rate": 1.0112066940726482e-05, "loss": 0.6363, "step": 19958 }, { "epoch": 0.51, "grad_norm": 3.1017866134643555, "learning_rate": 1.0111236849607988e-05, "loss": 0.5493, "step": 19959 }, { "epoch": 0.51, "grad_norm": 1.659630537033081, "learning_rate": 1.0110406757722922e-05, "loss": 0.4485, "step": 19960 }, { "epoch": 0.51, "grad_norm": 3.5630576610565186, "learning_rate": 1.0109576665076999e-05, "loss": 0.3775, "step": 19961 }, { "epoch": 0.51, "grad_norm": 1.729473352432251, "learning_rate": 1.0108746571675944e-05, "loss": 0.5945, "step": 19962 }, { "epoch": 0.51, "grad_norm": 1.7050371170043945, "learning_rate": 1.0107916477525475e-05, "loss": 0.6014, "step": 19963 }, { "epoch": 0.51, "grad_norm": 1.6702626943588257, "learning_rate": 1.0107086382631315e-05, "loss": 0.6376, "step": 19964 }, { "epoch": 0.51, "grad_norm": 1.6295825242996216, "learning_rate": 1.010625628699918e-05, "loss": 0.4897, "step": 19965 }, { "epoch": 0.51, "grad_norm": 1.1665332317352295, "learning_rate": 1.0105426190634798e-05, "loss": 0.5419, "step": 19966 }, { "epoch": 0.51, "grad_norm": 14.639674186706543, "learning_rate": 1.0104596093543881e-05, "loss": 0.5208, "step": 19967 }, { "epoch": 0.51, "grad_norm": 1.49540114402771, "learning_rate": 1.0103765995732155e-05, "loss": 0.7398, "step": 19968 }, { "epoch": 0.51, "grad_norm": 1.457321286201477, "learning_rate": 1.010293589720534e-05, "loss": 0.4835, "step": 19969 }, { "epoch": 0.51, "grad_norm": 4.042537212371826, "learning_rate": 1.0102105797969153e-05, "loss": 0.582, "step": 19970 }, { "epoch": 0.51, "grad_norm": 4.3986735343933105, "learning_rate": 1.010127569802932e-05, "loss": 0.6826, "step": 19971 }, { "epoch": 0.51, "grad_norm": 1.5208392143249512, "learning_rate": 1.0100445597391555e-05, "loss": 0.5885, "step": 19972 }, { "epoch": 0.51, "grad_norm": 1.1083587408065796, "learning_rate": 1.0099615496061583e-05, "loss": 0.4666, "step": 19973 }, { "epoch": 0.51, "grad_norm": 1.039128065109253, "learning_rate": 1.0098785394045126e-05, "loss": 0.548, "step": 19974 }, { "epoch": 0.51, "grad_norm": 6.304329872131348, "learning_rate": 1.0097955291347901e-05, "loss": 0.4642, "step": 19975 }, { "epoch": 0.51, "grad_norm": 1.4816268682479858, "learning_rate": 1.0097125187975629e-05, "loss": 0.6187, "step": 19976 }, { "epoch": 0.51, "grad_norm": 5.978009223937988, "learning_rate": 1.0096295083934031e-05, "loss": 0.6066, "step": 19977 }, { "epoch": 0.51, "grad_norm": 1.5557808876037598, "learning_rate": 1.0095464979228829e-05, "loss": 0.573, "step": 19978 }, { "epoch": 0.51, "grad_norm": 3.2242045402526855, "learning_rate": 1.0094634873865741e-05, "loss": 0.5611, "step": 19979 }, { "epoch": 0.51, "grad_norm": 3.203803062438965, "learning_rate": 1.0093804767850489e-05, "loss": 0.6159, "step": 19980 }, { "epoch": 0.51, "grad_norm": 2.1760785579681396, "learning_rate": 1.0092974661188794e-05, "loss": 0.6432, "step": 19981 }, { "epoch": 0.51, "grad_norm": 1.8667621612548828, "learning_rate": 1.0092144553886376e-05, "loss": 0.6146, "step": 19982 }, { "epoch": 0.51, "grad_norm": 1.5026898384094238, "learning_rate": 1.0091314445948956e-05, "loss": 0.4379, "step": 19983 }, { "epoch": 0.51, "grad_norm": 1.3295538425445557, "learning_rate": 1.0090484337382253e-05, "loss": 0.5281, "step": 19984 }, { "epoch": 0.51, "grad_norm": 1.9166154861450195, "learning_rate": 1.0089654228191989e-05, "loss": 0.6641, "step": 19985 }, { "epoch": 0.51, "grad_norm": 2.8031466007232666, "learning_rate": 1.0088824118383886e-05, "loss": 0.4961, "step": 19986 }, { "epoch": 0.51, "grad_norm": 1.703173279762268, "learning_rate": 1.0087994007963662e-05, "loss": 0.3426, "step": 19987 }, { "epoch": 0.51, "grad_norm": 1.9561436176300049, "learning_rate": 1.008716389693704e-05, "loss": 0.444, "step": 19988 }, { "epoch": 0.51, "grad_norm": 10.114163398742676, "learning_rate": 1.0086333785309737e-05, "loss": 0.509, "step": 19989 }, { "epoch": 0.51, "grad_norm": 4.300797939300537, "learning_rate": 1.0085503673087476e-05, "loss": 0.5812, "step": 19990 }, { "epoch": 0.51, "grad_norm": 1.119912028312683, "learning_rate": 1.0084673560275979e-05, "loss": 0.4932, "step": 19991 }, { "epoch": 0.51, "grad_norm": 3.1108882427215576, "learning_rate": 1.0083843446880964e-05, "loss": 0.5103, "step": 19992 }, { "epoch": 0.51, "grad_norm": 2.4982306957244873, "learning_rate": 1.0083013332908154e-05, "loss": 0.6973, "step": 19993 }, { "epoch": 0.51, "grad_norm": 1.6464612483978271, "learning_rate": 1.0082183218363264e-05, "loss": 0.5013, "step": 19994 }, { "epoch": 0.51, "grad_norm": 1.1927299499511719, "learning_rate": 1.0081353103252023e-05, "loss": 0.5582, "step": 19995 }, { "epoch": 0.51, "grad_norm": 2.604825735092163, "learning_rate": 1.0080522987580146e-05, "loss": 0.5152, "step": 19996 }, { "epoch": 0.51, "grad_norm": 1.3226981163024902, "learning_rate": 1.0079692871353355e-05, "loss": 0.5883, "step": 19997 }, { "epoch": 0.51, "grad_norm": 1.556030511856079, "learning_rate": 1.0078862754577372e-05, "loss": 0.5434, "step": 19998 }, { "epoch": 0.51, "grad_norm": 1.2980635166168213, "learning_rate": 1.0078032637257914e-05, "loss": 0.4453, "step": 19999 }, { "epoch": 0.51, "grad_norm": 1.4550296068191528, "learning_rate": 1.0077202519400708e-05, "loss": 0.6253, "step": 20000 }, { "epoch": 0.51, "grad_norm": 1.5326043367385864, "learning_rate": 1.0076372401011468e-05, "loss": 0.4707, "step": 20001 }, { "epoch": 0.51, "grad_norm": 1.4892698526382446, "learning_rate": 1.0075542282095916e-05, "loss": 0.581, "step": 20002 }, { "epoch": 0.51, "grad_norm": 5.648560047149658, "learning_rate": 1.0074712162659779e-05, "loss": 0.4405, "step": 20003 }, { "epoch": 0.51, "grad_norm": 4.403719425201416, "learning_rate": 1.0073882042708768e-05, "loss": 0.6621, "step": 20004 }, { "epoch": 0.51, "grad_norm": 0.9949017763137817, "learning_rate": 1.0073051922248612e-05, "loss": 0.504, "step": 20005 }, { "epoch": 0.51, "grad_norm": 2.653736114501953, "learning_rate": 1.0072221801285025e-05, "loss": 0.7104, "step": 20006 }, { "epoch": 0.51, "grad_norm": 3.489349842071533, "learning_rate": 1.0071391679823733e-05, "loss": 0.818, "step": 20007 }, { "epoch": 0.51, "grad_norm": 1.96199631690979, "learning_rate": 1.007056155787045e-05, "loss": 0.5085, "step": 20008 }, { "epoch": 0.51, "grad_norm": 1.6538236141204834, "learning_rate": 1.0069731435430904e-05, "loss": 0.3787, "step": 20009 }, { "epoch": 0.51, "grad_norm": 1.7679691314697266, "learning_rate": 1.0068901312510814e-05, "loss": 0.7051, "step": 20010 }, { "epoch": 0.51, "grad_norm": 2.358232259750366, "learning_rate": 1.0068071189115898e-05, "loss": 0.4927, "step": 20011 }, { "epoch": 0.51, "grad_norm": 2.9807770252227783, "learning_rate": 1.006724106525188e-05, "loss": 0.5346, "step": 20012 }, { "epoch": 0.51, "grad_norm": 1.3038498163223267, "learning_rate": 1.0066410940924477e-05, "loss": 0.474, "step": 20013 }, { "epoch": 0.51, "grad_norm": 5.970985412597656, "learning_rate": 1.006558081613941e-05, "loss": 0.5914, "step": 20014 }, { "epoch": 0.51, "grad_norm": 1.6593430042266846, "learning_rate": 1.0064750690902405e-05, "loss": 0.4498, "step": 20015 }, { "epoch": 0.51, "grad_norm": 1.2244600057601929, "learning_rate": 1.0063920565219177e-05, "loss": 0.6308, "step": 20016 }, { "epoch": 0.51, "grad_norm": 1.2764885425567627, "learning_rate": 1.006309043909545e-05, "loss": 0.4883, "step": 20017 }, { "epoch": 0.51, "grad_norm": 3.8657751083374023, "learning_rate": 1.006226031253694e-05, "loss": 0.53, "step": 20018 }, { "epoch": 0.51, "grad_norm": 2.650904655456543, "learning_rate": 1.0061430185549374e-05, "loss": 0.5492, "step": 20019 }, { "epoch": 0.51, "grad_norm": 1.3100814819335938, "learning_rate": 1.006060005813847e-05, "loss": 0.5558, "step": 20020 }, { "epoch": 0.51, "grad_norm": 2.1854636669158936, "learning_rate": 1.0059769930309947e-05, "loss": 0.4725, "step": 20021 }, { "epoch": 0.51, "grad_norm": 1.2055089473724365, "learning_rate": 1.0058939802069528e-05, "loss": 0.5594, "step": 20022 }, { "epoch": 0.51, "grad_norm": 2.477830648422241, "learning_rate": 1.005810967342293e-05, "loss": 0.5972, "step": 20023 }, { "epoch": 0.51, "grad_norm": 4.744597911834717, "learning_rate": 1.0057279544375882e-05, "loss": 0.5711, "step": 20024 }, { "epoch": 0.51, "grad_norm": 4.024113178253174, "learning_rate": 1.0056449414934095e-05, "loss": 0.7837, "step": 20025 }, { "epoch": 0.51, "grad_norm": 1.125610113143921, "learning_rate": 1.0055619285103295e-05, "loss": 0.4812, "step": 20026 }, { "epoch": 0.51, "grad_norm": 4.023019313812256, "learning_rate": 1.0054789154889204e-05, "loss": 0.7693, "step": 20027 }, { "epoch": 0.51, "grad_norm": 6.382535934448242, "learning_rate": 1.0053959024297537e-05, "loss": 0.5469, "step": 20028 }, { "epoch": 0.51, "grad_norm": 1.1054826974868774, "learning_rate": 1.0053128893334022e-05, "loss": 0.4311, "step": 20029 }, { "epoch": 0.51, "grad_norm": 1.7977807521820068, "learning_rate": 1.0052298762004373e-05, "loss": 0.5079, "step": 20030 }, { "epoch": 0.51, "grad_norm": 2.082195997238159, "learning_rate": 1.0051468630314314e-05, "loss": 0.4315, "step": 20031 }, { "epoch": 0.51, "grad_norm": 2.7404403686523438, "learning_rate": 1.005063849826957e-05, "loss": 0.6485, "step": 20032 }, { "epoch": 0.51, "grad_norm": 5.217479705810547, "learning_rate": 1.0049808365875853e-05, "loss": 0.3592, "step": 20033 }, { "epoch": 0.51, "grad_norm": 2.223304033279419, "learning_rate": 1.0048978233138891e-05, "loss": 0.6518, "step": 20034 }, { "epoch": 0.51, "grad_norm": 2.9981470108032227, "learning_rate": 1.0048148100064399e-05, "loss": 0.6833, "step": 20035 }, { "epoch": 0.51, "grad_norm": 2.0447349548339844, "learning_rate": 1.0047317966658099e-05, "loss": 0.5064, "step": 20036 }, { "epoch": 0.51, "grad_norm": 8.930660247802734, "learning_rate": 1.0046487832925717e-05, "loss": 0.577, "step": 20037 }, { "epoch": 0.51, "grad_norm": 3.639366865158081, "learning_rate": 1.0045657698872968e-05, "loss": 0.6762, "step": 20038 }, { "epoch": 0.51, "grad_norm": 2.3923559188842773, "learning_rate": 1.0044827564505577e-05, "loss": 0.5033, "step": 20039 }, { "epoch": 0.51, "grad_norm": 1.1173055171966553, "learning_rate": 1.004399742982926e-05, "loss": 0.4873, "step": 20040 }, { "epoch": 0.51, "grad_norm": 1.3016256093978882, "learning_rate": 1.004316729484974e-05, "loss": 0.4717, "step": 20041 }, { "epoch": 0.51, "grad_norm": 6.517178058624268, "learning_rate": 1.0042337159572742e-05, "loss": 0.753, "step": 20042 }, { "epoch": 0.51, "grad_norm": 1.22769296169281, "learning_rate": 1.0041507024003978e-05, "loss": 0.6064, "step": 20043 }, { "epoch": 0.51, "grad_norm": 1.2079992294311523, "learning_rate": 1.004067688814918e-05, "loss": 0.5145, "step": 20044 }, { "epoch": 0.51, "grad_norm": 1.556821584701538, "learning_rate": 1.0039846752014056e-05, "loss": 0.4885, "step": 20045 }, { "epoch": 0.51, "grad_norm": 2.9562413692474365, "learning_rate": 1.0039016615604336e-05, "loss": 0.6036, "step": 20046 }, { "epoch": 0.51, "grad_norm": 3.711550235748291, "learning_rate": 1.0038186478925739e-05, "loss": 0.4086, "step": 20047 }, { "epoch": 0.51, "grad_norm": 3.134176254272461, "learning_rate": 1.0037356341983982e-05, "loss": 0.5714, "step": 20048 }, { "epoch": 0.51, "grad_norm": 8.907294273376465, "learning_rate": 1.0036526204784791e-05, "loss": 0.6559, "step": 20049 }, { "epoch": 0.51, "grad_norm": 1.2297555208206177, "learning_rate": 1.0035696067333881e-05, "loss": 0.5479, "step": 20050 }, { "epoch": 0.51, "grad_norm": 1.787208080291748, "learning_rate": 1.0034865929636981e-05, "loss": 0.5089, "step": 20051 }, { "epoch": 0.51, "grad_norm": 2.979309320449829, "learning_rate": 1.0034035791699803e-05, "loss": 0.5713, "step": 20052 }, { "epoch": 0.51, "grad_norm": 1.380378007888794, "learning_rate": 1.0033205653528073e-05, "loss": 0.4997, "step": 20053 }, { "epoch": 0.51, "grad_norm": 2.478411912918091, "learning_rate": 1.0032375515127512e-05, "loss": 0.6022, "step": 20054 }, { "epoch": 0.51, "grad_norm": 2.2938222885131836, "learning_rate": 1.0031545376503838e-05, "loss": 0.7258, "step": 20055 }, { "epoch": 0.51, "grad_norm": 0.9858886003494263, "learning_rate": 1.0030715237662774e-05, "loss": 0.5607, "step": 20056 }, { "epoch": 0.51, "grad_norm": 1.2995606660842896, "learning_rate": 1.0029885098610037e-05, "loss": 0.6049, "step": 20057 }, { "epoch": 0.51, "grad_norm": 1.9717861413955688, "learning_rate": 1.0029054959351352e-05, "loss": 0.5794, "step": 20058 }, { "epoch": 0.51, "grad_norm": 1.578485131263733, "learning_rate": 1.002822481989244e-05, "loss": 0.6695, "step": 20059 }, { "epoch": 0.51, "grad_norm": 1.4135324954986572, "learning_rate": 1.002739468023902e-05, "loss": 0.4838, "step": 20060 }, { "epoch": 0.51, "grad_norm": 4.123579978942871, "learning_rate": 1.0026564540396813e-05, "loss": 0.7636, "step": 20061 }, { "epoch": 0.51, "grad_norm": 1.5089765787124634, "learning_rate": 1.0025734400371538e-05, "loss": 0.5021, "step": 20062 }, { "epoch": 0.51, "grad_norm": 1.0104583501815796, "learning_rate": 1.002490426016892e-05, "loss": 0.4087, "step": 20063 }, { "epoch": 0.51, "grad_norm": 1.2275974750518799, "learning_rate": 1.0024074119794675e-05, "loss": 0.5743, "step": 20064 }, { "epoch": 0.51, "grad_norm": 2.8368499279022217, "learning_rate": 1.0023243979254526e-05, "loss": 0.6436, "step": 20065 }, { "epoch": 0.51, "grad_norm": 1.0844542980194092, "learning_rate": 1.00224138385542e-05, "loss": 0.5058, "step": 20066 }, { "epoch": 0.51, "grad_norm": 0.9985594153404236, "learning_rate": 1.0021583697699406e-05, "loss": 0.6066, "step": 20067 }, { "epoch": 0.51, "grad_norm": 2.8905177116394043, "learning_rate": 1.0020753556695873e-05, "loss": 0.6155, "step": 20068 }, { "epoch": 0.51, "grad_norm": 4.03070068359375, "learning_rate": 1.0019923415549319e-05, "loss": 0.5721, "step": 20069 }, { "epoch": 0.51, "grad_norm": 1.890517234802246, "learning_rate": 1.0019093274265463e-05, "loss": 0.5278, "step": 20070 }, { "epoch": 0.51, "grad_norm": 1.3894330263137817, "learning_rate": 1.0018263132850032e-05, "loss": 0.5821, "step": 20071 }, { "epoch": 0.51, "grad_norm": 3.2158000469207764, "learning_rate": 1.0017432991308743e-05, "loss": 0.6502, "step": 20072 }, { "epoch": 0.51, "grad_norm": 4.079522609710693, "learning_rate": 1.0016602849647315e-05, "loss": 0.6283, "step": 20073 }, { "epoch": 0.51, "grad_norm": 1.2882596254348755, "learning_rate": 1.001577270787147e-05, "loss": 0.5867, "step": 20074 }, { "epoch": 0.51, "grad_norm": 1.4814659357070923, "learning_rate": 1.0014942565986932e-05, "loss": 0.4434, "step": 20075 }, { "epoch": 0.51, "grad_norm": 1.2767932415008545, "learning_rate": 1.0014112423999415e-05, "loss": 0.5181, "step": 20076 }, { "epoch": 0.51, "grad_norm": 2.326792001724243, "learning_rate": 1.0013282281914647e-05, "loss": 0.5904, "step": 20077 }, { "epoch": 0.51, "grad_norm": 1.1790794134140015, "learning_rate": 1.0012452139738347e-05, "loss": 0.4907, "step": 20078 }, { "epoch": 0.51, "grad_norm": 1.3798432350158691, "learning_rate": 1.001162199747623e-05, "loss": 0.4931, "step": 20079 }, { "epoch": 0.51, "grad_norm": 3.957979440689087, "learning_rate": 1.0010791855134027e-05, "loss": 0.5875, "step": 20080 }, { "epoch": 0.51, "grad_norm": 1.8060473203659058, "learning_rate": 1.0009961712717451e-05, "loss": 0.5073, "step": 20081 }, { "epoch": 0.51, "grad_norm": 1.3129785060882568, "learning_rate": 1.0009131570232223e-05, "loss": 0.5735, "step": 20082 }, { "epoch": 0.51, "grad_norm": 1.8538905382156372, "learning_rate": 1.0008301427684069e-05, "loss": 0.5224, "step": 20083 }, { "epoch": 0.51, "grad_norm": 2.768113136291504, "learning_rate": 1.0007471285078704e-05, "loss": 0.5865, "step": 20084 }, { "epoch": 0.51, "grad_norm": 1.2313153743743896, "learning_rate": 1.0006641142421855e-05, "loss": 0.5906, "step": 20085 }, { "epoch": 0.51, "grad_norm": 1.0974189043045044, "learning_rate": 1.0005810999719236e-05, "loss": 0.5206, "step": 20086 }, { "epoch": 0.51, "grad_norm": 4.7446699142456055, "learning_rate": 1.0004980856976576e-05, "loss": 0.5114, "step": 20087 }, { "epoch": 0.51, "grad_norm": 1.3785020112991333, "learning_rate": 1.0004150714199587e-05, "loss": 0.4741, "step": 20088 }, { "epoch": 0.51, "grad_norm": 1.180321455001831, "learning_rate": 1.0003320571393994e-05, "loss": 0.5679, "step": 20089 }, { "epoch": 0.51, "grad_norm": 3.1406219005584717, "learning_rate": 1.0002490428565519e-05, "loss": 0.6983, "step": 20090 }, { "epoch": 0.51, "grad_norm": 1.3175691366195679, "learning_rate": 1.0001660285719882e-05, "loss": 0.4979, "step": 20091 }, { "epoch": 0.51, "grad_norm": 1.8219207525253296, "learning_rate": 1.0000830142862802e-05, "loss": 0.7557, "step": 20092 }, { "epoch": 0.51, "grad_norm": 1.9218961000442505, "learning_rate": 1e-05, "loss": 0.5485, "step": 20093 }, { "epoch": 0.52, "grad_norm": 4.2926859855651855, "learning_rate": 9.9991698571372e-06, "loss": 0.5409, "step": 20094 }, { "epoch": 0.52, "grad_norm": 1.3713322877883911, "learning_rate": 9.998339714280122e-06, "loss": 0.54, "step": 20095 }, { "epoch": 0.52, "grad_norm": 1.8244434595108032, "learning_rate": 9.997509571434483e-06, "loss": 0.5666, "step": 20096 }, { "epoch": 0.52, "grad_norm": 1.7262895107269287, "learning_rate": 9.996679428606008e-06, "loss": 0.4706, "step": 20097 }, { "epoch": 0.52, "grad_norm": 1.462113618850708, "learning_rate": 9.995849285800417e-06, "loss": 0.4646, "step": 20098 }, { "epoch": 0.52, "grad_norm": 7.9056782722473145, "learning_rate": 9.995019143023426e-06, "loss": 0.8617, "step": 20099 }, { "epoch": 0.52, "grad_norm": 1.6905579566955566, "learning_rate": 9.994189000280766e-06, "loss": 0.6261, "step": 20100 }, { "epoch": 0.52, "grad_norm": 5.320907115936279, "learning_rate": 9.993358857578147e-06, "loss": 0.5905, "step": 20101 }, { "epoch": 0.52, "grad_norm": 0.919093668460846, "learning_rate": 9.992528714921298e-06, "loss": 0.4723, "step": 20102 }, { "epoch": 0.52, "grad_norm": 1.6451137065887451, "learning_rate": 9.991698572315933e-06, "loss": 0.5657, "step": 20103 }, { "epoch": 0.52, "grad_norm": 4.502850532531738, "learning_rate": 9.990868429767779e-06, "loss": 0.5994, "step": 20104 }, { "epoch": 0.52, "grad_norm": 3.565873861312866, "learning_rate": 9.990038287282554e-06, "loss": 0.6407, "step": 20105 }, { "epoch": 0.52, "grad_norm": 1.2803176641464233, "learning_rate": 9.989208144865976e-06, "loss": 0.5956, "step": 20106 }, { "epoch": 0.52, "grad_norm": 6.073367595672607, "learning_rate": 9.988378002523773e-06, "loss": 0.699, "step": 20107 }, { "epoch": 0.52, "grad_norm": 1.4248207807540894, "learning_rate": 9.987547860261658e-06, "loss": 0.6092, "step": 20108 }, { "epoch": 0.52, "grad_norm": 1.5749976634979248, "learning_rate": 9.986717718085358e-06, "loss": 0.5188, "step": 20109 }, { "epoch": 0.52, "grad_norm": 6.795104503631592, "learning_rate": 9.985887576000586e-06, "loss": 0.5875, "step": 20110 }, { "epoch": 0.52, "grad_norm": 1.2850152254104614, "learning_rate": 9.985057434013071e-06, "loss": 0.7003, "step": 20111 }, { "epoch": 0.52, "grad_norm": 2.9894163608551025, "learning_rate": 9.984227292128533e-06, "loss": 0.5874, "step": 20112 }, { "epoch": 0.52, "grad_norm": 3.9686238765716553, "learning_rate": 9.983397150352688e-06, "loss": 0.6654, "step": 20113 }, { "epoch": 0.52, "grad_norm": 1.576589584350586, "learning_rate": 9.982567008691262e-06, "loss": 0.6235, "step": 20114 }, { "epoch": 0.52, "grad_norm": 6.066977500915527, "learning_rate": 9.981736867149971e-06, "loss": 0.7267, "step": 20115 }, { "epoch": 0.52, "grad_norm": 1.713375449180603, "learning_rate": 9.980906725734539e-06, "loss": 0.6878, "step": 20116 }, { "epoch": 0.52, "grad_norm": 1.9802862405776978, "learning_rate": 9.980076584450684e-06, "loss": 0.6642, "step": 20117 }, { "epoch": 0.52, "grad_norm": 1.0532035827636719, "learning_rate": 9.979246443304129e-06, "loss": 0.4321, "step": 20118 }, { "epoch": 0.52, "grad_norm": 1.7871325016021729, "learning_rate": 9.978416302300597e-06, "loss": 0.5326, "step": 20119 }, { "epoch": 0.52, "grad_norm": 1.5668805837631226, "learning_rate": 9.977586161445804e-06, "loss": 0.5961, "step": 20120 }, { "epoch": 0.52, "grad_norm": 1.4576542377471924, "learning_rate": 9.976756020745475e-06, "loss": 0.5146, "step": 20121 }, { "epoch": 0.52, "grad_norm": 7.806443691253662, "learning_rate": 9.975925880205328e-06, "loss": 0.5233, "step": 20122 }, { "epoch": 0.52, "grad_norm": 1.3186484575271606, "learning_rate": 9.975095739831082e-06, "loss": 0.593, "step": 20123 }, { "epoch": 0.52, "grad_norm": 1.6354517936706543, "learning_rate": 9.974265599628463e-06, "loss": 0.5902, "step": 20124 }, { "epoch": 0.52, "grad_norm": 1.7057279348373413, "learning_rate": 9.97343545960319e-06, "loss": 0.6298, "step": 20125 }, { "epoch": 0.52, "grad_norm": 2.798278331756592, "learning_rate": 9.972605319760984e-06, "loss": 0.6887, "step": 20126 }, { "epoch": 0.52, "grad_norm": 2.5449817180633545, "learning_rate": 9.971775180107563e-06, "loss": 0.6347, "step": 20127 }, { "epoch": 0.52, "grad_norm": 2.1703133583068848, "learning_rate": 9.970945040648651e-06, "loss": 0.6052, "step": 20128 }, { "epoch": 0.52, "grad_norm": 2.0104598999023438, "learning_rate": 9.970114901389965e-06, "loss": 0.5353, "step": 20129 }, { "epoch": 0.52, "grad_norm": 4.339698791503906, "learning_rate": 9.96928476233723e-06, "loss": 0.6644, "step": 20130 }, { "epoch": 0.52, "grad_norm": 2.9675819873809814, "learning_rate": 9.968454623496165e-06, "loss": 0.63, "step": 20131 }, { "epoch": 0.52, "grad_norm": 5.878174304962158, "learning_rate": 9.967624484872491e-06, "loss": 0.6078, "step": 20132 }, { "epoch": 0.52, "grad_norm": 4.393277168273926, "learning_rate": 9.96679434647193e-06, "loss": 0.3921, "step": 20133 }, { "epoch": 0.52, "grad_norm": 1.8225345611572266, "learning_rate": 9.9659642083002e-06, "loss": 0.616, "step": 20134 }, { "epoch": 0.52, "grad_norm": 6.360684394836426, "learning_rate": 9.965134070363022e-06, "loss": 0.5365, "step": 20135 }, { "epoch": 0.52, "grad_norm": 1.4892345666885376, "learning_rate": 9.96430393266612e-06, "loss": 0.4933, "step": 20136 }, { "epoch": 0.52, "grad_norm": 1.8712893724441528, "learning_rate": 9.963473795215212e-06, "loss": 0.5651, "step": 20137 }, { "epoch": 0.52, "grad_norm": 1.3452277183532715, "learning_rate": 9.962643658016022e-06, "loss": 0.4922, "step": 20138 }, { "epoch": 0.52, "grad_norm": 1.948409914970398, "learning_rate": 9.961813521074265e-06, "loss": 0.4613, "step": 20139 }, { "epoch": 0.52, "grad_norm": 2.0398595333099365, "learning_rate": 9.960983384395668e-06, "loss": 0.4863, "step": 20140 }, { "epoch": 0.52, "grad_norm": 1.7219288349151611, "learning_rate": 9.960153247985946e-06, "loss": 0.5119, "step": 20141 }, { "epoch": 0.52, "grad_norm": 1.59800386428833, "learning_rate": 9.959323111850825e-06, "loss": 0.6394, "step": 20142 }, { "epoch": 0.52, "grad_norm": 2.6525187492370605, "learning_rate": 9.958492975996024e-06, "loss": 0.7227, "step": 20143 }, { "epoch": 0.52, "grad_norm": 1.5714020729064941, "learning_rate": 9.957662840427261e-06, "loss": 0.5748, "step": 20144 }, { "epoch": 0.52, "grad_norm": 6.773614406585693, "learning_rate": 9.956832705150262e-06, "loss": 0.6014, "step": 20145 }, { "epoch": 0.52, "grad_norm": 4.446954727172852, "learning_rate": 9.956002570170743e-06, "loss": 0.7869, "step": 20146 }, { "epoch": 0.52, "grad_norm": 2.4126617908477783, "learning_rate": 9.955172435494425e-06, "loss": 0.6421, "step": 20147 }, { "epoch": 0.52, "grad_norm": 1.8905495405197144, "learning_rate": 9.954342301127035e-06, "loss": 0.685, "step": 20148 }, { "epoch": 0.52, "grad_norm": 4.346289157867432, "learning_rate": 9.953512167074286e-06, "loss": 0.5736, "step": 20149 }, { "epoch": 0.52, "grad_norm": 6.021164894104004, "learning_rate": 9.952682033341903e-06, "loss": 0.4684, "step": 20150 }, { "epoch": 0.52, "grad_norm": 1.098257064819336, "learning_rate": 9.951851899935604e-06, "loss": 0.523, "step": 20151 }, { "epoch": 0.52, "grad_norm": 1.063724160194397, "learning_rate": 9.951021766861112e-06, "loss": 0.4323, "step": 20152 }, { "epoch": 0.52, "grad_norm": 0.9723995327949524, "learning_rate": 9.950191634124149e-06, "loss": 0.4669, "step": 20153 }, { "epoch": 0.52, "grad_norm": 3.914412260055542, "learning_rate": 9.949361501730432e-06, "loss": 0.6385, "step": 20154 }, { "epoch": 0.52, "grad_norm": 1.2478543519973755, "learning_rate": 9.948531369685686e-06, "loss": 0.5477, "step": 20155 }, { "epoch": 0.52, "grad_norm": 1.855725884437561, "learning_rate": 9.947701237995627e-06, "loss": 0.6758, "step": 20156 }, { "epoch": 0.52, "grad_norm": 1.687538981437683, "learning_rate": 9.94687110666598e-06, "loss": 0.4619, "step": 20157 }, { "epoch": 0.52, "grad_norm": 1.0472842454910278, "learning_rate": 9.946040975702463e-06, "loss": 0.5261, "step": 20158 }, { "epoch": 0.52, "grad_norm": 1.7008742094039917, "learning_rate": 9.945210845110797e-06, "loss": 0.5512, "step": 20159 }, { "epoch": 0.52, "grad_norm": 1.6617826223373413, "learning_rate": 9.944380714896707e-06, "loss": 0.5425, "step": 20160 }, { "epoch": 0.52, "grad_norm": 3.8737423419952393, "learning_rate": 9.943550585065905e-06, "loss": 0.8419, "step": 20161 }, { "epoch": 0.52, "grad_norm": 2.107300281524658, "learning_rate": 9.94272045562412e-06, "loss": 0.7327, "step": 20162 }, { "epoch": 0.52, "grad_norm": 3.3247735500335693, "learning_rate": 9.94189032657707e-06, "loss": 0.6952, "step": 20163 }, { "epoch": 0.52, "grad_norm": 1.6992180347442627, "learning_rate": 9.941060197930474e-06, "loss": 0.4114, "step": 20164 }, { "epoch": 0.52, "grad_norm": 1.315132737159729, "learning_rate": 9.940230069690055e-06, "loss": 0.5571, "step": 20165 }, { "epoch": 0.52, "grad_norm": 2.85567569732666, "learning_rate": 9.939399941861532e-06, "loss": 0.5119, "step": 20166 }, { "epoch": 0.52, "grad_norm": 1.2205142974853516, "learning_rate": 9.938569814450627e-06, "loss": 0.454, "step": 20167 }, { "epoch": 0.52, "grad_norm": 2.1311731338500977, "learning_rate": 9.93773968746306e-06, "loss": 0.4879, "step": 20168 }, { "epoch": 0.52, "grad_norm": 1.2877792119979858, "learning_rate": 9.936909560904553e-06, "loss": 0.4791, "step": 20169 }, { "epoch": 0.52, "grad_norm": 1.9676074981689453, "learning_rate": 9.936079434780823e-06, "loss": 0.6061, "step": 20170 }, { "epoch": 0.52, "grad_norm": 1.2947221994400024, "learning_rate": 9.935249309097595e-06, "loss": 0.5641, "step": 20171 }, { "epoch": 0.52, "grad_norm": 1.45624577999115, "learning_rate": 9.93441918386059e-06, "loss": 0.6104, "step": 20172 }, { "epoch": 0.52, "grad_norm": 2.5846738815307617, "learning_rate": 9.933589059075524e-06, "loss": 0.7686, "step": 20173 }, { "epoch": 0.52, "grad_norm": 1.17795729637146, "learning_rate": 9.932758934748123e-06, "loss": 0.5161, "step": 20174 }, { "epoch": 0.52, "grad_norm": 1.6318436861038208, "learning_rate": 9.931928810884102e-06, "loss": 0.6023, "step": 20175 }, { "epoch": 0.52, "grad_norm": 1.3020954132080078, "learning_rate": 9.931098687489186e-06, "loss": 0.5256, "step": 20176 }, { "epoch": 0.52, "grad_norm": 2.199267625808716, "learning_rate": 9.930268564569096e-06, "loss": 0.6337, "step": 20177 }, { "epoch": 0.52, "grad_norm": 3.3114395141601562, "learning_rate": 9.929438442129549e-06, "loss": 0.5438, "step": 20178 }, { "epoch": 0.52, "grad_norm": 2.3783440589904785, "learning_rate": 9.92860832017627e-06, "loss": 0.6755, "step": 20179 }, { "epoch": 0.52, "grad_norm": 0.8618102669715881, "learning_rate": 9.927778198714976e-06, "loss": 0.5128, "step": 20180 }, { "epoch": 0.52, "grad_norm": 2.310999631881714, "learning_rate": 9.926948077751391e-06, "loss": 0.6675, "step": 20181 }, { "epoch": 0.52, "grad_norm": 2.8271443843841553, "learning_rate": 9.926117957291234e-06, "loss": 0.7, "step": 20182 }, { "epoch": 0.52, "grad_norm": 1.6189111471176147, "learning_rate": 9.925287837340223e-06, "loss": 0.5335, "step": 20183 }, { "epoch": 0.52, "grad_norm": 1.1666984558105469, "learning_rate": 9.924457717904084e-06, "loss": 0.5199, "step": 20184 }, { "epoch": 0.52, "grad_norm": 9.087242126464844, "learning_rate": 9.923627598988534e-06, "loss": 0.5025, "step": 20185 }, { "epoch": 0.52, "grad_norm": 1.5620445013046265, "learning_rate": 9.922797480599296e-06, "loss": 0.4341, "step": 20186 }, { "epoch": 0.52, "grad_norm": 2.1202335357666016, "learning_rate": 9.921967362742086e-06, "loss": 0.5173, "step": 20187 }, { "epoch": 0.52, "grad_norm": 1.384762167930603, "learning_rate": 9.921137245422628e-06, "loss": 0.6508, "step": 20188 }, { "epoch": 0.52, "grad_norm": 1.919460654258728, "learning_rate": 9.920307128646645e-06, "loss": 0.619, "step": 20189 }, { "epoch": 0.52, "grad_norm": 2.030341386795044, "learning_rate": 9.919477012419855e-06, "loss": 0.535, "step": 20190 }, { "epoch": 0.52, "grad_norm": 3.077615261077881, "learning_rate": 9.918646896747978e-06, "loss": 0.6075, "step": 20191 }, { "epoch": 0.52, "grad_norm": 4.08862829208374, "learning_rate": 9.917816781636736e-06, "loss": 0.6198, "step": 20192 }, { "epoch": 0.52, "grad_norm": 1.1376855373382568, "learning_rate": 9.916986667091848e-06, "loss": 0.5364, "step": 20193 }, { "epoch": 0.52, "grad_norm": 1.000274896621704, "learning_rate": 9.916156553119038e-06, "loss": 0.6164, "step": 20194 }, { "epoch": 0.52, "grad_norm": 10.838167190551758, "learning_rate": 9.915326439724023e-06, "loss": 0.4918, "step": 20195 }, { "epoch": 0.52, "grad_norm": 1.2818313837051392, "learning_rate": 9.914496326912526e-06, "loss": 0.4752, "step": 20196 }, { "epoch": 0.52, "grad_norm": 0.9198343753814697, "learning_rate": 9.913666214690263e-06, "loss": 0.5868, "step": 20197 }, { "epoch": 0.52, "grad_norm": 1.509845495223999, "learning_rate": 9.912836103062962e-06, "loss": 0.5737, "step": 20198 }, { "epoch": 0.52, "grad_norm": 2.6970582008361816, "learning_rate": 9.91200599203634e-06, "loss": 0.6219, "step": 20199 }, { "epoch": 0.52, "grad_norm": 1.221207618713379, "learning_rate": 9.911175881616114e-06, "loss": 0.4267, "step": 20200 }, { "epoch": 0.52, "grad_norm": 2.379847764968872, "learning_rate": 9.910345771808011e-06, "loss": 0.5328, "step": 20201 }, { "epoch": 0.52, "grad_norm": 2.9128589630126953, "learning_rate": 9.909515662617747e-06, "loss": 0.6111, "step": 20202 }, { "epoch": 0.52, "grad_norm": 3.683154582977295, "learning_rate": 9.908685554051047e-06, "loss": 0.6289, "step": 20203 }, { "epoch": 0.52, "grad_norm": 2.538201332092285, "learning_rate": 9.907855446113624e-06, "loss": 0.5023, "step": 20204 }, { "epoch": 0.52, "grad_norm": 1.2344616651535034, "learning_rate": 9.907025338811206e-06, "loss": 0.5589, "step": 20205 }, { "epoch": 0.52, "grad_norm": 1.3944518566131592, "learning_rate": 9.906195232149511e-06, "loss": 0.5032, "step": 20206 }, { "epoch": 0.52, "grad_norm": 4.94588041305542, "learning_rate": 9.90536512613426e-06, "loss": 0.7315, "step": 20207 }, { "epoch": 0.52, "grad_norm": 7.921760082244873, "learning_rate": 9.904535020771173e-06, "loss": 0.5567, "step": 20208 }, { "epoch": 0.52, "grad_norm": 1.0506223440170288, "learning_rate": 9.903704916065969e-06, "loss": 0.3983, "step": 20209 }, { "epoch": 0.52, "grad_norm": 1.5851205587387085, "learning_rate": 9.902874812024373e-06, "loss": 0.5664, "step": 20210 }, { "epoch": 0.52, "grad_norm": 1.0733098983764648, "learning_rate": 9.9020447086521e-06, "loss": 0.5047, "step": 20211 }, { "epoch": 0.52, "grad_norm": 1.8338439464569092, "learning_rate": 9.901214605954874e-06, "loss": 0.5946, "step": 20212 }, { "epoch": 0.52, "grad_norm": 2.324444532394409, "learning_rate": 9.900384503938417e-06, "loss": 0.6173, "step": 20213 }, { "epoch": 0.52, "grad_norm": 1.2003285884857178, "learning_rate": 9.899554402608445e-06, "loss": 0.4387, "step": 20214 }, { "epoch": 0.52, "grad_norm": 2.697075843811035, "learning_rate": 9.898724301970686e-06, "loss": 0.5249, "step": 20215 }, { "epoch": 0.52, "grad_norm": 2.9342665672302246, "learning_rate": 9.897894202030852e-06, "loss": 0.6862, "step": 20216 }, { "epoch": 0.52, "grad_norm": 1.7380027770996094, "learning_rate": 9.897064102794665e-06, "loss": 0.54, "step": 20217 }, { "epoch": 0.52, "grad_norm": 2.424304246902466, "learning_rate": 9.896234004267848e-06, "loss": 0.4959, "step": 20218 }, { "epoch": 0.52, "grad_norm": 3.744878053665161, "learning_rate": 9.895403906456124e-06, "loss": 0.6795, "step": 20219 }, { "epoch": 0.52, "grad_norm": 2.139991283416748, "learning_rate": 9.894573809365207e-06, "loss": 0.6037, "step": 20220 }, { "epoch": 0.52, "grad_norm": 1.7656461000442505, "learning_rate": 9.893743713000824e-06, "loss": 0.683, "step": 20221 }, { "epoch": 0.52, "grad_norm": 3.1931025981903076, "learning_rate": 9.89291361736869e-06, "loss": 0.6532, "step": 20222 }, { "epoch": 0.52, "grad_norm": 1.2727599143981934, "learning_rate": 9.89208352247453e-06, "loss": 0.5038, "step": 20223 }, { "epoch": 0.52, "grad_norm": 1.3748835325241089, "learning_rate": 9.891253428324061e-06, "loss": 0.5388, "step": 20224 }, { "epoch": 0.52, "grad_norm": 2.5215208530426025, "learning_rate": 9.890423334923005e-06, "loss": 0.6993, "step": 20225 }, { "epoch": 0.52, "grad_norm": 2.831099271774292, "learning_rate": 9.889593242277085e-06, "loss": 0.5389, "step": 20226 }, { "epoch": 0.52, "grad_norm": 1.4546140432357788, "learning_rate": 9.888763150392016e-06, "loss": 0.4457, "step": 20227 }, { "epoch": 0.52, "grad_norm": 1.327240228652954, "learning_rate": 9.887933059273523e-06, "loss": 0.5362, "step": 20228 }, { "epoch": 0.52, "grad_norm": 3.1865484714508057, "learning_rate": 9.887102968927325e-06, "loss": 0.7004, "step": 20229 }, { "epoch": 0.52, "grad_norm": 3.1310834884643555, "learning_rate": 9.88627287935914e-06, "loss": 0.5735, "step": 20230 }, { "epoch": 0.52, "grad_norm": 1.1531429290771484, "learning_rate": 9.885442790574695e-06, "loss": 0.5957, "step": 20231 }, { "epoch": 0.52, "grad_norm": 1.4191657304763794, "learning_rate": 9.884612702579704e-06, "loss": 0.615, "step": 20232 }, { "epoch": 0.52, "grad_norm": 1.6391890048980713, "learning_rate": 9.883782615379891e-06, "loss": 0.6154, "step": 20233 }, { "epoch": 0.52, "grad_norm": 3.29459547996521, "learning_rate": 9.882952528980973e-06, "loss": 0.6017, "step": 20234 }, { "epoch": 0.52, "grad_norm": 2.4597256183624268, "learning_rate": 9.882122443388673e-06, "loss": 0.6679, "step": 20235 }, { "epoch": 0.52, "grad_norm": 1.6880698204040527, "learning_rate": 9.881292358608714e-06, "loss": 0.4301, "step": 20236 }, { "epoch": 0.52, "grad_norm": 5.876855373382568, "learning_rate": 9.88046227464681e-06, "loss": 0.6639, "step": 20237 }, { "epoch": 0.52, "grad_norm": 3.1078901290893555, "learning_rate": 9.879632191508688e-06, "loss": 0.6934, "step": 20238 }, { "epoch": 0.52, "grad_norm": 1.0892589092254639, "learning_rate": 9.878802109200063e-06, "loss": 0.4875, "step": 20239 }, { "epoch": 0.52, "grad_norm": 3.099468231201172, "learning_rate": 9.877972027726659e-06, "loss": 0.3906, "step": 20240 }, { "epoch": 0.52, "grad_norm": 1.5146541595458984, "learning_rate": 9.877141947094194e-06, "loss": 0.5869, "step": 20241 }, { "epoch": 0.52, "grad_norm": 2.5043442249298096, "learning_rate": 9.87631186730839e-06, "loss": 0.5205, "step": 20242 }, { "epoch": 0.52, "grad_norm": 2.8396127223968506, "learning_rate": 9.875481788374967e-06, "loss": 0.5293, "step": 20243 }, { "epoch": 0.52, "grad_norm": 2.264209747314453, "learning_rate": 9.874651710299644e-06, "loss": 0.4052, "step": 20244 }, { "epoch": 0.52, "grad_norm": 6.371861934661865, "learning_rate": 9.873821633088145e-06, "loss": 0.4412, "step": 20245 }, { "epoch": 0.52, "grad_norm": 2.428257465362549, "learning_rate": 9.872991556746187e-06, "loss": 0.6865, "step": 20246 }, { "epoch": 0.52, "grad_norm": 2.174487590789795, "learning_rate": 9.872161481279489e-06, "loss": 0.4235, "step": 20247 }, { "epoch": 0.52, "grad_norm": 1.4644107818603516, "learning_rate": 9.871331406693779e-06, "loss": 0.4978, "step": 20248 }, { "epoch": 0.52, "grad_norm": 3.693087100982666, "learning_rate": 9.870501332994768e-06, "loss": 0.5919, "step": 20249 }, { "epoch": 0.52, "grad_norm": 4.457208156585693, "learning_rate": 9.869671260188183e-06, "loss": 0.4729, "step": 20250 }, { "epoch": 0.52, "grad_norm": 1.6637334823608398, "learning_rate": 9.86884118827974e-06, "loss": 0.4259, "step": 20251 }, { "epoch": 0.52, "grad_norm": 1.2779107093811035, "learning_rate": 9.868011117275162e-06, "loss": 0.5495, "step": 20252 }, { "epoch": 0.52, "grad_norm": 1.9533510208129883, "learning_rate": 9.867181047180168e-06, "loss": 0.6022, "step": 20253 }, { "epoch": 0.52, "grad_norm": 1.6725579500198364, "learning_rate": 9.866350978000478e-06, "loss": 0.5297, "step": 20254 }, { "epoch": 0.52, "grad_norm": 1.2201112508773804, "learning_rate": 9.865520909741816e-06, "loss": 0.5431, "step": 20255 }, { "epoch": 0.52, "grad_norm": 4.696491241455078, "learning_rate": 9.864690842409894e-06, "loss": 0.6288, "step": 20256 }, { "epoch": 0.52, "grad_norm": 1.3109439611434937, "learning_rate": 9.863860776010444e-06, "loss": 0.5949, "step": 20257 }, { "epoch": 0.52, "grad_norm": 1.674823522567749, "learning_rate": 9.863030710549176e-06, "loss": 0.5471, "step": 20258 }, { "epoch": 0.52, "grad_norm": 3.0262579917907715, "learning_rate": 9.862200646031814e-06, "loss": 0.6102, "step": 20259 }, { "epoch": 0.52, "grad_norm": 1.9997146129608154, "learning_rate": 9.861370582464081e-06, "loss": 0.5501, "step": 20260 }, { "epoch": 0.52, "grad_norm": 3.1836814880371094, "learning_rate": 9.860540519851692e-06, "loss": 0.5075, "step": 20261 }, { "epoch": 0.52, "grad_norm": 1.407561182975769, "learning_rate": 9.859710458200373e-06, "loss": 0.4492, "step": 20262 }, { "epoch": 0.52, "grad_norm": 1.6656945943832397, "learning_rate": 9.85888039751584e-06, "loss": 0.5367, "step": 20263 }, { "epoch": 0.52, "grad_norm": 1.10928213596344, "learning_rate": 9.858050337803817e-06, "loss": 0.5371, "step": 20264 }, { "epoch": 0.52, "grad_norm": 1.6717803478240967, "learning_rate": 9.857220279070018e-06, "loss": 0.3982, "step": 20265 }, { "epoch": 0.52, "grad_norm": 3.411318778991699, "learning_rate": 9.856390221320168e-06, "loss": 0.5188, "step": 20266 }, { "epoch": 0.52, "grad_norm": 1.117350459098816, "learning_rate": 9.855560164559989e-06, "loss": 0.4372, "step": 20267 }, { "epoch": 0.52, "grad_norm": 1.163708209991455, "learning_rate": 9.854730108795195e-06, "loss": 0.5547, "step": 20268 }, { "epoch": 0.52, "grad_norm": 6.854141712188721, "learning_rate": 9.853900054031514e-06, "loss": 0.6261, "step": 20269 }, { "epoch": 0.52, "grad_norm": 1.7594777345657349, "learning_rate": 9.853070000274657e-06, "loss": 0.5226, "step": 20270 }, { "epoch": 0.52, "grad_norm": 1.6050535440444946, "learning_rate": 9.852239947530352e-06, "loss": 0.7165, "step": 20271 }, { "epoch": 0.52, "grad_norm": 0.9751092195510864, "learning_rate": 9.851409895804316e-06, "loss": 0.464, "step": 20272 }, { "epoch": 0.52, "grad_norm": 1.5633097887039185, "learning_rate": 9.85057984510227e-06, "loss": 0.6981, "step": 20273 }, { "epoch": 0.52, "grad_norm": 1.533193826675415, "learning_rate": 9.849749795429936e-06, "loss": 0.5812, "step": 20274 }, { "epoch": 0.52, "grad_norm": 1.8680317401885986, "learning_rate": 9.848919746793029e-06, "loss": 0.5646, "step": 20275 }, { "epoch": 0.52, "grad_norm": 7.644049644470215, "learning_rate": 9.848089699197275e-06, "loss": 0.4916, "step": 20276 }, { "epoch": 0.52, "grad_norm": 1.3539479970932007, "learning_rate": 9.847259652648387e-06, "loss": 0.6139, "step": 20277 }, { "epoch": 0.52, "grad_norm": 0.945522129535675, "learning_rate": 9.846429607152092e-06, "loss": 0.567, "step": 20278 }, { "epoch": 0.52, "grad_norm": 1.5593225955963135, "learning_rate": 9.845599562714109e-06, "loss": 0.5281, "step": 20279 }, { "epoch": 0.52, "grad_norm": 1.1083829402923584, "learning_rate": 9.844769519340153e-06, "loss": 0.5683, "step": 20280 }, { "epoch": 0.52, "grad_norm": 1.4198869466781616, "learning_rate": 9.843939477035953e-06, "loss": 0.5839, "step": 20281 }, { "epoch": 0.52, "grad_norm": 1.4268678426742554, "learning_rate": 9.84310943580722e-06, "loss": 0.656, "step": 20282 }, { "epoch": 0.52, "grad_norm": 2.2314679622650146, "learning_rate": 9.842279395659679e-06, "loss": 0.6854, "step": 20283 }, { "epoch": 0.52, "grad_norm": 1.6202789545059204, "learning_rate": 9.84144935659905e-06, "loss": 0.6587, "step": 20284 }, { "epoch": 0.52, "grad_norm": 1.1892284154891968, "learning_rate": 9.840619318631053e-06, "loss": 0.377, "step": 20285 }, { "epoch": 0.52, "grad_norm": 1.550155758857727, "learning_rate": 9.839789281761409e-06, "loss": 0.4819, "step": 20286 }, { "epoch": 0.52, "grad_norm": 3.2255284786224365, "learning_rate": 9.838959245995833e-06, "loss": 0.6368, "step": 20287 }, { "epoch": 0.52, "grad_norm": 1.5392224788665771, "learning_rate": 9.838129211340049e-06, "loss": 0.5504, "step": 20288 }, { "epoch": 0.52, "grad_norm": 1.39189875125885, "learning_rate": 9.83729917779978e-06, "loss": 0.4976, "step": 20289 }, { "epoch": 0.52, "grad_norm": 1.958403468132019, "learning_rate": 9.836469145380739e-06, "loss": 0.4829, "step": 20290 }, { "epoch": 0.52, "grad_norm": 2.2600886821746826, "learning_rate": 9.835639114088653e-06, "loss": 0.5476, "step": 20291 }, { "epoch": 0.52, "grad_norm": 4.041697978973389, "learning_rate": 9.834809083929236e-06, "loss": 0.6297, "step": 20292 }, { "epoch": 0.52, "grad_norm": 1.9095250368118286, "learning_rate": 9.833979054908215e-06, "loss": 0.5902, "step": 20293 }, { "epoch": 0.52, "grad_norm": 1.428835391998291, "learning_rate": 9.833149027031303e-06, "loss": 0.5353, "step": 20294 }, { "epoch": 0.52, "grad_norm": 4.245733261108398, "learning_rate": 9.832319000304223e-06, "loss": 0.5403, "step": 20295 }, { "epoch": 0.52, "grad_norm": 1.1452454328536987, "learning_rate": 9.831488974732698e-06, "loss": 0.4059, "step": 20296 }, { "epoch": 0.52, "grad_norm": 2.2411370277404785, "learning_rate": 9.830658950322441e-06, "loss": 0.6274, "step": 20297 }, { "epoch": 0.52, "grad_norm": 1.1681877374649048, "learning_rate": 9.829828927079179e-06, "loss": 0.6095, "step": 20298 }, { "epoch": 0.52, "grad_norm": 1.1242574453353882, "learning_rate": 9.828998905008627e-06, "loss": 0.5578, "step": 20299 }, { "epoch": 0.52, "grad_norm": 2.254011869430542, "learning_rate": 9.828168884116508e-06, "loss": 0.508, "step": 20300 }, { "epoch": 0.52, "grad_norm": 1.2619479894638062, "learning_rate": 9.827338864408543e-06, "loss": 0.4013, "step": 20301 }, { "epoch": 0.52, "grad_norm": 3.5487732887268066, "learning_rate": 9.826508845890447e-06, "loss": 0.5382, "step": 20302 }, { "epoch": 0.52, "grad_norm": 4.703164100646973, "learning_rate": 9.825678828567947e-06, "loss": 0.5954, "step": 20303 }, { "epoch": 0.52, "grad_norm": 2.361546277999878, "learning_rate": 9.824848812446755e-06, "loss": 0.7298, "step": 20304 }, { "epoch": 0.52, "grad_norm": 3.041015863418579, "learning_rate": 9.824018797532598e-06, "loss": 0.4867, "step": 20305 }, { "epoch": 0.52, "grad_norm": 1.260692834854126, "learning_rate": 9.82318878383119e-06, "loss": 0.6406, "step": 20306 }, { "epoch": 0.52, "grad_norm": 3.0911989212036133, "learning_rate": 9.822358771348256e-06, "loss": 0.5536, "step": 20307 }, { "epoch": 0.52, "grad_norm": 4.3005290031433105, "learning_rate": 9.821528760089514e-06, "loss": 0.6866, "step": 20308 }, { "epoch": 0.52, "grad_norm": 1.6032311916351318, "learning_rate": 9.82069875006068e-06, "loss": 0.5593, "step": 20309 }, { "epoch": 0.52, "grad_norm": 1.9571536779403687, "learning_rate": 9.819868741267483e-06, "loss": 0.5209, "step": 20310 }, { "epoch": 0.52, "grad_norm": 1.970090389251709, "learning_rate": 9.819038733715634e-06, "loss": 0.555, "step": 20311 }, { "epoch": 0.52, "grad_norm": 3.8074328899383545, "learning_rate": 9.818208727410857e-06, "loss": 0.6683, "step": 20312 }, { "epoch": 0.52, "grad_norm": 3.590250253677368, "learning_rate": 9.81737872235887e-06, "loss": 0.4884, "step": 20313 }, { "epoch": 0.52, "grad_norm": 1.0601190328598022, "learning_rate": 9.816548718565396e-06, "loss": 0.5584, "step": 20314 }, { "epoch": 0.52, "grad_norm": 2.786604881286621, "learning_rate": 9.815718716036154e-06, "loss": 0.5028, "step": 20315 }, { "epoch": 0.52, "grad_norm": 2.3064017295837402, "learning_rate": 9.81488871477686e-06, "loss": 0.675, "step": 20316 }, { "epoch": 0.52, "grad_norm": 4.470898628234863, "learning_rate": 9.814058714793238e-06, "loss": 0.6914, "step": 20317 }, { "epoch": 0.52, "grad_norm": 2.0622177124023438, "learning_rate": 9.813228716091006e-06, "loss": 0.6028, "step": 20318 }, { "epoch": 0.52, "grad_norm": 1.268560528755188, "learning_rate": 9.812398718675884e-06, "loss": 0.542, "step": 20319 }, { "epoch": 0.52, "grad_norm": 4.595201015472412, "learning_rate": 9.811568722553595e-06, "loss": 0.6624, "step": 20320 }, { "epoch": 0.52, "grad_norm": 1.4351680278778076, "learning_rate": 9.810738727729852e-06, "loss": 0.4409, "step": 20321 }, { "epoch": 0.52, "grad_norm": 1.649393081665039, "learning_rate": 9.809908734210382e-06, "loss": 0.4951, "step": 20322 }, { "epoch": 0.52, "grad_norm": 1.1325267553329468, "learning_rate": 9.8090787420009e-06, "loss": 0.4386, "step": 20323 }, { "epoch": 0.52, "grad_norm": 5.043456554412842, "learning_rate": 9.808248751107127e-06, "loss": 0.8655, "step": 20324 }, { "epoch": 0.52, "grad_norm": 1.1931880712509155, "learning_rate": 9.807418761534785e-06, "loss": 0.5536, "step": 20325 }, { "epoch": 0.52, "grad_norm": 2.5533957481384277, "learning_rate": 9.806588773289589e-06, "loss": 0.708, "step": 20326 }, { "epoch": 0.52, "grad_norm": 1.7508890628814697, "learning_rate": 9.805758786377266e-06, "loss": 0.7461, "step": 20327 }, { "epoch": 0.52, "grad_norm": 0.9874829053878784, "learning_rate": 9.804928800803527e-06, "loss": 0.5147, "step": 20328 }, { "epoch": 0.52, "grad_norm": 1.8428897857666016, "learning_rate": 9.804098816574098e-06, "loss": 0.6813, "step": 20329 }, { "epoch": 0.52, "grad_norm": 1.283440351486206, "learning_rate": 9.803268833694696e-06, "loss": 0.4259, "step": 20330 }, { "epoch": 0.52, "grad_norm": 1.0405514240264893, "learning_rate": 9.802438852171042e-06, "loss": 0.5768, "step": 20331 }, { "epoch": 0.52, "grad_norm": 3.962219476699829, "learning_rate": 9.801608872008857e-06, "loss": 0.5021, "step": 20332 }, { "epoch": 0.52, "grad_norm": 1.5070164203643799, "learning_rate": 9.800778893213857e-06, "loss": 0.5786, "step": 20333 }, { "epoch": 0.52, "grad_norm": 4.562404632568359, "learning_rate": 9.799948915791765e-06, "loss": 0.7011, "step": 20334 }, { "epoch": 0.52, "grad_norm": 1.329802393913269, "learning_rate": 9.7991189397483e-06, "loss": 0.4691, "step": 20335 }, { "epoch": 0.52, "grad_norm": 1.362004280090332, "learning_rate": 9.798288965089179e-06, "loss": 0.5197, "step": 20336 }, { "epoch": 0.52, "grad_norm": 2.5077219009399414, "learning_rate": 9.797458991820125e-06, "loss": 0.4365, "step": 20337 }, { "epoch": 0.52, "grad_norm": 2.1716127395629883, "learning_rate": 9.796629019946855e-06, "loss": 0.6082, "step": 20338 }, { "epoch": 0.52, "grad_norm": 1.4981426000595093, "learning_rate": 9.795799049475092e-06, "loss": 0.5668, "step": 20339 }, { "epoch": 0.52, "grad_norm": 3.743649482727051, "learning_rate": 9.794969080410553e-06, "loss": 0.7037, "step": 20340 }, { "epoch": 0.52, "grad_norm": 2.3389461040496826, "learning_rate": 9.794139112758955e-06, "loss": 0.6222, "step": 20341 }, { "epoch": 0.52, "grad_norm": 1.4069275856018066, "learning_rate": 9.793309146526027e-06, "loss": 0.5231, "step": 20342 }, { "epoch": 0.52, "grad_norm": 4.623563289642334, "learning_rate": 9.792479181717479e-06, "loss": 0.7277, "step": 20343 }, { "epoch": 0.52, "grad_norm": 2.619399070739746, "learning_rate": 9.791649218339034e-06, "loss": 0.6564, "step": 20344 }, { "epoch": 0.52, "grad_norm": 1.9839966297149658, "learning_rate": 9.79081925639641e-06, "loss": 0.6569, "step": 20345 }, { "epoch": 0.52, "grad_norm": 3.2710628509521484, "learning_rate": 9.789989295895333e-06, "loss": 0.6424, "step": 20346 }, { "epoch": 0.52, "grad_norm": 7.704067707061768, "learning_rate": 9.789159336841515e-06, "loss": 0.7604, "step": 20347 }, { "epoch": 0.52, "grad_norm": 2.0630950927734375, "learning_rate": 9.788329379240678e-06, "loss": 0.6343, "step": 20348 }, { "epoch": 0.52, "grad_norm": 1.6604881286621094, "learning_rate": 9.787499423098544e-06, "loss": 0.6269, "step": 20349 }, { "epoch": 0.52, "grad_norm": 1.6500599384307861, "learning_rate": 9.786669468420827e-06, "loss": 0.4601, "step": 20350 }, { "epoch": 0.52, "grad_norm": 3.1684255599975586, "learning_rate": 9.785839515213254e-06, "loss": 0.7828, "step": 20351 }, { "epoch": 0.52, "grad_norm": 3.0602171421051025, "learning_rate": 9.785009563481536e-06, "loss": 0.6903, "step": 20352 }, { "epoch": 0.52, "grad_norm": 2.7878236770629883, "learning_rate": 9.784179613231399e-06, "loss": 0.581, "step": 20353 }, { "epoch": 0.52, "grad_norm": 1.5256361961364746, "learning_rate": 9.783349664468564e-06, "loss": 0.4797, "step": 20354 }, { "epoch": 0.52, "grad_norm": 1.9352047443389893, "learning_rate": 9.782519717198741e-06, "loss": 0.4883, "step": 20355 }, { "epoch": 0.52, "grad_norm": 1.86716890335083, "learning_rate": 9.781689771427662e-06, "loss": 0.5194, "step": 20356 }, { "epoch": 0.52, "grad_norm": 2.3883955478668213, "learning_rate": 9.780859827161035e-06, "loss": 0.7014, "step": 20357 }, { "epoch": 0.52, "grad_norm": 1.2253283262252808, "learning_rate": 9.780029884404587e-06, "loss": 0.5611, "step": 20358 }, { "epoch": 0.52, "grad_norm": 1.8744006156921387, "learning_rate": 9.779199943164033e-06, "loss": 0.6737, "step": 20359 }, { "epoch": 0.52, "grad_norm": 5.217334747314453, "learning_rate": 9.778370003445094e-06, "loss": 1.2154, "step": 20360 }, { "epoch": 0.52, "grad_norm": 1.2833585739135742, "learning_rate": 9.77754006525349e-06, "loss": 0.5209, "step": 20361 }, { "epoch": 0.52, "grad_norm": 4.709677696228027, "learning_rate": 9.776710128594941e-06, "loss": 0.5198, "step": 20362 }, { "epoch": 0.52, "grad_norm": 1.544837474822998, "learning_rate": 9.775880193475167e-06, "loss": 0.67, "step": 20363 }, { "epoch": 0.52, "grad_norm": 1.2291349172592163, "learning_rate": 9.775050259899883e-06, "loss": 0.5165, "step": 20364 }, { "epoch": 0.52, "grad_norm": 1.1084654331207275, "learning_rate": 9.774220327874812e-06, "loss": 0.6127, "step": 20365 }, { "epoch": 0.52, "grad_norm": 2.5663647651672363, "learning_rate": 9.773390397405675e-06, "loss": 0.57, "step": 20366 }, { "epoch": 0.52, "grad_norm": 1.0726603269577026, "learning_rate": 9.772560468498186e-06, "loss": 0.5469, "step": 20367 }, { "epoch": 0.52, "grad_norm": 1.6048647165298462, "learning_rate": 9.771730541158068e-06, "loss": 0.5036, "step": 20368 }, { "epoch": 0.52, "grad_norm": 1.273786187171936, "learning_rate": 9.77090061539104e-06, "loss": 0.5223, "step": 20369 }, { "epoch": 0.52, "grad_norm": 1.3897924423217773, "learning_rate": 9.770070691202823e-06, "loss": 0.6268, "step": 20370 }, { "epoch": 0.52, "grad_norm": 1.2458348274230957, "learning_rate": 9.769240768599132e-06, "loss": 0.2984, "step": 20371 }, { "epoch": 0.52, "grad_norm": 2.238701343536377, "learning_rate": 9.768410847585687e-06, "loss": 0.5642, "step": 20372 }, { "epoch": 0.52, "grad_norm": 1.188775897026062, "learning_rate": 9.767580928168214e-06, "loss": 0.4585, "step": 20373 }, { "epoch": 0.52, "grad_norm": 1.23002290725708, "learning_rate": 9.766751010352423e-06, "loss": 0.5292, "step": 20374 }, { "epoch": 0.52, "grad_norm": 1.3139073848724365, "learning_rate": 9.76592109414404e-06, "loss": 0.5685, "step": 20375 }, { "epoch": 0.52, "grad_norm": 1.0328606367111206, "learning_rate": 9.76509117954878e-06, "loss": 0.5724, "step": 20376 }, { "epoch": 0.52, "grad_norm": 4.813914775848389, "learning_rate": 9.764261266572364e-06, "loss": 0.7041, "step": 20377 }, { "epoch": 0.52, "grad_norm": 2.337965726852417, "learning_rate": 9.763431355220512e-06, "loss": 0.6773, "step": 20378 }, { "epoch": 0.52, "grad_norm": 1.7594746351242065, "learning_rate": 9.762601445498942e-06, "loss": 0.5877, "step": 20379 }, { "epoch": 0.52, "grad_norm": 3.4707226753234863, "learning_rate": 9.761771537413375e-06, "loss": 0.6448, "step": 20380 }, { "epoch": 0.52, "grad_norm": 1.8218677043914795, "learning_rate": 9.760941630969527e-06, "loss": 0.6741, "step": 20381 }, { "epoch": 0.52, "grad_norm": 1.3811880350112915, "learning_rate": 9.76011172617312e-06, "loss": 0.4734, "step": 20382 }, { "epoch": 0.52, "grad_norm": 1.1272332668304443, "learning_rate": 9.759281823029873e-06, "loss": 0.4617, "step": 20383 }, { "epoch": 0.52, "grad_norm": 1.5402884483337402, "learning_rate": 9.758451921545503e-06, "loss": 0.4185, "step": 20384 }, { "epoch": 0.52, "grad_norm": 1.6867811679840088, "learning_rate": 9.757622021725732e-06, "loss": 0.5568, "step": 20385 }, { "epoch": 0.52, "grad_norm": 2.5584518909454346, "learning_rate": 9.756792123576277e-06, "loss": 0.4781, "step": 20386 }, { "epoch": 0.52, "grad_norm": 1.667210340499878, "learning_rate": 9.75596222710286e-06, "loss": 0.6134, "step": 20387 }, { "epoch": 0.52, "grad_norm": 1.9229744672775269, "learning_rate": 9.755132332311193e-06, "loss": 0.5927, "step": 20388 }, { "epoch": 0.52, "grad_norm": 3.6823477745056152, "learning_rate": 9.754302439207003e-06, "loss": 0.5782, "step": 20389 }, { "epoch": 0.52, "grad_norm": 3.830695867538452, "learning_rate": 9.753472547796008e-06, "loss": 0.7271, "step": 20390 }, { "epoch": 0.52, "grad_norm": 2.8386576175689697, "learning_rate": 9.752642658083922e-06, "loss": 0.6295, "step": 20391 }, { "epoch": 0.52, "grad_norm": 6.477225303649902, "learning_rate": 9.75181277007647e-06, "loss": 0.6607, "step": 20392 }, { "epoch": 0.52, "grad_norm": 1.9557030200958252, "learning_rate": 9.750982883779367e-06, "loss": 0.7567, "step": 20393 }, { "epoch": 0.52, "grad_norm": 1.6804379224777222, "learning_rate": 9.750152999198334e-06, "loss": 0.6203, "step": 20394 }, { "epoch": 0.52, "grad_norm": 3.393261432647705, "learning_rate": 9.74932311633909e-06, "loss": 0.4843, "step": 20395 }, { "epoch": 0.52, "grad_norm": 1.5467802286148071, "learning_rate": 9.748493235207352e-06, "loss": 0.5834, "step": 20396 }, { "epoch": 0.52, "grad_norm": 1.6875402927398682, "learning_rate": 9.747663355808844e-06, "loss": 0.5914, "step": 20397 }, { "epoch": 0.52, "grad_norm": 3.56075119972229, "learning_rate": 9.746833478149279e-06, "loss": 0.6404, "step": 20398 }, { "epoch": 0.52, "grad_norm": 2.6982593536376953, "learning_rate": 9.746003602234379e-06, "loss": 0.5413, "step": 20399 }, { "epoch": 0.52, "grad_norm": 1.3146592378616333, "learning_rate": 9.745173728069862e-06, "loss": 0.5067, "step": 20400 }, { "epoch": 0.52, "grad_norm": 1.194750428199768, "learning_rate": 9.744343855661447e-06, "loss": 0.5349, "step": 20401 }, { "epoch": 0.52, "grad_norm": 4.211876392364502, "learning_rate": 9.743513985014856e-06, "loss": 0.778, "step": 20402 }, { "epoch": 0.52, "grad_norm": 1.4048895835876465, "learning_rate": 9.742684116135803e-06, "loss": 0.5696, "step": 20403 }, { "epoch": 0.52, "grad_norm": 1.3548856973648071, "learning_rate": 9.741854249030012e-06, "loss": 0.449, "step": 20404 }, { "epoch": 0.52, "grad_norm": 1.1170984506607056, "learning_rate": 9.741024383703197e-06, "loss": 0.4388, "step": 20405 }, { "epoch": 0.52, "grad_norm": 2.4025347232818604, "learning_rate": 9.740194520161079e-06, "loss": 0.5772, "step": 20406 }, { "epoch": 0.52, "grad_norm": 1.9486804008483887, "learning_rate": 9.739364658409381e-06, "loss": 0.4226, "step": 20407 }, { "epoch": 0.52, "grad_norm": 2.6878724098205566, "learning_rate": 9.738534798453813e-06, "loss": 0.8542, "step": 20408 }, { "epoch": 0.52, "grad_norm": 2.471511125564575, "learning_rate": 9.737704940300103e-06, "loss": 0.612, "step": 20409 }, { "epoch": 0.52, "grad_norm": 1.9171741008758545, "learning_rate": 9.736875083953963e-06, "loss": 0.435, "step": 20410 }, { "epoch": 0.52, "grad_norm": 2.5965723991394043, "learning_rate": 9.736045229421118e-06, "loss": 0.5282, "step": 20411 }, { "epoch": 0.52, "grad_norm": 2.6610007286071777, "learning_rate": 9.735215376707279e-06, "loss": 0.5466, "step": 20412 }, { "epoch": 0.52, "grad_norm": 1.3410252332687378, "learning_rate": 9.73438552581817e-06, "loss": 0.5521, "step": 20413 }, { "epoch": 0.52, "grad_norm": 1.2064179182052612, "learning_rate": 9.733555676759512e-06, "loss": 0.5721, "step": 20414 }, { "epoch": 0.52, "grad_norm": 1.3654521703720093, "learning_rate": 9.732725829537018e-06, "loss": 0.5295, "step": 20415 }, { "epoch": 0.52, "grad_norm": 1.8130817413330078, "learning_rate": 9.731895984156413e-06, "loss": 0.5716, "step": 20416 }, { "epoch": 0.52, "grad_norm": 2.9025168418884277, "learning_rate": 9.73106614062341e-06, "loss": 0.5374, "step": 20417 }, { "epoch": 0.52, "grad_norm": 8.918498992919922, "learning_rate": 9.73023629894373e-06, "loss": 0.5715, "step": 20418 }, { "epoch": 0.52, "grad_norm": 2.343677043914795, "learning_rate": 9.729406459123093e-06, "loss": 0.6754, "step": 20419 }, { "epoch": 0.52, "grad_norm": 2.7544591426849365, "learning_rate": 9.728576621167215e-06, "loss": 0.612, "step": 20420 }, { "epoch": 0.52, "grad_norm": 1.4304457902908325, "learning_rate": 9.727746785081819e-06, "loss": 0.6117, "step": 20421 }, { "epoch": 0.52, "grad_norm": 1.3600332736968994, "learning_rate": 9.726916950872619e-06, "loss": 0.5919, "step": 20422 }, { "epoch": 0.52, "grad_norm": 2.127094268798828, "learning_rate": 9.726087118545338e-06, "loss": 0.7304, "step": 20423 }, { "epoch": 0.52, "grad_norm": 2.4402451515197754, "learning_rate": 9.72525728810569e-06, "loss": 0.6788, "step": 20424 }, { "epoch": 0.52, "grad_norm": 1.143631935119629, "learning_rate": 9.724427459559396e-06, "loss": 0.6102, "step": 20425 }, { "epoch": 0.52, "grad_norm": 9.112702369689941, "learning_rate": 9.723597632912178e-06, "loss": 0.7296, "step": 20426 }, { "epoch": 0.52, "grad_norm": 1.7138683795928955, "learning_rate": 9.722767808169747e-06, "loss": 0.4607, "step": 20427 }, { "epoch": 0.52, "grad_norm": 5.199458122253418, "learning_rate": 9.72193798533783e-06, "loss": 0.4124, "step": 20428 }, { "epoch": 0.52, "grad_norm": 3.4844493865966797, "learning_rate": 9.72110816442214e-06, "loss": 0.6481, "step": 20429 }, { "epoch": 0.52, "grad_norm": 2.7044694423675537, "learning_rate": 9.720278345428397e-06, "loss": 0.7655, "step": 20430 }, { "epoch": 0.52, "grad_norm": 1.6140246391296387, "learning_rate": 9.719448528362322e-06, "loss": 0.5918, "step": 20431 }, { "epoch": 0.52, "grad_norm": 1.2595818042755127, "learning_rate": 9.718618713229629e-06, "loss": 0.6143, "step": 20432 }, { "epoch": 0.52, "grad_norm": 3.9832072257995605, "learning_rate": 9.71778890003604e-06, "loss": 0.5658, "step": 20433 }, { "epoch": 0.52, "grad_norm": 1.1941626071929932, "learning_rate": 9.716959088787272e-06, "loss": 0.5616, "step": 20434 }, { "epoch": 0.52, "grad_norm": 3.4676387310028076, "learning_rate": 9.716129279489044e-06, "loss": 0.4633, "step": 20435 }, { "epoch": 0.52, "grad_norm": 1.4425870180130005, "learning_rate": 9.715299472147077e-06, "loss": 0.4921, "step": 20436 }, { "epoch": 0.52, "grad_norm": 1.6529980897903442, "learning_rate": 9.714469666767084e-06, "loss": 0.7614, "step": 20437 }, { "epoch": 0.52, "grad_norm": 1.3605289459228516, "learning_rate": 9.71363986335479e-06, "loss": 0.5626, "step": 20438 }, { "epoch": 0.52, "grad_norm": 2.9647772312164307, "learning_rate": 9.712810061915908e-06, "loss": 0.4265, "step": 20439 }, { "epoch": 0.52, "grad_norm": 1.21343994140625, "learning_rate": 9.71198026245616e-06, "loss": 0.7214, "step": 20440 }, { "epoch": 0.52, "grad_norm": 1.490337610244751, "learning_rate": 9.711150464981262e-06, "loss": 0.4866, "step": 20441 }, { "epoch": 0.52, "grad_norm": 4.850344181060791, "learning_rate": 9.710320669496933e-06, "loss": 0.4659, "step": 20442 }, { "epoch": 0.52, "grad_norm": 1.223376750946045, "learning_rate": 9.709490876008892e-06, "loss": 0.5897, "step": 20443 }, { "epoch": 0.52, "grad_norm": 4.72163724899292, "learning_rate": 9.708661084522857e-06, "loss": 0.3917, "step": 20444 }, { "epoch": 0.52, "grad_norm": 1.5759367942810059, "learning_rate": 9.707831295044549e-06, "loss": 0.5645, "step": 20445 }, { "epoch": 0.52, "grad_norm": 1.503242015838623, "learning_rate": 9.707001507579682e-06, "loss": 0.4368, "step": 20446 }, { "epoch": 0.52, "grad_norm": 1.900598406791687, "learning_rate": 9.706171722133976e-06, "loss": 0.6705, "step": 20447 }, { "epoch": 0.52, "grad_norm": 3.759830951690674, "learning_rate": 9.705341938713155e-06, "loss": 0.5709, "step": 20448 }, { "epoch": 0.52, "grad_norm": 2.1173648834228516, "learning_rate": 9.704512157322925e-06, "loss": 0.548, "step": 20449 }, { "epoch": 0.52, "grad_norm": 1.1576941013336182, "learning_rate": 9.703682377969017e-06, "loss": 0.4419, "step": 20450 }, { "epoch": 0.52, "grad_norm": 1.281281590461731, "learning_rate": 9.702852600657142e-06, "loss": 0.5901, "step": 20451 }, { "epoch": 0.52, "grad_norm": 1.0205820798873901, "learning_rate": 9.702022825393022e-06, "loss": 0.4719, "step": 20452 }, { "epoch": 0.52, "grad_norm": 2.2808260917663574, "learning_rate": 9.70119305218237e-06, "loss": 0.5717, "step": 20453 }, { "epoch": 0.52, "grad_norm": 2.3282995223999023, "learning_rate": 9.700363281030909e-06, "loss": 0.4877, "step": 20454 }, { "epoch": 0.52, "grad_norm": 1.0948412418365479, "learning_rate": 9.699533511944356e-06, "loss": 0.542, "step": 20455 }, { "epoch": 0.52, "grad_norm": 2.404892683029175, "learning_rate": 9.69870374492843e-06, "loss": 0.6687, "step": 20456 }, { "epoch": 0.52, "grad_norm": 1.0696979761123657, "learning_rate": 9.697873979988851e-06, "loss": 0.5233, "step": 20457 }, { "epoch": 0.52, "grad_norm": 2.9260478019714355, "learning_rate": 9.697044217131334e-06, "loss": 0.5508, "step": 20458 }, { "epoch": 0.52, "grad_norm": 1.5862658023834229, "learning_rate": 9.696214456361594e-06, "loss": 0.5971, "step": 20459 }, { "epoch": 0.52, "grad_norm": 2.4026219844818115, "learning_rate": 9.695384697685355e-06, "loss": 0.7161, "step": 20460 }, { "epoch": 0.52, "grad_norm": 3.244781255722046, "learning_rate": 9.694554941108334e-06, "loss": 0.5961, "step": 20461 }, { "epoch": 0.52, "grad_norm": 2.8866775035858154, "learning_rate": 9.693725186636248e-06, "loss": 0.6259, "step": 20462 }, { "epoch": 0.52, "grad_norm": 3.4410746097564697, "learning_rate": 9.692895434274815e-06, "loss": 0.6831, "step": 20463 }, { "epoch": 0.52, "grad_norm": 2.6394760608673096, "learning_rate": 9.692065684029753e-06, "loss": 0.6634, "step": 20464 }, { "epoch": 0.52, "grad_norm": 3.7644002437591553, "learning_rate": 9.69123593590678e-06, "loss": 0.6842, "step": 20465 }, { "epoch": 0.52, "grad_norm": 6.213589191436768, "learning_rate": 9.69040618991162e-06, "loss": 0.7123, "step": 20466 }, { "epoch": 0.52, "grad_norm": 3.5526890754699707, "learning_rate": 9.689576446049982e-06, "loss": 0.8955, "step": 20467 }, { "epoch": 0.52, "grad_norm": 2.0454301834106445, "learning_rate": 9.68874670432759e-06, "loss": 0.6086, "step": 20468 }, { "epoch": 0.52, "grad_norm": 7.3080220222473145, "learning_rate": 9.687916964750159e-06, "loss": 0.3274, "step": 20469 }, { "epoch": 0.52, "grad_norm": 4.352459907531738, "learning_rate": 9.687087227323408e-06, "loss": 0.5073, "step": 20470 }, { "epoch": 0.52, "grad_norm": 2.428562879562378, "learning_rate": 9.686257492053055e-06, "loss": 0.7757, "step": 20471 }, { "epoch": 0.52, "grad_norm": 1.3225620985031128, "learning_rate": 9.685427758944818e-06, "loss": 0.6298, "step": 20472 }, { "epoch": 0.52, "grad_norm": 1.785604476928711, "learning_rate": 9.684598028004417e-06, "loss": 0.5308, "step": 20473 }, { "epoch": 0.52, "grad_norm": 1.3694415092468262, "learning_rate": 9.683768299237566e-06, "loss": 0.5887, "step": 20474 }, { "epoch": 0.52, "grad_norm": 2.7392804622650146, "learning_rate": 9.682938572649987e-06, "loss": 0.6209, "step": 20475 }, { "epoch": 0.52, "grad_norm": 2.3845980167388916, "learning_rate": 9.682108848247396e-06, "loss": 0.6952, "step": 20476 }, { "epoch": 0.52, "grad_norm": 2.090099573135376, "learning_rate": 9.68127912603551e-06, "loss": 0.5601, "step": 20477 }, { "epoch": 0.52, "grad_norm": 2.598684072494507, "learning_rate": 9.68044940602005e-06, "loss": 0.7185, "step": 20478 }, { "epoch": 0.52, "grad_norm": 1.4922739267349243, "learning_rate": 9.67961968820673e-06, "loss": 0.4702, "step": 20479 }, { "epoch": 0.52, "grad_norm": 1.9791338443756104, "learning_rate": 9.678789972601271e-06, "loss": 0.6183, "step": 20480 }, { "epoch": 0.52, "grad_norm": 4.514583110809326, "learning_rate": 9.677960259209389e-06, "loss": 0.6435, "step": 20481 }, { "epoch": 0.52, "grad_norm": 1.5352822542190552, "learning_rate": 9.677130548036803e-06, "loss": 0.358, "step": 20482 }, { "epoch": 0.52, "grad_norm": 2.5487334728240967, "learning_rate": 9.67630083908923e-06, "loss": 0.4535, "step": 20483 }, { "epoch": 0.53, "grad_norm": 1.4912806749343872, "learning_rate": 9.675471132372388e-06, "loss": 0.556, "step": 20484 }, { "epoch": 0.53, "grad_norm": 2.4768800735473633, "learning_rate": 9.674641427891996e-06, "loss": 0.6141, "step": 20485 }, { "epoch": 0.53, "grad_norm": 1.6338086128234863, "learning_rate": 9.67381172565377e-06, "loss": 0.605, "step": 20486 }, { "epoch": 0.53, "grad_norm": 1.674689531326294, "learning_rate": 9.672982025663429e-06, "loss": 0.4883, "step": 20487 }, { "epoch": 0.53, "grad_norm": 4.787062168121338, "learning_rate": 9.67215232792669e-06, "loss": 0.7235, "step": 20488 }, { "epoch": 0.53, "grad_norm": 1.5897157192230225, "learning_rate": 9.671322632449271e-06, "loss": 0.5286, "step": 20489 }, { "epoch": 0.53, "grad_norm": 1.373138666152954, "learning_rate": 9.670492939236891e-06, "loss": 0.5545, "step": 20490 }, { "epoch": 0.53, "grad_norm": 1.0929588079452515, "learning_rate": 9.669663248295268e-06, "loss": 0.3668, "step": 20491 }, { "epoch": 0.53, "grad_norm": 9.297228813171387, "learning_rate": 9.668833559630116e-06, "loss": 0.6018, "step": 20492 }, { "epoch": 0.53, "grad_norm": 1.5119280815124512, "learning_rate": 9.668003873247154e-06, "loss": 0.4623, "step": 20493 }, { "epoch": 0.53, "grad_norm": 1.1749366521835327, "learning_rate": 9.667174189152105e-06, "loss": 0.5551, "step": 20494 }, { "epoch": 0.53, "grad_norm": 1.5711634159088135, "learning_rate": 9.666344507350678e-06, "loss": 0.5723, "step": 20495 }, { "epoch": 0.53, "grad_norm": 1.9350849390029907, "learning_rate": 9.665514827848595e-06, "loss": 0.5888, "step": 20496 }, { "epoch": 0.53, "grad_norm": 1.7196017503738403, "learning_rate": 9.664685150651579e-06, "loss": 0.7176, "step": 20497 }, { "epoch": 0.53, "grad_norm": 2.603940963745117, "learning_rate": 9.663855475765336e-06, "loss": 0.5859, "step": 20498 }, { "epoch": 0.53, "grad_norm": 2.59135103225708, "learning_rate": 9.663025803195594e-06, "loss": 0.4674, "step": 20499 }, { "epoch": 0.53, "grad_norm": 7.373378753662109, "learning_rate": 9.662196132948065e-06, "loss": 0.5773, "step": 20500 }, { "epoch": 0.53, "grad_norm": 2.6863174438476562, "learning_rate": 9.661366465028466e-06, "loss": 0.7518, "step": 20501 }, { "epoch": 0.53, "grad_norm": 5.416933536529541, "learning_rate": 9.66053679944252e-06, "loss": 0.5109, "step": 20502 }, { "epoch": 0.53, "grad_norm": 1.816911220550537, "learning_rate": 9.65970713619594e-06, "loss": 0.6667, "step": 20503 }, { "epoch": 0.53, "grad_norm": 3.0862507820129395, "learning_rate": 9.658877475294444e-06, "loss": 0.6191, "step": 20504 }, { "epoch": 0.53, "grad_norm": 1.809969425201416, "learning_rate": 9.65804781674375e-06, "loss": 0.6153, "step": 20505 }, { "epoch": 0.53, "grad_norm": 1.377150535583496, "learning_rate": 9.657218160549578e-06, "loss": 0.6196, "step": 20506 }, { "epoch": 0.53, "grad_norm": 1.2322839498519897, "learning_rate": 9.65638850671764e-06, "loss": 0.5853, "step": 20507 }, { "epoch": 0.53, "grad_norm": 1.8090921640396118, "learning_rate": 9.655558855253657e-06, "loss": 0.6045, "step": 20508 }, { "epoch": 0.53, "grad_norm": 1.2860530614852905, "learning_rate": 9.654729206163347e-06, "loss": 0.5232, "step": 20509 }, { "epoch": 0.53, "grad_norm": 1.2952816486358643, "learning_rate": 9.653899559452427e-06, "loss": 0.6016, "step": 20510 }, { "epoch": 0.53, "grad_norm": 2.643862009048462, "learning_rate": 9.653069915126613e-06, "loss": 0.6229, "step": 20511 }, { "epoch": 0.53, "grad_norm": 2.0126330852508545, "learning_rate": 9.652240273191622e-06, "loss": 0.5066, "step": 20512 }, { "epoch": 0.53, "grad_norm": 1.5663766860961914, "learning_rate": 9.651410633653174e-06, "loss": 0.6105, "step": 20513 }, { "epoch": 0.53, "grad_norm": 1.6801612377166748, "learning_rate": 9.650580996516985e-06, "loss": 0.6303, "step": 20514 }, { "epoch": 0.53, "grad_norm": 1.6183985471725464, "learning_rate": 9.649751361788771e-06, "loss": 0.5657, "step": 20515 }, { "epoch": 0.53, "grad_norm": 1.5559247732162476, "learning_rate": 9.648921729474252e-06, "loss": 0.5573, "step": 20516 }, { "epoch": 0.53, "grad_norm": 1.4457207918167114, "learning_rate": 9.648092099579143e-06, "loss": 0.5047, "step": 20517 }, { "epoch": 0.53, "grad_norm": 3.2465364933013916, "learning_rate": 9.64726247210916e-06, "loss": 0.5704, "step": 20518 }, { "epoch": 0.53, "grad_norm": 1.5805679559707642, "learning_rate": 9.646432847070026e-06, "loss": 0.5722, "step": 20519 }, { "epoch": 0.53, "grad_norm": 4.952967166900635, "learning_rate": 9.645603224467454e-06, "loss": 0.6514, "step": 20520 }, { "epoch": 0.53, "grad_norm": 6.091490268707275, "learning_rate": 9.644773604307163e-06, "loss": 0.6527, "step": 20521 }, { "epoch": 0.53, "grad_norm": 1.236244797706604, "learning_rate": 9.643943986594866e-06, "loss": 0.7403, "step": 20522 }, { "epoch": 0.53, "grad_norm": 1.2476015090942383, "learning_rate": 9.643114371336285e-06, "loss": 0.6327, "step": 20523 }, { "epoch": 0.53, "grad_norm": 1.1213380098342896, "learning_rate": 9.642284758537134e-06, "loss": 0.6521, "step": 20524 }, { "epoch": 0.53, "grad_norm": 1.653554081916809, "learning_rate": 9.641455148203133e-06, "loss": 0.4721, "step": 20525 }, { "epoch": 0.53, "grad_norm": 1.4168059825897217, "learning_rate": 9.64062554034e-06, "loss": 0.6308, "step": 20526 }, { "epoch": 0.53, "grad_norm": 3.2036941051483154, "learning_rate": 9.639795934953446e-06, "loss": 0.5182, "step": 20527 }, { "epoch": 0.53, "grad_norm": 1.8359479904174805, "learning_rate": 9.638966332049196e-06, "loss": 0.6141, "step": 20528 }, { "epoch": 0.53, "grad_norm": 2.288726806640625, "learning_rate": 9.63813673163296e-06, "loss": 0.623, "step": 20529 }, { "epoch": 0.53, "grad_norm": 1.641493320465088, "learning_rate": 9.63730713371046e-06, "loss": 0.5794, "step": 20530 }, { "epoch": 0.53, "grad_norm": 1.6309961080551147, "learning_rate": 9.636477538287412e-06, "loss": 0.5837, "step": 20531 }, { "epoch": 0.53, "grad_norm": 1.329128384590149, "learning_rate": 9.63564794536953e-06, "loss": 0.3914, "step": 20532 }, { "epoch": 0.53, "grad_norm": 3.124926805496216, "learning_rate": 9.634818354962537e-06, "loss": 0.7483, "step": 20533 }, { "epoch": 0.53, "grad_norm": 2.2924156188964844, "learning_rate": 9.633988767072143e-06, "loss": 0.5644, "step": 20534 }, { "epoch": 0.53, "grad_norm": 2.196458339691162, "learning_rate": 9.63315918170407e-06, "loss": 0.782, "step": 20535 }, { "epoch": 0.53, "grad_norm": 2.044396162033081, "learning_rate": 9.632329598864034e-06, "loss": 0.4673, "step": 20536 }, { "epoch": 0.53, "grad_norm": 1.7736425399780273, "learning_rate": 9.631500018557749e-06, "loss": 0.6434, "step": 20537 }, { "epoch": 0.53, "grad_norm": 5.154769420623779, "learning_rate": 9.630670440790937e-06, "loss": 0.6708, "step": 20538 }, { "epoch": 0.53, "grad_norm": 1.5177370309829712, "learning_rate": 9.62984086556931e-06, "loss": 0.6951, "step": 20539 }, { "epoch": 0.53, "grad_norm": 1.8617749214172363, "learning_rate": 9.62901129289859e-06, "loss": 0.4725, "step": 20540 }, { "epoch": 0.53, "grad_norm": 1.4474886655807495, "learning_rate": 9.628181722784488e-06, "loss": 0.3985, "step": 20541 }, { "epoch": 0.53, "grad_norm": 1.9855599403381348, "learning_rate": 9.627352155232725e-06, "loss": 0.5912, "step": 20542 }, { "epoch": 0.53, "grad_norm": 4.257863998413086, "learning_rate": 9.62652259024902e-06, "loss": 0.7079, "step": 20543 }, { "epoch": 0.53, "grad_norm": 2.939919948577881, "learning_rate": 9.625693027839082e-06, "loss": 0.5743, "step": 20544 }, { "epoch": 0.53, "grad_norm": 1.6544486284255981, "learning_rate": 9.624863468008636e-06, "loss": 0.6828, "step": 20545 }, { "epoch": 0.53, "grad_norm": 1.5200209617614746, "learning_rate": 9.624033910763393e-06, "loss": 0.5358, "step": 20546 }, { "epoch": 0.53, "grad_norm": 6.727102279663086, "learning_rate": 9.623204356109074e-06, "loss": 0.5231, "step": 20547 }, { "epoch": 0.53, "grad_norm": 1.9421076774597168, "learning_rate": 9.622374804051392e-06, "loss": 0.6016, "step": 20548 }, { "epoch": 0.53, "grad_norm": 2.0885751247406006, "learning_rate": 9.621545254596065e-06, "loss": 0.5313, "step": 20549 }, { "epoch": 0.53, "grad_norm": 3.6293160915374756, "learning_rate": 9.620715707748813e-06, "loss": 0.6724, "step": 20550 }, { "epoch": 0.53, "grad_norm": 1.1584081649780273, "learning_rate": 9.619886163515349e-06, "loss": 0.5566, "step": 20551 }, { "epoch": 0.53, "grad_norm": 1.6571860313415527, "learning_rate": 9.61905662190139e-06, "loss": 0.6523, "step": 20552 }, { "epoch": 0.53, "grad_norm": 1.2427347898483276, "learning_rate": 9.618227082912653e-06, "loss": 0.679, "step": 20553 }, { "epoch": 0.53, "grad_norm": 1.0188883543014526, "learning_rate": 9.617397546554856e-06, "loss": 0.3992, "step": 20554 }, { "epoch": 0.53, "grad_norm": 1.778517246246338, "learning_rate": 9.616568012833716e-06, "loss": 0.635, "step": 20555 }, { "epoch": 0.53, "grad_norm": 1.2729398012161255, "learning_rate": 9.615738481754946e-06, "loss": 0.521, "step": 20556 }, { "epoch": 0.53, "grad_norm": 1.5611603260040283, "learning_rate": 9.614908953324268e-06, "loss": 0.6284, "step": 20557 }, { "epoch": 0.53, "grad_norm": 2.1585631370544434, "learning_rate": 9.614079427547393e-06, "loss": 0.7804, "step": 20558 }, { "epoch": 0.53, "grad_norm": 2.7150697708129883, "learning_rate": 9.613249904430041e-06, "loss": 0.592, "step": 20559 }, { "epoch": 0.53, "grad_norm": 1.9847084283828735, "learning_rate": 9.612420383977925e-06, "loss": 0.6205, "step": 20560 }, { "epoch": 0.53, "grad_norm": 1.0528689622879028, "learning_rate": 9.611590866196767e-06, "loss": 0.4968, "step": 20561 }, { "epoch": 0.53, "grad_norm": 2.87962007522583, "learning_rate": 9.610761351092281e-06, "loss": 0.6399, "step": 20562 }, { "epoch": 0.53, "grad_norm": 2.6718170642852783, "learning_rate": 9.609931838670182e-06, "loss": 0.5675, "step": 20563 }, { "epoch": 0.53, "grad_norm": 7.4498443603515625, "learning_rate": 9.60910232893619e-06, "loss": 0.8074, "step": 20564 }, { "epoch": 0.53, "grad_norm": 2.4161739349365234, "learning_rate": 9.608272821896017e-06, "loss": 0.6447, "step": 20565 }, { "epoch": 0.53, "grad_norm": 1.4056936502456665, "learning_rate": 9.607443317555381e-06, "loss": 0.6033, "step": 20566 }, { "epoch": 0.53, "grad_norm": 2.9314262866973877, "learning_rate": 9.606613815920002e-06, "loss": 0.5569, "step": 20567 }, { "epoch": 0.53, "grad_norm": 6.843033790588379, "learning_rate": 9.60578431699559e-06, "loss": 0.6284, "step": 20568 }, { "epoch": 0.53, "grad_norm": 4.9019598960876465, "learning_rate": 9.604954820787868e-06, "loss": 0.482, "step": 20569 }, { "epoch": 0.53, "grad_norm": 1.0674177408218384, "learning_rate": 9.604125327302545e-06, "loss": 0.573, "step": 20570 }, { "epoch": 0.53, "grad_norm": 1.708385944366455, "learning_rate": 9.603295836545344e-06, "loss": 0.5246, "step": 20571 }, { "epoch": 0.53, "grad_norm": 4.687284469604492, "learning_rate": 9.602466348521981e-06, "loss": 0.68, "step": 20572 }, { "epoch": 0.53, "grad_norm": 2.6247687339782715, "learning_rate": 9.601636863238166e-06, "loss": 0.4933, "step": 20573 }, { "epoch": 0.53, "grad_norm": 1.480368733406067, "learning_rate": 9.600807380699624e-06, "loss": 0.5289, "step": 20574 }, { "epoch": 0.53, "grad_norm": 5.967885971069336, "learning_rate": 9.599977900912062e-06, "loss": 0.5742, "step": 20575 }, { "epoch": 0.53, "grad_norm": 1.3930281400680542, "learning_rate": 9.599148423881205e-06, "loss": 0.4654, "step": 20576 }, { "epoch": 0.53, "grad_norm": 5.731527328491211, "learning_rate": 9.598318949612762e-06, "loss": 0.7224, "step": 20577 }, { "epoch": 0.53, "grad_norm": 0.9044684767723083, "learning_rate": 9.597489478112454e-06, "loss": 0.4538, "step": 20578 }, { "epoch": 0.53, "grad_norm": 3.3775064945220947, "learning_rate": 9.596660009385996e-06, "loss": 0.716, "step": 20579 }, { "epoch": 0.53, "grad_norm": 2.501406669616699, "learning_rate": 9.595830543439103e-06, "loss": 0.5419, "step": 20580 }, { "epoch": 0.53, "grad_norm": 1.278205394744873, "learning_rate": 9.595001080277493e-06, "loss": 0.5655, "step": 20581 }, { "epoch": 0.53, "grad_norm": 2.678908348083496, "learning_rate": 9.594171619906881e-06, "loss": 0.6504, "step": 20582 }, { "epoch": 0.53, "grad_norm": 5.201006889343262, "learning_rate": 9.593342162332982e-06, "loss": 0.6209, "step": 20583 }, { "epoch": 0.53, "grad_norm": 5.156996726989746, "learning_rate": 9.592512707561515e-06, "loss": 0.6131, "step": 20584 }, { "epoch": 0.53, "grad_norm": 1.2878565788269043, "learning_rate": 9.591683255598192e-06, "loss": 0.5907, "step": 20585 }, { "epoch": 0.53, "grad_norm": 2.0439252853393555, "learning_rate": 9.590853806448734e-06, "loss": 0.6249, "step": 20586 }, { "epoch": 0.53, "grad_norm": 1.5454219579696655, "learning_rate": 9.590024360118853e-06, "loss": 0.5189, "step": 20587 }, { "epoch": 0.53, "grad_norm": 1.4859970808029175, "learning_rate": 9.589194916614268e-06, "loss": 0.6522, "step": 20588 }, { "epoch": 0.53, "grad_norm": 1.428834080696106, "learning_rate": 9.588365475940692e-06, "loss": 0.5592, "step": 20589 }, { "epoch": 0.53, "grad_norm": 3.7170796394348145, "learning_rate": 9.587536038103843e-06, "loss": 0.7559, "step": 20590 }, { "epoch": 0.53, "grad_norm": 2.9107882976531982, "learning_rate": 9.586706603109436e-06, "loss": 0.5565, "step": 20591 }, { "epoch": 0.53, "grad_norm": 1.0424370765686035, "learning_rate": 9.585877170963187e-06, "loss": 0.4481, "step": 20592 }, { "epoch": 0.53, "grad_norm": 3.080871820449829, "learning_rate": 9.585047741670816e-06, "loss": 0.7043, "step": 20593 }, { "epoch": 0.53, "grad_norm": 3.0658814907073975, "learning_rate": 9.58421831523803e-06, "loss": 0.4658, "step": 20594 }, { "epoch": 0.53, "grad_norm": 3.4865121841430664, "learning_rate": 9.583388891670553e-06, "loss": 0.4448, "step": 20595 }, { "epoch": 0.53, "grad_norm": 1.5783106088638306, "learning_rate": 9.582559470974098e-06, "loss": 0.5962, "step": 20596 }, { "epoch": 0.53, "grad_norm": 1.031895637512207, "learning_rate": 9.58173005315438e-06, "loss": 0.4603, "step": 20597 }, { "epoch": 0.53, "grad_norm": 1.5851106643676758, "learning_rate": 9.580900638217118e-06, "loss": 0.58, "step": 20598 }, { "epoch": 0.53, "grad_norm": 3.791635036468506, "learning_rate": 9.580071226168023e-06, "loss": 0.4709, "step": 20599 }, { "epoch": 0.53, "grad_norm": 1.1282650232315063, "learning_rate": 9.579241817012816e-06, "loss": 0.3661, "step": 20600 }, { "epoch": 0.53, "grad_norm": 1.3089501857757568, "learning_rate": 9.578412410757205e-06, "loss": 0.4676, "step": 20601 }, { "epoch": 0.53, "grad_norm": 2.176673650741577, "learning_rate": 9.577583007406915e-06, "loss": 0.5129, "step": 20602 }, { "epoch": 0.53, "grad_norm": 13.629509925842285, "learning_rate": 9.576753606967656e-06, "loss": 0.4981, "step": 20603 }, { "epoch": 0.53, "grad_norm": 1.771056056022644, "learning_rate": 9.575924209445146e-06, "loss": 0.6321, "step": 20604 }, { "epoch": 0.53, "grad_norm": 1.437505841255188, "learning_rate": 9.5750948148451e-06, "loss": 0.5159, "step": 20605 }, { "epoch": 0.53, "grad_norm": 2.9629745483398438, "learning_rate": 9.574265423173234e-06, "loss": 0.5613, "step": 20606 }, { "epoch": 0.53, "grad_norm": 3.265887498855591, "learning_rate": 9.573436034435261e-06, "loss": 0.563, "step": 20607 }, { "epoch": 0.53, "grad_norm": 8.961052894592285, "learning_rate": 9.572606648636901e-06, "loss": 0.5929, "step": 20608 }, { "epoch": 0.53, "grad_norm": 2.6903157234191895, "learning_rate": 9.571777265783866e-06, "loss": 0.6385, "step": 20609 }, { "epoch": 0.53, "grad_norm": 1.4866576194763184, "learning_rate": 9.570947885881875e-06, "loss": 0.448, "step": 20610 }, { "epoch": 0.53, "grad_norm": 1.633039951324463, "learning_rate": 9.57011850893664e-06, "loss": 0.5765, "step": 20611 }, { "epoch": 0.53, "grad_norm": 14.799895286560059, "learning_rate": 9.569289134953878e-06, "loss": 0.6893, "step": 20612 }, { "epoch": 0.53, "grad_norm": 2.2588951587677, "learning_rate": 9.568459763939306e-06, "loss": 0.4717, "step": 20613 }, { "epoch": 0.53, "grad_norm": 1.8992031812667847, "learning_rate": 9.567630395898636e-06, "loss": 0.7417, "step": 20614 }, { "epoch": 0.53, "grad_norm": 2.1258392333984375, "learning_rate": 9.566801030837589e-06, "loss": 0.6579, "step": 20615 }, { "epoch": 0.53, "grad_norm": 5.309093952178955, "learning_rate": 9.565971668761874e-06, "loss": 0.6696, "step": 20616 }, { "epoch": 0.53, "grad_norm": 1.3038355112075806, "learning_rate": 9.565142309677211e-06, "loss": 0.6081, "step": 20617 }, { "epoch": 0.53, "grad_norm": 7.107775688171387, "learning_rate": 9.564312953589315e-06, "loss": 0.6347, "step": 20618 }, { "epoch": 0.53, "grad_norm": 1.5251901149749756, "learning_rate": 9.563483600503897e-06, "loss": 0.5458, "step": 20619 }, { "epoch": 0.53, "grad_norm": 1.6702789068222046, "learning_rate": 9.56265425042668e-06, "loss": 0.6119, "step": 20620 }, { "epoch": 0.53, "grad_norm": 3.472015142440796, "learning_rate": 9.561824903363372e-06, "loss": 0.5269, "step": 20621 }, { "epoch": 0.53, "grad_norm": 2.006862163543701, "learning_rate": 9.560995559319694e-06, "loss": 0.723, "step": 20622 }, { "epoch": 0.53, "grad_norm": 2.6440980434417725, "learning_rate": 9.560166218301355e-06, "loss": 0.6476, "step": 20623 }, { "epoch": 0.53, "grad_norm": 1.685803771018982, "learning_rate": 9.559336880314076e-06, "loss": 0.5466, "step": 20624 }, { "epoch": 0.53, "grad_norm": 1.0671718120574951, "learning_rate": 9.558507545363572e-06, "loss": 0.3798, "step": 20625 }, { "epoch": 0.53, "grad_norm": 1.4388070106506348, "learning_rate": 9.557678213455555e-06, "loss": 0.5638, "step": 20626 }, { "epoch": 0.53, "grad_norm": 1.1137555837631226, "learning_rate": 9.556848884595744e-06, "loss": 0.406, "step": 20627 }, { "epoch": 0.53, "grad_norm": 1.529371738433838, "learning_rate": 9.55601955878985e-06, "loss": 0.4273, "step": 20628 }, { "epoch": 0.53, "grad_norm": 1.6623616218566895, "learning_rate": 9.555190236043592e-06, "loss": 0.4889, "step": 20629 }, { "epoch": 0.53, "grad_norm": 1.7991995811462402, "learning_rate": 9.55436091636268e-06, "loss": 0.6006, "step": 20630 }, { "epoch": 0.53, "grad_norm": 2.2185380458831787, "learning_rate": 9.553531599752834e-06, "loss": 0.561, "step": 20631 }, { "epoch": 0.53, "grad_norm": 1.5961188077926636, "learning_rate": 9.552702286219771e-06, "loss": 0.576, "step": 20632 }, { "epoch": 0.53, "grad_norm": 1.479962944984436, "learning_rate": 9.551872975769198e-06, "loss": 0.5923, "step": 20633 }, { "epoch": 0.53, "grad_norm": 3.2582387924194336, "learning_rate": 9.55104366840684e-06, "loss": 0.4345, "step": 20634 }, { "epoch": 0.53, "grad_norm": 1.6203078031539917, "learning_rate": 9.550214364138402e-06, "loss": 0.7018, "step": 20635 }, { "epoch": 0.53, "grad_norm": 1.790804386138916, "learning_rate": 9.549385062969604e-06, "loss": 0.4808, "step": 20636 }, { "epoch": 0.53, "grad_norm": 1.2869969606399536, "learning_rate": 9.548555764906165e-06, "loss": 0.5108, "step": 20637 }, { "epoch": 0.53, "grad_norm": 2.807023525238037, "learning_rate": 9.547726469953794e-06, "loss": 0.5343, "step": 20638 }, { "epoch": 0.53, "grad_norm": 2.3022496700286865, "learning_rate": 9.54689717811821e-06, "loss": 0.4549, "step": 20639 }, { "epoch": 0.53, "grad_norm": 4.871763706207275, "learning_rate": 9.546067889405123e-06, "loss": 0.5981, "step": 20640 }, { "epoch": 0.53, "grad_norm": 1.1534690856933594, "learning_rate": 9.545238603820252e-06, "loss": 0.4265, "step": 20641 }, { "epoch": 0.53, "grad_norm": 1.4769870042800903, "learning_rate": 9.54440932136931e-06, "loss": 0.4583, "step": 20642 }, { "epoch": 0.53, "grad_norm": 1.4696545600891113, "learning_rate": 9.543580042058013e-06, "loss": 0.5006, "step": 20643 }, { "epoch": 0.53, "grad_norm": 1.8539540767669678, "learning_rate": 9.542750765892078e-06, "loss": 0.61, "step": 20644 }, { "epoch": 0.53, "grad_norm": 5.180116653442383, "learning_rate": 9.541921492877215e-06, "loss": 0.6022, "step": 20645 }, { "epoch": 0.53, "grad_norm": 1.6234257221221924, "learning_rate": 9.541092223019144e-06, "loss": 0.4818, "step": 20646 }, { "epoch": 0.53, "grad_norm": 3.74831485748291, "learning_rate": 9.540262956323574e-06, "loss": 0.7244, "step": 20647 }, { "epoch": 0.53, "grad_norm": 1.5628745555877686, "learning_rate": 9.539433692796222e-06, "loss": 0.4353, "step": 20648 }, { "epoch": 0.53, "grad_norm": 1.2068692445755005, "learning_rate": 9.538604432442808e-06, "loss": 0.4375, "step": 20649 }, { "epoch": 0.53, "grad_norm": 1.9352365732192993, "learning_rate": 9.53777517526904e-06, "loss": 0.5344, "step": 20650 }, { "epoch": 0.53, "grad_norm": 2.606036901473999, "learning_rate": 9.536945921280637e-06, "loss": 0.6913, "step": 20651 }, { "epoch": 0.53, "grad_norm": 1.0218435525894165, "learning_rate": 9.536116670483307e-06, "loss": 0.6624, "step": 20652 }, { "epoch": 0.53, "grad_norm": 6.864732265472412, "learning_rate": 9.535287422882774e-06, "loss": 0.8284, "step": 20653 }, { "epoch": 0.53, "grad_norm": 1.7502193450927734, "learning_rate": 9.534458178484746e-06, "loss": 0.493, "step": 20654 }, { "epoch": 0.53, "grad_norm": 2.801384449005127, "learning_rate": 9.53362893729494e-06, "loss": 0.5818, "step": 20655 }, { "epoch": 0.53, "grad_norm": 2.320091485977173, "learning_rate": 9.53279969931907e-06, "loss": 0.7171, "step": 20656 }, { "epoch": 0.53, "grad_norm": 0.9925020933151245, "learning_rate": 9.531970464562853e-06, "loss": 0.4158, "step": 20657 }, { "epoch": 0.53, "grad_norm": 3.0134923458099365, "learning_rate": 9.531141233032e-06, "loss": 0.6199, "step": 20658 }, { "epoch": 0.53, "grad_norm": 1.6057007312774658, "learning_rate": 9.530312004732226e-06, "loss": 0.4188, "step": 20659 }, { "epoch": 0.53, "grad_norm": 1.7618085145950317, "learning_rate": 9.529482779669247e-06, "loss": 0.4836, "step": 20660 }, { "epoch": 0.53, "grad_norm": 3.1292998790740967, "learning_rate": 9.52865355784878e-06, "loss": 0.6709, "step": 20661 }, { "epoch": 0.53, "grad_norm": 13.171121597290039, "learning_rate": 9.527824339276532e-06, "loss": 0.6721, "step": 20662 }, { "epoch": 0.53, "grad_norm": 1.3563424348831177, "learning_rate": 9.526995123958225e-06, "loss": 0.4591, "step": 20663 }, { "epoch": 0.53, "grad_norm": 1.5568606853485107, "learning_rate": 9.52616591189957e-06, "loss": 0.5296, "step": 20664 }, { "epoch": 0.53, "grad_norm": 3.0847280025482178, "learning_rate": 9.52533670310628e-06, "loss": 0.7481, "step": 20665 }, { "epoch": 0.53, "grad_norm": 1.2069082260131836, "learning_rate": 9.524507497584075e-06, "loss": 0.6244, "step": 20666 }, { "epoch": 0.53, "grad_norm": 1.4413632154464722, "learning_rate": 9.523678295338662e-06, "loss": 0.5751, "step": 20667 }, { "epoch": 0.53, "grad_norm": 1.6528772115707397, "learning_rate": 9.522849096375762e-06, "loss": 0.5783, "step": 20668 }, { "epoch": 0.53, "grad_norm": 5.5315704345703125, "learning_rate": 9.522019900701083e-06, "loss": 0.6918, "step": 20669 }, { "epoch": 0.53, "grad_norm": 2.6847383975982666, "learning_rate": 9.521190708320345e-06, "loss": 0.4859, "step": 20670 }, { "epoch": 0.53, "grad_norm": 2.2448227405548096, "learning_rate": 9.520361519239258e-06, "loss": 0.6645, "step": 20671 }, { "epoch": 0.53, "grad_norm": 2.856984853744507, "learning_rate": 9.51953233346354e-06, "loss": 0.5984, "step": 20672 }, { "epoch": 0.53, "grad_norm": 1.1591066122055054, "learning_rate": 9.518703150998902e-06, "loss": 0.4433, "step": 20673 }, { "epoch": 0.53, "grad_norm": 1.7550512552261353, "learning_rate": 9.517873971851059e-06, "loss": 0.5104, "step": 20674 }, { "epoch": 0.53, "grad_norm": 1.6084064245224, "learning_rate": 9.517044796025729e-06, "loss": 0.4529, "step": 20675 }, { "epoch": 0.53, "grad_norm": 2.042186737060547, "learning_rate": 9.516215623528618e-06, "loss": 0.3971, "step": 20676 }, { "epoch": 0.53, "grad_norm": 2.20269513130188, "learning_rate": 9.515386454365448e-06, "loss": 0.5823, "step": 20677 }, { "epoch": 0.53, "grad_norm": 1.6127859354019165, "learning_rate": 9.51455728854193e-06, "loss": 0.5109, "step": 20678 }, { "epoch": 0.53, "grad_norm": 10.137473106384277, "learning_rate": 9.513728126063776e-06, "loss": 0.6282, "step": 20679 }, { "epoch": 0.53, "grad_norm": 1.6012743711471558, "learning_rate": 9.512898966936705e-06, "loss": 0.524, "step": 20680 }, { "epoch": 0.53, "grad_norm": 1.55226731300354, "learning_rate": 9.512069811166427e-06, "loss": 0.5666, "step": 20681 }, { "epoch": 0.53, "grad_norm": 3.1741678714752197, "learning_rate": 9.511240658758657e-06, "loss": 0.7877, "step": 20682 }, { "epoch": 0.53, "grad_norm": 1.37396240234375, "learning_rate": 9.510411509719111e-06, "loss": 0.6591, "step": 20683 }, { "epoch": 0.53, "grad_norm": 1.5647826194763184, "learning_rate": 9.509582364053499e-06, "loss": 0.4764, "step": 20684 }, { "epoch": 0.53, "grad_norm": 0.9246823787689209, "learning_rate": 9.50875322176754e-06, "loss": 0.5162, "step": 20685 }, { "epoch": 0.53, "grad_norm": 2.3662657737731934, "learning_rate": 9.507924082866942e-06, "loss": 0.625, "step": 20686 }, { "epoch": 0.53, "grad_norm": 1.626846194267273, "learning_rate": 9.507094947357425e-06, "loss": 0.7471, "step": 20687 }, { "epoch": 0.53, "grad_norm": 1.1361953020095825, "learning_rate": 9.506265815244697e-06, "loss": 0.5429, "step": 20688 }, { "epoch": 0.53, "grad_norm": 4.766388893127441, "learning_rate": 9.505436686534475e-06, "loss": 0.6347, "step": 20689 }, { "epoch": 0.53, "grad_norm": 5.170994758605957, "learning_rate": 9.504607561232477e-06, "loss": 0.5204, "step": 20690 }, { "epoch": 0.53, "grad_norm": 1.597782850265503, "learning_rate": 9.503778439344406e-06, "loss": 0.5475, "step": 20691 }, { "epoch": 0.53, "grad_norm": 9.55129337310791, "learning_rate": 9.502949320875988e-06, "loss": 0.5019, "step": 20692 }, { "epoch": 0.53, "grad_norm": 3.261870861053467, "learning_rate": 9.502120205832928e-06, "loss": 0.5465, "step": 20693 }, { "epoch": 0.53, "grad_norm": 1.6972216367721558, "learning_rate": 9.501291094220945e-06, "loss": 0.6331, "step": 20694 }, { "epoch": 0.53, "grad_norm": 1.343286395072937, "learning_rate": 9.500461986045746e-06, "loss": 0.6002, "step": 20695 }, { "epoch": 0.53, "grad_norm": 2.1039209365844727, "learning_rate": 9.499632881313052e-06, "loss": 0.6345, "step": 20696 }, { "epoch": 0.53, "grad_norm": 1.2278845310211182, "learning_rate": 9.498803780028577e-06, "loss": 0.473, "step": 20697 }, { "epoch": 0.53, "grad_norm": 1.112028956413269, "learning_rate": 9.497974682198025e-06, "loss": 0.5457, "step": 20698 }, { "epoch": 0.53, "grad_norm": 1.4478570222854614, "learning_rate": 9.49714558782712e-06, "loss": 0.6648, "step": 20699 }, { "epoch": 0.53, "grad_norm": 2.4193859100341797, "learning_rate": 9.496316496921577e-06, "loss": 0.5572, "step": 20700 }, { "epoch": 0.53, "grad_norm": 1.2131433486938477, "learning_rate": 9.495487409487096e-06, "loss": 0.5239, "step": 20701 }, { "epoch": 0.53, "grad_norm": 1.1147722005844116, "learning_rate": 9.494658325529401e-06, "loss": 0.5035, "step": 20702 }, { "epoch": 0.53, "grad_norm": 1.0464683771133423, "learning_rate": 9.493829245054204e-06, "loss": 0.5328, "step": 20703 }, { "epoch": 0.53, "grad_norm": 1.2883809804916382, "learning_rate": 9.493000168067217e-06, "loss": 0.6289, "step": 20704 }, { "epoch": 0.53, "grad_norm": 3.917653799057007, "learning_rate": 9.492171094574155e-06, "loss": 0.6308, "step": 20705 }, { "epoch": 0.53, "grad_norm": 1.940056324005127, "learning_rate": 9.49134202458073e-06, "loss": 0.6401, "step": 20706 }, { "epoch": 0.53, "grad_norm": 2.5407612323760986, "learning_rate": 9.490512958092656e-06, "loss": 0.5521, "step": 20707 }, { "epoch": 0.53, "grad_norm": 2.292459726333618, "learning_rate": 9.489683895115648e-06, "loss": 0.5323, "step": 20708 }, { "epoch": 0.53, "grad_norm": 1.3923554420471191, "learning_rate": 9.488854835655417e-06, "loss": 0.5244, "step": 20709 }, { "epoch": 0.53, "grad_norm": 3.5857620239257812, "learning_rate": 9.488025779717678e-06, "loss": 0.6859, "step": 20710 }, { "epoch": 0.53, "grad_norm": 2.095167875289917, "learning_rate": 9.487196727308142e-06, "loss": 0.5194, "step": 20711 }, { "epoch": 0.53, "grad_norm": 1.6518083810806274, "learning_rate": 9.486367678432526e-06, "loss": 0.5844, "step": 20712 }, { "epoch": 0.53, "grad_norm": 2.066075086593628, "learning_rate": 9.48553863309654e-06, "loss": 0.6171, "step": 20713 }, { "epoch": 0.53, "grad_norm": 4.0136518478393555, "learning_rate": 9.484709591305898e-06, "loss": 0.5613, "step": 20714 }, { "epoch": 0.53, "grad_norm": 4.91719913482666, "learning_rate": 9.483880553066315e-06, "loss": 0.585, "step": 20715 }, { "epoch": 0.53, "grad_norm": 8.69449234008789, "learning_rate": 9.483051518383503e-06, "loss": 0.7264, "step": 20716 }, { "epoch": 0.53, "grad_norm": 4.428665637969971, "learning_rate": 9.482222487263176e-06, "loss": 0.4637, "step": 20717 }, { "epoch": 0.53, "grad_norm": 3.722999334335327, "learning_rate": 9.481393459711045e-06, "loss": 0.634, "step": 20718 }, { "epoch": 0.53, "grad_norm": 2.1549274921417236, "learning_rate": 9.480564435732823e-06, "loss": 0.5239, "step": 20719 }, { "epoch": 0.53, "grad_norm": 1.5624480247497559, "learning_rate": 9.479735415334228e-06, "loss": 0.458, "step": 20720 }, { "epoch": 0.53, "grad_norm": 4.022721767425537, "learning_rate": 9.478906398520966e-06, "loss": 0.6525, "step": 20721 }, { "epoch": 0.53, "grad_norm": 4.500644683837891, "learning_rate": 9.478077385298757e-06, "loss": 0.5108, "step": 20722 }, { "epoch": 0.53, "grad_norm": 1.3838351964950562, "learning_rate": 9.477248375673308e-06, "loss": 0.6342, "step": 20723 }, { "epoch": 0.53, "grad_norm": 1.5837339162826538, "learning_rate": 9.476419369650338e-06, "loss": 0.5191, "step": 20724 }, { "epoch": 0.53, "grad_norm": 1.439971923828125, "learning_rate": 9.475590367235555e-06, "loss": 0.4161, "step": 20725 }, { "epoch": 0.53, "grad_norm": 1.4614924192428589, "learning_rate": 9.474761368434671e-06, "loss": 0.5183, "step": 20726 }, { "epoch": 0.53, "grad_norm": 1.2439241409301758, "learning_rate": 9.473932373253406e-06, "loss": 0.5279, "step": 20727 }, { "epoch": 0.53, "grad_norm": 1.972264289855957, "learning_rate": 9.473103381697467e-06, "loss": 0.507, "step": 20728 }, { "epoch": 0.53, "grad_norm": 1.5366028547286987, "learning_rate": 9.472274393772567e-06, "loss": 0.432, "step": 20729 }, { "epoch": 0.53, "grad_norm": 2.2554478645324707, "learning_rate": 9.471445409484423e-06, "loss": 0.5609, "step": 20730 }, { "epoch": 0.53, "grad_norm": 1.323490858078003, "learning_rate": 9.470616428838742e-06, "loss": 0.4508, "step": 20731 }, { "epoch": 0.53, "grad_norm": 7.482113838195801, "learning_rate": 9.469787451841243e-06, "loss": 0.7056, "step": 20732 }, { "epoch": 0.53, "grad_norm": 2.642915725708008, "learning_rate": 9.468958478497633e-06, "loss": 0.6446, "step": 20733 }, { "epoch": 0.53, "grad_norm": 2.493380546569824, "learning_rate": 9.468129508813631e-06, "loss": 0.7193, "step": 20734 }, { "epoch": 0.53, "grad_norm": 1.362327218055725, "learning_rate": 9.467300542794943e-06, "loss": 0.3927, "step": 20735 }, { "epoch": 0.53, "grad_norm": 1.1586514711380005, "learning_rate": 9.466471580447286e-06, "loss": 0.3326, "step": 20736 }, { "epoch": 0.53, "grad_norm": 6.5170111656188965, "learning_rate": 9.465642621776373e-06, "loss": 0.6756, "step": 20737 }, { "epoch": 0.53, "grad_norm": 2.7386350631713867, "learning_rate": 9.464813666787912e-06, "loss": 0.3746, "step": 20738 }, { "epoch": 0.53, "grad_norm": 1.7747032642364502, "learning_rate": 9.463984715487623e-06, "loss": 0.5238, "step": 20739 }, { "epoch": 0.53, "grad_norm": 1.6222188472747803, "learning_rate": 9.463155767881213e-06, "loss": 0.4446, "step": 20740 }, { "epoch": 0.53, "grad_norm": 3.3220114707946777, "learning_rate": 9.462326823974397e-06, "loss": 0.7186, "step": 20741 }, { "epoch": 0.53, "grad_norm": 1.787315845489502, "learning_rate": 9.461497883772884e-06, "loss": 0.568, "step": 20742 }, { "epoch": 0.53, "grad_norm": 2.7837226390838623, "learning_rate": 9.46066894728239e-06, "loss": 0.5088, "step": 20743 }, { "epoch": 0.53, "grad_norm": 8.509963989257812, "learning_rate": 9.45984001450863e-06, "loss": 0.5667, "step": 20744 }, { "epoch": 0.53, "grad_norm": 2.2867674827575684, "learning_rate": 9.459011085457309e-06, "loss": 0.5797, "step": 20745 }, { "epoch": 0.53, "grad_norm": 1.6890356540679932, "learning_rate": 9.458182160134148e-06, "loss": 0.4575, "step": 20746 }, { "epoch": 0.53, "grad_norm": 1.258049488067627, "learning_rate": 9.457353238544853e-06, "loss": 0.5647, "step": 20747 }, { "epoch": 0.53, "grad_norm": 1.0910756587982178, "learning_rate": 9.456524320695136e-06, "loss": 0.6588, "step": 20748 }, { "epoch": 0.53, "grad_norm": 1.5983672142028809, "learning_rate": 9.455695406590715e-06, "loss": 0.5628, "step": 20749 }, { "epoch": 0.53, "grad_norm": 1.5818781852722168, "learning_rate": 9.454866496237298e-06, "loss": 0.3977, "step": 20750 }, { "epoch": 0.53, "grad_norm": 4.743034839630127, "learning_rate": 9.454037589640601e-06, "loss": 0.6572, "step": 20751 }, { "epoch": 0.53, "grad_norm": 0.9418217539787292, "learning_rate": 9.453208686806331e-06, "loss": 0.327, "step": 20752 }, { "epoch": 0.53, "grad_norm": 5.761138916015625, "learning_rate": 9.452379787740206e-06, "loss": 0.5577, "step": 20753 }, { "epoch": 0.53, "grad_norm": 1.8792035579681396, "learning_rate": 9.451550892447933e-06, "loss": 0.4876, "step": 20754 }, { "epoch": 0.53, "grad_norm": 1.5042351484298706, "learning_rate": 9.450722000935227e-06, "loss": 0.5152, "step": 20755 }, { "epoch": 0.53, "grad_norm": 1.423493504524231, "learning_rate": 9.449893113207802e-06, "loss": 0.4353, "step": 20756 }, { "epoch": 0.53, "grad_norm": 1.5304728746414185, "learning_rate": 9.449064229271365e-06, "loss": 0.4825, "step": 20757 }, { "epoch": 0.53, "grad_norm": 1.7898199558258057, "learning_rate": 9.448235349131634e-06, "loss": 0.5757, "step": 20758 }, { "epoch": 0.53, "grad_norm": 1.4505035877227783, "learning_rate": 9.447406472794316e-06, "loss": 0.679, "step": 20759 }, { "epoch": 0.53, "grad_norm": 2.510380744934082, "learning_rate": 9.446577600265125e-06, "loss": 0.8523, "step": 20760 }, { "epoch": 0.53, "grad_norm": 8.777914047241211, "learning_rate": 9.445748731549776e-06, "loss": 0.6553, "step": 20761 }, { "epoch": 0.53, "grad_norm": 8.19636344909668, "learning_rate": 9.444919866653978e-06, "loss": 0.5904, "step": 20762 }, { "epoch": 0.53, "grad_norm": 7.823781490325928, "learning_rate": 9.444091005583443e-06, "loss": 0.4665, "step": 20763 }, { "epoch": 0.53, "grad_norm": 1.4949109554290771, "learning_rate": 9.443262148343883e-06, "loss": 0.5565, "step": 20764 }, { "epoch": 0.53, "grad_norm": 1.6368318796157837, "learning_rate": 9.442433294941012e-06, "loss": 0.6698, "step": 20765 }, { "epoch": 0.53, "grad_norm": 2.0358023643493652, "learning_rate": 9.44160444538054e-06, "loss": 0.6363, "step": 20766 }, { "epoch": 0.53, "grad_norm": 1.702489972114563, "learning_rate": 9.440775599668177e-06, "loss": 0.5338, "step": 20767 }, { "epoch": 0.53, "grad_norm": 1.692725419998169, "learning_rate": 9.43994675780964e-06, "loss": 0.6839, "step": 20768 }, { "epoch": 0.53, "grad_norm": 4.364685535430908, "learning_rate": 9.439117919810637e-06, "loss": 0.4734, "step": 20769 }, { "epoch": 0.53, "grad_norm": 1.2103509902954102, "learning_rate": 9.438289085676882e-06, "loss": 0.4148, "step": 20770 }, { "epoch": 0.53, "grad_norm": 1.5165987014770508, "learning_rate": 9.437460255414084e-06, "loss": 0.5553, "step": 20771 }, { "epoch": 0.53, "grad_norm": 1.0594137907028198, "learning_rate": 9.436631429027956e-06, "loss": 0.3676, "step": 20772 }, { "epoch": 0.53, "grad_norm": 2.736898899078369, "learning_rate": 9.435802606524212e-06, "loss": 0.6785, "step": 20773 }, { "epoch": 0.53, "grad_norm": 1.0674673318862915, "learning_rate": 9.434973787908562e-06, "loss": 0.4647, "step": 20774 }, { "epoch": 0.53, "grad_norm": 1.4479997158050537, "learning_rate": 9.434144973186718e-06, "loss": 0.5202, "step": 20775 }, { "epoch": 0.53, "grad_norm": 1.7183102369308472, "learning_rate": 9.433316162364388e-06, "loss": 0.4814, "step": 20776 }, { "epoch": 0.53, "grad_norm": 1.3333290815353394, "learning_rate": 9.432487355447293e-06, "loss": 0.5671, "step": 20777 }, { "epoch": 0.53, "grad_norm": 2.0182576179504395, "learning_rate": 9.431658552441132e-06, "loss": 0.4274, "step": 20778 }, { "epoch": 0.53, "grad_norm": 1.9657987356185913, "learning_rate": 9.430829753351627e-06, "loss": 0.5457, "step": 20779 }, { "epoch": 0.53, "grad_norm": 1.3145402669906616, "learning_rate": 9.430000958184486e-06, "loss": 0.5526, "step": 20780 }, { "epoch": 0.53, "grad_norm": 7.240925312042236, "learning_rate": 9.429172166945418e-06, "loss": 0.4777, "step": 20781 }, { "epoch": 0.53, "grad_norm": 1.4754074811935425, "learning_rate": 9.428343379640138e-06, "loss": 0.4432, "step": 20782 }, { "epoch": 0.53, "grad_norm": 1.5627676248550415, "learning_rate": 9.427514596274356e-06, "loss": 0.5278, "step": 20783 }, { "epoch": 0.53, "grad_norm": 1.8140891790390015, "learning_rate": 9.426685816853785e-06, "loss": 0.5393, "step": 20784 }, { "epoch": 0.53, "grad_norm": 1.3931993246078491, "learning_rate": 9.425857041384134e-06, "loss": 0.541, "step": 20785 }, { "epoch": 0.53, "grad_norm": 2.6134815216064453, "learning_rate": 9.425028269871114e-06, "loss": 0.5868, "step": 20786 }, { "epoch": 0.53, "grad_norm": 1.4289990663528442, "learning_rate": 9.424199502320442e-06, "loss": 0.4166, "step": 20787 }, { "epoch": 0.53, "grad_norm": 3.1283581256866455, "learning_rate": 9.423370738737823e-06, "loss": 0.7474, "step": 20788 }, { "epoch": 0.53, "grad_norm": 1.0978444814682007, "learning_rate": 9.422541979128973e-06, "loss": 0.3228, "step": 20789 }, { "epoch": 0.53, "grad_norm": 1.1031737327575684, "learning_rate": 9.421713223499596e-06, "loss": 0.4516, "step": 20790 }, { "epoch": 0.53, "grad_norm": 1.3783869743347168, "learning_rate": 9.42088447185541e-06, "loss": 0.5275, "step": 20791 }, { "epoch": 0.53, "grad_norm": 1.7399011850357056, "learning_rate": 9.420055724202126e-06, "loss": 0.5151, "step": 20792 }, { "epoch": 0.53, "grad_norm": 1.6667473316192627, "learning_rate": 9.419226980545452e-06, "loss": 0.6773, "step": 20793 }, { "epoch": 0.53, "grad_norm": 4.937415599822998, "learning_rate": 9.418398240891102e-06, "loss": 0.375, "step": 20794 }, { "epoch": 0.53, "grad_norm": 3.8800506591796875, "learning_rate": 9.417569505244785e-06, "loss": 0.6578, "step": 20795 }, { "epoch": 0.53, "grad_norm": 3.4958832263946533, "learning_rate": 9.416740773612211e-06, "loss": 0.5462, "step": 20796 }, { "epoch": 0.53, "grad_norm": 2.3969807624816895, "learning_rate": 9.415912045999099e-06, "loss": 0.6058, "step": 20797 }, { "epoch": 0.53, "grad_norm": 9.974592208862305, "learning_rate": 9.415083322411149e-06, "loss": 0.795, "step": 20798 }, { "epoch": 0.53, "grad_norm": 2.0260467529296875, "learning_rate": 9.41425460285408e-06, "loss": 0.5432, "step": 20799 }, { "epoch": 0.53, "grad_norm": 2.4929513931274414, "learning_rate": 9.413425887333598e-06, "loss": 0.5604, "step": 20800 }, { "epoch": 0.53, "grad_norm": 1.005890130996704, "learning_rate": 9.412597175855418e-06, "loss": 0.5564, "step": 20801 }, { "epoch": 0.53, "grad_norm": 6.233622074127197, "learning_rate": 9.411768468425248e-06, "loss": 0.5813, "step": 20802 }, { "epoch": 0.53, "grad_norm": 6.533484935760498, "learning_rate": 9.410939765048801e-06, "loss": 0.5792, "step": 20803 }, { "epoch": 0.53, "grad_norm": 7.7918572425842285, "learning_rate": 9.410111065731788e-06, "loss": 0.4991, "step": 20804 }, { "epoch": 0.53, "grad_norm": 3.7186965942382812, "learning_rate": 9.409282370479918e-06, "loss": 0.6033, "step": 20805 }, { "epoch": 0.53, "grad_norm": 1.196913242340088, "learning_rate": 9.408453679298904e-06, "loss": 0.5358, "step": 20806 }, { "epoch": 0.53, "grad_norm": 1.2980740070343018, "learning_rate": 9.407624992194454e-06, "loss": 0.5863, "step": 20807 }, { "epoch": 0.53, "grad_norm": 2.307589054107666, "learning_rate": 9.406796309172278e-06, "loss": 0.6529, "step": 20808 }, { "epoch": 0.53, "grad_norm": 1.6090328693389893, "learning_rate": 9.405967630238092e-06, "loss": 0.5781, "step": 20809 }, { "epoch": 0.53, "grad_norm": 1.1481002569198608, "learning_rate": 9.405138955397604e-06, "loss": 0.5544, "step": 20810 }, { "epoch": 0.53, "grad_norm": 3.292081594467163, "learning_rate": 9.404310284656526e-06, "loss": 0.5638, "step": 20811 }, { "epoch": 0.53, "grad_norm": 1.9370852708816528, "learning_rate": 9.403481618020565e-06, "loss": 0.5089, "step": 20812 }, { "epoch": 0.53, "grad_norm": 2.463697910308838, "learning_rate": 9.402652955495432e-06, "loss": 0.6143, "step": 20813 }, { "epoch": 0.53, "grad_norm": 0.9797117114067078, "learning_rate": 9.401824297086844e-06, "loss": 0.2944, "step": 20814 }, { "epoch": 0.53, "grad_norm": 2.033539295196533, "learning_rate": 9.400995642800505e-06, "loss": 0.4917, "step": 20815 }, { "epoch": 0.53, "grad_norm": 2.1680986881256104, "learning_rate": 9.40016699264213e-06, "loss": 0.6901, "step": 20816 }, { "epoch": 0.53, "grad_norm": 1.7096227407455444, "learning_rate": 9.399338346617423e-06, "loss": 0.562, "step": 20817 }, { "epoch": 0.53, "grad_norm": 10.19101333618164, "learning_rate": 9.398509704732101e-06, "loss": 0.8032, "step": 20818 }, { "epoch": 0.53, "grad_norm": 3.09307861328125, "learning_rate": 9.397681066991871e-06, "loss": 0.6561, "step": 20819 }, { "epoch": 0.53, "grad_norm": 3.7491095066070557, "learning_rate": 9.396852433402446e-06, "loss": 0.6867, "step": 20820 }, { "epoch": 0.53, "grad_norm": 1.2879375219345093, "learning_rate": 9.396023803969535e-06, "loss": 0.6155, "step": 20821 }, { "epoch": 0.53, "grad_norm": 2.508155107498169, "learning_rate": 9.395195178698848e-06, "loss": 0.523, "step": 20822 }, { "epoch": 0.53, "grad_norm": 1.6428905725479126, "learning_rate": 9.394366557596097e-06, "loss": 0.5962, "step": 20823 }, { "epoch": 0.53, "grad_norm": 1.333587408065796, "learning_rate": 9.39353794066699e-06, "loss": 0.6911, "step": 20824 }, { "epoch": 0.53, "grad_norm": 1.1508225202560425, "learning_rate": 9.392709327917237e-06, "loss": 0.5644, "step": 20825 }, { "epoch": 0.53, "grad_norm": 4.989583492279053, "learning_rate": 9.391880719352554e-06, "loss": 0.4725, "step": 20826 }, { "epoch": 0.53, "grad_norm": 3.762275218963623, "learning_rate": 9.391052114978644e-06, "loss": 0.5986, "step": 20827 }, { "epoch": 0.53, "grad_norm": 2.072085380554199, "learning_rate": 9.390223514801223e-06, "loss": 0.6154, "step": 20828 }, { "epoch": 0.53, "grad_norm": 2.719604015350342, "learning_rate": 9.389394918825995e-06, "loss": 0.5843, "step": 20829 }, { "epoch": 0.53, "grad_norm": 3.6794517040252686, "learning_rate": 9.388566327058676e-06, "loss": 0.5896, "step": 20830 }, { "epoch": 0.53, "grad_norm": 1.8369847536087036, "learning_rate": 9.38773773950497e-06, "loss": 0.6004, "step": 20831 }, { "epoch": 0.53, "grad_norm": 1.5977492332458496, "learning_rate": 9.386909156170595e-06, "loss": 0.5041, "step": 20832 }, { "epoch": 0.53, "grad_norm": 2.512521743774414, "learning_rate": 9.386080577061255e-06, "loss": 0.5098, "step": 20833 }, { "epoch": 0.53, "grad_norm": 1.1379549503326416, "learning_rate": 9.385252002182662e-06, "loss": 0.5952, "step": 20834 }, { "epoch": 0.53, "grad_norm": 4.154743671417236, "learning_rate": 9.384423431540528e-06, "loss": 0.5855, "step": 20835 }, { "epoch": 0.53, "grad_norm": 1.2098442316055298, "learning_rate": 9.383594865140558e-06, "loss": 0.5869, "step": 20836 }, { "epoch": 0.53, "grad_norm": 7.54318380355835, "learning_rate": 9.382766302988466e-06, "loss": 0.5835, "step": 20837 }, { "epoch": 0.53, "grad_norm": 1.558778166770935, "learning_rate": 9.381937745089962e-06, "loss": 0.4953, "step": 20838 }, { "epoch": 0.53, "grad_norm": 1.5740817785263062, "learning_rate": 9.381109191450753e-06, "loss": 0.5006, "step": 20839 }, { "epoch": 0.53, "grad_norm": 1.0153870582580566, "learning_rate": 9.380280642076552e-06, "loss": 0.5461, "step": 20840 }, { "epoch": 0.53, "grad_norm": 2.4217817783355713, "learning_rate": 9.379452096973066e-06, "loss": 0.6172, "step": 20841 }, { "epoch": 0.53, "grad_norm": 1.921887993812561, "learning_rate": 9.378623556146009e-06, "loss": 0.4834, "step": 20842 }, { "epoch": 0.53, "grad_norm": 5.752786159515381, "learning_rate": 9.377795019601084e-06, "loss": 0.5492, "step": 20843 }, { "epoch": 0.53, "grad_norm": 2.079270124435425, "learning_rate": 9.376966487344007e-06, "loss": 0.445, "step": 20844 }, { "epoch": 0.53, "grad_norm": 5.931550979614258, "learning_rate": 9.376137959380486e-06, "loss": 0.3922, "step": 20845 }, { "epoch": 0.53, "grad_norm": 1.5385290384292603, "learning_rate": 9.37530943571623e-06, "loss": 0.4716, "step": 20846 }, { "epoch": 0.53, "grad_norm": 2.330200672149658, "learning_rate": 9.374480916356951e-06, "loss": 0.603, "step": 20847 }, { "epoch": 0.53, "grad_norm": 3.906093120574951, "learning_rate": 9.373652401308352e-06, "loss": 0.5826, "step": 20848 }, { "epoch": 0.53, "grad_norm": 7.0301289558410645, "learning_rate": 9.372823890576149e-06, "loss": 0.5618, "step": 20849 }, { "epoch": 0.53, "grad_norm": 5.157600402832031, "learning_rate": 9.371995384166051e-06, "loss": 0.6823, "step": 20850 }, { "epoch": 0.53, "grad_norm": 1.574841856956482, "learning_rate": 9.371166882083765e-06, "loss": 0.5811, "step": 20851 }, { "epoch": 0.53, "grad_norm": 1.5834810733795166, "learning_rate": 9.370338384335004e-06, "loss": 0.5879, "step": 20852 }, { "epoch": 0.53, "grad_norm": 4.560768127441406, "learning_rate": 9.369509890925471e-06, "loss": 0.7327, "step": 20853 }, { "epoch": 0.53, "grad_norm": 1.675927758216858, "learning_rate": 9.368681401860882e-06, "loss": 0.592, "step": 20854 }, { "epoch": 0.53, "grad_norm": 1.8306634426116943, "learning_rate": 9.367852917146947e-06, "loss": 0.5991, "step": 20855 }, { "epoch": 0.53, "grad_norm": 3.3933610916137695, "learning_rate": 9.367024436789367e-06, "loss": 0.5816, "step": 20856 }, { "epoch": 0.53, "grad_norm": 1.7779775857925415, "learning_rate": 9.366195960793863e-06, "loss": 0.5677, "step": 20857 }, { "epoch": 0.53, "grad_norm": 2.402102470397949, "learning_rate": 9.365367489166133e-06, "loss": 0.6136, "step": 20858 }, { "epoch": 0.53, "grad_norm": 1.5595338344573975, "learning_rate": 9.364539021911895e-06, "loss": 0.6039, "step": 20859 }, { "epoch": 0.53, "grad_norm": 2.7103235721588135, "learning_rate": 9.363710559036853e-06, "loss": 0.4573, "step": 20860 }, { "epoch": 0.53, "grad_norm": 1.5179331302642822, "learning_rate": 9.362882100546718e-06, "loss": 0.4637, "step": 20861 }, { "epoch": 0.53, "grad_norm": 2.220757007598877, "learning_rate": 9.3620536464472e-06, "loss": 0.6157, "step": 20862 }, { "epoch": 0.53, "grad_norm": 1.6622923612594604, "learning_rate": 9.361225196744006e-06, "loss": 0.415, "step": 20863 }, { "epoch": 0.53, "grad_norm": 7.504933834075928, "learning_rate": 9.36039675144285e-06, "loss": 0.5988, "step": 20864 }, { "epoch": 0.53, "grad_norm": 1.8662348985671997, "learning_rate": 9.359568310549434e-06, "loss": 0.4895, "step": 20865 }, { "epoch": 0.53, "grad_norm": 1.601670503616333, "learning_rate": 9.358739874069472e-06, "loss": 0.6834, "step": 20866 }, { "epoch": 0.53, "grad_norm": 1.1898612976074219, "learning_rate": 9.357911442008674e-06, "loss": 0.4254, "step": 20867 }, { "epoch": 0.53, "grad_norm": 3.3149940967559814, "learning_rate": 9.357083014372744e-06, "loss": 0.8275, "step": 20868 }, { "epoch": 0.53, "grad_norm": 2.540076732635498, "learning_rate": 9.356254591167396e-06, "loss": 0.6629, "step": 20869 }, { "epoch": 0.53, "grad_norm": 1.9131112098693848, "learning_rate": 9.355426172398337e-06, "loss": 0.6258, "step": 20870 }, { "epoch": 0.53, "grad_norm": 1.3478413820266724, "learning_rate": 9.354597758071276e-06, "loss": 0.4595, "step": 20871 }, { "epoch": 0.53, "grad_norm": 14.39721965789795, "learning_rate": 9.35376934819192e-06, "loss": 0.4494, "step": 20872 }, { "epoch": 0.53, "grad_norm": 1.4587355852127075, "learning_rate": 9.352940942765979e-06, "loss": 0.5517, "step": 20873 }, { "epoch": 0.54, "grad_norm": 1.5512417554855347, "learning_rate": 9.352112541799166e-06, "loss": 0.4942, "step": 20874 }, { "epoch": 0.54, "grad_norm": 4.924829006195068, "learning_rate": 9.351284145297183e-06, "loss": 0.4979, "step": 20875 }, { "epoch": 0.54, "grad_norm": 5.496407508850098, "learning_rate": 9.350455753265744e-06, "loss": 0.5748, "step": 20876 }, { "epoch": 0.54, "grad_norm": 5.645120143890381, "learning_rate": 9.349627365710554e-06, "loss": 0.6678, "step": 20877 }, { "epoch": 0.54, "grad_norm": 1.6985633373260498, "learning_rate": 9.348798982637324e-06, "loss": 0.6427, "step": 20878 }, { "epoch": 0.54, "grad_norm": 1.4068983793258667, "learning_rate": 9.347970604051765e-06, "loss": 0.6367, "step": 20879 }, { "epoch": 0.54, "grad_norm": 4.061779975891113, "learning_rate": 9.34714222995958e-06, "loss": 0.5237, "step": 20880 }, { "epoch": 0.54, "grad_norm": 1.6103302240371704, "learning_rate": 9.346313860366482e-06, "loss": 0.5709, "step": 20881 }, { "epoch": 0.54, "grad_norm": 2.332164764404297, "learning_rate": 9.345485495278177e-06, "loss": 0.5886, "step": 20882 }, { "epoch": 0.54, "grad_norm": 2.0614404678344727, "learning_rate": 9.344657134700378e-06, "loss": 0.6236, "step": 20883 }, { "epoch": 0.54, "grad_norm": 1.9320653676986694, "learning_rate": 9.343828778638786e-06, "loss": 0.4994, "step": 20884 }, { "epoch": 0.54, "grad_norm": 1.6231697797775269, "learning_rate": 9.343000427099115e-06, "loss": 0.5, "step": 20885 }, { "epoch": 0.54, "grad_norm": 1.231300950050354, "learning_rate": 9.342172080087074e-06, "loss": 0.5672, "step": 20886 }, { "epoch": 0.54, "grad_norm": 1.5135409832000732, "learning_rate": 9.341343737608368e-06, "loss": 0.7402, "step": 20887 }, { "epoch": 0.54, "grad_norm": 1.8167930841445923, "learning_rate": 9.340515399668709e-06, "loss": 0.5917, "step": 20888 }, { "epoch": 0.54, "grad_norm": 1.6180212497711182, "learning_rate": 9.339687066273802e-06, "loss": 0.4886, "step": 20889 }, { "epoch": 0.54, "grad_norm": 0.8525639772415161, "learning_rate": 9.338858737429355e-06, "loss": 0.4155, "step": 20890 }, { "epoch": 0.54, "grad_norm": 3.1212947368621826, "learning_rate": 9.338030413141084e-06, "loss": 0.4623, "step": 20891 }, { "epoch": 0.54, "grad_norm": 1.8246331214904785, "learning_rate": 9.337202093414686e-06, "loss": 0.5031, "step": 20892 }, { "epoch": 0.54, "grad_norm": 14.84946060180664, "learning_rate": 9.336373778255878e-06, "loss": 0.6438, "step": 20893 }, { "epoch": 0.54, "grad_norm": 3.038348913192749, "learning_rate": 9.335545467670364e-06, "loss": 0.4929, "step": 20894 }, { "epoch": 0.54, "grad_norm": 1.6892179250717163, "learning_rate": 9.334717161663852e-06, "loss": 0.5566, "step": 20895 }, { "epoch": 0.54, "grad_norm": 2.7844488620758057, "learning_rate": 9.333888860242053e-06, "loss": 0.5091, "step": 20896 }, { "epoch": 0.54, "grad_norm": 1.1435104608535767, "learning_rate": 9.333060563410673e-06, "loss": 0.5343, "step": 20897 }, { "epoch": 0.54, "grad_norm": 1.319339632987976, "learning_rate": 9.33223227117542e-06, "loss": 0.4561, "step": 20898 }, { "epoch": 0.54, "grad_norm": 1.4136189222335815, "learning_rate": 9.331403983542002e-06, "loss": 0.5652, "step": 20899 }, { "epoch": 0.54, "grad_norm": 1.1967710256576538, "learning_rate": 9.33057570051613e-06, "loss": 0.5511, "step": 20900 }, { "epoch": 0.54, "grad_norm": 1.9469746351242065, "learning_rate": 9.329747422103508e-06, "loss": 0.5993, "step": 20901 }, { "epoch": 0.54, "grad_norm": 1.19121515750885, "learning_rate": 9.328919148309845e-06, "loss": 0.5429, "step": 20902 }, { "epoch": 0.54, "grad_norm": 3.9713685512542725, "learning_rate": 9.328090879140852e-06, "loss": 0.5535, "step": 20903 }, { "epoch": 0.54, "grad_norm": 7.830660343170166, "learning_rate": 9.327262614602233e-06, "loss": 0.6105, "step": 20904 }, { "epoch": 0.54, "grad_norm": 3.0189497470855713, "learning_rate": 9.326434354699698e-06, "loss": 0.7145, "step": 20905 }, { "epoch": 0.54, "grad_norm": 1.7536942958831787, "learning_rate": 9.325606099438953e-06, "loss": 0.5436, "step": 20906 }, { "epoch": 0.54, "grad_norm": 3.868422508239746, "learning_rate": 9.324777848825707e-06, "loss": 0.6361, "step": 20907 }, { "epoch": 0.54, "grad_norm": 1.8451969623565674, "learning_rate": 9.323949602865671e-06, "loss": 0.5958, "step": 20908 }, { "epoch": 0.54, "grad_norm": 1.5465167760849, "learning_rate": 9.323121361564546e-06, "loss": 0.5235, "step": 20909 }, { "epoch": 0.54, "grad_norm": 6.190786361694336, "learning_rate": 9.322293124928045e-06, "loss": 0.767, "step": 20910 }, { "epoch": 0.54, "grad_norm": 1.5668258666992188, "learning_rate": 9.321464892961874e-06, "loss": 0.5281, "step": 20911 }, { "epoch": 0.54, "grad_norm": 1.681048035621643, "learning_rate": 9.320636665671742e-06, "loss": 0.5858, "step": 20912 }, { "epoch": 0.54, "grad_norm": 3.861377000808716, "learning_rate": 9.319808443063353e-06, "loss": 0.5546, "step": 20913 }, { "epoch": 0.54, "grad_norm": 5.769500732421875, "learning_rate": 9.318980225142417e-06, "loss": 0.7949, "step": 20914 }, { "epoch": 0.54, "grad_norm": 5.636148452758789, "learning_rate": 9.318152011914644e-06, "loss": 0.7288, "step": 20915 }, { "epoch": 0.54, "grad_norm": 1.8848079442977905, "learning_rate": 9.317323803385735e-06, "loss": 0.6517, "step": 20916 }, { "epoch": 0.54, "grad_norm": 5.78288459777832, "learning_rate": 9.316495599561404e-06, "loss": 0.6994, "step": 20917 }, { "epoch": 0.54, "grad_norm": 1.2515846490859985, "learning_rate": 9.315667400447354e-06, "loss": 0.5, "step": 20918 }, { "epoch": 0.54, "grad_norm": 1.7417912483215332, "learning_rate": 9.314839206049295e-06, "loss": 0.3871, "step": 20919 }, { "epoch": 0.54, "grad_norm": 1.8457281589508057, "learning_rate": 9.314011016372936e-06, "loss": 0.6599, "step": 20920 }, { "epoch": 0.54, "grad_norm": 1.7512246370315552, "learning_rate": 9.31318283142398e-06, "loss": 0.5734, "step": 20921 }, { "epoch": 0.54, "grad_norm": 1.344299554824829, "learning_rate": 9.312354651208137e-06, "loss": 0.6533, "step": 20922 }, { "epoch": 0.54, "grad_norm": 1.0881210565567017, "learning_rate": 9.311526475731113e-06, "loss": 0.4788, "step": 20923 }, { "epoch": 0.54, "grad_norm": 1.8829340934753418, "learning_rate": 9.310698304998617e-06, "loss": 0.6661, "step": 20924 }, { "epoch": 0.54, "grad_norm": 3.1404218673706055, "learning_rate": 9.309870139016355e-06, "loss": 0.6865, "step": 20925 }, { "epoch": 0.54, "grad_norm": 2.640676975250244, "learning_rate": 9.309041977790034e-06, "loss": 0.6924, "step": 20926 }, { "epoch": 0.54, "grad_norm": 1.7493232488632202, "learning_rate": 9.308213821325362e-06, "loss": 0.5299, "step": 20927 }, { "epoch": 0.54, "grad_norm": 1.4147759675979614, "learning_rate": 9.307385669628045e-06, "loss": 0.5491, "step": 20928 }, { "epoch": 0.54, "grad_norm": 8.165780067443848, "learning_rate": 9.306557522703794e-06, "loss": 0.4879, "step": 20929 }, { "epoch": 0.54, "grad_norm": 1.238985300064087, "learning_rate": 9.305729380558309e-06, "loss": 0.4817, "step": 20930 }, { "epoch": 0.54, "grad_norm": 1.361258864402771, "learning_rate": 9.3049012431973e-06, "loss": 0.4777, "step": 20931 }, { "epoch": 0.54, "grad_norm": 1.1521245241165161, "learning_rate": 9.30407311062648e-06, "loss": 0.4411, "step": 20932 }, { "epoch": 0.54, "grad_norm": 2.0759458541870117, "learning_rate": 9.303244982851546e-06, "loss": 0.6605, "step": 20933 }, { "epoch": 0.54, "grad_norm": 3.1920783519744873, "learning_rate": 9.302416859878213e-06, "loss": 0.4438, "step": 20934 }, { "epoch": 0.54, "grad_norm": 2.39133882522583, "learning_rate": 9.301588741712182e-06, "loss": 0.4691, "step": 20935 }, { "epoch": 0.54, "grad_norm": 1.310835599899292, "learning_rate": 9.300760628359167e-06, "loss": 0.5476, "step": 20936 }, { "epoch": 0.54, "grad_norm": 1.7807838916778564, "learning_rate": 9.299932519824867e-06, "loss": 0.6148, "step": 20937 }, { "epoch": 0.54, "grad_norm": 1.2604525089263916, "learning_rate": 9.299104416114991e-06, "loss": 0.5191, "step": 20938 }, { "epoch": 0.54, "grad_norm": 1.555864691734314, "learning_rate": 9.29827631723525e-06, "loss": 0.6175, "step": 20939 }, { "epoch": 0.54, "grad_norm": 1.429587721824646, "learning_rate": 9.297448223191345e-06, "loss": 0.5716, "step": 20940 }, { "epoch": 0.54, "grad_norm": 4.02254056930542, "learning_rate": 9.296620133988989e-06, "loss": 0.7728, "step": 20941 }, { "epoch": 0.54, "grad_norm": 1.3406810760498047, "learning_rate": 9.295792049633882e-06, "loss": 0.6944, "step": 20942 }, { "epoch": 0.54, "grad_norm": 1.7072064876556396, "learning_rate": 9.29496397013174e-06, "loss": 0.6283, "step": 20943 }, { "epoch": 0.54, "grad_norm": 5.609112739562988, "learning_rate": 9.294135895488257e-06, "loss": 0.6866, "step": 20944 }, { "epoch": 0.54, "grad_norm": 1.520359992980957, "learning_rate": 9.293307825709147e-06, "loss": 0.5083, "step": 20945 }, { "epoch": 0.54, "grad_norm": 1.017260193824768, "learning_rate": 9.292479760800116e-06, "loss": 0.5552, "step": 20946 }, { "epoch": 0.54, "grad_norm": 1.5333771705627441, "learning_rate": 9.291651700766871e-06, "loss": 0.7208, "step": 20947 }, { "epoch": 0.54, "grad_norm": 4.950769901275635, "learning_rate": 9.290823645615116e-06, "loss": 0.5917, "step": 20948 }, { "epoch": 0.54, "grad_norm": 1.0820549726486206, "learning_rate": 9.289995595350557e-06, "loss": 0.4781, "step": 20949 }, { "epoch": 0.54, "grad_norm": 1.538815975189209, "learning_rate": 9.289167549978907e-06, "loss": 0.5789, "step": 20950 }, { "epoch": 0.54, "grad_norm": 1.6664310693740845, "learning_rate": 9.288339509505863e-06, "loss": 0.5792, "step": 20951 }, { "epoch": 0.54, "grad_norm": 1.9788100719451904, "learning_rate": 9.28751147393714e-06, "loss": 0.6687, "step": 20952 }, { "epoch": 0.54, "grad_norm": 6.033438205718994, "learning_rate": 9.286683443278437e-06, "loss": 0.5326, "step": 20953 }, { "epoch": 0.54, "grad_norm": 1.1919034719467163, "learning_rate": 9.285855417535466e-06, "loss": 0.6892, "step": 20954 }, { "epoch": 0.54, "grad_norm": 1.5438662767410278, "learning_rate": 9.28502739671393e-06, "loss": 0.4509, "step": 20955 }, { "epoch": 0.54, "grad_norm": 5.132479667663574, "learning_rate": 9.284199380819534e-06, "loss": 0.6119, "step": 20956 }, { "epoch": 0.54, "grad_norm": 1.5391497611999512, "learning_rate": 9.283371369857989e-06, "loss": 0.5693, "step": 20957 }, { "epoch": 0.54, "grad_norm": 1.882162094116211, "learning_rate": 9.282543363834995e-06, "loss": 0.6752, "step": 20958 }, { "epoch": 0.54, "grad_norm": 1.4510661363601685, "learning_rate": 9.281715362756267e-06, "loss": 0.5228, "step": 20959 }, { "epoch": 0.54, "grad_norm": 2.260362148284912, "learning_rate": 9.2808873666275e-06, "loss": 0.573, "step": 20960 }, { "epoch": 0.54, "grad_norm": 1.099044680595398, "learning_rate": 9.280059375454407e-06, "loss": 0.4295, "step": 20961 }, { "epoch": 0.54, "grad_norm": 3.6476492881774902, "learning_rate": 9.279231389242694e-06, "loss": 0.5889, "step": 20962 }, { "epoch": 0.54, "grad_norm": 2.5484097003936768, "learning_rate": 9.278403407998063e-06, "loss": 0.5876, "step": 20963 }, { "epoch": 0.54, "grad_norm": 1.1326735019683838, "learning_rate": 9.277575431726226e-06, "loss": 0.4687, "step": 20964 }, { "epoch": 0.54, "grad_norm": 2.0410828590393066, "learning_rate": 9.27674746043288e-06, "loss": 0.5118, "step": 20965 }, { "epoch": 0.54, "grad_norm": 5.093124866485596, "learning_rate": 9.275919494123742e-06, "loss": 0.5366, "step": 20966 }, { "epoch": 0.54, "grad_norm": 2.0682623386383057, "learning_rate": 9.275091532804507e-06, "loss": 0.5465, "step": 20967 }, { "epoch": 0.54, "grad_norm": 1.8530302047729492, "learning_rate": 9.274263576480887e-06, "loss": 0.4488, "step": 20968 }, { "epoch": 0.54, "grad_norm": 1.4257915019989014, "learning_rate": 9.273435625158588e-06, "loss": 0.4804, "step": 20969 }, { "epoch": 0.54, "grad_norm": 3.2658963203430176, "learning_rate": 9.272607678843315e-06, "loss": 0.6234, "step": 20970 }, { "epoch": 0.54, "grad_norm": 1.4182761907577515, "learning_rate": 9.271779737540772e-06, "loss": 0.658, "step": 20971 }, { "epoch": 0.54, "grad_norm": 2.3917810916900635, "learning_rate": 9.270951801256663e-06, "loss": 0.6938, "step": 20972 }, { "epoch": 0.54, "grad_norm": 1.1052175760269165, "learning_rate": 9.270123869996697e-06, "loss": 0.5158, "step": 20973 }, { "epoch": 0.54, "grad_norm": 1.3624628782272339, "learning_rate": 9.26929594376658e-06, "loss": 0.5044, "step": 20974 }, { "epoch": 0.54, "grad_norm": 3.4574079513549805, "learning_rate": 9.268468022572017e-06, "loss": 0.5881, "step": 20975 }, { "epoch": 0.54, "grad_norm": 1.462891936302185, "learning_rate": 9.267640106418713e-06, "loss": 0.5901, "step": 20976 }, { "epoch": 0.54, "grad_norm": 1.6255817413330078, "learning_rate": 9.26681219531237e-06, "loss": 0.644, "step": 20977 }, { "epoch": 0.54, "grad_norm": 2.0491890907287598, "learning_rate": 9.2659842892587e-06, "loss": 0.5843, "step": 20978 }, { "epoch": 0.54, "grad_norm": 0.9667607545852661, "learning_rate": 9.265156388263403e-06, "loss": 0.4058, "step": 20979 }, { "epoch": 0.54, "grad_norm": 5.564145565032959, "learning_rate": 9.264328492332187e-06, "loss": 0.4176, "step": 20980 }, { "epoch": 0.54, "grad_norm": 8.365113258361816, "learning_rate": 9.26350060147076e-06, "loss": 0.6845, "step": 20981 }, { "epoch": 0.54, "grad_norm": 1.1917906999588013, "learning_rate": 9.262672715684819e-06, "loss": 0.5403, "step": 20982 }, { "epoch": 0.54, "grad_norm": 1.3058907985687256, "learning_rate": 9.261844834980079e-06, "loss": 0.5019, "step": 20983 }, { "epoch": 0.54, "grad_norm": 1.5997718572616577, "learning_rate": 9.261016959362237e-06, "loss": 0.6118, "step": 20984 }, { "epoch": 0.54, "grad_norm": 1.3085626363754272, "learning_rate": 9.260189088837003e-06, "loss": 0.4604, "step": 20985 }, { "epoch": 0.54, "grad_norm": 2.2271878719329834, "learning_rate": 9.259361223410081e-06, "loss": 0.573, "step": 20986 }, { "epoch": 0.54, "grad_norm": 1.2494051456451416, "learning_rate": 9.258533363087176e-06, "loss": 0.5858, "step": 20987 }, { "epoch": 0.54, "grad_norm": 1.7429934740066528, "learning_rate": 9.257705507873996e-06, "loss": 0.5954, "step": 20988 }, { "epoch": 0.54, "grad_norm": 1.35455322265625, "learning_rate": 9.25687765777624e-06, "loss": 0.6074, "step": 20989 }, { "epoch": 0.54, "grad_norm": 1.903569221496582, "learning_rate": 9.256049812799616e-06, "loss": 0.6648, "step": 20990 }, { "epoch": 0.54, "grad_norm": 4.51414155960083, "learning_rate": 9.255221972949831e-06, "loss": 0.5699, "step": 20991 }, { "epoch": 0.54, "grad_norm": 0.9171894788742065, "learning_rate": 9.254394138232588e-06, "loss": 0.5594, "step": 20992 }, { "epoch": 0.54, "grad_norm": 3.132185459136963, "learning_rate": 9.253566308653593e-06, "loss": 0.7202, "step": 20993 }, { "epoch": 0.54, "grad_norm": 1.2300058603286743, "learning_rate": 9.252738484218547e-06, "loss": 0.5318, "step": 20994 }, { "epoch": 0.54, "grad_norm": 1.9243285655975342, "learning_rate": 9.251910664933162e-06, "loss": 0.5123, "step": 20995 }, { "epoch": 0.54, "grad_norm": 2.126046895980835, "learning_rate": 9.251082850803136e-06, "loss": 0.519, "step": 20996 }, { "epoch": 0.54, "grad_norm": 2.129039764404297, "learning_rate": 9.250255041834175e-06, "loss": 0.5444, "step": 20997 }, { "epoch": 0.54, "grad_norm": 2.93241286277771, "learning_rate": 9.24942723803199e-06, "loss": 0.683, "step": 20998 }, { "epoch": 0.54, "grad_norm": 2.0996596813201904, "learning_rate": 9.248599439402277e-06, "loss": 0.6512, "step": 20999 }, { "epoch": 0.54, "grad_norm": 1.5460927486419678, "learning_rate": 9.247771645950746e-06, "loss": 0.5453, "step": 21000 }, { "epoch": 0.54, "grad_norm": 2.3245606422424316, "learning_rate": 9.2469438576831e-06, "loss": 0.4661, "step": 21001 }, { "epoch": 0.54, "grad_norm": 1.2747929096221924, "learning_rate": 9.246116074605042e-06, "loss": 0.4896, "step": 21002 }, { "epoch": 0.54, "grad_norm": 10.15328311920166, "learning_rate": 9.245288296722282e-06, "loss": 0.626, "step": 21003 }, { "epoch": 0.54, "grad_norm": 1.4327154159545898, "learning_rate": 9.244460524040518e-06, "loss": 0.5092, "step": 21004 }, { "epoch": 0.54, "grad_norm": 2.3936612606048584, "learning_rate": 9.243632756565461e-06, "loss": 0.6117, "step": 21005 }, { "epoch": 0.54, "grad_norm": 1.5988646745681763, "learning_rate": 9.242804994302808e-06, "loss": 0.4542, "step": 21006 }, { "epoch": 0.54, "grad_norm": 3.191852569580078, "learning_rate": 9.24197723725827e-06, "loss": 0.6256, "step": 21007 }, { "epoch": 0.54, "grad_norm": 1.2759884595870972, "learning_rate": 9.241149485437546e-06, "loss": 0.635, "step": 21008 }, { "epoch": 0.54, "grad_norm": 1.082289218902588, "learning_rate": 9.240321738846343e-06, "loss": 0.4581, "step": 21009 }, { "epoch": 0.54, "grad_norm": 2.529229164123535, "learning_rate": 9.239493997490367e-06, "loss": 0.4224, "step": 21010 }, { "epoch": 0.54, "grad_norm": 2.1599392890930176, "learning_rate": 9.23866626137532e-06, "loss": 0.5762, "step": 21011 }, { "epoch": 0.54, "grad_norm": 1.1618092060089111, "learning_rate": 9.237838530506908e-06, "loss": 0.596, "step": 21012 }, { "epoch": 0.54, "grad_norm": 6.122807502746582, "learning_rate": 9.237010804890833e-06, "loss": 0.6041, "step": 21013 }, { "epoch": 0.54, "grad_norm": 3.082944393157959, "learning_rate": 9.236183084532799e-06, "loss": 0.7352, "step": 21014 }, { "epoch": 0.54, "grad_norm": 1.4456976652145386, "learning_rate": 9.235355369438514e-06, "loss": 0.602, "step": 21015 }, { "epoch": 0.54, "grad_norm": 3.8421096801757812, "learning_rate": 9.234527659613675e-06, "loss": 0.6631, "step": 21016 }, { "epoch": 0.54, "grad_norm": 2.2295820713043213, "learning_rate": 9.233699955063997e-06, "loss": 0.5205, "step": 21017 }, { "epoch": 0.54, "grad_norm": 2.0063230991363525, "learning_rate": 9.23287225579517e-06, "loss": 0.7029, "step": 21018 }, { "epoch": 0.54, "grad_norm": 7.042069435119629, "learning_rate": 9.232044561812911e-06, "loss": 0.6245, "step": 21019 }, { "epoch": 0.54, "grad_norm": 3.693582057952881, "learning_rate": 9.231216873122917e-06, "loss": 0.4716, "step": 21020 }, { "epoch": 0.54, "grad_norm": 2.7232329845428467, "learning_rate": 9.230389189730891e-06, "loss": 0.5845, "step": 21021 }, { "epoch": 0.54, "grad_norm": 1.3830004930496216, "learning_rate": 9.229561511642543e-06, "loss": 0.5777, "step": 21022 }, { "epoch": 0.54, "grad_norm": 2.0455048084259033, "learning_rate": 9.22873383886357e-06, "loss": 0.7361, "step": 21023 }, { "epoch": 0.54, "grad_norm": 2.0027003288269043, "learning_rate": 9.227906171399682e-06, "loss": 0.4954, "step": 21024 }, { "epoch": 0.54, "grad_norm": 1.50035560131073, "learning_rate": 9.227078509256576e-06, "loss": 0.6916, "step": 21025 }, { "epoch": 0.54, "grad_norm": 3.3127167224884033, "learning_rate": 9.22625085243996e-06, "loss": 0.756, "step": 21026 }, { "epoch": 0.54, "grad_norm": 3.617257833480835, "learning_rate": 9.225423200955539e-06, "loss": 0.6377, "step": 21027 }, { "epoch": 0.54, "grad_norm": 1.4970061779022217, "learning_rate": 9.224595554809013e-06, "loss": 0.5261, "step": 21028 }, { "epoch": 0.54, "grad_norm": 0.8646206259727478, "learning_rate": 9.223767914006088e-06, "loss": 0.5586, "step": 21029 }, { "epoch": 0.54, "grad_norm": 1.0986372232437134, "learning_rate": 9.222940278552467e-06, "loss": 0.4373, "step": 21030 }, { "epoch": 0.54, "grad_norm": 2.70318341255188, "learning_rate": 9.22211264845385e-06, "loss": 0.5935, "step": 21031 }, { "epoch": 0.54, "grad_norm": 0.9508468508720398, "learning_rate": 9.22128502371595e-06, "loss": 0.5668, "step": 21032 }, { "epoch": 0.54, "grad_norm": 1.042604684829712, "learning_rate": 9.22045740434446e-06, "loss": 0.4512, "step": 21033 }, { "epoch": 0.54, "grad_norm": 3.9070212841033936, "learning_rate": 9.219629790345091e-06, "loss": 0.5638, "step": 21034 }, { "epoch": 0.54, "grad_norm": 1.3924897909164429, "learning_rate": 9.21880218172354e-06, "loss": 0.5074, "step": 21035 }, { "epoch": 0.54, "grad_norm": 5.245360851287842, "learning_rate": 9.217974578485516e-06, "loss": 0.5606, "step": 21036 }, { "epoch": 0.54, "grad_norm": 1.433779239654541, "learning_rate": 9.217146980636719e-06, "loss": 0.3598, "step": 21037 }, { "epoch": 0.54, "grad_norm": 7.378932476043701, "learning_rate": 9.216319388182852e-06, "loss": 0.5461, "step": 21038 }, { "epoch": 0.54, "grad_norm": 1.891113042831421, "learning_rate": 9.215491801129622e-06, "loss": 0.4899, "step": 21039 }, { "epoch": 0.54, "grad_norm": 1.5277551412582397, "learning_rate": 9.214664219482727e-06, "loss": 0.7736, "step": 21040 }, { "epoch": 0.54, "grad_norm": 1.8400378227233887, "learning_rate": 9.213836643247874e-06, "loss": 0.7357, "step": 21041 }, { "epoch": 0.54, "grad_norm": 5.852789878845215, "learning_rate": 9.213009072430764e-06, "loss": 0.5533, "step": 21042 }, { "epoch": 0.54, "grad_norm": 1.0931977033615112, "learning_rate": 9.212181507037101e-06, "loss": 0.4187, "step": 21043 }, { "epoch": 0.54, "grad_norm": 1.722089171409607, "learning_rate": 9.211353947072591e-06, "loss": 0.6215, "step": 21044 }, { "epoch": 0.54, "grad_norm": 2.3922975063323975, "learning_rate": 9.21052639254293e-06, "loss": 0.635, "step": 21045 }, { "epoch": 0.54, "grad_norm": 1.920243263244629, "learning_rate": 9.209698843453829e-06, "loss": 0.5219, "step": 21046 }, { "epoch": 0.54, "grad_norm": 1.4453537464141846, "learning_rate": 9.208871299810984e-06, "loss": 0.6841, "step": 21047 }, { "epoch": 0.54, "grad_norm": 1.4412951469421387, "learning_rate": 9.208043761620105e-06, "loss": 0.5983, "step": 21048 }, { "epoch": 0.54, "grad_norm": 1.549647569656372, "learning_rate": 9.207216228886886e-06, "loss": 0.5456, "step": 21049 }, { "epoch": 0.54, "grad_norm": 2.8677732944488525, "learning_rate": 9.206388701617036e-06, "loss": 0.8183, "step": 21050 }, { "epoch": 0.54, "grad_norm": 4.608283042907715, "learning_rate": 9.205561179816259e-06, "loss": 0.4725, "step": 21051 }, { "epoch": 0.54, "grad_norm": 2.007281541824341, "learning_rate": 9.204733663490252e-06, "loss": 0.4705, "step": 21052 }, { "epoch": 0.54, "grad_norm": 2.26442289352417, "learning_rate": 9.203906152644724e-06, "loss": 0.4619, "step": 21053 }, { "epoch": 0.54, "grad_norm": 1.726694941520691, "learning_rate": 9.203078647285372e-06, "loss": 0.5171, "step": 21054 }, { "epoch": 0.54, "grad_norm": 3.7638309001922607, "learning_rate": 9.202251147417901e-06, "loss": 0.6855, "step": 21055 }, { "epoch": 0.54, "grad_norm": 1.1216994524002075, "learning_rate": 9.201423653048016e-06, "loss": 0.5096, "step": 21056 }, { "epoch": 0.54, "grad_norm": 3.9143848419189453, "learning_rate": 9.200596164181416e-06, "loss": 0.7812, "step": 21057 }, { "epoch": 0.54, "grad_norm": 1.328251600265503, "learning_rate": 9.199768680823808e-06, "loss": 0.6357, "step": 21058 }, { "epoch": 0.54, "grad_norm": 3.2296905517578125, "learning_rate": 9.198941202980888e-06, "loss": 0.6534, "step": 21059 }, { "epoch": 0.54, "grad_norm": 2.3068559169769287, "learning_rate": 9.198113730658364e-06, "loss": 0.4871, "step": 21060 }, { "epoch": 0.54, "grad_norm": 1.948006510734558, "learning_rate": 9.197286263861935e-06, "loss": 0.5463, "step": 21061 }, { "epoch": 0.54, "grad_norm": 4.543369770050049, "learning_rate": 9.196458802597303e-06, "loss": 0.5947, "step": 21062 }, { "epoch": 0.54, "grad_norm": 1.3949869871139526, "learning_rate": 9.195631346870177e-06, "loss": 0.6615, "step": 21063 }, { "epoch": 0.54, "grad_norm": 2.9434893131256104, "learning_rate": 9.19480389668625e-06, "loss": 0.6282, "step": 21064 }, { "epoch": 0.54, "grad_norm": 1.4799964427947998, "learning_rate": 9.193976452051231e-06, "loss": 0.6663, "step": 21065 }, { "epoch": 0.54, "grad_norm": 2.351816415786743, "learning_rate": 9.19314901297082e-06, "loss": 0.5442, "step": 21066 }, { "epoch": 0.54, "grad_norm": 1.5467737913131714, "learning_rate": 9.192321579450717e-06, "loss": 0.6651, "step": 21067 }, { "epoch": 0.54, "grad_norm": 4.021343231201172, "learning_rate": 9.191494151496629e-06, "loss": 0.5049, "step": 21068 }, { "epoch": 0.54, "grad_norm": 1.246768832206726, "learning_rate": 9.190666729114252e-06, "loss": 0.4806, "step": 21069 }, { "epoch": 0.54, "grad_norm": 5.210280418395996, "learning_rate": 9.189839312309296e-06, "loss": 0.8556, "step": 21070 }, { "epoch": 0.54, "grad_norm": 1.4617387056350708, "learning_rate": 9.189011901087454e-06, "loss": 0.4955, "step": 21071 }, { "epoch": 0.54, "grad_norm": 1.7972359657287598, "learning_rate": 9.188184495454437e-06, "loss": 0.5077, "step": 21072 }, { "epoch": 0.54, "grad_norm": 1.4410994052886963, "learning_rate": 9.187357095415937e-06, "loss": 0.5554, "step": 21073 }, { "epoch": 0.54, "grad_norm": 1.985643982887268, "learning_rate": 9.186529700977664e-06, "loss": 0.4646, "step": 21074 }, { "epoch": 0.54, "grad_norm": 1.850924015045166, "learning_rate": 9.185702312145319e-06, "loss": 0.5764, "step": 21075 }, { "epoch": 0.54, "grad_norm": 1.3548976182937622, "learning_rate": 9.184874928924599e-06, "loss": 0.4575, "step": 21076 }, { "epoch": 0.54, "grad_norm": 3.2759578227996826, "learning_rate": 9.184047551321212e-06, "loss": 0.4662, "step": 21077 }, { "epoch": 0.54, "grad_norm": 2.0877280235290527, "learning_rate": 9.183220179340855e-06, "loss": 0.6479, "step": 21078 }, { "epoch": 0.54, "grad_norm": 2.436413288116455, "learning_rate": 9.18239281298923e-06, "loss": 0.5231, "step": 21079 }, { "epoch": 0.54, "grad_norm": 1.5485913753509521, "learning_rate": 9.181565452272042e-06, "loss": 0.6125, "step": 21080 }, { "epoch": 0.54, "grad_norm": 1.8759304285049438, "learning_rate": 9.180738097194988e-06, "loss": 0.5591, "step": 21081 }, { "epoch": 0.54, "grad_norm": 1.4668936729431152, "learning_rate": 9.179910747763776e-06, "loss": 0.5461, "step": 21082 }, { "epoch": 0.54, "grad_norm": 4.446603775024414, "learning_rate": 9.179083403984102e-06, "loss": 0.6487, "step": 21083 }, { "epoch": 0.54, "grad_norm": 1.594428539276123, "learning_rate": 9.17825606586167e-06, "loss": 0.5457, "step": 21084 }, { "epoch": 0.54, "grad_norm": 1.4792345762252808, "learning_rate": 9.177428733402181e-06, "loss": 0.5651, "step": 21085 }, { "epoch": 0.54, "grad_norm": 1.6951733827590942, "learning_rate": 9.176601406611335e-06, "loss": 0.4654, "step": 21086 }, { "epoch": 0.54, "grad_norm": 1.0475586652755737, "learning_rate": 9.175774085494837e-06, "loss": 0.5457, "step": 21087 }, { "epoch": 0.54, "grad_norm": 1.5058408975601196, "learning_rate": 9.174946770058384e-06, "loss": 0.4655, "step": 21088 }, { "epoch": 0.54, "grad_norm": 1.3950263261795044, "learning_rate": 9.174119460307683e-06, "loss": 0.4126, "step": 21089 }, { "epoch": 0.54, "grad_norm": 1.7425113916397095, "learning_rate": 9.173292156248428e-06, "loss": 0.6209, "step": 21090 }, { "epoch": 0.54, "grad_norm": 2.271488666534424, "learning_rate": 9.172464857886325e-06, "loss": 0.4819, "step": 21091 }, { "epoch": 0.54, "grad_norm": 0.9884616136550903, "learning_rate": 9.171637565227074e-06, "loss": 0.427, "step": 21092 }, { "epoch": 0.54, "grad_norm": 1.3034604787826538, "learning_rate": 9.170810278276377e-06, "loss": 0.61, "step": 21093 }, { "epoch": 0.54, "grad_norm": 2.228771209716797, "learning_rate": 9.169982997039937e-06, "loss": 0.6087, "step": 21094 }, { "epoch": 0.54, "grad_norm": 1.4516475200653076, "learning_rate": 9.169155721523448e-06, "loss": 0.6259, "step": 21095 }, { "epoch": 0.54, "grad_norm": 2.026596784591675, "learning_rate": 9.168328451732618e-06, "loss": 0.4956, "step": 21096 }, { "epoch": 0.54, "grad_norm": 10.095904350280762, "learning_rate": 9.167501187673147e-06, "loss": 0.5349, "step": 21097 }, { "epoch": 0.54, "grad_norm": 4.155965328216553, "learning_rate": 9.166673929350734e-06, "loss": 0.5651, "step": 21098 }, { "epoch": 0.54, "grad_norm": 1.442712426185608, "learning_rate": 9.165846676771082e-06, "loss": 0.565, "step": 21099 }, { "epoch": 0.54, "grad_norm": 1.1706974506378174, "learning_rate": 9.165019429939887e-06, "loss": 0.406, "step": 21100 }, { "epoch": 0.54, "grad_norm": 1.63673996925354, "learning_rate": 9.164192188862858e-06, "loss": 0.6482, "step": 21101 }, { "epoch": 0.54, "grad_norm": 2.8649072647094727, "learning_rate": 9.163364953545688e-06, "loss": 0.5106, "step": 21102 }, { "epoch": 0.54, "grad_norm": 5.204161643981934, "learning_rate": 9.162537723994082e-06, "loss": 0.7032, "step": 21103 }, { "epoch": 0.54, "grad_norm": 1.9617151021957397, "learning_rate": 9.161710500213741e-06, "loss": 0.5789, "step": 21104 }, { "epoch": 0.54, "grad_norm": 1.6616404056549072, "learning_rate": 9.160883282210362e-06, "loss": 0.4595, "step": 21105 }, { "epoch": 0.54, "grad_norm": 1.2111765146255493, "learning_rate": 9.160056069989652e-06, "loss": 0.6424, "step": 21106 }, { "epoch": 0.54, "grad_norm": 0.9886237382888794, "learning_rate": 9.159228863557305e-06, "loss": 0.5547, "step": 21107 }, { "epoch": 0.54, "grad_norm": 2.08742094039917, "learning_rate": 9.158401662919023e-06, "loss": 0.4057, "step": 21108 }, { "epoch": 0.54, "grad_norm": 1.7253977060317993, "learning_rate": 9.157574468080513e-06, "loss": 0.7368, "step": 21109 }, { "epoch": 0.54, "grad_norm": 4.42363977432251, "learning_rate": 9.156747279047466e-06, "loss": 0.7582, "step": 21110 }, { "epoch": 0.54, "grad_norm": 6.171353816986084, "learning_rate": 9.15592009582559e-06, "loss": 0.5581, "step": 21111 }, { "epoch": 0.54, "grad_norm": 2.2057788372039795, "learning_rate": 9.155092918420579e-06, "loss": 0.7358, "step": 21112 }, { "epoch": 0.54, "grad_norm": 1.672122597694397, "learning_rate": 9.15426574683814e-06, "loss": 0.6336, "step": 21113 }, { "epoch": 0.54, "grad_norm": 1.882489800453186, "learning_rate": 9.153438581083968e-06, "loss": 0.5622, "step": 21114 }, { "epoch": 0.54, "grad_norm": 2.896345853805542, "learning_rate": 9.152611421163766e-06, "loss": 0.5266, "step": 21115 }, { "epoch": 0.54, "grad_norm": 7.713200092315674, "learning_rate": 9.151784267083236e-06, "loss": 0.5807, "step": 21116 }, { "epoch": 0.54, "grad_norm": 1.4298681020736694, "learning_rate": 9.150957118848072e-06, "loss": 0.5132, "step": 21117 }, { "epoch": 0.54, "grad_norm": 1.3483054637908936, "learning_rate": 9.150129976463981e-06, "loss": 0.5027, "step": 21118 }, { "epoch": 0.54, "grad_norm": 2.231567144393921, "learning_rate": 9.14930283993666e-06, "loss": 0.4856, "step": 21119 }, { "epoch": 0.54, "grad_norm": 4.026254177093506, "learning_rate": 9.148475709271807e-06, "loss": 0.6724, "step": 21120 }, { "epoch": 0.54, "grad_norm": 1.5639148950576782, "learning_rate": 9.147648584475129e-06, "loss": 0.746, "step": 21121 }, { "epoch": 0.54, "grad_norm": 3.014042615890503, "learning_rate": 9.146821465552318e-06, "loss": 0.5926, "step": 21122 }, { "epoch": 0.54, "grad_norm": 4.504148483276367, "learning_rate": 9.145994352509077e-06, "loss": 0.5009, "step": 21123 }, { "epoch": 0.54, "grad_norm": 0.8763904571533203, "learning_rate": 9.145167245351108e-06, "loss": 0.3615, "step": 21124 }, { "epoch": 0.54, "grad_norm": 1.728564977645874, "learning_rate": 9.14434014408411e-06, "loss": 0.584, "step": 21125 }, { "epoch": 0.54, "grad_norm": 1.1195564270019531, "learning_rate": 9.143513048713778e-06, "loss": 0.4523, "step": 21126 }, { "epoch": 0.54, "grad_norm": 1.204023003578186, "learning_rate": 9.14268595924582e-06, "loss": 0.4144, "step": 21127 }, { "epoch": 0.54, "grad_norm": 1.9687716960906982, "learning_rate": 9.141858875685932e-06, "loss": 0.6254, "step": 21128 }, { "epoch": 0.54, "grad_norm": 2.5506324768066406, "learning_rate": 9.14103179803981e-06, "loss": 0.6022, "step": 21129 }, { "epoch": 0.54, "grad_norm": 4.59144401550293, "learning_rate": 9.140204726313161e-06, "loss": 0.5715, "step": 21130 }, { "epoch": 0.54, "grad_norm": 5.564054489135742, "learning_rate": 9.139377660511679e-06, "loss": 0.6188, "step": 21131 }, { "epoch": 0.54, "grad_norm": 2.812824249267578, "learning_rate": 9.138550600641064e-06, "loss": 0.497, "step": 21132 }, { "epoch": 0.54, "grad_norm": 2.26912522315979, "learning_rate": 9.13772354670702e-06, "loss": 0.4176, "step": 21133 }, { "epoch": 0.54, "grad_norm": 2.7953217029571533, "learning_rate": 9.136896498715242e-06, "loss": 0.5836, "step": 21134 }, { "epoch": 0.54, "grad_norm": 2.414290428161621, "learning_rate": 9.136069456671433e-06, "loss": 0.4536, "step": 21135 }, { "epoch": 0.54, "grad_norm": 2.578064441680908, "learning_rate": 9.135242420581288e-06, "loss": 0.576, "step": 21136 }, { "epoch": 0.54, "grad_norm": 1.3544358015060425, "learning_rate": 9.13441539045051e-06, "loss": 0.4589, "step": 21137 }, { "epoch": 0.54, "grad_norm": 2.7160229682922363, "learning_rate": 9.133588366284796e-06, "loss": 0.623, "step": 21138 }, { "epoch": 0.54, "grad_norm": 1.4313640594482422, "learning_rate": 9.132761348089846e-06, "loss": 0.4749, "step": 21139 }, { "epoch": 0.54, "grad_norm": 2.124422788619995, "learning_rate": 9.131934335871363e-06, "loss": 0.7128, "step": 21140 }, { "epoch": 0.54, "grad_norm": 1.530778408050537, "learning_rate": 9.131107329635042e-06, "loss": 0.5258, "step": 21141 }, { "epoch": 0.54, "grad_norm": 10.020781517028809, "learning_rate": 9.130280329386585e-06, "loss": 0.5848, "step": 21142 }, { "epoch": 0.54, "grad_norm": 2.4590320587158203, "learning_rate": 9.129453335131687e-06, "loss": 0.5442, "step": 21143 }, { "epoch": 0.54, "grad_norm": 1.1516731977462769, "learning_rate": 9.128626346876049e-06, "loss": 0.4831, "step": 21144 }, { "epoch": 0.54, "grad_norm": 1.5021618604660034, "learning_rate": 9.127799364625374e-06, "loss": 0.5558, "step": 21145 }, { "epoch": 0.54, "grad_norm": 1.3382521867752075, "learning_rate": 9.126972388385354e-06, "loss": 0.4084, "step": 21146 }, { "epoch": 0.54, "grad_norm": 1.7128403186798096, "learning_rate": 9.126145418161696e-06, "loss": 0.6408, "step": 21147 }, { "epoch": 0.54, "grad_norm": 2.0654876232147217, "learning_rate": 9.125318453960091e-06, "loss": 0.6693, "step": 21148 }, { "epoch": 0.54, "grad_norm": 1.4305511713027954, "learning_rate": 9.124491495786244e-06, "loss": 0.511, "step": 21149 }, { "epoch": 0.54, "grad_norm": 6.599347114562988, "learning_rate": 9.123664543645851e-06, "loss": 0.8182, "step": 21150 }, { "epoch": 0.54, "grad_norm": 1.7627166509628296, "learning_rate": 9.12283759754461e-06, "loss": 0.6534, "step": 21151 }, { "epoch": 0.54, "grad_norm": 1.2376577854156494, "learning_rate": 9.122010657488225e-06, "loss": 0.3589, "step": 21152 }, { "epoch": 0.54, "grad_norm": 4.312007427215576, "learning_rate": 9.121183723482387e-06, "loss": 0.6856, "step": 21153 }, { "epoch": 0.54, "grad_norm": 1.5830775499343872, "learning_rate": 9.120356795532802e-06, "loss": 0.607, "step": 21154 }, { "epoch": 0.54, "grad_norm": 1.3434481620788574, "learning_rate": 9.119529873645163e-06, "loss": 0.5472, "step": 21155 }, { "epoch": 0.54, "grad_norm": 3.9755213260650635, "learning_rate": 9.11870295782517e-06, "loss": 0.5744, "step": 21156 }, { "epoch": 0.54, "grad_norm": 0.8016297221183777, "learning_rate": 9.117876048078524e-06, "loss": 0.5262, "step": 21157 }, { "epoch": 0.54, "grad_norm": 2.538214921951294, "learning_rate": 9.117049144410922e-06, "loss": 0.636, "step": 21158 }, { "epoch": 0.54, "grad_norm": 1.7058014869689941, "learning_rate": 9.116222246828063e-06, "loss": 0.6037, "step": 21159 }, { "epoch": 0.54, "grad_norm": 1.4711415767669678, "learning_rate": 9.115395355335645e-06, "loss": 0.4465, "step": 21160 }, { "epoch": 0.54, "grad_norm": 2.221508026123047, "learning_rate": 9.114568469939364e-06, "loss": 0.612, "step": 21161 }, { "epoch": 0.54, "grad_norm": 2.090635299682617, "learning_rate": 9.113741590644925e-06, "loss": 0.4871, "step": 21162 }, { "epoch": 0.54, "grad_norm": 0.8127344846725464, "learning_rate": 9.112914717458018e-06, "loss": 0.4336, "step": 21163 }, { "epoch": 0.54, "grad_norm": 1.6155500411987305, "learning_rate": 9.11208785038435e-06, "loss": 0.5945, "step": 21164 }, { "epoch": 0.54, "grad_norm": 1.6638503074645996, "learning_rate": 9.111260989429612e-06, "loss": 0.5776, "step": 21165 }, { "epoch": 0.54, "grad_norm": 1.4950505495071411, "learning_rate": 9.110434134599505e-06, "loss": 0.498, "step": 21166 }, { "epoch": 0.54, "grad_norm": 1.576854944229126, "learning_rate": 9.109607285899726e-06, "loss": 0.7305, "step": 21167 }, { "epoch": 0.54, "grad_norm": 1.3876886367797852, "learning_rate": 9.108780443335974e-06, "loss": 0.6433, "step": 21168 }, { "epoch": 0.54, "grad_norm": 2.005063056945801, "learning_rate": 9.107953606913951e-06, "loss": 0.7714, "step": 21169 }, { "epoch": 0.54, "grad_norm": 6.170684814453125, "learning_rate": 9.107126776639347e-06, "loss": 0.5866, "step": 21170 }, { "epoch": 0.54, "grad_norm": 2.255201578140259, "learning_rate": 9.106299952517867e-06, "loss": 0.5638, "step": 21171 }, { "epoch": 0.54, "grad_norm": 1.6445280313491821, "learning_rate": 9.105473134555204e-06, "loss": 0.5925, "step": 21172 }, { "epoch": 0.54, "grad_norm": 5.414241790771484, "learning_rate": 9.10464632275706e-06, "loss": 0.7483, "step": 21173 }, { "epoch": 0.54, "grad_norm": 1.571158766746521, "learning_rate": 9.103819517129131e-06, "loss": 0.5349, "step": 21174 }, { "epoch": 0.54, "grad_norm": 1.0495212078094482, "learning_rate": 9.102992717677114e-06, "loss": 0.5523, "step": 21175 }, { "epoch": 0.54, "grad_norm": 2.222350835800171, "learning_rate": 9.10216592440671e-06, "loss": 0.6494, "step": 21176 }, { "epoch": 0.54, "grad_norm": 6.432540416717529, "learning_rate": 9.101339137323611e-06, "loss": 0.4424, "step": 21177 }, { "epoch": 0.54, "grad_norm": 4.200922012329102, "learning_rate": 9.100512356433519e-06, "loss": 0.6365, "step": 21178 }, { "epoch": 0.54, "grad_norm": 1.5437510013580322, "learning_rate": 9.099685581742133e-06, "loss": 0.5873, "step": 21179 }, { "epoch": 0.54, "grad_norm": 1.4452965259552002, "learning_rate": 9.098858813255144e-06, "loss": 0.5017, "step": 21180 }, { "epoch": 0.54, "grad_norm": 1.2455781698226929, "learning_rate": 9.098032050978259e-06, "loss": 0.3354, "step": 21181 }, { "epoch": 0.54, "grad_norm": 2.636873245239258, "learning_rate": 9.097205294917167e-06, "loss": 0.5264, "step": 21182 }, { "epoch": 0.54, "grad_norm": 1.441037893295288, "learning_rate": 9.096378545077572e-06, "loss": 0.3369, "step": 21183 }, { "epoch": 0.54, "grad_norm": 2.247101306915283, "learning_rate": 9.095551801465164e-06, "loss": 0.7757, "step": 21184 }, { "epoch": 0.54, "grad_norm": 1.8752809762954712, "learning_rate": 9.09472506408565e-06, "loss": 0.5172, "step": 21185 }, { "epoch": 0.54, "grad_norm": 1.3287863731384277, "learning_rate": 9.093898332944718e-06, "loss": 0.5447, "step": 21186 }, { "epoch": 0.54, "grad_norm": 3.5319631099700928, "learning_rate": 9.093071608048071e-06, "loss": 0.5767, "step": 21187 }, { "epoch": 0.54, "grad_norm": 1.1704623699188232, "learning_rate": 9.092244889401404e-06, "loss": 0.59, "step": 21188 }, { "epoch": 0.54, "grad_norm": 1.568041443824768, "learning_rate": 9.091418177010418e-06, "loss": 0.5846, "step": 21189 }, { "epoch": 0.54, "grad_norm": 0.9570544362068176, "learning_rate": 9.090591470880802e-06, "loss": 0.4838, "step": 21190 }, { "epoch": 0.54, "grad_norm": 0.9729098081588745, "learning_rate": 9.08976477101826e-06, "loss": 0.4574, "step": 21191 }, { "epoch": 0.54, "grad_norm": 1.9762104749679565, "learning_rate": 9.088938077428489e-06, "loss": 0.5643, "step": 21192 }, { "epoch": 0.54, "grad_norm": 1.8435516357421875, "learning_rate": 9.088111390117182e-06, "loss": 0.4213, "step": 21193 }, { "epoch": 0.54, "grad_norm": 1.2683523893356323, "learning_rate": 9.08728470909004e-06, "loss": 0.5647, "step": 21194 }, { "epoch": 0.54, "grad_norm": 1.3457759618759155, "learning_rate": 9.086458034352758e-06, "loss": 0.5016, "step": 21195 }, { "epoch": 0.54, "grad_norm": 9.083005905151367, "learning_rate": 9.085631365911035e-06, "loss": 0.4995, "step": 21196 }, { "epoch": 0.54, "grad_norm": 1.827929139137268, "learning_rate": 9.084804703770562e-06, "loss": 0.5813, "step": 21197 }, { "epoch": 0.54, "grad_norm": 1.3458307981491089, "learning_rate": 9.083978047937041e-06, "loss": 0.6564, "step": 21198 }, { "epoch": 0.54, "grad_norm": 1.7471517324447632, "learning_rate": 9.08315139841617e-06, "loss": 0.5231, "step": 21199 }, { "epoch": 0.54, "grad_norm": 1.6850162744522095, "learning_rate": 9.082324755213642e-06, "loss": 0.528, "step": 21200 }, { "epoch": 0.54, "grad_norm": 1.402759075164795, "learning_rate": 9.081498118335159e-06, "loss": 0.4211, "step": 21201 }, { "epoch": 0.54, "grad_norm": 1.577997088432312, "learning_rate": 9.08067148778641e-06, "loss": 0.4961, "step": 21202 }, { "epoch": 0.54, "grad_norm": 1.7336900234222412, "learning_rate": 9.079844863573095e-06, "loss": 0.5663, "step": 21203 }, { "epoch": 0.54, "grad_norm": 1.2257094383239746, "learning_rate": 9.079018245700914e-06, "loss": 0.4212, "step": 21204 }, { "epoch": 0.54, "grad_norm": 2.8989498615264893, "learning_rate": 9.078191634175557e-06, "loss": 0.4967, "step": 21205 }, { "epoch": 0.54, "grad_norm": 1.1704217195510864, "learning_rate": 9.077365029002728e-06, "loss": 0.5855, "step": 21206 }, { "epoch": 0.54, "grad_norm": 1.1546974182128906, "learning_rate": 9.076538430188118e-06, "loss": 0.4831, "step": 21207 }, { "epoch": 0.54, "grad_norm": 1.0863165855407715, "learning_rate": 9.075711837737425e-06, "loss": 0.5233, "step": 21208 }, { "epoch": 0.54, "grad_norm": 2.9046905040740967, "learning_rate": 9.074885251656344e-06, "loss": 0.6355, "step": 21209 }, { "epoch": 0.54, "grad_norm": 1.7492009401321411, "learning_rate": 9.074058671950573e-06, "loss": 0.5761, "step": 21210 }, { "epoch": 0.54, "grad_norm": 1.4208961725234985, "learning_rate": 9.07323209862581e-06, "loss": 0.4974, "step": 21211 }, { "epoch": 0.54, "grad_norm": 1.1588616371154785, "learning_rate": 9.072405531687746e-06, "loss": 0.5009, "step": 21212 }, { "epoch": 0.54, "grad_norm": 1.778422236442566, "learning_rate": 9.071578971142084e-06, "loss": 0.4128, "step": 21213 }, { "epoch": 0.54, "grad_norm": 1.1045094728469849, "learning_rate": 9.070752416994513e-06, "loss": 0.3578, "step": 21214 }, { "epoch": 0.54, "grad_norm": 1.0405558347702026, "learning_rate": 9.069925869250734e-06, "loss": 0.4907, "step": 21215 }, { "epoch": 0.54, "grad_norm": 1.991865873336792, "learning_rate": 9.069099327916444e-06, "loss": 0.6335, "step": 21216 }, { "epoch": 0.54, "grad_norm": 1.4159884452819824, "learning_rate": 9.068272792997332e-06, "loss": 0.4906, "step": 21217 }, { "epoch": 0.54, "grad_norm": 1.67532217502594, "learning_rate": 9.067446264499102e-06, "loss": 0.4112, "step": 21218 }, { "epoch": 0.54, "grad_norm": 1.3125741481781006, "learning_rate": 9.066619742427444e-06, "loss": 0.5387, "step": 21219 }, { "epoch": 0.54, "grad_norm": 1.0498181581497192, "learning_rate": 9.065793226788057e-06, "loss": 0.4902, "step": 21220 }, { "epoch": 0.54, "grad_norm": 1.642173171043396, "learning_rate": 9.064966717586637e-06, "loss": 0.4219, "step": 21221 }, { "epoch": 0.54, "grad_norm": 1.8174580335617065, "learning_rate": 9.064140214828878e-06, "loss": 0.5403, "step": 21222 }, { "epoch": 0.54, "grad_norm": 2.6017251014709473, "learning_rate": 9.063313718520479e-06, "loss": 0.6634, "step": 21223 }, { "epoch": 0.54, "grad_norm": 2.2056291103363037, "learning_rate": 9.06248722866713e-06, "loss": 0.6521, "step": 21224 }, { "epoch": 0.54, "grad_norm": 1.2653919458389282, "learning_rate": 9.061660745274533e-06, "loss": 0.5156, "step": 21225 }, { "epoch": 0.54, "grad_norm": 7.784726142883301, "learning_rate": 9.060834268348379e-06, "loss": 0.7209, "step": 21226 }, { "epoch": 0.54, "grad_norm": 1.9188199043273926, "learning_rate": 9.060007797894364e-06, "loss": 0.4692, "step": 21227 }, { "epoch": 0.54, "grad_norm": 1.1097813844680786, "learning_rate": 9.059181333918188e-06, "loss": 0.5096, "step": 21228 }, { "epoch": 0.54, "grad_norm": 3.7560322284698486, "learning_rate": 9.05835487642554e-06, "loss": 0.7873, "step": 21229 }, { "epoch": 0.54, "grad_norm": 1.2738603353500366, "learning_rate": 9.05752842542212e-06, "loss": 0.4388, "step": 21230 }, { "epoch": 0.54, "grad_norm": 1.2102556228637695, "learning_rate": 9.056701980913623e-06, "loss": 0.4866, "step": 21231 }, { "epoch": 0.54, "grad_norm": 9.625247955322266, "learning_rate": 9.05587554290574e-06, "loss": 0.5297, "step": 21232 }, { "epoch": 0.54, "grad_norm": 4.767704010009766, "learning_rate": 9.055049111404173e-06, "loss": 0.5714, "step": 21233 }, { "epoch": 0.54, "grad_norm": 1.4248411655426025, "learning_rate": 9.054222686414612e-06, "loss": 0.4657, "step": 21234 }, { "epoch": 0.54, "grad_norm": 1.7050052881240845, "learning_rate": 9.053396267942755e-06, "loss": 0.4347, "step": 21235 }, { "epoch": 0.54, "grad_norm": 7.0157904624938965, "learning_rate": 9.052569855994296e-06, "loss": 0.6982, "step": 21236 }, { "epoch": 0.54, "grad_norm": 3.249340295791626, "learning_rate": 9.051743450574932e-06, "loss": 0.5961, "step": 21237 }, { "epoch": 0.54, "grad_norm": 1.7870635986328125, "learning_rate": 9.050917051690353e-06, "loss": 0.5471, "step": 21238 }, { "epoch": 0.54, "grad_norm": 1.3785090446472168, "learning_rate": 9.050090659346258e-06, "loss": 0.6084, "step": 21239 }, { "epoch": 0.54, "grad_norm": 1.0659137964248657, "learning_rate": 9.049264273548343e-06, "loss": 0.5453, "step": 21240 }, { "epoch": 0.54, "grad_norm": 1.6455907821655273, "learning_rate": 9.048437894302301e-06, "loss": 0.4335, "step": 21241 }, { "epoch": 0.54, "grad_norm": 1.871372103691101, "learning_rate": 9.047611521613829e-06, "loss": 0.5395, "step": 21242 }, { "epoch": 0.54, "grad_norm": 2.456289529800415, "learning_rate": 9.046785155488615e-06, "loss": 0.6518, "step": 21243 }, { "epoch": 0.54, "grad_norm": 5.422550201416016, "learning_rate": 9.045958795932362e-06, "loss": 0.6753, "step": 21244 }, { "epoch": 0.54, "grad_norm": 1.715195655822754, "learning_rate": 9.045132442950762e-06, "loss": 0.6933, "step": 21245 }, { "epoch": 0.54, "grad_norm": 1.7423641681671143, "learning_rate": 9.044306096549508e-06, "loss": 0.6286, "step": 21246 }, { "epoch": 0.54, "grad_norm": 1.1901891231536865, "learning_rate": 9.043479756734299e-06, "loss": 0.5488, "step": 21247 }, { "epoch": 0.54, "grad_norm": 1.3239139318466187, "learning_rate": 9.042653423510823e-06, "loss": 0.4545, "step": 21248 }, { "epoch": 0.54, "grad_norm": 2.3169825077056885, "learning_rate": 9.04182709688478e-06, "loss": 0.6851, "step": 21249 }, { "epoch": 0.54, "grad_norm": 6.1175217628479, "learning_rate": 9.041000776861862e-06, "loss": 0.5711, "step": 21250 }, { "epoch": 0.54, "grad_norm": 1.1828868389129639, "learning_rate": 9.040174463447762e-06, "loss": 0.4841, "step": 21251 }, { "epoch": 0.54, "grad_norm": 1.5244275331497192, "learning_rate": 9.03934815664818e-06, "loss": 0.5943, "step": 21252 }, { "epoch": 0.54, "grad_norm": 3.880051374435425, "learning_rate": 9.038521856468805e-06, "loss": 0.4534, "step": 21253 }, { "epoch": 0.54, "grad_norm": 2.029466390609741, "learning_rate": 9.037695562915335e-06, "loss": 0.5024, "step": 21254 }, { "epoch": 0.54, "grad_norm": 2.1716952323913574, "learning_rate": 9.03686927599346e-06, "loss": 0.5927, "step": 21255 }, { "epoch": 0.54, "grad_norm": 1.896777868270874, "learning_rate": 9.036042995708877e-06, "loss": 0.5386, "step": 21256 }, { "epoch": 0.54, "grad_norm": 7.874247074127197, "learning_rate": 9.035216722067283e-06, "loss": 0.6171, "step": 21257 }, { "epoch": 0.54, "grad_norm": 2.3252530097961426, "learning_rate": 9.034390455074367e-06, "loss": 0.5124, "step": 21258 }, { "epoch": 0.54, "grad_norm": 1.8048473596572876, "learning_rate": 9.033564194735828e-06, "loss": 0.5489, "step": 21259 }, { "epoch": 0.54, "grad_norm": 5.960934162139893, "learning_rate": 9.032737941057355e-06, "loss": 0.3968, "step": 21260 }, { "epoch": 0.54, "grad_norm": 8.996803283691406, "learning_rate": 9.031911694044646e-06, "loss": 0.4389, "step": 21261 }, { "epoch": 0.54, "grad_norm": 1.8392688035964966, "learning_rate": 9.031085453703391e-06, "loss": 0.6612, "step": 21262 }, { "epoch": 0.54, "grad_norm": 1.47792649269104, "learning_rate": 9.030259220039287e-06, "loss": 0.458, "step": 21263 }, { "epoch": 0.55, "grad_norm": 5.085816383361816, "learning_rate": 9.029432993058029e-06, "loss": 0.5122, "step": 21264 }, { "epoch": 0.55, "grad_norm": 1.4782840013504028, "learning_rate": 9.028606772765307e-06, "loss": 0.6872, "step": 21265 }, { "epoch": 0.55, "grad_norm": 4.8487868309021, "learning_rate": 9.027780559166819e-06, "loss": 0.6065, "step": 21266 }, { "epoch": 0.55, "grad_norm": 2.1482043266296387, "learning_rate": 9.026954352268254e-06, "loss": 0.6562, "step": 21267 }, { "epoch": 0.55, "grad_norm": 1.185977816581726, "learning_rate": 9.02612815207531e-06, "loss": 0.4497, "step": 21268 }, { "epoch": 0.55, "grad_norm": 2.628438949584961, "learning_rate": 9.025301958593679e-06, "loss": 0.5801, "step": 21269 }, { "epoch": 0.55, "grad_norm": 1.7097417116165161, "learning_rate": 9.024475771829052e-06, "loss": 0.7509, "step": 21270 }, { "epoch": 0.55, "grad_norm": 1.9781203269958496, "learning_rate": 9.02364959178713e-06, "loss": 0.4684, "step": 21271 }, { "epoch": 0.55, "grad_norm": 6.946122169494629, "learning_rate": 9.022823418473596e-06, "loss": 0.5553, "step": 21272 }, { "epoch": 0.55, "grad_norm": 1.8849607706069946, "learning_rate": 9.02199725189415e-06, "loss": 0.5409, "step": 21273 }, { "epoch": 0.55, "grad_norm": 4.712571144104004, "learning_rate": 9.021171092054489e-06, "loss": 0.8485, "step": 21274 }, { "epoch": 0.55, "grad_norm": 1.8860973119735718, "learning_rate": 9.020344938960297e-06, "loss": 0.516, "step": 21275 }, { "epoch": 0.55, "grad_norm": 6.500773906707764, "learning_rate": 9.019518792617277e-06, "loss": 0.5734, "step": 21276 }, { "epoch": 0.55, "grad_norm": 1.7713779211044312, "learning_rate": 9.018692653031113e-06, "loss": 0.5582, "step": 21277 }, { "epoch": 0.55, "grad_norm": 3.9634768962860107, "learning_rate": 9.017866520207506e-06, "loss": 0.5036, "step": 21278 }, { "epoch": 0.55, "grad_norm": 7.400994300842285, "learning_rate": 9.017040394152143e-06, "loss": 0.5809, "step": 21279 }, { "epoch": 0.55, "grad_norm": 1.6423070430755615, "learning_rate": 9.016214274870721e-06, "loss": 0.6134, "step": 21280 }, { "epoch": 0.55, "grad_norm": 1.4245948791503906, "learning_rate": 9.015388162368934e-06, "loss": 0.4816, "step": 21281 }, { "epoch": 0.55, "grad_norm": 1.4573794603347778, "learning_rate": 9.014562056652472e-06, "loss": 0.4847, "step": 21282 }, { "epoch": 0.55, "grad_norm": 3.7055158615112305, "learning_rate": 9.01373595772703e-06, "loss": 0.4277, "step": 21283 }, { "epoch": 0.55, "grad_norm": 1.1769976615905762, "learning_rate": 9.012909865598298e-06, "loss": 0.6847, "step": 21284 }, { "epoch": 0.55, "grad_norm": 4.097756862640381, "learning_rate": 9.01208378027197e-06, "loss": 0.4839, "step": 21285 }, { "epoch": 0.55, "grad_norm": 1.7430109977722168, "learning_rate": 9.011257701753744e-06, "loss": 0.6482, "step": 21286 }, { "epoch": 0.55, "grad_norm": 1.45085608959198, "learning_rate": 9.010431630049307e-06, "loss": 0.4964, "step": 21287 }, { "epoch": 0.55, "grad_norm": 1.346541166305542, "learning_rate": 9.009605565164355e-06, "loss": 0.4565, "step": 21288 }, { "epoch": 0.55, "grad_norm": 6.653155326843262, "learning_rate": 9.008779507104575e-06, "loss": 0.5182, "step": 21289 }, { "epoch": 0.55, "grad_norm": 1.159954309463501, "learning_rate": 9.007953455875669e-06, "loss": 0.5049, "step": 21290 }, { "epoch": 0.55, "grad_norm": 2.6449568271636963, "learning_rate": 9.007127411483321e-06, "loss": 0.5998, "step": 21291 }, { "epoch": 0.55, "grad_norm": 1.0878167152404785, "learning_rate": 9.006301373933226e-06, "loss": 0.5695, "step": 21292 }, { "epoch": 0.55, "grad_norm": 1.7890933752059937, "learning_rate": 9.005475343231081e-06, "loss": 0.6633, "step": 21293 }, { "epoch": 0.55, "grad_norm": 1.6158844232559204, "learning_rate": 9.004649319382574e-06, "loss": 0.5317, "step": 21294 }, { "epoch": 0.55, "grad_norm": 3.3275647163391113, "learning_rate": 9.0038233023934e-06, "loss": 0.5129, "step": 21295 }, { "epoch": 0.55, "grad_norm": 15.36898422241211, "learning_rate": 9.002997292269247e-06, "loss": 0.5332, "step": 21296 }, { "epoch": 0.55, "grad_norm": 2.2164149284362793, "learning_rate": 9.002171289015811e-06, "loss": 0.6245, "step": 21297 }, { "epoch": 0.55, "grad_norm": 1.5733060836791992, "learning_rate": 9.001345292638785e-06, "loss": 0.6317, "step": 21298 }, { "epoch": 0.55, "grad_norm": 3.7142937183380127, "learning_rate": 9.000519303143857e-06, "loss": 0.5305, "step": 21299 }, { "epoch": 0.55, "grad_norm": 7.525929927825928, "learning_rate": 8.999693320536726e-06, "loss": 0.7275, "step": 21300 }, { "epoch": 0.55, "grad_norm": 1.0942192077636719, "learning_rate": 8.998867344823077e-06, "loss": 0.3645, "step": 21301 }, { "epoch": 0.55, "grad_norm": 1.4290262460708618, "learning_rate": 8.998041376008608e-06, "loss": 0.5418, "step": 21302 }, { "epoch": 0.55, "grad_norm": 1.4828715324401855, "learning_rate": 8.997215414099006e-06, "loss": 0.5878, "step": 21303 }, { "epoch": 0.55, "grad_norm": 1.3911482095718384, "learning_rate": 8.996389459099966e-06, "loss": 0.4626, "step": 21304 }, { "epoch": 0.55, "grad_norm": 1.1773698329925537, "learning_rate": 8.99556351101718e-06, "loss": 0.4697, "step": 21305 }, { "epoch": 0.55, "grad_norm": 9.880603790283203, "learning_rate": 8.994737569856338e-06, "loss": 0.4303, "step": 21306 }, { "epoch": 0.55, "grad_norm": 1.2306785583496094, "learning_rate": 8.993911635623135e-06, "loss": 0.4069, "step": 21307 }, { "epoch": 0.55, "grad_norm": 1.142849087715149, "learning_rate": 8.99308570832326e-06, "loss": 0.4799, "step": 21308 }, { "epoch": 0.55, "grad_norm": 1.3737252950668335, "learning_rate": 8.992259787962402e-06, "loss": 0.5628, "step": 21309 }, { "epoch": 0.55, "grad_norm": 1.4845938682556152, "learning_rate": 8.991433874546263e-06, "loss": 0.499, "step": 21310 }, { "epoch": 0.55, "grad_norm": 2.7307043075561523, "learning_rate": 8.990607968080524e-06, "loss": 0.5682, "step": 21311 }, { "epoch": 0.55, "grad_norm": 1.1785247325897217, "learning_rate": 8.989782068570882e-06, "loss": 0.4712, "step": 21312 }, { "epoch": 0.55, "grad_norm": 9.820212364196777, "learning_rate": 8.988956176023028e-06, "loss": 0.5912, "step": 21313 }, { "epoch": 0.55, "grad_norm": 3.1717209815979004, "learning_rate": 8.98813029044265e-06, "loss": 0.879, "step": 21314 }, { "epoch": 0.55, "grad_norm": 2.7176363468170166, "learning_rate": 8.987304411835444e-06, "loss": 0.543, "step": 21315 }, { "epoch": 0.55, "grad_norm": 1.753799319267273, "learning_rate": 8.9864785402071e-06, "loss": 0.4668, "step": 21316 }, { "epoch": 0.55, "grad_norm": 1.4022023677825928, "learning_rate": 8.98565267556331e-06, "loss": 0.637, "step": 21317 }, { "epoch": 0.55, "grad_norm": 1.7748022079467773, "learning_rate": 8.984826817909763e-06, "loss": 0.5057, "step": 21318 }, { "epoch": 0.55, "grad_norm": 1.754471778869629, "learning_rate": 8.984000967252153e-06, "loss": 0.4363, "step": 21319 }, { "epoch": 0.55, "grad_norm": 1.4832640886306763, "learning_rate": 8.983175123596168e-06, "loss": 0.5831, "step": 21320 }, { "epoch": 0.55, "grad_norm": 5.157922267913818, "learning_rate": 8.982349286947503e-06, "loss": 0.6888, "step": 21321 }, { "epoch": 0.55, "grad_norm": 1.1532753705978394, "learning_rate": 8.981523457311848e-06, "loss": 0.5435, "step": 21322 }, { "epoch": 0.55, "grad_norm": 1.0647008419036865, "learning_rate": 8.980697634694891e-06, "loss": 0.6058, "step": 21323 }, { "epoch": 0.55, "grad_norm": 2.3861591815948486, "learning_rate": 8.979871819102328e-06, "loss": 0.6547, "step": 21324 }, { "epoch": 0.55, "grad_norm": 1.2846741676330566, "learning_rate": 8.979046010539845e-06, "loss": 0.6074, "step": 21325 }, { "epoch": 0.55, "grad_norm": 4.638919353485107, "learning_rate": 8.978220209013136e-06, "loss": 0.5875, "step": 21326 }, { "epoch": 0.55, "grad_norm": 7.615731716156006, "learning_rate": 8.977394414527894e-06, "loss": 0.6655, "step": 21327 }, { "epoch": 0.55, "grad_norm": 1.5941462516784668, "learning_rate": 8.976568627089805e-06, "loss": 0.7125, "step": 21328 }, { "epoch": 0.55, "grad_norm": 0.9818629622459412, "learning_rate": 8.975742846704564e-06, "loss": 0.5217, "step": 21329 }, { "epoch": 0.55, "grad_norm": 1.9862463474273682, "learning_rate": 8.974917073377857e-06, "loss": 0.5354, "step": 21330 }, { "epoch": 0.55, "grad_norm": 2.70687198638916, "learning_rate": 8.97409130711538e-06, "loss": 0.6393, "step": 21331 }, { "epoch": 0.55, "grad_norm": 1.3194389343261719, "learning_rate": 8.973265547922818e-06, "loss": 0.6058, "step": 21332 }, { "epoch": 0.55, "grad_norm": 4.288248062133789, "learning_rate": 8.972439795805867e-06, "loss": 0.7372, "step": 21333 }, { "epoch": 0.55, "grad_norm": 1.0339890718460083, "learning_rate": 8.971614050770217e-06, "loss": 0.5306, "step": 21334 }, { "epoch": 0.55, "grad_norm": 8.307890892028809, "learning_rate": 8.970788312821554e-06, "loss": 0.6052, "step": 21335 }, { "epoch": 0.55, "grad_norm": 3.9570560455322266, "learning_rate": 8.969962581965574e-06, "loss": 0.5783, "step": 21336 }, { "epoch": 0.55, "grad_norm": 1.4867839813232422, "learning_rate": 8.969136858207962e-06, "loss": 0.4691, "step": 21337 }, { "epoch": 0.55, "grad_norm": 1.2344001531600952, "learning_rate": 8.968311141554412e-06, "loss": 0.508, "step": 21338 }, { "epoch": 0.55, "grad_norm": 2.755460500717163, "learning_rate": 8.967485432010614e-06, "loss": 0.5471, "step": 21339 }, { "epoch": 0.55, "grad_norm": 1.6263447999954224, "learning_rate": 8.966659729582257e-06, "loss": 0.4962, "step": 21340 }, { "epoch": 0.55, "grad_norm": 1.316063642501831, "learning_rate": 8.965834034275033e-06, "loss": 0.4826, "step": 21341 }, { "epoch": 0.55, "grad_norm": 2.142186403274536, "learning_rate": 8.96500834609463e-06, "loss": 0.6033, "step": 21342 }, { "epoch": 0.55, "grad_norm": 1.181928277015686, "learning_rate": 8.96418266504674e-06, "loss": 0.4475, "step": 21343 }, { "epoch": 0.55, "grad_norm": 1.6017011404037476, "learning_rate": 8.96335699113705e-06, "loss": 0.677, "step": 21344 }, { "epoch": 0.55, "grad_norm": 1.8906906843185425, "learning_rate": 8.962531324371254e-06, "loss": 0.5076, "step": 21345 }, { "epoch": 0.55, "grad_norm": 2.656176805496216, "learning_rate": 8.961705664755043e-06, "loss": 0.5666, "step": 21346 }, { "epoch": 0.55, "grad_norm": 1.63832688331604, "learning_rate": 8.9608800122941e-06, "loss": 0.4664, "step": 21347 }, { "epoch": 0.55, "grad_norm": 1.99643874168396, "learning_rate": 8.960054366994123e-06, "loss": 0.6022, "step": 21348 }, { "epoch": 0.55, "grad_norm": 0.8645721077919006, "learning_rate": 8.959228728860794e-06, "loss": 0.5222, "step": 21349 }, { "epoch": 0.55, "grad_norm": 1.2224315404891968, "learning_rate": 8.958403097899808e-06, "loss": 0.4797, "step": 21350 }, { "epoch": 0.55, "grad_norm": 3.305574417114258, "learning_rate": 8.957577474116856e-06, "loss": 0.6262, "step": 21351 }, { "epoch": 0.55, "grad_norm": 11.529094696044922, "learning_rate": 8.956751857517622e-06, "loss": 0.5049, "step": 21352 }, { "epoch": 0.55, "grad_norm": 3.817983865737915, "learning_rate": 8.955926248107801e-06, "loss": 0.5673, "step": 21353 }, { "epoch": 0.55, "grad_norm": 5.579779148101807, "learning_rate": 8.955100645893078e-06, "loss": 0.6344, "step": 21354 }, { "epoch": 0.55, "grad_norm": 1.084625005722046, "learning_rate": 8.954275050879148e-06, "loss": 0.5653, "step": 21355 }, { "epoch": 0.55, "grad_norm": 3.2035067081451416, "learning_rate": 8.953449463071695e-06, "loss": 0.5969, "step": 21356 }, { "epoch": 0.55, "grad_norm": 11.90583324432373, "learning_rate": 8.95262388247641e-06, "loss": 0.7881, "step": 21357 }, { "epoch": 0.55, "grad_norm": 6.602420330047607, "learning_rate": 8.951798309098986e-06, "loss": 0.6808, "step": 21358 }, { "epoch": 0.55, "grad_norm": 1.4039077758789062, "learning_rate": 8.950972742945107e-06, "loss": 0.5841, "step": 21359 }, { "epoch": 0.55, "grad_norm": 1.3424831628799438, "learning_rate": 8.950147184020467e-06, "loss": 0.5301, "step": 21360 }, { "epoch": 0.55, "grad_norm": 2.0666868686676025, "learning_rate": 8.94932163233075e-06, "loss": 0.4939, "step": 21361 }, { "epoch": 0.55, "grad_norm": 1.5128973722457886, "learning_rate": 8.94849608788165e-06, "loss": 0.6484, "step": 21362 }, { "epoch": 0.55, "grad_norm": 1.2331305742263794, "learning_rate": 8.947670550678855e-06, "loss": 0.4889, "step": 21363 }, { "epoch": 0.55, "grad_norm": 1.3766087293624878, "learning_rate": 8.946845020728052e-06, "loss": 0.4599, "step": 21364 }, { "epoch": 0.55, "grad_norm": 1.625238060951233, "learning_rate": 8.946019498034935e-06, "loss": 0.525, "step": 21365 }, { "epoch": 0.55, "grad_norm": 1.7114118337631226, "learning_rate": 8.945193982605185e-06, "loss": 0.6453, "step": 21366 }, { "epoch": 0.55, "grad_norm": 1.2397311925888062, "learning_rate": 8.944368474444497e-06, "loss": 0.5208, "step": 21367 }, { "epoch": 0.55, "grad_norm": 2.425109386444092, "learning_rate": 8.943542973558558e-06, "loss": 0.6421, "step": 21368 }, { "epoch": 0.55, "grad_norm": 1.5438215732574463, "learning_rate": 8.942717479953057e-06, "loss": 0.4536, "step": 21369 }, { "epoch": 0.55, "grad_norm": 2.7118844985961914, "learning_rate": 8.941891993633685e-06, "loss": 0.6289, "step": 21370 }, { "epoch": 0.55, "grad_norm": 4.368587493896484, "learning_rate": 8.941066514606126e-06, "loss": 0.8039, "step": 21371 }, { "epoch": 0.55, "grad_norm": 1.3488878011703491, "learning_rate": 8.940241042876073e-06, "loss": 0.4876, "step": 21372 }, { "epoch": 0.55, "grad_norm": 1.2567994594573975, "learning_rate": 8.939415578449212e-06, "loss": 0.4858, "step": 21373 }, { "epoch": 0.55, "grad_norm": 1.7347543239593506, "learning_rate": 8.938590121331233e-06, "loss": 0.6238, "step": 21374 }, { "epoch": 0.55, "grad_norm": 4.962185382843018, "learning_rate": 8.937764671527823e-06, "loss": 0.6458, "step": 21375 }, { "epoch": 0.55, "grad_norm": 1.0825231075286865, "learning_rate": 8.936939229044672e-06, "loss": 0.5229, "step": 21376 }, { "epoch": 0.55, "grad_norm": 6.106166839599609, "learning_rate": 8.93611379388747e-06, "loss": 0.5136, "step": 21377 }, { "epoch": 0.55, "grad_norm": 2.7986364364624023, "learning_rate": 8.935288366061901e-06, "loss": 0.6021, "step": 21378 }, { "epoch": 0.55, "grad_norm": 2.5464179515838623, "learning_rate": 8.934462945573653e-06, "loss": 0.456, "step": 21379 }, { "epoch": 0.55, "grad_norm": 3.4087092876434326, "learning_rate": 8.933637532428424e-06, "loss": 0.712, "step": 21380 }, { "epoch": 0.55, "grad_norm": 1.522781252861023, "learning_rate": 8.93281212663189e-06, "loss": 0.5267, "step": 21381 }, { "epoch": 0.55, "grad_norm": 1.9477183818817139, "learning_rate": 8.931986728189746e-06, "loss": 0.3915, "step": 21382 }, { "epoch": 0.55, "grad_norm": 2.8088300228118896, "learning_rate": 8.931161337107678e-06, "loss": 0.6314, "step": 21383 }, { "epoch": 0.55, "grad_norm": 1.4818860292434692, "learning_rate": 8.930335953391375e-06, "loss": 0.598, "step": 21384 }, { "epoch": 0.55, "grad_norm": 1.2920695543289185, "learning_rate": 8.929510577046524e-06, "loss": 0.6131, "step": 21385 }, { "epoch": 0.55, "grad_norm": 11.050309181213379, "learning_rate": 8.928685208078812e-06, "loss": 0.5705, "step": 21386 }, { "epoch": 0.55, "grad_norm": 2.036440849304199, "learning_rate": 8.927859846493932e-06, "loss": 0.5733, "step": 21387 }, { "epoch": 0.55, "grad_norm": 1.438097596168518, "learning_rate": 8.927034492297566e-06, "loss": 0.4704, "step": 21388 }, { "epoch": 0.55, "grad_norm": 2.1927082538604736, "learning_rate": 8.926209145495406e-06, "loss": 0.5164, "step": 21389 }, { "epoch": 0.55, "grad_norm": 1.7140100002288818, "learning_rate": 8.925383806093136e-06, "loss": 0.4933, "step": 21390 }, { "epoch": 0.55, "grad_norm": 1.279856562614441, "learning_rate": 8.924558474096446e-06, "loss": 0.3138, "step": 21391 }, { "epoch": 0.55, "grad_norm": 1.0899031162261963, "learning_rate": 8.923733149511025e-06, "loss": 0.5537, "step": 21392 }, { "epoch": 0.55, "grad_norm": 1.1681725978851318, "learning_rate": 8.922907832342556e-06, "loss": 0.5476, "step": 21393 }, { "epoch": 0.55, "grad_norm": 1.3305774927139282, "learning_rate": 8.922082522596733e-06, "loss": 0.6422, "step": 21394 }, { "epoch": 0.55, "grad_norm": 0.9609471559524536, "learning_rate": 8.921257220279237e-06, "loss": 0.497, "step": 21395 }, { "epoch": 0.55, "grad_norm": 1.0393109321594238, "learning_rate": 8.920431925395762e-06, "loss": 0.5564, "step": 21396 }, { "epoch": 0.55, "grad_norm": 5.530872344970703, "learning_rate": 8.919606637951988e-06, "loss": 0.4545, "step": 21397 }, { "epoch": 0.55, "grad_norm": 1.4830478429794312, "learning_rate": 8.918781357953608e-06, "loss": 0.5997, "step": 21398 }, { "epoch": 0.55, "grad_norm": 1.9695018529891968, "learning_rate": 8.917956085406308e-06, "loss": 0.49, "step": 21399 }, { "epoch": 0.55, "grad_norm": 4.039414882659912, "learning_rate": 8.917130820315773e-06, "loss": 0.596, "step": 21400 }, { "epoch": 0.55, "grad_norm": 1.3369922637939453, "learning_rate": 8.916305562687695e-06, "loss": 0.4405, "step": 21401 }, { "epoch": 0.55, "grad_norm": 1.4447320699691772, "learning_rate": 8.915480312527756e-06, "loss": 0.5501, "step": 21402 }, { "epoch": 0.55, "grad_norm": 8.510144233703613, "learning_rate": 8.914655069841646e-06, "loss": 0.4727, "step": 21403 }, { "epoch": 0.55, "grad_norm": 1.1418533325195312, "learning_rate": 8.913829834635053e-06, "loss": 0.4186, "step": 21404 }, { "epoch": 0.55, "grad_norm": 3.6326029300689697, "learning_rate": 8.913004606913659e-06, "loss": 0.5122, "step": 21405 }, { "epoch": 0.55, "grad_norm": 7.73630428314209, "learning_rate": 8.912179386683158e-06, "loss": 0.5109, "step": 21406 }, { "epoch": 0.55, "grad_norm": 9.243630409240723, "learning_rate": 8.91135417394923e-06, "loss": 0.7353, "step": 21407 }, { "epoch": 0.55, "grad_norm": 2.08786940574646, "learning_rate": 8.910528968717565e-06, "loss": 0.618, "step": 21408 }, { "epoch": 0.55, "grad_norm": 2.7240724563598633, "learning_rate": 8.909703770993855e-06, "loss": 0.5122, "step": 21409 }, { "epoch": 0.55, "grad_norm": 1.6135083436965942, "learning_rate": 8.908878580783775e-06, "loss": 0.5303, "step": 21410 }, { "epoch": 0.55, "grad_norm": 1.814394235610962, "learning_rate": 8.908053398093022e-06, "loss": 0.4458, "step": 21411 }, { "epoch": 0.55, "grad_norm": 5.142496585845947, "learning_rate": 8.907228222927278e-06, "loss": 0.5116, "step": 21412 }, { "epoch": 0.55, "grad_norm": 1.1916202306747437, "learning_rate": 8.906403055292232e-06, "loss": 0.4268, "step": 21413 }, { "epoch": 0.55, "grad_norm": 2.041046142578125, "learning_rate": 8.905577895193567e-06, "loss": 0.6438, "step": 21414 }, { "epoch": 0.55, "grad_norm": 1.1373544931411743, "learning_rate": 8.904752742636972e-06, "loss": 0.5389, "step": 21415 }, { "epoch": 0.55, "grad_norm": 1.4996715784072876, "learning_rate": 8.903927597628135e-06, "loss": 0.6349, "step": 21416 }, { "epoch": 0.55, "grad_norm": 4.524784088134766, "learning_rate": 8.903102460172738e-06, "loss": 0.607, "step": 21417 }, { "epoch": 0.55, "grad_norm": 4.505724906921387, "learning_rate": 8.90227733027647e-06, "loss": 0.6847, "step": 21418 }, { "epoch": 0.55, "grad_norm": 3.775026798248291, "learning_rate": 8.901452207945018e-06, "loss": 0.5687, "step": 21419 }, { "epoch": 0.55, "grad_norm": 1.5914644002914429, "learning_rate": 8.900627093184066e-06, "loss": 0.6131, "step": 21420 }, { "epoch": 0.55, "grad_norm": 1.9859427213668823, "learning_rate": 8.899801985999302e-06, "loss": 0.6571, "step": 21421 }, { "epoch": 0.55, "grad_norm": 1.4057092666625977, "learning_rate": 8.898976886396411e-06, "loss": 0.567, "step": 21422 }, { "epoch": 0.55, "grad_norm": 1.2327830791473389, "learning_rate": 8.898151794381083e-06, "loss": 0.5031, "step": 21423 }, { "epoch": 0.55, "grad_norm": 1.6712292432785034, "learning_rate": 8.897326709958996e-06, "loss": 0.6357, "step": 21424 }, { "epoch": 0.55, "grad_norm": 1.5764367580413818, "learning_rate": 8.896501633135844e-06, "loss": 0.4447, "step": 21425 }, { "epoch": 0.55, "grad_norm": 1.6258220672607422, "learning_rate": 8.895676563917305e-06, "loss": 0.6831, "step": 21426 }, { "epoch": 0.55, "grad_norm": 1.0501203536987305, "learning_rate": 8.894851502309072e-06, "loss": 0.5572, "step": 21427 }, { "epoch": 0.55, "grad_norm": 1.2238645553588867, "learning_rate": 8.894026448316832e-06, "loss": 0.422, "step": 21428 }, { "epoch": 0.55, "grad_norm": 2.0613584518432617, "learning_rate": 8.893201401946264e-06, "loss": 0.5169, "step": 21429 }, { "epoch": 0.55, "grad_norm": 2.6063308715820312, "learning_rate": 8.892376363203055e-06, "loss": 0.6391, "step": 21430 }, { "epoch": 0.55, "grad_norm": 1.0216628313064575, "learning_rate": 8.891551332092893e-06, "loss": 0.5097, "step": 21431 }, { "epoch": 0.55, "grad_norm": 1.5037199258804321, "learning_rate": 8.890726308621463e-06, "loss": 0.4966, "step": 21432 }, { "epoch": 0.55, "grad_norm": 2.8827502727508545, "learning_rate": 8.889901292794449e-06, "loss": 0.4363, "step": 21433 }, { "epoch": 0.55, "grad_norm": 4.7530364990234375, "learning_rate": 8.88907628461754e-06, "loss": 0.6862, "step": 21434 }, { "epoch": 0.55, "grad_norm": 1.0287754535675049, "learning_rate": 8.888251284096418e-06, "loss": 0.48, "step": 21435 }, { "epoch": 0.55, "grad_norm": 6.036418437957764, "learning_rate": 8.88742629123677e-06, "loss": 0.5095, "step": 21436 }, { "epoch": 0.55, "grad_norm": 1.8787952661514282, "learning_rate": 8.88660130604428e-06, "loss": 0.7065, "step": 21437 }, { "epoch": 0.55, "grad_norm": 7.355007648468018, "learning_rate": 8.885776328524637e-06, "loss": 0.7483, "step": 21438 }, { "epoch": 0.55, "grad_norm": 2.1580145359039307, "learning_rate": 8.88495135868352e-06, "loss": 0.5959, "step": 21439 }, { "epoch": 0.55, "grad_norm": 1.5752744674682617, "learning_rate": 8.884126396526619e-06, "loss": 0.6231, "step": 21440 }, { "epoch": 0.55, "grad_norm": 6.2868828773498535, "learning_rate": 8.88330144205962e-06, "loss": 0.4457, "step": 21441 }, { "epoch": 0.55, "grad_norm": 5.2867631912231445, "learning_rate": 8.882476495288202e-06, "loss": 0.7006, "step": 21442 }, { "epoch": 0.55, "grad_norm": 7.498204708099365, "learning_rate": 8.881651556218059e-06, "loss": 0.7494, "step": 21443 }, { "epoch": 0.55, "grad_norm": 1.76716148853302, "learning_rate": 8.880826624854866e-06, "loss": 0.6654, "step": 21444 }, { "epoch": 0.55, "grad_norm": 1.3093961477279663, "learning_rate": 8.880001701204313e-06, "loss": 0.49, "step": 21445 }, { "epoch": 0.55, "grad_norm": 1.867656946182251, "learning_rate": 8.879176785272088e-06, "loss": 0.6045, "step": 21446 }, { "epoch": 0.55, "grad_norm": 1.4757581949234009, "learning_rate": 8.87835187706387e-06, "loss": 0.5546, "step": 21447 }, { "epoch": 0.55, "grad_norm": 1.8925048112869263, "learning_rate": 8.877526976585348e-06, "loss": 0.6043, "step": 21448 }, { "epoch": 0.55, "grad_norm": 1.365187644958496, "learning_rate": 8.876702083842202e-06, "loss": 0.4191, "step": 21449 }, { "epoch": 0.55, "grad_norm": 1.0443141460418701, "learning_rate": 8.875877198840122e-06, "loss": 0.4563, "step": 21450 }, { "epoch": 0.55, "grad_norm": 4.3395094871521, "learning_rate": 8.875052321584789e-06, "loss": 0.6058, "step": 21451 }, { "epoch": 0.55, "grad_norm": 1.2641617059707642, "learning_rate": 8.87422745208189e-06, "loss": 0.4673, "step": 21452 }, { "epoch": 0.55, "grad_norm": 1.254271388053894, "learning_rate": 8.873402590337106e-06, "loss": 0.6804, "step": 21453 }, { "epoch": 0.55, "grad_norm": 1.8213828802108765, "learning_rate": 8.872577736356125e-06, "loss": 0.4344, "step": 21454 }, { "epoch": 0.55, "grad_norm": 1.7106068134307861, "learning_rate": 8.871752890144631e-06, "loss": 0.5542, "step": 21455 }, { "epoch": 0.55, "grad_norm": 1.4378174543380737, "learning_rate": 8.870928051708305e-06, "loss": 0.3475, "step": 21456 }, { "epoch": 0.55, "grad_norm": 2.4903790950775146, "learning_rate": 8.870103221052833e-06, "loss": 0.5745, "step": 21457 }, { "epoch": 0.55, "grad_norm": 2.866647243499756, "learning_rate": 8.869278398183903e-06, "loss": 0.5593, "step": 21458 }, { "epoch": 0.55, "grad_norm": 1.1328232288360596, "learning_rate": 8.868453583107193e-06, "loss": 0.2829, "step": 21459 }, { "epoch": 0.55, "grad_norm": 1.321353793144226, "learning_rate": 8.867628775828393e-06, "loss": 0.516, "step": 21460 }, { "epoch": 0.55, "grad_norm": 1.053176760673523, "learning_rate": 8.86680397635318e-06, "loss": 0.3938, "step": 21461 }, { "epoch": 0.55, "grad_norm": 3.9603965282440186, "learning_rate": 8.865979184687244e-06, "loss": 0.4744, "step": 21462 }, { "epoch": 0.55, "grad_norm": 1.1522361040115356, "learning_rate": 8.865154400836267e-06, "loss": 0.5574, "step": 21463 }, { "epoch": 0.55, "grad_norm": 1.5868773460388184, "learning_rate": 8.864329624805932e-06, "loss": 0.4594, "step": 21464 }, { "epoch": 0.55, "grad_norm": 1.5106678009033203, "learning_rate": 8.863504856601926e-06, "loss": 0.6601, "step": 21465 }, { "epoch": 0.55, "grad_norm": 7.094997882843018, "learning_rate": 8.862680096229928e-06, "loss": 0.6858, "step": 21466 }, { "epoch": 0.55, "grad_norm": 2.760653018951416, "learning_rate": 8.861855343695627e-06, "loss": 0.7417, "step": 21467 }, { "epoch": 0.55, "grad_norm": 1.52125084400177, "learning_rate": 8.861030599004701e-06, "loss": 0.6197, "step": 21468 }, { "epoch": 0.55, "grad_norm": 1.2638702392578125, "learning_rate": 8.860205862162837e-06, "loss": 0.4517, "step": 21469 }, { "epoch": 0.55, "grad_norm": 3.5255160331726074, "learning_rate": 8.85938113317572e-06, "loss": 0.7943, "step": 21470 }, { "epoch": 0.55, "grad_norm": 1.5189260244369507, "learning_rate": 8.858556412049031e-06, "loss": 0.5791, "step": 21471 }, { "epoch": 0.55, "grad_norm": 1.9227889776229858, "learning_rate": 8.857731698788454e-06, "loss": 0.5633, "step": 21472 }, { "epoch": 0.55, "grad_norm": 4.843482971191406, "learning_rate": 8.856906993399673e-06, "loss": 0.72, "step": 21473 }, { "epoch": 0.55, "grad_norm": 2.733027696609497, "learning_rate": 8.856082295888367e-06, "loss": 0.7138, "step": 21474 }, { "epoch": 0.55, "grad_norm": 1.8649576902389526, "learning_rate": 8.855257606260227e-06, "loss": 0.6536, "step": 21475 }, { "epoch": 0.55, "grad_norm": 2.948981761932373, "learning_rate": 8.854432924520931e-06, "loss": 0.8742, "step": 21476 }, { "epoch": 0.55, "grad_norm": 1.129289150238037, "learning_rate": 8.853608250676166e-06, "loss": 0.526, "step": 21477 }, { "epoch": 0.55, "grad_norm": 2.3067493438720703, "learning_rate": 8.85278358473161e-06, "loss": 0.5317, "step": 21478 }, { "epoch": 0.55, "grad_norm": 1.608925223350525, "learning_rate": 8.851958926692951e-06, "loss": 0.5139, "step": 21479 }, { "epoch": 0.55, "grad_norm": 1.2889677286148071, "learning_rate": 8.851134276565866e-06, "loss": 0.4955, "step": 21480 }, { "epoch": 0.55, "grad_norm": 1.1270718574523926, "learning_rate": 8.850309634356044e-06, "loss": 0.5212, "step": 21481 }, { "epoch": 0.55, "grad_norm": 2.5603017807006836, "learning_rate": 8.849485000069168e-06, "loss": 0.6018, "step": 21482 }, { "epoch": 0.55, "grad_norm": 1.323588490486145, "learning_rate": 8.848660373710915e-06, "loss": 0.5312, "step": 21483 }, { "epoch": 0.55, "grad_norm": 7.331055641174316, "learning_rate": 8.847835755286974e-06, "loss": 0.6889, "step": 21484 }, { "epoch": 0.55, "grad_norm": 2.006162166595459, "learning_rate": 8.847011144803024e-06, "loss": 0.582, "step": 21485 }, { "epoch": 0.55, "grad_norm": 1.7388519048690796, "learning_rate": 8.846186542264747e-06, "loss": 0.5191, "step": 21486 }, { "epoch": 0.55, "grad_norm": 1.3709520101547241, "learning_rate": 8.84536194767783e-06, "loss": 0.5863, "step": 21487 }, { "epoch": 0.55, "grad_norm": 2.198850393295288, "learning_rate": 8.84453736104795e-06, "loss": 0.4758, "step": 21488 }, { "epoch": 0.55, "grad_norm": 2.741415500640869, "learning_rate": 8.843712782380799e-06, "loss": 0.7035, "step": 21489 }, { "epoch": 0.55, "grad_norm": 4.525641918182373, "learning_rate": 8.842888211682046e-06, "loss": 0.7291, "step": 21490 }, { "epoch": 0.55, "grad_norm": 1.3219653367996216, "learning_rate": 8.842063648957386e-06, "loss": 0.3958, "step": 21491 }, { "epoch": 0.55, "grad_norm": 3.859508991241455, "learning_rate": 8.84123909421249e-06, "loss": 0.5827, "step": 21492 }, { "epoch": 0.55, "grad_norm": 1.8718657493591309, "learning_rate": 8.84041454745305e-06, "loss": 0.5829, "step": 21493 }, { "epoch": 0.55, "grad_norm": 1.7712708711624146, "learning_rate": 8.839590008684746e-06, "loss": 0.6846, "step": 21494 }, { "epoch": 0.55, "grad_norm": 4.7195658683776855, "learning_rate": 8.838765477913255e-06, "loss": 0.6403, "step": 21495 }, { "epoch": 0.55, "grad_norm": 1.9888652563095093, "learning_rate": 8.837940955144265e-06, "loss": 0.6357, "step": 21496 }, { "epoch": 0.55, "grad_norm": 1.5644160509109497, "learning_rate": 8.837116440383453e-06, "loss": 0.6103, "step": 21497 }, { "epoch": 0.55, "grad_norm": 1.4273183345794678, "learning_rate": 8.836291933636505e-06, "loss": 0.5355, "step": 21498 }, { "epoch": 0.55, "grad_norm": 0.9151634573936462, "learning_rate": 8.835467434909106e-06, "loss": 0.5298, "step": 21499 }, { "epoch": 0.55, "grad_norm": 2.2091028690338135, "learning_rate": 8.834642944206928e-06, "loss": 0.6317, "step": 21500 }, { "epoch": 0.55, "grad_norm": 1.8796467781066895, "learning_rate": 8.833818461535662e-06, "loss": 0.4777, "step": 21501 }, { "epoch": 0.55, "grad_norm": 5.672064781188965, "learning_rate": 8.832993986900986e-06, "loss": 0.6995, "step": 21502 }, { "epoch": 0.55, "grad_norm": 3.8243496417999268, "learning_rate": 8.83216952030858e-06, "loss": 0.6375, "step": 21503 }, { "epoch": 0.55, "grad_norm": 1.701622724533081, "learning_rate": 8.831345061764132e-06, "loss": 0.5366, "step": 21504 }, { "epoch": 0.55, "grad_norm": 9.781052589416504, "learning_rate": 8.830520611273316e-06, "loss": 0.5837, "step": 21505 }, { "epoch": 0.55, "grad_norm": 1.1704926490783691, "learning_rate": 8.829696168841822e-06, "loss": 0.4501, "step": 21506 }, { "epoch": 0.55, "grad_norm": 1.4131828546524048, "learning_rate": 8.828871734475321e-06, "loss": 0.5269, "step": 21507 }, { "epoch": 0.55, "grad_norm": 1.827118158340454, "learning_rate": 8.828047308179505e-06, "loss": 0.6043, "step": 21508 }, { "epoch": 0.55, "grad_norm": 1.3911558389663696, "learning_rate": 8.827222889960048e-06, "loss": 0.4311, "step": 21509 }, { "epoch": 0.55, "grad_norm": 1.5547105073928833, "learning_rate": 8.826398479822632e-06, "loss": 0.5897, "step": 21510 }, { "epoch": 0.55, "grad_norm": 1.561933994293213, "learning_rate": 8.825574077772945e-06, "loss": 0.6052, "step": 21511 }, { "epoch": 0.55, "grad_norm": 4.685482501983643, "learning_rate": 8.82474968381666e-06, "loss": 0.7107, "step": 21512 }, { "epoch": 0.55, "grad_norm": 1.0696330070495605, "learning_rate": 8.823925297959464e-06, "loss": 0.5267, "step": 21513 }, { "epoch": 0.55, "grad_norm": 0.7772420048713684, "learning_rate": 8.823100920207035e-06, "loss": 0.4586, "step": 21514 }, { "epoch": 0.55, "grad_norm": 2.5834851264953613, "learning_rate": 8.822276550565054e-06, "loss": 0.6291, "step": 21515 }, { "epoch": 0.55, "grad_norm": 3.2015092372894287, "learning_rate": 8.821452189039206e-06, "loss": 0.5212, "step": 21516 }, { "epoch": 0.55, "grad_norm": 0.9429378509521484, "learning_rate": 8.820627835635166e-06, "loss": 0.5637, "step": 21517 }, { "epoch": 0.55, "grad_norm": 1.2188785076141357, "learning_rate": 8.81980349035862e-06, "loss": 0.4811, "step": 21518 }, { "epoch": 0.55, "grad_norm": 4.013949871063232, "learning_rate": 8.818979153215245e-06, "loss": 0.6284, "step": 21519 }, { "epoch": 0.55, "grad_norm": 1.019155502319336, "learning_rate": 8.818154824210728e-06, "loss": 0.4154, "step": 21520 }, { "epoch": 0.55, "grad_norm": 1.6839327812194824, "learning_rate": 8.81733050335074e-06, "loss": 0.5508, "step": 21521 }, { "epoch": 0.55, "grad_norm": 1.8777000904083252, "learning_rate": 8.81650619064097e-06, "loss": 0.5896, "step": 21522 }, { "epoch": 0.55, "grad_norm": 2.407153606414795, "learning_rate": 8.815681886087096e-06, "loss": 0.4444, "step": 21523 }, { "epoch": 0.55, "grad_norm": 1.3386383056640625, "learning_rate": 8.814857589694797e-06, "loss": 0.5196, "step": 21524 }, { "epoch": 0.55, "grad_norm": 1.9522817134857178, "learning_rate": 8.814033301469757e-06, "loss": 0.6385, "step": 21525 }, { "epoch": 0.55, "grad_norm": 1.8943644762039185, "learning_rate": 8.813209021417652e-06, "loss": 0.5615, "step": 21526 }, { "epoch": 0.55, "grad_norm": 1.0805281400680542, "learning_rate": 8.812384749544165e-06, "loss": 0.5927, "step": 21527 }, { "epoch": 0.55, "grad_norm": 1.5345185995101929, "learning_rate": 8.811560485854979e-06, "loss": 0.6365, "step": 21528 }, { "epoch": 0.55, "grad_norm": 3.209765672683716, "learning_rate": 8.810736230355767e-06, "loss": 0.6643, "step": 21529 }, { "epoch": 0.55, "grad_norm": 2.4940383434295654, "learning_rate": 8.809911983052219e-06, "loss": 0.6226, "step": 21530 }, { "epoch": 0.55, "grad_norm": 3.636549234390259, "learning_rate": 8.809087743950006e-06, "loss": 0.6208, "step": 21531 }, { "epoch": 0.55, "grad_norm": 1.3348052501678467, "learning_rate": 8.808263513054814e-06, "loss": 0.4393, "step": 21532 }, { "epoch": 0.55, "grad_norm": 1.3961981534957886, "learning_rate": 8.80743929037232e-06, "loss": 0.5824, "step": 21533 }, { "epoch": 0.55, "grad_norm": 1.6524394750595093, "learning_rate": 8.806615075908205e-06, "loss": 0.492, "step": 21534 }, { "epoch": 0.55, "grad_norm": 1.033495306968689, "learning_rate": 8.80579086966815e-06, "loss": 0.5621, "step": 21535 }, { "epoch": 0.55, "grad_norm": 1.1079894304275513, "learning_rate": 8.804966671657833e-06, "loss": 0.5382, "step": 21536 }, { "epoch": 0.55, "grad_norm": 1.8799301385879517, "learning_rate": 8.804142481882937e-06, "loss": 0.4798, "step": 21537 }, { "epoch": 0.55, "grad_norm": 3.815436601638794, "learning_rate": 8.803318300349137e-06, "loss": 0.6027, "step": 21538 }, { "epoch": 0.55, "grad_norm": 3.976325511932373, "learning_rate": 8.802494127062116e-06, "loss": 0.6748, "step": 21539 }, { "epoch": 0.55, "grad_norm": 2.306784152984619, "learning_rate": 8.801669962027556e-06, "loss": 0.7208, "step": 21540 }, { "epoch": 0.55, "grad_norm": 1.6335493326187134, "learning_rate": 8.80084580525113e-06, "loss": 0.5624, "step": 21541 }, { "epoch": 0.55, "grad_norm": 1.0304689407348633, "learning_rate": 8.800021656738525e-06, "loss": 0.6028, "step": 21542 }, { "epoch": 0.55, "grad_norm": 4.044867038726807, "learning_rate": 8.799197516495413e-06, "loss": 0.4662, "step": 21543 }, { "epoch": 0.55, "grad_norm": 1.3373949527740479, "learning_rate": 8.79837338452748e-06, "loss": 0.5871, "step": 21544 }, { "epoch": 0.55, "grad_norm": 1.3660203218460083, "learning_rate": 8.797549260840402e-06, "loss": 0.4882, "step": 21545 }, { "epoch": 0.55, "grad_norm": 1.5478107929229736, "learning_rate": 8.796725145439857e-06, "loss": 0.5598, "step": 21546 }, { "epoch": 0.55, "grad_norm": 1.8152323961257935, "learning_rate": 8.79590103833153e-06, "loss": 0.6825, "step": 21547 }, { "epoch": 0.55, "grad_norm": 2.428410768508911, "learning_rate": 8.795076939521094e-06, "loss": 0.6338, "step": 21548 }, { "epoch": 0.55, "grad_norm": 1.6962248086929321, "learning_rate": 8.794252849014234e-06, "loss": 0.5577, "step": 21549 }, { "epoch": 0.55, "grad_norm": 6.311432361602783, "learning_rate": 8.793428766816621e-06, "loss": 0.5826, "step": 21550 }, { "epoch": 0.55, "grad_norm": 3.780087471008301, "learning_rate": 8.792604692933941e-06, "loss": 0.7992, "step": 21551 }, { "epoch": 0.55, "grad_norm": 1.431739091873169, "learning_rate": 8.791780627371872e-06, "loss": 0.4978, "step": 21552 }, { "epoch": 0.55, "grad_norm": 13.996528625488281, "learning_rate": 8.79095657013609e-06, "loss": 0.5627, "step": 21553 }, { "epoch": 0.55, "grad_norm": 3.5899202823638916, "learning_rate": 8.790132521232278e-06, "loss": 0.59, "step": 21554 }, { "epoch": 0.55, "grad_norm": 3.3591349124908447, "learning_rate": 8.78930848066611e-06, "loss": 0.6512, "step": 21555 }, { "epoch": 0.55, "grad_norm": 1.76192307472229, "learning_rate": 8.788484448443268e-06, "loss": 0.6664, "step": 21556 }, { "epoch": 0.55, "grad_norm": 6.334238529205322, "learning_rate": 8.787660424569432e-06, "loss": 0.6064, "step": 21557 }, { "epoch": 0.55, "grad_norm": 5.967721462249756, "learning_rate": 8.786836409050276e-06, "loss": 0.7947, "step": 21558 }, { "epoch": 0.55, "grad_norm": 2.5033376216888428, "learning_rate": 8.786012401891483e-06, "loss": 0.4099, "step": 21559 }, { "epoch": 0.55, "grad_norm": 1.4779164791107178, "learning_rate": 8.785188403098728e-06, "loss": 0.5969, "step": 21560 }, { "epoch": 0.55, "grad_norm": 1.844508409500122, "learning_rate": 8.784364412677694e-06, "loss": 0.4834, "step": 21561 }, { "epoch": 0.55, "grad_norm": 3.244515895843506, "learning_rate": 8.783540430634055e-06, "loss": 0.4918, "step": 21562 }, { "epoch": 0.55, "grad_norm": 1.447778582572937, "learning_rate": 8.782716456973491e-06, "loss": 0.5635, "step": 21563 }, { "epoch": 0.55, "grad_norm": 8.70824146270752, "learning_rate": 8.781892491701682e-06, "loss": 0.7075, "step": 21564 }, { "epoch": 0.55, "grad_norm": 1.268041729927063, "learning_rate": 8.781068534824303e-06, "loss": 0.4774, "step": 21565 }, { "epoch": 0.55, "grad_norm": 1.9323683977127075, "learning_rate": 8.780244586347036e-06, "loss": 0.6592, "step": 21566 }, { "epoch": 0.55, "grad_norm": 1.4369986057281494, "learning_rate": 8.779420646275553e-06, "loss": 0.5318, "step": 21567 }, { "epoch": 0.55, "grad_norm": 1.649592399597168, "learning_rate": 8.77859671461554e-06, "loss": 0.59, "step": 21568 }, { "epoch": 0.55, "grad_norm": 2.0744335651397705, "learning_rate": 8.77777279137267e-06, "loss": 0.5575, "step": 21569 }, { "epoch": 0.55, "grad_norm": 1.7935092449188232, "learning_rate": 8.77694887655262e-06, "loss": 0.7097, "step": 21570 }, { "epoch": 0.55, "grad_norm": 1.612726092338562, "learning_rate": 8.776124970161073e-06, "loss": 0.5497, "step": 21571 }, { "epoch": 0.55, "grad_norm": 1.6824864149093628, "learning_rate": 8.7753010722037e-06, "loss": 0.5897, "step": 21572 }, { "epoch": 0.55, "grad_norm": 2.5768730640411377, "learning_rate": 8.774477182686188e-06, "loss": 0.5817, "step": 21573 }, { "epoch": 0.55, "grad_norm": 1.2554746866226196, "learning_rate": 8.773653301614206e-06, "loss": 0.4097, "step": 21574 }, { "epoch": 0.55, "grad_norm": 3.790715456008911, "learning_rate": 8.772829428993434e-06, "loss": 0.5694, "step": 21575 }, { "epoch": 0.55, "grad_norm": 1.905503273010254, "learning_rate": 8.772005564829552e-06, "loss": 0.7478, "step": 21576 }, { "epoch": 0.55, "grad_norm": 1.162438988685608, "learning_rate": 8.771181709128236e-06, "loss": 0.5205, "step": 21577 }, { "epoch": 0.55, "grad_norm": 4.9905009269714355, "learning_rate": 8.770357861895164e-06, "loss": 0.7833, "step": 21578 }, { "epoch": 0.55, "grad_norm": 1.1808522939682007, "learning_rate": 8.769534023136013e-06, "loss": 0.6299, "step": 21579 }, { "epoch": 0.55, "grad_norm": 2.0205483436584473, "learning_rate": 8.768710192856458e-06, "loss": 0.7212, "step": 21580 }, { "epoch": 0.55, "grad_norm": 1.1941279172897339, "learning_rate": 8.767886371062183e-06, "loss": 0.5048, "step": 21581 }, { "epoch": 0.55, "grad_norm": 0.9751299023628235, "learning_rate": 8.767062557758857e-06, "loss": 0.5465, "step": 21582 }, { "epoch": 0.55, "grad_norm": 1.7048008441925049, "learning_rate": 8.766238752952166e-06, "loss": 0.6921, "step": 21583 }, { "epoch": 0.55, "grad_norm": 9.466760635375977, "learning_rate": 8.765414956647778e-06, "loss": 0.6149, "step": 21584 }, { "epoch": 0.55, "grad_norm": 1.5051337480545044, "learning_rate": 8.764591168851377e-06, "loss": 0.525, "step": 21585 }, { "epoch": 0.55, "grad_norm": 1.1919593811035156, "learning_rate": 8.763767389568635e-06, "loss": 0.4901, "step": 21586 }, { "epoch": 0.55, "grad_norm": 3.692661762237549, "learning_rate": 8.762943618805234e-06, "loss": 0.5875, "step": 21587 }, { "epoch": 0.55, "grad_norm": 1.5917915105819702, "learning_rate": 8.762119856566848e-06, "loss": 0.5446, "step": 21588 }, { "epoch": 0.55, "grad_norm": 3.5420444011688232, "learning_rate": 8.761296102859152e-06, "loss": 0.658, "step": 21589 }, { "epoch": 0.55, "grad_norm": 2.1573305130004883, "learning_rate": 8.760472357687828e-06, "loss": 0.7047, "step": 21590 }, { "epoch": 0.55, "grad_norm": 2.0339479446411133, "learning_rate": 8.759648621058548e-06, "loss": 0.6264, "step": 21591 }, { "epoch": 0.55, "grad_norm": 1.1957547664642334, "learning_rate": 8.758824892976989e-06, "loss": 0.5346, "step": 21592 }, { "epoch": 0.55, "grad_norm": 6.208741664886475, "learning_rate": 8.758001173448833e-06, "loss": 0.6412, "step": 21593 }, { "epoch": 0.55, "grad_norm": 7.983431339263916, "learning_rate": 8.757177462479749e-06, "loss": 0.6108, "step": 21594 }, { "epoch": 0.55, "grad_norm": 1.4770629405975342, "learning_rate": 8.75635376007542e-06, "loss": 0.4743, "step": 21595 }, { "epoch": 0.55, "grad_norm": 1.186580777168274, "learning_rate": 8.755530066241517e-06, "loss": 0.3836, "step": 21596 }, { "epoch": 0.55, "grad_norm": 1.2637063264846802, "learning_rate": 8.75470638098372e-06, "loss": 0.4622, "step": 21597 }, { "epoch": 0.55, "grad_norm": 2.0625457763671875, "learning_rate": 8.753882704307706e-06, "loss": 0.755, "step": 21598 }, { "epoch": 0.55, "grad_norm": 1.2094851732254028, "learning_rate": 8.753059036219146e-06, "loss": 0.4479, "step": 21599 }, { "epoch": 0.55, "grad_norm": 4.5474324226379395, "learning_rate": 8.752235376723724e-06, "loss": 0.5213, "step": 21600 }, { "epoch": 0.55, "grad_norm": 10.66763687133789, "learning_rate": 8.751411725827109e-06, "loss": 0.5411, "step": 21601 }, { "epoch": 0.55, "grad_norm": 1.944286584854126, "learning_rate": 8.750588083534982e-06, "loss": 0.4085, "step": 21602 }, { "epoch": 0.55, "grad_norm": 4.398859024047852, "learning_rate": 8.749764449853014e-06, "loss": 0.5429, "step": 21603 }, { "epoch": 0.55, "grad_norm": 1.302828073501587, "learning_rate": 8.748940824786884e-06, "loss": 0.5113, "step": 21604 }, { "epoch": 0.55, "grad_norm": 2.4780123233795166, "learning_rate": 8.74811720834227e-06, "loss": 0.6187, "step": 21605 }, { "epoch": 0.55, "grad_norm": 1.826905608177185, "learning_rate": 8.747293600524844e-06, "loss": 0.5568, "step": 21606 }, { "epoch": 0.55, "grad_norm": 1.3438173532485962, "learning_rate": 8.746470001340285e-06, "loss": 0.5143, "step": 21607 }, { "epoch": 0.55, "grad_norm": 1.3095107078552246, "learning_rate": 8.745646410794266e-06, "loss": 0.5526, "step": 21608 }, { "epoch": 0.55, "grad_norm": 1.1908574104309082, "learning_rate": 8.744822828892463e-06, "loss": 0.6838, "step": 21609 }, { "epoch": 0.55, "grad_norm": 1.4923324584960938, "learning_rate": 8.743999255640556e-06, "loss": 0.3628, "step": 21610 }, { "epoch": 0.55, "grad_norm": 1.2336622476577759, "learning_rate": 8.743175691044215e-06, "loss": 0.5366, "step": 21611 }, { "epoch": 0.55, "grad_norm": 1.7625008821487427, "learning_rate": 8.742352135109117e-06, "loss": 0.7152, "step": 21612 }, { "epoch": 0.55, "grad_norm": 1.1834355592727661, "learning_rate": 8.741528587840939e-06, "loss": 0.4708, "step": 21613 }, { "epoch": 0.55, "grad_norm": 0.8984786868095398, "learning_rate": 8.740705049245355e-06, "loss": 0.4933, "step": 21614 }, { "epoch": 0.55, "grad_norm": 1.1629106998443604, "learning_rate": 8.73988151932804e-06, "loss": 0.5342, "step": 21615 }, { "epoch": 0.55, "grad_norm": 1.6982909440994263, "learning_rate": 8.739057998094668e-06, "loss": 0.529, "step": 21616 }, { "epoch": 0.55, "grad_norm": 1.4360097646713257, "learning_rate": 8.73823448555092e-06, "loss": 0.5292, "step": 21617 }, { "epoch": 0.55, "grad_norm": 1.9069468975067139, "learning_rate": 8.737410981702466e-06, "loss": 0.5107, "step": 21618 }, { "epoch": 0.55, "grad_norm": 1.816751480102539, "learning_rate": 8.736587486554983e-06, "loss": 0.4786, "step": 21619 }, { "epoch": 0.55, "grad_norm": 1.6913260221481323, "learning_rate": 8.735764000114141e-06, "loss": 0.5338, "step": 21620 }, { "epoch": 0.55, "grad_norm": 2.3379719257354736, "learning_rate": 8.734940522385623e-06, "loss": 0.6331, "step": 21621 }, { "epoch": 0.55, "grad_norm": 6.53376579284668, "learning_rate": 8.7341170533751e-06, "loss": 0.7229, "step": 21622 }, { "epoch": 0.55, "grad_norm": 1.226466417312622, "learning_rate": 8.733293593088245e-06, "loss": 0.4783, "step": 21623 }, { "epoch": 0.55, "grad_norm": 2.109029769897461, "learning_rate": 8.73247014153074e-06, "loss": 0.4781, "step": 21624 }, { "epoch": 0.55, "grad_norm": 2.1915929317474365, "learning_rate": 8.731646698708248e-06, "loss": 0.7452, "step": 21625 }, { "epoch": 0.55, "grad_norm": 2.914414167404175, "learning_rate": 8.730823264626455e-06, "loss": 0.4725, "step": 21626 }, { "epoch": 0.55, "grad_norm": 4.231839179992676, "learning_rate": 8.729999839291027e-06, "loss": 0.6042, "step": 21627 }, { "epoch": 0.55, "grad_norm": 1.395403504371643, "learning_rate": 8.729176422707642e-06, "loss": 0.4536, "step": 21628 }, { "epoch": 0.55, "grad_norm": 3.313061475753784, "learning_rate": 8.728353014881978e-06, "loss": 0.5098, "step": 21629 }, { "epoch": 0.55, "grad_norm": 3.9704060554504395, "learning_rate": 8.727529615819704e-06, "loss": 0.7785, "step": 21630 }, { "epoch": 0.55, "grad_norm": 0.9487061500549316, "learning_rate": 8.726706225526499e-06, "loss": 0.4928, "step": 21631 }, { "epoch": 0.55, "grad_norm": 5.919315338134766, "learning_rate": 8.72588284400803e-06, "loss": 0.4375, "step": 21632 }, { "epoch": 0.55, "grad_norm": 2.0434584617614746, "learning_rate": 8.725059471269977e-06, "loss": 0.5431, "step": 21633 }, { "epoch": 0.55, "grad_norm": 5.482300281524658, "learning_rate": 8.724236107318018e-06, "loss": 0.6147, "step": 21634 }, { "epoch": 0.55, "grad_norm": 1.9800872802734375, "learning_rate": 8.723412752157817e-06, "loss": 0.5645, "step": 21635 }, { "epoch": 0.55, "grad_norm": 2.3757951259613037, "learning_rate": 8.722589405795057e-06, "loss": 0.5542, "step": 21636 }, { "epoch": 0.55, "grad_norm": 1.9693812131881714, "learning_rate": 8.721766068235404e-06, "loss": 0.4188, "step": 21637 }, { "epoch": 0.55, "grad_norm": 1.7295408248901367, "learning_rate": 8.720942739484542e-06, "loss": 0.4657, "step": 21638 }, { "epoch": 0.55, "grad_norm": 1.48289155960083, "learning_rate": 8.720119419548134e-06, "loss": 0.5655, "step": 21639 }, { "epoch": 0.55, "grad_norm": 1.345560908317566, "learning_rate": 8.71929610843186e-06, "loss": 0.668, "step": 21640 }, { "epoch": 0.55, "grad_norm": 3.1279847621917725, "learning_rate": 8.718472806141393e-06, "loss": 0.6784, "step": 21641 }, { "epoch": 0.55, "grad_norm": 1.5245060920715332, "learning_rate": 8.717649512682407e-06, "loss": 0.524, "step": 21642 }, { "epoch": 0.55, "grad_norm": 1.8001835346221924, "learning_rate": 8.716826228060576e-06, "loss": 0.5575, "step": 21643 }, { "epoch": 0.55, "grad_norm": 1.5936765670776367, "learning_rate": 8.716002952281568e-06, "loss": 0.5065, "step": 21644 }, { "epoch": 0.55, "grad_norm": 5.9017534255981445, "learning_rate": 8.715179685351065e-06, "loss": 0.6593, "step": 21645 }, { "epoch": 0.55, "grad_norm": 3.3326549530029297, "learning_rate": 8.714356427274736e-06, "loss": 0.6465, "step": 21646 }, { "epoch": 0.55, "grad_norm": 1.8393588066101074, "learning_rate": 8.713533178058253e-06, "loss": 0.6049, "step": 21647 }, { "epoch": 0.55, "grad_norm": 1.1858066320419312, "learning_rate": 8.712709937707292e-06, "loss": 0.5228, "step": 21648 }, { "epoch": 0.55, "grad_norm": 0.9732711315155029, "learning_rate": 8.711886706227525e-06, "loss": 0.4843, "step": 21649 }, { "epoch": 0.55, "grad_norm": 1.996240496635437, "learning_rate": 8.711063483624626e-06, "loss": 0.6813, "step": 21650 }, { "epoch": 0.55, "grad_norm": 4.611724376678467, "learning_rate": 8.710240269904268e-06, "loss": 0.6303, "step": 21651 }, { "epoch": 0.55, "grad_norm": 1.4304351806640625, "learning_rate": 8.709417065072122e-06, "loss": 0.4891, "step": 21652 }, { "epoch": 0.55, "grad_norm": 3.3425347805023193, "learning_rate": 8.708593869133866e-06, "loss": 0.5587, "step": 21653 }, { "epoch": 0.56, "grad_norm": 2.030365467071533, "learning_rate": 8.707770682095168e-06, "loss": 0.479, "step": 21654 }, { "epoch": 0.56, "grad_norm": 2.990316867828369, "learning_rate": 8.706947503961703e-06, "loss": 0.5581, "step": 21655 }, { "epoch": 0.56, "grad_norm": 1.5727417469024658, "learning_rate": 8.70612433473914e-06, "loss": 0.6627, "step": 21656 }, { "epoch": 0.56, "grad_norm": 2.908640146255493, "learning_rate": 8.705301174433158e-06, "loss": 0.6408, "step": 21657 }, { "epoch": 0.56, "grad_norm": 0.9288195371627808, "learning_rate": 8.70447802304943e-06, "loss": 0.4565, "step": 21658 }, { "epoch": 0.56, "grad_norm": 0.9984347224235535, "learning_rate": 8.703654880593621e-06, "loss": 0.575, "step": 21659 }, { "epoch": 0.56, "grad_norm": 1.6501998901367188, "learning_rate": 8.70283174707141e-06, "loss": 0.4727, "step": 21660 }, { "epoch": 0.56, "grad_norm": 3.5882744789123535, "learning_rate": 8.702008622488468e-06, "loss": 0.7076, "step": 21661 }, { "epoch": 0.56, "grad_norm": 1.1221274137496948, "learning_rate": 8.701185506850465e-06, "loss": 0.5114, "step": 21662 }, { "epoch": 0.56, "grad_norm": 1.110358476638794, "learning_rate": 8.700362400163079e-06, "loss": 0.5214, "step": 21663 }, { "epoch": 0.56, "grad_norm": 1.6289366483688354, "learning_rate": 8.699539302431974e-06, "loss": 0.4324, "step": 21664 }, { "epoch": 0.56, "grad_norm": 1.485718846321106, "learning_rate": 8.698716213662832e-06, "loss": 0.6407, "step": 21665 }, { "epoch": 0.56, "grad_norm": 2.5780997276306152, "learning_rate": 8.697893133861317e-06, "loss": 0.391, "step": 21666 }, { "epoch": 0.56, "grad_norm": 1.6970551013946533, "learning_rate": 8.697070063033107e-06, "loss": 0.4801, "step": 21667 }, { "epoch": 0.56, "grad_norm": 2.301178455352783, "learning_rate": 8.69624700118387e-06, "loss": 0.5489, "step": 21668 }, { "epoch": 0.56, "grad_norm": 1.479283094406128, "learning_rate": 8.695423948319278e-06, "loss": 0.4959, "step": 21669 }, { "epoch": 0.56, "grad_norm": 3.5455493927001953, "learning_rate": 8.69460090444501e-06, "loss": 0.5601, "step": 21670 }, { "epoch": 0.56, "grad_norm": 1.5959296226501465, "learning_rate": 8.69377786956673e-06, "loss": 0.5152, "step": 21671 }, { "epoch": 0.56, "grad_norm": 1.1373627185821533, "learning_rate": 8.692954843690108e-06, "loss": 0.3679, "step": 21672 }, { "epoch": 0.56, "grad_norm": 1.653356671333313, "learning_rate": 8.692131826820826e-06, "loss": 0.5309, "step": 21673 }, { "epoch": 0.56, "grad_norm": 1.9387407302856445, "learning_rate": 8.691308818964545e-06, "loss": 0.6863, "step": 21674 }, { "epoch": 0.56, "grad_norm": 3.0715646743774414, "learning_rate": 8.690485820126942e-06, "loss": 0.6354, "step": 21675 }, { "epoch": 0.56, "grad_norm": 1.4154893159866333, "learning_rate": 8.689662830313691e-06, "loss": 0.4942, "step": 21676 }, { "epoch": 0.56, "grad_norm": 1.0830973386764526, "learning_rate": 8.688839849530458e-06, "loss": 0.506, "step": 21677 }, { "epoch": 0.56, "grad_norm": 5.033221244812012, "learning_rate": 8.688016877782919e-06, "loss": 0.6473, "step": 21678 }, { "epoch": 0.56, "grad_norm": 2.9995689392089844, "learning_rate": 8.687193915076742e-06, "loss": 0.7135, "step": 21679 }, { "epoch": 0.56, "grad_norm": 1.3692879676818848, "learning_rate": 8.686370961417601e-06, "loss": 0.5074, "step": 21680 }, { "epoch": 0.56, "grad_norm": 2.3604092597961426, "learning_rate": 8.685548016811164e-06, "loss": 0.6319, "step": 21681 }, { "epoch": 0.56, "grad_norm": 1.8988136053085327, "learning_rate": 8.684725081263103e-06, "loss": 0.7124, "step": 21682 }, { "epoch": 0.56, "grad_norm": 7.0529255867004395, "learning_rate": 8.683902154779095e-06, "loss": 0.4754, "step": 21683 }, { "epoch": 0.56, "grad_norm": 3.5295586585998535, "learning_rate": 8.683079237364803e-06, "loss": 0.638, "step": 21684 }, { "epoch": 0.56, "grad_norm": 2.1153135299682617, "learning_rate": 8.682256329025902e-06, "loss": 0.6227, "step": 21685 }, { "epoch": 0.56, "grad_norm": 1.7985879182815552, "learning_rate": 8.681433429768063e-06, "loss": 0.6373, "step": 21686 }, { "epoch": 0.56, "grad_norm": 1.9054368734359741, "learning_rate": 8.680610539596957e-06, "loss": 0.6296, "step": 21687 }, { "epoch": 0.56, "grad_norm": 2.0469913482666016, "learning_rate": 8.679787658518256e-06, "loss": 0.8115, "step": 21688 }, { "epoch": 0.56, "grad_norm": 3.886354684829712, "learning_rate": 8.678964786537625e-06, "loss": 0.5753, "step": 21689 }, { "epoch": 0.56, "grad_norm": 1.88274085521698, "learning_rate": 8.678141923660741e-06, "loss": 0.671, "step": 21690 }, { "epoch": 0.56, "grad_norm": 4.108698844909668, "learning_rate": 8.67731906989327e-06, "loss": 0.5554, "step": 21691 }, { "epoch": 0.56, "grad_norm": 1.6903120279312134, "learning_rate": 8.676496225240887e-06, "loss": 0.5035, "step": 21692 }, { "epoch": 0.56, "grad_norm": 3.287700891494751, "learning_rate": 8.675673389709261e-06, "loss": 0.5329, "step": 21693 }, { "epoch": 0.56, "grad_norm": 3.1302602291107178, "learning_rate": 8.67485056330406e-06, "loss": 0.5805, "step": 21694 }, { "epoch": 0.56, "grad_norm": 0.9906175136566162, "learning_rate": 8.674027746030959e-06, "loss": 0.5385, "step": 21695 }, { "epoch": 0.56, "grad_norm": 2.3467214107513428, "learning_rate": 8.673204937895625e-06, "loss": 0.5716, "step": 21696 }, { "epoch": 0.56, "grad_norm": 1.1823726892471313, "learning_rate": 8.67238213890373e-06, "loss": 0.6024, "step": 21697 }, { "epoch": 0.56, "grad_norm": 7.9667510986328125, "learning_rate": 8.671559349060939e-06, "loss": 0.498, "step": 21698 }, { "epoch": 0.56, "grad_norm": 2.235260486602783, "learning_rate": 8.670736568372928e-06, "loss": 0.5551, "step": 21699 }, { "epoch": 0.56, "grad_norm": 6.406453609466553, "learning_rate": 8.669913796845368e-06, "loss": 0.631, "step": 21700 }, { "epoch": 0.56, "grad_norm": 1.5155613422393799, "learning_rate": 8.669091034483926e-06, "loss": 0.4621, "step": 21701 }, { "epoch": 0.56, "grad_norm": 1.6302489042282104, "learning_rate": 8.66826828129427e-06, "loss": 0.5564, "step": 21702 }, { "epoch": 0.56, "grad_norm": 1.3665879964828491, "learning_rate": 8.667445537282074e-06, "loss": 0.5465, "step": 21703 }, { "epoch": 0.56, "grad_norm": 1.7408055067062378, "learning_rate": 8.666622802453005e-06, "loss": 0.5767, "step": 21704 }, { "epoch": 0.56, "grad_norm": 1.8656519651412964, "learning_rate": 8.665800076812737e-06, "loss": 0.5615, "step": 21705 }, { "epoch": 0.56, "grad_norm": 1.325760841369629, "learning_rate": 8.664977360366933e-06, "loss": 0.6002, "step": 21706 }, { "epoch": 0.56, "grad_norm": 15.065065383911133, "learning_rate": 8.664154653121268e-06, "loss": 0.4876, "step": 21707 }, { "epoch": 0.56, "grad_norm": 1.1129133701324463, "learning_rate": 8.663331955081409e-06, "loss": 0.5195, "step": 21708 }, { "epoch": 0.56, "grad_norm": 6.323650360107422, "learning_rate": 8.662509266253028e-06, "loss": 0.6451, "step": 21709 }, { "epoch": 0.56, "grad_norm": 2.09016752243042, "learning_rate": 8.66168658664179e-06, "loss": 0.5193, "step": 21710 }, { "epoch": 0.56, "grad_norm": 1.6280577182769775, "learning_rate": 8.66086391625337e-06, "loss": 0.4813, "step": 21711 }, { "epoch": 0.56, "grad_norm": 1.6683982610702515, "learning_rate": 8.660041255093434e-06, "loss": 0.4964, "step": 21712 }, { "epoch": 0.56, "grad_norm": 1.2587591409683228, "learning_rate": 8.659218603167651e-06, "loss": 0.4205, "step": 21713 }, { "epoch": 0.56, "grad_norm": 2.237358331680298, "learning_rate": 8.658395960481692e-06, "loss": 0.6543, "step": 21714 }, { "epoch": 0.56, "grad_norm": 1.5920826196670532, "learning_rate": 8.657573327041224e-06, "loss": 0.5295, "step": 21715 }, { "epoch": 0.56, "grad_norm": 1.2644844055175781, "learning_rate": 8.656750702851917e-06, "loss": 0.5663, "step": 21716 }, { "epoch": 0.56, "grad_norm": 4.441897869110107, "learning_rate": 8.655928087919443e-06, "loss": 0.6399, "step": 21717 }, { "epoch": 0.56, "grad_norm": 3.432828664779663, "learning_rate": 8.655105482249464e-06, "loss": 0.5981, "step": 21718 }, { "epoch": 0.56, "grad_norm": 2.063143253326416, "learning_rate": 8.654282885847656e-06, "loss": 0.665, "step": 21719 }, { "epoch": 0.56, "grad_norm": 1.900519847869873, "learning_rate": 8.653460298719684e-06, "loss": 0.55, "step": 21720 }, { "epoch": 0.56, "grad_norm": 1.3227535486221313, "learning_rate": 8.65263772087122e-06, "loss": 0.6785, "step": 21721 }, { "epoch": 0.56, "grad_norm": 1.266626238822937, "learning_rate": 8.651815152307927e-06, "loss": 0.5362, "step": 21722 }, { "epoch": 0.56, "grad_norm": 2.017765998840332, "learning_rate": 8.650992593035477e-06, "loss": 0.6195, "step": 21723 }, { "epoch": 0.56, "grad_norm": 1.1323237419128418, "learning_rate": 8.650170043059542e-06, "loss": 0.4765, "step": 21724 }, { "epoch": 0.56, "grad_norm": 1.5032620429992676, "learning_rate": 8.649347502385784e-06, "loss": 0.5208, "step": 21725 }, { "epoch": 0.56, "grad_norm": 1.3479300737380981, "learning_rate": 8.648524971019876e-06, "loss": 0.4925, "step": 21726 }, { "epoch": 0.56, "grad_norm": 1.1165531873703003, "learning_rate": 8.647702448967482e-06, "loss": 0.4143, "step": 21727 }, { "epoch": 0.56, "grad_norm": 1.422698974609375, "learning_rate": 8.646879936234274e-06, "loss": 0.6695, "step": 21728 }, { "epoch": 0.56, "grad_norm": 1.1806732416152954, "learning_rate": 8.646057432825922e-06, "loss": 0.5247, "step": 21729 }, { "epoch": 0.56, "grad_norm": 1.2668758630752563, "learning_rate": 8.645234938748089e-06, "loss": 0.5333, "step": 21730 }, { "epoch": 0.56, "grad_norm": 3.8749303817749023, "learning_rate": 8.644412454006447e-06, "loss": 0.4392, "step": 21731 }, { "epoch": 0.56, "grad_norm": 1.719598650932312, "learning_rate": 8.643589978606664e-06, "loss": 0.5012, "step": 21732 }, { "epoch": 0.56, "grad_norm": 1.5221210718154907, "learning_rate": 8.642767512554403e-06, "loss": 0.5444, "step": 21733 }, { "epoch": 0.56, "grad_norm": 1.7564489841461182, "learning_rate": 8.641945055855339e-06, "loss": 0.6326, "step": 21734 }, { "epoch": 0.56, "grad_norm": 6.172165393829346, "learning_rate": 8.641122608515135e-06, "loss": 0.8233, "step": 21735 }, { "epoch": 0.56, "grad_norm": 1.6128650903701782, "learning_rate": 8.640300170539461e-06, "loss": 0.5424, "step": 21736 }, { "epoch": 0.56, "grad_norm": 1.0386496782302856, "learning_rate": 8.639477741933983e-06, "loss": 0.5607, "step": 21737 }, { "epoch": 0.56, "grad_norm": 5.445272445678711, "learning_rate": 8.63865532270437e-06, "loss": 0.5562, "step": 21738 }, { "epoch": 0.56, "grad_norm": 1.8064368963241577, "learning_rate": 8.637832912856289e-06, "loss": 0.6239, "step": 21739 }, { "epoch": 0.56, "grad_norm": 1.0775285959243774, "learning_rate": 8.637010512395406e-06, "loss": 0.4083, "step": 21740 }, { "epoch": 0.56, "grad_norm": 1.3628650903701782, "learning_rate": 8.636188121327394e-06, "loss": 0.5515, "step": 21741 }, { "epoch": 0.56, "grad_norm": 1.8826515674591064, "learning_rate": 8.635365739657916e-06, "loss": 0.475, "step": 21742 }, { "epoch": 0.56, "grad_norm": 1.3420971632003784, "learning_rate": 8.634543367392637e-06, "loss": 0.5245, "step": 21743 }, { "epoch": 0.56, "grad_norm": 1.9573498964309692, "learning_rate": 8.63372100453723e-06, "loss": 0.4595, "step": 21744 }, { "epoch": 0.56, "grad_norm": 2.6362383365631104, "learning_rate": 8.632898651097357e-06, "loss": 0.575, "step": 21745 }, { "epoch": 0.56, "grad_norm": 4.957082271575928, "learning_rate": 8.63207630707869e-06, "loss": 0.5488, "step": 21746 }, { "epoch": 0.56, "grad_norm": 3.094545602798462, "learning_rate": 8.631253972486893e-06, "loss": 0.3797, "step": 21747 }, { "epoch": 0.56, "grad_norm": 1.6569116115570068, "learning_rate": 8.630431647327635e-06, "loss": 0.6058, "step": 21748 }, { "epoch": 0.56, "grad_norm": 2.0116970539093018, "learning_rate": 8.62960933160658e-06, "loss": 0.4811, "step": 21749 }, { "epoch": 0.56, "grad_norm": 1.078488826751709, "learning_rate": 8.628787025329398e-06, "loss": 0.5589, "step": 21750 }, { "epoch": 0.56, "grad_norm": 10.5344820022583, "learning_rate": 8.627964728501751e-06, "loss": 0.5471, "step": 21751 }, { "epoch": 0.56, "grad_norm": 1.8375087976455688, "learning_rate": 8.62714244112931e-06, "loss": 0.5987, "step": 21752 }, { "epoch": 0.56, "grad_norm": 3.7256228923797607, "learning_rate": 8.626320163217744e-06, "loss": 0.7355, "step": 21753 }, { "epoch": 0.56, "grad_norm": 1.8578791618347168, "learning_rate": 8.625497894772713e-06, "loss": 0.7144, "step": 21754 }, { "epoch": 0.56, "grad_norm": 1.9280438423156738, "learning_rate": 8.624675635799888e-06, "loss": 0.7257, "step": 21755 }, { "epoch": 0.56, "grad_norm": 2.9925293922424316, "learning_rate": 8.623853386304934e-06, "loss": 0.6887, "step": 21756 }, { "epoch": 0.56, "grad_norm": 1.2179975509643555, "learning_rate": 8.623031146293518e-06, "loss": 0.5511, "step": 21757 }, { "epoch": 0.56, "grad_norm": 2.331772804260254, "learning_rate": 8.622208915771308e-06, "loss": 0.5988, "step": 21758 }, { "epoch": 0.56, "grad_norm": 1.6447930335998535, "learning_rate": 8.621386694743967e-06, "loss": 0.5219, "step": 21759 }, { "epoch": 0.56, "grad_norm": 0.9997071623802185, "learning_rate": 8.620564483217165e-06, "loss": 0.5482, "step": 21760 }, { "epoch": 0.56, "grad_norm": 1.9272356033325195, "learning_rate": 8.619742281196563e-06, "loss": 0.6562, "step": 21761 }, { "epoch": 0.56, "grad_norm": 1.5640875101089478, "learning_rate": 8.618920088687832e-06, "loss": 0.457, "step": 21762 }, { "epoch": 0.56, "grad_norm": 1.1112693548202515, "learning_rate": 8.618097905696635e-06, "loss": 0.5614, "step": 21763 }, { "epoch": 0.56, "grad_norm": 1.646544337272644, "learning_rate": 8.617275732228637e-06, "loss": 0.5397, "step": 21764 }, { "epoch": 0.56, "grad_norm": 1.7860313653945923, "learning_rate": 8.61645356828951e-06, "loss": 0.4315, "step": 21765 }, { "epoch": 0.56, "grad_norm": 1.788812518119812, "learning_rate": 8.615631413884914e-06, "loss": 0.5118, "step": 21766 }, { "epoch": 0.56, "grad_norm": 3.4691479206085205, "learning_rate": 8.614809269020516e-06, "loss": 0.5922, "step": 21767 }, { "epoch": 0.56, "grad_norm": 1.8251159191131592, "learning_rate": 8.613987133701983e-06, "loss": 0.5794, "step": 21768 }, { "epoch": 0.56, "grad_norm": 19.802541732788086, "learning_rate": 8.613165007934978e-06, "loss": 0.6027, "step": 21769 }, { "epoch": 0.56, "grad_norm": 1.6661268472671509, "learning_rate": 8.612342891725172e-06, "loss": 0.4746, "step": 21770 }, { "epoch": 0.56, "grad_norm": 2.8348004817962646, "learning_rate": 8.611520785078225e-06, "loss": 0.6786, "step": 21771 }, { "epoch": 0.56, "grad_norm": 1.8664804697036743, "learning_rate": 8.610698687999804e-06, "loss": 0.6179, "step": 21772 }, { "epoch": 0.56, "grad_norm": 2.5198473930358887, "learning_rate": 8.609876600495575e-06, "loss": 0.5067, "step": 21773 }, { "epoch": 0.56, "grad_norm": 1.7877274751663208, "learning_rate": 8.609054522571205e-06, "loss": 0.6078, "step": 21774 }, { "epoch": 0.56, "grad_norm": 1.676620602607727, "learning_rate": 8.608232454232354e-06, "loss": 0.615, "step": 21775 }, { "epoch": 0.56, "grad_norm": 4.508011341094971, "learning_rate": 8.607410395484691e-06, "loss": 0.5209, "step": 21776 }, { "epoch": 0.56, "grad_norm": 0.94298255443573, "learning_rate": 8.606588346333883e-06, "loss": 0.5006, "step": 21777 }, { "epoch": 0.56, "grad_norm": 1.0692709684371948, "learning_rate": 8.605766306785593e-06, "loss": 0.5601, "step": 21778 }, { "epoch": 0.56, "grad_norm": 2.7241787910461426, "learning_rate": 8.604944276845485e-06, "loss": 0.5933, "step": 21779 }, { "epoch": 0.56, "grad_norm": 25.473251342773438, "learning_rate": 8.604122256519223e-06, "loss": 0.4248, "step": 21780 }, { "epoch": 0.56, "grad_norm": 1.787765622138977, "learning_rate": 8.603300245812473e-06, "loss": 0.4258, "step": 21781 }, { "epoch": 0.56, "grad_norm": 1.2457419633865356, "learning_rate": 8.602478244730903e-06, "loss": 0.5116, "step": 21782 }, { "epoch": 0.56, "grad_norm": 2.0806915760040283, "learning_rate": 8.601656253280172e-06, "loss": 0.6022, "step": 21783 }, { "epoch": 0.56, "grad_norm": 1.8220281600952148, "learning_rate": 8.60083427146595e-06, "loss": 0.6066, "step": 21784 }, { "epoch": 0.56, "grad_norm": 2.1671454906463623, "learning_rate": 8.600012299293898e-06, "loss": 0.6567, "step": 21785 }, { "epoch": 0.56, "grad_norm": 0.8878040313720703, "learning_rate": 8.599190336769681e-06, "loss": 0.5714, "step": 21786 }, { "epoch": 0.56, "grad_norm": 3.039006233215332, "learning_rate": 8.598368383898967e-06, "loss": 0.6606, "step": 21787 }, { "epoch": 0.56, "grad_norm": 1.2409213781356812, "learning_rate": 8.597546440687415e-06, "loss": 0.527, "step": 21788 }, { "epoch": 0.56, "grad_norm": 5.480368137359619, "learning_rate": 8.596724507140692e-06, "loss": 0.7523, "step": 21789 }, { "epoch": 0.56, "grad_norm": 4.031874656677246, "learning_rate": 8.595902583264462e-06, "loss": 0.6509, "step": 21790 }, { "epoch": 0.56, "grad_norm": 2.74444580078125, "learning_rate": 8.59508066906439e-06, "loss": 0.5983, "step": 21791 }, { "epoch": 0.56, "grad_norm": 2.7178611755371094, "learning_rate": 8.59425876454614e-06, "loss": 0.6087, "step": 21792 }, { "epoch": 0.56, "grad_norm": 1.7731784582138062, "learning_rate": 8.593436869715373e-06, "loss": 0.5834, "step": 21793 }, { "epoch": 0.56, "grad_norm": 1.059876561164856, "learning_rate": 8.592614984577757e-06, "loss": 0.573, "step": 21794 }, { "epoch": 0.56, "grad_norm": 1.095832347869873, "learning_rate": 8.591793109138954e-06, "loss": 0.5148, "step": 21795 }, { "epoch": 0.56, "grad_norm": 1.4149208068847656, "learning_rate": 8.590971243404628e-06, "loss": 0.5426, "step": 21796 }, { "epoch": 0.56, "grad_norm": 2.164335250854492, "learning_rate": 8.590149387380444e-06, "loss": 0.5237, "step": 21797 }, { "epoch": 0.56, "grad_norm": 1.9946506023406982, "learning_rate": 8.589327541072063e-06, "loss": 0.5536, "step": 21798 }, { "epoch": 0.56, "grad_norm": 1.2618273496627808, "learning_rate": 8.588505704485152e-06, "loss": 0.608, "step": 21799 }, { "epoch": 0.56, "grad_norm": 1.2212629318237305, "learning_rate": 8.587683877625373e-06, "loss": 0.3706, "step": 21800 }, { "epoch": 0.56, "grad_norm": 1.8156192302703857, "learning_rate": 8.586862060498388e-06, "loss": 0.499, "step": 21801 }, { "epoch": 0.56, "grad_norm": 1.7282744646072388, "learning_rate": 8.586040253109862e-06, "loss": 0.5457, "step": 21802 }, { "epoch": 0.56, "grad_norm": 1.575518012046814, "learning_rate": 8.58521845546546e-06, "loss": 0.4564, "step": 21803 }, { "epoch": 0.56, "grad_norm": 2.7991318702697754, "learning_rate": 8.584396667570841e-06, "loss": 0.6267, "step": 21804 }, { "epoch": 0.56, "grad_norm": 2.040663719177246, "learning_rate": 8.583574889431672e-06, "loss": 0.56, "step": 21805 }, { "epoch": 0.56, "grad_norm": 1.1644287109375, "learning_rate": 8.582753121053615e-06, "loss": 0.3343, "step": 21806 }, { "epoch": 0.56, "grad_norm": 1.9422082901000977, "learning_rate": 8.581931362442333e-06, "loss": 0.6012, "step": 21807 }, { "epoch": 0.56, "grad_norm": 4.55574893951416, "learning_rate": 8.58110961360349e-06, "loss": 0.7055, "step": 21808 }, { "epoch": 0.56, "grad_norm": 1.1057181358337402, "learning_rate": 8.580287874542747e-06, "loss": 0.5465, "step": 21809 }, { "epoch": 0.56, "grad_norm": 4.228796005249023, "learning_rate": 8.579466145265766e-06, "loss": 0.8278, "step": 21810 }, { "epoch": 0.56, "grad_norm": 2.5278003215789795, "learning_rate": 8.578644425778217e-06, "loss": 0.4586, "step": 21811 }, { "epoch": 0.56, "grad_norm": 9.373123168945312, "learning_rate": 8.577822716085755e-06, "loss": 0.5443, "step": 21812 }, { "epoch": 0.56, "grad_norm": 1.7724664211273193, "learning_rate": 8.577001016194046e-06, "loss": 0.4915, "step": 21813 }, { "epoch": 0.56, "grad_norm": 1.105873703956604, "learning_rate": 8.576179326108749e-06, "loss": 0.4259, "step": 21814 }, { "epoch": 0.56, "grad_norm": 1.2843846082687378, "learning_rate": 8.575357645835534e-06, "loss": 0.5201, "step": 21815 }, { "epoch": 0.56, "grad_norm": 1.1440057754516602, "learning_rate": 8.574535975380056e-06, "loss": 0.695, "step": 21816 }, { "epoch": 0.56, "grad_norm": 1.72774338722229, "learning_rate": 8.573714314747979e-06, "loss": 0.7157, "step": 21817 }, { "epoch": 0.56, "grad_norm": 1.3878101110458374, "learning_rate": 8.572892663944972e-06, "loss": 0.4719, "step": 21818 }, { "epoch": 0.56, "grad_norm": 2.144404649734497, "learning_rate": 8.572071022976688e-06, "loss": 0.4848, "step": 21819 }, { "epoch": 0.56, "grad_norm": 1.4209930896759033, "learning_rate": 8.571249391848796e-06, "loss": 0.5024, "step": 21820 }, { "epoch": 0.56, "grad_norm": 2.929922342300415, "learning_rate": 8.570427770566954e-06, "loss": 0.7031, "step": 21821 }, { "epoch": 0.56, "grad_norm": 1.8667335510253906, "learning_rate": 8.569606159136825e-06, "loss": 0.4463, "step": 21822 }, { "epoch": 0.56, "grad_norm": 5.719899654388428, "learning_rate": 8.568784557564072e-06, "loss": 0.5425, "step": 21823 }, { "epoch": 0.56, "grad_norm": 3.9039459228515625, "learning_rate": 8.567962965854357e-06, "loss": 0.6268, "step": 21824 }, { "epoch": 0.56, "grad_norm": 1.4419589042663574, "learning_rate": 8.567141384013344e-06, "loss": 0.5605, "step": 21825 }, { "epoch": 0.56, "grad_norm": 1.2660771608352661, "learning_rate": 8.566319812046688e-06, "loss": 0.6388, "step": 21826 }, { "epoch": 0.56, "grad_norm": 1.6038964986801147, "learning_rate": 8.565498249960057e-06, "loss": 0.6858, "step": 21827 }, { "epoch": 0.56, "grad_norm": 1.2205132246017456, "learning_rate": 8.564676697759112e-06, "loss": 0.5694, "step": 21828 }, { "epoch": 0.56, "grad_norm": 2.572434425354004, "learning_rate": 8.563855155449512e-06, "loss": 0.5973, "step": 21829 }, { "epoch": 0.56, "grad_norm": 4.373045921325684, "learning_rate": 8.563033623036922e-06, "loss": 0.6366, "step": 21830 }, { "epoch": 0.56, "grad_norm": 1.6815264225006104, "learning_rate": 8.562212100526999e-06, "loss": 0.5414, "step": 21831 }, { "epoch": 0.56, "grad_norm": 4.109041690826416, "learning_rate": 8.56139058792541e-06, "loss": 0.5288, "step": 21832 }, { "epoch": 0.56, "grad_norm": 1.4205058813095093, "learning_rate": 8.56056908523781e-06, "loss": 0.5658, "step": 21833 }, { "epoch": 0.56, "grad_norm": 2.213350534439087, "learning_rate": 8.559747592469864e-06, "loss": 0.5581, "step": 21834 }, { "epoch": 0.56, "grad_norm": 2.9207255840301514, "learning_rate": 8.558926109627234e-06, "loss": 0.4907, "step": 21835 }, { "epoch": 0.56, "grad_norm": 0.9797150492668152, "learning_rate": 8.558104636715579e-06, "loss": 0.4236, "step": 21836 }, { "epoch": 0.56, "grad_norm": 1.781714916229248, "learning_rate": 8.557283173740563e-06, "loss": 0.6669, "step": 21837 }, { "epoch": 0.56, "grad_norm": 1.9682765007019043, "learning_rate": 8.556461720707841e-06, "loss": 0.6748, "step": 21838 }, { "epoch": 0.56, "grad_norm": 1.340129017829895, "learning_rate": 8.55564027762308e-06, "loss": 0.6295, "step": 21839 }, { "epoch": 0.56, "grad_norm": 1.814414143562317, "learning_rate": 8.55481884449194e-06, "loss": 0.4644, "step": 21840 }, { "epoch": 0.56, "grad_norm": 1.1912872791290283, "learning_rate": 8.553997421320078e-06, "loss": 0.5434, "step": 21841 }, { "epoch": 0.56, "grad_norm": 1.9024345874786377, "learning_rate": 8.55317600811316e-06, "loss": 0.5015, "step": 21842 }, { "epoch": 0.56, "grad_norm": 1.1173678636550903, "learning_rate": 8.552354604876843e-06, "loss": 0.4077, "step": 21843 }, { "epoch": 0.56, "grad_norm": 2.343782901763916, "learning_rate": 8.551533211616789e-06, "loss": 0.6553, "step": 21844 }, { "epoch": 0.56, "grad_norm": 1.5446350574493408, "learning_rate": 8.550711828338655e-06, "loss": 0.6804, "step": 21845 }, { "epoch": 0.56, "grad_norm": 1.9041637182235718, "learning_rate": 8.549890455048107e-06, "loss": 0.6638, "step": 21846 }, { "epoch": 0.56, "grad_norm": 3.1334447860717773, "learning_rate": 8.549069091750805e-06, "loss": 0.6803, "step": 21847 }, { "epoch": 0.56, "grad_norm": 2.680253028869629, "learning_rate": 8.548247738452404e-06, "loss": 0.5023, "step": 21848 }, { "epoch": 0.56, "grad_norm": 2.8805294036865234, "learning_rate": 8.54742639515857e-06, "loss": 0.5787, "step": 21849 }, { "epoch": 0.56, "grad_norm": 2.1587467193603516, "learning_rate": 8.546605061874958e-06, "loss": 0.7069, "step": 21850 }, { "epoch": 0.56, "grad_norm": 3.5033199787139893, "learning_rate": 8.545783738607232e-06, "loss": 0.6084, "step": 21851 }, { "epoch": 0.56, "grad_norm": 1.2223471403121948, "learning_rate": 8.544962425361052e-06, "loss": 0.5791, "step": 21852 }, { "epoch": 0.56, "grad_norm": 1.150020718574524, "learning_rate": 8.544141122142074e-06, "loss": 0.4809, "step": 21853 }, { "epoch": 0.56, "grad_norm": 9.110395431518555, "learning_rate": 8.543319828955965e-06, "loss": 0.8007, "step": 21854 }, { "epoch": 0.56, "grad_norm": 4.3302717208862305, "learning_rate": 8.542498545808376e-06, "loss": 0.588, "step": 21855 }, { "epoch": 0.56, "grad_norm": 1.0042033195495605, "learning_rate": 8.541677272704975e-06, "loss": 0.5685, "step": 21856 }, { "epoch": 0.56, "grad_norm": 1.4633054733276367, "learning_rate": 8.540856009651416e-06, "loss": 0.5857, "step": 21857 }, { "epoch": 0.56, "grad_norm": 2.8123059272766113, "learning_rate": 8.54003475665336e-06, "loss": 0.5277, "step": 21858 }, { "epoch": 0.56, "grad_norm": 1.867074728012085, "learning_rate": 8.53921351371647e-06, "loss": 0.5465, "step": 21859 }, { "epoch": 0.56, "grad_norm": 1.6065226793289185, "learning_rate": 8.5383922808464e-06, "loss": 0.499, "step": 21860 }, { "epoch": 0.56, "grad_norm": 2.508481025695801, "learning_rate": 8.537571058048815e-06, "loss": 0.5874, "step": 21861 }, { "epoch": 0.56, "grad_norm": 1.016858696937561, "learning_rate": 8.536749845329369e-06, "loss": 0.5442, "step": 21862 }, { "epoch": 0.56, "grad_norm": 1.4254264831542969, "learning_rate": 8.535928642693722e-06, "loss": 0.5921, "step": 21863 }, { "epoch": 0.56, "grad_norm": 1.6355316638946533, "learning_rate": 8.53510745014754e-06, "loss": 0.4212, "step": 21864 }, { "epoch": 0.56, "grad_norm": 3.874250888824463, "learning_rate": 8.534286267696473e-06, "loss": 0.5752, "step": 21865 }, { "epoch": 0.56, "grad_norm": 1.4541168212890625, "learning_rate": 8.533465095346187e-06, "loss": 0.4722, "step": 21866 }, { "epoch": 0.56, "grad_norm": 1.5541958808898926, "learning_rate": 8.532643933102336e-06, "loss": 0.6542, "step": 21867 }, { "epoch": 0.56, "grad_norm": 2.798698663711548, "learning_rate": 8.531822780970583e-06, "loss": 0.632, "step": 21868 }, { "epoch": 0.56, "grad_norm": 1.580343246459961, "learning_rate": 8.531001638956583e-06, "loss": 0.4911, "step": 21869 }, { "epoch": 0.56, "grad_norm": 1.5151479244232178, "learning_rate": 8.530180507065996e-06, "loss": 0.6245, "step": 21870 }, { "epoch": 0.56, "grad_norm": 1.7326520681381226, "learning_rate": 8.529359385304486e-06, "loss": 0.6277, "step": 21871 }, { "epoch": 0.56, "grad_norm": 1.5933693647384644, "learning_rate": 8.528538273677703e-06, "loss": 0.591, "step": 21872 }, { "epoch": 0.56, "grad_norm": 2.318250894546509, "learning_rate": 8.527717172191313e-06, "loss": 0.4564, "step": 21873 }, { "epoch": 0.56, "grad_norm": 1.4720494747161865, "learning_rate": 8.526896080850969e-06, "loss": 0.5507, "step": 21874 }, { "epoch": 0.56, "grad_norm": 2.65238881111145, "learning_rate": 8.52607499966233e-06, "loss": 0.5389, "step": 21875 }, { "epoch": 0.56, "grad_norm": 3.015653371810913, "learning_rate": 8.52525392863106e-06, "loss": 0.498, "step": 21876 }, { "epoch": 0.56, "grad_norm": 1.76327383518219, "learning_rate": 8.524432867762812e-06, "loss": 0.5544, "step": 21877 }, { "epoch": 0.56, "grad_norm": 1.872604489326477, "learning_rate": 8.523611817063247e-06, "loss": 0.6334, "step": 21878 }, { "epoch": 0.56, "grad_norm": 1.1390538215637207, "learning_rate": 8.522790776538019e-06, "loss": 0.5512, "step": 21879 }, { "epoch": 0.56, "grad_norm": 3.842639446258545, "learning_rate": 8.521969746192788e-06, "loss": 0.6533, "step": 21880 }, { "epoch": 0.56, "grad_norm": 1.2315014600753784, "learning_rate": 8.521148726033217e-06, "loss": 0.4368, "step": 21881 }, { "epoch": 0.56, "grad_norm": 2.3709523677825928, "learning_rate": 8.520327716064956e-06, "loss": 0.6206, "step": 21882 }, { "epoch": 0.56, "grad_norm": 3.014634609222412, "learning_rate": 8.519506716293671e-06, "loss": 0.5786, "step": 21883 }, { "epoch": 0.56, "grad_norm": 1.5975819826126099, "learning_rate": 8.518685726725011e-06, "loss": 0.5637, "step": 21884 }, { "epoch": 0.56, "grad_norm": 2.5107429027557373, "learning_rate": 8.517864747364642e-06, "loss": 0.5451, "step": 21885 }, { "epoch": 0.56, "grad_norm": 2.6457223892211914, "learning_rate": 8.517043778218216e-06, "loss": 0.5668, "step": 21886 }, { "epoch": 0.56, "grad_norm": 3.022031307220459, "learning_rate": 8.516222819291392e-06, "loss": 0.7251, "step": 21887 }, { "epoch": 0.56, "grad_norm": 1.258547306060791, "learning_rate": 8.51540187058983e-06, "loss": 0.5034, "step": 21888 }, { "epoch": 0.56, "grad_norm": 2.17354416847229, "learning_rate": 8.514580932119186e-06, "loss": 0.5387, "step": 21889 }, { "epoch": 0.56, "grad_norm": 3.8601510524749756, "learning_rate": 8.513760003885117e-06, "loss": 0.602, "step": 21890 }, { "epoch": 0.56, "grad_norm": 2.409926176071167, "learning_rate": 8.512939085893277e-06, "loss": 0.6696, "step": 21891 }, { "epoch": 0.56, "grad_norm": 1.5396735668182373, "learning_rate": 8.512118178149328e-06, "loss": 0.3948, "step": 21892 }, { "epoch": 0.56, "grad_norm": 1.3079215288162231, "learning_rate": 8.511297280658929e-06, "loss": 0.4247, "step": 21893 }, { "epoch": 0.56, "grad_norm": 1.9995585680007935, "learning_rate": 8.510476393427729e-06, "loss": 0.6174, "step": 21894 }, { "epoch": 0.56, "grad_norm": 1.724185585975647, "learning_rate": 8.509655516461393e-06, "loss": 0.5987, "step": 21895 }, { "epoch": 0.56, "grad_norm": 1.491565465927124, "learning_rate": 8.508834649765574e-06, "loss": 0.4653, "step": 21896 }, { "epoch": 0.56, "grad_norm": 2.4063565731048584, "learning_rate": 8.50801379334593e-06, "loss": 0.5247, "step": 21897 }, { "epoch": 0.56, "grad_norm": 2.797563314437866, "learning_rate": 8.507192947208116e-06, "loss": 0.5989, "step": 21898 }, { "epoch": 0.56, "grad_norm": 1.5642272233963013, "learning_rate": 8.50637211135779e-06, "loss": 0.672, "step": 21899 }, { "epoch": 0.56, "grad_norm": 1.7267462015151978, "learning_rate": 8.505551285800614e-06, "loss": 0.464, "step": 21900 }, { "epoch": 0.56, "grad_norm": 9.01020622253418, "learning_rate": 8.504730470542236e-06, "loss": 0.697, "step": 21901 }, { "epoch": 0.56, "grad_norm": 1.2417916059494019, "learning_rate": 8.503909665588317e-06, "loss": 0.4076, "step": 21902 }, { "epoch": 0.56, "grad_norm": 1.2232115268707275, "learning_rate": 8.50308887094451e-06, "loss": 0.5471, "step": 21903 }, { "epoch": 0.56, "grad_norm": 1.2715739011764526, "learning_rate": 8.502268086616477e-06, "loss": 0.6341, "step": 21904 }, { "epoch": 0.56, "grad_norm": 1.3749945163726807, "learning_rate": 8.50144731260987e-06, "loss": 0.3557, "step": 21905 }, { "epoch": 0.56, "grad_norm": 2.2185230255126953, "learning_rate": 8.500626548930348e-06, "loss": 0.5805, "step": 21906 }, { "epoch": 0.56, "grad_norm": 1.2563532590866089, "learning_rate": 8.499805795583566e-06, "loss": 0.5171, "step": 21907 }, { "epoch": 0.56, "grad_norm": 6.220854759216309, "learning_rate": 8.498985052575178e-06, "loss": 0.6678, "step": 21908 }, { "epoch": 0.56, "grad_norm": 5.726404666900635, "learning_rate": 8.498164319910845e-06, "loss": 0.7314, "step": 21909 }, { "epoch": 0.56, "grad_norm": 1.947147250175476, "learning_rate": 8.497343597596217e-06, "loss": 0.5223, "step": 21910 }, { "epoch": 0.56, "grad_norm": 1.6883410215377808, "learning_rate": 8.496522885636952e-06, "loss": 0.6018, "step": 21911 }, { "epoch": 0.56, "grad_norm": 2.539680242538452, "learning_rate": 8.49570218403871e-06, "loss": 0.6186, "step": 21912 }, { "epoch": 0.56, "grad_norm": 3.3116517066955566, "learning_rate": 8.494881492807144e-06, "loss": 0.5325, "step": 21913 }, { "epoch": 0.56, "grad_norm": 6.181703090667725, "learning_rate": 8.494060811947907e-06, "loss": 0.4066, "step": 21914 }, { "epoch": 0.56, "grad_norm": 6.544204235076904, "learning_rate": 8.493240141466658e-06, "loss": 0.745, "step": 21915 }, { "epoch": 0.56, "grad_norm": 1.4260574579238892, "learning_rate": 8.49241948136905e-06, "loss": 0.4822, "step": 21916 }, { "epoch": 0.56, "grad_norm": 2.533884286880493, "learning_rate": 8.491598831660737e-06, "loss": 0.5217, "step": 21917 }, { "epoch": 0.56, "grad_norm": 2.159712791442871, "learning_rate": 8.490778192347382e-06, "loss": 0.4247, "step": 21918 }, { "epoch": 0.56, "grad_norm": 1.4315239191055298, "learning_rate": 8.489957563434633e-06, "loss": 0.5847, "step": 21919 }, { "epoch": 0.56, "grad_norm": 1.6436303853988647, "learning_rate": 8.48913694492815e-06, "loss": 0.3741, "step": 21920 }, { "epoch": 0.56, "grad_norm": 1.625984787940979, "learning_rate": 8.488316336833583e-06, "loss": 0.5875, "step": 21921 }, { "epoch": 0.56, "grad_norm": 1.1269460916519165, "learning_rate": 8.48749573915659e-06, "loss": 0.4991, "step": 21922 }, { "epoch": 0.56, "grad_norm": 1.3802696466445923, "learning_rate": 8.48667515190283e-06, "loss": 0.5464, "step": 21923 }, { "epoch": 0.56, "grad_norm": 1.317262053489685, "learning_rate": 8.48585457507795e-06, "loss": 0.4806, "step": 21924 }, { "epoch": 0.56, "grad_norm": 3.3292524814605713, "learning_rate": 8.485034008687613e-06, "loss": 0.4746, "step": 21925 }, { "epoch": 0.56, "grad_norm": 1.069754958152771, "learning_rate": 8.484213452737467e-06, "loss": 0.5848, "step": 21926 }, { "epoch": 0.56, "grad_norm": 1.763715147972107, "learning_rate": 8.48339290723317e-06, "loss": 0.6236, "step": 21927 }, { "epoch": 0.56, "grad_norm": 2.235690116882324, "learning_rate": 8.482572372180376e-06, "loss": 0.6182, "step": 21928 }, { "epoch": 0.56, "grad_norm": 1.6785942316055298, "learning_rate": 8.481751847584738e-06, "loss": 0.4681, "step": 21929 }, { "epoch": 0.56, "grad_norm": 6.678755283355713, "learning_rate": 8.480931333451916e-06, "loss": 0.5476, "step": 21930 }, { "epoch": 0.56, "grad_norm": 3.7990875244140625, "learning_rate": 8.480110829787559e-06, "loss": 0.6537, "step": 21931 }, { "epoch": 0.56, "grad_norm": 1.1863632202148438, "learning_rate": 8.479290336597325e-06, "loss": 0.4486, "step": 21932 }, { "epoch": 0.56, "grad_norm": 1.378313422203064, "learning_rate": 8.478469853886864e-06, "loss": 0.5951, "step": 21933 }, { "epoch": 0.56, "grad_norm": 1.376834511756897, "learning_rate": 8.477649381661834e-06, "loss": 0.3721, "step": 21934 }, { "epoch": 0.56, "grad_norm": 1.9448474645614624, "learning_rate": 8.47682891992789e-06, "loss": 0.566, "step": 21935 }, { "epoch": 0.56, "grad_norm": 0.9625649452209473, "learning_rate": 8.47600846869068e-06, "loss": 0.5582, "step": 21936 }, { "epoch": 0.56, "grad_norm": 6.811615467071533, "learning_rate": 8.475188027955867e-06, "loss": 0.795, "step": 21937 }, { "epoch": 0.56, "grad_norm": 1.3473761081695557, "learning_rate": 8.474367597729097e-06, "loss": 0.5441, "step": 21938 }, { "epoch": 0.56, "grad_norm": 5.057673931121826, "learning_rate": 8.473547178016029e-06, "loss": 0.5256, "step": 21939 }, { "epoch": 0.56, "grad_norm": 6.88327693939209, "learning_rate": 8.472726768822311e-06, "loss": 0.6144, "step": 21940 }, { "epoch": 0.56, "grad_norm": 3.7409369945526123, "learning_rate": 8.471906370153602e-06, "loss": 0.5658, "step": 21941 }, { "epoch": 0.56, "grad_norm": 1.3322980403900146, "learning_rate": 8.471085982015557e-06, "loss": 0.4528, "step": 21942 }, { "epoch": 0.56, "grad_norm": 1.7269566059112549, "learning_rate": 8.470265604413824e-06, "loss": 0.5652, "step": 21943 }, { "epoch": 0.56, "grad_norm": 1.8923684358596802, "learning_rate": 8.469445237354062e-06, "loss": 0.5155, "step": 21944 }, { "epoch": 0.56, "grad_norm": 7.4217681884765625, "learning_rate": 8.46862488084192e-06, "loss": 0.5444, "step": 21945 }, { "epoch": 0.56, "grad_norm": 11.165934562683105, "learning_rate": 8.467804534883052e-06, "loss": 0.4898, "step": 21946 }, { "epoch": 0.56, "grad_norm": 1.6798678636550903, "learning_rate": 8.466984199483115e-06, "loss": 0.5535, "step": 21947 }, { "epoch": 0.56, "grad_norm": 1.5735788345336914, "learning_rate": 8.466163874647759e-06, "loss": 0.5675, "step": 21948 }, { "epoch": 0.56, "grad_norm": 2.9893014430999756, "learning_rate": 8.465343560382638e-06, "loss": 0.5753, "step": 21949 }, { "epoch": 0.56, "grad_norm": 1.4383327960968018, "learning_rate": 8.464523256693404e-06, "loss": 0.7126, "step": 21950 }, { "epoch": 0.56, "grad_norm": 11.123398780822754, "learning_rate": 8.463702963585713e-06, "loss": 0.5771, "step": 21951 }, { "epoch": 0.56, "grad_norm": 2.3821499347686768, "learning_rate": 8.462882681065214e-06, "loss": 0.5936, "step": 21952 }, { "epoch": 0.56, "grad_norm": 1.3616811037063599, "learning_rate": 8.462062409137562e-06, "loss": 0.5138, "step": 21953 }, { "epoch": 0.56, "grad_norm": 1.10321843624115, "learning_rate": 8.461242147808411e-06, "loss": 0.5187, "step": 21954 }, { "epoch": 0.56, "grad_norm": 1.7779090404510498, "learning_rate": 8.460421897083411e-06, "loss": 0.6303, "step": 21955 }, { "epoch": 0.56, "grad_norm": 3.5183658599853516, "learning_rate": 8.459601656968217e-06, "loss": 0.7284, "step": 21956 }, { "epoch": 0.56, "grad_norm": 2.6486802101135254, "learning_rate": 8.458781427468481e-06, "loss": 0.6881, "step": 21957 }, { "epoch": 0.56, "grad_norm": 1.5123804807662964, "learning_rate": 8.457961208589853e-06, "loss": 0.443, "step": 21958 }, { "epoch": 0.56, "grad_norm": 1.7374813556671143, "learning_rate": 8.457141000337991e-06, "loss": 0.596, "step": 21959 }, { "epoch": 0.56, "grad_norm": 2.0903947353363037, "learning_rate": 8.45632080271854e-06, "loss": 0.4993, "step": 21960 }, { "epoch": 0.56, "grad_norm": 1.4691762924194336, "learning_rate": 8.45550061573716e-06, "loss": 0.6114, "step": 21961 }, { "epoch": 0.56, "grad_norm": 2.0344655513763428, "learning_rate": 8.454680439399496e-06, "loss": 0.5993, "step": 21962 }, { "epoch": 0.56, "grad_norm": 2.7201969623565674, "learning_rate": 8.453860273711207e-06, "loss": 0.7444, "step": 21963 }, { "epoch": 0.56, "grad_norm": 2.5411019325256348, "learning_rate": 8.453040118677938e-06, "loss": 0.5176, "step": 21964 }, { "epoch": 0.56, "grad_norm": 1.8867074251174927, "learning_rate": 8.452219974305345e-06, "loss": 0.4487, "step": 21965 }, { "epoch": 0.56, "grad_norm": 1.7557734251022339, "learning_rate": 8.451399840599082e-06, "loss": 0.6349, "step": 21966 }, { "epoch": 0.56, "grad_norm": 1.2462704181671143, "learning_rate": 8.450579717564797e-06, "loss": 0.5879, "step": 21967 }, { "epoch": 0.56, "grad_norm": 1.1302928924560547, "learning_rate": 8.449759605208146e-06, "loss": 0.4706, "step": 21968 }, { "epoch": 0.56, "grad_norm": 2.6580116748809814, "learning_rate": 8.448939503534773e-06, "loss": 0.7187, "step": 21969 }, { "epoch": 0.56, "grad_norm": 1.2246202230453491, "learning_rate": 8.448119412550334e-06, "loss": 0.5073, "step": 21970 }, { "epoch": 0.56, "grad_norm": 4.261128902435303, "learning_rate": 8.447299332260486e-06, "loss": 0.6221, "step": 21971 }, { "epoch": 0.56, "grad_norm": 2.6684539318084717, "learning_rate": 8.446479262670872e-06, "loss": 0.5762, "step": 21972 }, { "epoch": 0.56, "grad_norm": 3.464815855026245, "learning_rate": 8.445659203787151e-06, "loss": 0.6145, "step": 21973 }, { "epoch": 0.56, "grad_norm": 1.3277068138122559, "learning_rate": 8.444839155614966e-06, "loss": 0.6307, "step": 21974 }, { "epoch": 0.56, "grad_norm": 1.4451909065246582, "learning_rate": 8.444019118159972e-06, "loss": 0.5798, "step": 21975 }, { "epoch": 0.56, "grad_norm": 4.073009490966797, "learning_rate": 8.443199091427825e-06, "loss": 0.5717, "step": 21976 }, { "epoch": 0.56, "grad_norm": 1.1775720119476318, "learning_rate": 8.442379075424167e-06, "loss": 0.5219, "step": 21977 }, { "epoch": 0.56, "grad_norm": 5.849940299987793, "learning_rate": 8.441559070154659e-06, "loss": 0.6161, "step": 21978 }, { "epoch": 0.56, "grad_norm": 1.337831735610962, "learning_rate": 8.440739075624942e-06, "loss": 0.4938, "step": 21979 }, { "epoch": 0.56, "grad_norm": 4.808383464813232, "learning_rate": 8.439919091840674e-06, "loss": 0.643, "step": 21980 }, { "epoch": 0.56, "grad_norm": 1.032914638519287, "learning_rate": 8.439099118807503e-06, "loss": 0.534, "step": 21981 }, { "epoch": 0.56, "grad_norm": 2.581190586090088, "learning_rate": 8.43827915653108e-06, "loss": 0.5364, "step": 21982 }, { "epoch": 0.56, "grad_norm": 2.005866527557373, "learning_rate": 8.437459205017057e-06, "loss": 0.6364, "step": 21983 }, { "epoch": 0.56, "grad_norm": 3.0447607040405273, "learning_rate": 8.436639264271081e-06, "loss": 0.5134, "step": 21984 }, { "epoch": 0.56, "grad_norm": 1.189283847808838, "learning_rate": 8.435819334298808e-06, "loss": 0.5122, "step": 21985 }, { "epoch": 0.56, "grad_norm": 4.216008186340332, "learning_rate": 8.434999415105882e-06, "loss": 0.7577, "step": 21986 }, { "epoch": 0.56, "grad_norm": 1.633002758026123, "learning_rate": 8.434179506697959e-06, "loss": 0.5093, "step": 21987 }, { "epoch": 0.56, "grad_norm": 4.126791000366211, "learning_rate": 8.433359609080688e-06, "loss": 0.6741, "step": 21988 }, { "epoch": 0.56, "grad_norm": 7.7642951011657715, "learning_rate": 8.432539722259716e-06, "loss": 0.4968, "step": 21989 }, { "epoch": 0.56, "grad_norm": 1.3902508020401, "learning_rate": 8.431719846240698e-06, "loss": 0.5451, "step": 21990 }, { "epoch": 0.56, "grad_norm": 1.7182446718215942, "learning_rate": 8.430899981029278e-06, "loss": 0.5082, "step": 21991 }, { "epoch": 0.56, "grad_norm": 2.5365378856658936, "learning_rate": 8.430080126631115e-06, "loss": 0.6884, "step": 21992 }, { "epoch": 0.56, "grad_norm": 1.9549293518066406, "learning_rate": 8.42926028305185e-06, "loss": 0.7447, "step": 21993 }, { "epoch": 0.56, "grad_norm": 3.4718477725982666, "learning_rate": 8.428440450297135e-06, "loss": 0.4623, "step": 21994 }, { "epoch": 0.56, "grad_norm": 1.3649938106536865, "learning_rate": 8.427620628372624e-06, "loss": 0.4017, "step": 21995 }, { "epoch": 0.56, "grad_norm": 1.2177789211273193, "learning_rate": 8.42680081728396e-06, "loss": 0.3981, "step": 21996 }, { "epoch": 0.56, "grad_norm": 2.791668176651001, "learning_rate": 8.4259810170368e-06, "loss": 0.6402, "step": 21997 }, { "epoch": 0.56, "grad_norm": 0.9719753861427307, "learning_rate": 8.425161227636788e-06, "loss": 0.4292, "step": 21998 }, { "epoch": 0.56, "grad_norm": 5.733160018920898, "learning_rate": 8.424341449089574e-06, "loss": 0.5842, "step": 21999 }, { "epoch": 0.56, "grad_norm": 1.7091540098190308, "learning_rate": 8.423521681400812e-06, "loss": 0.6693, "step": 22000 }, { "epoch": 0.56, "grad_norm": 1.3019452095031738, "learning_rate": 8.422701924576146e-06, "loss": 0.6076, "step": 22001 }, { "epoch": 0.56, "grad_norm": 4.48372220993042, "learning_rate": 8.42188217862123e-06, "loss": 0.6686, "step": 22002 }, { "epoch": 0.56, "grad_norm": 1.6917660236358643, "learning_rate": 8.421062443541709e-06, "loss": 0.4577, "step": 22003 }, { "epoch": 0.56, "grad_norm": 1.5213384628295898, "learning_rate": 8.420242719343235e-06, "loss": 0.6647, "step": 22004 }, { "epoch": 0.56, "grad_norm": 2.6497931480407715, "learning_rate": 8.419423006031453e-06, "loss": 0.457, "step": 22005 }, { "epoch": 0.56, "grad_norm": 1.92470121383667, "learning_rate": 8.418603303612013e-06, "loss": 0.5091, "step": 22006 }, { "epoch": 0.56, "grad_norm": 4.901005744934082, "learning_rate": 8.417783612090571e-06, "loss": 0.588, "step": 22007 }, { "epoch": 0.56, "grad_norm": 1.3149458169937134, "learning_rate": 8.416963931472767e-06, "loss": 0.5347, "step": 22008 }, { "epoch": 0.56, "grad_norm": 1.8098150491714478, "learning_rate": 8.416144261764256e-06, "loss": 0.5746, "step": 22009 }, { "epoch": 0.56, "grad_norm": 1.9165457487106323, "learning_rate": 8.41532460297068e-06, "loss": 0.6371, "step": 22010 }, { "epoch": 0.56, "grad_norm": 3.4697139263153076, "learning_rate": 8.414504955097692e-06, "loss": 0.6534, "step": 22011 }, { "epoch": 0.56, "grad_norm": 1.1050665378570557, "learning_rate": 8.413685318150941e-06, "loss": 0.5706, "step": 22012 }, { "epoch": 0.56, "grad_norm": 1.6117864847183228, "learning_rate": 8.412865692136072e-06, "loss": 0.5864, "step": 22013 }, { "epoch": 0.56, "grad_norm": 1.0700576305389404, "learning_rate": 8.412046077058739e-06, "loss": 0.5361, "step": 22014 }, { "epoch": 0.56, "grad_norm": 1.5096973180770874, "learning_rate": 8.411226472924583e-06, "loss": 0.5217, "step": 22015 }, { "epoch": 0.56, "grad_norm": 1.7294732332229614, "learning_rate": 8.410406879739257e-06, "loss": 0.5407, "step": 22016 }, { "epoch": 0.56, "grad_norm": 4.3138861656188965, "learning_rate": 8.409587297508411e-06, "loss": 0.6357, "step": 22017 }, { "epoch": 0.56, "grad_norm": 1.5287426710128784, "learning_rate": 8.408767726237687e-06, "loss": 0.6396, "step": 22018 }, { "epoch": 0.56, "grad_norm": 1.2041670083999634, "learning_rate": 8.407948165932737e-06, "loss": 0.5209, "step": 22019 }, { "epoch": 0.56, "grad_norm": 1.8403209447860718, "learning_rate": 8.407128616599209e-06, "loss": 0.4982, "step": 22020 }, { "epoch": 0.56, "grad_norm": 1.4989683628082275, "learning_rate": 8.40630907824275e-06, "loss": 0.6396, "step": 22021 }, { "epoch": 0.56, "grad_norm": 1.1676892042160034, "learning_rate": 8.405489550869005e-06, "loss": 0.6169, "step": 22022 }, { "epoch": 0.56, "grad_norm": 1.029484510421753, "learning_rate": 8.404670034483625e-06, "loss": 0.5373, "step": 22023 }, { "epoch": 0.56, "grad_norm": 1.9768619537353516, "learning_rate": 8.403850529092259e-06, "loss": 0.5787, "step": 22024 }, { "epoch": 0.56, "grad_norm": 1.1834431886672974, "learning_rate": 8.40303103470055e-06, "loss": 0.4468, "step": 22025 }, { "epoch": 0.56, "grad_norm": 1.4033839702606201, "learning_rate": 8.40221155131415e-06, "loss": 0.5182, "step": 22026 }, { "epoch": 0.56, "grad_norm": 1.4387078285217285, "learning_rate": 8.401392078938702e-06, "loss": 0.5812, "step": 22027 }, { "epoch": 0.56, "grad_norm": 2.1137924194335938, "learning_rate": 8.400572617579856e-06, "loss": 0.5846, "step": 22028 }, { "epoch": 0.56, "grad_norm": 1.0700403451919556, "learning_rate": 8.39975316724326e-06, "loss": 0.446, "step": 22029 }, { "epoch": 0.56, "grad_norm": 1.1478477716445923, "learning_rate": 8.398933727934559e-06, "loss": 0.6012, "step": 22030 }, { "epoch": 0.56, "grad_norm": 1.4486041069030762, "learning_rate": 8.398114299659401e-06, "loss": 0.5853, "step": 22031 }, { "epoch": 0.56, "grad_norm": 3.586820125579834, "learning_rate": 8.397294882423433e-06, "loss": 0.6083, "step": 22032 }, { "epoch": 0.56, "grad_norm": 3.2809548377990723, "learning_rate": 8.396475476232303e-06, "loss": 0.5731, "step": 22033 }, { "epoch": 0.56, "grad_norm": 3.6376328468322754, "learning_rate": 8.395656081091653e-06, "loss": 0.7564, "step": 22034 }, { "epoch": 0.56, "grad_norm": 4.031139850616455, "learning_rate": 8.394836697007134e-06, "loss": 0.4809, "step": 22035 }, { "epoch": 0.56, "grad_norm": 1.5614053010940552, "learning_rate": 8.394017323984394e-06, "loss": 0.5501, "step": 22036 }, { "epoch": 0.56, "grad_norm": 2.830782413482666, "learning_rate": 8.393197962029076e-06, "loss": 0.5988, "step": 22037 }, { "epoch": 0.56, "grad_norm": 3.3323028087615967, "learning_rate": 8.39237861114683e-06, "loss": 0.508, "step": 22038 }, { "epoch": 0.56, "grad_norm": 1.1049108505249023, "learning_rate": 8.391559271343299e-06, "loss": 0.4765, "step": 22039 }, { "epoch": 0.56, "grad_norm": 0.9757125377655029, "learning_rate": 8.390739942624131e-06, "loss": 0.5723, "step": 22040 }, { "epoch": 0.56, "grad_norm": 1.1394929885864258, "learning_rate": 8.389920624994973e-06, "loss": 0.6233, "step": 22041 }, { "epoch": 0.56, "grad_norm": 1.764144778251648, "learning_rate": 8.389101318461471e-06, "loss": 0.4822, "step": 22042 }, { "epoch": 0.56, "grad_norm": 1.7995051145553589, "learning_rate": 8.38828202302927e-06, "loss": 0.6314, "step": 22043 }, { "epoch": 0.56, "grad_norm": 1.1511040925979614, "learning_rate": 8.387462738704018e-06, "loss": 0.4935, "step": 22044 }, { "epoch": 0.57, "grad_norm": 7.535769939422607, "learning_rate": 8.386643465491359e-06, "loss": 0.6217, "step": 22045 }, { "epoch": 0.57, "grad_norm": 1.3529702425003052, "learning_rate": 8.385824203396938e-06, "loss": 0.5984, "step": 22046 }, { "epoch": 0.57, "grad_norm": 1.7287988662719727, "learning_rate": 8.385004952426404e-06, "loss": 0.5354, "step": 22047 }, { "epoch": 0.57, "grad_norm": 3.268324375152588, "learning_rate": 8.3841857125854e-06, "loss": 0.6239, "step": 22048 }, { "epoch": 0.57, "grad_norm": 1.9392107725143433, "learning_rate": 8.383366483879574e-06, "loss": 0.6318, "step": 22049 }, { "epoch": 0.57, "grad_norm": 9.325926780700684, "learning_rate": 8.382547266314572e-06, "loss": 0.5638, "step": 22050 }, { "epoch": 0.57, "grad_norm": 9.170292854309082, "learning_rate": 8.381728059896035e-06, "loss": 0.634, "step": 22051 }, { "epoch": 0.57, "grad_norm": 2.386190176010132, "learning_rate": 8.380908864629611e-06, "loss": 0.6792, "step": 22052 }, { "epoch": 0.57, "grad_norm": 1.0973631143569946, "learning_rate": 8.38008968052095e-06, "loss": 0.5686, "step": 22053 }, { "epoch": 0.57, "grad_norm": 1.1992437839508057, "learning_rate": 8.37927050757569e-06, "loss": 0.4229, "step": 22054 }, { "epoch": 0.57, "grad_norm": 1.739017367362976, "learning_rate": 8.378451345799482e-06, "loss": 0.5973, "step": 22055 }, { "epoch": 0.57, "grad_norm": 2.534005880355835, "learning_rate": 8.377632195197965e-06, "loss": 0.4751, "step": 22056 }, { "epoch": 0.57, "grad_norm": 1.3350856304168701, "learning_rate": 8.37681305577679e-06, "loss": 0.5426, "step": 22057 }, { "epoch": 0.57, "grad_norm": 1.717416524887085, "learning_rate": 8.3759939275416e-06, "loss": 0.5843, "step": 22058 }, { "epoch": 0.57, "grad_norm": 0.9853304624557495, "learning_rate": 8.375174810498037e-06, "loss": 0.5325, "step": 22059 }, { "epoch": 0.57, "grad_norm": 8.0341157913208, "learning_rate": 8.374355704651751e-06, "loss": 0.6168, "step": 22060 }, { "epoch": 0.57, "grad_norm": 3.624798536300659, "learning_rate": 8.373536610008382e-06, "loss": 0.5146, "step": 22061 }, { "epoch": 0.57, "grad_norm": 2.301926374435425, "learning_rate": 8.37271752657358e-06, "loss": 0.6611, "step": 22062 }, { "epoch": 0.57, "grad_norm": 1.8317489624023438, "learning_rate": 8.371898454352984e-06, "loss": 0.5052, "step": 22063 }, { "epoch": 0.57, "grad_norm": 1.6593774557113647, "learning_rate": 8.37107939335224e-06, "loss": 0.5735, "step": 22064 }, { "epoch": 0.57, "grad_norm": 2.4598169326782227, "learning_rate": 8.370260343576996e-06, "loss": 0.7505, "step": 22065 }, { "epoch": 0.57, "grad_norm": 1.1163103580474854, "learning_rate": 8.369441305032892e-06, "loss": 0.3899, "step": 22066 }, { "epoch": 0.57, "grad_norm": 3.9535470008850098, "learning_rate": 8.368622277725576e-06, "loss": 0.6133, "step": 22067 }, { "epoch": 0.57, "grad_norm": 3.4440243244171143, "learning_rate": 8.36780326166069e-06, "loss": 0.5283, "step": 22068 }, { "epoch": 0.57, "grad_norm": 1.2502208948135376, "learning_rate": 8.366984256843875e-06, "loss": 0.3801, "step": 22069 }, { "epoch": 0.57, "grad_norm": 1.4840131998062134, "learning_rate": 8.366165263280783e-06, "loss": 0.5819, "step": 22070 }, { "epoch": 0.57, "grad_norm": 1.5100146532058716, "learning_rate": 8.365346280977051e-06, "loss": 0.4692, "step": 22071 }, { "epoch": 0.57, "grad_norm": 1.3827968835830688, "learning_rate": 8.364527309938327e-06, "loss": 0.661, "step": 22072 }, { "epoch": 0.57, "grad_norm": 1.0780198574066162, "learning_rate": 8.363708350170252e-06, "loss": 0.4588, "step": 22073 }, { "epoch": 0.57, "grad_norm": 11.765861511230469, "learning_rate": 8.362889401678474e-06, "loss": 0.6246, "step": 22074 }, { "epoch": 0.57, "grad_norm": 9.62347412109375, "learning_rate": 8.36207046446863e-06, "loss": 0.5064, "step": 22075 }, { "epoch": 0.57, "grad_norm": 1.7423614263534546, "learning_rate": 8.361251538546367e-06, "loss": 0.599, "step": 22076 }, { "epoch": 0.57, "grad_norm": 1.2423762083053589, "learning_rate": 8.360432623917331e-06, "loss": 0.633, "step": 22077 }, { "epoch": 0.57, "grad_norm": 6.2881975173950195, "learning_rate": 8.359613720587163e-06, "loss": 0.6645, "step": 22078 }, { "epoch": 0.57, "grad_norm": 1.5579880475997925, "learning_rate": 8.358794828561507e-06, "loss": 0.5543, "step": 22079 }, { "epoch": 0.57, "grad_norm": 3.1476593017578125, "learning_rate": 8.357975947846004e-06, "loss": 0.6873, "step": 22080 }, { "epoch": 0.57, "grad_norm": 1.4239799976348877, "learning_rate": 8.357157078446299e-06, "loss": 0.5916, "step": 22081 }, { "epoch": 0.57, "grad_norm": 1.1788049936294556, "learning_rate": 8.35633822036804e-06, "loss": 0.3492, "step": 22082 }, { "epoch": 0.57, "grad_norm": 2.182284116744995, "learning_rate": 8.35551937361686e-06, "loss": 0.4245, "step": 22083 }, { "epoch": 0.57, "grad_norm": 6.544614315032959, "learning_rate": 8.35470053819841e-06, "loss": 0.6009, "step": 22084 }, { "epoch": 0.57, "grad_norm": 2.1322739124298096, "learning_rate": 8.353881714118329e-06, "loss": 0.6406, "step": 22085 }, { "epoch": 0.57, "grad_norm": 1.038339376449585, "learning_rate": 8.353062901382263e-06, "loss": 0.33, "step": 22086 }, { "epoch": 0.57, "grad_norm": 1.7422765493392944, "learning_rate": 8.35224409999585e-06, "loss": 0.5373, "step": 22087 }, { "epoch": 0.57, "grad_norm": 1.6478769779205322, "learning_rate": 8.351425309964735e-06, "loss": 0.6283, "step": 22088 }, { "epoch": 0.57, "grad_norm": 1.413022518157959, "learning_rate": 8.350606531294564e-06, "loss": 0.5464, "step": 22089 }, { "epoch": 0.57, "grad_norm": 1.288185954093933, "learning_rate": 8.349787763990976e-06, "loss": 0.5757, "step": 22090 }, { "epoch": 0.57, "grad_norm": 1.1433420181274414, "learning_rate": 8.348969008059614e-06, "loss": 0.4483, "step": 22091 }, { "epoch": 0.57, "grad_norm": 1.4790562391281128, "learning_rate": 8.348150263506118e-06, "loss": 0.5378, "step": 22092 }, { "epoch": 0.57, "grad_norm": 5.0658345222473145, "learning_rate": 8.347331530336134e-06, "loss": 0.7306, "step": 22093 }, { "epoch": 0.57, "grad_norm": 1.47715163230896, "learning_rate": 8.346512808555305e-06, "loss": 0.5324, "step": 22094 }, { "epoch": 0.57, "grad_norm": 1.2402242422103882, "learning_rate": 8.345694098169268e-06, "loss": 0.5046, "step": 22095 }, { "epoch": 0.57, "grad_norm": 2.580556631088257, "learning_rate": 8.34487539918367e-06, "loss": 0.5239, "step": 22096 }, { "epoch": 0.57, "grad_norm": 2.1742868423461914, "learning_rate": 8.344056711604148e-06, "loss": 0.5065, "step": 22097 }, { "epoch": 0.57, "grad_norm": 1.3816312551498413, "learning_rate": 8.343238035436349e-06, "loss": 0.6788, "step": 22098 }, { "epoch": 0.57, "grad_norm": 3.0268006324768066, "learning_rate": 8.342419370685913e-06, "loss": 0.5903, "step": 22099 }, { "epoch": 0.57, "grad_norm": 3.2383134365081787, "learning_rate": 8.34160071735848e-06, "loss": 0.6434, "step": 22100 }, { "epoch": 0.57, "grad_norm": 1.5081753730773926, "learning_rate": 8.340782075459694e-06, "loss": 0.5355, "step": 22101 }, { "epoch": 0.57, "grad_norm": 1.3741267919540405, "learning_rate": 8.339963444995193e-06, "loss": 0.5853, "step": 22102 }, { "epoch": 0.57, "grad_norm": 1.8593392372131348, "learning_rate": 8.339144825970624e-06, "loss": 0.4742, "step": 22103 }, { "epoch": 0.57, "grad_norm": 1.8027162551879883, "learning_rate": 8.338326218391623e-06, "loss": 0.6059, "step": 22104 }, { "epoch": 0.57, "grad_norm": 0.9920346736907959, "learning_rate": 8.337507622263836e-06, "loss": 0.5195, "step": 22105 }, { "epoch": 0.57, "grad_norm": 3.2963128089904785, "learning_rate": 8.3366890375929e-06, "loss": 0.5547, "step": 22106 }, { "epoch": 0.57, "grad_norm": 1.4108701944351196, "learning_rate": 8.335870464384459e-06, "loss": 0.462, "step": 22107 }, { "epoch": 0.57, "grad_norm": 1.8038687705993652, "learning_rate": 8.335051902644155e-06, "loss": 0.5763, "step": 22108 }, { "epoch": 0.57, "grad_norm": 2.660808563232422, "learning_rate": 8.334233352377625e-06, "loss": 0.5428, "step": 22109 }, { "epoch": 0.57, "grad_norm": 1.8505808115005493, "learning_rate": 8.333414813590512e-06, "loss": 0.6476, "step": 22110 }, { "epoch": 0.57, "grad_norm": 1.385689616203308, "learning_rate": 8.332596286288459e-06, "loss": 0.5346, "step": 22111 }, { "epoch": 0.57, "grad_norm": 2.2001302242279053, "learning_rate": 8.331777770477102e-06, "loss": 0.4873, "step": 22112 }, { "epoch": 0.57, "grad_norm": 1.2407454252243042, "learning_rate": 8.330959266162089e-06, "loss": 0.4585, "step": 22113 }, { "epoch": 0.57, "grad_norm": 1.9696770906448364, "learning_rate": 8.330140773349051e-06, "loss": 0.531, "step": 22114 }, { "epoch": 0.57, "grad_norm": 5.632303714752197, "learning_rate": 8.329322292043638e-06, "loss": 0.5977, "step": 22115 }, { "epoch": 0.57, "grad_norm": 1.4102678298950195, "learning_rate": 8.328503822251484e-06, "loss": 0.6183, "step": 22116 }, { "epoch": 0.57, "grad_norm": 2.6676228046417236, "learning_rate": 8.32768536397823e-06, "loss": 0.7474, "step": 22117 }, { "epoch": 0.57, "grad_norm": 3.020690679550171, "learning_rate": 8.32686691722952e-06, "loss": 0.7728, "step": 22118 }, { "epoch": 0.57, "grad_norm": 1.1568098068237305, "learning_rate": 8.326048482010991e-06, "loss": 0.574, "step": 22119 }, { "epoch": 0.57, "grad_norm": 1.454423427581787, "learning_rate": 8.325230058328286e-06, "loss": 0.4565, "step": 22120 }, { "epoch": 0.57, "grad_norm": 2.5509750843048096, "learning_rate": 8.324411646187042e-06, "loss": 0.6214, "step": 22121 }, { "epoch": 0.57, "grad_norm": 1.7233715057373047, "learning_rate": 8.323593245592899e-06, "loss": 0.5587, "step": 22122 }, { "epoch": 0.57, "grad_norm": 2.533505439758301, "learning_rate": 8.3227748565515e-06, "loss": 0.5096, "step": 22123 }, { "epoch": 0.57, "grad_norm": 4.396392822265625, "learning_rate": 8.321956479068483e-06, "loss": 0.6513, "step": 22124 }, { "epoch": 0.57, "grad_norm": 2.2075717449188232, "learning_rate": 8.321138113149488e-06, "loss": 0.5501, "step": 22125 }, { "epoch": 0.57, "grad_norm": 3.464839458465576, "learning_rate": 8.320319758800154e-06, "loss": 0.7222, "step": 22126 }, { "epoch": 0.57, "grad_norm": 1.049578070640564, "learning_rate": 8.31950141602612e-06, "loss": 0.6494, "step": 22127 }, { "epoch": 0.57, "grad_norm": 7.5983099937438965, "learning_rate": 8.318683084833025e-06, "loss": 0.9115, "step": 22128 }, { "epoch": 0.57, "grad_norm": 1.3583803176879883, "learning_rate": 8.31786476522651e-06, "loss": 0.4689, "step": 22129 }, { "epoch": 0.57, "grad_norm": 2.8610732555389404, "learning_rate": 8.31704645721222e-06, "loss": 0.7108, "step": 22130 }, { "epoch": 0.57, "grad_norm": 2.5166852474212646, "learning_rate": 8.316228160795781e-06, "loss": 0.5195, "step": 22131 }, { "epoch": 0.57, "grad_norm": 2.0207579135894775, "learning_rate": 8.315409875982846e-06, "loss": 0.4717, "step": 22132 }, { "epoch": 0.57, "grad_norm": 1.5403225421905518, "learning_rate": 8.314591602779042e-06, "loss": 0.536, "step": 22133 }, { "epoch": 0.57, "grad_norm": 1.9813660383224487, "learning_rate": 8.313773341190017e-06, "loss": 0.6743, "step": 22134 }, { "epoch": 0.57, "grad_norm": 1.3963298797607422, "learning_rate": 8.312955091221408e-06, "loss": 0.5561, "step": 22135 }, { "epoch": 0.57, "grad_norm": 1.0744335651397705, "learning_rate": 8.31213685287885e-06, "loss": 0.3928, "step": 22136 }, { "epoch": 0.57, "grad_norm": 1.2780953645706177, "learning_rate": 8.311318626167985e-06, "loss": 0.5746, "step": 22137 }, { "epoch": 0.57, "grad_norm": 1.9086666107177734, "learning_rate": 8.310500411094452e-06, "loss": 0.78, "step": 22138 }, { "epoch": 0.57, "grad_norm": 1.4112638235092163, "learning_rate": 8.30968220766389e-06, "loss": 0.6291, "step": 22139 }, { "epoch": 0.57, "grad_norm": 1.6668473482131958, "learning_rate": 8.308864015881933e-06, "loss": 0.6381, "step": 22140 }, { "epoch": 0.57, "grad_norm": 9.499021530151367, "learning_rate": 8.308045835754223e-06, "loss": 0.5962, "step": 22141 }, { "epoch": 0.57, "grad_norm": 1.4538462162017822, "learning_rate": 8.3072276672864e-06, "loss": 0.4457, "step": 22142 }, { "epoch": 0.57, "grad_norm": 1.4656561613082886, "learning_rate": 8.3064095104841e-06, "loss": 0.5129, "step": 22143 }, { "epoch": 0.57, "grad_norm": 1.4927481412887573, "learning_rate": 8.305591365352962e-06, "loss": 0.5359, "step": 22144 }, { "epoch": 0.57, "grad_norm": 1.3192973136901855, "learning_rate": 8.304773231898622e-06, "loss": 0.7115, "step": 22145 }, { "epoch": 0.57, "grad_norm": 2.251788377761841, "learning_rate": 8.30395511012672e-06, "loss": 0.5919, "step": 22146 }, { "epoch": 0.57, "grad_norm": 2.705839157104492, "learning_rate": 8.303137000042895e-06, "loss": 0.6126, "step": 22147 }, { "epoch": 0.57, "grad_norm": 3.7915079593658447, "learning_rate": 8.302318901652784e-06, "loss": 0.5072, "step": 22148 }, { "epoch": 0.57, "grad_norm": 1.9830845594406128, "learning_rate": 8.301500814962025e-06, "loss": 0.6218, "step": 22149 }, { "epoch": 0.57, "grad_norm": 2.4375061988830566, "learning_rate": 8.300682739976254e-06, "loss": 0.5329, "step": 22150 }, { "epoch": 0.57, "grad_norm": 1.4127779006958008, "learning_rate": 8.29986467670111e-06, "loss": 0.5758, "step": 22151 }, { "epoch": 0.57, "grad_norm": 1.4397608041763306, "learning_rate": 8.29904662514223e-06, "loss": 0.4695, "step": 22152 }, { "epoch": 0.57, "grad_norm": 3.3702871799468994, "learning_rate": 8.298228585305253e-06, "loss": 0.5564, "step": 22153 }, { "epoch": 0.57, "grad_norm": 1.572118878364563, "learning_rate": 8.297410557195815e-06, "loss": 0.5083, "step": 22154 }, { "epoch": 0.57, "grad_norm": 6.4507832527160645, "learning_rate": 8.296592540819553e-06, "loss": 0.4799, "step": 22155 }, { "epoch": 0.57, "grad_norm": 3.409991502761841, "learning_rate": 8.295774536182111e-06, "loss": 0.6377, "step": 22156 }, { "epoch": 0.57, "grad_norm": 1.3884190320968628, "learning_rate": 8.294956543289115e-06, "loss": 0.5783, "step": 22157 }, { "epoch": 0.57, "grad_norm": 0.8978163003921509, "learning_rate": 8.294138562146207e-06, "loss": 0.4059, "step": 22158 }, { "epoch": 0.57, "grad_norm": 2.541409969329834, "learning_rate": 8.293320592759024e-06, "loss": 0.7432, "step": 22159 }, { "epoch": 0.57, "grad_norm": 5.368743896484375, "learning_rate": 8.292502635133205e-06, "loss": 0.6409, "step": 22160 }, { "epoch": 0.57, "grad_norm": 2.0015676021575928, "learning_rate": 8.291684689274384e-06, "loss": 0.607, "step": 22161 }, { "epoch": 0.57, "grad_norm": 1.2078953981399536, "learning_rate": 8.2908667551882e-06, "loss": 0.5559, "step": 22162 }, { "epoch": 0.57, "grad_norm": 7.7288408279418945, "learning_rate": 8.290048832880287e-06, "loss": 0.6065, "step": 22163 }, { "epoch": 0.57, "grad_norm": 1.458017110824585, "learning_rate": 8.289230922356281e-06, "loss": 0.5757, "step": 22164 }, { "epoch": 0.57, "grad_norm": 4.761025905609131, "learning_rate": 8.288413023621827e-06, "loss": 0.7904, "step": 22165 }, { "epoch": 0.57, "grad_norm": 2.628227472305298, "learning_rate": 8.287595136682549e-06, "loss": 0.5653, "step": 22166 }, { "epoch": 0.57, "grad_norm": 1.7657595872879028, "learning_rate": 8.286777261544093e-06, "loss": 0.6429, "step": 22167 }, { "epoch": 0.57, "grad_norm": 1.200203776359558, "learning_rate": 8.28595939821209e-06, "loss": 0.5393, "step": 22168 }, { "epoch": 0.57, "grad_norm": 2.1709787845611572, "learning_rate": 8.28514154669218e-06, "loss": 0.5931, "step": 22169 }, { "epoch": 0.57, "grad_norm": 3.2610254287719727, "learning_rate": 8.284323706989996e-06, "loss": 0.5668, "step": 22170 }, { "epoch": 0.57, "grad_norm": 1.0871739387512207, "learning_rate": 8.283505879111174e-06, "loss": 0.609, "step": 22171 }, { "epoch": 0.57, "grad_norm": 3.593183994293213, "learning_rate": 8.282688063061352e-06, "loss": 0.4837, "step": 22172 }, { "epoch": 0.57, "grad_norm": 2.0399935245513916, "learning_rate": 8.281870258846164e-06, "loss": 0.5149, "step": 22173 }, { "epoch": 0.57, "grad_norm": 1.417157769203186, "learning_rate": 8.281052466471249e-06, "loss": 0.5724, "step": 22174 }, { "epoch": 0.57, "grad_norm": 1.3023568391799927, "learning_rate": 8.280234685942238e-06, "loss": 0.6837, "step": 22175 }, { "epoch": 0.57, "grad_norm": 1.0613460540771484, "learning_rate": 8.279416917264768e-06, "loss": 0.5109, "step": 22176 }, { "epoch": 0.57, "grad_norm": 7.372481346130371, "learning_rate": 8.27859916044448e-06, "loss": 0.5959, "step": 22177 }, { "epoch": 0.57, "grad_norm": 0.9670805335044861, "learning_rate": 8.277781415487001e-06, "loss": 0.341, "step": 22178 }, { "epoch": 0.57, "grad_norm": 1.61049485206604, "learning_rate": 8.276963682397975e-06, "loss": 0.6323, "step": 22179 }, { "epoch": 0.57, "grad_norm": 2.4533300399780273, "learning_rate": 8.276145961183029e-06, "loss": 0.6177, "step": 22180 }, { "epoch": 0.57, "grad_norm": 1.289402723312378, "learning_rate": 8.275328251847803e-06, "loss": 0.592, "step": 22181 }, { "epoch": 0.57, "grad_norm": 3.6490318775177, "learning_rate": 8.274510554397932e-06, "loss": 0.5823, "step": 22182 }, { "epoch": 0.57, "grad_norm": 1.8151572942733765, "learning_rate": 8.273692868839048e-06, "loss": 0.4277, "step": 22183 }, { "epoch": 0.57, "grad_norm": 1.2635904550552368, "learning_rate": 8.272875195176791e-06, "loss": 0.5163, "step": 22184 }, { "epoch": 0.57, "grad_norm": 3.0078132152557373, "learning_rate": 8.27205753341679e-06, "loss": 0.4963, "step": 22185 }, { "epoch": 0.57, "grad_norm": 2.0553643703460693, "learning_rate": 8.271239883564687e-06, "loss": 0.7066, "step": 22186 }, { "epoch": 0.57, "grad_norm": 5.072731971740723, "learning_rate": 8.270422245626109e-06, "loss": 0.6304, "step": 22187 }, { "epoch": 0.57, "grad_norm": 6.6223907470703125, "learning_rate": 8.269604619606695e-06, "loss": 0.607, "step": 22188 }, { "epoch": 0.57, "grad_norm": 1.463840126991272, "learning_rate": 8.26878700551208e-06, "loss": 0.5514, "step": 22189 }, { "epoch": 0.57, "grad_norm": 1.1457505226135254, "learning_rate": 8.267969403347895e-06, "loss": 0.5348, "step": 22190 }, { "epoch": 0.57, "grad_norm": 2.495792865753174, "learning_rate": 8.26715181311978e-06, "loss": 0.6628, "step": 22191 }, { "epoch": 0.57, "grad_norm": 1.6469519138336182, "learning_rate": 8.266334234833365e-06, "loss": 0.6604, "step": 22192 }, { "epoch": 0.57, "grad_norm": 2.5743157863616943, "learning_rate": 8.265516668494285e-06, "loss": 0.5927, "step": 22193 }, { "epoch": 0.57, "grad_norm": 1.419350266456604, "learning_rate": 8.264699114108172e-06, "loss": 0.4441, "step": 22194 }, { "epoch": 0.57, "grad_norm": 2.0816729068756104, "learning_rate": 8.263881571680665e-06, "loss": 0.6181, "step": 22195 }, { "epoch": 0.57, "grad_norm": 0.9785966873168945, "learning_rate": 8.263064041217394e-06, "loss": 0.4551, "step": 22196 }, { "epoch": 0.57, "grad_norm": 1.3585010766983032, "learning_rate": 8.262246522723996e-06, "loss": 0.4565, "step": 22197 }, { "epoch": 0.57, "grad_norm": 1.0051822662353516, "learning_rate": 8.261429016206105e-06, "loss": 0.6058, "step": 22198 }, { "epoch": 0.57, "grad_norm": 3.1430792808532715, "learning_rate": 8.260611521669349e-06, "loss": 0.6778, "step": 22199 }, { "epoch": 0.57, "grad_norm": 2.3132100105285645, "learning_rate": 8.259794039119365e-06, "loss": 0.6244, "step": 22200 }, { "epoch": 0.57, "grad_norm": 1.745663046836853, "learning_rate": 8.25897656856179e-06, "loss": 0.4814, "step": 22201 }, { "epoch": 0.57, "grad_norm": 4.784824848175049, "learning_rate": 8.258159110002252e-06, "loss": 0.5061, "step": 22202 }, { "epoch": 0.57, "grad_norm": 2.509615421295166, "learning_rate": 8.257341663446393e-06, "loss": 0.5766, "step": 22203 }, { "epoch": 0.57, "grad_norm": 1.4848562479019165, "learning_rate": 8.256524228899834e-06, "loss": 0.5024, "step": 22204 }, { "epoch": 0.57, "grad_norm": 2.5588881969451904, "learning_rate": 8.255706806368216e-06, "loss": 0.5432, "step": 22205 }, { "epoch": 0.57, "grad_norm": 1.1348483562469482, "learning_rate": 8.254889395857173e-06, "loss": 0.502, "step": 22206 }, { "epoch": 0.57, "grad_norm": 3.000295877456665, "learning_rate": 8.254071997372336e-06, "loss": 0.5277, "step": 22207 }, { "epoch": 0.57, "grad_norm": 1.7287019491195679, "learning_rate": 8.253254610919337e-06, "loss": 0.5592, "step": 22208 }, { "epoch": 0.57, "grad_norm": 1.0442723035812378, "learning_rate": 8.252437236503807e-06, "loss": 0.4763, "step": 22209 }, { "epoch": 0.57, "grad_norm": 1.1243191957473755, "learning_rate": 8.251619874131387e-06, "loss": 0.466, "step": 22210 }, { "epoch": 0.57, "grad_norm": 1.1270575523376465, "learning_rate": 8.2508025238077e-06, "loss": 0.5176, "step": 22211 }, { "epoch": 0.57, "grad_norm": 1.7347413301467896, "learning_rate": 8.249985185538382e-06, "loss": 0.6175, "step": 22212 }, { "epoch": 0.57, "grad_norm": 1.6361589431762695, "learning_rate": 8.249167859329072e-06, "loss": 0.5653, "step": 22213 }, { "epoch": 0.57, "grad_norm": 1.4981061220169067, "learning_rate": 8.248350545185393e-06, "loss": 0.6388, "step": 22214 }, { "epoch": 0.57, "grad_norm": 6.403457164764404, "learning_rate": 8.247533243112985e-06, "loss": 0.53, "step": 22215 }, { "epoch": 0.57, "grad_norm": 3.9971847534179688, "learning_rate": 8.246715953117474e-06, "loss": 0.5828, "step": 22216 }, { "epoch": 0.57, "grad_norm": 1.3730509281158447, "learning_rate": 8.245898675204494e-06, "loss": 0.3874, "step": 22217 }, { "epoch": 0.57, "grad_norm": 1.5439425706863403, "learning_rate": 8.245081409379682e-06, "loss": 0.4722, "step": 22218 }, { "epoch": 0.57, "grad_norm": 1.5036346912384033, "learning_rate": 8.244264155648663e-06, "loss": 0.542, "step": 22219 }, { "epoch": 0.57, "grad_norm": 3.9547061920166016, "learning_rate": 8.243446914017076e-06, "loss": 0.464, "step": 22220 }, { "epoch": 0.57, "grad_norm": 3.3220479488372803, "learning_rate": 8.242629684490546e-06, "loss": 0.6097, "step": 22221 }, { "epoch": 0.57, "grad_norm": 1.3043774366378784, "learning_rate": 8.24181246707471e-06, "loss": 0.466, "step": 22222 }, { "epoch": 0.57, "grad_norm": 2.7645015716552734, "learning_rate": 8.240995261775196e-06, "loss": 0.5019, "step": 22223 }, { "epoch": 0.57, "grad_norm": 1.1599295139312744, "learning_rate": 8.240178068597638e-06, "loss": 0.4448, "step": 22224 }, { "epoch": 0.57, "grad_norm": 1.0790849924087524, "learning_rate": 8.23936088754767e-06, "loss": 0.6262, "step": 22225 }, { "epoch": 0.57, "grad_norm": 3.4804341793060303, "learning_rate": 8.238543718630916e-06, "loss": 0.5214, "step": 22226 }, { "epoch": 0.57, "grad_norm": 1.2575851678848267, "learning_rate": 8.237726561853017e-06, "loss": 0.5717, "step": 22227 }, { "epoch": 0.57, "grad_norm": 1.8595316410064697, "learning_rate": 8.236909417219595e-06, "loss": 0.5047, "step": 22228 }, { "epoch": 0.57, "grad_norm": 6.306836128234863, "learning_rate": 8.236092284736289e-06, "loss": 0.4872, "step": 22229 }, { "epoch": 0.57, "grad_norm": 1.3619807958602905, "learning_rate": 8.235275164408725e-06, "loss": 0.6231, "step": 22230 }, { "epoch": 0.57, "grad_norm": 5.231004238128662, "learning_rate": 8.234458056242536e-06, "loss": 0.4036, "step": 22231 }, { "epoch": 0.57, "grad_norm": 2.9328935146331787, "learning_rate": 8.233640960243354e-06, "loss": 0.6622, "step": 22232 }, { "epoch": 0.57, "grad_norm": 3.0507521629333496, "learning_rate": 8.232823876416806e-06, "loss": 0.5461, "step": 22233 }, { "epoch": 0.57, "grad_norm": 2.0407519340515137, "learning_rate": 8.23200680476853e-06, "loss": 0.7199, "step": 22234 }, { "epoch": 0.57, "grad_norm": 2.513864517211914, "learning_rate": 8.231189745304148e-06, "loss": 0.5778, "step": 22235 }, { "epoch": 0.57, "grad_norm": 2.4376442432403564, "learning_rate": 8.230372698029296e-06, "loss": 0.6151, "step": 22236 }, { "epoch": 0.57, "grad_norm": 2.2990975379943848, "learning_rate": 8.229555662949605e-06, "loss": 0.6014, "step": 22237 }, { "epoch": 0.57, "grad_norm": 1.6982994079589844, "learning_rate": 8.228738640070703e-06, "loss": 0.5209, "step": 22238 }, { "epoch": 0.57, "grad_norm": 1.58586847782135, "learning_rate": 8.227921629398223e-06, "loss": 0.4572, "step": 22239 }, { "epoch": 0.57, "grad_norm": 1.388521432876587, "learning_rate": 8.227104630937792e-06, "loss": 0.6348, "step": 22240 }, { "epoch": 0.57, "grad_norm": 1.3321075439453125, "learning_rate": 8.22628764469504e-06, "loss": 0.5182, "step": 22241 }, { "epoch": 0.57, "grad_norm": 2.167541265487671, "learning_rate": 8.225470670675604e-06, "loss": 0.5951, "step": 22242 }, { "epoch": 0.57, "grad_norm": 2.08937668800354, "learning_rate": 8.224653708885107e-06, "loss": 0.5319, "step": 22243 }, { "epoch": 0.57, "grad_norm": 2.1859846115112305, "learning_rate": 8.223836759329182e-06, "loss": 0.5001, "step": 22244 }, { "epoch": 0.57, "grad_norm": 1.609914779663086, "learning_rate": 8.223019822013457e-06, "loss": 0.6301, "step": 22245 }, { "epoch": 0.57, "grad_norm": 1.2597023248672485, "learning_rate": 8.222202896943565e-06, "loss": 0.5096, "step": 22246 }, { "epoch": 0.57, "grad_norm": 1.4742511510849, "learning_rate": 8.221385984125133e-06, "loss": 0.3648, "step": 22247 }, { "epoch": 0.57, "grad_norm": 4.046901702880859, "learning_rate": 8.220569083563787e-06, "loss": 0.6758, "step": 22248 }, { "epoch": 0.57, "grad_norm": 1.7926355600357056, "learning_rate": 8.219752195265168e-06, "loss": 0.6295, "step": 22249 }, { "epoch": 0.57, "grad_norm": 4.565611839294434, "learning_rate": 8.218935319234893e-06, "loss": 0.5462, "step": 22250 }, { "epoch": 0.57, "grad_norm": 1.3244291543960571, "learning_rate": 8.2181184554786e-06, "loss": 0.4441, "step": 22251 }, { "epoch": 0.57, "grad_norm": 1.5525215864181519, "learning_rate": 8.217301604001913e-06, "loss": 0.5756, "step": 22252 }, { "epoch": 0.57, "grad_norm": 1.1268049478530884, "learning_rate": 8.216484764810464e-06, "loss": 0.608, "step": 22253 }, { "epoch": 0.57, "grad_norm": 1.2917391061782837, "learning_rate": 8.215667937909882e-06, "loss": 0.5224, "step": 22254 }, { "epoch": 0.57, "grad_norm": 1.1115341186523438, "learning_rate": 8.214851123305796e-06, "loss": 0.534, "step": 22255 }, { "epoch": 0.57, "grad_norm": 1.442016839981079, "learning_rate": 8.214034321003834e-06, "loss": 0.5141, "step": 22256 }, { "epoch": 0.57, "grad_norm": 2.061469554901123, "learning_rate": 8.213217531009625e-06, "loss": 0.7054, "step": 22257 }, { "epoch": 0.57, "grad_norm": 6.704794406890869, "learning_rate": 8.212400753328797e-06, "loss": 0.6112, "step": 22258 }, { "epoch": 0.57, "grad_norm": 1.4115678071975708, "learning_rate": 8.211583987966983e-06, "loss": 0.5265, "step": 22259 }, { "epoch": 0.57, "grad_norm": 6.04698371887207, "learning_rate": 8.210767234929805e-06, "loss": 0.6276, "step": 22260 }, { "epoch": 0.57, "grad_norm": 0.9963745474815369, "learning_rate": 8.209950494222899e-06, "loss": 0.3416, "step": 22261 }, { "epoch": 0.57, "grad_norm": 2.1023929119110107, "learning_rate": 8.209133765851886e-06, "loss": 0.6784, "step": 22262 }, { "epoch": 0.57, "grad_norm": 2.060321569442749, "learning_rate": 8.2083170498224e-06, "loss": 0.6179, "step": 22263 }, { "epoch": 0.57, "grad_norm": 2.1775295734405518, "learning_rate": 8.207500346140067e-06, "loss": 0.7732, "step": 22264 }, { "epoch": 0.57, "grad_norm": 2.6447160243988037, "learning_rate": 8.206683654810515e-06, "loss": 0.5613, "step": 22265 }, { "epoch": 0.57, "grad_norm": 1.8517299890518188, "learning_rate": 8.205866975839373e-06, "loss": 0.4856, "step": 22266 }, { "epoch": 0.57, "grad_norm": 3.5984270572662354, "learning_rate": 8.205050309232268e-06, "loss": 0.3965, "step": 22267 }, { "epoch": 0.57, "grad_norm": 1.0078688859939575, "learning_rate": 8.20423365499483e-06, "loss": 0.3726, "step": 22268 }, { "epoch": 0.57, "grad_norm": 2.659978151321411, "learning_rate": 8.203417013132683e-06, "loss": 0.4832, "step": 22269 }, { "epoch": 0.57, "grad_norm": 1.6322115659713745, "learning_rate": 8.202600383651458e-06, "loss": 0.5249, "step": 22270 }, { "epoch": 0.57, "grad_norm": 1.5638529062271118, "learning_rate": 8.201783766556784e-06, "loss": 0.4627, "step": 22271 }, { "epoch": 0.57, "grad_norm": 1.1542624235153198, "learning_rate": 8.200967161854284e-06, "loss": 0.4103, "step": 22272 }, { "epoch": 0.57, "grad_norm": 2.686523199081421, "learning_rate": 8.20015056954959e-06, "loss": 0.7953, "step": 22273 }, { "epoch": 0.57, "grad_norm": 1.4683046340942383, "learning_rate": 8.199333989648327e-06, "loss": 0.5877, "step": 22274 }, { "epoch": 0.57, "grad_norm": 1.577581524848938, "learning_rate": 8.198517422156123e-06, "loss": 0.5333, "step": 22275 }, { "epoch": 0.57, "grad_norm": 2.823153018951416, "learning_rate": 8.197700867078605e-06, "loss": 0.5092, "step": 22276 }, { "epoch": 0.57, "grad_norm": 1.8966314792633057, "learning_rate": 8.196884324421397e-06, "loss": 0.5187, "step": 22277 }, { "epoch": 0.57, "grad_norm": 1.7232345342636108, "learning_rate": 8.196067794190135e-06, "loss": 0.6775, "step": 22278 }, { "epoch": 0.57, "grad_norm": 2.7768476009368896, "learning_rate": 8.195251276390436e-06, "loss": 0.5161, "step": 22279 }, { "epoch": 0.57, "grad_norm": 1.2790685892105103, "learning_rate": 8.194434771027934e-06, "loss": 0.5282, "step": 22280 }, { "epoch": 0.57, "grad_norm": 1.4572166204452515, "learning_rate": 8.193618278108252e-06, "loss": 0.4882, "step": 22281 }, { "epoch": 0.57, "grad_norm": 2.7445952892303467, "learning_rate": 8.192801797637018e-06, "loss": 0.4227, "step": 22282 }, { "epoch": 0.57, "grad_norm": 6.978333950042725, "learning_rate": 8.191985329619859e-06, "loss": 0.6063, "step": 22283 }, { "epoch": 0.57, "grad_norm": 3.8207767009735107, "learning_rate": 8.1911688740624e-06, "loss": 0.4791, "step": 22284 }, { "epoch": 0.57, "grad_norm": 2.6622695922851562, "learning_rate": 8.190352430970271e-06, "loss": 0.6013, "step": 22285 }, { "epoch": 0.57, "grad_norm": 1.2904666662216187, "learning_rate": 8.189536000349095e-06, "loss": 0.4797, "step": 22286 }, { "epoch": 0.57, "grad_norm": 4.654319763183594, "learning_rate": 8.1887195822045e-06, "loss": 0.4975, "step": 22287 }, { "epoch": 0.57, "grad_norm": 1.2193888425827026, "learning_rate": 8.18790317654211e-06, "loss": 0.423, "step": 22288 }, { "epoch": 0.57, "grad_norm": 2.230588912963867, "learning_rate": 8.187086783367552e-06, "loss": 0.5611, "step": 22289 }, { "epoch": 0.57, "grad_norm": 1.1240582466125488, "learning_rate": 8.186270402686457e-06, "loss": 0.5823, "step": 22290 }, { "epoch": 0.57, "grad_norm": 1.6885467767715454, "learning_rate": 8.185454034504443e-06, "loss": 0.5531, "step": 22291 }, { "epoch": 0.57, "grad_norm": 3.4459712505340576, "learning_rate": 8.184637678827143e-06, "loss": 0.6963, "step": 22292 }, { "epoch": 0.57, "grad_norm": 1.3140873908996582, "learning_rate": 8.183821335660179e-06, "loss": 0.5727, "step": 22293 }, { "epoch": 0.57, "grad_norm": 5.152951717376709, "learning_rate": 8.183005005009175e-06, "loss": 0.6846, "step": 22294 }, { "epoch": 0.57, "grad_norm": 0.9547716379165649, "learning_rate": 8.18218868687976e-06, "loss": 0.4765, "step": 22295 }, { "epoch": 0.57, "grad_norm": 6.733241558074951, "learning_rate": 8.181372381277559e-06, "loss": 0.5226, "step": 22296 }, { "epoch": 0.57, "grad_norm": 3.326281785964966, "learning_rate": 8.180556088208197e-06, "loss": 0.4202, "step": 22297 }, { "epoch": 0.57, "grad_norm": 0.9763713479042053, "learning_rate": 8.179739807677298e-06, "loss": 0.4112, "step": 22298 }, { "epoch": 0.57, "grad_norm": 1.7489533424377441, "learning_rate": 8.17892353969049e-06, "loss": 0.5544, "step": 22299 }, { "epoch": 0.57, "grad_norm": 2.409611701965332, "learning_rate": 8.178107284253398e-06, "loss": 0.6266, "step": 22300 }, { "epoch": 0.57, "grad_norm": 1.071614384651184, "learning_rate": 8.177291041371643e-06, "loss": 0.5122, "step": 22301 }, { "epoch": 0.57, "grad_norm": 4.77557897567749, "learning_rate": 8.176474811050856e-06, "loss": 0.4991, "step": 22302 }, { "epoch": 0.57, "grad_norm": 5.026862144470215, "learning_rate": 8.175658593296658e-06, "loss": 0.4283, "step": 22303 }, { "epoch": 0.57, "grad_norm": 1.169556975364685, "learning_rate": 8.174842388114674e-06, "loss": 0.636, "step": 22304 }, { "epoch": 0.57, "grad_norm": 2.07641863822937, "learning_rate": 8.17402619551053e-06, "loss": 0.597, "step": 22305 }, { "epoch": 0.57, "grad_norm": 1.6081591844558716, "learning_rate": 8.173210015489848e-06, "loss": 0.6375, "step": 22306 }, { "epoch": 0.57, "grad_norm": 1.8240474462509155, "learning_rate": 8.17239384805826e-06, "loss": 0.7156, "step": 22307 }, { "epoch": 0.57, "grad_norm": 1.7923871278762817, "learning_rate": 8.171577693221381e-06, "loss": 0.5737, "step": 22308 }, { "epoch": 0.57, "grad_norm": 2.9424526691436768, "learning_rate": 8.170761550984842e-06, "loss": 0.6853, "step": 22309 }, { "epoch": 0.57, "grad_norm": 1.3723727464675903, "learning_rate": 8.169945421354264e-06, "loss": 0.5028, "step": 22310 }, { "epoch": 0.57, "grad_norm": 9.558361053466797, "learning_rate": 8.16912930433527e-06, "loss": 0.6877, "step": 22311 }, { "epoch": 0.57, "grad_norm": 1.956142544746399, "learning_rate": 8.168313199933492e-06, "loss": 0.5889, "step": 22312 }, { "epoch": 0.57, "grad_norm": 1.6371077299118042, "learning_rate": 8.167497108154545e-06, "loss": 0.7923, "step": 22313 }, { "epoch": 0.57, "grad_norm": 3.6295974254608154, "learning_rate": 8.16668102900406e-06, "loss": 0.4486, "step": 22314 }, { "epoch": 0.57, "grad_norm": 1.9023960828781128, "learning_rate": 8.165864962487654e-06, "loss": 0.6578, "step": 22315 }, { "epoch": 0.57, "grad_norm": 3.199907064437866, "learning_rate": 8.165048908610957e-06, "loss": 0.5943, "step": 22316 }, { "epoch": 0.57, "grad_norm": 1.3752057552337646, "learning_rate": 8.164232867379588e-06, "loss": 0.4604, "step": 22317 }, { "epoch": 0.57, "grad_norm": 1.7138844728469849, "learning_rate": 8.163416838799171e-06, "loss": 0.4588, "step": 22318 }, { "epoch": 0.57, "grad_norm": 1.1452534198760986, "learning_rate": 8.162600822875336e-06, "loss": 0.4619, "step": 22319 }, { "epoch": 0.57, "grad_norm": 1.8638286590576172, "learning_rate": 8.161784819613698e-06, "loss": 0.6778, "step": 22320 }, { "epoch": 0.57, "grad_norm": 3.846843957901001, "learning_rate": 8.160968829019888e-06, "loss": 0.6566, "step": 22321 }, { "epoch": 0.57, "grad_norm": 1.677161455154419, "learning_rate": 8.160152851099523e-06, "loss": 0.5495, "step": 22322 }, { "epoch": 0.57, "grad_norm": 1.5984749794006348, "learning_rate": 8.159336885858227e-06, "loss": 0.5612, "step": 22323 }, { "epoch": 0.57, "grad_norm": 1.4107780456542969, "learning_rate": 8.158520933301628e-06, "loss": 0.4503, "step": 22324 }, { "epoch": 0.57, "grad_norm": 3.9096298217773438, "learning_rate": 8.157704993435342e-06, "loss": 0.6252, "step": 22325 }, { "epoch": 0.57, "grad_norm": 2.4488847255706787, "learning_rate": 8.156889066265e-06, "loss": 0.5126, "step": 22326 }, { "epoch": 0.57, "grad_norm": 1.706210970878601, "learning_rate": 8.156073151796218e-06, "loss": 0.4107, "step": 22327 }, { "epoch": 0.57, "grad_norm": 0.9785019159317017, "learning_rate": 8.155257250034623e-06, "loss": 0.5045, "step": 22328 }, { "epoch": 0.57, "grad_norm": 1.3574084043502808, "learning_rate": 8.154441360985835e-06, "loss": 0.6068, "step": 22329 }, { "epoch": 0.57, "grad_norm": 2.2035276889801025, "learning_rate": 8.153625484655477e-06, "loss": 0.5626, "step": 22330 }, { "epoch": 0.57, "grad_norm": 2.1029889583587646, "learning_rate": 8.152809621049174e-06, "loss": 0.5161, "step": 22331 }, { "epoch": 0.57, "grad_norm": 1.4505826234817505, "learning_rate": 8.151993770172544e-06, "loss": 0.437, "step": 22332 }, { "epoch": 0.57, "grad_norm": 1.6538289785385132, "learning_rate": 8.151177932031214e-06, "loss": 0.494, "step": 22333 }, { "epoch": 0.57, "grad_norm": 1.234599232673645, "learning_rate": 8.150362106630803e-06, "loss": 0.4892, "step": 22334 }, { "epoch": 0.57, "grad_norm": 1.0951659679412842, "learning_rate": 8.149546293976933e-06, "loss": 0.6465, "step": 22335 }, { "epoch": 0.57, "grad_norm": 2.2466423511505127, "learning_rate": 8.14873049407523e-06, "loss": 0.6194, "step": 22336 }, { "epoch": 0.57, "grad_norm": 3.3912410736083984, "learning_rate": 8.147914706931313e-06, "loss": 0.4656, "step": 22337 }, { "epoch": 0.57, "grad_norm": 1.2726939916610718, "learning_rate": 8.147098932550805e-06, "loss": 0.5764, "step": 22338 }, { "epoch": 0.57, "grad_norm": 6.436535835266113, "learning_rate": 8.146283170939325e-06, "loss": 0.4742, "step": 22339 }, { "epoch": 0.57, "grad_norm": 2.502255439758301, "learning_rate": 8.1454674221025e-06, "loss": 0.6319, "step": 22340 }, { "epoch": 0.57, "grad_norm": 3.3309459686279297, "learning_rate": 8.144651686045943e-06, "loss": 0.609, "step": 22341 }, { "epoch": 0.57, "grad_norm": 3.8183586597442627, "learning_rate": 8.143835962775284e-06, "loss": 0.4707, "step": 22342 }, { "epoch": 0.57, "grad_norm": 2.056964874267578, "learning_rate": 8.143020252296142e-06, "loss": 0.6017, "step": 22343 }, { "epoch": 0.57, "grad_norm": 1.5190069675445557, "learning_rate": 8.142204554614135e-06, "loss": 0.6873, "step": 22344 }, { "epoch": 0.57, "grad_norm": 1.59722101688385, "learning_rate": 8.141388869734892e-06, "loss": 0.7753, "step": 22345 }, { "epoch": 0.57, "grad_norm": 9.29855728149414, "learning_rate": 8.140573197664024e-06, "loss": 0.3808, "step": 22346 }, { "epoch": 0.57, "grad_norm": 1.5911179780960083, "learning_rate": 8.139757538407159e-06, "loss": 0.5456, "step": 22347 }, { "epoch": 0.57, "grad_norm": 7.173649311065674, "learning_rate": 8.138941891969919e-06, "loss": 0.5273, "step": 22348 }, { "epoch": 0.57, "grad_norm": 2.3595619201660156, "learning_rate": 8.138126258357919e-06, "loss": 0.6517, "step": 22349 }, { "epoch": 0.57, "grad_norm": 4.5681304931640625, "learning_rate": 8.137310637576785e-06, "loss": 0.5819, "step": 22350 }, { "epoch": 0.57, "grad_norm": 12.538080215454102, "learning_rate": 8.136495029632134e-06, "loss": 0.6274, "step": 22351 }, { "epoch": 0.57, "grad_norm": 3.1348001956939697, "learning_rate": 8.13567943452959e-06, "loss": 0.5599, "step": 22352 }, { "epoch": 0.57, "grad_norm": 4.633863925933838, "learning_rate": 8.134863852274773e-06, "loss": 0.5334, "step": 22353 }, { "epoch": 0.57, "grad_norm": 1.3907864093780518, "learning_rate": 8.134048282873299e-06, "loss": 0.6142, "step": 22354 }, { "epoch": 0.57, "grad_norm": 1.681740403175354, "learning_rate": 8.133232726330796e-06, "loss": 0.6788, "step": 22355 }, { "epoch": 0.57, "grad_norm": 1.5411440134048462, "learning_rate": 8.132417182652877e-06, "loss": 0.5836, "step": 22356 }, { "epoch": 0.57, "grad_norm": 2.2784314155578613, "learning_rate": 8.131601651845168e-06, "loss": 0.5111, "step": 22357 }, { "epoch": 0.57, "grad_norm": 0.9856935739517212, "learning_rate": 8.130786133913285e-06, "loss": 0.4266, "step": 22358 }, { "epoch": 0.57, "grad_norm": 1.7809938192367554, "learning_rate": 8.129970628862849e-06, "loss": 0.6027, "step": 22359 }, { "epoch": 0.57, "grad_norm": 1.0518468618392944, "learning_rate": 8.129155136699483e-06, "loss": 0.5787, "step": 22360 }, { "epoch": 0.57, "grad_norm": 1.3826425075531006, "learning_rate": 8.128339657428801e-06, "loss": 0.3978, "step": 22361 }, { "epoch": 0.57, "grad_norm": 1.911399006843567, "learning_rate": 8.12752419105643e-06, "loss": 0.7005, "step": 22362 }, { "epoch": 0.57, "grad_norm": 1.295835018157959, "learning_rate": 8.126708737587983e-06, "loss": 0.4337, "step": 22363 }, { "epoch": 0.57, "grad_norm": 2.8509602546691895, "learning_rate": 8.125893297029083e-06, "loss": 0.6136, "step": 22364 }, { "epoch": 0.57, "grad_norm": 2.050353765487671, "learning_rate": 8.12507786938535e-06, "loss": 0.4858, "step": 22365 }, { "epoch": 0.57, "grad_norm": 1.6853057146072388, "learning_rate": 8.1242624546624e-06, "loss": 0.5507, "step": 22366 }, { "epoch": 0.57, "grad_norm": 1.2798891067504883, "learning_rate": 8.12344705286586e-06, "loss": 0.527, "step": 22367 }, { "epoch": 0.57, "grad_norm": 2.05790376663208, "learning_rate": 8.122631664001339e-06, "loss": 0.5278, "step": 22368 }, { "epoch": 0.57, "grad_norm": 1.6492778062820435, "learning_rate": 8.121816288074463e-06, "loss": 0.4553, "step": 22369 }, { "epoch": 0.57, "grad_norm": 1.613600254058838, "learning_rate": 8.121000925090849e-06, "loss": 0.5956, "step": 22370 }, { "epoch": 0.57, "grad_norm": 1.4976277351379395, "learning_rate": 8.120185575056114e-06, "loss": 0.7168, "step": 22371 }, { "epoch": 0.57, "grad_norm": 2.368162155151367, "learning_rate": 8.119370237975882e-06, "loss": 0.5725, "step": 22372 }, { "epoch": 0.57, "grad_norm": 7.660526275634766, "learning_rate": 8.118554913855767e-06, "loss": 0.5861, "step": 22373 }, { "epoch": 0.57, "grad_norm": 2.163684129714966, "learning_rate": 8.117739602701391e-06, "loss": 0.5938, "step": 22374 }, { "epoch": 0.57, "grad_norm": 0.9560720920562744, "learning_rate": 8.116924304518369e-06, "loss": 0.3969, "step": 22375 }, { "epoch": 0.57, "grad_norm": 1.4687373638153076, "learning_rate": 8.116109019312322e-06, "loss": 0.4489, "step": 22376 }, { "epoch": 0.57, "grad_norm": 1.6096197366714478, "learning_rate": 8.115293747088871e-06, "loss": 0.6161, "step": 22377 }, { "epoch": 0.57, "grad_norm": 5.258591651916504, "learning_rate": 8.114478487853628e-06, "loss": 0.5157, "step": 22378 }, { "epoch": 0.57, "grad_norm": 1.0294981002807617, "learning_rate": 8.113663241612217e-06, "loss": 0.4518, "step": 22379 }, { "epoch": 0.57, "grad_norm": 1.7186633348464966, "learning_rate": 8.112848008370252e-06, "loss": 0.637, "step": 22380 }, { "epoch": 0.57, "grad_norm": 1.5887417793273926, "learning_rate": 8.112032788133355e-06, "loss": 0.3966, "step": 22381 }, { "epoch": 0.57, "grad_norm": 2.0831165313720703, "learning_rate": 8.11121758090714e-06, "loss": 0.497, "step": 22382 }, { "epoch": 0.57, "grad_norm": 5.698422908782959, "learning_rate": 8.110402386697227e-06, "loss": 0.4898, "step": 22383 }, { "epoch": 0.57, "grad_norm": 1.8078982830047607, "learning_rate": 8.109587205509235e-06, "loss": 0.5142, "step": 22384 }, { "epoch": 0.57, "grad_norm": 1.1583318710327148, "learning_rate": 8.108772037348777e-06, "loss": 0.7226, "step": 22385 }, { "epoch": 0.57, "grad_norm": 2.2957983016967773, "learning_rate": 8.107956882221478e-06, "loss": 0.4912, "step": 22386 }, { "epoch": 0.57, "grad_norm": 1.5647363662719727, "learning_rate": 8.107141740132949e-06, "loss": 0.6761, "step": 22387 }, { "epoch": 0.57, "grad_norm": 1.3844032287597656, "learning_rate": 8.106326611088811e-06, "loss": 0.5242, "step": 22388 }, { "epoch": 0.57, "grad_norm": 2.042536973953247, "learning_rate": 8.10551149509468e-06, "loss": 0.5317, "step": 22389 }, { "epoch": 0.57, "grad_norm": 1.394973635673523, "learning_rate": 8.104696392156173e-06, "loss": 0.5613, "step": 22390 }, { "epoch": 0.57, "grad_norm": 1.6130865812301636, "learning_rate": 8.103881302278908e-06, "loss": 0.527, "step": 22391 }, { "epoch": 0.57, "grad_norm": 0.9559298157691956, "learning_rate": 8.103066225468503e-06, "loss": 0.446, "step": 22392 }, { "epoch": 0.57, "grad_norm": 1.2049994468688965, "learning_rate": 8.102251161730572e-06, "loss": 0.5476, "step": 22393 }, { "epoch": 0.57, "grad_norm": 3.097792148590088, "learning_rate": 8.101436111070734e-06, "loss": 0.4476, "step": 22394 }, { "epoch": 0.57, "grad_norm": 2.986673593521118, "learning_rate": 8.100621073494605e-06, "loss": 0.6341, "step": 22395 }, { "epoch": 0.57, "grad_norm": 6.126429080963135, "learning_rate": 8.099806049007806e-06, "loss": 0.7401, "step": 22396 }, { "epoch": 0.57, "grad_norm": 0.9853159785270691, "learning_rate": 8.098991037615946e-06, "loss": 0.3569, "step": 22397 }, { "epoch": 0.57, "grad_norm": 1.6782646179199219, "learning_rate": 8.098176039324649e-06, "loss": 0.5661, "step": 22398 }, { "epoch": 0.57, "grad_norm": 6.02609920501709, "learning_rate": 8.097361054139527e-06, "loss": 0.6579, "step": 22399 }, { "epoch": 0.57, "grad_norm": 1.6407305002212524, "learning_rate": 8.096546082066195e-06, "loss": 0.4622, "step": 22400 }, { "epoch": 0.57, "grad_norm": 1.86026132106781, "learning_rate": 8.095731123110272e-06, "loss": 0.4905, "step": 22401 }, { "epoch": 0.57, "grad_norm": 1.3712960481643677, "learning_rate": 8.094916177277374e-06, "loss": 0.3751, "step": 22402 }, { "epoch": 0.57, "grad_norm": 1.5475016832351685, "learning_rate": 8.094101244573115e-06, "loss": 0.577, "step": 22403 }, { "epoch": 0.57, "grad_norm": 1.5115941762924194, "learning_rate": 8.093286325003117e-06, "loss": 0.5746, "step": 22404 }, { "epoch": 0.57, "grad_norm": 6.253923416137695, "learning_rate": 8.09247141857299e-06, "loss": 0.6273, "step": 22405 }, { "epoch": 0.57, "grad_norm": 2.194204092025757, "learning_rate": 8.091656525288348e-06, "loss": 0.6447, "step": 22406 }, { "epoch": 0.57, "grad_norm": 2.2615244388580322, "learning_rate": 8.090841645154814e-06, "loss": 0.6713, "step": 22407 }, { "epoch": 0.57, "grad_norm": 5.702324867248535, "learning_rate": 8.090026778178e-06, "loss": 0.5872, "step": 22408 }, { "epoch": 0.57, "grad_norm": 1.8040716648101807, "learning_rate": 8.089211924363522e-06, "loss": 0.6875, "step": 22409 }, { "epoch": 0.57, "grad_norm": 1.3220031261444092, "learning_rate": 8.088397083716993e-06, "loss": 0.4412, "step": 22410 }, { "epoch": 0.57, "grad_norm": 12.906220436096191, "learning_rate": 8.08758225624403e-06, "loss": 0.5526, "step": 22411 }, { "epoch": 0.57, "grad_norm": 1.3524123430252075, "learning_rate": 8.08676744195025e-06, "loss": 0.4491, "step": 22412 }, { "epoch": 0.57, "grad_norm": 1.8226943016052246, "learning_rate": 8.085952640841264e-06, "loss": 0.6063, "step": 22413 }, { "epoch": 0.57, "grad_norm": 3.522913694381714, "learning_rate": 8.085137852922694e-06, "loss": 0.5594, "step": 22414 }, { "epoch": 0.57, "grad_norm": 3.7286744117736816, "learning_rate": 8.084323078200148e-06, "loss": 0.516, "step": 22415 }, { "epoch": 0.57, "grad_norm": 1.2442647218704224, "learning_rate": 8.083508316679246e-06, "loss": 0.5737, "step": 22416 }, { "epoch": 0.57, "grad_norm": 5.158330917358398, "learning_rate": 8.082693568365598e-06, "loss": 0.6081, "step": 22417 }, { "epoch": 0.57, "grad_norm": 2.677696943283081, "learning_rate": 8.081878833264823e-06, "loss": 0.5444, "step": 22418 }, { "epoch": 0.57, "grad_norm": 6.452542304992676, "learning_rate": 8.081064111382535e-06, "loss": 0.4135, "step": 22419 }, { "epoch": 0.57, "grad_norm": 1.2980061769485474, "learning_rate": 8.080249402724346e-06, "loss": 0.5271, "step": 22420 }, { "epoch": 0.57, "grad_norm": 1.4218887090682983, "learning_rate": 8.079434707295873e-06, "loss": 0.6318, "step": 22421 }, { "epoch": 0.57, "grad_norm": 1.1235311031341553, "learning_rate": 8.078620025102728e-06, "loss": 0.4285, "step": 22422 }, { "epoch": 0.57, "grad_norm": 7.778475284576416, "learning_rate": 8.077805356150528e-06, "loss": 0.5531, "step": 22423 }, { "epoch": 0.57, "grad_norm": 4.227255344390869, "learning_rate": 8.076990700444885e-06, "loss": 0.7649, "step": 22424 }, { "epoch": 0.57, "grad_norm": 1.6910256147384644, "learning_rate": 8.076176057991413e-06, "loss": 0.6309, "step": 22425 }, { "epoch": 0.57, "grad_norm": 1.443021535873413, "learning_rate": 8.075361428795729e-06, "loss": 0.5978, "step": 22426 }, { "epoch": 0.57, "grad_norm": 1.041608452796936, "learning_rate": 8.074546812863443e-06, "loss": 0.5196, "step": 22427 }, { "epoch": 0.57, "grad_norm": 1.441433072090149, "learning_rate": 8.073732210200172e-06, "loss": 0.4342, "step": 22428 }, { "epoch": 0.57, "grad_norm": 3.777076482772827, "learning_rate": 8.072917620811528e-06, "loss": 0.7292, "step": 22429 }, { "epoch": 0.57, "grad_norm": 1.5465773344039917, "learning_rate": 8.072103044703123e-06, "loss": 0.4514, "step": 22430 }, { "epoch": 0.57, "grad_norm": 2.1477460861206055, "learning_rate": 8.071288481880576e-06, "loss": 0.4928, "step": 22431 }, { "epoch": 0.57, "grad_norm": 1.6034022569656372, "learning_rate": 8.070473932349497e-06, "loss": 0.6312, "step": 22432 }, { "epoch": 0.57, "grad_norm": 2.3158016204833984, "learning_rate": 8.069659396115498e-06, "loss": 0.3759, "step": 22433 }, { "epoch": 0.57, "grad_norm": 1.5862689018249512, "learning_rate": 8.068844873184193e-06, "loss": 0.5089, "step": 22434 }, { "epoch": 0.58, "grad_norm": 0.9462985992431641, "learning_rate": 8.068030363561196e-06, "loss": 0.517, "step": 22435 }, { "epoch": 0.58, "grad_norm": 1.3895596265792847, "learning_rate": 8.067215867252122e-06, "loss": 0.607, "step": 22436 }, { "epoch": 0.58, "grad_norm": 1.103493571281433, "learning_rate": 8.066401384262579e-06, "loss": 0.4674, "step": 22437 }, { "epoch": 0.58, "grad_norm": 1.9642022848129272, "learning_rate": 8.065586914598187e-06, "loss": 0.4455, "step": 22438 }, { "epoch": 0.58, "grad_norm": 1.7019973993301392, "learning_rate": 8.06477245826455e-06, "loss": 0.5016, "step": 22439 }, { "epoch": 0.58, "grad_norm": 1.5722295045852661, "learning_rate": 8.063958015267289e-06, "loss": 0.383, "step": 22440 }, { "epoch": 0.58, "grad_norm": 15.366128921508789, "learning_rate": 8.063143585612012e-06, "loss": 0.6997, "step": 22441 }, { "epoch": 0.58, "grad_norm": 2.575052499771118, "learning_rate": 8.06232916930433e-06, "loss": 0.6692, "step": 22442 }, { "epoch": 0.58, "grad_norm": 1.4945049285888672, "learning_rate": 8.061514766349861e-06, "loss": 0.6468, "step": 22443 }, { "epoch": 0.58, "grad_norm": 1.5145307779312134, "learning_rate": 8.060700376754213e-06, "loss": 0.5213, "step": 22444 }, { "epoch": 0.58, "grad_norm": 2.7697319984436035, "learning_rate": 8.059886000523001e-06, "loss": 0.5323, "step": 22445 }, { "epoch": 0.58, "grad_norm": 1.3090547323226929, "learning_rate": 8.059071637661833e-06, "loss": 0.5723, "step": 22446 }, { "epoch": 0.58, "grad_norm": 2.126586437225342, "learning_rate": 8.058257288176326e-06, "loss": 0.4115, "step": 22447 }, { "epoch": 0.58, "grad_norm": 1.7770540714263916, "learning_rate": 8.05744295207209e-06, "loss": 0.5967, "step": 22448 }, { "epoch": 0.58, "grad_norm": 1.0864934921264648, "learning_rate": 8.056628629354736e-06, "loss": 0.5806, "step": 22449 }, { "epoch": 0.58, "grad_norm": 1.3964717388153076, "learning_rate": 8.055814320029878e-06, "loss": 0.5202, "step": 22450 }, { "epoch": 0.58, "grad_norm": 12.052990913391113, "learning_rate": 8.055000024103124e-06, "loss": 0.5855, "step": 22451 }, { "epoch": 0.58, "grad_norm": 5.441343784332275, "learning_rate": 8.05418574158009e-06, "loss": 0.6291, "step": 22452 }, { "epoch": 0.58, "grad_norm": 1.5829375982284546, "learning_rate": 8.053371472466383e-06, "loss": 0.5845, "step": 22453 }, { "epoch": 0.58, "grad_norm": 1.5727206468582153, "learning_rate": 8.052557216767617e-06, "loss": 0.5662, "step": 22454 }, { "epoch": 0.58, "grad_norm": 1.0388885736465454, "learning_rate": 8.051742974489406e-06, "loss": 0.3172, "step": 22455 }, { "epoch": 0.58, "grad_norm": 1.722682237625122, "learning_rate": 8.050928745637357e-06, "loss": 0.4157, "step": 22456 }, { "epoch": 0.58, "grad_norm": 1.6168491840362549, "learning_rate": 8.050114530217083e-06, "loss": 0.6285, "step": 22457 }, { "epoch": 0.58, "grad_norm": 1.7151036262512207, "learning_rate": 8.049300328234192e-06, "loss": 0.5302, "step": 22458 }, { "epoch": 0.58, "grad_norm": 1.2771767377853394, "learning_rate": 8.0484861396943e-06, "loss": 0.5498, "step": 22459 }, { "epoch": 0.58, "grad_norm": 1.6948447227478027, "learning_rate": 8.047671964603016e-06, "loss": 0.7505, "step": 22460 }, { "epoch": 0.58, "grad_norm": 1.4868247509002686, "learning_rate": 8.046857802965948e-06, "loss": 0.5464, "step": 22461 }, { "epoch": 0.58, "grad_norm": 1.208075761795044, "learning_rate": 8.046043654788711e-06, "loss": 0.529, "step": 22462 }, { "epoch": 0.58, "grad_norm": 1.7553144693374634, "learning_rate": 8.045229520076911e-06, "loss": 0.5245, "step": 22463 }, { "epoch": 0.58, "grad_norm": 1.5395755767822266, "learning_rate": 8.044415398836164e-06, "loss": 0.4839, "step": 22464 }, { "epoch": 0.58, "grad_norm": 1.412696123123169, "learning_rate": 8.043601291072076e-06, "loss": 0.5498, "step": 22465 }, { "epoch": 0.58, "grad_norm": 7.142147064208984, "learning_rate": 8.042787196790258e-06, "loss": 0.6094, "step": 22466 }, { "epoch": 0.58, "grad_norm": 2.3084020614624023, "learning_rate": 8.041973115996324e-06, "loss": 0.5904, "step": 22467 }, { "epoch": 0.58, "grad_norm": 1.8305304050445557, "learning_rate": 8.041159048695876e-06, "loss": 0.5612, "step": 22468 }, { "epoch": 0.58, "grad_norm": 1.8696523904800415, "learning_rate": 8.040344994894534e-06, "loss": 0.4505, "step": 22469 }, { "epoch": 0.58, "grad_norm": 1.5192338228225708, "learning_rate": 8.0395309545979e-06, "loss": 0.4142, "step": 22470 }, { "epoch": 0.58, "grad_norm": 9.135268211364746, "learning_rate": 8.038716927811588e-06, "loss": 0.5398, "step": 22471 }, { "epoch": 0.58, "grad_norm": 2.821511745452881, "learning_rate": 8.037902914541209e-06, "loss": 0.3924, "step": 22472 }, { "epoch": 0.58, "grad_norm": 1.6346725225448608, "learning_rate": 8.037088914792368e-06, "loss": 0.4038, "step": 22473 }, { "epoch": 0.58, "grad_norm": 8.119145393371582, "learning_rate": 8.036274928570678e-06, "loss": 0.5989, "step": 22474 }, { "epoch": 0.58, "grad_norm": 1.4334217309951782, "learning_rate": 8.035460955881746e-06, "loss": 0.564, "step": 22475 }, { "epoch": 0.58, "grad_norm": 1.2257776260375977, "learning_rate": 8.034646996731186e-06, "loss": 0.4451, "step": 22476 }, { "epoch": 0.58, "grad_norm": 1.7908856868743896, "learning_rate": 8.0338330511246e-06, "loss": 0.5762, "step": 22477 }, { "epoch": 0.58, "grad_norm": 2.4465553760528564, "learning_rate": 8.033019119067602e-06, "loss": 0.5917, "step": 22478 }, { "epoch": 0.58, "grad_norm": 2.0274300575256348, "learning_rate": 8.032205200565804e-06, "loss": 0.5598, "step": 22479 }, { "epoch": 0.58, "grad_norm": 6.17031717300415, "learning_rate": 8.031391295624808e-06, "loss": 0.6738, "step": 22480 }, { "epoch": 0.58, "grad_norm": 1.7632569074630737, "learning_rate": 8.030577404250228e-06, "loss": 0.5886, "step": 22481 }, { "epoch": 0.58, "grad_norm": 1.3539867401123047, "learning_rate": 8.029763526447671e-06, "loss": 0.5843, "step": 22482 }, { "epoch": 0.58, "grad_norm": 1.1433780193328857, "learning_rate": 8.028949662222743e-06, "loss": 0.4812, "step": 22483 }, { "epoch": 0.58, "grad_norm": 2.1016461849212646, "learning_rate": 8.02813581158106e-06, "loss": 0.6283, "step": 22484 }, { "epoch": 0.58, "grad_norm": 7.35927677154541, "learning_rate": 8.027321974528224e-06, "loss": 0.7042, "step": 22485 }, { "epoch": 0.58, "grad_norm": 1.1771297454833984, "learning_rate": 8.026508151069848e-06, "loss": 0.5319, "step": 22486 }, { "epoch": 0.58, "grad_norm": 6.059714317321777, "learning_rate": 8.025694341211534e-06, "loss": 0.698, "step": 22487 }, { "epoch": 0.58, "grad_norm": 1.7625046968460083, "learning_rate": 8.024880544958894e-06, "loss": 0.5852, "step": 22488 }, { "epoch": 0.58, "grad_norm": 2.1724531650543213, "learning_rate": 8.024066762317542e-06, "loss": 0.6083, "step": 22489 }, { "epoch": 0.58, "grad_norm": 2.185978412628174, "learning_rate": 8.023252993293076e-06, "loss": 0.579, "step": 22490 }, { "epoch": 0.58, "grad_norm": 1.6707667112350464, "learning_rate": 8.02243923789111e-06, "loss": 0.4534, "step": 22491 }, { "epoch": 0.58, "grad_norm": 2.7787890434265137, "learning_rate": 8.02162549611725e-06, "loss": 0.8266, "step": 22492 }, { "epoch": 0.58, "grad_norm": 2.3482916355133057, "learning_rate": 8.020811767977105e-06, "loss": 0.5204, "step": 22493 }, { "epoch": 0.58, "grad_norm": 1.962372899055481, "learning_rate": 8.019998053476279e-06, "loss": 0.5207, "step": 22494 }, { "epoch": 0.58, "grad_norm": 3.344808340072632, "learning_rate": 8.019184352620384e-06, "loss": 0.6005, "step": 22495 }, { "epoch": 0.58, "grad_norm": 1.4108749628067017, "learning_rate": 8.018370665415027e-06, "loss": 0.5465, "step": 22496 }, { "epoch": 0.58, "grad_norm": 1.480731725692749, "learning_rate": 8.017556991865814e-06, "loss": 0.4635, "step": 22497 }, { "epoch": 0.58, "grad_norm": 1.70001220703125, "learning_rate": 8.016743331978353e-06, "loss": 0.6397, "step": 22498 }, { "epoch": 0.58, "grad_norm": 1.3528434038162231, "learning_rate": 8.015929685758248e-06, "loss": 0.487, "step": 22499 }, { "epoch": 0.58, "grad_norm": 1.5766171216964722, "learning_rate": 8.015116053211112e-06, "loss": 0.4625, "step": 22500 }, { "epoch": 0.58, "grad_norm": 1.4027652740478516, "learning_rate": 8.01430243434255e-06, "loss": 0.7755, "step": 22501 }, { "epoch": 0.58, "grad_norm": 1.6447598934173584, "learning_rate": 8.013488829158166e-06, "loss": 0.613, "step": 22502 }, { "epoch": 0.58, "grad_norm": 1.5756638050079346, "learning_rate": 8.01267523766357e-06, "loss": 0.4855, "step": 22503 }, { "epoch": 0.58, "grad_norm": 3.454650640487671, "learning_rate": 8.011861659864367e-06, "loss": 0.4778, "step": 22504 }, { "epoch": 0.58, "grad_norm": 5.4728217124938965, "learning_rate": 8.011048095766165e-06, "loss": 0.6093, "step": 22505 }, { "epoch": 0.58, "grad_norm": 8.503570556640625, "learning_rate": 8.010234545374568e-06, "loss": 0.4553, "step": 22506 }, { "epoch": 0.58, "grad_norm": 0.9445537328720093, "learning_rate": 8.009421008695187e-06, "loss": 0.4021, "step": 22507 }, { "epoch": 0.58, "grad_norm": 1.431449294090271, "learning_rate": 8.008607485733626e-06, "loss": 0.4594, "step": 22508 }, { "epoch": 0.58, "grad_norm": 1.8182109594345093, "learning_rate": 8.007793976495488e-06, "loss": 0.5428, "step": 22509 }, { "epoch": 0.58, "grad_norm": 1.1450132131576538, "learning_rate": 8.006980480986385e-06, "loss": 0.5946, "step": 22510 }, { "epoch": 0.58, "grad_norm": 2.375816822052002, "learning_rate": 8.00616699921192e-06, "loss": 0.7745, "step": 22511 }, { "epoch": 0.58, "grad_norm": 1.3417294025421143, "learning_rate": 8.005353531177698e-06, "loss": 0.5242, "step": 22512 }, { "epoch": 0.58, "grad_norm": 3.1319327354431152, "learning_rate": 8.004540076889328e-06, "loss": 0.7697, "step": 22513 }, { "epoch": 0.58, "grad_norm": 3.650763750076294, "learning_rate": 8.003726636352413e-06, "loss": 0.4648, "step": 22514 }, { "epoch": 0.58, "grad_norm": 6.702761173248291, "learning_rate": 8.002913209572561e-06, "loss": 0.677, "step": 22515 }, { "epoch": 0.58, "grad_norm": 4.37895393371582, "learning_rate": 8.002099796555375e-06, "loss": 0.4854, "step": 22516 }, { "epoch": 0.58, "grad_norm": 1.5257219076156616, "learning_rate": 8.001286397306465e-06, "loss": 0.5889, "step": 22517 }, { "epoch": 0.58, "grad_norm": 3.634831666946411, "learning_rate": 8.00047301183143e-06, "loss": 0.4813, "step": 22518 }, { "epoch": 0.58, "grad_norm": 31.873687744140625, "learning_rate": 7.999659640135878e-06, "loss": 0.487, "step": 22519 }, { "epoch": 0.58, "grad_norm": 5.345081329345703, "learning_rate": 7.99884628222542e-06, "loss": 0.6496, "step": 22520 }, { "epoch": 0.58, "grad_norm": 1.4405423402786255, "learning_rate": 7.998032938105651e-06, "loss": 0.4823, "step": 22521 }, { "epoch": 0.58, "grad_norm": 2.3120486736297607, "learning_rate": 7.997219607782185e-06, "loss": 0.5063, "step": 22522 }, { "epoch": 0.58, "grad_norm": 1.548514485359192, "learning_rate": 7.99640629126062e-06, "loss": 0.4844, "step": 22523 }, { "epoch": 0.58, "grad_norm": 1.108392596244812, "learning_rate": 7.995592988546566e-06, "loss": 0.5585, "step": 22524 }, { "epoch": 0.58, "grad_norm": 1.300718069076538, "learning_rate": 7.994779699645626e-06, "loss": 0.4384, "step": 22525 }, { "epoch": 0.58, "grad_norm": 2.5221633911132812, "learning_rate": 7.993966424563403e-06, "loss": 0.563, "step": 22526 }, { "epoch": 0.58, "grad_norm": 1.5931774377822876, "learning_rate": 7.993153163305504e-06, "loss": 0.5961, "step": 22527 }, { "epoch": 0.58, "grad_norm": 1.5468138456344604, "learning_rate": 7.992339915877534e-06, "loss": 0.602, "step": 22528 }, { "epoch": 0.58, "grad_norm": 2.3448312282562256, "learning_rate": 7.991526682285092e-06, "loss": 0.6804, "step": 22529 }, { "epoch": 0.58, "grad_norm": 1.8327513933181763, "learning_rate": 7.99071346253379e-06, "loss": 0.4569, "step": 22530 }, { "epoch": 0.58, "grad_norm": 1.3966211080551147, "learning_rate": 7.989900256629227e-06, "loss": 0.5914, "step": 22531 }, { "epoch": 0.58, "grad_norm": 1.552115559577942, "learning_rate": 7.98908706457701e-06, "loss": 0.6189, "step": 22532 }, { "epoch": 0.58, "grad_norm": 5.158538818359375, "learning_rate": 7.988273886382741e-06, "loss": 0.691, "step": 22533 }, { "epoch": 0.58, "grad_norm": 1.5445655584335327, "learning_rate": 7.987460722052025e-06, "loss": 0.655, "step": 22534 }, { "epoch": 0.58, "grad_norm": 3.219181776046753, "learning_rate": 7.986647571590463e-06, "loss": 0.6876, "step": 22535 }, { "epoch": 0.58, "grad_norm": 2.0918617248535156, "learning_rate": 7.98583443500366e-06, "loss": 0.6055, "step": 22536 }, { "epoch": 0.58, "grad_norm": 4.795582294464111, "learning_rate": 7.985021312297226e-06, "loss": 0.5023, "step": 22537 }, { "epoch": 0.58, "grad_norm": 4.335300922393799, "learning_rate": 7.984208203476752e-06, "loss": 0.6869, "step": 22538 }, { "epoch": 0.58, "grad_norm": 0.8952757120132446, "learning_rate": 7.983395108547855e-06, "loss": 0.5514, "step": 22539 }, { "epoch": 0.58, "grad_norm": 1.5448274612426758, "learning_rate": 7.982582027516129e-06, "loss": 0.5961, "step": 22540 }, { "epoch": 0.58, "grad_norm": 2.8949480056762695, "learning_rate": 7.98176896038718e-06, "loss": 0.5777, "step": 22541 }, { "epoch": 0.58, "grad_norm": 2.4288086891174316, "learning_rate": 7.980955907166614e-06, "loss": 0.7026, "step": 22542 }, { "epoch": 0.58, "grad_norm": 1.3198044300079346, "learning_rate": 7.980142867860029e-06, "loss": 0.54, "step": 22543 }, { "epoch": 0.58, "grad_norm": 4.8927507400512695, "learning_rate": 7.979329842473031e-06, "loss": 0.5868, "step": 22544 }, { "epoch": 0.58, "grad_norm": 1.6372920274734497, "learning_rate": 7.978516831011223e-06, "loss": 0.5289, "step": 22545 }, { "epoch": 0.58, "grad_norm": 1.544932246208191, "learning_rate": 7.977703833480206e-06, "loss": 0.5876, "step": 22546 }, { "epoch": 0.58, "grad_norm": 1.2494887113571167, "learning_rate": 7.976890849885584e-06, "loss": 0.5217, "step": 22547 }, { "epoch": 0.58, "grad_norm": 6.9093098640441895, "learning_rate": 7.976077880232958e-06, "loss": 0.6472, "step": 22548 }, { "epoch": 0.58, "grad_norm": 3.216959238052368, "learning_rate": 7.975264924527934e-06, "loss": 0.6711, "step": 22549 }, { "epoch": 0.58, "grad_norm": 7.020009517669678, "learning_rate": 7.974451982776108e-06, "loss": 0.5685, "step": 22550 }, { "epoch": 0.58, "grad_norm": 1.7199501991271973, "learning_rate": 7.973639054983092e-06, "loss": 0.3623, "step": 22551 }, { "epoch": 0.58, "grad_norm": 1.4636099338531494, "learning_rate": 7.972826141154478e-06, "loss": 0.5717, "step": 22552 }, { "epoch": 0.58, "grad_norm": 1.7828730344772339, "learning_rate": 7.972013241295875e-06, "loss": 0.5282, "step": 22553 }, { "epoch": 0.58, "grad_norm": 1.5437281131744385, "learning_rate": 7.971200355412883e-06, "loss": 0.5436, "step": 22554 }, { "epoch": 0.58, "grad_norm": 7.319361209869385, "learning_rate": 7.970387483511101e-06, "loss": 0.5913, "step": 22555 }, { "epoch": 0.58, "grad_norm": 4.163303852081299, "learning_rate": 7.969574625596136e-06, "loss": 0.6403, "step": 22556 }, { "epoch": 0.58, "grad_norm": 1.2928967475891113, "learning_rate": 7.968761781673585e-06, "loss": 0.5901, "step": 22557 }, { "epoch": 0.58, "grad_norm": 3.2265937328338623, "learning_rate": 7.967948951749054e-06, "loss": 0.5452, "step": 22558 }, { "epoch": 0.58, "grad_norm": 5.230465412139893, "learning_rate": 7.967136135828139e-06, "loss": 0.6081, "step": 22559 }, { "epoch": 0.58, "grad_norm": 2.2701916694641113, "learning_rate": 7.966323333916446e-06, "loss": 0.7333, "step": 22560 }, { "epoch": 0.58, "grad_norm": 14.625689506530762, "learning_rate": 7.965510546019577e-06, "loss": 0.5252, "step": 22561 }, { "epoch": 0.58, "grad_norm": 1.4879133701324463, "learning_rate": 7.96469777214313e-06, "loss": 0.585, "step": 22562 }, { "epoch": 0.58, "grad_norm": 1.099544882774353, "learning_rate": 7.963885012292705e-06, "loss": 0.5364, "step": 22563 }, { "epoch": 0.58, "grad_norm": 1.8014129400253296, "learning_rate": 7.963072266473907e-06, "loss": 0.6726, "step": 22564 }, { "epoch": 0.58, "grad_norm": 2.183173418045044, "learning_rate": 7.962259534692334e-06, "loss": 0.4603, "step": 22565 }, { "epoch": 0.58, "grad_norm": 1.3227604627609253, "learning_rate": 7.961446816953592e-06, "loss": 0.4567, "step": 22566 }, { "epoch": 0.58, "grad_norm": 1.5547518730163574, "learning_rate": 7.960634113263273e-06, "loss": 0.6707, "step": 22567 }, { "epoch": 0.58, "grad_norm": 1.9737331867218018, "learning_rate": 7.959821423626985e-06, "loss": 0.5192, "step": 22568 }, { "epoch": 0.58, "grad_norm": 6.632327079772949, "learning_rate": 7.959008748050323e-06, "loss": 0.6048, "step": 22569 }, { "epoch": 0.58, "grad_norm": 0.9956133961677551, "learning_rate": 7.958196086538895e-06, "loss": 0.5495, "step": 22570 }, { "epoch": 0.58, "grad_norm": 0.9323713779449463, "learning_rate": 7.957383439098295e-06, "loss": 0.5998, "step": 22571 }, { "epoch": 0.58, "grad_norm": 3.538741111755371, "learning_rate": 7.956570805734124e-06, "loss": 0.5561, "step": 22572 }, { "epoch": 0.58, "grad_norm": 2.370948553085327, "learning_rate": 7.955758186451983e-06, "loss": 0.7227, "step": 22573 }, { "epoch": 0.58, "grad_norm": 0.9517052173614502, "learning_rate": 7.954945581257473e-06, "loss": 0.4772, "step": 22574 }, { "epoch": 0.58, "grad_norm": 1.4541131258010864, "learning_rate": 7.954132990156194e-06, "loss": 0.585, "step": 22575 }, { "epoch": 0.58, "grad_norm": 1.622783899307251, "learning_rate": 7.953320413153744e-06, "loss": 0.4096, "step": 22576 }, { "epoch": 0.58, "grad_norm": 3.888551950454712, "learning_rate": 7.952507850255723e-06, "loss": 0.778, "step": 22577 }, { "epoch": 0.58, "grad_norm": 7.751514911651611, "learning_rate": 7.951695301467735e-06, "loss": 0.6223, "step": 22578 }, { "epoch": 0.58, "grad_norm": 1.9455420970916748, "learning_rate": 7.950882766795372e-06, "loss": 0.6911, "step": 22579 }, { "epoch": 0.58, "grad_norm": 1.6436318159103394, "learning_rate": 7.950070246244241e-06, "loss": 0.4512, "step": 22580 }, { "epoch": 0.58, "grad_norm": 1.0094738006591797, "learning_rate": 7.949257739819936e-06, "loss": 0.4325, "step": 22581 }, { "epoch": 0.58, "grad_norm": 1.1221203804016113, "learning_rate": 7.948445247528057e-06, "loss": 0.4469, "step": 22582 }, { "epoch": 0.58, "grad_norm": 1.2627251148223877, "learning_rate": 7.947632769374208e-06, "loss": 0.4216, "step": 22583 }, { "epoch": 0.58, "grad_norm": 2.535684585571289, "learning_rate": 7.946820305363981e-06, "loss": 0.5698, "step": 22584 }, { "epoch": 0.58, "grad_norm": 2.4234421253204346, "learning_rate": 7.946007855502983e-06, "loss": 0.606, "step": 22585 }, { "epoch": 0.58, "grad_norm": 1.2125030755996704, "learning_rate": 7.945195419796804e-06, "loss": 0.5045, "step": 22586 }, { "epoch": 0.58, "grad_norm": 2.472877025604248, "learning_rate": 7.94438299825105e-06, "loss": 0.6248, "step": 22587 }, { "epoch": 0.58, "grad_norm": 2.1558964252471924, "learning_rate": 7.943570590871315e-06, "loss": 0.5303, "step": 22588 }, { "epoch": 0.58, "grad_norm": 4.365184307098389, "learning_rate": 7.942758197663198e-06, "loss": 0.6638, "step": 22589 }, { "epoch": 0.58, "grad_norm": 1.3233522176742554, "learning_rate": 7.941945818632304e-06, "loss": 0.4356, "step": 22590 }, { "epoch": 0.58, "grad_norm": 5.865708351135254, "learning_rate": 7.941133453784221e-06, "loss": 0.7739, "step": 22591 }, { "epoch": 0.58, "grad_norm": 1.5160396099090576, "learning_rate": 7.940321103124557e-06, "loss": 0.6178, "step": 22592 }, { "epoch": 0.58, "grad_norm": 2.3212263584136963, "learning_rate": 7.939508766658905e-06, "loss": 0.6582, "step": 22593 }, { "epoch": 0.58, "grad_norm": 2.062405824661255, "learning_rate": 7.93869644439286e-06, "loss": 0.4524, "step": 22594 }, { "epoch": 0.58, "grad_norm": 1.622628927230835, "learning_rate": 7.93788413633203e-06, "loss": 0.6086, "step": 22595 }, { "epoch": 0.58, "grad_norm": 3.421830654144287, "learning_rate": 7.937071842482003e-06, "loss": 0.4786, "step": 22596 }, { "epoch": 0.58, "grad_norm": 1.3694159984588623, "learning_rate": 7.936259562848386e-06, "loss": 0.4582, "step": 22597 }, { "epoch": 0.58, "grad_norm": 2.5940427780151367, "learning_rate": 7.935447297436765e-06, "loss": 0.5684, "step": 22598 }, { "epoch": 0.58, "grad_norm": 1.0002927780151367, "learning_rate": 7.934635046252748e-06, "loss": 0.5472, "step": 22599 }, { "epoch": 0.58, "grad_norm": 1.3491274118423462, "learning_rate": 7.933822809301928e-06, "loss": 0.5877, "step": 22600 }, { "epoch": 0.58, "grad_norm": 1.8710517883300781, "learning_rate": 7.9330105865899e-06, "loss": 0.6281, "step": 22601 }, { "epoch": 0.58, "grad_norm": 1.1389296054840088, "learning_rate": 7.932198378122268e-06, "loss": 0.5371, "step": 22602 }, { "epoch": 0.58, "grad_norm": 1.9450525045394897, "learning_rate": 7.931386183904626e-06, "loss": 0.54, "step": 22603 }, { "epoch": 0.58, "grad_norm": 3.199469804763794, "learning_rate": 7.930574003942571e-06, "loss": 0.4951, "step": 22604 }, { "epoch": 0.58, "grad_norm": 8.355208396911621, "learning_rate": 7.929761838241695e-06, "loss": 0.7385, "step": 22605 }, { "epoch": 0.58, "grad_norm": 1.7241848707199097, "learning_rate": 7.928949686807604e-06, "loss": 0.5726, "step": 22606 }, { "epoch": 0.58, "grad_norm": 1.4255290031433105, "learning_rate": 7.92813754964589e-06, "loss": 0.5575, "step": 22607 }, { "epoch": 0.58, "grad_norm": 1.3263905048370361, "learning_rate": 7.927325426762151e-06, "loss": 0.5292, "step": 22608 }, { "epoch": 0.58, "grad_norm": 1.544312596321106, "learning_rate": 7.926513318161982e-06, "loss": 0.4791, "step": 22609 }, { "epoch": 0.58, "grad_norm": 1.6630668640136719, "learning_rate": 7.92570122385098e-06, "loss": 0.7593, "step": 22610 }, { "epoch": 0.58, "grad_norm": 1.3941868543624878, "learning_rate": 7.924889143834744e-06, "loss": 0.5538, "step": 22611 }, { "epoch": 0.58, "grad_norm": 1.4432529211044312, "learning_rate": 7.924077078118865e-06, "loss": 0.5878, "step": 22612 }, { "epoch": 0.58, "grad_norm": 1.8560535907745361, "learning_rate": 7.923265026708945e-06, "loss": 0.6422, "step": 22613 }, { "epoch": 0.58, "grad_norm": 2.0103323459625244, "learning_rate": 7.922452989610579e-06, "loss": 0.5739, "step": 22614 }, { "epoch": 0.58, "grad_norm": 2.440972328186035, "learning_rate": 7.921640966829357e-06, "loss": 0.6062, "step": 22615 }, { "epoch": 0.58, "grad_norm": 1.1957601308822632, "learning_rate": 7.920828958370883e-06, "loss": 0.4892, "step": 22616 }, { "epoch": 0.58, "grad_norm": 1.4992822408676147, "learning_rate": 7.92001696424075e-06, "loss": 0.5159, "step": 22617 }, { "epoch": 0.58, "grad_norm": 10.986348152160645, "learning_rate": 7.91920498444455e-06, "loss": 0.6953, "step": 22618 }, { "epoch": 0.58, "grad_norm": 1.5405778884887695, "learning_rate": 7.918393018987886e-06, "loss": 0.5457, "step": 22619 }, { "epoch": 0.58, "grad_norm": 7.349771499633789, "learning_rate": 7.917581067876348e-06, "loss": 0.4294, "step": 22620 }, { "epoch": 0.58, "grad_norm": 1.6118874549865723, "learning_rate": 7.916769131115534e-06, "loss": 0.6425, "step": 22621 }, { "epoch": 0.58, "grad_norm": 3.7668097019195557, "learning_rate": 7.915957208711037e-06, "loss": 0.7181, "step": 22622 }, { "epoch": 0.58, "grad_norm": 4.579023361206055, "learning_rate": 7.915145300668454e-06, "loss": 0.4376, "step": 22623 }, { "epoch": 0.58, "grad_norm": 0.9837808012962341, "learning_rate": 7.91433340699338e-06, "loss": 0.4304, "step": 22624 }, { "epoch": 0.58, "grad_norm": 2.3917994499206543, "learning_rate": 7.913521527691408e-06, "loss": 0.5477, "step": 22625 }, { "epoch": 0.58, "grad_norm": 3.5394487380981445, "learning_rate": 7.912709662768137e-06, "loss": 0.7024, "step": 22626 }, { "epoch": 0.58, "grad_norm": 1.5161930322647095, "learning_rate": 7.911897812229158e-06, "loss": 0.6469, "step": 22627 }, { "epoch": 0.58, "grad_norm": 1.4675264358520508, "learning_rate": 7.91108597608007e-06, "loss": 0.7402, "step": 22628 }, { "epoch": 0.58, "grad_norm": 1.842298984527588, "learning_rate": 7.910274154326462e-06, "loss": 0.3994, "step": 22629 }, { "epoch": 0.58, "grad_norm": 1.4300106763839722, "learning_rate": 7.909462346973932e-06, "loss": 0.6871, "step": 22630 }, { "epoch": 0.58, "grad_norm": 5.010676383972168, "learning_rate": 7.908650554028077e-06, "loss": 0.5892, "step": 22631 }, { "epoch": 0.58, "grad_norm": 0.7821966409683228, "learning_rate": 7.907838775494486e-06, "loss": 0.4813, "step": 22632 }, { "epoch": 0.58, "grad_norm": 6.817914962768555, "learning_rate": 7.907027011378756e-06, "loss": 0.63, "step": 22633 }, { "epoch": 0.58, "grad_norm": 5.991556167602539, "learning_rate": 7.90621526168648e-06, "loss": 0.4339, "step": 22634 }, { "epoch": 0.58, "grad_norm": 9.670144081115723, "learning_rate": 7.905403526423254e-06, "loss": 0.4151, "step": 22635 }, { "epoch": 0.58, "grad_norm": 4.078619003295898, "learning_rate": 7.904591805594672e-06, "loss": 0.5585, "step": 22636 }, { "epoch": 0.58, "grad_norm": 1.8804877996444702, "learning_rate": 7.903780099206326e-06, "loss": 0.4451, "step": 22637 }, { "epoch": 0.58, "grad_norm": 1.5155953168869019, "learning_rate": 7.90296840726381e-06, "loss": 0.5499, "step": 22638 }, { "epoch": 0.58, "grad_norm": 1.9600427150726318, "learning_rate": 7.902156729772718e-06, "loss": 0.6526, "step": 22639 }, { "epoch": 0.58, "grad_norm": 6.3951520919799805, "learning_rate": 7.901345066738646e-06, "loss": 0.5423, "step": 22640 }, { "epoch": 0.58, "grad_norm": 2.4182040691375732, "learning_rate": 7.900533418167186e-06, "loss": 0.574, "step": 22641 }, { "epoch": 0.58, "grad_norm": 1.1284998655319214, "learning_rate": 7.899721784063929e-06, "loss": 0.5211, "step": 22642 }, { "epoch": 0.58, "grad_norm": 1.3348942995071411, "learning_rate": 7.898910164434468e-06, "loss": 0.5893, "step": 22643 }, { "epoch": 0.58, "grad_norm": 1.3228590488433838, "learning_rate": 7.898098559284403e-06, "loss": 0.457, "step": 22644 }, { "epoch": 0.58, "grad_norm": 1.4850029945373535, "learning_rate": 7.897286968619316e-06, "loss": 0.5759, "step": 22645 }, { "epoch": 0.58, "grad_norm": 6.026855945587158, "learning_rate": 7.89647539244481e-06, "loss": 0.6485, "step": 22646 }, { "epoch": 0.58, "grad_norm": 8.437499046325684, "learning_rate": 7.895663830766472e-06, "loss": 0.6609, "step": 22647 }, { "epoch": 0.58, "grad_norm": 1.8368725776672363, "learning_rate": 7.894852283589897e-06, "loss": 0.5226, "step": 22648 }, { "epoch": 0.58, "grad_norm": 3.5739238262176514, "learning_rate": 7.89404075092068e-06, "loss": 0.5945, "step": 22649 }, { "epoch": 0.58, "grad_norm": 2.6218013763427734, "learning_rate": 7.893229232764409e-06, "loss": 0.5696, "step": 22650 }, { "epoch": 0.58, "grad_norm": 1.4485948085784912, "learning_rate": 7.892417729126678e-06, "loss": 0.508, "step": 22651 }, { "epoch": 0.58, "grad_norm": 1.6114832162857056, "learning_rate": 7.891606240013081e-06, "loss": 0.456, "step": 22652 }, { "epoch": 0.58, "grad_norm": 1.6905359029769897, "learning_rate": 7.89079476542921e-06, "loss": 0.7242, "step": 22653 }, { "epoch": 0.58, "grad_norm": 1.9678415060043335, "learning_rate": 7.889983305380654e-06, "loss": 0.6424, "step": 22654 }, { "epoch": 0.58, "grad_norm": 1.7377272844314575, "learning_rate": 7.889171859873008e-06, "loss": 0.6275, "step": 22655 }, { "epoch": 0.58, "grad_norm": 4.312966823577881, "learning_rate": 7.888360428911865e-06, "loss": 0.5924, "step": 22656 }, { "epoch": 0.58, "grad_norm": 4.139462471008301, "learning_rate": 7.887549012502814e-06, "loss": 0.5369, "step": 22657 }, { "epoch": 0.58, "grad_norm": 1.102789044380188, "learning_rate": 7.886737610651449e-06, "loss": 0.5045, "step": 22658 }, { "epoch": 0.58, "grad_norm": 1.3906404972076416, "learning_rate": 7.885926223363359e-06, "loss": 0.6221, "step": 22659 }, { "epoch": 0.58, "grad_norm": 1.5047863721847534, "learning_rate": 7.885114850644137e-06, "loss": 0.3732, "step": 22660 }, { "epoch": 0.58, "grad_norm": 1.2985153198242188, "learning_rate": 7.884303492499377e-06, "loss": 0.7381, "step": 22661 }, { "epoch": 0.58, "grad_norm": 4.2648420333862305, "learning_rate": 7.883492148934667e-06, "loss": 0.49, "step": 22662 }, { "epoch": 0.58, "grad_norm": 2.436004161834717, "learning_rate": 7.8826808199556e-06, "loss": 0.6188, "step": 22663 }, { "epoch": 0.58, "grad_norm": 2.680950403213501, "learning_rate": 7.881869505567766e-06, "loss": 0.7061, "step": 22664 }, { "epoch": 0.58, "grad_norm": 0.9644909501075745, "learning_rate": 7.881058205776758e-06, "loss": 0.5274, "step": 22665 }, { "epoch": 0.58, "grad_norm": 1.444163203239441, "learning_rate": 7.880246920588162e-06, "loss": 0.6821, "step": 22666 }, { "epoch": 0.58, "grad_norm": 1.7008639574050903, "learning_rate": 7.879435650007575e-06, "loss": 0.5137, "step": 22667 }, { "epoch": 0.58, "grad_norm": 1.3995596170425415, "learning_rate": 7.878624394040586e-06, "loss": 0.6519, "step": 22668 }, { "epoch": 0.58, "grad_norm": 2.7119598388671875, "learning_rate": 7.877813152692783e-06, "loss": 0.7128, "step": 22669 }, { "epoch": 0.58, "grad_norm": 6.637372970581055, "learning_rate": 7.87700192596976e-06, "loss": 0.708, "step": 22670 }, { "epoch": 0.58, "grad_norm": 2.4521420001983643, "learning_rate": 7.876190713877105e-06, "loss": 0.5293, "step": 22671 }, { "epoch": 0.58, "grad_norm": 1.9809938669204712, "learning_rate": 7.875379516420408e-06, "loss": 0.6538, "step": 22672 }, { "epoch": 0.58, "grad_norm": 1.3672983646392822, "learning_rate": 7.874568333605263e-06, "loss": 0.5448, "step": 22673 }, { "epoch": 0.58, "grad_norm": 3.2760815620422363, "learning_rate": 7.873757165437257e-06, "loss": 0.5782, "step": 22674 }, { "epoch": 0.58, "grad_norm": 1.5579417943954468, "learning_rate": 7.872946011921982e-06, "loss": 0.5344, "step": 22675 }, { "epoch": 0.58, "grad_norm": 1.4547446966171265, "learning_rate": 7.872134873065024e-06, "loss": 0.3729, "step": 22676 }, { "epoch": 0.58, "grad_norm": 1.329002857208252, "learning_rate": 7.871323748871976e-06, "loss": 0.4446, "step": 22677 }, { "epoch": 0.58, "grad_norm": 3.332571268081665, "learning_rate": 7.87051263934843e-06, "loss": 0.6406, "step": 22678 }, { "epoch": 0.58, "grad_norm": 2.290449857711792, "learning_rate": 7.86970154449997e-06, "loss": 0.5147, "step": 22679 }, { "epoch": 0.58, "grad_norm": 5.334336757659912, "learning_rate": 7.86889046433219e-06, "loss": 0.7345, "step": 22680 }, { "epoch": 0.58, "grad_norm": 2.466897964477539, "learning_rate": 7.868079398850679e-06, "loss": 0.4503, "step": 22681 }, { "epoch": 0.58, "grad_norm": 3.5083935260772705, "learning_rate": 7.867268348061024e-06, "loss": 0.5361, "step": 22682 }, { "epoch": 0.58, "grad_norm": 1.2101045846939087, "learning_rate": 7.866457311968817e-06, "loss": 0.5863, "step": 22683 }, { "epoch": 0.58, "grad_norm": 4.437656879425049, "learning_rate": 7.865646290579642e-06, "loss": 0.8396, "step": 22684 }, { "epoch": 0.58, "grad_norm": 2.5056135654449463, "learning_rate": 7.864835283899097e-06, "loss": 0.5857, "step": 22685 }, { "epoch": 0.58, "grad_norm": 1.7860064506530762, "learning_rate": 7.864024291932762e-06, "loss": 0.5906, "step": 22686 }, { "epoch": 0.58, "grad_norm": 1.2944151163101196, "learning_rate": 7.863213314686232e-06, "loss": 0.4714, "step": 22687 }, { "epoch": 0.58, "grad_norm": 1.2313578128814697, "learning_rate": 7.862402352165091e-06, "loss": 0.4495, "step": 22688 }, { "epoch": 0.58, "grad_norm": 2.0425784587860107, "learning_rate": 7.861591404374931e-06, "loss": 0.5436, "step": 22689 }, { "epoch": 0.58, "grad_norm": 3.8057351112365723, "learning_rate": 7.86078047132134e-06, "loss": 0.6016, "step": 22690 }, { "epoch": 0.58, "grad_norm": 1.2083865404129028, "learning_rate": 7.859969553009906e-06, "loss": 0.5466, "step": 22691 }, { "epoch": 0.58, "grad_norm": 1.8070887327194214, "learning_rate": 7.85915864944622e-06, "loss": 0.5796, "step": 22692 }, { "epoch": 0.58, "grad_norm": 11.248767852783203, "learning_rate": 7.858347760635862e-06, "loss": 0.5637, "step": 22693 }, { "epoch": 0.58, "grad_norm": 1.934798240661621, "learning_rate": 7.85753688658443e-06, "loss": 0.4832, "step": 22694 }, { "epoch": 0.58, "grad_norm": 1.5407904386520386, "learning_rate": 7.856726027297505e-06, "loss": 0.6197, "step": 22695 }, { "epoch": 0.58, "grad_norm": 3.4927875995635986, "learning_rate": 7.855915182780679e-06, "loss": 0.517, "step": 22696 }, { "epoch": 0.58, "grad_norm": 6.467670917510986, "learning_rate": 7.855104353039539e-06, "loss": 0.6228, "step": 22697 }, { "epoch": 0.58, "grad_norm": 1.3849258422851562, "learning_rate": 7.854293538079671e-06, "loss": 0.5151, "step": 22698 }, { "epoch": 0.58, "grad_norm": 2.407707929611206, "learning_rate": 7.853482737906666e-06, "loss": 0.4767, "step": 22699 }, { "epoch": 0.58, "grad_norm": 1.5993326902389526, "learning_rate": 7.852671952526108e-06, "loss": 0.5656, "step": 22700 }, { "epoch": 0.58, "grad_norm": 1.4151943922042847, "learning_rate": 7.851861181943585e-06, "loss": 0.4505, "step": 22701 }, { "epoch": 0.58, "grad_norm": 1.6685805320739746, "learning_rate": 7.851050426164687e-06, "loss": 0.4727, "step": 22702 }, { "epoch": 0.58, "grad_norm": 1.3264729976654053, "learning_rate": 7.850239685194997e-06, "loss": 0.422, "step": 22703 }, { "epoch": 0.58, "grad_norm": 3.4920077323913574, "learning_rate": 7.849428959040108e-06, "loss": 0.7707, "step": 22704 }, { "epoch": 0.58, "grad_norm": 1.6688085794448853, "learning_rate": 7.848618247705599e-06, "loss": 0.665, "step": 22705 }, { "epoch": 0.58, "grad_norm": 2.296384334564209, "learning_rate": 7.847807551197067e-06, "loss": 0.5916, "step": 22706 }, { "epoch": 0.58, "grad_norm": 4.515378475189209, "learning_rate": 7.846996869520088e-06, "loss": 0.6397, "step": 22707 }, { "epoch": 0.58, "grad_norm": 1.4792598485946655, "learning_rate": 7.846186202680256e-06, "loss": 0.5005, "step": 22708 }, { "epoch": 0.58, "grad_norm": 3.6166796684265137, "learning_rate": 7.845375550683156e-06, "loss": 0.603, "step": 22709 }, { "epoch": 0.58, "grad_norm": 1.5199908018112183, "learning_rate": 7.844564913534372e-06, "loss": 0.5526, "step": 22710 }, { "epoch": 0.58, "grad_norm": 2.5491721630096436, "learning_rate": 7.843754291239496e-06, "loss": 0.7324, "step": 22711 }, { "epoch": 0.58, "grad_norm": 0.8481385707855225, "learning_rate": 7.842943683804108e-06, "loss": 0.5167, "step": 22712 }, { "epoch": 0.58, "grad_norm": 1.3280659914016724, "learning_rate": 7.842133091233796e-06, "loss": 0.5011, "step": 22713 }, { "epoch": 0.58, "grad_norm": 4.238144397735596, "learning_rate": 7.841322513534149e-06, "loss": 0.5621, "step": 22714 }, { "epoch": 0.58, "grad_norm": 1.9445781707763672, "learning_rate": 7.84051195071075e-06, "loss": 0.5268, "step": 22715 }, { "epoch": 0.58, "grad_norm": 1.4927257299423218, "learning_rate": 7.839701402769188e-06, "loss": 0.5894, "step": 22716 }, { "epoch": 0.58, "grad_norm": 1.5800740718841553, "learning_rate": 7.838890869715043e-06, "loss": 0.6039, "step": 22717 }, { "epoch": 0.58, "grad_norm": 1.8797074556350708, "learning_rate": 7.838080351553907e-06, "loss": 0.6546, "step": 22718 }, { "epoch": 0.58, "grad_norm": 3.6863927841186523, "learning_rate": 7.837269848291363e-06, "loss": 0.5854, "step": 22719 }, { "epoch": 0.58, "grad_norm": 1.6159940958023071, "learning_rate": 7.836459359932995e-06, "loss": 0.469, "step": 22720 }, { "epoch": 0.58, "grad_norm": 1.081020474433899, "learning_rate": 7.835648886484392e-06, "loss": 0.4631, "step": 22721 }, { "epoch": 0.58, "grad_norm": 2.1148555278778076, "learning_rate": 7.834838427951136e-06, "loss": 0.4782, "step": 22722 }, { "epoch": 0.58, "grad_norm": 3.455789566040039, "learning_rate": 7.834027984338815e-06, "loss": 0.5309, "step": 22723 }, { "epoch": 0.58, "grad_norm": 1.5762149095535278, "learning_rate": 7.833217555653008e-06, "loss": 0.458, "step": 22724 }, { "epoch": 0.58, "grad_norm": 1.6695574522018433, "learning_rate": 7.832407141899307e-06, "loss": 0.448, "step": 22725 }, { "epoch": 0.58, "grad_norm": 7.66286039352417, "learning_rate": 7.831596743083296e-06, "loss": 0.4422, "step": 22726 }, { "epoch": 0.58, "grad_norm": 3.758612871170044, "learning_rate": 7.830786359210554e-06, "loss": 0.6417, "step": 22727 }, { "epoch": 0.58, "grad_norm": 2.75878643989563, "learning_rate": 7.829975990286672e-06, "loss": 0.6647, "step": 22728 }, { "epoch": 0.58, "grad_norm": 1.5215884447097778, "learning_rate": 7.82916563631723e-06, "loss": 0.6861, "step": 22729 }, { "epoch": 0.58, "grad_norm": 1.349882960319519, "learning_rate": 7.828355297307817e-06, "loss": 0.6072, "step": 22730 }, { "epoch": 0.58, "grad_norm": 5.6133246421813965, "learning_rate": 7.827544973264016e-06, "loss": 0.4916, "step": 22731 }, { "epoch": 0.58, "grad_norm": 2.638493061065674, "learning_rate": 7.826734664191406e-06, "loss": 0.4678, "step": 22732 }, { "epoch": 0.58, "grad_norm": 2.5097508430480957, "learning_rate": 7.82592437009558e-06, "loss": 0.5587, "step": 22733 }, { "epoch": 0.58, "grad_norm": 4.555880546569824, "learning_rate": 7.825114090982113e-06, "loss": 0.4258, "step": 22734 }, { "epoch": 0.58, "grad_norm": 1.6055259704589844, "learning_rate": 7.824303826856598e-06, "loss": 0.5014, "step": 22735 }, { "epoch": 0.58, "grad_norm": 1.6539841890335083, "learning_rate": 7.82349357772461e-06, "loss": 0.4276, "step": 22736 }, { "epoch": 0.58, "grad_norm": 3.3934712409973145, "learning_rate": 7.822683343591736e-06, "loss": 0.742, "step": 22737 }, { "epoch": 0.58, "grad_norm": 1.2474501132965088, "learning_rate": 7.821873124463565e-06, "loss": 0.4322, "step": 22738 }, { "epoch": 0.58, "grad_norm": 3.8245131969451904, "learning_rate": 7.821062920345673e-06, "loss": 0.3754, "step": 22739 }, { "epoch": 0.58, "grad_norm": 3.390709638595581, "learning_rate": 7.820252731243649e-06, "loss": 0.5786, "step": 22740 }, { "epoch": 0.58, "grad_norm": 1.1488279104232788, "learning_rate": 7.81944255716307e-06, "loss": 0.5314, "step": 22741 }, { "epoch": 0.58, "grad_norm": 2.6572983264923096, "learning_rate": 7.818632398109523e-06, "loss": 0.6548, "step": 22742 }, { "epoch": 0.58, "grad_norm": 1.9342620372772217, "learning_rate": 7.817822254088595e-06, "loss": 0.6436, "step": 22743 }, { "epoch": 0.58, "grad_norm": 1.6527924537658691, "learning_rate": 7.817012125105861e-06, "loss": 0.547, "step": 22744 }, { "epoch": 0.58, "grad_norm": 2.6156740188598633, "learning_rate": 7.816202011166912e-06, "loss": 0.5719, "step": 22745 }, { "epoch": 0.58, "grad_norm": 1.5962376594543457, "learning_rate": 7.815391912277324e-06, "loss": 0.4969, "step": 22746 }, { "epoch": 0.58, "grad_norm": 3.87467885017395, "learning_rate": 7.814581828442683e-06, "loss": 0.5658, "step": 22747 }, { "epoch": 0.58, "grad_norm": 1.897268533706665, "learning_rate": 7.813771759668572e-06, "loss": 0.6025, "step": 22748 }, { "epoch": 0.58, "grad_norm": 1.1546331644058228, "learning_rate": 7.812961705960568e-06, "loss": 0.5248, "step": 22749 }, { "epoch": 0.58, "grad_norm": 2.4634578227996826, "learning_rate": 7.812151667324264e-06, "loss": 0.5425, "step": 22750 }, { "epoch": 0.58, "grad_norm": 3.0626425743103027, "learning_rate": 7.811341643765233e-06, "loss": 0.4897, "step": 22751 }, { "epoch": 0.58, "grad_norm": 1.3274976015090942, "learning_rate": 7.810531635289063e-06, "loss": 0.4307, "step": 22752 }, { "epoch": 0.58, "grad_norm": 1.6781513690948486, "learning_rate": 7.809721641901329e-06, "loss": 0.5122, "step": 22753 }, { "epoch": 0.58, "grad_norm": 6.542611122131348, "learning_rate": 7.80891166360762e-06, "loss": 0.5411, "step": 22754 }, { "epoch": 0.58, "grad_norm": 1.4546538591384888, "learning_rate": 7.808101700413514e-06, "loss": 0.5972, "step": 22755 }, { "epoch": 0.58, "grad_norm": 1.4279203414916992, "learning_rate": 7.807291752324595e-06, "loss": 0.6529, "step": 22756 }, { "epoch": 0.58, "grad_norm": 1.6198694705963135, "learning_rate": 7.806481819346446e-06, "loss": 0.6931, "step": 22757 }, { "epoch": 0.58, "grad_norm": 1.573067545890808, "learning_rate": 7.805671901484641e-06, "loss": 0.5154, "step": 22758 }, { "epoch": 0.58, "grad_norm": 1.518065333366394, "learning_rate": 7.804861998744772e-06, "loss": 0.646, "step": 22759 }, { "epoch": 0.58, "grad_norm": 4.131968975067139, "learning_rate": 7.804052111132412e-06, "loss": 0.5145, "step": 22760 }, { "epoch": 0.58, "grad_norm": 6.951172828674316, "learning_rate": 7.803242238653144e-06, "loss": 0.6568, "step": 22761 }, { "epoch": 0.58, "grad_norm": 1.2211451530456543, "learning_rate": 7.802432381312553e-06, "loss": 0.5275, "step": 22762 }, { "epoch": 0.58, "grad_norm": 10.057632446289062, "learning_rate": 7.801622539116215e-06, "loss": 0.6119, "step": 22763 }, { "epoch": 0.58, "grad_norm": 6.76735782623291, "learning_rate": 7.800812712069717e-06, "loss": 0.4817, "step": 22764 }, { "epoch": 0.58, "grad_norm": 1.967236876487732, "learning_rate": 7.800002900178632e-06, "loss": 0.6556, "step": 22765 }, { "epoch": 0.58, "grad_norm": 7.670356750488281, "learning_rate": 7.799193103448546e-06, "loss": 0.6911, "step": 22766 }, { "epoch": 0.58, "grad_norm": 1.4751774072647095, "learning_rate": 7.79838332188504e-06, "loss": 0.6657, "step": 22767 }, { "epoch": 0.58, "grad_norm": 1.990478277206421, "learning_rate": 7.797573555493691e-06, "loss": 0.3541, "step": 22768 }, { "epoch": 0.58, "grad_norm": 1.2875083684921265, "learning_rate": 7.796763804280084e-06, "loss": 0.5694, "step": 22769 }, { "epoch": 0.58, "grad_norm": 3.1689634323120117, "learning_rate": 7.795954068249795e-06, "loss": 0.7063, "step": 22770 }, { "epoch": 0.58, "grad_norm": 1.111762523651123, "learning_rate": 7.795144347408405e-06, "loss": 0.544, "step": 22771 }, { "epoch": 0.58, "grad_norm": 1.5197128057479858, "learning_rate": 7.794334641761496e-06, "loss": 0.5475, "step": 22772 }, { "epoch": 0.58, "grad_norm": 1.3919014930725098, "learning_rate": 7.793524951314646e-06, "loss": 0.6453, "step": 22773 }, { "epoch": 0.58, "grad_norm": 1.08293616771698, "learning_rate": 7.792715276073438e-06, "loss": 0.3702, "step": 22774 }, { "epoch": 0.58, "grad_norm": 2.063225269317627, "learning_rate": 7.791905616043447e-06, "loss": 0.5281, "step": 22775 }, { "epoch": 0.58, "grad_norm": 3.4323160648345947, "learning_rate": 7.791095971230258e-06, "loss": 0.5575, "step": 22776 }, { "epoch": 0.58, "grad_norm": 1.2496956586837769, "learning_rate": 7.790286341639446e-06, "loss": 0.5334, "step": 22777 }, { "epoch": 0.58, "grad_norm": 5.3915581703186035, "learning_rate": 7.78947672727659e-06, "loss": 0.6857, "step": 22778 }, { "epoch": 0.58, "grad_norm": 2.352569341659546, "learning_rate": 7.788667128147276e-06, "loss": 0.5323, "step": 22779 }, { "epoch": 0.58, "grad_norm": 2.176614284515381, "learning_rate": 7.787857544257076e-06, "loss": 0.6466, "step": 22780 }, { "epoch": 0.58, "grad_norm": 1.2772605419158936, "learning_rate": 7.787047975611573e-06, "loss": 0.5222, "step": 22781 }, { "epoch": 0.58, "grad_norm": 2.311406135559082, "learning_rate": 7.786238422216345e-06, "loss": 0.6105, "step": 22782 }, { "epoch": 0.58, "grad_norm": 0.8704176545143127, "learning_rate": 7.78542888407697e-06, "loss": 0.4558, "step": 22783 }, { "epoch": 0.58, "grad_norm": 0.9861936569213867, "learning_rate": 7.78461936119903e-06, "loss": 0.4396, "step": 22784 }, { "epoch": 0.58, "grad_norm": 3.04543137550354, "learning_rate": 7.7838098535881e-06, "loss": 0.6264, "step": 22785 }, { "epoch": 0.58, "grad_norm": 1.0991754531860352, "learning_rate": 7.78300036124976e-06, "loss": 0.4018, "step": 22786 }, { "epoch": 0.58, "grad_norm": 3.056777238845825, "learning_rate": 7.782190884189588e-06, "loss": 0.5528, "step": 22787 }, { "epoch": 0.58, "grad_norm": 1.653379201889038, "learning_rate": 7.781381422413165e-06, "loss": 0.5521, "step": 22788 }, { "epoch": 0.58, "grad_norm": 2.6038215160369873, "learning_rate": 7.780571975926066e-06, "loss": 0.6017, "step": 22789 }, { "epoch": 0.58, "grad_norm": 6.4882121086120605, "learning_rate": 7.77976254473387e-06, "loss": 0.5757, "step": 22790 }, { "epoch": 0.58, "grad_norm": 1.4569487571716309, "learning_rate": 7.778953128842159e-06, "loss": 0.5014, "step": 22791 }, { "epoch": 0.58, "grad_norm": 1.8296730518341064, "learning_rate": 7.778143728256504e-06, "loss": 0.6436, "step": 22792 }, { "epoch": 0.58, "grad_norm": 4.19140625, "learning_rate": 7.777334342982489e-06, "loss": 0.4557, "step": 22793 }, { "epoch": 0.58, "grad_norm": 2.841510534286499, "learning_rate": 7.776524973025686e-06, "loss": 0.5518, "step": 22794 }, { "epoch": 0.58, "grad_norm": 2.0741565227508545, "learning_rate": 7.775715618391679e-06, "loss": 0.704, "step": 22795 }, { "epoch": 0.58, "grad_norm": 1.3194808959960938, "learning_rate": 7.774906279086042e-06, "loss": 0.5772, "step": 22796 }, { "epoch": 0.58, "grad_norm": 4.859668254852295, "learning_rate": 7.774096955114353e-06, "loss": 0.5111, "step": 22797 }, { "epoch": 0.58, "grad_norm": 1.4294753074645996, "learning_rate": 7.773287646482188e-06, "loss": 0.7166, "step": 22798 }, { "epoch": 0.58, "grad_norm": 1.474059820175171, "learning_rate": 7.772478353195126e-06, "loss": 0.5074, "step": 22799 }, { "epoch": 0.58, "grad_norm": 1.2500420808792114, "learning_rate": 7.771669075258746e-06, "loss": 0.5189, "step": 22800 }, { "epoch": 0.58, "grad_norm": 2.931060552597046, "learning_rate": 7.770859812678619e-06, "loss": 0.5791, "step": 22801 }, { "epoch": 0.58, "grad_norm": 2.4500062465667725, "learning_rate": 7.770050565460327e-06, "loss": 0.5028, "step": 22802 }, { "epoch": 0.58, "grad_norm": 1.6468937397003174, "learning_rate": 7.769241333609448e-06, "loss": 0.4888, "step": 22803 }, { "epoch": 0.58, "grad_norm": 1.652878999710083, "learning_rate": 7.768432117131552e-06, "loss": 0.5595, "step": 22804 }, { "epoch": 0.58, "grad_norm": 2.538189172744751, "learning_rate": 7.767622916032224e-06, "loss": 0.6067, "step": 22805 }, { "epoch": 0.58, "grad_norm": 2.040590286254883, "learning_rate": 7.766813730317034e-06, "loss": 0.5855, "step": 22806 }, { "epoch": 0.58, "grad_norm": 2.0420055389404297, "learning_rate": 7.76600455999156e-06, "loss": 0.4524, "step": 22807 }, { "epoch": 0.58, "grad_norm": 1.0674582719802856, "learning_rate": 7.765195405061382e-06, "loss": 0.6282, "step": 22808 }, { "epoch": 0.58, "grad_norm": 2.307337522506714, "learning_rate": 7.76438626553207e-06, "loss": 0.6029, "step": 22809 }, { "epoch": 0.58, "grad_norm": 1.6218262910842896, "learning_rate": 7.763577141409206e-06, "loss": 0.544, "step": 22810 }, { "epoch": 0.58, "grad_norm": 1.4873594045639038, "learning_rate": 7.762768032698362e-06, "loss": 0.4536, "step": 22811 }, { "epoch": 0.58, "grad_norm": 1.8207151889801025, "learning_rate": 7.761958939405114e-06, "loss": 0.6107, "step": 22812 }, { "epoch": 0.58, "grad_norm": 1.695632815361023, "learning_rate": 7.761149861535042e-06, "loss": 0.5207, "step": 22813 }, { "epoch": 0.58, "grad_norm": 1.2630807161331177, "learning_rate": 7.760340799093715e-06, "loss": 0.5795, "step": 22814 }, { "epoch": 0.58, "grad_norm": 1.7248241901397705, "learning_rate": 7.759531752086716e-06, "loss": 0.5805, "step": 22815 }, { "epoch": 0.58, "grad_norm": 1.276228427886963, "learning_rate": 7.758722720519614e-06, "loss": 0.5565, "step": 22816 }, { "epoch": 0.58, "grad_norm": 11.313486099243164, "learning_rate": 7.757913704397987e-06, "loss": 0.5998, "step": 22817 }, { "epoch": 0.58, "grad_norm": 1.498442530632019, "learning_rate": 7.75710470372741e-06, "loss": 0.5484, "step": 22818 }, { "epoch": 0.58, "grad_norm": 1.4314476251602173, "learning_rate": 7.756295718513458e-06, "loss": 0.6103, "step": 22819 }, { "epoch": 0.58, "grad_norm": 3.145817995071411, "learning_rate": 7.75548674876171e-06, "loss": 0.6164, "step": 22820 }, { "epoch": 0.58, "grad_norm": 1.5166716575622559, "learning_rate": 7.754677794477732e-06, "loss": 0.4893, "step": 22821 }, { "epoch": 0.58, "grad_norm": 1.4613364934921265, "learning_rate": 7.753868855667108e-06, "loss": 0.4272, "step": 22822 }, { "epoch": 0.58, "grad_norm": 1.625744104385376, "learning_rate": 7.753059932335406e-06, "loss": 0.563, "step": 22823 }, { "epoch": 0.58, "grad_norm": 5.046171188354492, "learning_rate": 7.752251024488203e-06, "loss": 0.6253, "step": 22824 }, { "epoch": 0.59, "grad_norm": 1.6659220457077026, "learning_rate": 7.751442132131077e-06, "loss": 0.4104, "step": 22825 }, { "epoch": 0.59, "grad_norm": 1.8060880899429321, "learning_rate": 7.750633255269594e-06, "loss": 0.5657, "step": 22826 }, { "epoch": 0.59, "grad_norm": 2.798978328704834, "learning_rate": 7.74982439390934e-06, "loss": 0.5173, "step": 22827 }, { "epoch": 0.59, "grad_norm": 1.0505093336105347, "learning_rate": 7.749015548055878e-06, "loss": 0.4154, "step": 22828 }, { "epoch": 0.59, "grad_norm": 1.5731544494628906, "learning_rate": 7.748206717714789e-06, "loss": 0.6067, "step": 22829 }, { "epoch": 0.59, "grad_norm": 2.0934996604919434, "learning_rate": 7.747397902891643e-06, "loss": 0.574, "step": 22830 }, { "epoch": 0.59, "grad_norm": 2.137922525405884, "learning_rate": 7.746589103592014e-06, "loss": 0.6029, "step": 22831 }, { "epoch": 0.59, "grad_norm": 7.706847667694092, "learning_rate": 7.74578031982148e-06, "loss": 0.5533, "step": 22832 }, { "epoch": 0.59, "grad_norm": 1.8730148077011108, "learning_rate": 7.744971551585609e-06, "loss": 0.5298, "step": 22833 }, { "epoch": 0.59, "grad_norm": 1.955386996269226, "learning_rate": 7.74416279888998e-06, "loss": 0.4859, "step": 22834 }, { "epoch": 0.59, "grad_norm": 0.9833956956863403, "learning_rate": 7.743354061740162e-06, "loss": 0.3431, "step": 22835 }, { "epoch": 0.59, "grad_norm": 1.562315821647644, "learning_rate": 7.74254534014173e-06, "loss": 0.5738, "step": 22836 }, { "epoch": 0.59, "grad_norm": 5.191104888916016, "learning_rate": 7.74173663410026e-06, "loss": 0.6494, "step": 22837 }, { "epoch": 0.59, "grad_norm": 1.3391141891479492, "learning_rate": 7.74092794362132e-06, "loss": 0.6228, "step": 22838 }, { "epoch": 0.59, "grad_norm": 4.469849586486816, "learning_rate": 7.740119268710487e-06, "loss": 0.5425, "step": 22839 }, { "epoch": 0.59, "grad_norm": 1.629542350769043, "learning_rate": 7.73931060937333e-06, "loss": 0.4704, "step": 22840 }, { "epoch": 0.59, "grad_norm": 9.001374244689941, "learning_rate": 7.738501965615425e-06, "loss": 0.538, "step": 22841 }, { "epoch": 0.59, "grad_norm": 4.574305534362793, "learning_rate": 7.737693337442342e-06, "loss": 0.3562, "step": 22842 }, { "epoch": 0.59, "grad_norm": 1.4369988441467285, "learning_rate": 7.736884724859658e-06, "loss": 0.5748, "step": 22843 }, { "epoch": 0.59, "grad_norm": 1.1459044218063354, "learning_rate": 7.736076127872942e-06, "loss": 0.3013, "step": 22844 }, { "epoch": 0.59, "grad_norm": 1.7478221654891968, "learning_rate": 7.735267546487765e-06, "loss": 0.6093, "step": 22845 }, { "epoch": 0.59, "grad_norm": 3.328918695449829, "learning_rate": 7.734458980709704e-06, "loss": 0.5739, "step": 22846 }, { "epoch": 0.59, "grad_norm": 1.2986279726028442, "learning_rate": 7.733650430544326e-06, "loss": 0.7084, "step": 22847 }, { "epoch": 0.59, "grad_norm": 3.7244627475738525, "learning_rate": 7.732841895997205e-06, "loss": 0.609, "step": 22848 }, { "epoch": 0.59, "grad_norm": 1.5424513816833496, "learning_rate": 7.732033377073916e-06, "loss": 0.5933, "step": 22849 }, { "epoch": 0.59, "grad_norm": 1.1390063762664795, "learning_rate": 7.731224873780026e-06, "loss": 0.4767, "step": 22850 }, { "epoch": 0.59, "grad_norm": 2.1312735080718994, "learning_rate": 7.730416386121111e-06, "loss": 0.6289, "step": 22851 }, { "epoch": 0.59, "grad_norm": 3.8065547943115234, "learning_rate": 7.729607914102738e-06, "loss": 0.534, "step": 22852 }, { "epoch": 0.59, "grad_norm": 2.1390204429626465, "learning_rate": 7.728799457730483e-06, "loss": 0.5885, "step": 22853 }, { "epoch": 0.59, "grad_norm": 4.585874557495117, "learning_rate": 7.727991017009912e-06, "loss": 0.7968, "step": 22854 }, { "epoch": 0.59, "grad_norm": 1.2778103351593018, "learning_rate": 7.7271825919466e-06, "loss": 0.5719, "step": 22855 }, { "epoch": 0.59, "grad_norm": 1.3625065088272095, "learning_rate": 7.726374182546122e-06, "loss": 0.5427, "step": 22856 }, { "epoch": 0.59, "grad_norm": 2.225069522857666, "learning_rate": 7.72556578881404e-06, "loss": 0.5718, "step": 22857 }, { "epoch": 0.59, "grad_norm": 1.049124002456665, "learning_rate": 7.724757410755932e-06, "loss": 0.4301, "step": 22858 }, { "epoch": 0.59, "grad_norm": 13.127531051635742, "learning_rate": 7.723949048377367e-06, "loss": 0.5128, "step": 22859 }, { "epoch": 0.59, "grad_norm": 1.3778584003448486, "learning_rate": 7.723140701683913e-06, "loss": 0.5085, "step": 22860 }, { "epoch": 0.59, "grad_norm": 0.8457406759262085, "learning_rate": 7.722332370681145e-06, "loss": 0.4696, "step": 22861 }, { "epoch": 0.59, "grad_norm": 1.4585480690002441, "learning_rate": 7.721524055374628e-06, "loss": 0.4272, "step": 22862 }, { "epoch": 0.59, "grad_norm": 1.1545147895812988, "learning_rate": 7.72071575576994e-06, "loss": 0.3205, "step": 22863 }, { "epoch": 0.59, "grad_norm": 6.498286724090576, "learning_rate": 7.719907471872644e-06, "loss": 0.6041, "step": 22864 }, { "epoch": 0.59, "grad_norm": 1.603253960609436, "learning_rate": 7.719099203688314e-06, "loss": 0.5274, "step": 22865 }, { "epoch": 0.59, "grad_norm": 5.720435619354248, "learning_rate": 7.71829095122252e-06, "loss": 0.4064, "step": 22866 }, { "epoch": 0.59, "grad_norm": 6.745174884796143, "learning_rate": 7.71748271448083e-06, "loss": 0.5424, "step": 22867 }, { "epoch": 0.59, "grad_norm": 1.7515928745269775, "learning_rate": 7.716674493468816e-06, "loss": 0.4791, "step": 22868 }, { "epoch": 0.59, "grad_norm": 1.2983753681182861, "learning_rate": 7.715866288192046e-06, "loss": 0.4301, "step": 22869 }, { "epoch": 0.59, "grad_norm": 2.7103469371795654, "learning_rate": 7.715058098656093e-06, "loss": 0.5705, "step": 22870 }, { "epoch": 0.59, "grad_norm": 1.7792131900787354, "learning_rate": 7.71424992486652e-06, "loss": 0.4922, "step": 22871 }, { "epoch": 0.59, "grad_norm": 1.2571356296539307, "learning_rate": 7.713441766828904e-06, "loss": 0.5009, "step": 22872 }, { "epoch": 0.59, "grad_norm": 1.7572081089019775, "learning_rate": 7.712633624548812e-06, "loss": 0.5811, "step": 22873 }, { "epoch": 0.59, "grad_norm": 1.4120819568634033, "learning_rate": 7.711825498031808e-06, "loss": 0.5622, "step": 22874 }, { "epoch": 0.59, "grad_norm": 1.6241403818130493, "learning_rate": 7.711017387283466e-06, "loss": 0.4528, "step": 22875 }, { "epoch": 0.59, "grad_norm": 1.1231772899627686, "learning_rate": 7.710209292309355e-06, "loss": 0.4949, "step": 22876 }, { "epoch": 0.59, "grad_norm": 5.895758152008057, "learning_rate": 7.709401213115042e-06, "loss": 0.7199, "step": 22877 }, { "epoch": 0.59, "grad_norm": 1.2744288444519043, "learning_rate": 7.7085931497061e-06, "loss": 0.5511, "step": 22878 }, { "epoch": 0.59, "grad_norm": 1.9241597652435303, "learning_rate": 7.707785102088089e-06, "loss": 0.5685, "step": 22879 }, { "epoch": 0.59, "grad_norm": 3.2718312740325928, "learning_rate": 7.706977070266587e-06, "loss": 0.7601, "step": 22880 }, { "epoch": 0.59, "grad_norm": 1.726639986038208, "learning_rate": 7.706169054247157e-06, "loss": 0.6487, "step": 22881 }, { "epoch": 0.59, "grad_norm": 1.429776906967163, "learning_rate": 7.70536105403537e-06, "loss": 0.5015, "step": 22882 }, { "epoch": 0.59, "grad_norm": 1.382567286491394, "learning_rate": 7.704553069636795e-06, "loss": 0.626, "step": 22883 }, { "epoch": 0.59, "grad_norm": 1.106727123260498, "learning_rate": 7.703745101056994e-06, "loss": 0.4294, "step": 22884 }, { "epoch": 0.59, "grad_norm": 1.2660722732543945, "learning_rate": 7.702937148301539e-06, "loss": 0.569, "step": 22885 }, { "epoch": 0.59, "grad_norm": 1.3011540174484253, "learning_rate": 7.702129211376001e-06, "loss": 0.4895, "step": 22886 }, { "epoch": 0.59, "grad_norm": 1.4006156921386719, "learning_rate": 7.701321290285942e-06, "loss": 0.4746, "step": 22887 }, { "epoch": 0.59, "grad_norm": 3.335031509399414, "learning_rate": 7.700513385036935e-06, "loss": 0.5244, "step": 22888 }, { "epoch": 0.59, "grad_norm": 1.6720300912857056, "learning_rate": 7.699705495634543e-06, "loss": 0.5504, "step": 22889 }, { "epoch": 0.59, "grad_norm": 3.924023151397705, "learning_rate": 7.698897622084334e-06, "loss": 0.6854, "step": 22890 }, { "epoch": 0.59, "grad_norm": 4.096286773681641, "learning_rate": 7.698089764391881e-06, "loss": 0.7679, "step": 22891 }, { "epoch": 0.59, "grad_norm": 1.9223215579986572, "learning_rate": 7.697281922562744e-06, "loss": 0.4811, "step": 22892 }, { "epoch": 0.59, "grad_norm": 1.3517056703567505, "learning_rate": 7.696474096602497e-06, "loss": 0.466, "step": 22893 }, { "epoch": 0.59, "grad_norm": 1.884526014328003, "learning_rate": 7.6956662865167e-06, "loss": 0.6778, "step": 22894 }, { "epoch": 0.59, "grad_norm": 1.703276515007019, "learning_rate": 7.694858492310924e-06, "loss": 0.5814, "step": 22895 }, { "epoch": 0.59, "grad_norm": 1.2232072353363037, "learning_rate": 7.694050713990734e-06, "loss": 0.5467, "step": 22896 }, { "epoch": 0.59, "grad_norm": 6.836905002593994, "learning_rate": 7.693242951561698e-06, "loss": 0.609, "step": 22897 }, { "epoch": 0.59, "grad_norm": 1.4600903987884521, "learning_rate": 7.692435205029385e-06, "loss": 0.6905, "step": 22898 }, { "epoch": 0.59, "grad_norm": 7.655346393585205, "learning_rate": 7.691627474399355e-06, "loss": 0.5518, "step": 22899 }, { "epoch": 0.59, "grad_norm": 3.69549822807312, "learning_rate": 7.690819759677182e-06, "loss": 0.6965, "step": 22900 }, { "epoch": 0.59, "grad_norm": 1.433132529258728, "learning_rate": 7.690012060868427e-06, "loss": 0.4711, "step": 22901 }, { "epoch": 0.59, "grad_norm": 4.0027079582214355, "learning_rate": 7.689204377978654e-06, "loss": 0.5824, "step": 22902 }, { "epoch": 0.59, "grad_norm": 1.7637263536453247, "learning_rate": 7.688396711013437e-06, "loss": 0.4966, "step": 22903 }, { "epoch": 0.59, "grad_norm": 1.5281511545181274, "learning_rate": 7.687589059978336e-06, "loss": 0.5311, "step": 22904 }, { "epoch": 0.59, "grad_norm": 2.13107967376709, "learning_rate": 7.68678142487892e-06, "loss": 0.4144, "step": 22905 }, { "epoch": 0.59, "grad_norm": 1.1965439319610596, "learning_rate": 7.685973805720752e-06, "loss": 0.5881, "step": 22906 }, { "epoch": 0.59, "grad_norm": 5.44000244140625, "learning_rate": 7.685166202509397e-06, "loss": 0.7759, "step": 22907 }, { "epoch": 0.59, "grad_norm": 4.010946273803711, "learning_rate": 7.684358615250425e-06, "loss": 0.6519, "step": 22908 }, { "epoch": 0.59, "grad_norm": 1.9826619625091553, "learning_rate": 7.683551043949397e-06, "loss": 0.4583, "step": 22909 }, { "epoch": 0.59, "grad_norm": 8.331925392150879, "learning_rate": 7.682743488611882e-06, "loss": 0.427, "step": 22910 }, { "epoch": 0.59, "grad_norm": 4.471086502075195, "learning_rate": 7.681935949243438e-06, "loss": 0.8329, "step": 22911 }, { "epoch": 0.59, "grad_norm": 1.7974318265914917, "learning_rate": 7.681128425849641e-06, "loss": 0.6602, "step": 22912 }, { "epoch": 0.59, "grad_norm": 9.134284019470215, "learning_rate": 7.680320918436046e-06, "loss": 0.4557, "step": 22913 }, { "epoch": 0.59, "grad_norm": 3.7448136806488037, "learning_rate": 7.679513427008223e-06, "loss": 0.6123, "step": 22914 }, { "epoch": 0.59, "grad_norm": 1.0739442110061646, "learning_rate": 7.678705951571736e-06, "loss": 0.5541, "step": 22915 }, { "epoch": 0.59, "grad_norm": 1.617126226425171, "learning_rate": 7.677898492132149e-06, "loss": 0.3922, "step": 22916 }, { "epoch": 0.59, "grad_norm": 2.2299225330352783, "learning_rate": 7.677091048695028e-06, "loss": 0.6382, "step": 22917 }, { "epoch": 0.59, "grad_norm": 1.7723314762115479, "learning_rate": 7.676283621265934e-06, "loss": 0.5632, "step": 22918 }, { "epoch": 0.59, "grad_norm": 0.8267837762832642, "learning_rate": 7.675476209850433e-06, "loss": 0.3277, "step": 22919 }, { "epoch": 0.59, "grad_norm": 9.048502922058105, "learning_rate": 7.674668814454092e-06, "loss": 0.7517, "step": 22920 }, { "epoch": 0.59, "grad_norm": 3.7580740451812744, "learning_rate": 7.67386143508247e-06, "loss": 0.5464, "step": 22921 }, { "epoch": 0.59, "grad_norm": 2.5055577754974365, "learning_rate": 7.673054071741138e-06, "loss": 0.5641, "step": 22922 }, { "epoch": 0.59, "grad_norm": 1.2669696807861328, "learning_rate": 7.67224672443565e-06, "loss": 0.4329, "step": 22923 }, { "epoch": 0.59, "grad_norm": 2.3988735675811768, "learning_rate": 7.671439393171578e-06, "loss": 0.6509, "step": 22924 }, { "epoch": 0.59, "grad_norm": 2.8645718097686768, "learning_rate": 7.670632077954483e-06, "loss": 0.671, "step": 22925 }, { "epoch": 0.59, "grad_norm": 2.3038508892059326, "learning_rate": 7.669824778789927e-06, "loss": 0.5317, "step": 22926 }, { "epoch": 0.59, "grad_norm": 2.190876007080078, "learning_rate": 7.669017495683474e-06, "loss": 0.5609, "step": 22927 }, { "epoch": 0.59, "grad_norm": 1.0594338178634644, "learning_rate": 7.66821022864069e-06, "loss": 0.4704, "step": 22928 }, { "epoch": 0.59, "grad_norm": 1.6987416744232178, "learning_rate": 7.667402977667136e-06, "loss": 0.5994, "step": 22929 }, { "epoch": 0.59, "grad_norm": 1.836885929107666, "learning_rate": 7.666595742768375e-06, "loss": 0.6436, "step": 22930 }, { "epoch": 0.59, "grad_norm": 1.7495839595794678, "learning_rate": 7.665788523949967e-06, "loss": 0.5938, "step": 22931 }, { "epoch": 0.59, "grad_norm": 1.849275827407837, "learning_rate": 7.664981321217482e-06, "loss": 0.3465, "step": 22932 }, { "epoch": 0.59, "grad_norm": 1.1045117378234863, "learning_rate": 7.664174134576477e-06, "loss": 0.451, "step": 22933 }, { "epoch": 0.59, "grad_norm": 1.893170714378357, "learning_rate": 7.663366964032518e-06, "loss": 0.5065, "step": 22934 }, { "epoch": 0.59, "grad_norm": 3.245473623275757, "learning_rate": 7.662559809591163e-06, "loss": 0.3785, "step": 22935 }, { "epoch": 0.59, "grad_norm": 1.5055378675460815, "learning_rate": 7.66175267125798e-06, "loss": 0.5426, "step": 22936 }, { "epoch": 0.59, "grad_norm": 3.9900641441345215, "learning_rate": 7.660945549038528e-06, "loss": 0.4858, "step": 22937 }, { "epoch": 0.59, "grad_norm": 1.7152094841003418, "learning_rate": 7.660138442938367e-06, "loss": 0.4541, "step": 22938 }, { "epoch": 0.59, "grad_norm": 2.3280231952667236, "learning_rate": 7.659331352963067e-06, "loss": 0.3729, "step": 22939 }, { "epoch": 0.59, "grad_norm": 1.2014535665512085, "learning_rate": 7.65852427911818e-06, "loss": 0.6044, "step": 22940 }, { "epoch": 0.59, "grad_norm": 1.16152024269104, "learning_rate": 7.657717221409275e-06, "loss": 0.4778, "step": 22941 }, { "epoch": 0.59, "grad_norm": 1.5485888719558716, "learning_rate": 7.65691017984191e-06, "loss": 0.6204, "step": 22942 }, { "epoch": 0.59, "grad_norm": 2.277411699295044, "learning_rate": 7.656103154421648e-06, "loss": 0.5803, "step": 22943 }, { "epoch": 0.59, "grad_norm": 1.31471848487854, "learning_rate": 7.655296145154053e-06, "loss": 0.502, "step": 22944 }, { "epoch": 0.59, "grad_norm": 9.263175964355469, "learning_rate": 7.65448915204468e-06, "loss": 0.467, "step": 22945 }, { "epoch": 0.59, "grad_norm": 1.539137363433838, "learning_rate": 7.653682175099098e-06, "loss": 0.6956, "step": 22946 }, { "epoch": 0.59, "grad_norm": 5.439033508300781, "learning_rate": 7.652875214322862e-06, "loss": 0.4852, "step": 22947 }, { "epoch": 0.59, "grad_norm": 1.9722540378570557, "learning_rate": 7.652068269721537e-06, "loss": 0.5515, "step": 22948 }, { "epoch": 0.59, "grad_norm": 2.9504237174987793, "learning_rate": 7.65126134130068e-06, "loss": 0.5918, "step": 22949 }, { "epoch": 0.59, "grad_norm": 4.215325832366943, "learning_rate": 7.650454429065854e-06, "loss": 0.648, "step": 22950 }, { "epoch": 0.59, "grad_norm": 1.1791220903396606, "learning_rate": 7.649647533022623e-06, "loss": 0.4795, "step": 22951 }, { "epoch": 0.59, "grad_norm": 1.5536032915115356, "learning_rate": 7.648840653176543e-06, "loss": 0.5707, "step": 22952 }, { "epoch": 0.59, "grad_norm": 5.837118625640869, "learning_rate": 7.648033789533177e-06, "loss": 0.6259, "step": 22953 }, { "epoch": 0.59, "grad_norm": 3.0818569660186768, "learning_rate": 7.647226942098082e-06, "loss": 0.5628, "step": 22954 }, { "epoch": 0.59, "grad_norm": 2.3039186000823975, "learning_rate": 7.646420110876823e-06, "loss": 0.4643, "step": 22955 }, { "epoch": 0.59, "grad_norm": 1.619394063949585, "learning_rate": 7.645613295874957e-06, "loss": 0.6381, "step": 22956 }, { "epoch": 0.59, "grad_norm": 5.499913215637207, "learning_rate": 7.644806497098044e-06, "loss": 0.5225, "step": 22957 }, { "epoch": 0.59, "grad_norm": 1.0541155338287354, "learning_rate": 7.643999714551648e-06, "loss": 0.5076, "step": 22958 }, { "epoch": 0.59, "grad_norm": 1.463706374168396, "learning_rate": 7.643192948241324e-06, "loss": 0.4427, "step": 22959 }, { "epoch": 0.59, "grad_norm": 5.662600517272949, "learning_rate": 7.642386198172633e-06, "loss": 0.5949, "step": 22960 }, { "epoch": 0.59, "grad_norm": 1.6082743406295776, "learning_rate": 7.641579464351137e-06, "loss": 0.5369, "step": 22961 }, { "epoch": 0.59, "grad_norm": 6.04693603515625, "learning_rate": 7.64077274678239e-06, "loss": 0.471, "step": 22962 }, { "epoch": 0.59, "grad_norm": 1.3247524499893188, "learning_rate": 7.639966045471958e-06, "loss": 0.569, "step": 22963 }, { "epoch": 0.59, "grad_norm": 1.9898043870925903, "learning_rate": 7.639159360425396e-06, "loss": 0.541, "step": 22964 }, { "epoch": 0.59, "grad_norm": 1.4100579023361206, "learning_rate": 7.638352691648267e-06, "loss": 0.569, "step": 22965 }, { "epoch": 0.59, "grad_norm": 1.331627607345581, "learning_rate": 7.637546039146124e-06, "loss": 0.521, "step": 22966 }, { "epoch": 0.59, "grad_norm": 1.3653088808059692, "learning_rate": 7.63673940292453e-06, "loss": 0.5368, "step": 22967 }, { "epoch": 0.59, "grad_norm": 5.876605033874512, "learning_rate": 7.635932782989045e-06, "loss": 0.6243, "step": 22968 }, { "epoch": 0.59, "grad_norm": 1.5117604732513428, "learning_rate": 7.635126179345225e-06, "loss": 0.5004, "step": 22969 }, { "epoch": 0.59, "grad_norm": 2.127274990081787, "learning_rate": 7.634319591998631e-06, "loss": 0.7392, "step": 22970 }, { "epoch": 0.59, "grad_norm": 1.273322343826294, "learning_rate": 7.633513020954819e-06, "loss": 0.4347, "step": 22971 }, { "epoch": 0.59, "grad_norm": 1.6026638746261597, "learning_rate": 7.632706466219347e-06, "loss": 0.5029, "step": 22972 }, { "epoch": 0.59, "grad_norm": 1.6550761461257935, "learning_rate": 7.631899927797777e-06, "loss": 0.4509, "step": 22973 }, { "epoch": 0.59, "grad_norm": 1.9403080940246582, "learning_rate": 7.631093405695664e-06, "loss": 0.4447, "step": 22974 }, { "epoch": 0.59, "grad_norm": 2.696484327316284, "learning_rate": 7.630286899918568e-06, "loss": 0.5044, "step": 22975 }, { "epoch": 0.59, "grad_norm": 0.9631097912788391, "learning_rate": 7.629480410472045e-06, "loss": 0.4644, "step": 22976 }, { "epoch": 0.59, "grad_norm": 9.118060111999512, "learning_rate": 7.628673937361654e-06, "loss": 0.6163, "step": 22977 }, { "epoch": 0.59, "grad_norm": 1.3531373739242554, "learning_rate": 7.627867480592952e-06, "loss": 0.6228, "step": 22978 }, { "epoch": 0.59, "grad_norm": 1.9670873880386353, "learning_rate": 7.6270610401714966e-06, "loss": 0.5461, "step": 22979 }, { "epoch": 0.59, "grad_norm": 1.2345163822174072, "learning_rate": 7.6262546161028474e-06, "loss": 0.5367, "step": 22980 }, { "epoch": 0.59, "grad_norm": 0.9510248303413391, "learning_rate": 7.625448208392558e-06, "loss": 0.6091, "step": 22981 }, { "epoch": 0.59, "grad_norm": 3.2958898544311523, "learning_rate": 7.624641817046189e-06, "loss": 0.6076, "step": 22982 }, { "epoch": 0.59, "grad_norm": 1.3737854957580566, "learning_rate": 7.623835442069295e-06, "loss": 0.5404, "step": 22983 }, { "epoch": 0.59, "grad_norm": 1.265509009361267, "learning_rate": 7.6230290834674345e-06, "loss": 0.5684, "step": 22984 }, { "epoch": 0.59, "grad_norm": 2.7012827396392822, "learning_rate": 7.622222741246165e-06, "loss": 0.5231, "step": 22985 }, { "epoch": 0.59, "grad_norm": 7.947667121887207, "learning_rate": 7.6214164154110426e-06, "loss": 0.6376, "step": 22986 }, { "epoch": 0.59, "grad_norm": 1.3206021785736084, "learning_rate": 7.620610105967625e-06, "loss": 0.6854, "step": 22987 }, { "epoch": 0.59, "grad_norm": 5.754129886627197, "learning_rate": 7.619803812921464e-06, "loss": 0.6647, "step": 22988 }, { "epoch": 0.59, "grad_norm": 0.931321382522583, "learning_rate": 7.618997536278125e-06, "loss": 0.4851, "step": 22989 }, { "epoch": 0.59, "grad_norm": 1.4528095722198486, "learning_rate": 7.618191276043154e-06, "loss": 0.4933, "step": 22990 }, { "epoch": 0.59, "grad_norm": 1.754569411277771, "learning_rate": 7.617385032222114e-06, "loss": 0.5792, "step": 22991 }, { "epoch": 0.59, "grad_norm": 3.4859507083892822, "learning_rate": 7.616578804820561e-06, "loss": 0.5688, "step": 22992 }, { "epoch": 0.59, "grad_norm": 1.6469124555587769, "learning_rate": 7.615772593844047e-06, "loss": 0.5595, "step": 22993 }, { "epoch": 0.59, "grad_norm": 5.569589614868164, "learning_rate": 7.614966399298134e-06, "loss": 0.7121, "step": 22994 }, { "epoch": 0.59, "grad_norm": 1.3522229194641113, "learning_rate": 7.61416022118837e-06, "loss": 0.5016, "step": 22995 }, { "epoch": 0.59, "grad_norm": 2.438011407852173, "learning_rate": 7.6133540595203155e-06, "loss": 0.4766, "step": 22996 }, { "epoch": 0.59, "grad_norm": 1.6774563789367676, "learning_rate": 7.612547914299528e-06, "loss": 0.355, "step": 22997 }, { "epoch": 0.59, "grad_norm": 3.058905601501465, "learning_rate": 7.611741785531559e-06, "loss": 0.6023, "step": 22998 }, { "epoch": 0.59, "grad_norm": 3.6921257972717285, "learning_rate": 7.610935673221966e-06, "loss": 0.4885, "step": 22999 }, { "epoch": 0.59, "grad_norm": 13.770489692687988, "learning_rate": 7.610129577376301e-06, "loss": 0.666, "step": 23000 }, { "epoch": 0.59, "grad_norm": 2.565617322921753, "learning_rate": 7.609323498000123e-06, "loss": 0.5361, "step": 23001 }, { "epoch": 0.59, "grad_norm": 1.3085354566574097, "learning_rate": 7.608517435098987e-06, "loss": 0.5608, "step": 23002 }, { "epoch": 0.59, "grad_norm": 1.414385437965393, "learning_rate": 7.607711388678442e-06, "loss": 0.6033, "step": 23003 }, { "epoch": 0.59, "grad_norm": 2.4382970333099365, "learning_rate": 7.6069053587440525e-06, "loss": 0.5042, "step": 23004 }, { "epoch": 0.59, "grad_norm": 1.7682034969329834, "learning_rate": 7.6060993453013645e-06, "loss": 0.6151, "step": 23005 }, { "epoch": 0.59, "grad_norm": 4.529171466827393, "learning_rate": 7.6052933483559375e-06, "loss": 0.6477, "step": 23006 }, { "epoch": 0.59, "grad_norm": 1.4786828756332397, "learning_rate": 7.604487367913323e-06, "loss": 0.6041, "step": 23007 }, { "epoch": 0.59, "grad_norm": 3.0642263889312744, "learning_rate": 7.603681403979075e-06, "loss": 0.5438, "step": 23008 }, { "epoch": 0.59, "grad_norm": 1.9797271490097046, "learning_rate": 7.6028754565587525e-06, "loss": 0.6029, "step": 23009 }, { "epoch": 0.59, "grad_norm": 2.104947328567505, "learning_rate": 7.602069525657904e-06, "loss": 0.4998, "step": 23010 }, { "epoch": 0.59, "grad_norm": 1.698438286781311, "learning_rate": 7.601263611282088e-06, "loss": 0.4581, "step": 23011 }, { "epoch": 0.59, "grad_norm": 4.356664657592773, "learning_rate": 7.600457713436853e-06, "loss": 0.6713, "step": 23012 }, { "epoch": 0.59, "grad_norm": 1.7633328437805176, "learning_rate": 7.599651832127757e-06, "loss": 0.676, "step": 23013 }, { "epoch": 0.59, "grad_norm": 1.0470573902130127, "learning_rate": 7.598845967360355e-06, "loss": 0.5183, "step": 23014 }, { "epoch": 0.59, "grad_norm": 5.86364221572876, "learning_rate": 7.598040119140195e-06, "loss": 0.8014, "step": 23015 }, { "epoch": 0.59, "grad_norm": 1.543338656425476, "learning_rate": 7.597234287472837e-06, "loss": 0.3734, "step": 23016 }, { "epoch": 0.59, "grad_norm": 1.6750028133392334, "learning_rate": 7.596428472363827e-06, "loss": 0.6345, "step": 23017 }, { "epoch": 0.59, "grad_norm": 5.652486324310303, "learning_rate": 7.595622673818724e-06, "loss": 0.6835, "step": 23018 }, { "epoch": 0.59, "grad_norm": 16.679920196533203, "learning_rate": 7.594816891843077e-06, "loss": 0.6373, "step": 23019 }, { "epoch": 0.59, "grad_norm": 5.450803756713867, "learning_rate": 7.594011126442442e-06, "loss": 0.614, "step": 23020 }, { "epoch": 0.59, "grad_norm": 1.9190059900283813, "learning_rate": 7.593205377622373e-06, "loss": 0.3728, "step": 23021 }, { "epoch": 0.59, "grad_norm": 8.777201652526855, "learning_rate": 7.592399645388418e-06, "loss": 0.4874, "step": 23022 }, { "epoch": 0.59, "grad_norm": 1.2530577182769775, "learning_rate": 7.5915939297461325e-06, "loss": 0.5794, "step": 23023 }, { "epoch": 0.59, "grad_norm": 9.326350212097168, "learning_rate": 7.590788230701067e-06, "loss": 0.6452, "step": 23024 }, { "epoch": 0.59, "grad_norm": 2.4691267013549805, "learning_rate": 7.589982548258776e-06, "loss": 0.5493, "step": 23025 }, { "epoch": 0.59, "grad_norm": 2.587125539779663, "learning_rate": 7.589176882424813e-06, "loss": 0.5426, "step": 23026 }, { "epoch": 0.59, "grad_norm": 1.3219995498657227, "learning_rate": 7.588371233204728e-06, "loss": 0.4259, "step": 23027 }, { "epoch": 0.59, "grad_norm": 1.0102678537368774, "learning_rate": 7.587565600604073e-06, "loss": 0.4066, "step": 23028 }, { "epoch": 0.59, "grad_norm": 6.193108558654785, "learning_rate": 7.586759984628399e-06, "loss": 0.7249, "step": 23029 }, { "epoch": 0.59, "grad_norm": 4.038420677185059, "learning_rate": 7.58595438528326e-06, "loss": 0.7262, "step": 23030 }, { "epoch": 0.59, "grad_norm": 1.3394900560379028, "learning_rate": 7.585148802574207e-06, "loss": 0.4557, "step": 23031 }, { "epoch": 0.59, "grad_norm": 1.6520124673843384, "learning_rate": 7.58434323650679e-06, "loss": 0.6772, "step": 23032 }, { "epoch": 0.59, "grad_norm": 1.6564608812332153, "learning_rate": 7.583537687086564e-06, "loss": 0.4814, "step": 23033 }, { "epoch": 0.59, "grad_norm": 1.5429389476776123, "learning_rate": 7.582732154319075e-06, "loss": 0.6591, "step": 23034 }, { "epoch": 0.59, "grad_norm": 3.418410062789917, "learning_rate": 7.581926638209881e-06, "loss": 0.4789, "step": 23035 }, { "epoch": 0.59, "grad_norm": 9.962236404418945, "learning_rate": 7.581121138764526e-06, "loss": 0.5962, "step": 23036 }, { "epoch": 0.59, "grad_norm": 2.8714420795440674, "learning_rate": 7.580315655988566e-06, "loss": 0.6442, "step": 23037 }, { "epoch": 0.59, "grad_norm": 1.351642370223999, "learning_rate": 7.579510189887551e-06, "loss": 0.5156, "step": 23038 }, { "epoch": 0.59, "grad_norm": 1.4390546083450317, "learning_rate": 7.578704740467029e-06, "loss": 0.626, "step": 23039 }, { "epoch": 0.59, "grad_norm": 1.4899985790252686, "learning_rate": 7.577899307732556e-06, "loss": 0.4039, "step": 23040 }, { "epoch": 0.59, "grad_norm": 1.223107933998108, "learning_rate": 7.577093891689676e-06, "loss": 0.4737, "step": 23041 }, { "epoch": 0.59, "grad_norm": 2.6456315517425537, "learning_rate": 7.576288492343945e-06, "loss": 0.5902, "step": 23042 }, { "epoch": 0.59, "grad_norm": 1.6532191038131714, "learning_rate": 7.57548310970091e-06, "loss": 0.5634, "step": 23043 }, { "epoch": 0.59, "grad_norm": 1.0552140474319458, "learning_rate": 7.574677743766121e-06, "loss": 0.4957, "step": 23044 }, { "epoch": 0.59, "grad_norm": 3.288969039916992, "learning_rate": 7.573872394545131e-06, "loss": 0.7013, "step": 23045 }, { "epoch": 0.59, "grad_norm": 14.812541007995605, "learning_rate": 7.573067062043486e-06, "loss": 0.7098, "step": 23046 }, { "epoch": 0.59, "grad_norm": 1.581520676612854, "learning_rate": 7.572261746266743e-06, "loss": 0.6228, "step": 23047 }, { "epoch": 0.59, "grad_norm": 1.43108332157135, "learning_rate": 7.571456447220441e-06, "loss": 0.5791, "step": 23048 }, { "epoch": 0.59, "grad_norm": 1.4864848852157593, "learning_rate": 7.570651164910137e-06, "loss": 0.5822, "step": 23049 }, { "epoch": 0.59, "grad_norm": 3.0232510566711426, "learning_rate": 7.569845899341382e-06, "loss": 0.5129, "step": 23050 }, { "epoch": 0.59, "grad_norm": 1.6749699115753174, "learning_rate": 7.569040650519719e-06, "loss": 0.5815, "step": 23051 }, { "epoch": 0.59, "grad_norm": 1.1447511911392212, "learning_rate": 7.568235418450702e-06, "loss": 0.5679, "step": 23052 }, { "epoch": 0.59, "grad_norm": 1.4999911785125732, "learning_rate": 7.567430203139878e-06, "loss": 0.3884, "step": 23053 }, { "epoch": 0.59, "grad_norm": 2.9872827529907227, "learning_rate": 7.566625004592796e-06, "loss": 0.5907, "step": 23054 }, { "epoch": 0.59, "grad_norm": 1.7467823028564453, "learning_rate": 7.565819822815008e-06, "loss": 0.434, "step": 23055 }, { "epoch": 0.59, "grad_norm": 1.6477330923080444, "learning_rate": 7.565014657812059e-06, "loss": 0.545, "step": 23056 }, { "epoch": 0.59, "grad_norm": 4.026971817016602, "learning_rate": 7.564209509589499e-06, "loss": 0.5628, "step": 23057 }, { "epoch": 0.59, "grad_norm": 1.988412857055664, "learning_rate": 7.563404378152877e-06, "loss": 0.5382, "step": 23058 }, { "epoch": 0.59, "grad_norm": 1.1015502214431763, "learning_rate": 7.562599263507743e-06, "loss": 0.534, "step": 23059 }, { "epoch": 0.59, "grad_norm": 3.5344395637512207, "learning_rate": 7.5617941656596405e-06, "loss": 0.9353, "step": 23060 }, { "epoch": 0.59, "grad_norm": 4.195814609527588, "learning_rate": 7.560989084614121e-06, "loss": 0.7846, "step": 23061 }, { "epoch": 0.59, "grad_norm": 2.07497501373291, "learning_rate": 7.560184020376735e-06, "loss": 0.5321, "step": 23062 }, { "epoch": 0.59, "grad_norm": 2.1507885456085205, "learning_rate": 7.559378972953027e-06, "loss": 0.4852, "step": 23063 }, { "epoch": 0.59, "grad_norm": 4.616745471954346, "learning_rate": 7.558573942348546e-06, "loss": 0.6607, "step": 23064 }, { "epoch": 0.59, "grad_norm": 1.4228788614273071, "learning_rate": 7.5577689285688375e-06, "loss": 0.6527, "step": 23065 }, { "epoch": 0.59, "grad_norm": 1.6843434572219849, "learning_rate": 7.556963931619451e-06, "loss": 0.6255, "step": 23066 }, { "epoch": 0.59, "grad_norm": 2.787379503250122, "learning_rate": 7.5561589515059375e-06, "loss": 0.7585, "step": 23067 }, { "epoch": 0.59, "grad_norm": 1.5908979177474976, "learning_rate": 7.555353988233838e-06, "loss": 0.456, "step": 23068 }, { "epoch": 0.59, "grad_norm": 1.9392313957214355, "learning_rate": 7.554549041808705e-06, "loss": 0.5252, "step": 23069 }, { "epoch": 0.59, "grad_norm": 1.173266053199768, "learning_rate": 7.553744112236082e-06, "loss": 0.404, "step": 23070 }, { "epoch": 0.59, "grad_norm": 1.9924932718276978, "learning_rate": 7.5529391995215204e-06, "loss": 0.5694, "step": 23071 }, { "epoch": 0.59, "grad_norm": 9.480990409851074, "learning_rate": 7.552134303670562e-06, "loss": 0.6615, "step": 23072 }, { "epoch": 0.59, "grad_norm": 1.3243249654769897, "learning_rate": 7.5513294246887556e-06, "loss": 0.5678, "step": 23073 }, { "epoch": 0.59, "grad_norm": 6.501044750213623, "learning_rate": 7.550524562581651e-06, "loss": 0.7488, "step": 23074 }, { "epoch": 0.59, "grad_norm": 15.485472679138184, "learning_rate": 7.549719717354788e-06, "loss": 0.6304, "step": 23075 }, { "epoch": 0.59, "grad_norm": 1.400801420211792, "learning_rate": 7.548914889013722e-06, "loss": 0.4562, "step": 23076 }, { "epoch": 0.59, "grad_norm": 1.1585474014282227, "learning_rate": 7.5481100775639925e-06, "loss": 0.3783, "step": 23077 }, { "epoch": 0.59, "grad_norm": 1.5399396419525146, "learning_rate": 7.547305283011147e-06, "loss": 0.524, "step": 23078 }, { "epoch": 0.59, "grad_norm": 6.747594833374023, "learning_rate": 7.5465005053607345e-06, "loss": 0.8055, "step": 23079 }, { "epoch": 0.59, "grad_norm": 1.2431026697158813, "learning_rate": 7.545695744618298e-06, "loss": 0.4443, "step": 23080 }, { "epoch": 0.59, "grad_norm": 1.1298719644546509, "learning_rate": 7.544891000789385e-06, "loss": 0.6712, "step": 23081 }, { "epoch": 0.59, "grad_norm": 1.8910748958587646, "learning_rate": 7.5440862738795395e-06, "loss": 0.3357, "step": 23082 }, { "epoch": 0.59, "grad_norm": 4.519070625305176, "learning_rate": 7.543281563894311e-06, "loss": 0.4089, "step": 23083 }, { "epoch": 0.59, "grad_norm": 2.025914430618286, "learning_rate": 7.542476870839239e-06, "loss": 0.492, "step": 23084 }, { "epoch": 0.59, "grad_norm": 2.699622631072998, "learning_rate": 7.5416721947198745e-06, "loss": 0.6459, "step": 23085 }, { "epoch": 0.59, "grad_norm": 3.6575777530670166, "learning_rate": 7.540867535541761e-06, "loss": 0.4708, "step": 23086 }, { "epoch": 0.59, "grad_norm": 0.9230466485023499, "learning_rate": 7.540062893310443e-06, "loss": 0.4066, "step": 23087 }, { "epoch": 0.59, "grad_norm": 1.2424887418746948, "learning_rate": 7.539258268031467e-06, "loss": 0.4317, "step": 23088 }, { "epoch": 0.59, "grad_norm": 1.6784287691116333, "learning_rate": 7.538453659710375e-06, "loss": 0.6062, "step": 23089 }, { "epoch": 0.59, "grad_norm": 2.205687999725342, "learning_rate": 7.537649068352715e-06, "loss": 0.6024, "step": 23090 }, { "epoch": 0.59, "grad_norm": 1.4226163625717163, "learning_rate": 7.536844493964032e-06, "loss": 0.5144, "step": 23091 }, { "epoch": 0.59, "grad_norm": 1.3017202615737915, "learning_rate": 7.536039936549866e-06, "loss": 0.6775, "step": 23092 }, { "epoch": 0.59, "grad_norm": 10.11506175994873, "learning_rate": 7.535235396115768e-06, "loss": 0.6043, "step": 23093 }, { "epoch": 0.59, "grad_norm": 3.8530166149139404, "learning_rate": 7.534430872667278e-06, "loss": 0.5942, "step": 23094 }, { "epoch": 0.59, "grad_norm": 2.0376272201538086, "learning_rate": 7.53362636620994e-06, "loss": 0.5459, "step": 23095 }, { "epoch": 0.59, "grad_norm": 1.5798594951629639, "learning_rate": 7.532821876749301e-06, "loss": 0.4652, "step": 23096 }, { "epoch": 0.59, "grad_norm": 1.7577296495437622, "learning_rate": 7.5320174042909025e-06, "loss": 0.4046, "step": 23097 }, { "epoch": 0.59, "grad_norm": 1.7238342761993408, "learning_rate": 7.531212948840292e-06, "loss": 0.5615, "step": 23098 }, { "epoch": 0.59, "grad_norm": 1.386018991470337, "learning_rate": 7.530408510403008e-06, "loss": 0.5631, "step": 23099 }, { "epoch": 0.59, "grad_norm": 8.747761726379395, "learning_rate": 7.529604088984599e-06, "loss": 0.5341, "step": 23100 }, { "epoch": 0.59, "grad_norm": 2.2210118770599365, "learning_rate": 7.5287996845906055e-06, "loss": 0.603, "step": 23101 }, { "epoch": 0.59, "grad_norm": 2.001396894454956, "learning_rate": 7.527995297226571e-06, "loss": 0.6384, "step": 23102 }, { "epoch": 0.59, "grad_norm": 1.6693757772445679, "learning_rate": 7.527190926898042e-06, "loss": 0.4551, "step": 23103 }, { "epoch": 0.59, "grad_norm": 1.9580025672912598, "learning_rate": 7.526386573610558e-06, "loss": 0.4912, "step": 23104 }, { "epoch": 0.59, "grad_norm": 2.213975191116333, "learning_rate": 7.525582237369665e-06, "loss": 0.6295, "step": 23105 }, { "epoch": 0.59, "grad_norm": 0.8745080232620239, "learning_rate": 7.5247779181809035e-06, "loss": 0.5069, "step": 23106 }, { "epoch": 0.59, "grad_norm": 5.751091957092285, "learning_rate": 7.523973616049817e-06, "loss": 0.6098, "step": 23107 }, { "epoch": 0.59, "grad_norm": 1.2951886653900146, "learning_rate": 7.523169330981951e-06, "loss": 0.6001, "step": 23108 }, { "epoch": 0.59, "grad_norm": 6.645153999328613, "learning_rate": 7.5223650629828435e-06, "loss": 0.5366, "step": 23109 }, { "epoch": 0.59, "grad_norm": 1.3175268173217773, "learning_rate": 7.521560812058042e-06, "loss": 0.549, "step": 23110 }, { "epoch": 0.59, "grad_norm": 1.11328125, "learning_rate": 7.520756578213083e-06, "loss": 0.4991, "step": 23111 }, { "epoch": 0.59, "grad_norm": 1.3912255764007568, "learning_rate": 7.519952361453515e-06, "loss": 0.4246, "step": 23112 }, { "epoch": 0.59, "grad_norm": 1.0809870958328247, "learning_rate": 7.5191481617848745e-06, "loss": 0.5637, "step": 23113 }, { "epoch": 0.59, "grad_norm": 7.007692813873291, "learning_rate": 7.518343979212708e-06, "loss": 0.6973, "step": 23114 }, { "epoch": 0.59, "grad_norm": 1.5697649717330933, "learning_rate": 7.517539813742556e-06, "loss": 0.6002, "step": 23115 }, { "epoch": 0.59, "grad_norm": 1.285324215888977, "learning_rate": 7.516735665379958e-06, "loss": 0.5632, "step": 23116 }, { "epoch": 0.59, "grad_norm": 1.1736401319503784, "learning_rate": 7.515931534130461e-06, "loss": 0.4414, "step": 23117 }, { "epoch": 0.59, "grad_norm": 1.2115017175674438, "learning_rate": 7.5151274199995996e-06, "loss": 0.6364, "step": 23118 }, { "epoch": 0.59, "grad_norm": 1.3657972812652588, "learning_rate": 7.51432332299292e-06, "loss": 0.6689, "step": 23119 }, { "epoch": 0.59, "grad_norm": 1.5516977310180664, "learning_rate": 7.5135192431159645e-06, "loss": 0.4724, "step": 23120 }, { "epoch": 0.59, "grad_norm": 2.4763004779815674, "learning_rate": 7.51271518037427e-06, "loss": 0.6238, "step": 23121 }, { "epoch": 0.59, "grad_norm": 1.1069259643554688, "learning_rate": 7.511911134773381e-06, "loss": 0.5162, "step": 23122 }, { "epoch": 0.59, "grad_norm": 1.2632722854614258, "learning_rate": 7.511107106318836e-06, "loss": 0.4556, "step": 23123 }, { "epoch": 0.59, "grad_norm": 1.6173537969589233, "learning_rate": 7.510303095016178e-06, "loss": 0.5043, "step": 23124 }, { "epoch": 0.59, "grad_norm": 1.7457832098007202, "learning_rate": 7.509499100870948e-06, "loss": 0.61, "step": 23125 }, { "epoch": 0.59, "grad_norm": 1.6846176385879517, "learning_rate": 7.508695123888687e-06, "loss": 0.4873, "step": 23126 }, { "epoch": 0.59, "grad_norm": 1.1226173639297485, "learning_rate": 7.50789116407493e-06, "loss": 0.5741, "step": 23127 }, { "epoch": 0.59, "grad_norm": 1.8927401304244995, "learning_rate": 7.507087221435223e-06, "loss": 0.6356, "step": 23128 }, { "epoch": 0.59, "grad_norm": 4.856530666351318, "learning_rate": 7.506283295975104e-06, "loss": 0.5616, "step": 23129 }, { "epoch": 0.59, "grad_norm": 1.7486392259597778, "learning_rate": 7.505479387700115e-06, "loss": 0.5514, "step": 23130 }, { "epoch": 0.59, "grad_norm": 2.3961710929870605, "learning_rate": 7.504675496615793e-06, "loss": 0.5514, "step": 23131 }, { "epoch": 0.59, "grad_norm": 1.7935367822647095, "learning_rate": 7.503871622727681e-06, "loss": 0.5045, "step": 23132 }, { "epoch": 0.59, "grad_norm": 3.9840519428253174, "learning_rate": 7.503067766041318e-06, "loss": 0.3628, "step": 23133 }, { "epoch": 0.59, "grad_norm": 2.8102011680603027, "learning_rate": 7.502263926562241e-06, "loss": 0.5805, "step": 23134 }, { "epoch": 0.59, "grad_norm": 1.0828896760940552, "learning_rate": 7.501460104295994e-06, "loss": 0.5947, "step": 23135 }, { "epoch": 0.59, "grad_norm": 1.9687652587890625, "learning_rate": 7.500656299248114e-06, "loss": 0.5789, "step": 23136 }, { "epoch": 0.59, "grad_norm": 4.043866157531738, "learning_rate": 7.499852511424139e-06, "loss": 0.6603, "step": 23137 }, { "epoch": 0.59, "grad_norm": 14.563496589660645, "learning_rate": 7.499048740829612e-06, "loss": 0.6441, "step": 23138 }, { "epoch": 0.59, "grad_norm": 0.8339711427688599, "learning_rate": 7.498244987470067e-06, "loss": 0.5022, "step": 23139 }, { "epoch": 0.59, "grad_norm": 1.3523911237716675, "learning_rate": 7.49744125135105e-06, "loss": 0.401, "step": 23140 }, { "epoch": 0.59, "grad_norm": 1.3758344650268555, "learning_rate": 7.496637532478091e-06, "loss": 0.4498, "step": 23141 }, { "epoch": 0.59, "grad_norm": 2.49519681930542, "learning_rate": 7.495833830856737e-06, "loss": 0.6002, "step": 23142 }, { "epoch": 0.59, "grad_norm": 3.193354845046997, "learning_rate": 7.495030146492521e-06, "loss": 0.6575, "step": 23143 }, { "epoch": 0.59, "grad_norm": 1.1328599452972412, "learning_rate": 7.494226479390982e-06, "loss": 0.4961, "step": 23144 }, { "epoch": 0.59, "grad_norm": 6.512454986572266, "learning_rate": 7.493422829557664e-06, "loss": 0.5098, "step": 23145 }, { "epoch": 0.59, "grad_norm": 1.2840427160263062, "learning_rate": 7.492619196998097e-06, "loss": 0.4191, "step": 23146 }, { "epoch": 0.59, "grad_norm": 1.4928866624832153, "learning_rate": 7.491815581717827e-06, "loss": 0.554, "step": 23147 }, { "epoch": 0.59, "grad_norm": 1.6078587770462036, "learning_rate": 7.491011983722385e-06, "loss": 0.5967, "step": 23148 }, { "epoch": 0.59, "grad_norm": 2.0905721187591553, "learning_rate": 7.490208403017312e-06, "loss": 0.4604, "step": 23149 }, { "epoch": 0.59, "grad_norm": 1.426010012626648, "learning_rate": 7.489404839608149e-06, "loss": 0.5875, "step": 23150 }, { "epoch": 0.59, "grad_norm": 1.8338426351547241, "learning_rate": 7.488601293500428e-06, "loss": 0.637, "step": 23151 }, { "epoch": 0.59, "grad_norm": 1.5218478441238403, "learning_rate": 7.48779776469969e-06, "loss": 0.5252, "step": 23152 }, { "epoch": 0.59, "grad_norm": 1.5775610208511353, "learning_rate": 7.48699425321147e-06, "loss": 0.4999, "step": 23153 }, { "epoch": 0.59, "grad_norm": 1.195015549659729, "learning_rate": 7.486190759041309e-06, "loss": 0.4715, "step": 23154 }, { "epoch": 0.59, "grad_norm": 1.801250696182251, "learning_rate": 7.48538728219474e-06, "loss": 0.6075, "step": 23155 }, { "epoch": 0.59, "grad_norm": 2.023517608642578, "learning_rate": 7.484583822677303e-06, "loss": 0.5503, "step": 23156 }, { "epoch": 0.59, "grad_norm": 1.0949413776397705, "learning_rate": 7.483780380494535e-06, "loss": 0.4437, "step": 23157 }, { "epoch": 0.59, "grad_norm": 1.834885597229004, "learning_rate": 7.48297695565197e-06, "loss": 0.634, "step": 23158 }, { "epoch": 0.59, "grad_norm": 1.2458378076553345, "learning_rate": 7.48217354815515e-06, "loss": 0.6694, "step": 23159 }, { "epoch": 0.59, "grad_norm": 1.677139163017273, "learning_rate": 7.481370158009603e-06, "loss": 0.4888, "step": 23160 }, { "epoch": 0.59, "grad_norm": 2.097090721130371, "learning_rate": 7.4805667852208716e-06, "loss": 0.7187, "step": 23161 }, { "epoch": 0.59, "grad_norm": 12.005764961242676, "learning_rate": 7.479763429794493e-06, "loss": 0.9138, "step": 23162 }, { "epoch": 0.59, "grad_norm": 1.5213004350662231, "learning_rate": 7.4789600917360005e-06, "loss": 0.4434, "step": 23163 }, { "epoch": 0.59, "grad_norm": 1.9793083667755127, "learning_rate": 7.478156771050933e-06, "loss": 0.4116, "step": 23164 }, { "epoch": 0.59, "grad_norm": 2.3503129482269287, "learning_rate": 7.477353467744821e-06, "loss": 0.6494, "step": 23165 }, { "epoch": 0.59, "grad_norm": 1.3874197006225586, "learning_rate": 7.476550181823208e-06, "loss": 0.4731, "step": 23166 }, { "epoch": 0.59, "grad_norm": 1.5399706363677979, "learning_rate": 7.475746913291624e-06, "loss": 0.6432, "step": 23167 }, { "epoch": 0.59, "grad_norm": 2.119656562805176, "learning_rate": 7.474943662155605e-06, "loss": 0.5375, "step": 23168 }, { "epoch": 0.59, "grad_norm": 3.973679542541504, "learning_rate": 7.47414042842069e-06, "loss": 0.5642, "step": 23169 }, { "epoch": 0.59, "grad_norm": 2.676715850830078, "learning_rate": 7.47333721209241e-06, "loss": 0.5562, "step": 23170 }, { "epoch": 0.59, "grad_norm": 2.1004676818847656, "learning_rate": 7.4725340131763045e-06, "loss": 0.6479, "step": 23171 }, { "epoch": 0.59, "grad_norm": 2.040513515472412, "learning_rate": 7.4717308316779056e-06, "loss": 0.6574, "step": 23172 }, { "epoch": 0.59, "grad_norm": 1.2180430889129639, "learning_rate": 7.47092766760275e-06, "loss": 0.5345, "step": 23173 }, { "epoch": 0.59, "grad_norm": 2.9675891399383545, "learning_rate": 7.470124520956372e-06, "loss": 0.4185, "step": 23174 }, { "epoch": 0.59, "grad_norm": 1.684954285621643, "learning_rate": 7.4693213917443065e-06, "loss": 0.5725, "step": 23175 }, { "epoch": 0.59, "grad_norm": 13.073729515075684, "learning_rate": 7.468518279972088e-06, "loss": 0.6608, "step": 23176 }, { "epoch": 0.59, "grad_norm": 1.3861620426177979, "learning_rate": 7.46771518564525e-06, "loss": 0.5165, "step": 23177 }, { "epoch": 0.59, "grad_norm": 1.4931929111480713, "learning_rate": 7.466912108769331e-06, "loss": 0.5455, "step": 23178 }, { "epoch": 0.59, "grad_norm": 2.221883773803711, "learning_rate": 7.466109049349859e-06, "loss": 0.4556, "step": 23179 }, { "epoch": 0.59, "grad_norm": 7.502220153808594, "learning_rate": 7.465306007392371e-06, "loss": 0.6704, "step": 23180 }, { "epoch": 0.59, "grad_norm": 1.7220808267593384, "learning_rate": 7.464502982902404e-06, "loss": 0.6331, "step": 23181 }, { "epoch": 0.59, "grad_norm": 1.253177285194397, "learning_rate": 7.463699975885488e-06, "loss": 0.5427, "step": 23182 }, { "epoch": 0.59, "grad_norm": 1.001551866531372, "learning_rate": 7.4628969863471616e-06, "loss": 0.483, "step": 23183 }, { "epoch": 0.59, "grad_norm": 1.69522225856781, "learning_rate": 7.462094014292951e-06, "loss": 0.4589, "step": 23184 }, { "epoch": 0.59, "grad_norm": 1.982110619544983, "learning_rate": 7.461291059728395e-06, "loss": 0.591, "step": 23185 }, { "epoch": 0.59, "grad_norm": 4.0810370445251465, "learning_rate": 7.460488122659026e-06, "loss": 0.5294, "step": 23186 }, { "epoch": 0.59, "grad_norm": 2.2152068614959717, "learning_rate": 7.459685203090379e-06, "loss": 0.6224, "step": 23187 }, { "epoch": 0.59, "grad_norm": 1.2541592121124268, "learning_rate": 7.458882301027985e-06, "loss": 0.5435, "step": 23188 }, { "epoch": 0.59, "grad_norm": 7.801128387451172, "learning_rate": 7.458079416477377e-06, "loss": 0.6556, "step": 23189 }, { "epoch": 0.59, "grad_norm": 6.905296802520752, "learning_rate": 7.457276549444088e-06, "loss": 0.5869, "step": 23190 }, { "epoch": 0.59, "grad_norm": 1.458196997642517, "learning_rate": 7.456473699933653e-06, "loss": 0.5313, "step": 23191 }, { "epoch": 0.59, "grad_norm": 1.0321402549743652, "learning_rate": 7.4556708679516025e-06, "loss": 0.3173, "step": 23192 }, { "epoch": 0.59, "grad_norm": 1.961878776550293, "learning_rate": 7.454868053503471e-06, "loss": 0.539, "step": 23193 }, { "epoch": 0.59, "grad_norm": 1.5276271104812622, "learning_rate": 7.454065256594788e-06, "loss": 0.6785, "step": 23194 }, { "epoch": 0.59, "grad_norm": 1.0870767831802368, "learning_rate": 7.4532624772310905e-06, "loss": 0.526, "step": 23195 }, { "epoch": 0.59, "grad_norm": 1.6702278852462769, "learning_rate": 7.452459715417905e-06, "loss": 0.5563, "step": 23196 }, { "epoch": 0.59, "grad_norm": 2.082402229309082, "learning_rate": 7.451656971160767e-06, "loss": 0.6501, "step": 23197 }, { "epoch": 0.59, "grad_norm": 1.5698093175888062, "learning_rate": 7.45085424446521e-06, "loss": 0.5966, "step": 23198 }, { "epoch": 0.59, "grad_norm": 1.6756325960159302, "learning_rate": 7.450051535336762e-06, "loss": 0.5537, "step": 23199 }, { "epoch": 0.59, "grad_norm": 1.5615990161895752, "learning_rate": 7.4492488437809585e-06, "loss": 0.5073, "step": 23200 }, { "epoch": 0.59, "grad_norm": 1.8121105432510376, "learning_rate": 7.448446169803328e-06, "loss": 0.5191, "step": 23201 }, { "epoch": 0.59, "grad_norm": 1.5536750555038452, "learning_rate": 7.4476435134094035e-06, "loss": 0.5572, "step": 23202 }, { "epoch": 0.59, "grad_norm": 1.7193868160247803, "learning_rate": 7.446840874604718e-06, "loss": 0.6031, "step": 23203 }, { "epoch": 0.59, "grad_norm": 1.1441187858581543, "learning_rate": 7.4460382533948e-06, "loss": 0.5318, "step": 23204 }, { "epoch": 0.59, "grad_norm": 1.328139066696167, "learning_rate": 7.445235649785183e-06, "loss": 0.6145, "step": 23205 }, { "epoch": 0.59, "grad_norm": 1.920059323310852, "learning_rate": 7.444433063781394e-06, "loss": 0.4779, "step": 23206 }, { "epoch": 0.59, "grad_norm": 1.930627703666687, "learning_rate": 7.443630495388971e-06, "loss": 0.6959, "step": 23207 }, { "epoch": 0.59, "grad_norm": 1.3568227291107178, "learning_rate": 7.442827944613436e-06, "loss": 0.4132, "step": 23208 }, { "epoch": 0.59, "grad_norm": 1.7449777126312256, "learning_rate": 7.442025411460325e-06, "loss": 0.5771, "step": 23209 }, { "epoch": 0.59, "grad_norm": 1.8180397748947144, "learning_rate": 7.4412228959351715e-06, "loss": 0.4804, "step": 23210 }, { "epoch": 0.59, "grad_norm": 2.1072802543640137, "learning_rate": 7.440420398043499e-06, "loss": 0.6514, "step": 23211 }, { "epoch": 0.59, "grad_norm": 4.828705310821533, "learning_rate": 7.439617917790842e-06, "loss": 0.6764, "step": 23212 }, { "epoch": 0.59, "grad_norm": 1.5019561052322388, "learning_rate": 7.438815455182729e-06, "loss": 0.6485, "step": 23213 }, { "epoch": 0.59, "grad_norm": 1.9289753437042236, "learning_rate": 7.438013010224692e-06, "loss": 0.6694, "step": 23214 }, { "epoch": 0.6, "grad_norm": 7.669424533843994, "learning_rate": 7.43721058292226e-06, "loss": 0.6465, "step": 23215 }, { "epoch": 0.6, "grad_norm": 19.77705192565918, "learning_rate": 7.43640817328096e-06, "loss": 0.5677, "step": 23216 }, { "epoch": 0.6, "grad_norm": 1.9304659366607666, "learning_rate": 7.435605781306328e-06, "loss": 0.6247, "step": 23217 }, { "epoch": 0.6, "grad_norm": 7.19829797744751, "learning_rate": 7.434803407003886e-06, "loss": 0.5093, "step": 23218 }, { "epoch": 0.6, "grad_norm": 3.389981508255005, "learning_rate": 7.434001050379171e-06, "loss": 0.7687, "step": 23219 }, { "epoch": 0.6, "grad_norm": 1.189618706703186, "learning_rate": 7.4331987114377055e-06, "loss": 0.5818, "step": 23220 }, { "epoch": 0.6, "grad_norm": 1.0653055906295776, "learning_rate": 7.432396390185022e-06, "loss": 0.5616, "step": 23221 }, { "epoch": 0.6, "grad_norm": 4.075242042541504, "learning_rate": 7.431594086626652e-06, "loss": 0.6428, "step": 23222 }, { "epoch": 0.6, "grad_norm": 1.122536540031433, "learning_rate": 7.43079180076812e-06, "loss": 0.5584, "step": 23223 }, { "epoch": 0.6, "grad_norm": 1.5261136293411255, "learning_rate": 7.429989532614958e-06, "loss": 0.6148, "step": 23224 }, { "epoch": 0.6, "grad_norm": 1.5149914026260376, "learning_rate": 7.429187282172692e-06, "loss": 0.5402, "step": 23225 }, { "epoch": 0.6, "grad_norm": 1.468453288078308, "learning_rate": 7.4283850494468516e-06, "loss": 0.5828, "step": 23226 }, { "epoch": 0.6, "grad_norm": 5.2919158935546875, "learning_rate": 7.427582834442968e-06, "loss": 0.4175, "step": 23227 }, { "epoch": 0.6, "grad_norm": 1.3228360414505005, "learning_rate": 7.426780637166566e-06, "loss": 0.4528, "step": 23228 }, { "epoch": 0.6, "grad_norm": 1.4555375576019287, "learning_rate": 7.425978457623177e-06, "loss": 0.3731, "step": 23229 }, { "epoch": 0.6, "grad_norm": 1.4064916372299194, "learning_rate": 7.425176295818325e-06, "loss": 0.4809, "step": 23230 }, { "epoch": 0.6, "grad_norm": 0.952286958694458, "learning_rate": 7.424374151757541e-06, "loss": 0.453, "step": 23231 }, { "epoch": 0.6, "grad_norm": 2.328054666519165, "learning_rate": 7.4235720254463525e-06, "loss": 0.5654, "step": 23232 }, { "epoch": 0.6, "grad_norm": 2.2140281200408936, "learning_rate": 7.422769916890287e-06, "loss": 0.7002, "step": 23233 }, { "epoch": 0.6, "grad_norm": 3.253857374191284, "learning_rate": 7.421967826094872e-06, "loss": 0.6897, "step": 23234 }, { "epoch": 0.6, "grad_norm": 1.362776279449463, "learning_rate": 7.421165753065635e-06, "loss": 0.5871, "step": 23235 }, { "epoch": 0.6, "grad_norm": 1.2958403825759888, "learning_rate": 7.420363697808104e-06, "loss": 0.4686, "step": 23236 }, { "epoch": 0.6, "grad_norm": 5.185765266418457, "learning_rate": 7.419561660327805e-06, "loss": 0.6186, "step": 23237 }, { "epoch": 0.6, "grad_norm": 5.807377815246582, "learning_rate": 7.418759640630264e-06, "loss": 0.6813, "step": 23238 }, { "epoch": 0.6, "grad_norm": 4.51445198059082, "learning_rate": 7.417957638721014e-06, "loss": 0.5919, "step": 23239 }, { "epoch": 0.6, "grad_norm": 1.8617784976959229, "learning_rate": 7.4171556546055745e-06, "loss": 0.543, "step": 23240 }, { "epoch": 0.6, "grad_norm": 1.298466444015503, "learning_rate": 7.416353688289477e-06, "loss": 0.4842, "step": 23241 }, { "epoch": 0.6, "grad_norm": 2.4392359256744385, "learning_rate": 7.415551739778246e-06, "loss": 0.6806, "step": 23242 }, { "epoch": 0.6, "grad_norm": 3.929603338241577, "learning_rate": 7.414749809077409e-06, "loss": 0.5934, "step": 23243 }, { "epoch": 0.6, "grad_norm": 4.660943984985352, "learning_rate": 7.413947896192493e-06, "loss": 0.5004, "step": 23244 }, { "epoch": 0.6, "grad_norm": 1.3688722848892212, "learning_rate": 7.4131460011290215e-06, "loss": 0.4513, "step": 23245 }, { "epoch": 0.6, "grad_norm": 1.7262651920318604, "learning_rate": 7.412344123892523e-06, "loss": 0.5914, "step": 23246 }, { "epoch": 0.6, "grad_norm": 2.3039393424987793, "learning_rate": 7.411542264488523e-06, "loss": 0.453, "step": 23247 }, { "epoch": 0.6, "grad_norm": 1.9009665250778198, "learning_rate": 7.41074042292255e-06, "loss": 0.5563, "step": 23248 }, { "epoch": 0.6, "grad_norm": 1.875894546508789, "learning_rate": 7.409938599200124e-06, "loss": 0.5851, "step": 23249 }, { "epoch": 0.6, "grad_norm": 1.8797415494918823, "learning_rate": 7.409136793326774e-06, "loss": 0.563, "step": 23250 }, { "epoch": 0.6, "grad_norm": 1.7593475580215454, "learning_rate": 7.408335005308028e-06, "loss": 0.6053, "step": 23251 }, { "epoch": 0.6, "grad_norm": 11.433080673217773, "learning_rate": 7.407533235149407e-06, "loss": 0.6129, "step": 23252 }, { "epoch": 0.6, "grad_norm": 3.887662649154663, "learning_rate": 7.4067314828564395e-06, "loss": 0.5221, "step": 23253 }, { "epoch": 0.6, "grad_norm": 2.401578903198242, "learning_rate": 7.405929748434648e-06, "loss": 0.6874, "step": 23254 }, { "epoch": 0.6, "grad_norm": 13.671636581420898, "learning_rate": 7.405128031889559e-06, "loss": 0.6739, "step": 23255 }, { "epoch": 0.6, "grad_norm": 1.449662685394287, "learning_rate": 7.404326333226699e-06, "loss": 0.5488, "step": 23256 }, { "epoch": 0.6, "grad_norm": 1.120103120803833, "learning_rate": 7.40352465245159e-06, "loss": 0.5742, "step": 23257 }, { "epoch": 0.6, "grad_norm": 0.9097456932067871, "learning_rate": 7.40272298956976e-06, "loss": 0.523, "step": 23258 }, { "epoch": 0.6, "grad_norm": 2.3769311904907227, "learning_rate": 7.401921344586727e-06, "loss": 0.454, "step": 23259 }, { "epoch": 0.6, "grad_norm": 1.7035261392593384, "learning_rate": 7.401119717508023e-06, "loss": 0.5533, "step": 23260 }, { "epoch": 0.6, "grad_norm": 1.9907640218734741, "learning_rate": 7.400318108339168e-06, "loss": 0.6025, "step": 23261 }, { "epoch": 0.6, "grad_norm": 1.5271596908569336, "learning_rate": 7.399516517085687e-06, "loss": 0.5777, "step": 23262 }, { "epoch": 0.6, "grad_norm": 1.4717289209365845, "learning_rate": 7.398714943753106e-06, "loss": 0.4891, "step": 23263 }, { "epoch": 0.6, "grad_norm": 1.8228693008422852, "learning_rate": 7.397913388346945e-06, "loss": 0.4945, "step": 23264 }, { "epoch": 0.6, "grad_norm": 3.1687681674957275, "learning_rate": 7.397111850872734e-06, "loss": 0.5879, "step": 23265 }, { "epoch": 0.6, "grad_norm": 1.5663715600967407, "learning_rate": 7.3963103313359884e-06, "loss": 0.5469, "step": 23266 }, { "epoch": 0.6, "grad_norm": 1.102972149848938, "learning_rate": 7.3955088297422375e-06, "loss": 0.3953, "step": 23267 }, { "epoch": 0.6, "grad_norm": 1.5078362226486206, "learning_rate": 7.3947073460970055e-06, "loss": 0.6426, "step": 23268 }, { "epoch": 0.6, "grad_norm": 2.029512643814087, "learning_rate": 7.393905880405811e-06, "loss": 0.6604, "step": 23269 }, { "epoch": 0.6, "grad_norm": 1.430885910987854, "learning_rate": 7.393104432674182e-06, "loss": 0.5421, "step": 23270 }, { "epoch": 0.6, "grad_norm": 0.9687708616256714, "learning_rate": 7.392303002907637e-06, "loss": 0.5104, "step": 23271 }, { "epoch": 0.6, "grad_norm": 2.339404821395874, "learning_rate": 7.391501591111705e-06, "loss": 0.5949, "step": 23272 }, { "epoch": 0.6, "grad_norm": 1.4472283124923706, "learning_rate": 7.390700197291902e-06, "loss": 0.583, "step": 23273 }, { "epoch": 0.6, "grad_norm": 1.6299352645874023, "learning_rate": 7.389898821453754e-06, "loss": 0.5898, "step": 23274 }, { "epoch": 0.6, "grad_norm": 6.131813049316406, "learning_rate": 7.389097463602786e-06, "loss": 0.9113, "step": 23275 }, { "epoch": 0.6, "grad_norm": 1.3116331100463867, "learning_rate": 7.388296123744516e-06, "loss": 0.5478, "step": 23276 }, { "epoch": 0.6, "grad_norm": 5.200634479522705, "learning_rate": 7.387494801884469e-06, "loss": 0.4411, "step": 23277 }, { "epoch": 0.6, "grad_norm": 9.583003044128418, "learning_rate": 7.386693498028165e-06, "loss": 0.5578, "step": 23278 }, { "epoch": 0.6, "grad_norm": 1.9626929759979248, "learning_rate": 7.385892212181129e-06, "loss": 0.4797, "step": 23279 }, { "epoch": 0.6, "grad_norm": 2.6847500801086426, "learning_rate": 7.385090944348883e-06, "loss": 0.551, "step": 23280 }, { "epoch": 0.6, "grad_norm": 11.513999938964844, "learning_rate": 7.384289694536943e-06, "loss": 0.5715, "step": 23281 }, { "epoch": 0.6, "grad_norm": 1.260812759399414, "learning_rate": 7.38348846275084e-06, "loss": 0.5123, "step": 23282 }, { "epoch": 0.6, "grad_norm": 1.3917511701583862, "learning_rate": 7.382687248996088e-06, "loss": 0.5548, "step": 23283 }, { "epoch": 0.6, "grad_norm": 2.1943514347076416, "learning_rate": 7.38188605327821e-06, "loss": 0.4016, "step": 23284 }, { "epoch": 0.6, "grad_norm": 1.4526973962783813, "learning_rate": 7.381084875602731e-06, "loss": 0.5933, "step": 23285 }, { "epoch": 0.6, "grad_norm": 3.994603395462036, "learning_rate": 7.380283715975168e-06, "loss": 0.7273, "step": 23286 }, { "epoch": 0.6, "grad_norm": 1.225334644317627, "learning_rate": 7.3794825744010455e-06, "loss": 0.5771, "step": 23287 }, { "epoch": 0.6, "grad_norm": 1.0173896551132202, "learning_rate": 7.378681450885881e-06, "loss": 0.4904, "step": 23288 }, { "epoch": 0.6, "grad_norm": 7.108945369720459, "learning_rate": 7.377880345435199e-06, "loss": 0.6448, "step": 23289 }, { "epoch": 0.6, "grad_norm": 1.20294189453125, "learning_rate": 7.377079258054516e-06, "loss": 0.5884, "step": 23290 }, { "epoch": 0.6, "grad_norm": 1.8321073055267334, "learning_rate": 7.376278188749355e-06, "loss": 0.6119, "step": 23291 }, { "epoch": 0.6, "grad_norm": 2.236840009689331, "learning_rate": 7.375477137525239e-06, "loss": 0.4326, "step": 23292 }, { "epoch": 0.6, "grad_norm": 1.4362887144088745, "learning_rate": 7.374676104387682e-06, "loss": 0.5271, "step": 23293 }, { "epoch": 0.6, "grad_norm": 2.393048048019409, "learning_rate": 7.373875089342212e-06, "loss": 0.6698, "step": 23294 }, { "epoch": 0.6, "grad_norm": 4.141578674316406, "learning_rate": 7.3730740923943415e-06, "loss": 0.4702, "step": 23295 }, { "epoch": 0.6, "grad_norm": 1.334916591644287, "learning_rate": 7.372273113549595e-06, "loss": 0.518, "step": 23296 }, { "epoch": 0.6, "grad_norm": 2.6069564819335938, "learning_rate": 7.371472152813492e-06, "loss": 0.7154, "step": 23297 }, { "epoch": 0.6, "grad_norm": 1.3885685205459595, "learning_rate": 7.370671210191551e-06, "loss": 0.4354, "step": 23298 }, { "epoch": 0.6, "grad_norm": 0.8958437442779541, "learning_rate": 7.3698702856892934e-06, "loss": 0.5672, "step": 23299 }, { "epoch": 0.6, "grad_norm": 9.016650199890137, "learning_rate": 7.3690693793122344e-06, "loss": 0.4514, "step": 23300 }, { "epoch": 0.6, "grad_norm": 1.8164901733398438, "learning_rate": 7.3682684910658996e-06, "loss": 0.5246, "step": 23301 }, { "epoch": 0.6, "grad_norm": 1.5767796039581299, "learning_rate": 7.367467620955803e-06, "loss": 0.5395, "step": 23302 }, { "epoch": 0.6, "grad_norm": 3.1511518955230713, "learning_rate": 7.366666768987465e-06, "loss": 0.6883, "step": 23303 }, { "epoch": 0.6, "grad_norm": 1.8549643754959106, "learning_rate": 7.365865935166407e-06, "loss": 0.5928, "step": 23304 }, { "epoch": 0.6, "grad_norm": 1.8233319520950317, "learning_rate": 7.3650651194981445e-06, "loss": 0.5243, "step": 23305 }, { "epoch": 0.6, "grad_norm": 1.1390496492385864, "learning_rate": 7.364264321988199e-06, "loss": 0.5387, "step": 23306 }, { "epoch": 0.6, "grad_norm": 2.38535213470459, "learning_rate": 7.363463542642087e-06, "loss": 0.6897, "step": 23307 }, { "epoch": 0.6, "grad_norm": 1.7937239408493042, "learning_rate": 7.362662781465329e-06, "loss": 0.4419, "step": 23308 }, { "epoch": 0.6, "grad_norm": 1.344982385635376, "learning_rate": 7.361862038463441e-06, "loss": 0.5523, "step": 23309 }, { "epoch": 0.6, "grad_norm": 2.337160348892212, "learning_rate": 7.361061313641943e-06, "loss": 0.5605, "step": 23310 }, { "epoch": 0.6, "grad_norm": 1.1005924940109253, "learning_rate": 7.360260607006353e-06, "loss": 0.5141, "step": 23311 }, { "epoch": 0.6, "grad_norm": 1.1484218835830688, "learning_rate": 7.359459918562188e-06, "loss": 0.4552, "step": 23312 }, { "epoch": 0.6, "grad_norm": 1.3049558401107788, "learning_rate": 7.358659248314967e-06, "loss": 0.603, "step": 23313 }, { "epoch": 0.6, "grad_norm": 1.8139220476150513, "learning_rate": 7.357858596270204e-06, "loss": 0.6605, "step": 23314 }, { "epoch": 0.6, "grad_norm": 5.239174842834473, "learning_rate": 7.357057962433422e-06, "loss": 0.6135, "step": 23315 }, { "epoch": 0.6, "grad_norm": 1.5981565713882446, "learning_rate": 7.3562573468101365e-06, "loss": 0.5059, "step": 23316 }, { "epoch": 0.6, "grad_norm": 13.336993217468262, "learning_rate": 7.3554567494058624e-06, "loss": 0.6453, "step": 23317 }, { "epoch": 0.6, "grad_norm": 1.3445104360580444, "learning_rate": 7.354656170226122e-06, "loss": 0.5849, "step": 23318 }, { "epoch": 0.6, "grad_norm": 3.6020116806030273, "learning_rate": 7.353855609276426e-06, "loss": 0.6106, "step": 23319 }, { "epoch": 0.6, "grad_norm": 1.796279788017273, "learning_rate": 7.353055066562295e-06, "loss": 0.4986, "step": 23320 }, { "epoch": 0.6, "grad_norm": 1.6845951080322266, "learning_rate": 7.352254542089248e-06, "loss": 0.7221, "step": 23321 }, { "epoch": 0.6, "grad_norm": 6.621196269989014, "learning_rate": 7.351454035862797e-06, "loss": 0.8214, "step": 23322 }, { "epoch": 0.6, "grad_norm": 1.5089353322982788, "learning_rate": 7.350653547888463e-06, "loss": 0.6385, "step": 23323 }, { "epoch": 0.6, "grad_norm": 1.888420581817627, "learning_rate": 7.349853078171758e-06, "loss": 0.5584, "step": 23324 }, { "epoch": 0.6, "grad_norm": 2.2610926628112793, "learning_rate": 7.349052626718203e-06, "loss": 0.5131, "step": 23325 }, { "epoch": 0.6, "grad_norm": 2.4620931148529053, "learning_rate": 7.34825219353331e-06, "loss": 0.5059, "step": 23326 }, { "epoch": 0.6, "grad_norm": 2.1456949710845947, "learning_rate": 7.347451778622596e-06, "loss": 0.601, "step": 23327 }, { "epoch": 0.6, "grad_norm": 1.5102043151855469, "learning_rate": 7.346651381991581e-06, "loss": 0.5581, "step": 23328 }, { "epoch": 0.6, "grad_norm": 4.717879295349121, "learning_rate": 7.345851003645774e-06, "loss": 0.7666, "step": 23329 }, { "epoch": 0.6, "grad_norm": 2.688467502593994, "learning_rate": 7.345050643590698e-06, "loss": 0.5796, "step": 23330 }, { "epoch": 0.6, "grad_norm": 0.9671322703361511, "learning_rate": 7.344250301831861e-06, "loss": 0.6345, "step": 23331 }, { "epoch": 0.6, "grad_norm": 6.7691521644592285, "learning_rate": 7.343449978374784e-06, "loss": 0.4881, "step": 23332 }, { "epoch": 0.6, "grad_norm": 5.254200458526611, "learning_rate": 7.342649673224983e-06, "loss": 0.6443, "step": 23333 }, { "epoch": 0.6, "grad_norm": 1.7167431116104126, "learning_rate": 7.341849386387968e-06, "loss": 0.5129, "step": 23334 }, { "epoch": 0.6, "grad_norm": 7.333148956298828, "learning_rate": 7.34104911786926e-06, "loss": 0.6262, "step": 23335 }, { "epoch": 0.6, "grad_norm": 2.2572782039642334, "learning_rate": 7.340248867674369e-06, "loss": 0.7148, "step": 23336 }, { "epoch": 0.6, "grad_norm": 0.816932737827301, "learning_rate": 7.3394486358088105e-06, "loss": 0.501, "step": 23337 }, { "epoch": 0.6, "grad_norm": 4.729573726654053, "learning_rate": 7.338648422278103e-06, "loss": 0.5946, "step": 23338 }, { "epoch": 0.6, "grad_norm": 1.032600998878479, "learning_rate": 7.337848227087756e-06, "loss": 0.4709, "step": 23339 }, { "epoch": 0.6, "grad_norm": 3.077671766281128, "learning_rate": 7.33704805024329e-06, "loss": 0.5228, "step": 23340 }, { "epoch": 0.6, "grad_norm": 2.677639961242676, "learning_rate": 7.336247891750212e-06, "loss": 0.5819, "step": 23341 }, { "epoch": 0.6, "grad_norm": 1.4774250984191895, "learning_rate": 7.335447751614044e-06, "loss": 0.4293, "step": 23342 }, { "epoch": 0.6, "grad_norm": 1.101670265197754, "learning_rate": 7.334647629840292e-06, "loss": 0.3694, "step": 23343 }, { "epoch": 0.6, "grad_norm": 1.9199875593185425, "learning_rate": 7.333847526434475e-06, "loss": 0.6144, "step": 23344 }, { "epoch": 0.6, "grad_norm": 1.3083949089050293, "learning_rate": 7.333047441402108e-06, "loss": 0.5858, "step": 23345 }, { "epoch": 0.6, "grad_norm": 1.5320048332214355, "learning_rate": 7.3322473747487e-06, "loss": 0.5482, "step": 23346 }, { "epoch": 0.6, "grad_norm": 5.656673431396484, "learning_rate": 7.33144732647977e-06, "loss": 0.4173, "step": 23347 }, { "epoch": 0.6, "grad_norm": 1.3028002977371216, "learning_rate": 7.330647296600826e-06, "loss": 0.429, "step": 23348 }, { "epoch": 0.6, "grad_norm": 3.1481971740722656, "learning_rate": 7.329847285117384e-06, "loss": 0.8134, "step": 23349 }, { "epoch": 0.6, "grad_norm": 1.8235071897506714, "learning_rate": 7.329047292034958e-06, "loss": 0.5421, "step": 23350 }, { "epoch": 0.6, "grad_norm": 1.3283724784851074, "learning_rate": 7.328247317359058e-06, "loss": 0.522, "step": 23351 }, { "epoch": 0.6, "grad_norm": 9.921801567077637, "learning_rate": 7.327447361095203e-06, "loss": 0.4923, "step": 23352 }, { "epoch": 0.6, "grad_norm": 1.9075415134429932, "learning_rate": 7.326647423248898e-06, "loss": 0.6078, "step": 23353 }, { "epoch": 0.6, "grad_norm": 4.343696594238281, "learning_rate": 7.32584750382566e-06, "loss": 0.6186, "step": 23354 }, { "epoch": 0.6, "grad_norm": 1.5211293697357178, "learning_rate": 7.325047602831001e-06, "loss": 0.5465, "step": 23355 }, { "epoch": 0.6, "grad_norm": 2.493835687637329, "learning_rate": 7.324247720270434e-06, "loss": 0.4886, "step": 23356 }, { "epoch": 0.6, "grad_norm": 0.9729092717170715, "learning_rate": 7.323447856149472e-06, "loss": 0.5437, "step": 23357 }, { "epoch": 0.6, "grad_norm": 3.5291526317596436, "learning_rate": 7.322648010473623e-06, "loss": 0.4408, "step": 23358 }, { "epoch": 0.6, "grad_norm": 1.2134226560592651, "learning_rate": 7.321848183248403e-06, "loss": 0.6301, "step": 23359 }, { "epoch": 0.6, "grad_norm": 1.8464233875274658, "learning_rate": 7.321048374479323e-06, "loss": 0.7806, "step": 23360 }, { "epoch": 0.6, "grad_norm": 2.2780425548553467, "learning_rate": 7.3202485841718925e-06, "loss": 0.4142, "step": 23361 }, { "epoch": 0.6, "grad_norm": 1.3033270835876465, "learning_rate": 7.319448812331629e-06, "loss": 0.4542, "step": 23362 }, { "epoch": 0.6, "grad_norm": 1.1154640913009644, "learning_rate": 7.3186490589640366e-06, "loss": 0.4193, "step": 23363 }, { "epoch": 0.6, "grad_norm": 2.029878854751587, "learning_rate": 7.317849324074633e-06, "loss": 0.4308, "step": 23364 }, { "epoch": 0.6, "grad_norm": 3.901522159576416, "learning_rate": 7.317049607668924e-06, "loss": 0.6525, "step": 23365 }, { "epoch": 0.6, "grad_norm": 1.5398551225662231, "learning_rate": 7.316249909752428e-06, "loss": 0.6372, "step": 23366 }, { "epoch": 0.6, "grad_norm": 1.1449710130691528, "learning_rate": 7.3154502303306475e-06, "loss": 0.5855, "step": 23367 }, { "epoch": 0.6, "grad_norm": 1.2782437801361084, "learning_rate": 7.314650569409102e-06, "loss": 0.4768, "step": 23368 }, { "epoch": 0.6, "grad_norm": 1.1895053386688232, "learning_rate": 7.313850926993293e-06, "loss": 0.5722, "step": 23369 }, { "epoch": 0.6, "grad_norm": 1.4960893392562866, "learning_rate": 7.313051303088738e-06, "loss": 0.5018, "step": 23370 }, { "epoch": 0.6, "grad_norm": 2.6062381267547607, "learning_rate": 7.312251697700944e-06, "loss": 0.4438, "step": 23371 }, { "epoch": 0.6, "grad_norm": 1.6907199621200562, "learning_rate": 7.311452110835424e-06, "loss": 0.5819, "step": 23372 }, { "epoch": 0.6, "grad_norm": 4.339608192443848, "learning_rate": 7.3106525424976845e-06, "loss": 0.7507, "step": 23373 }, { "epoch": 0.6, "grad_norm": 1.5451570749282837, "learning_rate": 7.30985299269324e-06, "loss": 0.5887, "step": 23374 }, { "epoch": 0.6, "grad_norm": 1.8951106071472168, "learning_rate": 7.3090534614276e-06, "loss": 0.3864, "step": 23375 }, { "epoch": 0.6, "grad_norm": 4.956265926361084, "learning_rate": 7.30825394870627e-06, "loss": 0.4846, "step": 23376 }, { "epoch": 0.6, "grad_norm": 0.9252402782440186, "learning_rate": 7.307454454534763e-06, "loss": 0.6351, "step": 23377 }, { "epoch": 0.6, "grad_norm": 1.8422081470489502, "learning_rate": 7.306654978918589e-06, "loss": 0.7957, "step": 23378 }, { "epoch": 0.6, "grad_norm": 5.932176113128662, "learning_rate": 7.305855521863257e-06, "loss": 0.6895, "step": 23379 }, { "epoch": 0.6, "grad_norm": 2.063532829284668, "learning_rate": 7.305056083374276e-06, "loss": 0.538, "step": 23380 }, { "epoch": 0.6, "grad_norm": 4.073763370513916, "learning_rate": 7.304256663457154e-06, "loss": 0.6618, "step": 23381 }, { "epoch": 0.6, "grad_norm": 1.5978446006774902, "learning_rate": 7.303457262117404e-06, "loss": 0.5279, "step": 23382 }, { "epoch": 0.6, "grad_norm": 1.3112109899520874, "learning_rate": 7.302657879360531e-06, "loss": 0.5607, "step": 23383 }, { "epoch": 0.6, "grad_norm": 1.4008060693740845, "learning_rate": 7.301858515192046e-06, "loss": 0.562, "step": 23384 }, { "epoch": 0.6, "grad_norm": 2.9725399017333984, "learning_rate": 7.301059169617455e-06, "loss": 0.5819, "step": 23385 }, { "epoch": 0.6, "grad_norm": 1.5210509300231934, "learning_rate": 7.3002598426422695e-06, "loss": 0.5262, "step": 23386 }, { "epoch": 0.6, "grad_norm": 1.154223918914795, "learning_rate": 7.299460534271998e-06, "loss": 0.5649, "step": 23387 }, { "epoch": 0.6, "grad_norm": 2.0299553871154785, "learning_rate": 7.298661244512146e-06, "loss": 0.4438, "step": 23388 }, { "epoch": 0.6, "grad_norm": 0.8717895746231079, "learning_rate": 7.297861973368227e-06, "loss": 0.5489, "step": 23389 }, { "epoch": 0.6, "grad_norm": 2.161475419998169, "learning_rate": 7.297062720845742e-06, "loss": 0.6467, "step": 23390 }, { "epoch": 0.6, "grad_norm": 3.5316474437713623, "learning_rate": 7.296263486950204e-06, "loss": 0.4612, "step": 23391 }, { "epoch": 0.6, "grad_norm": 5.086852550506592, "learning_rate": 7.29546427168712e-06, "loss": 0.6546, "step": 23392 }, { "epoch": 0.6, "grad_norm": 1.9527134895324707, "learning_rate": 7.2946650750619955e-06, "loss": 0.5979, "step": 23393 }, { "epoch": 0.6, "grad_norm": 2.137488603591919, "learning_rate": 7.293865897080342e-06, "loss": 0.3656, "step": 23394 }, { "epoch": 0.6, "grad_norm": 1.3999661207199097, "learning_rate": 7.293066737747663e-06, "loss": 0.5298, "step": 23395 }, { "epoch": 0.6, "grad_norm": 1.2442184686660767, "learning_rate": 7.292267597069469e-06, "loss": 0.5587, "step": 23396 }, { "epoch": 0.6, "grad_norm": 1.4751052856445312, "learning_rate": 7.291468475051262e-06, "loss": 0.5552, "step": 23397 }, { "epoch": 0.6, "grad_norm": 1.502929925918579, "learning_rate": 7.290669371698554e-06, "loss": 0.6372, "step": 23398 }, { "epoch": 0.6, "grad_norm": 1.110829472541809, "learning_rate": 7.289870287016854e-06, "loss": 0.4422, "step": 23399 }, { "epoch": 0.6, "grad_norm": 1.438125491142273, "learning_rate": 7.289071221011661e-06, "loss": 0.5112, "step": 23400 }, { "epoch": 0.6, "grad_norm": 1.433269739151001, "learning_rate": 7.288272173688489e-06, "loss": 0.4692, "step": 23401 }, { "epoch": 0.6, "grad_norm": 1.9832351207733154, "learning_rate": 7.287473145052839e-06, "loss": 0.334, "step": 23402 }, { "epoch": 0.6, "grad_norm": 3.165522336959839, "learning_rate": 7.28667413511022e-06, "loss": 0.5386, "step": 23403 }, { "epoch": 0.6, "grad_norm": 1.7581682205200195, "learning_rate": 7.2858751438661404e-06, "loss": 0.5584, "step": 23404 }, { "epoch": 0.6, "grad_norm": 1.6991713047027588, "learning_rate": 7.285076171326102e-06, "loss": 0.6816, "step": 23405 }, { "epoch": 0.6, "grad_norm": 1.2815567255020142, "learning_rate": 7.284277217495615e-06, "loss": 0.5098, "step": 23406 }, { "epoch": 0.6, "grad_norm": 2.17145037651062, "learning_rate": 7.283478282380181e-06, "loss": 0.6319, "step": 23407 }, { "epoch": 0.6, "grad_norm": 1.5882675647735596, "learning_rate": 7.28267936598531e-06, "loss": 0.5732, "step": 23408 }, { "epoch": 0.6, "grad_norm": 3.307405471801758, "learning_rate": 7.2818804683165044e-06, "loss": 0.513, "step": 23409 }, { "epoch": 0.6, "grad_norm": 5.325443267822266, "learning_rate": 7.28108158937927e-06, "loss": 0.6787, "step": 23410 }, { "epoch": 0.6, "grad_norm": 1.216639518737793, "learning_rate": 7.280282729179115e-06, "loss": 0.3342, "step": 23411 }, { "epoch": 0.6, "grad_norm": 1.9668489694595337, "learning_rate": 7.279483887721542e-06, "loss": 0.6248, "step": 23412 }, { "epoch": 0.6, "grad_norm": 1.3634477853775024, "learning_rate": 7.278685065012058e-06, "loss": 0.4078, "step": 23413 }, { "epoch": 0.6, "grad_norm": 1.5235464572906494, "learning_rate": 7.277886261056164e-06, "loss": 0.4865, "step": 23414 }, { "epoch": 0.6, "grad_norm": 1.5963014364242554, "learning_rate": 7.277087475859369e-06, "loss": 0.5127, "step": 23415 }, { "epoch": 0.6, "grad_norm": 2.2381832599639893, "learning_rate": 7.276288709427177e-06, "loss": 0.4362, "step": 23416 }, { "epoch": 0.6, "grad_norm": 1.3998167514801025, "learning_rate": 7.275489961765092e-06, "loss": 0.5274, "step": 23417 }, { "epoch": 0.6, "grad_norm": 2.051637887954712, "learning_rate": 7.274691232878619e-06, "loss": 0.5557, "step": 23418 }, { "epoch": 0.6, "grad_norm": 1.7217084169387817, "learning_rate": 7.27389252277326e-06, "loss": 0.5194, "step": 23419 }, { "epoch": 0.6, "grad_norm": 1.5639662742614746, "learning_rate": 7.273093831454522e-06, "loss": 0.5066, "step": 23420 }, { "epoch": 0.6, "grad_norm": 1.3734451532363892, "learning_rate": 7.272295158927909e-06, "loss": 0.5416, "step": 23421 }, { "epoch": 0.6, "grad_norm": 2.156107187271118, "learning_rate": 7.271496505198922e-06, "loss": 0.5888, "step": 23422 }, { "epoch": 0.6, "grad_norm": 11.103862762451172, "learning_rate": 7.2706978702730705e-06, "loss": 0.5532, "step": 23423 }, { "epoch": 0.6, "grad_norm": 1.7320903539657593, "learning_rate": 7.269899254155851e-06, "loss": 0.6295, "step": 23424 }, { "epoch": 0.6, "grad_norm": 1.145678162574768, "learning_rate": 7.2691006568527735e-06, "loss": 0.5292, "step": 23425 }, { "epoch": 0.6, "grad_norm": 1.4764082431793213, "learning_rate": 7.268302078369336e-06, "loss": 0.48, "step": 23426 }, { "epoch": 0.6, "grad_norm": 1.3086812496185303, "learning_rate": 7.267503518711044e-06, "loss": 0.5369, "step": 23427 }, { "epoch": 0.6, "grad_norm": 1.492845058441162, "learning_rate": 7.266704977883403e-06, "loss": 0.4147, "step": 23428 }, { "epoch": 0.6, "grad_norm": 0.9895613193511963, "learning_rate": 7.265906455891913e-06, "loss": 0.3905, "step": 23429 }, { "epoch": 0.6, "grad_norm": 1.203774333000183, "learning_rate": 7.26510795274208e-06, "loss": 0.5904, "step": 23430 }, { "epoch": 0.6, "grad_norm": 3.59633731842041, "learning_rate": 7.264309468439401e-06, "loss": 0.5025, "step": 23431 }, { "epoch": 0.6, "grad_norm": 1.5830631256103516, "learning_rate": 7.263511002989385e-06, "loss": 0.6621, "step": 23432 }, { "epoch": 0.6, "grad_norm": 1.0484349727630615, "learning_rate": 7.262712556397532e-06, "loss": 0.5566, "step": 23433 }, { "epoch": 0.6, "grad_norm": 1.173700213432312, "learning_rate": 7.261914128669343e-06, "loss": 0.5277, "step": 23434 }, { "epoch": 0.6, "grad_norm": 1.2890955209732056, "learning_rate": 7.261115719810323e-06, "loss": 0.3899, "step": 23435 }, { "epoch": 0.6, "grad_norm": 2.9591317176818848, "learning_rate": 7.260317329825972e-06, "loss": 0.5273, "step": 23436 }, { "epoch": 0.6, "grad_norm": 1.2694952487945557, "learning_rate": 7.259518958721794e-06, "loss": 0.5948, "step": 23437 }, { "epoch": 0.6, "grad_norm": 1.8469938039779663, "learning_rate": 7.258720606503287e-06, "loss": 0.583, "step": 23438 }, { "epoch": 0.6, "grad_norm": 1.9149346351623535, "learning_rate": 7.257922273175957e-06, "loss": 0.5012, "step": 23439 }, { "epoch": 0.6, "grad_norm": 1.6020679473876953, "learning_rate": 7.2571239587453045e-06, "loss": 0.5089, "step": 23440 }, { "epoch": 0.6, "grad_norm": 4.170322895050049, "learning_rate": 7.256325663216828e-06, "loss": 0.5889, "step": 23441 }, { "epoch": 0.6, "grad_norm": 0.9768604040145874, "learning_rate": 7.255527386596035e-06, "loss": 0.4151, "step": 23442 }, { "epoch": 0.6, "grad_norm": 1.8081448078155518, "learning_rate": 7.254729128888421e-06, "loss": 0.5221, "step": 23443 }, { "epoch": 0.6, "grad_norm": 2.108119249343872, "learning_rate": 7.2539308900994876e-06, "loss": 0.566, "step": 23444 }, { "epoch": 0.6, "grad_norm": 1.5222312211990356, "learning_rate": 7.253132670234739e-06, "loss": 0.3877, "step": 23445 }, { "epoch": 0.6, "grad_norm": 4.481703281402588, "learning_rate": 7.252334469299673e-06, "loss": 0.5676, "step": 23446 }, { "epoch": 0.6, "grad_norm": 7.905366897583008, "learning_rate": 7.251536287299794e-06, "loss": 0.595, "step": 23447 }, { "epoch": 0.6, "grad_norm": 1.2139891386032104, "learning_rate": 7.250738124240597e-06, "loss": 0.45, "step": 23448 }, { "epoch": 0.6, "grad_norm": 1.6494240760803223, "learning_rate": 7.249939980127589e-06, "loss": 0.4703, "step": 23449 }, { "epoch": 0.6, "grad_norm": 1.930810570716858, "learning_rate": 7.249141854966264e-06, "loss": 0.4877, "step": 23450 }, { "epoch": 0.6, "grad_norm": 1.9043363332748413, "learning_rate": 7.248343748762126e-06, "loss": 0.6751, "step": 23451 }, { "epoch": 0.6, "grad_norm": 7.385401248931885, "learning_rate": 7.247545661520676e-06, "loss": 0.4544, "step": 23452 }, { "epoch": 0.6, "grad_norm": 1.917008638381958, "learning_rate": 7.24674759324741e-06, "loss": 0.7577, "step": 23453 }, { "epoch": 0.6, "grad_norm": 1.7644007205963135, "learning_rate": 7.24594954394783e-06, "loss": 0.622, "step": 23454 }, { "epoch": 0.6, "grad_norm": 6.015096187591553, "learning_rate": 7.245151513627436e-06, "loss": 0.7086, "step": 23455 }, { "epoch": 0.6, "grad_norm": 2.033907890319824, "learning_rate": 7.2443535022917255e-06, "loss": 0.595, "step": 23456 }, { "epoch": 0.6, "grad_norm": 2.090550184249878, "learning_rate": 7.243555509946202e-06, "loss": 0.4812, "step": 23457 }, { "epoch": 0.6, "grad_norm": 1.4750393629074097, "learning_rate": 7.24275753659636e-06, "loss": 0.541, "step": 23458 }, { "epoch": 0.6, "grad_norm": 2.789016008377075, "learning_rate": 7.241959582247702e-06, "loss": 0.4073, "step": 23459 }, { "epoch": 0.6, "grad_norm": 1.453104019165039, "learning_rate": 7.241161646905726e-06, "loss": 0.5399, "step": 23460 }, { "epoch": 0.6, "grad_norm": 5.870641708374023, "learning_rate": 7.24036373057593e-06, "loss": 0.5741, "step": 23461 }, { "epoch": 0.6, "grad_norm": 2.034942865371704, "learning_rate": 7.239565833263813e-06, "loss": 0.7837, "step": 23462 }, { "epoch": 0.6, "grad_norm": 1.2473191022872925, "learning_rate": 7.2387679549748736e-06, "loss": 0.4349, "step": 23463 }, { "epoch": 0.6, "grad_norm": 1.6934610605239868, "learning_rate": 7.2379700957146125e-06, "loss": 0.5572, "step": 23464 }, { "epoch": 0.6, "grad_norm": 2.949289083480835, "learning_rate": 7.237172255488525e-06, "loss": 0.7189, "step": 23465 }, { "epoch": 0.6, "grad_norm": 1.5468556880950928, "learning_rate": 7.236374434302111e-06, "loss": 0.4756, "step": 23466 }, { "epoch": 0.6, "grad_norm": 6.128027439117432, "learning_rate": 7.235576632160867e-06, "loss": 0.614, "step": 23467 }, { "epoch": 0.6, "grad_norm": 1.2739297151565552, "learning_rate": 7.234778849070292e-06, "loss": 0.5156, "step": 23468 }, { "epoch": 0.6, "grad_norm": 1.7942404747009277, "learning_rate": 7.233981085035886e-06, "loss": 0.4196, "step": 23469 }, { "epoch": 0.6, "grad_norm": 2.2328715324401855, "learning_rate": 7.2331833400631435e-06, "loss": 0.5784, "step": 23470 }, { "epoch": 0.6, "grad_norm": 1.312673568725586, "learning_rate": 7.232385614157564e-06, "loss": 0.5374, "step": 23471 }, { "epoch": 0.6, "grad_norm": 1.180433750152588, "learning_rate": 7.231587907324642e-06, "loss": 0.5675, "step": 23472 }, { "epoch": 0.6, "grad_norm": 2.416574716567993, "learning_rate": 7.230790219569877e-06, "loss": 0.5709, "step": 23473 }, { "epoch": 0.6, "grad_norm": 2.452533006668091, "learning_rate": 7.229992550898768e-06, "loss": 0.54, "step": 23474 }, { "epoch": 0.6, "grad_norm": 2.222824811935425, "learning_rate": 7.229194901316808e-06, "loss": 0.6252, "step": 23475 }, { "epoch": 0.6, "grad_norm": 1.2087416648864746, "learning_rate": 7.228397270829499e-06, "loss": 0.4059, "step": 23476 }, { "epoch": 0.6, "grad_norm": 7.466699123382568, "learning_rate": 7.227599659442331e-06, "loss": 0.4904, "step": 23477 }, { "epoch": 0.6, "grad_norm": 1.4047229290008545, "learning_rate": 7.2268020671608075e-06, "loss": 0.4561, "step": 23478 }, { "epoch": 0.6, "grad_norm": 1.8354893922805786, "learning_rate": 7.2260044939904196e-06, "loss": 0.6582, "step": 23479 }, { "epoch": 0.6, "grad_norm": 3.1296029090881348, "learning_rate": 7.2252069399366666e-06, "loss": 0.431, "step": 23480 }, { "epoch": 0.6, "grad_norm": 1.8518266677856445, "learning_rate": 7.224409405005045e-06, "loss": 0.5732, "step": 23481 }, { "epoch": 0.6, "grad_norm": 1.0976800918579102, "learning_rate": 7.2236118892010475e-06, "loss": 0.5014, "step": 23482 }, { "epoch": 0.6, "grad_norm": 2.738142490386963, "learning_rate": 7.222814392530176e-06, "loss": 0.5428, "step": 23483 }, { "epoch": 0.6, "grad_norm": 1.071159839630127, "learning_rate": 7.222016914997921e-06, "loss": 0.5172, "step": 23484 }, { "epoch": 0.6, "grad_norm": 1.0453230142593384, "learning_rate": 7.22121945660978e-06, "loss": 0.5471, "step": 23485 }, { "epoch": 0.6, "grad_norm": 5.075007915496826, "learning_rate": 7.2204220173712495e-06, "loss": 0.7455, "step": 23486 }, { "epoch": 0.6, "grad_norm": 2.9022414684295654, "learning_rate": 7.219624597287822e-06, "loss": 0.5753, "step": 23487 }, { "epoch": 0.6, "grad_norm": 1.4199824333190918, "learning_rate": 7.218827196364999e-06, "loss": 0.4999, "step": 23488 }, { "epoch": 0.6, "grad_norm": 1.0484263896942139, "learning_rate": 7.218029814608267e-06, "loss": 0.5601, "step": 23489 }, { "epoch": 0.6, "grad_norm": 1.6987578868865967, "learning_rate": 7.2172324520231305e-06, "loss": 0.5641, "step": 23490 }, { "epoch": 0.6, "grad_norm": 2.332396984100342, "learning_rate": 7.216435108615076e-06, "loss": 0.6701, "step": 23491 }, { "epoch": 0.6, "grad_norm": 1.3204189538955688, "learning_rate": 7.215637784389602e-06, "loss": 0.4969, "step": 23492 }, { "epoch": 0.6, "grad_norm": 4.12473726272583, "learning_rate": 7.2148404793522055e-06, "loss": 0.6808, "step": 23493 }, { "epoch": 0.6, "grad_norm": 2.7972218990325928, "learning_rate": 7.214043193508377e-06, "loss": 0.6612, "step": 23494 }, { "epoch": 0.6, "grad_norm": 2.0704057216644287, "learning_rate": 7.213245926863613e-06, "loss": 0.5381, "step": 23495 }, { "epoch": 0.6, "grad_norm": 1.0978211164474487, "learning_rate": 7.212448679423407e-06, "loss": 0.4214, "step": 23496 }, { "epoch": 0.6, "grad_norm": 1.2341428995132446, "learning_rate": 7.211651451193252e-06, "loss": 0.5607, "step": 23497 }, { "epoch": 0.6, "grad_norm": 1.8551719188690186, "learning_rate": 7.2108542421786465e-06, "loss": 0.4282, "step": 23498 }, { "epoch": 0.6, "grad_norm": 1.6601613759994507, "learning_rate": 7.2100570523850776e-06, "loss": 0.5421, "step": 23499 }, { "epoch": 0.6, "grad_norm": 2.2970666885375977, "learning_rate": 7.209259881818047e-06, "loss": 0.61, "step": 23500 }, { "epoch": 0.6, "grad_norm": 1.8503928184509277, "learning_rate": 7.20846273048304e-06, "loss": 0.583, "step": 23501 }, { "epoch": 0.6, "grad_norm": 4.601803779602051, "learning_rate": 7.207665598385557e-06, "loss": 0.7637, "step": 23502 }, { "epoch": 0.6, "grad_norm": 1.0720000267028809, "learning_rate": 7.206868485531087e-06, "loss": 0.5715, "step": 23503 }, { "epoch": 0.6, "grad_norm": 4.223840236663818, "learning_rate": 7.206071391925122e-06, "loss": 0.6137, "step": 23504 }, { "epoch": 0.6, "grad_norm": 1.6780458688735962, "learning_rate": 7.205274317573163e-06, "loss": 0.4538, "step": 23505 }, { "epoch": 0.6, "grad_norm": 1.4127541780471802, "learning_rate": 7.204477262480694e-06, "loss": 0.4639, "step": 23506 }, { "epoch": 0.6, "grad_norm": 4.414810657501221, "learning_rate": 7.203680226653213e-06, "loss": 0.5157, "step": 23507 }, { "epoch": 0.6, "grad_norm": 2.096538782119751, "learning_rate": 7.20288321009621e-06, "loss": 0.5952, "step": 23508 }, { "epoch": 0.6, "grad_norm": 2.498541831970215, "learning_rate": 7.202086212815178e-06, "loss": 0.5537, "step": 23509 }, { "epoch": 0.6, "grad_norm": 1.4496876001358032, "learning_rate": 7.201289234815612e-06, "loss": 0.3854, "step": 23510 }, { "epoch": 0.6, "grad_norm": 6.119482040405273, "learning_rate": 7.2004922761029995e-06, "loss": 0.6412, "step": 23511 }, { "epoch": 0.6, "grad_norm": 1.4854663610458374, "learning_rate": 7.199695336682839e-06, "loss": 0.7245, "step": 23512 }, { "epoch": 0.6, "grad_norm": 4.001683235168457, "learning_rate": 7.1988984165606155e-06, "loss": 0.6186, "step": 23513 }, { "epoch": 0.6, "grad_norm": 0.9897076487541199, "learning_rate": 7.198101515741824e-06, "loss": 0.4415, "step": 23514 }, { "epoch": 0.6, "grad_norm": 1.3215748071670532, "learning_rate": 7.197304634231961e-06, "loss": 0.4849, "step": 23515 }, { "epoch": 0.6, "grad_norm": 1.7630378007888794, "learning_rate": 7.196507772036509e-06, "loss": 0.7511, "step": 23516 }, { "epoch": 0.6, "grad_norm": 1.2431917190551758, "learning_rate": 7.195710929160968e-06, "loss": 0.6102, "step": 23517 }, { "epoch": 0.6, "grad_norm": 1.7849476337432861, "learning_rate": 7.194914105610822e-06, "loss": 0.4824, "step": 23518 }, { "epoch": 0.6, "grad_norm": 1.2637473344802856, "learning_rate": 7.194117301391569e-06, "loss": 0.5383, "step": 23519 }, { "epoch": 0.6, "grad_norm": 4.101125717163086, "learning_rate": 7.193320516508694e-06, "loss": 0.7232, "step": 23520 }, { "epoch": 0.6, "grad_norm": 5.208270072937012, "learning_rate": 7.19252375096769e-06, "loss": 0.6627, "step": 23521 }, { "epoch": 0.6, "grad_norm": 6.205633163452148, "learning_rate": 7.191727004774052e-06, "loss": 0.6045, "step": 23522 }, { "epoch": 0.6, "grad_norm": 2.2720932960510254, "learning_rate": 7.190930277933263e-06, "loss": 0.6063, "step": 23523 }, { "epoch": 0.6, "grad_norm": 7.871540546417236, "learning_rate": 7.190133570450823e-06, "loss": 0.3789, "step": 23524 }, { "epoch": 0.6, "grad_norm": 1.2001621723175049, "learning_rate": 7.189336882332212e-06, "loss": 0.7057, "step": 23525 }, { "epoch": 0.6, "grad_norm": 1.6849963665008545, "learning_rate": 7.188540213582926e-06, "loss": 0.4624, "step": 23526 }, { "epoch": 0.6, "grad_norm": 1.4157723188400269, "learning_rate": 7.187743564208458e-06, "loss": 0.4719, "step": 23527 }, { "epoch": 0.6, "grad_norm": 2.0234568119049072, "learning_rate": 7.186946934214292e-06, "loss": 0.5227, "step": 23528 }, { "epoch": 0.6, "grad_norm": 1.183434009552002, "learning_rate": 7.186150323605922e-06, "loss": 0.4203, "step": 23529 }, { "epoch": 0.6, "grad_norm": 3.1249659061431885, "learning_rate": 7.185353732388833e-06, "loss": 0.6501, "step": 23530 }, { "epoch": 0.6, "grad_norm": 2.0158026218414307, "learning_rate": 7.184557160568522e-06, "loss": 0.4129, "step": 23531 }, { "epoch": 0.6, "grad_norm": 3.380769729614258, "learning_rate": 7.18376060815047e-06, "loss": 0.7664, "step": 23532 }, { "epoch": 0.6, "grad_norm": 1.2898255586624146, "learning_rate": 7.182964075140172e-06, "loss": 0.4685, "step": 23533 }, { "epoch": 0.6, "grad_norm": 1.1333609819412231, "learning_rate": 7.1821675615431175e-06, "loss": 0.4993, "step": 23534 }, { "epoch": 0.6, "grad_norm": 1.7753708362579346, "learning_rate": 7.181371067364792e-06, "loss": 0.5871, "step": 23535 }, { "epoch": 0.6, "grad_norm": 2.2369675636291504, "learning_rate": 7.1805745926106875e-06, "loss": 0.6145, "step": 23536 }, { "epoch": 0.6, "grad_norm": 1.293410062789917, "learning_rate": 7.17977813728629e-06, "loss": 0.544, "step": 23537 }, { "epoch": 0.6, "grad_norm": 1.2387200593948364, "learning_rate": 7.17898170139709e-06, "loss": 0.653, "step": 23538 }, { "epoch": 0.6, "grad_norm": 1.5570902824401855, "learning_rate": 7.1781852849485775e-06, "loss": 0.5522, "step": 23539 }, { "epoch": 0.6, "grad_norm": 5.015877723693848, "learning_rate": 7.1773888879462376e-06, "loss": 0.6494, "step": 23540 }, { "epoch": 0.6, "grad_norm": 1.5968270301818848, "learning_rate": 7.176592510395561e-06, "loss": 0.6263, "step": 23541 }, { "epoch": 0.6, "grad_norm": 5.059554576873779, "learning_rate": 7.1757961523020346e-06, "loss": 0.7005, "step": 23542 }, { "epoch": 0.6, "grad_norm": 3.2473225593566895, "learning_rate": 7.174999813671147e-06, "loss": 0.9086, "step": 23543 }, { "epoch": 0.6, "grad_norm": 2.249305009841919, "learning_rate": 7.174203494508385e-06, "loss": 0.5574, "step": 23544 }, { "epoch": 0.6, "grad_norm": 2.2356913089752197, "learning_rate": 7.173407194819238e-06, "loss": 0.5404, "step": 23545 }, { "epoch": 0.6, "grad_norm": 2.0265681743621826, "learning_rate": 7.172610914609193e-06, "loss": 0.6014, "step": 23546 }, { "epoch": 0.6, "grad_norm": 1.7719675302505493, "learning_rate": 7.171814653883735e-06, "loss": 0.4495, "step": 23547 }, { "epoch": 0.6, "grad_norm": 4.341804504394531, "learning_rate": 7.171018412648357e-06, "loss": 0.4729, "step": 23548 }, { "epoch": 0.6, "grad_norm": 1.4830539226531982, "learning_rate": 7.17022219090854e-06, "loss": 0.5773, "step": 23549 }, { "epoch": 0.6, "grad_norm": 4.934545516967773, "learning_rate": 7.1694259886697745e-06, "loss": 0.561, "step": 23550 }, { "epoch": 0.6, "grad_norm": 1.4066381454467773, "learning_rate": 7.1686298059375485e-06, "loss": 0.518, "step": 23551 }, { "epoch": 0.6, "grad_norm": 1.5570241212844849, "learning_rate": 7.1678336427173455e-06, "loss": 0.492, "step": 23552 }, { "epoch": 0.6, "grad_norm": 9.82682991027832, "learning_rate": 7.167037499014655e-06, "loss": 0.5997, "step": 23553 }, { "epoch": 0.6, "grad_norm": 1.956853985786438, "learning_rate": 7.16624137483496e-06, "loss": 0.584, "step": 23554 }, { "epoch": 0.6, "grad_norm": 1.3207588195800781, "learning_rate": 7.16544527018375e-06, "loss": 0.6367, "step": 23555 }, { "epoch": 0.6, "grad_norm": 1.7868022918701172, "learning_rate": 7.1646491850665116e-06, "loss": 0.7053, "step": 23556 }, { "epoch": 0.6, "grad_norm": 1.1618865728378296, "learning_rate": 7.163853119488727e-06, "loss": 0.6087, "step": 23557 }, { "epoch": 0.6, "grad_norm": 1.2276959419250488, "learning_rate": 7.163057073455888e-06, "loss": 0.553, "step": 23558 }, { "epoch": 0.6, "grad_norm": 1.1609208583831787, "learning_rate": 7.162261046973476e-06, "loss": 0.464, "step": 23559 }, { "epoch": 0.6, "grad_norm": 9.634407043457031, "learning_rate": 7.1614650400469775e-06, "loss": 0.5453, "step": 23560 }, { "epoch": 0.6, "grad_norm": 1.470826268196106, "learning_rate": 7.160669052681879e-06, "loss": 0.4739, "step": 23561 }, { "epoch": 0.6, "grad_norm": 3.572852611541748, "learning_rate": 7.159873084883664e-06, "loss": 0.6249, "step": 23562 }, { "epoch": 0.6, "grad_norm": 1.092381238937378, "learning_rate": 7.1590771366578226e-06, "loss": 0.5015, "step": 23563 }, { "epoch": 0.6, "grad_norm": 1.6883867979049683, "learning_rate": 7.158281208009834e-06, "loss": 0.5137, "step": 23564 }, { "epoch": 0.6, "grad_norm": 0.9260473251342773, "learning_rate": 7.157485298945188e-06, "loss": 0.5066, "step": 23565 }, { "epoch": 0.6, "grad_norm": 1.2016196250915527, "learning_rate": 7.156689409469366e-06, "loss": 0.5853, "step": 23566 }, { "epoch": 0.6, "grad_norm": 4.394940376281738, "learning_rate": 7.1558935395878535e-06, "loss": 0.7025, "step": 23567 }, { "epoch": 0.6, "grad_norm": 4.75009822845459, "learning_rate": 7.155097689306139e-06, "loss": 0.4208, "step": 23568 }, { "epoch": 0.6, "grad_norm": 2.7329189777374268, "learning_rate": 7.1543018586296996e-06, "loss": 0.6198, "step": 23569 }, { "epoch": 0.6, "grad_norm": 1.6701050996780396, "learning_rate": 7.153506047564028e-06, "loss": 0.6099, "step": 23570 }, { "epoch": 0.6, "grad_norm": 1.561911940574646, "learning_rate": 7.152710256114602e-06, "loss": 0.5429, "step": 23571 }, { "epoch": 0.6, "grad_norm": 1.6884132623672485, "learning_rate": 7.15191448428691e-06, "loss": 0.5369, "step": 23572 }, { "epoch": 0.6, "grad_norm": 2.5754377841949463, "learning_rate": 7.151118732086433e-06, "loss": 0.5181, "step": 23573 }, { "epoch": 0.6, "grad_norm": 4.252705097198486, "learning_rate": 7.1503229995186544e-06, "loss": 0.5908, "step": 23574 }, { "epoch": 0.6, "grad_norm": 2.6844482421875, "learning_rate": 7.149527286589062e-06, "loss": 0.5361, "step": 23575 }, { "epoch": 0.6, "grad_norm": 1.5153886079788208, "learning_rate": 7.148731593303133e-06, "loss": 0.4783, "step": 23576 }, { "epoch": 0.6, "grad_norm": 4.131347179412842, "learning_rate": 7.147935919666359e-06, "loss": 0.502, "step": 23577 }, { "epoch": 0.6, "grad_norm": 1.4004614353179932, "learning_rate": 7.147140265684216e-06, "loss": 0.562, "step": 23578 }, { "epoch": 0.6, "grad_norm": 3.641517400741577, "learning_rate": 7.14634463136219e-06, "loss": 0.7257, "step": 23579 }, { "epoch": 0.6, "grad_norm": 4.814395427703857, "learning_rate": 7.145549016705765e-06, "loss": 0.5489, "step": 23580 }, { "epoch": 0.6, "grad_norm": 1.221659541130066, "learning_rate": 7.14475342172042e-06, "loss": 0.4924, "step": 23581 }, { "epoch": 0.6, "grad_norm": 2.1011464595794678, "learning_rate": 7.143957846411646e-06, "loss": 0.4592, "step": 23582 }, { "epoch": 0.6, "grad_norm": 1.592453122138977, "learning_rate": 7.143162290784915e-06, "loss": 0.467, "step": 23583 }, { "epoch": 0.6, "grad_norm": 2.4886980056762695, "learning_rate": 7.142366754845719e-06, "loss": 0.6071, "step": 23584 }, { "epoch": 0.6, "grad_norm": 1.6220104694366455, "learning_rate": 7.141571238599532e-06, "loss": 0.6663, "step": 23585 }, { "epoch": 0.6, "grad_norm": 2.8823800086975098, "learning_rate": 7.1407757420518416e-06, "loss": 0.5682, "step": 23586 }, { "epoch": 0.6, "grad_norm": 1.313401699066162, "learning_rate": 7.1399802652081305e-06, "loss": 0.5045, "step": 23587 }, { "epoch": 0.6, "grad_norm": 1.0842312574386597, "learning_rate": 7.139184808073876e-06, "loss": 0.464, "step": 23588 }, { "epoch": 0.6, "grad_norm": 3.2477643489837646, "learning_rate": 7.138389370654564e-06, "loss": 0.6207, "step": 23589 }, { "epoch": 0.6, "grad_norm": 1.4790271520614624, "learning_rate": 7.137593952955673e-06, "loss": 0.609, "step": 23590 }, { "epoch": 0.6, "grad_norm": 5.414974212646484, "learning_rate": 7.136798554982687e-06, "loss": 0.6902, "step": 23591 }, { "epoch": 0.6, "grad_norm": 4.8714752197265625, "learning_rate": 7.1360031767410865e-06, "loss": 0.4682, "step": 23592 }, { "epoch": 0.6, "grad_norm": 1.596277117729187, "learning_rate": 7.135207818236352e-06, "loss": 0.6116, "step": 23593 }, { "epoch": 0.6, "grad_norm": 5.288077354431152, "learning_rate": 7.134412479473967e-06, "loss": 0.6132, "step": 23594 }, { "epoch": 0.6, "grad_norm": 1.8414764404296875, "learning_rate": 7.13361716045941e-06, "loss": 0.5735, "step": 23595 }, { "epoch": 0.6, "grad_norm": 1.5759867429733276, "learning_rate": 7.132821861198165e-06, "loss": 0.5928, "step": 23596 }, { "epoch": 0.6, "grad_norm": 3.0854225158691406, "learning_rate": 7.132026581695706e-06, "loss": 0.527, "step": 23597 }, { "epoch": 0.6, "grad_norm": 2.1011500358581543, "learning_rate": 7.131231321957519e-06, "loss": 0.5048, "step": 23598 }, { "epoch": 0.6, "grad_norm": 1.0203582048416138, "learning_rate": 7.130436081989086e-06, "loss": 0.5475, "step": 23599 }, { "epoch": 0.6, "grad_norm": 13.032718658447266, "learning_rate": 7.1296408617958825e-06, "loss": 0.4969, "step": 23600 }, { "epoch": 0.6, "grad_norm": 1.468528151512146, "learning_rate": 7.128845661383392e-06, "loss": 0.5739, "step": 23601 }, { "epoch": 0.6, "grad_norm": 1.3086320161819458, "learning_rate": 7.128050480757093e-06, "loss": 0.6133, "step": 23602 }, { "epoch": 0.6, "grad_norm": 1.2666196823120117, "learning_rate": 7.127255319922464e-06, "loss": 0.5269, "step": 23603 }, { "epoch": 0.6, "grad_norm": 1.6422230005264282, "learning_rate": 7.126460178884991e-06, "loss": 0.544, "step": 23604 }, { "epoch": 0.61, "grad_norm": 1.3344688415527344, "learning_rate": 7.125665057650144e-06, "loss": 0.5254, "step": 23605 }, { "epoch": 0.61, "grad_norm": 1.0182781219482422, "learning_rate": 7.124869956223412e-06, "loss": 0.5781, "step": 23606 }, { "epoch": 0.61, "grad_norm": 2.038444757461548, "learning_rate": 7.124074874610266e-06, "loss": 0.6497, "step": 23607 }, { "epoch": 0.61, "grad_norm": 1.1407922506332397, "learning_rate": 7.1232798128161926e-06, "loss": 0.5306, "step": 23608 }, { "epoch": 0.61, "grad_norm": 1.9137800931930542, "learning_rate": 7.122484770846665e-06, "loss": 0.617, "step": 23609 }, { "epoch": 0.61, "grad_norm": 1.4878482818603516, "learning_rate": 7.121689748707164e-06, "loss": 0.4958, "step": 23610 }, { "epoch": 0.61, "grad_norm": 3.0163772106170654, "learning_rate": 7.120894746403173e-06, "loss": 0.5401, "step": 23611 }, { "epoch": 0.61, "grad_norm": 1.3676121234893799, "learning_rate": 7.120099763940166e-06, "loss": 0.5469, "step": 23612 }, { "epoch": 0.61, "grad_norm": 1.7941585779190063, "learning_rate": 7.119304801323619e-06, "loss": 0.5955, "step": 23613 }, { "epoch": 0.61, "grad_norm": 1.3025730848312378, "learning_rate": 7.118509858559015e-06, "loss": 0.5958, "step": 23614 }, { "epoch": 0.61, "grad_norm": 1.1948182582855225, "learning_rate": 7.117714935651829e-06, "loss": 0.4965, "step": 23615 }, { "epoch": 0.61, "grad_norm": 4.10548734664917, "learning_rate": 7.116920032607541e-06, "loss": 0.7463, "step": 23616 }, { "epoch": 0.61, "grad_norm": 0.8320286273956299, "learning_rate": 7.116125149431632e-06, "loss": 0.4036, "step": 23617 }, { "epoch": 0.61, "grad_norm": 1.8799349069595337, "learning_rate": 7.115330286129573e-06, "loss": 0.6754, "step": 23618 }, { "epoch": 0.61, "grad_norm": 1.2798649072647095, "learning_rate": 7.114535442706847e-06, "loss": 0.4589, "step": 23619 }, { "epoch": 0.61, "grad_norm": 1.1244378089904785, "learning_rate": 7.11374061916893e-06, "loss": 0.478, "step": 23620 }, { "epoch": 0.61, "grad_norm": 10.1011381149292, "learning_rate": 7.112945815521297e-06, "loss": 0.6669, "step": 23621 }, { "epoch": 0.61, "grad_norm": 2.997291326522827, "learning_rate": 7.1121510317694305e-06, "loss": 0.592, "step": 23622 }, { "epoch": 0.61, "grad_norm": 1.568386435508728, "learning_rate": 7.111356267918803e-06, "loss": 0.5531, "step": 23623 }, { "epoch": 0.61, "grad_norm": 1.4223356246948242, "learning_rate": 7.110561523974895e-06, "loss": 0.31, "step": 23624 }, { "epoch": 0.61, "grad_norm": 3.4358816146850586, "learning_rate": 7.109766799943181e-06, "loss": 0.6319, "step": 23625 }, { "epoch": 0.61, "grad_norm": 2.4782702922821045, "learning_rate": 7.108972095829139e-06, "loss": 0.6483, "step": 23626 }, { "epoch": 0.61, "grad_norm": 4.626878261566162, "learning_rate": 7.108177411638243e-06, "loss": 0.5339, "step": 23627 }, { "epoch": 0.61, "grad_norm": 4.875125408172607, "learning_rate": 7.107382747375972e-06, "loss": 0.5831, "step": 23628 }, { "epoch": 0.61, "grad_norm": 2.061809778213501, "learning_rate": 7.106588103047804e-06, "loss": 0.4988, "step": 23629 }, { "epoch": 0.61, "grad_norm": 1.8976348638534546, "learning_rate": 7.105793478659209e-06, "loss": 0.4993, "step": 23630 }, { "epoch": 0.61, "grad_norm": 2.1408956050872803, "learning_rate": 7.104998874215672e-06, "loss": 0.8089, "step": 23631 }, { "epoch": 0.61, "grad_norm": 2.0132484436035156, "learning_rate": 7.10420428972266e-06, "loss": 0.6597, "step": 23632 }, { "epoch": 0.61, "grad_norm": 1.9986693859100342, "learning_rate": 7.103409725185653e-06, "loss": 0.5701, "step": 23633 }, { "epoch": 0.61, "grad_norm": 2.2438600063323975, "learning_rate": 7.102615180610129e-06, "loss": 0.6116, "step": 23634 }, { "epoch": 0.61, "grad_norm": 1.9996315240859985, "learning_rate": 7.101820656001557e-06, "loss": 0.5351, "step": 23635 }, { "epoch": 0.61, "grad_norm": 10.966156959533691, "learning_rate": 7.10102615136542e-06, "loss": 0.5005, "step": 23636 }, { "epoch": 0.61, "grad_norm": 1.589205026626587, "learning_rate": 7.100231666707187e-06, "loss": 0.4418, "step": 23637 }, { "epoch": 0.61, "grad_norm": 1.1537470817565918, "learning_rate": 7.099437202032337e-06, "loss": 0.7001, "step": 23638 }, { "epoch": 0.61, "grad_norm": 6.124727249145508, "learning_rate": 7.098642757346343e-06, "loss": 0.8034, "step": 23639 }, { "epoch": 0.61, "grad_norm": 7.006716251373291, "learning_rate": 7.09784833265468e-06, "loss": 0.7621, "step": 23640 }, { "epoch": 0.61, "grad_norm": 1.7557345628738403, "learning_rate": 7.097053927962824e-06, "loss": 0.6754, "step": 23641 }, { "epoch": 0.61, "grad_norm": 3.088430166244507, "learning_rate": 7.096259543276247e-06, "loss": 0.6063, "step": 23642 }, { "epoch": 0.61, "grad_norm": 7.18287467956543, "learning_rate": 7.095465178600427e-06, "loss": 0.5186, "step": 23643 }, { "epoch": 0.61, "grad_norm": 1.2685840129852295, "learning_rate": 7.094670833940833e-06, "loss": 0.4754, "step": 23644 }, { "epoch": 0.61, "grad_norm": 1.4088879823684692, "learning_rate": 7.093876509302944e-06, "loss": 0.5125, "step": 23645 }, { "epoch": 0.61, "grad_norm": 1.582844614982605, "learning_rate": 7.093082204692234e-06, "loss": 0.4878, "step": 23646 }, { "epoch": 0.61, "grad_norm": 1.7837861776351929, "learning_rate": 7.092287920114172e-06, "loss": 0.7043, "step": 23647 }, { "epoch": 0.61, "grad_norm": 2.504847288131714, "learning_rate": 7.091493655574238e-06, "loss": 0.6586, "step": 23648 }, { "epoch": 0.61, "grad_norm": 1.3146084547042847, "learning_rate": 7.090699411077899e-06, "loss": 0.5511, "step": 23649 }, { "epoch": 0.61, "grad_norm": 8.473387718200684, "learning_rate": 7.089905186630636e-06, "loss": 0.7023, "step": 23650 }, { "epoch": 0.61, "grad_norm": 1.1721463203430176, "learning_rate": 7.0891109822379145e-06, "loss": 0.3961, "step": 23651 }, { "epoch": 0.61, "grad_norm": 8.496786117553711, "learning_rate": 7.0883167979052125e-06, "loss": 0.7969, "step": 23652 }, { "epoch": 0.61, "grad_norm": 1.1328805685043335, "learning_rate": 7.087522633638002e-06, "loss": 0.5353, "step": 23653 }, { "epoch": 0.61, "grad_norm": 1.2755162715911865, "learning_rate": 7.086728489441755e-06, "loss": 0.5291, "step": 23654 }, { "epoch": 0.61, "grad_norm": 3.3561627864837646, "learning_rate": 7.085934365321947e-06, "loss": 0.8073, "step": 23655 }, { "epoch": 0.61, "grad_norm": 2.4512038230895996, "learning_rate": 7.085140261284047e-06, "loss": 0.627, "step": 23656 }, { "epoch": 0.61, "grad_norm": 1.7368580102920532, "learning_rate": 7.084346177333528e-06, "loss": 0.513, "step": 23657 }, { "epoch": 0.61, "grad_norm": 1.7483175992965698, "learning_rate": 7.083552113475865e-06, "loss": 0.6802, "step": 23658 }, { "epoch": 0.61, "grad_norm": 2.0716331005096436, "learning_rate": 7.082758069716528e-06, "loss": 0.4926, "step": 23659 }, { "epoch": 0.61, "grad_norm": 9.224872589111328, "learning_rate": 7.081964046060991e-06, "loss": 0.2924, "step": 23660 }, { "epoch": 0.61, "grad_norm": 1.766579508781433, "learning_rate": 7.081170042514722e-06, "loss": 0.4237, "step": 23661 }, { "epoch": 0.61, "grad_norm": 1.5807682275772095, "learning_rate": 7.080376059083196e-06, "loss": 0.4671, "step": 23662 }, { "epoch": 0.61, "grad_norm": 3.635361909866333, "learning_rate": 7.079582095771885e-06, "loss": 0.5691, "step": 23663 }, { "epoch": 0.61, "grad_norm": 3.2116527557373047, "learning_rate": 7.078788152586258e-06, "loss": 0.4964, "step": 23664 }, { "epoch": 0.61, "grad_norm": 2.652323007583618, "learning_rate": 7.07799422953179e-06, "loss": 0.7643, "step": 23665 }, { "epoch": 0.61, "grad_norm": 2.5671465396881104, "learning_rate": 7.077200326613948e-06, "loss": 0.5367, "step": 23666 }, { "epoch": 0.61, "grad_norm": 1.5937448740005493, "learning_rate": 7.076406443838206e-06, "loss": 0.4869, "step": 23667 }, { "epoch": 0.61, "grad_norm": 1.7934926748275757, "learning_rate": 7.075612581210034e-06, "loss": 0.3983, "step": 23668 }, { "epoch": 0.61, "grad_norm": 1.486152172088623, "learning_rate": 7.074818738734901e-06, "loss": 0.6989, "step": 23669 }, { "epoch": 0.61, "grad_norm": 2.1589558124542236, "learning_rate": 7.074024916418283e-06, "loss": 0.6433, "step": 23670 }, { "epoch": 0.61, "grad_norm": 6.219930648803711, "learning_rate": 7.073231114265644e-06, "loss": 0.8045, "step": 23671 }, { "epoch": 0.61, "grad_norm": 1.6330840587615967, "learning_rate": 7.072437332282459e-06, "loss": 0.576, "step": 23672 }, { "epoch": 0.61, "grad_norm": 1.085739254951477, "learning_rate": 7.071643570474196e-06, "loss": 0.4774, "step": 23673 }, { "epoch": 0.61, "grad_norm": 2.2281084060668945, "learning_rate": 7.070849828846325e-06, "loss": 0.6936, "step": 23674 }, { "epoch": 0.61, "grad_norm": 2.5482661724090576, "learning_rate": 7.070056107404317e-06, "loss": 0.5856, "step": 23675 }, { "epoch": 0.61, "grad_norm": 6.608173370361328, "learning_rate": 7.069262406153641e-06, "loss": 0.6084, "step": 23676 }, { "epoch": 0.61, "grad_norm": 1.0791816711425781, "learning_rate": 7.0684687250997704e-06, "loss": 0.3555, "step": 23677 }, { "epoch": 0.61, "grad_norm": 2.098242998123169, "learning_rate": 7.067675064248167e-06, "loss": 0.5248, "step": 23678 }, { "epoch": 0.61, "grad_norm": 0.9487341046333313, "learning_rate": 7.066881423604309e-06, "loss": 0.4557, "step": 23679 }, { "epoch": 0.61, "grad_norm": 2.0108144283294678, "learning_rate": 7.0660878031736594e-06, "loss": 0.5424, "step": 23680 }, { "epoch": 0.61, "grad_norm": 2.268969774246216, "learning_rate": 7.065294202961688e-06, "loss": 0.543, "step": 23681 }, { "epoch": 0.61, "grad_norm": 1.1503576040267944, "learning_rate": 7.064500622973869e-06, "loss": 0.5511, "step": 23682 }, { "epoch": 0.61, "grad_norm": 5.611383438110352, "learning_rate": 7.063707063215664e-06, "loss": 0.6616, "step": 23683 }, { "epoch": 0.61, "grad_norm": 1.7072724103927612, "learning_rate": 7.0629135236925475e-06, "loss": 0.6677, "step": 23684 }, { "epoch": 0.61, "grad_norm": 1.8868274688720703, "learning_rate": 7.062120004409983e-06, "loss": 0.4486, "step": 23685 }, { "epoch": 0.61, "grad_norm": 1.3671292066574097, "learning_rate": 7.061326505373443e-06, "loss": 0.5557, "step": 23686 }, { "epoch": 0.61, "grad_norm": 3.315031051635742, "learning_rate": 7.060533026588396e-06, "loss": 0.7174, "step": 23687 }, { "epoch": 0.61, "grad_norm": 2.087825059890747, "learning_rate": 7.059739568060306e-06, "loss": 0.5969, "step": 23688 }, { "epoch": 0.61, "grad_norm": 1.6565717458724976, "learning_rate": 7.058946129794647e-06, "loss": 0.5487, "step": 23689 }, { "epoch": 0.61, "grad_norm": 1.6423850059509277, "learning_rate": 7.058152711796881e-06, "loss": 0.6669, "step": 23690 }, { "epoch": 0.61, "grad_norm": 4.7676239013671875, "learning_rate": 7.05735931407248e-06, "loss": 0.6888, "step": 23691 }, { "epoch": 0.61, "grad_norm": 6.5794501304626465, "learning_rate": 7.0565659366269065e-06, "loss": 0.5801, "step": 23692 }, { "epoch": 0.61, "grad_norm": 3.918821096420288, "learning_rate": 7.055772579465634e-06, "loss": 0.5825, "step": 23693 }, { "epoch": 0.61, "grad_norm": 2.8906936645507812, "learning_rate": 7.054979242594127e-06, "loss": 0.6716, "step": 23694 }, { "epoch": 0.61, "grad_norm": 5.555405616760254, "learning_rate": 7.054185926017851e-06, "loss": 0.5951, "step": 23695 }, { "epoch": 0.61, "grad_norm": 4.062568187713623, "learning_rate": 7.053392629742278e-06, "loss": 0.6213, "step": 23696 }, { "epoch": 0.61, "grad_norm": 1.3889714479446411, "learning_rate": 7.05259935377287e-06, "loss": 0.5836, "step": 23697 }, { "epoch": 0.61, "grad_norm": 1.39657461643219, "learning_rate": 7.051806098115095e-06, "loss": 0.3814, "step": 23698 }, { "epoch": 0.61, "grad_norm": 1.4067878723144531, "learning_rate": 7.051012862774421e-06, "loss": 0.3379, "step": 23699 }, { "epoch": 0.61, "grad_norm": 2.1537926197052, "learning_rate": 7.050219647756314e-06, "loss": 0.6491, "step": 23700 }, { "epoch": 0.61, "grad_norm": 2.442286968231201, "learning_rate": 7.04942645306624e-06, "loss": 0.6151, "step": 23701 }, { "epoch": 0.61, "grad_norm": 1.411683201789856, "learning_rate": 7.048633278709662e-06, "loss": 0.5805, "step": 23702 }, { "epoch": 0.61, "grad_norm": 1.1548717021942139, "learning_rate": 7.047840124692051e-06, "loss": 0.6065, "step": 23703 }, { "epoch": 0.61, "grad_norm": 1.7802410125732422, "learning_rate": 7.047046991018873e-06, "loss": 0.4675, "step": 23704 }, { "epoch": 0.61, "grad_norm": 1.6300745010375977, "learning_rate": 7.04625387769559e-06, "loss": 0.6854, "step": 23705 }, { "epoch": 0.61, "grad_norm": 1.7370984554290771, "learning_rate": 7.045460784727671e-06, "loss": 0.4887, "step": 23706 }, { "epoch": 0.61, "grad_norm": 3.0082263946533203, "learning_rate": 7.044667712120579e-06, "loss": 0.6636, "step": 23707 }, { "epoch": 0.61, "grad_norm": 2.612898111343384, "learning_rate": 7.043874659879783e-06, "loss": 0.4899, "step": 23708 }, { "epoch": 0.61, "grad_norm": 1.911181092262268, "learning_rate": 7.0430816280107415e-06, "loss": 0.5566, "step": 23709 }, { "epoch": 0.61, "grad_norm": 1.5028691291809082, "learning_rate": 7.042288616518924e-06, "loss": 0.6706, "step": 23710 }, { "epoch": 0.61, "grad_norm": 1.3784823417663574, "learning_rate": 7.041495625409799e-06, "loss": 0.5445, "step": 23711 }, { "epoch": 0.61, "grad_norm": 2.0791821479797363, "learning_rate": 7.040702654688825e-06, "loss": 0.5247, "step": 23712 }, { "epoch": 0.61, "grad_norm": 2.5830187797546387, "learning_rate": 7.039909704361469e-06, "loss": 0.5807, "step": 23713 }, { "epoch": 0.61, "grad_norm": 7.592529773712158, "learning_rate": 7.039116774433195e-06, "loss": 0.6276, "step": 23714 }, { "epoch": 0.61, "grad_norm": 3.1137430667877197, "learning_rate": 7.038323864909469e-06, "loss": 0.6475, "step": 23715 }, { "epoch": 0.61, "grad_norm": 4.075384140014648, "learning_rate": 7.037530975795755e-06, "loss": 0.5731, "step": 23716 }, { "epoch": 0.61, "grad_norm": 1.7407609224319458, "learning_rate": 7.036738107097514e-06, "loss": 0.482, "step": 23717 }, { "epoch": 0.61, "grad_norm": 2.8704960346221924, "learning_rate": 7.0359452588202145e-06, "loss": 0.6769, "step": 23718 }, { "epoch": 0.61, "grad_norm": 1.7218339443206787, "learning_rate": 7.035152430969316e-06, "loss": 0.4476, "step": 23719 }, { "epoch": 0.61, "grad_norm": 1.3421440124511719, "learning_rate": 7.0343596235502865e-06, "loss": 0.6374, "step": 23720 }, { "epoch": 0.61, "grad_norm": 1.1131303310394287, "learning_rate": 7.033566836568586e-06, "loss": 0.3554, "step": 23721 }, { "epoch": 0.61, "grad_norm": 5.5204596519470215, "learning_rate": 7.0327740700296775e-06, "loss": 0.6022, "step": 23722 }, { "epoch": 0.61, "grad_norm": 1.8715232610702515, "learning_rate": 7.03198132393903e-06, "loss": 0.554, "step": 23723 }, { "epoch": 0.61, "grad_norm": 1.7336504459381104, "learning_rate": 7.031188598302099e-06, "loss": 0.7253, "step": 23724 }, { "epoch": 0.61, "grad_norm": 1.271052360534668, "learning_rate": 7.030395893124354e-06, "loss": 0.5718, "step": 23725 }, { "epoch": 0.61, "grad_norm": 6.813954830169678, "learning_rate": 7.029603208411252e-06, "loss": 0.642, "step": 23726 }, { "epoch": 0.61, "grad_norm": 2.463266134262085, "learning_rate": 7.0288105441682585e-06, "loss": 0.692, "step": 23727 }, { "epoch": 0.61, "grad_norm": 1.6337225437164307, "learning_rate": 7.0280179004008385e-06, "loss": 0.5436, "step": 23728 }, { "epoch": 0.61, "grad_norm": 1.6244642734527588, "learning_rate": 7.027225277114449e-06, "loss": 0.4835, "step": 23729 }, { "epoch": 0.61, "grad_norm": 1.3995238542556763, "learning_rate": 7.026432674314558e-06, "loss": 0.5805, "step": 23730 }, { "epoch": 0.61, "grad_norm": 0.8625264167785645, "learning_rate": 7.025640092006622e-06, "loss": 0.5327, "step": 23731 }, { "epoch": 0.61, "grad_norm": 1.6580382585525513, "learning_rate": 7.0248475301961105e-06, "loss": 0.4762, "step": 23732 }, { "epoch": 0.61, "grad_norm": 1.732374668121338, "learning_rate": 7.024054988888476e-06, "loss": 0.5483, "step": 23733 }, { "epoch": 0.61, "grad_norm": 1.4895827770233154, "learning_rate": 7.023262468089185e-06, "loss": 0.4862, "step": 23734 }, { "epoch": 0.61, "grad_norm": 2.2374825477600098, "learning_rate": 7.022469967803702e-06, "loss": 0.4971, "step": 23735 }, { "epoch": 0.61, "grad_norm": 1.0780256986618042, "learning_rate": 7.021677488037483e-06, "loss": 0.5026, "step": 23736 }, { "epoch": 0.61, "grad_norm": 2.038963556289673, "learning_rate": 7.020885028795994e-06, "loss": 0.4776, "step": 23737 }, { "epoch": 0.61, "grad_norm": 6.2077317237854, "learning_rate": 7.020092590084691e-06, "loss": 0.7935, "step": 23738 }, { "epoch": 0.61, "grad_norm": 9.097803115844727, "learning_rate": 7.019300171909037e-06, "loss": 0.4032, "step": 23739 }, { "epoch": 0.61, "grad_norm": 1.3736438751220703, "learning_rate": 7.0185077742744965e-06, "loss": 0.3958, "step": 23740 }, { "epoch": 0.61, "grad_norm": 3.3183720111846924, "learning_rate": 7.0177153971865256e-06, "loss": 0.4705, "step": 23741 }, { "epoch": 0.61, "grad_norm": 2.7023203372955322, "learning_rate": 7.016923040650588e-06, "loss": 0.6836, "step": 23742 }, { "epoch": 0.61, "grad_norm": 1.6619569063186646, "learning_rate": 7.01613070467214e-06, "loss": 0.3955, "step": 23743 }, { "epoch": 0.61, "grad_norm": 1.2460589408874512, "learning_rate": 7.015338389256647e-06, "loss": 0.6113, "step": 23744 }, { "epoch": 0.61, "grad_norm": 5.109539985656738, "learning_rate": 7.014546094409566e-06, "loss": 0.5577, "step": 23745 }, { "epoch": 0.61, "grad_norm": 1.6809253692626953, "learning_rate": 7.013753820136356e-06, "loss": 0.6515, "step": 23746 }, { "epoch": 0.61, "grad_norm": 3.6215760707855225, "learning_rate": 7.012961566442481e-06, "loss": 0.536, "step": 23747 }, { "epoch": 0.61, "grad_norm": 1.961164951324463, "learning_rate": 7.012169333333396e-06, "loss": 0.5614, "step": 23748 }, { "epoch": 0.61, "grad_norm": 1.4920268058776855, "learning_rate": 7.0113771208145665e-06, "loss": 0.7921, "step": 23749 }, { "epoch": 0.61, "grad_norm": 1.8234481811523438, "learning_rate": 7.010584928891444e-06, "loss": 0.5498, "step": 23750 }, { "epoch": 0.61, "grad_norm": 1.3648681640625, "learning_rate": 7.009792757569493e-06, "loss": 0.5979, "step": 23751 }, { "epoch": 0.61, "grad_norm": 4.773688793182373, "learning_rate": 7.0090006068541735e-06, "loss": 0.5667, "step": 23752 }, { "epoch": 0.61, "grad_norm": 1.274706482887268, "learning_rate": 7.008208476750942e-06, "loss": 0.5782, "step": 23753 }, { "epoch": 0.61, "grad_norm": 0.9184036254882812, "learning_rate": 7.007416367265258e-06, "loss": 0.503, "step": 23754 }, { "epoch": 0.61, "grad_norm": 2.160703420639038, "learning_rate": 7.006624278402579e-06, "loss": 0.5389, "step": 23755 }, { "epoch": 0.61, "grad_norm": 0.9511123299598694, "learning_rate": 7.005832210168365e-06, "loss": 0.5303, "step": 23756 }, { "epoch": 0.61, "grad_norm": 7.3521809577941895, "learning_rate": 7.005040162568077e-06, "loss": 0.5721, "step": 23757 }, { "epoch": 0.61, "grad_norm": 4.072381973266602, "learning_rate": 7.004248135607168e-06, "loss": 0.6799, "step": 23758 }, { "epoch": 0.61, "grad_norm": 1.9493403434753418, "learning_rate": 7.003456129291099e-06, "loss": 0.5256, "step": 23759 }, { "epoch": 0.61, "grad_norm": 1.5375103950500488, "learning_rate": 7.002664143625329e-06, "loss": 0.4307, "step": 23760 }, { "epoch": 0.61, "grad_norm": 6.771734714508057, "learning_rate": 7.001872178615315e-06, "loss": 0.5, "step": 23761 }, { "epoch": 0.61, "grad_norm": 1.1783802509307861, "learning_rate": 7.001080234266511e-06, "loss": 0.3929, "step": 23762 }, { "epoch": 0.61, "grad_norm": 1.250747799873352, "learning_rate": 7.00028831058438e-06, "loss": 0.5512, "step": 23763 }, { "epoch": 0.61, "grad_norm": 1.2878762483596802, "learning_rate": 6.999496407574378e-06, "loss": 0.4742, "step": 23764 }, { "epoch": 0.61, "grad_norm": 1.113472819328308, "learning_rate": 6.998704525241961e-06, "loss": 0.4469, "step": 23765 }, { "epoch": 0.61, "grad_norm": 1.233378529548645, "learning_rate": 6.997912663592588e-06, "loss": 0.4571, "step": 23766 }, { "epoch": 0.61, "grad_norm": 2.6131844520568848, "learning_rate": 6.997120822631712e-06, "loss": 0.6341, "step": 23767 }, { "epoch": 0.61, "grad_norm": 4.7794084548950195, "learning_rate": 6.9963290023647944e-06, "loss": 0.589, "step": 23768 }, { "epoch": 0.61, "grad_norm": 1.4369767904281616, "learning_rate": 6.995537202797291e-06, "loss": 0.4667, "step": 23769 }, { "epoch": 0.61, "grad_norm": 1.6015828847885132, "learning_rate": 6.994745423934655e-06, "loss": 0.6003, "step": 23770 }, { "epoch": 0.61, "grad_norm": 1.6137241125106812, "learning_rate": 6.9939536657823495e-06, "loss": 0.5051, "step": 23771 }, { "epoch": 0.61, "grad_norm": 3.055689573287964, "learning_rate": 6.993161928345824e-06, "loss": 0.6836, "step": 23772 }, { "epoch": 0.61, "grad_norm": 1.0765228271484375, "learning_rate": 6.99237021163054e-06, "loss": 0.4763, "step": 23773 }, { "epoch": 0.61, "grad_norm": 1.8928276300430298, "learning_rate": 6.991578515641946e-06, "loss": 0.3866, "step": 23774 }, { "epoch": 0.61, "grad_norm": 1.3589184284210205, "learning_rate": 6.990786840385505e-06, "loss": 0.4776, "step": 23775 }, { "epoch": 0.61, "grad_norm": 2.194995403289795, "learning_rate": 6.989995185866673e-06, "loss": 0.4869, "step": 23776 }, { "epoch": 0.61, "grad_norm": 1.7526376247406006, "learning_rate": 6.9892035520908995e-06, "loss": 0.6367, "step": 23777 }, { "epoch": 0.61, "grad_norm": 1.5414760112762451, "learning_rate": 6.988411939063647e-06, "loss": 0.6607, "step": 23778 }, { "epoch": 0.61, "grad_norm": 7.4431023597717285, "learning_rate": 6.9876203467903645e-06, "loss": 0.7746, "step": 23779 }, { "epoch": 0.61, "grad_norm": 4.755068302154541, "learning_rate": 6.986828775276511e-06, "loss": 0.5921, "step": 23780 }, { "epoch": 0.61, "grad_norm": 5.838051795959473, "learning_rate": 6.986037224527542e-06, "loss": 0.479, "step": 23781 }, { "epoch": 0.61, "grad_norm": 1.6422711610794067, "learning_rate": 6.985245694548907e-06, "loss": 0.648, "step": 23782 }, { "epoch": 0.61, "grad_norm": 1.3022786378860474, "learning_rate": 6.9844541853460705e-06, "loss": 0.5788, "step": 23783 }, { "epoch": 0.61, "grad_norm": 2.472032070159912, "learning_rate": 6.983662696924476e-06, "loss": 0.6539, "step": 23784 }, { "epoch": 0.61, "grad_norm": 1.491439938545227, "learning_rate": 6.982871229289587e-06, "loss": 0.57, "step": 23785 }, { "epoch": 0.61, "grad_norm": 2.210111618041992, "learning_rate": 6.9820797824468516e-06, "loss": 0.7882, "step": 23786 }, { "epoch": 0.61, "grad_norm": 2.1345231533050537, "learning_rate": 6.981288356401725e-06, "loss": 0.4318, "step": 23787 }, { "epoch": 0.61, "grad_norm": 3.3746938705444336, "learning_rate": 6.980496951159666e-06, "loss": 0.7249, "step": 23788 }, { "epoch": 0.61, "grad_norm": 5.878673076629639, "learning_rate": 6.979705566726123e-06, "loss": 0.763, "step": 23789 }, { "epoch": 0.61, "grad_norm": 2.5233354568481445, "learning_rate": 6.978914203106553e-06, "loss": 0.5769, "step": 23790 }, { "epoch": 0.61, "grad_norm": 1.437914490699768, "learning_rate": 6.978122860306405e-06, "loss": 0.6267, "step": 23791 }, { "epoch": 0.61, "grad_norm": 2.2407212257385254, "learning_rate": 6.977331538331138e-06, "loss": 0.4901, "step": 23792 }, { "epoch": 0.61, "grad_norm": 1.4431333541870117, "learning_rate": 6.9765402371862035e-06, "loss": 0.6747, "step": 23793 }, { "epoch": 0.61, "grad_norm": 1.3196649551391602, "learning_rate": 6.975748956877053e-06, "loss": 0.5213, "step": 23794 }, { "epoch": 0.61, "grad_norm": 5.472591876983643, "learning_rate": 6.974957697409142e-06, "loss": 0.5347, "step": 23795 }, { "epoch": 0.61, "grad_norm": 1.1359506845474243, "learning_rate": 6.974166458787921e-06, "loss": 0.5874, "step": 23796 }, { "epoch": 0.61, "grad_norm": 1.736481785774231, "learning_rate": 6.9733752410188426e-06, "loss": 0.578, "step": 23797 }, { "epoch": 0.61, "grad_norm": 3.7304108142852783, "learning_rate": 6.972584044107363e-06, "loss": 0.5304, "step": 23798 }, { "epoch": 0.61, "grad_norm": 3.099900722503662, "learning_rate": 6.9717928680589305e-06, "loss": 0.5204, "step": 23799 }, { "epoch": 0.61, "grad_norm": 1.3275866508483887, "learning_rate": 6.971001712879001e-06, "loss": 0.5302, "step": 23800 }, { "epoch": 0.61, "grad_norm": 1.9093527793884277, "learning_rate": 6.970210578573023e-06, "loss": 0.5932, "step": 23801 }, { "epoch": 0.61, "grad_norm": 1.0792168378829956, "learning_rate": 6.969419465146452e-06, "loss": 0.5491, "step": 23802 }, { "epoch": 0.61, "grad_norm": 0.946313202381134, "learning_rate": 6.9686283726047354e-06, "loss": 0.4916, "step": 23803 }, { "epoch": 0.61, "grad_norm": 2.5262789726257324, "learning_rate": 6.967837300953327e-06, "loss": 0.5575, "step": 23804 }, { "epoch": 0.61, "grad_norm": 1.354101538658142, "learning_rate": 6.9670462501976825e-06, "loss": 0.4899, "step": 23805 }, { "epoch": 0.61, "grad_norm": 1.2515175342559814, "learning_rate": 6.966255220343248e-06, "loss": 0.7228, "step": 23806 }, { "epoch": 0.61, "grad_norm": 1.5103274583816528, "learning_rate": 6.965464211395477e-06, "loss": 0.4328, "step": 23807 }, { "epoch": 0.61, "grad_norm": 2.0466861724853516, "learning_rate": 6.964673223359819e-06, "loss": 0.6712, "step": 23808 }, { "epoch": 0.61, "grad_norm": 2.858471393585205, "learning_rate": 6.963882256241725e-06, "loss": 0.5842, "step": 23809 }, { "epoch": 0.61, "grad_norm": 2.0978078842163086, "learning_rate": 6.96309131004665e-06, "loss": 0.5187, "step": 23810 }, { "epoch": 0.61, "grad_norm": 1.261812686920166, "learning_rate": 6.962300384780041e-06, "loss": 0.5365, "step": 23811 }, { "epoch": 0.61, "grad_norm": 1.4781713485717773, "learning_rate": 6.961509480447349e-06, "loss": 0.4633, "step": 23812 }, { "epoch": 0.61, "grad_norm": 1.6074905395507812, "learning_rate": 6.9607185970540235e-06, "loss": 0.3971, "step": 23813 }, { "epoch": 0.61, "grad_norm": 1.8877019882202148, "learning_rate": 6.9599277346055185e-06, "loss": 0.7597, "step": 23814 }, { "epoch": 0.61, "grad_norm": 1.4161452054977417, "learning_rate": 6.959136893107279e-06, "loss": 0.5129, "step": 23815 }, { "epoch": 0.61, "grad_norm": 5.942913055419922, "learning_rate": 6.958346072564758e-06, "loss": 0.5597, "step": 23816 }, { "epoch": 0.61, "grad_norm": 1.4978196620941162, "learning_rate": 6.957555272983407e-06, "loss": 0.4849, "step": 23817 }, { "epoch": 0.61, "grad_norm": 1.4598805904388428, "learning_rate": 6.9567644943686704e-06, "loss": 0.5989, "step": 23818 }, { "epoch": 0.61, "grad_norm": 2.331749439239502, "learning_rate": 6.955973736726005e-06, "loss": 0.5562, "step": 23819 }, { "epoch": 0.61, "grad_norm": 1.627058744430542, "learning_rate": 6.955183000060853e-06, "loss": 0.4898, "step": 23820 }, { "epoch": 0.61, "grad_norm": 2.4085073471069336, "learning_rate": 6.954392284378666e-06, "loss": 0.6044, "step": 23821 }, { "epoch": 0.61, "grad_norm": 4.054805278778076, "learning_rate": 6.953601589684898e-06, "loss": 0.59, "step": 23822 }, { "epoch": 0.61, "grad_norm": 2.889814615249634, "learning_rate": 6.95281091598499e-06, "loss": 0.5583, "step": 23823 }, { "epoch": 0.61, "grad_norm": 2.445744276046753, "learning_rate": 6.952020263284398e-06, "loss": 0.4703, "step": 23824 }, { "epoch": 0.61, "grad_norm": 1.4046381711959839, "learning_rate": 6.951229631588564e-06, "loss": 0.3791, "step": 23825 }, { "epoch": 0.61, "grad_norm": 1.2056561708450317, "learning_rate": 6.950439020902944e-06, "loss": 0.5646, "step": 23826 }, { "epoch": 0.61, "grad_norm": 1.4811729192733765, "learning_rate": 6.9496484312329785e-06, "loss": 0.5284, "step": 23827 }, { "epoch": 0.61, "grad_norm": 1.6218502521514893, "learning_rate": 6.948857862584119e-06, "loss": 0.5148, "step": 23828 }, { "epoch": 0.61, "grad_norm": 4.354403495788574, "learning_rate": 6.94806731496182e-06, "loss": 0.7615, "step": 23829 }, { "epoch": 0.61, "grad_norm": 1.1512869596481323, "learning_rate": 6.947276788371518e-06, "loss": 0.6368, "step": 23830 }, { "epoch": 0.61, "grad_norm": 2.1911892890930176, "learning_rate": 6.9464862828186695e-06, "loss": 0.6281, "step": 23831 }, { "epoch": 0.61, "grad_norm": 5.463820934295654, "learning_rate": 6.945695798308717e-06, "loss": 0.5344, "step": 23832 }, { "epoch": 0.61, "grad_norm": 1.339570164680481, "learning_rate": 6.9449053348471095e-06, "loss": 0.583, "step": 23833 }, { "epoch": 0.61, "grad_norm": 1.5484938621520996, "learning_rate": 6.944114892439299e-06, "loss": 0.5017, "step": 23834 }, { "epoch": 0.61, "grad_norm": 1.6536564826965332, "learning_rate": 6.943324471090725e-06, "loss": 0.6569, "step": 23835 }, { "epoch": 0.61, "grad_norm": 1.8586127758026123, "learning_rate": 6.942534070806841e-06, "loss": 0.5621, "step": 23836 }, { "epoch": 0.61, "grad_norm": 3.427055597305298, "learning_rate": 6.94174369159309e-06, "loss": 0.4938, "step": 23837 }, { "epoch": 0.61, "grad_norm": 3.488968849182129, "learning_rate": 6.94095333345492e-06, "loss": 0.4546, "step": 23838 }, { "epoch": 0.61, "grad_norm": 1.6681934595108032, "learning_rate": 6.940162996397779e-06, "loss": 0.5662, "step": 23839 }, { "epoch": 0.61, "grad_norm": 1.132483720779419, "learning_rate": 6.93937268042711e-06, "loss": 0.6703, "step": 23840 }, { "epoch": 0.61, "grad_norm": 1.1095526218414307, "learning_rate": 6.938582385548364e-06, "loss": 0.5273, "step": 23841 }, { "epoch": 0.61, "grad_norm": 1.3886287212371826, "learning_rate": 6.937792111766983e-06, "loss": 0.5113, "step": 23842 }, { "epoch": 0.61, "grad_norm": 2.1059515476226807, "learning_rate": 6.937001859088417e-06, "loss": 0.4755, "step": 23843 }, { "epoch": 0.61, "grad_norm": 2.0565712451934814, "learning_rate": 6.936211627518107e-06, "loss": 0.6135, "step": 23844 }, { "epoch": 0.61, "grad_norm": 1.6853371858596802, "learning_rate": 6.935421417061502e-06, "loss": 0.67, "step": 23845 }, { "epoch": 0.61, "grad_norm": 1.4748612642288208, "learning_rate": 6.934631227724049e-06, "loss": 0.4504, "step": 23846 }, { "epoch": 0.61, "grad_norm": 2.1083433628082275, "learning_rate": 6.933841059511191e-06, "loss": 0.6817, "step": 23847 }, { "epoch": 0.61, "grad_norm": 5.498601913452148, "learning_rate": 6.9330509124283755e-06, "loss": 0.6465, "step": 23848 }, { "epoch": 0.61, "grad_norm": 1.879933476448059, "learning_rate": 6.932260786481043e-06, "loss": 0.7203, "step": 23849 }, { "epoch": 0.61, "grad_norm": 2.612993001937866, "learning_rate": 6.931470681674643e-06, "loss": 0.5326, "step": 23850 }, { "epoch": 0.61, "grad_norm": 1.2817625999450684, "learning_rate": 6.930680598014621e-06, "loss": 0.5248, "step": 23851 }, { "epoch": 0.61, "grad_norm": 1.612457275390625, "learning_rate": 6.9298905355064186e-06, "loss": 0.5563, "step": 23852 }, { "epoch": 0.61, "grad_norm": 1.93992280960083, "learning_rate": 6.929100494155483e-06, "loss": 0.6266, "step": 23853 }, { "epoch": 0.61, "grad_norm": 0.967094898223877, "learning_rate": 6.928310473967259e-06, "loss": 0.3796, "step": 23854 }, { "epoch": 0.61, "grad_norm": 6.53262186050415, "learning_rate": 6.927520474947187e-06, "loss": 0.586, "step": 23855 }, { "epoch": 0.61, "grad_norm": 3.3328909873962402, "learning_rate": 6.926730497100714e-06, "loss": 0.7656, "step": 23856 }, { "epoch": 0.61, "grad_norm": 4.164773941040039, "learning_rate": 6.925940540433282e-06, "loss": 0.6694, "step": 23857 }, { "epoch": 0.61, "grad_norm": 11.68254566192627, "learning_rate": 6.925150604950337e-06, "loss": 0.6102, "step": 23858 }, { "epoch": 0.61, "grad_norm": 1.6413097381591797, "learning_rate": 6.9243606906573235e-06, "loss": 0.5275, "step": 23859 }, { "epoch": 0.61, "grad_norm": 3.4769511222839355, "learning_rate": 6.923570797559682e-06, "loss": 0.5046, "step": 23860 }, { "epoch": 0.61, "grad_norm": 2.228271722793579, "learning_rate": 6.922780925662861e-06, "loss": 0.5079, "step": 23861 }, { "epoch": 0.61, "grad_norm": 2.276686429977417, "learning_rate": 6.921991074972298e-06, "loss": 0.7234, "step": 23862 }, { "epoch": 0.61, "grad_norm": 1.4841376543045044, "learning_rate": 6.9212012454934374e-06, "loss": 0.5699, "step": 23863 }, { "epoch": 0.61, "grad_norm": 1.0549299716949463, "learning_rate": 6.920411437231727e-06, "loss": 0.6444, "step": 23864 }, { "epoch": 0.61, "grad_norm": 1.3850005865097046, "learning_rate": 6.919621650192603e-06, "loss": 0.6493, "step": 23865 }, { "epoch": 0.61, "grad_norm": 1.3638190031051636, "learning_rate": 6.9188318843815136e-06, "loss": 0.4816, "step": 23866 }, { "epoch": 0.61, "grad_norm": 1.3871452808380127, "learning_rate": 6.918042139803898e-06, "loss": 0.5526, "step": 23867 }, { "epoch": 0.61, "grad_norm": 1.634893774986267, "learning_rate": 6.917252416465201e-06, "loss": 0.5864, "step": 23868 }, { "epoch": 0.61, "grad_norm": 3.7197558879852295, "learning_rate": 6.916462714370862e-06, "loss": 0.6608, "step": 23869 }, { "epoch": 0.61, "grad_norm": 1.6250646114349365, "learning_rate": 6.915673033526325e-06, "loss": 0.6441, "step": 23870 }, { "epoch": 0.61, "grad_norm": 1.5501185655593872, "learning_rate": 6.914883373937031e-06, "loss": 0.6815, "step": 23871 }, { "epoch": 0.61, "grad_norm": 3.581976890563965, "learning_rate": 6.914093735608424e-06, "loss": 0.4916, "step": 23872 }, { "epoch": 0.61, "grad_norm": 1.7352814674377441, "learning_rate": 6.913304118545945e-06, "loss": 0.5276, "step": 23873 }, { "epoch": 0.61, "grad_norm": 1.9358093738555908, "learning_rate": 6.912514522755032e-06, "loss": 0.7076, "step": 23874 }, { "epoch": 0.61, "grad_norm": 1.5193274021148682, "learning_rate": 6.91172494824113e-06, "loss": 0.6819, "step": 23875 }, { "epoch": 0.61, "grad_norm": 1.7567646503448486, "learning_rate": 6.9109353950096815e-06, "loss": 0.573, "step": 23876 }, { "epoch": 0.61, "grad_norm": 1.9903627634048462, "learning_rate": 6.910145863066122e-06, "loss": 0.5049, "step": 23877 }, { "epoch": 0.61, "grad_norm": 1.8384060859680176, "learning_rate": 6.9093563524158995e-06, "loss": 0.5471, "step": 23878 }, { "epoch": 0.61, "grad_norm": 0.9952897429466248, "learning_rate": 6.908566863064449e-06, "loss": 0.318, "step": 23879 }, { "epoch": 0.61, "grad_norm": 1.9268569946289062, "learning_rate": 6.907777395017215e-06, "loss": 0.5497, "step": 23880 }, { "epoch": 0.61, "grad_norm": 1.2058496475219727, "learning_rate": 6.906987948279635e-06, "loss": 0.5569, "step": 23881 }, { "epoch": 0.61, "grad_norm": 1.4065600633621216, "learning_rate": 6.90619852285715e-06, "loss": 0.5435, "step": 23882 }, { "epoch": 0.61, "grad_norm": 6.7789435386657715, "learning_rate": 6.905409118755204e-06, "loss": 0.5759, "step": 23883 }, { "epoch": 0.61, "grad_norm": 1.3681308031082153, "learning_rate": 6.904619735979232e-06, "loss": 0.5509, "step": 23884 }, { "epoch": 0.61, "grad_norm": 4.565943241119385, "learning_rate": 6.903830374534678e-06, "loss": 0.5982, "step": 23885 }, { "epoch": 0.61, "grad_norm": 2.7561309337615967, "learning_rate": 6.9030410344269785e-06, "loss": 0.5671, "step": 23886 }, { "epoch": 0.61, "grad_norm": 2.063339948654175, "learning_rate": 6.902251715661573e-06, "loss": 0.7373, "step": 23887 }, { "epoch": 0.61, "grad_norm": 1.753416895866394, "learning_rate": 6.9014624182439046e-06, "loss": 0.6166, "step": 23888 }, { "epoch": 0.61, "grad_norm": 1.509621262550354, "learning_rate": 6.90067314217941e-06, "loss": 0.568, "step": 23889 }, { "epoch": 0.61, "grad_norm": 5.618647575378418, "learning_rate": 6.89988388747353e-06, "loss": 0.6627, "step": 23890 }, { "epoch": 0.61, "grad_norm": 7.0724663734436035, "learning_rate": 6.899094654131701e-06, "loss": 0.6406, "step": 23891 }, { "epoch": 0.61, "grad_norm": 2.961426258087158, "learning_rate": 6.898305442159363e-06, "loss": 0.5419, "step": 23892 }, { "epoch": 0.61, "grad_norm": 6.674289226531982, "learning_rate": 6.8975162515619574e-06, "loss": 0.4894, "step": 23893 }, { "epoch": 0.61, "grad_norm": 1.7849256992340088, "learning_rate": 6.896727082344919e-06, "loss": 0.5543, "step": 23894 }, { "epoch": 0.61, "grad_norm": 1.8727461099624634, "learning_rate": 6.8959379345136874e-06, "loss": 0.531, "step": 23895 }, { "epoch": 0.61, "grad_norm": 1.691474437713623, "learning_rate": 6.8951488080737015e-06, "loss": 0.5002, "step": 23896 }, { "epoch": 0.61, "grad_norm": 2.2324564456939697, "learning_rate": 6.894359703030402e-06, "loss": 0.7352, "step": 23897 }, { "epoch": 0.61, "grad_norm": 1.4182826280593872, "learning_rate": 6.893570619389222e-06, "loss": 0.7312, "step": 23898 }, { "epoch": 0.61, "grad_norm": 1.5830142498016357, "learning_rate": 6.892781557155601e-06, "loss": 0.4857, "step": 23899 }, { "epoch": 0.61, "grad_norm": 1.4551746845245361, "learning_rate": 6.89199251633498e-06, "loss": 0.7261, "step": 23900 }, { "epoch": 0.61, "grad_norm": 2.146322011947632, "learning_rate": 6.891203496932791e-06, "loss": 0.5359, "step": 23901 }, { "epoch": 0.61, "grad_norm": 1.1767233610153198, "learning_rate": 6.8904144989544765e-06, "loss": 0.3615, "step": 23902 }, { "epoch": 0.61, "grad_norm": 1.976860523223877, "learning_rate": 6.88962552240547e-06, "loss": 0.6037, "step": 23903 }, { "epoch": 0.61, "grad_norm": 0.9653282761573792, "learning_rate": 6.88883656729121e-06, "loss": 0.4824, "step": 23904 }, { "epoch": 0.61, "grad_norm": 1.5559779405593872, "learning_rate": 6.888047633617137e-06, "loss": 0.466, "step": 23905 }, { "epoch": 0.61, "grad_norm": 1.719949722290039, "learning_rate": 6.887258721388681e-06, "loss": 0.5629, "step": 23906 }, { "epoch": 0.61, "grad_norm": 1.178372859954834, "learning_rate": 6.886469830611285e-06, "loss": 0.4997, "step": 23907 }, { "epoch": 0.61, "grad_norm": 1.2748134136199951, "learning_rate": 6.8856809612903805e-06, "loss": 0.4223, "step": 23908 }, { "epoch": 0.61, "grad_norm": 3.103452205657959, "learning_rate": 6.8848921134314096e-06, "loss": 0.5381, "step": 23909 }, { "epoch": 0.61, "grad_norm": 1.1234757900238037, "learning_rate": 6.884103287039801e-06, "loss": 0.5769, "step": 23910 }, { "epoch": 0.61, "grad_norm": 2.180413246154785, "learning_rate": 6.883314482120995e-06, "loss": 0.6198, "step": 23911 }, { "epoch": 0.61, "grad_norm": 2.5131707191467285, "learning_rate": 6.882525698680431e-06, "loss": 0.6092, "step": 23912 }, { "epoch": 0.61, "grad_norm": 0.9752160906791687, "learning_rate": 6.88173693672354e-06, "loss": 0.4417, "step": 23913 }, { "epoch": 0.61, "grad_norm": 1.7538278102874756, "learning_rate": 6.880948196255759e-06, "loss": 0.5815, "step": 23914 }, { "epoch": 0.61, "grad_norm": 3.7469897270202637, "learning_rate": 6.880159477282522e-06, "loss": 0.4456, "step": 23915 }, { "epoch": 0.61, "grad_norm": 1.612412452697754, "learning_rate": 6.879370779809266e-06, "loss": 0.5284, "step": 23916 }, { "epoch": 0.61, "grad_norm": 1.334855079650879, "learning_rate": 6.878582103841427e-06, "loss": 0.5928, "step": 23917 }, { "epoch": 0.61, "grad_norm": 1.151177167892456, "learning_rate": 6.877793449384438e-06, "loss": 0.3946, "step": 23918 }, { "epoch": 0.61, "grad_norm": 4.644373416900635, "learning_rate": 6.877004816443738e-06, "loss": 0.7955, "step": 23919 }, { "epoch": 0.61, "grad_norm": 1.7144354581832886, "learning_rate": 6.876216205024755e-06, "loss": 0.7086, "step": 23920 }, { "epoch": 0.61, "grad_norm": 3.0745790004730225, "learning_rate": 6.87542761513293e-06, "loss": 0.5902, "step": 23921 }, { "epoch": 0.61, "grad_norm": 2.726604461669922, "learning_rate": 6.874639046773693e-06, "loss": 0.5929, "step": 23922 }, { "epoch": 0.61, "grad_norm": 0.9471423029899597, "learning_rate": 6.8738504999524806e-06, "loss": 0.5654, "step": 23923 }, { "epoch": 0.61, "grad_norm": 1.5653457641601562, "learning_rate": 6.8730619746747285e-06, "loss": 0.4884, "step": 23924 }, { "epoch": 0.61, "grad_norm": 1.325656771659851, "learning_rate": 6.872273470945865e-06, "loss": 0.467, "step": 23925 }, { "epoch": 0.61, "grad_norm": 2.6387338638305664, "learning_rate": 6.871484988771331e-06, "loss": 0.454, "step": 23926 }, { "epoch": 0.61, "grad_norm": 1.4583631753921509, "learning_rate": 6.870696528156555e-06, "loss": 0.6482, "step": 23927 }, { "epoch": 0.61, "grad_norm": 2.9660446643829346, "learning_rate": 6.869908089106973e-06, "loss": 0.6043, "step": 23928 }, { "epoch": 0.61, "grad_norm": 5.5377197265625, "learning_rate": 6.86911967162802e-06, "loss": 0.5983, "step": 23929 }, { "epoch": 0.61, "grad_norm": 2.680271863937378, "learning_rate": 6.868331275725124e-06, "loss": 0.572, "step": 23930 }, { "epoch": 0.61, "grad_norm": 1.213640809059143, "learning_rate": 6.867542901403723e-06, "loss": 0.5473, "step": 23931 }, { "epoch": 0.61, "grad_norm": 1.738077163696289, "learning_rate": 6.8667545486692474e-06, "loss": 0.4792, "step": 23932 }, { "epoch": 0.61, "grad_norm": 1.7358936071395874, "learning_rate": 6.865966217527131e-06, "loss": 0.4509, "step": 23933 }, { "epoch": 0.61, "grad_norm": 1.492120385169983, "learning_rate": 6.865177907982808e-06, "loss": 0.5987, "step": 23934 }, { "epoch": 0.61, "grad_norm": 1.619267463684082, "learning_rate": 6.864389620041707e-06, "loss": 0.3858, "step": 23935 }, { "epoch": 0.61, "grad_norm": 1.6429193019866943, "learning_rate": 6.863601353709266e-06, "loss": 0.5833, "step": 23936 }, { "epoch": 0.61, "grad_norm": 0.766832709312439, "learning_rate": 6.862813108990911e-06, "loss": 0.4434, "step": 23937 }, { "epoch": 0.61, "grad_norm": 4.426865577697754, "learning_rate": 6.862024885892079e-06, "loss": 0.3467, "step": 23938 }, { "epoch": 0.61, "grad_norm": 1.4367640018463135, "learning_rate": 6.861236684418199e-06, "loss": 0.5469, "step": 23939 }, { "epoch": 0.61, "grad_norm": 1.594775676727295, "learning_rate": 6.860448504574703e-06, "loss": 0.5419, "step": 23940 }, { "epoch": 0.61, "grad_norm": 1.3349535465240479, "learning_rate": 6.859660346367025e-06, "loss": 0.5269, "step": 23941 }, { "epoch": 0.61, "grad_norm": 3.397299289703369, "learning_rate": 6.8588722098005935e-06, "loss": 0.7304, "step": 23942 }, { "epoch": 0.61, "grad_norm": 10.593498229980469, "learning_rate": 6.858084094880844e-06, "loss": 0.5265, "step": 23943 }, { "epoch": 0.61, "grad_norm": 1.5119328498840332, "learning_rate": 6.8572960016132025e-06, "loss": 0.5488, "step": 23944 }, { "epoch": 0.61, "grad_norm": 1.299095869064331, "learning_rate": 6.856507930003102e-06, "loss": 0.6291, "step": 23945 }, { "epoch": 0.61, "grad_norm": 1.7011914253234863, "learning_rate": 6.855719880055976e-06, "loss": 0.5797, "step": 23946 }, { "epoch": 0.61, "grad_norm": 7.962972164154053, "learning_rate": 6.854931851777251e-06, "loss": 0.6544, "step": 23947 }, { "epoch": 0.61, "grad_norm": 3.2732577323913574, "learning_rate": 6.854143845172363e-06, "loss": 0.6373, "step": 23948 }, { "epoch": 0.61, "grad_norm": 1.242000699043274, "learning_rate": 6.8533558602467355e-06, "loss": 0.4634, "step": 23949 }, { "epoch": 0.61, "grad_norm": 1.6479034423828125, "learning_rate": 6.8525678970058066e-06, "loss": 0.4985, "step": 23950 }, { "epoch": 0.61, "grad_norm": 1.6000761985778809, "learning_rate": 6.851779955454998e-06, "loss": 0.5629, "step": 23951 }, { "epoch": 0.61, "grad_norm": 6.633377552032471, "learning_rate": 6.850992035599746e-06, "loss": 0.5413, "step": 23952 }, { "epoch": 0.61, "grad_norm": 1.2925572395324707, "learning_rate": 6.850204137445479e-06, "loss": 0.561, "step": 23953 }, { "epoch": 0.61, "grad_norm": 2.629243850708008, "learning_rate": 6.849416260997625e-06, "loss": 0.594, "step": 23954 }, { "epoch": 0.61, "grad_norm": 1.4773284196853638, "learning_rate": 6.848628406261617e-06, "loss": 0.6241, "step": 23955 }, { "epoch": 0.61, "grad_norm": 1.3612287044525146, "learning_rate": 6.847840573242879e-06, "loss": 0.5323, "step": 23956 }, { "epoch": 0.61, "grad_norm": 9.123564720153809, "learning_rate": 6.847052761946844e-06, "loss": 0.6268, "step": 23957 }, { "epoch": 0.61, "grad_norm": 1.779000163078308, "learning_rate": 6.846264972378942e-06, "loss": 0.5339, "step": 23958 }, { "epoch": 0.61, "grad_norm": 2.1981027126312256, "learning_rate": 6.8454772045446e-06, "loss": 0.597, "step": 23959 }, { "epoch": 0.61, "grad_norm": 3.5226640701293945, "learning_rate": 6.844689458449247e-06, "loss": 0.6864, "step": 23960 }, { "epoch": 0.61, "grad_norm": 0.915401041507721, "learning_rate": 6.843901734098313e-06, "loss": 0.41, "step": 23961 }, { "epoch": 0.61, "grad_norm": 1.2149114608764648, "learning_rate": 6.843114031497225e-06, "loss": 0.3999, "step": 23962 }, { "epoch": 0.61, "grad_norm": 1.1113542318344116, "learning_rate": 6.84232635065141e-06, "loss": 0.515, "step": 23963 }, { "epoch": 0.61, "grad_norm": 1.630111575126648, "learning_rate": 6.841538691566299e-06, "loss": 0.5414, "step": 23964 }, { "epoch": 0.61, "grad_norm": 2.578343152999878, "learning_rate": 6.840751054247319e-06, "loss": 0.5699, "step": 23965 }, { "epoch": 0.61, "grad_norm": 1.9123423099517822, "learning_rate": 6.839963438699897e-06, "loss": 0.4934, "step": 23966 }, { "epoch": 0.61, "grad_norm": 1.1589487791061401, "learning_rate": 6.839175844929464e-06, "loss": 0.4703, "step": 23967 }, { "epoch": 0.61, "grad_norm": 1.3225868940353394, "learning_rate": 6.838388272941443e-06, "loss": 0.6529, "step": 23968 }, { "epoch": 0.61, "grad_norm": 1.1501606702804565, "learning_rate": 6.837600722741264e-06, "loss": 0.607, "step": 23969 }, { "epoch": 0.61, "grad_norm": 2.735322952270508, "learning_rate": 6.836813194334355e-06, "loss": 0.5934, "step": 23970 }, { "epoch": 0.61, "grad_norm": 1.3488935232162476, "learning_rate": 6.836025687726142e-06, "loss": 0.4465, "step": 23971 }, { "epoch": 0.61, "grad_norm": 2.5550343990325928, "learning_rate": 6.835238202922052e-06, "loss": 0.7246, "step": 23972 }, { "epoch": 0.61, "grad_norm": 1.9687020778656006, "learning_rate": 6.834450739927512e-06, "loss": 0.6616, "step": 23973 }, { "epoch": 0.61, "grad_norm": 1.2793456315994263, "learning_rate": 6.83366329874795e-06, "loss": 0.6113, "step": 23974 }, { "epoch": 0.61, "grad_norm": 1.717802882194519, "learning_rate": 6.832875879388789e-06, "loss": 0.583, "step": 23975 }, { "epoch": 0.61, "grad_norm": 2.7278356552124023, "learning_rate": 6.832088481855457e-06, "loss": 0.801, "step": 23976 }, { "epoch": 0.61, "grad_norm": 1.387220859527588, "learning_rate": 6.831301106153383e-06, "loss": 0.4026, "step": 23977 }, { "epoch": 0.61, "grad_norm": 1.0795714855194092, "learning_rate": 6.83051375228799e-06, "loss": 0.4351, "step": 23978 }, { "epoch": 0.61, "grad_norm": 3.0987942218780518, "learning_rate": 6.829726420264706e-06, "loss": 0.6524, "step": 23979 }, { "epoch": 0.61, "grad_norm": 2.406017303466797, "learning_rate": 6.828939110088954e-06, "loss": 0.6848, "step": 23980 }, { "epoch": 0.61, "grad_norm": 3.3361449241638184, "learning_rate": 6.828151821766162e-06, "loss": 0.6614, "step": 23981 }, { "epoch": 0.61, "grad_norm": 2.725853681564331, "learning_rate": 6.827364555301757e-06, "loss": 0.663, "step": 23982 }, { "epoch": 0.61, "grad_norm": 1.6338605880737305, "learning_rate": 6.82657731070116e-06, "loss": 0.6379, "step": 23983 }, { "epoch": 0.61, "grad_norm": 1.39536714553833, "learning_rate": 6.8257900879698e-06, "loss": 0.497, "step": 23984 }, { "epoch": 0.61, "grad_norm": 1.5830837488174438, "learning_rate": 6.825002887113099e-06, "loss": 0.6302, "step": 23985 }, { "epoch": 0.61, "grad_norm": 4.374743461608887, "learning_rate": 6.824215708136484e-06, "loss": 0.5678, "step": 23986 }, { "epoch": 0.61, "grad_norm": 1.4419763088226318, "learning_rate": 6.8234285510453795e-06, "loss": 0.6551, "step": 23987 }, { "epoch": 0.61, "grad_norm": 1.1903921365737915, "learning_rate": 6.822641415845208e-06, "loss": 0.3831, "step": 23988 }, { "epoch": 0.61, "grad_norm": 6.484899520874023, "learning_rate": 6.821854302541398e-06, "loss": 0.5586, "step": 23989 }, { "epoch": 0.61, "grad_norm": 1.343283772468567, "learning_rate": 6.8210672111393695e-06, "loss": 0.5061, "step": 23990 }, { "epoch": 0.61, "grad_norm": 2.113856554031372, "learning_rate": 6.820280141644551e-06, "loss": 0.5095, "step": 23991 }, { "epoch": 0.61, "grad_norm": 1.5277353525161743, "learning_rate": 6.819493094062361e-06, "loss": 0.6421, "step": 23992 }, { "epoch": 0.61, "grad_norm": 4.501242160797119, "learning_rate": 6.8187060683982275e-06, "loss": 0.7166, "step": 23993 }, { "epoch": 0.61, "grad_norm": 12.176604270935059, "learning_rate": 6.817919064657575e-06, "loss": 0.7449, "step": 23994 }, { "epoch": 0.62, "grad_norm": 3.6782660484313965, "learning_rate": 6.817132082845823e-06, "loss": 0.7524, "step": 23995 }, { "epoch": 0.62, "grad_norm": 1.4514765739440918, "learning_rate": 6.816345122968399e-06, "loss": 0.7535, "step": 23996 }, { "epoch": 0.62, "grad_norm": 2.1591858863830566, "learning_rate": 6.815558185030722e-06, "loss": 0.6287, "step": 23997 }, { "epoch": 0.62, "grad_norm": 2.7419190406799316, "learning_rate": 6.814771269038216e-06, "loss": 0.543, "step": 23998 }, { "epoch": 0.62, "grad_norm": 1.1291002035140991, "learning_rate": 6.8139843749963096e-06, "loss": 0.3486, "step": 23999 }, { "epoch": 0.62, "grad_norm": 1.5355170965194702, "learning_rate": 6.813197502910418e-06, "loss": 0.4259, "step": 24000 }, { "epoch": 0.62, "grad_norm": 1.425169587135315, "learning_rate": 6.81241065278597e-06, "loss": 0.5571, "step": 24001 }, { "epoch": 0.62, "grad_norm": 1.4754400253295898, "learning_rate": 6.811623824628382e-06, "loss": 0.5445, "step": 24002 }, { "epoch": 0.62, "grad_norm": 1.5738129615783691, "learning_rate": 6.8108370184430816e-06, "loss": 0.5528, "step": 24003 }, { "epoch": 0.62, "grad_norm": 1.032833218574524, "learning_rate": 6.810050234235488e-06, "loss": 0.5132, "step": 24004 }, { "epoch": 0.62, "grad_norm": 2.9987330436706543, "learning_rate": 6.809263472011023e-06, "loss": 0.5719, "step": 24005 }, { "epoch": 0.62, "grad_norm": 4.436177730560303, "learning_rate": 6.808476731775111e-06, "loss": 0.7815, "step": 24006 }, { "epoch": 0.62, "grad_norm": 1.0571975708007812, "learning_rate": 6.807690013533171e-06, "loss": 0.5541, "step": 24007 }, { "epoch": 0.62, "grad_norm": 2.07236647605896, "learning_rate": 6.806903317290628e-06, "loss": 0.5343, "step": 24008 }, { "epoch": 0.62, "grad_norm": 1.8903038501739502, "learning_rate": 6.8061166430529e-06, "loss": 0.5283, "step": 24009 }, { "epoch": 0.62, "grad_norm": 2.0122697353363037, "learning_rate": 6.805329990825408e-06, "loss": 0.522, "step": 24010 }, { "epoch": 0.62, "grad_norm": 1.1793949604034424, "learning_rate": 6.804543360613578e-06, "loss": 0.4377, "step": 24011 }, { "epoch": 0.62, "grad_norm": 2.302412271499634, "learning_rate": 6.803756752422823e-06, "loss": 0.6138, "step": 24012 }, { "epoch": 0.62, "grad_norm": 2.4840691089630127, "learning_rate": 6.802970166258573e-06, "loss": 0.5982, "step": 24013 }, { "epoch": 0.62, "grad_norm": 1.2308871746063232, "learning_rate": 6.802183602126242e-06, "loss": 0.442, "step": 24014 }, { "epoch": 0.62, "grad_norm": 1.3131128549575806, "learning_rate": 6.801397060031254e-06, "loss": 0.4541, "step": 24015 }, { "epoch": 0.62, "grad_norm": 1.3648860454559326, "learning_rate": 6.800610539979024e-06, "loss": 0.6281, "step": 24016 }, { "epoch": 0.62, "grad_norm": 1.1618645191192627, "learning_rate": 6.799824041974978e-06, "loss": 0.5152, "step": 24017 }, { "epoch": 0.62, "grad_norm": 1.3572392463684082, "learning_rate": 6.799037566024537e-06, "loss": 0.3958, "step": 24018 }, { "epoch": 0.62, "grad_norm": 1.3553122282028198, "learning_rate": 6.798251112133114e-06, "loss": 0.5387, "step": 24019 }, { "epoch": 0.62, "grad_norm": 1.2965967655181885, "learning_rate": 6.797464680306136e-06, "loss": 0.4402, "step": 24020 }, { "epoch": 0.62, "grad_norm": 2.2233290672302246, "learning_rate": 6.796678270549018e-06, "loss": 0.517, "step": 24021 }, { "epoch": 0.62, "grad_norm": 2.279101848602295, "learning_rate": 6.795891882867179e-06, "loss": 0.482, "step": 24022 }, { "epoch": 0.62, "grad_norm": 2.0204684734344482, "learning_rate": 6.795105517266043e-06, "loss": 0.6261, "step": 24023 }, { "epoch": 0.62, "grad_norm": 1.6182591915130615, "learning_rate": 6.794319173751025e-06, "loss": 0.5275, "step": 24024 }, { "epoch": 0.62, "grad_norm": 1.384963870048523, "learning_rate": 6.793532852327546e-06, "loss": 0.4189, "step": 24025 }, { "epoch": 0.62, "grad_norm": 2.2017064094543457, "learning_rate": 6.792746553001024e-06, "loss": 0.5162, "step": 24026 }, { "epoch": 0.62, "grad_norm": 1.012210726737976, "learning_rate": 6.791960275776878e-06, "loss": 0.611, "step": 24027 }, { "epoch": 0.62, "grad_norm": 0.9883646368980408, "learning_rate": 6.791174020660524e-06, "loss": 0.4718, "step": 24028 }, { "epoch": 0.62, "grad_norm": 1.4840282201766968, "learning_rate": 6.790387787657384e-06, "loss": 0.6076, "step": 24029 }, { "epoch": 0.62, "grad_norm": 1.3638808727264404, "learning_rate": 6.789601576772876e-06, "loss": 0.5593, "step": 24030 }, { "epoch": 0.62, "grad_norm": 2.753631830215454, "learning_rate": 6.788815388012415e-06, "loss": 0.5694, "step": 24031 }, { "epoch": 0.62, "grad_norm": 1.7186918258666992, "learning_rate": 6.788029221381423e-06, "loss": 0.503, "step": 24032 }, { "epoch": 0.62, "grad_norm": 1.6581273078918457, "learning_rate": 6.787243076885315e-06, "loss": 0.5299, "step": 24033 }, { "epoch": 0.62, "grad_norm": 2.2301673889160156, "learning_rate": 6.786456954529508e-06, "loss": 0.3586, "step": 24034 }, { "epoch": 0.62, "grad_norm": 1.7933766841888428, "learning_rate": 6.7856708543194225e-06, "loss": 0.6572, "step": 24035 }, { "epoch": 0.62, "grad_norm": 4.166688442230225, "learning_rate": 6.784884776260472e-06, "loss": 0.6222, "step": 24036 }, { "epoch": 0.62, "grad_norm": 1.5439292192459106, "learning_rate": 6.7840987203580785e-06, "loss": 0.5644, "step": 24037 }, { "epoch": 0.62, "grad_norm": 2.35758638381958, "learning_rate": 6.7833126866176534e-06, "loss": 0.562, "step": 24038 }, { "epoch": 0.62, "grad_norm": 1.1584209203720093, "learning_rate": 6.782526675044616e-06, "loss": 0.4816, "step": 24039 }, { "epoch": 0.62, "grad_norm": 1.353269338607788, "learning_rate": 6.781740685644387e-06, "loss": 0.5663, "step": 24040 }, { "epoch": 0.62, "grad_norm": 1.8578108549118042, "learning_rate": 6.780954718422377e-06, "loss": 0.5446, "step": 24041 }, { "epoch": 0.62, "grad_norm": 1.4742788076400757, "learning_rate": 6.780168773384006e-06, "loss": 0.5332, "step": 24042 }, { "epoch": 0.62, "grad_norm": 1.9181113243103027, "learning_rate": 6.779382850534686e-06, "loss": 0.6319, "step": 24043 }, { "epoch": 0.62, "grad_norm": 3.5879061222076416, "learning_rate": 6.778596949879839e-06, "loss": 0.6215, "step": 24044 }, { "epoch": 0.62, "grad_norm": 1.6170958280563354, "learning_rate": 6.777811071424876e-06, "loss": 0.5941, "step": 24045 }, { "epoch": 0.62, "grad_norm": 16.902523040771484, "learning_rate": 6.777025215175215e-06, "loss": 0.5834, "step": 24046 }, { "epoch": 0.62, "grad_norm": 1.2239148616790771, "learning_rate": 6.7762393811362716e-06, "loss": 0.5525, "step": 24047 }, { "epoch": 0.62, "grad_norm": 2.576852560043335, "learning_rate": 6.775453569313462e-06, "loss": 0.5546, "step": 24048 }, { "epoch": 0.62, "grad_norm": 1.1234546899795532, "learning_rate": 6.7746677797122e-06, "loss": 0.4607, "step": 24049 }, { "epoch": 0.62, "grad_norm": 1.051648736000061, "learning_rate": 6.7738820123379e-06, "loss": 0.5746, "step": 24050 }, { "epoch": 0.62, "grad_norm": 2.24507474899292, "learning_rate": 6.773096267195978e-06, "loss": 0.4513, "step": 24051 }, { "epoch": 0.62, "grad_norm": 3.1493630409240723, "learning_rate": 6.7723105442918525e-06, "loss": 0.481, "step": 24052 }, { "epoch": 0.62, "grad_norm": 1.79440176486969, "learning_rate": 6.771524843630932e-06, "loss": 0.4852, "step": 24053 }, { "epoch": 0.62, "grad_norm": 1.369185209274292, "learning_rate": 6.770739165218636e-06, "loss": 0.5004, "step": 24054 }, { "epoch": 0.62, "grad_norm": 4.569916725158691, "learning_rate": 6.769953509060374e-06, "loss": 0.576, "step": 24055 }, { "epoch": 0.62, "grad_norm": 1.7129144668579102, "learning_rate": 6.769167875161566e-06, "loss": 0.6805, "step": 24056 }, { "epoch": 0.62, "grad_norm": 5.200899600982666, "learning_rate": 6.768382263527622e-06, "loss": 0.5268, "step": 24057 }, { "epoch": 0.62, "grad_norm": 1.786534309387207, "learning_rate": 6.767596674163957e-06, "loss": 0.5444, "step": 24058 }, { "epoch": 0.62, "grad_norm": 9.486761093139648, "learning_rate": 6.7668111070759855e-06, "loss": 0.4026, "step": 24059 }, { "epoch": 0.62, "grad_norm": 5.174485683441162, "learning_rate": 6.766025562269119e-06, "loss": 0.5854, "step": 24060 }, { "epoch": 0.62, "grad_norm": 3.5880963802337646, "learning_rate": 6.765240039748775e-06, "loss": 0.8039, "step": 24061 }, { "epoch": 0.62, "grad_norm": 4.225072860717773, "learning_rate": 6.764454539520362e-06, "loss": 0.6169, "step": 24062 }, { "epoch": 0.62, "grad_norm": 1.561317801475525, "learning_rate": 6.763669061589296e-06, "loss": 0.5392, "step": 24063 }, { "epoch": 0.62, "grad_norm": 1.6096447706222534, "learning_rate": 6.762883605960992e-06, "loss": 0.5772, "step": 24064 }, { "epoch": 0.62, "grad_norm": 5.049702167510986, "learning_rate": 6.762098172640856e-06, "loss": 0.4534, "step": 24065 }, { "epoch": 0.62, "grad_norm": 9.465691566467285, "learning_rate": 6.76131276163431e-06, "loss": 0.4894, "step": 24066 }, { "epoch": 0.62, "grad_norm": 1.1280633211135864, "learning_rate": 6.760527372946759e-06, "loss": 0.4786, "step": 24067 }, { "epoch": 0.62, "grad_norm": 1.6774661540985107, "learning_rate": 6.759742006583619e-06, "loss": 0.5506, "step": 24068 }, { "epoch": 0.62, "grad_norm": 1.0582278966903687, "learning_rate": 6.7589566625503e-06, "loss": 0.461, "step": 24069 }, { "epoch": 0.62, "grad_norm": 1.3734277486801147, "learning_rate": 6.758171340852216e-06, "loss": 0.3932, "step": 24070 }, { "epoch": 0.62, "grad_norm": 2.2037062644958496, "learning_rate": 6.75738604149478e-06, "loss": 0.6281, "step": 24071 }, { "epoch": 0.62, "grad_norm": 17.03104019165039, "learning_rate": 6.7566007644834005e-06, "loss": 0.7336, "step": 24072 }, { "epoch": 0.62, "grad_norm": 1.195788025856018, "learning_rate": 6.755815509823492e-06, "loss": 0.5344, "step": 24073 }, { "epoch": 0.62, "grad_norm": 1.2050328254699707, "learning_rate": 6.755030277520464e-06, "loss": 0.4605, "step": 24074 }, { "epoch": 0.62, "grad_norm": 1.2996207475662231, "learning_rate": 6.754245067579728e-06, "loss": 0.5387, "step": 24075 }, { "epoch": 0.62, "grad_norm": 2.56925630569458, "learning_rate": 6.753459880006697e-06, "loss": 0.6724, "step": 24076 }, { "epoch": 0.62, "grad_norm": 1.9197908639907837, "learning_rate": 6.7526747148067795e-06, "loss": 0.6956, "step": 24077 }, { "epoch": 0.62, "grad_norm": 2.1674466133117676, "learning_rate": 6.751889571985391e-06, "loss": 0.3117, "step": 24078 }, { "epoch": 0.62, "grad_norm": 1.6882809400558472, "learning_rate": 6.751104451547934e-06, "loss": 0.4571, "step": 24079 }, { "epoch": 0.62, "grad_norm": 3.9136769771575928, "learning_rate": 6.7503193534998254e-06, "loss": 0.4368, "step": 24080 }, { "epoch": 0.62, "grad_norm": 2.8123481273651123, "learning_rate": 6.7495342778464765e-06, "loss": 0.7577, "step": 24081 }, { "epoch": 0.62, "grad_norm": 2.504626750946045, "learning_rate": 6.748749224593294e-06, "loss": 0.7505, "step": 24082 }, { "epoch": 0.62, "grad_norm": 2.383928060531616, "learning_rate": 6.747964193745691e-06, "loss": 0.5128, "step": 24083 }, { "epoch": 0.62, "grad_norm": 1.7732130289077759, "learning_rate": 6.747179185309072e-06, "loss": 0.4751, "step": 24084 }, { "epoch": 0.62, "grad_norm": 1.6060088872909546, "learning_rate": 6.746394199288853e-06, "loss": 0.6726, "step": 24085 }, { "epoch": 0.62, "grad_norm": 1.6588952541351318, "learning_rate": 6.74560923569044e-06, "loss": 0.3867, "step": 24086 }, { "epoch": 0.62, "grad_norm": 1.5052341222763062, "learning_rate": 6.7448242945192435e-06, "loss": 0.5439, "step": 24087 }, { "epoch": 0.62, "grad_norm": 2.33353590965271, "learning_rate": 6.744039375780673e-06, "loss": 0.5727, "step": 24088 }, { "epoch": 0.62, "grad_norm": 1.607951045036316, "learning_rate": 6.743254479480137e-06, "loss": 0.4554, "step": 24089 }, { "epoch": 0.62, "grad_norm": 1.2981040477752686, "learning_rate": 6.742469605623048e-06, "loss": 0.4601, "step": 24090 }, { "epoch": 0.62, "grad_norm": 2.241919755935669, "learning_rate": 6.741684754214808e-06, "loss": 0.6163, "step": 24091 }, { "epoch": 0.62, "grad_norm": 2.1936049461364746, "learning_rate": 6.740899925260831e-06, "loss": 0.759, "step": 24092 }, { "epoch": 0.62, "grad_norm": 2.6575279235839844, "learning_rate": 6.740115118766526e-06, "loss": 0.51, "step": 24093 }, { "epoch": 0.62, "grad_norm": 1.7649034261703491, "learning_rate": 6.739330334737297e-06, "loss": 0.5055, "step": 24094 }, { "epoch": 0.62, "grad_norm": 1.997157335281372, "learning_rate": 6.738545573178556e-06, "loss": 0.4621, "step": 24095 }, { "epoch": 0.62, "grad_norm": 0.8968907594680786, "learning_rate": 6.737760834095712e-06, "loss": 0.5273, "step": 24096 }, { "epoch": 0.62, "grad_norm": 1.7416454553604126, "learning_rate": 6.73697611749417e-06, "loss": 0.4667, "step": 24097 }, { "epoch": 0.62, "grad_norm": 1.2088574171066284, "learning_rate": 6.736191423379337e-06, "loss": 0.5993, "step": 24098 }, { "epoch": 0.62, "grad_norm": 2.5570247173309326, "learning_rate": 6.735406751756621e-06, "loss": 0.606, "step": 24099 }, { "epoch": 0.62, "grad_norm": 1.0890398025512695, "learning_rate": 6.7346221026314315e-06, "loss": 0.4082, "step": 24100 }, { "epoch": 0.62, "grad_norm": 1.1208572387695312, "learning_rate": 6.733837476009175e-06, "loss": 0.4925, "step": 24101 }, { "epoch": 0.62, "grad_norm": 8.490772247314453, "learning_rate": 6.7330528718952586e-06, "loss": 0.5331, "step": 24102 }, { "epoch": 0.62, "grad_norm": 4.084012985229492, "learning_rate": 6.732268290295091e-06, "loss": 0.5745, "step": 24103 }, { "epoch": 0.62, "grad_norm": 1.2620269060134888, "learning_rate": 6.731483731214074e-06, "loss": 0.5078, "step": 24104 }, { "epoch": 0.62, "grad_norm": 1.7772961854934692, "learning_rate": 6.7306991946576195e-06, "loss": 0.4426, "step": 24105 }, { "epoch": 0.62, "grad_norm": 1.2509827613830566, "learning_rate": 6.729914680631133e-06, "loss": 0.5457, "step": 24106 }, { "epoch": 0.62, "grad_norm": 2.032308340072632, "learning_rate": 6.729130189140019e-06, "loss": 0.4738, "step": 24107 }, { "epoch": 0.62, "grad_norm": 1.4677224159240723, "learning_rate": 6.7283457201896865e-06, "loss": 0.5588, "step": 24108 }, { "epoch": 0.62, "grad_norm": 1.3062105178833008, "learning_rate": 6.727561273785537e-06, "loss": 0.53, "step": 24109 }, { "epoch": 0.62, "grad_norm": 8.279508590698242, "learning_rate": 6.726776849932982e-06, "loss": 0.6954, "step": 24110 }, { "epoch": 0.62, "grad_norm": 1.154750108718872, "learning_rate": 6.725992448637422e-06, "loss": 0.4891, "step": 24111 }, { "epoch": 0.62, "grad_norm": 1.5461786985397339, "learning_rate": 6.725208069904265e-06, "loss": 0.5046, "step": 24112 }, { "epoch": 0.62, "grad_norm": 1.7334563732147217, "learning_rate": 6.7244237137389205e-06, "loss": 0.6174, "step": 24113 }, { "epoch": 0.62, "grad_norm": 1.6548329591751099, "learning_rate": 6.723639380146786e-06, "loss": 0.6596, "step": 24114 }, { "epoch": 0.62, "grad_norm": 2.852538585662842, "learning_rate": 6.722855069133273e-06, "loss": 0.4674, "step": 24115 }, { "epoch": 0.62, "grad_norm": 4.019576549530029, "learning_rate": 6.722070780703782e-06, "loss": 0.6897, "step": 24116 }, { "epoch": 0.62, "grad_norm": 3.4371883869171143, "learning_rate": 6.721286514863718e-06, "loss": 0.5281, "step": 24117 }, { "epoch": 0.62, "grad_norm": 2.831786870956421, "learning_rate": 6.720502271618492e-06, "loss": 0.5711, "step": 24118 }, { "epoch": 0.62, "grad_norm": 1.6974149942398071, "learning_rate": 6.7197180509735e-06, "loss": 0.6657, "step": 24119 }, { "epoch": 0.62, "grad_norm": 1.8889830112457275, "learning_rate": 6.718933852934154e-06, "loss": 0.4899, "step": 24120 }, { "epoch": 0.62, "grad_norm": 3.4315505027770996, "learning_rate": 6.718149677505852e-06, "loss": 0.4932, "step": 24121 }, { "epoch": 0.62, "grad_norm": 1.1864001750946045, "learning_rate": 6.717365524694e-06, "loss": 0.5023, "step": 24122 }, { "epoch": 0.62, "grad_norm": 6.0890655517578125, "learning_rate": 6.716581394504005e-06, "loss": 0.4579, "step": 24123 }, { "epoch": 0.62, "grad_norm": 1.4973692893981934, "learning_rate": 6.715797286941266e-06, "loss": 0.5729, "step": 24124 }, { "epoch": 0.62, "grad_norm": 1.7227058410644531, "learning_rate": 6.715013202011191e-06, "loss": 0.7142, "step": 24125 }, { "epoch": 0.62, "grad_norm": 2.248594284057617, "learning_rate": 6.714229139719179e-06, "loss": 0.525, "step": 24126 }, { "epoch": 0.62, "grad_norm": 1.2424567937850952, "learning_rate": 6.713445100070637e-06, "loss": 0.5214, "step": 24127 }, { "epoch": 0.62, "grad_norm": 1.2108711004257202, "learning_rate": 6.712661083070966e-06, "loss": 0.3532, "step": 24128 }, { "epoch": 0.62, "grad_norm": 1.3073087930679321, "learning_rate": 6.711877088725567e-06, "loss": 0.4601, "step": 24129 }, { "epoch": 0.62, "grad_norm": 1.1568537950515747, "learning_rate": 6.71109311703985e-06, "loss": 0.4235, "step": 24130 }, { "epoch": 0.62, "grad_norm": 1.8120063543319702, "learning_rate": 6.710309168019209e-06, "loss": 0.4826, "step": 24131 }, { "epoch": 0.62, "grad_norm": 1.4868199825286865, "learning_rate": 6.709525241669053e-06, "loss": 0.6027, "step": 24132 }, { "epoch": 0.62, "grad_norm": 1.2625296115875244, "learning_rate": 6.708741337994779e-06, "loss": 0.4545, "step": 24133 }, { "epoch": 0.62, "grad_norm": 2.5221683979034424, "learning_rate": 6.707957457001795e-06, "loss": 0.6622, "step": 24134 }, { "epoch": 0.62, "grad_norm": 2.7777340412139893, "learning_rate": 6.707173598695499e-06, "loss": 0.6876, "step": 24135 }, { "epoch": 0.62, "grad_norm": 1.7765246629714966, "learning_rate": 6.706389763081292e-06, "loss": 0.6134, "step": 24136 }, { "epoch": 0.62, "grad_norm": 1.55404794216156, "learning_rate": 6.70560595016458e-06, "loss": 0.6135, "step": 24137 }, { "epoch": 0.62, "grad_norm": 2.0785481929779053, "learning_rate": 6.70482215995076e-06, "loss": 0.5582, "step": 24138 }, { "epoch": 0.62, "grad_norm": 5.224449157714844, "learning_rate": 6.704038392445238e-06, "loss": 0.4768, "step": 24139 }, { "epoch": 0.62, "grad_norm": 1.8124301433563232, "learning_rate": 6.70325464765341e-06, "loss": 0.4417, "step": 24140 }, { "epoch": 0.62, "grad_norm": 1.332143783569336, "learning_rate": 6.702470925580679e-06, "loss": 0.5057, "step": 24141 }, { "epoch": 0.62, "grad_norm": 0.9619943499565125, "learning_rate": 6.701687226232451e-06, "loss": 0.5175, "step": 24142 }, { "epoch": 0.62, "grad_norm": 1.1486822366714478, "learning_rate": 6.70090354961412e-06, "loss": 0.3684, "step": 24143 }, { "epoch": 0.62, "grad_norm": 1.4631953239440918, "learning_rate": 6.70011989573109e-06, "loss": 0.6735, "step": 24144 }, { "epoch": 0.62, "grad_norm": 3.3456172943115234, "learning_rate": 6.699336264588758e-06, "loss": 0.6021, "step": 24145 }, { "epoch": 0.62, "grad_norm": 8.622075080871582, "learning_rate": 6.698552656192527e-06, "loss": 0.7754, "step": 24146 }, { "epoch": 0.62, "grad_norm": 1.8172430992126465, "learning_rate": 6.697769070547799e-06, "loss": 0.5526, "step": 24147 }, { "epoch": 0.62, "grad_norm": 1.2798655033111572, "learning_rate": 6.69698550765997e-06, "loss": 0.5096, "step": 24148 }, { "epoch": 0.62, "grad_norm": 1.2390809059143066, "learning_rate": 6.6962019675344435e-06, "loss": 0.5192, "step": 24149 }, { "epoch": 0.62, "grad_norm": 1.3174042701721191, "learning_rate": 6.695418450176616e-06, "loss": 0.6238, "step": 24150 }, { "epoch": 0.62, "grad_norm": 1.1471368074417114, "learning_rate": 6.694634955591891e-06, "loss": 0.4899, "step": 24151 }, { "epoch": 0.62, "grad_norm": 2.221266269683838, "learning_rate": 6.693851483785663e-06, "loss": 0.6864, "step": 24152 }, { "epoch": 0.62, "grad_norm": 1.5724927186965942, "learning_rate": 6.693068034763333e-06, "loss": 0.6614, "step": 24153 }, { "epoch": 0.62, "grad_norm": 1.4976390600204468, "learning_rate": 6.692284608530301e-06, "loss": 0.5915, "step": 24154 }, { "epoch": 0.62, "grad_norm": 4.489410400390625, "learning_rate": 6.691501205091966e-06, "loss": 0.5511, "step": 24155 }, { "epoch": 0.62, "grad_norm": 1.4170923233032227, "learning_rate": 6.690717824453726e-06, "loss": 0.5854, "step": 24156 }, { "epoch": 0.62, "grad_norm": 1.9044197797775269, "learning_rate": 6.689934466620979e-06, "loss": 0.495, "step": 24157 }, { "epoch": 0.62, "grad_norm": 1.0164846181869507, "learning_rate": 6.6891511315991235e-06, "loss": 0.4283, "step": 24158 }, { "epoch": 0.62, "grad_norm": 1.7988238334655762, "learning_rate": 6.68836781939356e-06, "loss": 0.6496, "step": 24159 }, { "epoch": 0.62, "grad_norm": 2.5050580501556396, "learning_rate": 6.687584530009683e-06, "loss": 0.7514, "step": 24160 }, { "epoch": 0.62, "grad_norm": 1.719908595085144, "learning_rate": 6.686801263452896e-06, "loss": 0.5506, "step": 24161 }, { "epoch": 0.62, "grad_norm": 1.8086930513381958, "learning_rate": 6.686018019728588e-06, "loss": 0.5733, "step": 24162 }, { "epoch": 0.62, "grad_norm": 1.973888635635376, "learning_rate": 6.685234798842166e-06, "loss": 0.5384, "step": 24163 }, { "epoch": 0.62, "grad_norm": 6.585511207580566, "learning_rate": 6.684451600799022e-06, "loss": 0.62, "step": 24164 }, { "epoch": 0.62, "grad_norm": 7.203404426574707, "learning_rate": 6.683668425604552e-06, "loss": 0.7546, "step": 24165 }, { "epoch": 0.62, "grad_norm": 1.4387904405593872, "learning_rate": 6.68288527326416e-06, "loss": 0.4933, "step": 24166 }, { "epoch": 0.62, "grad_norm": 1.4824135303497314, "learning_rate": 6.682102143783236e-06, "loss": 0.6143, "step": 24167 }, { "epoch": 0.62, "grad_norm": 1.3961163759231567, "learning_rate": 6.681319037167181e-06, "loss": 0.5787, "step": 24168 }, { "epoch": 0.62, "grad_norm": 1.3163013458251953, "learning_rate": 6.680535953421389e-06, "loss": 0.5321, "step": 24169 }, { "epoch": 0.62, "grad_norm": 2.750758409500122, "learning_rate": 6.6797528925512564e-06, "loss": 0.4938, "step": 24170 }, { "epoch": 0.62, "grad_norm": 1.1468294858932495, "learning_rate": 6.678969854562184e-06, "loss": 0.4559, "step": 24171 }, { "epoch": 0.62, "grad_norm": 4.785669803619385, "learning_rate": 6.678186839459562e-06, "loss": 0.6004, "step": 24172 }, { "epoch": 0.62, "grad_norm": 8.405019760131836, "learning_rate": 6.677403847248792e-06, "loss": 0.5867, "step": 24173 }, { "epoch": 0.62, "grad_norm": 1.0942916870117188, "learning_rate": 6.676620877935265e-06, "loss": 0.5629, "step": 24174 }, { "epoch": 0.62, "grad_norm": 4.83272647857666, "learning_rate": 6.67583793152438e-06, "loss": 0.5412, "step": 24175 }, { "epoch": 0.62, "grad_norm": 5.976644992828369, "learning_rate": 6.675055008021532e-06, "loss": 0.5482, "step": 24176 }, { "epoch": 0.62, "grad_norm": 1.33652663230896, "learning_rate": 6.674272107432113e-06, "loss": 0.4365, "step": 24177 }, { "epoch": 0.62, "grad_norm": 1.3145939111709595, "learning_rate": 6.673489229761526e-06, "loss": 0.6135, "step": 24178 }, { "epoch": 0.62, "grad_norm": 1.2546758651733398, "learning_rate": 6.672706375015159e-06, "loss": 0.4297, "step": 24179 }, { "epoch": 0.62, "grad_norm": 6.713487148284912, "learning_rate": 6.67192354319841e-06, "loss": 0.6969, "step": 24180 }, { "epoch": 0.62, "grad_norm": 2.366960048675537, "learning_rate": 6.67114073431667e-06, "loss": 0.6397, "step": 24181 }, { "epoch": 0.62, "grad_norm": 5.132049083709717, "learning_rate": 6.670357948375339e-06, "loss": 0.6836, "step": 24182 }, { "epoch": 0.62, "grad_norm": 4.28316593170166, "learning_rate": 6.669575185379811e-06, "loss": 0.6182, "step": 24183 }, { "epoch": 0.62, "grad_norm": 1.6620166301727295, "learning_rate": 6.668792445335475e-06, "loss": 0.4945, "step": 24184 }, { "epoch": 0.62, "grad_norm": 13.05023193359375, "learning_rate": 6.668009728247731e-06, "loss": 0.5719, "step": 24185 }, { "epoch": 0.62, "grad_norm": 1.475242018699646, "learning_rate": 6.667227034121968e-06, "loss": 0.5078, "step": 24186 }, { "epoch": 0.62, "grad_norm": 5.707156181335449, "learning_rate": 6.666444362963585e-06, "loss": 0.6007, "step": 24187 }, { "epoch": 0.62, "grad_norm": 2.6683993339538574, "learning_rate": 6.665661714777971e-06, "loss": 0.588, "step": 24188 }, { "epoch": 0.62, "grad_norm": 1.7331416606903076, "learning_rate": 6.664879089570523e-06, "loss": 0.6442, "step": 24189 }, { "epoch": 0.62, "grad_norm": 2.8249106407165527, "learning_rate": 6.664096487346634e-06, "loss": 0.6558, "step": 24190 }, { "epoch": 0.62, "grad_norm": 0.8349547386169434, "learning_rate": 6.6633139081116934e-06, "loss": 0.4802, "step": 24191 }, { "epoch": 0.62, "grad_norm": 2.0258028507232666, "learning_rate": 6.6625313518711e-06, "loss": 0.3058, "step": 24192 }, { "epoch": 0.62, "grad_norm": 2.953751802444458, "learning_rate": 6.661748818630242e-06, "loss": 0.5595, "step": 24193 }, { "epoch": 0.62, "grad_norm": 5.773693561553955, "learning_rate": 6.660966308394513e-06, "loss": 0.5821, "step": 24194 }, { "epoch": 0.62, "grad_norm": 9.56319808959961, "learning_rate": 6.660183821169308e-06, "loss": 0.4519, "step": 24195 }, { "epoch": 0.62, "grad_norm": 1.3436890840530396, "learning_rate": 6.659401356960018e-06, "loss": 0.4217, "step": 24196 }, { "epoch": 0.62, "grad_norm": 1.6388462781906128, "learning_rate": 6.658618915772035e-06, "loss": 0.683, "step": 24197 }, { "epoch": 0.62, "grad_norm": 1.1610687971115112, "learning_rate": 6.65783649761075e-06, "loss": 0.6448, "step": 24198 }, { "epoch": 0.62, "grad_norm": 1.2854783535003662, "learning_rate": 6.657054102481555e-06, "loss": 0.5884, "step": 24199 }, { "epoch": 0.62, "grad_norm": 7.715897560119629, "learning_rate": 6.656271730389846e-06, "loss": 0.5527, "step": 24200 }, { "epoch": 0.62, "grad_norm": 4.691333293914795, "learning_rate": 6.655489381341009e-06, "loss": 0.5643, "step": 24201 }, { "epoch": 0.62, "grad_norm": 1.164109706878662, "learning_rate": 6.654707055340441e-06, "loss": 0.5503, "step": 24202 }, { "epoch": 0.62, "grad_norm": 1.4905502796173096, "learning_rate": 6.653924752393528e-06, "loss": 0.5586, "step": 24203 }, { "epoch": 0.62, "grad_norm": 1.446709156036377, "learning_rate": 6.653142472505665e-06, "loss": 0.4865, "step": 24204 }, { "epoch": 0.62, "grad_norm": 0.8518725633621216, "learning_rate": 6.652360215682239e-06, "loss": 0.441, "step": 24205 }, { "epoch": 0.62, "grad_norm": 1.2356767654418945, "learning_rate": 6.651577981928643e-06, "loss": 0.5676, "step": 24206 }, { "epoch": 0.62, "grad_norm": 5.195384502410889, "learning_rate": 6.65079577125027e-06, "loss": 0.5134, "step": 24207 }, { "epoch": 0.62, "grad_norm": 2.385965347290039, "learning_rate": 6.650013583652507e-06, "loss": 0.6234, "step": 24208 }, { "epoch": 0.62, "grad_norm": 1.4724148511886597, "learning_rate": 6.649231419140747e-06, "loss": 0.633, "step": 24209 }, { "epoch": 0.62, "grad_norm": 1.7085902690887451, "learning_rate": 6.648449277720377e-06, "loss": 0.5368, "step": 24210 }, { "epoch": 0.62, "grad_norm": 1.702492594718933, "learning_rate": 6.64766715939679e-06, "loss": 0.5555, "step": 24211 }, { "epoch": 0.62, "grad_norm": 6.904205322265625, "learning_rate": 6.6468850641753755e-06, "loss": 0.5837, "step": 24212 }, { "epoch": 0.62, "grad_norm": 1.4423223733901978, "learning_rate": 6.646102992061521e-06, "loss": 0.6143, "step": 24213 }, { "epoch": 0.62, "grad_norm": 1.1936873197555542, "learning_rate": 6.6453209430606204e-06, "loss": 0.3798, "step": 24214 }, { "epoch": 0.62, "grad_norm": 1.2152420282363892, "learning_rate": 6.6445389171780575e-06, "loss": 0.4767, "step": 24215 }, { "epoch": 0.62, "grad_norm": 1.2619898319244385, "learning_rate": 6.643756914419224e-06, "loss": 0.5292, "step": 24216 }, { "epoch": 0.62, "grad_norm": 2.5962955951690674, "learning_rate": 6.6429749347895115e-06, "loss": 0.5915, "step": 24217 }, { "epoch": 0.62, "grad_norm": 1.5441080331802368, "learning_rate": 6.6421929782943036e-06, "loss": 0.6575, "step": 24218 }, { "epoch": 0.62, "grad_norm": 1.4743542671203613, "learning_rate": 6.641411044938996e-06, "loss": 0.5082, "step": 24219 }, { "epoch": 0.62, "grad_norm": 1.0002422332763672, "learning_rate": 6.6406291347289705e-06, "loss": 0.5899, "step": 24220 }, { "epoch": 0.62, "grad_norm": 1.3400708436965942, "learning_rate": 6.63984724766962e-06, "loss": 0.4989, "step": 24221 }, { "epoch": 0.62, "grad_norm": 1.1166950464248657, "learning_rate": 6.63906538376633e-06, "loss": 0.408, "step": 24222 }, { "epoch": 0.62, "grad_norm": 1.1352993249893188, "learning_rate": 6.63828354302449e-06, "loss": 0.3547, "step": 24223 }, { "epoch": 0.62, "grad_norm": 1.145598292350769, "learning_rate": 6.637501725449489e-06, "loss": 0.5681, "step": 24224 }, { "epoch": 0.62, "grad_norm": 17.89838409423828, "learning_rate": 6.636719931046711e-06, "loss": 0.7234, "step": 24225 }, { "epoch": 0.62, "grad_norm": 1.091263771057129, "learning_rate": 6.63593815982155e-06, "loss": 0.5851, "step": 24226 }, { "epoch": 0.62, "grad_norm": 5.7524733543396, "learning_rate": 6.635156411779386e-06, "loss": 0.7193, "step": 24227 }, { "epoch": 0.62, "grad_norm": 1.220110297203064, "learning_rate": 6.634374686925611e-06, "loss": 0.5776, "step": 24228 }, { "epoch": 0.62, "grad_norm": 1.2071176767349243, "learning_rate": 6.633592985265613e-06, "loss": 0.4443, "step": 24229 }, { "epoch": 0.62, "grad_norm": 1.5464849472045898, "learning_rate": 6.632811306804776e-06, "loss": 0.5629, "step": 24230 }, { "epoch": 0.62, "grad_norm": 2.819542407989502, "learning_rate": 6.632029651548488e-06, "loss": 0.44, "step": 24231 }, { "epoch": 0.62, "grad_norm": 7.895538806915283, "learning_rate": 6.6312480195021355e-06, "loss": 0.6816, "step": 24232 }, { "epoch": 0.62, "grad_norm": 12.11933422088623, "learning_rate": 6.630466410671106e-06, "loss": 0.5534, "step": 24233 }, { "epoch": 0.62, "grad_norm": 1.268133521080017, "learning_rate": 6.629684825060784e-06, "loss": 0.4043, "step": 24234 }, { "epoch": 0.62, "grad_norm": 18.037256240844727, "learning_rate": 6.628903262676556e-06, "loss": 0.7116, "step": 24235 }, { "epoch": 0.62, "grad_norm": 1.8636198043823242, "learning_rate": 6.628121723523811e-06, "loss": 0.4658, "step": 24236 }, { "epoch": 0.62, "grad_norm": 5.853795051574707, "learning_rate": 6.627340207607931e-06, "loss": 0.3547, "step": 24237 }, { "epoch": 0.62, "grad_norm": 1.337327480316162, "learning_rate": 6.6265587149343055e-06, "loss": 0.6524, "step": 24238 }, { "epoch": 0.62, "grad_norm": 1.1709460020065308, "learning_rate": 6.625777245508316e-06, "loss": 0.521, "step": 24239 }, { "epoch": 0.62, "grad_norm": 3.9220526218414307, "learning_rate": 6.624995799335349e-06, "loss": 0.4492, "step": 24240 }, { "epoch": 0.62, "grad_norm": 2.205925941467285, "learning_rate": 6.624214376420792e-06, "loss": 0.7269, "step": 24241 }, { "epoch": 0.62, "grad_norm": 5.277402400970459, "learning_rate": 6.623432976770028e-06, "loss": 0.6175, "step": 24242 }, { "epoch": 0.62, "grad_norm": 2.201343536376953, "learning_rate": 6.622651600388444e-06, "loss": 0.7854, "step": 24243 }, { "epoch": 0.62, "grad_norm": 2.928001642227173, "learning_rate": 6.621870247281421e-06, "loss": 0.5206, "step": 24244 }, { "epoch": 0.62, "grad_norm": 3.6376302242279053, "learning_rate": 6.621088917454349e-06, "loss": 0.6783, "step": 24245 }, { "epoch": 0.62, "grad_norm": 1.2556108236312866, "learning_rate": 6.620307610912606e-06, "loss": 0.5558, "step": 24246 }, { "epoch": 0.62, "grad_norm": 1.5361289978027344, "learning_rate": 6.61952632766158e-06, "loss": 0.6077, "step": 24247 }, { "epoch": 0.62, "grad_norm": 1.6181386709213257, "learning_rate": 6.6187450677066575e-06, "loss": 0.6555, "step": 24248 }, { "epoch": 0.62, "grad_norm": 1.7238926887512207, "learning_rate": 6.617963831053218e-06, "loss": 0.5501, "step": 24249 }, { "epoch": 0.62, "grad_norm": 1.584944725036621, "learning_rate": 6.617182617706648e-06, "loss": 0.6258, "step": 24250 }, { "epoch": 0.62, "grad_norm": 1.2600311040878296, "learning_rate": 6.616401427672327e-06, "loss": 0.4622, "step": 24251 }, { "epoch": 0.62, "grad_norm": 3.0135459899902344, "learning_rate": 6.615620260955644e-06, "loss": 0.8335, "step": 24252 }, { "epoch": 0.62, "grad_norm": 1.8027440309524536, "learning_rate": 6.614839117561981e-06, "loss": 0.4136, "step": 24253 }, { "epoch": 0.62, "grad_norm": 1.8128256797790527, "learning_rate": 6.614057997496718e-06, "loss": 0.479, "step": 24254 }, { "epoch": 0.62, "grad_norm": 2.3993775844573975, "learning_rate": 6.613276900765242e-06, "loss": 0.5856, "step": 24255 }, { "epoch": 0.62, "grad_norm": 1.371161699295044, "learning_rate": 6.612495827372931e-06, "loss": 0.527, "step": 24256 }, { "epoch": 0.62, "grad_norm": 1.8143479824066162, "learning_rate": 6.611714777325174e-06, "loss": 0.5057, "step": 24257 }, { "epoch": 0.62, "grad_norm": 8.050761222839355, "learning_rate": 6.610933750627346e-06, "loss": 0.6868, "step": 24258 }, { "epoch": 0.62, "grad_norm": 1.8245136737823486, "learning_rate": 6.610152747284835e-06, "loss": 0.5765, "step": 24259 }, { "epoch": 0.62, "grad_norm": 1.478996753692627, "learning_rate": 6.609371767303023e-06, "loss": 0.6108, "step": 24260 }, { "epoch": 0.62, "grad_norm": 1.045113444328308, "learning_rate": 6.6085908106872885e-06, "loss": 0.5631, "step": 24261 }, { "epoch": 0.62, "grad_norm": 4.830283164978027, "learning_rate": 6.607809877443019e-06, "loss": 0.7309, "step": 24262 }, { "epoch": 0.62, "grad_norm": 2.01265025138855, "learning_rate": 6.607028967575589e-06, "loss": 0.6898, "step": 24263 }, { "epoch": 0.62, "grad_norm": 2.0931146144866943, "learning_rate": 6.6062480810903824e-06, "loss": 0.6364, "step": 24264 }, { "epoch": 0.62, "grad_norm": 4.01942253112793, "learning_rate": 6.605467217992787e-06, "loss": 0.5043, "step": 24265 }, { "epoch": 0.62, "grad_norm": 3.538374900817871, "learning_rate": 6.604686378288174e-06, "loss": 0.4647, "step": 24266 }, { "epoch": 0.62, "grad_norm": 1.1740686893463135, "learning_rate": 6.603905561981934e-06, "loss": 0.4841, "step": 24267 }, { "epoch": 0.62, "grad_norm": 1.0912667512893677, "learning_rate": 6.603124769079439e-06, "loss": 0.3602, "step": 24268 }, { "epoch": 0.62, "grad_norm": 1.9148508310317993, "learning_rate": 6.602343999586075e-06, "loss": 0.8587, "step": 24269 }, { "epoch": 0.62, "grad_norm": 4.281744480133057, "learning_rate": 6.601563253507224e-06, "loss": 0.7755, "step": 24270 }, { "epoch": 0.62, "grad_norm": 1.0353821516036987, "learning_rate": 6.600782530848261e-06, "loss": 0.4977, "step": 24271 }, { "epoch": 0.62, "grad_norm": 1.7520970106124878, "learning_rate": 6.6000018316145706e-06, "loss": 0.4332, "step": 24272 }, { "epoch": 0.62, "grad_norm": 1.2760361433029175, "learning_rate": 6.5992211558115305e-06, "loss": 0.4767, "step": 24273 }, { "epoch": 0.62, "grad_norm": 2.5747580528259277, "learning_rate": 6.598440503444524e-06, "loss": 0.47, "step": 24274 }, { "epoch": 0.62, "grad_norm": 1.2483580112457275, "learning_rate": 6.5976598745189266e-06, "loss": 0.6423, "step": 24275 }, { "epoch": 0.62, "grad_norm": 2.1291563510894775, "learning_rate": 6.596879269040119e-06, "loss": 0.6336, "step": 24276 }, { "epoch": 0.62, "grad_norm": 1.7073613405227661, "learning_rate": 6.596098687013483e-06, "loss": 0.5713, "step": 24277 }, { "epoch": 0.62, "grad_norm": 3.431018114089966, "learning_rate": 6.595318128444396e-06, "loss": 0.5452, "step": 24278 }, { "epoch": 0.62, "grad_norm": 1.5220273733139038, "learning_rate": 6.594537593338237e-06, "loss": 0.4831, "step": 24279 }, { "epoch": 0.62, "grad_norm": 1.3918805122375488, "learning_rate": 6.593757081700385e-06, "loss": 0.6301, "step": 24280 }, { "epoch": 0.62, "grad_norm": 2.124112129211426, "learning_rate": 6.59297659353622e-06, "loss": 0.4874, "step": 24281 }, { "epoch": 0.62, "grad_norm": 1.701892375946045, "learning_rate": 6.59219612885112e-06, "loss": 0.5685, "step": 24282 }, { "epoch": 0.62, "grad_norm": 1.2178765535354614, "learning_rate": 6.591415687650462e-06, "loss": 0.4266, "step": 24283 }, { "epoch": 0.62, "grad_norm": 2.016521453857422, "learning_rate": 6.590635269939628e-06, "loss": 0.4116, "step": 24284 }, { "epoch": 0.62, "grad_norm": 1.7550008296966553, "learning_rate": 6.589854875723991e-06, "loss": 0.7431, "step": 24285 }, { "epoch": 0.62, "grad_norm": 1.2665324211120605, "learning_rate": 6.589074505008935e-06, "loss": 0.6157, "step": 24286 }, { "epoch": 0.62, "grad_norm": 1.2616069316864014, "learning_rate": 6.58829415779983e-06, "loss": 0.5153, "step": 24287 }, { "epoch": 0.62, "grad_norm": 1.9483346939086914, "learning_rate": 6.5875138341020616e-06, "loss": 0.4866, "step": 24288 }, { "epoch": 0.62, "grad_norm": 1.4821279048919678, "learning_rate": 6.586733533921004e-06, "loss": 0.6427, "step": 24289 }, { "epoch": 0.62, "grad_norm": 1.672752022743225, "learning_rate": 6.5859532572620325e-06, "loss": 0.4513, "step": 24290 }, { "epoch": 0.62, "grad_norm": 2.1694464683532715, "learning_rate": 6.585173004130529e-06, "loss": 0.5023, "step": 24291 }, { "epoch": 0.62, "grad_norm": 1.6397958993911743, "learning_rate": 6.584392774531865e-06, "loss": 0.5692, "step": 24292 }, { "epoch": 0.62, "grad_norm": 8.701528549194336, "learning_rate": 6.58361256847142e-06, "loss": 0.7692, "step": 24293 }, { "epoch": 0.62, "grad_norm": 1.19671630859375, "learning_rate": 6.582832385954573e-06, "loss": 0.531, "step": 24294 }, { "epoch": 0.62, "grad_norm": 1.4836206436157227, "learning_rate": 6.582052226986697e-06, "loss": 0.6817, "step": 24295 }, { "epoch": 0.62, "grad_norm": 3.9273438453674316, "learning_rate": 6.58127209157317e-06, "loss": 0.5648, "step": 24296 }, { "epoch": 0.62, "grad_norm": 1.439914345741272, "learning_rate": 6.580491979719368e-06, "loss": 0.4669, "step": 24297 }, { "epoch": 0.62, "grad_norm": 1.6794278621673584, "learning_rate": 6.579711891430667e-06, "loss": 0.6544, "step": 24298 }, { "epoch": 0.62, "grad_norm": 3.196340560913086, "learning_rate": 6.578931826712443e-06, "loss": 0.6191, "step": 24299 }, { "epoch": 0.62, "grad_norm": 1.2777161598205566, "learning_rate": 6.578151785570069e-06, "loss": 0.5716, "step": 24300 }, { "epoch": 0.62, "grad_norm": 2.036853313446045, "learning_rate": 6.577371768008927e-06, "loss": 0.448, "step": 24301 }, { "epoch": 0.62, "grad_norm": 1.1668412685394287, "learning_rate": 6.576591774034385e-06, "loss": 0.6544, "step": 24302 }, { "epoch": 0.62, "grad_norm": 2.153120517730713, "learning_rate": 6.575811803651824e-06, "loss": 0.5654, "step": 24303 }, { "epoch": 0.62, "grad_norm": 1.5386290550231934, "learning_rate": 6.5750318568666164e-06, "loss": 0.7005, "step": 24304 }, { "epoch": 0.62, "grad_norm": 1.051364541053772, "learning_rate": 6.574251933684136e-06, "loss": 0.2861, "step": 24305 }, { "epoch": 0.62, "grad_norm": 1.09521484375, "learning_rate": 6.573472034109762e-06, "loss": 0.5952, "step": 24306 }, { "epoch": 0.62, "grad_norm": 4.8901824951171875, "learning_rate": 6.572692158148862e-06, "loss": 0.6673, "step": 24307 }, { "epoch": 0.62, "grad_norm": 1.260755181312561, "learning_rate": 6.571912305806818e-06, "loss": 0.4019, "step": 24308 }, { "epoch": 0.62, "grad_norm": 1.126332402229309, "learning_rate": 6.571132477088998e-06, "loss": 0.4219, "step": 24309 }, { "epoch": 0.62, "grad_norm": 2.031221628189087, "learning_rate": 6.570352672000782e-06, "loss": 0.6139, "step": 24310 }, { "epoch": 0.62, "grad_norm": 5.758479118347168, "learning_rate": 6.569572890547538e-06, "loss": 0.7196, "step": 24311 }, { "epoch": 0.62, "grad_norm": 3.5345423221588135, "learning_rate": 6.568793132734641e-06, "loss": 0.5122, "step": 24312 }, { "epoch": 0.62, "grad_norm": 1.0568033456802368, "learning_rate": 6.56801339856747e-06, "loss": 0.4565, "step": 24313 }, { "epoch": 0.62, "grad_norm": 3.0871400833129883, "learning_rate": 6.567233688051391e-06, "loss": 0.7145, "step": 24314 }, { "epoch": 0.62, "grad_norm": 5.498534202575684, "learning_rate": 6.566454001191784e-06, "loss": 0.4322, "step": 24315 }, { "epoch": 0.62, "grad_norm": 1.6725891828536987, "learning_rate": 6.565674337994017e-06, "loss": 0.471, "step": 24316 }, { "epoch": 0.62, "grad_norm": 8.987821578979492, "learning_rate": 6.564894698463463e-06, "loss": 0.6757, "step": 24317 }, { "epoch": 0.62, "grad_norm": 1.1109446287155151, "learning_rate": 6.5641150826055e-06, "loss": 0.4471, "step": 24318 }, { "epoch": 0.62, "grad_norm": 10.649608612060547, "learning_rate": 6.563335490425495e-06, "loss": 0.4503, "step": 24319 }, { "epoch": 0.62, "grad_norm": 2.1637842655181885, "learning_rate": 6.562555921928825e-06, "loss": 0.5668, "step": 24320 }, { "epoch": 0.62, "grad_norm": 3.201481580734253, "learning_rate": 6.561776377120856e-06, "loss": 0.361, "step": 24321 }, { "epoch": 0.62, "grad_norm": 3.0136373043060303, "learning_rate": 6.560996856006967e-06, "loss": 0.6905, "step": 24322 }, { "epoch": 0.62, "grad_norm": 1.0621471405029297, "learning_rate": 6.5602173585925275e-06, "loss": 0.4909, "step": 24323 }, { "epoch": 0.62, "grad_norm": 2.201828718185425, "learning_rate": 6.5594378848829065e-06, "loss": 0.6212, "step": 24324 }, { "epoch": 0.62, "grad_norm": 1.3786063194274902, "learning_rate": 6.558658434883482e-06, "loss": 0.5335, "step": 24325 }, { "epoch": 0.62, "grad_norm": 9.51390552520752, "learning_rate": 6.557879008599617e-06, "loss": 0.5135, "step": 24326 }, { "epoch": 0.62, "grad_norm": 1.448209524154663, "learning_rate": 6.557099606036691e-06, "loss": 0.5455, "step": 24327 }, { "epoch": 0.62, "grad_norm": 1.5709154605865479, "learning_rate": 6.556320227200069e-06, "loss": 0.5076, "step": 24328 }, { "epoch": 0.62, "grad_norm": 1.1559189558029175, "learning_rate": 6.555540872095125e-06, "loss": 0.3536, "step": 24329 }, { "epoch": 0.62, "grad_norm": 1.4607678651809692, "learning_rate": 6.5547615407272305e-06, "loss": 0.4657, "step": 24330 }, { "epoch": 0.62, "grad_norm": 1.5274631977081299, "learning_rate": 6.553982233101752e-06, "loss": 0.5193, "step": 24331 }, { "epoch": 0.62, "grad_norm": 2.2298057079315186, "learning_rate": 6.553202949224067e-06, "loss": 0.4827, "step": 24332 }, { "epoch": 0.62, "grad_norm": 3.008612632751465, "learning_rate": 6.552423689099538e-06, "loss": 0.4965, "step": 24333 }, { "epoch": 0.62, "grad_norm": 3.5229225158691406, "learning_rate": 6.551644452733541e-06, "loss": 0.5191, "step": 24334 }, { "epoch": 0.62, "grad_norm": 1.8494566679000854, "learning_rate": 6.550865240131445e-06, "loss": 0.4, "step": 24335 }, { "epoch": 0.62, "grad_norm": 1.414320468902588, "learning_rate": 6.550086051298616e-06, "loss": 0.682, "step": 24336 }, { "epoch": 0.62, "grad_norm": 1.1737611293792725, "learning_rate": 6.549306886240429e-06, "loss": 0.5053, "step": 24337 }, { "epoch": 0.62, "grad_norm": 1.2584680318832397, "learning_rate": 6.54852774496225e-06, "loss": 0.5009, "step": 24338 }, { "epoch": 0.62, "grad_norm": 2.873671293258667, "learning_rate": 6.547748627469452e-06, "loss": 0.6065, "step": 24339 }, { "epoch": 0.62, "grad_norm": 0.9642959833145142, "learning_rate": 6.5469695337674e-06, "loss": 0.503, "step": 24340 }, { "epoch": 0.62, "grad_norm": 2.7983994483947754, "learning_rate": 6.546190463861463e-06, "loss": 0.6915, "step": 24341 }, { "epoch": 0.62, "grad_norm": 1.0190691947937012, "learning_rate": 6.5454114177570106e-06, "loss": 0.5107, "step": 24342 }, { "epoch": 0.62, "grad_norm": 1.85429048538208, "learning_rate": 6.5446323954594144e-06, "loss": 0.604, "step": 24343 }, { "epoch": 0.62, "grad_norm": 1.6690505743026733, "learning_rate": 6.543853396974039e-06, "loss": 0.6763, "step": 24344 }, { "epoch": 0.62, "grad_norm": 2.243368148803711, "learning_rate": 6.543074422306257e-06, "loss": 0.6253, "step": 24345 }, { "epoch": 0.62, "grad_norm": 1.3507674932479858, "learning_rate": 6.5422954714614324e-06, "loss": 0.3887, "step": 24346 }, { "epoch": 0.62, "grad_norm": 1.6542071104049683, "learning_rate": 6.541516544444934e-06, "loss": 0.4752, "step": 24347 }, { "epoch": 0.62, "grad_norm": 1.3511176109313965, "learning_rate": 6.540737641262133e-06, "loss": 0.4456, "step": 24348 }, { "epoch": 0.62, "grad_norm": 2.6989808082580566, "learning_rate": 6.5399587619183945e-06, "loss": 0.5502, "step": 24349 }, { "epoch": 0.62, "grad_norm": 1.6643750667572021, "learning_rate": 6.539179906419086e-06, "loss": 0.5852, "step": 24350 }, { "epoch": 0.62, "grad_norm": 0.8762086033821106, "learning_rate": 6.538401074769576e-06, "loss": 0.5797, "step": 24351 }, { "epoch": 0.62, "grad_norm": 1.06690514087677, "learning_rate": 6.537622266975231e-06, "loss": 0.5007, "step": 24352 }, { "epoch": 0.62, "grad_norm": 7.707923412322998, "learning_rate": 6.536843483041417e-06, "loss": 0.5334, "step": 24353 }, { "epoch": 0.62, "grad_norm": 1.4868545532226562, "learning_rate": 6.536064722973501e-06, "loss": 0.5508, "step": 24354 }, { "epoch": 0.62, "grad_norm": 1.5014500617980957, "learning_rate": 6.5352859867768534e-06, "loss": 0.5569, "step": 24355 }, { "epoch": 0.62, "grad_norm": 1.4575119018554688, "learning_rate": 6.534507274456836e-06, "loss": 0.5966, "step": 24356 }, { "epoch": 0.62, "grad_norm": 1.2887898683547974, "learning_rate": 6.533728586018819e-06, "loss": 0.6615, "step": 24357 }, { "epoch": 0.62, "grad_norm": 1.0752191543579102, "learning_rate": 6.532949921468164e-06, "loss": 0.4688, "step": 24358 }, { "epoch": 0.62, "grad_norm": 1.4779038429260254, "learning_rate": 6.532171280810241e-06, "loss": 0.4822, "step": 24359 }, { "epoch": 0.62, "grad_norm": 1.4069429636001587, "learning_rate": 6.531392664050417e-06, "loss": 0.4434, "step": 24360 }, { "epoch": 0.62, "grad_norm": 3.846226930618286, "learning_rate": 6.530614071194052e-06, "loss": 0.7231, "step": 24361 }, { "epoch": 0.62, "grad_norm": 1.6948151588439941, "learning_rate": 6.52983550224652e-06, "loss": 0.4854, "step": 24362 }, { "epoch": 0.62, "grad_norm": 12.929471969604492, "learning_rate": 6.529056957213178e-06, "loss": 0.5582, "step": 24363 }, { "epoch": 0.62, "grad_norm": 4.944458484649658, "learning_rate": 6.528278436099393e-06, "loss": 0.7496, "step": 24364 }, { "epoch": 0.62, "grad_norm": 3.1947379112243652, "learning_rate": 6.527499938910537e-06, "loss": 0.6781, "step": 24365 }, { "epoch": 0.62, "grad_norm": 0.9621655344963074, "learning_rate": 6.526721465651966e-06, "loss": 0.4054, "step": 24366 }, { "epoch": 0.62, "grad_norm": 1.2090882062911987, "learning_rate": 6.52594301632905e-06, "loss": 0.502, "step": 24367 }, { "epoch": 0.62, "grad_norm": 5.027904510498047, "learning_rate": 6.525164590947152e-06, "loss": 0.7631, "step": 24368 }, { "epoch": 0.62, "grad_norm": 1.9022877216339111, "learning_rate": 6.524386189511638e-06, "loss": 0.6234, "step": 24369 }, { "epoch": 0.62, "grad_norm": 1.6381622552871704, "learning_rate": 6.523607812027868e-06, "loss": 0.6392, "step": 24370 }, { "epoch": 0.62, "grad_norm": 1.158698320388794, "learning_rate": 6.522829458501208e-06, "loss": 0.5274, "step": 24371 }, { "epoch": 0.62, "grad_norm": 4.411960601806641, "learning_rate": 6.5220511289370255e-06, "loss": 0.3428, "step": 24372 }, { "epoch": 0.62, "grad_norm": 2.154487371444702, "learning_rate": 6.521272823340681e-06, "loss": 0.609, "step": 24373 }, { "epoch": 0.62, "grad_norm": 2.3347911834716797, "learning_rate": 6.5204945417175395e-06, "loss": 0.4491, "step": 24374 }, { "epoch": 0.62, "grad_norm": 1.280407190322876, "learning_rate": 6.519716284072962e-06, "loss": 0.6621, "step": 24375 }, { "epoch": 0.62, "grad_norm": 4.141654014587402, "learning_rate": 6.518938050412314e-06, "loss": 0.4977, "step": 24376 }, { "epoch": 0.62, "grad_norm": 1.6002429723739624, "learning_rate": 6.518159840740959e-06, "loss": 0.6214, "step": 24377 }, { "epoch": 0.62, "grad_norm": 2.0527548789978027, "learning_rate": 6.5173816550642565e-06, "loss": 0.5901, "step": 24378 }, { "epoch": 0.62, "grad_norm": 2.80838680267334, "learning_rate": 6.516603493387575e-06, "loss": 0.5469, "step": 24379 }, { "epoch": 0.62, "grad_norm": 1.2999749183654785, "learning_rate": 6.51582535571627e-06, "loss": 0.483, "step": 24380 }, { "epoch": 0.62, "grad_norm": 3.0233449935913086, "learning_rate": 6.51504724205571e-06, "loss": 0.5185, "step": 24381 }, { "epoch": 0.62, "grad_norm": 2.3859965801239014, "learning_rate": 6.5142691524112535e-06, "loss": 0.5117, "step": 24382 }, { "epoch": 0.62, "grad_norm": 1.317659854888916, "learning_rate": 6.513491086788264e-06, "loss": 0.5193, "step": 24383 }, { "epoch": 0.62, "grad_norm": 1.707268476486206, "learning_rate": 6.512713045192105e-06, "loss": 0.6275, "step": 24384 }, { "epoch": 0.63, "grad_norm": 1.5275517702102661, "learning_rate": 6.5119350276281355e-06, "loss": 0.5082, "step": 24385 }, { "epoch": 0.63, "grad_norm": 2.806044101715088, "learning_rate": 6.51115703410172e-06, "loss": 0.6094, "step": 24386 }, { "epoch": 0.63, "grad_norm": 1.25758695602417, "learning_rate": 6.510379064618216e-06, "loss": 0.5919, "step": 24387 }, { "epoch": 0.63, "grad_norm": 2.0358800888061523, "learning_rate": 6.509601119182988e-06, "loss": 0.5182, "step": 24388 }, { "epoch": 0.63, "grad_norm": 2.8539130687713623, "learning_rate": 6.508823197801397e-06, "loss": 0.5525, "step": 24389 }, { "epoch": 0.63, "grad_norm": 1.2047308683395386, "learning_rate": 6.508045300478802e-06, "loss": 0.5012, "step": 24390 }, { "epoch": 0.63, "grad_norm": 1.2344845533370972, "learning_rate": 6.5072674272205674e-06, "loss": 0.5142, "step": 24391 }, { "epoch": 0.63, "grad_norm": 1.0578199625015259, "learning_rate": 6.50648957803205e-06, "loss": 0.4982, "step": 24392 }, { "epoch": 0.63, "grad_norm": 2.9529242515563965, "learning_rate": 6.505711752918611e-06, "loss": 0.6565, "step": 24393 }, { "epoch": 0.63, "grad_norm": 3.1187000274658203, "learning_rate": 6.50493395188561e-06, "loss": 0.6389, "step": 24394 }, { "epoch": 0.63, "grad_norm": 1.7286832332611084, "learning_rate": 6.50415617493841e-06, "loss": 0.4431, "step": 24395 }, { "epoch": 0.63, "grad_norm": 4.6862006187438965, "learning_rate": 6.503378422082369e-06, "loss": 0.512, "step": 24396 }, { "epoch": 0.63, "grad_norm": 1.3452814817428589, "learning_rate": 6.502600693322847e-06, "loss": 0.4767, "step": 24397 }, { "epoch": 0.63, "grad_norm": 1.366277813911438, "learning_rate": 6.501822988665205e-06, "loss": 0.5338, "step": 24398 }, { "epoch": 0.63, "grad_norm": 4.733194828033447, "learning_rate": 6.5010453081148e-06, "loss": 0.3792, "step": 24399 }, { "epoch": 0.63, "grad_norm": 1.0795140266418457, "learning_rate": 6.500267651676993e-06, "loss": 0.6014, "step": 24400 }, { "epoch": 0.63, "grad_norm": 3.9160656929016113, "learning_rate": 6.499490019357142e-06, "loss": 0.5271, "step": 24401 }, { "epoch": 0.63, "grad_norm": 1.252232313156128, "learning_rate": 6.498712411160606e-06, "loss": 0.6408, "step": 24402 }, { "epoch": 0.63, "grad_norm": 1.1698664426803589, "learning_rate": 6.497934827092747e-06, "loss": 0.5524, "step": 24403 }, { "epoch": 0.63, "grad_norm": 0.9503963589668274, "learning_rate": 6.497157267158918e-06, "loss": 0.516, "step": 24404 }, { "epoch": 0.63, "grad_norm": 1.1056883335113525, "learning_rate": 6.49637973136448e-06, "loss": 0.5052, "step": 24405 }, { "epoch": 0.63, "grad_norm": 2.390943765640259, "learning_rate": 6.495602219714795e-06, "loss": 0.7011, "step": 24406 }, { "epoch": 0.63, "grad_norm": 1.6069191694259644, "learning_rate": 6.4948247322152145e-06, "loss": 0.6423, "step": 24407 }, { "epoch": 0.63, "grad_norm": 1.2219245433807373, "learning_rate": 6.494047268871103e-06, "loss": 0.5873, "step": 24408 }, { "epoch": 0.63, "grad_norm": 1.464308500289917, "learning_rate": 6.493269829687813e-06, "loss": 0.593, "step": 24409 }, { "epoch": 0.63, "grad_norm": 1.7046343088150024, "learning_rate": 6.492492414670706e-06, "loss": 0.3454, "step": 24410 }, { "epoch": 0.63, "grad_norm": 1.665255069732666, "learning_rate": 6.491715023825135e-06, "loss": 0.5323, "step": 24411 }, { "epoch": 0.63, "grad_norm": 1.370002269744873, "learning_rate": 6.490937657156459e-06, "loss": 0.4892, "step": 24412 }, { "epoch": 0.63, "grad_norm": 2.1515655517578125, "learning_rate": 6.4901603146700394e-06, "loss": 0.5865, "step": 24413 }, { "epoch": 0.63, "grad_norm": 3.268972396850586, "learning_rate": 6.48938299637123e-06, "loss": 0.4853, "step": 24414 }, { "epoch": 0.63, "grad_norm": 1.4943045377731323, "learning_rate": 6.488605702265387e-06, "loss": 0.6783, "step": 24415 }, { "epoch": 0.63, "grad_norm": 2.00110125541687, "learning_rate": 6.487828432357865e-06, "loss": 0.6179, "step": 24416 }, { "epoch": 0.63, "grad_norm": 1.1952461004257202, "learning_rate": 6.487051186654023e-06, "loss": 0.5495, "step": 24417 }, { "epoch": 0.63, "grad_norm": 1.762255311012268, "learning_rate": 6.48627396515922e-06, "loss": 0.5835, "step": 24418 }, { "epoch": 0.63, "grad_norm": 2.7203221321105957, "learning_rate": 6.4854967678788075e-06, "loss": 0.6052, "step": 24419 }, { "epoch": 0.63, "grad_norm": 1.2738063335418701, "learning_rate": 6.484719594818145e-06, "loss": 0.5478, "step": 24420 }, { "epoch": 0.63, "grad_norm": 1.0324970483779907, "learning_rate": 6.483942445982583e-06, "loss": 0.5401, "step": 24421 }, { "epoch": 0.63, "grad_norm": 1.4303542375564575, "learning_rate": 6.483165321377484e-06, "loss": 0.5689, "step": 24422 }, { "epoch": 0.63, "grad_norm": 2.0971481800079346, "learning_rate": 6.482388221008198e-06, "loss": 0.541, "step": 24423 }, { "epoch": 0.63, "grad_norm": 1.4971818923950195, "learning_rate": 6.481611144880082e-06, "loss": 0.5656, "step": 24424 }, { "epoch": 0.63, "grad_norm": 1.2296563386917114, "learning_rate": 6.480834092998496e-06, "loss": 0.612, "step": 24425 }, { "epoch": 0.63, "grad_norm": 1.16441810131073, "learning_rate": 6.480057065368786e-06, "loss": 0.6556, "step": 24426 }, { "epoch": 0.63, "grad_norm": 0.9972710609436035, "learning_rate": 6.4792800619963135e-06, "loss": 0.5251, "step": 24427 }, { "epoch": 0.63, "grad_norm": 5.017991542816162, "learning_rate": 6.478503082886429e-06, "loss": 0.5494, "step": 24428 }, { "epoch": 0.63, "grad_norm": 1.546135663986206, "learning_rate": 6.4777261280444885e-06, "loss": 0.566, "step": 24429 }, { "epoch": 0.63, "grad_norm": 1.2055166959762573, "learning_rate": 6.476949197475849e-06, "loss": 0.3796, "step": 24430 }, { "epoch": 0.63, "grad_norm": 1.325514316558838, "learning_rate": 6.4761722911858605e-06, "loss": 0.4977, "step": 24431 }, { "epoch": 0.63, "grad_norm": 1.4451608657836914, "learning_rate": 6.475395409179881e-06, "loss": 0.5039, "step": 24432 }, { "epoch": 0.63, "grad_norm": 5.768633842468262, "learning_rate": 6.474618551463258e-06, "loss": 0.5252, "step": 24433 }, { "epoch": 0.63, "grad_norm": 5.766919136047363, "learning_rate": 6.4738417180413535e-06, "loss": 0.5792, "step": 24434 }, { "epoch": 0.63, "grad_norm": 1.7757703065872192, "learning_rate": 6.473064908919513e-06, "loss": 0.5217, "step": 24435 }, { "epoch": 0.63, "grad_norm": 3.9392852783203125, "learning_rate": 6.4722881241030945e-06, "loss": 0.5997, "step": 24436 }, { "epoch": 0.63, "grad_norm": 5.246737957000732, "learning_rate": 6.471511363597452e-06, "loss": 0.4998, "step": 24437 }, { "epoch": 0.63, "grad_norm": 2.038494110107422, "learning_rate": 6.470734627407933e-06, "loss": 0.7123, "step": 24438 }, { "epoch": 0.63, "grad_norm": 1.8941071033477783, "learning_rate": 6.469957915539896e-06, "loss": 0.5682, "step": 24439 }, { "epoch": 0.63, "grad_norm": 1.8307363986968994, "learning_rate": 6.469181227998689e-06, "loss": 0.5185, "step": 24440 }, { "epoch": 0.63, "grad_norm": 2.1854701042175293, "learning_rate": 6.4684045647896674e-06, "loss": 0.5957, "step": 24441 }, { "epoch": 0.63, "grad_norm": 1.4519412517547607, "learning_rate": 6.4676279259181855e-06, "loss": 0.4122, "step": 24442 }, { "epoch": 0.63, "grad_norm": 4.8350419998168945, "learning_rate": 6.466851311389589e-06, "loss": 0.6239, "step": 24443 }, { "epoch": 0.63, "grad_norm": 3.6114752292633057, "learning_rate": 6.4660747212092365e-06, "loss": 0.466, "step": 24444 }, { "epoch": 0.63, "grad_norm": 7.29458475112915, "learning_rate": 6.465298155382474e-06, "loss": 0.6236, "step": 24445 }, { "epoch": 0.63, "grad_norm": 2.7212796211242676, "learning_rate": 6.464521613914658e-06, "loss": 0.4532, "step": 24446 }, { "epoch": 0.63, "grad_norm": 1.7805794477462769, "learning_rate": 6.463745096811138e-06, "loss": 0.447, "step": 24447 }, { "epoch": 0.63, "grad_norm": 1.3105665445327759, "learning_rate": 6.462968604077262e-06, "loss": 0.552, "step": 24448 }, { "epoch": 0.63, "grad_norm": 1.887214183807373, "learning_rate": 6.462192135718388e-06, "loss": 0.5802, "step": 24449 }, { "epoch": 0.63, "grad_norm": 1.8458130359649658, "learning_rate": 6.46141569173986e-06, "loss": 0.5165, "step": 24450 }, { "epoch": 0.63, "grad_norm": 1.584623098373413, "learning_rate": 6.460639272147035e-06, "loss": 0.4959, "step": 24451 }, { "epoch": 0.63, "grad_norm": 3.6232144832611084, "learning_rate": 6.45986287694526e-06, "loss": 0.5441, "step": 24452 }, { "epoch": 0.63, "grad_norm": 1.5894173383712769, "learning_rate": 6.459086506139883e-06, "loss": 0.4456, "step": 24453 }, { "epoch": 0.63, "grad_norm": 1.7329164743423462, "learning_rate": 6.458310159736261e-06, "loss": 0.7763, "step": 24454 }, { "epoch": 0.63, "grad_norm": 7.296441078186035, "learning_rate": 6.457533837739737e-06, "loss": 0.7027, "step": 24455 }, { "epoch": 0.63, "grad_norm": 17.53563690185547, "learning_rate": 6.456757540155667e-06, "loss": 0.6036, "step": 24456 }, { "epoch": 0.63, "grad_norm": 1.8946696519851685, "learning_rate": 6.455981266989395e-06, "loss": 0.4288, "step": 24457 }, { "epoch": 0.63, "grad_norm": 1.096004843711853, "learning_rate": 6.455205018246275e-06, "loss": 0.3, "step": 24458 }, { "epoch": 0.63, "grad_norm": 1.5872100591659546, "learning_rate": 6.454428793931655e-06, "loss": 0.5362, "step": 24459 }, { "epoch": 0.63, "grad_norm": 1.2704907655715942, "learning_rate": 6.453652594050883e-06, "loss": 0.5539, "step": 24460 }, { "epoch": 0.63, "grad_norm": 1.3701244592666626, "learning_rate": 6.452876418609313e-06, "loss": 0.6338, "step": 24461 }, { "epoch": 0.63, "grad_norm": 1.0655633211135864, "learning_rate": 6.452100267612287e-06, "loss": 0.5047, "step": 24462 }, { "epoch": 0.63, "grad_norm": 5.522716522216797, "learning_rate": 6.451324141065159e-06, "loss": 0.5876, "step": 24463 }, { "epoch": 0.63, "grad_norm": 1.330012559890747, "learning_rate": 6.4505480389732725e-06, "loss": 0.5588, "step": 24464 }, { "epoch": 0.63, "grad_norm": 0.9799916744232178, "learning_rate": 6.449771961341979e-06, "loss": 0.3897, "step": 24465 }, { "epoch": 0.63, "grad_norm": 1.5418401956558228, "learning_rate": 6.44899590817663e-06, "loss": 0.6877, "step": 24466 }, { "epoch": 0.63, "grad_norm": 1.5196518898010254, "learning_rate": 6.4482198794825675e-06, "loss": 0.5509, "step": 24467 }, { "epoch": 0.63, "grad_norm": 1.374916911125183, "learning_rate": 6.4474438752651445e-06, "loss": 0.486, "step": 24468 }, { "epoch": 0.63, "grad_norm": 1.399437665939331, "learning_rate": 6.4466678955297045e-06, "loss": 0.4647, "step": 24469 }, { "epoch": 0.63, "grad_norm": 1.9365917444229126, "learning_rate": 6.4458919402815965e-06, "loss": 0.5706, "step": 24470 }, { "epoch": 0.63, "grad_norm": 2.021254062652588, "learning_rate": 6.445116009526171e-06, "loss": 0.5599, "step": 24471 }, { "epoch": 0.63, "grad_norm": 2.263420343399048, "learning_rate": 6.444340103268769e-06, "loss": 0.8431, "step": 24472 }, { "epoch": 0.63, "grad_norm": 6.967215538024902, "learning_rate": 6.443564221514745e-06, "loss": 0.7828, "step": 24473 }, { "epoch": 0.63, "grad_norm": 5.95363712310791, "learning_rate": 6.442788364269439e-06, "loss": 0.5582, "step": 24474 }, { "epoch": 0.63, "grad_norm": 1.4264620542526245, "learning_rate": 6.442012531538202e-06, "loss": 0.5109, "step": 24475 }, { "epoch": 0.63, "grad_norm": 6.743673324584961, "learning_rate": 6.4412367233263785e-06, "loss": 0.6547, "step": 24476 }, { "epoch": 0.63, "grad_norm": 2.6709883213043213, "learning_rate": 6.440460939639316e-06, "loss": 0.5071, "step": 24477 }, { "epoch": 0.63, "grad_norm": 0.9353325366973877, "learning_rate": 6.439685180482362e-06, "loss": 0.506, "step": 24478 }, { "epoch": 0.63, "grad_norm": 1.2420698404312134, "learning_rate": 6.438909445860858e-06, "loss": 0.6127, "step": 24479 }, { "epoch": 0.63, "grad_norm": 1.9859951734542847, "learning_rate": 6.438133735780156e-06, "loss": 0.4916, "step": 24480 }, { "epoch": 0.63, "grad_norm": 1.906516194343567, "learning_rate": 6.437358050245596e-06, "loss": 0.6475, "step": 24481 }, { "epoch": 0.63, "grad_norm": 1.2375891208648682, "learning_rate": 6.436582389262526e-06, "loss": 0.5506, "step": 24482 }, { "epoch": 0.63, "grad_norm": 3.7341318130493164, "learning_rate": 6.435806752836294e-06, "loss": 0.5054, "step": 24483 }, { "epoch": 0.63, "grad_norm": 2.0799062252044678, "learning_rate": 6.435031140972239e-06, "loss": 0.542, "step": 24484 }, { "epoch": 0.63, "grad_norm": 1.6194124221801758, "learning_rate": 6.4342555536757125e-06, "loss": 0.5853, "step": 24485 }, { "epoch": 0.63, "grad_norm": 1.4338215589523315, "learning_rate": 6.433479990952055e-06, "loss": 0.538, "step": 24486 }, { "epoch": 0.63, "grad_norm": 2.8651931285858154, "learning_rate": 6.432704452806615e-06, "loss": 0.7492, "step": 24487 }, { "epoch": 0.63, "grad_norm": 1.6437679529190063, "learning_rate": 6.431928939244732e-06, "loss": 0.5245, "step": 24488 }, { "epoch": 0.63, "grad_norm": 1.3472425937652588, "learning_rate": 6.431153450271752e-06, "loss": 0.441, "step": 24489 }, { "epoch": 0.63, "grad_norm": 4.172524452209473, "learning_rate": 6.430377985893024e-06, "loss": 0.497, "step": 24490 }, { "epoch": 0.63, "grad_norm": 1.7844139337539673, "learning_rate": 6.429602546113887e-06, "loss": 0.5616, "step": 24491 }, { "epoch": 0.63, "grad_norm": 2.893075704574585, "learning_rate": 6.428827130939686e-06, "loss": 0.5386, "step": 24492 }, { "epoch": 0.63, "grad_norm": 6.38607931137085, "learning_rate": 6.428051740375763e-06, "loss": 0.501, "step": 24493 }, { "epoch": 0.63, "grad_norm": 1.5066046714782715, "learning_rate": 6.427276374427465e-06, "loss": 0.5308, "step": 24494 }, { "epoch": 0.63, "grad_norm": 2.6517882347106934, "learning_rate": 6.426501033100135e-06, "loss": 0.6995, "step": 24495 }, { "epoch": 0.63, "grad_norm": 2.514430522918701, "learning_rate": 6.425725716399112e-06, "loss": 0.4117, "step": 24496 }, { "epoch": 0.63, "grad_norm": 10.018542289733887, "learning_rate": 6.424950424329743e-06, "loss": 0.7621, "step": 24497 }, { "epoch": 0.63, "grad_norm": 0.7782698273658752, "learning_rate": 6.424175156897369e-06, "loss": 0.4068, "step": 24498 }, { "epoch": 0.63, "grad_norm": 1.2804598808288574, "learning_rate": 6.423399914107333e-06, "loss": 0.547, "step": 24499 }, { "epoch": 0.63, "grad_norm": 0.9964642524719238, "learning_rate": 6.422624695964979e-06, "loss": 0.6004, "step": 24500 }, { "epoch": 0.63, "grad_norm": 1.6621278524398804, "learning_rate": 6.421849502475647e-06, "loss": 0.4209, "step": 24501 }, { "epoch": 0.63, "grad_norm": 0.8783935308456421, "learning_rate": 6.421074333644682e-06, "loss": 0.5189, "step": 24502 }, { "epoch": 0.63, "grad_norm": 4.942699909210205, "learning_rate": 6.420299189477422e-06, "loss": 0.5604, "step": 24503 }, { "epoch": 0.63, "grad_norm": 3.389939546585083, "learning_rate": 6.4195240699792125e-06, "loss": 0.6641, "step": 24504 }, { "epoch": 0.63, "grad_norm": 2.9554455280303955, "learning_rate": 6.418748975155394e-06, "loss": 0.5822, "step": 24505 }, { "epoch": 0.63, "grad_norm": 1.5185885429382324, "learning_rate": 6.417973905011305e-06, "loss": 0.5305, "step": 24506 }, { "epoch": 0.63, "grad_norm": 5.8342604637146, "learning_rate": 6.417198859552293e-06, "loss": 0.6628, "step": 24507 }, { "epoch": 0.63, "grad_norm": 1.7026646137237549, "learning_rate": 6.416423838783692e-06, "loss": 0.6131, "step": 24508 }, { "epoch": 0.63, "grad_norm": 1.0301613807678223, "learning_rate": 6.4156488427108495e-06, "loss": 0.4833, "step": 24509 }, { "epoch": 0.63, "grad_norm": 0.9308114647865295, "learning_rate": 6.414873871339101e-06, "loss": 0.6065, "step": 24510 }, { "epoch": 0.63, "grad_norm": 0.8253814578056335, "learning_rate": 6.414098924673789e-06, "loss": 0.4945, "step": 24511 }, { "epoch": 0.63, "grad_norm": 1.0848983526229858, "learning_rate": 6.413324002720258e-06, "loss": 0.6019, "step": 24512 }, { "epoch": 0.63, "grad_norm": 1.6217743158340454, "learning_rate": 6.412549105483841e-06, "loss": 0.5686, "step": 24513 }, { "epoch": 0.63, "grad_norm": 2.9306225776672363, "learning_rate": 6.411774232969884e-06, "loss": 0.6659, "step": 24514 }, { "epoch": 0.63, "grad_norm": 1.6718207597732544, "learning_rate": 6.410999385183723e-06, "loss": 0.6753, "step": 24515 }, { "epoch": 0.63, "grad_norm": 2.064038038253784, "learning_rate": 6.4102245621307006e-06, "loss": 0.6633, "step": 24516 }, { "epoch": 0.63, "grad_norm": 1.525146484375, "learning_rate": 6.409449763816154e-06, "loss": 0.6659, "step": 24517 }, { "epoch": 0.63, "grad_norm": 2.3939619064331055, "learning_rate": 6.408674990245424e-06, "loss": 0.5791, "step": 24518 }, { "epoch": 0.63, "grad_norm": 3.7947404384613037, "learning_rate": 6.40790024142385e-06, "loss": 0.4755, "step": 24519 }, { "epoch": 0.63, "grad_norm": 1.4284017086029053, "learning_rate": 6.407125517356771e-06, "loss": 0.6669, "step": 24520 }, { "epoch": 0.63, "grad_norm": 1.4521905183792114, "learning_rate": 6.406350818049527e-06, "loss": 0.5818, "step": 24521 }, { "epoch": 0.63, "grad_norm": 2.3167057037353516, "learning_rate": 6.405576143507453e-06, "loss": 0.5154, "step": 24522 }, { "epoch": 0.63, "grad_norm": 5.127050399780273, "learning_rate": 6.404801493735889e-06, "loss": 0.5731, "step": 24523 }, { "epoch": 0.63, "grad_norm": 1.722074031829834, "learning_rate": 6.4040268687401785e-06, "loss": 0.4708, "step": 24524 }, { "epoch": 0.63, "grad_norm": 1.0309631824493408, "learning_rate": 6.403252268525653e-06, "loss": 0.4607, "step": 24525 }, { "epoch": 0.63, "grad_norm": 1.137325644493103, "learning_rate": 6.402477693097654e-06, "loss": 0.4165, "step": 24526 }, { "epoch": 0.63, "grad_norm": 1.2011780738830566, "learning_rate": 6.4017031424615176e-06, "loss": 0.5608, "step": 24527 }, { "epoch": 0.63, "grad_norm": 1.7799761295318604, "learning_rate": 6.400928616622584e-06, "loss": 0.6471, "step": 24528 }, { "epoch": 0.63, "grad_norm": 1.2933894395828247, "learning_rate": 6.4001541155861864e-06, "loss": 0.5015, "step": 24529 }, { "epoch": 0.63, "grad_norm": 1.3281610012054443, "learning_rate": 6.399379639357666e-06, "loss": 0.5455, "step": 24530 }, { "epoch": 0.63, "grad_norm": 2.9138433933258057, "learning_rate": 6.398605187942362e-06, "loss": 0.6168, "step": 24531 }, { "epoch": 0.63, "grad_norm": 1.7602742910385132, "learning_rate": 6.397830761345605e-06, "loss": 0.5481, "step": 24532 }, { "epoch": 0.63, "grad_norm": 5.760814189910889, "learning_rate": 6.397056359572737e-06, "loss": 0.5831, "step": 24533 }, { "epoch": 0.63, "grad_norm": 1.8027113676071167, "learning_rate": 6.396281982629092e-06, "loss": 0.657, "step": 24534 }, { "epoch": 0.63, "grad_norm": 1.3313853740692139, "learning_rate": 6.395507630520007e-06, "loss": 0.5698, "step": 24535 }, { "epoch": 0.63, "grad_norm": 1.120744228363037, "learning_rate": 6.394733303250822e-06, "loss": 0.5187, "step": 24536 }, { "epoch": 0.63, "grad_norm": 1.3272459506988525, "learning_rate": 6.3939590008268655e-06, "loss": 0.5353, "step": 24537 }, { "epoch": 0.63, "grad_norm": 3.2708442211151123, "learning_rate": 6.393184723253483e-06, "loss": 0.5081, "step": 24538 }, { "epoch": 0.63, "grad_norm": 1.580395221710205, "learning_rate": 6.392410470536002e-06, "loss": 0.63, "step": 24539 }, { "epoch": 0.63, "grad_norm": 1.9694185256958008, "learning_rate": 6.391636242679763e-06, "loss": 0.7212, "step": 24540 }, { "epoch": 0.63, "grad_norm": 1.3904955387115479, "learning_rate": 6.390862039690097e-06, "loss": 0.5374, "step": 24541 }, { "epoch": 0.63, "grad_norm": 1.2464700937271118, "learning_rate": 6.390087861572345e-06, "loss": 0.5794, "step": 24542 }, { "epoch": 0.63, "grad_norm": 1.6313223838806152, "learning_rate": 6.3893137083318405e-06, "loss": 0.5505, "step": 24543 }, { "epoch": 0.63, "grad_norm": 1.585915207862854, "learning_rate": 6.388539579973915e-06, "loss": 0.7024, "step": 24544 }, { "epoch": 0.63, "grad_norm": 2.390260696411133, "learning_rate": 6.387765476503907e-06, "loss": 0.503, "step": 24545 }, { "epoch": 0.63, "grad_norm": 2.4659030437469482, "learning_rate": 6.38699139792715e-06, "loss": 0.6197, "step": 24546 }, { "epoch": 0.63, "grad_norm": 3.640813112258911, "learning_rate": 6.386217344248977e-06, "loss": 0.4139, "step": 24547 }, { "epoch": 0.63, "grad_norm": 2.4661529064178467, "learning_rate": 6.385443315474727e-06, "loss": 0.7367, "step": 24548 }, { "epoch": 0.63, "grad_norm": 1.3088228702545166, "learning_rate": 6.384669311609727e-06, "loss": 0.6089, "step": 24549 }, { "epoch": 0.63, "grad_norm": 1.2182807922363281, "learning_rate": 6.383895332659318e-06, "loss": 0.5085, "step": 24550 }, { "epoch": 0.63, "grad_norm": 1.5799368619918823, "learning_rate": 6.383121378628827e-06, "loss": 0.5278, "step": 24551 }, { "epoch": 0.63, "grad_norm": 0.8456626534461975, "learning_rate": 6.382347449523591e-06, "loss": 0.5165, "step": 24552 }, { "epoch": 0.63, "grad_norm": 1.5726107358932495, "learning_rate": 6.381573545348948e-06, "loss": 0.5483, "step": 24553 }, { "epoch": 0.63, "grad_norm": 3.3158960342407227, "learning_rate": 6.380799666110224e-06, "loss": 0.5565, "step": 24554 }, { "epoch": 0.63, "grad_norm": 4.597177028656006, "learning_rate": 6.380025811812756e-06, "loss": 0.747, "step": 24555 }, { "epoch": 0.63, "grad_norm": 1.0504071712493896, "learning_rate": 6.379251982461874e-06, "loss": 0.4987, "step": 24556 }, { "epoch": 0.63, "grad_norm": 0.9785075783729553, "learning_rate": 6.3784781780629145e-06, "loss": 0.4626, "step": 24557 }, { "epoch": 0.63, "grad_norm": 1.448611855506897, "learning_rate": 6.377704398621206e-06, "loss": 0.5485, "step": 24558 }, { "epoch": 0.63, "grad_norm": 0.9659205079078674, "learning_rate": 6.376930644142083e-06, "loss": 0.5548, "step": 24559 }, { "epoch": 0.63, "grad_norm": 1.3728106021881104, "learning_rate": 6.376156914630881e-06, "loss": 0.5512, "step": 24560 }, { "epoch": 0.63, "grad_norm": 1.2958537340164185, "learning_rate": 6.375383210092926e-06, "loss": 0.619, "step": 24561 }, { "epoch": 0.63, "grad_norm": 1.148253083229065, "learning_rate": 6.374609530533556e-06, "loss": 0.4891, "step": 24562 }, { "epoch": 0.63, "grad_norm": 2.092341661453247, "learning_rate": 6.373835875958094e-06, "loss": 0.5799, "step": 24563 }, { "epoch": 0.63, "grad_norm": 1.3477089405059814, "learning_rate": 6.373062246371879e-06, "loss": 0.5585, "step": 24564 }, { "epoch": 0.63, "grad_norm": 1.6056897640228271, "learning_rate": 6.372288641780243e-06, "loss": 0.6025, "step": 24565 }, { "epoch": 0.63, "grad_norm": 1.0809701681137085, "learning_rate": 6.371515062188511e-06, "loss": 0.5633, "step": 24566 }, { "epoch": 0.63, "grad_norm": 1.233083963394165, "learning_rate": 6.370741507602022e-06, "loss": 0.6349, "step": 24567 }, { "epoch": 0.63, "grad_norm": 1.6049078702926636, "learning_rate": 6.369967978026099e-06, "loss": 0.5188, "step": 24568 }, { "epoch": 0.63, "grad_norm": 1.4661645889282227, "learning_rate": 6.369194473466078e-06, "loss": 0.6012, "step": 24569 }, { "epoch": 0.63, "grad_norm": 1.6747528314590454, "learning_rate": 6.368420993927284e-06, "loss": 0.5164, "step": 24570 }, { "epoch": 0.63, "grad_norm": 1.4907623529434204, "learning_rate": 6.367647539415054e-06, "loss": 0.4856, "step": 24571 }, { "epoch": 0.63, "grad_norm": 1.4623678922653198, "learning_rate": 6.366874109934715e-06, "loss": 0.4837, "step": 24572 }, { "epoch": 0.63, "grad_norm": 2.171293258666992, "learning_rate": 6.366100705491596e-06, "loss": 0.5949, "step": 24573 }, { "epoch": 0.63, "grad_norm": 2.5138192176818848, "learning_rate": 6.365327326091029e-06, "loss": 0.467, "step": 24574 }, { "epoch": 0.63, "grad_norm": 1.5380104780197144, "learning_rate": 6.364553971738341e-06, "loss": 0.5586, "step": 24575 }, { "epoch": 0.63, "grad_norm": 1.2454131841659546, "learning_rate": 6.363780642438864e-06, "loss": 0.5357, "step": 24576 }, { "epoch": 0.63, "grad_norm": 2.496791362762451, "learning_rate": 6.363007338197926e-06, "loss": 0.59, "step": 24577 }, { "epoch": 0.63, "grad_norm": 3.066849946975708, "learning_rate": 6.362234059020857e-06, "loss": 0.5966, "step": 24578 }, { "epoch": 0.63, "grad_norm": 1.6593888998031616, "learning_rate": 6.361460804912987e-06, "loss": 0.5004, "step": 24579 }, { "epoch": 0.63, "grad_norm": 1.059395670890808, "learning_rate": 6.360687575879641e-06, "loss": 0.4327, "step": 24580 }, { "epoch": 0.63, "grad_norm": 6.72149133682251, "learning_rate": 6.359914371926152e-06, "loss": 0.5638, "step": 24581 }, { "epoch": 0.63, "grad_norm": 2.6863017082214355, "learning_rate": 6.359141193057845e-06, "loss": 0.3632, "step": 24582 }, { "epoch": 0.63, "grad_norm": 1.403397798538208, "learning_rate": 6.358368039280048e-06, "loss": 0.5826, "step": 24583 }, { "epoch": 0.63, "grad_norm": 1.5943747758865356, "learning_rate": 6.35759491059809e-06, "loss": 0.4073, "step": 24584 }, { "epoch": 0.63, "grad_norm": 1.6925973892211914, "learning_rate": 6.356821807017304e-06, "loss": 0.6236, "step": 24585 }, { "epoch": 0.63, "grad_norm": 1.61599862575531, "learning_rate": 6.356048728543009e-06, "loss": 0.5075, "step": 24586 }, { "epoch": 0.63, "grad_norm": 11.899872779846191, "learning_rate": 6.355275675180539e-06, "loss": 0.5884, "step": 24587 }, { "epoch": 0.63, "grad_norm": 1.6224344968795776, "learning_rate": 6.354502646935217e-06, "loss": 0.7877, "step": 24588 }, { "epoch": 0.63, "grad_norm": 1.7088638544082642, "learning_rate": 6.353729643812372e-06, "loss": 0.5195, "step": 24589 }, { "epoch": 0.63, "grad_norm": 1.1262786388397217, "learning_rate": 6.352956665817335e-06, "loss": 0.4661, "step": 24590 }, { "epoch": 0.63, "grad_norm": 1.9156006574630737, "learning_rate": 6.352183712955426e-06, "loss": 0.589, "step": 24591 }, { "epoch": 0.63, "grad_norm": 1.329931378364563, "learning_rate": 6.351410785231977e-06, "loss": 0.6418, "step": 24592 }, { "epoch": 0.63, "grad_norm": 1.9782263040542603, "learning_rate": 6.350637882652312e-06, "loss": 0.5413, "step": 24593 }, { "epoch": 0.63, "grad_norm": 2.2319252490997314, "learning_rate": 6.349865005221757e-06, "loss": 0.4102, "step": 24594 }, { "epoch": 0.63, "grad_norm": 1.458642840385437, "learning_rate": 6.349092152945641e-06, "loss": 0.5666, "step": 24595 }, { "epoch": 0.63, "grad_norm": 1.5358707904815674, "learning_rate": 6.348319325829286e-06, "loss": 0.445, "step": 24596 }, { "epoch": 0.63, "grad_norm": 1.4549139738082886, "learning_rate": 6.3475465238780234e-06, "loss": 0.569, "step": 24597 }, { "epoch": 0.63, "grad_norm": 1.5223774909973145, "learning_rate": 6.346773747097172e-06, "loss": 0.4253, "step": 24598 }, { "epoch": 0.63, "grad_norm": 1.3101747035980225, "learning_rate": 6.346000995492063e-06, "loss": 0.4861, "step": 24599 }, { "epoch": 0.63, "grad_norm": 1.3826167583465576, "learning_rate": 6.345228269068018e-06, "loss": 0.66, "step": 24600 }, { "epoch": 0.63, "grad_norm": 2.714289665222168, "learning_rate": 6.344455567830364e-06, "loss": 0.6912, "step": 24601 }, { "epoch": 0.63, "grad_norm": 1.1314406394958496, "learning_rate": 6.343682891784426e-06, "loss": 0.3344, "step": 24602 }, { "epoch": 0.63, "grad_norm": 5.484067440032959, "learning_rate": 6.342910240935528e-06, "loss": 0.7593, "step": 24603 }, { "epoch": 0.63, "grad_norm": 1.6506470441818237, "learning_rate": 6.342137615288997e-06, "loss": 0.5804, "step": 24604 }, { "epoch": 0.63, "grad_norm": 2.8201351165771484, "learning_rate": 6.341365014850153e-06, "loss": 0.6343, "step": 24605 }, { "epoch": 0.63, "grad_norm": 2.2071309089660645, "learning_rate": 6.340592439624323e-06, "loss": 0.6428, "step": 24606 }, { "epoch": 0.63, "grad_norm": 8.236501693725586, "learning_rate": 6.339819889616832e-06, "loss": 0.5998, "step": 24607 }, { "epoch": 0.63, "grad_norm": 2.663764715194702, "learning_rate": 6.339047364832999e-06, "loss": 0.4804, "step": 24608 }, { "epoch": 0.63, "grad_norm": 2.1450319290161133, "learning_rate": 6.338274865278158e-06, "loss": 0.5147, "step": 24609 }, { "epoch": 0.63, "grad_norm": 1.4132119417190552, "learning_rate": 6.337502390957622e-06, "loss": 0.4312, "step": 24610 }, { "epoch": 0.63, "grad_norm": 0.9546657800674438, "learning_rate": 6.336729941876719e-06, "loss": 0.4687, "step": 24611 }, { "epoch": 0.63, "grad_norm": 1.4207295179367065, "learning_rate": 6.335957518040771e-06, "loss": 0.6667, "step": 24612 }, { "epoch": 0.63, "grad_norm": 1.7995996475219727, "learning_rate": 6.335185119455103e-06, "loss": 0.6318, "step": 24613 }, { "epoch": 0.63, "grad_norm": 2.8024563789367676, "learning_rate": 6.3344127461250375e-06, "loss": 0.5654, "step": 24614 }, { "epoch": 0.63, "grad_norm": 1.4358253479003906, "learning_rate": 6.333640398055893e-06, "loss": 0.5068, "step": 24615 }, { "epoch": 0.63, "grad_norm": 2.173025131225586, "learning_rate": 6.3328680752530006e-06, "loss": 0.616, "step": 24616 }, { "epoch": 0.63, "grad_norm": 1.8308745622634888, "learning_rate": 6.332095777721673e-06, "loss": 0.5234, "step": 24617 }, { "epoch": 0.63, "grad_norm": 1.383109450340271, "learning_rate": 6.331323505467238e-06, "loss": 0.6961, "step": 24618 }, { "epoch": 0.63, "grad_norm": 1.6997709274291992, "learning_rate": 6.330551258495018e-06, "loss": 0.6915, "step": 24619 }, { "epoch": 0.63, "grad_norm": 1.5609955787658691, "learning_rate": 6.3297790368103316e-06, "loss": 0.4582, "step": 24620 }, { "epoch": 0.63, "grad_norm": 3.153461456298828, "learning_rate": 6.329006840418505e-06, "loss": 0.5109, "step": 24621 }, { "epoch": 0.63, "grad_norm": 1.142134666442871, "learning_rate": 6.328234669324853e-06, "loss": 0.5678, "step": 24622 }, { "epoch": 0.63, "grad_norm": 1.3379641771316528, "learning_rate": 6.3274625235347045e-06, "loss": 0.5687, "step": 24623 }, { "epoch": 0.63, "grad_norm": 1.2246745824813843, "learning_rate": 6.326690403053375e-06, "loss": 0.6294, "step": 24624 }, { "epoch": 0.63, "grad_norm": 1.4356110095977783, "learning_rate": 6.3259183078861865e-06, "loss": 0.5234, "step": 24625 }, { "epoch": 0.63, "grad_norm": 6.403934955596924, "learning_rate": 6.325146238038462e-06, "loss": 0.4782, "step": 24626 }, { "epoch": 0.63, "grad_norm": 1.0212734937667847, "learning_rate": 6.324374193515521e-06, "loss": 0.4367, "step": 24627 }, { "epoch": 0.63, "grad_norm": 3.4478871822357178, "learning_rate": 6.323602174322685e-06, "loss": 0.5453, "step": 24628 }, { "epoch": 0.63, "grad_norm": 2.1076271533966064, "learning_rate": 6.322830180465271e-06, "loss": 0.8261, "step": 24629 }, { "epoch": 0.63, "grad_norm": 1.6504449844360352, "learning_rate": 6.322058211948601e-06, "loss": 0.4344, "step": 24630 }, { "epoch": 0.63, "grad_norm": 1.5552743673324585, "learning_rate": 6.321286268777996e-06, "loss": 0.5176, "step": 24631 }, { "epoch": 0.63, "grad_norm": 1.526718258857727, "learning_rate": 6.320514350958775e-06, "loss": 0.5825, "step": 24632 }, { "epoch": 0.63, "grad_norm": 3.2315738201141357, "learning_rate": 6.319742458496258e-06, "loss": 0.6079, "step": 24633 }, { "epoch": 0.63, "grad_norm": 1.7514911890029907, "learning_rate": 6.318970591395762e-06, "loss": 0.4533, "step": 24634 }, { "epoch": 0.63, "grad_norm": 1.0299485921859741, "learning_rate": 6.318198749662609e-06, "loss": 0.5719, "step": 24635 }, { "epoch": 0.63, "grad_norm": 5.916042804718018, "learning_rate": 6.317426933302118e-06, "loss": 0.441, "step": 24636 }, { "epoch": 0.63, "grad_norm": 1.4665672779083252, "learning_rate": 6.316655142319605e-06, "loss": 0.575, "step": 24637 }, { "epoch": 0.63, "grad_norm": 4.997828006744385, "learning_rate": 6.315883376720393e-06, "loss": 0.6948, "step": 24638 }, { "epoch": 0.63, "grad_norm": 1.540384292602539, "learning_rate": 6.315111636509796e-06, "loss": 0.5525, "step": 24639 }, { "epoch": 0.63, "grad_norm": 1.481048583984375, "learning_rate": 6.3143399216931365e-06, "loss": 0.5378, "step": 24640 }, { "epoch": 0.63, "grad_norm": 1.0005167722702026, "learning_rate": 6.313568232275729e-06, "loss": 0.4708, "step": 24641 }, { "epoch": 0.63, "grad_norm": 3.0822598934173584, "learning_rate": 6.312796568262893e-06, "loss": 0.5377, "step": 24642 }, { "epoch": 0.63, "grad_norm": 1.2461141347885132, "learning_rate": 6.312024929659948e-06, "loss": 0.4326, "step": 24643 }, { "epoch": 0.63, "grad_norm": 1.7565597295761108, "learning_rate": 6.311253316472209e-06, "loss": 0.6199, "step": 24644 }, { "epoch": 0.63, "grad_norm": 2.283987283706665, "learning_rate": 6.3104817287049954e-06, "loss": 0.687, "step": 24645 }, { "epoch": 0.63, "grad_norm": 1.2256637811660767, "learning_rate": 6.3097101663636225e-06, "loss": 0.6411, "step": 24646 }, { "epoch": 0.63, "grad_norm": 2.5677733421325684, "learning_rate": 6.308938629453409e-06, "loss": 0.7124, "step": 24647 }, { "epoch": 0.63, "grad_norm": 1.4845753908157349, "learning_rate": 6.308167117979674e-06, "loss": 0.4683, "step": 24648 }, { "epoch": 0.63, "grad_norm": 2.9245002269744873, "learning_rate": 6.307395631947731e-06, "loss": 0.6011, "step": 24649 }, { "epoch": 0.63, "grad_norm": 1.4602216482162476, "learning_rate": 6.3066241713628965e-06, "loss": 0.68, "step": 24650 }, { "epoch": 0.63, "grad_norm": 8.652697563171387, "learning_rate": 6.305852736230487e-06, "loss": 0.6965, "step": 24651 }, { "epoch": 0.63, "grad_norm": 2.280790328979492, "learning_rate": 6.305081326555821e-06, "loss": 0.5871, "step": 24652 }, { "epoch": 0.63, "grad_norm": 1.6070226430892944, "learning_rate": 6.304309942344211e-06, "loss": 0.6101, "step": 24653 }, { "epoch": 0.63, "grad_norm": 1.0953246355056763, "learning_rate": 6.303538583600976e-06, "loss": 0.4022, "step": 24654 }, { "epoch": 0.63, "grad_norm": 1.162656307220459, "learning_rate": 6.302767250331432e-06, "loss": 0.5745, "step": 24655 }, { "epoch": 0.63, "grad_norm": 1.592862844467163, "learning_rate": 6.301995942540891e-06, "loss": 0.5856, "step": 24656 }, { "epoch": 0.63, "grad_norm": 1.863356351852417, "learning_rate": 6.301224660234675e-06, "loss": 0.3332, "step": 24657 }, { "epoch": 0.63, "grad_norm": 1.6764181852340698, "learning_rate": 6.30045340341809e-06, "loss": 0.6083, "step": 24658 }, { "epoch": 0.63, "grad_norm": 1.4625065326690674, "learning_rate": 6.299682172096458e-06, "loss": 0.5748, "step": 24659 }, { "epoch": 0.63, "grad_norm": 0.7979446053504944, "learning_rate": 6.298910966275091e-06, "loss": 0.4487, "step": 24660 }, { "epoch": 0.63, "grad_norm": 2.732895612716675, "learning_rate": 6.298139785959305e-06, "loss": 0.7258, "step": 24661 }, { "epoch": 0.63, "grad_norm": 1.6921502351760864, "learning_rate": 6.297368631154414e-06, "loss": 0.5879, "step": 24662 }, { "epoch": 0.63, "grad_norm": 9.129425048828125, "learning_rate": 6.296597501865731e-06, "loss": 0.6177, "step": 24663 }, { "epoch": 0.63, "grad_norm": 11.143539428710938, "learning_rate": 6.2958263980985745e-06, "loss": 0.7246, "step": 24664 }, { "epoch": 0.63, "grad_norm": 1.7564107179641724, "learning_rate": 6.295055319858252e-06, "loss": 0.703, "step": 24665 }, { "epoch": 0.63, "grad_norm": 1.8319039344787598, "learning_rate": 6.2942842671500805e-06, "loss": 0.5756, "step": 24666 }, { "epoch": 0.63, "grad_norm": 2.3160035610198975, "learning_rate": 6.293513239979376e-06, "loss": 0.5797, "step": 24667 }, { "epoch": 0.63, "grad_norm": 1.6701321601867676, "learning_rate": 6.292742238351447e-06, "loss": 0.4977, "step": 24668 }, { "epoch": 0.63, "grad_norm": 2.2890913486480713, "learning_rate": 6.291971262271612e-06, "loss": 0.536, "step": 24669 }, { "epoch": 0.63, "grad_norm": 1.4571442604064941, "learning_rate": 6.291200311745179e-06, "loss": 0.6073, "step": 24670 }, { "epoch": 0.63, "grad_norm": 2.3091516494750977, "learning_rate": 6.290429386777463e-06, "loss": 0.466, "step": 24671 }, { "epoch": 0.63, "grad_norm": 4.29983377456665, "learning_rate": 6.2896584873737795e-06, "loss": 0.5197, "step": 24672 }, { "epoch": 0.63, "grad_norm": 1.6462836265563965, "learning_rate": 6.288887613539438e-06, "loss": 0.5078, "step": 24673 }, { "epoch": 0.63, "grad_norm": 1.080558180809021, "learning_rate": 6.288116765279753e-06, "loss": 0.453, "step": 24674 }, { "epoch": 0.63, "grad_norm": 5.619462490081787, "learning_rate": 6.287345942600031e-06, "loss": 0.4965, "step": 24675 }, { "epoch": 0.63, "grad_norm": 9.272526741027832, "learning_rate": 6.286575145505592e-06, "loss": 0.4596, "step": 24676 }, { "epoch": 0.63, "grad_norm": 1.1946650743484497, "learning_rate": 6.285804374001742e-06, "loss": 0.6, "step": 24677 }, { "epoch": 0.63, "grad_norm": 4.3056559562683105, "learning_rate": 6.285033628093795e-06, "loss": 0.5213, "step": 24678 }, { "epoch": 0.63, "grad_norm": 1.5406596660614014, "learning_rate": 6.284262907787064e-06, "loss": 0.5495, "step": 24679 }, { "epoch": 0.63, "grad_norm": 6.556835174560547, "learning_rate": 6.283492213086857e-06, "loss": 0.6685, "step": 24680 }, { "epoch": 0.63, "grad_norm": 9.542814254760742, "learning_rate": 6.282721543998488e-06, "loss": 0.6664, "step": 24681 }, { "epoch": 0.63, "grad_norm": 1.7290486097335815, "learning_rate": 6.281950900527265e-06, "loss": 0.4844, "step": 24682 }, { "epoch": 0.63, "grad_norm": 2.5774126052856445, "learning_rate": 6.281180282678501e-06, "loss": 0.6428, "step": 24683 }, { "epoch": 0.63, "grad_norm": 1.1371440887451172, "learning_rate": 6.280409690457507e-06, "loss": 0.5411, "step": 24684 }, { "epoch": 0.63, "grad_norm": 1.137556791305542, "learning_rate": 6.279639123869591e-06, "loss": 0.4292, "step": 24685 }, { "epoch": 0.63, "grad_norm": 1.3257557153701782, "learning_rate": 6.2788685829200666e-06, "loss": 0.5863, "step": 24686 }, { "epoch": 0.63, "grad_norm": 14.933858871459961, "learning_rate": 6.27809806761424e-06, "loss": 0.6172, "step": 24687 }, { "epoch": 0.63, "grad_norm": 2.388962745666504, "learning_rate": 6.277327577957424e-06, "loss": 0.5105, "step": 24688 }, { "epoch": 0.63, "grad_norm": 0.941655695438385, "learning_rate": 6.276557113954929e-06, "loss": 0.4181, "step": 24689 }, { "epoch": 0.63, "grad_norm": 1.2305362224578857, "learning_rate": 6.275786675612061e-06, "loss": 0.5529, "step": 24690 }, { "epoch": 0.63, "grad_norm": 1.4581888914108276, "learning_rate": 6.275016262934135e-06, "loss": 0.4909, "step": 24691 }, { "epoch": 0.63, "grad_norm": 1.740175485610962, "learning_rate": 6.274245875926452e-06, "loss": 0.5877, "step": 24692 }, { "epoch": 0.63, "grad_norm": 1.154309630393982, "learning_rate": 6.273475514594329e-06, "loss": 0.5482, "step": 24693 }, { "epoch": 0.63, "grad_norm": 1.7694472074508667, "learning_rate": 6.2727051789430704e-06, "loss": 0.5393, "step": 24694 }, { "epoch": 0.63, "grad_norm": 1.593438744544983, "learning_rate": 6.271934868977985e-06, "loss": 0.5687, "step": 24695 }, { "epoch": 0.63, "grad_norm": 1.4581074714660645, "learning_rate": 6.271164584704384e-06, "loss": 0.5429, "step": 24696 }, { "epoch": 0.63, "grad_norm": 1.1540881395339966, "learning_rate": 6.270394326127571e-06, "loss": 0.4642, "step": 24697 }, { "epoch": 0.63, "grad_norm": 0.8964442610740662, "learning_rate": 6.2696240932528615e-06, "loss": 0.5474, "step": 24698 }, { "epoch": 0.63, "grad_norm": 2.931955575942993, "learning_rate": 6.268853886085556e-06, "loss": 0.543, "step": 24699 }, { "epoch": 0.63, "grad_norm": 2.9935147762298584, "learning_rate": 6.268083704630965e-06, "loss": 0.6427, "step": 24700 }, { "epoch": 0.63, "grad_norm": 3.486088514328003, "learning_rate": 6.2673135488944e-06, "loss": 0.6612, "step": 24701 }, { "epoch": 0.63, "grad_norm": 1.3651100397109985, "learning_rate": 6.266543418881161e-06, "loss": 0.483, "step": 24702 }, { "epoch": 0.63, "grad_norm": 3.82643723487854, "learning_rate": 6.265773314596562e-06, "loss": 0.5027, "step": 24703 }, { "epoch": 0.63, "grad_norm": 2.0114166736602783, "learning_rate": 6.265003236045905e-06, "loss": 0.7686, "step": 24704 }, { "epoch": 0.63, "grad_norm": 1.8655434846878052, "learning_rate": 6.264233183234503e-06, "loss": 0.6789, "step": 24705 }, { "epoch": 0.63, "grad_norm": 8.288745880126953, "learning_rate": 6.263463156167655e-06, "loss": 0.802, "step": 24706 }, { "epoch": 0.63, "grad_norm": 3.489015817642212, "learning_rate": 6.262693154850671e-06, "loss": 0.5689, "step": 24707 }, { "epoch": 0.63, "grad_norm": 2.1818859577178955, "learning_rate": 6.26192317928886e-06, "loss": 0.4884, "step": 24708 }, { "epoch": 0.63, "grad_norm": 1.7989665269851685, "learning_rate": 6.261153229487524e-06, "loss": 0.6426, "step": 24709 }, { "epoch": 0.63, "grad_norm": 1.9356062412261963, "learning_rate": 6.2603833054519735e-06, "loss": 0.4708, "step": 24710 }, { "epoch": 0.63, "grad_norm": 1.415217399597168, "learning_rate": 6.2596134071875104e-06, "loss": 0.5002, "step": 24711 }, { "epoch": 0.63, "grad_norm": 10.01988410949707, "learning_rate": 6.2588435346994415e-06, "loss": 0.565, "step": 24712 }, { "epoch": 0.63, "grad_norm": 1.8967713117599487, "learning_rate": 6.2580736879930735e-06, "loss": 0.3982, "step": 24713 }, { "epoch": 0.63, "grad_norm": 1.2480953931808472, "learning_rate": 6.25730386707371e-06, "loss": 0.3649, "step": 24714 }, { "epoch": 0.63, "grad_norm": 2.3008604049682617, "learning_rate": 6.256534071946658e-06, "loss": 0.5465, "step": 24715 }, { "epoch": 0.63, "grad_norm": 6.518881797790527, "learning_rate": 6.255764302617219e-06, "loss": 0.6458, "step": 24716 }, { "epoch": 0.63, "grad_norm": 1.4076699018478394, "learning_rate": 6.2549945590907025e-06, "loss": 0.6714, "step": 24717 }, { "epoch": 0.63, "grad_norm": 1.2521156072616577, "learning_rate": 6.254224841372409e-06, "loss": 0.5136, "step": 24718 }, { "epoch": 0.63, "grad_norm": 1.4107433557510376, "learning_rate": 6.2534551494676445e-06, "loss": 0.4804, "step": 24719 }, { "epoch": 0.63, "grad_norm": 1.7690095901489258, "learning_rate": 6.252685483381715e-06, "loss": 0.4645, "step": 24720 }, { "epoch": 0.63, "grad_norm": 5.2015814781188965, "learning_rate": 6.25191584311992e-06, "loss": 0.6588, "step": 24721 }, { "epoch": 0.63, "grad_norm": 1.6970850229263306, "learning_rate": 6.25114622868757e-06, "loss": 0.5545, "step": 24722 }, { "epoch": 0.63, "grad_norm": 4.142518997192383, "learning_rate": 6.250376640089961e-06, "loss": 0.4778, "step": 24723 }, { "epoch": 0.63, "grad_norm": 1.6001861095428467, "learning_rate": 6.249607077332402e-06, "loss": 0.6134, "step": 24724 }, { "epoch": 0.63, "grad_norm": 1.5523706674575806, "learning_rate": 6.2488375404201965e-06, "loss": 0.6415, "step": 24725 }, { "epoch": 0.63, "grad_norm": 1.6817054748535156, "learning_rate": 6.248068029358643e-06, "loss": 0.6801, "step": 24726 }, { "epoch": 0.63, "grad_norm": 3.4577457904815674, "learning_rate": 6.247298544153051e-06, "loss": 0.5886, "step": 24727 }, { "epoch": 0.63, "grad_norm": 1.241406798362732, "learning_rate": 6.246529084808717e-06, "loss": 0.6061, "step": 24728 }, { "epoch": 0.63, "grad_norm": 4.812157154083252, "learning_rate": 6.245759651330948e-06, "loss": 0.6747, "step": 24729 }, { "epoch": 0.63, "grad_norm": 1.6722626686096191, "learning_rate": 6.244990243725044e-06, "loss": 0.546, "step": 24730 }, { "epoch": 0.63, "grad_norm": 1.5256547927856445, "learning_rate": 6.244220861996306e-06, "loss": 0.567, "step": 24731 }, { "epoch": 0.63, "grad_norm": 1.9050843715667725, "learning_rate": 6.243451506150042e-06, "loss": 0.677, "step": 24732 }, { "epoch": 0.63, "grad_norm": 1.728217363357544, "learning_rate": 6.242682176191548e-06, "loss": 0.5761, "step": 24733 }, { "epoch": 0.63, "grad_norm": 3.114123821258545, "learning_rate": 6.24191287212613e-06, "loss": 0.5579, "step": 24734 }, { "epoch": 0.63, "grad_norm": 3.5924172401428223, "learning_rate": 6.241143593959085e-06, "loss": 0.5146, "step": 24735 }, { "epoch": 0.63, "grad_norm": 7.670954704284668, "learning_rate": 6.240374341695716e-06, "loss": 0.5028, "step": 24736 }, { "epoch": 0.63, "grad_norm": 1.9743038415908813, "learning_rate": 6.2396051153413276e-06, "loss": 0.7622, "step": 24737 }, { "epoch": 0.63, "grad_norm": 1.7187515497207642, "learning_rate": 6.238835914901217e-06, "loss": 0.6465, "step": 24738 }, { "epoch": 0.63, "grad_norm": 2.7655115127563477, "learning_rate": 6.238066740380687e-06, "loss": 0.5286, "step": 24739 }, { "epoch": 0.63, "grad_norm": 1.5630933046340942, "learning_rate": 6.237297591785037e-06, "loss": 0.5612, "step": 24740 }, { "epoch": 0.63, "grad_norm": 1.8700367212295532, "learning_rate": 6.236528469119567e-06, "loss": 0.5548, "step": 24741 }, { "epoch": 0.63, "grad_norm": 1.2326947450637817, "learning_rate": 6.23575937238958e-06, "loss": 0.5764, "step": 24742 }, { "epoch": 0.63, "grad_norm": 1.2854621410369873, "learning_rate": 6.2349903016003734e-06, "loss": 0.5729, "step": 24743 }, { "epoch": 0.63, "grad_norm": 1.019098162651062, "learning_rate": 6.23422125675725e-06, "loss": 0.4133, "step": 24744 }, { "epoch": 0.63, "grad_norm": 1.513750672340393, "learning_rate": 6.233452237865505e-06, "loss": 0.5925, "step": 24745 }, { "epoch": 0.63, "grad_norm": 1.2823686599731445, "learning_rate": 6.232683244930444e-06, "loss": 0.5536, "step": 24746 }, { "epoch": 0.63, "grad_norm": 2.3997883796691895, "learning_rate": 6.23191427795736e-06, "loss": 0.5108, "step": 24747 }, { "epoch": 0.63, "grad_norm": 1.346081018447876, "learning_rate": 6.2311453369515565e-06, "loss": 0.4972, "step": 24748 }, { "epoch": 0.63, "grad_norm": 1.7733683586120605, "learning_rate": 6.230376421918334e-06, "loss": 0.491, "step": 24749 }, { "epoch": 0.63, "grad_norm": 2.005375385284424, "learning_rate": 6.229607532862985e-06, "loss": 0.7146, "step": 24750 }, { "epoch": 0.63, "grad_norm": 2.093458652496338, "learning_rate": 6.228838669790815e-06, "loss": 0.7082, "step": 24751 }, { "epoch": 0.63, "grad_norm": 0.9962294101715088, "learning_rate": 6.228069832707119e-06, "loss": 0.4437, "step": 24752 }, { "epoch": 0.63, "grad_norm": 1.8990613222122192, "learning_rate": 6.227301021617194e-06, "loss": 0.6349, "step": 24753 }, { "epoch": 0.63, "grad_norm": 4.509775638580322, "learning_rate": 6.226532236526344e-06, "loss": 0.7828, "step": 24754 }, { "epoch": 0.63, "grad_norm": 1.6923896074295044, "learning_rate": 6.225763477439859e-06, "loss": 0.6134, "step": 24755 }, { "epoch": 0.63, "grad_norm": 2.1336185932159424, "learning_rate": 6.224994744363044e-06, "loss": 0.4309, "step": 24756 }, { "epoch": 0.63, "grad_norm": 1.2395548820495605, "learning_rate": 6.224226037301191e-06, "loss": 0.5839, "step": 24757 }, { "epoch": 0.63, "grad_norm": 2.1060497760772705, "learning_rate": 6.223457356259603e-06, "loss": 0.6389, "step": 24758 }, { "epoch": 0.63, "grad_norm": 3.025965929031372, "learning_rate": 6.222688701243571e-06, "loss": 0.5114, "step": 24759 }, { "epoch": 0.63, "grad_norm": 1.5255883932113647, "learning_rate": 6.221920072258396e-06, "loss": 0.5597, "step": 24760 }, { "epoch": 0.63, "grad_norm": 1.2336310148239136, "learning_rate": 6.221151469309377e-06, "loss": 0.6089, "step": 24761 }, { "epoch": 0.63, "grad_norm": 1.6010704040527344, "learning_rate": 6.220382892401804e-06, "loss": 0.4391, "step": 24762 }, { "epoch": 0.63, "grad_norm": 1.4428589344024658, "learning_rate": 6.219614341540981e-06, "loss": 0.5979, "step": 24763 }, { "epoch": 0.63, "grad_norm": 2.088412284851074, "learning_rate": 6.2188458167322e-06, "loss": 0.5728, "step": 24764 }, { "epoch": 0.63, "grad_norm": 4.76185417175293, "learning_rate": 6.218077317980755e-06, "loss": 0.5213, "step": 24765 }, { "epoch": 0.63, "grad_norm": 1.3502604961395264, "learning_rate": 6.21730884529195e-06, "loss": 0.4695, "step": 24766 }, { "epoch": 0.63, "grad_norm": 1.4848878383636475, "learning_rate": 6.216540398671072e-06, "loss": 0.5773, "step": 24767 }, { "epoch": 0.63, "grad_norm": 1.2707786560058594, "learning_rate": 6.215771978123424e-06, "loss": 0.7097, "step": 24768 }, { "epoch": 0.63, "grad_norm": 2.1422972679138184, "learning_rate": 6.215003583654295e-06, "loss": 0.6256, "step": 24769 }, { "epoch": 0.63, "grad_norm": 1.5930778980255127, "learning_rate": 6.214235215268985e-06, "loss": 0.6429, "step": 24770 }, { "epoch": 0.63, "grad_norm": 1.2251702547073364, "learning_rate": 6.213466872972787e-06, "loss": 0.673, "step": 24771 }, { "epoch": 0.63, "grad_norm": 3.980616807937622, "learning_rate": 6.212698556770995e-06, "loss": 0.4875, "step": 24772 }, { "epoch": 0.63, "grad_norm": 1.6275410652160645, "learning_rate": 6.211930266668906e-06, "loss": 0.532, "step": 24773 }, { "epoch": 0.63, "grad_norm": 1.7252223491668701, "learning_rate": 6.211162002671813e-06, "loss": 0.6522, "step": 24774 }, { "epoch": 0.63, "grad_norm": 1.3248438835144043, "learning_rate": 6.210393764785015e-06, "loss": 0.4161, "step": 24775 }, { "epoch": 0.64, "grad_norm": 5.278762340545654, "learning_rate": 6.209625553013799e-06, "loss": 0.7293, "step": 24776 }, { "epoch": 0.64, "grad_norm": 1.9456549882888794, "learning_rate": 6.208857367363461e-06, "loss": 0.7534, "step": 24777 }, { "epoch": 0.64, "grad_norm": 2.403059720993042, "learning_rate": 6.2080892078393e-06, "loss": 0.5536, "step": 24778 }, { "epoch": 0.64, "grad_norm": 1.9086464643478394, "learning_rate": 6.207321074446601e-06, "loss": 0.8081, "step": 24779 }, { "epoch": 0.64, "grad_norm": 1.130321979522705, "learning_rate": 6.206552967190667e-06, "loss": 0.6508, "step": 24780 }, { "epoch": 0.64, "grad_norm": 1.520521879196167, "learning_rate": 6.205784886076785e-06, "loss": 0.4097, "step": 24781 }, { "epoch": 0.64, "grad_norm": 1.5916392803192139, "learning_rate": 6.205016831110249e-06, "loss": 0.589, "step": 24782 }, { "epoch": 0.64, "grad_norm": 4.033803939819336, "learning_rate": 6.204248802296354e-06, "loss": 0.5305, "step": 24783 }, { "epoch": 0.64, "grad_norm": 2.62614107131958, "learning_rate": 6.203480799640391e-06, "loss": 0.7273, "step": 24784 }, { "epoch": 0.64, "grad_norm": 1.9275898933410645, "learning_rate": 6.202712823147654e-06, "loss": 0.5496, "step": 24785 }, { "epoch": 0.64, "grad_norm": 0.8503903746604919, "learning_rate": 6.201944872823433e-06, "loss": 0.4799, "step": 24786 }, { "epoch": 0.64, "grad_norm": 1.133830189704895, "learning_rate": 6.201176948673023e-06, "loss": 0.5151, "step": 24787 }, { "epoch": 0.64, "grad_norm": 17.458372116088867, "learning_rate": 6.200409050701715e-06, "loss": 0.6029, "step": 24788 }, { "epoch": 0.64, "grad_norm": 1.509771704673767, "learning_rate": 6.199641178914797e-06, "loss": 0.5669, "step": 24789 }, { "epoch": 0.64, "grad_norm": 4.043368816375732, "learning_rate": 6.19887333331757e-06, "loss": 0.8656, "step": 24790 }, { "epoch": 0.64, "grad_norm": 1.745298981666565, "learning_rate": 6.198105513915316e-06, "loss": 0.5888, "step": 24791 }, { "epoch": 0.64, "grad_norm": 1.1063921451568604, "learning_rate": 6.197337720713333e-06, "loss": 0.5285, "step": 24792 }, { "epoch": 0.64, "grad_norm": 7.3337812423706055, "learning_rate": 6.1965699537169065e-06, "loss": 0.4548, "step": 24793 }, { "epoch": 0.64, "grad_norm": 1.4050976037979126, "learning_rate": 6.19580221293133e-06, "loss": 0.4507, "step": 24794 }, { "epoch": 0.64, "grad_norm": 1.9860464334487915, "learning_rate": 6.195034498361898e-06, "loss": 0.583, "step": 24795 }, { "epoch": 0.64, "grad_norm": 1.4475764036178589, "learning_rate": 6.1942668100138945e-06, "loss": 0.5087, "step": 24796 }, { "epoch": 0.64, "grad_norm": 12.299765586853027, "learning_rate": 6.1934991478926165e-06, "loss": 0.71, "step": 24797 }, { "epoch": 0.64, "grad_norm": 3.960374116897583, "learning_rate": 6.1927315120033475e-06, "loss": 0.5552, "step": 24798 }, { "epoch": 0.64, "grad_norm": 1.8584954738616943, "learning_rate": 6.191963902351383e-06, "loss": 0.4695, "step": 24799 }, { "epoch": 0.64, "grad_norm": 8.422621726989746, "learning_rate": 6.19119631894201e-06, "loss": 0.5686, "step": 24800 }, { "epoch": 0.64, "grad_norm": 1.4304368495941162, "learning_rate": 6.1904287617805185e-06, "loss": 0.5762, "step": 24801 }, { "epoch": 0.64, "grad_norm": 6.293344974517822, "learning_rate": 6.189661230872201e-06, "loss": 0.5433, "step": 24802 }, { "epoch": 0.64, "grad_norm": 2.91115140914917, "learning_rate": 6.188893726222342e-06, "loss": 0.5518, "step": 24803 }, { "epoch": 0.64, "grad_norm": 1.3576260805130005, "learning_rate": 6.188126247836235e-06, "loss": 0.4509, "step": 24804 }, { "epoch": 0.64, "grad_norm": 2.957106113433838, "learning_rate": 6.187358795719165e-06, "loss": 0.6315, "step": 24805 }, { "epoch": 0.64, "grad_norm": 1.468799114227295, "learning_rate": 6.186591369876422e-06, "loss": 0.5912, "step": 24806 }, { "epoch": 0.64, "grad_norm": 1.2475509643554688, "learning_rate": 6.185823970313299e-06, "loss": 0.572, "step": 24807 }, { "epoch": 0.64, "grad_norm": 1.7333606481552124, "learning_rate": 6.1850565970350785e-06, "loss": 0.7645, "step": 24808 }, { "epoch": 0.64, "grad_norm": 1.1008589267730713, "learning_rate": 6.184289250047053e-06, "loss": 0.5085, "step": 24809 }, { "epoch": 0.64, "grad_norm": 1.4240740537643433, "learning_rate": 6.1835219293545066e-06, "loss": 0.5336, "step": 24810 }, { "epoch": 0.64, "grad_norm": 1.5934162139892578, "learning_rate": 6.182754634962731e-06, "loss": 0.5018, "step": 24811 }, { "epoch": 0.64, "grad_norm": 1.6079021692276, "learning_rate": 6.1819873668770115e-06, "loss": 0.5753, "step": 24812 }, { "epoch": 0.64, "grad_norm": 2.0033528804779053, "learning_rate": 6.181220125102635e-06, "loss": 0.6302, "step": 24813 }, { "epoch": 0.64, "grad_norm": 1.8756531476974487, "learning_rate": 6.1804529096448925e-06, "loss": 0.5135, "step": 24814 }, { "epoch": 0.64, "grad_norm": 1.273762583732605, "learning_rate": 6.179685720509068e-06, "loss": 0.5894, "step": 24815 }, { "epoch": 0.64, "grad_norm": 1.3933324813842773, "learning_rate": 6.178918557700449e-06, "loss": 0.4555, "step": 24816 }, { "epoch": 0.64, "grad_norm": 1.4680192470550537, "learning_rate": 6.178151421224323e-06, "loss": 0.4605, "step": 24817 }, { "epoch": 0.64, "grad_norm": 1.5366582870483398, "learning_rate": 6.1773843110859746e-06, "loss": 0.5985, "step": 24818 }, { "epoch": 0.64, "grad_norm": 13.378497123718262, "learning_rate": 6.176617227290695e-06, "loss": 0.5227, "step": 24819 }, { "epoch": 0.64, "grad_norm": 1.089959740638733, "learning_rate": 6.175850169843765e-06, "loss": 0.4952, "step": 24820 }, { "epoch": 0.64, "grad_norm": 1.3155150413513184, "learning_rate": 6.1750831387504754e-06, "loss": 0.5632, "step": 24821 }, { "epoch": 0.64, "grad_norm": 1.75645911693573, "learning_rate": 6.174316134016107e-06, "loss": 0.522, "step": 24822 }, { "epoch": 0.64, "grad_norm": 1.559736967086792, "learning_rate": 6.1735491556459515e-06, "loss": 0.549, "step": 24823 }, { "epoch": 0.64, "grad_norm": 1.9644030332565308, "learning_rate": 6.17278220364529e-06, "loss": 0.5437, "step": 24824 }, { "epoch": 0.64, "grad_norm": 6.907244682312012, "learning_rate": 6.17201527801941e-06, "loss": 0.6745, "step": 24825 }, { "epoch": 0.64, "grad_norm": 6.709618091583252, "learning_rate": 6.171248378773593e-06, "loss": 0.5856, "step": 24826 }, { "epoch": 0.64, "grad_norm": 9.044189453125, "learning_rate": 6.170481505913128e-06, "loss": 0.5252, "step": 24827 }, { "epoch": 0.64, "grad_norm": 1.381524682044983, "learning_rate": 6.1697146594432976e-06, "loss": 0.3247, "step": 24828 }, { "epoch": 0.64, "grad_norm": 1.3470993041992188, "learning_rate": 6.16894783936939e-06, "loss": 0.5395, "step": 24829 }, { "epoch": 0.64, "grad_norm": 1.510223388671875, "learning_rate": 6.168181045696684e-06, "loss": 0.4064, "step": 24830 }, { "epoch": 0.64, "grad_norm": 4.088507175445557, "learning_rate": 6.167414278430468e-06, "loss": 0.405, "step": 24831 }, { "epoch": 0.64, "grad_norm": 1.1923911571502686, "learning_rate": 6.166647537576025e-06, "loss": 0.532, "step": 24832 }, { "epoch": 0.64, "grad_norm": 7.559825897216797, "learning_rate": 6.165880823138638e-06, "loss": 0.5295, "step": 24833 }, { "epoch": 0.64, "grad_norm": 5.3619537353515625, "learning_rate": 6.165114135123593e-06, "loss": 0.7836, "step": 24834 }, { "epoch": 0.64, "grad_norm": 1.2746703624725342, "learning_rate": 6.164347473536171e-06, "loss": 0.4801, "step": 24835 }, { "epoch": 0.64, "grad_norm": 1.0013753175735474, "learning_rate": 6.163580838381655e-06, "loss": 0.4436, "step": 24836 }, { "epoch": 0.64, "grad_norm": 1.080325961112976, "learning_rate": 6.162814229665333e-06, "loss": 0.5527, "step": 24837 }, { "epoch": 0.64, "grad_norm": 1.2965461015701294, "learning_rate": 6.162047647392481e-06, "loss": 0.5246, "step": 24838 }, { "epoch": 0.64, "grad_norm": 1.88127863407135, "learning_rate": 6.1612810915683895e-06, "loss": 0.4392, "step": 24839 }, { "epoch": 0.64, "grad_norm": 4.339995384216309, "learning_rate": 6.160514562198334e-06, "loss": 0.6266, "step": 24840 }, { "epoch": 0.64, "grad_norm": 2.9437708854675293, "learning_rate": 6.159748059287602e-06, "loss": 0.498, "step": 24841 }, { "epoch": 0.64, "grad_norm": 1.264845609664917, "learning_rate": 6.1589815828414715e-06, "loss": 0.5355, "step": 24842 }, { "epoch": 0.64, "grad_norm": 13.352984428405762, "learning_rate": 6.158215132865226e-06, "loss": 0.5962, "step": 24843 }, { "epoch": 0.64, "grad_norm": 1.1642804145812988, "learning_rate": 6.157448709364151e-06, "loss": 0.4322, "step": 24844 }, { "epoch": 0.64, "grad_norm": 4.9302263259887695, "learning_rate": 6.156682312343524e-06, "loss": 0.6875, "step": 24845 }, { "epoch": 0.64, "grad_norm": 1.264941692352295, "learning_rate": 6.155915941808629e-06, "loss": 0.4389, "step": 24846 }, { "epoch": 0.64, "grad_norm": 1.2954230308532715, "learning_rate": 6.1551495977647445e-06, "loss": 0.38, "step": 24847 }, { "epoch": 0.64, "grad_norm": 1.5019664764404297, "learning_rate": 6.154383280217153e-06, "loss": 0.6097, "step": 24848 }, { "epoch": 0.64, "grad_norm": 1.0326108932495117, "learning_rate": 6.153616989171138e-06, "loss": 0.4397, "step": 24849 }, { "epoch": 0.64, "grad_norm": 1.614372730255127, "learning_rate": 6.152850724631977e-06, "loss": 0.6761, "step": 24850 }, { "epoch": 0.64, "grad_norm": 0.9223507046699524, "learning_rate": 6.152084486604952e-06, "loss": 0.5236, "step": 24851 }, { "epoch": 0.64, "grad_norm": 2.4769656658172607, "learning_rate": 6.151318275095344e-06, "loss": 0.6253, "step": 24852 }, { "epoch": 0.64, "grad_norm": 3.4358017444610596, "learning_rate": 6.150552090108433e-06, "loss": 0.6059, "step": 24853 }, { "epoch": 0.64, "grad_norm": 9.746894836425781, "learning_rate": 6.149785931649495e-06, "loss": 0.7026, "step": 24854 }, { "epoch": 0.64, "grad_norm": 1.6638262271881104, "learning_rate": 6.149019799723815e-06, "loss": 0.5361, "step": 24855 }, { "epoch": 0.64, "grad_norm": 1.0697436332702637, "learning_rate": 6.148253694336674e-06, "loss": 0.4742, "step": 24856 }, { "epoch": 0.64, "grad_norm": 6.10417366027832, "learning_rate": 6.147487615493345e-06, "loss": 0.6045, "step": 24857 }, { "epoch": 0.64, "grad_norm": 9.975382804870605, "learning_rate": 6.146721563199114e-06, "loss": 0.4976, "step": 24858 }, { "epoch": 0.64, "grad_norm": 2.3594727516174316, "learning_rate": 6.145955537459254e-06, "loss": 0.4635, "step": 24859 }, { "epoch": 0.64, "grad_norm": 1.2712960243225098, "learning_rate": 6.145189538279048e-06, "loss": 0.4757, "step": 24860 }, { "epoch": 0.64, "grad_norm": 11.818902969360352, "learning_rate": 6.1444235656637755e-06, "loss": 0.4925, "step": 24861 }, { "epoch": 0.64, "grad_norm": 2.9076037406921387, "learning_rate": 6.1436576196187125e-06, "loss": 0.6653, "step": 24862 }, { "epoch": 0.64, "grad_norm": 2.32625412940979, "learning_rate": 6.142891700149138e-06, "loss": 0.6012, "step": 24863 }, { "epoch": 0.64, "grad_norm": 1.368453025817871, "learning_rate": 6.142125807260331e-06, "loss": 0.5395, "step": 24864 }, { "epoch": 0.64, "grad_norm": 1.1622496843338013, "learning_rate": 6.14135994095757e-06, "loss": 0.5295, "step": 24865 }, { "epoch": 0.64, "grad_norm": 1.1439980268478394, "learning_rate": 6.1405941012461315e-06, "loss": 0.4883, "step": 24866 }, { "epoch": 0.64, "grad_norm": 0.9799870252609253, "learning_rate": 6.139828288131291e-06, "loss": 0.54, "step": 24867 }, { "epoch": 0.64, "grad_norm": 2.03515887260437, "learning_rate": 6.139062501618334e-06, "loss": 0.596, "step": 24868 }, { "epoch": 0.64, "grad_norm": 1.4695523977279663, "learning_rate": 6.1382967417125284e-06, "loss": 0.4815, "step": 24869 }, { "epoch": 0.64, "grad_norm": 1.326003074645996, "learning_rate": 6.1375310084191576e-06, "loss": 0.433, "step": 24870 }, { "epoch": 0.64, "grad_norm": 3.7983217239379883, "learning_rate": 6.136765301743495e-06, "loss": 0.5265, "step": 24871 }, { "epoch": 0.64, "grad_norm": 1.0686613321304321, "learning_rate": 6.135999621690818e-06, "loss": 0.4541, "step": 24872 }, { "epoch": 0.64, "grad_norm": 9.87901496887207, "learning_rate": 6.135233968266408e-06, "loss": 0.66, "step": 24873 }, { "epoch": 0.64, "grad_norm": 1.2757824659347534, "learning_rate": 6.1344683414755325e-06, "loss": 0.5505, "step": 24874 }, { "epoch": 0.64, "grad_norm": 1.5672931671142578, "learning_rate": 6.1337027413234776e-06, "loss": 0.5306, "step": 24875 }, { "epoch": 0.64, "grad_norm": 1.9684429168701172, "learning_rate": 6.132937167815511e-06, "loss": 0.5758, "step": 24876 }, { "epoch": 0.64, "grad_norm": 1.238662600517273, "learning_rate": 6.13217162095691e-06, "loss": 0.4856, "step": 24877 }, { "epoch": 0.64, "grad_norm": 1.143601894378662, "learning_rate": 6.131406100752957e-06, "loss": 0.5223, "step": 24878 }, { "epoch": 0.64, "grad_norm": 2.0025734901428223, "learning_rate": 6.13064060720892e-06, "loss": 0.4184, "step": 24879 }, { "epoch": 0.64, "grad_norm": 1.4277665615081787, "learning_rate": 6.129875140330078e-06, "loss": 0.4798, "step": 24880 }, { "epoch": 0.64, "grad_norm": 1.7284144163131714, "learning_rate": 6.129109700121705e-06, "loss": 0.6277, "step": 24881 }, { "epoch": 0.64, "grad_norm": 1.5059477090835571, "learning_rate": 6.128344286589076e-06, "loss": 0.5502, "step": 24882 }, { "epoch": 0.64, "grad_norm": 1.2978664636611938, "learning_rate": 6.127578899737465e-06, "loss": 0.395, "step": 24883 }, { "epoch": 0.64, "grad_norm": 1.058192253112793, "learning_rate": 6.126813539572146e-06, "loss": 0.4944, "step": 24884 }, { "epoch": 0.64, "grad_norm": 1.2018635272979736, "learning_rate": 6.126048206098398e-06, "loss": 0.5285, "step": 24885 }, { "epoch": 0.64, "grad_norm": 1.5651254653930664, "learning_rate": 6.12528289932149e-06, "loss": 0.5282, "step": 24886 }, { "epoch": 0.64, "grad_norm": 1.2559064626693726, "learning_rate": 6.124517619246699e-06, "loss": 0.481, "step": 24887 }, { "epoch": 0.64, "grad_norm": 1.8574081659317017, "learning_rate": 6.123752365879296e-06, "loss": 0.7089, "step": 24888 }, { "epoch": 0.64, "grad_norm": 3.5503809452056885, "learning_rate": 6.122987139224557e-06, "loss": 0.6462, "step": 24889 }, { "epoch": 0.64, "grad_norm": 2.2693419456481934, "learning_rate": 6.122221939287755e-06, "loss": 0.5385, "step": 24890 }, { "epoch": 0.64, "grad_norm": 1.2759054899215698, "learning_rate": 6.121456766074163e-06, "loss": 0.3364, "step": 24891 }, { "epoch": 0.64, "grad_norm": 1.265055537223816, "learning_rate": 6.120691619589056e-06, "loss": 0.6617, "step": 24892 }, { "epoch": 0.64, "grad_norm": 1.7202061414718628, "learning_rate": 6.119926499837703e-06, "loss": 0.4499, "step": 24893 }, { "epoch": 0.64, "grad_norm": 1.9524813890457153, "learning_rate": 6.119161406825382e-06, "loss": 0.5986, "step": 24894 }, { "epoch": 0.64, "grad_norm": 1.4614708423614502, "learning_rate": 6.1183963405573574e-06, "loss": 0.6189, "step": 24895 }, { "epoch": 0.64, "grad_norm": 1.3837655782699585, "learning_rate": 6.117631301038909e-06, "loss": 0.3886, "step": 24896 }, { "epoch": 0.64, "grad_norm": 2.052919864654541, "learning_rate": 6.116866288275308e-06, "loss": 0.4381, "step": 24897 }, { "epoch": 0.64, "grad_norm": 1.2475885152816772, "learning_rate": 6.116101302271823e-06, "loss": 0.6547, "step": 24898 }, { "epoch": 0.64, "grad_norm": 1.161419153213501, "learning_rate": 6.115336343033729e-06, "loss": 0.5362, "step": 24899 }, { "epoch": 0.64, "grad_norm": 4.657290935516357, "learning_rate": 6.114571410566294e-06, "loss": 0.6573, "step": 24900 }, { "epoch": 0.64, "grad_norm": 2.152045249938965, "learning_rate": 6.113806504874794e-06, "loss": 0.5362, "step": 24901 }, { "epoch": 0.64, "grad_norm": 4.065793514251709, "learning_rate": 6.113041625964499e-06, "loss": 0.488, "step": 24902 }, { "epoch": 0.64, "grad_norm": 2.676886796951294, "learning_rate": 6.112276773840676e-06, "loss": 0.5485, "step": 24903 }, { "epoch": 0.64, "grad_norm": 1.1004573106765747, "learning_rate": 6.111511948508601e-06, "loss": 0.3969, "step": 24904 }, { "epoch": 0.64, "grad_norm": 1.370595932006836, "learning_rate": 6.110747149973541e-06, "loss": 0.577, "step": 24905 }, { "epoch": 0.64, "grad_norm": 1.0276210308074951, "learning_rate": 6.109982378240772e-06, "loss": 0.4982, "step": 24906 }, { "epoch": 0.64, "grad_norm": 1.7487194538116455, "learning_rate": 6.109217633315556e-06, "loss": 0.4852, "step": 24907 }, { "epoch": 0.64, "grad_norm": 0.9417387247085571, "learning_rate": 6.108452915203169e-06, "loss": 0.6057, "step": 24908 }, { "epoch": 0.64, "grad_norm": 3.564610481262207, "learning_rate": 6.107688223908882e-06, "loss": 0.6284, "step": 24909 }, { "epoch": 0.64, "grad_norm": 1.759800672531128, "learning_rate": 6.10692355943796e-06, "loss": 0.6309, "step": 24910 }, { "epoch": 0.64, "grad_norm": 5.235246181488037, "learning_rate": 6.106158921795676e-06, "loss": 0.6971, "step": 24911 }, { "epoch": 0.64, "grad_norm": 2.098499298095703, "learning_rate": 6.105394310987297e-06, "loss": 0.5766, "step": 24912 }, { "epoch": 0.64, "grad_norm": 6.898186206817627, "learning_rate": 6.104629727018094e-06, "loss": 0.5869, "step": 24913 }, { "epoch": 0.64, "grad_norm": 1.9051216840744019, "learning_rate": 6.103865169893337e-06, "loss": 0.5313, "step": 24914 }, { "epoch": 0.64, "grad_norm": 1.2751851081848145, "learning_rate": 6.103100639618291e-06, "loss": 0.4627, "step": 24915 }, { "epoch": 0.64, "grad_norm": 1.315744400024414, "learning_rate": 6.10233613619823e-06, "loss": 0.5282, "step": 24916 }, { "epoch": 0.64, "grad_norm": 1.1326546669006348, "learning_rate": 6.101571659638418e-06, "loss": 0.4769, "step": 24917 }, { "epoch": 0.64, "grad_norm": 3.603314161300659, "learning_rate": 6.100807209944124e-06, "loss": 0.7108, "step": 24918 }, { "epoch": 0.64, "grad_norm": 1.8694161176681519, "learning_rate": 6.1000427871206195e-06, "loss": 0.6088, "step": 24919 }, { "epoch": 0.64, "grad_norm": 5.1446027755737305, "learning_rate": 6.099278391173167e-06, "loss": 0.5965, "step": 24920 }, { "epoch": 0.64, "grad_norm": 1.1274895668029785, "learning_rate": 6.098514022107041e-06, "loss": 0.3681, "step": 24921 }, { "epoch": 0.64, "grad_norm": 1.6163303852081299, "learning_rate": 6.097749679927501e-06, "loss": 0.6303, "step": 24922 }, { "epoch": 0.64, "grad_norm": 1.3066174983978271, "learning_rate": 6.096985364639822e-06, "loss": 0.5967, "step": 24923 }, { "epoch": 0.64, "grad_norm": 1.2389713525772095, "learning_rate": 6.096221076249265e-06, "loss": 0.5244, "step": 24924 }, { "epoch": 0.64, "grad_norm": 3.3946945667266846, "learning_rate": 6.0954568147611e-06, "loss": 0.6128, "step": 24925 }, { "epoch": 0.64, "grad_norm": 1.4358015060424805, "learning_rate": 6.094692580180596e-06, "loss": 0.461, "step": 24926 }, { "epoch": 0.64, "grad_norm": 3.203580379486084, "learning_rate": 6.093928372513014e-06, "loss": 0.5393, "step": 24927 }, { "epoch": 0.64, "grad_norm": 2.280336856842041, "learning_rate": 6.093164191763628e-06, "loss": 0.6598, "step": 24928 }, { "epoch": 0.64, "grad_norm": 2.223567008972168, "learning_rate": 6.0924000379376955e-06, "loss": 0.5667, "step": 24929 }, { "epoch": 0.64, "grad_norm": 3.2179105281829834, "learning_rate": 6.091635911040488e-06, "loss": 0.419, "step": 24930 }, { "epoch": 0.64, "grad_norm": 1.1943434476852417, "learning_rate": 6.090871811077271e-06, "loss": 0.5237, "step": 24931 }, { "epoch": 0.64, "grad_norm": 2.0331928730010986, "learning_rate": 6.090107738053309e-06, "loss": 0.587, "step": 24932 }, { "epoch": 0.64, "grad_norm": 3.3263654708862305, "learning_rate": 6.08934369197387e-06, "loss": 0.5013, "step": 24933 }, { "epoch": 0.64, "grad_norm": 9.358922958374023, "learning_rate": 6.0885796728442144e-06, "loss": 0.6071, "step": 24934 }, { "epoch": 0.64, "grad_norm": 1.5259627103805542, "learning_rate": 6.087815680669613e-06, "loss": 0.5079, "step": 24935 }, { "epoch": 0.64, "grad_norm": 3.700173854827881, "learning_rate": 6.087051715455325e-06, "loss": 0.7024, "step": 24936 }, { "epoch": 0.64, "grad_norm": 1.3624904155731201, "learning_rate": 6.086287777206621e-06, "loss": 0.4979, "step": 24937 }, { "epoch": 0.64, "grad_norm": 1.4457896947860718, "learning_rate": 6.0855238659287615e-06, "loss": 0.3872, "step": 24938 }, { "epoch": 0.64, "grad_norm": 1.4780516624450684, "learning_rate": 6.084759981627013e-06, "loss": 0.669, "step": 24939 }, { "epoch": 0.64, "grad_norm": 1.9217699766159058, "learning_rate": 6.083996124306639e-06, "loss": 0.4388, "step": 24940 }, { "epoch": 0.64, "grad_norm": 10.33878231048584, "learning_rate": 6.083232293972903e-06, "loss": 0.602, "step": 24941 }, { "epoch": 0.64, "grad_norm": 1.0937730073928833, "learning_rate": 6.082468490631068e-06, "loss": 0.6359, "step": 24942 }, { "epoch": 0.64, "grad_norm": 1.7701349258422852, "learning_rate": 6.0817047142864e-06, "loss": 0.3994, "step": 24943 }, { "epoch": 0.64, "grad_norm": 1.996791124343872, "learning_rate": 6.080940964944162e-06, "loss": 0.5229, "step": 24944 }, { "epoch": 0.64, "grad_norm": 1.4161040782928467, "learning_rate": 6.080177242609617e-06, "loss": 0.5248, "step": 24945 }, { "epoch": 0.64, "grad_norm": 1.7130783796310425, "learning_rate": 6.079413547288026e-06, "loss": 0.5645, "step": 24946 }, { "epoch": 0.64, "grad_norm": 5.221043586730957, "learning_rate": 6.078649878984655e-06, "loss": 0.5058, "step": 24947 }, { "epoch": 0.64, "grad_norm": 1.503463625907898, "learning_rate": 6.077886237704764e-06, "loss": 0.3699, "step": 24948 }, { "epoch": 0.64, "grad_norm": 3.158804416656494, "learning_rate": 6.077122623453618e-06, "loss": 0.4716, "step": 24949 }, { "epoch": 0.64, "grad_norm": 2.074742078781128, "learning_rate": 6.076359036236479e-06, "loss": 0.4719, "step": 24950 }, { "epoch": 0.64, "grad_norm": 1.2586778402328491, "learning_rate": 6.075595476058607e-06, "loss": 0.6187, "step": 24951 }, { "epoch": 0.64, "grad_norm": 2.0298244953155518, "learning_rate": 6.074831942925268e-06, "loss": 0.5924, "step": 24952 }, { "epoch": 0.64, "grad_norm": 2.4940993785858154, "learning_rate": 6.074068436841718e-06, "loss": 0.7109, "step": 24953 }, { "epoch": 0.64, "grad_norm": 2.5734670162200928, "learning_rate": 6.073304957813222e-06, "loss": 0.6781, "step": 24954 }, { "epoch": 0.64, "grad_norm": 1.3614715337753296, "learning_rate": 6.072541505845043e-06, "loss": 0.4946, "step": 24955 }, { "epoch": 0.64, "grad_norm": 1.2116467952728271, "learning_rate": 6.071778080942439e-06, "loss": 0.6015, "step": 24956 }, { "epoch": 0.64, "grad_norm": 2.048374652862549, "learning_rate": 6.071014683110674e-06, "loss": 0.5171, "step": 24957 }, { "epoch": 0.64, "grad_norm": 1.9452131986618042, "learning_rate": 6.0702513123550065e-06, "loss": 0.5656, "step": 24958 }, { "epoch": 0.64, "grad_norm": 2.9388132095336914, "learning_rate": 6.0694879686807e-06, "loss": 0.7327, "step": 24959 }, { "epoch": 0.64, "grad_norm": 2.535224437713623, "learning_rate": 6.06872465209301e-06, "loss": 0.5494, "step": 24960 }, { "epoch": 0.64, "grad_norm": 1.9388872385025024, "learning_rate": 6.067961362597201e-06, "loss": 0.4935, "step": 24961 }, { "epoch": 0.64, "grad_norm": 3.365818500518799, "learning_rate": 6.067198100198532e-06, "loss": 0.7027, "step": 24962 }, { "epoch": 0.64, "grad_norm": 7.383760452270508, "learning_rate": 6.066434864902263e-06, "loss": 0.6328, "step": 24963 }, { "epoch": 0.64, "grad_norm": 5.9207634925842285, "learning_rate": 6.065671656713654e-06, "loss": 0.5454, "step": 24964 }, { "epoch": 0.64, "grad_norm": 6.319280624389648, "learning_rate": 6.064908475637963e-06, "loss": 0.6443, "step": 24965 }, { "epoch": 0.64, "grad_norm": 3.1700313091278076, "learning_rate": 6.06414532168045e-06, "loss": 0.4472, "step": 24966 }, { "epoch": 0.64, "grad_norm": 2.46628737449646, "learning_rate": 6.063382194846378e-06, "loss": 0.5939, "step": 24967 }, { "epoch": 0.64, "grad_norm": 1.039821982383728, "learning_rate": 6.062619095141e-06, "loss": 0.4517, "step": 24968 }, { "epoch": 0.64, "grad_norm": 1.1500318050384521, "learning_rate": 6.061856022569579e-06, "loss": 0.4387, "step": 24969 }, { "epoch": 0.64, "grad_norm": 1.716089129447937, "learning_rate": 6.061092977137371e-06, "loss": 0.4997, "step": 24970 }, { "epoch": 0.64, "grad_norm": 1.111589789390564, "learning_rate": 6.060329958849633e-06, "loss": 0.5198, "step": 24971 }, { "epoch": 0.64, "grad_norm": 4.095426559448242, "learning_rate": 6.059566967711631e-06, "loss": 0.5434, "step": 24972 }, { "epoch": 0.64, "grad_norm": 4.7551374435424805, "learning_rate": 6.058804003728614e-06, "loss": 0.6184, "step": 24973 }, { "epoch": 0.64, "grad_norm": 1.17972993850708, "learning_rate": 6.058041066905847e-06, "loss": 0.4617, "step": 24974 }, { "epoch": 0.64, "grad_norm": 1.3708069324493408, "learning_rate": 6.05727815724858e-06, "loss": 0.5882, "step": 24975 }, { "epoch": 0.64, "grad_norm": 2.31524920463562, "learning_rate": 6.05651527476208e-06, "loss": 0.4718, "step": 24976 }, { "epoch": 0.64, "grad_norm": 7.474590301513672, "learning_rate": 6.055752419451595e-06, "loss": 0.5697, "step": 24977 }, { "epoch": 0.64, "grad_norm": 1.3413796424865723, "learning_rate": 6.054989591322386e-06, "loss": 0.5883, "step": 24978 }, { "epoch": 0.64, "grad_norm": 3.676938056945801, "learning_rate": 6.054226790379715e-06, "loss": 0.6113, "step": 24979 }, { "epoch": 0.64, "grad_norm": 3.735339403152466, "learning_rate": 6.053464016628829e-06, "loss": 0.8941, "step": 24980 }, { "epoch": 0.64, "grad_norm": 1.8344801664352417, "learning_rate": 6.052701270074993e-06, "loss": 0.6066, "step": 24981 }, { "epoch": 0.64, "grad_norm": 9.375237464904785, "learning_rate": 6.0519385507234575e-06, "loss": 0.494, "step": 24982 }, { "epoch": 0.64, "grad_norm": 1.2104140520095825, "learning_rate": 6.051175858579481e-06, "loss": 0.3214, "step": 24983 }, { "epoch": 0.64, "grad_norm": 2.6086111068725586, "learning_rate": 6.050413193648324e-06, "loss": 0.5469, "step": 24984 }, { "epoch": 0.64, "grad_norm": 1.3485723733901978, "learning_rate": 6.049650555935232e-06, "loss": 0.5085, "step": 24985 }, { "epoch": 0.64, "grad_norm": 1.5752676725387573, "learning_rate": 6.048887945445471e-06, "loss": 0.5067, "step": 24986 }, { "epoch": 0.64, "grad_norm": 1.3933696746826172, "learning_rate": 6.04812536218429e-06, "loss": 0.5218, "step": 24987 }, { "epoch": 0.64, "grad_norm": 2.4559128284454346, "learning_rate": 6.047362806156947e-06, "loss": 0.6265, "step": 24988 }, { "epoch": 0.64, "grad_norm": 3.8884854316711426, "learning_rate": 6.046600277368694e-06, "loss": 0.6387, "step": 24989 }, { "epoch": 0.64, "grad_norm": 1.5687532424926758, "learning_rate": 6.045837775824789e-06, "loss": 0.5225, "step": 24990 }, { "epoch": 0.64, "grad_norm": 1.7248300313949585, "learning_rate": 6.0450753015304874e-06, "loss": 0.6122, "step": 24991 }, { "epoch": 0.64, "grad_norm": 2.7019894123077393, "learning_rate": 6.04431285449104e-06, "loss": 0.5377, "step": 24992 }, { "epoch": 0.64, "grad_norm": 10.76031494140625, "learning_rate": 6.043550434711705e-06, "loss": 0.6982, "step": 24993 }, { "epoch": 0.64, "grad_norm": 1.1834735870361328, "learning_rate": 6.0427880421977335e-06, "loss": 0.4457, "step": 24994 }, { "epoch": 0.64, "grad_norm": 1.1287206411361694, "learning_rate": 6.04202567695438e-06, "loss": 0.5114, "step": 24995 }, { "epoch": 0.64, "grad_norm": 1.3366929292678833, "learning_rate": 6.0412633389869025e-06, "loss": 0.593, "step": 24996 }, { "epoch": 0.64, "grad_norm": 2.3452165126800537, "learning_rate": 6.0405010283005476e-06, "loss": 0.5533, "step": 24997 }, { "epoch": 0.64, "grad_norm": 3.4093856811523438, "learning_rate": 6.039738744900574e-06, "loss": 0.5062, "step": 24998 }, { "epoch": 0.64, "grad_norm": 1.065795660018921, "learning_rate": 6.0389764887922294e-06, "loss": 0.5743, "step": 24999 }, { "epoch": 0.64, "grad_norm": 3.83808970451355, "learning_rate": 6.038214259980775e-06, "loss": 0.5117, "step": 25000 }, { "epoch": 0.64, "grad_norm": 1.7041722536087036, "learning_rate": 6.037452058471456e-06, "loss": 0.6362, "step": 25001 }, { "epoch": 0.64, "grad_norm": 1.3688565492630005, "learning_rate": 6.036689884269526e-06, "loss": 0.5601, "step": 25002 }, { "epoch": 0.64, "grad_norm": 4.287816047668457, "learning_rate": 6.0359277373802435e-06, "loss": 0.4952, "step": 25003 }, { "epoch": 0.64, "grad_norm": 1.374922275543213, "learning_rate": 6.035165617808855e-06, "loss": 0.5002, "step": 25004 }, { "epoch": 0.64, "grad_norm": 1.4782710075378418, "learning_rate": 6.034403525560615e-06, "loss": 0.5481, "step": 25005 }, { "epoch": 0.64, "grad_norm": 7.329281806945801, "learning_rate": 6.033641460640772e-06, "loss": 0.7504, "step": 25006 }, { "epoch": 0.64, "grad_norm": 1.4295247793197632, "learning_rate": 6.032879423054581e-06, "loss": 0.4176, "step": 25007 }, { "epoch": 0.64, "grad_norm": 1.1563154458999634, "learning_rate": 6.032117412807295e-06, "loss": 0.5618, "step": 25008 }, { "epoch": 0.64, "grad_norm": 1.9371215105056763, "learning_rate": 6.03135542990416e-06, "loss": 0.569, "step": 25009 }, { "epoch": 0.64, "grad_norm": 0.9741025567054749, "learning_rate": 6.030593474350432e-06, "loss": 0.5184, "step": 25010 }, { "epoch": 0.64, "grad_norm": 1.260383129119873, "learning_rate": 6.029831546151358e-06, "loss": 0.6037, "step": 25011 }, { "epoch": 0.64, "grad_norm": 1.1920320987701416, "learning_rate": 6.029069645312193e-06, "loss": 0.4072, "step": 25012 }, { "epoch": 0.64, "grad_norm": 1.468050479888916, "learning_rate": 6.028307771838183e-06, "loss": 0.5471, "step": 25013 }, { "epoch": 0.64, "grad_norm": 1.5286247730255127, "learning_rate": 6.027545925734582e-06, "loss": 0.6032, "step": 25014 }, { "epoch": 0.64, "grad_norm": 4.228246212005615, "learning_rate": 6.026784107006638e-06, "loss": 0.6178, "step": 25015 }, { "epoch": 0.64, "grad_norm": 2.6721091270446777, "learning_rate": 6.026022315659601e-06, "loss": 0.6552, "step": 25016 }, { "epoch": 0.64, "grad_norm": 2.001204252243042, "learning_rate": 6.025260551698724e-06, "loss": 0.4718, "step": 25017 }, { "epoch": 0.64, "grad_norm": 1.315879464149475, "learning_rate": 6.0244988151292514e-06, "loss": 0.6292, "step": 25018 }, { "epoch": 0.64, "grad_norm": 1.1575522422790527, "learning_rate": 6.023737105956436e-06, "loss": 0.5541, "step": 25019 }, { "epoch": 0.64, "grad_norm": 1.8278933763504028, "learning_rate": 6.0229754241855286e-06, "loss": 0.5056, "step": 25020 }, { "epoch": 0.64, "grad_norm": 1.732292652130127, "learning_rate": 6.022213769821773e-06, "loss": 0.4504, "step": 25021 }, { "epoch": 0.64, "grad_norm": 2.9342522621154785, "learning_rate": 6.021452142870423e-06, "loss": 0.6562, "step": 25022 }, { "epoch": 0.64, "grad_norm": 4.298573017120361, "learning_rate": 6.020690543336724e-06, "loss": 0.5542, "step": 25023 }, { "epoch": 0.64, "grad_norm": 4.703331470489502, "learning_rate": 6.019928971225926e-06, "loss": 0.7807, "step": 25024 }, { "epoch": 0.64, "grad_norm": 1.914204716682434, "learning_rate": 6.019167426543278e-06, "loss": 0.51, "step": 25025 }, { "epoch": 0.64, "grad_norm": 2.2637970447540283, "learning_rate": 6.018405909294025e-06, "loss": 0.4273, "step": 25026 }, { "epoch": 0.64, "grad_norm": 1.8818556070327759, "learning_rate": 6.0176444194834196e-06, "loss": 0.5157, "step": 25027 }, { "epoch": 0.64, "grad_norm": 1.92563796043396, "learning_rate": 6.016882957116706e-06, "loss": 0.5963, "step": 25028 }, { "epoch": 0.64, "grad_norm": 1.1547837257385254, "learning_rate": 6.016121522199132e-06, "loss": 0.5734, "step": 25029 }, { "epoch": 0.64, "grad_norm": 2.0372211933135986, "learning_rate": 6.015360114735945e-06, "loss": 0.6283, "step": 25030 }, { "epoch": 0.64, "grad_norm": 1.7217682600021362, "learning_rate": 6.014598734732393e-06, "loss": 0.5511, "step": 25031 }, { "epoch": 0.64, "grad_norm": 3.7572617530822754, "learning_rate": 6.013837382193724e-06, "loss": 0.5287, "step": 25032 }, { "epoch": 0.64, "grad_norm": 1.3800417184829712, "learning_rate": 6.013076057125182e-06, "loss": 0.3035, "step": 25033 }, { "epoch": 0.64, "grad_norm": 1.0462042093276978, "learning_rate": 6.012314759532016e-06, "loss": 0.4844, "step": 25034 }, { "epoch": 0.64, "grad_norm": 1.402842402458191, "learning_rate": 6.0115534894194705e-06, "loss": 0.4462, "step": 25035 }, { "epoch": 0.64, "grad_norm": 2.2736656665802, "learning_rate": 6.0107922467927925e-06, "loss": 0.7397, "step": 25036 }, { "epoch": 0.64, "grad_norm": 1.2539021968841553, "learning_rate": 6.0100310316572295e-06, "loss": 0.5888, "step": 25037 }, { "epoch": 0.64, "grad_norm": 1.898908019065857, "learning_rate": 6.0092698440180245e-06, "loss": 0.63, "step": 25038 }, { "epoch": 0.64, "grad_norm": 1.3145866394042969, "learning_rate": 6.008508683880425e-06, "loss": 0.4551, "step": 25039 }, { "epoch": 0.64, "grad_norm": 3.429657459259033, "learning_rate": 6.007747551249676e-06, "loss": 0.8135, "step": 25040 }, { "epoch": 0.64, "grad_norm": 7.9844255447387695, "learning_rate": 6.006986446131024e-06, "loss": 0.7104, "step": 25041 }, { "epoch": 0.64, "grad_norm": 2.171583652496338, "learning_rate": 6.00622536852971e-06, "loss": 0.6328, "step": 25042 }, { "epoch": 0.64, "grad_norm": 2.6890759468078613, "learning_rate": 6.005464318450982e-06, "loss": 0.5042, "step": 25043 }, { "epoch": 0.64, "grad_norm": 1.2626948356628418, "learning_rate": 6.004703295900087e-06, "loss": 0.4935, "step": 25044 }, { "epoch": 0.64, "grad_norm": 1.1947275400161743, "learning_rate": 6.0039423008822635e-06, "loss": 0.5091, "step": 25045 }, { "epoch": 0.64, "grad_norm": 2.6463749408721924, "learning_rate": 6.003181333402762e-06, "loss": 0.432, "step": 25046 }, { "epoch": 0.64, "grad_norm": 1.2422304153442383, "learning_rate": 6.002420393466821e-06, "loss": 0.5654, "step": 25047 }, { "epoch": 0.64, "grad_norm": 2.199878692626953, "learning_rate": 6.001659481079688e-06, "loss": 0.6466, "step": 25048 }, { "epoch": 0.64, "grad_norm": 1.6257702112197876, "learning_rate": 6.000898596246607e-06, "loss": 0.5591, "step": 25049 }, { "epoch": 0.64, "grad_norm": 1.8913484811782837, "learning_rate": 6.00013773897282e-06, "loss": 0.5683, "step": 25050 }, { "epoch": 0.64, "grad_norm": 3.656752109527588, "learning_rate": 5.999376909263571e-06, "loss": 0.5005, "step": 25051 }, { "epoch": 0.64, "grad_norm": 1.4351025819778442, "learning_rate": 5.998616107124102e-06, "loss": 0.5269, "step": 25052 }, { "epoch": 0.64, "grad_norm": 2.366659164428711, "learning_rate": 5.997855332559659e-06, "loss": 0.538, "step": 25053 }, { "epoch": 0.64, "grad_norm": 0.9395701885223389, "learning_rate": 5.997094585575479e-06, "loss": 0.3522, "step": 25054 }, { "epoch": 0.64, "grad_norm": 1.5623273849487305, "learning_rate": 5.996333866176809e-06, "loss": 0.6259, "step": 25055 }, { "epoch": 0.64, "grad_norm": 1.9486651420593262, "learning_rate": 5.995573174368894e-06, "loss": 0.478, "step": 25056 }, { "epoch": 0.64, "grad_norm": 4.840363502502441, "learning_rate": 5.994812510156968e-06, "loss": 0.4857, "step": 25057 }, { "epoch": 0.64, "grad_norm": 4.337116241455078, "learning_rate": 5.994051873546283e-06, "loss": 0.5782, "step": 25058 }, { "epoch": 0.64, "grad_norm": 1.4078178405761719, "learning_rate": 5.993291264542072e-06, "loss": 0.5787, "step": 25059 }, { "epoch": 0.64, "grad_norm": 1.1724987030029297, "learning_rate": 5.992530683149581e-06, "loss": 0.5725, "step": 25060 }, { "epoch": 0.64, "grad_norm": 1.237884759902954, "learning_rate": 5.991770129374053e-06, "loss": 0.5422, "step": 25061 }, { "epoch": 0.64, "grad_norm": 3.690402030944824, "learning_rate": 5.991009603220724e-06, "loss": 0.6649, "step": 25062 }, { "epoch": 0.64, "grad_norm": 1.2070786952972412, "learning_rate": 5.990249104694841e-06, "loss": 0.5197, "step": 25063 }, { "epoch": 0.64, "grad_norm": 1.6531654596328735, "learning_rate": 5.9894886338016385e-06, "loss": 0.5995, "step": 25064 }, { "epoch": 0.64, "grad_norm": 2.491154909133911, "learning_rate": 5.9887281905463624e-06, "loss": 0.4721, "step": 25065 }, { "epoch": 0.64, "grad_norm": 1.7317149639129639, "learning_rate": 5.987967774934254e-06, "loss": 0.4661, "step": 25066 }, { "epoch": 0.64, "grad_norm": 1.3383007049560547, "learning_rate": 5.987207386970552e-06, "loss": 0.5287, "step": 25067 }, { "epoch": 0.64, "grad_norm": 2.3822195529937744, "learning_rate": 5.986447026660491e-06, "loss": 0.6046, "step": 25068 }, { "epoch": 0.64, "grad_norm": 1.34355628490448, "learning_rate": 5.9856866940093186e-06, "loss": 0.6346, "step": 25069 }, { "epoch": 0.64, "grad_norm": 1.6837598085403442, "learning_rate": 5.9849263890222685e-06, "loss": 0.6764, "step": 25070 }, { "epoch": 0.64, "grad_norm": 2.1262781620025635, "learning_rate": 5.9841661117045855e-06, "loss": 0.5806, "step": 25071 }, { "epoch": 0.64, "grad_norm": 1.1545077562332153, "learning_rate": 5.983405862061505e-06, "loss": 0.4544, "step": 25072 }, { "epoch": 0.64, "grad_norm": 1.4954475164413452, "learning_rate": 5.982645640098268e-06, "loss": 0.5251, "step": 25073 }, { "epoch": 0.64, "grad_norm": 1.569196105003357, "learning_rate": 5.981885445820115e-06, "loss": 0.5478, "step": 25074 }, { "epoch": 0.64, "grad_norm": 6.872664928436279, "learning_rate": 5.98112527923228e-06, "loss": 0.6866, "step": 25075 }, { "epoch": 0.64, "grad_norm": 2.174039125442505, "learning_rate": 5.98036514034001e-06, "loss": 0.5008, "step": 25076 }, { "epoch": 0.64, "grad_norm": 3.217623233795166, "learning_rate": 5.979605029148533e-06, "loss": 0.6009, "step": 25077 }, { "epoch": 0.64, "grad_norm": 1.5010751485824585, "learning_rate": 5.978844945663094e-06, "loss": 0.4809, "step": 25078 }, { "epoch": 0.64, "grad_norm": 2.1129302978515625, "learning_rate": 5.978084889888929e-06, "loss": 0.611, "step": 25079 }, { "epoch": 0.64, "grad_norm": 1.5102496147155762, "learning_rate": 5.977324861831276e-06, "loss": 0.5736, "step": 25080 }, { "epoch": 0.64, "grad_norm": 4.335636138916016, "learning_rate": 5.976564861495375e-06, "loss": 0.4906, "step": 25081 }, { "epoch": 0.64, "grad_norm": 1.4201034307479858, "learning_rate": 5.9758048888864585e-06, "loss": 0.5383, "step": 25082 }, { "epoch": 0.64, "grad_norm": 1.8009107112884521, "learning_rate": 5.975044944009768e-06, "loss": 0.634, "step": 25083 }, { "epoch": 0.64, "grad_norm": 6.467182159423828, "learning_rate": 5.9742850268705375e-06, "loss": 0.6967, "step": 25084 }, { "epoch": 0.64, "grad_norm": 2.4509589672088623, "learning_rate": 5.973525137474005e-06, "loss": 0.4986, "step": 25085 }, { "epoch": 0.64, "grad_norm": 2.3973639011383057, "learning_rate": 5.972765275825411e-06, "loss": 0.5036, "step": 25086 }, { "epoch": 0.64, "grad_norm": 1.5930545330047607, "learning_rate": 5.972005441929984e-06, "loss": 0.6119, "step": 25087 }, { "epoch": 0.64, "grad_norm": 4.416432857513428, "learning_rate": 5.9712456357929695e-06, "loss": 0.7443, "step": 25088 }, { "epoch": 0.64, "grad_norm": 1.5866447687149048, "learning_rate": 5.970485857419595e-06, "loss": 0.682, "step": 25089 }, { "epoch": 0.64, "grad_norm": 3.4719557762145996, "learning_rate": 5.969726106815102e-06, "loss": 0.6004, "step": 25090 }, { "epoch": 0.64, "grad_norm": 1.6175580024719238, "learning_rate": 5.9689663839847254e-06, "loss": 0.6041, "step": 25091 }, { "epoch": 0.64, "grad_norm": 4.454458713531494, "learning_rate": 5.968206688933698e-06, "loss": 0.2985, "step": 25092 }, { "epoch": 0.64, "grad_norm": 10.672722816467285, "learning_rate": 5.967447021667261e-06, "loss": 0.5219, "step": 25093 }, { "epoch": 0.64, "grad_norm": 1.5104470252990723, "learning_rate": 5.966687382190641e-06, "loss": 0.4919, "step": 25094 }, { "epoch": 0.64, "grad_norm": 1.5935369729995728, "learning_rate": 5.965927770509081e-06, "loss": 0.6011, "step": 25095 }, { "epoch": 0.64, "grad_norm": 1.1857317686080933, "learning_rate": 5.96516818662781e-06, "loss": 0.3775, "step": 25096 }, { "epoch": 0.64, "grad_norm": 1.6077114343643188, "learning_rate": 5.964408630552066e-06, "loss": 0.5366, "step": 25097 }, { "epoch": 0.64, "grad_norm": 1.1176601648330688, "learning_rate": 5.963649102287083e-06, "loss": 0.3358, "step": 25098 }, { "epoch": 0.64, "grad_norm": 1.2756446599960327, "learning_rate": 5.9628896018380925e-06, "loss": 0.4676, "step": 25099 }, { "epoch": 0.64, "grad_norm": 1.044152855873108, "learning_rate": 5.962130129210333e-06, "loss": 0.5104, "step": 25100 }, { "epoch": 0.64, "grad_norm": 1.274510383605957, "learning_rate": 5.961370684409033e-06, "loss": 0.5983, "step": 25101 }, { "epoch": 0.64, "grad_norm": 0.9097446203231812, "learning_rate": 5.960611267439429e-06, "loss": 0.4656, "step": 25102 }, { "epoch": 0.64, "grad_norm": 1.978389859199524, "learning_rate": 5.959851878306757e-06, "loss": 0.7109, "step": 25103 }, { "epoch": 0.64, "grad_norm": 1.962399959564209, "learning_rate": 5.959092517016246e-06, "loss": 0.5899, "step": 25104 }, { "epoch": 0.64, "grad_norm": 1.910592794418335, "learning_rate": 5.958333183573132e-06, "loss": 0.7567, "step": 25105 }, { "epoch": 0.64, "grad_norm": 1.3579721450805664, "learning_rate": 5.957573877982645e-06, "loss": 0.5313, "step": 25106 }, { "epoch": 0.64, "grad_norm": 2.5198187828063965, "learning_rate": 5.956814600250019e-06, "loss": 0.4259, "step": 25107 }, { "epoch": 0.64, "grad_norm": 2.9322280883789062, "learning_rate": 5.956055350380488e-06, "loss": 0.5306, "step": 25108 }, { "epoch": 0.64, "grad_norm": 2.1008803844451904, "learning_rate": 5.955296128379281e-06, "loss": 0.5117, "step": 25109 }, { "epoch": 0.64, "grad_norm": 1.9044357538223267, "learning_rate": 5.954536934251634e-06, "loss": 0.5782, "step": 25110 }, { "epoch": 0.64, "grad_norm": 1.319351315498352, "learning_rate": 5.953777768002775e-06, "loss": 0.3583, "step": 25111 }, { "epoch": 0.64, "grad_norm": 3.5510447025299072, "learning_rate": 5.953018629637939e-06, "loss": 0.5641, "step": 25112 }, { "epoch": 0.64, "grad_norm": 13.130908966064453, "learning_rate": 5.952259519162356e-06, "loss": 0.4614, "step": 25113 }, { "epoch": 0.64, "grad_norm": 1.5286345481872559, "learning_rate": 5.951500436581254e-06, "loss": 0.5024, "step": 25114 }, { "epoch": 0.64, "grad_norm": 1.0916478633880615, "learning_rate": 5.950741381899871e-06, "loss": 0.5559, "step": 25115 }, { "epoch": 0.64, "grad_norm": 2.138678789138794, "learning_rate": 5.949982355123433e-06, "loss": 0.6461, "step": 25116 }, { "epoch": 0.64, "grad_norm": 1.4356521368026733, "learning_rate": 5.949223356257175e-06, "loss": 0.4425, "step": 25117 }, { "epoch": 0.64, "grad_norm": 1.216698169708252, "learning_rate": 5.948464385306321e-06, "loss": 0.4288, "step": 25118 }, { "epoch": 0.64, "grad_norm": 0.9097158312797546, "learning_rate": 5.947705442276106e-06, "loss": 0.4016, "step": 25119 }, { "epoch": 0.64, "grad_norm": 1.848439335823059, "learning_rate": 5.946946527171761e-06, "loss": 0.5516, "step": 25120 }, { "epoch": 0.64, "grad_norm": 1.2843252420425415, "learning_rate": 5.946187639998512e-06, "loss": 0.461, "step": 25121 }, { "epoch": 0.64, "grad_norm": 8.367231369018555, "learning_rate": 5.945428780761593e-06, "loss": 0.4957, "step": 25122 }, { "epoch": 0.64, "grad_norm": 1.9885741472244263, "learning_rate": 5.944669949466228e-06, "loss": 0.4362, "step": 25123 }, { "epoch": 0.64, "grad_norm": 1.1847078800201416, "learning_rate": 5.943911146117653e-06, "loss": 0.3894, "step": 25124 }, { "epoch": 0.64, "grad_norm": 1.6787075996398926, "learning_rate": 5.9431523707210935e-06, "loss": 0.4029, "step": 25125 }, { "epoch": 0.64, "grad_norm": 2.288949489593506, "learning_rate": 5.942393623281778e-06, "loss": 0.4734, "step": 25126 }, { "epoch": 0.64, "grad_norm": 2.3646035194396973, "learning_rate": 5.941634903804938e-06, "loss": 0.577, "step": 25127 }, { "epoch": 0.64, "grad_norm": 2.6482341289520264, "learning_rate": 5.9408762122958e-06, "loss": 0.6555, "step": 25128 }, { "epoch": 0.64, "grad_norm": 1.9959367513656616, "learning_rate": 5.940117548759594e-06, "loss": 0.4961, "step": 25129 }, { "epoch": 0.64, "grad_norm": 2.0248847007751465, "learning_rate": 5.939358913201545e-06, "loss": 0.6997, "step": 25130 }, { "epoch": 0.64, "grad_norm": 1.0863653421401978, "learning_rate": 5.938600305626882e-06, "loss": 0.3638, "step": 25131 }, { "epoch": 0.64, "grad_norm": 3.003756284713745, "learning_rate": 5.937841726040838e-06, "loss": 0.6057, "step": 25132 }, { "epoch": 0.64, "grad_norm": 1.041736125946045, "learning_rate": 5.937083174448635e-06, "loss": 0.5499, "step": 25133 }, { "epoch": 0.64, "grad_norm": 1.5910059213638306, "learning_rate": 5.936324650855503e-06, "loss": 0.587, "step": 25134 }, { "epoch": 0.64, "grad_norm": 6.595940589904785, "learning_rate": 5.935566155266667e-06, "loss": 0.5961, "step": 25135 }, { "epoch": 0.64, "grad_norm": 1.0949169397354126, "learning_rate": 5.934807687687356e-06, "loss": 0.4553, "step": 25136 }, { "epoch": 0.64, "grad_norm": 1.0846904516220093, "learning_rate": 5.934049248122796e-06, "loss": 0.4825, "step": 25137 }, { "epoch": 0.64, "grad_norm": 0.990184485912323, "learning_rate": 5.933290836578214e-06, "loss": 0.5716, "step": 25138 }, { "epoch": 0.64, "grad_norm": 1.7937867641448975, "learning_rate": 5.932532453058839e-06, "loss": 0.7308, "step": 25139 }, { "epoch": 0.64, "grad_norm": 1.1771128177642822, "learning_rate": 5.9317740975698915e-06, "loss": 0.3467, "step": 25140 }, { "epoch": 0.64, "grad_norm": 1.5723321437835693, "learning_rate": 5.931015770116604e-06, "loss": 0.5403, "step": 25141 }, { "epoch": 0.64, "grad_norm": 1.7684680223464966, "learning_rate": 5.930257470704197e-06, "loss": 0.5698, "step": 25142 }, { "epoch": 0.64, "grad_norm": 5.872871398925781, "learning_rate": 5.929499199337899e-06, "loss": 0.6288, "step": 25143 }, { "epoch": 0.64, "grad_norm": 1.4362188577651978, "learning_rate": 5.928740956022938e-06, "loss": 0.5948, "step": 25144 }, { "epoch": 0.64, "grad_norm": 3.2081034183502197, "learning_rate": 5.927982740764533e-06, "loss": 0.6043, "step": 25145 }, { "epoch": 0.64, "grad_norm": 5.663710594177246, "learning_rate": 5.9272245535679155e-06, "loss": 0.4782, "step": 25146 }, { "epoch": 0.64, "grad_norm": 1.5960869789123535, "learning_rate": 5.926466394438305e-06, "loss": 0.6497, "step": 25147 }, { "epoch": 0.64, "grad_norm": 1.2613050937652588, "learning_rate": 5.925708263380932e-06, "loss": 0.5531, "step": 25148 }, { "epoch": 0.64, "grad_norm": 1.3247419595718384, "learning_rate": 5.9249501604010155e-06, "loss": 0.5529, "step": 25149 }, { "epoch": 0.64, "grad_norm": 1.896262764930725, "learning_rate": 5.924192085503781e-06, "loss": 0.5113, "step": 25150 }, { "epoch": 0.64, "grad_norm": 1.4426673650741577, "learning_rate": 5.923434038694457e-06, "loss": 0.4904, "step": 25151 }, { "epoch": 0.64, "grad_norm": 2.851091146469116, "learning_rate": 5.922676019978262e-06, "loss": 0.4864, "step": 25152 }, { "epoch": 0.64, "grad_norm": 1.2745072841644287, "learning_rate": 5.921918029360425e-06, "loss": 0.6043, "step": 25153 }, { "epoch": 0.64, "grad_norm": 2.43379282951355, "learning_rate": 5.921160066846163e-06, "loss": 0.5323, "step": 25154 }, { "epoch": 0.64, "grad_norm": 1.7830110788345337, "learning_rate": 5.920402132440706e-06, "loss": 0.5882, "step": 25155 }, { "epoch": 0.64, "grad_norm": 3.468615770339966, "learning_rate": 5.9196442261492745e-06, "loss": 0.5537, "step": 25156 }, { "epoch": 0.64, "grad_norm": 0.927689254283905, "learning_rate": 5.9188863479770905e-06, "loss": 0.5476, "step": 25157 }, { "epoch": 0.64, "grad_norm": 1.3582996129989624, "learning_rate": 5.9181284979293794e-06, "loss": 0.4978, "step": 25158 }, { "epoch": 0.64, "grad_norm": 2.472621202468872, "learning_rate": 5.917370676011361e-06, "loss": 0.5155, "step": 25159 }, { "epoch": 0.64, "grad_norm": 3.7943077087402344, "learning_rate": 5.916612882228256e-06, "loss": 0.7277, "step": 25160 }, { "epoch": 0.64, "grad_norm": 4.586153984069824, "learning_rate": 5.915855116585295e-06, "loss": 0.5566, "step": 25161 }, { "epoch": 0.64, "grad_norm": 10.745657920837402, "learning_rate": 5.915097379087691e-06, "loss": 0.5191, "step": 25162 }, { "epoch": 0.64, "grad_norm": 1.5759296417236328, "learning_rate": 5.914339669740672e-06, "loss": 0.6001, "step": 25163 }, { "epoch": 0.64, "grad_norm": 12.574440956115723, "learning_rate": 5.913581988549455e-06, "loss": 0.4949, "step": 25164 }, { "epoch": 0.64, "grad_norm": 1.780529260635376, "learning_rate": 5.912824335519265e-06, "loss": 0.5835, "step": 25165 }, { "epoch": 0.65, "grad_norm": 5.957090854644775, "learning_rate": 5.912066710655321e-06, "loss": 0.5506, "step": 25166 }, { "epoch": 0.65, "grad_norm": 2.309399127960205, "learning_rate": 5.911309113962844e-06, "loss": 0.6181, "step": 25167 }, { "epoch": 0.65, "grad_norm": 1.4180667400360107, "learning_rate": 5.910551545447059e-06, "loss": 0.5778, "step": 25168 }, { "epoch": 0.65, "grad_norm": 1.4275548458099365, "learning_rate": 5.9097940051131804e-06, "loss": 0.5643, "step": 25169 }, { "epoch": 0.65, "grad_norm": 1.864046573638916, "learning_rate": 5.909036492966434e-06, "loss": 0.3498, "step": 25170 }, { "epoch": 0.65, "grad_norm": 1.4774823188781738, "learning_rate": 5.908279009012036e-06, "loss": 0.4715, "step": 25171 }, { "epoch": 0.65, "grad_norm": 1.844490885734558, "learning_rate": 5.907521553255209e-06, "loss": 0.5725, "step": 25172 }, { "epoch": 0.65, "grad_norm": 1.582044005393982, "learning_rate": 5.906764125701172e-06, "loss": 0.5097, "step": 25173 }, { "epoch": 0.65, "grad_norm": 1.1460447311401367, "learning_rate": 5.906006726355146e-06, "loss": 0.5544, "step": 25174 }, { "epoch": 0.65, "grad_norm": 6.377322673797607, "learning_rate": 5.90524935522235e-06, "loss": 0.6941, "step": 25175 }, { "epoch": 0.65, "grad_norm": 1.2129000425338745, "learning_rate": 5.904492012308001e-06, "loss": 0.4722, "step": 25176 }, { "epoch": 0.65, "grad_norm": 1.1328938007354736, "learning_rate": 5.9037346976173226e-06, "loss": 0.58, "step": 25177 }, { "epoch": 0.65, "grad_norm": 1.6618636846542358, "learning_rate": 5.902977411155528e-06, "loss": 0.5259, "step": 25178 }, { "epoch": 0.65, "grad_norm": 4.543323516845703, "learning_rate": 5.90222015292784e-06, "loss": 0.5711, "step": 25179 }, { "epoch": 0.65, "grad_norm": 1.3091787099838257, "learning_rate": 5.901462922939478e-06, "loss": 0.5765, "step": 25180 }, { "epoch": 0.65, "grad_norm": 1.3579071760177612, "learning_rate": 5.900705721195656e-06, "loss": 0.5379, "step": 25181 }, { "epoch": 0.65, "grad_norm": 3.400420665740967, "learning_rate": 5.8999485477015975e-06, "loss": 0.3918, "step": 25182 }, { "epoch": 0.65, "grad_norm": 2.2912776470184326, "learning_rate": 5.899191402462515e-06, "loss": 0.4753, "step": 25183 }, { "epoch": 0.65, "grad_norm": 4.906058311462402, "learning_rate": 5.8984342854836294e-06, "loss": 0.4725, "step": 25184 }, { "epoch": 0.65, "grad_norm": 2.0513057708740234, "learning_rate": 5.89767719677016e-06, "loss": 0.5492, "step": 25185 }, { "epoch": 0.65, "grad_norm": 0.8324511647224426, "learning_rate": 5.89692013632732e-06, "loss": 0.4527, "step": 25186 }, { "epoch": 0.65, "grad_norm": 1.2627646923065186, "learning_rate": 5.896163104160331e-06, "loss": 0.5603, "step": 25187 }, { "epoch": 0.65, "grad_norm": 1.5999308824539185, "learning_rate": 5.895406100274406e-06, "loss": 0.5308, "step": 25188 }, { "epoch": 0.65, "grad_norm": 3.2650339603424072, "learning_rate": 5.894649124674763e-06, "loss": 0.5618, "step": 25189 }, { "epoch": 0.65, "grad_norm": 1.280060887336731, "learning_rate": 5.893892177366619e-06, "loss": 0.58, "step": 25190 }, { "epoch": 0.65, "grad_norm": 2.694652557373047, "learning_rate": 5.893135258355189e-06, "loss": 0.6319, "step": 25191 }, { "epoch": 0.65, "grad_norm": 5.841196060180664, "learning_rate": 5.892378367645695e-06, "loss": 0.5488, "step": 25192 }, { "epoch": 0.65, "grad_norm": 1.2033799886703491, "learning_rate": 5.891621505243344e-06, "loss": 0.6172, "step": 25193 }, { "epoch": 0.65, "grad_norm": 6.427003860473633, "learning_rate": 5.8908646711533605e-06, "loss": 0.6693, "step": 25194 }, { "epoch": 0.65, "grad_norm": 1.1988104581832886, "learning_rate": 5.890107865380953e-06, "loss": 0.5573, "step": 25195 }, { "epoch": 0.65, "grad_norm": 2.8796303272247314, "learning_rate": 5.889351087931341e-06, "loss": 0.7193, "step": 25196 }, { "epoch": 0.65, "grad_norm": 1.2658098936080933, "learning_rate": 5.888594338809739e-06, "loss": 0.6221, "step": 25197 }, { "epoch": 0.65, "grad_norm": 1.4828321933746338, "learning_rate": 5.887837618021362e-06, "loss": 0.4394, "step": 25198 }, { "epoch": 0.65, "grad_norm": 2.2780070304870605, "learning_rate": 5.887080925571425e-06, "loss": 0.6088, "step": 25199 }, { "epoch": 0.65, "grad_norm": 1.457108736038208, "learning_rate": 5.886324261465139e-06, "loss": 0.5968, "step": 25200 }, { "epoch": 0.65, "grad_norm": 0.9681143164634705, "learning_rate": 5.885567625707725e-06, "loss": 0.4033, "step": 25201 }, { "epoch": 0.65, "grad_norm": 7.411917209625244, "learning_rate": 5.884811018304394e-06, "loss": 0.6334, "step": 25202 }, { "epoch": 0.65, "grad_norm": 1.6282507181167603, "learning_rate": 5.884054439260358e-06, "loss": 0.6116, "step": 25203 }, { "epoch": 0.65, "grad_norm": 2.230984687805176, "learning_rate": 5.883297888580836e-06, "loss": 0.5633, "step": 25204 }, { "epoch": 0.65, "grad_norm": 12.18547248840332, "learning_rate": 5.882541366271035e-06, "loss": 0.5633, "step": 25205 }, { "epoch": 0.65, "grad_norm": 0.9765986800193787, "learning_rate": 5.881784872336176e-06, "loss": 0.5652, "step": 25206 }, { "epoch": 0.65, "grad_norm": 1.7205874919891357, "learning_rate": 5.881028406781464e-06, "loss": 0.4766, "step": 25207 }, { "epoch": 0.65, "grad_norm": 1.7111886739730835, "learning_rate": 5.880271969612119e-06, "loss": 0.6252, "step": 25208 }, { "epoch": 0.65, "grad_norm": 1.2383509874343872, "learning_rate": 5.879515560833351e-06, "loss": 0.5916, "step": 25209 }, { "epoch": 0.65, "grad_norm": 1.4809070825576782, "learning_rate": 5.878759180450373e-06, "loss": 0.5049, "step": 25210 }, { "epoch": 0.65, "grad_norm": 1.0324121713638306, "learning_rate": 5.878002828468399e-06, "loss": 0.3648, "step": 25211 }, { "epoch": 0.65, "grad_norm": 2.41084623336792, "learning_rate": 5.877246504892637e-06, "loss": 0.3215, "step": 25212 }, { "epoch": 0.65, "grad_norm": 2.105710744857788, "learning_rate": 5.876490209728302e-06, "loss": 0.5462, "step": 25213 }, { "epoch": 0.65, "grad_norm": 1.4498422145843506, "learning_rate": 5.875733942980608e-06, "loss": 0.5848, "step": 25214 }, { "epoch": 0.65, "grad_norm": 3.521045446395874, "learning_rate": 5.874977704654763e-06, "loss": 0.528, "step": 25215 }, { "epoch": 0.65, "grad_norm": 3.038331985473633, "learning_rate": 5.874221494755981e-06, "loss": 0.6062, "step": 25216 }, { "epoch": 0.65, "grad_norm": 1.3821297883987427, "learning_rate": 5.873465313289471e-06, "loss": 0.453, "step": 25217 }, { "epoch": 0.65, "grad_norm": 1.3330624103546143, "learning_rate": 5.872709160260448e-06, "loss": 0.6657, "step": 25218 }, { "epoch": 0.65, "grad_norm": 1.5554407835006714, "learning_rate": 5.871953035674119e-06, "loss": 0.537, "step": 25219 }, { "epoch": 0.65, "grad_norm": 4.1858696937561035, "learning_rate": 5.871196939535695e-06, "loss": 0.4988, "step": 25220 }, { "epoch": 0.65, "grad_norm": 1.6786174774169922, "learning_rate": 5.870440871850389e-06, "loss": 0.5996, "step": 25221 }, { "epoch": 0.65, "grad_norm": 6.619041919708252, "learning_rate": 5.86968483262341e-06, "loss": 0.6891, "step": 25222 }, { "epoch": 0.65, "grad_norm": 1.8112220764160156, "learning_rate": 5.86892882185997e-06, "loss": 0.5378, "step": 25223 }, { "epoch": 0.65, "grad_norm": 1.5920424461364746, "learning_rate": 5.868172839565274e-06, "loss": 0.5649, "step": 25224 }, { "epoch": 0.65, "grad_norm": 1.2844386100769043, "learning_rate": 5.8674168857445344e-06, "loss": 0.5675, "step": 25225 }, { "epoch": 0.65, "grad_norm": 1.509689211845398, "learning_rate": 5.866660960402965e-06, "loss": 0.5745, "step": 25226 }, { "epoch": 0.65, "grad_norm": 1.3383069038391113, "learning_rate": 5.865905063545768e-06, "loss": 0.6464, "step": 25227 }, { "epoch": 0.65, "grad_norm": 3.6285014152526855, "learning_rate": 5.865149195178159e-06, "loss": 0.7059, "step": 25228 }, { "epoch": 0.65, "grad_norm": 2.158773183822632, "learning_rate": 5.8643933553053425e-06, "loss": 0.548, "step": 25229 }, { "epoch": 0.65, "grad_norm": 1.4242208003997803, "learning_rate": 5.86363754393253e-06, "loss": 0.6048, "step": 25230 }, { "epoch": 0.65, "grad_norm": 3.654123306274414, "learning_rate": 5.8628817610649255e-06, "loss": 0.4928, "step": 25231 }, { "epoch": 0.65, "grad_norm": 4.492091178894043, "learning_rate": 5.8621260067077425e-06, "loss": 0.6533, "step": 25232 }, { "epoch": 0.65, "grad_norm": 1.5583163499832153, "learning_rate": 5.861370280866189e-06, "loss": 0.399, "step": 25233 }, { "epoch": 0.65, "grad_norm": 2.012850522994995, "learning_rate": 5.86061458354547e-06, "loss": 0.5731, "step": 25234 }, { "epoch": 0.65, "grad_norm": 1.3672974109649658, "learning_rate": 5.859858914750796e-06, "loss": 0.529, "step": 25235 }, { "epoch": 0.65, "grad_norm": 1.177297830581665, "learning_rate": 5.85910327448737e-06, "loss": 0.5854, "step": 25236 }, { "epoch": 0.65, "grad_norm": 2.1446685791015625, "learning_rate": 5.858347662760404e-06, "loss": 0.5974, "step": 25237 }, { "epoch": 0.65, "grad_norm": 5.002359390258789, "learning_rate": 5.857592079575105e-06, "loss": 0.5724, "step": 25238 }, { "epoch": 0.65, "grad_norm": 1.7138196229934692, "learning_rate": 5.856836524936677e-06, "loss": 0.4069, "step": 25239 }, { "epoch": 0.65, "grad_norm": 2.2130508422851562, "learning_rate": 5.8560809988503334e-06, "loss": 0.4831, "step": 25240 }, { "epoch": 0.65, "grad_norm": 3.454188346862793, "learning_rate": 5.855325501321272e-06, "loss": 0.5371, "step": 25241 }, { "epoch": 0.65, "grad_norm": 1.6742569208145142, "learning_rate": 5.854570032354705e-06, "loss": 0.5165, "step": 25242 }, { "epoch": 0.65, "grad_norm": 1.2606490850448608, "learning_rate": 5.853814591955835e-06, "loss": 0.5016, "step": 25243 }, { "epoch": 0.65, "grad_norm": 1.409637689590454, "learning_rate": 5.85305918012987e-06, "loss": 0.5623, "step": 25244 }, { "epoch": 0.65, "grad_norm": 1.3175554275512695, "learning_rate": 5.852303796882018e-06, "loss": 0.5549, "step": 25245 }, { "epoch": 0.65, "grad_norm": 1.1932992935180664, "learning_rate": 5.851548442217481e-06, "loss": 0.5426, "step": 25246 }, { "epoch": 0.65, "grad_norm": 2.862844944000244, "learning_rate": 5.850793116141466e-06, "loss": 0.6457, "step": 25247 }, { "epoch": 0.65, "grad_norm": 1.105074167251587, "learning_rate": 5.850037818659178e-06, "loss": 0.4277, "step": 25248 }, { "epoch": 0.65, "grad_norm": 3.840952157974243, "learning_rate": 5.849282549775821e-06, "loss": 0.6775, "step": 25249 }, { "epoch": 0.65, "grad_norm": 1.3282169103622437, "learning_rate": 5.848527309496603e-06, "loss": 0.6041, "step": 25250 }, { "epoch": 0.65, "grad_norm": 1.4054243564605713, "learning_rate": 5.8477720978267245e-06, "loss": 0.4864, "step": 25251 }, { "epoch": 0.65, "grad_norm": 2.080535411834717, "learning_rate": 5.847016914771395e-06, "loss": 0.6143, "step": 25252 }, { "epoch": 0.65, "grad_norm": 1.3392202854156494, "learning_rate": 5.846261760335813e-06, "loss": 0.4447, "step": 25253 }, { "epoch": 0.65, "grad_norm": 3.147535800933838, "learning_rate": 5.8455066345251845e-06, "loss": 0.5895, "step": 25254 }, { "epoch": 0.65, "grad_norm": 1.8696616888046265, "learning_rate": 5.844751537344717e-06, "loss": 0.6187, "step": 25255 }, { "epoch": 0.65, "grad_norm": 1.7895909547805786, "learning_rate": 5.84399646879961e-06, "loss": 0.7285, "step": 25256 }, { "epoch": 0.65, "grad_norm": 5.613438606262207, "learning_rate": 5.84324142889507e-06, "loss": 0.5783, "step": 25257 }, { "epoch": 0.65, "grad_norm": 2.3359262943267822, "learning_rate": 5.842486417636296e-06, "loss": 0.4457, "step": 25258 }, { "epoch": 0.65, "grad_norm": 1.6784600019454956, "learning_rate": 5.841731435028496e-06, "loss": 0.4499, "step": 25259 }, { "epoch": 0.65, "grad_norm": 7.514337062835693, "learning_rate": 5.840976481076868e-06, "loss": 0.5067, "step": 25260 }, { "epoch": 0.65, "grad_norm": 2.5031440258026123, "learning_rate": 5.840221555786618e-06, "loss": 0.5835, "step": 25261 }, { "epoch": 0.65, "grad_norm": 1.6442484855651855, "learning_rate": 5.839466659162949e-06, "loss": 0.3748, "step": 25262 }, { "epoch": 0.65, "grad_norm": 3.8494527339935303, "learning_rate": 5.838711791211059e-06, "loss": 0.4679, "step": 25263 }, { "epoch": 0.65, "grad_norm": 4.925861358642578, "learning_rate": 5.837956951936157e-06, "loss": 0.6903, "step": 25264 }, { "epoch": 0.65, "grad_norm": 1.6624335050582886, "learning_rate": 5.837202141343439e-06, "loss": 0.5396, "step": 25265 }, { "epoch": 0.65, "grad_norm": 2.64542555809021, "learning_rate": 5.836447359438104e-06, "loss": 0.6981, "step": 25266 }, { "epoch": 0.65, "grad_norm": 1.1466389894485474, "learning_rate": 5.835692606225365e-06, "loss": 0.4037, "step": 25267 }, { "epoch": 0.65, "grad_norm": 1.3116525411605835, "learning_rate": 5.8349378817104145e-06, "loss": 0.578, "step": 25268 }, { "epoch": 0.65, "grad_norm": 1.4169321060180664, "learning_rate": 5.83418318589845e-06, "loss": 0.4391, "step": 25269 }, { "epoch": 0.65, "grad_norm": 2.2487735748291016, "learning_rate": 5.833428518794684e-06, "loss": 0.5383, "step": 25270 }, { "epoch": 0.65, "grad_norm": 2.7831759452819824, "learning_rate": 5.832673880404309e-06, "loss": 0.6032, "step": 25271 }, { "epoch": 0.65, "grad_norm": 8.439457893371582, "learning_rate": 5.831919270732524e-06, "loss": 0.6204, "step": 25272 }, { "epoch": 0.65, "grad_norm": 2.644545793533325, "learning_rate": 5.831164689784538e-06, "loss": 0.6781, "step": 25273 }, { "epoch": 0.65, "grad_norm": 1.440387487411499, "learning_rate": 5.830410137565545e-06, "loss": 0.3672, "step": 25274 }, { "epoch": 0.65, "grad_norm": 1.6039905548095703, "learning_rate": 5.829655614080747e-06, "loss": 0.4487, "step": 25275 }, { "epoch": 0.65, "grad_norm": 1.6278258562088013, "learning_rate": 5.8289011193353375e-06, "loss": 0.6423, "step": 25276 }, { "epoch": 0.65, "grad_norm": 3.747316837310791, "learning_rate": 5.828146653334523e-06, "loss": 0.4456, "step": 25277 }, { "epoch": 0.65, "grad_norm": 5.466047763824463, "learning_rate": 5.827392216083503e-06, "loss": 0.6656, "step": 25278 }, { "epoch": 0.65, "grad_norm": 5.68079948425293, "learning_rate": 5.82663780758747e-06, "loss": 0.5687, "step": 25279 }, { "epoch": 0.65, "grad_norm": 1.5302592515945435, "learning_rate": 5.82588342785163e-06, "loss": 0.4472, "step": 25280 }, { "epoch": 0.65, "grad_norm": 1.1995093822479248, "learning_rate": 5.825129076881182e-06, "loss": 0.5087, "step": 25281 }, { "epoch": 0.65, "grad_norm": 10.83018970489502, "learning_rate": 5.824374754681319e-06, "loss": 0.7372, "step": 25282 }, { "epoch": 0.65, "grad_norm": 1.35413658618927, "learning_rate": 5.823620461257238e-06, "loss": 0.5822, "step": 25283 }, { "epoch": 0.65, "grad_norm": 1.6695611476898193, "learning_rate": 5.822866196614145e-06, "loss": 0.5365, "step": 25284 }, { "epoch": 0.65, "grad_norm": 1.5590263605117798, "learning_rate": 5.8221119607572355e-06, "loss": 0.5842, "step": 25285 }, { "epoch": 0.65, "grad_norm": 1.6153204441070557, "learning_rate": 5.821357753691698e-06, "loss": 0.5714, "step": 25286 }, { "epoch": 0.65, "grad_norm": 3.4997942447662354, "learning_rate": 5.820603575422745e-06, "loss": 0.7946, "step": 25287 }, { "epoch": 0.65, "grad_norm": 4.595487117767334, "learning_rate": 5.819849425955566e-06, "loss": 0.5509, "step": 25288 }, { "epoch": 0.65, "grad_norm": 2.541759967803955, "learning_rate": 5.819095305295357e-06, "loss": 0.5764, "step": 25289 }, { "epoch": 0.65, "grad_norm": 3.478438138961792, "learning_rate": 5.818341213447314e-06, "loss": 0.4435, "step": 25290 }, { "epoch": 0.65, "grad_norm": 1.523438572883606, "learning_rate": 5.81758715041664e-06, "loss": 0.5692, "step": 25291 }, { "epoch": 0.65, "grad_norm": 3.2132551670074463, "learning_rate": 5.816833116208525e-06, "loss": 0.6473, "step": 25292 }, { "epoch": 0.65, "grad_norm": 1.350190281867981, "learning_rate": 5.816079110828167e-06, "loss": 0.5932, "step": 25293 }, { "epoch": 0.65, "grad_norm": 1.428537130355835, "learning_rate": 5.815325134280766e-06, "loss": 0.638, "step": 25294 }, { "epoch": 0.65, "grad_norm": 1.574641227722168, "learning_rate": 5.814571186571516e-06, "loss": 0.5049, "step": 25295 }, { "epoch": 0.65, "grad_norm": 1.5973100662231445, "learning_rate": 5.813817267705606e-06, "loss": 0.5592, "step": 25296 }, { "epoch": 0.65, "grad_norm": 6.556861400604248, "learning_rate": 5.813063377688244e-06, "loss": 0.5143, "step": 25297 }, { "epoch": 0.65, "grad_norm": 1.1357918977737427, "learning_rate": 5.812309516524617e-06, "loss": 0.5525, "step": 25298 }, { "epoch": 0.65, "grad_norm": 0.9805474877357483, "learning_rate": 5.811555684219921e-06, "loss": 0.5051, "step": 25299 }, { "epoch": 0.65, "grad_norm": 3.8333568572998047, "learning_rate": 5.8108018807793495e-06, "loss": 0.5171, "step": 25300 }, { "epoch": 0.65, "grad_norm": 3.102834701538086, "learning_rate": 5.810048106208103e-06, "loss": 0.3926, "step": 25301 }, { "epoch": 0.65, "grad_norm": 3.3453617095947266, "learning_rate": 5.809294360511373e-06, "loss": 0.4705, "step": 25302 }, { "epoch": 0.65, "grad_norm": 3.762647867202759, "learning_rate": 5.808540643694349e-06, "loss": 0.5228, "step": 25303 }, { "epoch": 0.65, "grad_norm": 1.188834309577942, "learning_rate": 5.807786955762234e-06, "loss": 0.616, "step": 25304 }, { "epoch": 0.65, "grad_norm": 1.8630332946777344, "learning_rate": 5.807033296720216e-06, "loss": 0.6608, "step": 25305 }, { "epoch": 0.65, "grad_norm": 1.348597526550293, "learning_rate": 5.806279666573491e-06, "loss": 0.4522, "step": 25306 }, { "epoch": 0.65, "grad_norm": 2.044252872467041, "learning_rate": 5.805526065327248e-06, "loss": 0.5965, "step": 25307 }, { "epoch": 0.65, "grad_norm": 1.3785752058029175, "learning_rate": 5.804772492986688e-06, "loss": 0.4704, "step": 25308 }, { "epoch": 0.65, "grad_norm": 1.4043924808502197, "learning_rate": 5.804018949557001e-06, "loss": 0.5062, "step": 25309 }, { "epoch": 0.65, "grad_norm": 2.0446276664733887, "learning_rate": 5.803265435043379e-06, "loss": 0.6323, "step": 25310 }, { "epoch": 0.65, "grad_norm": 5.543822288513184, "learning_rate": 5.802511949451014e-06, "loss": 0.6274, "step": 25311 }, { "epoch": 0.65, "grad_norm": 1.9772521257400513, "learning_rate": 5.8017584927851e-06, "loss": 0.4545, "step": 25312 }, { "epoch": 0.65, "grad_norm": 1.297886610031128, "learning_rate": 5.8010050650508254e-06, "loss": 0.5836, "step": 25313 }, { "epoch": 0.65, "grad_norm": 5.288137435913086, "learning_rate": 5.800251666253389e-06, "loss": 0.715, "step": 25314 }, { "epoch": 0.65, "grad_norm": 1.520375370979309, "learning_rate": 5.799498296397981e-06, "loss": 0.5835, "step": 25315 }, { "epoch": 0.65, "grad_norm": 6.556519508361816, "learning_rate": 5.7987449554897856e-06, "loss": 0.4989, "step": 25316 }, { "epoch": 0.65, "grad_norm": 1.079140067100525, "learning_rate": 5.7979916435340065e-06, "loss": 0.4694, "step": 25317 }, { "epoch": 0.65, "grad_norm": 1.0181162357330322, "learning_rate": 5.797238360535828e-06, "loss": 0.4033, "step": 25318 }, { "epoch": 0.65, "grad_norm": 1.6355947256088257, "learning_rate": 5.796485106500443e-06, "loss": 0.6325, "step": 25319 }, { "epoch": 0.65, "grad_norm": 2.27970290184021, "learning_rate": 5.795731881433036e-06, "loss": 0.5302, "step": 25320 }, { "epoch": 0.65, "grad_norm": 3.3842170238494873, "learning_rate": 5.794978685338809e-06, "loss": 0.5699, "step": 25321 }, { "epoch": 0.65, "grad_norm": 1.6934586763381958, "learning_rate": 5.794225518222947e-06, "loss": 0.5868, "step": 25322 }, { "epoch": 0.65, "grad_norm": 1.5019396543502808, "learning_rate": 5.793472380090636e-06, "loss": 0.561, "step": 25323 }, { "epoch": 0.65, "grad_norm": 1.3681429624557495, "learning_rate": 5.792719270947074e-06, "loss": 0.5691, "step": 25324 }, { "epoch": 0.65, "grad_norm": 1.742576003074646, "learning_rate": 5.791966190797449e-06, "loss": 0.4633, "step": 25325 }, { "epoch": 0.65, "grad_norm": 1.3200032711029053, "learning_rate": 5.791213139646944e-06, "loss": 0.5505, "step": 25326 }, { "epoch": 0.65, "grad_norm": 1.5873513221740723, "learning_rate": 5.790460117500758e-06, "loss": 0.5811, "step": 25327 }, { "epoch": 0.65, "grad_norm": 1.2765029668807983, "learning_rate": 5.789707124364077e-06, "loss": 0.5099, "step": 25328 }, { "epoch": 0.65, "grad_norm": 2.7367074489593506, "learning_rate": 5.7889541602420885e-06, "loss": 0.6122, "step": 25329 }, { "epoch": 0.65, "grad_norm": 2.125058174133301, "learning_rate": 5.7882012251399786e-06, "loss": 0.5664, "step": 25330 }, { "epoch": 0.65, "grad_norm": 4.426790237426758, "learning_rate": 5.787448319062944e-06, "loss": 0.6452, "step": 25331 }, { "epoch": 0.65, "grad_norm": 1.6640230417251587, "learning_rate": 5.786695442016168e-06, "loss": 0.5304, "step": 25332 }, { "epoch": 0.65, "grad_norm": 1.1802781820297241, "learning_rate": 5.785942594004837e-06, "loss": 0.5068, "step": 25333 }, { "epoch": 0.65, "grad_norm": 1.2890539169311523, "learning_rate": 5.7851897750341456e-06, "loss": 0.6982, "step": 25334 }, { "epoch": 0.65, "grad_norm": 1.8977513313293457, "learning_rate": 5.784436985109279e-06, "loss": 0.507, "step": 25335 }, { "epoch": 0.65, "grad_norm": 1.4666426181793213, "learning_rate": 5.783684224235424e-06, "loss": 0.6291, "step": 25336 }, { "epoch": 0.65, "grad_norm": 1.4166767597198486, "learning_rate": 5.782931492417763e-06, "loss": 0.518, "step": 25337 }, { "epoch": 0.65, "grad_norm": 5.998589515686035, "learning_rate": 5.782178789661494e-06, "loss": 0.8094, "step": 25338 }, { "epoch": 0.65, "grad_norm": 1.779032826423645, "learning_rate": 5.7814261159717986e-06, "loss": 0.775, "step": 25339 }, { "epoch": 0.65, "grad_norm": 1.3353886604309082, "learning_rate": 5.78067347135386e-06, "loss": 0.566, "step": 25340 }, { "epoch": 0.65, "grad_norm": 12.2741117477417, "learning_rate": 5.779920855812872e-06, "loss": 0.6365, "step": 25341 }, { "epoch": 0.65, "grad_norm": 1.4003076553344727, "learning_rate": 5.7791682693540205e-06, "loss": 0.467, "step": 25342 }, { "epoch": 0.65, "grad_norm": 9.520569801330566, "learning_rate": 5.778415711982485e-06, "loss": 0.4852, "step": 25343 }, { "epoch": 0.65, "grad_norm": 4.69835901260376, "learning_rate": 5.777663183703459e-06, "loss": 0.6072, "step": 25344 }, { "epoch": 0.65, "grad_norm": 1.830662488937378, "learning_rate": 5.776910684522125e-06, "loss": 0.4309, "step": 25345 }, { "epoch": 0.65, "grad_norm": 1.5723479986190796, "learning_rate": 5.776158214443671e-06, "loss": 0.466, "step": 25346 }, { "epoch": 0.65, "grad_norm": 1.4079424142837524, "learning_rate": 5.775405773473277e-06, "loss": 0.6144, "step": 25347 }, { "epoch": 0.65, "grad_norm": 1.7156975269317627, "learning_rate": 5.774653361616136e-06, "loss": 0.5642, "step": 25348 }, { "epoch": 0.65, "grad_norm": 2.0011146068573, "learning_rate": 5.773900978877428e-06, "loss": 0.6846, "step": 25349 }, { "epoch": 0.65, "grad_norm": 1.5281871557235718, "learning_rate": 5.773148625262337e-06, "loss": 0.6492, "step": 25350 }, { "epoch": 0.65, "grad_norm": 1.4049580097198486, "learning_rate": 5.772396300776053e-06, "loss": 0.4503, "step": 25351 }, { "epoch": 0.65, "grad_norm": 2.9571354389190674, "learning_rate": 5.771644005423758e-06, "loss": 0.6492, "step": 25352 }, { "epoch": 0.65, "grad_norm": 1.9058681726455688, "learning_rate": 5.770891739210636e-06, "loss": 0.4836, "step": 25353 }, { "epoch": 0.65, "grad_norm": 1.4006925821304321, "learning_rate": 5.770139502141867e-06, "loss": 0.4953, "step": 25354 }, { "epoch": 0.65, "grad_norm": 2.6899197101593018, "learning_rate": 5.7693872942226435e-06, "loss": 0.7283, "step": 25355 }, { "epoch": 0.65, "grad_norm": 1.2756539583206177, "learning_rate": 5.7686351154581454e-06, "loss": 0.7074, "step": 25356 }, { "epoch": 0.65, "grad_norm": 1.8075677156448364, "learning_rate": 5.767882965853551e-06, "loss": 0.5181, "step": 25357 }, { "epoch": 0.65, "grad_norm": 1.6862396001815796, "learning_rate": 5.767130845414052e-06, "loss": 0.5644, "step": 25358 }, { "epoch": 0.65, "grad_norm": 1.2522141933441162, "learning_rate": 5.7663787541448276e-06, "loss": 0.4584, "step": 25359 }, { "epoch": 0.65, "grad_norm": 1.4604030847549438, "learning_rate": 5.765626692051062e-06, "loss": 0.592, "step": 25360 }, { "epoch": 0.65, "grad_norm": 2.9499406814575195, "learning_rate": 5.764874659137931e-06, "loss": 0.5638, "step": 25361 }, { "epoch": 0.65, "grad_norm": 1.828281044960022, "learning_rate": 5.764122655410629e-06, "loss": 0.5863, "step": 25362 }, { "epoch": 0.65, "grad_norm": 1.5291333198547363, "learning_rate": 5.763370680874332e-06, "loss": 0.5862, "step": 25363 }, { "epoch": 0.65, "grad_norm": 1.410314679145813, "learning_rate": 5.762618735534219e-06, "loss": 0.6829, "step": 25364 }, { "epoch": 0.65, "grad_norm": 1.2993968725204468, "learning_rate": 5.761866819395479e-06, "loss": 0.4993, "step": 25365 }, { "epoch": 0.65, "grad_norm": 3.7011499404907227, "learning_rate": 5.76111493246329e-06, "loss": 0.6031, "step": 25366 }, { "epoch": 0.65, "grad_norm": 1.3582262992858887, "learning_rate": 5.76036307474283e-06, "loss": 0.557, "step": 25367 }, { "epoch": 0.65, "grad_norm": 1.6538021564483643, "learning_rate": 5.759611246239289e-06, "loss": 0.6693, "step": 25368 }, { "epoch": 0.65, "grad_norm": 2.6636834144592285, "learning_rate": 5.7588594469578415e-06, "loss": 0.47, "step": 25369 }, { "epoch": 0.65, "grad_norm": 4.231945514678955, "learning_rate": 5.75810767690367e-06, "loss": 0.4558, "step": 25370 }, { "epoch": 0.65, "grad_norm": 1.4361683130264282, "learning_rate": 5.757355936081954e-06, "loss": 0.5923, "step": 25371 }, { "epoch": 0.65, "grad_norm": 7.05622673034668, "learning_rate": 5.756604224497877e-06, "loss": 0.5668, "step": 25372 }, { "epoch": 0.65, "grad_norm": 1.8275682926177979, "learning_rate": 5.755852542156619e-06, "loss": 0.5033, "step": 25373 }, { "epoch": 0.65, "grad_norm": 1.2617799043655396, "learning_rate": 5.755100889063354e-06, "loss": 0.5082, "step": 25374 }, { "epoch": 0.65, "grad_norm": 1.982426643371582, "learning_rate": 5.7543492652232715e-06, "loss": 0.4047, "step": 25375 }, { "epoch": 0.65, "grad_norm": 1.725106120109558, "learning_rate": 5.753597670641545e-06, "loss": 0.5096, "step": 25376 }, { "epoch": 0.65, "grad_norm": 1.5382519960403442, "learning_rate": 5.7528461053233574e-06, "loss": 0.5648, "step": 25377 }, { "epoch": 0.65, "grad_norm": 2.9003682136535645, "learning_rate": 5.752094569273881e-06, "loss": 0.7283, "step": 25378 }, { "epoch": 0.65, "grad_norm": 2.759016752243042, "learning_rate": 5.7513430624983044e-06, "loss": 0.6597, "step": 25379 }, { "epoch": 0.65, "grad_norm": 1.1422847509384155, "learning_rate": 5.7505915850018016e-06, "loss": 0.5261, "step": 25380 }, { "epoch": 0.65, "grad_norm": 2.7694544792175293, "learning_rate": 5.7498401367895485e-06, "loss": 0.5023, "step": 25381 }, { "epoch": 0.65, "grad_norm": 3.5001590251922607, "learning_rate": 5.749088717866731e-06, "loss": 0.7383, "step": 25382 }, { "epoch": 0.65, "grad_norm": 3.6203486919403076, "learning_rate": 5.748337328238522e-06, "loss": 0.664, "step": 25383 }, { "epoch": 0.65, "grad_norm": 2.0638439655303955, "learning_rate": 5.747585967910103e-06, "loss": 0.6493, "step": 25384 }, { "epoch": 0.65, "grad_norm": 1.2958879470825195, "learning_rate": 5.746834636886645e-06, "loss": 0.4371, "step": 25385 }, { "epoch": 0.65, "grad_norm": 3.992706298828125, "learning_rate": 5.746083335173334e-06, "loss": 0.5323, "step": 25386 }, { "epoch": 0.65, "grad_norm": 2.0158803462982178, "learning_rate": 5.745332062775345e-06, "loss": 0.4744, "step": 25387 }, { "epoch": 0.65, "grad_norm": 3.3709938526153564, "learning_rate": 5.74458081969785e-06, "loss": 0.5445, "step": 25388 }, { "epoch": 0.65, "grad_norm": 1.126595139503479, "learning_rate": 5.743829605946034e-06, "loss": 0.513, "step": 25389 }, { "epoch": 0.65, "grad_norm": 2.7602574825286865, "learning_rate": 5.743078421525072e-06, "loss": 0.6859, "step": 25390 }, { "epoch": 0.65, "grad_norm": 1.2456026077270508, "learning_rate": 5.742327266440133e-06, "loss": 0.5028, "step": 25391 }, { "epoch": 0.65, "grad_norm": 1.378629207611084, "learning_rate": 5.7415761406964056e-06, "loss": 0.408, "step": 25392 }, { "epoch": 0.65, "grad_norm": 1.2728195190429688, "learning_rate": 5.7408250442990585e-06, "loss": 0.6555, "step": 25393 }, { "epoch": 0.65, "grad_norm": 1.826073408126831, "learning_rate": 5.74007397725327e-06, "loss": 0.4298, "step": 25394 }, { "epoch": 0.65, "grad_norm": 1.5497264862060547, "learning_rate": 5.739322939564212e-06, "loss": 0.5024, "step": 25395 }, { "epoch": 0.65, "grad_norm": 1.2914520502090454, "learning_rate": 5.7385719312370645e-06, "loss": 0.5467, "step": 25396 }, { "epoch": 0.65, "grad_norm": 1.1426303386688232, "learning_rate": 5.7378209522770045e-06, "loss": 0.5727, "step": 25397 }, { "epoch": 0.65, "grad_norm": 1.3315966129302979, "learning_rate": 5.7370700026892e-06, "loss": 0.4749, "step": 25398 }, { "epoch": 0.65, "grad_norm": 1.7455965280532837, "learning_rate": 5.736319082478836e-06, "loss": 0.4232, "step": 25399 }, { "epoch": 0.65, "grad_norm": 3.207623243331909, "learning_rate": 5.7355681916510805e-06, "loss": 0.7538, "step": 25400 }, { "epoch": 0.65, "grad_norm": 1.056673288345337, "learning_rate": 5.734817330211111e-06, "loss": 0.4196, "step": 25401 }, { "epoch": 0.65, "grad_norm": 1.2008978128433228, "learning_rate": 5.734066498164096e-06, "loss": 0.4124, "step": 25402 }, { "epoch": 0.65, "grad_norm": 1.3714330196380615, "learning_rate": 5.733315695515219e-06, "loss": 0.5319, "step": 25403 }, { "epoch": 0.65, "grad_norm": 1.3448030948638916, "learning_rate": 5.73256492226965e-06, "loss": 0.6426, "step": 25404 }, { "epoch": 0.65, "grad_norm": 3.2030277252197266, "learning_rate": 5.731814178432558e-06, "loss": 0.8185, "step": 25405 }, { "epoch": 0.65, "grad_norm": 1.3343716859817505, "learning_rate": 5.731063464009125e-06, "loss": 0.4439, "step": 25406 }, { "epoch": 0.65, "grad_norm": 1.8431065082550049, "learning_rate": 5.730312779004521e-06, "loss": 0.5007, "step": 25407 }, { "epoch": 0.65, "grad_norm": 1.2018704414367676, "learning_rate": 5.729562123423914e-06, "loss": 0.4774, "step": 25408 }, { "epoch": 0.65, "grad_norm": 1.514675498008728, "learning_rate": 5.728811497272486e-06, "loss": 0.5082, "step": 25409 }, { "epoch": 0.65, "grad_norm": 1.4151431322097778, "learning_rate": 5.7280609005554074e-06, "loss": 0.5466, "step": 25410 }, { "epoch": 0.65, "grad_norm": 5.146805286407471, "learning_rate": 5.727310333277847e-06, "loss": 0.5956, "step": 25411 }, { "epoch": 0.65, "grad_norm": 1.242844581604004, "learning_rate": 5.726559795444977e-06, "loss": 0.5416, "step": 25412 }, { "epoch": 0.65, "grad_norm": 1.4777703285217285, "learning_rate": 5.725809287061976e-06, "loss": 0.697, "step": 25413 }, { "epoch": 0.65, "grad_norm": 1.3081408739089966, "learning_rate": 5.725058808134012e-06, "loss": 0.3865, "step": 25414 }, { "epoch": 0.65, "grad_norm": 1.550767183303833, "learning_rate": 5.724308358666252e-06, "loss": 0.4195, "step": 25415 }, { "epoch": 0.65, "grad_norm": 1.160067081451416, "learning_rate": 5.7235579386638776e-06, "loss": 0.6572, "step": 25416 }, { "epoch": 0.65, "grad_norm": 1.305594563484192, "learning_rate": 5.7228075481320545e-06, "loss": 0.4401, "step": 25417 }, { "epoch": 0.65, "grad_norm": 1.993435025215149, "learning_rate": 5.722057187075953e-06, "loss": 0.555, "step": 25418 }, { "epoch": 0.65, "grad_norm": 1.619553565979004, "learning_rate": 5.7213068555007444e-06, "loss": 0.5892, "step": 25419 }, { "epoch": 0.65, "grad_norm": 1.5752558708190918, "learning_rate": 5.720556553411604e-06, "loss": 0.5618, "step": 25420 }, { "epoch": 0.65, "grad_norm": 7.3754563331604, "learning_rate": 5.719806280813699e-06, "loss": 0.6464, "step": 25421 }, { "epoch": 0.65, "grad_norm": 3.021498918533325, "learning_rate": 5.719056037712196e-06, "loss": 0.8079, "step": 25422 }, { "epoch": 0.65, "grad_norm": 6.328460693359375, "learning_rate": 5.718305824112273e-06, "loss": 0.6263, "step": 25423 }, { "epoch": 0.65, "grad_norm": 2.978912830352783, "learning_rate": 5.7175556400190965e-06, "loss": 0.531, "step": 25424 }, { "epoch": 0.65, "grad_norm": 3.288076877593994, "learning_rate": 5.716805485437836e-06, "loss": 0.5874, "step": 25425 }, { "epoch": 0.65, "grad_norm": 3.54313063621521, "learning_rate": 5.716055360373658e-06, "loss": 0.7807, "step": 25426 }, { "epoch": 0.65, "grad_norm": 1.4213918447494507, "learning_rate": 5.715305264831738e-06, "loss": 0.6319, "step": 25427 }, { "epoch": 0.65, "grad_norm": 1.6246435642242432, "learning_rate": 5.714555198817242e-06, "loss": 0.5394, "step": 25428 }, { "epoch": 0.65, "grad_norm": 1.3759394884109497, "learning_rate": 5.713805162335335e-06, "loss": 0.5955, "step": 25429 }, { "epoch": 0.65, "grad_norm": 1.175696611404419, "learning_rate": 5.713055155391195e-06, "loss": 0.5307, "step": 25430 }, { "epoch": 0.65, "grad_norm": 9.52476692199707, "learning_rate": 5.712305177989985e-06, "loss": 0.6984, "step": 25431 }, { "epoch": 0.65, "grad_norm": 1.0780023336410522, "learning_rate": 5.7115552301368705e-06, "loss": 0.5885, "step": 25432 }, { "epoch": 0.65, "grad_norm": 1.2243216037750244, "learning_rate": 5.710805311837027e-06, "loss": 0.6229, "step": 25433 }, { "epoch": 0.65, "grad_norm": 2.4149694442749023, "learning_rate": 5.710055423095618e-06, "loss": 0.5847, "step": 25434 }, { "epoch": 0.65, "grad_norm": 5.781997203826904, "learning_rate": 5.7093055639178115e-06, "loss": 0.5051, "step": 25435 }, { "epoch": 0.65, "grad_norm": 2.250159740447998, "learning_rate": 5.708555734308772e-06, "loss": 0.6145, "step": 25436 }, { "epoch": 0.65, "grad_norm": 1.0536361932754517, "learning_rate": 5.707805934273673e-06, "loss": 0.3742, "step": 25437 }, { "epoch": 0.65, "grad_norm": 1.097359538078308, "learning_rate": 5.707056163817681e-06, "loss": 0.4782, "step": 25438 }, { "epoch": 0.65, "grad_norm": 8.845572471618652, "learning_rate": 5.706306422945955e-06, "loss": 0.5436, "step": 25439 }, { "epoch": 0.65, "grad_norm": 1.1526895761489868, "learning_rate": 5.7055567116636734e-06, "loss": 0.4671, "step": 25440 }, { "epoch": 0.65, "grad_norm": 1.3460278511047363, "learning_rate": 5.704807029975996e-06, "loss": 0.3598, "step": 25441 }, { "epoch": 0.65, "grad_norm": 1.8301767110824585, "learning_rate": 5.70405737788809e-06, "loss": 0.6363, "step": 25442 }, { "epoch": 0.65, "grad_norm": 1.8864692449569702, "learning_rate": 5.703307755405118e-06, "loss": 0.4582, "step": 25443 }, { "epoch": 0.65, "grad_norm": 2.138296604156494, "learning_rate": 5.7025581625322525e-06, "loss": 0.6188, "step": 25444 }, { "epoch": 0.65, "grad_norm": 12.418617248535156, "learning_rate": 5.701808599274656e-06, "loss": 0.4588, "step": 25445 }, { "epoch": 0.65, "grad_norm": 1.0544333457946777, "learning_rate": 5.701059065637491e-06, "loss": 0.5794, "step": 25446 }, { "epoch": 0.65, "grad_norm": 1.5708993673324585, "learning_rate": 5.700309561625931e-06, "loss": 0.5926, "step": 25447 }, { "epoch": 0.65, "grad_norm": 1.6898672580718994, "learning_rate": 5.699560087245135e-06, "loss": 0.4709, "step": 25448 }, { "epoch": 0.65, "grad_norm": 1.5632346868515015, "learning_rate": 5.698810642500265e-06, "loss": 0.5856, "step": 25449 }, { "epoch": 0.65, "grad_norm": 1.5337467193603516, "learning_rate": 5.698061227396494e-06, "loss": 0.4809, "step": 25450 }, { "epoch": 0.65, "grad_norm": 2.22277569770813, "learning_rate": 5.697311841938983e-06, "loss": 0.5518, "step": 25451 }, { "epoch": 0.65, "grad_norm": 1.2928870916366577, "learning_rate": 5.696562486132895e-06, "loss": 0.4827, "step": 25452 }, { "epoch": 0.65, "grad_norm": 1.3301492929458618, "learning_rate": 5.69581315998339e-06, "loss": 0.6526, "step": 25453 }, { "epoch": 0.65, "grad_norm": 1.1316559314727783, "learning_rate": 5.695063863495641e-06, "loss": 0.4704, "step": 25454 }, { "epoch": 0.65, "grad_norm": 1.6019619703292847, "learning_rate": 5.694314596674808e-06, "loss": 0.5973, "step": 25455 }, { "epoch": 0.65, "grad_norm": 2.8776214122772217, "learning_rate": 5.693565359526048e-06, "loss": 0.7817, "step": 25456 }, { "epoch": 0.65, "grad_norm": 1.229754090309143, "learning_rate": 5.692816152054536e-06, "loss": 0.5673, "step": 25457 }, { "epoch": 0.65, "grad_norm": 1.4854809045791626, "learning_rate": 5.692066974265428e-06, "loss": 0.6346, "step": 25458 }, { "epoch": 0.65, "grad_norm": 1.6555429697036743, "learning_rate": 5.691317826163888e-06, "loss": 0.4823, "step": 25459 }, { "epoch": 0.65, "grad_norm": 1.4571233987808228, "learning_rate": 5.690568707755075e-06, "loss": 0.7016, "step": 25460 }, { "epoch": 0.65, "grad_norm": 1.2486217021942139, "learning_rate": 5.689819619044158e-06, "loss": 0.5553, "step": 25461 }, { "epoch": 0.65, "grad_norm": 0.9939311742782593, "learning_rate": 5.689070560036297e-06, "loss": 0.4438, "step": 25462 }, { "epoch": 0.65, "grad_norm": 2.941274642944336, "learning_rate": 5.688321530736649e-06, "loss": 0.5481, "step": 25463 }, { "epoch": 0.65, "grad_norm": 2.0319950580596924, "learning_rate": 5.687572531150385e-06, "loss": 0.5079, "step": 25464 }, { "epoch": 0.65, "grad_norm": 2.2578535079956055, "learning_rate": 5.6868235612826605e-06, "loss": 0.6855, "step": 25465 }, { "epoch": 0.65, "grad_norm": 1.3968998193740845, "learning_rate": 5.686074621138639e-06, "loss": 0.5813, "step": 25466 }, { "epoch": 0.65, "grad_norm": 1.696299433708191, "learning_rate": 5.685325710723477e-06, "loss": 0.4632, "step": 25467 }, { "epoch": 0.65, "grad_norm": 1.1480541229248047, "learning_rate": 5.684576830042343e-06, "loss": 0.6215, "step": 25468 }, { "epoch": 0.65, "grad_norm": 1.431247353553772, "learning_rate": 5.683827979100393e-06, "loss": 0.5435, "step": 25469 }, { "epoch": 0.65, "grad_norm": 9.426025390625, "learning_rate": 5.6830791579027876e-06, "loss": 0.7144, "step": 25470 }, { "epoch": 0.65, "grad_norm": 1.7259780168533325, "learning_rate": 5.6823303664546916e-06, "loss": 0.4519, "step": 25471 }, { "epoch": 0.65, "grad_norm": 1.8033379316329956, "learning_rate": 5.681581604761261e-06, "loss": 0.5973, "step": 25472 }, { "epoch": 0.65, "grad_norm": 1.1312808990478516, "learning_rate": 5.680832872827654e-06, "loss": 0.4856, "step": 25473 }, { "epoch": 0.65, "grad_norm": 2.7000021934509277, "learning_rate": 5.680084170659037e-06, "loss": 0.7033, "step": 25474 }, { "epoch": 0.65, "grad_norm": 1.4646856784820557, "learning_rate": 5.679335498260565e-06, "loss": 0.5167, "step": 25475 }, { "epoch": 0.65, "grad_norm": 2.3203892707824707, "learning_rate": 5.6785868556374e-06, "loss": 0.4956, "step": 25476 }, { "epoch": 0.65, "grad_norm": 1.2785338163375854, "learning_rate": 5.677838242794694e-06, "loss": 0.5055, "step": 25477 }, { "epoch": 0.65, "grad_norm": 0.9756854772567749, "learning_rate": 5.677089659737616e-06, "loss": 0.5618, "step": 25478 }, { "epoch": 0.65, "grad_norm": 1.0236103534698486, "learning_rate": 5.676341106471319e-06, "loss": 0.4042, "step": 25479 }, { "epoch": 0.65, "grad_norm": 3.2680647373199463, "learning_rate": 5.67559258300096e-06, "loss": 0.4595, "step": 25480 }, { "epoch": 0.65, "grad_norm": 2.045853614807129, "learning_rate": 5.674844089331704e-06, "loss": 0.4924, "step": 25481 }, { "epoch": 0.65, "grad_norm": 7.220934867858887, "learning_rate": 5.674095625468705e-06, "loss": 0.7148, "step": 25482 }, { "epoch": 0.65, "grad_norm": 1.2770659923553467, "learning_rate": 5.67334719141712e-06, "loss": 0.6182, "step": 25483 }, { "epoch": 0.65, "grad_norm": 3.861689329147339, "learning_rate": 5.672598787182107e-06, "loss": 0.5238, "step": 25484 }, { "epoch": 0.65, "grad_norm": 1.5202075242996216, "learning_rate": 5.671850412768825e-06, "loss": 0.6731, "step": 25485 }, { "epoch": 0.65, "grad_norm": 2.8771374225616455, "learning_rate": 5.671102068182432e-06, "loss": 0.5761, "step": 25486 }, { "epoch": 0.65, "grad_norm": 1.5803526639938354, "learning_rate": 5.67035375342808e-06, "loss": 0.52, "step": 25487 }, { "epoch": 0.65, "grad_norm": 1.9094241857528687, "learning_rate": 5.6696054685109335e-06, "loss": 0.5156, "step": 25488 }, { "epoch": 0.65, "grad_norm": 1.962876796722412, "learning_rate": 5.6688572134361455e-06, "loss": 0.4525, "step": 25489 }, { "epoch": 0.65, "grad_norm": 5.914521217346191, "learning_rate": 5.668108988208869e-06, "loss": 0.6384, "step": 25490 }, { "epoch": 0.65, "grad_norm": 1.1377094984054565, "learning_rate": 5.6673607928342675e-06, "loss": 0.689, "step": 25491 }, { "epoch": 0.65, "grad_norm": 1.8516106605529785, "learning_rate": 5.666612627317494e-06, "loss": 0.5084, "step": 25492 }, { "epoch": 0.65, "grad_norm": 1.0877947807312012, "learning_rate": 5.665864491663703e-06, "loss": 0.4583, "step": 25493 }, { "epoch": 0.65, "grad_norm": 1.9869873523712158, "learning_rate": 5.665116385878047e-06, "loss": 0.5448, "step": 25494 }, { "epoch": 0.65, "grad_norm": 1.5165762901306152, "learning_rate": 5.664368309965688e-06, "loss": 0.4596, "step": 25495 }, { "epoch": 0.65, "grad_norm": 1.2975252866744995, "learning_rate": 5.663620263931782e-06, "loss": 0.4827, "step": 25496 }, { "epoch": 0.65, "grad_norm": 1.2583160400390625, "learning_rate": 5.6628722477814745e-06, "loss": 0.5423, "step": 25497 }, { "epoch": 0.65, "grad_norm": 1.5452085733413696, "learning_rate": 5.662124261519931e-06, "loss": 0.5183, "step": 25498 }, { "epoch": 0.65, "grad_norm": 1.005971074104309, "learning_rate": 5.661376305152302e-06, "loss": 0.5315, "step": 25499 }, { "epoch": 0.65, "grad_norm": 2.875239372253418, "learning_rate": 5.660628378683741e-06, "loss": 0.489, "step": 25500 }, { "epoch": 0.65, "grad_norm": 1.73557448387146, "learning_rate": 5.659880482119399e-06, "loss": 0.5958, "step": 25501 }, { "epoch": 0.65, "grad_norm": 1.6961109638214111, "learning_rate": 5.65913261546444e-06, "loss": 0.5568, "step": 25502 }, { "epoch": 0.65, "grad_norm": 1.6125389337539673, "learning_rate": 5.658384778724011e-06, "loss": 0.4249, "step": 25503 }, { "epoch": 0.65, "grad_norm": 1.9177334308624268, "learning_rate": 5.657636971903262e-06, "loss": 0.5285, "step": 25504 }, { "epoch": 0.65, "grad_norm": 1.8627591133117676, "learning_rate": 5.656889195007354e-06, "loss": 0.7268, "step": 25505 }, { "epoch": 0.65, "grad_norm": 1.6925538778305054, "learning_rate": 5.656141448041438e-06, "loss": 0.5938, "step": 25506 }, { "epoch": 0.65, "grad_norm": 1.2958067655563354, "learning_rate": 5.655393731010665e-06, "loss": 0.5956, "step": 25507 }, { "epoch": 0.65, "grad_norm": 1.8563356399536133, "learning_rate": 5.654646043920186e-06, "loss": 0.4528, "step": 25508 }, { "epoch": 0.65, "grad_norm": 1.0018742084503174, "learning_rate": 5.6538983867751606e-06, "loss": 0.5352, "step": 25509 }, { "epoch": 0.65, "grad_norm": 1.573749303817749, "learning_rate": 5.653150759580737e-06, "loss": 0.3898, "step": 25510 }, { "epoch": 0.65, "grad_norm": 2.1315805912017822, "learning_rate": 5.652403162342061e-06, "loss": 0.4832, "step": 25511 }, { "epoch": 0.65, "grad_norm": 1.5385745763778687, "learning_rate": 5.651655595064298e-06, "loss": 0.4765, "step": 25512 }, { "epoch": 0.65, "grad_norm": 1.6365654468536377, "learning_rate": 5.650908057752591e-06, "loss": 0.5834, "step": 25513 }, { "epoch": 0.65, "grad_norm": 1.0640454292297363, "learning_rate": 5.650160550412091e-06, "loss": 0.2901, "step": 25514 }, { "epoch": 0.65, "grad_norm": 1.609991431236267, "learning_rate": 5.649413073047954e-06, "loss": 0.523, "step": 25515 }, { "epoch": 0.65, "grad_norm": 1.3820528984069824, "learning_rate": 5.648665625665329e-06, "loss": 0.5214, "step": 25516 }, { "epoch": 0.65, "grad_norm": 1.0364891290664673, "learning_rate": 5.647918208269367e-06, "loss": 0.5031, "step": 25517 }, { "epoch": 0.65, "grad_norm": 2.2158684730529785, "learning_rate": 5.647170820865216e-06, "loss": 0.4112, "step": 25518 }, { "epoch": 0.65, "grad_norm": 1.4180076122283936, "learning_rate": 5.6464234634580305e-06, "loss": 0.5754, "step": 25519 }, { "epoch": 0.65, "grad_norm": 1.3202632665634155, "learning_rate": 5.64567613605296e-06, "loss": 0.4571, "step": 25520 }, { "epoch": 0.65, "grad_norm": 4.235408306121826, "learning_rate": 5.64492883865515e-06, "loss": 0.8134, "step": 25521 }, { "epoch": 0.65, "grad_norm": 1.8963395357131958, "learning_rate": 5.644181571269759e-06, "loss": 0.7012, "step": 25522 }, { "epoch": 0.65, "grad_norm": 1.3116297721862793, "learning_rate": 5.643434333901932e-06, "loss": 0.5887, "step": 25523 }, { "epoch": 0.65, "grad_norm": 2.7638676166534424, "learning_rate": 5.642687126556819e-06, "loss": 0.6861, "step": 25524 }, { "epoch": 0.65, "grad_norm": 2.500619649887085, "learning_rate": 5.641939949239563e-06, "loss": 0.6533, "step": 25525 }, { "epoch": 0.65, "grad_norm": 9.593958854675293, "learning_rate": 5.641192801955324e-06, "loss": 0.5205, "step": 25526 }, { "epoch": 0.65, "grad_norm": 1.7442675828933716, "learning_rate": 5.640445684709246e-06, "loss": 0.4408, "step": 25527 }, { "epoch": 0.65, "grad_norm": 1.5026541948318481, "learning_rate": 5.639698597506472e-06, "loss": 0.4981, "step": 25528 }, { "epoch": 0.65, "grad_norm": 5.893008708953857, "learning_rate": 5.638951540352161e-06, "loss": 0.5775, "step": 25529 }, { "epoch": 0.65, "grad_norm": 3.2821803092956543, "learning_rate": 5.6382045132514555e-06, "loss": 0.4187, "step": 25530 }, { "epoch": 0.65, "grad_norm": 1.9167087078094482, "learning_rate": 5.637457516209501e-06, "loss": 0.6015, "step": 25531 }, { "epoch": 0.65, "grad_norm": 1.1722487211227417, "learning_rate": 5.636710549231452e-06, "loss": 0.5057, "step": 25532 }, { "epoch": 0.65, "grad_norm": 1.4959580898284912, "learning_rate": 5.635963612322452e-06, "loss": 0.4669, "step": 25533 }, { "epoch": 0.65, "grad_norm": 1.8390655517578125, "learning_rate": 5.635216705487649e-06, "loss": 0.5554, "step": 25534 }, { "epoch": 0.65, "grad_norm": 0.9693142771720886, "learning_rate": 5.634469828732187e-06, "loss": 0.4996, "step": 25535 }, { "epoch": 0.65, "grad_norm": 6.734084606170654, "learning_rate": 5.633722982061221e-06, "loss": 0.5539, "step": 25536 }, { "epoch": 0.65, "grad_norm": 1.905752420425415, "learning_rate": 5.632976165479892e-06, "loss": 0.5396, "step": 25537 }, { "epoch": 0.65, "grad_norm": 9.316946983337402, "learning_rate": 5.632229378993343e-06, "loss": 0.5671, "step": 25538 }, { "epoch": 0.65, "grad_norm": 1.1218583583831787, "learning_rate": 5.631482622606731e-06, "loss": 0.5059, "step": 25539 }, { "epoch": 0.65, "grad_norm": 3.15391206741333, "learning_rate": 5.6307358963251945e-06, "loss": 0.518, "step": 25540 }, { "epoch": 0.65, "grad_norm": 1.7822867631912231, "learning_rate": 5.629989200153882e-06, "loss": 0.6179, "step": 25541 }, { "epoch": 0.65, "grad_norm": 1.3225150108337402, "learning_rate": 5.629242534097934e-06, "loss": 0.5298, "step": 25542 }, { "epoch": 0.65, "grad_norm": 9.22607135772705, "learning_rate": 5.628495898162505e-06, "loss": 0.4488, "step": 25543 }, { "epoch": 0.65, "grad_norm": 1.4766508340835571, "learning_rate": 5.627749292352736e-06, "loss": 0.4285, "step": 25544 }, { "epoch": 0.65, "grad_norm": 1.2338241338729858, "learning_rate": 5.627002716673766e-06, "loss": 0.5354, "step": 25545 }, { "epoch": 0.65, "grad_norm": 5.023980140686035, "learning_rate": 5.6262561711307516e-06, "loss": 0.5547, "step": 25546 }, { "epoch": 0.65, "grad_norm": 7.7000226974487305, "learning_rate": 5.625509655728831e-06, "loss": 0.6578, "step": 25547 }, { "epoch": 0.65, "grad_norm": 2.27966570854187, "learning_rate": 5.624763170473151e-06, "loss": 0.6251, "step": 25548 }, { "epoch": 0.65, "grad_norm": 2.4320998191833496, "learning_rate": 5.624016715368849e-06, "loss": 0.6629, "step": 25549 }, { "epoch": 0.65, "grad_norm": 6.955654144287109, "learning_rate": 5.623270290421079e-06, "loss": 0.4298, "step": 25550 }, { "epoch": 0.65, "grad_norm": 0.9856897592544556, "learning_rate": 5.622523895634981e-06, "loss": 0.4588, "step": 25551 }, { "epoch": 0.65, "grad_norm": 1.098787784576416, "learning_rate": 5.621777531015697e-06, "loss": 0.4267, "step": 25552 }, { "epoch": 0.65, "grad_norm": 1.3826934099197388, "learning_rate": 5.621031196568372e-06, "loss": 0.3863, "step": 25553 }, { "epoch": 0.65, "grad_norm": 8.457361221313477, "learning_rate": 5.620284892298149e-06, "loss": 0.4623, "step": 25554 }, { "epoch": 0.65, "grad_norm": 5.238854885101318, "learning_rate": 5.619538618210168e-06, "loss": 0.7248, "step": 25555 }, { "epoch": 0.66, "grad_norm": 1.4079701900482178, "learning_rate": 5.618792374309576e-06, "loss": 0.5731, "step": 25556 }, { "epoch": 0.66, "grad_norm": 1.4240219593048096, "learning_rate": 5.618046160601515e-06, "loss": 0.458, "step": 25557 }, { "epoch": 0.66, "grad_norm": 4.2761149406433105, "learning_rate": 5.617299977091125e-06, "loss": 0.5972, "step": 25558 }, { "epoch": 0.66, "grad_norm": 1.4953402280807495, "learning_rate": 5.6165538237835525e-06, "loss": 0.5755, "step": 25559 }, { "epoch": 0.66, "grad_norm": 3.4068005084991455, "learning_rate": 5.6158077006839385e-06, "loss": 0.6097, "step": 25560 }, { "epoch": 0.66, "grad_norm": 0.9696152806282043, "learning_rate": 5.615061607797423e-06, "loss": 0.4528, "step": 25561 }, { "epoch": 0.66, "grad_norm": 2.7533109188079834, "learning_rate": 5.6143155451291434e-06, "loss": 0.4403, "step": 25562 }, { "epoch": 0.66, "grad_norm": 2.531907558441162, "learning_rate": 5.61356951268425e-06, "loss": 0.4802, "step": 25563 }, { "epoch": 0.66, "grad_norm": 2.440051555633545, "learning_rate": 5.612823510467879e-06, "loss": 0.5185, "step": 25564 }, { "epoch": 0.66, "grad_norm": 1.2021403312683105, "learning_rate": 5.612077538485167e-06, "loss": 0.6304, "step": 25565 }, { "epoch": 0.66, "grad_norm": 1.3058605194091797, "learning_rate": 5.611331596741267e-06, "loss": 0.5911, "step": 25566 }, { "epoch": 0.66, "grad_norm": 1.876117467880249, "learning_rate": 5.61058568524131e-06, "loss": 0.5785, "step": 25567 }, { "epoch": 0.66, "grad_norm": 1.1688077449798584, "learning_rate": 5.609839803990435e-06, "loss": 0.5832, "step": 25568 }, { "epoch": 0.66, "grad_norm": 1.1228563785552979, "learning_rate": 5.609093952993792e-06, "loss": 0.532, "step": 25569 }, { "epoch": 0.66, "grad_norm": 1.6239880323410034, "learning_rate": 5.608348132256513e-06, "loss": 0.6293, "step": 25570 }, { "epoch": 0.66, "grad_norm": 1.5601855516433716, "learning_rate": 5.60760234178374e-06, "loss": 0.5888, "step": 25571 }, { "epoch": 0.66, "grad_norm": 4.80596923828125, "learning_rate": 5.606856581580608e-06, "loss": 0.5897, "step": 25572 }, { "epoch": 0.66, "grad_norm": 1.8889228105545044, "learning_rate": 5.6061108516522646e-06, "loss": 0.4746, "step": 25573 }, { "epoch": 0.66, "grad_norm": 1.3077654838562012, "learning_rate": 5.605365152003843e-06, "loss": 0.4762, "step": 25574 }, { "epoch": 0.66, "grad_norm": 3.2216198444366455, "learning_rate": 5.604619482640482e-06, "loss": 0.6443, "step": 25575 }, { "epoch": 0.66, "grad_norm": 3.0575358867645264, "learning_rate": 5.603873843567325e-06, "loss": 0.6727, "step": 25576 }, { "epoch": 0.66, "grad_norm": 1.8974204063415527, "learning_rate": 5.603128234789508e-06, "loss": 0.6685, "step": 25577 }, { "epoch": 0.66, "grad_norm": 1.249746322631836, "learning_rate": 5.602382656312169e-06, "loss": 0.4268, "step": 25578 }, { "epoch": 0.66, "grad_norm": 1.924350619316101, "learning_rate": 5.601637108140441e-06, "loss": 0.7784, "step": 25579 }, { "epoch": 0.66, "grad_norm": 1.1768341064453125, "learning_rate": 5.600891590279471e-06, "loss": 0.5963, "step": 25580 }, { "epoch": 0.66, "grad_norm": 7.046895980834961, "learning_rate": 5.600146102734394e-06, "loss": 0.4959, "step": 25581 }, { "epoch": 0.66, "grad_norm": 2.569075584411621, "learning_rate": 5.59940064551034e-06, "loss": 0.5654, "step": 25582 }, { "epoch": 0.66, "grad_norm": 1.9094358682632446, "learning_rate": 5.598655218612457e-06, "loss": 0.5099, "step": 25583 }, { "epoch": 0.66, "grad_norm": 1.0042577981948853, "learning_rate": 5.597909822045876e-06, "loss": 0.4739, "step": 25584 }, { "epoch": 0.66, "grad_norm": 2.8683969974517822, "learning_rate": 5.5971644558157316e-06, "loss": 0.3336, "step": 25585 }, { "epoch": 0.66, "grad_norm": 1.4768157005310059, "learning_rate": 5.596419119927168e-06, "loss": 0.5691, "step": 25586 }, { "epoch": 0.66, "grad_norm": 1.1243866682052612, "learning_rate": 5.595673814385316e-06, "loss": 0.3756, "step": 25587 }, { "epoch": 0.66, "grad_norm": 1.167344570159912, "learning_rate": 5.594928539195314e-06, "loss": 0.5259, "step": 25588 }, { "epoch": 0.66, "grad_norm": 1.295424222946167, "learning_rate": 5.594183294362292e-06, "loss": 0.4631, "step": 25589 }, { "epoch": 0.66, "grad_norm": 1.9275050163269043, "learning_rate": 5.593438079891396e-06, "loss": 0.4704, "step": 25590 }, { "epoch": 0.66, "grad_norm": 4.390618324279785, "learning_rate": 5.592692895787755e-06, "loss": 0.5043, "step": 25591 }, { "epoch": 0.66, "grad_norm": 2.4894845485687256, "learning_rate": 5.5919477420565026e-06, "loss": 0.4721, "step": 25592 }, { "epoch": 0.66, "grad_norm": 1.8085113763809204, "learning_rate": 5.59120261870278e-06, "loss": 0.5234, "step": 25593 }, { "epoch": 0.66, "grad_norm": 1.5467640161514282, "learning_rate": 5.59045752573172e-06, "loss": 0.4186, "step": 25594 }, { "epoch": 0.66, "grad_norm": 1.609877109527588, "learning_rate": 5.589712463148455e-06, "loss": 0.6912, "step": 25595 }, { "epoch": 0.66, "grad_norm": 1.5199153423309326, "learning_rate": 5.588967430958119e-06, "loss": 0.614, "step": 25596 }, { "epoch": 0.66, "grad_norm": 1.5589191913604736, "learning_rate": 5.588222429165851e-06, "loss": 0.6349, "step": 25597 }, { "epoch": 0.66, "grad_norm": 1.3686563968658447, "learning_rate": 5.587477457776782e-06, "loss": 0.4512, "step": 25598 }, { "epoch": 0.66, "grad_norm": 3.0004513263702393, "learning_rate": 5.586732516796043e-06, "loss": 0.473, "step": 25599 }, { "epoch": 0.66, "grad_norm": 9.363661766052246, "learning_rate": 5.585987606228774e-06, "loss": 0.5616, "step": 25600 }, { "epoch": 0.66, "grad_norm": 1.5419915914535522, "learning_rate": 5.585242726080105e-06, "loss": 0.5583, "step": 25601 }, { "epoch": 0.66, "grad_norm": 1.4961923360824585, "learning_rate": 5.58449787635517e-06, "loss": 0.6401, "step": 25602 }, { "epoch": 0.66, "grad_norm": 1.7702000141143799, "learning_rate": 5.583753057059098e-06, "loss": 0.435, "step": 25603 }, { "epoch": 0.66, "grad_norm": 1.4167855978012085, "learning_rate": 5.58300826819703e-06, "loss": 0.4051, "step": 25604 }, { "epoch": 0.66, "grad_norm": 1.4716819524765015, "learning_rate": 5.582263509774093e-06, "loss": 0.4077, "step": 25605 }, { "epoch": 0.66, "grad_norm": 1.4827584028244019, "learning_rate": 5.5815187817954166e-06, "loss": 0.4162, "step": 25606 }, { "epoch": 0.66, "grad_norm": 1.0743387937545776, "learning_rate": 5.580774084266141e-06, "loss": 0.5584, "step": 25607 }, { "epoch": 0.66, "grad_norm": 1.9516026973724365, "learning_rate": 5.580029417191392e-06, "loss": 0.6255, "step": 25608 }, { "epoch": 0.66, "grad_norm": 1.9966944456100464, "learning_rate": 5.579284780576302e-06, "loss": 0.6643, "step": 25609 }, { "epoch": 0.66, "grad_norm": 4.068225860595703, "learning_rate": 5.578540174426006e-06, "loss": 0.4605, "step": 25610 }, { "epoch": 0.66, "grad_norm": 3.000396966934204, "learning_rate": 5.577795598745634e-06, "loss": 0.5657, "step": 25611 }, { "epoch": 0.66, "grad_norm": 1.0113067626953125, "learning_rate": 5.577051053540317e-06, "loss": 0.4009, "step": 25612 }, { "epoch": 0.66, "grad_norm": 2.712198257446289, "learning_rate": 5.576306538815181e-06, "loss": 0.6765, "step": 25613 }, { "epoch": 0.66, "grad_norm": 9.052458763122559, "learning_rate": 5.575562054575365e-06, "loss": 0.4099, "step": 25614 }, { "epoch": 0.66, "grad_norm": 1.7025582790374756, "learning_rate": 5.574817600825996e-06, "loss": 0.4882, "step": 25615 }, { "epoch": 0.66, "grad_norm": 1.731452465057373, "learning_rate": 5.574073177572199e-06, "loss": 0.6229, "step": 25616 }, { "epoch": 0.66, "grad_norm": 5.779480934143066, "learning_rate": 5.573328784819115e-06, "loss": 0.4797, "step": 25617 }, { "epoch": 0.66, "grad_norm": 2.9773480892181396, "learning_rate": 5.572584422571865e-06, "loss": 0.4919, "step": 25618 }, { "epoch": 0.66, "grad_norm": 1.423102855682373, "learning_rate": 5.571840090835583e-06, "loss": 0.5179, "step": 25619 }, { "epoch": 0.66, "grad_norm": 1.683089017868042, "learning_rate": 5.571095789615394e-06, "loss": 0.5866, "step": 25620 }, { "epoch": 0.66, "grad_norm": 2.114496946334839, "learning_rate": 5.5703515189164335e-06, "loss": 0.697, "step": 25621 }, { "epoch": 0.66, "grad_norm": 6.671690940856934, "learning_rate": 5.569607278743827e-06, "loss": 0.643, "step": 25622 }, { "epoch": 0.66, "grad_norm": 1.3545386791229248, "learning_rate": 5.5688630691027e-06, "loss": 0.6, "step": 25623 }, { "epoch": 0.66, "grad_norm": 1.2783623933792114, "learning_rate": 5.568118889998189e-06, "loss": 0.3836, "step": 25624 }, { "epoch": 0.66, "grad_norm": 2.2115983963012695, "learning_rate": 5.567374741435418e-06, "loss": 0.7658, "step": 25625 }, { "epoch": 0.66, "grad_norm": 1.4006012678146362, "learning_rate": 5.566630623419511e-06, "loss": 0.4161, "step": 25626 }, { "epoch": 0.66, "grad_norm": 1.4356777667999268, "learning_rate": 5.565886535955606e-06, "loss": 0.5514, "step": 25627 }, { "epoch": 0.66, "grad_norm": 1.3897929191589355, "learning_rate": 5.565142479048824e-06, "loss": 0.4917, "step": 25628 }, { "epoch": 0.66, "grad_norm": 1.4955658912658691, "learning_rate": 5.564398452704294e-06, "loss": 0.6027, "step": 25629 }, { "epoch": 0.66, "grad_norm": 3.363698720932007, "learning_rate": 5.563654456927139e-06, "loss": 0.5099, "step": 25630 }, { "epoch": 0.66, "grad_norm": 1.2566922903060913, "learning_rate": 5.562910491722494e-06, "loss": 0.5727, "step": 25631 }, { "epoch": 0.66, "grad_norm": 2.333475112915039, "learning_rate": 5.562166557095483e-06, "loss": 0.5112, "step": 25632 }, { "epoch": 0.66, "grad_norm": 1.7043004035949707, "learning_rate": 5.561422653051227e-06, "loss": 0.5888, "step": 25633 }, { "epoch": 0.66, "grad_norm": 1.7078258991241455, "learning_rate": 5.560678779594862e-06, "loss": 0.485, "step": 25634 }, { "epoch": 0.66, "grad_norm": 1.2659741640090942, "learning_rate": 5.55993493673151e-06, "loss": 0.5623, "step": 25635 }, { "epoch": 0.66, "grad_norm": 2.6317951679229736, "learning_rate": 5.559191124466296e-06, "loss": 0.6593, "step": 25636 }, { "epoch": 0.66, "grad_norm": 1.0870469808578491, "learning_rate": 5.558447342804345e-06, "loss": 0.4818, "step": 25637 }, { "epoch": 0.66, "grad_norm": 0.9399467706680298, "learning_rate": 5.557703591750787e-06, "loss": 0.4521, "step": 25638 }, { "epoch": 0.66, "grad_norm": 1.7315633296966553, "learning_rate": 5.556959871310744e-06, "loss": 0.5175, "step": 25639 }, { "epoch": 0.66, "grad_norm": 1.262580156326294, "learning_rate": 5.55621618148934e-06, "loss": 0.3913, "step": 25640 }, { "epoch": 0.66, "grad_norm": 1.344793438911438, "learning_rate": 5.555472522291705e-06, "loss": 0.3986, "step": 25641 }, { "epoch": 0.66, "grad_norm": 1.4196971654891968, "learning_rate": 5.5547288937229604e-06, "loss": 0.546, "step": 25642 }, { "epoch": 0.66, "grad_norm": 1.323989987373352, "learning_rate": 5.553985295788234e-06, "loss": 0.5451, "step": 25643 }, { "epoch": 0.66, "grad_norm": 1.5062335729599, "learning_rate": 5.553241728492642e-06, "loss": 0.5147, "step": 25644 }, { "epoch": 0.66, "grad_norm": 1.2280672788619995, "learning_rate": 5.552498191841318e-06, "loss": 0.5303, "step": 25645 }, { "epoch": 0.66, "grad_norm": 1.494128704071045, "learning_rate": 5.551754685839382e-06, "loss": 0.6492, "step": 25646 }, { "epoch": 0.66, "grad_norm": 1.408814787864685, "learning_rate": 5.551011210491955e-06, "loss": 0.447, "step": 25647 }, { "epoch": 0.66, "grad_norm": 1.0048490762710571, "learning_rate": 5.550267765804167e-06, "loss": 0.5273, "step": 25648 }, { "epoch": 0.66, "grad_norm": 1.6927130222320557, "learning_rate": 5.54952435178114e-06, "loss": 0.5969, "step": 25649 }, { "epoch": 0.66, "grad_norm": 0.944142758846283, "learning_rate": 5.548780968427988e-06, "loss": 0.5061, "step": 25650 }, { "epoch": 0.66, "grad_norm": 1.529930591583252, "learning_rate": 5.548037615749846e-06, "loss": 0.5193, "step": 25651 }, { "epoch": 0.66, "grad_norm": 1.9930323362350464, "learning_rate": 5.547294293751833e-06, "loss": 0.6135, "step": 25652 }, { "epoch": 0.66, "grad_norm": 1.7872300148010254, "learning_rate": 5.546551002439069e-06, "loss": 0.5471, "step": 25653 }, { "epoch": 0.66, "grad_norm": 1.4790862798690796, "learning_rate": 5.545807741816673e-06, "loss": 0.5913, "step": 25654 }, { "epoch": 0.66, "grad_norm": 1.845221996307373, "learning_rate": 5.545064511889777e-06, "loss": 0.4909, "step": 25655 }, { "epoch": 0.66, "grad_norm": 2.770266056060791, "learning_rate": 5.544321312663498e-06, "loss": 0.6184, "step": 25656 }, { "epoch": 0.66, "grad_norm": 2.0955288410186768, "learning_rate": 5.543578144142951e-06, "loss": 0.5797, "step": 25657 }, { "epoch": 0.66, "grad_norm": 3.545633316040039, "learning_rate": 5.54283500633327e-06, "loss": 0.5313, "step": 25658 }, { "epoch": 0.66, "grad_norm": 2.356783390045166, "learning_rate": 5.542091899239569e-06, "loss": 0.609, "step": 25659 }, { "epoch": 0.66, "grad_norm": 6.426510810852051, "learning_rate": 5.54134882286697e-06, "loss": 0.6721, "step": 25660 }, { "epoch": 0.66, "grad_norm": 1.7108250856399536, "learning_rate": 5.540605777220589e-06, "loss": 0.534, "step": 25661 }, { "epoch": 0.66, "grad_norm": 1.0876487493515015, "learning_rate": 5.539862762305556e-06, "loss": 0.4797, "step": 25662 }, { "epoch": 0.66, "grad_norm": 3.850071907043457, "learning_rate": 5.539119778126987e-06, "loss": 0.5468, "step": 25663 }, { "epoch": 0.66, "grad_norm": 1.8007748126983643, "learning_rate": 5.538376824689997e-06, "loss": 0.5291, "step": 25664 }, { "epoch": 0.66, "grad_norm": 1.4876909255981445, "learning_rate": 5.537633901999715e-06, "loss": 0.6249, "step": 25665 }, { "epoch": 0.66, "grad_norm": 2.2088236808776855, "learning_rate": 5.536891010061256e-06, "loss": 0.5599, "step": 25666 }, { "epoch": 0.66, "grad_norm": 1.2681984901428223, "learning_rate": 5.536148148879738e-06, "loss": 0.3858, "step": 25667 }, { "epoch": 0.66, "grad_norm": 1.3115547895431519, "learning_rate": 5.535405318460285e-06, "loss": 0.511, "step": 25668 }, { "epoch": 0.66, "grad_norm": 1.651903748512268, "learning_rate": 5.534662518808014e-06, "loss": 0.671, "step": 25669 }, { "epoch": 0.66, "grad_norm": 1.6742000579833984, "learning_rate": 5.533919749928044e-06, "loss": 0.6079, "step": 25670 }, { "epoch": 0.66, "grad_norm": 1.71674382686615, "learning_rate": 5.533177011825489e-06, "loss": 0.6663, "step": 25671 }, { "epoch": 0.66, "grad_norm": 1.2234723567962646, "learning_rate": 5.532434304505475e-06, "loss": 0.4665, "step": 25672 }, { "epoch": 0.66, "grad_norm": 2.394080877304077, "learning_rate": 5.531691627973118e-06, "loss": 0.5811, "step": 25673 }, { "epoch": 0.66, "grad_norm": 1.3573795557022095, "learning_rate": 5.53094898223353e-06, "loss": 0.6684, "step": 25674 }, { "epoch": 0.66, "grad_norm": 1.1065510511398315, "learning_rate": 5.530206367291839e-06, "loss": 0.5038, "step": 25675 }, { "epoch": 0.66, "grad_norm": 1.6704736948013306, "learning_rate": 5.529463783153157e-06, "loss": 0.604, "step": 25676 }, { "epoch": 0.66, "grad_norm": 1.7270267009735107, "learning_rate": 5.528721229822602e-06, "loss": 0.6396, "step": 25677 }, { "epoch": 0.66, "grad_norm": 1.1216168403625488, "learning_rate": 5.527978707305286e-06, "loss": 0.4184, "step": 25678 }, { "epoch": 0.66, "grad_norm": 1.0379549264907837, "learning_rate": 5.5272362156063375e-06, "loss": 0.3956, "step": 25679 }, { "epoch": 0.66, "grad_norm": 2.895510196685791, "learning_rate": 5.526493754730865e-06, "loss": 0.5575, "step": 25680 }, { "epoch": 0.66, "grad_norm": 1.342382788658142, "learning_rate": 5.525751324683983e-06, "loss": 0.5899, "step": 25681 }, { "epoch": 0.66, "grad_norm": 1.891400694847107, "learning_rate": 5.525008925470817e-06, "loss": 0.6124, "step": 25682 }, { "epoch": 0.66, "grad_norm": 6.607693672180176, "learning_rate": 5.524266557096478e-06, "loss": 0.5169, "step": 25683 }, { "epoch": 0.66, "grad_norm": 24.822277069091797, "learning_rate": 5.523524219566081e-06, "loss": 0.5967, "step": 25684 }, { "epoch": 0.66, "grad_norm": 1.288673758506775, "learning_rate": 5.522781912884739e-06, "loss": 0.4759, "step": 25685 }, { "epoch": 0.66, "grad_norm": 1.4887659549713135, "learning_rate": 5.522039637057576e-06, "loss": 0.5033, "step": 25686 }, { "epoch": 0.66, "grad_norm": 7.564735412597656, "learning_rate": 5.5212973920897015e-06, "loss": 0.4948, "step": 25687 }, { "epoch": 0.66, "grad_norm": 1.4789823293685913, "learning_rate": 5.520555177986229e-06, "loss": 0.5175, "step": 25688 }, { "epoch": 0.66, "grad_norm": 2.6673941612243652, "learning_rate": 5.519812994752278e-06, "loss": 0.482, "step": 25689 }, { "epoch": 0.66, "grad_norm": 1.830824851989746, "learning_rate": 5.519070842392962e-06, "loss": 0.5824, "step": 25690 }, { "epoch": 0.66, "grad_norm": 1.980769157409668, "learning_rate": 5.5183287209133905e-06, "loss": 0.7214, "step": 25691 }, { "epoch": 0.66, "grad_norm": 20.40602684020996, "learning_rate": 5.5175866303186855e-06, "loss": 0.697, "step": 25692 }, { "epoch": 0.66, "grad_norm": 0.7848488688468933, "learning_rate": 5.516844570613957e-06, "loss": 0.3331, "step": 25693 }, { "epoch": 0.66, "grad_norm": 1.178409218788147, "learning_rate": 5.51610254180432e-06, "loss": 0.4246, "step": 25694 }, { "epoch": 0.66, "grad_norm": 2.4532086849212646, "learning_rate": 5.515360543894883e-06, "loss": 0.503, "step": 25695 }, { "epoch": 0.66, "grad_norm": 1.0791702270507812, "learning_rate": 5.514618576890769e-06, "loss": 0.3968, "step": 25696 }, { "epoch": 0.66, "grad_norm": 11.541605949401855, "learning_rate": 5.513876640797085e-06, "loss": 0.4183, "step": 25697 }, { "epoch": 0.66, "grad_norm": 8.641510963439941, "learning_rate": 5.513134735618939e-06, "loss": 0.4859, "step": 25698 }, { "epoch": 0.66, "grad_norm": 1.2594993114471436, "learning_rate": 5.512392861361455e-06, "loss": 0.6167, "step": 25699 }, { "epoch": 0.66, "grad_norm": 1.239166021347046, "learning_rate": 5.5116510180297386e-06, "loss": 0.4994, "step": 25700 }, { "epoch": 0.66, "grad_norm": 3.400501012802124, "learning_rate": 5.5109092056289045e-06, "loss": 0.619, "step": 25701 }, { "epoch": 0.66, "grad_norm": 1.1687970161437988, "learning_rate": 5.51016742416406e-06, "loss": 0.4225, "step": 25702 }, { "epoch": 0.66, "grad_norm": 2.359147310256958, "learning_rate": 5.509425673640325e-06, "loss": 0.6309, "step": 25703 }, { "epoch": 0.66, "grad_norm": 4.666459083557129, "learning_rate": 5.508683954062807e-06, "loss": 0.8338, "step": 25704 }, { "epoch": 0.66, "grad_norm": 1.2575249671936035, "learning_rate": 5.507942265436612e-06, "loss": 0.4757, "step": 25705 }, { "epoch": 0.66, "grad_norm": 1.917183518409729, "learning_rate": 5.507200607766863e-06, "loss": 0.5468, "step": 25706 }, { "epoch": 0.66, "grad_norm": 1.5718117952346802, "learning_rate": 5.506458981058664e-06, "loss": 0.4958, "step": 25707 }, { "epoch": 0.66, "grad_norm": 1.7896291017532349, "learning_rate": 5.505717385317125e-06, "loss": 0.6894, "step": 25708 }, { "epoch": 0.66, "grad_norm": 1.0927252769470215, "learning_rate": 5.504975820547356e-06, "loss": 0.3094, "step": 25709 }, { "epoch": 0.66, "grad_norm": 1.1461176872253418, "learning_rate": 5.504234286754473e-06, "loss": 0.5503, "step": 25710 }, { "epoch": 0.66, "grad_norm": 1.3986397981643677, "learning_rate": 5.5034927839435825e-06, "loss": 0.4817, "step": 25711 }, { "epoch": 0.66, "grad_norm": 2.612578868865967, "learning_rate": 5.502751312119792e-06, "loss": 0.5678, "step": 25712 }, { "epoch": 0.66, "grad_norm": 0.840024471282959, "learning_rate": 5.5020098712882164e-06, "loss": 0.4929, "step": 25713 }, { "epoch": 0.66, "grad_norm": 1.95318603515625, "learning_rate": 5.501268461453964e-06, "loss": 0.6765, "step": 25714 }, { "epoch": 0.66, "grad_norm": 1.8324507474899292, "learning_rate": 5.500527082622138e-06, "loss": 0.5874, "step": 25715 }, { "epoch": 0.66, "grad_norm": 1.6964470148086548, "learning_rate": 5.499785734797856e-06, "loss": 0.5824, "step": 25716 }, { "epoch": 0.66, "grad_norm": 1.2357250452041626, "learning_rate": 5.499044417986226e-06, "loss": 0.516, "step": 25717 }, { "epoch": 0.66, "grad_norm": 1.8338950872421265, "learning_rate": 5.498303132192352e-06, "loss": 0.5657, "step": 25718 }, { "epoch": 0.66, "grad_norm": 1.381027102470398, "learning_rate": 5.49756187742134e-06, "loss": 0.4336, "step": 25719 }, { "epoch": 0.66, "grad_norm": 1.338683843612671, "learning_rate": 5.496820653678309e-06, "loss": 0.6001, "step": 25720 }, { "epoch": 0.66, "grad_norm": 1.0109800100326538, "learning_rate": 5.49607946096836e-06, "loss": 0.5578, "step": 25721 }, { "epoch": 0.66, "grad_norm": 3.4106388092041016, "learning_rate": 5.495338299296598e-06, "loss": 0.5003, "step": 25722 }, { "epoch": 0.66, "grad_norm": 1.9401538372039795, "learning_rate": 5.494597168668137e-06, "loss": 0.4802, "step": 25723 }, { "epoch": 0.66, "grad_norm": 2.393728733062744, "learning_rate": 5.493856069088082e-06, "loss": 0.5373, "step": 25724 }, { "epoch": 0.66, "grad_norm": 1.7776306867599487, "learning_rate": 5.493115000561541e-06, "loss": 0.5917, "step": 25725 }, { "epoch": 0.66, "grad_norm": 2.102220058441162, "learning_rate": 5.492373963093616e-06, "loss": 0.4881, "step": 25726 }, { "epoch": 0.66, "grad_norm": 0.8071642518043518, "learning_rate": 5.491632956689421e-06, "loss": 0.3668, "step": 25727 }, { "epoch": 0.66, "grad_norm": 6.97590446472168, "learning_rate": 5.490891981354059e-06, "loss": 0.6101, "step": 25728 }, { "epoch": 0.66, "grad_norm": 1.3137354850769043, "learning_rate": 5.490151037092633e-06, "loss": 0.3767, "step": 25729 }, { "epoch": 0.66, "grad_norm": 0.9292369484901428, "learning_rate": 5.489410123910256e-06, "loss": 0.3701, "step": 25730 }, { "epoch": 0.66, "grad_norm": 1.511720895767212, "learning_rate": 5.488669241812031e-06, "loss": 0.4846, "step": 25731 }, { "epoch": 0.66, "grad_norm": 4.491745948791504, "learning_rate": 5.487928390803059e-06, "loss": 0.5299, "step": 25732 }, { "epoch": 0.66, "grad_norm": 1.2281033992767334, "learning_rate": 5.487187570888455e-06, "loss": 0.5004, "step": 25733 }, { "epoch": 0.66, "grad_norm": 13.338376998901367, "learning_rate": 5.486446782073318e-06, "loss": 0.9157, "step": 25734 }, { "epoch": 0.66, "grad_norm": 1.1525744199752808, "learning_rate": 5.485706024362755e-06, "loss": 0.5062, "step": 25735 }, { "epoch": 0.66, "grad_norm": 1.4527463912963867, "learning_rate": 5.484965297761864e-06, "loss": 0.5218, "step": 25736 }, { "epoch": 0.66, "grad_norm": 3.142124652862549, "learning_rate": 5.484224602275762e-06, "loss": 0.5767, "step": 25737 }, { "epoch": 0.66, "grad_norm": 1.6782903671264648, "learning_rate": 5.483483937909545e-06, "loss": 0.5759, "step": 25738 }, { "epoch": 0.66, "grad_norm": 3.5125572681427, "learning_rate": 5.482743304668316e-06, "loss": 0.7721, "step": 25739 }, { "epoch": 0.66, "grad_norm": 1.5214284658432007, "learning_rate": 5.482002702557187e-06, "loss": 0.4773, "step": 25740 }, { "epoch": 0.66, "grad_norm": 8.416552543640137, "learning_rate": 5.4812621315812555e-06, "loss": 0.4076, "step": 25741 }, { "epoch": 0.66, "grad_norm": 2.1909897327423096, "learning_rate": 5.4805215917456265e-06, "loss": 0.5156, "step": 25742 }, { "epoch": 0.66, "grad_norm": 1.5189927816390991, "learning_rate": 5.4797810830554e-06, "loss": 0.5408, "step": 25743 }, { "epoch": 0.66, "grad_norm": 1.8763201236724854, "learning_rate": 5.4790406055156866e-06, "loss": 0.469, "step": 25744 }, { "epoch": 0.66, "grad_norm": 1.0789490938186646, "learning_rate": 5.478300159131584e-06, "loss": 0.5079, "step": 25745 }, { "epoch": 0.66, "grad_norm": 2.204803228378296, "learning_rate": 5.477559743908192e-06, "loss": 0.4704, "step": 25746 }, { "epoch": 0.66, "grad_norm": 0.968302845954895, "learning_rate": 5.476819359850621e-06, "loss": 0.4407, "step": 25747 }, { "epoch": 0.66, "grad_norm": 1.4794368743896484, "learning_rate": 5.47607900696397e-06, "loss": 0.577, "step": 25748 }, { "epoch": 0.66, "grad_norm": 3.175661087036133, "learning_rate": 5.4753386852533395e-06, "loss": 0.4419, "step": 25749 }, { "epoch": 0.66, "grad_norm": 0.897916316986084, "learning_rate": 5.474598394723829e-06, "loss": 0.557, "step": 25750 }, { "epoch": 0.66, "grad_norm": 7.91356897354126, "learning_rate": 5.473858135380545e-06, "loss": 0.5151, "step": 25751 }, { "epoch": 0.66, "grad_norm": 1.8291386365890503, "learning_rate": 5.47311790722859e-06, "loss": 0.5805, "step": 25752 }, { "epoch": 0.66, "grad_norm": 9.784052848815918, "learning_rate": 5.4723777102730565e-06, "loss": 0.5173, "step": 25753 }, { "epoch": 0.66, "grad_norm": 1.1579371690750122, "learning_rate": 5.471637544519056e-06, "loss": 0.5915, "step": 25754 }, { "epoch": 0.66, "grad_norm": 1.7557331323623657, "learning_rate": 5.470897409971685e-06, "loss": 0.4833, "step": 25755 }, { "epoch": 0.66, "grad_norm": 1.6947333812713623, "learning_rate": 5.470157306636039e-06, "loss": 0.5778, "step": 25756 }, { "epoch": 0.66, "grad_norm": 8.617860794067383, "learning_rate": 5.469417234517227e-06, "loss": 0.8798, "step": 25757 }, { "epoch": 0.66, "grad_norm": 1.3844964504241943, "learning_rate": 5.4686771936203445e-06, "loss": 0.6073, "step": 25758 }, { "epoch": 0.66, "grad_norm": 3.2675328254699707, "learning_rate": 5.467937183950493e-06, "loss": 0.643, "step": 25759 }, { "epoch": 0.66, "grad_norm": 1.0511386394500732, "learning_rate": 5.4671972055127665e-06, "loss": 0.5466, "step": 25760 }, { "epoch": 0.66, "grad_norm": 3.4675660133361816, "learning_rate": 5.466457258312272e-06, "loss": 0.4805, "step": 25761 }, { "epoch": 0.66, "grad_norm": 1.5753116607666016, "learning_rate": 5.465717342354108e-06, "loss": 0.5529, "step": 25762 }, { "epoch": 0.66, "grad_norm": 2.0462567806243896, "learning_rate": 5.464977457643366e-06, "loss": 0.4998, "step": 25763 }, { "epoch": 0.66, "grad_norm": 10.288176536560059, "learning_rate": 5.464237604185154e-06, "loss": 0.4529, "step": 25764 }, { "epoch": 0.66, "grad_norm": 1.1350728273391724, "learning_rate": 5.463497781984568e-06, "loss": 0.4659, "step": 25765 }, { "epoch": 0.66, "grad_norm": 1.0779592990875244, "learning_rate": 5.462757991046703e-06, "loss": 0.5148, "step": 25766 }, { "epoch": 0.66, "grad_norm": 3.4871742725372314, "learning_rate": 5.462018231376658e-06, "loss": 0.5056, "step": 25767 }, { "epoch": 0.66, "grad_norm": 1.590376615524292, "learning_rate": 5.461278502979534e-06, "loss": 0.5165, "step": 25768 }, { "epoch": 0.66, "grad_norm": 1.866034746170044, "learning_rate": 5.460538805860429e-06, "loss": 0.6031, "step": 25769 }, { "epoch": 0.66, "grad_norm": 1.8134756088256836, "learning_rate": 5.459799140024433e-06, "loss": 0.5899, "step": 25770 }, { "epoch": 0.66, "grad_norm": 3.9850149154663086, "learning_rate": 5.459059505476654e-06, "loss": 0.8633, "step": 25771 }, { "epoch": 0.66, "grad_norm": 2.7312183380126953, "learning_rate": 5.458319902222185e-06, "loss": 0.6538, "step": 25772 }, { "epoch": 0.66, "grad_norm": 7.363259315490723, "learning_rate": 5.4575803302661145e-06, "loss": 0.617, "step": 25773 }, { "epoch": 0.66, "grad_norm": 1.5777664184570312, "learning_rate": 5.456840789613553e-06, "loss": 0.5615, "step": 25774 }, { "epoch": 0.66, "grad_norm": 1.2900605201721191, "learning_rate": 5.45610128026959e-06, "loss": 0.4438, "step": 25775 }, { "epoch": 0.66, "grad_norm": 1.8756635189056396, "learning_rate": 5.455361802239323e-06, "loss": 0.5381, "step": 25776 }, { "epoch": 0.66, "grad_norm": 1.1389071941375732, "learning_rate": 5.4546223555278414e-06, "loss": 0.4504, "step": 25777 }, { "epoch": 0.66, "grad_norm": 1.2199254035949707, "learning_rate": 5.453882940140252e-06, "loss": 0.5185, "step": 25778 }, { "epoch": 0.66, "grad_norm": 0.9485123753547668, "learning_rate": 5.453143556081646e-06, "loss": 0.4396, "step": 25779 }, { "epoch": 0.66, "grad_norm": 1.7461744546890259, "learning_rate": 5.452404203357112e-06, "loss": 0.6004, "step": 25780 }, { "epoch": 0.66, "grad_norm": 7.33079195022583, "learning_rate": 5.451664881971757e-06, "loss": 0.5963, "step": 25781 }, { "epoch": 0.66, "grad_norm": 1.4756547212600708, "learning_rate": 5.450925591930669e-06, "loss": 0.5827, "step": 25782 }, { "epoch": 0.66, "grad_norm": 2.2574758529663086, "learning_rate": 5.450186333238945e-06, "loss": 0.6274, "step": 25783 }, { "epoch": 0.66, "grad_norm": 1.105708360671997, "learning_rate": 5.449447105901673e-06, "loss": 0.5071, "step": 25784 }, { "epoch": 0.66, "grad_norm": 3.3671247959136963, "learning_rate": 5.448707909923956e-06, "loss": 0.7327, "step": 25785 }, { "epoch": 0.66, "grad_norm": 1.8787909746170044, "learning_rate": 5.447968745310886e-06, "loss": 0.5735, "step": 25786 }, { "epoch": 0.66, "grad_norm": 1.45624577999115, "learning_rate": 5.447229612067551e-06, "loss": 0.4745, "step": 25787 }, { "epoch": 0.66, "grad_norm": 1.7277840375900269, "learning_rate": 5.4464905101990535e-06, "loss": 0.6614, "step": 25788 }, { "epoch": 0.66, "grad_norm": 12.947997093200684, "learning_rate": 5.445751439710483e-06, "loss": 0.5055, "step": 25789 }, { "epoch": 0.66, "grad_norm": 1.1424907445907593, "learning_rate": 5.445012400606933e-06, "loss": 0.5542, "step": 25790 }, { "epoch": 0.66, "grad_norm": 1.4620760679244995, "learning_rate": 5.44427339289349e-06, "loss": 0.4635, "step": 25791 }, { "epoch": 0.66, "grad_norm": 7.353451728820801, "learning_rate": 5.443534416575258e-06, "loss": 0.593, "step": 25792 }, { "epoch": 0.66, "grad_norm": 5.259403228759766, "learning_rate": 5.442795471657323e-06, "loss": 0.5697, "step": 25793 }, { "epoch": 0.66, "grad_norm": 2.8762459754943848, "learning_rate": 5.442056558144779e-06, "loss": 0.6738, "step": 25794 }, { "epoch": 0.66, "grad_norm": 1.2353113889694214, "learning_rate": 5.441317676042717e-06, "loss": 0.4895, "step": 25795 }, { "epoch": 0.66, "grad_norm": 2.8753998279571533, "learning_rate": 5.4405788253562305e-06, "loss": 0.4085, "step": 25796 }, { "epoch": 0.66, "grad_norm": 1.539440393447876, "learning_rate": 5.439840006090405e-06, "loss": 0.5704, "step": 25797 }, { "epoch": 0.66, "grad_norm": 0.8465512990951538, "learning_rate": 5.439101218250342e-06, "loss": 0.4678, "step": 25798 }, { "epoch": 0.66, "grad_norm": 1.8161845207214355, "learning_rate": 5.438362461841129e-06, "loss": 0.561, "step": 25799 }, { "epoch": 0.66, "grad_norm": 1.576703429222107, "learning_rate": 5.4376237368678504e-06, "loss": 0.4809, "step": 25800 }, { "epoch": 0.66, "grad_norm": 1.123649001121521, "learning_rate": 5.436885043335607e-06, "loss": 0.462, "step": 25801 }, { "epoch": 0.66, "grad_norm": 1.900962471961975, "learning_rate": 5.436146381249485e-06, "loss": 0.5965, "step": 25802 }, { "epoch": 0.66, "grad_norm": 1.2987277507781982, "learning_rate": 5.435407750614577e-06, "loss": 0.5546, "step": 25803 }, { "epoch": 0.66, "grad_norm": 3.7072720527648926, "learning_rate": 5.434669151435964e-06, "loss": 0.5974, "step": 25804 }, { "epoch": 0.66, "grad_norm": 6.391079902648926, "learning_rate": 5.433930583718748e-06, "loss": 0.6876, "step": 25805 }, { "epoch": 0.66, "grad_norm": 4.579309940338135, "learning_rate": 5.433192047468014e-06, "loss": 0.3181, "step": 25806 }, { "epoch": 0.66, "grad_norm": 4.007078170776367, "learning_rate": 5.432453542688847e-06, "loss": 0.5238, "step": 25807 }, { "epoch": 0.66, "grad_norm": 1.2073427438735962, "learning_rate": 5.431715069386345e-06, "loss": 0.4938, "step": 25808 }, { "epoch": 0.66, "grad_norm": 1.640103816986084, "learning_rate": 5.430976627565592e-06, "loss": 0.3268, "step": 25809 }, { "epoch": 0.66, "grad_norm": 4.131341934204102, "learning_rate": 5.430238217231674e-06, "loss": 0.4566, "step": 25810 }, { "epoch": 0.66, "grad_norm": 1.2181843519210815, "learning_rate": 5.429499838389687e-06, "loss": 0.3962, "step": 25811 }, { "epoch": 0.66, "grad_norm": 1.8808236122131348, "learning_rate": 5.428761491044716e-06, "loss": 0.5358, "step": 25812 }, { "epoch": 0.66, "grad_norm": 1.4402458667755127, "learning_rate": 5.428023175201848e-06, "loss": 0.6026, "step": 25813 }, { "epoch": 0.66, "grad_norm": 1.2568649053573608, "learning_rate": 5.427284890866169e-06, "loss": 0.5521, "step": 25814 }, { "epoch": 0.66, "grad_norm": 5.262631416320801, "learning_rate": 5.426546638042773e-06, "loss": 0.5442, "step": 25815 }, { "epoch": 0.66, "grad_norm": 5.784384250640869, "learning_rate": 5.425808416736746e-06, "loss": 0.6609, "step": 25816 }, { "epoch": 0.66, "grad_norm": 2.2903168201446533, "learning_rate": 5.425070226953167e-06, "loss": 0.4875, "step": 25817 }, { "epoch": 0.66, "grad_norm": 1.3183491230010986, "learning_rate": 5.424332068697136e-06, "loss": 0.49, "step": 25818 }, { "epoch": 0.66, "grad_norm": 1.3863767385482788, "learning_rate": 5.423593941973731e-06, "loss": 0.5474, "step": 25819 }, { "epoch": 0.66, "grad_norm": 1.199535846710205, "learning_rate": 5.422855846788043e-06, "loss": 0.4674, "step": 25820 }, { "epoch": 0.66, "grad_norm": 7.642554759979248, "learning_rate": 5.4221177831451535e-06, "loss": 0.816, "step": 25821 }, { "epoch": 0.66, "grad_norm": 0.9740903973579407, "learning_rate": 5.421379751050156e-06, "loss": 0.2056, "step": 25822 }, { "epoch": 0.66, "grad_norm": 1.6573768854141235, "learning_rate": 5.420641750508131e-06, "loss": 0.4324, "step": 25823 }, { "epoch": 0.66, "grad_norm": 1.9048748016357422, "learning_rate": 5.419903781524163e-06, "loss": 0.4545, "step": 25824 }, { "epoch": 0.66, "grad_norm": 1.196975827217102, "learning_rate": 5.419165844103345e-06, "loss": 0.3642, "step": 25825 }, { "epoch": 0.66, "grad_norm": 1.6267900466918945, "learning_rate": 5.4184279382507565e-06, "loss": 0.4754, "step": 25826 }, { "epoch": 0.66, "grad_norm": 2.594224452972412, "learning_rate": 5.4176900639714814e-06, "loss": 0.4189, "step": 25827 }, { "epoch": 0.66, "grad_norm": 1.0433661937713623, "learning_rate": 5.416952221270613e-06, "loss": 0.4542, "step": 25828 }, { "epoch": 0.66, "grad_norm": 1.3682137727737427, "learning_rate": 5.4162144101532286e-06, "loss": 0.5645, "step": 25829 }, { "epoch": 0.66, "grad_norm": 1.2538056373596191, "learning_rate": 5.415476630624414e-06, "loss": 0.5797, "step": 25830 }, { "epoch": 0.66, "grad_norm": 4.08758020401001, "learning_rate": 5.414738882689252e-06, "loss": 0.6061, "step": 25831 }, { "epoch": 0.66, "grad_norm": 2.041297197341919, "learning_rate": 5.414001166352833e-06, "loss": 0.5112, "step": 25832 }, { "epoch": 0.66, "grad_norm": 2.9520158767700195, "learning_rate": 5.413263481620234e-06, "loss": 0.6232, "step": 25833 }, { "epoch": 0.66, "grad_norm": 1.001403570175171, "learning_rate": 5.4125258284965395e-06, "loss": 0.3865, "step": 25834 }, { "epoch": 0.66, "grad_norm": 1.170485019683838, "learning_rate": 5.41178820698684e-06, "loss": 0.464, "step": 25835 }, { "epoch": 0.66, "grad_norm": 1.1348357200622559, "learning_rate": 5.411050617096212e-06, "loss": 0.5125, "step": 25836 }, { "epoch": 0.66, "grad_norm": 1.6971423625946045, "learning_rate": 5.41031305882974e-06, "loss": 0.552, "step": 25837 }, { "epoch": 0.66, "grad_norm": 1.9132788181304932, "learning_rate": 5.409575532192504e-06, "loss": 0.4132, "step": 25838 }, { "epoch": 0.66, "grad_norm": 1.8467016220092773, "learning_rate": 5.408838037189592e-06, "loss": 0.5072, "step": 25839 }, { "epoch": 0.66, "grad_norm": 1.594889521598816, "learning_rate": 5.408100573826085e-06, "loss": 0.5333, "step": 25840 }, { "epoch": 0.66, "grad_norm": 2.060136556625366, "learning_rate": 5.4073631421070595e-06, "loss": 0.5017, "step": 25841 }, { "epoch": 0.66, "grad_norm": 1.4251043796539307, "learning_rate": 5.406625742037605e-06, "loss": 0.6418, "step": 25842 }, { "epoch": 0.66, "grad_norm": 1.5564756393432617, "learning_rate": 5.405888373622801e-06, "loss": 0.6455, "step": 25843 }, { "epoch": 0.66, "grad_norm": 1.4329416751861572, "learning_rate": 5.405151036867729e-06, "loss": 0.5459, "step": 25844 }, { "epoch": 0.66, "grad_norm": 1.4449154138565063, "learning_rate": 5.404413731777462e-06, "loss": 0.4904, "step": 25845 }, { "epoch": 0.66, "grad_norm": 3.2019991874694824, "learning_rate": 5.403676458357096e-06, "loss": 0.6656, "step": 25846 }, { "epoch": 0.66, "grad_norm": 1.2378679513931274, "learning_rate": 5.402939216611702e-06, "loss": 0.5349, "step": 25847 }, { "epoch": 0.66, "grad_norm": 2.3714444637298584, "learning_rate": 5.4022020065463595e-06, "loss": 0.4974, "step": 25848 }, { "epoch": 0.66, "grad_norm": 6.733086585998535, "learning_rate": 5.401464828166156e-06, "loss": 0.692, "step": 25849 }, { "epoch": 0.66, "grad_norm": 1.150490164756775, "learning_rate": 5.400727681476168e-06, "loss": 0.3641, "step": 25850 }, { "epoch": 0.66, "grad_norm": 1.2151563167572021, "learning_rate": 5.399990566481471e-06, "loss": 0.4371, "step": 25851 }, { "epoch": 0.66, "grad_norm": 1.2520562410354614, "learning_rate": 5.399253483187154e-06, "loss": 0.4134, "step": 25852 }, { "epoch": 0.66, "grad_norm": 3.914483070373535, "learning_rate": 5.398516431598291e-06, "loss": 0.6102, "step": 25853 }, { "epoch": 0.66, "grad_norm": 1.710379958152771, "learning_rate": 5.397779411719961e-06, "loss": 0.4852, "step": 25854 }, { "epoch": 0.66, "grad_norm": 0.9181016087532043, "learning_rate": 5.39704242355724e-06, "loss": 0.3153, "step": 25855 }, { "epoch": 0.66, "grad_norm": 2.977588176727295, "learning_rate": 5.396305467115215e-06, "loss": 0.5836, "step": 25856 }, { "epoch": 0.66, "grad_norm": 1.1961745023727417, "learning_rate": 5.395568542398961e-06, "loss": 0.6332, "step": 25857 }, { "epoch": 0.66, "grad_norm": 1.972489356994629, "learning_rate": 5.3948316494135515e-06, "loss": 0.4708, "step": 25858 }, { "epoch": 0.66, "grad_norm": 13.57070255279541, "learning_rate": 5.394094788164073e-06, "loss": 0.5833, "step": 25859 }, { "epoch": 0.66, "grad_norm": 1.1554577350616455, "learning_rate": 5.3933579586555985e-06, "loss": 0.5094, "step": 25860 }, { "epoch": 0.66, "grad_norm": 1.405574083328247, "learning_rate": 5.392621160893208e-06, "loss": 0.706, "step": 25861 }, { "epoch": 0.66, "grad_norm": 1.8749076128005981, "learning_rate": 5.391884394881973e-06, "loss": 0.5188, "step": 25862 }, { "epoch": 0.66, "grad_norm": 1.3175537586212158, "learning_rate": 5.391147660626981e-06, "loss": 0.3982, "step": 25863 }, { "epoch": 0.66, "grad_norm": 1.8564274311065674, "learning_rate": 5.390410958133302e-06, "loss": 0.758, "step": 25864 }, { "epoch": 0.66, "grad_norm": 1.2939409017562866, "learning_rate": 5.3896742874060115e-06, "loss": 0.4917, "step": 25865 }, { "epoch": 0.66, "grad_norm": 0.9695566296577454, "learning_rate": 5.388937648450194e-06, "loss": 0.3947, "step": 25866 }, { "epoch": 0.66, "grad_norm": 2.0125269889831543, "learning_rate": 5.3882010412709195e-06, "loss": 0.4279, "step": 25867 }, { "epoch": 0.66, "grad_norm": 8.35159683227539, "learning_rate": 5.3874644658732645e-06, "loss": 0.532, "step": 25868 }, { "epoch": 0.66, "grad_norm": 1.8940565586090088, "learning_rate": 5.386727922262308e-06, "loss": 0.5957, "step": 25869 }, { "epoch": 0.66, "grad_norm": 3.159956455230713, "learning_rate": 5.385991410443126e-06, "loss": 0.7274, "step": 25870 }, { "epoch": 0.66, "grad_norm": 1.8721462488174438, "learning_rate": 5.385254930420791e-06, "loss": 0.5719, "step": 25871 }, { "epoch": 0.66, "grad_norm": 1.802437663078308, "learning_rate": 5.384518482200377e-06, "loss": 0.5055, "step": 25872 }, { "epoch": 0.66, "grad_norm": 9.342350959777832, "learning_rate": 5.383782065786967e-06, "loss": 0.5632, "step": 25873 }, { "epoch": 0.66, "grad_norm": 0.9996933937072754, "learning_rate": 5.383045681185628e-06, "loss": 0.3788, "step": 25874 }, { "epoch": 0.66, "grad_norm": 7.425116062164307, "learning_rate": 5.382309328401435e-06, "loss": 0.6836, "step": 25875 }, { "epoch": 0.66, "grad_norm": 1.211279034614563, "learning_rate": 5.381573007439469e-06, "loss": 0.5187, "step": 25876 }, { "epoch": 0.66, "grad_norm": 1.0471175909042358, "learning_rate": 5.3808367183048e-06, "loss": 0.6336, "step": 25877 }, { "epoch": 0.66, "grad_norm": 1.827148199081421, "learning_rate": 5.380100461002502e-06, "loss": 0.5404, "step": 25878 }, { "epoch": 0.66, "grad_norm": 1.5543419122695923, "learning_rate": 5.379364235537645e-06, "loss": 0.4792, "step": 25879 }, { "epoch": 0.66, "grad_norm": 4.903489112854004, "learning_rate": 5.378628041915312e-06, "loss": 0.5836, "step": 25880 }, { "epoch": 0.66, "grad_norm": 1.5750069618225098, "learning_rate": 5.37789188014057e-06, "loss": 0.3992, "step": 25881 }, { "epoch": 0.66, "grad_norm": 1.2067573070526123, "learning_rate": 5.37715575021849e-06, "loss": 0.5462, "step": 25882 }, { "epoch": 0.66, "grad_norm": 1.3032550811767578, "learning_rate": 5.376419652154151e-06, "loss": 0.5289, "step": 25883 }, { "epoch": 0.66, "grad_norm": 1.367092490196228, "learning_rate": 5.375683585952625e-06, "loss": 0.4579, "step": 25884 }, { "epoch": 0.66, "grad_norm": 6.653757572174072, "learning_rate": 5.3749475516189806e-06, "loss": 0.4191, "step": 25885 }, { "epoch": 0.66, "grad_norm": 3.499547004699707, "learning_rate": 5.374211549158288e-06, "loss": 0.5471, "step": 25886 }, { "epoch": 0.66, "grad_norm": 1.3066926002502441, "learning_rate": 5.373475578575629e-06, "loss": 0.6624, "step": 25887 }, { "epoch": 0.66, "grad_norm": 1.323272943496704, "learning_rate": 5.372739639876068e-06, "loss": 0.5077, "step": 25888 }, { "epoch": 0.66, "grad_norm": 9.21635913848877, "learning_rate": 5.372003733064675e-06, "loss": 0.9426, "step": 25889 }, { "epoch": 0.66, "grad_norm": 2.902470827102661, "learning_rate": 5.371267858146527e-06, "loss": 0.5291, "step": 25890 }, { "epoch": 0.66, "grad_norm": 3.8360090255737305, "learning_rate": 5.370532015126695e-06, "loss": 0.6679, "step": 25891 }, { "epoch": 0.66, "grad_norm": 2.9726221561431885, "learning_rate": 5.369796204010242e-06, "loss": 0.6801, "step": 25892 }, { "epoch": 0.66, "grad_norm": 1.7670562267303467, "learning_rate": 5.369060424802251e-06, "loss": 0.6482, "step": 25893 }, { "epoch": 0.66, "grad_norm": 1.6935453414916992, "learning_rate": 5.368324677507784e-06, "loss": 0.46, "step": 25894 }, { "epoch": 0.66, "grad_norm": 1.7002081871032715, "learning_rate": 5.367588962131913e-06, "loss": 0.5343, "step": 25895 }, { "epoch": 0.66, "grad_norm": 1.1787540912628174, "learning_rate": 5.366853278679707e-06, "loss": 0.5783, "step": 25896 }, { "epoch": 0.66, "grad_norm": 2.4150145053863525, "learning_rate": 5.36611762715624e-06, "loss": 0.655, "step": 25897 }, { "epoch": 0.66, "grad_norm": 1.2069858312606812, "learning_rate": 5.365382007566579e-06, "loss": 0.4994, "step": 25898 }, { "epoch": 0.66, "grad_norm": 7.578184604644775, "learning_rate": 5.3646464199157885e-06, "loss": 0.7246, "step": 25899 }, { "epoch": 0.66, "grad_norm": 4.023301124572754, "learning_rate": 5.363910864208948e-06, "loss": 0.6625, "step": 25900 }, { "epoch": 0.66, "grad_norm": 2.6633622646331787, "learning_rate": 5.363175340451121e-06, "loss": 0.676, "step": 25901 }, { "epoch": 0.66, "grad_norm": 1.2529351711273193, "learning_rate": 5.3624398486473765e-06, "loss": 0.5257, "step": 25902 }, { "epoch": 0.66, "grad_norm": 8.909289360046387, "learning_rate": 5.361704388802777e-06, "loss": 0.6682, "step": 25903 }, { "epoch": 0.66, "grad_norm": 1.713679313659668, "learning_rate": 5.360968960922404e-06, "loss": 0.4716, "step": 25904 }, { "epoch": 0.66, "grad_norm": 1.5296003818511963, "learning_rate": 5.360233565011317e-06, "loss": 0.6262, "step": 25905 }, { "epoch": 0.66, "grad_norm": 16.96331214904785, "learning_rate": 5.35949820107458e-06, "loss": 0.5329, "step": 25906 }, { "epoch": 0.66, "grad_norm": 3.714329957962036, "learning_rate": 5.358762869117271e-06, "loss": 0.6616, "step": 25907 }, { "epoch": 0.66, "grad_norm": 1.4555197954177856, "learning_rate": 5.358027569144453e-06, "loss": 0.5833, "step": 25908 }, { "epoch": 0.66, "grad_norm": 2.1782748699188232, "learning_rate": 5.357292301161188e-06, "loss": 0.5206, "step": 25909 }, { "epoch": 0.66, "grad_norm": 1.6143966913223267, "learning_rate": 5.356557065172552e-06, "loss": 0.639, "step": 25910 }, { "epoch": 0.66, "grad_norm": 1.740541696548462, "learning_rate": 5.355821861183608e-06, "loss": 0.7006, "step": 25911 }, { "epoch": 0.66, "grad_norm": 1.7756714820861816, "learning_rate": 5.3550866891994204e-06, "loss": 0.6055, "step": 25912 }, { "epoch": 0.66, "grad_norm": 12.392816543579102, "learning_rate": 5.354351549225054e-06, "loss": 0.4993, "step": 25913 }, { "epoch": 0.66, "grad_norm": 1.1710007190704346, "learning_rate": 5.353616441265581e-06, "loss": 0.511, "step": 25914 }, { "epoch": 0.66, "grad_norm": 1.1075937747955322, "learning_rate": 5.352881365326065e-06, "loss": 0.4931, "step": 25915 }, { "epoch": 0.66, "grad_norm": 1.3431644439697266, "learning_rate": 5.352146321411568e-06, "loss": 0.6256, "step": 25916 }, { "epoch": 0.66, "grad_norm": 1.933415174484253, "learning_rate": 5.3514113095271615e-06, "loss": 0.5107, "step": 25917 }, { "epoch": 0.66, "grad_norm": 1.4360432624816895, "learning_rate": 5.350676329677909e-06, "loss": 0.7192, "step": 25918 }, { "epoch": 0.66, "grad_norm": 4.09792947769165, "learning_rate": 5.349941381868873e-06, "loss": 0.5169, "step": 25919 }, { "epoch": 0.66, "grad_norm": 2.7152554988861084, "learning_rate": 5.349206466105115e-06, "loss": 0.6384, "step": 25920 }, { "epoch": 0.66, "grad_norm": 4.19129753112793, "learning_rate": 5.348471582391709e-06, "loss": 0.5078, "step": 25921 }, { "epoch": 0.66, "grad_norm": 1.3862286806106567, "learning_rate": 5.347736730733715e-06, "loss": 0.5943, "step": 25922 }, { "epoch": 0.66, "grad_norm": 1.7622599601745605, "learning_rate": 5.3470019111361915e-06, "loss": 0.5757, "step": 25923 }, { "epoch": 0.66, "grad_norm": 7.487002372741699, "learning_rate": 5.346267123604212e-06, "loss": 0.5595, "step": 25924 }, { "epoch": 0.66, "grad_norm": 2.4041483402252197, "learning_rate": 5.345532368142835e-06, "loss": 0.6869, "step": 25925 }, { "epoch": 0.66, "grad_norm": 8.02918529510498, "learning_rate": 5.344797644757125e-06, "loss": 0.5372, "step": 25926 }, { "epoch": 0.66, "grad_norm": 1.5820149183273315, "learning_rate": 5.344062953452142e-06, "loss": 0.4817, "step": 25927 }, { "epoch": 0.66, "grad_norm": 5.32680082321167, "learning_rate": 5.343328294232955e-06, "loss": 0.6446, "step": 25928 }, { "epoch": 0.66, "grad_norm": 1.3224645853042603, "learning_rate": 5.342593667104624e-06, "loss": 0.5054, "step": 25929 }, { "epoch": 0.66, "grad_norm": 2.36661434173584, "learning_rate": 5.341859072072207e-06, "loss": 0.5484, "step": 25930 }, { "epoch": 0.66, "grad_norm": 1.3261662721633911, "learning_rate": 5.341124509140776e-06, "loss": 0.6649, "step": 25931 }, { "epoch": 0.66, "grad_norm": 2.922105312347412, "learning_rate": 5.340389978315386e-06, "loss": 0.4872, "step": 25932 }, { "epoch": 0.66, "grad_norm": 7.381535053253174, "learning_rate": 5.339655479601099e-06, "loss": 0.7971, "step": 25933 }, { "epoch": 0.66, "grad_norm": 1.2958178520202637, "learning_rate": 5.338921013002981e-06, "loss": 0.5644, "step": 25934 }, { "epoch": 0.66, "grad_norm": 1.6290513277053833, "learning_rate": 5.338186578526092e-06, "loss": 0.5597, "step": 25935 }, { "epoch": 0.66, "grad_norm": 8.357565879821777, "learning_rate": 5.337452176175492e-06, "loss": 0.6937, "step": 25936 }, { "epoch": 0.66, "grad_norm": 3.2813045978546143, "learning_rate": 5.336717805956238e-06, "loss": 0.4271, "step": 25937 }, { "epoch": 0.66, "grad_norm": 1.046025276184082, "learning_rate": 5.335983467873398e-06, "loss": 0.5914, "step": 25938 }, { "epoch": 0.66, "grad_norm": 2.2909774780273438, "learning_rate": 5.335249161932031e-06, "loss": 0.5063, "step": 25939 }, { "epoch": 0.66, "grad_norm": 1.783560872077942, "learning_rate": 5.334514888137192e-06, "loss": 0.6483, "step": 25940 }, { "epoch": 0.66, "grad_norm": 1.5729261636734009, "learning_rate": 5.333780646493949e-06, "loss": 0.6225, "step": 25941 }, { "epoch": 0.66, "grad_norm": 1.1903172731399536, "learning_rate": 5.333046437007358e-06, "loss": 0.5406, "step": 25942 }, { "epoch": 0.66, "grad_norm": 1.176357388496399, "learning_rate": 5.332312259682479e-06, "loss": 0.5517, "step": 25943 }, { "epoch": 0.66, "grad_norm": 1.6646085977554321, "learning_rate": 5.331578114524369e-06, "loss": 0.4413, "step": 25944 }, { "epoch": 0.66, "grad_norm": 7.298194885253906, "learning_rate": 5.330844001538091e-06, "loss": 0.6034, "step": 25945 }, { "epoch": 0.67, "grad_norm": 1.7830215692520142, "learning_rate": 5.330109920728705e-06, "loss": 0.5579, "step": 25946 }, { "epoch": 0.67, "grad_norm": 1.5946681499481201, "learning_rate": 5.329375872101263e-06, "loss": 0.6635, "step": 25947 }, { "epoch": 0.67, "grad_norm": 1.097063660621643, "learning_rate": 5.328641855660831e-06, "loss": 0.5278, "step": 25948 }, { "epoch": 0.67, "grad_norm": 2.6108813285827637, "learning_rate": 5.327907871412466e-06, "loss": 0.8164, "step": 25949 }, { "epoch": 0.67, "grad_norm": 2.098271131515503, "learning_rate": 5.327173919361219e-06, "loss": 0.4943, "step": 25950 }, { "epoch": 0.67, "grad_norm": 3.4074506759643555, "learning_rate": 5.326439999512159e-06, "loss": 0.6637, "step": 25951 }, { "epoch": 0.67, "grad_norm": 0.9638291001319885, "learning_rate": 5.325706111870337e-06, "loss": 0.6908, "step": 25952 }, { "epoch": 0.67, "grad_norm": 2.7999801635742188, "learning_rate": 5.324972256440812e-06, "loss": 0.5238, "step": 25953 }, { "epoch": 0.67, "grad_norm": 1.1906880140304565, "learning_rate": 5.324238433228638e-06, "loss": 0.4503, "step": 25954 }, { "epoch": 0.67, "grad_norm": 1.0002347230911255, "learning_rate": 5.323504642238879e-06, "loss": 0.581, "step": 25955 }, { "epoch": 0.67, "grad_norm": 2.1920650005340576, "learning_rate": 5.322770883476587e-06, "loss": 0.4672, "step": 25956 }, { "epoch": 0.67, "grad_norm": 1.819644570350647, "learning_rate": 5.322037156946816e-06, "loss": 0.6246, "step": 25957 }, { "epoch": 0.67, "grad_norm": 3.8674731254577637, "learning_rate": 5.32130346265463e-06, "loss": 0.6982, "step": 25958 }, { "epoch": 0.67, "grad_norm": 1.4073022603988647, "learning_rate": 5.320569800605082e-06, "loss": 0.5623, "step": 25959 }, { "epoch": 0.67, "grad_norm": 2.3026371002197266, "learning_rate": 5.319836170803225e-06, "loss": 0.6085, "step": 25960 }, { "epoch": 0.67, "grad_norm": 1.3063157796859741, "learning_rate": 5.3191025732541145e-06, "loss": 0.4385, "step": 25961 }, { "epoch": 0.67, "grad_norm": 1.895865797996521, "learning_rate": 5.318369007962811e-06, "loss": 0.404, "step": 25962 }, { "epoch": 0.67, "grad_norm": 2.008739948272705, "learning_rate": 5.317635474934368e-06, "loss": 0.742, "step": 25963 }, { "epoch": 0.67, "grad_norm": 1.2424179315567017, "learning_rate": 5.316901974173836e-06, "loss": 0.5929, "step": 25964 }, { "epoch": 0.67, "grad_norm": 0.8810437321662903, "learning_rate": 5.316168505686277e-06, "loss": 0.466, "step": 25965 }, { "epoch": 0.67, "grad_norm": 1.781516194343567, "learning_rate": 5.31543506947674e-06, "loss": 0.5135, "step": 25966 }, { "epoch": 0.67, "grad_norm": 1.2071141004562378, "learning_rate": 5.314701665550284e-06, "loss": 0.4969, "step": 25967 }, { "epoch": 0.67, "grad_norm": 1.809087872505188, "learning_rate": 5.313968293911955e-06, "loss": 0.6862, "step": 25968 }, { "epoch": 0.67, "grad_norm": 11.27363395690918, "learning_rate": 5.313234954566817e-06, "loss": 0.3645, "step": 25969 }, { "epoch": 0.67, "grad_norm": 1.3022379875183105, "learning_rate": 5.312501647519919e-06, "loss": 0.5513, "step": 25970 }, { "epoch": 0.67, "grad_norm": 7.002523422241211, "learning_rate": 5.311768372776311e-06, "loss": 0.4455, "step": 25971 }, { "epoch": 0.67, "grad_norm": 2.75807785987854, "learning_rate": 5.311035130341053e-06, "loss": 0.4903, "step": 25972 }, { "epoch": 0.67, "grad_norm": 2.4569199085235596, "learning_rate": 5.310301920219196e-06, "loss": 0.695, "step": 25973 }, { "epoch": 0.67, "grad_norm": 3.883085012435913, "learning_rate": 5.309568742415787e-06, "loss": 0.4628, "step": 25974 }, { "epoch": 0.67, "grad_norm": 1.7818220853805542, "learning_rate": 5.308835596935888e-06, "loss": 0.4992, "step": 25975 }, { "epoch": 0.67, "grad_norm": 1.7090115547180176, "learning_rate": 5.308102483784547e-06, "loss": 0.5445, "step": 25976 }, { "epoch": 0.67, "grad_norm": 1.5145137310028076, "learning_rate": 5.307369402966815e-06, "loss": 0.5748, "step": 25977 }, { "epoch": 0.67, "grad_norm": 12.651999473571777, "learning_rate": 5.3066363544877416e-06, "loss": 0.7088, "step": 25978 }, { "epoch": 0.67, "grad_norm": 4.127339839935303, "learning_rate": 5.305903338352387e-06, "loss": 0.7441, "step": 25979 }, { "epoch": 0.67, "grad_norm": 2.1642537117004395, "learning_rate": 5.305170354565796e-06, "loss": 0.5299, "step": 25980 }, { "epoch": 0.67, "grad_norm": 1.2317613363265991, "learning_rate": 5.304437403133018e-06, "loss": 0.539, "step": 25981 }, { "epoch": 0.67, "grad_norm": 1.0961685180664062, "learning_rate": 5.303704484059111e-06, "loss": 0.5868, "step": 25982 }, { "epoch": 0.67, "grad_norm": 1.2348848581314087, "learning_rate": 5.302971597349122e-06, "loss": 0.462, "step": 25983 }, { "epoch": 0.67, "grad_norm": 2.0424606800079346, "learning_rate": 5.302238743008101e-06, "loss": 0.4018, "step": 25984 }, { "epoch": 0.67, "grad_norm": 1.28188955783844, "learning_rate": 5.301505921041097e-06, "loss": 0.6989, "step": 25985 }, { "epoch": 0.67, "grad_norm": 1.2380231618881226, "learning_rate": 5.300773131453165e-06, "loss": 0.4903, "step": 25986 }, { "epoch": 0.67, "grad_norm": 1.8381373882293701, "learning_rate": 5.300040374249352e-06, "loss": 0.6292, "step": 25987 }, { "epoch": 0.67, "grad_norm": 1.0948666334152222, "learning_rate": 5.299307649434705e-06, "loss": 0.3969, "step": 25988 }, { "epoch": 0.67, "grad_norm": 1.254805326461792, "learning_rate": 5.298574957014279e-06, "loss": 0.4607, "step": 25989 }, { "epoch": 0.67, "grad_norm": 3.441681385040283, "learning_rate": 5.297842296993123e-06, "loss": 0.5292, "step": 25990 }, { "epoch": 0.67, "grad_norm": 1.3654841184616089, "learning_rate": 5.2971096693762816e-06, "loss": 0.4784, "step": 25991 }, { "epoch": 0.67, "grad_norm": 2.8880257606506348, "learning_rate": 5.296377074168803e-06, "loss": 0.6915, "step": 25992 }, { "epoch": 0.67, "grad_norm": 5.835241794586182, "learning_rate": 5.295644511375741e-06, "loss": 0.5863, "step": 25993 }, { "epoch": 0.67, "grad_norm": 1.6255794763565063, "learning_rate": 5.294911981002143e-06, "loss": 0.3681, "step": 25994 }, { "epoch": 0.67, "grad_norm": 1.863656997680664, "learning_rate": 5.294179483053052e-06, "loss": 0.5683, "step": 25995 }, { "epoch": 0.67, "grad_norm": 2.1930670738220215, "learning_rate": 5.293447017533522e-06, "loss": 0.5912, "step": 25996 }, { "epoch": 0.67, "grad_norm": 2.898621082305908, "learning_rate": 5.2927145844485985e-06, "loss": 0.4662, "step": 25997 }, { "epoch": 0.67, "grad_norm": 10.844644546508789, "learning_rate": 5.291982183803325e-06, "loss": 0.652, "step": 25998 }, { "epoch": 0.67, "grad_norm": 3.082831859588623, "learning_rate": 5.291249815602756e-06, "loss": 0.5334, "step": 25999 }, { "epoch": 0.67, "grad_norm": 2.018843173980713, "learning_rate": 5.2905174798519356e-06, "loss": 0.5506, "step": 26000 }, { "epoch": 0.67, "grad_norm": 8.898416519165039, "learning_rate": 5.28978517655591e-06, "loss": 0.7748, "step": 26001 }, { "epoch": 0.67, "grad_norm": 2.1174323558807373, "learning_rate": 5.28905290571972e-06, "loss": 0.6271, "step": 26002 }, { "epoch": 0.67, "grad_norm": 1.3993371725082397, "learning_rate": 5.288320667348423e-06, "loss": 0.5433, "step": 26003 }, { "epoch": 0.67, "grad_norm": 2.073446273803711, "learning_rate": 5.287588461447061e-06, "loss": 0.6542, "step": 26004 }, { "epoch": 0.67, "grad_norm": 5.046175956726074, "learning_rate": 5.286856288020673e-06, "loss": 0.5964, "step": 26005 }, { "epoch": 0.67, "grad_norm": 5.718631744384766, "learning_rate": 5.286124147074314e-06, "loss": 0.5688, "step": 26006 }, { "epoch": 0.67, "grad_norm": 1.0972092151641846, "learning_rate": 5.285392038613028e-06, "loss": 0.4168, "step": 26007 }, { "epoch": 0.67, "grad_norm": 3.074702262878418, "learning_rate": 5.284659962641856e-06, "loss": 0.5388, "step": 26008 }, { "epoch": 0.67, "grad_norm": 3.1030430793762207, "learning_rate": 5.283927919165842e-06, "loss": 0.533, "step": 26009 }, { "epoch": 0.67, "grad_norm": 1.6088706254959106, "learning_rate": 5.283195908190037e-06, "loss": 0.6542, "step": 26010 }, { "epoch": 0.67, "grad_norm": 3.0339207649230957, "learning_rate": 5.282463929719484e-06, "loss": 0.5212, "step": 26011 }, { "epoch": 0.67, "grad_norm": 6.705866813659668, "learning_rate": 5.2817319837592205e-06, "loss": 0.6923, "step": 26012 }, { "epoch": 0.67, "grad_norm": 2.3562753200531006, "learning_rate": 5.2810000703143004e-06, "loss": 0.5738, "step": 26013 }, { "epoch": 0.67, "grad_norm": 1.3690065145492554, "learning_rate": 5.280268189389764e-06, "loss": 0.4364, "step": 26014 }, { "epoch": 0.67, "grad_norm": 1.575122356414795, "learning_rate": 5.2795363409906475e-06, "loss": 0.4871, "step": 26015 }, { "epoch": 0.67, "grad_norm": 1.3278989791870117, "learning_rate": 5.278804525122006e-06, "loss": 0.5927, "step": 26016 }, { "epoch": 0.67, "grad_norm": 1.9501092433929443, "learning_rate": 5.278072741788878e-06, "loss": 0.6303, "step": 26017 }, { "epoch": 0.67, "grad_norm": 6.175083637237549, "learning_rate": 5.2773409909963065e-06, "loss": 0.5016, "step": 26018 }, { "epoch": 0.67, "grad_norm": 4.0015788078308105, "learning_rate": 5.276609272749329e-06, "loss": 0.7618, "step": 26019 }, { "epoch": 0.67, "grad_norm": 3.5812554359436035, "learning_rate": 5.275877587052998e-06, "loss": 0.6251, "step": 26020 }, { "epoch": 0.67, "grad_norm": 6.938551425933838, "learning_rate": 5.275145933912349e-06, "loss": 0.5458, "step": 26021 }, { "epoch": 0.67, "grad_norm": 2.4914700984954834, "learning_rate": 5.274414313332422e-06, "loss": 0.4295, "step": 26022 }, { "epoch": 0.67, "grad_norm": 2.00227427482605, "learning_rate": 5.2736827253182675e-06, "loss": 0.606, "step": 26023 }, { "epoch": 0.67, "grad_norm": 1.1637825965881348, "learning_rate": 5.272951169874923e-06, "loss": 0.5287, "step": 26024 }, { "epoch": 0.67, "grad_norm": 1.858392357826233, "learning_rate": 5.272219647007426e-06, "loss": 0.5583, "step": 26025 }, { "epoch": 0.67, "grad_norm": 1.0041499137878418, "learning_rate": 5.27148815672082e-06, "loss": 0.4879, "step": 26026 }, { "epoch": 0.67, "grad_norm": 1.0456644296646118, "learning_rate": 5.270756699020149e-06, "loss": 0.5252, "step": 26027 }, { "epoch": 0.67, "grad_norm": 1.1797449588775635, "learning_rate": 5.270025273910453e-06, "loss": 0.5426, "step": 26028 }, { "epoch": 0.67, "grad_norm": 10.433816909790039, "learning_rate": 5.269293881396766e-06, "loss": 0.7362, "step": 26029 }, { "epoch": 0.67, "grad_norm": 3.9146029949188232, "learning_rate": 5.268562521484139e-06, "loss": 0.6612, "step": 26030 }, { "epoch": 0.67, "grad_norm": 6.241305828094482, "learning_rate": 5.267831194177606e-06, "loss": 0.5657, "step": 26031 }, { "epoch": 0.67, "grad_norm": 1.4302871227264404, "learning_rate": 5.267099899482206e-06, "loss": 0.6549, "step": 26032 }, { "epoch": 0.67, "grad_norm": 2.1613919734954834, "learning_rate": 5.266368637402979e-06, "loss": 0.4705, "step": 26033 }, { "epoch": 0.67, "grad_norm": 1.2373603582382202, "learning_rate": 5.265637407944967e-06, "loss": 0.5054, "step": 26034 }, { "epoch": 0.67, "grad_norm": 1.2047486305236816, "learning_rate": 5.264906211113208e-06, "loss": 0.6204, "step": 26035 }, { "epoch": 0.67, "grad_norm": 3.5859198570251465, "learning_rate": 5.264175046912736e-06, "loss": 0.5878, "step": 26036 }, { "epoch": 0.67, "grad_norm": 1.3065000772476196, "learning_rate": 5.263443915348603e-06, "loss": 0.4559, "step": 26037 }, { "epoch": 0.67, "grad_norm": 3.203787326812744, "learning_rate": 5.262712816425834e-06, "loss": 0.7326, "step": 26038 }, { "epoch": 0.67, "grad_norm": 2.4725775718688965, "learning_rate": 5.26198175014947e-06, "loss": 0.4776, "step": 26039 }, { "epoch": 0.67, "grad_norm": 1.7486190795898438, "learning_rate": 5.261250716524553e-06, "loss": 0.4243, "step": 26040 }, { "epoch": 0.67, "grad_norm": 1.5237613916397095, "learning_rate": 5.26051971555612e-06, "loss": 0.6216, "step": 26041 }, { "epoch": 0.67, "grad_norm": 6.6570305824279785, "learning_rate": 5.259788747249201e-06, "loss": 0.6113, "step": 26042 }, { "epoch": 0.67, "grad_norm": 1.3416763544082642, "learning_rate": 5.259057811608848e-06, "loss": 0.4748, "step": 26043 }, { "epoch": 0.67, "grad_norm": 4.3002448081970215, "learning_rate": 5.258326908640087e-06, "loss": 0.6316, "step": 26044 }, { "epoch": 0.67, "grad_norm": 1.461674690246582, "learning_rate": 5.257596038347954e-06, "loss": 0.535, "step": 26045 }, { "epoch": 0.67, "grad_norm": 1.2902631759643555, "learning_rate": 5.256865200737495e-06, "loss": 0.6071, "step": 26046 }, { "epoch": 0.67, "grad_norm": 1.2522486448287964, "learning_rate": 5.256134395813741e-06, "loss": 0.5273, "step": 26047 }, { "epoch": 0.67, "grad_norm": 1.5500463247299194, "learning_rate": 5.255403623581728e-06, "loss": 0.508, "step": 26048 }, { "epoch": 0.67, "grad_norm": 2.167299270629883, "learning_rate": 5.254672884046488e-06, "loss": 0.7089, "step": 26049 }, { "epoch": 0.67, "grad_norm": 1.4116263389587402, "learning_rate": 5.253942177213065e-06, "loss": 0.6285, "step": 26050 }, { "epoch": 0.67, "grad_norm": 1.2881686687469482, "learning_rate": 5.25321150308649e-06, "loss": 0.6066, "step": 26051 }, { "epoch": 0.67, "grad_norm": 4.322758197784424, "learning_rate": 5.252480861671798e-06, "loss": 0.6764, "step": 26052 }, { "epoch": 0.67, "grad_norm": 1.9427742958068848, "learning_rate": 5.2517502529740276e-06, "loss": 0.6899, "step": 26053 }, { "epoch": 0.67, "grad_norm": 1.6771671772003174, "learning_rate": 5.2510196769982105e-06, "loss": 0.5479, "step": 26054 }, { "epoch": 0.67, "grad_norm": 2.489386558532715, "learning_rate": 5.250289133749383e-06, "loss": 0.5699, "step": 26055 }, { "epoch": 0.67, "grad_norm": 1.0835239887237549, "learning_rate": 5.249558623232575e-06, "loss": 0.5185, "step": 26056 }, { "epoch": 0.67, "grad_norm": 1.5168743133544922, "learning_rate": 5.248828145452828e-06, "loss": 0.4799, "step": 26057 }, { "epoch": 0.67, "grad_norm": 7.445111274719238, "learning_rate": 5.248097700415173e-06, "loss": 0.4976, "step": 26058 }, { "epoch": 0.67, "grad_norm": 4.531943321228027, "learning_rate": 5.247367288124638e-06, "loss": 0.6294, "step": 26059 }, { "epoch": 0.67, "grad_norm": 2.1610748767852783, "learning_rate": 5.246636908586267e-06, "loss": 0.7312, "step": 26060 }, { "epoch": 0.67, "grad_norm": 2.474048614501953, "learning_rate": 5.2459065618050875e-06, "loss": 0.6158, "step": 26061 }, { "epoch": 0.67, "grad_norm": 1.5136476755142212, "learning_rate": 5.245176247786132e-06, "loss": 0.4676, "step": 26062 }, { "epoch": 0.67, "grad_norm": 1.1951570510864258, "learning_rate": 5.244445966534433e-06, "loss": 0.605, "step": 26063 }, { "epoch": 0.67, "grad_norm": 1.3885900974273682, "learning_rate": 5.2437157180550266e-06, "loss": 0.4624, "step": 26064 }, { "epoch": 0.67, "grad_norm": 2.1580066680908203, "learning_rate": 5.242985502352944e-06, "loss": 0.4858, "step": 26065 }, { "epoch": 0.67, "grad_norm": 1.862229824066162, "learning_rate": 5.242255319433214e-06, "loss": 0.4591, "step": 26066 }, { "epoch": 0.67, "grad_norm": 1.632604956626892, "learning_rate": 5.2415251693008736e-06, "loss": 0.5504, "step": 26067 }, { "epoch": 0.67, "grad_norm": 2.0223355293273926, "learning_rate": 5.240795051960952e-06, "loss": 0.5034, "step": 26068 }, { "epoch": 0.67, "grad_norm": 2.9563615322113037, "learning_rate": 5.240064967418478e-06, "loss": 0.5684, "step": 26069 }, { "epoch": 0.67, "grad_norm": 2.9610416889190674, "learning_rate": 5.239334915678489e-06, "loss": 0.6579, "step": 26070 }, { "epoch": 0.67, "grad_norm": 2.1619536876678467, "learning_rate": 5.238604896746013e-06, "loss": 0.4961, "step": 26071 }, { "epoch": 0.67, "grad_norm": 1.4478951692581177, "learning_rate": 5.2378749106260794e-06, "loss": 0.6324, "step": 26072 }, { "epoch": 0.67, "grad_norm": 5.68985652923584, "learning_rate": 5.237144957323719e-06, "loss": 0.5454, "step": 26073 }, { "epoch": 0.67, "grad_norm": 3.1914310455322266, "learning_rate": 5.2364150368439644e-06, "loss": 0.6609, "step": 26074 }, { "epoch": 0.67, "grad_norm": 2.1578712463378906, "learning_rate": 5.235685149191847e-06, "loss": 0.5892, "step": 26075 }, { "epoch": 0.67, "grad_norm": 3.926300287246704, "learning_rate": 5.234955294372388e-06, "loss": 0.705, "step": 26076 }, { "epoch": 0.67, "grad_norm": 1.3345403671264648, "learning_rate": 5.234225472390629e-06, "loss": 0.3838, "step": 26077 }, { "epoch": 0.67, "grad_norm": 1.8409723043441772, "learning_rate": 5.233495683251593e-06, "loss": 0.52, "step": 26078 }, { "epoch": 0.67, "grad_norm": 2.3345913887023926, "learning_rate": 5.2327659269603104e-06, "loss": 0.4498, "step": 26079 }, { "epoch": 0.67, "grad_norm": 1.637600302696228, "learning_rate": 5.232036203521806e-06, "loss": 0.5746, "step": 26080 }, { "epoch": 0.67, "grad_norm": 3.768824577331543, "learning_rate": 5.231306512941116e-06, "loss": 0.5636, "step": 26081 }, { "epoch": 0.67, "grad_norm": 1.3713408708572388, "learning_rate": 5.230576855223267e-06, "loss": 0.3642, "step": 26082 }, { "epoch": 0.67, "grad_norm": 5.583181381225586, "learning_rate": 5.229847230373281e-06, "loss": 0.5492, "step": 26083 }, { "epoch": 0.67, "grad_norm": 1.650223731994629, "learning_rate": 5.229117638396195e-06, "loss": 0.4697, "step": 26084 }, { "epoch": 0.67, "grad_norm": 1.831257939338684, "learning_rate": 5.2283880792970335e-06, "loss": 0.5777, "step": 26085 }, { "epoch": 0.67, "grad_norm": 2.9703595638275146, "learning_rate": 5.227658553080822e-06, "loss": 0.6119, "step": 26086 }, { "epoch": 0.67, "grad_norm": 1.2328418493270874, "learning_rate": 5.226929059752587e-06, "loss": 0.4213, "step": 26087 }, { "epoch": 0.67, "grad_norm": 1.7133989334106445, "learning_rate": 5.226199599317361e-06, "loss": 0.5969, "step": 26088 }, { "epoch": 0.67, "grad_norm": 2.361295700073242, "learning_rate": 5.2254701717801695e-06, "loss": 0.5379, "step": 26089 }, { "epoch": 0.67, "grad_norm": 1.3696104288101196, "learning_rate": 5.224740777146034e-06, "loss": 0.5481, "step": 26090 }, { "epoch": 0.67, "grad_norm": 1.1103423833847046, "learning_rate": 5.224011415419988e-06, "loss": 0.4236, "step": 26091 }, { "epoch": 0.67, "grad_norm": 1.4965001344680786, "learning_rate": 5.223282086607053e-06, "loss": 0.4935, "step": 26092 }, { "epoch": 0.67, "grad_norm": 1.4545602798461914, "learning_rate": 5.2225527907122554e-06, "loss": 0.5456, "step": 26093 }, { "epoch": 0.67, "grad_norm": 1.0382418632507324, "learning_rate": 5.221823527740626e-06, "loss": 0.5849, "step": 26094 }, { "epoch": 0.67, "grad_norm": 1.0487360954284668, "learning_rate": 5.221094297697187e-06, "loss": 0.3893, "step": 26095 }, { "epoch": 0.67, "grad_norm": 1.8155581951141357, "learning_rate": 5.2203651005869626e-06, "loss": 0.5782, "step": 26096 }, { "epoch": 0.67, "grad_norm": 8.252103805541992, "learning_rate": 5.2196359364149766e-06, "loss": 0.4367, "step": 26097 }, { "epoch": 0.67, "grad_norm": 3.3565673828125, "learning_rate": 5.218906805186259e-06, "loss": 0.6135, "step": 26098 }, { "epoch": 0.67, "grad_norm": 1.8826799392700195, "learning_rate": 5.218177706905833e-06, "loss": 0.5338, "step": 26099 }, { "epoch": 0.67, "grad_norm": 2.160386323928833, "learning_rate": 5.2174486415787174e-06, "loss": 0.4333, "step": 26100 }, { "epoch": 0.67, "grad_norm": 9.920794486999512, "learning_rate": 5.216719609209946e-06, "loss": 0.507, "step": 26101 }, { "epoch": 0.67, "grad_norm": 6.8263702392578125, "learning_rate": 5.215990609804536e-06, "loss": 0.5847, "step": 26102 }, { "epoch": 0.67, "grad_norm": 1.115149736404419, "learning_rate": 5.2152616433675155e-06, "loss": 0.5051, "step": 26103 }, { "epoch": 0.67, "grad_norm": 1.7663663625717163, "learning_rate": 5.214532709903899e-06, "loss": 0.4402, "step": 26104 }, { "epoch": 0.67, "grad_norm": 1.2584238052368164, "learning_rate": 5.213803809418723e-06, "loss": 0.4447, "step": 26105 }, { "epoch": 0.67, "grad_norm": 1.290193796157837, "learning_rate": 5.213074941917003e-06, "loss": 0.5521, "step": 26106 }, { "epoch": 0.67, "grad_norm": 0.935606837272644, "learning_rate": 5.212346107403758e-06, "loss": 0.4844, "step": 26107 }, { "epoch": 0.67, "grad_norm": 2.833360433578491, "learning_rate": 5.211617305884023e-06, "loss": 0.3805, "step": 26108 }, { "epoch": 0.67, "grad_norm": 2.085740566253662, "learning_rate": 5.21088853736281e-06, "loss": 0.4519, "step": 26109 }, { "epoch": 0.67, "grad_norm": 0.6993083953857422, "learning_rate": 5.210159801845141e-06, "loss": 0.454, "step": 26110 }, { "epoch": 0.67, "grad_norm": 2.380124092102051, "learning_rate": 5.209431099336047e-06, "loss": 0.6143, "step": 26111 }, { "epoch": 0.67, "grad_norm": 1.3041248321533203, "learning_rate": 5.208702429840542e-06, "loss": 0.4759, "step": 26112 }, { "epoch": 0.67, "grad_norm": 9.614834785461426, "learning_rate": 5.207973793363651e-06, "loss": 0.6195, "step": 26113 }, { "epoch": 0.67, "grad_norm": 1.0867356061935425, "learning_rate": 5.2072451899103895e-06, "loss": 0.4401, "step": 26114 }, { "epoch": 0.67, "grad_norm": 1.6475573778152466, "learning_rate": 5.206516619485787e-06, "loss": 0.523, "step": 26115 }, { "epoch": 0.67, "grad_norm": 1.7711615562438965, "learning_rate": 5.20578808209486e-06, "loss": 0.526, "step": 26116 }, { "epoch": 0.67, "grad_norm": 9.014121055603027, "learning_rate": 5.205059577742627e-06, "loss": 0.5697, "step": 26117 }, { "epoch": 0.67, "grad_norm": 2.9766507148742676, "learning_rate": 5.204331106434114e-06, "loss": 0.8274, "step": 26118 }, { "epoch": 0.67, "grad_norm": 2.2121682167053223, "learning_rate": 5.2036026681743376e-06, "loss": 0.6424, "step": 26119 }, { "epoch": 0.67, "grad_norm": 4.156878471374512, "learning_rate": 5.202874262968319e-06, "loss": 0.585, "step": 26120 }, { "epoch": 0.67, "grad_norm": 11.261275291442871, "learning_rate": 5.202145890821073e-06, "loss": 0.7318, "step": 26121 }, { "epoch": 0.67, "grad_norm": 3.7584714889526367, "learning_rate": 5.2014175517376265e-06, "loss": 0.5312, "step": 26122 }, { "epoch": 0.67, "grad_norm": 2.436650514602661, "learning_rate": 5.200689245722996e-06, "loss": 0.5863, "step": 26123 }, { "epoch": 0.67, "grad_norm": 1.4909896850585938, "learning_rate": 5.1999609727821965e-06, "loss": 0.5143, "step": 26124 }, { "epoch": 0.67, "grad_norm": 1.4768664836883545, "learning_rate": 5.199232732920254e-06, "loss": 0.6609, "step": 26125 }, { "epoch": 0.67, "grad_norm": 1.339969515800476, "learning_rate": 5.198504526142183e-06, "loss": 0.5333, "step": 26126 }, { "epoch": 0.67, "grad_norm": 1.5824568271636963, "learning_rate": 5.197776352453002e-06, "loss": 0.5756, "step": 26127 }, { "epoch": 0.67, "grad_norm": 4.810013771057129, "learning_rate": 5.197048211857727e-06, "loss": 0.6859, "step": 26128 }, { "epoch": 0.67, "grad_norm": 0.9881373047828674, "learning_rate": 5.196320104361381e-06, "loss": 0.479, "step": 26129 }, { "epoch": 0.67, "grad_norm": 4.375399589538574, "learning_rate": 5.195592029968977e-06, "loss": 0.7191, "step": 26130 }, { "epoch": 0.67, "grad_norm": 7.177866458892822, "learning_rate": 5.194863988685532e-06, "loss": 0.7102, "step": 26131 }, { "epoch": 0.67, "grad_norm": 1.675356388092041, "learning_rate": 5.1941359805160705e-06, "loss": 0.4604, "step": 26132 }, { "epoch": 0.67, "grad_norm": 1.3907619714736938, "learning_rate": 5.193408005465603e-06, "loss": 0.5808, "step": 26133 }, { "epoch": 0.67, "grad_norm": 3.9515931606292725, "learning_rate": 5.192680063539145e-06, "loss": 0.7096, "step": 26134 }, { "epoch": 0.67, "grad_norm": 1.0733709335327148, "learning_rate": 5.19195215474172e-06, "loss": 0.515, "step": 26135 }, { "epoch": 0.67, "grad_norm": 1.3528504371643066, "learning_rate": 5.191224279078339e-06, "loss": 0.7234, "step": 26136 }, { "epoch": 0.67, "grad_norm": 1.8078840970993042, "learning_rate": 5.190496436554018e-06, "loss": 0.5901, "step": 26137 }, { "epoch": 0.67, "grad_norm": 2.280078172683716, "learning_rate": 5.189768627173773e-06, "loss": 0.4627, "step": 26138 }, { "epoch": 0.67, "grad_norm": 2.2307991981506348, "learning_rate": 5.189040850942622e-06, "loss": 0.451, "step": 26139 }, { "epoch": 0.67, "grad_norm": 2.5054080486297607, "learning_rate": 5.188313107865579e-06, "loss": 0.5812, "step": 26140 }, { "epoch": 0.67, "grad_norm": 1.3685674667358398, "learning_rate": 5.187585397947657e-06, "loss": 0.4273, "step": 26141 }, { "epoch": 0.67, "grad_norm": 1.3420523405075073, "learning_rate": 5.1868577211938745e-06, "loss": 0.5427, "step": 26142 }, { "epoch": 0.67, "grad_norm": 4.360971927642822, "learning_rate": 5.186130077609246e-06, "loss": 0.6243, "step": 26143 }, { "epoch": 0.67, "grad_norm": 1.0534464120864868, "learning_rate": 5.1854024671987835e-06, "loss": 0.4913, "step": 26144 }, { "epoch": 0.67, "grad_norm": 1.5824449062347412, "learning_rate": 5.1846748899675e-06, "loss": 0.5471, "step": 26145 }, { "epoch": 0.67, "grad_norm": 2.006126880645752, "learning_rate": 5.1839473459204145e-06, "loss": 0.6113, "step": 26146 }, { "epoch": 0.67, "grad_norm": 1.4727593660354614, "learning_rate": 5.18321983506254e-06, "loss": 0.6003, "step": 26147 }, { "epoch": 0.67, "grad_norm": 2.5044572353363037, "learning_rate": 5.182492357398883e-06, "loss": 0.5821, "step": 26148 }, { "epoch": 0.67, "grad_norm": 2.0044939517974854, "learning_rate": 5.181764912934466e-06, "loss": 0.5934, "step": 26149 }, { "epoch": 0.67, "grad_norm": 1.6624822616577148, "learning_rate": 5.181037501674298e-06, "loss": 0.5924, "step": 26150 }, { "epoch": 0.67, "grad_norm": 3.6516833305358887, "learning_rate": 5.180310123623388e-06, "loss": 0.6808, "step": 26151 }, { "epoch": 0.67, "grad_norm": 1.7933021783828735, "learning_rate": 5.179582778786756e-06, "loss": 0.4834, "step": 26152 }, { "epoch": 0.67, "grad_norm": 3.279021739959717, "learning_rate": 5.1788554671694125e-06, "loss": 0.731, "step": 26153 }, { "epoch": 0.67, "grad_norm": 2.1883926391601562, "learning_rate": 5.178128188776368e-06, "loss": 0.5362, "step": 26154 }, { "epoch": 0.67, "grad_norm": 1.6215205192565918, "learning_rate": 5.17740094361263e-06, "loss": 0.5462, "step": 26155 }, { "epoch": 0.67, "grad_norm": 1.6818554401397705, "learning_rate": 5.176673731683218e-06, "loss": 0.601, "step": 26156 }, { "epoch": 0.67, "grad_norm": 3.247201681137085, "learning_rate": 5.17594655299314e-06, "loss": 0.5342, "step": 26157 }, { "epoch": 0.67, "grad_norm": 2.8988633155822754, "learning_rate": 5.1752194075474075e-06, "loss": 0.6184, "step": 26158 }, { "epoch": 0.67, "grad_norm": 1.136375904083252, "learning_rate": 5.174492295351032e-06, "loss": 0.5026, "step": 26159 }, { "epoch": 0.67, "grad_norm": 1.0029983520507812, "learning_rate": 5.173765216409025e-06, "loss": 0.6419, "step": 26160 }, { "epoch": 0.67, "grad_norm": 1.09478759765625, "learning_rate": 5.173038170726396e-06, "loss": 0.5144, "step": 26161 }, { "epoch": 0.67, "grad_norm": 1.6718305349349976, "learning_rate": 5.172311158308151e-06, "loss": 0.5307, "step": 26162 }, { "epoch": 0.67, "grad_norm": 1.685955286026001, "learning_rate": 5.171584179159308e-06, "loss": 0.5652, "step": 26163 }, { "epoch": 0.67, "grad_norm": 1.210134506225586, "learning_rate": 5.170857233284874e-06, "loss": 0.3909, "step": 26164 }, { "epoch": 0.67, "grad_norm": 2.273052930831909, "learning_rate": 5.170130320689855e-06, "loss": 0.643, "step": 26165 }, { "epoch": 0.67, "grad_norm": 1.2920620441436768, "learning_rate": 5.169403441379266e-06, "loss": 0.5795, "step": 26166 }, { "epoch": 0.67, "grad_norm": 1.4970042705535889, "learning_rate": 5.168676595358113e-06, "loss": 0.5664, "step": 26167 }, { "epoch": 0.67, "grad_norm": 3.3038599491119385, "learning_rate": 5.167949782631408e-06, "loss": 0.4433, "step": 26168 }, { "epoch": 0.67, "grad_norm": 1.42117178440094, "learning_rate": 5.167223003204152e-06, "loss": 0.3867, "step": 26169 }, { "epoch": 0.67, "grad_norm": 1.2119832038879395, "learning_rate": 5.166496257081362e-06, "loss": 0.5101, "step": 26170 }, { "epoch": 0.67, "grad_norm": 1.936781644821167, "learning_rate": 5.165769544268043e-06, "loss": 0.4794, "step": 26171 }, { "epoch": 0.67, "grad_norm": 6.9425435066223145, "learning_rate": 5.165042864769201e-06, "loss": 0.7166, "step": 26172 }, { "epoch": 0.67, "grad_norm": 1.2577271461486816, "learning_rate": 5.164316218589849e-06, "loss": 0.4491, "step": 26173 }, { "epoch": 0.67, "grad_norm": 0.8723209500312805, "learning_rate": 5.163589605734991e-06, "loss": 0.4338, "step": 26174 }, { "epoch": 0.67, "grad_norm": 3.921201229095459, "learning_rate": 5.16286302620963e-06, "loss": 0.3474, "step": 26175 }, { "epoch": 0.67, "grad_norm": 1.234440803527832, "learning_rate": 5.162136480018785e-06, "loss": 0.5823, "step": 26176 }, { "epoch": 0.67, "grad_norm": 1.7520099878311157, "learning_rate": 5.161409967167453e-06, "loss": 0.6154, "step": 26177 }, { "epoch": 0.67, "grad_norm": 2.283931255340576, "learning_rate": 5.1606834876606445e-06, "loss": 0.5103, "step": 26178 }, { "epoch": 0.67, "grad_norm": 2.187687873840332, "learning_rate": 5.159957041503363e-06, "loss": 0.7167, "step": 26179 }, { "epoch": 0.67, "grad_norm": 4.75031852722168, "learning_rate": 5.159230628700618e-06, "loss": 0.8233, "step": 26180 }, { "epoch": 0.67, "grad_norm": 1.2918106317520142, "learning_rate": 5.158504249257415e-06, "loss": 0.5137, "step": 26181 }, { "epoch": 0.67, "grad_norm": 1.0978236198425293, "learning_rate": 5.157777903178755e-06, "loss": 0.498, "step": 26182 }, { "epoch": 0.67, "grad_norm": 1.8064664602279663, "learning_rate": 5.157051590469653e-06, "loss": 0.5498, "step": 26183 }, { "epoch": 0.67, "grad_norm": 2.134162425994873, "learning_rate": 5.156325311135108e-06, "loss": 0.6167, "step": 26184 }, { "epoch": 0.67, "grad_norm": 1.7830126285552979, "learning_rate": 5.1555990651801255e-06, "loss": 0.56, "step": 26185 }, { "epoch": 0.67, "grad_norm": 1.013843297958374, "learning_rate": 5.154872852609707e-06, "loss": 0.645, "step": 26186 }, { "epoch": 0.67, "grad_norm": 1.4767497777938843, "learning_rate": 5.1541466734288635e-06, "loss": 0.5296, "step": 26187 }, { "epoch": 0.67, "grad_norm": 3.4046988487243652, "learning_rate": 5.153420527642598e-06, "loss": 0.5392, "step": 26188 }, { "epoch": 0.67, "grad_norm": 1.3963899612426758, "learning_rate": 5.15269441525591e-06, "loss": 0.5266, "step": 26189 }, { "epoch": 0.67, "grad_norm": 1.5970145463943481, "learning_rate": 5.15196833627381e-06, "loss": 0.4181, "step": 26190 }, { "epoch": 0.67, "grad_norm": 1.513266682624817, "learning_rate": 5.151242290701297e-06, "loss": 0.5283, "step": 26191 }, { "epoch": 0.67, "grad_norm": 2.3556950092315674, "learning_rate": 5.150516278543373e-06, "loss": 0.7447, "step": 26192 }, { "epoch": 0.67, "grad_norm": 2.604006290435791, "learning_rate": 5.149790299805049e-06, "loss": 0.5752, "step": 26193 }, { "epoch": 0.67, "grad_norm": 0.889904260635376, "learning_rate": 5.149064354491322e-06, "loss": 0.3161, "step": 26194 }, { "epoch": 0.67, "grad_norm": 2.0680346488952637, "learning_rate": 5.1483384426071945e-06, "loss": 0.5556, "step": 26195 }, { "epoch": 0.67, "grad_norm": 1.5598036050796509, "learning_rate": 5.147612564157669e-06, "loss": 0.5979, "step": 26196 }, { "epoch": 0.67, "grad_norm": 1.7486741542816162, "learning_rate": 5.146886719147753e-06, "loss": 0.5297, "step": 26197 }, { "epoch": 0.67, "grad_norm": 4.118530750274658, "learning_rate": 5.1461609075824424e-06, "loss": 0.6374, "step": 26198 }, { "epoch": 0.67, "grad_norm": 4.9366021156311035, "learning_rate": 5.145435129466738e-06, "loss": 0.6919, "step": 26199 }, { "epoch": 0.67, "grad_norm": 1.2474547624588013, "learning_rate": 5.14470938480565e-06, "loss": 0.4256, "step": 26200 }, { "epoch": 0.67, "grad_norm": 2.017831325531006, "learning_rate": 5.143983673604174e-06, "loss": 0.5831, "step": 26201 }, { "epoch": 0.67, "grad_norm": 1.2892581224441528, "learning_rate": 5.14325799586731e-06, "loss": 0.5113, "step": 26202 }, { "epoch": 0.67, "grad_norm": 4.294195652008057, "learning_rate": 5.142532351600059e-06, "loss": 0.4385, "step": 26203 }, { "epoch": 0.67, "grad_norm": 1.0926570892333984, "learning_rate": 5.1418067408074265e-06, "loss": 0.6941, "step": 26204 }, { "epoch": 0.67, "grad_norm": 2.2388994693756104, "learning_rate": 5.141081163494409e-06, "loss": 0.5487, "step": 26205 }, { "epoch": 0.67, "grad_norm": 5.4443793296813965, "learning_rate": 5.140355619666004e-06, "loss": 0.4833, "step": 26206 }, { "epoch": 0.67, "grad_norm": 0.909682035446167, "learning_rate": 5.1396301093272196e-06, "loss": 0.5109, "step": 26207 }, { "epoch": 0.67, "grad_norm": 1.3638421297073364, "learning_rate": 5.13890463248305e-06, "loss": 0.4911, "step": 26208 }, { "epoch": 0.67, "grad_norm": 1.0320801734924316, "learning_rate": 5.138179189138496e-06, "loss": 0.3942, "step": 26209 }, { "epoch": 0.67, "grad_norm": 5.261155605316162, "learning_rate": 5.1374537792985525e-06, "loss": 0.6026, "step": 26210 }, { "epoch": 0.67, "grad_norm": 1.9405370950698853, "learning_rate": 5.136728402968226e-06, "loss": 0.522, "step": 26211 }, { "epoch": 0.67, "grad_norm": 4.659648418426514, "learning_rate": 5.136003060152513e-06, "loss": 0.4464, "step": 26212 }, { "epoch": 0.67, "grad_norm": 1.7315711975097656, "learning_rate": 5.135277750856407e-06, "loss": 0.6593, "step": 26213 }, { "epoch": 0.67, "grad_norm": 1.628516435623169, "learning_rate": 5.134552475084913e-06, "loss": 0.5033, "step": 26214 }, { "epoch": 0.67, "grad_norm": 1.9212145805358887, "learning_rate": 5.133827232843027e-06, "loss": 0.6125, "step": 26215 }, { "epoch": 0.67, "grad_norm": 6.472316265106201, "learning_rate": 5.1331020241357436e-06, "loss": 0.7961, "step": 26216 }, { "epoch": 0.67, "grad_norm": 1.3882100582122803, "learning_rate": 5.132376848968067e-06, "loss": 0.5581, "step": 26217 }, { "epoch": 0.67, "grad_norm": 7.877621650695801, "learning_rate": 5.131651707344991e-06, "loss": 0.7039, "step": 26218 }, { "epoch": 0.67, "grad_norm": 1.635860800743103, "learning_rate": 5.130926599271513e-06, "loss": 0.4865, "step": 26219 }, { "epoch": 0.67, "grad_norm": 1.5291624069213867, "learning_rate": 5.130201524752625e-06, "loss": 0.6384, "step": 26220 }, { "epoch": 0.67, "grad_norm": 1.43735933303833, "learning_rate": 5.129476483793333e-06, "loss": 0.5808, "step": 26221 }, { "epoch": 0.67, "grad_norm": 2.1673851013183594, "learning_rate": 5.128751476398629e-06, "loss": 0.5621, "step": 26222 }, { "epoch": 0.67, "grad_norm": 1.0622841119766235, "learning_rate": 5.128026502573506e-06, "loss": 0.6392, "step": 26223 }, { "epoch": 0.67, "grad_norm": 10.216403007507324, "learning_rate": 5.1273015623229664e-06, "loss": 0.6833, "step": 26224 }, { "epoch": 0.67, "grad_norm": 2.2061266899108887, "learning_rate": 5.126576655652004e-06, "loss": 0.4292, "step": 26225 }, { "epoch": 0.67, "grad_norm": 1.586949348449707, "learning_rate": 5.125851782565614e-06, "loss": 0.5591, "step": 26226 }, { "epoch": 0.67, "grad_norm": 2.0959737300872803, "learning_rate": 5.125126943068785e-06, "loss": 0.6499, "step": 26227 }, { "epoch": 0.67, "grad_norm": 1.0945868492126465, "learning_rate": 5.124402137166524e-06, "loss": 0.5915, "step": 26228 }, { "epoch": 0.67, "grad_norm": 2.8787128925323486, "learning_rate": 5.123677364863821e-06, "loss": 0.5957, "step": 26229 }, { "epoch": 0.67, "grad_norm": 3.179487466812134, "learning_rate": 5.122952626165665e-06, "loss": 0.6595, "step": 26230 }, { "epoch": 0.67, "grad_norm": 4.25600004196167, "learning_rate": 5.1222279210770595e-06, "loss": 0.4957, "step": 26231 }, { "epoch": 0.67, "grad_norm": 2.3356735706329346, "learning_rate": 5.1215032496029945e-06, "loss": 0.5886, "step": 26232 }, { "epoch": 0.67, "grad_norm": 1.2224160432815552, "learning_rate": 5.12077861174846e-06, "loss": 0.5167, "step": 26233 }, { "epoch": 0.67, "grad_norm": 4.430412769317627, "learning_rate": 5.1200540075184604e-06, "loss": 0.6099, "step": 26234 }, { "epoch": 0.67, "grad_norm": 1.5573756694793701, "learning_rate": 5.1193294369179795e-06, "loss": 0.5585, "step": 26235 }, { "epoch": 0.67, "grad_norm": 4.200872421264648, "learning_rate": 5.1186048999520156e-06, "loss": 0.5871, "step": 26236 }, { "epoch": 0.67, "grad_norm": 3.9979662895202637, "learning_rate": 5.117880396625554e-06, "loss": 0.5383, "step": 26237 }, { "epoch": 0.67, "grad_norm": 2.7476446628570557, "learning_rate": 5.1171559269436e-06, "loss": 0.638, "step": 26238 }, { "epoch": 0.67, "grad_norm": 1.2700453996658325, "learning_rate": 5.116431490911138e-06, "loss": 0.5708, "step": 26239 }, { "epoch": 0.67, "grad_norm": 1.1553088426589966, "learning_rate": 5.115707088533158e-06, "loss": 0.3473, "step": 26240 }, { "epoch": 0.67, "grad_norm": 7.628379821777344, "learning_rate": 5.114982719814661e-06, "loss": 0.8119, "step": 26241 }, { "epoch": 0.67, "grad_norm": 1.8239341974258423, "learning_rate": 5.114258384760633e-06, "loss": 0.6722, "step": 26242 }, { "epoch": 0.67, "grad_norm": 1.3732565641403198, "learning_rate": 5.113534083376067e-06, "loss": 0.6429, "step": 26243 }, { "epoch": 0.67, "grad_norm": 2.8628506660461426, "learning_rate": 5.112809815665951e-06, "loss": 0.5132, "step": 26244 }, { "epoch": 0.67, "grad_norm": 6.237765789031982, "learning_rate": 5.112085581635282e-06, "loss": 0.4508, "step": 26245 }, { "epoch": 0.67, "grad_norm": 1.058533787727356, "learning_rate": 5.1113613812890494e-06, "loss": 0.3232, "step": 26246 }, { "epoch": 0.67, "grad_norm": 3.748486042022705, "learning_rate": 5.1106372146322384e-06, "loss": 0.5022, "step": 26247 }, { "epoch": 0.67, "grad_norm": 1.4701114892959595, "learning_rate": 5.109913081669848e-06, "loss": 0.6166, "step": 26248 }, { "epoch": 0.67, "grad_norm": 9.325602531433105, "learning_rate": 5.109188982406864e-06, "loss": 0.7473, "step": 26249 }, { "epoch": 0.67, "grad_norm": 1.5165421962738037, "learning_rate": 5.108464916848277e-06, "loss": 0.3796, "step": 26250 }, { "epoch": 0.67, "grad_norm": 0.9880927205085754, "learning_rate": 5.107740884999073e-06, "loss": 0.518, "step": 26251 }, { "epoch": 0.67, "grad_norm": 2.4274275302886963, "learning_rate": 5.107016886864249e-06, "loss": 0.6902, "step": 26252 }, { "epoch": 0.67, "grad_norm": 5.689681053161621, "learning_rate": 5.10629292244879e-06, "loss": 0.4498, "step": 26253 }, { "epoch": 0.67, "grad_norm": 1.1110345125198364, "learning_rate": 5.1055689917576826e-06, "loss": 0.4517, "step": 26254 }, { "epoch": 0.67, "grad_norm": 1.260441541671753, "learning_rate": 5.104845094795923e-06, "loss": 0.4603, "step": 26255 }, { "epoch": 0.67, "grad_norm": 1.4389560222625732, "learning_rate": 5.1041212315684925e-06, "loss": 0.608, "step": 26256 }, { "epoch": 0.67, "grad_norm": 1.3753975629806519, "learning_rate": 5.103397402080382e-06, "loss": 0.5587, "step": 26257 }, { "epoch": 0.67, "grad_norm": 1.3242592811584473, "learning_rate": 5.102673606336583e-06, "loss": 0.4728, "step": 26258 }, { "epoch": 0.67, "grad_norm": 1.7653502225875854, "learning_rate": 5.101949844342081e-06, "loss": 0.6683, "step": 26259 }, { "epoch": 0.67, "grad_norm": 6.076631546020508, "learning_rate": 5.101226116101863e-06, "loss": 0.5935, "step": 26260 }, { "epoch": 0.67, "grad_norm": 2.1287078857421875, "learning_rate": 5.100502421620914e-06, "loss": 0.6493, "step": 26261 }, { "epoch": 0.67, "grad_norm": 1.2160106897354126, "learning_rate": 5.099778760904226e-06, "loss": 0.5043, "step": 26262 }, { "epoch": 0.67, "grad_norm": 1.2236177921295166, "learning_rate": 5.099055133956786e-06, "loss": 0.5633, "step": 26263 }, { "epoch": 0.67, "grad_norm": 1.7555954456329346, "learning_rate": 5.0983315407835745e-06, "loss": 0.4766, "step": 26264 }, { "epoch": 0.67, "grad_norm": 2.1647729873657227, "learning_rate": 5.097607981389587e-06, "loss": 0.4054, "step": 26265 }, { "epoch": 0.67, "grad_norm": 2.2260773181915283, "learning_rate": 5.096884455779805e-06, "loss": 0.6013, "step": 26266 }, { "epoch": 0.67, "grad_norm": 1.5154422521591187, "learning_rate": 5.0961609639592135e-06, "loss": 0.5505, "step": 26267 }, { "epoch": 0.67, "grad_norm": 1.3828825950622559, "learning_rate": 5.095437505932796e-06, "loss": 0.5136, "step": 26268 }, { "epoch": 0.67, "grad_norm": 4.731526851654053, "learning_rate": 5.094714081705547e-06, "loss": 0.4645, "step": 26269 }, { "epoch": 0.67, "grad_norm": 2.97727108001709, "learning_rate": 5.093990691282447e-06, "loss": 0.5722, "step": 26270 }, { "epoch": 0.67, "grad_norm": 10.095377922058105, "learning_rate": 5.093267334668477e-06, "loss": 0.5979, "step": 26271 }, { "epoch": 0.67, "grad_norm": 1.473568320274353, "learning_rate": 5.0925440118686285e-06, "loss": 0.5827, "step": 26272 }, { "epoch": 0.67, "grad_norm": 3.7470710277557373, "learning_rate": 5.091820722887885e-06, "loss": 0.6051, "step": 26273 }, { "epoch": 0.67, "grad_norm": 5.73393440246582, "learning_rate": 5.091097467731227e-06, "loss": 0.5388, "step": 26274 }, { "epoch": 0.67, "grad_norm": 1.0224858522415161, "learning_rate": 5.0903742464036395e-06, "loss": 0.3969, "step": 26275 }, { "epoch": 0.67, "grad_norm": 1.0979126691818237, "learning_rate": 5.089651058910112e-06, "loss": 0.5107, "step": 26276 }, { "epoch": 0.67, "grad_norm": 2.4374990463256836, "learning_rate": 5.088927905255623e-06, "loss": 0.513, "step": 26277 }, { "epoch": 0.67, "grad_norm": 1.1309443712234497, "learning_rate": 5.0882047854451545e-06, "loss": 0.4999, "step": 26278 }, { "epoch": 0.67, "grad_norm": 2.1666603088378906, "learning_rate": 5.087481699483697e-06, "loss": 0.246, "step": 26279 }, { "epoch": 0.67, "grad_norm": 3.1118884086608887, "learning_rate": 5.086758647376233e-06, "loss": 0.5489, "step": 26280 }, { "epoch": 0.67, "grad_norm": 1.307969570159912, "learning_rate": 5.086035629127732e-06, "loss": 0.4069, "step": 26281 }, { "epoch": 0.67, "grad_norm": 1.5253602266311646, "learning_rate": 5.085312644743193e-06, "loss": 0.5654, "step": 26282 }, { "epoch": 0.67, "grad_norm": 1.6050200462341309, "learning_rate": 5.084589694227591e-06, "loss": 0.6208, "step": 26283 }, { "epoch": 0.67, "grad_norm": 1.3651872873306274, "learning_rate": 5.083866777585903e-06, "loss": 0.5565, "step": 26284 }, { "epoch": 0.67, "grad_norm": 7.049704551696777, "learning_rate": 5.083143894823122e-06, "loss": 0.5842, "step": 26285 }, { "epoch": 0.67, "grad_norm": 1.4312465190887451, "learning_rate": 5.082421045944222e-06, "loss": 0.5402, "step": 26286 }, { "epoch": 0.67, "grad_norm": 1.475203514099121, "learning_rate": 5.081698230954185e-06, "loss": 0.5499, "step": 26287 }, { "epoch": 0.67, "grad_norm": 1.953827142715454, "learning_rate": 5.0809754498579964e-06, "loss": 0.559, "step": 26288 }, { "epoch": 0.67, "grad_norm": 1.9668350219726562, "learning_rate": 5.080252702660635e-06, "loss": 0.6376, "step": 26289 }, { "epoch": 0.67, "grad_norm": 2.0512492656707764, "learning_rate": 5.0795299893670826e-06, "loss": 0.5038, "step": 26290 }, { "epoch": 0.67, "grad_norm": 1.28434157371521, "learning_rate": 5.078807309982312e-06, "loss": 0.4719, "step": 26291 }, { "epoch": 0.67, "grad_norm": 1.639634609222412, "learning_rate": 5.078084664511315e-06, "loss": 0.6474, "step": 26292 }, { "epoch": 0.67, "grad_norm": 2.1727590560913086, "learning_rate": 5.077362052959065e-06, "loss": 0.5749, "step": 26293 }, { "epoch": 0.67, "grad_norm": 1.261466383934021, "learning_rate": 5.07663947533054e-06, "loss": 0.5188, "step": 26294 }, { "epoch": 0.67, "grad_norm": 1.6319841146469116, "learning_rate": 5.075916931630727e-06, "loss": 0.519, "step": 26295 }, { "epoch": 0.67, "grad_norm": 0.9687179923057556, "learning_rate": 5.0751944218646e-06, "loss": 0.5236, "step": 26296 }, { "epoch": 0.67, "grad_norm": 2.578697919845581, "learning_rate": 5.074471946037138e-06, "loss": 0.5354, "step": 26297 }, { "epoch": 0.67, "grad_norm": 1.17581307888031, "learning_rate": 5.073749504153319e-06, "loss": 0.4184, "step": 26298 }, { "epoch": 0.67, "grad_norm": 1.3942687511444092, "learning_rate": 5.073027096218126e-06, "loss": 0.6061, "step": 26299 }, { "epoch": 0.67, "grad_norm": 1.9188859462738037, "learning_rate": 5.072304722236536e-06, "loss": 0.4931, "step": 26300 }, { "epoch": 0.67, "grad_norm": 1.6026982069015503, "learning_rate": 5.07158238221352e-06, "loss": 0.6068, "step": 26301 }, { "epoch": 0.67, "grad_norm": 1.6024280786514282, "learning_rate": 5.070860076154068e-06, "loss": 0.5932, "step": 26302 }, { "epoch": 0.67, "grad_norm": 8.5646333694458, "learning_rate": 5.070137804063151e-06, "loss": 0.5115, "step": 26303 }, { "epoch": 0.67, "grad_norm": 1.5257068872451782, "learning_rate": 5.069415565945747e-06, "loss": 0.3515, "step": 26304 }, { "epoch": 0.67, "grad_norm": 1.3398345708847046, "learning_rate": 5.068693361806829e-06, "loss": 0.5953, "step": 26305 }, { "epoch": 0.67, "grad_norm": 1.2673782110214233, "learning_rate": 5.067971191651382e-06, "loss": 0.6462, "step": 26306 }, { "epoch": 0.67, "grad_norm": 2.6007936000823975, "learning_rate": 5.067249055484378e-06, "loss": 0.5241, "step": 26307 }, { "epoch": 0.67, "grad_norm": 1.3943750858306885, "learning_rate": 5.066526953310793e-06, "loss": 0.4818, "step": 26308 }, { "epoch": 0.67, "grad_norm": 2.700265645980835, "learning_rate": 5.065804885135605e-06, "loss": 0.7294, "step": 26309 }, { "epoch": 0.67, "grad_norm": 2.1210720539093018, "learning_rate": 5.065082850963794e-06, "loss": 0.5492, "step": 26310 }, { "epoch": 0.67, "grad_norm": 2.820706367492676, "learning_rate": 5.064360850800325e-06, "loss": 0.451, "step": 26311 }, { "epoch": 0.67, "grad_norm": 1.2157683372497559, "learning_rate": 5.063638884650185e-06, "loss": 0.3493, "step": 26312 }, { "epoch": 0.67, "grad_norm": 1.4508956670761108, "learning_rate": 5.062916952518344e-06, "loss": 0.6576, "step": 26313 }, { "epoch": 0.67, "grad_norm": 1.7200249433517456, "learning_rate": 5.062195054409777e-06, "loss": 0.6839, "step": 26314 }, { "epoch": 0.67, "grad_norm": 1.4401682615280151, "learning_rate": 5.061473190329458e-06, "loss": 0.4397, "step": 26315 }, { "epoch": 0.67, "grad_norm": 2.228215456008911, "learning_rate": 5.060751360282364e-06, "loss": 0.4775, "step": 26316 }, { "epoch": 0.67, "grad_norm": 1.6899542808532715, "learning_rate": 5.06002956427347e-06, "loss": 0.614, "step": 26317 }, { "epoch": 0.67, "grad_norm": 1.7202634811401367, "learning_rate": 5.059307802307745e-06, "loss": 0.6957, "step": 26318 }, { "epoch": 0.67, "grad_norm": 1.3598682880401611, "learning_rate": 5.058586074390171e-06, "loss": 0.5055, "step": 26319 }, { "epoch": 0.67, "grad_norm": 2.3872642517089844, "learning_rate": 5.057864380525715e-06, "loss": 0.427, "step": 26320 }, { "epoch": 0.67, "grad_norm": 1.0957744121551514, "learning_rate": 5.057142720719355e-06, "loss": 0.3959, "step": 26321 }, { "epoch": 0.67, "grad_norm": 1.3632644414901733, "learning_rate": 5.0564210949760565e-06, "loss": 0.5668, "step": 26322 }, { "epoch": 0.67, "grad_norm": 3.627319574356079, "learning_rate": 5.055699503300802e-06, "loss": 0.6115, "step": 26323 }, { "epoch": 0.67, "grad_norm": 0.9390510320663452, "learning_rate": 5.054977945698561e-06, "loss": 0.3992, "step": 26324 }, { "epoch": 0.67, "grad_norm": 2.0335400104522705, "learning_rate": 5.054256422174302e-06, "loss": 0.5879, "step": 26325 }, { "epoch": 0.67, "grad_norm": 1.7113454341888428, "learning_rate": 5.053534932733002e-06, "loss": 0.5442, "step": 26326 }, { "epoch": 0.67, "grad_norm": 1.6432559490203857, "learning_rate": 5.052813477379633e-06, "loss": 0.5756, "step": 26327 }, { "epoch": 0.67, "grad_norm": 0.9446189403533936, "learning_rate": 5.0520920561191626e-06, "loss": 0.466, "step": 26328 }, { "epoch": 0.67, "grad_norm": 7.2265448570251465, "learning_rate": 5.051370668956568e-06, "loss": 0.6108, "step": 26329 }, { "epoch": 0.67, "grad_norm": 1.424281120300293, "learning_rate": 5.050649315896816e-06, "loss": 0.4861, "step": 26330 }, { "epoch": 0.67, "grad_norm": 1.057361125946045, "learning_rate": 5.049927996944882e-06, "loss": 0.5744, "step": 26331 }, { "epoch": 0.67, "grad_norm": 2.2608225345611572, "learning_rate": 5.04920671210573e-06, "loss": 0.5644, "step": 26332 }, { "epoch": 0.67, "grad_norm": 1.0439518690109253, "learning_rate": 5.048485461384338e-06, "loss": 0.5169, "step": 26333 }, { "epoch": 0.67, "grad_norm": 3.1080679893493652, "learning_rate": 5.0477642447856735e-06, "loss": 0.411, "step": 26334 }, { "epoch": 0.67, "grad_norm": 1.1544181108474731, "learning_rate": 5.047043062314702e-06, "loss": 0.5304, "step": 26335 }, { "epoch": 0.68, "grad_norm": 1.5023654699325562, "learning_rate": 5.046321913976404e-06, "loss": 0.6256, "step": 26336 }, { "epoch": 0.68, "grad_norm": 1.9889956712722778, "learning_rate": 5.04560079977574e-06, "loss": 0.6021, "step": 26337 }, { "epoch": 0.68, "grad_norm": 6.722550868988037, "learning_rate": 5.0448797197176855e-06, "loss": 0.4222, "step": 26338 }, { "epoch": 0.68, "grad_norm": 1.3962574005126953, "learning_rate": 5.044158673807202e-06, "loss": 0.5252, "step": 26339 }, { "epoch": 0.68, "grad_norm": 2.4096343517303467, "learning_rate": 5.043437662049267e-06, "loss": 0.619, "step": 26340 }, { "epoch": 0.68, "grad_norm": 6.942183017730713, "learning_rate": 5.042716684448846e-06, "loss": 0.5649, "step": 26341 }, { "epoch": 0.68, "grad_norm": 1.3903634548187256, "learning_rate": 5.041995741010903e-06, "loss": 0.5822, "step": 26342 }, { "epoch": 0.68, "grad_norm": 4.774684906005859, "learning_rate": 5.0412748317404146e-06, "loss": 0.5795, "step": 26343 }, { "epoch": 0.68, "grad_norm": 5.33842134475708, "learning_rate": 5.040553956642345e-06, "loss": 0.6512, "step": 26344 }, { "epoch": 0.68, "grad_norm": 2.0266165733337402, "learning_rate": 5.039833115721661e-06, "loss": 0.5526, "step": 26345 }, { "epoch": 0.68, "grad_norm": 3.142223358154297, "learning_rate": 5.039112308983327e-06, "loss": 0.7433, "step": 26346 }, { "epoch": 0.68, "grad_norm": 1.564170002937317, "learning_rate": 5.0383915364323185e-06, "loss": 0.5831, "step": 26347 }, { "epoch": 0.68, "grad_norm": 0.8279086351394653, "learning_rate": 5.037670798073597e-06, "loss": 0.5631, "step": 26348 }, { "epoch": 0.68, "grad_norm": 1.3481380939483643, "learning_rate": 5.036950093912129e-06, "loss": 0.4782, "step": 26349 }, { "epoch": 0.68, "grad_norm": 1.3247517347335815, "learning_rate": 5.036229423952886e-06, "loss": 0.5168, "step": 26350 }, { "epoch": 0.68, "grad_norm": 2.306778907775879, "learning_rate": 5.03550878820083e-06, "loss": 0.6504, "step": 26351 }, { "epoch": 0.68, "grad_norm": 1.2326053380966187, "learning_rate": 5.034788186660926e-06, "loss": 0.533, "step": 26352 }, { "epoch": 0.68, "grad_norm": 1.9118626117706299, "learning_rate": 5.034067619338146e-06, "loss": 0.4738, "step": 26353 }, { "epoch": 0.68, "grad_norm": 1.816157341003418, "learning_rate": 5.033347086237451e-06, "loss": 0.4675, "step": 26354 }, { "epoch": 0.68, "grad_norm": 2.1949446201324463, "learning_rate": 5.032626587363809e-06, "loss": 0.3799, "step": 26355 }, { "epoch": 0.68, "grad_norm": 1.2385300397872925, "learning_rate": 5.031906122722179e-06, "loss": 0.5313, "step": 26356 }, { "epoch": 0.68, "grad_norm": 1.2429338693618774, "learning_rate": 5.031185692317535e-06, "loss": 0.2817, "step": 26357 }, { "epoch": 0.68, "grad_norm": 1.326181411743164, "learning_rate": 5.0304652961548375e-06, "loss": 0.5217, "step": 26358 }, { "epoch": 0.68, "grad_norm": 1.8247555494308472, "learning_rate": 5.029744934239047e-06, "loss": 0.6109, "step": 26359 }, { "epoch": 0.68, "grad_norm": 1.252690315246582, "learning_rate": 5.029024606575135e-06, "loss": 0.4909, "step": 26360 }, { "epoch": 0.68, "grad_norm": 1.627175211906433, "learning_rate": 5.028304313168063e-06, "loss": 0.6205, "step": 26361 }, { "epoch": 0.68, "grad_norm": 1.379732370376587, "learning_rate": 5.0275840540227936e-06, "loss": 0.5391, "step": 26362 }, { "epoch": 0.68, "grad_norm": 1.7264000177383423, "learning_rate": 5.026863829144289e-06, "loss": 0.3457, "step": 26363 }, { "epoch": 0.68, "grad_norm": 1.0876818895339966, "learning_rate": 5.026143638537516e-06, "loss": 0.4946, "step": 26364 }, { "epoch": 0.68, "grad_norm": 1.951986312866211, "learning_rate": 5.025423482207436e-06, "loss": 0.6623, "step": 26365 }, { "epoch": 0.68, "grad_norm": 5.1516032218933105, "learning_rate": 5.024703360159011e-06, "loss": 0.5155, "step": 26366 }, { "epoch": 0.68, "grad_norm": 1.604393482208252, "learning_rate": 5.023983272397206e-06, "loss": 0.5673, "step": 26367 }, { "epoch": 0.68, "grad_norm": 4.395626544952393, "learning_rate": 5.0232632189269836e-06, "loss": 0.6078, "step": 26368 }, { "epoch": 0.68, "grad_norm": 3.6643800735473633, "learning_rate": 5.022543199753299e-06, "loss": 0.5308, "step": 26369 }, { "epoch": 0.68, "grad_norm": 1.5625793933868408, "learning_rate": 5.021823214881124e-06, "loss": 0.3654, "step": 26370 }, { "epoch": 0.68, "grad_norm": 1.253600835800171, "learning_rate": 5.0211032643154165e-06, "loss": 0.5124, "step": 26371 }, { "epoch": 0.68, "grad_norm": 1.45143723487854, "learning_rate": 5.020383348061137e-06, "loss": 0.4957, "step": 26372 }, { "epoch": 0.68, "grad_norm": 1.5498945713043213, "learning_rate": 5.019663466123241e-06, "loss": 0.5623, "step": 26373 }, { "epoch": 0.68, "grad_norm": 1.9603208303451538, "learning_rate": 5.018943618506702e-06, "loss": 0.6382, "step": 26374 }, { "epoch": 0.68, "grad_norm": 1.6651791334152222, "learning_rate": 5.018223805216473e-06, "loss": 0.5188, "step": 26375 }, { "epoch": 0.68, "grad_norm": 5.306621551513672, "learning_rate": 5.017504026257512e-06, "loss": 0.7422, "step": 26376 }, { "epoch": 0.68, "grad_norm": 2.1540706157684326, "learning_rate": 5.016784281634788e-06, "loss": 0.4712, "step": 26377 }, { "epoch": 0.68, "grad_norm": 16.111257553100586, "learning_rate": 5.016064571353254e-06, "loss": 0.6159, "step": 26378 }, { "epoch": 0.68, "grad_norm": 1.7123595476150513, "learning_rate": 5.015344895417873e-06, "loss": 0.5082, "step": 26379 }, { "epoch": 0.68, "grad_norm": 1.619256615638733, "learning_rate": 5.014625253833598e-06, "loss": 0.5215, "step": 26380 }, { "epoch": 0.68, "grad_norm": 1.400567889213562, "learning_rate": 5.013905646605399e-06, "loss": 0.568, "step": 26381 }, { "epoch": 0.68, "grad_norm": 0.8646578192710876, "learning_rate": 5.01318607373823e-06, "loss": 0.4077, "step": 26382 }, { "epoch": 0.68, "grad_norm": 3.905721664428711, "learning_rate": 5.012466535237044e-06, "loss": 0.6194, "step": 26383 }, { "epoch": 0.68, "grad_norm": 1.0645619630813599, "learning_rate": 5.011747031106811e-06, "loss": 0.4631, "step": 26384 }, { "epoch": 0.68, "grad_norm": 7.543227195739746, "learning_rate": 5.011027561352482e-06, "loss": 0.7748, "step": 26385 }, { "epoch": 0.68, "grad_norm": 5.905116558074951, "learning_rate": 5.0103081259790175e-06, "loss": 0.5629, "step": 26386 }, { "epoch": 0.68, "grad_norm": 1.4940394163131714, "learning_rate": 5.00958872499137e-06, "loss": 0.5221, "step": 26387 }, { "epoch": 0.68, "grad_norm": 13.278313636779785, "learning_rate": 5.008869358394506e-06, "loss": 0.4705, "step": 26388 }, { "epoch": 0.68, "grad_norm": 1.2032828330993652, "learning_rate": 5.008150026193379e-06, "loss": 0.4593, "step": 26389 }, { "epoch": 0.68, "grad_norm": 1.140006422996521, "learning_rate": 5.007430728392941e-06, "loss": 0.5118, "step": 26390 }, { "epoch": 0.68, "grad_norm": 1.4449553489685059, "learning_rate": 5.006711464998159e-06, "loss": 0.3691, "step": 26391 }, { "epoch": 0.68, "grad_norm": 1.447589635848999, "learning_rate": 5.005992236013983e-06, "loss": 0.457, "step": 26392 }, { "epoch": 0.68, "grad_norm": 1.6784979104995728, "learning_rate": 5.0052730414453675e-06, "loss": 0.5471, "step": 26393 }, { "epoch": 0.68, "grad_norm": 2.0928502082824707, "learning_rate": 5.004553881297276e-06, "loss": 0.6859, "step": 26394 }, { "epoch": 0.68, "grad_norm": 1.9651052951812744, "learning_rate": 5.003834755574661e-06, "loss": 0.5011, "step": 26395 }, { "epoch": 0.68, "grad_norm": 1.069908857345581, "learning_rate": 5.003115664282478e-06, "loss": 0.4892, "step": 26396 }, { "epoch": 0.68, "grad_norm": 1.2058261632919312, "learning_rate": 5.002396607425679e-06, "loss": 0.5869, "step": 26397 }, { "epoch": 0.68, "grad_norm": 1.1358962059020996, "learning_rate": 5.001677585009227e-06, "loss": 0.5445, "step": 26398 }, { "epoch": 0.68, "grad_norm": 1.007095456123352, "learning_rate": 5.000958597038072e-06, "loss": 0.4493, "step": 26399 }, { "epoch": 0.68, "grad_norm": 1.6113566160202026, "learning_rate": 5.000239643517166e-06, "loss": 0.5113, "step": 26400 }, { "epoch": 0.68, "grad_norm": 2.6780195236206055, "learning_rate": 4.999520724451471e-06, "loss": 0.632, "step": 26401 }, { "epoch": 0.68, "grad_norm": 1.4576748609542847, "learning_rate": 4.998801839845937e-06, "loss": 0.6136, "step": 26402 }, { "epoch": 0.68, "grad_norm": 1.2701900005340576, "learning_rate": 4.99808298970552e-06, "loss": 0.5799, "step": 26403 }, { "epoch": 0.68, "grad_norm": 3.1100094318389893, "learning_rate": 4.997364174035168e-06, "loss": 0.6087, "step": 26404 }, { "epoch": 0.68, "grad_norm": 4.363597869873047, "learning_rate": 4.9966453928398425e-06, "loss": 0.6709, "step": 26405 }, { "epoch": 0.68, "grad_norm": 1.5231313705444336, "learning_rate": 4.995926646124494e-06, "loss": 0.4776, "step": 26406 }, { "epoch": 0.68, "grad_norm": 1.3189579248428345, "learning_rate": 4.99520793389407e-06, "loss": 0.5996, "step": 26407 }, { "epoch": 0.68, "grad_norm": 1.3576247692108154, "learning_rate": 4.994489256153535e-06, "loss": 0.4754, "step": 26408 }, { "epoch": 0.68, "grad_norm": 2.006018877029419, "learning_rate": 4.993770612907833e-06, "loss": 0.49, "step": 26409 }, { "epoch": 0.68, "grad_norm": 1.8908886909484863, "learning_rate": 4.99305200416192e-06, "loss": 0.6283, "step": 26410 }, { "epoch": 0.68, "grad_norm": 1.109973430633545, "learning_rate": 4.992333429920744e-06, "loss": 0.4517, "step": 26411 }, { "epoch": 0.68, "grad_norm": 4.037448883056641, "learning_rate": 4.991614890189261e-06, "loss": 0.6719, "step": 26412 }, { "epoch": 0.68, "grad_norm": 1.33441960811615, "learning_rate": 4.990896384972424e-06, "loss": 0.5793, "step": 26413 }, { "epoch": 0.68, "grad_norm": 2.240176200866699, "learning_rate": 4.9901779142751775e-06, "loss": 0.4453, "step": 26414 }, { "epoch": 0.68, "grad_norm": 2.512260913848877, "learning_rate": 4.989459478102481e-06, "loss": 0.5423, "step": 26415 }, { "epoch": 0.68, "grad_norm": 5.121477127075195, "learning_rate": 4.988741076459281e-06, "loss": 0.6105, "step": 26416 }, { "epoch": 0.68, "grad_norm": 1.0626157522201538, "learning_rate": 4.988022709350527e-06, "loss": 0.6195, "step": 26417 }, { "epoch": 0.68, "grad_norm": 1.42506742477417, "learning_rate": 4.987304376781175e-06, "loss": 0.5484, "step": 26418 }, { "epoch": 0.68, "grad_norm": 4.270167350769043, "learning_rate": 4.986586078756171e-06, "loss": 0.6175, "step": 26419 }, { "epoch": 0.68, "grad_norm": 1.145514726638794, "learning_rate": 4.985867815280466e-06, "loss": 0.4941, "step": 26420 }, { "epoch": 0.68, "grad_norm": 3.7286643981933594, "learning_rate": 4.985149586359007e-06, "loss": 0.4687, "step": 26421 }, { "epoch": 0.68, "grad_norm": 1.0871586799621582, "learning_rate": 4.98443139199675e-06, "loss": 0.5076, "step": 26422 }, { "epoch": 0.68, "grad_norm": 1.397792935371399, "learning_rate": 4.98371323219864e-06, "loss": 0.583, "step": 26423 }, { "epoch": 0.68, "grad_norm": 1.072024941444397, "learning_rate": 4.982995106969625e-06, "loss": 0.4477, "step": 26424 }, { "epoch": 0.68, "grad_norm": 1.7415333986282349, "learning_rate": 4.982277016314657e-06, "loss": 0.4548, "step": 26425 }, { "epoch": 0.68, "grad_norm": 0.9778587222099304, "learning_rate": 4.981558960238685e-06, "loss": 0.6144, "step": 26426 }, { "epoch": 0.68, "grad_norm": 1.121297836303711, "learning_rate": 4.980840938746655e-06, "loss": 0.4731, "step": 26427 }, { "epoch": 0.68, "grad_norm": 6.427231311798096, "learning_rate": 4.980122951843513e-06, "loss": 0.5031, "step": 26428 }, { "epoch": 0.68, "grad_norm": 1.437977910041809, "learning_rate": 4.979404999534213e-06, "loss": 0.4898, "step": 26429 }, { "epoch": 0.68, "grad_norm": 1.0801236629486084, "learning_rate": 4.9786870818237e-06, "loss": 0.5179, "step": 26430 }, { "epoch": 0.68, "grad_norm": 1.6858243942260742, "learning_rate": 4.977969198716917e-06, "loss": 0.5727, "step": 26431 }, { "epoch": 0.68, "grad_norm": 1.3359843492507935, "learning_rate": 4.977251350218818e-06, "loss": 0.5825, "step": 26432 }, { "epoch": 0.68, "grad_norm": 1.514243483543396, "learning_rate": 4.976533536334347e-06, "loss": 0.5775, "step": 26433 }, { "epoch": 0.68, "grad_norm": 7.392027854919434, "learning_rate": 4.975815757068448e-06, "loss": 0.6494, "step": 26434 }, { "epoch": 0.68, "grad_norm": 1.2357654571533203, "learning_rate": 4.975098012426074e-06, "loss": 0.6085, "step": 26435 }, { "epoch": 0.68, "grad_norm": 1.3906550407409668, "learning_rate": 4.974380302412167e-06, "loss": 0.5629, "step": 26436 }, { "epoch": 0.68, "grad_norm": 1.2531497478485107, "learning_rate": 4.973662627031674e-06, "loss": 0.4655, "step": 26437 }, { "epoch": 0.68, "grad_norm": 1.390541672706604, "learning_rate": 4.972944986289536e-06, "loss": 0.5641, "step": 26438 }, { "epoch": 0.68, "grad_norm": 1.3201526403427124, "learning_rate": 4.972227380190707e-06, "loss": 0.2455, "step": 26439 }, { "epoch": 0.68, "grad_norm": 1.175523281097412, "learning_rate": 4.9715098087401295e-06, "loss": 0.432, "step": 26440 }, { "epoch": 0.68, "grad_norm": 1.62343430519104, "learning_rate": 4.970792271942741e-06, "loss": 0.5837, "step": 26441 }, { "epoch": 0.68, "grad_norm": 4.846800804138184, "learning_rate": 4.9700747698034975e-06, "loss": 0.8663, "step": 26442 }, { "epoch": 0.68, "grad_norm": 1.7975000143051147, "learning_rate": 4.969357302327339e-06, "loss": 0.6175, "step": 26443 }, { "epoch": 0.68, "grad_norm": 1.3262944221496582, "learning_rate": 4.968639869519208e-06, "loss": 0.5243, "step": 26444 }, { "epoch": 0.68, "grad_norm": 1.565136432647705, "learning_rate": 4.967922471384048e-06, "loss": 0.4804, "step": 26445 }, { "epoch": 0.68, "grad_norm": 10.86852741241455, "learning_rate": 4.967205107926808e-06, "loss": 0.729, "step": 26446 }, { "epoch": 0.68, "grad_norm": 1.5378464460372925, "learning_rate": 4.966487779152428e-06, "loss": 0.6461, "step": 26447 }, { "epoch": 0.68, "grad_norm": 1.499878168106079, "learning_rate": 4.965770485065849e-06, "loss": 0.6123, "step": 26448 }, { "epoch": 0.68, "grad_norm": 6.744762897491455, "learning_rate": 4.96505322567202e-06, "loss": 0.5579, "step": 26449 }, { "epoch": 0.68, "grad_norm": 2.933887481689453, "learning_rate": 4.96433600097588e-06, "loss": 0.6654, "step": 26450 }, { "epoch": 0.68, "grad_norm": 1.333889126777649, "learning_rate": 4.963618810982373e-06, "loss": 0.4564, "step": 26451 }, { "epoch": 0.68, "grad_norm": 2.5913665294647217, "learning_rate": 4.962901655696439e-06, "loss": 0.7106, "step": 26452 }, { "epoch": 0.68, "grad_norm": 12.653847694396973, "learning_rate": 4.962184535123025e-06, "loss": 0.6924, "step": 26453 }, { "epoch": 0.68, "grad_norm": 0.9921767711639404, "learning_rate": 4.96146744926707e-06, "loss": 0.3731, "step": 26454 }, { "epoch": 0.68, "grad_norm": 2.6613824367523193, "learning_rate": 4.960750398133512e-06, "loss": 0.5526, "step": 26455 }, { "epoch": 0.68, "grad_norm": 8.605049133300781, "learning_rate": 4.9600333817272996e-06, "loss": 0.6341, "step": 26456 }, { "epoch": 0.68, "grad_norm": 1.4511847496032715, "learning_rate": 4.959316400053371e-06, "loss": 0.5613, "step": 26457 }, { "epoch": 0.68, "grad_norm": 2.4487051963806152, "learning_rate": 4.9585994531166626e-06, "loss": 0.4984, "step": 26458 }, { "epoch": 0.68, "grad_norm": 1.3696966171264648, "learning_rate": 4.957882540922124e-06, "loss": 0.5704, "step": 26459 }, { "epoch": 0.68, "grad_norm": 1.998366117477417, "learning_rate": 4.957165663474691e-06, "loss": 0.5054, "step": 26460 }, { "epoch": 0.68, "grad_norm": 0.9800342917442322, "learning_rate": 4.956448820779305e-06, "loss": 0.5305, "step": 26461 }, { "epoch": 0.68, "grad_norm": 1.298753261566162, "learning_rate": 4.955732012840899e-06, "loss": 0.676, "step": 26462 }, { "epoch": 0.68, "grad_norm": 1.5734786987304688, "learning_rate": 4.955015239664424e-06, "loss": 0.5223, "step": 26463 }, { "epoch": 0.68, "grad_norm": 1.5700328350067139, "learning_rate": 4.954298501254815e-06, "loss": 0.5772, "step": 26464 }, { "epoch": 0.68, "grad_norm": 12.347833633422852, "learning_rate": 4.953581797617006e-06, "loss": 0.5272, "step": 26465 }, { "epoch": 0.68, "grad_norm": 7.610454082489014, "learning_rate": 4.952865128755945e-06, "loss": 0.4975, "step": 26466 }, { "epoch": 0.68, "grad_norm": 1.9962437152862549, "learning_rate": 4.952148494676566e-06, "loss": 0.6088, "step": 26467 }, { "epoch": 0.68, "grad_norm": 1.4710265398025513, "learning_rate": 4.951431895383809e-06, "loss": 0.6158, "step": 26468 }, { "epoch": 0.68, "grad_norm": 2.0121123790740967, "learning_rate": 4.950715330882607e-06, "loss": 0.5879, "step": 26469 }, { "epoch": 0.68, "grad_norm": 11.864099502563477, "learning_rate": 4.949998801177908e-06, "loss": 0.579, "step": 26470 }, { "epoch": 0.68, "grad_norm": 1.6472539901733398, "learning_rate": 4.949282306274643e-06, "loss": 0.5307, "step": 26471 }, { "epoch": 0.68, "grad_norm": 1.6305243968963623, "learning_rate": 4.948565846177748e-06, "loss": 0.4938, "step": 26472 }, { "epoch": 0.68, "grad_norm": 1.1818767786026, "learning_rate": 4.947849420892167e-06, "loss": 0.5603, "step": 26473 }, { "epoch": 0.68, "grad_norm": 2.391007423400879, "learning_rate": 4.947133030422834e-06, "loss": 0.5487, "step": 26474 }, { "epoch": 0.68, "grad_norm": 2.9475274085998535, "learning_rate": 4.946416674774681e-06, "loss": 0.5188, "step": 26475 }, { "epoch": 0.68, "grad_norm": 2.1927220821380615, "learning_rate": 4.945700353952655e-06, "loss": 0.5697, "step": 26476 }, { "epoch": 0.68, "grad_norm": 1.2567665576934814, "learning_rate": 4.944984067961685e-06, "loss": 0.5285, "step": 26477 }, { "epoch": 0.68, "grad_norm": 5.123916149139404, "learning_rate": 4.9442678168067095e-06, "loss": 0.6191, "step": 26478 }, { "epoch": 0.68, "grad_norm": 1.5947234630584717, "learning_rate": 4.94355160049266e-06, "loss": 0.713, "step": 26479 }, { "epoch": 0.68, "grad_norm": 5.721795558929443, "learning_rate": 4.9428354190244794e-06, "loss": 0.7327, "step": 26480 }, { "epoch": 0.68, "grad_norm": 1.3897820711135864, "learning_rate": 4.942119272407101e-06, "loss": 0.3857, "step": 26481 }, { "epoch": 0.68, "grad_norm": 10.976655006408691, "learning_rate": 4.941403160645455e-06, "loss": 0.5211, "step": 26482 }, { "epoch": 0.68, "grad_norm": 1.7027229070663452, "learning_rate": 4.940687083744483e-06, "loss": 0.4359, "step": 26483 }, { "epoch": 0.68, "grad_norm": 1.693244218826294, "learning_rate": 4.939971041709119e-06, "loss": 0.5497, "step": 26484 }, { "epoch": 0.68, "grad_norm": 1.7452681064605713, "learning_rate": 4.939255034544294e-06, "loss": 0.4378, "step": 26485 }, { "epoch": 0.68, "grad_norm": 1.5612455606460571, "learning_rate": 4.93853906225494e-06, "loss": 0.5419, "step": 26486 }, { "epoch": 0.68, "grad_norm": 1.7433685064315796, "learning_rate": 4.937823124845999e-06, "loss": 0.5356, "step": 26487 }, { "epoch": 0.68, "grad_norm": 3.234893560409546, "learning_rate": 4.9371072223224e-06, "loss": 0.6447, "step": 26488 }, { "epoch": 0.68, "grad_norm": 4.371748447418213, "learning_rate": 4.936391354689074e-06, "loss": 0.5935, "step": 26489 }, { "epoch": 0.68, "grad_norm": 1.9522600173950195, "learning_rate": 4.935675521950961e-06, "loss": 0.545, "step": 26490 }, { "epoch": 0.68, "grad_norm": 12.89289665222168, "learning_rate": 4.934959724112991e-06, "loss": 0.5798, "step": 26491 }, { "epoch": 0.68, "grad_norm": 1.333182454109192, "learning_rate": 4.934243961180096e-06, "loss": 0.4467, "step": 26492 }, { "epoch": 0.68, "grad_norm": 2.0585620403289795, "learning_rate": 4.9335282331572065e-06, "loss": 0.6311, "step": 26493 }, { "epoch": 0.68, "grad_norm": 1.6998372077941895, "learning_rate": 4.93281254004926e-06, "loss": 0.4028, "step": 26494 }, { "epoch": 0.68, "grad_norm": 1.6467063426971436, "learning_rate": 4.932096881861186e-06, "loss": 0.4876, "step": 26495 }, { "epoch": 0.68, "grad_norm": 7.680053234100342, "learning_rate": 4.931381258597912e-06, "loss": 0.4666, "step": 26496 }, { "epoch": 0.68, "grad_norm": 1.9257780313491821, "learning_rate": 4.930665670264379e-06, "loss": 0.6397, "step": 26497 }, { "epoch": 0.68, "grad_norm": 7.094962120056152, "learning_rate": 4.929950116865513e-06, "loss": 0.7215, "step": 26498 }, { "epoch": 0.68, "grad_norm": 3.3495466709136963, "learning_rate": 4.929234598406241e-06, "loss": 0.4231, "step": 26499 }, { "epoch": 0.68, "grad_norm": 2.9486937522888184, "learning_rate": 4.928519114891502e-06, "loss": 0.5076, "step": 26500 }, { "epoch": 0.68, "grad_norm": 1.113466739654541, "learning_rate": 4.927803666326224e-06, "loss": 0.5253, "step": 26501 }, { "epoch": 0.68, "grad_norm": 1.2998294830322266, "learning_rate": 4.927088252715336e-06, "loss": 0.4572, "step": 26502 }, { "epoch": 0.68, "grad_norm": 1.1111412048339844, "learning_rate": 4.926372874063765e-06, "loss": 0.6079, "step": 26503 }, { "epoch": 0.68, "grad_norm": 1.411639928817749, "learning_rate": 4.9256575303764486e-06, "loss": 0.5849, "step": 26504 }, { "epoch": 0.68, "grad_norm": 1.3624011278152466, "learning_rate": 4.924942221658312e-06, "loss": 0.6422, "step": 26505 }, { "epoch": 0.68, "grad_norm": 2.0152361392974854, "learning_rate": 4.924226947914281e-06, "loss": 0.5551, "step": 26506 }, { "epoch": 0.68, "grad_norm": 1.1006382703781128, "learning_rate": 4.923511709149293e-06, "loss": 0.5322, "step": 26507 }, { "epoch": 0.68, "grad_norm": 1.2843116521835327, "learning_rate": 4.922796505368275e-06, "loss": 0.4995, "step": 26508 }, { "epoch": 0.68, "grad_norm": 2.278369903564453, "learning_rate": 4.922081336576151e-06, "loss": 0.6325, "step": 26509 }, { "epoch": 0.68, "grad_norm": 6.531566143035889, "learning_rate": 4.92136620277785e-06, "loss": 0.4978, "step": 26510 }, { "epoch": 0.68, "grad_norm": 1.4519761800765991, "learning_rate": 4.920651103978305e-06, "loss": 0.4702, "step": 26511 }, { "epoch": 0.68, "grad_norm": 2.2355921268463135, "learning_rate": 4.919936040182441e-06, "loss": 0.6222, "step": 26512 }, { "epoch": 0.68, "grad_norm": 3.3198869228363037, "learning_rate": 4.9192210113951844e-06, "loss": 0.8207, "step": 26513 }, { "epoch": 0.68, "grad_norm": 1.425972819328308, "learning_rate": 4.918506017621467e-06, "loss": 0.5815, "step": 26514 }, { "epoch": 0.68, "grad_norm": 1.0199261903762817, "learning_rate": 4.917791058866213e-06, "loss": 0.4415, "step": 26515 }, { "epoch": 0.68, "grad_norm": 1.2040804624557495, "learning_rate": 4.917076135134347e-06, "loss": 0.4868, "step": 26516 }, { "epoch": 0.68, "grad_norm": 1.7206766605377197, "learning_rate": 4.916361246430804e-06, "loss": 0.5671, "step": 26517 }, { "epoch": 0.68, "grad_norm": 1.5230365991592407, "learning_rate": 4.915646392760503e-06, "loss": 0.6003, "step": 26518 }, { "epoch": 0.68, "grad_norm": 2.2722296714782715, "learning_rate": 4.914931574128375e-06, "loss": 0.6449, "step": 26519 }, { "epoch": 0.68, "grad_norm": 2.4319570064544678, "learning_rate": 4.914216790539339e-06, "loss": 0.5909, "step": 26520 }, { "epoch": 0.68, "grad_norm": 2.500737190246582, "learning_rate": 4.913502041998329e-06, "loss": 0.4187, "step": 26521 }, { "epoch": 0.68, "grad_norm": 1.078881859779358, "learning_rate": 4.9127873285102725e-06, "loss": 0.5587, "step": 26522 }, { "epoch": 0.68, "grad_norm": 1.4349870681762695, "learning_rate": 4.912072650080081e-06, "loss": 0.4866, "step": 26523 }, { "epoch": 0.68, "grad_norm": 1.2557246685028076, "learning_rate": 4.911358006712692e-06, "loss": 0.5167, "step": 26524 }, { "epoch": 0.68, "grad_norm": 0.9127871990203857, "learning_rate": 4.910643398413028e-06, "loss": 0.4322, "step": 26525 }, { "epoch": 0.68, "grad_norm": 2.7133877277374268, "learning_rate": 4.909928825186007e-06, "loss": 0.4999, "step": 26526 }, { "epoch": 0.68, "grad_norm": 1.1839405298233032, "learning_rate": 4.909214287036562e-06, "loss": 0.3155, "step": 26527 }, { "epoch": 0.68, "grad_norm": 1.3478450775146484, "learning_rate": 4.908499783969615e-06, "loss": 0.4481, "step": 26528 }, { "epoch": 0.68, "grad_norm": 1.1589125394821167, "learning_rate": 4.907785315990083e-06, "loss": 0.6053, "step": 26529 }, { "epoch": 0.68, "grad_norm": 7.899194717407227, "learning_rate": 4.907070883102901e-06, "loss": 0.474, "step": 26530 }, { "epoch": 0.68, "grad_norm": 1.284896969795227, "learning_rate": 4.906356485312986e-06, "loss": 0.5203, "step": 26531 }, { "epoch": 0.68, "grad_norm": 3.1472129821777344, "learning_rate": 4.905642122625263e-06, "loss": 0.6711, "step": 26532 }, { "epoch": 0.68, "grad_norm": 1.1618373394012451, "learning_rate": 4.904927795044649e-06, "loss": 0.5265, "step": 26533 }, { "epoch": 0.68, "grad_norm": 1.714737892150879, "learning_rate": 4.904213502576076e-06, "loss": 0.6455, "step": 26534 }, { "epoch": 0.68, "grad_norm": 5.02479362487793, "learning_rate": 4.903499245224462e-06, "loss": 0.5509, "step": 26535 }, { "epoch": 0.68, "grad_norm": 1.4692516326904297, "learning_rate": 4.902785022994725e-06, "loss": 0.4245, "step": 26536 }, { "epoch": 0.68, "grad_norm": 5.821251392364502, "learning_rate": 4.9020708358917955e-06, "loss": 0.4772, "step": 26537 }, { "epoch": 0.68, "grad_norm": 5.395853042602539, "learning_rate": 4.901356683920591e-06, "loss": 0.5379, "step": 26538 }, { "epoch": 0.68, "grad_norm": 1.4885907173156738, "learning_rate": 4.900642567086032e-06, "loss": 0.6533, "step": 26539 }, { "epoch": 0.68, "grad_norm": 2.4264919757843018, "learning_rate": 4.899928485393039e-06, "loss": 0.6272, "step": 26540 }, { "epoch": 0.68, "grad_norm": 1.0442360639572144, "learning_rate": 4.899214438846537e-06, "loss": 0.5373, "step": 26541 }, { "epoch": 0.68, "grad_norm": 1.3315953016281128, "learning_rate": 4.898500427451444e-06, "loss": 0.6402, "step": 26542 }, { "epoch": 0.68, "grad_norm": 1.800423264503479, "learning_rate": 4.897786451212678e-06, "loss": 0.6133, "step": 26543 }, { "epoch": 0.68, "grad_norm": 2.6517598628997803, "learning_rate": 4.897072510135166e-06, "loss": 0.7122, "step": 26544 }, { "epoch": 0.68, "grad_norm": 1.2381173372268677, "learning_rate": 4.896358604223825e-06, "loss": 0.5496, "step": 26545 }, { "epoch": 0.68, "grad_norm": 2.994879722595215, "learning_rate": 4.8956447334835725e-06, "loss": 0.577, "step": 26546 }, { "epoch": 0.68, "grad_norm": 1.1358695030212402, "learning_rate": 4.8949308979193265e-06, "loss": 0.49, "step": 26547 }, { "epoch": 0.68, "grad_norm": 2.1577296257019043, "learning_rate": 4.894217097536013e-06, "loss": 0.7357, "step": 26548 }, { "epoch": 0.68, "grad_norm": 4.885995864868164, "learning_rate": 4.893503332338547e-06, "loss": 0.766, "step": 26549 }, { "epoch": 0.68, "grad_norm": 7.722571849822998, "learning_rate": 4.892789602331846e-06, "loss": 0.5203, "step": 26550 }, { "epoch": 0.68, "grad_norm": 1.5295606851577759, "learning_rate": 4.892075907520832e-06, "loss": 0.5088, "step": 26551 }, { "epoch": 0.68, "grad_norm": 1.4193085432052612, "learning_rate": 4.891362247910423e-06, "loss": 0.533, "step": 26552 }, { "epoch": 0.68, "grad_norm": 1.6570897102355957, "learning_rate": 4.890648623505531e-06, "loss": 0.5003, "step": 26553 }, { "epoch": 0.68, "grad_norm": 1.0022096633911133, "learning_rate": 4.889935034311084e-06, "loss": 0.4073, "step": 26554 }, { "epoch": 0.68, "grad_norm": 10.45490837097168, "learning_rate": 4.889221480331993e-06, "loss": 0.494, "step": 26555 }, { "epoch": 0.68, "grad_norm": 1.193805456161499, "learning_rate": 4.888507961573178e-06, "loss": 0.4905, "step": 26556 }, { "epoch": 0.68, "grad_norm": 0.8548697829246521, "learning_rate": 4.88779447803955e-06, "loss": 0.4315, "step": 26557 }, { "epoch": 0.68, "grad_norm": 1.2642450332641602, "learning_rate": 4.887081029736035e-06, "loss": 0.4023, "step": 26558 }, { "epoch": 0.68, "grad_norm": 1.493595004081726, "learning_rate": 4.886367616667545e-06, "loss": 0.4909, "step": 26559 }, { "epoch": 0.68, "grad_norm": 1.0421937704086304, "learning_rate": 4.885654238838993e-06, "loss": 0.3497, "step": 26560 }, { "epoch": 0.68, "grad_norm": 10.599296569824219, "learning_rate": 4.884940896255301e-06, "loss": 0.6324, "step": 26561 }, { "epoch": 0.68, "grad_norm": 2.7961161136627197, "learning_rate": 4.884227588921385e-06, "loss": 0.6501, "step": 26562 }, { "epoch": 0.68, "grad_norm": 1.0447916984558105, "learning_rate": 4.883514316842157e-06, "loss": 0.3659, "step": 26563 }, { "epoch": 0.68, "grad_norm": 3.0443286895751953, "learning_rate": 4.882801080022531e-06, "loss": 0.4809, "step": 26564 }, { "epoch": 0.68, "grad_norm": 3.5664801597595215, "learning_rate": 4.882087878467429e-06, "loss": 0.6274, "step": 26565 }, { "epoch": 0.68, "grad_norm": 2.712888717651367, "learning_rate": 4.8813747121817616e-06, "loss": 0.6566, "step": 26566 }, { "epoch": 0.68, "grad_norm": 1.5899170637130737, "learning_rate": 4.880661581170439e-06, "loss": 0.5322, "step": 26567 }, { "epoch": 0.68, "grad_norm": 1.037106990814209, "learning_rate": 4.879948485438385e-06, "loss": 0.5233, "step": 26568 }, { "epoch": 0.68, "grad_norm": 1.1770752668380737, "learning_rate": 4.87923542499051e-06, "loss": 0.5822, "step": 26569 }, { "epoch": 0.68, "grad_norm": 4.9589948654174805, "learning_rate": 4.878522399831722e-06, "loss": 0.6497, "step": 26570 }, { "epoch": 0.68, "grad_norm": 1.8589593172073364, "learning_rate": 4.877809409966945e-06, "loss": 0.6995, "step": 26571 }, { "epoch": 0.68, "grad_norm": 1.8953486680984497, "learning_rate": 4.877096455401088e-06, "loss": 0.6278, "step": 26572 }, { "epoch": 0.68, "grad_norm": 1.2956056594848633, "learning_rate": 4.8763835361390625e-06, "loss": 0.519, "step": 26573 }, { "epoch": 0.68, "grad_norm": 2.5177817344665527, "learning_rate": 4.8756706521857785e-06, "loss": 0.6039, "step": 26574 }, { "epoch": 0.68, "grad_norm": 1.4669873714447021, "learning_rate": 4.874957803546159e-06, "loss": 0.4796, "step": 26575 }, { "epoch": 0.68, "grad_norm": 1.0865654945373535, "learning_rate": 4.874244990225107e-06, "loss": 0.5492, "step": 26576 }, { "epoch": 0.68, "grad_norm": 1.9152225255966187, "learning_rate": 4.873532212227537e-06, "loss": 0.4863, "step": 26577 }, { "epoch": 0.68, "grad_norm": 1.6183953285217285, "learning_rate": 4.872819469558366e-06, "loss": 0.5099, "step": 26578 }, { "epoch": 0.68, "grad_norm": 1.0615651607513428, "learning_rate": 4.872106762222501e-06, "loss": 0.4266, "step": 26579 }, { "epoch": 0.68, "grad_norm": 1.1117498874664307, "learning_rate": 4.871394090224856e-06, "loss": 0.5898, "step": 26580 }, { "epoch": 0.68, "grad_norm": 1.2309290170669556, "learning_rate": 4.870681453570335e-06, "loss": 0.4704, "step": 26581 }, { "epoch": 0.68, "grad_norm": 3.4571352005004883, "learning_rate": 4.86996885226386e-06, "loss": 0.6408, "step": 26582 }, { "epoch": 0.68, "grad_norm": 1.627803087234497, "learning_rate": 4.869256286310337e-06, "loss": 0.5139, "step": 26583 }, { "epoch": 0.68, "grad_norm": 2.429094076156616, "learning_rate": 4.868543755714671e-06, "loss": 0.6362, "step": 26584 }, { "epoch": 0.68, "grad_norm": 2.096715211868286, "learning_rate": 4.867831260481783e-06, "loss": 0.5045, "step": 26585 }, { "epoch": 0.68, "grad_norm": 1.4636832475662231, "learning_rate": 4.8671188006165756e-06, "loss": 0.5827, "step": 26586 }, { "epoch": 0.68, "grad_norm": 2.6243398189544678, "learning_rate": 4.866406376123962e-06, "loss": 0.693, "step": 26587 }, { "epoch": 0.68, "grad_norm": 1.359378457069397, "learning_rate": 4.865693987008846e-06, "loss": 0.5733, "step": 26588 }, { "epoch": 0.68, "grad_norm": 3.898266553878784, "learning_rate": 4.864981633276146e-06, "loss": 0.7093, "step": 26589 }, { "epoch": 0.68, "grad_norm": 1.0826563835144043, "learning_rate": 4.864269314930766e-06, "loss": 0.4276, "step": 26590 }, { "epoch": 0.68, "grad_norm": 1.791294813156128, "learning_rate": 4.8635570319776115e-06, "loss": 0.6316, "step": 26591 }, { "epoch": 0.68, "grad_norm": 1.9495856761932373, "learning_rate": 4.862844784421599e-06, "loss": 0.4946, "step": 26592 }, { "epoch": 0.68, "grad_norm": 5.241844177246094, "learning_rate": 4.8621325722676315e-06, "loss": 0.6391, "step": 26593 }, { "epoch": 0.68, "grad_norm": 3.7992072105407715, "learning_rate": 4.861420395520616e-06, "loss": 0.7296, "step": 26594 }, { "epoch": 0.68, "grad_norm": 0.9023293256759644, "learning_rate": 4.860708254185466e-06, "loss": 0.3955, "step": 26595 }, { "epoch": 0.68, "grad_norm": 2.776853561401367, "learning_rate": 4.859996148267086e-06, "loss": 0.5504, "step": 26596 }, { "epoch": 0.68, "grad_norm": 1.3730114698410034, "learning_rate": 4.859284077770383e-06, "loss": 0.5245, "step": 26597 }, { "epoch": 0.68, "grad_norm": 1.0720293521881104, "learning_rate": 4.858572042700261e-06, "loss": 0.6082, "step": 26598 }, { "epoch": 0.68, "grad_norm": 1.287144422531128, "learning_rate": 4.857860043061634e-06, "loss": 0.4654, "step": 26599 }, { "epoch": 0.68, "grad_norm": 4.28037691116333, "learning_rate": 4.857148078859405e-06, "loss": 0.6325, "step": 26600 }, { "epoch": 0.68, "grad_norm": 1.7670810222625732, "learning_rate": 4.856436150098477e-06, "loss": 0.5008, "step": 26601 }, { "epoch": 0.68, "grad_norm": 3.56048846244812, "learning_rate": 4.855724256783763e-06, "loss": 0.5125, "step": 26602 }, { "epoch": 0.68, "grad_norm": 1.0800724029541016, "learning_rate": 4.8550123989201645e-06, "loss": 0.4493, "step": 26603 }, { "epoch": 0.68, "grad_norm": 1.6832503080368042, "learning_rate": 4.854300576512589e-06, "loss": 0.6122, "step": 26604 }, { "epoch": 0.68, "grad_norm": 5.052398681640625, "learning_rate": 4.853588789565937e-06, "loss": 0.4243, "step": 26605 }, { "epoch": 0.68, "grad_norm": 1.3658864498138428, "learning_rate": 4.852877038085122e-06, "loss": 0.3906, "step": 26606 }, { "epoch": 0.68, "grad_norm": 1.658050298690796, "learning_rate": 4.8521653220750455e-06, "loss": 0.5628, "step": 26607 }, { "epoch": 0.68, "grad_norm": 1.794025182723999, "learning_rate": 4.851453641540607e-06, "loss": 0.5981, "step": 26608 }, { "epoch": 0.68, "grad_norm": 1.5315890312194824, "learning_rate": 4.850741996486717e-06, "loss": 0.4508, "step": 26609 }, { "epoch": 0.68, "grad_norm": 1.1811422109603882, "learning_rate": 4.850030386918281e-06, "loss": 0.4653, "step": 26610 }, { "epoch": 0.68, "grad_norm": 9.189352035522461, "learning_rate": 4.849318812840194e-06, "loss": 0.7475, "step": 26611 }, { "epoch": 0.68, "grad_norm": 2.2634520530700684, "learning_rate": 4.848607274257372e-06, "loss": 0.6264, "step": 26612 }, { "epoch": 0.68, "grad_norm": 1.917117714881897, "learning_rate": 4.847895771174711e-06, "loss": 0.6506, "step": 26613 }, { "epoch": 0.68, "grad_norm": 1.0425078868865967, "learning_rate": 4.847184303597115e-06, "loss": 0.4303, "step": 26614 }, { "epoch": 0.68, "grad_norm": 1.2030061483383179, "learning_rate": 4.846472871529484e-06, "loss": 0.425, "step": 26615 }, { "epoch": 0.68, "grad_norm": 6.866786479949951, "learning_rate": 4.845761474976729e-06, "loss": 0.4523, "step": 26616 }, { "epoch": 0.68, "grad_norm": 1.2020745277404785, "learning_rate": 4.8450501139437465e-06, "loss": 0.636, "step": 26617 }, { "epoch": 0.68, "grad_norm": 1.0573153495788574, "learning_rate": 4.844338788435437e-06, "loss": 0.4135, "step": 26618 }, { "epoch": 0.68, "grad_norm": 3.732917308807373, "learning_rate": 4.8436274984567076e-06, "loss": 0.6689, "step": 26619 }, { "epoch": 0.68, "grad_norm": 6.74533224105835, "learning_rate": 4.84291624401246e-06, "loss": 0.4189, "step": 26620 }, { "epoch": 0.68, "grad_norm": 1.7170405387878418, "learning_rate": 4.8422050251075926e-06, "loss": 0.4373, "step": 26621 }, { "epoch": 0.68, "grad_norm": 1.9534815549850464, "learning_rate": 4.841493841747004e-06, "loss": 0.5106, "step": 26622 }, { "epoch": 0.68, "grad_norm": 5.540566921234131, "learning_rate": 4.840782693935603e-06, "loss": 0.6194, "step": 26623 }, { "epoch": 0.68, "grad_norm": 3.311443567276001, "learning_rate": 4.840071581678286e-06, "loss": 0.7429, "step": 26624 }, { "epoch": 0.68, "grad_norm": 1.6517095565795898, "learning_rate": 4.839360504979951e-06, "loss": 0.4908, "step": 26625 }, { "epoch": 0.68, "grad_norm": 1.3107270002365112, "learning_rate": 4.838649463845504e-06, "loss": 0.6006, "step": 26626 }, { "epoch": 0.68, "grad_norm": 1.500651240348816, "learning_rate": 4.837938458279841e-06, "loss": 0.4279, "step": 26627 }, { "epoch": 0.68, "grad_norm": 1.562059760093689, "learning_rate": 4.837227488287866e-06, "loss": 0.5069, "step": 26628 }, { "epoch": 0.68, "grad_norm": 1.5095902681350708, "learning_rate": 4.83651655387447e-06, "loss": 0.4723, "step": 26629 }, { "epoch": 0.68, "grad_norm": 1.3697293996810913, "learning_rate": 4.835805655044562e-06, "loss": 0.6148, "step": 26630 }, { "epoch": 0.68, "grad_norm": 1.2584302425384521, "learning_rate": 4.835094791803036e-06, "loss": 0.4645, "step": 26631 }, { "epoch": 0.68, "grad_norm": 3.1483705043792725, "learning_rate": 4.83438396415479e-06, "loss": 0.5325, "step": 26632 }, { "epoch": 0.68, "grad_norm": 14.645554542541504, "learning_rate": 4.833673172104726e-06, "loss": 0.6612, "step": 26633 }, { "epoch": 0.68, "grad_norm": 1.0065665245056152, "learning_rate": 4.832962415657741e-06, "loss": 0.4748, "step": 26634 }, { "epoch": 0.68, "grad_norm": 2.5181846618652344, "learning_rate": 4.83225169481873e-06, "loss": 0.4877, "step": 26635 }, { "epoch": 0.68, "grad_norm": 2.424126386642456, "learning_rate": 4.831541009592597e-06, "loss": 0.7416, "step": 26636 }, { "epoch": 0.68, "grad_norm": 0.8993363380432129, "learning_rate": 4.830830359984238e-06, "loss": 0.3206, "step": 26637 }, { "epoch": 0.68, "grad_norm": 1.1203323602676392, "learning_rate": 4.8301197459985465e-06, "loss": 0.488, "step": 26638 }, { "epoch": 0.68, "grad_norm": 2.1851918697357178, "learning_rate": 4.829409167640418e-06, "loss": 0.6772, "step": 26639 }, { "epoch": 0.68, "grad_norm": 4.573085308074951, "learning_rate": 4.828698624914757e-06, "loss": 0.6253, "step": 26640 }, { "epoch": 0.68, "grad_norm": 2.8587987422943115, "learning_rate": 4.827988117826456e-06, "loss": 0.5446, "step": 26641 }, { "epoch": 0.68, "grad_norm": 1.2000765800476074, "learning_rate": 4.827277646380408e-06, "loss": 0.4757, "step": 26642 }, { "epoch": 0.68, "grad_norm": 2.0108675956726074, "learning_rate": 4.826567210581517e-06, "loss": 0.777, "step": 26643 }, { "epoch": 0.68, "grad_norm": 1.4410362243652344, "learning_rate": 4.825856810434674e-06, "loss": 0.672, "step": 26644 }, { "epoch": 0.68, "grad_norm": 4.908048629760742, "learning_rate": 4.825146445944775e-06, "loss": 0.6313, "step": 26645 }, { "epoch": 0.68, "grad_norm": 2.2781240940093994, "learning_rate": 4.8244361171167105e-06, "loss": 0.5122, "step": 26646 }, { "epoch": 0.68, "grad_norm": 1.8045066595077515, "learning_rate": 4.8237258239553865e-06, "loss": 0.532, "step": 26647 }, { "epoch": 0.68, "grad_norm": 1.2131596803665161, "learning_rate": 4.823015566465692e-06, "loss": 0.4174, "step": 26648 }, { "epoch": 0.68, "grad_norm": 2.5772783756256104, "learning_rate": 4.822305344652517e-06, "loss": 0.5247, "step": 26649 }, { "epoch": 0.68, "grad_norm": 1.189637303352356, "learning_rate": 4.8215951585207645e-06, "loss": 0.5698, "step": 26650 }, { "epoch": 0.68, "grad_norm": 2.9260101318359375, "learning_rate": 4.820885008075325e-06, "loss": 0.5495, "step": 26651 }, { "epoch": 0.68, "grad_norm": 8.053138732910156, "learning_rate": 4.820174893321088e-06, "loss": 0.6565, "step": 26652 }, { "epoch": 0.68, "grad_norm": 3.1213252544403076, "learning_rate": 4.819464814262956e-06, "loss": 0.4414, "step": 26653 }, { "epoch": 0.68, "grad_norm": 13.495990753173828, "learning_rate": 4.8187547709058166e-06, "loss": 0.5994, "step": 26654 }, { "epoch": 0.68, "grad_norm": 1.8297879695892334, "learning_rate": 4.818044763254566e-06, "loss": 0.6518, "step": 26655 }, { "epoch": 0.68, "grad_norm": 2.844789981842041, "learning_rate": 4.81733479131409e-06, "loss": 0.609, "step": 26656 }, { "epoch": 0.68, "grad_norm": 2.164371967315674, "learning_rate": 4.816624855089291e-06, "loss": 0.6626, "step": 26657 }, { "epoch": 0.68, "grad_norm": 1.4815768003463745, "learning_rate": 4.8159149545850556e-06, "loss": 0.4699, "step": 26658 }, { "epoch": 0.68, "grad_norm": 2.014458179473877, "learning_rate": 4.815205089806275e-06, "loss": 0.6821, "step": 26659 }, { "epoch": 0.68, "grad_norm": 1.1697046756744385, "learning_rate": 4.814495260757847e-06, "loss": 0.4046, "step": 26660 }, { "epoch": 0.68, "grad_norm": 1.6152573823928833, "learning_rate": 4.8137854674446596e-06, "loss": 0.5694, "step": 26661 }, { "epoch": 0.68, "grad_norm": 1.5674989223480225, "learning_rate": 4.813075709871604e-06, "loss": 0.634, "step": 26662 }, { "epoch": 0.68, "grad_norm": 4.327645301818848, "learning_rate": 4.812365988043569e-06, "loss": 0.5323, "step": 26663 }, { "epoch": 0.68, "grad_norm": 1.5872136354446411, "learning_rate": 4.811656301965451e-06, "loss": 0.4314, "step": 26664 }, { "epoch": 0.68, "grad_norm": 5.942809104919434, "learning_rate": 4.81094665164214e-06, "loss": 0.5094, "step": 26665 }, { "epoch": 0.68, "grad_norm": 1.6327749490737915, "learning_rate": 4.810237037078519e-06, "loss": 0.4954, "step": 26666 }, { "epoch": 0.68, "grad_norm": 1.5539084672927856, "learning_rate": 4.809527458279489e-06, "loss": 0.6327, "step": 26667 }, { "epoch": 0.68, "grad_norm": 1.4647321701049805, "learning_rate": 4.8088179152499335e-06, "loss": 0.5307, "step": 26668 }, { "epoch": 0.68, "grad_norm": 1.3947468996047974, "learning_rate": 4.808108407994745e-06, "loss": 0.4339, "step": 26669 }, { "epoch": 0.68, "grad_norm": 2.106013536453247, "learning_rate": 4.807398936518807e-06, "loss": 0.7514, "step": 26670 }, { "epoch": 0.68, "grad_norm": 1.872346043586731, "learning_rate": 4.806689500827016e-06, "loss": 0.4904, "step": 26671 }, { "epoch": 0.68, "grad_norm": 3.582890510559082, "learning_rate": 4.805980100924259e-06, "loss": 0.6139, "step": 26672 }, { "epoch": 0.68, "grad_norm": 1.3711059093475342, "learning_rate": 4.805270736815421e-06, "loss": 0.6314, "step": 26673 }, { "epoch": 0.68, "grad_norm": 1.2876968383789062, "learning_rate": 4.804561408505396e-06, "loss": 0.6002, "step": 26674 }, { "epoch": 0.68, "grad_norm": 4.32792854309082, "learning_rate": 4.80385211599907e-06, "loss": 0.7541, "step": 26675 }, { "epoch": 0.68, "grad_norm": 1.5179399251937866, "learning_rate": 4.803142859301327e-06, "loss": 0.5176, "step": 26676 }, { "epoch": 0.68, "grad_norm": 1.5602989196777344, "learning_rate": 4.802433638417062e-06, "loss": 0.5324, "step": 26677 }, { "epoch": 0.68, "grad_norm": 1.3860994577407837, "learning_rate": 4.80172445335116e-06, "loss": 0.6013, "step": 26678 }, { "epoch": 0.68, "grad_norm": 1.2244508266448975, "learning_rate": 4.8010153041085065e-06, "loss": 0.3972, "step": 26679 }, { "epoch": 0.68, "grad_norm": 0.9845122694969177, "learning_rate": 4.800306190693985e-06, "loss": 0.5153, "step": 26680 }, { "epoch": 0.68, "grad_norm": 1.722491979598999, "learning_rate": 4.799597113112491e-06, "loss": 0.368, "step": 26681 }, { "epoch": 0.68, "grad_norm": 2.6958045959472656, "learning_rate": 4.798888071368906e-06, "loss": 0.4664, "step": 26682 }, { "epoch": 0.68, "grad_norm": 1.5685123205184937, "learning_rate": 4.798179065468113e-06, "loss": 0.3966, "step": 26683 }, { "epoch": 0.68, "grad_norm": 3.359206438064575, "learning_rate": 4.7974700954150045e-06, "loss": 0.4363, "step": 26684 }, { "epoch": 0.68, "grad_norm": 1.865297794342041, "learning_rate": 4.796761161214465e-06, "loss": 0.5452, "step": 26685 }, { "epoch": 0.68, "grad_norm": 1.1007792949676514, "learning_rate": 4.796052262871378e-06, "loss": 0.3419, "step": 26686 }, { "epoch": 0.68, "grad_norm": 5.845986366271973, "learning_rate": 4.7953434003906266e-06, "loss": 0.3945, "step": 26687 }, { "epoch": 0.68, "grad_norm": 1.4032913446426392, "learning_rate": 4.794634573777101e-06, "loss": 0.527, "step": 26688 }, { "epoch": 0.68, "grad_norm": 1.2627016305923462, "learning_rate": 4.793925783035685e-06, "loss": 0.5089, "step": 26689 }, { "epoch": 0.68, "grad_norm": 6.630542755126953, "learning_rate": 4.793217028171256e-06, "loss": 0.6266, "step": 26690 }, { "epoch": 0.68, "grad_norm": 1.094849944114685, "learning_rate": 4.7925083091887094e-06, "loss": 0.5327, "step": 26691 }, { "epoch": 0.68, "grad_norm": 6.519950866699219, "learning_rate": 4.791799626092923e-06, "loss": 0.552, "step": 26692 }, { "epoch": 0.68, "grad_norm": 6.527384281158447, "learning_rate": 4.791090978888781e-06, "loss": 0.7489, "step": 26693 }, { "epoch": 0.68, "grad_norm": 14.917596817016602, "learning_rate": 4.7903823675811646e-06, "loss": 0.4731, "step": 26694 }, { "epoch": 0.68, "grad_norm": 3.9569647312164307, "learning_rate": 4.789673792174964e-06, "loss": 0.624, "step": 26695 }, { "epoch": 0.68, "grad_norm": 1.3906298875808716, "learning_rate": 4.788965252675057e-06, "loss": 0.4797, "step": 26696 }, { "epoch": 0.68, "grad_norm": 1.2997404336929321, "learning_rate": 4.7882567490863255e-06, "loss": 0.5751, "step": 26697 }, { "epoch": 0.68, "grad_norm": 1.1275943517684937, "learning_rate": 4.787548281413657e-06, "loss": 0.5753, "step": 26698 }, { "epoch": 0.68, "grad_norm": 5.444533348083496, "learning_rate": 4.7868398496619305e-06, "loss": 0.5635, "step": 26699 }, { "epoch": 0.68, "grad_norm": 2.337354898452759, "learning_rate": 4.786131453836025e-06, "loss": 0.6836, "step": 26700 }, { "epoch": 0.68, "grad_norm": 2.546827793121338, "learning_rate": 4.78542309394083e-06, "loss": 0.645, "step": 26701 }, { "epoch": 0.68, "grad_norm": 5.643177509307861, "learning_rate": 4.784714769981222e-06, "loss": 0.6045, "step": 26702 }, { "epoch": 0.68, "grad_norm": 0.9435960650444031, "learning_rate": 4.784006481962085e-06, "loss": 0.4715, "step": 26703 }, { "epoch": 0.68, "grad_norm": 1.506645917892456, "learning_rate": 4.7832982298882934e-06, "loss": 0.5666, "step": 26704 }, { "epoch": 0.68, "grad_norm": 1.6798065900802612, "learning_rate": 4.782590013764736e-06, "loss": 0.6622, "step": 26705 }, { "epoch": 0.68, "grad_norm": 1.5648671388626099, "learning_rate": 4.781881833596291e-06, "loss": 0.5546, "step": 26706 }, { "epoch": 0.68, "grad_norm": 0.9287816882133484, "learning_rate": 4.781173689387835e-06, "loss": 0.6454, "step": 26707 }, { "epoch": 0.68, "grad_norm": 1.3769762516021729, "learning_rate": 4.7804655811442546e-06, "loss": 0.4721, "step": 26708 }, { "epoch": 0.68, "grad_norm": 1.287723422050476, "learning_rate": 4.779757508870426e-06, "loss": 0.5313, "step": 26709 }, { "epoch": 0.68, "grad_norm": 1.664049506187439, "learning_rate": 4.779049472571229e-06, "loss": 0.4939, "step": 26710 }, { "epoch": 0.68, "grad_norm": 3.153855323791504, "learning_rate": 4.7783414722515384e-06, "loss": 0.5026, "step": 26711 }, { "epoch": 0.68, "grad_norm": 1.8473985195159912, "learning_rate": 4.777633507916243e-06, "loss": 0.566, "step": 26712 }, { "epoch": 0.68, "grad_norm": 1.0887752771377563, "learning_rate": 4.7769255795702155e-06, "loss": 0.3762, "step": 26713 }, { "epoch": 0.68, "grad_norm": 4.476734161376953, "learning_rate": 4.776217687218332e-06, "loss": 0.6092, "step": 26714 }, { "epoch": 0.68, "grad_norm": 3.768716812133789, "learning_rate": 4.775509830865477e-06, "loss": 0.4839, "step": 26715 }, { "epoch": 0.68, "grad_norm": 1.9638829231262207, "learning_rate": 4.774802010516527e-06, "loss": 0.4942, "step": 26716 }, { "epoch": 0.68, "grad_norm": 2.1674039363861084, "learning_rate": 4.7740942261763546e-06, "loss": 0.6113, "step": 26717 }, { "epoch": 0.68, "grad_norm": 1.2995631694793701, "learning_rate": 4.773386477849846e-06, "loss": 0.5324, "step": 26718 }, { "epoch": 0.68, "grad_norm": 0.9769519567489624, "learning_rate": 4.772678765541874e-06, "loss": 0.3659, "step": 26719 }, { "epoch": 0.68, "grad_norm": 2.3960683345794678, "learning_rate": 4.771971089257315e-06, "loss": 0.6708, "step": 26720 }, { "epoch": 0.68, "grad_norm": 1.2445908784866333, "learning_rate": 4.771263449001044e-06, "loss": 0.5652, "step": 26721 }, { "epoch": 0.68, "grad_norm": 1.4945104122161865, "learning_rate": 4.770555844777943e-06, "loss": 0.4576, "step": 26722 }, { "epoch": 0.68, "grad_norm": 4.888328552246094, "learning_rate": 4.769848276592886e-06, "loss": 0.5153, "step": 26723 }, { "epoch": 0.68, "grad_norm": 1.5342882871627808, "learning_rate": 4.769140744450745e-06, "loss": 0.5103, "step": 26724 }, { "epoch": 0.68, "grad_norm": 1.371871829032898, "learning_rate": 4.768433248356403e-06, "loss": 0.5714, "step": 26725 }, { "epoch": 0.69, "grad_norm": 1.8640375137329102, "learning_rate": 4.767725788314733e-06, "loss": 0.5481, "step": 26726 }, { "epoch": 0.69, "grad_norm": 1.6999778747558594, "learning_rate": 4.76701836433061e-06, "loss": 0.4862, "step": 26727 }, { "epoch": 0.69, "grad_norm": 1.1000964641571045, "learning_rate": 4.766310976408905e-06, "loss": 0.4857, "step": 26728 }, { "epoch": 0.69, "grad_norm": 2.0358171463012695, "learning_rate": 4.765603624554499e-06, "loss": 0.4544, "step": 26729 }, { "epoch": 0.69, "grad_norm": 1.7001203298568726, "learning_rate": 4.764896308772264e-06, "loss": 0.7187, "step": 26730 }, { "epoch": 0.69, "grad_norm": 1.0528424978256226, "learning_rate": 4.764189029067072e-06, "loss": 0.5238, "step": 26731 }, { "epoch": 0.69, "grad_norm": 1.4171890020370483, "learning_rate": 4.763481785443804e-06, "loss": 0.7595, "step": 26732 }, { "epoch": 0.69, "grad_norm": 1.3819928169250488, "learning_rate": 4.762774577907329e-06, "loss": 0.6522, "step": 26733 }, { "epoch": 0.69, "grad_norm": 1.327661395072937, "learning_rate": 4.762067406462522e-06, "loss": 0.4094, "step": 26734 }, { "epoch": 0.69, "grad_norm": 1.0140849351882935, "learning_rate": 4.761360271114251e-06, "loss": 0.4147, "step": 26735 }, { "epoch": 0.69, "grad_norm": 2.1808104515075684, "learning_rate": 4.760653171867398e-06, "loss": 0.5196, "step": 26736 }, { "epoch": 0.69, "grad_norm": 1.5874228477478027, "learning_rate": 4.75994610872683e-06, "loss": 0.5268, "step": 26737 }, { "epoch": 0.69, "grad_norm": 1.235787272453308, "learning_rate": 4.75923908169742e-06, "loss": 0.6604, "step": 26738 }, { "epoch": 0.69, "grad_norm": 1.7275398969650269, "learning_rate": 4.758532090784044e-06, "loss": 0.6082, "step": 26739 }, { "epoch": 0.69, "grad_norm": 5.669704914093018, "learning_rate": 4.757825135991571e-06, "loss": 0.564, "step": 26740 }, { "epoch": 0.69, "grad_norm": 3.470658540725708, "learning_rate": 4.757118217324872e-06, "loss": 0.6634, "step": 26741 }, { "epoch": 0.69, "grad_norm": 1.5241684913635254, "learning_rate": 4.756411334788823e-06, "loss": 0.4018, "step": 26742 }, { "epoch": 0.69, "grad_norm": 1.4974862337112427, "learning_rate": 4.755704488388293e-06, "loss": 0.3526, "step": 26743 }, { "epoch": 0.69, "grad_norm": 1.0476813316345215, "learning_rate": 4.754997678128152e-06, "loss": 0.6387, "step": 26744 }, { "epoch": 0.69, "grad_norm": 1.6046279668807983, "learning_rate": 4.75429090401327e-06, "loss": 0.6653, "step": 26745 }, { "epoch": 0.69, "grad_norm": 1.5349647998809814, "learning_rate": 4.7535841660485215e-06, "loss": 0.4789, "step": 26746 }, { "epoch": 0.69, "grad_norm": 1.399288535118103, "learning_rate": 4.7528774642387755e-06, "loss": 0.4669, "step": 26747 }, { "epoch": 0.69, "grad_norm": 2.156661033630371, "learning_rate": 4.752170798588897e-06, "loss": 0.5024, "step": 26748 }, { "epoch": 0.69, "grad_norm": 2.9412667751312256, "learning_rate": 4.751464169103766e-06, "loss": 0.5071, "step": 26749 }, { "epoch": 0.69, "grad_norm": 1.2549479007720947, "learning_rate": 4.750757575788244e-06, "loss": 0.4556, "step": 26750 }, { "epoch": 0.69, "grad_norm": 1.5984653234481812, "learning_rate": 4.750051018647205e-06, "loss": 0.5822, "step": 26751 }, { "epoch": 0.69, "grad_norm": 1.6702924966812134, "learning_rate": 4.749344497685511e-06, "loss": 0.623, "step": 26752 }, { "epoch": 0.69, "grad_norm": 1.2145020961761475, "learning_rate": 4.7486380129080405e-06, "loss": 0.5235, "step": 26753 }, { "epoch": 0.69, "grad_norm": 1.876240611076355, "learning_rate": 4.747931564319656e-06, "loss": 0.5395, "step": 26754 }, { "epoch": 0.69, "grad_norm": 3.041975498199463, "learning_rate": 4.7472251519252254e-06, "loss": 0.6444, "step": 26755 }, { "epoch": 0.69, "grad_norm": 1.361854910850525, "learning_rate": 4.746518775729622e-06, "loss": 0.4746, "step": 26756 }, { "epoch": 0.69, "grad_norm": 1.645812749862671, "learning_rate": 4.74581243573771e-06, "loss": 0.5186, "step": 26757 }, { "epoch": 0.69, "grad_norm": 1.568484902381897, "learning_rate": 4.745106131954355e-06, "loss": 0.5449, "step": 26758 }, { "epoch": 0.69, "grad_norm": 0.986395537853241, "learning_rate": 4.74439986438443e-06, "loss": 0.4456, "step": 26759 }, { "epoch": 0.69, "grad_norm": 1.4688537120819092, "learning_rate": 4.7436936330328e-06, "loss": 0.6424, "step": 26760 }, { "epoch": 0.69, "grad_norm": 2.051201105117798, "learning_rate": 4.7429874379043316e-06, "loss": 0.5316, "step": 26761 }, { "epoch": 0.69, "grad_norm": 3.8956210613250732, "learning_rate": 4.742281279003886e-06, "loss": 0.6351, "step": 26762 }, { "epoch": 0.69, "grad_norm": 0.9895381927490234, "learning_rate": 4.741575156336339e-06, "loss": 0.5733, "step": 26763 }, { "epoch": 0.69, "grad_norm": 9.177966117858887, "learning_rate": 4.7408690699065515e-06, "loss": 0.3827, "step": 26764 }, { "epoch": 0.69, "grad_norm": 1.5827982425689697, "learning_rate": 4.740163019719392e-06, "loss": 0.6517, "step": 26765 }, { "epoch": 0.69, "grad_norm": 1.80776047706604, "learning_rate": 4.7394570057797225e-06, "loss": 0.4448, "step": 26766 }, { "epoch": 0.69, "grad_norm": 1.4830806255340576, "learning_rate": 4.738751028092412e-06, "loss": 0.4799, "step": 26767 }, { "epoch": 0.69, "grad_norm": 0.9946598410606384, "learning_rate": 4.738045086662319e-06, "loss": 0.5568, "step": 26768 }, { "epoch": 0.69, "grad_norm": 1.7525259256362915, "learning_rate": 4.737339181494318e-06, "loss": 0.5209, "step": 26769 }, { "epoch": 0.69, "grad_norm": 2.5638747215270996, "learning_rate": 4.736633312593268e-06, "loss": 0.5773, "step": 26770 }, { "epoch": 0.69, "grad_norm": 1.8891375064849854, "learning_rate": 4.735927479964032e-06, "loss": 0.5742, "step": 26771 }, { "epoch": 0.69, "grad_norm": 1.361457347869873, "learning_rate": 4.73522168361148e-06, "loss": 0.7163, "step": 26772 }, { "epoch": 0.69, "grad_norm": 2.2719619274139404, "learning_rate": 4.734515923540472e-06, "loss": 0.5223, "step": 26773 }, { "epoch": 0.69, "grad_norm": 1.8675258159637451, "learning_rate": 4.733810199755872e-06, "loss": 0.322, "step": 26774 }, { "epoch": 0.69, "grad_norm": 1.5906403064727783, "learning_rate": 4.733104512262541e-06, "loss": 0.5647, "step": 26775 }, { "epoch": 0.69, "grad_norm": 1.1377712488174438, "learning_rate": 4.732398861065347e-06, "loss": 0.6818, "step": 26776 }, { "epoch": 0.69, "grad_norm": 1.2738043069839478, "learning_rate": 4.731693246169151e-06, "loss": 0.4801, "step": 26777 }, { "epoch": 0.69, "grad_norm": 1.158197045326233, "learning_rate": 4.730987667578811e-06, "loss": 0.4413, "step": 26778 }, { "epoch": 0.69, "grad_norm": 1.1475486755371094, "learning_rate": 4.7302821252992e-06, "loss": 0.4685, "step": 26779 }, { "epoch": 0.69, "grad_norm": 1.6049009561538696, "learning_rate": 4.729576619335171e-06, "loss": 0.5424, "step": 26780 }, { "epoch": 0.69, "grad_norm": 9.836336135864258, "learning_rate": 4.72887114969159e-06, "loss": 0.6183, "step": 26781 }, { "epoch": 0.69, "grad_norm": 1.2269439697265625, "learning_rate": 4.728165716373313e-06, "loss": 0.5523, "step": 26782 }, { "epoch": 0.69, "grad_norm": 1.1804760694503784, "learning_rate": 4.72746031938521e-06, "loss": 0.5336, "step": 26783 }, { "epoch": 0.69, "grad_norm": 7.264609336853027, "learning_rate": 4.7267549587321385e-06, "loss": 0.5681, "step": 26784 }, { "epoch": 0.69, "grad_norm": 1.379179835319519, "learning_rate": 4.726049634418953e-06, "loss": 0.5733, "step": 26785 }, { "epoch": 0.69, "grad_norm": 4.013524055480957, "learning_rate": 4.725344346450526e-06, "loss": 0.4617, "step": 26786 }, { "epoch": 0.69, "grad_norm": 5.730611324310303, "learning_rate": 4.724639094831711e-06, "loss": 0.5409, "step": 26787 }, { "epoch": 0.69, "grad_norm": 1.2622932195663452, "learning_rate": 4.723933879567365e-06, "loss": 0.4788, "step": 26788 }, { "epoch": 0.69, "grad_norm": 1.9768978357315063, "learning_rate": 4.723228700662356e-06, "loss": 0.5352, "step": 26789 }, { "epoch": 0.69, "grad_norm": 1.5784519910812378, "learning_rate": 4.72252355812154e-06, "loss": 0.4474, "step": 26790 }, { "epoch": 0.69, "grad_norm": 1.9811153411865234, "learning_rate": 4.721818451949776e-06, "loss": 0.6706, "step": 26791 }, { "epoch": 0.69, "grad_norm": 1.0944921970367432, "learning_rate": 4.721113382151919e-06, "loss": 0.4341, "step": 26792 }, { "epoch": 0.69, "grad_norm": 1.2075891494750977, "learning_rate": 4.720408348732836e-06, "loss": 0.3924, "step": 26793 }, { "epoch": 0.69, "grad_norm": 2.531052589416504, "learning_rate": 4.719703351697381e-06, "loss": 0.5418, "step": 26794 }, { "epoch": 0.69, "grad_norm": 1.6745049953460693, "learning_rate": 4.71899839105041e-06, "loss": 0.6976, "step": 26795 }, { "epoch": 0.69, "grad_norm": 1.0822910070419312, "learning_rate": 4.718293466796789e-06, "loss": 0.4822, "step": 26796 }, { "epoch": 0.69, "grad_norm": 1.3578428030014038, "learning_rate": 4.717588578941369e-06, "loss": 0.6111, "step": 26797 }, { "epoch": 0.69, "grad_norm": 2.0019309520721436, "learning_rate": 4.716883727489011e-06, "loss": 0.58, "step": 26798 }, { "epoch": 0.69, "grad_norm": 1.343830943107605, "learning_rate": 4.716178912444568e-06, "loss": 0.611, "step": 26799 }, { "epoch": 0.69, "grad_norm": 2.0868356227874756, "learning_rate": 4.715474133812903e-06, "loss": 0.5901, "step": 26800 }, { "epoch": 0.69, "grad_norm": 5.564807415008545, "learning_rate": 4.71476939159887e-06, "loss": 0.6378, "step": 26801 }, { "epoch": 0.69, "grad_norm": 5.259405136108398, "learning_rate": 4.714064685807322e-06, "loss": 0.5466, "step": 26802 }, { "epoch": 0.69, "grad_norm": 7.261561870574951, "learning_rate": 4.713360016443125e-06, "loss": 0.5974, "step": 26803 }, { "epoch": 0.69, "grad_norm": 1.3491204977035522, "learning_rate": 4.712655383511128e-06, "loss": 0.5644, "step": 26804 }, { "epoch": 0.69, "grad_norm": 6.7727742195129395, "learning_rate": 4.711950787016187e-06, "loss": 0.6727, "step": 26805 }, { "epoch": 0.69, "grad_norm": 2.148261785507202, "learning_rate": 4.711246226963157e-06, "loss": 0.5237, "step": 26806 }, { "epoch": 0.69, "grad_norm": 1.5436854362487793, "learning_rate": 4.710541703356899e-06, "loss": 0.573, "step": 26807 }, { "epoch": 0.69, "grad_norm": 1.5687813758850098, "learning_rate": 4.7098372162022635e-06, "loss": 0.6164, "step": 26808 }, { "epoch": 0.69, "grad_norm": 2.436171531677246, "learning_rate": 4.709132765504103e-06, "loss": 0.5599, "step": 26809 }, { "epoch": 0.69, "grad_norm": 1.2581781148910522, "learning_rate": 4.708428351267279e-06, "loss": 0.3501, "step": 26810 }, { "epoch": 0.69, "grad_norm": 1.4241682291030884, "learning_rate": 4.707723973496642e-06, "loss": 0.5235, "step": 26811 }, { "epoch": 0.69, "grad_norm": 1.5111472606658936, "learning_rate": 4.707019632197043e-06, "loss": 0.5051, "step": 26812 }, { "epoch": 0.69, "grad_norm": 2.1979219913482666, "learning_rate": 4.706315327373343e-06, "loss": 0.5147, "step": 26813 }, { "epoch": 0.69, "grad_norm": 1.1189323663711548, "learning_rate": 4.705611059030393e-06, "loss": 0.5143, "step": 26814 }, { "epoch": 0.69, "grad_norm": 1.5728837251663208, "learning_rate": 4.704906827173044e-06, "loss": 0.5168, "step": 26815 }, { "epoch": 0.69, "grad_norm": 1.6568248271942139, "learning_rate": 4.704202631806147e-06, "loss": 0.427, "step": 26816 }, { "epoch": 0.69, "grad_norm": 3.6283717155456543, "learning_rate": 4.703498472934562e-06, "loss": 0.5783, "step": 26817 }, { "epoch": 0.69, "grad_norm": 1.5379730463027954, "learning_rate": 4.702794350563138e-06, "loss": 0.5392, "step": 26818 }, { "epoch": 0.69, "grad_norm": 1.3079026937484741, "learning_rate": 4.702090264696724e-06, "loss": 0.6486, "step": 26819 }, { "epoch": 0.69, "grad_norm": 1.3390769958496094, "learning_rate": 4.701386215340179e-06, "loss": 0.5476, "step": 26820 }, { "epoch": 0.69, "grad_norm": 1.6645288467407227, "learning_rate": 4.700682202498351e-06, "loss": 0.5356, "step": 26821 }, { "epoch": 0.69, "grad_norm": 7.090505123138428, "learning_rate": 4.699978226176092e-06, "loss": 0.6425, "step": 26822 }, { "epoch": 0.69, "grad_norm": 1.1159520149230957, "learning_rate": 4.6992742863782495e-06, "loss": 0.4316, "step": 26823 }, { "epoch": 0.69, "grad_norm": 1.9392491579055786, "learning_rate": 4.6985703831096815e-06, "loss": 0.4938, "step": 26824 }, { "epoch": 0.69, "grad_norm": 1.5151232481002808, "learning_rate": 4.697866516375237e-06, "loss": 0.4411, "step": 26825 }, { "epoch": 0.69, "grad_norm": 1.1131306886672974, "learning_rate": 4.697162686179762e-06, "loss": 0.546, "step": 26826 }, { "epoch": 0.69, "grad_norm": 2.056385040283203, "learning_rate": 4.696458892528112e-06, "loss": 0.4283, "step": 26827 }, { "epoch": 0.69, "grad_norm": 1.2231497764587402, "learning_rate": 4.695755135425136e-06, "loss": 0.3593, "step": 26828 }, { "epoch": 0.69, "grad_norm": 1.0990618467330933, "learning_rate": 4.695051414875683e-06, "loss": 0.3969, "step": 26829 }, { "epoch": 0.69, "grad_norm": 1.198479175567627, "learning_rate": 4.6943477308846e-06, "loss": 0.5432, "step": 26830 }, { "epoch": 0.69, "grad_norm": 2.4547159671783447, "learning_rate": 4.693644083456742e-06, "loss": 0.6602, "step": 26831 }, { "epoch": 0.69, "grad_norm": 2.18147611618042, "learning_rate": 4.692940472596955e-06, "loss": 0.6276, "step": 26832 }, { "epoch": 0.69, "grad_norm": 1.0432325601577759, "learning_rate": 4.692236898310086e-06, "loss": 0.5482, "step": 26833 }, { "epoch": 0.69, "grad_norm": 1.6218187808990479, "learning_rate": 4.6915333606009874e-06, "loss": 0.5536, "step": 26834 }, { "epoch": 0.69, "grad_norm": 1.9433051347732544, "learning_rate": 4.690829859474507e-06, "loss": 0.7049, "step": 26835 }, { "epoch": 0.69, "grad_norm": 3.2280399799346924, "learning_rate": 4.690126394935487e-06, "loss": 0.5391, "step": 26836 }, { "epoch": 0.69, "grad_norm": 1.7003936767578125, "learning_rate": 4.689422966988784e-06, "loss": 0.5239, "step": 26837 }, { "epoch": 0.69, "grad_norm": 6.264966011047363, "learning_rate": 4.6887195756392414e-06, "loss": 0.6337, "step": 26838 }, { "epoch": 0.69, "grad_norm": 1.8199421167373657, "learning_rate": 4.688016220891707e-06, "loss": 0.5407, "step": 26839 }, { "epoch": 0.69, "grad_norm": 1.5303822755813599, "learning_rate": 4.687312902751023e-06, "loss": 0.5077, "step": 26840 }, { "epoch": 0.69, "grad_norm": 1.0595697164535522, "learning_rate": 4.686609621222046e-06, "loss": 0.3469, "step": 26841 }, { "epoch": 0.69, "grad_norm": 0.8064414858818054, "learning_rate": 4.685906376309616e-06, "loss": 0.3122, "step": 26842 }, { "epoch": 0.69, "grad_norm": 5.2429399490356445, "learning_rate": 4.685203168018577e-06, "loss": 0.4214, "step": 26843 }, { "epoch": 0.69, "grad_norm": 1.267216682434082, "learning_rate": 4.684499996353783e-06, "loss": 0.373, "step": 26844 }, { "epoch": 0.69, "grad_norm": 1.0964847803115845, "learning_rate": 4.683796861320075e-06, "loss": 0.4348, "step": 26845 }, { "epoch": 0.69, "grad_norm": 3.8139069080352783, "learning_rate": 4.683093762922299e-06, "loss": 0.6983, "step": 26846 }, { "epoch": 0.69, "grad_norm": 1.2193151712417603, "learning_rate": 4.682390701165295e-06, "loss": 0.4501, "step": 26847 }, { "epoch": 0.69, "grad_norm": 1.415995478630066, "learning_rate": 4.68168767605392e-06, "loss": 0.3561, "step": 26848 }, { "epoch": 0.69, "grad_norm": 1.5505447387695312, "learning_rate": 4.680984687593011e-06, "loss": 0.4484, "step": 26849 }, { "epoch": 0.69, "grad_norm": 1.4272807836532593, "learning_rate": 4.6802817357874105e-06, "loss": 0.5812, "step": 26850 }, { "epoch": 0.69, "grad_norm": 2.0308215618133545, "learning_rate": 4.67957882064197e-06, "loss": 0.5311, "step": 26851 }, { "epoch": 0.69, "grad_norm": 1.489296793937683, "learning_rate": 4.67887594216153e-06, "loss": 0.5204, "step": 26852 }, { "epoch": 0.69, "grad_norm": 1.9932953119277954, "learning_rate": 4.678173100350928e-06, "loss": 0.6239, "step": 26853 }, { "epoch": 0.69, "grad_norm": 1.4389971494674683, "learning_rate": 4.6774702952150185e-06, "loss": 0.5602, "step": 26854 }, { "epoch": 0.69, "grad_norm": 3.7549703121185303, "learning_rate": 4.676767526758641e-06, "loss": 0.6705, "step": 26855 }, { "epoch": 0.69, "grad_norm": 1.449095606803894, "learning_rate": 4.676064794986636e-06, "loss": 0.5761, "step": 26856 }, { "epoch": 0.69, "grad_norm": 1.597586750984192, "learning_rate": 4.675362099903844e-06, "loss": 0.6137, "step": 26857 }, { "epoch": 0.69, "grad_norm": 1.6921147108078003, "learning_rate": 4.674659441515115e-06, "loss": 0.6575, "step": 26858 }, { "epoch": 0.69, "grad_norm": 1.606382131576538, "learning_rate": 4.673956819825287e-06, "loss": 0.5063, "step": 26859 }, { "epoch": 0.69, "grad_norm": 5.729389667510986, "learning_rate": 4.6732542348392e-06, "loss": 0.5563, "step": 26860 }, { "epoch": 0.69, "grad_norm": 1.3806263208389282, "learning_rate": 4.672551686561701e-06, "loss": 0.5957, "step": 26861 }, { "epoch": 0.69, "grad_norm": 1.355525016784668, "learning_rate": 4.671849174997628e-06, "loss": 0.5041, "step": 26862 }, { "epoch": 0.69, "grad_norm": 1.4333720207214355, "learning_rate": 4.671146700151824e-06, "loss": 0.6407, "step": 26863 }, { "epoch": 0.69, "grad_norm": 3.8670055866241455, "learning_rate": 4.670444262029126e-06, "loss": 0.5645, "step": 26864 }, { "epoch": 0.69, "grad_norm": 4.529366493225098, "learning_rate": 4.66974186063438e-06, "loss": 0.5002, "step": 26865 }, { "epoch": 0.69, "grad_norm": 1.5391231775283813, "learning_rate": 4.669039495972425e-06, "loss": 0.713, "step": 26866 }, { "epoch": 0.69, "grad_norm": 1.8659898042678833, "learning_rate": 4.668337168048097e-06, "loss": 0.5829, "step": 26867 }, { "epoch": 0.69, "grad_norm": 1.6209110021591187, "learning_rate": 4.667634876866243e-06, "loss": 0.4718, "step": 26868 }, { "epoch": 0.69, "grad_norm": 1.4536793231964111, "learning_rate": 4.6669326224317e-06, "loss": 0.4615, "step": 26869 }, { "epoch": 0.69, "grad_norm": 4.881758213043213, "learning_rate": 4.666230404749306e-06, "loss": 0.5514, "step": 26870 }, { "epoch": 0.69, "grad_norm": 0.9197216629981995, "learning_rate": 4.665528223823897e-06, "loss": 0.4679, "step": 26871 }, { "epoch": 0.69, "grad_norm": 0.9997852444648743, "learning_rate": 4.664826079660321e-06, "loss": 0.5026, "step": 26872 }, { "epoch": 0.69, "grad_norm": 2.2659146785736084, "learning_rate": 4.664123972263411e-06, "loss": 0.5714, "step": 26873 }, { "epoch": 0.69, "grad_norm": 7.039226531982422, "learning_rate": 4.663421901638002e-06, "loss": 0.515, "step": 26874 }, { "epoch": 0.69, "grad_norm": 1.3142876625061035, "learning_rate": 4.662719867788942e-06, "loss": 0.3891, "step": 26875 }, { "epoch": 0.69, "grad_norm": 1.3181514739990234, "learning_rate": 4.662017870721062e-06, "loss": 0.5609, "step": 26876 }, { "epoch": 0.69, "grad_norm": 1.961215853691101, "learning_rate": 4.661315910439197e-06, "loss": 0.5081, "step": 26877 }, { "epoch": 0.69, "grad_norm": 1.571776032447815, "learning_rate": 4.660613986948194e-06, "loss": 0.605, "step": 26878 }, { "epoch": 0.69, "grad_norm": 1.155824065208435, "learning_rate": 4.659912100252884e-06, "loss": 0.4098, "step": 26879 }, { "epoch": 0.69, "grad_norm": 1.893677830696106, "learning_rate": 4.6592102503581034e-06, "loss": 0.7902, "step": 26880 }, { "epoch": 0.69, "grad_norm": 1.0547890663146973, "learning_rate": 4.658508437268689e-06, "loss": 0.5196, "step": 26881 }, { "epoch": 0.69, "grad_norm": 4.0170745849609375, "learning_rate": 4.6578066609894816e-06, "loss": 0.6674, "step": 26882 }, { "epoch": 0.69, "grad_norm": 1.3389699459075928, "learning_rate": 4.657104921525314e-06, "loss": 0.6271, "step": 26883 }, { "epoch": 0.69, "grad_norm": 3.2335219383239746, "learning_rate": 4.656403218881019e-06, "loss": 0.5711, "step": 26884 }, { "epoch": 0.69, "grad_norm": 1.504478931427002, "learning_rate": 4.655701553061439e-06, "loss": 0.5066, "step": 26885 }, { "epoch": 0.69, "grad_norm": 4.93816614151001, "learning_rate": 4.654999924071407e-06, "loss": 0.9246, "step": 26886 }, { "epoch": 0.69, "grad_norm": 1.6212937831878662, "learning_rate": 4.6542983319157574e-06, "loss": 0.6021, "step": 26887 }, { "epoch": 0.69, "grad_norm": 6.901477336883545, "learning_rate": 4.6535967765993215e-06, "loss": 0.4186, "step": 26888 }, { "epoch": 0.69, "grad_norm": 1.0810662508010864, "learning_rate": 4.652895258126942e-06, "loss": 0.5486, "step": 26889 }, { "epoch": 0.69, "grad_norm": 23.973270416259766, "learning_rate": 4.652193776503447e-06, "loss": 0.7509, "step": 26890 }, { "epoch": 0.69, "grad_norm": 16.93170738220215, "learning_rate": 4.651492331733672e-06, "loss": 0.5398, "step": 26891 }, { "epoch": 0.69, "grad_norm": 1.166617512702942, "learning_rate": 4.650790923822453e-06, "loss": 0.5011, "step": 26892 }, { "epoch": 0.69, "grad_norm": 1.460824966430664, "learning_rate": 4.650089552774623e-06, "loss": 0.562, "step": 26893 }, { "epoch": 0.69, "grad_norm": 4.060929298400879, "learning_rate": 4.64938821859501e-06, "loss": 0.6043, "step": 26894 }, { "epoch": 0.69, "grad_norm": 1.8493410348892212, "learning_rate": 4.648686921288457e-06, "loss": 0.6308, "step": 26895 }, { "epoch": 0.69, "grad_norm": 2.0978033542633057, "learning_rate": 4.64798566085979e-06, "loss": 0.5961, "step": 26896 }, { "epoch": 0.69, "grad_norm": 1.3267157077789307, "learning_rate": 4.647284437313845e-06, "loss": 0.5307, "step": 26897 }, { "epoch": 0.69, "grad_norm": 0.7327864170074463, "learning_rate": 4.646583250655447e-06, "loss": 0.3587, "step": 26898 }, { "epoch": 0.69, "grad_norm": 1.606715440750122, "learning_rate": 4.645882100889439e-06, "loss": 0.5331, "step": 26899 }, { "epoch": 0.69, "grad_norm": 1.477725625038147, "learning_rate": 4.645180988020649e-06, "loss": 0.5584, "step": 26900 }, { "epoch": 0.69, "grad_norm": 1.011295199394226, "learning_rate": 4.644479912053902e-06, "loss": 0.4313, "step": 26901 }, { "epoch": 0.69, "grad_norm": 1.494045376777649, "learning_rate": 4.643778872994039e-06, "loss": 0.535, "step": 26902 }, { "epoch": 0.69, "grad_norm": 2.0204007625579834, "learning_rate": 4.643077870845887e-06, "loss": 0.504, "step": 26903 }, { "epoch": 0.69, "grad_norm": 4.418331623077393, "learning_rate": 4.642376905614276e-06, "loss": 0.5706, "step": 26904 }, { "epoch": 0.69, "grad_norm": 1.4708303213119507, "learning_rate": 4.641675977304034e-06, "loss": 0.6217, "step": 26905 }, { "epoch": 0.69, "grad_norm": 2.6061301231384277, "learning_rate": 4.64097508592e-06, "loss": 0.718, "step": 26906 }, { "epoch": 0.69, "grad_norm": 1.5256168842315674, "learning_rate": 4.640274231466997e-06, "loss": 0.5745, "step": 26907 }, { "epoch": 0.69, "grad_norm": 2.309983015060425, "learning_rate": 4.639573413949854e-06, "loss": 0.7044, "step": 26908 }, { "epoch": 0.69, "grad_norm": 3.144331932067871, "learning_rate": 4.638872633373406e-06, "loss": 0.5313, "step": 26909 }, { "epoch": 0.69, "grad_norm": 2.0205888748168945, "learning_rate": 4.63817188974248e-06, "loss": 0.5754, "step": 26910 }, { "epoch": 0.69, "grad_norm": 1.5963202714920044, "learning_rate": 4.637471183061905e-06, "loss": 0.4986, "step": 26911 }, { "epoch": 0.69, "grad_norm": 2.406731605529785, "learning_rate": 4.636770513336506e-06, "loss": 0.5975, "step": 26912 }, { "epoch": 0.69, "grad_norm": 2.0407557487487793, "learning_rate": 4.636069880571118e-06, "loss": 0.6216, "step": 26913 }, { "epoch": 0.69, "grad_norm": 1.3609312772750854, "learning_rate": 4.635369284770567e-06, "loss": 0.6755, "step": 26914 }, { "epoch": 0.69, "grad_norm": 1.6091704368591309, "learning_rate": 4.634668725939677e-06, "loss": 0.4415, "step": 26915 }, { "epoch": 0.69, "grad_norm": 1.8896666765213013, "learning_rate": 4.633968204083282e-06, "loss": 0.4981, "step": 26916 }, { "epoch": 0.69, "grad_norm": 1.0398333072662354, "learning_rate": 4.633267719206208e-06, "loss": 0.3669, "step": 26917 }, { "epoch": 0.69, "grad_norm": 1.7114802598953247, "learning_rate": 4.6325672713132776e-06, "loss": 0.5694, "step": 26918 }, { "epoch": 0.69, "grad_norm": 1.6188563108444214, "learning_rate": 4.6318668604093246e-06, "loss": 0.4689, "step": 26919 }, { "epoch": 0.69, "grad_norm": 1.1074949502944946, "learning_rate": 4.631166486499172e-06, "loss": 0.4829, "step": 26920 }, { "epoch": 0.69, "grad_norm": 1.34575617313385, "learning_rate": 4.630466149587648e-06, "loss": 0.3249, "step": 26921 }, { "epoch": 0.69, "grad_norm": 1.025359869003296, "learning_rate": 4.629765849679574e-06, "loss": 0.5143, "step": 26922 }, { "epoch": 0.69, "grad_norm": 1.8550527095794678, "learning_rate": 4.6290655867797825e-06, "loss": 0.5502, "step": 26923 }, { "epoch": 0.69, "grad_norm": 1.7115333080291748, "learning_rate": 4.628365360893098e-06, "loss": 0.3892, "step": 26924 }, { "epoch": 0.69, "grad_norm": 1.451464056968689, "learning_rate": 4.6276651720243395e-06, "loss": 0.5344, "step": 26925 }, { "epoch": 0.69, "grad_norm": 1.3190624713897705, "learning_rate": 4.6269650201783424e-06, "loss": 0.5572, "step": 26926 }, { "epoch": 0.69, "grad_norm": 1.5250461101531982, "learning_rate": 4.6262649053599265e-06, "loss": 0.5697, "step": 26927 }, { "epoch": 0.69, "grad_norm": 1.3217183351516724, "learning_rate": 4.625564827573916e-06, "loss": 0.5624, "step": 26928 }, { "epoch": 0.69, "grad_norm": 2.4200916290283203, "learning_rate": 4.624864786825133e-06, "loss": 0.4325, "step": 26929 }, { "epoch": 0.69, "grad_norm": 1.7714715003967285, "learning_rate": 4.624164783118408e-06, "loss": 0.5717, "step": 26930 }, { "epoch": 0.69, "grad_norm": 2.2075226306915283, "learning_rate": 4.623464816458563e-06, "loss": 0.5591, "step": 26931 }, { "epoch": 0.69, "grad_norm": 4.65008544921875, "learning_rate": 4.622764886850416e-06, "loss": 0.6397, "step": 26932 }, { "epoch": 0.69, "grad_norm": 1.2769328355789185, "learning_rate": 4.622064994298799e-06, "loss": 0.518, "step": 26933 }, { "epoch": 0.69, "grad_norm": 1.7840262651443481, "learning_rate": 4.621365138808531e-06, "loss": 0.535, "step": 26934 }, { "epoch": 0.69, "grad_norm": 1.8020431995391846, "learning_rate": 4.620665320384432e-06, "loss": 0.5534, "step": 26935 }, { "epoch": 0.69, "grad_norm": 8.441299438476562, "learning_rate": 4.619965539031333e-06, "loss": 0.5006, "step": 26936 }, { "epoch": 0.69, "grad_norm": 2.1937167644500732, "learning_rate": 4.619265794754048e-06, "loss": 0.5482, "step": 26937 }, { "epoch": 0.69, "grad_norm": 2.101213216781616, "learning_rate": 4.618566087557406e-06, "loss": 0.4876, "step": 26938 }, { "epoch": 0.69, "grad_norm": 3.4068009853363037, "learning_rate": 4.61786641744622e-06, "loss": 0.6426, "step": 26939 }, { "epoch": 0.69, "grad_norm": 1.8271548748016357, "learning_rate": 4.617166784425322e-06, "loss": 0.4378, "step": 26940 }, { "epoch": 0.69, "grad_norm": 1.529299020767212, "learning_rate": 4.616467188499528e-06, "loss": 0.5507, "step": 26941 }, { "epoch": 0.69, "grad_norm": 2.583700656890869, "learning_rate": 4.6157676296736564e-06, "loss": 0.6954, "step": 26942 }, { "epoch": 0.69, "grad_norm": 8.336648941040039, "learning_rate": 4.6150681079525365e-06, "loss": 0.6648, "step": 26943 }, { "epoch": 0.69, "grad_norm": 3.7749767303466797, "learning_rate": 4.614368623340982e-06, "loss": 0.7121, "step": 26944 }, { "epoch": 0.69, "grad_norm": 4.069551944732666, "learning_rate": 4.613669175843817e-06, "loss": 0.5145, "step": 26945 }, { "epoch": 0.69, "grad_norm": 1.6536723375320435, "learning_rate": 4.612969765465855e-06, "loss": 0.5112, "step": 26946 }, { "epoch": 0.69, "grad_norm": 1.2965949773788452, "learning_rate": 4.612270392211925e-06, "loss": 0.5239, "step": 26947 }, { "epoch": 0.69, "grad_norm": 3.1398704051971436, "learning_rate": 4.611571056086843e-06, "loss": 0.5074, "step": 26948 }, { "epoch": 0.69, "grad_norm": 1.4859322309494019, "learning_rate": 4.610871757095424e-06, "loss": 0.5768, "step": 26949 }, { "epoch": 0.69, "grad_norm": 1.4470313787460327, "learning_rate": 4.610172495242494e-06, "loss": 0.4383, "step": 26950 }, { "epoch": 0.69, "grad_norm": 1.9165802001953125, "learning_rate": 4.6094732705328704e-06, "loss": 0.5164, "step": 26951 }, { "epoch": 0.69, "grad_norm": 1.1984059810638428, "learning_rate": 4.608774082971369e-06, "loss": 0.471, "step": 26952 }, { "epoch": 0.69, "grad_norm": 1.7502057552337646, "learning_rate": 4.608074932562807e-06, "loss": 0.4946, "step": 26953 }, { "epoch": 0.69, "grad_norm": 1.6672513484954834, "learning_rate": 4.607375819312008e-06, "loss": 0.6052, "step": 26954 }, { "epoch": 0.69, "grad_norm": 1.5978784561157227, "learning_rate": 4.606676743223787e-06, "loss": 0.4009, "step": 26955 }, { "epoch": 0.69, "grad_norm": 1.6397311687469482, "learning_rate": 4.6059777043029575e-06, "loss": 0.5957, "step": 26956 }, { "epoch": 0.69, "grad_norm": 1.9286494255065918, "learning_rate": 4.605278702554346e-06, "loss": 0.6786, "step": 26957 }, { "epoch": 0.69, "grad_norm": 0.9275033473968506, "learning_rate": 4.604579737982763e-06, "loss": 0.3794, "step": 26958 }, { "epoch": 0.69, "grad_norm": 1.4808650016784668, "learning_rate": 4.603880810593022e-06, "loss": 0.5126, "step": 26959 }, { "epoch": 0.69, "grad_norm": 2.160820484161377, "learning_rate": 4.60318192038995e-06, "loss": 0.5241, "step": 26960 }, { "epoch": 0.69, "grad_norm": 1.4025235176086426, "learning_rate": 4.602483067378357e-06, "loss": 0.6067, "step": 26961 }, { "epoch": 0.69, "grad_norm": 6.355547904968262, "learning_rate": 4.6017842515630595e-06, "loss": 0.7259, "step": 26962 }, { "epoch": 0.69, "grad_norm": 2.087618589401245, "learning_rate": 4.60108547294887e-06, "loss": 0.6268, "step": 26963 }, { "epoch": 0.69, "grad_norm": 2.0858314037323, "learning_rate": 4.600386731540611e-06, "loss": 0.5878, "step": 26964 }, { "epoch": 0.69, "grad_norm": 1.7466455698013306, "learning_rate": 4.599688027343093e-06, "loss": 0.6243, "step": 26965 }, { "epoch": 0.69, "grad_norm": 1.1705970764160156, "learning_rate": 4.59898936036113e-06, "loss": 0.3913, "step": 26966 }, { "epoch": 0.69, "grad_norm": 1.4888136386871338, "learning_rate": 4.598290730599543e-06, "loss": 0.488, "step": 26967 }, { "epoch": 0.69, "grad_norm": 1.830718994140625, "learning_rate": 4.597592138063142e-06, "loss": 0.655, "step": 26968 }, { "epoch": 0.69, "grad_norm": 1.1237661838531494, "learning_rate": 4.5968935827567416e-06, "loss": 0.3525, "step": 26969 }, { "epoch": 0.69, "grad_norm": 1.4048492908477783, "learning_rate": 4.596195064685153e-06, "loss": 0.4275, "step": 26970 }, { "epoch": 0.69, "grad_norm": 7.343623638153076, "learning_rate": 4.595496583853196e-06, "loss": 0.5, "step": 26971 }, { "epoch": 0.69, "grad_norm": 4.911203861236572, "learning_rate": 4.594798140265681e-06, "loss": 0.5377, "step": 26972 }, { "epoch": 0.69, "grad_norm": 1.9554764032363892, "learning_rate": 4.594099733927419e-06, "loss": 0.5581, "step": 26973 }, { "epoch": 0.69, "grad_norm": 2.0915706157684326, "learning_rate": 4.593401364843228e-06, "loss": 0.4513, "step": 26974 }, { "epoch": 0.69, "grad_norm": 2.0076589584350586, "learning_rate": 4.592703033017919e-06, "loss": 0.509, "step": 26975 }, { "epoch": 0.69, "grad_norm": 1.4563379287719727, "learning_rate": 4.592004738456303e-06, "loss": 0.4132, "step": 26976 }, { "epoch": 0.69, "grad_norm": 1.3760329484939575, "learning_rate": 4.591306481163188e-06, "loss": 0.4977, "step": 26977 }, { "epoch": 0.69, "grad_norm": 1.5316582918167114, "learning_rate": 4.590608261143396e-06, "loss": 0.5521, "step": 26978 }, { "epoch": 0.69, "grad_norm": 1.360328197479248, "learning_rate": 4.589910078401731e-06, "loss": 0.5297, "step": 26979 }, { "epoch": 0.69, "grad_norm": 1.8737714290618896, "learning_rate": 4.589211932943005e-06, "loss": 0.5021, "step": 26980 }, { "epoch": 0.69, "grad_norm": 1.7201588153839111, "learning_rate": 4.588513824772034e-06, "loss": 0.626, "step": 26981 }, { "epoch": 0.69, "grad_norm": 0.9365280866622925, "learning_rate": 4.587815753893626e-06, "loss": 0.3668, "step": 26982 }, { "epoch": 0.69, "grad_norm": 0.9995469450950623, "learning_rate": 4.587117720312589e-06, "loss": 0.4908, "step": 26983 }, { "epoch": 0.69, "grad_norm": 1.7460129261016846, "learning_rate": 4.586419724033738e-06, "loss": 0.5867, "step": 26984 }, { "epoch": 0.69, "grad_norm": 2.159701347351074, "learning_rate": 4.585721765061883e-06, "loss": 0.567, "step": 26985 }, { "epoch": 0.69, "grad_norm": 1.2112973928451538, "learning_rate": 4.58502384340183e-06, "loss": 0.4001, "step": 26986 }, { "epoch": 0.69, "grad_norm": 2.200852632522583, "learning_rate": 4.584325959058388e-06, "loss": 0.5354, "step": 26987 }, { "epoch": 0.69, "grad_norm": 1.4146645069122314, "learning_rate": 4.583628112036372e-06, "loss": 0.2931, "step": 26988 }, { "epoch": 0.69, "grad_norm": 0.961999237537384, "learning_rate": 4.582930302340589e-06, "loss": 0.4609, "step": 26989 }, { "epoch": 0.69, "grad_norm": 4.510122299194336, "learning_rate": 4.582232529975843e-06, "loss": 0.749, "step": 26990 }, { "epoch": 0.69, "grad_norm": 2.081475257873535, "learning_rate": 4.5815347949469505e-06, "loss": 0.3708, "step": 26991 }, { "epoch": 0.69, "grad_norm": 4.036730766296387, "learning_rate": 4.580837097258715e-06, "loss": 0.5255, "step": 26992 }, { "epoch": 0.69, "grad_norm": 1.3140628337860107, "learning_rate": 4.580139436915947e-06, "loss": 0.4726, "step": 26993 }, { "epoch": 0.69, "grad_norm": 0.9988855123519897, "learning_rate": 4.579441813923448e-06, "loss": 0.4242, "step": 26994 }, { "epoch": 0.69, "grad_norm": 1.6899946928024292, "learning_rate": 4.578744228286035e-06, "loss": 0.5029, "step": 26995 }, { "epoch": 0.69, "grad_norm": 1.5199980735778809, "learning_rate": 4.57804668000851e-06, "loss": 0.4794, "step": 26996 }, { "epoch": 0.69, "grad_norm": 2.424020290374756, "learning_rate": 4.577349169095678e-06, "loss": 0.5349, "step": 26997 }, { "epoch": 0.69, "grad_norm": 6.746376991271973, "learning_rate": 4.576651695552352e-06, "loss": 0.4953, "step": 26998 }, { "epoch": 0.69, "grad_norm": 1.713990330696106, "learning_rate": 4.575954259383335e-06, "loss": 0.6428, "step": 26999 }, { "epoch": 0.69, "grad_norm": 1.7504494190216064, "learning_rate": 4.57525686059343e-06, "loss": 0.475, "step": 27000 }, { "epoch": 0.69, "grad_norm": 2.020616054534912, "learning_rate": 4.574559499187451e-06, "loss": 0.6259, "step": 27001 }, { "epoch": 0.69, "grad_norm": 1.4152189493179321, "learning_rate": 4.573862175170199e-06, "loss": 0.3817, "step": 27002 }, { "epoch": 0.69, "grad_norm": 2.600146532058716, "learning_rate": 4.57316488854648e-06, "loss": 0.5589, "step": 27003 }, { "epoch": 0.69, "grad_norm": 1.384558916091919, "learning_rate": 4.572467639321094e-06, "loss": 0.6201, "step": 27004 }, { "epoch": 0.69, "grad_norm": 1.303954839706421, "learning_rate": 4.571770427498856e-06, "loss": 0.5062, "step": 27005 }, { "epoch": 0.69, "grad_norm": 1.4140375852584839, "learning_rate": 4.571073253084566e-06, "loss": 0.512, "step": 27006 }, { "epoch": 0.69, "grad_norm": 1.3917419910430908, "learning_rate": 4.5703761160830275e-06, "loss": 0.5132, "step": 27007 }, { "epoch": 0.69, "grad_norm": 1.378238320350647, "learning_rate": 4.569679016499047e-06, "loss": 0.5118, "step": 27008 }, { "epoch": 0.69, "grad_norm": 1.2781386375427246, "learning_rate": 4.568981954337426e-06, "loss": 0.5469, "step": 27009 }, { "epoch": 0.69, "grad_norm": 2.472585439682007, "learning_rate": 4.568284929602965e-06, "loss": 0.6182, "step": 27010 }, { "epoch": 0.69, "grad_norm": 0.8277763724327087, "learning_rate": 4.567587942300476e-06, "loss": 0.41, "step": 27011 }, { "epoch": 0.69, "grad_norm": 2.1738460063934326, "learning_rate": 4.566890992434758e-06, "loss": 0.5493, "step": 27012 }, { "epoch": 0.69, "grad_norm": 0.9641879796981812, "learning_rate": 4.566194080010611e-06, "loss": 0.5104, "step": 27013 }, { "epoch": 0.69, "grad_norm": 1.4973728656768799, "learning_rate": 4.565497205032844e-06, "loss": 0.6493, "step": 27014 }, { "epoch": 0.69, "grad_norm": 1.6074906587600708, "learning_rate": 4.564800367506255e-06, "loss": 0.4476, "step": 27015 }, { "epoch": 0.69, "grad_norm": 1.7380539178848267, "learning_rate": 4.564103567435647e-06, "loss": 0.4734, "step": 27016 }, { "epoch": 0.69, "grad_norm": 1.4267945289611816, "learning_rate": 4.56340680482582e-06, "loss": 0.4227, "step": 27017 }, { "epoch": 0.69, "grad_norm": 2.025158405303955, "learning_rate": 4.5627100796815805e-06, "loss": 0.5163, "step": 27018 }, { "epoch": 0.69, "grad_norm": 2.0690369606018066, "learning_rate": 4.562013392007727e-06, "loss": 0.7502, "step": 27019 }, { "epoch": 0.69, "grad_norm": 1.8776991367340088, "learning_rate": 4.5613167418090566e-06, "loss": 0.5858, "step": 27020 }, { "epoch": 0.69, "grad_norm": 1.6855475902557373, "learning_rate": 4.560620129090378e-06, "loss": 0.4778, "step": 27021 }, { "epoch": 0.69, "grad_norm": 6.720380783081055, "learning_rate": 4.559923553856489e-06, "loss": 0.4913, "step": 27022 }, { "epoch": 0.69, "grad_norm": 1.1668896675109863, "learning_rate": 4.559227016112189e-06, "loss": 0.5253, "step": 27023 }, { "epoch": 0.69, "grad_norm": 2.9113857746124268, "learning_rate": 4.558530515862274e-06, "loss": 0.5185, "step": 27024 }, { "epoch": 0.69, "grad_norm": 3.345595598220825, "learning_rate": 4.557834053111552e-06, "loss": 0.5718, "step": 27025 }, { "epoch": 0.69, "grad_norm": 10.719066619873047, "learning_rate": 4.5571376278648174e-06, "loss": 0.7553, "step": 27026 }, { "epoch": 0.69, "grad_norm": 1.274018406867981, "learning_rate": 4.556441240126868e-06, "loss": 0.6425, "step": 27027 }, { "epoch": 0.69, "grad_norm": 1.071632981300354, "learning_rate": 4.555744889902509e-06, "loss": 0.5299, "step": 27028 }, { "epoch": 0.69, "grad_norm": 1.3874844312667847, "learning_rate": 4.555048577196537e-06, "loss": 0.3695, "step": 27029 }, { "epoch": 0.69, "grad_norm": 2.0868451595306396, "learning_rate": 4.554352302013745e-06, "loss": 0.5947, "step": 27030 }, { "epoch": 0.69, "grad_norm": 1.8310924768447876, "learning_rate": 4.553656064358939e-06, "loss": 0.6016, "step": 27031 }, { "epoch": 0.69, "grad_norm": 1.900046944618225, "learning_rate": 4.552959864236913e-06, "loss": 0.6026, "step": 27032 }, { "epoch": 0.69, "grad_norm": 3.2552239894866943, "learning_rate": 4.552263701652466e-06, "loss": 0.7027, "step": 27033 }, { "epoch": 0.69, "grad_norm": 1.4701330661773682, "learning_rate": 4.551567576610392e-06, "loss": 0.5806, "step": 27034 }, { "epoch": 0.69, "grad_norm": 1.5461338758468628, "learning_rate": 4.550871489115495e-06, "loss": 0.559, "step": 27035 }, { "epoch": 0.69, "grad_norm": 1.5021270513534546, "learning_rate": 4.550175439172567e-06, "loss": 0.594, "step": 27036 }, { "epoch": 0.69, "grad_norm": 1.6990256309509277, "learning_rate": 4.549479426786403e-06, "loss": 0.5289, "step": 27037 }, { "epoch": 0.69, "grad_norm": 1.151620864868164, "learning_rate": 4.5487834519618065e-06, "loss": 0.5669, "step": 27038 }, { "epoch": 0.69, "grad_norm": 2.959289789199829, "learning_rate": 4.548087514703568e-06, "loss": 0.6419, "step": 27039 }, { "epoch": 0.69, "grad_norm": 1.4947770833969116, "learning_rate": 4.547391615016488e-06, "loss": 0.4342, "step": 27040 }, { "epoch": 0.69, "grad_norm": 1.8773040771484375, "learning_rate": 4.5466957529053536e-06, "loss": 0.5297, "step": 27041 }, { "epoch": 0.69, "grad_norm": 1.770060658454895, "learning_rate": 4.54599992837497e-06, "loss": 0.5711, "step": 27042 }, { "epoch": 0.69, "grad_norm": 1.3455256223678589, "learning_rate": 4.5453041414301276e-06, "loss": 0.4686, "step": 27043 }, { "epoch": 0.69, "grad_norm": 0.714022159576416, "learning_rate": 4.54460839207562e-06, "loss": 0.426, "step": 27044 }, { "epoch": 0.69, "grad_norm": 3.8858163356781006, "learning_rate": 4.543912680316246e-06, "loss": 0.6425, "step": 27045 }, { "epoch": 0.69, "grad_norm": 3.2622506618499756, "learning_rate": 4.543217006156799e-06, "loss": 0.4617, "step": 27046 }, { "epoch": 0.69, "grad_norm": 1.2747806310653687, "learning_rate": 4.542521369602072e-06, "loss": 0.4742, "step": 27047 }, { "epoch": 0.69, "grad_norm": 10.107836723327637, "learning_rate": 4.541825770656855e-06, "loss": 0.5865, "step": 27048 }, { "epoch": 0.69, "grad_norm": 1.1361968517303467, "learning_rate": 4.54113020932595e-06, "loss": 0.4896, "step": 27049 }, { "epoch": 0.69, "grad_norm": 2.785224199295044, "learning_rate": 4.540434685614146e-06, "loss": 0.5388, "step": 27050 }, { "epoch": 0.69, "grad_norm": 3.3563036918640137, "learning_rate": 4.5397391995262306e-06, "loss": 0.5141, "step": 27051 }, { "epoch": 0.69, "grad_norm": 2.901754140853882, "learning_rate": 4.539043751067008e-06, "loss": 0.4344, "step": 27052 }, { "epoch": 0.69, "grad_norm": 1.9859551191329956, "learning_rate": 4.5383483402412645e-06, "loss": 0.4717, "step": 27053 }, { "epoch": 0.69, "grad_norm": 1.7310154438018799, "learning_rate": 4.537652967053788e-06, "loss": 0.6193, "step": 27054 }, { "epoch": 0.69, "grad_norm": 1.3809959888458252, "learning_rate": 4.53695763150938e-06, "loss": 0.4953, "step": 27055 }, { "epoch": 0.69, "grad_norm": 1.7404985427856445, "learning_rate": 4.536262333612827e-06, "loss": 0.5705, "step": 27056 }, { "epoch": 0.69, "grad_norm": 1.4059503078460693, "learning_rate": 4.535567073368922e-06, "loss": 0.5062, "step": 27057 }, { "epoch": 0.69, "grad_norm": 1.7862836122512817, "learning_rate": 4.534871850782453e-06, "loss": 0.5528, "step": 27058 }, { "epoch": 0.69, "grad_norm": 1.2502508163452148, "learning_rate": 4.534176665858216e-06, "loss": 0.608, "step": 27059 }, { "epoch": 0.69, "grad_norm": 1.3377070426940918, "learning_rate": 4.533481518601e-06, "loss": 0.3743, "step": 27060 }, { "epoch": 0.69, "grad_norm": 1.4964410066604614, "learning_rate": 4.5327864090155915e-06, "loss": 0.4878, "step": 27061 }, { "epoch": 0.69, "grad_norm": 4.483980655670166, "learning_rate": 4.5320913371067876e-06, "loss": 0.5499, "step": 27062 }, { "epoch": 0.69, "grad_norm": 9.68632984161377, "learning_rate": 4.531396302879375e-06, "loss": 0.6939, "step": 27063 }, { "epoch": 0.69, "grad_norm": 2.173154354095459, "learning_rate": 4.530701306338143e-06, "loss": 0.4647, "step": 27064 }, { "epoch": 0.69, "grad_norm": 1.805662751197815, "learning_rate": 4.530006347487878e-06, "loss": 0.5251, "step": 27065 }, { "epoch": 0.69, "grad_norm": 1.628283977508545, "learning_rate": 4.529311426333377e-06, "loss": 0.5613, "step": 27066 }, { "epoch": 0.69, "grad_norm": 2.1314940452575684, "learning_rate": 4.528616542879424e-06, "loss": 0.6408, "step": 27067 }, { "epoch": 0.69, "grad_norm": 1.2077809572219849, "learning_rate": 4.5279216971308045e-06, "loss": 0.4131, "step": 27068 }, { "epoch": 0.69, "grad_norm": 1.6052061319351196, "learning_rate": 4.527226889092314e-06, "loss": 0.5547, "step": 27069 }, { "epoch": 0.69, "grad_norm": 6.548056125640869, "learning_rate": 4.526532118768738e-06, "loss": 0.6999, "step": 27070 }, { "epoch": 0.69, "grad_norm": 1.2410714626312256, "learning_rate": 4.52583738616486e-06, "loss": 0.4684, "step": 27071 }, { "epoch": 0.69, "grad_norm": 3.206038475036621, "learning_rate": 4.5251426912854756e-06, "loss": 0.3922, "step": 27072 }, { "epoch": 0.69, "grad_norm": 1.2335690259933472, "learning_rate": 4.524448034135368e-06, "loss": 0.4983, "step": 27073 }, { "epoch": 0.69, "grad_norm": 1.5552514791488647, "learning_rate": 4.523753414719325e-06, "loss": 0.4954, "step": 27074 }, { "epoch": 0.69, "grad_norm": 1.6521419286727905, "learning_rate": 4.523058833042129e-06, "loss": 0.6674, "step": 27075 }, { "epoch": 0.69, "grad_norm": 2.530474901199341, "learning_rate": 4.522364289108574e-06, "loss": 0.5246, "step": 27076 }, { "epoch": 0.69, "grad_norm": 1.9254395961761475, "learning_rate": 4.521669782923442e-06, "loss": 0.505, "step": 27077 }, { "epoch": 0.69, "grad_norm": 1.2928133010864258, "learning_rate": 4.520975314491518e-06, "loss": 0.6646, "step": 27078 }, { "epoch": 0.69, "grad_norm": 1.0762046575546265, "learning_rate": 4.5202808838175926e-06, "loss": 0.5055, "step": 27079 }, { "epoch": 0.69, "grad_norm": 1.3998839855194092, "learning_rate": 4.519586490906449e-06, "loss": 0.4938, "step": 27080 }, { "epoch": 0.69, "grad_norm": 1.380614995956421, "learning_rate": 4.518892135762873e-06, "loss": 0.4137, "step": 27081 }, { "epoch": 0.69, "grad_norm": 1.6674220561981201, "learning_rate": 4.518197818391643e-06, "loss": 0.5034, "step": 27082 }, { "epoch": 0.69, "grad_norm": 1.5943537950515747, "learning_rate": 4.517503538797554e-06, "loss": 0.5251, "step": 27083 }, { "epoch": 0.69, "grad_norm": 1.706498384475708, "learning_rate": 4.516809296985387e-06, "loss": 0.6209, "step": 27084 }, { "epoch": 0.69, "grad_norm": 4.841279983520508, "learning_rate": 4.516115092959922e-06, "loss": 0.6754, "step": 27085 }, { "epoch": 0.69, "grad_norm": 1.403490424156189, "learning_rate": 4.5154209267259495e-06, "loss": 0.4579, "step": 27086 }, { "epoch": 0.69, "grad_norm": 2.4497923851013184, "learning_rate": 4.5147267982882506e-06, "loss": 0.6415, "step": 27087 }, { "epoch": 0.69, "grad_norm": 4.476964473724365, "learning_rate": 4.514032707651608e-06, "loss": 0.7224, "step": 27088 }, { "epoch": 0.69, "grad_norm": 5.6472368240356445, "learning_rate": 4.5133386548208015e-06, "loss": 0.6428, "step": 27089 }, { "epoch": 0.69, "grad_norm": 1.5670722723007202, "learning_rate": 4.512644639800622e-06, "loss": 0.5109, "step": 27090 }, { "epoch": 0.69, "grad_norm": 4.4752116203308105, "learning_rate": 4.511950662595847e-06, "loss": 0.587, "step": 27091 }, { "epoch": 0.69, "grad_norm": 1.3160086870193481, "learning_rate": 4.511256723211258e-06, "loss": 0.4819, "step": 27092 }, { "epoch": 0.69, "grad_norm": 1.591059684753418, "learning_rate": 4.510562821651643e-06, "loss": 0.4927, "step": 27093 }, { "epoch": 0.69, "grad_norm": 1.3734854459762573, "learning_rate": 4.509868957921779e-06, "loss": 0.4123, "step": 27094 }, { "epoch": 0.69, "grad_norm": 6.185238838195801, "learning_rate": 4.509175132026447e-06, "loss": 0.5687, "step": 27095 }, { "epoch": 0.69, "grad_norm": 4.103571891784668, "learning_rate": 4.508481343970432e-06, "loss": 0.4638, "step": 27096 }, { "epoch": 0.69, "grad_norm": 2.137096643447876, "learning_rate": 4.507787593758515e-06, "loss": 0.5363, "step": 27097 }, { "epoch": 0.69, "grad_norm": 0.9481900334358215, "learning_rate": 4.507093881395474e-06, "loss": 0.4727, "step": 27098 }, { "epoch": 0.69, "grad_norm": 3.6746792793273926, "learning_rate": 4.506400206886088e-06, "loss": 0.7669, "step": 27099 }, { "epoch": 0.69, "grad_norm": 2.1460561752319336, "learning_rate": 4.505706570235145e-06, "loss": 0.4569, "step": 27100 }, { "epoch": 0.69, "grad_norm": 1.9960523843765259, "learning_rate": 4.505012971447419e-06, "loss": 0.4658, "step": 27101 }, { "epoch": 0.69, "grad_norm": 1.5306981801986694, "learning_rate": 4.504319410527688e-06, "loss": 0.5571, "step": 27102 }, { "epoch": 0.69, "grad_norm": 1.5617398023605347, "learning_rate": 4.5036258874807394e-06, "loss": 0.4972, "step": 27103 }, { "epoch": 0.69, "grad_norm": 1.1834688186645508, "learning_rate": 4.502932402311347e-06, "loss": 0.4751, "step": 27104 }, { "epoch": 0.69, "grad_norm": 1.3256113529205322, "learning_rate": 4.502238955024291e-06, "loss": 0.5408, "step": 27105 }, { "epoch": 0.69, "grad_norm": 1.9755016565322876, "learning_rate": 4.5015455456243475e-06, "loss": 0.5, "step": 27106 }, { "epoch": 0.69, "grad_norm": 1.216858148574829, "learning_rate": 4.5008521741163e-06, "loss": 0.4691, "step": 27107 }, { "epoch": 0.69, "grad_norm": 1.6840606927871704, "learning_rate": 4.500158840504924e-06, "loss": 0.6539, "step": 27108 }, { "epoch": 0.69, "grad_norm": 3.598212242126465, "learning_rate": 4.499465544794996e-06, "loss": 0.6187, "step": 27109 }, { "epoch": 0.69, "grad_norm": 1.9686665534973145, "learning_rate": 4.498772286991298e-06, "loss": 0.7292, "step": 27110 }, { "epoch": 0.69, "grad_norm": 1.6547945737838745, "learning_rate": 4.498079067098606e-06, "loss": 0.622, "step": 27111 }, { "epoch": 0.69, "grad_norm": 5.954325199127197, "learning_rate": 4.4973858851216965e-06, "loss": 0.4656, "step": 27112 }, { "epoch": 0.69, "grad_norm": 3.249037981033325, "learning_rate": 4.496692741065341e-06, "loss": 0.507, "step": 27113 }, { "epoch": 0.69, "grad_norm": 1.2086155414581299, "learning_rate": 4.495999634934328e-06, "loss": 0.6019, "step": 27114 }, { "epoch": 0.69, "grad_norm": 3.1975560188293457, "learning_rate": 4.495306566733426e-06, "loss": 0.5929, "step": 27115 }, { "epoch": 0.69, "grad_norm": 1.160818099975586, "learning_rate": 4.494613536467409e-06, "loss": 0.4702, "step": 27116 }, { "epoch": 0.7, "grad_norm": 5.404753684997559, "learning_rate": 4.493920544141061e-06, "loss": 0.5807, "step": 27117 }, { "epoch": 0.7, "grad_norm": 1.2859042882919312, "learning_rate": 4.493227589759153e-06, "loss": 0.4375, "step": 27118 }, { "epoch": 0.7, "grad_norm": 1.9324363470077515, "learning_rate": 4.492534673326457e-06, "loss": 0.5452, "step": 27119 }, { "epoch": 0.7, "grad_norm": 6.994596004486084, "learning_rate": 4.491841794847755e-06, "loss": 0.6811, "step": 27120 }, { "epoch": 0.7, "grad_norm": 1.2204840183258057, "learning_rate": 4.49114895432782e-06, "loss": 0.4448, "step": 27121 }, { "epoch": 0.7, "grad_norm": 1.4733004570007324, "learning_rate": 4.490456151771425e-06, "loss": 0.5223, "step": 27122 }, { "epoch": 0.7, "grad_norm": 0.992408812046051, "learning_rate": 4.489763387183341e-06, "loss": 0.5063, "step": 27123 }, { "epoch": 0.7, "grad_norm": 1.125799298286438, "learning_rate": 4.489070660568349e-06, "loss": 0.4396, "step": 27124 }, { "epoch": 0.7, "grad_norm": 1.547385334968567, "learning_rate": 4.48837797193122e-06, "loss": 0.4729, "step": 27125 }, { "epoch": 0.7, "grad_norm": 1.5895670652389526, "learning_rate": 4.487685321276725e-06, "loss": 0.7009, "step": 27126 }, { "epoch": 0.7, "grad_norm": 1.562390923500061, "learning_rate": 4.486992708609641e-06, "loss": 0.4622, "step": 27127 }, { "epoch": 0.7, "grad_norm": 13.570178031921387, "learning_rate": 4.486300133934741e-06, "loss": 0.5156, "step": 27128 }, { "epoch": 0.7, "grad_norm": 1.8578534126281738, "learning_rate": 4.485607597256796e-06, "loss": 0.5699, "step": 27129 }, { "epoch": 0.7, "grad_norm": 1.634623646736145, "learning_rate": 4.484915098580575e-06, "loss": 0.4775, "step": 27130 }, { "epoch": 0.7, "grad_norm": 1.5781055688858032, "learning_rate": 4.484222637910859e-06, "loss": 0.6182, "step": 27131 }, { "epoch": 0.7, "grad_norm": 1.0731725692749023, "learning_rate": 4.483530215252414e-06, "loss": 0.4607, "step": 27132 }, { "epoch": 0.7, "grad_norm": 2.1684978008270264, "learning_rate": 4.482837830610011e-06, "loss": 0.4387, "step": 27133 }, { "epoch": 0.7, "grad_norm": 1.2010822296142578, "learning_rate": 4.4821454839884245e-06, "loss": 0.4513, "step": 27134 }, { "epoch": 0.7, "grad_norm": 1.36686372756958, "learning_rate": 4.481453175392427e-06, "loss": 0.4788, "step": 27135 }, { "epoch": 0.7, "grad_norm": 1.4865256547927856, "learning_rate": 4.480760904826782e-06, "loss": 0.5367, "step": 27136 }, { "epoch": 0.7, "grad_norm": 2.4105541706085205, "learning_rate": 4.48006867229627e-06, "loss": 0.4789, "step": 27137 }, { "epoch": 0.7, "grad_norm": 1.694338083267212, "learning_rate": 4.479376477805656e-06, "loss": 0.4806, "step": 27138 }, { "epoch": 0.7, "grad_norm": 1.878801703453064, "learning_rate": 4.478684321359712e-06, "loss": 0.4547, "step": 27139 }, { "epoch": 0.7, "grad_norm": 2.923609495162964, "learning_rate": 4.4779922029632014e-06, "loss": 0.6768, "step": 27140 }, { "epoch": 0.7, "grad_norm": 1.9576174020767212, "learning_rate": 4.4773001226209045e-06, "loss": 0.4589, "step": 27141 }, { "epoch": 0.7, "grad_norm": 1.1285396814346313, "learning_rate": 4.476608080337584e-06, "loss": 0.4689, "step": 27142 }, { "epoch": 0.7, "grad_norm": 1.9489033222198486, "learning_rate": 4.475916076118009e-06, "loss": 0.5745, "step": 27143 }, { "epoch": 0.7, "grad_norm": 2.2631638050079346, "learning_rate": 4.475224109966952e-06, "loss": 0.8292, "step": 27144 }, { "epoch": 0.7, "grad_norm": 4.978315830230713, "learning_rate": 4.4745321818891795e-06, "loss": 0.5156, "step": 27145 }, { "epoch": 0.7, "grad_norm": 2.1174814701080322, "learning_rate": 4.47384029188946e-06, "loss": 0.4644, "step": 27146 }, { "epoch": 0.7, "grad_norm": 1.9726060628890991, "learning_rate": 4.473148439972558e-06, "loss": 0.6679, "step": 27147 }, { "epoch": 0.7, "grad_norm": 1.9932618141174316, "learning_rate": 4.472456626143247e-06, "loss": 0.5935, "step": 27148 }, { "epoch": 0.7, "grad_norm": 2.924302101135254, "learning_rate": 4.471764850406292e-06, "loss": 0.767, "step": 27149 }, { "epoch": 0.7, "grad_norm": 1.7006303071975708, "learning_rate": 4.471073112766458e-06, "loss": 0.595, "step": 27150 }, { "epoch": 0.7, "grad_norm": 6.822904586791992, "learning_rate": 4.470381413228519e-06, "loss": 0.8077, "step": 27151 }, { "epoch": 0.7, "grad_norm": 1.9514446258544922, "learning_rate": 4.469689751797236e-06, "loss": 0.819, "step": 27152 }, { "epoch": 0.7, "grad_norm": 3.377004623413086, "learning_rate": 4.4689981284773775e-06, "loss": 0.6605, "step": 27153 }, { "epoch": 0.7, "grad_norm": 4.916428089141846, "learning_rate": 4.468306543273705e-06, "loss": 0.5064, "step": 27154 }, { "epoch": 0.7, "grad_norm": 1.3408613204956055, "learning_rate": 4.467614996190992e-06, "loss": 0.5987, "step": 27155 }, { "epoch": 0.7, "grad_norm": 1.0539638996124268, "learning_rate": 4.466923487234e-06, "loss": 0.59, "step": 27156 }, { "epoch": 0.7, "grad_norm": 2.863781213760376, "learning_rate": 4.466232016407492e-06, "loss": 0.508, "step": 27157 }, { "epoch": 0.7, "grad_norm": 2.4141743183135986, "learning_rate": 4.46554058371624e-06, "loss": 0.6059, "step": 27158 }, { "epoch": 0.7, "grad_norm": 1.156090259552002, "learning_rate": 4.464849189165006e-06, "loss": 0.4902, "step": 27159 }, { "epoch": 0.7, "grad_norm": 5.749197006225586, "learning_rate": 4.464157832758549e-06, "loss": 0.5479, "step": 27160 }, { "epoch": 0.7, "grad_norm": 1.2954564094543457, "learning_rate": 4.463466514501642e-06, "loss": 0.5722, "step": 27161 }, { "epoch": 0.7, "grad_norm": 1.0903257131576538, "learning_rate": 4.462775234399046e-06, "loss": 0.4687, "step": 27162 }, { "epoch": 0.7, "grad_norm": 5.921594142913818, "learning_rate": 4.4620839924555246e-06, "loss": 0.6029, "step": 27163 }, { "epoch": 0.7, "grad_norm": 1.6835241317749023, "learning_rate": 4.4613927886758364e-06, "loss": 0.6352, "step": 27164 }, { "epoch": 0.7, "grad_norm": 1.1431679725646973, "learning_rate": 4.460701623064754e-06, "loss": 0.6071, "step": 27165 }, { "epoch": 0.7, "grad_norm": 1.667816400527954, "learning_rate": 4.460010495627035e-06, "loss": 0.514, "step": 27166 }, { "epoch": 0.7, "grad_norm": 1.0227642059326172, "learning_rate": 4.45931940636744e-06, "loss": 0.4499, "step": 27167 }, { "epoch": 0.7, "grad_norm": 1.639206886291504, "learning_rate": 4.4586283552907386e-06, "loss": 0.5767, "step": 27168 }, { "epoch": 0.7, "grad_norm": 1.3416966199874878, "learning_rate": 4.457937342401688e-06, "loss": 0.3852, "step": 27169 }, { "epoch": 0.7, "grad_norm": 1.8305730819702148, "learning_rate": 4.457246367705051e-06, "loss": 0.7029, "step": 27170 }, { "epoch": 0.7, "grad_norm": 1.123086929321289, "learning_rate": 4.456555431205588e-06, "loss": 0.4516, "step": 27171 }, { "epoch": 0.7, "grad_norm": 1.3126200437545776, "learning_rate": 4.4558645329080655e-06, "loss": 0.6008, "step": 27172 }, { "epoch": 0.7, "grad_norm": 1.4070712327957153, "learning_rate": 4.45517367281724e-06, "loss": 0.4934, "step": 27173 }, { "epoch": 0.7, "grad_norm": 1.4384633302688599, "learning_rate": 4.454482850937871e-06, "loss": 0.5354, "step": 27174 }, { "epoch": 0.7, "grad_norm": 1.965819001197815, "learning_rate": 4.453792067274727e-06, "loss": 0.5996, "step": 27175 }, { "epoch": 0.7, "grad_norm": 1.2268389463424683, "learning_rate": 4.453101321832563e-06, "loss": 0.5195, "step": 27176 }, { "epoch": 0.7, "grad_norm": 0.9453023672103882, "learning_rate": 4.452410614616135e-06, "loss": 0.412, "step": 27177 }, { "epoch": 0.7, "grad_norm": 3.264662265777588, "learning_rate": 4.451719945630213e-06, "loss": 0.6668, "step": 27178 }, { "epoch": 0.7, "grad_norm": 2.800165891647339, "learning_rate": 4.45102931487955e-06, "loss": 0.6527, "step": 27179 }, { "epoch": 0.7, "grad_norm": 1.810164213180542, "learning_rate": 4.4503387223689075e-06, "loss": 0.5561, "step": 27180 }, { "epoch": 0.7, "grad_norm": 1.426122784614563, "learning_rate": 4.44964816810304e-06, "loss": 0.5099, "step": 27181 }, { "epoch": 0.7, "grad_norm": 0.9886988401412964, "learning_rate": 4.4489576520867135e-06, "loss": 0.579, "step": 27182 }, { "epoch": 0.7, "grad_norm": 1.2605385780334473, "learning_rate": 4.448267174324684e-06, "loss": 0.6892, "step": 27183 }, { "epoch": 0.7, "grad_norm": 6.733464241027832, "learning_rate": 4.447576734821706e-06, "loss": 0.5906, "step": 27184 }, { "epoch": 0.7, "grad_norm": 1.5234155654907227, "learning_rate": 4.446886333582543e-06, "loss": 0.6864, "step": 27185 }, { "epoch": 0.7, "grad_norm": 1.465975046157837, "learning_rate": 4.446195970611952e-06, "loss": 0.5607, "step": 27186 }, { "epoch": 0.7, "grad_norm": 1.3456382751464844, "learning_rate": 4.445505645914688e-06, "loss": 0.5182, "step": 27187 }, { "epoch": 0.7, "grad_norm": 5.597438335418701, "learning_rate": 4.444815359495507e-06, "loss": 0.5064, "step": 27188 }, { "epoch": 0.7, "grad_norm": 1.0984426736831665, "learning_rate": 4.444125111359171e-06, "loss": 0.5817, "step": 27189 }, { "epoch": 0.7, "grad_norm": 2.570744514465332, "learning_rate": 4.443434901510434e-06, "loss": 0.5625, "step": 27190 }, { "epoch": 0.7, "grad_norm": 1.7080386877059937, "learning_rate": 4.442744729954049e-06, "loss": 0.4084, "step": 27191 }, { "epoch": 0.7, "grad_norm": 3.7872726917266846, "learning_rate": 4.442054596694781e-06, "loss": 0.661, "step": 27192 }, { "epoch": 0.7, "grad_norm": 1.354444980621338, "learning_rate": 4.44136450173738e-06, "loss": 0.5652, "step": 27193 }, { "epoch": 0.7, "grad_norm": 2.4279770851135254, "learning_rate": 4.440674445086602e-06, "loss": 0.5409, "step": 27194 }, { "epoch": 0.7, "grad_norm": 1.286812424659729, "learning_rate": 4.439984426747201e-06, "loss": 0.611, "step": 27195 }, { "epoch": 0.7, "grad_norm": 1.4433355331420898, "learning_rate": 4.439294446723937e-06, "loss": 0.543, "step": 27196 }, { "epoch": 0.7, "grad_norm": 1.3429101705551147, "learning_rate": 4.438604505021561e-06, "loss": 0.6166, "step": 27197 }, { "epoch": 0.7, "grad_norm": 1.6603904962539673, "learning_rate": 4.437914601644826e-06, "loss": 0.5809, "step": 27198 }, { "epoch": 0.7, "grad_norm": 1.2699958086013794, "learning_rate": 4.437224736598492e-06, "loss": 0.4789, "step": 27199 }, { "epoch": 0.7, "grad_norm": 1.2728197574615479, "learning_rate": 4.436534909887311e-06, "loss": 0.4147, "step": 27200 }, { "epoch": 0.7, "grad_norm": 1.3555325269699097, "learning_rate": 4.435845121516032e-06, "loss": 0.4916, "step": 27201 }, { "epoch": 0.7, "grad_norm": 1.1275972127914429, "learning_rate": 4.435155371489417e-06, "loss": 0.4872, "step": 27202 }, { "epoch": 0.7, "grad_norm": 14.902229309082031, "learning_rate": 4.4344656598122145e-06, "loss": 0.5854, "step": 27203 }, { "epoch": 0.7, "grad_norm": 1.0546491146087646, "learning_rate": 4.433775986489179e-06, "loss": 0.4646, "step": 27204 }, { "epoch": 0.7, "grad_norm": 2.349498987197876, "learning_rate": 4.433086351525058e-06, "loss": 0.5349, "step": 27205 }, { "epoch": 0.7, "grad_norm": 1.3666056394577026, "learning_rate": 4.432396754924612e-06, "loss": 0.5414, "step": 27206 }, { "epoch": 0.7, "grad_norm": 3.5790953636169434, "learning_rate": 4.43170719669259e-06, "loss": 0.6975, "step": 27207 }, { "epoch": 0.7, "grad_norm": 1.1381338834762573, "learning_rate": 4.4310176768337385e-06, "loss": 0.5576, "step": 27208 }, { "epoch": 0.7, "grad_norm": 3.280714273452759, "learning_rate": 4.43032819535282e-06, "loss": 0.5708, "step": 27209 }, { "epoch": 0.7, "grad_norm": 6.206362724304199, "learning_rate": 4.429638752254579e-06, "loss": 0.6036, "step": 27210 }, { "epoch": 0.7, "grad_norm": 4.274889945983887, "learning_rate": 4.428949347543769e-06, "loss": 0.5959, "step": 27211 }, { "epoch": 0.7, "grad_norm": 3.3309168815612793, "learning_rate": 4.428259981225136e-06, "loss": 0.598, "step": 27212 }, { "epoch": 0.7, "grad_norm": 1.375260829925537, "learning_rate": 4.427570653303439e-06, "loss": 0.3877, "step": 27213 }, { "epoch": 0.7, "grad_norm": 3.42126202583313, "learning_rate": 4.426881363783423e-06, "loss": 0.6472, "step": 27214 }, { "epoch": 0.7, "grad_norm": 1.4036401510238647, "learning_rate": 4.426192112669836e-06, "loss": 0.6088, "step": 27215 }, { "epoch": 0.7, "grad_norm": 3.2981200218200684, "learning_rate": 4.425502899967435e-06, "loss": 0.5439, "step": 27216 }, { "epoch": 0.7, "grad_norm": 1.186474323272705, "learning_rate": 4.424813725680966e-06, "loss": 0.5135, "step": 27217 }, { "epoch": 0.7, "grad_norm": 1.561849594116211, "learning_rate": 4.424124589815174e-06, "loss": 0.5404, "step": 27218 }, { "epoch": 0.7, "grad_norm": 2.082613945007324, "learning_rate": 4.423435492374816e-06, "loss": 0.4953, "step": 27219 }, { "epoch": 0.7, "grad_norm": 2.1063742637634277, "learning_rate": 4.422746433364638e-06, "loss": 0.5274, "step": 27220 }, { "epoch": 0.7, "grad_norm": 2.067244529724121, "learning_rate": 4.422057412789385e-06, "loss": 0.5351, "step": 27221 }, { "epoch": 0.7, "grad_norm": 1.3908522129058838, "learning_rate": 4.421368430653806e-06, "loss": 0.6749, "step": 27222 }, { "epoch": 0.7, "grad_norm": 1.590180516242981, "learning_rate": 4.420679486962656e-06, "loss": 0.5641, "step": 27223 }, { "epoch": 0.7, "grad_norm": 1.8469555377960205, "learning_rate": 4.419990581720676e-06, "loss": 0.5905, "step": 27224 }, { "epoch": 0.7, "grad_norm": 1.6500314474105835, "learning_rate": 4.4193017149326115e-06, "loss": 0.4577, "step": 27225 }, { "epoch": 0.7, "grad_norm": 1.4528186321258545, "learning_rate": 4.418612886603218e-06, "loss": 0.5427, "step": 27226 }, { "epoch": 0.7, "grad_norm": 1.9280340671539307, "learning_rate": 4.417924096737236e-06, "loss": 0.5394, "step": 27227 }, { "epoch": 0.7, "grad_norm": 1.0304834842681885, "learning_rate": 4.417235345339416e-06, "loss": 0.5138, "step": 27228 }, { "epoch": 0.7, "grad_norm": 1.0187382698059082, "learning_rate": 4.416546632414498e-06, "loss": 0.447, "step": 27229 }, { "epoch": 0.7, "grad_norm": 3.944969892501831, "learning_rate": 4.4158579579672364e-06, "loss": 0.55, "step": 27230 }, { "epoch": 0.7, "grad_norm": 3.04815411567688, "learning_rate": 4.415169322002374e-06, "loss": 0.4988, "step": 27231 }, { "epoch": 0.7, "grad_norm": 1.2479040622711182, "learning_rate": 4.414480724524652e-06, "loss": 0.5368, "step": 27232 }, { "epoch": 0.7, "grad_norm": 4.054831027984619, "learning_rate": 4.413792165538824e-06, "loss": 0.6965, "step": 27233 }, { "epoch": 0.7, "grad_norm": 10.734136581420898, "learning_rate": 4.41310364504963e-06, "loss": 0.4944, "step": 27234 }, { "epoch": 0.7, "grad_norm": 1.6308999061584473, "learning_rate": 4.412415163061816e-06, "loss": 0.5638, "step": 27235 }, { "epoch": 0.7, "grad_norm": 4.929088115692139, "learning_rate": 4.4117267195801215e-06, "loss": 0.5014, "step": 27236 }, { "epoch": 0.7, "grad_norm": 1.6667810678482056, "learning_rate": 4.411038314609301e-06, "loss": 0.5437, "step": 27237 }, { "epoch": 0.7, "grad_norm": 4.429689884185791, "learning_rate": 4.410349948154091e-06, "loss": 0.6774, "step": 27238 }, { "epoch": 0.7, "grad_norm": 1.1844788789749146, "learning_rate": 4.4096616202192354e-06, "loss": 0.5209, "step": 27239 }, { "epoch": 0.7, "grad_norm": 1.8676109313964844, "learning_rate": 4.408973330809483e-06, "loss": 0.6266, "step": 27240 }, { "epoch": 0.7, "grad_norm": 2.164304494857788, "learning_rate": 4.408285079929574e-06, "loss": 0.618, "step": 27241 }, { "epoch": 0.7, "grad_norm": 1.6160967350006104, "learning_rate": 4.4075968675842475e-06, "loss": 0.6224, "step": 27242 }, { "epoch": 0.7, "grad_norm": 1.6631441116333008, "learning_rate": 4.406908693778252e-06, "loss": 0.6571, "step": 27243 }, { "epoch": 0.7, "grad_norm": 1.643864393234253, "learning_rate": 4.40622055851633e-06, "loss": 0.3664, "step": 27244 }, { "epoch": 0.7, "grad_norm": 3.4043500423431396, "learning_rate": 4.4055324618032205e-06, "loss": 0.4726, "step": 27245 }, { "epoch": 0.7, "grad_norm": 1.4180970191955566, "learning_rate": 4.404844403643663e-06, "loss": 0.6418, "step": 27246 }, { "epoch": 0.7, "grad_norm": 2.494114398956299, "learning_rate": 4.404156384042406e-06, "loss": 0.6346, "step": 27247 }, { "epoch": 0.7, "grad_norm": 2.067471742630005, "learning_rate": 4.403468403004189e-06, "loss": 0.4487, "step": 27248 }, { "epoch": 0.7, "grad_norm": 1.536185622215271, "learning_rate": 4.402780460533746e-06, "loss": 0.721, "step": 27249 }, { "epoch": 0.7, "grad_norm": 1.5294214487075806, "learning_rate": 4.4020925566358315e-06, "loss": 0.5439, "step": 27250 }, { "epoch": 0.7, "grad_norm": 3.009230852127075, "learning_rate": 4.401404691315175e-06, "loss": 0.4946, "step": 27251 }, { "epoch": 0.7, "grad_norm": 0.8119751214981079, "learning_rate": 4.400716864576516e-06, "loss": 0.4715, "step": 27252 }, { "epoch": 0.7, "grad_norm": 1.6410366296768188, "learning_rate": 4.400029076424603e-06, "loss": 0.4367, "step": 27253 }, { "epoch": 0.7, "grad_norm": 1.8743568658828735, "learning_rate": 4.399341326864169e-06, "loss": 0.4961, "step": 27254 }, { "epoch": 0.7, "grad_norm": 1.4446989297866821, "learning_rate": 4.398653615899955e-06, "loss": 0.5651, "step": 27255 }, { "epoch": 0.7, "grad_norm": 4.1646037101745605, "learning_rate": 4.397965943536703e-06, "loss": 0.4887, "step": 27256 }, { "epoch": 0.7, "grad_norm": 6.2365617752075195, "learning_rate": 4.397278309779152e-06, "loss": 0.7029, "step": 27257 }, { "epoch": 0.7, "grad_norm": 1.330660343170166, "learning_rate": 4.396590714632038e-06, "loss": 0.4527, "step": 27258 }, { "epoch": 0.7, "grad_norm": 1.9835728406906128, "learning_rate": 4.395903158100095e-06, "loss": 0.4541, "step": 27259 }, { "epoch": 0.7, "grad_norm": 1.145439863204956, "learning_rate": 4.395215640188072e-06, "loss": 0.4631, "step": 27260 }, { "epoch": 0.7, "grad_norm": 1.306657314300537, "learning_rate": 4.3945281609007015e-06, "loss": 0.5525, "step": 27261 }, { "epoch": 0.7, "grad_norm": 2.7914974689483643, "learning_rate": 4.393840720242717e-06, "loss": 0.4697, "step": 27262 }, { "epoch": 0.7, "grad_norm": 1.4885677099227905, "learning_rate": 4.393153318218865e-06, "loss": 0.5007, "step": 27263 }, { "epoch": 0.7, "grad_norm": 7.072761535644531, "learning_rate": 4.392465954833877e-06, "loss": 0.5931, "step": 27264 }, { "epoch": 0.7, "grad_norm": 1.4349365234375, "learning_rate": 4.39177863009249e-06, "loss": 0.4826, "step": 27265 }, { "epoch": 0.7, "grad_norm": 1.2930760383605957, "learning_rate": 4.3910913439994385e-06, "loss": 0.5132, "step": 27266 }, { "epoch": 0.7, "grad_norm": 1.4659565687179565, "learning_rate": 4.390404096559464e-06, "loss": 0.4944, "step": 27267 }, { "epoch": 0.7, "grad_norm": 1.8851032257080078, "learning_rate": 4.389716887777301e-06, "loss": 0.5869, "step": 27268 }, { "epoch": 0.7, "grad_norm": 1.395940899848938, "learning_rate": 4.389029717657681e-06, "loss": 0.4668, "step": 27269 }, { "epoch": 0.7, "grad_norm": 1.4611051082611084, "learning_rate": 4.388342586205346e-06, "loss": 0.6893, "step": 27270 }, { "epoch": 0.7, "grad_norm": 1.527660608291626, "learning_rate": 4.387655493425029e-06, "loss": 0.5789, "step": 27271 }, { "epoch": 0.7, "grad_norm": 1.2707785367965698, "learning_rate": 4.386968439321462e-06, "loss": 0.4933, "step": 27272 }, { "epoch": 0.7, "grad_norm": 1.983083963394165, "learning_rate": 4.386281423899384e-06, "loss": 0.5794, "step": 27273 }, { "epoch": 0.7, "grad_norm": 1.7746797800064087, "learning_rate": 4.385594447163527e-06, "loss": 0.532, "step": 27274 }, { "epoch": 0.7, "grad_norm": 7.316122055053711, "learning_rate": 4.384907509118626e-06, "loss": 0.509, "step": 27275 }, { "epoch": 0.7, "grad_norm": 1.5871074199676514, "learning_rate": 4.3842206097694125e-06, "loss": 0.5196, "step": 27276 }, { "epoch": 0.7, "grad_norm": 2.164792776107788, "learning_rate": 4.383533749120625e-06, "loss": 0.486, "step": 27277 }, { "epoch": 0.7, "grad_norm": 0.9596657156944275, "learning_rate": 4.382846927176995e-06, "loss": 0.4809, "step": 27278 }, { "epoch": 0.7, "grad_norm": 1.3621701002120972, "learning_rate": 4.38216014394325e-06, "loss": 0.528, "step": 27279 }, { "epoch": 0.7, "grad_norm": 1.7333561182022095, "learning_rate": 4.381473399424132e-06, "loss": 0.5853, "step": 27280 }, { "epoch": 0.7, "grad_norm": 1.4855101108551025, "learning_rate": 4.38078669362437e-06, "loss": 0.4842, "step": 27281 }, { "epoch": 0.7, "grad_norm": 1.5840277671813965, "learning_rate": 4.380100026548695e-06, "loss": 0.5767, "step": 27282 }, { "epoch": 0.7, "grad_norm": 1.7724910974502563, "learning_rate": 4.379413398201837e-06, "loss": 0.584, "step": 27283 }, { "epoch": 0.7, "grad_norm": 1.6614891290664673, "learning_rate": 4.378726808588534e-06, "loss": 0.549, "step": 27284 }, { "epoch": 0.7, "grad_norm": 1.0547499656677246, "learning_rate": 4.3780402577135135e-06, "loss": 0.4625, "step": 27285 }, { "epoch": 0.7, "grad_norm": 1.1423125267028809, "learning_rate": 4.377353745581505e-06, "loss": 0.4692, "step": 27286 }, { "epoch": 0.7, "grad_norm": 2.5303823947906494, "learning_rate": 4.376667272197245e-06, "loss": 0.4715, "step": 27287 }, { "epoch": 0.7, "grad_norm": 3.6467459201812744, "learning_rate": 4.375980837565461e-06, "loss": 0.5789, "step": 27288 }, { "epoch": 0.7, "grad_norm": 1.82791006565094, "learning_rate": 4.375294441690884e-06, "loss": 0.4717, "step": 27289 }, { "epoch": 0.7, "grad_norm": 1.8439595699310303, "learning_rate": 4.37460808457824e-06, "loss": 0.4991, "step": 27290 }, { "epoch": 0.7, "grad_norm": 1.6965245008468628, "learning_rate": 4.373921766232268e-06, "loss": 0.4132, "step": 27291 }, { "epoch": 0.7, "grad_norm": 2.108581066131592, "learning_rate": 4.373235486657691e-06, "loss": 0.4713, "step": 27292 }, { "epoch": 0.7, "grad_norm": 1.718819260597229, "learning_rate": 4.372549245859237e-06, "loss": 0.5637, "step": 27293 }, { "epoch": 0.7, "grad_norm": 1.8795839548110962, "learning_rate": 4.371863043841641e-06, "loss": 0.6561, "step": 27294 }, { "epoch": 0.7, "grad_norm": 1.7445528507232666, "learning_rate": 4.371176880609629e-06, "loss": 0.5656, "step": 27295 }, { "epoch": 0.7, "grad_norm": 1.3255406618118286, "learning_rate": 4.370490756167927e-06, "loss": 0.4184, "step": 27296 }, { "epoch": 0.7, "grad_norm": 8.656880378723145, "learning_rate": 4.369804670521269e-06, "loss": 0.9539, "step": 27297 }, { "epoch": 0.7, "grad_norm": 1.2013484239578247, "learning_rate": 4.3691186236743785e-06, "loss": 0.6447, "step": 27298 }, { "epoch": 0.7, "grad_norm": 2.8515334129333496, "learning_rate": 4.368432615631986e-06, "loss": 0.582, "step": 27299 }, { "epoch": 0.7, "grad_norm": 3.1591763496398926, "learning_rate": 4.367746646398815e-06, "loss": 0.5664, "step": 27300 }, { "epoch": 0.7, "grad_norm": 0.9772937297821045, "learning_rate": 4.367060715979598e-06, "loss": 0.5053, "step": 27301 }, { "epoch": 0.7, "grad_norm": 1.2694941759109497, "learning_rate": 4.36637482437906e-06, "loss": 0.3618, "step": 27302 }, { "epoch": 0.7, "grad_norm": 4.473890781402588, "learning_rate": 4.365688971601923e-06, "loss": 0.5642, "step": 27303 }, { "epoch": 0.7, "grad_norm": 1.926776647567749, "learning_rate": 4.365003157652923e-06, "loss": 0.4455, "step": 27304 }, { "epoch": 0.7, "grad_norm": 1.418630599975586, "learning_rate": 4.3643173825367805e-06, "loss": 0.4515, "step": 27305 }, { "epoch": 0.7, "grad_norm": 5.404426097869873, "learning_rate": 4.36363164625822e-06, "loss": 0.4893, "step": 27306 }, { "epoch": 0.7, "grad_norm": 1.488025426864624, "learning_rate": 4.362945948821968e-06, "loss": 0.6414, "step": 27307 }, { "epoch": 0.7, "grad_norm": 5.39470911026001, "learning_rate": 4.362260290232754e-06, "loss": 0.5775, "step": 27308 }, { "epoch": 0.7, "grad_norm": 1.5042070150375366, "learning_rate": 4.361574670495299e-06, "loss": 0.5246, "step": 27309 }, { "epoch": 0.7, "grad_norm": 1.3643490076065063, "learning_rate": 4.3608890896143265e-06, "loss": 0.5429, "step": 27310 }, { "epoch": 0.7, "grad_norm": 1.5948485136032104, "learning_rate": 4.360203547594567e-06, "loss": 0.5842, "step": 27311 }, { "epoch": 0.7, "grad_norm": 1.2930487394332886, "learning_rate": 4.35951804444074e-06, "loss": 0.4806, "step": 27312 }, { "epoch": 0.7, "grad_norm": 2.166957139968872, "learning_rate": 4.358832580157569e-06, "loss": 0.5999, "step": 27313 }, { "epoch": 0.7, "grad_norm": 1.034082055091858, "learning_rate": 4.358147154749783e-06, "loss": 0.484, "step": 27314 }, { "epoch": 0.7, "grad_norm": 2.542388439178467, "learning_rate": 4.357461768222102e-06, "loss": 0.6673, "step": 27315 }, { "epoch": 0.7, "grad_norm": 4.661708354949951, "learning_rate": 4.356776420579249e-06, "loss": 0.4482, "step": 27316 }, { "epoch": 0.7, "grad_norm": 1.5067307949066162, "learning_rate": 4.356091111825945e-06, "loss": 0.6287, "step": 27317 }, { "epoch": 0.7, "grad_norm": 3.7626373767852783, "learning_rate": 4.355405841966918e-06, "loss": 0.3555, "step": 27318 }, { "epoch": 0.7, "grad_norm": 1.2061043977737427, "learning_rate": 4.3547206110068876e-06, "loss": 0.5425, "step": 27319 }, { "epoch": 0.7, "grad_norm": 1.311034917831421, "learning_rate": 4.354035418950573e-06, "loss": 0.4683, "step": 27320 }, { "epoch": 0.7, "grad_norm": 1.0784403085708618, "learning_rate": 4.353350265802703e-06, "loss": 0.2498, "step": 27321 }, { "epoch": 0.7, "grad_norm": 1.2132787704467773, "learning_rate": 4.352665151567994e-06, "loss": 0.5945, "step": 27322 }, { "epoch": 0.7, "grad_norm": 3.104499340057373, "learning_rate": 4.35198007625117e-06, "loss": 0.5617, "step": 27323 }, { "epoch": 0.7, "grad_norm": 4.5659499168396, "learning_rate": 4.351295039856946e-06, "loss": 0.6114, "step": 27324 }, { "epoch": 0.7, "grad_norm": 1.8039988279342651, "learning_rate": 4.350610042390053e-06, "loss": 0.5174, "step": 27325 }, { "epoch": 0.7, "grad_norm": 1.9999338388442993, "learning_rate": 4.349925083855205e-06, "loss": 0.7336, "step": 27326 }, { "epoch": 0.7, "grad_norm": 1.3427073955535889, "learning_rate": 4.349240164257121e-06, "loss": 0.5224, "step": 27327 }, { "epoch": 0.7, "grad_norm": 1.7108910083770752, "learning_rate": 4.348555283600528e-06, "loss": 0.657, "step": 27328 }, { "epoch": 0.7, "grad_norm": 5.142361164093018, "learning_rate": 4.347870441890139e-06, "loss": 0.4917, "step": 27329 }, { "epoch": 0.7, "grad_norm": 1.0631340742111206, "learning_rate": 4.347185639130677e-06, "loss": 0.5204, "step": 27330 }, { "epoch": 0.7, "grad_norm": 1.3967746496200562, "learning_rate": 4.346500875326857e-06, "loss": 0.6217, "step": 27331 }, { "epoch": 0.7, "grad_norm": 1.3436713218688965, "learning_rate": 4.345816150483404e-06, "loss": 0.5535, "step": 27332 }, { "epoch": 0.7, "grad_norm": 1.7135828733444214, "learning_rate": 4.345131464605034e-06, "loss": 0.5043, "step": 27333 }, { "epoch": 0.7, "grad_norm": 6.628704071044922, "learning_rate": 4.344446817696462e-06, "loss": 0.5328, "step": 27334 }, { "epoch": 0.7, "grad_norm": 1.5071938037872314, "learning_rate": 4.343762209762412e-06, "loss": 0.5682, "step": 27335 }, { "epoch": 0.7, "grad_norm": 1.1658726930618286, "learning_rate": 4.3430776408076e-06, "loss": 0.5371, "step": 27336 }, { "epoch": 0.7, "grad_norm": 1.951259732246399, "learning_rate": 4.342393110836739e-06, "loss": 0.5419, "step": 27337 }, { "epoch": 0.7, "grad_norm": 1.3080946207046509, "learning_rate": 4.341708619854554e-06, "loss": 0.5174, "step": 27338 }, { "epoch": 0.7, "grad_norm": 1.242492437362671, "learning_rate": 4.341024167865758e-06, "loss": 0.534, "step": 27339 }, { "epoch": 0.7, "grad_norm": 2.279197931289673, "learning_rate": 4.3403397548750695e-06, "loss": 0.6979, "step": 27340 }, { "epoch": 0.7, "grad_norm": 1.062778115272522, "learning_rate": 4.339655380887199e-06, "loss": 0.4085, "step": 27341 }, { "epoch": 0.7, "grad_norm": 1.095989465713501, "learning_rate": 4.33897104590687e-06, "loss": 0.578, "step": 27342 }, { "epoch": 0.7, "grad_norm": 2.0082178115844727, "learning_rate": 4.338286749938797e-06, "loss": 0.6871, "step": 27343 }, { "epoch": 0.7, "grad_norm": 1.3602690696716309, "learning_rate": 4.337602492987691e-06, "loss": 0.4082, "step": 27344 }, { "epoch": 0.7, "grad_norm": 1.7306127548217773, "learning_rate": 4.336918275058275e-06, "loss": 0.548, "step": 27345 }, { "epoch": 0.7, "grad_norm": 3.1313679218292236, "learning_rate": 4.33623409615526e-06, "loss": 0.4521, "step": 27346 }, { "epoch": 0.7, "grad_norm": 4.89882230758667, "learning_rate": 4.335549956283361e-06, "loss": 0.5939, "step": 27347 }, { "epoch": 0.7, "grad_norm": 0.931131899356842, "learning_rate": 4.334865855447291e-06, "loss": 0.4198, "step": 27348 }, { "epoch": 0.7, "grad_norm": 1.3500937223434448, "learning_rate": 4.334181793651768e-06, "loss": 0.4892, "step": 27349 }, { "epoch": 0.7, "grad_norm": 1.5124030113220215, "learning_rate": 4.333497770901506e-06, "loss": 0.5396, "step": 27350 }, { "epoch": 0.7, "grad_norm": 2.560819387435913, "learning_rate": 4.332813787201212e-06, "loss": 0.8083, "step": 27351 }, { "epoch": 0.7, "grad_norm": 1.209187626838684, "learning_rate": 4.3321298425556105e-06, "loss": 0.4713, "step": 27352 }, { "epoch": 0.7, "grad_norm": 1.6099568605422974, "learning_rate": 4.3314459369694075e-06, "loss": 0.3947, "step": 27353 }, { "epoch": 0.7, "grad_norm": 1.4798641204833984, "learning_rate": 4.330762070447314e-06, "loss": 0.5765, "step": 27354 }, { "epoch": 0.7, "grad_norm": 1.0800325870513916, "learning_rate": 4.3300782429940515e-06, "loss": 0.6217, "step": 27355 }, { "epoch": 0.7, "grad_norm": 1.3638495206832886, "learning_rate": 4.329394454614328e-06, "loss": 0.5715, "step": 27356 }, { "epoch": 0.7, "grad_norm": 1.4521150588989258, "learning_rate": 4.328710705312853e-06, "loss": 0.5172, "step": 27357 }, { "epoch": 0.7, "grad_norm": 3.959569215774536, "learning_rate": 4.328026995094339e-06, "loss": 0.544, "step": 27358 }, { "epoch": 0.7, "grad_norm": 1.1727900505065918, "learning_rate": 4.327343323963502e-06, "loss": 0.4866, "step": 27359 }, { "epoch": 0.7, "grad_norm": 1.5007762908935547, "learning_rate": 4.3266596919250515e-06, "loss": 0.5287, "step": 27360 }, { "epoch": 0.7, "grad_norm": 4.804421901702881, "learning_rate": 4.325976098983694e-06, "loss": 0.8359, "step": 27361 }, { "epoch": 0.7, "grad_norm": 4.60866641998291, "learning_rate": 4.325292545144148e-06, "loss": 0.606, "step": 27362 }, { "epoch": 0.7, "grad_norm": 1.5910813808441162, "learning_rate": 4.324609030411122e-06, "loss": 0.616, "step": 27363 }, { "epoch": 0.7, "grad_norm": 2.0841033458709717, "learning_rate": 4.323925554789324e-06, "loss": 0.5006, "step": 27364 }, { "epoch": 0.7, "grad_norm": 4.853615760803223, "learning_rate": 4.323242118283461e-06, "loss": 0.5976, "step": 27365 }, { "epoch": 0.7, "grad_norm": 3.5689733028411865, "learning_rate": 4.3225587208982514e-06, "loss": 0.6197, "step": 27366 }, { "epoch": 0.7, "grad_norm": 1.5886808633804321, "learning_rate": 4.321875362638399e-06, "loss": 0.4584, "step": 27367 }, { "epoch": 0.7, "grad_norm": 1.2930561304092407, "learning_rate": 4.321192043508612e-06, "loss": 0.4732, "step": 27368 }, { "epoch": 0.7, "grad_norm": 2.92242693901062, "learning_rate": 4.320508763513604e-06, "loss": 0.7018, "step": 27369 }, { "epoch": 0.7, "grad_norm": 0.984097421169281, "learning_rate": 4.319825522658082e-06, "loss": 0.5366, "step": 27370 }, { "epoch": 0.7, "grad_norm": 3.9852702617645264, "learning_rate": 4.319142320946753e-06, "loss": 0.4243, "step": 27371 }, { "epoch": 0.7, "grad_norm": 1.6241751909255981, "learning_rate": 4.318459158384324e-06, "loss": 0.309, "step": 27372 }, { "epoch": 0.7, "grad_norm": 2.9985857009887695, "learning_rate": 4.317776034975507e-06, "loss": 0.5839, "step": 27373 }, { "epoch": 0.7, "grad_norm": 1.2911690473556519, "learning_rate": 4.317092950725009e-06, "loss": 0.4616, "step": 27374 }, { "epoch": 0.7, "grad_norm": 1.9994375705718994, "learning_rate": 4.316409905637532e-06, "loss": 0.5521, "step": 27375 }, { "epoch": 0.7, "grad_norm": 1.321109652519226, "learning_rate": 4.3157268997177904e-06, "loss": 0.5274, "step": 27376 }, { "epoch": 0.7, "grad_norm": 2.0790836811065674, "learning_rate": 4.315043932970487e-06, "loss": 0.5599, "step": 27377 }, { "epoch": 0.7, "grad_norm": 1.4485487937927246, "learning_rate": 4.314361005400327e-06, "loss": 0.5735, "step": 27378 }, { "epoch": 0.7, "grad_norm": 2.648362874984741, "learning_rate": 4.313678117012022e-06, "loss": 0.6122, "step": 27379 }, { "epoch": 0.7, "grad_norm": 1.1223655939102173, "learning_rate": 4.312995267810276e-06, "loss": 0.5488, "step": 27380 }, { "epoch": 0.7, "grad_norm": 7.498009204864502, "learning_rate": 4.312312457799792e-06, "loss": 0.7717, "step": 27381 }, { "epoch": 0.7, "grad_norm": 1.1446794271469116, "learning_rate": 4.3116296869852745e-06, "loss": 0.4312, "step": 27382 }, { "epoch": 0.7, "grad_norm": 1.1400914192199707, "learning_rate": 4.310946955371435e-06, "loss": 0.503, "step": 27383 }, { "epoch": 0.7, "grad_norm": 1.5005662441253662, "learning_rate": 4.310264262962974e-06, "loss": 0.5217, "step": 27384 }, { "epoch": 0.7, "grad_norm": 3.400300979614258, "learning_rate": 4.3095816097645945e-06, "loss": 0.7253, "step": 27385 }, { "epoch": 0.7, "grad_norm": 1.0410914421081543, "learning_rate": 4.308898995781007e-06, "loss": 0.3954, "step": 27386 }, { "epoch": 0.7, "grad_norm": 1.846585988998413, "learning_rate": 4.308216421016911e-06, "loss": 0.6029, "step": 27387 }, { "epoch": 0.7, "grad_norm": 2.502692222595215, "learning_rate": 4.307533885477012e-06, "loss": 0.3842, "step": 27388 }, { "epoch": 0.7, "grad_norm": 4.600069046020508, "learning_rate": 4.30685138916601e-06, "loss": 0.4864, "step": 27389 }, { "epoch": 0.7, "grad_norm": 1.543900966644287, "learning_rate": 4.306168932088616e-06, "loss": 0.5849, "step": 27390 }, { "epoch": 0.7, "grad_norm": 1.1620317697525024, "learning_rate": 4.305486514249526e-06, "loss": 0.616, "step": 27391 }, { "epoch": 0.7, "grad_norm": 4.258425712585449, "learning_rate": 4.304804135653443e-06, "loss": 0.5497, "step": 27392 }, { "epoch": 0.7, "grad_norm": 4.408046722412109, "learning_rate": 4.304121796305077e-06, "loss": 0.6879, "step": 27393 }, { "epoch": 0.7, "grad_norm": 5.5347394943237305, "learning_rate": 4.303439496209123e-06, "loss": 0.5451, "step": 27394 }, { "epoch": 0.7, "grad_norm": 1.3657689094543457, "learning_rate": 4.302757235370285e-06, "loss": 0.5228, "step": 27395 }, { "epoch": 0.7, "grad_norm": 1.8429772853851318, "learning_rate": 4.302075013793262e-06, "loss": 0.5468, "step": 27396 }, { "epoch": 0.7, "grad_norm": 3.0235331058502197, "learning_rate": 4.301392831482761e-06, "loss": 0.534, "step": 27397 }, { "epoch": 0.7, "grad_norm": 2.324850559234619, "learning_rate": 4.300710688443481e-06, "loss": 0.3615, "step": 27398 }, { "epoch": 0.7, "grad_norm": 3.031802177429199, "learning_rate": 4.300028584680119e-06, "loss": 0.3724, "step": 27399 }, { "epoch": 0.7, "grad_norm": 2.1571733951568604, "learning_rate": 4.299346520197383e-06, "loss": 0.5943, "step": 27400 }, { "epoch": 0.7, "grad_norm": 3.0875861644744873, "learning_rate": 4.2986644949999675e-06, "loss": 0.5139, "step": 27401 }, { "epoch": 0.7, "grad_norm": 6.620054721832275, "learning_rate": 4.297982509092572e-06, "loss": 0.5736, "step": 27402 }, { "epoch": 0.7, "grad_norm": 2.4842278957366943, "learning_rate": 4.297300562479902e-06, "loss": 0.4157, "step": 27403 }, { "epoch": 0.7, "grad_norm": 1.1801460981369019, "learning_rate": 4.296618655166655e-06, "loss": 0.4932, "step": 27404 }, { "epoch": 0.7, "grad_norm": 1.9727948904037476, "learning_rate": 4.2959367871575265e-06, "loss": 0.6872, "step": 27405 }, { "epoch": 0.7, "grad_norm": 2.7043659687042236, "learning_rate": 4.295254958457215e-06, "loss": 0.7266, "step": 27406 }, { "epoch": 0.7, "grad_norm": 1.3862289190292358, "learning_rate": 4.294573169070427e-06, "loss": 0.5191, "step": 27407 }, { "epoch": 0.7, "grad_norm": 1.7506310939788818, "learning_rate": 4.293891419001857e-06, "loss": 0.7004, "step": 27408 }, { "epoch": 0.7, "grad_norm": 7.270449638366699, "learning_rate": 4.293209708256196e-06, "loss": 0.5467, "step": 27409 }, { "epoch": 0.7, "grad_norm": 2.4663889408111572, "learning_rate": 4.292528036838154e-06, "loss": 0.4404, "step": 27410 }, { "epoch": 0.7, "grad_norm": 1.6779632568359375, "learning_rate": 4.291846404752423e-06, "loss": 0.5719, "step": 27411 }, { "epoch": 0.7, "grad_norm": 7.569977760314941, "learning_rate": 4.291164812003699e-06, "loss": 0.5276, "step": 27412 }, { "epoch": 0.7, "grad_norm": 1.7897416353225708, "learning_rate": 4.290483258596678e-06, "loss": 0.5965, "step": 27413 }, { "epoch": 0.7, "grad_norm": 4.763930797576904, "learning_rate": 4.289801744536063e-06, "loss": 0.6792, "step": 27414 }, { "epoch": 0.7, "grad_norm": 1.3282246589660645, "learning_rate": 4.2891202698265465e-06, "loss": 0.462, "step": 27415 }, { "epoch": 0.7, "grad_norm": 2.5013105869293213, "learning_rate": 4.288438834472823e-06, "loss": 0.4911, "step": 27416 }, { "epoch": 0.7, "grad_norm": 1.4761408567428589, "learning_rate": 4.2877574384795916e-06, "loss": 0.3905, "step": 27417 }, { "epoch": 0.7, "grad_norm": 3.2575345039367676, "learning_rate": 4.287076081851549e-06, "loss": 0.6384, "step": 27418 }, { "epoch": 0.7, "grad_norm": 3.5100128650665283, "learning_rate": 4.286394764593385e-06, "loss": 0.7983, "step": 27419 }, { "epoch": 0.7, "grad_norm": 5.504386901855469, "learning_rate": 4.285713486709801e-06, "loss": 0.7782, "step": 27420 }, { "epoch": 0.7, "grad_norm": 1.531119465827942, "learning_rate": 4.285032248205491e-06, "loss": 0.458, "step": 27421 }, { "epoch": 0.7, "grad_norm": 5.547360897064209, "learning_rate": 4.284351049085147e-06, "loss": 0.5667, "step": 27422 }, { "epoch": 0.7, "grad_norm": 1.9950875043869019, "learning_rate": 4.283669889353462e-06, "loss": 0.5128, "step": 27423 }, { "epoch": 0.7, "grad_norm": 7.006683349609375, "learning_rate": 4.282988769015136e-06, "loss": 0.769, "step": 27424 }, { "epoch": 0.7, "grad_norm": 1.51669180393219, "learning_rate": 4.28230768807486e-06, "loss": 0.4403, "step": 27425 }, { "epoch": 0.7, "grad_norm": 0.9886298179626465, "learning_rate": 4.281626646537324e-06, "loss": 0.6024, "step": 27426 }, { "epoch": 0.7, "grad_norm": 3.2356948852539062, "learning_rate": 4.280945644407227e-06, "loss": 0.3707, "step": 27427 }, { "epoch": 0.7, "grad_norm": 1.662682294845581, "learning_rate": 4.28026468168926e-06, "loss": 0.4688, "step": 27428 }, { "epoch": 0.7, "grad_norm": 1.201472520828247, "learning_rate": 4.279583758388116e-06, "loss": 0.5198, "step": 27429 }, { "epoch": 0.7, "grad_norm": 1.773834466934204, "learning_rate": 4.278902874508482e-06, "loss": 0.5119, "step": 27430 }, { "epoch": 0.7, "grad_norm": 1.642708420753479, "learning_rate": 4.27822203005506e-06, "loss": 0.6546, "step": 27431 }, { "epoch": 0.7, "grad_norm": 1.1945269107818604, "learning_rate": 4.277541225032536e-06, "loss": 0.4627, "step": 27432 }, { "epoch": 0.7, "grad_norm": 3.514770030975342, "learning_rate": 4.2768604594456e-06, "loss": 0.5762, "step": 27433 }, { "epoch": 0.7, "grad_norm": 1.3449236154556274, "learning_rate": 4.276179733298948e-06, "loss": 0.458, "step": 27434 }, { "epoch": 0.7, "grad_norm": 5.17975378036499, "learning_rate": 4.275499046597271e-06, "loss": 0.5419, "step": 27435 }, { "epoch": 0.7, "grad_norm": 4.814204692840576, "learning_rate": 4.274818399345257e-06, "loss": 0.7506, "step": 27436 }, { "epoch": 0.7, "grad_norm": 2.5732452869415283, "learning_rate": 4.274137791547595e-06, "loss": 0.5942, "step": 27437 }, { "epoch": 0.7, "grad_norm": 1.3532599210739136, "learning_rate": 4.273457223208981e-06, "loss": 0.4908, "step": 27438 }, { "epoch": 0.7, "grad_norm": 1.383374571800232, "learning_rate": 4.272776694334102e-06, "loss": 0.4949, "step": 27439 }, { "epoch": 0.7, "grad_norm": 2.955796241760254, "learning_rate": 4.272096204927644e-06, "loss": 0.6215, "step": 27440 }, { "epoch": 0.7, "grad_norm": 0.833868145942688, "learning_rate": 4.2714157549943045e-06, "loss": 0.4524, "step": 27441 }, { "epoch": 0.7, "grad_norm": 2.4658315181732178, "learning_rate": 4.270735344538768e-06, "loss": 0.4695, "step": 27442 }, { "epoch": 0.7, "grad_norm": 2.520836591720581, "learning_rate": 4.270054973565721e-06, "loss": 0.6331, "step": 27443 }, { "epoch": 0.7, "grad_norm": 1.8792154788970947, "learning_rate": 4.2693746420798585e-06, "loss": 0.6495, "step": 27444 }, { "epoch": 0.7, "grad_norm": 5.7323527336120605, "learning_rate": 4.268694350085866e-06, "loss": 0.5724, "step": 27445 }, { "epoch": 0.7, "grad_norm": 0.9863436818122864, "learning_rate": 4.268014097588431e-06, "loss": 0.5921, "step": 27446 }, { "epoch": 0.7, "grad_norm": 3.072463035583496, "learning_rate": 4.267333884592237e-06, "loss": 0.4936, "step": 27447 }, { "epoch": 0.7, "grad_norm": 1.683766484260559, "learning_rate": 4.2666537111019816e-06, "loss": 0.5525, "step": 27448 }, { "epoch": 0.7, "grad_norm": 4.055680751800537, "learning_rate": 4.265973577122345e-06, "loss": 0.613, "step": 27449 }, { "epoch": 0.7, "grad_norm": 1.592956781387329, "learning_rate": 4.265293482658014e-06, "loss": 0.4671, "step": 27450 }, { "epoch": 0.7, "grad_norm": 4.835338592529297, "learning_rate": 4.264613427713681e-06, "loss": 0.6373, "step": 27451 }, { "epoch": 0.7, "grad_norm": 5.115451335906982, "learning_rate": 4.263933412294028e-06, "loss": 0.6288, "step": 27452 }, { "epoch": 0.7, "grad_norm": 1.432442545890808, "learning_rate": 4.263253436403743e-06, "loss": 0.4786, "step": 27453 }, { "epoch": 0.7, "grad_norm": 1.2628175020217896, "learning_rate": 4.262573500047508e-06, "loss": 0.5803, "step": 27454 }, { "epoch": 0.7, "grad_norm": 0.8773154616355896, "learning_rate": 4.261893603230014e-06, "loss": 0.4324, "step": 27455 }, { "epoch": 0.7, "grad_norm": 5.347254753112793, "learning_rate": 4.261213745955945e-06, "loss": 0.7713, "step": 27456 }, { "epoch": 0.7, "grad_norm": 1.3937855958938599, "learning_rate": 4.260533928229982e-06, "loss": 0.4868, "step": 27457 }, { "epoch": 0.7, "grad_norm": 1.4968452453613281, "learning_rate": 4.259854150056816e-06, "loss": 0.4312, "step": 27458 }, { "epoch": 0.7, "grad_norm": 2.8618037700653076, "learning_rate": 4.259174411441129e-06, "loss": 0.4214, "step": 27459 }, { "epoch": 0.7, "grad_norm": 1.6445459127426147, "learning_rate": 4.258494712387602e-06, "loss": 0.6016, "step": 27460 }, { "epoch": 0.7, "grad_norm": 1.1640167236328125, "learning_rate": 4.257815052900926e-06, "loss": 0.474, "step": 27461 }, { "epoch": 0.7, "grad_norm": 1.2374505996704102, "learning_rate": 4.257135432985782e-06, "loss": 0.5128, "step": 27462 }, { "epoch": 0.7, "grad_norm": 1.5175669193267822, "learning_rate": 4.256455852646851e-06, "loss": 0.5675, "step": 27463 }, { "epoch": 0.7, "grad_norm": 1.7422298192977905, "learning_rate": 4.2557763118888155e-06, "loss": 0.5158, "step": 27464 }, { "epoch": 0.7, "grad_norm": 2.966320514678955, "learning_rate": 4.255096810716362e-06, "loss": 0.6494, "step": 27465 }, { "epoch": 0.7, "grad_norm": 1.1344780921936035, "learning_rate": 4.254417349134175e-06, "loss": 0.4793, "step": 27466 }, { "epoch": 0.7, "grad_norm": 1.827782154083252, "learning_rate": 4.2537379271469285e-06, "loss": 0.5781, "step": 27467 }, { "epoch": 0.7, "grad_norm": 1.9077060222625732, "learning_rate": 4.253058544759315e-06, "loss": 0.6165, "step": 27468 }, { "epoch": 0.7, "grad_norm": 1.756203293800354, "learning_rate": 4.252379201976009e-06, "loss": 0.5552, "step": 27469 }, { "epoch": 0.7, "grad_norm": 4.718979358673096, "learning_rate": 4.251699898801697e-06, "loss": 0.7868, "step": 27470 }, { "epoch": 0.7, "grad_norm": 1.634605050086975, "learning_rate": 4.251020635241053e-06, "loss": 0.5703, "step": 27471 }, { "epoch": 0.7, "grad_norm": 1.8878023624420166, "learning_rate": 4.2503414112987665e-06, "loss": 0.4805, "step": 27472 }, { "epoch": 0.7, "grad_norm": 1.7375167608261108, "learning_rate": 4.249662226979515e-06, "loss": 0.5518, "step": 27473 }, { "epoch": 0.7, "grad_norm": 6.957272529602051, "learning_rate": 4.248983082287975e-06, "loss": 0.5989, "step": 27474 }, { "epoch": 0.7, "grad_norm": 2.329313039779663, "learning_rate": 4.248303977228834e-06, "loss": 0.46, "step": 27475 }, { "epoch": 0.7, "grad_norm": 1.0623036623001099, "learning_rate": 4.247624911806769e-06, "loss": 0.5046, "step": 27476 }, { "epoch": 0.7, "grad_norm": 1.851582646369934, "learning_rate": 4.246945886026459e-06, "loss": 0.5728, "step": 27477 }, { "epoch": 0.7, "grad_norm": 3.1296768188476562, "learning_rate": 4.246266899892579e-06, "loss": 0.3971, "step": 27478 }, { "epoch": 0.7, "grad_norm": 4.567483425140381, "learning_rate": 4.245587953409818e-06, "loss": 0.5079, "step": 27479 }, { "epoch": 0.7, "grad_norm": 1.1158745288848877, "learning_rate": 4.244909046582848e-06, "loss": 0.4835, "step": 27480 }, { "epoch": 0.7, "grad_norm": 6.239772319793701, "learning_rate": 4.244230179416346e-06, "loss": 0.5716, "step": 27481 }, { "epoch": 0.7, "grad_norm": 1.2164676189422607, "learning_rate": 4.243551351914998e-06, "loss": 0.4601, "step": 27482 }, { "epoch": 0.7, "grad_norm": 1.6346375942230225, "learning_rate": 4.242872564083476e-06, "loss": 0.6711, "step": 27483 }, { "epoch": 0.7, "grad_norm": 2.778935670852661, "learning_rate": 4.242193815926457e-06, "loss": 0.2887, "step": 27484 }, { "epoch": 0.7, "grad_norm": 3.4179069995880127, "learning_rate": 4.241515107448622e-06, "loss": 0.6046, "step": 27485 }, { "epoch": 0.7, "grad_norm": 1.4316169023513794, "learning_rate": 4.240836438654649e-06, "loss": 0.499, "step": 27486 }, { "epoch": 0.7, "grad_norm": 1.075072169303894, "learning_rate": 4.240157809549214e-06, "loss": 0.4864, "step": 27487 }, { "epoch": 0.7, "grad_norm": 1.5003961324691772, "learning_rate": 4.2394792201369875e-06, "loss": 0.5002, "step": 27488 }, { "epoch": 0.7, "grad_norm": 4.8313446044921875, "learning_rate": 4.238800670422655e-06, "loss": 0.618, "step": 27489 }, { "epoch": 0.7, "grad_norm": 1.4616893529891968, "learning_rate": 4.238122160410889e-06, "loss": 0.5352, "step": 27490 }, { "epoch": 0.7, "grad_norm": 1.843002200126648, "learning_rate": 4.237443690106361e-06, "loss": 0.3643, "step": 27491 }, { "epoch": 0.7, "grad_norm": 0.8857018947601318, "learning_rate": 4.236765259513759e-06, "loss": 0.4964, "step": 27492 }, { "epoch": 0.7, "grad_norm": 2.666381359100342, "learning_rate": 4.236086868637746e-06, "loss": 0.597, "step": 27493 }, { "epoch": 0.7, "grad_norm": 1.8628275394439697, "learning_rate": 4.235408517482996e-06, "loss": 0.6308, "step": 27494 }, { "epoch": 0.7, "grad_norm": 5.502317905426025, "learning_rate": 4.234730206054194e-06, "loss": 0.5251, "step": 27495 }, { "epoch": 0.7, "grad_norm": 2.9519121646881104, "learning_rate": 4.234051934356008e-06, "loss": 0.6744, "step": 27496 }, { "epoch": 0.7, "grad_norm": 1.6671561002731323, "learning_rate": 4.233373702393111e-06, "loss": 0.5172, "step": 27497 }, { "epoch": 0.7, "grad_norm": 6.459103584289551, "learning_rate": 4.232695510170183e-06, "loss": 0.5944, "step": 27498 }, { "epoch": 0.7, "grad_norm": 1.0569616556167603, "learning_rate": 4.232017357691894e-06, "loss": 0.5265, "step": 27499 }, { "epoch": 0.7, "grad_norm": 1.0132018327713013, "learning_rate": 4.231339244962917e-06, "loss": 0.4734, "step": 27500 }, { "epoch": 0.7, "grad_norm": 1.1803807020187378, "learning_rate": 4.230661171987923e-06, "loss": 0.3831, "step": 27501 }, { "epoch": 0.7, "grad_norm": 1.6844627857208252, "learning_rate": 4.229983138771592e-06, "loss": 0.5054, "step": 27502 }, { "epoch": 0.7, "grad_norm": 3.979034900665283, "learning_rate": 4.229305145318591e-06, "loss": 0.6438, "step": 27503 }, { "epoch": 0.7, "grad_norm": 1.4780092239379883, "learning_rate": 4.22862719163359e-06, "loss": 0.5857, "step": 27504 }, { "epoch": 0.7, "grad_norm": 5.638798236846924, "learning_rate": 4.227949277721267e-06, "loss": 0.8021, "step": 27505 }, { "epoch": 0.7, "grad_norm": 2.715392827987671, "learning_rate": 4.227271403586292e-06, "loss": 0.6549, "step": 27506 }, { "epoch": 0.71, "grad_norm": 6.063597202301025, "learning_rate": 4.226593569233336e-06, "loss": 0.6132, "step": 27507 }, { "epoch": 0.71, "grad_norm": 1.8805867433547974, "learning_rate": 4.225915774667067e-06, "loss": 0.4749, "step": 27508 }, { "epoch": 0.71, "grad_norm": 1.282414197921753, "learning_rate": 4.225238019892161e-06, "loss": 0.5803, "step": 27509 }, { "epoch": 0.71, "grad_norm": 1.1297158002853394, "learning_rate": 4.224560304913286e-06, "loss": 0.5519, "step": 27510 }, { "epoch": 0.71, "grad_norm": 1.2238246202468872, "learning_rate": 4.223882629735111e-06, "loss": 0.593, "step": 27511 }, { "epoch": 0.71, "grad_norm": 1.2430373430252075, "learning_rate": 4.223204994362311e-06, "loss": 0.4667, "step": 27512 }, { "epoch": 0.71, "grad_norm": 1.8017632961273193, "learning_rate": 4.222527398799553e-06, "loss": 0.5681, "step": 27513 }, { "epoch": 0.71, "grad_norm": 1.3060392141342163, "learning_rate": 4.2218498430515034e-06, "loss": 0.6033, "step": 27514 }, { "epoch": 0.71, "grad_norm": 0.7742681503295898, "learning_rate": 4.221172327122836e-06, "loss": 0.3397, "step": 27515 }, { "epoch": 0.71, "grad_norm": 1.0226529836654663, "learning_rate": 4.220494851018221e-06, "loss": 0.4441, "step": 27516 }, { "epoch": 0.71, "grad_norm": 1.0531548261642456, "learning_rate": 4.219817414742322e-06, "loss": 0.4165, "step": 27517 }, { "epoch": 0.71, "grad_norm": 2.2799158096313477, "learning_rate": 4.219140018299807e-06, "loss": 0.5493, "step": 27518 }, { "epoch": 0.71, "grad_norm": 1.876599907875061, "learning_rate": 4.218462661695352e-06, "loss": 0.6963, "step": 27519 }, { "epoch": 0.71, "grad_norm": 2.2428314685821533, "learning_rate": 4.217785344933619e-06, "loss": 0.5625, "step": 27520 }, { "epoch": 0.71, "grad_norm": 1.3009376525878906, "learning_rate": 4.217108068019273e-06, "loss": 0.5273, "step": 27521 }, { "epoch": 0.71, "grad_norm": 2.070477247238159, "learning_rate": 4.216430830956989e-06, "loss": 0.5275, "step": 27522 }, { "epoch": 0.71, "grad_norm": 3.4367268085479736, "learning_rate": 4.215753633751429e-06, "loss": 0.4712, "step": 27523 }, { "epoch": 0.71, "grad_norm": 1.5818172693252563, "learning_rate": 4.215076476407263e-06, "loss": 0.4458, "step": 27524 }, { "epoch": 0.71, "grad_norm": 1.5640218257904053, "learning_rate": 4.21439935892915e-06, "loss": 0.474, "step": 27525 }, { "epoch": 0.71, "grad_norm": 1.8283663988113403, "learning_rate": 4.213722281321766e-06, "loss": 0.5998, "step": 27526 }, { "epoch": 0.71, "grad_norm": 1.4988065958023071, "learning_rate": 4.213045243589773e-06, "loss": 0.6454, "step": 27527 }, { "epoch": 0.71, "grad_norm": 2.3441054821014404, "learning_rate": 4.212368245737833e-06, "loss": 0.5606, "step": 27528 }, { "epoch": 0.71, "grad_norm": 8.060539245605469, "learning_rate": 4.211691287770619e-06, "loss": 0.6155, "step": 27529 }, { "epoch": 0.71, "grad_norm": 1.1756961345672607, "learning_rate": 4.211014369692791e-06, "loss": 0.563, "step": 27530 }, { "epoch": 0.71, "grad_norm": 1.3706238269805908, "learning_rate": 4.210337491509014e-06, "loss": 0.5618, "step": 27531 }, { "epoch": 0.71, "grad_norm": 2.8412883281707764, "learning_rate": 4.209660653223951e-06, "loss": 0.4029, "step": 27532 }, { "epoch": 0.71, "grad_norm": 1.4922345876693726, "learning_rate": 4.208983854842273e-06, "loss": 0.559, "step": 27533 }, { "epoch": 0.71, "grad_norm": 0.943701446056366, "learning_rate": 4.20830709636864e-06, "loss": 0.6583, "step": 27534 }, { "epoch": 0.71, "grad_norm": 1.2561070919036865, "learning_rate": 4.207630377807712e-06, "loss": 0.6239, "step": 27535 }, { "epoch": 0.71, "grad_norm": 1.3067692518234253, "learning_rate": 4.2069536991641605e-06, "loss": 0.5367, "step": 27536 }, { "epoch": 0.71, "grad_norm": 1.1038323640823364, "learning_rate": 4.206277060442644e-06, "loss": 0.4902, "step": 27537 }, { "epoch": 0.71, "grad_norm": 0.9068986773490906, "learning_rate": 4.205600461647822e-06, "loss": 0.4348, "step": 27538 }, { "epoch": 0.71, "grad_norm": 1.8163982629776, "learning_rate": 4.204923902784365e-06, "loss": 0.5731, "step": 27539 }, { "epoch": 0.71, "grad_norm": 1.0170609951019287, "learning_rate": 4.204247383856932e-06, "loss": 0.6, "step": 27540 }, { "epoch": 0.71, "grad_norm": 3.7048254013061523, "learning_rate": 4.203570904870184e-06, "loss": 0.5122, "step": 27541 }, { "epoch": 0.71, "grad_norm": 2.382108449935913, "learning_rate": 4.202894465828781e-06, "loss": 0.567, "step": 27542 }, { "epoch": 0.71, "grad_norm": 4.7289958000183105, "learning_rate": 4.20221806673739e-06, "loss": 0.6731, "step": 27543 }, { "epoch": 0.71, "grad_norm": 1.7792448997497559, "learning_rate": 4.20154170760067e-06, "loss": 0.6265, "step": 27544 }, { "epoch": 0.71, "grad_norm": 13.125478744506836, "learning_rate": 4.2008653884232786e-06, "loss": 0.7002, "step": 27545 }, { "epoch": 0.71, "grad_norm": 1.8117618560791016, "learning_rate": 4.200189109209882e-06, "loss": 0.661, "step": 27546 }, { "epoch": 0.71, "grad_norm": 4.81997537612915, "learning_rate": 4.199512869965139e-06, "loss": 0.5322, "step": 27547 }, { "epoch": 0.71, "grad_norm": 1.5834957361221313, "learning_rate": 4.198836670693709e-06, "loss": 0.7587, "step": 27548 }, { "epoch": 0.71, "grad_norm": 5.9178900718688965, "learning_rate": 4.198160511400249e-06, "loss": 0.4578, "step": 27549 }, { "epoch": 0.71, "grad_norm": 9.089818000793457, "learning_rate": 4.197484392089424e-06, "loss": 0.578, "step": 27550 }, { "epoch": 0.71, "grad_norm": 1.4067823886871338, "learning_rate": 4.196808312765892e-06, "loss": 0.5621, "step": 27551 }, { "epoch": 0.71, "grad_norm": 1.7144815921783447, "learning_rate": 4.196132273434307e-06, "loss": 0.5066, "step": 27552 }, { "epoch": 0.71, "grad_norm": 5.248351573944092, "learning_rate": 4.195456274099337e-06, "loss": 0.4615, "step": 27553 }, { "epoch": 0.71, "grad_norm": 1.3129054307937622, "learning_rate": 4.194780314765633e-06, "loss": 0.6738, "step": 27554 }, { "epoch": 0.71, "grad_norm": 1.0689060688018799, "learning_rate": 4.194104395437854e-06, "loss": 0.4302, "step": 27555 }, { "epoch": 0.71, "grad_norm": 1.1613788604736328, "learning_rate": 4.193428516120663e-06, "loss": 0.5734, "step": 27556 }, { "epoch": 0.71, "grad_norm": 1.551377534866333, "learning_rate": 4.1927526768187155e-06, "loss": 0.5991, "step": 27557 }, { "epoch": 0.71, "grad_norm": 2.8771884441375732, "learning_rate": 4.192076877536667e-06, "loss": 0.3252, "step": 27558 }, { "epoch": 0.71, "grad_norm": 1.3941853046417236, "learning_rate": 4.191401118279172e-06, "loss": 0.5128, "step": 27559 }, { "epoch": 0.71, "grad_norm": 1.25083327293396, "learning_rate": 4.190725399050896e-06, "loss": 0.3408, "step": 27560 }, { "epoch": 0.71, "grad_norm": 1.3141021728515625, "learning_rate": 4.19004971985649e-06, "loss": 0.3271, "step": 27561 }, { "epoch": 0.71, "grad_norm": 1.3025217056274414, "learning_rate": 4.189374080700608e-06, "loss": 0.4914, "step": 27562 }, { "epoch": 0.71, "grad_norm": 1.4598582983016968, "learning_rate": 4.188698481587913e-06, "loss": 0.6097, "step": 27563 }, { "epoch": 0.71, "grad_norm": 1.479843020439148, "learning_rate": 4.188022922523056e-06, "loss": 0.56, "step": 27564 }, { "epoch": 0.71, "grad_norm": 1.6232277154922485, "learning_rate": 4.1873474035106955e-06, "loss": 0.525, "step": 27565 }, { "epoch": 0.71, "grad_norm": 1.520203709602356, "learning_rate": 4.18667192455548e-06, "loss": 0.659, "step": 27566 }, { "epoch": 0.71, "grad_norm": 2.978848695755005, "learning_rate": 4.185996485662072e-06, "loss": 0.5995, "step": 27567 }, { "epoch": 0.71, "grad_norm": 2.7128372192382812, "learning_rate": 4.1853210868351245e-06, "loss": 0.6387, "step": 27568 }, { "epoch": 0.71, "grad_norm": 9.973488807678223, "learning_rate": 4.184645728079288e-06, "loss": 0.6535, "step": 27569 }, { "epoch": 0.71, "grad_norm": 2.3385515213012695, "learning_rate": 4.183970409399222e-06, "loss": 0.6473, "step": 27570 }, { "epoch": 0.71, "grad_norm": 3.562796115875244, "learning_rate": 4.183295130799578e-06, "loss": 0.4815, "step": 27571 }, { "epoch": 0.71, "grad_norm": 1.185437798500061, "learning_rate": 4.182619892285011e-06, "loss": 0.5038, "step": 27572 }, { "epoch": 0.71, "grad_norm": 1.8345061540603638, "learning_rate": 4.181944693860167e-06, "loss": 0.4793, "step": 27573 }, { "epoch": 0.71, "grad_norm": 1.4658805131912231, "learning_rate": 4.181269535529709e-06, "loss": 0.525, "step": 27574 }, { "epoch": 0.71, "grad_norm": 1.5828062295913696, "learning_rate": 4.180594417298287e-06, "loss": 0.5426, "step": 27575 }, { "epoch": 0.71, "grad_norm": 4.391556262969971, "learning_rate": 4.179919339170549e-06, "loss": 0.5093, "step": 27576 }, { "epoch": 0.71, "grad_norm": 1.7591392993927002, "learning_rate": 4.1792443011511516e-06, "loss": 0.6109, "step": 27577 }, { "epoch": 0.71, "grad_norm": 1.89454185962677, "learning_rate": 4.178569303244747e-06, "loss": 0.5074, "step": 27578 }, { "epoch": 0.71, "grad_norm": 1.7572380304336548, "learning_rate": 4.177894345455982e-06, "loss": 0.5065, "step": 27579 }, { "epoch": 0.71, "grad_norm": 1.147594690322876, "learning_rate": 4.177219427789514e-06, "loss": 0.4822, "step": 27580 }, { "epoch": 0.71, "grad_norm": 1.4281138181686401, "learning_rate": 4.176544550249992e-06, "loss": 0.5527, "step": 27581 }, { "epoch": 0.71, "grad_norm": 1.4073554277420044, "learning_rate": 4.175869712842066e-06, "loss": 0.5073, "step": 27582 }, { "epoch": 0.71, "grad_norm": 1.3248509168624878, "learning_rate": 4.175194915570384e-06, "loss": 0.5757, "step": 27583 }, { "epoch": 0.71, "grad_norm": 1.2807220220565796, "learning_rate": 4.1745201584396026e-06, "loss": 0.5816, "step": 27584 }, { "epoch": 0.71, "grad_norm": 1.0010457038879395, "learning_rate": 4.1738454414543685e-06, "loss": 0.4719, "step": 27585 }, { "epoch": 0.71, "grad_norm": 1.6388261318206787, "learning_rate": 4.173170764619326e-06, "loss": 0.5436, "step": 27586 }, { "epoch": 0.71, "grad_norm": 2.499408483505249, "learning_rate": 4.172496127939135e-06, "loss": 0.5931, "step": 27587 }, { "epoch": 0.71, "grad_norm": 3.8126864433288574, "learning_rate": 4.171821531418439e-06, "loss": 0.5807, "step": 27588 }, { "epoch": 0.71, "grad_norm": 1.63462495803833, "learning_rate": 4.171146975061888e-06, "loss": 0.6688, "step": 27589 }, { "epoch": 0.71, "grad_norm": 2.9118664264678955, "learning_rate": 4.170472458874126e-06, "loss": 0.5128, "step": 27590 }, { "epoch": 0.71, "grad_norm": 1.7738746404647827, "learning_rate": 4.169797982859809e-06, "loss": 0.6344, "step": 27591 }, { "epoch": 0.71, "grad_norm": 1.4462734460830688, "learning_rate": 4.169123547023581e-06, "loss": 0.5071, "step": 27592 }, { "epoch": 0.71, "grad_norm": 1.7665280103683472, "learning_rate": 4.168449151370087e-06, "loss": 0.6111, "step": 27593 }, { "epoch": 0.71, "grad_norm": 4.757059574127197, "learning_rate": 4.167774795903982e-06, "loss": 0.6629, "step": 27594 }, { "epoch": 0.71, "grad_norm": 3.680135726928711, "learning_rate": 4.167100480629907e-06, "loss": 0.503, "step": 27595 }, { "epoch": 0.71, "grad_norm": 1.3538880348205566, "learning_rate": 4.166426205552508e-06, "loss": 0.5553, "step": 27596 }, { "epoch": 0.71, "grad_norm": 1.3257254362106323, "learning_rate": 4.16575197067644e-06, "loss": 0.561, "step": 27597 }, { "epoch": 0.71, "grad_norm": 2.102186679840088, "learning_rate": 4.165077776006342e-06, "loss": 0.4604, "step": 27598 }, { "epoch": 0.71, "grad_norm": 3.8034908771514893, "learning_rate": 4.164403621546863e-06, "loss": 0.5218, "step": 27599 }, { "epoch": 0.71, "grad_norm": 1.2357022762298584, "learning_rate": 4.163729507302644e-06, "loss": 0.3292, "step": 27600 }, { "epoch": 0.71, "grad_norm": 1.8916404247283936, "learning_rate": 4.163055433278338e-06, "loss": 0.5706, "step": 27601 }, { "epoch": 0.71, "grad_norm": 2.1224889755249023, "learning_rate": 4.1623813994785865e-06, "loss": 0.6419, "step": 27602 }, { "epoch": 0.71, "grad_norm": 1.505118727684021, "learning_rate": 4.161707405908032e-06, "loss": 0.4738, "step": 27603 }, { "epoch": 0.71, "grad_norm": 1.4194045066833496, "learning_rate": 4.161033452571326e-06, "loss": 0.5175, "step": 27604 }, { "epoch": 0.71, "grad_norm": 1.4560048580169678, "learning_rate": 4.160359539473109e-06, "loss": 0.5801, "step": 27605 }, { "epoch": 0.71, "grad_norm": 5.997679710388184, "learning_rate": 4.159685666618024e-06, "loss": 0.3906, "step": 27606 }, { "epoch": 0.71, "grad_norm": 1.1465260982513428, "learning_rate": 4.159011834010713e-06, "loss": 0.4857, "step": 27607 }, { "epoch": 0.71, "grad_norm": 2.188889741897583, "learning_rate": 4.158338041655827e-06, "loss": 0.4888, "step": 27608 }, { "epoch": 0.71, "grad_norm": 4.668880939483643, "learning_rate": 4.1576642895580045e-06, "loss": 0.5438, "step": 27609 }, { "epoch": 0.71, "grad_norm": 1.6406683921813965, "learning_rate": 4.1569905777218866e-06, "loss": 0.3718, "step": 27610 }, { "epoch": 0.71, "grad_norm": 1.319309115409851, "learning_rate": 4.156316906152122e-06, "loss": 0.4801, "step": 27611 }, { "epoch": 0.71, "grad_norm": 1.326870322227478, "learning_rate": 4.155643274853349e-06, "loss": 0.3918, "step": 27612 }, { "epoch": 0.71, "grad_norm": 6.060835838317871, "learning_rate": 4.154969683830211e-06, "loss": 0.5835, "step": 27613 }, { "epoch": 0.71, "grad_norm": 4.021490573883057, "learning_rate": 4.1542961330873465e-06, "loss": 0.4484, "step": 27614 }, { "epoch": 0.71, "grad_norm": 2.548661231994629, "learning_rate": 4.153622622629404e-06, "loss": 0.4468, "step": 27615 }, { "epoch": 0.71, "grad_norm": 1.7447361946105957, "learning_rate": 4.152949152461021e-06, "loss": 0.6873, "step": 27616 }, { "epoch": 0.71, "grad_norm": 3.1708881855010986, "learning_rate": 4.152275722586836e-06, "loss": 0.5866, "step": 27617 }, { "epoch": 0.71, "grad_norm": 6.032890319824219, "learning_rate": 4.151602333011496e-06, "loss": 0.5468, "step": 27618 }, { "epoch": 0.71, "grad_norm": 1.7904762029647827, "learning_rate": 4.150928983739639e-06, "loss": 0.4946, "step": 27619 }, { "epoch": 0.71, "grad_norm": 1.1302073001861572, "learning_rate": 4.150255674775899e-06, "loss": 0.3719, "step": 27620 }, { "epoch": 0.71, "grad_norm": 1.8322080373764038, "learning_rate": 4.149582406124928e-06, "loss": 0.5528, "step": 27621 }, { "epoch": 0.71, "grad_norm": 2.881824254989624, "learning_rate": 4.148909177791358e-06, "loss": 0.602, "step": 27622 }, { "epoch": 0.71, "grad_norm": 3.0981557369232178, "learning_rate": 4.1482359897798306e-06, "loss": 0.4785, "step": 27623 }, { "epoch": 0.71, "grad_norm": 2.2035467624664307, "learning_rate": 4.14756284209498e-06, "loss": 0.573, "step": 27624 }, { "epoch": 0.71, "grad_norm": 2.5338780879974365, "learning_rate": 4.146889734741454e-06, "loss": 0.3893, "step": 27625 }, { "epoch": 0.71, "grad_norm": 1.4319761991500854, "learning_rate": 4.146216667723886e-06, "loss": 0.533, "step": 27626 }, { "epoch": 0.71, "grad_norm": 2.4724602699279785, "learning_rate": 4.145543641046911e-06, "loss": 0.543, "step": 27627 }, { "epoch": 0.71, "grad_norm": 1.5673686265945435, "learning_rate": 4.144870654715175e-06, "loss": 0.4509, "step": 27628 }, { "epoch": 0.71, "grad_norm": 1.4029377698898315, "learning_rate": 4.144197708733312e-06, "loss": 0.6893, "step": 27629 }, { "epoch": 0.71, "grad_norm": 2.1345279216766357, "learning_rate": 4.143524803105959e-06, "loss": 0.5721, "step": 27630 }, { "epoch": 0.71, "grad_norm": 1.433444857597351, "learning_rate": 4.1428519378377505e-06, "loss": 0.5129, "step": 27631 }, { "epoch": 0.71, "grad_norm": 2.2563536167144775, "learning_rate": 4.142179112933331e-06, "loss": 0.6479, "step": 27632 }, { "epoch": 0.71, "grad_norm": 1.3486207723617554, "learning_rate": 4.141506328397332e-06, "loss": 0.3476, "step": 27633 }, { "epoch": 0.71, "grad_norm": 1.3030411005020142, "learning_rate": 4.140833584234387e-06, "loss": 0.4719, "step": 27634 }, { "epoch": 0.71, "grad_norm": 1.2619757652282715, "learning_rate": 4.14016088044914e-06, "loss": 0.4078, "step": 27635 }, { "epoch": 0.71, "grad_norm": 5.41951322555542, "learning_rate": 4.1394882170462224e-06, "loss": 0.4768, "step": 27636 }, { "epoch": 0.71, "grad_norm": 1.6054104566574097, "learning_rate": 4.138815594030267e-06, "loss": 0.4831, "step": 27637 }, { "epoch": 0.71, "grad_norm": 5.890485763549805, "learning_rate": 4.138143011405915e-06, "loss": 0.84, "step": 27638 }, { "epoch": 0.71, "grad_norm": 2.113389253616333, "learning_rate": 4.137470469177799e-06, "loss": 0.5248, "step": 27639 }, { "epoch": 0.71, "grad_norm": 1.951701045036316, "learning_rate": 4.136797967350552e-06, "loss": 0.5819, "step": 27640 }, { "epoch": 0.71, "grad_norm": 1.2456287145614624, "learning_rate": 4.136125505928808e-06, "loss": 0.4062, "step": 27641 }, { "epoch": 0.71, "grad_norm": 2.2468183040618896, "learning_rate": 4.1354530849172055e-06, "loss": 0.4704, "step": 27642 }, { "epoch": 0.71, "grad_norm": 1.253783941268921, "learning_rate": 4.134780704320376e-06, "loss": 0.4356, "step": 27643 }, { "epoch": 0.71, "grad_norm": 1.8794732093811035, "learning_rate": 4.1341083641429506e-06, "loss": 0.7332, "step": 27644 }, { "epoch": 0.71, "grad_norm": 1.8617706298828125, "learning_rate": 4.133436064389566e-06, "loss": 0.5461, "step": 27645 }, { "epoch": 0.71, "grad_norm": 2.597257137298584, "learning_rate": 4.132763805064857e-06, "loss": 0.5451, "step": 27646 }, { "epoch": 0.71, "grad_norm": 1.5407661199569702, "learning_rate": 4.132091586173452e-06, "loss": 0.5988, "step": 27647 }, { "epoch": 0.71, "grad_norm": 2.518535614013672, "learning_rate": 4.131419407719983e-06, "loss": 0.5911, "step": 27648 }, { "epoch": 0.71, "grad_norm": 1.2594470977783203, "learning_rate": 4.130747269709088e-06, "loss": 0.5073, "step": 27649 }, { "epoch": 0.71, "grad_norm": 2.176384449005127, "learning_rate": 4.130075172145394e-06, "loss": 0.5535, "step": 27650 }, { "epoch": 0.71, "grad_norm": 1.3701809644699097, "learning_rate": 4.129403115033531e-06, "loss": 0.5798, "step": 27651 }, { "epoch": 0.71, "grad_norm": 1.526537537574768, "learning_rate": 4.1287310983781374e-06, "loss": 0.3949, "step": 27652 }, { "epoch": 0.71, "grad_norm": 5.475710868835449, "learning_rate": 4.128059122183841e-06, "loss": 0.4058, "step": 27653 }, { "epoch": 0.71, "grad_norm": 1.2536770105361938, "learning_rate": 4.1273871864552704e-06, "loss": 0.5548, "step": 27654 }, { "epoch": 0.71, "grad_norm": 1.7476277351379395, "learning_rate": 4.126715291197056e-06, "loss": 0.4448, "step": 27655 }, { "epoch": 0.71, "grad_norm": 1.3111357688903809, "learning_rate": 4.126043436413831e-06, "loss": 0.587, "step": 27656 }, { "epoch": 0.71, "grad_norm": 1.4186275005340576, "learning_rate": 4.125371622110226e-06, "loss": 0.5737, "step": 27657 }, { "epoch": 0.71, "grad_norm": 1.3230066299438477, "learning_rate": 4.124699848290865e-06, "loss": 0.4, "step": 27658 }, { "epoch": 0.71, "grad_norm": 1.432287335395813, "learning_rate": 4.124028114960385e-06, "loss": 0.4439, "step": 27659 }, { "epoch": 0.71, "grad_norm": 1.8534274101257324, "learning_rate": 4.123356422123413e-06, "loss": 0.535, "step": 27660 }, { "epoch": 0.71, "grad_norm": 5.8481035232543945, "learning_rate": 4.122684769784571e-06, "loss": 0.5615, "step": 27661 }, { "epoch": 0.71, "grad_norm": 1.100841760635376, "learning_rate": 4.122013157948497e-06, "loss": 0.481, "step": 27662 }, { "epoch": 0.71, "grad_norm": 1.3413820266723633, "learning_rate": 4.121341586619816e-06, "loss": 0.5639, "step": 27663 }, { "epoch": 0.71, "grad_norm": 1.3043510913848877, "learning_rate": 4.120670055803155e-06, "loss": 0.5425, "step": 27664 }, { "epoch": 0.71, "grad_norm": 1.080431342124939, "learning_rate": 4.1199985655031386e-06, "loss": 0.4759, "step": 27665 }, { "epoch": 0.71, "grad_norm": 1.2164477109909058, "learning_rate": 4.119327115724401e-06, "loss": 0.5935, "step": 27666 }, { "epoch": 0.71, "grad_norm": 2.0651121139526367, "learning_rate": 4.118655706471567e-06, "loss": 0.531, "step": 27667 }, { "epoch": 0.71, "grad_norm": 13.129945755004883, "learning_rate": 4.117984337749259e-06, "loss": 0.5333, "step": 27668 }, { "epoch": 0.71, "grad_norm": 1.1670644283294678, "learning_rate": 4.117313009562112e-06, "loss": 0.4483, "step": 27669 }, { "epoch": 0.71, "grad_norm": 1.3805561065673828, "learning_rate": 4.116641721914747e-06, "loss": 0.5087, "step": 27670 }, { "epoch": 0.71, "grad_norm": 1.6245617866516113, "learning_rate": 4.115970474811792e-06, "loss": 0.4222, "step": 27671 }, { "epoch": 0.71, "grad_norm": 1.6524525880813599, "learning_rate": 4.115299268257866e-06, "loss": 0.5089, "step": 27672 }, { "epoch": 0.71, "grad_norm": 11.961386680603027, "learning_rate": 4.114628102257605e-06, "loss": 0.6198, "step": 27673 }, { "epoch": 0.71, "grad_norm": 1.4223098754882812, "learning_rate": 4.113956976815631e-06, "loss": 0.4897, "step": 27674 }, { "epoch": 0.71, "grad_norm": 1.6479377746582031, "learning_rate": 4.113285891936562e-06, "loss": 0.5265, "step": 27675 }, { "epoch": 0.71, "grad_norm": 1.5136266946792603, "learning_rate": 4.112614847625033e-06, "loss": 0.5749, "step": 27676 }, { "epoch": 0.71, "grad_norm": 1.9039615392684937, "learning_rate": 4.111943843885663e-06, "loss": 0.5427, "step": 27677 }, { "epoch": 0.71, "grad_norm": 1.6777468919754028, "learning_rate": 4.111272880723076e-06, "loss": 0.6003, "step": 27678 }, { "epoch": 0.71, "grad_norm": 1.1037644147872925, "learning_rate": 4.110601958141895e-06, "loss": 0.5159, "step": 27679 }, { "epoch": 0.71, "grad_norm": 1.2845680713653564, "learning_rate": 4.109931076146747e-06, "loss": 0.5812, "step": 27680 }, { "epoch": 0.71, "grad_norm": 2.8326261043548584, "learning_rate": 4.109260234742255e-06, "loss": 0.829, "step": 27681 }, { "epoch": 0.71, "grad_norm": 2.3144679069519043, "learning_rate": 4.1085894339330365e-06, "loss": 0.5744, "step": 27682 }, { "epoch": 0.71, "grad_norm": 0.9769478440284729, "learning_rate": 4.107918673723721e-06, "loss": 0.3346, "step": 27683 }, { "epoch": 0.71, "grad_norm": 1.2548314332962036, "learning_rate": 4.107247954118929e-06, "loss": 0.445, "step": 27684 }, { "epoch": 0.71, "grad_norm": 1.560272455215454, "learning_rate": 4.106577275123279e-06, "loss": 0.4588, "step": 27685 }, { "epoch": 0.71, "grad_norm": 1.239889144897461, "learning_rate": 4.105906636741398e-06, "loss": 0.3672, "step": 27686 }, { "epoch": 0.71, "grad_norm": 1.414095163345337, "learning_rate": 4.105236038977907e-06, "loss": 0.4706, "step": 27687 }, { "epoch": 0.71, "grad_norm": 13.729620933532715, "learning_rate": 4.1045654818374235e-06, "loss": 0.6882, "step": 27688 }, { "epoch": 0.71, "grad_norm": 3.7586886882781982, "learning_rate": 4.1038949653245695e-06, "loss": 0.6399, "step": 27689 }, { "epoch": 0.71, "grad_norm": 6.325520992279053, "learning_rate": 4.10322448944397e-06, "loss": 0.4833, "step": 27690 }, { "epoch": 0.71, "grad_norm": 1.7028883695602417, "learning_rate": 4.102554054200242e-06, "loss": 0.6264, "step": 27691 }, { "epoch": 0.71, "grad_norm": 1.3989366292953491, "learning_rate": 4.1018836595980025e-06, "loss": 0.4123, "step": 27692 }, { "epoch": 0.71, "grad_norm": 1.2838976383209229, "learning_rate": 4.10121330564188e-06, "loss": 0.5629, "step": 27693 }, { "epoch": 0.71, "grad_norm": 7.698078155517578, "learning_rate": 4.100542992336489e-06, "loss": 0.7133, "step": 27694 }, { "epoch": 0.71, "grad_norm": 4.403320789337158, "learning_rate": 4.099872719686448e-06, "loss": 0.5202, "step": 27695 }, { "epoch": 0.71, "grad_norm": 1.2664549350738525, "learning_rate": 4.099202487696375e-06, "loss": 0.5402, "step": 27696 }, { "epoch": 0.71, "grad_norm": 1.3176026344299316, "learning_rate": 4.0985322963708955e-06, "loss": 0.588, "step": 27697 }, { "epoch": 0.71, "grad_norm": 5.128913402557373, "learning_rate": 4.097862145714623e-06, "loss": 0.5218, "step": 27698 }, { "epoch": 0.71, "grad_norm": 8.799180030822754, "learning_rate": 4.097192035732172e-06, "loss": 0.5066, "step": 27699 }, { "epoch": 0.71, "grad_norm": 1.425845742225647, "learning_rate": 4.096521966428171e-06, "loss": 0.5294, "step": 27700 }, { "epoch": 0.71, "grad_norm": 1.3829749822616577, "learning_rate": 4.095851937807229e-06, "loss": 0.5226, "step": 27701 }, { "epoch": 0.71, "grad_norm": 1.7003732919692993, "learning_rate": 4.095181949873964e-06, "loss": 0.3537, "step": 27702 }, { "epoch": 0.71, "grad_norm": 0.9341216087341309, "learning_rate": 4.094512002632998e-06, "loss": 0.5156, "step": 27703 }, { "epoch": 0.71, "grad_norm": 1.3687995672225952, "learning_rate": 4.093842096088947e-06, "loss": 0.5407, "step": 27704 }, { "epoch": 0.71, "grad_norm": 1.8147914409637451, "learning_rate": 4.093172230246424e-06, "loss": 0.6292, "step": 27705 }, { "epoch": 0.71, "grad_norm": 0.8139205574989319, "learning_rate": 4.0925024051100435e-06, "loss": 0.4294, "step": 27706 }, { "epoch": 0.71, "grad_norm": 1.409762978553772, "learning_rate": 4.0918326206844286e-06, "loss": 0.4738, "step": 27707 }, { "epoch": 0.71, "grad_norm": 0.9879459142684937, "learning_rate": 4.0911628769741916e-06, "loss": 0.4032, "step": 27708 }, { "epoch": 0.71, "grad_norm": 0.9587845206260681, "learning_rate": 4.090493173983944e-06, "loss": 0.5339, "step": 27709 }, { "epoch": 0.71, "grad_norm": 2.192593812942505, "learning_rate": 4.089823511718308e-06, "loss": 0.5498, "step": 27710 }, { "epoch": 0.71, "grad_norm": 1.5898321866989136, "learning_rate": 4.089153890181896e-06, "loss": 0.4469, "step": 27711 }, { "epoch": 0.71, "grad_norm": 5.842418670654297, "learning_rate": 4.088484309379321e-06, "loss": 0.611, "step": 27712 }, { "epoch": 0.71, "grad_norm": 1.1329525709152222, "learning_rate": 4.087814769315195e-06, "loss": 0.6494, "step": 27713 }, { "epoch": 0.71, "grad_norm": 0.8983139991760254, "learning_rate": 4.08714526999414e-06, "loss": 0.6021, "step": 27714 }, { "epoch": 0.71, "grad_norm": 1.3242038488388062, "learning_rate": 4.086475811420762e-06, "loss": 0.5327, "step": 27715 }, { "epoch": 0.71, "grad_norm": 1.515290379524231, "learning_rate": 4.085806393599676e-06, "loss": 0.5236, "step": 27716 }, { "epoch": 0.71, "grad_norm": 3.870210886001587, "learning_rate": 4.0851370165355e-06, "loss": 0.5554, "step": 27717 }, { "epoch": 0.71, "grad_norm": 1.3741559982299805, "learning_rate": 4.084467680232844e-06, "loss": 0.6493, "step": 27718 }, { "epoch": 0.71, "grad_norm": 1.1868312358856201, "learning_rate": 4.083798384696319e-06, "loss": 0.5162, "step": 27719 }, { "epoch": 0.71, "grad_norm": 1.5975072383880615, "learning_rate": 4.083129129930537e-06, "loss": 0.5391, "step": 27720 }, { "epoch": 0.71, "grad_norm": 1.8738175630569458, "learning_rate": 4.082459915940113e-06, "loss": 0.7104, "step": 27721 }, { "epoch": 0.71, "grad_norm": 9.201011657714844, "learning_rate": 4.081790742729659e-06, "loss": 0.6641, "step": 27722 }, { "epoch": 0.71, "grad_norm": 3.711535930633545, "learning_rate": 4.081121610303781e-06, "loss": 0.4879, "step": 27723 }, { "epoch": 0.71, "grad_norm": 4.628177642822266, "learning_rate": 4.080452518667097e-06, "loss": 0.6408, "step": 27724 }, { "epoch": 0.71, "grad_norm": 4.014966011047363, "learning_rate": 4.079783467824216e-06, "loss": 0.6997, "step": 27725 }, { "epoch": 0.71, "grad_norm": 1.372725486755371, "learning_rate": 4.0791144577797445e-06, "loss": 0.377, "step": 27726 }, { "epoch": 0.71, "grad_norm": 7.660877227783203, "learning_rate": 4.078445488538301e-06, "loss": 0.5598, "step": 27727 }, { "epoch": 0.71, "grad_norm": 2.7067747116088867, "learning_rate": 4.07777656010449e-06, "loss": 0.6549, "step": 27728 }, { "epoch": 0.71, "grad_norm": 2.565751075744629, "learning_rate": 4.077107672482921e-06, "loss": 0.633, "step": 27729 }, { "epoch": 0.71, "grad_norm": 3.9180970191955566, "learning_rate": 4.076438825678202e-06, "loss": 0.6328, "step": 27730 }, { "epoch": 0.71, "grad_norm": 1.8104041814804077, "learning_rate": 4.075770019694949e-06, "loss": 0.4947, "step": 27731 }, { "epoch": 0.71, "grad_norm": 1.2796794176101685, "learning_rate": 4.075101254537767e-06, "loss": 0.3997, "step": 27732 }, { "epoch": 0.71, "grad_norm": 1.68137526512146, "learning_rate": 4.0744325302112606e-06, "loss": 0.345, "step": 27733 }, { "epoch": 0.71, "grad_norm": 2.119719982147217, "learning_rate": 4.073763846720046e-06, "loss": 0.7432, "step": 27734 }, { "epoch": 0.71, "grad_norm": 2.893949508666992, "learning_rate": 4.0730952040687325e-06, "loss": 0.5147, "step": 27735 }, { "epoch": 0.71, "grad_norm": 12.597518920898438, "learning_rate": 4.072426602261915e-06, "loss": 0.5776, "step": 27736 }, { "epoch": 0.71, "grad_norm": 2.791110038757324, "learning_rate": 4.071758041304212e-06, "loss": 0.6163, "step": 27737 }, { "epoch": 0.71, "grad_norm": 1.7709259986877441, "learning_rate": 4.0710895212002296e-06, "loss": 0.6122, "step": 27738 }, { "epoch": 0.71, "grad_norm": 1.8084756135940552, "learning_rate": 4.07042104195457e-06, "loss": 0.3491, "step": 27739 }, { "epoch": 0.71, "grad_norm": 1.5813348293304443, "learning_rate": 4.069752603571846e-06, "loss": 0.7236, "step": 27740 }, { "epoch": 0.71, "grad_norm": 1.3859814405441284, "learning_rate": 4.069084206056662e-06, "loss": 0.5482, "step": 27741 }, { "epoch": 0.71, "grad_norm": 2.311396837234497, "learning_rate": 4.068415849413622e-06, "loss": 0.6267, "step": 27742 }, { "epoch": 0.71, "grad_norm": 2.279594659805298, "learning_rate": 4.067747533647332e-06, "loss": 0.5797, "step": 27743 }, { "epoch": 0.71, "grad_norm": 0.9911351799964905, "learning_rate": 4.067079258762401e-06, "loss": 0.433, "step": 27744 }, { "epoch": 0.71, "grad_norm": 1.2553783655166626, "learning_rate": 4.066411024763432e-06, "loss": 0.6118, "step": 27745 }, { "epoch": 0.71, "grad_norm": 1.094234585762024, "learning_rate": 4.0657428316550274e-06, "loss": 0.4931, "step": 27746 }, { "epoch": 0.71, "grad_norm": 1.5424877405166626, "learning_rate": 4.065074679441798e-06, "loss": 0.6342, "step": 27747 }, { "epoch": 0.71, "grad_norm": 1.429401159286499, "learning_rate": 4.064406568128346e-06, "loss": 0.5959, "step": 27748 }, { "epoch": 0.71, "grad_norm": 1.8732284307479858, "learning_rate": 4.063738497719275e-06, "loss": 0.5862, "step": 27749 }, { "epoch": 0.71, "grad_norm": 6.005417823791504, "learning_rate": 4.063070468219184e-06, "loss": 0.5027, "step": 27750 }, { "epoch": 0.71, "grad_norm": 1.4497686624526978, "learning_rate": 4.062402479632687e-06, "loss": 0.4808, "step": 27751 }, { "epoch": 0.71, "grad_norm": 1.3903003931045532, "learning_rate": 4.06173453196438e-06, "loss": 0.4957, "step": 27752 }, { "epoch": 0.71, "grad_norm": 1.3182141780853271, "learning_rate": 4.061066625218864e-06, "loss": 0.5185, "step": 27753 }, { "epoch": 0.71, "grad_norm": 1.5093398094177246, "learning_rate": 4.060398759400751e-06, "loss": 0.3891, "step": 27754 }, { "epoch": 0.71, "grad_norm": 1.8159267902374268, "learning_rate": 4.059730934514638e-06, "loss": 0.5719, "step": 27755 }, { "epoch": 0.71, "grad_norm": 1.4159512519836426, "learning_rate": 4.059063150565122e-06, "loss": 0.6128, "step": 27756 }, { "epoch": 0.71, "grad_norm": 1.765655517578125, "learning_rate": 4.058395407556814e-06, "loss": 0.4876, "step": 27757 }, { "epoch": 0.71, "grad_norm": 2.0563852787017822, "learning_rate": 4.057727705494313e-06, "loss": 0.6249, "step": 27758 }, { "epoch": 0.71, "grad_norm": 5.954658031463623, "learning_rate": 4.057060044382219e-06, "loss": 0.574, "step": 27759 }, { "epoch": 0.71, "grad_norm": 1.0426605939865112, "learning_rate": 4.056392424225131e-06, "loss": 0.3642, "step": 27760 }, { "epoch": 0.71, "grad_norm": 1.4747651815414429, "learning_rate": 4.055724845027654e-06, "loss": 0.4646, "step": 27761 }, { "epoch": 0.71, "grad_norm": 1.4810104370117188, "learning_rate": 4.055057306794388e-06, "loss": 0.5618, "step": 27762 }, { "epoch": 0.71, "grad_norm": 1.5691415071487427, "learning_rate": 4.054389809529927e-06, "loss": 0.3943, "step": 27763 }, { "epoch": 0.71, "grad_norm": 0.8333625197410583, "learning_rate": 4.053722353238881e-06, "loss": 0.4877, "step": 27764 }, { "epoch": 0.71, "grad_norm": 2.4789373874664307, "learning_rate": 4.053054937925844e-06, "loss": 0.4904, "step": 27765 }, { "epoch": 0.71, "grad_norm": 1.258442997932434, "learning_rate": 4.052387563595417e-06, "loss": 0.5022, "step": 27766 }, { "epoch": 0.71, "grad_norm": 1.4178401231765747, "learning_rate": 4.051720230252194e-06, "loss": 0.6937, "step": 27767 }, { "epoch": 0.71, "grad_norm": 2.777960777282715, "learning_rate": 4.051052937900782e-06, "loss": 0.4284, "step": 27768 }, { "epoch": 0.71, "grad_norm": 1.460263729095459, "learning_rate": 4.050385686545776e-06, "loss": 0.6814, "step": 27769 }, { "epoch": 0.71, "grad_norm": 1.4250787496566772, "learning_rate": 4.04971847619177e-06, "loss": 0.4785, "step": 27770 }, { "epoch": 0.71, "grad_norm": 3.195371389389038, "learning_rate": 4.049051306843369e-06, "loss": 0.6075, "step": 27771 }, { "epoch": 0.71, "grad_norm": 12.090619087219238, "learning_rate": 4.048384178505168e-06, "loss": 0.7127, "step": 27772 }, { "epoch": 0.71, "grad_norm": 1.6124142408370972, "learning_rate": 4.047717091181761e-06, "loss": 0.5038, "step": 27773 }, { "epoch": 0.71, "grad_norm": 1.2398401498794556, "learning_rate": 4.047050044877752e-06, "loss": 0.5725, "step": 27774 }, { "epoch": 0.71, "grad_norm": 1.4010214805603027, "learning_rate": 4.046383039597732e-06, "loss": 0.4987, "step": 27775 }, { "epoch": 0.71, "grad_norm": 1.3734620809555054, "learning_rate": 4.045716075346302e-06, "loss": 0.4065, "step": 27776 }, { "epoch": 0.71, "grad_norm": 2.428908348083496, "learning_rate": 4.045049152128051e-06, "loss": 0.4347, "step": 27777 }, { "epoch": 0.71, "grad_norm": 4.449500560760498, "learning_rate": 4.044382269947584e-06, "loss": 0.5504, "step": 27778 }, { "epoch": 0.71, "grad_norm": 1.0221699476242065, "learning_rate": 4.043715428809493e-06, "loss": 0.5339, "step": 27779 }, { "epoch": 0.71, "grad_norm": 1.3932806253433228, "learning_rate": 4.043048628718369e-06, "loss": 0.5802, "step": 27780 }, { "epoch": 0.71, "grad_norm": 1.1619391441345215, "learning_rate": 4.042381869678815e-06, "loss": 0.3197, "step": 27781 }, { "epoch": 0.71, "grad_norm": 2.4051144123077393, "learning_rate": 4.0417151516954236e-06, "loss": 0.6787, "step": 27782 }, { "epoch": 0.71, "grad_norm": 3.3847148418426514, "learning_rate": 4.0410484747727865e-06, "loss": 0.4609, "step": 27783 }, { "epoch": 0.71, "grad_norm": 1.8492457866668701, "learning_rate": 4.040381838915496e-06, "loss": 0.5911, "step": 27784 }, { "epoch": 0.71, "grad_norm": 1.4681706428527832, "learning_rate": 4.039715244128154e-06, "loss": 0.4054, "step": 27785 }, { "epoch": 0.71, "grad_norm": 1.6822649240493774, "learning_rate": 4.039048690415349e-06, "loss": 0.48, "step": 27786 }, { "epoch": 0.71, "grad_norm": 2.7697198390960693, "learning_rate": 4.038382177781673e-06, "loss": 0.6245, "step": 27787 }, { "epoch": 0.71, "grad_norm": 1.5618127584457397, "learning_rate": 4.037715706231724e-06, "loss": 0.6364, "step": 27788 }, { "epoch": 0.71, "grad_norm": 1.9713633060455322, "learning_rate": 4.037049275770093e-06, "loss": 0.5749, "step": 27789 }, { "epoch": 0.71, "grad_norm": 1.3110240697860718, "learning_rate": 4.0363828864013725e-06, "loss": 0.6243, "step": 27790 }, { "epoch": 0.71, "grad_norm": 1.0913692712783813, "learning_rate": 4.03571653813015e-06, "loss": 0.5447, "step": 27791 }, { "epoch": 0.71, "grad_norm": 1.4037188291549683, "learning_rate": 4.035050230961025e-06, "loss": 0.487, "step": 27792 }, { "epoch": 0.71, "grad_norm": 1.3827568292617798, "learning_rate": 4.034383964898587e-06, "loss": 0.5625, "step": 27793 }, { "epoch": 0.71, "grad_norm": 1.4557609558105469, "learning_rate": 4.033717739947423e-06, "loss": 0.4719, "step": 27794 }, { "epoch": 0.71, "grad_norm": 7.156991481781006, "learning_rate": 4.033051556112133e-06, "loss": 0.459, "step": 27795 }, { "epoch": 0.71, "grad_norm": 1.1606271266937256, "learning_rate": 4.032385413397302e-06, "loss": 0.532, "step": 27796 }, { "epoch": 0.71, "grad_norm": 2.7600748538970947, "learning_rate": 4.031719311807517e-06, "loss": 0.4291, "step": 27797 }, { "epoch": 0.71, "grad_norm": 2.2106778621673584, "learning_rate": 4.031053251347378e-06, "loss": 0.6473, "step": 27798 }, { "epoch": 0.71, "grad_norm": 1.693186640739441, "learning_rate": 4.030387232021469e-06, "loss": 0.5872, "step": 27799 }, { "epoch": 0.71, "grad_norm": 1.514161467552185, "learning_rate": 4.02972125383438e-06, "loss": 0.5177, "step": 27800 }, { "epoch": 0.71, "grad_norm": 6.470254898071289, "learning_rate": 4.029055316790699e-06, "loss": 0.7455, "step": 27801 }, { "epoch": 0.71, "grad_norm": 12.773486137390137, "learning_rate": 4.02838942089502e-06, "loss": 0.5912, "step": 27802 }, { "epoch": 0.71, "grad_norm": 1.6834825277328491, "learning_rate": 4.0277235661519295e-06, "loss": 0.4384, "step": 27803 }, { "epoch": 0.71, "grad_norm": 1.2388525009155273, "learning_rate": 4.027057752566014e-06, "loss": 0.5661, "step": 27804 }, { "epoch": 0.71, "grad_norm": 1.605734944343567, "learning_rate": 4.026391980141866e-06, "loss": 0.4816, "step": 27805 }, { "epoch": 0.71, "grad_norm": 1.4810943603515625, "learning_rate": 4.025726248884071e-06, "loss": 0.4607, "step": 27806 }, { "epoch": 0.71, "grad_norm": 2.2181618213653564, "learning_rate": 4.025060558797218e-06, "loss": 0.5853, "step": 27807 }, { "epoch": 0.71, "grad_norm": 1.46611487865448, "learning_rate": 4.024394909885891e-06, "loss": 0.4666, "step": 27808 }, { "epoch": 0.71, "grad_norm": 4.987813472747803, "learning_rate": 4.023729302154682e-06, "loss": 1.0142, "step": 27809 }, { "epoch": 0.71, "grad_norm": 1.0961346626281738, "learning_rate": 4.023063735608177e-06, "loss": 0.57, "step": 27810 }, { "epoch": 0.71, "grad_norm": 1.2731730937957764, "learning_rate": 4.022398210250959e-06, "loss": 0.5805, "step": 27811 }, { "epoch": 0.71, "grad_norm": 4.07012939453125, "learning_rate": 4.021732726087618e-06, "loss": 0.5384, "step": 27812 }, { "epoch": 0.71, "grad_norm": 1.3735084533691406, "learning_rate": 4.021067283122741e-06, "loss": 0.5828, "step": 27813 }, { "epoch": 0.71, "grad_norm": 2.934365749359131, "learning_rate": 4.020401881360912e-06, "loss": 0.5398, "step": 27814 }, { "epoch": 0.71, "grad_norm": 1.110417127609253, "learning_rate": 4.019736520806713e-06, "loss": 0.5734, "step": 27815 }, { "epoch": 0.71, "grad_norm": 0.9648444652557373, "learning_rate": 4.019071201464736e-06, "loss": 0.4886, "step": 27816 }, { "epoch": 0.71, "grad_norm": 1.5305254459381104, "learning_rate": 4.018405923339562e-06, "loss": 0.4902, "step": 27817 }, { "epoch": 0.71, "grad_norm": 1.6182141304016113, "learning_rate": 4.017740686435774e-06, "loss": 0.6846, "step": 27818 }, { "epoch": 0.71, "grad_norm": 2.0346319675445557, "learning_rate": 4.017075490757961e-06, "loss": 0.5692, "step": 27819 }, { "epoch": 0.71, "grad_norm": 16.40987777709961, "learning_rate": 4.016410336310706e-06, "loss": 0.6894, "step": 27820 }, { "epoch": 0.71, "grad_norm": 1.2914727926254272, "learning_rate": 4.015745223098587e-06, "loss": 0.558, "step": 27821 }, { "epoch": 0.71, "grad_norm": 1.9976520538330078, "learning_rate": 4.015080151126197e-06, "loss": 0.5662, "step": 27822 }, { "epoch": 0.71, "grad_norm": 6.099248886108398, "learning_rate": 4.014415120398114e-06, "loss": 0.7289, "step": 27823 }, { "epoch": 0.71, "grad_norm": 1.5477908849716187, "learning_rate": 4.013750130918921e-06, "loss": 0.5294, "step": 27824 }, { "epoch": 0.71, "grad_norm": 1.137864112854004, "learning_rate": 4.013085182693199e-06, "loss": 0.5895, "step": 27825 }, { "epoch": 0.71, "grad_norm": 1.6877508163452148, "learning_rate": 4.012420275725535e-06, "loss": 0.5712, "step": 27826 }, { "epoch": 0.71, "grad_norm": 1.1243245601654053, "learning_rate": 4.0117554100205085e-06, "loss": 0.5145, "step": 27827 }, { "epoch": 0.71, "grad_norm": 1.151412844657898, "learning_rate": 4.011090585582699e-06, "loss": 0.6305, "step": 27828 }, { "epoch": 0.71, "grad_norm": 3.4398279190063477, "learning_rate": 4.010425802416693e-06, "loss": 0.5009, "step": 27829 }, { "epoch": 0.71, "grad_norm": 3.7462050914764404, "learning_rate": 4.0097610605270695e-06, "loss": 0.6624, "step": 27830 }, { "epoch": 0.71, "grad_norm": 2.571957588195801, "learning_rate": 4.009096359918409e-06, "loss": 0.5106, "step": 27831 }, { "epoch": 0.71, "grad_norm": 5.366481304168701, "learning_rate": 4.00843170059529e-06, "loss": 0.5852, "step": 27832 }, { "epoch": 0.71, "grad_norm": 2.0307984352111816, "learning_rate": 4.007767082562299e-06, "loss": 0.6065, "step": 27833 }, { "epoch": 0.71, "grad_norm": 1.9367491006851196, "learning_rate": 4.007102505824012e-06, "loss": 0.7177, "step": 27834 }, { "epoch": 0.71, "grad_norm": 1.4223836660385132, "learning_rate": 4.006437970385007e-06, "loss": 0.3643, "step": 27835 }, { "epoch": 0.71, "grad_norm": 1.6432673931121826, "learning_rate": 4.005773476249868e-06, "loss": 0.4885, "step": 27836 }, { "epoch": 0.71, "grad_norm": 3.4967422485351562, "learning_rate": 4.005109023423173e-06, "loss": 0.4745, "step": 27837 }, { "epoch": 0.71, "grad_norm": 1.4509838819503784, "learning_rate": 4.004444611909497e-06, "loss": 0.5505, "step": 27838 }, { "epoch": 0.71, "grad_norm": 1.6234556436538696, "learning_rate": 4.003780241713426e-06, "loss": 0.5575, "step": 27839 }, { "epoch": 0.71, "grad_norm": 1.6129639148712158, "learning_rate": 4.0031159128395324e-06, "loss": 0.5191, "step": 27840 }, { "epoch": 0.71, "grad_norm": 1.4832292795181274, "learning_rate": 4.002451625292397e-06, "loss": 0.6739, "step": 27841 }, { "epoch": 0.71, "grad_norm": 1.0949560403823853, "learning_rate": 4.001787379076593e-06, "loss": 0.3522, "step": 27842 }, { "epoch": 0.71, "grad_norm": 1.3253562450408936, "learning_rate": 4.001123174196707e-06, "loss": 0.4718, "step": 27843 }, { "epoch": 0.71, "grad_norm": 1.460333228111267, "learning_rate": 4.000459010657311e-06, "loss": 0.6291, "step": 27844 }, { "epoch": 0.71, "grad_norm": 1.3504539728164673, "learning_rate": 3.999794888462979e-06, "loss": 0.5479, "step": 27845 }, { "epoch": 0.71, "grad_norm": 5.1076154708862305, "learning_rate": 3.999130807618293e-06, "loss": 0.6407, "step": 27846 }, { "epoch": 0.71, "grad_norm": 1.5688011646270752, "learning_rate": 3.998466768127828e-06, "loss": 0.4982, "step": 27847 }, { "epoch": 0.71, "grad_norm": 4.233242034912109, "learning_rate": 3.997802769996159e-06, "loss": 0.6158, "step": 27848 }, { "epoch": 0.71, "grad_norm": 8.803801536560059, "learning_rate": 3.99713881322786e-06, "loss": 0.5959, "step": 27849 }, { "epoch": 0.71, "grad_norm": 1.5104460716247559, "learning_rate": 3.996474897827513e-06, "loss": 0.5846, "step": 27850 }, { "epoch": 0.71, "grad_norm": 1.6902648210525513, "learning_rate": 3.995811023799687e-06, "loss": 0.4689, "step": 27851 }, { "epoch": 0.71, "grad_norm": 1.5528995990753174, "learning_rate": 3.995147191148957e-06, "loss": 0.6015, "step": 27852 }, { "epoch": 0.71, "grad_norm": 6.110688209533691, "learning_rate": 3.994483399879903e-06, "loss": 0.4622, "step": 27853 }, { "epoch": 0.71, "grad_norm": 1.7674322128295898, "learning_rate": 3.993819649997097e-06, "loss": 0.4852, "step": 27854 }, { "epoch": 0.71, "grad_norm": 2.854102849960327, "learning_rate": 3.9931559415051115e-06, "loss": 0.7005, "step": 27855 }, { "epoch": 0.71, "grad_norm": 1.4370473623275757, "learning_rate": 3.992492274408519e-06, "loss": 0.6395, "step": 27856 }, { "epoch": 0.71, "grad_norm": 2.069000244140625, "learning_rate": 3.991828648711898e-06, "loss": 0.4482, "step": 27857 }, { "epoch": 0.71, "grad_norm": 1.1102817058563232, "learning_rate": 3.9911650644198195e-06, "loss": 0.5884, "step": 27858 }, { "epoch": 0.71, "grad_norm": 1.4098297357559204, "learning_rate": 3.990501521536854e-06, "loss": 0.3981, "step": 27859 }, { "epoch": 0.71, "grad_norm": 2.191199541091919, "learning_rate": 3.989838020067578e-06, "loss": 0.4492, "step": 27860 }, { "epoch": 0.71, "grad_norm": 1.7255237102508545, "learning_rate": 3.989174560016564e-06, "loss": 0.547, "step": 27861 }, { "epoch": 0.71, "grad_norm": 1.390790343284607, "learning_rate": 3.988511141388378e-06, "loss": 0.7118, "step": 27862 }, { "epoch": 0.71, "grad_norm": 6.442171096801758, "learning_rate": 3.9878477641876e-06, "loss": 0.3784, "step": 27863 }, { "epoch": 0.71, "grad_norm": 1.1786425113677979, "learning_rate": 3.987184428418799e-06, "loss": 0.4678, "step": 27864 }, { "epoch": 0.71, "grad_norm": 9.702963829040527, "learning_rate": 3.9865211340865436e-06, "loss": 0.5484, "step": 27865 }, { "epoch": 0.71, "grad_norm": 1.3323349952697754, "learning_rate": 3.985857881195405e-06, "loss": 0.5333, "step": 27866 }, { "epoch": 0.71, "grad_norm": 1.1122885942459106, "learning_rate": 3.9851946697499575e-06, "loss": 0.4557, "step": 27867 }, { "epoch": 0.71, "grad_norm": 1.580714464187622, "learning_rate": 3.9845314997547705e-06, "loss": 0.6082, "step": 27868 }, { "epoch": 0.71, "grad_norm": 1.2748839855194092, "learning_rate": 3.983868371214408e-06, "loss": 0.4109, "step": 27869 }, { "epoch": 0.71, "grad_norm": 1.4800143241882324, "learning_rate": 3.98320528413345e-06, "loss": 0.5698, "step": 27870 }, { "epoch": 0.71, "grad_norm": 1.9705630540847778, "learning_rate": 3.982542238516461e-06, "loss": 0.671, "step": 27871 }, { "epoch": 0.71, "grad_norm": 6.5237202644348145, "learning_rate": 3.98187923436801e-06, "loss": 0.5994, "step": 27872 }, { "epoch": 0.71, "grad_norm": 4.030549049377441, "learning_rate": 3.981216271692662e-06, "loss": 0.5091, "step": 27873 }, { "epoch": 0.71, "grad_norm": 1.4862573146820068, "learning_rate": 3.980553350494996e-06, "loss": 0.5241, "step": 27874 }, { "epoch": 0.71, "grad_norm": 1.4684277772903442, "learning_rate": 3.979890470779573e-06, "loss": 0.3499, "step": 27875 }, { "epoch": 0.71, "grad_norm": 1.4853901863098145, "learning_rate": 3.979227632550958e-06, "loss": 0.4781, "step": 27876 }, { "epoch": 0.71, "grad_norm": 1.6948522329330444, "learning_rate": 3.978564835813728e-06, "loss": 0.5237, "step": 27877 }, { "epoch": 0.71, "grad_norm": 1.553092360496521, "learning_rate": 3.9779020805724465e-06, "loss": 0.3637, "step": 27878 }, { "epoch": 0.71, "grad_norm": 1.5182822942733765, "learning_rate": 3.977239366831677e-06, "loss": 0.7253, "step": 27879 }, { "epoch": 0.71, "grad_norm": 1.7269278764724731, "learning_rate": 3.976576694595992e-06, "loss": 0.4639, "step": 27880 }, { "epoch": 0.71, "grad_norm": 2.4886410236358643, "learning_rate": 3.975914063869958e-06, "loss": 0.5905, "step": 27881 }, { "epoch": 0.71, "grad_norm": 1.8895610570907593, "learning_rate": 3.975251474658139e-06, "loss": 0.5398, "step": 27882 }, { "epoch": 0.71, "grad_norm": 1.3314887285232544, "learning_rate": 3.974588926965098e-06, "loss": 0.4661, "step": 27883 }, { "epoch": 0.71, "grad_norm": 1.8298428058624268, "learning_rate": 3.973926420795408e-06, "loss": 0.4352, "step": 27884 }, { "epoch": 0.71, "grad_norm": 3.0579378604888916, "learning_rate": 3.973263956153631e-06, "loss": 0.6363, "step": 27885 }, { "epoch": 0.71, "grad_norm": 1.434325933456421, "learning_rate": 3.972601533044329e-06, "loss": 0.5101, "step": 27886 }, { "epoch": 0.71, "grad_norm": 1.4011867046356201, "learning_rate": 3.9719391514720755e-06, "loss": 0.6123, "step": 27887 }, { "epoch": 0.71, "grad_norm": 1.51309335231781, "learning_rate": 3.971276811441428e-06, "loss": 0.545, "step": 27888 }, { "epoch": 0.71, "grad_norm": 0.8349566459655762, "learning_rate": 3.970614512956954e-06, "loss": 0.3785, "step": 27889 }, { "epoch": 0.71, "grad_norm": 1.1324740648269653, "learning_rate": 3.969952256023213e-06, "loss": 0.5281, "step": 27890 }, { "epoch": 0.71, "grad_norm": 11.580728530883789, "learning_rate": 3.9692900406447764e-06, "loss": 0.5098, "step": 27891 }, { "epoch": 0.71, "grad_norm": 1.2943613529205322, "learning_rate": 3.968627866826203e-06, "loss": 0.5376, "step": 27892 }, { "epoch": 0.71, "grad_norm": 1.1803653240203857, "learning_rate": 3.967965734572054e-06, "loss": 0.514, "step": 27893 }, { "epoch": 0.71, "grad_norm": 2.873695135116577, "learning_rate": 3.967303643886901e-06, "loss": 0.882, "step": 27894 }, { "epoch": 0.71, "grad_norm": 1.6562068462371826, "learning_rate": 3.966641594775299e-06, "loss": 0.4826, "step": 27895 }, { "epoch": 0.71, "grad_norm": 1.4067070484161377, "learning_rate": 3.965979587241813e-06, "loss": 0.449, "step": 27896 }, { "epoch": 0.72, "grad_norm": 1.674604058265686, "learning_rate": 3.965317621291002e-06, "loss": 0.5782, "step": 27897 }, { "epoch": 0.72, "grad_norm": 1.7431392669677734, "learning_rate": 3.964655696927433e-06, "loss": 0.4412, "step": 27898 }, { "epoch": 0.72, "grad_norm": 1.12467622756958, "learning_rate": 3.9639938141556655e-06, "loss": 0.4547, "step": 27899 }, { "epoch": 0.72, "grad_norm": 1.5531398057937622, "learning_rate": 3.963331972980257e-06, "loss": 0.5837, "step": 27900 }, { "epoch": 0.72, "grad_norm": 1.9331644773483276, "learning_rate": 3.962670173405776e-06, "loss": 0.5249, "step": 27901 }, { "epoch": 0.72, "grad_norm": 1.6444200277328491, "learning_rate": 3.9620084154367785e-06, "loss": 0.438, "step": 27902 }, { "epoch": 0.72, "grad_norm": 1.3975021839141846, "learning_rate": 3.961346699077822e-06, "loss": 0.4639, "step": 27903 }, { "epoch": 0.72, "grad_norm": 3.9549453258514404, "learning_rate": 3.960685024333473e-06, "loss": 0.7526, "step": 27904 }, { "epoch": 0.72, "grad_norm": 1.9411159753799438, "learning_rate": 3.9600233912082905e-06, "loss": 0.5324, "step": 27905 }, { "epoch": 0.72, "grad_norm": 1.697016954421997, "learning_rate": 3.959361799706831e-06, "loss": 0.3945, "step": 27906 }, { "epoch": 0.72, "grad_norm": 2.2559711933135986, "learning_rate": 3.958700249833651e-06, "loss": 0.4444, "step": 27907 }, { "epoch": 0.72, "grad_norm": 1.489632487297058, "learning_rate": 3.958038741593318e-06, "loss": 0.438, "step": 27908 }, { "epoch": 0.72, "grad_norm": 1.232721209526062, "learning_rate": 3.957377274990385e-06, "loss": 0.5016, "step": 27909 }, { "epoch": 0.72, "grad_norm": 1.401748776435852, "learning_rate": 3.956715850029409e-06, "loss": 0.4239, "step": 27910 }, { "epoch": 0.72, "grad_norm": 1.3094056844711304, "learning_rate": 3.9560544667149525e-06, "loss": 0.6198, "step": 27911 }, { "epoch": 0.72, "grad_norm": 1.955335021018982, "learning_rate": 3.955393125051573e-06, "loss": 0.5634, "step": 27912 }, { "epoch": 0.72, "grad_norm": 2.8546597957611084, "learning_rate": 3.954731825043826e-06, "loss": 0.5938, "step": 27913 }, { "epoch": 0.72, "grad_norm": 1.1532394886016846, "learning_rate": 3.9540705666962655e-06, "loss": 0.4579, "step": 27914 }, { "epoch": 0.72, "grad_norm": 1.76455557346344, "learning_rate": 3.9534093500134555e-06, "loss": 0.6627, "step": 27915 }, { "epoch": 0.72, "grad_norm": 1.837381362915039, "learning_rate": 3.952748174999949e-06, "loss": 0.67, "step": 27916 }, { "epoch": 0.72, "grad_norm": 1.3520886898040771, "learning_rate": 3.952087041660301e-06, "loss": 0.4909, "step": 27917 }, { "epoch": 0.72, "grad_norm": 2.2210769653320312, "learning_rate": 3.951425949999071e-06, "loss": 0.3821, "step": 27918 }, { "epoch": 0.72, "grad_norm": 1.2722147703170776, "learning_rate": 3.950764900020815e-06, "loss": 0.4405, "step": 27919 }, { "epoch": 0.72, "grad_norm": 1.5824068784713745, "learning_rate": 3.950103891730083e-06, "loss": 0.5088, "step": 27920 }, { "epoch": 0.72, "grad_norm": 1.131378173828125, "learning_rate": 3.949442925131437e-06, "loss": 0.4129, "step": 27921 }, { "epoch": 0.72, "grad_norm": 1.0786535739898682, "learning_rate": 3.94878200022943e-06, "loss": 0.5245, "step": 27922 }, { "epoch": 0.72, "grad_norm": 1.7644120454788208, "learning_rate": 3.948121117028613e-06, "loss": 0.4786, "step": 27923 }, { "epoch": 0.72, "grad_norm": 2.1782419681549072, "learning_rate": 3.9474602755335426e-06, "loss": 0.5598, "step": 27924 }, { "epoch": 0.72, "grad_norm": 1.7939149141311646, "learning_rate": 3.946799475748776e-06, "loss": 0.6154, "step": 27925 }, { "epoch": 0.72, "grad_norm": 1.9414421319961548, "learning_rate": 3.946138717678864e-06, "loss": 0.682, "step": 27926 }, { "epoch": 0.72, "grad_norm": 2.5506491661071777, "learning_rate": 3.9454780013283564e-06, "loss": 0.7309, "step": 27927 }, { "epoch": 0.72, "grad_norm": 2.0625038146972656, "learning_rate": 3.944817326701815e-06, "loss": 0.3612, "step": 27928 }, { "epoch": 0.72, "grad_norm": 1.2633540630340576, "learning_rate": 3.944156693803789e-06, "loss": 0.4841, "step": 27929 }, { "epoch": 0.72, "grad_norm": 1.4716274738311768, "learning_rate": 3.94349610263883e-06, "loss": 0.5732, "step": 27930 }, { "epoch": 0.72, "grad_norm": 1.2982544898986816, "learning_rate": 3.942835553211487e-06, "loss": 0.6336, "step": 27931 }, { "epoch": 0.72, "grad_norm": 1.6316243410110474, "learning_rate": 3.942175045526319e-06, "loss": 0.6071, "step": 27932 }, { "epoch": 0.72, "grad_norm": 1.1206612586975098, "learning_rate": 3.941514579587875e-06, "loss": 0.5709, "step": 27933 }, { "epoch": 0.72, "grad_norm": 1.1889991760253906, "learning_rate": 3.940854155400703e-06, "loss": 0.7071, "step": 27934 }, { "epoch": 0.72, "grad_norm": 1.1442655324935913, "learning_rate": 3.9401937729693604e-06, "loss": 0.5558, "step": 27935 }, { "epoch": 0.72, "grad_norm": 1.8973146677017212, "learning_rate": 3.939533432298396e-06, "loss": 0.5092, "step": 27936 }, { "epoch": 0.72, "grad_norm": 1.455794334411621, "learning_rate": 3.938873133392359e-06, "loss": 0.5483, "step": 27937 }, { "epoch": 0.72, "grad_norm": 1.4023586511611938, "learning_rate": 3.938212876255797e-06, "loss": 0.3353, "step": 27938 }, { "epoch": 0.72, "grad_norm": 1.077485203742981, "learning_rate": 3.9375526608932666e-06, "loss": 0.3602, "step": 27939 }, { "epoch": 0.72, "grad_norm": 1.9674261808395386, "learning_rate": 3.936892487309315e-06, "loss": 0.6452, "step": 27940 }, { "epoch": 0.72, "grad_norm": 1.431127905845642, "learning_rate": 3.936232355508488e-06, "loss": 0.3802, "step": 27941 }, { "epoch": 0.72, "grad_norm": 1.1244864463806152, "learning_rate": 3.935572265495341e-06, "loss": 0.4496, "step": 27942 }, { "epoch": 0.72, "grad_norm": 2.03910231590271, "learning_rate": 3.934912217274419e-06, "loss": 0.6326, "step": 27943 }, { "epoch": 0.72, "grad_norm": 1.5658832788467407, "learning_rate": 3.934252210850269e-06, "loss": 0.6898, "step": 27944 }, { "epoch": 0.72, "grad_norm": 1.1546063423156738, "learning_rate": 3.933592246227445e-06, "loss": 0.5154, "step": 27945 }, { "epoch": 0.72, "grad_norm": 9.226546287536621, "learning_rate": 3.932932323410493e-06, "loss": 0.5789, "step": 27946 }, { "epoch": 0.72, "grad_norm": 7.25075101852417, "learning_rate": 3.9322724424039596e-06, "loss": 0.553, "step": 27947 }, { "epoch": 0.72, "grad_norm": 1.2208232879638672, "learning_rate": 3.931612603212387e-06, "loss": 0.6096, "step": 27948 }, { "epoch": 0.72, "grad_norm": 1.799359917640686, "learning_rate": 3.930952805840334e-06, "loss": 0.5893, "step": 27949 }, { "epoch": 0.72, "grad_norm": 1.328199028968811, "learning_rate": 3.9302930502923385e-06, "loss": 0.5524, "step": 27950 }, { "epoch": 0.72, "grad_norm": 1.2625548839569092, "learning_rate": 3.929633336572948e-06, "loss": 0.4794, "step": 27951 }, { "epoch": 0.72, "grad_norm": 1.7040327787399292, "learning_rate": 3.9289736646867135e-06, "loss": 0.5117, "step": 27952 }, { "epoch": 0.72, "grad_norm": 2.6459009647369385, "learning_rate": 3.92831403463818e-06, "loss": 0.6591, "step": 27953 }, { "epoch": 0.72, "grad_norm": 1.8018118143081665, "learning_rate": 3.92765444643189e-06, "loss": 0.4399, "step": 27954 }, { "epoch": 0.72, "grad_norm": 1.1658935546875, "learning_rate": 3.926994900072387e-06, "loss": 0.4937, "step": 27955 }, { "epoch": 0.72, "grad_norm": 1.153839111328125, "learning_rate": 3.9263353955642245e-06, "loss": 0.371, "step": 27956 }, { "epoch": 0.72, "grad_norm": 2.146918535232544, "learning_rate": 3.925675932911942e-06, "loss": 0.6176, "step": 27957 }, { "epoch": 0.72, "grad_norm": 1.3034942150115967, "learning_rate": 3.925016512120081e-06, "loss": 0.4067, "step": 27958 }, { "epoch": 0.72, "grad_norm": 1.1703871488571167, "learning_rate": 3.924357133193193e-06, "loss": 0.5282, "step": 27959 }, { "epoch": 0.72, "grad_norm": 1.1316324472427368, "learning_rate": 3.923697796135818e-06, "loss": 0.4436, "step": 27960 }, { "epoch": 0.72, "grad_norm": 1.050384283065796, "learning_rate": 3.923038500952501e-06, "loss": 0.4419, "step": 27961 }, { "epoch": 0.72, "grad_norm": 1.52145516872406, "learning_rate": 3.922379247647782e-06, "loss": 0.5958, "step": 27962 }, { "epoch": 0.72, "grad_norm": 1.7400445938110352, "learning_rate": 3.921720036226209e-06, "loss": 0.6674, "step": 27963 }, { "epoch": 0.72, "grad_norm": 1.7665385007858276, "learning_rate": 3.921060866692323e-06, "loss": 0.6245, "step": 27964 }, { "epoch": 0.72, "grad_norm": 1.8395568132400513, "learning_rate": 3.920401739050663e-06, "loss": 0.5401, "step": 27965 }, { "epoch": 0.72, "grad_norm": 1.2259328365325928, "learning_rate": 3.9197426533057784e-06, "loss": 0.5501, "step": 27966 }, { "epoch": 0.72, "grad_norm": 4.0230207443237305, "learning_rate": 3.919083609462208e-06, "loss": 0.5937, "step": 27967 }, { "epoch": 0.72, "grad_norm": 2.471989631652832, "learning_rate": 3.918424607524488e-06, "loss": 0.5133, "step": 27968 }, { "epoch": 0.72, "grad_norm": 1.5962408781051636, "learning_rate": 3.917765647497169e-06, "loss": 0.4365, "step": 27969 }, { "epoch": 0.72, "grad_norm": 1.117415428161621, "learning_rate": 3.917106729384788e-06, "loss": 0.5551, "step": 27970 }, { "epoch": 0.72, "grad_norm": 1.2968721389770508, "learning_rate": 3.916447853191886e-06, "loss": 0.5494, "step": 27971 }, { "epoch": 0.72, "grad_norm": 1.3514646291732788, "learning_rate": 3.915789018922999e-06, "loss": 0.5786, "step": 27972 }, { "epoch": 0.72, "grad_norm": 2.915231466293335, "learning_rate": 3.915130226582677e-06, "loss": 0.4642, "step": 27973 }, { "epoch": 0.72, "grad_norm": 1.5352160930633545, "learning_rate": 3.914471476175452e-06, "loss": 0.5853, "step": 27974 }, { "epoch": 0.72, "grad_norm": 1.2494467496871948, "learning_rate": 3.913812767705864e-06, "loss": 0.6196, "step": 27975 }, { "epoch": 0.72, "grad_norm": 1.6382286548614502, "learning_rate": 3.913154101178458e-06, "loss": 0.5087, "step": 27976 }, { "epoch": 0.72, "grad_norm": 1.4443285465240479, "learning_rate": 3.912495476597771e-06, "loss": 0.504, "step": 27977 }, { "epoch": 0.72, "grad_norm": 1.6173115968704224, "learning_rate": 3.911836893968339e-06, "loss": 0.6296, "step": 27978 }, { "epoch": 0.72, "grad_norm": 4.981906414031982, "learning_rate": 3.911178353294702e-06, "loss": 0.6771, "step": 27979 }, { "epoch": 0.72, "grad_norm": 1.4589906930923462, "learning_rate": 3.9105198545813995e-06, "loss": 0.5222, "step": 27980 }, { "epoch": 0.72, "grad_norm": 1.170966386795044, "learning_rate": 3.909861397832965e-06, "loss": 0.5067, "step": 27981 }, { "epoch": 0.72, "grad_norm": 1.533438801765442, "learning_rate": 3.9092029830539426e-06, "loss": 0.4122, "step": 27982 }, { "epoch": 0.72, "grad_norm": 1.614946961402893, "learning_rate": 3.9085446102488665e-06, "loss": 0.5626, "step": 27983 }, { "epoch": 0.72, "grad_norm": 2.8486173152923584, "learning_rate": 3.907886279422274e-06, "loss": 0.5718, "step": 27984 }, { "epoch": 0.72, "grad_norm": 8.843700408935547, "learning_rate": 3.907227990578699e-06, "loss": 0.641, "step": 27985 }, { "epoch": 0.72, "grad_norm": 1.3998479843139648, "learning_rate": 3.9065697437226835e-06, "loss": 0.5064, "step": 27986 }, { "epoch": 0.72, "grad_norm": 1.2088559865951538, "learning_rate": 3.905911538858761e-06, "loss": 0.5515, "step": 27987 }, { "epoch": 0.72, "grad_norm": 1.3226819038391113, "learning_rate": 3.905253375991464e-06, "loss": 0.5293, "step": 27988 }, { "epoch": 0.72, "grad_norm": 2.557346820831299, "learning_rate": 3.904595255125336e-06, "loss": 0.6474, "step": 27989 }, { "epoch": 0.72, "grad_norm": 8.295071601867676, "learning_rate": 3.903937176264907e-06, "loss": 0.6588, "step": 27990 }, { "epoch": 0.72, "grad_norm": 1.4506334066390991, "learning_rate": 3.903279139414714e-06, "loss": 0.519, "step": 27991 }, { "epoch": 0.72, "grad_norm": 2.290607213973999, "learning_rate": 3.902621144579287e-06, "loss": 0.684, "step": 27992 }, { "epoch": 0.72, "grad_norm": 1.2179458141326904, "learning_rate": 3.9019631917631674e-06, "loss": 0.4318, "step": 27993 }, { "epoch": 0.72, "grad_norm": 1.560829997062683, "learning_rate": 3.901305280970886e-06, "loss": 0.5287, "step": 27994 }, { "epoch": 0.72, "grad_norm": 0.9241822361946106, "learning_rate": 3.900647412206975e-06, "loss": 0.4048, "step": 27995 }, { "epoch": 0.72, "grad_norm": 1.5285474061965942, "learning_rate": 3.899989585475972e-06, "loss": 0.4162, "step": 27996 }, { "epoch": 0.72, "grad_norm": 1.7483046054840088, "learning_rate": 3.899331800782409e-06, "loss": 0.5026, "step": 27997 }, { "epoch": 0.72, "grad_norm": 1.8078505992889404, "learning_rate": 3.898674058130815e-06, "loss": 0.6959, "step": 27998 }, { "epoch": 0.72, "grad_norm": 17.471923828125, "learning_rate": 3.89801635752573e-06, "loss": 0.5077, "step": 27999 }, { "epoch": 0.72, "grad_norm": 5.115124225616455, "learning_rate": 3.8973586989716815e-06, "loss": 0.5534, "step": 28000 }, { "epoch": 0.72, "grad_norm": 8.99226188659668, "learning_rate": 3.896701082473204e-06, "loss": 0.5402, "step": 28001 }, { "epoch": 0.72, "grad_norm": 1.6343657970428467, "learning_rate": 3.896043508034824e-06, "loss": 0.6033, "step": 28002 }, { "epoch": 0.72, "grad_norm": 6.116938591003418, "learning_rate": 3.895385975661081e-06, "loss": 0.5666, "step": 28003 }, { "epoch": 0.72, "grad_norm": 1.533464789390564, "learning_rate": 3.894728485356503e-06, "loss": 0.4134, "step": 28004 }, { "epoch": 0.72, "grad_norm": 1.9233113527297974, "learning_rate": 3.894071037125616e-06, "loss": 0.5044, "step": 28005 }, { "epoch": 0.72, "grad_norm": 1.4108449220657349, "learning_rate": 3.89341363097296e-06, "loss": 0.5461, "step": 28006 }, { "epoch": 0.72, "grad_norm": 1.3344988822937012, "learning_rate": 3.89275626690306e-06, "loss": 0.5325, "step": 28007 }, { "epoch": 0.72, "grad_norm": 1.4603604078292847, "learning_rate": 3.892098944920447e-06, "loss": 0.6065, "step": 28008 }, { "epoch": 0.72, "grad_norm": 1.0411126613616943, "learning_rate": 3.891441665029647e-06, "loss": 0.4562, "step": 28009 }, { "epoch": 0.72, "grad_norm": 1.3225985765457153, "learning_rate": 3.890784427235197e-06, "loss": 0.57, "step": 28010 }, { "epoch": 0.72, "grad_norm": 1.2396514415740967, "learning_rate": 3.890127231541623e-06, "loss": 0.3732, "step": 28011 }, { "epoch": 0.72, "grad_norm": 1.280777096748352, "learning_rate": 3.889470077953449e-06, "loss": 0.4412, "step": 28012 }, { "epoch": 0.72, "grad_norm": 3.876430034637451, "learning_rate": 3.8888129664752115e-06, "loss": 0.5997, "step": 28013 }, { "epoch": 0.72, "grad_norm": 9.5701904296875, "learning_rate": 3.888155897111436e-06, "loss": 0.4678, "step": 28014 }, { "epoch": 0.72, "grad_norm": 1.2704788446426392, "learning_rate": 3.887498869866648e-06, "loss": 0.584, "step": 28015 }, { "epoch": 0.72, "grad_norm": 1.345268964767456, "learning_rate": 3.88684188474538e-06, "loss": 0.5089, "step": 28016 }, { "epoch": 0.72, "grad_norm": 1.031843900680542, "learning_rate": 3.886184941752157e-06, "loss": 0.4688, "step": 28017 }, { "epoch": 0.72, "grad_norm": 3.4109530448913574, "learning_rate": 3.885528040891506e-06, "loss": 0.7193, "step": 28018 }, { "epoch": 0.72, "grad_norm": 1.7268528938293457, "learning_rate": 3.884871182167952e-06, "loss": 0.5024, "step": 28019 }, { "epoch": 0.72, "grad_norm": 10.507257461547852, "learning_rate": 3.884214365586027e-06, "loss": 0.5636, "step": 28020 }, { "epoch": 0.72, "grad_norm": 3.9446020126342773, "learning_rate": 3.883557591150253e-06, "loss": 0.4948, "step": 28021 }, { "epoch": 0.72, "grad_norm": 1.6697261333465576, "learning_rate": 3.882900858865155e-06, "loss": 0.6367, "step": 28022 }, { "epoch": 0.72, "grad_norm": 3.74847412109375, "learning_rate": 3.882244168735264e-06, "loss": 0.5911, "step": 28023 }, { "epoch": 0.72, "grad_norm": 1.3486477136611938, "learning_rate": 3.8815875207651035e-06, "loss": 0.4833, "step": 28024 }, { "epoch": 0.72, "grad_norm": 1.2724404335021973, "learning_rate": 3.8809309149591976e-06, "loss": 0.496, "step": 28025 }, { "epoch": 0.72, "grad_norm": 0.9729506969451904, "learning_rate": 3.880274351322068e-06, "loss": 0.4499, "step": 28026 }, { "epoch": 0.72, "grad_norm": 1.1979948282241821, "learning_rate": 3.879617829858245e-06, "loss": 0.6037, "step": 28027 }, { "epoch": 0.72, "grad_norm": 1.3323919773101807, "learning_rate": 3.878961350572252e-06, "loss": 0.5994, "step": 28028 }, { "epoch": 0.72, "grad_norm": 1.2185444831848145, "learning_rate": 3.8783049134686076e-06, "loss": 0.5558, "step": 28029 }, { "epoch": 0.72, "grad_norm": 2.088970899581909, "learning_rate": 3.8776485185518445e-06, "loss": 0.4812, "step": 28030 }, { "epoch": 0.72, "grad_norm": 6.3168511390686035, "learning_rate": 3.8769921658264794e-06, "loss": 0.6216, "step": 28031 }, { "epoch": 0.72, "grad_norm": 1.634579062461853, "learning_rate": 3.87633585529704e-06, "loss": 0.5739, "step": 28032 }, { "epoch": 0.72, "grad_norm": 8.692488670349121, "learning_rate": 3.875679586968041e-06, "loss": 0.5739, "step": 28033 }, { "epoch": 0.72, "grad_norm": 1.09137761592865, "learning_rate": 3.875023360844014e-06, "loss": 0.5181, "step": 28034 }, { "epoch": 0.72, "grad_norm": 1.5345169305801392, "learning_rate": 3.87436717692948e-06, "loss": 0.5353, "step": 28035 }, { "epoch": 0.72, "grad_norm": 0.9319717288017273, "learning_rate": 3.873711035228953e-06, "loss": 0.4736, "step": 28036 }, { "epoch": 0.72, "grad_norm": 1.4893207550048828, "learning_rate": 3.873054935746966e-06, "loss": 0.5989, "step": 28037 }, { "epoch": 0.72, "grad_norm": 1.6018692255020142, "learning_rate": 3.872398878488034e-06, "loss": 0.4795, "step": 28038 }, { "epoch": 0.72, "grad_norm": 1.8217167854309082, "learning_rate": 3.871742863456676e-06, "loss": 0.5694, "step": 28039 }, { "epoch": 0.72, "grad_norm": 1.3140740394592285, "learning_rate": 3.871086890657419e-06, "loss": 0.552, "step": 28040 }, { "epoch": 0.72, "grad_norm": 7.806276321411133, "learning_rate": 3.870430960094782e-06, "loss": 0.8118, "step": 28041 }, { "epoch": 0.72, "grad_norm": 1.9749987125396729, "learning_rate": 3.869775071773283e-06, "loss": 0.5434, "step": 28042 }, { "epoch": 0.72, "grad_norm": 1.647011637687683, "learning_rate": 3.869119225697439e-06, "loss": 0.6429, "step": 28043 }, { "epoch": 0.72, "grad_norm": 1.8614481687545776, "learning_rate": 3.868463421871778e-06, "loss": 0.6502, "step": 28044 }, { "epoch": 0.72, "grad_norm": 6.117331027984619, "learning_rate": 3.867807660300813e-06, "loss": 0.8854, "step": 28045 }, { "epoch": 0.72, "grad_norm": 2.658154249191284, "learning_rate": 3.867151940989062e-06, "loss": 0.4825, "step": 28046 }, { "epoch": 0.72, "grad_norm": 1.4457170963287354, "learning_rate": 3.866496263941052e-06, "loss": 0.5414, "step": 28047 }, { "epoch": 0.72, "grad_norm": 5.304574489593506, "learning_rate": 3.865840629161295e-06, "loss": 0.5633, "step": 28048 }, { "epoch": 0.72, "grad_norm": 3.315483570098877, "learning_rate": 3.865185036654311e-06, "loss": 0.5564, "step": 28049 }, { "epoch": 0.72, "grad_norm": 2.349032163619995, "learning_rate": 3.864529486424613e-06, "loss": 0.5292, "step": 28050 }, { "epoch": 0.72, "grad_norm": 1.5041762590408325, "learning_rate": 3.863873978476728e-06, "loss": 0.3917, "step": 28051 }, { "epoch": 0.72, "grad_norm": 1.1219897270202637, "learning_rate": 3.8632185128151675e-06, "loss": 0.4116, "step": 28052 }, { "epoch": 0.72, "grad_norm": 0.9141338467597961, "learning_rate": 3.862563089444447e-06, "loss": 0.4502, "step": 28053 }, { "epoch": 0.72, "grad_norm": 1.5241786241531372, "learning_rate": 3.8619077083690885e-06, "loss": 0.4232, "step": 28054 }, { "epoch": 0.72, "grad_norm": 1.027695655822754, "learning_rate": 3.861252369593606e-06, "loss": 0.5167, "step": 28055 }, { "epoch": 0.72, "grad_norm": 2.631244659423828, "learning_rate": 3.86059707312251e-06, "loss": 0.4751, "step": 28056 }, { "epoch": 0.72, "grad_norm": 1.0437276363372803, "learning_rate": 3.859941818960328e-06, "loss": 0.397, "step": 28057 }, { "epoch": 0.72, "grad_norm": 2.680692672729492, "learning_rate": 3.8592866071115685e-06, "loss": 0.5094, "step": 28058 }, { "epoch": 0.72, "grad_norm": 2.344231128692627, "learning_rate": 3.858631437580748e-06, "loss": 0.4704, "step": 28059 }, { "epoch": 0.72, "grad_norm": 3.729524850845337, "learning_rate": 3.857976310372377e-06, "loss": 0.5984, "step": 28060 }, { "epoch": 0.72, "grad_norm": 2.856710195541382, "learning_rate": 3.857321225490979e-06, "loss": 0.4578, "step": 28061 }, { "epoch": 0.72, "grad_norm": 1.3876874446868896, "learning_rate": 3.856666182941062e-06, "loss": 0.4359, "step": 28062 }, { "epoch": 0.72, "grad_norm": 0.9047229886054993, "learning_rate": 3.856011182727141e-06, "loss": 0.4017, "step": 28063 }, { "epoch": 0.72, "grad_norm": 6.7645368576049805, "learning_rate": 3.855356224853732e-06, "loss": 0.5601, "step": 28064 }, { "epoch": 0.72, "grad_norm": 3.8306832313537598, "learning_rate": 3.85470130932535e-06, "loss": 0.3263, "step": 28065 }, { "epoch": 0.72, "grad_norm": 2.200965166091919, "learning_rate": 3.854046436146505e-06, "loss": 0.6164, "step": 28066 }, { "epoch": 0.72, "grad_norm": 1.2033848762512207, "learning_rate": 3.853391605321706e-06, "loss": 0.5519, "step": 28067 }, { "epoch": 0.72, "grad_norm": 2.6395671367645264, "learning_rate": 3.852736816855475e-06, "loss": 0.5278, "step": 28068 }, { "epoch": 0.72, "grad_norm": 1.5057510137557983, "learning_rate": 3.852082070752319e-06, "loss": 0.5048, "step": 28069 }, { "epoch": 0.72, "grad_norm": 1.4230867624282837, "learning_rate": 3.851427367016747e-06, "loss": 0.5723, "step": 28070 }, { "epoch": 0.72, "grad_norm": 1.2049438953399658, "learning_rate": 3.850772705653279e-06, "loss": 0.5739, "step": 28071 }, { "epoch": 0.72, "grad_norm": 1.800166130065918, "learning_rate": 3.850118086666422e-06, "loss": 0.4719, "step": 28072 }, { "epoch": 0.72, "grad_norm": 1.3190290927886963, "learning_rate": 3.849463510060687e-06, "loss": 0.3888, "step": 28073 }, { "epoch": 0.72, "grad_norm": 1.3989440202713013, "learning_rate": 3.848808975840582e-06, "loss": 0.6139, "step": 28074 }, { "epoch": 0.72, "grad_norm": 4.8290205001831055, "learning_rate": 3.8481544840106255e-06, "loss": 0.8265, "step": 28075 }, { "epoch": 0.72, "grad_norm": 0.9378365874290466, "learning_rate": 3.8475000345753225e-06, "loss": 0.5579, "step": 28076 }, { "epoch": 0.72, "grad_norm": 2.1002941131591797, "learning_rate": 3.846845627539181e-06, "loss": 0.4453, "step": 28077 }, { "epoch": 0.72, "grad_norm": 1.266979694366455, "learning_rate": 3.846191262906717e-06, "loss": 0.49, "step": 28078 }, { "epoch": 0.72, "grad_norm": 1.7028127908706665, "learning_rate": 3.845536940682435e-06, "loss": 0.616, "step": 28079 }, { "epoch": 0.72, "grad_norm": 1.8503694534301758, "learning_rate": 3.8448826608708435e-06, "loss": 0.5966, "step": 28080 }, { "epoch": 0.72, "grad_norm": 1.8765355348587036, "learning_rate": 3.844228423476457e-06, "loss": 0.5652, "step": 28081 }, { "epoch": 0.72, "grad_norm": 4.111703395843506, "learning_rate": 3.843574228503781e-06, "loss": 0.6595, "step": 28082 }, { "epoch": 0.72, "grad_norm": 1.6231564283370972, "learning_rate": 3.842920075957324e-06, "loss": 0.4298, "step": 28083 }, { "epoch": 0.72, "grad_norm": 1.8570959568023682, "learning_rate": 3.84226596584159e-06, "loss": 0.6204, "step": 28084 }, { "epoch": 0.72, "grad_norm": 1.2129135131835938, "learning_rate": 3.8416118981610925e-06, "loss": 0.3694, "step": 28085 }, { "epoch": 0.72, "grad_norm": 1.7429865598678589, "learning_rate": 3.8409578729203375e-06, "loss": 0.5253, "step": 28086 }, { "epoch": 0.72, "grad_norm": 1.8563176393508911, "learning_rate": 3.840303890123828e-06, "loss": 0.509, "step": 28087 }, { "epoch": 0.72, "grad_norm": 1.2174733877182007, "learning_rate": 3.839649949776079e-06, "loss": 0.4788, "step": 28088 }, { "epoch": 0.72, "grad_norm": 1.8143953084945679, "learning_rate": 3.83899605188159e-06, "loss": 0.6458, "step": 28089 }, { "epoch": 0.72, "grad_norm": 1.2482006549835205, "learning_rate": 3.8383421964448705e-06, "loss": 0.5929, "step": 28090 }, { "epoch": 0.72, "grad_norm": 1.4740756750106812, "learning_rate": 3.837688383470421e-06, "loss": 0.5991, "step": 28091 }, { "epoch": 0.72, "grad_norm": 1.0946176052093506, "learning_rate": 3.837034612962757e-06, "loss": 0.3691, "step": 28092 }, { "epoch": 0.72, "grad_norm": 2.2915003299713135, "learning_rate": 3.836380884926377e-06, "loss": 0.5553, "step": 28093 }, { "epoch": 0.72, "grad_norm": 1.3499325513839722, "learning_rate": 3.835727199365785e-06, "loss": 0.3081, "step": 28094 }, { "epoch": 0.72, "grad_norm": 1.2937915325164795, "learning_rate": 3.835073556285491e-06, "loss": 0.437, "step": 28095 }, { "epoch": 0.72, "grad_norm": 11.609587669372559, "learning_rate": 3.834419955689998e-06, "loss": 0.4698, "step": 28096 }, { "epoch": 0.72, "grad_norm": 1.3181909322738647, "learning_rate": 3.833766397583808e-06, "loss": 0.587, "step": 28097 }, { "epoch": 0.72, "grad_norm": 1.3360636234283447, "learning_rate": 3.8331128819714225e-06, "loss": 0.4894, "step": 28098 }, { "epoch": 0.72, "grad_norm": 1.1179392337799072, "learning_rate": 3.832459408857353e-06, "loss": 0.4257, "step": 28099 }, { "epoch": 0.72, "grad_norm": 1.4201925992965698, "learning_rate": 3.831805978246097e-06, "loss": 0.6206, "step": 28100 }, { "epoch": 0.72, "grad_norm": 19.61491584777832, "learning_rate": 3.831152590142157e-06, "loss": 0.6819, "step": 28101 }, { "epoch": 0.72, "grad_norm": 1.2913398742675781, "learning_rate": 3.83049924455004e-06, "loss": 0.5143, "step": 28102 }, { "epoch": 0.72, "grad_norm": 6.941019535064697, "learning_rate": 3.829845941474248e-06, "loss": 0.5682, "step": 28103 }, { "epoch": 0.72, "grad_norm": 1.2630212306976318, "learning_rate": 3.829192680919276e-06, "loss": 0.4535, "step": 28104 }, { "epoch": 0.72, "grad_norm": 0.942649781703949, "learning_rate": 3.8285394628896345e-06, "loss": 0.4161, "step": 28105 }, { "epoch": 0.72, "grad_norm": 3.6789071559906006, "learning_rate": 3.8278862873898225e-06, "loss": 0.547, "step": 28106 }, { "epoch": 0.72, "grad_norm": 1.618064522743225, "learning_rate": 3.82723315442434e-06, "loss": 0.6508, "step": 28107 }, { "epoch": 0.72, "grad_norm": 1.309139609336853, "learning_rate": 3.8265800639976855e-06, "loss": 0.5442, "step": 28108 }, { "epoch": 0.72, "grad_norm": 1.0945498943328857, "learning_rate": 3.825927016114365e-06, "loss": 0.3919, "step": 28109 }, { "epoch": 0.72, "grad_norm": 1.1865549087524414, "learning_rate": 3.825274010778879e-06, "loss": 0.4854, "step": 28110 }, { "epoch": 0.72, "grad_norm": 1.1470050811767578, "learning_rate": 3.82462104799572e-06, "loss": 0.5449, "step": 28111 }, { "epoch": 0.72, "grad_norm": 1.6219831705093384, "learning_rate": 3.823968127769397e-06, "loss": 0.4602, "step": 28112 }, { "epoch": 0.72, "grad_norm": 1.4011276960372925, "learning_rate": 3.823315250104405e-06, "loss": 0.5148, "step": 28113 }, { "epoch": 0.72, "grad_norm": 2.338745594024658, "learning_rate": 3.8226624150052435e-06, "loss": 0.4802, "step": 28114 }, { "epoch": 0.72, "grad_norm": 1.608947515487671, "learning_rate": 3.822009622476409e-06, "loss": 0.6522, "step": 28115 }, { "epoch": 0.72, "grad_norm": 1.4646074771881104, "learning_rate": 3.821356872522406e-06, "loss": 0.5855, "step": 28116 }, { "epoch": 0.72, "grad_norm": 2.0125396251678467, "learning_rate": 3.82070416514773e-06, "loss": 0.4565, "step": 28117 }, { "epoch": 0.72, "grad_norm": 1.3043783903121948, "learning_rate": 3.8200515003568735e-06, "loss": 0.4346, "step": 28118 }, { "epoch": 0.72, "grad_norm": 1.5497887134552002, "learning_rate": 3.819398878154344e-06, "loss": 0.4975, "step": 28119 }, { "epoch": 0.72, "grad_norm": 1.9628907442092896, "learning_rate": 3.818746298544636e-06, "loss": 0.4928, "step": 28120 }, { "epoch": 0.72, "grad_norm": 2.0025362968444824, "learning_rate": 3.818093761532239e-06, "loss": 0.5999, "step": 28121 }, { "epoch": 0.72, "grad_norm": 1.3755788803100586, "learning_rate": 3.817441267121661e-06, "loss": 0.4692, "step": 28122 }, { "epoch": 0.72, "grad_norm": 1.2042368650436401, "learning_rate": 3.816788815317393e-06, "loss": 0.6232, "step": 28123 }, { "epoch": 0.72, "grad_norm": 6.8712029457092285, "learning_rate": 3.816136406123932e-06, "loss": 0.5322, "step": 28124 }, { "epoch": 0.72, "grad_norm": 1.543919563293457, "learning_rate": 3.815484039545771e-06, "loss": 0.5567, "step": 28125 }, { "epoch": 0.72, "grad_norm": 0.9445265531539917, "learning_rate": 3.81483171558741e-06, "loss": 0.3917, "step": 28126 }, { "epoch": 0.72, "grad_norm": 1.5198885202407837, "learning_rate": 3.8141794342533446e-06, "loss": 0.7056, "step": 28127 }, { "epoch": 0.72, "grad_norm": 2.7118172645568848, "learning_rate": 3.8135271955480634e-06, "loss": 0.5877, "step": 28128 }, { "epoch": 0.72, "grad_norm": 3.0518083572387695, "learning_rate": 3.8128749994760706e-06, "loss": 0.6557, "step": 28129 }, { "epoch": 0.72, "grad_norm": 1.629901647567749, "learning_rate": 3.8122228460418563e-06, "loss": 0.4673, "step": 28130 }, { "epoch": 0.72, "grad_norm": 1.245390772819519, "learning_rate": 3.8115707352499144e-06, "loss": 0.5333, "step": 28131 }, { "epoch": 0.72, "grad_norm": 1.1615909337997437, "learning_rate": 3.810918667104735e-06, "loss": 0.5151, "step": 28132 }, { "epoch": 0.72, "grad_norm": 1.4851144552230835, "learning_rate": 3.81026664161082e-06, "loss": 0.6122, "step": 28133 }, { "epoch": 0.72, "grad_norm": 2.606306552886963, "learning_rate": 3.809614658772658e-06, "loss": 0.4606, "step": 28134 }, { "epoch": 0.72, "grad_norm": 1.5794261693954468, "learning_rate": 3.80896271859474e-06, "loss": 0.4954, "step": 28135 }, { "epoch": 0.72, "grad_norm": 2.5952749252319336, "learning_rate": 3.8083108210815636e-06, "loss": 0.5739, "step": 28136 }, { "epoch": 0.72, "grad_norm": 1.3192152976989746, "learning_rate": 3.8076589662376194e-06, "loss": 0.4311, "step": 28137 }, { "epoch": 0.72, "grad_norm": 1.4253621101379395, "learning_rate": 3.8070071540673993e-06, "loss": 0.4595, "step": 28138 }, { "epoch": 0.72, "grad_norm": 2.2849509716033936, "learning_rate": 3.806355384575392e-06, "loss": 0.7227, "step": 28139 }, { "epoch": 0.72, "grad_norm": 1.3393360376358032, "learning_rate": 3.8057036577660954e-06, "loss": 0.511, "step": 28140 }, { "epoch": 0.72, "grad_norm": 0.9722570776939392, "learning_rate": 3.8050519736439974e-06, "loss": 0.4448, "step": 28141 }, { "epoch": 0.72, "grad_norm": 2.1113226413726807, "learning_rate": 3.804400332213586e-06, "loss": 0.5504, "step": 28142 }, { "epoch": 0.72, "grad_norm": 1.71835458278656, "learning_rate": 3.8037487334793577e-06, "loss": 0.5467, "step": 28143 }, { "epoch": 0.72, "grad_norm": 1.8514100313186646, "learning_rate": 3.8030971774458005e-06, "loss": 0.4845, "step": 28144 }, { "epoch": 0.72, "grad_norm": 1.8984090089797974, "learning_rate": 3.802445664117401e-06, "loss": 0.5238, "step": 28145 }, { "epoch": 0.72, "grad_norm": 5.669847011566162, "learning_rate": 3.8017941934986557e-06, "loss": 0.7223, "step": 28146 }, { "epoch": 0.72, "grad_norm": 8.306754112243652, "learning_rate": 3.8011427655940512e-06, "loss": 0.5973, "step": 28147 }, { "epoch": 0.72, "grad_norm": 1.4041998386383057, "learning_rate": 3.8004913804080767e-06, "loss": 0.5598, "step": 28148 }, { "epoch": 0.72, "grad_norm": 3.917144775390625, "learning_rate": 3.7998400379452162e-06, "loss": 0.5547, "step": 28149 }, { "epoch": 0.72, "grad_norm": 1.6247763633728027, "learning_rate": 3.7991887382099668e-06, "loss": 0.5817, "step": 28150 }, { "epoch": 0.72, "grad_norm": 1.9084956645965576, "learning_rate": 3.798537481206812e-06, "loss": 0.617, "step": 28151 }, { "epoch": 0.72, "grad_norm": 1.6221740245819092, "learning_rate": 3.7978862669402384e-06, "loss": 0.6938, "step": 28152 }, { "epoch": 0.72, "grad_norm": 1.397589087486267, "learning_rate": 3.7972350954147384e-06, "loss": 0.5129, "step": 28153 }, { "epoch": 0.72, "grad_norm": 18.35752296447754, "learning_rate": 3.796583966634798e-06, "loss": 0.4917, "step": 28154 }, { "epoch": 0.72, "grad_norm": 1.3348171710968018, "learning_rate": 3.7959328806049033e-06, "loss": 0.5712, "step": 28155 }, { "epoch": 0.72, "grad_norm": 1.121086835861206, "learning_rate": 3.7952818373295386e-06, "loss": 0.4313, "step": 28156 }, { "epoch": 0.72, "grad_norm": 1.5187015533447266, "learning_rate": 3.7946308368131966e-06, "loss": 0.4312, "step": 28157 }, { "epoch": 0.72, "grad_norm": 1.4274041652679443, "learning_rate": 3.79397987906036e-06, "loss": 0.3929, "step": 28158 }, { "epoch": 0.72, "grad_norm": 2.2076804637908936, "learning_rate": 3.793328964075511e-06, "loss": 0.6678, "step": 28159 }, { "epoch": 0.72, "grad_norm": 1.51299250125885, "learning_rate": 3.7926780918631435e-06, "loss": 0.4868, "step": 28160 }, { "epoch": 0.72, "grad_norm": 2.3421127796173096, "learning_rate": 3.792027262427739e-06, "loss": 0.5931, "step": 28161 }, { "epoch": 0.72, "grad_norm": 1.54668128490448, "learning_rate": 3.7913764757737783e-06, "loss": 0.4337, "step": 28162 }, { "epoch": 0.72, "grad_norm": 4.2924628257751465, "learning_rate": 3.7907257319057533e-06, "loss": 0.5358, "step": 28163 }, { "epoch": 0.72, "grad_norm": 1.2083404064178467, "learning_rate": 3.7900750308281464e-06, "loss": 0.4649, "step": 28164 }, { "epoch": 0.72, "grad_norm": 1.2983324527740479, "learning_rate": 3.7894243725454405e-06, "loss": 0.5003, "step": 28165 }, { "epoch": 0.72, "grad_norm": 1.7777881622314453, "learning_rate": 3.7887737570621176e-06, "loss": 0.5468, "step": 28166 }, { "epoch": 0.72, "grad_norm": 2.6494321823120117, "learning_rate": 3.7881231843826662e-06, "loss": 0.5673, "step": 28167 }, { "epoch": 0.72, "grad_norm": 12.875429153442383, "learning_rate": 3.787472654511567e-06, "loss": 0.5915, "step": 28168 }, { "epoch": 0.72, "grad_norm": 1.6344717741012573, "learning_rate": 3.7868221674533002e-06, "loss": 0.4939, "step": 28169 }, { "epoch": 0.72, "grad_norm": 1.5553581714630127, "learning_rate": 3.786171723212354e-06, "loss": 0.6664, "step": 28170 }, { "epoch": 0.72, "grad_norm": 2.5985186100006104, "learning_rate": 3.7855213217932097e-06, "loss": 0.4254, "step": 28171 }, { "epoch": 0.72, "grad_norm": 1.5925569534301758, "learning_rate": 3.7848709632003468e-06, "loss": 0.4927, "step": 28172 }, { "epoch": 0.72, "grad_norm": 1.964941143989563, "learning_rate": 3.784220647438246e-06, "loss": 0.6016, "step": 28173 }, { "epoch": 0.72, "grad_norm": 2.281071662902832, "learning_rate": 3.783570374511395e-06, "loss": 0.4464, "step": 28174 }, { "epoch": 0.72, "grad_norm": 1.5003833770751953, "learning_rate": 3.7829201444242704e-06, "loss": 0.6327, "step": 28175 }, { "epoch": 0.72, "grad_norm": 1.610893964767456, "learning_rate": 3.7822699571813514e-06, "loss": 0.5789, "step": 28176 }, { "epoch": 0.72, "grad_norm": 1.549871563911438, "learning_rate": 3.781619812787125e-06, "loss": 0.5065, "step": 28177 }, { "epoch": 0.72, "grad_norm": 0.9172663688659668, "learning_rate": 3.7809697112460675e-06, "loss": 0.4953, "step": 28178 }, { "epoch": 0.72, "grad_norm": 1.0300992727279663, "learning_rate": 3.7803196525626604e-06, "loss": 0.4754, "step": 28179 }, { "epoch": 0.72, "grad_norm": 8.817130088806152, "learning_rate": 3.7796696367413787e-06, "loss": 0.5796, "step": 28180 }, { "epoch": 0.72, "grad_norm": 1.5791330337524414, "learning_rate": 3.7790196637867094e-06, "loss": 0.5656, "step": 28181 }, { "epoch": 0.72, "grad_norm": 1.1573212146759033, "learning_rate": 3.7783697337031287e-06, "loss": 0.4061, "step": 28182 }, { "epoch": 0.72, "grad_norm": 1.175466537475586, "learning_rate": 3.7777198464951103e-06, "loss": 0.389, "step": 28183 }, { "epoch": 0.72, "grad_norm": 3.636319875717163, "learning_rate": 3.7770700021671425e-06, "loss": 0.5501, "step": 28184 }, { "epoch": 0.72, "grad_norm": 3.9201297760009766, "learning_rate": 3.7764202007236973e-06, "loss": 0.5677, "step": 28185 }, { "epoch": 0.72, "grad_norm": 1.0035489797592163, "learning_rate": 3.775770442169251e-06, "loss": 0.433, "step": 28186 }, { "epoch": 0.72, "grad_norm": 7.12515115737915, "learning_rate": 3.775120726508289e-06, "loss": 0.7291, "step": 28187 }, { "epoch": 0.72, "grad_norm": 1.854331374168396, "learning_rate": 3.7744710537452822e-06, "loss": 0.6189, "step": 28188 }, { "epoch": 0.72, "grad_norm": 1.1147801876068115, "learning_rate": 3.7738214238847103e-06, "loss": 0.534, "step": 28189 }, { "epoch": 0.72, "grad_norm": 3.4793691635131836, "learning_rate": 3.773171836931047e-06, "loss": 0.7706, "step": 28190 }, { "epoch": 0.72, "grad_norm": 2.207138776779175, "learning_rate": 3.7725222928887737e-06, "loss": 0.3481, "step": 28191 }, { "epoch": 0.72, "grad_norm": 1.7922767400741577, "learning_rate": 3.7718727917623643e-06, "loss": 0.5704, "step": 28192 }, { "epoch": 0.72, "grad_norm": 1.1015735864639282, "learning_rate": 3.771223333556292e-06, "loss": 0.4941, "step": 28193 }, { "epoch": 0.72, "grad_norm": 2.6283633708953857, "learning_rate": 3.770573918275038e-06, "loss": 0.5592, "step": 28194 }, { "epoch": 0.72, "grad_norm": 1.8339548110961914, "learning_rate": 3.769924545923075e-06, "loss": 0.6123, "step": 28195 }, { "epoch": 0.72, "grad_norm": 1.1453710794448853, "learning_rate": 3.7692752165048786e-06, "loss": 0.6236, "step": 28196 }, { "epoch": 0.72, "grad_norm": 2.470069646835327, "learning_rate": 3.7686259300249185e-06, "loss": 0.5573, "step": 28197 }, { "epoch": 0.72, "grad_norm": 3.1722564697265625, "learning_rate": 3.7679766864876764e-06, "loss": 0.6001, "step": 28198 }, { "epoch": 0.72, "grad_norm": 1.0097278356552124, "learning_rate": 3.767327485897625e-06, "loss": 0.459, "step": 28199 }, { "epoch": 0.72, "grad_norm": 1.440353512763977, "learning_rate": 3.7666783282592336e-06, "loss": 0.5649, "step": 28200 }, { "epoch": 0.72, "grad_norm": 4.256033897399902, "learning_rate": 3.7660292135769815e-06, "loss": 0.5443, "step": 28201 }, { "epoch": 0.72, "grad_norm": 3.366140604019165, "learning_rate": 3.765380141855339e-06, "loss": 0.5624, "step": 28202 }, { "epoch": 0.72, "grad_norm": 1.3540023565292358, "learning_rate": 3.7647311130987774e-06, "loss": 0.4554, "step": 28203 }, { "epoch": 0.72, "grad_norm": 11.073770523071289, "learning_rate": 3.7640821273117744e-06, "loss": 0.674, "step": 28204 }, { "epoch": 0.72, "grad_norm": 2.7803361415863037, "learning_rate": 3.7634331844987994e-06, "loss": 0.5352, "step": 28205 }, { "epoch": 0.72, "grad_norm": 1.191552996635437, "learning_rate": 3.762784284664325e-06, "loss": 0.5729, "step": 28206 }, { "epoch": 0.72, "grad_norm": 3.6328892707824707, "learning_rate": 3.76213542781282e-06, "loss": 0.7393, "step": 28207 }, { "epoch": 0.72, "grad_norm": 1.521612524986267, "learning_rate": 3.7614866139487603e-06, "loss": 0.4677, "step": 28208 }, { "epoch": 0.72, "grad_norm": 1.3221291303634644, "learning_rate": 3.760837843076617e-06, "loss": 0.5938, "step": 28209 }, { "epoch": 0.72, "grad_norm": 1.5616130828857422, "learning_rate": 3.7601891152008553e-06, "loss": 0.5042, "step": 28210 }, { "epoch": 0.72, "grad_norm": 3.995189666748047, "learning_rate": 3.759540430325954e-06, "loss": 0.5212, "step": 28211 }, { "epoch": 0.72, "grad_norm": 1.201285481452942, "learning_rate": 3.7588917884563792e-06, "loss": 0.5325, "step": 28212 }, { "epoch": 0.72, "grad_norm": 1.535933017730713, "learning_rate": 3.758243189596601e-06, "loss": 0.4578, "step": 28213 }, { "epoch": 0.72, "grad_norm": 1.7049648761749268, "learning_rate": 3.757594633751085e-06, "loss": 0.4619, "step": 28214 }, { "epoch": 0.72, "grad_norm": 1.5406140089035034, "learning_rate": 3.75694612092431e-06, "loss": 0.5403, "step": 28215 }, { "epoch": 0.72, "grad_norm": 1.5867990255355835, "learning_rate": 3.7562976511207395e-06, "loss": 0.4467, "step": 28216 }, { "epoch": 0.72, "grad_norm": 1.0936312675476074, "learning_rate": 3.7556492243448385e-06, "loss": 0.4344, "step": 28217 }, { "epoch": 0.72, "grad_norm": 2.4192028045654297, "learning_rate": 3.755000840601084e-06, "loss": 0.6939, "step": 28218 }, { "epoch": 0.72, "grad_norm": 2.1061673164367676, "learning_rate": 3.7543524998939405e-06, "loss": 0.6634, "step": 28219 }, { "epoch": 0.72, "grad_norm": 1.337193250656128, "learning_rate": 3.753704202227876e-06, "loss": 0.6267, "step": 28220 }, { "epoch": 0.72, "grad_norm": 2.3764398097991943, "learning_rate": 3.753055947607357e-06, "loss": 0.4941, "step": 28221 }, { "epoch": 0.72, "grad_norm": 1.2672063112258911, "learning_rate": 3.7524077360368514e-06, "loss": 0.5541, "step": 28222 }, { "epoch": 0.72, "grad_norm": 1.2440388202667236, "learning_rate": 3.7517595675208242e-06, "loss": 0.6014, "step": 28223 }, { "epoch": 0.72, "grad_norm": 8.454235076904297, "learning_rate": 3.7511114420637475e-06, "loss": 0.6093, "step": 28224 }, { "epoch": 0.72, "grad_norm": 17.29521942138672, "learning_rate": 3.7504633596700845e-06, "loss": 0.624, "step": 28225 }, { "epoch": 0.72, "grad_norm": 1.0755481719970703, "learning_rate": 3.749815320344302e-06, "loss": 0.5001, "step": 28226 }, { "epoch": 0.72, "grad_norm": 2.7517497539520264, "learning_rate": 3.749167324090862e-06, "loss": 0.5814, "step": 28227 }, { "epoch": 0.72, "grad_norm": 6.631547451019287, "learning_rate": 3.7485193709142378e-06, "loss": 1.0094, "step": 28228 }, { "epoch": 0.72, "grad_norm": 1.3953012228012085, "learning_rate": 3.7478714608188894e-06, "loss": 0.3039, "step": 28229 }, { "epoch": 0.72, "grad_norm": 1.7229262590408325, "learning_rate": 3.7472235938092805e-06, "loss": 0.6634, "step": 28230 }, { "epoch": 0.72, "grad_norm": 0.865507185459137, "learning_rate": 3.746575769889881e-06, "loss": 0.4555, "step": 28231 }, { "epoch": 0.72, "grad_norm": 3.469536304473877, "learning_rate": 3.7459279890651523e-06, "loss": 0.6426, "step": 28232 }, { "epoch": 0.72, "grad_norm": 1.4720443487167358, "learning_rate": 3.745280251339559e-06, "loss": 0.4968, "step": 28233 }, { "epoch": 0.72, "grad_norm": 1.6760807037353516, "learning_rate": 3.7446325567175623e-06, "loss": 0.5839, "step": 28234 }, { "epoch": 0.72, "grad_norm": 1.0856480598449707, "learning_rate": 3.7439849052036304e-06, "loss": 0.4722, "step": 28235 }, { "epoch": 0.72, "grad_norm": 4.895646572113037, "learning_rate": 3.743337296802224e-06, "loss": 0.5768, "step": 28236 }, { "epoch": 0.72, "grad_norm": 13.34172534942627, "learning_rate": 3.7426897315178026e-06, "loss": 0.6801, "step": 28237 }, { "epoch": 0.72, "grad_norm": 2.7832977771759033, "learning_rate": 3.7420422093548357e-06, "loss": 0.5179, "step": 28238 }, { "epoch": 0.72, "grad_norm": 5.983799457550049, "learning_rate": 3.7413947303177822e-06, "loss": 0.4046, "step": 28239 }, { "epoch": 0.72, "grad_norm": 2.5516536235809326, "learning_rate": 3.7407472944111014e-06, "loss": 0.6216, "step": 28240 }, { "epoch": 0.72, "grad_norm": 1.2536262273788452, "learning_rate": 3.7400999016392612e-06, "loss": 0.5585, "step": 28241 }, { "epoch": 0.72, "grad_norm": 1.9957891702651978, "learning_rate": 3.7394525520067193e-06, "loss": 0.5779, "step": 28242 }, { "epoch": 0.72, "grad_norm": 0.8675309419631958, "learning_rate": 3.738805245517937e-06, "loss": 0.4173, "step": 28243 }, { "epoch": 0.72, "grad_norm": 7.268749713897705, "learning_rate": 3.738157982177373e-06, "loss": 0.4512, "step": 28244 }, { "epoch": 0.72, "grad_norm": 1.5140807628631592, "learning_rate": 3.737510761989492e-06, "loss": 0.4617, "step": 28245 }, { "epoch": 0.72, "grad_norm": 1.0003832578659058, "learning_rate": 3.736863584958753e-06, "loss": 0.3688, "step": 28246 }, { "epoch": 0.72, "grad_norm": 6.873152732849121, "learning_rate": 3.7362164510896114e-06, "loss": 0.6641, "step": 28247 }, { "epoch": 0.72, "grad_norm": 1.2540332078933716, "learning_rate": 3.735569360386535e-06, "loss": 0.5984, "step": 28248 }, { "epoch": 0.72, "grad_norm": 1.726446270942688, "learning_rate": 3.734922312853978e-06, "loss": 0.5185, "step": 28249 }, { "epoch": 0.72, "grad_norm": 4.470701217651367, "learning_rate": 3.7342753084964013e-06, "loss": 0.5949, "step": 28250 }, { "epoch": 0.72, "grad_norm": 1.5343643426895142, "learning_rate": 3.7336283473182575e-06, "loss": 0.4791, "step": 28251 }, { "epoch": 0.72, "grad_norm": 1.1876839399337769, "learning_rate": 3.732981429324015e-06, "loss": 0.446, "step": 28252 }, { "epoch": 0.72, "grad_norm": 1.1246570348739624, "learning_rate": 3.732334554518128e-06, "loss": 0.4573, "step": 28253 }, { "epoch": 0.72, "grad_norm": 1.171978235244751, "learning_rate": 3.7316877229050487e-06, "loss": 0.5196, "step": 28254 }, { "epoch": 0.72, "grad_norm": 5.321885108947754, "learning_rate": 3.731040934489243e-06, "loss": 0.649, "step": 28255 }, { "epoch": 0.72, "grad_norm": 4.347362518310547, "learning_rate": 3.730394189275165e-06, "loss": 0.7347, "step": 28256 }, { "epoch": 0.72, "grad_norm": 1.4102849960327148, "learning_rate": 3.7297474872672677e-06, "loss": 0.471, "step": 28257 }, { "epoch": 0.72, "grad_norm": 1.006998062133789, "learning_rate": 3.729100828470015e-06, "loss": 0.3996, "step": 28258 }, { "epoch": 0.72, "grad_norm": 1.0021113157272339, "learning_rate": 3.72845421288786e-06, "loss": 0.4702, "step": 28259 }, { "epoch": 0.72, "grad_norm": 1.1165271997451782, "learning_rate": 3.7278076405252573e-06, "loss": 0.5158, "step": 28260 }, { "epoch": 0.72, "grad_norm": 1.2432732582092285, "learning_rate": 3.7271611113866614e-06, "loss": 0.4033, "step": 28261 }, { "epoch": 0.72, "grad_norm": 1.2178864479064941, "learning_rate": 3.726514625476533e-06, "loss": 0.5647, "step": 28262 }, { "epoch": 0.72, "grad_norm": 1.8934893608093262, "learning_rate": 3.7258681827993236e-06, "loss": 0.6229, "step": 28263 }, { "epoch": 0.72, "grad_norm": 1.3299819231033325, "learning_rate": 3.7252217833594874e-06, "loss": 0.532, "step": 28264 }, { "epoch": 0.72, "grad_norm": 2.0966179370880127, "learning_rate": 3.724575427161482e-06, "loss": 0.5549, "step": 28265 }, { "epoch": 0.72, "grad_norm": 1.385858416557312, "learning_rate": 3.723929114209761e-06, "loss": 0.5298, "step": 28266 }, { "epoch": 0.72, "grad_norm": 1.4834513664245605, "learning_rate": 3.7232828445087776e-06, "loss": 0.5022, "step": 28267 }, { "epoch": 0.72, "grad_norm": 5.246059417724609, "learning_rate": 3.7226366180629826e-06, "loss": 0.5179, "step": 28268 }, { "epoch": 0.72, "grad_norm": 1.0700112581253052, "learning_rate": 3.7219904348768345e-06, "loss": 0.6195, "step": 28269 }, { "epoch": 0.72, "grad_norm": 1.9337085485458374, "learning_rate": 3.721344294954784e-06, "loss": 0.3878, "step": 28270 }, { "epoch": 0.72, "grad_norm": 0.9466232657432556, "learning_rate": 3.7206981983012814e-06, "loss": 0.4646, "step": 28271 }, { "epoch": 0.72, "grad_norm": 1.0416340827941895, "learning_rate": 3.7200521449207848e-06, "loss": 0.4608, "step": 28272 }, { "epoch": 0.72, "grad_norm": 1.9921356439590454, "learning_rate": 3.7194061348177433e-06, "loss": 0.5321, "step": 28273 }, { "epoch": 0.72, "grad_norm": 1.1613707542419434, "learning_rate": 3.718760167996609e-06, "loss": 0.5245, "step": 28274 }, { "epoch": 0.72, "grad_norm": 1.447879433631897, "learning_rate": 3.71811424446183e-06, "loss": 0.5974, "step": 28275 }, { "epoch": 0.72, "grad_norm": 1.752259373664856, "learning_rate": 3.7174683642178645e-06, "loss": 0.6238, "step": 28276 }, { "epoch": 0.72, "grad_norm": 2.06234073638916, "learning_rate": 3.7168225272691593e-06, "loss": 0.6187, "step": 28277 }, { "epoch": 0.72, "grad_norm": 6.062761306762695, "learning_rate": 3.716176733620164e-06, "loss": 0.6251, "step": 28278 }, { "epoch": 0.72, "grad_norm": 1.775370717048645, "learning_rate": 3.715530983275333e-06, "loss": 0.4001, "step": 28279 }, { "epoch": 0.72, "grad_norm": 3.011009931564331, "learning_rate": 3.714885276239113e-06, "loss": 0.5384, "step": 28280 }, { "epoch": 0.72, "grad_norm": 1.0657631158828735, "learning_rate": 3.714239612515953e-06, "loss": 0.4913, "step": 28281 }, { "epoch": 0.72, "grad_norm": 1.162182092666626, "learning_rate": 3.713593992110308e-06, "loss": 0.4155, "step": 28282 }, { "epoch": 0.72, "grad_norm": 1.6909822225570679, "learning_rate": 3.7129484150266235e-06, "loss": 0.5661, "step": 28283 }, { "epoch": 0.72, "grad_norm": 1.1805076599121094, "learning_rate": 3.7123028812693486e-06, "loss": 0.411, "step": 28284 }, { "epoch": 0.72, "grad_norm": 1.4921191930770874, "learning_rate": 3.7116573908429276e-06, "loss": 0.5369, "step": 28285 }, { "epoch": 0.72, "grad_norm": 3.1965630054473877, "learning_rate": 3.711011943751818e-06, "loss": 0.5148, "step": 28286 }, { "epoch": 0.73, "grad_norm": 1.452051043510437, "learning_rate": 3.710366540000462e-06, "loss": 0.5631, "step": 28287 }, { "epoch": 0.73, "grad_norm": 3.663134813308716, "learning_rate": 3.709721179593305e-06, "loss": 0.6277, "step": 28288 }, { "epoch": 0.73, "grad_norm": 10.73370361328125, "learning_rate": 3.709075862534801e-06, "loss": 0.434, "step": 28289 }, { "epoch": 0.73, "grad_norm": 1.2271467447280884, "learning_rate": 3.7084305888293947e-06, "loss": 0.4313, "step": 28290 }, { "epoch": 0.73, "grad_norm": 7.905248641967773, "learning_rate": 3.7077853584815314e-06, "loss": 0.6071, "step": 28291 }, { "epoch": 0.73, "grad_norm": 1.3724088668823242, "learning_rate": 3.7071401714956546e-06, "loss": 0.6031, "step": 28292 }, { "epoch": 0.73, "grad_norm": 1.5336068868637085, "learning_rate": 3.7064950278762177e-06, "loss": 0.491, "step": 28293 }, { "epoch": 0.73, "grad_norm": 1.405021071434021, "learning_rate": 3.7058499276276625e-06, "loss": 0.5747, "step": 28294 }, { "epoch": 0.73, "grad_norm": 1.3342863321304321, "learning_rate": 3.705204870754433e-06, "loss": 0.4552, "step": 28295 }, { "epoch": 0.73, "grad_norm": 1.6148217916488647, "learning_rate": 3.7045598572609798e-06, "loss": 0.4345, "step": 28296 }, { "epoch": 0.73, "grad_norm": 1.8562196493148804, "learning_rate": 3.703914887151745e-06, "loss": 0.6266, "step": 28297 }, { "epoch": 0.73, "grad_norm": 1.0279628038406372, "learning_rate": 3.7032699604311695e-06, "loss": 0.5555, "step": 28298 }, { "epoch": 0.73, "grad_norm": 2.8179514408111572, "learning_rate": 3.7026250771037054e-06, "loss": 0.4858, "step": 28299 }, { "epoch": 0.73, "grad_norm": 1.4430171251296997, "learning_rate": 3.701980237173792e-06, "loss": 0.3441, "step": 28300 }, { "epoch": 0.73, "grad_norm": 4.660283088684082, "learning_rate": 3.7013354406458747e-06, "loss": 0.5449, "step": 28301 }, { "epoch": 0.73, "grad_norm": 1.0674515962600708, "learning_rate": 3.700690687524392e-06, "loss": 0.4727, "step": 28302 }, { "epoch": 0.73, "grad_norm": 14.705670356750488, "learning_rate": 3.7000459778137953e-06, "loss": 0.5361, "step": 28303 }, { "epoch": 0.73, "grad_norm": 4.671742916107178, "learning_rate": 3.6994013115185247e-06, "loss": 0.6634, "step": 28304 }, { "epoch": 0.73, "grad_norm": 1.4582083225250244, "learning_rate": 3.698756688643017e-06, "loss": 0.4464, "step": 28305 }, { "epoch": 0.73, "grad_norm": 5.663066864013672, "learning_rate": 3.6981121091917238e-06, "loss": 0.6779, "step": 28306 }, { "epoch": 0.73, "grad_norm": 1.3397735357284546, "learning_rate": 3.6974675731690824e-06, "loss": 0.4427, "step": 28307 }, { "epoch": 0.73, "grad_norm": 3.03830623626709, "learning_rate": 3.6968230805795346e-06, "loss": 0.5932, "step": 28308 }, { "epoch": 0.73, "grad_norm": 1.5420886278152466, "learning_rate": 3.6961786314275182e-06, "loss": 0.345, "step": 28309 }, { "epoch": 0.73, "grad_norm": 1.8945807218551636, "learning_rate": 3.695534225717483e-06, "loss": 0.7009, "step": 28310 }, { "epoch": 0.73, "grad_norm": 1.416864037513733, "learning_rate": 3.6948898634538646e-06, "loss": 0.45, "step": 28311 }, { "epoch": 0.73, "grad_norm": 1.629029393196106, "learning_rate": 3.6942455446410995e-06, "loss": 0.5335, "step": 28312 }, { "epoch": 0.73, "grad_norm": 1.2413177490234375, "learning_rate": 3.693601269283638e-06, "loss": 0.4716, "step": 28313 }, { "epoch": 0.73, "grad_norm": 1.4454641342163086, "learning_rate": 3.6929570373859124e-06, "loss": 0.5007, "step": 28314 }, { "epoch": 0.73, "grad_norm": 1.863347053527832, "learning_rate": 3.692312848952365e-06, "loss": 0.5757, "step": 28315 }, { "epoch": 0.73, "grad_norm": 1.354691982269287, "learning_rate": 3.691668703987431e-06, "loss": 0.3934, "step": 28316 }, { "epoch": 0.73, "grad_norm": 5.490062236785889, "learning_rate": 3.6910246024955575e-06, "loss": 0.6342, "step": 28317 }, { "epoch": 0.73, "grad_norm": 1.0769634246826172, "learning_rate": 3.690380544481178e-06, "loss": 0.6299, "step": 28318 }, { "epoch": 0.73, "grad_norm": 1.7591170072555542, "learning_rate": 3.6897365299487276e-06, "loss": 0.502, "step": 28319 }, { "epoch": 0.73, "grad_norm": 2.242079019546509, "learning_rate": 3.689092558902653e-06, "loss": 0.703, "step": 28320 }, { "epoch": 0.73, "grad_norm": 2.1291215419769287, "learning_rate": 3.6884486313473877e-06, "loss": 0.3963, "step": 28321 }, { "epoch": 0.73, "grad_norm": 1.425537347793579, "learning_rate": 3.6878047472873644e-06, "loss": 0.4449, "step": 28322 }, { "epoch": 0.73, "grad_norm": 1.1887214183807373, "learning_rate": 3.68716090672703e-06, "loss": 0.5666, "step": 28323 }, { "epoch": 0.73, "grad_norm": 1.6288830041885376, "learning_rate": 3.6865171096708153e-06, "loss": 0.5169, "step": 28324 }, { "epoch": 0.73, "grad_norm": 4.976009368896484, "learning_rate": 3.6858733561231587e-06, "loss": 0.5591, "step": 28325 }, { "epoch": 0.73, "grad_norm": 1.6541458368301392, "learning_rate": 3.685229646088493e-06, "loss": 0.5606, "step": 28326 }, { "epoch": 0.73, "grad_norm": 1.4453083276748657, "learning_rate": 3.68458597957126e-06, "loss": 0.5455, "step": 28327 }, { "epoch": 0.73, "grad_norm": 0.9332742691040039, "learning_rate": 3.683942356575894e-06, "loss": 0.4076, "step": 28328 }, { "epoch": 0.73, "grad_norm": 1.0716427564620972, "learning_rate": 3.683298777106824e-06, "loss": 0.4891, "step": 28329 }, { "epoch": 0.73, "grad_norm": 1.241188406944275, "learning_rate": 3.6826552411684944e-06, "loss": 0.5691, "step": 28330 }, { "epoch": 0.73, "grad_norm": 1.4376753568649292, "learning_rate": 3.682011748765336e-06, "loss": 0.5734, "step": 28331 }, { "epoch": 0.73, "grad_norm": 6.912285804748535, "learning_rate": 3.681368299901783e-06, "loss": 0.61, "step": 28332 }, { "epoch": 0.73, "grad_norm": 1.713945746421814, "learning_rate": 3.680724894582267e-06, "loss": 0.5764, "step": 28333 }, { "epoch": 0.73, "grad_norm": 1.781866192817688, "learning_rate": 3.680081532811228e-06, "loss": 0.4984, "step": 28334 }, { "epoch": 0.73, "grad_norm": 1.3565208911895752, "learning_rate": 3.679438214593096e-06, "loss": 0.52, "step": 28335 }, { "epoch": 0.73, "grad_norm": 2.371551752090454, "learning_rate": 3.678794939932302e-06, "loss": 0.5085, "step": 28336 }, { "epoch": 0.73, "grad_norm": 1.3099826574325562, "learning_rate": 3.678151708833285e-06, "loss": 0.4729, "step": 28337 }, { "epoch": 0.73, "grad_norm": 1.5163793563842773, "learning_rate": 3.677508521300476e-06, "loss": 0.3646, "step": 28338 }, { "epoch": 0.73, "grad_norm": 2.204322338104248, "learning_rate": 3.676865377338301e-06, "loss": 0.5316, "step": 28339 }, { "epoch": 0.73, "grad_norm": 6.006038188934326, "learning_rate": 3.6762222769512015e-06, "loss": 0.5399, "step": 28340 }, { "epoch": 0.73, "grad_norm": 2.4109010696411133, "learning_rate": 3.6755792201436047e-06, "loss": 0.4658, "step": 28341 }, { "epoch": 0.73, "grad_norm": 4.794711589813232, "learning_rate": 3.674936206919942e-06, "loss": 0.3521, "step": 28342 }, { "epoch": 0.73, "grad_norm": 1.4991698265075684, "learning_rate": 3.674293237284643e-06, "loss": 0.4174, "step": 28343 }, { "epoch": 0.73, "grad_norm": 2.049825668334961, "learning_rate": 3.673650311242144e-06, "loss": 0.6976, "step": 28344 }, { "epoch": 0.73, "grad_norm": 1.1840661764144897, "learning_rate": 3.673007428796872e-06, "loss": 0.4764, "step": 28345 }, { "epoch": 0.73, "grad_norm": 3.5884149074554443, "learning_rate": 3.6723645899532547e-06, "loss": 0.5769, "step": 28346 }, { "epoch": 0.73, "grad_norm": 2.2744486331939697, "learning_rate": 3.671721794715728e-06, "loss": 0.4391, "step": 28347 }, { "epoch": 0.73, "grad_norm": 1.14995539188385, "learning_rate": 3.67107904308872e-06, "loss": 0.6871, "step": 28348 }, { "epoch": 0.73, "grad_norm": 1.7193588018417358, "learning_rate": 3.670436335076658e-06, "loss": 0.4787, "step": 28349 }, { "epoch": 0.73, "grad_norm": 1.2163127660751343, "learning_rate": 3.6697936706839686e-06, "loss": 0.5731, "step": 28350 }, { "epoch": 0.73, "grad_norm": 1.7070400714874268, "learning_rate": 3.6691510499150884e-06, "loss": 0.5772, "step": 28351 }, { "epoch": 0.73, "grad_norm": 1.0149352550506592, "learning_rate": 3.6685084727744413e-06, "loss": 0.4904, "step": 28352 }, { "epoch": 0.73, "grad_norm": 6.258240222930908, "learning_rate": 3.6678659392664527e-06, "loss": 0.6031, "step": 28353 }, { "epoch": 0.73, "grad_norm": 2.1820411682128906, "learning_rate": 3.6672234493955572e-06, "loss": 0.5365, "step": 28354 }, { "epoch": 0.73, "grad_norm": 2.217700481414795, "learning_rate": 3.6665810031661785e-06, "loss": 0.6503, "step": 28355 }, { "epoch": 0.73, "grad_norm": 1.2594741582870483, "learning_rate": 3.6659386005827457e-06, "loss": 0.5256, "step": 28356 }, { "epoch": 0.73, "grad_norm": 1.3577604293823242, "learning_rate": 3.6652962416496807e-06, "loss": 0.3739, "step": 28357 }, { "epoch": 0.73, "grad_norm": 2.046053409576416, "learning_rate": 3.664653926371418e-06, "loss": 0.5721, "step": 28358 }, { "epoch": 0.73, "grad_norm": 7.496611595153809, "learning_rate": 3.6640116547523784e-06, "loss": 0.483, "step": 28359 }, { "epoch": 0.73, "grad_norm": 1.0049948692321777, "learning_rate": 3.663369426796988e-06, "loss": 0.5309, "step": 28360 }, { "epoch": 0.73, "grad_norm": 1.653359055519104, "learning_rate": 3.662727242509677e-06, "loss": 0.6168, "step": 28361 }, { "epoch": 0.73, "grad_norm": 1.409100890159607, "learning_rate": 3.662085101894869e-06, "loss": 0.5651, "step": 28362 }, { "epoch": 0.73, "grad_norm": 1.2804629802703857, "learning_rate": 3.661443004956985e-06, "loss": 0.4982, "step": 28363 }, { "epoch": 0.73, "grad_norm": 1.361580729484558, "learning_rate": 3.660800951700457e-06, "loss": 0.496, "step": 28364 }, { "epoch": 0.73, "grad_norm": 1.554502010345459, "learning_rate": 3.6601589421297066e-06, "loss": 0.5449, "step": 28365 }, { "epoch": 0.73, "grad_norm": 1.4835162162780762, "learning_rate": 3.659516976249157e-06, "loss": 0.413, "step": 28366 }, { "epoch": 0.73, "grad_norm": 1.3592978715896606, "learning_rate": 3.65887505406323e-06, "loss": 0.3806, "step": 28367 }, { "epoch": 0.73, "grad_norm": 1.6128466129302979, "learning_rate": 3.6582331755763557e-06, "loss": 0.5642, "step": 28368 }, { "epoch": 0.73, "grad_norm": 1.752486228942871, "learning_rate": 3.657591340792953e-06, "loss": 0.6899, "step": 28369 }, { "epoch": 0.73, "grad_norm": 2.998065710067749, "learning_rate": 3.6569495497174434e-06, "loss": 0.7815, "step": 28370 }, { "epoch": 0.73, "grad_norm": 1.0174193382263184, "learning_rate": 3.6563078023542565e-06, "loss": 0.542, "step": 28371 }, { "epoch": 0.73, "grad_norm": 1.043332576751709, "learning_rate": 3.6556660987078096e-06, "loss": 0.4207, "step": 28372 }, { "epoch": 0.73, "grad_norm": 2.264112949371338, "learning_rate": 3.6550244387825264e-06, "loss": 0.542, "step": 28373 }, { "epoch": 0.73, "grad_norm": 1.4014852046966553, "learning_rate": 3.654382822582826e-06, "loss": 0.4713, "step": 28374 }, { "epoch": 0.73, "grad_norm": 2.354736566543579, "learning_rate": 3.653741250113134e-06, "loss": 0.6944, "step": 28375 }, { "epoch": 0.73, "grad_norm": 4.498992443084717, "learning_rate": 3.653099721377872e-06, "loss": 0.6626, "step": 28376 }, { "epoch": 0.73, "grad_norm": 1.289198398590088, "learning_rate": 3.6524582363814552e-06, "loss": 0.4929, "step": 28377 }, { "epoch": 0.73, "grad_norm": 1.370648741722107, "learning_rate": 3.651816795128311e-06, "loss": 0.5282, "step": 28378 }, { "epoch": 0.73, "grad_norm": 1.5698215961456299, "learning_rate": 3.651175397622858e-06, "loss": 0.5745, "step": 28379 }, { "epoch": 0.73, "grad_norm": 2.0824267864227295, "learning_rate": 3.650534043869515e-06, "loss": 0.5226, "step": 28380 }, { "epoch": 0.73, "grad_norm": 1.3240981101989746, "learning_rate": 3.6498927338726987e-06, "loss": 0.519, "step": 28381 }, { "epoch": 0.73, "grad_norm": 1.325119137763977, "learning_rate": 3.6492514676368364e-06, "loss": 0.5748, "step": 28382 }, { "epoch": 0.73, "grad_norm": 1.6362473964691162, "learning_rate": 3.648610245166342e-06, "loss": 0.5694, "step": 28383 }, { "epoch": 0.73, "grad_norm": 1.386900544166565, "learning_rate": 3.647969066465633e-06, "loss": 0.5206, "step": 28384 }, { "epoch": 0.73, "grad_norm": 1.5789170265197754, "learning_rate": 3.647327931539133e-06, "loss": 0.573, "step": 28385 }, { "epoch": 0.73, "grad_norm": 1.3377150297164917, "learning_rate": 3.646686840391258e-06, "loss": 0.3765, "step": 28386 }, { "epoch": 0.73, "grad_norm": 1.4837530851364136, "learning_rate": 3.6460457930264227e-06, "loss": 0.5181, "step": 28387 }, { "epoch": 0.73, "grad_norm": 1.300781488418579, "learning_rate": 3.6454047894490517e-06, "loss": 0.5123, "step": 28388 }, { "epoch": 0.73, "grad_norm": 2.03052020072937, "learning_rate": 3.6447638296635578e-06, "loss": 0.5374, "step": 28389 }, { "epoch": 0.73, "grad_norm": 3.695293664932251, "learning_rate": 3.6441229136743585e-06, "loss": 0.6654, "step": 28390 }, { "epoch": 0.73, "grad_norm": 3.0840773582458496, "learning_rate": 3.6434820414858686e-06, "loss": 0.4052, "step": 28391 }, { "epoch": 0.73, "grad_norm": 1.421459674835205, "learning_rate": 3.642841213102509e-06, "loss": 0.5165, "step": 28392 }, { "epoch": 0.73, "grad_norm": 0.7472501397132874, "learning_rate": 3.642200428528695e-06, "loss": 0.3917, "step": 28393 }, { "epoch": 0.73, "grad_norm": 1.2336843013763428, "learning_rate": 3.6415596877688374e-06, "loss": 0.4989, "step": 28394 }, { "epoch": 0.73, "grad_norm": 2.0268404483795166, "learning_rate": 3.640918990827359e-06, "loss": 0.5182, "step": 28395 }, { "epoch": 0.73, "grad_norm": 1.5222012996673584, "learning_rate": 3.6402783377086716e-06, "loss": 0.6334, "step": 28396 }, { "epoch": 0.73, "grad_norm": 0.8474360704421997, "learning_rate": 3.6396377284171903e-06, "loss": 0.4209, "step": 28397 }, { "epoch": 0.73, "grad_norm": 0.9947187304496765, "learning_rate": 3.638997162957327e-06, "loss": 0.3701, "step": 28398 }, { "epoch": 0.73, "grad_norm": 1.4548450708389282, "learning_rate": 3.6383566413335014e-06, "loss": 0.4939, "step": 28399 }, { "epoch": 0.73, "grad_norm": 1.2415294647216797, "learning_rate": 3.6377161635501257e-06, "loss": 0.5219, "step": 28400 }, { "epoch": 0.73, "grad_norm": 2.9116506576538086, "learning_rate": 3.63707572961161e-06, "loss": 0.4761, "step": 28401 }, { "epoch": 0.73, "grad_norm": 1.3893680572509766, "learning_rate": 3.636435339522374e-06, "loss": 0.4651, "step": 28402 }, { "epoch": 0.73, "grad_norm": 1.1355851888656616, "learning_rate": 3.6357949932868265e-06, "loss": 0.5468, "step": 28403 }, { "epoch": 0.73, "grad_norm": 1.376571536064148, "learning_rate": 3.6351546909093793e-06, "loss": 0.4498, "step": 28404 }, { "epoch": 0.73, "grad_norm": 1.338063359260559, "learning_rate": 3.6345144323944505e-06, "loss": 0.5566, "step": 28405 }, { "epoch": 0.73, "grad_norm": 1.3622231483459473, "learning_rate": 3.6338742177464482e-06, "loss": 0.4375, "step": 28406 }, { "epoch": 0.73, "grad_norm": 1.2505912780761719, "learning_rate": 3.6332340469697856e-06, "loss": 0.6324, "step": 28407 }, { "epoch": 0.73, "grad_norm": 7.895054340362549, "learning_rate": 3.632593920068871e-06, "loss": 0.4595, "step": 28408 }, { "epoch": 0.73, "grad_norm": 1.8044463396072388, "learning_rate": 3.6319538370481223e-06, "loss": 0.5945, "step": 28409 }, { "epoch": 0.73, "grad_norm": 2.3895983695983887, "learning_rate": 3.631313797911946e-06, "loss": 0.4126, "step": 28410 }, { "epoch": 0.73, "grad_norm": 0.9736129641532898, "learning_rate": 3.6306738026647513e-06, "loss": 0.3701, "step": 28411 }, { "epoch": 0.73, "grad_norm": 4.785614013671875, "learning_rate": 3.6300338513109547e-06, "loss": 0.4642, "step": 28412 }, { "epoch": 0.73, "grad_norm": 1.171784520149231, "learning_rate": 3.629393943854962e-06, "loss": 0.513, "step": 28413 }, { "epoch": 0.73, "grad_norm": 4.1455078125, "learning_rate": 3.6287540803011835e-06, "loss": 0.5606, "step": 28414 }, { "epoch": 0.73, "grad_norm": 1.354190468788147, "learning_rate": 3.6281142606540264e-06, "loss": 0.4964, "step": 28415 }, { "epoch": 0.73, "grad_norm": 3.3663551807403564, "learning_rate": 3.6274744849179057e-06, "loss": 0.4538, "step": 28416 }, { "epoch": 0.73, "grad_norm": 1.8502963781356812, "learning_rate": 3.6268347530972283e-06, "loss": 0.5012, "step": 28417 }, { "epoch": 0.73, "grad_norm": 1.666211724281311, "learning_rate": 3.626195065196397e-06, "loss": 0.7312, "step": 28418 }, { "epoch": 0.73, "grad_norm": 2.866244077682495, "learning_rate": 3.6255554212198284e-06, "loss": 0.9318, "step": 28419 }, { "epoch": 0.73, "grad_norm": 3.477905750274658, "learning_rate": 3.6249158211719273e-06, "loss": 0.5841, "step": 28420 }, { "epoch": 0.73, "grad_norm": 1.7277487516403198, "learning_rate": 3.624276265057102e-06, "loss": 0.5067, "step": 28421 }, { "epoch": 0.73, "grad_norm": 1.4077091217041016, "learning_rate": 3.6236367528797545e-06, "loss": 0.5068, "step": 28422 }, { "epoch": 0.73, "grad_norm": 4.883918285369873, "learning_rate": 3.6229972846442996e-06, "loss": 0.5733, "step": 28423 }, { "epoch": 0.73, "grad_norm": 1.4910011291503906, "learning_rate": 3.6223578603551426e-06, "loss": 0.4852, "step": 28424 }, { "epoch": 0.73, "grad_norm": 1.4302095174789429, "learning_rate": 3.6217184800166837e-06, "loss": 0.4315, "step": 28425 }, { "epoch": 0.73, "grad_norm": 1.561750054359436, "learning_rate": 3.621079143633337e-06, "loss": 0.4255, "step": 28426 }, { "epoch": 0.73, "grad_norm": 1.1000832319259644, "learning_rate": 3.6204398512095063e-06, "loss": 0.5348, "step": 28427 }, { "epoch": 0.73, "grad_norm": 3.796475887298584, "learning_rate": 3.6198006027495923e-06, "loss": 0.6493, "step": 28428 }, { "epoch": 0.73, "grad_norm": 5.162136077880859, "learning_rate": 3.6191613982580066e-06, "loss": 0.5184, "step": 28429 }, { "epoch": 0.73, "grad_norm": 8.388129234313965, "learning_rate": 3.6185222377391525e-06, "loss": 0.4762, "step": 28430 }, { "epoch": 0.73, "grad_norm": 1.8812782764434814, "learning_rate": 3.6178831211974343e-06, "loss": 0.5712, "step": 28431 }, { "epoch": 0.73, "grad_norm": 9.858458518981934, "learning_rate": 3.6172440486372517e-06, "loss": 0.4864, "step": 28432 }, { "epoch": 0.73, "grad_norm": 0.9038364291191101, "learning_rate": 3.616605020063018e-06, "loss": 0.3674, "step": 28433 }, { "epoch": 0.73, "grad_norm": 2.026547908782959, "learning_rate": 3.6159660354791304e-06, "loss": 0.4734, "step": 28434 }, { "epoch": 0.73, "grad_norm": 1.403075098991394, "learning_rate": 3.615327094889991e-06, "loss": 0.3279, "step": 28435 }, { "epoch": 0.73, "grad_norm": 1.3329167366027832, "learning_rate": 3.6146881983000102e-06, "loss": 0.566, "step": 28436 }, { "epoch": 0.73, "grad_norm": 1.9411598443984985, "learning_rate": 3.614049345713586e-06, "loss": 0.5014, "step": 28437 }, { "epoch": 0.73, "grad_norm": 1.4068872928619385, "learning_rate": 3.6134105371351212e-06, "loss": 0.6003, "step": 28438 }, { "epoch": 0.73, "grad_norm": 1.147637963294983, "learning_rate": 3.612771772569016e-06, "loss": 0.3614, "step": 28439 }, { "epoch": 0.73, "grad_norm": 1.6302158832550049, "learning_rate": 3.6121330520196784e-06, "loss": 0.5026, "step": 28440 }, { "epoch": 0.73, "grad_norm": 4.395951747894287, "learning_rate": 3.6114943754915066e-06, "loss": 0.6247, "step": 28441 }, { "epoch": 0.73, "grad_norm": 1.3763432502746582, "learning_rate": 3.610855742988898e-06, "loss": 0.5683, "step": 28442 }, { "epoch": 0.73, "grad_norm": 1.8312640190124512, "learning_rate": 3.610217154516261e-06, "loss": 0.4152, "step": 28443 }, { "epoch": 0.73, "grad_norm": 1.4524754285812378, "learning_rate": 3.609578610077993e-06, "loss": 0.5979, "step": 28444 }, { "epoch": 0.73, "grad_norm": 1.4299947023391724, "learning_rate": 3.6089401096784903e-06, "loss": 0.5059, "step": 28445 }, { "epoch": 0.73, "grad_norm": 7.355105876922607, "learning_rate": 3.6083016533221616e-06, "loss": 0.4833, "step": 28446 }, { "epoch": 0.73, "grad_norm": 1.1219515800476074, "learning_rate": 3.6076632410134015e-06, "loss": 0.4672, "step": 28447 }, { "epoch": 0.73, "grad_norm": 1.505733847618103, "learning_rate": 3.6070248727566104e-06, "loss": 0.5209, "step": 28448 }, { "epoch": 0.73, "grad_norm": 1.574755072593689, "learning_rate": 3.6063865485561845e-06, "loss": 0.5389, "step": 28449 }, { "epoch": 0.73, "grad_norm": 1.688649296760559, "learning_rate": 3.605748268416529e-06, "loss": 0.3865, "step": 28450 }, { "epoch": 0.73, "grad_norm": 2.0168497562408447, "learning_rate": 3.6051100323420387e-06, "loss": 0.6057, "step": 28451 }, { "epoch": 0.73, "grad_norm": 1.0554708242416382, "learning_rate": 3.6044718403371093e-06, "loss": 0.497, "step": 28452 }, { "epoch": 0.73, "grad_norm": 1.1818875074386597, "learning_rate": 3.6038336924061455e-06, "loss": 0.5204, "step": 28453 }, { "epoch": 0.73, "grad_norm": 7.853638172149658, "learning_rate": 3.603195588553541e-06, "loss": 0.4923, "step": 28454 }, { "epoch": 0.73, "grad_norm": 3.3330602645874023, "learning_rate": 3.6025575287836932e-06, "loss": 0.6206, "step": 28455 }, { "epoch": 0.73, "grad_norm": 2.1232974529266357, "learning_rate": 3.601919513100998e-06, "loss": 0.6069, "step": 28456 }, { "epoch": 0.73, "grad_norm": 1.8866751194000244, "learning_rate": 3.601281541509856e-06, "loss": 0.4258, "step": 28457 }, { "epoch": 0.73, "grad_norm": 2.192734718322754, "learning_rate": 3.6006436140146604e-06, "loss": 0.4515, "step": 28458 }, { "epoch": 0.73, "grad_norm": 7.773056983947754, "learning_rate": 3.6000057306198067e-06, "loss": 0.5449, "step": 28459 }, { "epoch": 0.73, "grad_norm": 1.2411274909973145, "learning_rate": 3.599367891329696e-06, "loss": 0.6671, "step": 28460 }, { "epoch": 0.73, "grad_norm": 1.3207035064697266, "learning_rate": 3.59873009614872e-06, "loss": 0.3925, "step": 28461 }, { "epoch": 0.73, "grad_norm": 2.106156826019287, "learning_rate": 3.5980923450812753e-06, "loss": 0.4433, "step": 28462 }, { "epoch": 0.73, "grad_norm": 1.470122218132019, "learning_rate": 3.5974546381317554e-06, "loss": 0.5125, "step": 28463 }, { "epoch": 0.73, "grad_norm": 1.5208300352096558, "learning_rate": 3.5968169753045556e-06, "loss": 0.6059, "step": 28464 }, { "epoch": 0.73, "grad_norm": 2.801046371459961, "learning_rate": 3.596179356604067e-06, "loss": 0.5327, "step": 28465 }, { "epoch": 0.73, "grad_norm": 1.0534913539886475, "learning_rate": 3.5955417820346905e-06, "loss": 0.5753, "step": 28466 }, { "epoch": 0.73, "grad_norm": 1.2374553680419922, "learning_rate": 3.594904251600817e-06, "loss": 0.4529, "step": 28467 }, { "epoch": 0.73, "grad_norm": 8.902037620544434, "learning_rate": 3.5942667653068386e-06, "loss": 0.5124, "step": 28468 }, { "epoch": 0.73, "grad_norm": 1.2127747535705566, "learning_rate": 3.593629323157145e-06, "loss": 0.5051, "step": 28469 }, { "epoch": 0.73, "grad_norm": 1.428230881690979, "learning_rate": 3.5929919251561386e-06, "loss": 0.5006, "step": 28470 }, { "epoch": 0.73, "grad_norm": 1.7981444597244263, "learning_rate": 3.5923545713082054e-06, "loss": 0.4725, "step": 28471 }, { "epoch": 0.73, "grad_norm": 1.0069098472595215, "learning_rate": 3.5917172616177353e-06, "loss": 0.5277, "step": 28472 }, { "epoch": 0.73, "grad_norm": 1.4640109539031982, "learning_rate": 3.5910799960891274e-06, "loss": 0.494, "step": 28473 }, { "epoch": 0.73, "grad_norm": 9.485270500183105, "learning_rate": 3.590442774726769e-06, "loss": 0.4609, "step": 28474 }, { "epoch": 0.73, "grad_norm": 2.1371257305145264, "learning_rate": 3.5898055975350497e-06, "loss": 0.63, "step": 28475 }, { "epoch": 0.73, "grad_norm": 1.6372926235198975, "learning_rate": 3.5891684645183654e-06, "loss": 0.4865, "step": 28476 }, { "epoch": 0.73, "grad_norm": 1.0024422407150269, "learning_rate": 3.5885313756811056e-06, "loss": 0.3805, "step": 28477 }, { "epoch": 0.73, "grad_norm": 1.659806251525879, "learning_rate": 3.5878943310276593e-06, "loss": 0.5351, "step": 28478 }, { "epoch": 0.73, "grad_norm": 2.5014970302581787, "learning_rate": 3.5872573305624125e-06, "loss": 0.532, "step": 28479 }, { "epoch": 0.73, "grad_norm": 1.9977538585662842, "learning_rate": 3.5866203742897633e-06, "loss": 0.5435, "step": 28480 }, { "epoch": 0.73, "grad_norm": 3.791166067123413, "learning_rate": 3.5859834622140965e-06, "loss": 0.5275, "step": 28481 }, { "epoch": 0.73, "grad_norm": 1.2807366847991943, "learning_rate": 3.585346594339799e-06, "loss": 0.2675, "step": 28482 }, { "epoch": 0.73, "grad_norm": 1.4347089529037476, "learning_rate": 3.5847097706712662e-06, "loss": 0.5205, "step": 28483 }, { "epoch": 0.73, "grad_norm": 2.0258896350860596, "learning_rate": 3.5840729912128836e-06, "loss": 0.5949, "step": 28484 }, { "epoch": 0.73, "grad_norm": 2.7285685539245605, "learning_rate": 3.5834362559690385e-06, "loss": 0.559, "step": 28485 }, { "epoch": 0.73, "grad_norm": 5.420639514923096, "learning_rate": 3.582799564944116e-06, "loss": 0.7361, "step": 28486 }, { "epoch": 0.73, "grad_norm": 1.1555031538009644, "learning_rate": 3.582162918142511e-06, "loss": 0.5658, "step": 28487 }, { "epoch": 0.73, "grad_norm": 2.9094724655151367, "learning_rate": 3.5815263155686075e-06, "loss": 0.4651, "step": 28488 }, { "epoch": 0.73, "grad_norm": 5.438943386077881, "learning_rate": 3.5808897572267875e-06, "loss": 0.6271, "step": 28489 }, { "epoch": 0.73, "grad_norm": 1.0316944122314453, "learning_rate": 3.5802532431214465e-06, "loss": 0.4857, "step": 28490 }, { "epoch": 0.73, "grad_norm": 19.0008602142334, "learning_rate": 3.5796167732569675e-06, "loss": 0.6574, "step": 28491 }, { "epoch": 0.73, "grad_norm": 1.0394705533981323, "learning_rate": 3.578980347637735e-06, "loss": 0.5416, "step": 28492 }, { "epoch": 0.73, "grad_norm": 3.228440761566162, "learning_rate": 3.578343966268133e-06, "loss": 0.4893, "step": 28493 }, { "epoch": 0.73, "grad_norm": 1.4619816541671753, "learning_rate": 3.5777076291525537e-06, "loss": 0.4409, "step": 28494 }, { "epoch": 0.73, "grad_norm": 1.7491240501403809, "learning_rate": 3.577071336295378e-06, "loss": 0.7775, "step": 28495 }, { "epoch": 0.73, "grad_norm": 1.7671862840652466, "learning_rate": 3.576435087700988e-06, "loss": 0.4546, "step": 28496 }, { "epoch": 0.73, "grad_norm": 1.8810806274414062, "learning_rate": 3.5757988833737745e-06, "loss": 0.478, "step": 28497 }, { "epoch": 0.73, "grad_norm": 2.0522820949554443, "learning_rate": 3.57516272331812e-06, "loss": 0.6209, "step": 28498 }, { "epoch": 0.73, "grad_norm": 1.6035672426223755, "learning_rate": 3.5745266075384033e-06, "loss": 0.425, "step": 28499 }, { "epoch": 0.73, "grad_norm": 1.8086552619934082, "learning_rate": 3.573890536039016e-06, "loss": 0.6244, "step": 28500 }, { "epoch": 0.73, "grad_norm": 1.7835376262664795, "learning_rate": 3.5732545088243387e-06, "loss": 0.4079, "step": 28501 }, { "epoch": 0.73, "grad_norm": 1.6427608728408813, "learning_rate": 3.5726185258987524e-06, "loss": 0.5555, "step": 28502 }, { "epoch": 0.73, "grad_norm": 2.866483688354492, "learning_rate": 3.5719825872666382e-06, "loss": 0.3965, "step": 28503 }, { "epoch": 0.73, "grad_norm": 10.760859489440918, "learning_rate": 3.5713466929323846e-06, "loss": 0.4537, "step": 28504 }, { "epoch": 0.73, "grad_norm": 1.5909291505813599, "learning_rate": 3.570710842900371e-06, "loss": 0.4171, "step": 28505 }, { "epoch": 0.73, "grad_norm": 3.799190044403076, "learning_rate": 3.570075037174976e-06, "loss": 0.4367, "step": 28506 }, { "epoch": 0.73, "grad_norm": 1.3274483680725098, "learning_rate": 3.5694392757605877e-06, "loss": 0.6279, "step": 28507 }, { "epoch": 0.73, "grad_norm": 1.8580306768417358, "learning_rate": 3.568803558661583e-06, "loss": 0.5539, "step": 28508 }, { "epoch": 0.73, "grad_norm": 1.2045663595199585, "learning_rate": 3.5681678858823443e-06, "loss": 0.4199, "step": 28509 }, { "epoch": 0.73, "grad_norm": 1.05745530128479, "learning_rate": 3.5675322574272487e-06, "loss": 0.6389, "step": 28510 }, { "epoch": 0.73, "grad_norm": 1.9298983812332153, "learning_rate": 3.5668966733006817e-06, "loss": 0.5889, "step": 28511 }, { "epoch": 0.73, "grad_norm": 1.4983494281768799, "learning_rate": 3.566261133507022e-06, "loss": 0.527, "step": 28512 }, { "epoch": 0.73, "grad_norm": 1.6131943464279175, "learning_rate": 3.5656256380506448e-06, "loss": 0.5525, "step": 28513 }, { "epoch": 0.73, "grad_norm": 1.2006713151931763, "learning_rate": 3.564990186935937e-06, "loss": 0.5164, "step": 28514 }, { "epoch": 0.73, "grad_norm": 1.4255908727645874, "learning_rate": 3.564354780167274e-06, "loss": 0.5617, "step": 28515 }, { "epoch": 0.73, "grad_norm": 1.1820526123046875, "learning_rate": 3.563719417749034e-06, "loss": 0.4807, "step": 28516 }, { "epoch": 0.73, "grad_norm": 4.179843902587891, "learning_rate": 3.563084099685593e-06, "loss": 0.7112, "step": 28517 }, { "epoch": 0.73, "grad_norm": 1.7863354682922363, "learning_rate": 3.562448825981335e-06, "loss": 0.5251, "step": 28518 }, { "epoch": 0.73, "grad_norm": 1.3778135776519775, "learning_rate": 3.5618135966406366e-06, "loss": 0.3997, "step": 28519 }, { "epoch": 0.73, "grad_norm": 1.947785496711731, "learning_rate": 3.5611784116678694e-06, "loss": 0.3932, "step": 28520 }, { "epoch": 0.73, "grad_norm": 6.122045993804932, "learning_rate": 3.560543271067419e-06, "loss": 0.6196, "step": 28521 }, { "epoch": 0.73, "grad_norm": 2.01192307472229, "learning_rate": 3.5599081748436594e-06, "loss": 0.553, "step": 28522 }, { "epoch": 0.73, "grad_norm": 0.9535887241363525, "learning_rate": 3.5592731230009626e-06, "loss": 0.4249, "step": 28523 }, { "epoch": 0.73, "grad_norm": 1.9275200366973877, "learning_rate": 3.558638115543712e-06, "loss": 0.5917, "step": 28524 }, { "epoch": 0.73, "grad_norm": 1.6487659215927124, "learning_rate": 3.5580031524762816e-06, "loss": 0.6588, "step": 28525 }, { "epoch": 0.73, "grad_norm": 2.782078981399536, "learning_rate": 3.5573682338030456e-06, "loss": 0.6225, "step": 28526 }, { "epoch": 0.73, "grad_norm": 1.6028969287872314, "learning_rate": 3.556733359528377e-06, "loss": 0.6394, "step": 28527 }, { "epoch": 0.73, "grad_norm": 0.8894009590148926, "learning_rate": 3.5560985296566574e-06, "loss": 0.4467, "step": 28528 }, { "epoch": 0.73, "grad_norm": 1.555317997932434, "learning_rate": 3.555463744192258e-06, "loss": 0.5709, "step": 28529 }, { "epoch": 0.73, "grad_norm": 1.6604670286178589, "learning_rate": 3.5548290031395505e-06, "loss": 0.4932, "step": 28530 }, { "epoch": 0.73, "grad_norm": 1.3832212686538696, "learning_rate": 3.554194306502916e-06, "loss": 0.6197, "step": 28531 }, { "epoch": 0.73, "grad_norm": 3.1684458255767822, "learning_rate": 3.5535596542867246e-06, "loss": 0.579, "step": 28532 }, { "epoch": 0.73, "grad_norm": 1.0810548067092896, "learning_rate": 3.552925046495349e-06, "loss": 0.5171, "step": 28533 }, { "epoch": 0.73, "grad_norm": 1.2001837491989136, "learning_rate": 3.5522904831331605e-06, "loss": 0.4974, "step": 28534 }, { "epoch": 0.73, "grad_norm": 1.2347975969314575, "learning_rate": 3.551655964204539e-06, "loss": 0.6525, "step": 28535 }, { "epoch": 0.73, "grad_norm": 3.9164180755615234, "learning_rate": 3.551021489713853e-06, "loss": 0.5985, "step": 28536 }, { "epoch": 0.73, "grad_norm": 6.32440710067749, "learning_rate": 3.550387059665472e-06, "loss": 0.4945, "step": 28537 }, { "epoch": 0.73, "grad_norm": 1.1684643030166626, "learning_rate": 3.5497526740637756e-06, "loss": 0.4818, "step": 28538 }, { "epoch": 0.73, "grad_norm": 2.7155113220214844, "learning_rate": 3.5491183329131295e-06, "loss": 0.5437, "step": 28539 }, { "epoch": 0.73, "grad_norm": 1.2927780151367188, "learning_rate": 3.5484840362179053e-06, "loss": 0.3799, "step": 28540 }, { "epoch": 0.73, "grad_norm": 4.491974830627441, "learning_rate": 3.547849783982479e-06, "loss": 0.637, "step": 28541 }, { "epoch": 0.73, "grad_norm": 1.122101902961731, "learning_rate": 3.547215576211217e-06, "loss": 0.4648, "step": 28542 }, { "epoch": 0.73, "grad_norm": 1.6515820026397705, "learning_rate": 3.546581412908493e-06, "loss": 0.4743, "step": 28543 }, { "epoch": 0.73, "grad_norm": 3.2201218605041504, "learning_rate": 3.545947294078671e-06, "loss": 0.691, "step": 28544 }, { "epoch": 0.73, "grad_norm": 1.567824125289917, "learning_rate": 3.5453132197261287e-06, "loss": 0.483, "step": 28545 }, { "epoch": 0.73, "grad_norm": 2.4471116065979004, "learning_rate": 3.5446791898552326e-06, "loss": 0.5101, "step": 28546 }, { "epoch": 0.73, "grad_norm": 1.45171320438385, "learning_rate": 3.544045204470349e-06, "loss": 0.4679, "step": 28547 }, { "epoch": 0.73, "grad_norm": 1.433972716331482, "learning_rate": 3.5434112635758534e-06, "loss": 0.4619, "step": 28548 }, { "epoch": 0.73, "grad_norm": 1.0680092573165894, "learning_rate": 3.54277736717611e-06, "loss": 0.505, "step": 28549 }, { "epoch": 0.73, "grad_norm": 1.6993181705474854, "learning_rate": 3.542143515275488e-06, "loss": 0.5979, "step": 28550 }, { "epoch": 0.73, "grad_norm": 11.44426155090332, "learning_rate": 3.5415097078783533e-06, "loss": 0.6181, "step": 28551 }, { "epoch": 0.73, "grad_norm": 1.3863779306411743, "learning_rate": 3.5408759449890794e-06, "loss": 0.6513, "step": 28552 }, { "epoch": 0.73, "grad_norm": 3.651505470275879, "learning_rate": 3.5402422266120297e-06, "loss": 0.4754, "step": 28553 }, { "epoch": 0.73, "grad_norm": 2.419269323348999, "learning_rate": 3.539608552751569e-06, "loss": 0.5829, "step": 28554 }, { "epoch": 0.73, "grad_norm": 1.9978200197219849, "learning_rate": 3.5389749234120706e-06, "loss": 0.6033, "step": 28555 }, { "epoch": 0.73, "grad_norm": 1.6696889400482178, "learning_rate": 3.538341338597898e-06, "loss": 0.6448, "step": 28556 }, { "epoch": 0.73, "grad_norm": 0.8952004313468933, "learning_rate": 3.537707798313418e-06, "loss": 0.4211, "step": 28557 }, { "epoch": 0.73, "grad_norm": 1.7595230340957642, "learning_rate": 3.5370743025629916e-06, "loss": 0.407, "step": 28558 }, { "epoch": 0.73, "grad_norm": 11.953496932983398, "learning_rate": 3.536440851350993e-06, "loss": 0.5632, "step": 28559 }, { "epoch": 0.73, "grad_norm": 6.1966376304626465, "learning_rate": 3.5358074446817823e-06, "loss": 0.5082, "step": 28560 }, { "epoch": 0.73, "grad_norm": 1.8734689950942993, "learning_rate": 3.5351740825597226e-06, "loss": 0.3051, "step": 28561 }, { "epoch": 0.73, "grad_norm": 1.4527432918548584, "learning_rate": 3.534540764989185e-06, "loss": 0.463, "step": 28562 }, { "epoch": 0.73, "grad_norm": 1.4784934520721436, "learning_rate": 3.53390749197453e-06, "loss": 0.4788, "step": 28563 }, { "epoch": 0.73, "grad_norm": 1.6509571075439453, "learning_rate": 3.533274263520119e-06, "loss": 0.48, "step": 28564 }, { "epoch": 0.73, "grad_norm": 1.777284026145935, "learning_rate": 3.5326410796303224e-06, "loss": 0.6194, "step": 28565 }, { "epoch": 0.73, "grad_norm": 1.6532213687896729, "learning_rate": 3.5320079403094997e-06, "loss": 0.5609, "step": 28566 }, { "epoch": 0.73, "grad_norm": 2.3565988540649414, "learning_rate": 3.5313748455620156e-06, "loss": 0.4203, "step": 28567 }, { "epoch": 0.73, "grad_norm": 1.4789894819259644, "learning_rate": 3.5307417953922275e-06, "loss": 0.5139, "step": 28568 }, { "epoch": 0.73, "grad_norm": 0.928631603717804, "learning_rate": 3.530108789804507e-06, "loss": 0.4669, "step": 28569 }, { "epoch": 0.73, "grad_norm": 2.1459693908691406, "learning_rate": 3.5294758288032115e-06, "loss": 0.7019, "step": 28570 }, { "epoch": 0.73, "grad_norm": 1.0904674530029297, "learning_rate": 3.5288429123927004e-06, "loss": 0.4713, "step": 28571 }, { "epoch": 0.73, "grad_norm": 1.1267116069793701, "learning_rate": 3.528210040577341e-06, "loss": 0.4657, "step": 28572 }, { "epoch": 0.73, "grad_norm": 1.1958822011947632, "learning_rate": 3.5275772133614926e-06, "loss": 0.636, "step": 28573 }, { "epoch": 0.73, "grad_norm": 2.143620252609253, "learning_rate": 3.526944430749516e-06, "loss": 0.5938, "step": 28574 }, { "epoch": 0.73, "grad_norm": 5.429877281188965, "learning_rate": 3.5263116927457673e-06, "loss": 0.6246, "step": 28575 }, { "epoch": 0.73, "grad_norm": 1.1519056558609009, "learning_rate": 3.525678999354616e-06, "loss": 0.4509, "step": 28576 }, { "epoch": 0.73, "grad_norm": 1.4868590831756592, "learning_rate": 3.5250463505804165e-06, "loss": 0.5649, "step": 28577 }, { "epoch": 0.73, "grad_norm": 1.0912283658981323, "learning_rate": 3.5244137464275265e-06, "loss": 0.6121, "step": 28578 }, { "epoch": 0.73, "grad_norm": 2.01098370552063, "learning_rate": 3.5237811869003115e-06, "loss": 0.6443, "step": 28579 }, { "epoch": 0.73, "grad_norm": 4.0225510597229, "learning_rate": 3.5231486720031282e-06, "loss": 0.5619, "step": 28580 }, { "epoch": 0.73, "grad_norm": 9.10050106048584, "learning_rate": 3.5225162017403313e-06, "loss": 0.5079, "step": 28581 }, { "epoch": 0.73, "grad_norm": 1.2248197793960571, "learning_rate": 3.521883776116287e-06, "loss": 0.3547, "step": 28582 }, { "epoch": 0.73, "grad_norm": 5.235368728637695, "learning_rate": 3.52125139513535e-06, "loss": 0.632, "step": 28583 }, { "epoch": 0.73, "grad_norm": 1.1913645267486572, "learning_rate": 3.5206190588018773e-06, "loss": 0.43, "step": 28584 }, { "epoch": 0.73, "grad_norm": 2.0402274131774902, "learning_rate": 3.5199867671202246e-06, "loss": 0.5431, "step": 28585 }, { "epoch": 0.73, "grad_norm": 1.324913740158081, "learning_rate": 3.5193545200947554e-06, "loss": 0.4467, "step": 28586 }, { "epoch": 0.73, "grad_norm": 1.3652417659759521, "learning_rate": 3.5187223177298225e-06, "loss": 0.5125, "step": 28587 }, { "epoch": 0.73, "grad_norm": 1.431738257408142, "learning_rate": 3.5180901600297814e-06, "loss": 0.5268, "step": 28588 }, { "epoch": 0.73, "grad_norm": 2.012028932571411, "learning_rate": 3.5174580469989926e-06, "loss": 0.549, "step": 28589 }, { "epoch": 0.73, "grad_norm": 3.5984833240509033, "learning_rate": 3.5168259786418104e-06, "loss": 0.5792, "step": 28590 }, { "epoch": 0.73, "grad_norm": 1.8018635511398315, "learning_rate": 3.5161939549625913e-06, "loss": 0.5678, "step": 28591 }, { "epoch": 0.73, "grad_norm": 0.9655289649963379, "learning_rate": 3.5155619759656854e-06, "loss": 0.5212, "step": 28592 }, { "epoch": 0.73, "grad_norm": 3.142284393310547, "learning_rate": 3.5149300416554566e-06, "loss": 0.5223, "step": 28593 }, { "epoch": 0.73, "grad_norm": 1.5519452095031738, "learning_rate": 3.514298152036255e-06, "loss": 0.6006, "step": 28594 }, { "epoch": 0.73, "grad_norm": 1.4063535928726196, "learning_rate": 3.5136663071124332e-06, "loss": 0.4839, "step": 28595 }, { "epoch": 0.73, "grad_norm": 2.0250253677368164, "learning_rate": 3.513034506888351e-06, "loss": 0.4546, "step": 28596 }, { "epoch": 0.73, "grad_norm": 1.8161646127700806, "learning_rate": 3.51240275136836e-06, "loss": 0.5835, "step": 28597 }, { "epoch": 0.73, "grad_norm": 1.5270012617111206, "learning_rate": 3.5117710405568127e-06, "loss": 0.6388, "step": 28598 }, { "epoch": 0.73, "grad_norm": 5.624200820922852, "learning_rate": 3.51113937445806e-06, "loss": 0.6385, "step": 28599 }, { "epoch": 0.73, "grad_norm": 1.8637412786483765, "learning_rate": 3.510507753076462e-06, "loss": 0.4518, "step": 28600 }, { "epoch": 0.73, "grad_norm": 2.746532917022705, "learning_rate": 3.509876176416366e-06, "loss": 0.4767, "step": 28601 }, { "epoch": 0.73, "grad_norm": 1.7904484272003174, "learning_rate": 3.509244644482124e-06, "loss": 0.487, "step": 28602 }, { "epoch": 0.73, "grad_norm": 1.7712210416793823, "learning_rate": 3.5086131572780923e-06, "loss": 0.4123, "step": 28603 }, { "epoch": 0.73, "grad_norm": 1.1207425594329834, "learning_rate": 3.5079817148086213e-06, "loss": 0.3764, "step": 28604 }, { "epoch": 0.73, "grad_norm": 2.2796201705932617, "learning_rate": 3.5073503170780576e-06, "loss": 0.5592, "step": 28605 }, { "epoch": 0.73, "grad_norm": 10.799951553344727, "learning_rate": 3.5067189640907596e-06, "loss": 0.6363, "step": 28606 }, { "epoch": 0.73, "grad_norm": 1.501761555671692, "learning_rate": 3.506087655851076e-06, "loss": 0.6297, "step": 28607 }, { "epoch": 0.73, "grad_norm": 2.6873087882995605, "learning_rate": 3.5054563923633555e-06, "loss": 0.4238, "step": 28608 }, { "epoch": 0.73, "grad_norm": 1.566128134727478, "learning_rate": 3.504825173631946e-06, "loss": 0.6119, "step": 28609 }, { "epoch": 0.73, "grad_norm": 1.4990123510360718, "learning_rate": 3.504193999661205e-06, "loss": 0.452, "step": 28610 }, { "epoch": 0.73, "grad_norm": 1.0483694076538086, "learning_rate": 3.503562870455477e-06, "loss": 0.5298, "step": 28611 }, { "epoch": 0.73, "grad_norm": 1.7203330993652344, "learning_rate": 3.5029317860191093e-06, "loss": 0.4549, "step": 28612 }, { "epoch": 0.73, "grad_norm": 1.130323052406311, "learning_rate": 3.502300746356457e-06, "loss": 0.4977, "step": 28613 }, { "epoch": 0.73, "grad_norm": 0.9790025949478149, "learning_rate": 3.5016697514718655e-06, "loss": 0.4447, "step": 28614 }, { "epoch": 0.73, "grad_norm": 1.0323210954666138, "learning_rate": 3.5010388013696838e-06, "loss": 0.5076, "step": 28615 }, { "epoch": 0.73, "grad_norm": 1.2706698179244995, "learning_rate": 3.5004078960542565e-06, "loss": 0.5363, "step": 28616 }, { "epoch": 0.73, "grad_norm": 1.4997279644012451, "learning_rate": 3.4997770355299374e-06, "loss": 0.4507, "step": 28617 }, { "epoch": 0.73, "grad_norm": 1.6925585269927979, "learning_rate": 3.499146219801072e-06, "loss": 0.5341, "step": 28618 }, { "epoch": 0.73, "grad_norm": 6.567440032958984, "learning_rate": 3.498515448872002e-06, "loss": 0.8425, "step": 28619 }, { "epoch": 0.73, "grad_norm": 6.795145511627197, "learning_rate": 3.4978847227470825e-06, "loss": 0.8214, "step": 28620 }, { "epoch": 0.73, "grad_norm": 1.4630464315414429, "learning_rate": 3.4972540414306565e-06, "loss": 0.478, "step": 28621 }, { "epoch": 0.73, "grad_norm": 2.0232486724853516, "learning_rate": 3.4966234049270677e-06, "loss": 0.6341, "step": 28622 }, { "epoch": 0.73, "grad_norm": 1.179034948348999, "learning_rate": 3.495992813240667e-06, "loss": 0.5088, "step": 28623 }, { "epoch": 0.73, "grad_norm": 1.2356666326522827, "learning_rate": 3.495362266375798e-06, "loss": 0.6521, "step": 28624 }, { "epoch": 0.73, "grad_norm": 1.0620195865631104, "learning_rate": 3.4947317643368063e-06, "loss": 0.4944, "step": 28625 }, { "epoch": 0.73, "grad_norm": 1.3863556385040283, "learning_rate": 3.4941013071280315e-06, "loss": 0.4913, "step": 28626 }, { "epoch": 0.73, "grad_norm": 1.1597446203231812, "learning_rate": 3.4934708947538277e-06, "loss": 0.4848, "step": 28627 }, { "epoch": 0.73, "grad_norm": 1.26872718334198, "learning_rate": 3.492840527218534e-06, "loss": 0.4528, "step": 28628 }, { "epoch": 0.73, "grad_norm": 1.594368815422058, "learning_rate": 3.4922102045264915e-06, "loss": 0.6076, "step": 28629 }, { "epoch": 0.73, "grad_norm": 1.656551480293274, "learning_rate": 3.4915799266820517e-06, "loss": 0.4326, "step": 28630 }, { "epoch": 0.73, "grad_norm": 3.13016414642334, "learning_rate": 3.490949693689554e-06, "loss": 0.6195, "step": 28631 }, { "epoch": 0.73, "grad_norm": 4.414792537689209, "learning_rate": 3.4903195055533414e-06, "loss": 0.5074, "step": 28632 }, { "epoch": 0.73, "grad_norm": 1.8406169414520264, "learning_rate": 3.489689362277754e-06, "loss": 0.4588, "step": 28633 }, { "epoch": 0.73, "grad_norm": 2.8299124240875244, "learning_rate": 3.4890592638671406e-06, "loss": 0.4718, "step": 28634 }, { "epoch": 0.73, "grad_norm": 1.1961252689361572, "learning_rate": 3.48842921032584e-06, "loss": 0.5869, "step": 28635 }, { "epoch": 0.73, "grad_norm": 1.1221733093261719, "learning_rate": 3.4877992016581906e-06, "loss": 0.4827, "step": 28636 }, { "epoch": 0.73, "grad_norm": 1.5490769147872925, "learning_rate": 3.4871692378685416e-06, "loss": 0.5617, "step": 28637 }, { "epoch": 0.73, "grad_norm": 1.4346890449523926, "learning_rate": 3.4865393189612305e-06, "loss": 0.4402, "step": 28638 }, { "epoch": 0.73, "grad_norm": 6.520824432373047, "learning_rate": 3.4859094449405982e-06, "loss": 0.5878, "step": 28639 }, { "epoch": 0.73, "grad_norm": 2.92753529548645, "learning_rate": 3.4852796158109813e-06, "loss": 0.465, "step": 28640 }, { "epoch": 0.73, "grad_norm": 1.219180941581726, "learning_rate": 3.4846498315767287e-06, "loss": 0.4983, "step": 28641 }, { "epoch": 0.73, "grad_norm": 1.4208955764770508, "learning_rate": 3.484020092242176e-06, "loss": 0.4746, "step": 28642 }, { "epoch": 0.73, "grad_norm": 5.732108116149902, "learning_rate": 3.483390397811659e-06, "loss": 0.5911, "step": 28643 }, { "epoch": 0.73, "grad_norm": 1.1249936819076538, "learning_rate": 3.4827607482895254e-06, "loss": 0.5779, "step": 28644 }, { "epoch": 0.73, "grad_norm": 3.2680225372314453, "learning_rate": 3.48213114368011e-06, "loss": 0.5048, "step": 28645 }, { "epoch": 0.73, "grad_norm": 12.112326622009277, "learning_rate": 3.481501583987749e-06, "loss": 0.5195, "step": 28646 }, { "epoch": 0.73, "grad_norm": 1.3206217288970947, "learning_rate": 3.4808720692167864e-06, "loss": 0.4516, "step": 28647 }, { "epoch": 0.73, "grad_norm": 2.2159054279327393, "learning_rate": 3.480242599371557e-06, "loss": 0.6122, "step": 28648 }, { "epoch": 0.73, "grad_norm": 1.0727084875106812, "learning_rate": 3.479613174456401e-06, "loss": 0.5448, "step": 28649 }, { "epoch": 0.73, "grad_norm": 1.516249418258667, "learning_rate": 3.47898379447565e-06, "loss": 0.6272, "step": 28650 }, { "epoch": 0.73, "grad_norm": 1.3056483268737793, "learning_rate": 3.4783544594336505e-06, "loss": 0.4605, "step": 28651 }, { "epoch": 0.73, "grad_norm": 5.141060829162598, "learning_rate": 3.4777251693347338e-06, "loss": 0.6153, "step": 28652 }, { "epoch": 0.73, "grad_norm": 3.965412139892578, "learning_rate": 3.4770959241832337e-06, "loss": 0.5088, "step": 28653 }, { "epoch": 0.73, "grad_norm": 1.0063600540161133, "learning_rate": 3.476466723983495e-06, "loss": 0.4905, "step": 28654 }, { "epoch": 0.73, "grad_norm": 3.2562665939331055, "learning_rate": 3.4758375687398483e-06, "loss": 0.4274, "step": 28655 }, { "epoch": 0.73, "grad_norm": 3.3928723335266113, "learning_rate": 3.4752084584566306e-06, "loss": 0.7008, "step": 28656 }, { "epoch": 0.73, "grad_norm": 1.1087119579315186, "learning_rate": 3.4745793931381734e-06, "loss": 0.464, "step": 28657 }, { "epoch": 0.73, "grad_norm": 1.9664963483810425, "learning_rate": 3.473950372788818e-06, "loss": 0.553, "step": 28658 }, { "epoch": 0.73, "grad_norm": 1.0671312808990479, "learning_rate": 3.4733213974128975e-06, "loss": 0.5971, "step": 28659 }, { "epoch": 0.73, "grad_norm": 4.56124210357666, "learning_rate": 3.4726924670147423e-06, "loss": 0.6343, "step": 28660 }, { "epoch": 0.73, "grad_norm": 1.6793160438537598, "learning_rate": 3.4720635815986923e-06, "loss": 0.5072, "step": 28661 }, { "epoch": 0.73, "grad_norm": 1.989163875579834, "learning_rate": 3.4714347411690797e-06, "loss": 0.4777, "step": 28662 }, { "epoch": 0.73, "grad_norm": 1.4529893398284912, "learning_rate": 3.4708059457302357e-06, "loss": 0.5832, "step": 28663 }, { "epoch": 0.73, "grad_norm": 1.9218218326568604, "learning_rate": 3.470177195286494e-06, "loss": 0.4501, "step": 28664 }, { "epoch": 0.73, "grad_norm": 1.042256236076355, "learning_rate": 3.4695484898421905e-06, "loss": 0.6095, "step": 28665 }, { "epoch": 0.73, "grad_norm": 7.7696051597595215, "learning_rate": 3.4689198294016556e-06, "loss": 0.6999, "step": 28666 }, { "epoch": 0.73, "grad_norm": 4.030510902404785, "learning_rate": 3.4682912139692192e-06, "loss": 0.6036, "step": 28667 }, { "epoch": 0.73, "grad_norm": 1.352195143699646, "learning_rate": 3.46766264354922e-06, "loss": 0.5538, "step": 28668 }, { "epoch": 0.73, "grad_norm": 3.9039466381073, "learning_rate": 3.4670341181459856e-06, "loss": 0.555, "step": 28669 }, { "epoch": 0.73, "grad_norm": 1.412418246269226, "learning_rate": 3.4664056377638433e-06, "loss": 0.5891, "step": 28670 }, { "epoch": 0.73, "grad_norm": 1.140782117843628, "learning_rate": 3.4657772024071336e-06, "loss": 0.4542, "step": 28671 }, { "epoch": 0.73, "grad_norm": 5.005404949188232, "learning_rate": 3.465148812080181e-06, "loss": 0.5443, "step": 28672 }, { "epoch": 0.73, "grad_norm": 1.0040220022201538, "learning_rate": 3.464520466787318e-06, "loss": 0.3947, "step": 28673 }, { "epoch": 0.73, "grad_norm": 1.2600536346435547, "learning_rate": 3.4638921665328694e-06, "loss": 0.4136, "step": 28674 }, { "epoch": 0.73, "grad_norm": 2.3098080158233643, "learning_rate": 3.463263911321174e-06, "loss": 0.6489, "step": 28675 }, { "epoch": 0.73, "grad_norm": 1.1688730716705322, "learning_rate": 3.4626357011565557e-06, "loss": 0.4393, "step": 28676 }, { "epoch": 0.74, "grad_norm": 1.3896069526672363, "learning_rate": 3.462007536043344e-06, "loss": 0.5717, "step": 28677 }, { "epoch": 0.74, "grad_norm": 1.5729625225067139, "learning_rate": 3.4613794159858693e-06, "loss": 0.5096, "step": 28678 }, { "epoch": 0.74, "grad_norm": 3.3547489643096924, "learning_rate": 3.4607513409884617e-06, "loss": 0.6617, "step": 28679 }, { "epoch": 0.74, "grad_norm": 1.8265705108642578, "learning_rate": 3.4601233110554464e-06, "loss": 0.5004, "step": 28680 }, { "epoch": 0.74, "grad_norm": 8.191126823425293, "learning_rate": 3.45949532619115e-06, "loss": 0.4998, "step": 28681 }, { "epoch": 0.74, "grad_norm": 6.1019744873046875, "learning_rate": 3.4588673863999055e-06, "loss": 0.503, "step": 28682 }, { "epoch": 0.74, "grad_norm": 1.0461375713348389, "learning_rate": 3.4582394916860374e-06, "loss": 0.5283, "step": 28683 }, { "epoch": 0.74, "grad_norm": 2.519653797149658, "learning_rate": 3.45761164205387e-06, "loss": 0.6076, "step": 28684 }, { "epoch": 0.74, "grad_norm": 5.970335006713867, "learning_rate": 3.456983837507736e-06, "loss": 0.5591, "step": 28685 }, { "epoch": 0.74, "grad_norm": 1.4233746528625488, "learning_rate": 3.456356078051958e-06, "loss": 0.494, "step": 28686 }, { "epoch": 0.74, "grad_norm": 1.5580635070800781, "learning_rate": 3.4557283636908603e-06, "loss": 0.5366, "step": 28687 }, { "epoch": 0.74, "grad_norm": 1.896984577178955, "learning_rate": 3.4551006944287735e-06, "loss": 0.5649, "step": 28688 }, { "epoch": 0.74, "grad_norm": 1.8814170360565186, "learning_rate": 3.4544730702700215e-06, "loss": 0.4682, "step": 28689 }, { "epoch": 0.74, "grad_norm": 1.527893304824829, "learning_rate": 3.453845491218929e-06, "loss": 0.4398, "step": 28690 }, { "epoch": 0.74, "grad_norm": 1.5054861307144165, "learning_rate": 3.4532179572798174e-06, "loss": 0.3609, "step": 28691 }, { "epoch": 0.74, "grad_norm": 1.1989867687225342, "learning_rate": 3.4525904684570166e-06, "loss": 0.5445, "step": 28692 }, { "epoch": 0.74, "grad_norm": 3.0195860862731934, "learning_rate": 3.4519630247548506e-06, "loss": 0.5434, "step": 28693 }, { "epoch": 0.74, "grad_norm": 1.3741055727005005, "learning_rate": 3.451335626177638e-06, "loss": 0.4868, "step": 28694 }, { "epoch": 0.74, "grad_norm": 1.3909645080566406, "learning_rate": 3.450708272729708e-06, "loss": 0.4785, "step": 28695 }, { "epoch": 0.74, "grad_norm": 1.962023138999939, "learning_rate": 3.450080964415382e-06, "loss": 0.6978, "step": 28696 }, { "epoch": 0.74, "grad_norm": 1.1946606636047363, "learning_rate": 3.449453701238984e-06, "loss": 0.5623, "step": 28697 }, { "epoch": 0.74, "grad_norm": 2.5133185386657715, "learning_rate": 3.4488264832048324e-06, "loss": 0.5433, "step": 28698 }, { "epoch": 0.74, "grad_norm": 1.6996561288833618, "learning_rate": 3.448199310317255e-06, "loss": 0.6115, "step": 28699 }, { "epoch": 0.74, "grad_norm": 1.4269499778747559, "learning_rate": 3.447572182580572e-06, "loss": 0.4647, "step": 28700 }, { "epoch": 0.74, "grad_norm": 1.5715445280075073, "learning_rate": 3.4469450999991027e-06, "loss": 0.6312, "step": 28701 }, { "epoch": 0.74, "grad_norm": 0.8957135081291199, "learning_rate": 3.446318062577173e-06, "loss": 0.4479, "step": 28702 }, { "epoch": 0.74, "grad_norm": 14.019861221313477, "learning_rate": 3.4456910703191013e-06, "loss": 0.5002, "step": 28703 }, { "epoch": 0.74, "grad_norm": 1.5432791709899902, "learning_rate": 3.4450641232292103e-06, "loss": 0.414, "step": 28704 }, { "epoch": 0.74, "grad_norm": 3.738090753555298, "learning_rate": 3.444437221311818e-06, "loss": 0.4347, "step": 28705 }, { "epoch": 0.74, "grad_norm": 1.4817984104156494, "learning_rate": 3.443810364571246e-06, "loss": 0.5811, "step": 28706 }, { "epoch": 0.74, "grad_norm": 1.7847944498062134, "learning_rate": 3.4431835530118117e-06, "loss": 0.601, "step": 28707 }, { "epoch": 0.74, "grad_norm": 1.1063041687011719, "learning_rate": 3.4425567866378396e-06, "loss": 0.504, "step": 28708 }, { "epoch": 0.74, "grad_norm": 2.037470579147339, "learning_rate": 3.441930065453646e-06, "loss": 0.6017, "step": 28709 }, { "epoch": 0.74, "grad_norm": 1.6877549886703491, "learning_rate": 3.4413033894635505e-06, "loss": 0.4354, "step": 28710 }, { "epoch": 0.74, "grad_norm": 1.0896705389022827, "learning_rate": 3.440676758671868e-06, "loss": 0.4866, "step": 28711 }, { "epoch": 0.74, "grad_norm": 2.146981716156006, "learning_rate": 3.440050173082924e-06, "loss": 0.5026, "step": 28712 }, { "epoch": 0.74, "grad_norm": 1.116556167602539, "learning_rate": 3.4394236327010322e-06, "loss": 0.5786, "step": 28713 }, { "epoch": 0.74, "grad_norm": 1.8279407024383545, "learning_rate": 3.4387971375305083e-06, "loss": 0.6252, "step": 28714 }, { "epoch": 0.74, "grad_norm": 1.4754226207733154, "learning_rate": 3.438170687575675e-06, "loss": 0.5644, "step": 28715 }, { "epoch": 0.74, "grad_norm": 1.478623390197754, "learning_rate": 3.4375442828408467e-06, "loss": 0.452, "step": 28716 }, { "epoch": 0.74, "grad_norm": 2.7278761863708496, "learning_rate": 3.4369179233303373e-06, "loss": 0.6828, "step": 28717 }, { "epoch": 0.74, "grad_norm": 2.553004741668701, "learning_rate": 3.4362916090484697e-06, "loss": 0.6664, "step": 28718 }, { "epoch": 0.74, "grad_norm": 1.8084205389022827, "learning_rate": 3.4356653399995575e-06, "loss": 0.6856, "step": 28719 }, { "epoch": 0.74, "grad_norm": 3.3098089694976807, "learning_rate": 3.435039116187915e-06, "loss": 0.7511, "step": 28720 }, { "epoch": 0.74, "grad_norm": 1.4459456205368042, "learning_rate": 3.4344129376178537e-06, "loss": 0.5281, "step": 28721 }, { "epoch": 0.74, "grad_norm": 4.606636047363281, "learning_rate": 3.4337868042936986e-06, "loss": 0.4409, "step": 28722 }, { "epoch": 0.74, "grad_norm": 1.6166670322418213, "learning_rate": 3.4331607162197587e-06, "loss": 0.6531, "step": 28723 }, { "epoch": 0.74, "grad_norm": 1.7098339796066284, "learning_rate": 3.4325346734003464e-06, "loss": 0.4352, "step": 28724 }, { "epoch": 0.74, "grad_norm": 1.4027917385101318, "learning_rate": 3.4319086758397823e-06, "loss": 0.5785, "step": 28725 }, { "epoch": 0.74, "grad_norm": 1.751552700996399, "learning_rate": 3.4312827235423774e-06, "loss": 0.5871, "step": 28726 }, { "epoch": 0.74, "grad_norm": 1.3586026430130005, "learning_rate": 3.4306568165124444e-06, "loss": 0.4708, "step": 28727 }, { "epoch": 0.74, "grad_norm": 2.638370990753174, "learning_rate": 3.4300309547542943e-06, "loss": 0.5618, "step": 28728 }, { "epoch": 0.74, "grad_norm": 2.0488500595092773, "learning_rate": 3.429405138272247e-06, "loss": 0.4298, "step": 28729 }, { "epoch": 0.74, "grad_norm": 1.1988213062286377, "learning_rate": 3.4287793670706115e-06, "loss": 0.5444, "step": 28730 }, { "epoch": 0.74, "grad_norm": 1.7560882568359375, "learning_rate": 3.4281536411536966e-06, "loss": 0.5875, "step": 28731 }, { "epoch": 0.74, "grad_norm": 5.235384464263916, "learning_rate": 3.4275279605258207e-06, "loss": 0.5967, "step": 28732 }, { "epoch": 0.74, "grad_norm": 1.2214033603668213, "learning_rate": 3.4269023251912935e-06, "loss": 0.4252, "step": 28733 }, { "epoch": 0.74, "grad_norm": 3.9832780361175537, "learning_rate": 3.4262767351544257e-06, "loss": 0.7164, "step": 28734 }, { "epoch": 0.74, "grad_norm": 5.361453056335449, "learning_rate": 3.4256511904195243e-06, "loss": 0.4389, "step": 28735 }, { "epoch": 0.74, "grad_norm": 1.5500296354293823, "learning_rate": 3.4250256909909096e-06, "loss": 0.6617, "step": 28736 }, { "epoch": 0.74, "grad_norm": 1.613974690437317, "learning_rate": 3.424400236872886e-06, "loss": 0.583, "step": 28737 }, { "epoch": 0.74, "grad_norm": 2.1415457725524902, "learning_rate": 3.4237748280697615e-06, "loss": 0.6225, "step": 28738 }, { "epoch": 0.74, "grad_norm": 1.239337682723999, "learning_rate": 3.423149464585853e-06, "loss": 0.404, "step": 28739 }, { "epoch": 0.74, "grad_norm": 1.35720956325531, "learning_rate": 3.4225241464254655e-06, "loss": 0.5549, "step": 28740 }, { "epoch": 0.74, "grad_norm": 2.061819314956665, "learning_rate": 3.4218988735929072e-06, "loss": 0.5746, "step": 28741 }, { "epoch": 0.74, "grad_norm": 0.8673865795135498, "learning_rate": 3.421273646092491e-06, "loss": 0.511, "step": 28742 }, { "epoch": 0.74, "grad_norm": 1.396854043006897, "learning_rate": 3.420648463928525e-06, "loss": 0.5172, "step": 28743 }, { "epoch": 0.74, "grad_norm": 1.4380779266357422, "learning_rate": 3.4200233271053153e-06, "loss": 0.5974, "step": 28744 }, { "epoch": 0.74, "grad_norm": 1.2834134101867676, "learning_rate": 3.419398235627168e-06, "loss": 0.5031, "step": 28745 }, { "epoch": 0.74, "grad_norm": 3.638927698135376, "learning_rate": 3.418773189498398e-06, "loss": 0.5559, "step": 28746 }, { "epoch": 0.74, "grad_norm": 1.7643190622329712, "learning_rate": 3.418148188723307e-06, "loss": 0.461, "step": 28747 }, { "epoch": 0.74, "grad_norm": 6.21100378036499, "learning_rate": 3.4175232333062e-06, "loss": 0.5186, "step": 28748 }, { "epoch": 0.74, "grad_norm": 3.7176663875579834, "learning_rate": 3.416898323251392e-06, "loss": 0.604, "step": 28749 }, { "epoch": 0.74, "grad_norm": 3.5679352283477783, "learning_rate": 3.416273458563184e-06, "loss": 0.6543, "step": 28750 }, { "epoch": 0.74, "grad_norm": 1.0551795959472656, "learning_rate": 3.4156486392458834e-06, "loss": 0.529, "step": 28751 }, { "epoch": 0.74, "grad_norm": 0.9719945788383484, "learning_rate": 3.4150238653037913e-06, "loss": 0.4242, "step": 28752 }, { "epoch": 0.74, "grad_norm": 1.499619722366333, "learning_rate": 3.414399136741222e-06, "loss": 0.5132, "step": 28753 }, { "epoch": 0.74, "grad_norm": 1.4860426187515259, "learning_rate": 3.4137744535624763e-06, "loss": 0.5149, "step": 28754 }, { "epoch": 0.74, "grad_norm": 2.2483904361724854, "learning_rate": 3.4131498157718558e-06, "loss": 0.6301, "step": 28755 }, { "epoch": 0.74, "grad_norm": 3.0923120975494385, "learning_rate": 3.4125252233736716e-06, "loss": 0.566, "step": 28756 }, { "epoch": 0.74, "grad_norm": 2.8679165840148926, "learning_rate": 3.411900676372225e-06, "loss": 0.5386, "step": 28757 }, { "epoch": 0.74, "grad_norm": 1.6689066886901855, "learning_rate": 3.411276174771817e-06, "loss": 0.4105, "step": 28758 }, { "epoch": 0.74, "grad_norm": 1.0555163621902466, "learning_rate": 3.4106517185767572e-06, "loss": 0.4984, "step": 28759 }, { "epoch": 0.74, "grad_norm": 2.2183659076690674, "learning_rate": 3.410027307791346e-06, "loss": 0.4815, "step": 28760 }, { "epoch": 0.74, "grad_norm": 1.7240861654281616, "learning_rate": 3.409402942419886e-06, "loss": 0.6175, "step": 28761 }, { "epoch": 0.74, "grad_norm": 3.8672573566436768, "learning_rate": 3.4087786224666775e-06, "loss": 0.4106, "step": 28762 }, { "epoch": 0.74, "grad_norm": 11.011115074157715, "learning_rate": 3.408154347936029e-06, "loss": 0.4587, "step": 28763 }, { "epoch": 0.74, "grad_norm": 1.1262562274932861, "learning_rate": 3.4075301188322384e-06, "loss": 0.5397, "step": 28764 }, { "epoch": 0.74, "grad_norm": 1.852289080619812, "learning_rate": 3.4069059351596055e-06, "loss": 0.4745, "step": 28765 }, { "epoch": 0.74, "grad_norm": 1.677451729774475, "learning_rate": 3.406281796922439e-06, "loss": 0.7151, "step": 28766 }, { "epoch": 0.74, "grad_norm": 1.248497724533081, "learning_rate": 3.405657704125035e-06, "loss": 0.6213, "step": 28767 }, { "epoch": 0.74, "grad_norm": 1.2620558738708496, "learning_rate": 3.405033656771696e-06, "loss": 0.5363, "step": 28768 }, { "epoch": 0.74, "grad_norm": 0.9726373553276062, "learning_rate": 3.404409654866717e-06, "loss": 0.446, "step": 28769 }, { "epoch": 0.74, "grad_norm": 1.1778178215026855, "learning_rate": 3.403785698414407e-06, "loss": 0.4966, "step": 28770 }, { "epoch": 0.74, "grad_norm": 6.595635890960693, "learning_rate": 3.4031617874190614e-06, "loss": 0.5767, "step": 28771 }, { "epoch": 0.74, "grad_norm": 3.456721782684326, "learning_rate": 3.4025379218849773e-06, "loss": 0.6624, "step": 28772 }, { "epoch": 0.74, "grad_norm": 4.03446102142334, "learning_rate": 3.4019141018164604e-06, "loss": 0.6041, "step": 28773 }, { "epoch": 0.74, "grad_norm": 2.2253336906433105, "learning_rate": 3.4012903272178053e-06, "loss": 0.5684, "step": 28774 }, { "epoch": 0.74, "grad_norm": 1.5442390441894531, "learning_rate": 3.4006665980933118e-06, "loss": 0.5002, "step": 28775 }, { "epoch": 0.74, "grad_norm": 1.2395265102386475, "learning_rate": 3.400042914447276e-06, "loss": 0.5548, "step": 28776 }, { "epoch": 0.74, "grad_norm": 7.453185558319092, "learning_rate": 3.3994192762839996e-06, "loss": 0.4318, "step": 28777 }, { "epoch": 0.74, "grad_norm": 0.9374431371688843, "learning_rate": 3.3987956836077797e-06, "loss": 0.4825, "step": 28778 }, { "epoch": 0.74, "grad_norm": 2.6250059604644775, "learning_rate": 3.398172136422908e-06, "loss": 0.5529, "step": 28779 }, { "epoch": 0.74, "grad_norm": 1.2165919542312622, "learning_rate": 3.3975486347336907e-06, "loss": 0.4739, "step": 28780 }, { "epoch": 0.74, "grad_norm": 1.4584697484970093, "learning_rate": 3.3969251785444204e-06, "loss": 0.5813, "step": 28781 }, { "epoch": 0.74, "grad_norm": 1.3684061765670776, "learning_rate": 3.396301767859389e-06, "loss": 0.6044, "step": 28782 }, { "epoch": 0.74, "grad_norm": 1.547548532485962, "learning_rate": 3.3956784026829014e-06, "loss": 0.4733, "step": 28783 }, { "epoch": 0.74, "grad_norm": 4.9539899826049805, "learning_rate": 3.3950550830192485e-06, "loss": 0.5846, "step": 28784 }, { "epoch": 0.74, "grad_norm": 2.601433753967285, "learning_rate": 3.394431808872726e-06, "loss": 0.5547, "step": 28785 }, { "epoch": 0.74, "grad_norm": 1.8717883825302124, "learning_rate": 3.393808580247626e-06, "loss": 0.5561, "step": 28786 }, { "epoch": 0.74, "grad_norm": 2.2892556190490723, "learning_rate": 3.393185397148251e-06, "loss": 0.5517, "step": 28787 }, { "epoch": 0.74, "grad_norm": 2.0503480434417725, "learning_rate": 3.392562259578891e-06, "loss": 0.5367, "step": 28788 }, { "epoch": 0.74, "grad_norm": 1.1419355869293213, "learning_rate": 3.3919391675438375e-06, "loss": 0.5699, "step": 28789 }, { "epoch": 0.74, "grad_norm": 2.261324167251587, "learning_rate": 3.391316121047391e-06, "loss": 0.7038, "step": 28790 }, { "epoch": 0.74, "grad_norm": 1.5455548763275146, "learning_rate": 3.3906931200938407e-06, "loss": 0.4712, "step": 28791 }, { "epoch": 0.74, "grad_norm": 2.2172560691833496, "learning_rate": 3.390070164687482e-06, "loss": 0.5338, "step": 28792 }, { "epoch": 0.74, "grad_norm": 2.190521478652954, "learning_rate": 3.389447254832604e-06, "loss": 0.5301, "step": 28793 }, { "epoch": 0.74, "grad_norm": 1.4420973062515259, "learning_rate": 3.388824390533506e-06, "loss": 0.3328, "step": 28794 }, { "epoch": 0.74, "grad_norm": 1.3161557912826538, "learning_rate": 3.388201571794476e-06, "loss": 0.5784, "step": 28795 }, { "epoch": 0.74, "grad_norm": 1.3518105745315552, "learning_rate": 3.387578798619804e-06, "loss": 0.5057, "step": 28796 }, { "epoch": 0.74, "grad_norm": 1.52079176902771, "learning_rate": 3.386956071013787e-06, "loss": 0.6707, "step": 28797 }, { "epoch": 0.74, "grad_norm": 16.56729507446289, "learning_rate": 3.3863333889807147e-06, "loss": 0.4272, "step": 28798 }, { "epoch": 0.74, "grad_norm": 1.4315135478973389, "learning_rate": 3.3857107525248776e-06, "loss": 0.5409, "step": 28799 }, { "epoch": 0.74, "grad_norm": 11.877185821533203, "learning_rate": 3.3850881616505625e-06, "loss": 0.4831, "step": 28800 }, { "epoch": 0.74, "grad_norm": 1.1060714721679688, "learning_rate": 3.384465616362068e-06, "loss": 0.4949, "step": 28801 }, { "epoch": 0.74, "grad_norm": 3.9880995750427246, "learning_rate": 3.3838431166636798e-06, "loss": 0.4893, "step": 28802 }, { "epoch": 0.74, "grad_norm": 1.993627667427063, "learning_rate": 3.3832206625596853e-06, "loss": 0.5765, "step": 28803 }, { "epoch": 0.74, "grad_norm": 11.578511238098145, "learning_rate": 3.38259825405438e-06, "loss": 0.5477, "step": 28804 }, { "epoch": 0.74, "grad_norm": 1.1297255754470825, "learning_rate": 3.3819758911520495e-06, "loss": 0.4769, "step": 28805 }, { "epoch": 0.74, "grad_norm": 1.882826566696167, "learning_rate": 3.3813535738569805e-06, "loss": 0.5837, "step": 28806 }, { "epoch": 0.74, "grad_norm": 1.346564531326294, "learning_rate": 3.3807313021734676e-06, "loss": 0.5183, "step": 28807 }, { "epoch": 0.74, "grad_norm": 4.477010726928711, "learning_rate": 3.3801090761057965e-06, "loss": 0.5237, "step": 28808 }, { "epoch": 0.74, "grad_norm": 7.140453338623047, "learning_rate": 3.379486895658254e-06, "loss": 0.5877, "step": 28809 }, { "epoch": 0.74, "grad_norm": 1.1486668586730957, "learning_rate": 3.3788647608351254e-06, "loss": 0.5053, "step": 28810 }, { "epoch": 0.74, "grad_norm": 2.2631912231445312, "learning_rate": 3.3782426716407033e-06, "loss": 0.6116, "step": 28811 }, { "epoch": 0.74, "grad_norm": 0.9285700917243958, "learning_rate": 3.3776206280792746e-06, "loss": 0.4539, "step": 28812 }, { "epoch": 0.74, "grad_norm": 24.95217514038086, "learning_rate": 3.3769986301551196e-06, "loss": 0.5209, "step": 28813 }, { "epoch": 0.74, "grad_norm": 0.9266951084136963, "learning_rate": 3.3763766778725316e-06, "loss": 0.4265, "step": 28814 }, { "epoch": 0.74, "grad_norm": 1.3912047147750854, "learning_rate": 3.375754771235795e-06, "loss": 0.5494, "step": 28815 }, { "epoch": 0.74, "grad_norm": 1.8295300006866455, "learning_rate": 3.375132910249195e-06, "loss": 0.5723, "step": 28816 }, { "epoch": 0.74, "grad_norm": 1.5251954793930054, "learning_rate": 3.3745110949170125e-06, "loss": 0.5723, "step": 28817 }, { "epoch": 0.74, "grad_norm": 1.8284401893615723, "learning_rate": 3.373889325243541e-06, "loss": 0.672, "step": 28818 }, { "epoch": 0.74, "grad_norm": 2.403489112854004, "learning_rate": 3.373267601233061e-06, "loss": 0.5475, "step": 28819 }, { "epoch": 0.74, "grad_norm": 1.5089861154556274, "learning_rate": 3.3726459228898543e-06, "loss": 0.5581, "step": 28820 }, { "epoch": 0.74, "grad_norm": 1.2561887502670288, "learning_rate": 3.372024290218212e-06, "loss": 0.5544, "step": 28821 }, { "epoch": 0.74, "grad_norm": 1.6219152212142944, "learning_rate": 3.371402703222414e-06, "loss": 0.5644, "step": 28822 }, { "epoch": 0.74, "grad_norm": 2.0403356552124023, "learning_rate": 3.37078116190674e-06, "loss": 0.4447, "step": 28823 }, { "epoch": 0.74, "grad_norm": 1.3840073347091675, "learning_rate": 3.370159666275481e-06, "loss": 0.4449, "step": 28824 }, { "epoch": 0.74, "grad_norm": 1.1510827541351318, "learning_rate": 3.369538216332916e-06, "loss": 0.3963, "step": 28825 }, { "epoch": 0.74, "grad_norm": 2.052349805831909, "learning_rate": 3.3689168120833295e-06, "loss": 0.5178, "step": 28826 }, { "epoch": 0.74, "grad_norm": 1.0739139318466187, "learning_rate": 3.3682954535309977e-06, "loss": 0.4417, "step": 28827 }, { "epoch": 0.74, "grad_norm": 1.3863005638122559, "learning_rate": 3.3676741406802104e-06, "loss": 0.5659, "step": 28828 }, { "epoch": 0.74, "grad_norm": 2.345954656600952, "learning_rate": 3.367052873535246e-06, "loss": 0.4885, "step": 28829 }, { "epoch": 0.74, "grad_norm": 1.1673264503479004, "learning_rate": 3.3664316521003824e-06, "loss": 0.3453, "step": 28830 }, { "epoch": 0.74, "grad_norm": 2.244934558868408, "learning_rate": 3.365810476379908e-06, "loss": 0.5371, "step": 28831 }, { "epoch": 0.74, "grad_norm": 2.2519617080688477, "learning_rate": 3.3651893463781004e-06, "loss": 0.4472, "step": 28832 }, { "epoch": 0.74, "grad_norm": 1.5374130010604858, "learning_rate": 3.3645682620992394e-06, "loss": 0.6753, "step": 28833 }, { "epoch": 0.74, "grad_norm": 1.0612179040908813, "learning_rate": 3.3639472235476013e-06, "loss": 0.4914, "step": 28834 }, { "epoch": 0.74, "grad_norm": 1.2612384557724, "learning_rate": 3.3633262307274727e-06, "loss": 0.4496, "step": 28835 }, { "epoch": 0.74, "grad_norm": 0.9872039556503296, "learning_rate": 3.3627052836431307e-06, "loss": 0.3986, "step": 28836 }, { "epoch": 0.74, "grad_norm": 1.3094557523727417, "learning_rate": 3.3620843822988513e-06, "loss": 0.4019, "step": 28837 }, { "epoch": 0.74, "grad_norm": 0.982279360294342, "learning_rate": 3.361463526698918e-06, "loss": 0.4351, "step": 28838 }, { "epoch": 0.74, "grad_norm": 4.36130428314209, "learning_rate": 3.3608427168476077e-06, "loss": 0.7129, "step": 28839 }, { "epoch": 0.74, "grad_norm": 4.193141460418701, "learning_rate": 3.3602219527491986e-06, "loss": 0.4783, "step": 28840 }, { "epoch": 0.74, "grad_norm": 1.3595833778381348, "learning_rate": 3.3596012344079654e-06, "loss": 0.5329, "step": 28841 }, { "epoch": 0.74, "grad_norm": 1.540291428565979, "learning_rate": 3.358980561828191e-06, "loss": 0.5914, "step": 28842 }, { "epoch": 0.74, "grad_norm": 1.4369019269943237, "learning_rate": 3.3583599350141505e-06, "loss": 0.3521, "step": 28843 }, { "epoch": 0.74, "grad_norm": 1.6302961111068726, "learning_rate": 3.3577393539701174e-06, "loss": 0.473, "step": 28844 }, { "epoch": 0.74, "grad_norm": 2.221475839614868, "learning_rate": 3.3571188187003746e-06, "loss": 0.7153, "step": 28845 }, { "epoch": 0.74, "grad_norm": 2.5271432399749756, "learning_rate": 3.3564983292091957e-06, "loss": 0.4886, "step": 28846 }, { "epoch": 0.74, "grad_norm": 4.575162887573242, "learning_rate": 3.355877885500853e-06, "loss": 0.743, "step": 28847 }, { "epoch": 0.74, "grad_norm": 5.01906681060791, "learning_rate": 3.3552574875796297e-06, "loss": 0.6636, "step": 28848 }, { "epoch": 0.74, "grad_norm": 2.199350357055664, "learning_rate": 3.3546371354497963e-06, "loss": 0.6702, "step": 28849 }, { "epoch": 0.74, "grad_norm": 3.0145671367645264, "learning_rate": 3.3540168291156293e-06, "loss": 0.4644, "step": 28850 }, { "epoch": 0.74, "grad_norm": 12.226615905761719, "learning_rate": 3.3533965685814e-06, "loss": 0.3956, "step": 28851 }, { "epoch": 0.74, "grad_norm": 1.4923681020736694, "learning_rate": 3.352776353851388e-06, "loss": 0.6051, "step": 28852 }, { "epoch": 0.74, "grad_norm": 6.938601970672607, "learning_rate": 3.3521561849298657e-06, "loss": 0.5769, "step": 28853 }, { "epoch": 0.74, "grad_norm": 9.65787124633789, "learning_rate": 3.351536061821102e-06, "loss": 0.4386, "step": 28854 }, { "epoch": 0.74, "grad_norm": 1.9152430295944214, "learning_rate": 3.3509159845293794e-06, "loss": 0.5796, "step": 28855 }, { "epoch": 0.74, "grad_norm": 1.391681432723999, "learning_rate": 3.350295953058966e-06, "loss": 0.5933, "step": 28856 }, { "epoch": 0.74, "grad_norm": 1.3685332536697388, "learning_rate": 3.3496759674141356e-06, "loss": 0.5012, "step": 28857 }, { "epoch": 0.74, "grad_norm": 1.4353351593017578, "learning_rate": 3.349056027599157e-06, "loss": 0.5053, "step": 28858 }, { "epoch": 0.74, "grad_norm": 1.3562350273132324, "learning_rate": 3.348436133618308e-06, "loss": 0.6679, "step": 28859 }, { "epoch": 0.74, "grad_norm": 1.889173150062561, "learning_rate": 3.347816285475859e-06, "loss": 0.5744, "step": 28860 }, { "epoch": 0.74, "grad_norm": 4.951956748962402, "learning_rate": 3.347196483176078e-06, "loss": 0.5335, "step": 28861 }, { "epoch": 0.74, "grad_norm": 6.6759867668151855, "learning_rate": 3.346576726723242e-06, "loss": 0.3826, "step": 28862 }, { "epoch": 0.74, "grad_norm": 1.5094045400619507, "learning_rate": 3.3459570161216193e-06, "loss": 0.5403, "step": 28863 }, { "epoch": 0.74, "grad_norm": 1.5121338367462158, "learning_rate": 3.345337351375476e-06, "loss": 0.5607, "step": 28864 }, { "epoch": 0.74, "grad_norm": 1.696171522140503, "learning_rate": 3.344717732489091e-06, "loss": 0.5917, "step": 28865 }, { "epoch": 0.74, "grad_norm": 1.4431222677230835, "learning_rate": 3.34409815946673e-06, "loss": 0.4327, "step": 28866 }, { "epoch": 0.74, "grad_norm": 1.2151621580123901, "learning_rate": 3.3434786323126634e-06, "loss": 0.4899, "step": 28867 }, { "epoch": 0.74, "grad_norm": 2.8637704849243164, "learning_rate": 3.342859151031156e-06, "loss": 0.5052, "step": 28868 }, { "epoch": 0.74, "grad_norm": 0.9260560870170593, "learning_rate": 3.342239715626484e-06, "loss": 0.5648, "step": 28869 }, { "epoch": 0.74, "grad_norm": 3.2771739959716797, "learning_rate": 3.3416203261029134e-06, "loss": 0.6442, "step": 28870 }, { "epoch": 0.74, "grad_norm": 2.4876656532287598, "learning_rate": 3.341000982464709e-06, "loss": 0.5631, "step": 28871 }, { "epoch": 0.74, "grad_norm": 1.169642686843872, "learning_rate": 3.3403816847161463e-06, "loss": 0.5029, "step": 28872 }, { "epoch": 0.74, "grad_norm": 1.1511808633804321, "learning_rate": 3.3397624328614887e-06, "loss": 0.5437, "step": 28873 }, { "epoch": 0.74, "grad_norm": 14.642285346984863, "learning_rate": 3.339143226905003e-06, "loss": 0.6509, "step": 28874 }, { "epoch": 0.74, "grad_norm": 1.6829808950424194, "learning_rate": 3.3385240668509544e-06, "loss": 0.4546, "step": 28875 }, { "epoch": 0.74, "grad_norm": 1.2410613298416138, "learning_rate": 3.3379049527036167e-06, "loss": 0.5326, "step": 28876 }, { "epoch": 0.74, "grad_norm": 1.599611759185791, "learning_rate": 3.337285884467253e-06, "loss": 0.6034, "step": 28877 }, { "epoch": 0.74, "grad_norm": 1.1279524564743042, "learning_rate": 3.336666862146125e-06, "loss": 0.4509, "step": 28878 }, { "epoch": 0.74, "grad_norm": 4.194220542907715, "learning_rate": 3.3360478857445054e-06, "loss": 0.6015, "step": 28879 }, { "epoch": 0.74, "grad_norm": 1.4865106344223022, "learning_rate": 3.3354289552666586e-06, "loss": 0.5463, "step": 28880 }, { "epoch": 0.74, "grad_norm": 1.5574761629104614, "learning_rate": 3.3348100707168475e-06, "loss": 0.4345, "step": 28881 }, { "epoch": 0.74, "grad_norm": 3.0691988468170166, "learning_rate": 3.3341912320993342e-06, "loss": 0.7212, "step": 28882 }, { "epoch": 0.74, "grad_norm": 1.2676355838775635, "learning_rate": 3.333572439418391e-06, "loss": 0.5915, "step": 28883 }, { "epoch": 0.74, "grad_norm": 1.5332778692245483, "learning_rate": 3.332953692678278e-06, "loss": 0.5699, "step": 28884 }, { "epoch": 0.74, "grad_norm": 1.730914831161499, "learning_rate": 3.332334991883256e-06, "loss": 0.6412, "step": 28885 }, { "epoch": 0.74, "grad_norm": 4.83126163482666, "learning_rate": 3.331716337037596e-06, "loss": 0.5386, "step": 28886 }, { "epoch": 0.74, "grad_norm": 1.5220223665237427, "learning_rate": 3.331097728145557e-06, "loss": 0.6021, "step": 28887 }, { "epoch": 0.74, "grad_norm": 1.1772024631500244, "learning_rate": 3.3304791652113998e-06, "loss": 0.4746, "step": 28888 }, { "epoch": 0.74, "grad_norm": 1.9938077926635742, "learning_rate": 3.3298606482393923e-06, "loss": 0.5289, "step": 28889 }, { "epoch": 0.74, "grad_norm": 1.3725004196166992, "learning_rate": 3.3292421772337957e-06, "loss": 0.569, "step": 28890 }, { "epoch": 0.74, "grad_norm": 2.8498740196228027, "learning_rate": 3.3286237521988706e-06, "loss": 0.5991, "step": 28891 }, { "epoch": 0.74, "grad_norm": 1.458238959312439, "learning_rate": 3.328005373138876e-06, "loss": 0.5003, "step": 28892 }, { "epoch": 0.74, "grad_norm": 0.9554827809333801, "learning_rate": 3.327387040058081e-06, "loss": 0.5241, "step": 28893 }, { "epoch": 0.74, "grad_norm": 1.8209748268127441, "learning_rate": 3.3267687529607406e-06, "loss": 0.4898, "step": 28894 }, { "epoch": 0.74, "grad_norm": 0.9261329174041748, "learning_rate": 3.326150511851115e-06, "loss": 0.5301, "step": 28895 }, { "epoch": 0.74, "grad_norm": 1.0559148788452148, "learning_rate": 3.325532316733471e-06, "loss": 0.4907, "step": 28896 }, { "epoch": 0.74, "grad_norm": 1.4391080141067505, "learning_rate": 3.3249141676120643e-06, "loss": 0.6249, "step": 28897 }, { "epoch": 0.74, "grad_norm": 1.0670174360275269, "learning_rate": 3.324296064491155e-06, "loss": 0.436, "step": 28898 }, { "epoch": 0.74, "grad_norm": 1.0877606868743896, "learning_rate": 3.323678007375001e-06, "loss": 0.5125, "step": 28899 }, { "epoch": 0.74, "grad_norm": 1.1710443496704102, "learning_rate": 3.3230599962678654e-06, "loss": 0.4439, "step": 28900 }, { "epoch": 0.74, "grad_norm": 1.0815930366516113, "learning_rate": 3.3224420311740067e-06, "loss": 0.4689, "step": 28901 }, { "epoch": 0.74, "grad_norm": 1.1436164379119873, "learning_rate": 3.321824112097679e-06, "loss": 0.4878, "step": 28902 }, { "epoch": 0.74, "grad_norm": 1.0803979635238647, "learning_rate": 3.3212062390431465e-06, "loss": 0.4647, "step": 28903 }, { "epoch": 0.74, "grad_norm": 1.112396478652954, "learning_rate": 3.3205884120146646e-06, "loss": 0.4452, "step": 28904 }, { "epoch": 0.74, "grad_norm": 4.319253921508789, "learning_rate": 3.319970631016488e-06, "loss": 0.7147, "step": 28905 }, { "epoch": 0.74, "grad_norm": 1.362866997718811, "learning_rate": 3.319352896052881e-06, "loss": 0.4747, "step": 28906 }, { "epoch": 0.74, "grad_norm": 2.5699360370635986, "learning_rate": 3.3187352071280955e-06, "loss": 0.6495, "step": 28907 }, { "epoch": 0.74, "grad_norm": 1.4328656196594238, "learning_rate": 3.3181175642463903e-06, "loss": 0.5607, "step": 28908 }, { "epoch": 0.74, "grad_norm": 3.30940318107605, "learning_rate": 3.3174999674120167e-06, "loss": 0.5563, "step": 28909 }, { "epoch": 0.74, "grad_norm": 1.9799076318740845, "learning_rate": 3.316882416629238e-06, "loss": 0.6154, "step": 28910 }, { "epoch": 0.74, "grad_norm": 3.1363892555236816, "learning_rate": 3.3162649119023084e-06, "loss": 0.5826, "step": 28911 }, { "epoch": 0.74, "grad_norm": 1.2389187812805176, "learning_rate": 3.315647453235478e-06, "loss": 0.5167, "step": 28912 }, { "epoch": 0.74, "grad_norm": 1.0589414834976196, "learning_rate": 3.315030040633008e-06, "loss": 0.652, "step": 28913 }, { "epoch": 0.74, "grad_norm": 1.6971579790115356, "learning_rate": 3.3144126740991524e-06, "loss": 0.5612, "step": 28914 }, { "epoch": 0.74, "grad_norm": 1.5273549556732178, "learning_rate": 3.313795353638164e-06, "loss": 0.5443, "step": 28915 }, { "epoch": 0.74, "grad_norm": 3.895678758621216, "learning_rate": 3.3131780792542934e-06, "loss": 0.6101, "step": 28916 }, { "epoch": 0.74, "grad_norm": 9.410194396972656, "learning_rate": 3.3125608509518024e-06, "loss": 0.5843, "step": 28917 }, { "epoch": 0.74, "grad_norm": 1.3892048597335815, "learning_rate": 3.3119436687349404e-06, "loss": 0.6808, "step": 28918 }, { "epoch": 0.74, "grad_norm": 1.4749829769134521, "learning_rate": 3.3113265326079567e-06, "loss": 0.4321, "step": 28919 }, { "epoch": 0.74, "grad_norm": 1.5711454153060913, "learning_rate": 3.310709442575113e-06, "loss": 0.5256, "step": 28920 }, { "epoch": 0.74, "grad_norm": 1.2131460905075073, "learning_rate": 3.3100923986406564e-06, "loss": 0.5474, "step": 28921 }, { "epoch": 0.74, "grad_norm": 1.6291844844818115, "learning_rate": 3.30947540080884e-06, "loss": 0.6087, "step": 28922 }, { "epoch": 0.74, "grad_norm": 6.027129650115967, "learning_rate": 3.3088584490839127e-06, "loss": 0.6562, "step": 28923 }, { "epoch": 0.74, "grad_norm": 1.7478313446044922, "learning_rate": 3.308241543470132e-06, "loss": 0.6108, "step": 28924 }, { "epoch": 0.74, "grad_norm": 1.7289170026779175, "learning_rate": 3.3076246839717473e-06, "loss": 0.4625, "step": 28925 }, { "epoch": 0.74, "grad_norm": 3.3302297592163086, "learning_rate": 3.307007870593004e-06, "loss": 0.5632, "step": 28926 }, { "epoch": 0.74, "grad_norm": 2.3463478088378906, "learning_rate": 3.3063911033381625e-06, "loss": 0.5689, "step": 28927 }, { "epoch": 0.74, "grad_norm": 1.7353252172470093, "learning_rate": 3.3057743822114675e-06, "loss": 0.598, "step": 28928 }, { "epoch": 0.74, "grad_norm": 1.642948031425476, "learning_rate": 3.305157707217166e-06, "loss": 0.4901, "step": 28929 }, { "epoch": 0.74, "grad_norm": 1.4772270917892456, "learning_rate": 3.3045410783595155e-06, "loss": 0.4248, "step": 28930 }, { "epoch": 0.74, "grad_norm": 2.508429765701294, "learning_rate": 3.3039244956427606e-06, "loss": 0.405, "step": 28931 }, { "epoch": 0.74, "grad_norm": 1.172980546951294, "learning_rate": 3.3033079590711516e-06, "loss": 0.6619, "step": 28932 }, { "epoch": 0.74, "grad_norm": 9.068594932556152, "learning_rate": 3.3026914686489332e-06, "loss": 0.5908, "step": 28933 }, { "epoch": 0.74, "grad_norm": 2.746718645095825, "learning_rate": 3.302075024380361e-06, "loss": 0.5777, "step": 28934 }, { "epoch": 0.74, "grad_norm": 1.0289009809494019, "learning_rate": 3.30145862626968e-06, "loss": 0.5548, "step": 28935 }, { "epoch": 0.74, "grad_norm": 4.474620819091797, "learning_rate": 3.3008422743211344e-06, "loss": 0.6962, "step": 28936 }, { "epoch": 0.74, "grad_norm": 1.3587841987609863, "learning_rate": 3.300225968538978e-06, "loss": 0.5588, "step": 28937 }, { "epoch": 0.74, "grad_norm": 1.552991509437561, "learning_rate": 3.2996097089274547e-06, "loss": 0.5531, "step": 28938 }, { "epoch": 0.74, "grad_norm": 1.3093550205230713, "learning_rate": 3.298993495490813e-06, "loss": 0.6189, "step": 28939 }, { "epoch": 0.74, "grad_norm": 1.223114252090454, "learning_rate": 3.2983773282332944e-06, "loss": 0.4234, "step": 28940 }, { "epoch": 0.74, "grad_norm": 4.4277567863464355, "learning_rate": 3.2977612071591516e-06, "loss": 0.5443, "step": 28941 }, { "epoch": 0.74, "grad_norm": 2.725583553314209, "learning_rate": 3.2971451322726277e-06, "loss": 0.4992, "step": 28942 }, { "epoch": 0.74, "grad_norm": 1.1990193128585815, "learning_rate": 3.2965291035779656e-06, "loss": 0.5996, "step": 28943 }, { "epoch": 0.74, "grad_norm": 26.4888973236084, "learning_rate": 3.295913121079416e-06, "loss": 0.6182, "step": 28944 }, { "epoch": 0.74, "grad_norm": 0.9066075682640076, "learning_rate": 3.2952971847812223e-06, "loss": 0.5512, "step": 28945 }, { "epoch": 0.74, "grad_norm": 1.2196685075759888, "learning_rate": 3.2946812946876274e-06, "loss": 0.4504, "step": 28946 }, { "epoch": 0.74, "grad_norm": 1.3791569471359253, "learning_rate": 3.2940654508028726e-06, "loss": 0.4522, "step": 28947 }, { "epoch": 0.74, "grad_norm": 1.7075188159942627, "learning_rate": 3.293449653131213e-06, "loss": 0.652, "step": 28948 }, { "epoch": 0.74, "grad_norm": 1.214111089706421, "learning_rate": 3.2928339016768785e-06, "loss": 0.3625, "step": 28949 }, { "epoch": 0.74, "grad_norm": 1.8002920150756836, "learning_rate": 3.2922181964441213e-06, "loss": 0.502, "step": 28950 }, { "epoch": 0.74, "grad_norm": 1.2292988300323486, "learning_rate": 3.291602537437183e-06, "loss": 0.4206, "step": 28951 }, { "epoch": 0.74, "grad_norm": 1.423293113708496, "learning_rate": 3.2909869246603045e-06, "loss": 0.5862, "step": 28952 }, { "epoch": 0.74, "grad_norm": 1.1788609027862549, "learning_rate": 3.2903713581177265e-06, "loss": 0.5295, "step": 28953 }, { "epoch": 0.74, "grad_norm": 1.4863051176071167, "learning_rate": 3.2897558378136963e-06, "loss": 0.6747, "step": 28954 }, { "epoch": 0.74, "grad_norm": 1.9060641527175903, "learning_rate": 3.2891403637524532e-06, "loss": 0.485, "step": 28955 }, { "epoch": 0.74, "grad_norm": 1.0406180620193481, "learning_rate": 3.2885249359382367e-06, "loss": 0.5919, "step": 28956 }, { "epoch": 0.74, "grad_norm": 1.7210477590560913, "learning_rate": 3.2879095543752915e-06, "loss": 0.4829, "step": 28957 }, { "epoch": 0.74, "grad_norm": 7.016780376434326, "learning_rate": 3.287294219067857e-06, "loss": 0.5121, "step": 28958 }, { "epoch": 0.74, "grad_norm": 1.5161771774291992, "learning_rate": 3.2866789300201694e-06, "loss": 0.7157, "step": 28959 }, { "epoch": 0.74, "grad_norm": 1.4049957990646362, "learning_rate": 3.2860636872364783e-06, "loss": 0.5523, "step": 28960 }, { "epoch": 0.74, "grad_norm": 1.9045910835266113, "learning_rate": 3.285448490721016e-06, "loss": 0.6899, "step": 28961 }, { "epoch": 0.74, "grad_norm": 1.5742560625076294, "learning_rate": 3.2848333404780253e-06, "loss": 0.4717, "step": 28962 }, { "epoch": 0.74, "grad_norm": 1.405536413192749, "learning_rate": 3.2842182365117416e-06, "loss": 0.564, "step": 28963 }, { "epoch": 0.74, "grad_norm": 1.4645919799804688, "learning_rate": 3.283603178826409e-06, "loss": 0.5754, "step": 28964 }, { "epoch": 0.74, "grad_norm": 1.2785660028457642, "learning_rate": 3.282988167426263e-06, "loss": 0.5328, "step": 28965 }, { "epoch": 0.74, "grad_norm": 2.2770912647247314, "learning_rate": 3.2823732023155398e-06, "loss": 0.4504, "step": 28966 }, { "epoch": 0.74, "grad_norm": 1.442130208015442, "learning_rate": 3.281758283498484e-06, "loss": 0.5197, "step": 28967 }, { "epoch": 0.74, "grad_norm": 2.5630409717559814, "learning_rate": 3.2811434109793284e-06, "loss": 0.424, "step": 28968 }, { "epoch": 0.74, "grad_norm": 4.405698299407959, "learning_rate": 3.2805285847623116e-06, "loss": 0.5366, "step": 28969 }, { "epoch": 0.74, "grad_norm": 1.495113492012024, "learning_rate": 3.2799138048516665e-06, "loss": 0.5738, "step": 28970 }, { "epoch": 0.74, "grad_norm": 1.7644102573394775, "learning_rate": 3.279299071251638e-06, "loss": 0.5222, "step": 28971 }, { "epoch": 0.74, "grad_norm": 1.2677942514419556, "learning_rate": 3.2786843839664572e-06, "loss": 0.5226, "step": 28972 }, { "epoch": 0.74, "grad_norm": 1.113691806793213, "learning_rate": 3.278069743000357e-06, "loss": 0.5082, "step": 28973 }, { "epoch": 0.74, "grad_norm": 6.684622287750244, "learning_rate": 3.277455148357581e-06, "loss": 0.6663, "step": 28974 }, { "epoch": 0.74, "grad_norm": 1.7428385019302368, "learning_rate": 3.27684060004236e-06, "loss": 0.5034, "step": 28975 }, { "epoch": 0.74, "grad_norm": 3.595904588699341, "learning_rate": 3.27622609805893e-06, "loss": 0.546, "step": 28976 }, { "epoch": 0.74, "grad_norm": 1.5718228816986084, "learning_rate": 3.2756116424115226e-06, "loss": 0.527, "step": 28977 }, { "epoch": 0.74, "grad_norm": 1.9909266233444214, "learning_rate": 3.2749972331043778e-06, "loss": 0.4425, "step": 28978 }, { "epoch": 0.74, "grad_norm": 1.6909834146499634, "learning_rate": 3.274382870141728e-06, "loss": 0.5005, "step": 28979 }, { "epoch": 0.74, "grad_norm": 1.4636136293411255, "learning_rate": 3.273768553527802e-06, "loss": 0.5719, "step": 28980 }, { "epoch": 0.74, "grad_norm": 1.2725067138671875, "learning_rate": 3.2731542832668405e-06, "loss": 0.4195, "step": 28981 }, { "epoch": 0.74, "grad_norm": 1.2054839134216309, "learning_rate": 3.2725400593630728e-06, "loss": 0.388, "step": 28982 }, { "epoch": 0.74, "grad_norm": 1.1912108659744263, "learning_rate": 3.27192588182073e-06, "loss": 0.4758, "step": 28983 }, { "epoch": 0.74, "grad_norm": 7.867106914520264, "learning_rate": 3.271311750644051e-06, "loss": 0.5791, "step": 28984 }, { "epoch": 0.74, "grad_norm": 1.9507412910461426, "learning_rate": 3.2706976658372626e-06, "loss": 0.5202, "step": 28985 }, { "epoch": 0.74, "grad_norm": 2.9058456420898438, "learning_rate": 3.2700836274045987e-06, "loss": 0.7901, "step": 28986 }, { "epoch": 0.74, "grad_norm": 3.5184664726257324, "learning_rate": 3.2694696353502865e-06, "loss": 0.5558, "step": 28987 }, { "epoch": 0.74, "grad_norm": 2.5390799045562744, "learning_rate": 3.268855689678565e-06, "loss": 0.4905, "step": 28988 }, { "epoch": 0.74, "grad_norm": 1.3061871528625488, "learning_rate": 3.2682417903936616e-06, "loss": 0.4869, "step": 28989 }, { "epoch": 0.74, "grad_norm": 1.1609009504318237, "learning_rate": 3.2676279374998022e-06, "loss": 0.4236, "step": 28990 }, { "epoch": 0.74, "grad_norm": 1.8334287405014038, "learning_rate": 3.2670141310012247e-06, "loss": 0.5729, "step": 28991 }, { "epoch": 0.74, "grad_norm": 1.0017451047897339, "learning_rate": 3.266400370902156e-06, "loss": 0.4895, "step": 28992 }, { "epoch": 0.74, "grad_norm": 2.2562477588653564, "learning_rate": 3.2657866572068263e-06, "loss": 0.5368, "step": 28993 }, { "epoch": 0.74, "grad_norm": 5.671332836151123, "learning_rate": 3.2651729899194596e-06, "loss": 0.4711, "step": 28994 }, { "epoch": 0.74, "grad_norm": 1.404477834701538, "learning_rate": 3.2645593690442934e-06, "loss": 0.6113, "step": 28995 }, { "epoch": 0.74, "grad_norm": 1.9910147190093994, "learning_rate": 3.2639457945855523e-06, "loss": 0.5942, "step": 28996 }, { "epoch": 0.74, "grad_norm": 4.031314849853516, "learning_rate": 3.263332266547461e-06, "loss": 0.7727, "step": 28997 }, { "epoch": 0.74, "grad_norm": 1.6038050651550293, "learning_rate": 3.262718784934256e-06, "loss": 0.2894, "step": 28998 }, { "epoch": 0.74, "grad_norm": 2.953049659729004, "learning_rate": 3.2621053497501585e-06, "loss": 0.5792, "step": 28999 }, { "epoch": 0.74, "grad_norm": 1.5647670030593872, "learning_rate": 3.2614919609993956e-06, "loss": 0.5734, "step": 29000 }, { "epoch": 0.74, "grad_norm": 1.2450393438339233, "learning_rate": 3.2608786186861997e-06, "loss": 0.4514, "step": 29001 }, { "epoch": 0.74, "grad_norm": 1.5538649559020996, "learning_rate": 3.260265322814794e-06, "loss": 0.5604, "step": 29002 }, { "epoch": 0.74, "grad_norm": 7.988642692565918, "learning_rate": 3.2596520733894056e-06, "loss": 0.5392, "step": 29003 }, { "epoch": 0.74, "grad_norm": 1.2867897748947144, "learning_rate": 3.2590388704142574e-06, "loss": 0.5611, "step": 29004 }, { "epoch": 0.74, "grad_norm": 1.0001026391983032, "learning_rate": 3.2584257138935816e-06, "loss": 0.558, "step": 29005 }, { "epoch": 0.74, "grad_norm": 1.493009090423584, "learning_rate": 3.257812603831599e-06, "loss": 0.4476, "step": 29006 }, { "epoch": 0.74, "grad_norm": 1.1107317209243774, "learning_rate": 3.257199540232535e-06, "loss": 0.5439, "step": 29007 }, { "epoch": 0.74, "grad_norm": 3.939455270767212, "learning_rate": 3.2565865231006176e-06, "loss": 0.6196, "step": 29008 }, { "epoch": 0.74, "grad_norm": 2.0776636600494385, "learning_rate": 3.2559735524400692e-06, "loss": 0.5033, "step": 29009 }, { "epoch": 0.74, "grad_norm": 1.1701377630233765, "learning_rate": 3.255360628255114e-06, "loss": 0.501, "step": 29010 }, { "epoch": 0.74, "grad_norm": 1.551103949546814, "learning_rate": 3.2547477505499723e-06, "loss": 0.5104, "step": 29011 }, { "epoch": 0.74, "grad_norm": 5.2026214599609375, "learning_rate": 3.254134919328875e-06, "loss": 0.4932, "step": 29012 }, { "epoch": 0.74, "grad_norm": 1.3074284791946411, "learning_rate": 3.253522134596042e-06, "loss": 0.6531, "step": 29013 }, { "epoch": 0.74, "grad_norm": 15.619553565979004, "learning_rate": 3.2529093963556924e-06, "loss": 0.8804, "step": 29014 }, { "epoch": 0.74, "grad_norm": 1.0947959423065186, "learning_rate": 3.2522967046120555e-06, "loss": 0.6736, "step": 29015 }, { "epoch": 0.74, "grad_norm": 1.1327037811279297, "learning_rate": 3.251684059369351e-06, "loss": 0.4619, "step": 29016 }, { "epoch": 0.74, "grad_norm": 1.179481029510498, "learning_rate": 3.2510714606317997e-06, "loss": 0.5347, "step": 29017 }, { "epoch": 0.74, "grad_norm": 1.41885507106781, "learning_rate": 3.250458908403621e-06, "loss": 0.5991, "step": 29018 }, { "epoch": 0.74, "grad_norm": 2.1338281631469727, "learning_rate": 3.249846402689042e-06, "loss": 0.5322, "step": 29019 }, { "epoch": 0.74, "grad_norm": 2.527975082397461, "learning_rate": 3.2492339434922805e-06, "loss": 0.5685, "step": 29020 }, { "epoch": 0.74, "grad_norm": 1.4286588430404663, "learning_rate": 3.2486215308175537e-06, "loss": 0.5821, "step": 29021 }, { "epoch": 0.74, "grad_norm": 1.451693058013916, "learning_rate": 3.2480091646690894e-06, "loss": 0.4732, "step": 29022 }, { "epoch": 0.74, "grad_norm": 1.382796049118042, "learning_rate": 3.247396845051104e-06, "loss": 0.5284, "step": 29023 }, { "epoch": 0.74, "grad_norm": 1.9890191555023193, "learning_rate": 3.2467845719678135e-06, "loss": 0.4378, "step": 29024 }, { "epoch": 0.74, "grad_norm": 2.0451157093048096, "learning_rate": 3.246172345423445e-06, "loss": 0.6093, "step": 29025 }, { "epoch": 0.74, "grad_norm": 4.108330726623535, "learning_rate": 3.2455601654222134e-06, "loss": 0.5333, "step": 29026 }, { "epoch": 0.74, "grad_norm": 1.7805075645446777, "learning_rate": 3.244948031968337e-06, "loss": 0.7249, "step": 29027 }, { "epoch": 0.74, "grad_norm": 1.5683997869491577, "learning_rate": 3.2443359450660316e-06, "loss": 0.6076, "step": 29028 }, { "epoch": 0.74, "grad_norm": 1.4831023216247559, "learning_rate": 3.243723904719522e-06, "loss": 0.3673, "step": 29029 }, { "epoch": 0.74, "grad_norm": 3.9994454383850098, "learning_rate": 3.2431119109330234e-06, "loss": 0.7171, "step": 29030 }, { "epoch": 0.74, "grad_norm": 4.879148006439209, "learning_rate": 3.2424999637107477e-06, "loss": 0.5398, "step": 29031 }, { "epoch": 0.74, "grad_norm": 1.793661117553711, "learning_rate": 3.2418880630569217e-06, "loss": 0.6882, "step": 29032 }, { "epoch": 0.74, "grad_norm": 1.374432921409607, "learning_rate": 3.2412762089757556e-06, "loss": 0.5861, "step": 29033 }, { "epoch": 0.74, "grad_norm": 7.319456577301025, "learning_rate": 3.2406644014714684e-06, "loss": 0.6301, "step": 29034 }, { "epoch": 0.74, "grad_norm": 10.17525863647461, "learning_rate": 3.2400526405482724e-06, "loss": 0.4993, "step": 29035 }, { "epoch": 0.74, "grad_norm": 1.919654369354248, "learning_rate": 3.2394409262103897e-06, "loss": 0.4595, "step": 29036 }, { "epoch": 0.74, "grad_norm": 1.5423680543899536, "learning_rate": 3.238829258462033e-06, "loss": 0.6055, "step": 29037 }, { "epoch": 0.74, "grad_norm": 1.4917978048324585, "learning_rate": 3.2382176373074138e-06, "loss": 0.6676, "step": 29038 }, { "epoch": 0.74, "grad_norm": 3.0435988903045654, "learning_rate": 3.237606062750753e-06, "loss": 0.568, "step": 29039 }, { "epoch": 0.74, "grad_norm": 1.174336552619934, "learning_rate": 3.236994534796264e-06, "loss": 0.4994, "step": 29040 }, { "epoch": 0.74, "grad_norm": 1.7390412092208862, "learning_rate": 3.2363830534481557e-06, "loss": 0.4749, "step": 29041 }, { "epoch": 0.74, "grad_norm": 1.3693864345550537, "learning_rate": 3.235771618710649e-06, "loss": 0.4686, "step": 29042 }, { "epoch": 0.74, "grad_norm": 2.0290637016296387, "learning_rate": 3.2351602305879547e-06, "loss": 0.677, "step": 29043 }, { "epoch": 0.74, "grad_norm": 2.229769706726074, "learning_rate": 3.234548889084287e-06, "loss": 0.4432, "step": 29044 }, { "epoch": 0.74, "grad_norm": 4.080633640289307, "learning_rate": 3.233937594203853e-06, "loss": 0.4908, "step": 29045 }, { "epoch": 0.74, "grad_norm": 7.747725486755371, "learning_rate": 3.2333263459508747e-06, "loss": 0.4622, "step": 29046 }, { "epoch": 0.74, "grad_norm": 4.672970294952393, "learning_rate": 3.23271514432956e-06, "loss": 0.6158, "step": 29047 }, { "epoch": 0.74, "grad_norm": 4.025396347045898, "learning_rate": 3.2321039893441176e-06, "loss": 0.5213, "step": 29048 }, { "epoch": 0.74, "grad_norm": 2.161414384841919, "learning_rate": 3.2314928809987657e-06, "loss": 0.6041, "step": 29049 }, { "epoch": 0.74, "grad_norm": 1.7224534749984741, "learning_rate": 3.2308818192977122e-06, "loss": 0.4243, "step": 29050 }, { "epoch": 0.74, "grad_norm": 1.3869974613189697, "learning_rate": 3.230270804245168e-06, "loss": 0.5705, "step": 29051 }, { "epoch": 0.74, "grad_norm": 5.033145427703857, "learning_rate": 3.2296598358453413e-06, "loss": 0.4984, "step": 29052 }, { "epoch": 0.74, "grad_norm": 0.8868104219436646, "learning_rate": 3.2290489141024496e-06, "loss": 0.3583, "step": 29053 }, { "epoch": 0.74, "grad_norm": 9.235241889953613, "learning_rate": 3.228438039020697e-06, "loss": 0.4875, "step": 29054 }, { "epoch": 0.74, "grad_norm": 1.0986261367797852, "learning_rate": 3.227827210604294e-06, "loss": 0.5964, "step": 29055 }, { "epoch": 0.74, "grad_norm": 9.370084762573242, "learning_rate": 3.2272164288574524e-06, "loss": 0.6184, "step": 29056 }, { "epoch": 0.74, "grad_norm": 1.5564367771148682, "learning_rate": 3.2266056937843817e-06, "loss": 0.4153, "step": 29057 }, { "epoch": 0.74, "grad_norm": 1.4070018529891968, "learning_rate": 3.2259950053892884e-06, "loss": 0.5729, "step": 29058 }, { "epoch": 0.74, "grad_norm": 1.849703073501587, "learning_rate": 3.2253843636763793e-06, "loss": 0.543, "step": 29059 }, { "epoch": 0.74, "grad_norm": 4.479362487792969, "learning_rate": 3.224773768649867e-06, "loss": 0.6577, "step": 29060 }, { "epoch": 0.74, "grad_norm": 1.6668506860733032, "learning_rate": 3.2241632203139574e-06, "loss": 0.5254, "step": 29061 }, { "epoch": 0.74, "grad_norm": 1.486077070236206, "learning_rate": 3.2235527186728556e-06, "loss": 0.6201, "step": 29062 }, { "epoch": 0.74, "grad_norm": 1.365562081336975, "learning_rate": 3.222942263730774e-06, "loss": 0.6226, "step": 29063 }, { "epoch": 0.74, "grad_norm": 1.7634689807891846, "learning_rate": 3.2223318554919157e-06, "loss": 0.5153, "step": 29064 }, { "epoch": 0.74, "grad_norm": 1.1674790382385254, "learning_rate": 3.2217214939604857e-06, "loss": 0.4828, "step": 29065 }, { "epoch": 0.74, "grad_norm": 1.6309608221054077, "learning_rate": 3.2211111791406955e-06, "loss": 0.5331, "step": 29066 }, { "epoch": 0.75, "grad_norm": 3.7682673931121826, "learning_rate": 3.220500911036748e-06, "loss": 0.4103, "step": 29067 }, { "epoch": 0.75, "grad_norm": 1.5555607080459595, "learning_rate": 3.2198906896528492e-06, "loss": 0.6559, "step": 29068 }, { "epoch": 0.75, "grad_norm": 2.2492775917053223, "learning_rate": 3.2192805149932004e-06, "loss": 0.4714, "step": 29069 }, { "epoch": 0.75, "grad_norm": 1.988494873046875, "learning_rate": 3.218670387062014e-06, "loss": 0.6281, "step": 29070 }, { "epoch": 0.75, "grad_norm": 1.7824130058288574, "learning_rate": 3.218060305863491e-06, "loss": 0.5919, "step": 29071 }, { "epoch": 0.75, "grad_norm": 0.9683904051780701, "learning_rate": 3.217450271401832e-06, "loss": 0.5297, "step": 29072 }, { "epoch": 0.75, "grad_norm": 1.4550917148590088, "learning_rate": 3.2168402836812474e-06, "loss": 0.5481, "step": 29073 }, { "epoch": 0.75, "grad_norm": 1.7526881694793701, "learning_rate": 3.2162303427059384e-06, "loss": 0.5957, "step": 29074 }, { "epoch": 0.75, "grad_norm": 1.280972957611084, "learning_rate": 3.215620448480108e-06, "loss": 0.4401, "step": 29075 }, { "epoch": 0.75, "grad_norm": 1.1658309698104858, "learning_rate": 3.215010601007956e-06, "loss": 0.559, "step": 29076 }, { "epoch": 0.75, "grad_norm": 1.9183084964752197, "learning_rate": 3.214400800293691e-06, "loss": 0.5079, "step": 29077 }, { "epoch": 0.75, "grad_norm": 1.6032806634902954, "learning_rate": 3.2137910463415122e-06, "loss": 0.4749, "step": 29078 }, { "epoch": 0.75, "grad_norm": 1.1288645267486572, "learning_rate": 3.21318133915562e-06, "loss": 0.3535, "step": 29079 }, { "epoch": 0.75, "grad_norm": 1.69786536693573, "learning_rate": 3.2125716787402207e-06, "loss": 0.6256, "step": 29080 }, { "epoch": 0.75, "grad_norm": 1.8834567070007324, "learning_rate": 3.2119620650995122e-06, "loss": 0.4892, "step": 29081 }, { "epoch": 0.75, "grad_norm": 1.4436372518539429, "learning_rate": 3.2113524982376975e-06, "loss": 0.5304, "step": 29082 }, { "epoch": 0.75, "grad_norm": 2.762188673019409, "learning_rate": 3.2107429781589727e-06, "loss": 0.4534, "step": 29083 }, { "epoch": 0.75, "grad_norm": 0.9123425483703613, "learning_rate": 3.210133504867545e-06, "loss": 0.5646, "step": 29084 }, { "epoch": 0.75, "grad_norm": 2.3873157501220703, "learning_rate": 3.2095240783676117e-06, "loss": 0.5329, "step": 29085 }, { "epoch": 0.75, "grad_norm": 1.5025286674499512, "learning_rate": 3.208914698663369e-06, "loss": 0.5512, "step": 29086 }, { "epoch": 0.75, "grad_norm": 2.986375093460083, "learning_rate": 3.2083053657590234e-06, "loss": 0.654, "step": 29087 }, { "epoch": 0.75, "grad_norm": 1.0546202659606934, "learning_rate": 3.20769607965877e-06, "loss": 0.4776, "step": 29088 }, { "epoch": 0.75, "grad_norm": 6.004670143127441, "learning_rate": 3.2070868403668033e-06, "loss": 0.5326, "step": 29089 }, { "epoch": 0.75, "grad_norm": 1.2201626300811768, "learning_rate": 3.2064776478873306e-06, "loss": 0.5044, "step": 29090 }, { "epoch": 0.75, "grad_norm": 2.2240281105041504, "learning_rate": 3.2058685022245463e-06, "loss": 0.4993, "step": 29091 }, { "epoch": 0.75, "grad_norm": 17.253019332885742, "learning_rate": 3.205259403382648e-06, "loss": 0.5525, "step": 29092 }, { "epoch": 0.75, "grad_norm": 0.9609463810920715, "learning_rate": 3.204650351365829e-06, "loss": 0.4791, "step": 29093 }, { "epoch": 0.75, "grad_norm": 2.485271453857422, "learning_rate": 3.2040413461782937e-06, "loss": 0.5098, "step": 29094 }, { "epoch": 0.75, "grad_norm": 2.0867340564727783, "learning_rate": 3.203432387824236e-06, "loss": 0.5291, "step": 29095 }, { "epoch": 0.75, "grad_norm": 1.2738045454025269, "learning_rate": 3.202823476307849e-06, "loss": 0.513, "step": 29096 }, { "epoch": 0.75, "grad_norm": 1.5708998441696167, "learning_rate": 3.202214611633335e-06, "loss": 0.5694, "step": 29097 }, { "epoch": 0.75, "grad_norm": 3.513092041015625, "learning_rate": 3.2016057938048873e-06, "loss": 0.7171, "step": 29098 }, { "epoch": 0.75, "grad_norm": 1.6201815605163574, "learning_rate": 3.200997022826702e-06, "loss": 0.6085, "step": 29099 }, { "epoch": 0.75, "grad_norm": 1.241837501525879, "learning_rate": 3.20038829870297e-06, "loss": 0.5382, "step": 29100 }, { "epoch": 0.75, "grad_norm": 1.976446509361267, "learning_rate": 3.199779621437893e-06, "loss": 0.4872, "step": 29101 }, { "epoch": 0.75, "grad_norm": 1.4609583616256714, "learning_rate": 3.1991709910356628e-06, "loss": 0.6147, "step": 29102 }, { "epoch": 0.75, "grad_norm": 1.2933629751205444, "learning_rate": 3.1985624075004705e-06, "loss": 0.493, "step": 29103 }, { "epoch": 0.75, "grad_norm": 1.2206218242645264, "learning_rate": 3.1979538708365156e-06, "loss": 0.5966, "step": 29104 }, { "epoch": 0.75, "grad_norm": 1.366736888885498, "learning_rate": 3.197345381047989e-06, "loss": 0.4741, "step": 29105 }, { "epoch": 0.75, "grad_norm": 1.4797295331954956, "learning_rate": 3.1967369381390813e-06, "loss": 0.5306, "step": 29106 }, { "epoch": 0.75, "grad_norm": 2.004101037979126, "learning_rate": 3.196128542113992e-06, "loss": 0.3998, "step": 29107 }, { "epoch": 0.75, "grad_norm": 1.573333978652954, "learning_rate": 3.19552019297691e-06, "loss": 0.695, "step": 29108 }, { "epoch": 0.75, "grad_norm": 1.8344687223434448, "learning_rate": 3.1949118907320277e-06, "loss": 0.5892, "step": 29109 }, { "epoch": 0.75, "grad_norm": 1.9574824571609497, "learning_rate": 3.1943036353835334e-06, "loss": 0.5837, "step": 29110 }, { "epoch": 0.75, "grad_norm": 11.05113410949707, "learning_rate": 3.193695426935627e-06, "loss": 0.6618, "step": 29111 }, { "epoch": 0.75, "grad_norm": 2.3057610988616943, "learning_rate": 3.193087265392495e-06, "loss": 0.5829, "step": 29112 }, { "epoch": 0.75, "grad_norm": 5.4339494705200195, "learning_rate": 3.192479150758325e-06, "loss": 0.4548, "step": 29113 }, { "epoch": 0.75, "grad_norm": 1.4889051914215088, "learning_rate": 3.1918710830373166e-06, "loss": 0.5026, "step": 29114 }, { "epoch": 0.75, "grad_norm": 1.3737865686416626, "learning_rate": 3.1912630622336548e-06, "loss": 0.5298, "step": 29115 }, { "epoch": 0.75, "grad_norm": 20.076499938964844, "learning_rate": 3.190655088351531e-06, "loss": 0.4792, "step": 29116 }, { "epoch": 0.75, "grad_norm": 1.496161937713623, "learning_rate": 3.19004716139513e-06, "loss": 0.5187, "step": 29117 }, { "epoch": 0.75, "grad_norm": 2.1523115634918213, "learning_rate": 3.189439281368649e-06, "loss": 0.6554, "step": 29118 }, { "epoch": 0.75, "grad_norm": 1.088287353515625, "learning_rate": 3.1888314482762738e-06, "loss": 0.5805, "step": 29119 }, { "epoch": 0.75, "grad_norm": 2.143233299255371, "learning_rate": 3.1882236621221895e-06, "loss": 0.6795, "step": 29120 }, { "epoch": 0.75, "grad_norm": 4.946224212646484, "learning_rate": 3.187615922910592e-06, "loss": 0.5551, "step": 29121 }, { "epoch": 0.75, "grad_norm": 1.03016996383667, "learning_rate": 3.187008230645665e-06, "loss": 0.4828, "step": 29122 }, { "epoch": 0.75, "grad_norm": 1.5998810529708862, "learning_rate": 3.1864005853315973e-06, "loss": 0.5944, "step": 29123 }, { "epoch": 0.75, "grad_norm": 0.9404001235961914, "learning_rate": 3.1857929869725724e-06, "loss": 0.5776, "step": 29124 }, { "epoch": 0.75, "grad_norm": 4.534368991851807, "learning_rate": 3.1851854355727842e-06, "loss": 0.5629, "step": 29125 }, { "epoch": 0.75, "grad_norm": 2.787926197052002, "learning_rate": 3.1845779311364177e-06, "loss": 0.5294, "step": 29126 }, { "epoch": 0.75, "grad_norm": 2.3637635707855225, "learning_rate": 3.1839704736676537e-06, "loss": 0.3682, "step": 29127 }, { "epoch": 0.75, "grad_norm": 6.168416976928711, "learning_rate": 3.1833630631706867e-06, "loss": 0.5347, "step": 29128 }, { "epoch": 0.75, "grad_norm": 1.6390348672866821, "learning_rate": 3.182755699649699e-06, "loss": 0.5048, "step": 29129 }, { "epoch": 0.75, "grad_norm": 1.7850877046585083, "learning_rate": 3.182148383108873e-06, "loss": 0.6393, "step": 29130 }, { "epoch": 0.75, "grad_norm": 1.5873531103134155, "learning_rate": 3.1815411135524e-06, "loss": 0.5159, "step": 29131 }, { "epoch": 0.75, "grad_norm": 1.7060505151748657, "learning_rate": 3.1809338909844613e-06, "loss": 0.5741, "step": 29132 }, { "epoch": 0.75, "grad_norm": 1.2631436586380005, "learning_rate": 3.180326715409242e-06, "loss": 0.3996, "step": 29133 }, { "epoch": 0.75, "grad_norm": 4.7725510597229, "learning_rate": 3.179719586830924e-06, "loss": 0.3724, "step": 29134 }, { "epoch": 0.75, "grad_norm": 1.9885547161102295, "learning_rate": 3.1791125052536962e-06, "loss": 0.5514, "step": 29135 }, { "epoch": 0.75, "grad_norm": 1.3802345991134644, "learning_rate": 3.1785054706817407e-06, "loss": 0.5441, "step": 29136 }, { "epoch": 0.75, "grad_norm": 1.5380195379257202, "learning_rate": 3.177898483119236e-06, "loss": 0.5059, "step": 29137 }, { "epoch": 0.75, "grad_norm": 1.5608184337615967, "learning_rate": 3.177291542570372e-06, "loss": 0.4572, "step": 29138 }, { "epoch": 0.75, "grad_norm": 1.2449039220809937, "learning_rate": 3.176684649039328e-06, "loss": 0.5319, "step": 29139 }, { "epoch": 0.75, "grad_norm": 1.4572168588638306, "learning_rate": 3.1760778025302872e-06, "loss": 0.5753, "step": 29140 }, { "epoch": 0.75, "grad_norm": 1.2149579524993896, "learning_rate": 3.1754710030474265e-06, "loss": 0.4676, "step": 29141 }, { "epoch": 0.75, "grad_norm": 1.4444544315338135, "learning_rate": 3.174864250594937e-06, "loss": 0.653, "step": 29142 }, { "epoch": 0.75, "grad_norm": 1.2054189443588257, "learning_rate": 3.174257545176994e-06, "loss": 0.3596, "step": 29143 }, { "epoch": 0.75, "grad_norm": 1.889927625656128, "learning_rate": 3.173650886797777e-06, "loss": 0.6562, "step": 29144 }, { "epoch": 0.75, "grad_norm": 1.72225022315979, "learning_rate": 3.173044275461472e-06, "loss": 0.5095, "step": 29145 }, { "epoch": 0.75, "grad_norm": 3.488537073135376, "learning_rate": 3.1724377111722573e-06, "loss": 0.5096, "step": 29146 }, { "epoch": 0.75, "grad_norm": 4.258231163024902, "learning_rate": 3.1718311939343093e-06, "loss": 0.6059, "step": 29147 }, { "epoch": 0.75, "grad_norm": 10.332687377929688, "learning_rate": 3.1712247237518136e-06, "loss": 0.5152, "step": 29148 }, { "epoch": 0.75, "grad_norm": 5.145411491394043, "learning_rate": 3.170618300628947e-06, "loss": 0.5905, "step": 29149 }, { "epoch": 0.75, "grad_norm": 1.0168899297714233, "learning_rate": 3.1700119245698878e-06, "loss": 0.4274, "step": 29150 }, { "epoch": 0.75, "grad_norm": 1.0992152690887451, "learning_rate": 3.169405595578813e-06, "loss": 0.442, "step": 29151 }, { "epoch": 0.75, "grad_norm": 1.474841833114624, "learning_rate": 3.1687993136599073e-06, "loss": 0.5244, "step": 29152 }, { "epoch": 0.75, "grad_norm": 5.266035079956055, "learning_rate": 3.1681930788173433e-06, "loss": 0.465, "step": 29153 }, { "epoch": 0.75, "grad_norm": 1.64578378200531, "learning_rate": 3.167586891055299e-06, "loss": 0.6213, "step": 29154 }, { "epoch": 0.75, "grad_norm": 1.7322537899017334, "learning_rate": 3.1669807503779548e-06, "loss": 0.4777, "step": 29155 }, { "epoch": 0.75, "grad_norm": 1.8242263793945312, "learning_rate": 3.166374656789487e-06, "loss": 0.4258, "step": 29156 }, { "epoch": 0.75, "grad_norm": 1.0527645349502563, "learning_rate": 3.165768610294072e-06, "loss": 0.5953, "step": 29157 }, { "epoch": 0.75, "grad_norm": 4.845081806182861, "learning_rate": 3.165162610895882e-06, "loss": 0.5176, "step": 29158 }, { "epoch": 0.75, "grad_norm": 2.3764638900756836, "learning_rate": 3.1645566585991016e-06, "loss": 0.464, "step": 29159 }, { "epoch": 0.75, "grad_norm": 1.5141228437423706, "learning_rate": 3.1639507534079015e-06, "loss": 0.419, "step": 29160 }, { "epoch": 0.75, "grad_norm": 5.3296051025390625, "learning_rate": 3.1633448953264556e-06, "loss": 0.6352, "step": 29161 }, { "epoch": 0.75, "grad_norm": 3.8351736068725586, "learning_rate": 3.1627390843589435e-06, "loss": 0.5398, "step": 29162 }, { "epoch": 0.75, "grad_norm": 3.967411756515503, "learning_rate": 3.1621333205095394e-06, "loss": 0.5297, "step": 29163 }, { "epoch": 0.75, "grad_norm": 1.1299625635147095, "learning_rate": 3.161527603782415e-06, "loss": 0.4317, "step": 29164 }, { "epoch": 0.75, "grad_norm": 1.6621265411376953, "learning_rate": 3.1609219341817443e-06, "loss": 0.5464, "step": 29165 }, { "epoch": 0.75, "grad_norm": 1.8404536247253418, "learning_rate": 3.160316311711705e-06, "loss": 0.5096, "step": 29166 }, { "epoch": 0.75, "grad_norm": 0.9383926391601562, "learning_rate": 3.159710736376469e-06, "loss": 0.4639, "step": 29167 }, { "epoch": 0.75, "grad_norm": 1.2322670221328735, "learning_rate": 3.159105208180205e-06, "loss": 0.5055, "step": 29168 }, { "epoch": 0.75, "grad_norm": 1.5629698038101196, "learning_rate": 3.1584997271270946e-06, "loss": 0.4475, "step": 29169 }, { "epoch": 0.75, "grad_norm": 1.9560232162475586, "learning_rate": 3.1578942932213054e-06, "loss": 0.4976, "step": 29170 }, { "epoch": 0.75, "grad_norm": 7.936866283416748, "learning_rate": 3.1572889064670066e-06, "loss": 0.5649, "step": 29171 }, { "epoch": 0.75, "grad_norm": 1.2980540990829468, "learning_rate": 3.156683566868377e-06, "loss": 0.5246, "step": 29172 }, { "epoch": 0.75, "grad_norm": 1.6588977575302124, "learning_rate": 3.156078274429585e-06, "loss": 0.5683, "step": 29173 }, { "epoch": 0.75, "grad_norm": 0.9543540477752686, "learning_rate": 3.155473029154802e-06, "loss": 0.546, "step": 29174 }, { "epoch": 0.75, "grad_norm": 0.9061729907989502, "learning_rate": 3.154867831048195e-06, "loss": 0.5791, "step": 29175 }, { "epoch": 0.75, "grad_norm": 1.1640280485153198, "learning_rate": 3.154262680113942e-06, "loss": 0.5315, "step": 29176 }, { "epoch": 0.75, "grad_norm": 1.2172287702560425, "learning_rate": 3.1536575763562094e-06, "loss": 0.5086, "step": 29177 }, { "epoch": 0.75, "grad_norm": 1.33553946018219, "learning_rate": 3.1530525197791638e-06, "loss": 0.4098, "step": 29178 }, { "epoch": 0.75, "grad_norm": 3.6881072521209717, "learning_rate": 3.1524475103869834e-06, "loss": 0.4589, "step": 29179 }, { "epoch": 0.75, "grad_norm": 1.9973310232162476, "learning_rate": 3.151842548183831e-06, "loss": 0.4773, "step": 29180 }, { "epoch": 0.75, "grad_norm": 2.075249433517456, "learning_rate": 3.151237633173878e-06, "loss": 0.3443, "step": 29181 }, { "epoch": 0.75, "grad_norm": 2.8190419673919678, "learning_rate": 3.150632765361289e-06, "loss": 0.731, "step": 29182 }, { "epoch": 0.75, "grad_norm": 1.7380465269088745, "learning_rate": 3.150027944750239e-06, "loss": 0.5804, "step": 29183 }, { "epoch": 0.75, "grad_norm": 1.8253289461135864, "learning_rate": 3.149423171344893e-06, "loss": 0.5063, "step": 29184 }, { "epoch": 0.75, "grad_norm": 2.4099056720733643, "learning_rate": 3.148818445149415e-06, "loss": 0.6726, "step": 29185 }, { "epoch": 0.75, "grad_norm": 1.1284751892089844, "learning_rate": 3.14821376616798e-06, "loss": 0.5717, "step": 29186 }, { "epoch": 0.75, "grad_norm": 3.4272656440734863, "learning_rate": 3.147609134404751e-06, "loss": 0.5099, "step": 29187 }, { "epoch": 0.75, "grad_norm": 2.839276075363159, "learning_rate": 3.1470045498638914e-06, "loss": 0.4246, "step": 29188 }, { "epoch": 0.75, "grad_norm": 1.6826553344726562, "learning_rate": 3.146400012549573e-06, "loss": 0.5425, "step": 29189 }, { "epoch": 0.75, "grad_norm": 1.532827615737915, "learning_rate": 3.145795522465961e-06, "loss": 0.5759, "step": 29190 }, { "epoch": 0.75, "grad_norm": 1.328196406364441, "learning_rate": 3.1451910796172193e-06, "loss": 0.4295, "step": 29191 }, { "epoch": 0.75, "grad_norm": 1.2711186408996582, "learning_rate": 3.1445866840075144e-06, "loss": 0.4982, "step": 29192 }, { "epoch": 0.75, "grad_norm": 1.5552425384521484, "learning_rate": 3.1439823356410115e-06, "loss": 0.4092, "step": 29193 }, { "epoch": 0.75, "grad_norm": 2.8924999237060547, "learning_rate": 3.1433780345218744e-06, "loss": 0.6371, "step": 29194 }, { "epoch": 0.75, "grad_norm": 1.8108289241790771, "learning_rate": 3.1427737806542656e-06, "loss": 0.5652, "step": 29195 }, { "epoch": 0.75, "grad_norm": 2.5019452571868896, "learning_rate": 3.1421695740423543e-06, "loss": 0.5273, "step": 29196 }, { "epoch": 0.75, "grad_norm": 16.76506996154785, "learning_rate": 3.141565414690302e-06, "loss": 0.6345, "step": 29197 }, { "epoch": 0.75, "grad_norm": 2.9564523696899414, "learning_rate": 3.140961302602269e-06, "loss": 0.6383, "step": 29198 }, { "epoch": 0.75, "grad_norm": 1.9720641374588013, "learning_rate": 3.1403572377824245e-06, "loss": 0.6054, "step": 29199 }, { "epoch": 0.75, "grad_norm": 1.5649492740631104, "learning_rate": 3.139753220234928e-06, "loss": 0.516, "step": 29200 }, { "epoch": 0.75, "grad_norm": 1.729875922203064, "learning_rate": 3.1391492499639387e-06, "loss": 0.5272, "step": 29201 }, { "epoch": 0.75, "grad_norm": 1.44156813621521, "learning_rate": 3.1385453269736267e-06, "loss": 0.4685, "step": 29202 }, { "epoch": 0.75, "grad_norm": 1.8027207851409912, "learning_rate": 3.1379414512681484e-06, "loss": 0.4846, "step": 29203 }, { "epoch": 0.75, "grad_norm": 1.1536693572998047, "learning_rate": 3.1373376228516674e-06, "loss": 0.5381, "step": 29204 }, { "epoch": 0.75, "grad_norm": 1.4352951049804688, "learning_rate": 3.13673384172834e-06, "loss": 0.4536, "step": 29205 }, { "epoch": 0.75, "grad_norm": 1.9684802293777466, "learning_rate": 3.1361301079023344e-06, "loss": 0.5775, "step": 29206 }, { "epoch": 0.75, "grad_norm": 4.481031894683838, "learning_rate": 3.135526421377808e-06, "loss": 0.6261, "step": 29207 }, { "epoch": 0.75, "grad_norm": 0.9494262337684631, "learning_rate": 3.134922782158918e-06, "loss": 0.4366, "step": 29208 }, { "epoch": 0.75, "grad_norm": 1.1979045867919922, "learning_rate": 3.134319190249829e-06, "loss": 0.4331, "step": 29209 }, { "epoch": 0.75, "grad_norm": 2.5666701793670654, "learning_rate": 3.1337156456547e-06, "loss": 0.6447, "step": 29210 }, { "epoch": 0.75, "grad_norm": 1.6998445987701416, "learning_rate": 3.1331121483776896e-06, "loss": 0.633, "step": 29211 }, { "epoch": 0.75, "grad_norm": 1.2530772686004639, "learning_rate": 3.132508698422951e-06, "loss": 0.5515, "step": 29212 }, { "epoch": 0.75, "grad_norm": 24.928272247314453, "learning_rate": 3.1319052957946514e-06, "loss": 0.6323, "step": 29213 }, { "epoch": 0.75, "grad_norm": 1.2648283243179321, "learning_rate": 3.131301940496947e-06, "loss": 0.3231, "step": 29214 }, { "epoch": 0.75, "grad_norm": 11.494129180908203, "learning_rate": 3.1306986325339895e-06, "loss": 0.6079, "step": 29215 }, { "epoch": 0.75, "grad_norm": 2.0990207195281982, "learning_rate": 3.1300953719099456e-06, "loss": 0.5204, "step": 29216 }, { "epoch": 0.75, "grad_norm": 2.490532636642456, "learning_rate": 3.1294921586289673e-06, "loss": 0.5481, "step": 29217 }, { "epoch": 0.75, "grad_norm": 1.7528387308120728, "learning_rate": 3.128888992695214e-06, "loss": 0.6244, "step": 29218 }, { "epoch": 0.75, "grad_norm": 1.7000774145126343, "learning_rate": 3.128285874112836e-06, "loss": 0.5306, "step": 29219 }, { "epoch": 0.75, "grad_norm": 1.3236547708511353, "learning_rate": 3.1276828028859984e-06, "loss": 0.5709, "step": 29220 }, { "epoch": 0.75, "grad_norm": 1.298414707183838, "learning_rate": 3.1270797790188534e-06, "loss": 0.4235, "step": 29221 }, { "epoch": 0.75, "grad_norm": 1.5990103483200073, "learning_rate": 3.1264768025155533e-06, "loss": 0.5373, "step": 29222 }, { "epoch": 0.75, "grad_norm": 1.3064560890197754, "learning_rate": 3.1258738733802606e-06, "loss": 0.6859, "step": 29223 }, { "epoch": 0.75, "grad_norm": 2.3403522968292236, "learning_rate": 3.125270991617125e-06, "loss": 0.5438, "step": 29224 }, { "epoch": 0.75, "grad_norm": 1.7329366207122803, "learning_rate": 3.1246681572303006e-06, "loss": 0.5474, "step": 29225 }, { "epoch": 0.75, "grad_norm": 1.1507725715637207, "learning_rate": 3.1240653702239464e-06, "loss": 0.4706, "step": 29226 }, { "epoch": 0.75, "grad_norm": 1.0850434303283691, "learning_rate": 3.123462630602214e-06, "loss": 0.5295, "step": 29227 }, { "epoch": 0.75, "grad_norm": 2.6611506938934326, "learning_rate": 3.122859938369257e-06, "loss": 0.554, "step": 29228 }, { "epoch": 0.75, "grad_norm": 0.9727465510368347, "learning_rate": 3.1222572935292263e-06, "loss": 0.5747, "step": 29229 }, { "epoch": 0.75, "grad_norm": 1.4060274362564087, "learning_rate": 3.1216546960862793e-06, "loss": 0.5264, "step": 29230 }, { "epoch": 0.75, "grad_norm": 1.7813875675201416, "learning_rate": 3.1210521460445665e-06, "loss": 0.5999, "step": 29231 }, { "epoch": 0.75, "grad_norm": 1.3518662452697754, "learning_rate": 3.1204496434082377e-06, "loss": 0.4998, "step": 29232 }, { "epoch": 0.75, "grad_norm": 1.3005034923553467, "learning_rate": 3.119847188181451e-06, "loss": 0.4864, "step": 29233 }, { "epoch": 0.75, "grad_norm": 1.32656729221344, "learning_rate": 3.1192447803683555e-06, "loss": 0.3972, "step": 29234 }, { "epoch": 0.75, "grad_norm": 1.5549852848052979, "learning_rate": 3.1186424199731014e-06, "loss": 0.4906, "step": 29235 }, { "epoch": 0.75, "grad_norm": 1.2974785566329956, "learning_rate": 3.1180401069998377e-06, "loss": 0.5507, "step": 29236 }, { "epoch": 0.75, "grad_norm": 1.5101338624954224, "learning_rate": 3.117437841452721e-06, "loss": 0.6493, "step": 29237 }, { "epoch": 0.75, "grad_norm": 1.2076661586761475, "learning_rate": 3.116835623335898e-06, "loss": 0.5278, "step": 29238 }, { "epoch": 0.75, "grad_norm": 5.932905197143555, "learning_rate": 3.1162334526535165e-06, "loss": 0.6753, "step": 29239 }, { "epoch": 0.75, "grad_norm": 1.7606059312820435, "learning_rate": 3.115631329409733e-06, "loss": 0.5373, "step": 29240 }, { "epoch": 0.75, "grad_norm": 2.0589144229888916, "learning_rate": 3.1150292536086922e-06, "loss": 0.5388, "step": 29241 }, { "epoch": 0.75, "grad_norm": 1.1553895473480225, "learning_rate": 3.1144272252545425e-06, "loss": 0.5553, "step": 29242 }, { "epoch": 0.75, "grad_norm": 1.8261624574661255, "learning_rate": 3.113825244351437e-06, "loss": 0.5532, "step": 29243 }, { "epoch": 0.75, "grad_norm": 2.6084322929382324, "learning_rate": 3.1132233109035213e-06, "loss": 0.6055, "step": 29244 }, { "epoch": 0.75, "grad_norm": 1.9969282150268555, "learning_rate": 3.1126214249149432e-06, "loss": 0.4944, "step": 29245 }, { "epoch": 0.75, "grad_norm": 1.2839888334274292, "learning_rate": 3.1120195863898496e-06, "loss": 0.4314, "step": 29246 }, { "epoch": 0.75, "grad_norm": 1.092357873916626, "learning_rate": 3.111417795332392e-06, "loss": 0.4917, "step": 29247 }, { "epoch": 0.75, "grad_norm": 5.969570636749268, "learning_rate": 3.1108160517467144e-06, "loss": 0.5732, "step": 29248 }, { "epoch": 0.75, "grad_norm": 3.9820716381073, "learning_rate": 3.1102143556369624e-06, "loss": 0.6253, "step": 29249 }, { "epoch": 0.75, "grad_norm": 2.596015453338623, "learning_rate": 3.109612707007287e-06, "loss": 0.5167, "step": 29250 }, { "epoch": 0.75, "grad_norm": 1.4101654291152954, "learning_rate": 3.109011105861831e-06, "loss": 0.4323, "step": 29251 }, { "epoch": 0.75, "grad_norm": 1.632214903831482, "learning_rate": 3.108409552204743e-06, "loss": 0.5111, "step": 29252 }, { "epoch": 0.75, "grad_norm": 1.7895593643188477, "learning_rate": 3.1078080460401615e-06, "loss": 0.6057, "step": 29253 }, { "epoch": 0.75, "grad_norm": 1.6863594055175781, "learning_rate": 3.1072065873722413e-06, "loss": 0.5267, "step": 29254 }, { "epoch": 0.75, "grad_norm": 2.858579158782959, "learning_rate": 3.106605176205123e-06, "loss": 0.4911, "step": 29255 }, { "epoch": 0.75, "grad_norm": 1.1044906377792358, "learning_rate": 3.106003812542947e-06, "loss": 0.5192, "step": 29256 }, { "epoch": 0.75, "grad_norm": 1.0859012603759766, "learning_rate": 3.1054024963898657e-06, "loss": 0.7294, "step": 29257 }, { "epoch": 0.75, "grad_norm": 14.043265342712402, "learning_rate": 3.1048012277500174e-06, "loss": 0.6071, "step": 29258 }, { "epoch": 0.75, "grad_norm": 7.861318111419678, "learning_rate": 3.1042000066275477e-06, "loss": 0.588, "step": 29259 }, { "epoch": 0.75, "grad_norm": 2.939358949661255, "learning_rate": 3.1035988330265965e-06, "loss": 0.5587, "step": 29260 }, { "epoch": 0.75, "grad_norm": 3.3198091983795166, "learning_rate": 3.1029977069513117e-06, "loss": 0.4675, "step": 29261 }, { "epoch": 0.75, "grad_norm": 1.3087022304534912, "learning_rate": 3.1023966284058336e-06, "loss": 0.4657, "step": 29262 }, { "epoch": 0.75, "grad_norm": 3.07389497756958, "learning_rate": 3.1017955973943024e-06, "loss": 0.5835, "step": 29263 }, { "epoch": 0.75, "grad_norm": 1.8680813312530518, "learning_rate": 3.101194613920865e-06, "loss": 0.5638, "step": 29264 }, { "epoch": 0.75, "grad_norm": 1.2571656703948975, "learning_rate": 3.1005936779896583e-06, "loss": 0.5354, "step": 29265 }, { "epoch": 0.75, "grad_norm": 2.0891740322113037, "learning_rate": 3.0999927896048243e-06, "loss": 0.5367, "step": 29266 }, { "epoch": 0.75, "grad_norm": 1.387394666671753, "learning_rate": 3.0993919487705073e-06, "loss": 0.473, "step": 29267 }, { "epoch": 0.75, "grad_norm": 1.1587259769439697, "learning_rate": 3.0987911554908455e-06, "loss": 0.5577, "step": 29268 }, { "epoch": 0.75, "grad_norm": 1.7957886457443237, "learning_rate": 3.098190409769979e-06, "loss": 0.6409, "step": 29269 }, { "epoch": 0.75, "grad_norm": 7.394586563110352, "learning_rate": 3.0975897116120455e-06, "loss": 0.47, "step": 29270 }, { "epoch": 0.75, "grad_norm": 4.90825080871582, "learning_rate": 3.0969890610211904e-06, "loss": 0.4302, "step": 29271 }, { "epoch": 0.75, "grad_norm": 5.540962219238281, "learning_rate": 3.096388458001549e-06, "loss": 0.4085, "step": 29272 }, { "epoch": 0.75, "grad_norm": 0.8832980394363403, "learning_rate": 3.095787902557259e-06, "loss": 0.4587, "step": 29273 }, { "epoch": 0.75, "grad_norm": 1.6725236177444458, "learning_rate": 3.0951873946924637e-06, "loss": 0.5194, "step": 29274 }, { "epoch": 0.75, "grad_norm": 1.6472293138504028, "learning_rate": 3.0945869344112987e-06, "loss": 0.4693, "step": 29275 }, { "epoch": 0.75, "grad_norm": 3.8970389366149902, "learning_rate": 3.093986521717902e-06, "loss": 0.6219, "step": 29276 }, { "epoch": 0.75, "grad_norm": 1.047080636024475, "learning_rate": 3.0933861566164092e-06, "loss": 0.4107, "step": 29277 }, { "epoch": 0.75, "grad_norm": 2.2855563163757324, "learning_rate": 3.092785839110962e-06, "loss": 0.5143, "step": 29278 }, { "epoch": 0.75, "grad_norm": 13.723563194274902, "learning_rate": 3.092185569205696e-06, "loss": 0.5013, "step": 29279 }, { "epoch": 0.75, "grad_norm": 1.3032335042953491, "learning_rate": 3.091585346904743e-06, "loss": 0.5825, "step": 29280 }, { "epoch": 0.75, "grad_norm": 1.1214241981506348, "learning_rate": 3.0909851722122475e-06, "loss": 0.411, "step": 29281 }, { "epoch": 0.75, "grad_norm": 1.2963019609451294, "learning_rate": 3.0903850451323412e-06, "loss": 0.5029, "step": 29282 }, { "epoch": 0.75, "grad_norm": 2.0251548290252686, "learning_rate": 3.089784965669157e-06, "loss": 0.7536, "step": 29283 }, { "epoch": 0.75, "grad_norm": 0.9378504157066345, "learning_rate": 3.0891849338268366e-06, "loss": 0.5946, "step": 29284 }, { "epoch": 0.75, "grad_norm": 5.579495906829834, "learning_rate": 3.088584949609512e-06, "loss": 0.5812, "step": 29285 }, { "epoch": 0.75, "grad_norm": 1.775995135307312, "learning_rate": 3.0879850130213174e-06, "loss": 0.5848, "step": 29286 }, { "epoch": 0.75, "grad_norm": 1.1991297006607056, "learning_rate": 3.087385124066384e-06, "loss": 0.5059, "step": 29287 }, { "epoch": 0.75, "grad_norm": 1.476044774055481, "learning_rate": 3.086785282748854e-06, "loss": 0.4999, "step": 29288 }, { "epoch": 0.75, "grad_norm": 3.915773630142212, "learning_rate": 3.086185489072856e-06, "loss": 0.6527, "step": 29289 }, { "epoch": 0.75, "grad_norm": 1.843889594078064, "learning_rate": 3.0855857430425196e-06, "loss": 0.4482, "step": 29290 }, { "epoch": 0.75, "grad_norm": 1.5357036590576172, "learning_rate": 3.084986044661986e-06, "loss": 0.4769, "step": 29291 }, { "epoch": 0.75, "grad_norm": 5.415892124176025, "learning_rate": 3.0843863939353846e-06, "loss": 0.6046, "step": 29292 }, { "epoch": 0.75, "grad_norm": 1.196717381477356, "learning_rate": 3.0837867908668473e-06, "loss": 0.3913, "step": 29293 }, { "epoch": 0.75, "grad_norm": 0.9194175601005554, "learning_rate": 3.083187235460503e-06, "loss": 0.533, "step": 29294 }, { "epoch": 0.75, "grad_norm": 1.470217227935791, "learning_rate": 3.0825877277204896e-06, "loss": 0.4845, "step": 29295 }, { "epoch": 0.75, "grad_norm": 2.2087173461914062, "learning_rate": 3.081988267650936e-06, "loss": 0.4726, "step": 29296 }, { "epoch": 0.75, "grad_norm": 1.2885546684265137, "learning_rate": 3.081388855255969e-06, "loss": 0.4489, "step": 29297 }, { "epoch": 0.75, "grad_norm": 11.530447006225586, "learning_rate": 3.0807894905397273e-06, "loss": 0.5036, "step": 29298 }, { "epoch": 0.75, "grad_norm": 3.7721128463745117, "learning_rate": 3.0801901735063377e-06, "loss": 0.5833, "step": 29299 }, { "epoch": 0.75, "grad_norm": 1.980725646018982, "learning_rate": 3.0795909041599293e-06, "loss": 0.6713, "step": 29300 }, { "epoch": 0.75, "grad_norm": 11.609990119934082, "learning_rate": 3.0789916825046294e-06, "loss": 0.5785, "step": 29301 }, { "epoch": 0.75, "grad_norm": 5.969959259033203, "learning_rate": 3.078392508544573e-06, "loss": 0.6661, "step": 29302 }, { "epoch": 0.75, "grad_norm": 2.363002061843872, "learning_rate": 3.077793382283888e-06, "loss": 0.6691, "step": 29303 }, { "epoch": 0.75, "grad_norm": 5.050705909729004, "learning_rate": 3.077194303726698e-06, "loss": 0.4895, "step": 29304 }, { "epoch": 0.75, "grad_norm": 1.615815281867981, "learning_rate": 3.0765952728771396e-06, "loss": 0.4607, "step": 29305 }, { "epoch": 0.75, "grad_norm": 1.4079385995864868, "learning_rate": 3.0759962897393368e-06, "loss": 0.6187, "step": 29306 }, { "epoch": 0.75, "grad_norm": 1.657817006111145, "learning_rate": 3.0753973543174143e-06, "loss": 0.46, "step": 29307 }, { "epoch": 0.75, "grad_norm": 1.4768486022949219, "learning_rate": 3.074798466615506e-06, "loss": 0.4586, "step": 29308 }, { "epoch": 0.75, "grad_norm": 1.4606269598007202, "learning_rate": 3.0741996266377348e-06, "loss": 0.5184, "step": 29309 }, { "epoch": 0.75, "grad_norm": 2.1646013259887695, "learning_rate": 3.0736008343882295e-06, "loss": 0.4422, "step": 29310 }, { "epoch": 0.75, "grad_norm": 1.874374270439148, "learning_rate": 3.0730020898711123e-06, "loss": 0.4518, "step": 29311 }, { "epoch": 0.75, "grad_norm": 1.3179444074630737, "learning_rate": 3.072403393090516e-06, "loss": 0.3822, "step": 29312 }, { "epoch": 0.75, "grad_norm": 1.2238926887512207, "learning_rate": 3.0718047440505626e-06, "loss": 0.5073, "step": 29313 }, { "epoch": 0.75, "grad_norm": 1.248932123184204, "learning_rate": 3.0712061427553763e-06, "loss": 0.4436, "step": 29314 }, { "epoch": 0.75, "grad_norm": 1.2654505968093872, "learning_rate": 3.070607589209086e-06, "loss": 0.5187, "step": 29315 }, { "epoch": 0.75, "grad_norm": 1.3362088203430176, "learning_rate": 3.070009083415816e-06, "loss": 0.5894, "step": 29316 }, { "epoch": 0.75, "grad_norm": 1.8397550582885742, "learning_rate": 3.0694106253796886e-06, "loss": 0.5657, "step": 29317 }, { "epoch": 0.75, "grad_norm": 4.031727313995361, "learning_rate": 3.068812215104826e-06, "loss": 0.4744, "step": 29318 }, { "epoch": 0.75, "grad_norm": 1.182910442352295, "learning_rate": 3.0682138525953574e-06, "loss": 0.459, "step": 29319 }, { "epoch": 0.75, "grad_norm": 2.296008825302124, "learning_rate": 3.067615537855405e-06, "loss": 0.5669, "step": 29320 }, { "epoch": 0.75, "grad_norm": 1.7423980236053467, "learning_rate": 3.067017270889088e-06, "loss": 0.5649, "step": 29321 }, { "epoch": 0.75, "grad_norm": 1.1773252487182617, "learning_rate": 3.0664190517005345e-06, "loss": 0.5595, "step": 29322 }, { "epoch": 0.75, "grad_norm": 1.6863154172897339, "learning_rate": 3.065820880293865e-06, "loss": 0.6099, "step": 29323 }, { "epoch": 0.75, "grad_norm": 2.109161615371704, "learning_rate": 3.0652227566731983e-06, "loss": 0.5413, "step": 29324 }, { "epoch": 0.75, "grad_norm": 0.9422024488449097, "learning_rate": 3.0646246808426627e-06, "loss": 0.5034, "step": 29325 }, { "epoch": 0.75, "grad_norm": 2.1549527645111084, "learning_rate": 3.064026652806378e-06, "loss": 0.6433, "step": 29326 }, { "epoch": 0.75, "grad_norm": 4.05962610244751, "learning_rate": 3.0634286725684624e-06, "loss": 0.445, "step": 29327 }, { "epoch": 0.75, "grad_norm": 5.490728378295898, "learning_rate": 3.062830740133035e-06, "loss": 0.4943, "step": 29328 }, { "epoch": 0.75, "grad_norm": 2.2222650051116943, "learning_rate": 3.0622328555042234e-06, "loss": 0.5488, "step": 29329 }, { "epoch": 0.75, "grad_norm": 1.7084382772445679, "learning_rate": 3.0616350186861456e-06, "loss": 0.4734, "step": 29330 }, { "epoch": 0.75, "grad_norm": 2.778953790664673, "learning_rate": 3.061037229682915e-06, "loss": 0.6539, "step": 29331 }, { "epoch": 0.75, "grad_norm": 1.0448822975158691, "learning_rate": 3.0604394884986598e-06, "loss": 0.5564, "step": 29332 }, { "epoch": 0.75, "grad_norm": 0.945917010307312, "learning_rate": 3.059841795137497e-06, "loss": 0.5041, "step": 29333 }, { "epoch": 0.75, "grad_norm": 0.978500485420227, "learning_rate": 3.059244149603543e-06, "loss": 0.5208, "step": 29334 }, { "epoch": 0.75, "grad_norm": 1.655949592590332, "learning_rate": 3.0586465519009155e-06, "loss": 0.5616, "step": 29335 }, { "epoch": 0.75, "grad_norm": 3.7572805881500244, "learning_rate": 3.0580490020337385e-06, "loss": 0.5136, "step": 29336 }, { "epoch": 0.75, "grad_norm": 1.4193742275238037, "learning_rate": 3.0574515000061255e-06, "loss": 0.4147, "step": 29337 }, { "epoch": 0.75, "grad_norm": 1.9004360437393188, "learning_rate": 3.056854045822192e-06, "loss": 0.5684, "step": 29338 }, { "epoch": 0.75, "grad_norm": 5.279716968536377, "learning_rate": 3.0562566394860616e-06, "loss": 0.6113, "step": 29339 }, { "epoch": 0.75, "grad_norm": 2.8979008197784424, "learning_rate": 3.055659281001847e-06, "loss": 0.452, "step": 29340 }, { "epoch": 0.75, "grad_norm": 1.5140057802200317, "learning_rate": 3.0550619703736674e-06, "loss": 0.4802, "step": 29341 }, { "epoch": 0.75, "grad_norm": 2.922842025756836, "learning_rate": 3.0544647076056333e-06, "loss": 0.2946, "step": 29342 }, { "epoch": 0.75, "grad_norm": 1.2739759683609009, "learning_rate": 3.0538674927018684e-06, "loss": 0.418, "step": 29343 }, { "epoch": 0.75, "grad_norm": 1.1526052951812744, "learning_rate": 3.0532703256664844e-06, "loss": 0.5214, "step": 29344 }, { "epoch": 0.75, "grad_norm": 5.02730655670166, "learning_rate": 3.0526732065035937e-06, "loss": 0.5141, "step": 29345 }, { "epoch": 0.75, "grad_norm": 1.7560515403747559, "learning_rate": 3.0520761352173176e-06, "loss": 0.5451, "step": 29346 }, { "epoch": 0.75, "grad_norm": 1.351516604423523, "learning_rate": 3.051479111811768e-06, "loss": 0.3373, "step": 29347 }, { "epoch": 0.75, "grad_norm": 2.00278902053833, "learning_rate": 3.0508821362910556e-06, "loss": 0.4293, "step": 29348 }, { "epoch": 0.75, "grad_norm": 1.4043787717819214, "learning_rate": 3.0502852086593016e-06, "loss": 0.5854, "step": 29349 }, { "epoch": 0.75, "grad_norm": 1.112410068511963, "learning_rate": 3.049688328920615e-06, "loss": 0.5321, "step": 29350 }, { "epoch": 0.75, "grad_norm": 1.188939094543457, "learning_rate": 3.0490914970791096e-06, "loss": 0.5145, "step": 29351 }, { "epoch": 0.75, "grad_norm": 2.8358590602874756, "learning_rate": 3.0484947131388964e-06, "loss": 0.6883, "step": 29352 }, { "epoch": 0.75, "grad_norm": 1.441609501838684, "learning_rate": 3.047897977104093e-06, "loss": 0.5365, "step": 29353 }, { "epoch": 0.75, "grad_norm": 2.403905153274536, "learning_rate": 3.047301288978808e-06, "loss": 0.646, "step": 29354 }, { "epoch": 0.75, "grad_norm": 1.9832179546356201, "learning_rate": 3.0467046487671526e-06, "loss": 0.6349, "step": 29355 }, { "epoch": 0.75, "grad_norm": 6.347543716430664, "learning_rate": 3.0461080564732428e-06, "loss": 0.7556, "step": 29356 }, { "epoch": 0.75, "grad_norm": 1.2539143562316895, "learning_rate": 3.0455115121011867e-06, "loss": 0.3729, "step": 29357 }, { "epoch": 0.75, "grad_norm": 1.1845145225524902, "learning_rate": 3.0449150156550968e-06, "loss": 0.5816, "step": 29358 }, { "epoch": 0.75, "grad_norm": 8.279878616333008, "learning_rate": 3.0443185671390798e-06, "loss": 0.6102, "step": 29359 }, { "epoch": 0.75, "grad_norm": 1.6828280687332153, "learning_rate": 3.0437221665572523e-06, "loss": 0.5144, "step": 29360 }, { "epoch": 0.75, "grad_norm": 2.467841148376465, "learning_rate": 3.043125813913722e-06, "loss": 0.6392, "step": 29361 }, { "epoch": 0.75, "grad_norm": 1.441520094871521, "learning_rate": 3.042529509212594e-06, "loss": 0.5778, "step": 29362 }, { "epoch": 0.75, "grad_norm": 1.1868585348129272, "learning_rate": 3.0419332524579848e-06, "loss": 0.5229, "step": 29363 }, { "epoch": 0.75, "grad_norm": 2.915081739425659, "learning_rate": 3.0413370436540003e-06, "loss": 0.4695, "step": 29364 }, { "epoch": 0.75, "grad_norm": 2.180123805999756, "learning_rate": 3.040740882804748e-06, "loss": 0.6438, "step": 29365 }, { "epoch": 0.75, "grad_norm": 1.814727783203125, "learning_rate": 3.040144769914335e-06, "loss": 0.5057, "step": 29366 }, { "epoch": 0.75, "grad_norm": 1.3844947814941406, "learning_rate": 3.0395487049868755e-06, "loss": 0.4178, "step": 29367 }, { "epoch": 0.75, "grad_norm": 1.915464162826538, "learning_rate": 3.038952688026472e-06, "loss": 0.4407, "step": 29368 }, { "epoch": 0.75, "grad_norm": 9.9412202835083, "learning_rate": 3.038356719037231e-06, "loss": 0.6195, "step": 29369 }, { "epoch": 0.75, "grad_norm": 1.4271608591079712, "learning_rate": 3.037760798023265e-06, "loss": 0.5318, "step": 29370 }, { "epoch": 0.75, "grad_norm": 1.292663335800171, "learning_rate": 3.037164924988677e-06, "loss": 0.5558, "step": 29371 }, { "epoch": 0.75, "grad_norm": 2.2046077251434326, "learning_rate": 3.0365690999375707e-06, "loss": 0.581, "step": 29372 }, { "epoch": 0.75, "grad_norm": 1.5738728046417236, "learning_rate": 3.0359733228740586e-06, "loss": 0.5473, "step": 29373 }, { "epoch": 0.75, "grad_norm": 1.600559949874878, "learning_rate": 3.0353775938022424e-06, "loss": 0.4651, "step": 29374 }, { "epoch": 0.75, "grad_norm": 0.9818308353424072, "learning_rate": 3.034781912726229e-06, "loss": 0.41, "step": 29375 }, { "epoch": 0.75, "grad_norm": 0.9588269591331482, "learning_rate": 3.03418627965012e-06, "loss": 0.4819, "step": 29376 }, { "epoch": 0.75, "grad_norm": 1.1826252937316895, "learning_rate": 3.0335906945780246e-06, "loss": 0.5701, "step": 29377 }, { "epoch": 0.75, "grad_norm": 3.132432460784912, "learning_rate": 3.032995157514046e-06, "loss": 0.6708, "step": 29378 }, { "epoch": 0.75, "grad_norm": 3.6063802242279053, "learning_rate": 3.0323996684622848e-06, "loss": 0.4136, "step": 29379 }, { "epoch": 0.75, "grad_norm": 1.551666498184204, "learning_rate": 3.031804227426851e-06, "loss": 0.3691, "step": 29380 }, { "epoch": 0.75, "grad_norm": 1.0101677179336548, "learning_rate": 3.031208834411844e-06, "loss": 0.5082, "step": 29381 }, { "epoch": 0.75, "grad_norm": 5.931761264801025, "learning_rate": 3.030613489421368e-06, "loss": 0.6491, "step": 29382 }, { "epoch": 0.75, "grad_norm": 2.2332983016967773, "learning_rate": 3.030018192459522e-06, "loss": 0.5438, "step": 29383 }, { "epoch": 0.75, "grad_norm": 0.9905117750167847, "learning_rate": 3.0294229435304146e-06, "loss": 0.3391, "step": 29384 }, { "epoch": 0.75, "grad_norm": 1.6442588567733765, "learning_rate": 3.028827742638144e-06, "loss": 0.5733, "step": 29385 }, { "epoch": 0.75, "grad_norm": 1.3824808597564697, "learning_rate": 3.0282325897868115e-06, "loss": 0.6234, "step": 29386 }, { "epoch": 0.75, "grad_norm": 5.072711944580078, "learning_rate": 3.0276374849805224e-06, "loss": 0.7358, "step": 29387 }, { "epoch": 0.75, "grad_norm": 1.5279576778411865, "learning_rate": 3.0270424282233746e-06, "loss": 0.5245, "step": 29388 }, { "epoch": 0.75, "grad_norm": 1.281970739364624, "learning_rate": 3.026447419519467e-06, "loss": 0.4734, "step": 29389 }, { "epoch": 0.75, "grad_norm": 1.396821141242981, "learning_rate": 3.0258524588729054e-06, "loss": 0.5813, "step": 29390 }, { "epoch": 0.75, "grad_norm": 3.522646903991699, "learning_rate": 3.0252575462877875e-06, "loss": 0.5228, "step": 29391 }, { "epoch": 0.75, "grad_norm": 2.8064939975738525, "learning_rate": 3.0246626817682124e-06, "loss": 0.5995, "step": 29392 }, { "epoch": 0.75, "grad_norm": 1.7143720388412476, "learning_rate": 3.0240678653182766e-06, "loss": 0.4999, "step": 29393 }, { "epoch": 0.75, "grad_norm": 0.9593890905380249, "learning_rate": 3.0234730969420855e-06, "loss": 0.4619, "step": 29394 }, { "epoch": 0.75, "grad_norm": 1.9551531076431274, "learning_rate": 3.022878376643735e-06, "loss": 0.6873, "step": 29395 }, { "epoch": 0.75, "grad_norm": 2.1201796531677246, "learning_rate": 3.02228370442732e-06, "loss": 0.6098, "step": 29396 }, { "epoch": 0.75, "grad_norm": 1.4324750900268555, "learning_rate": 3.0216890802969443e-06, "loss": 0.32, "step": 29397 }, { "epoch": 0.75, "grad_norm": 17.01156234741211, "learning_rate": 3.021094504256704e-06, "loss": 0.5289, "step": 29398 }, { "epoch": 0.75, "grad_norm": 10.581886291503906, "learning_rate": 3.0204999763106955e-06, "loss": 0.5893, "step": 29399 }, { "epoch": 0.75, "grad_norm": 1.2673816680908203, "learning_rate": 3.0199054964630137e-06, "loss": 0.5627, "step": 29400 }, { "epoch": 0.75, "grad_norm": 1.6502145528793335, "learning_rate": 3.0193110647177604e-06, "loss": 0.574, "step": 29401 }, { "epoch": 0.75, "grad_norm": 14.932732582092285, "learning_rate": 3.01871668107903e-06, "loss": 0.4682, "step": 29402 }, { "epoch": 0.75, "grad_norm": 1.180661678314209, "learning_rate": 3.0181223455509145e-06, "loss": 0.4311, "step": 29403 }, { "epoch": 0.75, "grad_norm": 1.1759663820266724, "learning_rate": 3.0175280581375165e-06, "loss": 0.5323, "step": 29404 }, { "epoch": 0.75, "grad_norm": 1.2030730247497559, "learning_rate": 3.0169338188429286e-06, "loss": 0.4878, "step": 29405 }, { "epoch": 0.75, "grad_norm": 1.0161749124526978, "learning_rate": 3.0163396276712453e-06, "loss": 0.4991, "step": 29406 }, { "epoch": 0.75, "grad_norm": 1.6231826543807983, "learning_rate": 3.0157454846265588e-06, "loss": 0.58, "step": 29407 }, { "epoch": 0.75, "grad_norm": 4.113776206970215, "learning_rate": 3.01515138971297e-06, "loss": 0.4824, "step": 29408 }, { "epoch": 0.75, "grad_norm": 1.2122761011123657, "learning_rate": 3.01455734293457e-06, "loss": 0.7454, "step": 29409 }, { "epoch": 0.75, "grad_norm": 2.326988458633423, "learning_rate": 3.0139633442954475e-06, "loss": 0.5725, "step": 29410 }, { "epoch": 0.75, "grad_norm": 1.275494933128357, "learning_rate": 3.013369393799704e-06, "loss": 0.4155, "step": 29411 }, { "epoch": 0.75, "grad_norm": 2.3578991889953613, "learning_rate": 3.0127754914514295e-06, "loss": 0.5702, "step": 29412 }, { "epoch": 0.75, "grad_norm": 4.423821449279785, "learning_rate": 3.012181637254714e-06, "loss": 0.5527, "step": 29413 }, { "epoch": 0.75, "grad_norm": 1.3201180696487427, "learning_rate": 3.011587831213655e-06, "loss": 0.5965, "step": 29414 }, { "epoch": 0.75, "grad_norm": 6.858901023864746, "learning_rate": 3.010994073332342e-06, "loss": 0.4722, "step": 29415 }, { "epoch": 0.75, "grad_norm": 1.3886317014694214, "learning_rate": 3.010400363614867e-06, "loss": 0.3977, "step": 29416 }, { "epoch": 0.75, "grad_norm": 1.458389401435852, "learning_rate": 3.009806702065319e-06, "loss": 0.4625, "step": 29417 }, { "epoch": 0.75, "grad_norm": 1.12062406539917, "learning_rate": 3.0092130886877937e-06, "loss": 0.5666, "step": 29418 }, { "epoch": 0.75, "grad_norm": 1.9286646842956543, "learning_rate": 3.0086195234863804e-06, "loss": 0.5095, "step": 29419 }, { "epoch": 0.75, "grad_norm": 4.493889808654785, "learning_rate": 3.0080260064651646e-06, "loss": 0.673, "step": 29420 }, { "epoch": 0.75, "grad_norm": 2.121943235397339, "learning_rate": 3.0074325376282456e-06, "loss": 0.4902, "step": 29421 }, { "epoch": 0.75, "grad_norm": 2.7846176624298096, "learning_rate": 3.0068391169797073e-06, "loss": 0.4802, "step": 29422 }, { "epoch": 0.75, "grad_norm": 1.8263014554977417, "learning_rate": 3.0062457445236405e-06, "loss": 0.5676, "step": 29423 }, { "epoch": 0.75, "grad_norm": 1.1668912172317505, "learning_rate": 3.005652420264131e-06, "loss": 0.483, "step": 29424 }, { "epoch": 0.75, "grad_norm": 1.2959091663360596, "learning_rate": 3.0050591442052736e-06, "loss": 0.3253, "step": 29425 }, { "epoch": 0.75, "grad_norm": 3.5458548069000244, "learning_rate": 3.0044659163511547e-06, "loss": 0.7333, "step": 29426 }, { "epoch": 0.75, "grad_norm": 1.127648949623108, "learning_rate": 3.0038727367058575e-06, "loss": 0.3226, "step": 29427 }, { "epoch": 0.75, "grad_norm": 1.698155164718628, "learning_rate": 3.003279605273478e-06, "loss": 0.5627, "step": 29428 }, { "epoch": 0.75, "grad_norm": 1.0032241344451904, "learning_rate": 3.0026865220580994e-06, "loss": 0.3845, "step": 29429 }, { "epoch": 0.75, "grad_norm": 1.6017649173736572, "learning_rate": 3.0020934870638064e-06, "loss": 0.6006, "step": 29430 }, { "epoch": 0.75, "grad_norm": 2.709754705429077, "learning_rate": 3.0015005002946905e-06, "loss": 0.6775, "step": 29431 }, { "epoch": 0.75, "grad_norm": 1.1603846549987793, "learning_rate": 3.000907561754838e-06, "loss": 0.4732, "step": 29432 }, { "epoch": 0.75, "grad_norm": 2.9210994243621826, "learning_rate": 3.0003146714483323e-06, "loss": 0.5459, "step": 29433 }, { "epoch": 0.75, "grad_norm": 6.456432819366455, "learning_rate": 2.9997218293792596e-06, "loss": 0.6562, "step": 29434 }, { "epoch": 0.75, "grad_norm": 1.1692088842391968, "learning_rate": 2.9991290355517066e-06, "loss": 0.4795, "step": 29435 }, { "epoch": 0.75, "grad_norm": 1.7144559621810913, "learning_rate": 2.998536289969759e-06, "loss": 0.44, "step": 29436 }, { "epoch": 0.75, "grad_norm": 4.7772111892700195, "learning_rate": 2.9979435926374955e-06, "loss": 0.5041, "step": 29437 }, { "epoch": 0.75, "grad_norm": 1.5548399686813354, "learning_rate": 2.99735094355901e-06, "loss": 0.4444, "step": 29438 }, { "epoch": 0.75, "grad_norm": 1.4299120903015137, "learning_rate": 2.996758342738383e-06, "loss": 0.5081, "step": 29439 }, { "epoch": 0.75, "grad_norm": 1.8560607433319092, "learning_rate": 2.9961657901796925e-06, "loss": 0.4998, "step": 29440 }, { "epoch": 0.75, "grad_norm": 1.0149286985397339, "learning_rate": 2.9955732858870325e-06, "loss": 0.5391, "step": 29441 }, { "epoch": 0.75, "grad_norm": 3.8383102416992188, "learning_rate": 2.994980829864479e-06, "loss": 0.4848, "step": 29442 }, { "epoch": 0.75, "grad_norm": 1.049866795539856, "learning_rate": 2.9943884221161145e-06, "loss": 0.6159, "step": 29443 }, { "epoch": 0.75, "grad_norm": 12.9933443069458, "learning_rate": 2.993796062646026e-06, "loss": 0.553, "step": 29444 }, { "epoch": 0.75, "grad_norm": 1.0671552419662476, "learning_rate": 2.9932037514582933e-06, "loss": 0.3789, "step": 29445 }, { "epoch": 0.75, "grad_norm": 1.3306059837341309, "learning_rate": 2.992611488556999e-06, "loss": 0.4895, "step": 29446 }, { "epoch": 0.75, "grad_norm": 0.9231504797935486, "learning_rate": 2.9920192739462208e-06, "loss": 0.3688, "step": 29447 }, { "epoch": 0.75, "grad_norm": 1.0019384622573853, "learning_rate": 2.991427107630045e-06, "loss": 0.4739, "step": 29448 }, { "epoch": 0.75, "grad_norm": 3.029435157775879, "learning_rate": 2.990834989612551e-06, "loss": 0.5764, "step": 29449 }, { "epoch": 0.75, "grad_norm": 0.8930914402008057, "learning_rate": 2.9902429198978157e-06, "loss": 0.4361, "step": 29450 }, { "epoch": 0.75, "grad_norm": 4.489241600036621, "learning_rate": 2.9896508984899255e-06, "loss": 0.5439, "step": 29451 }, { "epoch": 0.75, "grad_norm": 1.084303617477417, "learning_rate": 2.989058925392957e-06, "loss": 0.3438, "step": 29452 }, { "epoch": 0.75, "grad_norm": 2.3828742504119873, "learning_rate": 2.9884670006109894e-06, "loss": 0.4564, "step": 29453 }, { "epoch": 0.75, "grad_norm": 3.1319358348846436, "learning_rate": 2.9878751241480984e-06, "loss": 0.4459, "step": 29454 }, { "epoch": 0.75, "grad_norm": 2.037770986557007, "learning_rate": 2.987283296008371e-06, "loss": 0.5679, "step": 29455 }, { "epoch": 0.75, "grad_norm": 1.25228750705719, "learning_rate": 2.98669151619588e-06, "loss": 0.4567, "step": 29456 }, { "epoch": 0.75, "grad_norm": 1.3085578680038452, "learning_rate": 2.986099784714702e-06, "loss": 0.5218, "step": 29457 }, { "epoch": 0.76, "grad_norm": 1.325293779373169, "learning_rate": 2.9855081015689212e-06, "loss": 0.5186, "step": 29458 }, { "epoch": 0.76, "grad_norm": 1.2598768472671509, "learning_rate": 2.984916466762612e-06, "loss": 0.519, "step": 29459 }, { "epoch": 0.76, "grad_norm": 1.6896251440048218, "learning_rate": 2.984324880299848e-06, "loss": 0.5306, "step": 29460 }, { "epoch": 0.76, "grad_norm": 6.690264701843262, "learning_rate": 2.9837333421847124e-06, "loss": 0.6037, "step": 29461 }, { "epoch": 0.76, "grad_norm": 1.6434606313705444, "learning_rate": 2.9831418524212774e-06, "loss": 0.4304, "step": 29462 }, { "epoch": 0.76, "grad_norm": 1.3714207410812378, "learning_rate": 2.982550411013622e-06, "loss": 0.5927, "step": 29463 }, { "epoch": 0.76, "grad_norm": 2.4016950130462646, "learning_rate": 2.9819590179658165e-06, "loss": 0.5658, "step": 29464 }, { "epoch": 0.76, "grad_norm": 4.106070041656494, "learning_rate": 2.981367673281943e-06, "loss": 0.6423, "step": 29465 }, { "epoch": 0.76, "grad_norm": 10.013832092285156, "learning_rate": 2.9807763769660747e-06, "loss": 0.8179, "step": 29466 }, { "epoch": 0.76, "grad_norm": 7.338293075561523, "learning_rate": 2.9801851290222816e-06, "loss": 0.5621, "step": 29467 }, { "epoch": 0.76, "grad_norm": 1.2323143482208252, "learning_rate": 2.979593929454646e-06, "loss": 0.5253, "step": 29468 }, { "epoch": 0.76, "grad_norm": 1.6830593347549438, "learning_rate": 2.9790027782672384e-06, "loss": 0.5456, "step": 29469 }, { "epoch": 0.76, "grad_norm": 1.529060959815979, "learning_rate": 2.9784116754641324e-06, "loss": 0.5761, "step": 29470 }, { "epoch": 0.76, "grad_norm": 0.8809759020805359, "learning_rate": 2.977820621049399e-06, "loss": 0.403, "step": 29471 }, { "epoch": 0.76, "grad_norm": 1.7645890712738037, "learning_rate": 2.9772296150271174e-06, "loss": 0.3294, "step": 29472 }, { "epoch": 0.76, "grad_norm": 1.0239166021347046, "learning_rate": 2.9766386574013563e-06, "loss": 0.4921, "step": 29473 }, { "epoch": 0.76, "grad_norm": 16.30030059814453, "learning_rate": 2.976047748176186e-06, "loss": 0.5466, "step": 29474 }, { "epoch": 0.76, "grad_norm": 1.1215187311172485, "learning_rate": 2.9754568873556866e-06, "loss": 0.5416, "step": 29475 }, { "epoch": 0.76, "grad_norm": 2.926849365234375, "learning_rate": 2.974866074943923e-06, "loss": 0.6964, "step": 29476 }, { "epoch": 0.76, "grad_norm": 9.134939193725586, "learning_rate": 2.9742753109449695e-06, "loss": 0.4595, "step": 29477 }, { "epoch": 0.76, "grad_norm": 1.821851372718811, "learning_rate": 2.9736845953628945e-06, "loss": 0.2676, "step": 29478 }, { "epoch": 0.76, "grad_norm": 1.4711523056030273, "learning_rate": 2.9730939282017725e-06, "loss": 0.5865, "step": 29479 }, { "epoch": 0.76, "grad_norm": 1.412117600440979, "learning_rate": 2.972503309465673e-06, "loss": 0.6038, "step": 29480 }, { "epoch": 0.76, "grad_norm": 1.8585807085037231, "learning_rate": 2.971912739158662e-06, "loss": 0.5978, "step": 29481 }, { "epoch": 0.76, "grad_norm": 1.3028310537338257, "learning_rate": 2.971322217284817e-06, "loss": 0.4825, "step": 29482 }, { "epoch": 0.76, "grad_norm": 2.113668918609619, "learning_rate": 2.970731743848203e-06, "loss": 0.5918, "step": 29483 }, { "epoch": 0.76, "grad_norm": 1.926222324371338, "learning_rate": 2.970141318852886e-06, "loss": 0.5089, "step": 29484 }, { "epoch": 0.76, "grad_norm": 1.5705674886703491, "learning_rate": 2.9695509423029422e-06, "loss": 0.3681, "step": 29485 }, { "epoch": 0.76, "grad_norm": 1.7590621709823608, "learning_rate": 2.968960614202436e-06, "loss": 0.5858, "step": 29486 }, { "epoch": 0.76, "grad_norm": 0.9779512286186218, "learning_rate": 2.968370334555436e-06, "loss": 0.5165, "step": 29487 }, { "epoch": 0.76, "grad_norm": 1.3728450536727905, "learning_rate": 2.967780103366007e-06, "loss": 0.5484, "step": 29488 }, { "epoch": 0.76, "grad_norm": 2.5947425365448, "learning_rate": 2.967189920638223e-06, "loss": 0.6918, "step": 29489 }, { "epoch": 0.76, "grad_norm": 1.464884877204895, "learning_rate": 2.9665997863761465e-06, "loss": 0.4172, "step": 29490 }, { "epoch": 0.76, "grad_norm": 1.0528982877731323, "learning_rate": 2.966009700583843e-06, "loss": 0.5337, "step": 29491 }, { "epoch": 0.76, "grad_norm": 3.2415456771850586, "learning_rate": 2.965419663265384e-06, "loss": 0.5586, "step": 29492 }, { "epoch": 0.76, "grad_norm": 2.236287832260132, "learning_rate": 2.964829674424834e-06, "loss": 0.438, "step": 29493 }, { "epoch": 0.76, "grad_norm": 7.817807197570801, "learning_rate": 2.9642397340662577e-06, "loss": 0.624, "step": 29494 }, { "epoch": 0.76, "grad_norm": 1.64248788356781, "learning_rate": 2.9636498421937178e-06, "loss": 0.5146, "step": 29495 }, { "epoch": 0.76, "grad_norm": 1.7403711080551147, "learning_rate": 2.963059998811285e-06, "loss": 0.6506, "step": 29496 }, { "epoch": 0.76, "grad_norm": 5.77842903137207, "learning_rate": 2.9624702039230224e-06, "loss": 0.7405, "step": 29497 }, { "epoch": 0.76, "grad_norm": 4.749969959259033, "learning_rate": 2.9618804575329905e-06, "loss": 0.3752, "step": 29498 }, { "epoch": 0.76, "grad_norm": 1.527521014213562, "learning_rate": 2.961290759645259e-06, "loss": 0.6078, "step": 29499 }, { "epoch": 0.76, "grad_norm": 1.7419487237930298, "learning_rate": 2.9607011102638905e-06, "loss": 0.4506, "step": 29500 }, { "epoch": 0.76, "grad_norm": 1.7279068231582642, "learning_rate": 2.9601115093929466e-06, "loss": 0.5908, "step": 29501 }, { "epoch": 0.76, "grad_norm": 1.500699520111084, "learning_rate": 2.9595219570364886e-06, "loss": 0.5404, "step": 29502 }, { "epoch": 0.76, "grad_norm": 4.403014659881592, "learning_rate": 2.9589324531985852e-06, "loss": 0.7051, "step": 29503 }, { "epoch": 0.76, "grad_norm": 1.364414930343628, "learning_rate": 2.9583429978832954e-06, "loss": 0.5256, "step": 29504 }, { "epoch": 0.76, "grad_norm": 1.9986311197280884, "learning_rate": 2.9577535910946786e-06, "loss": 0.5451, "step": 29505 }, { "epoch": 0.76, "grad_norm": 1.533293604850769, "learning_rate": 2.9571642328368034e-06, "loss": 0.5155, "step": 29506 }, { "epoch": 0.76, "grad_norm": 1.4436498880386353, "learning_rate": 2.9565749231137277e-06, "loss": 0.4686, "step": 29507 }, { "epoch": 0.76, "grad_norm": 3.766799211502075, "learning_rate": 2.955985661929508e-06, "loss": 0.4859, "step": 29508 }, { "epoch": 0.76, "grad_norm": 2.4000298976898193, "learning_rate": 2.955396449288214e-06, "loss": 0.694, "step": 29509 }, { "epoch": 0.76, "grad_norm": 5.274509429931641, "learning_rate": 2.9548072851939012e-06, "loss": 0.4713, "step": 29510 }, { "epoch": 0.76, "grad_norm": 1.2744845151901245, "learning_rate": 2.95421816965063e-06, "loss": 0.5858, "step": 29511 }, { "epoch": 0.76, "grad_norm": 1.7309975624084473, "learning_rate": 2.9536291026624584e-06, "loss": 0.5096, "step": 29512 }, { "epoch": 0.76, "grad_norm": 1.0556776523590088, "learning_rate": 2.9530400842334504e-06, "loss": 0.5636, "step": 29513 }, { "epoch": 0.76, "grad_norm": 1.5038120746612549, "learning_rate": 2.9524511143676636e-06, "loss": 0.5753, "step": 29514 }, { "epoch": 0.76, "grad_norm": 4.2258076667785645, "learning_rate": 2.9518621930691526e-06, "loss": 0.4361, "step": 29515 }, { "epoch": 0.76, "grad_norm": 1.3900141716003418, "learning_rate": 2.9512733203419817e-06, "loss": 0.4387, "step": 29516 }, { "epoch": 0.76, "grad_norm": 1.7014825344085693, "learning_rate": 2.950684496190208e-06, "loss": 0.4887, "step": 29517 }, { "epoch": 0.76, "grad_norm": 1.9356693029403687, "learning_rate": 2.950095720617887e-06, "loss": 0.5708, "step": 29518 }, { "epoch": 0.76, "grad_norm": 3.945577621459961, "learning_rate": 2.949506993629074e-06, "loss": 0.4325, "step": 29519 }, { "epoch": 0.76, "grad_norm": 1.5747977495193481, "learning_rate": 2.9489183152278323e-06, "loss": 0.5052, "step": 29520 }, { "epoch": 0.76, "grad_norm": 2.5302140712738037, "learning_rate": 2.9483296854182164e-06, "loss": 0.6792, "step": 29521 }, { "epoch": 0.76, "grad_norm": 9.317440032958984, "learning_rate": 2.947741104204278e-06, "loss": 0.4309, "step": 29522 }, { "epoch": 0.76, "grad_norm": 1.3144142627716064, "learning_rate": 2.9471525715900808e-06, "loss": 0.5739, "step": 29523 }, { "epoch": 0.76, "grad_norm": 2.2118029594421387, "learning_rate": 2.9465640875796765e-06, "loss": 0.76, "step": 29524 }, { "epoch": 0.76, "grad_norm": 1.3034824132919312, "learning_rate": 2.945975652177119e-06, "loss": 0.482, "step": 29525 }, { "epoch": 0.76, "grad_norm": 1.2634233236312866, "learning_rate": 2.9453872653864677e-06, "loss": 0.4589, "step": 29526 }, { "epoch": 0.76, "grad_norm": 5.904466152191162, "learning_rate": 2.9447989272117762e-06, "loss": 0.7202, "step": 29527 }, { "epoch": 0.76, "grad_norm": 2.6438329219818115, "learning_rate": 2.944210637657097e-06, "loss": 0.5378, "step": 29528 }, { "epoch": 0.76, "grad_norm": 3.3506155014038086, "learning_rate": 2.943622396726482e-06, "loss": 0.6009, "step": 29529 }, { "epoch": 0.76, "grad_norm": 7.3934125900268555, "learning_rate": 2.9430342044239925e-06, "loss": 0.6097, "step": 29530 }, { "epoch": 0.76, "grad_norm": 5.958125114440918, "learning_rate": 2.9424460607536766e-06, "loss": 0.59, "step": 29531 }, { "epoch": 0.76, "grad_norm": 1.6441155672073364, "learning_rate": 2.9418579657195854e-06, "loss": 0.4621, "step": 29532 }, { "epoch": 0.76, "grad_norm": 1.4629935026168823, "learning_rate": 2.9412699193257786e-06, "loss": 0.6103, "step": 29533 }, { "epoch": 0.76, "grad_norm": 1.9315825700759888, "learning_rate": 2.940681921576305e-06, "loss": 0.7766, "step": 29534 }, { "epoch": 0.76, "grad_norm": 1.0476479530334473, "learning_rate": 2.9400939724752153e-06, "loss": 0.4538, "step": 29535 }, { "epoch": 0.76, "grad_norm": 1.8255409002304077, "learning_rate": 2.93950607202656e-06, "loss": 0.5525, "step": 29536 }, { "epoch": 0.76, "grad_norm": 1.6607333421707153, "learning_rate": 2.9389182202343956e-06, "loss": 0.4517, "step": 29537 }, { "epoch": 0.76, "grad_norm": 10.484491348266602, "learning_rate": 2.9383304171027714e-06, "loss": 0.5888, "step": 29538 }, { "epoch": 0.76, "grad_norm": 1.911270022392273, "learning_rate": 2.937742662635734e-06, "loss": 0.6797, "step": 29539 }, { "epoch": 0.76, "grad_norm": 4.755021095275879, "learning_rate": 2.9371549568373404e-06, "loss": 0.422, "step": 29540 }, { "epoch": 0.76, "grad_norm": 3.9653193950653076, "learning_rate": 2.936567299711637e-06, "loss": 0.3919, "step": 29541 }, { "epoch": 0.76, "grad_norm": 1.9910802841186523, "learning_rate": 2.9359796912626747e-06, "loss": 0.5161, "step": 29542 }, { "epoch": 0.76, "grad_norm": 1.553202509880066, "learning_rate": 2.935392131494499e-06, "loss": 0.5213, "step": 29543 }, { "epoch": 0.76, "grad_norm": 1.076302170753479, "learning_rate": 2.9348046204111647e-06, "loss": 0.4318, "step": 29544 }, { "epoch": 0.76, "grad_norm": 1.2386252880096436, "learning_rate": 2.9342171580167187e-06, "loss": 0.5145, "step": 29545 }, { "epoch": 0.76, "grad_norm": 1.2059431076049805, "learning_rate": 2.933629744315206e-06, "loss": 0.5691, "step": 29546 }, { "epoch": 0.76, "grad_norm": 2.9662322998046875, "learning_rate": 2.933042379310681e-06, "loss": 0.4698, "step": 29547 }, { "epoch": 0.76, "grad_norm": 1.017730474472046, "learning_rate": 2.932455063007188e-06, "loss": 0.354, "step": 29548 }, { "epoch": 0.76, "grad_norm": 4.6270270347595215, "learning_rate": 2.9318677954087705e-06, "loss": 0.4884, "step": 29549 }, { "epoch": 0.76, "grad_norm": 1.5669806003570557, "learning_rate": 2.931280576519483e-06, "loss": 0.5121, "step": 29550 }, { "epoch": 0.76, "grad_norm": 1.652294635772705, "learning_rate": 2.930693406343369e-06, "loss": 0.5598, "step": 29551 }, { "epoch": 0.76, "grad_norm": 2.2401363849639893, "learning_rate": 2.9301062848844752e-06, "loss": 0.4766, "step": 29552 }, { "epoch": 0.76, "grad_norm": 1.3218224048614502, "learning_rate": 2.9295192121468428e-06, "loss": 0.5765, "step": 29553 }, { "epoch": 0.76, "grad_norm": 1.4892834424972534, "learning_rate": 2.9289321881345257e-06, "loss": 0.4213, "step": 29554 }, { "epoch": 0.76, "grad_norm": 1.6409270763397217, "learning_rate": 2.9283452128515655e-06, "loss": 0.5428, "step": 29555 }, { "epoch": 0.76, "grad_norm": 4.7724223136901855, "learning_rate": 2.927758286302004e-06, "loss": 0.4998, "step": 29556 }, { "epoch": 0.76, "grad_norm": 1.2675881385803223, "learning_rate": 2.927171408489892e-06, "loss": 0.3903, "step": 29557 }, { "epoch": 0.76, "grad_norm": 1.4989752769470215, "learning_rate": 2.9265845794192717e-06, "loss": 0.5292, "step": 29558 }, { "epoch": 0.76, "grad_norm": 0.9725623726844788, "learning_rate": 2.9259977990941856e-06, "loss": 0.5119, "step": 29559 }, { "epoch": 0.76, "grad_norm": 1.4183493852615356, "learning_rate": 2.9254110675186764e-06, "loss": 0.3187, "step": 29560 }, { "epoch": 0.76, "grad_norm": 1.7107343673706055, "learning_rate": 2.9248243846967916e-06, "loss": 0.5113, "step": 29561 }, { "epoch": 0.76, "grad_norm": 1.5994746685028076, "learning_rate": 2.9242377506325725e-06, "loss": 0.472, "step": 29562 }, { "epoch": 0.76, "grad_norm": 2.604390859603882, "learning_rate": 2.923651165330058e-06, "loss": 0.484, "step": 29563 }, { "epoch": 0.76, "grad_norm": 1.905532717704773, "learning_rate": 2.923064628793297e-06, "loss": 0.4543, "step": 29564 }, { "epoch": 0.76, "grad_norm": 1.5577585697174072, "learning_rate": 2.922478141026328e-06, "loss": 0.5222, "step": 29565 }, { "epoch": 0.76, "grad_norm": 1.2676279544830322, "learning_rate": 2.9218917020331915e-06, "loss": 0.4783, "step": 29566 }, { "epoch": 0.76, "grad_norm": 1.6136441230773926, "learning_rate": 2.9213053118179325e-06, "loss": 0.5817, "step": 29567 }, { "epoch": 0.76, "grad_norm": 1.562537431716919, "learning_rate": 2.9207189703845896e-06, "loss": 0.5511, "step": 29568 }, { "epoch": 0.76, "grad_norm": 2.5726983547210693, "learning_rate": 2.920132677737205e-06, "loss": 0.6913, "step": 29569 }, { "epoch": 0.76, "grad_norm": 1.50638747215271, "learning_rate": 2.919546433879814e-06, "loss": 0.3921, "step": 29570 }, { "epoch": 0.76, "grad_norm": 1.7014343738555908, "learning_rate": 2.918960238816464e-06, "loss": 0.4731, "step": 29571 }, { "epoch": 0.76, "grad_norm": 2.2409348487854004, "learning_rate": 2.9183740925511918e-06, "loss": 0.5798, "step": 29572 }, { "epoch": 0.76, "grad_norm": 2.329044818878174, "learning_rate": 2.917787995088033e-06, "loss": 0.7756, "step": 29573 }, { "epoch": 0.76, "grad_norm": 1.441169261932373, "learning_rate": 2.917201946431032e-06, "loss": 0.5224, "step": 29574 }, { "epoch": 0.76, "grad_norm": 6.818920612335205, "learning_rate": 2.916615946584226e-06, "loss": 0.8045, "step": 29575 }, { "epoch": 0.76, "grad_norm": 13.892993927001953, "learning_rate": 2.9160299955516537e-06, "loss": 0.3329, "step": 29576 }, { "epoch": 0.76, "grad_norm": 1.7725716829299927, "learning_rate": 2.9154440933373472e-06, "loss": 0.4861, "step": 29577 }, { "epoch": 0.76, "grad_norm": 1.2466522455215454, "learning_rate": 2.9148582399453528e-06, "loss": 0.5273, "step": 29578 }, { "epoch": 0.76, "grad_norm": 1.5595555305480957, "learning_rate": 2.914272435379705e-06, "loss": 0.4292, "step": 29579 }, { "epoch": 0.76, "grad_norm": 1.3177802562713623, "learning_rate": 2.913686679644435e-06, "loss": 0.3216, "step": 29580 }, { "epoch": 0.76, "grad_norm": 4.575512886047363, "learning_rate": 2.913100972743589e-06, "loss": 0.4608, "step": 29581 }, { "epoch": 0.76, "grad_norm": 1.6020954847335815, "learning_rate": 2.912515314681198e-06, "loss": 0.5179, "step": 29582 }, { "epoch": 0.76, "grad_norm": 2.1573405265808105, "learning_rate": 2.9119297054612985e-06, "loss": 0.5312, "step": 29583 }, { "epoch": 0.76, "grad_norm": 1.5817736387252808, "learning_rate": 2.911344145087923e-06, "loss": 0.5798, "step": 29584 }, { "epoch": 0.76, "grad_norm": 1.8231805562973022, "learning_rate": 2.910758633565114e-06, "loss": 0.6387, "step": 29585 }, { "epoch": 0.76, "grad_norm": 2.3051464557647705, "learning_rate": 2.910173170896903e-06, "loss": 0.5015, "step": 29586 }, { "epoch": 0.76, "grad_norm": 2.9383838176727295, "learning_rate": 2.90958775708732e-06, "loss": 0.5181, "step": 29587 }, { "epoch": 0.76, "grad_norm": 1.8895173072814941, "learning_rate": 2.909002392140408e-06, "loss": 0.5483, "step": 29588 }, { "epoch": 0.76, "grad_norm": 2.0188465118408203, "learning_rate": 2.908417076060195e-06, "loss": 0.4751, "step": 29589 }, { "epoch": 0.76, "grad_norm": 1.789848804473877, "learning_rate": 2.907831808850713e-06, "loss": 0.6015, "step": 29590 }, { "epoch": 0.76, "grad_norm": 15.557147979736328, "learning_rate": 2.907246590516003e-06, "loss": 0.5756, "step": 29591 }, { "epoch": 0.76, "grad_norm": 0.7957785129547119, "learning_rate": 2.9066614210600917e-06, "loss": 0.437, "step": 29592 }, { "epoch": 0.76, "grad_norm": 6.8368988037109375, "learning_rate": 2.906076300487015e-06, "loss": 0.5013, "step": 29593 }, { "epoch": 0.76, "grad_norm": 1.6004509925842285, "learning_rate": 2.9054912288007997e-06, "loss": 0.4956, "step": 29594 }, { "epoch": 0.76, "grad_norm": 1.0846927165985107, "learning_rate": 2.9049062060054843e-06, "loss": 0.4363, "step": 29595 }, { "epoch": 0.76, "grad_norm": 2.2233617305755615, "learning_rate": 2.9043212321050984e-06, "loss": 0.5956, "step": 29596 }, { "epoch": 0.76, "grad_norm": 7.368587970733643, "learning_rate": 2.9037363071036696e-06, "loss": 0.3404, "step": 29597 }, { "epoch": 0.76, "grad_norm": 1.1191682815551758, "learning_rate": 2.9031514310052343e-06, "loss": 0.4767, "step": 29598 }, { "epoch": 0.76, "grad_norm": 8.131915092468262, "learning_rate": 2.9025666038138213e-06, "loss": 0.45, "step": 29599 }, { "epoch": 0.76, "grad_norm": 1.1610591411590576, "learning_rate": 2.901981825533461e-06, "loss": 0.4437, "step": 29600 }, { "epoch": 0.76, "grad_norm": 1.6530005931854248, "learning_rate": 2.9013970961681783e-06, "loss": 0.5203, "step": 29601 }, { "epoch": 0.76, "grad_norm": 0.9918399453163147, "learning_rate": 2.9008124157220097e-06, "loss": 0.4985, "step": 29602 }, { "epoch": 0.76, "grad_norm": 2.555945634841919, "learning_rate": 2.900227784198982e-06, "loss": 0.5293, "step": 29603 }, { "epoch": 0.76, "grad_norm": 1.0781866312026978, "learning_rate": 2.899643201603122e-06, "loss": 0.4784, "step": 29604 }, { "epoch": 0.76, "grad_norm": 2.3296592235565186, "learning_rate": 2.8990586679384625e-06, "loss": 0.6752, "step": 29605 }, { "epoch": 0.76, "grad_norm": 1.9597982168197632, "learning_rate": 2.898474183209029e-06, "loss": 0.5397, "step": 29606 }, { "epoch": 0.76, "grad_norm": 1.956193447113037, "learning_rate": 2.897889747418847e-06, "loss": 0.5348, "step": 29607 }, { "epoch": 0.76, "grad_norm": 1.155150055885315, "learning_rate": 2.8973053605719503e-06, "loss": 0.453, "step": 29608 }, { "epoch": 0.76, "grad_norm": 1.334926962852478, "learning_rate": 2.896721022672363e-06, "loss": 0.3845, "step": 29609 }, { "epoch": 0.76, "grad_norm": 6.767117977142334, "learning_rate": 2.896136733724111e-06, "loss": 0.4369, "step": 29610 }, { "epoch": 0.76, "grad_norm": 1.6957601308822632, "learning_rate": 2.8955524937312183e-06, "loss": 0.5328, "step": 29611 }, { "epoch": 0.76, "grad_norm": 1.4820497035980225, "learning_rate": 2.8949683026977182e-06, "loss": 0.5858, "step": 29612 }, { "epoch": 0.76, "grad_norm": 1.2088865041732788, "learning_rate": 2.8943841606276323e-06, "loss": 0.5454, "step": 29613 }, { "epoch": 0.76, "grad_norm": 0.9908286333084106, "learning_rate": 2.8938000675249835e-06, "loss": 0.4494, "step": 29614 }, { "epoch": 0.76, "grad_norm": 1.1995882987976074, "learning_rate": 2.8932160233938033e-06, "loss": 0.3326, "step": 29615 }, { "epoch": 0.76, "grad_norm": 1.1306178569793701, "learning_rate": 2.8926320282381126e-06, "loss": 0.4639, "step": 29616 }, { "epoch": 0.76, "grad_norm": 1.6818079948425293, "learning_rate": 2.8920480820619377e-06, "loss": 0.3934, "step": 29617 }, { "epoch": 0.76, "grad_norm": 1.182187795639038, "learning_rate": 2.8914641848692983e-06, "loss": 0.5428, "step": 29618 }, { "epoch": 0.76, "grad_norm": 1.3688825368881226, "learning_rate": 2.8908803366642246e-06, "loss": 0.5286, "step": 29619 }, { "epoch": 0.76, "grad_norm": 1.8976149559020996, "learning_rate": 2.8902965374507373e-06, "loss": 0.5159, "step": 29620 }, { "epoch": 0.76, "grad_norm": 2.7975993156433105, "learning_rate": 2.889712787232857e-06, "loss": 0.5473, "step": 29621 }, { "epoch": 0.76, "grad_norm": 1.7135323286056519, "learning_rate": 2.8891290860146116e-06, "loss": 0.5726, "step": 29622 }, { "epoch": 0.76, "grad_norm": 1.0454645156860352, "learning_rate": 2.888545433800021e-06, "loss": 0.5677, "step": 29623 }, { "epoch": 0.76, "grad_norm": 2.244220495223999, "learning_rate": 2.8879618305931077e-06, "loss": 0.4624, "step": 29624 }, { "epoch": 0.76, "grad_norm": 7.18324613571167, "learning_rate": 2.8873782763978895e-06, "loss": 0.3354, "step": 29625 }, { "epoch": 0.76, "grad_norm": 1.239971399307251, "learning_rate": 2.8867947712183954e-06, "loss": 0.4837, "step": 29626 }, { "epoch": 0.76, "grad_norm": 1.910967230796814, "learning_rate": 2.8862113150586435e-06, "loss": 0.5303, "step": 29627 }, { "epoch": 0.76, "grad_norm": 1.520381212234497, "learning_rate": 2.885627907922649e-06, "loss": 0.4857, "step": 29628 }, { "epoch": 0.76, "grad_norm": 1.3067739009857178, "learning_rate": 2.885044549814442e-06, "loss": 0.4472, "step": 29629 }, { "epoch": 0.76, "grad_norm": 1.0086185932159424, "learning_rate": 2.884461240738037e-06, "loss": 0.3673, "step": 29630 }, { "epoch": 0.76, "grad_norm": 11.154350280761719, "learning_rate": 2.8838779806974514e-06, "loss": 0.6324, "step": 29631 }, { "epoch": 0.76, "grad_norm": 1.7668981552124023, "learning_rate": 2.8832947696967117e-06, "loss": 0.7163, "step": 29632 }, { "epoch": 0.76, "grad_norm": 1.735873818397522, "learning_rate": 2.882711607739833e-06, "loss": 0.5552, "step": 29633 }, { "epoch": 0.76, "grad_norm": 3.952547788619995, "learning_rate": 2.882128494830835e-06, "loss": 0.6712, "step": 29634 }, { "epoch": 0.76, "grad_norm": 2.194828748703003, "learning_rate": 2.881545430973731e-06, "loss": 0.6285, "step": 29635 }, { "epoch": 0.76, "grad_norm": 1.8398661613464355, "learning_rate": 2.880962416172548e-06, "loss": 0.4572, "step": 29636 }, { "epoch": 0.76, "grad_norm": 1.0294697284698486, "learning_rate": 2.8803794504312988e-06, "loss": 0.4874, "step": 29637 }, { "epoch": 0.76, "grad_norm": 1.849457859992981, "learning_rate": 2.879796533753999e-06, "loss": 0.5626, "step": 29638 }, { "epoch": 0.76, "grad_norm": 3.133216619491577, "learning_rate": 2.8792136661446712e-06, "loss": 0.571, "step": 29639 }, { "epoch": 0.76, "grad_norm": 1.3537330627441406, "learning_rate": 2.8786308476073286e-06, "loss": 0.6215, "step": 29640 }, { "epoch": 0.76, "grad_norm": 7.90231990814209, "learning_rate": 2.8780480781459886e-06, "loss": 0.6591, "step": 29641 }, { "epoch": 0.76, "grad_norm": 5.236260414123535, "learning_rate": 2.877465357764663e-06, "loss": 0.6677, "step": 29642 }, { "epoch": 0.76, "grad_norm": 3.647981882095337, "learning_rate": 2.876882686467375e-06, "loss": 0.5359, "step": 29643 }, { "epoch": 0.76, "grad_norm": 1.8376133441925049, "learning_rate": 2.876300064258135e-06, "loss": 0.6271, "step": 29644 }, { "epoch": 0.76, "grad_norm": 1.2282503843307495, "learning_rate": 2.875717491140958e-06, "loss": 0.626, "step": 29645 }, { "epoch": 0.76, "grad_norm": 3.594151735305786, "learning_rate": 2.8751349671198625e-06, "loss": 0.5287, "step": 29646 }, { "epoch": 0.76, "grad_norm": 1.8123762607574463, "learning_rate": 2.874552492198861e-06, "loss": 0.4746, "step": 29647 }, { "epoch": 0.76, "grad_norm": 1.9959163665771484, "learning_rate": 2.873970066381967e-06, "loss": 0.5876, "step": 29648 }, { "epoch": 0.76, "grad_norm": 1.268804669380188, "learning_rate": 2.8733876896731905e-06, "loss": 0.6007, "step": 29649 }, { "epoch": 0.76, "grad_norm": 1.7023053169250488, "learning_rate": 2.8728053620765527e-06, "loss": 0.5766, "step": 29650 }, { "epoch": 0.76, "grad_norm": 10.747355461120605, "learning_rate": 2.872223083596062e-06, "loss": 0.532, "step": 29651 }, { "epoch": 0.76, "grad_norm": 2.7968811988830566, "learning_rate": 2.8716408542357288e-06, "loss": 0.7551, "step": 29652 }, { "epoch": 0.76, "grad_norm": 2.7913763523101807, "learning_rate": 2.8710586739995715e-06, "loss": 0.6863, "step": 29653 }, { "epoch": 0.76, "grad_norm": 7.204273700714111, "learning_rate": 2.870476542891598e-06, "loss": 0.5818, "step": 29654 }, { "epoch": 0.76, "grad_norm": 1.0851922035217285, "learning_rate": 2.8698944609158197e-06, "loss": 0.4163, "step": 29655 }, { "epoch": 0.76, "grad_norm": 1.2562620639801025, "learning_rate": 2.8693124280762507e-06, "loss": 0.511, "step": 29656 }, { "epoch": 0.76, "grad_norm": 1.0206875801086426, "learning_rate": 2.8687304443769013e-06, "loss": 0.4917, "step": 29657 }, { "epoch": 0.76, "grad_norm": 2.939127206802368, "learning_rate": 2.868148509821781e-06, "loss": 0.5436, "step": 29658 }, { "epoch": 0.76, "grad_norm": 1.6135585308074951, "learning_rate": 2.8675666244148983e-06, "loss": 0.4566, "step": 29659 }, { "epoch": 0.76, "grad_norm": 1.137363314628601, "learning_rate": 2.8669847881602677e-06, "loss": 0.4895, "step": 29660 }, { "epoch": 0.76, "grad_norm": 1.6111397743225098, "learning_rate": 2.866403001061896e-06, "loss": 0.7351, "step": 29661 }, { "epoch": 0.76, "grad_norm": 0.9582545757293701, "learning_rate": 2.8658212631237912e-06, "loss": 0.4413, "step": 29662 }, { "epoch": 0.76, "grad_norm": 1.0768861770629883, "learning_rate": 2.865239574349966e-06, "loss": 0.6182, "step": 29663 }, { "epoch": 0.76, "grad_norm": 9.897905349731445, "learning_rate": 2.864657934744428e-06, "loss": 0.6534, "step": 29664 }, { "epoch": 0.76, "grad_norm": 2.807626962661743, "learning_rate": 2.864076344311185e-06, "loss": 0.5032, "step": 29665 }, { "epoch": 0.76, "grad_norm": 1.16290283203125, "learning_rate": 2.8634948030542407e-06, "loss": 0.5729, "step": 29666 }, { "epoch": 0.76, "grad_norm": 2.114516019821167, "learning_rate": 2.86291331097761e-06, "loss": 0.5346, "step": 29667 }, { "epoch": 0.76, "grad_norm": 1.4091116189956665, "learning_rate": 2.8623318680852963e-06, "loss": 0.5465, "step": 29668 }, { "epoch": 0.76, "grad_norm": 3.4458961486816406, "learning_rate": 2.861750474381305e-06, "loss": 0.5793, "step": 29669 }, { "epoch": 0.76, "grad_norm": 1.947309136390686, "learning_rate": 2.8611691298696474e-06, "loss": 0.603, "step": 29670 }, { "epoch": 0.76, "grad_norm": 1.5843806266784668, "learning_rate": 2.860587834554327e-06, "loss": 0.6082, "step": 29671 }, { "epoch": 0.76, "grad_norm": 1.439537763595581, "learning_rate": 2.860006588439347e-06, "loss": 0.5296, "step": 29672 }, { "epoch": 0.76, "grad_norm": 2.609076738357544, "learning_rate": 2.8594253915287184e-06, "loss": 0.6165, "step": 29673 }, { "epoch": 0.76, "grad_norm": 1.9228947162628174, "learning_rate": 2.8588442438264443e-06, "loss": 0.5516, "step": 29674 }, { "epoch": 0.76, "grad_norm": 1.8459587097167969, "learning_rate": 2.8582631453365286e-06, "loss": 0.5625, "step": 29675 }, { "epoch": 0.76, "grad_norm": 1.3698527812957764, "learning_rate": 2.8576820960629757e-06, "loss": 0.4683, "step": 29676 }, { "epoch": 0.76, "grad_norm": 1.3378283977508545, "learning_rate": 2.8571010960097924e-06, "loss": 0.4256, "step": 29677 }, { "epoch": 0.76, "grad_norm": 1.3682178258895874, "learning_rate": 2.85652014518098e-06, "loss": 0.4788, "step": 29678 }, { "epoch": 0.76, "grad_norm": 0.9505103826522827, "learning_rate": 2.855939243580539e-06, "loss": 0.3941, "step": 29679 }, { "epoch": 0.76, "grad_norm": 2.1773059368133545, "learning_rate": 2.8553583912124805e-06, "loss": 0.6631, "step": 29680 }, { "epoch": 0.76, "grad_norm": 1.1061464548110962, "learning_rate": 2.8547775880808025e-06, "loss": 0.4846, "step": 29681 }, { "epoch": 0.76, "grad_norm": 2.074568748474121, "learning_rate": 2.8541968341895053e-06, "loss": 0.414, "step": 29682 }, { "epoch": 0.76, "grad_norm": 1.4224754571914673, "learning_rate": 2.853616129542598e-06, "loss": 0.5752, "step": 29683 }, { "epoch": 0.76, "grad_norm": 1.6746573448181152, "learning_rate": 2.853035474144078e-06, "loss": 0.5215, "step": 29684 }, { "epoch": 0.76, "grad_norm": 2.1897988319396973, "learning_rate": 2.8524548679979447e-06, "loss": 0.5821, "step": 29685 }, { "epoch": 0.76, "grad_norm": 2.035837173461914, "learning_rate": 2.8518743111082036e-06, "loss": 0.5189, "step": 29686 }, { "epoch": 0.76, "grad_norm": 1.181299090385437, "learning_rate": 2.851293803478855e-06, "loss": 0.4348, "step": 29687 }, { "epoch": 0.76, "grad_norm": 1.5239592790603638, "learning_rate": 2.850713345113898e-06, "loss": 0.6166, "step": 29688 }, { "epoch": 0.76, "grad_norm": 3.3397934436798096, "learning_rate": 2.8501329360173304e-06, "loss": 0.8022, "step": 29689 }, { "epoch": 0.76, "grad_norm": 4.154013156890869, "learning_rate": 2.8495525761931575e-06, "loss": 0.6092, "step": 29690 }, { "epoch": 0.76, "grad_norm": 1.2298481464385986, "learning_rate": 2.8489722656453754e-06, "loss": 0.4, "step": 29691 }, { "epoch": 0.76, "grad_norm": 6.965505123138428, "learning_rate": 2.848392004377981e-06, "loss": 0.54, "step": 29692 }, { "epoch": 0.76, "grad_norm": 1.400183916091919, "learning_rate": 2.8478117923949787e-06, "loss": 0.4363, "step": 29693 }, { "epoch": 0.76, "grad_norm": 1.7357054948806763, "learning_rate": 2.8472316297003644e-06, "loss": 0.4543, "step": 29694 }, { "epoch": 0.76, "grad_norm": 2.0322728157043457, "learning_rate": 2.8466515162981358e-06, "loss": 0.5897, "step": 29695 }, { "epoch": 0.76, "grad_norm": 1.188489556312561, "learning_rate": 2.846071452192287e-06, "loss": 0.648, "step": 29696 }, { "epoch": 0.76, "grad_norm": 1.3993229866027832, "learning_rate": 2.8454914373868237e-06, "loss": 0.5521, "step": 29697 }, { "epoch": 0.76, "grad_norm": 1.4590946435928345, "learning_rate": 2.8449114718857376e-06, "loss": 0.5694, "step": 29698 }, { "epoch": 0.76, "grad_norm": 1.2660832405090332, "learning_rate": 2.8443315556930227e-06, "loss": 0.5271, "step": 29699 }, { "epoch": 0.76, "grad_norm": 1.7335714101791382, "learning_rate": 2.8437516888126835e-06, "loss": 0.3503, "step": 29700 }, { "epoch": 0.76, "grad_norm": 1.546744465827942, "learning_rate": 2.843171871248711e-06, "loss": 0.4278, "step": 29701 }, { "epoch": 0.76, "grad_norm": 0.984607458114624, "learning_rate": 2.8425921030050987e-06, "loss": 0.4423, "step": 29702 }, { "epoch": 0.76, "grad_norm": 2.012627363204956, "learning_rate": 2.8420123840858473e-06, "loss": 0.5605, "step": 29703 }, { "epoch": 0.76, "grad_norm": 2.118363618850708, "learning_rate": 2.8414327144949505e-06, "loss": 0.8698, "step": 29704 }, { "epoch": 0.76, "grad_norm": 1.690710186958313, "learning_rate": 2.840853094236401e-06, "loss": 0.6263, "step": 29705 }, { "epoch": 0.76, "grad_norm": 1.4927082061767578, "learning_rate": 2.840273523314192e-06, "loss": 0.4025, "step": 29706 }, { "epoch": 0.76, "grad_norm": 0.9372394680976868, "learning_rate": 2.8396940017323225e-06, "loss": 0.3982, "step": 29707 }, { "epoch": 0.76, "grad_norm": 1.8878498077392578, "learning_rate": 2.839114529494783e-06, "loss": 0.5074, "step": 29708 }, { "epoch": 0.76, "grad_norm": 2.462160348892212, "learning_rate": 2.8385351066055643e-06, "loss": 0.52, "step": 29709 }, { "epoch": 0.76, "grad_norm": 1.8210734128952026, "learning_rate": 2.8379557330686656e-06, "loss": 0.5399, "step": 29710 }, { "epoch": 0.76, "grad_norm": 1.5413975715637207, "learning_rate": 2.837376408888076e-06, "loss": 0.6102, "step": 29711 }, { "epoch": 0.76, "grad_norm": 3.6805362701416016, "learning_rate": 2.836797134067789e-06, "loss": 0.6898, "step": 29712 }, { "epoch": 0.76, "grad_norm": 0.9706138968467712, "learning_rate": 2.8362179086117914e-06, "loss": 0.4641, "step": 29713 }, { "epoch": 0.76, "grad_norm": 2.016904592514038, "learning_rate": 2.835638732524084e-06, "loss": 0.6068, "step": 29714 }, { "epoch": 0.76, "grad_norm": 1.1391186714172363, "learning_rate": 2.8350596058086523e-06, "loss": 0.3952, "step": 29715 }, { "epoch": 0.76, "grad_norm": 3.724622964859009, "learning_rate": 2.8344805284694856e-06, "loss": 0.5459, "step": 29716 }, { "epoch": 0.76, "grad_norm": 1.3203849792480469, "learning_rate": 2.8339015005105796e-06, "loss": 0.4535, "step": 29717 }, { "epoch": 0.76, "grad_norm": 1.9914460182189941, "learning_rate": 2.833322521935923e-06, "loss": 0.5107, "step": 29718 }, { "epoch": 0.76, "grad_norm": 1.1317445039749146, "learning_rate": 2.832743592749505e-06, "loss": 0.5688, "step": 29719 }, { "epoch": 0.76, "grad_norm": 1.1442264318466187, "learning_rate": 2.832164712955312e-06, "loss": 0.5834, "step": 29720 }, { "epoch": 0.76, "grad_norm": 2.114602565765381, "learning_rate": 2.8315858825573396e-06, "loss": 0.3811, "step": 29721 }, { "epoch": 0.76, "grad_norm": 1.3776777982711792, "learning_rate": 2.831007101559574e-06, "loss": 0.6704, "step": 29722 }, { "epoch": 0.76, "grad_norm": 1.5835130214691162, "learning_rate": 2.8304283699659995e-06, "loss": 0.6243, "step": 29723 }, { "epoch": 0.76, "grad_norm": 11.850541114807129, "learning_rate": 2.8298496877806124e-06, "loss": 0.6562, "step": 29724 }, { "epoch": 0.76, "grad_norm": 1.232271432876587, "learning_rate": 2.8292710550073966e-06, "loss": 0.5835, "step": 29725 }, { "epoch": 0.76, "grad_norm": 1.8208611011505127, "learning_rate": 2.8286924716503362e-06, "loss": 0.6702, "step": 29726 }, { "epoch": 0.76, "grad_norm": 6.588431358337402, "learning_rate": 2.8281139377134246e-06, "loss": 0.3555, "step": 29727 }, { "epoch": 0.76, "grad_norm": 1.4931929111480713, "learning_rate": 2.8275354532006462e-06, "loss": 0.3531, "step": 29728 }, { "epoch": 0.76, "grad_norm": 2.2229440212249756, "learning_rate": 2.8269570181159867e-06, "loss": 0.5403, "step": 29729 }, { "epoch": 0.76, "grad_norm": 1.4394906759262085, "learning_rate": 2.82637863246343e-06, "loss": 0.6266, "step": 29730 }, { "epoch": 0.76, "grad_norm": 1.2084901332855225, "learning_rate": 2.825800296246969e-06, "loss": 0.5413, "step": 29731 }, { "epoch": 0.76, "grad_norm": 1.2930903434753418, "learning_rate": 2.825222009470584e-06, "loss": 0.5433, "step": 29732 }, { "epoch": 0.76, "grad_norm": 6.085484027862549, "learning_rate": 2.8246437721382582e-06, "loss": 0.4319, "step": 29733 }, { "epoch": 0.76, "grad_norm": 1.4088870286941528, "learning_rate": 2.824065584253983e-06, "loss": 0.5382, "step": 29734 }, { "epoch": 0.76, "grad_norm": 1.5919594764709473, "learning_rate": 2.8234874458217387e-06, "loss": 0.4752, "step": 29735 }, { "epoch": 0.76, "grad_norm": 3.1129493713378906, "learning_rate": 2.8229093568455114e-06, "loss": 0.3803, "step": 29736 }, { "epoch": 0.76, "grad_norm": 8.805681228637695, "learning_rate": 2.82233131732928e-06, "loss": 0.6884, "step": 29737 }, { "epoch": 0.76, "grad_norm": 1.4940208196640015, "learning_rate": 2.821753327277035e-06, "loss": 0.5253, "step": 29738 }, { "epoch": 0.76, "grad_norm": 1.501502513885498, "learning_rate": 2.821175386692756e-06, "loss": 0.3881, "step": 29739 }, { "epoch": 0.76, "grad_norm": 1.563252329826355, "learning_rate": 2.8205974955804226e-06, "loss": 0.4552, "step": 29740 }, { "epoch": 0.76, "grad_norm": 4.01103401184082, "learning_rate": 2.820019653944024e-06, "loss": 0.5301, "step": 29741 }, { "epoch": 0.76, "grad_norm": 2.102555513381958, "learning_rate": 2.8194418617875397e-06, "loss": 0.6455, "step": 29742 }, { "epoch": 0.76, "grad_norm": 1.0674625635147095, "learning_rate": 2.818864119114947e-06, "loss": 0.4226, "step": 29743 }, { "epoch": 0.76, "grad_norm": 1.3576087951660156, "learning_rate": 2.8182864259302344e-06, "loss": 0.5993, "step": 29744 }, { "epoch": 0.76, "grad_norm": 1.529783844947815, "learning_rate": 2.8177087822373806e-06, "loss": 0.4974, "step": 29745 }, { "epoch": 0.76, "grad_norm": 1.9974275827407837, "learning_rate": 2.817131188040365e-06, "loss": 0.632, "step": 29746 }, { "epoch": 0.76, "grad_norm": 3.6987993717193604, "learning_rate": 2.816553643343165e-06, "loss": 0.4962, "step": 29747 }, { "epoch": 0.76, "grad_norm": 1.5152077674865723, "learning_rate": 2.815976148149767e-06, "loss": 0.5458, "step": 29748 }, { "epoch": 0.76, "grad_norm": 4.049009799957275, "learning_rate": 2.8153987024641494e-06, "loss": 0.4732, "step": 29749 }, { "epoch": 0.76, "grad_norm": 1.8339115381240845, "learning_rate": 2.814821306290285e-06, "loss": 0.591, "step": 29750 }, { "epoch": 0.76, "grad_norm": 5.428500175476074, "learning_rate": 2.8142439596321623e-06, "loss": 0.3237, "step": 29751 }, { "epoch": 0.76, "grad_norm": 1.4686524868011475, "learning_rate": 2.8136666624937557e-06, "loss": 0.4626, "step": 29752 }, { "epoch": 0.76, "grad_norm": 2.0336430072784424, "learning_rate": 2.8130894148790446e-06, "loss": 0.5895, "step": 29753 }, { "epoch": 0.76, "grad_norm": 1.0834800004959106, "learning_rate": 2.8125122167920017e-06, "loss": 0.559, "step": 29754 }, { "epoch": 0.76, "grad_norm": 1.2744797468185425, "learning_rate": 2.811935068236612e-06, "loss": 0.6486, "step": 29755 }, { "epoch": 0.76, "grad_norm": 1.2995669841766357, "learning_rate": 2.8113579692168504e-06, "loss": 0.6991, "step": 29756 }, { "epoch": 0.76, "grad_norm": 1.4575626850128174, "learning_rate": 2.8107809197366885e-06, "loss": 0.4961, "step": 29757 }, { "epoch": 0.76, "grad_norm": 1.2172402143478394, "learning_rate": 2.810203919800113e-06, "loss": 0.4997, "step": 29758 }, { "epoch": 0.76, "grad_norm": 1.6570439338684082, "learning_rate": 2.8096269694110944e-06, "loss": 0.5687, "step": 29759 }, { "epoch": 0.76, "grad_norm": 1.5380198955535889, "learning_rate": 2.8090500685736087e-06, "loss": 0.6739, "step": 29760 }, { "epoch": 0.76, "grad_norm": 1.0355901718139648, "learning_rate": 2.8084732172916297e-06, "loss": 0.5557, "step": 29761 }, { "epoch": 0.76, "grad_norm": 5.902927875518799, "learning_rate": 2.807896415569138e-06, "loss": 0.5645, "step": 29762 }, { "epoch": 0.76, "grad_norm": 1.227219820022583, "learning_rate": 2.807319663410105e-06, "loss": 0.5885, "step": 29763 }, { "epoch": 0.76, "grad_norm": 1.6295790672302246, "learning_rate": 2.8067429608185038e-06, "loss": 0.4274, "step": 29764 }, { "epoch": 0.76, "grad_norm": 1.159352421760559, "learning_rate": 2.8061663077983135e-06, "loss": 0.5072, "step": 29765 }, { "epoch": 0.76, "grad_norm": 1.8441295623779297, "learning_rate": 2.8055897043535065e-06, "loss": 0.5661, "step": 29766 }, { "epoch": 0.76, "grad_norm": 1.6663864850997925, "learning_rate": 2.8050131504880505e-06, "loss": 0.5708, "step": 29767 }, { "epoch": 0.76, "grad_norm": 3.9038290977478027, "learning_rate": 2.804436646205928e-06, "loss": 0.6498, "step": 29768 }, { "epoch": 0.76, "grad_norm": 3.1608800888061523, "learning_rate": 2.8038601915111065e-06, "loss": 0.5608, "step": 29769 }, { "epoch": 0.76, "grad_norm": 1.7363053560256958, "learning_rate": 2.8032837864075602e-06, "loss": 0.4328, "step": 29770 }, { "epoch": 0.76, "grad_norm": 9.85482406616211, "learning_rate": 2.8027074308992575e-06, "loss": 0.6282, "step": 29771 }, { "epoch": 0.76, "grad_norm": 1.3914324045181274, "learning_rate": 2.802131124990176e-06, "loss": 0.6063, "step": 29772 }, { "epoch": 0.76, "grad_norm": 1.7627395391464233, "learning_rate": 2.8015548686842842e-06, "loss": 0.5275, "step": 29773 }, { "epoch": 0.76, "grad_norm": 1.2455918788909912, "learning_rate": 2.8009786619855516e-06, "loss": 0.4086, "step": 29774 }, { "epoch": 0.76, "grad_norm": 1.6551060676574707, "learning_rate": 2.8004025048979545e-06, "loss": 0.6341, "step": 29775 }, { "epoch": 0.76, "grad_norm": 1.3013814687728882, "learning_rate": 2.799826397425459e-06, "loss": 0.3178, "step": 29776 }, { "epoch": 0.76, "grad_norm": 1.360203742980957, "learning_rate": 2.7992503395720373e-06, "loss": 0.5537, "step": 29777 }, { "epoch": 0.76, "grad_norm": 4.470468044281006, "learning_rate": 2.798674331341655e-06, "loss": 0.6346, "step": 29778 }, { "epoch": 0.76, "grad_norm": 1.387498378753662, "learning_rate": 2.7980983727382883e-06, "loss": 0.4755, "step": 29779 }, { "epoch": 0.76, "grad_norm": 3.4645049571990967, "learning_rate": 2.7975224637659013e-06, "loss": 0.5646, "step": 29780 }, { "epoch": 0.76, "grad_norm": 1.1269625425338745, "learning_rate": 2.796946604428463e-06, "loss": 0.555, "step": 29781 }, { "epoch": 0.76, "grad_norm": 15.085968017578125, "learning_rate": 2.7963707947299456e-06, "loss": 0.4116, "step": 29782 }, { "epoch": 0.76, "grad_norm": 1.4201018810272217, "learning_rate": 2.7957950346743157e-06, "loss": 0.5484, "step": 29783 }, { "epoch": 0.76, "grad_norm": 1.48029363155365, "learning_rate": 2.79521932426554e-06, "loss": 0.6551, "step": 29784 }, { "epoch": 0.76, "grad_norm": 1.3593623638153076, "learning_rate": 2.7946436635075824e-06, "loss": 0.4477, "step": 29785 }, { "epoch": 0.76, "grad_norm": 1.3257887363433838, "learning_rate": 2.7940680524044182e-06, "loss": 0.6021, "step": 29786 }, { "epoch": 0.76, "grad_norm": 0.8439455628395081, "learning_rate": 2.793492490960009e-06, "loss": 0.4298, "step": 29787 }, { "epoch": 0.76, "grad_norm": 1.1805673837661743, "learning_rate": 2.7929169791783185e-06, "loss": 0.437, "step": 29788 }, { "epoch": 0.76, "grad_norm": 3.3428361415863037, "learning_rate": 2.7923415170633204e-06, "loss": 0.7722, "step": 29789 }, { "epoch": 0.76, "grad_norm": 4.248373985290527, "learning_rate": 2.791766104618976e-06, "loss": 0.4466, "step": 29790 }, { "epoch": 0.76, "grad_norm": 1.2951406240463257, "learning_rate": 2.791190741849247e-06, "loss": 0.453, "step": 29791 }, { "epoch": 0.76, "grad_norm": 2.0227596759796143, "learning_rate": 2.7906154287581056e-06, "loss": 0.3599, "step": 29792 }, { "epoch": 0.76, "grad_norm": 1.5703340768814087, "learning_rate": 2.7900401653495145e-06, "loss": 0.6007, "step": 29793 }, { "epoch": 0.76, "grad_norm": 1.0563362836837769, "learning_rate": 2.789464951627435e-06, "loss": 0.5668, "step": 29794 }, { "epoch": 0.76, "grad_norm": 6.013106346130371, "learning_rate": 2.788889787595832e-06, "loss": 0.7857, "step": 29795 }, { "epoch": 0.76, "grad_norm": 2.3925223350524902, "learning_rate": 2.788314673258672e-06, "loss": 0.6698, "step": 29796 }, { "epoch": 0.76, "grad_norm": 1.2082818746566772, "learning_rate": 2.787739608619916e-06, "loss": 0.5179, "step": 29797 }, { "epoch": 0.76, "grad_norm": 1.205277442932129, "learning_rate": 2.7871645936835255e-06, "loss": 0.4226, "step": 29798 }, { "epoch": 0.76, "grad_norm": 1.7559665441513062, "learning_rate": 2.7865896284534678e-06, "loss": 0.586, "step": 29799 }, { "epoch": 0.76, "grad_norm": 1.5832825899124146, "learning_rate": 2.786014712933701e-06, "loss": 0.4604, "step": 29800 }, { "epoch": 0.76, "grad_norm": 1.6341615915298462, "learning_rate": 2.7854398471281907e-06, "loss": 0.6172, "step": 29801 }, { "epoch": 0.76, "grad_norm": 1.1357909440994263, "learning_rate": 2.784865031040892e-06, "loss": 0.516, "step": 29802 }, { "epoch": 0.76, "grad_norm": 1.3719358444213867, "learning_rate": 2.7842902646757743e-06, "loss": 0.4556, "step": 29803 }, { "epoch": 0.76, "grad_norm": 2.17315673828125, "learning_rate": 2.783715548036794e-06, "loss": 0.7956, "step": 29804 }, { "epoch": 0.76, "grad_norm": 3.6549150943756104, "learning_rate": 2.7831408811279092e-06, "loss": 0.5092, "step": 29805 }, { "epoch": 0.76, "grad_norm": 0.935066282749176, "learning_rate": 2.782566263953087e-06, "loss": 0.3161, "step": 29806 }, { "epoch": 0.76, "grad_norm": 1.6769253015518188, "learning_rate": 2.781991696516284e-06, "loss": 0.4886, "step": 29807 }, { "epoch": 0.76, "grad_norm": 1.1161925792694092, "learning_rate": 2.781417178821456e-06, "loss": 0.5562, "step": 29808 }, { "epoch": 0.76, "grad_norm": 1.4523380994796753, "learning_rate": 2.7808427108725687e-06, "loss": 0.5496, "step": 29809 }, { "epoch": 0.76, "grad_norm": 4.208897590637207, "learning_rate": 2.780268292673578e-06, "loss": 0.2832, "step": 29810 }, { "epoch": 0.76, "grad_norm": 1.4668322801589966, "learning_rate": 2.7796939242284425e-06, "loss": 0.503, "step": 29811 }, { "epoch": 0.76, "grad_norm": 1.1500563621520996, "learning_rate": 2.7791196055411173e-06, "loss": 0.4312, "step": 29812 }, { "epoch": 0.76, "grad_norm": 1.6577244997024536, "learning_rate": 2.778545336615566e-06, "loss": 0.5136, "step": 29813 }, { "epoch": 0.76, "grad_norm": 2.7947139739990234, "learning_rate": 2.7779711174557423e-06, "loss": 0.2829, "step": 29814 }, { "epoch": 0.76, "grad_norm": 1.8145232200622559, "learning_rate": 2.7773969480656025e-06, "loss": 0.567, "step": 29815 }, { "epoch": 0.76, "grad_norm": 1.3587760925292969, "learning_rate": 2.7768228284491084e-06, "loss": 0.4974, "step": 29816 }, { "epoch": 0.76, "grad_norm": 1.9689838886260986, "learning_rate": 2.7762487586102125e-06, "loss": 0.4322, "step": 29817 }, { "epoch": 0.76, "grad_norm": 3.1315364837646484, "learning_rate": 2.7756747385528715e-06, "loss": 0.4679, "step": 29818 }, { "epoch": 0.76, "grad_norm": 1.1235241889953613, "learning_rate": 2.7751007682810393e-06, "loss": 0.5398, "step": 29819 }, { "epoch": 0.76, "grad_norm": 3.677842140197754, "learning_rate": 2.7745268477986766e-06, "loss": 0.5218, "step": 29820 }, { "epoch": 0.76, "grad_norm": 0.9739826917648315, "learning_rate": 2.7739529771097353e-06, "loss": 0.2756, "step": 29821 }, { "epoch": 0.76, "grad_norm": 2.01717472076416, "learning_rate": 2.7733791562181667e-06, "loss": 0.4489, "step": 29822 }, { "epoch": 0.76, "grad_norm": 1.4777822494506836, "learning_rate": 2.772805385127931e-06, "loss": 0.5888, "step": 29823 }, { "epoch": 0.76, "grad_norm": 1.3087620735168457, "learning_rate": 2.7722316638429813e-06, "loss": 0.54, "step": 29824 }, { "epoch": 0.76, "grad_norm": 1.2916147708892822, "learning_rate": 2.7716579923672703e-06, "loss": 0.4324, "step": 29825 }, { "epoch": 0.76, "grad_norm": 2.039283514022827, "learning_rate": 2.7710843707047474e-06, "loss": 0.6916, "step": 29826 }, { "epoch": 0.76, "grad_norm": 1.9587465524673462, "learning_rate": 2.7705107988593725e-06, "loss": 0.5547, "step": 29827 }, { "epoch": 0.76, "grad_norm": 2.557565927505493, "learning_rate": 2.7699372768350953e-06, "loss": 0.5518, "step": 29828 }, { "epoch": 0.76, "grad_norm": 5.664128303527832, "learning_rate": 2.769363804635865e-06, "loss": 0.5603, "step": 29829 }, { "epoch": 0.76, "grad_norm": 0.9470374584197998, "learning_rate": 2.7687903822656394e-06, "loss": 0.3766, "step": 29830 }, { "epoch": 0.76, "grad_norm": 2.5578949451446533, "learning_rate": 2.7682170097283677e-06, "loss": 0.3815, "step": 29831 }, { "epoch": 0.76, "grad_norm": 3.258049964904785, "learning_rate": 2.7676436870279976e-06, "loss": 0.4361, "step": 29832 }, { "epoch": 0.76, "grad_norm": 1.3655109405517578, "learning_rate": 2.767070414168487e-06, "loss": 0.4245, "step": 29833 }, { "epoch": 0.76, "grad_norm": 2.151348352432251, "learning_rate": 2.766497191153782e-06, "loss": 0.6269, "step": 29834 }, { "epoch": 0.76, "grad_norm": 2.8616220951080322, "learning_rate": 2.7659240179878342e-06, "loss": 0.542, "step": 29835 }, { "epoch": 0.76, "grad_norm": 3.3978981971740723, "learning_rate": 2.76535089467459e-06, "loss": 0.5933, "step": 29836 }, { "epoch": 0.76, "grad_norm": 1.5062958002090454, "learning_rate": 2.7647778212180056e-06, "loss": 0.6348, "step": 29837 }, { "epoch": 0.76, "grad_norm": 1.4090256690979004, "learning_rate": 2.7642047976220276e-06, "loss": 0.3858, "step": 29838 }, { "epoch": 0.76, "grad_norm": 1.1864140033721924, "learning_rate": 2.7636318238906e-06, "loss": 0.4087, "step": 29839 }, { "epoch": 0.76, "grad_norm": 1.9060627222061157, "learning_rate": 2.7630589000276788e-06, "loss": 0.6219, "step": 29840 }, { "epoch": 0.76, "grad_norm": 1.6145718097686768, "learning_rate": 2.762486026037209e-06, "loss": 0.5364, "step": 29841 }, { "epoch": 0.76, "grad_norm": 13.565011024475098, "learning_rate": 2.7619132019231385e-06, "loss": 0.5693, "step": 29842 }, { "epoch": 0.76, "grad_norm": 3.698260545730591, "learning_rate": 2.761340427689412e-06, "loss": 0.4937, "step": 29843 }, { "epoch": 0.76, "grad_norm": 1.1767469644546509, "learning_rate": 2.7607677033399814e-06, "loss": 0.4701, "step": 29844 }, { "epoch": 0.76, "grad_norm": 1.305004596710205, "learning_rate": 2.7601950288787925e-06, "loss": 0.4968, "step": 29845 }, { "epoch": 0.76, "grad_norm": 3.9727468490600586, "learning_rate": 2.7596224043097885e-06, "loss": 0.5002, "step": 29846 }, { "epoch": 0.76, "grad_norm": 2.8057656288146973, "learning_rate": 2.7590498296369205e-06, "loss": 0.6121, "step": 29847 }, { "epoch": 0.77, "grad_norm": 2.936699628829956, "learning_rate": 2.758477304864132e-06, "loss": 0.4858, "step": 29848 }, { "epoch": 0.77, "grad_norm": 1.2297754287719727, "learning_rate": 2.757904829995365e-06, "loss": 0.3365, "step": 29849 }, { "epoch": 0.77, "grad_norm": 1.2175501585006714, "learning_rate": 2.7573324050345708e-06, "loss": 0.4067, "step": 29850 }, { "epoch": 0.77, "grad_norm": 2.010004758834839, "learning_rate": 2.7567600299856924e-06, "loss": 0.5274, "step": 29851 }, { "epoch": 0.77, "grad_norm": 1.319113850593567, "learning_rate": 2.756187704852672e-06, "loss": 0.3867, "step": 29852 }, { "epoch": 0.77, "grad_norm": 1.1003224849700928, "learning_rate": 2.7556154296394534e-06, "loss": 0.4697, "step": 29853 }, { "epoch": 0.77, "grad_norm": 1.0569038391113281, "learning_rate": 2.755043204349983e-06, "loss": 0.4284, "step": 29854 }, { "epoch": 0.77, "grad_norm": 1.4086668491363525, "learning_rate": 2.754471028988205e-06, "loss": 0.5945, "step": 29855 }, { "epoch": 0.77, "grad_norm": 1.3146178722381592, "learning_rate": 2.7538989035580566e-06, "loss": 0.6803, "step": 29856 }, { "epoch": 0.77, "grad_norm": 4.357318878173828, "learning_rate": 2.753326828063487e-06, "loss": 0.6737, "step": 29857 }, { "epoch": 0.77, "grad_norm": 1.336917519569397, "learning_rate": 2.7527548025084373e-06, "loss": 0.3135, "step": 29858 }, { "epoch": 0.77, "grad_norm": 1.9831452369689941, "learning_rate": 2.7521828268968476e-06, "loss": 0.4516, "step": 29859 }, { "epoch": 0.77, "grad_norm": 1.4073437452316284, "learning_rate": 2.7516109012326576e-06, "loss": 0.6756, "step": 29860 }, { "epoch": 0.77, "grad_norm": 1.5436100959777832, "learning_rate": 2.751039025519814e-06, "loss": 0.55, "step": 29861 }, { "epoch": 0.77, "grad_norm": 7.864027500152588, "learning_rate": 2.7504671997622554e-06, "loss": 0.618, "step": 29862 }, { "epoch": 0.77, "grad_norm": 4.5655670166015625, "learning_rate": 2.74989542396392e-06, "loss": 0.5918, "step": 29863 }, { "epoch": 0.77, "grad_norm": 1.2648534774780273, "learning_rate": 2.749323698128752e-06, "loss": 0.3926, "step": 29864 }, { "epoch": 0.77, "grad_norm": 1.0727795362472534, "learning_rate": 2.74875202226069e-06, "loss": 0.4311, "step": 29865 }, { "epoch": 0.77, "grad_norm": 1.377274513244629, "learning_rate": 2.7481803963636743e-06, "loss": 0.447, "step": 29866 }, { "epoch": 0.77, "grad_norm": 1.909008264541626, "learning_rate": 2.7476088204416396e-06, "loss": 0.533, "step": 29867 }, { "epoch": 0.77, "grad_norm": 4.10884952545166, "learning_rate": 2.7470372944985323e-06, "loss": 0.517, "step": 29868 }, { "epoch": 0.77, "grad_norm": 1.0250035524368286, "learning_rate": 2.7464658185382862e-06, "loss": 0.5362, "step": 29869 }, { "epoch": 0.77, "grad_norm": 1.1448293924331665, "learning_rate": 2.7458943925648387e-06, "loss": 0.4714, "step": 29870 }, { "epoch": 0.77, "grad_norm": 5.5592732429504395, "learning_rate": 2.745323016582131e-06, "loss": 0.5184, "step": 29871 }, { "epoch": 0.77, "grad_norm": 1.6839438676834106, "learning_rate": 2.7447516905941007e-06, "loss": 0.5803, "step": 29872 }, { "epoch": 0.77, "grad_norm": 9.73145866394043, "learning_rate": 2.7441804146046793e-06, "loss": 0.7463, "step": 29873 }, { "epoch": 0.77, "grad_norm": 1.0417470932006836, "learning_rate": 2.7436091886178116e-06, "loss": 0.53, "step": 29874 }, { "epoch": 0.77, "grad_norm": 1.918839931488037, "learning_rate": 2.7430380126374313e-06, "loss": 0.57, "step": 29875 }, { "epoch": 0.77, "grad_norm": 1.4678685665130615, "learning_rate": 2.742466886667472e-06, "loss": 0.4948, "step": 29876 }, { "epoch": 0.77, "grad_norm": 1.644714593887329, "learning_rate": 2.74189581071187e-06, "loss": 0.5589, "step": 29877 }, { "epoch": 0.77, "grad_norm": 2.268481731414795, "learning_rate": 2.741324784774564e-06, "loss": 0.5236, "step": 29878 }, { "epoch": 0.77, "grad_norm": 1.9808886051177979, "learning_rate": 2.7407538088594875e-06, "loss": 0.565, "step": 29879 }, { "epoch": 0.77, "grad_norm": 1.645977258682251, "learning_rate": 2.7401828829705714e-06, "loss": 0.3338, "step": 29880 }, { "epoch": 0.77, "grad_norm": 1.3449784517288208, "learning_rate": 2.7396120071117584e-06, "loss": 0.518, "step": 29881 }, { "epoch": 0.77, "grad_norm": 1.6812913417816162, "learning_rate": 2.7390411812869766e-06, "loss": 0.5212, "step": 29882 }, { "epoch": 0.77, "grad_norm": 1.1690818071365356, "learning_rate": 2.738470405500161e-06, "loss": 0.4576, "step": 29883 }, { "epoch": 0.77, "grad_norm": 1.4577361345291138, "learning_rate": 2.7378996797552437e-06, "loss": 0.4522, "step": 29884 }, { "epoch": 0.77, "grad_norm": 1.7571274042129517, "learning_rate": 2.7373290040561606e-06, "loss": 0.5079, "step": 29885 }, { "epoch": 0.77, "grad_norm": 1.554939866065979, "learning_rate": 2.736758378406844e-06, "loss": 0.5619, "step": 29886 }, { "epoch": 0.77, "grad_norm": 2.072601318359375, "learning_rate": 2.736187802811223e-06, "loss": 0.5522, "step": 29887 }, { "epoch": 0.77, "grad_norm": 2.9071555137634277, "learning_rate": 2.7356172772732346e-06, "loss": 0.5562, "step": 29888 }, { "epoch": 0.77, "grad_norm": 1.7085787057876587, "learning_rate": 2.735046801796808e-06, "loss": 0.4707, "step": 29889 }, { "epoch": 0.77, "grad_norm": 2.9007718563079834, "learning_rate": 2.734476376385872e-06, "loss": 0.6559, "step": 29890 }, { "epoch": 0.77, "grad_norm": 1.7801648378372192, "learning_rate": 2.733906001044362e-06, "loss": 0.4874, "step": 29891 }, { "epoch": 0.77, "grad_norm": 1.1695078611373901, "learning_rate": 2.7333356757762075e-06, "loss": 0.5412, "step": 29892 }, { "epoch": 0.77, "grad_norm": 1.3705815076828003, "learning_rate": 2.732765400585339e-06, "loss": 0.5957, "step": 29893 }, { "epoch": 0.77, "grad_norm": 1.323379397392273, "learning_rate": 2.7321951754756817e-06, "loss": 0.4934, "step": 29894 }, { "epoch": 0.77, "grad_norm": 1.1908944845199585, "learning_rate": 2.731625000451171e-06, "loss": 0.2867, "step": 29895 }, { "epoch": 0.77, "grad_norm": 1.3481934070587158, "learning_rate": 2.7310548755157362e-06, "loss": 0.5498, "step": 29896 }, { "epoch": 0.77, "grad_norm": 1.5701687335968018, "learning_rate": 2.730484800673301e-06, "loss": 0.6514, "step": 29897 }, { "epoch": 0.77, "grad_norm": 1.707742691040039, "learning_rate": 2.7299147759278e-06, "loss": 0.6228, "step": 29898 }, { "epoch": 0.77, "grad_norm": 1.624488353729248, "learning_rate": 2.7293448012831594e-06, "loss": 0.5544, "step": 29899 }, { "epoch": 0.77, "grad_norm": 1.2981864213943481, "learning_rate": 2.7287748767433065e-06, "loss": 0.5536, "step": 29900 }, { "epoch": 0.77, "grad_norm": 1.1048578023910522, "learning_rate": 2.7282050023121652e-06, "loss": 0.7, "step": 29901 }, { "epoch": 0.77, "grad_norm": 3.350520372390747, "learning_rate": 2.7276351779936704e-06, "loss": 0.6476, "step": 29902 }, { "epoch": 0.77, "grad_norm": 1.7404325008392334, "learning_rate": 2.7270654037917442e-06, "loss": 0.6196, "step": 29903 }, { "epoch": 0.77, "grad_norm": 1.204948902130127, "learning_rate": 2.7264956797103114e-06, "loss": 0.43, "step": 29904 }, { "epoch": 0.77, "grad_norm": 3.501852512359619, "learning_rate": 2.725926005753303e-06, "loss": 0.6041, "step": 29905 }, { "epoch": 0.77, "grad_norm": 1.1407939195632935, "learning_rate": 2.725356381924643e-06, "loss": 0.4826, "step": 29906 }, { "epoch": 0.77, "grad_norm": 1.7170765399932861, "learning_rate": 2.724786808228257e-06, "loss": 0.5201, "step": 29907 }, { "epoch": 0.77, "grad_norm": 3.0903279781341553, "learning_rate": 2.724217284668066e-06, "loss": 0.8052, "step": 29908 }, { "epoch": 0.77, "grad_norm": 1.2241061925888062, "learning_rate": 2.7236478112480015e-06, "loss": 0.487, "step": 29909 }, { "epoch": 0.77, "grad_norm": 2.0694775581359863, "learning_rate": 2.7230783879719846e-06, "loss": 0.6051, "step": 29910 }, { "epoch": 0.77, "grad_norm": 6.430661678314209, "learning_rate": 2.722509014843937e-06, "loss": 0.5606, "step": 29911 }, { "epoch": 0.77, "grad_norm": 1.436021089553833, "learning_rate": 2.7219396918677876e-06, "loss": 0.5789, "step": 29912 }, { "epoch": 0.77, "grad_norm": 2.268526792526245, "learning_rate": 2.721370419047458e-06, "loss": 0.479, "step": 29913 }, { "epoch": 0.77, "grad_norm": 2.1265199184417725, "learning_rate": 2.7208011963868674e-06, "loss": 0.6674, "step": 29914 }, { "epoch": 0.77, "grad_norm": 1.8710976839065552, "learning_rate": 2.720232023889945e-06, "loss": 0.5016, "step": 29915 }, { "epoch": 0.77, "grad_norm": 1.6561769247055054, "learning_rate": 2.719662901560609e-06, "loss": 0.4529, "step": 29916 }, { "epoch": 0.77, "grad_norm": 4.760768890380859, "learning_rate": 2.7190938294027823e-06, "loss": 0.5191, "step": 29917 }, { "epoch": 0.77, "grad_norm": 1.7560172080993652, "learning_rate": 2.7185248074203873e-06, "loss": 0.4244, "step": 29918 }, { "epoch": 0.77, "grad_norm": 0.8265730142593384, "learning_rate": 2.717955835617345e-06, "loss": 0.4156, "step": 29919 }, { "epoch": 0.77, "grad_norm": 2.6256263256073, "learning_rate": 2.717386913997575e-06, "loss": 0.6044, "step": 29920 }, { "epoch": 0.77, "grad_norm": 1.8108346462249756, "learning_rate": 2.7168180425649972e-06, "loss": 0.656, "step": 29921 }, { "epoch": 0.77, "grad_norm": 1.2690848112106323, "learning_rate": 2.7162492213235357e-06, "loss": 0.5485, "step": 29922 }, { "epoch": 0.77, "grad_norm": 1.2307848930358887, "learning_rate": 2.7156804502771082e-06, "loss": 0.405, "step": 29923 }, { "epoch": 0.77, "grad_norm": 1.9367018938064575, "learning_rate": 2.7151117294296326e-06, "loss": 0.5718, "step": 29924 }, { "epoch": 0.77, "grad_norm": 1.2153316736221313, "learning_rate": 2.714543058785033e-06, "loss": 0.4267, "step": 29925 }, { "epoch": 0.77, "grad_norm": 2.308609962463379, "learning_rate": 2.7139744383472255e-06, "loss": 0.3739, "step": 29926 }, { "epoch": 0.77, "grad_norm": 3.8537614345550537, "learning_rate": 2.7134058681201248e-06, "loss": 0.3755, "step": 29927 }, { "epoch": 0.77, "grad_norm": 1.3935939073562622, "learning_rate": 2.7128373481076562e-06, "loss": 0.3469, "step": 29928 }, { "epoch": 0.77, "grad_norm": 2.0803823471069336, "learning_rate": 2.7122688783137353e-06, "loss": 0.5161, "step": 29929 }, { "epoch": 0.77, "grad_norm": 1.1321691274642944, "learning_rate": 2.711700458742278e-06, "loss": 0.4682, "step": 29930 }, { "epoch": 0.77, "grad_norm": 1.235922932624817, "learning_rate": 2.711132089397199e-06, "loss": 0.6481, "step": 29931 }, { "epoch": 0.77, "grad_norm": 1.1743066310882568, "learning_rate": 2.7105637702824215e-06, "loss": 0.4738, "step": 29932 }, { "epoch": 0.77, "grad_norm": 1.43656587600708, "learning_rate": 2.7099955014018587e-06, "loss": 0.4781, "step": 29933 }, { "epoch": 0.77, "grad_norm": 1.15287184715271, "learning_rate": 2.709427282759424e-06, "loss": 0.4469, "step": 29934 }, { "epoch": 0.77, "grad_norm": 5.545113563537598, "learning_rate": 2.708859114359039e-06, "loss": 0.4786, "step": 29935 }, { "epoch": 0.77, "grad_norm": 3.9017603397369385, "learning_rate": 2.708290996204617e-06, "loss": 0.9587, "step": 29936 }, { "epoch": 0.77, "grad_norm": 3.2575268745422363, "learning_rate": 2.7077229283000717e-06, "loss": 0.7638, "step": 29937 }, { "epoch": 0.77, "grad_norm": 1.6031416654586792, "learning_rate": 2.7071549106493158e-06, "loss": 0.5573, "step": 29938 }, { "epoch": 0.77, "grad_norm": 1.4307799339294434, "learning_rate": 2.7065869432562688e-06, "loss": 0.4888, "step": 29939 }, { "epoch": 0.77, "grad_norm": 1.310760736465454, "learning_rate": 2.706019026124843e-06, "loss": 0.6129, "step": 29940 }, { "epoch": 0.77, "grad_norm": 1.2873057126998901, "learning_rate": 2.7054511592589483e-06, "loss": 0.5536, "step": 29941 }, { "epoch": 0.77, "grad_norm": 1.4341537952423096, "learning_rate": 2.7048833426625043e-06, "loss": 0.6157, "step": 29942 }, { "epoch": 0.77, "grad_norm": 1.6514055728912354, "learning_rate": 2.704315576339421e-06, "loss": 0.5908, "step": 29943 }, { "epoch": 0.77, "grad_norm": 1.3677449226379395, "learning_rate": 2.7037478602936083e-06, "loss": 0.6177, "step": 29944 }, { "epoch": 0.77, "grad_norm": 3.896266222000122, "learning_rate": 2.7031801945289837e-06, "loss": 0.8868, "step": 29945 }, { "epoch": 0.77, "grad_norm": 1.078287959098816, "learning_rate": 2.7026125790494584e-06, "loss": 0.3529, "step": 29946 }, { "epoch": 0.77, "grad_norm": 1.0201696157455444, "learning_rate": 2.7020450138589416e-06, "loss": 0.4459, "step": 29947 }, { "epoch": 0.77, "grad_norm": 1.171871542930603, "learning_rate": 2.7014774989613424e-06, "loss": 0.5928, "step": 29948 }, { "epoch": 0.77, "grad_norm": 1.2937010526657104, "learning_rate": 2.7009100343605777e-06, "loss": 0.5768, "step": 29949 }, { "epoch": 0.77, "grad_norm": 1.2391239404678345, "learning_rate": 2.7003426200605563e-06, "loss": 0.3579, "step": 29950 }, { "epoch": 0.77, "grad_norm": 4.999057292938232, "learning_rate": 2.6997752560651834e-06, "loss": 0.6355, "step": 29951 }, { "epoch": 0.77, "grad_norm": 6.769836902618408, "learning_rate": 2.699207942378377e-06, "loss": 0.7631, "step": 29952 }, { "epoch": 0.77, "grad_norm": 1.997797966003418, "learning_rate": 2.698640679004042e-06, "loss": 0.5458, "step": 29953 }, { "epoch": 0.77, "grad_norm": 4.107271194458008, "learning_rate": 2.698073465946088e-06, "loss": 0.5452, "step": 29954 }, { "epoch": 0.77, "grad_norm": 1.0347261428833008, "learning_rate": 2.6975063032084227e-06, "loss": 0.4573, "step": 29955 }, { "epoch": 0.77, "grad_norm": 4.007130146026611, "learning_rate": 2.696939190794957e-06, "loss": 0.5415, "step": 29956 }, { "epoch": 0.77, "grad_norm": 2.180570363998413, "learning_rate": 2.696372128709599e-06, "loss": 0.5641, "step": 29957 }, { "epoch": 0.77, "grad_norm": 3.4744505882263184, "learning_rate": 2.6958051169562528e-06, "loss": 0.6144, "step": 29958 }, { "epoch": 0.77, "grad_norm": 2.358550786972046, "learning_rate": 2.695238155538832e-06, "loss": 0.4373, "step": 29959 }, { "epoch": 0.77, "grad_norm": 1.4723565578460693, "learning_rate": 2.694671244461241e-06, "loss": 0.5089, "step": 29960 }, { "epoch": 0.77, "grad_norm": 2.86921763420105, "learning_rate": 2.694104383727385e-06, "loss": 0.6152, "step": 29961 }, { "epoch": 0.77, "grad_norm": 7.315765857696533, "learning_rate": 2.6935375733411696e-06, "loss": 0.4074, "step": 29962 }, { "epoch": 0.77, "grad_norm": 1.9414423704147339, "learning_rate": 2.6929708133065046e-06, "loss": 0.5012, "step": 29963 }, { "epoch": 0.77, "grad_norm": 5.474132537841797, "learning_rate": 2.6924041036272952e-06, "loss": 0.5303, "step": 29964 }, { "epoch": 0.77, "grad_norm": 1.3592779636383057, "learning_rate": 2.6918374443074426e-06, "loss": 0.6246, "step": 29965 }, { "epoch": 0.77, "grad_norm": 1.731388807296753, "learning_rate": 2.6912708353508564e-06, "loss": 0.5379, "step": 29966 }, { "epoch": 0.77, "grad_norm": 1.516683578491211, "learning_rate": 2.690704276761441e-06, "loss": 0.5219, "step": 29967 }, { "epoch": 0.77, "grad_norm": 0.8212411999702454, "learning_rate": 2.6901377685430964e-06, "loss": 0.3676, "step": 29968 }, { "epoch": 0.77, "grad_norm": 5.42492151260376, "learning_rate": 2.6895713106997334e-06, "loss": 0.4655, "step": 29969 }, { "epoch": 0.77, "grad_norm": 3.7449951171875, "learning_rate": 2.6890049032352517e-06, "loss": 0.7277, "step": 29970 }, { "epoch": 0.77, "grad_norm": 1.2201488018035889, "learning_rate": 2.6884385461535546e-06, "loss": 0.4376, "step": 29971 }, { "epoch": 0.77, "grad_norm": 1.3879179954528809, "learning_rate": 2.687872239458542e-06, "loss": 0.5344, "step": 29972 }, { "epoch": 0.77, "grad_norm": 3.2369329929351807, "learning_rate": 2.687305983154124e-06, "loss": 0.5536, "step": 29973 }, { "epoch": 0.77, "grad_norm": 14.260916709899902, "learning_rate": 2.686739777244198e-06, "loss": 0.5029, "step": 29974 }, { "epoch": 0.77, "grad_norm": 1.1542913913726807, "learning_rate": 2.686173621732665e-06, "loss": 0.5247, "step": 29975 }, { "epoch": 0.77, "grad_norm": 3.5465660095214844, "learning_rate": 2.685607516623431e-06, "loss": 0.7183, "step": 29976 }, { "epoch": 0.77, "grad_norm": 1.2885421514511108, "learning_rate": 2.6850414619203945e-06, "loss": 0.6888, "step": 29977 }, { "epoch": 0.77, "grad_norm": 1.6483180522918701, "learning_rate": 2.6844754576274567e-06, "loss": 0.5916, "step": 29978 }, { "epoch": 0.77, "grad_norm": 1.790342092514038, "learning_rate": 2.6839095037485142e-06, "loss": 0.5496, "step": 29979 }, { "epoch": 0.77, "grad_norm": 3.0756523609161377, "learning_rate": 2.6833436002874745e-06, "loss": 0.5485, "step": 29980 }, { "epoch": 0.77, "grad_norm": 1.798466444015503, "learning_rate": 2.6827777472482343e-06, "loss": 0.5646, "step": 29981 }, { "epoch": 0.77, "grad_norm": 1.7238608598709106, "learning_rate": 2.6822119446346895e-06, "loss": 0.5664, "step": 29982 }, { "epoch": 0.77, "grad_norm": 5.79709005355835, "learning_rate": 2.6816461924507453e-06, "loss": 0.4567, "step": 29983 }, { "epoch": 0.77, "grad_norm": 1.824395775794983, "learning_rate": 2.6810804907002974e-06, "loss": 0.6169, "step": 29984 }, { "epoch": 0.77, "grad_norm": 1.3743054866790771, "learning_rate": 2.6805148393872416e-06, "loss": 0.4618, "step": 29985 }, { "epoch": 0.77, "grad_norm": 3.3755829334259033, "learning_rate": 2.6799492385154823e-06, "loss": 0.5343, "step": 29986 }, { "epoch": 0.77, "grad_norm": 3.5921168327331543, "learning_rate": 2.6793836880889146e-06, "loss": 0.6356, "step": 29987 }, { "epoch": 0.77, "grad_norm": 0.8683305978775024, "learning_rate": 2.6788181881114352e-06, "loss": 0.4013, "step": 29988 }, { "epoch": 0.77, "grad_norm": 0.9776812195777893, "learning_rate": 2.678252738586937e-06, "loss": 0.4573, "step": 29989 }, { "epoch": 0.77, "grad_norm": 1.7753006219863892, "learning_rate": 2.677687339519325e-06, "loss": 0.5452, "step": 29990 }, { "epoch": 0.77, "grad_norm": 1.7617454528808594, "learning_rate": 2.6771219909124913e-06, "loss": 0.5879, "step": 29991 }, { "epoch": 0.77, "grad_norm": 2.1146440505981445, "learning_rate": 2.676556692770328e-06, "loss": 0.6928, "step": 29992 }, { "epoch": 0.77, "grad_norm": 1.2513900995254517, "learning_rate": 2.6759914450967393e-06, "loss": 0.4109, "step": 29993 }, { "epoch": 0.77, "grad_norm": 1.7817955017089844, "learning_rate": 2.675426247895616e-06, "loss": 0.4457, "step": 29994 }, { "epoch": 0.77, "grad_norm": 1.528730869293213, "learning_rate": 2.6748611011708527e-06, "loss": 0.4983, "step": 29995 }, { "epoch": 0.77, "grad_norm": 1.3511936664581299, "learning_rate": 2.674296004926342e-06, "loss": 0.5796, "step": 29996 }, { "epoch": 0.77, "grad_norm": 1.260776400566101, "learning_rate": 2.6737309591659842e-06, "loss": 0.4188, "step": 29997 }, { "epoch": 0.77, "grad_norm": 1.5849634408950806, "learning_rate": 2.6731659638936693e-06, "loss": 0.423, "step": 29998 }, { "epoch": 0.77, "grad_norm": 1.137657642364502, "learning_rate": 2.6726010191132877e-06, "loss": 0.4686, "step": 29999 }, { "epoch": 0.77, "grad_norm": 1.2981380224227905, "learning_rate": 2.6720361248287406e-06, "loss": 0.3939, "step": 30000 }, { "epoch": 0.77, "grad_norm": 1.318981409072876, "learning_rate": 2.6714712810439158e-06, "loss": 0.4394, "step": 30001 }, { "epoch": 0.77, "grad_norm": 1.369205355644226, "learning_rate": 2.670906487762708e-06, "loss": 0.5219, "step": 30002 }, { "epoch": 0.77, "grad_norm": 6.558651924133301, "learning_rate": 2.6703417449890036e-06, "loss": 0.6055, "step": 30003 }, { "epoch": 0.77, "grad_norm": 1.916815161705017, "learning_rate": 2.6697770527267018e-06, "loss": 0.5947, "step": 30004 }, { "epoch": 0.77, "grad_norm": 1.1499191522598267, "learning_rate": 2.6692124109796924e-06, "loss": 0.4739, "step": 30005 }, { "epoch": 0.77, "grad_norm": 1.3749032020568848, "learning_rate": 2.6686478197518617e-06, "loss": 0.4195, "step": 30006 }, { "epoch": 0.77, "grad_norm": 2.749065399169922, "learning_rate": 2.6680832790471077e-06, "loss": 0.4603, "step": 30007 }, { "epoch": 0.77, "grad_norm": 1.117394208908081, "learning_rate": 2.667518788869318e-06, "loss": 0.5549, "step": 30008 }, { "epoch": 0.77, "grad_norm": 1.0756068229675293, "learning_rate": 2.666954349222377e-06, "loss": 0.5406, "step": 30009 }, { "epoch": 0.77, "grad_norm": 1.696825385093689, "learning_rate": 2.6663899601101837e-06, "loss": 0.5841, "step": 30010 }, { "epoch": 0.77, "grad_norm": 1.3014696836471558, "learning_rate": 2.665825621536624e-06, "loss": 0.5121, "step": 30011 }, { "epoch": 0.77, "grad_norm": 2.157046318054199, "learning_rate": 2.6652613335055854e-06, "loss": 0.582, "step": 30012 }, { "epoch": 0.77, "grad_norm": 3.8175759315490723, "learning_rate": 2.664697096020955e-06, "loss": 0.6134, "step": 30013 }, { "epoch": 0.77, "grad_norm": 1.1254559755325317, "learning_rate": 2.6641329090866276e-06, "loss": 0.5803, "step": 30014 }, { "epoch": 0.77, "grad_norm": 4.239832878112793, "learning_rate": 2.6635687727064865e-06, "loss": 0.656, "step": 30015 }, { "epoch": 0.77, "grad_norm": 1.3045865297317505, "learning_rate": 2.6630046868844163e-06, "loss": 0.3838, "step": 30016 }, { "epoch": 0.77, "grad_norm": 1.6177395582199097, "learning_rate": 2.6624406516243127e-06, "loss": 0.5208, "step": 30017 }, { "epoch": 0.77, "grad_norm": 3.426016092300415, "learning_rate": 2.6618766669300576e-06, "loss": 0.6038, "step": 30018 }, { "epoch": 0.77, "grad_norm": 1.8785436153411865, "learning_rate": 2.6613127328055375e-06, "loss": 0.4586, "step": 30019 }, { "epoch": 0.77, "grad_norm": 1.1194568872451782, "learning_rate": 2.6607488492546374e-06, "loss": 0.5348, "step": 30020 }, { "epoch": 0.77, "grad_norm": 2.2807562351226807, "learning_rate": 2.660185016281248e-06, "loss": 0.5635, "step": 30021 }, { "epoch": 0.77, "grad_norm": 1.60530424118042, "learning_rate": 2.659621233889252e-06, "loss": 0.5143, "step": 30022 }, { "epoch": 0.77, "grad_norm": 1.238850712776184, "learning_rate": 2.6590575020825326e-06, "loss": 0.5069, "step": 30023 }, { "epoch": 0.77, "grad_norm": 1.0205649137496948, "learning_rate": 2.658493820864979e-06, "loss": 0.4576, "step": 30024 }, { "epoch": 0.77, "grad_norm": 1.2569818496704102, "learning_rate": 2.657930190240474e-06, "loss": 0.5257, "step": 30025 }, { "epoch": 0.77, "grad_norm": 1.5895768404006958, "learning_rate": 2.6573666102128983e-06, "loss": 0.4338, "step": 30026 }, { "epoch": 0.77, "grad_norm": 1.3559892177581787, "learning_rate": 2.656803080786142e-06, "loss": 0.6169, "step": 30027 }, { "epoch": 0.77, "grad_norm": 4.245758056640625, "learning_rate": 2.6562396019640845e-06, "loss": 0.6746, "step": 30028 }, { "epoch": 0.77, "grad_norm": 1.4905248880386353, "learning_rate": 2.655676173750611e-06, "loss": 0.4914, "step": 30029 }, { "epoch": 0.77, "grad_norm": 1.5249298810958862, "learning_rate": 2.655112796149599e-06, "loss": 0.5175, "step": 30030 }, { "epoch": 0.77, "grad_norm": 1.5998728275299072, "learning_rate": 2.6545494691649397e-06, "loss": 0.5475, "step": 30031 }, { "epoch": 0.77, "grad_norm": 2.031280279159546, "learning_rate": 2.6539861928005095e-06, "loss": 0.7696, "step": 30032 }, { "epoch": 0.77, "grad_norm": 5.518406867980957, "learning_rate": 2.6534229670601887e-06, "loss": 0.5635, "step": 30033 }, { "epoch": 0.77, "grad_norm": 1.2958998680114746, "learning_rate": 2.6528597919478637e-06, "loss": 0.4592, "step": 30034 }, { "epoch": 0.77, "grad_norm": 1.7310733795166016, "learning_rate": 2.6522966674674134e-06, "loss": 0.4949, "step": 30035 }, { "epoch": 0.77, "grad_norm": 1.3230913877487183, "learning_rate": 2.6517335936227185e-06, "loss": 0.5965, "step": 30036 }, { "epoch": 0.77, "grad_norm": 1.0865756273269653, "learning_rate": 2.651170570417655e-06, "loss": 0.3545, "step": 30037 }, { "epoch": 0.77, "grad_norm": 1.5483936071395874, "learning_rate": 2.6506075978561106e-06, "loss": 0.5324, "step": 30038 }, { "epoch": 0.77, "grad_norm": 1.648349404335022, "learning_rate": 2.6500446759419616e-06, "loss": 0.4932, "step": 30039 }, { "epoch": 0.77, "grad_norm": 1.3108075857162476, "learning_rate": 2.649481804679084e-06, "loss": 0.4943, "step": 30040 }, { "epoch": 0.77, "grad_norm": 0.8801438212394714, "learning_rate": 2.6489189840713625e-06, "loss": 0.4449, "step": 30041 }, { "epoch": 0.77, "grad_norm": 1.5916342735290527, "learning_rate": 2.6483562141226725e-06, "loss": 0.6458, "step": 30042 }, { "epoch": 0.77, "grad_norm": 1.6032038927078247, "learning_rate": 2.6477934948368933e-06, "loss": 0.5708, "step": 30043 }, { "epoch": 0.77, "grad_norm": 2.3202481269836426, "learning_rate": 2.6472308262178993e-06, "loss": 0.7059, "step": 30044 }, { "epoch": 0.77, "grad_norm": 1.4265748262405396, "learning_rate": 2.6466682082695736e-06, "loss": 0.5829, "step": 30045 }, { "epoch": 0.77, "grad_norm": 3.4923720359802246, "learning_rate": 2.6461056409957907e-06, "loss": 0.6076, "step": 30046 }, { "epoch": 0.77, "grad_norm": 9.21030330657959, "learning_rate": 2.6455431244004247e-06, "loss": 0.611, "step": 30047 }, { "epoch": 0.77, "grad_norm": 1.4090982675552368, "learning_rate": 2.644980658487358e-06, "loss": 0.6619, "step": 30048 }, { "epoch": 0.77, "grad_norm": 1.7611205577850342, "learning_rate": 2.644418243260465e-06, "loss": 0.5358, "step": 30049 }, { "epoch": 0.77, "grad_norm": 1.8437520265579224, "learning_rate": 2.6438558787236156e-06, "loss": 0.5689, "step": 30050 }, { "epoch": 0.77, "grad_norm": 1.0305302143096924, "learning_rate": 2.6432935648806933e-06, "loss": 0.5549, "step": 30051 }, { "epoch": 0.77, "grad_norm": 2.4047577381134033, "learning_rate": 2.6427313017355694e-06, "loss": 0.5049, "step": 30052 }, { "epoch": 0.77, "grad_norm": 1.6754943132400513, "learning_rate": 2.6421690892921204e-06, "loss": 0.4781, "step": 30053 }, { "epoch": 0.77, "grad_norm": 1.9034878015518188, "learning_rate": 2.6416069275542154e-06, "loss": 0.535, "step": 30054 }, { "epoch": 0.77, "grad_norm": 1.1004977226257324, "learning_rate": 2.6410448165257353e-06, "loss": 0.5462, "step": 30055 }, { "epoch": 0.77, "grad_norm": 5.742517948150635, "learning_rate": 2.6404827562105517e-06, "loss": 0.7102, "step": 30056 }, { "epoch": 0.77, "grad_norm": 1.3514013290405273, "learning_rate": 2.639920746612533e-06, "loss": 0.5164, "step": 30057 }, { "epoch": 0.77, "grad_norm": 4.6188249588012695, "learning_rate": 2.639358787735561e-06, "loss": 0.6534, "step": 30058 }, { "epoch": 0.77, "grad_norm": 1.3799325227737427, "learning_rate": 2.6387968795835026e-06, "loss": 0.4618, "step": 30059 }, { "epoch": 0.77, "grad_norm": 2.1530609130859375, "learning_rate": 2.6382350221602317e-06, "loss": 0.5217, "step": 30060 }, { "epoch": 0.77, "grad_norm": 1.2418051958084106, "learning_rate": 2.637673215469617e-06, "loss": 0.4844, "step": 30061 }, { "epoch": 0.77, "grad_norm": 1.613411545753479, "learning_rate": 2.6371114595155367e-06, "loss": 0.53, "step": 30062 }, { "epoch": 0.77, "grad_norm": 1.3366508483886719, "learning_rate": 2.636549754301857e-06, "loss": 0.5612, "step": 30063 }, { "epoch": 0.77, "grad_norm": 2.9516336917877197, "learning_rate": 2.6359880998324483e-06, "loss": 0.6061, "step": 30064 }, { "epoch": 0.77, "grad_norm": 1.7974517345428467, "learning_rate": 2.6354264961111864e-06, "loss": 0.6696, "step": 30065 }, { "epoch": 0.77, "grad_norm": 2.9186015129089355, "learning_rate": 2.6348649431419373e-06, "loss": 0.8019, "step": 30066 }, { "epoch": 0.77, "grad_norm": 1.3165078163146973, "learning_rate": 2.634303440928572e-06, "loss": 0.539, "step": 30067 }, { "epoch": 0.77, "grad_norm": 7.1504950523376465, "learning_rate": 2.6337419894749572e-06, "loss": 0.6446, "step": 30068 }, { "epoch": 0.77, "grad_norm": 1.1816644668579102, "learning_rate": 2.633180588784967e-06, "loss": 0.4364, "step": 30069 }, { "epoch": 0.77, "grad_norm": 2.066802978515625, "learning_rate": 2.632619238862468e-06, "loss": 0.6523, "step": 30070 }, { "epoch": 0.77, "grad_norm": 2.5092825889587402, "learning_rate": 2.6320579397113266e-06, "loss": 0.6688, "step": 30071 }, { "epoch": 0.77, "grad_norm": 1.3423199653625488, "learning_rate": 2.6314966913354146e-06, "loss": 0.4339, "step": 30072 }, { "epoch": 0.77, "grad_norm": 1.8482064008712769, "learning_rate": 2.6309354937385987e-06, "loss": 0.4436, "step": 30073 }, { "epoch": 0.77, "grad_norm": 1.9050962924957275, "learning_rate": 2.6303743469247425e-06, "loss": 0.4896, "step": 30074 }, { "epoch": 0.77, "grad_norm": 1.4077996015548706, "learning_rate": 2.629813250897719e-06, "loss": 0.5485, "step": 30075 }, { "epoch": 0.77, "grad_norm": 1.0242770910263062, "learning_rate": 2.6292522056613923e-06, "loss": 0.5357, "step": 30076 }, { "epoch": 0.77, "grad_norm": 2.9544546604156494, "learning_rate": 2.6286912112196296e-06, "loss": 0.6542, "step": 30077 }, { "epoch": 0.77, "grad_norm": 1.3261337280273438, "learning_rate": 2.628130267576292e-06, "loss": 0.4679, "step": 30078 }, { "epoch": 0.77, "grad_norm": 3.344550609588623, "learning_rate": 2.6275693747352527e-06, "loss": 0.7974, "step": 30079 }, { "epoch": 0.77, "grad_norm": 1.930869221687317, "learning_rate": 2.6270085327003726e-06, "loss": 0.6217, "step": 30080 }, { "epoch": 0.77, "grad_norm": 1.0179744958877563, "learning_rate": 2.6264477414755152e-06, "loss": 0.4703, "step": 30081 }, { "epoch": 0.77, "grad_norm": 1.628685712814331, "learning_rate": 2.6258870010645497e-06, "loss": 0.5857, "step": 30082 }, { "epoch": 0.77, "grad_norm": 1.497239589691162, "learning_rate": 2.625326311471339e-06, "loss": 0.5066, "step": 30083 }, { "epoch": 0.77, "grad_norm": 1.459542155265808, "learning_rate": 2.624765672699746e-06, "loss": 0.4727, "step": 30084 }, { "epoch": 0.77, "grad_norm": 1.756608247756958, "learning_rate": 2.6242050847536306e-06, "loss": 0.4844, "step": 30085 }, { "epoch": 0.77, "grad_norm": 4.844832420349121, "learning_rate": 2.6236445476368634e-06, "loss": 0.4887, "step": 30086 }, { "epoch": 0.77, "grad_norm": 2.7667171955108643, "learning_rate": 2.623084061353304e-06, "loss": 0.5262, "step": 30087 }, { "epoch": 0.77, "grad_norm": 2.004992723464966, "learning_rate": 2.622523625906811e-06, "loss": 0.3409, "step": 30088 }, { "epoch": 0.77, "grad_norm": 2.3066771030426025, "learning_rate": 2.621963241301254e-06, "loss": 0.4734, "step": 30089 }, { "epoch": 0.77, "grad_norm": 1.5870801210403442, "learning_rate": 2.62140290754049e-06, "loss": 0.5977, "step": 30090 }, { "epoch": 0.77, "grad_norm": 1.208908200263977, "learning_rate": 2.620842624628379e-06, "loss": 0.5272, "step": 30091 }, { "epoch": 0.77, "grad_norm": 1.6532597541809082, "learning_rate": 2.6202823925687883e-06, "loss": 0.5141, "step": 30092 }, { "epoch": 0.77, "grad_norm": 4.8770527839660645, "learning_rate": 2.6197222113655742e-06, "loss": 0.4629, "step": 30093 }, { "epoch": 0.77, "grad_norm": 1.464432954788208, "learning_rate": 2.619162081022597e-06, "loss": 0.6035, "step": 30094 }, { "epoch": 0.77, "grad_norm": 1.392240285873413, "learning_rate": 2.6186020015437166e-06, "loss": 0.5445, "step": 30095 }, { "epoch": 0.77, "grad_norm": 1.6073979139328003, "learning_rate": 2.6180419729327954e-06, "loss": 0.509, "step": 30096 }, { "epoch": 0.77, "grad_norm": 19.12046241760254, "learning_rate": 2.6174819951936914e-06, "loss": 0.5172, "step": 30097 }, { "epoch": 0.77, "grad_norm": 10.498809814453125, "learning_rate": 2.6169220683302597e-06, "loss": 0.4991, "step": 30098 }, { "epoch": 0.77, "grad_norm": 1.1500494480133057, "learning_rate": 2.6163621923463667e-06, "loss": 0.5467, "step": 30099 }, { "epoch": 0.77, "grad_norm": 9.73051929473877, "learning_rate": 2.6158023672458664e-06, "loss": 0.6429, "step": 30100 }, { "epoch": 0.77, "grad_norm": 1.1580467224121094, "learning_rate": 2.6152425930326165e-06, "loss": 0.5868, "step": 30101 }, { "epoch": 0.77, "grad_norm": 4.225340843200684, "learning_rate": 2.614682869710472e-06, "loss": 0.7494, "step": 30102 }, { "epoch": 0.77, "grad_norm": 1.6895028352737427, "learning_rate": 2.614123197283297e-06, "loss": 0.558, "step": 30103 }, { "epoch": 0.77, "grad_norm": 1.33345627784729, "learning_rate": 2.613563575754944e-06, "loss": 0.5416, "step": 30104 }, { "epoch": 0.77, "grad_norm": 1.253987431526184, "learning_rate": 2.613004005129268e-06, "loss": 0.4823, "step": 30105 }, { "epoch": 0.77, "grad_norm": 11.63149642944336, "learning_rate": 2.612444485410129e-06, "loss": 0.5185, "step": 30106 }, { "epoch": 0.77, "grad_norm": 1.4876021146774292, "learning_rate": 2.611885016601383e-06, "loss": 0.5676, "step": 30107 }, { "epoch": 0.77, "grad_norm": 1.5026763677597046, "learning_rate": 2.611325598706883e-06, "loss": 0.5202, "step": 30108 }, { "epoch": 0.77, "grad_norm": 1.2930960655212402, "learning_rate": 2.6107662317304817e-06, "loss": 0.5258, "step": 30109 }, { "epoch": 0.77, "grad_norm": 2.947011709213257, "learning_rate": 2.61020691567604e-06, "loss": 0.6835, "step": 30110 }, { "epoch": 0.77, "grad_norm": 1.7181744575500488, "learning_rate": 2.609647650547411e-06, "loss": 0.3835, "step": 30111 }, { "epoch": 0.77, "grad_norm": 1.860788106918335, "learning_rate": 2.6090884363484424e-06, "loss": 0.5843, "step": 30112 }, { "epoch": 0.77, "grad_norm": 2.9278013706207275, "learning_rate": 2.608529273082997e-06, "loss": 0.5001, "step": 30113 }, { "epoch": 0.77, "grad_norm": 8.743322372436523, "learning_rate": 2.6079701607549236e-06, "loss": 0.5131, "step": 30114 }, { "epoch": 0.77, "grad_norm": 1.5856519937515259, "learning_rate": 2.6074110993680734e-06, "loss": 0.5415, "step": 30115 }, { "epoch": 0.77, "grad_norm": 4.556816101074219, "learning_rate": 2.6068520889263036e-06, "loss": 0.6343, "step": 30116 }, { "epoch": 0.77, "grad_norm": 3.6741883754730225, "learning_rate": 2.6062931294334648e-06, "loss": 0.4713, "step": 30117 }, { "epoch": 0.77, "grad_norm": 1.9210739135742188, "learning_rate": 2.6057342208934088e-06, "loss": 0.7093, "step": 30118 }, { "epoch": 0.77, "grad_norm": 1.692689061164856, "learning_rate": 2.605175363309983e-06, "loss": 0.4609, "step": 30119 }, { "epoch": 0.77, "grad_norm": 4.901512622833252, "learning_rate": 2.6046165566870474e-06, "loss": 0.6858, "step": 30120 }, { "epoch": 0.77, "grad_norm": 1.1391721963882446, "learning_rate": 2.6040578010284467e-06, "loss": 0.5637, "step": 30121 }, { "epoch": 0.77, "grad_norm": 1.978173017501831, "learning_rate": 2.6034990963380304e-06, "loss": 0.6015, "step": 30122 }, { "epoch": 0.77, "grad_norm": 1.369746208190918, "learning_rate": 2.6029404426196546e-06, "loss": 0.4832, "step": 30123 }, { "epoch": 0.77, "grad_norm": 5.474610805511475, "learning_rate": 2.6023818398771663e-06, "loss": 0.5683, "step": 30124 }, { "epoch": 0.77, "grad_norm": 1.397032618522644, "learning_rate": 2.6018232881144147e-06, "loss": 0.4732, "step": 30125 }, { "epoch": 0.77, "grad_norm": 2.6044111251831055, "learning_rate": 2.6012647873352457e-06, "loss": 0.5129, "step": 30126 }, { "epoch": 0.77, "grad_norm": 9.815765380859375, "learning_rate": 2.600706337543514e-06, "loss": 0.5274, "step": 30127 }, { "epoch": 0.77, "grad_norm": 1.3019518852233887, "learning_rate": 2.600147938743066e-06, "loss": 0.6253, "step": 30128 }, { "epoch": 0.77, "grad_norm": 2.9583053588867188, "learning_rate": 2.5995895909377466e-06, "loss": 0.5313, "step": 30129 }, { "epoch": 0.77, "grad_norm": 6.4152607917785645, "learning_rate": 2.599031294131409e-06, "loss": 0.5403, "step": 30130 }, { "epoch": 0.77, "grad_norm": 1.5900505781173706, "learning_rate": 2.5984730483278986e-06, "loss": 0.5159, "step": 30131 }, { "epoch": 0.77, "grad_norm": 1.033366084098816, "learning_rate": 2.5979148535310585e-06, "loss": 0.4103, "step": 30132 }, { "epoch": 0.77, "grad_norm": 1.356473445892334, "learning_rate": 2.5973567097447405e-06, "loss": 0.5239, "step": 30133 }, { "epoch": 0.77, "grad_norm": 3.461251974105835, "learning_rate": 2.596798616972791e-06, "loss": 0.5312, "step": 30134 }, { "epoch": 0.77, "grad_norm": 1.121619462966919, "learning_rate": 2.5962405752190534e-06, "loss": 0.5252, "step": 30135 }, { "epoch": 0.77, "grad_norm": 1.1706349849700928, "learning_rate": 2.5956825844873712e-06, "loss": 0.5222, "step": 30136 }, { "epoch": 0.77, "grad_norm": 1.862504005432129, "learning_rate": 2.595124644781597e-06, "loss": 0.5284, "step": 30137 }, { "epoch": 0.77, "grad_norm": 1.7151743173599243, "learning_rate": 2.5945667561055697e-06, "loss": 0.5295, "step": 30138 }, { "epoch": 0.77, "grad_norm": 2.014918088912964, "learning_rate": 2.5940089184631343e-06, "loss": 0.5834, "step": 30139 }, { "epoch": 0.77, "grad_norm": 1.7054353952407837, "learning_rate": 2.593451131858139e-06, "loss": 0.642, "step": 30140 }, { "epoch": 0.77, "grad_norm": 4.185184955596924, "learning_rate": 2.592893396294425e-06, "loss": 0.4633, "step": 30141 }, { "epoch": 0.77, "grad_norm": 1.3346772193908691, "learning_rate": 2.5923357117758362e-06, "loss": 0.4418, "step": 30142 }, { "epoch": 0.77, "grad_norm": 2.126997709274292, "learning_rate": 2.5917780783062128e-06, "loss": 0.6535, "step": 30143 }, { "epoch": 0.77, "grad_norm": 1.1475673913955688, "learning_rate": 2.591220495889404e-06, "loss": 0.5089, "step": 30144 }, { "epoch": 0.77, "grad_norm": 1.1404943466186523, "learning_rate": 2.590662964529248e-06, "loss": 0.4973, "step": 30145 }, { "epoch": 0.77, "grad_norm": 1.2079650163650513, "learning_rate": 2.5901054842295857e-06, "loss": 0.5492, "step": 30146 }, { "epoch": 0.77, "grad_norm": 1.6261931657791138, "learning_rate": 2.589548054994264e-06, "loss": 0.6184, "step": 30147 }, { "epoch": 0.77, "grad_norm": 1.437142014503479, "learning_rate": 2.588990676827121e-06, "loss": 0.4618, "step": 30148 }, { "epoch": 0.77, "grad_norm": 1.9941879510879517, "learning_rate": 2.588433349731998e-06, "loss": 0.5478, "step": 30149 }, { "epoch": 0.77, "grad_norm": 2.2931313514709473, "learning_rate": 2.587876073712734e-06, "loss": 0.5497, "step": 30150 }, { "epoch": 0.77, "grad_norm": 1.3212283849716187, "learning_rate": 2.5873188487731738e-06, "loss": 0.4694, "step": 30151 }, { "epoch": 0.77, "grad_norm": 1.3392716646194458, "learning_rate": 2.586761674917154e-06, "loss": 0.5942, "step": 30152 }, { "epoch": 0.77, "grad_norm": 1.4166414737701416, "learning_rate": 2.5862045521485136e-06, "loss": 0.4957, "step": 30153 }, { "epoch": 0.77, "grad_norm": 1.7716678380966187, "learning_rate": 2.585647480471096e-06, "loss": 0.518, "step": 30154 }, { "epoch": 0.77, "grad_norm": 4.330199718475342, "learning_rate": 2.585090459888738e-06, "loss": 0.5346, "step": 30155 }, { "epoch": 0.77, "grad_norm": 0.9702966809272766, "learning_rate": 2.5845334904052745e-06, "loss": 0.4984, "step": 30156 }, { "epoch": 0.77, "grad_norm": 0.982071042060852, "learning_rate": 2.583976572024551e-06, "loss": 0.4146, "step": 30157 }, { "epoch": 0.77, "grad_norm": 10.187097549438477, "learning_rate": 2.5834197047504018e-06, "loss": 0.562, "step": 30158 }, { "epoch": 0.77, "grad_norm": 1.775631308555603, "learning_rate": 2.5828628885866647e-06, "loss": 0.559, "step": 30159 }, { "epoch": 0.77, "grad_norm": 1.42403244972229, "learning_rate": 2.5823061235371727e-06, "loss": 0.5479, "step": 30160 }, { "epoch": 0.77, "grad_norm": 1.4591190814971924, "learning_rate": 2.5817494096057727e-06, "loss": 0.6161, "step": 30161 }, { "epoch": 0.77, "grad_norm": 8.234416007995605, "learning_rate": 2.5811927467962907e-06, "loss": 0.6436, "step": 30162 }, { "epoch": 0.77, "grad_norm": 9.161039352416992, "learning_rate": 2.5806361351125688e-06, "loss": 0.6509, "step": 30163 }, { "epoch": 0.77, "grad_norm": 3.038285493850708, "learning_rate": 2.5800795745584415e-06, "loss": 0.5352, "step": 30164 }, { "epoch": 0.77, "grad_norm": 2.1398463249206543, "learning_rate": 2.5795230651377447e-06, "loss": 0.7529, "step": 30165 }, { "epoch": 0.77, "grad_norm": 1.7681294679641724, "learning_rate": 2.5789666068543094e-06, "loss": 0.497, "step": 30166 }, { "epoch": 0.77, "grad_norm": 1.5158308744430542, "learning_rate": 2.578410199711977e-06, "loss": 0.4259, "step": 30167 }, { "epoch": 0.77, "grad_norm": 5.239327907562256, "learning_rate": 2.5778538437145793e-06, "loss": 0.438, "step": 30168 }, { "epoch": 0.77, "grad_norm": 1.0886874198913574, "learning_rate": 2.577297538865946e-06, "loss": 0.5973, "step": 30169 }, { "epoch": 0.77, "grad_norm": 1.2814942598342896, "learning_rate": 2.576741285169918e-06, "loss": 0.5842, "step": 30170 }, { "epoch": 0.77, "grad_norm": 1.2720204591751099, "learning_rate": 2.576185082630326e-06, "loss": 0.4382, "step": 30171 }, { "epoch": 0.77, "grad_norm": 2.1844217777252197, "learning_rate": 2.575628931251002e-06, "loss": 0.5845, "step": 30172 }, { "epoch": 0.77, "grad_norm": 1.5995726585388184, "learning_rate": 2.5750728310357752e-06, "loss": 0.5979, "step": 30173 }, { "epoch": 0.77, "grad_norm": 1.1319575309753418, "learning_rate": 2.574516781988485e-06, "loss": 0.473, "step": 30174 }, { "epoch": 0.77, "grad_norm": 5.2121500968933105, "learning_rate": 2.573960784112961e-06, "loss": 0.4727, "step": 30175 }, { "epoch": 0.77, "grad_norm": 1.277182698249817, "learning_rate": 2.5734048374130292e-06, "loss": 0.4029, "step": 30176 }, { "epoch": 0.77, "grad_norm": 1.4951387643814087, "learning_rate": 2.5728489418925293e-06, "loss": 0.3952, "step": 30177 }, { "epoch": 0.77, "grad_norm": 1.236492395401001, "learning_rate": 2.5722930975552883e-06, "loss": 0.6174, "step": 30178 }, { "epoch": 0.77, "grad_norm": 1.5645028352737427, "learning_rate": 2.5717373044051376e-06, "loss": 0.5849, "step": 30179 }, { "epoch": 0.77, "grad_norm": 6.239796161651611, "learning_rate": 2.5711815624459026e-06, "loss": 0.5911, "step": 30180 }, { "epoch": 0.77, "grad_norm": 1.332540512084961, "learning_rate": 2.5706258716814193e-06, "loss": 0.4736, "step": 30181 }, { "epoch": 0.77, "grad_norm": 0.8311219811439514, "learning_rate": 2.5700702321155156e-06, "loss": 0.4883, "step": 30182 }, { "epoch": 0.77, "grad_norm": 1.0882294178009033, "learning_rate": 2.569514643752017e-06, "loss": 0.5582, "step": 30183 }, { "epoch": 0.77, "grad_norm": 1.640255093574524, "learning_rate": 2.568959106594758e-06, "loss": 0.5684, "step": 30184 }, { "epoch": 0.77, "grad_norm": 1.7553091049194336, "learning_rate": 2.5684036206475637e-06, "loss": 0.5962, "step": 30185 }, { "epoch": 0.77, "grad_norm": 1.51375412940979, "learning_rate": 2.567848185914261e-06, "loss": 0.4351, "step": 30186 }, { "epoch": 0.77, "grad_norm": 1.2317756414413452, "learning_rate": 2.567292802398681e-06, "loss": 0.4398, "step": 30187 }, { "epoch": 0.77, "grad_norm": 1.2926114797592163, "learning_rate": 2.5667374701046498e-06, "loss": 0.5012, "step": 30188 }, { "epoch": 0.77, "grad_norm": 1.4013826847076416, "learning_rate": 2.566182189035994e-06, "loss": 0.4893, "step": 30189 }, { "epoch": 0.77, "grad_norm": 1.1306017637252808, "learning_rate": 2.5656269591965376e-06, "loss": 0.4676, "step": 30190 }, { "epoch": 0.77, "grad_norm": 1.7807124853134155, "learning_rate": 2.5650717805901117e-06, "loss": 0.4551, "step": 30191 }, { "epoch": 0.77, "grad_norm": 2.5072755813598633, "learning_rate": 2.56451665322054e-06, "loss": 0.5139, "step": 30192 }, { "epoch": 0.77, "grad_norm": 1.8684190511703491, "learning_rate": 2.5639615770916458e-06, "loss": 0.678, "step": 30193 }, { "epoch": 0.77, "grad_norm": 1.3606040477752686, "learning_rate": 2.563406552207259e-06, "loss": 0.4928, "step": 30194 }, { "epoch": 0.77, "grad_norm": 1.6613234281539917, "learning_rate": 2.5628515785712026e-06, "loss": 0.5822, "step": 30195 }, { "epoch": 0.77, "grad_norm": 1.850940465927124, "learning_rate": 2.5622966561873008e-06, "loss": 0.4675, "step": 30196 }, { "epoch": 0.77, "grad_norm": 1.9508074522018433, "learning_rate": 2.561741785059374e-06, "loss": 0.6269, "step": 30197 }, { "epoch": 0.77, "grad_norm": 2.33146333694458, "learning_rate": 2.5611869651912526e-06, "loss": 0.5726, "step": 30198 }, { "epoch": 0.77, "grad_norm": 1.5072256326675415, "learning_rate": 2.5606321965867575e-06, "loss": 0.4378, "step": 30199 }, { "epoch": 0.77, "grad_norm": 1.0512956380844116, "learning_rate": 2.5600774792497087e-06, "loss": 0.4522, "step": 30200 }, { "epoch": 0.77, "grad_norm": 1.6634522676467896, "learning_rate": 2.5595228131839335e-06, "loss": 0.5972, "step": 30201 }, { "epoch": 0.77, "grad_norm": 1.2048841714859009, "learning_rate": 2.5589681983932537e-06, "loss": 0.5811, "step": 30202 }, { "epoch": 0.77, "grad_norm": 2.8554978370666504, "learning_rate": 2.558413634881489e-06, "loss": 0.5985, "step": 30203 }, { "epoch": 0.77, "grad_norm": 1.2854057550430298, "learning_rate": 2.557859122652461e-06, "loss": 0.5313, "step": 30204 }, { "epoch": 0.77, "grad_norm": 1.6835447549819946, "learning_rate": 2.557304661709994e-06, "loss": 0.5301, "step": 30205 }, { "epoch": 0.77, "grad_norm": 7.622346878051758, "learning_rate": 2.556750252057908e-06, "loss": 0.5056, "step": 30206 }, { "epoch": 0.77, "grad_norm": 1.5410329103469849, "learning_rate": 2.55619589370002e-06, "loss": 0.5443, "step": 30207 }, { "epoch": 0.77, "grad_norm": 1.5117093324661255, "learning_rate": 2.5556415866401553e-06, "loss": 0.4823, "step": 30208 }, { "epoch": 0.77, "grad_norm": 2.4632275104522705, "learning_rate": 2.555087330882132e-06, "loss": 0.5717, "step": 30209 }, { "epoch": 0.77, "grad_norm": 1.7660815715789795, "learning_rate": 2.5545331264297667e-06, "loss": 0.4854, "step": 30210 }, { "epoch": 0.77, "grad_norm": 0.9445012807846069, "learning_rate": 2.553978973286885e-06, "loss": 0.4351, "step": 30211 }, { "epoch": 0.77, "grad_norm": 1.4594855308532715, "learning_rate": 2.5534248714573018e-06, "loss": 0.5202, "step": 30212 }, { "epoch": 0.77, "grad_norm": 1.004288673400879, "learning_rate": 2.5528708209448362e-06, "loss": 0.5201, "step": 30213 }, { "epoch": 0.77, "grad_norm": 1.992905616760254, "learning_rate": 2.5523168217533024e-06, "loss": 0.4526, "step": 30214 }, { "epoch": 0.77, "grad_norm": 1.0628751516342163, "learning_rate": 2.551762873886525e-06, "loss": 0.5461, "step": 30215 }, { "epoch": 0.77, "grad_norm": 1.3825639486312866, "learning_rate": 2.5512089773483195e-06, "loss": 0.5696, "step": 30216 }, { "epoch": 0.77, "grad_norm": 1.8211534023284912, "learning_rate": 2.550655132142498e-06, "loss": 0.4587, "step": 30217 }, { "epoch": 0.77, "grad_norm": 1.3515456914901733, "learning_rate": 2.5501013382728847e-06, "loss": 0.5699, "step": 30218 }, { "epoch": 0.77, "grad_norm": 3.1018126010894775, "learning_rate": 2.549547595743291e-06, "loss": 0.577, "step": 30219 }, { "epoch": 0.77, "grad_norm": 1.4100006818771362, "learning_rate": 2.548993904557536e-06, "loss": 0.4944, "step": 30220 }, { "epoch": 0.77, "grad_norm": 1.9138137102127075, "learning_rate": 2.548440264719431e-06, "loss": 0.5623, "step": 30221 }, { "epoch": 0.77, "grad_norm": 8.847179412841797, "learning_rate": 2.547886676232797e-06, "loss": 0.5595, "step": 30222 }, { "epoch": 0.77, "grad_norm": 2.0160186290740967, "learning_rate": 2.5473331391014455e-06, "loss": 0.51, "step": 30223 }, { "epoch": 0.77, "grad_norm": 1.5273901224136353, "learning_rate": 2.546779653329189e-06, "loss": 0.4851, "step": 30224 }, { "epoch": 0.77, "grad_norm": 1.8226598501205444, "learning_rate": 2.5462262189198484e-06, "loss": 0.3549, "step": 30225 }, { "epoch": 0.77, "grad_norm": 1.7102888822555542, "learning_rate": 2.545672835877232e-06, "loss": 0.5237, "step": 30226 }, { "epoch": 0.77, "grad_norm": 2.4299018383026123, "learning_rate": 2.5451195042051535e-06, "loss": 0.5483, "step": 30227 }, { "epoch": 0.77, "grad_norm": 1.608965516090393, "learning_rate": 2.5445662239074308e-06, "loss": 0.511, "step": 30228 }, { "epoch": 0.77, "grad_norm": 1.9761948585510254, "learning_rate": 2.5440129949878734e-06, "loss": 0.5103, "step": 30229 }, { "epoch": 0.77, "grad_norm": 1.8661938905715942, "learning_rate": 2.5434598174502943e-06, "loss": 0.5415, "step": 30230 }, { "epoch": 0.77, "grad_norm": 6.714563846588135, "learning_rate": 2.5429066912985025e-06, "loss": 0.7018, "step": 30231 }, { "epoch": 0.77, "grad_norm": 3.221665382385254, "learning_rate": 2.542353616536315e-06, "loss": 0.3679, "step": 30232 }, { "epoch": 0.77, "grad_norm": 1.8669239282608032, "learning_rate": 2.5418005931675417e-06, "loss": 0.5875, "step": 30233 }, { "epoch": 0.77, "grad_norm": 1.7190765142440796, "learning_rate": 2.54124762119599e-06, "loss": 0.5401, "step": 30234 }, { "epoch": 0.77, "grad_norm": 5.525101661682129, "learning_rate": 2.5406947006254766e-06, "loss": 0.566, "step": 30235 }, { "epoch": 0.77, "grad_norm": 0.9558595418930054, "learning_rate": 2.5401418314598083e-06, "loss": 0.2984, "step": 30236 }, { "epoch": 0.77, "grad_norm": 1.3489876985549927, "learning_rate": 2.539589013702797e-06, "loss": 0.5324, "step": 30237 }, { "epoch": 0.78, "grad_norm": 4.450957298278809, "learning_rate": 2.5390362473582476e-06, "loss": 0.5354, "step": 30238 }, { "epoch": 0.78, "grad_norm": 0.9137552976608276, "learning_rate": 2.538483532429975e-06, "loss": 0.5629, "step": 30239 }, { "epoch": 0.78, "grad_norm": 0.9484407305717468, "learning_rate": 2.5379308689217875e-06, "loss": 0.2911, "step": 30240 }, { "epoch": 0.78, "grad_norm": 5.027024745941162, "learning_rate": 2.537378256837488e-06, "loss": 0.6384, "step": 30241 }, { "epoch": 0.78, "grad_norm": 2.2979471683502197, "learning_rate": 2.5368256961808937e-06, "loss": 0.4968, "step": 30242 }, { "epoch": 0.78, "grad_norm": 1.2943600416183472, "learning_rate": 2.5362731869558065e-06, "loss": 0.5195, "step": 30243 }, { "epoch": 0.78, "grad_norm": 2.1882596015930176, "learning_rate": 2.5357207291660357e-06, "loss": 0.3904, "step": 30244 }, { "epoch": 0.78, "grad_norm": 1.0568636655807495, "learning_rate": 2.5351683228153856e-06, "loss": 0.5693, "step": 30245 }, { "epoch": 0.78, "grad_norm": 4.122735500335693, "learning_rate": 2.5346159679076676e-06, "loss": 0.3735, "step": 30246 }, { "epoch": 0.78, "grad_norm": 1.4794158935546875, "learning_rate": 2.5340636644466874e-06, "loss": 0.5527, "step": 30247 }, { "epoch": 0.78, "grad_norm": 1.6468170881271362, "learning_rate": 2.533511412436246e-06, "loss": 0.5416, "step": 30248 }, { "epoch": 0.78, "grad_norm": 1.6481060981750488, "learning_rate": 2.532959211880156e-06, "loss": 0.4705, "step": 30249 }, { "epoch": 0.78, "grad_norm": 1.106602668762207, "learning_rate": 2.5324070627822206e-06, "loss": 0.4794, "step": 30250 }, { "epoch": 0.78, "grad_norm": 1.5422775745391846, "learning_rate": 2.5318549651462397e-06, "loss": 0.673, "step": 30251 }, { "epoch": 0.78, "grad_norm": 4.428228378295898, "learning_rate": 2.5313029189760276e-06, "loss": 0.4836, "step": 30252 }, { "epoch": 0.78, "grad_norm": 1.4796106815338135, "learning_rate": 2.5307509242753813e-06, "loss": 0.5702, "step": 30253 }, { "epoch": 0.78, "grad_norm": 1.698859691619873, "learning_rate": 2.5301989810481076e-06, "loss": 0.5574, "step": 30254 }, { "epoch": 0.78, "grad_norm": 1.2365164756774902, "learning_rate": 2.5296470892980073e-06, "loss": 0.4693, "step": 30255 }, { "epoch": 0.78, "grad_norm": 2.636225938796997, "learning_rate": 2.529095249028889e-06, "loss": 0.6224, "step": 30256 }, { "epoch": 0.78, "grad_norm": 1.9038349390029907, "learning_rate": 2.5285434602445512e-06, "loss": 0.614, "step": 30257 }, { "epoch": 0.78, "grad_norm": 1.0077606439590454, "learning_rate": 2.527991722948796e-06, "loss": 0.3992, "step": 30258 }, { "epoch": 0.78, "grad_norm": 1.3838473558425903, "learning_rate": 2.52744003714543e-06, "loss": 0.5183, "step": 30259 }, { "epoch": 0.78, "grad_norm": 2.9215564727783203, "learning_rate": 2.526888402838252e-06, "loss": 0.3891, "step": 30260 }, { "epoch": 0.78, "grad_norm": 1.728186845779419, "learning_rate": 2.5263368200310658e-06, "loss": 0.7105, "step": 30261 }, { "epoch": 0.78, "grad_norm": 1.1856673955917358, "learning_rate": 2.525785288727667e-06, "loss": 0.4353, "step": 30262 }, { "epoch": 0.78, "grad_norm": 1.1522568464279175, "learning_rate": 2.5252338089318627e-06, "loss": 0.5465, "step": 30263 }, { "epoch": 0.78, "grad_norm": 1.0966956615447998, "learning_rate": 2.5246823806474506e-06, "loss": 0.4236, "step": 30264 }, { "epoch": 0.78, "grad_norm": 1.6457374095916748, "learning_rate": 2.5241310038782296e-06, "loss": 0.524, "step": 30265 }, { "epoch": 0.78, "grad_norm": 7.590965270996094, "learning_rate": 2.523579678628002e-06, "loss": 0.7567, "step": 30266 }, { "epoch": 0.78, "grad_norm": 1.7362546920776367, "learning_rate": 2.523028404900567e-06, "loss": 0.4701, "step": 30267 }, { "epoch": 0.78, "grad_norm": 1.9208037853240967, "learning_rate": 2.522477182699721e-06, "loss": 0.6155, "step": 30268 }, { "epoch": 0.78, "grad_norm": 1.7155683040618896, "learning_rate": 2.5219260120292653e-06, "loss": 0.5665, "step": 30269 }, { "epoch": 0.78, "grad_norm": 1.8108270168304443, "learning_rate": 2.521374892892998e-06, "loss": 0.5446, "step": 30270 }, { "epoch": 0.78, "grad_norm": 2.3332502841949463, "learning_rate": 2.520823825294717e-06, "loss": 0.6401, "step": 30271 }, { "epoch": 0.78, "grad_norm": 5.391421794891357, "learning_rate": 2.5202728092382168e-06, "loss": 0.6862, "step": 30272 }, { "epoch": 0.78, "grad_norm": 5.208688735961914, "learning_rate": 2.5197218447272987e-06, "loss": 0.615, "step": 30273 }, { "epoch": 0.78, "grad_norm": 2.1850130558013916, "learning_rate": 2.5191709317657585e-06, "loss": 0.6582, "step": 30274 }, { "epoch": 0.78, "grad_norm": 5.842773914337158, "learning_rate": 2.5186200703573904e-06, "loss": 0.3936, "step": 30275 }, { "epoch": 0.78, "grad_norm": 1.4427169561386108, "learning_rate": 2.518069260505994e-06, "loss": 0.4285, "step": 30276 }, { "epoch": 0.78, "grad_norm": 1.0525630712509155, "learning_rate": 2.517518502215365e-06, "loss": 0.4124, "step": 30277 }, { "epoch": 0.78, "grad_norm": 1.394662857055664, "learning_rate": 2.516967795489297e-06, "loss": 0.4824, "step": 30278 }, { "epoch": 0.78, "grad_norm": 2.140179395675659, "learning_rate": 2.5164171403315817e-06, "loss": 0.6466, "step": 30279 }, { "epoch": 0.78, "grad_norm": 3.0999867916107178, "learning_rate": 2.5158665367460222e-06, "loss": 0.4829, "step": 30280 }, { "epoch": 0.78, "grad_norm": 2.0732383728027344, "learning_rate": 2.5153159847364073e-06, "loss": 0.5072, "step": 30281 }, { "epoch": 0.78, "grad_norm": 3.74965238571167, "learning_rate": 2.5147654843065304e-06, "loss": 0.5409, "step": 30282 }, { "epoch": 0.78, "grad_norm": 1.6011062860488892, "learning_rate": 2.514215035460189e-06, "loss": 0.4719, "step": 30283 }, { "epoch": 0.78, "grad_norm": 1.837227702140808, "learning_rate": 2.5136646382011754e-06, "loss": 0.6909, "step": 30284 }, { "epoch": 0.78, "grad_norm": 2.288369655609131, "learning_rate": 2.513114292533282e-06, "loss": 0.4124, "step": 30285 }, { "epoch": 0.78, "grad_norm": 1.6426976919174194, "learning_rate": 2.512563998460298e-06, "loss": 0.5358, "step": 30286 }, { "epoch": 0.78, "grad_norm": 2.3309977054595947, "learning_rate": 2.512013755986021e-06, "loss": 0.4441, "step": 30287 }, { "epoch": 0.78, "grad_norm": 2.8704490661621094, "learning_rate": 2.5114635651142417e-06, "loss": 0.4263, "step": 30288 }, { "epoch": 0.78, "grad_norm": 1.4905439615249634, "learning_rate": 2.5109134258487468e-06, "loss": 0.5925, "step": 30289 }, { "epoch": 0.78, "grad_norm": 1.0943704843521118, "learning_rate": 2.5103633381933356e-06, "loss": 0.5767, "step": 30290 }, { "epoch": 0.78, "grad_norm": 5.053518772125244, "learning_rate": 2.5098133021517945e-06, "loss": 0.462, "step": 30291 }, { "epoch": 0.78, "grad_norm": 1.5030312538146973, "learning_rate": 2.50926331772791e-06, "loss": 0.5299, "step": 30292 }, { "epoch": 0.78, "grad_norm": 1.732826590538025, "learning_rate": 2.508713384925482e-06, "loss": 0.5471, "step": 30293 }, { "epoch": 0.78, "grad_norm": 5.037816047668457, "learning_rate": 2.5081635037482933e-06, "loss": 0.5526, "step": 30294 }, { "epoch": 0.78, "grad_norm": 1.3495841026306152, "learning_rate": 2.5076136742001346e-06, "loss": 0.6032, "step": 30295 }, { "epoch": 0.78, "grad_norm": 1.7019789218902588, "learning_rate": 2.507063896284793e-06, "loss": 0.6237, "step": 30296 }, { "epoch": 0.78, "grad_norm": 1.7557311058044434, "learning_rate": 2.5065141700060626e-06, "loss": 0.5562, "step": 30297 }, { "epoch": 0.78, "grad_norm": 5.469407558441162, "learning_rate": 2.5059644953677274e-06, "loss": 0.8735, "step": 30298 }, { "epoch": 0.78, "grad_norm": 7.269730091094971, "learning_rate": 2.505414872373575e-06, "loss": 0.5176, "step": 30299 }, { "epoch": 0.78, "grad_norm": 1.6779423952102661, "learning_rate": 2.5048653010273983e-06, "loss": 0.5411, "step": 30300 }, { "epoch": 0.78, "grad_norm": 1.347997784614563, "learning_rate": 2.50431578133298e-06, "loss": 0.559, "step": 30301 }, { "epoch": 0.78, "grad_norm": 1.6602586507797241, "learning_rate": 2.503766313294108e-06, "loss": 0.4175, "step": 30302 }, { "epoch": 0.78, "grad_norm": 1.5123357772827148, "learning_rate": 2.5032168969145664e-06, "loss": 0.6637, "step": 30303 }, { "epoch": 0.78, "grad_norm": 1.4972643852233887, "learning_rate": 2.5026675321981466e-06, "loss": 0.4424, "step": 30304 }, { "epoch": 0.78, "grad_norm": 1.2965691089630127, "learning_rate": 2.5021182191486317e-06, "loss": 0.4216, "step": 30305 }, { "epoch": 0.78, "grad_norm": 1.8656530380249023, "learning_rate": 2.5015689577698054e-06, "loss": 0.5124, "step": 30306 }, { "epoch": 0.78, "grad_norm": 1.449594259262085, "learning_rate": 2.5010197480654574e-06, "loss": 0.4652, "step": 30307 }, { "epoch": 0.78, "grad_norm": 0.9659687280654907, "learning_rate": 2.50047059003937e-06, "loss": 0.5288, "step": 30308 }, { "epoch": 0.78, "grad_norm": 1.1350553035736084, "learning_rate": 2.4999214836953244e-06, "loss": 0.4325, "step": 30309 }, { "epoch": 0.78, "grad_norm": 9.000232696533203, "learning_rate": 2.49937242903711e-06, "loss": 0.6388, "step": 30310 }, { "epoch": 0.78, "grad_norm": 5.370386600494385, "learning_rate": 2.4988234260685095e-06, "loss": 0.6112, "step": 30311 }, { "epoch": 0.78, "grad_norm": 2.43501877784729, "learning_rate": 2.498274474793305e-06, "loss": 0.3863, "step": 30312 }, { "epoch": 0.78, "grad_norm": 1.1795756816864014, "learning_rate": 2.497725575215277e-06, "loss": 0.3811, "step": 30313 }, { "epoch": 0.78, "grad_norm": 1.9488047361373901, "learning_rate": 2.4971767273382143e-06, "loss": 0.5708, "step": 30314 }, { "epoch": 0.78, "grad_norm": 1.0349507331848145, "learning_rate": 2.496627931165896e-06, "loss": 0.5666, "step": 30315 }, { "epoch": 0.78, "grad_norm": 1.4094845056533813, "learning_rate": 2.4960791867021007e-06, "loss": 0.4965, "step": 30316 }, { "epoch": 0.78, "grad_norm": 1.5389084815979004, "learning_rate": 2.4955304939506154e-06, "loss": 0.5867, "step": 30317 }, { "epoch": 0.78, "grad_norm": 2.542696237564087, "learning_rate": 2.494981852915219e-06, "loss": 0.6608, "step": 30318 }, { "epoch": 0.78, "grad_norm": 22.45051383972168, "learning_rate": 2.4944332635996937e-06, "loss": 0.6056, "step": 30319 }, { "epoch": 0.78, "grad_norm": 1.6744173765182495, "learning_rate": 2.493884726007816e-06, "loss": 0.4964, "step": 30320 }, { "epoch": 0.78, "grad_norm": 3.795614242553711, "learning_rate": 2.4933362401433714e-06, "loss": 0.5495, "step": 30321 }, { "epoch": 0.78, "grad_norm": 3.21909761428833, "learning_rate": 2.4927878060101374e-06, "loss": 0.6901, "step": 30322 }, { "epoch": 0.78, "grad_norm": 1.3887197971343994, "learning_rate": 2.49223942361189e-06, "loss": 0.4302, "step": 30323 }, { "epoch": 0.78, "grad_norm": 1.82354736328125, "learning_rate": 2.4916910929524154e-06, "loss": 0.4356, "step": 30324 }, { "epoch": 0.78, "grad_norm": 4.884288787841797, "learning_rate": 2.4911428140354876e-06, "loss": 0.6426, "step": 30325 }, { "epoch": 0.78, "grad_norm": 1.1960840225219727, "learning_rate": 2.4905945868648863e-06, "loss": 0.4011, "step": 30326 }, { "epoch": 0.78, "grad_norm": 1.5122822523117065, "learning_rate": 2.4900464114443867e-06, "loss": 0.4481, "step": 30327 }, { "epoch": 0.78, "grad_norm": 1.1809053421020508, "learning_rate": 2.4894982877777718e-06, "loss": 0.5062, "step": 30328 }, { "epoch": 0.78, "grad_norm": 1.4018726348876953, "learning_rate": 2.4889502158688162e-06, "loss": 0.4378, "step": 30329 }, { "epoch": 0.78, "grad_norm": 2.578803539276123, "learning_rate": 2.4884021957212932e-06, "loss": 0.4805, "step": 30330 }, { "epoch": 0.78, "grad_norm": 6.803352355957031, "learning_rate": 2.4878542273389873e-06, "loss": 0.5842, "step": 30331 }, { "epoch": 0.78, "grad_norm": 1.4397786855697632, "learning_rate": 2.487306310725669e-06, "loss": 0.4358, "step": 30332 }, { "epoch": 0.78, "grad_norm": 1.075775146484375, "learning_rate": 2.4867584458851134e-06, "loss": 0.4389, "step": 30333 }, { "epoch": 0.78, "grad_norm": 1.516112208366394, "learning_rate": 2.486210632821101e-06, "loss": 0.5556, "step": 30334 }, { "epoch": 0.78, "grad_norm": 1.684954047203064, "learning_rate": 2.485662871537404e-06, "loss": 0.5371, "step": 30335 }, { "epoch": 0.78, "grad_norm": 3.048412799835205, "learning_rate": 2.4851151620377987e-06, "loss": 0.6852, "step": 30336 }, { "epoch": 0.78, "grad_norm": 1.6773412227630615, "learning_rate": 2.4845675043260553e-06, "loss": 0.5633, "step": 30337 }, { "epoch": 0.78, "grad_norm": 1.5121439695358276, "learning_rate": 2.4840198984059526e-06, "loss": 0.42, "step": 30338 }, { "epoch": 0.78, "grad_norm": 1.8734965324401855, "learning_rate": 2.4834723442812646e-06, "loss": 0.7361, "step": 30339 }, { "epoch": 0.78, "grad_norm": 1.5469828844070435, "learning_rate": 2.4829248419557583e-06, "loss": 0.4746, "step": 30340 }, { "epoch": 0.78, "grad_norm": 0.909247100353241, "learning_rate": 2.482377391433215e-06, "loss": 0.406, "step": 30341 }, { "epoch": 0.78, "grad_norm": 1.4078980684280396, "learning_rate": 2.481829992717404e-06, "loss": 0.4649, "step": 30342 }, { "epoch": 0.78, "grad_norm": 1.1990852355957031, "learning_rate": 2.4812826458120966e-06, "loss": 0.4746, "step": 30343 }, { "epoch": 0.78, "grad_norm": 1.8163772821426392, "learning_rate": 2.480735350721063e-06, "loss": 0.6436, "step": 30344 }, { "epoch": 0.78, "grad_norm": 1.7729625701904297, "learning_rate": 2.4801881074480793e-06, "loss": 0.542, "step": 30345 }, { "epoch": 0.78, "grad_norm": 7.05428409576416, "learning_rate": 2.479640915996916e-06, "loss": 0.6088, "step": 30346 }, { "epoch": 0.78, "grad_norm": 1.6918948888778687, "learning_rate": 2.4790937763713397e-06, "loss": 0.6061, "step": 30347 }, { "epoch": 0.78, "grad_norm": 1.6526763439178467, "learning_rate": 2.4785466885751265e-06, "loss": 0.6329, "step": 30348 }, { "epoch": 0.78, "grad_norm": 1.7789472341537476, "learning_rate": 2.4779996526120442e-06, "loss": 0.5112, "step": 30349 }, { "epoch": 0.78, "grad_norm": 2.764169454574585, "learning_rate": 2.4774526684858626e-06, "loss": 0.6797, "step": 30350 }, { "epoch": 0.78, "grad_norm": 2.5690975189208984, "learning_rate": 2.476905736200348e-06, "loss": 0.4709, "step": 30351 }, { "epoch": 0.78, "grad_norm": 2.3799710273742676, "learning_rate": 2.4763588557592753e-06, "loss": 0.6083, "step": 30352 }, { "epoch": 0.78, "grad_norm": 2.264410972595215, "learning_rate": 2.475812027166411e-06, "loss": 0.5084, "step": 30353 }, { "epoch": 0.78, "grad_norm": 5.683687686920166, "learning_rate": 2.475265250425519e-06, "loss": 0.6053, "step": 30354 }, { "epoch": 0.78, "grad_norm": 1.3782322406768799, "learning_rate": 2.474718525540376e-06, "loss": 0.4509, "step": 30355 }, { "epoch": 0.78, "grad_norm": 1.1297191381454468, "learning_rate": 2.474171852514744e-06, "loss": 0.5555, "step": 30356 }, { "epoch": 0.78, "grad_norm": 1.7462066411972046, "learning_rate": 2.473625231352389e-06, "loss": 0.5149, "step": 30357 }, { "epoch": 0.78, "grad_norm": 1.6960184574127197, "learning_rate": 2.4730786620570824e-06, "loss": 0.6662, "step": 30358 }, { "epoch": 0.78, "grad_norm": 1.154995083808899, "learning_rate": 2.4725321446325902e-06, "loss": 0.4449, "step": 30359 }, { "epoch": 0.78, "grad_norm": 1.512771487236023, "learning_rate": 2.4719856790826755e-06, "loss": 0.6206, "step": 30360 }, { "epoch": 0.78, "grad_norm": 1.672074317932129, "learning_rate": 2.471439265411105e-06, "loss": 0.5136, "step": 30361 }, { "epoch": 0.78, "grad_norm": 1.383655071258545, "learning_rate": 2.4708929036216465e-06, "loss": 0.4469, "step": 30362 }, { "epoch": 0.78, "grad_norm": 1.2420722246170044, "learning_rate": 2.4703465937180648e-06, "loss": 0.5607, "step": 30363 }, { "epoch": 0.78, "grad_norm": 1.6857757568359375, "learning_rate": 2.4698003357041212e-06, "loss": 0.5428, "step": 30364 }, { "epoch": 0.78, "grad_norm": 1.293389081954956, "learning_rate": 2.4692541295835847e-06, "loss": 0.4596, "step": 30365 }, { "epoch": 0.78, "grad_norm": 1.5156774520874023, "learning_rate": 2.4687079753602173e-06, "loss": 0.4817, "step": 30366 }, { "epoch": 0.78, "grad_norm": 1.145459532737732, "learning_rate": 2.468161873037783e-06, "loss": 0.4158, "step": 30367 }, { "epoch": 0.78, "grad_norm": 0.9505652785301208, "learning_rate": 2.4676158226200433e-06, "loss": 0.4168, "step": 30368 }, { "epoch": 0.78, "grad_norm": 2.4454259872436523, "learning_rate": 2.467069824110765e-06, "loss": 0.7168, "step": 30369 }, { "epoch": 0.78, "grad_norm": 1.623637080192566, "learning_rate": 2.466523877513709e-06, "loss": 0.5506, "step": 30370 }, { "epoch": 0.78, "grad_norm": 6.2342376708984375, "learning_rate": 2.465977982832635e-06, "loss": 0.4626, "step": 30371 }, { "epoch": 0.78, "grad_norm": 1.5316485166549683, "learning_rate": 2.46543214007131e-06, "loss": 0.8153, "step": 30372 }, { "epoch": 0.78, "grad_norm": 1.480505347251892, "learning_rate": 2.464886349233493e-06, "loss": 0.4911, "step": 30373 }, { "epoch": 0.78, "grad_norm": 0.9227840900421143, "learning_rate": 2.464340610322942e-06, "loss": 0.5129, "step": 30374 }, { "epoch": 0.78, "grad_norm": 1.5149306058883667, "learning_rate": 2.4637949233434244e-06, "loss": 0.3859, "step": 30375 }, { "epoch": 0.78, "grad_norm": 2.2735815048217773, "learning_rate": 2.463249288298697e-06, "loss": 0.6127, "step": 30376 }, { "epoch": 0.78, "grad_norm": 3.140650510787964, "learning_rate": 2.4627037051925208e-06, "loss": 0.6038, "step": 30377 }, { "epoch": 0.78, "grad_norm": 1.5252599716186523, "learning_rate": 2.4621581740286526e-06, "loss": 0.5711, "step": 30378 }, { "epoch": 0.78, "grad_norm": 1.3947927951812744, "learning_rate": 2.461612694810858e-06, "loss": 0.5303, "step": 30379 }, { "epoch": 0.78, "grad_norm": 9.242778778076172, "learning_rate": 2.4610672675428914e-06, "loss": 0.6964, "step": 30380 }, { "epoch": 0.78, "grad_norm": 3.6913323402404785, "learning_rate": 2.4605218922285102e-06, "loss": 0.7193, "step": 30381 }, { "epoch": 0.78, "grad_norm": 0.9349333643913269, "learning_rate": 2.459976568871478e-06, "loss": 0.3446, "step": 30382 }, { "epoch": 0.78, "grad_norm": 1.745242714881897, "learning_rate": 2.4594312974755506e-06, "loss": 0.6622, "step": 30383 }, { "epoch": 0.78, "grad_norm": 1.598101258277893, "learning_rate": 2.4588860780444857e-06, "loss": 0.4265, "step": 30384 }, { "epoch": 0.78, "grad_norm": 2.3267788887023926, "learning_rate": 2.4583409105820368e-06, "loss": 0.4713, "step": 30385 }, { "epoch": 0.78, "grad_norm": 2.144622325897217, "learning_rate": 2.457795795091967e-06, "loss": 0.7473, "step": 30386 }, { "epoch": 0.78, "grad_norm": 1.4298312664031982, "learning_rate": 2.4572507315780304e-06, "loss": 0.5239, "step": 30387 }, { "epoch": 0.78, "grad_norm": 1.0949403047561646, "learning_rate": 2.4567057200439793e-06, "loss": 0.4533, "step": 30388 }, { "epoch": 0.78, "grad_norm": 2.6009438037872314, "learning_rate": 2.4561607604935765e-06, "loss": 0.7312, "step": 30389 }, { "epoch": 0.78, "grad_norm": 1.4719327688217163, "learning_rate": 2.455615852930575e-06, "loss": 0.6291, "step": 30390 }, { "epoch": 0.78, "grad_norm": 1.192132592201233, "learning_rate": 2.4550709973587283e-06, "loss": 0.5954, "step": 30391 }, { "epoch": 0.78, "grad_norm": 1.5318222045898438, "learning_rate": 2.45452619378179e-06, "loss": 0.6668, "step": 30392 }, { "epoch": 0.78, "grad_norm": 2.2793667316436768, "learning_rate": 2.453981442203519e-06, "loss": 0.4762, "step": 30393 }, { "epoch": 0.78, "grad_norm": 3.160019874572754, "learning_rate": 2.4534367426276673e-06, "loss": 0.6042, "step": 30394 }, { "epoch": 0.78, "grad_norm": 9.113805770874023, "learning_rate": 2.452892095057985e-06, "loss": 0.4872, "step": 30395 }, { "epoch": 0.78, "grad_norm": 1.8462637662887573, "learning_rate": 2.4523474994982322e-06, "loss": 0.4916, "step": 30396 }, { "epoch": 0.78, "grad_norm": 1.014579176902771, "learning_rate": 2.4518029559521584e-06, "loss": 0.5059, "step": 30397 }, { "epoch": 0.78, "grad_norm": 1.336323857307434, "learning_rate": 2.4512584644235125e-06, "loss": 0.5251, "step": 30398 }, { "epoch": 0.78, "grad_norm": 2.31809139251709, "learning_rate": 2.4507140249160544e-06, "loss": 0.5399, "step": 30399 }, { "epoch": 0.78, "grad_norm": 1.6984411478042603, "learning_rate": 2.4501696374335328e-06, "loss": 0.6294, "step": 30400 }, { "epoch": 0.78, "grad_norm": 1.5636966228485107, "learning_rate": 2.449625301979698e-06, "loss": 0.51, "step": 30401 }, { "epoch": 0.78, "grad_norm": 2.0357985496520996, "learning_rate": 2.4490810185582993e-06, "loss": 0.3455, "step": 30402 }, { "epoch": 0.78, "grad_norm": 1.288796305656433, "learning_rate": 2.448536787173094e-06, "loss": 0.4204, "step": 30403 }, { "epoch": 0.78, "grad_norm": 1.6350488662719727, "learning_rate": 2.4479926078278282e-06, "loss": 0.6665, "step": 30404 }, { "epoch": 0.78, "grad_norm": 5.111873149871826, "learning_rate": 2.447448480526252e-06, "loss": 0.6497, "step": 30405 }, { "epoch": 0.78, "grad_norm": 1.5756064653396606, "learning_rate": 2.446904405272117e-06, "loss": 0.5545, "step": 30406 }, { "epoch": 0.78, "grad_norm": 7.951193809509277, "learning_rate": 2.4463603820691705e-06, "loss": 0.5864, "step": 30407 }, { "epoch": 0.78, "grad_norm": 1.4638384580612183, "learning_rate": 2.4458164109211614e-06, "loss": 0.5224, "step": 30408 }, { "epoch": 0.78, "grad_norm": 1.253313422203064, "learning_rate": 2.4452724918318405e-06, "loss": 0.5742, "step": 30409 }, { "epoch": 0.78, "grad_norm": 1.7204785346984863, "learning_rate": 2.4447286248049563e-06, "loss": 0.6121, "step": 30410 }, { "epoch": 0.78, "grad_norm": 1.2188265323638916, "learning_rate": 2.4441848098442535e-06, "loss": 0.4514, "step": 30411 }, { "epoch": 0.78, "grad_norm": 2.379772424697876, "learning_rate": 2.443641046953483e-06, "loss": 0.6256, "step": 30412 }, { "epoch": 0.78, "grad_norm": 4.442567348480225, "learning_rate": 2.443097336136393e-06, "loss": 0.5473, "step": 30413 }, { "epoch": 0.78, "grad_norm": 1.4326996803283691, "learning_rate": 2.4425536773967275e-06, "loss": 0.5759, "step": 30414 }, { "epoch": 0.78, "grad_norm": 4.035040855407715, "learning_rate": 2.442010070738231e-06, "loss": 0.5643, "step": 30415 }, { "epoch": 0.78, "grad_norm": 2.0673255920410156, "learning_rate": 2.4414665161646555e-06, "loss": 0.6726, "step": 30416 }, { "epoch": 0.78, "grad_norm": 1.8484693765640259, "learning_rate": 2.4409230136797437e-06, "loss": 0.5358, "step": 30417 }, { "epoch": 0.78, "grad_norm": 6.6763811111450195, "learning_rate": 2.4403795632872395e-06, "loss": 0.5199, "step": 30418 }, { "epoch": 0.78, "grad_norm": 1.2986915111541748, "learning_rate": 2.439836164990892e-06, "loss": 0.6334, "step": 30419 }, { "epoch": 0.78, "grad_norm": 1.1459027528762817, "learning_rate": 2.439292818794444e-06, "loss": 0.4162, "step": 30420 }, { "epoch": 0.78, "grad_norm": 1.0795010328292847, "learning_rate": 2.4387495247016403e-06, "loss": 0.4829, "step": 30421 }, { "epoch": 0.78, "grad_norm": 0.9925666451454163, "learning_rate": 2.4382062827162213e-06, "loss": 0.535, "step": 30422 }, { "epoch": 0.78, "grad_norm": 1.4432591199874878, "learning_rate": 2.4376630928419375e-06, "loss": 0.4243, "step": 30423 }, { "epoch": 0.78, "grad_norm": 2.1479837894439697, "learning_rate": 2.4371199550825277e-06, "loss": 0.3384, "step": 30424 }, { "epoch": 0.78, "grad_norm": 2.0258612632751465, "learning_rate": 2.4365768694417323e-06, "loss": 0.5669, "step": 30425 }, { "epoch": 0.78, "grad_norm": 1.7227966785430908, "learning_rate": 2.4360338359233003e-06, "loss": 0.4872, "step": 30426 }, { "epoch": 0.78, "grad_norm": 3.6770272254943848, "learning_rate": 2.435490854530972e-06, "loss": 0.6026, "step": 30427 }, { "epoch": 0.78, "grad_norm": 5.103521823883057, "learning_rate": 2.4349479252684847e-06, "loss": 0.7502, "step": 30428 }, { "epoch": 0.78, "grad_norm": 1.410783290863037, "learning_rate": 2.4344050481395864e-06, "loss": 0.5242, "step": 30429 }, { "epoch": 0.78, "grad_norm": 1.4993414878845215, "learning_rate": 2.4338622231480156e-06, "loss": 0.5227, "step": 30430 }, { "epoch": 0.78, "grad_norm": 1.4102238416671753, "learning_rate": 2.433319450297512e-06, "loss": 0.4358, "step": 30431 }, { "epoch": 0.78, "grad_norm": 1.4296801090240479, "learning_rate": 2.4327767295918147e-06, "loss": 0.4988, "step": 30432 }, { "epoch": 0.78, "grad_norm": 0.9481534361839294, "learning_rate": 2.4322340610346695e-06, "loss": 0.3824, "step": 30433 }, { "epoch": 0.78, "grad_norm": 3.964115858078003, "learning_rate": 2.431691444629811e-06, "loss": 0.5193, "step": 30434 }, { "epoch": 0.78, "grad_norm": 1.1528278589248657, "learning_rate": 2.431148880380979e-06, "loss": 0.4176, "step": 30435 }, { "epoch": 0.78, "grad_norm": 0.8957861065864563, "learning_rate": 2.430606368291916e-06, "loss": 0.4908, "step": 30436 }, { "epoch": 0.78, "grad_norm": 1.2073051929473877, "learning_rate": 2.430063908366358e-06, "loss": 0.3439, "step": 30437 }, { "epoch": 0.78, "grad_norm": 2.8070168495178223, "learning_rate": 2.429521500608043e-06, "loss": 0.6253, "step": 30438 }, { "epoch": 0.78, "grad_norm": 4.961893081665039, "learning_rate": 2.428979145020708e-06, "loss": 0.5938, "step": 30439 }, { "epoch": 0.78, "grad_norm": 7.127488136291504, "learning_rate": 2.428436841608095e-06, "loss": 0.4302, "step": 30440 }, { "epoch": 0.78, "grad_norm": 3.2082910537719727, "learning_rate": 2.4278945903739372e-06, "loss": 0.5046, "step": 30441 }, { "epoch": 0.78, "grad_norm": 2.2878382205963135, "learning_rate": 2.427352391321971e-06, "loss": 0.4462, "step": 30442 }, { "epoch": 0.78, "grad_norm": 3.1987223625183105, "learning_rate": 2.426810244455936e-06, "loss": 0.5417, "step": 30443 }, { "epoch": 0.78, "grad_norm": 3.112887382507324, "learning_rate": 2.426268149779568e-06, "loss": 0.5105, "step": 30444 }, { "epoch": 0.78, "grad_norm": 1.1451385021209717, "learning_rate": 2.4257261072965986e-06, "loss": 0.4643, "step": 30445 }, { "epoch": 0.78, "grad_norm": 1.4415489435195923, "learning_rate": 2.425184117010769e-06, "loss": 0.4503, "step": 30446 }, { "epoch": 0.78, "grad_norm": 1.4211440086364746, "learning_rate": 2.424642178925811e-06, "loss": 0.4902, "step": 30447 }, { "epoch": 0.78, "grad_norm": 1.7285503149032593, "learning_rate": 2.4241002930454606e-06, "loss": 0.5363, "step": 30448 }, { "epoch": 0.78, "grad_norm": 3.0558669567108154, "learning_rate": 2.4235584593734485e-06, "loss": 0.5522, "step": 30449 }, { "epoch": 0.78, "grad_norm": 2.0694544315338135, "learning_rate": 2.4230166779135134e-06, "loss": 0.4761, "step": 30450 }, { "epoch": 0.78, "grad_norm": 1.350685477256775, "learning_rate": 2.4224749486693875e-06, "loss": 0.5685, "step": 30451 }, { "epoch": 0.78, "grad_norm": 1.0095807313919067, "learning_rate": 2.4219332716448006e-06, "loss": 0.5158, "step": 30452 }, { "epoch": 0.78, "grad_norm": 3.424928903579712, "learning_rate": 2.4213916468434917e-06, "loss": 0.5237, "step": 30453 }, { "epoch": 0.78, "grad_norm": 1.8014774322509766, "learning_rate": 2.420850074269191e-06, "loss": 0.7289, "step": 30454 }, { "epoch": 0.78, "grad_norm": 3.545651912689209, "learning_rate": 2.4203085539256286e-06, "loss": 0.4579, "step": 30455 }, { "epoch": 0.78, "grad_norm": 1.7436585426330566, "learning_rate": 2.4197670858165356e-06, "loss": 0.6877, "step": 30456 }, { "epoch": 0.78, "grad_norm": 1.939623236656189, "learning_rate": 2.4192256699456474e-06, "loss": 0.4211, "step": 30457 }, { "epoch": 0.78, "grad_norm": 5.129457473754883, "learning_rate": 2.4186843063166945e-06, "loss": 0.7204, "step": 30458 }, { "epoch": 0.78, "grad_norm": 1.9802193641662598, "learning_rate": 2.4181429949334024e-06, "loss": 0.4935, "step": 30459 }, { "epoch": 0.78, "grad_norm": 1.4185742139816284, "learning_rate": 2.4176017357995085e-06, "loss": 0.3873, "step": 30460 }, { "epoch": 0.78, "grad_norm": 1.7323052883148193, "learning_rate": 2.41706052891874e-06, "loss": 0.5031, "step": 30461 }, { "epoch": 0.78, "grad_norm": 2.687673568725586, "learning_rate": 2.4165193742948257e-06, "loss": 0.6434, "step": 30462 }, { "epoch": 0.78, "grad_norm": 1.6299992799758911, "learning_rate": 2.415978271931493e-06, "loss": 0.6181, "step": 30463 }, { "epoch": 0.78, "grad_norm": 1.3624063730239868, "learning_rate": 2.4154372218324763e-06, "loss": 0.5102, "step": 30464 }, { "epoch": 0.78, "grad_norm": 1.2449042797088623, "learning_rate": 2.4148962240015007e-06, "loss": 0.473, "step": 30465 }, { "epoch": 0.78, "grad_norm": 4.45141077041626, "learning_rate": 2.4143552784422917e-06, "loss": 0.5864, "step": 30466 }, { "epoch": 0.78, "grad_norm": 1.108908772468567, "learning_rate": 2.4138143851585837e-06, "loss": 0.5092, "step": 30467 }, { "epoch": 0.78, "grad_norm": 2.0498814582824707, "learning_rate": 2.413273544154101e-06, "loss": 0.4901, "step": 30468 }, { "epoch": 0.78, "grad_norm": 1.3138662576675415, "learning_rate": 2.4127327554325684e-06, "loss": 0.5169, "step": 30469 }, { "epoch": 0.78, "grad_norm": 1.4934837818145752, "learning_rate": 2.4121920189977167e-06, "loss": 0.6062, "step": 30470 }, { "epoch": 0.78, "grad_norm": 1.8326667547225952, "learning_rate": 2.411651334853271e-06, "loss": 0.6012, "step": 30471 }, { "epoch": 0.78, "grad_norm": 1.3306690454483032, "learning_rate": 2.4111107030029566e-06, "loss": 0.4959, "step": 30472 }, { "epoch": 0.78, "grad_norm": 2.1048688888549805, "learning_rate": 2.410570123450496e-06, "loss": 0.6916, "step": 30473 }, { "epoch": 0.78, "grad_norm": 1.1525287628173828, "learning_rate": 2.4100295961996224e-06, "loss": 0.4208, "step": 30474 }, { "epoch": 0.78, "grad_norm": 1.7582296133041382, "learning_rate": 2.409489121254055e-06, "loss": 0.5205, "step": 30475 }, { "epoch": 0.78, "grad_norm": 2.7261250019073486, "learning_rate": 2.408948698617517e-06, "loss": 0.5723, "step": 30476 }, { "epoch": 0.78, "grad_norm": 1.0397042036056519, "learning_rate": 2.4084083282937386e-06, "loss": 0.4468, "step": 30477 }, { "epoch": 0.78, "grad_norm": 1.1856598854064941, "learning_rate": 2.4078680102864403e-06, "loss": 0.5774, "step": 30478 }, { "epoch": 0.78, "grad_norm": 1.0803419351577759, "learning_rate": 2.4073277445993458e-06, "loss": 0.5841, "step": 30479 }, { "epoch": 0.78, "grad_norm": 1.445849895477295, "learning_rate": 2.4067875312361754e-06, "loss": 0.4605, "step": 30480 }, { "epoch": 0.78, "grad_norm": 1.4471423625946045, "learning_rate": 2.4062473702006583e-06, "loss": 0.5233, "step": 30481 }, { "epoch": 0.78, "grad_norm": 12.112247467041016, "learning_rate": 2.4057072614965126e-06, "loss": 0.497, "step": 30482 }, { "epoch": 0.78, "grad_norm": 0.8745192289352417, "learning_rate": 2.4051672051274588e-06, "loss": 0.4436, "step": 30483 }, { "epoch": 0.78, "grad_norm": 1.3383979797363281, "learning_rate": 2.404627201097224e-06, "loss": 0.446, "step": 30484 }, { "epoch": 0.78, "grad_norm": 1.3010367155075073, "learning_rate": 2.4040872494095267e-06, "loss": 0.6037, "step": 30485 }, { "epoch": 0.78, "grad_norm": 1.7730664014816284, "learning_rate": 2.4035473500680883e-06, "loss": 0.4566, "step": 30486 }, { "epoch": 0.78, "grad_norm": 0.9925333261489868, "learning_rate": 2.4030075030766255e-06, "loss": 0.5062, "step": 30487 }, { "epoch": 0.78, "grad_norm": 2.8256242275238037, "learning_rate": 2.4024677084388648e-06, "loss": 0.583, "step": 30488 }, { "epoch": 0.78, "grad_norm": 1.1279321908950806, "learning_rate": 2.4019279661585237e-06, "loss": 0.4278, "step": 30489 }, { "epoch": 0.78, "grad_norm": 1.1307660341262817, "learning_rate": 2.401388276239318e-06, "loss": 0.3389, "step": 30490 }, { "epoch": 0.78, "grad_norm": 3.180851459503174, "learning_rate": 2.400848638684974e-06, "loss": 0.6623, "step": 30491 }, { "epoch": 0.78, "grad_norm": 3.075434446334839, "learning_rate": 2.4003090534992067e-06, "loss": 0.6635, "step": 30492 }, { "epoch": 0.78, "grad_norm": 1.115562081336975, "learning_rate": 2.399769520685732e-06, "loss": 0.429, "step": 30493 }, { "epoch": 0.78, "grad_norm": 3.16752028465271, "learning_rate": 2.3992300402482738e-06, "loss": 0.6063, "step": 30494 }, { "epoch": 0.78, "grad_norm": 1.1093403100967407, "learning_rate": 2.3986906121905475e-06, "loss": 0.4399, "step": 30495 }, { "epoch": 0.78, "grad_norm": 1.1770983934402466, "learning_rate": 2.3981512365162686e-06, "loss": 0.5898, "step": 30496 }, { "epoch": 0.78, "grad_norm": 5.573814392089844, "learning_rate": 2.397611913229153e-06, "loss": 0.6631, "step": 30497 }, { "epoch": 0.78, "grad_norm": 4.962620258331299, "learning_rate": 2.397072642332923e-06, "loss": 0.5438, "step": 30498 }, { "epoch": 0.78, "grad_norm": 1.3899420499801636, "learning_rate": 2.396533423831292e-06, "loss": 0.5296, "step": 30499 }, { "epoch": 0.78, "grad_norm": 2.0334668159484863, "learning_rate": 2.3959942577279717e-06, "loss": 0.5564, "step": 30500 }, { "epoch": 0.78, "grad_norm": 1.5035948753356934, "learning_rate": 2.395455144026685e-06, "loss": 0.4853, "step": 30501 }, { "epoch": 0.78, "grad_norm": 0.9144600033760071, "learning_rate": 2.3949160827311447e-06, "loss": 0.4801, "step": 30502 }, { "epoch": 0.78, "grad_norm": 1.2543607950210571, "learning_rate": 2.3943770738450632e-06, "loss": 0.4978, "step": 30503 }, { "epoch": 0.78, "grad_norm": 1.4710533618927002, "learning_rate": 2.393838117372155e-06, "loss": 0.5752, "step": 30504 }, { "epoch": 0.78, "grad_norm": 4.226992607116699, "learning_rate": 2.393299213316138e-06, "loss": 0.573, "step": 30505 }, { "epoch": 0.78, "grad_norm": 2.1074748039245605, "learning_rate": 2.392760361680725e-06, "loss": 0.6604, "step": 30506 }, { "epoch": 0.78, "grad_norm": 1.1018811464309692, "learning_rate": 2.3922215624696245e-06, "loss": 0.3929, "step": 30507 }, { "epoch": 0.78, "grad_norm": 1.4086334705352783, "learning_rate": 2.3916828156865557e-06, "loss": 0.5585, "step": 30508 }, { "epoch": 0.78, "grad_norm": 1.5965702533721924, "learning_rate": 2.3911441213352295e-06, "loss": 0.4882, "step": 30509 }, { "epoch": 0.78, "grad_norm": 1.4466462135314941, "learning_rate": 2.3906054794193547e-06, "loss": 0.6204, "step": 30510 }, { "epoch": 0.78, "grad_norm": 4.269345760345459, "learning_rate": 2.3900668899426493e-06, "loss": 0.6218, "step": 30511 }, { "epoch": 0.78, "grad_norm": 0.9114639759063721, "learning_rate": 2.3895283529088207e-06, "loss": 0.4524, "step": 30512 }, { "epoch": 0.78, "grad_norm": 1.181939721107483, "learning_rate": 2.388989868321583e-06, "loss": 0.4748, "step": 30513 }, { "epoch": 0.78, "grad_norm": 1.5936496257781982, "learning_rate": 2.388451436184641e-06, "loss": 0.5861, "step": 30514 }, { "epoch": 0.78, "grad_norm": 2.2420356273651123, "learning_rate": 2.387913056501714e-06, "loss": 0.4446, "step": 30515 }, { "epoch": 0.78, "grad_norm": 1.9899170398712158, "learning_rate": 2.387374729276507e-06, "loss": 0.4991, "step": 30516 }, { "epoch": 0.78, "grad_norm": 1.4946460723876953, "learning_rate": 2.3868364545127286e-06, "loss": 0.5823, "step": 30517 }, { "epoch": 0.78, "grad_norm": 1.5886801481246948, "learning_rate": 2.3862982322140925e-06, "loss": 0.5991, "step": 30518 }, { "epoch": 0.78, "grad_norm": 1.7269819974899292, "learning_rate": 2.385760062384306e-06, "loss": 0.4094, "step": 30519 }, { "epoch": 0.78, "grad_norm": 1.7521699666976929, "learning_rate": 2.385221945027076e-06, "loss": 0.4277, "step": 30520 }, { "epoch": 0.78, "grad_norm": 4.161677837371826, "learning_rate": 2.3846838801461104e-06, "loss": 0.6351, "step": 30521 }, { "epoch": 0.78, "grad_norm": 1.2776588201522827, "learning_rate": 2.3841458677451224e-06, "loss": 0.5906, "step": 30522 }, { "epoch": 0.78, "grad_norm": 1.3962445259094238, "learning_rate": 2.3836079078278162e-06, "loss": 0.4364, "step": 30523 }, { "epoch": 0.78, "grad_norm": 2.685961961746216, "learning_rate": 2.3830700003978955e-06, "loss": 0.6234, "step": 30524 }, { "epoch": 0.78, "grad_norm": 1.308129072189331, "learning_rate": 2.3825321454590746e-06, "loss": 0.58, "step": 30525 }, { "epoch": 0.78, "grad_norm": 3.730557441711426, "learning_rate": 2.381994343015056e-06, "loss": 0.6236, "step": 30526 }, { "epoch": 0.78, "grad_norm": 1.1845139265060425, "learning_rate": 2.3814565930695465e-06, "loss": 0.5726, "step": 30527 }, { "epoch": 0.78, "grad_norm": 2.3784775733947754, "learning_rate": 2.3809188956262496e-06, "loss": 0.8366, "step": 30528 }, { "epoch": 0.78, "grad_norm": 1.8895686864852905, "learning_rate": 2.3803812506888746e-06, "loss": 0.6452, "step": 30529 }, { "epoch": 0.78, "grad_norm": 1.3750014305114746, "learning_rate": 2.3798436582611253e-06, "loss": 0.4803, "step": 30530 }, { "epoch": 0.78, "grad_norm": 4.169309139251709, "learning_rate": 2.3793061183467035e-06, "loss": 0.569, "step": 30531 }, { "epoch": 0.78, "grad_norm": 1.6338129043579102, "learning_rate": 2.3787686309493184e-06, "loss": 0.6099, "step": 30532 }, { "epoch": 0.78, "grad_norm": 1.5020169019699097, "learning_rate": 2.378231196072672e-06, "loss": 0.5908, "step": 30533 }, { "epoch": 0.78, "grad_norm": 1.9479620456695557, "learning_rate": 2.377693813720465e-06, "loss": 0.7578, "step": 30534 }, { "epoch": 0.78, "grad_norm": 1.0404950380325317, "learning_rate": 2.3771564838964066e-06, "loss": 0.4729, "step": 30535 }, { "epoch": 0.78, "grad_norm": 1.3458259105682373, "learning_rate": 2.376619206604196e-06, "loss": 0.6265, "step": 30536 }, { "epoch": 0.78, "grad_norm": 2.0699570178985596, "learning_rate": 2.3760819818475367e-06, "loss": 0.6115, "step": 30537 }, { "epoch": 0.78, "grad_norm": 1.1347472667694092, "learning_rate": 2.375544809630127e-06, "loss": 0.4906, "step": 30538 }, { "epoch": 0.78, "grad_norm": 1.679775595664978, "learning_rate": 2.3750076899556763e-06, "loss": 0.4273, "step": 30539 }, { "epoch": 0.78, "grad_norm": 2.48283314704895, "learning_rate": 2.3744706228278814e-06, "loss": 0.4369, "step": 30540 }, { "epoch": 0.78, "grad_norm": 2.4220778942108154, "learning_rate": 2.3739336082504416e-06, "loss": 0.3252, "step": 30541 }, { "epoch": 0.78, "grad_norm": 1.3353408575057983, "learning_rate": 2.373396646227063e-06, "loss": 0.5055, "step": 30542 }, { "epoch": 0.78, "grad_norm": 1.3370534181594849, "learning_rate": 2.3728597367614426e-06, "loss": 0.4215, "step": 30543 }, { "epoch": 0.78, "grad_norm": 1.3729904890060425, "learning_rate": 2.372322879857282e-06, "loss": 0.6221, "step": 30544 }, { "epoch": 0.78, "grad_norm": 1.4006025791168213, "learning_rate": 2.3717860755182765e-06, "loss": 0.4535, "step": 30545 }, { "epoch": 0.78, "grad_norm": 1.4431782960891724, "learning_rate": 2.3712493237481314e-06, "loss": 0.6063, "step": 30546 }, { "epoch": 0.78, "grad_norm": 1.3362524509429932, "learning_rate": 2.370712624550544e-06, "loss": 0.5362, "step": 30547 }, { "epoch": 0.78, "grad_norm": 1.6280717849731445, "learning_rate": 2.370175977929209e-06, "loss": 0.4282, "step": 30548 }, { "epoch": 0.78, "grad_norm": 1.3588995933532715, "learning_rate": 2.3696393838878294e-06, "loss": 0.4176, "step": 30549 }, { "epoch": 0.78, "grad_norm": 1.3132081031799316, "learning_rate": 2.369102842430102e-06, "loss": 0.4954, "step": 30550 }, { "epoch": 0.78, "grad_norm": 4.410156726837158, "learning_rate": 2.3685663535597214e-06, "loss": 0.5822, "step": 30551 }, { "epoch": 0.78, "grad_norm": 1.9418411254882812, "learning_rate": 2.3680299172803887e-06, "loss": 0.5259, "step": 30552 }, { "epoch": 0.78, "grad_norm": 4.3204522132873535, "learning_rate": 2.3674935335958004e-06, "loss": 0.5612, "step": 30553 }, { "epoch": 0.78, "grad_norm": 1.0214834213256836, "learning_rate": 2.36695720250965e-06, "loss": 0.6052, "step": 30554 }, { "epoch": 0.78, "grad_norm": 0.9859037399291992, "learning_rate": 2.366420924025633e-06, "loss": 0.4738, "step": 30555 }, { "epoch": 0.78, "grad_norm": 1.4013917446136475, "learning_rate": 2.3658846981474505e-06, "loss": 0.6541, "step": 30556 }, { "epoch": 0.78, "grad_norm": 1.9661829471588135, "learning_rate": 2.365348524878793e-06, "loss": 0.4189, "step": 30557 }, { "epoch": 0.78, "grad_norm": 4.9037652015686035, "learning_rate": 2.3648124042233544e-06, "loss": 0.6375, "step": 30558 }, { "epoch": 0.78, "grad_norm": 1.141024112701416, "learning_rate": 2.3642763361848354e-06, "loss": 0.3864, "step": 30559 }, { "epoch": 0.78, "grad_norm": 1.1841269731521606, "learning_rate": 2.363740320766926e-06, "loss": 0.3707, "step": 30560 }, { "epoch": 0.78, "grad_norm": 1.4403939247131348, "learning_rate": 2.363204357973321e-06, "loss": 0.5228, "step": 30561 }, { "epoch": 0.78, "grad_norm": 1.3848838806152344, "learning_rate": 2.362668447807711e-06, "loss": 0.435, "step": 30562 }, { "epoch": 0.78, "grad_norm": 1.3956869840621948, "learning_rate": 2.3621325902737946e-06, "loss": 0.4242, "step": 30563 }, { "epoch": 0.78, "grad_norm": 1.490964651107788, "learning_rate": 2.3615967853752606e-06, "loss": 0.5814, "step": 30564 }, { "epoch": 0.78, "grad_norm": 2.74074649810791, "learning_rate": 2.3610610331158014e-06, "loss": 0.6264, "step": 30565 }, { "epoch": 0.78, "grad_norm": 1.4686613082885742, "learning_rate": 2.360525333499113e-06, "loss": 0.5598, "step": 30566 }, { "epoch": 0.78, "grad_norm": 12.415816307067871, "learning_rate": 2.359989686528884e-06, "loss": 0.6686, "step": 30567 }, { "epoch": 0.78, "grad_norm": 1.880388855934143, "learning_rate": 2.3594540922088063e-06, "loss": 0.6745, "step": 30568 }, { "epoch": 0.78, "grad_norm": 2.6772241592407227, "learning_rate": 2.358918550542567e-06, "loss": 0.4891, "step": 30569 }, { "epoch": 0.78, "grad_norm": 1.6737617254257202, "learning_rate": 2.358383061533864e-06, "loss": 0.5691, "step": 30570 }, { "epoch": 0.78, "grad_norm": 1.7732404470443726, "learning_rate": 2.3578476251863845e-06, "loss": 0.4595, "step": 30571 }, { "epoch": 0.78, "grad_norm": 5.088150501251221, "learning_rate": 2.357312241503814e-06, "loss": 0.5978, "step": 30572 }, { "epoch": 0.78, "grad_norm": 1.8533415794372559, "learning_rate": 2.3567769104898497e-06, "loss": 0.4819, "step": 30573 }, { "epoch": 0.78, "grad_norm": 1.729407787322998, "learning_rate": 2.3562416321481772e-06, "loss": 0.4968, "step": 30574 }, { "epoch": 0.78, "grad_norm": 1.9587969779968262, "learning_rate": 2.355706406482481e-06, "loss": 0.563, "step": 30575 }, { "epoch": 0.78, "grad_norm": 1.7075355052947998, "learning_rate": 2.355171233496457e-06, "loss": 0.5012, "step": 30576 }, { "epoch": 0.78, "grad_norm": 1.5855790376663208, "learning_rate": 2.3546361131937913e-06, "loss": 0.636, "step": 30577 }, { "epoch": 0.78, "grad_norm": 1.3658510446548462, "learning_rate": 2.354101045578169e-06, "loss": 0.6129, "step": 30578 }, { "epoch": 0.78, "grad_norm": 1.4569507837295532, "learning_rate": 2.353566030653275e-06, "loss": 0.5433, "step": 30579 }, { "epoch": 0.78, "grad_norm": 1.4179697036743164, "learning_rate": 2.3530310684228043e-06, "loss": 0.4029, "step": 30580 }, { "epoch": 0.78, "grad_norm": 2.027284860610962, "learning_rate": 2.352496158890438e-06, "loss": 0.5719, "step": 30581 }, { "epoch": 0.78, "grad_norm": 5.839676856994629, "learning_rate": 2.3519613020598608e-06, "loss": 0.4969, "step": 30582 }, { "epoch": 0.78, "grad_norm": 1.3775827884674072, "learning_rate": 2.3514264979347643e-06, "loss": 0.4335, "step": 30583 }, { "epoch": 0.78, "grad_norm": 1.752898931503296, "learning_rate": 2.350891746518832e-06, "loss": 0.4276, "step": 30584 }, { "epoch": 0.78, "grad_norm": 0.9331114888191223, "learning_rate": 2.350357047815748e-06, "loss": 0.4478, "step": 30585 }, { "epoch": 0.78, "grad_norm": 3.2178702354431152, "learning_rate": 2.3498224018291938e-06, "loss": 0.3818, "step": 30586 }, { "epoch": 0.78, "grad_norm": 1.2790987491607666, "learning_rate": 2.34928780856286e-06, "loss": 0.5376, "step": 30587 }, { "epoch": 0.78, "grad_norm": 1.1251317262649536, "learning_rate": 2.3487532680204285e-06, "loss": 0.5379, "step": 30588 }, { "epoch": 0.78, "grad_norm": 2.0315053462982178, "learning_rate": 2.348218780205579e-06, "loss": 0.6294, "step": 30589 }, { "epoch": 0.78, "grad_norm": 1.223924994468689, "learning_rate": 2.3476843451220022e-06, "loss": 0.6846, "step": 30590 }, { "epoch": 0.78, "grad_norm": 2.5504250526428223, "learning_rate": 2.3471499627733764e-06, "loss": 0.6928, "step": 30591 }, { "epoch": 0.78, "grad_norm": 1.6794016361236572, "learning_rate": 2.3466156331633816e-06, "loss": 0.6084, "step": 30592 }, { "epoch": 0.78, "grad_norm": 1.4189510345458984, "learning_rate": 2.3460813562957072e-06, "loss": 0.4525, "step": 30593 }, { "epoch": 0.78, "grad_norm": 1.6640268564224243, "learning_rate": 2.345547132174032e-06, "loss": 0.5314, "step": 30594 }, { "epoch": 0.78, "grad_norm": 11.88775634765625, "learning_rate": 2.345012960802037e-06, "loss": 0.5025, "step": 30595 }, { "epoch": 0.78, "grad_norm": 1.8802400827407837, "learning_rate": 2.3444788421834e-06, "loss": 0.6606, "step": 30596 }, { "epoch": 0.78, "grad_norm": 1.5315500497817993, "learning_rate": 2.3439447763218072e-06, "loss": 0.6012, "step": 30597 }, { "epoch": 0.78, "grad_norm": 1.1817221641540527, "learning_rate": 2.343410763220938e-06, "loss": 0.4854, "step": 30598 }, { "epoch": 0.78, "grad_norm": 1.7042474746704102, "learning_rate": 2.3428768028844684e-06, "loss": 0.6409, "step": 30599 }, { "epoch": 0.78, "grad_norm": 1.089210867881775, "learning_rate": 2.3423428953160833e-06, "loss": 0.4564, "step": 30600 }, { "epoch": 0.78, "grad_norm": 1.1415183544158936, "learning_rate": 2.3418090405194603e-06, "loss": 0.5058, "step": 30601 }, { "epoch": 0.78, "grad_norm": 1.479272723197937, "learning_rate": 2.3412752384982782e-06, "loss": 0.4701, "step": 30602 }, { "epoch": 0.78, "grad_norm": 1.7607359886169434, "learning_rate": 2.340741489256213e-06, "loss": 0.5045, "step": 30603 }, { "epoch": 0.78, "grad_norm": 1.5883923768997192, "learning_rate": 2.340207792796949e-06, "loss": 0.4995, "step": 30604 }, { "epoch": 0.78, "grad_norm": 1.5790356397628784, "learning_rate": 2.3396741491241593e-06, "loss": 0.6799, "step": 30605 }, { "epoch": 0.78, "grad_norm": 1.6336630582809448, "learning_rate": 2.3391405582415206e-06, "loss": 0.6082, "step": 30606 }, { "epoch": 0.78, "grad_norm": 1.848389744758606, "learning_rate": 2.3386070201527145e-06, "loss": 0.4482, "step": 30607 }, { "epoch": 0.78, "grad_norm": 3.5996084213256836, "learning_rate": 2.3380735348614148e-06, "loss": 0.5879, "step": 30608 }, { "epoch": 0.78, "grad_norm": 1.8017868995666504, "learning_rate": 2.3375401023712995e-06, "loss": 0.5713, "step": 30609 }, { "epoch": 0.78, "grad_norm": 1.549355387687683, "learning_rate": 2.3370067226860406e-06, "loss": 0.482, "step": 30610 }, { "epoch": 0.78, "grad_norm": 6.1676926612854, "learning_rate": 2.33647339580932e-06, "loss": 0.6492, "step": 30611 }, { "epoch": 0.78, "grad_norm": 1.3349543809890747, "learning_rate": 2.3359401217448097e-06, "loss": 0.5035, "step": 30612 }, { "epoch": 0.78, "grad_norm": 1.571742057800293, "learning_rate": 2.335406900496183e-06, "loss": 0.4614, "step": 30613 }, { "epoch": 0.78, "grad_norm": 2.647702217102051, "learning_rate": 2.3348737320671188e-06, "loss": 0.6723, "step": 30614 }, { "epoch": 0.78, "grad_norm": 1.3882206678390503, "learning_rate": 2.3343406164612893e-06, "loss": 0.5071, "step": 30615 }, { "epoch": 0.78, "grad_norm": 1.8795067071914673, "learning_rate": 2.3338075536823657e-06, "loss": 0.5032, "step": 30616 }, { "epoch": 0.78, "grad_norm": 3.637885332107544, "learning_rate": 2.3332745437340263e-06, "loss": 0.5265, "step": 30617 }, { "epoch": 0.78, "grad_norm": 1.5298205614089966, "learning_rate": 2.332741586619942e-06, "loss": 0.5647, "step": 30618 }, { "epoch": 0.78, "grad_norm": 2.240440607070923, "learning_rate": 2.3322086823437864e-06, "loss": 0.4541, "step": 30619 }, { "epoch": 0.78, "grad_norm": 7.509803295135498, "learning_rate": 2.331675830909228e-06, "loss": 0.495, "step": 30620 }, { "epoch": 0.78, "grad_norm": 1.283806324005127, "learning_rate": 2.3311430323199445e-06, "loss": 0.4468, "step": 30621 }, { "epoch": 0.78, "grad_norm": 1.9664305448532104, "learning_rate": 2.3306102865796056e-06, "loss": 0.4928, "step": 30622 }, { "epoch": 0.78, "grad_norm": 1.3002753257751465, "learning_rate": 2.3300775936918797e-06, "loss": 0.6224, "step": 30623 }, { "epoch": 0.78, "grad_norm": 3.112178087234497, "learning_rate": 2.329544953660442e-06, "loss": 0.5881, "step": 30624 }, { "epoch": 0.78, "grad_norm": 1.770725131034851, "learning_rate": 2.329012366488963e-06, "loss": 0.5267, "step": 30625 }, { "epoch": 0.78, "grad_norm": 11.437248229980469, "learning_rate": 2.3284798321811095e-06, "loss": 0.5799, "step": 30626 }, { "epoch": 0.78, "grad_norm": 1.6406384706497192, "learning_rate": 2.3279473507405527e-06, "loss": 0.5236, "step": 30627 }, { "epoch": 0.79, "grad_norm": 3.353067636489868, "learning_rate": 2.327414922170964e-06, "loss": 0.4701, "step": 30628 }, { "epoch": 0.79, "grad_norm": 1.0738022327423096, "learning_rate": 2.3268825464760115e-06, "loss": 0.539, "step": 30629 }, { "epoch": 0.79, "grad_norm": 1.1333513259887695, "learning_rate": 2.3263502236593603e-06, "loss": 0.407, "step": 30630 }, { "epoch": 0.79, "grad_norm": 1.5834109783172607, "learning_rate": 2.3258179537246863e-06, "loss": 0.6395, "step": 30631 }, { "epoch": 0.79, "grad_norm": 2.3702142238616943, "learning_rate": 2.325285736675653e-06, "loss": 0.4637, "step": 30632 }, { "epoch": 0.79, "grad_norm": 1.9236122369766235, "learning_rate": 2.3247535725159285e-06, "loss": 0.406, "step": 30633 }, { "epoch": 0.79, "grad_norm": 4.6846022605896, "learning_rate": 2.324221461249179e-06, "loss": 0.4974, "step": 30634 }, { "epoch": 0.79, "grad_norm": 1.8431386947631836, "learning_rate": 2.3236894028790734e-06, "loss": 0.6512, "step": 30635 }, { "epoch": 0.79, "grad_norm": 1.1153342723846436, "learning_rate": 2.323157397409279e-06, "loss": 0.3896, "step": 30636 }, { "epoch": 0.79, "grad_norm": 1.9106628894805908, "learning_rate": 2.322625444843457e-06, "loss": 0.5299, "step": 30637 }, { "epoch": 0.79, "grad_norm": 2.2251102924346924, "learning_rate": 2.322093545185281e-06, "loss": 0.5559, "step": 30638 }, { "epoch": 0.79, "grad_norm": 1.5687198638916016, "learning_rate": 2.3215616984384125e-06, "loss": 0.5123, "step": 30639 }, { "epoch": 0.79, "grad_norm": 1.6086465120315552, "learning_rate": 2.3210299046065132e-06, "loss": 0.4964, "step": 30640 }, { "epoch": 0.79, "grad_norm": 1.210707187652588, "learning_rate": 2.3204981636932544e-06, "loss": 0.419, "step": 30641 }, { "epoch": 0.79, "grad_norm": 1.5710057020187378, "learning_rate": 2.3199664757022977e-06, "loss": 0.4806, "step": 30642 }, { "epoch": 0.79, "grad_norm": 1.5903834104537964, "learning_rate": 2.3194348406373067e-06, "loss": 0.512, "step": 30643 }, { "epoch": 0.79, "grad_norm": 1.7599132061004639, "learning_rate": 2.318903258501942e-06, "loss": 0.6598, "step": 30644 }, { "epoch": 0.79, "grad_norm": 1.0276517868041992, "learning_rate": 2.318371729299874e-06, "loss": 0.4252, "step": 30645 }, { "epoch": 0.79, "grad_norm": 1.4092684984207153, "learning_rate": 2.317840253034762e-06, "loss": 0.4835, "step": 30646 }, { "epoch": 0.79, "grad_norm": 1.3880856037139893, "learning_rate": 2.3173088297102684e-06, "loss": 0.5764, "step": 30647 }, { "epoch": 0.79, "grad_norm": 1.0099263191223145, "learning_rate": 2.316777459330056e-06, "loss": 0.5693, "step": 30648 }, { "epoch": 0.79, "grad_norm": 0.8874015808105469, "learning_rate": 2.3162461418977854e-06, "loss": 0.5407, "step": 30649 }, { "epoch": 0.79, "grad_norm": 1.510922908782959, "learning_rate": 2.315714877417117e-06, "loss": 0.5017, "step": 30650 }, { "epoch": 0.79, "grad_norm": 1.3059650659561157, "learning_rate": 2.315183665891716e-06, "loss": 0.4514, "step": 30651 }, { "epoch": 0.79, "grad_norm": 7.204678058624268, "learning_rate": 2.3146525073252414e-06, "loss": 0.675, "step": 30652 }, { "epoch": 0.79, "grad_norm": 1.0529834032058716, "learning_rate": 2.314121401721351e-06, "loss": 0.4643, "step": 30653 }, { "epoch": 0.79, "grad_norm": 2.8796639442443848, "learning_rate": 2.3135903490837095e-06, "loss": 0.3448, "step": 30654 }, { "epoch": 0.79, "grad_norm": 11.125202178955078, "learning_rate": 2.313059349415974e-06, "loss": 0.5834, "step": 30655 }, { "epoch": 0.79, "grad_norm": 1.805316686630249, "learning_rate": 2.3125284027218044e-06, "loss": 0.7214, "step": 30656 }, { "epoch": 0.79, "grad_norm": 1.7943371534347534, "learning_rate": 2.3119975090048563e-06, "loss": 0.487, "step": 30657 }, { "epoch": 0.79, "grad_norm": 1.5800728797912598, "learning_rate": 2.311466668268795e-06, "loss": 0.5587, "step": 30658 }, { "epoch": 0.79, "grad_norm": 2.727914333343506, "learning_rate": 2.310935880517273e-06, "loss": 0.561, "step": 30659 }, { "epoch": 0.79, "grad_norm": 1.0437463521957397, "learning_rate": 2.310405145753949e-06, "loss": 0.4709, "step": 30660 }, { "epoch": 0.79, "grad_norm": 1.6486295461654663, "learning_rate": 2.3098744639824844e-06, "loss": 0.6125, "step": 30661 }, { "epoch": 0.79, "grad_norm": 1.1237587928771973, "learning_rate": 2.3093438352065333e-06, "loss": 0.4926, "step": 30662 }, { "epoch": 0.79, "grad_norm": 1.5218762159347534, "learning_rate": 2.308813259429752e-06, "loss": 0.5617, "step": 30663 }, { "epoch": 0.79, "grad_norm": 7.016025543212891, "learning_rate": 2.308282736655796e-06, "loss": 0.5118, "step": 30664 }, { "epoch": 0.79, "grad_norm": 1.1986953020095825, "learning_rate": 2.3077522668883245e-06, "loss": 0.5649, "step": 30665 }, { "epoch": 0.79, "grad_norm": 1.3571795225143433, "learning_rate": 2.3072218501309928e-06, "loss": 0.5289, "step": 30666 }, { "epoch": 0.79, "grad_norm": 1.1057987213134766, "learning_rate": 2.3066914863874525e-06, "loss": 0.537, "step": 30667 }, { "epoch": 0.79, "grad_norm": 1.7286860942840576, "learning_rate": 2.3061611756613624e-06, "loss": 0.4988, "step": 30668 }, { "epoch": 0.79, "grad_norm": 1.3801008462905884, "learning_rate": 2.3056309179563762e-06, "loss": 0.4424, "step": 30669 }, { "epoch": 0.79, "grad_norm": 1.7240283489227295, "learning_rate": 2.305100713276146e-06, "loss": 0.6017, "step": 30670 }, { "epoch": 0.79, "grad_norm": 1.787569522857666, "learning_rate": 2.3045705616243287e-06, "loss": 0.5058, "step": 30671 }, { "epoch": 0.79, "grad_norm": 5.1590776443481445, "learning_rate": 2.3040404630045776e-06, "loss": 0.7289, "step": 30672 }, { "epoch": 0.79, "grad_norm": 2.97187876701355, "learning_rate": 2.3035104174205434e-06, "loss": 0.4511, "step": 30673 }, { "epoch": 0.79, "grad_norm": 2.6766183376312256, "learning_rate": 2.302980424875878e-06, "loss": 0.71, "step": 30674 }, { "epoch": 0.79, "grad_norm": 2.0567569732666016, "learning_rate": 2.302450485374238e-06, "loss": 0.5945, "step": 30675 }, { "epoch": 0.79, "grad_norm": 2.2928366661071777, "learning_rate": 2.3019205989192737e-06, "loss": 0.4265, "step": 30676 }, { "epoch": 0.79, "grad_norm": 1.3543424606323242, "learning_rate": 2.3013907655146326e-06, "loss": 0.5473, "step": 30677 }, { "epoch": 0.79, "grad_norm": 1.9552011489868164, "learning_rate": 2.300860985163973e-06, "loss": 0.596, "step": 30678 }, { "epoch": 0.79, "grad_norm": 2.7017099857330322, "learning_rate": 2.3003312578709426e-06, "loss": 0.7379, "step": 30679 }, { "epoch": 0.79, "grad_norm": 1.7576488256454468, "learning_rate": 2.2998015836391917e-06, "loss": 0.5939, "step": 30680 }, { "epoch": 0.79, "grad_norm": 1.556868314743042, "learning_rate": 2.299271962472367e-06, "loss": 0.5641, "step": 30681 }, { "epoch": 0.79, "grad_norm": 4.874248504638672, "learning_rate": 2.298742394374125e-06, "loss": 0.4661, "step": 30682 }, { "epoch": 0.79, "grad_norm": 2.145432233810425, "learning_rate": 2.298212879348113e-06, "loss": 0.84, "step": 30683 }, { "epoch": 0.79, "grad_norm": 1.3217191696166992, "learning_rate": 2.297683417397976e-06, "loss": 0.5157, "step": 30684 }, { "epoch": 0.79, "grad_norm": 1.5541248321533203, "learning_rate": 2.2971540085273668e-06, "loss": 0.4064, "step": 30685 }, { "epoch": 0.79, "grad_norm": 1.4366589784622192, "learning_rate": 2.2966246527399348e-06, "loss": 0.4525, "step": 30686 }, { "epoch": 0.79, "grad_norm": 1.5623204708099365, "learning_rate": 2.2960953500393224e-06, "loss": 0.5691, "step": 30687 }, { "epoch": 0.79, "grad_norm": 1.3716273307800293, "learning_rate": 2.295566100429184e-06, "loss": 0.4514, "step": 30688 }, { "epoch": 0.79, "grad_norm": 1.5231446027755737, "learning_rate": 2.2950369039131627e-06, "loss": 0.4584, "step": 30689 }, { "epoch": 0.79, "grad_norm": 1.4102487564086914, "learning_rate": 2.2945077604949074e-06, "loss": 0.547, "step": 30690 }, { "epoch": 0.79, "grad_norm": 1.29279363155365, "learning_rate": 2.2939786701780598e-06, "loss": 0.4856, "step": 30691 }, { "epoch": 0.79, "grad_norm": 2.0424466133117676, "learning_rate": 2.2934496329662736e-06, "loss": 0.6421, "step": 30692 }, { "epoch": 0.79, "grad_norm": 2.2753100395202637, "learning_rate": 2.2929206488631906e-06, "loss": 0.5732, "step": 30693 }, { "epoch": 0.79, "grad_norm": 1.7141519784927368, "learning_rate": 2.292391717872453e-06, "loss": 0.4906, "step": 30694 }, { "epoch": 0.79, "grad_norm": 1.6862324476242065, "learning_rate": 2.2918628399977116e-06, "loss": 0.4303, "step": 30695 }, { "epoch": 0.79, "grad_norm": 2.3919994831085205, "learning_rate": 2.29133401524261e-06, "loss": 0.777, "step": 30696 }, { "epoch": 0.79, "grad_norm": 1.6746435165405273, "learning_rate": 2.290805243610791e-06, "loss": 0.663, "step": 30697 }, { "epoch": 0.79, "grad_norm": 5.960665702819824, "learning_rate": 2.2902765251058954e-06, "loss": 0.4992, "step": 30698 }, { "epoch": 0.79, "grad_norm": 2.391348361968994, "learning_rate": 2.289747859731574e-06, "loss": 0.5201, "step": 30699 }, { "epoch": 0.79, "grad_norm": 1.3842451572418213, "learning_rate": 2.2892192474914664e-06, "loss": 0.4292, "step": 30700 }, { "epoch": 0.79, "grad_norm": 1.0888148546218872, "learning_rate": 2.288690688389211e-06, "loss": 0.5129, "step": 30701 }, { "epoch": 0.79, "grad_norm": 1.425248622894287, "learning_rate": 2.2881621824284595e-06, "loss": 0.5145, "step": 30702 }, { "epoch": 0.79, "grad_norm": 1.545293927192688, "learning_rate": 2.287633729612848e-06, "loss": 0.561, "step": 30703 }, { "epoch": 0.79, "grad_norm": 1.8234018087387085, "learning_rate": 2.28710532994602e-06, "loss": 0.6016, "step": 30704 }, { "epoch": 0.79, "grad_norm": 1.6480382680892944, "learning_rate": 2.286576983431613e-06, "loss": 0.518, "step": 30705 }, { "epoch": 0.79, "grad_norm": 1.8382371664047241, "learning_rate": 2.2860486900732747e-06, "loss": 0.6894, "step": 30706 }, { "epoch": 0.79, "grad_norm": 0.8937974572181702, "learning_rate": 2.285520449874642e-06, "loss": 0.432, "step": 30707 }, { "epoch": 0.79, "grad_norm": 1.2762391567230225, "learning_rate": 2.2849922628393527e-06, "loss": 0.5087, "step": 30708 }, { "epoch": 0.79, "grad_norm": 1.387528657913208, "learning_rate": 2.2844641289710536e-06, "loss": 0.3891, "step": 30709 }, { "epoch": 0.79, "grad_norm": 1.288490891456604, "learning_rate": 2.2839360482733784e-06, "loss": 0.4205, "step": 30710 }, { "epoch": 0.79, "grad_norm": 1.5094108581542969, "learning_rate": 2.2834080207499675e-06, "loss": 0.5376, "step": 30711 }, { "epoch": 0.79, "grad_norm": 3.307871103286743, "learning_rate": 2.282880046404462e-06, "loss": 0.4001, "step": 30712 }, { "epoch": 0.79, "grad_norm": 2.800002336502075, "learning_rate": 2.282352125240499e-06, "loss": 0.6739, "step": 30713 }, { "epoch": 0.79, "grad_norm": 1.6072295904159546, "learning_rate": 2.2818242572617168e-06, "loss": 0.3989, "step": 30714 }, { "epoch": 0.79, "grad_norm": 5.306511402130127, "learning_rate": 2.28129644247175e-06, "loss": 0.505, "step": 30715 }, { "epoch": 0.79, "grad_norm": 0.9854332804679871, "learning_rate": 2.2807686808742415e-06, "loss": 0.4747, "step": 30716 }, { "epoch": 0.79, "grad_norm": 6.071459770202637, "learning_rate": 2.2802409724728246e-06, "loss": 0.6338, "step": 30717 }, { "epoch": 0.79, "grad_norm": 3.179694414138794, "learning_rate": 2.279713317271136e-06, "loss": 0.6517, "step": 30718 }, { "epoch": 0.79, "grad_norm": 1.6570765972137451, "learning_rate": 2.2791857152728146e-06, "loss": 0.5161, "step": 30719 }, { "epoch": 0.79, "grad_norm": 1.139722228050232, "learning_rate": 2.2786581664814946e-06, "loss": 0.4049, "step": 30720 }, { "epoch": 0.79, "grad_norm": 0.9492567181587219, "learning_rate": 2.278130670900811e-06, "loss": 0.5325, "step": 30721 }, { "epoch": 0.79, "grad_norm": 2.226529836654663, "learning_rate": 2.2776032285343976e-06, "loss": 0.6387, "step": 30722 }, { "epoch": 0.79, "grad_norm": 1.718941330909729, "learning_rate": 2.277075839385893e-06, "loss": 0.6423, "step": 30723 }, { "epoch": 0.79, "grad_norm": 2.8106861114501953, "learning_rate": 2.2765485034589297e-06, "loss": 0.6368, "step": 30724 }, { "epoch": 0.79, "grad_norm": 1.805222749710083, "learning_rate": 2.2760212207571397e-06, "loss": 0.5149, "step": 30725 }, { "epoch": 0.79, "grad_norm": 2.768165349960327, "learning_rate": 2.2754939912841613e-06, "loss": 0.6122, "step": 30726 }, { "epoch": 0.79, "grad_norm": 1.383201003074646, "learning_rate": 2.2749668150436254e-06, "loss": 0.4974, "step": 30727 }, { "epoch": 0.79, "grad_norm": 1.7228745222091675, "learning_rate": 2.274439692039162e-06, "loss": 0.5088, "step": 30728 }, { "epoch": 0.79, "grad_norm": 1.0121886730194092, "learning_rate": 2.273912622274409e-06, "loss": 0.3979, "step": 30729 }, { "epoch": 0.79, "grad_norm": 1.3604142665863037, "learning_rate": 2.273385605752997e-06, "loss": 0.6195, "step": 30730 }, { "epoch": 0.79, "grad_norm": 1.5525778532028198, "learning_rate": 2.272858642478556e-06, "loss": 0.5682, "step": 30731 }, { "epoch": 0.79, "grad_norm": 1.7258440256118774, "learning_rate": 2.2723317324547167e-06, "loss": 0.434, "step": 30732 }, { "epoch": 0.79, "grad_norm": 1.515592336654663, "learning_rate": 2.271804875685114e-06, "loss": 0.4396, "step": 30733 }, { "epoch": 0.79, "grad_norm": 3.690950870513916, "learning_rate": 2.2712780721733773e-06, "loss": 0.5716, "step": 30734 }, { "epoch": 0.79, "grad_norm": 1.365792155265808, "learning_rate": 2.2707513219231336e-06, "loss": 0.4251, "step": 30735 }, { "epoch": 0.79, "grad_norm": 1.9175872802734375, "learning_rate": 2.270224624938019e-06, "loss": 0.6409, "step": 30736 }, { "epoch": 0.79, "grad_norm": 1.6583892107009888, "learning_rate": 2.269697981221659e-06, "loss": 0.5538, "step": 30737 }, { "epoch": 0.79, "grad_norm": 1.160683512687683, "learning_rate": 2.2691713907776847e-06, "loss": 0.4779, "step": 30738 }, { "epoch": 0.79, "grad_norm": 1.8571698665618896, "learning_rate": 2.2686448536097216e-06, "loss": 0.4541, "step": 30739 }, { "epoch": 0.79, "grad_norm": 1.0103533267974854, "learning_rate": 2.2681183697214027e-06, "loss": 0.4952, "step": 30740 }, { "epoch": 0.79, "grad_norm": 1.5404369831085205, "learning_rate": 2.2675919391163546e-06, "loss": 0.5689, "step": 30741 }, { "epoch": 0.79, "grad_norm": 1.9532861709594727, "learning_rate": 2.267065561798203e-06, "loss": 0.7017, "step": 30742 }, { "epoch": 0.79, "grad_norm": 1.459078311920166, "learning_rate": 2.26653923777058e-06, "loss": 0.5406, "step": 30743 }, { "epoch": 0.79, "grad_norm": 1.7747235298156738, "learning_rate": 2.266012967037109e-06, "loss": 0.4852, "step": 30744 }, { "epoch": 0.79, "grad_norm": 1.4013694524765015, "learning_rate": 2.265486749601418e-06, "loss": 0.5741, "step": 30745 }, { "epoch": 0.79, "grad_norm": 2.5977392196655273, "learning_rate": 2.2649605854671298e-06, "loss": 0.3356, "step": 30746 }, { "epoch": 0.79, "grad_norm": 0.8543246388435364, "learning_rate": 2.2644344746378766e-06, "loss": 0.4356, "step": 30747 }, { "epoch": 0.79, "grad_norm": 1.39145827293396, "learning_rate": 2.263908417117282e-06, "loss": 0.4019, "step": 30748 }, { "epoch": 0.79, "grad_norm": 0.9074133038520813, "learning_rate": 2.2633824129089667e-06, "loss": 0.3435, "step": 30749 }, { "epoch": 0.79, "grad_norm": 1.8120332956314087, "learning_rate": 2.262856462016562e-06, "loss": 0.41, "step": 30750 }, { "epoch": 0.79, "grad_norm": 1.7168463468551636, "learning_rate": 2.26233056444369e-06, "loss": 0.4235, "step": 30751 }, { "epoch": 0.79, "grad_norm": 1.3503299951553345, "learning_rate": 2.261804720193971e-06, "loss": 0.4602, "step": 30752 }, { "epoch": 0.79, "grad_norm": 0.9201582074165344, "learning_rate": 2.2612789292710357e-06, "loss": 0.5899, "step": 30753 }, { "epoch": 0.79, "grad_norm": 1.4673067331314087, "learning_rate": 2.260753191678503e-06, "loss": 0.4778, "step": 30754 }, { "epoch": 0.79, "grad_norm": 2.01912260055542, "learning_rate": 2.2602275074199987e-06, "loss": 0.495, "step": 30755 }, { "epoch": 0.79, "grad_norm": 1.2080155611038208, "learning_rate": 2.2597018764991398e-06, "loss": 0.3901, "step": 30756 }, { "epoch": 0.79, "grad_norm": 1.200463891029358, "learning_rate": 2.259176298919555e-06, "loss": 0.386, "step": 30757 }, { "epoch": 0.79, "grad_norm": 1.580708384513855, "learning_rate": 2.258650774684865e-06, "loss": 0.6477, "step": 30758 }, { "epoch": 0.79, "grad_norm": 1.1170899868011475, "learning_rate": 2.2581253037986873e-06, "loss": 0.3106, "step": 30759 }, { "epoch": 0.79, "grad_norm": 1.6603538990020752, "learning_rate": 2.2575998862646487e-06, "loss": 0.4221, "step": 30760 }, { "epoch": 0.79, "grad_norm": 1.5551286935806274, "learning_rate": 2.2570745220863678e-06, "loss": 0.6297, "step": 30761 }, { "epoch": 0.79, "grad_norm": 2.6603341102600098, "learning_rate": 2.256549211267465e-06, "loss": 0.5021, "step": 30762 }, { "epoch": 0.79, "grad_norm": 1.446909785270691, "learning_rate": 2.256023953811556e-06, "loss": 0.5041, "step": 30763 }, { "epoch": 0.79, "grad_norm": 1.3399531841278076, "learning_rate": 2.255498749722268e-06, "loss": 0.5764, "step": 30764 }, { "epoch": 0.79, "grad_norm": 2.951167345046997, "learning_rate": 2.2549735990032175e-06, "loss": 0.4395, "step": 30765 }, { "epoch": 0.79, "grad_norm": 2.044759511947632, "learning_rate": 2.2544485016580197e-06, "loss": 0.6374, "step": 30766 }, { "epoch": 0.79, "grad_norm": 6.626231670379639, "learning_rate": 2.2539234576903e-06, "loss": 0.5154, "step": 30767 }, { "epoch": 0.79, "grad_norm": 1.4068561792373657, "learning_rate": 2.2533984671036725e-06, "loss": 0.4791, "step": 30768 }, { "epoch": 0.79, "grad_norm": 1.194953441619873, "learning_rate": 2.252873529901757e-06, "loss": 0.5805, "step": 30769 }, { "epoch": 0.79, "grad_norm": 3.049351692199707, "learning_rate": 2.252348646088166e-06, "loss": 0.5572, "step": 30770 }, { "epoch": 0.79, "grad_norm": 1.3266339302062988, "learning_rate": 2.251823815666524e-06, "loss": 0.4573, "step": 30771 }, { "epoch": 0.79, "grad_norm": 8.02976131439209, "learning_rate": 2.251299038640444e-06, "loss": 0.6241, "step": 30772 }, { "epoch": 0.79, "grad_norm": 6.957205295562744, "learning_rate": 2.2507743150135396e-06, "loss": 0.6133, "step": 30773 }, { "epoch": 0.79, "grad_norm": 3.5293080806732178, "learning_rate": 2.250249644789434e-06, "loss": 0.572, "step": 30774 }, { "epoch": 0.79, "grad_norm": 3.3020002841949463, "learning_rate": 2.2497250279717387e-06, "loss": 0.7599, "step": 30775 }, { "epoch": 0.79, "grad_norm": 4.954614639282227, "learning_rate": 2.2492004645640663e-06, "loss": 0.6863, "step": 30776 }, { "epoch": 0.79, "grad_norm": 1.9786349534988403, "learning_rate": 2.2486759545700366e-06, "loss": 0.5846, "step": 30777 }, { "epoch": 0.79, "grad_norm": 1.2886197566986084, "learning_rate": 2.248151497993264e-06, "loss": 0.4321, "step": 30778 }, { "epoch": 0.79, "grad_norm": 1.598863959312439, "learning_rate": 2.2476270948373604e-06, "loss": 0.5357, "step": 30779 }, { "epoch": 0.79, "grad_norm": 2.810590982437134, "learning_rate": 2.247102745105937e-06, "loss": 0.5341, "step": 30780 }, { "epoch": 0.79, "grad_norm": 1.8722598552703857, "learning_rate": 2.2465784488026145e-06, "loss": 0.4622, "step": 30781 }, { "epoch": 0.79, "grad_norm": 5.701494216918945, "learning_rate": 2.2460542059310008e-06, "loss": 0.5095, "step": 30782 }, { "epoch": 0.79, "grad_norm": 2.1591854095458984, "learning_rate": 2.245530016494708e-06, "loss": 0.6381, "step": 30783 }, { "epoch": 0.79, "grad_norm": 1.3100078105926514, "learning_rate": 2.245005880497354e-06, "loss": 0.4235, "step": 30784 }, { "epoch": 0.79, "grad_norm": 11.3367919921875, "learning_rate": 2.244481797942546e-06, "loss": 0.6391, "step": 30785 }, { "epoch": 0.79, "grad_norm": 1.5608234405517578, "learning_rate": 2.2439577688338977e-06, "loss": 0.5926, "step": 30786 }, { "epoch": 0.79, "grad_norm": 0.9901764392852783, "learning_rate": 2.2434337931750174e-06, "loss": 0.4326, "step": 30787 }, { "epoch": 0.79, "grad_norm": 1.8216619491577148, "learning_rate": 2.2429098709695196e-06, "loss": 0.4361, "step": 30788 }, { "epoch": 0.79, "grad_norm": 1.2113659381866455, "learning_rate": 2.2423860022210144e-06, "loss": 0.3983, "step": 30789 }, { "epoch": 0.79, "grad_norm": 1.8545212745666504, "learning_rate": 2.241862186933108e-06, "loss": 0.4321, "step": 30790 }, { "epoch": 0.79, "grad_norm": 1.2731823921203613, "learning_rate": 2.2413384251094163e-06, "loss": 0.5446, "step": 30791 }, { "epoch": 0.79, "grad_norm": 1.3458592891693115, "learning_rate": 2.2408147167535453e-06, "loss": 0.5788, "step": 30792 }, { "epoch": 0.79, "grad_norm": 1.6192560195922852, "learning_rate": 2.240291061869102e-06, "loss": 0.6632, "step": 30793 }, { "epoch": 0.79, "grad_norm": 2.081791877746582, "learning_rate": 2.2397674604597007e-06, "loss": 0.4816, "step": 30794 }, { "epoch": 0.79, "grad_norm": 1.1408309936523438, "learning_rate": 2.2392439125289455e-06, "loss": 0.5981, "step": 30795 }, { "epoch": 0.79, "grad_norm": 5.289919853210449, "learning_rate": 2.2387204180804465e-06, "loss": 0.5765, "step": 30796 }, { "epoch": 0.79, "grad_norm": 1.6694375276565552, "learning_rate": 2.238196977117807e-06, "loss": 0.5505, "step": 30797 }, { "epoch": 0.79, "grad_norm": 1.4273806810379028, "learning_rate": 2.237673589644641e-06, "loss": 0.484, "step": 30798 }, { "epoch": 0.79, "grad_norm": 3.226362943649292, "learning_rate": 2.2371502556645507e-06, "loss": 0.3668, "step": 30799 }, { "epoch": 0.79, "grad_norm": 1.1366503238677979, "learning_rate": 2.236626975181142e-06, "loss": 0.5281, "step": 30800 }, { "epoch": 0.79, "grad_norm": 2.9681859016418457, "learning_rate": 2.2361037481980254e-06, "loss": 0.6494, "step": 30801 }, { "epoch": 0.79, "grad_norm": 2.3476595878601074, "learning_rate": 2.235580574718803e-06, "loss": 0.6255, "step": 30802 }, { "epoch": 0.79, "grad_norm": 1.6393872499465942, "learning_rate": 2.2350574547470827e-06, "loss": 0.754, "step": 30803 }, { "epoch": 0.79, "grad_norm": 1.5874104499816895, "learning_rate": 2.2345343882864633e-06, "loss": 0.5444, "step": 30804 }, { "epoch": 0.79, "grad_norm": 2.7124574184417725, "learning_rate": 2.234011375340559e-06, "loss": 0.5301, "step": 30805 }, { "epoch": 0.79, "grad_norm": 1.5965970754623413, "learning_rate": 2.2334884159129676e-06, "loss": 0.4218, "step": 30806 }, { "epoch": 0.79, "grad_norm": 1.4995352029800415, "learning_rate": 2.2329655100072935e-06, "loss": 0.6123, "step": 30807 }, { "epoch": 0.79, "grad_norm": 1.2137633562088013, "learning_rate": 2.232442657627143e-06, "loss": 0.5745, "step": 30808 }, { "epoch": 0.79, "grad_norm": 1.782230019569397, "learning_rate": 2.2319198587761172e-06, "loss": 0.7053, "step": 30809 }, { "epoch": 0.79, "grad_norm": 9.619161605834961, "learning_rate": 2.2313971134578204e-06, "loss": 0.7351, "step": 30810 }, { "epoch": 0.79, "grad_norm": 1.151267647743225, "learning_rate": 2.2308744216758505e-06, "loss": 0.4276, "step": 30811 }, { "epoch": 0.79, "grad_norm": 15.821955680847168, "learning_rate": 2.2303517834338164e-06, "loss": 0.4511, "step": 30812 }, { "epoch": 0.79, "grad_norm": 1.6307177543640137, "learning_rate": 2.229829198735316e-06, "loss": 0.6165, "step": 30813 }, { "epoch": 0.79, "grad_norm": 1.5438152551651, "learning_rate": 2.229306667583948e-06, "loss": 0.4398, "step": 30814 }, { "epoch": 0.79, "grad_norm": 5.401534080505371, "learning_rate": 2.2287841899833196e-06, "loss": 0.4697, "step": 30815 }, { "epoch": 0.79, "grad_norm": 1.4722651243209839, "learning_rate": 2.2282617659370285e-06, "loss": 0.6593, "step": 30816 }, { "epoch": 0.79, "grad_norm": 1.4720054864883423, "learning_rate": 2.2277393954486714e-06, "loss": 0.5372, "step": 30817 }, { "epoch": 0.79, "grad_norm": 10.861235618591309, "learning_rate": 2.227217078521855e-06, "loss": 0.5891, "step": 30818 }, { "epoch": 0.79, "grad_norm": 1.2788385152816772, "learning_rate": 2.226694815160174e-06, "loss": 0.4059, "step": 30819 }, { "epoch": 0.79, "grad_norm": 6.157826900482178, "learning_rate": 2.2261726053672294e-06, "loss": 0.5435, "step": 30820 }, { "epoch": 0.79, "grad_norm": 4.943621635437012, "learning_rate": 2.2256504491466158e-06, "loss": 0.6694, "step": 30821 }, { "epoch": 0.79, "grad_norm": 1.6611512899398804, "learning_rate": 2.225128346501938e-06, "loss": 0.4104, "step": 30822 }, { "epoch": 0.79, "grad_norm": 9.496477127075195, "learning_rate": 2.224606297436791e-06, "loss": 0.4808, "step": 30823 }, { "epoch": 0.79, "grad_norm": 1.4817006587982178, "learning_rate": 2.2240843019547697e-06, "loss": 0.5396, "step": 30824 }, { "epoch": 0.79, "grad_norm": 2.0325372219085693, "learning_rate": 2.223562360059477e-06, "loss": 0.5841, "step": 30825 }, { "epoch": 0.79, "grad_norm": 1.266738772392273, "learning_rate": 2.2230404717545074e-06, "loss": 0.4706, "step": 30826 }, { "epoch": 0.79, "grad_norm": 1.6781005859375, "learning_rate": 2.222518637043456e-06, "loss": 0.451, "step": 30827 }, { "epoch": 0.79, "grad_norm": 0.9424945712089539, "learning_rate": 2.2219968559299174e-06, "loss": 0.4817, "step": 30828 }, { "epoch": 0.79, "grad_norm": 3.5283663272857666, "learning_rate": 2.221475128417493e-06, "loss": 0.5928, "step": 30829 }, { "epoch": 0.79, "grad_norm": 1.5664221048355103, "learning_rate": 2.2209534545097744e-06, "loss": 0.7058, "step": 30830 }, { "epoch": 0.79, "grad_norm": 3.778444290161133, "learning_rate": 2.2204318342103547e-06, "loss": 0.4977, "step": 30831 }, { "epoch": 0.79, "grad_norm": 1.117275357246399, "learning_rate": 2.2199102675228347e-06, "loss": 0.3448, "step": 30832 }, { "epoch": 0.79, "grad_norm": 1.4108682870864868, "learning_rate": 2.219388754450804e-06, "loss": 0.5772, "step": 30833 }, { "epoch": 0.79, "grad_norm": 1.2162426710128784, "learning_rate": 2.218867294997855e-06, "loss": 0.514, "step": 30834 }, { "epoch": 0.79, "grad_norm": 2.638963222503662, "learning_rate": 2.218345889167588e-06, "loss": 0.569, "step": 30835 }, { "epoch": 0.79, "grad_norm": 1.0044325590133667, "learning_rate": 2.217824536963591e-06, "loss": 0.488, "step": 30836 }, { "epoch": 0.79, "grad_norm": 2.855886697769165, "learning_rate": 2.217303238389459e-06, "loss": 0.5318, "step": 30837 }, { "epoch": 0.79, "grad_norm": 2.088897228240967, "learning_rate": 2.21678199344878e-06, "loss": 0.455, "step": 30838 }, { "epoch": 0.79, "grad_norm": 1.3592369556427002, "learning_rate": 2.2162608021451524e-06, "loss": 0.4851, "step": 30839 }, { "epoch": 0.79, "grad_norm": 2.0273027420043945, "learning_rate": 2.215739664482166e-06, "loss": 0.6319, "step": 30840 }, { "epoch": 0.79, "grad_norm": 1.6271412372589111, "learning_rate": 2.2152185804634084e-06, "loss": 0.4992, "step": 30841 }, { "epoch": 0.79, "grad_norm": 4.018777847290039, "learning_rate": 2.214697550092475e-06, "loss": 0.5169, "step": 30842 }, { "epoch": 0.79, "grad_norm": 1.5625238418579102, "learning_rate": 2.2141765733729558e-06, "loss": 0.5697, "step": 30843 }, { "epoch": 0.79, "grad_norm": 1.1881183385849, "learning_rate": 2.2136556503084406e-06, "loss": 0.6115, "step": 30844 }, { "epoch": 0.79, "grad_norm": 2.2872986793518066, "learning_rate": 2.213134780902515e-06, "loss": 0.7313, "step": 30845 }, { "epoch": 0.79, "grad_norm": 1.0729249715805054, "learning_rate": 2.2126139651587754e-06, "loss": 0.3985, "step": 30846 }, { "epoch": 0.79, "grad_norm": 1.312054991722107, "learning_rate": 2.2120932030808086e-06, "loss": 0.5706, "step": 30847 }, { "epoch": 0.79, "grad_norm": 1.5998457670211792, "learning_rate": 2.2115724946721982e-06, "loss": 0.5116, "step": 30848 }, { "epoch": 0.79, "grad_norm": 1.2900744676589966, "learning_rate": 2.2110518399365412e-06, "loss": 0.469, "step": 30849 }, { "epoch": 0.79, "grad_norm": 2.282593011856079, "learning_rate": 2.2105312388774214e-06, "loss": 0.5707, "step": 30850 }, { "epoch": 0.79, "grad_norm": 2.772312641143799, "learning_rate": 2.2100106914984255e-06, "loss": 0.6447, "step": 30851 }, { "epoch": 0.79, "grad_norm": 1.9143390655517578, "learning_rate": 2.2094901978031402e-06, "loss": 0.5343, "step": 30852 }, { "epoch": 0.79, "grad_norm": 1.171842098236084, "learning_rate": 2.2089697577951562e-06, "loss": 0.5614, "step": 30853 }, { "epoch": 0.79, "grad_norm": 1.6495217084884644, "learning_rate": 2.2084493714780573e-06, "loss": 0.6764, "step": 30854 }, { "epoch": 0.79, "grad_norm": 1.512048363685608, "learning_rate": 2.2079290388554276e-06, "loss": 0.6097, "step": 30855 }, { "epoch": 0.79, "grad_norm": 1.727539300918579, "learning_rate": 2.207408759930859e-06, "loss": 0.4961, "step": 30856 }, { "epoch": 0.79, "grad_norm": 1.701716661453247, "learning_rate": 2.206888534707933e-06, "loss": 0.409, "step": 30857 }, { "epoch": 0.79, "grad_norm": 3.5316038131713867, "learning_rate": 2.206368363190232e-06, "loss": 0.5037, "step": 30858 }, { "epoch": 0.79, "grad_norm": 4.36971378326416, "learning_rate": 2.2058482453813466e-06, "loss": 0.523, "step": 30859 }, { "epoch": 0.79, "grad_norm": 1.159142255783081, "learning_rate": 2.2053281812848592e-06, "loss": 0.4899, "step": 30860 }, { "epoch": 0.79, "grad_norm": 2.9198360443115234, "learning_rate": 2.2048081709043523e-06, "loss": 0.5383, "step": 30861 }, { "epoch": 0.79, "grad_norm": 1.509663462638855, "learning_rate": 2.2042882142434072e-06, "loss": 0.4537, "step": 30862 }, { "epoch": 0.79, "grad_norm": 1.877643346786499, "learning_rate": 2.2037683113056128e-06, "loss": 0.5131, "step": 30863 }, { "epoch": 0.79, "grad_norm": 5.489828109741211, "learning_rate": 2.2032484620945494e-06, "loss": 0.4981, "step": 30864 }, { "epoch": 0.79, "grad_norm": 1.6572521924972534, "learning_rate": 2.2027286666137958e-06, "loss": 0.5139, "step": 30865 }, { "epoch": 0.79, "grad_norm": 1.0306060314178467, "learning_rate": 2.202208924866941e-06, "loss": 0.4736, "step": 30866 }, { "epoch": 0.79, "grad_norm": 1.3725703954696655, "learning_rate": 2.2016892368575628e-06, "loss": 0.6147, "step": 30867 }, { "epoch": 0.79, "grad_norm": 1.9451344013214111, "learning_rate": 2.2011696025892425e-06, "loss": 0.5746, "step": 30868 }, { "epoch": 0.79, "grad_norm": 1.13435697555542, "learning_rate": 2.2006500220655592e-06, "loss": 0.4749, "step": 30869 }, { "epoch": 0.79, "grad_norm": 1.6218546628952026, "learning_rate": 2.200130495290099e-06, "loss": 0.672, "step": 30870 }, { "epoch": 0.79, "grad_norm": 1.2284547090530396, "learning_rate": 2.199611022266439e-06, "loss": 0.4984, "step": 30871 }, { "epoch": 0.79, "grad_norm": 1.200106143951416, "learning_rate": 2.199091602998157e-06, "loss": 0.4805, "step": 30872 }, { "epoch": 0.79, "grad_norm": 1.0872100591659546, "learning_rate": 2.1985722374888363e-06, "loss": 0.4102, "step": 30873 }, { "epoch": 0.79, "grad_norm": 1.183495044708252, "learning_rate": 2.1980529257420546e-06, "loss": 0.322, "step": 30874 }, { "epoch": 0.79, "grad_norm": 3.626314401626587, "learning_rate": 2.1975336677613877e-06, "loss": 0.5443, "step": 30875 }, { "epoch": 0.79, "grad_norm": 1.4935921430587769, "learning_rate": 2.197014463550421e-06, "loss": 0.4685, "step": 30876 }, { "epoch": 0.79, "grad_norm": 1.5067933797836304, "learning_rate": 2.1964953131127264e-06, "loss": 0.5663, "step": 30877 }, { "epoch": 0.79, "grad_norm": 1.378890037536621, "learning_rate": 2.195976216451885e-06, "loss": 0.6451, "step": 30878 }, { "epoch": 0.79, "grad_norm": 1.1852539777755737, "learning_rate": 2.195457173571468e-06, "loss": 0.5347, "step": 30879 }, { "epoch": 0.79, "grad_norm": 1.1362459659576416, "learning_rate": 2.1949381844750605e-06, "loss": 0.4276, "step": 30880 }, { "epoch": 0.79, "grad_norm": 2.4314019680023193, "learning_rate": 2.1944192491662365e-06, "loss": 0.4947, "step": 30881 }, { "epoch": 0.79, "grad_norm": 6.850085735321045, "learning_rate": 2.1939003676485672e-06, "loss": 0.6722, "step": 30882 }, { "epoch": 0.79, "grad_norm": 1.2616870403289795, "learning_rate": 2.193381539925635e-06, "loss": 0.5041, "step": 30883 }, { "epoch": 0.79, "grad_norm": 1.1014186143875122, "learning_rate": 2.1928627660010127e-06, "loss": 0.5361, "step": 30884 }, { "epoch": 0.79, "grad_norm": 1.4462108612060547, "learning_rate": 2.192344045878275e-06, "loss": 0.5001, "step": 30885 }, { "epoch": 0.79, "grad_norm": 1.1891499757766724, "learning_rate": 2.1918253795609943e-06, "loss": 0.4812, "step": 30886 }, { "epoch": 0.79, "grad_norm": 1.4534072875976562, "learning_rate": 2.191306767052751e-06, "loss": 0.6601, "step": 30887 }, { "epoch": 0.79, "grad_norm": 1.1399296522140503, "learning_rate": 2.1907882083571153e-06, "loss": 0.5476, "step": 30888 }, { "epoch": 0.79, "grad_norm": 2.1709673404693604, "learning_rate": 2.190269703477661e-06, "loss": 0.5331, "step": 30889 }, { "epoch": 0.79, "grad_norm": 1.3986159563064575, "learning_rate": 2.1897512524179608e-06, "loss": 0.3898, "step": 30890 }, { "epoch": 0.79, "grad_norm": 2.2398598194122314, "learning_rate": 2.189232855181589e-06, "loss": 0.5327, "step": 30891 }, { "epoch": 0.79, "grad_norm": 1.2953468561172485, "learning_rate": 2.188714511772113e-06, "loss": 0.5024, "step": 30892 }, { "epoch": 0.79, "grad_norm": 2.3105952739715576, "learning_rate": 2.188196222193113e-06, "loss": 0.6479, "step": 30893 }, { "epoch": 0.79, "grad_norm": 1.1884900331497192, "learning_rate": 2.187677986448157e-06, "loss": 0.5945, "step": 30894 }, { "epoch": 0.79, "grad_norm": 6.14547872543335, "learning_rate": 2.1871598045408126e-06, "loss": 0.6252, "step": 30895 }, { "epoch": 0.79, "grad_norm": 2.1990742683410645, "learning_rate": 2.186641676474657e-06, "loss": 0.5085, "step": 30896 }, { "epoch": 0.79, "grad_norm": 3.0824155807495117, "learning_rate": 2.186123602253258e-06, "loss": 0.548, "step": 30897 }, { "epoch": 0.79, "grad_norm": 1.888456106185913, "learning_rate": 2.1856055818801868e-06, "loss": 0.377, "step": 30898 }, { "epoch": 0.79, "grad_norm": 1.3963203430175781, "learning_rate": 2.1850876153590095e-06, "loss": 0.5359, "step": 30899 }, { "epoch": 0.79, "grad_norm": 4.20027494430542, "learning_rate": 2.184569702693301e-06, "loss": 0.5494, "step": 30900 }, { "epoch": 0.79, "grad_norm": 1.6898059844970703, "learning_rate": 2.184051843886628e-06, "loss": 0.5092, "step": 30901 }, { "epoch": 0.79, "grad_norm": 1.0928430557250977, "learning_rate": 2.183534038942556e-06, "loss": 0.4204, "step": 30902 }, { "epoch": 0.79, "grad_norm": 2.442211389541626, "learning_rate": 2.183016287864661e-06, "loss": 0.6882, "step": 30903 }, { "epoch": 0.79, "grad_norm": 1.405220627784729, "learning_rate": 2.1824985906565056e-06, "loss": 0.6206, "step": 30904 }, { "epoch": 0.79, "grad_norm": 1.3579165935516357, "learning_rate": 2.181980947321658e-06, "loss": 0.4027, "step": 30905 }, { "epoch": 0.79, "grad_norm": 1.7445558309555054, "learning_rate": 2.1814633578636837e-06, "loss": 0.63, "step": 30906 }, { "epoch": 0.79, "grad_norm": 2.7328481674194336, "learning_rate": 2.1809458222861546e-06, "loss": 0.5733, "step": 30907 }, { "epoch": 0.79, "grad_norm": 1.1018050909042358, "learning_rate": 2.180428340592634e-06, "loss": 0.4878, "step": 30908 }, { "epoch": 0.79, "grad_norm": 1.368575930595398, "learning_rate": 2.179910912786687e-06, "loss": 0.3643, "step": 30909 }, { "epoch": 0.79, "grad_norm": 1.889966607093811, "learning_rate": 2.1793935388718823e-06, "loss": 0.5439, "step": 30910 }, { "epoch": 0.79, "grad_norm": 4.255669116973877, "learning_rate": 2.1788762188517843e-06, "loss": 0.6737, "step": 30911 }, { "epoch": 0.79, "grad_norm": 1.4928756952285767, "learning_rate": 2.178358952729955e-06, "loss": 0.4554, "step": 30912 }, { "epoch": 0.79, "grad_norm": 1.6270617246627808, "learning_rate": 2.177841740509965e-06, "loss": 0.563, "step": 30913 }, { "epoch": 0.79, "grad_norm": 1.1852439641952515, "learning_rate": 2.177324582195375e-06, "loss": 0.5336, "step": 30914 }, { "epoch": 0.79, "grad_norm": 1.1364847421646118, "learning_rate": 2.1768074777897498e-06, "loss": 0.5882, "step": 30915 }, { "epoch": 0.79, "grad_norm": 1.412680745124817, "learning_rate": 2.176290427296649e-06, "loss": 0.4947, "step": 30916 }, { "epoch": 0.79, "grad_norm": 1.0516293048858643, "learning_rate": 2.175773430719642e-06, "loss": 0.593, "step": 30917 }, { "epoch": 0.79, "grad_norm": 1.746360421180725, "learning_rate": 2.1752564880622894e-06, "loss": 0.4597, "step": 30918 }, { "epoch": 0.79, "grad_norm": 2.377497673034668, "learning_rate": 2.1747395993281493e-06, "loss": 0.5338, "step": 30919 }, { "epoch": 0.79, "grad_norm": 1.3658853769302368, "learning_rate": 2.1742227645207913e-06, "loss": 0.5896, "step": 30920 }, { "epoch": 0.79, "grad_norm": 1.4647142887115479, "learning_rate": 2.1737059836437737e-06, "loss": 0.495, "step": 30921 }, { "epoch": 0.79, "grad_norm": 1.28276526927948, "learning_rate": 2.173189256700656e-06, "loss": 0.5577, "step": 30922 }, { "epoch": 0.79, "grad_norm": 1.456402063369751, "learning_rate": 2.172672583694999e-06, "loss": 0.592, "step": 30923 }, { "epoch": 0.79, "grad_norm": 1.2907694578170776, "learning_rate": 2.172155964630368e-06, "loss": 0.4893, "step": 30924 }, { "epoch": 0.79, "grad_norm": 2.000638246536255, "learning_rate": 2.1716393995103204e-06, "loss": 0.5397, "step": 30925 }, { "epoch": 0.79, "grad_norm": 1.7754716873168945, "learning_rate": 2.1711228883384115e-06, "loss": 0.5579, "step": 30926 }, { "epoch": 0.79, "grad_norm": 1.3660131692886353, "learning_rate": 2.1706064311182083e-06, "loss": 0.416, "step": 30927 }, { "epoch": 0.79, "grad_norm": 1.4915051460266113, "learning_rate": 2.1700900278532677e-06, "loss": 0.5748, "step": 30928 }, { "epoch": 0.79, "grad_norm": 2.4673686027526855, "learning_rate": 2.1695736785471434e-06, "loss": 0.5228, "step": 30929 }, { "epoch": 0.79, "grad_norm": 2.7417826652526855, "learning_rate": 2.169057383203401e-06, "loss": 0.6113, "step": 30930 }, { "epoch": 0.79, "grad_norm": 1.1700433492660522, "learning_rate": 2.168541141825595e-06, "loss": 0.498, "step": 30931 }, { "epoch": 0.79, "grad_norm": 1.2163705825805664, "learning_rate": 2.168024954417284e-06, "loss": 0.6253, "step": 30932 }, { "epoch": 0.79, "grad_norm": 1.3828401565551758, "learning_rate": 2.167508820982022e-06, "loss": 0.5051, "step": 30933 }, { "epoch": 0.79, "grad_norm": 1.6381332874298096, "learning_rate": 2.1669927415233706e-06, "loss": 0.6248, "step": 30934 }, { "epoch": 0.79, "grad_norm": 3.47342848777771, "learning_rate": 2.166476716044883e-06, "loss": 0.7133, "step": 30935 }, { "epoch": 0.79, "grad_norm": 1.6268309354782104, "learning_rate": 2.1659607445501153e-06, "loss": 0.6503, "step": 30936 }, { "epoch": 0.79, "grad_norm": 2.1018571853637695, "learning_rate": 2.1654448270426254e-06, "loss": 0.6272, "step": 30937 }, { "epoch": 0.79, "grad_norm": 1.8461240530014038, "learning_rate": 2.1649289635259684e-06, "loss": 0.442, "step": 30938 }, { "epoch": 0.79, "grad_norm": 2.6087679862976074, "learning_rate": 2.164413154003698e-06, "loss": 0.5072, "step": 30939 }, { "epoch": 0.79, "grad_norm": 1.2134679555892944, "learning_rate": 2.1638973984793675e-06, "loss": 0.4674, "step": 30940 }, { "epoch": 0.79, "grad_norm": 2.0476255416870117, "learning_rate": 2.1633816969565357e-06, "loss": 0.4361, "step": 30941 }, { "epoch": 0.79, "grad_norm": 1.427666187286377, "learning_rate": 2.1628660494387534e-06, "loss": 0.5217, "step": 30942 }, { "epoch": 0.79, "grad_norm": 1.4278019666671753, "learning_rate": 2.1623504559295727e-06, "loss": 0.4161, "step": 30943 }, { "epoch": 0.79, "grad_norm": 1.9759825468063354, "learning_rate": 2.1618349164325503e-06, "loss": 0.695, "step": 30944 }, { "epoch": 0.79, "grad_norm": 2.6274116039276123, "learning_rate": 2.1613194309512374e-06, "loss": 0.5955, "step": 30945 }, { "epoch": 0.79, "grad_norm": 1.332775354385376, "learning_rate": 2.160803999489186e-06, "loss": 0.5532, "step": 30946 }, { "epoch": 0.79, "grad_norm": 1.0447392463684082, "learning_rate": 2.160288622049945e-06, "loss": 0.5158, "step": 30947 }, { "epoch": 0.79, "grad_norm": 1.2122470140457153, "learning_rate": 2.1597732986370734e-06, "loss": 0.5203, "step": 30948 }, { "epoch": 0.79, "grad_norm": 1.3049136400222778, "learning_rate": 2.1592580292541187e-06, "loss": 0.4218, "step": 30949 }, { "epoch": 0.79, "grad_norm": 10.47918701171875, "learning_rate": 2.1587428139046286e-06, "loss": 0.5236, "step": 30950 }, { "epoch": 0.79, "grad_norm": 8.995020866394043, "learning_rate": 2.1582276525921585e-06, "loss": 0.6441, "step": 30951 }, { "epoch": 0.79, "grad_norm": 1.2609639167785645, "learning_rate": 2.1577125453202574e-06, "loss": 0.4765, "step": 30952 }, { "epoch": 0.79, "grad_norm": 2.3026413917541504, "learning_rate": 2.157197492092472e-06, "loss": 0.5711, "step": 30953 }, { "epoch": 0.79, "grad_norm": 8.866826057434082, "learning_rate": 2.1566824929123566e-06, "loss": 0.6283, "step": 30954 }, { "epoch": 0.79, "grad_norm": 6.268765926361084, "learning_rate": 2.1561675477834575e-06, "loss": 0.5991, "step": 30955 }, { "epoch": 0.79, "grad_norm": 1.1061385869979858, "learning_rate": 2.1556526567093238e-06, "loss": 0.6156, "step": 30956 }, { "epoch": 0.79, "grad_norm": 1.4128735065460205, "learning_rate": 2.1551378196935014e-06, "loss": 0.6603, "step": 30957 }, { "epoch": 0.79, "grad_norm": 1.1484543085098267, "learning_rate": 2.1546230367395426e-06, "loss": 0.5589, "step": 30958 }, { "epoch": 0.79, "grad_norm": 2.0545012950897217, "learning_rate": 2.154108307850994e-06, "loss": 0.4591, "step": 30959 }, { "epoch": 0.79, "grad_norm": 1.1995466947555542, "learning_rate": 2.153593633031398e-06, "loss": 0.4255, "step": 30960 }, { "epoch": 0.79, "grad_norm": 1.4337953329086304, "learning_rate": 2.153079012284308e-06, "loss": 0.5542, "step": 30961 }, { "epoch": 0.79, "grad_norm": 1.7276577949523926, "learning_rate": 2.1525644456132676e-06, "loss": 0.5284, "step": 30962 }, { "epoch": 0.79, "grad_norm": 1.8116371631622314, "learning_rate": 2.1520499330218226e-06, "loss": 0.5527, "step": 30963 }, { "epoch": 0.79, "grad_norm": 1.7013367414474487, "learning_rate": 2.1515354745135165e-06, "loss": 0.4897, "step": 30964 }, { "epoch": 0.79, "grad_norm": 3.8480966091156006, "learning_rate": 2.1510210700919e-06, "loss": 0.5818, "step": 30965 }, { "epoch": 0.79, "grad_norm": 3.644528865814209, "learning_rate": 2.1505067197605144e-06, "loss": 0.614, "step": 30966 }, { "epoch": 0.79, "grad_norm": 2.857309341430664, "learning_rate": 2.149992423522902e-06, "loss": 0.4324, "step": 30967 }, { "epoch": 0.79, "grad_norm": 1.2478911876678467, "learning_rate": 2.149478181382614e-06, "loss": 0.4875, "step": 30968 }, { "epoch": 0.79, "grad_norm": 1.7484556436538696, "learning_rate": 2.1489639933431883e-06, "loss": 0.5812, "step": 30969 }, { "epoch": 0.79, "grad_norm": 1.7140213251113892, "learning_rate": 2.1484498594081693e-06, "loss": 0.6126, "step": 30970 }, { "epoch": 0.79, "grad_norm": 0.9828593730926514, "learning_rate": 2.147935779581103e-06, "loss": 0.2917, "step": 30971 }, { "epoch": 0.79, "grad_norm": 1.5789940357208252, "learning_rate": 2.1474217538655296e-06, "loss": 0.5329, "step": 30972 }, { "epoch": 0.79, "grad_norm": 1.1843302249908447, "learning_rate": 2.146907782264993e-06, "loss": 0.5714, "step": 30973 }, { "epoch": 0.79, "grad_norm": 1.2654587030410767, "learning_rate": 2.146393864783031e-06, "loss": 0.5709, "step": 30974 }, { "epoch": 0.79, "grad_norm": 1.425992727279663, "learning_rate": 2.1458800014231906e-06, "loss": 0.6119, "step": 30975 }, { "epoch": 0.79, "grad_norm": 1.8021457195281982, "learning_rate": 2.145366192189011e-06, "loss": 0.5748, "step": 30976 }, { "epoch": 0.79, "grad_norm": 1.337402582168579, "learning_rate": 2.1448524370840305e-06, "loss": 0.565, "step": 30977 }, { "epoch": 0.79, "grad_norm": 1.172098994255066, "learning_rate": 2.144338736111795e-06, "loss": 0.6726, "step": 30978 }, { "epoch": 0.79, "grad_norm": 1.561492919921875, "learning_rate": 2.1438250892758418e-06, "loss": 0.6056, "step": 30979 }, { "epoch": 0.79, "grad_norm": 1.3070087432861328, "learning_rate": 2.1433114965797086e-06, "loss": 0.4771, "step": 30980 }, { "epoch": 0.79, "grad_norm": 1.070399284362793, "learning_rate": 2.1427979580269353e-06, "loss": 0.6896, "step": 30981 }, { "epoch": 0.79, "grad_norm": 1.6518797874450684, "learning_rate": 2.1422844736210646e-06, "loss": 0.6576, "step": 30982 }, { "epoch": 0.79, "grad_norm": 1.611846685409546, "learning_rate": 2.1417710433656324e-06, "loss": 0.4651, "step": 30983 }, { "epoch": 0.79, "grad_norm": 1.0971951484680176, "learning_rate": 2.141257667264174e-06, "loss": 0.5175, "step": 30984 }, { "epoch": 0.79, "grad_norm": 1.9458562135696411, "learning_rate": 2.140744345320234e-06, "loss": 0.486, "step": 30985 }, { "epoch": 0.79, "grad_norm": 1.1547846794128418, "learning_rate": 2.1402310775373457e-06, "loss": 0.5258, "step": 30986 }, { "epoch": 0.79, "grad_norm": 14.098114967346191, "learning_rate": 2.1397178639190466e-06, "loss": 0.4737, "step": 30987 }, { "epoch": 0.79, "grad_norm": 1.0802831649780273, "learning_rate": 2.139204704468871e-06, "loss": 0.4426, "step": 30988 }, { "epoch": 0.79, "grad_norm": 1.205174207687378, "learning_rate": 2.138691599190361e-06, "loss": 0.4855, "step": 30989 }, { "epoch": 0.79, "grad_norm": 1.5179890394210815, "learning_rate": 2.13817854808705e-06, "loss": 0.5284, "step": 30990 }, { "epoch": 0.79, "grad_norm": 3.4824860095977783, "learning_rate": 2.13766555116247e-06, "loss": 0.5665, "step": 30991 }, { "epoch": 0.79, "grad_norm": 1.8497017621994019, "learning_rate": 2.1371526084201623e-06, "loss": 0.6124, "step": 30992 }, { "epoch": 0.79, "grad_norm": 1.7858492136001587, "learning_rate": 2.136639719863658e-06, "loss": 0.6292, "step": 30993 }, { "epoch": 0.79, "grad_norm": 2.522071599960327, "learning_rate": 2.1361268854964902e-06, "loss": 0.5519, "step": 30994 }, { "epoch": 0.79, "grad_norm": 1.6827291250228882, "learning_rate": 2.1356141053221992e-06, "loss": 0.5733, "step": 30995 }, { "epoch": 0.79, "grad_norm": 1.163153886795044, "learning_rate": 2.135101379344313e-06, "loss": 0.5924, "step": 30996 }, { "epoch": 0.79, "grad_norm": 1.881098747253418, "learning_rate": 2.1345887075663687e-06, "loss": 0.4899, "step": 30997 }, { "epoch": 0.79, "grad_norm": 1.2544035911560059, "learning_rate": 2.1340760899918934e-06, "loss": 0.3983, "step": 30998 }, { "epoch": 0.79, "grad_norm": 2.226959466934204, "learning_rate": 2.133563526624427e-06, "loss": 0.586, "step": 30999 }, { "epoch": 0.79, "grad_norm": 0.8649321794509888, "learning_rate": 2.133051017467499e-06, "loss": 0.5869, "step": 31000 }, { "epoch": 0.79, "grad_norm": 1.4346802234649658, "learning_rate": 2.1325385625246376e-06, "loss": 0.4094, "step": 31001 }, { "epoch": 0.79, "grad_norm": 1.1651511192321777, "learning_rate": 2.132026161799381e-06, "loss": 0.5551, "step": 31002 }, { "epoch": 0.79, "grad_norm": 1.464985966682434, "learning_rate": 2.1315138152952563e-06, "loss": 0.4545, "step": 31003 }, { "epoch": 0.79, "grad_norm": 3.9880614280700684, "learning_rate": 2.1310015230157956e-06, "loss": 0.7447, "step": 31004 }, { "epoch": 0.79, "grad_norm": 1.794782042503357, "learning_rate": 2.130489284964525e-06, "loss": 0.4796, "step": 31005 }, { "epoch": 0.79, "grad_norm": 1.3430591821670532, "learning_rate": 2.129977101144981e-06, "loss": 0.5978, "step": 31006 }, { "epoch": 0.79, "grad_norm": 2.621952772140503, "learning_rate": 2.1294649715606907e-06, "loss": 0.7571, "step": 31007 }, { "epoch": 0.79, "grad_norm": 1.1300538778305054, "learning_rate": 2.1289528962151808e-06, "loss": 0.4004, "step": 31008 }, { "epoch": 0.79, "grad_norm": 7.717105865478516, "learning_rate": 2.1284408751119845e-06, "loss": 0.5247, "step": 31009 }, { "epoch": 0.79, "grad_norm": 1.1757299900054932, "learning_rate": 2.1279289082546285e-06, "loss": 0.3485, "step": 31010 }, { "epoch": 0.79, "grad_norm": 0.9696104526519775, "learning_rate": 2.1274169956466386e-06, "loss": 0.4813, "step": 31011 }, { "epoch": 0.79, "grad_norm": 1.6257562637329102, "learning_rate": 2.126905137291546e-06, "loss": 0.559, "step": 31012 }, { "epoch": 0.79, "grad_norm": 4.170258045196533, "learning_rate": 2.126393333192878e-06, "loss": 0.5918, "step": 31013 }, { "epoch": 0.79, "grad_norm": 2.2886486053466797, "learning_rate": 2.12588158335416e-06, "loss": 0.4835, "step": 31014 }, { "epoch": 0.79, "grad_norm": 1.4912902116775513, "learning_rate": 2.1253698877789164e-06, "loss": 0.5591, "step": 31015 }, { "epoch": 0.79, "grad_norm": 4.271190166473389, "learning_rate": 2.124858246470679e-06, "loss": 0.4665, "step": 31016 }, { "epoch": 0.79, "grad_norm": 3.3353254795074463, "learning_rate": 2.124346659432972e-06, "loss": 0.5528, "step": 31017 }, { "epoch": 0.8, "grad_norm": 2.3710029125213623, "learning_rate": 2.1238351266693157e-06, "loss": 0.5382, "step": 31018 }, { "epoch": 0.8, "grad_norm": 0.9939955472946167, "learning_rate": 2.123323648183243e-06, "loss": 0.4493, "step": 31019 }, { "epoch": 0.8, "grad_norm": 1.1494635343551636, "learning_rate": 2.1228122239782746e-06, "loss": 0.5175, "step": 31020 }, { "epoch": 0.8, "grad_norm": 3.0074455738067627, "learning_rate": 2.122300854057935e-06, "loss": 0.5631, "step": 31021 }, { "epoch": 0.8, "grad_norm": 1.4296282529830933, "learning_rate": 2.121789538425747e-06, "loss": 0.5992, "step": 31022 }, { "epoch": 0.8, "grad_norm": 1.2614943981170654, "learning_rate": 2.121278277085238e-06, "loss": 0.5466, "step": 31023 }, { "epoch": 0.8, "grad_norm": 1.5386412143707275, "learning_rate": 2.1207670700399286e-06, "loss": 0.5377, "step": 31024 }, { "epoch": 0.8, "grad_norm": 1.3110815286636353, "learning_rate": 2.1202559172933414e-06, "loss": 0.4512, "step": 31025 }, { "epoch": 0.8, "grad_norm": 4.791648864746094, "learning_rate": 2.119744818849001e-06, "loss": 0.5042, "step": 31026 }, { "epoch": 0.8, "grad_norm": 2.6828408241271973, "learning_rate": 2.1192337747104284e-06, "loss": 0.5505, "step": 31027 }, { "epoch": 0.8, "grad_norm": 1.8069454431533813, "learning_rate": 2.1187227848811466e-06, "loss": 0.4365, "step": 31028 }, { "epoch": 0.8, "grad_norm": 1.8380974531173706, "learning_rate": 2.118211849364672e-06, "loss": 0.4571, "step": 31029 }, { "epoch": 0.8, "grad_norm": 1.552817463874817, "learning_rate": 2.1177009681645334e-06, "loss": 0.5433, "step": 31030 }, { "epoch": 0.8, "grad_norm": 2.4081015586853027, "learning_rate": 2.1171901412842464e-06, "loss": 0.5154, "step": 31031 }, { "epoch": 0.8, "grad_norm": 6.246886730194092, "learning_rate": 2.11667936872733e-06, "loss": 0.8486, "step": 31032 }, { "epoch": 0.8, "grad_norm": 1.8711391687393188, "learning_rate": 2.11616865049731e-06, "loss": 0.5594, "step": 31033 }, { "epoch": 0.8, "grad_norm": 1.0832149982452393, "learning_rate": 2.1156579865977022e-06, "loss": 0.3281, "step": 31034 }, { "epoch": 0.8, "grad_norm": 1.441430687904358, "learning_rate": 2.1151473770320243e-06, "loss": 0.5921, "step": 31035 }, { "epoch": 0.8, "grad_norm": 1.357895851135254, "learning_rate": 2.1146368218037984e-06, "loss": 0.5494, "step": 31036 }, { "epoch": 0.8, "grad_norm": 1.6148344278335571, "learning_rate": 2.1141263209165417e-06, "loss": 0.5882, "step": 31037 }, { "epoch": 0.8, "grad_norm": 1.7284103631973267, "learning_rate": 2.113615874373772e-06, "loss": 0.6422, "step": 31038 }, { "epoch": 0.8, "grad_norm": 4.7153639793396, "learning_rate": 2.1131054821790042e-06, "loss": 0.4187, "step": 31039 }, { "epoch": 0.8, "grad_norm": 1.4819660186767578, "learning_rate": 2.112595144335762e-06, "loss": 0.5892, "step": 31040 }, { "epoch": 0.8, "grad_norm": 1.724532961845398, "learning_rate": 2.1120848608475575e-06, "loss": 0.5977, "step": 31041 }, { "epoch": 0.8, "grad_norm": 1.2250436544418335, "learning_rate": 2.1115746317179064e-06, "loss": 0.4898, "step": 31042 }, { "epoch": 0.8, "grad_norm": 1.2939165830612183, "learning_rate": 2.1110644569503293e-06, "loss": 0.5742, "step": 31043 }, { "epoch": 0.8, "grad_norm": 1.1566767692565918, "learning_rate": 2.1105543365483396e-06, "loss": 0.4416, "step": 31044 }, { "epoch": 0.8, "grad_norm": 1.4396289587020874, "learning_rate": 2.1100442705154534e-06, "loss": 0.6586, "step": 31045 }, { "epoch": 0.8, "grad_norm": 1.3916890621185303, "learning_rate": 2.1095342588551817e-06, "loss": 0.6007, "step": 31046 }, { "epoch": 0.8, "grad_norm": 1.0042155981063843, "learning_rate": 2.1090243015710455e-06, "loss": 0.5566, "step": 31047 }, { "epoch": 0.8, "grad_norm": 1.0784587860107422, "learning_rate": 2.1085143986665566e-06, "loss": 0.3988, "step": 31048 }, { "epoch": 0.8, "grad_norm": 3.6272284984588623, "learning_rate": 2.108004550145226e-06, "loss": 0.6688, "step": 31049 }, { "epoch": 0.8, "grad_norm": 2.1583127975463867, "learning_rate": 2.107494756010572e-06, "loss": 0.5613, "step": 31050 }, { "epoch": 0.8, "grad_norm": 1.4196979999542236, "learning_rate": 2.1069850162661053e-06, "loss": 0.5165, "step": 31051 }, { "epoch": 0.8, "grad_norm": 1.9590575695037842, "learning_rate": 2.1064753309153395e-06, "loss": 0.5529, "step": 31052 }, { "epoch": 0.8, "grad_norm": 2.0956454277038574, "learning_rate": 2.105965699961784e-06, "loss": 0.3924, "step": 31053 }, { "epoch": 0.8, "grad_norm": 1.8171404600143433, "learning_rate": 2.1054561234089553e-06, "loss": 0.5668, "step": 31054 }, { "epoch": 0.8, "grad_norm": 1.4805963039398193, "learning_rate": 2.104946601260364e-06, "loss": 0.4112, "step": 31055 }, { "epoch": 0.8, "grad_norm": 1.1891759634017944, "learning_rate": 2.104437133519518e-06, "loss": 0.486, "step": 31056 }, { "epoch": 0.8, "grad_norm": 1.3717896938323975, "learning_rate": 2.1039277201899324e-06, "loss": 0.4969, "step": 31057 }, { "epoch": 0.8, "grad_norm": 3.096968650817871, "learning_rate": 2.1034183612751168e-06, "loss": 0.6174, "step": 31058 }, { "epoch": 0.8, "grad_norm": 2.2244040966033936, "learning_rate": 2.1029090567785783e-06, "loss": 0.5892, "step": 31059 }, { "epoch": 0.8, "grad_norm": 1.1000118255615234, "learning_rate": 2.102399806703831e-06, "loss": 0.5679, "step": 31060 }, { "epoch": 0.8, "grad_norm": 1.4946694374084473, "learning_rate": 2.1018906110543834e-06, "loss": 0.4468, "step": 31061 }, { "epoch": 0.8, "grad_norm": 2.6304783821105957, "learning_rate": 2.1013814698337433e-06, "loss": 0.6117, "step": 31062 }, { "epoch": 0.8, "grad_norm": 1.530389428138733, "learning_rate": 2.100872383045417e-06, "loss": 0.6043, "step": 31063 }, { "epoch": 0.8, "grad_norm": 1.9586416482925415, "learning_rate": 2.100363350692918e-06, "loss": 0.632, "step": 31064 }, { "epoch": 0.8, "grad_norm": 0.9808355569839478, "learning_rate": 2.0998543727797528e-06, "loss": 0.4335, "step": 31065 }, { "epoch": 0.8, "grad_norm": 4.653722286224365, "learning_rate": 2.0993454493094232e-06, "loss": 0.5479, "step": 31066 }, { "epoch": 0.8, "grad_norm": 5.407761573791504, "learning_rate": 2.098836580285445e-06, "loss": 0.7078, "step": 31067 }, { "epoch": 0.8, "grad_norm": 5.470625877380371, "learning_rate": 2.098327765711321e-06, "loss": 0.5787, "step": 31068 }, { "epoch": 0.8, "grad_norm": 1.6084976196289062, "learning_rate": 2.097819005590558e-06, "loss": 0.4787, "step": 31069 }, { "epoch": 0.8, "grad_norm": 2.3811686038970947, "learning_rate": 2.097310299926659e-06, "loss": 0.4891, "step": 31070 }, { "epoch": 0.8, "grad_norm": 1.620487093925476, "learning_rate": 2.0968016487231345e-06, "loss": 0.4825, "step": 31071 }, { "epoch": 0.8, "grad_norm": 1.4428701400756836, "learning_rate": 2.096293051983489e-06, "loss": 0.5065, "step": 31072 }, { "epoch": 0.8, "grad_norm": 1.4795494079589844, "learning_rate": 2.095784509711223e-06, "loss": 0.5776, "step": 31073 }, { "epoch": 0.8, "grad_norm": 1.504758358001709, "learning_rate": 2.0952760219098466e-06, "loss": 0.4983, "step": 31074 }, { "epoch": 0.8, "grad_norm": 2.15136981010437, "learning_rate": 2.094767588582861e-06, "loss": 0.5407, "step": 31075 }, { "epoch": 0.8, "grad_norm": 5.024494647979736, "learning_rate": 2.0942592097337687e-06, "loss": 0.8062, "step": 31076 }, { "epoch": 0.8, "grad_norm": 1.4112590551376343, "learning_rate": 2.093750885366078e-06, "loss": 0.5032, "step": 31077 }, { "epoch": 0.8, "grad_norm": 3.5013513565063477, "learning_rate": 2.0932426154832884e-06, "loss": 0.5429, "step": 31078 }, { "epoch": 0.8, "grad_norm": 1.9965211153030396, "learning_rate": 2.092734400088904e-06, "loss": 0.5616, "step": 31079 }, { "epoch": 0.8, "grad_norm": 3.8634727001190186, "learning_rate": 2.0922262391864223e-06, "loss": 0.4921, "step": 31080 }, { "epoch": 0.8, "grad_norm": 1.579859972000122, "learning_rate": 2.0917181327793533e-06, "loss": 0.6804, "step": 31081 }, { "epoch": 0.8, "grad_norm": 2.205256462097168, "learning_rate": 2.0912100808711934e-06, "loss": 0.5372, "step": 31082 }, { "epoch": 0.8, "grad_norm": 1.2200461626052856, "learning_rate": 2.0907020834654423e-06, "loss": 0.446, "step": 31083 }, { "epoch": 0.8, "grad_norm": 2.5239810943603516, "learning_rate": 2.090194140565607e-06, "loss": 0.5593, "step": 31084 }, { "epoch": 0.8, "grad_norm": 2.2419066429138184, "learning_rate": 2.0896862521751835e-06, "loss": 0.6563, "step": 31085 }, { "epoch": 0.8, "grad_norm": 1.0165067911148071, "learning_rate": 2.0891784182976725e-06, "loss": 0.423, "step": 31086 }, { "epoch": 0.8, "grad_norm": 1.2492403984069824, "learning_rate": 2.0886706389365707e-06, "loss": 0.4785, "step": 31087 }, { "epoch": 0.8, "grad_norm": 2.864638566970825, "learning_rate": 2.088162914095384e-06, "loss": 0.6111, "step": 31088 }, { "epoch": 0.8, "grad_norm": 1.7223173379898071, "learning_rate": 2.0876552437776075e-06, "loss": 0.5444, "step": 31089 }, { "epoch": 0.8, "grad_norm": 1.4750012159347534, "learning_rate": 2.0871476279867374e-06, "loss": 0.6058, "step": 31090 }, { "epoch": 0.8, "grad_norm": 2.25822114944458, "learning_rate": 2.0866400667262777e-06, "loss": 0.4604, "step": 31091 }, { "epoch": 0.8, "grad_norm": 2.741192579269409, "learning_rate": 2.086132559999722e-06, "loss": 0.5559, "step": 31092 }, { "epoch": 0.8, "grad_norm": 1.253538966178894, "learning_rate": 2.0856251078105696e-06, "loss": 0.5355, "step": 31093 }, { "epoch": 0.8, "grad_norm": 5.681118488311768, "learning_rate": 2.0851177101623134e-06, "loss": 0.7258, "step": 31094 }, { "epoch": 0.8, "grad_norm": 1.8166042566299438, "learning_rate": 2.0846103670584562e-06, "loss": 0.5179, "step": 31095 }, { "epoch": 0.8, "grad_norm": 2.844849109649658, "learning_rate": 2.084103078502491e-06, "loss": 0.4558, "step": 31096 }, { "epoch": 0.8, "grad_norm": 1.1851110458374023, "learning_rate": 2.0835958444979122e-06, "loss": 0.5083, "step": 31097 }, { "epoch": 0.8, "grad_norm": 1.3319573402404785, "learning_rate": 2.0830886650482185e-06, "loss": 0.4194, "step": 31098 }, { "epoch": 0.8, "grad_norm": 1.2307507991790771, "learning_rate": 2.082581540156905e-06, "loss": 0.4363, "step": 31099 }, { "epoch": 0.8, "grad_norm": 1.4919835329055786, "learning_rate": 2.082074469827463e-06, "loss": 0.365, "step": 31100 }, { "epoch": 0.8, "grad_norm": 2.35678768157959, "learning_rate": 2.081567454063391e-06, "loss": 0.5727, "step": 31101 }, { "epoch": 0.8, "grad_norm": 1.3561359643936157, "learning_rate": 2.081060492868182e-06, "loss": 0.5957, "step": 31102 }, { "epoch": 0.8, "grad_norm": 1.450143575668335, "learning_rate": 2.0805535862453287e-06, "loss": 0.4837, "step": 31103 }, { "epoch": 0.8, "grad_norm": 2.063345432281494, "learning_rate": 2.0800467341983212e-06, "loss": 0.5612, "step": 31104 }, { "epoch": 0.8, "grad_norm": 0.967039167881012, "learning_rate": 2.079539936730659e-06, "loss": 0.3546, "step": 31105 }, { "epoch": 0.8, "grad_norm": 4.1701178550720215, "learning_rate": 2.079033193845832e-06, "loss": 0.6262, "step": 31106 }, { "epoch": 0.8, "grad_norm": 2.1613988876342773, "learning_rate": 2.078526505547329e-06, "loss": 0.7319, "step": 31107 }, { "epoch": 0.8, "grad_norm": 1.785172462463379, "learning_rate": 2.078019871838647e-06, "loss": 0.5499, "step": 31108 }, { "epoch": 0.8, "grad_norm": 1.3766191005706787, "learning_rate": 2.0775132927232746e-06, "loss": 0.4233, "step": 31109 }, { "epoch": 0.8, "grad_norm": 1.621250867843628, "learning_rate": 2.0770067682047035e-06, "loss": 0.5448, "step": 31110 }, { "epoch": 0.8, "grad_norm": 1.8330893516540527, "learning_rate": 2.076500298286421e-06, "loss": 0.3958, "step": 31111 }, { "epoch": 0.8, "grad_norm": 1.4048887491226196, "learning_rate": 2.075993882971924e-06, "loss": 0.4389, "step": 31112 }, { "epoch": 0.8, "grad_norm": 1.2692530155181885, "learning_rate": 2.0754875222646975e-06, "loss": 0.5432, "step": 31113 }, { "epoch": 0.8, "grad_norm": 1.0809680223464966, "learning_rate": 2.07498121616823e-06, "loss": 0.5071, "step": 31114 }, { "epoch": 0.8, "grad_norm": 1.2208181619644165, "learning_rate": 2.0744749646860162e-06, "loss": 0.3965, "step": 31115 }, { "epoch": 0.8, "grad_norm": 1.3693784475326538, "learning_rate": 2.0739687678215413e-06, "loss": 0.582, "step": 31116 }, { "epoch": 0.8, "grad_norm": 1.0303221940994263, "learning_rate": 2.073462625578292e-06, "loss": 0.3858, "step": 31117 }, { "epoch": 0.8, "grad_norm": 2.2876882553100586, "learning_rate": 2.0729565379597604e-06, "loss": 0.478, "step": 31118 }, { "epoch": 0.8, "grad_norm": 1.6599056720733643, "learning_rate": 2.0724505049694323e-06, "loss": 0.4749, "step": 31119 }, { "epoch": 0.8, "grad_norm": 1.478535532951355, "learning_rate": 2.071944526610795e-06, "loss": 0.4091, "step": 31120 }, { "epoch": 0.8, "grad_norm": 1.4809789657592773, "learning_rate": 2.0714386028873325e-06, "loss": 0.5103, "step": 31121 }, { "epoch": 0.8, "grad_norm": 1.938546895980835, "learning_rate": 2.0709327338025353e-06, "loss": 0.4473, "step": 31122 }, { "epoch": 0.8, "grad_norm": 1.7544025182724, "learning_rate": 2.070426919359889e-06, "loss": 0.5558, "step": 31123 }, { "epoch": 0.8, "grad_norm": 1.7195775508880615, "learning_rate": 2.0699211595628763e-06, "loss": 0.6636, "step": 31124 }, { "epoch": 0.8, "grad_norm": 2.4547231197357178, "learning_rate": 2.069415454414987e-06, "loss": 0.4588, "step": 31125 }, { "epoch": 0.8, "grad_norm": 1.1738706827163696, "learning_rate": 2.068909803919704e-06, "loss": 0.5614, "step": 31126 }, { "epoch": 0.8, "grad_norm": 4.006739616394043, "learning_rate": 2.068404208080512e-06, "loss": 0.6711, "step": 31127 }, { "epoch": 0.8, "grad_norm": 1.8197710514068604, "learning_rate": 2.0678986669008926e-06, "loss": 0.4279, "step": 31128 }, { "epoch": 0.8, "grad_norm": 15.514654159545898, "learning_rate": 2.0673931803843337e-06, "loss": 0.6484, "step": 31129 }, { "epoch": 0.8, "grad_norm": 1.3307558298110962, "learning_rate": 2.0668877485343186e-06, "loss": 0.38, "step": 31130 }, { "epoch": 0.8, "grad_norm": 1.4460532665252686, "learning_rate": 2.0663823713543286e-06, "loss": 0.4997, "step": 31131 }, { "epoch": 0.8, "grad_norm": 10.417037963867188, "learning_rate": 2.0658770488478463e-06, "loss": 0.7036, "step": 31132 }, { "epoch": 0.8, "grad_norm": 1.239996075630188, "learning_rate": 2.065371781018356e-06, "loss": 0.4746, "step": 31133 }, { "epoch": 0.8, "grad_norm": 2.662677764892578, "learning_rate": 2.0648665678693348e-06, "loss": 0.5719, "step": 31134 }, { "epoch": 0.8, "grad_norm": 1.3215851783752441, "learning_rate": 2.064361409404271e-06, "loss": 0.4245, "step": 31135 }, { "epoch": 0.8, "grad_norm": 13.863585472106934, "learning_rate": 2.0638563056266414e-06, "loss": 0.5724, "step": 31136 }, { "epoch": 0.8, "grad_norm": 1.1052840948104858, "learning_rate": 2.063351256539927e-06, "loss": 0.4713, "step": 31137 }, { "epoch": 0.8, "grad_norm": 1.7458667755126953, "learning_rate": 2.062846262147612e-06, "loss": 0.4941, "step": 31138 }, { "epoch": 0.8, "grad_norm": 1.9066013097763062, "learning_rate": 2.062341322453173e-06, "loss": 0.5698, "step": 31139 }, { "epoch": 0.8, "grad_norm": 11.251176834106445, "learning_rate": 2.0618364374600908e-06, "loss": 0.4533, "step": 31140 }, { "epoch": 0.8, "grad_norm": 15.136651992797852, "learning_rate": 2.0613316071718427e-06, "loss": 0.6063, "step": 31141 }, { "epoch": 0.8, "grad_norm": 1.9864004850387573, "learning_rate": 2.0608268315919124e-06, "loss": 0.3858, "step": 31142 }, { "epoch": 0.8, "grad_norm": 1.3259698152542114, "learning_rate": 2.0603221107237748e-06, "loss": 0.3752, "step": 31143 }, { "epoch": 0.8, "grad_norm": 4.700689315795898, "learning_rate": 2.0598174445709065e-06, "loss": 0.5521, "step": 31144 }, { "epoch": 0.8, "grad_norm": 4.891317844390869, "learning_rate": 2.0593128331367905e-06, "loss": 0.5888, "step": 31145 }, { "epoch": 0.8, "grad_norm": 1.068703293800354, "learning_rate": 2.0588082764249027e-06, "loss": 0.4729, "step": 31146 }, { "epoch": 0.8, "grad_norm": 1.2058753967285156, "learning_rate": 2.058303774438716e-06, "loss": 0.4541, "step": 31147 }, { "epoch": 0.8, "grad_norm": 2.2076058387756348, "learning_rate": 2.057799327181712e-06, "loss": 0.6131, "step": 31148 }, { "epoch": 0.8, "grad_norm": 1.773175597190857, "learning_rate": 2.0572949346573656e-06, "loss": 0.5425, "step": 31149 }, { "epoch": 0.8, "grad_norm": 1.9775636196136475, "learning_rate": 2.0567905968691525e-06, "loss": 0.5474, "step": 31150 }, { "epoch": 0.8, "grad_norm": 1.204149603843689, "learning_rate": 2.0562863138205457e-06, "loss": 0.5247, "step": 31151 }, { "epoch": 0.8, "grad_norm": 1.21683669090271, "learning_rate": 2.0557820855150257e-06, "loss": 0.4776, "step": 31152 }, { "epoch": 0.8, "grad_norm": 0.9996353387832642, "learning_rate": 2.055277911956065e-06, "loss": 0.5064, "step": 31153 }, { "epoch": 0.8, "grad_norm": 1.1648646593093872, "learning_rate": 2.0547737931471333e-06, "loss": 0.4217, "step": 31154 }, { "epoch": 0.8, "grad_norm": 1.0874173641204834, "learning_rate": 2.0542697290917135e-06, "loss": 0.3753, "step": 31155 }, { "epoch": 0.8, "grad_norm": 3.4904892444610596, "learning_rate": 2.053765719793274e-06, "loss": 0.497, "step": 31156 }, { "epoch": 0.8, "grad_norm": 1.3715405464172363, "learning_rate": 2.0532617652552877e-06, "loss": 0.475, "step": 31157 }, { "epoch": 0.8, "grad_norm": 1.5465177297592163, "learning_rate": 2.0527578654812274e-06, "loss": 0.5611, "step": 31158 }, { "epoch": 0.8, "grad_norm": 1.8927016258239746, "learning_rate": 2.0522540204745687e-06, "loss": 0.6111, "step": 31159 }, { "epoch": 0.8, "grad_norm": 1.2249877452850342, "learning_rate": 2.0517502302387825e-06, "loss": 0.6443, "step": 31160 }, { "epoch": 0.8, "grad_norm": 3.295259952545166, "learning_rate": 2.051246494777338e-06, "loss": 0.3674, "step": 31161 }, { "epoch": 0.8, "grad_norm": 1.4955414533615112, "learning_rate": 2.0507428140937113e-06, "loss": 0.4641, "step": 31162 }, { "epoch": 0.8, "grad_norm": 1.5023454427719116, "learning_rate": 2.05023918819137e-06, "loss": 0.5245, "step": 31163 }, { "epoch": 0.8, "grad_norm": 1.3065192699432373, "learning_rate": 2.049735617073786e-06, "loss": 0.471, "step": 31164 }, { "epoch": 0.8, "grad_norm": 2.7704713344573975, "learning_rate": 2.049232100744427e-06, "loss": 0.481, "step": 31165 }, { "epoch": 0.8, "grad_norm": 3.375335216522217, "learning_rate": 2.0487286392067673e-06, "loss": 0.5801, "step": 31166 }, { "epoch": 0.8, "grad_norm": 3.946845054626465, "learning_rate": 2.048225232464275e-06, "loss": 0.4761, "step": 31167 }, { "epoch": 0.8, "grad_norm": 1.7874369621276855, "learning_rate": 2.047721880520416e-06, "loss": 0.5092, "step": 31168 }, { "epoch": 0.8, "grad_norm": 9.147300720214844, "learning_rate": 2.0472185833786638e-06, "loss": 0.5662, "step": 31169 }, { "epoch": 0.8, "grad_norm": 9.306970596313477, "learning_rate": 2.0467153410424845e-06, "loss": 0.5589, "step": 31170 }, { "epoch": 0.8, "grad_norm": 1.0493345260620117, "learning_rate": 2.0462121535153445e-06, "loss": 0.4834, "step": 31171 }, { "epoch": 0.8, "grad_norm": 1.3181740045547485, "learning_rate": 2.0457090208007158e-06, "loss": 0.3782, "step": 31172 }, { "epoch": 0.8, "grad_norm": 2.283336639404297, "learning_rate": 2.0452059429020622e-06, "loss": 0.6512, "step": 31173 }, { "epoch": 0.8, "grad_norm": 1.847591519355774, "learning_rate": 2.044702919822853e-06, "loss": 0.7313, "step": 31174 }, { "epoch": 0.8, "grad_norm": 1.6041134595870972, "learning_rate": 2.0441999515665488e-06, "loss": 0.4584, "step": 31175 }, { "epoch": 0.8, "grad_norm": 1.1725714206695557, "learning_rate": 2.0436970381366252e-06, "loss": 0.4475, "step": 31176 }, { "epoch": 0.8, "grad_norm": 1.4257954359054565, "learning_rate": 2.0431941795365407e-06, "loss": 0.626, "step": 31177 }, { "epoch": 0.8, "grad_norm": 3.570615291595459, "learning_rate": 2.042691375769762e-06, "loss": 0.4504, "step": 31178 }, { "epoch": 0.8, "grad_norm": 3.921959638595581, "learning_rate": 2.0421886268397562e-06, "loss": 0.7222, "step": 31179 }, { "epoch": 0.8, "grad_norm": 1.5909620523452759, "learning_rate": 2.0416859327499885e-06, "loss": 0.5558, "step": 31180 }, { "epoch": 0.8, "grad_norm": 1.5149765014648438, "learning_rate": 2.04118329350392e-06, "loss": 0.4279, "step": 31181 }, { "epoch": 0.8, "grad_norm": 1.0864899158477783, "learning_rate": 2.040680709105014e-06, "loss": 0.4744, "step": 31182 }, { "epoch": 0.8, "grad_norm": 2.0487060546875, "learning_rate": 2.0401781795567388e-06, "loss": 0.6065, "step": 31183 }, { "epoch": 0.8, "grad_norm": 1.3519386053085327, "learning_rate": 2.0396757048625537e-06, "loss": 0.5033, "step": 31184 }, { "epoch": 0.8, "grad_norm": 2.8835620880126953, "learning_rate": 2.0391732850259203e-06, "loss": 0.6228, "step": 31185 }, { "epoch": 0.8, "grad_norm": 1.2986721992492676, "learning_rate": 2.0386709200503074e-06, "loss": 0.421, "step": 31186 }, { "epoch": 0.8, "grad_norm": 2.369554281234741, "learning_rate": 2.038168609939171e-06, "loss": 0.3941, "step": 31187 }, { "epoch": 0.8, "grad_norm": 2.039703130722046, "learning_rate": 2.0376663546959753e-06, "loss": 0.4663, "step": 31188 }, { "epoch": 0.8, "grad_norm": 1.3295224905014038, "learning_rate": 2.0371641543241772e-06, "loss": 0.4987, "step": 31189 }, { "epoch": 0.8, "grad_norm": 1.8345386981964111, "learning_rate": 2.036662008827244e-06, "loss": 0.535, "step": 31190 }, { "epoch": 0.8, "grad_norm": 16.627201080322266, "learning_rate": 2.0361599182086333e-06, "loss": 0.5085, "step": 31191 }, { "epoch": 0.8, "grad_norm": 1.131985068321228, "learning_rate": 2.035657882471802e-06, "loss": 0.5698, "step": 31192 }, { "epoch": 0.8, "grad_norm": 1.0450279712677002, "learning_rate": 2.035155901620216e-06, "loss": 0.3948, "step": 31193 }, { "epoch": 0.8, "grad_norm": 1.6177996397018433, "learning_rate": 2.0346539756573312e-06, "loss": 0.5833, "step": 31194 }, { "epoch": 0.8, "grad_norm": 1.7529102563858032, "learning_rate": 2.034152104586604e-06, "loss": 0.4833, "step": 31195 }, { "epoch": 0.8, "grad_norm": 1.5531558990478516, "learning_rate": 2.0336502884114994e-06, "loss": 0.6668, "step": 31196 }, { "epoch": 0.8, "grad_norm": 2.173384666442871, "learning_rate": 2.0331485271354724e-06, "loss": 0.7338, "step": 31197 }, { "epoch": 0.8, "grad_norm": 2.5851950645446777, "learning_rate": 2.032646820761979e-06, "loss": 0.5755, "step": 31198 }, { "epoch": 0.8, "grad_norm": 1.3335661888122559, "learning_rate": 2.032145169294476e-06, "loss": 0.4683, "step": 31199 }, { "epoch": 0.8, "grad_norm": 1.9771018028259277, "learning_rate": 2.031643572736426e-06, "loss": 0.5334, "step": 31200 }, { "epoch": 0.8, "grad_norm": 13.884431838989258, "learning_rate": 2.031142031091281e-06, "loss": 0.4478, "step": 31201 }, { "epoch": 0.8, "grad_norm": 2.058479070663452, "learning_rate": 2.0306405443624964e-06, "loss": 0.5957, "step": 31202 }, { "epoch": 0.8, "grad_norm": 2.374392032623291, "learning_rate": 2.030139112553534e-06, "loss": 0.5862, "step": 31203 }, { "epoch": 0.8, "grad_norm": 4.787647247314453, "learning_rate": 2.029637735667844e-06, "loss": 0.4201, "step": 31204 }, { "epoch": 0.8, "grad_norm": 1.6741621494293213, "learning_rate": 2.0291364137088844e-06, "loss": 0.4044, "step": 31205 }, { "epoch": 0.8, "grad_norm": 1.4665820598602295, "learning_rate": 2.028635146680106e-06, "loss": 0.674, "step": 31206 }, { "epoch": 0.8, "grad_norm": 1.442516803741455, "learning_rate": 2.028133934584969e-06, "loss": 0.4432, "step": 31207 }, { "epoch": 0.8, "grad_norm": 14.351101875305176, "learning_rate": 2.0276327774269246e-06, "loss": 0.5423, "step": 31208 }, { "epoch": 0.8, "grad_norm": 1.598063588142395, "learning_rate": 2.027131675209423e-06, "loss": 0.6346, "step": 31209 }, { "epoch": 0.8, "grad_norm": 2.7100257873535156, "learning_rate": 2.026630627935924e-06, "loss": 0.5621, "step": 31210 }, { "epoch": 0.8, "grad_norm": 1.434860110282898, "learning_rate": 2.026129635609877e-06, "loss": 0.4995, "step": 31211 }, { "epoch": 0.8, "grad_norm": 1.2893236875534058, "learning_rate": 2.0256286982347317e-06, "loss": 0.3808, "step": 31212 }, { "epoch": 0.8, "grad_norm": 1.4251548051834106, "learning_rate": 2.0251278158139467e-06, "loss": 0.4835, "step": 31213 }, { "epoch": 0.8, "grad_norm": 1.4499597549438477, "learning_rate": 2.0246269883509708e-06, "loss": 0.5099, "step": 31214 }, { "epoch": 0.8, "grad_norm": 1.717150092124939, "learning_rate": 2.024126215849255e-06, "loss": 0.5216, "step": 31215 }, { "epoch": 0.8, "grad_norm": 1.3534170389175415, "learning_rate": 2.023625498312247e-06, "loss": 0.4418, "step": 31216 }, { "epoch": 0.8, "grad_norm": 1.5201140642166138, "learning_rate": 2.023124835743404e-06, "loss": 0.517, "step": 31217 }, { "epoch": 0.8, "grad_norm": 3.5438990592956543, "learning_rate": 2.022624228146174e-06, "loss": 0.6335, "step": 31218 }, { "epoch": 0.8, "grad_norm": 5.547881603240967, "learning_rate": 2.0221236755240014e-06, "loss": 0.6178, "step": 31219 }, { "epoch": 0.8, "grad_norm": 1.694941759109497, "learning_rate": 2.021623177880344e-06, "loss": 0.6849, "step": 31220 }, { "epoch": 0.8, "grad_norm": 2.8776001930236816, "learning_rate": 2.0211227352186467e-06, "loss": 0.6584, "step": 31221 }, { "epoch": 0.8, "grad_norm": 1.4803080558776855, "learning_rate": 2.020622347542359e-06, "loss": 0.4085, "step": 31222 }, { "epoch": 0.8, "grad_norm": 1.4084192514419556, "learning_rate": 2.020122014854926e-06, "loss": 0.4929, "step": 31223 }, { "epoch": 0.8, "grad_norm": 1.4324101209640503, "learning_rate": 2.0196217371598026e-06, "loss": 0.3784, "step": 31224 }, { "epoch": 0.8, "grad_norm": 1.2014309167861938, "learning_rate": 2.0191215144604316e-06, "loss": 0.5399, "step": 31225 }, { "epoch": 0.8, "grad_norm": 1.3831398487091064, "learning_rate": 2.018621346760258e-06, "loss": 0.4482, "step": 31226 }, { "epoch": 0.8, "grad_norm": 1.7509366273880005, "learning_rate": 2.018121234062734e-06, "loss": 0.4987, "step": 31227 }, { "epoch": 0.8, "grad_norm": 1.5132973194122314, "learning_rate": 2.017621176371304e-06, "loss": 0.6053, "step": 31228 }, { "epoch": 0.8, "grad_norm": 0.9681882858276367, "learning_rate": 2.0171211736894147e-06, "loss": 0.4092, "step": 31229 }, { "epoch": 0.8, "grad_norm": 4.115484714508057, "learning_rate": 2.0166212260205075e-06, "loss": 0.5565, "step": 31230 }, { "epoch": 0.8, "grad_norm": 1.1454099416732788, "learning_rate": 2.016121333368034e-06, "loss": 0.5544, "step": 31231 }, { "epoch": 0.8, "grad_norm": 3.0178704261779785, "learning_rate": 2.015621495735436e-06, "loss": 0.5187, "step": 31232 }, { "epoch": 0.8, "grad_norm": 1.3608980178833008, "learning_rate": 2.0151217131261557e-06, "loss": 0.4049, "step": 31233 }, { "epoch": 0.8, "grad_norm": 1.3952066898345947, "learning_rate": 2.0146219855436433e-06, "loss": 0.513, "step": 31234 }, { "epoch": 0.8, "grad_norm": 4.257059574127197, "learning_rate": 2.014122312991339e-06, "loss": 0.7106, "step": 31235 }, { "epoch": 0.8, "grad_norm": 5.377162456512451, "learning_rate": 2.0136226954726833e-06, "loss": 0.491, "step": 31236 }, { "epoch": 0.8, "grad_norm": 1.5707772970199585, "learning_rate": 2.013123132991126e-06, "loss": 0.5854, "step": 31237 }, { "epoch": 0.8, "grad_norm": 2.151158332824707, "learning_rate": 2.012623625550105e-06, "loss": 0.6272, "step": 31238 }, { "epoch": 0.8, "grad_norm": 1.2013578414916992, "learning_rate": 2.012124173153065e-06, "loss": 0.3978, "step": 31239 }, { "epoch": 0.8, "grad_norm": 1.3099106550216675, "learning_rate": 2.0116247758034435e-06, "loss": 0.5452, "step": 31240 }, { "epoch": 0.8, "grad_norm": 4.318122863769531, "learning_rate": 2.011125433504688e-06, "loss": 0.5497, "step": 31241 }, { "epoch": 0.8, "grad_norm": 2.65893292427063, "learning_rate": 2.0106261462602374e-06, "loss": 0.6333, "step": 31242 }, { "epoch": 0.8, "grad_norm": 1.332141637802124, "learning_rate": 2.010126914073528e-06, "loss": 0.5021, "step": 31243 }, { "epoch": 0.8, "grad_norm": 1.4563775062561035, "learning_rate": 2.0096277369480078e-06, "loss": 0.3913, "step": 31244 }, { "epoch": 0.8, "grad_norm": 1.5137934684753418, "learning_rate": 2.0091286148871135e-06, "loss": 0.5026, "step": 31245 }, { "epoch": 0.8, "grad_norm": 1.5042622089385986, "learning_rate": 2.008629547894284e-06, "loss": 0.4783, "step": 31246 }, { "epoch": 0.8, "grad_norm": 2.186260938644409, "learning_rate": 2.0081305359729565e-06, "loss": 0.6196, "step": 31247 }, { "epoch": 0.8, "grad_norm": 1.5926483869552612, "learning_rate": 2.007631579126574e-06, "loss": 0.6047, "step": 31248 }, { "epoch": 0.8, "grad_norm": 5.346498012542725, "learning_rate": 2.0071326773585754e-06, "loss": 0.4608, "step": 31249 }, { "epoch": 0.8, "grad_norm": 1.3410334587097168, "learning_rate": 2.006633830672392e-06, "loss": 0.5425, "step": 31250 }, { "epoch": 0.8, "grad_norm": 1.2354930639266968, "learning_rate": 2.006135039071471e-06, "loss": 0.5665, "step": 31251 }, { "epoch": 0.8, "grad_norm": 1.2217079401016235, "learning_rate": 2.005636302559243e-06, "loss": 0.5133, "step": 31252 }, { "epoch": 0.8, "grad_norm": 2.846930980682373, "learning_rate": 2.005137621139146e-06, "loss": 0.6368, "step": 31253 }, { "epoch": 0.8, "grad_norm": 2.535011053085327, "learning_rate": 2.0046389948146196e-06, "loss": 0.6267, "step": 31254 }, { "epoch": 0.8, "grad_norm": 3.0329840183258057, "learning_rate": 2.004140423589098e-06, "loss": 0.597, "step": 31255 }, { "epoch": 0.8, "grad_norm": 1.62604820728302, "learning_rate": 2.0036419074660175e-06, "loss": 0.4377, "step": 31256 }, { "epoch": 0.8, "grad_norm": 1.8972312211990356, "learning_rate": 2.00314344644881e-06, "loss": 0.6184, "step": 31257 }, { "epoch": 0.8, "grad_norm": 1.3353335857391357, "learning_rate": 2.002645040540917e-06, "loss": 0.5317, "step": 31258 }, { "epoch": 0.8, "grad_norm": 1.3609161376953125, "learning_rate": 2.00214668974577e-06, "loss": 0.6002, "step": 31259 }, { "epoch": 0.8, "grad_norm": 1.1273409128189087, "learning_rate": 2.0016483940667996e-06, "loss": 0.6705, "step": 31260 }, { "epoch": 0.8, "grad_norm": 2.2889161109924316, "learning_rate": 2.001150153507447e-06, "loss": 0.3807, "step": 31261 }, { "epoch": 0.8, "grad_norm": 2.863063335418701, "learning_rate": 2.0006519680711423e-06, "loss": 0.652, "step": 31262 }, { "epoch": 0.8, "grad_norm": 1.9515609741210938, "learning_rate": 2.000153837761317e-06, "loss": 0.498, "step": 31263 }, { "epoch": 0.8, "grad_norm": 1.4505401849746704, "learning_rate": 1.999655762581405e-06, "loss": 0.4809, "step": 31264 }, { "epoch": 0.8, "grad_norm": 1.3054543733596802, "learning_rate": 1.999157742534841e-06, "loss": 0.5336, "step": 31265 }, { "epoch": 0.8, "grad_norm": 1.7060602903366089, "learning_rate": 1.998659777625055e-06, "loss": 0.5985, "step": 31266 }, { "epoch": 0.8, "grad_norm": 1.4463825225830078, "learning_rate": 1.998161867855476e-06, "loss": 0.4846, "step": 31267 }, { "epoch": 0.8, "grad_norm": 1.4172178506851196, "learning_rate": 1.997664013229541e-06, "loss": 0.4932, "step": 31268 }, { "epoch": 0.8, "grad_norm": 2.241194009780884, "learning_rate": 1.9971662137506777e-06, "loss": 0.6781, "step": 31269 }, { "epoch": 0.8, "grad_norm": 1.2613502740859985, "learning_rate": 1.9966684694223172e-06, "loss": 0.41, "step": 31270 }, { "epoch": 0.8, "grad_norm": 3.696030855178833, "learning_rate": 1.996170780247887e-06, "loss": 0.5809, "step": 31271 }, { "epoch": 0.8, "grad_norm": 1.398079514503479, "learning_rate": 1.995673146230822e-06, "loss": 0.4579, "step": 31272 }, { "epoch": 0.8, "grad_norm": 2.3331358432769775, "learning_rate": 1.9951755673745486e-06, "loss": 0.5549, "step": 31273 }, { "epoch": 0.8, "grad_norm": 1.7899205684661865, "learning_rate": 1.994678043682494e-06, "loss": 0.4441, "step": 31274 }, { "epoch": 0.8, "grad_norm": 0.9790389537811279, "learning_rate": 1.9941805751580913e-06, "loss": 0.3447, "step": 31275 }, { "epoch": 0.8, "grad_norm": 1.2738579511642456, "learning_rate": 1.993683161804766e-06, "loss": 0.5073, "step": 31276 }, { "epoch": 0.8, "grad_norm": 1.1798670291900635, "learning_rate": 1.9931858036259435e-06, "loss": 0.4777, "step": 31277 }, { "epoch": 0.8, "grad_norm": 1.4674334526062012, "learning_rate": 1.992688500625057e-06, "loss": 0.5046, "step": 31278 }, { "epoch": 0.8, "grad_norm": 0.9959414005279541, "learning_rate": 1.992191252805531e-06, "loss": 0.5373, "step": 31279 }, { "epoch": 0.8, "grad_norm": 1.9650484323501587, "learning_rate": 1.9916940601707915e-06, "loss": 0.555, "step": 31280 }, { "epoch": 0.8, "grad_norm": 1.8633227348327637, "learning_rate": 1.991196922724262e-06, "loss": 0.5742, "step": 31281 }, { "epoch": 0.8, "grad_norm": 3.6021740436553955, "learning_rate": 1.990699840469376e-06, "loss": 0.4804, "step": 31282 }, { "epoch": 0.8, "grad_norm": 1.4184699058532715, "learning_rate": 1.990202813409553e-06, "loss": 0.5173, "step": 31283 }, { "epoch": 0.8, "grad_norm": 3.358646869659424, "learning_rate": 1.9897058415482185e-06, "loss": 0.5771, "step": 31284 }, { "epoch": 0.8, "grad_norm": 1.7113827466964722, "learning_rate": 1.9892089248888003e-06, "loss": 0.5613, "step": 31285 }, { "epoch": 0.8, "grad_norm": 1.0360034704208374, "learning_rate": 1.9887120634347222e-06, "loss": 0.4832, "step": 31286 }, { "epoch": 0.8, "grad_norm": 1.6307315826416016, "learning_rate": 1.9882152571894076e-06, "loss": 0.6033, "step": 31287 }, { "epoch": 0.8, "grad_norm": 6.208357334136963, "learning_rate": 1.9877185061562764e-06, "loss": 0.7614, "step": 31288 }, { "epoch": 0.8, "grad_norm": 1.5515743494033813, "learning_rate": 1.9872218103387585e-06, "loss": 0.634, "step": 31289 }, { "epoch": 0.8, "grad_norm": 1.849311351776123, "learning_rate": 1.9867251697402734e-06, "loss": 0.4989, "step": 31290 }, { "epoch": 0.8, "grad_norm": 1.085045337677002, "learning_rate": 1.9862285843642424e-06, "loss": 0.4361, "step": 31291 }, { "epoch": 0.8, "grad_norm": 1.443160057067871, "learning_rate": 1.9857320542140913e-06, "loss": 0.505, "step": 31292 }, { "epoch": 0.8, "grad_norm": 2.1307735443115234, "learning_rate": 1.98523557929324e-06, "loss": 0.5447, "step": 31293 }, { "epoch": 0.8, "grad_norm": 1.5311275720596313, "learning_rate": 1.984739159605107e-06, "loss": 0.6888, "step": 31294 }, { "epoch": 0.8, "grad_norm": 1.892380714416504, "learning_rate": 1.9842427951531185e-06, "loss": 0.7441, "step": 31295 }, { "epoch": 0.8, "grad_norm": 1.869363784790039, "learning_rate": 1.983746485940692e-06, "loss": 0.5259, "step": 31296 }, { "epoch": 0.8, "grad_norm": 2.5194404125213623, "learning_rate": 1.9832502319712488e-06, "loss": 0.5462, "step": 31297 }, { "epoch": 0.8, "grad_norm": 1.1839778423309326, "learning_rate": 1.9827540332482064e-06, "loss": 0.4702, "step": 31298 }, { "epoch": 0.8, "grad_norm": 1.2882800102233887, "learning_rate": 1.982257889774989e-06, "loss": 0.3861, "step": 31299 }, { "epoch": 0.8, "grad_norm": 2.2709121704101562, "learning_rate": 1.981761801555012e-06, "loss": 0.5669, "step": 31300 }, { "epoch": 0.8, "grad_norm": 1.4178214073181152, "learning_rate": 1.9812657685916935e-06, "loss": 0.4831, "step": 31301 }, { "epoch": 0.8, "grad_norm": 1.5460008382797241, "learning_rate": 1.980769790888456e-06, "loss": 0.5357, "step": 31302 }, { "epoch": 0.8, "grad_norm": 1.7086620330810547, "learning_rate": 1.9802738684487144e-06, "loss": 0.6159, "step": 31303 }, { "epoch": 0.8, "grad_norm": 1.5144424438476562, "learning_rate": 1.9797780012758873e-06, "loss": 0.674, "step": 31304 }, { "epoch": 0.8, "grad_norm": 6.478487014770508, "learning_rate": 1.979282189373388e-06, "loss": 0.8628, "step": 31305 }, { "epoch": 0.8, "grad_norm": 1.4572087526321411, "learning_rate": 1.9787864327446393e-06, "loss": 0.576, "step": 31306 }, { "epoch": 0.8, "grad_norm": 2.0631439685821533, "learning_rate": 1.9782907313930556e-06, "loss": 0.5347, "step": 31307 }, { "epoch": 0.8, "grad_norm": 1.6160154342651367, "learning_rate": 1.9777950853220506e-06, "loss": 0.589, "step": 31308 }, { "epoch": 0.8, "grad_norm": 3.2885730266571045, "learning_rate": 1.9772994945350434e-06, "loss": 0.565, "step": 31309 }, { "epoch": 0.8, "grad_norm": 1.042046308517456, "learning_rate": 1.9768039590354483e-06, "loss": 0.4913, "step": 31310 }, { "epoch": 0.8, "grad_norm": 1.5502270460128784, "learning_rate": 1.9763084788266797e-06, "loss": 0.5239, "step": 31311 }, { "epoch": 0.8, "grad_norm": 1.6156890392303467, "learning_rate": 1.975813053912149e-06, "loss": 0.6217, "step": 31312 }, { "epoch": 0.8, "grad_norm": 1.051239252090454, "learning_rate": 1.975317684295276e-06, "loss": 0.499, "step": 31313 }, { "epoch": 0.8, "grad_norm": 2.187009334564209, "learning_rate": 1.9748223699794723e-06, "loss": 0.5836, "step": 31314 }, { "epoch": 0.8, "grad_norm": 2.5043022632598877, "learning_rate": 1.9743271109681474e-06, "loss": 0.7635, "step": 31315 }, { "epoch": 0.8, "grad_norm": 1.7747056484222412, "learning_rate": 1.973831907264722e-06, "loss": 0.6607, "step": 31316 }, { "epoch": 0.8, "grad_norm": 1.892978549003601, "learning_rate": 1.9733367588726027e-06, "loss": 0.3814, "step": 31317 }, { "epoch": 0.8, "grad_norm": 1.9591103792190552, "learning_rate": 1.972841665795202e-06, "loss": 0.6925, "step": 31318 }, { "epoch": 0.8, "grad_norm": 1.3647695779800415, "learning_rate": 1.972346628035936e-06, "loss": 0.5324, "step": 31319 }, { "epoch": 0.8, "grad_norm": 1.16016685962677, "learning_rate": 1.971851645598214e-06, "loss": 0.4753, "step": 31320 }, { "epoch": 0.8, "grad_norm": 2.0249276161193848, "learning_rate": 1.9713567184854467e-06, "loss": 0.5745, "step": 31321 }, { "epoch": 0.8, "grad_norm": 5.015139102935791, "learning_rate": 1.970861846701041e-06, "loss": 0.6628, "step": 31322 }, { "epoch": 0.8, "grad_norm": 1.4862624406814575, "learning_rate": 1.9703670302484147e-06, "loss": 0.4899, "step": 31323 }, { "epoch": 0.8, "grad_norm": 9.033021926879883, "learning_rate": 1.9698722691309747e-06, "loss": 0.4218, "step": 31324 }, { "epoch": 0.8, "grad_norm": 1.2980507612228394, "learning_rate": 1.969377563352126e-06, "loss": 0.4636, "step": 31325 }, { "epoch": 0.8, "grad_norm": 1.3396730422973633, "learning_rate": 1.9688829129152853e-06, "loss": 0.4887, "step": 31326 }, { "epoch": 0.8, "grad_norm": 1.3152625560760498, "learning_rate": 1.9683883178238572e-06, "loss": 0.5478, "step": 31327 }, { "epoch": 0.8, "grad_norm": 2.0305187702178955, "learning_rate": 1.967893778081251e-06, "loss": 0.6293, "step": 31328 }, { "epoch": 0.8, "grad_norm": 1.4881922006607056, "learning_rate": 1.9673992936908726e-06, "loss": 0.5947, "step": 31329 }, { "epoch": 0.8, "grad_norm": 1.4251960515975952, "learning_rate": 1.9669048646561338e-06, "loss": 0.5413, "step": 31330 }, { "epoch": 0.8, "grad_norm": 1.749239206314087, "learning_rate": 1.9664104909804403e-06, "loss": 0.3857, "step": 31331 }, { "epoch": 0.8, "grad_norm": 1.0112642049789429, "learning_rate": 1.965916172667195e-06, "loss": 0.4544, "step": 31332 }, { "epoch": 0.8, "grad_norm": 1.300553321838379, "learning_rate": 1.965421909719811e-06, "loss": 0.4411, "step": 31333 }, { "epoch": 0.8, "grad_norm": 1.3349947929382324, "learning_rate": 1.9649277021416924e-06, "loss": 0.5205, "step": 31334 }, { "epoch": 0.8, "grad_norm": 7.064693450927734, "learning_rate": 1.9644335499362423e-06, "loss": 0.5067, "step": 31335 }, { "epoch": 0.8, "grad_norm": 1.8469387292861938, "learning_rate": 1.9639394531068657e-06, "loss": 0.4769, "step": 31336 }, { "epoch": 0.8, "grad_norm": 1.443678379058838, "learning_rate": 1.9634454116569725e-06, "loss": 0.583, "step": 31337 }, { "epoch": 0.8, "grad_norm": 2.0326616764068604, "learning_rate": 1.962951425589964e-06, "loss": 0.5187, "step": 31338 }, { "epoch": 0.8, "grad_norm": 5.957104206085205, "learning_rate": 1.9624574949092433e-06, "loss": 0.5931, "step": 31339 }, { "epoch": 0.8, "grad_norm": 1.4896372556686401, "learning_rate": 1.961963619618218e-06, "loss": 0.6469, "step": 31340 }, { "epoch": 0.8, "grad_norm": 3.03659725189209, "learning_rate": 1.9614697997202893e-06, "loss": 0.7614, "step": 31341 }, { "epoch": 0.8, "grad_norm": 1.0940908193588257, "learning_rate": 1.960976035218858e-06, "loss": 0.475, "step": 31342 }, { "epoch": 0.8, "grad_norm": 0.9879755973815918, "learning_rate": 1.960482326117331e-06, "loss": 0.4958, "step": 31343 }, { "epoch": 0.8, "grad_norm": 4.681215763092041, "learning_rate": 1.9599886724191096e-06, "loss": 0.5102, "step": 31344 }, { "epoch": 0.8, "grad_norm": 1.483926773071289, "learning_rate": 1.9594950741275955e-06, "loss": 0.4734, "step": 31345 }, { "epoch": 0.8, "grad_norm": 6.987000465393066, "learning_rate": 1.9590015312461863e-06, "loss": 0.7191, "step": 31346 }, { "epoch": 0.8, "grad_norm": 1.1360645294189453, "learning_rate": 1.95850804377829e-06, "loss": 0.3618, "step": 31347 }, { "epoch": 0.8, "grad_norm": 1.3941211700439453, "learning_rate": 1.9580146117273035e-06, "loss": 0.6593, "step": 31348 }, { "epoch": 0.8, "grad_norm": 4.239083290100098, "learning_rate": 1.9575212350966257e-06, "loss": 0.6961, "step": 31349 }, { "epoch": 0.8, "grad_norm": 1.6409517526626587, "learning_rate": 1.9570279138896607e-06, "loss": 0.5616, "step": 31350 }, { "epoch": 0.8, "grad_norm": 1.0788570642471313, "learning_rate": 1.956534648109807e-06, "loss": 0.3507, "step": 31351 }, { "epoch": 0.8, "grad_norm": 1.2309194803237915, "learning_rate": 1.956041437760463e-06, "loss": 0.5175, "step": 31352 }, { "epoch": 0.8, "grad_norm": 0.8765345215797424, "learning_rate": 1.9555482828450244e-06, "loss": 0.4837, "step": 31353 }, { "epoch": 0.8, "grad_norm": 1.7351548671722412, "learning_rate": 1.9550551833668964e-06, "loss": 0.563, "step": 31354 }, { "epoch": 0.8, "grad_norm": 1.7582981586456299, "learning_rate": 1.9545621393294732e-06, "loss": 0.4064, "step": 31355 }, { "epoch": 0.8, "grad_norm": 1.1808902025222778, "learning_rate": 1.954069150736151e-06, "loss": 0.543, "step": 31356 }, { "epoch": 0.8, "grad_norm": 1.3602564334869385, "learning_rate": 1.953576217590332e-06, "loss": 0.5742, "step": 31357 }, { "epoch": 0.8, "grad_norm": 1.3066850900650024, "learning_rate": 1.9530833398954098e-06, "loss": 0.5777, "step": 31358 }, { "epoch": 0.8, "grad_norm": 4.982697486877441, "learning_rate": 1.9525905176547797e-06, "loss": 0.7293, "step": 31359 }, { "epoch": 0.8, "grad_norm": 1.3400322198867798, "learning_rate": 1.952097750871842e-06, "loss": 0.632, "step": 31360 }, { "epoch": 0.8, "grad_norm": 1.9141420125961304, "learning_rate": 1.9516050395499906e-06, "loss": 0.7297, "step": 31361 }, { "epoch": 0.8, "grad_norm": 1.589048981666565, "learning_rate": 1.9511123836926205e-06, "loss": 0.6725, "step": 31362 }, { "epoch": 0.8, "grad_norm": 1.3211798667907715, "learning_rate": 1.950619783303125e-06, "loss": 0.6311, "step": 31363 }, { "epoch": 0.8, "grad_norm": 11.31507396697998, "learning_rate": 1.9501272383849033e-06, "loss": 0.6318, "step": 31364 }, { "epoch": 0.8, "grad_norm": 5.545059680938721, "learning_rate": 1.949634748941347e-06, "loss": 0.5951, "step": 31365 }, { "epoch": 0.8, "grad_norm": 1.7323353290557861, "learning_rate": 1.9491423149758473e-06, "loss": 0.5337, "step": 31366 }, { "epoch": 0.8, "grad_norm": 1.161592721939087, "learning_rate": 1.9486499364918044e-06, "loss": 0.3973, "step": 31367 }, { "epoch": 0.8, "grad_norm": 2.0187675952911377, "learning_rate": 1.948157613492606e-06, "loss": 0.4789, "step": 31368 }, { "epoch": 0.8, "grad_norm": 1.5497498512268066, "learning_rate": 1.9476653459816485e-06, "loss": 0.5328, "step": 31369 }, { "epoch": 0.8, "grad_norm": 7.049090385437012, "learning_rate": 1.947173133962319e-06, "loss": 0.4628, "step": 31370 }, { "epoch": 0.8, "grad_norm": 2.1578705310821533, "learning_rate": 1.946680977438016e-06, "loss": 0.6002, "step": 31371 }, { "epoch": 0.8, "grad_norm": 1.3177446126937866, "learning_rate": 1.9461888764121273e-06, "loss": 0.4214, "step": 31372 }, { "epoch": 0.8, "grad_norm": 1.1662825345993042, "learning_rate": 1.945696830888043e-06, "loss": 0.5654, "step": 31373 }, { "epoch": 0.8, "grad_norm": 2.3331875801086426, "learning_rate": 1.9452048408691615e-06, "loss": 0.6113, "step": 31374 }, { "epoch": 0.8, "grad_norm": 1.192298173904419, "learning_rate": 1.9447129063588644e-06, "loss": 0.3807, "step": 31375 }, { "epoch": 0.8, "grad_norm": 1.4214566946029663, "learning_rate": 1.9442210273605435e-06, "loss": 0.5013, "step": 31376 }, { "epoch": 0.8, "grad_norm": 0.988898754119873, "learning_rate": 1.9437292038775912e-06, "loss": 0.5745, "step": 31377 }, { "epoch": 0.8, "grad_norm": 1.0050277709960938, "learning_rate": 1.9432374359133965e-06, "loss": 0.3638, "step": 31378 }, { "epoch": 0.8, "grad_norm": 1.5646696090698242, "learning_rate": 1.9427457234713455e-06, "loss": 0.5188, "step": 31379 }, { "epoch": 0.8, "grad_norm": 1.5932132005691528, "learning_rate": 1.9422540665548308e-06, "loss": 0.4867, "step": 31380 }, { "epoch": 0.8, "grad_norm": 4.26600456237793, "learning_rate": 1.9417624651672393e-06, "loss": 0.5958, "step": 31381 }, { "epoch": 0.8, "grad_norm": 1.8892525434494019, "learning_rate": 1.9412709193119584e-06, "loss": 0.6154, "step": 31382 }, { "epoch": 0.8, "grad_norm": 1.1570746898651123, "learning_rate": 1.940779428992372e-06, "loss": 0.6314, "step": 31383 }, { "epoch": 0.8, "grad_norm": 1.2169013023376465, "learning_rate": 1.9402879942118736e-06, "loss": 0.4768, "step": 31384 }, { "epoch": 0.8, "grad_norm": 1.5670212507247925, "learning_rate": 1.9397966149738467e-06, "loss": 0.4885, "step": 31385 }, { "epoch": 0.8, "grad_norm": 2.497537851333618, "learning_rate": 1.9393052912816745e-06, "loss": 0.5503, "step": 31386 }, { "epoch": 0.8, "grad_norm": 0.9864203929901123, "learning_rate": 1.9388140231387488e-06, "loss": 0.3907, "step": 31387 }, { "epoch": 0.8, "grad_norm": 1.2344086170196533, "learning_rate": 1.938322810548452e-06, "loss": 0.5108, "step": 31388 }, { "epoch": 0.8, "grad_norm": 1.7958248853683472, "learning_rate": 1.9378316535141683e-06, "loss": 0.6009, "step": 31389 }, { "epoch": 0.8, "grad_norm": 1.8479187488555908, "learning_rate": 1.937340552039285e-06, "loss": 0.4527, "step": 31390 }, { "epoch": 0.8, "grad_norm": 2.419504404067993, "learning_rate": 1.9368495061271854e-06, "loss": 0.4942, "step": 31391 }, { "epoch": 0.8, "grad_norm": 1.2740353345870972, "learning_rate": 1.936358515781254e-06, "loss": 0.5296, "step": 31392 }, { "epoch": 0.8, "grad_norm": 1.5711393356323242, "learning_rate": 1.93586758100487e-06, "loss": 0.571, "step": 31393 }, { "epoch": 0.8, "grad_norm": 1.8969851732254028, "learning_rate": 1.9353767018014234e-06, "loss": 0.4823, "step": 31394 }, { "epoch": 0.8, "grad_norm": 1.7018674612045288, "learning_rate": 1.934885878174294e-06, "loss": 0.5459, "step": 31395 }, { "epoch": 0.8, "grad_norm": 1.3693687915802002, "learning_rate": 1.934395110126861e-06, "loss": 0.4836, "step": 31396 }, { "epoch": 0.8, "grad_norm": 1.138408899307251, "learning_rate": 1.9339043976625127e-06, "loss": 0.515, "step": 31397 }, { "epoch": 0.8, "grad_norm": 1.197806477546692, "learning_rate": 1.9334137407846264e-06, "loss": 0.5014, "step": 31398 }, { "epoch": 0.8, "grad_norm": 1.2646117210388184, "learning_rate": 1.9329231394965865e-06, "loss": 0.3926, "step": 31399 }, { "epoch": 0.8, "grad_norm": 1.249837875366211, "learning_rate": 1.9324325938017687e-06, "loss": 0.5834, "step": 31400 }, { "epoch": 0.8, "grad_norm": 1.3993418216705322, "learning_rate": 1.931942103703559e-06, "loss": 0.4887, "step": 31401 }, { "epoch": 0.8, "grad_norm": 1.4413703680038452, "learning_rate": 1.9314516692053364e-06, "loss": 0.4518, "step": 31402 }, { "epoch": 0.8, "grad_norm": 1.433983325958252, "learning_rate": 1.9309612903104767e-06, "loss": 0.6265, "step": 31403 }, { "epoch": 0.8, "grad_norm": 1.2397916316986084, "learning_rate": 1.9304709670223653e-06, "loss": 0.473, "step": 31404 }, { "epoch": 0.8, "grad_norm": 1.710863471031189, "learning_rate": 1.929980699344377e-06, "loss": 0.5066, "step": 31405 }, { "epoch": 0.8, "grad_norm": 2.311345100402832, "learning_rate": 1.9294904872798925e-06, "loss": 0.8399, "step": 31406 }, { "epoch": 0.8, "grad_norm": 2.246431350708008, "learning_rate": 1.9290003308322867e-06, "loss": 0.4569, "step": 31407 }, { "epoch": 0.81, "grad_norm": 3.4465670585632324, "learning_rate": 1.928510230004943e-06, "loss": 0.549, "step": 31408 }, { "epoch": 0.81, "grad_norm": 0.8718434572219849, "learning_rate": 1.9280201848012347e-06, "loss": 0.3756, "step": 31409 }, { "epoch": 0.81, "grad_norm": 1.4281054735183716, "learning_rate": 1.9275301952245384e-06, "loss": 0.5555, "step": 31410 }, { "epoch": 0.81, "grad_norm": 2.5185303688049316, "learning_rate": 1.927040261278235e-06, "loss": 0.5224, "step": 31411 }, { "epoch": 0.81, "grad_norm": 1.8034212589263916, "learning_rate": 1.9265503829656973e-06, "loss": 0.5216, "step": 31412 }, { "epoch": 0.81, "grad_norm": 2.464715003967285, "learning_rate": 1.926060560290299e-06, "loss": 0.4501, "step": 31413 }, { "epoch": 0.81, "grad_norm": 2.890186071395874, "learning_rate": 1.9255707932554224e-06, "loss": 0.6543, "step": 31414 }, { "epoch": 0.81, "grad_norm": 2.0681910514831543, "learning_rate": 1.9250810818644393e-06, "loss": 0.5367, "step": 31415 }, { "epoch": 0.81, "grad_norm": 2.187474489212036, "learning_rate": 1.924591426120723e-06, "loss": 0.62, "step": 31416 }, { "epoch": 0.81, "grad_norm": 1.8703025579452515, "learning_rate": 1.924101826027648e-06, "loss": 0.6273, "step": 31417 }, { "epoch": 0.81, "grad_norm": 1.299070954322815, "learning_rate": 1.9236122815885914e-06, "loss": 0.4476, "step": 31418 }, { "epoch": 0.81, "grad_norm": 1.392868161201477, "learning_rate": 1.923122792806925e-06, "loss": 0.4528, "step": 31419 }, { "epoch": 0.81, "grad_norm": 1.6394890546798706, "learning_rate": 1.922633359686019e-06, "loss": 0.5187, "step": 31420 }, { "epoch": 0.81, "grad_norm": 1.0298317670822144, "learning_rate": 1.922143982229252e-06, "loss": 0.5429, "step": 31421 }, { "epoch": 0.81, "grad_norm": 5.0183610916137695, "learning_rate": 1.9216546604399934e-06, "loss": 0.7271, "step": 31422 }, { "epoch": 0.81, "grad_norm": 2.2654271125793457, "learning_rate": 1.921165394321616e-06, "loss": 0.71, "step": 31423 }, { "epoch": 0.81, "grad_norm": 1.4888548851013184, "learning_rate": 1.920676183877488e-06, "loss": 0.5969, "step": 31424 }, { "epoch": 0.81, "grad_norm": 1.670155644416809, "learning_rate": 1.920187029110987e-06, "loss": 0.4427, "step": 31425 }, { "epoch": 0.81, "grad_norm": 3.015470027923584, "learning_rate": 1.9196979300254804e-06, "loss": 0.4853, "step": 31426 }, { "epoch": 0.81, "grad_norm": 2.704380750656128, "learning_rate": 1.919208886624336e-06, "loss": 0.6476, "step": 31427 }, { "epoch": 0.81, "grad_norm": 1.2487515211105347, "learning_rate": 1.91871989891093e-06, "loss": 0.4498, "step": 31428 }, { "epoch": 0.81, "grad_norm": 1.8137648105621338, "learning_rate": 1.91823096688863e-06, "loss": 0.3525, "step": 31429 }, { "epoch": 0.81, "grad_norm": 1.8579493761062622, "learning_rate": 1.9177420905608013e-06, "loss": 0.5406, "step": 31430 }, { "epoch": 0.81, "grad_norm": 1.619012475013733, "learning_rate": 1.91725326993082e-06, "loss": 0.6411, "step": 31431 }, { "epoch": 0.81, "grad_norm": 1.2009767293930054, "learning_rate": 1.91676450500205e-06, "loss": 0.3867, "step": 31432 }, { "epoch": 0.81, "grad_norm": 2.167933464050293, "learning_rate": 1.916275795777861e-06, "loss": 0.5034, "step": 31433 }, { "epoch": 0.81, "grad_norm": 3.005964756011963, "learning_rate": 1.915787142261618e-06, "loss": 0.4351, "step": 31434 }, { "epoch": 0.81, "grad_norm": 2.3985791206359863, "learning_rate": 1.915298544456693e-06, "loss": 0.5736, "step": 31435 }, { "epoch": 0.81, "grad_norm": 1.1941123008728027, "learning_rate": 1.914810002366452e-06, "loss": 0.4531, "step": 31436 }, { "epoch": 0.81, "grad_norm": 6.948252201080322, "learning_rate": 1.9143215159942573e-06, "loss": 0.5427, "step": 31437 }, { "epoch": 0.81, "grad_norm": 2.5929958820343018, "learning_rate": 1.9138330853434827e-06, "loss": 0.6424, "step": 31438 }, { "epoch": 0.81, "grad_norm": 2.9688072204589844, "learning_rate": 1.9133447104174896e-06, "loss": 0.6294, "step": 31439 }, { "epoch": 0.81, "grad_norm": 1.588090419769287, "learning_rate": 1.912856391219644e-06, "loss": 0.4475, "step": 31440 }, { "epoch": 0.81, "grad_norm": 1.7386366128921509, "learning_rate": 1.912368127753309e-06, "loss": 0.4889, "step": 31441 }, { "epoch": 0.81, "grad_norm": 0.9879238605499268, "learning_rate": 1.911879920021854e-06, "loss": 0.4403, "step": 31442 }, { "epoch": 0.81, "grad_norm": 1.7799240350723267, "learning_rate": 1.911391768028642e-06, "loss": 0.5538, "step": 31443 }, { "epoch": 0.81, "grad_norm": 1.6741595268249512, "learning_rate": 1.9109036717770325e-06, "loss": 0.6655, "step": 31444 }, { "epoch": 0.81, "grad_norm": 1.4631909132003784, "learning_rate": 1.910415631270396e-06, "loss": 0.5948, "step": 31445 }, { "epoch": 0.81, "grad_norm": 3.707858085632324, "learning_rate": 1.909927646512092e-06, "loss": 0.6305, "step": 31446 }, { "epoch": 0.81, "grad_norm": 1.2568860054016113, "learning_rate": 1.9094397175054845e-06, "loss": 0.3945, "step": 31447 }, { "epoch": 0.81, "grad_norm": 2.2442777156829834, "learning_rate": 1.9089518442539334e-06, "loss": 0.4691, "step": 31448 }, { "epoch": 0.81, "grad_norm": 4.17171573638916, "learning_rate": 1.9084640267608048e-06, "loss": 0.6135, "step": 31449 }, { "epoch": 0.81, "grad_norm": 1.5987504720687866, "learning_rate": 1.9079762650294585e-06, "loss": 0.5287, "step": 31450 }, { "epoch": 0.81, "grad_norm": 7.272785663604736, "learning_rate": 1.907488559063253e-06, "loss": 0.5593, "step": 31451 }, { "epoch": 0.81, "grad_norm": 3.9911510944366455, "learning_rate": 1.907000908865555e-06, "loss": 0.3883, "step": 31452 }, { "epoch": 0.81, "grad_norm": 0.8256994485855103, "learning_rate": 1.906513314439723e-06, "loss": 0.4158, "step": 31453 }, { "epoch": 0.81, "grad_norm": 3.9748024940490723, "learning_rate": 1.906025775789113e-06, "loss": 0.5344, "step": 31454 }, { "epoch": 0.81, "grad_norm": 1.057648777961731, "learning_rate": 1.9055382929170906e-06, "loss": 0.3717, "step": 31455 }, { "epoch": 0.81, "grad_norm": 3.730872631072998, "learning_rate": 1.905050865827014e-06, "loss": 0.4918, "step": 31456 }, { "epoch": 0.81, "grad_norm": 1.228940725326538, "learning_rate": 1.9045634945222401e-06, "loss": 0.7036, "step": 31457 }, { "epoch": 0.81, "grad_norm": 1.6310782432556152, "learning_rate": 1.9040761790061257e-06, "loss": 0.5251, "step": 31458 }, { "epoch": 0.81, "grad_norm": 1.2249990701675415, "learning_rate": 1.9035889192820355e-06, "loss": 0.6264, "step": 31459 }, { "epoch": 0.81, "grad_norm": 1.6432034969329834, "learning_rate": 1.9031017153533227e-06, "loss": 0.5332, "step": 31460 }, { "epoch": 0.81, "grad_norm": 1.5059638023376465, "learning_rate": 1.9026145672233442e-06, "loss": 0.4431, "step": 31461 }, { "epoch": 0.81, "grad_norm": 8.133377075195312, "learning_rate": 1.902127474895461e-06, "loss": 0.5244, "step": 31462 }, { "epoch": 0.81, "grad_norm": 1.399056315422058, "learning_rate": 1.901640438373028e-06, "loss": 0.6632, "step": 31463 }, { "epoch": 0.81, "grad_norm": 1.565773844718933, "learning_rate": 1.9011534576594005e-06, "loss": 0.4462, "step": 31464 }, { "epoch": 0.81, "grad_norm": 1.5069483518600464, "learning_rate": 1.9006665327579331e-06, "loss": 0.4739, "step": 31465 }, { "epoch": 0.81, "grad_norm": 1.0172561407089233, "learning_rate": 1.900179663671986e-06, "loss": 0.4795, "step": 31466 }, { "epoch": 0.81, "grad_norm": 5.711528301239014, "learning_rate": 1.8996928504049117e-06, "loss": 0.5726, "step": 31467 }, { "epoch": 0.81, "grad_norm": 1.6725927591323853, "learning_rate": 1.8992060929600632e-06, "loss": 0.4947, "step": 31468 }, { "epoch": 0.81, "grad_norm": 6.19054651260376, "learning_rate": 1.8987193913407987e-06, "loss": 0.4465, "step": 31469 }, { "epoch": 0.81, "grad_norm": 2.0425643920898438, "learning_rate": 1.8982327455504712e-06, "loss": 0.609, "step": 31470 }, { "epoch": 0.81, "grad_norm": 1.730899453163147, "learning_rate": 1.8977461555924325e-06, "loss": 0.5328, "step": 31471 }, { "epoch": 0.81, "grad_norm": 0.9801363348960876, "learning_rate": 1.8972596214700357e-06, "loss": 0.3374, "step": 31472 }, { "epoch": 0.81, "grad_norm": 3.759124994277954, "learning_rate": 1.8967731431866366e-06, "loss": 0.3737, "step": 31473 }, { "epoch": 0.81, "grad_norm": 1.6738933324813843, "learning_rate": 1.896286720745586e-06, "loss": 0.6138, "step": 31474 }, { "epoch": 0.81, "grad_norm": 1.1268624067306519, "learning_rate": 1.8958003541502346e-06, "loss": 0.3938, "step": 31475 }, { "epoch": 0.81, "grad_norm": 1.9029420614242554, "learning_rate": 1.8953140434039374e-06, "loss": 0.5426, "step": 31476 }, { "epoch": 0.81, "grad_norm": 3.7719995975494385, "learning_rate": 1.8948277885100441e-06, "loss": 0.4817, "step": 31477 }, { "epoch": 0.81, "grad_norm": 1.3038021326065063, "learning_rate": 1.894341589471902e-06, "loss": 0.4638, "step": 31478 }, { "epoch": 0.81, "grad_norm": 2.3757593631744385, "learning_rate": 1.8938554462928681e-06, "loss": 0.4805, "step": 31479 }, { "epoch": 0.81, "grad_norm": 0.9423127770423889, "learning_rate": 1.8933693589762904e-06, "loss": 0.6172, "step": 31480 }, { "epoch": 0.81, "grad_norm": 2.2174198627471924, "learning_rate": 1.8928833275255175e-06, "loss": 0.5746, "step": 31481 }, { "epoch": 0.81, "grad_norm": 1.8561769723892212, "learning_rate": 1.8923973519438976e-06, "loss": 0.4201, "step": 31482 }, { "epoch": 0.81, "grad_norm": 1.7638005018234253, "learning_rate": 1.8919114322347831e-06, "loss": 0.4826, "step": 31483 }, { "epoch": 0.81, "grad_norm": 1.3152555227279663, "learning_rate": 1.8914255684015216e-06, "loss": 0.586, "step": 31484 }, { "epoch": 0.81, "grad_norm": 2.089672803878784, "learning_rate": 1.8909397604474579e-06, "loss": 0.5405, "step": 31485 }, { "epoch": 0.81, "grad_norm": 2.5180749893188477, "learning_rate": 1.8904540083759447e-06, "loss": 0.5565, "step": 31486 }, { "epoch": 0.81, "grad_norm": 2.027561902999878, "learning_rate": 1.8899683121903289e-06, "loss": 0.4764, "step": 31487 }, { "epoch": 0.81, "grad_norm": 1.1460669040679932, "learning_rate": 1.8894826718939552e-06, "loss": 0.4995, "step": 31488 }, { "epoch": 0.81, "grad_norm": 1.7634371519088745, "learning_rate": 1.8889970874901697e-06, "loss": 0.5751, "step": 31489 }, { "epoch": 0.81, "grad_norm": 1.4776568412780762, "learning_rate": 1.8885115589823221e-06, "loss": 0.4084, "step": 31490 }, { "epoch": 0.81, "grad_norm": 1.3569917678833008, "learning_rate": 1.8880260863737577e-06, "loss": 0.4455, "step": 31491 }, { "epoch": 0.81, "grad_norm": 6.737591743469238, "learning_rate": 1.8875406696678179e-06, "loss": 0.5278, "step": 31492 }, { "epoch": 0.81, "grad_norm": 2.9836819171905518, "learning_rate": 1.8870553088678534e-06, "loss": 0.365, "step": 31493 }, { "epoch": 0.81, "grad_norm": 1.5593891143798828, "learning_rate": 1.8865700039772062e-06, "loss": 0.4686, "step": 31494 }, { "epoch": 0.81, "grad_norm": 1.2399441003799438, "learning_rate": 1.88608475499922e-06, "loss": 0.4784, "step": 31495 }, { "epoch": 0.81, "grad_norm": 2.471484422683716, "learning_rate": 1.8855995619372413e-06, "loss": 0.6959, "step": 31496 }, { "epoch": 0.81, "grad_norm": 2.6951677799224854, "learning_rate": 1.885114424794613e-06, "loss": 0.5632, "step": 31497 }, { "epoch": 0.81, "grad_norm": 2.741229772567749, "learning_rate": 1.8846293435746777e-06, "loss": 0.7025, "step": 31498 }, { "epoch": 0.81, "grad_norm": 1.7749146223068237, "learning_rate": 1.884144318280776e-06, "loss": 0.6079, "step": 31499 }, { "epoch": 0.81, "grad_norm": 1.326459288597107, "learning_rate": 1.8836593489162559e-06, "loss": 0.427, "step": 31500 }, { "epoch": 0.81, "grad_norm": 1.1785123348236084, "learning_rate": 1.883174435484455e-06, "loss": 0.4844, "step": 31501 }, { "epoch": 0.81, "grad_norm": 1.3797688484191895, "learning_rate": 1.8826895779887155e-06, "loss": 0.5439, "step": 31502 }, { "epoch": 0.81, "grad_norm": 2.8121817111968994, "learning_rate": 1.8822047764323814e-06, "loss": 0.4017, "step": 31503 }, { "epoch": 0.81, "grad_norm": 10.064189910888672, "learning_rate": 1.8817200308187911e-06, "loss": 0.5443, "step": 31504 }, { "epoch": 0.81, "grad_norm": 1.7558623552322388, "learning_rate": 1.8812353411512874e-06, "loss": 0.4254, "step": 31505 }, { "epoch": 0.81, "grad_norm": 1.461667537689209, "learning_rate": 1.8807507074332053e-06, "loss": 0.6374, "step": 31506 }, { "epoch": 0.81, "grad_norm": 1.1963883638381958, "learning_rate": 1.8802661296678915e-06, "loss": 0.4102, "step": 31507 }, { "epoch": 0.81, "grad_norm": 1.1225836277008057, "learning_rate": 1.8797816078586816e-06, "loss": 0.4349, "step": 31508 }, { "epoch": 0.81, "grad_norm": 7.367664813995361, "learning_rate": 1.8792971420089135e-06, "loss": 0.6147, "step": 31509 }, { "epoch": 0.81, "grad_norm": 1.3138467073440552, "learning_rate": 1.8788127321219297e-06, "loss": 0.5399, "step": 31510 }, { "epoch": 0.81, "grad_norm": 1.8181264400482178, "learning_rate": 1.8783283782010665e-06, "loss": 0.5857, "step": 31511 }, { "epoch": 0.81, "grad_norm": 7.598896503448486, "learning_rate": 1.8778440802496611e-06, "loss": 0.6367, "step": 31512 }, { "epoch": 0.81, "grad_norm": 1.7110849618911743, "learning_rate": 1.8773598382710489e-06, "loss": 0.4725, "step": 31513 }, { "epoch": 0.81, "grad_norm": 2.2037065029144287, "learning_rate": 1.8768756522685727e-06, "loss": 0.6463, "step": 31514 }, { "epoch": 0.81, "grad_norm": 1.4301894903182983, "learning_rate": 1.8763915222455653e-06, "loss": 0.4384, "step": 31515 }, { "epoch": 0.81, "grad_norm": 1.1724700927734375, "learning_rate": 1.875907448205362e-06, "loss": 0.6182, "step": 31516 }, { "epoch": 0.81, "grad_norm": 22.03631591796875, "learning_rate": 1.8754234301513031e-06, "loss": 0.4828, "step": 31517 }, { "epoch": 0.81, "grad_norm": 1.4471508264541626, "learning_rate": 1.8749394680867206e-06, "loss": 0.463, "step": 31518 }, { "epoch": 0.81, "grad_norm": 5.3952131271362305, "learning_rate": 1.8744555620149484e-06, "loss": 0.6534, "step": 31519 }, { "epoch": 0.81, "grad_norm": 2.621678590774536, "learning_rate": 1.8739717119393275e-06, "loss": 0.5034, "step": 31520 }, { "epoch": 0.81, "grad_norm": 1.2593833208084106, "learning_rate": 1.873487917863187e-06, "loss": 0.4421, "step": 31521 }, { "epoch": 0.81, "grad_norm": 1.2585935592651367, "learning_rate": 1.8730041797898624e-06, "loss": 0.4157, "step": 31522 }, { "epoch": 0.81, "grad_norm": 1.455722451210022, "learning_rate": 1.872520497722684e-06, "loss": 0.5134, "step": 31523 }, { "epoch": 0.81, "grad_norm": 3.0771706104278564, "learning_rate": 1.8720368716649916e-06, "loss": 0.7938, "step": 31524 }, { "epoch": 0.81, "grad_norm": 1.515649437904358, "learning_rate": 1.8715533016201138e-06, "loss": 0.3777, "step": 31525 }, { "epoch": 0.81, "grad_norm": 1.5832264423370361, "learning_rate": 1.8710697875913818e-06, "loss": 0.5793, "step": 31526 }, { "epoch": 0.81, "grad_norm": 1.1491466760635376, "learning_rate": 1.8705863295821325e-06, "loss": 0.4143, "step": 31527 }, { "epoch": 0.81, "grad_norm": 3.0872104167938232, "learning_rate": 1.870102927595694e-06, "loss": 0.5724, "step": 31528 }, { "epoch": 0.81, "grad_norm": 7.257349491119385, "learning_rate": 1.869619581635399e-06, "loss": 0.5146, "step": 31529 }, { "epoch": 0.81, "grad_norm": 1.280434250831604, "learning_rate": 1.8691362917045752e-06, "loss": 0.6204, "step": 31530 }, { "epoch": 0.81, "grad_norm": 4.902084827423096, "learning_rate": 1.8686530578065586e-06, "loss": 0.5399, "step": 31531 }, { "epoch": 0.81, "grad_norm": 1.281126856803894, "learning_rate": 1.8681698799446756e-06, "loss": 0.5476, "step": 31532 }, { "epoch": 0.81, "grad_norm": 1.1445063352584839, "learning_rate": 1.8676867581222558e-06, "loss": 0.4879, "step": 31533 }, { "epoch": 0.81, "grad_norm": 1.4858510494232178, "learning_rate": 1.8672036923426307e-06, "loss": 0.4791, "step": 31534 }, { "epoch": 0.81, "grad_norm": 1.7713252305984497, "learning_rate": 1.8667206826091288e-06, "loss": 0.4199, "step": 31535 }, { "epoch": 0.81, "grad_norm": 6.187729835510254, "learning_rate": 1.8662377289250755e-06, "loss": 0.6706, "step": 31536 }, { "epoch": 0.81, "grad_norm": 1.6848679780960083, "learning_rate": 1.8657548312938045e-06, "loss": 0.5408, "step": 31537 }, { "epoch": 0.81, "grad_norm": 1.5297976732254028, "learning_rate": 1.865271989718641e-06, "loss": 0.5631, "step": 31538 }, { "epoch": 0.81, "grad_norm": 11.912330627441406, "learning_rate": 1.8647892042029113e-06, "loss": 0.4949, "step": 31539 }, { "epoch": 0.81, "grad_norm": 2.7535033226013184, "learning_rate": 1.8643064747499417e-06, "loss": 0.5154, "step": 31540 }, { "epoch": 0.81, "grad_norm": 1.6223466396331787, "learning_rate": 1.8638238013630628e-06, "loss": 0.6521, "step": 31541 }, { "epoch": 0.81, "grad_norm": 1.1699515581130981, "learning_rate": 1.8633411840455984e-06, "loss": 0.5943, "step": 31542 }, { "epoch": 0.81, "grad_norm": 1.8802248239517212, "learning_rate": 1.8628586228008726e-06, "loss": 0.4841, "step": 31543 }, { "epoch": 0.81, "grad_norm": 5.4329118728637695, "learning_rate": 1.8623761176322153e-06, "loss": 0.4515, "step": 31544 }, { "epoch": 0.81, "grad_norm": 1.1494759321212769, "learning_rate": 1.861893668542949e-06, "loss": 0.3506, "step": 31545 }, { "epoch": 0.81, "grad_norm": 1.4518131017684937, "learning_rate": 1.8614112755363978e-06, "loss": 0.5632, "step": 31546 }, { "epoch": 0.81, "grad_norm": 1.1225085258483887, "learning_rate": 1.8609289386158857e-06, "loss": 0.5157, "step": 31547 }, { "epoch": 0.81, "grad_norm": 1.3570568561553955, "learning_rate": 1.8604466577847402e-06, "loss": 0.4078, "step": 31548 }, { "epoch": 0.81, "grad_norm": 1.4068677425384521, "learning_rate": 1.859964433046282e-06, "loss": 0.4064, "step": 31549 }, { "epoch": 0.81, "grad_norm": 1.2957468032836914, "learning_rate": 1.8594822644038325e-06, "loss": 0.409, "step": 31550 }, { "epoch": 0.81, "grad_norm": 1.256770372390747, "learning_rate": 1.8590001518607203e-06, "loss": 0.4631, "step": 31551 }, { "epoch": 0.81, "grad_norm": 1.7314037084579468, "learning_rate": 1.858518095420263e-06, "loss": 0.6167, "step": 31552 }, { "epoch": 0.81, "grad_norm": 4.019351482391357, "learning_rate": 1.8580360950857845e-06, "loss": 0.5477, "step": 31553 }, { "epoch": 0.81, "grad_norm": 1.501456379890442, "learning_rate": 1.8575541508606042e-06, "loss": 0.5012, "step": 31554 }, { "epoch": 0.81, "grad_norm": 1.3167216777801514, "learning_rate": 1.8570722627480465e-06, "loss": 0.4552, "step": 31555 }, { "epoch": 0.81, "grad_norm": 2.177884578704834, "learning_rate": 1.8565904307514315e-06, "loss": 0.5286, "step": 31556 }, { "epoch": 0.81, "grad_norm": 1.696282148361206, "learning_rate": 1.856108654874077e-06, "loss": 0.547, "step": 31557 }, { "epoch": 0.81, "grad_norm": 1.445446491241455, "learning_rate": 1.8556269351193068e-06, "loss": 0.463, "step": 31558 }, { "epoch": 0.81, "grad_norm": 2.1388022899627686, "learning_rate": 1.8551452714904395e-06, "loss": 0.5767, "step": 31559 }, { "epoch": 0.81, "grad_norm": 1.9026919603347778, "learning_rate": 1.8546636639907912e-06, "loss": 0.4365, "step": 31560 }, { "epoch": 0.81, "grad_norm": 2.0977351665496826, "learning_rate": 1.854182112623686e-06, "loss": 0.5039, "step": 31561 }, { "epoch": 0.81, "grad_norm": 1.3531781435012817, "learning_rate": 1.8537006173924398e-06, "loss": 0.4393, "step": 31562 }, { "epoch": 0.81, "grad_norm": 0.8431629538536072, "learning_rate": 1.8532191783003706e-06, "loss": 0.4141, "step": 31563 }, { "epoch": 0.81, "grad_norm": 4.5135650634765625, "learning_rate": 1.8527377953507951e-06, "loss": 0.4526, "step": 31564 }, { "epoch": 0.81, "grad_norm": 1.4145394563674927, "learning_rate": 1.8522564685470335e-06, "loss": 0.613, "step": 31565 }, { "epoch": 0.81, "grad_norm": 1.8337634801864624, "learning_rate": 1.8517751978924025e-06, "loss": 0.541, "step": 31566 }, { "epoch": 0.81, "grad_norm": 1.312968134880066, "learning_rate": 1.8512939833902144e-06, "loss": 0.5091, "step": 31567 }, { "epoch": 0.81, "grad_norm": 1.406313419342041, "learning_rate": 1.8508128250437907e-06, "loss": 0.4986, "step": 31568 }, { "epoch": 0.81, "grad_norm": 1.4651950597763062, "learning_rate": 1.8503317228564466e-06, "loss": 0.6416, "step": 31569 }, { "epoch": 0.81, "grad_norm": 1.7658885717391968, "learning_rate": 1.849850676831495e-06, "loss": 0.533, "step": 31570 }, { "epoch": 0.81, "grad_norm": 2.2736315727233887, "learning_rate": 1.84936968697225e-06, "loss": 0.5914, "step": 31571 }, { "epoch": 0.81, "grad_norm": 0.9528161883354187, "learning_rate": 1.848888753282031e-06, "loss": 0.3042, "step": 31572 }, { "epoch": 0.81, "grad_norm": 5.884219646453857, "learning_rate": 1.848407875764149e-06, "loss": 0.5137, "step": 31573 }, { "epoch": 0.81, "grad_norm": 6.923291206359863, "learning_rate": 1.8479270544219162e-06, "loss": 0.5836, "step": 31574 }, { "epoch": 0.81, "grad_norm": 2.001230478286743, "learning_rate": 1.847446289258652e-06, "loss": 0.5221, "step": 31575 }, { "epoch": 0.81, "grad_norm": 1.2192163467407227, "learning_rate": 1.8469655802776654e-06, "loss": 0.4199, "step": 31576 }, { "epoch": 0.81, "grad_norm": 1.1664553880691528, "learning_rate": 1.8464849274822672e-06, "loss": 0.5232, "step": 31577 }, { "epoch": 0.81, "grad_norm": 1.3603955507278442, "learning_rate": 1.8460043308757747e-06, "loss": 0.5261, "step": 31578 }, { "epoch": 0.81, "grad_norm": 1.1231839656829834, "learning_rate": 1.8455237904614986e-06, "loss": 0.3984, "step": 31579 }, { "epoch": 0.81, "grad_norm": 1.1390422582626343, "learning_rate": 1.8450433062427487e-06, "loss": 0.5135, "step": 31580 }, { "epoch": 0.81, "grad_norm": 1.341110348701477, "learning_rate": 1.8445628782228342e-06, "loss": 0.436, "step": 31581 }, { "epoch": 0.81, "grad_norm": 1.0269700288772583, "learning_rate": 1.8440825064050705e-06, "loss": 0.5978, "step": 31582 }, { "epoch": 0.81, "grad_norm": 1.593265175819397, "learning_rate": 1.843602190792767e-06, "loss": 0.5946, "step": 31583 }, { "epoch": 0.81, "grad_norm": 1.0363044738769531, "learning_rate": 1.84312193138923e-06, "loss": 0.414, "step": 31584 }, { "epoch": 0.81, "grad_norm": 2.2510573863983154, "learning_rate": 1.842641728197775e-06, "loss": 0.6275, "step": 31585 }, { "epoch": 0.81, "grad_norm": 1.1999566555023193, "learning_rate": 1.8421615812217075e-06, "loss": 0.4026, "step": 31586 }, { "epoch": 0.81, "grad_norm": 2.0100297927856445, "learning_rate": 1.8416814904643376e-06, "loss": 0.5364, "step": 31587 }, { "epoch": 0.81, "grad_norm": 2.9074673652648926, "learning_rate": 1.8412014559289704e-06, "loss": 0.5564, "step": 31588 }, { "epoch": 0.81, "grad_norm": 3.528319835662842, "learning_rate": 1.8407214776189197e-06, "loss": 0.8391, "step": 31589 }, { "epoch": 0.81, "grad_norm": 2.223145008087158, "learning_rate": 1.840241555537491e-06, "loss": 0.6659, "step": 31590 }, { "epoch": 0.81, "grad_norm": 1.1620880365371704, "learning_rate": 1.8397616896879877e-06, "loss": 0.4748, "step": 31591 }, { "epoch": 0.81, "grad_norm": 4.033310890197754, "learning_rate": 1.8392818800737222e-06, "loss": 0.5709, "step": 31592 }, { "epoch": 0.81, "grad_norm": 2.7707600593566895, "learning_rate": 1.8388021266980004e-06, "loss": 0.5965, "step": 31593 }, { "epoch": 0.81, "grad_norm": 2.1536035537719727, "learning_rate": 1.8383224295641256e-06, "loss": 0.4279, "step": 31594 }, { "epoch": 0.81, "grad_norm": 1.2912698984146118, "learning_rate": 1.8378427886754035e-06, "loss": 0.5057, "step": 31595 }, { "epoch": 0.81, "grad_norm": 1.4091222286224365, "learning_rate": 1.837363204035143e-06, "loss": 0.5439, "step": 31596 }, { "epoch": 0.81, "grad_norm": 1.44217050075531, "learning_rate": 1.8368836756466469e-06, "loss": 0.5542, "step": 31597 }, { "epoch": 0.81, "grad_norm": 1.8506593704223633, "learning_rate": 1.8364042035132168e-06, "loss": 0.5127, "step": 31598 }, { "epoch": 0.81, "grad_norm": 1.2747308015823364, "learning_rate": 1.8359247876381636e-06, "loss": 0.3837, "step": 31599 }, { "epoch": 0.81, "grad_norm": 1.6568723917007446, "learning_rate": 1.8354454280247868e-06, "loss": 0.6755, "step": 31600 }, { "epoch": 0.81, "grad_norm": 1.6787786483764648, "learning_rate": 1.8349661246763884e-06, "loss": 0.6361, "step": 31601 }, { "epoch": 0.81, "grad_norm": 2.4877941608428955, "learning_rate": 1.8344868775962766e-06, "loss": 0.6725, "step": 31602 }, { "epoch": 0.81, "grad_norm": 5.429729461669922, "learning_rate": 1.834007686787751e-06, "loss": 0.5172, "step": 31603 }, { "epoch": 0.81, "grad_norm": 1.1798726320266724, "learning_rate": 1.8335285522541136e-06, "loss": 0.5768, "step": 31604 }, { "epoch": 0.81, "grad_norm": 1.4075745344161987, "learning_rate": 1.8330494739986648e-06, "loss": 0.5016, "step": 31605 }, { "epoch": 0.81, "grad_norm": 1.3213716745376587, "learning_rate": 1.83257045202471e-06, "loss": 0.3386, "step": 31606 }, { "epoch": 0.81, "grad_norm": 11.158539772033691, "learning_rate": 1.8320914863355487e-06, "loss": 0.7633, "step": 31607 }, { "epoch": 0.81, "grad_norm": 1.8269656896591187, "learning_rate": 1.8316125769344794e-06, "loss": 0.4631, "step": 31608 }, { "epoch": 0.81, "grad_norm": 1.2826703786849976, "learning_rate": 1.8311337238248061e-06, "loss": 0.4552, "step": 31609 }, { "epoch": 0.81, "grad_norm": 1.2832587957382202, "learning_rate": 1.8306549270098273e-06, "loss": 0.4479, "step": 31610 }, { "epoch": 0.81, "grad_norm": 3.398914337158203, "learning_rate": 1.8301761864928424e-06, "loss": 0.4965, "step": 31611 }, { "epoch": 0.81, "grad_norm": 2.267043352127075, "learning_rate": 1.8296975022771467e-06, "loss": 0.5732, "step": 31612 }, { "epoch": 0.81, "grad_norm": 1.3148536682128906, "learning_rate": 1.8292188743660466e-06, "loss": 0.5056, "step": 31613 }, { "epoch": 0.81, "grad_norm": 1.7053651809692383, "learning_rate": 1.8287403027628358e-06, "loss": 0.4848, "step": 31614 }, { "epoch": 0.81, "grad_norm": 1.0794973373413086, "learning_rate": 1.8282617874708109e-06, "loss": 0.4947, "step": 31615 }, { "epoch": 0.81, "grad_norm": 2.0289347171783447, "learning_rate": 1.8277833284932745e-06, "loss": 0.4334, "step": 31616 }, { "epoch": 0.81, "grad_norm": 2.202122926712036, "learning_rate": 1.827304925833524e-06, "loss": 0.4223, "step": 31617 }, { "epoch": 0.81, "grad_norm": 2.410414218902588, "learning_rate": 1.826826579494848e-06, "loss": 0.5633, "step": 31618 }, { "epoch": 0.81, "grad_norm": 1.6932157278060913, "learning_rate": 1.8263482894805507e-06, "loss": 0.6374, "step": 31619 }, { "epoch": 0.81, "grad_norm": 1.6444146633148193, "learning_rate": 1.8258700557939257e-06, "loss": 0.4597, "step": 31620 }, { "epoch": 0.81, "grad_norm": 1.5006966590881348, "learning_rate": 1.8253918784382662e-06, "loss": 0.462, "step": 31621 }, { "epoch": 0.81, "grad_norm": 1.3111897706985474, "learning_rate": 1.824913757416873e-06, "loss": 0.4754, "step": 31622 }, { "epoch": 0.81, "grad_norm": 2.0910799503326416, "learning_rate": 1.8244356927330375e-06, "loss": 0.5332, "step": 31623 }, { "epoch": 0.81, "grad_norm": 1.167001724243164, "learning_rate": 1.8239576843900553e-06, "loss": 0.5505, "step": 31624 }, { "epoch": 0.81, "grad_norm": 5.11346435546875, "learning_rate": 1.8234797323912167e-06, "loss": 0.9025, "step": 31625 }, { "epoch": 0.81, "grad_norm": 1.5246782302856445, "learning_rate": 1.8230018367398228e-06, "loss": 0.5396, "step": 31626 }, { "epoch": 0.81, "grad_norm": 1.600612998008728, "learning_rate": 1.8225239974391618e-06, "loss": 0.4092, "step": 31627 }, { "epoch": 0.81, "grad_norm": 0.9859769344329834, "learning_rate": 1.8220462144925255e-06, "loss": 0.4586, "step": 31628 }, { "epoch": 0.81, "grad_norm": 1.747253656387329, "learning_rate": 1.8215684879032113e-06, "loss": 0.6037, "step": 31629 }, { "epoch": 0.81, "grad_norm": 1.8743077516555786, "learning_rate": 1.821090817674509e-06, "loss": 0.5714, "step": 31630 }, { "epoch": 0.81, "grad_norm": 11.626463890075684, "learning_rate": 1.820613203809708e-06, "loss": 0.5398, "step": 31631 }, { "epoch": 0.81, "grad_norm": 7.896973609924316, "learning_rate": 1.8201356463121044e-06, "loss": 0.4776, "step": 31632 }, { "epoch": 0.81, "grad_norm": 1.3328545093536377, "learning_rate": 1.8196581451849871e-06, "loss": 0.4745, "step": 31633 }, { "epoch": 0.81, "grad_norm": 1.1698541641235352, "learning_rate": 1.8191807004316452e-06, "loss": 0.3998, "step": 31634 }, { "epoch": 0.81, "grad_norm": 1.4386799335479736, "learning_rate": 1.8187033120553688e-06, "loss": 0.4299, "step": 31635 }, { "epoch": 0.81, "grad_norm": 1.6798650026321411, "learning_rate": 1.8182259800594516e-06, "loss": 0.5257, "step": 31636 }, { "epoch": 0.81, "grad_norm": 4.269115924835205, "learning_rate": 1.817748704447181e-06, "loss": 0.5356, "step": 31637 }, { "epoch": 0.81, "grad_norm": 1.2622672319412231, "learning_rate": 1.8172714852218421e-06, "loss": 0.5153, "step": 31638 }, { "epoch": 0.81, "grad_norm": 1.6117700338363647, "learning_rate": 1.8167943223867312e-06, "loss": 0.5225, "step": 31639 }, { "epoch": 0.81, "grad_norm": 2.087782621383667, "learning_rate": 1.8163172159451314e-06, "loss": 0.5434, "step": 31640 }, { "epoch": 0.81, "grad_norm": 0.971181333065033, "learning_rate": 1.8158401659003322e-06, "loss": 0.4316, "step": 31641 }, { "epoch": 0.81, "grad_norm": 0.8017496466636658, "learning_rate": 1.8153631722556187e-06, "loss": 0.4857, "step": 31642 }, { "epoch": 0.81, "grad_norm": 1.6574066877365112, "learning_rate": 1.8148862350142826e-06, "loss": 0.4211, "step": 31643 }, { "epoch": 0.81, "grad_norm": 2.7084975242614746, "learning_rate": 1.8144093541796082e-06, "loss": 0.4457, "step": 31644 }, { "epoch": 0.81, "grad_norm": 2.0723845958709717, "learning_rate": 1.8139325297548782e-06, "loss": 0.5246, "step": 31645 }, { "epoch": 0.81, "grad_norm": 1.4046262502670288, "learning_rate": 1.8134557617433856e-06, "loss": 0.3867, "step": 31646 }, { "epoch": 0.81, "grad_norm": 1.3892014026641846, "learning_rate": 1.8129790501484123e-06, "loss": 0.7006, "step": 31647 }, { "epoch": 0.81, "grad_norm": 6.84841251373291, "learning_rate": 1.8125023949732445e-06, "loss": 0.5476, "step": 31648 }, { "epoch": 0.81, "grad_norm": 2.808015823364258, "learning_rate": 1.812025796221163e-06, "loss": 0.345, "step": 31649 }, { "epoch": 0.81, "grad_norm": 1.951905369758606, "learning_rate": 1.8115492538954593e-06, "loss": 0.7187, "step": 31650 }, { "epoch": 0.81, "grad_norm": 4.716053485870361, "learning_rate": 1.8110727679994122e-06, "loss": 0.5751, "step": 31651 }, { "epoch": 0.81, "grad_norm": 2.0745136737823486, "learning_rate": 1.8105963385363046e-06, "loss": 0.4168, "step": 31652 }, { "epoch": 0.81, "grad_norm": 3.542717695236206, "learning_rate": 1.8101199655094248e-06, "loss": 0.5045, "step": 31653 }, { "epoch": 0.81, "grad_norm": 1.1915031671524048, "learning_rate": 1.8096436489220525e-06, "loss": 0.5215, "step": 31654 }, { "epoch": 0.81, "grad_norm": 1.3382289409637451, "learning_rate": 1.8091673887774685e-06, "loss": 0.4306, "step": 31655 }, { "epoch": 0.81, "grad_norm": 1.1814677715301514, "learning_rate": 1.808691185078959e-06, "loss": 0.5632, "step": 31656 }, { "epoch": 0.81, "grad_norm": 1.8324161767959595, "learning_rate": 1.8082150378298036e-06, "loss": 0.6415, "step": 31657 }, { "epoch": 0.81, "grad_norm": 1.6301525831222534, "learning_rate": 1.807738947033284e-06, "loss": 0.5636, "step": 31658 }, { "epoch": 0.81, "grad_norm": 1.1744312047958374, "learning_rate": 1.8072629126926777e-06, "loss": 0.3467, "step": 31659 }, { "epoch": 0.81, "grad_norm": 1.1060431003570557, "learning_rate": 1.806786934811271e-06, "loss": 0.4692, "step": 31660 }, { "epoch": 0.81, "grad_norm": 1.9773567914962769, "learning_rate": 1.8063110133923412e-06, "loss": 0.5072, "step": 31661 }, { "epoch": 0.81, "grad_norm": 2.1255691051483154, "learning_rate": 1.8058351484391645e-06, "loss": 0.4912, "step": 31662 }, { "epoch": 0.81, "grad_norm": 3.990098714828491, "learning_rate": 1.8053593399550274e-06, "loss": 0.5057, "step": 31663 }, { "epoch": 0.81, "grad_norm": 1.7422606945037842, "learning_rate": 1.8048835879432059e-06, "loss": 0.6115, "step": 31664 }, { "epoch": 0.81, "grad_norm": 1.505576252937317, "learning_rate": 1.8044078924069774e-06, "loss": 0.558, "step": 31665 }, { "epoch": 0.81, "grad_norm": 1.4009771347045898, "learning_rate": 1.8039322533496184e-06, "loss": 0.5643, "step": 31666 }, { "epoch": 0.81, "grad_norm": 1.4333032369613647, "learning_rate": 1.8034566707744116e-06, "loss": 0.5685, "step": 31667 }, { "epoch": 0.81, "grad_norm": 1.2402253150939941, "learning_rate": 1.8029811446846313e-06, "loss": 0.4137, "step": 31668 }, { "epoch": 0.81, "grad_norm": 1.9643495082855225, "learning_rate": 1.8025056750835535e-06, "loss": 0.4532, "step": 31669 }, { "epoch": 0.81, "grad_norm": 1.6750929355621338, "learning_rate": 1.8020302619744579e-06, "loss": 0.6688, "step": 31670 }, { "epoch": 0.81, "grad_norm": 1.523718237876892, "learning_rate": 1.8015549053606197e-06, "loss": 0.4945, "step": 31671 }, { "epoch": 0.81, "grad_norm": 1.4776028394699097, "learning_rate": 1.8010796052453116e-06, "loss": 0.3664, "step": 31672 }, { "epoch": 0.81, "grad_norm": 1.234749674797058, "learning_rate": 1.8006043616318148e-06, "loss": 0.4057, "step": 31673 }, { "epoch": 0.81, "grad_norm": 1.226428508758545, "learning_rate": 1.8001291745234006e-06, "loss": 0.5647, "step": 31674 }, { "epoch": 0.81, "grad_norm": 5.383359909057617, "learning_rate": 1.7996540439233457e-06, "loss": 0.5711, "step": 31675 }, { "epoch": 0.81, "grad_norm": 3.547121286392212, "learning_rate": 1.7991789698349194e-06, "loss": 0.6392, "step": 31676 }, { "epoch": 0.81, "grad_norm": 1.9233763217926025, "learning_rate": 1.7987039522614024e-06, "loss": 0.6403, "step": 31677 }, { "epoch": 0.81, "grad_norm": 1.3351465463638306, "learning_rate": 1.7982289912060658e-06, "loss": 0.4522, "step": 31678 }, { "epoch": 0.81, "grad_norm": 1.5415898561477661, "learning_rate": 1.7977540866721787e-06, "loss": 0.5726, "step": 31679 }, { "epoch": 0.81, "grad_norm": 0.8867086172103882, "learning_rate": 1.79727923866302e-06, "loss": 0.3726, "step": 31680 }, { "epoch": 0.81, "grad_norm": 7.489649772644043, "learning_rate": 1.7968044471818601e-06, "loss": 0.5521, "step": 31681 }, { "epoch": 0.81, "grad_norm": 3.2554068565368652, "learning_rate": 1.796329712231969e-06, "loss": 0.8069, "step": 31682 }, { "epoch": 0.81, "grad_norm": 1.7432734966278076, "learning_rate": 1.795855033816618e-06, "loss": 0.5541, "step": 31683 }, { "epoch": 0.81, "grad_norm": 1.4970128536224365, "learning_rate": 1.7953804119390817e-06, "loss": 0.5576, "step": 31684 }, { "epoch": 0.81, "grad_norm": 1.3756529092788696, "learning_rate": 1.7949058466026292e-06, "loss": 0.4788, "step": 31685 }, { "epoch": 0.81, "grad_norm": 21.027952194213867, "learning_rate": 1.7944313378105284e-06, "loss": 0.4316, "step": 31686 }, { "epoch": 0.81, "grad_norm": 1.1081370115280151, "learning_rate": 1.7939568855660527e-06, "loss": 0.445, "step": 31687 }, { "epoch": 0.81, "grad_norm": 1.6512267589569092, "learning_rate": 1.7934824898724723e-06, "loss": 0.5565, "step": 31688 }, { "epoch": 0.81, "grad_norm": 8.190208435058594, "learning_rate": 1.7930081507330544e-06, "loss": 0.5442, "step": 31689 }, { "epoch": 0.81, "grad_norm": 7.237691402435303, "learning_rate": 1.7925338681510652e-06, "loss": 0.6113, "step": 31690 }, { "epoch": 0.81, "grad_norm": 1.8400384187698364, "learning_rate": 1.7920596421297787e-06, "loss": 0.4191, "step": 31691 }, { "epoch": 0.81, "grad_norm": 1.5604182481765747, "learning_rate": 1.7915854726724603e-06, "loss": 0.5368, "step": 31692 }, { "epoch": 0.81, "grad_norm": 1.348158597946167, "learning_rate": 1.7911113597823759e-06, "loss": 0.5028, "step": 31693 }, { "epoch": 0.81, "grad_norm": 1.3338879346847534, "learning_rate": 1.7906373034627965e-06, "loss": 0.5041, "step": 31694 }, { "epoch": 0.81, "grad_norm": 5.694826126098633, "learning_rate": 1.7901633037169873e-06, "loss": 0.6327, "step": 31695 }, { "epoch": 0.81, "grad_norm": 2.168992519378662, "learning_rate": 1.7896893605482124e-06, "loss": 0.4981, "step": 31696 }, { "epoch": 0.81, "grad_norm": 1.6533520221710205, "learning_rate": 1.7892154739597422e-06, "loss": 0.5196, "step": 31697 }, { "epoch": 0.81, "grad_norm": 2.2699198722839355, "learning_rate": 1.788741643954841e-06, "loss": 0.5714, "step": 31698 }, { "epoch": 0.81, "grad_norm": 3.4390649795532227, "learning_rate": 1.788267870536774e-06, "loss": 0.5738, "step": 31699 }, { "epoch": 0.81, "grad_norm": 3.4569027423858643, "learning_rate": 1.7877941537088017e-06, "loss": 0.6773, "step": 31700 }, { "epoch": 0.81, "grad_norm": 1.6746524572372437, "learning_rate": 1.7873204934741971e-06, "loss": 0.5724, "step": 31701 }, { "epoch": 0.81, "grad_norm": 0.9511237740516663, "learning_rate": 1.786846889836219e-06, "loss": 0.4518, "step": 31702 }, { "epoch": 0.81, "grad_norm": 1.4581226110458374, "learning_rate": 1.7863733427981289e-06, "loss": 0.4289, "step": 31703 }, { "epoch": 0.81, "grad_norm": 1.2258286476135254, "learning_rate": 1.7858998523631965e-06, "loss": 0.5297, "step": 31704 }, { "epoch": 0.81, "grad_norm": 1.3791618347167969, "learning_rate": 1.7854264185346826e-06, "loss": 0.5421, "step": 31705 }, { "epoch": 0.81, "grad_norm": 1.152127981185913, "learning_rate": 1.7849530413158479e-06, "loss": 0.6298, "step": 31706 }, { "epoch": 0.81, "grad_norm": 1.41316819190979, "learning_rate": 1.7844797207099529e-06, "loss": 0.4851, "step": 31707 }, { "epoch": 0.81, "grad_norm": 3.976099729537964, "learning_rate": 1.7840064567202653e-06, "loss": 0.6955, "step": 31708 }, { "epoch": 0.81, "grad_norm": 3.45841121673584, "learning_rate": 1.7835332493500435e-06, "loss": 0.5974, "step": 31709 }, { "epoch": 0.81, "grad_norm": 7.458146095275879, "learning_rate": 1.783060098602546e-06, "loss": 0.6278, "step": 31710 }, { "epoch": 0.81, "grad_norm": 1.9600645303726196, "learning_rate": 1.7825870044810378e-06, "loss": 0.6792, "step": 31711 }, { "epoch": 0.81, "grad_norm": 4.184546947479248, "learning_rate": 1.7821139669887777e-06, "loss": 0.6217, "step": 31712 }, { "epoch": 0.81, "grad_norm": 4.435046195983887, "learning_rate": 1.7816409861290229e-06, "loss": 0.6237, "step": 31713 }, { "epoch": 0.81, "grad_norm": 1.9581156969070435, "learning_rate": 1.7811680619050376e-06, "loss": 0.4641, "step": 31714 }, { "epoch": 0.81, "grad_norm": 1.4408447742462158, "learning_rate": 1.780695194320078e-06, "loss": 0.4736, "step": 31715 }, { "epoch": 0.81, "grad_norm": 1.0352345705032349, "learning_rate": 1.7802223833774035e-06, "loss": 0.3695, "step": 31716 }, { "epoch": 0.81, "grad_norm": 1.706985592842102, "learning_rate": 1.7797496290802695e-06, "loss": 0.6856, "step": 31717 }, { "epoch": 0.81, "grad_norm": 1.6345020532608032, "learning_rate": 1.779276931431939e-06, "loss": 0.5816, "step": 31718 }, { "epoch": 0.81, "grad_norm": 1.305842399597168, "learning_rate": 1.7788042904356673e-06, "loss": 0.4898, "step": 31719 }, { "epoch": 0.81, "grad_norm": 3.010507345199585, "learning_rate": 1.778331706094709e-06, "loss": 0.4813, "step": 31720 }, { "epoch": 0.81, "grad_norm": 1.8335293531417847, "learning_rate": 1.7778591784123256e-06, "loss": 0.6162, "step": 31721 }, { "epoch": 0.81, "grad_norm": 1.5638586282730103, "learning_rate": 1.7773867073917706e-06, "loss": 0.4802, "step": 31722 }, { "epoch": 0.81, "grad_norm": 1.2458171844482422, "learning_rate": 1.7769142930363015e-06, "loss": 0.4806, "step": 31723 }, { "epoch": 0.81, "grad_norm": 1.300355315208435, "learning_rate": 1.776441935349169e-06, "loss": 0.5201, "step": 31724 }, { "epoch": 0.81, "grad_norm": 7.44308614730835, "learning_rate": 1.7759696343336363e-06, "loss": 0.5575, "step": 31725 }, { "epoch": 0.81, "grad_norm": 1.0667856931686401, "learning_rate": 1.775497389992953e-06, "loss": 0.4671, "step": 31726 }, { "epoch": 0.81, "grad_norm": 1.255176305770874, "learning_rate": 1.7750252023303726e-06, "loss": 0.5974, "step": 31727 }, { "epoch": 0.81, "grad_norm": 1.3745381832122803, "learning_rate": 1.7745530713491532e-06, "loss": 0.4367, "step": 31728 }, { "epoch": 0.81, "grad_norm": 2.4376518726348877, "learning_rate": 1.7740809970525464e-06, "loss": 0.454, "step": 31729 }, { "epoch": 0.81, "grad_norm": 1.6399002075195312, "learning_rate": 1.773608979443805e-06, "loss": 0.4463, "step": 31730 }, { "epoch": 0.81, "grad_norm": 5.651369094848633, "learning_rate": 1.7731370185261799e-06, "loss": 0.557, "step": 31731 }, { "epoch": 0.81, "grad_norm": 0.9280092120170593, "learning_rate": 1.7726651143029283e-06, "loss": 0.6161, "step": 31732 }, { "epoch": 0.81, "grad_norm": 1.7779464721679688, "learning_rate": 1.7721932667773e-06, "loss": 0.505, "step": 31733 }, { "epoch": 0.81, "grad_norm": 2.0457923412323, "learning_rate": 1.7717214759525436e-06, "loss": 0.548, "step": 31734 }, { "epoch": 0.81, "grad_norm": 1.4165592193603516, "learning_rate": 1.7712497418319164e-06, "loss": 0.4511, "step": 31735 }, { "epoch": 0.81, "grad_norm": 1.9539673328399658, "learning_rate": 1.7707780644186656e-06, "loss": 0.4237, "step": 31736 }, { "epoch": 0.81, "grad_norm": 1.2866572141647339, "learning_rate": 1.77030644371604e-06, "loss": 0.4982, "step": 31737 }, { "epoch": 0.81, "grad_norm": 1.1994295120239258, "learning_rate": 1.7698348797272947e-06, "loss": 0.5201, "step": 31738 }, { "epoch": 0.81, "grad_norm": 1.0865635871887207, "learning_rate": 1.7693633724556758e-06, "loss": 0.4781, "step": 31739 }, { "epoch": 0.81, "grad_norm": 1.1761162281036377, "learning_rate": 1.7688919219044342e-06, "loss": 0.5625, "step": 31740 }, { "epoch": 0.81, "grad_norm": 1.0285745859146118, "learning_rate": 1.7684205280768164e-06, "loss": 0.3727, "step": 31741 }, { "epoch": 0.81, "grad_norm": 1.4154542684555054, "learning_rate": 1.7679491909760737e-06, "loss": 0.3692, "step": 31742 }, { "epoch": 0.81, "grad_norm": 1.2347846031188965, "learning_rate": 1.767477910605454e-06, "loss": 0.5607, "step": 31743 }, { "epoch": 0.81, "grad_norm": 3.7701644897460938, "learning_rate": 1.7670066869682024e-06, "loss": 0.6383, "step": 31744 }, { "epoch": 0.81, "grad_norm": 7.264034748077393, "learning_rate": 1.7665355200675704e-06, "loss": 0.5304, "step": 31745 }, { "epoch": 0.81, "grad_norm": 1.3075165748596191, "learning_rate": 1.7660644099068024e-06, "loss": 0.6077, "step": 31746 }, { "epoch": 0.81, "grad_norm": 1.0741521120071411, "learning_rate": 1.7655933564891458e-06, "loss": 0.4394, "step": 31747 }, { "epoch": 0.81, "grad_norm": 4.507568836212158, "learning_rate": 1.7651223598178446e-06, "loss": 0.5467, "step": 31748 }, { "epoch": 0.81, "grad_norm": 1.0943621397018433, "learning_rate": 1.7646514198961484e-06, "loss": 0.4107, "step": 31749 }, { "epoch": 0.81, "grad_norm": 1.1696345806121826, "learning_rate": 1.7641805367273e-06, "loss": 0.381, "step": 31750 }, { "epoch": 0.81, "grad_norm": 1.5450435876846313, "learning_rate": 1.763709710314543e-06, "loss": 0.6285, "step": 31751 }, { "epoch": 0.81, "grad_norm": 1.3649988174438477, "learning_rate": 1.7632389406611271e-06, "loss": 0.4293, "step": 31752 }, { "epoch": 0.81, "grad_norm": 1.288570761680603, "learning_rate": 1.762768227770293e-06, "loss": 0.5102, "step": 31753 }, { "epoch": 0.81, "grad_norm": 1.136285424232483, "learning_rate": 1.7622975716452862e-06, "loss": 0.413, "step": 31754 }, { "epoch": 0.81, "grad_norm": 2.501939535140991, "learning_rate": 1.7618269722893454e-06, "loss": 0.7006, "step": 31755 }, { "epoch": 0.81, "grad_norm": 2.2690000534057617, "learning_rate": 1.7613564297057205e-06, "loss": 0.7013, "step": 31756 }, { "epoch": 0.81, "grad_norm": 1.4450621604919434, "learning_rate": 1.76088594389765e-06, "loss": 0.3298, "step": 31757 }, { "epoch": 0.81, "grad_norm": 2.5718400478363037, "learning_rate": 1.7604155148683755e-06, "loss": 0.6734, "step": 31758 }, { "epoch": 0.81, "grad_norm": 1.468797206878662, "learning_rate": 1.7599451426211434e-06, "loss": 0.5509, "step": 31759 }, { "epoch": 0.81, "grad_norm": 2.7798595428466797, "learning_rate": 1.7594748271591921e-06, "loss": 0.6221, "step": 31760 }, { "epoch": 0.81, "grad_norm": 1.4061833620071411, "learning_rate": 1.7590045684857616e-06, "loss": 0.5603, "step": 31761 }, { "epoch": 0.81, "grad_norm": 1.417709469795227, "learning_rate": 1.7585343666040956e-06, "loss": 0.6504, "step": 31762 }, { "epoch": 0.81, "grad_norm": 2.32963490486145, "learning_rate": 1.7580642215174326e-06, "loss": 0.5463, "step": 31763 }, { "epoch": 0.81, "grad_norm": 1.0416631698608398, "learning_rate": 1.7575941332290137e-06, "loss": 0.565, "step": 31764 }, { "epoch": 0.81, "grad_norm": 1.2892616987228394, "learning_rate": 1.7571241017420747e-06, "loss": 0.4543, "step": 31765 }, { "epoch": 0.81, "grad_norm": 0.9826864004135132, "learning_rate": 1.7566541270598613e-06, "loss": 0.4047, "step": 31766 }, { "epoch": 0.81, "grad_norm": 1.603380799293518, "learning_rate": 1.7561842091856074e-06, "loss": 0.5763, "step": 31767 }, { "epoch": 0.81, "grad_norm": 1.4641308784484863, "learning_rate": 1.7557143481225504e-06, "loss": 0.5386, "step": 31768 }, { "epoch": 0.81, "grad_norm": 1.642989993095398, "learning_rate": 1.7552445438739329e-06, "loss": 0.7294, "step": 31769 }, { "epoch": 0.81, "grad_norm": 1.3590458631515503, "learning_rate": 1.7547747964429906e-06, "loss": 0.6369, "step": 31770 }, { "epoch": 0.81, "grad_norm": 1.7802900075912476, "learning_rate": 1.7543051058329608e-06, "loss": 0.6135, "step": 31771 }, { "epoch": 0.81, "grad_norm": 1.708343505859375, "learning_rate": 1.7538354720470764e-06, "loss": 0.5902, "step": 31772 }, { "epoch": 0.81, "grad_norm": 7.101880073547363, "learning_rate": 1.753365895088579e-06, "loss": 0.6646, "step": 31773 }, { "epoch": 0.81, "grad_norm": 5.265448570251465, "learning_rate": 1.752896374960704e-06, "loss": 0.5274, "step": 31774 }, { "epoch": 0.81, "grad_norm": 1.1314858198165894, "learning_rate": 1.7524269116666825e-06, "loss": 0.4998, "step": 31775 }, { "epoch": 0.81, "grad_norm": 1.326340913772583, "learning_rate": 1.7519575052097571e-06, "loss": 0.4423, "step": 31776 }, { "epoch": 0.81, "grad_norm": 3.9029414653778076, "learning_rate": 1.7514881555931573e-06, "loss": 0.4577, "step": 31777 }, { "epoch": 0.81, "grad_norm": 1.4280236959457397, "learning_rate": 1.7510188628201164e-06, "loss": 0.49, "step": 31778 }, { "epoch": 0.81, "grad_norm": 1.7489818334579468, "learning_rate": 1.7505496268938737e-06, "loss": 0.5143, "step": 31779 }, { "epoch": 0.81, "grad_norm": 1.662878155708313, "learning_rate": 1.7500804478176603e-06, "loss": 0.4706, "step": 31780 }, { "epoch": 0.81, "grad_norm": 6.747613430023193, "learning_rate": 1.7496113255947089e-06, "loss": 0.5805, "step": 31781 }, { "epoch": 0.81, "grad_norm": 2.7917394638061523, "learning_rate": 1.7491422602282516e-06, "loss": 0.7215, "step": 31782 }, { "epoch": 0.81, "grad_norm": 1.8841252326965332, "learning_rate": 1.7486732517215231e-06, "loss": 0.5409, "step": 31783 }, { "epoch": 0.81, "grad_norm": 3.4103260040283203, "learning_rate": 1.748204300077756e-06, "loss": 0.5273, "step": 31784 }, { "epoch": 0.81, "grad_norm": 1.093553900718689, "learning_rate": 1.747735405300177e-06, "loss": 0.3846, "step": 31785 }, { "epoch": 0.81, "grad_norm": 1.7342339754104614, "learning_rate": 1.7472665673920241e-06, "loss": 0.478, "step": 31786 }, { "epoch": 0.81, "grad_norm": 1.3505691289901733, "learning_rate": 1.7467977863565244e-06, "loss": 0.5662, "step": 31787 }, { "epoch": 0.81, "grad_norm": 6.904769420623779, "learning_rate": 1.7463290621969098e-06, "loss": 0.7476, "step": 31788 }, { "epoch": 0.81, "grad_norm": 2.6960737705230713, "learning_rate": 1.7458603949164066e-06, "loss": 0.4485, "step": 31789 }, { "epoch": 0.81, "grad_norm": 1.252605676651001, "learning_rate": 1.745391784518251e-06, "loss": 0.4851, "step": 31790 }, { "epoch": 0.81, "grad_norm": 3.3629982471466064, "learning_rate": 1.7449232310056697e-06, "loss": 0.482, "step": 31791 }, { "epoch": 0.81, "grad_norm": 3.9529483318328857, "learning_rate": 1.7444547343818875e-06, "loss": 0.5699, "step": 31792 }, { "epoch": 0.81, "grad_norm": 4.029607772827148, "learning_rate": 1.7439862946501395e-06, "loss": 0.916, "step": 31793 }, { "epoch": 0.81, "grad_norm": 1.459671974182129, "learning_rate": 1.7435179118136514e-06, "loss": 0.4294, "step": 31794 }, { "epoch": 0.81, "grad_norm": 1.7734394073486328, "learning_rate": 1.7430495858756502e-06, "loss": 0.4235, "step": 31795 }, { "epoch": 0.81, "grad_norm": 0.8516790270805359, "learning_rate": 1.7425813168393612e-06, "loss": 0.3834, "step": 31796 }, { "epoch": 0.81, "grad_norm": 1.0274755954742432, "learning_rate": 1.7421131047080176e-06, "loss": 0.4782, "step": 31797 }, { "epoch": 0.82, "grad_norm": 3.010366201400757, "learning_rate": 1.741644949484841e-06, "loss": 0.4476, "step": 31798 }, { "epoch": 0.82, "grad_norm": 1.452162265777588, "learning_rate": 1.7411768511730564e-06, "loss": 0.5103, "step": 31799 }, { "epoch": 0.82, "grad_norm": 1.094502329826355, "learning_rate": 1.7407088097758962e-06, "loss": 0.5205, "step": 31800 }, { "epoch": 0.82, "grad_norm": 3.2363882064819336, "learning_rate": 1.7402408252965808e-06, "loss": 0.6485, "step": 31801 }, { "epoch": 0.82, "grad_norm": 1.5172016620635986, "learning_rate": 1.7397728977383333e-06, "loss": 0.5793, "step": 31802 }, { "epoch": 0.82, "grad_norm": 5.814748764038086, "learning_rate": 1.7393050271043855e-06, "loss": 0.3838, "step": 31803 }, { "epoch": 0.82, "grad_norm": 5.472024440765381, "learning_rate": 1.738837213397956e-06, "loss": 0.5637, "step": 31804 }, { "epoch": 0.82, "grad_norm": 1.541725754737854, "learning_rate": 1.738369456622271e-06, "loss": 0.5166, "step": 31805 }, { "epoch": 0.82, "grad_norm": 1.234390377998352, "learning_rate": 1.7379017567805512e-06, "loss": 0.4278, "step": 31806 }, { "epoch": 0.82, "grad_norm": 1.426103949546814, "learning_rate": 1.7374341138760231e-06, "loss": 0.648, "step": 31807 }, { "epoch": 0.82, "grad_norm": 1.4419777393341064, "learning_rate": 1.7369665279119085e-06, "loss": 0.5213, "step": 31808 }, { "epoch": 0.82, "grad_norm": 3.9441773891448975, "learning_rate": 1.7364989988914272e-06, "loss": 0.4996, "step": 31809 }, { "epoch": 0.82, "grad_norm": 1.256168007850647, "learning_rate": 1.7360315268178052e-06, "loss": 0.5124, "step": 31810 }, { "epoch": 0.82, "grad_norm": 1.5587238073349, "learning_rate": 1.7355641116942624e-06, "loss": 0.4955, "step": 31811 }, { "epoch": 0.82, "grad_norm": 2.4518303871154785, "learning_rate": 1.7350967535240194e-06, "loss": 0.391, "step": 31812 }, { "epoch": 0.82, "grad_norm": 1.5813990831375122, "learning_rate": 1.7346294523102946e-06, "loss": 0.635, "step": 31813 }, { "epoch": 0.82, "grad_norm": 1.6846277713775635, "learning_rate": 1.7341622080563124e-06, "loss": 0.5263, "step": 31814 }, { "epoch": 0.82, "grad_norm": 2.213628053665161, "learning_rate": 1.7336950207652925e-06, "loss": 0.3763, "step": 31815 }, { "epoch": 0.82, "grad_norm": 1.187894344329834, "learning_rate": 1.7332278904404499e-06, "loss": 0.5218, "step": 31816 }, { "epoch": 0.82, "grad_norm": 1.7393876314163208, "learning_rate": 1.7327608170850097e-06, "loss": 0.4476, "step": 31817 }, { "epoch": 0.82, "grad_norm": 1.2964454889297485, "learning_rate": 1.7322938007021872e-06, "loss": 0.5483, "step": 31818 }, { "epoch": 0.82, "grad_norm": 1.2951140403747559, "learning_rate": 1.7318268412952e-06, "loss": 0.433, "step": 31819 }, { "epoch": 0.82, "grad_norm": 0.9012845158576965, "learning_rate": 1.7313599388672696e-06, "loss": 0.4255, "step": 31820 }, { "epoch": 0.82, "grad_norm": 1.4127925634384155, "learning_rate": 1.7308930934216118e-06, "loss": 0.4477, "step": 31821 }, { "epoch": 0.82, "grad_norm": 1.5441923141479492, "learning_rate": 1.7304263049614433e-06, "loss": 0.3679, "step": 31822 }, { "epoch": 0.82, "grad_norm": 1.8379895687103271, "learning_rate": 1.72995957348998e-06, "loss": 0.5268, "step": 31823 }, { "epoch": 0.82, "grad_norm": 1.1149827241897583, "learning_rate": 1.7294928990104408e-06, "loss": 0.3433, "step": 31824 }, { "epoch": 0.82, "grad_norm": 2.4527904987335205, "learning_rate": 1.7290262815260405e-06, "loss": 0.5852, "step": 31825 }, { "epoch": 0.82, "grad_norm": 1.2304551601409912, "learning_rate": 1.7285597210399928e-06, "loss": 0.4852, "step": 31826 }, { "epoch": 0.82, "grad_norm": 1.5583014488220215, "learning_rate": 1.7280932175555175e-06, "loss": 0.3667, "step": 31827 }, { "epoch": 0.82, "grad_norm": 1.4553929567337036, "learning_rate": 1.7276267710758266e-06, "loss": 0.5597, "step": 31828 }, { "epoch": 0.82, "grad_norm": 3.6755082607269287, "learning_rate": 1.7271603816041339e-06, "loss": 0.6776, "step": 31829 }, { "epoch": 0.82, "grad_norm": 1.9771620035171509, "learning_rate": 1.7266940491436534e-06, "loss": 0.5948, "step": 31830 }, { "epoch": 0.82, "grad_norm": 1.3719528913497925, "learning_rate": 1.7262277736976008e-06, "loss": 0.5404, "step": 31831 }, { "epoch": 0.82, "grad_norm": 0.9805183410644531, "learning_rate": 1.7257615552691897e-06, "loss": 0.5219, "step": 31832 }, { "epoch": 0.82, "grad_norm": 2.3905396461486816, "learning_rate": 1.725295393861628e-06, "loss": 0.5688, "step": 31833 }, { "epoch": 0.82, "grad_norm": 1.432507038116455, "learning_rate": 1.7248292894781349e-06, "loss": 0.4861, "step": 31834 }, { "epoch": 0.82, "grad_norm": 1.3466496467590332, "learning_rate": 1.7243632421219181e-06, "loss": 0.4884, "step": 31835 }, { "epoch": 0.82, "grad_norm": 2.341179370880127, "learning_rate": 1.7238972517961917e-06, "loss": 0.4829, "step": 31836 }, { "epoch": 0.82, "grad_norm": 4.125147819519043, "learning_rate": 1.723431318504164e-06, "loss": 0.4744, "step": 31837 }, { "epoch": 0.82, "grad_norm": 1.275427222251892, "learning_rate": 1.7229654422490494e-06, "loss": 0.3974, "step": 31838 }, { "epoch": 0.82, "grad_norm": 1.3359808921813965, "learning_rate": 1.7224996230340563e-06, "loss": 0.4656, "step": 31839 }, { "epoch": 0.82, "grad_norm": 0.9448849558830261, "learning_rate": 1.7220338608623932e-06, "loss": 0.4116, "step": 31840 }, { "epoch": 0.82, "grad_norm": 1.699597716331482, "learning_rate": 1.7215681557372754e-06, "loss": 0.5839, "step": 31841 }, { "epoch": 0.82, "grad_norm": 1.9017435312271118, "learning_rate": 1.7211025076619082e-06, "loss": 0.5712, "step": 31842 }, { "epoch": 0.82, "grad_norm": 2.2343251705169678, "learning_rate": 1.7206369166394988e-06, "loss": 0.4664, "step": 31843 }, { "epoch": 0.82, "grad_norm": 2.028273344039917, "learning_rate": 1.7201713826732601e-06, "loss": 0.6242, "step": 31844 }, { "epoch": 0.82, "grad_norm": 1.1800036430358887, "learning_rate": 1.719705905766399e-06, "loss": 0.6277, "step": 31845 }, { "epoch": 0.82, "grad_norm": 2.2737464904785156, "learning_rate": 1.7192404859221224e-06, "loss": 0.6442, "step": 31846 }, { "epoch": 0.82, "grad_norm": 0.831371545791626, "learning_rate": 1.7187751231436356e-06, "loss": 0.5603, "step": 31847 }, { "epoch": 0.82, "grad_norm": 1.4448438882827759, "learning_rate": 1.7183098174341506e-06, "loss": 0.4957, "step": 31848 }, { "epoch": 0.82, "grad_norm": 2.5349559783935547, "learning_rate": 1.7178445687968704e-06, "loss": 0.5475, "step": 31849 }, { "epoch": 0.82, "grad_norm": 1.3274496793746948, "learning_rate": 1.717379377235e-06, "loss": 0.4975, "step": 31850 }, { "epoch": 0.82, "grad_norm": 3.2436702251434326, "learning_rate": 1.7169142427517504e-06, "loss": 0.5251, "step": 31851 }, { "epoch": 0.82, "grad_norm": 1.7431751489639282, "learning_rate": 1.7164491653503234e-06, "loss": 0.5376, "step": 31852 }, { "epoch": 0.82, "grad_norm": 1.9230350255966187, "learning_rate": 1.7159841450339242e-06, "loss": 0.5261, "step": 31853 }, { "epoch": 0.82, "grad_norm": 1.284264087677002, "learning_rate": 1.715519181805756e-06, "loss": 0.5352, "step": 31854 }, { "epoch": 0.82, "grad_norm": 1.0449391603469849, "learning_rate": 1.7150542756690257e-06, "loss": 0.4539, "step": 31855 }, { "epoch": 0.82, "grad_norm": 2.2996063232421875, "learning_rate": 1.714589426626938e-06, "loss": 0.6115, "step": 31856 }, { "epoch": 0.82, "grad_norm": 1.1228584051132202, "learning_rate": 1.7141246346826912e-06, "loss": 0.5136, "step": 31857 }, { "epoch": 0.82, "grad_norm": 1.8888473510742188, "learning_rate": 1.713659899839495e-06, "loss": 0.5996, "step": 31858 }, { "epoch": 0.82, "grad_norm": 1.8103193044662476, "learning_rate": 1.7131952221005498e-06, "loss": 0.4329, "step": 31859 }, { "epoch": 0.82, "grad_norm": 1.5497606992721558, "learning_rate": 1.7127306014690526e-06, "loss": 0.5086, "step": 31860 }, { "epoch": 0.82, "grad_norm": 1.3913335800170898, "learning_rate": 1.7122660379482126e-06, "loss": 0.5867, "step": 31861 }, { "epoch": 0.82, "grad_norm": 9.645081520080566, "learning_rate": 1.7118015315412274e-06, "loss": 0.6089, "step": 31862 }, { "epoch": 0.82, "grad_norm": 5.107088565826416, "learning_rate": 1.7113370822512965e-06, "loss": 0.6704, "step": 31863 }, { "epoch": 0.82, "grad_norm": 1.2231720685958862, "learning_rate": 1.7108726900816263e-06, "loss": 0.4661, "step": 31864 }, { "epoch": 0.82, "grad_norm": 1.498829960823059, "learning_rate": 1.7104083550354133e-06, "loss": 0.6063, "step": 31865 }, { "epoch": 0.82, "grad_norm": 1.410103440284729, "learning_rate": 1.709944077115857e-06, "loss": 0.4942, "step": 31866 }, { "epoch": 0.82, "grad_norm": 1.315416693687439, "learning_rate": 1.709479856326156e-06, "loss": 0.4667, "step": 31867 }, { "epoch": 0.82, "grad_norm": 1.5761456489562988, "learning_rate": 1.7090156926695146e-06, "loss": 0.6727, "step": 31868 }, { "epoch": 0.82, "grad_norm": 16.216157913208008, "learning_rate": 1.7085515861491275e-06, "loss": 0.5853, "step": 31869 }, { "epoch": 0.82, "grad_norm": 1.5857481956481934, "learning_rate": 1.7080875367681904e-06, "loss": 0.4377, "step": 31870 }, { "epoch": 0.82, "grad_norm": 16.47154998779297, "learning_rate": 1.7076235445299083e-06, "loss": 0.809, "step": 31871 }, { "epoch": 0.82, "grad_norm": 0.9997126460075378, "learning_rate": 1.7071596094374744e-06, "loss": 0.416, "step": 31872 }, { "epoch": 0.82, "grad_norm": 1.3677786588668823, "learning_rate": 1.7066957314940836e-06, "loss": 0.5555, "step": 31873 }, { "epoch": 0.82, "grad_norm": 2.0466463565826416, "learning_rate": 1.706231910702938e-06, "loss": 0.5983, "step": 31874 }, { "epoch": 0.82, "grad_norm": 1.0901081562042236, "learning_rate": 1.7057681470672316e-06, "loss": 0.5185, "step": 31875 }, { "epoch": 0.82, "grad_norm": 1.3758188486099243, "learning_rate": 1.7053044405901598e-06, "loss": 0.6336, "step": 31876 }, { "epoch": 0.82, "grad_norm": 1.9912781715393066, "learning_rate": 1.7048407912749164e-06, "loss": 0.5223, "step": 31877 }, { "epoch": 0.82, "grad_norm": 1.077539324760437, "learning_rate": 1.7043771991247016e-06, "loss": 0.497, "step": 31878 }, { "epoch": 0.82, "grad_norm": 9.666474342346191, "learning_rate": 1.7039136641427068e-06, "loss": 0.5638, "step": 31879 }, { "epoch": 0.82, "grad_norm": 1.2300868034362793, "learning_rate": 1.703450186332124e-06, "loss": 0.5032, "step": 31880 }, { "epoch": 0.82, "grad_norm": 1.1002405881881714, "learning_rate": 1.7029867656961529e-06, "loss": 0.441, "step": 31881 }, { "epoch": 0.82, "grad_norm": 4.5954413414001465, "learning_rate": 1.7025234022379845e-06, "loss": 0.6714, "step": 31882 }, { "epoch": 0.82, "grad_norm": 1.487349033355713, "learning_rate": 1.7020600959608125e-06, "loss": 0.2969, "step": 31883 }, { "epoch": 0.82, "grad_norm": 1.6818422079086304, "learning_rate": 1.7015968468678256e-06, "loss": 0.6291, "step": 31884 }, { "epoch": 0.82, "grad_norm": 1.104443073272705, "learning_rate": 1.7011336549622227e-06, "loss": 0.5071, "step": 31885 }, { "epoch": 0.82, "grad_norm": 4.948512077331543, "learning_rate": 1.7006705202471929e-06, "loss": 0.4468, "step": 31886 }, { "epoch": 0.82, "grad_norm": 3.272491931915283, "learning_rate": 1.7002074427259241e-06, "loss": 0.4459, "step": 31887 }, { "epoch": 0.82, "grad_norm": 0.8953377604484558, "learning_rate": 1.6997444224016145e-06, "loss": 0.3705, "step": 31888 }, { "epoch": 0.82, "grad_norm": 0.9829303622245789, "learning_rate": 1.699281459277452e-06, "loss": 0.2905, "step": 31889 }, { "epoch": 0.82, "grad_norm": 1.529369831085205, "learning_rate": 1.6988185533566259e-06, "loss": 0.4537, "step": 31890 }, { "epoch": 0.82, "grad_norm": 0.9424073696136475, "learning_rate": 1.698355704642325e-06, "loss": 0.5217, "step": 31891 }, { "epoch": 0.82, "grad_norm": 1.0932040214538574, "learning_rate": 1.6978929131377431e-06, "loss": 0.4431, "step": 31892 }, { "epoch": 0.82, "grad_norm": 1.5317890644073486, "learning_rate": 1.6974301788460678e-06, "loss": 0.6201, "step": 31893 }, { "epoch": 0.82, "grad_norm": 2.4495251178741455, "learning_rate": 1.6969675017704844e-06, "loss": 0.4372, "step": 31894 }, { "epoch": 0.82, "grad_norm": 1.6610026359558105, "learning_rate": 1.6965048819141883e-06, "loss": 0.493, "step": 31895 }, { "epoch": 0.82, "grad_norm": 5.638103485107422, "learning_rate": 1.6960423192803633e-06, "loss": 0.8068, "step": 31896 }, { "epoch": 0.82, "grad_norm": 1.0787228345870972, "learning_rate": 1.6955798138721945e-06, "loss": 0.4575, "step": 31897 }, { "epoch": 0.82, "grad_norm": 1.4883954524993896, "learning_rate": 1.6951173656928743e-06, "loss": 0.372, "step": 31898 }, { "epoch": 0.82, "grad_norm": 1.2335890531539917, "learning_rate": 1.6946549747455887e-06, "loss": 0.6537, "step": 31899 }, { "epoch": 0.82, "grad_norm": 1.0762053728103638, "learning_rate": 1.6941926410335219e-06, "loss": 0.4867, "step": 31900 }, { "epoch": 0.82, "grad_norm": 1.0498586893081665, "learning_rate": 1.6937303645598602e-06, "loss": 0.524, "step": 31901 }, { "epoch": 0.82, "grad_norm": 1.333977460861206, "learning_rate": 1.6932681453277921e-06, "loss": 0.4155, "step": 31902 }, { "epoch": 0.82, "grad_norm": 2.1065282821655273, "learning_rate": 1.6928059833405008e-06, "loss": 0.3816, "step": 31903 }, { "epoch": 0.82, "grad_norm": 2.35164737701416, "learning_rate": 1.6923438786011692e-06, "loss": 0.4672, "step": 31904 }, { "epoch": 0.82, "grad_norm": 1.9588980674743652, "learning_rate": 1.6918818311129869e-06, "loss": 0.6502, "step": 31905 }, { "epoch": 0.82, "grad_norm": 3.3673572540283203, "learning_rate": 1.6914198408791349e-06, "loss": 0.5242, "step": 31906 }, { "epoch": 0.82, "grad_norm": 1.6893432140350342, "learning_rate": 1.6909579079027983e-06, "loss": 0.461, "step": 31907 }, { "epoch": 0.82, "grad_norm": 1.2615526914596558, "learning_rate": 1.6904960321871565e-06, "loss": 0.5373, "step": 31908 }, { "epoch": 0.82, "grad_norm": 1.4033321142196655, "learning_rate": 1.6900342137353976e-06, "loss": 0.4099, "step": 31909 }, { "epoch": 0.82, "grad_norm": 3.045968770980835, "learning_rate": 1.6895724525507028e-06, "loss": 0.5461, "step": 31910 }, { "epoch": 0.82, "grad_norm": 7.207971096038818, "learning_rate": 1.6891107486362512e-06, "loss": 0.7235, "step": 31911 }, { "epoch": 0.82, "grad_norm": 2.717961072921753, "learning_rate": 1.688649101995229e-06, "loss": 0.6146, "step": 31912 }, { "epoch": 0.82, "grad_norm": 0.9644241333007812, "learning_rate": 1.6881875126308155e-06, "loss": 0.4163, "step": 31913 }, { "epoch": 0.82, "grad_norm": 1.8450696468353271, "learning_rate": 1.6877259805461888e-06, "loss": 0.5552, "step": 31914 }, { "epoch": 0.82, "grad_norm": 1.9463821649551392, "learning_rate": 1.6872645057445348e-06, "loss": 0.6161, "step": 31915 }, { "epoch": 0.82, "grad_norm": 1.579522728919983, "learning_rate": 1.686803088229032e-06, "loss": 0.5522, "step": 31916 }, { "epoch": 0.82, "grad_norm": 2.318723678588867, "learning_rate": 1.6863417280028594e-06, "loss": 0.4327, "step": 31917 }, { "epoch": 0.82, "grad_norm": 2.4215829372406006, "learning_rate": 1.685880425069193e-06, "loss": 0.7033, "step": 31918 }, { "epoch": 0.82, "grad_norm": 1.5957540273666382, "learning_rate": 1.6854191794312192e-06, "loss": 0.5614, "step": 31919 }, { "epoch": 0.82, "grad_norm": 6.078890800476074, "learning_rate": 1.684957991092111e-06, "loss": 0.4386, "step": 31920 }, { "epoch": 0.82, "grad_norm": 1.6980406045913696, "learning_rate": 1.6844968600550472e-06, "loss": 0.4827, "step": 31921 }, { "epoch": 0.82, "grad_norm": 1.2916966676712036, "learning_rate": 1.6840357863232082e-06, "loss": 0.5294, "step": 31922 }, { "epoch": 0.82, "grad_norm": 7.263705253601074, "learning_rate": 1.6835747698997706e-06, "loss": 0.5177, "step": 31923 }, { "epoch": 0.82, "grad_norm": 1.605772852897644, "learning_rate": 1.6831138107879108e-06, "loss": 0.4624, "step": 31924 }, { "epoch": 0.82, "grad_norm": 1.6095476150512695, "learning_rate": 1.6826529089908029e-06, "loss": 0.5708, "step": 31925 }, { "epoch": 0.82, "grad_norm": 1.6414138078689575, "learning_rate": 1.6821920645116286e-06, "loss": 0.4231, "step": 31926 }, { "epoch": 0.82, "grad_norm": 1.2549264430999756, "learning_rate": 1.6817312773535598e-06, "loss": 0.4905, "step": 31927 }, { "epoch": 0.82, "grad_norm": 1.4914236068725586, "learning_rate": 1.681270547519771e-06, "loss": 0.5984, "step": 31928 }, { "epoch": 0.82, "grad_norm": 1.6998116970062256, "learning_rate": 1.6808098750134416e-06, "loss": 0.6435, "step": 31929 }, { "epoch": 0.82, "grad_norm": 2.46567440032959, "learning_rate": 1.6803492598377447e-06, "loss": 0.4995, "step": 31930 }, { "epoch": 0.82, "grad_norm": 9.079229354858398, "learning_rate": 1.6798887019958532e-06, "loss": 0.537, "step": 31931 }, { "epoch": 0.82, "grad_norm": 1.5871249437332153, "learning_rate": 1.6794282014909391e-06, "loss": 0.5213, "step": 31932 }, { "epoch": 0.82, "grad_norm": 1.7772092819213867, "learning_rate": 1.678967758326181e-06, "loss": 0.5363, "step": 31933 }, { "epoch": 0.82, "grad_norm": 1.194597840309143, "learning_rate": 1.6785073725047486e-06, "loss": 0.4143, "step": 31934 }, { "epoch": 0.82, "grad_norm": 1.1217166185379028, "learning_rate": 1.6780470440298135e-06, "loss": 0.4095, "step": 31935 }, { "epoch": 0.82, "grad_norm": 1.0755352973937988, "learning_rate": 1.6775867729045525e-06, "loss": 0.4575, "step": 31936 }, { "epoch": 0.82, "grad_norm": 2.9413514137268066, "learning_rate": 1.6771265591321339e-06, "loss": 0.6947, "step": 31937 }, { "epoch": 0.82, "grad_norm": 0.9397451877593994, "learning_rate": 1.6766664027157287e-06, "loss": 0.4752, "step": 31938 }, { "epoch": 0.82, "grad_norm": 1.2742584943771362, "learning_rate": 1.6762063036585108e-06, "loss": 0.5779, "step": 31939 }, { "epoch": 0.82, "grad_norm": 1.102226972579956, "learning_rate": 1.67574626196365e-06, "loss": 0.5967, "step": 31940 }, { "epoch": 0.82, "grad_norm": 3.403630495071411, "learning_rate": 1.675286277634316e-06, "loss": 0.3081, "step": 31941 }, { "epoch": 0.82, "grad_norm": 0.9226744174957275, "learning_rate": 1.6748263506736772e-06, "loss": 0.4584, "step": 31942 }, { "epoch": 0.82, "grad_norm": 0.995097279548645, "learning_rate": 1.6743664810849069e-06, "loss": 0.5027, "step": 31943 }, { "epoch": 0.82, "grad_norm": 1.1207127571105957, "learning_rate": 1.6739066688711713e-06, "loss": 0.5147, "step": 31944 }, { "epoch": 0.82, "grad_norm": 1.0727102756500244, "learning_rate": 1.673446914035639e-06, "loss": 0.6278, "step": 31945 }, { "epoch": 0.82, "grad_norm": 1.45749831199646, "learning_rate": 1.6729872165814808e-06, "loss": 0.59, "step": 31946 }, { "epoch": 0.82, "grad_norm": 2.8171002864837646, "learning_rate": 1.6725275765118632e-06, "loss": 0.4883, "step": 31947 }, { "epoch": 0.82, "grad_norm": 1.1308382749557495, "learning_rate": 1.6720679938299533e-06, "loss": 0.5442, "step": 31948 }, { "epoch": 0.82, "grad_norm": 3.1167080402374268, "learning_rate": 1.6716084685389167e-06, "loss": 0.6647, "step": 31949 }, { "epoch": 0.82, "grad_norm": 0.917786180973053, "learning_rate": 1.671149000641925e-06, "loss": 0.4191, "step": 31950 }, { "epoch": 0.82, "grad_norm": 1.5202125310897827, "learning_rate": 1.6706895901421415e-06, "loss": 0.5086, "step": 31951 }, { "epoch": 0.82, "grad_norm": 4.14221715927124, "learning_rate": 1.6702302370427292e-06, "loss": 0.5324, "step": 31952 }, { "epoch": 0.82, "grad_norm": 1.363385796546936, "learning_rate": 1.6697709413468598e-06, "loss": 0.501, "step": 31953 }, { "epoch": 0.82, "grad_norm": 1.7954871654510498, "learning_rate": 1.6693117030576955e-06, "loss": 0.4757, "step": 31954 }, { "epoch": 0.82, "grad_norm": 1.4865440130233765, "learning_rate": 1.668852522178399e-06, "loss": 0.4684, "step": 31955 }, { "epoch": 0.82, "grad_norm": 1.4589817523956299, "learning_rate": 1.6683933987121393e-06, "loss": 0.4152, "step": 31956 }, { "epoch": 0.82, "grad_norm": 1.3981796503067017, "learning_rate": 1.6679343326620767e-06, "loss": 0.7225, "step": 31957 }, { "epoch": 0.82, "grad_norm": 1.0997411012649536, "learning_rate": 1.6674753240313768e-06, "loss": 0.5108, "step": 31958 }, { "epoch": 0.82, "grad_norm": 5.898675918579102, "learning_rate": 1.6670163728231991e-06, "loss": 0.8814, "step": 31959 }, { "epoch": 0.82, "grad_norm": 1.0885539054870605, "learning_rate": 1.6665574790407125e-06, "loss": 0.5353, "step": 31960 }, { "epoch": 0.82, "grad_norm": 1.2003315687179565, "learning_rate": 1.6660986426870762e-06, "loss": 0.4811, "step": 31961 }, { "epoch": 0.82, "grad_norm": 1.4170516729354858, "learning_rate": 1.6656398637654491e-06, "loss": 0.5609, "step": 31962 }, { "epoch": 0.82, "grad_norm": 1.3007696866989136, "learning_rate": 1.6651811422789988e-06, "loss": 0.5426, "step": 31963 }, { "epoch": 0.82, "grad_norm": 2.3424835205078125, "learning_rate": 1.6647224782308825e-06, "loss": 0.5988, "step": 31964 }, { "epoch": 0.82, "grad_norm": 3.6961796283721924, "learning_rate": 1.664263871624263e-06, "loss": 0.6035, "step": 31965 }, { "epoch": 0.82, "grad_norm": 0.984539806842804, "learning_rate": 1.6638053224622973e-06, "loss": 0.4808, "step": 31966 }, { "epoch": 0.82, "grad_norm": 1.3825950622558594, "learning_rate": 1.6633468307481504e-06, "loss": 0.5122, "step": 31967 }, { "epoch": 0.82, "grad_norm": 1.8667503595352173, "learning_rate": 1.6628883964849806e-06, "loss": 0.4952, "step": 31968 }, { "epoch": 0.82, "grad_norm": 1.2019089460372925, "learning_rate": 1.6624300196759424e-06, "loss": 0.4985, "step": 31969 }, { "epoch": 0.82, "grad_norm": 1.2984294891357422, "learning_rate": 1.6619717003242008e-06, "loss": 0.4123, "step": 31970 }, { "epoch": 0.82, "grad_norm": 3.709602117538452, "learning_rate": 1.6615134384329123e-06, "loss": 0.6084, "step": 31971 }, { "epoch": 0.82, "grad_norm": 1.0856854915618896, "learning_rate": 1.6610552340052343e-06, "loss": 0.4442, "step": 31972 }, { "epoch": 0.82, "grad_norm": 1.6397886276245117, "learning_rate": 1.6605970870443222e-06, "loss": 0.5473, "step": 31973 }, { "epoch": 0.82, "grad_norm": 1.263280987739563, "learning_rate": 1.6601389975533377e-06, "loss": 0.4779, "step": 31974 }, { "epoch": 0.82, "grad_norm": 2.259385108947754, "learning_rate": 1.6596809655354363e-06, "loss": 0.4957, "step": 31975 }, { "epoch": 0.82, "grad_norm": 1.5941206216812134, "learning_rate": 1.6592229909937707e-06, "loss": 0.5473, "step": 31976 }, { "epoch": 0.82, "grad_norm": 1.2557189464569092, "learning_rate": 1.658765073931503e-06, "loss": 0.5677, "step": 31977 }, { "epoch": 0.82, "grad_norm": 1.7234790325164795, "learning_rate": 1.6583072143517853e-06, "loss": 0.5379, "step": 31978 }, { "epoch": 0.82, "grad_norm": 2.8436102867126465, "learning_rate": 1.6578494122577715e-06, "loss": 0.5706, "step": 31979 }, { "epoch": 0.82, "grad_norm": 1.475245475769043, "learning_rate": 1.6573916676526202e-06, "loss": 0.4002, "step": 31980 }, { "epoch": 0.82, "grad_norm": 1.755034327507019, "learning_rate": 1.6569339805394856e-06, "loss": 0.4892, "step": 31981 }, { "epoch": 0.82, "grad_norm": 8.133810997009277, "learning_rate": 1.6564763509215199e-06, "loss": 0.5786, "step": 31982 }, { "epoch": 0.82, "grad_norm": 2.084718704223633, "learning_rate": 1.6560187788018745e-06, "loss": 0.4283, "step": 31983 }, { "epoch": 0.82, "grad_norm": 1.5288941860198975, "learning_rate": 1.655561264183707e-06, "loss": 0.5912, "step": 31984 }, { "epoch": 0.82, "grad_norm": 3.8937957286834717, "learning_rate": 1.6551038070701697e-06, "loss": 0.6858, "step": 31985 }, { "epoch": 0.82, "grad_norm": 1.8224294185638428, "learning_rate": 1.654646407464412e-06, "loss": 0.5808, "step": 31986 }, { "epoch": 0.82, "grad_norm": 1.7988909482955933, "learning_rate": 1.6541890653695901e-06, "loss": 0.5967, "step": 31987 }, { "epoch": 0.82, "grad_norm": 4.793925762176514, "learning_rate": 1.6537317807888543e-06, "loss": 0.4681, "step": 31988 }, { "epoch": 0.82, "grad_norm": 1.612502098083496, "learning_rate": 1.6532745537253558e-06, "loss": 0.4571, "step": 31989 }, { "epoch": 0.82, "grad_norm": 1.769356369972229, "learning_rate": 1.6528173841822426e-06, "loss": 0.5596, "step": 31990 }, { "epoch": 0.82, "grad_norm": 1.2375805377960205, "learning_rate": 1.6523602721626696e-06, "loss": 0.497, "step": 31991 }, { "epoch": 0.82, "grad_norm": 2.8091108798980713, "learning_rate": 1.6519032176697847e-06, "loss": 0.505, "step": 31992 }, { "epoch": 0.82, "grad_norm": 1.4665803909301758, "learning_rate": 1.651446220706736e-06, "loss": 0.4607, "step": 31993 }, { "epoch": 0.82, "grad_norm": 1.436022162437439, "learning_rate": 1.6509892812766782e-06, "loss": 0.5536, "step": 31994 }, { "epoch": 0.82, "grad_norm": 1.7557213306427002, "learning_rate": 1.650532399382756e-06, "loss": 0.5224, "step": 31995 }, { "epoch": 0.82, "grad_norm": 3.518709897994995, "learning_rate": 1.650075575028116e-06, "loss": 0.6202, "step": 31996 }, { "epoch": 0.82, "grad_norm": 0.9388250112533569, "learning_rate": 1.6496188082159126e-06, "loss": 0.4451, "step": 31997 }, { "epoch": 0.82, "grad_norm": 1.0656105279922485, "learning_rate": 1.6491620989492895e-06, "loss": 0.4528, "step": 31998 }, { "epoch": 0.82, "grad_norm": 1.4824012517929077, "learning_rate": 1.6487054472313958e-06, "loss": 0.5645, "step": 31999 }, { "epoch": 0.82, "grad_norm": 3.4914190769195557, "learning_rate": 1.648248853065374e-06, "loss": 0.6526, "step": 32000 }, { "epoch": 0.82, "grad_norm": 1.938307762145996, "learning_rate": 1.6477923164543764e-06, "loss": 0.6015, "step": 32001 }, { "epoch": 0.82, "grad_norm": 1.1093493700027466, "learning_rate": 1.647335837401547e-06, "loss": 0.5827, "step": 32002 }, { "epoch": 0.82, "grad_norm": 1.4824719429016113, "learning_rate": 1.6468794159100287e-06, "loss": 0.4879, "step": 32003 }, { "epoch": 0.82, "grad_norm": 2.4925239086151123, "learning_rate": 1.6464230519829727e-06, "loss": 0.3993, "step": 32004 }, { "epoch": 0.82, "grad_norm": 1.2630137205123901, "learning_rate": 1.6459667456235195e-06, "loss": 0.4174, "step": 32005 }, { "epoch": 0.82, "grad_norm": 1.0096663236618042, "learning_rate": 1.6455104968348157e-06, "loss": 0.4972, "step": 32006 }, { "epoch": 0.82, "grad_norm": 1.4267446994781494, "learning_rate": 1.645054305620003e-06, "loss": 0.5193, "step": 32007 }, { "epoch": 0.82, "grad_norm": 1.7981961965560913, "learning_rate": 1.644598171982229e-06, "loss": 0.5411, "step": 32008 }, { "epoch": 0.82, "grad_norm": 1.3978074789047241, "learning_rate": 1.6441420959246345e-06, "loss": 0.5181, "step": 32009 }, { "epoch": 0.82, "grad_norm": 1.5272659063339233, "learning_rate": 1.6436860774503605e-06, "loss": 0.65, "step": 32010 }, { "epoch": 0.82, "grad_norm": 1.3941404819488525, "learning_rate": 1.6432301165625552e-06, "loss": 0.4472, "step": 32011 }, { "epoch": 0.82, "grad_norm": 1.7415353059768677, "learning_rate": 1.6427742132643566e-06, "loss": 0.4972, "step": 32012 }, { "epoch": 0.82, "grad_norm": 1.7998405694961548, "learning_rate": 1.6423183675589083e-06, "loss": 0.5716, "step": 32013 }, { "epoch": 0.82, "grad_norm": 1.7407722473144531, "learning_rate": 1.6418625794493493e-06, "loss": 0.4632, "step": 32014 }, { "epoch": 0.82, "grad_norm": 1.243897795677185, "learning_rate": 1.6414068489388235e-06, "loss": 0.5421, "step": 32015 }, { "epoch": 0.82, "grad_norm": 1.7449309825897217, "learning_rate": 1.640951176030471e-06, "loss": 0.4624, "step": 32016 }, { "epoch": 0.82, "grad_norm": 1.249446988105774, "learning_rate": 1.6404955607274287e-06, "loss": 0.39, "step": 32017 }, { "epoch": 0.82, "grad_norm": 1.3406237363815308, "learning_rate": 1.640040003032841e-06, "loss": 0.535, "step": 32018 }, { "epoch": 0.82, "grad_norm": 5.394318103790283, "learning_rate": 1.639584502949847e-06, "loss": 0.6666, "step": 32019 }, { "epoch": 0.82, "grad_norm": 9.014803886413574, "learning_rate": 1.6391290604815802e-06, "loss": 0.4817, "step": 32020 }, { "epoch": 0.82, "grad_norm": 1.146763563156128, "learning_rate": 1.638673675631186e-06, "loss": 0.3864, "step": 32021 }, { "epoch": 0.82, "grad_norm": 1.4768542051315308, "learning_rate": 1.6382183484017989e-06, "loss": 0.484, "step": 32022 }, { "epoch": 0.82, "grad_norm": 1.2613388299942017, "learning_rate": 1.6377630787965582e-06, "loss": 0.4879, "step": 32023 }, { "epoch": 0.82, "grad_norm": 1.5689115524291992, "learning_rate": 1.6373078668185992e-06, "loss": 0.4119, "step": 32024 }, { "epoch": 0.82, "grad_norm": 1.1772948503494263, "learning_rate": 1.636852712471062e-06, "loss": 0.5226, "step": 32025 }, { "epoch": 0.82, "grad_norm": 4.296995639801025, "learning_rate": 1.6363976157570815e-06, "loss": 0.5549, "step": 32026 }, { "epoch": 0.82, "grad_norm": 1.3063791990280151, "learning_rate": 1.635942576679791e-06, "loss": 0.5529, "step": 32027 }, { "epoch": 0.82, "grad_norm": 7.271335124969482, "learning_rate": 1.6354875952423332e-06, "loss": 0.6981, "step": 32028 }, { "epoch": 0.82, "grad_norm": 1.0799881219863892, "learning_rate": 1.6350326714478393e-06, "loss": 0.4209, "step": 32029 }, { "epoch": 0.82, "grad_norm": 1.8627363443374634, "learning_rate": 1.6345778052994444e-06, "loss": 0.4526, "step": 32030 }, { "epoch": 0.82, "grad_norm": 1.484664797782898, "learning_rate": 1.6341229968002813e-06, "loss": 0.6277, "step": 32031 }, { "epoch": 0.82, "grad_norm": 1.0103418827056885, "learning_rate": 1.6336682459534902e-06, "loss": 0.5111, "step": 32032 }, { "epoch": 0.82, "grad_norm": 1.4511535167694092, "learning_rate": 1.6332135527621995e-06, "loss": 0.6396, "step": 32033 }, { "epoch": 0.82, "grad_norm": 3.5641820430755615, "learning_rate": 1.6327589172295432e-06, "loss": 0.694, "step": 32034 }, { "epoch": 0.82, "grad_norm": 8.971020698547363, "learning_rate": 1.6323043393586579e-06, "loss": 0.4903, "step": 32035 }, { "epoch": 0.82, "grad_norm": 1.6243027448654175, "learning_rate": 1.6318498191526732e-06, "loss": 0.5755, "step": 32036 }, { "epoch": 0.82, "grad_norm": 1.5918645858764648, "learning_rate": 1.6313953566147234e-06, "loss": 0.615, "step": 32037 }, { "epoch": 0.82, "grad_norm": 1.7110718488693237, "learning_rate": 1.6309409517479357e-06, "loss": 0.5225, "step": 32038 }, { "epoch": 0.82, "grad_norm": 1.3447321653366089, "learning_rate": 1.630486604555448e-06, "loss": 0.5129, "step": 32039 }, { "epoch": 0.82, "grad_norm": 1.05314302444458, "learning_rate": 1.6300323150403885e-06, "loss": 0.3947, "step": 32040 }, { "epoch": 0.82, "grad_norm": 0.9474879503250122, "learning_rate": 1.629578083205885e-06, "loss": 0.48, "step": 32041 }, { "epoch": 0.82, "grad_norm": 1.5825988054275513, "learning_rate": 1.6291239090550726e-06, "loss": 0.5798, "step": 32042 }, { "epoch": 0.82, "grad_norm": 2.7725107669830322, "learning_rate": 1.62866979259108e-06, "loss": 0.5158, "step": 32043 }, { "epoch": 0.82, "grad_norm": 2.5516915321350098, "learning_rate": 1.6282157338170325e-06, "loss": 0.7571, "step": 32044 }, { "epoch": 0.82, "grad_norm": 1.3007577657699585, "learning_rate": 1.627761732736065e-06, "loss": 0.5717, "step": 32045 }, { "epoch": 0.82, "grad_norm": 2.1305277347564697, "learning_rate": 1.6273077893513044e-06, "loss": 0.5725, "step": 32046 }, { "epoch": 0.82, "grad_norm": 1.617931604385376, "learning_rate": 1.6268539036658782e-06, "loss": 0.5021, "step": 32047 }, { "epoch": 0.82, "grad_norm": 1.3739866018295288, "learning_rate": 1.6264000756829112e-06, "loss": 0.4785, "step": 32048 }, { "epoch": 0.82, "grad_norm": 1.0794938802719116, "learning_rate": 1.6259463054055368e-06, "loss": 0.3922, "step": 32049 }, { "epoch": 0.82, "grad_norm": 6.076328277587891, "learning_rate": 1.6254925928368793e-06, "loss": 0.56, "step": 32050 }, { "epoch": 0.82, "grad_norm": 2.0978190898895264, "learning_rate": 1.6250389379800635e-06, "loss": 0.7352, "step": 32051 }, { "epoch": 0.82, "grad_norm": 4.182042598724365, "learning_rate": 1.6245853408382194e-06, "loss": 0.5279, "step": 32052 }, { "epoch": 0.82, "grad_norm": 1.4449256658554077, "learning_rate": 1.6241318014144724e-06, "loss": 0.5612, "step": 32053 }, { "epoch": 0.82, "grad_norm": 1.4025012254714966, "learning_rate": 1.6236783197119454e-06, "loss": 0.4197, "step": 32054 }, { "epoch": 0.82, "grad_norm": 2.235853672027588, "learning_rate": 1.6232248957337636e-06, "loss": 0.3913, "step": 32055 }, { "epoch": 0.82, "grad_norm": 5.262372970581055, "learning_rate": 1.6227715294830547e-06, "loss": 0.5323, "step": 32056 }, { "epoch": 0.82, "grad_norm": 1.4284332990646362, "learning_rate": 1.6223182209629417e-06, "loss": 0.4459, "step": 32057 }, { "epoch": 0.82, "grad_norm": 1.4460852146148682, "learning_rate": 1.6218649701765455e-06, "loss": 0.593, "step": 32058 }, { "epoch": 0.82, "grad_norm": 8.687437057495117, "learning_rate": 1.6214117771269945e-06, "loss": 0.6277, "step": 32059 }, { "epoch": 0.82, "grad_norm": 2.7501060962677, "learning_rate": 1.6209586418174095e-06, "loss": 0.6309, "step": 32060 }, { "epoch": 0.82, "grad_norm": 1.9639934301376343, "learning_rate": 1.6205055642509115e-06, "loss": 0.6359, "step": 32061 }, { "epoch": 0.82, "grad_norm": 2.255167245864868, "learning_rate": 1.620052544430627e-06, "loss": 0.7162, "step": 32062 }, { "epoch": 0.82, "grad_norm": 3.0282821655273438, "learning_rate": 1.6195995823596755e-06, "loss": 0.5104, "step": 32063 }, { "epoch": 0.82, "grad_norm": 3.9189014434814453, "learning_rate": 1.6191466780411779e-06, "loss": 0.5345, "step": 32064 }, { "epoch": 0.82, "grad_norm": 1.3819360733032227, "learning_rate": 1.618693831478254e-06, "loss": 0.5691, "step": 32065 }, { "epoch": 0.82, "grad_norm": 6.4422831535339355, "learning_rate": 1.618241042674029e-06, "loss": 0.5243, "step": 32066 }, { "epoch": 0.82, "grad_norm": 1.4640133380889893, "learning_rate": 1.6177883116316217e-06, "loss": 0.4766, "step": 32067 }, { "epoch": 0.82, "grad_norm": 1.2820897102355957, "learning_rate": 1.6173356383541473e-06, "loss": 0.4799, "step": 32068 }, { "epoch": 0.82, "grad_norm": 0.9162452816963196, "learning_rate": 1.616883022844733e-06, "loss": 0.453, "step": 32069 }, { "epoch": 0.82, "grad_norm": 1.4877705574035645, "learning_rate": 1.6164304651064932e-06, "loss": 0.3327, "step": 32070 }, { "epoch": 0.82, "grad_norm": 1.230231761932373, "learning_rate": 1.6159779651425479e-06, "loss": 0.4108, "step": 32071 }, { "epoch": 0.82, "grad_norm": 1.7182962894439697, "learning_rate": 1.6155255229560119e-06, "loss": 0.6096, "step": 32072 }, { "epoch": 0.82, "grad_norm": 1.705003261566162, "learning_rate": 1.6150731385500096e-06, "loss": 0.4416, "step": 32073 }, { "epoch": 0.82, "grad_norm": 2.0481300354003906, "learning_rate": 1.6146208119276552e-06, "loss": 0.5617, "step": 32074 }, { "epoch": 0.82, "grad_norm": 1.256003737449646, "learning_rate": 1.6141685430920628e-06, "loss": 0.4565, "step": 32075 }, { "epoch": 0.82, "grad_norm": 1.0309640169143677, "learning_rate": 1.6137163320463556e-06, "loss": 0.299, "step": 32076 }, { "epoch": 0.82, "grad_norm": 1.9833122491836548, "learning_rate": 1.6132641787936465e-06, "loss": 0.5336, "step": 32077 }, { "epoch": 0.82, "grad_norm": 1.9917341470718384, "learning_rate": 1.6128120833370509e-06, "loss": 0.4901, "step": 32078 }, { "epoch": 0.82, "grad_norm": 1.3740307092666626, "learning_rate": 1.612360045679684e-06, "loss": 0.4031, "step": 32079 }, { "epoch": 0.82, "grad_norm": 4.580420970916748, "learning_rate": 1.6119080658246633e-06, "loss": 0.4875, "step": 32080 }, { "epoch": 0.82, "grad_norm": 5.496303081512451, "learning_rate": 1.6114561437751031e-06, "loss": 1.0828, "step": 32081 }, { "epoch": 0.82, "grad_norm": 1.3257249593734741, "learning_rate": 1.611004279534114e-06, "loss": 0.3245, "step": 32082 }, { "epoch": 0.82, "grad_norm": 2.8236565589904785, "learning_rate": 1.610552473104815e-06, "loss": 0.5811, "step": 32083 }, { "epoch": 0.82, "grad_norm": 1.8210190534591675, "learning_rate": 1.610100724490319e-06, "loss": 0.5356, "step": 32084 }, { "epoch": 0.82, "grad_norm": 17.425390243530273, "learning_rate": 1.6096490336937332e-06, "loss": 0.6464, "step": 32085 }, { "epoch": 0.82, "grad_norm": 1.4524931907653809, "learning_rate": 1.6091974007181788e-06, "loss": 0.4941, "step": 32086 }, { "epoch": 0.82, "grad_norm": 1.6252365112304688, "learning_rate": 1.6087458255667642e-06, "loss": 0.5794, "step": 32087 }, { "epoch": 0.82, "grad_norm": 3.4071781635284424, "learning_rate": 1.6082943082426017e-06, "loss": 0.5645, "step": 32088 }, { "epoch": 0.82, "grad_norm": 2.140657901763916, "learning_rate": 1.6078428487487997e-06, "loss": 0.6077, "step": 32089 }, { "epoch": 0.82, "grad_norm": 1.79437255859375, "learning_rate": 1.6073914470884743e-06, "loss": 0.6619, "step": 32090 }, { "epoch": 0.82, "grad_norm": 1.9859182834625244, "learning_rate": 1.6069401032647348e-06, "loss": 0.5175, "step": 32091 }, { "epoch": 0.82, "grad_norm": 1.451973795890808, "learning_rate": 1.6064888172806891e-06, "loss": 0.3914, "step": 32092 }, { "epoch": 0.82, "grad_norm": 2.217825412750244, "learning_rate": 1.6060375891394509e-06, "loss": 0.5574, "step": 32093 }, { "epoch": 0.82, "grad_norm": 2.419430732727051, "learning_rate": 1.6055864188441283e-06, "loss": 0.6197, "step": 32094 }, { "epoch": 0.82, "grad_norm": 1.3582011461257935, "learning_rate": 1.6051353063978303e-06, "loss": 0.4617, "step": 32095 }, { "epoch": 0.82, "grad_norm": 1.9190236330032349, "learning_rate": 1.6046842518036632e-06, "loss": 0.4839, "step": 32096 }, { "epoch": 0.82, "grad_norm": 1.5852694511413574, "learning_rate": 1.6042332550647399e-06, "loss": 0.4052, "step": 32097 }, { "epoch": 0.82, "grad_norm": 1.3181614875793457, "learning_rate": 1.603782316184167e-06, "loss": 0.4148, "step": 32098 }, { "epoch": 0.82, "grad_norm": 0.9312149882316589, "learning_rate": 1.6033314351650486e-06, "loss": 0.498, "step": 32099 }, { "epoch": 0.82, "grad_norm": 3.045673131942749, "learning_rate": 1.6028806120104968e-06, "loss": 0.4821, "step": 32100 }, { "epoch": 0.82, "grad_norm": 1.3192147016525269, "learning_rate": 1.6024298467236166e-06, "loss": 0.5445, "step": 32101 }, { "epoch": 0.82, "grad_norm": 1.5277868509292603, "learning_rate": 1.6019791393075135e-06, "loss": 0.5674, "step": 32102 }, { "epoch": 0.82, "grad_norm": 1.5461887121200562, "learning_rate": 1.6015284897652938e-06, "loss": 0.4301, "step": 32103 }, { "epoch": 0.82, "grad_norm": 1.4267098903656006, "learning_rate": 1.6010778981000641e-06, "loss": 0.5629, "step": 32104 }, { "epoch": 0.82, "grad_norm": 1.4381731748580933, "learning_rate": 1.6006273643149261e-06, "loss": 0.3911, "step": 32105 }, { "epoch": 0.82, "grad_norm": 1.0809195041656494, "learning_rate": 1.6001768884129897e-06, "loss": 0.4667, "step": 32106 }, { "epoch": 0.82, "grad_norm": 1.1173076629638672, "learning_rate": 1.599726470397357e-06, "loss": 0.4786, "step": 32107 }, { "epoch": 0.82, "grad_norm": 1.5632106065750122, "learning_rate": 1.5992761102711319e-06, "loss": 0.5078, "step": 32108 }, { "epoch": 0.82, "grad_norm": 14.13174819946289, "learning_rate": 1.5988258080374152e-06, "loss": 0.5861, "step": 32109 }, { "epoch": 0.82, "grad_norm": 1.5428122282028198, "learning_rate": 1.5983755636993148e-06, "loss": 0.5571, "step": 32110 }, { "epoch": 0.82, "grad_norm": 1.968512773513794, "learning_rate": 1.5979253772599324e-06, "loss": 0.6266, "step": 32111 }, { "epoch": 0.82, "grad_norm": 1.5560894012451172, "learning_rate": 1.5974752487223676e-06, "loss": 0.5114, "step": 32112 }, { "epoch": 0.82, "grad_norm": 6.01476526260376, "learning_rate": 1.5970251780897262e-06, "loss": 0.6757, "step": 32113 }, { "epoch": 0.82, "grad_norm": 2.7104525566101074, "learning_rate": 1.5965751653651086e-06, "loss": 0.5753, "step": 32114 }, { "epoch": 0.82, "grad_norm": 1.4794883728027344, "learning_rate": 1.5961252105516122e-06, "loss": 0.4584, "step": 32115 }, { "epoch": 0.82, "grad_norm": 15.187527656555176, "learning_rate": 1.595675313652344e-06, "loss": 0.4838, "step": 32116 }, { "epoch": 0.82, "grad_norm": 2.499866485595703, "learning_rate": 1.595225474670401e-06, "loss": 0.5775, "step": 32117 }, { "epoch": 0.82, "grad_norm": 10.164746284484863, "learning_rate": 1.5947756936088843e-06, "loss": 0.5558, "step": 32118 }, { "epoch": 0.82, "grad_norm": 1.1241036653518677, "learning_rate": 1.5943259704708903e-06, "loss": 0.5324, "step": 32119 }, { "epoch": 0.82, "grad_norm": 0.791911780834198, "learning_rate": 1.5938763052595218e-06, "loss": 0.4249, "step": 32120 }, { "epoch": 0.82, "grad_norm": 1.3302944898605347, "learning_rate": 1.5934266979778778e-06, "loss": 0.4991, "step": 32121 }, { "epoch": 0.82, "grad_norm": 1.5364112854003906, "learning_rate": 1.5929771486290534e-06, "loss": 0.6166, "step": 32122 }, { "epoch": 0.82, "grad_norm": 1.2761991024017334, "learning_rate": 1.5925276572161508e-06, "loss": 0.4258, "step": 32123 }, { "epoch": 0.82, "grad_norm": 2.311218738555908, "learning_rate": 1.592078223742265e-06, "loss": 0.6805, "step": 32124 }, { "epoch": 0.82, "grad_norm": 1.2593368291854858, "learning_rate": 1.5916288482104936e-06, "loss": 0.4923, "step": 32125 }, { "epoch": 0.82, "grad_norm": 1.2957572937011719, "learning_rate": 1.5911795306239319e-06, "loss": 0.4366, "step": 32126 }, { "epoch": 0.82, "grad_norm": 1.3335542678833008, "learning_rate": 1.5907302709856797e-06, "loss": 0.3939, "step": 32127 }, { "epoch": 0.82, "grad_norm": 0.8401738405227661, "learning_rate": 1.5902810692988314e-06, "loss": 0.3451, "step": 32128 }, { "epoch": 0.82, "grad_norm": 2.379868507385254, "learning_rate": 1.5898319255664796e-06, "loss": 0.7733, "step": 32129 }, { "epoch": 0.82, "grad_norm": 1.0812886953353882, "learning_rate": 1.5893828397917254e-06, "loss": 0.459, "step": 32130 }, { "epoch": 0.82, "grad_norm": 1.7454708814620972, "learning_rate": 1.588933811977661e-06, "loss": 0.588, "step": 32131 }, { "epoch": 0.82, "grad_norm": 1.2356232404708862, "learning_rate": 1.588484842127378e-06, "loss": 0.4216, "step": 32132 }, { "epoch": 0.82, "grad_norm": 1.5450761318206787, "learning_rate": 1.588035930243974e-06, "loss": 0.4871, "step": 32133 }, { "epoch": 0.82, "grad_norm": 1.3628501892089844, "learning_rate": 1.5875870763305423e-06, "loss": 0.4641, "step": 32134 }, { "epoch": 0.82, "grad_norm": 2.310875177383423, "learning_rate": 1.5871382803901757e-06, "loss": 0.52, "step": 32135 }, { "epoch": 0.82, "grad_norm": 3.891278028488159, "learning_rate": 1.586689542425963e-06, "loss": 0.7892, "step": 32136 }, { "epoch": 0.82, "grad_norm": 1.492337942123413, "learning_rate": 1.5862408624410043e-06, "loss": 0.3651, "step": 32137 }, { "epoch": 0.82, "grad_norm": 1.2819663286209106, "learning_rate": 1.5857922404383863e-06, "loss": 0.6243, "step": 32138 }, { "epoch": 0.82, "grad_norm": 2.270045280456543, "learning_rate": 1.5853436764211993e-06, "loss": 0.5128, "step": 32139 }, { "epoch": 0.82, "grad_norm": 1.156160593032837, "learning_rate": 1.5848951703925398e-06, "loss": 0.4374, "step": 32140 }, { "epoch": 0.82, "grad_norm": 1.0221279859542847, "learning_rate": 1.584446722355496e-06, "loss": 0.423, "step": 32141 }, { "epoch": 0.82, "grad_norm": 2.4551773071289062, "learning_rate": 1.5839983323131581e-06, "loss": 0.6391, "step": 32142 }, { "epoch": 0.82, "grad_norm": 1.0647414922714233, "learning_rate": 1.5835500002686143e-06, "loss": 0.5716, "step": 32143 }, { "epoch": 0.82, "grad_norm": 1.6481468677520752, "learning_rate": 1.5831017262249582e-06, "loss": 0.4587, "step": 32144 }, { "epoch": 0.82, "grad_norm": 1.4962406158447266, "learning_rate": 1.5826535101852768e-06, "loss": 0.3856, "step": 32145 }, { "epoch": 0.82, "grad_norm": 3.7160696983337402, "learning_rate": 1.582205352152657e-06, "loss": 0.5015, "step": 32146 }, { "epoch": 0.82, "grad_norm": 1.7741553783416748, "learning_rate": 1.5817572521301917e-06, "loss": 0.4051, "step": 32147 }, { "epoch": 0.82, "grad_norm": 1.8434743881225586, "learning_rate": 1.581309210120966e-06, "loss": 0.5076, "step": 32148 }, { "epoch": 0.82, "grad_norm": 1.1322849988937378, "learning_rate": 1.5808612261280688e-06, "loss": 0.4805, "step": 32149 }, { "epoch": 0.82, "grad_norm": 0.853178858757019, "learning_rate": 1.580413300154584e-06, "loss": 0.4678, "step": 32150 }, { "epoch": 0.82, "grad_norm": 2.837538719177246, "learning_rate": 1.579965432203604e-06, "loss": 0.6402, "step": 32151 }, { "epoch": 0.82, "grad_norm": 1.0970231294631958, "learning_rate": 1.5795176222782116e-06, "loss": 0.5387, "step": 32152 }, { "epoch": 0.82, "grad_norm": 7.552611827850342, "learning_rate": 1.5790698703814912e-06, "loss": 0.7852, "step": 32153 }, { "epoch": 0.82, "grad_norm": 2.0754382610321045, "learning_rate": 1.5786221765165332e-06, "loss": 0.5024, "step": 32154 }, { "epoch": 0.82, "grad_norm": 0.9940919280052185, "learning_rate": 1.5781745406864202e-06, "loss": 0.4403, "step": 32155 }, { "epoch": 0.82, "grad_norm": 2.368382453918457, "learning_rate": 1.5777269628942348e-06, "loss": 0.5083, "step": 32156 }, { "epoch": 0.82, "grad_norm": 0.982677698135376, "learning_rate": 1.577279443143066e-06, "loss": 0.4684, "step": 32157 }, { "epoch": 0.82, "grad_norm": 1.701637625694275, "learning_rate": 1.5768319814359956e-06, "loss": 0.5721, "step": 32158 }, { "epoch": 0.82, "grad_norm": 4.8831658363342285, "learning_rate": 1.5763845777761078e-06, "loss": 0.6572, "step": 32159 }, { "epoch": 0.82, "grad_norm": 1.3516862392425537, "learning_rate": 1.5759372321664824e-06, "loss": 0.4718, "step": 32160 }, { "epoch": 0.82, "grad_norm": 1.2177972793579102, "learning_rate": 1.5754899446102067e-06, "loss": 0.5904, "step": 32161 }, { "epoch": 0.82, "grad_norm": 1.7560352087020874, "learning_rate": 1.575042715110363e-06, "loss": 0.6122, "step": 32162 }, { "epoch": 0.82, "grad_norm": 4.7025604248046875, "learning_rate": 1.5745955436700289e-06, "loss": 0.4586, "step": 32163 }, { "epoch": 0.82, "grad_norm": 1.9185149669647217, "learning_rate": 1.5741484302922905e-06, "loss": 0.4929, "step": 32164 }, { "epoch": 0.82, "grad_norm": 2.905595302581787, "learning_rate": 1.5737013749802288e-06, "loss": 0.6547, "step": 32165 }, { "epoch": 0.82, "grad_norm": 1.2562456130981445, "learning_rate": 1.5732543777369225e-06, "loss": 0.6523, "step": 32166 }, { "epoch": 0.82, "grad_norm": 2.5994715690612793, "learning_rate": 1.57280743856545e-06, "loss": 0.5922, "step": 32167 }, { "epoch": 0.82, "grad_norm": 1.3803983926773071, "learning_rate": 1.5723605574688973e-06, "loss": 0.4631, "step": 32168 }, { "epoch": 0.82, "grad_norm": 1.2272098064422607, "learning_rate": 1.571913734450341e-06, "loss": 0.5236, "step": 32169 }, { "epoch": 0.82, "grad_norm": 1.2112021446228027, "learning_rate": 1.5714669695128582e-06, "loss": 0.4494, "step": 32170 }, { "epoch": 0.82, "grad_norm": 1.22311270236969, "learning_rate": 1.5710202626595307e-06, "loss": 0.5276, "step": 32171 }, { "epoch": 0.82, "grad_norm": 1.7318040132522583, "learning_rate": 1.570573613893437e-06, "loss": 0.4001, "step": 32172 }, { "epoch": 0.82, "grad_norm": 0.9787954092025757, "learning_rate": 1.5701270232176535e-06, "loss": 0.4936, "step": 32173 }, { "epoch": 0.82, "grad_norm": 1.390884518623352, "learning_rate": 1.5696804906352569e-06, "loss": 0.5162, "step": 32174 }, { "epoch": 0.82, "grad_norm": 4.51752233505249, "learning_rate": 1.569234016149327e-06, "loss": 0.6379, "step": 32175 }, { "epoch": 0.82, "grad_norm": 1.3226114511489868, "learning_rate": 1.568787599762941e-06, "loss": 0.4742, "step": 32176 }, { "epoch": 0.82, "grad_norm": 2.3053624629974365, "learning_rate": 1.5683412414791698e-06, "loss": 0.8092, "step": 32177 }, { "epoch": 0.82, "grad_norm": 1.8948427438735962, "learning_rate": 1.567894941301097e-06, "loss": 0.5586, "step": 32178 }, { "epoch": 0.82, "grad_norm": 1.206433892250061, "learning_rate": 1.567448699231795e-06, "loss": 0.4299, "step": 32179 }, { "epoch": 0.82, "grad_norm": 1.5430474281311035, "learning_rate": 1.567002515274335e-06, "loss": 0.5126, "step": 32180 }, { "epoch": 0.82, "grad_norm": 1.3443254232406616, "learning_rate": 1.5665563894317992e-06, "loss": 0.5582, "step": 32181 }, { "epoch": 0.82, "grad_norm": 0.8615291118621826, "learning_rate": 1.566110321707258e-06, "loss": 0.4901, "step": 32182 }, { "epoch": 0.82, "grad_norm": 1.922332763671875, "learning_rate": 1.565664312103785e-06, "loss": 0.5729, "step": 32183 }, { "epoch": 0.82, "grad_norm": 6.757279396057129, "learning_rate": 1.5652183606244532e-06, "loss": 0.7226, "step": 32184 }, { "epoch": 0.82, "grad_norm": 1.783290982246399, "learning_rate": 1.5647724672723397e-06, "loss": 0.7641, "step": 32185 }, { "epoch": 0.82, "grad_norm": 2.0125484466552734, "learning_rate": 1.5643266320505145e-06, "loss": 0.5572, "step": 32186 }, { "epoch": 0.82, "grad_norm": 1.5466221570968628, "learning_rate": 1.5638808549620477e-06, "loss": 0.3955, "step": 32187 }, { "epoch": 0.82, "grad_norm": 4.229511737823486, "learning_rate": 1.5634351360100165e-06, "loss": 0.8942, "step": 32188 }, { "epoch": 0.83, "grad_norm": 2.188706874847412, "learning_rate": 1.5629894751974895e-06, "loss": 0.7446, "step": 32189 }, { "epoch": 0.83, "grad_norm": 3.550941228866577, "learning_rate": 1.5625438725275389e-06, "loss": 0.5353, "step": 32190 }, { "epoch": 0.83, "grad_norm": 2.3246564865112305, "learning_rate": 1.562098328003233e-06, "loss": 0.5205, "step": 32191 }, { "epoch": 0.83, "grad_norm": 1.0610244274139404, "learning_rate": 1.5616528416276454e-06, "loss": 0.5008, "step": 32192 }, { "epoch": 0.83, "grad_norm": 1.2315232753753662, "learning_rate": 1.5612074134038457e-06, "loss": 0.5462, "step": 32193 }, { "epoch": 0.83, "grad_norm": 1.7007005214691162, "learning_rate": 1.5607620433349003e-06, "loss": 0.6042, "step": 32194 }, { "epoch": 0.83, "grad_norm": 1.5798240900039673, "learning_rate": 1.5603167314238832e-06, "loss": 0.4229, "step": 32195 }, { "epoch": 0.83, "grad_norm": 1.6178159713745117, "learning_rate": 1.559871477673861e-06, "loss": 0.4696, "step": 32196 }, { "epoch": 0.83, "grad_norm": 1.3690346479415894, "learning_rate": 1.5594262820879003e-06, "loss": 0.5443, "step": 32197 }, { "epoch": 0.83, "grad_norm": 2.061394453048706, "learning_rate": 1.5589811446690718e-06, "loss": 0.6788, "step": 32198 }, { "epoch": 0.83, "grad_norm": 1.4290496110916138, "learning_rate": 1.5585360654204429e-06, "loss": 0.5127, "step": 32199 }, { "epoch": 0.83, "grad_norm": 1.046613335609436, "learning_rate": 1.5580910443450792e-06, "loss": 0.4961, "step": 32200 }, { "epoch": 0.83, "grad_norm": 1.5572047233581543, "learning_rate": 1.557646081446047e-06, "loss": 0.5315, "step": 32201 }, { "epoch": 0.83, "grad_norm": 1.9505786895751953, "learning_rate": 1.5572011767264162e-06, "loss": 0.5686, "step": 32202 }, { "epoch": 0.83, "grad_norm": 1.2840690612792969, "learning_rate": 1.5567563301892497e-06, "loss": 0.621, "step": 32203 }, { "epoch": 0.83, "grad_norm": 0.8407716155052185, "learning_rate": 1.556311541837612e-06, "loss": 0.4259, "step": 32204 }, { "epoch": 0.83, "grad_norm": 1.2715623378753662, "learning_rate": 1.5558668116745734e-06, "loss": 0.5575, "step": 32205 }, { "epoch": 0.83, "grad_norm": 1.3286528587341309, "learning_rate": 1.5554221397031944e-06, "loss": 0.6031, "step": 32206 }, { "epoch": 0.83, "grad_norm": 1.1798938512802124, "learning_rate": 1.554977525926542e-06, "loss": 0.4325, "step": 32207 }, { "epoch": 0.83, "grad_norm": 4.899576663970947, "learning_rate": 1.5545329703476753e-06, "loss": 0.5944, "step": 32208 }, { "epoch": 0.83, "grad_norm": 1.3300244808197021, "learning_rate": 1.5540884729696636e-06, "loss": 0.5049, "step": 32209 }, { "epoch": 0.83, "grad_norm": 2.0016770362854004, "learning_rate": 1.5536440337955693e-06, "loss": 0.5508, "step": 32210 }, { "epoch": 0.83, "grad_norm": 1.1210957765579224, "learning_rate": 1.55319965282845e-06, "loss": 0.5338, "step": 32211 }, { "epoch": 0.83, "grad_norm": 1.5240572690963745, "learning_rate": 1.552755330071375e-06, "loss": 0.4895, "step": 32212 }, { "epoch": 0.83, "grad_norm": 1.4043430089950562, "learning_rate": 1.5523110655274031e-06, "loss": 0.4616, "step": 32213 }, { "epoch": 0.83, "grad_norm": 16.101423263549805, "learning_rate": 1.5518668591995956e-06, "loss": 0.4296, "step": 32214 }, { "epoch": 0.83, "grad_norm": 1.8171358108520508, "learning_rate": 1.551422711091013e-06, "loss": 0.6898, "step": 32215 }, { "epoch": 0.83, "grad_norm": 1.0466185808181763, "learning_rate": 1.5509786212047184e-06, "loss": 0.6291, "step": 32216 }, { "epoch": 0.83, "grad_norm": 1.738193154335022, "learning_rate": 1.5505345895437706e-06, "loss": 0.6536, "step": 32217 }, { "epoch": 0.83, "grad_norm": 1.1608107089996338, "learning_rate": 1.5500906161112284e-06, "loss": 0.5064, "step": 32218 }, { "epoch": 0.83, "grad_norm": 3.773533582687378, "learning_rate": 1.5496467009101546e-06, "loss": 0.6119, "step": 32219 }, { "epoch": 0.83, "grad_norm": 1.744170069694519, "learning_rate": 1.549202843943607e-06, "loss": 0.4216, "step": 32220 }, { "epoch": 0.83, "grad_norm": 1.8273190259933472, "learning_rate": 1.5487590452146416e-06, "loss": 0.4688, "step": 32221 }, { "epoch": 0.83, "grad_norm": 5.879594802856445, "learning_rate": 1.5483153047263222e-06, "loss": 0.7558, "step": 32222 }, { "epoch": 0.83, "grad_norm": 2.0900230407714844, "learning_rate": 1.5478716224817036e-06, "loss": 0.6357, "step": 32223 }, { "epoch": 0.83, "grad_norm": 1.407725214958191, "learning_rate": 1.5474279984838436e-06, "loss": 0.6706, "step": 32224 }, { "epoch": 0.83, "grad_norm": 1.2497249841690063, "learning_rate": 1.5469844327357974e-06, "loss": 0.4536, "step": 32225 }, { "epoch": 0.83, "grad_norm": 3.2978055477142334, "learning_rate": 1.546540925240625e-06, "loss": 0.689, "step": 32226 }, { "epoch": 0.83, "grad_norm": 2.000955820083618, "learning_rate": 1.5460974760013824e-06, "loss": 0.595, "step": 32227 }, { "epoch": 0.83, "grad_norm": 6.1762566566467285, "learning_rate": 1.5456540850211221e-06, "loss": 0.5784, "step": 32228 }, { "epoch": 0.83, "grad_norm": 0.9977271556854248, "learning_rate": 1.5452107523029048e-06, "loss": 0.4984, "step": 32229 }, { "epoch": 0.83, "grad_norm": 1.817330241203308, "learning_rate": 1.5447674778497834e-06, "loss": 0.5169, "step": 32230 }, { "epoch": 0.83, "grad_norm": 1.9394402503967285, "learning_rate": 1.5443242616648124e-06, "loss": 0.4853, "step": 32231 }, { "epoch": 0.83, "grad_norm": 1.3182892799377441, "learning_rate": 1.5438811037510437e-06, "loss": 0.4445, "step": 32232 }, { "epoch": 0.83, "grad_norm": 1.5873756408691406, "learning_rate": 1.543438004111536e-06, "loss": 0.5224, "step": 32233 }, { "epoch": 0.83, "grad_norm": 2.3473570346832275, "learning_rate": 1.5429949627493413e-06, "loss": 0.598, "step": 32234 }, { "epoch": 0.83, "grad_norm": 2.8079402446746826, "learning_rate": 1.5425519796675092e-06, "loss": 0.6498, "step": 32235 }, { "epoch": 0.83, "grad_norm": 1.3955272436141968, "learning_rate": 1.5421090548690976e-06, "loss": 0.4886, "step": 32236 }, { "epoch": 0.83, "grad_norm": 2.5311036109924316, "learning_rate": 1.541666188357156e-06, "loss": 0.4787, "step": 32237 }, { "epoch": 0.83, "grad_norm": 1.9055852890014648, "learning_rate": 1.5412233801347355e-06, "loss": 0.5893, "step": 32238 }, { "epoch": 0.83, "grad_norm": 1.6164162158966064, "learning_rate": 1.5407806302048922e-06, "loss": 0.5124, "step": 32239 }, { "epoch": 0.83, "grad_norm": 3.74657940864563, "learning_rate": 1.5403379385706729e-06, "loss": 0.4961, "step": 32240 }, { "epoch": 0.83, "grad_norm": 1.7345130443572998, "learning_rate": 1.5398953052351307e-06, "loss": 0.6277, "step": 32241 }, { "epoch": 0.83, "grad_norm": 2.757309913635254, "learning_rate": 1.5394527302013119e-06, "loss": 0.6921, "step": 32242 }, { "epoch": 0.83, "grad_norm": 1.4577133655548096, "learning_rate": 1.5390102134722718e-06, "loss": 0.4573, "step": 32243 }, { "epoch": 0.83, "grad_norm": 1.3682466745376587, "learning_rate": 1.538567755051057e-06, "loss": 0.4037, "step": 32244 }, { "epoch": 0.83, "grad_norm": 1.318113923072815, "learning_rate": 1.538125354940716e-06, "loss": 0.6021, "step": 32245 }, { "epoch": 0.83, "grad_norm": 1.0719108581542969, "learning_rate": 1.5376830131443e-06, "loss": 0.44, "step": 32246 }, { "epoch": 0.83, "grad_norm": 2.98221755027771, "learning_rate": 1.5372407296648572e-06, "loss": 0.4789, "step": 32247 }, { "epoch": 0.83, "grad_norm": 1.6101741790771484, "learning_rate": 1.5367985045054346e-06, "loss": 0.5257, "step": 32248 }, { "epoch": 0.83, "grad_norm": 2.1784493923187256, "learning_rate": 1.536356337669076e-06, "loss": 0.4747, "step": 32249 }, { "epoch": 0.83, "grad_norm": 1.2465052604675293, "learning_rate": 1.5359142291588359e-06, "loss": 0.4495, "step": 32250 }, { "epoch": 0.83, "grad_norm": 1.785484790802002, "learning_rate": 1.535472178977756e-06, "loss": 0.5737, "step": 32251 }, { "epoch": 0.83, "grad_norm": 1.2547721862792969, "learning_rate": 1.535030187128882e-06, "loss": 0.4431, "step": 32252 }, { "epoch": 0.83, "grad_norm": 1.725335717201233, "learning_rate": 1.5345882536152646e-06, "loss": 0.5269, "step": 32253 }, { "epoch": 0.83, "grad_norm": 14.272483825683594, "learning_rate": 1.5341463784399457e-06, "loss": 0.4712, "step": 32254 }, { "epoch": 0.83, "grad_norm": 1.5244603157043457, "learning_rate": 1.533704561605972e-06, "loss": 0.5758, "step": 32255 }, { "epoch": 0.83, "grad_norm": 1.7223800420761108, "learning_rate": 1.5332628031163843e-06, "loss": 0.5196, "step": 32256 }, { "epoch": 0.83, "grad_norm": 1.4571821689605713, "learning_rate": 1.5328211029742334e-06, "loss": 0.5718, "step": 32257 }, { "epoch": 0.83, "grad_norm": 1.2178969383239746, "learning_rate": 1.5323794611825594e-06, "loss": 0.561, "step": 32258 }, { "epoch": 0.83, "grad_norm": 1.100354552268982, "learning_rate": 1.5319378777444028e-06, "loss": 0.6103, "step": 32259 }, { "epoch": 0.83, "grad_norm": 10.159385681152344, "learning_rate": 1.5314963526628134e-06, "loss": 0.587, "step": 32260 }, { "epoch": 0.83, "grad_norm": 1.2373474836349487, "learning_rate": 1.5310548859408314e-06, "loss": 0.4528, "step": 32261 }, { "epoch": 0.83, "grad_norm": 1.1398937702178955, "learning_rate": 1.5306134775814952e-06, "loss": 0.4821, "step": 32262 }, { "epoch": 0.83, "grad_norm": 2.5783231258392334, "learning_rate": 1.5301721275878523e-06, "loss": 0.4868, "step": 32263 }, { "epoch": 0.83, "grad_norm": 1.6190155744552612, "learning_rate": 1.5297308359629425e-06, "loss": 0.5561, "step": 32264 }, { "epoch": 0.83, "grad_norm": 7.142679691314697, "learning_rate": 1.5292896027098059e-06, "loss": 0.3723, "step": 32265 }, { "epoch": 0.83, "grad_norm": 1.6837230920791626, "learning_rate": 1.5288484278314809e-06, "loss": 0.5268, "step": 32266 }, { "epoch": 0.83, "grad_norm": 2.0003867149353027, "learning_rate": 1.5284073113310126e-06, "loss": 0.6297, "step": 32267 }, { "epoch": 0.83, "grad_norm": 1.0263952016830444, "learning_rate": 1.5279662532114392e-06, "loss": 0.5885, "step": 32268 }, { "epoch": 0.83, "grad_norm": 1.6013476848602295, "learning_rate": 1.5275252534757966e-06, "loss": 0.4838, "step": 32269 }, { "epoch": 0.83, "grad_norm": 2.6457066535949707, "learning_rate": 1.5270843121271306e-06, "loss": 0.5161, "step": 32270 }, { "epoch": 0.83, "grad_norm": 1.5000783205032349, "learning_rate": 1.5266434291684761e-06, "loss": 0.5078, "step": 32271 }, { "epoch": 0.83, "grad_norm": 1.0656882524490356, "learning_rate": 1.526202604602871e-06, "loss": 0.5388, "step": 32272 }, { "epoch": 0.83, "grad_norm": 1.7010829448699951, "learning_rate": 1.5257618384333517e-06, "loss": 0.4688, "step": 32273 }, { "epoch": 0.83, "grad_norm": 15.485675811767578, "learning_rate": 1.52532113066296e-06, "loss": 0.4107, "step": 32274 }, { "epoch": 0.83, "grad_norm": 1.4094375371932983, "learning_rate": 1.5248804812947304e-06, "loss": 0.4608, "step": 32275 }, { "epoch": 0.83, "grad_norm": 1.8524588346481323, "learning_rate": 1.5244398903316981e-06, "loss": 0.721, "step": 32276 }, { "epoch": 0.83, "grad_norm": 1.428970456123352, "learning_rate": 1.5239993577769042e-06, "loss": 0.5319, "step": 32277 }, { "epoch": 0.83, "grad_norm": 1.3167730569839478, "learning_rate": 1.5235588836333803e-06, "loss": 0.3857, "step": 32278 }, { "epoch": 0.83, "grad_norm": 0.7997671961784363, "learning_rate": 1.5231184679041611e-06, "loss": 0.327, "step": 32279 }, { "epoch": 0.83, "grad_norm": 1.1119847297668457, "learning_rate": 1.5226781105922872e-06, "loss": 0.3357, "step": 32280 }, { "epoch": 0.83, "grad_norm": 1.297934651374817, "learning_rate": 1.5222378117007885e-06, "loss": 0.492, "step": 32281 }, { "epoch": 0.83, "grad_norm": 1.6859946250915527, "learning_rate": 1.5217975712327004e-06, "loss": 0.5943, "step": 32282 }, { "epoch": 0.83, "grad_norm": 4.802960395812988, "learning_rate": 1.521357389191056e-06, "loss": 0.5547, "step": 32283 }, { "epoch": 0.83, "grad_norm": 1.1979925632476807, "learning_rate": 1.5209172655788907e-06, "loss": 0.4433, "step": 32284 }, { "epoch": 0.83, "grad_norm": 1.7013616561889648, "learning_rate": 1.5204772003992373e-06, "loss": 0.5677, "step": 32285 }, { "epoch": 0.83, "grad_norm": 1.6850764751434326, "learning_rate": 1.5200371936551251e-06, "loss": 0.5227, "step": 32286 }, { "epoch": 0.83, "grad_norm": 1.1503651142120361, "learning_rate": 1.5195972453495911e-06, "loss": 0.439, "step": 32287 }, { "epoch": 0.83, "grad_norm": 1.180195689201355, "learning_rate": 1.5191573554856654e-06, "loss": 0.4011, "step": 32288 }, { "epoch": 0.83, "grad_norm": 4.042330741882324, "learning_rate": 1.5187175240663775e-06, "loss": 0.5335, "step": 32289 }, { "epoch": 0.83, "grad_norm": 1.4253309965133667, "learning_rate": 1.5182777510947589e-06, "loss": 0.5248, "step": 32290 }, { "epoch": 0.83, "grad_norm": 1.526961088180542, "learning_rate": 1.5178380365738443e-06, "loss": 0.4851, "step": 32291 }, { "epoch": 0.83, "grad_norm": 1.348052978515625, "learning_rate": 1.5173983805066594e-06, "loss": 0.6433, "step": 32292 }, { "epoch": 0.83, "grad_norm": 1.2989020347595215, "learning_rate": 1.516958782896234e-06, "loss": 0.4269, "step": 32293 }, { "epoch": 0.83, "grad_norm": 2.2041988372802734, "learning_rate": 1.5165192437456023e-06, "loss": 0.5594, "step": 32294 }, { "epoch": 0.83, "grad_norm": 1.2950787544250488, "learning_rate": 1.5160797630577883e-06, "loss": 0.4437, "step": 32295 }, { "epoch": 0.83, "grad_norm": 1.8620060682296753, "learning_rate": 1.5156403408358245e-06, "loss": 0.4661, "step": 32296 }, { "epoch": 0.83, "grad_norm": 1.8018724918365479, "learning_rate": 1.5152009770827337e-06, "loss": 0.5415, "step": 32297 }, { "epoch": 0.83, "grad_norm": 1.2694621086120605, "learning_rate": 1.514761671801549e-06, "loss": 0.5126, "step": 32298 }, { "epoch": 0.83, "grad_norm": 1.8479435443878174, "learning_rate": 1.514322424995296e-06, "loss": 0.6219, "step": 32299 }, { "epoch": 0.83, "grad_norm": 2.156712532043457, "learning_rate": 1.5138832366669997e-06, "loss": 0.4915, "step": 32300 }, { "epoch": 0.83, "grad_norm": 1.3791345357894897, "learning_rate": 1.5134441068196904e-06, "loss": 0.6246, "step": 32301 }, { "epoch": 0.83, "grad_norm": 1.1507797241210938, "learning_rate": 1.513005035456393e-06, "loss": 0.5408, "step": 32302 }, { "epoch": 0.83, "grad_norm": 1.7034955024719238, "learning_rate": 1.5125660225801309e-06, "loss": 0.485, "step": 32303 }, { "epoch": 0.83, "grad_norm": 1.5842865705490112, "learning_rate": 1.5121270681939337e-06, "loss": 0.5512, "step": 32304 }, { "epoch": 0.83, "grad_norm": 1.7550972700119019, "learning_rate": 1.5116881723008237e-06, "loss": 0.4761, "step": 32305 }, { "epoch": 0.83, "grad_norm": 1.5751410722732544, "learning_rate": 1.511249334903826e-06, "loss": 0.5928, "step": 32306 }, { "epoch": 0.83, "grad_norm": 1.3477400541305542, "learning_rate": 1.5108105560059627e-06, "loss": 0.476, "step": 32307 }, { "epoch": 0.83, "grad_norm": 1.7149113416671753, "learning_rate": 1.5103718356102614e-06, "loss": 0.4877, "step": 32308 }, { "epoch": 0.83, "grad_norm": 6.818388938903809, "learning_rate": 1.5099331737197443e-06, "loss": 0.4285, "step": 32309 }, { "epoch": 0.83, "grad_norm": 1.3465907573699951, "learning_rate": 1.509494570337431e-06, "loss": 0.5451, "step": 32310 }, { "epoch": 0.83, "grad_norm": 2.107264995574951, "learning_rate": 1.509056025466349e-06, "loss": 0.5293, "step": 32311 }, { "epoch": 0.83, "grad_norm": 2.6673598289489746, "learning_rate": 1.5086175391095182e-06, "loss": 0.4038, "step": 32312 }, { "epoch": 0.83, "grad_norm": 1.8242955207824707, "learning_rate": 1.5081791112699618e-06, "loss": 0.61, "step": 32313 }, { "epoch": 0.83, "grad_norm": 1.3399420976638794, "learning_rate": 1.5077407419506963e-06, "loss": 0.4867, "step": 32314 }, { "epoch": 0.83, "grad_norm": 1.372421383857727, "learning_rate": 1.507302431154749e-06, "loss": 0.4586, "step": 32315 }, { "epoch": 0.83, "grad_norm": 1.7620588541030884, "learning_rate": 1.506864178885138e-06, "loss": 0.5305, "step": 32316 }, { "epoch": 0.83, "grad_norm": 1.344130516052246, "learning_rate": 1.5064259851448803e-06, "loss": 0.6063, "step": 32317 }, { "epoch": 0.83, "grad_norm": 1.469260334968567, "learning_rate": 1.5059878499370017e-06, "loss": 0.4632, "step": 32318 }, { "epoch": 0.83, "grad_norm": 2.0963759422302246, "learning_rate": 1.5055497732645174e-06, "loss": 0.7165, "step": 32319 }, { "epoch": 0.83, "grad_norm": 4.046027183532715, "learning_rate": 1.5051117551304473e-06, "loss": 0.5657, "step": 32320 }, { "epoch": 0.83, "grad_norm": 1.6520087718963623, "learning_rate": 1.5046737955378088e-06, "loss": 0.4617, "step": 32321 }, { "epoch": 0.83, "grad_norm": 2.457265615463257, "learning_rate": 1.5042358944896229e-06, "loss": 0.337, "step": 32322 }, { "epoch": 0.83, "grad_norm": 7.9034905433654785, "learning_rate": 1.503798051988905e-06, "loss": 0.4778, "step": 32323 }, { "epoch": 0.83, "grad_norm": 1.298399567604065, "learning_rate": 1.5033602680386717e-06, "loss": 0.4662, "step": 32324 }, { "epoch": 0.83, "grad_norm": 1.1011042594909668, "learning_rate": 1.5029225426419424e-06, "loss": 0.369, "step": 32325 }, { "epoch": 0.83, "grad_norm": 1.1237385272979736, "learning_rate": 1.5024848758017341e-06, "loss": 0.428, "step": 32326 }, { "epoch": 0.83, "grad_norm": 2.665090560913086, "learning_rate": 1.5020472675210585e-06, "loss": 0.4011, "step": 32327 }, { "epoch": 0.83, "grad_norm": 4.567059516906738, "learning_rate": 1.5016097178029366e-06, "loss": 0.6261, "step": 32328 }, { "epoch": 0.83, "grad_norm": 1.5336322784423828, "learning_rate": 1.5011722266503802e-06, "loss": 0.5402, "step": 32329 }, { "epoch": 0.83, "grad_norm": 3.059584856033325, "learning_rate": 1.5007347940664064e-06, "loss": 0.6722, "step": 32330 }, { "epoch": 0.83, "grad_norm": 1.1112288236618042, "learning_rate": 1.5002974200540265e-06, "loss": 0.4584, "step": 32331 }, { "epoch": 0.83, "grad_norm": 1.776845932006836, "learning_rate": 1.4998601046162585e-06, "loss": 0.4662, "step": 32332 }, { "epoch": 0.83, "grad_norm": 1.3161858320236206, "learning_rate": 1.4994228477561145e-06, "loss": 0.6092, "step": 32333 }, { "epoch": 0.83, "grad_norm": 1.987632393836975, "learning_rate": 1.4989856494766052e-06, "loss": 0.6025, "step": 32334 }, { "epoch": 0.83, "grad_norm": 7.708820343017578, "learning_rate": 1.4985485097807484e-06, "loss": 0.5714, "step": 32335 }, { "epoch": 0.83, "grad_norm": 2.030365467071533, "learning_rate": 1.4981114286715549e-06, "loss": 0.542, "step": 32336 }, { "epoch": 0.83, "grad_norm": 1.826382040977478, "learning_rate": 1.4976744061520354e-06, "loss": 0.4954, "step": 32337 }, { "epoch": 0.83, "grad_norm": 1.0638837814331055, "learning_rate": 1.4972374422251989e-06, "loss": 0.5406, "step": 32338 }, { "epoch": 0.83, "grad_norm": 5.197674751281738, "learning_rate": 1.496800536894064e-06, "loss": 0.6437, "step": 32339 }, { "epoch": 0.83, "grad_norm": 1.0122616291046143, "learning_rate": 1.4963636901616363e-06, "loss": 0.495, "step": 32340 }, { "epoch": 0.83, "grad_norm": 1.8327757120132446, "learning_rate": 1.4959269020309264e-06, "loss": 0.6924, "step": 32341 }, { "epoch": 0.83, "grad_norm": 1.0042452812194824, "learning_rate": 1.4954901725049465e-06, "loss": 0.4787, "step": 32342 }, { "epoch": 0.83, "grad_norm": 6.934256076812744, "learning_rate": 1.4950535015867063e-06, "loss": 0.5696, "step": 32343 }, { "epoch": 0.83, "grad_norm": 1.6251921653747559, "learning_rate": 1.4946168892792134e-06, "loss": 0.5453, "step": 32344 }, { "epoch": 0.83, "grad_norm": 1.1522941589355469, "learning_rate": 1.4941803355854778e-06, "loss": 0.494, "step": 32345 }, { "epoch": 0.83, "grad_norm": 1.0340626239776611, "learning_rate": 1.493743840508507e-06, "loss": 0.5268, "step": 32346 }, { "epoch": 0.83, "grad_norm": 3.9286935329437256, "learning_rate": 1.4933074040513075e-06, "loss": 0.7112, "step": 32347 }, { "epoch": 0.83, "grad_norm": 1.3967605829238892, "learning_rate": 1.4928710262168911e-06, "loss": 0.4467, "step": 32348 }, { "epoch": 0.83, "grad_norm": 1.8235334157943726, "learning_rate": 1.4924347070082623e-06, "loss": 0.5943, "step": 32349 }, { "epoch": 0.83, "grad_norm": 1.5180552005767822, "learning_rate": 1.4919984464284298e-06, "loss": 0.6374, "step": 32350 }, { "epoch": 0.83, "grad_norm": 1.6584042310714722, "learning_rate": 1.4915622444803957e-06, "loss": 0.4631, "step": 32351 }, { "epoch": 0.83, "grad_norm": 1.330458402633667, "learning_rate": 1.4911261011671708e-06, "loss": 0.525, "step": 32352 }, { "epoch": 0.83, "grad_norm": 1.5396689176559448, "learning_rate": 1.4906900164917604e-06, "loss": 0.5148, "step": 32353 }, { "epoch": 0.83, "grad_norm": 1.0519468784332275, "learning_rate": 1.4902539904571646e-06, "loss": 0.5664, "step": 32354 }, { "epoch": 0.83, "grad_norm": 1.6353724002838135, "learning_rate": 1.489818023066395e-06, "loss": 0.5274, "step": 32355 }, { "epoch": 0.83, "grad_norm": 1.6655093431472778, "learning_rate": 1.489382114322453e-06, "loss": 0.5352, "step": 32356 }, { "epoch": 0.83, "grad_norm": 1.3267486095428467, "learning_rate": 1.4889462642283403e-06, "loss": 0.4707, "step": 32357 }, { "epoch": 0.83, "grad_norm": 1.4266927242279053, "learning_rate": 1.4885104727870648e-06, "loss": 0.452, "step": 32358 }, { "epoch": 0.83, "grad_norm": 1.2327048778533936, "learning_rate": 1.4880747400016283e-06, "loss": 0.5672, "step": 32359 }, { "epoch": 0.83, "grad_norm": 1.0836306810379028, "learning_rate": 1.4876390658750328e-06, "loss": 0.5369, "step": 32360 }, { "epoch": 0.83, "grad_norm": 1.712873101234436, "learning_rate": 1.4872034504102784e-06, "loss": 0.4771, "step": 32361 }, { "epoch": 0.83, "grad_norm": 1.2964328527450562, "learning_rate": 1.4867678936103713e-06, "loss": 0.4332, "step": 32362 }, { "epoch": 0.83, "grad_norm": 11.015935897827148, "learning_rate": 1.4863323954783116e-06, "loss": 0.5242, "step": 32363 }, { "epoch": 0.83, "grad_norm": 2.7884140014648438, "learning_rate": 1.4858969560170988e-06, "loss": 0.6362, "step": 32364 }, { "epoch": 0.83, "grad_norm": 1.9425766468048096, "learning_rate": 1.4854615752297364e-06, "loss": 0.5187, "step": 32365 }, { "epoch": 0.83, "grad_norm": 1.6255483627319336, "learning_rate": 1.485026253119224e-06, "loss": 0.5773, "step": 32366 }, { "epoch": 0.83, "grad_norm": 1.3776968717575073, "learning_rate": 1.4845909896885603e-06, "loss": 0.413, "step": 32367 }, { "epoch": 0.83, "grad_norm": 1.060060977935791, "learning_rate": 1.484155784940744e-06, "loss": 0.471, "step": 32368 }, { "epoch": 0.83, "grad_norm": 1.9015617370605469, "learning_rate": 1.4837206388787785e-06, "loss": 0.5398, "step": 32369 }, { "epoch": 0.83, "grad_norm": 3.4194886684417725, "learning_rate": 1.4832855515056588e-06, "loss": 0.5861, "step": 32370 }, { "epoch": 0.83, "grad_norm": 1.21876060962677, "learning_rate": 1.4828505228243828e-06, "loss": 0.4706, "step": 32371 }, { "epoch": 0.83, "grad_norm": 1.2736977338790894, "learning_rate": 1.4824155528379524e-06, "loss": 0.3994, "step": 32372 }, { "epoch": 0.83, "grad_norm": 1.1245428323745728, "learning_rate": 1.481980641549362e-06, "loss": 0.4916, "step": 32373 }, { "epoch": 0.83, "grad_norm": 1.1473822593688965, "learning_rate": 1.4815457889616081e-06, "loss": 0.4634, "step": 32374 }, { "epoch": 0.83, "grad_norm": 1.9082448482513428, "learning_rate": 1.4811109950776904e-06, "loss": 0.5213, "step": 32375 }, { "epoch": 0.83, "grad_norm": 1.7626897096633911, "learning_rate": 1.4806762599006042e-06, "loss": 0.6589, "step": 32376 }, { "epoch": 0.83, "grad_norm": 1.9270575046539307, "learning_rate": 1.480241583433344e-06, "loss": 0.4688, "step": 32377 }, { "epoch": 0.83, "grad_norm": 1.5884727239608765, "learning_rate": 1.4798069656789039e-06, "loss": 0.6265, "step": 32378 }, { "epoch": 0.83, "grad_norm": 3.4204771518707275, "learning_rate": 1.4793724066402847e-06, "loss": 0.523, "step": 32379 }, { "epoch": 0.83, "grad_norm": 1.7309743165969849, "learning_rate": 1.4789379063204768e-06, "loss": 0.4156, "step": 32380 }, { "epoch": 0.83, "grad_norm": 1.3059039115905762, "learning_rate": 1.4785034647224738e-06, "loss": 0.5519, "step": 32381 }, { "epoch": 0.83, "grad_norm": 1.1470755338668823, "learning_rate": 1.4780690818492728e-06, "loss": 0.5031, "step": 32382 }, { "epoch": 0.83, "grad_norm": 1.7675644159317017, "learning_rate": 1.4776347577038664e-06, "loss": 0.5309, "step": 32383 }, { "epoch": 0.83, "grad_norm": 1.0601048469543457, "learning_rate": 1.4772004922892458e-06, "loss": 0.4822, "step": 32384 }, { "epoch": 0.83, "grad_norm": 1.230226755142212, "learning_rate": 1.476766285608403e-06, "loss": 0.596, "step": 32385 }, { "epoch": 0.83, "grad_norm": 1.0287500619888306, "learning_rate": 1.4763321376643347e-06, "loss": 0.4835, "step": 32386 }, { "epoch": 0.83, "grad_norm": 0.9026691317558289, "learning_rate": 1.4758980484600304e-06, "loss": 0.5043, "step": 32387 }, { "epoch": 0.83, "grad_norm": 1.4700171947479248, "learning_rate": 1.4754640179984792e-06, "loss": 0.6024, "step": 32388 }, { "epoch": 0.83, "grad_norm": 1.900649070739746, "learning_rate": 1.475030046282676e-06, "loss": 0.5568, "step": 32389 }, { "epoch": 0.83, "grad_norm": 3.897368907928467, "learning_rate": 1.4745961333156111e-06, "loss": 0.4844, "step": 32390 }, { "epoch": 0.83, "grad_norm": 1.5827124118804932, "learning_rate": 1.4741622791002718e-06, "loss": 0.629, "step": 32391 }, { "epoch": 0.83, "grad_norm": 1.5659208297729492, "learning_rate": 1.4737284836396482e-06, "loss": 0.6372, "step": 32392 }, { "epoch": 0.83, "grad_norm": 1.255827784538269, "learning_rate": 1.473294746936733e-06, "loss": 0.5498, "step": 32393 }, { "epoch": 0.83, "grad_norm": 2.009547472000122, "learning_rate": 1.4728610689945144e-06, "loss": 0.4447, "step": 32394 }, { "epoch": 0.83, "grad_norm": 1.3982986211776733, "learning_rate": 1.4724274498159763e-06, "loss": 0.5045, "step": 32395 }, { "epoch": 0.83, "grad_norm": 0.8731333613395691, "learning_rate": 1.4719938894041141e-06, "loss": 0.309, "step": 32396 }, { "epoch": 0.83, "grad_norm": 4.266629695892334, "learning_rate": 1.4715603877619123e-06, "loss": 0.7331, "step": 32397 }, { "epoch": 0.83, "grad_norm": 1.7173389196395874, "learning_rate": 1.4711269448923548e-06, "loss": 0.5976, "step": 32398 }, { "epoch": 0.83, "grad_norm": 1.2110261917114258, "learning_rate": 1.4706935607984352e-06, "loss": 0.3881, "step": 32399 }, { "epoch": 0.83, "grad_norm": 1.9138184785842896, "learning_rate": 1.4702602354831364e-06, "loss": 0.5059, "step": 32400 }, { "epoch": 0.83, "grad_norm": 1.533157229423523, "learning_rate": 1.4698269689494459e-06, "loss": 0.4898, "step": 32401 }, { "epoch": 0.83, "grad_norm": 1.3235700130462646, "learning_rate": 1.4693937612003462e-06, "loss": 0.4168, "step": 32402 }, { "epoch": 0.83, "grad_norm": 1.4920209646224976, "learning_rate": 1.4689606122388278e-06, "loss": 0.4068, "step": 32403 }, { "epoch": 0.83, "grad_norm": 1.6490353345870972, "learning_rate": 1.4685275220678718e-06, "loss": 0.6822, "step": 32404 }, { "epoch": 0.83, "grad_norm": 5.742067813873291, "learning_rate": 1.4680944906904626e-06, "loss": 0.6755, "step": 32405 }, { "epoch": 0.83, "grad_norm": 2.4855682849884033, "learning_rate": 1.4676615181095888e-06, "loss": 0.6755, "step": 32406 }, { "epoch": 0.83, "grad_norm": 0.8529902100563049, "learning_rate": 1.4672286043282313e-06, "loss": 0.3286, "step": 32407 }, { "epoch": 0.83, "grad_norm": 1.3225222826004028, "learning_rate": 1.4667957493493723e-06, "loss": 0.5381, "step": 32408 }, { "epoch": 0.83, "grad_norm": 2.014047861099243, "learning_rate": 1.4663629531759948e-06, "loss": 0.6649, "step": 32409 }, { "epoch": 0.83, "grad_norm": 2.891357660293579, "learning_rate": 1.4659302158110845e-06, "loss": 0.3951, "step": 32410 }, { "epoch": 0.83, "grad_norm": 1.3460897207260132, "learning_rate": 1.4654975372576209e-06, "loss": 0.4962, "step": 32411 }, { "epoch": 0.83, "grad_norm": 1.1797446012496948, "learning_rate": 1.465064917518584e-06, "loss": 0.5152, "step": 32412 }, { "epoch": 0.83, "grad_norm": 2.5239522457122803, "learning_rate": 1.46463235659696e-06, "loss": 0.5543, "step": 32413 }, { "epoch": 0.83, "grad_norm": 1.0947192907333374, "learning_rate": 1.4641998544957281e-06, "loss": 0.4929, "step": 32414 }, { "epoch": 0.83, "grad_norm": 1.2713782787322998, "learning_rate": 1.4637674112178657e-06, "loss": 0.4104, "step": 32415 }, { "epoch": 0.83, "grad_norm": 1.394434928894043, "learning_rate": 1.4633350267663571e-06, "loss": 0.6367, "step": 32416 }, { "epoch": 0.83, "grad_norm": 1.3149079084396362, "learning_rate": 1.4629027011441798e-06, "loss": 0.3942, "step": 32417 }, { "epoch": 0.83, "grad_norm": 9.304015159606934, "learning_rate": 1.462470434354314e-06, "loss": 0.7999, "step": 32418 }, { "epoch": 0.83, "grad_norm": 1.0943198204040527, "learning_rate": 1.4620382263997368e-06, "loss": 0.4979, "step": 32419 }, { "epoch": 0.83, "grad_norm": 0.8999500274658203, "learning_rate": 1.4616060772834295e-06, "loss": 0.4326, "step": 32420 }, { "epoch": 0.83, "grad_norm": 18.169191360473633, "learning_rate": 1.4611739870083685e-06, "loss": 0.5768, "step": 32421 }, { "epoch": 0.83, "grad_norm": 1.5003924369812012, "learning_rate": 1.4607419555775294e-06, "loss": 0.5652, "step": 32422 }, { "epoch": 0.83, "grad_norm": 1.2807414531707764, "learning_rate": 1.460309982993895e-06, "loss": 0.537, "step": 32423 }, { "epoch": 0.83, "grad_norm": 1.2510876655578613, "learning_rate": 1.4598780692604387e-06, "loss": 0.505, "step": 32424 }, { "epoch": 0.83, "grad_norm": 1.4810287952423096, "learning_rate": 1.4594462143801368e-06, "loss": 0.513, "step": 32425 }, { "epoch": 0.83, "grad_norm": 2.978605270385742, "learning_rate": 1.459014418355964e-06, "loss": 0.5735, "step": 32426 }, { "epoch": 0.83, "grad_norm": 1.5161978006362915, "learning_rate": 1.4585826811909e-06, "loss": 0.6532, "step": 32427 }, { "epoch": 0.83, "grad_norm": 3.9527955055236816, "learning_rate": 1.4581510028879186e-06, "loss": 0.565, "step": 32428 }, { "epoch": 0.83, "grad_norm": 2.006432056427002, "learning_rate": 1.4577193834499902e-06, "loss": 0.5401, "step": 32429 }, { "epoch": 0.83, "grad_norm": 1.4780429601669312, "learning_rate": 1.4572878228800968e-06, "loss": 0.656, "step": 32430 }, { "epoch": 0.83, "grad_norm": 1.1307101249694824, "learning_rate": 1.4568563211812081e-06, "loss": 0.3188, "step": 32431 }, { "epoch": 0.83, "grad_norm": 1.6355355978012085, "learning_rate": 1.4564248783562985e-06, "loss": 0.4922, "step": 32432 }, { "epoch": 0.83, "grad_norm": 1.6527007818222046, "learning_rate": 1.4559934944083387e-06, "loss": 0.6312, "step": 32433 }, { "epoch": 0.83, "grad_norm": 1.373827576637268, "learning_rate": 1.4555621693403054e-06, "loss": 0.4503, "step": 32434 }, { "epoch": 0.83, "grad_norm": 1.2413357496261597, "learning_rate": 1.4551309031551707e-06, "loss": 0.5846, "step": 32435 }, { "epoch": 0.83, "grad_norm": 1.2390995025634766, "learning_rate": 1.4546996958559023e-06, "loss": 0.6467, "step": 32436 }, { "epoch": 0.83, "grad_norm": 1.2670403718948364, "learning_rate": 1.4542685474454776e-06, "loss": 0.4988, "step": 32437 }, { "epoch": 0.83, "grad_norm": 1.9601898193359375, "learning_rate": 1.4538374579268643e-06, "loss": 0.6772, "step": 32438 }, { "epoch": 0.83, "grad_norm": 1.5130512714385986, "learning_rate": 1.4534064273030324e-06, "loss": 0.4432, "step": 32439 }, { "epoch": 0.83, "grad_norm": 1.3329689502716064, "learning_rate": 1.452975455576956e-06, "loss": 0.5278, "step": 32440 }, { "epoch": 0.83, "grad_norm": 1.9051570892333984, "learning_rate": 1.4525445427516037e-06, "loss": 0.7036, "step": 32441 }, { "epoch": 0.83, "grad_norm": 1.3903295993804932, "learning_rate": 1.4521136888299436e-06, "loss": 0.6888, "step": 32442 }, { "epoch": 0.83, "grad_norm": 1.150142788887024, "learning_rate": 1.451682893814944e-06, "loss": 0.4059, "step": 32443 }, { "epoch": 0.83, "grad_norm": 1.772994875907898, "learning_rate": 1.4512521577095773e-06, "loss": 0.5815, "step": 32444 }, { "epoch": 0.83, "grad_norm": 1.3425768613815308, "learning_rate": 1.4508214805168097e-06, "loss": 0.6695, "step": 32445 }, { "epoch": 0.83, "grad_norm": 1.8371304273605347, "learning_rate": 1.4503908622396068e-06, "loss": 0.4189, "step": 32446 }, { "epoch": 0.83, "grad_norm": 1.6076109409332275, "learning_rate": 1.4499603028809417e-06, "loss": 0.5414, "step": 32447 }, { "epoch": 0.83, "grad_norm": 1.7166738510131836, "learning_rate": 1.4495298024437777e-06, "loss": 0.586, "step": 32448 }, { "epoch": 0.83, "grad_norm": 1.1156591176986694, "learning_rate": 1.4490993609310832e-06, "loss": 0.4621, "step": 32449 }, { "epoch": 0.83, "grad_norm": 10.273212432861328, "learning_rate": 1.4486689783458208e-06, "loss": 0.6253, "step": 32450 }, { "epoch": 0.83, "grad_norm": 0.9993007779121399, "learning_rate": 1.448238654690961e-06, "loss": 0.4471, "step": 32451 }, { "epoch": 0.83, "grad_norm": 0.9310997128486633, "learning_rate": 1.4478083899694694e-06, "loss": 0.4576, "step": 32452 }, { "epoch": 0.83, "grad_norm": 1.9818618297576904, "learning_rate": 1.447378184184306e-06, "loss": 0.4822, "step": 32453 }, { "epoch": 0.83, "grad_norm": 1.7330563068389893, "learning_rate": 1.4469480373384415e-06, "loss": 0.6023, "step": 32454 }, { "epoch": 0.83, "grad_norm": 1.8911421298980713, "learning_rate": 1.4465179494348369e-06, "loss": 0.5793, "step": 32455 }, { "epoch": 0.83, "grad_norm": 1.7081997394561768, "learning_rate": 1.4460879204764578e-06, "loss": 0.6002, "step": 32456 }, { "epoch": 0.83, "grad_norm": 1.4946205615997314, "learning_rate": 1.4456579504662638e-06, "loss": 0.5494, "step": 32457 }, { "epoch": 0.83, "grad_norm": 1.5701438188552856, "learning_rate": 1.4452280394072226e-06, "loss": 0.6324, "step": 32458 }, { "epoch": 0.83, "grad_norm": 2.1065454483032227, "learning_rate": 1.4447981873022955e-06, "loss": 0.4576, "step": 32459 }, { "epoch": 0.83, "grad_norm": 2.243095874786377, "learning_rate": 1.444368394154443e-06, "loss": 0.6855, "step": 32460 }, { "epoch": 0.83, "grad_norm": 8.679437637329102, "learning_rate": 1.4439386599666293e-06, "loss": 0.6018, "step": 32461 }, { "epoch": 0.83, "grad_norm": 1.3102726936340332, "learning_rate": 1.443508984741816e-06, "loss": 0.4738, "step": 32462 }, { "epoch": 0.83, "grad_norm": 1.7006968259811401, "learning_rate": 1.4430793684829603e-06, "loss": 0.5429, "step": 32463 }, { "epoch": 0.83, "grad_norm": 1.2409473657608032, "learning_rate": 1.4426498111930275e-06, "loss": 0.6782, "step": 32464 }, { "epoch": 0.83, "grad_norm": 1.0258744955062866, "learning_rate": 1.4422203128749767e-06, "loss": 0.4557, "step": 32465 }, { "epoch": 0.83, "grad_norm": 1.874693751335144, "learning_rate": 1.4417908735317666e-06, "loss": 0.5185, "step": 32466 }, { "epoch": 0.83, "grad_norm": 1.3676987886428833, "learning_rate": 1.4413614931663544e-06, "loss": 0.656, "step": 32467 }, { "epoch": 0.83, "grad_norm": 1.4138513803482056, "learning_rate": 1.4409321717817049e-06, "loss": 0.4945, "step": 32468 }, { "epoch": 0.83, "grad_norm": 1.7985719442367554, "learning_rate": 1.440502909380772e-06, "loss": 0.6002, "step": 32469 }, { "epoch": 0.83, "grad_norm": 1.614709734916687, "learning_rate": 1.4400737059665148e-06, "loss": 0.579, "step": 32470 }, { "epoch": 0.83, "grad_norm": 1.1572575569152832, "learning_rate": 1.439644561541893e-06, "loss": 0.5347, "step": 32471 }, { "epoch": 0.83, "grad_norm": 1.6511787176132202, "learning_rate": 1.439215476109863e-06, "loss": 0.5247, "step": 32472 }, { "epoch": 0.83, "grad_norm": 1.914868712425232, "learning_rate": 1.4387864496733816e-06, "loss": 0.617, "step": 32473 }, { "epoch": 0.83, "grad_norm": 3.0375561714172363, "learning_rate": 1.438357482235403e-06, "loss": 0.7037, "step": 32474 }, { "epoch": 0.83, "grad_norm": 1.3832417726516724, "learning_rate": 1.437928573798888e-06, "loss": 0.4865, "step": 32475 }, { "epoch": 0.83, "grad_norm": 1.313859462738037, "learning_rate": 1.437499724366791e-06, "loss": 0.5987, "step": 32476 }, { "epoch": 0.83, "grad_norm": 3.4522554874420166, "learning_rate": 1.437070933942063e-06, "loss": 0.5717, "step": 32477 }, { "epoch": 0.83, "grad_norm": 1.3826605081558228, "learning_rate": 1.436642202527665e-06, "loss": 0.5801, "step": 32478 }, { "epoch": 0.83, "grad_norm": 1.067172646522522, "learning_rate": 1.4362135301265478e-06, "loss": 0.404, "step": 32479 }, { "epoch": 0.83, "grad_norm": 0.9754194617271423, "learning_rate": 1.435784916741666e-06, "loss": 0.4847, "step": 32480 }, { "epoch": 0.83, "grad_norm": 1.3029704093933105, "learning_rate": 1.4353563623759758e-06, "loss": 0.5846, "step": 32481 }, { "epoch": 0.83, "grad_norm": 2.137202739715576, "learning_rate": 1.4349278670324284e-06, "loss": 0.4846, "step": 32482 }, { "epoch": 0.83, "grad_norm": 1.7112772464752197, "learning_rate": 1.434499430713977e-06, "loss": 0.5224, "step": 32483 }, { "epoch": 0.83, "grad_norm": 1.8523122072219849, "learning_rate": 1.4340710534235725e-06, "loss": 0.6724, "step": 32484 }, { "epoch": 0.83, "grad_norm": 0.9914839267730713, "learning_rate": 1.4336427351641703e-06, "loss": 0.4594, "step": 32485 }, { "epoch": 0.83, "grad_norm": 12.630502700805664, "learning_rate": 1.43321447593872e-06, "loss": 0.5158, "step": 32486 }, { "epoch": 0.83, "grad_norm": 1.5460093021392822, "learning_rate": 1.4327862757501721e-06, "loss": 0.5745, "step": 32487 }, { "epoch": 0.83, "grad_norm": 1.6134562492370605, "learning_rate": 1.4323581346014802e-06, "loss": 0.4671, "step": 32488 }, { "epoch": 0.83, "grad_norm": 1.3450812101364136, "learning_rate": 1.4319300524955936e-06, "loss": 0.4433, "step": 32489 }, { "epoch": 0.83, "grad_norm": 4.438807964324951, "learning_rate": 1.431502029435463e-06, "loss": 0.5045, "step": 32490 }, { "epoch": 0.83, "grad_norm": 2.240356683731079, "learning_rate": 1.431074065424034e-06, "loss": 0.4895, "step": 32491 }, { "epoch": 0.83, "grad_norm": 1.345810055732727, "learning_rate": 1.4306461604642607e-06, "loss": 0.3823, "step": 32492 }, { "epoch": 0.83, "grad_norm": 4.8273725509643555, "learning_rate": 1.430218314559091e-06, "loss": 0.5754, "step": 32493 }, { "epoch": 0.83, "grad_norm": 1.7730371952056885, "learning_rate": 1.429790527711471e-06, "loss": 0.4816, "step": 32494 }, { "epoch": 0.83, "grad_norm": 1.1127862930297852, "learning_rate": 1.429362799924352e-06, "loss": 0.4915, "step": 32495 }, { "epoch": 0.83, "grad_norm": 1.6471484899520874, "learning_rate": 1.4289351312006793e-06, "loss": 0.503, "step": 32496 }, { "epoch": 0.83, "grad_norm": 1.2631860971450806, "learning_rate": 1.428507521543402e-06, "loss": 0.5709, "step": 32497 }, { "epoch": 0.83, "grad_norm": 1.2714943885803223, "learning_rate": 1.4280799709554626e-06, "loss": 0.5217, "step": 32498 }, { "epoch": 0.83, "grad_norm": 1.263499140739441, "learning_rate": 1.4276524794398127e-06, "loss": 0.5986, "step": 32499 }, { "epoch": 0.83, "grad_norm": 4.0442214012146, "learning_rate": 1.4272250469993965e-06, "loss": 0.5437, "step": 32500 }, { "epoch": 0.83, "grad_norm": 2.000988006591797, "learning_rate": 1.426797673637158e-06, "loss": 0.4694, "step": 32501 }, { "epoch": 0.83, "grad_norm": 1.4703751802444458, "learning_rate": 1.4263703593560452e-06, "loss": 0.4725, "step": 32502 }, { "epoch": 0.83, "grad_norm": 2.247516632080078, "learning_rate": 1.425943104159001e-06, "loss": 0.4299, "step": 32503 }, { "epoch": 0.83, "grad_norm": 1.5245578289031982, "learning_rate": 1.425515908048969e-06, "loss": 0.2924, "step": 32504 }, { "epoch": 0.83, "grad_norm": 5.790350437164307, "learning_rate": 1.4250887710288952e-06, "loss": 0.5968, "step": 32505 }, { "epoch": 0.83, "grad_norm": 5.2722487449646, "learning_rate": 1.424661693101723e-06, "loss": 0.8357, "step": 32506 }, { "epoch": 0.83, "grad_norm": 1.3546704053878784, "learning_rate": 1.424234674270395e-06, "loss": 0.5725, "step": 32507 }, { "epoch": 0.83, "grad_norm": 1.4470494985580444, "learning_rate": 1.4238077145378516e-06, "loss": 0.309, "step": 32508 }, { "epoch": 0.83, "grad_norm": 1.169955849647522, "learning_rate": 1.4233808139070393e-06, "loss": 0.4932, "step": 32509 }, { "epoch": 0.83, "grad_norm": 1.5653935670852661, "learning_rate": 1.4229539723808983e-06, "loss": 0.3927, "step": 32510 }, { "epoch": 0.83, "grad_norm": 1.42923104763031, "learning_rate": 1.422527189962367e-06, "loss": 0.4848, "step": 32511 }, { "epoch": 0.83, "grad_norm": 1.070825457572937, "learning_rate": 1.4221004666543926e-06, "loss": 0.4262, "step": 32512 }, { "epoch": 0.83, "grad_norm": 1.645398736000061, "learning_rate": 1.421673802459913e-06, "loss": 0.6357, "step": 32513 }, { "epoch": 0.83, "grad_norm": 1.3864179849624634, "learning_rate": 1.4212471973818664e-06, "loss": 0.512, "step": 32514 }, { "epoch": 0.83, "grad_norm": 1.629919409751892, "learning_rate": 1.4208206514231936e-06, "loss": 0.5032, "step": 32515 }, { "epoch": 0.83, "grad_norm": 7.808908939361572, "learning_rate": 1.420394164586837e-06, "loss": 0.6427, "step": 32516 }, { "epoch": 0.83, "grad_norm": 4.5640082359313965, "learning_rate": 1.4199677368757326e-06, "loss": 0.5291, "step": 32517 }, { "epoch": 0.83, "grad_norm": 2.6624467372894287, "learning_rate": 1.4195413682928183e-06, "loss": 0.688, "step": 32518 }, { "epoch": 0.83, "grad_norm": 1.0394790172576904, "learning_rate": 1.4191150588410373e-06, "loss": 0.5779, "step": 32519 }, { "epoch": 0.83, "grad_norm": 0.9108268618583679, "learning_rate": 1.418688808523323e-06, "loss": 0.4226, "step": 32520 }, { "epoch": 0.83, "grad_norm": 12.730351448059082, "learning_rate": 1.4182626173426128e-06, "loss": 0.6315, "step": 32521 }, { "epoch": 0.83, "grad_norm": 1.3205317258834839, "learning_rate": 1.4178364853018467e-06, "loss": 0.4861, "step": 32522 }, { "epoch": 0.83, "grad_norm": 1.093800663948059, "learning_rate": 1.41741041240396e-06, "loss": 0.4655, "step": 32523 }, { "epoch": 0.83, "grad_norm": 2.0900166034698486, "learning_rate": 1.4169843986518883e-06, "loss": 0.5095, "step": 32524 }, { "epoch": 0.83, "grad_norm": 4.896385192871094, "learning_rate": 1.4165584440485658e-06, "loss": 0.6986, "step": 32525 }, { "epoch": 0.83, "grad_norm": 1.8733466863632202, "learning_rate": 1.4161325485969325e-06, "loss": 0.6685, "step": 32526 }, { "epoch": 0.83, "grad_norm": 1.4422675371170044, "learning_rate": 1.4157067122999202e-06, "loss": 0.5976, "step": 32527 }, { "epoch": 0.83, "grad_norm": 1.4755650758743286, "learning_rate": 1.4152809351604613e-06, "loss": 0.5504, "step": 32528 }, { "epoch": 0.83, "grad_norm": 1.3429934978485107, "learning_rate": 1.4148552171814955e-06, "loss": 0.584, "step": 32529 }, { "epoch": 0.83, "grad_norm": 1.6039645671844482, "learning_rate": 1.414429558365954e-06, "loss": 0.5562, "step": 32530 }, { "epoch": 0.83, "grad_norm": 1.5671333074569702, "learning_rate": 1.4140039587167698e-06, "loss": 0.7381, "step": 32531 }, { "epoch": 0.83, "grad_norm": 1.2482541799545288, "learning_rate": 1.413578418236874e-06, "loss": 0.5678, "step": 32532 }, { "epoch": 0.83, "grad_norm": 7.617481231689453, "learning_rate": 1.413152936929204e-06, "loss": 0.7316, "step": 32533 }, { "epoch": 0.83, "grad_norm": 1.593635082244873, "learning_rate": 1.412727514796688e-06, "loss": 0.6429, "step": 32534 }, { "epoch": 0.83, "grad_norm": 3.14227032661438, "learning_rate": 1.4123021518422575e-06, "loss": 0.4586, "step": 32535 }, { "epoch": 0.83, "grad_norm": 1.0002866983413696, "learning_rate": 1.4118768480688471e-06, "loss": 0.4966, "step": 32536 }, { "epoch": 0.83, "grad_norm": 1.3967138528823853, "learning_rate": 1.4114516034793868e-06, "loss": 0.3868, "step": 32537 }, { "epoch": 0.83, "grad_norm": 9.433046340942383, "learning_rate": 1.4110264180768052e-06, "loss": 0.5679, "step": 32538 }, { "epoch": 0.83, "grad_norm": 1.4807870388031006, "learning_rate": 1.410601291864031e-06, "loss": 0.6209, "step": 32539 }, { "epoch": 0.83, "grad_norm": 1.5446568727493286, "learning_rate": 1.4101762248439987e-06, "loss": 0.4771, "step": 32540 }, { "epoch": 0.83, "grad_norm": 1.824421763420105, "learning_rate": 1.409751217019636e-06, "loss": 0.7146, "step": 32541 }, { "epoch": 0.83, "grad_norm": 1.5233603715896606, "learning_rate": 1.409326268393868e-06, "loss": 0.4229, "step": 32542 }, { "epoch": 0.83, "grad_norm": 20.1001033782959, "learning_rate": 1.4089013789696281e-06, "loss": 0.4698, "step": 32543 }, { "epoch": 0.83, "grad_norm": 1.4689756631851196, "learning_rate": 1.4084765487498431e-06, "loss": 0.5191, "step": 32544 }, { "epoch": 0.83, "grad_norm": 7.055705547332764, "learning_rate": 1.4080517777374381e-06, "loss": 0.6078, "step": 32545 }, { "epoch": 0.83, "grad_norm": 1.6658676862716675, "learning_rate": 1.407627065935344e-06, "loss": 0.5504, "step": 32546 }, { "epoch": 0.83, "grad_norm": 1.6421270370483398, "learning_rate": 1.4072024133464856e-06, "loss": 0.6095, "step": 32547 }, { "epoch": 0.83, "grad_norm": 1.51485276222229, "learning_rate": 1.4067778199737903e-06, "loss": 0.4392, "step": 32548 }, { "epoch": 0.83, "grad_norm": 1.316104531288147, "learning_rate": 1.4063532858201801e-06, "loss": 0.4948, "step": 32549 }, { "epoch": 0.83, "grad_norm": 1.382564902305603, "learning_rate": 1.4059288108885872e-06, "loss": 0.4903, "step": 32550 }, { "epoch": 0.83, "grad_norm": 1.4258438348770142, "learning_rate": 1.4055043951819337e-06, "loss": 0.5244, "step": 32551 }, { "epoch": 0.83, "grad_norm": 1.2366364002227783, "learning_rate": 1.4050800387031416e-06, "loss": 0.5019, "step": 32552 }, { "epoch": 0.83, "grad_norm": 1.842373251914978, "learning_rate": 1.404655741455141e-06, "loss": 0.6216, "step": 32553 }, { "epoch": 0.83, "grad_norm": 4.751137733459473, "learning_rate": 1.4042315034408515e-06, "loss": 0.6706, "step": 32554 }, { "epoch": 0.83, "grad_norm": 1.610026240348816, "learning_rate": 1.403807324663199e-06, "loss": 0.6253, "step": 32555 }, { "epoch": 0.83, "grad_norm": 1.7954585552215576, "learning_rate": 1.403383205125104e-06, "loss": 0.633, "step": 32556 }, { "epoch": 0.83, "grad_norm": 1.7040491104125977, "learning_rate": 1.4029591448294921e-06, "loss": 0.4965, "step": 32557 }, { "epoch": 0.83, "grad_norm": 1.5566718578338623, "learning_rate": 1.4025351437792856e-06, "loss": 0.5206, "step": 32558 }, { "epoch": 0.83, "grad_norm": 1.4476851224899292, "learning_rate": 1.402111201977403e-06, "loss": 0.5425, "step": 32559 }, { "epoch": 0.83, "grad_norm": 1.1003460884094238, "learning_rate": 1.40168731942677e-06, "loss": 0.4705, "step": 32560 }, { "epoch": 0.83, "grad_norm": 1.071311354637146, "learning_rate": 1.4012634961303062e-06, "loss": 0.5114, "step": 32561 }, { "epoch": 0.83, "grad_norm": 1.2068278789520264, "learning_rate": 1.4008397320909306e-06, "loss": 0.4208, "step": 32562 }, { "epoch": 0.83, "grad_norm": 5.7089972496032715, "learning_rate": 1.4004160273115664e-06, "loss": 0.6025, "step": 32563 }, { "epoch": 0.83, "grad_norm": 1.3219969272613525, "learning_rate": 1.3999923817951322e-06, "loss": 0.4406, "step": 32564 }, { "epoch": 0.83, "grad_norm": 1.3716185092926025, "learning_rate": 1.399568795544548e-06, "loss": 0.4909, "step": 32565 }, { "epoch": 0.83, "grad_norm": 1.6981909275054932, "learning_rate": 1.3991452685627293e-06, "loss": 0.5426, "step": 32566 }, { "epoch": 0.83, "grad_norm": 1.171800136566162, "learning_rate": 1.3987218008526004e-06, "loss": 0.4669, "step": 32567 }, { "epoch": 0.83, "grad_norm": 3.399949073791504, "learning_rate": 1.3982983924170779e-06, "loss": 0.69, "step": 32568 }, { "epoch": 0.83, "grad_norm": 3.5069491863250732, "learning_rate": 1.397875043259076e-06, "loss": 0.5661, "step": 32569 }, { "epoch": 0.83, "grad_norm": 2.0950450897216797, "learning_rate": 1.397451753381517e-06, "loss": 0.4429, "step": 32570 }, { "epoch": 0.83, "grad_norm": 2.3119046688079834, "learning_rate": 1.3970285227873148e-06, "loss": 0.6237, "step": 32571 }, { "epoch": 0.83, "grad_norm": 3.20522403717041, "learning_rate": 1.3966053514793886e-06, "loss": 0.5674, "step": 32572 }, { "epoch": 0.83, "grad_norm": 3.589407205581665, "learning_rate": 1.3961822394606505e-06, "loss": 0.5469, "step": 32573 }, { "epoch": 0.83, "grad_norm": 3.539675235748291, "learning_rate": 1.3957591867340214e-06, "loss": 0.5581, "step": 32574 }, { "epoch": 0.83, "grad_norm": 1.1653494834899902, "learning_rate": 1.3953361933024135e-06, "loss": 0.4551, "step": 32575 }, { "epoch": 0.83, "grad_norm": 1.2596300840377808, "learning_rate": 1.394913259168741e-06, "loss": 0.497, "step": 32576 }, { "epoch": 0.83, "grad_norm": 1.3122546672821045, "learning_rate": 1.3944903843359225e-06, "loss": 0.4389, "step": 32577 }, { "epoch": 0.83, "grad_norm": 3.217095136642456, "learning_rate": 1.3940675688068683e-06, "loss": 0.4946, "step": 32578 }, { "epoch": 0.84, "grad_norm": 1.2003306150436401, "learning_rate": 1.3936448125844949e-06, "loss": 0.4248, "step": 32579 }, { "epoch": 0.84, "grad_norm": 1.442620038986206, "learning_rate": 1.3932221156717119e-06, "loss": 0.4634, "step": 32580 }, { "epoch": 0.84, "grad_norm": 1.4689117670059204, "learning_rate": 1.3927994780714371e-06, "loss": 0.4407, "step": 32581 }, { "epoch": 0.84, "grad_norm": 1.0925877094268799, "learning_rate": 1.3923768997865816e-06, "loss": 0.4605, "step": 32582 }, { "epoch": 0.84, "grad_norm": 1.377723217010498, "learning_rate": 1.3919543808200531e-06, "loss": 0.4986, "step": 32583 }, { "epoch": 0.84, "grad_norm": 0.8483579754829407, "learning_rate": 1.3915319211747692e-06, "loss": 0.5397, "step": 32584 }, { "epoch": 0.84, "grad_norm": 1.6394832134246826, "learning_rate": 1.3911095208536395e-06, "loss": 0.492, "step": 32585 }, { "epoch": 0.84, "grad_norm": 11.745841026306152, "learning_rate": 1.3906871798595723e-06, "loss": 0.398, "step": 32586 }, { "epoch": 0.84, "grad_norm": 0.8397305607795715, "learning_rate": 1.3902648981954848e-06, "loss": 0.3177, "step": 32587 }, { "epoch": 0.84, "grad_norm": 1.9486764669418335, "learning_rate": 1.389842675864279e-06, "loss": 0.5594, "step": 32588 }, { "epoch": 0.84, "grad_norm": 1.735713005065918, "learning_rate": 1.3894205128688666e-06, "loss": 0.4894, "step": 32589 }, { "epoch": 0.84, "grad_norm": 1.4037295579910278, "learning_rate": 1.38899840921216e-06, "loss": 0.4257, "step": 32590 }, { "epoch": 0.84, "grad_norm": 1.358087182044983, "learning_rate": 1.3885763648970673e-06, "loss": 0.3762, "step": 32591 }, { "epoch": 0.84, "grad_norm": 2.175539016723633, "learning_rate": 1.3881543799264951e-06, "loss": 0.4683, "step": 32592 }, { "epoch": 0.84, "grad_norm": 1.5328917503356934, "learning_rate": 1.387732454303351e-06, "loss": 0.3759, "step": 32593 }, { "epoch": 0.84, "grad_norm": 1.3426547050476074, "learning_rate": 1.387310588030546e-06, "loss": 0.5083, "step": 32594 }, { "epoch": 0.84, "grad_norm": 1.7412669658660889, "learning_rate": 1.3868887811109855e-06, "loss": 0.4984, "step": 32595 }, { "epoch": 0.84, "grad_norm": 1.5457457304000854, "learning_rate": 1.386467033547574e-06, "loss": 0.5544, "step": 32596 }, { "epoch": 0.84, "grad_norm": 1.483623743057251, "learning_rate": 1.3860453453432222e-06, "loss": 0.6405, "step": 32597 }, { "epoch": 0.84, "grad_norm": 1.5298508405685425, "learning_rate": 1.3856237165008347e-06, "loss": 0.5283, "step": 32598 }, { "epoch": 0.84, "grad_norm": 1.3099807500839233, "learning_rate": 1.3852021470233135e-06, "loss": 0.5481, "step": 32599 }, { "epoch": 0.84, "grad_norm": 2.2177734375, "learning_rate": 1.3847806369135686e-06, "loss": 0.5953, "step": 32600 }, { "epoch": 0.84, "grad_norm": 2.2154524326324463, "learning_rate": 1.3843591861745042e-06, "loss": 0.5576, "step": 32601 }, { "epoch": 0.84, "grad_norm": 1.3028512001037598, "learning_rate": 1.3839377948090226e-06, "loss": 0.5412, "step": 32602 }, { "epoch": 0.84, "grad_norm": 1.4952821731567383, "learning_rate": 1.383516462820026e-06, "loss": 0.4691, "step": 32603 }, { "epoch": 0.84, "grad_norm": 1.0730366706848145, "learning_rate": 1.383095190210424e-06, "loss": 0.4623, "step": 32604 }, { "epoch": 0.84, "grad_norm": 1.3697152137756348, "learning_rate": 1.3826739769831155e-06, "loss": 0.551, "step": 32605 }, { "epoch": 0.84, "grad_norm": 2.733496904373169, "learning_rate": 1.3822528231410015e-06, "loss": 0.6386, "step": 32606 }, { "epoch": 0.84, "grad_norm": 1.2354846000671387, "learning_rate": 1.3818317286869898e-06, "loss": 0.4934, "step": 32607 }, { "epoch": 0.84, "grad_norm": 1.9870542287826538, "learning_rate": 1.381410693623979e-06, "loss": 0.4551, "step": 32608 }, { "epoch": 0.84, "grad_norm": 1.7789404392242432, "learning_rate": 1.3809897179548703e-06, "loss": 0.4073, "step": 32609 }, { "epoch": 0.84, "grad_norm": 1.29181969165802, "learning_rate": 1.3805688016825647e-06, "loss": 0.3108, "step": 32610 }, { "epoch": 0.84, "grad_norm": 9.246506690979004, "learning_rate": 1.3801479448099642e-06, "loss": 0.6834, "step": 32611 }, { "epoch": 0.84, "grad_norm": 1.2084308862686157, "learning_rate": 1.3797271473399698e-06, "loss": 0.4483, "step": 32612 }, { "epoch": 0.84, "grad_norm": 1.5567724704742432, "learning_rate": 1.379306409275477e-06, "loss": 0.4847, "step": 32613 }, { "epoch": 0.84, "grad_norm": 2.0249431133270264, "learning_rate": 1.3788857306193915e-06, "loss": 0.6019, "step": 32614 }, { "epoch": 0.84, "grad_norm": 5.512063503265381, "learning_rate": 1.3784651113746083e-06, "loss": 0.7095, "step": 32615 }, { "epoch": 0.84, "grad_norm": 1.1379669904708862, "learning_rate": 1.3780445515440255e-06, "loss": 0.5801, "step": 32616 }, { "epoch": 0.84, "grad_norm": 1.869062066078186, "learning_rate": 1.377624051130545e-06, "loss": 0.4918, "step": 32617 }, { "epoch": 0.84, "grad_norm": 1.2518805265426636, "learning_rate": 1.3772036101370623e-06, "loss": 0.4775, "step": 32618 }, { "epoch": 0.84, "grad_norm": 1.5352742671966553, "learning_rate": 1.3767832285664751e-06, "loss": 0.5708, "step": 32619 }, { "epoch": 0.84, "grad_norm": 1.9356672763824463, "learning_rate": 1.3763629064216778e-06, "loss": 0.5904, "step": 32620 }, { "epoch": 0.84, "grad_norm": 3.4077959060668945, "learning_rate": 1.3759426437055713e-06, "loss": 0.4891, "step": 32621 }, { "epoch": 0.84, "grad_norm": 1.019921064376831, "learning_rate": 1.375522440421051e-06, "loss": 0.3411, "step": 32622 }, { "epoch": 0.84, "grad_norm": 2.0789239406585693, "learning_rate": 1.375102296571008e-06, "loss": 0.5535, "step": 32623 }, { "epoch": 0.84, "grad_norm": 1.7136720418930054, "learning_rate": 1.3746822121583448e-06, "loss": 0.5137, "step": 32624 }, { "epoch": 0.84, "grad_norm": 1.9528861045837402, "learning_rate": 1.3742621871859518e-06, "loss": 0.5493, "step": 32625 }, { "epoch": 0.84, "grad_norm": 1.450792908668518, "learning_rate": 1.3738422216567248e-06, "loss": 0.539, "step": 32626 }, { "epoch": 0.84, "grad_norm": 1.2285346984863281, "learning_rate": 1.373422315573556e-06, "loss": 0.4689, "step": 32627 }, { "epoch": 0.84, "grad_norm": 1.5621395111083984, "learning_rate": 1.373002468939343e-06, "loss": 0.6268, "step": 32628 }, { "epoch": 0.84, "grad_norm": 1.4649826288223267, "learning_rate": 1.3725826817569766e-06, "loss": 0.5913, "step": 32629 }, { "epoch": 0.84, "grad_norm": 3.1908607482910156, "learning_rate": 1.372162954029348e-06, "loss": 0.4882, "step": 32630 }, { "epoch": 0.84, "grad_norm": 0.7105251550674438, "learning_rate": 1.3717432857593537e-06, "loss": 0.4231, "step": 32631 }, { "epoch": 0.84, "grad_norm": 1.3983371257781982, "learning_rate": 1.371323676949885e-06, "loss": 0.5196, "step": 32632 }, { "epoch": 0.84, "grad_norm": 4.107486724853516, "learning_rate": 1.3709041276038315e-06, "loss": 0.583, "step": 32633 }, { "epoch": 0.84, "grad_norm": 1.2594923973083496, "learning_rate": 1.3704846377240833e-06, "loss": 0.4945, "step": 32634 }, { "epoch": 0.84, "grad_norm": 3.4956490993499756, "learning_rate": 1.3700652073135357e-06, "loss": 0.655, "step": 32635 }, { "epoch": 0.84, "grad_norm": 1.6678612232208252, "learning_rate": 1.3696458363750764e-06, "loss": 0.4896, "step": 32636 }, { "epoch": 0.84, "grad_norm": 2.8504576683044434, "learning_rate": 1.3692265249115943e-06, "loss": 0.5032, "step": 32637 }, { "epoch": 0.84, "grad_norm": 1.6709887981414795, "learning_rate": 1.3688072729259826e-06, "loss": 0.5898, "step": 32638 }, { "epoch": 0.84, "grad_norm": 1.6397793292999268, "learning_rate": 1.3683880804211292e-06, "loss": 0.5879, "step": 32639 }, { "epoch": 0.84, "grad_norm": 1.7335180044174194, "learning_rate": 1.3679689473999192e-06, "loss": 0.5858, "step": 32640 }, { "epoch": 0.84, "grad_norm": 1.7571085691452026, "learning_rate": 1.367549873865246e-06, "loss": 0.6072, "step": 32641 }, { "epoch": 0.84, "grad_norm": 1.556665301322937, "learning_rate": 1.3671308598199961e-06, "loss": 0.5089, "step": 32642 }, { "epoch": 0.84, "grad_norm": 1.4694688320159912, "learning_rate": 1.3667119052670564e-06, "loss": 0.3825, "step": 32643 }, { "epoch": 0.84, "grad_norm": 2.4210307598114014, "learning_rate": 1.366293010209313e-06, "loss": 0.6155, "step": 32644 }, { "epoch": 0.84, "grad_norm": 1.3000304698944092, "learning_rate": 1.365874174649655e-06, "loss": 0.4069, "step": 32645 }, { "epoch": 0.84, "grad_norm": 1.6956851482391357, "learning_rate": 1.365455398590968e-06, "loss": 0.6451, "step": 32646 }, { "epoch": 0.84, "grad_norm": 3.605343818664551, "learning_rate": 1.365036682036136e-06, "loss": 0.4794, "step": 32647 }, { "epoch": 0.84, "grad_norm": 2.596625566482544, "learning_rate": 1.364618024988048e-06, "loss": 0.5187, "step": 32648 }, { "epoch": 0.84, "grad_norm": 1.3837947845458984, "learning_rate": 1.3641994274495862e-06, "loss": 0.6782, "step": 32649 }, { "epoch": 0.84, "grad_norm": 1.4472635984420776, "learning_rate": 1.3637808894236382e-06, "loss": 0.4688, "step": 32650 }, { "epoch": 0.84, "grad_norm": 2.7545883655548096, "learning_rate": 1.3633624109130828e-06, "loss": 0.636, "step": 32651 }, { "epoch": 0.84, "grad_norm": 1.7491340637207031, "learning_rate": 1.362943991920811e-06, "loss": 0.7666, "step": 32652 }, { "epoch": 0.84, "grad_norm": 1.2061736583709717, "learning_rate": 1.3625256324497028e-06, "loss": 0.4185, "step": 32653 }, { "epoch": 0.84, "grad_norm": 1.8235470056533813, "learning_rate": 1.362107332502638e-06, "loss": 0.5128, "step": 32654 }, { "epoch": 0.84, "grad_norm": 1.9896258115768433, "learning_rate": 1.3616890920825055e-06, "loss": 0.5173, "step": 32655 }, { "epoch": 0.84, "grad_norm": 1.3852965831756592, "learning_rate": 1.361270911192185e-06, "loss": 0.3459, "step": 32656 }, { "epoch": 0.84, "grad_norm": 2.4717700481414795, "learning_rate": 1.3608527898345559e-06, "loss": 0.4943, "step": 32657 }, { "epoch": 0.84, "grad_norm": 4.085388660430908, "learning_rate": 1.360434728012503e-06, "loss": 0.5787, "step": 32658 }, { "epoch": 0.84, "grad_norm": 0.7571911215782166, "learning_rate": 1.3600167257289065e-06, "loss": 0.4469, "step": 32659 }, { "epoch": 0.84, "grad_norm": 1.753097414970398, "learning_rate": 1.3595987829866463e-06, "loss": 0.5765, "step": 32660 }, { "epoch": 0.84, "grad_norm": 1.8822627067565918, "learning_rate": 1.3591808997886013e-06, "loss": 0.5439, "step": 32661 }, { "epoch": 0.84, "grad_norm": 1.0339256525039673, "learning_rate": 1.3587630761376546e-06, "loss": 0.3837, "step": 32662 }, { "epoch": 0.84, "grad_norm": 1.8530595302581787, "learning_rate": 1.3583453120366842e-06, "loss": 0.6047, "step": 32663 }, { "epoch": 0.84, "grad_norm": 1.5924415588378906, "learning_rate": 1.357927607488566e-06, "loss": 0.5699, "step": 32664 }, { "epoch": 0.84, "grad_norm": 1.7844361066818237, "learning_rate": 1.3575099624961841e-06, "loss": 0.4651, "step": 32665 }, { "epoch": 0.84, "grad_norm": 1.2542222738265991, "learning_rate": 1.3570923770624133e-06, "loss": 0.4153, "step": 32666 }, { "epoch": 0.84, "grad_norm": 1.5095109939575195, "learning_rate": 1.3566748511901318e-06, "loss": 0.7311, "step": 32667 }, { "epoch": 0.84, "grad_norm": 4.968899250030518, "learning_rate": 1.3562573848822147e-06, "loss": 0.6895, "step": 32668 }, { "epoch": 0.84, "grad_norm": 5.538806438446045, "learning_rate": 1.3558399781415443e-06, "loss": 0.6095, "step": 32669 }, { "epoch": 0.84, "grad_norm": 5.393385887145996, "learning_rate": 1.3554226309709927e-06, "loss": 0.5689, "step": 32670 }, { "epoch": 0.84, "grad_norm": 1.036797046661377, "learning_rate": 1.3550053433734356e-06, "loss": 0.4649, "step": 32671 }, { "epoch": 0.84, "grad_norm": 1.878253698348999, "learning_rate": 1.3545881153517526e-06, "loss": 0.7149, "step": 32672 }, { "epoch": 0.84, "grad_norm": 1.2976750135421753, "learning_rate": 1.3541709469088172e-06, "loss": 0.6115, "step": 32673 }, { "epoch": 0.84, "grad_norm": 7.977603912353516, "learning_rate": 1.3537538380475024e-06, "loss": 0.7796, "step": 32674 }, { "epoch": 0.84, "grad_norm": 4.197796821594238, "learning_rate": 1.3533367887706827e-06, "loss": 0.347, "step": 32675 }, { "epoch": 0.84, "grad_norm": 4.76398229598999, "learning_rate": 1.3529197990812359e-06, "loss": 0.4983, "step": 32676 }, { "epoch": 0.84, "grad_norm": 1.0153385400772095, "learning_rate": 1.352502868982033e-06, "loss": 0.3851, "step": 32677 }, { "epoch": 0.84, "grad_norm": 0.9647008180618286, "learning_rate": 1.352085998475946e-06, "loss": 0.4678, "step": 32678 }, { "epoch": 0.84, "grad_norm": 2.137673854827881, "learning_rate": 1.3516691875658505e-06, "loss": 0.4524, "step": 32679 }, { "epoch": 0.84, "grad_norm": 1.7739124298095703, "learning_rate": 1.3512524362546176e-06, "loss": 0.4607, "step": 32680 }, { "epoch": 0.84, "grad_norm": 0.8813568949699402, "learning_rate": 1.350835744545117e-06, "loss": 0.3628, "step": 32681 }, { "epoch": 0.84, "grad_norm": 1.2792080640792847, "learning_rate": 1.3504191124402243e-06, "loss": 0.4489, "step": 32682 }, { "epoch": 0.84, "grad_norm": 2.7122228145599365, "learning_rate": 1.3500025399428097e-06, "loss": 0.5446, "step": 32683 }, { "epoch": 0.84, "grad_norm": 1.7700378894805908, "learning_rate": 1.3495860270557426e-06, "loss": 0.3842, "step": 32684 }, { "epoch": 0.84, "grad_norm": 1.7388368844985962, "learning_rate": 1.349169573781892e-06, "loss": 0.5502, "step": 32685 }, { "epoch": 0.84, "grad_norm": 1.1851602792739868, "learning_rate": 1.3487531801241315e-06, "loss": 0.4085, "step": 32686 }, { "epoch": 0.84, "grad_norm": 1.9536436796188354, "learning_rate": 1.3483368460853296e-06, "loss": 0.5536, "step": 32687 }, { "epoch": 0.84, "grad_norm": 1.5316139459609985, "learning_rate": 1.3479205716683518e-06, "loss": 0.4231, "step": 32688 }, { "epoch": 0.84, "grad_norm": 4.992918491363525, "learning_rate": 1.3475043568760726e-06, "loss": 0.5007, "step": 32689 }, { "epoch": 0.84, "grad_norm": 9.495980262756348, "learning_rate": 1.3470882017113563e-06, "loss": 0.476, "step": 32690 }, { "epoch": 0.84, "grad_norm": 9.550479888916016, "learning_rate": 1.3466721061770727e-06, "loss": 0.6118, "step": 32691 }, { "epoch": 0.84, "grad_norm": 5.130368709564209, "learning_rate": 1.3462560702760851e-06, "loss": 0.582, "step": 32692 }, { "epoch": 0.84, "grad_norm": 1.166563630104065, "learning_rate": 1.345840094011267e-06, "loss": 0.5055, "step": 32693 }, { "epoch": 0.84, "grad_norm": 1.201143741607666, "learning_rate": 1.3454241773854815e-06, "loss": 0.3082, "step": 32694 }, { "epoch": 0.84, "grad_norm": 1.3931865692138672, "learning_rate": 1.3450083204015929e-06, "loss": 0.488, "step": 32695 }, { "epoch": 0.84, "grad_norm": 1.5885355472564697, "learning_rate": 1.3445925230624724e-06, "loss": 0.488, "step": 32696 }, { "epoch": 0.84, "grad_norm": 1.3787933588027954, "learning_rate": 1.344176785370982e-06, "loss": 0.5382, "step": 32697 }, { "epoch": 0.84, "grad_norm": 2.013988971710205, "learning_rate": 1.343761107329984e-06, "loss": 0.4667, "step": 32698 }, { "epoch": 0.84, "grad_norm": 1.2150050401687622, "learning_rate": 1.343345488942349e-06, "loss": 0.564, "step": 32699 }, { "epoch": 0.84, "grad_norm": 1.382483720779419, "learning_rate": 1.3429299302109388e-06, "loss": 0.4824, "step": 32700 }, { "epoch": 0.84, "grad_norm": 2.202951192855835, "learning_rate": 1.3425144311386163e-06, "loss": 0.4476, "step": 32701 }, { "epoch": 0.84, "grad_norm": 5.823421478271484, "learning_rate": 1.3420989917282424e-06, "loss": 0.5753, "step": 32702 }, { "epoch": 0.84, "grad_norm": 1.6736701726913452, "learning_rate": 1.3416836119826848e-06, "loss": 0.6426, "step": 32703 }, { "epoch": 0.84, "grad_norm": 2.0765509605407715, "learning_rate": 1.3412682919048049e-06, "loss": 0.5482, "step": 32704 }, { "epoch": 0.84, "grad_norm": 2.6791579723358154, "learning_rate": 1.340853031497461e-06, "loss": 0.6182, "step": 32705 }, { "epoch": 0.84, "grad_norm": 2.172872543334961, "learning_rate": 1.3404378307635213e-06, "loss": 0.7808, "step": 32706 }, { "epoch": 0.84, "grad_norm": 1.6628504991531372, "learning_rate": 1.3400226897058422e-06, "loss": 0.5127, "step": 32707 }, { "epoch": 0.84, "grad_norm": 1.2010109424591064, "learning_rate": 1.3396076083272869e-06, "loss": 0.5191, "step": 32708 }, { "epoch": 0.84, "grad_norm": 1.403347373008728, "learning_rate": 1.339192586630712e-06, "loss": 0.4204, "step": 32709 }, { "epoch": 0.84, "grad_norm": 1.3039169311523438, "learning_rate": 1.3387776246189832e-06, "loss": 0.5664, "step": 32710 }, { "epoch": 0.84, "grad_norm": 1.545352816581726, "learning_rate": 1.338362722294958e-06, "loss": 0.5966, "step": 32711 }, { "epoch": 0.84, "grad_norm": 1.374632477760315, "learning_rate": 1.337947879661492e-06, "loss": 0.5074, "step": 32712 }, { "epoch": 0.84, "grad_norm": 7.1371941566467285, "learning_rate": 1.3375330967214494e-06, "loss": 0.5635, "step": 32713 }, { "epoch": 0.84, "grad_norm": 1.1436010599136353, "learning_rate": 1.3371183734776871e-06, "loss": 0.5157, "step": 32714 }, { "epoch": 0.84, "grad_norm": 1.0889934301376343, "learning_rate": 1.3367037099330615e-06, "loss": 0.5971, "step": 32715 }, { "epoch": 0.84, "grad_norm": 2.3692469596862793, "learning_rate": 1.3362891060904304e-06, "loss": 0.6745, "step": 32716 }, { "epoch": 0.84, "grad_norm": 1.3969331979751587, "learning_rate": 1.3358745619526526e-06, "loss": 0.6008, "step": 32717 }, { "epoch": 0.84, "grad_norm": 1.6458803415298462, "learning_rate": 1.3354600775225845e-06, "loss": 0.5098, "step": 32718 }, { "epoch": 0.84, "grad_norm": 2.129849910736084, "learning_rate": 1.3350456528030797e-06, "loss": 0.4446, "step": 32719 }, { "epoch": 0.84, "grad_norm": 0.8934644460678101, "learning_rate": 1.334631287796998e-06, "loss": 0.3888, "step": 32720 }, { "epoch": 0.84, "grad_norm": 1.223235845565796, "learning_rate": 1.3342169825071939e-06, "loss": 0.4714, "step": 32721 }, { "epoch": 0.84, "grad_norm": 1.0702378749847412, "learning_rate": 1.3338027369365203e-06, "loss": 0.384, "step": 32722 }, { "epoch": 0.84, "grad_norm": 1.0105643272399902, "learning_rate": 1.3333885510878353e-06, "loss": 0.5139, "step": 32723 }, { "epoch": 0.84, "grad_norm": 3.990713596343994, "learning_rate": 1.3329744249639908e-06, "loss": 0.5824, "step": 32724 }, { "epoch": 0.84, "grad_norm": 3.969473123550415, "learning_rate": 1.3325603585678426e-06, "loss": 0.4763, "step": 32725 }, { "epoch": 0.84, "grad_norm": 1.6922301054000854, "learning_rate": 1.3321463519022405e-06, "loss": 0.5693, "step": 32726 }, { "epoch": 0.84, "grad_norm": 1.0338160991668701, "learning_rate": 1.331732404970042e-06, "loss": 0.4933, "step": 32727 }, { "epoch": 0.84, "grad_norm": 1.533139944076538, "learning_rate": 1.3313185177740973e-06, "loss": 0.502, "step": 32728 }, { "epoch": 0.84, "grad_norm": 1.4301960468292236, "learning_rate": 1.3309046903172574e-06, "loss": 0.4034, "step": 32729 }, { "epoch": 0.84, "grad_norm": 1.4555171728134155, "learning_rate": 1.3304909226023788e-06, "loss": 0.4967, "step": 32730 }, { "epoch": 0.84, "grad_norm": 1.3118754625320435, "learning_rate": 1.3300772146323094e-06, "loss": 0.4749, "step": 32731 }, { "epoch": 0.84, "grad_norm": 1.1991153955459595, "learning_rate": 1.3296635664099012e-06, "loss": 0.5899, "step": 32732 }, { "epoch": 0.84, "grad_norm": 1.838502287864685, "learning_rate": 1.329249977938002e-06, "loss": 0.7373, "step": 32733 }, { "epoch": 0.84, "grad_norm": 2.225048542022705, "learning_rate": 1.328836449219466e-06, "loss": 0.6142, "step": 32734 }, { "epoch": 0.84, "grad_norm": 1.575616478919983, "learning_rate": 1.3284229802571425e-06, "loss": 0.7, "step": 32735 }, { "epoch": 0.84, "grad_norm": 7.596699237823486, "learning_rate": 1.3280095710538776e-06, "loss": 0.3276, "step": 32736 }, { "epoch": 0.84, "grad_norm": 1.6948026418685913, "learning_rate": 1.3275962216125238e-06, "loss": 0.5799, "step": 32737 }, { "epoch": 0.84, "grad_norm": 1.4050304889678955, "learning_rate": 1.3271829319359286e-06, "loss": 0.5237, "step": 32738 }, { "epoch": 0.84, "grad_norm": 1.345720648765564, "learning_rate": 1.3267697020269398e-06, "loss": 0.5075, "step": 32739 }, { "epoch": 0.84, "grad_norm": 2.9348976612091064, "learning_rate": 1.326356531888403e-06, "loss": 0.5322, "step": 32740 }, { "epoch": 0.84, "grad_norm": 1.214942455291748, "learning_rate": 1.3259434215231703e-06, "loss": 0.6192, "step": 32741 }, { "epoch": 0.84, "grad_norm": 1.0854618549346924, "learning_rate": 1.325530370934086e-06, "loss": 0.5096, "step": 32742 }, { "epoch": 0.84, "grad_norm": 1.7205638885498047, "learning_rate": 1.3251173801239935e-06, "loss": 0.6471, "step": 32743 }, { "epoch": 0.84, "grad_norm": 13.634115219116211, "learning_rate": 1.324704449095745e-06, "loss": 0.6217, "step": 32744 }, { "epoch": 0.84, "grad_norm": 1.7062803506851196, "learning_rate": 1.3242915778521825e-06, "loss": 0.5743, "step": 32745 }, { "epoch": 0.84, "grad_norm": 1.1262308359146118, "learning_rate": 1.3238787663961505e-06, "loss": 0.4723, "step": 32746 }, { "epoch": 0.84, "grad_norm": 2.3728015422821045, "learning_rate": 1.3234660147304967e-06, "loss": 0.6091, "step": 32747 }, { "epoch": 0.84, "grad_norm": 1.6772737503051758, "learning_rate": 1.3230533228580634e-06, "loss": 0.4565, "step": 32748 }, { "epoch": 0.84, "grad_norm": 3.104828357696533, "learning_rate": 1.3226406907816958e-06, "loss": 0.4291, "step": 32749 }, { "epoch": 0.84, "grad_norm": 1.9636046886444092, "learning_rate": 1.322228118504235e-06, "loss": 0.5794, "step": 32750 }, { "epoch": 0.84, "grad_norm": 1.735095500946045, "learning_rate": 1.3218156060285281e-06, "loss": 0.6006, "step": 32751 }, { "epoch": 0.84, "grad_norm": 1.839591383934021, "learning_rate": 1.3214031533574156e-06, "loss": 0.4956, "step": 32752 }, { "epoch": 0.84, "grad_norm": 1.8290226459503174, "learning_rate": 1.3209907604937388e-06, "loss": 0.5844, "step": 32753 }, { "epoch": 0.84, "grad_norm": 2.3391001224517822, "learning_rate": 1.3205784274403422e-06, "loss": 0.4038, "step": 32754 }, { "epoch": 0.84, "grad_norm": 1.1000875234603882, "learning_rate": 1.3201661542000666e-06, "loss": 0.4989, "step": 32755 }, { "epoch": 0.84, "grad_norm": 1.5088289976119995, "learning_rate": 1.319753940775752e-06, "loss": 0.4942, "step": 32756 }, { "epoch": 0.84, "grad_norm": 1.2139768600463867, "learning_rate": 1.3193417871702385e-06, "loss": 0.4514, "step": 32757 }, { "epoch": 0.84, "grad_norm": 1.358925700187683, "learning_rate": 1.3189296933863694e-06, "loss": 0.3306, "step": 32758 }, { "epoch": 0.84, "grad_norm": 1.9141063690185547, "learning_rate": 1.3185176594269833e-06, "loss": 0.5447, "step": 32759 }, { "epoch": 0.84, "grad_norm": 1.4202451705932617, "learning_rate": 1.3181056852949158e-06, "loss": 0.5318, "step": 32760 }, { "epoch": 0.84, "grad_norm": 1.5655614137649536, "learning_rate": 1.3176937709930127e-06, "loss": 0.5484, "step": 32761 }, { "epoch": 0.84, "grad_norm": 1.204694151878357, "learning_rate": 1.3172819165241092e-06, "loss": 0.4411, "step": 32762 }, { "epoch": 0.84, "grad_norm": 2.0188636779785156, "learning_rate": 1.3168701218910418e-06, "loss": 0.586, "step": 32763 }, { "epoch": 0.84, "grad_norm": 4.829615116119385, "learning_rate": 1.316458387096652e-06, "loss": 0.453, "step": 32764 }, { "epoch": 0.84, "grad_norm": 1.3372807502746582, "learning_rate": 1.3160467121437758e-06, "loss": 0.7264, "step": 32765 }, { "epoch": 0.84, "grad_norm": 1.0939092636108398, "learning_rate": 1.3156350970352494e-06, "loss": 0.503, "step": 32766 }, { "epoch": 0.84, "grad_norm": 4.58985710144043, "learning_rate": 1.3152235417739078e-06, "loss": 0.6159, "step": 32767 }, { "epoch": 0.84, "grad_norm": 1.3614747524261475, "learning_rate": 1.3148120463625923e-06, "loss": 0.5336, "step": 32768 }, { "epoch": 0.84, "grad_norm": 18.619169235229492, "learning_rate": 1.314400610804134e-06, "loss": 0.6616, "step": 32769 }, { "epoch": 0.84, "grad_norm": 4.678839683532715, "learning_rate": 1.3139892351013694e-06, "loss": 0.5421, "step": 32770 }, { "epoch": 0.84, "grad_norm": 1.5622071027755737, "learning_rate": 1.3135779192571351e-06, "loss": 0.5881, "step": 32771 }, { "epoch": 0.84, "grad_norm": 1.1678202152252197, "learning_rate": 1.3131666632742645e-06, "loss": 0.515, "step": 32772 }, { "epoch": 0.84, "grad_norm": 1.7573468685150146, "learning_rate": 1.312755467155592e-06, "loss": 0.4932, "step": 32773 }, { "epoch": 0.84, "grad_norm": 5.553140163421631, "learning_rate": 1.3123443309039485e-06, "loss": 0.6796, "step": 32774 }, { "epoch": 0.84, "grad_norm": 1.0822595357894897, "learning_rate": 1.311933254522172e-06, "loss": 0.4074, "step": 32775 }, { "epoch": 0.84, "grad_norm": 1.3589777946472168, "learning_rate": 1.3115222380130933e-06, "loss": 0.4563, "step": 32776 }, { "epoch": 0.84, "grad_norm": 1.7753396034240723, "learning_rate": 1.3111112813795413e-06, "loss": 0.6466, "step": 32777 }, { "epoch": 0.84, "grad_norm": 1.7516379356384277, "learning_rate": 1.3107003846243549e-06, "loss": 0.4518, "step": 32778 }, { "epoch": 0.84, "grad_norm": 1.6446549892425537, "learning_rate": 1.3102895477503618e-06, "loss": 0.5865, "step": 32779 }, { "epoch": 0.84, "grad_norm": 4.773373603820801, "learning_rate": 1.3098787707603944e-06, "loss": 0.5554, "step": 32780 }, { "epoch": 0.84, "grad_norm": 4.967352867126465, "learning_rate": 1.3094680536572802e-06, "loss": 0.5005, "step": 32781 }, { "epoch": 0.84, "grad_norm": 1.727380394935608, "learning_rate": 1.3090573964438547e-06, "loss": 0.4997, "step": 32782 }, { "epoch": 0.84, "grad_norm": 1.4626046419143677, "learning_rate": 1.3086467991229446e-06, "loss": 0.6895, "step": 32783 }, { "epoch": 0.84, "grad_norm": 1.3477228879928589, "learning_rate": 1.3082362616973787e-06, "loss": 0.5634, "step": 32784 }, { "epoch": 0.84, "grad_norm": 1.3255748748779297, "learning_rate": 1.3078257841699903e-06, "loss": 0.3409, "step": 32785 }, { "epoch": 0.84, "grad_norm": 2.0107433795928955, "learning_rate": 1.3074153665436052e-06, "loss": 0.605, "step": 32786 }, { "epoch": 0.84, "grad_norm": 1.2483627796173096, "learning_rate": 1.3070050088210507e-06, "loss": 0.4904, "step": 32787 }, { "epoch": 0.84, "grad_norm": 1.7023673057556152, "learning_rate": 1.3065947110051568e-06, "loss": 0.6735, "step": 32788 }, { "epoch": 0.84, "grad_norm": 1.193368673324585, "learning_rate": 1.3061844730987515e-06, "loss": 0.4442, "step": 32789 }, { "epoch": 0.84, "grad_norm": 2.4256153106689453, "learning_rate": 1.3057742951046614e-06, "loss": 0.6965, "step": 32790 }, { "epoch": 0.84, "grad_norm": 1.6392879486083984, "learning_rate": 1.3053641770257098e-06, "loss": 0.5912, "step": 32791 }, { "epoch": 0.84, "grad_norm": 1.0240360498428345, "learning_rate": 1.3049541188647275e-06, "loss": 0.4225, "step": 32792 }, { "epoch": 0.84, "grad_norm": 1.2856512069702148, "learning_rate": 1.3045441206245391e-06, "loss": 0.4722, "step": 32793 }, { "epoch": 0.84, "grad_norm": 5.889585018157959, "learning_rate": 1.3041341823079679e-06, "loss": 0.6641, "step": 32794 }, { "epoch": 0.84, "grad_norm": 2.6130545139312744, "learning_rate": 1.3037243039178416e-06, "loss": 0.6152, "step": 32795 }, { "epoch": 0.84, "grad_norm": 1.2299364805221558, "learning_rate": 1.3033144854569847e-06, "loss": 0.6054, "step": 32796 }, { "epoch": 0.84, "grad_norm": 1.383829116821289, "learning_rate": 1.3029047269282203e-06, "loss": 0.5045, "step": 32797 }, { "epoch": 0.84, "grad_norm": 1.753350019454956, "learning_rate": 1.3024950283343707e-06, "loss": 0.4226, "step": 32798 }, { "epoch": 0.84, "grad_norm": 3.1432876586914062, "learning_rate": 1.3020853896782625e-06, "loss": 0.4295, "step": 32799 }, { "epoch": 0.84, "grad_norm": 3.062922954559326, "learning_rate": 1.301675810962718e-06, "loss": 0.5971, "step": 32800 }, { "epoch": 0.84, "grad_norm": 1.5903570652008057, "learning_rate": 1.3012662921905573e-06, "loss": 0.5453, "step": 32801 }, { "epoch": 0.84, "grad_norm": 8.018484115600586, "learning_rate": 1.3008568333646055e-06, "loss": 0.5509, "step": 32802 }, { "epoch": 0.84, "grad_norm": 1.3883408308029175, "learning_rate": 1.3004474344876828e-06, "loss": 0.4954, "step": 32803 }, { "epoch": 0.84, "grad_norm": 3.66713547706604, "learning_rate": 1.300038095562609e-06, "loss": 0.5798, "step": 32804 }, { "epoch": 0.84, "grad_norm": 1.9766528606414795, "learning_rate": 1.299628816592209e-06, "loss": 0.5845, "step": 32805 }, { "epoch": 0.84, "grad_norm": 1.2153126001358032, "learning_rate": 1.299219597579301e-06, "loss": 0.5422, "step": 32806 }, { "epoch": 0.84, "grad_norm": 4.144504547119141, "learning_rate": 1.2988104385267053e-06, "loss": 0.604, "step": 32807 }, { "epoch": 0.84, "grad_norm": 9.638827323913574, "learning_rate": 1.2984013394372397e-06, "loss": 0.4488, "step": 32808 }, { "epoch": 0.84, "grad_norm": 2.20178484916687, "learning_rate": 1.2979923003137263e-06, "loss": 0.6277, "step": 32809 }, { "epoch": 0.84, "grad_norm": 2.502180814743042, "learning_rate": 1.2975833211589839e-06, "loss": 0.535, "step": 32810 }, { "epoch": 0.84, "grad_norm": 5.622689247131348, "learning_rate": 1.297174401975827e-06, "loss": 0.483, "step": 32811 }, { "epoch": 0.84, "grad_norm": 0.9555889368057251, "learning_rate": 1.296765542767079e-06, "loss": 0.4259, "step": 32812 }, { "epoch": 0.84, "grad_norm": 1.427941083908081, "learning_rate": 1.296356743535554e-06, "loss": 0.5269, "step": 32813 }, { "epoch": 0.84, "grad_norm": 2.3696632385253906, "learning_rate": 1.2959480042840712e-06, "loss": 0.4722, "step": 32814 }, { "epoch": 0.84, "grad_norm": 1.5010319948196411, "learning_rate": 1.2955393250154435e-06, "loss": 0.3993, "step": 32815 }, { "epoch": 0.84, "grad_norm": 1.5005717277526855, "learning_rate": 1.2951307057324935e-06, "loss": 0.5869, "step": 32816 }, { "epoch": 0.84, "grad_norm": 1.4791988134384155, "learning_rate": 1.2947221464380322e-06, "loss": 0.5317, "step": 32817 }, { "epoch": 0.84, "grad_norm": 1.1457656621932983, "learning_rate": 1.2943136471348761e-06, "loss": 0.4079, "step": 32818 }, { "epoch": 0.84, "grad_norm": 2.079355001449585, "learning_rate": 1.2939052078258418e-06, "loss": 0.6208, "step": 32819 }, { "epoch": 0.84, "grad_norm": 1.3396639823913574, "learning_rate": 1.2934968285137438e-06, "loss": 0.6676, "step": 32820 }, { "epoch": 0.84, "grad_norm": 1.4082142114639282, "learning_rate": 1.2930885092013945e-06, "loss": 0.3683, "step": 32821 }, { "epoch": 0.84, "grad_norm": 1.2782936096191406, "learning_rate": 1.292680249891608e-06, "loss": 0.4718, "step": 32822 }, { "epoch": 0.84, "grad_norm": 1.2424125671386719, "learning_rate": 1.292272050587201e-06, "loss": 0.6416, "step": 32823 }, { "epoch": 0.84, "grad_norm": 1.1827348470687866, "learning_rate": 1.2918639112909837e-06, "loss": 0.4943, "step": 32824 }, { "epoch": 0.84, "grad_norm": 1.5172675848007202, "learning_rate": 1.291455832005768e-06, "loss": 0.5737, "step": 32825 }, { "epoch": 0.84, "grad_norm": 1.9817672967910767, "learning_rate": 1.2910478127343685e-06, "loss": 0.5043, "step": 32826 }, { "epoch": 0.84, "grad_norm": 1.0510971546173096, "learning_rate": 1.290639853479596e-06, "loss": 0.4417, "step": 32827 }, { "epoch": 0.84, "grad_norm": 1.564741849899292, "learning_rate": 1.290231954244261e-06, "loss": 0.5441, "step": 32828 }, { "epoch": 0.84, "grad_norm": 7.826167583465576, "learning_rate": 1.2898241150311785e-06, "loss": 0.5706, "step": 32829 }, { "epoch": 0.84, "grad_norm": 1.7808047533035278, "learning_rate": 1.2894163358431543e-06, "loss": 0.518, "step": 32830 }, { "epoch": 0.84, "grad_norm": 1.2940844297409058, "learning_rate": 1.2890086166829974e-06, "loss": 0.5425, "step": 32831 }, { "epoch": 0.84, "grad_norm": 1.5809448957443237, "learning_rate": 1.288600957553523e-06, "loss": 0.5943, "step": 32832 }, { "epoch": 0.84, "grad_norm": 1.634362816810608, "learning_rate": 1.2881933584575378e-06, "loss": 0.5724, "step": 32833 }, { "epoch": 0.84, "grad_norm": 1.2302324771881104, "learning_rate": 1.2877858193978487e-06, "loss": 0.5481, "step": 32834 }, { "epoch": 0.84, "grad_norm": 4.906038284301758, "learning_rate": 1.2873783403772678e-06, "loss": 0.6359, "step": 32835 }, { "epoch": 0.84, "grad_norm": 1.0044124126434326, "learning_rate": 1.2869709213986025e-06, "loss": 0.3919, "step": 32836 }, { "epoch": 0.84, "grad_norm": 3.328575611114502, "learning_rate": 1.2865635624646588e-06, "loss": 0.5578, "step": 32837 }, { "epoch": 0.84, "grad_norm": 9.107364654541016, "learning_rate": 1.286156263578242e-06, "loss": 0.5105, "step": 32838 }, { "epoch": 0.84, "grad_norm": 1.880122184753418, "learning_rate": 1.2857490247421645e-06, "loss": 0.6112, "step": 32839 }, { "epoch": 0.84, "grad_norm": 1.3425997495651245, "learning_rate": 1.2853418459592304e-06, "loss": 0.5985, "step": 32840 }, { "epoch": 0.84, "grad_norm": 1.18894362449646, "learning_rate": 1.284934727232242e-06, "loss": 0.4465, "step": 32841 }, { "epoch": 0.84, "grad_norm": 1.0746691226959229, "learning_rate": 1.2845276685640106e-06, "loss": 0.459, "step": 32842 }, { "epoch": 0.84, "grad_norm": 1.3707220554351807, "learning_rate": 1.2841206699573393e-06, "loss": 0.5959, "step": 32843 }, { "epoch": 0.84, "grad_norm": 1.5207314491271973, "learning_rate": 1.2837137314150328e-06, "loss": 0.4904, "step": 32844 }, { "epoch": 0.84, "grad_norm": 1.388690710067749, "learning_rate": 1.2833068529398929e-06, "loss": 0.4442, "step": 32845 }, { "epoch": 0.84, "grad_norm": 1.6491048336029053, "learning_rate": 1.2829000345347276e-06, "loss": 0.5056, "step": 32846 }, { "epoch": 0.84, "grad_norm": 0.9698295593261719, "learning_rate": 1.282493276202338e-06, "loss": 0.5074, "step": 32847 }, { "epoch": 0.84, "grad_norm": 1.204129695892334, "learning_rate": 1.282086577945526e-06, "loss": 0.4925, "step": 32848 }, { "epoch": 0.84, "grad_norm": 4.591012477874756, "learning_rate": 1.2816799397670988e-06, "loss": 0.8143, "step": 32849 }, { "epoch": 0.84, "grad_norm": 9.275474548339844, "learning_rate": 1.2812733616698558e-06, "loss": 0.5442, "step": 32850 }, { "epoch": 0.84, "grad_norm": 1.47030770778656, "learning_rate": 1.2808668436565996e-06, "loss": 0.5935, "step": 32851 }, { "epoch": 0.84, "grad_norm": 1.4656288623809814, "learning_rate": 1.2804603857301289e-06, "loss": 0.5047, "step": 32852 }, { "epoch": 0.84, "grad_norm": 1.2618417739868164, "learning_rate": 1.2800539878932495e-06, "loss": 0.5255, "step": 32853 }, { "epoch": 0.84, "grad_norm": 4.453989028930664, "learning_rate": 1.2796476501487587e-06, "loss": 0.7177, "step": 32854 }, { "epoch": 0.84, "grad_norm": 1.72728431224823, "learning_rate": 1.2792413724994556e-06, "loss": 0.6196, "step": 32855 }, { "epoch": 0.84, "grad_norm": 1.3910491466522217, "learning_rate": 1.2788351549481447e-06, "loss": 0.5827, "step": 32856 }, { "epoch": 0.84, "grad_norm": 1.6517330408096313, "learning_rate": 1.2784289974976227e-06, "loss": 0.5448, "step": 32857 }, { "epoch": 0.84, "grad_norm": 1.7250654697418213, "learning_rate": 1.278022900150686e-06, "loss": 0.5705, "step": 32858 }, { "epoch": 0.84, "grad_norm": 4.583330154418945, "learning_rate": 1.2776168629101382e-06, "loss": 0.5993, "step": 32859 }, { "epoch": 0.84, "grad_norm": 1.1914476156234741, "learning_rate": 1.2772108857787757e-06, "loss": 0.4704, "step": 32860 }, { "epoch": 0.84, "grad_norm": 1.266002893447876, "learning_rate": 1.2768049687593953e-06, "loss": 0.5422, "step": 32861 }, { "epoch": 0.84, "grad_norm": 1.5161247253417969, "learning_rate": 1.2763991118547914e-06, "loss": 0.4533, "step": 32862 }, { "epoch": 0.84, "grad_norm": 1.6373451948165894, "learning_rate": 1.2759933150677672e-06, "loss": 0.6174, "step": 32863 }, { "epoch": 0.84, "grad_norm": 1.0936247110366821, "learning_rate": 1.2755875784011162e-06, "loss": 0.5567, "step": 32864 }, { "epoch": 0.84, "grad_norm": 1.449214220046997, "learning_rate": 1.2751819018576316e-06, "loss": 0.6308, "step": 32865 }, { "epoch": 0.84, "grad_norm": 1.2068341970443726, "learning_rate": 1.2747762854401146e-06, "loss": 0.5515, "step": 32866 }, { "epoch": 0.84, "grad_norm": 1.6364365816116333, "learning_rate": 1.2743707291513574e-06, "loss": 0.4698, "step": 32867 }, { "epoch": 0.84, "grad_norm": 2.0384299755096436, "learning_rate": 1.2739652329941555e-06, "loss": 0.4916, "step": 32868 }, { "epoch": 0.84, "grad_norm": 2.483480930328369, "learning_rate": 1.273559796971301e-06, "loss": 0.6708, "step": 32869 }, { "epoch": 0.84, "grad_norm": 2.739180326461792, "learning_rate": 1.2731544210855917e-06, "loss": 0.5543, "step": 32870 }, { "epoch": 0.84, "grad_norm": 6.501052379608154, "learning_rate": 1.2727491053398189e-06, "loss": 0.4042, "step": 32871 }, { "epoch": 0.84, "grad_norm": 4.2677764892578125, "learning_rate": 1.2723438497367747e-06, "loss": 0.5557, "step": 32872 }, { "epoch": 0.84, "grad_norm": 1.0908344984054565, "learning_rate": 1.2719386542792556e-06, "loss": 0.5074, "step": 32873 }, { "epoch": 0.84, "grad_norm": 19.2103328704834, "learning_rate": 1.2715335189700516e-06, "loss": 0.6929, "step": 32874 }, { "epoch": 0.84, "grad_norm": 1.0233662128448486, "learning_rate": 1.2711284438119553e-06, "loss": 0.4916, "step": 32875 }, { "epoch": 0.84, "grad_norm": 1.3766939640045166, "learning_rate": 1.2707234288077552e-06, "loss": 0.6363, "step": 32876 }, { "epoch": 0.84, "grad_norm": 1.484257459640503, "learning_rate": 1.270318473960247e-06, "loss": 0.5239, "step": 32877 }, { "epoch": 0.84, "grad_norm": 1.9303032159805298, "learning_rate": 1.2699135792722194e-06, "loss": 0.6845, "step": 32878 }, { "epoch": 0.84, "grad_norm": 3.111978769302368, "learning_rate": 1.2695087447464605e-06, "loss": 0.5244, "step": 32879 }, { "epoch": 0.84, "grad_norm": 2.654144763946533, "learning_rate": 1.2691039703857645e-06, "loss": 0.6808, "step": 32880 }, { "epoch": 0.84, "grad_norm": 1.1061040163040161, "learning_rate": 1.268699256192918e-06, "loss": 0.3678, "step": 32881 }, { "epoch": 0.84, "grad_norm": 5.525635242462158, "learning_rate": 1.2682946021707099e-06, "loss": 0.7353, "step": 32882 }, { "epoch": 0.84, "grad_norm": 1.6409305334091187, "learning_rate": 1.2678900083219303e-06, "loss": 0.4931, "step": 32883 }, { "epoch": 0.84, "grad_norm": 1.4172773361206055, "learning_rate": 1.267485474649368e-06, "loss": 0.6104, "step": 32884 }, { "epoch": 0.84, "grad_norm": 1.8745301961898804, "learning_rate": 1.2670810011558089e-06, "loss": 0.553, "step": 32885 }, { "epoch": 0.84, "grad_norm": 3.026930809020996, "learning_rate": 1.266676587844039e-06, "loss": 0.5769, "step": 32886 }, { "epoch": 0.84, "grad_norm": 6.33202600479126, "learning_rate": 1.2662722347168488e-06, "loss": 0.6829, "step": 32887 }, { "epoch": 0.84, "grad_norm": 1.633193016052246, "learning_rate": 1.2658679417770237e-06, "loss": 0.4671, "step": 32888 }, { "epoch": 0.84, "grad_norm": 0.8607868552207947, "learning_rate": 1.2654637090273481e-06, "loss": 0.5268, "step": 32889 }, { "epoch": 0.84, "grad_norm": 1.5122066736221313, "learning_rate": 1.2650595364706097e-06, "loss": 0.529, "step": 32890 }, { "epoch": 0.84, "grad_norm": 1.1790318489074707, "learning_rate": 1.2646554241095942e-06, "loss": 0.4184, "step": 32891 }, { "epoch": 0.84, "grad_norm": 3.887972116470337, "learning_rate": 1.2642513719470851e-06, "loss": 0.7398, "step": 32892 }, { "epoch": 0.84, "grad_norm": 1.3442540168762207, "learning_rate": 1.2638473799858654e-06, "loss": 0.41, "step": 32893 }, { "epoch": 0.84, "grad_norm": 1.374745488166809, "learning_rate": 1.2634434482287227e-06, "loss": 0.6004, "step": 32894 }, { "epoch": 0.84, "grad_norm": 1.6197590827941895, "learning_rate": 1.2630395766784387e-06, "loss": 0.5281, "step": 32895 }, { "epoch": 0.84, "grad_norm": 3.179168939590454, "learning_rate": 1.2626357653377953e-06, "loss": 0.6441, "step": 32896 }, { "epoch": 0.84, "grad_norm": 1.7566536664962769, "learning_rate": 1.2622320142095789e-06, "loss": 0.453, "step": 32897 }, { "epoch": 0.84, "grad_norm": 1.2998597621917725, "learning_rate": 1.261828323296569e-06, "loss": 0.4415, "step": 32898 }, { "epoch": 0.84, "grad_norm": 1.6896001100540161, "learning_rate": 1.2614246926015472e-06, "loss": 0.4389, "step": 32899 }, { "epoch": 0.84, "grad_norm": 1.0327762365341187, "learning_rate": 1.2610211221272983e-06, "loss": 0.4491, "step": 32900 }, { "epoch": 0.84, "grad_norm": 1.710434079170227, "learning_rate": 1.260617611876601e-06, "loss": 0.6011, "step": 32901 }, { "epoch": 0.84, "grad_norm": 11.037317276000977, "learning_rate": 1.2602141618522367e-06, "loss": 0.4939, "step": 32902 }, { "epoch": 0.84, "grad_norm": 1.3007762432098389, "learning_rate": 1.259810772056984e-06, "loss": 0.3856, "step": 32903 }, { "epoch": 0.84, "grad_norm": 2.255474328994751, "learning_rate": 1.2594074424936265e-06, "loss": 0.61, "step": 32904 }, { "epoch": 0.84, "grad_norm": 1.1704943180084229, "learning_rate": 1.2590041731649405e-06, "loss": 0.6331, "step": 32905 }, { "epoch": 0.84, "grad_norm": 1.6553013324737549, "learning_rate": 1.258600964073704e-06, "loss": 0.5902, "step": 32906 }, { "epoch": 0.84, "grad_norm": 1.366042971611023, "learning_rate": 1.2581978152227014e-06, "loss": 0.4922, "step": 32907 }, { "epoch": 0.84, "grad_norm": 1.801957130432129, "learning_rate": 1.2577947266147062e-06, "loss": 0.5784, "step": 32908 }, { "epoch": 0.84, "grad_norm": 2.042379856109619, "learning_rate": 1.257391698252497e-06, "loss": 0.5591, "step": 32909 }, { "epoch": 0.84, "grad_norm": 1.759383201599121, "learning_rate": 1.2569887301388505e-06, "loss": 0.467, "step": 32910 }, { "epoch": 0.84, "grad_norm": 1.5685639381408691, "learning_rate": 1.2565858222765459e-06, "loss": 0.5161, "step": 32911 }, { "epoch": 0.84, "grad_norm": 1.821890115737915, "learning_rate": 1.2561829746683596e-06, "loss": 0.4818, "step": 32912 }, { "epoch": 0.84, "grad_norm": 1.5550698041915894, "learning_rate": 1.2557801873170638e-06, "loss": 0.4882, "step": 32913 }, { "epoch": 0.84, "grad_norm": 2.422900915145874, "learning_rate": 1.2553774602254398e-06, "loss": 0.4914, "step": 32914 }, { "epoch": 0.84, "grad_norm": 3.6321730613708496, "learning_rate": 1.2549747933962608e-06, "loss": 0.5415, "step": 32915 }, { "epoch": 0.84, "grad_norm": 1.2247883081436157, "learning_rate": 1.2545721868323013e-06, "loss": 0.4779, "step": 32916 }, { "epoch": 0.84, "grad_norm": 11.363627433776855, "learning_rate": 1.2541696405363334e-06, "loss": 0.6474, "step": 32917 }, { "epoch": 0.84, "grad_norm": 1.62269926071167, "learning_rate": 1.253767154511135e-06, "loss": 0.5435, "step": 32918 }, { "epoch": 0.84, "grad_norm": 1.7555705308914185, "learning_rate": 1.2533647287594797e-06, "loss": 0.5695, "step": 32919 }, { "epoch": 0.84, "grad_norm": 1.482200264930725, "learning_rate": 1.252962363284137e-06, "loss": 0.3922, "step": 32920 }, { "epoch": 0.84, "grad_norm": 13.919312477111816, "learning_rate": 1.252560058087885e-06, "loss": 0.5759, "step": 32921 }, { "epoch": 0.84, "grad_norm": 1.652935266494751, "learning_rate": 1.2521578131734925e-06, "loss": 0.5617, "step": 32922 }, { "epoch": 0.84, "grad_norm": 0.81863933801651, "learning_rate": 1.2517556285437304e-06, "loss": 0.4183, "step": 32923 }, { "epoch": 0.84, "grad_norm": 1.1746735572814941, "learning_rate": 1.2513535042013748e-06, "loss": 0.3295, "step": 32924 }, { "epoch": 0.84, "grad_norm": 1.317643642425537, "learning_rate": 1.250951440149194e-06, "loss": 0.5424, "step": 32925 }, { "epoch": 0.84, "grad_norm": 1.0701916217803955, "learning_rate": 1.2505494363899595e-06, "loss": 0.4436, "step": 32926 }, { "epoch": 0.84, "grad_norm": 1.5352904796600342, "learning_rate": 1.250147492926439e-06, "loss": 0.4374, "step": 32927 }, { "epoch": 0.84, "grad_norm": 7.297489166259766, "learning_rate": 1.2497456097614068e-06, "loss": 0.7182, "step": 32928 }, { "epoch": 0.84, "grad_norm": 2.6416776180267334, "learning_rate": 1.249343786897631e-06, "loss": 0.5901, "step": 32929 }, { "epoch": 0.84, "grad_norm": 2.0622124671936035, "learning_rate": 1.248942024337878e-06, "loss": 0.6738, "step": 32930 }, { "epoch": 0.84, "grad_norm": 1.517462134361267, "learning_rate": 1.24854032208492e-06, "loss": 0.6063, "step": 32931 }, { "epoch": 0.84, "grad_norm": 1.8725619316101074, "learning_rate": 1.248138680141524e-06, "loss": 0.5965, "step": 32932 }, { "epoch": 0.84, "grad_norm": 1.396625280380249, "learning_rate": 1.2477370985104587e-06, "loss": 0.4813, "step": 32933 }, { "epoch": 0.84, "grad_norm": 4.626124382019043, "learning_rate": 1.2473355771944873e-06, "loss": 0.6044, "step": 32934 }, { "epoch": 0.84, "grad_norm": 1.7235246896743774, "learning_rate": 1.2469341161963833e-06, "loss": 0.4208, "step": 32935 }, { "epoch": 0.84, "grad_norm": 1.3185780048370361, "learning_rate": 1.24653271551891e-06, "loss": 0.3924, "step": 32936 }, { "epoch": 0.84, "grad_norm": 1.3656740188598633, "learning_rate": 1.2461313751648307e-06, "loss": 0.3035, "step": 32937 }, { "epoch": 0.84, "grad_norm": 2.224886178970337, "learning_rate": 1.2457300951369167e-06, "loss": 0.5069, "step": 32938 }, { "epoch": 0.84, "grad_norm": 1.108684778213501, "learning_rate": 1.245328875437931e-06, "loss": 0.4376, "step": 32939 }, { "epoch": 0.84, "grad_norm": 2.5161306858062744, "learning_rate": 1.2449277160706374e-06, "loss": 0.7075, "step": 32940 }, { "epoch": 0.84, "grad_norm": 1.530242681503296, "learning_rate": 1.2445266170378022e-06, "loss": 0.5979, "step": 32941 }, { "epoch": 0.84, "grad_norm": 1.2582085132598877, "learning_rate": 1.2441255783421891e-06, "loss": 0.5334, "step": 32942 }, { "epoch": 0.84, "grad_norm": 1.5486149787902832, "learning_rate": 1.2437245999865622e-06, "loss": 0.4159, "step": 32943 }, { "epoch": 0.84, "grad_norm": 2.1715688705444336, "learning_rate": 1.2433236819736816e-06, "loss": 0.6662, "step": 32944 }, { "epoch": 0.84, "grad_norm": 1.1464179754257202, "learning_rate": 1.2429228243063153e-06, "loss": 0.48, "step": 32945 }, { "epoch": 0.84, "grad_norm": 1.42319655418396, "learning_rate": 1.2425220269872218e-06, "loss": 0.5257, "step": 32946 }, { "epoch": 0.84, "grad_norm": 1.0939722061157227, "learning_rate": 1.2421212900191638e-06, "loss": 0.5212, "step": 32947 }, { "epoch": 0.84, "grad_norm": 1.3707572221755981, "learning_rate": 1.2417206134049053e-06, "loss": 0.441, "step": 32948 }, { "epoch": 0.84, "grad_norm": 1.9550576210021973, "learning_rate": 1.2413199971472057e-06, "loss": 0.4756, "step": 32949 }, { "epoch": 0.84, "grad_norm": 1.3592780828475952, "learning_rate": 1.2409194412488256e-06, "loss": 0.4453, "step": 32950 }, { "epoch": 0.84, "grad_norm": 1.1829606294631958, "learning_rate": 1.2405189457125244e-06, "loss": 0.5399, "step": 32951 }, { "epoch": 0.84, "grad_norm": 3.8012757301330566, "learning_rate": 1.2401185105410652e-06, "loss": 0.6115, "step": 32952 }, { "epoch": 0.84, "grad_norm": 1.2295403480529785, "learning_rate": 1.2397181357372056e-06, "loss": 0.438, "step": 32953 }, { "epoch": 0.84, "grad_norm": 1.5790700912475586, "learning_rate": 1.2393178213037027e-06, "loss": 0.485, "step": 32954 }, { "epoch": 0.84, "grad_norm": 4.031060695648193, "learning_rate": 1.2389175672433195e-06, "loss": 0.6777, "step": 32955 }, { "epoch": 0.84, "grad_norm": 4.876664161682129, "learning_rate": 1.2385173735588119e-06, "loss": 0.5525, "step": 32956 }, { "epoch": 0.84, "grad_norm": 4.926121234893799, "learning_rate": 1.2381172402529384e-06, "loss": 0.5565, "step": 32957 }, { "epoch": 0.84, "grad_norm": 4.8616228103637695, "learning_rate": 1.237717167328455e-06, "loss": 0.3576, "step": 32958 }, { "epoch": 0.84, "grad_norm": 4.23504638671875, "learning_rate": 1.2373171547881203e-06, "loss": 0.5907, "step": 32959 }, { "epoch": 0.84, "grad_norm": 3.0027260780334473, "learning_rate": 1.236917202634691e-06, "loss": 0.6313, "step": 32960 }, { "epoch": 0.84, "grad_norm": 1.2022209167480469, "learning_rate": 1.2365173108709217e-06, "loss": 0.4191, "step": 32961 }, { "epoch": 0.84, "grad_norm": 1.318657398223877, "learning_rate": 1.23611747949957e-06, "loss": 0.5652, "step": 32962 }, { "epoch": 0.84, "grad_norm": 3.163703441619873, "learning_rate": 1.2357177085233918e-06, "loss": 0.4419, "step": 32963 }, { "epoch": 0.84, "grad_norm": 1.3512213230133057, "learning_rate": 1.235317997945138e-06, "loss": 0.4405, "step": 32964 }, { "epoch": 0.84, "grad_norm": 1.0524005889892578, "learning_rate": 1.2349183477675685e-06, "loss": 0.6047, "step": 32965 }, { "epoch": 0.84, "grad_norm": 1.0679612159729004, "learning_rate": 1.2345187579934347e-06, "loss": 0.4201, "step": 32966 }, { "epoch": 0.84, "grad_norm": 3.088080883026123, "learning_rate": 1.234119228625491e-06, "loss": 0.6046, "step": 32967 }, { "epoch": 0.84, "grad_norm": 1.5253002643585205, "learning_rate": 1.2337197596664884e-06, "loss": 0.7174, "step": 32968 }, { "epoch": 0.85, "grad_norm": 2.868622064590454, "learning_rate": 1.2333203511191828e-06, "loss": 0.6176, "step": 32969 }, { "epoch": 0.85, "grad_norm": 2.378782272338867, "learning_rate": 1.232921002986327e-06, "loss": 0.6985, "step": 32970 }, { "epoch": 0.85, "grad_norm": 2.75878643989563, "learning_rate": 1.232521715270668e-06, "loss": 0.5689, "step": 32971 }, { "epoch": 0.85, "grad_norm": 5.178917407989502, "learning_rate": 1.232122487974964e-06, "loss": 0.619, "step": 32972 }, { "epoch": 0.85, "grad_norm": 1.455483078956604, "learning_rate": 1.231723321101963e-06, "loss": 0.5718, "step": 32973 }, { "epoch": 0.85, "grad_norm": 10.549903869628906, "learning_rate": 1.231324214654417e-06, "loss": 0.696, "step": 32974 }, { "epoch": 0.85, "grad_norm": 1.6745766401290894, "learning_rate": 1.230925168635072e-06, "loss": 0.3932, "step": 32975 }, { "epoch": 0.85, "grad_norm": 1.625123381614685, "learning_rate": 1.2305261830466852e-06, "loss": 0.535, "step": 32976 }, { "epoch": 0.85, "grad_norm": 1.629833459854126, "learning_rate": 1.2301272578920008e-06, "loss": 0.3823, "step": 32977 }, { "epoch": 0.85, "grad_norm": 2.1388416290283203, "learning_rate": 1.229728393173768e-06, "loss": 0.4418, "step": 32978 }, { "epoch": 0.85, "grad_norm": 1.0506515502929688, "learning_rate": 1.22932958889474e-06, "loss": 0.6523, "step": 32979 }, { "epoch": 0.85, "grad_norm": 1.7697042226791382, "learning_rate": 1.228930845057661e-06, "loss": 0.5555, "step": 32980 }, { "epoch": 0.85, "grad_norm": 1.2278395891189575, "learning_rate": 1.228532161665279e-06, "loss": 0.3616, "step": 32981 }, { "epoch": 0.85, "grad_norm": 2.2990736961364746, "learning_rate": 1.2281335387203442e-06, "loss": 0.5661, "step": 32982 }, { "epoch": 0.85, "grad_norm": 1.549134612083435, "learning_rate": 1.2277349762256019e-06, "loss": 0.4317, "step": 32983 }, { "epoch": 0.85, "grad_norm": 1.6726536750793457, "learning_rate": 1.227336474183799e-06, "loss": 0.5446, "step": 32984 }, { "epoch": 0.85, "grad_norm": 4.088535785675049, "learning_rate": 1.2269380325976787e-06, "loss": 0.3172, "step": 32985 }, { "epoch": 0.85, "grad_norm": 3.331021308898926, "learning_rate": 1.2265396514699934e-06, "loss": 0.5208, "step": 32986 }, { "epoch": 0.85, "grad_norm": 1.267512321472168, "learning_rate": 1.2261413308034842e-06, "loss": 0.4964, "step": 32987 }, { "epoch": 0.85, "grad_norm": 1.8874447345733643, "learning_rate": 1.2257430706008944e-06, "loss": 0.6281, "step": 32988 }, { "epoch": 0.85, "grad_norm": 1.198303461074829, "learning_rate": 1.225344870864973e-06, "loss": 0.3806, "step": 32989 }, { "epoch": 0.85, "grad_norm": 1.240041732788086, "learning_rate": 1.2249467315984609e-06, "loss": 0.4976, "step": 32990 }, { "epoch": 0.85, "grad_norm": 1.2918381690979004, "learning_rate": 1.224548652804104e-06, "loss": 0.5606, "step": 32991 }, { "epoch": 0.85, "grad_norm": 1.9619534015655518, "learning_rate": 1.2241506344846421e-06, "loss": 0.5925, "step": 32992 }, { "epoch": 0.85, "grad_norm": 1.3416051864624023, "learning_rate": 1.2237526766428232e-06, "loss": 0.5347, "step": 32993 }, { "epoch": 0.85, "grad_norm": 1.4201990365982056, "learning_rate": 1.223354779281386e-06, "loss": 0.5539, "step": 32994 }, { "epoch": 0.85, "grad_norm": 1.2166433334350586, "learning_rate": 1.222956942403073e-06, "loss": 0.493, "step": 32995 }, { "epoch": 0.85, "grad_norm": 2.0827202796936035, "learning_rate": 1.2225591660106274e-06, "loss": 0.4276, "step": 32996 }, { "epoch": 0.85, "grad_norm": 1.2220293283462524, "learning_rate": 1.2221614501067902e-06, "loss": 0.3977, "step": 32997 }, { "epoch": 0.85, "grad_norm": 4.310308933258057, "learning_rate": 1.2217637946943007e-06, "loss": 0.6734, "step": 32998 }, { "epoch": 0.85, "grad_norm": 0.9852221012115479, "learning_rate": 1.2213661997758984e-06, "loss": 0.3993, "step": 32999 }, { "epoch": 0.85, "grad_norm": 4.306149005889893, "learning_rate": 1.2209686653543274e-06, "loss": 0.4173, "step": 33000 }, { "epoch": 0.85, "grad_norm": 1.354614019393921, "learning_rate": 1.2205711914323237e-06, "loss": 0.4612, "step": 33001 }, { "epoch": 0.85, "grad_norm": 1.3256072998046875, "learning_rate": 1.2201737780126267e-06, "loss": 0.4951, "step": 33002 }, { "epoch": 0.85, "grad_norm": 5.317527770996094, "learning_rate": 1.2197764250979783e-06, "loss": 0.645, "step": 33003 }, { "epoch": 0.85, "grad_norm": 1.8231617212295532, "learning_rate": 1.219379132691113e-06, "loss": 0.4962, "step": 33004 }, { "epoch": 0.85, "grad_norm": 1.847029209136963, "learning_rate": 1.218981900794769e-06, "loss": 0.551, "step": 33005 }, { "epoch": 0.85, "grad_norm": 2.561540365219116, "learning_rate": 1.218584729411687e-06, "loss": 0.6161, "step": 33006 }, { "epoch": 0.85, "grad_norm": 1.7394537925720215, "learning_rate": 1.2181876185446028e-06, "loss": 0.5408, "step": 33007 }, { "epoch": 0.85, "grad_norm": 2.317915916442871, "learning_rate": 1.2177905681962509e-06, "loss": 0.7202, "step": 33008 }, { "epoch": 0.85, "grad_norm": 1.1228103637695312, "learning_rate": 1.217393578369368e-06, "loss": 0.4578, "step": 33009 }, { "epoch": 0.85, "grad_norm": 1.5215392112731934, "learning_rate": 1.2169966490666917e-06, "loss": 0.405, "step": 33010 }, { "epoch": 0.85, "grad_norm": 1.6097667217254639, "learning_rate": 1.216599780290958e-06, "loss": 0.4205, "step": 33011 }, { "epoch": 0.85, "grad_norm": 1.2731529474258423, "learning_rate": 1.2162029720448964e-06, "loss": 0.6524, "step": 33012 }, { "epoch": 0.85, "grad_norm": 8.714239120483398, "learning_rate": 1.2158062243312485e-06, "loss": 0.4593, "step": 33013 }, { "epoch": 0.85, "grad_norm": 1.9510396718978882, "learning_rate": 1.2154095371527453e-06, "loss": 0.4284, "step": 33014 }, { "epoch": 0.85, "grad_norm": 1.2358003854751587, "learning_rate": 1.215012910512121e-06, "loss": 0.4694, "step": 33015 }, { "epoch": 0.85, "grad_norm": 1.5404623746871948, "learning_rate": 1.214616344412105e-06, "loss": 0.3332, "step": 33016 }, { "epoch": 0.85, "grad_norm": 1.3186949491500854, "learning_rate": 1.2142198388554371e-06, "loss": 0.5471, "step": 33017 }, { "epoch": 0.85, "grad_norm": 1.947967290878296, "learning_rate": 1.2138233938448463e-06, "loss": 0.5688, "step": 33018 }, { "epoch": 0.85, "grad_norm": 1.3886052370071411, "learning_rate": 1.2134270093830613e-06, "loss": 0.3284, "step": 33019 }, { "epoch": 0.85, "grad_norm": 1.4656795263290405, "learning_rate": 1.2130306854728202e-06, "loss": 0.4405, "step": 33020 }, { "epoch": 0.85, "grad_norm": 1.7243891954421997, "learning_rate": 1.2126344221168507e-06, "loss": 0.455, "step": 33021 }, { "epoch": 0.85, "grad_norm": 1.1980903148651123, "learning_rate": 1.2122382193178816e-06, "loss": 0.3917, "step": 33022 }, { "epoch": 0.85, "grad_norm": 1.2455857992172241, "learning_rate": 1.2118420770786488e-06, "loss": 0.5254, "step": 33023 }, { "epoch": 0.85, "grad_norm": 1.3420085906982422, "learning_rate": 1.2114459954018786e-06, "loss": 0.442, "step": 33024 }, { "epoch": 0.85, "grad_norm": 1.1246861219406128, "learning_rate": 1.2110499742903004e-06, "loss": 0.2691, "step": 33025 }, { "epoch": 0.85, "grad_norm": 1.2566481828689575, "learning_rate": 1.2106540137466427e-06, "loss": 0.5334, "step": 33026 }, { "epoch": 0.85, "grad_norm": 1.9852923154830933, "learning_rate": 1.210258113773638e-06, "loss": 0.5916, "step": 33027 }, { "epoch": 0.85, "grad_norm": 0.9157333374023438, "learning_rate": 1.2098622743740118e-06, "loss": 0.5407, "step": 33028 }, { "epoch": 0.85, "grad_norm": 1.4949947595596313, "learning_rate": 1.2094664955504898e-06, "loss": 0.558, "step": 33029 }, { "epoch": 0.85, "grad_norm": 1.3060731887817383, "learning_rate": 1.209070777305804e-06, "loss": 0.5711, "step": 33030 }, { "epoch": 0.85, "grad_norm": 1.2386817932128906, "learning_rate": 1.2086751196426793e-06, "loss": 0.5251, "step": 33031 }, { "epoch": 0.85, "grad_norm": 2.1983258724212646, "learning_rate": 1.2082795225638433e-06, "loss": 0.5861, "step": 33032 }, { "epoch": 0.85, "grad_norm": 3.795825481414795, "learning_rate": 1.2078839860720182e-06, "loss": 0.4796, "step": 33033 }, { "epoch": 0.85, "grad_norm": 1.259030818939209, "learning_rate": 1.2074885101699363e-06, "loss": 0.4016, "step": 33034 }, { "epoch": 0.85, "grad_norm": 6.557039260864258, "learning_rate": 1.2070930948603189e-06, "loss": 0.5034, "step": 33035 }, { "epoch": 0.85, "grad_norm": 1.1960192918777466, "learning_rate": 1.206697740145889e-06, "loss": 0.5758, "step": 33036 }, { "epoch": 0.85, "grad_norm": 0.9664322733879089, "learning_rate": 1.206302446029376e-06, "loss": 0.4645, "step": 33037 }, { "epoch": 0.85, "grad_norm": 1.2377891540527344, "learning_rate": 1.205907212513503e-06, "loss": 0.5186, "step": 33038 }, { "epoch": 0.85, "grad_norm": 1.3854963779449463, "learning_rate": 1.2055120396009911e-06, "loss": 0.5138, "step": 33039 }, { "epoch": 0.85, "grad_norm": 2.4710443019866943, "learning_rate": 1.205116927294563e-06, "loss": 0.6598, "step": 33040 }, { "epoch": 0.85, "grad_norm": 1.3153027296066284, "learning_rate": 1.204721875596947e-06, "loss": 0.4106, "step": 33041 }, { "epoch": 0.85, "grad_norm": 1.5625003576278687, "learning_rate": 1.2043268845108601e-06, "loss": 0.5525, "step": 33042 }, { "epoch": 0.85, "grad_norm": 1.3855706453323364, "learning_rate": 1.2039319540390259e-06, "loss": 0.3772, "step": 33043 }, { "epoch": 0.85, "grad_norm": 1.3805313110351562, "learning_rate": 1.2035370841841677e-06, "loss": 0.5399, "step": 33044 }, { "epoch": 0.85, "grad_norm": 10.615254402160645, "learning_rate": 1.2031422749490062e-06, "loss": 0.3544, "step": 33045 }, { "epoch": 0.85, "grad_norm": 2.8283841609954834, "learning_rate": 1.2027475263362587e-06, "loss": 0.5549, "step": 33046 }, { "epoch": 0.85, "grad_norm": 1.1808176040649414, "learning_rate": 1.2023528383486506e-06, "loss": 0.4916, "step": 33047 }, { "epoch": 0.85, "grad_norm": 3.985881805419922, "learning_rate": 1.2019582109888994e-06, "loss": 0.6215, "step": 33048 }, { "epoch": 0.85, "grad_norm": 1.4925506114959717, "learning_rate": 1.2015636442597246e-06, "loss": 0.5737, "step": 33049 }, { "epoch": 0.85, "grad_norm": 2.1787800788879395, "learning_rate": 1.2011691381638436e-06, "loss": 0.5594, "step": 33050 }, { "epoch": 0.85, "grad_norm": 1.0442365407943726, "learning_rate": 1.2007746927039787e-06, "loss": 0.51, "step": 33051 }, { "epoch": 0.85, "grad_norm": 1.4949151277542114, "learning_rate": 1.2003803078828468e-06, "loss": 0.4833, "step": 33052 }, { "epoch": 0.85, "grad_norm": 1.0757815837860107, "learning_rate": 1.1999859837031625e-06, "loss": 0.4242, "step": 33053 }, { "epoch": 0.85, "grad_norm": 4.311130046844482, "learning_rate": 1.1995917201676487e-06, "loss": 0.4533, "step": 33054 }, { "epoch": 0.85, "grad_norm": 1.6580129861831665, "learning_rate": 1.1991975172790204e-06, "loss": 0.5368, "step": 33055 }, { "epoch": 0.85, "grad_norm": 1.366355061531067, "learning_rate": 1.198803375039993e-06, "loss": 0.541, "step": 33056 }, { "epoch": 0.85, "grad_norm": 1.280843734741211, "learning_rate": 1.198409293453281e-06, "loss": 0.407, "step": 33057 }, { "epoch": 0.85, "grad_norm": 1.392109751701355, "learning_rate": 1.1980152725216043e-06, "loss": 0.5146, "step": 33058 }, { "epoch": 0.85, "grad_norm": 1.4609322547912598, "learning_rate": 1.1976213122476777e-06, "loss": 0.5248, "step": 33059 }, { "epoch": 0.85, "grad_norm": 2.524739980697632, "learning_rate": 1.197227412634212e-06, "loss": 0.4246, "step": 33060 }, { "epoch": 0.85, "grad_norm": 1.0212887525558472, "learning_rate": 1.1968335736839264e-06, "loss": 0.5439, "step": 33061 }, { "epoch": 0.85, "grad_norm": 4.139580726623535, "learning_rate": 1.1964397953995333e-06, "loss": 0.4915, "step": 33062 }, { "epoch": 0.85, "grad_norm": 0.8004453182220459, "learning_rate": 1.1960460777837456e-06, "loss": 0.4333, "step": 33063 }, { "epoch": 0.85, "grad_norm": 1.930724024772644, "learning_rate": 1.195652420839276e-06, "loss": 0.6857, "step": 33064 }, { "epoch": 0.85, "grad_norm": 1.055005669593811, "learning_rate": 1.1952588245688402e-06, "loss": 0.3882, "step": 33065 }, { "epoch": 0.85, "grad_norm": 1.6498106718063354, "learning_rate": 1.1948652889751489e-06, "loss": 0.7322, "step": 33066 }, { "epoch": 0.85, "grad_norm": 1.3842120170593262, "learning_rate": 1.1944718140609112e-06, "loss": 0.6153, "step": 33067 }, { "epoch": 0.85, "grad_norm": 1.1392638683319092, "learning_rate": 1.1940783998288453e-06, "loss": 0.5034, "step": 33068 }, { "epoch": 0.85, "grad_norm": 1.819471001625061, "learning_rate": 1.1936850462816574e-06, "loss": 0.4959, "step": 33069 }, { "epoch": 0.85, "grad_norm": 0.9892683625221252, "learning_rate": 1.1932917534220578e-06, "loss": 0.4484, "step": 33070 }, { "epoch": 0.85, "grad_norm": 1.7822496891021729, "learning_rate": 1.192898521252761e-06, "loss": 0.5783, "step": 33071 }, { "epoch": 0.85, "grad_norm": 1.0259571075439453, "learning_rate": 1.192505349776476e-06, "loss": 0.5958, "step": 33072 }, { "epoch": 0.85, "grad_norm": 1.6660077571868896, "learning_rate": 1.192112238995906e-06, "loss": 0.5703, "step": 33073 }, { "epoch": 0.85, "grad_norm": 1.7533563375473022, "learning_rate": 1.191719188913768e-06, "loss": 0.6001, "step": 33074 }, { "epoch": 0.85, "grad_norm": 1.6154017448425293, "learning_rate": 1.1913261995327674e-06, "loss": 0.4466, "step": 33075 }, { "epoch": 0.85, "grad_norm": 1.9242829084396362, "learning_rate": 1.1909332708556099e-06, "loss": 0.4687, "step": 33076 }, { "epoch": 0.85, "grad_norm": 3.588933229446411, "learning_rate": 1.1905404028850077e-06, "loss": 0.7183, "step": 33077 }, { "epoch": 0.85, "grad_norm": 1.5636171102523804, "learning_rate": 1.1901475956236674e-06, "loss": 0.5025, "step": 33078 }, { "epoch": 0.85, "grad_norm": 1.2929097414016724, "learning_rate": 1.189754849074294e-06, "loss": 0.5142, "step": 33079 }, { "epoch": 0.85, "grad_norm": 1.971483826637268, "learning_rate": 1.1893621632395935e-06, "loss": 0.4797, "step": 33080 }, { "epoch": 0.85, "grad_norm": 2.388826370239258, "learning_rate": 1.1889695381222754e-06, "loss": 0.6489, "step": 33081 }, { "epoch": 0.85, "grad_norm": 1.2216827869415283, "learning_rate": 1.188576973725044e-06, "loss": 0.4524, "step": 33082 }, { "epoch": 0.85, "grad_norm": 1.493247628211975, "learning_rate": 1.1881844700506019e-06, "loss": 0.5035, "step": 33083 }, { "epoch": 0.85, "grad_norm": 4.733973026275635, "learning_rate": 1.1877920271016574e-06, "loss": 0.4217, "step": 33084 }, { "epoch": 0.85, "grad_norm": 1.1986185312271118, "learning_rate": 1.1873996448809156e-06, "loss": 0.5461, "step": 33085 }, { "epoch": 0.85, "grad_norm": 1.267279863357544, "learning_rate": 1.1870073233910772e-06, "loss": 0.5715, "step": 33086 }, { "epoch": 0.85, "grad_norm": 5.424757957458496, "learning_rate": 1.1866150626348472e-06, "loss": 0.3969, "step": 33087 }, { "epoch": 0.85, "grad_norm": 1.1417629718780518, "learning_rate": 1.1862228626149297e-06, "loss": 0.3272, "step": 33088 }, { "epoch": 0.85, "grad_norm": 1.7830523252487183, "learning_rate": 1.1858307233340271e-06, "loss": 0.5425, "step": 33089 }, { "epoch": 0.85, "grad_norm": 1.773239016532898, "learning_rate": 1.1854386447948395e-06, "loss": 0.6067, "step": 33090 }, { "epoch": 0.85, "grad_norm": 1.2129144668579102, "learning_rate": 1.1850466270000738e-06, "loss": 0.5758, "step": 33091 }, { "epoch": 0.85, "grad_norm": 5.418098449707031, "learning_rate": 1.1846546699524274e-06, "loss": 0.5563, "step": 33092 }, { "epoch": 0.85, "grad_norm": 0.8553259372711182, "learning_rate": 1.1842627736546042e-06, "loss": 0.4733, "step": 33093 }, { "epoch": 0.85, "grad_norm": 1.2053970098495483, "learning_rate": 1.1838709381093005e-06, "loss": 0.5111, "step": 33094 }, { "epoch": 0.85, "grad_norm": 2.443030834197998, "learning_rate": 1.183479163319221e-06, "loss": 0.5361, "step": 33095 }, { "epoch": 0.85, "grad_norm": 1.5556261539459229, "learning_rate": 1.1830874492870648e-06, "loss": 0.5716, "step": 33096 }, { "epoch": 0.85, "grad_norm": 1.8334425687789917, "learning_rate": 1.1826957960155284e-06, "loss": 0.6922, "step": 33097 }, { "epoch": 0.85, "grad_norm": 1.787716269493103, "learning_rate": 1.182304203507314e-06, "loss": 0.5735, "step": 33098 }, { "epoch": 0.85, "grad_norm": 1.272339940071106, "learning_rate": 1.1819126717651208e-06, "loss": 0.5457, "step": 33099 }, { "epoch": 0.85, "grad_norm": 1.4424744844436646, "learning_rate": 1.1815212007916421e-06, "loss": 0.4786, "step": 33100 }, { "epoch": 0.85, "grad_norm": 7.003265380859375, "learning_rate": 1.1811297905895814e-06, "loss": 0.5176, "step": 33101 }, { "epoch": 0.85, "grad_norm": 2.9098336696624756, "learning_rate": 1.180738441161634e-06, "loss": 0.8475, "step": 33102 }, { "epoch": 0.85, "grad_norm": 2.266026735305786, "learning_rate": 1.1803471525104958e-06, "loss": 0.5088, "step": 33103 }, { "epoch": 0.85, "grad_norm": 1.409906268119812, "learning_rate": 1.1799559246388624e-06, "loss": 0.3571, "step": 33104 }, { "epoch": 0.85, "grad_norm": 1.1629079580307007, "learning_rate": 1.179564757549434e-06, "loss": 0.4049, "step": 33105 }, { "epoch": 0.85, "grad_norm": 6.369517803192139, "learning_rate": 1.1791736512449036e-06, "loss": 0.5491, "step": 33106 }, { "epoch": 0.85, "grad_norm": 2.9856460094451904, "learning_rate": 1.1787826057279638e-06, "loss": 0.5066, "step": 33107 }, { "epoch": 0.85, "grad_norm": 3.2945618629455566, "learning_rate": 1.1783916210013146e-06, "loss": 0.4624, "step": 33108 }, { "epoch": 0.85, "grad_norm": 1.7086272239685059, "learning_rate": 1.1780006970676484e-06, "loss": 0.5621, "step": 33109 }, { "epoch": 0.85, "grad_norm": 1.5271111726760864, "learning_rate": 1.1776098339296582e-06, "loss": 0.6917, "step": 33110 }, { "epoch": 0.85, "grad_norm": 1.6011667251586914, "learning_rate": 1.1772190315900366e-06, "loss": 0.5916, "step": 33111 }, { "epoch": 0.85, "grad_norm": 2.130037307739258, "learning_rate": 1.1768282900514806e-06, "loss": 0.5689, "step": 33112 }, { "epoch": 0.85, "grad_norm": 3.2583532333374023, "learning_rate": 1.1764376093166807e-06, "loss": 0.6944, "step": 33113 }, { "epoch": 0.85, "grad_norm": 2.731458902359009, "learning_rate": 1.1760469893883275e-06, "loss": 0.6258, "step": 33114 }, { "epoch": 0.85, "grad_norm": 1.2928227186203003, "learning_rate": 1.1756564302691165e-06, "loss": 0.4199, "step": 33115 }, { "epoch": 0.85, "grad_norm": 1.2476532459259033, "learning_rate": 1.1752659319617377e-06, "loss": 0.4651, "step": 33116 }, { "epoch": 0.85, "grad_norm": 1.1479681730270386, "learning_rate": 1.174875494468879e-06, "loss": 0.5824, "step": 33117 }, { "epoch": 0.85, "grad_norm": 1.4466220140457153, "learning_rate": 1.174485117793236e-06, "loss": 0.6931, "step": 33118 }, { "epoch": 0.85, "grad_norm": 1.3860770463943481, "learning_rate": 1.1740948019374975e-06, "loss": 0.5329, "step": 33119 }, { "epoch": 0.85, "grad_norm": 1.2420830726623535, "learning_rate": 1.1737045469043528e-06, "loss": 0.57, "step": 33120 }, { "epoch": 0.85, "grad_norm": 1.0772459506988525, "learning_rate": 1.1733143526964885e-06, "loss": 0.6196, "step": 33121 }, { "epoch": 0.85, "grad_norm": 2.59962797164917, "learning_rate": 1.172924219316598e-06, "loss": 0.4016, "step": 33122 }, { "epoch": 0.85, "grad_norm": 1.881435513496399, "learning_rate": 1.172534146767369e-06, "loss": 0.5102, "step": 33123 }, { "epoch": 0.85, "grad_norm": 2.332700490951538, "learning_rate": 1.1721441350514861e-06, "loss": 0.5011, "step": 33124 }, { "epoch": 0.85, "grad_norm": 1.2989451885223389, "learning_rate": 1.1717541841716417e-06, "loss": 0.4564, "step": 33125 }, { "epoch": 0.85, "grad_norm": 1.4692531824111938, "learning_rate": 1.1713642941305214e-06, "loss": 0.441, "step": 33126 }, { "epoch": 0.85, "grad_norm": 1.2193899154663086, "learning_rate": 1.1709744649308109e-06, "loss": 0.4753, "step": 33127 }, { "epoch": 0.85, "grad_norm": 1.1610480546951294, "learning_rate": 1.1705846965751955e-06, "loss": 0.5817, "step": 33128 }, { "epoch": 0.85, "grad_norm": 4.121273517608643, "learning_rate": 1.1701949890663656e-06, "loss": 0.367, "step": 33129 }, { "epoch": 0.85, "grad_norm": 1.5383042097091675, "learning_rate": 1.1698053424070043e-06, "loss": 0.6291, "step": 33130 }, { "epoch": 0.85, "grad_norm": 1.8460088968276978, "learning_rate": 1.1694157565997954e-06, "loss": 0.5009, "step": 33131 }, { "epoch": 0.85, "grad_norm": 1.640022873878479, "learning_rate": 1.1690262316474266e-06, "loss": 0.6315, "step": 33132 }, { "epoch": 0.85, "grad_norm": 2.4697914123535156, "learning_rate": 1.1686367675525812e-06, "loss": 0.6404, "step": 33133 }, { "epoch": 0.85, "grad_norm": 1.8906830549240112, "learning_rate": 1.1682473643179427e-06, "loss": 0.5487, "step": 33134 }, { "epoch": 0.85, "grad_norm": 1.232131004333496, "learning_rate": 1.167858021946192e-06, "loss": 0.4074, "step": 33135 }, { "epoch": 0.85, "grad_norm": 2.4257147312164307, "learning_rate": 1.1674687404400187e-06, "loss": 0.5224, "step": 33136 }, { "epoch": 0.85, "grad_norm": 2.3110015392303467, "learning_rate": 1.1670795198021001e-06, "loss": 0.5135, "step": 33137 }, { "epoch": 0.85, "grad_norm": 1.3601078987121582, "learning_rate": 1.1666903600351188e-06, "loss": 0.6513, "step": 33138 }, { "epoch": 0.85, "grad_norm": 1.55876624584198, "learning_rate": 1.166301261141759e-06, "loss": 0.4851, "step": 33139 }, { "epoch": 0.85, "grad_norm": 1.2508058547973633, "learning_rate": 1.1659122231247022e-06, "loss": 0.3437, "step": 33140 }, { "epoch": 0.85, "grad_norm": 2.288381338119507, "learning_rate": 1.165523245986625e-06, "loss": 0.4668, "step": 33141 }, { "epoch": 0.85, "grad_norm": 1.907241702079773, "learning_rate": 1.1651343297302142e-06, "loss": 0.4699, "step": 33142 }, { "epoch": 0.85, "grad_norm": 1.544671893119812, "learning_rate": 1.1647454743581465e-06, "loss": 0.5169, "step": 33143 }, { "epoch": 0.85, "grad_norm": 1.4429336786270142, "learning_rate": 1.1643566798731032e-06, "loss": 0.6586, "step": 33144 }, { "epoch": 0.85, "grad_norm": 1.4891804456710815, "learning_rate": 1.1639679462777599e-06, "loss": 0.5112, "step": 33145 }, { "epoch": 0.85, "grad_norm": 1.7562333345413208, "learning_rate": 1.1635792735747998e-06, "loss": 0.561, "step": 33146 }, { "epoch": 0.85, "grad_norm": 1.7862352132797241, "learning_rate": 1.1631906617668999e-06, "loss": 0.3871, "step": 33147 }, { "epoch": 0.85, "grad_norm": 1.5540448427200317, "learning_rate": 1.1628021108567356e-06, "loss": 0.4212, "step": 33148 }, { "epoch": 0.85, "grad_norm": 1.3938814401626587, "learning_rate": 1.1624136208469905e-06, "loss": 0.5572, "step": 33149 }, { "epoch": 0.85, "grad_norm": 1.0211533308029175, "learning_rate": 1.162025191740337e-06, "loss": 0.4805, "step": 33150 }, { "epoch": 0.85, "grad_norm": 1.5307892560958862, "learning_rate": 1.1616368235394538e-06, "loss": 0.5432, "step": 33151 }, { "epoch": 0.85, "grad_norm": 1.7189310789108276, "learning_rate": 1.1612485162470156e-06, "loss": 0.5185, "step": 33152 }, { "epoch": 0.85, "grad_norm": 1.2874562740325928, "learning_rate": 1.1608602698657e-06, "loss": 0.4247, "step": 33153 }, { "epoch": 0.85, "grad_norm": 1.8167061805725098, "learning_rate": 1.160472084398183e-06, "loss": 0.6461, "step": 33154 }, { "epoch": 0.85, "grad_norm": 2.691218137741089, "learning_rate": 1.160083959847137e-06, "loss": 0.5719, "step": 33155 }, { "epoch": 0.85, "grad_norm": 1.825507640838623, "learning_rate": 1.1596958962152404e-06, "loss": 0.5759, "step": 33156 }, { "epoch": 0.85, "grad_norm": 1.726714015007019, "learning_rate": 1.159307893505166e-06, "loss": 0.4547, "step": 33157 }, { "epoch": 0.85, "grad_norm": 2.186948537826538, "learning_rate": 1.158919951719587e-06, "loss": 0.6635, "step": 33158 }, { "epoch": 0.85, "grad_norm": 1.122255563735962, "learning_rate": 1.1585320708611757e-06, "loss": 0.448, "step": 33159 }, { "epoch": 0.85, "grad_norm": 1.9833308458328247, "learning_rate": 1.158144250932608e-06, "loss": 0.5135, "step": 33160 }, { "epoch": 0.85, "grad_norm": 1.1123106479644775, "learning_rate": 1.1577564919365548e-06, "loss": 0.4896, "step": 33161 }, { "epoch": 0.85, "grad_norm": 1.2653121948242188, "learning_rate": 1.1573687938756873e-06, "loss": 0.6005, "step": 33162 }, { "epoch": 0.85, "grad_norm": 1.1112792491912842, "learning_rate": 1.15698115675268e-06, "loss": 0.4166, "step": 33163 }, { "epoch": 0.85, "grad_norm": 1.5465096235275269, "learning_rate": 1.1565935805702034e-06, "loss": 0.568, "step": 33164 }, { "epoch": 0.85, "grad_norm": 2.138380527496338, "learning_rate": 1.156206065330926e-06, "loss": 0.5993, "step": 33165 }, { "epoch": 0.85, "grad_norm": 1.8833231925964355, "learning_rate": 1.1558186110375213e-06, "loss": 0.5208, "step": 33166 }, { "epoch": 0.85, "grad_norm": 11.517223358154297, "learning_rate": 1.1554312176926597e-06, "loss": 0.6642, "step": 33167 }, { "epoch": 0.85, "grad_norm": 3.7701847553253174, "learning_rate": 1.1550438852990088e-06, "loss": 0.5235, "step": 33168 }, { "epoch": 0.85, "grad_norm": 2.777479648590088, "learning_rate": 1.1546566138592353e-06, "loss": 0.6395, "step": 33169 }, { "epoch": 0.85, "grad_norm": 1.4572694301605225, "learning_rate": 1.1542694033760148e-06, "loss": 0.3993, "step": 33170 }, { "epoch": 0.85, "grad_norm": 3.6805553436279297, "learning_rate": 1.1538822538520111e-06, "loss": 0.6167, "step": 33171 }, { "epoch": 0.85, "grad_norm": 1.067017674446106, "learning_rate": 1.1534951652898917e-06, "loss": 0.4022, "step": 33172 }, { "epoch": 0.85, "grad_norm": 1.39527428150177, "learning_rate": 1.153108137692328e-06, "loss": 0.4367, "step": 33173 }, { "epoch": 0.85, "grad_norm": 1.3762634992599487, "learning_rate": 1.1527211710619846e-06, "loss": 0.4882, "step": 33174 }, { "epoch": 0.85, "grad_norm": 6.2334489822387695, "learning_rate": 1.152334265401528e-06, "loss": 0.5244, "step": 33175 }, { "epoch": 0.85, "grad_norm": 1.552526593208313, "learning_rate": 1.1519474207136238e-06, "loss": 0.579, "step": 33176 }, { "epoch": 0.85, "grad_norm": 10.374905586242676, "learning_rate": 1.1515606370009402e-06, "loss": 0.7227, "step": 33177 }, { "epoch": 0.85, "grad_norm": 1.835193157196045, "learning_rate": 1.1511739142661427e-06, "loss": 0.3961, "step": 33178 }, { "epoch": 0.85, "grad_norm": 1.3308135271072388, "learning_rate": 1.1507872525118925e-06, "loss": 0.5454, "step": 33179 }, { "epoch": 0.85, "grad_norm": 1.4189668893814087, "learning_rate": 1.1504006517408584e-06, "loss": 0.4831, "step": 33180 }, { "epoch": 0.85, "grad_norm": 3.575021743774414, "learning_rate": 1.150014111955704e-06, "loss": 0.671, "step": 33181 }, { "epoch": 0.85, "grad_norm": 1.9177452325820923, "learning_rate": 1.1496276331590895e-06, "loss": 0.4845, "step": 33182 }, { "epoch": 0.85, "grad_norm": 1.377395510673523, "learning_rate": 1.1492412153536835e-06, "loss": 0.5206, "step": 33183 }, { "epoch": 0.85, "grad_norm": 1.0215868949890137, "learning_rate": 1.1488548585421467e-06, "loss": 0.5261, "step": 33184 }, { "epoch": 0.85, "grad_norm": 1.1346399784088135, "learning_rate": 1.1484685627271407e-06, "loss": 0.4169, "step": 33185 }, { "epoch": 0.85, "grad_norm": 1.085525393486023, "learning_rate": 1.1480823279113274e-06, "loss": 0.6322, "step": 33186 }, { "epoch": 0.85, "grad_norm": 7.708825588226318, "learning_rate": 1.1476961540973707e-06, "loss": 0.703, "step": 33187 }, { "epoch": 0.85, "grad_norm": 1.536881685256958, "learning_rate": 1.1473100412879301e-06, "loss": 0.5169, "step": 33188 }, { "epoch": 0.85, "grad_norm": 1.9286584854125977, "learning_rate": 1.1469239894856655e-06, "loss": 0.494, "step": 33189 }, { "epoch": 0.85, "grad_norm": 1.8885856866836548, "learning_rate": 1.14653799869324e-06, "loss": 0.5429, "step": 33190 }, { "epoch": 0.85, "grad_norm": 1.4622002840042114, "learning_rate": 1.1461520689133132e-06, "loss": 0.5589, "step": 33191 }, { "epoch": 0.85, "grad_norm": 4.933689594268799, "learning_rate": 1.1457662001485436e-06, "loss": 0.7484, "step": 33192 }, { "epoch": 0.85, "grad_norm": 0.9917577505111694, "learning_rate": 1.1453803924015884e-06, "loss": 0.4222, "step": 33193 }, { "epoch": 0.85, "grad_norm": 1.4517452716827393, "learning_rate": 1.1449946456751105e-06, "loss": 0.5743, "step": 33194 }, { "epoch": 0.85, "grad_norm": 1.2206517457962036, "learning_rate": 1.1446089599717668e-06, "loss": 0.4707, "step": 33195 }, { "epoch": 0.85, "grad_norm": 2.0065114498138428, "learning_rate": 1.1442233352942123e-06, "loss": 0.7253, "step": 33196 }, { "epoch": 0.85, "grad_norm": 1.5744346380233765, "learning_rate": 1.1438377716451078e-06, "loss": 0.5652, "step": 33197 }, { "epoch": 0.85, "grad_norm": 3.0923187732696533, "learning_rate": 1.143452269027111e-06, "loss": 0.5276, "step": 33198 }, { "epoch": 0.85, "grad_norm": 1.39626145362854, "learning_rate": 1.1430668274428758e-06, "loss": 0.5111, "step": 33199 }, { "epoch": 0.85, "grad_norm": 3.065340518951416, "learning_rate": 1.1426814468950575e-06, "loss": 0.5963, "step": 33200 }, { "epoch": 0.85, "grad_norm": 4.0406036376953125, "learning_rate": 1.1422961273863164e-06, "loss": 0.5614, "step": 33201 }, { "epoch": 0.85, "grad_norm": 1.40138840675354, "learning_rate": 1.1419108689193059e-06, "loss": 0.6105, "step": 33202 }, { "epoch": 0.85, "grad_norm": 1.0567140579223633, "learning_rate": 1.1415256714966784e-06, "loss": 0.424, "step": 33203 }, { "epoch": 0.85, "grad_norm": 1.426963210105896, "learning_rate": 1.1411405351210913e-06, "loss": 0.4482, "step": 33204 }, { "epoch": 0.85, "grad_norm": 1.2735275030136108, "learning_rate": 1.1407554597951997e-06, "loss": 0.4808, "step": 33205 }, { "epoch": 0.85, "grad_norm": 1.0941969156265259, "learning_rate": 1.1403704455216524e-06, "loss": 0.5046, "step": 33206 }, { "epoch": 0.85, "grad_norm": 1.1888868808746338, "learning_rate": 1.1399854923031084e-06, "loss": 0.5398, "step": 33207 }, { "epoch": 0.85, "grad_norm": 1.9594930410385132, "learning_rate": 1.1396006001422178e-06, "loss": 0.6682, "step": 33208 }, { "epoch": 0.85, "grad_norm": 1.0534193515777588, "learning_rate": 1.1392157690416327e-06, "loss": 0.3892, "step": 33209 }, { "epoch": 0.85, "grad_norm": 33.482173919677734, "learning_rate": 1.1388309990040036e-06, "loss": 0.4712, "step": 33210 }, { "epoch": 0.85, "grad_norm": 1.6435178518295288, "learning_rate": 1.1384462900319871e-06, "loss": 0.4642, "step": 33211 }, { "epoch": 0.85, "grad_norm": 1.0642772912979126, "learning_rate": 1.138061642128231e-06, "loss": 0.4656, "step": 33212 }, { "epoch": 0.85, "grad_norm": 3.9469594955444336, "learning_rate": 1.1376770552953841e-06, "loss": 0.7448, "step": 33213 }, { "epoch": 0.85, "grad_norm": 1.3842464685440063, "learning_rate": 1.1372925295361016e-06, "loss": 0.6326, "step": 33214 }, { "epoch": 0.85, "grad_norm": 1.6697468757629395, "learning_rate": 1.1369080648530294e-06, "loss": 0.5656, "step": 33215 }, { "epoch": 0.85, "grad_norm": 2.266602039337158, "learning_rate": 1.1365236612488206e-06, "loss": 0.7125, "step": 33216 }, { "epoch": 0.85, "grad_norm": 0.9701548218727112, "learning_rate": 1.1361393187261183e-06, "loss": 0.3709, "step": 33217 }, { "epoch": 0.85, "grad_norm": 3.4669227600097656, "learning_rate": 1.1357550372875781e-06, "loss": 0.3363, "step": 33218 }, { "epoch": 0.85, "grad_norm": 2.125446319580078, "learning_rate": 1.1353708169358445e-06, "loss": 0.4859, "step": 33219 }, { "epoch": 0.85, "grad_norm": 1.5692696571350098, "learning_rate": 1.1349866576735645e-06, "loss": 0.584, "step": 33220 }, { "epoch": 0.85, "grad_norm": 1.7110681533813477, "learning_rate": 1.1346025595033893e-06, "loss": 0.5013, "step": 33221 }, { "epoch": 0.85, "grad_norm": 7.8076934814453125, "learning_rate": 1.1342185224279623e-06, "loss": 0.4266, "step": 33222 }, { "epoch": 0.85, "grad_norm": 1.7430436611175537, "learning_rate": 1.1338345464499301e-06, "loss": 0.5586, "step": 33223 }, { "epoch": 0.85, "grad_norm": 9.123298645019531, "learning_rate": 1.1334506315719418e-06, "loss": 0.5248, "step": 33224 }, { "epoch": 0.85, "grad_norm": 1.6377534866333008, "learning_rate": 1.133066777796641e-06, "loss": 0.4845, "step": 33225 }, { "epoch": 0.85, "grad_norm": 1.7839921712875366, "learning_rate": 1.132682985126673e-06, "loss": 0.575, "step": 33226 }, { "epoch": 0.85, "grad_norm": 1.3929380178451538, "learning_rate": 1.1322992535646816e-06, "loss": 0.4959, "step": 33227 }, { "epoch": 0.85, "grad_norm": 1.3481669425964355, "learning_rate": 1.1319155831133144e-06, "loss": 0.5841, "step": 33228 }, { "epoch": 0.85, "grad_norm": 1.3098307847976685, "learning_rate": 1.131531973775213e-06, "loss": 0.4727, "step": 33229 }, { "epoch": 0.85, "grad_norm": 1.9856759309768677, "learning_rate": 1.1311484255530192e-06, "loss": 0.5823, "step": 33230 }, { "epoch": 0.85, "grad_norm": 1.4948798418045044, "learning_rate": 1.1307649384493814e-06, "loss": 0.5502, "step": 33231 }, { "epoch": 0.85, "grad_norm": 2.283738136291504, "learning_rate": 1.1303815124669393e-06, "loss": 0.4659, "step": 33232 }, { "epoch": 0.85, "grad_norm": 1.8122189044952393, "learning_rate": 1.1299981476083343e-06, "loss": 0.3917, "step": 33233 }, { "epoch": 0.85, "grad_norm": 1.6667757034301758, "learning_rate": 1.129614843876209e-06, "loss": 0.6893, "step": 33234 }, { "epoch": 0.85, "grad_norm": 1.3045711517333984, "learning_rate": 1.1292316012732063e-06, "loss": 0.454, "step": 33235 }, { "epoch": 0.85, "grad_norm": 5.631899833679199, "learning_rate": 1.1288484198019666e-06, "loss": 0.7137, "step": 33236 }, { "epoch": 0.85, "grad_norm": 1.2881097793579102, "learning_rate": 1.128465299465128e-06, "loss": 0.5224, "step": 33237 }, { "epoch": 0.85, "grad_norm": 1.8403676748275757, "learning_rate": 1.1280822402653346e-06, "loss": 0.4681, "step": 33238 }, { "epoch": 0.85, "grad_norm": 1.318158745765686, "learning_rate": 1.1276992422052247e-06, "loss": 0.3645, "step": 33239 }, { "epoch": 0.85, "grad_norm": 1.741135835647583, "learning_rate": 1.127316305287438e-06, "loss": 0.4398, "step": 33240 }, { "epoch": 0.85, "grad_norm": 1.1163508892059326, "learning_rate": 1.1269334295146106e-06, "loss": 0.5324, "step": 33241 }, { "epoch": 0.85, "grad_norm": 2.2743849754333496, "learning_rate": 1.1265506148893856e-06, "loss": 0.6262, "step": 33242 }, { "epoch": 0.85, "grad_norm": 1.268372654914856, "learning_rate": 1.1261678614143978e-06, "loss": 0.5636, "step": 33243 }, { "epoch": 0.85, "grad_norm": 1.0873079299926758, "learning_rate": 1.125785169092285e-06, "loss": 0.5538, "step": 33244 }, { "epoch": 0.85, "grad_norm": 1.997945785522461, "learning_rate": 1.1254025379256883e-06, "loss": 0.3888, "step": 33245 }, { "epoch": 0.85, "grad_norm": 1.0723830461502075, "learning_rate": 1.1250199679172403e-06, "loss": 0.4439, "step": 33246 }, { "epoch": 0.85, "grad_norm": 1.52097749710083, "learning_rate": 1.1246374590695774e-06, "loss": 0.5145, "step": 33247 }, { "epoch": 0.85, "grad_norm": 2.1343636512756348, "learning_rate": 1.124255011385339e-06, "loss": 0.4206, "step": 33248 }, { "epoch": 0.85, "grad_norm": 1.1612372398376465, "learning_rate": 1.1238726248671595e-06, "loss": 0.4803, "step": 33249 }, { "epoch": 0.85, "grad_norm": 0.9729304909706116, "learning_rate": 1.1234902995176722e-06, "loss": 0.4359, "step": 33250 }, { "epoch": 0.85, "grad_norm": 1.879867672920227, "learning_rate": 1.123108035339512e-06, "loss": 0.4991, "step": 33251 }, { "epoch": 0.85, "grad_norm": 1.5598490238189697, "learning_rate": 1.1227258323353151e-06, "loss": 0.5736, "step": 33252 }, { "epoch": 0.85, "grad_norm": 6.08540678024292, "learning_rate": 1.1223436905077157e-06, "loss": 0.5541, "step": 33253 }, { "epoch": 0.85, "grad_norm": 1.4959098100662231, "learning_rate": 1.1219616098593433e-06, "loss": 0.5131, "step": 33254 }, { "epoch": 0.85, "grad_norm": 1.267182469367981, "learning_rate": 1.1215795903928362e-06, "loss": 0.3629, "step": 33255 }, { "epoch": 0.85, "grad_norm": 2.114176034927368, "learning_rate": 1.1211976321108241e-06, "loss": 0.5638, "step": 33256 }, { "epoch": 0.85, "grad_norm": 1.238753318786621, "learning_rate": 1.1208157350159398e-06, "loss": 0.3713, "step": 33257 }, { "epoch": 0.85, "grad_norm": 1.8637731075286865, "learning_rate": 1.1204338991108132e-06, "loss": 0.475, "step": 33258 }, { "epoch": 0.85, "grad_norm": 1.4555518627166748, "learning_rate": 1.12005212439808e-06, "loss": 0.5865, "step": 33259 }, { "epoch": 0.85, "grad_norm": 1.620071530342102, "learning_rate": 1.119670410880368e-06, "loss": 0.5166, "step": 33260 }, { "epoch": 0.85, "grad_norm": 1.0206493139266968, "learning_rate": 1.1192887585603063e-06, "loss": 0.4451, "step": 33261 }, { "epoch": 0.85, "grad_norm": 7.089308261871338, "learning_rate": 1.1189071674405294e-06, "loss": 0.6137, "step": 33262 }, { "epoch": 0.85, "grad_norm": 1.8209651708602905, "learning_rate": 1.1185256375236642e-06, "loss": 0.3636, "step": 33263 }, { "epoch": 0.85, "grad_norm": 6.095862865447998, "learning_rate": 1.1181441688123385e-06, "loss": 0.7645, "step": 33264 }, { "epoch": 0.85, "grad_norm": 1.5463944673538208, "learning_rate": 1.1177627613091857e-06, "loss": 0.4502, "step": 33265 }, { "epoch": 0.85, "grad_norm": 1.1861873865127563, "learning_rate": 1.1173814150168305e-06, "loss": 0.6048, "step": 33266 }, { "epoch": 0.85, "grad_norm": 1.3492426872253418, "learning_rate": 1.117000129937903e-06, "loss": 0.553, "step": 33267 }, { "epoch": 0.85, "grad_norm": 1.0936357975006104, "learning_rate": 1.1166189060750276e-06, "loss": 0.4208, "step": 33268 }, { "epoch": 0.85, "grad_norm": 1.794607400894165, "learning_rate": 1.1162377434308346e-06, "loss": 0.5805, "step": 33269 }, { "epoch": 0.85, "grad_norm": 1.584977149963379, "learning_rate": 1.115856642007951e-06, "loss": 0.4646, "step": 33270 }, { "epoch": 0.85, "grad_norm": 8.850370407104492, "learning_rate": 1.1154756018089995e-06, "loss": 0.6435, "step": 33271 }, { "epoch": 0.85, "grad_norm": 1.6718106269836426, "learning_rate": 1.1150946228366099e-06, "loss": 0.6602, "step": 33272 }, { "epoch": 0.85, "grad_norm": 3.561241865158081, "learning_rate": 1.1147137050934065e-06, "loss": 0.4445, "step": 33273 }, { "epoch": 0.85, "grad_norm": 3.160421133041382, "learning_rate": 1.1143328485820137e-06, "loss": 0.412, "step": 33274 }, { "epoch": 0.85, "grad_norm": 1.0656239986419678, "learning_rate": 1.1139520533050552e-06, "loss": 0.4782, "step": 33275 }, { "epoch": 0.85, "grad_norm": 1.2608006000518799, "learning_rate": 1.1135713192651576e-06, "loss": 0.523, "step": 33276 }, { "epoch": 0.85, "grad_norm": 1.2910064458847046, "learning_rate": 1.1131906464649433e-06, "loss": 0.5002, "step": 33277 }, { "epoch": 0.85, "grad_norm": 2.8845789432525635, "learning_rate": 1.1128100349070348e-06, "loss": 0.421, "step": 33278 }, { "epoch": 0.85, "grad_norm": 1.2743793725967407, "learning_rate": 1.1124294845940563e-06, "loss": 0.419, "step": 33279 }, { "epoch": 0.85, "grad_norm": 1.4266279935836792, "learning_rate": 1.1120489955286318e-06, "loss": 0.6414, "step": 33280 }, { "epoch": 0.85, "grad_norm": 0.93487149477005, "learning_rate": 1.111668567713381e-06, "loss": 0.4888, "step": 33281 }, { "epoch": 0.85, "grad_norm": 1.2563358545303345, "learning_rate": 1.111288201150924e-06, "loss": 0.4328, "step": 33282 }, { "epoch": 0.85, "grad_norm": 1.453704595565796, "learning_rate": 1.1109078958438868e-06, "loss": 0.5923, "step": 33283 }, { "epoch": 0.85, "grad_norm": 1.3176182508468628, "learning_rate": 1.110527651794887e-06, "loss": 0.5288, "step": 33284 }, { "epoch": 0.85, "grad_norm": 1.5716685056686401, "learning_rate": 1.110147469006545e-06, "loss": 0.5255, "step": 33285 }, { "epoch": 0.85, "grad_norm": 1.5708800554275513, "learning_rate": 1.109767347481483e-06, "loss": 0.7369, "step": 33286 }, { "epoch": 0.85, "grad_norm": 1.6301008462905884, "learning_rate": 1.1093872872223176e-06, "loss": 0.5121, "step": 33287 }, { "epoch": 0.85, "grad_norm": 3.069392442703247, "learning_rate": 1.1090072882316693e-06, "loss": 0.7378, "step": 33288 }, { "epoch": 0.85, "grad_norm": 4.103424072265625, "learning_rate": 1.108627350512158e-06, "loss": 0.4935, "step": 33289 }, { "epoch": 0.85, "grad_norm": 1.1975516080856323, "learning_rate": 1.1082474740664007e-06, "loss": 0.4883, "step": 33290 }, { "epoch": 0.85, "grad_norm": 2.703794002532959, "learning_rate": 1.1078676588970162e-06, "loss": 0.7743, "step": 33291 }, { "epoch": 0.85, "grad_norm": 1.578271508216858, "learning_rate": 1.107487905006619e-06, "loss": 0.5343, "step": 33292 }, { "epoch": 0.85, "grad_norm": 2.458404302597046, "learning_rate": 1.1071082123978294e-06, "loss": 0.5135, "step": 33293 }, { "epoch": 0.85, "grad_norm": 1.4840351343154907, "learning_rate": 1.1067285810732643e-06, "loss": 0.5464, "step": 33294 }, { "epoch": 0.85, "grad_norm": 1.2849541902542114, "learning_rate": 1.1063490110355357e-06, "loss": 0.4487, "step": 33295 }, { "epoch": 0.85, "grad_norm": 1.6604260206222534, "learning_rate": 1.105969502287264e-06, "loss": 0.5103, "step": 33296 }, { "epoch": 0.85, "grad_norm": 1.2761905193328857, "learning_rate": 1.1055900548310638e-06, "loss": 0.5564, "step": 33297 }, { "epoch": 0.85, "grad_norm": 1.279511570930481, "learning_rate": 1.1052106686695473e-06, "loss": 0.4976, "step": 33298 }, { "epoch": 0.85, "grad_norm": 1.192779541015625, "learning_rate": 1.1048313438053304e-06, "loss": 0.5465, "step": 33299 }, { "epoch": 0.85, "grad_norm": 2.36247181892395, "learning_rate": 1.1044520802410285e-06, "loss": 0.6798, "step": 33300 }, { "epoch": 0.85, "grad_norm": 1.5466457605361938, "learning_rate": 1.1040728779792543e-06, "loss": 0.4764, "step": 33301 }, { "epoch": 0.85, "grad_norm": 1.4090293645858765, "learning_rate": 1.1036937370226174e-06, "loss": 0.4961, "step": 33302 }, { "epoch": 0.85, "grad_norm": 1.9527965784072876, "learning_rate": 1.1033146573737374e-06, "loss": 0.5787, "step": 33303 }, { "epoch": 0.85, "grad_norm": 1.5242253541946411, "learning_rate": 1.1029356390352241e-06, "loss": 0.6669, "step": 33304 }, { "epoch": 0.85, "grad_norm": 1.6342875957489014, "learning_rate": 1.1025566820096856e-06, "loss": 0.691, "step": 33305 }, { "epoch": 0.85, "grad_norm": 6.548581600189209, "learning_rate": 1.1021777862997384e-06, "loss": 0.4795, "step": 33306 }, { "epoch": 0.85, "grad_norm": 1.201341986656189, "learning_rate": 1.101798951907993e-06, "loss": 0.4945, "step": 33307 }, { "epoch": 0.85, "grad_norm": 1.1019293069839478, "learning_rate": 1.101420178837057e-06, "loss": 0.4701, "step": 33308 }, { "epoch": 0.85, "grad_norm": 1.2373548746109009, "learning_rate": 1.1010414670895419e-06, "loss": 0.3716, "step": 33309 }, { "epoch": 0.85, "grad_norm": 1.4834848642349243, "learning_rate": 1.1006628166680599e-06, "loss": 0.3388, "step": 33310 }, { "epoch": 0.85, "grad_norm": 1.9486442804336548, "learning_rate": 1.1002842275752179e-06, "loss": 0.4587, "step": 33311 }, { "epoch": 0.85, "grad_norm": 1.2769938707351685, "learning_rate": 1.0999056998136248e-06, "loss": 0.4671, "step": 33312 }, { "epoch": 0.85, "grad_norm": 1.2183219194412231, "learning_rate": 1.0995272333858909e-06, "loss": 0.4716, "step": 33313 }, { "epoch": 0.85, "grad_norm": 1.2118710279464722, "learning_rate": 1.099148828294624e-06, "loss": 0.4841, "step": 33314 }, { "epoch": 0.85, "grad_norm": 1.5910946130752563, "learning_rate": 1.098770484542432e-06, "loss": 0.4912, "step": 33315 }, { "epoch": 0.85, "grad_norm": 3.0061094760894775, "learning_rate": 1.0983922021319205e-06, "loss": 0.6058, "step": 33316 }, { "epoch": 0.85, "grad_norm": 1.5321515798568726, "learning_rate": 1.0980139810656975e-06, "loss": 0.506, "step": 33317 }, { "epoch": 0.85, "grad_norm": 2.5030760765075684, "learning_rate": 1.0976358213463678e-06, "loss": 0.5177, "step": 33318 }, { "epoch": 0.85, "grad_norm": 1.3161641359329224, "learning_rate": 1.0972577229765413e-06, "loss": 0.5235, "step": 33319 }, { "epoch": 0.85, "grad_norm": 1.5582455396652222, "learning_rate": 1.0968796859588205e-06, "loss": 0.6368, "step": 33320 }, { "epoch": 0.85, "grad_norm": 1.8828738927841187, "learning_rate": 1.096501710295812e-06, "loss": 0.7092, "step": 33321 }, { "epoch": 0.85, "grad_norm": 1.4523983001708984, "learning_rate": 1.0961237959901171e-06, "loss": 0.5449, "step": 33322 }, { "epoch": 0.85, "grad_norm": 1.6402543783187866, "learning_rate": 1.0957459430443463e-06, "loss": 0.4864, "step": 33323 }, { "epoch": 0.85, "grad_norm": 1.5564366579055786, "learning_rate": 1.0953681514610992e-06, "loss": 0.5853, "step": 33324 }, { "epoch": 0.85, "grad_norm": 1.9325413703918457, "learning_rate": 1.0949904212429796e-06, "loss": 0.5229, "step": 33325 }, { "epoch": 0.85, "grad_norm": 2.2486212253570557, "learning_rate": 1.094612752392592e-06, "loss": 0.5297, "step": 33326 }, { "epoch": 0.85, "grad_norm": 1.267822265625, "learning_rate": 1.0942351449125387e-06, "loss": 0.5207, "step": 33327 }, { "epoch": 0.85, "grad_norm": 1.1916563510894775, "learning_rate": 1.0938575988054224e-06, "loss": 0.5278, "step": 33328 }, { "epoch": 0.85, "grad_norm": 1.3022897243499756, "learning_rate": 1.0934801140738416e-06, "loss": 0.6249, "step": 33329 }, { "epoch": 0.85, "grad_norm": 1.3402262926101685, "learning_rate": 1.0931026907204023e-06, "loss": 0.5236, "step": 33330 }, { "epoch": 0.85, "grad_norm": 1.8792526721954346, "learning_rate": 1.0927253287477036e-06, "loss": 0.6421, "step": 33331 }, { "epoch": 0.85, "grad_norm": 1.6182159185409546, "learning_rate": 1.0923480281583431e-06, "loss": 0.5343, "step": 33332 }, { "epoch": 0.85, "grad_norm": 1.5603101253509521, "learning_rate": 1.0919707889549259e-06, "loss": 0.4728, "step": 33333 }, { "epoch": 0.85, "grad_norm": 1.2061293125152588, "learning_rate": 1.0915936111400493e-06, "loss": 0.5415, "step": 33334 }, { "epoch": 0.85, "grad_norm": 1.355165719985962, "learning_rate": 1.0912164947163139e-06, "loss": 0.6231, "step": 33335 }, { "epoch": 0.85, "grad_norm": 2.0993621349334717, "learning_rate": 1.0908394396863142e-06, "loss": 0.4655, "step": 33336 }, { "epoch": 0.85, "grad_norm": 1.4016482830047607, "learning_rate": 1.0904624460526547e-06, "loss": 0.3515, "step": 33337 }, { "epoch": 0.85, "grad_norm": 1.6494940519332886, "learning_rate": 1.09008551381793e-06, "loss": 0.7438, "step": 33338 }, { "epoch": 0.85, "grad_norm": 2.68129301071167, "learning_rate": 1.0897086429847359e-06, "loss": 0.5997, "step": 33339 }, { "epoch": 0.85, "grad_norm": 1.5574184656143188, "learning_rate": 1.0893318335556736e-06, "loss": 0.6848, "step": 33340 }, { "epoch": 0.85, "grad_norm": 1.9129277467727661, "learning_rate": 1.0889550855333386e-06, "loss": 0.524, "step": 33341 }, { "epoch": 0.85, "grad_norm": 2.189283847808838, "learning_rate": 1.0885783989203246e-06, "loss": 0.6683, "step": 33342 }, { "epoch": 0.85, "grad_norm": 1.691469430923462, "learning_rate": 1.0882017737192318e-06, "loss": 0.4415, "step": 33343 }, { "epoch": 0.85, "grad_norm": 1.9439091682434082, "learning_rate": 1.0878252099326536e-06, "loss": 0.4947, "step": 33344 }, { "epoch": 0.85, "grad_norm": 1.3858370780944824, "learning_rate": 1.0874487075631845e-06, "loss": 0.573, "step": 33345 }, { "epoch": 0.85, "grad_norm": 1.5353678464889526, "learning_rate": 1.087072266613417e-06, "loss": 0.4945, "step": 33346 }, { "epoch": 0.85, "grad_norm": 3.136937379837036, "learning_rate": 1.086695887085951e-06, "loss": 0.5831, "step": 33347 }, { "epoch": 0.85, "grad_norm": 1.6109895706176758, "learning_rate": 1.0863195689833762e-06, "loss": 0.5759, "step": 33348 }, { "epoch": 0.85, "grad_norm": 2.208198308944702, "learning_rate": 1.085943312308285e-06, "loss": 0.527, "step": 33349 }, { "epoch": 0.85, "grad_norm": 1.1213209629058838, "learning_rate": 1.0855671170632743e-06, "loss": 0.3951, "step": 33350 }, { "epoch": 0.85, "grad_norm": 1.6514791250228882, "learning_rate": 1.0851909832509344e-06, "loss": 0.6439, "step": 33351 }, { "epoch": 0.85, "grad_norm": 1.5705763101577759, "learning_rate": 1.084814910873857e-06, "loss": 0.6517, "step": 33352 }, { "epoch": 0.85, "grad_norm": 1.8668183088302612, "learning_rate": 1.0844388999346322e-06, "loss": 0.4993, "step": 33353 }, { "epoch": 0.85, "grad_norm": 1.2815626859664917, "learning_rate": 1.0840629504358558e-06, "loss": 0.3955, "step": 33354 }, { "epoch": 0.85, "grad_norm": 1.536643385887146, "learning_rate": 1.0836870623801155e-06, "loss": 0.5666, "step": 33355 }, { "epoch": 0.85, "grad_norm": 1.721732497215271, "learning_rate": 1.0833112357700004e-06, "loss": 0.5119, "step": 33356 }, { "epoch": 0.85, "grad_norm": 3.029862642288208, "learning_rate": 1.0829354706081042e-06, "loss": 0.4508, "step": 33357 }, { "epoch": 0.85, "grad_norm": 1.5765539407730103, "learning_rate": 1.0825597668970144e-06, "loss": 0.5463, "step": 33358 }, { "epoch": 0.86, "grad_norm": 5.990036487579346, "learning_rate": 1.0821841246393182e-06, "loss": 0.3672, "step": 33359 }, { "epoch": 0.86, "grad_norm": 5.5866851806640625, "learning_rate": 1.081808543837608e-06, "loss": 0.5887, "step": 33360 }, { "epoch": 0.86, "grad_norm": 2.7529430389404297, "learning_rate": 1.0814330244944714e-06, "loss": 0.4983, "step": 33361 }, { "epoch": 0.86, "grad_norm": 1.2642101049423218, "learning_rate": 1.0810575666124945e-06, "loss": 0.481, "step": 33362 }, { "epoch": 0.86, "grad_norm": 1.0969500541687012, "learning_rate": 1.0806821701942638e-06, "loss": 0.4443, "step": 33363 }, { "epoch": 0.86, "grad_norm": 2.2444543838500977, "learning_rate": 1.0803068352423696e-06, "loss": 0.5564, "step": 33364 }, { "epoch": 0.86, "grad_norm": 1.2036148309707642, "learning_rate": 1.0799315617593964e-06, "loss": 0.529, "step": 33365 }, { "epoch": 0.86, "grad_norm": 0.8807358741760254, "learning_rate": 1.079556349747929e-06, "loss": 0.4379, "step": 33366 }, { "epoch": 0.86, "grad_norm": 1.6258858442306519, "learning_rate": 1.0791811992105572e-06, "loss": 0.4126, "step": 33367 }, { "epoch": 0.86, "grad_norm": 1.358678936958313, "learning_rate": 1.0788061101498637e-06, "loss": 0.4015, "step": 33368 }, { "epoch": 0.86, "grad_norm": 1.635329246520996, "learning_rate": 1.078431082568433e-06, "loss": 0.543, "step": 33369 }, { "epoch": 0.86, "grad_norm": 1.8430057764053345, "learning_rate": 1.0780561164688496e-06, "loss": 0.6366, "step": 33370 }, { "epoch": 0.86, "grad_norm": 1.3040127754211426, "learning_rate": 1.0776812118536995e-06, "loss": 0.5408, "step": 33371 }, { "epoch": 0.86, "grad_norm": 1.7550898790359497, "learning_rate": 1.0773063687255647e-06, "loss": 0.5839, "step": 33372 }, { "epoch": 0.86, "grad_norm": 2.242006301879883, "learning_rate": 1.0769315870870266e-06, "loss": 0.5886, "step": 33373 }, { "epoch": 0.86, "grad_norm": 1.821473479270935, "learning_rate": 1.0765568669406722e-06, "loss": 0.5565, "step": 33374 }, { "epoch": 0.86, "grad_norm": 0.8557048439979553, "learning_rate": 1.0761822082890816e-06, "loss": 0.5075, "step": 33375 }, { "epoch": 0.86, "grad_norm": 10.201224327087402, "learning_rate": 1.075807611134837e-06, "loss": 0.7743, "step": 33376 }, { "epoch": 0.86, "grad_norm": 1.5336856842041016, "learning_rate": 1.0754330754805164e-06, "loss": 0.5184, "step": 33377 }, { "epoch": 0.86, "grad_norm": 1.4374176263809204, "learning_rate": 1.0750586013287078e-06, "loss": 0.5456, "step": 33378 }, { "epoch": 0.86, "grad_norm": 0.9644969701766968, "learning_rate": 1.074684188681987e-06, "loss": 0.4591, "step": 33379 }, { "epoch": 0.86, "grad_norm": 3.1101443767547607, "learning_rate": 1.074309837542934e-06, "loss": 0.762, "step": 33380 }, { "epoch": 0.86, "grad_norm": 3.6190004348754883, "learning_rate": 1.0739355479141312e-06, "loss": 0.7637, "step": 33381 }, { "epoch": 0.86, "grad_norm": 1.4856024980545044, "learning_rate": 1.0735613197981576e-06, "loss": 0.3888, "step": 33382 }, { "epoch": 0.86, "grad_norm": 3.442107915878296, "learning_rate": 1.0731871531975878e-06, "loss": 0.5449, "step": 33383 }, { "epoch": 0.86, "grad_norm": 1.4462846517562866, "learning_rate": 1.0728130481150068e-06, "loss": 0.454, "step": 33384 }, { "epoch": 0.86, "grad_norm": 1.4980738162994385, "learning_rate": 1.0724390045529898e-06, "loss": 0.4145, "step": 33385 }, { "epoch": 0.86, "grad_norm": 1.150977611541748, "learning_rate": 1.0720650225141139e-06, "loss": 0.4549, "step": 33386 }, { "epoch": 0.86, "grad_norm": 1.3526369333267212, "learning_rate": 1.0716911020009556e-06, "loss": 0.5091, "step": 33387 }, { "epoch": 0.86, "grad_norm": 7.524733066558838, "learning_rate": 1.0713172430160945e-06, "loss": 0.5636, "step": 33388 }, { "epoch": 0.86, "grad_norm": 1.981404423713684, "learning_rate": 1.0709434455621048e-06, "loss": 0.6065, "step": 33389 }, { "epoch": 0.86, "grad_norm": 1.3767497539520264, "learning_rate": 1.0705697096415613e-06, "loss": 0.5419, "step": 33390 }, { "epoch": 0.86, "grad_norm": 1.5831592082977295, "learning_rate": 1.070196035257044e-06, "loss": 0.4617, "step": 33391 }, { "epoch": 0.86, "grad_norm": 1.8924717903137207, "learning_rate": 1.0698224224111243e-06, "loss": 0.5685, "step": 33392 }, { "epoch": 0.86, "grad_norm": 1.177476167678833, "learning_rate": 1.0694488711063789e-06, "loss": 0.5107, "step": 33393 }, { "epoch": 0.86, "grad_norm": 1.1051814556121826, "learning_rate": 1.0690753813453792e-06, "loss": 0.4796, "step": 33394 }, { "epoch": 0.86, "grad_norm": 1.5076789855957031, "learning_rate": 1.0687019531307019e-06, "loss": 0.6287, "step": 33395 }, { "epoch": 0.86, "grad_norm": 2.4316675662994385, "learning_rate": 1.0683285864649207e-06, "loss": 0.5406, "step": 33396 }, { "epoch": 0.86, "grad_norm": 2.4198105335235596, "learning_rate": 1.0679552813506056e-06, "loss": 0.6551, "step": 33397 }, { "epoch": 0.86, "grad_norm": 1.7032665014266968, "learning_rate": 1.0675820377903323e-06, "loss": 0.5821, "step": 33398 }, { "epoch": 0.86, "grad_norm": 2.6537833213806152, "learning_rate": 1.067208855786671e-06, "loss": 0.5557, "step": 33399 }, { "epoch": 0.86, "grad_norm": 2.9118430614471436, "learning_rate": 1.0668357353421933e-06, "loss": 0.6472, "step": 33400 }, { "epoch": 0.86, "grad_norm": 1.2457621097564697, "learning_rate": 1.0664626764594732e-06, "loss": 0.5357, "step": 33401 }, { "epoch": 0.86, "grad_norm": 1.267075538635254, "learning_rate": 1.06608967914108e-06, "loss": 0.4419, "step": 33402 }, { "epoch": 0.86, "grad_norm": 3.201615333557129, "learning_rate": 1.0657167433895831e-06, "loss": 0.5734, "step": 33403 }, { "epoch": 0.86, "grad_norm": 1.2969766855239868, "learning_rate": 1.0653438692075525e-06, "loss": 0.4145, "step": 33404 }, { "epoch": 0.86, "grad_norm": 0.8874902725219727, "learning_rate": 1.064971056597559e-06, "loss": 0.272, "step": 33405 }, { "epoch": 0.86, "grad_norm": 1.4982411861419678, "learning_rate": 1.0645983055621733e-06, "loss": 0.492, "step": 33406 }, { "epoch": 0.86, "grad_norm": 1.7233351469039917, "learning_rate": 1.0642256161039588e-06, "loss": 0.4688, "step": 33407 }, { "epoch": 0.86, "grad_norm": 2.3757147789001465, "learning_rate": 1.06385298822549e-06, "loss": 0.4695, "step": 33408 }, { "epoch": 0.86, "grad_norm": 1.8093810081481934, "learning_rate": 1.0634804219293327e-06, "loss": 0.6363, "step": 33409 }, { "epoch": 0.86, "grad_norm": 14.655118942260742, "learning_rate": 1.0631079172180526e-06, "loss": 0.4953, "step": 33410 }, { "epoch": 0.86, "grad_norm": 1.203037977218628, "learning_rate": 1.0627354740942165e-06, "loss": 0.4293, "step": 33411 }, { "epoch": 0.86, "grad_norm": 2.0176079273223877, "learning_rate": 1.0623630925603957e-06, "loss": 0.5349, "step": 33412 }, { "epoch": 0.86, "grad_norm": 1.3467204570770264, "learning_rate": 1.0619907726191514e-06, "loss": 0.4929, "step": 33413 }, { "epoch": 0.86, "grad_norm": 1.3360165357589722, "learning_rate": 1.0616185142730506e-06, "loss": 0.4442, "step": 33414 }, { "epoch": 0.86, "grad_norm": 2.96392560005188, "learning_rate": 1.0612463175246612e-06, "loss": 0.484, "step": 33415 }, { "epoch": 0.86, "grad_norm": 1.1756470203399658, "learning_rate": 1.0608741823765456e-06, "loss": 0.4933, "step": 33416 }, { "epoch": 0.86, "grad_norm": 2.7628462314605713, "learning_rate": 1.0605021088312695e-06, "loss": 0.6386, "step": 33417 }, { "epoch": 0.86, "grad_norm": 1.4553664922714233, "learning_rate": 1.0601300968913952e-06, "loss": 0.4413, "step": 33418 }, { "epoch": 0.86, "grad_norm": 1.1373615264892578, "learning_rate": 1.0597581465594886e-06, "loss": 0.4022, "step": 33419 }, { "epoch": 0.86, "grad_norm": 7.154712677001953, "learning_rate": 1.059386257838113e-06, "loss": 0.5338, "step": 33420 }, { "epoch": 0.86, "grad_norm": 1.1223255395889282, "learning_rate": 1.0590144307298277e-06, "loss": 0.4447, "step": 33421 }, { "epoch": 0.86, "grad_norm": 1.388995885848999, "learning_rate": 1.0586426652371995e-06, "loss": 0.5475, "step": 33422 }, { "epoch": 0.86, "grad_norm": 1.2508262395858765, "learning_rate": 1.0582709613627883e-06, "loss": 0.3886, "step": 33423 }, { "epoch": 0.86, "grad_norm": 0.9830278158187866, "learning_rate": 1.0578993191091546e-06, "loss": 0.5351, "step": 33424 }, { "epoch": 0.86, "grad_norm": 1.7935984134674072, "learning_rate": 1.057527738478863e-06, "loss": 0.4593, "step": 33425 }, { "epoch": 0.86, "grad_norm": 1.8464670181274414, "learning_rate": 1.0571562194744711e-06, "loss": 0.5451, "step": 33426 }, { "epoch": 0.86, "grad_norm": 1.896937608718872, "learning_rate": 1.0567847620985406e-06, "loss": 0.5101, "step": 33427 }, { "epoch": 0.86, "grad_norm": 10.417093276977539, "learning_rate": 1.0564133663536292e-06, "loss": 0.6826, "step": 33428 }, { "epoch": 0.86, "grad_norm": 2.3431055545806885, "learning_rate": 1.0560420322422992e-06, "loss": 0.6121, "step": 33429 }, { "epoch": 0.86, "grad_norm": 1.4892470836639404, "learning_rate": 1.055670759767109e-06, "loss": 0.5939, "step": 33430 }, { "epoch": 0.86, "grad_norm": 1.0549651384353638, "learning_rate": 1.0552995489306139e-06, "loss": 0.4378, "step": 33431 }, { "epoch": 0.86, "grad_norm": 1.2952054738998413, "learning_rate": 1.0549283997353775e-06, "loss": 0.558, "step": 33432 }, { "epoch": 0.86, "grad_norm": 1.7621046304702759, "learning_rate": 1.0545573121839548e-06, "loss": 0.5458, "step": 33433 }, { "epoch": 0.86, "grad_norm": 15.1140718460083, "learning_rate": 1.0541862862789021e-06, "loss": 0.8991, "step": 33434 }, { "epoch": 0.86, "grad_norm": 1.3642104864120483, "learning_rate": 1.0538153220227753e-06, "loss": 0.4813, "step": 33435 }, { "epoch": 0.86, "grad_norm": 2.225942850112915, "learning_rate": 1.053444419418136e-06, "loss": 0.4464, "step": 33436 }, { "epoch": 0.86, "grad_norm": 1.0042798519134521, "learning_rate": 1.053073578467535e-06, "loss": 0.4738, "step": 33437 }, { "epoch": 0.86, "grad_norm": 0.9177064299583435, "learning_rate": 1.0527027991735295e-06, "loss": 0.4542, "step": 33438 }, { "epoch": 0.86, "grad_norm": 1.8429720401763916, "learning_rate": 1.0523320815386762e-06, "loss": 0.5554, "step": 33439 }, { "epoch": 0.86, "grad_norm": 1.471630334854126, "learning_rate": 1.0519614255655298e-06, "loss": 0.6491, "step": 33440 }, { "epoch": 0.86, "grad_norm": 1.1875444650650024, "learning_rate": 1.0515908312566426e-06, "loss": 0.4485, "step": 33441 }, { "epoch": 0.86, "grad_norm": 1.5231788158416748, "learning_rate": 1.051220298614567e-06, "loss": 0.576, "step": 33442 }, { "epoch": 0.86, "grad_norm": 4.230155944824219, "learning_rate": 1.0508498276418623e-06, "loss": 0.7886, "step": 33443 }, { "epoch": 0.86, "grad_norm": 3.7357232570648193, "learning_rate": 1.0504794183410771e-06, "loss": 0.5059, "step": 33444 }, { "epoch": 0.86, "grad_norm": 1.2269933223724365, "learning_rate": 1.0501090707147632e-06, "loss": 0.3729, "step": 33445 }, { "epoch": 0.86, "grad_norm": 1.588932991027832, "learning_rate": 1.0497387847654772e-06, "loss": 0.5025, "step": 33446 }, { "epoch": 0.86, "grad_norm": 1.63893723487854, "learning_rate": 1.0493685604957681e-06, "loss": 0.61, "step": 33447 }, { "epoch": 0.86, "grad_norm": 2.7302777767181396, "learning_rate": 1.0489983979081864e-06, "loss": 0.5838, "step": 33448 }, { "epoch": 0.86, "grad_norm": 8.115918159484863, "learning_rate": 1.0486282970052853e-06, "loss": 0.6517, "step": 33449 }, { "epoch": 0.86, "grad_norm": 2.2703263759613037, "learning_rate": 1.048258257789614e-06, "loss": 0.5819, "step": 33450 }, { "epoch": 0.86, "grad_norm": 1.492705225944519, "learning_rate": 1.0478882802637224e-06, "loss": 0.6239, "step": 33451 }, { "epoch": 0.86, "grad_norm": 2.0168278217315674, "learning_rate": 1.04751836443016e-06, "loss": 0.6611, "step": 33452 }, { "epoch": 0.86, "grad_norm": 2.0510470867156982, "learning_rate": 1.0471485102914768e-06, "loss": 0.6658, "step": 33453 }, { "epoch": 0.86, "grad_norm": 5.29019021987915, "learning_rate": 1.0467787178502231e-06, "loss": 0.7434, "step": 33454 }, { "epoch": 0.86, "grad_norm": 1.2539196014404297, "learning_rate": 1.0464089871089421e-06, "loss": 0.4965, "step": 33455 }, { "epoch": 0.86, "grad_norm": 1.7548010349273682, "learning_rate": 1.0460393180701878e-06, "loss": 0.4198, "step": 33456 }, { "epoch": 0.86, "grad_norm": 1.391517162322998, "learning_rate": 1.0456697107365055e-06, "loss": 0.5523, "step": 33457 }, { "epoch": 0.86, "grad_norm": 1.2561465501785278, "learning_rate": 1.045300165110441e-06, "loss": 0.5643, "step": 33458 }, { "epoch": 0.86, "grad_norm": 1.8558005094528198, "learning_rate": 1.0449306811945403e-06, "loss": 0.5293, "step": 33459 }, { "epoch": 0.86, "grad_norm": 8.473722457885742, "learning_rate": 1.0445612589913534e-06, "loss": 0.7076, "step": 33460 }, { "epoch": 0.86, "grad_norm": 2.6976754665374756, "learning_rate": 1.0441918985034227e-06, "loss": 0.5191, "step": 33461 }, { "epoch": 0.86, "grad_norm": 1.501429796218872, "learning_rate": 1.043822599733294e-06, "loss": 0.5383, "step": 33462 }, { "epoch": 0.86, "grad_norm": 1.5615040063858032, "learning_rate": 1.0434533626835152e-06, "loss": 0.4969, "step": 33463 }, { "epoch": 0.86, "grad_norm": 0.9982162117958069, "learning_rate": 1.0430841873566288e-06, "loss": 0.3717, "step": 33464 }, { "epoch": 0.86, "grad_norm": 9.161872863769531, "learning_rate": 1.0427150737551762e-06, "loss": 0.4733, "step": 33465 }, { "epoch": 0.86, "grad_norm": 1.0275163650512695, "learning_rate": 1.0423460218817062e-06, "loss": 0.439, "step": 33466 }, { "epoch": 0.86, "grad_norm": 1.1504390239715576, "learning_rate": 1.04197703173876e-06, "loss": 0.4566, "step": 33467 }, { "epoch": 0.86, "grad_norm": 1.4171043634414673, "learning_rate": 1.0416081033288794e-06, "loss": 0.5112, "step": 33468 }, { "epoch": 0.86, "grad_norm": 1.0239793062210083, "learning_rate": 1.0412392366546064e-06, "loss": 0.5489, "step": 33469 }, { "epoch": 0.86, "grad_norm": 1.8568212985992432, "learning_rate": 1.0408704317184858e-06, "loss": 0.5847, "step": 33470 }, { "epoch": 0.86, "grad_norm": 1.5743197202682495, "learning_rate": 1.0405016885230568e-06, "loss": 0.5294, "step": 33471 }, { "epoch": 0.86, "grad_norm": 1.5281835794448853, "learning_rate": 1.0401330070708604e-06, "loss": 0.4986, "step": 33472 }, { "epoch": 0.86, "grad_norm": 1.252472996711731, "learning_rate": 1.0397643873644392e-06, "loss": 0.5771, "step": 33473 }, { "epoch": 0.86, "grad_norm": 1.7197761535644531, "learning_rate": 1.0393958294063333e-06, "loss": 0.4865, "step": 33474 }, { "epoch": 0.86, "grad_norm": 0.9089176654815674, "learning_rate": 1.0390273331990808e-06, "loss": 0.4352, "step": 33475 }, { "epoch": 0.86, "grad_norm": 1.023971438407898, "learning_rate": 1.0386588987452207e-06, "loss": 0.4492, "step": 33476 }, { "epoch": 0.86, "grad_norm": 1.1693336963653564, "learning_rate": 1.0382905260472954e-06, "loss": 0.3989, "step": 33477 }, { "epoch": 0.86, "grad_norm": 5.173202037811279, "learning_rate": 1.0379222151078415e-06, "loss": 0.8367, "step": 33478 }, { "epoch": 0.86, "grad_norm": 1.575981855392456, "learning_rate": 1.0375539659293955e-06, "loss": 0.403, "step": 33479 }, { "epoch": 0.86, "grad_norm": 1.2792819738388062, "learning_rate": 1.0371857785144978e-06, "loss": 0.3247, "step": 33480 }, { "epoch": 0.86, "grad_norm": 1.8421754837036133, "learning_rate": 1.036817652865686e-06, "loss": 0.687, "step": 33481 }, { "epoch": 0.86, "grad_norm": 1.1511555910110474, "learning_rate": 1.0364495889854943e-06, "loss": 0.6315, "step": 33482 }, { "epoch": 0.86, "grad_norm": 13.316165924072266, "learning_rate": 1.0360815868764595e-06, "loss": 0.5257, "step": 33483 }, { "epoch": 0.86, "grad_norm": 1.4049742221832275, "learning_rate": 1.0357136465411198e-06, "loss": 0.5662, "step": 33484 }, { "epoch": 0.86, "grad_norm": 1.8695497512817383, "learning_rate": 1.0353457679820112e-06, "loss": 0.5113, "step": 33485 }, { "epoch": 0.86, "grad_norm": 1.4005272388458252, "learning_rate": 1.0349779512016644e-06, "loss": 0.4273, "step": 33486 }, { "epoch": 0.86, "grad_norm": 11.9313325881958, "learning_rate": 1.03461019620262e-06, "loss": 0.5851, "step": 33487 }, { "epoch": 0.86, "grad_norm": 1.25309419631958, "learning_rate": 1.0342425029874081e-06, "loss": 0.3925, "step": 33488 }, { "epoch": 0.86, "grad_norm": 1.523563265800476, "learning_rate": 1.0338748715585634e-06, "loss": 0.5198, "step": 33489 }, { "epoch": 0.86, "grad_norm": 1.8051629066467285, "learning_rate": 1.0335073019186215e-06, "loss": 0.4435, "step": 33490 }, { "epoch": 0.86, "grad_norm": 1.3905253410339355, "learning_rate": 1.0331397940701138e-06, "loss": 0.3858, "step": 33491 }, { "epoch": 0.86, "grad_norm": 1.1330231428146362, "learning_rate": 1.0327723480155737e-06, "loss": 0.4838, "step": 33492 }, { "epoch": 0.86, "grad_norm": 1.332329273223877, "learning_rate": 1.0324049637575305e-06, "loss": 0.4469, "step": 33493 }, { "epoch": 0.86, "grad_norm": 2.250514507293701, "learning_rate": 1.03203764129852e-06, "loss": 0.6409, "step": 33494 }, { "epoch": 0.86, "grad_norm": 1.0108897686004639, "learning_rate": 1.0316703806410721e-06, "loss": 0.478, "step": 33495 }, { "epoch": 0.86, "grad_norm": 0.6995417475700378, "learning_rate": 1.031303181787715e-06, "loss": 0.3873, "step": 33496 }, { "epoch": 0.86, "grad_norm": 1.8384037017822266, "learning_rate": 1.0309360447409845e-06, "loss": 0.6018, "step": 33497 }, { "epoch": 0.86, "grad_norm": 1.4373682737350464, "learning_rate": 1.030568969503407e-06, "loss": 0.442, "step": 33498 }, { "epoch": 0.86, "grad_norm": 2.478667974472046, "learning_rate": 1.0302019560775135e-06, "loss": 0.6626, "step": 33499 }, { "epoch": 0.86, "grad_norm": 1.651617407798767, "learning_rate": 1.0298350044658313e-06, "loss": 0.6441, "step": 33500 }, { "epoch": 0.86, "grad_norm": 1.2572239637374878, "learning_rate": 1.0294681146708918e-06, "loss": 0.3295, "step": 33501 }, { "epoch": 0.86, "grad_norm": 1.9952689409255981, "learning_rate": 1.0291012866952222e-06, "loss": 0.7108, "step": 33502 }, { "epoch": 0.86, "grad_norm": 1.3244106769561768, "learning_rate": 1.028734520541349e-06, "loss": 0.4914, "step": 33503 }, { "epoch": 0.86, "grad_norm": 1.7258797883987427, "learning_rate": 1.0283678162118026e-06, "loss": 0.6622, "step": 33504 }, { "epoch": 0.86, "grad_norm": 6.464134216308594, "learning_rate": 1.028001173709109e-06, "loss": 0.5394, "step": 33505 }, { "epoch": 0.86, "grad_norm": 1.5441749095916748, "learning_rate": 1.0276345930357924e-06, "loss": 0.4228, "step": 33506 }, { "epoch": 0.86, "grad_norm": 2.8834500312805176, "learning_rate": 1.0272680741943842e-06, "loss": 0.5753, "step": 33507 }, { "epoch": 0.86, "grad_norm": 1.3349285125732422, "learning_rate": 1.0269016171874057e-06, "loss": 0.557, "step": 33508 }, { "epoch": 0.86, "grad_norm": 1.4198334217071533, "learning_rate": 1.0265352220173842e-06, "loss": 0.3831, "step": 33509 }, { "epoch": 0.86, "grad_norm": 0.9309077262878418, "learning_rate": 1.0261688886868425e-06, "loss": 0.353, "step": 33510 }, { "epoch": 0.86, "grad_norm": 1.1325678825378418, "learning_rate": 1.0258026171983093e-06, "loss": 0.615, "step": 33511 }, { "epoch": 0.86, "grad_norm": 0.9556216597557068, "learning_rate": 1.0254364075543056e-06, "loss": 0.3792, "step": 33512 }, { "epoch": 0.86, "grad_norm": 4.005119800567627, "learning_rate": 1.0250702597573548e-06, "loss": 0.4575, "step": 33513 }, { "epoch": 0.86, "grad_norm": 1.9246106147766113, "learning_rate": 1.0247041738099828e-06, "loss": 0.5799, "step": 33514 }, { "epoch": 0.86, "grad_norm": 1.3257830142974854, "learning_rate": 1.0243381497147098e-06, "loss": 0.4619, "step": 33515 }, { "epoch": 0.86, "grad_norm": 1.2817540168762207, "learning_rate": 1.0239721874740605e-06, "loss": 0.5003, "step": 33516 }, { "epoch": 0.86, "grad_norm": 1.7122361660003662, "learning_rate": 1.0236062870905528e-06, "loss": 0.5596, "step": 33517 }, { "epoch": 0.86, "grad_norm": 1.6583858728408813, "learning_rate": 1.0232404485667135e-06, "loss": 0.5776, "step": 33518 }, { "epoch": 0.86, "grad_norm": 1.7056186199188232, "learning_rate": 1.0228746719050607e-06, "loss": 0.4879, "step": 33519 }, { "epoch": 0.86, "grad_norm": 1.349743366241455, "learning_rate": 1.0225089571081137e-06, "loss": 0.5465, "step": 33520 }, { "epoch": 0.86, "grad_norm": 1.475696086883545, "learning_rate": 1.0221433041783969e-06, "loss": 0.5817, "step": 33521 }, { "epoch": 0.86, "grad_norm": 1.051613688468933, "learning_rate": 1.0217777131184282e-06, "loss": 0.5367, "step": 33522 }, { "epoch": 0.86, "grad_norm": 11.98614501953125, "learning_rate": 1.0214121839307267e-06, "loss": 0.61, "step": 33523 }, { "epoch": 0.86, "grad_norm": 0.9039037227630615, "learning_rate": 1.0210467166178107e-06, "loss": 0.3771, "step": 33524 }, { "epoch": 0.86, "grad_norm": 1.4768925905227661, "learning_rate": 1.0206813111822e-06, "loss": 0.5615, "step": 33525 }, { "epoch": 0.86, "grad_norm": 1.1911512613296509, "learning_rate": 1.0203159676264128e-06, "loss": 0.5989, "step": 33526 }, { "epoch": 0.86, "grad_norm": 1.8325923681259155, "learning_rate": 1.019950685952964e-06, "loss": 0.6398, "step": 33527 }, { "epoch": 0.86, "grad_norm": 12.865216255187988, "learning_rate": 1.0195854661643766e-06, "loss": 0.5581, "step": 33528 }, { "epoch": 0.86, "grad_norm": 1.2722853422164917, "learning_rate": 1.0192203082631624e-06, "loss": 0.5096, "step": 33529 }, { "epoch": 0.86, "grad_norm": 1.783182144165039, "learning_rate": 1.018855212251839e-06, "loss": 0.5123, "step": 33530 }, { "epoch": 0.86, "grad_norm": 6.159525394439697, "learning_rate": 1.0184901781329236e-06, "loss": 0.7594, "step": 33531 }, { "epoch": 0.86, "grad_norm": 2.3951613903045654, "learning_rate": 1.0181252059089319e-06, "loss": 0.5235, "step": 33532 }, { "epoch": 0.86, "grad_norm": 1.3724825382232666, "learning_rate": 1.0177602955823784e-06, "loss": 0.533, "step": 33533 }, { "epoch": 0.86, "grad_norm": 2.5720674991607666, "learning_rate": 1.0173954471557756e-06, "loss": 0.5188, "step": 33534 }, { "epoch": 0.86, "grad_norm": 1.892913818359375, "learning_rate": 1.0170306606316426e-06, "loss": 0.6432, "step": 33535 }, { "epoch": 0.86, "grad_norm": 1.5932894945144653, "learning_rate": 1.0166659360124898e-06, "loss": 0.5403, "step": 33536 }, { "epoch": 0.86, "grad_norm": 1.1426284313201904, "learning_rate": 1.0163012733008292e-06, "loss": 0.3883, "step": 33537 }, { "epoch": 0.86, "grad_norm": 1.7166950702667236, "learning_rate": 1.0159366724991793e-06, "loss": 0.3864, "step": 33538 }, { "epoch": 0.86, "grad_norm": 1.1276862621307373, "learning_rate": 1.0155721336100489e-06, "loss": 0.4617, "step": 33539 }, { "epoch": 0.86, "grad_norm": 1.508154034614563, "learning_rate": 1.0152076566359503e-06, "loss": 0.6258, "step": 33540 }, { "epoch": 0.86, "grad_norm": 1.2664021253585815, "learning_rate": 1.014843241579394e-06, "loss": 0.4784, "step": 33541 }, { "epoch": 0.86, "grad_norm": 2.179270029067993, "learning_rate": 1.0144788884428958e-06, "loss": 0.6297, "step": 33542 }, { "epoch": 0.86, "grad_norm": 0.9811652302742004, "learning_rate": 1.0141145972289635e-06, "loss": 0.4496, "step": 33543 }, { "epoch": 0.86, "grad_norm": 2.0176022052764893, "learning_rate": 1.0137503679401051e-06, "loss": 0.6381, "step": 33544 }, { "epoch": 0.86, "grad_norm": 1.2177835702896118, "learning_rate": 1.0133862005788365e-06, "loss": 0.596, "step": 33545 }, { "epoch": 0.86, "grad_norm": 0.949715256690979, "learning_rate": 1.0130220951476633e-06, "loss": 0.4324, "step": 33546 }, { "epoch": 0.86, "grad_norm": 1.3162331581115723, "learning_rate": 1.0126580516490947e-06, "loss": 0.5, "step": 33547 }, { "epoch": 0.86, "grad_norm": 1.3096485137939453, "learning_rate": 1.0122940700856409e-06, "loss": 0.4572, "step": 33548 }, { "epoch": 0.86, "grad_norm": 1.0506932735443115, "learning_rate": 1.011930150459811e-06, "loss": 0.4817, "step": 33549 }, { "epoch": 0.86, "grad_norm": 10.677193641662598, "learning_rate": 1.011566292774111e-06, "loss": 0.4333, "step": 33550 }, { "epoch": 0.86, "grad_norm": 1.6466273069381714, "learning_rate": 1.0112024970310474e-06, "loss": 0.6187, "step": 33551 }, { "epoch": 0.86, "grad_norm": 3.497265100479126, "learning_rate": 1.0108387632331319e-06, "loss": 0.5493, "step": 33552 }, { "epoch": 0.86, "grad_norm": 1.2370513677597046, "learning_rate": 1.0104750913828666e-06, "loss": 0.4597, "step": 33553 }, { "epoch": 0.86, "grad_norm": 1.3948112726211548, "learning_rate": 1.0101114814827572e-06, "loss": 0.4743, "step": 33554 }, { "epoch": 0.86, "grad_norm": 1.0682640075683594, "learning_rate": 1.0097479335353144e-06, "loss": 0.4675, "step": 33555 }, { "epoch": 0.86, "grad_norm": 12.84656810760498, "learning_rate": 1.0093844475430392e-06, "loss": 0.8368, "step": 33556 }, { "epoch": 0.86, "grad_norm": 1.2507106065750122, "learning_rate": 1.0090210235084397e-06, "loss": 0.4173, "step": 33557 }, { "epoch": 0.86, "grad_norm": 1.5863996744155884, "learning_rate": 1.0086576614340172e-06, "loss": 0.5384, "step": 33558 }, { "epoch": 0.86, "grad_norm": 1.4330002069473267, "learning_rate": 1.0082943613222773e-06, "loss": 0.5172, "step": 33559 }, { "epoch": 0.86, "grad_norm": 2.6523995399475098, "learning_rate": 1.0079311231757228e-06, "loss": 0.6521, "step": 33560 }, { "epoch": 0.86, "grad_norm": 1.7140275239944458, "learning_rate": 1.007567946996858e-06, "loss": 0.5229, "step": 33561 }, { "epoch": 0.86, "grad_norm": 1.7188748121261597, "learning_rate": 1.0072048327881866e-06, "loss": 0.4448, "step": 33562 }, { "epoch": 0.86, "grad_norm": 1.0849183797836304, "learning_rate": 1.0068417805522091e-06, "loss": 0.4337, "step": 33563 }, { "epoch": 0.86, "grad_norm": 3.7663650512695312, "learning_rate": 1.0064787902914263e-06, "loss": 0.5923, "step": 33564 }, { "epoch": 0.86, "grad_norm": 0.8824663162231445, "learning_rate": 1.0061158620083434e-06, "loss": 0.4316, "step": 33565 }, { "epoch": 0.86, "grad_norm": 1.5574535131454468, "learning_rate": 1.005752995705459e-06, "loss": 0.5435, "step": 33566 }, { "epoch": 0.86, "grad_norm": 1.6034024953842163, "learning_rate": 1.0053901913852725e-06, "loss": 0.3795, "step": 33567 }, { "epoch": 0.86, "grad_norm": 1.3125483989715576, "learning_rate": 1.0050274490502876e-06, "loss": 0.6325, "step": 33568 }, { "epoch": 0.86, "grad_norm": 5.5321855545043945, "learning_rate": 1.004664768703002e-06, "loss": 0.4827, "step": 33569 }, { "epoch": 0.86, "grad_norm": 1.4836928844451904, "learning_rate": 1.004302150345916e-06, "loss": 0.5694, "step": 33570 }, { "epoch": 0.86, "grad_norm": 8.575757026672363, "learning_rate": 1.0039395939815255e-06, "loss": 0.4624, "step": 33571 }, { "epoch": 0.86, "grad_norm": 1.7908711433410645, "learning_rate": 1.003577099612334e-06, "loss": 0.4697, "step": 33572 }, { "epoch": 0.86, "grad_norm": 1.9567196369171143, "learning_rate": 1.0032146672408372e-06, "loss": 0.5722, "step": 33573 }, { "epoch": 0.86, "grad_norm": 1.1092643737792969, "learning_rate": 1.0028522968695299e-06, "loss": 0.5529, "step": 33574 }, { "epoch": 0.86, "grad_norm": 1.2776522636413574, "learning_rate": 1.0024899885009144e-06, "loss": 0.3374, "step": 33575 }, { "epoch": 0.86, "grad_norm": 1.206556797027588, "learning_rate": 1.0021277421374842e-06, "loss": 0.5204, "step": 33576 }, { "epoch": 0.86, "grad_norm": 1.7030096054077148, "learning_rate": 1.0017655577817375e-06, "loss": 0.6354, "step": 33577 }, { "epoch": 0.86, "grad_norm": 2.2118985652923584, "learning_rate": 1.0014034354361679e-06, "loss": 0.5441, "step": 33578 }, { "epoch": 0.86, "grad_norm": 1.2372556924819946, "learning_rate": 1.0010413751032733e-06, "loss": 0.605, "step": 33579 }, { "epoch": 0.86, "grad_norm": 1.2802444696426392, "learning_rate": 1.0006793767855483e-06, "loss": 0.4464, "step": 33580 }, { "epoch": 0.86, "grad_norm": 1.816435694694519, "learning_rate": 1.0003174404854843e-06, "loss": 0.6266, "step": 33581 }, { "epoch": 0.86, "grad_norm": 3.07932186126709, "learning_rate": 9.999555662055816e-07, "loss": 0.4961, "step": 33582 }, { "epoch": 0.86, "grad_norm": 1.4587534666061401, "learning_rate": 9.995937539483303e-07, "loss": 0.5663, "step": 33583 }, { "epoch": 0.86, "grad_norm": 1.5276437997817993, "learning_rate": 9.992320037162217e-07, "loss": 0.4116, "step": 33584 }, { "epoch": 0.86, "grad_norm": 1.2689340114593506, "learning_rate": 9.98870315511754e-07, "loss": 0.5057, "step": 33585 }, { "epoch": 0.86, "grad_norm": 1.1049948930740356, "learning_rate": 9.985086893374163e-07, "loss": 0.4163, "step": 33586 }, { "epoch": 0.86, "grad_norm": 2.252228021621704, "learning_rate": 9.98147125195702e-07, "loss": 0.5607, "step": 33587 }, { "epoch": 0.86, "grad_norm": 1.6995667219161987, "learning_rate": 9.977856230891003e-07, "loss": 0.5071, "step": 33588 }, { "epoch": 0.86, "grad_norm": 1.7778196334838867, "learning_rate": 9.97424183020106e-07, "loss": 0.5837, "step": 33589 }, { "epoch": 0.86, "grad_norm": 1.4409676790237427, "learning_rate": 9.970628049912078e-07, "loss": 0.5214, "step": 33590 }, { "epoch": 0.86, "grad_norm": 1.2604175806045532, "learning_rate": 9.967014890048953e-07, "loss": 0.4295, "step": 33591 }, { "epoch": 0.86, "grad_norm": 1.2880144119262695, "learning_rate": 9.96340235063662e-07, "loss": 0.7404, "step": 33592 }, { "epoch": 0.86, "grad_norm": 3.0189826488494873, "learning_rate": 9.959790431699944e-07, "loss": 0.4308, "step": 33593 }, { "epoch": 0.86, "grad_norm": 1.6554044485092163, "learning_rate": 9.95617913326382e-07, "loss": 0.541, "step": 33594 }, { "epoch": 0.86, "grad_norm": 1.4072895050048828, "learning_rate": 9.952568455353128e-07, "loss": 0.5205, "step": 33595 }, { "epoch": 0.86, "grad_norm": 1.319006323814392, "learning_rate": 9.948958397992781e-07, "loss": 0.6447, "step": 33596 }, { "epoch": 0.86, "grad_norm": 2.101088047027588, "learning_rate": 9.945348961207635e-07, "loss": 0.5745, "step": 33597 }, { "epoch": 0.86, "grad_norm": 1.1975575685501099, "learning_rate": 9.941740145022538e-07, "loss": 0.5472, "step": 33598 }, { "epoch": 0.86, "grad_norm": 1.4315330982208252, "learning_rate": 9.938131949462416e-07, "loss": 0.6999, "step": 33599 }, { "epoch": 0.86, "grad_norm": 1.2188060283660889, "learning_rate": 9.934524374552102e-07, "loss": 0.5651, "step": 33600 }, { "epoch": 0.86, "grad_norm": 3.876274824142456, "learning_rate": 9.930917420316443e-07, "loss": 0.7547, "step": 33601 }, { "epoch": 0.86, "grad_norm": 1.0338865518569946, "learning_rate": 9.927311086780334e-07, "loss": 0.5139, "step": 33602 }, { "epoch": 0.86, "grad_norm": 1.1742357015609741, "learning_rate": 9.923705373968618e-07, "loss": 0.5562, "step": 33603 }, { "epoch": 0.86, "grad_norm": 1.3204622268676758, "learning_rate": 9.92010028190612e-07, "loss": 0.5355, "step": 33604 }, { "epoch": 0.86, "grad_norm": 1.160742163658142, "learning_rate": 9.916495810617677e-07, "loss": 0.3629, "step": 33605 }, { "epoch": 0.86, "grad_norm": 2.7050440311431885, "learning_rate": 9.912891960128179e-07, "loss": 0.532, "step": 33606 }, { "epoch": 0.86, "grad_norm": 1.2304545640945435, "learning_rate": 9.909288730462418e-07, "loss": 0.471, "step": 33607 }, { "epoch": 0.86, "grad_norm": 1.5193994045257568, "learning_rate": 9.90568612164523e-07, "loss": 0.3529, "step": 33608 }, { "epoch": 0.86, "grad_norm": 2.5471298694610596, "learning_rate": 9.902084133701462e-07, "loss": 0.4195, "step": 33609 }, { "epoch": 0.86, "grad_norm": 1.5818469524383545, "learning_rate": 9.898482766655937e-07, "loss": 0.5597, "step": 33610 }, { "epoch": 0.86, "grad_norm": 1.2635418176651, "learning_rate": 9.894882020533446e-07, "loss": 0.4989, "step": 33611 }, { "epoch": 0.86, "grad_norm": 1.4810274839401245, "learning_rate": 9.891281895358805e-07, "loss": 0.4717, "step": 33612 }, { "epoch": 0.86, "grad_norm": 1.4469610452651978, "learning_rate": 9.887682391156861e-07, "loss": 0.4555, "step": 33613 }, { "epoch": 0.86, "grad_norm": 1.084214448928833, "learning_rate": 9.884083507952392e-07, "loss": 0.4662, "step": 33614 }, { "epoch": 0.86, "grad_norm": 1.477674961090088, "learning_rate": 9.88048524577019e-07, "loss": 0.5781, "step": 33615 }, { "epoch": 0.86, "grad_norm": 1.0971306562423706, "learning_rate": 9.876887604635076e-07, "loss": 0.4349, "step": 33616 }, { "epoch": 0.86, "grad_norm": 2.093064546585083, "learning_rate": 9.873290584571837e-07, "loss": 0.5889, "step": 33617 }, { "epoch": 0.86, "grad_norm": 1.9643415212631226, "learning_rate": 9.86969418560525e-07, "loss": 0.555, "step": 33618 }, { "epoch": 0.86, "grad_norm": 0.9767026901245117, "learning_rate": 9.866098407760094e-07, "loss": 0.5741, "step": 33619 }, { "epoch": 0.86, "grad_norm": 1.7502917051315308, "learning_rate": 9.862503251061172e-07, "loss": 0.609, "step": 33620 }, { "epoch": 0.86, "grad_norm": 1.901211142539978, "learning_rate": 9.858908715533255e-07, "loss": 0.4727, "step": 33621 }, { "epoch": 0.86, "grad_norm": 1.6590981483459473, "learning_rate": 9.855314801201088e-07, "loss": 0.4863, "step": 33622 }, { "epoch": 0.86, "grad_norm": 1.1649103164672852, "learning_rate": 9.851721508089473e-07, "loss": 0.5909, "step": 33623 }, { "epoch": 0.86, "grad_norm": 1.4099688529968262, "learning_rate": 9.848128836223148e-07, "loss": 0.5562, "step": 33624 }, { "epoch": 0.86, "grad_norm": 1.763167142868042, "learning_rate": 9.84453678562687e-07, "loss": 0.5267, "step": 33625 }, { "epoch": 0.86, "grad_norm": 1.2901387214660645, "learning_rate": 9.840945356325426e-07, "loss": 0.5079, "step": 33626 }, { "epoch": 0.86, "grad_norm": 1.3466469049453735, "learning_rate": 9.837354548343536e-07, "loss": 0.5684, "step": 33627 }, { "epoch": 0.86, "grad_norm": 1.8098104000091553, "learning_rate": 9.833764361705955e-07, "loss": 0.5858, "step": 33628 }, { "epoch": 0.86, "grad_norm": 1.3773677349090576, "learning_rate": 9.830174796437397e-07, "loss": 0.4933, "step": 33629 }, { "epoch": 0.86, "grad_norm": 1.8383662700653076, "learning_rate": 9.826585852562653e-07, "loss": 0.5432, "step": 33630 }, { "epoch": 0.86, "grad_norm": 15.59850788116455, "learning_rate": 9.822997530106427e-07, "loss": 0.5273, "step": 33631 }, { "epoch": 0.86, "grad_norm": 1.0570391416549683, "learning_rate": 9.819409829093418e-07, "loss": 0.4823, "step": 33632 }, { "epoch": 0.86, "grad_norm": 1.5470250844955444, "learning_rate": 9.815822749548409e-07, "loss": 0.4855, "step": 33633 }, { "epoch": 0.86, "grad_norm": 1.6527290344238281, "learning_rate": 9.81223629149608e-07, "loss": 0.5302, "step": 33634 }, { "epoch": 0.86, "grad_norm": 1.1495084762573242, "learning_rate": 9.808650454961167e-07, "loss": 0.5375, "step": 33635 }, { "epoch": 0.86, "grad_norm": 1.5954232215881348, "learning_rate": 9.805065239968348e-07, "loss": 0.5016, "step": 33636 }, { "epoch": 0.86, "grad_norm": 2.198181629180908, "learning_rate": 9.801480646542373e-07, "loss": 0.5495, "step": 33637 }, { "epoch": 0.86, "grad_norm": 1.163030743598938, "learning_rate": 9.79789667470793e-07, "loss": 0.5783, "step": 33638 }, { "epoch": 0.86, "grad_norm": 6.606358528137207, "learning_rate": 9.79431332448968e-07, "loss": 0.4617, "step": 33639 }, { "epoch": 0.86, "grad_norm": 1.2285927534103394, "learning_rate": 9.79073059591238e-07, "loss": 0.5205, "step": 33640 }, { "epoch": 0.86, "grad_norm": 0.9993188381195068, "learning_rate": 9.787148489000686e-07, "loss": 0.4604, "step": 33641 }, { "epoch": 0.86, "grad_norm": 1.8735170364379883, "learning_rate": 9.783567003779271e-07, "loss": 0.5377, "step": 33642 }, { "epoch": 0.86, "grad_norm": 1.9001166820526123, "learning_rate": 9.779986140272847e-07, "loss": 0.4747, "step": 33643 }, { "epoch": 0.86, "grad_norm": 1.4903380870819092, "learning_rate": 9.77640589850608e-07, "loss": 0.3267, "step": 33644 }, { "epoch": 0.86, "grad_norm": 2.687324047088623, "learning_rate": 9.772826278503645e-07, "loss": 0.6973, "step": 33645 }, { "epoch": 0.86, "grad_norm": 1.781323790550232, "learning_rate": 9.769247280290183e-07, "loss": 0.6105, "step": 33646 }, { "epoch": 0.86, "grad_norm": 1.6512811183929443, "learning_rate": 9.765668903890391e-07, "loss": 0.41, "step": 33647 }, { "epoch": 0.86, "grad_norm": 7.725361347198486, "learning_rate": 9.762091149328923e-07, "loss": 0.5121, "step": 33648 }, { "epoch": 0.86, "grad_norm": 2.1043288707733154, "learning_rate": 9.758514016630416e-07, "loss": 0.6982, "step": 33649 }, { "epoch": 0.86, "grad_norm": 1.2209877967834473, "learning_rate": 9.75493750581955e-07, "loss": 0.5575, "step": 33650 }, { "epoch": 0.86, "grad_norm": 2.3979501724243164, "learning_rate": 9.751361616920951e-07, "loss": 0.5546, "step": 33651 }, { "epoch": 0.86, "grad_norm": 4.5144362449646, "learning_rate": 9.747786349959275e-07, "loss": 0.5773, "step": 33652 }, { "epoch": 0.86, "grad_norm": 1.493277668952942, "learning_rate": 9.74421170495914e-07, "loss": 0.5059, "step": 33653 }, { "epoch": 0.86, "grad_norm": 1.7239488363265991, "learning_rate": 9.740637681945198e-07, "loss": 0.6087, "step": 33654 }, { "epoch": 0.86, "grad_norm": 0.9512547850608826, "learning_rate": 9.737064280942088e-07, "loss": 0.4973, "step": 33655 }, { "epoch": 0.86, "grad_norm": 2.0167441368103027, "learning_rate": 9.73349150197439e-07, "loss": 0.4966, "step": 33656 }, { "epoch": 0.86, "grad_norm": 1.5641905069351196, "learning_rate": 9.729919345066786e-07, "loss": 0.599, "step": 33657 }, { "epoch": 0.86, "grad_norm": 3.5676090717315674, "learning_rate": 9.726347810243864e-07, "loss": 0.5941, "step": 33658 }, { "epoch": 0.86, "grad_norm": 7.018155574798584, "learning_rate": 9.72277689753024e-07, "loss": 0.7603, "step": 33659 }, { "epoch": 0.86, "grad_norm": 1.642897367477417, "learning_rate": 9.719206606950493e-07, "loss": 0.5323, "step": 33660 }, { "epoch": 0.86, "grad_norm": 2.387953996658325, "learning_rate": 9.715636938529284e-07, "loss": 0.6703, "step": 33661 }, { "epoch": 0.86, "grad_norm": 1.9279913902282715, "learning_rate": 9.712067892291176e-07, "loss": 0.6315, "step": 33662 }, { "epoch": 0.86, "grad_norm": 1.4752999544143677, "learning_rate": 9.708499468260746e-07, "loss": 0.4678, "step": 33663 }, { "epoch": 0.86, "grad_norm": 1.3452969789505005, "learning_rate": 9.704931666462646e-07, "loss": 0.5556, "step": 33664 }, { "epoch": 0.86, "grad_norm": 0.9164878129959106, "learning_rate": 9.701364486921417e-07, "loss": 0.2931, "step": 33665 }, { "epoch": 0.86, "grad_norm": 1.3109393119812012, "learning_rate": 9.697797929661635e-07, "loss": 0.6243, "step": 33666 }, { "epoch": 0.86, "grad_norm": 0.9681355357170105, "learning_rate": 9.694231994707915e-07, "loss": 0.4488, "step": 33667 }, { "epoch": 0.86, "grad_norm": 1.187947392463684, "learning_rate": 9.690666682084815e-07, "loss": 0.6612, "step": 33668 }, { "epoch": 0.86, "grad_norm": 1.6017837524414062, "learning_rate": 9.687101991816905e-07, "loss": 0.5939, "step": 33669 }, { "epoch": 0.86, "grad_norm": 2.5693721771240234, "learning_rate": 9.68353792392872e-07, "loss": 0.5903, "step": 33670 }, { "epoch": 0.86, "grad_norm": 1.4487743377685547, "learning_rate": 9.679974478444877e-07, "loss": 0.5104, "step": 33671 }, { "epoch": 0.86, "grad_norm": 2.7651150226593018, "learning_rate": 9.676411655389895e-07, "loss": 0.5187, "step": 33672 }, { "epoch": 0.86, "grad_norm": 1.3376688957214355, "learning_rate": 9.672849454788326e-07, "loss": 0.3774, "step": 33673 }, { "epoch": 0.86, "grad_norm": 1.496821641921997, "learning_rate": 9.669287876664747e-07, "loss": 0.5018, "step": 33674 }, { "epoch": 0.86, "grad_norm": 1.2689770460128784, "learning_rate": 9.665726921043683e-07, "loss": 0.4247, "step": 33675 }, { "epoch": 0.86, "grad_norm": 3.913329601287842, "learning_rate": 9.662166587949684e-07, "loss": 0.5353, "step": 33676 }, { "epoch": 0.86, "grad_norm": 1.4567077159881592, "learning_rate": 9.65860687740725e-07, "loss": 0.6382, "step": 33677 }, { "epoch": 0.86, "grad_norm": 1.727034568786621, "learning_rate": 9.655047789440962e-07, "loss": 0.5154, "step": 33678 }, { "epoch": 0.86, "grad_norm": 2.515232801437378, "learning_rate": 9.651489324075314e-07, "loss": 0.4632, "step": 33679 }, { "epoch": 0.86, "grad_norm": 2.0539391040802, "learning_rate": 9.647931481334826e-07, "loss": 0.624, "step": 33680 }, { "epoch": 0.86, "grad_norm": 2.2649099826812744, "learning_rate": 9.644374261244061e-07, "loss": 0.5192, "step": 33681 }, { "epoch": 0.86, "grad_norm": 1.2486445903778076, "learning_rate": 9.640817663827485e-07, "loss": 0.5511, "step": 33682 }, { "epoch": 0.86, "grad_norm": 2.841700315475464, "learning_rate": 9.63726168910961e-07, "loss": 0.5504, "step": 33683 }, { "epoch": 0.86, "grad_norm": 1.8499703407287598, "learning_rate": 9.633706337114978e-07, "loss": 0.6395, "step": 33684 }, { "epoch": 0.86, "grad_norm": 1.3855619430541992, "learning_rate": 9.630151607868067e-07, "loss": 0.5494, "step": 33685 }, { "epoch": 0.86, "grad_norm": 0.9425344467163086, "learning_rate": 9.626597501393376e-07, "loss": 0.4032, "step": 33686 }, { "epoch": 0.86, "grad_norm": 5.281027317047119, "learning_rate": 9.623044017715378e-07, "loss": 0.5462, "step": 33687 }, { "epoch": 0.86, "grad_norm": 1.9548972845077515, "learning_rate": 9.619491156858595e-07, "loss": 0.6817, "step": 33688 }, { "epoch": 0.86, "grad_norm": 1.3876044750213623, "learning_rate": 9.6159389188475e-07, "loss": 0.4938, "step": 33689 }, { "epoch": 0.86, "grad_norm": 1.9882166385650635, "learning_rate": 9.61238730370655e-07, "loss": 0.56, "step": 33690 }, { "epoch": 0.86, "grad_norm": 6.166428089141846, "learning_rate": 9.608836311460268e-07, "loss": 0.6844, "step": 33691 }, { "epoch": 0.86, "grad_norm": 4.567207336425781, "learning_rate": 9.60528594213308e-07, "loss": 0.653, "step": 33692 }, { "epoch": 0.86, "grad_norm": 1.7303547859191895, "learning_rate": 9.601736195749478e-07, "loss": 0.4967, "step": 33693 }, { "epoch": 0.86, "grad_norm": 1.6006665229797363, "learning_rate": 9.598187072333899e-07, "loss": 0.4287, "step": 33694 }, { "epoch": 0.86, "grad_norm": 2.0645651817321777, "learning_rate": 9.59463857191083e-07, "loss": 0.6351, "step": 33695 }, { "epoch": 0.86, "grad_norm": 1.1280643939971924, "learning_rate": 9.591090694504723e-07, "loss": 0.5131, "step": 33696 }, { "epoch": 0.86, "grad_norm": 2.153932809829712, "learning_rate": 9.587543440140002e-07, "loss": 0.5825, "step": 33697 }, { "epoch": 0.86, "grad_norm": 2.3927009105682373, "learning_rate": 9.583996808841134e-07, "loss": 0.4361, "step": 33698 }, { "epoch": 0.86, "grad_norm": 1.934786081314087, "learning_rate": 9.580450800632569e-07, "loss": 0.6026, "step": 33699 }, { "epoch": 0.86, "grad_norm": 1.5081816911697388, "learning_rate": 9.576905415538717e-07, "loss": 0.5062, "step": 33700 }, { "epoch": 0.86, "grad_norm": 5.518195152282715, "learning_rate": 9.573360653584008e-07, "loss": 0.7155, "step": 33701 }, { "epoch": 0.86, "grad_norm": 1.4707809686660767, "learning_rate": 9.569816514792908e-07, "loss": 0.4198, "step": 33702 }, { "epoch": 0.86, "grad_norm": 1.412788987159729, "learning_rate": 9.56627299918982e-07, "loss": 0.565, "step": 33703 }, { "epoch": 0.86, "grad_norm": 1.8429510593414307, "learning_rate": 9.562730106799146e-07, "loss": 0.5912, "step": 33704 }, { "epoch": 0.86, "grad_norm": 1.7781672477722168, "learning_rate": 9.559187837645333e-07, "loss": 0.5425, "step": 33705 }, { "epoch": 0.86, "grad_norm": 1.1265466213226318, "learning_rate": 9.555646191752777e-07, "loss": 0.5144, "step": 33706 }, { "epoch": 0.86, "grad_norm": 1.1997630596160889, "learning_rate": 9.552105169145864e-07, "loss": 0.5672, "step": 33707 }, { "epoch": 0.86, "grad_norm": 1.3349546194076538, "learning_rate": 9.548564769849033e-07, "loss": 0.5686, "step": 33708 }, { "epoch": 0.86, "grad_norm": 16.116897583007812, "learning_rate": 9.545024993886676e-07, "loss": 0.5345, "step": 33709 }, { "epoch": 0.86, "grad_norm": 1.8036543130874634, "learning_rate": 9.541485841283172e-07, "loss": 0.465, "step": 33710 }, { "epoch": 0.86, "grad_norm": 1.817257046699524, "learning_rate": 9.537947312062901e-07, "loss": 0.5376, "step": 33711 }, { "epoch": 0.86, "grad_norm": 20.456504821777344, "learning_rate": 9.534409406250277e-07, "loss": 0.5963, "step": 33712 }, { "epoch": 0.86, "grad_norm": 2.5257556438446045, "learning_rate": 9.530872123869672e-07, "loss": 0.609, "step": 33713 }, { "epoch": 0.86, "grad_norm": 1.3431313037872314, "learning_rate": 9.527335464945431e-07, "loss": 0.358, "step": 33714 }, { "epoch": 0.86, "grad_norm": 1.0394325256347656, "learning_rate": 9.523799429501979e-07, "loss": 0.3999, "step": 33715 }, { "epoch": 0.86, "grad_norm": 1.6343681812286377, "learning_rate": 9.520264017563663e-07, "loss": 0.6348, "step": 33716 }, { "epoch": 0.86, "grad_norm": 1.1867940425872803, "learning_rate": 9.516729229154831e-07, "loss": 0.4284, "step": 33717 }, { "epoch": 0.86, "grad_norm": 1.055508017539978, "learning_rate": 9.51319506429984e-07, "loss": 0.5186, "step": 33718 }, { "epoch": 0.86, "grad_norm": 1.5266985893249512, "learning_rate": 9.509661523023084e-07, "loss": 0.4888, "step": 33719 }, { "epoch": 0.86, "grad_norm": 1.3496235609054565, "learning_rate": 9.506128605348885e-07, "loss": 0.498, "step": 33720 }, { "epoch": 0.86, "grad_norm": 3.769019842147827, "learning_rate": 9.502596311301571e-07, "loss": 0.4691, "step": 33721 }, { "epoch": 0.86, "grad_norm": 1.5076191425323486, "learning_rate": 9.499064640905531e-07, "loss": 0.6321, "step": 33722 }, { "epoch": 0.86, "grad_norm": 1.4578717947006226, "learning_rate": 9.495533594185069e-07, "loss": 0.5618, "step": 33723 }, { "epoch": 0.86, "grad_norm": 1.3570646047592163, "learning_rate": 9.492003171164521e-07, "loss": 0.5012, "step": 33724 }, { "epoch": 0.86, "grad_norm": 3.606060743331909, "learning_rate": 9.488473371868246e-07, "loss": 0.7088, "step": 33725 }, { "epoch": 0.86, "grad_norm": 2.305220365524292, "learning_rate": 9.484944196320545e-07, "loss": 0.5253, "step": 33726 }, { "epoch": 0.86, "grad_norm": 3.257067918777466, "learning_rate": 9.481415644545733e-07, "loss": 0.6325, "step": 33727 }, { "epoch": 0.86, "grad_norm": 1.5847195386886597, "learning_rate": 9.477887716568124e-07, "loss": 0.5518, "step": 33728 }, { "epoch": 0.86, "grad_norm": 2.4807891845703125, "learning_rate": 9.474360412412053e-07, "loss": 0.3068, "step": 33729 }, { "epoch": 0.86, "grad_norm": 1.7364169359207153, "learning_rate": 9.470833732101825e-07, "loss": 0.6463, "step": 33730 }, { "epoch": 0.86, "grad_norm": 1.4646954536437988, "learning_rate": 9.467307675661718e-07, "loss": 0.5936, "step": 33731 }, { "epoch": 0.86, "grad_norm": 0.9288041591644287, "learning_rate": 9.46378224311606e-07, "loss": 0.6093, "step": 33732 }, { "epoch": 0.86, "grad_norm": 1.472936987876892, "learning_rate": 9.460257434489129e-07, "loss": 0.5111, "step": 33733 }, { "epoch": 0.86, "grad_norm": 1.5462435483932495, "learning_rate": 9.456733249805228e-07, "loss": 0.5303, "step": 33734 }, { "epoch": 0.86, "grad_norm": 1.301092505455017, "learning_rate": 9.453209689088627e-07, "loss": 0.5089, "step": 33735 }, { "epoch": 0.86, "grad_norm": 1.6094470024108887, "learning_rate": 9.449686752363618e-07, "loss": 0.5981, "step": 33736 }, { "epoch": 0.86, "grad_norm": 3.081716299057007, "learning_rate": 9.446164439654493e-07, "loss": 0.4956, "step": 33737 }, { "epoch": 0.86, "grad_norm": 1.6680068969726562, "learning_rate": 9.442642750985487e-07, "loss": 0.4492, "step": 33738 }, { "epoch": 0.86, "grad_norm": 1.545021414756775, "learning_rate": 9.439121686380915e-07, "loss": 0.4815, "step": 33739 }, { "epoch": 0.86, "grad_norm": 1.1859134435653687, "learning_rate": 9.435601245865023e-07, "loss": 0.4799, "step": 33740 }, { "epoch": 0.86, "grad_norm": 1.399491786956787, "learning_rate": 9.432081429462059e-07, "loss": 0.4439, "step": 33741 }, { "epoch": 0.86, "grad_norm": 1.1923521757125854, "learning_rate": 9.42856223719627e-07, "loss": 0.5308, "step": 33742 }, { "epoch": 0.86, "grad_norm": 1.6906856298446655, "learning_rate": 9.42504366909196e-07, "loss": 0.5772, "step": 33743 }, { "epoch": 0.86, "grad_norm": 14.66356372833252, "learning_rate": 9.421525725173331e-07, "loss": 0.5008, "step": 33744 }, { "epoch": 0.86, "grad_norm": 2.029222011566162, "learning_rate": 9.418008405464629e-07, "loss": 0.6771, "step": 33745 }, { "epoch": 0.86, "grad_norm": 0.9678283929824829, "learning_rate": 9.414491709990125e-07, "loss": 0.5379, "step": 33746 }, { "epoch": 0.86, "grad_norm": 1.1342389583587646, "learning_rate": 9.410975638774023e-07, "loss": 0.4759, "step": 33747 }, { "epoch": 0.86, "grad_norm": 1.2948681116104126, "learning_rate": 9.407460191840556e-07, "loss": 0.5011, "step": 33748 }, { "epoch": 0.87, "grad_norm": 1.5654613971710205, "learning_rate": 9.403945369213963e-07, "loss": 0.52, "step": 33749 }, { "epoch": 0.87, "grad_norm": 1.3411380052566528, "learning_rate": 9.400431170918467e-07, "loss": 0.4912, "step": 33750 }, { "epoch": 0.87, "grad_norm": 1.728173017501831, "learning_rate": 9.396917596978284e-07, "loss": 0.6559, "step": 33751 }, { "epoch": 0.87, "grad_norm": 2.236485004425049, "learning_rate": 9.393404647417592e-07, "loss": 0.5545, "step": 33752 }, { "epoch": 0.87, "grad_norm": 1.2088974714279175, "learning_rate": 9.389892322260664e-07, "loss": 0.5212, "step": 33753 }, { "epoch": 0.87, "grad_norm": 3.141450881958008, "learning_rate": 9.386380621531665e-07, "loss": 0.6565, "step": 33754 }, { "epoch": 0.87, "grad_norm": 1.4398846626281738, "learning_rate": 9.38286954525478e-07, "loss": 0.3785, "step": 33755 }, { "epoch": 0.87, "grad_norm": 3.037632703781128, "learning_rate": 9.379359093454244e-07, "loss": 0.5931, "step": 33756 }, { "epoch": 0.87, "grad_norm": 1.4826550483703613, "learning_rate": 9.375849266154235e-07, "loss": 0.5932, "step": 33757 }, { "epoch": 0.87, "grad_norm": 1.0100775957107544, "learning_rate": 9.372340063378937e-07, "loss": 0.5344, "step": 33758 }, { "epoch": 0.87, "grad_norm": 0.9386565089225769, "learning_rate": 9.368831485152519e-07, "loss": 0.5405, "step": 33759 }, { "epoch": 0.87, "grad_norm": 0.9987207651138306, "learning_rate": 9.365323531499182e-07, "loss": 0.5792, "step": 33760 }, { "epoch": 0.87, "grad_norm": 1.202393651008606, "learning_rate": 9.361816202443108e-07, "loss": 0.3067, "step": 33761 }, { "epoch": 0.87, "grad_norm": 1.1606864929199219, "learning_rate": 9.358309498008423e-07, "loss": 0.597, "step": 33762 }, { "epoch": 0.87, "grad_norm": 1.7686885595321655, "learning_rate": 9.354803418219338e-07, "loss": 0.4615, "step": 33763 }, { "epoch": 0.87, "grad_norm": 1.628865361213684, "learning_rate": 9.351297963100003e-07, "loss": 0.5368, "step": 33764 }, { "epoch": 0.87, "grad_norm": 1.189746379852295, "learning_rate": 9.347793132674576e-07, "loss": 0.4687, "step": 33765 }, { "epoch": 0.87, "grad_norm": 1.424649953842163, "learning_rate": 9.344288926967183e-07, "loss": 0.5505, "step": 33766 }, { "epoch": 0.87, "grad_norm": 3.8303301334381104, "learning_rate": 9.340785346002012e-07, "loss": 0.4005, "step": 33767 }, { "epoch": 0.87, "grad_norm": 1.8315201997756958, "learning_rate": 9.337282389803193e-07, "loss": 0.576, "step": 33768 }, { "epoch": 0.87, "grad_norm": 1.215002179145813, "learning_rate": 9.333780058394848e-07, "loss": 0.6006, "step": 33769 }, { "epoch": 0.87, "grad_norm": 1.1861234903335571, "learning_rate": 9.330278351801147e-07, "loss": 0.4658, "step": 33770 }, { "epoch": 0.87, "grad_norm": 4.824984550476074, "learning_rate": 9.326777270046195e-07, "loss": 0.4447, "step": 33771 }, { "epoch": 0.87, "grad_norm": 1.4743400812149048, "learning_rate": 9.323276813154114e-07, "loss": 0.496, "step": 33772 }, { "epoch": 0.87, "grad_norm": 5.0798821449279785, "learning_rate": 9.319776981149065e-07, "loss": 0.6519, "step": 33773 }, { "epoch": 0.87, "grad_norm": 1.5072340965270996, "learning_rate": 9.316277774055138e-07, "loss": 0.5791, "step": 33774 }, { "epoch": 0.87, "grad_norm": 7.318011283874512, "learning_rate": 9.312779191896448e-07, "loss": 0.8681, "step": 33775 }, { "epoch": 0.87, "grad_norm": 1.6281814575195312, "learning_rate": 9.309281234697088e-07, "loss": 0.631, "step": 33776 }, { "epoch": 0.87, "grad_norm": 2.7408764362335205, "learning_rate": 9.305783902481214e-07, "loss": 0.4361, "step": 33777 }, { "epoch": 0.87, "grad_norm": 2.182377815246582, "learning_rate": 9.302287195272886e-07, "loss": 0.5897, "step": 33778 }, { "epoch": 0.87, "grad_norm": 4.5568928718566895, "learning_rate": 9.298791113096206e-07, "loss": 0.5727, "step": 33779 }, { "epoch": 0.87, "grad_norm": 1.1610208749771118, "learning_rate": 9.295295655975278e-07, "loss": 0.3556, "step": 33780 }, { "epoch": 0.87, "grad_norm": 5.996993064880371, "learning_rate": 9.291800823934194e-07, "loss": 0.4326, "step": 33781 }, { "epoch": 0.87, "grad_norm": 1.6175724267959595, "learning_rate": 9.288306616997034e-07, "loss": 0.4167, "step": 33782 }, { "epoch": 0.87, "grad_norm": 3.4016668796539307, "learning_rate": 9.284813035187845e-07, "loss": 0.5496, "step": 33783 }, { "epoch": 0.87, "grad_norm": 4.59081506729126, "learning_rate": 9.281320078530764e-07, "loss": 0.6941, "step": 33784 }, { "epoch": 0.87, "grad_norm": 1.5770512819290161, "learning_rate": 9.277827747049817e-07, "loss": 0.5028, "step": 33785 }, { "epoch": 0.87, "grad_norm": 1.1977640390396118, "learning_rate": 9.274336040769072e-07, "loss": 0.5248, "step": 33786 }, { "epoch": 0.87, "grad_norm": 2.1099281311035156, "learning_rate": 9.270844959712621e-07, "loss": 0.5261, "step": 33787 }, { "epoch": 0.87, "grad_norm": 1.389464020729065, "learning_rate": 9.267354503904502e-07, "loss": 0.4273, "step": 33788 }, { "epoch": 0.87, "grad_norm": 1.528456211090088, "learning_rate": 9.263864673368761e-07, "loss": 0.431, "step": 33789 }, { "epoch": 0.87, "grad_norm": 1.1770590543746948, "learning_rate": 9.260375468129468e-07, "loss": 0.4307, "step": 33790 }, { "epoch": 0.87, "grad_norm": 2.7124059200286865, "learning_rate": 9.25688688821067e-07, "loss": 0.6112, "step": 33791 }, { "epoch": 0.87, "grad_norm": 1.280989170074463, "learning_rate": 9.253398933636382e-07, "loss": 0.4417, "step": 33792 }, { "epoch": 0.87, "grad_norm": 3.0281708240509033, "learning_rate": 9.24991160443065e-07, "loss": 0.4476, "step": 33793 }, { "epoch": 0.87, "grad_norm": 1.3744144439697266, "learning_rate": 9.246424900617523e-07, "loss": 0.4561, "step": 33794 }, { "epoch": 0.87, "grad_norm": 1.742334008216858, "learning_rate": 9.242938822221025e-07, "loss": 0.6896, "step": 33795 }, { "epoch": 0.87, "grad_norm": 2.276048183441162, "learning_rate": 9.23945336926515e-07, "loss": 0.5487, "step": 33796 }, { "epoch": 0.87, "grad_norm": 1.3834940195083618, "learning_rate": 9.235968541773965e-07, "loss": 0.5722, "step": 33797 }, { "epoch": 0.87, "grad_norm": 2.052849769592285, "learning_rate": 9.232484339771453e-07, "loss": 0.3189, "step": 33798 }, { "epoch": 0.87, "grad_norm": 2.4590909481048584, "learning_rate": 9.229000763281649e-07, "loss": 0.4206, "step": 33799 }, { "epoch": 0.87, "grad_norm": 5.09330940246582, "learning_rate": 9.225517812328522e-07, "loss": 0.5916, "step": 33800 }, { "epoch": 0.87, "grad_norm": 1.2767919301986694, "learning_rate": 9.222035486936109e-07, "loss": 0.4723, "step": 33801 }, { "epoch": 0.87, "grad_norm": 1.7054005861282349, "learning_rate": 9.218553787128382e-07, "loss": 0.4328, "step": 33802 }, { "epoch": 0.87, "grad_norm": 1.1782110929489136, "learning_rate": 9.215072712929351e-07, "loss": 0.4775, "step": 33803 }, { "epoch": 0.87, "grad_norm": 0.9699671864509583, "learning_rate": 9.211592264363011e-07, "loss": 0.5121, "step": 33804 }, { "epoch": 0.87, "grad_norm": 0.9983375072479248, "learning_rate": 9.208112441453343e-07, "loss": 0.3934, "step": 33805 }, { "epoch": 0.87, "grad_norm": 1.1608552932739258, "learning_rate": 9.204633244224293e-07, "loss": 0.4321, "step": 33806 }, { "epoch": 0.87, "grad_norm": 1.39516019821167, "learning_rate": 9.201154672699897e-07, "loss": 0.5861, "step": 33807 }, { "epoch": 0.87, "grad_norm": 1.1090359687805176, "learning_rate": 9.197676726904093e-07, "loss": 0.5294, "step": 33808 }, { "epoch": 0.87, "grad_norm": 1.8255853652954102, "learning_rate": 9.194199406860837e-07, "loss": 0.2923, "step": 33809 }, { "epoch": 0.87, "grad_norm": 1.8441470861434937, "learning_rate": 9.190722712594124e-07, "loss": 0.6206, "step": 33810 }, { "epoch": 0.87, "grad_norm": 1.6684712171554565, "learning_rate": 9.187246644127901e-07, "loss": 0.4944, "step": 33811 }, { "epoch": 0.87, "grad_norm": 0.9829545021057129, "learning_rate": 9.183771201486114e-07, "loss": 0.421, "step": 33812 }, { "epoch": 0.87, "grad_norm": 2.938324451446533, "learning_rate": 9.1802963846927e-07, "loss": 0.733, "step": 33813 }, { "epoch": 0.87, "grad_norm": 1.9301258325576782, "learning_rate": 9.17682219377165e-07, "loss": 0.5115, "step": 33814 }, { "epoch": 0.87, "grad_norm": 1.4402376413345337, "learning_rate": 9.173348628746881e-07, "loss": 0.6185, "step": 33815 }, { "epoch": 0.87, "grad_norm": 2.6761651039123535, "learning_rate": 9.169875689642316e-07, "loss": 0.5719, "step": 33816 }, { "epoch": 0.87, "grad_norm": 4.475643157958984, "learning_rate": 9.166403376481914e-07, "loss": 0.5715, "step": 33817 }, { "epoch": 0.87, "grad_norm": 1.1938730478286743, "learning_rate": 9.162931689289589e-07, "loss": 0.5998, "step": 33818 }, { "epoch": 0.87, "grad_norm": 1.2146974802017212, "learning_rate": 9.159460628089256e-07, "loss": 0.4719, "step": 33819 }, { "epoch": 0.87, "grad_norm": 1.044650912284851, "learning_rate": 9.155990192904863e-07, "loss": 0.4924, "step": 33820 }, { "epoch": 0.87, "grad_norm": 1.837465763092041, "learning_rate": 9.152520383760322e-07, "loss": 0.4613, "step": 33821 }, { "epoch": 0.87, "grad_norm": 5.636322975158691, "learning_rate": 9.149051200679526e-07, "loss": 0.8228, "step": 33822 }, { "epoch": 0.87, "grad_norm": 1.1230603456497192, "learning_rate": 9.14558264368638e-07, "loss": 0.3358, "step": 33823 }, { "epoch": 0.87, "grad_norm": 1.3671694993972778, "learning_rate": 9.142114712804806e-07, "loss": 0.4757, "step": 33824 }, { "epoch": 0.87, "grad_norm": 1.034356713294983, "learning_rate": 9.138647408058698e-07, "loss": 0.4548, "step": 33825 }, { "epoch": 0.87, "grad_norm": 3.260634183883667, "learning_rate": 9.135180729471938e-07, "loss": 0.4945, "step": 33826 }, { "epoch": 0.87, "grad_norm": 1.0584633350372314, "learning_rate": 9.131714677068437e-07, "loss": 0.5358, "step": 33827 }, { "epoch": 0.87, "grad_norm": 1.0948951244354248, "learning_rate": 9.128249250872068e-07, "loss": 0.5437, "step": 33828 }, { "epoch": 0.87, "grad_norm": 1.5820997953414917, "learning_rate": 9.12478445090671e-07, "loss": 0.3663, "step": 33829 }, { "epoch": 0.87, "grad_norm": 1.2623977661132812, "learning_rate": 9.121320277196233e-07, "loss": 0.3443, "step": 33830 }, { "epoch": 0.87, "grad_norm": 3.38946795463562, "learning_rate": 9.117856729764541e-07, "loss": 0.7462, "step": 33831 }, { "epoch": 0.87, "grad_norm": 1.7138622999191284, "learning_rate": 9.11439380863548e-07, "loss": 0.5422, "step": 33832 }, { "epoch": 0.87, "grad_norm": 0.8431631326675415, "learning_rate": 9.110931513832899e-07, "loss": 0.381, "step": 33833 }, { "epoch": 0.87, "grad_norm": 1.1378545761108398, "learning_rate": 9.10746984538069e-07, "loss": 0.4729, "step": 33834 }, { "epoch": 0.87, "grad_norm": 1.1033926010131836, "learning_rate": 9.104008803302699e-07, "loss": 0.3533, "step": 33835 }, { "epoch": 0.87, "grad_norm": 2.390986919403076, "learning_rate": 9.100548387622766e-07, "loss": 0.4758, "step": 33836 }, { "epoch": 0.87, "grad_norm": 1.6620612144470215, "learning_rate": 9.097088598364734e-07, "loss": 0.5703, "step": 33837 }, { "epoch": 0.87, "grad_norm": 3.0488314628601074, "learning_rate": 9.093629435552475e-07, "loss": 0.6836, "step": 33838 }, { "epoch": 0.87, "grad_norm": 4.622570991516113, "learning_rate": 9.090170899209805e-07, "loss": 0.6116, "step": 33839 }, { "epoch": 0.87, "grad_norm": 1.1011197566986084, "learning_rate": 9.086712989360547e-07, "loss": 0.7198, "step": 33840 }, { "epoch": 0.87, "grad_norm": 1.9870007038116455, "learning_rate": 9.08325570602856e-07, "loss": 0.453, "step": 33841 }, { "epoch": 0.87, "grad_norm": 2.0952532291412354, "learning_rate": 9.079799049237659e-07, "loss": 0.5758, "step": 33842 }, { "epoch": 0.87, "grad_norm": 1.0765260457992554, "learning_rate": 9.076343019011646e-07, "loss": 0.5973, "step": 33843 }, { "epoch": 0.87, "grad_norm": 2.432835340499878, "learning_rate": 9.072887615374359e-07, "loss": 0.6016, "step": 33844 }, { "epoch": 0.87, "grad_norm": 2.035142421722412, "learning_rate": 9.069432838349612e-07, "loss": 0.6072, "step": 33845 }, { "epoch": 0.87, "grad_norm": 1.7245631217956543, "learning_rate": 9.065978687961208e-07, "loss": 0.5694, "step": 33846 }, { "epoch": 0.87, "grad_norm": 2.075528860092163, "learning_rate": 9.062525164232927e-07, "loss": 0.6254, "step": 33847 }, { "epoch": 0.87, "grad_norm": 1.378965973854065, "learning_rate": 9.059072267188618e-07, "loss": 0.5558, "step": 33848 }, { "epoch": 0.87, "grad_norm": 4.761213779449463, "learning_rate": 9.055619996852039e-07, "loss": 0.5819, "step": 33849 }, { "epoch": 0.87, "grad_norm": 1.5265566110610962, "learning_rate": 9.052168353246971e-07, "loss": 0.6037, "step": 33850 }, { "epoch": 0.87, "grad_norm": 1.4128093719482422, "learning_rate": 9.048717336397239e-07, "loss": 0.5538, "step": 33851 }, { "epoch": 0.87, "grad_norm": 2.169868230819702, "learning_rate": 9.045266946326603e-07, "loss": 0.6079, "step": 33852 }, { "epoch": 0.87, "grad_norm": 0.941028356552124, "learning_rate": 9.041817183058854e-07, "loss": 0.5136, "step": 33853 }, { "epoch": 0.87, "grad_norm": 1.4805262088775635, "learning_rate": 9.038368046617729e-07, "loss": 0.5926, "step": 33854 }, { "epoch": 0.87, "grad_norm": 1.57277250289917, "learning_rate": 9.034919537027043e-07, "loss": 0.5043, "step": 33855 }, { "epoch": 0.87, "grad_norm": 1.9560037851333618, "learning_rate": 9.031471654310542e-07, "loss": 0.6622, "step": 33856 }, { "epoch": 0.87, "grad_norm": 5.6134772300720215, "learning_rate": 9.028024398491975e-07, "loss": 0.4328, "step": 33857 }, { "epoch": 0.87, "grad_norm": 1.372056245803833, "learning_rate": 9.024577769595122e-07, "loss": 0.6487, "step": 33858 }, { "epoch": 0.87, "grad_norm": 4.012623310089111, "learning_rate": 9.02113176764372e-07, "loss": 0.4483, "step": 33859 }, { "epoch": 0.87, "grad_norm": 1.7336983680725098, "learning_rate": 9.017686392661518e-07, "loss": 0.6241, "step": 33860 }, { "epoch": 0.87, "grad_norm": 1.231299877166748, "learning_rate": 9.01424164467225e-07, "loss": 0.4279, "step": 33861 }, { "epoch": 0.87, "grad_norm": 1.3902056217193604, "learning_rate": 9.010797523699677e-07, "loss": 0.4151, "step": 33862 }, { "epoch": 0.87, "grad_norm": 1.4652094841003418, "learning_rate": 9.007354029767523e-07, "loss": 0.4493, "step": 33863 }, { "epoch": 0.87, "grad_norm": 2.929323434829712, "learning_rate": 9.003911162899503e-07, "loss": 0.6494, "step": 33864 }, { "epoch": 0.87, "grad_norm": 10.967589378356934, "learning_rate": 9.000468923119377e-07, "loss": 0.498, "step": 33865 }, { "epoch": 0.87, "grad_norm": 1.4084209203720093, "learning_rate": 8.997027310450846e-07, "loss": 0.5647, "step": 33866 }, { "epoch": 0.87, "grad_norm": 1.2011183500289917, "learning_rate": 8.993586324917614e-07, "loss": 0.5158, "step": 33867 }, { "epoch": 0.87, "grad_norm": 1.2560839653015137, "learning_rate": 8.990145966543417e-07, "loss": 0.5153, "step": 33868 }, { "epoch": 0.87, "grad_norm": 1.9548033475875854, "learning_rate": 8.986706235351972e-07, "loss": 0.5002, "step": 33869 }, { "epoch": 0.87, "grad_norm": 3.7366943359375, "learning_rate": 8.983267131366957e-07, "loss": 0.6332, "step": 33870 }, { "epoch": 0.87, "grad_norm": 1.2542442083358765, "learning_rate": 8.979828654612066e-07, "loss": 0.4585, "step": 33871 }, { "epoch": 0.87, "grad_norm": 1.658707857131958, "learning_rate": 8.976390805111024e-07, "loss": 0.5168, "step": 33872 }, { "epoch": 0.87, "grad_norm": 1.2333108186721802, "learning_rate": 8.972953582887522e-07, "loss": 0.4162, "step": 33873 }, { "epoch": 0.87, "grad_norm": 1.3200650215148926, "learning_rate": 8.969516987965209e-07, "loss": 0.6936, "step": 33874 }, { "epoch": 0.87, "grad_norm": 1.3309745788574219, "learning_rate": 8.966081020367812e-07, "loss": 0.6314, "step": 33875 }, { "epoch": 0.87, "grad_norm": 1.4404913187026978, "learning_rate": 8.962645680118998e-07, "loss": 0.5403, "step": 33876 }, { "epoch": 0.87, "grad_norm": 1.1307896375656128, "learning_rate": 8.959210967242427e-07, "loss": 0.4847, "step": 33877 }, { "epoch": 0.87, "grad_norm": 1.5645437240600586, "learning_rate": 8.955776881761758e-07, "loss": 0.5505, "step": 33878 }, { "epoch": 0.87, "grad_norm": 2.152143716812134, "learning_rate": 8.952343423700705e-07, "loss": 0.4281, "step": 33879 }, { "epoch": 0.87, "grad_norm": 5.908409595489502, "learning_rate": 8.948910593082882e-07, "loss": 0.5242, "step": 33880 }, { "epoch": 0.87, "grad_norm": 1.348505973815918, "learning_rate": 8.945478389931961e-07, "loss": 0.5446, "step": 33881 }, { "epoch": 0.87, "grad_norm": 1.111333966255188, "learning_rate": 8.94204681427161e-07, "loss": 0.4947, "step": 33882 }, { "epoch": 0.87, "grad_norm": 1.0445221662521362, "learning_rate": 8.938615866125456e-07, "loss": 0.5477, "step": 33883 }, { "epoch": 0.87, "grad_norm": 2.460968017578125, "learning_rate": 8.935185545517145e-07, "loss": 0.4222, "step": 33884 }, { "epoch": 0.87, "grad_norm": 1.4803720712661743, "learning_rate": 8.931755852470336e-07, "loss": 0.4495, "step": 33885 }, { "epoch": 0.87, "grad_norm": 6.517333984375, "learning_rate": 8.928326787008645e-07, "loss": 0.5549, "step": 33886 }, { "epoch": 0.87, "grad_norm": 1.634343147277832, "learning_rate": 8.92489834915572e-07, "loss": 0.538, "step": 33887 }, { "epoch": 0.87, "grad_norm": 1.3749788999557495, "learning_rate": 8.921470538935151e-07, "loss": 0.3891, "step": 33888 }, { "epoch": 0.87, "grad_norm": 1.6453872919082642, "learning_rate": 8.918043356370599e-07, "loss": 0.5763, "step": 33889 }, { "epoch": 0.87, "grad_norm": 2.9342219829559326, "learning_rate": 8.914616801485676e-07, "loss": 0.559, "step": 33890 }, { "epoch": 0.87, "grad_norm": 1.084905982017517, "learning_rate": 8.911190874303977e-07, "loss": 0.3689, "step": 33891 }, { "epoch": 0.87, "grad_norm": 3.4318997859954834, "learning_rate": 8.907765574849126e-07, "loss": 0.4525, "step": 33892 }, { "epoch": 0.87, "grad_norm": 4.893056869506836, "learning_rate": 8.904340903144737e-07, "loss": 0.4444, "step": 33893 }, { "epoch": 0.87, "grad_norm": 2.4522042274475098, "learning_rate": 8.900916859214393e-07, "loss": 0.6226, "step": 33894 }, { "epoch": 0.87, "grad_norm": 1.1809890270233154, "learning_rate": 8.897493443081684e-07, "loss": 0.446, "step": 33895 }, { "epoch": 0.87, "grad_norm": 1.3590413331985474, "learning_rate": 8.894070654770226e-07, "loss": 0.3776, "step": 33896 }, { "epoch": 0.87, "grad_norm": 3.004854202270508, "learning_rate": 8.890648494303599e-07, "loss": 0.6294, "step": 33897 }, { "epoch": 0.87, "grad_norm": 1.8930044174194336, "learning_rate": 8.887226961705353e-07, "loss": 0.4706, "step": 33898 }, { "epoch": 0.87, "grad_norm": 1.0258574485778809, "learning_rate": 8.883806056999122e-07, "loss": 0.3437, "step": 33899 }, { "epoch": 0.87, "grad_norm": 6.083282947540283, "learning_rate": 8.880385780208456e-07, "loss": 0.4795, "step": 33900 }, { "epoch": 0.87, "grad_norm": 1.4314254522323608, "learning_rate": 8.876966131356923e-07, "loss": 0.525, "step": 33901 }, { "epoch": 0.87, "grad_norm": 4.082827091217041, "learning_rate": 8.873547110468073e-07, "loss": 0.5173, "step": 33902 }, { "epoch": 0.87, "grad_norm": 1.8735214471817017, "learning_rate": 8.870128717565507e-07, "loss": 0.4593, "step": 33903 }, { "epoch": 0.87, "grad_norm": 8.933781623840332, "learning_rate": 8.866710952672752e-07, "loss": 0.4834, "step": 33904 }, { "epoch": 0.87, "grad_norm": 1.2824151515960693, "learning_rate": 8.863293815813356e-07, "loss": 0.4593, "step": 33905 }, { "epoch": 0.87, "grad_norm": 5.064713001251221, "learning_rate": 8.8598773070109e-07, "loss": 0.4505, "step": 33906 }, { "epoch": 0.87, "grad_norm": 1.552473783493042, "learning_rate": 8.85646142628891e-07, "loss": 0.5034, "step": 33907 }, { "epoch": 0.87, "grad_norm": 13.582930564880371, "learning_rate": 8.85304617367091e-07, "loss": 0.6851, "step": 33908 }, { "epoch": 0.87, "grad_norm": 1.3304693698883057, "learning_rate": 8.849631549180471e-07, "loss": 0.4753, "step": 33909 }, { "epoch": 0.87, "grad_norm": 1.0265121459960938, "learning_rate": 8.846217552841097e-07, "loss": 0.4315, "step": 33910 }, { "epoch": 0.87, "grad_norm": 1.3966195583343506, "learning_rate": 8.842804184676323e-07, "loss": 0.4377, "step": 33911 }, { "epoch": 0.87, "grad_norm": 1.2138479948043823, "learning_rate": 8.839391444709655e-07, "loss": 0.5904, "step": 33912 }, { "epoch": 0.87, "grad_norm": 1.1670955419540405, "learning_rate": 8.83597933296465e-07, "loss": 0.5183, "step": 33913 }, { "epoch": 0.87, "grad_norm": 1.9133957624435425, "learning_rate": 8.832567849464801e-07, "loss": 0.5544, "step": 33914 }, { "epoch": 0.87, "grad_norm": 1.1821211576461792, "learning_rate": 8.829156994233601e-07, "loss": 0.488, "step": 33915 }, { "epoch": 0.87, "grad_norm": 16.96085548400879, "learning_rate": 8.825746767294585e-07, "loss": 0.5699, "step": 33916 }, { "epoch": 0.87, "grad_norm": 1.454418420791626, "learning_rate": 8.822337168671247e-07, "loss": 0.5277, "step": 33917 }, { "epoch": 0.87, "grad_norm": 1.227712631225586, "learning_rate": 8.818928198387078e-07, "loss": 0.4485, "step": 33918 }, { "epoch": 0.87, "grad_norm": 8.36188793182373, "learning_rate": 8.815519856465549e-07, "loss": 0.5847, "step": 33919 }, { "epoch": 0.87, "grad_norm": 1.5118849277496338, "learning_rate": 8.812112142930185e-07, "loss": 0.4868, "step": 33920 }, { "epoch": 0.87, "grad_norm": 3.3444297313690186, "learning_rate": 8.808705057804456e-07, "loss": 0.5431, "step": 33921 }, { "epoch": 0.87, "grad_norm": 1.0293086767196655, "learning_rate": 8.805298601111823e-07, "loss": 0.3863, "step": 33922 }, { "epoch": 0.87, "grad_norm": 1.5573503971099854, "learning_rate": 8.801892772875786e-07, "loss": 0.57, "step": 33923 }, { "epoch": 0.87, "grad_norm": 4.0164031982421875, "learning_rate": 8.798487573119818e-07, "loss": 0.6823, "step": 33924 }, { "epoch": 0.87, "grad_norm": 1.4781700372695923, "learning_rate": 8.795083001867354e-07, "loss": 0.5751, "step": 33925 }, { "epoch": 0.87, "grad_norm": 2.3224363327026367, "learning_rate": 8.7916790591419e-07, "loss": 0.6595, "step": 33926 }, { "epoch": 0.87, "grad_norm": 1.6010304689407349, "learning_rate": 8.78827574496689e-07, "loss": 0.6039, "step": 33927 }, { "epoch": 0.87, "grad_norm": 1.1745562553405762, "learning_rate": 8.784873059365773e-07, "loss": 0.3498, "step": 33928 }, { "epoch": 0.87, "grad_norm": 1.6807745695114136, "learning_rate": 8.781471002361997e-07, "loss": 0.6727, "step": 33929 }, { "epoch": 0.87, "grad_norm": 1.9054325819015503, "learning_rate": 8.77806957397902e-07, "loss": 0.5504, "step": 33930 }, { "epoch": 0.87, "grad_norm": 1.0229512453079224, "learning_rate": 8.774668774240281e-07, "loss": 0.4572, "step": 33931 }, { "epoch": 0.87, "grad_norm": 2.1172311305999756, "learning_rate": 8.771268603169191e-07, "loss": 0.7895, "step": 33932 }, { "epoch": 0.87, "grad_norm": 1.0879428386688232, "learning_rate": 8.767869060789225e-07, "loss": 0.5363, "step": 33933 }, { "epoch": 0.87, "grad_norm": 1.5539839267730713, "learning_rate": 8.764470147123783e-07, "loss": 0.4896, "step": 33934 }, { "epoch": 0.87, "grad_norm": 2.498878240585327, "learning_rate": 8.761071862196302e-07, "loss": 0.5222, "step": 33935 }, { "epoch": 0.87, "grad_norm": 1.6291375160217285, "learning_rate": 8.757674206030164e-07, "loss": 0.5129, "step": 33936 }, { "epoch": 0.87, "grad_norm": 1.1105690002441406, "learning_rate": 8.754277178648829e-07, "loss": 0.5137, "step": 33937 }, { "epoch": 0.87, "grad_norm": 12.650835037231445, "learning_rate": 8.750880780075699e-07, "loss": 0.5943, "step": 33938 }, { "epoch": 0.87, "grad_norm": 1.7889630794525146, "learning_rate": 8.747485010334144e-07, "loss": 0.5234, "step": 33939 }, { "epoch": 0.87, "grad_norm": 1.534358024597168, "learning_rate": 8.744089869447614e-07, "loss": 0.3968, "step": 33940 }, { "epoch": 0.87, "grad_norm": 0.846350371837616, "learning_rate": 8.740695357439477e-07, "loss": 0.4579, "step": 33941 }, { "epoch": 0.87, "grad_norm": 1.3416601419448853, "learning_rate": 8.737301474333138e-07, "loss": 0.4889, "step": 33942 }, { "epoch": 0.87, "grad_norm": 6.720012664794922, "learning_rate": 8.733908220151965e-07, "loss": 0.4595, "step": 33943 }, { "epoch": 0.87, "grad_norm": 2.315784454345703, "learning_rate": 8.730515594919376e-07, "loss": 0.4144, "step": 33944 }, { "epoch": 0.87, "grad_norm": 1.5162475109100342, "learning_rate": 8.727123598658726e-07, "loss": 0.4831, "step": 33945 }, { "epoch": 0.87, "grad_norm": 2.051025152206421, "learning_rate": 8.723732231393389e-07, "loss": 0.441, "step": 33946 }, { "epoch": 0.87, "grad_norm": 1.5944620370864868, "learning_rate": 8.720341493146756e-07, "loss": 0.59, "step": 33947 }, { "epoch": 0.87, "grad_norm": 1.5362472534179688, "learning_rate": 8.716951383942174e-07, "loss": 0.5447, "step": 33948 }, { "epoch": 0.87, "grad_norm": 3.4982187747955322, "learning_rate": 8.713561903803003e-07, "loss": 0.4265, "step": 33949 }, { "epoch": 0.87, "grad_norm": 1.0682415962219238, "learning_rate": 8.710173052752635e-07, "loss": 0.3862, "step": 33950 }, { "epoch": 0.87, "grad_norm": 1.0770034790039062, "learning_rate": 8.706784830814396e-07, "loss": 0.3945, "step": 33951 }, { "epoch": 0.87, "grad_norm": 1.679518461227417, "learning_rate": 8.703397238011635e-07, "loss": 0.587, "step": 33952 }, { "epoch": 0.87, "grad_norm": 1.1580750942230225, "learning_rate": 8.700010274367688e-07, "loss": 0.6041, "step": 33953 }, { "epoch": 0.87, "grad_norm": 1.0176955461502075, "learning_rate": 8.696623939905924e-07, "loss": 0.4977, "step": 33954 }, { "epoch": 0.87, "grad_norm": 3.3613250255584717, "learning_rate": 8.69323823464967e-07, "loss": 0.4377, "step": 33955 }, { "epoch": 0.87, "grad_norm": 4.1292243003845215, "learning_rate": 8.689853158622241e-07, "loss": 0.5025, "step": 33956 }, { "epoch": 0.87, "grad_norm": 1.2243645191192627, "learning_rate": 8.686468711846985e-07, "loss": 0.4287, "step": 33957 }, { "epoch": 0.87, "grad_norm": 2.00365948677063, "learning_rate": 8.683084894347227e-07, "loss": 0.7161, "step": 33958 }, { "epoch": 0.87, "grad_norm": 1.3099033832550049, "learning_rate": 8.679701706146271e-07, "loss": 0.3965, "step": 33959 }, { "epoch": 0.87, "grad_norm": 1.74000084400177, "learning_rate": 8.67631914726742e-07, "loss": 0.6836, "step": 33960 }, { "epoch": 0.87, "grad_norm": 6.947310924530029, "learning_rate": 8.672937217734023e-07, "loss": 0.5193, "step": 33961 }, { "epoch": 0.87, "grad_norm": 6.4212565422058105, "learning_rate": 8.669555917569361e-07, "loss": 0.6739, "step": 33962 }, { "epoch": 0.87, "grad_norm": 1.5386050939559937, "learning_rate": 8.666175246796726e-07, "loss": 0.6008, "step": 33963 }, { "epoch": 0.87, "grad_norm": 1.8462880849838257, "learning_rate": 8.662795205439444e-07, "loss": 0.3796, "step": 33964 }, { "epoch": 0.87, "grad_norm": 6.4669084548950195, "learning_rate": 8.659415793520798e-07, "loss": 0.5409, "step": 33965 }, { "epoch": 0.87, "grad_norm": 1.0817619562149048, "learning_rate": 8.656037011064055e-07, "loss": 0.5816, "step": 33966 }, { "epoch": 0.87, "grad_norm": 1.252351999282837, "learning_rate": 8.652658858092533e-07, "loss": 0.5485, "step": 33967 }, { "epoch": 0.87, "grad_norm": 5.23560094833374, "learning_rate": 8.649281334629489e-07, "loss": 0.5747, "step": 33968 }, { "epoch": 0.87, "grad_norm": 5.450282573699951, "learning_rate": 8.645904440698216e-07, "loss": 0.6942, "step": 33969 }, { "epoch": 0.87, "grad_norm": 1.5159070491790771, "learning_rate": 8.642528176321952e-07, "loss": 0.4391, "step": 33970 }, { "epoch": 0.87, "grad_norm": 1.9343624114990234, "learning_rate": 8.63915254152401e-07, "loss": 0.4328, "step": 33971 }, { "epoch": 0.87, "grad_norm": 1.2039598226547241, "learning_rate": 8.635777536327628e-07, "loss": 0.439, "step": 33972 }, { "epoch": 0.87, "grad_norm": 1.5033835172653198, "learning_rate": 8.632403160756053e-07, "loss": 0.6172, "step": 33973 }, { "epoch": 0.87, "grad_norm": 1.82920241355896, "learning_rate": 8.629029414832569e-07, "loss": 0.4351, "step": 33974 }, { "epoch": 0.87, "grad_norm": 1.174755573272705, "learning_rate": 8.625656298580398e-07, "loss": 0.4778, "step": 33975 }, { "epoch": 0.87, "grad_norm": 5.091320514678955, "learning_rate": 8.622283812022813e-07, "loss": 0.772, "step": 33976 }, { "epoch": 0.87, "grad_norm": 0.9502970576286316, "learning_rate": 8.618911955183007e-07, "loss": 0.4019, "step": 33977 }, { "epoch": 0.87, "grad_norm": 5.043507099151611, "learning_rate": 8.615540728084271e-07, "loss": 0.369, "step": 33978 }, { "epoch": 0.87, "grad_norm": 1.5155386924743652, "learning_rate": 8.61217013074982e-07, "loss": 0.486, "step": 33979 }, { "epoch": 0.87, "grad_norm": 5.58999490737915, "learning_rate": 8.608800163202857e-07, "loss": 0.5067, "step": 33980 }, { "epoch": 0.87, "grad_norm": 7.799713134765625, "learning_rate": 8.605430825466643e-07, "loss": 0.5372, "step": 33981 }, { "epoch": 0.87, "grad_norm": 3.055621862411499, "learning_rate": 8.60206211756438e-07, "loss": 0.5575, "step": 33982 }, { "epoch": 0.87, "grad_norm": 1.5398313999176025, "learning_rate": 8.598694039519284e-07, "loss": 0.5082, "step": 33983 }, { "epoch": 0.87, "grad_norm": 1.213665246963501, "learning_rate": 8.595326591354547e-07, "loss": 0.5324, "step": 33984 }, { "epoch": 0.87, "grad_norm": 1.8317495584487915, "learning_rate": 8.591959773093405e-07, "loss": 0.6139, "step": 33985 }, { "epoch": 0.87, "grad_norm": 2.3525781631469727, "learning_rate": 8.588593584759053e-07, "loss": 0.442, "step": 33986 }, { "epoch": 0.87, "grad_norm": 9.059886932373047, "learning_rate": 8.585228026374659e-07, "loss": 0.5745, "step": 33987 }, { "epoch": 0.87, "grad_norm": 3.1691601276397705, "learning_rate": 8.581863097963472e-07, "loss": 0.7885, "step": 33988 }, { "epoch": 0.87, "grad_norm": 1.0760129690170288, "learning_rate": 8.57849879954864e-07, "loss": 0.3943, "step": 33989 }, { "epoch": 0.87, "grad_norm": 1.7282496690750122, "learning_rate": 8.575135131153345e-07, "loss": 0.5346, "step": 33990 }, { "epoch": 0.87, "grad_norm": 3.8171873092651367, "learning_rate": 8.571772092800801e-07, "loss": 0.565, "step": 33991 }, { "epoch": 0.87, "grad_norm": 1.12934410572052, "learning_rate": 8.568409684514156e-07, "loss": 0.2682, "step": 33992 }, { "epoch": 0.87, "grad_norm": 1.4475806951522827, "learning_rate": 8.565047906316592e-07, "loss": 0.5366, "step": 33993 }, { "epoch": 0.87, "grad_norm": 2.680290937423706, "learning_rate": 8.561686758231247e-07, "loss": 0.6965, "step": 33994 }, { "epoch": 0.87, "grad_norm": 1.9819790124893188, "learning_rate": 8.558326240281333e-07, "loss": 0.6245, "step": 33995 }, { "epoch": 0.87, "grad_norm": 1.432494044303894, "learning_rate": 8.554966352489991e-07, "loss": 0.348, "step": 33996 }, { "epoch": 0.87, "grad_norm": 1.11515212059021, "learning_rate": 8.551607094880343e-07, "loss": 0.3153, "step": 33997 }, { "epoch": 0.87, "grad_norm": 14.245627403259277, "learning_rate": 8.548248467475584e-07, "loss": 0.4225, "step": 33998 }, { "epoch": 0.87, "grad_norm": 2.342006206512451, "learning_rate": 8.54489047029885e-07, "loss": 0.598, "step": 33999 }, { "epoch": 0.87, "grad_norm": 2.039008855819702, "learning_rate": 8.541533103373267e-07, "loss": 0.5145, "step": 34000 }, { "epoch": 0.87, "grad_norm": 1.8814914226531982, "learning_rate": 8.538176366721961e-07, "loss": 0.6646, "step": 34001 }, { "epoch": 0.87, "grad_norm": 1.4287711381912231, "learning_rate": 8.534820260368093e-07, "loss": 0.5768, "step": 34002 }, { "epoch": 0.87, "grad_norm": 1.694820523262024, "learning_rate": 8.531464784334786e-07, "loss": 0.4826, "step": 34003 }, { "epoch": 0.87, "grad_norm": 7.640029430389404, "learning_rate": 8.528109938645146e-07, "loss": 0.688, "step": 34004 }, { "epoch": 0.87, "grad_norm": 23.231592178344727, "learning_rate": 8.524755723322309e-07, "loss": 0.4532, "step": 34005 }, { "epoch": 0.87, "grad_norm": 1.2227071523666382, "learning_rate": 8.52140213838939e-07, "loss": 0.5605, "step": 34006 }, { "epoch": 0.87, "grad_norm": 2.3092474937438965, "learning_rate": 8.518049183869481e-07, "loss": 0.6569, "step": 34007 }, { "epoch": 0.87, "grad_norm": 1.3162710666656494, "learning_rate": 8.51469685978572e-07, "loss": 0.6135, "step": 34008 }, { "epoch": 0.87, "grad_norm": 1.1999088525772095, "learning_rate": 8.511345166161189e-07, "loss": 0.4347, "step": 34009 }, { "epoch": 0.87, "grad_norm": 0.9598439931869507, "learning_rate": 8.507994103018979e-07, "loss": 0.5194, "step": 34010 }, { "epoch": 0.87, "grad_norm": 1.115831732749939, "learning_rate": 8.504643670382184e-07, "loss": 0.5169, "step": 34011 }, { "epoch": 0.87, "grad_norm": 1.4276202917099, "learning_rate": 8.50129386827393e-07, "loss": 0.3812, "step": 34012 }, { "epoch": 0.87, "grad_norm": 1.6925803422927856, "learning_rate": 8.497944696717264e-07, "loss": 0.4812, "step": 34013 }, { "epoch": 0.87, "grad_norm": 1.801026463508606, "learning_rate": 8.494596155735257e-07, "loss": 0.4439, "step": 34014 }, { "epoch": 0.87, "grad_norm": 1.9457957744598389, "learning_rate": 8.491248245351014e-07, "loss": 0.6508, "step": 34015 }, { "epoch": 0.87, "grad_norm": 1.3021196126937866, "learning_rate": 8.487900965587615e-07, "loss": 0.391, "step": 34016 }, { "epoch": 0.87, "grad_norm": 1.1039884090423584, "learning_rate": 8.484554316468096e-07, "loss": 0.3636, "step": 34017 }, { "epoch": 0.87, "grad_norm": 1.6914273500442505, "learning_rate": 8.48120829801552e-07, "loss": 0.6212, "step": 34018 }, { "epoch": 0.87, "grad_norm": 1.6393433809280396, "learning_rate": 8.477862910252965e-07, "loss": 0.3665, "step": 34019 }, { "epoch": 0.87, "grad_norm": 2.6366000175476074, "learning_rate": 8.474518153203493e-07, "loss": 0.5683, "step": 34020 }, { "epoch": 0.87, "grad_norm": 6.39991569519043, "learning_rate": 8.471174026890117e-07, "loss": 0.5878, "step": 34021 }, { "epoch": 0.87, "grad_norm": 1.7975059747695923, "learning_rate": 8.46783053133593e-07, "loss": 0.526, "step": 34022 }, { "epoch": 0.87, "grad_norm": 4.624505519866943, "learning_rate": 8.464487666563936e-07, "loss": 0.5206, "step": 34023 }, { "epoch": 0.87, "grad_norm": 4.12668514251709, "learning_rate": 8.461145432597195e-07, "loss": 0.4987, "step": 34024 }, { "epoch": 0.87, "grad_norm": 1.233713150024414, "learning_rate": 8.457803829458711e-07, "loss": 0.3918, "step": 34025 }, { "epoch": 0.87, "grad_norm": 2.05816388130188, "learning_rate": 8.454462857171542e-07, "loss": 0.591, "step": 34026 }, { "epoch": 0.87, "grad_norm": 7.34978723526001, "learning_rate": 8.451122515758703e-07, "loss": 0.6792, "step": 34027 }, { "epoch": 0.87, "grad_norm": 1.2975525856018066, "learning_rate": 8.447782805243199e-07, "loss": 0.3298, "step": 34028 }, { "epoch": 0.87, "grad_norm": 0.9725027680397034, "learning_rate": 8.444443725648066e-07, "loss": 0.4754, "step": 34029 }, { "epoch": 0.87, "grad_norm": 1.5112197399139404, "learning_rate": 8.441105276996309e-07, "loss": 0.5997, "step": 34030 }, { "epoch": 0.87, "grad_norm": 1.5576177835464478, "learning_rate": 8.43776745931092e-07, "loss": 0.6331, "step": 34031 }, { "epoch": 0.87, "grad_norm": 13.148666381835938, "learning_rate": 8.434430272614924e-07, "loss": 0.7406, "step": 34032 }, { "epoch": 0.87, "grad_norm": 1.1831560134887695, "learning_rate": 8.431093716931305e-07, "loss": 0.5834, "step": 34033 }, { "epoch": 0.87, "grad_norm": 2.2707972526550293, "learning_rate": 8.427757792283064e-07, "loss": 0.604, "step": 34034 }, { "epoch": 0.87, "grad_norm": 2.187937021255493, "learning_rate": 8.424422498693163e-07, "loss": 0.6441, "step": 34035 }, { "epoch": 0.87, "grad_norm": 2.171701669692993, "learning_rate": 8.421087836184627e-07, "loss": 0.4847, "step": 34036 }, { "epoch": 0.87, "grad_norm": 7.109560012817383, "learning_rate": 8.417753804780426e-07, "loss": 0.5322, "step": 34037 }, { "epoch": 0.87, "grad_norm": 1.8735226392745972, "learning_rate": 8.414420404503509e-07, "loss": 0.5221, "step": 34038 }, { "epoch": 0.87, "grad_norm": 4.519899845123291, "learning_rate": 8.41108763537688e-07, "loss": 0.562, "step": 34039 }, { "epoch": 0.87, "grad_norm": 1.0770639181137085, "learning_rate": 8.407755497423487e-07, "loss": 0.4924, "step": 34040 }, { "epoch": 0.87, "grad_norm": 1.4035348892211914, "learning_rate": 8.404423990666311e-07, "loss": 0.4652, "step": 34041 }, { "epoch": 0.87, "grad_norm": 3.40547251701355, "learning_rate": 8.401093115128289e-07, "loss": 0.6562, "step": 34042 }, { "epoch": 0.87, "grad_norm": 1.028204083442688, "learning_rate": 8.397762870832394e-07, "loss": 0.53, "step": 34043 }, { "epoch": 0.87, "grad_norm": 1.576804280281067, "learning_rate": 8.394433257801548e-07, "loss": 0.588, "step": 34044 }, { "epoch": 0.87, "grad_norm": 1.496016263961792, "learning_rate": 8.391104276058726e-07, "loss": 0.5071, "step": 34045 }, { "epoch": 0.87, "grad_norm": 1.532804250717163, "learning_rate": 8.387775925626862e-07, "loss": 0.6992, "step": 34046 }, { "epoch": 0.87, "grad_norm": 1.7408183813095093, "learning_rate": 8.384448206528894e-07, "loss": 0.6214, "step": 34047 }, { "epoch": 0.87, "grad_norm": 1.690847396850586, "learning_rate": 8.381121118787727e-07, "loss": 0.4195, "step": 34048 }, { "epoch": 0.87, "grad_norm": 3.1190545558929443, "learning_rate": 8.377794662426342e-07, "loss": 0.4539, "step": 34049 }, { "epoch": 0.87, "grad_norm": 0.8312985897064209, "learning_rate": 8.37446883746762e-07, "loss": 0.4487, "step": 34050 }, { "epoch": 0.87, "grad_norm": 3.166531562805176, "learning_rate": 8.371143643934487e-07, "loss": 0.5454, "step": 34051 }, { "epoch": 0.87, "grad_norm": 2.8540151119232178, "learning_rate": 8.36781908184987e-07, "loss": 0.6757, "step": 34052 }, { "epoch": 0.87, "grad_norm": 1.6524289846420288, "learning_rate": 8.364495151236685e-07, "loss": 0.5431, "step": 34053 }, { "epoch": 0.87, "grad_norm": 1.4428739547729492, "learning_rate": 8.361171852117822e-07, "loss": 0.6115, "step": 34054 }, { "epoch": 0.87, "grad_norm": 1.2743664979934692, "learning_rate": 8.357849184516176e-07, "loss": 0.4593, "step": 34055 }, { "epoch": 0.87, "grad_norm": 1.1319776773452759, "learning_rate": 8.354527148454683e-07, "loss": 0.4016, "step": 34056 }, { "epoch": 0.87, "grad_norm": 1.1393054723739624, "learning_rate": 8.351205743956203e-07, "loss": 0.4194, "step": 34057 }, { "epoch": 0.87, "grad_norm": 9.901884078979492, "learning_rate": 8.347884971043618e-07, "loss": 0.7107, "step": 34058 }, { "epoch": 0.87, "grad_norm": 1.4077831506729126, "learning_rate": 8.344564829739843e-07, "loss": 0.4763, "step": 34059 }, { "epoch": 0.87, "grad_norm": 1.361122965812683, "learning_rate": 8.341245320067748e-07, "loss": 0.4706, "step": 34060 }, { "epoch": 0.87, "grad_norm": 1.6202834844589233, "learning_rate": 8.337926442050181e-07, "loss": 0.4008, "step": 34061 }, { "epoch": 0.87, "grad_norm": 1.6497108936309814, "learning_rate": 8.334608195710059e-07, "loss": 0.4648, "step": 34062 }, { "epoch": 0.87, "grad_norm": 1.4654227495193481, "learning_rate": 8.331290581070228e-07, "loss": 0.5086, "step": 34063 }, { "epoch": 0.87, "grad_norm": 1.1745641231536865, "learning_rate": 8.327973598153549e-07, "loss": 0.4144, "step": 34064 }, { "epoch": 0.87, "grad_norm": 1.2691816091537476, "learning_rate": 8.324657246982859e-07, "loss": 0.5789, "step": 34065 }, { "epoch": 0.87, "grad_norm": 1.2066490650177002, "learning_rate": 8.321341527581062e-07, "loss": 0.4198, "step": 34066 }, { "epoch": 0.87, "grad_norm": 1.1098365783691406, "learning_rate": 8.318026439970972e-07, "loss": 0.5389, "step": 34067 }, { "epoch": 0.87, "grad_norm": 1.0783997774124146, "learning_rate": 8.314711984175439e-07, "loss": 0.3026, "step": 34068 }, { "epoch": 0.87, "grad_norm": 1.5164434909820557, "learning_rate": 8.31139816021731e-07, "loss": 0.5908, "step": 34069 }, { "epoch": 0.87, "grad_norm": 1.1640279293060303, "learning_rate": 8.308084968119434e-07, "loss": 0.4093, "step": 34070 }, { "epoch": 0.87, "grad_norm": 1.5158058404922485, "learning_rate": 8.304772407904626e-07, "loss": 0.5021, "step": 34071 }, { "epoch": 0.87, "grad_norm": 1.2257158756256104, "learning_rate": 8.301460479595691e-07, "loss": 0.5017, "step": 34072 }, { "epoch": 0.87, "grad_norm": 1.832082986831665, "learning_rate": 8.29814918321551e-07, "loss": 0.4921, "step": 34073 }, { "epoch": 0.87, "grad_norm": 0.8525804281234741, "learning_rate": 8.294838518786863e-07, "loss": 0.5451, "step": 34074 }, { "epoch": 0.87, "grad_norm": 1.6273353099822998, "learning_rate": 8.291528486332568e-07, "loss": 0.4483, "step": 34075 }, { "epoch": 0.87, "grad_norm": 1.7316771745681763, "learning_rate": 8.288219085875449e-07, "loss": 0.5153, "step": 34076 }, { "epoch": 0.87, "grad_norm": 9.634542465209961, "learning_rate": 8.28491031743831e-07, "loss": 0.5549, "step": 34077 }, { "epoch": 0.87, "grad_norm": 1.1888214349746704, "learning_rate": 8.281602181043947e-07, "loss": 0.4763, "step": 34078 }, { "epoch": 0.87, "grad_norm": 3.116792917251587, "learning_rate": 8.278294676715137e-07, "loss": 0.5593, "step": 34079 }, { "epoch": 0.87, "grad_norm": 6.4596076011657715, "learning_rate": 8.274987804474721e-07, "loss": 0.5598, "step": 34080 }, { "epoch": 0.87, "grad_norm": 1.989318609237671, "learning_rate": 8.271681564345457e-07, "loss": 0.4897, "step": 34081 }, { "epoch": 0.87, "grad_norm": 1.2383018732070923, "learning_rate": 8.268375956350117e-07, "loss": 0.6405, "step": 34082 }, { "epoch": 0.87, "grad_norm": 2.5584142208099365, "learning_rate": 8.265070980511513e-07, "loss": 0.5958, "step": 34083 }, { "epoch": 0.87, "grad_norm": 1.4746328592300415, "learning_rate": 8.261766636852408e-07, "loss": 0.5662, "step": 34084 }, { "epoch": 0.87, "grad_norm": 1.7695428133010864, "learning_rate": 8.258462925395549e-07, "loss": 0.5564, "step": 34085 }, { "epoch": 0.87, "grad_norm": 1.1055805683135986, "learning_rate": 8.255159846163751e-07, "loss": 0.4354, "step": 34086 }, { "epoch": 0.87, "grad_norm": 1.9104053974151611, "learning_rate": 8.251857399179752e-07, "loss": 0.6844, "step": 34087 }, { "epoch": 0.87, "grad_norm": 1.549553394317627, "learning_rate": 8.248555584466311e-07, "loss": 0.5004, "step": 34088 }, { "epoch": 0.87, "grad_norm": 2.3050622940063477, "learning_rate": 8.245254402046155e-07, "loss": 0.6736, "step": 34089 }, { "epoch": 0.87, "grad_norm": 3.340130090713501, "learning_rate": 8.241953851942086e-07, "loss": 0.4042, "step": 34090 }, { "epoch": 0.87, "grad_norm": 1.3566300868988037, "learning_rate": 8.238653934176821e-07, "loss": 0.3546, "step": 34091 }, { "epoch": 0.87, "grad_norm": 1.5491183996200562, "learning_rate": 8.235354648773086e-07, "loss": 0.5278, "step": 34092 }, { "epoch": 0.87, "grad_norm": 1.1027342081069946, "learning_rate": 8.232055995753641e-07, "loss": 0.5017, "step": 34093 }, { "epoch": 0.87, "grad_norm": 1.2209155559539795, "learning_rate": 8.228757975141222e-07, "loss": 0.5947, "step": 34094 }, { "epoch": 0.87, "grad_norm": 1.30323326587677, "learning_rate": 8.225460586958545e-07, "loss": 0.5604, "step": 34095 }, { "epoch": 0.87, "grad_norm": 1.4336268901824951, "learning_rate": 8.222163831228325e-07, "loss": 0.4139, "step": 34096 }, { "epoch": 0.87, "grad_norm": 2.782160997390747, "learning_rate": 8.218867707973299e-07, "loss": 0.4757, "step": 34097 }, { "epoch": 0.87, "grad_norm": 4.4096269607543945, "learning_rate": 8.215572217216173e-07, "loss": 0.5709, "step": 34098 }, { "epoch": 0.87, "grad_norm": 0.9217985272407532, "learning_rate": 8.212277358979648e-07, "loss": 0.4507, "step": 34099 }, { "epoch": 0.87, "grad_norm": 1.3963383436203003, "learning_rate": 8.208983133286453e-07, "loss": 0.5144, "step": 34100 }, { "epoch": 0.87, "grad_norm": 1.2516652345657349, "learning_rate": 8.205689540159289e-07, "loss": 0.606, "step": 34101 }, { "epoch": 0.87, "grad_norm": 1.3260704278945923, "learning_rate": 8.202396579620808e-07, "loss": 0.5767, "step": 34102 }, { "epoch": 0.87, "grad_norm": 1.639880895614624, "learning_rate": 8.199104251693768e-07, "loss": 0.5033, "step": 34103 }, { "epoch": 0.87, "grad_norm": 1.654630422592163, "learning_rate": 8.195812556400818e-07, "loss": 0.42, "step": 34104 }, { "epoch": 0.87, "grad_norm": 1.381382942199707, "learning_rate": 8.19252149376465e-07, "loss": 0.6101, "step": 34105 }, { "epoch": 0.87, "grad_norm": 3.5737099647521973, "learning_rate": 8.189231063807934e-07, "loss": 0.6062, "step": 34106 }, { "epoch": 0.87, "grad_norm": 22.4022216796875, "learning_rate": 8.185941266553365e-07, "loss": 0.6089, "step": 34107 }, { "epoch": 0.87, "grad_norm": 2.5629403591156006, "learning_rate": 8.182652102023614e-07, "loss": 0.4522, "step": 34108 }, { "epoch": 0.87, "grad_norm": 0.9388523101806641, "learning_rate": 8.179363570241316e-07, "loss": 0.5215, "step": 34109 }, { "epoch": 0.87, "grad_norm": 1.938629388809204, "learning_rate": 8.176075671229178e-07, "loss": 0.529, "step": 34110 }, { "epoch": 0.87, "grad_norm": 1.4968063831329346, "learning_rate": 8.172788405009835e-07, "loss": 0.5185, "step": 34111 }, { "epoch": 0.87, "grad_norm": 1.1984570026397705, "learning_rate": 8.169501771605948e-07, "loss": 0.3859, "step": 34112 }, { "epoch": 0.87, "grad_norm": 2.3184986114501953, "learning_rate": 8.166215771040142e-07, "loss": 0.5042, "step": 34113 }, { "epoch": 0.87, "grad_norm": 1.4499362707138062, "learning_rate": 8.162930403335089e-07, "loss": 0.4839, "step": 34114 }, { "epoch": 0.87, "grad_norm": 1.3732917308807373, "learning_rate": 8.159645668513438e-07, "loss": 0.5909, "step": 34115 }, { "epoch": 0.87, "grad_norm": 1.4436062574386597, "learning_rate": 8.156361566597782e-07, "loss": 0.57, "step": 34116 }, { "epoch": 0.87, "grad_norm": 1.5666139125823975, "learning_rate": 8.153078097610801e-07, "loss": 0.4956, "step": 34117 }, { "epoch": 0.87, "grad_norm": 6.0534796714782715, "learning_rate": 8.1497952615751e-07, "loss": 0.4122, "step": 34118 }, { "epoch": 0.87, "grad_norm": 0.8932211995124817, "learning_rate": 8.146513058513306e-07, "loss": 0.3768, "step": 34119 }, { "epoch": 0.87, "grad_norm": 1.0669300556182861, "learning_rate": 8.143231488448011e-07, "loss": 0.4748, "step": 34120 }, { "epoch": 0.87, "grad_norm": 1.5387109518051147, "learning_rate": 8.139950551401876e-07, "loss": 0.5893, "step": 34121 }, { "epoch": 0.87, "grad_norm": 1.5264415740966797, "learning_rate": 8.136670247397493e-07, "loss": 0.5089, "step": 34122 }, { "epoch": 0.87, "grad_norm": 1.5337055921554565, "learning_rate": 8.133390576457445e-07, "loss": 0.5521, "step": 34123 }, { "epoch": 0.87, "grad_norm": 1.5642735958099365, "learning_rate": 8.130111538604368e-07, "loss": 0.5809, "step": 34124 }, { "epoch": 0.87, "grad_norm": 2.2098746299743652, "learning_rate": 8.126833133860845e-07, "loss": 0.4622, "step": 34125 }, { "epoch": 0.87, "grad_norm": 1.9002829790115356, "learning_rate": 8.123555362249458e-07, "loss": 0.5436, "step": 34126 }, { "epoch": 0.87, "grad_norm": 1.7267346382141113, "learning_rate": 8.120278223792811e-07, "loss": 0.5809, "step": 34127 }, { "epoch": 0.87, "grad_norm": 2.043125867843628, "learning_rate": 8.117001718513485e-07, "loss": 0.5355, "step": 34128 }, { "epoch": 0.87, "grad_norm": 1.6080840826034546, "learning_rate": 8.113725846434062e-07, "loss": 0.4848, "step": 34129 }, { "epoch": 0.87, "grad_norm": 1.2818456888198853, "learning_rate": 8.110450607577092e-07, "loss": 0.3825, "step": 34130 }, { "epoch": 0.87, "grad_norm": 1.5868377685546875, "learning_rate": 8.107176001965178e-07, "loss": 0.5485, "step": 34131 }, { "epoch": 0.87, "grad_norm": 5.836065769195557, "learning_rate": 8.10390202962088e-07, "loss": 0.5217, "step": 34132 }, { "epoch": 0.87, "grad_norm": 1.2709429264068604, "learning_rate": 8.100628690566737e-07, "loss": 0.5096, "step": 34133 }, { "epoch": 0.87, "grad_norm": 1.7961517572402954, "learning_rate": 8.097355984825339e-07, "loss": 0.4225, "step": 34134 }, { "epoch": 0.87, "grad_norm": 1.5567939281463623, "learning_rate": 8.094083912419226e-07, "loss": 0.5873, "step": 34135 }, { "epoch": 0.87, "grad_norm": 2.2113049030303955, "learning_rate": 8.090812473370946e-07, "loss": 0.5147, "step": 34136 }, { "epoch": 0.87, "grad_norm": 2.798466205596924, "learning_rate": 8.087541667703025e-07, "loss": 0.4155, "step": 34137 }, { "epoch": 0.87, "grad_norm": 3.434889316558838, "learning_rate": 8.084271495438046e-07, "loss": 0.6918, "step": 34138 }, { "epoch": 0.88, "grad_norm": 2.0673935413360596, "learning_rate": 8.081001956598511e-07, "loss": 0.5612, "step": 34139 }, { "epoch": 0.88, "grad_norm": 3.7896993160247803, "learning_rate": 8.077733051206948e-07, "loss": 0.646, "step": 34140 }, { "epoch": 0.88, "grad_norm": 2.582839012145996, "learning_rate": 8.074464779285918e-07, "loss": 0.5395, "step": 34141 }, { "epoch": 0.88, "grad_norm": 1.2971513271331787, "learning_rate": 8.071197140857911e-07, "loss": 0.4046, "step": 34142 }, { "epoch": 0.88, "grad_norm": 1.5499234199523926, "learning_rate": 8.067930135945467e-07, "loss": 0.441, "step": 34143 }, { "epoch": 0.88, "grad_norm": 1.5081357955932617, "learning_rate": 8.064663764571079e-07, "loss": 0.3822, "step": 34144 }, { "epoch": 0.88, "grad_norm": 2.243685245513916, "learning_rate": 8.061398026757283e-07, "loss": 0.4455, "step": 34145 }, { "epoch": 0.88, "grad_norm": 1.1879507303237915, "learning_rate": 8.058132922526563e-07, "loss": 0.6634, "step": 34146 }, { "epoch": 0.88, "grad_norm": 1.2161697149276733, "learning_rate": 8.05486845190142e-07, "loss": 0.4744, "step": 34147 }, { "epoch": 0.88, "grad_norm": 15.86120891571045, "learning_rate": 8.051604614904363e-07, "loss": 0.5657, "step": 34148 }, { "epoch": 0.88, "grad_norm": 1.207961082458496, "learning_rate": 8.048341411557891e-07, "loss": 0.5897, "step": 34149 }, { "epoch": 0.88, "grad_norm": 1.4598397016525269, "learning_rate": 8.045078841884458e-07, "loss": 0.4521, "step": 34150 }, { "epoch": 0.88, "grad_norm": 1.5410542488098145, "learning_rate": 8.041816905906586e-07, "loss": 0.4817, "step": 34151 }, { "epoch": 0.88, "grad_norm": 1.727850079536438, "learning_rate": 8.038555603646747e-07, "loss": 0.4811, "step": 34152 }, { "epoch": 0.88, "grad_norm": 1.3383368253707886, "learning_rate": 8.035294935127392e-07, "loss": 0.4277, "step": 34153 }, { "epoch": 0.88, "grad_norm": 2.406249523162842, "learning_rate": 8.032034900371e-07, "loss": 0.6824, "step": 34154 }, { "epoch": 0.88, "grad_norm": 2.208433151245117, "learning_rate": 8.028775499400055e-07, "loss": 0.4774, "step": 34155 }, { "epoch": 0.88, "grad_norm": 6.881739139556885, "learning_rate": 8.025516732237015e-07, "loss": 0.5704, "step": 34156 }, { "epoch": 0.88, "grad_norm": 0.9713054895401001, "learning_rate": 8.022258598904309e-07, "loss": 0.3558, "step": 34157 }, { "epoch": 0.88, "grad_norm": 14.195856094360352, "learning_rate": 8.019001099424428e-07, "loss": 0.6443, "step": 34158 }, { "epoch": 0.88, "grad_norm": 1.4794659614562988, "learning_rate": 8.0157442338198e-07, "loss": 0.4063, "step": 34159 }, { "epoch": 0.88, "grad_norm": 1.753243088722229, "learning_rate": 8.012488002112872e-07, "loss": 0.5484, "step": 34160 }, { "epoch": 0.88, "grad_norm": 1.9576172828674316, "learning_rate": 8.009232404326073e-07, "loss": 0.586, "step": 34161 }, { "epoch": 0.88, "grad_norm": 2.072701930999756, "learning_rate": 8.00597744048186e-07, "loss": 0.5177, "step": 34162 }, { "epoch": 0.88, "grad_norm": 1.3171358108520508, "learning_rate": 8.002723110602661e-07, "loss": 0.3829, "step": 34163 }, { "epoch": 0.88, "grad_norm": 1.0902446508407593, "learning_rate": 7.99946941471088e-07, "loss": 0.5538, "step": 34164 }, { "epoch": 0.88, "grad_norm": 0.9187778234481812, "learning_rate": 7.996216352828967e-07, "loss": 0.4185, "step": 34165 }, { "epoch": 0.88, "grad_norm": 1.9349746704101562, "learning_rate": 7.992963924979324e-07, "loss": 0.6145, "step": 34166 }, { "epoch": 0.88, "grad_norm": 2.7909231185913086, "learning_rate": 7.989712131184357e-07, "loss": 0.4803, "step": 34167 }, { "epoch": 0.88, "grad_norm": 1.2363418340682983, "learning_rate": 7.986460971466515e-07, "loss": 0.5208, "step": 34168 }, { "epoch": 0.88, "grad_norm": 1.4731451272964478, "learning_rate": 7.983210445848166e-07, "loss": 0.6578, "step": 34169 }, { "epoch": 0.88, "grad_norm": 2.304164409637451, "learning_rate": 7.979960554351718e-07, "loss": 0.3748, "step": 34170 }, { "epoch": 0.88, "grad_norm": 1.734405755996704, "learning_rate": 7.976711296999551e-07, "loss": 0.4389, "step": 34171 }, { "epoch": 0.88, "grad_norm": 1.210998773574829, "learning_rate": 7.973462673814092e-07, "loss": 0.5792, "step": 34172 }, { "epoch": 0.88, "grad_norm": 1.7647490501403809, "learning_rate": 7.970214684817723e-07, "loss": 0.5469, "step": 34173 }, { "epoch": 0.88, "grad_norm": 3.376356840133667, "learning_rate": 7.966967330032782e-07, "loss": 0.4498, "step": 34174 }, { "epoch": 0.88, "grad_norm": 1.2981609106063843, "learning_rate": 7.963720609481707e-07, "loss": 0.4261, "step": 34175 }, { "epoch": 0.88, "grad_norm": 1.0850913524627686, "learning_rate": 7.960474523186846e-07, "loss": 0.3925, "step": 34176 }, { "epoch": 0.88, "grad_norm": 1.838592290878296, "learning_rate": 7.957229071170569e-07, "loss": 0.664, "step": 34177 }, { "epoch": 0.88, "grad_norm": 1.5647724866867065, "learning_rate": 7.953984253455227e-07, "loss": 0.4181, "step": 34178 }, { "epoch": 0.88, "grad_norm": 1.2104218006134033, "learning_rate": 7.950740070063212e-07, "loss": 0.5372, "step": 34179 }, { "epoch": 0.88, "grad_norm": 10.617755889892578, "learning_rate": 7.947496521016873e-07, "loss": 0.463, "step": 34180 }, { "epoch": 0.88, "grad_norm": 1.2476806640625, "learning_rate": 7.944253606338526e-07, "loss": 0.4064, "step": 34181 }, { "epoch": 0.88, "grad_norm": 0.9912741184234619, "learning_rate": 7.941011326050574e-07, "loss": 0.4294, "step": 34182 }, { "epoch": 0.88, "grad_norm": 2.3360977172851562, "learning_rate": 7.937769680175334e-07, "loss": 0.4909, "step": 34183 }, { "epoch": 0.88, "grad_norm": 2.317847490310669, "learning_rate": 7.934528668735142e-07, "loss": 0.5124, "step": 34184 }, { "epoch": 0.88, "grad_norm": 1.5087577104568481, "learning_rate": 7.931288291752315e-07, "loss": 0.5142, "step": 34185 }, { "epoch": 0.88, "grad_norm": 1.4521982669830322, "learning_rate": 7.928048549249234e-07, "loss": 0.4338, "step": 34186 }, { "epoch": 0.88, "grad_norm": 1.020115613937378, "learning_rate": 7.924809441248194e-07, "loss": 0.454, "step": 34187 }, { "epoch": 0.88, "grad_norm": 1.4761337041854858, "learning_rate": 7.921570967771497e-07, "loss": 0.5034, "step": 34188 }, { "epoch": 0.88, "grad_norm": 1.8917486667633057, "learning_rate": 7.918333128841505e-07, "loss": 0.5409, "step": 34189 }, { "epoch": 0.88, "grad_norm": 1.4646520614624023, "learning_rate": 7.915095924480509e-07, "loss": 0.4124, "step": 34190 }, { "epoch": 0.88, "grad_norm": 0.9756224155426025, "learning_rate": 7.911859354710805e-07, "loss": 0.449, "step": 34191 }, { "epoch": 0.88, "grad_norm": 2.6567416191101074, "learning_rate": 7.908623419554729e-07, "loss": 0.4637, "step": 34192 }, { "epoch": 0.88, "grad_norm": 1.4634613990783691, "learning_rate": 7.905388119034551e-07, "loss": 0.4169, "step": 34193 }, { "epoch": 0.88, "grad_norm": 0.8252492547035217, "learning_rate": 7.90215345317259e-07, "loss": 0.4124, "step": 34194 }, { "epoch": 0.88, "grad_norm": 5.883719444274902, "learning_rate": 7.898919421991113e-07, "loss": 0.5265, "step": 34195 }, { "epoch": 0.88, "grad_norm": 1.997158169746399, "learning_rate": 7.895686025512427e-07, "loss": 0.4243, "step": 34196 }, { "epoch": 0.88, "grad_norm": 2.081510066986084, "learning_rate": 7.892453263758804e-07, "loss": 0.6333, "step": 34197 }, { "epoch": 0.88, "grad_norm": 1.5537899732589722, "learning_rate": 7.889221136752523e-07, "loss": 0.4414, "step": 34198 }, { "epoch": 0.88, "grad_norm": 1.3347409963607788, "learning_rate": 7.885989644515868e-07, "loss": 0.5222, "step": 34199 }, { "epoch": 0.88, "grad_norm": 1.2127681970596313, "learning_rate": 7.882758787071099e-07, "loss": 0.5159, "step": 34200 }, { "epoch": 0.88, "grad_norm": 1.1315287351608276, "learning_rate": 7.879528564440487e-07, "loss": 0.5024, "step": 34201 }, { "epoch": 0.88, "grad_norm": 1.0373799800872803, "learning_rate": 7.876298976646269e-07, "loss": 0.4284, "step": 34202 }, { "epoch": 0.88, "grad_norm": 19.290494918823242, "learning_rate": 7.873070023710739e-07, "loss": 0.5729, "step": 34203 }, { "epoch": 0.88, "grad_norm": 0.8041166663169861, "learning_rate": 7.869841705656123e-07, "loss": 0.4318, "step": 34204 }, { "epoch": 0.88, "grad_norm": 5.7971696853637695, "learning_rate": 7.866614022504671e-07, "loss": 0.3651, "step": 34205 }, { "epoch": 0.88, "grad_norm": 1.610617995262146, "learning_rate": 7.863386974278642e-07, "loss": 0.4154, "step": 34206 }, { "epoch": 0.88, "grad_norm": 1.6455714702606201, "learning_rate": 7.860160561000263e-07, "loss": 0.5828, "step": 34207 }, { "epoch": 0.88, "grad_norm": 1.2203280925750732, "learning_rate": 7.85693478269175e-07, "loss": 0.6133, "step": 34208 }, { "epoch": 0.88, "grad_norm": 0.873455286026001, "learning_rate": 7.853709639375373e-07, "loss": 0.406, "step": 34209 }, { "epoch": 0.88, "grad_norm": 1.0035148859024048, "learning_rate": 7.850485131073338e-07, "loss": 0.3921, "step": 34210 }, { "epoch": 0.88, "grad_norm": 0.980235755443573, "learning_rate": 7.84726125780787e-07, "loss": 0.3933, "step": 34211 }, { "epoch": 0.88, "grad_norm": 8.248736381530762, "learning_rate": 7.844038019601164e-07, "loss": 0.7123, "step": 34212 }, { "epoch": 0.88, "grad_norm": 2.1163463592529297, "learning_rate": 7.840815416475456e-07, "loss": 0.4663, "step": 34213 }, { "epoch": 0.88, "grad_norm": 1.6279573440551758, "learning_rate": 7.837593448452962e-07, "loss": 0.4936, "step": 34214 }, { "epoch": 0.88, "grad_norm": 1.2527973651885986, "learning_rate": 7.834372115555843e-07, "loss": 0.4576, "step": 34215 }, { "epoch": 0.88, "grad_norm": 1.8271273374557495, "learning_rate": 7.831151417806348e-07, "loss": 0.7102, "step": 34216 }, { "epoch": 0.88, "grad_norm": 3.4650278091430664, "learning_rate": 7.827931355226659e-07, "loss": 0.5726, "step": 34217 }, { "epoch": 0.88, "grad_norm": 2.420391798019409, "learning_rate": 7.824711927838946e-07, "loss": 0.4975, "step": 34218 }, { "epoch": 0.88, "grad_norm": 5.983492851257324, "learning_rate": 7.821493135665404e-07, "loss": 0.6956, "step": 34219 }, { "epoch": 0.88, "grad_norm": 2.1943230628967285, "learning_rate": 7.818274978728224e-07, "loss": 0.6228, "step": 34220 }, { "epoch": 0.88, "grad_norm": 0.923613965511322, "learning_rate": 7.815057457049591e-07, "loss": 0.5471, "step": 34221 }, { "epoch": 0.88, "grad_norm": 1.973230004310608, "learning_rate": 7.811840570651641e-07, "loss": 0.5783, "step": 34222 }, { "epoch": 0.88, "grad_norm": 4.070454120635986, "learning_rate": 7.808624319556579e-07, "loss": 0.6239, "step": 34223 }, { "epoch": 0.88, "grad_norm": 1.175986647605896, "learning_rate": 7.805408703786566e-07, "loss": 0.3448, "step": 34224 }, { "epoch": 0.88, "grad_norm": 1.577859878540039, "learning_rate": 7.802193723363749e-07, "loss": 0.55, "step": 34225 }, { "epoch": 0.88, "grad_norm": 1.3430795669555664, "learning_rate": 7.798979378310278e-07, "loss": 0.6102, "step": 34226 }, { "epoch": 0.88, "grad_norm": 1.7555466890335083, "learning_rate": 7.795765668648325e-07, "loss": 0.5754, "step": 34227 }, { "epoch": 0.88, "grad_norm": 1.261375904083252, "learning_rate": 7.792552594400038e-07, "loss": 0.4193, "step": 34228 }, { "epoch": 0.88, "grad_norm": 1.3862980604171753, "learning_rate": 7.789340155587522e-07, "loss": 0.5483, "step": 34229 }, { "epoch": 0.88, "grad_norm": 1.226842999458313, "learning_rate": 7.786128352232958e-07, "loss": 0.4745, "step": 34230 }, { "epoch": 0.88, "grad_norm": 1.418799638748169, "learning_rate": 7.782917184358463e-07, "loss": 0.6508, "step": 34231 }, { "epoch": 0.88, "grad_norm": 1.2119555473327637, "learning_rate": 7.779706651986151e-07, "loss": 0.4887, "step": 34232 }, { "epoch": 0.88, "grad_norm": 1.1880027055740356, "learning_rate": 7.776496755138185e-07, "loss": 0.4201, "step": 34233 }, { "epoch": 0.88, "grad_norm": 0.9527661800384521, "learning_rate": 7.773287493836656e-07, "loss": 0.5171, "step": 34234 }, { "epoch": 0.88, "grad_norm": 1.1734951734542847, "learning_rate": 7.770078868103681e-07, "loss": 0.3787, "step": 34235 }, { "epoch": 0.88, "grad_norm": 1.2143874168395996, "learning_rate": 7.766870877961375e-07, "loss": 0.5731, "step": 34236 }, { "epoch": 0.88, "grad_norm": 1.5094367265701294, "learning_rate": 7.763663523431853e-07, "loss": 0.5293, "step": 34237 }, { "epoch": 0.88, "grad_norm": 4.008552551269531, "learning_rate": 7.76045680453722e-07, "loss": 0.7846, "step": 34238 }, { "epoch": 0.88, "grad_norm": 1.7641011476516724, "learning_rate": 7.757250721299547e-07, "loss": 0.3842, "step": 34239 }, { "epoch": 0.88, "grad_norm": 1.5759994983673096, "learning_rate": 7.754045273740973e-07, "loss": 0.5218, "step": 34240 }, { "epoch": 0.88, "grad_norm": 1.283207893371582, "learning_rate": 7.750840461883569e-07, "loss": 0.6437, "step": 34241 }, { "epoch": 0.88, "grad_norm": 1.31684410572052, "learning_rate": 7.747636285749405e-07, "loss": 0.475, "step": 34242 }, { "epoch": 0.88, "grad_norm": 1.602957010269165, "learning_rate": 7.744432745360553e-07, "loss": 0.6313, "step": 34243 }, { "epoch": 0.88, "grad_norm": 1.2252299785614014, "learning_rate": 7.74122984073914e-07, "loss": 0.5533, "step": 34244 }, { "epoch": 0.88, "grad_norm": 2.6884653568267822, "learning_rate": 7.738027571907203e-07, "loss": 0.515, "step": 34245 }, { "epoch": 0.88, "grad_norm": 3.1705009937286377, "learning_rate": 7.734825938886792e-07, "loss": 0.5754, "step": 34246 }, { "epoch": 0.88, "grad_norm": 0.912828266620636, "learning_rate": 7.731624941700011e-07, "loss": 0.4834, "step": 34247 }, { "epoch": 0.88, "grad_norm": 1.771518349647522, "learning_rate": 7.728424580368909e-07, "loss": 0.4444, "step": 34248 }, { "epoch": 0.88, "grad_norm": 2.657528877258301, "learning_rate": 7.725224854915514e-07, "loss": 0.6407, "step": 34249 }, { "epoch": 0.88, "grad_norm": 1.1026198863983154, "learning_rate": 7.722025765361918e-07, "loss": 0.5257, "step": 34250 }, { "epoch": 0.88, "grad_norm": 3.1438729763031006, "learning_rate": 7.718827311730137e-07, "loss": 0.5852, "step": 34251 }, { "epoch": 0.88, "grad_norm": 1.3264659643173218, "learning_rate": 7.715629494042232e-07, "loss": 0.6181, "step": 34252 }, { "epoch": 0.88, "grad_norm": 2.0343446731567383, "learning_rate": 7.712432312320206e-07, "loss": 0.5924, "step": 34253 }, { "epoch": 0.88, "grad_norm": 1.8647736310958862, "learning_rate": 7.709235766586132e-07, "loss": 0.4534, "step": 34254 }, { "epoch": 0.88, "grad_norm": 1.0388354063034058, "learning_rate": 7.706039856862024e-07, "loss": 0.6156, "step": 34255 }, { "epoch": 0.88, "grad_norm": 12.179585456848145, "learning_rate": 7.702844583169889e-07, "loss": 0.5077, "step": 34256 }, { "epoch": 0.88, "grad_norm": 1.5368263721466064, "learning_rate": 7.699649945531784e-07, "loss": 0.5247, "step": 34257 }, { "epoch": 0.88, "grad_norm": 12.58617877960205, "learning_rate": 7.696455943969694e-07, "loss": 0.5458, "step": 34258 }, { "epoch": 0.88, "grad_norm": 1.4483473300933838, "learning_rate": 7.693262578505645e-07, "loss": 0.5317, "step": 34259 }, { "epoch": 0.88, "grad_norm": 4.159268379211426, "learning_rate": 7.690069849161618e-07, "loss": 0.6035, "step": 34260 }, { "epoch": 0.88, "grad_norm": 0.970390260219574, "learning_rate": 7.686877755959643e-07, "loss": 0.4521, "step": 34261 }, { "epoch": 0.88, "grad_norm": 1.3881183862686157, "learning_rate": 7.683686298921722e-07, "loss": 0.5572, "step": 34262 }, { "epoch": 0.88, "grad_norm": 2.3095648288726807, "learning_rate": 7.680495478069816e-07, "loss": 0.5079, "step": 34263 }, { "epoch": 0.88, "grad_norm": 1.4159157276153564, "learning_rate": 7.677305293425951e-07, "loss": 0.5013, "step": 34264 }, { "epoch": 0.88, "grad_norm": 1.1064047813415527, "learning_rate": 7.6741157450121e-07, "loss": 0.4844, "step": 34265 }, { "epoch": 0.88, "grad_norm": 1.465760350227356, "learning_rate": 7.670926832850234e-07, "loss": 0.5966, "step": 34266 }, { "epoch": 0.88, "grad_norm": 1.1027628183364868, "learning_rate": 7.667738556962312e-07, "loss": 0.3241, "step": 34267 }, { "epoch": 0.88, "grad_norm": 3.386289358139038, "learning_rate": 7.664550917370351e-07, "loss": 0.4906, "step": 34268 }, { "epoch": 0.88, "grad_norm": 1.2175623178482056, "learning_rate": 7.661363914096287e-07, "loss": 0.6296, "step": 34269 }, { "epoch": 0.88, "grad_norm": 2.071960687637329, "learning_rate": 7.658177547162082e-07, "loss": 0.3715, "step": 34270 }, { "epoch": 0.88, "grad_norm": 1.4254285097122192, "learning_rate": 7.654991816589708e-07, "loss": 0.5977, "step": 34271 }, { "epoch": 0.88, "grad_norm": 2.9667274951934814, "learning_rate": 7.651806722401123e-07, "loss": 0.4766, "step": 34272 }, { "epoch": 0.88, "grad_norm": 1.1652069091796875, "learning_rate": 7.648622264618244e-07, "loss": 0.3434, "step": 34273 }, { "epoch": 0.88, "grad_norm": 2.739380359649658, "learning_rate": 7.645438443263065e-07, "loss": 0.6154, "step": 34274 }, { "epoch": 0.88, "grad_norm": 1.3196622133255005, "learning_rate": 7.6422552583575e-07, "loss": 0.5979, "step": 34275 }, { "epoch": 0.88, "grad_norm": 1.2773195505142212, "learning_rate": 7.639072709923478e-07, "loss": 0.5001, "step": 34276 }, { "epoch": 0.88, "grad_norm": 1.5470566749572754, "learning_rate": 7.635890797982925e-07, "loss": 0.5472, "step": 34277 }, { "epoch": 0.88, "grad_norm": 1.4988186359405518, "learning_rate": 7.632709522557813e-07, "loss": 0.4872, "step": 34278 }, { "epoch": 0.88, "grad_norm": 1.2127060890197754, "learning_rate": 7.629528883670024e-07, "loss": 0.3592, "step": 34279 }, { "epoch": 0.88, "grad_norm": 1.7395840883255005, "learning_rate": 7.626348881341472e-07, "loss": 0.4858, "step": 34280 }, { "epoch": 0.88, "grad_norm": 3.4777567386627197, "learning_rate": 7.623169515594109e-07, "loss": 0.4822, "step": 34281 }, { "epoch": 0.88, "grad_norm": 1.0085110664367676, "learning_rate": 7.619990786449827e-07, "loss": 0.4821, "step": 34282 }, { "epoch": 0.88, "grad_norm": 1.0417221784591675, "learning_rate": 7.616812693930531e-07, "loss": 0.486, "step": 34283 }, { "epoch": 0.88, "grad_norm": 1.7631853818893433, "learning_rate": 7.613635238058103e-07, "loss": 0.5761, "step": 34284 }, { "epoch": 0.88, "grad_norm": 1.6588698625564575, "learning_rate": 7.610458418854493e-07, "loss": 0.3686, "step": 34285 }, { "epoch": 0.88, "grad_norm": 13.576271057128906, "learning_rate": 7.607282236341517e-07, "loss": 0.4755, "step": 34286 }, { "epoch": 0.88, "grad_norm": 1.2150375843048096, "learning_rate": 7.604106690541124e-07, "loss": 0.4342, "step": 34287 }, { "epoch": 0.88, "grad_norm": 1.308586597442627, "learning_rate": 7.600931781475185e-07, "loss": 0.5049, "step": 34288 }, { "epoch": 0.88, "grad_norm": 1.2664812803268433, "learning_rate": 7.597757509165571e-07, "loss": 0.4879, "step": 34289 }, { "epoch": 0.88, "grad_norm": 2.6193511486053467, "learning_rate": 7.594583873634142e-07, "loss": 0.5328, "step": 34290 }, { "epoch": 0.88, "grad_norm": 1.3019582033157349, "learning_rate": 7.591410874902805e-07, "loss": 0.4906, "step": 34291 }, { "epoch": 0.88, "grad_norm": 1.7661285400390625, "learning_rate": 7.588238512993407e-07, "loss": 0.6241, "step": 34292 }, { "epoch": 0.88, "grad_norm": 0.968032956123352, "learning_rate": 7.585066787927786e-07, "loss": 0.4791, "step": 34293 }, { "epoch": 0.88, "grad_norm": 1.4539715051651, "learning_rate": 7.581895699727848e-07, "loss": 0.5388, "step": 34294 }, { "epoch": 0.88, "grad_norm": 0.8862947821617126, "learning_rate": 7.57872524841542e-07, "loss": 0.5894, "step": 34295 }, { "epoch": 0.88, "grad_norm": 1.3933111429214478, "learning_rate": 7.575555434012361e-07, "loss": 0.5134, "step": 34296 }, { "epoch": 0.88, "grad_norm": 1.6859393119812012, "learning_rate": 7.572386256540476e-07, "loss": 0.5454, "step": 34297 }, { "epoch": 0.88, "grad_norm": 1.6617491245269775, "learning_rate": 7.56921771602166e-07, "loss": 0.7774, "step": 34298 }, { "epoch": 0.88, "grad_norm": 1.7483620643615723, "learning_rate": 7.566049812477728e-07, "loss": 0.5803, "step": 34299 }, { "epoch": 0.88, "grad_norm": 1.4312127828598022, "learning_rate": 7.562882545930483e-07, "loss": 0.5582, "step": 34300 }, { "epoch": 0.88, "grad_norm": 1.020050287246704, "learning_rate": 7.5597159164018e-07, "loss": 0.5339, "step": 34301 }, { "epoch": 0.88, "grad_norm": 1.132538080215454, "learning_rate": 7.556549923913481e-07, "loss": 0.4423, "step": 34302 }, { "epoch": 0.88, "grad_norm": 1.6922327280044556, "learning_rate": 7.55338456848732e-07, "loss": 0.5295, "step": 34303 }, { "epoch": 0.88, "grad_norm": 1.2590439319610596, "learning_rate": 7.550219850145169e-07, "loss": 0.6081, "step": 34304 }, { "epoch": 0.88, "grad_norm": 2.2901604175567627, "learning_rate": 7.547055768908818e-07, "loss": 0.6126, "step": 34305 }, { "epoch": 0.88, "grad_norm": 1.1962872743606567, "learning_rate": 7.543892324800084e-07, "loss": 0.4452, "step": 34306 }, { "epoch": 0.88, "grad_norm": 6.233904838562012, "learning_rate": 7.540729517840728e-07, "loss": 0.5405, "step": 34307 }, { "epoch": 0.88, "grad_norm": 1.3375959396362305, "learning_rate": 7.537567348052599e-07, "loss": 0.397, "step": 34308 }, { "epoch": 0.88, "grad_norm": 1.354830026626587, "learning_rate": 7.534405815457468e-07, "loss": 0.2687, "step": 34309 }, { "epoch": 0.88, "grad_norm": 1.5170295238494873, "learning_rate": 7.531244920077096e-07, "loss": 0.6071, "step": 34310 }, { "epoch": 0.88, "grad_norm": 1.7470760345458984, "learning_rate": 7.528084661933311e-07, "loss": 0.5444, "step": 34311 }, { "epoch": 0.88, "grad_norm": 1.3724844455718994, "learning_rate": 7.524925041047859e-07, "loss": 0.4144, "step": 34312 }, { "epoch": 0.88, "grad_norm": 1.5192110538482666, "learning_rate": 7.521766057442537e-07, "loss": 0.559, "step": 34313 }, { "epoch": 0.88, "grad_norm": 5.239048957824707, "learning_rate": 7.518607711139081e-07, "loss": 0.7344, "step": 34314 }, { "epoch": 0.88, "grad_norm": 1.4117486476898193, "learning_rate": 7.515450002159297e-07, "loss": 0.7003, "step": 34315 }, { "epoch": 0.88, "grad_norm": 9.092011451721191, "learning_rate": 7.512292930524933e-07, "loss": 0.4772, "step": 34316 }, { "epoch": 0.88, "grad_norm": 1.3908945322036743, "learning_rate": 7.509136496257707e-07, "loss": 0.5205, "step": 34317 }, { "epoch": 0.88, "grad_norm": 1.4443074464797974, "learning_rate": 7.505980699379433e-07, "loss": 0.583, "step": 34318 }, { "epoch": 0.88, "grad_norm": 2.1675758361816406, "learning_rate": 7.502825539911829e-07, "loss": 0.64, "step": 34319 }, { "epoch": 0.88, "grad_norm": 1.092483639717102, "learning_rate": 7.499671017876642e-07, "loss": 0.4412, "step": 34320 }, { "epoch": 0.88, "grad_norm": 0.9127304553985596, "learning_rate": 7.496517133295578e-07, "loss": 0.4359, "step": 34321 }, { "epoch": 0.88, "grad_norm": 0.8441566824913025, "learning_rate": 7.493363886190431e-07, "loss": 0.3877, "step": 34322 }, { "epoch": 0.88, "grad_norm": 2.903510093688965, "learning_rate": 7.490211276582893e-07, "loss": 0.5141, "step": 34323 }, { "epoch": 0.88, "grad_norm": 1.2095904350280762, "learning_rate": 7.487059304494693e-07, "loss": 0.47, "step": 34324 }, { "epoch": 0.88, "grad_norm": 4.278916835784912, "learning_rate": 7.483907969947557e-07, "loss": 0.6424, "step": 34325 }, { "epoch": 0.88, "grad_norm": 1.149502158164978, "learning_rate": 7.480757272963224e-07, "loss": 0.5396, "step": 34326 }, { "epoch": 0.88, "grad_norm": 1.3377810716629028, "learning_rate": 7.477607213563354e-07, "loss": 0.4797, "step": 34327 }, { "epoch": 0.88, "grad_norm": 2.1856589317321777, "learning_rate": 7.474457791769707e-07, "loss": 0.6845, "step": 34328 }, { "epoch": 0.88, "grad_norm": 1.6202583312988281, "learning_rate": 7.471309007603966e-07, "loss": 0.5659, "step": 34329 }, { "epoch": 0.88, "grad_norm": 3.3126630783081055, "learning_rate": 7.468160861087836e-07, "loss": 0.4885, "step": 34330 }, { "epoch": 0.88, "grad_norm": 1.4610121250152588, "learning_rate": 7.465013352242989e-07, "loss": 0.5195, "step": 34331 }, { "epoch": 0.88, "grad_norm": 8.40544319152832, "learning_rate": 7.46186648109114e-07, "loss": 1.0032, "step": 34332 }, { "epoch": 0.88, "grad_norm": 1.3684641122817993, "learning_rate": 7.458720247653983e-07, "loss": 0.646, "step": 34333 }, { "epoch": 0.88, "grad_norm": 2.54347562789917, "learning_rate": 7.455574651953168e-07, "loss": 0.6543, "step": 34334 }, { "epoch": 0.88, "grad_norm": 1.7423681020736694, "learning_rate": 7.452429694010399e-07, "loss": 0.4927, "step": 34335 }, { "epoch": 0.88, "grad_norm": 1.4264909029006958, "learning_rate": 7.449285373847348e-07, "loss": 0.4563, "step": 34336 }, { "epoch": 0.88, "grad_norm": 15.171419143676758, "learning_rate": 7.446141691485665e-07, "loss": 0.5618, "step": 34337 }, { "epoch": 0.88, "grad_norm": 3.077322244644165, "learning_rate": 7.44299864694702e-07, "loss": 0.7057, "step": 34338 }, { "epoch": 0.88, "grad_norm": 14.472114562988281, "learning_rate": 7.439856240253085e-07, "loss": 0.4414, "step": 34339 }, { "epoch": 0.88, "grad_norm": 1.3356074094772339, "learning_rate": 7.436714471425521e-07, "loss": 0.4206, "step": 34340 }, { "epoch": 0.88, "grad_norm": 1.26921546459198, "learning_rate": 7.433573340485945e-07, "loss": 0.5407, "step": 34341 }, { "epoch": 0.88, "grad_norm": 1.153351068496704, "learning_rate": 7.430432847456037e-07, "loss": 0.4564, "step": 34342 }, { "epoch": 0.88, "grad_norm": 1.2296648025512695, "learning_rate": 7.427292992357438e-07, "loss": 0.587, "step": 34343 }, { "epoch": 0.88, "grad_norm": 10.01259994506836, "learning_rate": 7.424153775211751e-07, "loss": 0.8638, "step": 34344 }, { "epoch": 0.88, "grad_norm": 1.365867018699646, "learning_rate": 7.42101519604066e-07, "loss": 0.4518, "step": 34345 }, { "epoch": 0.88, "grad_norm": 1.5266965627670288, "learning_rate": 7.417877254865757e-07, "loss": 0.4877, "step": 34346 }, { "epoch": 0.88, "grad_norm": 1.0383025407791138, "learning_rate": 7.414739951708683e-07, "loss": 0.5191, "step": 34347 }, { "epoch": 0.88, "grad_norm": 1.176244854927063, "learning_rate": 7.41160328659104e-07, "loss": 0.5338, "step": 34348 }, { "epoch": 0.88, "grad_norm": 1.280194640159607, "learning_rate": 7.408467259534469e-07, "loss": 0.3447, "step": 34349 }, { "epoch": 0.88, "grad_norm": 1.5681071281433105, "learning_rate": 7.405331870560572e-07, "loss": 0.6755, "step": 34350 }, { "epoch": 0.88, "grad_norm": 1.854418396949768, "learning_rate": 7.402197119690935e-07, "loss": 0.454, "step": 34351 }, { "epoch": 0.88, "grad_norm": 6.373711585998535, "learning_rate": 7.399063006947204e-07, "loss": 0.5503, "step": 34352 }, { "epoch": 0.88, "grad_norm": 1.4542309045791626, "learning_rate": 7.395929532350942e-07, "loss": 0.4845, "step": 34353 }, { "epoch": 0.88, "grad_norm": 1.8385705947875977, "learning_rate": 7.392796695923765e-07, "loss": 0.5275, "step": 34354 }, { "epoch": 0.88, "grad_norm": 5.147289276123047, "learning_rate": 7.389664497687233e-07, "loss": 0.669, "step": 34355 }, { "epoch": 0.88, "grad_norm": 1.0205092430114746, "learning_rate": 7.38653293766296e-07, "loss": 0.4333, "step": 34356 }, { "epoch": 0.88, "grad_norm": 2.319108486175537, "learning_rate": 7.383402015872521e-07, "loss": 0.6743, "step": 34357 }, { "epoch": 0.88, "grad_norm": 1.0768591165542603, "learning_rate": 7.380271732337474e-07, "loss": 0.4108, "step": 34358 }, { "epoch": 0.88, "grad_norm": 1.5157140493392944, "learning_rate": 7.377142087079414e-07, "loss": 0.6664, "step": 34359 }, { "epoch": 0.88, "grad_norm": 1.0726802349090576, "learning_rate": 7.374013080119902e-07, "loss": 0.4435, "step": 34360 }, { "epoch": 0.88, "grad_norm": 2.1414194107055664, "learning_rate": 7.370884711480497e-07, "loss": 0.497, "step": 34361 }, { "epoch": 0.88, "grad_norm": 1.064613938331604, "learning_rate": 7.367756981182749e-07, "loss": 0.4365, "step": 34362 }, { "epoch": 0.88, "grad_norm": 1.2230066061019897, "learning_rate": 7.364629889248231e-07, "loss": 0.4169, "step": 34363 }, { "epoch": 0.88, "grad_norm": 1.6324927806854248, "learning_rate": 7.361503435698492e-07, "loss": 0.5111, "step": 34364 }, { "epoch": 0.88, "grad_norm": 1.81194007396698, "learning_rate": 7.358377620555057e-07, "loss": 0.516, "step": 34365 }, { "epoch": 0.88, "grad_norm": 0.7809368371963501, "learning_rate": 7.355252443839489e-07, "loss": 0.2728, "step": 34366 }, { "epoch": 0.88, "grad_norm": 0.9686205983161926, "learning_rate": 7.352127905573314e-07, "loss": 0.4995, "step": 34367 }, { "epoch": 0.88, "grad_norm": 0.9659457206726074, "learning_rate": 7.349004005778049e-07, "loss": 0.4076, "step": 34368 }, { "epoch": 0.88, "grad_norm": 1.4258917570114136, "learning_rate": 7.345880744475264e-07, "loss": 0.6497, "step": 34369 }, { "epoch": 0.88, "grad_norm": 6.181579113006592, "learning_rate": 7.342758121686456e-07, "loss": 0.5546, "step": 34370 }, { "epoch": 0.88, "grad_norm": 2.841411590576172, "learning_rate": 7.339636137433148e-07, "loss": 0.6417, "step": 34371 }, { "epoch": 0.88, "grad_norm": 1.331102728843689, "learning_rate": 7.336514791736826e-07, "loss": 0.4799, "step": 34372 }, { "epoch": 0.88, "grad_norm": 12.203293800354004, "learning_rate": 7.33339408461905e-07, "loss": 0.49, "step": 34373 }, { "epoch": 0.88, "grad_norm": 4.509538173675537, "learning_rate": 7.330274016101302e-07, "loss": 0.753, "step": 34374 }, { "epoch": 0.88, "grad_norm": 1.5974688529968262, "learning_rate": 7.327154586205076e-07, "loss": 0.6452, "step": 34375 }, { "epoch": 0.88, "grad_norm": 1.1900542974472046, "learning_rate": 7.324035794951889e-07, "loss": 0.4551, "step": 34376 }, { "epoch": 0.88, "grad_norm": 3.3707146644592285, "learning_rate": 7.320917642363213e-07, "loss": 0.4734, "step": 34377 }, { "epoch": 0.88, "grad_norm": 4.60344934463501, "learning_rate": 7.317800128460562e-07, "loss": 0.5126, "step": 34378 }, { "epoch": 0.88, "grad_norm": 1.3683058023452759, "learning_rate": 7.314683253265375e-07, "loss": 0.6018, "step": 34379 }, { "epoch": 0.88, "grad_norm": 1.3913886547088623, "learning_rate": 7.311567016799182e-07, "loss": 0.5321, "step": 34380 }, { "epoch": 0.88, "grad_norm": 2.019890546798706, "learning_rate": 7.30845141908344e-07, "loss": 0.6934, "step": 34381 }, { "epoch": 0.88, "grad_norm": 1.898521065711975, "learning_rate": 7.305336460139589e-07, "loss": 0.513, "step": 34382 }, { "epoch": 0.88, "grad_norm": 1.6858078241348267, "learning_rate": 7.302222139989145e-07, "loss": 0.6905, "step": 34383 }, { "epoch": 0.88, "grad_norm": 1.7525618076324463, "learning_rate": 7.299108458653547e-07, "loss": 0.5877, "step": 34384 }, { "epoch": 0.88, "grad_norm": 1.136618733406067, "learning_rate": 7.295995416154245e-07, "loss": 0.5776, "step": 34385 }, { "epoch": 0.88, "grad_norm": 1.944344401359558, "learning_rate": 7.292883012512708e-07, "loss": 0.4584, "step": 34386 }, { "epoch": 0.88, "grad_norm": 2.7927842140197754, "learning_rate": 7.289771247750376e-07, "loss": 0.7469, "step": 34387 }, { "epoch": 0.88, "grad_norm": 1.4142422676086426, "learning_rate": 7.2866601218887e-07, "loss": 0.6357, "step": 34388 }, { "epoch": 0.88, "grad_norm": 1.4878859519958496, "learning_rate": 7.283549634949094e-07, "loss": 0.5772, "step": 34389 }, { "epoch": 0.88, "grad_norm": 1.5343517065048218, "learning_rate": 7.280439786953031e-07, "loss": 0.4694, "step": 34390 }, { "epoch": 0.88, "grad_norm": 1.5321375131607056, "learning_rate": 7.277330577921926e-07, "loss": 0.5024, "step": 34391 }, { "epoch": 0.88, "grad_norm": 1.1396725177764893, "learning_rate": 7.274222007877185e-07, "loss": 0.5427, "step": 34392 }, { "epoch": 0.88, "grad_norm": 2.045773983001709, "learning_rate": 7.271114076840269e-07, "loss": 0.5323, "step": 34393 }, { "epoch": 0.88, "grad_norm": 5.0972466468811035, "learning_rate": 7.268006784832571e-07, "loss": 0.7362, "step": 34394 }, { "epoch": 0.88, "grad_norm": 9.641823768615723, "learning_rate": 7.264900131875519e-07, "loss": 0.3732, "step": 34395 }, { "epoch": 0.88, "grad_norm": 1.577789068222046, "learning_rate": 7.261794117990495e-07, "loss": 0.524, "step": 34396 }, { "epoch": 0.88, "grad_norm": 1.2226794958114624, "learning_rate": 7.258688743198938e-07, "loss": 0.526, "step": 34397 }, { "epoch": 0.88, "grad_norm": 6.324737548828125, "learning_rate": 7.255584007522231e-07, "loss": 0.7155, "step": 34398 }, { "epoch": 0.88, "grad_norm": 1.6906585693359375, "learning_rate": 7.252479910981758e-07, "loss": 0.6119, "step": 34399 }, { "epoch": 0.88, "grad_norm": 1.7586289644241333, "learning_rate": 7.249376453598933e-07, "loss": 0.518, "step": 34400 }, { "epoch": 0.88, "grad_norm": 1.5260350704193115, "learning_rate": 7.24627363539514e-07, "loss": 0.5646, "step": 34401 }, { "epoch": 0.88, "grad_norm": 1.549767017364502, "learning_rate": 7.243171456391751e-07, "loss": 0.4663, "step": 34402 }, { "epoch": 0.88, "grad_norm": 1.1849780082702637, "learning_rate": 7.240069916610137e-07, "loss": 0.4798, "step": 34403 }, { "epoch": 0.88, "grad_norm": 1.2821093797683716, "learning_rate": 7.236969016071693e-07, "loss": 0.4495, "step": 34404 }, { "epoch": 0.88, "grad_norm": 1.6860337257385254, "learning_rate": 7.23386875479779e-07, "loss": 0.5089, "step": 34405 }, { "epoch": 0.88, "grad_norm": 1.0993421077728271, "learning_rate": 7.230769132809757e-07, "loss": 0.4365, "step": 34406 }, { "epoch": 0.88, "grad_norm": 1.6999545097351074, "learning_rate": 7.227670150128995e-07, "loss": 0.5725, "step": 34407 }, { "epoch": 0.88, "grad_norm": 4.3525309562683105, "learning_rate": 7.224571806776848e-07, "loss": 0.6447, "step": 34408 }, { "epoch": 0.88, "grad_norm": 1.3825736045837402, "learning_rate": 7.221474102774651e-07, "loss": 0.5365, "step": 34409 }, { "epoch": 0.88, "grad_norm": 1.2880347967147827, "learning_rate": 7.218377038143787e-07, "loss": 0.3128, "step": 34410 }, { "epoch": 0.88, "grad_norm": 1.9211885929107666, "learning_rate": 7.215280612905562e-07, "loss": 0.5859, "step": 34411 }, { "epoch": 0.88, "grad_norm": 2.0148396492004395, "learning_rate": 7.212184827081348e-07, "loss": 0.5366, "step": 34412 }, { "epoch": 0.88, "grad_norm": 1.579490303993225, "learning_rate": 7.209089680692427e-07, "loss": 0.6541, "step": 34413 }, { "epoch": 0.88, "grad_norm": 4.432182788848877, "learning_rate": 7.205995173760194e-07, "loss": 0.6642, "step": 34414 }, { "epoch": 0.88, "grad_norm": 1.2489738464355469, "learning_rate": 7.202901306305932e-07, "loss": 0.4658, "step": 34415 }, { "epoch": 0.88, "grad_norm": 1.9834712743759155, "learning_rate": 7.199808078350967e-07, "loss": 0.5507, "step": 34416 }, { "epoch": 0.88, "grad_norm": 1.0647692680358887, "learning_rate": 7.196715489916628e-07, "loss": 0.3988, "step": 34417 }, { "epoch": 0.88, "grad_norm": 4.365349292755127, "learning_rate": 7.193623541024231e-07, "loss": 0.52, "step": 34418 }, { "epoch": 0.88, "grad_norm": 1.701082706451416, "learning_rate": 7.190532231695069e-07, "loss": 0.5853, "step": 34419 }, { "epoch": 0.88, "grad_norm": 2.080929756164551, "learning_rate": 7.187441561950437e-07, "loss": 0.5342, "step": 34420 }, { "epoch": 0.88, "grad_norm": 1.8699184656143188, "learning_rate": 7.184351531811651e-07, "loss": 0.5762, "step": 34421 }, { "epoch": 0.88, "grad_norm": 1.5888737440109253, "learning_rate": 7.181262141300016e-07, "loss": 0.5625, "step": 34422 }, { "epoch": 0.88, "grad_norm": 1.8742848634719849, "learning_rate": 7.178173390436782e-07, "loss": 0.4939, "step": 34423 }, { "epoch": 0.88, "grad_norm": 1.3383357524871826, "learning_rate": 7.175085279243288e-07, "loss": 0.3571, "step": 34424 }, { "epoch": 0.88, "grad_norm": 1.6014330387115479, "learning_rate": 7.171997807740783e-07, "loss": 0.6458, "step": 34425 }, { "epoch": 0.88, "grad_norm": 2.6529171466827393, "learning_rate": 7.168910975950527e-07, "loss": 0.7619, "step": 34426 }, { "epoch": 0.88, "grad_norm": 7.551156520843506, "learning_rate": 7.165824783893838e-07, "loss": 0.4505, "step": 34427 }, { "epoch": 0.88, "grad_norm": 2.033029079437256, "learning_rate": 7.162739231591953e-07, "loss": 0.4617, "step": 34428 }, { "epoch": 0.88, "grad_norm": 3.833292007446289, "learning_rate": 7.159654319066156e-07, "loss": 0.4292, "step": 34429 }, { "epoch": 0.88, "grad_norm": 1.1320712566375732, "learning_rate": 7.156570046337663e-07, "loss": 0.513, "step": 34430 }, { "epoch": 0.88, "grad_norm": 1.4228547811508179, "learning_rate": 7.153486413427779e-07, "loss": 0.4598, "step": 34431 }, { "epoch": 0.88, "grad_norm": 9.992185592651367, "learning_rate": 7.150403420357743e-07, "loss": 0.5683, "step": 34432 }, { "epoch": 0.88, "grad_norm": 3.0578787326812744, "learning_rate": 7.147321067148772e-07, "loss": 0.6497, "step": 34433 }, { "epoch": 0.88, "grad_norm": 4.547121047973633, "learning_rate": 7.144239353822136e-07, "loss": 0.4911, "step": 34434 }, { "epoch": 0.88, "grad_norm": 2.004129409790039, "learning_rate": 7.141158280399074e-07, "loss": 0.6439, "step": 34435 }, { "epoch": 0.88, "grad_norm": 2.129486560821533, "learning_rate": 7.138077846900804e-07, "loss": 0.5067, "step": 34436 }, { "epoch": 0.88, "grad_norm": 12.605961799621582, "learning_rate": 7.134998053348552e-07, "loss": 0.5061, "step": 34437 }, { "epoch": 0.88, "grad_norm": 1.795696496963501, "learning_rate": 7.131918899763557e-07, "loss": 0.6088, "step": 34438 }, { "epoch": 0.88, "grad_norm": 1.5060672760009766, "learning_rate": 7.128840386167035e-07, "loss": 0.5276, "step": 34439 }, { "epoch": 0.88, "grad_norm": 1.6293281316757202, "learning_rate": 7.125762512580181e-07, "loss": 0.6354, "step": 34440 }, { "epoch": 0.88, "grad_norm": 2.0454776287078857, "learning_rate": 7.122685279024244e-07, "loss": 0.4703, "step": 34441 }, { "epoch": 0.88, "grad_norm": 1.0443620681762695, "learning_rate": 7.119608685520408e-07, "loss": 0.4861, "step": 34442 }, { "epoch": 0.88, "grad_norm": 0.9769310355186462, "learning_rate": 7.116532732089876e-07, "loss": 0.458, "step": 34443 }, { "epoch": 0.88, "grad_norm": 2.4565107822418213, "learning_rate": 7.113457418753822e-07, "loss": 0.6115, "step": 34444 }, { "epoch": 0.88, "grad_norm": 1.8337000608444214, "learning_rate": 7.110382745533484e-07, "loss": 0.4914, "step": 34445 }, { "epoch": 0.88, "grad_norm": 3.3913064002990723, "learning_rate": 7.107308712450023e-07, "loss": 0.8057, "step": 34446 }, { "epoch": 0.88, "grad_norm": 1.9440914392471313, "learning_rate": 7.104235319524622e-07, "loss": 0.5603, "step": 34447 }, { "epoch": 0.88, "grad_norm": 1.2053395509719849, "learning_rate": 7.101162566778474e-07, "loss": 0.4256, "step": 34448 }, { "epoch": 0.88, "grad_norm": 4.374106407165527, "learning_rate": 7.098090454232753e-07, "loss": 0.5424, "step": 34449 }, { "epoch": 0.88, "grad_norm": 16.162763595581055, "learning_rate": 7.095018981908619e-07, "loss": 0.5245, "step": 34450 }, { "epoch": 0.88, "grad_norm": 2.3657126426696777, "learning_rate": 7.091948149827255e-07, "loss": 0.4574, "step": 34451 }, { "epoch": 0.88, "grad_norm": 1.0212336778640747, "learning_rate": 7.08887795800981e-07, "loss": 0.4163, "step": 34452 }, { "epoch": 0.88, "grad_norm": 1.922209620475769, "learning_rate": 7.085808406477446e-07, "loss": 0.5358, "step": 34453 }, { "epoch": 0.88, "grad_norm": 4.391221523284912, "learning_rate": 7.0827394952513e-07, "loss": 0.6969, "step": 34454 }, { "epoch": 0.88, "grad_norm": 6.764223098754883, "learning_rate": 7.079671224352558e-07, "loss": 0.6708, "step": 34455 }, { "epoch": 0.88, "grad_norm": 1.2666311264038086, "learning_rate": 7.076603593802345e-07, "loss": 0.499, "step": 34456 }, { "epoch": 0.88, "grad_norm": 1.191408634185791, "learning_rate": 7.073536603621778e-07, "loss": 0.4459, "step": 34457 }, { "epoch": 0.88, "grad_norm": 1.2489804029464722, "learning_rate": 7.070470253832029e-07, "loss": 0.6621, "step": 34458 }, { "epoch": 0.88, "grad_norm": 1.184513807296753, "learning_rate": 7.067404544454226e-07, "loss": 0.406, "step": 34459 }, { "epoch": 0.88, "grad_norm": 1.59919273853302, "learning_rate": 7.064339475509474e-07, "loss": 0.6116, "step": 34460 }, { "epoch": 0.88, "grad_norm": 1.228944182395935, "learning_rate": 7.061275047018901e-07, "loss": 0.4946, "step": 34461 }, { "epoch": 0.88, "grad_norm": 2.1623244285583496, "learning_rate": 7.058211259003634e-07, "loss": 0.5675, "step": 34462 }, { "epoch": 0.88, "grad_norm": 2.2083842754364014, "learning_rate": 7.055148111484788e-07, "loss": 0.6339, "step": 34463 }, { "epoch": 0.88, "grad_norm": 1.0975762605667114, "learning_rate": 7.05208560448345e-07, "loss": 0.5421, "step": 34464 }, { "epoch": 0.88, "grad_norm": 1.5141019821166992, "learning_rate": 7.049023738020766e-07, "loss": 0.4917, "step": 34465 }, { "epoch": 0.88, "grad_norm": 1.053929090499878, "learning_rate": 7.04596251211781e-07, "loss": 0.5215, "step": 34466 }, { "epoch": 0.88, "grad_norm": 1.7599620819091797, "learning_rate": 7.042901926795675e-07, "loss": 0.3544, "step": 34467 }, { "epoch": 0.88, "grad_norm": 2.801460027694702, "learning_rate": 7.039841982075446e-07, "loss": 0.499, "step": 34468 }, { "epoch": 0.88, "grad_norm": 1.0755558013916016, "learning_rate": 7.036782677978238e-07, "loss": 0.4277, "step": 34469 }, { "epoch": 0.88, "grad_norm": 1.5594804286956787, "learning_rate": 7.033724014525123e-07, "loss": 0.5434, "step": 34470 }, { "epoch": 0.88, "grad_norm": 2.3013744354248047, "learning_rate": 7.030665991737151e-07, "loss": 0.435, "step": 34471 }, { "epoch": 0.88, "grad_norm": 1.1787899732589722, "learning_rate": 7.027608609635439e-07, "loss": 0.4968, "step": 34472 }, { "epoch": 0.88, "grad_norm": 4.9991374015808105, "learning_rate": 7.024551868241047e-07, "loss": 0.6632, "step": 34473 }, { "epoch": 0.88, "grad_norm": 1.38652503490448, "learning_rate": 7.021495767575004e-07, "loss": 0.3805, "step": 34474 }, { "epoch": 0.88, "grad_norm": 1.2030936479568481, "learning_rate": 7.018440307658414e-07, "loss": 0.3642, "step": 34475 }, { "epoch": 0.88, "grad_norm": 1.70906662940979, "learning_rate": 7.015385488512316e-07, "loss": 0.5077, "step": 34476 }, { "epoch": 0.88, "grad_norm": 0.8912684917449951, "learning_rate": 7.012331310157772e-07, "loss": 0.5082, "step": 34477 }, { "epoch": 0.88, "grad_norm": 1.7992198467254639, "learning_rate": 7.009277772615797e-07, "loss": 0.5502, "step": 34478 }, { "epoch": 0.88, "grad_norm": 1.6159299612045288, "learning_rate": 7.006224875907475e-07, "loss": 0.5755, "step": 34479 }, { "epoch": 0.88, "grad_norm": 1.629351019859314, "learning_rate": 7.003172620053822e-07, "loss": 0.5905, "step": 34480 }, { "epoch": 0.88, "grad_norm": 1.3137778043746948, "learning_rate": 7.000121005075866e-07, "loss": 0.3036, "step": 34481 }, { "epoch": 0.88, "grad_norm": 1.0888264179229736, "learning_rate": 6.997070030994657e-07, "loss": 0.4439, "step": 34482 }, { "epoch": 0.88, "grad_norm": 1.2990365028381348, "learning_rate": 6.994019697831211e-07, "loss": 0.4842, "step": 34483 }, { "epoch": 0.88, "grad_norm": 1.6488410234451294, "learning_rate": 6.990970005606557e-07, "loss": 0.4702, "step": 34484 }, { "epoch": 0.88, "grad_norm": 1.031968593597412, "learning_rate": 6.987920954341687e-07, "loss": 0.4612, "step": 34485 }, { "epoch": 0.88, "grad_norm": 1.4922583103179932, "learning_rate": 6.984872544057641e-07, "loss": 0.5947, "step": 34486 }, { "epoch": 0.88, "grad_norm": 1.2818372249603271, "learning_rate": 6.981824774775426e-07, "loss": 0.4372, "step": 34487 }, { "epoch": 0.88, "grad_norm": 2.552227258682251, "learning_rate": 6.978777646516011e-07, "loss": 0.5544, "step": 34488 }, { "epoch": 0.88, "grad_norm": 1.5252172946929932, "learning_rate": 6.975731159300436e-07, "loss": 0.5798, "step": 34489 }, { "epoch": 0.88, "grad_norm": 3.0827531814575195, "learning_rate": 6.972685313149674e-07, "loss": 0.5735, "step": 34490 }, { "epoch": 0.88, "grad_norm": 3.1234018802642822, "learning_rate": 6.969640108084708e-07, "loss": 0.6484, "step": 34491 }, { "epoch": 0.88, "grad_norm": 1.6977282762527466, "learning_rate": 6.966595544126542e-07, "loss": 0.5142, "step": 34492 }, { "epoch": 0.88, "grad_norm": 1.718557596206665, "learning_rate": 6.96355162129616e-07, "loss": 0.588, "step": 34493 }, { "epoch": 0.88, "grad_norm": 1.024492621421814, "learning_rate": 6.960508339614524e-07, "loss": 0.4238, "step": 34494 }, { "epoch": 0.88, "grad_norm": 5.161343574523926, "learning_rate": 6.957465699102606e-07, "loss": 0.6225, "step": 34495 }, { "epoch": 0.88, "grad_norm": 1.654914140701294, "learning_rate": 6.954423699781387e-07, "loss": 0.7002, "step": 34496 }, { "epoch": 0.88, "grad_norm": 2.0660369396209717, "learning_rate": 6.95138234167182e-07, "loss": 0.5994, "step": 34497 }, { "epoch": 0.88, "grad_norm": 1.75974440574646, "learning_rate": 6.948341624794852e-07, "loss": 0.492, "step": 34498 }, { "epoch": 0.88, "grad_norm": 1.5625368356704712, "learning_rate": 6.945301549171479e-07, "loss": 0.5246, "step": 34499 }, { "epoch": 0.88, "grad_norm": 1.3135939836502075, "learning_rate": 6.942262114822618e-07, "loss": 0.5765, "step": 34500 }, { "epoch": 0.88, "grad_norm": 1.9980316162109375, "learning_rate": 6.939223321769217e-07, "loss": 0.4699, "step": 34501 }, { "epoch": 0.88, "grad_norm": 1.8267409801483154, "learning_rate": 6.936185170032206e-07, "loss": 0.591, "step": 34502 }, { "epoch": 0.88, "grad_norm": 4.185629367828369, "learning_rate": 6.933147659632566e-07, "loss": 0.5862, "step": 34503 }, { "epoch": 0.88, "grad_norm": 1.107413411140442, "learning_rate": 6.930110790591193e-07, "loss": 0.5213, "step": 34504 }, { "epoch": 0.88, "grad_norm": 1.2369335889816284, "learning_rate": 6.927074562929014e-07, "loss": 0.4619, "step": 34505 }, { "epoch": 0.88, "grad_norm": 1.9462816715240479, "learning_rate": 6.924038976666969e-07, "loss": 0.4911, "step": 34506 }, { "epoch": 0.88, "grad_norm": 9.614786148071289, "learning_rate": 6.921004031825984e-07, "loss": 0.5972, "step": 34507 }, { "epoch": 0.88, "grad_norm": 2.3659250736236572, "learning_rate": 6.917969728426954e-07, "loss": 0.5648, "step": 34508 }, { "epoch": 0.88, "grad_norm": 2.055069923400879, "learning_rate": 6.914936066490774e-07, "loss": 0.6498, "step": 34509 }, { "epoch": 0.88, "grad_norm": 2.3065905570983887, "learning_rate": 6.911903046038404e-07, "loss": 0.5247, "step": 34510 }, { "epoch": 0.88, "grad_norm": 1.2987911701202393, "learning_rate": 6.908870667090706e-07, "loss": 0.4427, "step": 34511 }, { "epoch": 0.88, "grad_norm": 1.7803361415863037, "learning_rate": 6.905838929668563e-07, "loss": 0.7588, "step": 34512 }, { "epoch": 0.88, "grad_norm": 1.2985203266143799, "learning_rate": 6.902807833792924e-07, "loss": 0.4636, "step": 34513 }, { "epoch": 0.88, "grad_norm": 1.857827067375183, "learning_rate": 6.899777379484629e-07, "loss": 0.3141, "step": 34514 }, { "epoch": 0.88, "grad_norm": 1.4407607316970825, "learning_rate": 6.896747566764561e-07, "loss": 0.5491, "step": 34515 }, { "epoch": 0.88, "grad_norm": 1.5410165786743164, "learning_rate": 6.893718395653648e-07, "loss": 0.6624, "step": 34516 }, { "epoch": 0.88, "grad_norm": 2.1545581817626953, "learning_rate": 6.890689866172717e-07, "loss": 0.498, "step": 34517 }, { "epoch": 0.88, "grad_norm": 19.5518798828125, "learning_rate": 6.887661978342663e-07, "loss": 0.5381, "step": 34518 }, { "epoch": 0.88, "grad_norm": 0.9567387104034424, "learning_rate": 6.884634732184314e-07, "loss": 0.4455, "step": 34519 }, { "epoch": 0.88, "grad_norm": 1.0645397901535034, "learning_rate": 6.881608127718598e-07, "loss": 0.5038, "step": 34520 }, { "epoch": 0.88, "grad_norm": 1.53207528591156, "learning_rate": 6.878582164966319e-07, "loss": 0.5532, "step": 34521 }, { "epoch": 0.88, "grad_norm": 1.5504887104034424, "learning_rate": 6.875556843948339e-07, "loss": 0.5624, "step": 34522 }, { "epoch": 0.88, "grad_norm": 1.3893742561340332, "learning_rate": 6.87253216468553e-07, "loss": 0.4838, "step": 34523 }, { "epoch": 0.88, "grad_norm": 1.4860323667526245, "learning_rate": 6.86950812719872e-07, "loss": 0.6641, "step": 34524 }, { "epoch": 0.88, "grad_norm": 2.7466423511505127, "learning_rate": 6.866484731508749e-07, "loss": 0.4916, "step": 34525 }, { "epoch": 0.88, "grad_norm": 0.8632209897041321, "learning_rate": 6.863461977636443e-07, "loss": 0.4554, "step": 34526 }, { "epoch": 0.88, "grad_norm": 2.4098336696624756, "learning_rate": 6.860439865602653e-07, "loss": 0.4255, "step": 34527 }, { "epoch": 0.88, "grad_norm": 1.138066291809082, "learning_rate": 6.857418395428195e-07, "loss": 0.4393, "step": 34528 }, { "epoch": 0.88, "grad_norm": 2.1294875144958496, "learning_rate": 6.854397567133886e-07, "loss": 0.546, "step": 34529 }, { "epoch": 0.89, "grad_norm": 2.1899869441986084, "learning_rate": 6.851377380740565e-07, "loss": 0.403, "step": 34530 }, { "epoch": 0.89, "grad_norm": 1.9695948362350464, "learning_rate": 6.848357836269015e-07, "loss": 0.4506, "step": 34531 }, { "epoch": 0.89, "grad_norm": 2.636579990386963, "learning_rate": 6.845338933740054e-07, "loss": 0.559, "step": 34532 }, { "epoch": 0.89, "grad_norm": 1.372341275215149, "learning_rate": 6.842320673174496e-07, "loss": 0.5327, "step": 34533 }, { "epoch": 0.89, "grad_norm": 1.0628831386566162, "learning_rate": 6.839303054593149e-07, "loss": 0.4996, "step": 34534 }, { "epoch": 0.89, "grad_norm": 1.9662137031555176, "learning_rate": 6.836286078016774e-07, "loss": 0.5436, "step": 34535 }, { "epoch": 0.89, "grad_norm": 5.952138423919678, "learning_rate": 6.833269743466198e-07, "loss": 0.7464, "step": 34536 }, { "epoch": 0.89, "grad_norm": 1.715186595916748, "learning_rate": 6.830254050962192e-07, "loss": 0.3634, "step": 34537 }, { "epoch": 0.89, "grad_norm": 1.0665804147720337, "learning_rate": 6.827239000525543e-07, "loss": 0.4111, "step": 34538 }, { "epoch": 0.89, "grad_norm": 1.1600313186645508, "learning_rate": 6.824224592177009e-07, "loss": 0.4739, "step": 34539 }, { "epoch": 0.89, "grad_norm": 1.0794830322265625, "learning_rate": 6.821210825937408e-07, "loss": 0.4906, "step": 34540 }, { "epoch": 0.89, "grad_norm": 3.6372175216674805, "learning_rate": 6.818197701827467e-07, "loss": 0.6879, "step": 34541 }, { "epoch": 0.89, "grad_norm": 1.9621978998184204, "learning_rate": 6.815185219867948e-07, "loss": 0.5401, "step": 34542 }, { "epoch": 0.89, "grad_norm": 8.674162864685059, "learning_rate": 6.812173380079656e-07, "loss": 0.535, "step": 34543 }, { "epoch": 0.89, "grad_norm": 2.0565786361694336, "learning_rate": 6.809162182483309e-07, "loss": 0.6094, "step": 34544 }, { "epoch": 0.89, "grad_norm": 2.093883514404297, "learning_rate": 6.806151627099655e-07, "loss": 0.5999, "step": 34545 }, { "epoch": 0.89, "grad_norm": 1.4399280548095703, "learning_rate": 6.803141713949468e-07, "loss": 0.3677, "step": 34546 }, { "epoch": 0.89, "grad_norm": 1.7805263996124268, "learning_rate": 6.800132443053486e-07, "loss": 0.7075, "step": 34547 }, { "epoch": 0.89, "grad_norm": 1.0657936334609985, "learning_rate": 6.797123814432427e-07, "loss": 0.4597, "step": 34548 }, { "epoch": 0.89, "grad_norm": 1.2550748586654663, "learning_rate": 6.794115828107018e-07, "loss": 0.592, "step": 34549 }, { "epoch": 0.89, "grad_norm": 1.5921165943145752, "learning_rate": 6.791108484098031e-07, "loss": 0.655, "step": 34550 }, { "epoch": 0.89, "grad_norm": 3.1577224731445312, "learning_rate": 6.78810178242616e-07, "loss": 0.5439, "step": 34551 }, { "epoch": 0.89, "grad_norm": 1.4688000679016113, "learning_rate": 6.785095723112112e-07, "loss": 0.5182, "step": 34552 }, { "epoch": 0.89, "grad_norm": 0.9379403591156006, "learning_rate": 6.782090306176636e-07, "loss": 0.5284, "step": 34553 }, { "epoch": 0.89, "grad_norm": 1.512215495109558, "learning_rate": 6.779085531640428e-07, "loss": 0.571, "step": 34554 }, { "epoch": 0.89, "grad_norm": 1.2679028511047363, "learning_rate": 6.776081399524193e-07, "loss": 0.4581, "step": 34555 }, { "epoch": 0.89, "grad_norm": 1.3962817192077637, "learning_rate": 6.773077909848624e-07, "loss": 0.5293, "step": 34556 }, { "epoch": 0.89, "grad_norm": 1.4919191598892212, "learning_rate": 6.770075062634451e-07, "loss": 0.6417, "step": 34557 }, { "epoch": 0.89, "grad_norm": 1.9226033687591553, "learning_rate": 6.767072857902346e-07, "loss": 0.5242, "step": 34558 }, { "epoch": 0.89, "grad_norm": 3.2079250812530518, "learning_rate": 6.76407129567298e-07, "loss": 0.5789, "step": 34559 }, { "epoch": 0.89, "grad_norm": 2.041738510131836, "learning_rate": 6.761070375967072e-07, "loss": 0.6219, "step": 34560 }, { "epoch": 0.89, "grad_norm": 2.04500150680542, "learning_rate": 6.758070098805303e-07, "loss": 0.563, "step": 34561 }, { "epoch": 0.89, "grad_norm": 1.1628623008728027, "learning_rate": 6.755070464208324e-07, "loss": 0.5677, "step": 34562 }, { "epoch": 0.89, "grad_norm": 1.302707314491272, "learning_rate": 6.752071472196797e-07, "loss": 0.463, "step": 34563 }, { "epoch": 0.89, "grad_norm": 3.2951478958129883, "learning_rate": 6.749073122791427e-07, "loss": 0.5598, "step": 34564 }, { "epoch": 0.89, "grad_norm": 1.3799468278884888, "learning_rate": 6.746075416012865e-07, "loss": 0.5739, "step": 34565 }, { "epoch": 0.89, "grad_norm": 1.3826786279678345, "learning_rate": 6.743078351881749e-07, "loss": 0.5612, "step": 34566 }, { "epoch": 0.89, "grad_norm": 1.3970222473144531, "learning_rate": 6.740081930418752e-07, "loss": 0.5743, "step": 34567 }, { "epoch": 0.89, "grad_norm": 2.054192543029785, "learning_rate": 6.737086151644534e-07, "loss": 0.6125, "step": 34568 }, { "epoch": 0.89, "grad_norm": 1.8348041772842407, "learning_rate": 6.734091015579692e-07, "loss": 0.4354, "step": 34569 }, { "epoch": 0.89, "grad_norm": 2.1697280406951904, "learning_rate": 6.731096522244929e-07, "loss": 0.4935, "step": 34570 }, { "epoch": 0.89, "grad_norm": 1.3699558973312378, "learning_rate": 6.728102671660841e-07, "loss": 0.6268, "step": 34571 }, { "epoch": 0.89, "grad_norm": 1.7226933240890503, "learning_rate": 6.725109463848078e-07, "loss": 0.5695, "step": 34572 }, { "epoch": 0.89, "grad_norm": 1.5931475162506104, "learning_rate": 6.722116898827246e-07, "loss": 0.5058, "step": 34573 }, { "epoch": 0.89, "grad_norm": 3.0135178565979004, "learning_rate": 6.719124976618984e-07, "loss": 0.5457, "step": 34574 }, { "epoch": 0.89, "grad_norm": 1.6437190771102905, "learning_rate": 6.716133697243921e-07, "loss": 0.5396, "step": 34575 }, { "epoch": 0.89, "grad_norm": 1.9389588832855225, "learning_rate": 6.713143060722638e-07, "loss": 0.4158, "step": 34576 }, { "epoch": 0.89, "grad_norm": 1.6815831661224365, "learning_rate": 6.710153067075775e-07, "loss": 0.5602, "step": 34577 }, { "epoch": 0.89, "grad_norm": 1.3847084045410156, "learning_rate": 6.707163716323939e-07, "loss": 0.5327, "step": 34578 }, { "epoch": 0.89, "grad_norm": 1.4508086442947388, "learning_rate": 6.704175008487701e-07, "loss": 0.7544, "step": 34579 }, { "epoch": 0.89, "grad_norm": 1.7623416185379028, "learning_rate": 6.701186943587667e-07, "loss": 0.5708, "step": 34580 }, { "epoch": 0.89, "grad_norm": 3.543065071105957, "learning_rate": 6.698199521644455e-07, "loss": 0.68, "step": 34581 }, { "epoch": 0.89, "grad_norm": 2.201669454574585, "learning_rate": 6.695212742678636e-07, "loss": 0.8522, "step": 34582 }, { "epoch": 0.89, "grad_norm": 1.3115595579147339, "learning_rate": 6.692226606710772e-07, "loss": 0.5224, "step": 34583 }, { "epoch": 0.89, "grad_norm": 6.698187351226807, "learning_rate": 6.68924111376148e-07, "loss": 0.6282, "step": 34584 }, { "epoch": 0.89, "grad_norm": 2.0310757160186768, "learning_rate": 6.68625626385131e-07, "loss": 0.5425, "step": 34585 }, { "epoch": 0.89, "grad_norm": 1.2625453472137451, "learning_rate": 6.683272057000823e-07, "loss": 0.5253, "step": 34586 }, { "epoch": 0.89, "grad_norm": 1.5060060024261475, "learning_rate": 6.680288493230614e-07, "loss": 0.4835, "step": 34587 }, { "epoch": 0.89, "grad_norm": 2.119903564453125, "learning_rate": 6.677305572561221e-07, "loss": 0.6812, "step": 34588 }, { "epoch": 0.89, "grad_norm": 1.6305482387542725, "learning_rate": 6.674323295013207e-07, "loss": 0.5674, "step": 34589 }, { "epoch": 0.89, "grad_norm": 1.375605821609497, "learning_rate": 6.671341660607112e-07, "loss": 0.5171, "step": 34590 }, { "epoch": 0.89, "grad_norm": 1.3809648752212524, "learning_rate": 6.668360669363516e-07, "loss": 0.6043, "step": 34591 }, { "epoch": 0.89, "grad_norm": 1.8507788181304932, "learning_rate": 6.665380321302939e-07, "loss": 0.4959, "step": 34592 }, { "epoch": 0.89, "grad_norm": 1.4346224069595337, "learning_rate": 6.662400616445907e-07, "loss": 0.5242, "step": 34593 }, { "epoch": 0.89, "grad_norm": 1.6768437623977661, "learning_rate": 6.659421554812984e-07, "loss": 0.6769, "step": 34594 }, { "epoch": 0.89, "grad_norm": 2.666327476501465, "learning_rate": 6.656443136424684e-07, "loss": 0.5114, "step": 34595 }, { "epoch": 0.89, "grad_norm": 1.791345238685608, "learning_rate": 6.653465361301526e-07, "loss": 0.6066, "step": 34596 }, { "epoch": 0.89, "grad_norm": 1.1831443309783936, "learning_rate": 6.650488229464036e-07, "loss": 0.5159, "step": 34597 }, { "epoch": 0.89, "grad_norm": 2.1073219776153564, "learning_rate": 6.647511740932744e-07, "loss": 0.5557, "step": 34598 }, { "epoch": 0.89, "grad_norm": 1.246504545211792, "learning_rate": 6.644535895728155e-07, "loss": 0.4057, "step": 34599 }, { "epoch": 0.89, "grad_norm": 14.660523414611816, "learning_rate": 6.641560693870752e-07, "loss": 0.5614, "step": 34600 }, { "epoch": 0.89, "grad_norm": 1.1397889852523804, "learning_rate": 6.638586135381076e-07, "loss": 0.6007, "step": 34601 }, { "epoch": 0.89, "grad_norm": 2.5047495365142822, "learning_rate": 6.635612220279608e-07, "loss": 0.4414, "step": 34602 }, { "epoch": 0.89, "grad_norm": 0.8400920629501343, "learning_rate": 6.632638948586845e-07, "loss": 0.4339, "step": 34603 }, { "epoch": 0.89, "grad_norm": 1.1506344079971313, "learning_rate": 6.629666320323258e-07, "loss": 0.4303, "step": 34604 }, { "epoch": 0.89, "grad_norm": 1.648579478263855, "learning_rate": 6.626694335509365e-07, "loss": 0.5111, "step": 34605 }, { "epoch": 0.89, "grad_norm": 1.6694133281707764, "learning_rate": 6.623722994165638e-07, "loss": 0.5956, "step": 34606 }, { "epoch": 0.89, "grad_norm": 7.198776721954346, "learning_rate": 6.620752296312527e-07, "loss": 0.4855, "step": 34607 }, { "epoch": 0.89, "grad_norm": 1.4583035707473755, "learning_rate": 6.617782241970538e-07, "loss": 0.4606, "step": 34608 }, { "epoch": 0.89, "grad_norm": 1.5824360847473145, "learning_rate": 6.614812831160134e-07, "loss": 0.5501, "step": 34609 }, { "epoch": 0.89, "grad_norm": 1.3431425094604492, "learning_rate": 6.61184406390174e-07, "loss": 0.6396, "step": 34610 }, { "epoch": 0.89, "grad_norm": 1.8021283149719238, "learning_rate": 6.608875940215876e-07, "loss": 0.5765, "step": 34611 }, { "epoch": 0.89, "grad_norm": 1.1625949144363403, "learning_rate": 6.605908460122956e-07, "loss": 0.3875, "step": 34612 }, { "epoch": 0.89, "grad_norm": 1.4236536026000977, "learning_rate": 6.602941623643433e-07, "loss": 0.5592, "step": 34613 }, { "epoch": 0.89, "grad_norm": 1.1386010646820068, "learning_rate": 6.599975430797756e-07, "loss": 0.5155, "step": 34614 }, { "epoch": 0.89, "grad_norm": 1.4830867052078247, "learning_rate": 6.597009881606376e-07, "loss": 0.602, "step": 34615 }, { "epoch": 0.89, "grad_norm": 1.2884962558746338, "learning_rate": 6.59404497608972e-07, "loss": 0.4429, "step": 34616 }, { "epoch": 0.89, "grad_norm": 1.1443240642547607, "learning_rate": 6.591080714268216e-07, "loss": 0.373, "step": 34617 }, { "epoch": 0.89, "grad_norm": 1.2351335287094116, "learning_rate": 6.588117096162305e-07, "loss": 0.4357, "step": 34618 }, { "epoch": 0.89, "grad_norm": 2.8239123821258545, "learning_rate": 6.585154121792414e-07, "loss": 0.4969, "step": 34619 }, { "epoch": 0.89, "grad_norm": 1.6252459287643433, "learning_rate": 6.582191791178949e-07, "loss": 0.4952, "step": 34620 }, { "epoch": 0.89, "grad_norm": 8.29043197631836, "learning_rate": 6.579230104342305e-07, "loss": 0.5595, "step": 34621 }, { "epoch": 0.89, "grad_norm": 2.045605182647705, "learning_rate": 6.576269061302931e-07, "loss": 0.587, "step": 34622 }, { "epoch": 0.89, "grad_norm": 1.5169037580490112, "learning_rate": 6.573308662081223e-07, "loss": 0.4799, "step": 34623 }, { "epoch": 0.89, "grad_norm": 2.0360307693481445, "learning_rate": 6.570348906697555e-07, "loss": 0.6374, "step": 34624 }, { "epoch": 0.89, "grad_norm": 3.7382400035858154, "learning_rate": 6.567389795172363e-07, "loss": 0.7276, "step": 34625 }, { "epoch": 0.89, "grad_norm": 0.8477600812911987, "learning_rate": 6.564431327526022e-07, "loss": 0.4299, "step": 34626 }, { "epoch": 0.89, "grad_norm": 1.7972424030303955, "learning_rate": 6.561473503778892e-07, "loss": 0.5516, "step": 34627 }, { "epoch": 0.89, "grad_norm": 2.281085252761841, "learning_rate": 6.558516323951414e-07, "loss": 0.6347, "step": 34628 }, { "epoch": 0.89, "grad_norm": 1.8593075275421143, "learning_rate": 6.555559788063925e-07, "loss": 0.5449, "step": 34629 }, { "epoch": 0.89, "grad_norm": 3.4189364910125732, "learning_rate": 6.552603896136811e-07, "loss": 0.5526, "step": 34630 }, { "epoch": 0.89, "grad_norm": 1.0193147659301758, "learning_rate": 6.549648648190421e-07, "loss": 0.4046, "step": 34631 }, { "epoch": 0.89, "grad_norm": 1.763418197631836, "learning_rate": 6.546694044245172e-07, "loss": 0.567, "step": 34632 }, { "epoch": 0.89, "grad_norm": 1.082336187362671, "learning_rate": 6.543740084321382e-07, "loss": 0.4956, "step": 34633 }, { "epoch": 0.89, "grad_norm": 1.893845796585083, "learning_rate": 6.540786768439411e-07, "loss": 0.5701, "step": 34634 }, { "epoch": 0.89, "grad_norm": 1.272697925567627, "learning_rate": 6.537834096619633e-07, "loss": 0.6267, "step": 34635 }, { "epoch": 0.89, "grad_norm": 5.857130527496338, "learning_rate": 6.534882068882386e-07, "loss": 0.4897, "step": 34636 }, { "epoch": 0.89, "grad_norm": 1.2335493564605713, "learning_rate": 6.531930685248012e-07, "loss": 0.3528, "step": 34637 }, { "epoch": 0.89, "grad_norm": 0.9869492650032043, "learning_rate": 6.528979945736825e-07, "loss": 0.5651, "step": 34638 }, { "epoch": 0.89, "grad_norm": 2.423691749572754, "learning_rate": 6.526029850369208e-07, "loss": 0.4325, "step": 34639 }, { "epoch": 0.89, "grad_norm": 1.8701372146606445, "learning_rate": 6.52308039916546e-07, "loss": 0.4777, "step": 34640 }, { "epoch": 0.89, "grad_norm": 10.155048370361328, "learning_rate": 6.520131592145907e-07, "loss": 0.5778, "step": 34641 }, { "epoch": 0.89, "grad_norm": 1.8637604713439941, "learning_rate": 6.517183429330886e-07, "loss": 0.7243, "step": 34642 }, { "epoch": 0.89, "grad_norm": 1.4346121549606323, "learning_rate": 6.514235910740718e-07, "loss": 0.4516, "step": 34643 }, { "epoch": 0.89, "grad_norm": 1.0050981044769287, "learning_rate": 6.511289036395685e-07, "loss": 0.5786, "step": 34644 }, { "epoch": 0.89, "grad_norm": 20.398969650268555, "learning_rate": 6.508342806316114e-07, "loss": 0.4089, "step": 34645 }, { "epoch": 0.89, "grad_norm": 1.2730973958969116, "learning_rate": 6.505397220522314e-07, "loss": 0.5175, "step": 34646 }, { "epoch": 0.89, "grad_norm": 1.4970885515213013, "learning_rate": 6.502452279034577e-07, "loss": 0.4922, "step": 34647 }, { "epoch": 0.89, "grad_norm": 1.825771689414978, "learning_rate": 6.499507981873177e-07, "loss": 0.6157, "step": 34648 }, { "epoch": 0.89, "grad_norm": 2.328418016433716, "learning_rate": 6.496564329058452e-07, "loss": 0.5297, "step": 34649 }, { "epoch": 0.89, "grad_norm": 1.7349125146865845, "learning_rate": 6.493621320610655e-07, "loss": 0.5491, "step": 34650 }, { "epoch": 0.89, "grad_norm": 2.0436906814575195, "learning_rate": 6.490678956550056e-07, "loss": 0.5409, "step": 34651 }, { "epoch": 0.89, "grad_norm": 1.8150701522827148, "learning_rate": 6.487737236896963e-07, "loss": 0.3853, "step": 34652 }, { "epoch": 0.89, "grad_norm": 1.6714755296707153, "learning_rate": 6.484796161671647e-07, "loss": 0.5062, "step": 34653 }, { "epoch": 0.89, "grad_norm": 1.755540370941162, "learning_rate": 6.481855730894349e-07, "loss": 0.4577, "step": 34654 }, { "epoch": 0.89, "grad_norm": 5.072505950927734, "learning_rate": 6.47891594458534e-07, "loss": 0.6462, "step": 34655 }, { "epoch": 0.89, "grad_norm": 1.556052803993225, "learning_rate": 6.475976802764905e-07, "loss": 0.6561, "step": 34656 }, { "epoch": 0.89, "grad_norm": 1.4644027948379517, "learning_rate": 6.473038305453272e-07, "loss": 0.4543, "step": 34657 }, { "epoch": 0.89, "grad_norm": 0.8666681051254272, "learning_rate": 6.470100452670691e-07, "loss": 0.4175, "step": 34658 }, { "epoch": 0.89, "grad_norm": 1.290586233139038, "learning_rate": 6.467163244437425e-07, "loss": 0.4611, "step": 34659 }, { "epoch": 0.89, "grad_norm": 2.7078683376312256, "learning_rate": 6.464226680773711e-07, "loss": 0.7058, "step": 34660 }, { "epoch": 0.89, "grad_norm": 1.5121690034866333, "learning_rate": 6.46129076169979e-07, "loss": 0.4981, "step": 34661 }, { "epoch": 0.89, "grad_norm": 1.3577613830566406, "learning_rate": 6.458355487235856e-07, "loss": 0.4285, "step": 34662 }, { "epoch": 0.89, "grad_norm": 1.4556596279144287, "learning_rate": 6.455420857402194e-07, "loss": 0.5587, "step": 34663 }, { "epoch": 0.89, "grad_norm": 0.906619131565094, "learning_rate": 6.452486872219e-07, "loss": 0.4363, "step": 34664 }, { "epoch": 0.89, "grad_norm": 1.460484266281128, "learning_rate": 6.449553531706476e-07, "loss": 0.3849, "step": 34665 }, { "epoch": 0.89, "grad_norm": 11.146050453186035, "learning_rate": 6.446620835884865e-07, "loss": 0.6379, "step": 34666 }, { "epoch": 0.89, "grad_norm": 15.485590934753418, "learning_rate": 6.443688784774371e-07, "loss": 0.7188, "step": 34667 }, { "epoch": 0.89, "grad_norm": 1.1092619895935059, "learning_rate": 6.440757378395179e-07, "loss": 0.4482, "step": 34668 }, { "epoch": 0.89, "grad_norm": 1.30272376537323, "learning_rate": 6.437826616767528e-07, "loss": 0.4354, "step": 34669 }, { "epoch": 0.89, "grad_norm": 5.29935359954834, "learning_rate": 6.434896499911592e-07, "loss": 0.4554, "step": 34670 }, { "epoch": 0.89, "grad_norm": 1.7812455892562866, "learning_rate": 6.431967027847563e-07, "loss": 0.55, "step": 34671 }, { "epoch": 0.89, "grad_norm": 5.187154293060303, "learning_rate": 6.429038200595627e-07, "loss": 0.6675, "step": 34672 }, { "epoch": 0.89, "grad_norm": 1.3940372467041016, "learning_rate": 6.426110018175979e-07, "loss": 0.5619, "step": 34673 }, { "epoch": 0.89, "grad_norm": 1.2015321254730225, "learning_rate": 6.423182480608792e-07, "loss": 0.5656, "step": 34674 }, { "epoch": 0.89, "grad_norm": 1.4993044137954712, "learning_rate": 6.420255587914226e-07, "loss": 0.5245, "step": 34675 }, { "epoch": 0.89, "grad_norm": 1.9470469951629639, "learning_rate": 6.417329340112477e-07, "loss": 0.5359, "step": 34676 }, { "epoch": 0.89, "grad_norm": 1.900006890296936, "learning_rate": 6.414403737223707e-07, "loss": 0.6411, "step": 34677 }, { "epoch": 0.89, "grad_norm": 1.565420150756836, "learning_rate": 6.411478779268077e-07, "loss": 0.4339, "step": 34678 }, { "epoch": 0.89, "grad_norm": 2.5520472526550293, "learning_rate": 6.408554466265704e-07, "loss": 0.5893, "step": 34679 }, { "epoch": 0.89, "grad_norm": 1.5826084613800049, "learning_rate": 6.405630798236806e-07, "loss": 0.7197, "step": 34680 }, { "epoch": 0.89, "grad_norm": 3.2487378120422363, "learning_rate": 6.402707775201489e-07, "loss": 0.5687, "step": 34681 }, { "epoch": 0.89, "grad_norm": 4.596944332122803, "learning_rate": 6.399785397179892e-07, "loss": 0.6033, "step": 34682 }, { "epoch": 0.89, "grad_norm": 2.4469289779663086, "learning_rate": 6.396863664192177e-07, "loss": 0.6682, "step": 34683 }, { "epoch": 0.89, "grad_norm": 1.2232811450958252, "learning_rate": 6.393942576258483e-07, "loss": 0.5083, "step": 34684 }, { "epoch": 0.89, "grad_norm": 0.9828789830207825, "learning_rate": 6.391022133398928e-07, "loss": 0.4834, "step": 34685 }, { "epoch": 0.89, "grad_norm": 3.014558792114258, "learning_rate": 6.388102335633617e-07, "loss": 0.548, "step": 34686 }, { "epoch": 0.89, "grad_norm": 1.2637014389038086, "learning_rate": 6.385183182982701e-07, "loss": 0.5345, "step": 34687 }, { "epoch": 0.89, "grad_norm": 1.04072105884552, "learning_rate": 6.382264675466299e-07, "loss": 0.3445, "step": 34688 }, { "epoch": 0.89, "grad_norm": 1.0014687776565552, "learning_rate": 6.379346813104492e-07, "loss": 0.5438, "step": 34689 }, { "epoch": 0.89, "grad_norm": 15.067426681518555, "learning_rate": 6.376429595917421e-07, "loss": 0.551, "step": 34690 }, { "epoch": 0.89, "grad_norm": 1.601408839225769, "learning_rate": 6.373513023925182e-07, "loss": 0.6264, "step": 34691 }, { "epoch": 0.89, "grad_norm": 1.4230481386184692, "learning_rate": 6.370597097147846e-07, "loss": 0.5336, "step": 34692 }, { "epoch": 0.89, "grad_norm": 1.1541348695755005, "learning_rate": 6.367681815605553e-07, "loss": 0.5165, "step": 34693 }, { "epoch": 0.89, "grad_norm": 3.6542654037475586, "learning_rate": 6.364767179318377e-07, "loss": 0.6373, "step": 34694 }, { "epoch": 0.89, "grad_norm": 4.218438625335693, "learning_rate": 6.361853188306389e-07, "loss": 0.5973, "step": 34695 }, { "epoch": 0.89, "grad_norm": 2.1232798099517822, "learning_rate": 6.358939842589673e-07, "loss": 0.5631, "step": 34696 }, { "epoch": 0.89, "grad_norm": 1.0242962837219238, "learning_rate": 6.356027142188315e-07, "loss": 0.4578, "step": 34697 }, { "epoch": 0.89, "grad_norm": 2.0021119117736816, "learning_rate": 6.353115087122396e-07, "loss": 0.6685, "step": 34698 }, { "epoch": 0.89, "grad_norm": 1.3146352767944336, "learning_rate": 6.350203677411959e-07, "loss": 0.4947, "step": 34699 }, { "epoch": 0.89, "grad_norm": 0.8424506187438965, "learning_rate": 6.347292913077085e-07, "loss": 0.4839, "step": 34700 }, { "epoch": 0.89, "grad_norm": 3.978710412979126, "learning_rate": 6.344382794137838e-07, "loss": 0.4874, "step": 34701 }, { "epoch": 0.89, "grad_norm": 2.041569709777832, "learning_rate": 6.341473320614267e-07, "loss": 0.5265, "step": 34702 }, { "epoch": 0.89, "grad_norm": 1.8222792148590088, "learning_rate": 6.3385644925264e-07, "loss": 0.6707, "step": 34703 }, { "epoch": 0.89, "grad_norm": 1.2598402500152588, "learning_rate": 6.335656309894322e-07, "loss": 0.4787, "step": 34704 }, { "epoch": 0.89, "grad_norm": 2.030449628829956, "learning_rate": 6.332748772738051e-07, "loss": 0.6514, "step": 34705 }, { "epoch": 0.89, "grad_norm": 1.3716293573379517, "learning_rate": 6.329841881077614e-07, "loss": 0.4023, "step": 34706 }, { "epoch": 0.89, "grad_norm": 1.5435281991958618, "learning_rate": 6.326935634933074e-07, "loss": 0.6033, "step": 34707 }, { "epoch": 0.89, "grad_norm": 0.9565860033035278, "learning_rate": 6.324030034324435e-07, "loss": 0.5351, "step": 34708 }, { "epoch": 0.89, "grad_norm": 1.4169514179229736, "learning_rate": 6.321125079271717e-07, "loss": 0.4357, "step": 34709 }, { "epoch": 0.89, "grad_norm": 1.2706387042999268, "learning_rate": 6.318220769794969e-07, "loss": 0.4833, "step": 34710 }, { "epoch": 0.89, "grad_norm": 1.3911854028701782, "learning_rate": 6.315317105914187e-07, "loss": 0.4596, "step": 34711 }, { "epoch": 0.89, "grad_norm": 1.9591318368911743, "learning_rate": 6.312414087649376e-07, "loss": 0.5831, "step": 34712 }, { "epoch": 0.89, "grad_norm": 1.1793179512023926, "learning_rate": 6.309511715020533e-07, "loss": 0.5402, "step": 34713 }, { "epoch": 0.89, "grad_norm": 1.2445327043533325, "learning_rate": 6.306609988047685e-07, "loss": 0.4849, "step": 34714 }, { "epoch": 0.89, "grad_norm": 1.6369402408599854, "learning_rate": 6.303708906750827e-07, "loss": 0.4626, "step": 34715 }, { "epoch": 0.89, "grad_norm": 1.0503636598587036, "learning_rate": 6.300808471149922e-07, "loss": 0.4425, "step": 34716 }, { "epoch": 0.89, "grad_norm": 1.8537225723266602, "learning_rate": 6.297908681264985e-07, "loss": 0.4811, "step": 34717 }, { "epoch": 0.89, "grad_norm": 2.3795416355133057, "learning_rate": 6.295009537116004e-07, "loss": 0.5844, "step": 34718 }, { "epoch": 0.89, "grad_norm": 7.032543182373047, "learning_rate": 6.292111038722937e-07, "loss": 0.559, "step": 34719 }, { "epoch": 0.89, "grad_norm": 5.413579940795898, "learning_rate": 6.289213186105758e-07, "loss": 0.6584, "step": 34720 }, { "epoch": 0.89, "grad_norm": 12.581756591796875, "learning_rate": 6.286315979284463e-07, "loss": 0.5368, "step": 34721 }, { "epoch": 0.89, "grad_norm": 0.9900023341178894, "learning_rate": 6.283419418278991e-07, "loss": 0.531, "step": 34722 }, { "epoch": 0.89, "grad_norm": 1.7018029689788818, "learning_rate": 6.280523503109315e-07, "loss": 0.6, "step": 34723 }, { "epoch": 0.89, "grad_norm": 1.6917188167572021, "learning_rate": 6.277628233795397e-07, "loss": 0.5692, "step": 34724 }, { "epoch": 0.89, "grad_norm": 1.2253775596618652, "learning_rate": 6.274733610357175e-07, "loss": 0.3278, "step": 34725 }, { "epoch": 0.89, "grad_norm": 1.3114864826202393, "learning_rate": 6.271839632814614e-07, "loss": 0.6175, "step": 34726 }, { "epoch": 0.89, "grad_norm": 3.3114302158355713, "learning_rate": 6.26894630118764e-07, "loss": 0.5772, "step": 34727 }, { "epoch": 0.89, "grad_norm": 0.8694698214530945, "learning_rate": 6.266053615496203e-07, "loss": 0.4938, "step": 34728 }, { "epoch": 0.89, "grad_norm": 1.6710050106048584, "learning_rate": 6.263161575760246e-07, "loss": 0.4282, "step": 34729 }, { "epoch": 0.89, "grad_norm": 1.1277189254760742, "learning_rate": 6.260270181999661e-07, "loss": 0.4377, "step": 34730 }, { "epoch": 0.89, "grad_norm": 1.3984850645065308, "learning_rate": 6.257379434234422e-07, "loss": 0.5007, "step": 34731 }, { "epoch": 0.89, "grad_norm": 1.2625236511230469, "learning_rate": 6.254489332484426e-07, "loss": 0.4517, "step": 34732 }, { "epoch": 0.89, "grad_norm": 1.51841139793396, "learning_rate": 6.251599876769588e-07, "loss": 0.5402, "step": 34733 }, { "epoch": 0.89, "grad_norm": 1.1127855777740479, "learning_rate": 6.248711067109825e-07, "loss": 0.501, "step": 34734 }, { "epoch": 0.89, "grad_norm": 2.937901258468628, "learning_rate": 6.245822903525056e-07, "loss": 0.4927, "step": 34735 }, { "epoch": 0.89, "grad_norm": 2.903848886489868, "learning_rate": 6.242935386035176e-07, "loss": 0.6671, "step": 34736 }, { "epoch": 0.89, "grad_norm": 7.395209789276123, "learning_rate": 6.240048514660058e-07, "loss": 0.7134, "step": 34737 }, { "epoch": 0.89, "grad_norm": 1.8258439302444458, "learning_rate": 6.23716228941964e-07, "loss": 0.413, "step": 34738 }, { "epoch": 0.89, "grad_norm": 1.3770233392715454, "learning_rate": 6.234276710333786e-07, "loss": 0.5055, "step": 34739 }, { "epoch": 0.89, "grad_norm": 1.8350309133529663, "learning_rate": 6.23139177742238e-07, "loss": 0.5111, "step": 34740 }, { "epoch": 0.89, "grad_norm": 1.385597586631775, "learning_rate": 6.228507490705315e-07, "loss": 0.577, "step": 34741 }, { "epoch": 0.89, "grad_norm": 1.6700654029846191, "learning_rate": 6.225623850202466e-07, "loss": 0.5749, "step": 34742 }, { "epoch": 0.89, "grad_norm": 2.3245015144348145, "learning_rate": 6.222740855933717e-07, "loss": 0.5188, "step": 34743 }, { "epoch": 0.89, "grad_norm": 1.3581877946853638, "learning_rate": 6.219858507918897e-07, "loss": 0.5673, "step": 34744 }, { "epoch": 0.89, "grad_norm": 2.1874189376831055, "learning_rate": 6.216976806177899e-07, "loss": 0.3858, "step": 34745 }, { "epoch": 0.89, "grad_norm": 1.8416534662246704, "learning_rate": 6.214095750730587e-07, "loss": 0.5764, "step": 34746 }, { "epoch": 0.89, "grad_norm": 2.100524425506592, "learning_rate": 6.211215341596788e-07, "loss": 0.5759, "step": 34747 }, { "epoch": 0.89, "grad_norm": 1.3283634185791016, "learning_rate": 6.208335578796388e-07, "loss": 0.5813, "step": 34748 }, { "epoch": 0.89, "grad_norm": 1.5023143291473389, "learning_rate": 6.205456462349213e-07, "loss": 0.4693, "step": 34749 }, { "epoch": 0.89, "grad_norm": 1.285183310508728, "learning_rate": 6.202577992275094e-07, "loss": 0.5542, "step": 34750 }, { "epoch": 0.89, "grad_norm": 0.9202457070350647, "learning_rate": 6.19970016859387e-07, "loss": 0.4757, "step": 34751 }, { "epoch": 0.89, "grad_norm": 5.735982418060303, "learning_rate": 6.196822991325401e-07, "loss": 0.602, "step": 34752 }, { "epoch": 0.89, "grad_norm": 2.568702459335327, "learning_rate": 6.193946460489486e-07, "loss": 0.5983, "step": 34753 }, { "epoch": 0.89, "grad_norm": 1.901307225227356, "learning_rate": 6.19107057610595e-07, "loss": 0.5479, "step": 34754 }, { "epoch": 0.89, "grad_norm": 3.4055254459381104, "learning_rate": 6.188195338194635e-07, "loss": 0.6294, "step": 34755 }, { "epoch": 0.89, "grad_norm": 1.51276433467865, "learning_rate": 6.185320746775336e-07, "loss": 0.4737, "step": 34756 }, { "epoch": 0.89, "grad_norm": 1.098331332206726, "learning_rate": 6.182446801867848e-07, "loss": 0.4769, "step": 34757 }, { "epoch": 0.89, "grad_norm": 1.509312391281128, "learning_rate": 6.179573503492009e-07, "loss": 0.6961, "step": 34758 }, { "epoch": 0.89, "grad_norm": 1.2711374759674072, "learning_rate": 6.176700851667617e-07, "loss": 0.4747, "step": 34759 }, { "epoch": 0.89, "grad_norm": 1.5640456676483154, "learning_rate": 6.173828846414443e-07, "loss": 0.4127, "step": 34760 }, { "epoch": 0.89, "grad_norm": 1.1563791036605835, "learning_rate": 6.170957487752283e-07, "loss": 0.4344, "step": 34761 }, { "epoch": 0.89, "grad_norm": 1.2510803937911987, "learning_rate": 6.168086775700954e-07, "loss": 0.4519, "step": 34762 }, { "epoch": 0.89, "grad_norm": 1.6035746335983276, "learning_rate": 6.16521671028022e-07, "loss": 0.5777, "step": 34763 }, { "epoch": 0.89, "grad_norm": 1.64314603805542, "learning_rate": 6.162347291509841e-07, "loss": 0.6674, "step": 34764 }, { "epoch": 0.89, "grad_norm": 0.8496870398521423, "learning_rate": 6.159478519409623e-07, "loss": 0.2686, "step": 34765 }, { "epoch": 0.89, "grad_norm": 1.7069371938705444, "learning_rate": 6.15661039399933e-07, "loss": 0.5677, "step": 34766 }, { "epoch": 0.89, "grad_norm": 1.7946503162384033, "learning_rate": 6.153742915298711e-07, "loss": 0.5062, "step": 34767 }, { "epoch": 0.89, "grad_norm": 1.1063529253005981, "learning_rate": 6.150876083327529e-07, "loss": 0.5109, "step": 34768 }, { "epoch": 0.89, "grad_norm": 1.306787371635437, "learning_rate": 6.148009898105556e-07, "loss": 0.6634, "step": 34769 }, { "epoch": 0.89, "grad_norm": 1.1240673065185547, "learning_rate": 6.145144359652533e-07, "loss": 0.3283, "step": 34770 }, { "epoch": 0.89, "grad_norm": 0.8938019275665283, "learning_rate": 6.142279467988222e-07, "loss": 0.3475, "step": 34771 }, { "epoch": 0.89, "grad_norm": 1.889430046081543, "learning_rate": 6.139415223132339e-07, "loss": 0.5006, "step": 34772 }, { "epoch": 0.89, "grad_norm": 5.506486415863037, "learning_rate": 6.136551625104648e-07, "loss": 0.6164, "step": 34773 }, { "epoch": 0.89, "grad_norm": 1.182413101196289, "learning_rate": 6.133688673924854e-07, "loss": 0.496, "step": 34774 }, { "epoch": 0.89, "grad_norm": 1.9268405437469482, "learning_rate": 6.130826369612719e-07, "loss": 0.59, "step": 34775 }, { "epoch": 0.89, "grad_norm": 1.6277811527252197, "learning_rate": 6.127964712187951e-07, "loss": 0.6929, "step": 34776 }, { "epoch": 0.89, "grad_norm": 1.1781508922576904, "learning_rate": 6.125103701670265e-07, "loss": 0.5174, "step": 34777 }, { "epoch": 0.89, "grad_norm": 1.5967233180999756, "learning_rate": 6.122243338079404e-07, "loss": 0.4175, "step": 34778 }, { "epoch": 0.89, "grad_norm": 1.1304950714111328, "learning_rate": 6.119383621435049e-07, "loss": 0.5606, "step": 34779 }, { "epoch": 0.89, "grad_norm": 1.0964820384979248, "learning_rate": 6.116524551756931e-07, "loss": 0.5691, "step": 34780 }, { "epoch": 0.89, "grad_norm": 1.5976247787475586, "learning_rate": 6.113666129064733e-07, "loss": 0.4978, "step": 34781 }, { "epoch": 0.89, "grad_norm": 1.6792576313018799, "learning_rate": 6.110808353378161e-07, "loss": 0.647, "step": 34782 }, { "epoch": 0.89, "grad_norm": 1.8910322189331055, "learning_rate": 6.107951224716923e-07, "loss": 0.4134, "step": 34783 }, { "epoch": 0.89, "grad_norm": 1.3520488739013672, "learning_rate": 6.105094743100681e-07, "loss": 0.4845, "step": 34784 }, { "epoch": 0.89, "grad_norm": 16.045379638671875, "learning_rate": 6.102238908549152e-07, "loss": 0.6511, "step": 34785 }, { "epoch": 0.89, "grad_norm": 0.9841450452804565, "learning_rate": 6.099383721081987e-07, "loss": 0.3277, "step": 34786 }, { "epoch": 0.89, "grad_norm": 1.0257505178451538, "learning_rate": 6.096529180718869e-07, "loss": 0.4261, "step": 34787 }, { "epoch": 0.89, "grad_norm": 2.0497608184814453, "learning_rate": 6.093675287479495e-07, "loss": 0.5944, "step": 34788 }, { "epoch": 0.89, "grad_norm": 2.4875097274780273, "learning_rate": 6.090822041383516e-07, "loss": 0.3879, "step": 34789 }, { "epoch": 0.89, "grad_norm": 1.3468073606491089, "learning_rate": 6.087969442450581e-07, "loss": 0.5601, "step": 34790 }, { "epoch": 0.89, "grad_norm": 2.344627618789673, "learning_rate": 6.085117490700343e-07, "loss": 0.5719, "step": 34791 }, { "epoch": 0.89, "grad_norm": 1.1950037479400635, "learning_rate": 6.082266186152496e-07, "loss": 0.4863, "step": 34792 }, { "epoch": 0.89, "grad_norm": 1.2013685703277588, "learning_rate": 6.07941552882666e-07, "loss": 0.4968, "step": 34793 }, { "epoch": 0.89, "grad_norm": 1.7698007822036743, "learning_rate": 6.076565518742472e-07, "loss": 0.5928, "step": 34794 }, { "epoch": 0.89, "grad_norm": 1.1560943126678467, "learning_rate": 6.073716155919607e-07, "loss": 0.5965, "step": 34795 }, { "epoch": 0.89, "grad_norm": 2.2370152473449707, "learning_rate": 6.07086744037767e-07, "loss": 0.487, "step": 34796 }, { "epoch": 0.89, "grad_norm": 1.3476346731185913, "learning_rate": 6.068019372136313e-07, "loss": 0.4983, "step": 34797 }, { "epoch": 0.89, "grad_norm": 1.3090581893920898, "learning_rate": 6.065171951215132e-07, "loss": 0.4835, "step": 34798 }, { "epoch": 0.89, "grad_norm": 8.37944221496582, "learning_rate": 6.062325177633787e-07, "loss": 0.517, "step": 34799 }, { "epoch": 0.89, "grad_norm": 3.2396538257598877, "learning_rate": 6.059479051411876e-07, "loss": 0.6424, "step": 34800 }, { "epoch": 0.89, "grad_norm": 1.2721483707427979, "learning_rate": 6.056633572569004e-07, "loss": 0.403, "step": 34801 }, { "epoch": 0.89, "grad_norm": 5.682457447052002, "learning_rate": 6.0537887411248e-07, "loss": 0.4994, "step": 34802 }, { "epoch": 0.89, "grad_norm": 3.301661729812622, "learning_rate": 6.05094455709887e-07, "loss": 0.6857, "step": 34803 }, { "epoch": 0.89, "grad_norm": 1.2216668128967285, "learning_rate": 6.048101020510789e-07, "loss": 0.473, "step": 34804 }, { "epoch": 0.89, "grad_norm": 1.4044736623764038, "learning_rate": 6.045258131380182e-07, "loss": 0.5186, "step": 34805 }, { "epoch": 0.89, "grad_norm": 4.850429534912109, "learning_rate": 6.042415889726627e-07, "loss": 0.4904, "step": 34806 }, { "epoch": 0.89, "grad_norm": 3.4101877212524414, "learning_rate": 6.039574295569717e-07, "loss": 0.5512, "step": 34807 }, { "epoch": 0.89, "grad_norm": 1.6921215057373047, "learning_rate": 6.036733348929013e-07, "loss": 0.4834, "step": 34808 }, { "epoch": 0.89, "grad_norm": 1.1498197317123413, "learning_rate": 6.033893049824124e-07, "loss": 0.4939, "step": 34809 }, { "epoch": 0.89, "grad_norm": 1.1403027772903442, "learning_rate": 6.0310533982746e-07, "loss": 0.4989, "step": 34810 }, { "epoch": 0.89, "grad_norm": 1.3710414171218872, "learning_rate": 6.028214394300014e-07, "loss": 0.5189, "step": 34811 }, { "epoch": 0.89, "grad_norm": 8.472713470458984, "learning_rate": 6.02537603791995e-07, "loss": 0.6267, "step": 34812 }, { "epoch": 0.89, "grad_norm": 1.652599811553955, "learning_rate": 6.022538329153948e-07, "loss": 0.5456, "step": 34813 }, { "epoch": 0.89, "grad_norm": 1.4085743427276611, "learning_rate": 6.019701268021572e-07, "loss": 0.4179, "step": 34814 }, { "epoch": 0.89, "grad_norm": 1.234470009803772, "learning_rate": 6.016864854542359e-07, "loss": 0.5075, "step": 34815 }, { "epoch": 0.89, "grad_norm": 6.846564292907715, "learning_rate": 6.014029088735884e-07, "loss": 0.5474, "step": 34816 }, { "epoch": 0.89, "grad_norm": 8.106064796447754, "learning_rate": 6.011193970621665e-07, "loss": 0.6493, "step": 34817 }, { "epoch": 0.89, "grad_norm": 0.7716891765594482, "learning_rate": 6.008359500219241e-07, "loss": 0.5071, "step": 34818 }, { "epoch": 0.89, "grad_norm": 1.2358975410461426, "learning_rate": 6.005525677548163e-07, "loss": 0.5056, "step": 34819 }, { "epoch": 0.89, "grad_norm": 1.3033275604248047, "learning_rate": 6.002692502627949e-07, "loss": 0.4867, "step": 34820 }, { "epoch": 0.89, "grad_norm": 1.3829519748687744, "learning_rate": 5.999859975478117e-07, "loss": 0.5329, "step": 34821 }, { "epoch": 0.89, "grad_norm": 1.8189188241958618, "learning_rate": 5.997028096118184e-07, "loss": 0.6566, "step": 34822 }, { "epoch": 0.89, "grad_norm": 1.7218369245529175, "learning_rate": 5.994196864567692e-07, "loss": 0.6517, "step": 34823 }, { "epoch": 0.89, "grad_norm": 1.185063362121582, "learning_rate": 5.991366280846134e-07, "loss": 0.4267, "step": 34824 }, { "epoch": 0.89, "grad_norm": 5.406190872192383, "learning_rate": 5.988536344972995e-07, "loss": 0.6235, "step": 34825 }, { "epoch": 0.89, "grad_norm": 1.4441144466400146, "learning_rate": 5.985707056967827e-07, "loss": 0.5343, "step": 34826 }, { "epoch": 0.89, "grad_norm": 1.4905680418014526, "learning_rate": 5.982878416850091e-07, "loss": 0.476, "step": 34827 }, { "epoch": 0.89, "grad_norm": 1.4698125123977661, "learning_rate": 5.980050424639283e-07, "loss": 0.5067, "step": 34828 }, { "epoch": 0.89, "grad_norm": 1.318824291229248, "learning_rate": 5.97722308035491e-07, "loss": 0.5434, "step": 34829 }, { "epoch": 0.89, "grad_norm": 0.9275297522544861, "learning_rate": 5.974396384016434e-07, "loss": 0.343, "step": 34830 }, { "epoch": 0.89, "grad_norm": 1.6550531387329102, "learning_rate": 5.971570335643362e-07, "loss": 0.6089, "step": 34831 }, { "epoch": 0.89, "grad_norm": 1.533425211906433, "learning_rate": 5.968744935255133e-07, "loss": 0.4531, "step": 34832 }, { "epoch": 0.89, "grad_norm": 1.7293989658355713, "learning_rate": 5.965920182871244e-07, "loss": 0.5504, "step": 34833 }, { "epoch": 0.89, "grad_norm": 1.1747078895568848, "learning_rate": 5.963096078511166e-07, "loss": 0.5239, "step": 34834 }, { "epoch": 0.89, "grad_norm": 1.4141699075698853, "learning_rate": 5.96027262219433e-07, "loss": 0.4939, "step": 34835 }, { "epoch": 0.89, "grad_norm": 1.6137555837631226, "learning_rate": 5.957449813940219e-07, "loss": 0.2987, "step": 34836 }, { "epoch": 0.89, "grad_norm": 1.257040023803711, "learning_rate": 5.954627653768297e-07, "loss": 0.5053, "step": 34837 }, { "epoch": 0.89, "grad_norm": 1.8725279569625854, "learning_rate": 5.951806141697991e-07, "loss": 0.3195, "step": 34838 }, { "epoch": 0.89, "grad_norm": 3.9707372188568115, "learning_rate": 5.948985277748731e-07, "loss": 0.7581, "step": 34839 }, { "epoch": 0.89, "grad_norm": 5.551615238189697, "learning_rate": 5.946165061939991e-07, "loss": 0.4334, "step": 34840 }, { "epoch": 0.89, "grad_norm": 1.8102983236312866, "learning_rate": 5.943345494291197e-07, "loss": 0.6001, "step": 34841 }, { "epoch": 0.89, "grad_norm": 1.4395335912704468, "learning_rate": 5.940526574821759e-07, "loss": 0.5131, "step": 34842 }, { "epoch": 0.89, "grad_norm": 1.4396631717681885, "learning_rate": 5.937708303551126e-07, "loss": 0.4851, "step": 34843 }, { "epoch": 0.89, "grad_norm": 1.821810245513916, "learning_rate": 5.934890680498717e-07, "loss": 0.4618, "step": 34844 }, { "epoch": 0.89, "grad_norm": 9.570762634277344, "learning_rate": 5.932073705683939e-07, "loss": 0.5761, "step": 34845 }, { "epoch": 0.89, "grad_norm": 1.2278683185577393, "learning_rate": 5.929257379126196e-07, "loss": 0.5201, "step": 34846 }, { "epoch": 0.89, "grad_norm": 1.0616470575332642, "learning_rate": 5.926441700844931e-07, "loss": 0.4735, "step": 34847 }, { "epoch": 0.89, "grad_norm": 1.5896960496902466, "learning_rate": 5.923626670859516e-07, "loss": 0.6219, "step": 34848 }, { "epoch": 0.89, "grad_norm": 12.033650398254395, "learning_rate": 5.92081228918936e-07, "loss": 0.5211, "step": 34849 }, { "epoch": 0.89, "grad_norm": 2.375669479370117, "learning_rate": 5.917998555853866e-07, "loss": 0.4222, "step": 34850 }, { "epoch": 0.89, "grad_norm": 1.4496657848358154, "learning_rate": 5.91518547087242e-07, "loss": 0.4807, "step": 34851 }, { "epoch": 0.89, "grad_norm": 2.965021848678589, "learning_rate": 5.912373034264385e-07, "loss": 0.4502, "step": 34852 }, { "epoch": 0.89, "grad_norm": 1.2792490720748901, "learning_rate": 5.909561246049189e-07, "loss": 0.4706, "step": 34853 }, { "epoch": 0.89, "grad_norm": 1.2167271375656128, "learning_rate": 5.906750106246173e-07, "loss": 0.4156, "step": 34854 }, { "epoch": 0.89, "grad_norm": 1.6824116706848145, "learning_rate": 5.903939614874721e-07, "loss": 0.5511, "step": 34855 }, { "epoch": 0.89, "grad_norm": 2.796600818634033, "learning_rate": 5.901129771954195e-07, "loss": 0.5081, "step": 34856 }, { "epoch": 0.89, "grad_norm": 4.742642402648926, "learning_rate": 5.898320577503968e-07, "loss": 0.5505, "step": 34857 }, { "epoch": 0.89, "grad_norm": 1.1664137840270996, "learning_rate": 5.895512031543405e-07, "loss": 0.5198, "step": 34858 }, { "epoch": 0.89, "grad_norm": 3.540128707885742, "learning_rate": 5.892704134091831e-07, "loss": 0.5019, "step": 34859 }, { "epoch": 0.89, "grad_norm": 1.0511083602905273, "learning_rate": 5.889896885168634e-07, "loss": 0.4751, "step": 34860 }, { "epoch": 0.89, "grad_norm": 1.239500641822815, "learning_rate": 5.88709028479314e-07, "loss": 0.5424, "step": 34861 }, { "epoch": 0.89, "grad_norm": 1.1570771932601929, "learning_rate": 5.884284332984691e-07, "loss": 0.4628, "step": 34862 }, { "epoch": 0.89, "grad_norm": 5.061130523681641, "learning_rate": 5.881479029762605e-07, "loss": 0.5947, "step": 34863 }, { "epoch": 0.89, "grad_norm": 0.9600832462310791, "learning_rate": 5.878674375146254e-07, "loss": 0.5074, "step": 34864 }, { "epoch": 0.89, "grad_norm": 1.412832498550415, "learning_rate": 5.875870369154946e-07, "loss": 0.487, "step": 34865 }, { "epoch": 0.89, "grad_norm": 5.617006778717041, "learning_rate": 5.873067011807999e-07, "loss": 0.385, "step": 34866 }, { "epoch": 0.89, "grad_norm": 1.3971372842788696, "learning_rate": 5.87026430312474e-07, "loss": 0.7321, "step": 34867 }, { "epoch": 0.89, "grad_norm": 1.9431769847869873, "learning_rate": 5.867462243124489e-07, "loss": 0.612, "step": 34868 }, { "epoch": 0.89, "grad_norm": 1.9082484245300293, "learning_rate": 5.864660831826529e-07, "loss": 0.5881, "step": 34869 }, { "epoch": 0.89, "grad_norm": 3.725902795791626, "learning_rate": 5.861860069250203e-07, "loss": 0.6131, "step": 34870 }, { "epoch": 0.89, "grad_norm": 1.647434115409851, "learning_rate": 5.859059955414792e-07, "loss": 0.5765, "step": 34871 }, { "epoch": 0.89, "grad_norm": 1.080723524093628, "learning_rate": 5.856260490339593e-07, "loss": 0.3715, "step": 34872 }, { "epoch": 0.89, "grad_norm": 1.6224915981292725, "learning_rate": 5.853461674043892e-07, "loss": 0.4471, "step": 34873 }, { "epoch": 0.89, "grad_norm": 1.866047739982605, "learning_rate": 5.850663506546994e-07, "loss": 0.4981, "step": 34874 }, { "epoch": 0.89, "grad_norm": 1.6052669286727905, "learning_rate": 5.847865987868174e-07, "loss": 0.4868, "step": 34875 }, { "epoch": 0.89, "grad_norm": 1.2881731986999512, "learning_rate": 5.845069118026691e-07, "loss": 0.5827, "step": 34876 }, { "epoch": 0.89, "grad_norm": 3.886887788772583, "learning_rate": 5.842272897041857e-07, "loss": 0.6667, "step": 34877 }, { "epoch": 0.89, "grad_norm": 1.6315597295761108, "learning_rate": 5.839477324932918e-07, "loss": 0.6365, "step": 34878 }, { "epoch": 0.89, "grad_norm": 1.3742649555206299, "learning_rate": 5.836682401719152e-07, "loss": 0.3582, "step": 34879 }, { "epoch": 0.89, "grad_norm": 1.562919020652771, "learning_rate": 5.833888127419785e-07, "loss": 0.6779, "step": 34880 }, { "epoch": 0.89, "grad_norm": 1.3002794981002808, "learning_rate": 5.831094502054124e-07, "loss": 0.3745, "step": 34881 }, { "epoch": 0.89, "grad_norm": 1.2524281740188599, "learning_rate": 5.828301525641389e-07, "loss": 0.4441, "step": 34882 }, { "epoch": 0.89, "grad_norm": 1.099241852760315, "learning_rate": 5.82550919820083e-07, "loss": 0.4239, "step": 34883 }, { "epoch": 0.89, "grad_norm": 1.5896562337875366, "learning_rate": 5.822717519751709e-07, "loss": 0.5433, "step": 34884 }, { "epoch": 0.89, "grad_norm": 0.8894679546356201, "learning_rate": 5.819926490313243e-07, "loss": 0.4617, "step": 34885 }, { "epoch": 0.89, "grad_norm": 13.816229820251465, "learning_rate": 5.817136109904687e-07, "loss": 0.6884, "step": 34886 }, { "epoch": 0.89, "grad_norm": 1.1555061340332031, "learning_rate": 5.814346378545233e-07, "loss": 0.3616, "step": 34887 }, { "epoch": 0.89, "grad_norm": 1.133874535560608, "learning_rate": 5.811557296254145e-07, "loss": 0.511, "step": 34888 }, { "epoch": 0.89, "grad_norm": 1.4970147609710693, "learning_rate": 5.808768863050629e-07, "loss": 0.4398, "step": 34889 }, { "epoch": 0.89, "grad_norm": 1.4068636894226074, "learning_rate": 5.805981078953893e-07, "loss": 0.337, "step": 34890 }, { "epoch": 0.89, "grad_norm": 2.219766855239868, "learning_rate": 5.803193943983165e-07, "loss": 0.5479, "step": 34891 }, { "epoch": 0.89, "grad_norm": 1.6484824419021606, "learning_rate": 5.800407458157641e-07, "loss": 0.4995, "step": 34892 }, { "epoch": 0.89, "grad_norm": 1.1135717630386353, "learning_rate": 5.797621621496519e-07, "loss": 0.4113, "step": 34893 }, { "epoch": 0.89, "grad_norm": 1.3360692262649536, "learning_rate": 5.794836434019024e-07, "loss": 0.7157, "step": 34894 }, { "epoch": 0.89, "grad_norm": 1.5049127340316772, "learning_rate": 5.792051895744322e-07, "loss": 0.5732, "step": 34895 }, { "epoch": 0.89, "grad_norm": 1.3750675916671753, "learning_rate": 5.789268006691617e-07, "loss": 0.4292, "step": 34896 }, { "epoch": 0.89, "grad_norm": 1.2597301006317139, "learning_rate": 5.786484766880074e-07, "loss": 0.4997, "step": 34897 }, { "epoch": 0.89, "grad_norm": 2.2413294315338135, "learning_rate": 5.783702176328898e-07, "loss": 0.5356, "step": 34898 }, { "epoch": 0.89, "grad_norm": 1.9777138233184814, "learning_rate": 5.780920235057252e-07, "loss": 0.6002, "step": 34899 }, { "epoch": 0.89, "grad_norm": 2.030661106109619, "learning_rate": 5.778138943084299e-07, "loss": 0.6282, "step": 34900 }, { "epoch": 0.89, "grad_norm": 1.0577713251113892, "learning_rate": 5.775358300429224e-07, "loss": 0.4766, "step": 34901 }, { "epoch": 0.89, "grad_norm": 1.3661471605300903, "learning_rate": 5.772578307111188e-07, "loss": 0.3488, "step": 34902 }, { "epoch": 0.89, "grad_norm": 2.2932047843933105, "learning_rate": 5.769798963149342e-07, "loss": 0.5757, "step": 34903 }, { "epoch": 0.89, "grad_norm": 1.2255419492721558, "learning_rate": 5.767020268562828e-07, "loss": 0.5544, "step": 34904 }, { "epoch": 0.89, "grad_norm": 2.3980157375335693, "learning_rate": 5.764242223370819e-07, "loss": 0.4198, "step": 34905 }, { "epoch": 0.89, "grad_norm": 1.0933367013931274, "learning_rate": 5.761464827592455e-07, "loss": 0.4585, "step": 34906 }, { "epoch": 0.89, "grad_norm": 1.23659086227417, "learning_rate": 5.758688081246844e-07, "loss": 0.5214, "step": 34907 }, { "epoch": 0.89, "grad_norm": 2.102835178375244, "learning_rate": 5.755911984353169e-07, "loss": 0.8342, "step": 34908 }, { "epoch": 0.89, "grad_norm": 1.832221269607544, "learning_rate": 5.753136536930537e-07, "loss": 0.5764, "step": 34909 }, { "epoch": 0.89, "grad_norm": 3.203070640563965, "learning_rate": 5.750361738998056e-07, "loss": 0.5528, "step": 34910 }, { "epoch": 0.89, "grad_norm": 1.4728487730026245, "learning_rate": 5.747587590574888e-07, "loss": 0.4148, "step": 34911 }, { "epoch": 0.89, "grad_norm": 1.1374669075012207, "learning_rate": 5.744814091680129e-07, "loss": 0.4504, "step": 34912 }, { "epoch": 0.89, "grad_norm": 7.405977249145508, "learning_rate": 5.742041242332897e-07, "loss": 0.5997, "step": 34913 }, { "epoch": 0.89, "grad_norm": 1.1145817041397095, "learning_rate": 5.739269042552275e-07, "loss": 0.4317, "step": 34914 }, { "epoch": 0.89, "grad_norm": 2.1072866916656494, "learning_rate": 5.736497492357418e-07, "loss": 0.4602, "step": 34915 }, { "epoch": 0.89, "grad_norm": 1.4421147108078003, "learning_rate": 5.733726591767385e-07, "loss": 0.4843, "step": 34916 }, { "epoch": 0.89, "grad_norm": 1.257918119430542, "learning_rate": 5.730956340801286e-07, "loss": 0.5374, "step": 34917 }, { "epoch": 0.89, "grad_norm": 4.763691425323486, "learning_rate": 5.728186739478214e-07, "loss": 0.5504, "step": 34918 }, { "epoch": 0.89, "grad_norm": 0.8421186208724976, "learning_rate": 5.725417787817256e-07, "loss": 0.3665, "step": 34919 }, { "epoch": 0.9, "grad_norm": 1.5389333963394165, "learning_rate": 5.722649485837483e-07, "loss": 0.577, "step": 34920 }, { "epoch": 0.9, "grad_norm": 1.1510813236236572, "learning_rate": 5.719881833557973e-07, "loss": 0.5019, "step": 34921 }, { "epoch": 0.9, "grad_norm": 7.646809101104736, "learning_rate": 5.717114830997805e-07, "loss": 0.6534, "step": 34922 }, { "epoch": 0.9, "grad_norm": 1.3099638223648071, "learning_rate": 5.714348478176057e-07, "loss": 0.5065, "step": 34923 }, { "epoch": 0.9, "grad_norm": 2.1041276454925537, "learning_rate": 5.711582775111769e-07, "loss": 0.5778, "step": 34924 }, { "epoch": 0.9, "grad_norm": 1.6505025625228882, "learning_rate": 5.708817721824034e-07, "loss": 0.4978, "step": 34925 }, { "epoch": 0.9, "grad_norm": 1.504480004310608, "learning_rate": 5.706053318331884e-07, "loss": 0.6855, "step": 34926 }, { "epoch": 0.9, "grad_norm": 1.2577325105667114, "learning_rate": 5.703289564654368e-07, "loss": 0.4441, "step": 34927 }, { "epoch": 0.9, "grad_norm": 1.7970322370529175, "learning_rate": 5.700526460810529e-07, "loss": 0.3838, "step": 34928 }, { "epoch": 0.9, "grad_norm": 1.5975191593170166, "learning_rate": 5.697764006819428e-07, "loss": 0.457, "step": 34929 }, { "epoch": 0.9, "grad_norm": 1.3527023792266846, "learning_rate": 5.695002202700095e-07, "loss": 0.5581, "step": 34930 }, { "epoch": 0.9, "grad_norm": 5.560457229614258, "learning_rate": 5.692241048471536e-07, "loss": 0.5305, "step": 34931 }, { "epoch": 0.9, "grad_norm": 4.260491371154785, "learning_rate": 5.689480544152826e-07, "loss": 0.5615, "step": 34932 }, { "epoch": 0.9, "grad_norm": 1.7497305870056152, "learning_rate": 5.686720689762958e-07, "loss": 0.5369, "step": 34933 }, { "epoch": 0.9, "grad_norm": 1.088474154472351, "learning_rate": 5.683961485320944e-07, "loss": 0.5545, "step": 34934 }, { "epoch": 0.9, "grad_norm": 8.682795524597168, "learning_rate": 5.681202930845831e-07, "loss": 0.4215, "step": 34935 }, { "epoch": 0.9, "grad_norm": 2.068727731704712, "learning_rate": 5.678445026356605e-07, "loss": 0.6452, "step": 34936 }, { "epoch": 0.9, "grad_norm": 1.6126654148101807, "learning_rate": 5.675687771872273e-07, "loss": 0.5016, "step": 34937 }, { "epoch": 0.9, "grad_norm": 8.679702758789062, "learning_rate": 5.672931167411832e-07, "loss": 0.3967, "step": 34938 }, { "epoch": 0.9, "grad_norm": 1.5339329242706299, "learning_rate": 5.670175212994311e-07, "loss": 0.4927, "step": 34939 }, { "epoch": 0.9, "grad_norm": 1.5350102186203003, "learning_rate": 5.667419908638672e-07, "loss": 0.5597, "step": 34940 }, { "epoch": 0.9, "grad_norm": 1.768693208694458, "learning_rate": 5.664665254363888e-07, "loss": 0.5406, "step": 34941 }, { "epoch": 0.9, "grad_norm": 1.7998543977737427, "learning_rate": 5.661911250188978e-07, "loss": 0.5137, "step": 34942 }, { "epoch": 0.9, "grad_norm": 4.076415061950684, "learning_rate": 5.659157896132916e-07, "loss": 0.4924, "step": 34943 }, { "epoch": 0.9, "grad_norm": 2.3133182525634766, "learning_rate": 5.656405192214665e-07, "loss": 0.6003, "step": 34944 }, { "epoch": 0.9, "grad_norm": 1.686480164527893, "learning_rate": 5.653653138453185e-07, "loss": 0.5067, "step": 34945 }, { "epoch": 0.9, "grad_norm": 1.6100513935089111, "learning_rate": 5.650901734867453e-07, "loss": 0.6058, "step": 34946 }, { "epoch": 0.9, "grad_norm": 0.9743305444717407, "learning_rate": 5.648150981476441e-07, "loss": 0.4966, "step": 34947 }, { "epoch": 0.9, "grad_norm": 4.119636535644531, "learning_rate": 5.64540087829909e-07, "loss": 0.6711, "step": 34948 }, { "epoch": 0.9, "grad_norm": 1.622209072113037, "learning_rate": 5.642651425354362e-07, "loss": 0.5385, "step": 34949 }, { "epoch": 0.9, "grad_norm": 1.5619035959243774, "learning_rate": 5.639902622661197e-07, "loss": 0.4331, "step": 34950 }, { "epoch": 0.9, "grad_norm": 2.6180405616760254, "learning_rate": 5.637154470238526e-07, "loss": 0.6145, "step": 34951 }, { "epoch": 0.9, "grad_norm": 2.0422511100769043, "learning_rate": 5.63440696810531e-07, "loss": 0.5172, "step": 34952 }, { "epoch": 0.9, "grad_norm": 1.4848984479904175, "learning_rate": 5.631660116280491e-07, "loss": 0.4989, "step": 34953 }, { "epoch": 0.9, "grad_norm": 18.6064453125, "learning_rate": 5.628913914782974e-07, "loss": 0.3482, "step": 34954 }, { "epoch": 0.9, "grad_norm": 1.3375380039215088, "learning_rate": 5.626168363631679e-07, "loss": 0.5599, "step": 34955 }, { "epoch": 0.9, "grad_norm": 1.4956481456756592, "learning_rate": 5.623423462845546e-07, "loss": 0.6844, "step": 34956 }, { "epoch": 0.9, "grad_norm": 1.8400452136993408, "learning_rate": 5.620679212443502e-07, "loss": 0.3887, "step": 34957 }, { "epoch": 0.9, "grad_norm": 1.6257494688034058, "learning_rate": 5.617935612444414e-07, "loss": 0.3956, "step": 34958 }, { "epoch": 0.9, "grad_norm": 0.9551164507865906, "learning_rate": 5.615192662867242e-07, "loss": 0.4011, "step": 34959 }, { "epoch": 0.9, "grad_norm": 2.173092842102051, "learning_rate": 5.612450363730848e-07, "loss": 0.5686, "step": 34960 }, { "epoch": 0.9, "grad_norm": 4.299315929412842, "learning_rate": 5.609708715054163e-07, "loss": 0.7062, "step": 34961 }, { "epoch": 0.9, "grad_norm": 2.4427716732025146, "learning_rate": 5.606967716856037e-07, "loss": 0.6494, "step": 34962 }, { "epoch": 0.9, "grad_norm": 1.6343414783477783, "learning_rate": 5.604227369155412e-07, "loss": 0.6119, "step": 34963 }, { "epoch": 0.9, "grad_norm": 1.864068627357483, "learning_rate": 5.60148767197114e-07, "loss": 0.5973, "step": 34964 }, { "epoch": 0.9, "grad_norm": 1.8124663829803467, "learning_rate": 5.598748625322092e-07, "loss": 0.3966, "step": 34965 }, { "epoch": 0.9, "grad_norm": 7.111714839935303, "learning_rate": 5.596010229227178e-07, "loss": 0.6634, "step": 34966 }, { "epoch": 0.9, "grad_norm": 3.7958381175994873, "learning_rate": 5.593272483705248e-07, "loss": 0.4932, "step": 34967 }, { "epoch": 0.9, "grad_norm": 2.5659797191619873, "learning_rate": 5.590535388775175e-07, "loss": 0.4823, "step": 34968 }, { "epoch": 0.9, "grad_norm": 1.1196541786193848, "learning_rate": 5.587798944455803e-07, "loss": 0.4666, "step": 34969 }, { "epoch": 0.9, "grad_norm": 1.584610104560852, "learning_rate": 5.585063150766013e-07, "loss": 0.3764, "step": 34970 }, { "epoch": 0.9, "grad_norm": 1.3098784685134888, "learning_rate": 5.582328007724658e-07, "loss": 0.5438, "step": 34971 }, { "epoch": 0.9, "grad_norm": 2.079667806625366, "learning_rate": 5.579593515350568e-07, "loss": 0.5912, "step": 34972 }, { "epoch": 0.9, "grad_norm": 1.298399806022644, "learning_rate": 5.576859673662604e-07, "loss": 0.4715, "step": 34973 }, { "epoch": 0.9, "grad_norm": 1.3167006969451904, "learning_rate": 5.57412648267961e-07, "loss": 0.4107, "step": 34974 }, { "epoch": 0.9, "grad_norm": 6.651234149932861, "learning_rate": 5.571393942420389e-07, "loss": 0.5873, "step": 34975 }, { "epoch": 0.9, "grad_norm": 1.8308767080307007, "learning_rate": 5.568662052903817e-07, "loss": 0.4744, "step": 34976 }, { "epoch": 0.9, "grad_norm": 0.9631378054618835, "learning_rate": 5.56593081414869e-07, "loss": 0.598, "step": 34977 }, { "epoch": 0.9, "grad_norm": 20.799819946289062, "learning_rate": 5.563200226173848e-07, "loss": 0.5149, "step": 34978 }, { "epoch": 0.9, "grad_norm": 2.058680772781372, "learning_rate": 5.560470288998076e-07, "loss": 0.6836, "step": 34979 }, { "epoch": 0.9, "grad_norm": 1.357571005821228, "learning_rate": 5.557741002640227e-07, "loss": 0.4381, "step": 34980 }, { "epoch": 0.9, "grad_norm": 8.311293601989746, "learning_rate": 5.555012367119094e-07, "loss": 0.7369, "step": 34981 }, { "epoch": 0.9, "grad_norm": 1.2398868799209595, "learning_rate": 5.552284382453465e-07, "loss": 0.5183, "step": 34982 }, { "epoch": 0.9, "grad_norm": 1.4363361597061157, "learning_rate": 5.549557048662169e-07, "loss": 0.5236, "step": 34983 }, { "epoch": 0.9, "grad_norm": 1.5593478679656982, "learning_rate": 5.546830365763989e-07, "loss": 0.467, "step": 34984 }, { "epoch": 0.9, "grad_norm": 1.4539525508880615, "learning_rate": 5.544104333777712e-07, "loss": 0.4467, "step": 34985 }, { "epoch": 0.9, "grad_norm": 1.6349308490753174, "learning_rate": 5.541378952722109e-07, "loss": 0.5193, "step": 34986 }, { "epoch": 0.9, "grad_norm": 2.017526388168335, "learning_rate": 5.538654222615991e-07, "loss": 0.431, "step": 34987 }, { "epoch": 0.9, "grad_norm": 14.089706420898438, "learning_rate": 5.535930143478119e-07, "loss": 0.5359, "step": 34988 }, { "epoch": 0.9, "grad_norm": 2.5745153427124023, "learning_rate": 5.533206715327255e-07, "loss": 0.6201, "step": 34989 }, { "epoch": 0.9, "grad_norm": 3.355142831802368, "learning_rate": 5.530483938182196e-07, "loss": 0.5045, "step": 34990 }, { "epoch": 0.9, "grad_norm": 1.3171138763427734, "learning_rate": 5.527761812061694e-07, "loss": 0.4908, "step": 34991 }, { "epoch": 0.9, "grad_norm": 1.8052998781204224, "learning_rate": 5.525040336984489e-07, "loss": 0.5462, "step": 34992 }, { "epoch": 0.9, "grad_norm": 2.2871034145355225, "learning_rate": 5.522319512969366e-07, "loss": 0.5363, "step": 34993 }, { "epoch": 0.9, "grad_norm": 6.8058180809021, "learning_rate": 5.519599340035053e-07, "loss": 0.8544, "step": 34994 }, { "epoch": 0.9, "grad_norm": 1.78880774974823, "learning_rate": 5.516879818200305e-07, "loss": 0.61, "step": 34995 }, { "epoch": 0.9, "grad_norm": 1.1493865251541138, "learning_rate": 5.514160947483849e-07, "loss": 0.4922, "step": 34996 }, { "epoch": 0.9, "grad_norm": 1.3204877376556396, "learning_rate": 5.511442727904448e-07, "loss": 0.4203, "step": 34997 }, { "epoch": 0.9, "grad_norm": 1.2748304605484009, "learning_rate": 5.508725159480821e-07, "loss": 0.6332, "step": 34998 }, { "epoch": 0.9, "grad_norm": 1.298398733139038, "learning_rate": 5.506008242231686e-07, "loss": 0.3948, "step": 34999 }, { "epoch": 0.9, "grad_norm": 1.397546410560608, "learning_rate": 5.503291976175784e-07, "loss": 0.6164, "step": 35000 }, { "epoch": 0.9, "grad_norm": 1.1989052295684814, "learning_rate": 5.500576361331822e-07, "loss": 0.4431, "step": 35001 }, { "epoch": 0.9, "grad_norm": 3.9771995544433594, "learning_rate": 5.497861397718518e-07, "loss": 0.611, "step": 35002 }, { "epoch": 0.9, "grad_norm": 1.6755249500274658, "learning_rate": 5.495147085354568e-07, "loss": 0.647, "step": 35003 }, { "epoch": 0.9, "grad_norm": 2.01782488822937, "learning_rate": 5.492433424258714e-07, "loss": 0.6284, "step": 35004 }, { "epoch": 0.9, "grad_norm": 2.0285356044769287, "learning_rate": 5.489720414449629e-07, "loss": 0.6153, "step": 35005 }, { "epoch": 0.9, "grad_norm": 1.2035906314849854, "learning_rate": 5.487008055945986e-07, "loss": 0.6331, "step": 35006 }, { "epoch": 0.9, "grad_norm": 1.215153694152832, "learning_rate": 5.484296348766538e-07, "loss": 0.4361, "step": 35007 }, { "epoch": 0.9, "grad_norm": 1.163854956626892, "learning_rate": 5.481585292929925e-07, "loss": 0.5988, "step": 35008 }, { "epoch": 0.9, "grad_norm": 1.3003513813018799, "learning_rate": 5.478874888454844e-07, "loss": 0.4349, "step": 35009 }, { "epoch": 0.9, "grad_norm": 1.9425922632217407, "learning_rate": 5.476165135359968e-07, "loss": 0.5085, "step": 35010 }, { "epoch": 0.9, "grad_norm": 2.982774496078491, "learning_rate": 5.473456033663982e-07, "loss": 0.434, "step": 35011 }, { "epoch": 0.9, "grad_norm": 1.1341978311538696, "learning_rate": 5.47074758338556e-07, "loss": 0.5194, "step": 35012 }, { "epoch": 0.9, "grad_norm": 1.6851550340652466, "learning_rate": 5.468039784543344e-07, "loss": 0.5673, "step": 35013 }, { "epoch": 0.9, "grad_norm": 2.1848602294921875, "learning_rate": 5.465332637156017e-07, "loss": 0.7099, "step": 35014 }, { "epoch": 0.9, "grad_norm": 0.841304361820221, "learning_rate": 5.462626141242222e-07, "loss": 0.5291, "step": 35015 }, { "epoch": 0.9, "grad_norm": 1.6561148166656494, "learning_rate": 5.459920296820598e-07, "loss": 0.4442, "step": 35016 }, { "epoch": 0.9, "grad_norm": 2.044412851333618, "learning_rate": 5.457215103909818e-07, "loss": 0.6845, "step": 35017 }, { "epoch": 0.9, "grad_norm": 1.7139712572097778, "learning_rate": 5.454510562528526e-07, "loss": 0.5131, "step": 35018 }, { "epoch": 0.9, "grad_norm": 1.1518034934997559, "learning_rate": 5.451806672695326e-07, "loss": 0.628, "step": 35019 }, { "epoch": 0.9, "grad_norm": 1.6844348907470703, "learning_rate": 5.449103434428882e-07, "loss": 0.6163, "step": 35020 }, { "epoch": 0.9, "grad_norm": 1.3921542167663574, "learning_rate": 5.446400847747824e-07, "loss": 0.5686, "step": 35021 }, { "epoch": 0.9, "grad_norm": 1.422747015953064, "learning_rate": 5.443698912670758e-07, "loss": 0.4569, "step": 35022 }, { "epoch": 0.9, "grad_norm": 4.248854637145996, "learning_rate": 5.440997629216305e-07, "loss": 0.72, "step": 35023 }, { "epoch": 0.9, "grad_norm": 22.342329025268555, "learning_rate": 5.438296997403103e-07, "loss": 0.6329, "step": 35024 }, { "epoch": 0.9, "grad_norm": 19.226032257080078, "learning_rate": 5.43559701724975e-07, "loss": 0.4333, "step": 35025 }, { "epoch": 0.9, "grad_norm": 1.1626471281051636, "learning_rate": 5.432897688774829e-07, "loss": 0.4335, "step": 35026 }, { "epoch": 0.9, "grad_norm": 4.0506086349487305, "learning_rate": 5.430199011996984e-07, "loss": 0.508, "step": 35027 }, { "epoch": 0.9, "grad_norm": 0.8605509996414185, "learning_rate": 5.427500986934797e-07, "loss": 0.5547, "step": 35028 }, { "epoch": 0.9, "grad_norm": 1.9845473766326904, "learning_rate": 5.424803613606833e-07, "loss": 0.4542, "step": 35029 }, { "epoch": 0.9, "grad_norm": 2.1435983180999756, "learning_rate": 5.42210689203172e-07, "loss": 0.5775, "step": 35030 }, { "epoch": 0.9, "grad_norm": 2.657094955444336, "learning_rate": 5.419410822228032e-07, "loss": 0.4503, "step": 35031 }, { "epoch": 0.9, "grad_norm": 1.8328580856323242, "learning_rate": 5.416715404214345e-07, "loss": 0.6698, "step": 35032 }, { "epoch": 0.9, "grad_norm": 1.0231338739395142, "learning_rate": 5.41402063800921e-07, "loss": 0.3649, "step": 35033 }, { "epoch": 0.9, "grad_norm": 1.2662194967269897, "learning_rate": 5.411326523631244e-07, "loss": 0.5803, "step": 35034 }, { "epoch": 0.9, "grad_norm": 3.8284366130828857, "learning_rate": 5.408633061098989e-07, "loss": 0.6801, "step": 35035 }, { "epoch": 0.9, "grad_norm": 1.5283619165420532, "learning_rate": 5.405940250430985e-07, "loss": 0.4201, "step": 35036 }, { "epoch": 0.9, "grad_norm": 1.664528489112854, "learning_rate": 5.40324809164583e-07, "loss": 0.4704, "step": 35037 }, { "epoch": 0.9, "grad_norm": 5.305538654327393, "learning_rate": 5.400556584762051e-07, "loss": 0.5112, "step": 35038 }, { "epoch": 0.9, "grad_norm": 1.084940791130066, "learning_rate": 5.397865729798212e-07, "loss": 0.4549, "step": 35039 }, { "epoch": 0.9, "grad_norm": 1.2553659677505493, "learning_rate": 5.395175526772823e-07, "loss": 0.536, "step": 35040 }, { "epoch": 0.9, "grad_norm": 0.9574212431907654, "learning_rate": 5.392485975704465e-07, "loss": 0.4517, "step": 35041 }, { "epoch": 0.9, "grad_norm": 0.9605914354324341, "learning_rate": 5.389797076611658e-07, "loss": 0.4656, "step": 35042 }, { "epoch": 0.9, "grad_norm": 1.9126943349838257, "learning_rate": 5.387108829512921e-07, "loss": 0.5141, "step": 35043 }, { "epoch": 0.9, "grad_norm": 1.3744157552719116, "learning_rate": 5.384421234426796e-07, "loss": 0.5, "step": 35044 }, { "epoch": 0.9, "grad_norm": 1.8010904788970947, "learning_rate": 5.381734291371798e-07, "loss": 0.5134, "step": 35045 }, { "epoch": 0.9, "grad_norm": 1.4528383016586304, "learning_rate": 5.379048000366427e-07, "loss": 0.5261, "step": 35046 }, { "epoch": 0.9, "grad_norm": 1.997304916381836, "learning_rate": 5.376362361429222e-07, "loss": 0.5009, "step": 35047 }, { "epoch": 0.9, "grad_norm": 2.035076379776001, "learning_rate": 5.37367737457869e-07, "loss": 0.3543, "step": 35048 }, { "epoch": 0.9, "grad_norm": 1.491331934928894, "learning_rate": 5.370993039833317e-07, "loss": 0.5317, "step": 35049 }, { "epoch": 0.9, "grad_norm": 1.5688190460205078, "learning_rate": 5.368309357211587e-07, "loss": 0.5435, "step": 35050 }, { "epoch": 0.9, "grad_norm": 3.0063300132751465, "learning_rate": 5.365626326732032e-07, "loss": 0.5488, "step": 35051 }, { "epoch": 0.9, "grad_norm": 1.223374605178833, "learning_rate": 5.362943948413135e-07, "loss": 0.4732, "step": 35052 }, { "epoch": 0.9, "grad_norm": 4.21068811416626, "learning_rate": 5.360262222273349e-07, "loss": 0.6023, "step": 35053 }, { "epoch": 0.9, "grad_norm": 2.015868902206421, "learning_rate": 5.357581148331193e-07, "loss": 0.6086, "step": 35054 }, { "epoch": 0.9, "grad_norm": 3.5424935817718506, "learning_rate": 5.354900726605128e-07, "loss": 0.6643, "step": 35055 }, { "epoch": 0.9, "grad_norm": 8.03613567352295, "learning_rate": 5.352220957113629e-07, "loss": 0.5152, "step": 35056 }, { "epoch": 0.9, "grad_norm": 1.7773388624191284, "learning_rate": 5.349541839875139e-07, "loss": 0.5382, "step": 35057 }, { "epoch": 0.9, "grad_norm": 1.252722144126892, "learning_rate": 5.346863374908162e-07, "loss": 0.4408, "step": 35058 }, { "epoch": 0.9, "grad_norm": 1.3666934967041016, "learning_rate": 5.34418556223113e-07, "loss": 0.516, "step": 35059 }, { "epoch": 0.9, "grad_norm": 2.2195687294006348, "learning_rate": 5.341508401862483e-07, "loss": 0.68, "step": 35060 }, { "epoch": 0.9, "grad_norm": 1.6848095655441284, "learning_rate": 5.338831893820718e-07, "loss": 0.6469, "step": 35061 }, { "epoch": 0.9, "grad_norm": 1.194836974143982, "learning_rate": 5.33615603812424e-07, "loss": 0.4142, "step": 35062 }, { "epoch": 0.9, "grad_norm": 1.3712451457977295, "learning_rate": 5.333480834791504e-07, "loss": 0.4686, "step": 35063 }, { "epoch": 0.9, "grad_norm": 1.746266484260559, "learning_rate": 5.330806283840939e-07, "loss": 0.5594, "step": 35064 }, { "epoch": 0.9, "grad_norm": 6.354395389556885, "learning_rate": 5.328132385290985e-07, "loss": 0.4696, "step": 35065 }, { "epoch": 0.9, "grad_norm": 2.7741777896881104, "learning_rate": 5.325459139160061e-07, "loss": 0.6776, "step": 35066 }, { "epoch": 0.9, "grad_norm": 1.2193102836608887, "learning_rate": 5.322786545466585e-07, "loss": 0.556, "step": 35067 }, { "epoch": 0.9, "grad_norm": 1.603728175163269, "learning_rate": 5.320114604228988e-07, "loss": 0.5068, "step": 35068 }, { "epoch": 0.9, "grad_norm": 3.552098512649536, "learning_rate": 5.317443315465687e-07, "loss": 0.4348, "step": 35069 }, { "epoch": 0.9, "grad_norm": 4.691623687744141, "learning_rate": 5.314772679195058e-07, "loss": 0.5662, "step": 35070 }, { "epoch": 0.9, "grad_norm": 1.4077816009521484, "learning_rate": 5.31210269543555e-07, "loss": 0.4952, "step": 35071 }, { "epoch": 0.9, "grad_norm": 1.2446080446243286, "learning_rate": 5.30943336420554e-07, "loss": 0.5686, "step": 35072 }, { "epoch": 0.9, "grad_norm": 1.4699641466140747, "learning_rate": 5.306764685523425e-07, "loss": 0.5157, "step": 35073 }, { "epoch": 0.9, "grad_norm": 1.829388976097107, "learning_rate": 5.304096659407576e-07, "loss": 0.4521, "step": 35074 }, { "epoch": 0.9, "grad_norm": 4.212850093841553, "learning_rate": 5.301429285876414e-07, "loss": 0.6851, "step": 35075 }, { "epoch": 0.9, "grad_norm": 1.5695420503616333, "learning_rate": 5.298762564948312e-07, "loss": 0.3619, "step": 35076 }, { "epoch": 0.9, "grad_norm": 5.914524078369141, "learning_rate": 5.296096496641623e-07, "loss": 0.5609, "step": 35077 }, { "epoch": 0.9, "grad_norm": 1.7960219383239746, "learning_rate": 5.293431080974753e-07, "loss": 0.7353, "step": 35078 }, { "epoch": 0.9, "grad_norm": 1.7933106422424316, "learning_rate": 5.290766317966057e-07, "loss": 0.6538, "step": 35079 }, { "epoch": 0.9, "grad_norm": 3.0441062450408936, "learning_rate": 5.288102207633894e-07, "loss": 0.5117, "step": 35080 }, { "epoch": 0.9, "grad_norm": 1.4587054252624512, "learning_rate": 5.285438749996618e-07, "loss": 0.4776, "step": 35081 }, { "epoch": 0.9, "grad_norm": 1.4200276136398315, "learning_rate": 5.282775945072604e-07, "loss": 0.4554, "step": 35082 }, { "epoch": 0.9, "grad_norm": 1.6528576612472534, "learning_rate": 5.280113792880193e-07, "loss": 0.61, "step": 35083 }, { "epoch": 0.9, "grad_norm": 1.482660174369812, "learning_rate": 5.277452293437713e-07, "loss": 0.5, "step": 35084 }, { "epoch": 0.9, "grad_norm": 1.17268705368042, "learning_rate": 5.274791446763539e-07, "loss": 0.5881, "step": 35085 }, { "epoch": 0.9, "grad_norm": 1.1672238111495972, "learning_rate": 5.272131252875978e-07, "loss": 0.7154, "step": 35086 }, { "epoch": 0.9, "grad_norm": 2.432607889175415, "learning_rate": 5.269471711793372e-07, "loss": 0.356, "step": 35087 }, { "epoch": 0.9, "grad_norm": 3.5231401920318604, "learning_rate": 5.266812823534062e-07, "loss": 0.573, "step": 35088 }, { "epoch": 0.9, "grad_norm": 9.385924339294434, "learning_rate": 5.264154588116355e-07, "loss": 0.6969, "step": 35089 }, { "epoch": 0.9, "grad_norm": 7.1483001708984375, "learning_rate": 5.261497005558591e-07, "loss": 0.5744, "step": 35090 }, { "epoch": 0.9, "grad_norm": 1.614115595817566, "learning_rate": 5.258840075879046e-07, "loss": 0.5076, "step": 35091 }, { "epoch": 0.9, "grad_norm": 1.3327993154525757, "learning_rate": 5.25618379909606e-07, "loss": 0.5465, "step": 35092 }, { "epoch": 0.9, "grad_norm": 1.695241928100586, "learning_rate": 5.25352817522794e-07, "loss": 0.4851, "step": 35093 }, { "epoch": 0.9, "grad_norm": 2.625621795654297, "learning_rate": 5.250873204292961e-07, "loss": 0.5746, "step": 35094 }, { "epoch": 0.9, "grad_norm": 2.0182695388793945, "learning_rate": 5.248218886309453e-07, "loss": 0.5006, "step": 35095 }, { "epoch": 0.9, "grad_norm": 1.3593199253082275, "learning_rate": 5.24556522129569e-07, "loss": 0.5379, "step": 35096 }, { "epoch": 0.9, "grad_norm": 1.0339953899383545, "learning_rate": 5.242912209269957e-07, "loss": 0.3768, "step": 35097 }, { "epoch": 0.9, "grad_norm": 1.0344382524490356, "learning_rate": 5.240259850250529e-07, "loss": 0.5308, "step": 35098 }, { "epoch": 0.9, "grad_norm": 2.3544533252716064, "learning_rate": 5.237608144255713e-07, "loss": 0.5601, "step": 35099 }, { "epoch": 0.9, "grad_norm": 1.6954277753829956, "learning_rate": 5.234957091303772e-07, "loss": 0.5293, "step": 35100 }, { "epoch": 0.9, "grad_norm": 1.080773949623108, "learning_rate": 5.232306691412947e-07, "loss": 0.4887, "step": 35101 }, { "epoch": 0.9, "grad_norm": 8.959056854248047, "learning_rate": 5.229656944601536e-07, "loss": 0.6827, "step": 35102 }, { "epoch": 0.9, "grad_norm": 1.6950627565383911, "learning_rate": 5.227007850887799e-07, "loss": 0.5321, "step": 35103 }, { "epoch": 0.9, "grad_norm": 1.3942369222640991, "learning_rate": 5.224359410289981e-07, "loss": 0.6545, "step": 35104 }, { "epoch": 0.9, "grad_norm": 1.4092540740966797, "learning_rate": 5.221711622826309e-07, "loss": 0.573, "step": 35105 }, { "epoch": 0.9, "grad_norm": 1.407453179359436, "learning_rate": 5.21906448851508e-07, "loss": 0.5007, "step": 35106 }, { "epoch": 0.9, "grad_norm": 1.5049030780792236, "learning_rate": 5.216418007374502e-07, "loss": 0.4186, "step": 35107 }, { "epoch": 0.9, "grad_norm": 1.76094651222229, "learning_rate": 5.213772179422815e-07, "loss": 0.5652, "step": 35108 }, { "epoch": 0.9, "grad_norm": 1.8416602611541748, "learning_rate": 5.211127004678274e-07, "loss": 0.5158, "step": 35109 }, { "epoch": 0.9, "grad_norm": 1.542533278465271, "learning_rate": 5.208482483159094e-07, "loss": 0.5842, "step": 35110 }, { "epoch": 0.9, "grad_norm": 1.1216442584991455, "learning_rate": 5.205838614883474e-07, "loss": 0.4494, "step": 35111 }, { "epoch": 0.9, "grad_norm": 1.8723788261413574, "learning_rate": 5.203195399869687e-07, "loss": 0.6538, "step": 35112 }, { "epoch": 0.9, "grad_norm": 1.1912554502487183, "learning_rate": 5.200552838135908e-07, "loss": 0.5418, "step": 35113 }, { "epoch": 0.9, "grad_norm": 1.7881861925125122, "learning_rate": 5.197910929700367e-07, "loss": 0.6269, "step": 35114 }, { "epoch": 0.9, "grad_norm": 2.30020809173584, "learning_rate": 5.195269674581249e-07, "loss": 0.6817, "step": 35115 }, { "epoch": 0.9, "grad_norm": 3.016867160797119, "learning_rate": 5.192629072796784e-07, "loss": 0.4109, "step": 35116 }, { "epoch": 0.9, "grad_norm": 1.1744935512542725, "learning_rate": 5.189989124365158e-07, "loss": 0.3753, "step": 35117 }, { "epoch": 0.9, "grad_norm": 3.1323158740997314, "learning_rate": 5.187349829304544e-07, "loss": 0.6411, "step": 35118 }, { "epoch": 0.9, "grad_norm": 1.1106425523757935, "learning_rate": 5.184711187633163e-07, "loss": 0.4897, "step": 35119 }, { "epoch": 0.9, "grad_norm": 1.3884427547454834, "learning_rate": 5.182073199369175e-07, "loss": 0.4647, "step": 35120 }, { "epoch": 0.9, "grad_norm": 1.1485538482666016, "learning_rate": 5.17943586453078e-07, "loss": 0.4194, "step": 35121 }, { "epoch": 0.9, "grad_norm": 1.1715205907821655, "learning_rate": 5.176799183136127e-07, "loss": 0.474, "step": 35122 }, { "epoch": 0.9, "grad_norm": 1.9996381998062134, "learning_rate": 5.174163155203404e-07, "loss": 0.5201, "step": 35123 }, { "epoch": 0.9, "grad_norm": 1.3467352390289307, "learning_rate": 5.171527780750785e-07, "loss": 0.4929, "step": 35124 }, { "epoch": 0.9, "grad_norm": 2.1618850231170654, "learning_rate": 5.168893059796398e-07, "loss": 0.5794, "step": 35125 }, { "epoch": 0.9, "grad_norm": 1.333310842514038, "learning_rate": 5.166258992358441e-07, "loss": 0.5085, "step": 35126 }, { "epoch": 0.9, "grad_norm": 1.5025084018707275, "learning_rate": 5.163625578455044e-07, "loss": 0.4772, "step": 35127 }, { "epoch": 0.9, "grad_norm": 1.2480709552764893, "learning_rate": 5.160992818104338e-07, "loss": 0.4429, "step": 35128 }, { "epoch": 0.9, "grad_norm": 1.2454743385314941, "learning_rate": 5.158360711324506e-07, "loss": 0.4438, "step": 35129 }, { "epoch": 0.9, "grad_norm": 1.0620603561401367, "learning_rate": 5.155729258133668e-07, "loss": 0.4537, "step": 35130 }, { "epoch": 0.9, "grad_norm": 1.6768195629119873, "learning_rate": 5.153098458549943e-07, "loss": 0.5921, "step": 35131 }, { "epoch": 0.9, "grad_norm": 1.7079710960388184, "learning_rate": 5.150468312591472e-07, "loss": 0.4245, "step": 35132 }, { "epoch": 0.9, "grad_norm": 1.6721433401107788, "learning_rate": 5.147838820276396e-07, "loss": 0.6288, "step": 35133 }, { "epoch": 0.9, "grad_norm": 1.0277926921844482, "learning_rate": 5.145209981622834e-07, "loss": 0.5958, "step": 35134 }, { "epoch": 0.9, "grad_norm": 1.4446512460708618, "learning_rate": 5.14258179664886e-07, "loss": 0.4801, "step": 35135 }, { "epoch": 0.9, "grad_norm": 1.3923004865646362, "learning_rate": 5.139954265372638e-07, "loss": 0.5499, "step": 35136 }, { "epoch": 0.9, "grad_norm": 1.5370415449142456, "learning_rate": 5.137327387812252e-07, "loss": 0.3934, "step": 35137 }, { "epoch": 0.9, "grad_norm": 4.186748504638672, "learning_rate": 5.134701163985811e-07, "loss": 0.5563, "step": 35138 }, { "epoch": 0.9, "grad_norm": 1.333545446395874, "learning_rate": 5.13207559391139e-07, "loss": 0.4026, "step": 35139 }, { "epoch": 0.9, "grad_norm": 1.7225708961486816, "learning_rate": 5.129450677607129e-07, "loss": 0.3262, "step": 35140 }, { "epoch": 0.9, "grad_norm": 1.214401364326477, "learning_rate": 5.12682641509108e-07, "loss": 0.4097, "step": 35141 }, { "epoch": 0.9, "grad_norm": 1.9908688068389893, "learning_rate": 5.124202806381318e-07, "loss": 0.4027, "step": 35142 }, { "epoch": 0.9, "grad_norm": 1.6710278987884521, "learning_rate": 5.121579851495972e-07, "loss": 0.5087, "step": 35143 }, { "epoch": 0.9, "grad_norm": 3.8705835342407227, "learning_rate": 5.118957550453075e-07, "loss": 0.5466, "step": 35144 }, { "epoch": 0.9, "grad_norm": 1.6925983428955078, "learning_rate": 5.116335903270719e-07, "loss": 0.4433, "step": 35145 }, { "epoch": 0.9, "grad_norm": 1.5218998193740845, "learning_rate": 5.113714909966949e-07, "loss": 0.4832, "step": 35146 }, { "epoch": 0.9, "grad_norm": 1.4330374002456665, "learning_rate": 5.11109457055986e-07, "loss": 0.6568, "step": 35147 }, { "epoch": 0.9, "grad_norm": 1.8948482275009155, "learning_rate": 5.108474885067494e-07, "loss": 0.498, "step": 35148 }, { "epoch": 0.9, "grad_norm": 1.3454763889312744, "learning_rate": 5.105855853507879e-07, "loss": 0.4823, "step": 35149 }, { "epoch": 0.9, "grad_norm": 2.048537254333496, "learning_rate": 5.103237475899103e-07, "loss": 0.3118, "step": 35150 }, { "epoch": 0.9, "grad_norm": 1.2464985847473145, "learning_rate": 5.100619752259206e-07, "loss": 0.5425, "step": 35151 }, { "epoch": 0.9, "grad_norm": 1.123868465423584, "learning_rate": 5.098002682606195e-07, "loss": 0.4253, "step": 35152 }, { "epoch": 0.9, "grad_norm": 1.4007189273834229, "learning_rate": 5.095386266958135e-07, "loss": 0.4615, "step": 35153 }, { "epoch": 0.9, "grad_norm": 1.2113690376281738, "learning_rate": 5.092770505333056e-07, "loss": 0.4649, "step": 35154 }, { "epoch": 0.9, "grad_norm": 1.333569884300232, "learning_rate": 5.090155397748964e-07, "loss": 0.6089, "step": 35155 }, { "epoch": 0.9, "grad_norm": 1.5940706729888916, "learning_rate": 5.08754094422389e-07, "loss": 0.4148, "step": 35156 }, { "epoch": 0.9, "grad_norm": 1.246427059173584, "learning_rate": 5.084927144775864e-07, "loss": 0.5326, "step": 35157 }, { "epoch": 0.9, "grad_norm": 1.7178775072097778, "learning_rate": 5.082313999422895e-07, "loss": 0.5044, "step": 35158 }, { "epoch": 0.9, "grad_norm": 2.5992679595947266, "learning_rate": 5.079701508182966e-07, "loss": 0.5035, "step": 35159 }, { "epoch": 0.9, "grad_norm": 1.1978518962860107, "learning_rate": 5.077089671074109e-07, "loss": 0.3739, "step": 35160 }, { "epoch": 0.9, "grad_norm": 1.39155113697052, "learning_rate": 5.074478488114321e-07, "loss": 0.589, "step": 35161 }, { "epoch": 0.9, "grad_norm": 1.4893604516983032, "learning_rate": 5.071867959321585e-07, "loss": 0.546, "step": 35162 }, { "epoch": 0.9, "grad_norm": 2.094215154647827, "learning_rate": 5.069258084713879e-07, "loss": 0.5027, "step": 35163 }, { "epoch": 0.9, "grad_norm": 1.117207646369934, "learning_rate": 5.06664886430922e-07, "loss": 0.382, "step": 35164 }, { "epoch": 0.9, "grad_norm": 2.7370474338531494, "learning_rate": 5.064040298125572e-07, "loss": 0.4206, "step": 35165 }, { "epoch": 0.9, "grad_norm": 2.0052921772003174, "learning_rate": 5.061432386180909e-07, "loss": 0.5352, "step": 35166 }, { "epoch": 0.9, "grad_norm": 9.158243179321289, "learning_rate": 5.058825128493216e-07, "loss": 0.5309, "step": 35167 }, { "epoch": 0.9, "grad_norm": 0.8543481826782227, "learning_rate": 5.056218525080447e-07, "loss": 0.4828, "step": 35168 }, { "epoch": 0.9, "grad_norm": 15.663484573364258, "learning_rate": 5.053612575960575e-07, "loss": 0.7069, "step": 35169 }, { "epoch": 0.9, "grad_norm": 1.2089675664901733, "learning_rate": 5.051007281151543e-07, "loss": 0.5186, "step": 35170 }, { "epoch": 0.9, "grad_norm": 5.603659152984619, "learning_rate": 5.048402640671335e-07, "loss": 0.7184, "step": 35171 }, { "epoch": 0.9, "grad_norm": 3.040104866027832, "learning_rate": 5.04579865453787e-07, "loss": 0.6628, "step": 35172 }, { "epoch": 0.9, "grad_norm": 1.8119930028915405, "learning_rate": 5.043195322769101e-07, "loss": 0.6122, "step": 35173 }, { "epoch": 0.9, "grad_norm": 5.213520050048828, "learning_rate": 5.04059264538298e-07, "loss": 0.4605, "step": 35174 }, { "epoch": 0.9, "grad_norm": 1.554818034172058, "learning_rate": 5.037990622397427e-07, "loss": 0.5234, "step": 35175 }, { "epoch": 0.9, "grad_norm": 3.943250894546509, "learning_rate": 5.03538925383038e-07, "loss": 0.5123, "step": 35176 }, { "epoch": 0.9, "grad_norm": 6.15150785446167, "learning_rate": 5.032788539699773e-07, "loss": 0.5053, "step": 35177 }, { "epoch": 0.9, "grad_norm": 1.6266323328018188, "learning_rate": 5.030188480023534e-07, "loss": 0.4627, "step": 35178 }, { "epoch": 0.9, "grad_norm": 2.195732355117798, "learning_rate": 5.027589074819561e-07, "loss": 0.4161, "step": 35179 }, { "epoch": 0.9, "grad_norm": 1.3396998643875122, "learning_rate": 5.024990324105761e-07, "loss": 0.4588, "step": 35180 }, { "epoch": 0.9, "grad_norm": 1.2856577634811401, "learning_rate": 5.022392227900086e-07, "loss": 0.5824, "step": 35181 }, { "epoch": 0.9, "grad_norm": 1.2837949991226196, "learning_rate": 5.0197947862204e-07, "loss": 0.5302, "step": 35182 }, { "epoch": 0.9, "grad_norm": 1.3606888055801392, "learning_rate": 5.017197999084611e-07, "loss": 0.5237, "step": 35183 }, { "epoch": 0.9, "grad_norm": 1.738802194595337, "learning_rate": 5.014601866510626e-07, "loss": 0.454, "step": 35184 }, { "epoch": 0.9, "grad_norm": 2.01918888092041, "learning_rate": 5.012006388516332e-07, "loss": 0.5775, "step": 35185 }, { "epoch": 0.9, "grad_norm": 1.2034595012664795, "learning_rate": 5.009411565119604e-07, "loss": 0.4712, "step": 35186 }, { "epoch": 0.9, "grad_norm": 1.50234055519104, "learning_rate": 5.006817396338326e-07, "loss": 0.5257, "step": 35187 }, { "epoch": 0.9, "grad_norm": 1.516700267791748, "learning_rate": 5.004223882190406e-07, "loss": 0.4912, "step": 35188 }, { "epoch": 0.9, "grad_norm": 2.4985218048095703, "learning_rate": 5.001631022693676e-07, "loss": 0.3948, "step": 35189 }, { "epoch": 0.9, "grad_norm": 1.5211708545684814, "learning_rate": 4.99903881786602e-07, "loss": 0.5325, "step": 35190 }, { "epoch": 0.9, "grad_norm": 1.548531413078308, "learning_rate": 4.996447267725313e-07, "loss": 0.4125, "step": 35191 }, { "epoch": 0.9, "grad_norm": 1.4705106019973755, "learning_rate": 4.993856372289396e-07, "loss": 0.565, "step": 35192 }, { "epoch": 0.9, "grad_norm": 1.6453546285629272, "learning_rate": 4.991266131576133e-07, "loss": 0.4077, "step": 35193 }, { "epoch": 0.9, "grad_norm": 2.8615481853485107, "learning_rate": 4.988676545603377e-07, "loss": 0.4231, "step": 35194 }, { "epoch": 0.9, "grad_norm": 1.1796091794967651, "learning_rate": 4.986087614388968e-07, "loss": 0.3206, "step": 35195 }, { "epoch": 0.9, "grad_norm": 12.721915245056152, "learning_rate": 4.983499337950759e-07, "loss": 0.5963, "step": 35196 }, { "epoch": 0.9, "grad_norm": 3.2555458545684814, "learning_rate": 4.980911716306558e-07, "loss": 0.557, "step": 35197 }, { "epoch": 0.9, "grad_norm": 5.211446762084961, "learning_rate": 4.978324749474228e-07, "loss": 0.7609, "step": 35198 }, { "epoch": 0.9, "grad_norm": 3.5847599506378174, "learning_rate": 4.975738437471589e-07, "loss": 0.5739, "step": 35199 }, { "epoch": 0.9, "grad_norm": 1.2001880407333374, "learning_rate": 4.973152780316448e-07, "loss": 0.4609, "step": 35200 }, { "epoch": 0.9, "grad_norm": 2.1205615997314453, "learning_rate": 4.970567778026647e-07, "loss": 0.4834, "step": 35201 }, { "epoch": 0.9, "grad_norm": 2.662229061126709, "learning_rate": 4.967983430619994e-07, "loss": 0.6409, "step": 35202 }, { "epoch": 0.9, "grad_norm": 1.628190040588379, "learning_rate": 4.965399738114296e-07, "loss": 0.4533, "step": 35203 }, { "epoch": 0.9, "grad_norm": 1.5942189693450928, "learning_rate": 4.96281670052734e-07, "loss": 0.557, "step": 35204 }, { "epoch": 0.9, "grad_norm": 9.748647689819336, "learning_rate": 4.960234317876955e-07, "loss": 0.6368, "step": 35205 }, { "epoch": 0.9, "grad_norm": 1.4497874975204468, "learning_rate": 4.957652590180917e-07, "loss": 0.4871, "step": 35206 }, { "epoch": 0.9, "grad_norm": 1.3040999174118042, "learning_rate": 4.955071517457021e-07, "loss": 0.5326, "step": 35207 }, { "epoch": 0.9, "grad_norm": 1.4583326578140259, "learning_rate": 4.952491099723078e-07, "loss": 0.5775, "step": 35208 }, { "epoch": 0.9, "grad_norm": 1.5136767625808716, "learning_rate": 4.949911336996838e-07, "loss": 0.4848, "step": 35209 }, { "epoch": 0.9, "grad_norm": 1.052710771560669, "learning_rate": 4.947332229296108e-07, "loss": 0.3293, "step": 35210 }, { "epoch": 0.9, "grad_norm": 9.97500991821289, "learning_rate": 4.944753776638622e-07, "loss": 0.6521, "step": 35211 }, { "epoch": 0.9, "grad_norm": 2.921440601348877, "learning_rate": 4.942175979042196e-07, "loss": 0.6047, "step": 35212 }, { "epoch": 0.9, "grad_norm": 1.2083193063735962, "learning_rate": 4.939598836524561e-07, "loss": 0.496, "step": 35213 }, { "epoch": 0.9, "grad_norm": 1.7226020097732544, "learning_rate": 4.937022349103482e-07, "loss": 0.5768, "step": 35214 }, { "epoch": 0.9, "grad_norm": 0.8940639495849609, "learning_rate": 4.934446516796732e-07, "loss": 0.4029, "step": 35215 }, { "epoch": 0.9, "grad_norm": 1.0932586193084717, "learning_rate": 4.931871339622052e-07, "loss": 0.452, "step": 35216 }, { "epoch": 0.9, "grad_norm": 1.3133403062820435, "learning_rate": 4.929296817597174e-07, "loss": 0.4317, "step": 35217 }, { "epoch": 0.9, "grad_norm": 1.2285555601119995, "learning_rate": 4.926722950739859e-07, "loss": 0.4404, "step": 35218 }, { "epoch": 0.9, "grad_norm": 4.242293834686279, "learning_rate": 4.924149739067841e-07, "loss": 0.6379, "step": 35219 }, { "epoch": 0.9, "grad_norm": 5.41796875, "learning_rate": 4.921577182598858e-07, "loss": 0.5357, "step": 35220 }, { "epoch": 0.9, "grad_norm": 1.8680226802825928, "learning_rate": 4.919005281350608e-07, "loss": 0.5918, "step": 35221 }, { "epoch": 0.9, "grad_norm": 5.704257488250732, "learning_rate": 4.916434035340856e-07, "loss": 0.829, "step": 35222 }, { "epoch": 0.9, "grad_norm": 1.4679189920425415, "learning_rate": 4.913863444587308e-07, "loss": 0.4718, "step": 35223 }, { "epoch": 0.9, "grad_norm": 1.500269889831543, "learning_rate": 4.911293509107651e-07, "loss": 0.4949, "step": 35224 }, { "epoch": 0.9, "grad_norm": 2.7373881340026855, "learning_rate": 4.908724228919636e-07, "loss": 0.7013, "step": 35225 }, { "epoch": 0.9, "grad_norm": 7.94612979888916, "learning_rate": 4.906155604040963e-07, "loss": 0.7118, "step": 35226 }, { "epoch": 0.9, "grad_norm": 1.5871925354003906, "learning_rate": 4.903587634489304e-07, "loss": 0.3655, "step": 35227 }, { "epoch": 0.9, "grad_norm": 1.5078710317611694, "learning_rate": 4.901020320282368e-07, "loss": 0.5305, "step": 35228 }, { "epoch": 0.9, "grad_norm": 1.538304090499878, "learning_rate": 4.898453661437874e-07, "loss": 0.5608, "step": 35229 }, { "epoch": 0.9, "grad_norm": 2.179619789123535, "learning_rate": 4.895887657973475e-07, "loss": 0.6381, "step": 35230 }, { "epoch": 0.9, "grad_norm": 4.386651039123535, "learning_rate": 4.893322309906867e-07, "loss": 0.553, "step": 35231 }, { "epoch": 0.9, "grad_norm": 0.9475570321083069, "learning_rate": 4.890757617255737e-07, "loss": 0.3875, "step": 35232 }, { "epoch": 0.9, "grad_norm": 2.4423065185546875, "learning_rate": 4.888193580037748e-07, "loss": 0.5493, "step": 35233 }, { "epoch": 0.9, "grad_norm": 1.3913888931274414, "learning_rate": 4.885630198270564e-07, "loss": 0.4948, "step": 35234 }, { "epoch": 0.9, "grad_norm": 0.9703997373580933, "learning_rate": 4.88306747197187e-07, "loss": 0.426, "step": 35235 }, { "epoch": 0.9, "grad_norm": 5.455413341522217, "learning_rate": 4.880505401159319e-07, "loss": 0.6333, "step": 35236 }, { "epoch": 0.9, "grad_norm": 8.522406578063965, "learning_rate": 4.877943985850564e-07, "loss": 0.5587, "step": 35237 }, { "epoch": 0.9, "grad_norm": 1.453555941581726, "learning_rate": 4.875383226063246e-07, "loss": 0.515, "step": 35238 }, { "epoch": 0.9, "grad_norm": 4.09616231918335, "learning_rate": 4.87282312181504e-07, "loss": 0.6454, "step": 35239 }, { "epoch": 0.9, "grad_norm": 2.1494719982147217, "learning_rate": 4.870263673123577e-07, "loss": 0.4515, "step": 35240 }, { "epoch": 0.9, "grad_norm": 3.867095470428467, "learning_rate": 4.867704880006463e-07, "loss": 0.4537, "step": 35241 }, { "epoch": 0.9, "grad_norm": 0.9888855218887329, "learning_rate": 4.865146742481374e-07, "loss": 0.4825, "step": 35242 }, { "epoch": 0.9, "grad_norm": 1.8265836238861084, "learning_rate": 4.862589260565931e-07, "loss": 0.3904, "step": 35243 }, { "epoch": 0.9, "grad_norm": 2.2408053874969482, "learning_rate": 4.86003243427775e-07, "loss": 0.5091, "step": 35244 }, { "epoch": 0.9, "grad_norm": 1.2237621545791626, "learning_rate": 4.857476263634442e-07, "loss": 0.5415, "step": 35245 }, { "epoch": 0.9, "grad_norm": 1.948814034461975, "learning_rate": 4.854920748653646e-07, "loss": 0.4892, "step": 35246 }, { "epoch": 0.9, "grad_norm": 1.2053749561309814, "learning_rate": 4.85236588935295e-07, "loss": 0.5679, "step": 35247 }, { "epoch": 0.9, "grad_norm": 1.4470497369766235, "learning_rate": 4.849811685749972e-07, "loss": 0.5573, "step": 35248 }, { "epoch": 0.9, "grad_norm": 1.2700870037078857, "learning_rate": 4.847258137862321e-07, "loss": 0.5838, "step": 35249 }, { "epoch": 0.9, "grad_norm": 1.3213073015213013, "learning_rate": 4.844705245707581e-07, "loss": 0.4023, "step": 35250 }, { "epoch": 0.9, "grad_norm": 1.3781936168670654, "learning_rate": 4.842153009303352e-07, "loss": 0.4676, "step": 35251 }, { "epoch": 0.9, "grad_norm": 0.9080002307891846, "learning_rate": 4.839601428667206e-07, "loss": 0.4535, "step": 35252 }, { "epoch": 0.9, "grad_norm": 1.023949146270752, "learning_rate": 4.837050503816754e-07, "loss": 0.4633, "step": 35253 }, { "epoch": 0.9, "grad_norm": 1.0878185033798218, "learning_rate": 4.834500234769568e-07, "loss": 0.4976, "step": 35254 }, { "epoch": 0.9, "grad_norm": 1.7807905673980713, "learning_rate": 4.831950621543213e-07, "loss": 0.5723, "step": 35255 }, { "epoch": 0.9, "grad_norm": 12.138132095336914, "learning_rate": 4.829401664155264e-07, "loss": 0.5186, "step": 35256 }, { "epoch": 0.9, "grad_norm": 2.305241107940674, "learning_rate": 4.826853362623296e-07, "loss": 0.629, "step": 35257 }, { "epoch": 0.9, "grad_norm": 5.3618364334106445, "learning_rate": 4.824305716964839e-07, "loss": 0.5837, "step": 35258 }, { "epoch": 0.9, "grad_norm": 1.5517572164535522, "learning_rate": 4.821758727197489e-07, "loss": 0.5546, "step": 35259 }, { "epoch": 0.9, "grad_norm": 1.0330524444580078, "learning_rate": 4.819212393338779e-07, "loss": 0.433, "step": 35260 }, { "epoch": 0.9, "grad_norm": 1.9415547847747803, "learning_rate": 4.816666715406237e-07, "loss": 0.4623, "step": 35261 }, { "epoch": 0.9, "grad_norm": 1.2755895853042603, "learning_rate": 4.81412169341745e-07, "loss": 0.4335, "step": 35262 }, { "epoch": 0.9, "grad_norm": 1.7617862224578857, "learning_rate": 4.811577327389938e-07, "loss": 0.5065, "step": 35263 }, { "epoch": 0.9, "grad_norm": 1.3934345245361328, "learning_rate": 4.80903361734123e-07, "loss": 0.4567, "step": 35264 }, { "epoch": 0.9, "grad_norm": 1.4285091161727905, "learning_rate": 4.806490563288835e-07, "loss": 0.4674, "step": 35265 }, { "epoch": 0.9, "grad_norm": 1.1281476020812988, "learning_rate": 4.803948165250328e-07, "loss": 0.4548, "step": 35266 }, { "epoch": 0.9, "grad_norm": 1.2661709785461426, "learning_rate": 4.801406423243193e-07, "loss": 0.4657, "step": 35267 }, { "epoch": 0.9, "grad_norm": 5.199464321136475, "learning_rate": 4.798865337284942e-07, "loss": 0.565, "step": 35268 }, { "epoch": 0.9, "grad_norm": 1.441049575805664, "learning_rate": 4.796324907393113e-07, "loss": 0.5903, "step": 35269 }, { "epoch": 0.9, "grad_norm": 1.5340505838394165, "learning_rate": 4.793785133585205e-07, "loss": 0.5359, "step": 35270 }, { "epoch": 0.9, "grad_norm": 1.0607026815414429, "learning_rate": 4.791246015878703e-07, "loss": 0.4927, "step": 35271 }, { "epoch": 0.9, "grad_norm": 1.859773874282837, "learning_rate": 4.788707554291128e-07, "loss": 0.5207, "step": 35272 }, { "epoch": 0.9, "grad_norm": 1.3950191736221313, "learning_rate": 4.786169748839953e-07, "loss": 0.4348, "step": 35273 }, { "epoch": 0.9, "grad_norm": 1.307437539100647, "learning_rate": 4.783632599542687e-07, "loss": 0.5274, "step": 35274 }, { "epoch": 0.9, "grad_norm": 3.4836373329162598, "learning_rate": 4.781096106416794e-07, "loss": 0.5761, "step": 35275 }, { "epoch": 0.9, "grad_norm": 1.2777179479599, "learning_rate": 4.778560269479771e-07, "loss": 0.5218, "step": 35276 }, { "epoch": 0.9, "grad_norm": 1.1819301843643188, "learning_rate": 4.776025088749092e-07, "loss": 0.4118, "step": 35277 }, { "epoch": 0.9, "grad_norm": 3.069418430328369, "learning_rate": 4.7734905642422e-07, "loss": 0.5631, "step": 35278 }, { "epoch": 0.9, "grad_norm": 1.782597303390503, "learning_rate": 4.770956695976614e-07, "loss": 0.6352, "step": 35279 }, { "epoch": 0.9, "grad_norm": 1.2610692977905273, "learning_rate": 4.768423483969752e-07, "loss": 0.5082, "step": 35280 }, { "epoch": 0.9, "grad_norm": 1.5188437700271606, "learning_rate": 4.765890928239092e-07, "loss": 0.7376, "step": 35281 }, { "epoch": 0.9, "grad_norm": 1.5959217548370361, "learning_rate": 4.7633590288020615e-07, "loss": 0.3786, "step": 35282 }, { "epoch": 0.9, "grad_norm": 1.4359089136123657, "learning_rate": 4.7608277856761364e-07, "loss": 0.6529, "step": 35283 }, { "epoch": 0.9, "grad_norm": 1.5960979461669922, "learning_rate": 4.7582971988787585e-07, "loss": 0.627, "step": 35284 }, { "epoch": 0.9, "grad_norm": 1.4896022081375122, "learning_rate": 4.755767268427347e-07, "loss": 0.5096, "step": 35285 }, { "epoch": 0.9, "grad_norm": 2.379002094268799, "learning_rate": 4.7532379943393547e-07, "loss": 0.3977, "step": 35286 }, { "epoch": 0.9, "grad_norm": 1.9166138172149658, "learning_rate": 4.7507093766322123e-07, "loss": 0.5294, "step": 35287 }, { "epoch": 0.9, "grad_norm": 1.1711822748184204, "learning_rate": 4.748181415323327e-07, "loss": 0.4237, "step": 35288 }, { "epoch": 0.9, "grad_norm": 1.4427956342697144, "learning_rate": 4.745654110430142e-07, "loss": 0.3767, "step": 35289 }, { "epoch": 0.9, "grad_norm": 1.2891889810562134, "learning_rate": 4.743127461970065e-07, "loss": 0.3832, "step": 35290 }, { "epoch": 0.9, "grad_norm": 1.1998225450515747, "learning_rate": 4.740601469960504e-07, "loss": 0.4286, "step": 35291 }, { "epoch": 0.9, "grad_norm": 2.8673346042633057, "learning_rate": 4.7380761344188677e-07, "loss": 0.5783, "step": 35292 }, { "epoch": 0.9, "grad_norm": 1.498522400856018, "learning_rate": 4.735551455362564e-07, "loss": 0.3501, "step": 35293 }, { "epoch": 0.9, "grad_norm": 1.5843456983566284, "learning_rate": 4.73302743280899e-07, "loss": 0.3845, "step": 35294 }, { "epoch": 0.9, "grad_norm": 1.2912330627441406, "learning_rate": 4.730504066775532e-07, "loss": 0.5386, "step": 35295 }, { "epoch": 0.9, "grad_norm": 1.5229063034057617, "learning_rate": 4.7279813572795987e-07, "loss": 0.5273, "step": 35296 }, { "epoch": 0.9, "grad_norm": 1.6960598230361938, "learning_rate": 4.7254593043385534e-07, "loss": 0.5676, "step": 35297 }, { "epoch": 0.9, "grad_norm": 4.481015682220459, "learning_rate": 4.722937907969793e-07, "loss": 0.4947, "step": 35298 }, { "epoch": 0.9, "grad_norm": 1.115782380104065, "learning_rate": 4.7204171681906605e-07, "loss": 0.4151, "step": 35299 }, { "epoch": 0.9, "grad_norm": 0.9613991975784302, "learning_rate": 4.7178970850185746e-07, "loss": 0.5623, "step": 35300 }, { "epoch": 0.9, "grad_norm": 7.3051066398620605, "learning_rate": 4.715377658470888e-07, "loss": 0.4632, "step": 35301 }, { "epoch": 0.9, "grad_norm": 1.5124422311782837, "learning_rate": 4.712858888564931e-07, "loss": 0.5758, "step": 35302 }, { "epoch": 0.9, "grad_norm": 6.156271457672119, "learning_rate": 4.710340775318101e-07, "loss": 0.4048, "step": 35303 }, { "epoch": 0.9, "grad_norm": 14.897895812988281, "learning_rate": 4.707823318747728e-07, "loss": 0.4793, "step": 35304 }, { "epoch": 0.9, "grad_norm": 1.5796477794647217, "learning_rate": 4.7053065188711777e-07, "loss": 0.4781, "step": 35305 }, { "epoch": 0.9, "grad_norm": 2.02506422996521, "learning_rate": 4.702790375705768e-07, "loss": 0.6618, "step": 35306 }, { "epoch": 0.9, "grad_norm": 0.9212126731872559, "learning_rate": 4.7002748892688743e-07, "loss": 0.4045, "step": 35307 }, { "epoch": 0.9, "grad_norm": 1.8588483333587646, "learning_rate": 4.697760059577805e-07, "loss": 0.4662, "step": 35308 }, { "epoch": 0.9, "grad_norm": 1.3931432962417603, "learning_rate": 4.6952458866498905e-07, "loss": 0.5472, "step": 35309 }, { "epoch": 0.91, "grad_norm": 1.8057786226272583, "learning_rate": 4.692732370502484e-07, "loss": 0.4263, "step": 35310 }, { "epoch": 0.91, "grad_norm": 1.5349384546279907, "learning_rate": 4.6902195111528715e-07, "loss": 0.5745, "step": 35311 }, { "epoch": 0.91, "grad_norm": 0.8854836821556091, "learning_rate": 4.687707308618383e-07, "loss": 0.4319, "step": 35312 }, { "epoch": 0.91, "grad_norm": 1.2598259449005127, "learning_rate": 4.6851957629163504e-07, "loss": 0.4324, "step": 35313 }, { "epoch": 0.91, "grad_norm": 1.6494700908660889, "learning_rate": 4.6826848740640695e-07, "loss": 0.457, "step": 35314 }, { "epoch": 0.91, "grad_norm": 2.2170605659484863, "learning_rate": 4.6801746420788273e-07, "loss": 0.5838, "step": 35315 }, { "epoch": 0.91, "grad_norm": 2.5527384281158447, "learning_rate": 4.677665066977932e-07, "loss": 0.3441, "step": 35316 }, { "epoch": 0.91, "grad_norm": 6.1510329246521, "learning_rate": 4.6751561487786925e-07, "loss": 0.5348, "step": 35317 }, { "epoch": 0.91, "grad_norm": 2.618685245513916, "learning_rate": 4.672647887498394e-07, "loss": 0.5988, "step": 35318 }, { "epoch": 0.91, "grad_norm": 1.1886210441589355, "learning_rate": 4.67014028315429e-07, "loss": 0.5063, "step": 35319 }, { "epoch": 0.91, "grad_norm": 1.4983845949172974, "learning_rate": 4.6676333357637103e-07, "loss": 0.5184, "step": 35320 }, { "epoch": 0.91, "grad_norm": 1.9321285486221313, "learning_rate": 4.665127045343898e-07, "loss": 0.4412, "step": 35321 }, { "epoch": 0.91, "grad_norm": 1.5754327774047852, "learning_rate": 4.662621411912149e-07, "loss": 0.4007, "step": 35322 }, { "epoch": 0.91, "grad_norm": 1.8899610042572021, "learning_rate": 4.660116435485695e-07, "loss": 0.468, "step": 35323 }, { "epoch": 0.91, "grad_norm": 1.5860663652420044, "learning_rate": 4.657612116081833e-07, "loss": 0.5976, "step": 35324 }, { "epoch": 0.91, "grad_norm": 1.611566424369812, "learning_rate": 4.655108453717805e-07, "loss": 0.5125, "step": 35325 }, { "epoch": 0.91, "grad_norm": 1.0882741212844849, "learning_rate": 4.652605448410863e-07, "loss": 0.42, "step": 35326 }, { "epoch": 0.91, "grad_norm": 2.7330105304718018, "learning_rate": 4.6501031001782715e-07, "loss": 0.5156, "step": 35327 }, { "epoch": 0.91, "grad_norm": 1.342085838317871, "learning_rate": 4.647601409037261e-07, "loss": 0.4517, "step": 35328 }, { "epoch": 0.91, "grad_norm": 3.265726327896118, "learning_rate": 4.6451003750050626e-07, "loss": 0.5995, "step": 35329 }, { "epoch": 0.91, "grad_norm": 1.855244755744934, "learning_rate": 4.64259999809894e-07, "loss": 0.4999, "step": 35330 }, { "epoch": 0.91, "grad_norm": 1.5117261409759521, "learning_rate": 4.6401002783361126e-07, "loss": 0.3602, "step": 35331 }, { "epoch": 0.91, "grad_norm": 1.0086121559143066, "learning_rate": 4.6376012157338003e-07, "loss": 0.5477, "step": 35332 }, { "epoch": 0.91, "grad_norm": 1.261649250984192, "learning_rate": 4.635102810309211e-07, "loss": 0.4273, "step": 35333 }, { "epoch": 0.91, "grad_norm": 1.262047290802002, "learning_rate": 4.6326050620795873e-07, "loss": 0.4982, "step": 35334 }, { "epoch": 0.91, "grad_norm": 3.640200614929199, "learning_rate": 4.6301079710621475e-07, "loss": 0.5281, "step": 35335 }, { "epoch": 0.91, "grad_norm": 1.2656354904174805, "learning_rate": 4.627611537274068e-07, "loss": 0.6498, "step": 35336 }, { "epoch": 0.91, "grad_norm": 1.9372730255126953, "learning_rate": 4.625115760732579e-07, "loss": 0.5758, "step": 35337 }, { "epoch": 0.91, "grad_norm": 1.1151418685913086, "learning_rate": 4.622620641454878e-07, "loss": 0.456, "step": 35338 }, { "epoch": 0.91, "grad_norm": 2.875330686569214, "learning_rate": 4.620126179458151e-07, "loss": 0.3739, "step": 35339 }, { "epoch": 0.91, "grad_norm": 3.087629556655884, "learning_rate": 4.617632374759573e-07, "loss": 0.6022, "step": 35340 }, { "epoch": 0.91, "grad_norm": 2.45967698097229, "learning_rate": 4.615139227376375e-07, "loss": 0.5913, "step": 35341 }, { "epoch": 0.91, "grad_norm": 1.5398706197738647, "learning_rate": 4.612646737325699e-07, "loss": 0.5023, "step": 35342 }, { "epoch": 0.91, "grad_norm": 1.2333273887634277, "learning_rate": 4.61015490462472e-07, "loss": 0.4571, "step": 35343 }, { "epoch": 0.91, "grad_norm": 1.2731281518936157, "learning_rate": 4.607663729290646e-07, "loss": 0.5192, "step": 35344 }, { "epoch": 0.91, "grad_norm": 1.438319206237793, "learning_rate": 4.6051732113406077e-07, "loss": 0.54, "step": 35345 }, { "epoch": 0.91, "grad_norm": 1.1927409172058105, "learning_rate": 4.6026833507917924e-07, "loss": 0.4182, "step": 35346 }, { "epoch": 0.91, "grad_norm": 2.587918996810913, "learning_rate": 4.600194147661341e-07, "loss": 0.6516, "step": 35347 }, { "epoch": 0.91, "grad_norm": 1.0840752124786377, "learning_rate": 4.597705601966418e-07, "loss": 0.4824, "step": 35348 }, { "epoch": 0.91, "grad_norm": 5.930566310882568, "learning_rate": 4.595217713724176e-07, "loss": 0.5721, "step": 35349 }, { "epoch": 0.91, "grad_norm": 1.7696175575256348, "learning_rate": 4.592730482951735e-07, "loss": 0.4804, "step": 35350 }, { "epoch": 0.91, "grad_norm": 5.830190181732178, "learning_rate": 4.590243909666281e-07, "loss": 0.5561, "step": 35351 }, { "epoch": 0.91, "grad_norm": 1.5165820121765137, "learning_rate": 4.587757993884912e-07, "loss": 0.6221, "step": 35352 }, { "epoch": 0.91, "grad_norm": 1.3503825664520264, "learning_rate": 4.5852727356247575e-07, "loss": 0.4813, "step": 35353 }, { "epoch": 0.91, "grad_norm": 1.8743189573287964, "learning_rate": 4.582788134902982e-07, "loss": 0.5659, "step": 35354 }, { "epoch": 0.91, "grad_norm": 1.243299126625061, "learning_rate": 4.580304191736673e-07, "loss": 0.5291, "step": 35355 }, { "epoch": 0.91, "grad_norm": 1.166595458984375, "learning_rate": 4.57782090614296e-07, "loss": 0.4226, "step": 35356 }, { "epoch": 0.91, "grad_norm": 0.9681333303451538, "learning_rate": 4.575338278138952e-07, "loss": 0.6131, "step": 35357 }, { "epoch": 0.91, "grad_norm": 1.7085986137390137, "learning_rate": 4.5728563077417574e-07, "loss": 0.6278, "step": 35358 }, { "epoch": 0.91, "grad_norm": 1.762721300125122, "learning_rate": 4.570374994968496e-07, "loss": 0.5515, "step": 35359 }, { "epoch": 0.91, "grad_norm": 4.307423114776611, "learning_rate": 4.5678943398362427e-07, "loss": 0.708, "step": 35360 }, { "epoch": 0.91, "grad_norm": 1.1921005249023438, "learning_rate": 4.5654143423621176e-07, "loss": 0.625, "step": 35361 }, { "epoch": 0.91, "grad_norm": 2.176154613494873, "learning_rate": 4.562935002563196e-07, "loss": 0.3769, "step": 35362 }, { "epoch": 0.91, "grad_norm": 2.646127700805664, "learning_rate": 4.5604563204565634e-07, "loss": 0.6309, "step": 35363 }, { "epoch": 0.91, "grad_norm": 1.657588005065918, "learning_rate": 4.557978296059296e-07, "loss": 0.5991, "step": 35364 }, { "epoch": 0.91, "grad_norm": 9.35758113861084, "learning_rate": 4.55550092938849e-07, "loss": 0.5488, "step": 35365 }, { "epoch": 0.91, "grad_norm": 1.0721924304962158, "learning_rate": 4.5530242204612105e-07, "loss": 0.5124, "step": 35366 }, { "epoch": 0.91, "grad_norm": 1.4138281345367432, "learning_rate": 4.550548169294511e-07, "loss": 0.5298, "step": 35367 }, { "epoch": 0.91, "grad_norm": 1.564042568206787, "learning_rate": 4.5480727759054767e-07, "loss": 0.5043, "step": 35368 }, { "epoch": 0.91, "grad_norm": 1.3081774711608887, "learning_rate": 4.545598040311161e-07, "loss": 0.4478, "step": 35369 }, { "epoch": 0.91, "grad_norm": 1.6457867622375488, "learning_rate": 4.543123962528595e-07, "loss": 0.6098, "step": 35370 }, { "epoch": 0.91, "grad_norm": 1.6845213174819946, "learning_rate": 4.5406505425748536e-07, "loss": 0.6238, "step": 35371 }, { "epoch": 0.91, "grad_norm": 1.152181625366211, "learning_rate": 4.53817778046699e-07, "loss": 0.4183, "step": 35372 }, { "epoch": 0.91, "grad_norm": 1.5365978479385376, "learning_rate": 4.535705676222024e-07, "loss": 0.4998, "step": 35373 }, { "epoch": 0.91, "grad_norm": 0.9754654765129089, "learning_rate": 4.5332342298569755e-07, "loss": 0.4794, "step": 35374 }, { "epoch": 0.91, "grad_norm": 1.0751073360443115, "learning_rate": 4.530763441388919e-07, "loss": 0.3886, "step": 35375 }, { "epoch": 0.91, "grad_norm": 1.7915807962417603, "learning_rate": 4.528293310834864e-07, "loss": 0.5846, "step": 35376 }, { "epoch": 0.91, "grad_norm": 52.587642669677734, "learning_rate": 4.5258238382118183e-07, "loss": 0.6009, "step": 35377 }, { "epoch": 0.91, "grad_norm": 2.368051052093506, "learning_rate": 4.523355023536824e-07, "loss": 0.6593, "step": 35378 }, { "epoch": 0.91, "grad_norm": 0.8527941703796387, "learning_rate": 4.5208868668268793e-07, "loss": 0.4071, "step": 35379 }, { "epoch": 0.91, "grad_norm": 1.9313791990280151, "learning_rate": 4.5184193680989916e-07, "loss": 0.5904, "step": 35380 }, { "epoch": 0.91, "grad_norm": 1.3717809915542603, "learning_rate": 4.515952527370171e-07, "loss": 0.6284, "step": 35381 }, { "epoch": 0.91, "grad_norm": 0.9817925095558167, "learning_rate": 4.513486344657414e-07, "loss": 0.6102, "step": 35382 }, { "epoch": 0.91, "grad_norm": 1.327146291732788, "learning_rate": 4.5110208199777293e-07, "loss": 0.5587, "step": 35383 }, { "epoch": 0.91, "grad_norm": 2.220590353012085, "learning_rate": 4.5085559533480815e-07, "loss": 0.6381, "step": 35384 }, { "epoch": 0.91, "grad_norm": 1.7699246406555176, "learning_rate": 4.506091744785479e-07, "loss": 0.7116, "step": 35385 }, { "epoch": 0.91, "grad_norm": 1.2083394527435303, "learning_rate": 4.503628194306908e-07, "loss": 0.4821, "step": 35386 }, { "epoch": 0.91, "grad_norm": 1.3271691799163818, "learning_rate": 4.501165301929333e-07, "loss": 0.5553, "step": 35387 }, { "epoch": 0.91, "grad_norm": 0.998075544834137, "learning_rate": 4.4987030676697075e-07, "loss": 0.494, "step": 35388 }, { "epoch": 0.91, "grad_norm": 2.0474188327789307, "learning_rate": 4.4962414915450393e-07, "loss": 0.5888, "step": 35389 }, { "epoch": 0.91, "grad_norm": 1.0813260078430176, "learning_rate": 4.4937805735722705e-07, "loss": 0.5371, "step": 35390 }, { "epoch": 0.91, "grad_norm": 2.8152778148651123, "learning_rate": 4.491320313768355e-07, "loss": 0.5199, "step": 35391 }, { "epoch": 0.91, "grad_norm": 8.165822982788086, "learning_rate": 4.488860712150267e-07, "loss": 0.5217, "step": 35392 }, { "epoch": 0.91, "grad_norm": 1.3912919759750366, "learning_rate": 4.486401768734938e-07, "loss": 0.432, "step": 35393 }, { "epoch": 0.91, "grad_norm": 1.7100064754486084, "learning_rate": 4.4839434835393214e-07, "loss": 0.5349, "step": 35394 }, { "epoch": 0.91, "grad_norm": 1.9041321277618408, "learning_rate": 4.4814858565803586e-07, "loss": 0.5662, "step": 35395 }, { "epoch": 0.91, "grad_norm": 1.1755141019821167, "learning_rate": 4.479028887874992e-07, "loss": 0.4902, "step": 35396 }, { "epoch": 0.91, "grad_norm": 1.2768107652664185, "learning_rate": 4.4765725774401413e-07, "loss": 0.5057, "step": 35397 }, { "epoch": 0.91, "grad_norm": 2.3736605644226074, "learning_rate": 4.474116925292726e-07, "loss": 0.4675, "step": 35398 }, { "epoch": 0.91, "grad_norm": 1.457268476486206, "learning_rate": 4.471661931449689e-07, "loss": 0.4702, "step": 35399 }, { "epoch": 0.91, "grad_norm": 1.277985692024231, "learning_rate": 4.4692075959279493e-07, "loss": 0.4505, "step": 35400 }, { "epoch": 0.91, "grad_norm": 1.4101991653442383, "learning_rate": 4.4667539187444043e-07, "loss": 0.6258, "step": 35401 }, { "epoch": 0.91, "grad_norm": 2.0127310752868652, "learning_rate": 4.4643008999159745e-07, "loss": 0.444, "step": 35402 }, { "epoch": 0.91, "grad_norm": 1.2345550060272217, "learning_rate": 4.461848539459568e-07, "loss": 0.397, "step": 35403 }, { "epoch": 0.91, "grad_norm": 1.239537239074707, "learning_rate": 4.4593968373920713e-07, "loss": 0.5296, "step": 35404 }, { "epoch": 0.91, "grad_norm": 1.5689988136291504, "learning_rate": 4.456945793730372e-07, "loss": 0.6553, "step": 35405 }, { "epoch": 0.91, "grad_norm": 1.264517068862915, "learning_rate": 4.454495408491399e-07, "loss": 0.4766, "step": 35406 }, { "epoch": 0.91, "grad_norm": 1.5039931535720825, "learning_rate": 4.4520456816920075e-07, "loss": 0.5623, "step": 35407 }, { "epoch": 0.91, "grad_norm": 23.220849990844727, "learning_rate": 4.4495966133490833e-07, "loss": 0.3525, "step": 35408 }, { "epoch": 0.91, "grad_norm": 1.4484084844589233, "learning_rate": 4.447148203479523e-07, "loss": 0.5058, "step": 35409 }, { "epoch": 0.91, "grad_norm": 1.2111327648162842, "learning_rate": 4.4447004521001813e-07, "loss": 0.4078, "step": 35410 }, { "epoch": 0.91, "grad_norm": 1.4397823810577393, "learning_rate": 4.4422533592279106e-07, "loss": 0.5536, "step": 35411 }, { "epoch": 0.91, "grad_norm": 1.411226749420166, "learning_rate": 4.439806924879619e-07, "loss": 0.5717, "step": 35412 }, { "epoch": 0.91, "grad_norm": 1.7147107124328613, "learning_rate": 4.437361149072128e-07, "loss": 0.5486, "step": 35413 }, { "epoch": 0.91, "grad_norm": 10.949289321899414, "learning_rate": 4.4349160318223115e-07, "loss": 0.5398, "step": 35414 }, { "epoch": 0.91, "grad_norm": 1.7237964868545532, "learning_rate": 4.4324715731470123e-07, "loss": 0.501, "step": 35415 }, { "epoch": 0.91, "grad_norm": 1.4637898206710815, "learning_rate": 4.4300277730630723e-07, "loss": 0.4541, "step": 35416 }, { "epoch": 0.91, "grad_norm": 1.815152883529663, "learning_rate": 4.427584631587345e-07, "loss": 0.534, "step": 35417 }, { "epoch": 0.91, "grad_norm": 1.447047472000122, "learning_rate": 4.42514214873665e-07, "loss": 0.5217, "step": 35418 }, { "epoch": 0.91, "grad_norm": 1.238257646560669, "learning_rate": 4.42270032452784e-07, "loss": 0.4979, "step": 35419 }, { "epoch": 0.91, "grad_norm": 1.777400255203247, "learning_rate": 4.4202591589777356e-07, "loss": 0.6254, "step": 35420 }, { "epoch": 0.91, "grad_norm": 1.1183397769927979, "learning_rate": 4.4178186521031565e-07, "loss": 0.349, "step": 35421 }, { "epoch": 0.91, "grad_norm": 1.2210370302200317, "learning_rate": 4.4153788039209e-07, "loss": 0.5622, "step": 35422 }, { "epoch": 0.91, "grad_norm": 2.0533828735351562, "learning_rate": 4.41293961444782e-07, "loss": 0.5544, "step": 35423 }, { "epoch": 0.91, "grad_norm": 3.6570048332214355, "learning_rate": 4.4105010837007025e-07, "loss": 0.5384, "step": 35424 }, { "epoch": 0.91, "grad_norm": 1.9342707395553589, "learning_rate": 4.4080632116963453e-07, "loss": 0.4892, "step": 35425 }, { "epoch": 0.91, "grad_norm": 1.318128228187561, "learning_rate": 4.405625998451568e-07, "loss": 0.6049, "step": 35426 }, { "epoch": 0.91, "grad_norm": 2.3818702697753906, "learning_rate": 4.403189443983169e-07, "loss": 0.4801, "step": 35427 }, { "epoch": 0.91, "grad_norm": 1.141128420829773, "learning_rate": 4.4007535483079235e-07, "loss": 0.56, "step": 35428 }, { "epoch": 0.91, "grad_norm": 3.965111017227173, "learning_rate": 4.3983183114426063e-07, "loss": 0.4114, "step": 35429 }, { "epoch": 0.91, "grad_norm": 0.9897357821464539, "learning_rate": 4.395883733404027e-07, "loss": 0.4199, "step": 35430 }, { "epoch": 0.91, "grad_norm": 1.5084335803985596, "learning_rate": 4.393449814208961e-07, "loss": 0.589, "step": 35431 }, { "epoch": 0.91, "grad_norm": 2.048084259033203, "learning_rate": 4.391016553874161e-07, "loss": 0.4469, "step": 35432 }, { "epoch": 0.91, "grad_norm": 1.3086802959442139, "learning_rate": 4.388583952416414e-07, "loss": 0.3383, "step": 35433 }, { "epoch": 0.91, "grad_norm": 1.9453617334365845, "learning_rate": 4.3861520098524733e-07, "loss": 0.4414, "step": 35434 }, { "epoch": 0.91, "grad_norm": 1.5050849914550781, "learning_rate": 4.3837207261991035e-07, "loss": 0.4186, "step": 35435 }, { "epoch": 0.91, "grad_norm": 1.7856056690216064, "learning_rate": 4.381290101473057e-07, "loss": 0.5181, "step": 35436 }, { "epoch": 0.91, "grad_norm": 1.1093242168426514, "learning_rate": 4.3788601356910985e-07, "loss": 0.3675, "step": 35437 }, { "epoch": 0.91, "grad_norm": 1.968546986579895, "learning_rate": 4.3764308288699485e-07, "loss": 0.7095, "step": 35438 }, { "epoch": 0.91, "grad_norm": 1.788597583770752, "learning_rate": 4.37400218102636e-07, "loss": 0.4915, "step": 35439 }, { "epoch": 0.91, "grad_norm": 1.325062870979309, "learning_rate": 4.3715741921770746e-07, "loss": 0.5022, "step": 35440 }, { "epoch": 0.91, "grad_norm": 1.1764246225357056, "learning_rate": 4.3691468623388244e-07, "loss": 0.5641, "step": 35441 }, { "epoch": 0.91, "grad_norm": 1.6587167978286743, "learning_rate": 4.3667201915283177e-07, "loss": 0.6326, "step": 35442 }, { "epoch": 0.91, "grad_norm": 16.00531005859375, "learning_rate": 4.364294179762296e-07, "loss": 0.6937, "step": 35443 }, { "epoch": 0.91, "grad_norm": 1.1223139762878418, "learning_rate": 4.361868827057492e-07, "loss": 0.4959, "step": 35444 }, { "epoch": 0.91, "grad_norm": 3.3532984256744385, "learning_rate": 4.3594441334305903e-07, "loss": 0.5422, "step": 35445 }, { "epoch": 0.91, "grad_norm": 1.6803562641143799, "learning_rate": 4.3570200988983013e-07, "loss": 0.485, "step": 35446 }, { "epoch": 0.91, "grad_norm": 1.3479961156845093, "learning_rate": 4.3545967234773554e-07, "loss": 0.6178, "step": 35447 }, { "epoch": 0.91, "grad_norm": 0.9314749240875244, "learning_rate": 4.3521740071844397e-07, "loss": 0.3915, "step": 35448 }, { "epoch": 0.91, "grad_norm": 2.012086868286133, "learning_rate": 4.3497519500362407e-07, "loss": 0.5606, "step": 35449 }, { "epoch": 0.91, "grad_norm": 1.656891942024231, "learning_rate": 4.347330552049467e-07, "loss": 0.6354, "step": 35450 }, { "epoch": 0.91, "grad_norm": 1.604248046875, "learning_rate": 4.344909813240794e-07, "loss": 0.535, "step": 35451 }, { "epoch": 0.91, "grad_norm": 0.9760255217552185, "learning_rate": 4.3424897336269087e-07, "loss": 0.4644, "step": 35452 }, { "epoch": 0.91, "grad_norm": 1.2192062139511108, "learning_rate": 4.340070313224465e-07, "loss": 0.5371, "step": 35453 }, { "epoch": 0.91, "grad_norm": 1.4065630435943604, "learning_rate": 4.337651552050182e-07, "loss": 0.3922, "step": 35454 }, { "epoch": 0.91, "grad_norm": 1.487009882926941, "learning_rate": 4.3352334501207016e-07, "loss": 0.5968, "step": 35455 }, { "epoch": 0.91, "grad_norm": 1.1880840063095093, "learning_rate": 4.332816007452667e-07, "loss": 0.5544, "step": 35456 }, { "epoch": 0.91, "grad_norm": 1.485710620880127, "learning_rate": 4.3303992240627867e-07, "loss": 0.4236, "step": 35457 }, { "epoch": 0.91, "grad_norm": 1.1227043867111206, "learning_rate": 4.3279830999676806e-07, "loss": 0.5213, "step": 35458 }, { "epoch": 0.91, "grad_norm": 1.1731127500534058, "learning_rate": 4.3255676351840027e-07, "loss": 0.3782, "step": 35459 }, { "epoch": 0.91, "grad_norm": 4.027245044708252, "learning_rate": 4.323152829728405e-07, "loss": 0.4436, "step": 35460 }, { "epoch": 0.91, "grad_norm": 1.098923921585083, "learning_rate": 4.3207386836175314e-07, "loss": 0.4068, "step": 35461 }, { "epoch": 0.91, "grad_norm": 1.2209140062332153, "learning_rate": 4.3183251968680227e-07, "loss": 0.5795, "step": 35462 }, { "epoch": 0.91, "grad_norm": 1.2740428447723389, "learning_rate": 4.3159123694964887e-07, "loss": 0.5625, "step": 35463 }, { "epoch": 0.91, "grad_norm": 2.2536096572875977, "learning_rate": 4.3135002015195827e-07, "loss": 0.643, "step": 35464 }, { "epoch": 0.91, "grad_norm": 1.4888980388641357, "learning_rate": 4.3110886929539133e-07, "loss": 0.3793, "step": 35465 }, { "epoch": 0.91, "grad_norm": 1.690164566040039, "learning_rate": 4.3086778438161005e-07, "loss": 0.5093, "step": 35466 }, { "epoch": 0.91, "grad_norm": 1.5060006380081177, "learning_rate": 4.3062676541227646e-07, "loss": 0.519, "step": 35467 }, { "epoch": 0.91, "grad_norm": 1.3504085540771484, "learning_rate": 4.303858123890514e-07, "loss": 0.5091, "step": 35468 }, { "epoch": 0.91, "grad_norm": 1.2419359683990479, "learning_rate": 4.3014492531359477e-07, "loss": 0.4168, "step": 35469 }, { "epoch": 0.91, "grad_norm": 1.4808231592178345, "learning_rate": 4.2990410418756625e-07, "loss": 0.4823, "step": 35470 }, { "epoch": 0.91, "grad_norm": 1.7831916809082031, "learning_rate": 4.2966334901262674e-07, "loss": 0.5098, "step": 35471 }, { "epoch": 0.91, "grad_norm": 1.7238928079605103, "learning_rate": 4.2942265979043495e-07, "loss": 0.3826, "step": 35472 }, { "epoch": 0.91, "grad_norm": 6.713595390319824, "learning_rate": 4.2918203652264845e-07, "loss": 0.4112, "step": 35473 }, { "epoch": 0.91, "grad_norm": 7.029852390289307, "learning_rate": 4.289414792109281e-07, "loss": 0.4947, "step": 35474 }, { "epoch": 0.91, "grad_norm": 1.80805504322052, "learning_rate": 4.2870098785692926e-07, "loss": 0.5355, "step": 35475 }, { "epoch": 0.91, "grad_norm": 2.7175662517547607, "learning_rate": 4.284605624623084e-07, "loss": 0.2818, "step": 35476 }, { "epoch": 0.91, "grad_norm": 1.5741829872131348, "learning_rate": 4.2822020302872527e-07, "loss": 0.5873, "step": 35477 }, { "epoch": 0.91, "grad_norm": 1.0811702013015747, "learning_rate": 4.279799095578352e-07, "loss": 0.4255, "step": 35478 }, { "epoch": 0.91, "grad_norm": 1.6628718376159668, "learning_rate": 4.2773968205129356e-07, "loss": 0.57, "step": 35479 }, { "epoch": 0.91, "grad_norm": 2.2389254570007324, "learning_rate": 4.274995205107557e-07, "loss": 0.476, "step": 35480 }, { "epoch": 0.91, "grad_norm": 1.8005367517471313, "learning_rate": 4.272594249378781e-07, "loss": 0.4984, "step": 35481 }, { "epoch": 0.91, "grad_norm": 5.411256790161133, "learning_rate": 4.270193953343138e-07, "loss": 0.657, "step": 35482 }, { "epoch": 0.91, "grad_norm": 1.6606438159942627, "learning_rate": 4.26779431701716e-07, "loss": 0.4772, "step": 35483 }, { "epoch": 0.91, "grad_norm": 1.8003897666931152, "learning_rate": 4.265395340417422e-07, "loss": 0.5081, "step": 35484 }, { "epoch": 0.91, "grad_norm": 4.641985893249512, "learning_rate": 4.2629970235604223e-07, "loss": 0.657, "step": 35485 }, { "epoch": 0.91, "grad_norm": 8.768415451049805, "learning_rate": 4.2605993664627033e-07, "loss": 0.7492, "step": 35486 }, { "epoch": 0.91, "grad_norm": 1.6927762031555176, "learning_rate": 4.2582023691407735e-07, "loss": 0.3644, "step": 35487 }, { "epoch": 0.91, "grad_norm": 2.884227752685547, "learning_rate": 4.255806031611176e-07, "loss": 0.8193, "step": 35488 }, { "epoch": 0.91, "grad_norm": 1.5209143161773682, "learning_rate": 4.253410353890408e-07, "loss": 0.5884, "step": 35489 }, { "epoch": 0.91, "grad_norm": 2.875117301940918, "learning_rate": 4.251015335994968e-07, "loss": 0.6339, "step": 35490 }, { "epoch": 0.91, "grad_norm": 1.2664783000946045, "learning_rate": 4.248620977941387e-07, "loss": 0.3158, "step": 35491 }, { "epoch": 0.91, "grad_norm": 10.529684066772461, "learning_rate": 4.246227279746162e-07, "loss": 0.5162, "step": 35492 }, { "epoch": 0.91, "grad_norm": 1.4012999534606934, "learning_rate": 4.2438342414257706e-07, "loss": 0.542, "step": 35493 }, { "epoch": 0.91, "grad_norm": 1.3712297677993774, "learning_rate": 4.241441862996709e-07, "loss": 0.525, "step": 35494 }, { "epoch": 0.91, "grad_norm": 1.235736608505249, "learning_rate": 4.239050144475476e-07, "loss": 0.5122, "step": 35495 }, { "epoch": 0.91, "grad_norm": 1.1892344951629639, "learning_rate": 4.2366590858785475e-07, "loss": 0.4763, "step": 35496 }, { "epoch": 0.91, "grad_norm": 13.072381973266602, "learning_rate": 4.234268687222387e-07, "loss": 0.6305, "step": 35497 }, { "epoch": 0.91, "grad_norm": 1.1340079307556152, "learning_rate": 4.2318789485235046e-07, "loss": 0.4692, "step": 35498 }, { "epoch": 0.91, "grad_norm": 1.517676591873169, "learning_rate": 4.229489869798331e-07, "loss": 0.5198, "step": 35499 }, { "epoch": 0.91, "grad_norm": 1.2757768630981445, "learning_rate": 4.2271014510633313e-07, "loss": 0.5342, "step": 35500 }, { "epoch": 0.91, "grad_norm": 1.2790026664733887, "learning_rate": 4.224713692335003e-07, "loss": 0.5227, "step": 35501 }, { "epoch": 0.91, "grad_norm": 2.716327428817749, "learning_rate": 4.2223265936297665e-07, "loss": 0.5094, "step": 35502 }, { "epoch": 0.91, "grad_norm": 3.1851539611816406, "learning_rate": 4.219940154964064e-07, "loss": 0.6974, "step": 35503 }, { "epoch": 0.91, "grad_norm": 1.6524617671966553, "learning_rate": 4.2175543763543824e-07, "loss": 0.5785, "step": 35504 }, { "epoch": 0.91, "grad_norm": 1.445212483406067, "learning_rate": 4.215169257817131e-07, "loss": 0.4235, "step": 35505 }, { "epoch": 0.91, "grad_norm": 1.41579270362854, "learning_rate": 4.212784799368741e-07, "loss": 0.5133, "step": 35506 }, { "epoch": 0.91, "grad_norm": 8.542665481567383, "learning_rate": 4.210401001025677e-07, "loss": 0.557, "step": 35507 }, { "epoch": 0.91, "grad_norm": 1.2399266958236694, "learning_rate": 4.208017862804348e-07, "loss": 0.4897, "step": 35508 }, { "epoch": 0.91, "grad_norm": 1.7624129056930542, "learning_rate": 4.205635384721185e-07, "loss": 0.5004, "step": 35509 }, { "epoch": 0.91, "grad_norm": 1.0967111587524414, "learning_rate": 4.203253566792576e-07, "loss": 0.5087, "step": 35510 }, { "epoch": 0.91, "grad_norm": 1.96018648147583, "learning_rate": 4.2008724090349727e-07, "loss": 0.5666, "step": 35511 }, { "epoch": 0.91, "grad_norm": 5.584095001220703, "learning_rate": 4.1984919114647637e-07, "loss": 0.8347, "step": 35512 }, { "epoch": 0.91, "grad_norm": 5.576531887054443, "learning_rate": 4.1961120740983575e-07, "loss": 0.401, "step": 35513 }, { "epoch": 0.91, "grad_norm": 1.2521754503250122, "learning_rate": 4.193732896952163e-07, "loss": 0.4851, "step": 35514 }, { "epoch": 0.91, "grad_norm": 1.4657129049301147, "learning_rate": 4.191354380042578e-07, "loss": 0.5053, "step": 35515 }, { "epoch": 0.91, "grad_norm": 1.595662236213684, "learning_rate": 4.188976523385979e-07, "loss": 0.6078, "step": 35516 }, { "epoch": 0.91, "grad_norm": 4.2539448738098145, "learning_rate": 4.1865993269987525e-07, "loss": 0.4652, "step": 35517 }, { "epoch": 0.91, "grad_norm": 1.270187497138977, "learning_rate": 4.1842227908972966e-07, "loss": 0.3589, "step": 35518 }, { "epoch": 0.91, "grad_norm": 7.679783344268799, "learning_rate": 4.1818469150979755e-07, "loss": 0.4863, "step": 35519 }, { "epoch": 0.91, "grad_norm": 1.2333952188491821, "learning_rate": 4.1794716996171545e-07, "loss": 0.4356, "step": 35520 }, { "epoch": 0.91, "grad_norm": 2.4689691066741943, "learning_rate": 4.1770971444712314e-07, "loss": 0.3655, "step": 35521 }, { "epoch": 0.91, "grad_norm": 2.1093509197235107, "learning_rate": 4.17472324967656e-07, "loss": 0.6304, "step": 35522 }, { "epoch": 0.91, "grad_norm": 1.0750348567962646, "learning_rate": 4.172350015249482e-07, "loss": 0.4452, "step": 35523 }, { "epoch": 0.91, "grad_norm": 1.2933269739151, "learning_rate": 4.169977441206352e-07, "loss": 0.4278, "step": 35524 }, { "epoch": 0.91, "grad_norm": 2.167823553085327, "learning_rate": 4.167605527563545e-07, "loss": 0.5588, "step": 35525 }, { "epoch": 0.91, "grad_norm": 1.3908244371414185, "learning_rate": 4.165234274337393e-07, "loss": 0.4635, "step": 35526 }, { "epoch": 0.91, "grad_norm": 1.464909315109253, "learning_rate": 4.1628636815442267e-07, "loss": 0.4314, "step": 35527 }, { "epoch": 0.91, "grad_norm": 2.297292947769165, "learning_rate": 4.1604937492004007e-07, "loss": 0.5741, "step": 35528 }, { "epoch": 0.91, "grad_norm": 1.9744305610656738, "learning_rate": 4.158124477322234e-07, "loss": 0.5557, "step": 35529 }, { "epoch": 0.91, "grad_norm": 1.5133600234985352, "learning_rate": 4.155755865926059e-07, "loss": 0.467, "step": 35530 }, { "epoch": 0.91, "grad_norm": 1.6682950258255005, "learning_rate": 4.1533879150282064e-07, "loss": 0.5323, "step": 35531 }, { "epoch": 0.91, "grad_norm": 2.2954506874084473, "learning_rate": 4.151020624644986e-07, "loss": 0.4534, "step": 35532 }, { "epoch": 0.91, "grad_norm": 0.8748831748962402, "learning_rate": 4.148653994792717e-07, "loss": 0.3817, "step": 35533 }, { "epoch": 0.91, "grad_norm": 1.1823431253433228, "learning_rate": 4.1462880254876993e-07, "loss": 0.2866, "step": 35534 }, { "epoch": 0.91, "grad_norm": 1.1525521278381348, "learning_rate": 4.14392271674624e-07, "loss": 0.5323, "step": 35535 }, { "epoch": 0.91, "grad_norm": 2.590958833694458, "learning_rate": 4.14155806858465e-07, "loss": 0.4801, "step": 35536 }, { "epoch": 0.91, "grad_norm": 1.0874698162078857, "learning_rate": 4.1391940810192153e-07, "loss": 0.4753, "step": 35537 }, { "epoch": 0.91, "grad_norm": 3.2901952266693115, "learning_rate": 4.136830754066223e-07, "loss": 0.3939, "step": 35538 }, { "epoch": 0.91, "grad_norm": 2.5463364124298096, "learning_rate": 4.134468087741983e-07, "loss": 0.4532, "step": 35539 }, { "epoch": 0.91, "grad_norm": 0.9710592031478882, "learning_rate": 4.1321060820627477e-07, "loss": 0.4276, "step": 35540 }, { "epoch": 0.91, "grad_norm": 1.0154905319213867, "learning_rate": 4.1297447370448053e-07, "loss": 0.4397, "step": 35541 }, { "epoch": 0.91, "grad_norm": 1.1039972305297852, "learning_rate": 4.127384052704431e-07, "loss": 0.3806, "step": 35542 }, { "epoch": 0.91, "grad_norm": 2.2219440937042236, "learning_rate": 4.1250240290579003e-07, "loss": 0.4686, "step": 35543 }, { "epoch": 0.91, "grad_norm": 1.4968101978302002, "learning_rate": 4.1226646661214565e-07, "loss": 0.5024, "step": 35544 }, { "epoch": 0.91, "grad_norm": 2.211425542831421, "learning_rate": 4.1203059639113865e-07, "loss": 0.5757, "step": 35545 }, { "epoch": 0.91, "grad_norm": 1.9135184288024902, "learning_rate": 4.117947922443921e-07, "loss": 0.559, "step": 35546 }, { "epoch": 0.91, "grad_norm": 1.9338692426681519, "learning_rate": 4.115590541735326e-07, "loss": 0.5176, "step": 35547 }, { "epoch": 0.91, "grad_norm": 1.1395447254180908, "learning_rate": 4.1132338218018317e-07, "loss": 0.5808, "step": 35548 }, { "epoch": 0.91, "grad_norm": 1.9355403184890747, "learning_rate": 4.110877762659693e-07, "loss": 0.4773, "step": 35549 }, { "epoch": 0.91, "grad_norm": 1.2657920122146606, "learning_rate": 4.1085223643251515e-07, "loss": 0.5219, "step": 35550 }, { "epoch": 0.91, "grad_norm": 1.0971741676330566, "learning_rate": 4.1061676268144056e-07, "loss": 0.3695, "step": 35551 }, { "epoch": 0.91, "grad_norm": 1.9223344326019287, "learning_rate": 4.10381355014372e-07, "loss": 0.4762, "step": 35552 }, { "epoch": 0.91, "grad_norm": 1.8893195390701294, "learning_rate": 4.101460134329305e-07, "loss": 0.4914, "step": 35553 }, { "epoch": 0.91, "grad_norm": 1.5133024454116821, "learning_rate": 4.0991073793873683e-07, "loss": 0.6191, "step": 35554 }, { "epoch": 0.91, "grad_norm": 1.4164782762527466, "learning_rate": 4.096755285334142e-07, "loss": 0.6943, "step": 35555 }, { "epoch": 0.91, "grad_norm": 5.843821048736572, "learning_rate": 4.094403852185824e-07, "loss": 0.5253, "step": 35556 }, { "epoch": 0.91, "grad_norm": 1.133435606956482, "learning_rate": 4.092053079958624e-07, "loss": 0.5878, "step": 35557 }, { "epoch": 0.91, "grad_norm": 0.8900576233863831, "learning_rate": 4.0897029686687295e-07, "loss": 0.5099, "step": 35558 }, { "epoch": 0.91, "grad_norm": 1.9421788454055786, "learning_rate": 4.0873535183323596e-07, "loss": 0.5853, "step": 35559 }, { "epoch": 0.91, "grad_norm": 1.0209630727767944, "learning_rate": 4.08500472896568e-07, "loss": 0.4986, "step": 35560 }, { "epoch": 0.91, "grad_norm": 9.618529319763184, "learning_rate": 4.0826566005848886e-07, "loss": 0.661, "step": 35561 }, { "epoch": 0.91, "grad_norm": 1.6102784872055054, "learning_rate": 4.080309133206173e-07, "loss": 0.5707, "step": 35562 }, { "epoch": 0.91, "grad_norm": 1.1464577913284302, "learning_rate": 4.077962326845697e-07, "loss": 0.5563, "step": 35563 }, { "epoch": 0.91, "grad_norm": 1.285064935684204, "learning_rate": 4.0756161815196483e-07, "loss": 0.3373, "step": 35564 }, { "epoch": 0.91, "grad_norm": 1.165844440460205, "learning_rate": 4.073270697244169e-07, "loss": 0.3522, "step": 35565 }, { "epoch": 0.91, "grad_norm": 1.608810544013977, "learning_rate": 4.0709258740354586e-07, "loss": 0.4539, "step": 35566 }, { "epoch": 0.91, "grad_norm": 1.69684898853302, "learning_rate": 4.068581711909658e-07, "loss": 0.5012, "step": 35567 }, { "epoch": 0.91, "grad_norm": 0.9431702494621277, "learning_rate": 4.0662382108829113e-07, "loss": 0.4146, "step": 35568 }, { "epoch": 0.91, "grad_norm": 1.2236355543136597, "learning_rate": 4.0638953709713937e-07, "loss": 0.6002, "step": 35569 }, { "epoch": 0.91, "grad_norm": 2.1289098262786865, "learning_rate": 4.0615531921912255e-07, "loss": 0.6031, "step": 35570 }, { "epoch": 0.91, "grad_norm": 1.5517656803131104, "learning_rate": 4.0592116745585606e-07, "loss": 0.5885, "step": 35571 }, { "epoch": 0.91, "grad_norm": 2.5545918941497803, "learning_rate": 4.056870818089531e-07, "loss": 0.5511, "step": 35572 }, { "epoch": 0.91, "grad_norm": 1.7326382398605347, "learning_rate": 4.054530622800279e-07, "loss": 0.5777, "step": 35573 }, { "epoch": 0.91, "grad_norm": 1.4130187034606934, "learning_rate": 4.052191088706925e-07, "loss": 0.6814, "step": 35574 }, { "epoch": 0.91, "grad_norm": 1.4533547163009644, "learning_rate": 4.049852215825567e-07, "loss": 0.5553, "step": 35575 }, { "epoch": 0.91, "grad_norm": 1.3986729383468628, "learning_rate": 4.0475140041723707e-07, "loss": 0.4848, "step": 35576 }, { "epoch": 0.91, "grad_norm": 2.3004047870635986, "learning_rate": 4.045176453763411e-07, "loss": 0.6769, "step": 35577 }, { "epoch": 0.91, "grad_norm": 1.2179518938064575, "learning_rate": 4.042839564614809e-07, "loss": 0.5267, "step": 35578 }, { "epoch": 0.91, "grad_norm": 1.5181065797805786, "learning_rate": 4.0405033367426847e-07, "loss": 0.4624, "step": 35579 }, { "epoch": 0.91, "grad_norm": 1.5179367065429688, "learning_rate": 4.038167770163115e-07, "loss": 0.5706, "step": 35580 }, { "epoch": 0.91, "grad_norm": 10.376179695129395, "learning_rate": 4.0358328648922085e-07, "loss": 0.7843, "step": 35581 }, { "epoch": 0.91, "grad_norm": 1.8254258632659912, "learning_rate": 4.0334986209460415e-07, "loss": 0.4864, "step": 35582 }, { "epoch": 0.91, "grad_norm": 2.743328332901001, "learning_rate": 4.031165038340712e-07, "loss": 0.5801, "step": 35583 }, { "epoch": 0.91, "grad_norm": 6.815133094787598, "learning_rate": 4.028832117092307e-07, "loss": 0.6039, "step": 35584 }, { "epoch": 0.91, "grad_norm": 1.2764734029769897, "learning_rate": 4.0264998572168813e-07, "loss": 0.4088, "step": 35585 }, { "epoch": 0.91, "grad_norm": 1.2033979892730713, "learning_rate": 4.024168258730543e-07, "loss": 0.3494, "step": 35586 }, { "epoch": 0.91, "grad_norm": 2.3386902809143066, "learning_rate": 4.0218373216493246e-07, "loss": 0.5169, "step": 35587 }, { "epoch": 0.91, "grad_norm": 1.8752390146255493, "learning_rate": 4.0195070459893126e-07, "loss": 0.6078, "step": 35588 }, { "epoch": 0.91, "grad_norm": 1.2390930652618408, "learning_rate": 4.01717743176655e-07, "loss": 0.5581, "step": 35589 }, { "epoch": 0.91, "grad_norm": 1.3003337383270264, "learning_rate": 4.014848478997102e-07, "loss": 0.4631, "step": 35590 }, { "epoch": 0.91, "grad_norm": 1.3251235485076904, "learning_rate": 4.012520187697011e-07, "loss": 0.3914, "step": 35591 }, { "epoch": 0.91, "grad_norm": 1.7143203020095825, "learning_rate": 4.01019255788232e-07, "loss": 0.594, "step": 35592 }, { "epoch": 0.91, "grad_norm": 1.0205581188201904, "learning_rate": 4.007865589569093e-07, "loss": 0.3847, "step": 35593 }, { "epoch": 0.91, "grad_norm": 1.458785057067871, "learning_rate": 4.005539282773341e-07, "loss": 0.4828, "step": 35594 }, { "epoch": 0.91, "grad_norm": 3.7660932540893555, "learning_rate": 4.003213637511083e-07, "loss": 0.4799, "step": 35595 }, { "epoch": 0.91, "grad_norm": 1.2397428750991821, "learning_rate": 4.000888653798396e-07, "loss": 0.414, "step": 35596 }, { "epoch": 0.91, "grad_norm": 1.1134949922561646, "learning_rate": 3.998564331651256e-07, "loss": 0.5141, "step": 35597 }, { "epoch": 0.91, "grad_norm": 1.4334498643875122, "learning_rate": 3.9962406710857047e-07, "loss": 0.5078, "step": 35598 }, { "epoch": 0.91, "grad_norm": 1.0181665420532227, "learning_rate": 3.993917672117731e-07, "loss": 0.4786, "step": 35599 }, { "epoch": 0.91, "grad_norm": 2.5132479667663574, "learning_rate": 3.9915953347633764e-07, "loss": 0.4927, "step": 35600 }, { "epoch": 0.91, "grad_norm": 0.8276414275169373, "learning_rate": 3.9892736590386283e-07, "loss": 0.3555, "step": 35601 }, { "epoch": 0.91, "grad_norm": 1.4414117336273193, "learning_rate": 3.9869526449594633e-07, "loss": 0.4691, "step": 35602 }, { "epoch": 0.91, "grad_norm": 1.5862187147140503, "learning_rate": 3.9846322925419236e-07, "loss": 0.4848, "step": 35603 }, { "epoch": 0.91, "grad_norm": 1.0147795677185059, "learning_rate": 3.982312601801974e-07, "loss": 0.4924, "step": 35604 }, { "epoch": 0.91, "grad_norm": 1.567347526550293, "learning_rate": 3.979993572755592e-07, "loss": 0.5144, "step": 35605 }, { "epoch": 0.91, "grad_norm": 1.0214734077453613, "learning_rate": 3.977675205418763e-07, "loss": 0.5047, "step": 35606 }, { "epoch": 0.91, "grad_norm": 1.305410623550415, "learning_rate": 3.975357499807486e-07, "loss": 0.4305, "step": 35607 }, { "epoch": 0.91, "grad_norm": 0.9687178134918213, "learning_rate": 3.973040455937716e-07, "loss": 0.5994, "step": 35608 }, { "epoch": 0.91, "grad_norm": 1.6200413703918457, "learning_rate": 3.970724073825405e-07, "loss": 0.4375, "step": 35609 }, { "epoch": 0.91, "grad_norm": 1.1911934614181519, "learning_rate": 3.968408353486541e-07, "loss": 0.5126, "step": 35610 }, { "epoch": 0.91, "grad_norm": 1.559564471244812, "learning_rate": 3.966093294937079e-07, "loss": 0.5267, "step": 35611 }, { "epoch": 0.91, "grad_norm": 1.7049695253372192, "learning_rate": 3.963778898192949e-07, "loss": 0.4996, "step": 35612 }, { "epoch": 0.91, "grad_norm": 6.550114631652832, "learning_rate": 3.9614651632701393e-07, "loss": 0.4499, "step": 35613 }, { "epoch": 0.91, "grad_norm": 1.7385358810424805, "learning_rate": 3.9591520901845706e-07, "loss": 0.5013, "step": 35614 }, { "epoch": 0.91, "grad_norm": 1.385791301727295, "learning_rate": 3.956839678952196e-07, "loss": 0.5728, "step": 35615 }, { "epoch": 0.91, "grad_norm": 1.484504222869873, "learning_rate": 3.954527929588914e-07, "loss": 0.4439, "step": 35616 }, { "epoch": 0.91, "grad_norm": 1.365311622619629, "learning_rate": 3.952216842110701e-07, "loss": 0.4253, "step": 35617 }, { "epoch": 0.91, "grad_norm": 1.5441454648971558, "learning_rate": 3.9499064165334776e-07, "loss": 0.5032, "step": 35618 }, { "epoch": 0.91, "grad_norm": 1.5783073902130127, "learning_rate": 3.947596652873131e-07, "loss": 0.4642, "step": 35619 }, { "epoch": 0.91, "grad_norm": 1.6867517232894897, "learning_rate": 3.945287551145616e-07, "loss": 0.5467, "step": 35620 }, { "epoch": 0.91, "grad_norm": 1.458518385887146, "learning_rate": 3.942979111366829e-07, "loss": 0.4882, "step": 35621 }, { "epoch": 0.91, "grad_norm": 1.107286810874939, "learning_rate": 3.9406713335526924e-07, "loss": 0.5127, "step": 35622 }, { "epoch": 0.91, "grad_norm": 1.3795489072799683, "learning_rate": 3.938364217719082e-07, "loss": 0.5591, "step": 35623 }, { "epoch": 0.91, "grad_norm": 1.3585001230239868, "learning_rate": 3.936057763881918e-07, "loss": 0.4851, "step": 35624 }, { "epoch": 0.91, "grad_norm": 1.3511453866958618, "learning_rate": 3.933751972057098e-07, "loss": 0.5456, "step": 35625 }, { "epoch": 0.91, "grad_norm": 2.07843017578125, "learning_rate": 3.931446842260489e-07, "loss": 0.5728, "step": 35626 }, { "epoch": 0.91, "grad_norm": 1.6688860654830933, "learning_rate": 3.9291423745079994e-07, "loss": 0.5276, "step": 35627 }, { "epoch": 0.91, "grad_norm": 1.1880645751953125, "learning_rate": 3.9268385688155166e-07, "loss": 0.4573, "step": 35628 }, { "epoch": 0.91, "grad_norm": 1.031622290611267, "learning_rate": 3.9245354251988943e-07, "loss": 0.5134, "step": 35629 }, { "epoch": 0.91, "grad_norm": 1.3355129957199097, "learning_rate": 3.922232943673998e-07, "loss": 0.4463, "step": 35630 }, { "epoch": 0.91, "grad_norm": 3.174628496170044, "learning_rate": 3.919931124256726e-07, "loss": 0.6317, "step": 35631 }, { "epoch": 0.91, "grad_norm": 1.7161717414855957, "learning_rate": 3.9176299669629324e-07, "loss": 0.4192, "step": 35632 }, { "epoch": 0.91, "grad_norm": 1.7382965087890625, "learning_rate": 3.915329471808449e-07, "loss": 0.3811, "step": 35633 }, { "epoch": 0.91, "grad_norm": 1.597654938697815, "learning_rate": 3.9130296388091624e-07, "loss": 0.4067, "step": 35634 }, { "epoch": 0.91, "grad_norm": 1.3163025379180908, "learning_rate": 3.9107304679809054e-07, "loss": 0.5721, "step": 35635 }, { "epoch": 0.91, "grad_norm": 3.7920501232147217, "learning_rate": 3.9084319593395094e-07, "loss": 0.5987, "step": 35636 }, { "epoch": 0.91, "grad_norm": 2.681957721710205, "learning_rate": 3.9061341129008503e-07, "loss": 0.5829, "step": 35637 }, { "epoch": 0.91, "grad_norm": 2.311383008956909, "learning_rate": 3.9038369286807375e-07, "loss": 0.5763, "step": 35638 }, { "epoch": 0.91, "grad_norm": 2.7958803176879883, "learning_rate": 3.901540406695004e-07, "loss": 0.5591, "step": 35639 }, { "epoch": 0.91, "grad_norm": 1.2081586122512817, "learning_rate": 3.89924454695948e-07, "loss": 0.5101, "step": 35640 }, { "epoch": 0.91, "grad_norm": 1.3595380783081055, "learning_rate": 3.8969493494899866e-07, "loss": 0.4391, "step": 35641 }, { "epoch": 0.91, "grad_norm": 1.3216010332107544, "learning_rate": 3.8946548143023453e-07, "loss": 0.5433, "step": 35642 }, { "epoch": 0.91, "grad_norm": 1.6826329231262207, "learning_rate": 3.892360941412343e-07, "loss": 0.4094, "step": 35643 }, { "epoch": 0.91, "grad_norm": 3.377689838409424, "learning_rate": 3.8900677308358337e-07, "loss": 0.3727, "step": 35644 }, { "epoch": 0.91, "grad_norm": 1.2672566175460815, "learning_rate": 3.8877751825885826e-07, "loss": 0.4441, "step": 35645 }, { "epoch": 0.91, "grad_norm": 1.0644898414611816, "learning_rate": 3.88548329668641e-07, "loss": 0.3053, "step": 35646 }, { "epoch": 0.91, "grad_norm": 1.200523853302002, "learning_rate": 3.883192073145092e-07, "loss": 0.5154, "step": 35647 }, { "epoch": 0.91, "grad_norm": 2.0719189643859863, "learning_rate": 3.880901511980428e-07, "loss": 0.6447, "step": 35648 }, { "epoch": 0.91, "grad_norm": 1.2304977178573608, "learning_rate": 3.878611613208216e-07, "loss": 0.4781, "step": 35649 }, { "epoch": 0.91, "grad_norm": 1.2145112752914429, "learning_rate": 3.8763223768441993e-07, "loss": 0.5235, "step": 35650 }, { "epoch": 0.91, "grad_norm": 1.4339786767959595, "learning_rate": 3.8740338029041977e-07, "loss": 0.5045, "step": 35651 }, { "epoch": 0.91, "grad_norm": 4.647945880889893, "learning_rate": 3.8717458914039663e-07, "loss": 0.6582, "step": 35652 }, { "epoch": 0.91, "grad_norm": 1.3969312906265259, "learning_rate": 3.869458642359247e-07, "loss": 0.5042, "step": 35653 }, { "epoch": 0.91, "grad_norm": 3.3659846782684326, "learning_rate": 3.8671720557858394e-07, "loss": 0.4875, "step": 35654 }, { "epoch": 0.91, "grad_norm": 2.4988672733306885, "learning_rate": 3.8648861316994854e-07, "loss": 0.5583, "step": 35655 }, { "epoch": 0.91, "grad_norm": 1.6783757209777832, "learning_rate": 3.86260087011594e-07, "loss": 0.5104, "step": 35656 }, { "epoch": 0.91, "grad_norm": 2.592674493789673, "learning_rate": 3.8603162710509344e-07, "loss": 0.7334, "step": 35657 }, { "epoch": 0.91, "grad_norm": 1.2540037631988525, "learning_rate": 3.8580323345202453e-07, "loss": 0.5133, "step": 35658 }, { "epoch": 0.91, "grad_norm": 15.454206466674805, "learning_rate": 3.855749060539593e-07, "loss": 0.6185, "step": 35659 }, { "epoch": 0.91, "grad_norm": 1.41187584400177, "learning_rate": 3.853466449124699e-07, "loss": 0.5545, "step": 35660 }, { "epoch": 0.91, "grad_norm": 2.411050319671631, "learning_rate": 3.8511845002913273e-07, "loss": 0.5983, "step": 35661 }, { "epoch": 0.91, "grad_norm": 12.291990280151367, "learning_rate": 3.8489032140551777e-07, "loss": 0.6344, "step": 35662 }, { "epoch": 0.91, "grad_norm": 1.3545286655426025, "learning_rate": 3.846622590431992e-07, "loss": 0.6494, "step": 35663 }, { "epoch": 0.91, "grad_norm": 2.1309494972229004, "learning_rate": 3.8443426294374474e-07, "loss": 0.3685, "step": 35664 }, { "epoch": 0.91, "grad_norm": 2.104628801345825, "learning_rate": 3.8420633310872976e-07, "loss": 0.6298, "step": 35665 }, { "epoch": 0.91, "grad_norm": 0.8291720747947693, "learning_rate": 3.839784695397242e-07, "loss": 0.4086, "step": 35666 }, { "epoch": 0.91, "grad_norm": 6.7699432373046875, "learning_rate": 3.8375067223829666e-07, "loss": 0.4444, "step": 35667 }, { "epoch": 0.91, "grad_norm": 1.3995448350906372, "learning_rate": 3.8352294120601817e-07, "loss": 0.4898, "step": 35668 }, { "epoch": 0.91, "grad_norm": 1.5246142148971558, "learning_rate": 3.8329527644445864e-07, "loss": 0.4369, "step": 35669 }, { "epoch": 0.91, "grad_norm": 1.5038737058639526, "learning_rate": 3.830676779551856e-07, "loss": 0.5689, "step": 35670 }, { "epoch": 0.91, "grad_norm": 1.2306151390075684, "learning_rate": 3.828401457397668e-07, "loss": 0.4828, "step": 35671 }, { "epoch": 0.91, "grad_norm": 1.3490606546401978, "learning_rate": 3.826126797997731e-07, "loss": 0.6448, "step": 35672 }, { "epoch": 0.91, "grad_norm": 14.493586540222168, "learning_rate": 3.823852801367711e-07, "loss": 0.5568, "step": 35673 }, { "epoch": 0.91, "grad_norm": 1.773849606513977, "learning_rate": 3.821579467523262e-07, "loss": 0.517, "step": 35674 }, { "epoch": 0.91, "grad_norm": 3.208146095275879, "learning_rate": 3.819306796480071e-07, "loss": 0.4914, "step": 35675 }, { "epoch": 0.91, "grad_norm": 1.369859218597412, "learning_rate": 3.817034788253793e-07, "loss": 0.3903, "step": 35676 }, { "epoch": 0.91, "grad_norm": 5.330447673797607, "learning_rate": 3.814763442860059e-07, "loss": 0.5269, "step": 35677 }, { "epoch": 0.91, "grad_norm": 1.161939024925232, "learning_rate": 3.8124927603145676e-07, "loss": 0.5053, "step": 35678 }, { "epoch": 0.91, "grad_norm": 4.12175989151001, "learning_rate": 3.8102227406329517e-07, "loss": 0.5522, "step": 35679 }, { "epoch": 0.91, "grad_norm": 9.463882446289062, "learning_rate": 3.8079533838308425e-07, "loss": 0.9385, "step": 35680 }, { "epoch": 0.91, "grad_norm": 9.840710639953613, "learning_rate": 3.805684689923872e-07, "loss": 0.7375, "step": 35681 }, { "epoch": 0.91, "grad_norm": 1.3575111627578735, "learning_rate": 3.803416658927694e-07, "loss": 0.6633, "step": 35682 }, { "epoch": 0.91, "grad_norm": 5.395118236541748, "learning_rate": 3.801149290857942e-07, "loss": 0.6609, "step": 35683 }, { "epoch": 0.91, "grad_norm": 1.5580917596817017, "learning_rate": 3.7988825857302126e-07, "loss": 0.4476, "step": 35684 }, { "epoch": 0.91, "grad_norm": 1.4435685873031616, "learning_rate": 3.796616543560161e-07, "loss": 0.5164, "step": 35685 }, { "epoch": 0.91, "grad_norm": 4.983702182769775, "learning_rate": 3.794351164363386e-07, "loss": 0.5514, "step": 35686 }, { "epoch": 0.91, "grad_norm": 9.223124504089355, "learning_rate": 3.7920864481554966e-07, "loss": 0.3108, "step": 35687 }, { "epoch": 0.91, "grad_norm": 1.371224284172058, "learning_rate": 3.789822394952092e-07, "loss": 0.5024, "step": 35688 }, { "epoch": 0.91, "grad_norm": 1.613972544670105, "learning_rate": 3.787559004768804e-07, "loss": 0.5606, "step": 35689 }, { "epoch": 0.91, "grad_norm": 0.8890793323516846, "learning_rate": 3.7852962776212087e-07, "loss": 0.4567, "step": 35690 }, { "epoch": 0.91, "grad_norm": 14.964051246643066, "learning_rate": 3.7830342135248944e-07, "loss": 0.7573, "step": 35691 }, { "epoch": 0.91, "grad_norm": 5.054005146026611, "learning_rate": 3.78077281249547e-07, "loss": 0.4609, "step": 35692 }, { "epoch": 0.91, "grad_norm": 2.9270682334899902, "learning_rate": 3.778512074548502e-07, "loss": 0.4569, "step": 35693 }, { "epoch": 0.91, "grad_norm": 1.1137272119522095, "learning_rate": 3.776251999699565e-07, "loss": 0.416, "step": 35694 }, { "epoch": 0.91, "grad_norm": 1.3303290605545044, "learning_rate": 3.77399258796427e-07, "loss": 0.6103, "step": 35695 }, { "epoch": 0.91, "grad_norm": 1.5834630727767944, "learning_rate": 3.7717338393581494e-07, "loss": 0.4576, "step": 35696 }, { "epoch": 0.91, "grad_norm": 1.9515774250030518, "learning_rate": 3.76947575389679e-07, "loss": 0.5874, "step": 35697 }, { "epoch": 0.91, "grad_norm": 1.6398537158966064, "learning_rate": 3.7672183315957347e-07, "loss": 0.5575, "step": 35698 }, { "epoch": 0.91, "grad_norm": 1.6756194829940796, "learning_rate": 3.76496157247056e-07, "loss": 0.6185, "step": 35699 }, { "epoch": 0.92, "grad_norm": 2.2349109649658203, "learning_rate": 3.76270547653681e-07, "loss": 0.3113, "step": 35700 }, { "epoch": 0.92, "grad_norm": 2.211958885192871, "learning_rate": 3.760450043810016e-07, "loss": 0.6404, "step": 35701 }, { "epoch": 0.92, "grad_norm": 1.5289766788482666, "learning_rate": 3.7581952743057535e-07, "loss": 0.5196, "step": 35702 }, { "epoch": 0.92, "grad_norm": 1.3075206279754639, "learning_rate": 3.7559411680395454e-07, "loss": 0.5135, "step": 35703 }, { "epoch": 0.92, "grad_norm": 1.4980069398880005, "learning_rate": 3.753687725026922e-07, "loss": 0.3902, "step": 35704 }, { "epoch": 0.92, "grad_norm": 2.061974287033081, "learning_rate": 3.7514349452834054e-07, "loss": 0.5178, "step": 35705 }, { "epoch": 0.92, "grad_norm": 7.1336822509765625, "learning_rate": 3.749182828824538e-07, "loss": 0.6126, "step": 35706 }, { "epoch": 0.92, "grad_norm": 1.3094542026519775, "learning_rate": 3.74693137566583e-07, "loss": 0.5827, "step": 35707 }, { "epoch": 0.92, "grad_norm": 3.134066343307495, "learning_rate": 3.744680585822791e-07, "loss": 0.5985, "step": 35708 }, { "epoch": 0.92, "grad_norm": 1.8044887781143188, "learning_rate": 3.742430459310953e-07, "loss": 0.5952, "step": 35709 }, { "epoch": 0.92, "grad_norm": 1.2557334899902344, "learning_rate": 3.740180996145815e-07, "loss": 0.4947, "step": 35710 }, { "epoch": 0.92, "grad_norm": 1.4196125268936157, "learning_rate": 3.7379321963428637e-07, "loss": 0.3868, "step": 35711 }, { "epoch": 0.92, "grad_norm": 1.5969202518463135, "learning_rate": 3.735684059917599e-07, "loss": 0.6518, "step": 35712 }, { "epoch": 0.92, "grad_norm": 2.1399877071380615, "learning_rate": 3.7334365868855193e-07, "loss": 0.6318, "step": 35713 }, { "epoch": 0.92, "grad_norm": 1.3198493719100952, "learning_rate": 3.7311897772621343e-07, "loss": 0.593, "step": 35714 }, { "epoch": 0.92, "grad_norm": 1.6651182174682617, "learning_rate": 3.728943631062876e-07, "loss": 0.6235, "step": 35715 }, { "epoch": 0.92, "grad_norm": 1.4670017957687378, "learning_rate": 3.726698148303276e-07, "loss": 0.3677, "step": 35716 }, { "epoch": 0.92, "grad_norm": 2.726010799407959, "learning_rate": 3.7244533289987895e-07, "loss": 0.5886, "step": 35717 }, { "epoch": 0.92, "grad_norm": 1.1600617170333862, "learning_rate": 3.72220917316487e-07, "loss": 0.527, "step": 35718 }, { "epoch": 0.92, "grad_norm": 1.6739388704299927, "learning_rate": 3.719965680817017e-07, "loss": 0.7287, "step": 35719 }, { "epoch": 0.92, "grad_norm": 1.2207807302474976, "learning_rate": 3.717722851970662e-07, "loss": 0.474, "step": 35720 }, { "epoch": 0.92, "grad_norm": 1.1137173175811768, "learning_rate": 3.715480686641271e-07, "loss": 0.397, "step": 35721 }, { "epoch": 0.92, "grad_norm": 1.2737510204315186, "learning_rate": 3.713239184844286e-07, "loss": 0.4893, "step": 35722 }, { "epoch": 0.92, "grad_norm": 2.1451992988586426, "learning_rate": 3.710998346595174e-07, "loss": 0.5418, "step": 35723 }, { "epoch": 0.92, "grad_norm": 1.2329528331756592, "learning_rate": 3.708758171909366e-07, "loss": 0.3091, "step": 35724 }, { "epoch": 0.92, "grad_norm": 1.639093041419983, "learning_rate": 3.706518660802294e-07, "loss": 0.4886, "step": 35725 }, { "epoch": 0.92, "grad_norm": 2.103422164916992, "learning_rate": 3.704279813289402e-07, "loss": 0.5376, "step": 35726 }, { "epoch": 0.92, "grad_norm": 7.543537139892578, "learning_rate": 3.7020416293861217e-07, "loss": 0.5375, "step": 35727 }, { "epoch": 0.92, "grad_norm": 1.1423124074935913, "learning_rate": 3.699804109107863e-07, "loss": 0.4403, "step": 35728 }, { "epoch": 0.92, "grad_norm": 1.933558702468872, "learning_rate": 3.697567252470047e-07, "loss": 0.5165, "step": 35729 }, { "epoch": 0.92, "grad_norm": 11.34152603149414, "learning_rate": 3.695331059488105e-07, "loss": 0.4314, "step": 35730 }, { "epoch": 0.92, "grad_norm": 17.13079071044922, "learning_rate": 3.6930955301774373e-07, "loss": 0.7993, "step": 35731 }, { "epoch": 0.92, "grad_norm": 1.231204867362976, "learning_rate": 3.6908606645534305e-07, "loss": 0.6134, "step": 35732 }, { "epoch": 0.92, "grad_norm": 1.376467227935791, "learning_rate": 3.688626462631528e-07, "loss": 0.4492, "step": 35733 }, { "epoch": 0.92, "grad_norm": 0.9094882607460022, "learning_rate": 3.686392924427096e-07, "loss": 0.5301, "step": 35734 }, { "epoch": 0.92, "grad_norm": 2.9129233360290527, "learning_rate": 3.684160049955532e-07, "loss": 0.5577, "step": 35735 }, { "epoch": 0.92, "grad_norm": 1.3116636276245117, "learning_rate": 3.6819278392322245e-07, "loss": 0.3697, "step": 35736 }, { "epoch": 0.92, "grad_norm": 1.9137362241744995, "learning_rate": 3.679696292272561e-07, "loss": 0.4252, "step": 35737 }, { "epoch": 0.92, "grad_norm": 1.74778413772583, "learning_rate": 3.677465409091918e-07, "loss": 0.4857, "step": 35738 }, { "epoch": 0.92, "grad_norm": 1.6299504041671753, "learning_rate": 3.6752351897056503e-07, "loss": 0.4894, "step": 35739 }, { "epoch": 0.92, "grad_norm": 1.2406322956085205, "learning_rate": 3.6730056341291564e-07, "loss": 0.4474, "step": 35740 }, { "epoch": 0.92, "grad_norm": 1.8759310245513916, "learning_rate": 3.6707767423778127e-07, "loss": 0.4572, "step": 35741 }, { "epoch": 0.92, "grad_norm": 2.7671124935150146, "learning_rate": 3.668548514466919e-07, "loss": 0.6507, "step": 35742 }, { "epoch": 0.92, "grad_norm": 13.224616050720215, "learning_rate": 3.666320950411884e-07, "loss": 0.4949, "step": 35743 }, { "epoch": 0.92, "grad_norm": 1.781969666481018, "learning_rate": 3.66409405022804e-07, "loss": 0.5012, "step": 35744 }, { "epoch": 0.92, "grad_norm": 1.1781995296478271, "learning_rate": 3.6618678139307305e-07, "loss": 0.4374, "step": 35745 }, { "epoch": 0.92, "grad_norm": 1.313190221786499, "learning_rate": 3.659642241535311e-07, "loss": 0.536, "step": 35746 }, { "epoch": 0.92, "grad_norm": 2.3551931381225586, "learning_rate": 3.6574173330571116e-07, "loss": 0.7657, "step": 35747 }, { "epoch": 0.92, "grad_norm": 1.0997288227081299, "learning_rate": 3.655193088511455e-07, "loss": 0.6043, "step": 35748 }, { "epoch": 0.92, "grad_norm": 1.3100779056549072, "learning_rate": 3.652969507913684e-07, "loss": 0.5656, "step": 35749 }, { "epoch": 0.92, "grad_norm": 1.7115339040756226, "learning_rate": 3.6507465912791085e-07, "loss": 0.4279, "step": 35750 }, { "epoch": 0.92, "grad_norm": 1.5750170946121216, "learning_rate": 3.648524338623072e-07, "loss": 0.5505, "step": 35751 }, { "epoch": 0.92, "grad_norm": 1.3046776056289673, "learning_rate": 3.6463027499608506e-07, "loss": 0.5918, "step": 35752 }, { "epoch": 0.92, "grad_norm": 1.3698726892471313, "learning_rate": 3.644081825307788e-07, "loss": 0.484, "step": 35753 }, { "epoch": 0.92, "grad_norm": 1.49138343334198, "learning_rate": 3.641861564679172e-07, "loss": 0.4856, "step": 35754 }, { "epoch": 0.92, "grad_norm": 2.5982301235198975, "learning_rate": 3.639641968090302e-07, "loss": 0.7405, "step": 35755 }, { "epoch": 0.92, "grad_norm": 1.3037058115005493, "learning_rate": 3.637423035556487e-07, "loss": 0.3995, "step": 35756 }, { "epoch": 0.92, "grad_norm": 1.1209896802902222, "learning_rate": 3.6352047670930147e-07, "loss": 0.5218, "step": 35757 }, { "epoch": 0.92, "grad_norm": 1.427205204963684, "learning_rate": 3.6329871627151734e-07, "loss": 0.3649, "step": 35758 }, { "epoch": 0.92, "grad_norm": 1.3479270935058594, "learning_rate": 3.630770222438229e-07, "loss": 0.4187, "step": 35759 }, { "epoch": 0.92, "grad_norm": 1.563722014427185, "learning_rate": 3.62855394627748e-07, "loss": 0.4526, "step": 35760 }, { "epoch": 0.92, "grad_norm": 1.2586443424224854, "learning_rate": 3.6263383342481804e-07, "loss": 0.5641, "step": 35761 }, { "epoch": 0.92, "grad_norm": 1.341231107711792, "learning_rate": 3.6241233863656076e-07, "loss": 0.4872, "step": 35762 }, { "epoch": 0.92, "grad_norm": 12.549861907958984, "learning_rate": 3.6219091026450383e-07, "loss": 0.4065, "step": 35763 }, { "epoch": 0.92, "grad_norm": 0.9356077313423157, "learning_rate": 3.619695483101726e-07, "loss": 0.4807, "step": 35764 }, { "epoch": 0.92, "grad_norm": 3.3819141387939453, "learning_rate": 3.617482527750915e-07, "loss": 0.7151, "step": 35765 }, { "epoch": 0.92, "grad_norm": 1.357305645942688, "learning_rate": 3.6152702366078484e-07, "loss": 0.6043, "step": 35766 }, { "epoch": 0.92, "grad_norm": 1.6782366037368774, "learning_rate": 3.613058609687792e-07, "loss": 0.3765, "step": 35767 }, { "epoch": 0.92, "grad_norm": 1.1859209537506104, "learning_rate": 3.610847647005988e-07, "loss": 0.5071, "step": 35768 }, { "epoch": 0.92, "grad_norm": 1.5485047101974487, "learning_rate": 3.608637348577648e-07, "loss": 0.595, "step": 35769 }, { "epoch": 0.92, "grad_norm": 3.1745474338531494, "learning_rate": 3.6064277144180373e-07, "loss": 0.3543, "step": 35770 }, { "epoch": 0.92, "grad_norm": 0.9768843054771423, "learning_rate": 3.6042187445423647e-07, "loss": 0.5303, "step": 35771 }, { "epoch": 0.92, "grad_norm": 4.419188976287842, "learning_rate": 3.6020104389658303e-07, "loss": 0.5005, "step": 35772 }, { "epoch": 0.92, "grad_norm": 9.275757789611816, "learning_rate": 3.5998027977037e-07, "loss": 0.5106, "step": 35773 }, { "epoch": 0.92, "grad_norm": 1.3022621870040894, "learning_rate": 3.5975958207711717e-07, "loss": 0.6374, "step": 35774 }, { "epoch": 0.92, "grad_norm": 1.3656315803527832, "learning_rate": 3.595389508183433e-07, "loss": 0.4696, "step": 35775 }, { "epoch": 0.92, "grad_norm": 1.4348742961883545, "learning_rate": 3.593183859955696e-07, "loss": 0.5993, "step": 35776 }, { "epoch": 0.92, "grad_norm": 1.2135404348373413, "learning_rate": 3.5909788761031793e-07, "loss": 0.5554, "step": 35777 }, { "epoch": 0.92, "grad_norm": 1.8193614482879639, "learning_rate": 3.5887745566410726e-07, "loss": 0.4364, "step": 35778 }, { "epoch": 0.92, "grad_norm": 5.826880931854248, "learning_rate": 3.5865709015845407e-07, "loss": 0.6086, "step": 35779 }, { "epoch": 0.92, "grad_norm": 1.4692785739898682, "learning_rate": 3.5843679109487937e-07, "loss": 0.6158, "step": 35780 }, { "epoch": 0.92, "grad_norm": 1.446698784828186, "learning_rate": 3.58216558474902e-07, "loss": 0.3492, "step": 35781 }, { "epoch": 0.92, "grad_norm": 1.5401729345321655, "learning_rate": 3.579963923000385e-07, "loss": 0.3623, "step": 35782 }, { "epoch": 0.92, "grad_norm": 1.868306279182434, "learning_rate": 3.5777629257180533e-07, "loss": 0.559, "step": 35783 }, { "epoch": 0.92, "grad_norm": 1.594584345817566, "learning_rate": 3.5755625929172033e-07, "loss": 0.4839, "step": 35784 }, { "epoch": 0.92, "grad_norm": 1.2913981676101685, "learning_rate": 3.5733629246129996e-07, "loss": 0.5187, "step": 35785 }, { "epoch": 0.92, "grad_norm": 1.67516028881073, "learning_rate": 3.571163920820586e-07, "loss": 0.4669, "step": 35786 }, { "epoch": 0.92, "grad_norm": 1.407301664352417, "learning_rate": 3.5689655815551396e-07, "loss": 0.6744, "step": 35787 }, { "epoch": 0.92, "grad_norm": 1.308752417564392, "learning_rate": 3.566767906831803e-07, "loss": 0.5919, "step": 35788 }, { "epoch": 0.92, "grad_norm": 1.747270941734314, "learning_rate": 3.564570896665698e-07, "loss": 0.5872, "step": 35789 }, { "epoch": 0.92, "grad_norm": 1.2191693782806396, "learning_rate": 3.562374551072001e-07, "loss": 0.3772, "step": 35790 }, { "epoch": 0.92, "grad_norm": 1.1588879823684692, "learning_rate": 3.5601788700658223e-07, "loss": 0.5643, "step": 35791 }, { "epoch": 0.92, "grad_norm": 1.0094597339630127, "learning_rate": 3.557983853662306e-07, "loss": 0.5067, "step": 35792 }, { "epoch": 0.92, "grad_norm": 1.6300888061523438, "learning_rate": 3.555789501876572e-07, "loss": 0.6131, "step": 35793 }, { "epoch": 0.92, "grad_norm": 1.6625562906265259, "learning_rate": 3.553595814723743e-07, "loss": 0.6077, "step": 35794 }, { "epoch": 0.92, "grad_norm": 4.005761623382568, "learning_rate": 3.551402792218939e-07, "loss": 0.8441, "step": 35795 }, { "epoch": 0.92, "grad_norm": 2.1792080402374268, "learning_rate": 3.54921043437727e-07, "loss": 0.6233, "step": 35796 }, { "epoch": 0.92, "grad_norm": 1.683356761932373, "learning_rate": 3.547018741213848e-07, "loss": 0.4773, "step": 35797 }, { "epoch": 0.92, "grad_norm": 1.5649296045303345, "learning_rate": 3.5448277127437814e-07, "loss": 0.4746, "step": 35798 }, { "epoch": 0.92, "grad_norm": 2.234496831893921, "learning_rate": 3.5426373489821586e-07, "loss": 0.6621, "step": 35799 }, { "epoch": 0.92, "grad_norm": 1.5139093399047852, "learning_rate": 3.540447649944079e-07, "loss": 0.4199, "step": 35800 }, { "epoch": 0.92, "grad_norm": 1.9762754440307617, "learning_rate": 3.53825861564463e-07, "loss": 0.5639, "step": 35801 }, { "epoch": 0.92, "grad_norm": 1.593271255493164, "learning_rate": 3.5360702460988993e-07, "loss": 0.5071, "step": 35802 }, { "epoch": 0.92, "grad_norm": 1.9805309772491455, "learning_rate": 3.5338825413219647e-07, "loss": 0.6598, "step": 35803 }, { "epoch": 0.92, "grad_norm": 1.576930284500122, "learning_rate": 3.5316955013289136e-07, "loss": 0.6377, "step": 35804 }, { "epoch": 0.92, "grad_norm": 1.3955061435699463, "learning_rate": 3.5295091261348116e-07, "loss": 0.4419, "step": 35805 }, { "epoch": 0.92, "grad_norm": 5.462518215179443, "learning_rate": 3.527323415754724e-07, "loss": 0.528, "step": 35806 }, { "epoch": 0.92, "grad_norm": 2.326287269592285, "learning_rate": 3.525138370203696e-07, "loss": 0.4769, "step": 35807 }, { "epoch": 0.92, "grad_norm": 1.410720944404602, "learning_rate": 3.5229539894968247e-07, "loss": 0.427, "step": 35808 }, { "epoch": 0.92, "grad_norm": 1.02591872215271, "learning_rate": 3.520770273649132e-07, "loss": 0.4038, "step": 35809 }, { "epoch": 0.92, "grad_norm": 1.123979926109314, "learning_rate": 3.518587222675662e-07, "loss": 0.4859, "step": 35810 }, { "epoch": 0.92, "grad_norm": 1.61846125125885, "learning_rate": 3.516404836591492e-07, "loss": 0.4551, "step": 35811 }, { "epoch": 0.92, "grad_norm": 1.832534670829773, "learning_rate": 3.5142231154116413e-07, "loss": 0.447, "step": 35812 }, { "epoch": 0.92, "grad_norm": 0.9398809671401978, "learning_rate": 3.512042059151144e-07, "loss": 0.3965, "step": 35813 }, { "epoch": 0.92, "grad_norm": 0.8704807162284851, "learning_rate": 3.509861667825032e-07, "loss": 0.4166, "step": 35814 }, { "epoch": 0.92, "grad_norm": 1.197670578956604, "learning_rate": 3.5076819414483376e-07, "loss": 0.3582, "step": 35815 }, { "epoch": 0.92, "grad_norm": 8.308104515075684, "learning_rate": 3.5055028800360715e-07, "loss": 0.6794, "step": 35816 }, { "epoch": 0.92, "grad_norm": 1.948903203010559, "learning_rate": 3.503324483603254e-07, "loss": 0.4228, "step": 35817 }, { "epoch": 0.92, "grad_norm": 0.9976298213005066, "learning_rate": 3.501146752164908e-07, "loss": 0.414, "step": 35818 }, { "epoch": 0.92, "grad_norm": 1.5884836912155151, "learning_rate": 3.4989696857360313e-07, "loss": 0.5504, "step": 35819 }, { "epoch": 0.92, "grad_norm": 1.5395007133483887, "learning_rate": 3.496793284331612e-07, "loss": 0.4709, "step": 35820 }, { "epoch": 0.92, "grad_norm": 1.352325677871704, "learning_rate": 3.494617547966672e-07, "loss": 0.6078, "step": 35821 }, { "epoch": 0.92, "grad_norm": 1.0150697231292725, "learning_rate": 3.4924424766562107e-07, "loss": 0.3664, "step": 35822 }, { "epoch": 0.92, "grad_norm": 5.500583648681641, "learning_rate": 3.4902680704151923e-07, "loss": 0.622, "step": 35823 }, { "epoch": 0.92, "grad_norm": 1.9326573610305786, "learning_rate": 3.4880943292586065e-07, "loss": 0.3856, "step": 35824 }, { "epoch": 0.92, "grad_norm": 1.2971680164337158, "learning_rate": 3.485921253201452e-07, "loss": 0.4823, "step": 35825 }, { "epoch": 0.92, "grad_norm": 1.7192950248718262, "learning_rate": 3.483748842258683e-07, "loss": 0.4809, "step": 35826 }, { "epoch": 0.92, "grad_norm": 1.3418326377868652, "learning_rate": 3.4815770964452766e-07, "loss": 0.3961, "step": 35827 }, { "epoch": 0.92, "grad_norm": 5.3234429359436035, "learning_rate": 3.479406015776221e-07, "loss": 0.5909, "step": 35828 }, { "epoch": 0.92, "grad_norm": 1.772688627243042, "learning_rate": 3.477235600266449e-07, "loss": 0.4532, "step": 35829 }, { "epoch": 0.92, "grad_norm": 2.674989938735962, "learning_rate": 3.475065849930914e-07, "loss": 0.7402, "step": 35830 }, { "epoch": 0.92, "grad_norm": 1.303714632987976, "learning_rate": 3.4728967647845945e-07, "loss": 0.3846, "step": 35831 }, { "epoch": 0.92, "grad_norm": 1.6627676486968994, "learning_rate": 3.4707283448424334e-07, "loss": 0.5793, "step": 35832 }, { "epoch": 0.92, "grad_norm": 1.6484357118606567, "learning_rate": 3.468560590119363e-07, "loss": 0.5958, "step": 35833 }, { "epoch": 0.92, "grad_norm": 1.4490535259246826, "learning_rate": 3.466393500630316e-07, "loss": 0.3695, "step": 35834 }, { "epoch": 0.92, "grad_norm": 1.6152478456497192, "learning_rate": 3.4642270763902476e-07, "loss": 0.4416, "step": 35835 }, { "epoch": 0.92, "grad_norm": 1.009825348854065, "learning_rate": 3.462061317414067e-07, "loss": 0.5205, "step": 35836 }, { "epoch": 0.92, "grad_norm": 2.3023464679718018, "learning_rate": 3.459896223716708e-07, "loss": 0.4866, "step": 35837 }, { "epoch": 0.92, "grad_norm": 1.5805531740188599, "learning_rate": 3.457731795313102e-07, "loss": 0.4919, "step": 35838 }, { "epoch": 0.92, "grad_norm": 1.0513100624084473, "learning_rate": 3.455568032218148e-07, "loss": 0.4866, "step": 35839 }, { "epoch": 0.92, "grad_norm": 5.193349838256836, "learning_rate": 3.4534049344467693e-07, "loss": 0.5627, "step": 35840 }, { "epoch": 0.92, "grad_norm": 1.4840339422225952, "learning_rate": 3.451242502013863e-07, "loss": 0.4661, "step": 35841 }, { "epoch": 0.92, "grad_norm": 2.222794532775879, "learning_rate": 3.4490807349343405e-07, "loss": 0.6691, "step": 35842 }, { "epoch": 0.92, "grad_norm": 1.7451509237289429, "learning_rate": 3.446919633223089e-07, "loss": 0.6529, "step": 35843 }, { "epoch": 0.92, "grad_norm": 1.8412855863571167, "learning_rate": 3.444759196894998e-07, "loss": 0.4606, "step": 35844 }, { "epoch": 0.92, "grad_norm": 0.9378197193145752, "learning_rate": 3.4425994259649764e-07, "loss": 0.5494, "step": 35845 }, { "epoch": 0.92, "grad_norm": 1.9526935815811157, "learning_rate": 3.440440320447902e-07, "loss": 0.5819, "step": 35846 }, { "epoch": 0.92, "grad_norm": 1.3274872303009033, "learning_rate": 3.4382818803586403e-07, "loss": 0.6205, "step": 35847 }, { "epoch": 0.92, "grad_norm": 1.1893904209136963, "learning_rate": 3.436124105712069e-07, "loss": 0.3523, "step": 35848 }, { "epoch": 0.92, "grad_norm": 3.111802816390991, "learning_rate": 3.433966996523075e-07, "loss": 0.6178, "step": 35849 }, { "epoch": 0.92, "grad_norm": 1.426593542098999, "learning_rate": 3.431810552806503e-07, "loss": 0.5263, "step": 35850 }, { "epoch": 0.92, "grad_norm": 3.615354299545288, "learning_rate": 3.429654774577218e-07, "loss": 0.6483, "step": 35851 }, { "epoch": 0.92, "grad_norm": 1.4968831539154053, "learning_rate": 3.427499661850098e-07, "loss": 0.3935, "step": 35852 }, { "epoch": 0.92, "grad_norm": 2.9037859439849854, "learning_rate": 3.4253452146399636e-07, "loss": 0.6425, "step": 35853 }, { "epoch": 0.92, "grad_norm": 3.58882212638855, "learning_rate": 3.42319143296167e-07, "loss": 0.568, "step": 35854 }, { "epoch": 0.92, "grad_norm": 1.3279551267623901, "learning_rate": 3.421038316830083e-07, "loss": 0.4213, "step": 35855 }, { "epoch": 0.92, "grad_norm": 1.3156144618988037, "learning_rate": 3.4188858662600135e-07, "loss": 0.4123, "step": 35856 }, { "epoch": 0.92, "grad_norm": 1.7478934526443481, "learning_rate": 3.416734081266315e-07, "loss": 0.5818, "step": 35857 }, { "epoch": 0.92, "grad_norm": 6.20159387588501, "learning_rate": 3.414582961863788e-07, "loss": 0.7243, "step": 35858 }, { "epoch": 0.92, "grad_norm": 1.2690868377685547, "learning_rate": 3.412432508067287e-07, "loss": 0.5532, "step": 35859 }, { "epoch": 0.92, "grad_norm": 1.6158735752105713, "learning_rate": 3.410282719891622e-07, "loss": 0.6509, "step": 35860 }, { "epoch": 0.92, "grad_norm": 2.0290229320526123, "learning_rate": 3.4081335973515816e-07, "loss": 0.6095, "step": 35861 }, { "epoch": 0.92, "grad_norm": 1.4646661281585693, "learning_rate": 3.40598514046202e-07, "loss": 0.3479, "step": 35862 }, { "epoch": 0.92, "grad_norm": 5.292525291442871, "learning_rate": 3.403837349237726e-07, "loss": 0.613, "step": 35863 }, { "epoch": 0.92, "grad_norm": 4.395012855529785, "learning_rate": 3.401690223693488e-07, "loss": 0.4784, "step": 35864 }, { "epoch": 0.92, "grad_norm": 2.3306591510772705, "learning_rate": 3.399543763844104e-07, "loss": 0.7045, "step": 35865 }, { "epoch": 0.92, "grad_norm": 2.110740900039673, "learning_rate": 3.3973979697043855e-07, "loss": 0.6637, "step": 35866 }, { "epoch": 0.92, "grad_norm": 1.313950777053833, "learning_rate": 3.3952528412891097e-07, "loss": 0.3525, "step": 35867 }, { "epoch": 0.92, "grad_norm": 1.933398723602295, "learning_rate": 3.3931083786130525e-07, "loss": 0.6435, "step": 35868 }, { "epoch": 0.92, "grad_norm": 1.8510431051254272, "learning_rate": 3.390964581691003e-07, "loss": 0.5749, "step": 35869 }, { "epoch": 0.92, "grad_norm": 1.3101876974105835, "learning_rate": 3.388821450537727e-07, "loss": 0.4945, "step": 35870 }, { "epoch": 0.92, "grad_norm": 1.9322670698165894, "learning_rate": 3.3866789851680016e-07, "loss": 0.42, "step": 35871 }, { "epoch": 0.92, "grad_norm": 2.018527030944824, "learning_rate": 3.384537185596581e-07, "loss": 0.5382, "step": 35872 }, { "epoch": 0.92, "grad_norm": 1.865890383720398, "learning_rate": 3.3823960518382324e-07, "loss": 0.5327, "step": 35873 }, { "epoch": 0.92, "grad_norm": 1.6843678951263428, "learning_rate": 3.38025558390771e-07, "loss": 0.4545, "step": 35874 }, { "epoch": 0.92, "grad_norm": 1.4240295886993408, "learning_rate": 3.3781157818197464e-07, "loss": 0.6421, "step": 35875 }, { "epoch": 0.92, "grad_norm": 1.238364338874817, "learning_rate": 3.3759766455891295e-07, "loss": 0.5026, "step": 35876 }, { "epoch": 0.92, "grad_norm": 1.7426064014434814, "learning_rate": 3.3738381752305703e-07, "loss": 0.5236, "step": 35877 }, { "epoch": 0.92, "grad_norm": 1.537094235420227, "learning_rate": 3.3717003707588015e-07, "loss": 0.5464, "step": 35878 }, { "epoch": 0.92, "grad_norm": 1.3765422105789185, "learning_rate": 3.369563232188577e-07, "loss": 0.5322, "step": 35879 }, { "epoch": 0.92, "grad_norm": 0.894594132900238, "learning_rate": 3.367426759534609e-07, "loss": 0.3931, "step": 35880 }, { "epoch": 0.92, "grad_norm": 1.2868735790252686, "learning_rate": 3.3652909528116286e-07, "loss": 0.3593, "step": 35881 }, { "epoch": 0.92, "grad_norm": 1.4926707744598389, "learning_rate": 3.3631558120343354e-07, "loss": 0.4384, "step": 35882 }, { "epoch": 0.92, "grad_norm": 1.4452977180480957, "learning_rate": 3.3610213372174736e-07, "loss": 0.5209, "step": 35883 }, { "epoch": 0.92, "grad_norm": 2.0866189002990723, "learning_rate": 3.358887528375743e-07, "loss": 0.5342, "step": 35884 }, { "epoch": 0.92, "grad_norm": 0.9654463529586792, "learning_rate": 3.3567543855238303e-07, "loss": 0.4435, "step": 35885 }, { "epoch": 0.92, "grad_norm": 3.1578145027160645, "learning_rate": 3.354621908676459e-07, "loss": 0.6671, "step": 35886 }, { "epoch": 0.92, "grad_norm": 1.5422275066375732, "learning_rate": 3.3524900978483155e-07, "loss": 0.5556, "step": 35887 }, { "epoch": 0.92, "grad_norm": 1.747633934020996, "learning_rate": 3.350358953054089e-07, "loss": 0.5061, "step": 35888 }, { "epoch": 0.92, "grad_norm": 5.467662811279297, "learning_rate": 3.348228474308457e-07, "loss": 0.5749, "step": 35889 }, { "epoch": 0.92, "grad_norm": 1.6280955076217651, "learning_rate": 3.3460986616261184e-07, "loss": 0.6979, "step": 35890 }, { "epoch": 0.92, "grad_norm": 1.76310133934021, "learning_rate": 3.34396951502175e-07, "loss": 0.5753, "step": 35891 }, { "epoch": 0.92, "grad_norm": 2.484052896499634, "learning_rate": 3.3418410345100073e-07, "loss": 0.5793, "step": 35892 }, { "epoch": 0.92, "grad_norm": 2.4769561290740967, "learning_rate": 3.3397132201055784e-07, "loss": 0.489, "step": 35893 }, { "epoch": 0.92, "grad_norm": 1.1404263973236084, "learning_rate": 3.337586071823118e-07, "loss": 0.3628, "step": 35894 }, { "epoch": 0.92, "grad_norm": 1.2603317499160767, "learning_rate": 3.33545958967727e-07, "loss": 0.551, "step": 35895 }, { "epoch": 0.92, "grad_norm": 1.505152702331543, "learning_rate": 3.333333773682723e-07, "loss": 0.5028, "step": 35896 }, { "epoch": 0.92, "grad_norm": 1.471565842628479, "learning_rate": 3.3312086238541096e-07, "loss": 0.6953, "step": 35897 }, { "epoch": 0.92, "grad_norm": 8.586797714233398, "learning_rate": 3.3290841402060625e-07, "loss": 0.4673, "step": 35898 }, { "epoch": 0.92, "grad_norm": 1.539422631263733, "learning_rate": 3.3269603227532257e-07, "loss": 0.5896, "step": 35899 }, { "epoch": 0.92, "grad_norm": 1.1740111112594604, "learning_rate": 3.3248371715102535e-07, "loss": 0.426, "step": 35900 }, { "epoch": 0.92, "grad_norm": 1.680300235748291, "learning_rate": 3.322714686491768e-07, "loss": 0.563, "step": 35901 }, { "epoch": 0.92, "grad_norm": 1.4815044403076172, "learning_rate": 3.320592867712391e-07, "loss": 0.4648, "step": 35902 }, { "epoch": 0.92, "grad_norm": 1.6071568727493286, "learning_rate": 3.318471715186744e-07, "loss": 0.497, "step": 35903 }, { "epoch": 0.92, "grad_norm": 1.6512503623962402, "learning_rate": 3.3163512289294596e-07, "loss": 0.5361, "step": 35904 }, { "epoch": 0.92, "grad_norm": 1.311998724937439, "learning_rate": 3.3142314089551377e-07, "loss": 0.4797, "step": 35905 }, { "epoch": 0.92, "grad_norm": 5.893874168395996, "learning_rate": 3.312112255278377e-07, "loss": 0.2764, "step": 35906 }, { "epoch": 0.92, "grad_norm": 1.381750226020813, "learning_rate": 3.3099937679137997e-07, "loss": 0.4841, "step": 35907 }, { "epoch": 0.92, "grad_norm": 1.6164236068725586, "learning_rate": 3.3078759468760046e-07, "loss": 0.4449, "step": 35908 }, { "epoch": 0.92, "grad_norm": 1.5336121320724487, "learning_rate": 3.3057587921795695e-07, "loss": 0.4935, "step": 35909 }, { "epoch": 0.92, "grad_norm": 1.8051832914352417, "learning_rate": 3.3036423038391053e-07, "loss": 0.4529, "step": 35910 }, { "epoch": 0.92, "grad_norm": 4.553867816925049, "learning_rate": 3.301526481869177e-07, "loss": 0.5554, "step": 35911 }, { "epoch": 0.92, "grad_norm": 1.789015769958496, "learning_rate": 3.299411326284396e-07, "loss": 0.6495, "step": 35912 }, { "epoch": 0.92, "grad_norm": 3.7810275554656982, "learning_rate": 3.2972968370992954e-07, "loss": 0.3942, "step": 35913 }, { "epoch": 0.92, "grad_norm": 3.1356844902038574, "learning_rate": 3.295183014328485e-07, "loss": 0.5734, "step": 35914 }, { "epoch": 0.92, "grad_norm": 1.4243932962417603, "learning_rate": 3.2930698579865085e-07, "loss": 0.6179, "step": 35915 }, { "epoch": 0.92, "grad_norm": 1.822981595993042, "learning_rate": 3.290957368087944e-07, "loss": 0.6367, "step": 35916 }, { "epoch": 0.92, "grad_norm": 2.041520118713379, "learning_rate": 3.2888455446473343e-07, "loss": 0.5176, "step": 35917 }, { "epoch": 0.92, "grad_norm": 1.2848546504974365, "learning_rate": 3.286734387679258e-07, "loss": 0.4741, "step": 35918 }, { "epoch": 0.92, "grad_norm": 1.6981377601623535, "learning_rate": 3.2846238971982247e-07, "loss": 0.4872, "step": 35919 }, { "epoch": 0.92, "grad_norm": 1.4463621377944946, "learning_rate": 3.2825140732188233e-07, "loss": 0.4664, "step": 35920 }, { "epoch": 0.92, "grad_norm": 2.0836310386657715, "learning_rate": 3.280404915755564e-07, "loss": 0.5186, "step": 35921 }, { "epoch": 0.92, "grad_norm": 2.4319233894348145, "learning_rate": 3.27829642482298e-07, "loss": 0.5315, "step": 35922 }, { "epoch": 0.92, "grad_norm": 1.2761025428771973, "learning_rate": 3.276188600435615e-07, "loss": 0.5002, "step": 35923 }, { "epoch": 0.92, "grad_norm": 0.9834506511688232, "learning_rate": 3.2740814426079904e-07, "loss": 0.3757, "step": 35924 }, { "epoch": 0.92, "grad_norm": 0.9551882147789001, "learning_rate": 3.2719749513546284e-07, "loss": 0.3654, "step": 35925 }, { "epoch": 0.92, "grad_norm": 12.736127853393555, "learning_rate": 3.2698691266900395e-07, "loss": 0.6983, "step": 35926 }, { "epoch": 0.92, "grad_norm": 1.5289273262023926, "learning_rate": 3.2677639686287455e-07, "loss": 0.5109, "step": 35927 }, { "epoch": 0.92, "grad_norm": 1.1835014820098877, "learning_rate": 3.2656594771852567e-07, "loss": 0.5026, "step": 35928 }, { "epoch": 0.92, "grad_norm": 2.0383453369140625, "learning_rate": 3.263555652374062e-07, "loss": 0.607, "step": 35929 }, { "epoch": 0.92, "grad_norm": 2.220996856689453, "learning_rate": 3.26145249420966e-07, "loss": 0.408, "step": 35930 }, { "epoch": 0.92, "grad_norm": 1.2963591814041138, "learning_rate": 3.259350002706563e-07, "loss": 0.4562, "step": 35931 }, { "epoch": 0.92, "grad_norm": 1.1558880805969238, "learning_rate": 3.2572481778792353e-07, "loss": 0.4637, "step": 35932 }, { "epoch": 0.92, "grad_norm": 1.6523385047912598, "learning_rate": 3.2551470197421665e-07, "loss": 0.5578, "step": 35933 }, { "epoch": 0.92, "grad_norm": 3.094583511352539, "learning_rate": 3.2530465283098667e-07, "loss": 0.5943, "step": 35934 }, { "epoch": 0.92, "grad_norm": 1.7775733470916748, "learning_rate": 3.25094670359678e-07, "loss": 0.4958, "step": 35935 }, { "epoch": 0.92, "grad_norm": 1.5437856912612915, "learning_rate": 3.248847545617362e-07, "loss": 0.4347, "step": 35936 }, { "epoch": 0.92, "grad_norm": 1.3066737651824951, "learning_rate": 3.2467490543861337e-07, "loss": 0.4735, "step": 35937 }, { "epoch": 0.92, "grad_norm": 1.231642723083496, "learning_rate": 3.2446512299175057e-07, "loss": 0.481, "step": 35938 }, { "epoch": 0.92, "grad_norm": 2.3416919708251953, "learning_rate": 3.2425540722259675e-07, "loss": 0.4378, "step": 35939 }, { "epoch": 0.92, "grad_norm": 1.1704051494598389, "learning_rate": 3.2404575813259397e-07, "loss": 0.3928, "step": 35940 }, { "epoch": 0.92, "grad_norm": 1.2466679811477661, "learning_rate": 3.2383617572319e-07, "loss": 0.5097, "step": 35941 }, { "epoch": 0.92, "grad_norm": 7.9209747314453125, "learning_rate": 3.236266599958282e-07, "loss": 0.4386, "step": 35942 }, { "epoch": 0.92, "grad_norm": 1.403233289718628, "learning_rate": 3.2341721095195064e-07, "loss": 0.4952, "step": 35943 }, { "epoch": 0.92, "grad_norm": 1.3429449796676636, "learning_rate": 3.23207828593004e-07, "loss": 0.5015, "step": 35944 }, { "epoch": 0.92, "grad_norm": 4.449924945831299, "learning_rate": 3.2299851292042827e-07, "loss": 0.4884, "step": 35945 }, { "epoch": 0.92, "grad_norm": 1.166290044784546, "learning_rate": 3.227892639356689e-07, "loss": 0.4842, "step": 35946 }, { "epoch": 0.92, "grad_norm": 5.9190473556518555, "learning_rate": 3.225800816401636e-07, "loss": 0.5038, "step": 35947 }, { "epoch": 0.92, "grad_norm": 4.091197967529297, "learning_rate": 3.22370966035358e-07, "loss": 0.5624, "step": 35948 }, { "epoch": 0.92, "grad_norm": 11.773337364196777, "learning_rate": 3.2216191712269086e-07, "loss": 0.5535, "step": 35949 }, { "epoch": 0.92, "grad_norm": 1.7473231554031372, "learning_rate": 3.2195293490360326e-07, "loss": 0.5897, "step": 35950 }, { "epoch": 0.92, "grad_norm": 2.9221410751342773, "learning_rate": 3.2174401937953735e-07, "loss": 0.4643, "step": 35951 }, { "epoch": 0.92, "grad_norm": 1.5799497365951538, "learning_rate": 3.2153517055192985e-07, "loss": 0.5434, "step": 35952 }, { "epoch": 0.92, "grad_norm": 1.3149058818817139, "learning_rate": 3.2132638842222174e-07, "loss": 0.4968, "step": 35953 }, { "epoch": 0.92, "grad_norm": 1.4547003507614136, "learning_rate": 3.2111767299185084e-07, "loss": 0.6743, "step": 35954 }, { "epoch": 0.92, "grad_norm": 1.8576124906539917, "learning_rate": 3.20909024262257e-07, "loss": 0.4721, "step": 35955 }, { "epoch": 0.92, "grad_norm": 2.4207358360290527, "learning_rate": 3.2070044223487696e-07, "loss": 0.4222, "step": 35956 }, { "epoch": 0.92, "grad_norm": 0.9713292717933655, "learning_rate": 3.204919269111473e-07, "loss": 0.4385, "step": 35957 }, { "epoch": 0.92, "grad_norm": 2.9499704837799072, "learning_rate": 3.202834782925068e-07, "loss": 0.5343, "step": 35958 }, { "epoch": 0.92, "grad_norm": 2.1453707218170166, "learning_rate": 3.2007509638039227e-07, "loss": 0.48, "step": 35959 }, { "epoch": 0.92, "grad_norm": 2.040931463241577, "learning_rate": 3.1986678117623684e-07, "loss": 0.3822, "step": 35960 }, { "epoch": 0.92, "grad_norm": 1.6669814586639404, "learning_rate": 3.196585326814783e-07, "loss": 0.549, "step": 35961 }, { "epoch": 0.92, "grad_norm": 1.5412585735321045, "learning_rate": 3.194503508975522e-07, "loss": 0.5913, "step": 35962 }, { "epoch": 0.92, "grad_norm": 1.5228818655014038, "learning_rate": 3.1924223582589286e-07, "loss": 0.5685, "step": 35963 }, { "epoch": 0.92, "grad_norm": 1.1149011850357056, "learning_rate": 3.1903418746793145e-07, "loss": 0.3531, "step": 35964 }, { "epoch": 0.92, "grad_norm": 1.4295780658721924, "learning_rate": 3.188262058251068e-07, "loss": 0.5832, "step": 35965 }, { "epoch": 0.92, "grad_norm": 1.5367686748504639, "learning_rate": 3.186182908988489e-07, "loss": 0.6463, "step": 35966 }, { "epoch": 0.92, "grad_norm": 3.229915142059326, "learning_rate": 3.1841044269058985e-07, "loss": 0.5247, "step": 35967 }, { "epoch": 0.92, "grad_norm": 1.552154541015625, "learning_rate": 3.1820266120176525e-07, "loss": 0.3968, "step": 35968 }, { "epoch": 0.92, "grad_norm": 1.27443265914917, "learning_rate": 3.17994946433805e-07, "loss": 0.4704, "step": 35969 }, { "epoch": 0.92, "grad_norm": 1.3439350128173828, "learning_rate": 3.177872983881403e-07, "loss": 0.4952, "step": 35970 }, { "epoch": 0.92, "grad_norm": 2.4364993572235107, "learning_rate": 3.175797170662021e-07, "loss": 0.5246, "step": 35971 }, { "epoch": 0.92, "grad_norm": 1.307178020477295, "learning_rate": 3.1737220246942146e-07, "loss": 0.5825, "step": 35972 }, { "epoch": 0.92, "grad_norm": 1.3582725524902344, "learning_rate": 3.171647545992296e-07, "loss": 0.5269, "step": 35973 }, { "epoch": 0.92, "grad_norm": 1.800363540649414, "learning_rate": 3.16957373457053e-07, "loss": 0.5601, "step": 35974 }, { "epoch": 0.92, "grad_norm": 1.4481691122055054, "learning_rate": 3.167500590443251e-07, "loss": 0.6122, "step": 35975 }, { "epoch": 0.92, "grad_norm": 2.6647024154663086, "learning_rate": 3.165428113624702e-07, "loss": 0.5154, "step": 35976 }, { "epoch": 0.92, "grad_norm": 1.8257805109024048, "learning_rate": 3.1633563041291836e-07, "loss": 0.5725, "step": 35977 }, { "epoch": 0.92, "grad_norm": 4.345743179321289, "learning_rate": 3.161285161970995e-07, "loss": 0.5334, "step": 35978 }, { "epoch": 0.92, "grad_norm": 1.2035424709320068, "learning_rate": 3.15921468716438e-07, "loss": 0.5387, "step": 35979 }, { "epoch": 0.92, "grad_norm": 1.430785059928894, "learning_rate": 3.157144879723617e-07, "loss": 0.5392, "step": 35980 }, { "epoch": 0.92, "grad_norm": 0.7640694975852966, "learning_rate": 3.1550757396629496e-07, "loss": 0.4357, "step": 35981 }, { "epoch": 0.92, "grad_norm": 1.4957823753356934, "learning_rate": 3.153007266996677e-07, "loss": 0.4874, "step": 35982 }, { "epoch": 0.92, "grad_norm": 1.598873496055603, "learning_rate": 3.150939461739044e-07, "loss": 0.6233, "step": 35983 }, { "epoch": 0.92, "grad_norm": 2.368476390838623, "learning_rate": 3.148872323904262e-07, "loss": 0.6268, "step": 35984 }, { "epoch": 0.92, "grad_norm": 1.5581854581832886, "learning_rate": 3.146805853506618e-07, "loss": 0.402, "step": 35985 }, { "epoch": 0.92, "grad_norm": 2.601116895675659, "learning_rate": 3.144740050560346e-07, "loss": 0.6048, "step": 35986 }, { "epoch": 0.92, "grad_norm": 1.1081230640411377, "learning_rate": 3.1426749150796574e-07, "loss": 0.4452, "step": 35987 }, { "epoch": 0.92, "grad_norm": 1.6999231576919556, "learning_rate": 3.140610447078818e-07, "loss": 0.6023, "step": 35988 }, { "epoch": 0.92, "grad_norm": 1.5963237285614014, "learning_rate": 3.138546646572027e-07, "loss": 0.6169, "step": 35989 }, { "epoch": 0.92, "grad_norm": 1.4781608581542969, "learning_rate": 3.136483513573518e-07, "loss": 0.4849, "step": 35990 }, { "epoch": 0.92, "grad_norm": 1.2943187952041626, "learning_rate": 3.1344210480975136e-07, "loss": 0.4386, "step": 35991 }, { "epoch": 0.92, "grad_norm": 1.7806349992752075, "learning_rate": 3.132359250158223e-07, "loss": 0.5157, "step": 35992 }, { "epoch": 0.92, "grad_norm": 4.989658355712891, "learning_rate": 3.130298119769859e-07, "loss": 0.6472, "step": 35993 }, { "epoch": 0.92, "grad_norm": 2.3970258235931396, "learning_rate": 3.128237656946609e-07, "loss": 0.6273, "step": 35994 }, { "epoch": 0.92, "grad_norm": 2.075892925262451, "learning_rate": 3.126177861702684e-07, "loss": 0.6227, "step": 35995 }, { "epoch": 0.92, "grad_norm": 2.873676061630249, "learning_rate": 3.124118734052295e-07, "loss": 0.6446, "step": 35996 }, { "epoch": 0.92, "grad_norm": 1.5438380241394043, "learning_rate": 3.122060274009597e-07, "loss": 0.4833, "step": 35997 }, { "epoch": 0.92, "grad_norm": 2.3903214931488037, "learning_rate": 3.120002481588802e-07, "loss": 0.6839, "step": 35998 }, { "epoch": 0.92, "grad_norm": 1.660659670829773, "learning_rate": 3.1179453568040976e-07, "loss": 0.5072, "step": 35999 }, { "epoch": 0.92, "grad_norm": 1.8431105613708496, "learning_rate": 3.115888899669628e-07, "loss": 0.6282, "step": 36000 }, { "epoch": 0.92, "grad_norm": 2.4460315704345703, "learning_rate": 3.1138331101995823e-07, "loss": 0.5369, "step": 36001 }, { "epoch": 0.92, "grad_norm": 0.7075741291046143, "learning_rate": 3.111777988408149e-07, "loss": 0.4683, "step": 36002 }, { "epoch": 0.92, "grad_norm": 1.2397716045379639, "learning_rate": 3.109723534309461e-07, "loss": 0.4659, "step": 36003 }, { "epoch": 0.92, "grad_norm": 1.9472641944885254, "learning_rate": 3.1076697479176744e-07, "loss": 0.5845, "step": 36004 }, { "epoch": 0.92, "grad_norm": 1.4049726724624634, "learning_rate": 3.105616629246966e-07, "loss": 0.4917, "step": 36005 }, { "epoch": 0.92, "grad_norm": 1.978255033493042, "learning_rate": 3.103564178311469e-07, "loss": 0.4284, "step": 36006 }, { "epoch": 0.92, "grad_norm": 1.415885090827942, "learning_rate": 3.101512395125339e-07, "loss": 0.6009, "step": 36007 }, { "epoch": 0.92, "grad_norm": 5.625041961669922, "learning_rate": 3.099461279702698e-07, "loss": 0.7379, "step": 36008 }, { "epoch": 0.92, "grad_norm": 2.3281614780426025, "learning_rate": 3.09741083205769e-07, "loss": 0.4113, "step": 36009 }, { "epoch": 0.92, "grad_norm": 1.8703540563583374, "learning_rate": 3.0953610522044595e-07, "loss": 0.4182, "step": 36010 }, { "epoch": 0.92, "grad_norm": 1.583251953125, "learning_rate": 3.0933119401570957e-07, "loss": 0.4421, "step": 36011 }, { "epoch": 0.92, "grad_norm": 9.582392692565918, "learning_rate": 3.091263495929764e-07, "loss": 0.6748, "step": 36012 }, { "epoch": 0.92, "grad_norm": 1.4017122983932495, "learning_rate": 3.0892157195365647e-07, "loss": 0.4534, "step": 36013 }, { "epoch": 0.92, "grad_norm": 6.693221092224121, "learning_rate": 3.087168610991587e-07, "loss": 0.4942, "step": 36014 }, { "epoch": 0.92, "grad_norm": 1.4222197532653809, "learning_rate": 3.0851221703089626e-07, "loss": 0.5164, "step": 36015 }, { "epoch": 0.92, "grad_norm": 3.4799492359161377, "learning_rate": 3.0830763975027934e-07, "loss": 0.6452, "step": 36016 }, { "epoch": 0.92, "grad_norm": 1.307520866394043, "learning_rate": 3.081031292587178e-07, "loss": 0.4814, "step": 36017 }, { "epoch": 0.92, "grad_norm": 3.2357585430145264, "learning_rate": 3.0789868555761825e-07, "loss": 0.7193, "step": 36018 }, { "epoch": 0.92, "grad_norm": 1.0014015436172485, "learning_rate": 3.07694308648393e-07, "loss": 0.4447, "step": 36019 }, { "epoch": 0.92, "grad_norm": 1.1461257934570312, "learning_rate": 3.074899985324498e-07, "loss": 0.4754, "step": 36020 }, { "epoch": 0.92, "grad_norm": 1.820712685585022, "learning_rate": 3.0728575521119517e-07, "loss": 0.5006, "step": 36021 }, { "epoch": 0.92, "grad_norm": 1.2503011226654053, "learning_rate": 3.0708157868603816e-07, "loss": 0.551, "step": 36022 }, { "epoch": 0.92, "grad_norm": 0.8475116491317749, "learning_rate": 3.068774689583853e-07, "loss": 0.4169, "step": 36023 }, { "epoch": 0.92, "grad_norm": 1.9136056900024414, "learning_rate": 3.066734260296433e-07, "loss": 0.5115, "step": 36024 }, { "epoch": 0.92, "grad_norm": 1.1235756874084473, "learning_rate": 3.0646944990121773e-07, "loss": 0.5008, "step": 36025 }, { "epoch": 0.92, "grad_norm": 1.6267164945602417, "learning_rate": 3.062655405745152e-07, "loss": 0.5279, "step": 36026 }, { "epoch": 0.92, "grad_norm": 3.0567376613616943, "learning_rate": 3.060616980509401e-07, "loss": 0.5144, "step": 36027 }, { "epoch": 0.92, "grad_norm": 3.1843349933624268, "learning_rate": 3.058579223318958e-07, "loss": 0.6517, "step": 36028 }, { "epoch": 0.92, "grad_norm": 1.0589910745620728, "learning_rate": 3.056542134187901e-07, "loss": 0.5243, "step": 36029 }, { "epoch": 0.92, "grad_norm": 4.285520553588867, "learning_rate": 3.0545057131302513e-07, "loss": 0.4462, "step": 36030 }, { "epoch": 0.92, "grad_norm": 1.1765354871749878, "learning_rate": 3.0524699601600206e-07, "loss": 0.3202, "step": 36031 }, { "epoch": 0.92, "grad_norm": 0.9125891923904419, "learning_rate": 3.050434875291275e-07, "loss": 0.4975, "step": 36032 }, { "epoch": 0.92, "grad_norm": 1.8677490949630737, "learning_rate": 3.0484004585380257e-07, "loss": 0.4828, "step": 36033 }, { "epoch": 0.92, "grad_norm": 2.2582736015319824, "learning_rate": 3.0463667099142835e-07, "loss": 0.7981, "step": 36034 }, { "epoch": 0.92, "grad_norm": 1.610398530960083, "learning_rate": 3.044333629434071e-07, "loss": 0.3734, "step": 36035 }, { "epoch": 0.92, "grad_norm": 1.625291109085083, "learning_rate": 3.0423012171113983e-07, "loss": 0.5263, "step": 36036 }, { "epoch": 0.92, "grad_norm": 2.266695261001587, "learning_rate": 3.040269472960267e-07, "loss": 0.4553, "step": 36037 }, { "epoch": 0.92, "grad_norm": 1.0154880285263062, "learning_rate": 3.038238396994686e-07, "loss": 0.3816, "step": 36038 }, { "epoch": 0.92, "grad_norm": 2.0043041706085205, "learning_rate": 3.036207989228646e-07, "loss": 0.5024, "step": 36039 }, { "epoch": 0.92, "grad_norm": 3.4567623138427734, "learning_rate": 3.0341782496761454e-07, "loss": 0.5601, "step": 36040 }, { "epoch": 0.92, "grad_norm": 1.772214412689209, "learning_rate": 3.032149178351174e-07, "loss": 0.5274, "step": 36041 }, { "epoch": 0.92, "grad_norm": 2.435138702392578, "learning_rate": 3.0301207752676865e-07, "loss": 0.6326, "step": 36042 }, { "epoch": 0.92, "grad_norm": 2.7853128910064697, "learning_rate": 3.0280930404397055e-07, "loss": 0.569, "step": 36043 }, { "epoch": 0.92, "grad_norm": 2.823349714279175, "learning_rate": 3.026065973881176e-07, "loss": 0.6134, "step": 36044 }, { "epoch": 0.92, "grad_norm": 1.5143085718154907, "learning_rate": 3.024039575606075e-07, "loss": 0.5411, "step": 36045 }, { "epoch": 0.92, "grad_norm": 1.6272307634353638, "learning_rate": 3.0220138456283686e-07, "loss": 0.4408, "step": 36046 }, { "epoch": 0.92, "grad_norm": 1.0711476802825928, "learning_rate": 3.0199887839620134e-07, "loss": 0.5522, "step": 36047 }, { "epoch": 0.92, "grad_norm": 3.764798164367676, "learning_rate": 3.0179643906209754e-07, "loss": 0.7074, "step": 36048 }, { "epoch": 0.92, "grad_norm": 2.105025053024292, "learning_rate": 3.015940665619177e-07, "loss": 0.6318, "step": 36049 }, { "epoch": 0.92, "grad_norm": 2.487107992172241, "learning_rate": 3.013917608970607e-07, "loss": 0.4954, "step": 36050 }, { "epoch": 0.92, "grad_norm": 6.914841175079346, "learning_rate": 3.011895220689176e-07, "loss": 0.5376, "step": 36051 }, { "epoch": 0.92, "grad_norm": 1.73079252243042, "learning_rate": 3.009873500788818e-07, "loss": 0.5739, "step": 36052 }, { "epoch": 0.92, "grad_norm": 1.275168538093567, "learning_rate": 3.0078524492834884e-07, "loss": 0.5054, "step": 36053 }, { "epoch": 0.92, "grad_norm": 1.8073431253433228, "learning_rate": 3.0058320661871087e-07, "loss": 0.5476, "step": 36054 }, { "epoch": 0.92, "grad_norm": 1.2101901769638062, "learning_rate": 3.00381235151358e-07, "loss": 0.4429, "step": 36055 }, { "epoch": 0.92, "grad_norm": 1.257765531539917, "learning_rate": 3.0017933052768457e-07, "loss": 0.4668, "step": 36056 }, { "epoch": 0.92, "grad_norm": 1.6520570516586304, "learning_rate": 2.999774927490817e-07, "loss": 0.5949, "step": 36057 }, { "epoch": 0.92, "grad_norm": 2.2043063640594482, "learning_rate": 2.997757218169395e-07, "loss": 0.5829, "step": 36058 }, { "epoch": 0.92, "grad_norm": 1.5077788829803467, "learning_rate": 2.995740177326478e-07, "loss": 0.3917, "step": 36059 }, { "epoch": 0.92, "grad_norm": 1.6233253479003906, "learning_rate": 2.99372380497599e-07, "loss": 0.4913, "step": 36060 }, { "epoch": 0.92, "grad_norm": 1.4781076908111572, "learning_rate": 2.991708101131807e-07, "loss": 0.4853, "step": 36061 }, { "epoch": 0.92, "grad_norm": 1.3336533308029175, "learning_rate": 2.9896930658078085e-07, "loss": 0.3979, "step": 36062 }, { "epoch": 0.92, "grad_norm": 1.9900997877120972, "learning_rate": 2.9876786990179153e-07, "loss": 0.5443, "step": 36063 }, { "epoch": 0.92, "grad_norm": 0.9563397169113159, "learning_rate": 2.9856650007759946e-07, "loss": 0.4326, "step": 36064 }, { "epoch": 0.92, "grad_norm": 1.1600793600082397, "learning_rate": 2.9836519710959244e-07, "loss": 0.5442, "step": 36065 }, { "epoch": 0.92, "grad_norm": 1.0322020053863525, "learning_rate": 2.981639609991549e-07, "loss": 0.4548, "step": 36066 }, { "epoch": 0.92, "grad_norm": 10.83664608001709, "learning_rate": 2.9796279174767796e-07, "loss": 0.6261, "step": 36067 }, { "epoch": 0.92, "grad_norm": 2.491091012954712, "learning_rate": 2.9776168935654605e-07, "loss": 0.6562, "step": 36068 }, { "epoch": 0.92, "grad_norm": 7.741049766540527, "learning_rate": 2.975606538271436e-07, "loss": 0.615, "step": 36069 }, { "epoch": 0.92, "grad_norm": 1.5372897386550903, "learning_rate": 2.9735968516085847e-07, "loss": 0.5791, "step": 36070 }, { "epoch": 0.92, "grad_norm": 1.8192168474197388, "learning_rate": 2.97158783359075e-07, "loss": 0.567, "step": 36071 }, { "epoch": 0.92, "grad_norm": 1.2931573390960693, "learning_rate": 2.969579484231766e-07, "loss": 0.4806, "step": 36072 }, { "epoch": 0.92, "grad_norm": 1.4414117336273193, "learning_rate": 2.967571803545477e-07, "loss": 0.4935, "step": 36073 }, { "epoch": 0.92, "grad_norm": 1.4591245651245117, "learning_rate": 2.9655647915457276e-07, "loss": 0.633, "step": 36074 }, { "epoch": 0.92, "grad_norm": 0.8558632731437683, "learning_rate": 2.9635584482463507e-07, "loss": 0.4445, "step": 36075 }, { "epoch": 0.92, "grad_norm": 1.1562455892562866, "learning_rate": 2.9615527736611474e-07, "loss": 0.4301, "step": 36076 }, { "epoch": 0.92, "grad_norm": 1.324303388595581, "learning_rate": 2.959547767803972e-07, "loss": 0.4125, "step": 36077 }, { "epoch": 0.92, "grad_norm": 2.367971658706665, "learning_rate": 2.957543430688614e-07, "loss": 0.58, "step": 36078 }, { "epoch": 0.92, "grad_norm": 1.074959397315979, "learning_rate": 2.9555397623288964e-07, "loss": 0.561, "step": 36079 }, { "epoch": 0.92, "grad_norm": 1.4793881177902222, "learning_rate": 2.95353676273864e-07, "loss": 0.4689, "step": 36080 }, { "epoch": 0.92, "grad_norm": 1.1033587455749512, "learning_rate": 2.951534431931635e-07, "loss": 0.4392, "step": 36081 }, { "epoch": 0.92, "grad_norm": 1.0166929960250854, "learning_rate": 2.9495327699216924e-07, "loss": 0.5178, "step": 36082 }, { "epoch": 0.92, "grad_norm": 1.5259120464324951, "learning_rate": 2.9475317767225786e-07, "loss": 0.5827, "step": 36083 }, { "epoch": 0.92, "grad_norm": 1.6001958847045898, "learning_rate": 2.945531452348116e-07, "loss": 0.6012, "step": 36084 }, { "epoch": 0.92, "grad_norm": 6.28436279296875, "learning_rate": 2.943531796812071e-07, "loss": 0.5728, "step": 36085 }, { "epoch": 0.92, "grad_norm": 1.2845525741577148, "learning_rate": 2.9415328101282113e-07, "loss": 0.5206, "step": 36086 }, { "epoch": 0.92, "grad_norm": 1.3194040060043335, "learning_rate": 2.939534492310348e-07, "loss": 0.5168, "step": 36087 }, { "epoch": 0.92, "grad_norm": 3.153257131576538, "learning_rate": 2.937536843372235e-07, "loss": 0.5206, "step": 36088 }, { "epoch": 0.92, "grad_norm": 1.7455484867095947, "learning_rate": 2.9355398633276413e-07, "loss": 0.5057, "step": 36089 }, { "epoch": 0.93, "grad_norm": 1.9884452819824219, "learning_rate": 2.933543552190299e-07, "loss": 0.6445, "step": 36090 }, { "epoch": 0.93, "grad_norm": 1.080887794494629, "learning_rate": 2.9315479099740195e-07, "loss": 0.3632, "step": 36091 }, { "epoch": 0.93, "grad_norm": 1.7437678575515747, "learning_rate": 2.9295529366925145e-07, "loss": 0.6464, "step": 36092 }, { "epoch": 0.93, "grad_norm": 1.506472110748291, "learning_rate": 2.9275586323595396e-07, "loss": 0.4274, "step": 36093 }, { "epoch": 0.93, "grad_norm": 1.1858206987380981, "learning_rate": 2.92556499698885e-07, "loss": 0.4321, "step": 36094 }, { "epoch": 0.93, "grad_norm": 1.6795504093170166, "learning_rate": 2.9235720305941796e-07, "loss": 0.5385, "step": 36095 }, { "epoch": 0.93, "grad_norm": 1.2262743711471558, "learning_rate": 2.9215797331892616e-07, "loss": 0.5187, "step": 36096 }, { "epoch": 0.93, "grad_norm": 1.6551578044891357, "learning_rate": 2.9195881047878294e-07, "loss": 0.6023, "step": 36097 }, { "epoch": 0.93, "grad_norm": 1.3002727031707764, "learning_rate": 2.9175971454035946e-07, "loss": 0.5804, "step": 36098 }, { "epoch": 0.93, "grad_norm": 1.4530154466629028, "learning_rate": 2.9156068550503015e-07, "loss": 0.617, "step": 36099 }, { "epoch": 0.93, "grad_norm": 9.072856903076172, "learning_rate": 2.913617233741628e-07, "loss": 0.6808, "step": 36100 }, { "epoch": 0.93, "grad_norm": 13.71580982208252, "learning_rate": 2.9116282814913297e-07, "loss": 0.4679, "step": 36101 }, { "epoch": 0.93, "grad_norm": 1.4397976398468018, "learning_rate": 2.9096399983130853e-07, "loss": 0.4606, "step": 36102 }, { "epoch": 0.93, "grad_norm": 1.4462876319885254, "learning_rate": 2.907652384220594e-07, "loss": 0.5657, "step": 36103 }, { "epoch": 0.93, "grad_norm": 1.6042265892028809, "learning_rate": 2.9056654392275785e-07, "loss": 0.4538, "step": 36104 }, { "epoch": 0.93, "grad_norm": 2.23581600189209, "learning_rate": 2.903679163347717e-07, "loss": 0.5581, "step": 36105 }, { "epoch": 0.93, "grad_norm": 1.5530176162719727, "learning_rate": 2.901693556594698e-07, "loss": 0.4467, "step": 36106 }, { "epoch": 0.93, "grad_norm": 1.127295732498169, "learning_rate": 2.899708618982189e-07, "loss": 0.6269, "step": 36107 }, { "epoch": 0.93, "grad_norm": 1.1606438159942627, "learning_rate": 2.8977243505238896e-07, "loss": 0.589, "step": 36108 }, { "epoch": 0.93, "grad_norm": 1.5490334033966064, "learning_rate": 2.895740751233478e-07, "loss": 0.646, "step": 36109 }, { "epoch": 0.93, "grad_norm": 26.80422592163086, "learning_rate": 2.893757821124599e-07, "loss": 0.6982, "step": 36110 }, { "epoch": 0.93, "grad_norm": 1.5018696784973145, "learning_rate": 2.891775560210941e-07, "loss": 0.4368, "step": 36111 }, { "epoch": 0.93, "grad_norm": 0.8146915435791016, "learning_rate": 2.88979396850616e-07, "loss": 0.4223, "step": 36112 }, { "epoch": 0.93, "grad_norm": 2.7930073738098145, "learning_rate": 2.8878130460238907e-07, "loss": 0.4623, "step": 36113 }, { "epoch": 0.93, "grad_norm": 1.7515769004821777, "learning_rate": 2.8858327927778206e-07, "loss": 0.5248, "step": 36114 }, { "epoch": 0.93, "grad_norm": 1.2572375535964966, "learning_rate": 2.883853208781573e-07, "loss": 0.4645, "step": 36115 }, { "epoch": 0.93, "grad_norm": 1.980059266090393, "learning_rate": 2.881874294048803e-07, "loss": 0.493, "step": 36116 }, { "epoch": 0.93, "grad_norm": 0.815711259841919, "learning_rate": 2.8798960485931224e-07, "loss": 0.4707, "step": 36117 }, { "epoch": 0.93, "grad_norm": 1.5200692415237427, "learning_rate": 2.877918472428187e-07, "loss": 0.5246, "step": 36118 }, { "epoch": 0.93, "grad_norm": 1.2051458358764648, "learning_rate": 2.8759415655676305e-07, "loss": 0.5226, "step": 36119 }, { "epoch": 0.93, "grad_norm": 1.5368406772613525, "learning_rate": 2.8739653280250523e-07, "loss": 0.4021, "step": 36120 }, { "epoch": 0.93, "grad_norm": 1.1100575923919678, "learning_rate": 2.8719897598140865e-07, "loss": 0.4947, "step": 36121 }, { "epoch": 0.93, "grad_norm": 1.6661304235458374, "learning_rate": 2.8700148609483556e-07, "loss": 0.4293, "step": 36122 }, { "epoch": 0.93, "grad_norm": 1.8660972118377686, "learning_rate": 2.8680406314414597e-07, "loss": 0.5203, "step": 36123 }, { "epoch": 0.93, "grad_norm": 1.472087025642395, "learning_rate": 2.8660670713069884e-07, "loss": 0.5139, "step": 36124 }, { "epoch": 0.93, "grad_norm": 1.1933168172836304, "learning_rate": 2.864094180558563e-07, "loss": 0.4587, "step": 36125 }, { "epoch": 0.93, "grad_norm": 1.6132341623306274, "learning_rate": 2.862121959209785e-07, "loss": 0.4753, "step": 36126 }, { "epoch": 0.93, "grad_norm": 1.6994775533676147, "learning_rate": 2.860150407274209e-07, "loss": 0.5207, "step": 36127 }, { "epoch": 0.93, "grad_norm": 9.262676239013672, "learning_rate": 2.8581795247654697e-07, "loss": 0.6767, "step": 36128 }, { "epoch": 0.93, "grad_norm": 2.562431812286377, "learning_rate": 2.8562093116971224e-07, "loss": 0.5893, "step": 36129 }, { "epoch": 0.93, "grad_norm": 1.5587995052337646, "learning_rate": 2.854239768082756e-07, "loss": 0.4993, "step": 36130 }, { "epoch": 0.93, "grad_norm": 1.3865430355072021, "learning_rate": 2.8522708939359154e-07, "loss": 0.3179, "step": 36131 }, { "epoch": 0.93, "grad_norm": 1.3057472705841064, "learning_rate": 2.8503026892702015e-07, "loss": 0.4561, "step": 36132 }, { "epoch": 0.93, "grad_norm": 1.2227357625961304, "learning_rate": 2.8483351540991687e-07, "loss": 0.393, "step": 36133 }, { "epoch": 0.93, "grad_norm": 1.4025057554244995, "learning_rate": 2.846368288436363e-07, "loss": 0.544, "step": 36134 }, { "epoch": 0.93, "grad_norm": 2.076282262802124, "learning_rate": 2.844402092295362e-07, "loss": 0.361, "step": 36135 }, { "epoch": 0.93, "grad_norm": 2.1939871311187744, "learning_rate": 2.842436565689699e-07, "loss": 0.5376, "step": 36136 }, { "epoch": 0.93, "grad_norm": 2.3569250106811523, "learning_rate": 2.840471708632919e-07, "loss": 0.4907, "step": 36137 }, { "epoch": 0.93, "grad_norm": 4.122928619384766, "learning_rate": 2.838507521138567e-07, "loss": 0.6503, "step": 36138 }, { "epoch": 0.93, "grad_norm": 4.83152961730957, "learning_rate": 2.8365440032201874e-07, "loss": 0.4696, "step": 36139 }, { "epoch": 0.93, "grad_norm": 9.025418281555176, "learning_rate": 2.8345811548912914e-07, "loss": 0.5936, "step": 36140 }, { "epoch": 0.93, "grad_norm": 1.3251113891601562, "learning_rate": 2.832618976165413e-07, "loss": 0.564, "step": 36141 }, { "epoch": 0.93, "grad_norm": 7.2162017822265625, "learning_rate": 2.830657467056086e-07, "loss": 0.5059, "step": 36142 }, { "epoch": 0.93, "grad_norm": 5.358943939208984, "learning_rate": 2.8286966275768324e-07, "loss": 0.5254, "step": 36143 }, { "epoch": 0.93, "grad_norm": 3.073676347732544, "learning_rate": 2.8267364577411306e-07, "loss": 0.4906, "step": 36144 }, { "epoch": 0.93, "grad_norm": 1.4791529178619385, "learning_rate": 2.8247769575625137e-07, "loss": 0.4535, "step": 36145 }, { "epoch": 0.93, "grad_norm": 1.3397730588912964, "learning_rate": 2.822818127054494e-07, "loss": 0.5212, "step": 36146 }, { "epoch": 0.93, "grad_norm": 1.3645806312561035, "learning_rate": 2.8208599662305604e-07, "loss": 0.4224, "step": 36147 }, { "epoch": 0.93, "grad_norm": 1.0446137189865112, "learning_rate": 2.81890247510419e-07, "loss": 0.4496, "step": 36148 }, { "epoch": 0.93, "grad_norm": 1.48393976688385, "learning_rate": 2.816945653688907e-07, "loss": 0.3935, "step": 36149 }, { "epoch": 0.93, "grad_norm": 1.2605613470077515, "learning_rate": 2.8149895019981664e-07, "loss": 0.5503, "step": 36150 }, { "epoch": 0.93, "grad_norm": 1.2684929370880127, "learning_rate": 2.813034020045458e-07, "loss": 0.562, "step": 36151 }, { "epoch": 0.93, "grad_norm": 3.264181137084961, "learning_rate": 2.811079207844269e-07, "loss": 0.5802, "step": 36152 }, { "epoch": 0.93, "grad_norm": 2.40674090385437, "learning_rate": 2.8091250654080584e-07, "loss": 0.51, "step": 36153 }, { "epoch": 0.93, "grad_norm": 1.7058466672897339, "learning_rate": 2.807171592750302e-07, "loss": 0.5528, "step": 36154 }, { "epoch": 0.93, "grad_norm": 1.4873982667922974, "learning_rate": 2.805218789884445e-07, "loss": 0.6105, "step": 36155 }, { "epoch": 0.93, "grad_norm": 0.9483929872512817, "learning_rate": 2.8032666568239663e-07, "loss": 0.415, "step": 36156 }, { "epoch": 0.93, "grad_norm": 1.249549388885498, "learning_rate": 2.80131519358231e-07, "loss": 0.4513, "step": 36157 }, { "epoch": 0.93, "grad_norm": 1.2121163606643677, "learning_rate": 2.79936440017291e-07, "loss": 0.6344, "step": 36158 }, { "epoch": 0.93, "grad_norm": 1.2352608442306519, "learning_rate": 2.797414276609234e-07, "loss": 0.3435, "step": 36159 }, { "epoch": 0.93, "grad_norm": 1.8971974849700928, "learning_rate": 2.7954648229047146e-07, "loss": 0.4705, "step": 36160 }, { "epoch": 0.93, "grad_norm": 1.9118486642837524, "learning_rate": 2.7935160390727633e-07, "loss": 0.4721, "step": 36161 }, { "epoch": 0.93, "grad_norm": 1.586314082145691, "learning_rate": 2.7915679251268366e-07, "loss": 0.4958, "step": 36162 }, { "epoch": 0.93, "grad_norm": 1.3146796226501465, "learning_rate": 2.7896204810803574e-07, "loss": 0.495, "step": 36163 }, { "epoch": 0.93, "grad_norm": 1.3513413667678833, "learning_rate": 2.7876737069467474e-07, "loss": 0.4851, "step": 36164 }, { "epoch": 0.93, "grad_norm": 0.9830155968666077, "learning_rate": 2.7857276027393966e-07, "loss": 0.3926, "step": 36165 }, { "epoch": 0.93, "grad_norm": 2.2188661098480225, "learning_rate": 2.783782168471749e-07, "loss": 0.4797, "step": 36166 }, { "epoch": 0.93, "grad_norm": 5.976834297180176, "learning_rate": 2.781837404157195e-07, "loss": 0.5261, "step": 36167 }, { "epoch": 0.93, "grad_norm": 1.1538949012756348, "learning_rate": 2.779893309809134e-07, "loss": 0.5648, "step": 36168 }, { "epoch": 0.93, "grad_norm": 1.5154496431350708, "learning_rate": 2.777949885440967e-07, "loss": 0.5959, "step": 36169 }, { "epoch": 0.93, "grad_norm": 1.0698022842407227, "learning_rate": 2.776007131066094e-07, "loss": 0.5121, "step": 36170 }, { "epoch": 0.93, "grad_norm": 1.3886569738388062, "learning_rate": 2.774065046697905e-07, "loss": 0.5071, "step": 36171 }, { "epoch": 0.93, "grad_norm": 1.2158327102661133, "learning_rate": 2.772123632349755e-07, "loss": 0.5644, "step": 36172 }, { "epoch": 0.93, "grad_norm": 2.320525884628296, "learning_rate": 2.770182888035067e-07, "loss": 0.4129, "step": 36173 }, { "epoch": 0.93, "grad_norm": 1.3089580535888672, "learning_rate": 2.7682428137671857e-07, "loss": 0.5734, "step": 36174 }, { "epoch": 0.93, "grad_norm": 1.3316148519515991, "learning_rate": 2.7663034095594786e-07, "loss": 0.5398, "step": 36175 }, { "epoch": 0.93, "grad_norm": 1.3893344402313232, "learning_rate": 2.764364675425324e-07, "loss": 0.5072, "step": 36176 }, { "epoch": 0.93, "grad_norm": 1.535746455192566, "learning_rate": 2.7624266113780883e-07, "loss": 0.462, "step": 36177 }, { "epoch": 0.93, "grad_norm": 1.4204497337341309, "learning_rate": 2.760489217431106e-07, "loss": 0.522, "step": 36178 }, { "epoch": 0.93, "grad_norm": 1.2828166484832764, "learning_rate": 2.758552493597744e-07, "loss": 0.4581, "step": 36179 }, { "epoch": 0.93, "grad_norm": 1.7430845499038696, "learning_rate": 2.7566164398913575e-07, "loss": 0.554, "step": 36180 }, { "epoch": 0.93, "grad_norm": 1.7150518894195557, "learning_rate": 2.75468105632527e-07, "loss": 0.4857, "step": 36181 }, { "epoch": 0.93, "grad_norm": 2.78483510017395, "learning_rate": 2.752746342912804e-07, "loss": 0.5364, "step": 36182 }, { "epoch": 0.93, "grad_norm": 1.2763577699661255, "learning_rate": 2.750812299667338e-07, "loss": 0.5131, "step": 36183 }, { "epoch": 0.93, "grad_norm": 2.524852991104126, "learning_rate": 2.748878926602172e-07, "loss": 0.6208, "step": 36184 }, { "epoch": 0.93, "grad_norm": 1.4084724187850952, "learning_rate": 2.7469462237306176e-07, "loss": 0.4809, "step": 36185 }, { "epoch": 0.93, "grad_norm": 1.4232068061828613, "learning_rate": 2.745014191066031e-07, "loss": 0.5656, "step": 36186 }, { "epoch": 0.93, "grad_norm": 2.772562026977539, "learning_rate": 2.7430828286216904e-07, "loss": 0.5197, "step": 36187 }, { "epoch": 0.93, "grad_norm": 2.321885585784912, "learning_rate": 2.7411521364109295e-07, "loss": 0.6892, "step": 36188 }, { "epoch": 0.93, "grad_norm": 7.454115867614746, "learning_rate": 2.7392221144470377e-07, "loss": 0.6813, "step": 36189 }, { "epoch": 0.93, "grad_norm": 2.039942741394043, "learning_rate": 2.7372927627433266e-07, "loss": 0.4551, "step": 36190 }, { "epoch": 0.93, "grad_norm": 1.2491202354431152, "learning_rate": 2.735364081313086e-07, "loss": 0.4188, "step": 36191 }, { "epoch": 0.93, "grad_norm": 1.4021598100662231, "learning_rate": 2.733436070169593e-07, "loss": 0.5542, "step": 36192 }, { "epoch": 0.93, "grad_norm": 1.3181215524673462, "learning_rate": 2.731508729326171e-07, "loss": 0.473, "step": 36193 }, { "epoch": 0.93, "grad_norm": 1.0576530694961548, "learning_rate": 2.7295820587960764e-07, "loss": 0.4533, "step": 36194 }, { "epoch": 0.93, "grad_norm": 1.5254021883010864, "learning_rate": 2.727656058592587e-07, "loss": 0.4995, "step": 36195 }, { "epoch": 0.93, "grad_norm": 1.6177667379379272, "learning_rate": 2.7257307287289703e-07, "loss": 0.544, "step": 36196 }, { "epoch": 0.93, "grad_norm": 2.6917519569396973, "learning_rate": 2.723806069218526e-07, "loss": 0.5653, "step": 36197 }, { "epoch": 0.93, "grad_norm": 1.4404199123382568, "learning_rate": 2.721882080074478e-07, "loss": 0.6124, "step": 36198 }, { "epoch": 0.93, "grad_norm": 1.9932719469070435, "learning_rate": 2.719958761310104e-07, "loss": 0.6642, "step": 36199 }, { "epoch": 0.93, "grad_norm": 1.480501651763916, "learning_rate": 2.71803611293866e-07, "loss": 0.5271, "step": 36200 }, { "epoch": 0.93, "grad_norm": 1.796048879623413, "learning_rate": 2.7161141349734024e-07, "loss": 0.6123, "step": 36201 }, { "epoch": 0.93, "grad_norm": 3.6957015991210938, "learning_rate": 2.714192827427553e-07, "loss": 0.5168, "step": 36202 }, { "epoch": 0.93, "grad_norm": 1.289565086364746, "learning_rate": 2.712272190314369e-07, "loss": 0.4993, "step": 36203 }, { "epoch": 0.93, "grad_norm": 2.4346728324890137, "learning_rate": 2.7103522236470836e-07, "loss": 0.6088, "step": 36204 }, { "epoch": 0.93, "grad_norm": 1.0285930633544922, "learning_rate": 2.708432927438931e-07, "loss": 0.5364, "step": 36205 }, { "epoch": 0.93, "grad_norm": 0.9603602290153503, "learning_rate": 2.706514301703123e-07, "loss": 0.3736, "step": 36206 }, { "epoch": 0.93, "grad_norm": 2.114914894104004, "learning_rate": 2.7045963464529034e-07, "loss": 0.4667, "step": 36207 }, { "epoch": 0.93, "grad_norm": 1.0612921714782715, "learning_rate": 2.702679061701485e-07, "loss": 0.4842, "step": 36208 }, { "epoch": 0.93, "grad_norm": 2.098076820373535, "learning_rate": 2.7007624474620577e-07, "loss": 0.601, "step": 36209 }, { "epoch": 0.93, "grad_norm": 1.2628222703933716, "learning_rate": 2.698846503747854e-07, "loss": 0.438, "step": 36210 }, { "epoch": 0.93, "grad_norm": 2.1678030490875244, "learning_rate": 2.696931230572075e-07, "loss": 0.5034, "step": 36211 }, { "epoch": 0.93, "grad_norm": 1.2116153240203857, "learning_rate": 2.6950166279479104e-07, "loss": 0.4681, "step": 36212 }, { "epoch": 0.93, "grad_norm": 2.864773750305176, "learning_rate": 2.6931026958885497e-07, "loss": 0.5946, "step": 36213 }, { "epoch": 0.93, "grad_norm": 1.2884507179260254, "learning_rate": 2.6911894344072045e-07, "loss": 0.4933, "step": 36214 }, { "epoch": 0.93, "grad_norm": 4.138819217681885, "learning_rate": 2.6892768435170415e-07, "loss": 0.6864, "step": 36215 }, { "epoch": 0.93, "grad_norm": 2.433353900909424, "learning_rate": 2.6873649232312395e-07, "loss": 0.6358, "step": 36216 }, { "epoch": 0.93, "grad_norm": 2.07958984375, "learning_rate": 2.685453673562988e-07, "loss": 0.6357, "step": 36217 }, { "epoch": 0.93, "grad_norm": 1.0949485301971436, "learning_rate": 2.6835430945254424e-07, "loss": 0.5939, "step": 36218 }, { "epoch": 0.93, "grad_norm": 1.1148207187652588, "learning_rate": 2.681633186131782e-07, "loss": 0.4902, "step": 36219 }, { "epoch": 0.93, "grad_norm": 1.4030375480651855, "learning_rate": 2.679723948395163e-07, "loss": 0.4779, "step": 36220 }, { "epoch": 0.93, "grad_norm": 1.4672573804855347, "learning_rate": 2.677815381328752e-07, "loss": 0.5305, "step": 36221 }, { "epoch": 0.93, "grad_norm": 1.2596983909606934, "learning_rate": 2.6759074849456945e-07, "loss": 0.5455, "step": 36222 }, { "epoch": 0.93, "grad_norm": 3.5585074424743652, "learning_rate": 2.674000259259124e-07, "loss": 0.5078, "step": 36223 }, { "epoch": 0.93, "grad_norm": 3.1596484184265137, "learning_rate": 2.672093704282208e-07, "loss": 0.616, "step": 36224 }, { "epoch": 0.93, "grad_norm": 9.753193855285645, "learning_rate": 2.6701878200280696e-07, "loss": 0.6392, "step": 36225 }, { "epoch": 0.93, "grad_norm": 1.7329916954040527, "learning_rate": 2.668282606509853e-07, "loss": 0.5305, "step": 36226 }, { "epoch": 0.93, "grad_norm": 1.6770853996276855, "learning_rate": 2.666378063740682e-07, "loss": 0.5555, "step": 36227 }, { "epoch": 0.93, "grad_norm": 4.422652244567871, "learning_rate": 2.66447419173369e-07, "loss": 0.5869, "step": 36228 }, { "epoch": 0.93, "grad_norm": 1.5583558082580566, "learning_rate": 2.662570990501967e-07, "loss": 0.5576, "step": 36229 }, { "epoch": 0.93, "grad_norm": 1.1378812789916992, "learning_rate": 2.6606684600586686e-07, "loss": 0.4654, "step": 36230 }, { "epoch": 0.93, "grad_norm": 1.3872624635696411, "learning_rate": 2.658766600416884e-07, "loss": 0.4638, "step": 36231 }, { "epoch": 0.93, "grad_norm": 5.537508010864258, "learning_rate": 2.656865411589726e-07, "loss": 0.551, "step": 36232 }, { "epoch": 0.93, "grad_norm": 3.100250244140625, "learning_rate": 2.654964893590295e-07, "loss": 0.6514, "step": 36233 }, { "epoch": 0.93, "grad_norm": 1.4645594358444214, "learning_rate": 2.653065046431691e-07, "loss": 0.2976, "step": 36234 }, { "epoch": 0.93, "grad_norm": 15.38660717010498, "learning_rate": 2.651165870126993e-07, "loss": 0.5234, "step": 36235 }, { "epoch": 0.93, "grad_norm": 17.097869873046875, "learning_rate": 2.649267364689301e-07, "loss": 0.5847, "step": 36236 }, { "epoch": 0.93, "grad_norm": 1.5695409774780273, "learning_rate": 2.647369530131705e-07, "loss": 0.4527, "step": 36237 }, { "epoch": 0.93, "grad_norm": 1.7174748182296753, "learning_rate": 2.6454723664672723e-07, "loss": 0.5179, "step": 36238 }, { "epoch": 0.93, "grad_norm": 1.5628889799118042, "learning_rate": 2.64357587370907e-07, "loss": 0.4836, "step": 36239 }, { "epoch": 0.93, "grad_norm": 1.1463208198547363, "learning_rate": 2.641680051870188e-07, "loss": 0.4498, "step": 36240 }, { "epoch": 0.93, "grad_norm": 1.3816077709197998, "learning_rate": 2.6397849009636825e-07, "loss": 0.5199, "step": 36241 }, { "epoch": 0.93, "grad_norm": 1.9348698854446411, "learning_rate": 2.637890421002609e-07, "loss": 0.5294, "step": 36242 }, { "epoch": 0.93, "grad_norm": 0.9854542016983032, "learning_rate": 2.6359966120000247e-07, "loss": 0.5199, "step": 36243 }, { "epoch": 0.93, "grad_norm": 1.348370909690857, "learning_rate": 2.634103473968985e-07, "loss": 0.6418, "step": 36244 }, { "epoch": 0.93, "grad_norm": 6.691985130310059, "learning_rate": 2.6322110069225246e-07, "loss": 0.6117, "step": 36245 }, { "epoch": 0.93, "grad_norm": 11.051791191101074, "learning_rate": 2.6303192108737e-07, "loss": 0.5869, "step": 36246 }, { "epoch": 0.93, "grad_norm": 1.7821961641311646, "learning_rate": 2.6284280858355436e-07, "loss": 0.665, "step": 36247 }, { "epoch": 0.93, "grad_norm": 1.1872999668121338, "learning_rate": 2.62653763182108e-07, "loss": 0.3777, "step": 36248 }, { "epoch": 0.93, "grad_norm": 0.9761084318161011, "learning_rate": 2.6246478488433424e-07, "loss": 0.3406, "step": 36249 }, { "epoch": 0.93, "grad_norm": 1.65181303024292, "learning_rate": 2.622758736915354e-07, "loss": 0.4679, "step": 36250 }, { "epoch": 0.93, "grad_norm": 1.2022225856781006, "learning_rate": 2.620870296050149e-07, "loss": 0.4315, "step": 36251 }, { "epoch": 0.93, "grad_norm": 5.48322868347168, "learning_rate": 2.618982526260716e-07, "loss": 0.4748, "step": 36252 }, { "epoch": 0.93, "grad_norm": 1.1473296880722046, "learning_rate": 2.617095427560068e-07, "loss": 0.4331, "step": 36253 }, { "epoch": 0.93, "grad_norm": 2.0715794563293457, "learning_rate": 2.615208999961227e-07, "loss": 0.5068, "step": 36254 }, { "epoch": 0.93, "grad_norm": 1.2099316120147705, "learning_rate": 2.613323243477173e-07, "loss": 0.5299, "step": 36255 }, { "epoch": 0.93, "grad_norm": 1.2475661039352417, "learning_rate": 2.6114381581209156e-07, "loss": 0.364, "step": 36256 }, { "epoch": 0.93, "grad_norm": 32.35810852050781, "learning_rate": 2.609553743905435e-07, "loss": 0.5696, "step": 36257 }, { "epoch": 0.93, "grad_norm": 1.1416116952896118, "learning_rate": 2.6076700008437427e-07, "loss": 0.398, "step": 36258 }, { "epoch": 0.93, "grad_norm": 1.0925354957580566, "learning_rate": 2.605786928948784e-07, "loss": 0.4179, "step": 36259 }, { "epoch": 0.93, "grad_norm": 1.632135272026062, "learning_rate": 2.6039045282335474e-07, "loss": 0.6003, "step": 36260 }, { "epoch": 0.93, "grad_norm": 2.420166254043579, "learning_rate": 2.6020227987110236e-07, "loss": 0.612, "step": 36261 }, { "epoch": 0.93, "grad_norm": 1.9663941860198975, "learning_rate": 2.6001417403941685e-07, "loss": 0.4383, "step": 36262 }, { "epoch": 0.93, "grad_norm": 1.2266055345535278, "learning_rate": 2.598261353295928e-07, "loss": 0.4565, "step": 36263 }, { "epoch": 0.93, "grad_norm": 1.5467499494552612, "learning_rate": 2.5963816374292907e-07, "loss": 0.6241, "step": 36264 }, { "epoch": 0.93, "grad_norm": 1.5814836025238037, "learning_rate": 2.594502592807202e-07, "loss": 0.5451, "step": 36265 }, { "epoch": 0.93, "grad_norm": 1.77302885055542, "learning_rate": 2.592624219442597e-07, "loss": 0.5494, "step": 36266 }, { "epoch": 0.93, "grad_norm": 3.878613233566284, "learning_rate": 2.5907465173484194e-07, "loss": 0.6649, "step": 36267 }, { "epoch": 0.93, "grad_norm": 1.7093110084533691, "learning_rate": 2.5888694865376264e-07, "loss": 0.5044, "step": 36268 }, { "epoch": 0.93, "grad_norm": 1.776253581047058, "learning_rate": 2.586993127023152e-07, "loss": 0.5474, "step": 36269 }, { "epoch": 0.93, "grad_norm": 1.2775166034698486, "learning_rate": 2.585117438817908e-07, "loss": 0.6262, "step": 36270 }, { "epoch": 0.93, "grad_norm": 5.178333282470703, "learning_rate": 2.5832424219348396e-07, "loss": 0.7083, "step": 36271 }, { "epoch": 0.93, "grad_norm": 1.5008132457733154, "learning_rate": 2.581368076386859e-07, "loss": 0.4604, "step": 36272 }, { "epoch": 0.93, "grad_norm": 0.9254153966903687, "learning_rate": 2.5794944021868774e-07, "loss": 0.4501, "step": 36273 }, { "epoch": 0.93, "grad_norm": 1.7786575555801392, "learning_rate": 2.5776213993478294e-07, "loss": 0.6156, "step": 36274 }, { "epoch": 0.93, "grad_norm": 1.0482462644577026, "learning_rate": 2.5757490678826046e-07, "loss": 0.5163, "step": 36275 }, { "epoch": 0.93, "grad_norm": 2.0579237937927246, "learning_rate": 2.573877407804104e-07, "loss": 0.5199, "step": 36276 }, { "epoch": 0.93, "grad_norm": 1.594806432723999, "learning_rate": 2.5720064191252283e-07, "loss": 0.4035, "step": 36277 }, { "epoch": 0.93, "grad_norm": 2.350398063659668, "learning_rate": 2.5701361018588886e-07, "loss": 0.6724, "step": 36278 }, { "epoch": 0.93, "grad_norm": 1.970484972000122, "learning_rate": 2.5682664560179537e-07, "loss": 0.5822, "step": 36279 }, { "epoch": 0.93, "grad_norm": 1.657766580581665, "learning_rate": 2.566397481615301e-07, "loss": 0.6656, "step": 36280 }, { "epoch": 0.93, "grad_norm": 1.5572830438613892, "learning_rate": 2.5645291786638325e-07, "loss": 0.4697, "step": 36281 }, { "epoch": 0.93, "grad_norm": 2.442991018295288, "learning_rate": 2.562661547176415e-07, "loss": 0.607, "step": 36282 }, { "epoch": 0.93, "grad_norm": 1.1058400869369507, "learning_rate": 2.5607945871659267e-07, "loss": 0.4562, "step": 36283 }, { "epoch": 0.93, "grad_norm": 1.2216140031814575, "learning_rate": 2.5589282986452025e-07, "loss": 0.5408, "step": 36284 }, { "epoch": 0.93, "grad_norm": 2.5629773139953613, "learning_rate": 2.5570626816271425e-07, "loss": 0.5501, "step": 36285 }, { "epoch": 0.93, "grad_norm": 7.329484462738037, "learning_rate": 2.5551977361245816e-07, "loss": 0.8345, "step": 36286 }, { "epoch": 0.93, "grad_norm": 1.9888527393341064, "learning_rate": 2.553333462150376e-07, "loss": 0.4535, "step": 36287 }, { "epoch": 0.93, "grad_norm": 1.6068696975708008, "learning_rate": 2.5514698597173814e-07, "loss": 0.4675, "step": 36288 }, { "epoch": 0.93, "grad_norm": 1.2423725128173828, "learning_rate": 2.549606928838433e-07, "loss": 0.5344, "step": 36289 }, { "epoch": 0.93, "grad_norm": 0.9819707274436951, "learning_rate": 2.547744669526364e-07, "loss": 0.4606, "step": 36290 }, { "epoch": 0.93, "grad_norm": 1.4706913232803345, "learning_rate": 2.5458830817940095e-07, "loss": 0.6261, "step": 36291 }, { "epoch": 0.93, "grad_norm": 1.427125334739685, "learning_rate": 2.544022165654203e-07, "loss": 0.4442, "step": 36292 }, { "epoch": 0.93, "grad_norm": 1.8174952268600464, "learning_rate": 2.542161921119779e-07, "loss": 0.5114, "step": 36293 }, { "epoch": 0.93, "grad_norm": 1.515631914138794, "learning_rate": 2.5403023482035274e-07, "loss": 0.526, "step": 36294 }, { "epoch": 0.93, "grad_norm": 2.838352918624878, "learning_rate": 2.538443446918304e-07, "loss": 0.383, "step": 36295 }, { "epoch": 0.93, "grad_norm": 1.142041802406311, "learning_rate": 2.536585217276888e-07, "loss": 0.5605, "step": 36296 }, { "epoch": 0.93, "grad_norm": 1.6737067699432373, "learning_rate": 2.53472765929208e-07, "loss": 0.4993, "step": 36297 }, { "epoch": 0.93, "grad_norm": 1.4232704639434814, "learning_rate": 2.532870772976714e-07, "loss": 0.5403, "step": 36298 }, { "epoch": 0.93, "grad_norm": 1.9976439476013184, "learning_rate": 2.531014558343559e-07, "loss": 0.6242, "step": 36299 }, { "epoch": 0.93, "grad_norm": 1.4714186191558838, "learning_rate": 2.5291590154054246e-07, "loss": 0.4742, "step": 36300 }, { "epoch": 0.93, "grad_norm": 2.384193181991577, "learning_rate": 2.527304144175069e-07, "loss": 0.4754, "step": 36301 }, { "epoch": 0.93, "grad_norm": 1.1486642360687256, "learning_rate": 2.525449944665315e-07, "loss": 0.4212, "step": 36302 }, { "epoch": 0.93, "grad_norm": 1.952593445777893, "learning_rate": 2.5235964168889183e-07, "loss": 0.425, "step": 36303 }, { "epoch": 0.93, "grad_norm": 1.0259742736816406, "learning_rate": 2.521743560858647e-07, "loss": 0.4618, "step": 36304 }, { "epoch": 0.93, "grad_norm": 3.883369207382202, "learning_rate": 2.51989137658728e-07, "loss": 0.7198, "step": 36305 }, { "epoch": 0.93, "grad_norm": 2.75144362449646, "learning_rate": 2.518039864087585e-07, "loss": 0.5302, "step": 36306 }, { "epoch": 0.93, "grad_norm": 2.245148181915283, "learning_rate": 2.516189023372306e-07, "loss": 0.6169, "step": 36307 }, { "epoch": 0.93, "grad_norm": 1.3396509885787964, "learning_rate": 2.5143388544542014e-07, "loss": 0.392, "step": 36308 }, { "epoch": 0.93, "grad_norm": 1.18343186378479, "learning_rate": 2.5124893573460375e-07, "loss": 0.4891, "step": 36309 }, { "epoch": 0.93, "grad_norm": 25.369365692138672, "learning_rate": 2.5106405320605486e-07, "loss": 0.5491, "step": 36310 }, { "epoch": 0.93, "grad_norm": 1.3885788917541504, "learning_rate": 2.5087923786104697e-07, "loss": 0.5186, "step": 36311 }, { "epoch": 0.93, "grad_norm": 1.8026931285858154, "learning_rate": 2.5069448970085566e-07, "loss": 0.6108, "step": 36312 }, { "epoch": 0.93, "grad_norm": 1.6268317699432373, "learning_rate": 2.5050980872675213e-07, "loss": 0.5904, "step": 36313 }, { "epoch": 0.93, "grad_norm": 0.8825387358665466, "learning_rate": 2.503251949400087e-07, "loss": 0.4247, "step": 36314 }, { "epoch": 0.93, "grad_norm": 1.5577397346496582, "learning_rate": 2.501406483419e-07, "loss": 0.5471, "step": 36315 }, { "epoch": 0.93, "grad_norm": 1.5271562337875366, "learning_rate": 2.4995616893369714e-07, "loss": 0.415, "step": 36316 }, { "epoch": 0.93, "grad_norm": 1.5932397842407227, "learning_rate": 2.497717567166691e-07, "loss": 0.5176, "step": 36317 }, { "epoch": 0.93, "grad_norm": 2.3628361225128174, "learning_rate": 2.495874116920893e-07, "loss": 0.5043, "step": 36318 }, { "epoch": 0.93, "grad_norm": 1.3459972143173218, "learning_rate": 2.494031338612268e-07, "loss": 0.5564, "step": 36319 }, { "epoch": 0.93, "grad_norm": 2.1293270587921143, "learning_rate": 2.492189232253528e-07, "loss": 0.5163, "step": 36320 }, { "epoch": 0.93, "grad_norm": 0.9924649596214294, "learning_rate": 2.49034779785734e-07, "loss": 0.3645, "step": 36321 }, { "epoch": 0.93, "grad_norm": 1.2781169414520264, "learning_rate": 2.488507035436438e-07, "loss": 0.4782, "step": 36322 }, { "epoch": 0.93, "grad_norm": 1.2805402278900146, "learning_rate": 2.4866669450034685e-07, "loss": 0.4637, "step": 36323 }, { "epoch": 0.93, "grad_norm": 1.6256905794143677, "learning_rate": 2.4848275265711317e-07, "loss": 0.4871, "step": 36324 }, { "epoch": 0.93, "grad_norm": 1.8821141719818115, "learning_rate": 2.4829887801520956e-07, "loss": 0.5507, "step": 36325 }, { "epoch": 0.93, "grad_norm": 1.8082596063613892, "learning_rate": 2.481150705759039e-07, "loss": 0.5464, "step": 36326 }, { "epoch": 0.93, "grad_norm": 5.827528476715088, "learning_rate": 2.479313303404618e-07, "loss": 0.8026, "step": 36327 }, { "epoch": 0.93, "grad_norm": 1.2705754041671753, "learning_rate": 2.4774765731015006e-07, "loss": 0.4075, "step": 36328 }, { "epoch": 0.93, "grad_norm": 1.5518734455108643, "learning_rate": 2.475640514862354e-07, "loss": 0.5248, "step": 36329 }, { "epoch": 0.93, "grad_norm": 1.7910020351409912, "learning_rate": 2.4738051286998244e-07, "loss": 0.5776, "step": 36330 }, { "epoch": 0.93, "grad_norm": 1.373561143875122, "learning_rate": 2.4719704146265453e-07, "loss": 0.4763, "step": 36331 }, { "epoch": 0.93, "grad_norm": 1.6215986013412476, "learning_rate": 2.470136372655174e-07, "loss": 0.5298, "step": 36332 }, { "epoch": 0.93, "grad_norm": 1.7537504434585571, "learning_rate": 2.4683030027983557e-07, "loss": 0.583, "step": 36333 }, { "epoch": 0.93, "grad_norm": 4.926742076873779, "learning_rate": 2.466470305068713e-07, "loss": 0.4489, "step": 36334 }, { "epoch": 0.93, "grad_norm": 1.5169689655303955, "learning_rate": 2.4646382794788816e-07, "loss": 0.5105, "step": 36335 }, { "epoch": 0.93, "grad_norm": 1.2785519361495972, "learning_rate": 2.4628069260414834e-07, "loss": 0.5442, "step": 36336 }, { "epoch": 0.93, "grad_norm": 1.1127609014511108, "learning_rate": 2.460976244769153e-07, "loss": 0.4339, "step": 36337 }, { "epoch": 0.93, "grad_norm": 2.047518730163574, "learning_rate": 2.459146235674481e-07, "loss": 0.5981, "step": 36338 }, { "epoch": 0.93, "grad_norm": 4.212918758392334, "learning_rate": 2.45731689877009e-07, "loss": 0.6457, "step": 36339 }, { "epoch": 0.93, "grad_norm": 0.9947883486747742, "learning_rate": 2.4554882340686036e-07, "loss": 0.4759, "step": 36340 }, { "epoch": 0.93, "grad_norm": 1.5392348766326904, "learning_rate": 2.4536602415826007e-07, "loss": 0.494, "step": 36341 }, { "epoch": 0.93, "grad_norm": 1.541565179824829, "learning_rate": 2.451832921324682e-07, "loss": 0.643, "step": 36342 }, { "epoch": 0.93, "grad_norm": 1.5504159927368164, "learning_rate": 2.450006273307448e-07, "loss": 0.4662, "step": 36343 }, { "epoch": 0.93, "grad_norm": 1.1995716094970703, "learning_rate": 2.44818029754349e-07, "loss": 0.5837, "step": 36344 }, { "epoch": 0.93, "grad_norm": 1.8644976615905762, "learning_rate": 2.4463549940453746e-07, "loss": 0.666, "step": 36345 }, { "epoch": 0.93, "grad_norm": 3.2514703273773193, "learning_rate": 2.4445303628256924e-07, "loss": 0.3417, "step": 36346 }, { "epoch": 0.93, "grad_norm": 1.2771832942962646, "learning_rate": 2.442706403897033e-07, "loss": 0.4867, "step": 36347 }, { "epoch": 0.93, "grad_norm": 1.4384857416152954, "learning_rate": 2.4408831172719417e-07, "loss": 0.4981, "step": 36348 }, { "epoch": 0.93, "grad_norm": 1.484403133392334, "learning_rate": 2.4390605029629756e-07, "loss": 0.4458, "step": 36349 }, { "epoch": 0.93, "grad_norm": 2.0821120738983154, "learning_rate": 2.437238560982724e-07, "loss": 0.5657, "step": 36350 }, { "epoch": 0.93, "grad_norm": 7.849400997161865, "learning_rate": 2.4354172913437335e-07, "loss": 0.4996, "step": 36351 }, { "epoch": 0.93, "grad_norm": 1.9906508922576904, "learning_rate": 2.4335966940585377e-07, "loss": 0.6585, "step": 36352 }, { "epoch": 0.93, "grad_norm": 0.9203330278396606, "learning_rate": 2.4317767691397044e-07, "loss": 0.3934, "step": 36353 }, { "epoch": 0.93, "grad_norm": 1.2663846015930176, "learning_rate": 2.429957516599768e-07, "loss": 0.6523, "step": 36354 }, { "epoch": 0.93, "grad_norm": 1.4383230209350586, "learning_rate": 2.428138936451263e-07, "loss": 0.3962, "step": 36355 }, { "epoch": 0.93, "grad_norm": 1.829994797706604, "learning_rate": 2.426321028706724e-07, "loss": 0.5139, "step": 36356 }, { "epoch": 0.93, "grad_norm": 3.8314576148986816, "learning_rate": 2.4245037933786853e-07, "loss": 0.719, "step": 36357 }, { "epoch": 0.93, "grad_norm": 1.3815964460372925, "learning_rate": 2.4226872304796587e-07, "loss": 0.6026, "step": 36358 }, { "epoch": 0.93, "grad_norm": 1.527092695236206, "learning_rate": 2.4208713400221573e-07, "loss": 0.5368, "step": 36359 }, { "epoch": 0.93, "grad_norm": 1.7648087739944458, "learning_rate": 2.419056122018715e-07, "loss": 0.5861, "step": 36360 }, { "epoch": 0.93, "grad_norm": 7.6366496086120605, "learning_rate": 2.4172415764818324e-07, "loss": 0.5439, "step": 36361 }, { "epoch": 0.93, "grad_norm": 1.813769817352295, "learning_rate": 2.415427703424011e-07, "loss": 0.5813, "step": 36362 }, { "epoch": 0.93, "grad_norm": 1.2692523002624512, "learning_rate": 2.413614502857753e-07, "loss": 0.4497, "step": 36363 }, { "epoch": 0.93, "grad_norm": 2.065810441970825, "learning_rate": 2.4118019747955577e-07, "loss": 0.5982, "step": 36364 }, { "epoch": 0.93, "grad_norm": 1.3185951709747314, "learning_rate": 2.4099901192499165e-07, "loss": 0.5379, "step": 36365 }, { "epoch": 0.93, "grad_norm": 1.3243070840835571, "learning_rate": 2.4081789362332963e-07, "loss": 0.5686, "step": 36366 }, { "epoch": 0.93, "grad_norm": 1.659152865409851, "learning_rate": 2.406368425758199e-07, "loss": 0.5572, "step": 36367 }, { "epoch": 0.93, "grad_norm": 1.6808273792266846, "learning_rate": 2.4045585878371023e-07, "loss": 0.4889, "step": 36368 }, { "epoch": 0.93, "grad_norm": 10.27702808380127, "learning_rate": 2.4027494224824646e-07, "loss": 0.6172, "step": 36369 }, { "epoch": 0.93, "grad_norm": 1.7049155235290527, "learning_rate": 2.400940929706763e-07, "loss": 0.5482, "step": 36370 }, { "epoch": 0.93, "grad_norm": 1.169435977935791, "learning_rate": 2.399133109522467e-07, "loss": 0.4813, "step": 36371 }, { "epoch": 0.93, "grad_norm": 1.300321102142334, "learning_rate": 2.397325961942032e-07, "loss": 0.4873, "step": 36372 }, { "epoch": 0.93, "grad_norm": 0.9860133528709412, "learning_rate": 2.395519486977882e-07, "loss": 0.431, "step": 36373 }, { "epoch": 0.93, "grad_norm": 1.796958088874817, "learning_rate": 2.393713684642507e-07, "loss": 0.7242, "step": 36374 }, { "epoch": 0.93, "grad_norm": 1.9679598808288574, "learning_rate": 2.391908554948341e-07, "loss": 0.5565, "step": 36375 }, { "epoch": 0.93, "grad_norm": 1.6018718481063843, "learning_rate": 2.390104097907797e-07, "loss": 0.537, "step": 36376 }, { "epoch": 0.93, "grad_norm": 1.0200004577636719, "learning_rate": 2.3883003135333427e-07, "loss": 0.5614, "step": 36377 }, { "epoch": 0.93, "grad_norm": 1.6308673620224, "learning_rate": 2.386497201837401e-07, "loss": 0.6613, "step": 36378 }, { "epoch": 0.93, "grad_norm": 1.136289119720459, "learning_rate": 2.3846947628323844e-07, "loss": 0.4175, "step": 36379 }, { "epoch": 0.93, "grad_norm": 1.3345680236816406, "learning_rate": 2.382892996530739e-07, "loss": 0.5851, "step": 36380 }, { "epoch": 0.93, "grad_norm": 2.9331092834472656, "learning_rate": 2.3810919029448544e-07, "loss": 0.5767, "step": 36381 }, { "epoch": 0.93, "grad_norm": 6.774838447570801, "learning_rate": 2.3792914820871648e-07, "loss": 0.4891, "step": 36382 }, { "epoch": 0.93, "grad_norm": 14.967903137207031, "learning_rate": 2.37749173397005e-07, "loss": 0.7415, "step": 36383 }, { "epoch": 0.93, "grad_norm": 2.233588218688965, "learning_rate": 2.3756926586059327e-07, "loss": 0.4778, "step": 36384 }, { "epoch": 0.93, "grad_norm": 1.4382376670837402, "learning_rate": 2.3738942560072253e-07, "loss": 0.5444, "step": 36385 }, { "epoch": 0.93, "grad_norm": 1.6129714250564575, "learning_rate": 2.372096526186285e-07, "loss": 0.5513, "step": 36386 }, { "epoch": 0.93, "grad_norm": 1.7036073207855225, "learning_rate": 2.3702994691555237e-07, "loss": 0.3688, "step": 36387 }, { "epoch": 0.93, "grad_norm": 3.558090925216675, "learning_rate": 2.3685030849273316e-07, "loss": 0.6422, "step": 36388 }, { "epoch": 0.93, "grad_norm": 1.9433006048202515, "learning_rate": 2.3667073735140656e-07, "loss": 0.52, "step": 36389 }, { "epoch": 0.93, "grad_norm": 1.4120328426361084, "learning_rate": 2.3649123349281155e-07, "loss": 0.5892, "step": 36390 }, { "epoch": 0.93, "grad_norm": 2.4683167934417725, "learning_rate": 2.3631179691818608e-07, "loss": 0.7138, "step": 36391 }, { "epoch": 0.93, "grad_norm": 1.657901406288147, "learning_rate": 2.3613242762876465e-07, "loss": 0.4689, "step": 36392 }, { "epoch": 0.93, "grad_norm": 1.4503177404403687, "learning_rate": 2.35953125625783e-07, "loss": 0.5365, "step": 36393 }, { "epoch": 0.93, "grad_norm": 1.5704244375228882, "learning_rate": 2.3577389091048008e-07, "loss": 0.5786, "step": 36394 }, { "epoch": 0.93, "grad_norm": 1.4131966829299927, "learning_rate": 2.355947234840883e-07, "loss": 0.3439, "step": 36395 }, { "epoch": 0.93, "grad_norm": 2.1291756629943848, "learning_rate": 2.3541562334784218e-07, "loss": 0.4731, "step": 36396 }, { "epoch": 0.93, "grad_norm": 1.4833561182022095, "learning_rate": 2.3523659050297854e-07, "loss": 0.5938, "step": 36397 }, { "epoch": 0.93, "grad_norm": 1.881981372833252, "learning_rate": 2.350576249507286e-07, "loss": 0.6319, "step": 36398 }, { "epoch": 0.93, "grad_norm": 4.81353235244751, "learning_rate": 2.348787266923269e-07, "loss": 0.5801, "step": 36399 }, { "epoch": 0.93, "grad_norm": 2.1893231868743896, "learning_rate": 2.3469989572900586e-07, "loss": 0.5689, "step": 36400 }, { "epoch": 0.93, "grad_norm": 1.0532338619232178, "learning_rate": 2.3452113206199777e-07, "loss": 0.5339, "step": 36401 }, { "epoch": 0.93, "grad_norm": 8.2186861038208, "learning_rate": 2.34342435692535e-07, "loss": 0.5459, "step": 36402 }, { "epoch": 0.93, "grad_norm": 1.404587984085083, "learning_rate": 2.3416380662184767e-07, "loss": 0.6585, "step": 36403 }, { "epoch": 0.93, "grad_norm": 1.5883890390396118, "learning_rate": 2.3398524485116925e-07, "loss": 0.43, "step": 36404 }, { "epoch": 0.93, "grad_norm": 1.587508201599121, "learning_rate": 2.3380675038172873e-07, "loss": 0.494, "step": 36405 }, { "epoch": 0.93, "grad_norm": 1.6231237649917603, "learning_rate": 2.3362832321475627e-07, "loss": 0.515, "step": 36406 }, { "epoch": 0.93, "grad_norm": 1.020469307899475, "learning_rate": 2.3344996335148084e-07, "loss": 0.5012, "step": 36407 }, { "epoch": 0.93, "grad_norm": 1.5141701698303223, "learning_rate": 2.3327167079313372e-07, "loss": 0.4088, "step": 36408 }, { "epoch": 0.93, "grad_norm": 1.452048897743225, "learning_rate": 2.3309344554094172e-07, "loss": 0.5273, "step": 36409 }, { "epoch": 0.93, "grad_norm": 2.022381544113159, "learning_rate": 2.3291528759613268e-07, "loss": 0.5445, "step": 36410 }, { "epoch": 0.93, "grad_norm": 1.7443645000457764, "learning_rate": 2.327371969599357e-07, "loss": 0.4983, "step": 36411 }, { "epoch": 0.93, "grad_norm": 1.4729629755020142, "learning_rate": 2.325591736335775e-07, "loss": 0.6335, "step": 36412 }, { "epoch": 0.93, "grad_norm": 3.7409586906433105, "learning_rate": 2.323812176182849e-07, "loss": 0.5903, "step": 36413 }, { "epoch": 0.93, "grad_norm": 1.7598063945770264, "learning_rate": 2.3220332891528475e-07, "loss": 0.4576, "step": 36414 }, { "epoch": 0.93, "grad_norm": 1.5616474151611328, "learning_rate": 2.3202550752580155e-07, "loss": 0.5016, "step": 36415 }, { "epoch": 0.93, "grad_norm": 1.1270980834960938, "learning_rate": 2.3184775345106326e-07, "loss": 0.3895, "step": 36416 }, { "epoch": 0.93, "grad_norm": 1.1191425323486328, "learning_rate": 2.3167006669229108e-07, "loss": 0.4621, "step": 36417 }, { "epoch": 0.93, "grad_norm": 5.395112991333008, "learning_rate": 2.3149244725071408e-07, "loss": 0.4448, "step": 36418 }, { "epoch": 0.93, "grad_norm": 1.777093529701233, "learning_rate": 2.3131489512755344e-07, "loss": 0.7405, "step": 36419 }, { "epoch": 0.93, "grad_norm": 0.9125970005989075, "learning_rate": 2.311374103240316e-07, "loss": 0.2906, "step": 36420 }, { "epoch": 0.93, "grad_norm": 2.3173468112945557, "learning_rate": 2.3095999284137527e-07, "loss": 0.4856, "step": 36421 }, { "epoch": 0.93, "grad_norm": 3.146355628967285, "learning_rate": 2.3078264268080464e-07, "loss": 0.5571, "step": 36422 }, { "epoch": 0.93, "grad_norm": 1.1797337532043457, "learning_rate": 2.3060535984354203e-07, "loss": 0.5188, "step": 36423 }, { "epoch": 0.93, "grad_norm": 2.386317253112793, "learning_rate": 2.3042814433080874e-07, "loss": 0.4837, "step": 36424 }, { "epoch": 0.93, "grad_norm": 1.1788129806518555, "learning_rate": 2.3025099614382818e-07, "loss": 0.5667, "step": 36425 }, { "epoch": 0.93, "grad_norm": 1.6242918968200684, "learning_rate": 2.300739152838194e-07, "loss": 0.4377, "step": 36426 }, { "epoch": 0.93, "grad_norm": 1.4177124500274658, "learning_rate": 2.2989690175200252e-07, "loss": 0.4572, "step": 36427 }, { "epoch": 0.93, "grad_norm": 5.452981472015381, "learning_rate": 2.297199555495988e-07, "loss": 0.5083, "step": 36428 }, { "epoch": 0.93, "grad_norm": 1.4769586324691772, "learning_rate": 2.2954307667782728e-07, "loss": 0.4162, "step": 36429 }, { "epoch": 0.93, "grad_norm": 1.6244397163391113, "learning_rate": 2.2936626513790583e-07, "loss": 0.4968, "step": 36430 }, { "epoch": 0.93, "grad_norm": 1.7950365543365479, "learning_rate": 2.2918952093105352e-07, "loss": 0.4509, "step": 36431 }, { "epoch": 0.93, "grad_norm": 1.2918773889541626, "learning_rate": 2.2901284405848824e-07, "loss": 0.6324, "step": 36432 }, { "epoch": 0.93, "grad_norm": 1.2883658409118652, "learning_rate": 2.288362345214279e-07, "loss": 0.5582, "step": 36433 }, { "epoch": 0.93, "grad_norm": 2.941148042678833, "learning_rate": 2.286596923210893e-07, "loss": 0.5485, "step": 36434 }, { "epoch": 0.93, "grad_norm": 2.5055019855499268, "learning_rate": 2.284832174586893e-07, "loss": 0.4421, "step": 36435 }, { "epoch": 0.93, "grad_norm": 1.3702888488769531, "learning_rate": 2.2830680993544463e-07, "loss": 0.6393, "step": 36436 }, { "epoch": 0.93, "grad_norm": 1.4067885875701904, "learning_rate": 2.2813046975256993e-07, "loss": 0.6286, "step": 36437 }, { "epoch": 0.93, "grad_norm": 1.1253420114517212, "learning_rate": 2.2795419691127974e-07, "loss": 0.4923, "step": 36438 }, { "epoch": 0.93, "grad_norm": 5.47072172164917, "learning_rate": 2.277779914127898e-07, "loss": 0.5062, "step": 36439 }, { "epoch": 0.93, "grad_norm": 3.760164260864258, "learning_rate": 2.2760185325831575e-07, "loss": 0.6273, "step": 36440 }, { "epoch": 0.93, "grad_norm": 1.3327314853668213, "learning_rate": 2.2742578244906888e-07, "loss": 0.487, "step": 36441 }, { "epoch": 0.93, "grad_norm": 1.3299390077590942, "learning_rate": 2.2724977898626377e-07, "loss": 0.3937, "step": 36442 }, { "epoch": 0.93, "grad_norm": 2.8510541915893555, "learning_rate": 2.2707384287111502e-07, "loss": 0.5965, "step": 36443 }, { "epoch": 0.93, "grad_norm": 1.8373456001281738, "learning_rate": 2.268979741048305e-07, "loss": 0.4166, "step": 36444 }, { "epoch": 0.93, "grad_norm": 5.972034931182861, "learning_rate": 2.2672217268862706e-07, "loss": 0.6705, "step": 36445 }, { "epoch": 0.93, "grad_norm": 1.608209490776062, "learning_rate": 2.2654643862371482e-07, "loss": 0.5825, "step": 36446 }, { "epoch": 0.93, "grad_norm": 1.7587705850601196, "learning_rate": 2.2637077191130395e-07, "loss": 0.5177, "step": 36447 }, { "epoch": 0.93, "grad_norm": 6.541360378265381, "learning_rate": 2.2619517255260348e-07, "loss": 0.5402, "step": 36448 }, { "epoch": 0.93, "grad_norm": 1.1246566772460938, "learning_rate": 2.2601964054882684e-07, "loss": 0.4428, "step": 36449 }, { "epoch": 0.93, "grad_norm": 1.865246295928955, "learning_rate": 2.2584417590118312e-07, "loss": 0.5835, "step": 36450 }, { "epoch": 0.93, "grad_norm": 1.214157223701477, "learning_rate": 2.25668778610878e-07, "loss": 0.4938, "step": 36451 }, { "epoch": 0.93, "grad_norm": 1.886812686920166, "learning_rate": 2.2549344867912492e-07, "loss": 0.6847, "step": 36452 }, { "epoch": 0.93, "grad_norm": 0.9756920337677002, "learning_rate": 2.2531818610712962e-07, "loss": 0.3711, "step": 36453 }, { "epoch": 0.93, "grad_norm": 2.0008153915405273, "learning_rate": 2.2514299089610115e-07, "loss": 0.6645, "step": 36454 }, { "epoch": 0.93, "grad_norm": 1.2383809089660645, "learning_rate": 2.2496786304724404e-07, "loss": 0.5521, "step": 36455 }, { "epoch": 0.93, "grad_norm": 1.1024980545043945, "learning_rate": 2.247928025617685e-07, "loss": 0.5596, "step": 36456 }, { "epoch": 0.93, "grad_norm": 3.2124297618865967, "learning_rate": 2.2461780944087906e-07, "loss": 0.5331, "step": 36457 }, { "epoch": 0.93, "grad_norm": 1.2460476160049438, "learning_rate": 2.2444288368578148e-07, "loss": 0.4368, "step": 36458 }, { "epoch": 0.93, "grad_norm": 1.2914711236953735, "learning_rate": 2.2426802529768366e-07, "loss": 0.3418, "step": 36459 }, { "epoch": 0.93, "grad_norm": 1.090883493423462, "learning_rate": 2.2409323427778794e-07, "loss": 0.5344, "step": 36460 }, { "epoch": 0.93, "grad_norm": 1.8995519876480103, "learning_rate": 2.2391851062729897e-07, "loss": 0.5003, "step": 36461 }, { "epoch": 0.93, "grad_norm": 1.3642038106918335, "learning_rate": 2.2374385434742353e-07, "loss": 0.4737, "step": 36462 }, { "epoch": 0.93, "grad_norm": 7.762618064880371, "learning_rate": 2.2356926543936286e-07, "loss": 0.5095, "step": 36463 }, { "epoch": 0.93, "grad_norm": 4.2015533447265625, "learning_rate": 2.2339474390432048e-07, "loss": 0.6665, "step": 36464 }, { "epoch": 0.93, "grad_norm": 1.987385630607605, "learning_rate": 2.2322028974349873e-07, "loss": 0.586, "step": 36465 }, { "epoch": 0.93, "grad_norm": 2.204618215560913, "learning_rate": 2.2304590295810002e-07, "loss": 0.6413, "step": 36466 }, { "epoch": 0.93, "grad_norm": 7.265467166900635, "learning_rate": 2.2287158354932782e-07, "loss": 0.4998, "step": 36467 }, { "epoch": 0.93, "grad_norm": 1.5581254959106445, "learning_rate": 2.2269733151838224e-07, "loss": 0.4703, "step": 36468 }, { "epoch": 0.93, "grad_norm": 1.4254287481307983, "learning_rate": 2.2252314686646347e-07, "loss": 0.5137, "step": 36469 }, { "epoch": 0.93, "grad_norm": 2.106635332107544, "learning_rate": 2.2234902959477166e-07, "loss": 0.7807, "step": 36470 }, { "epoch": 0.93, "grad_norm": 2.4992401599884033, "learning_rate": 2.2217497970450697e-07, "loss": 0.5269, "step": 36471 }, { "epoch": 0.93, "grad_norm": 1.249746561050415, "learning_rate": 2.2200099719687062e-07, "loss": 0.4033, "step": 36472 }, { "epoch": 0.93, "grad_norm": 1.351307988166809, "learning_rate": 2.2182708207305947e-07, "loss": 0.5134, "step": 36473 }, { "epoch": 0.93, "grad_norm": 1.9259454011917114, "learning_rate": 2.2165323433427256e-07, "loss": 0.5624, "step": 36474 }, { "epoch": 0.93, "grad_norm": 1.0610456466674805, "learning_rate": 2.2147945398170888e-07, "loss": 0.4458, "step": 36475 }, { "epoch": 0.93, "grad_norm": 1.300682783126831, "learning_rate": 2.2130574101656643e-07, "loss": 0.4674, "step": 36476 }, { "epoch": 0.93, "grad_norm": 2.2345616817474365, "learning_rate": 2.2113209544003978e-07, "loss": 0.5503, "step": 36477 }, { "epoch": 0.93, "grad_norm": 1.3501564264297485, "learning_rate": 2.2095851725332684e-07, "loss": 0.5043, "step": 36478 }, { "epoch": 0.93, "grad_norm": 1.4123258590698242, "learning_rate": 2.2078500645762448e-07, "loss": 0.5111, "step": 36479 }, { "epoch": 0.94, "grad_norm": 1.9156564474105835, "learning_rate": 2.2061156305412834e-07, "loss": 0.424, "step": 36480 }, { "epoch": 0.94, "grad_norm": 1.4955207109451294, "learning_rate": 2.2043818704403197e-07, "loss": 0.4506, "step": 36481 }, { "epoch": 0.94, "grad_norm": 1.1016052961349487, "learning_rate": 2.2026487842853218e-07, "loss": 0.4436, "step": 36482 }, { "epoch": 0.94, "grad_norm": 1.1977860927581787, "learning_rate": 2.2009163720882355e-07, "loss": 0.4555, "step": 36483 }, { "epoch": 0.94, "grad_norm": 1.2848140001296997, "learning_rate": 2.1991846338609846e-07, "loss": 0.4963, "step": 36484 }, { "epoch": 0.94, "grad_norm": 1.5595978498458862, "learning_rate": 2.197453569615504e-07, "loss": 0.5117, "step": 36485 }, { "epoch": 0.94, "grad_norm": 1.8151885271072388, "learning_rate": 2.1957231793637291e-07, "loss": 0.6484, "step": 36486 }, { "epoch": 0.94, "grad_norm": 1.6625970602035522, "learning_rate": 2.193993463117583e-07, "loss": 0.4858, "step": 36487 }, { "epoch": 0.94, "grad_norm": 1.6978933811187744, "learning_rate": 2.19226442088899e-07, "loss": 0.7128, "step": 36488 }, { "epoch": 0.94, "grad_norm": 1.3098869323730469, "learning_rate": 2.190536052689862e-07, "loss": 0.4779, "step": 36489 }, { "epoch": 0.94, "grad_norm": 15.360003471374512, "learning_rate": 2.1888083585321017e-07, "loss": 0.6096, "step": 36490 }, { "epoch": 0.94, "grad_norm": 2.064861297607422, "learning_rate": 2.1870813384276214e-07, "loss": 0.4056, "step": 36491 }, { "epoch": 0.94, "grad_norm": 1.2966573238372803, "learning_rate": 2.1853549923883444e-07, "loss": 0.5007, "step": 36492 }, { "epoch": 0.94, "grad_norm": 3.941476821899414, "learning_rate": 2.1836293204261282e-07, "loss": 0.6019, "step": 36493 }, { "epoch": 0.94, "grad_norm": 1.6485799551010132, "learning_rate": 2.181904322552897e-07, "loss": 0.5149, "step": 36494 }, { "epoch": 0.94, "grad_norm": 1.4280911684036255, "learning_rate": 2.1801799987805183e-07, "loss": 0.5147, "step": 36495 }, { "epoch": 0.94, "grad_norm": 1.5841542482376099, "learning_rate": 2.1784563491208833e-07, "loss": 0.5085, "step": 36496 }, { "epoch": 0.94, "grad_norm": 1.2558518648147583, "learning_rate": 2.1767333735858708e-07, "loss": 0.533, "step": 36497 }, { "epoch": 0.94, "grad_norm": 1.4958059787750244, "learning_rate": 2.1750110721873496e-07, "loss": 0.5751, "step": 36498 }, { "epoch": 0.94, "grad_norm": 0.9852790832519531, "learning_rate": 2.173289444937199e-07, "loss": 0.4711, "step": 36499 }, { "epoch": 0.94, "grad_norm": 1.5664747953414917, "learning_rate": 2.1715684918472756e-07, "loss": 0.5773, "step": 36500 }, { "epoch": 0.94, "grad_norm": 1.4426950216293335, "learning_rate": 2.1698482129294374e-07, "loss": 0.4892, "step": 36501 }, { "epoch": 0.94, "grad_norm": 4.5426344871521, "learning_rate": 2.168128608195541e-07, "loss": 0.5544, "step": 36502 }, { "epoch": 0.94, "grad_norm": 1.4432740211486816, "learning_rate": 2.1664096776574439e-07, "loss": 0.51, "step": 36503 }, { "epoch": 0.94, "grad_norm": 5.855037689208984, "learning_rate": 2.164691421326981e-07, "loss": 0.7792, "step": 36504 }, { "epoch": 0.94, "grad_norm": 2.871425151824951, "learning_rate": 2.1629738392159983e-07, "loss": 0.6672, "step": 36505 }, { "epoch": 0.94, "grad_norm": 1.2598707675933838, "learning_rate": 2.1612569313363418e-07, "loss": 0.2297, "step": 36506 }, { "epoch": 0.94, "grad_norm": 2.5472700595855713, "learning_rate": 2.1595406976998245e-07, "loss": 0.491, "step": 36507 }, { "epoch": 0.94, "grad_norm": 1.4638646841049194, "learning_rate": 2.1578251383182924e-07, "loss": 0.5001, "step": 36508 }, { "epoch": 0.94, "grad_norm": 1.653839349746704, "learning_rate": 2.156110253203547e-07, "loss": 0.6311, "step": 36509 }, { "epoch": 0.94, "grad_norm": 14.738658905029297, "learning_rate": 2.1543960423674237e-07, "loss": 0.5552, "step": 36510 }, { "epoch": 0.94, "grad_norm": 1.817049264907837, "learning_rate": 2.152682505821735e-07, "loss": 0.4839, "step": 36511 }, { "epoch": 0.94, "grad_norm": 1.1401863098144531, "learning_rate": 2.1509696435782712e-07, "loss": 0.4517, "step": 36512 }, { "epoch": 0.94, "grad_norm": 3.325993299484253, "learning_rate": 2.1492574556488678e-07, "loss": 0.6078, "step": 36513 }, { "epoch": 0.94, "grad_norm": 1.5295833349227905, "learning_rate": 2.1475459420452926e-07, "loss": 0.5864, "step": 36514 }, { "epoch": 0.94, "grad_norm": 1.6788722276687622, "learning_rate": 2.145835102779359e-07, "loss": 0.4686, "step": 36515 }, { "epoch": 0.94, "grad_norm": 1.2387059926986694, "learning_rate": 2.1441249378628458e-07, "loss": 0.5986, "step": 36516 }, { "epoch": 0.94, "grad_norm": 1.3915189504623413, "learning_rate": 2.1424154473075553e-07, "loss": 0.4945, "step": 36517 }, { "epoch": 0.94, "grad_norm": 1.208505630493164, "learning_rate": 2.1407066311252555e-07, "loss": 0.4868, "step": 36518 }, { "epoch": 0.94, "grad_norm": 1.1707566976547241, "learning_rate": 2.1389984893277148e-07, "loss": 0.4508, "step": 36519 }, { "epoch": 0.94, "grad_norm": 1.5716222524642944, "learning_rate": 2.137291021926724e-07, "loss": 0.5711, "step": 36520 }, { "epoch": 0.94, "grad_norm": 1.28662109375, "learning_rate": 2.13558422893404e-07, "loss": 0.5446, "step": 36521 }, { "epoch": 0.94, "grad_norm": 1.7938817739486694, "learning_rate": 2.1338781103614203e-07, "loss": 0.5235, "step": 36522 }, { "epoch": 0.94, "grad_norm": 2.7847740650177, "learning_rate": 2.1321726662206222e-07, "loss": 0.5403, "step": 36523 }, { "epoch": 0.94, "grad_norm": 1.6359398365020752, "learning_rate": 2.1304678965234138e-07, "loss": 0.5296, "step": 36524 }, { "epoch": 0.94, "grad_norm": 1.4699972867965698, "learning_rate": 2.1287638012815305e-07, "loss": 0.6644, "step": 36525 }, { "epoch": 0.94, "grad_norm": 1.2862796783447266, "learning_rate": 2.127060380506718e-07, "loss": 0.3398, "step": 36526 }, { "epoch": 0.94, "grad_norm": 7.323453426361084, "learning_rate": 2.1253576342107117e-07, "loss": 0.7057, "step": 36527 }, { "epoch": 0.94, "grad_norm": 1.3985655307769775, "learning_rate": 2.123655562405258e-07, "loss": 0.4706, "step": 36528 }, { "epoch": 0.94, "grad_norm": 1.8051844835281372, "learning_rate": 2.121954165102069e-07, "loss": 0.5432, "step": 36529 }, { "epoch": 0.94, "grad_norm": 1.1822192668914795, "learning_rate": 2.1202534423128806e-07, "loss": 0.5503, "step": 36530 }, { "epoch": 0.94, "grad_norm": 1.5158629417419434, "learning_rate": 2.1185533940494162e-07, "loss": 0.496, "step": 36531 }, { "epoch": 0.94, "grad_norm": 1.544708013534546, "learning_rate": 2.116854020323378e-07, "loss": 0.4338, "step": 36532 }, { "epoch": 0.94, "grad_norm": 1.1321256160736084, "learning_rate": 2.1151553211464892e-07, "loss": 0.4583, "step": 36533 }, { "epoch": 0.94, "grad_norm": 2.260286331176758, "learning_rate": 2.1134572965304635e-07, "loss": 0.4183, "step": 36534 }, { "epoch": 0.94, "grad_norm": 1.4043099880218506, "learning_rate": 2.1117599464869797e-07, "loss": 0.4103, "step": 36535 }, { "epoch": 0.94, "grad_norm": 1.5521315336227417, "learning_rate": 2.1100632710277514e-07, "loss": 0.5135, "step": 36536 }, { "epoch": 0.94, "grad_norm": 1.507700800895691, "learning_rate": 2.1083672701644685e-07, "loss": 0.6606, "step": 36537 }, { "epoch": 0.94, "grad_norm": 0.9535340070724487, "learning_rate": 2.1066719439088112e-07, "loss": 0.4309, "step": 36538 }, { "epoch": 0.94, "grad_norm": 2.9133493900299072, "learning_rate": 2.1049772922724698e-07, "loss": 0.4264, "step": 36539 }, { "epoch": 0.94, "grad_norm": 1.3514021635055542, "learning_rate": 2.103283315267124e-07, "loss": 0.5383, "step": 36540 }, { "epoch": 0.94, "grad_norm": 1.4359103441238403, "learning_rate": 2.101590012904442e-07, "loss": 0.4635, "step": 36541 }, { "epoch": 0.94, "grad_norm": 1.0325020551681519, "learning_rate": 2.0998973851961034e-07, "loss": 0.484, "step": 36542 }, { "epoch": 0.94, "grad_norm": 1.3587524890899658, "learning_rate": 2.0982054321537548e-07, "loss": 0.4625, "step": 36543 }, { "epoch": 0.94, "grad_norm": 6.633455276489258, "learning_rate": 2.0965141537890642e-07, "loss": 0.5437, "step": 36544 }, { "epoch": 0.94, "grad_norm": 1.8165283203125, "learning_rate": 2.0948235501137005e-07, "loss": 0.5621, "step": 36545 }, { "epoch": 0.94, "grad_norm": 1.3881489038467407, "learning_rate": 2.0931336211392872e-07, "loss": 0.4858, "step": 36546 }, { "epoch": 0.94, "grad_norm": 2.2912099361419678, "learning_rate": 2.0914443668775042e-07, "loss": 0.7093, "step": 36547 }, { "epoch": 0.94, "grad_norm": 3.262162446975708, "learning_rate": 2.0897557873399644e-07, "loss": 0.5076, "step": 36548 }, { "epoch": 0.94, "grad_norm": 1.2985291481018066, "learning_rate": 2.088067882538325e-07, "loss": 0.4152, "step": 36549 }, { "epoch": 0.94, "grad_norm": 1.6069880723953247, "learning_rate": 2.086380652484188e-07, "loss": 0.6353, "step": 36550 }, { "epoch": 0.94, "grad_norm": 2.4195821285247803, "learning_rate": 2.0846940971892217e-07, "loss": 0.4046, "step": 36551 }, { "epoch": 0.94, "grad_norm": 8.410663604736328, "learning_rate": 2.0830082166650168e-07, "loss": 0.538, "step": 36552 }, { "epoch": 0.94, "grad_norm": 1.4516223669052124, "learning_rate": 2.0813230109231974e-07, "loss": 0.4692, "step": 36553 }, { "epoch": 0.94, "grad_norm": 4.214451789855957, "learning_rate": 2.0796384799753876e-07, "loss": 0.5585, "step": 36554 }, { "epoch": 0.94, "grad_norm": 2.134875774383545, "learning_rate": 2.0779546238331893e-07, "loss": 0.4283, "step": 36555 }, { "epoch": 0.94, "grad_norm": 1.3930052518844604, "learning_rate": 2.0762714425081932e-07, "loss": 0.5913, "step": 36556 }, { "epoch": 0.94, "grad_norm": 1.2525169849395752, "learning_rate": 2.074588936012034e-07, "loss": 0.4826, "step": 36557 }, { "epoch": 0.94, "grad_norm": 1.4997214078903198, "learning_rate": 2.072907104356281e-07, "loss": 0.5304, "step": 36558 }, { "epoch": 0.94, "grad_norm": 1.3946863412857056, "learning_rate": 2.0712259475525243e-07, "loss": 0.4886, "step": 36559 }, { "epoch": 0.94, "grad_norm": 1.926316738128662, "learning_rate": 2.0695454656123438e-07, "loss": 0.4856, "step": 36560 }, { "epoch": 0.94, "grad_norm": 1.1728421449661255, "learning_rate": 2.067865658547341e-07, "loss": 0.4325, "step": 36561 }, { "epoch": 0.94, "grad_norm": 3.0360188484191895, "learning_rate": 2.066186526369085e-07, "loss": 0.3167, "step": 36562 }, { "epoch": 0.94, "grad_norm": 1.7056119441986084, "learning_rate": 2.0645080690891218e-07, "loss": 0.4368, "step": 36563 }, { "epoch": 0.94, "grad_norm": 1.482795000076294, "learning_rate": 2.0628302867190641e-07, "loss": 0.5, "step": 36564 }, { "epoch": 0.94, "grad_norm": 3.495609998703003, "learning_rate": 2.0611531792704477e-07, "loss": 0.6385, "step": 36565 }, { "epoch": 0.94, "grad_norm": 1.7216897010803223, "learning_rate": 2.0594767467548294e-07, "loss": 0.434, "step": 36566 }, { "epoch": 0.94, "grad_norm": 1.199742317199707, "learning_rate": 2.0578009891837447e-07, "loss": 0.4364, "step": 36567 }, { "epoch": 0.94, "grad_norm": 2.03597092628479, "learning_rate": 2.0561259065687844e-07, "loss": 0.5162, "step": 36568 }, { "epoch": 0.94, "grad_norm": 1.2879544496536255, "learning_rate": 2.0544514989214725e-07, "loss": 0.4398, "step": 36569 }, { "epoch": 0.94, "grad_norm": 1.303009033203125, "learning_rate": 2.0527777662533332e-07, "loss": 0.601, "step": 36570 }, { "epoch": 0.94, "grad_norm": 0.8861620426177979, "learning_rate": 2.051104708575913e-07, "loss": 0.4074, "step": 36571 }, { "epoch": 0.94, "grad_norm": 3.927201747894287, "learning_rate": 2.0494323259007466e-07, "loss": 0.5103, "step": 36572 }, { "epoch": 0.94, "grad_norm": 1.1109193563461304, "learning_rate": 2.0477606182393583e-07, "loss": 0.314, "step": 36573 }, { "epoch": 0.94, "grad_norm": 1.7625553607940674, "learning_rate": 2.0460895856032615e-07, "loss": 0.6172, "step": 36574 }, { "epoch": 0.94, "grad_norm": 1.4267789125442505, "learning_rate": 2.0444192280039688e-07, "loss": 0.438, "step": 36575 }, { "epoch": 0.94, "grad_norm": 1.9943139553070068, "learning_rate": 2.0427495454530045e-07, "loss": 0.4436, "step": 36576 }, { "epoch": 0.94, "grad_norm": 1.3204503059387207, "learning_rate": 2.0410805379618593e-07, "loss": 0.4013, "step": 36577 }, { "epoch": 0.94, "grad_norm": 1.3762849569320679, "learning_rate": 2.0394122055420462e-07, "loss": 0.4649, "step": 36578 }, { "epoch": 0.94, "grad_norm": 6.097789764404297, "learning_rate": 2.0377445482050672e-07, "loss": 0.5444, "step": 36579 }, { "epoch": 0.94, "grad_norm": 2.089054584503174, "learning_rate": 2.036077565962391e-07, "loss": 0.6351, "step": 36580 }, { "epoch": 0.94, "grad_norm": 1.4272055625915527, "learning_rate": 2.0344112588255304e-07, "loss": 0.4719, "step": 36581 }, { "epoch": 0.94, "grad_norm": 1.6791400909423828, "learning_rate": 2.0327456268059653e-07, "loss": 0.5305, "step": 36582 }, { "epoch": 0.94, "grad_norm": 4.636966228485107, "learning_rate": 2.031080669915164e-07, "loss": 0.686, "step": 36583 }, { "epoch": 0.94, "grad_norm": 1.6081867218017578, "learning_rate": 2.0294163881646067e-07, "loss": 0.5503, "step": 36584 }, { "epoch": 0.94, "grad_norm": 2.705125331878662, "learning_rate": 2.0277527815657506e-07, "loss": 0.6021, "step": 36585 }, { "epoch": 0.94, "grad_norm": 16.236326217651367, "learning_rate": 2.0260898501300864e-07, "loss": 0.4744, "step": 36586 }, { "epoch": 0.94, "grad_norm": 0.7451160550117493, "learning_rate": 2.0244275938690383e-07, "loss": 0.3841, "step": 36587 }, { "epoch": 0.94, "grad_norm": 2.733475923538208, "learning_rate": 2.0227660127940974e-07, "loss": 0.5525, "step": 36588 }, { "epoch": 0.94, "grad_norm": 1.6994136571884155, "learning_rate": 2.0211051069166876e-07, "loss": 0.5163, "step": 36589 }, { "epoch": 0.94, "grad_norm": 0.8253219127655029, "learning_rate": 2.0194448762482777e-07, "loss": 0.4419, "step": 36590 }, { "epoch": 0.94, "grad_norm": 3.352715492248535, "learning_rate": 2.0177853208002806e-07, "loss": 0.3359, "step": 36591 }, { "epoch": 0.94, "grad_norm": 11.717002868652344, "learning_rate": 2.0161264405841541e-07, "loss": 0.715, "step": 36592 }, { "epoch": 0.94, "grad_norm": 1.905379056930542, "learning_rate": 2.0144682356113332e-07, "loss": 0.4344, "step": 36593 }, { "epoch": 0.94, "grad_norm": 1.384839653968811, "learning_rate": 2.01281070589322e-07, "loss": 0.4594, "step": 36594 }, { "epoch": 0.94, "grad_norm": 1.3359160423278809, "learning_rate": 2.011153851441261e-07, "loss": 0.4927, "step": 36595 }, { "epoch": 0.94, "grad_norm": 5.013521194458008, "learning_rate": 2.009497672266869e-07, "loss": 0.4097, "step": 36596 }, { "epoch": 0.94, "grad_norm": 1.2043029069900513, "learning_rate": 2.0078421683814574e-07, "loss": 0.613, "step": 36597 }, { "epoch": 0.94, "grad_norm": 1.680396556854248, "learning_rate": 2.0061873397964283e-07, "loss": 0.5445, "step": 36598 }, { "epoch": 0.94, "grad_norm": 7.732542514801025, "learning_rate": 2.0045331865231944e-07, "loss": 0.5202, "step": 36599 }, { "epoch": 0.94, "grad_norm": 1.1493383646011353, "learning_rate": 2.0028797085731577e-07, "loss": 0.4548, "step": 36600 }, { "epoch": 0.94, "grad_norm": 3.6786410808563232, "learning_rate": 2.001226905957687e-07, "loss": 0.602, "step": 36601 }, { "epoch": 0.94, "grad_norm": 1.3327399492263794, "learning_rate": 1.999574778688207e-07, "loss": 0.3875, "step": 36602 }, { "epoch": 0.94, "grad_norm": 8.005648612976074, "learning_rate": 1.9979233267760855e-07, "loss": 0.3818, "step": 36603 }, { "epoch": 0.94, "grad_norm": 1.573455810546875, "learning_rate": 1.996272550232703e-07, "loss": 0.6436, "step": 36604 }, { "epoch": 0.94, "grad_norm": 1.2891736030578613, "learning_rate": 1.9946224490694388e-07, "loss": 0.5002, "step": 36605 }, { "epoch": 0.94, "grad_norm": 1.4909182786941528, "learning_rate": 1.9929730232976618e-07, "loss": 0.7561, "step": 36606 }, { "epoch": 0.94, "grad_norm": 3.993711471557617, "learning_rate": 1.991324272928752e-07, "loss": 0.7287, "step": 36607 }, { "epoch": 0.94, "grad_norm": 1.0716019868850708, "learning_rate": 1.9896761979740442e-07, "loss": 0.4109, "step": 36608 }, { "epoch": 0.94, "grad_norm": 1.8037198781967163, "learning_rate": 1.9880287984449187e-07, "loss": 0.4675, "step": 36609 }, { "epoch": 0.94, "grad_norm": 1.5649811029434204, "learning_rate": 1.9863820743527217e-07, "loss": 0.7014, "step": 36610 }, { "epoch": 0.94, "grad_norm": 0.9495744109153748, "learning_rate": 1.984736025708789e-07, "loss": 0.5611, "step": 36611 }, { "epoch": 0.94, "grad_norm": 1.310826063156128, "learning_rate": 1.9830906525244997e-07, "loss": 0.4276, "step": 36612 }, { "epoch": 0.94, "grad_norm": 1.775315523147583, "learning_rate": 1.9814459548111563e-07, "loss": 0.5695, "step": 36613 }, { "epoch": 0.94, "grad_norm": 2.0579967498779297, "learning_rate": 1.9798019325801054e-07, "loss": 0.6578, "step": 36614 }, { "epoch": 0.94, "grad_norm": 1.6748980283737183, "learning_rate": 1.9781585858426823e-07, "loss": 0.3982, "step": 36615 }, { "epoch": 0.94, "grad_norm": 1.255953073501587, "learning_rate": 1.9765159146101997e-07, "loss": 0.3879, "step": 36616 }, { "epoch": 0.94, "grad_norm": 1.6254603862762451, "learning_rate": 1.9748739188939935e-07, "loss": 0.6512, "step": 36617 }, { "epoch": 0.94, "grad_norm": 2.320485830307007, "learning_rate": 1.973232598705366e-07, "loss": 0.4478, "step": 36618 }, { "epoch": 0.94, "grad_norm": 1.5669323205947876, "learning_rate": 1.9715919540556294e-07, "loss": 0.5706, "step": 36619 }, { "epoch": 0.94, "grad_norm": 1.2478222846984863, "learning_rate": 1.9699519849560978e-07, "loss": 0.4376, "step": 36620 }, { "epoch": 0.94, "grad_norm": 7.3112568855285645, "learning_rate": 1.9683126914180617e-07, "loss": 0.7821, "step": 36621 }, { "epoch": 0.94, "grad_norm": 4.3449788093566895, "learning_rate": 1.9666740734528344e-07, "loss": 0.6176, "step": 36622 }, { "epoch": 0.94, "grad_norm": 19.134197235107422, "learning_rate": 1.965036131071707e-07, "loss": 0.4448, "step": 36623 }, { "epoch": 0.94, "grad_norm": 1.467712640762329, "learning_rate": 1.9633988642859481e-07, "loss": 0.6298, "step": 36624 }, { "epoch": 0.94, "grad_norm": 1.46082603931427, "learning_rate": 1.9617622731068485e-07, "loss": 0.6569, "step": 36625 }, { "epoch": 0.94, "grad_norm": 1.458056092262268, "learning_rate": 1.9601263575456887e-07, "loss": 0.5398, "step": 36626 }, { "epoch": 0.94, "grad_norm": 12.842042922973633, "learning_rate": 1.958491117613759e-07, "loss": 0.6593, "step": 36627 }, { "epoch": 0.94, "grad_norm": 1.1846652030944824, "learning_rate": 1.956856553322295e-07, "loss": 0.6035, "step": 36628 }, { "epoch": 0.94, "grad_norm": 1.2314964532852173, "learning_rate": 1.9552226646825877e-07, "loss": 0.4945, "step": 36629 }, { "epoch": 0.94, "grad_norm": 1.263623833656311, "learning_rate": 1.953589451705884e-07, "loss": 0.4459, "step": 36630 }, { "epoch": 0.94, "grad_norm": 2.4077749252319336, "learning_rate": 1.9519569144034412e-07, "loss": 0.5238, "step": 36631 }, { "epoch": 0.94, "grad_norm": 1.3046865463256836, "learning_rate": 1.9503250527865169e-07, "loss": 0.5624, "step": 36632 }, { "epoch": 0.94, "grad_norm": 1.995825171470642, "learning_rate": 1.9486938668663468e-07, "loss": 0.5516, "step": 36633 }, { "epoch": 0.94, "grad_norm": 1.378669261932373, "learning_rate": 1.9470633566541886e-07, "loss": 0.4552, "step": 36634 }, { "epoch": 0.94, "grad_norm": 2.4490692615509033, "learning_rate": 1.945433522161244e-07, "loss": 0.4713, "step": 36635 }, { "epoch": 0.94, "grad_norm": 1.4247530698776245, "learning_rate": 1.9438043633987823e-07, "loss": 0.5308, "step": 36636 }, { "epoch": 0.94, "grad_norm": 1.1624083518981934, "learning_rate": 1.9421758803780278e-07, "loss": 0.5013, "step": 36637 }, { "epoch": 0.94, "grad_norm": 1.6623197793960571, "learning_rate": 1.9405480731101712e-07, "loss": 0.5615, "step": 36638 }, { "epoch": 0.94, "grad_norm": 1.0876556634902954, "learning_rate": 1.938920941606459e-07, "loss": 0.5475, "step": 36639 }, { "epoch": 0.94, "grad_norm": 0.9480615854263306, "learning_rate": 1.937294485878094e-07, "loss": 0.4433, "step": 36640 }, { "epoch": 0.94, "grad_norm": 1.631644368171692, "learning_rate": 1.9356687059362889e-07, "loss": 0.6237, "step": 36641 }, { "epoch": 0.94, "grad_norm": 3.1901278495788574, "learning_rate": 1.9340436017922347e-07, "loss": 0.7164, "step": 36642 }, { "epoch": 0.94, "grad_norm": 2.8042922019958496, "learning_rate": 1.9324191734571562e-07, "loss": 0.6152, "step": 36643 }, { "epoch": 0.94, "grad_norm": 1.214148998260498, "learning_rate": 1.9307954209422218e-07, "loss": 0.5991, "step": 36644 }, { "epoch": 0.94, "grad_norm": 1.410386562347412, "learning_rate": 1.929172344258634e-07, "loss": 0.3226, "step": 36645 }, { "epoch": 0.94, "grad_norm": 1.5819875001907349, "learning_rate": 1.9275499434175838e-07, "loss": 0.5502, "step": 36646 }, { "epoch": 0.94, "grad_norm": 1.1701304912567139, "learning_rate": 1.9259282184302396e-07, "loss": 0.401, "step": 36647 }, { "epoch": 0.94, "grad_norm": 13.655652046203613, "learning_rate": 1.9243071693077818e-07, "loss": 0.6958, "step": 36648 }, { "epoch": 0.94, "grad_norm": 1.1251208782196045, "learning_rate": 1.9226867960613682e-07, "loss": 0.3939, "step": 36649 }, { "epoch": 0.94, "grad_norm": 16.811996459960938, "learning_rate": 1.9210670987022007e-07, "loss": 0.5684, "step": 36650 }, { "epoch": 0.94, "grad_norm": 1.2372148036956787, "learning_rate": 1.9194480772414147e-07, "loss": 0.549, "step": 36651 }, { "epoch": 0.94, "grad_norm": 3.331780433654785, "learning_rate": 1.9178297316901574e-07, "loss": 0.6769, "step": 36652 }, { "epoch": 0.94, "grad_norm": 1.5100609064102173, "learning_rate": 1.916212062059608e-07, "loss": 0.4707, "step": 36653 }, { "epoch": 0.94, "grad_norm": 7.516968250274658, "learning_rate": 1.9145950683609026e-07, "loss": 0.5492, "step": 36654 }, { "epoch": 0.94, "grad_norm": 2.257441520690918, "learning_rate": 1.9129787506051878e-07, "loss": 0.4735, "step": 36655 }, { "epoch": 0.94, "grad_norm": 1.5730535984039307, "learning_rate": 1.911363108803599e-07, "loss": 0.3822, "step": 36656 }, { "epoch": 0.94, "grad_norm": 1.1614727973937988, "learning_rate": 1.9097481429672603e-07, "loss": 0.3462, "step": 36657 }, { "epoch": 0.94, "grad_norm": 1.8979296684265137, "learning_rate": 1.9081338531073192e-07, "loss": 0.4374, "step": 36658 }, { "epoch": 0.94, "grad_norm": 1.0714160203933716, "learning_rate": 1.906520239234888e-07, "loss": 0.4906, "step": 36659 }, { "epoch": 0.94, "grad_norm": 1.1555207967758179, "learning_rate": 1.904907301361092e-07, "loss": 0.5131, "step": 36660 }, { "epoch": 0.94, "grad_norm": 30.175813674926758, "learning_rate": 1.903295039497055e-07, "loss": 0.4624, "step": 36661 }, { "epoch": 0.94, "grad_norm": 1.0282825231552124, "learning_rate": 1.9016834536538576e-07, "loss": 0.4785, "step": 36662 }, { "epoch": 0.94, "grad_norm": 3.729661464691162, "learning_rate": 1.900072543842646e-07, "loss": 0.4476, "step": 36663 }, { "epoch": 0.94, "grad_norm": 1.5032471418380737, "learning_rate": 1.8984623100745003e-07, "loss": 0.4609, "step": 36664 }, { "epoch": 0.94, "grad_norm": 0.9884411096572876, "learning_rate": 1.8968527523605118e-07, "loss": 0.4085, "step": 36665 }, { "epoch": 0.94, "grad_norm": 1.1333520412445068, "learning_rate": 1.8952438707117827e-07, "loss": 0.4289, "step": 36666 }, { "epoch": 0.94, "grad_norm": 2.370912790298462, "learning_rate": 1.8936356651393927e-07, "loss": 0.4952, "step": 36667 }, { "epoch": 0.94, "grad_norm": 2.6052658557891846, "learning_rate": 1.8920281356544334e-07, "loss": 0.6362, "step": 36668 }, { "epoch": 0.94, "grad_norm": 1.5644875764846802, "learning_rate": 1.8904212822679735e-07, "loss": 0.5733, "step": 36669 }, { "epoch": 0.94, "grad_norm": 1.4394842386245728, "learning_rate": 1.888815104991104e-07, "loss": 0.5816, "step": 36670 }, { "epoch": 0.94, "grad_norm": 1.230879306793213, "learning_rate": 1.887209603834872e-07, "loss": 0.6037, "step": 36671 }, { "epoch": 0.94, "grad_norm": 1.3376432657241821, "learning_rate": 1.885604778810357e-07, "loss": 0.633, "step": 36672 }, { "epoch": 0.94, "grad_norm": 1.4385309219360352, "learning_rate": 1.8840006299285952e-07, "loss": 0.3392, "step": 36673 }, { "epoch": 0.94, "grad_norm": 1.1493066549301147, "learning_rate": 1.882397157200677e-07, "loss": 0.5224, "step": 36674 }, { "epoch": 0.94, "grad_norm": 1.5627490282058716, "learning_rate": 1.8807943606376278e-07, "loss": 0.5087, "step": 36675 }, { "epoch": 0.94, "grad_norm": 2.1011130809783936, "learning_rate": 1.8791922402504935e-07, "loss": 0.4848, "step": 36676 }, { "epoch": 0.94, "grad_norm": 1.290700078010559, "learning_rate": 1.8775907960503325e-07, "loss": 0.523, "step": 36677 }, { "epoch": 0.94, "grad_norm": 1.8188652992248535, "learning_rate": 1.8759900280481692e-07, "loss": 0.6495, "step": 36678 }, { "epoch": 0.94, "grad_norm": 0.9382984042167664, "learning_rate": 1.8743899362550277e-07, "loss": 0.5297, "step": 36679 }, { "epoch": 0.94, "grad_norm": 1.701170802116394, "learning_rate": 1.872790520681944e-07, "loss": 0.5625, "step": 36680 }, { "epoch": 0.94, "grad_norm": 2.652426242828369, "learning_rate": 1.8711917813399427e-07, "loss": 0.778, "step": 36681 }, { "epoch": 0.94, "grad_norm": 1.2976011037826538, "learning_rate": 1.869593718240026e-07, "loss": 0.4962, "step": 36682 }, { "epoch": 0.94, "grad_norm": 1.3304342031478882, "learning_rate": 1.8679963313932291e-07, "loss": 0.384, "step": 36683 }, { "epoch": 0.94, "grad_norm": 1.2633916139602661, "learning_rate": 1.8663996208105439e-07, "loss": 0.5721, "step": 36684 }, { "epoch": 0.94, "grad_norm": 1.7362793684005737, "learning_rate": 1.8648035865029834e-07, "loss": 0.3899, "step": 36685 }, { "epoch": 0.94, "grad_norm": 2.6701111793518066, "learning_rate": 1.8632082284815279e-07, "loss": 0.6563, "step": 36686 }, { "epoch": 0.94, "grad_norm": 1.158782720565796, "learning_rate": 1.8616135467572017e-07, "loss": 0.4668, "step": 36687 }, { "epoch": 0.94, "grad_norm": 1.327683448791504, "learning_rate": 1.8600195413409738e-07, "loss": 0.4969, "step": 36688 }, { "epoch": 0.94, "grad_norm": 12.163675308227539, "learning_rate": 1.8584262122438356e-07, "loss": 0.6395, "step": 36689 }, { "epoch": 0.94, "grad_norm": 1.2414597272872925, "learning_rate": 1.856833559476756e-07, "loss": 0.5555, "step": 36690 }, { "epoch": 0.94, "grad_norm": 1.0800970792770386, "learning_rate": 1.8552415830507263e-07, "loss": 0.454, "step": 36691 }, { "epoch": 0.94, "grad_norm": 1.8771378993988037, "learning_rate": 1.8536502829767156e-07, "loss": 0.5722, "step": 36692 }, { "epoch": 0.94, "grad_norm": 1.7631404399871826, "learning_rate": 1.8520596592656704e-07, "loss": 0.5754, "step": 36693 }, { "epoch": 0.94, "grad_norm": 1.4233200550079346, "learning_rate": 1.8504697119285818e-07, "loss": 0.3683, "step": 36694 }, { "epoch": 0.94, "grad_norm": 1.3514403104782104, "learning_rate": 1.8488804409763972e-07, "loss": 0.302, "step": 36695 }, { "epoch": 0.94, "grad_norm": 2.2864086627960205, "learning_rate": 1.8472918464200517e-07, "loss": 0.6439, "step": 36696 }, { "epoch": 0.94, "grad_norm": 1.462831974029541, "learning_rate": 1.8457039282705036e-07, "loss": 0.5491, "step": 36697 }, { "epoch": 0.94, "grad_norm": 1.8218151330947876, "learning_rate": 1.8441166865387106e-07, "loss": 0.5218, "step": 36698 }, { "epoch": 0.94, "grad_norm": 2.0074350833892822, "learning_rate": 1.8425301212355862e-07, "loss": 0.5242, "step": 36699 }, { "epoch": 0.94, "grad_norm": 1.6214499473571777, "learning_rate": 1.8409442323720772e-07, "loss": 0.5324, "step": 36700 }, { "epoch": 0.94, "grad_norm": 2.521416187286377, "learning_rate": 1.8393590199591193e-07, "loss": 0.5133, "step": 36701 }, { "epoch": 0.94, "grad_norm": 1.0595990419387817, "learning_rate": 1.837774484007626e-07, "loss": 0.5417, "step": 36702 }, { "epoch": 0.94, "grad_norm": 1.6788365840911865, "learning_rate": 1.8361906245285112e-07, "loss": 0.5752, "step": 36703 }, { "epoch": 0.94, "grad_norm": 1.1052393913269043, "learning_rate": 1.8346074415326987e-07, "loss": 0.4655, "step": 36704 }, { "epoch": 0.94, "grad_norm": 9.568846702575684, "learning_rate": 1.8330249350311025e-07, "loss": 0.529, "step": 36705 }, { "epoch": 0.94, "grad_norm": 1.3705494403839111, "learning_rate": 1.831443105034625e-07, "loss": 0.6043, "step": 36706 }, { "epoch": 0.94, "grad_norm": 1.9921772480010986, "learning_rate": 1.8298619515541682e-07, "loss": 0.533, "step": 36707 }, { "epoch": 0.94, "grad_norm": 1.0941966772079468, "learning_rate": 1.8282814746006238e-07, "loss": 0.3937, "step": 36708 }, { "epoch": 0.94, "grad_norm": 1.7605595588684082, "learning_rate": 1.826701674184883e-07, "loss": 0.4348, "step": 36709 }, { "epoch": 0.94, "grad_norm": 2.677574396133423, "learning_rate": 1.825122550317837e-07, "loss": 0.6116, "step": 36710 }, { "epoch": 0.94, "grad_norm": 0.9316872954368591, "learning_rate": 1.8235441030103773e-07, "loss": 0.6137, "step": 36711 }, { "epoch": 0.94, "grad_norm": 1.4183489084243774, "learning_rate": 1.8219663322733617e-07, "loss": 0.4563, "step": 36712 }, { "epoch": 0.94, "grad_norm": 1.7250406742095947, "learning_rate": 1.8203892381176701e-07, "loss": 0.5846, "step": 36713 }, { "epoch": 0.94, "grad_norm": 1.2882572412490845, "learning_rate": 1.818812820554172e-07, "loss": 0.412, "step": 36714 }, { "epoch": 0.94, "grad_norm": 2.2390308380126953, "learning_rate": 1.8172370795937367e-07, "loss": 0.4136, "step": 36715 }, { "epoch": 0.94, "grad_norm": 1.1060566902160645, "learning_rate": 1.8156620152472215e-07, "loss": 0.4456, "step": 36716 }, { "epoch": 0.94, "grad_norm": 1.0279074907302856, "learning_rate": 1.8140876275254737e-07, "loss": 0.3267, "step": 36717 }, { "epoch": 0.94, "grad_norm": 1.5963191986083984, "learning_rate": 1.8125139164393512e-07, "loss": 0.6215, "step": 36718 }, { "epoch": 0.94, "grad_norm": 12.513338088989258, "learning_rate": 1.81094088199969e-07, "loss": 0.6901, "step": 36719 }, { "epoch": 0.94, "grad_norm": 1.8189976215362549, "learning_rate": 1.8093685242173365e-07, "loss": 0.5339, "step": 36720 }, { "epoch": 0.94, "grad_norm": 1.184888482093811, "learning_rate": 1.8077968431031378e-07, "loss": 0.4552, "step": 36721 }, { "epoch": 0.94, "grad_norm": 1.7887190580368042, "learning_rate": 1.8062258386678966e-07, "loss": 0.6333, "step": 36722 }, { "epoch": 0.94, "grad_norm": 1.4839013814926147, "learning_rate": 1.8046555109224595e-07, "loss": 0.5157, "step": 36723 }, { "epoch": 0.94, "grad_norm": 1.664276123046875, "learning_rate": 1.8030858598776512e-07, "loss": 0.4939, "step": 36724 }, { "epoch": 0.94, "grad_norm": 2.570350170135498, "learning_rate": 1.8015168855442744e-07, "loss": 0.5611, "step": 36725 }, { "epoch": 0.94, "grad_norm": 1.377714991569519, "learning_rate": 1.7999485879331645e-07, "loss": 0.4703, "step": 36726 }, { "epoch": 0.94, "grad_norm": 3.0267324447631836, "learning_rate": 1.7983809670550912e-07, "loss": 0.5643, "step": 36727 }, { "epoch": 0.94, "grad_norm": 2.2547731399536133, "learning_rate": 1.7968140229208897e-07, "loss": 0.6531, "step": 36728 }, { "epoch": 0.94, "grad_norm": 2.4402916431427, "learning_rate": 1.7952477555413517e-07, "loss": 0.5771, "step": 36729 }, { "epoch": 0.94, "grad_norm": 1.4222990274429321, "learning_rate": 1.7936821649272572e-07, "loss": 0.4592, "step": 36730 }, { "epoch": 0.94, "grad_norm": 2.758578300476074, "learning_rate": 1.7921172510894203e-07, "loss": 0.5215, "step": 36731 }, { "epoch": 0.94, "grad_norm": 8.990185737609863, "learning_rate": 1.7905530140386096e-07, "loss": 0.4457, "step": 36732 }, { "epoch": 0.94, "grad_norm": 1.6637150049209595, "learning_rate": 1.7889894537855946e-07, "loss": 0.5264, "step": 36733 }, { "epoch": 0.94, "grad_norm": 3.4331154823303223, "learning_rate": 1.7874265703411664e-07, "loss": 0.5179, "step": 36734 }, { "epoch": 0.94, "grad_norm": 2.109450101852417, "learning_rate": 1.7858643637161056e-07, "loss": 0.5521, "step": 36735 }, { "epoch": 0.94, "grad_norm": 1.1725066900253296, "learning_rate": 1.7843028339211478e-07, "loss": 0.5835, "step": 36736 }, { "epoch": 0.94, "grad_norm": 9.807682037353516, "learning_rate": 1.7827419809670732e-07, "loss": 0.4833, "step": 36737 }, { "epoch": 0.94, "grad_norm": 1.5568876266479492, "learning_rate": 1.781181804864629e-07, "loss": 0.7051, "step": 36738 }, { "epoch": 0.94, "grad_norm": 1.6041045188903809, "learning_rate": 1.7796223056245841e-07, "loss": 0.523, "step": 36739 }, { "epoch": 0.94, "grad_norm": 1.1302522420883179, "learning_rate": 1.7780634832576637e-07, "loss": 0.5055, "step": 36740 }, { "epoch": 0.94, "grad_norm": 1.320530652999878, "learning_rate": 1.7765053377746256e-07, "loss": 0.4823, "step": 36741 }, { "epoch": 0.94, "grad_norm": 3.1388254165649414, "learning_rate": 1.7749478691862055e-07, "loss": 0.4649, "step": 36742 }, { "epoch": 0.94, "grad_norm": 2.018996238708496, "learning_rate": 1.7733910775031392e-07, "loss": 0.5781, "step": 36743 }, { "epoch": 0.94, "grad_norm": 1.3322621583938599, "learning_rate": 1.7718349627361298e-07, "loss": 0.4329, "step": 36744 }, { "epoch": 0.94, "grad_norm": 3.937575101852417, "learning_rate": 1.770279524895935e-07, "loss": 0.6227, "step": 36745 }, { "epoch": 0.94, "grad_norm": 1.04306960105896, "learning_rate": 1.768724763993257e-07, "loss": 0.4762, "step": 36746 }, { "epoch": 0.94, "grad_norm": 1.53554105758667, "learning_rate": 1.76717068003881e-07, "loss": 0.6377, "step": 36747 }, { "epoch": 0.94, "grad_norm": 1.630837082862854, "learning_rate": 1.7656172730433185e-07, "loss": 0.3411, "step": 36748 }, { "epoch": 0.94, "grad_norm": 1.9835669994354248, "learning_rate": 1.764064543017463e-07, "loss": 0.4634, "step": 36749 }, { "epoch": 0.94, "grad_norm": 2.720306396484375, "learning_rate": 1.762512489971968e-07, "loss": 0.6816, "step": 36750 }, { "epoch": 0.94, "grad_norm": 3.735612630844116, "learning_rate": 1.7609611139175033e-07, "loss": 0.5734, "step": 36751 }, { "epoch": 0.94, "grad_norm": 1.347868800163269, "learning_rate": 1.7594104148647817e-07, "loss": 0.5792, "step": 36752 }, { "epoch": 0.94, "grad_norm": 0.9089546203613281, "learning_rate": 1.7578603928244953e-07, "loss": 0.3855, "step": 36753 }, { "epoch": 0.94, "grad_norm": 2.458055019378662, "learning_rate": 1.756311047807291e-07, "loss": 0.5218, "step": 36754 }, { "epoch": 0.94, "grad_norm": 7.102389812469482, "learning_rate": 1.7547623798238822e-07, "loss": 0.5734, "step": 36755 }, { "epoch": 0.94, "grad_norm": 1.4390355348587036, "learning_rate": 1.7532143888849273e-07, "loss": 0.4468, "step": 36756 }, { "epoch": 0.94, "grad_norm": 1.8792386054992676, "learning_rate": 1.7516670750010844e-07, "loss": 0.6728, "step": 36757 }, { "epoch": 0.94, "grad_norm": 0.9580222964286804, "learning_rate": 1.750120438183034e-07, "loss": 0.4159, "step": 36758 }, { "epoch": 0.94, "grad_norm": 3.3588404655456543, "learning_rate": 1.7485744784414337e-07, "loss": 0.5633, "step": 36759 }, { "epoch": 0.94, "grad_norm": 1.6106950044631958, "learning_rate": 1.74702919578692e-07, "loss": 0.5844, "step": 36760 }, { "epoch": 0.94, "grad_norm": 1.2115342617034912, "learning_rate": 1.7454845902301508e-07, "loss": 0.5535, "step": 36761 }, { "epoch": 0.94, "grad_norm": 2.0774331092834473, "learning_rate": 1.7439406617817846e-07, "loss": 0.4354, "step": 36762 }, { "epoch": 0.94, "grad_norm": 1.6165846586227417, "learning_rate": 1.7423974104524344e-07, "loss": 0.5412, "step": 36763 }, { "epoch": 0.94, "grad_norm": 1.1767539978027344, "learning_rate": 1.7408548362527588e-07, "loss": 0.3278, "step": 36764 }, { "epoch": 0.94, "grad_norm": 1.3382372856140137, "learning_rate": 1.7393129391933716e-07, "loss": 0.5095, "step": 36765 }, { "epoch": 0.94, "grad_norm": 3.1225011348724365, "learning_rate": 1.7377717192849197e-07, "loss": 0.5525, "step": 36766 }, { "epoch": 0.94, "grad_norm": 1.53876531124115, "learning_rate": 1.7362311765380058e-07, "loss": 0.505, "step": 36767 }, { "epoch": 0.94, "grad_norm": 1.6347492933273315, "learning_rate": 1.7346913109632435e-07, "loss": 0.5703, "step": 36768 }, { "epoch": 0.94, "grad_norm": 1.3670908212661743, "learning_rate": 1.7331521225712579e-07, "loss": 0.72, "step": 36769 }, { "epoch": 0.94, "grad_norm": 1.71241295337677, "learning_rate": 1.7316136113726511e-07, "loss": 0.5801, "step": 36770 }, { "epoch": 0.94, "grad_norm": 1.7696940898895264, "learning_rate": 1.7300757773780152e-07, "loss": 0.564, "step": 36771 }, { "epoch": 0.94, "grad_norm": 1.4156419038772583, "learning_rate": 1.7285386205979637e-07, "loss": 0.4033, "step": 36772 }, { "epoch": 0.94, "grad_norm": 1.9048722982406616, "learning_rate": 1.727002141043088e-07, "loss": 0.5095, "step": 36773 }, { "epoch": 0.94, "grad_norm": 1.288657546043396, "learning_rate": 1.725466338723969e-07, "loss": 0.5005, "step": 36774 }, { "epoch": 0.94, "grad_norm": 1.5435513257980347, "learning_rate": 1.7239312136511976e-07, "loss": 0.4583, "step": 36775 }, { "epoch": 0.94, "grad_norm": 1.6472949981689453, "learning_rate": 1.7223967658353545e-07, "loss": 0.5364, "step": 36776 }, { "epoch": 0.94, "grad_norm": 1.2799383401870728, "learning_rate": 1.7208629952869983e-07, "loss": 0.439, "step": 36777 }, { "epoch": 0.94, "grad_norm": 1.34098219871521, "learning_rate": 1.7193299020167087e-07, "loss": 0.4938, "step": 36778 }, { "epoch": 0.94, "grad_norm": 1.2627005577087402, "learning_rate": 1.7177974860350555e-07, "loss": 0.4327, "step": 36779 }, { "epoch": 0.94, "grad_norm": 1.8312923908233643, "learning_rate": 1.7162657473525967e-07, "loss": 0.4868, "step": 36780 }, { "epoch": 0.94, "grad_norm": 2.047025680541992, "learning_rate": 1.7147346859798797e-07, "loss": 0.5317, "step": 36781 }, { "epoch": 0.94, "grad_norm": 2.4450879096984863, "learning_rate": 1.7132043019274735e-07, "loss": 0.5256, "step": 36782 }, { "epoch": 0.94, "grad_norm": 1.3815950155258179, "learning_rate": 1.7116745952059032e-07, "loss": 0.5561, "step": 36783 }, { "epoch": 0.94, "grad_norm": 2.114675760269165, "learning_rate": 1.7101455658257272e-07, "loss": 0.6039, "step": 36784 }, { "epoch": 0.94, "grad_norm": 1.2892014980316162, "learning_rate": 1.70861721379747e-07, "loss": 0.3519, "step": 36785 }, { "epoch": 0.94, "grad_norm": 1.513866901397705, "learning_rate": 1.707089539131679e-07, "loss": 0.5529, "step": 36786 }, { "epoch": 0.94, "grad_norm": 1.0157331228256226, "learning_rate": 1.7055625418388678e-07, "loss": 0.5816, "step": 36787 }, { "epoch": 0.94, "grad_norm": 1.7408907413482666, "learning_rate": 1.7040362219295614e-07, "loss": 0.5786, "step": 36788 }, { "epoch": 0.94, "grad_norm": 1.3920197486877441, "learning_rate": 1.702510579414285e-07, "loss": 0.4261, "step": 36789 }, { "epoch": 0.94, "grad_norm": 1.4923622608184814, "learning_rate": 1.700985614303552e-07, "loss": 0.4158, "step": 36790 }, { "epoch": 0.94, "grad_norm": 4.863336563110352, "learning_rate": 1.6994613266078764e-07, "loss": 0.6989, "step": 36791 }, { "epoch": 0.94, "grad_norm": 1.3926185369491577, "learning_rate": 1.6979377163377386e-07, "loss": 0.3994, "step": 36792 }, { "epoch": 0.94, "grad_norm": 0.9075482487678528, "learning_rate": 1.6964147835036638e-07, "loss": 0.3831, "step": 36793 }, { "epoch": 0.94, "grad_norm": 2.2975921630859375, "learning_rate": 1.694892528116132e-07, "loss": 0.6682, "step": 36794 }, { "epoch": 0.94, "grad_norm": 1.3860429525375366, "learning_rate": 1.6933709501856355e-07, "loss": 0.5738, "step": 36795 }, { "epoch": 0.94, "grad_norm": 1.8240902423858643, "learning_rate": 1.6918500497226765e-07, "loss": 0.4967, "step": 36796 }, { "epoch": 0.94, "grad_norm": 1.2462443113327026, "learning_rate": 1.6903298267377243e-07, "loss": 0.4504, "step": 36797 }, { "epoch": 0.94, "grad_norm": 0.910601019859314, "learning_rate": 1.6888102812412377e-07, "loss": 0.5523, "step": 36798 }, { "epoch": 0.94, "grad_norm": 8.280813217163086, "learning_rate": 1.687291413243719e-07, "loss": 0.6404, "step": 36799 }, { "epoch": 0.94, "grad_norm": 11.71288013458252, "learning_rate": 1.6857732227556268e-07, "loss": 0.5625, "step": 36800 }, { "epoch": 0.94, "grad_norm": 1.7868413925170898, "learning_rate": 1.6842557097874192e-07, "loss": 0.5983, "step": 36801 }, { "epoch": 0.94, "grad_norm": 3.5790956020355225, "learning_rate": 1.6827388743495322e-07, "loss": 0.6126, "step": 36802 }, { "epoch": 0.94, "grad_norm": 1.3735398054122925, "learning_rate": 1.6812227164524575e-07, "loss": 0.3673, "step": 36803 }, { "epoch": 0.94, "grad_norm": 1.29551100730896, "learning_rate": 1.6797072361066201e-07, "loss": 0.5631, "step": 36804 }, { "epoch": 0.94, "grad_norm": 1.9945427179336548, "learning_rate": 1.6781924333224675e-07, "loss": 0.5159, "step": 36805 }, { "epoch": 0.94, "grad_norm": 1.4009052515029907, "learning_rate": 1.6766783081104464e-07, "loss": 0.4174, "step": 36806 }, { "epoch": 0.94, "grad_norm": 1.4321527481079102, "learning_rate": 1.6751648604809934e-07, "loss": 0.454, "step": 36807 }, { "epoch": 0.94, "grad_norm": 2.006413459777832, "learning_rate": 1.673652090444522e-07, "loss": 0.6057, "step": 36808 }, { "epoch": 0.94, "grad_norm": 2.0094900131225586, "learning_rate": 1.6721399980114573e-07, "loss": 0.4774, "step": 36809 }, { "epoch": 0.94, "grad_norm": 1.6918339729309082, "learning_rate": 1.6706285831922354e-07, "loss": 0.545, "step": 36810 }, { "epoch": 0.94, "grad_norm": 0.8652130961418152, "learning_rate": 1.66911784599727e-07, "loss": 0.5334, "step": 36811 }, { "epoch": 0.94, "grad_norm": 8.428121566772461, "learning_rate": 1.6676077864369534e-07, "loss": 0.4681, "step": 36812 }, { "epoch": 0.94, "grad_norm": 3.266202211380005, "learning_rate": 1.666098404521721e-07, "loss": 0.5215, "step": 36813 }, { "epoch": 0.94, "grad_norm": 1.170807957649231, "learning_rate": 1.6645897002619539e-07, "loss": 0.5485, "step": 36814 }, { "epoch": 0.94, "grad_norm": 1.9442225694656372, "learning_rate": 1.6630816736680434e-07, "loss": 0.6207, "step": 36815 }, { "epoch": 0.94, "grad_norm": 1.9866137504577637, "learning_rate": 1.6615743247504035e-07, "loss": 0.5556, "step": 36816 }, { "epoch": 0.94, "grad_norm": 1.6398602724075317, "learning_rate": 1.6600676535194148e-07, "loss": 0.5158, "step": 36817 }, { "epoch": 0.94, "grad_norm": 1.1578783988952637, "learning_rate": 1.658561659985447e-07, "loss": 0.4439, "step": 36818 }, { "epoch": 0.94, "grad_norm": 1.5166620016098022, "learning_rate": 1.6570563441588916e-07, "loss": 0.4684, "step": 36819 }, { "epoch": 0.94, "grad_norm": 1.6265146732330322, "learning_rate": 1.655551706050118e-07, "loss": 0.4341, "step": 36820 }, { "epoch": 0.94, "grad_norm": 1.405966877937317, "learning_rate": 1.6540477456694958e-07, "loss": 0.3715, "step": 36821 }, { "epoch": 0.94, "grad_norm": 2.689748764038086, "learning_rate": 1.6525444630273946e-07, "loss": 0.2521, "step": 36822 }, { "epoch": 0.94, "grad_norm": 1.1598247289657593, "learning_rate": 1.6510418581341615e-07, "loss": 0.5149, "step": 36823 }, { "epoch": 0.94, "grad_norm": 0.9377824068069458, "learning_rate": 1.6495399310001658e-07, "loss": 0.5301, "step": 36824 }, { "epoch": 0.94, "grad_norm": 1.4935894012451172, "learning_rate": 1.6480386816357553e-07, "loss": 0.5257, "step": 36825 }, { "epoch": 0.94, "grad_norm": 1.755651831626892, "learning_rate": 1.6465381100512547e-07, "loss": 0.6511, "step": 36826 }, { "epoch": 0.94, "grad_norm": 2.6541688442230225, "learning_rate": 1.6450382162570333e-07, "loss": 0.6366, "step": 36827 }, { "epoch": 0.94, "grad_norm": 1.2083159685134888, "learning_rate": 1.6435390002634166e-07, "loss": 0.539, "step": 36828 }, { "epoch": 0.94, "grad_norm": 1.3837093114852905, "learning_rate": 1.6420404620807294e-07, "loss": 0.5573, "step": 36829 }, { "epoch": 0.94, "grad_norm": 1.6121737957000732, "learning_rate": 1.640542601719308e-07, "loss": 0.6687, "step": 36830 }, { "epoch": 0.94, "grad_norm": 1.4812390804290771, "learning_rate": 1.6390454191894778e-07, "loss": 0.5551, "step": 36831 }, { "epoch": 0.94, "grad_norm": 1.654302954673767, "learning_rate": 1.637548914501541e-07, "loss": 0.5444, "step": 36832 }, { "epoch": 0.94, "grad_norm": 1.5151349306106567, "learning_rate": 1.636053087665823e-07, "loss": 0.5811, "step": 36833 }, { "epoch": 0.94, "grad_norm": 1.3290382623672485, "learning_rate": 1.6345579386926269e-07, "loss": 0.4436, "step": 36834 }, { "epoch": 0.94, "grad_norm": 1.6116927862167358, "learning_rate": 1.6330634675922663e-07, "loss": 0.4604, "step": 36835 }, { "epoch": 0.94, "grad_norm": 1.2080737352371216, "learning_rate": 1.631569674375022e-07, "loss": 0.4269, "step": 36836 }, { "epoch": 0.94, "grad_norm": 2.2122693061828613, "learning_rate": 1.6300765590511968e-07, "loss": 0.4814, "step": 36837 }, { "epoch": 0.94, "grad_norm": 1.60924232006073, "learning_rate": 1.6285841216310938e-07, "loss": 0.5803, "step": 36838 }, { "epoch": 0.94, "grad_norm": 1.204300045967102, "learning_rate": 1.6270923621249713e-07, "loss": 0.3505, "step": 36839 }, { "epoch": 0.94, "grad_norm": 0.9488237500190735, "learning_rate": 1.6256012805431322e-07, "loss": 0.368, "step": 36840 }, { "epoch": 0.94, "grad_norm": 1.0353533029556274, "learning_rate": 1.6241108768958457e-07, "loss": 0.4607, "step": 36841 }, { "epoch": 0.94, "grad_norm": 1.3312269449234009, "learning_rate": 1.6226211511933931e-07, "loss": 0.5193, "step": 36842 }, { "epoch": 0.94, "grad_norm": 1.5427343845367432, "learning_rate": 1.6211321034460105e-07, "loss": 0.5102, "step": 36843 }, { "epoch": 0.94, "grad_norm": 1.080042839050293, "learning_rate": 1.619643733663978e-07, "loss": 0.5186, "step": 36844 }, { "epoch": 0.94, "grad_norm": 1.6892468929290771, "learning_rate": 1.618156041857566e-07, "loss": 0.5131, "step": 36845 }, { "epoch": 0.94, "grad_norm": 3.706611394882202, "learning_rate": 1.616669028036999e-07, "loss": 0.5024, "step": 36846 }, { "epoch": 0.94, "grad_norm": 1.958388328552246, "learning_rate": 1.615182692212547e-07, "loss": 0.6244, "step": 36847 }, { "epoch": 0.94, "grad_norm": 0.7105594873428345, "learning_rate": 1.6136970343944348e-07, "loss": 0.4734, "step": 36848 }, { "epoch": 0.94, "grad_norm": 9.449358940124512, "learning_rate": 1.612212054592921e-07, "loss": 0.4439, "step": 36849 }, { "epoch": 0.94, "grad_norm": 3.2141482830047607, "learning_rate": 1.6107277528182198e-07, "loss": 0.6086, "step": 36850 }, { "epoch": 0.94, "grad_norm": 1.8862749338150024, "learning_rate": 1.6092441290805784e-07, "loss": 0.4695, "step": 36851 }, { "epoch": 0.94, "grad_norm": 1.3204511404037476, "learning_rate": 1.6077611833902106e-07, "loss": 0.4612, "step": 36852 }, { "epoch": 0.94, "grad_norm": 1.2068248987197876, "learning_rate": 1.60627891575732e-07, "loss": 0.4481, "step": 36853 }, { "epoch": 0.94, "grad_norm": 1.54416823387146, "learning_rate": 1.6047973261921534e-07, "loss": 0.5776, "step": 36854 }, { "epoch": 0.94, "grad_norm": 3.672882556915283, "learning_rate": 1.6033164147049142e-07, "loss": 0.6412, "step": 36855 }, { "epoch": 0.94, "grad_norm": 1.0081734657287598, "learning_rate": 1.6018361813057825e-07, "loss": 0.23, "step": 36856 }, { "epoch": 0.94, "grad_norm": 1.366613507270813, "learning_rate": 1.600356626004984e-07, "loss": 0.4981, "step": 36857 }, { "epoch": 0.94, "grad_norm": 1.3496732711791992, "learning_rate": 1.5988777488126995e-07, "loss": 0.502, "step": 36858 }, { "epoch": 0.94, "grad_norm": 1.6197479963302612, "learning_rate": 1.5973995497391427e-07, "loss": 0.5288, "step": 36859 }, { "epoch": 0.94, "grad_norm": 4.1968865394592285, "learning_rate": 1.595922028794461e-07, "loss": 0.6518, "step": 36860 }, { "epoch": 0.94, "grad_norm": 1.9805355072021484, "learning_rate": 1.5944451859888798e-07, "loss": 0.5582, "step": 36861 }, { "epoch": 0.94, "grad_norm": 1.2125166654586792, "learning_rate": 1.5929690213325577e-07, "loss": 0.4031, "step": 36862 }, { "epoch": 0.94, "grad_norm": 1.7559785842895508, "learning_rate": 1.5914935348356643e-07, "loss": 0.3278, "step": 36863 }, { "epoch": 0.94, "grad_norm": 1.478682279586792, "learning_rate": 1.590018726508369e-07, "loss": 0.7284, "step": 36864 }, { "epoch": 0.94, "grad_norm": 1.72433340549469, "learning_rate": 1.5885445963608415e-07, "loss": 0.5873, "step": 36865 }, { "epoch": 0.94, "grad_norm": 2.846376895904541, "learning_rate": 1.5870711444032405e-07, "loss": 0.5906, "step": 36866 }, { "epoch": 0.94, "grad_norm": 1.609043002128601, "learning_rate": 1.5855983706457022e-07, "loss": 0.442, "step": 36867 }, { "epoch": 0.94, "grad_norm": 1.8856099843978882, "learning_rate": 1.5841262750983967e-07, "loss": 0.5403, "step": 36868 }, { "epoch": 0.94, "grad_norm": 3.621464729309082, "learning_rate": 1.5826548577714707e-07, "loss": 0.558, "step": 36869 }, { "epoch": 0.94, "grad_norm": 1.2588411569595337, "learning_rate": 1.5811841186750388e-07, "loss": 0.4985, "step": 36870 }, { "epoch": 0.95, "grad_norm": 2.6424014568328857, "learning_rate": 1.5797140578192705e-07, "loss": 0.649, "step": 36871 }, { "epoch": 0.95, "grad_norm": 5.138027191162109, "learning_rate": 1.578244675214269e-07, "loss": 0.7803, "step": 36872 }, { "epoch": 0.95, "grad_norm": 1.8485302925109863, "learning_rate": 1.5767759708701814e-07, "loss": 0.4301, "step": 36873 }, { "epoch": 0.95, "grad_norm": 1.153200626373291, "learning_rate": 1.5753079447971e-07, "loss": 0.5638, "step": 36874 }, { "epoch": 0.95, "grad_norm": 1.5479161739349365, "learning_rate": 1.573840597005172e-07, "loss": 0.3281, "step": 36875 }, { "epoch": 0.95, "grad_norm": 1.3687151670455933, "learning_rate": 1.5723739275045003e-07, "loss": 0.5734, "step": 36876 }, { "epoch": 0.95, "grad_norm": 6.3850860595703125, "learning_rate": 1.5709079363051772e-07, "loss": 0.4963, "step": 36877 }, { "epoch": 0.95, "grad_norm": 1.3962090015411377, "learning_rate": 1.5694426234173277e-07, "loss": 0.4128, "step": 36878 }, { "epoch": 0.95, "grad_norm": 1.706794023513794, "learning_rate": 1.5679779888510327e-07, "loss": 0.4903, "step": 36879 }, { "epoch": 0.95, "grad_norm": 1.6480293273925781, "learning_rate": 1.5665140326163953e-07, "loss": 0.3976, "step": 36880 }, { "epoch": 0.95, "grad_norm": 1.095221757888794, "learning_rate": 1.565050754723496e-07, "loss": 0.4877, "step": 36881 }, { "epoch": 0.95, "grad_norm": 1.4112880229949951, "learning_rate": 1.5635881551824384e-07, "loss": 0.6086, "step": 36882 }, { "epoch": 0.95, "grad_norm": 1.4550881385803223, "learning_rate": 1.5621262340032805e-07, "loss": 0.4292, "step": 36883 }, { "epoch": 0.95, "grad_norm": 1.3126094341278076, "learning_rate": 1.5606649911960926e-07, "loss": 0.5579, "step": 36884 }, { "epoch": 0.95, "grad_norm": 2.4961118698120117, "learning_rate": 1.5592044267709662e-07, "loss": 0.4762, "step": 36885 }, { "epoch": 0.95, "grad_norm": 1.3377593755722046, "learning_rate": 1.5577445407379598e-07, "loss": 0.5139, "step": 36886 }, { "epoch": 0.95, "grad_norm": 1.343953013420105, "learning_rate": 1.5562853331071214e-07, "loss": 0.4884, "step": 36887 }, { "epoch": 0.95, "grad_norm": 1.3907662630081177, "learning_rate": 1.5548268038885207e-07, "loss": 0.4812, "step": 36888 }, { "epoch": 0.95, "grad_norm": 1.6957100629806519, "learning_rate": 1.5533689530922047e-07, "loss": 0.5706, "step": 36889 }, { "epoch": 0.95, "grad_norm": 1.531325340270996, "learning_rate": 1.5519117807282214e-07, "loss": 0.4699, "step": 36890 }, { "epoch": 0.95, "grad_norm": 1.1335678100585938, "learning_rate": 1.550455286806607e-07, "loss": 0.4416, "step": 36891 }, { "epoch": 0.95, "grad_norm": 2.731862783432007, "learning_rate": 1.548999471337409e-07, "loss": 0.6602, "step": 36892 }, { "epoch": 0.95, "grad_norm": 1.2348616123199463, "learning_rate": 1.5475443343306528e-07, "loss": 0.5996, "step": 36893 }, { "epoch": 0.95, "grad_norm": 2.970247507095337, "learning_rate": 1.5460898757963527e-07, "loss": 0.5294, "step": 36894 }, { "epoch": 0.95, "grad_norm": 11.232872009277344, "learning_rate": 1.5446360957445672e-07, "loss": 0.5855, "step": 36895 }, { "epoch": 0.95, "grad_norm": 1.3841668367385864, "learning_rate": 1.543182994185277e-07, "loss": 0.456, "step": 36896 }, { "epoch": 0.95, "grad_norm": 1.25216543674469, "learning_rate": 1.5417305711285301e-07, "loss": 0.5244, "step": 36897 }, { "epoch": 0.95, "grad_norm": 2.1206650733947754, "learning_rate": 1.540278826584296e-07, "loss": 0.4387, "step": 36898 }, { "epoch": 0.95, "grad_norm": 1.5584295988082886, "learning_rate": 1.5388277605626224e-07, "loss": 0.4781, "step": 36899 }, { "epoch": 0.95, "grad_norm": 1.487512469291687, "learning_rate": 1.5373773730734786e-07, "loss": 0.6774, "step": 36900 }, { "epoch": 0.95, "grad_norm": 1.831816554069519, "learning_rate": 1.5359276641268683e-07, "loss": 0.491, "step": 36901 }, { "epoch": 0.95, "grad_norm": 1.4900445938110352, "learning_rate": 1.5344786337327833e-07, "loss": 0.5594, "step": 36902 }, { "epoch": 0.95, "grad_norm": 10.564935684204102, "learning_rate": 1.5330302819012155e-07, "loss": 0.4573, "step": 36903 }, { "epoch": 0.95, "grad_norm": 1.3650976419448853, "learning_rate": 1.531582608642135e-07, "loss": 0.5205, "step": 36904 }, { "epoch": 0.95, "grad_norm": 1.997680425643921, "learning_rate": 1.530135613965522e-07, "loss": 0.5841, "step": 36905 }, { "epoch": 0.95, "grad_norm": 1.887082815170288, "learning_rate": 1.5286892978813583e-07, "loss": 0.6989, "step": 36906 }, { "epoch": 0.95, "grad_norm": 1.5117801427841187, "learning_rate": 1.5272436603996022e-07, "loss": 0.6042, "step": 36907 }, { "epoch": 0.95, "grad_norm": 1.2172621488571167, "learning_rate": 1.5257987015302011e-07, "loss": 0.4221, "step": 36908 }, { "epoch": 0.95, "grad_norm": 1.9896653890609741, "learning_rate": 1.5243544212831473e-07, "loss": 0.7409, "step": 36909 }, { "epoch": 0.95, "grad_norm": 12.90674114227295, "learning_rate": 1.5229108196683663e-07, "loss": 0.5783, "step": 36910 }, { "epoch": 0.95, "grad_norm": 1.202793836593628, "learning_rate": 1.5214678966958052e-07, "loss": 0.5682, "step": 36911 }, { "epoch": 0.95, "grad_norm": 1.0544564723968506, "learning_rate": 1.5200256523754232e-07, "loss": 0.5698, "step": 36912 }, { "epoch": 0.95, "grad_norm": 1.0121345520019531, "learning_rate": 1.5185840867171564e-07, "loss": 0.4802, "step": 36913 }, { "epoch": 0.95, "grad_norm": 2.1079771518707275, "learning_rate": 1.5171431997309416e-07, "loss": 0.501, "step": 36914 }, { "epoch": 0.95, "grad_norm": 2.5929877758026123, "learning_rate": 1.515702991426682e-07, "loss": 0.4233, "step": 36915 }, { "epoch": 0.95, "grad_norm": 1.3869342803955078, "learning_rate": 1.514263461814347e-07, "loss": 0.4992, "step": 36916 }, { "epoch": 0.95, "grad_norm": 0.9272416234016418, "learning_rate": 1.512824610903818e-07, "loss": 0.3444, "step": 36917 }, { "epoch": 0.95, "grad_norm": 1.4410570859909058, "learning_rate": 1.5113864387050313e-07, "loss": 0.5939, "step": 36918 }, { "epoch": 0.95, "grad_norm": 1.701993703842163, "learning_rate": 1.5099489452278905e-07, "loss": 0.6114, "step": 36919 }, { "epoch": 0.95, "grad_norm": 1.1615263223648071, "learning_rate": 1.5085121304823091e-07, "loss": 0.4777, "step": 36920 }, { "epoch": 0.95, "grad_norm": 1.5026335716247559, "learning_rate": 1.5070759944781798e-07, "loss": 0.4871, "step": 36921 }, { "epoch": 0.95, "grad_norm": 1.3104690313339233, "learning_rate": 1.5056405372254058e-07, "loss": 0.4298, "step": 36922 }, { "epoch": 0.95, "grad_norm": 1.0238492488861084, "learning_rate": 1.5042057587338786e-07, "loss": 0.5525, "step": 36923 }, { "epoch": 0.95, "grad_norm": 1.4567058086395264, "learning_rate": 1.50277165901348e-07, "loss": 0.5141, "step": 36924 }, { "epoch": 0.95, "grad_norm": 1.3010367155075073, "learning_rate": 1.5013382380740904e-07, "loss": 0.5402, "step": 36925 }, { "epoch": 0.95, "grad_norm": 1.7981113195419312, "learning_rate": 1.4999054959256132e-07, "loss": 0.5342, "step": 36926 }, { "epoch": 0.95, "grad_norm": 1.5345441102981567, "learning_rate": 1.498473432577896e-07, "loss": 0.8233, "step": 36927 }, { "epoch": 0.95, "grad_norm": 1.808827519416809, "learning_rate": 1.4970420480408089e-07, "loss": 0.4986, "step": 36928 }, { "epoch": 0.95, "grad_norm": 1.5778744220733643, "learning_rate": 1.4956113423242214e-07, "loss": 0.5269, "step": 36929 }, { "epoch": 0.95, "grad_norm": 1.2287492752075195, "learning_rate": 1.4941813154380036e-07, "loss": 0.519, "step": 36930 }, { "epoch": 0.95, "grad_norm": 9.6264009475708, "learning_rate": 1.492751967391992e-07, "loss": 0.4495, "step": 36931 }, { "epoch": 0.95, "grad_norm": 1.0661859512329102, "learning_rate": 1.4913232981960458e-07, "loss": 0.4259, "step": 36932 }, { "epoch": 0.95, "grad_norm": 1.5865305662155151, "learning_rate": 1.4898953078600232e-07, "loss": 0.47, "step": 36933 }, { "epoch": 0.95, "grad_norm": 0.9594651460647583, "learning_rate": 1.4884679963937387e-07, "loss": 0.4155, "step": 36934 }, { "epoch": 0.95, "grad_norm": 1.8274441957473755, "learning_rate": 1.48704136380704e-07, "loss": 0.617, "step": 36935 }, { "epoch": 0.95, "grad_norm": 1.8359040021896362, "learning_rate": 1.4856154101097642e-07, "loss": 0.6013, "step": 36936 }, { "epoch": 0.95, "grad_norm": 17.877717971801758, "learning_rate": 1.4841901353117473e-07, "loss": 0.6599, "step": 36937 }, { "epoch": 0.95, "grad_norm": 1.3131887912750244, "learning_rate": 1.4827655394227814e-07, "loss": 0.5499, "step": 36938 }, { "epoch": 0.95, "grad_norm": 1.3466359376907349, "learning_rate": 1.4813416224527033e-07, "loss": 0.379, "step": 36939 }, { "epoch": 0.95, "grad_norm": 2.9952337741851807, "learning_rate": 1.4799183844113273e-07, "loss": 0.6332, "step": 36940 }, { "epoch": 0.95, "grad_norm": 2.383049726486206, "learning_rate": 1.478495825308457e-07, "loss": 0.5928, "step": 36941 }, { "epoch": 0.95, "grad_norm": 4.501444339752197, "learning_rate": 1.4770739451538952e-07, "loss": 0.5586, "step": 36942 }, { "epoch": 0.95, "grad_norm": 1.65487539768219, "learning_rate": 1.4756527439574454e-07, "loss": 0.2941, "step": 36943 }, { "epoch": 0.95, "grad_norm": 1.7731680870056152, "learning_rate": 1.4742322217288884e-07, "loss": 0.5497, "step": 36944 }, { "epoch": 0.95, "grad_norm": 4.7725677490234375, "learning_rate": 1.472812378478028e-07, "loss": 0.5596, "step": 36945 }, { "epoch": 0.95, "grad_norm": 2.4895405769348145, "learning_rate": 1.471393214214645e-07, "loss": 0.644, "step": 36946 }, { "epoch": 0.95, "grad_norm": 1.1711162328720093, "learning_rate": 1.46997472894852e-07, "loss": 0.5365, "step": 36947 }, { "epoch": 0.95, "grad_norm": 1.577744483947754, "learning_rate": 1.4685569226894235e-07, "loss": 0.6096, "step": 36948 }, { "epoch": 0.95, "grad_norm": 1.854949712753296, "learning_rate": 1.4671397954471257e-07, "loss": 0.6285, "step": 36949 }, { "epoch": 0.95, "grad_norm": 11.47230339050293, "learning_rate": 1.4657233472314069e-07, "loss": 0.3927, "step": 36950 }, { "epoch": 0.95, "grad_norm": 1.350999355316162, "learning_rate": 1.4643075780520155e-07, "loss": 0.4108, "step": 36951 }, { "epoch": 0.95, "grad_norm": 1.2559175491333008, "learning_rate": 1.462892487918699e-07, "loss": 0.5144, "step": 36952 }, { "epoch": 0.95, "grad_norm": 1.6407825946807861, "learning_rate": 1.4614780768412385e-07, "loss": 0.6754, "step": 36953 }, { "epoch": 0.95, "grad_norm": 1.4543589353561401, "learning_rate": 1.4600643448293483e-07, "loss": 0.6233, "step": 36954 }, { "epoch": 0.95, "grad_norm": 1.0968003273010254, "learning_rate": 1.4586512918927876e-07, "loss": 0.4746, "step": 36955 }, { "epoch": 0.95, "grad_norm": 1.3897061347961426, "learning_rate": 1.4572389180412926e-07, "loss": 0.4732, "step": 36956 }, { "epoch": 0.95, "grad_norm": 2.5836238861083984, "learning_rate": 1.4558272232846004e-07, "loss": 0.4294, "step": 36957 }, { "epoch": 0.95, "grad_norm": 1.181296706199646, "learning_rate": 1.4544162076324253e-07, "loss": 0.4688, "step": 36958 }, { "epoch": 0.95, "grad_norm": 1.349431037902832, "learning_rate": 1.453005871094515e-07, "loss": 0.5765, "step": 36959 }, { "epoch": 0.95, "grad_norm": 1.2602633237838745, "learning_rate": 1.4515962136805727e-07, "loss": 0.5729, "step": 36960 }, { "epoch": 0.95, "grad_norm": 1.2100698947906494, "learning_rate": 1.4501872354003132e-07, "loss": 0.3994, "step": 36961 }, { "epoch": 0.95, "grad_norm": 2.1241793632507324, "learning_rate": 1.4487789362634396e-07, "loss": 0.5139, "step": 36962 }, { "epoch": 0.95, "grad_norm": 1.2727335691452026, "learning_rate": 1.4473713162796665e-07, "loss": 0.5433, "step": 36963 }, { "epoch": 0.95, "grad_norm": 2.137080430984497, "learning_rate": 1.4459643754587084e-07, "loss": 0.4253, "step": 36964 }, { "epoch": 0.95, "grad_norm": 1.4192947149276733, "learning_rate": 1.444558113810235e-07, "loss": 0.5307, "step": 36965 }, { "epoch": 0.95, "grad_norm": 1.3034489154815674, "learning_rate": 1.4431525313439498e-07, "loss": 0.4925, "step": 36966 }, { "epoch": 0.95, "grad_norm": 1.69986093044281, "learning_rate": 1.4417476280695343e-07, "loss": 0.5274, "step": 36967 }, { "epoch": 0.95, "grad_norm": 1.2362312078475952, "learning_rate": 1.4403434039966802e-07, "loss": 0.5317, "step": 36968 }, { "epoch": 0.95, "grad_norm": 5.727886199951172, "learning_rate": 1.438939859135058e-07, "loss": 0.5863, "step": 36969 }, { "epoch": 0.95, "grad_norm": 1.9887443780899048, "learning_rate": 1.4375369934943373e-07, "loss": 0.5496, "step": 36970 }, { "epoch": 0.95, "grad_norm": 1.751908779144287, "learning_rate": 1.4361348070841886e-07, "loss": 0.6246, "step": 36971 }, { "epoch": 0.95, "grad_norm": 1.2770928144454956, "learning_rate": 1.4347332999142706e-07, "loss": 0.4714, "step": 36972 }, { "epoch": 0.95, "grad_norm": 1.026581048965454, "learning_rate": 1.4333324719942533e-07, "loss": 0.474, "step": 36973 }, { "epoch": 0.95, "grad_norm": 2.400702714920044, "learning_rate": 1.4319323233337846e-07, "loss": 0.6592, "step": 36974 }, { "epoch": 0.95, "grad_norm": 4.316686153411865, "learning_rate": 1.4305328539425011e-07, "loss": 0.503, "step": 36975 }, { "epoch": 0.95, "grad_norm": 0.9196698665618896, "learning_rate": 1.4291340638300733e-07, "loss": 0.5114, "step": 36976 }, { "epoch": 0.95, "grad_norm": 1.5928049087524414, "learning_rate": 1.4277359530061153e-07, "loss": 0.5549, "step": 36977 }, { "epoch": 0.95, "grad_norm": 1.972220540046692, "learning_rate": 1.426338521480275e-07, "loss": 0.4618, "step": 36978 }, { "epoch": 0.95, "grad_norm": 1.5312108993530273, "learning_rate": 1.424941769262167e-07, "loss": 0.4609, "step": 36979 }, { "epoch": 0.95, "grad_norm": 5.477120399475098, "learning_rate": 1.4235456963614502e-07, "loss": 0.5074, "step": 36980 }, { "epoch": 0.95, "grad_norm": 2.9377360343933105, "learning_rate": 1.422150302787717e-07, "loss": 0.5505, "step": 36981 }, { "epoch": 0.95, "grad_norm": 4.488470554351807, "learning_rate": 1.420755588550582e-07, "loss": 0.3332, "step": 36982 }, { "epoch": 0.95, "grad_norm": 3.7746634483337402, "learning_rate": 1.4193615536596706e-07, "loss": 0.5264, "step": 36983 }, { "epoch": 0.95, "grad_norm": 4.265127658843994, "learning_rate": 1.4179681981245864e-07, "loss": 0.5159, "step": 36984 }, { "epoch": 0.95, "grad_norm": 2.3493270874023438, "learning_rate": 1.4165755219549327e-07, "loss": 0.5593, "step": 36985 }, { "epoch": 0.95, "grad_norm": 1.7987483739852905, "learning_rate": 1.415183525160302e-07, "loss": 0.5412, "step": 36986 }, { "epoch": 0.95, "grad_norm": 1.518224835395813, "learning_rate": 1.4137922077502862e-07, "loss": 0.5459, "step": 36987 }, { "epoch": 0.95, "grad_norm": 0.9481062293052673, "learning_rate": 1.4124015697344896e-07, "loss": 0.4294, "step": 36988 }, { "epoch": 0.95, "grad_norm": 1.6261540651321411, "learning_rate": 1.4110116111224702e-07, "loss": 0.568, "step": 36989 }, { "epoch": 0.95, "grad_norm": 1.2930798530578613, "learning_rate": 1.409622331923821e-07, "loss": 0.5261, "step": 36990 }, { "epoch": 0.95, "grad_norm": 3.9276719093322754, "learning_rate": 1.408233732148112e-07, "loss": 0.5725, "step": 36991 }, { "epoch": 0.95, "grad_norm": 1.0600147247314453, "learning_rate": 1.4068458118049243e-07, "loss": 0.417, "step": 36992 }, { "epoch": 0.95, "grad_norm": 1.560749888420105, "learning_rate": 1.405458570903806e-07, "loss": 0.5102, "step": 36993 }, { "epoch": 0.95, "grad_norm": 5.2816362380981445, "learning_rate": 1.404072009454327e-07, "loss": 0.5185, "step": 36994 }, { "epoch": 0.95, "grad_norm": 1.738184928894043, "learning_rate": 1.4026861274660353e-07, "loss": 0.4942, "step": 36995 }, { "epoch": 0.95, "grad_norm": 1.1669187545776367, "learning_rate": 1.4013009249484789e-07, "loss": 0.5795, "step": 36996 }, { "epoch": 0.95, "grad_norm": 1.4501755237579346, "learning_rate": 1.3999164019112276e-07, "loss": 0.4858, "step": 36997 }, { "epoch": 0.95, "grad_norm": 1.1475435495376587, "learning_rate": 1.3985325583637966e-07, "loss": 0.4867, "step": 36998 }, { "epoch": 0.95, "grad_norm": 1.9747117757797241, "learning_rate": 1.3971493943157334e-07, "loss": 0.6755, "step": 36999 }, { "epoch": 0.95, "grad_norm": 0.8372125029563904, "learning_rate": 1.3957669097765635e-07, "loss": 0.4639, "step": 37000 }, { "epoch": 0.95, "grad_norm": 0.7861217260360718, "learning_rate": 1.3943851047558243e-07, "loss": 0.3359, "step": 37001 }, { "epoch": 0.95, "grad_norm": 1.3156930208206177, "learning_rate": 1.393003979263041e-07, "loss": 0.4571, "step": 37002 }, { "epoch": 0.95, "grad_norm": 1.1298683881759644, "learning_rate": 1.3916235333077065e-07, "loss": 0.4627, "step": 37003 }, { "epoch": 0.95, "grad_norm": 5.364206314086914, "learning_rate": 1.3902437668993573e-07, "loss": 0.557, "step": 37004 }, { "epoch": 0.95, "grad_norm": 1.7300466299057007, "learning_rate": 1.388864680047497e-07, "loss": 0.4489, "step": 37005 }, { "epoch": 0.95, "grad_norm": 1.621125340461731, "learning_rate": 1.387486272761629e-07, "loss": 0.4818, "step": 37006 }, { "epoch": 0.95, "grad_norm": 1.3567183017730713, "learning_rate": 1.3861085450512457e-07, "loss": 0.573, "step": 37007 }, { "epoch": 0.95, "grad_norm": 1.7681416273117065, "learning_rate": 1.3847314969258508e-07, "loss": 0.4218, "step": 37008 }, { "epoch": 0.95, "grad_norm": 3.5816266536712646, "learning_rate": 1.3833551283949364e-07, "loss": 0.5292, "step": 37009 }, { "epoch": 0.95, "grad_norm": 1.6357200145721436, "learning_rate": 1.3819794394679732e-07, "loss": 0.432, "step": 37010 }, { "epoch": 0.95, "grad_norm": 3.0467281341552734, "learning_rate": 1.380604430154453e-07, "loss": 0.6034, "step": 37011 }, { "epoch": 0.95, "grad_norm": 1.748322606086731, "learning_rate": 1.3792301004638463e-07, "loss": 0.4649, "step": 37012 }, { "epoch": 0.95, "grad_norm": 5.811155796051025, "learning_rate": 1.3778564504056237e-07, "loss": 0.6035, "step": 37013 }, { "epoch": 0.95, "grad_norm": 2.461475372314453, "learning_rate": 1.3764834799892547e-07, "loss": 0.5937, "step": 37014 }, { "epoch": 0.95, "grad_norm": 1.4313100576400757, "learning_rate": 1.3751111892242097e-07, "loss": 0.4734, "step": 37015 }, { "epoch": 0.95, "grad_norm": 1.2809568643569946, "learning_rate": 1.3737395781199148e-07, "loss": 0.4957, "step": 37016 }, { "epoch": 0.95, "grad_norm": 1.6425005197525024, "learning_rate": 1.3723686466858622e-07, "loss": 0.5584, "step": 37017 }, { "epoch": 0.95, "grad_norm": 1.3178353309631348, "learning_rate": 1.3709983949314775e-07, "loss": 0.5414, "step": 37018 }, { "epoch": 0.95, "grad_norm": 1.6250734329223633, "learning_rate": 1.3696288228662092e-07, "loss": 0.5272, "step": 37019 }, { "epoch": 0.95, "grad_norm": 0.982833743095398, "learning_rate": 1.3682599304994825e-07, "loss": 0.287, "step": 37020 }, { "epoch": 0.95, "grad_norm": 5.6319050788879395, "learning_rate": 1.3668917178407571e-07, "loss": 0.3812, "step": 37021 }, { "epoch": 0.95, "grad_norm": 1.6667026281356812, "learning_rate": 1.365524184899436e-07, "loss": 0.3541, "step": 37022 }, { "epoch": 0.95, "grad_norm": 1.4118272066116333, "learning_rate": 1.3641573316849566e-07, "loss": 0.4899, "step": 37023 }, { "epoch": 0.95, "grad_norm": 1.2454540729522705, "learning_rate": 1.3627911582067333e-07, "loss": 0.4567, "step": 37024 }, { "epoch": 0.95, "grad_norm": 8.675949096679688, "learning_rate": 1.3614256644741918e-07, "loss": 0.4044, "step": 37025 }, { "epoch": 0.95, "grad_norm": 1.351589322090149, "learning_rate": 1.360060850496725e-07, "loss": 0.5249, "step": 37026 }, { "epoch": 0.95, "grad_norm": 2.6660261154174805, "learning_rate": 1.358696716283747e-07, "loss": 0.6588, "step": 37027 }, { "epoch": 0.95, "grad_norm": 11.263556480407715, "learning_rate": 1.357333261844662e-07, "loss": 0.5525, "step": 37028 }, { "epoch": 0.95, "grad_norm": 1.4939237833023071, "learning_rate": 1.3559704871888735e-07, "loss": 0.3302, "step": 37029 }, { "epoch": 0.95, "grad_norm": 1.1956429481506348, "learning_rate": 1.3546083923257401e-07, "loss": 0.4898, "step": 37030 }, { "epoch": 0.95, "grad_norm": 1.4809259176254272, "learning_rate": 1.3532469772646883e-07, "loss": 0.5913, "step": 37031 }, { "epoch": 0.95, "grad_norm": 1.2432185411453247, "learning_rate": 1.351886242015077e-07, "loss": 0.5102, "step": 37032 }, { "epoch": 0.95, "grad_norm": 1.914594292640686, "learning_rate": 1.3505261865862873e-07, "loss": 0.6841, "step": 37033 }, { "epoch": 0.95, "grad_norm": 2.083343267440796, "learning_rate": 1.34916681098769e-07, "loss": 0.701, "step": 37034 }, { "epoch": 0.95, "grad_norm": 1.701595664024353, "learning_rate": 1.347808115228666e-07, "loss": 0.4905, "step": 37035 }, { "epoch": 0.95, "grad_norm": 1.3660244941711426, "learning_rate": 1.3464500993185637e-07, "loss": 0.5283, "step": 37036 }, { "epoch": 0.95, "grad_norm": 1.1535329818725586, "learning_rate": 1.3450927632667422e-07, "loss": 0.376, "step": 37037 }, { "epoch": 0.95, "grad_norm": 3.886185646057129, "learning_rate": 1.3437361070825716e-07, "loss": 0.4177, "step": 37038 }, { "epoch": 0.95, "grad_norm": 1.1670546531677246, "learning_rate": 1.342380130775378e-07, "loss": 0.6457, "step": 37039 }, { "epoch": 0.95, "grad_norm": 1.797187089920044, "learning_rate": 1.3410248343545206e-07, "loss": 0.5493, "step": 37040 }, { "epoch": 0.95, "grad_norm": 1.5152430534362793, "learning_rate": 1.3396702178293364e-07, "loss": 0.4404, "step": 37041 }, { "epoch": 0.95, "grad_norm": 1.4625385999679565, "learning_rate": 1.3383162812091732e-07, "loss": 0.5033, "step": 37042 }, { "epoch": 0.95, "grad_norm": 2.322267770767212, "learning_rate": 1.336963024503335e-07, "loss": 0.5966, "step": 37043 }, { "epoch": 0.95, "grad_norm": 2.7706730365753174, "learning_rate": 1.3356104477211584e-07, "loss": 0.5992, "step": 37044 }, { "epoch": 0.95, "grad_norm": 1.2915937900543213, "learning_rate": 1.3342585508719807e-07, "loss": 0.4933, "step": 37045 }, { "epoch": 0.95, "grad_norm": 2.5336883068084717, "learning_rate": 1.3329073339651056e-07, "loss": 0.7997, "step": 37046 }, { "epoch": 0.95, "grad_norm": 4.491057872772217, "learning_rate": 1.3315567970098253e-07, "loss": 0.4432, "step": 37047 }, { "epoch": 0.95, "grad_norm": 1.084511399269104, "learning_rate": 1.330206940015488e-07, "loss": 0.3676, "step": 37048 }, { "epoch": 0.95, "grad_norm": 1.492347240447998, "learning_rate": 1.3288577629913646e-07, "loss": 0.6467, "step": 37049 }, { "epoch": 0.95, "grad_norm": 1.552339792251587, "learning_rate": 1.327509265946758e-07, "loss": 0.3611, "step": 37050 }, { "epoch": 0.95, "grad_norm": 1.5991384983062744, "learning_rate": 1.3261614488909612e-07, "loss": 0.5374, "step": 37051 }, { "epoch": 0.95, "grad_norm": 7.712532997131348, "learning_rate": 1.324814311833278e-07, "loss": 0.4162, "step": 37052 }, { "epoch": 0.95, "grad_norm": 1.2712140083312988, "learning_rate": 1.3234678547829894e-07, "loss": 0.3696, "step": 37053 }, { "epoch": 0.95, "grad_norm": 1.024266004562378, "learning_rate": 1.3221220777493438e-07, "loss": 0.4526, "step": 37054 }, { "epoch": 0.95, "grad_norm": 1.3416815996170044, "learning_rate": 1.3207769807416447e-07, "loss": 0.6206, "step": 37055 }, { "epoch": 0.95, "grad_norm": 1.723730444908142, "learning_rate": 1.319432563769163e-07, "loss": 0.5254, "step": 37056 }, { "epoch": 0.95, "grad_norm": 1.3990474939346313, "learning_rate": 1.3180888268411464e-07, "loss": 0.3827, "step": 37057 }, { "epoch": 0.95, "grad_norm": 10.46821403503418, "learning_rate": 1.316745769966865e-07, "loss": 0.4261, "step": 37058 }, { "epoch": 0.95, "grad_norm": 5.868499755859375, "learning_rate": 1.3154033931555787e-07, "loss": 0.6124, "step": 37059 }, { "epoch": 0.95, "grad_norm": 1.3719234466552734, "learning_rate": 1.3140616964165243e-07, "loss": 0.4438, "step": 37060 }, { "epoch": 0.95, "grad_norm": 2.4010813236236572, "learning_rate": 1.3127206797589608e-07, "loss": 0.5053, "step": 37061 }, { "epoch": 0.95, "grad_norm": 1.239357829093933, "learning_rate": 1.3113803431921258e-07, "loss": 0.3773, "step": 37062 }, { "epoch": 0.95, "grad_norm": 1.388451099395752, "learning_rate": 1.3100406867252559e-07, "loss": 0.4072, "step": 37063 }, { "epoch": 0.95, "grad_norm": 2.1434855461120605, "learning_rate": 1.308701710367566e-07, "loss": 0.6154, "step": 37064 }, { "epoch": 0.95, "grad_norm": 2.7188119888305664, "learning_rate": 1.3073634141283153e-07, "loss": 0.4886, "step": 37065 }, { "epoch": 0.95, "grad_norm": 1.2433223724365234, "learning_rate": 1.306025798016719e-07, "loss": 0.5211, "step": 37066 }, { "epoch": 0.95, "grad_norm": 1.182173252105713, "learning_rate": 1.3046888620419806e-07, "loss": 0.4953, "step": 37067 }, { "epoch": 0.95, "grad_norm": 2.8735361099243164, "learning_rate": 1.303352606213304e-07, "loss": 0.467, "step": 37068 }, { "epoch": 0.95, "grad_norm": 2.3914387226104736, "learning_rate": 1.3020170305399372e-07, "loss": 0.5585, "step": 37069 }, { "epoch": 0.95, "grad_norm": 2.1730897426605225, "learning_rate": 1.3006821350310507e-07, "loss": 0.6071, "step": 37070 }, { "epoch": 0.95, "grad_norm": 1.3952449560165405, "learning_rate": 1.299347919695848e-07, "loss": 0.5377, "step": 37071 }, { "epoch": 0.95, "grad_norm": 1.3934921026229858, "learning_rate": 1.2980143845435333e-07, "loss": 0.4453, "step": 37072 }, { "epoch": 0.95, "grad_norm": 2.716388702392578, "learning_rate": 1.2966815295832992e-07, "loss": 0.6101, "step": 37073 }, { "epoch": 0.95, "grad_norm": 0.9223410487174988, "learning_rate": 1.2953493548243267e-07, "loss": 0.4254, "step": 37074 }, { "epoch": 0.95, "grad_norm": 2.1392714977264404, "learning_rate": 1.294017860275776e-07, "loss": 0.6098, "step": 37075 }, { "epoch": 0.95, "grad_norm": 1.502607822418213, "learning_rate": 1.292687045946861e-07, "loss": 0.525, "step": 37076 }, { "epoch": 0.95, "grad_norm": 1.2792845964431763, "learning_rate": 1.2913569118467194e-07, "loss": 0.3734, "step": 37077 }, { "epoch": 0.95, "grad_norm": 1.2519829273223877, "learning_rate": 1.290027457984533e-07, "loss": 0.465, "step": 37078 }, { "epoch": 0.95, "grad_norm": 1.7849575281143188, "learning_rate": 1.2886986843694715e-07, "loss": 0.4597, "step": 37079 }, { "epoch": 0.95, "grad_norm": 1.301588535308838, "learning_rate": 1.2873705910106726e-07, "loss": 0.4496, "step": 37080 }, { "epoch": 0.95, "grad_norm": 1.118159532546997, "learning_rate": 1.2860431779172955e-07, "loss": 0.4595, "step": 37081 }, { "epoch": 0.95, "grad_norm": 1.667562484741211, "learning_rate": 1.2847164450984996e-07, "loss": 0.5252, "step": 37082 }, { "epoch": 0.95, "grad_norm": 1.626229166984558, "learning_rate": 1.2833903925634216e-07, "loss": 0.5609, "step": 37083 }, { "epoch": 0.95, "grad_norm": 1.3348290920257568, "learning_rate": 1.2820650203211994e-07, "loss": 0.4427, "step": 37084 }, { "epoch": 0.95, "grad_norm": 2.2559702396392822, "learning_rate": 1.280740328380947e-07, "loss": 0.6162, "step": 37085 }, { "epoch": 0.95, "grad_norm": 1.2401536703109741, "learning_rate": 1.279416316751825e-07, "loss": 0.7247, "step": 37086 }, { "epoch": 0.95, "grad_norm": 1.9395146369934082, "learning_rate": 1.2780929854429358e-07, "loss": 0.515, "step": 37087 }, { "epoch": 0.95, "grad_norm": 1.5750377178192139, "learning_rate": 1.2767703344634065e-07, "loss": 0.5552, "step": 37088 }, { "epoch": 0.95, "grad_norm": 1.361698031425476, "learning_rate": 1.2754483638223514e-07, "loss": 0.5147, "step": 37089 }, { "epoch": 0.95, "grad_norm": 21.432594299316406, "learning_rate": 1.2741270735288968e-07, "loss": 0.6358, "step": 37090 }, { "epoch": 0.95, "grad_norm": 1.7777726650238037, "learning_rate": 1.2728064635921135e-07, "loss": 0.445, "step": 37091 }, { "epoch": 0.95, "grad_norm": 2.0359206199645996, "learning_rate": 1.271486534021127e-07, "loss": 0.5801, "step": 37092 }, { "epoch": 0.95, "grad_norm": 1.2930552959442139, "learning_rate": 1.2701672848250303e-07, "loss": 0.5021, "step": 37093 }, { "epoch": 0.95, "grad_norm": 2.7536609172821045, "learning_rate": 1.2688487160129159e-07, "loss": 0.4589, "step": 37094 }, { "epoch": 0.95, "grad_norm": 1.327871561050415, "learning_rate": 1.2675308275938548e-07, "loss": 0.3886, "step": 37095 }, { "epoch": 0.95, "grad_norm": 1.6448370218276978, "learning_rate": 1.2662136195769615e-07, "loss": 0.609, "step": 37096 }, { "epoch": 0.95, "grad_norm": 1.0077548027038574, "learning_rate": 1.2648970919712732e-07, "loss": 0.4411, "step": 37097 }, { "epoch": 0.95, "grad_norm": 2.72731351852417, "learning_rate": 1.2635812447858942e-07, "loss": 0.6736, "step": 37098 }, { "epoch": 0.95, "grad_norm": 25.12996482849121, "learning_rate": 1.2622660780298722e-07, "loss": 0.5629, "step": 37099 }, { "epoch": 0.95, "grad_norm": 1.264064073562622, "learning_rate": 1.2609515917122894e-07, "loss": 0.426, "step": 37100 }, { "epoch": 0.95, "grad_norm": 1.6882898807525635, "learning_rate": 1.2596377858421937e-07, "loss": 0.4905, "step": 37101 }, { "epoch": 0.95, "grad_norm": 1.3961716890335083, "learning_rate": 1.2583246604286337e-07, "loss": 0.6553, "step": 37102 }, { "epoch": 0.95, "grad_norm": 6.136422634124756, "learning_rate": 1.2570122154806685e-07, "loss": 0.5633, "step": 37103 }, { "epoch": 0.95, "grad_norm": 2.238830804824829, "learning_rate": 1.2557004510073356e-07, "loss": 0.564, "step": 37104 }, { "epoch": 0.95, "grad_norm": 1.3237838745117188, "learning_rate": 1.2543893670176722e-07, "loss": 0.5092, "step": 37105 }, { "epoch": 0.95, "grad_norm": 1.072493314743042, "learning_rate": 1.2530789635207263e-07, "loss": 0.4868, "step": 37106 }, { "epoch": 0.95, "grad_norm": 1.664109230041504, "learning_rate": 1.2517692405255244e-07, "loss": 0.4415, "step": 37107 }, { "epoch": 0.95, "grad_norm": 2.298365831375122, "learning_rate": 1.2504601980410924e-07, "loss": 0.5518, "step": 37108 }, { "epoch": 0.95, "grad_norm": 1.8223005533218384, "learning_rate": 1.2491518360764344e-07, "loss": 0.6166, "step": 37109 }, { "epoch": 0.95, "grad_norm": 1.1483145952224731, "learning_rate": 1.2478441546405874e-07, "loss": 0.4997, "step": 37110 }, { "epoch": 0.95, "grad_norm": 2.34891414642334, "learning_rate": 1.2465371537425663e-07, "loss": 0.6671, "step": 37111 }, { "epoch": 0.95, "grad_norm": 1.2248320579528809, "learning_rate": 1.245230833391353e-07, "loss": 0.4479, "step": 37112 }, { "epoch": 0.95, "grad_norm": 1.645466923713684, "learning_rate": 1.243925193595974e-07, "loss": 0.5306, "step": 37113 }, { "epoch": 0.95, "grad_norm": 1.0702556371688843, "learning_rate": 1.2426202343654104e-07, "loss": 0.5819, "step": 37114 }, { "epoch": 0.95, "grad_norm": 1.106034278869629, "learning_rate": 1.2413159557086662e-07, "loss": 0.4147, "step": 37115 }, { "epoch": 0.95, "grad_norm": 1.2662891149520874, "learning_rate": 1.2400123576347233e-07, "loss": 0.5254, "step": 37116 }, { "epoch": 0.95, "grad_norm": 1.4526509046554565, "learning_rate": 1.2387094401525747e-07, "loss": 0.6202, "step": 37117 }, { "epoch": 0.95, "grad_norm": 14.233742713928223, "learning_rate": 1.2374072032711903e-07, "loss": 0.5473, "step": 37118 }, { "epoch": 0.95, "grad_norm": 1.471199631690979, "learning_rate": 1.2361056469995413e-07, "loss": 0.4986, "step": 37119 }, { "epoch": 0.95, "grad_norm": 2.075105667114258, "learning_rate": 1.2348047713466093e-07, "loss": 0.4494, "step": 37120 }, { "epoch": 0.95, "grad_norm": 1.4091439247131348, "learning_rate": 1.233504576321354e-07, "loss": 0.498, "step": 37121 }, { "epoch": 0.95, "grad_norm": 1.0596097707748413, "learning_rate": 1.232205061932723e-07, "loss": 0.5025, "step": 37122 }, { "epoch": 0.95, "grad_norm": 1.259529709815979, "learning_rate": 1.2309062281896988e-07, "loss": 0.436, "step": 37123 }, { "epoch": 0.95, "grad_norm": 1.256203532218933, "learning_rate": 1.2296080751012075e-07, "loss": 0.4792, "step": 37124 }, { "epoch": 0.95, "grad_norm": 7.753757953643799, "learning_rate": 1.228310602676208e-07, "loss": 0.5501, "step": 37125 }, { "epoch": 0.95, "grad_norm": 1.7767565250396729, "learning_rate": 1.2270138109236274e-07, "loss": 0.4935, "step": 37126 }, { "epoch": 0.95, "grad_norm": 13.38253116607666, "learning_rate": 1.2257176998524133e-07, "loss": 0.6818, "step": 37127 }, { "epoch": 0.95, "grad_norm": 1.3964978456497192, "learning_rate": 1.2244222694715037e-07, "loss": 0.5182, "step": 37128 }, { "epoch": 0.95, "grad_norm": 1.1961171627044678, "learning_rate": 1.223127519789813e-07, "loss": 0.4859, "step": 37129 }, { "epoch": 0.95, "grad_norm": 1.4646997451782227, "learning_rate": 1.221833450816279e-07, "loss": 0.4226, "step": 37130 }, { "epoch": 0.95, "grad_norm": 2.305745840072632, "learning_rate": 1.2205400625598051e-07, "loss": 0.5107, "step": 37131 }, { "epoch": 0.95, "grad_norm": 2.9267196655273438, "learning_rate": 1.2192473550293181e-07, "loss": 0.7503, "step": 37132 }, { "epoch": 0.95, "grad_norm": 1.1983873844146729, "learning_rate": 1.2179553282336998e-07, "loss": 0.4425, "step": 37133 }, { "epoch": 0.95, "grad_norm": 1.3883631229400635, "learning_rate": 1.216663982181887e-07, "loss": 0.3775, "step": 37134 }, { "epoch": 0.95, "grad_norm": 1.332200288772583, "learning_rate": 1.215373316882762e-07, "loss": 0.4217, "step": 37135 }, { "epoch": 0.95, "grad_norm": 1.321276068687439, "learning_rate": 1.2140833323452062e-07, "loss": 0.5412, "step": 37136 }, { "epoch": 0.95, "grad_norm": 1.7223366498947144, "learning_rate": 1.2127940285781458e-07, "loss": 0.511, "step": 37137 }, { "epoch": 0.95, "grad_norm": 1.6994184255599976, "learning_rate": 1.2115054055904407e-07, "loss": 0.4506, "step": 37138 }, { "epoch": 0.95, "grad_norm": 3.1879096031188965, "learning_rate": 1.2102174633909724e-07, "loss": 0.3275, "step": 37139 }, { "epoch": 0.95, "grad_norm": 2.0882484912872314, "learning_rate": 1.2089302019886117e-07, "loss": 0.5815, "step": 37140 }, { "epoch": 0.95, "grad_norm": 1.9160116910934448, "learning_rate": 1.2076436213922405e-07, "loss": 0.525, "step": 37141 }, { "epoch": 0.95, "grad_norm": 1.5488345623016357, "learning_rate": 1.206357721610729e-07, "loss": 0.589, "step": 37142 }, { "epoch": 0.95, "grad_norm": 2.0395708084106445, "learning_rate": 1.2050725026529264e-07, "loss": 0.4749, "step": 37143 }, { "epoch": 0.95, "grad_norm": 1.3843333721160889, "learning_rate": 1.2037879645276918e-07, "loss": 0.4922, "step": 37144 }, { "epoch": 0.95, "grad_norm": 1.9100852012634277, "learning_rate": 1.2025041072438958e-07, "loss": 0.5207, "step": 37145 }, { "epoch": 0.95, "grad_norm": 1.4363435506820679, "learning_rate": 1.201220930810354e-07, "loss": 0.6017, "step": 37146 }, { "epoch": 0.95, "grad_norm": 1.8629931211471558, "learning_rate": 1.1999384352359368e-07, "loss": 0.4996, "step": 37147 }, { "epoch": 0.95, "grad_norm": 1.2669181823730469, "learning_rate": 1.1986566205294704e-07, "loss": 0.5335, "step": 37148 }, { "epoch": 0.95, "grad_norm": 1.1777538061141968, "learning_rate": 1.197375486699781e-07, "loss": 0.4635, "step": 37149 }, { "epoch": 0.95, "grad_norm": 2.208045721054077, "learning_rate": 1.1960950337557064e-07, "loss": 0.4178, "step": 37150 }, { "epoch": 0.95, "grad_norm": 1.5045883655548096, "learning_rate": 1.1948152617060838e-07, "loss": 0.5079, "step": 37151 }, { "epoch": 0.95, "grad_norm": 1.1738628149032593, "learning_rate": 1.193536170559706e-07, "loss": 0.4885, "step": 37152 }, { "epoch": 0.95, "grad_norm": 2.105924606323242, "learning_rate": 1.1922577603253993e-07, "loss": 0.4715, "step": 37153 }, { "epoch": 0.95, "grad_norm": 1.3368531465530396, "learning_rate": 1.19098003101199e-07, "loss": 0.3852, "step": 37154 }, { "epoch": 0.95, "grad_norm": 1.5984197854995728, "learning_rate": 1.1897029826282602e-07, "loss": 0.5197, "step": 37155 }, { "epoch": 0.95, "grad_norm": 1.6142951250076294, "learning_rate": 1.1884266151830137e-07, "loss": 0.4682, "step": 37156 }, { "epoch": 0.95, "grad_norm": 2.9621338844299316, "learning_rate": 1.1871509286850546e-07, "loss": 0.5115, "step": 37157 }, { "epoch": 0.95, "grad_norm": 1.6163276433944702, "learning_rate": 1.185875923143176e-07, "loss": 0.4063, "step": 37158 }, { "epoch": 0.95, "grad_norm": 2.0892772674560547, "learning_rate": 1.1846015985661486e-07, "loss": 0.6911, "step": 37159 }, { "epoch": 0.95, "grad_norm": 1.40669846534729, "learning_rate": 1.1833279549627763e-07, "loss": 0.5204, "step": 37160 }, { "epoch": 0.95, "grad_norm": 2.1118874549865723, "learning_rate": 1.182054992341819e-07, "loss": 0.4382, "step": 37161 }, { "epoch": 0.95, "grad_norm": 1.4408437013626099, "learning_rate": 1.1807827107120584e-07, "loss": 0.564, "step": 37162 }, { "epoch": 0.95, "grad_norm": 1.5583583116531372, "learning_rate": 1.1795111100822543e-07, "loss": 0.4329, "step": 37163 }, { "epoch": 0.95, "grad_norm": 1.4587852954864502, "learning_rate": 1.1782401904611773e-07, "loss": 0.4102, "step": 37164 }, { "epoch": 0.95, "grad_norm": 1.7618675231933594, "learning_rate": 1.1769699518575873e-07, "loss": 0.5, "step": 37165 }, { "epoch": 0.95, "grad_norm": 11.927309036254883, "learning_rate": 1.1757003942802325e-07, "loss": 0.7269, "step": 37166 }, { "epoch": 0.95, "grad_norm": 1.0399866104125977, "learning_rate": 1.1744315177378507e-07, "loss": 0.3293, "step": 37167 }, { "epoch": 0.95, "grad_norm": 1.2113486528396606, "learning_rate": 1.1731633222392125e-07, "loss": 0.481, "step": 37168 }, { "epoch": 0.95, "grad_norm": 1.1250691413879395, "learning_rate": 1.1718958077930331e-07, "loss": 0.625, "step": 37169 }, { "epoch": 0.95, "grad_norm": 1.5286544561386108, "learning_rate": 1.1706289744080613e-07, "loss": 0.5967, "step": 37170 }, { "epoch": 0.95, "grad_norm": 1.6334428787231445, "learning_rate": 1.1693628220930342e-07, "loss": 0.5367, "step": 37171 }, { "epoch": 0.95, "grad_norm": 1.7092320919036865, "learning_rate": 1.1680973508566562e-07, "loss": 0.545, "step": 37172 }, { "epoch": 0.95, "grad_norm": 1.093270182609558, "learning_rate": 1.1668325607076647e-07, "loss": 0.3923, "step": 37173 }, { "epoch": 0.95, "grad_norm": 1.947024941444397, "learning_rate": 1.1655684516547638e-07, "loss": 0.6823, "step": 37174 }, { "epoch": 0.95, "grad_norm": 2.0076732635498047, "learning_rate": 1.16430502370668e-07, "loss": 0.4289, "step": 37175 }, { "epoch": 0.95, "grad_norm": 1.45268714427948, "learning_rate": 1.1630422768721061e-07, "loss": 0.4279, "step": 37176 }, { "epoch": 0.95, "grad_norm": 1.0943747758865356, "learning_rate": 1.1617802111597465e-07, "loss": 0.4867, "step": 37177 }, { "epoch": 0.95, "grad_norm": 1.7833056449890137, "learning_rate": 1.1605188265783052e-07, "loss": 0.4006, "step": 37178 }, { "epoch": 0.95, "grad_norm": 1.8207439184188843, "learning_rate": 1.1592581231364752e-07, "loss": 0.4275, "step": 37179 }, { "epoch": 0.95, "grad_norm": 2.90576171875, "learning_rate": 1.1579981008429275e-07, "loss": 0.4486, "step": 37180 }, { "epoch": 0.95, "grad_norm": 1.8060157299041748, "learning_rate": 1.1567387597063662e-07, "loss": 0.4535, "step": 37181 }, { "epoch": 0.95, "grad_norm": 1.4014099836349487, "learning_rate": 1.155480099735462e-07, "loss": 0.4603, "step": 37182 }, { "epoch": 0.95, "grad_norm": 1.4364434480667114, "learning_rate": 1.1542221209388971e-07, "loss": 0.5035, "step": 37183 }, { "epoch": 0.95, "grad_norm": 1.5119816064834595, "learning_rate": 1.1529648233253198e-07, "loss": 0.5052, "step": 37184 }, { "epoch": 0.95, "grad_norm": 1.2331615686416626, "learning_rate": 1.1517082069034125e-07, "loss": 0.6031, "step": 37185 }, { "epoch": 0.95, "grad_norm": 1.4079103469848633, "learning_rate": 1.1504522716818233e-07, "loss": 0.4354, "step": 37186 }, { "epoch": 0.95, "grad_norm": 1.3810429573059082, "learning_rate": 1.1491970176692125e-07, "loss": 0.4187, "step": 37187 }, { "epoch": 0.95, "grad_norm": 1.990036964416504, "learning_rate": 1.1479424448742504e-07, "loss": 0.3857, "step": 37188 }, { "epoch": 0.95, "grad_norm": 1.5127419233322144, "learning_rate": 1.1466885533055528e-07, "loss": 0.5648, "step": 37189 }, { "epoch": 0.95, "grad_norm": 2.199249505996704, "learning_rate": 1.145435342971768e-07, "loss": 0.6119, "step": 37190 }, { "epoch": 0.95, "grad_norm": 2.5165908336639404, "learning_rate": 1.1441828138815336e-07, "loss": 0.4885, "step": 37191 }, { "epoch": 0.95, "grad_norm": 0.9280326962471008, "learning_rate": 1.1429309660434872e-07, "loss": 0.6372, "step": 37192 }, { "epoch": 0.95, "grad_norm": 1.8027914762496948, "learning_rate": 1.1416797994662665e-07, "loss": 0.572, "step": 37193 }, { "epoch": 0.95, "grad_norm": 1.542722225189209, "learning_rate": 1.1404293141584643e-07, "loss": 0.5019, "step": 37194 }, { "epoch": 0.95, "grad_norm": 1.7501140832901, "learning_rate": 1.1391795101287184e-07, "loss": 0.4667, "step": 37195 }, { "epoch": 0.95, "grad_norm": 3.0227127075195312, "learning_rate": 1.137930387385644e-07, "loss": 0.4303, "step": 37196 }, { "epoch": 0.95, "grad_norm": 1.3252660036087036, "learning_rate": 1.1366819459378231e-07, "loss": 0.5413, "step": 37197 }, { "epoch": 0.95, "grad_norm": 1.613537311553955, "learning_rate": 1.1354341857938934e-07, "loss": 0.6002, "step": 37198 }, { "epoch": 0.95, "grad_norm": 7.680845737457275, "learning_rate": 1.1341871069624366e-07, "loss": 0.7779, "step": 37199 }, { "epoch": 0.95, "grad_norm": 4.768238067626953, "learning_rate": 1.1329407094520351e-07, "loss": 0.5995, "step": 37200 }, { "epoch": 0.95, "grad_norm": 1.1473101377487183, "learning_rate": 1.131694993271315e-07, "loss": 0.4536, "step": 37201 }, { "epoch": 0.95, "grad_norm": 1.1126359701156616, "learning_rate": 1.1304499584288254e-07, "loss": 0.5164, "step": 37202 }, { "epoch": 0.95, "grad_norm": 2.9449141025543213, "learning_rate": 1.129205604933159e-07, "loss": 0.7907, "step": 37203 }, { "epoch": 0.95, "grad_norm": 6.967599868774414, "learning_rate": 1.127961932792887e-07, "loss": 0.5493, "step": 37204 }, { "epoch": 0.95, "grad_norm": 2.4082655906677246, "learning_rate": 1.1267189420165913e-07, "loss": 0.5931, "step": 37205 }, { "epoch": 0.95, "grad_norm": 1.4813746213912964, "learning_rate": 1.1254766326128207e-07, "loss": 0.5366, "step": 37206 }, { "epoch": 0.95, "grad_norm": 0.9840043783187866, "learning_rate": 1.1242350045901573e-07, "loss": 0.4508, "step": 37207 }, { "epoch": 0.95, "grad_norm": 1.5093061923980713, "learning_rate": 1.1229940579571386e-07, "loss": 0.7344, "step": 37208 }, { "epoch": 0.95, "grad_norm": 1.3995673656463623, "learning_rate": 1.1217537927223243e-07, "loss": 0.571, "step": 37209 }, { "epoch": 0.95, "grad_norm": 1.165320873260498, "learning_rate": 1.1205142088942744e-07, "loss": 0.6449, "step": 37210 }, { "epoch": 0.95, "grad_norm": 1.2524690628051758, "learning_rate": 1.1192753064815043e-07, "loss": 0.5353, "step": 37211 }, { "epoch": 0.95, "grad_norm": 1.2041106224060059, "learning_rate": 1.1180370854925626e-07, "loss": 0.5766, "step": 37212 }, { "epoch": 0.95, "grad_norm": 1.564969539642334, "learning_rate": 1.1167995459359981e-07, "loss": 0.5328, "step": 37213 }, { "epoch": 0.95, "grad_norm": 1.4596253633499146, "learning_rate": 1.1155626878203152e-07, "loss": 0.5186, "step": 37214 }, { "epoch": 0.95, "grad_norm": 4.019292831420898, "learning_rate": 1.1143265111540625e-07, "loss": 0.3764, "step": 37215 }, { "epoch": 0.95, "grad_norm": 1.5988494157791138, "learning_rate": 1.1130910159457331e-07, "loss": 0.4501, "step": 37216 }, { "epoch": 0.95, "grad_norm": 2.1360762119293213, "learning_rate": 1.1118562022038537e-07, "loss": 0.6792, "step": 37217 }, { "epoch": 0.95, "grad_norm": 1.2335622310638428, "learning_rate": 1.1106220699369396e-07, "loss": 0.4442, "step": 37218 }, { "epoch": 0.95, "grad_norm": 1.256920576095581, "learning_rate": 1.1093886191534953e-07, "loss": 0.5072, "step": 37219 }, { "epoch": 0.95, "grad_norm": 1.4034227132797241, "learning_rate": 1.1081558498620138e-07, "loss": 0.4529, "step": 37220 }, { "epoch": 0.95, "grad_norm": 2.016655206680298, "learning_rate": 1.1069237620709771e-07, "loss": 0.4461, "step": 37221 }, { "epoch": 0.95, "grad_norm": 1.354062557220459, "learning_rate": 1.105692355788901e-07, "loss": 0.6289, "step": 37222 }, { "epoch": 0.95, "grad_norm": 2.409430742263794, "learning_rate": 1.1044616310242674e-07, "loss": 0.5822, "step": 37223 }, { "epoch": 0.95, "grad_norm": 8.919836044311523, "learning_rate": 1.103231587785547e-07, "loss": 0.4987, "step": 37224 }, { "epoch": 0.95, "grad_norm": 2.0224201679229736, "learning_rate": 1.1020022260812224e-07, "loss": 0.7266, "step": 37225 }, { "epoch": 0.95, "grad_norm": 1.332914113998413, "learning_rate": 1.1007735459197644e-07, "loss": 0.6153, "step": 37226 }, { "epoch": 0.95, "grad_norm": 5.1834893226623535, "learning_rate": 1.0995455473096329e-07, "loss": 0.6029, "step": 37227 }, { "epoch": 0.95, "grad_norm": 1.5731346607208252, "learning_rate": 1.0983182302593098e-07, "loss": 0.4725, "step": 37228 }, { "epoch": 0.95, "grad_norm": 1.4876502752304077, "learning_rate": 1.0970915947772331e-07, "loss": 0.5401, "step": 37229 }, { "epoch": 0.95, "grad_norm": 1.5161011219024658, "learning_rate": 1.0958656408718737e-07, "loss": 0.5758, "step": 37230 }, { "epoch": 0.95, "grad_norm": 11.885370254516602, "learning_rate": 1.0946403685516582e-07, "loss": 0.5729, "step": 37231 }, { "epoch": 0.95, "grad_norm": 1.5600528717041016, "learning_rate": 1.0934157778250466e-07, "loss": 0.6147, "step": 37232 }, { "epoch": 0.95, "grad_norm": 1.244913101196289, "learning_rate": 1.0921918687004762e-07, "loss": 0.4807, "step": 37233 }, { "epoch": 0.95, "grad_norm": 1.6494613885879517, "learning_rate": 1.090968641186374e-07, "loss": 0.6007, "step": 37234 }, { "epoch": 0.95, "grad_norm": 2.9194061756134033, "learning_rate": 1.0897460952911887e-07, "loss": 0.6173, "step": 37235 }, { "epoch": 0.95, "grad_norm": 0.96784508228302, "learning_rate": 1.0885242310233246e-07, "loss": 0.5512, "step": 37236 }, { "epoch": 0.95, "grad_norm": 1.4606349468231201, "learning_rate": 1.0873030483912084e-07, "loss": 0.3704, "step": 37237 }, { "epoch": 0.95, "grad_norm": 1.4642012119293213, "learning_rate": 1.0860825474032555e-07, "loss": 0.5698, "step": 37238 }, { "epoch": 0.95, "grad_norm": 1.2564761638641357, "learning_rate": 1.0848627280678814e-07, "loss": 0.5014, "step": 37239 }, { "epoch": 0.95, "grad_norm": 5.266284942626953, "learning_rate": 1.0836435903934906e-07, "loss": 0.612, "step": 37240 }, { "epoch": 0.95, "grad_norm": 1.302803635597229, "learning_rate": 1.0824251343884762e-07, "loss": 0.4775, "step": 37241 }, { "epoch": 0.95, "grad_norm": 2.359463930130005, "learning_rate": 1.0812073600612427e-07, "loss": 0.637, "step": 37242 }, { "epoch": 0.95, "grad_norm": 1.991769552230835, "learning_rate": 1.0799902674201835e-07, "loss": 0.4042, "step": 37243 }, { "epoch": 0.95, "grad_norm": 1.492695689201355, "learning_rate": 1.0787738564736805e-07, "loss": 0.4767, "step": 37244 }, { "epoch": 0.95, "grad_norm": 1.365016222000122, "learning_rate": 1.0775581272301161e-07, "loss": 0.338, "step": 37245 }, { "epoch": 0.95, "grad_norm": 1.4651896953582764, "learning_rate": 1.0763430796978835e-07, "loss": 0.5091, "step": 37246 }, { "epoch": 0.95, "grad_norm": 1.536821722984314, "learning_rate": 1.0751287138853428e-07, "loss": 0.4778, "step": 37247 }, { "epoch": 0.95, "grad_norm": 1.0749437808990479, "learning_rate": 1.0739150298008538e-07, "loss": 0.3779, "step": 37248 }, { "epoch": 0.95, "grad_norm": 1.597309947013855, "learning_rate": 1.0727020274527988e-07, "loss": 0.4846, "step": 37249 }, { "epoch": 0.95, "grad_norm": 0.9588381052017212, "learning_rate": 1.0714897068495377e-07, "loss": 0.5859, "step": 37250 }, { "epoch": 0.95, "grad_norm": 1.1440849304199219, "learning_rate": 1.0702780679994085e-07, "loss": 0.3699, "step": 37251 }, { "epoch": 0.95, "grad_norm": 2.4434497356414795, "learning_rate": 1.0690671109107598e-07, "loss": 0.5865, "step": 37252 }, { "epoch": 0.95, "grad_norm": 3.176201343536377, "learning_rate": 1.0678568355919517e-07, "loss": 0.4722, "step": 37253 }, { "epoch": 0.95, "grad_norm": 2.849668502807617, "learning_rate": 1.0666472420513219e-07, "loss": 0.556, "step": 37254 }, { "epoch": 0.95, "grad_norm": 1.3299897909164429, "learning_rate": 1.0654383302971971e-07, "loss": 0.5159, "step": 37255 }, { "epoch": 0.95, "grad_norm": 1.6978851556777954, "learning_rate": 1.0642301003379263e-07, "loss": 0.5442, "step": 37256 }, { "epoch": 0.95, "grad_norm": 2.2829947471618652, "learning_rate": 1.063022552181825e-07, "loss": 0.44, "step": 37257 }, { "epoch": 0.95, "grad_norm": 1.484570860862732, "learning_rate": 1.0618156858371976e-07, "loss": 0.5096, "step": 37258 }, { "epoch": 0.95, "grad_norm": 0.8287914991378784, "learning_rate": 1.0606095013123929e-07, "loss": 0.4088, "step": 37259 }, { "epoch": 0.95, "grad_norm": 1.1776313781738281, "learning_rate": 1.0594039986156934e-07, "loss": 0.4469, "step": 37260 }, { "epoch": 0.96, "grad_norm": 2.6408040523529053, "learning_rate": 1.0581991777554368e-07, "loss": 0.4451, "step": 37261 }, { "epoch": 0.96, "grad_norm": 4.216622352600098, "learning_rate": 1.0569950387398942e-07, "loss": 0.6713, "step": 37262 }, { "epoch": 0.96, "grad_norm": 1.822375774383545, "learning_rate": 1.0557915815773923e-07, "loss": 0.5187, "step": 37263 }, { "epoch": 0.96, "grad_norm": 1.1474498510360718, "learning_rate": 1.0545888062762132e-07, "loss": 0.5027, "step": 37264 }, { "epoch": 0.96, "grad_norm": 1.2365748882293701, "learning_rate": 1.0533867128446285e-07, "loss": 0.4858, "step": 37265 }, { "epoch": 0.96, "grad_norm": 1.402342677116394, "learning_rate": 1.0521853012909533e-07, "loss": 0.5961, "step": 37266 }, { "epoch": 0.96, "grad_norm": 1.4000641107559204, "learning_rate": 1.0509845716234479e-07, "loss": 0.5502, "step": 37267 }, { "epoch": 0.96, "grad_norm": 1.4567716121673584, "learning_rate": 1.0497845238503945e-07, "loss": 0.6756, "step": 37268 }, { "epoch": 0.96, "grad_norm": 2.1231493949890137, "learning_rate": 1.0485851579800532e-07, "loss": 0.639, "step": 37269 }, { "epoch": 0.96, "grad_norm": 2.0422871112823486, "learning_rate": 1.0473864740206952e-07, "loss": 0.4247, "step": 37270 }, { "epoch": 0.96, "grad_norm": 1.4038774967193604, "learning_rate": 1.0461884719805804e-07, "loss": 0.4656, "step": 37271 }, { "epoch": 0.96, "grad_norm": 1.9673658609390259, "learning_rate": 1.044991151867969e-07, "loss": 0.4816, "step": 37272 }, { "epoch": 0.96, "grad_norm": 1.9300761222839355, "learning_rate": 1.0437945136911098e-07, "loss": 0.6032, "step": 37273 }, { "epoch": 0.96, "grad_norm": 0.8710636496543884, "learning_rate": 1.042598557458252e-07, "loss": 0.537, "step": 37274 }, { "epoch": 0.96, "grad_norm": 1.5080063343048096, "learning_rate": 1.0414032831776333e-07, "loss": 0.4496, "step": 37275 }, { "epoch": 0.96, "grad_norm": 1.2041651010513306, "learning_rate": 1.0402086908574805e-07, "loss": 0.4597, "step": 37276 }, { "epoch": 0.96, "grad_norm": 1.0539910793304443, "learning_rate": 1.0390147805060425e-07, "loss": 0.4258, "step": 37277 }, { "epoch": 0.96, "grad_norm": 1.4843010902404785, "learning_rate": 1.037821552131546e-07, "loss": 0.4493, "step": 37278 }, { "epoch": 0.96, "grad_norm": 1.7260971069335938, "learning_rate": 1.0366290057422068e-07, "loss": 0.5818, "step": 37279 }, { "epoch": 0.96, "grad_norm": 0.9713280200958252, "learning_rate": 1.0354371413462405e-07, "loss": 0.54, "step": 37280 }, { "epoch": 0.96, "grad_norm": 6.207233905792236, "learning_rate": 1.0342459589518738e-07, "loss": 0.5125, "step": 37281 }, { "epoch": 0.96, "grad_norm": 2.8630759716033936, "learning_rate": 1.0330554585673002e-07, "loss": 0.7531, "step": 37282 }, { "epoch": 0.96, "grad_norm": 0.980772078037262, "learning_rate": 1.0318656402007355e-07, "loss": 0.5267, "step": 37283 }, { "epoch": 0.96, "grad_norm": 1.3729232549667358, "learning_rate": 1.0306765038603727e-07, "loss": 0.4783, "step": 37284 }, { "epoch": 0.96, "grad_norm": 1.064856767654419, "learning_rate": 1.0294880495544168e-07, "loss": 0.3505, "step": 37285 }, { "epoch": 0.96, "grad_norm": 3.5253005027770996, "learning_rate": 1.0283002772910389e-07, "loss": 0.6205, "step": 37286 }, { "epoch": 0.96, "grad_norm": 7.2371416091918945, "learning_rate": 1.0271131870784434e-07, "loss": 0.6936, "step": 37287 }, { "epoch": 0.96, "grad_norm": 1.1634734869003296, "learning_rate": 1.0259267789248017e-07, "loss": 0.4718, "step": 37288 }, { "epoch": 0.96, "grad_norm": 1.5827511548995972, "learning_rate": 1.024741052838285e-07, "loss": 0.5389, "step": 37289 }, { "epoch": 0.96, "grad_norm": 5.02066707611084, "learning_rate": 1.0235560088270758e-07, "loss": 0.516, "step": 37290 }, { "epoch": 0.96, "grad_norm": 1.0686497688293457, "learning_rate": 1.0223716468993339e-07, "loss": 0.3908, "step": 37291 }, { "epoch": 0.96, "grad_norm": 1.5612051486968994, "learning_rate": 1.0211879670632307e-07, "loss": 0.5103, "step": 37292 }, { "epoch": 0.96, "grad_norm": 15.028385162353516, "learning_rate": 1.0200049693268933e-07, "loss": 0.5092, "step": 37293 }, { "epoch": 0.96, "grad_norm": 6.220006942749023, "learning_rate": 1.0188226536985146e-07, "loss": 0.5379, "step": 37294 }, { "epoch": 0.96, "grad_norm": 2.0121967792510986, "learning_rate": 1.0176410201862219e-07, "loss": 0.5825, "step": 37295 }, { "epoch": 0.96, "grad_norm": 2.0095667839050293, "learning_rate": 1.016460068798153e-07, "loss": 0.4552, "step": 37296 }, { "epoch": 0.96, "grad_norm": 1.3593937158584595, "learning_rate": 1.0152797995424679e-07, "loss": 0.4828, "step": 37297 }, { "epoch": 0.96, "grad_norm": 1.7444473505020142, "learning_rate": 1.0141002124272825e-07, "loss": 0.52, "step": 37298 }, { "epoch": 0.96, "grad_norm": 6.691499710083008, "learning_rate": 1.0129213074607125e-07, "loss": 0.5492, "step": 37299 }, { "epoch": 0.96, "grad_norm": 0.9134505987167358, "learning_rate": 1.011743084650918e-07, "loss": 0.3759, "step": 37300 }, { "epoch": 0.96, "grad_norm": 2.9013516902923584, "learning_rate": 1.0105655440059926e-07, "loss": 0.5203, "step": 37301 }, { "epoch": 0.96, "grad_norm": 1.328246831893921, "learning_rate": 1.0093886855340629e-07, "loss": 0.4092, "step": 37302 }, { "epoch": 0.96, "grad_norm": 2.2983129024505615, "learning_rate": 1.0082125092432226e-07, "loss": 0.6476, "step": 37303 }, { "epoch": 0.96, "grad_norm": 2.567465305328369, "learning_rate": 1.0070370151415987e-07, "loss": 0.6119, "step": 37304 }, { "epoch": 0.96, "grad_norm": 2.317999839782715, "learning_rate": 1.0058622032372845e-07, "loss": 0.6667, "step": 37305 }, { "epoch": 0.96, "grad_norm": 1.2173404693603516, "learning_rate": 1.0046880735383623e-07, "loss": 0.4415, "step": 37306 }, { "epoch": 0.96, "grad_norm": 7.535525321960449, "learning_rate": 1.0035146260529483e-07, "loss": 0.6828, "step": 37307 }, { "epoch": 0.96, "grad_norm": 2.191556930541992, "learning_rate": 1.0023418607891022e-07, "loss": 0.5767, "step": 37308 }, { "epoch": 0.96, "grad_norm": 5.453535079956055, "learning_rate": 1.001169777754929e-07, "loss": 0.5893, "step": 37309 }, { "epoch": 0.96, "grad_norm": 3.4710254669189453, "learning_rate": 9.999983769584998e-08, "loss": 0.5771, "step": 37310 }, { "epoch": 0.96, "grad_norm": 1.438364863395691, "learning_rate": 9.988276584078749e-08, "loss": 0.5848, "step": 37311 }, { "epoch": 0.96, "grad_norm": 1.3544710874557495, "learning_rate": 9.976576221111368e-08, "loss": 0.3864, "step": 37312 }, { "epoch": 0.96, "grad_norm": 1.2993477582931519, "learning_rate": 9.964882680763343e-08, "loss": 0.5449, "step": 37313 }, { "epoch": 0.96, "grad_norm": 1.1567915678024292, "learning_rate": 9.953195963115502e-08, "loss": 0.5664, "step": 37314 }, { "epoch": 0.96, "grad_norm": 1.561508297920227, "learning_rate": 9.941516068248113e-08, "loss": 0.4056, "step": 37315 }, { "epoch": 0.96, "grad_norm": 9.075230598449707, "learning_rate": 9.929842996241779e-08, "loss": 0.6087, "step": 37316 }, { "epoch": 0.96, "grad_norm": 1.5290857553482056, "learning_rate": 9.918176747176989e-08, "loss": 0.5966, "step": 37317 }, { "epoch": 0.96, "grad_norm": 2.2103071212768555, "learning_rate": 9.906517321134013e-08, "loss": 0.7004, "step": 37318 }, { "epoch": 0.96, "grad_norm": 1.0092804431915283, "learning_rate": 9.894864718193454e-08, "loss": 0.49, "step": 37319 }, { "epoch": 0.96, "grad_norm": 1.2405495643615723, "learning_rate": 9.883218938435247e-08, "loss": 0.3426, "step": 37320 }, { "epoch": 0.96, "grad_norm": 1.2487033605575562, "learning_rate": 9.871579981939882e-08, "loss": 0.5333, "step": 37321 }, { "epoch": 0.96, "grad_norm": 8.865138053894043, "learning_rate": 9.859947848787632e-08, "loss": 0.3797, "step": 37322 }, { "epoch": 0.96, "grad_norm": 1.4968786239624023, "learning_rate": 9.84832253905843e-08, "loss": 0.5818, "step": 37323 }, { "epoch": 0.96, "grad_norm": 6.596480846405029, "learning_rate": 9.836704052832658e-08, "loss": 0.4619, "step": 37324 }, { "epoch": 0.96, "grad_norm": 1.4496315717697144, "learning_rate": 9.825092390190138e-08, "loss": 0.5795, "step": 37325 }, { "epoch": 0.96, "grad_norm": 1.4089487791061401, "learning_rate": 9.813487551211032e-08, "loss": 0.2889, "step": 37326 }, { "epoch": 0.96, "grad_norm": 4.433660984039307, "learning_rate": 9.801889535975274e-08, "loss": 0.5656, "step": 37327 }, { "epoch": 0.96, "grad_norm": 1.2080693244934082, "learning_rate": 9.790298344562798e-08, "loss": 0.5326, "step": 37328 }, { "epoch": 0.96, "grad_norm": 2.0340006351470947, "learning_rate": 9.778713977053433e-08, "loss": 0.6397, "step": 37329 }, { "epoch": 0.96, "grad_norm": 1.2410320043563843, "learning_rate": 9.767136433527003e-08, "loss": 0.4756, "step": 37330 }, { "epoch": 0.96, "grad_norm": 1.4444459676742554, "learning_rate": 9.755565714063442e-08, "loss": 0.3988, "step": 37331 }, { "epoch": 0.96, "grad_norm": 2.4881246089935303, "learning_rate": 9.744001818742355e-08, "loss": 0.5011, "step": 37332 }, { "epoch": 0.96, "grad_norm": 1.5584977865219116, "learning_rate": 9.732444747643455e-08, "loss": 0.3972, "step": 37333 }, { "epoch": 0.96, "grad_norm": 1.4078408479690552, "learning_rate": 9.720894500846457e-08, "loss": 0.5748, "step": 37334 }, { "epoch": 0.96, "grad_norm": 1.53075110912323, "learning_rate": 9.709351078430851e-08, "loss": 0.5999, "step": 37335 }, { "epoch": 0.96, "grad_norm": 1.417804479598999, "learning_rate": 9.697814480476242e-08, "loss": 0.6158, "step": 37336 }, { "epoch": 0.96, "grad_norm": 1.5917202234268188, "learning_rate": 9.686284707062121e-08, "loss": 0.5926, "step": 37337 }, { "epoch": 0.96, "grad_norm": 1.4005975723266602, "learning_rate": 9.674761758267982e-08, "loss": 0.4737, "step": 37338 }, { "epoch": 0.96, "grad_norm": 1.593673586845398, "learning_rate": 9.663245634173202e-08, "loss": 0.4289, "step": 37339 }, { "epoch": 0.96, "grad_norm": 2.0712199211120605, "learning_rate": 9.651736334857053e-08, "loss": 0.6398, "step": 37340 }, { "epoch": 0.96, "grad_norm": 1.4511455297470093, "learning_rate": 9.640233860399029e-08, "loss": 0.4486, "step": 37341 }, { "epoch": 0.96, "grad_norm": 1.4340823888778687, "learning_rate": 9.628738210878286e-08, "loss": 0.5487, "step": 37342 }, { "epoch": 0.96, "grad_norm": 4.918682098388672, "learning_rate": 9.617249386374095e-08, "loss": 0.5762, "step": 37343 }, { "epoch": 0.96, "grad_norm": 1.9494880437850952, "learning_rate": 9.605767386965503e-08, "loss": 0.5932, "step": 37344 }, { "epoch": 0.96, "grad_norm": 3.9781641960144043, "learning_rate": 9.594292212731782e-08, "loss": 0.4867, "step": 37345 }, { "epoch": 0.96, "grad_norm": 3.5449764728546143, "learning_rate": 9.582823863752089e-08, "loss": 0.6869, "step": 37346 }, { "epoch": 0.96, "grad_norm": 7.497952938079834, "learning_rate": 9.571362340105139e-08, "loss": 0.6855, "step": 37347 }, { "epoch": 0.96, "grad_norm": 1.839568853378296, "learning_rate": 9.559907641870204e-08, "loss": 0.4238, "step": 37348 }, { "epoch": 0.96, "grad_norm": 1.0366973876953125, "learning_rate": 9.548459769126218e-08, "loss": 0.542, "step": 37349 }, { "epoch": 0.96, "grad_norm": 1.516101598739624, "learning_rate": 9.537018721951897e-08, "loss": 0.5194, "step": 37350 }, { "epoch": 0.96, "grad_norm": 1.184377670288086, "learning_rate": 9.525584500426066e-08, "loss": 0.4861, "step": 37351 }, { "epoch": 0.96, "grad_norm": 0.9518228769302368, "learning_rate": 9.514157104627775e-08, "loss": 0.4514, "step": 37352 }, { "epoch": 0.96, "grad_norm": 1.1096227169036865, "learning_rate": 9.502736534635625e-08, "loss": 0.4762, "step": 37353 }, { "epoch": 0.96, "grad_norm": 1.6649143695831299, "learning_rate": 9.491322790528224e-08, "loss": 0.4147, "step": 37354 }, { "epoch": 0.96, "grad_norm": 3.000474691390991, "learning_rate": 9.479915872384393e-08, "loss": 0.4364, "step": 37355 }, { "epoch": 0.96, "grad_norm": 1.9812242984771729, "learning_rate": 9.468515780282739e-08, "loss": 0.5339, "step": 37356 }, { "epoch": 0.96, "grad_norm": 0.9005905389785767, "learning_rate": 9.457122514301642e-08, "loss": 0.3877, "step": 37357 }, { "epoch": 0.96, "grad_norm": 1.5418429374694824, "learning_rate": 9.445736074519818e-08, "loss": 0.4095, "step": 37358 }, { "epoch": 0.96, "grad_norm": 15.340201377868652, "learning_rate": 9.434356461015649e-08, "loss": 0.5453, "step": 37359 }, { "epoch": 0.96, "grad_norm": 2.011382579803467, "learning_rate": 9.422983673867514e-08, "loss": 0.5576, "step": 37360 }, { "epoch": 0.96, "grad_norm": 1.1924241781234741, "learning_rate": 9.411617713153798e-08, "loss": 0.4061, "step": 37361 }, { "epoch": 0.96, "grad_norm": 1.2362701892852783, "learning_rate": 9.400258578952992e-08, "loss": 0.5704, "step": 37362 }, { "epoch": 0.96, "grad_norm": 1.677323818206787, "learning_rate": 9.388906271343146e-08, "loss": 0.3525, "step": 37363 }, { "epoch": 0.96, "grad_norm": 5.812562942504883, "learning_rate": 9.37756079040264e-08, "loss": 0.7699, "step": 37364 }, { "epoch": 0.96, "grad_norm": 1.2193127870559692, "learning_rate": 9.366222136209634e-08, "loss": 0.3857, "step": 37365 }, { "epoch": 0.96, "grad_norm": 1.0497812032699585, "learning_rate": 9.354890308842179e-08, "loss": 0.5107, "step": 37366 }, { "epoch": 0.96, "grad_norm": 1.471526861190796, "learning_rate": 9.343565308378544e-08, "loss": 0.6696, "step": 37367 }, { "epoch": 0.96, "grad_norm": 1.2799898386001587, "learning_rate": 9.332247134896555e-08, "loss": 0.5772, "step": 37368 }, { "epoch": 0.96, "grad_norm": 1.2445462942123413, "learning_rate": 9.320935788474372e-08, "loss": 0.3312, "step": 37369 }, { "epoch": 0.96, "grad_norm": 1.1667068004608154, "learning_rate": 9.309631269189933e-08, "loss": 0.3969, "step": 37370 }, { "epoch": 0.96, "grad_norm": 1.1305450201034546, "learning_rate": 9.298333577120955e-08, "loss": 0.5553, "step": 37371 }, { "epoch": 0.96, "grad_norm": 1.6274312734603882, "learning_rate": 9.287042712345595e-08, "loss": 0.5052, "step": 37372 }, { "epoch": 0.96, "grad_norm": 1.84498131275177, "learning_rate": 9.275758674941459e-08, "loss": 0.4116, "step": 37373 }, { "epoch": 0.96, "grad_norm": 1.4955490827560425, "learning_rate": 9.264481464986375e-08, "loss": 0.7383, "step": 37374 }, { "epoch": 0.96, "grad_norm": 1.633466124534607, "learning_rate": 9.253211082558055e-08, "loss": 0.4313, "step": 37375 }, { "epoch": 0.96, "grad_norm": 1.3350436687469482, "learning_rate": 9.241947527734108e-08, "loss": 0.4987, "step": 37376 }, { "epoch": 0.96, "grad_norm": 1.443699598312378, "learning_rate": 9.230690800592246e-08, "loss": 0.4934, "step": 37377 }, { "epoch": 0.96, "grad_norm": 5.161599636077881, "learning_rate": 9.219440901209964e-08, "loss": 0.6516, "step": 37378 }, { "epoch": 0.96, "grad_norm": 13.174117088317871, "learning_rate": 9.208197829664866e-08, "loss": 0.5603, "step": 37379 }, { "epoch": 0.96, "grad_norm": 1.656211018562317, "learning_rate": 9.196961586034337e-08, "loss": 0.525, "step": 37380 }, { "epoch": 0.96, "grad_norm": 1.1211940050125122, "learning_rate": 9.185732170395867e-08, "loss": 0.4334, "step": 37381 }, { "epoch": 0.96, "grad_norm": 3.2528438568115234, "learning_rate": 9.174509582826841e-08, "loss": 0.6438, "step": 37382 }, { "epoch": 0.96, "grad_norm": 1.2705763578414917, "learning_rate": 9.16329382340464e-08, "loss": 0.4329, "step": 37383 }, { "epoch": 0.96, "grad_norm": 0.9415048360824585, "learning_rate": 9.152084892206537e-08, "loss": 0.4568, "step": 37384 }, { "epoch": 0.96, "grad_norm": 4.664527416229248, "learning_rate": 9.14088278930969e-08, "loss": 0.3775, "step": 37385 }, { "epoch": 0.96, "grad_norm": 1.3236886262893677, "learning_rate": 9.129687514791375e-08, "loss": 0.5116, "step": 37386 }, { "epoch": 0.96, "grad_norm": 1.4378591775894165, "learning_rate": 9.118499068728748e-08, "loss": 0.5462, "step": 37387 }, { "epoch": 0.96, "grad_norm": 1.0797902345657349, "learning_rate": 9.107317451198861e-08, "loss": 0.422, "step": 37388 }, { "epoch": 0.96, "grad_norm": 1.3084638118743896, "learning_rate": 9.096142662278873e-08, "loss": 0.493, "step": 37389 }, { "epoch": 0.96, "grad_norm": 1.5521320104599, "learning_rate": 9.084974702045613e-08, "loss": 0.5815, "step": 37390 }, { "epoch": 0.96, "grad_norm": 1.8115005493164062, "learning_rate": 9.07381357057624e-08, "loss": 0.6749, "step": 37391 }, { "epoch": 0.96, "grad_norm": 1.508518099784851, "learning_rate": 9.062659267947582e-08, "loss": 0.4982, "step": 37392 }, { "epoch": 0.96, "grad_norm": 3.3921618461608887, "learning_rate": 9.051511794236467e-08, "loss": 0.6454, "step": 37393 }, { "epoch": 0.96, "grad_norm": 3.0748398303985596, "learning_rate": 9.040371149519833e-08, "loss": 0.5723, "step": 37394 }, { "epoch": 0.96, "grad_norm": 1.537967562675476, "learning_rate": 9.029237333874285e-08, "loss": 0.476, "step": 37395 }, { "epoch": 0.96, "grad_norm": 1.147106409072876, "learning_rate": 9.018110347376651e-08, "loss": 0.58, "step": 37396 }, { "epoch": 0.96, "grad_norm": 1.0515429973602295, "learning_rate": 9.006990190103648e-08, "loss": 0.6902, "step": 37397 }, { "epoch": 0.96, "grad_norm": 1.4381768703460693, "learning_rate": 8.995876862131881e-08, "loss": 0.6293, "step": 37398 }, { "epoch": 0.96, "grad_norm": 1.9433298110961914, "learning_rate": 8.984770363537843e-08, "loss": 0.5398, "step": 37399 }, { "epoch": 0.96, "grad_norm": 6.884991645812988, "learning_rate": 8.973670694398251e-08, "loss": 0.4054, "step": 37400 }, { "epoch": 0.96, "grad_norm": 6.668463706970215, "learning_rate": 8.96257785478949e-08, "loss": 0.5703, "step": 37401 }, { "epoch": 0.96, "grad_norm": 1.1935471296310425, "learning_rate": 8.951491844787829e-08, "loss": 0.3993, "step": 37402 }, { "epoch": 0.96, "grad_norm": 1.236729621887207, "learning_rate": 8.9404126644701e-08, "loss": 0.6068, "step": 37403 }, { "epoch": 0.96, "grad_norm": 1.338937520980835, "learning_rate": 8.929340313912238e-08, "loss": 0.499, "step": 37404 }, { "epoch": 0.96, "grad_norm": 2.174713373184204, "learning_rate": 8.91827479319074e-08, "loss": 0.5274, "step": 37405 }, { "epoch": 0.96, "grad_norm": 0.9995449781417847, "learning_rate": 8.907216102381877e-08, "loss": 0.6538, "step": 37406 }, { "epoch": 0.96, "grad_norm": 15.382646560668945, "learning_rate": 8.896164241561811e-08, "loss": 0.6599, "step": 37407 }, { "epoch": 0.96, "grad_norm": 1.6541029214859009, "learning_rate": 8.885119210806703e-08, "loss": 0.4891, "step": 37408 }, { "epoch": 0.96, "grad_norm": 1.2541807889938354, "learning_rate": 8.874081010192603e-08, "loss": 0.6101, "step": 37409 }, { "epoch": 0.96, "grad_norm": 1.4696786403656006, "learning_rate": 8.863049639795784e-08, "loss": 0.5582, "step": 37410 }, { "epoch": 0.96, "grad_norm": 1.2170741558074951, "learning_rate": 8.852025099692074e-08, "loss": 0.4802, "step": 37411 }, { "epoch": 0.96, "grad_norm": 1.585852026939392, "learning_rate": 8.841007389957412e-08, "loss": 0.4698, "step": 37412 }, { "epoch": 0.96, "grad_norm": 1.9071930646896362, "learning_rate": 8.82999651066796e-08, "loss": 0.5048, "step": 37413 }, { "epoch": 0.96, "grad_norm": 0.9913286566734314, "learning_rate": 8.818992461899323e-08, "loss": 0.4928, "step": 37414 }, { "epoch": 0.96, "grad_norm": 3.0246171951293945, "learning_rate": 8.807995243727663e-08, "loss": 0.5124, "step": 37415 }, { "epoch": 0.96, "grad_norm": 1.7077171802520752, "learning_rate": 8.797004856228363e-08, "loss": 0.5537, "step": 37416 }, { "epoch": 0.96, "grad_norm": 2.8478055000305176, "learning_rate": 8.786021299477476e-08, "loss": 0.441, "step": 37417 }, { "epoch": 0.96, "grad_norm": 1.9752665758132935, "learning_rate": 8.775044573550607e-08, "loss": 0.5547, "step": 37418 }, { "epoch": 0.96, "grad_norm": 1.3240669965744019, "learning_rate": 8.764074678523249e-08, "loss": 0.4465, "step": 37419 }, { "epoch": 0.96, "grad_norm": 1.6611270904541016, "learning_rate": 8.753111614471344e-08, "loss": 0.4047, "step": 37420 }, { "epoch": 0.96, "grad_norm": 1.4424782991409302, "learning_rate": 8.742155381470052e-08, "loss": 0.3771, "step": 37421 }, { "epoch": 0.96, "grad_norm": 2.454263925552368, "learning_rate": 8.731205979595203e-08, "loss": 0.5887, "step": 37422 }, { "epoch": 0.96, "grad_norm": 1.6004173755645752, "learning_rate": 8.720263408921958e-08, "loss": 0.5182, "step": 37423 }, { "epoch": 0.96, "grad_norm": 1.1429702043533325, "learning_rate": 8.709327669525925e-08, "loss": 0.4915, "step": 37424 }, { "epoch": 0.96, "grad_norm": 1.1593905687332153, "learning_rate": 8.698398761482485e-08, "loss": 0.4925, "step": 37425 }, { "epoch": 0.96, "grad_norm": 1.2228007316589355, "learning_rate": 8.687476684866803e-08, "loss": 0.5675, "step": 37426 }, { "epoch": 0.96, "grad_norm": 1.4914478063583374, "learning_rate": 8.67656143975426e-08, "loss": 0.5871, "step": 37427 }, { "epoch": 0.96, "grad_norm": 1.8047243356704712, "learning_rate": 8.665653026220133e-08, "loss": 0.5693, "step": 37428 }, { "epoch": 0.96, "grad_norm": 1.8784782886505127, "learning_rate": 8.65475144433936e-08, "loss": 0.4641, "step": 37429 }, { "epoch": 0.96, "grad_norm": 9.30095100402832, "learning_rate": 8.643856694187214e-08, "loss": 0.6618, "step": 37430 }, { "epoch": 0.96, "grad_norm": 5.267217636108398, "learning_rate": 8.632968775838858e-08, "loss": 0.4924, "step": 37431 }, { "epoch": 0.96, "grad_norm": 22.84168243408203, "learning_rate": 8.62208768936923e-08, "loss": 0.6597, "step": 37432 }, { "epoch": 0.96, "grad_norm": 1.7037264108657837, "learning_rate": 8.611213434853383e-08, "loss": 0.4569, "step": 37433 }, { "epoch": 0.96, "grad_norm": 1.4074203968048096, "learning_rate": 8.600346012366146e-08, "loss": 0.5899, "step": 37434 }, { "epoch": 0.96, "grad_norm": 1.0531808137893677, "learning_rate": 8.589485421982457e-08, "loss": 0.4063, "step": 37435 }, { "epoch": 0.96, "grad_norm": 1.6319841146469116, "learning_rate": 8.578631663777148e-08, "loss": 0.4802, "step": 37436 }, { "epoch": 0.96, "grad_norm": 2.123530149459839, "learning_rate": 8.567784737825047e-08, "loss": 0.4968, "step": 37437 }, { "epoch": 0.96, "grad_norm": 6.115634441375732, "learning_rate": 8.556944644200982e-08, "loss": 0.4947, "step": 37438 }, { "epoch": 0.96, "grad_norm": 1.6200885772705078, "learning_rate": 8.546111382979449e-08, "loss": 0.3995, "step": 37439 }, { "epoch": 0.96, "grad_norm": 1.1258691549301147, "learning_rate": 8.535284954235279e-08, "loss": 0.5021, "step": 37440 }, { "epoch": 0.96, "grad_norm": 1.4030674695968628, "learning_rate": 8.524465358043077e-08, "loss": 0.6697, "step": 37441 }, { "epoch": 0.96, "grad_norm": 1.8613280057907104, "learning_rate": 8.513652594477229e-08, "loss": 0.5793, "step": 37442 }, { "epoch": 0.96, "grad_norm": 1.222998023033142, "learning_rate": 8.502846663612452e-08, "loss": 0.5519, "step": 37443 }, { "epoch": 0.96, "grad_norm": 2.0516743659973145, "learning_rate": 8.492047565523132e-08, "loss": 0.3916, "step": 37444 }, { "epoch": 0.96, "grad_norm": 1.6217213869094849, "learning_rate": 8.481255300283764e-08, "loss": 0.5445, "step": 37445 }, { "epoch": 0.96, "grad_norm": 1.5273053646087646, "learning_rate": 8.470469867968512e-08, "loss": 0.6426, "step": 37446 }, { "epoch": 0.96, "grad_norm": 1.004762053489685, "learning_rate": 8.45969126865187e-08, "loss": 0.4768, "step": 37447 }, { "epoch": 0.96, "grad_norm": 8.307868957519531, "learning_rate": 8.448919502408226e-08, "loss": 0.6169, "step": 37448 }, { "epoch": 0.96, "grad_norm": 1.246620535850525, "learning_rate": 8.438154569311519e-08, "loss": 0.4636, "step": 37449 }, { "epoch": 0.96, "grad_norm": 1.3237229585647583, "learning_rate": 8.427396469436133e-08, "loss": 0.5193, "step": 37450 }, { "epoch": 0.96, "grad_norm": 0.8120094537734985, "learning_rate": 8.416645202856233e-08, "loss": 0.3806, "step": 37451 }, { "epoch": 0.96, "grad_norm": 1.4132132530212402, "learning_rate": 8.405900769645758e-08, "loss": 0.4617, "step": 37452 }, { "epoch": 0.96, "grad_norm": 2.8963494300842285, "learning_rate": 8.395163169878873e-08, "loss": 0.754, "step": 37453 }, { "epoch": 0.96, "grad_norm": 1.1002593040466309, "learning_rate": 8.384432403629517e-08, "loss": 0.3991, "step": 37454 }, { "epoch": 0.96, "grad_norm": 1.113285779953003, "learning_rate": 8.373708470971631e-08, "loss": 0.472, "step": 37455 }, { "epoch": 0.96, "grad_norm": 4.531806468963623, "learning_rate": 8.362991371979157e-08, "loss": 0.5767, "step": 37456 }, { "epoch": 0.96, "grad_norm": 1.3241902589797974, "learning_rate": 8.352281106726035e-08, "loss": 0.5507, "step": 37457 }, { "epoch": 0.96, "grad_norm": 1.1299328804016113, "learning_rate": 8.341577675285872e-08, "loss": 0.4287, "step": 37458 }, { "epoch": 0.96, "grad_norm": 2.140723943710327, "learning_rate": 8.3308810777325e-08, "loss": 0.6668, "step": 37459 }, { "epoch": 0.96, "grad_norm": 2.139106512069702, "learning_rate": 8.320191314139747e-08, "loss": 0.5887, "step": 37460 }, { "epoch": 0.96, "grad_norm": 17.239295959472656, "learning_rate": 8.309508384581221e-08, "loss": 0.6976, "step": 37461 }, { "epoch": 0.96, "grad_norm": 3.3339717388153076, "learning_rate": 8.298832289130531e-08, "loss": 0.6132, "step": 37462 }, { "epoch": 0.96, "grad_norm": 1.332743525505066, "learning_rate": 8.288163027861174e-08, "loss": 0.5386, "step": 37463 }, { "epoch": 0.96, "grad_norm": 1.1793370246887207, "learning_rate": 8.277500600846756e-08, "loss": 0.4115, "step": 37464 }, { "epoch": 0.96, "grad_norm": 1.0383299589157104, "learning_rate": 8.266845008160773e-08, "loss": 0.6028, "step": 37465 }, { "epoch": 0.96, "grad_norm": 1.211470365524292, "learning_rate": 8.256196249876613e-08, "loss": 0.5119, "step": 37466 }, { "epoch": 0.96, "grad_norm": 1.0530498027801514, "learning_rate": 8.245554326067662e-08, "loss": 0.6354, "step": 37467 }, { "epoch": 0.96, "grad_norm": 2.4763824939727783, "learning_rate": 8.234919236807303e-08, "loss": 0.5599, "step": 37468 }, { "epoch": 0.96, "grad_norm": 1.0601767301559448, "learning_rate": 8.224290982168814e-08, "loss": 0.4674, "step": 37469 }, { "epoch": 0.96, "grad_norm": 20.394054412841797, "learning_rate": 8.213669562225357e-08, "loss": 0.5173, "step": 37470 }, { "epoch": 0.96, "grad_norm": 1.6574500799179077, "learning_rate": 8.203054977050207e-08, "loss": 0.5982, "step": 37471 }, { "epoch": 0.96, "grad_norm": 2.043956995010376, "learning_rate": 8.192447226716416e-08, "loss": 0.5033, "step": 37472 }, { "epoch": 0.96, "grad_norm": 1.1589888334274292, "learning_rate": 8.181846311297259e-08, "loss": 0.3923, "step": 37473 }, { "epoch": 0.96, "grad_norm": 1.216373324394226, "learning_rate": 8.171252230865678e-08, "loss": 0.434, "step": 37474 }, { "epoch": 0.96, "grad_norm": 1.3901491165161133, "learning_rate": 8.160664985494726e-08, "loss": 0.5227, "step": 37475 }, { "epoch": 0.96, "grad_norm": 1.6978440284729004, "learning_rate": 8.150084575257234e-08, "loss": 0.5786, "step": 37476 }, { "epoch": 0.96, "grad_norm": 3.6984500885009766, "learning_rate": 8.139511000226363e-08, "loss": 0.5998, "step": 37477 }, { "epoch": 0.96, "grad_norm": 2.5990352630615234, "learning_rate": 8.128944260474724e-08, "loss": 0.6578, "step": 37478 }, { "epoch": 0.96, "grad_norm": 1.272835612297058, "learning_rate": 8.118384356075371e-08, "loss": 0.491, "step": 37479 }, { "epoch": 0.96, "grad_norm": 5.048050403594971, "learning_rate": 8.107831287100798e-08, "loss": 0.4689, "step": 37480 }, { "epoch": 0.96, "grad_norm": 7.801594257354736, "learning_rate": 8.097285053623949e-08, "loss": 0.6033, "step": 37481 }, { "epoch": 0.96, "grad_norm": 1.4094007015228271, "learning_rate": 8.086745655717432e-08, "loss": 0.5787, "step": 37482 }, { "epoch": 0.96, "grad_norm": 10.24974536895752, "learning_rate": 8.076213093453856e-08, "loss": 0.6901, "step": 37483 }, { "epoch": 0.96, "grad_norm": 1.438733696937561, "learning_rate": 8.065687366905938e-08, "loss": 0.6362, "step": 37484 }, { "epoch": 0.96, "grad_norm": 1.1530263423919678, "learning_rate": 8.055168476145958e-08, "loss": 0.4693, "step": 37485 }, { "epoch": 0.96, "grad_norm": 1.4777147769927979, "learning_rate": 8.044656421246744e-08, "loss": 0.4611, "step": 37486 }, { "epoch": 0.96, "grad_norm": 1.263472557067871, "learning_rate": 8.034151202280349e-08, "loss": 0.4109, "step": 37487 }, { "epoch": 0.96, "grad_norm": 1.484673023223877, "learning_rate": 8.023652819319494e-08, "loss": 0.4826, "step": 37488 }, { "epoch": 0.96, "grad_norm": 3.9041857719421387, "learning_rate": 8.013161272436343e-08, "loss": 0.5229, "step": 37489 }, { "epoch": 0.96, "grad_norm": 1.3199652433395386, "learning_rate": 8.002676561703282e-08, "loss": 0.4842, "step": 37490 }, { "epoch": 0.96, "grad_norm": 1.6866220235824585, "learning_rate": 7.992198687192588e-08, "loss": 0.535, "step": 37491 }, { "epoch": 0.96, "grad_norm": 1.0341503620147705, "learning_rate": 7.981727648976423e-08, "loss": 0.4405, "step": 37492 }, { "epoch": 0.96, "grad_norm": 1.4392223358154297, "learning_rate": 7.971263447126842e-08, "loss": 0.4591, "step": 37493 }, { "epoch": 0.96, "grad_norm": 1.3579853773117065, "learning_rate": 7.960806081716122e-08, "loss": 0.5119, "step": 37494 }, { "epoch": 0.96, "grad_norm": 4.410475254058838, "learning_rate": 7.950355552816203e-08, "loss": 0.4331, "step": 37495 }, { "epoch": 0.96, "grad_norm": 1.2812381982803345, "learning_rate": 7.93991186049925e-08, "loss": 0.5726, "step": 37496 }, { "epoch": 0.96, "grad_norm": 3.613962411880493, "learning_rate": 7.929475004837094e-08, "loss": 0.7342, "step": 37497 }, { "epoch": 0.96, "grad_norm": 2.4788613319396973, "learning_rate": 7.919044985901792e-08, "loss": 0.6145, "step": 37498 }, { "epoch": 0.96, "grad_norm": 1.394203782081604, "learning_rate": 7.90862180376506e-08, "loss": 0.5825, "step": 37499 }, { "epoch": 0.96, "grad_norm": 1.2832157611846924, "learning_rate": 7.898205458498843e-08, "loss": 0.583, "step": 37500 }, { "epoch": 0.96, "grad_norm": 1.0662604570388794, "learning_rate": 7.88779595017497e-08, "loss": 0.4757, "step": 37501 }, { "epoch": 0.96, "grad_norm": 1.3154118061065674, "learning_rate": 7.877393278865053e-08, "loss": 0.3776, "step": 37502 }, { "epoch": 0.96, "grad_norm": 1.3571728467941284, "learning_rate": 7.866997444640811e-08, "loss": 0.3706, "step": 37503 }, { "epoch": 0.96, "grad_norm": 1.3439949750900269, "learning_rate": 7.856608447573855e-08, "loss": 0.4387, "step": 37504 }, { "epoch": 0.96, "grad_norm": 11.620729446411133, "learning_rate": 7.846226287735903e-08, "loss": 0.6029, "step": 37505 }, { "epoch": 0.96, "grad_norm": 1.2745648622512817, "learning_rate": 7.835850965198455e-08, "loss": 0.4928, "step": 37506 }, { "epoch": 0.96, "grad_norm": 1.2947924137115479, "learning_rate": 7.825482480032897e-08, "loss": 0.4159, "step": 37507 }, { "epoch": 0.96, "grad_norm": 1.592307448387146, "learning_rate": 7.815120832310729e-08, "loss": 0.5991, "step": 37508 }, { "epoch": 0.96, "grad_norm": 1.541686773300171, "learning_rate": 7.80476602210356e-08, "loss": 0.4929, "step": 37509 }, { "epoch": 0.96, "grad_norm": 1.379881501197815, "learning_rate": 7.794418049482444e-08, "loss": 0.536, "step": 37510 }, { "epoch": 0.96, "grad_norm": 1.8450802564620972, "learning_rate": 7.784076914518879e-08, "loss": 0.4797, "step": 37511 }, { "epoch": 0.96, "grad_norm": 1.6108777523040771, "learning_rate": 7.773742617284141e-08, "loss": 0.4464, "step": 37512 }, { "epoch": 0.96, "grad_norm": 6.436851501464844, "learning_rate": 7.763415157849396e-08, "loss": 0.5852, "step": 37513 }, { "epoch": 0.96, "grad_norm": 0.9628443717956543, "learning_rate": 7.7530945362857e-08, "loss": 0.5339, "step": 37514 }, { "epoch": 0.96, "grad_norm": 1.4908643960952759, "learning_rate": 7.742780752664325e-08, "loss": 0.6356, "step": 37515 }, { "epoch": 0.96, "grad_norm": 1.0176982879638672, "learning_rate": 7.732473807056329e-08, "loss": 0.4979, "step": 37516 }, { "epoch": 0.96, "grad_norm": 2.1432056427001953, "learning_rate": 7.722173699532654e-08, "loss": 0.5526, "step": 37517 }, { "epoch": 0.96, "grad_norm": 4.510316371917725, "learning_rate": 7.711880430164464e-08, "loss": 0.3518, "step": 37518 }, { "epoch": 0.96, "grad_norm": 2.0877511501312256, "learning_rate": 7.701593999022594e-08, "loss": 0.533, "step": 37519 }, { "epoch": 0.96, "grad_norm": 1.265840768814087, "learning_rate": 7.691314406177763e-08, "loss": 0.55, "step": 37520 }, { "epoch": 0.96, "grad_norm": 1.7531465291976929, "learning_rate": 7.681041651701026e-08, "loss": 0.4816, "step": 37521 }, { "epoch": 0.96, "grad_norm": 1.830739140510559, "learning_rate": 7.670775735663216e-08, "loss": 0.3619, "step": 37522 }, { "epoch": 0.96, "grad_norm": 1.326495885848999, "learning_rate": 7.660516658134831e-08, "loss": 0.4714, "step": 37523 }, { "epoch": 0.96, "grad_norm": 5.920604228973389, "learning_rate": 7.650264419186814e-08, "loss": 0.6125, "step": 37524 }, { "epoch": 0.96, "grad_norm": 1.570713758468628, "learning_rate": 7.640019018889666e-08, "loss": 0.5582, "step": 37525 }, { "epoch": 0.96, "grad_norm": 1.4051382541656494, "learning_rate": 7.629780457314106e-08, "loss": 0.4551, "step": 37526 }, { "epoch": 0.96, "grad_norm": 2.0681045055389404, "learning_rate": 7.619548734530636e-08, "loss": 0.5704, "step": 37527 }, { "epoch": 0.96, "grad_norm": 8.880653381347656, "learning_rate": 7.60932385060964e-08, "loss": 0.5442, "step": 37528 }, { "epoch": 0.96, "grad_norm": 1.548406720161438, "learning_rate": 7.599105805621843e-08, "loss": 0.5903, "step": 37529 }, { "epoch": 0.96, "grad_norm": 1.3475768566131592, "learning_rate": 7.588894599637409e-08, "loss": 0.6143, "step": 37530 }, { "epoch": 0.96, "grad_norm": 1.2364563941955566, "learning_rate": 7.578690232726838e-08, "loss": 0.4662, "step": 37531 }, { "epoch": 0.96, "grad_norm": 1.1544862985610962, "learning_rate": 7.568492704960517e-08, "loss": 0.597, "step": 37532 }, { "epoch": 0.96, "grad_norm": 1.7760347127914429, "learning_rate": 7.558302016408614e-08, "loss": 0.3735, "step": 37533 }, { "epoch": 0.96, "grad_norm": 1.4157989025115967, "learning_rate": 7.548118167141294e-08, "loss": 0.4572, "step": 37534 }, { "epoch": 0.96, "grad_norm": 1.796065092086792, "learning_rate": 7.537941157228835e-08, "loss": 0.4936, "step": 37535 }, { "epoch": 0.96, "grad_norm": 1.711350679397583, "learning_rate": 7.527770986741511e-08, "loss": 0.493, "step": 37536 }, { "epoch": 0.96, "grad_norm": 1.9556444883346558, "learning_rate": 7.517607655749159e-08, "loss": 0.5602, "step": 37537 }, { "epoch": 0.96, "grad_norm": 1.6539289951324463, "learning_rate": 7.507451164321833e-08, "loss": 0.5462, "step": 37538 }, { "epoch": 0.96, "grad_norm": 1.855088233947754, "learning_rate": 7.497301512529697e-08, "loss": 0.7331, "step": 37539 }, { "epoch": 0.96, "grad_norm": 1.299458384513855, "learning_rate": 7.487158700442587e-08, "loss": 0.5057, "step": 37540 }, { "epoch": 0.96, "grad_norm": 1.7594248056411743, "learning_rate": 7.477022728130445e-08, "loss": 0.5379, "step": 37541 }, { "epoch": 0.96, "grad_norm": 2.8811471462249756, "learning_rate": 7.466893595663105e-08, "loss": 0.6341, "step": 37542 }, { "epoch": 0.96, "grad_norm": 6.054971694946289, "learning_rate": 7.456771303110288e-08, "loss": 0.4945, "step": 37543 }, { "epoch": 0.96, "grad_norm": 4.785072326660156, "learning_rate": 7.44665585054194e-08, "loss": 0.5234, "step": 37544 }, { "epoch": 0.96, "grad_norm": 1.2435357570648193, "learning_rate": 7.436547238027558e-08, "loss": 0.4657, "step": 37545 }, { "epoch": 0.96, "grad_norm": 4.104802131652832, "learning_rate": 7.426445465636978e-08, "loss": 0.6135, "step": 37546 }, { "epoch": 0.96, "grad_norm": 1.2039841413497925, "learning_rate": 7.416350533439698e-08, "loss": 0.5047, "step": 37547 }, { "epoch": 0.96, "grad_norm": 2.149663209915161, "learning_rate": 7.40626244150533e-08, "loss": 0.4615, "step": 37548 }, { "epoch": 0.96, "grad_norm": 2.5689620971679688, "learning_rate": 7.396181189903373e-08, "loss": 0.6985, "step": 37549 }, { "epoch": 0.96, "grad_norm": 1.3900057077407837, "learning_rate": 7.386106778703328e-08, "loss": 0.4773, "step": 37550 }, { "epoch": 0.96, "grad_norm": 2.3296704292297363, "learning_rate": 7.376039207974695e-08, "loss": 0.52, "step": 37551 }, { "epoch": 0.96, "grad_norm": 1.3400061130523682, "learning_rate": 7.365978477786639e-08, "loss": 0.5571, "step": 37552 }, { "epoch": 0.96, "grad_norm": 1.6877771615982056, "learning_rate": 7.355924588208774e-08, "loss": 0.4969, "step": 37553 }, { "epoch": 0.96, "grad_norm": 1.7725898027420044, "learning_rate": 7.345877539310153e-08, "loss": 0.5001, "step": 37554 }, { "epoch": 0.96, "grad_norm": 1.74643874168396, "learning_rate": 7.335837331160056e-08, "loss": 0.6201, "step": 37555 }, { "epoch": 0.96, "grad_norm": 1.4695868492126465, "learning_rate": 7.32580396382776e-08, "loss": 0.54, "step": 37556 }, { "epoch": 0.96, "grad_norm": 2.23815655708313, "learning_rate": 7.315777437382432e-08, "loss": 0.571, "step": 37557 }, { "epoch": 0.96, "grad_norm": 0.918148934841156, "learning_rate": 7.305757751893016e-08, "loss": 0.4181, "step": 37558 }, { "epoch": 0.96, "grad_norm": 1.8106285333633423, "learning_rate": 7.295744907428681e-08, "loss": 0.6347, "step": 37559 }, { "epoch": 0.96, "grad_norm": 5.975430488586426, "learning_rate": 7.28573890405837e-08, "loss": 0.7167, "step": 37560 }, { "epoch": 0.96, "grad_norm": 21.2989501953125, "learning_rate": 7.275739741851029e-08, "loss": 0.7015, "step": 37561 }, { "epoch": 0.96, "grad_norm": 1.068776249885559, "learning_rate": 7.265747420875602e-08, "loss": 0.4092, "step": 37562 }, { "epoch": 0.96, "grad_norm": 6.318748950958252, "learning_rate": 7.255761941201034e-08, "loss": 0.6383, "step": 37563 }, { "epoch": 0.96, "grad_norm": 1.470117211341858, "learning_rate": 7.245783302896048e-08, "loss": 0.5523, "step": 37564 }, { "epoch": 0.96, "grad_norm": 1.4351096153259277, "learning_rate": 7.235811506029367e-08, "loss": 0.4577, "step": 37565 }, { "epoch": 0.96, "grad_norm": 2.73984694480896, "learning_rate": 7.225846550669712e-08, "loss": 0.5592, "step": 37566 }, { "epoch": 0.96, "grad_norm": 1.3325998783111572, "learning_rate": 7.21588843688592e-08, "loss": 0.5295, "step": 37567 }, { "epoch": 0.96, "grad_norm": 1.9926767349243164, "learning_rate": 7.20593716474638e-08, "loss": 0.6017, "step": 37568 }, { "epoch": 0.96, "grad_norm": 2.054205894470215, "learning_rate": 7.195992734319813e-08, "loss": 0.5852, "step": 37569 }, { "epoch": 0.96, "grad_norm": 1.860809564590454, "learning_rate": 7.18605514567472e-08, "loss": 0.6137, "step": 37570 }, { "epoch": 0.96, "grad_norm": 1.2341396808624268, "learning_rate": 7.176124398879713e-08, "loss": 0.3536, "step": 37571 }, { "epoch": 0.96, "grad_norm": 1.5249966382980347, "learning_rate": 7.166200494002962e-08, "loss": 0.4383, "step": 37572 }, { "epoch": 0.96, "grad_norm": 2.600565195083618, "learning_rate": 7.156283431112965e-08, "loss": 0.7107, "step": 37573 }, { "epoch": 0.96, "grad_norm": 1.3167067766189575, "learning_rate": 7.146373210278224e-08, "loss": 0.4841, "step": 37574 }, { "epoch": 0.96, "grad_norm": 2.1272878646850586, "learning_rate": 7.136469831566906e-08, "loss": 0.435, "step": 37575 }, { "epoch": 0.96, "grad_norm": 1.0305505990982056, "learning_rate": 7.12657329504718e-08, "loss": 0.5535, "step": 37576 }, { "epoch": 0.96, "grad_norm": 1.7164080142974854, "learning_rate": 7.116683600787322e-08, "loss": 0.5112, "step": 37577 }, { "epoch": 0.96, "grad_norm": 2.048858880996704, "learning_rate": 7.106800748855502e-08, "loss": 0.6169, "step": 37578 }, { "epoch": 0.96, "grad_norm": 2.2063136100769043, "learning_rate": 7.096924739319888e-08, "loss": 0.479, "step": 37579 }, { "epoch": 0.96, "grad_norm": 1.1994284391403198, "learning_rate": 7.087055572248424e-08, "loss": 0.4777, "step": 37580 }, { "epoch": 0.96, "grad_norm": 4.1961212158203125, "learning_rate": 7.077193247709169e-08, "loss": 0.5201, "step": 37581 }, { "epoch": 0.96, "grad_norm": 1.4517593383789062, "learning_rate": 7.067337765769956e-08, "loss": 0.7094, "step": 37582 }, { "epoch": 0.96, "grad_norm": 2.8331708908081055, "learning_rate": 7.05748912649895e-08, "loss": 0.4375, "step": 37583 }, { "epoch": 0.96, "grad_norm": 4.412650108337402, "learning_rate": 7.04764732996388e-08, "loss": 0.5427, "step": 37584 }, { "epoch": 0.96, "grad_norm": 1.1236525774002075, "learning_rate": 7.037812376232689e-08, "loss": 0.4741, "step": 37585 }, { "epoch": 0.96, "grad_norm": 0.9932541847229004, "learning_rate": 7.027984265372878e-08, "loss": 0.5348, "step": 37586 }, { "epoch": 0.96, "grad_norm": 11.93549633026123, "learning_rate": 7.018162997452504e-08, "loss": 0.6008, "step": 37587 }, { "epoch": 0.96, "grad_norm": 1.5610381364822388, "learning_rate": 7.00834857253907e-08, "loss": 0.5563, "step": 37588 }, { "epoch": 0.96, "grad_norm": 2.289487838745117, "learning_rate": 6.998540990700186e-08, "loss": 0.4919, "step": 37589 }, { "epoch": 0.96, "grad_norm": 1.523368239402771, "learning_rate": 6.988740252003467e-08, "loss": 0.6062, "step": 37590 }, { "epoch": 0.96, "grad_norm": 1.3108174800872803, "learning_rate": 6.978946356516525e-08, "loss": 0.4889, "step": 37591 }, { "epoch": 0.96, "grad_norm": 1.3028643131256104, "learning_rate": 6.96915930430686e-08, "loss": 0.4418, "step": 37592 }, { "epoch": 0.96, "grad_norm": 1.5967998504638672, "learning_rate": 6.959379095441753e-08, "loss": 0.503, "step": 37593 }, { "epoch": 0.96, "grad_norm": 8.454038619995117, "learning_rate": 6.949605729988817e-08, "loss": 0.6003, "step": 37594 }, { "epoch": 0.96, "grad_norm": 2.6531527042388916, "learning_rate": 6.939839208015331e-08, "loss": 0.4968, "step": 37595 }, { "epoch": 0.96, "grad_norm": 1.2044697999954224, "learning_rate": 6.930079529588463e-08, "loss": 0.3588, "step": 37596 }, { "epoch": 0.96, "grad_norm": 1.1970324516296387, "learning_rate": 6.920326694775603e-08, "loss": 0.4717, "step": 37597 }, { "epoch": 0.96, "grad_norm": 6.345302581787109, "learning_rate": 6.910580703643921e-08, "loss": 0.5902, "step": 37598 }, { "epoch": 0.96, "grad_norm": 1.7947685718536377, "learning_rate": 6.900841556260696e-08, "loss": 0.4736, "step": 37599 }, { "epoch": 0.96, "grad_norm": 1.4735383987426758, "learning_rate": 6.891109252692873e-08, "loss": 0.5436, "step": 37600 }, { "epoch": 0.96, "grad_norm": 1.8082056045532227, "learning_rate": 6.881383793007623e-08, "loss": 0.6329, "step": 37601 }, { "epoch": 0.96, "grad_norm": 1.7671128511428833, "learning_rate": 6.87166517727189e-08, "loss": 0.4964, "step": 37602 }, { "epoch": 0.96, "grad_norm": 1.4392033815383911, "learning_rate": 6.861953405552624e-08, "loss": 0.5445, "step": 37603 }, { "epoch": 0.96, "grad_norm": 1.230284333229065, "learning_rate": 6.852248477916989e-08, "loss": 0.4419, "step": 37604 }, { "epoch": 0.96, "grad_norm": 1.5959937572479248, "learning_rate": 6.842550394431602e-08, "loss": 0.5367, "step": 37605 }, { "epoch": 0.96, "grad_norm": 1.7183482646942139, "learning_rate": 6.832859155163407e-08, "loss": 0.5852, "step": 37606 }, { "epoch": 0.96, "grad_norm": 1.228574275970459, "learning_rate": 6.82317476017924e-08, "loss": 0.3719, "step": 37607 }, { "epoch": 0.96, "grad_norm": 1.3000620603561401, "learning_rate": 6.813497209545716e-08, "loss": 0.4893, "step": 37608 }, { "epoch": 0.96, "grad_norm": 1.1066808700561523, "learning_rate": 6.803826503329669e-08, "loss": 0.491, "step": 37609 }, { "epoch": 0.96, "grad_norm": 1.5483946800231934, "learning_rate": 6.794162641597601e-08, "loss": 0.6026, "step": 37610 }, { "epoch": 0.96, "grad_norm": 1.7642838954925537, "learning_rate": 6.784505624416127e-08, "loss": 0.4068, "step": 37611 }, { "epoch": 0.96, "grad_norm": 7.988805294036865, "learning_rate": 6.774855451851969e-08, "loss": 0.6124, "step": 37612 }, { "epoch": 0.96, "grad_norm": 1.1655807495117188, "learning_rate": 6.765212123971299e-08, "loss": 0.4531, "step": 37613 }, { "epoch": 0.96, "grad_norm": 1.5823454856872559, "learning_rate": 6.75557564084095e-08, "loss": 0.4773, "step": 37614 }, { "epoch": 0.96, "grad_norm": 13.90841293334961, "learning_rate": 6.745946002527093e-08, "loss": 0.4709, "step": 37615 }, { "epoch": 0.96, "grad_norm": 2.0231945514678955, "learning_rate": 6.73632320909623e-08, "loss": 0.4394, "step": 37616 }, { "epoch": 0.96, "grad_norm": 2.8832340240478516, "learning_rate": 6.726707260614417e-08, "loss": 0.4451, "step": 37617 }, { "epoch": 0.96, "grad_norm": 1.3853631019592285, "learning_rate": 6.717098157148272e-08, "loss": 0.5222, "step": 37618 }, { "epoch": 0.96, "grad_norm": 1.2960481643676758, "learning_rate": 6.70749589876385e-08, "loss": 0.4435, "step": 37619 }, { "epoch": 0.96, "grad_norm": 1.9704012870788574, "learning_rate": 6.697900485527208e-08, "loss": 0.4084, "step": 37620 }, { "epoch": 0.96, "grad_norm": 2.741961717605591, "learning_rate": 6.68831191750463e-08, "loss": 0.5357, "step": 37621 }, { "epoch": 0.96, "grad_norm": 1.2882544994354248, "learning_rate": 6.678730194762173e-08, "loss": 0.5171, "step": 37622 }, { "epoch": 0.96, "grad_norm": 1.5726310014724731, "learning_rate": 6.669155317365783e-08, "loss": 0.5479, "step": 37623 }, { "epoch": 0.96, "grad_norm": 1.3699610233306885, "learning_rate": 6.659587285381519e-08, "loss": 0.4694, "step": 37624 }, { "epoch": 0.96, "grad_norm": 1.5084381103515625, "learning_rate": 6.650026098875328e-08, "loss": 0.5011, "step": 37625 }, { "epoch": 0.96, "grad_norm": 1.378637433052063, "learning_rate": 6.640471757913048e-08, "loss": 0.6329, "step": 37626 }, { "epoch": 0.96, "grad_norm": 1.3155503273010254, "learning_rate": 6.630924262560623e-08, "loss": 0.4811, "step": 37627 }, { "epoch": 0.96, "grad_norm": 1.9024817943572998, "learning_rate": 6.621383612883669e-08, "loss": 0.43, "step": 37628 }, { "epoch": 0.96, "grad_norm": 1.5834249258041382, "learning_rate": 6.611849808948134e-08, "loss": 0.6683, "step": 37629 }, { "epoch": 0.96, "grad_norm": 1.1593278646469116, "learning_rate": 6.60232285081952e-08, "loss": 0.5171, "step": 37630 }, { "epoch": 0.96, "grad_norm": 0.9982285499572754, "learning_rate": 6.592802738563664e-08, "loss": 0.401, "step": 37631 }, { "epoch": 0.96, "grad_norm": 1.3628166913986206, "learning_rate": 6.583289472246069e-08, "loss": 0.356, "step": 37632 }, { "epoch": 0.96, "grad_norm": 0.8333258628845215, "learning_rate": 6.573783051932348e-08, "loss": 0.5041, "step": 37633 }, { "epoch": 0.96, "grad_norm": 2.97865629196167, "learning_rate": 6.564283477687893e-08, "loss": 0.5199, "step": 37634 }, { "epoch": 0.96, "grad_norm": 1.6383026838302612, "learning_rate": 6.554790749578321e-08, "loss": 0.5606, "step": 37635 }, { "epoch": 0.96, "grad_norm": 1.3098832368850708, "learning_rate": 6.545304867669023e-08, "loss": 0.5108, "step": 37636 }, { "epoch": 0.96, "grad_norm": 2.0390961170196533, "learning_rate": 6.535825832025166e-08, "loss": 0.6273, "step": 37637 }, { "epoch": 0.96, "grad_norm": 1.6360108852386475, "learning_rate": 6.526353642712369e-08, "loss": 0.493, "step": 37638 }, { "epoch": 0.96, "grad_norm": 1.9861570596694946, "learning_rate": 6.516888299795798e-08, "loss": 0.791, "step": 37639 }, { "epoch": 0.96, "grad_norm": 1.1306172609329224, "learning_rate": 6.507429803340736e-08, "loss": 0.5858, "step": 37640 }, { "epoch": 0.96, "grad_norm": 1.4595603942871094, "learning_rate": 6.497978153412132e-08, "loss": 0.5011, "step": 37641 }, { "epoch": 0.96, "grad_norm": 1.8789910078048706, "learning_rate": 6.488533350075376e-08, "loss": 0.3862, "step": 37642 }, { "epoch": 0.96, "grad_norm": 1.4657272100448608, "learning_rate": 6.479095393395419e-08, "loss": 0.6567, "step": 37643 }, { "epoch": 0.96, "grad_norm": 1.330666422843933, "learning_rate": 6.469664283437426e-08, "loss": 0.6183, "step": 37644 }, { "epoch": 0.96, "grad_norm": 1.247812032699585, "learning_rate": 6.46024002026624e-08, "loss": 0.4346, "step": 37645 }, { "epoch": 0.96, "grad_norm": 1.6597994565963745, "learning_rate": 6.450822603946915e-08, "loss": 0.4382, "step": 37646 }, { "epoch": 0.96, "grad_norm": 2.275326728820801, "learning_rate": 6.441412034544182e-08, "loss": 0.7167, "step": 37647 }, { "epoch": 0.96, "grad_norm": 2.0572853088378906, "learning_rate": 6.432008312123206e-08, "loss": 0.4392, "step": 37648 }, { "epoch": 0.96, "grad_norm": 20.024723052978516, "learning_rate": 6.422611436748494e-08, "loss": 0.5981, "step": 37649 }, { "epoch": 0.96, "grad_norm": 5.896745204925537, "learning_rate": 6.413221408484993e-08, "loss": 0.6369, "step": 37650 }, { "epoch": 0.97, "grad_norm": 5.762973785400391, "learning_rate": 6.403838227397208e-08, "loss": 0.5203, "step": 37651 }, { "epoch": 0.97, "grad_norm": 1.1260641813278198, "learning_rate": 6.394461893550085e-08, "loss": 0.5609, "step": 37652 }, { "epoch": 0.97, "grad_norm": 2.322766065597534, "learning_rate": 6.385092407008021e-08, "loss": 0.6394, "step": 37653 }, { "epoch": 0.97, "grad_norm": 1.5591822862625122, "learning_rate": 6.375729767835626e-08, "loss": 0.5567, "step": 37654 }, { "epoch": 0.97, "grad_norm": 3.938459873199463, "learning_rate": 6.366373976097518e-08, "loss": 0.4401, "step": 37655 }, { "epoch": 0.97, "grad_norm": 1.3849157094955444, "learning_rate": 6.357025031858088e-08, "loss": 0.5104, "step": 37656 }, { "epoch": 0.97, "grad_norm": 1.40829598903656, "learning_rate": 6.347682935181731e-08, "loss": 0.53, "step": 37657 }, { "epoch": 0.97, "grad_norm": 1.2753492593765259, "learning_rate": 6.33834768613284e-08, "loss": 0.4133, "step": 37658 }, { "epoch": 0.97, "grad_norm": 1.5050389766693115, "learning_rate": 6.329019284775806e-08, "loss": 0.5938, "step": 37659 }, { "epoch": 0.97, "grad_norm": 1.2333120107650757, "learning_rate": 6.319697731174912e-08, "loss": 0.5592, "step": 37660 }, { "epoch": 0.97, "grad_norm": 2.656695604324341, "learning_rate": 6.310383025394328e-08, "loss": 0.6767, "step": 37661 }, { "epoch": 0.97, "grad_norm": 6.047588348388672, "learning_rate": 6.301075167498338e-08, "loss": 0.6083, "step": 37662 }, { "epoch": 0.97, "grad_norm": 2.6357991695404053, "learning_rate": 6.291774157551e-08, "loss": 0.6518, "step": 37663 }, { "epoch": 0.97, "grad_norm": 1.3328313827514648, "learning_rate": 6.282479995616375e-08, "loss": 0.6045, "step": 37664 }, { "epoch": 0.97, "grad_norm": 0.9183782339096069, "learning_rate": 6.273192681758744e-08, "loss": 0.384, "step": 37665 }, { "epoch": 0.97, "grad_norm": 3.140623092651367, "learning_rate": 6.263912216041834e-08, "loss": 0.682, "step": 37666 }, { "epoch": 0.97, "grad_norm": 1.8999390602111816, "learning_rate": 6.254638598529816e-08, "loss": 0.6244, "step": 37667 }, { "epoch": 0.97, "grad_norm": 1.3352593183517456, "learning_rate": 6.245371829286418e-08, "loss": 0.4931, "step": 37668 }, { "epoch": 0.97, "grad_norm": 1.7525814771652222, "learning_rate": 6.236111908375586e-08, "loss": 0.6332, "step": 37669 }, { "epoch": 0.97, "grad_norm": 1.6846504211425781, "learning_rate": 6.226858835861161e-08, "loss": 0.402, "step": 37670 }, { "epoch": 0.97, "grad_norm": 1.4505722522735596, "learning_rate": 6.217612611806756e-08, "loss": 0.496, "step": 37671 }, { "epoch": 0.97, "grad_norm": 1.2990076541900635, "learning_rate": 6.208373236276322e-08, "loss": 0.6034, "step": 37672 }, { "epoch": 0.97, "grad_norm": 8.470683097839355, "learning_rate": 6.199140709333362e-08, "loss": 0.6111, "step": 37673 }, { "epoch": 0.97, "grad_norm": 2.7616970539093018, "learning_rate": 6.189915031041605e-08, "loss": 0.5557, "step": 37674 }, { "epoch": 0.97, "grad_norm": 1.8286079168319702, "learning_rate": 6.180696201464553e-08, "loss": 0.5098, "step": 37675 }, { "epoch": 0.97, "grad_norm": 3.677816152572632, "learning_rate": 6.171484220665824e-08, "loss": 0.8047, "step": 37676 }, { "epoch": 0.97, "grad_norm": 1.9531381130218506, "learning_rate": 6.162279088708812e-08, "loss": 0.6078, "step": 37677 }, { "epoch": 0.97, "grad_norm": 1.886148452758789, "learning_rate": 6.153080805656908e-08, "loss": 0.5946, "step": 37678 }, { "epoch": 0.97, "grad_norm": 2.583482027053833, "learning_rate": 6.143889371573619e-08, "loss": 0.6261, "step": 37679 }, { "epoch": 0.97, "grad_norm": 1.5354784727096558, "learning_rate": 6.134704786522339e-08, "loss": 0.3549, "step": 37680 }, { "epoch": 0.97, "grad_norm": 1.7784156799316406, "learning_rate": 6.125527050566238e-08, "loss": 0.6603, "step": 37681 }, { "epoch": 0.97, "grad_norm": 2.8026561737060547, "learning_rate": 6.116356163768489e-08, "loss": 0.5277, "step": 37682 }, { "epoch": 0.97, "grad_norm": 1.6504753828048706, "learning_rate": 6.107192126192485e-08, "loss": 0.462, "step": 37683 }, { "epoch": 0.97, "grad_norm": 3.174636125564575, "learning_rate": 6.098034937901176e-08, "loss": 0.5968, "step": 37684 }, { "epoch": 0.97, "grad_norm": 1.2966023683547974, "learning_rate": 6.088884598957844e-08, "loss": 0.5573, "step": 37685 }, { "epoch": 0.97, "grad_norm": 1.0721478462219238, "learning_rate": 6.079741109425552e-08, "loss": 0.5496, "step": 37686 }, { "epoch": 0.97, "grad_norm": 1.280463695526123, "learning_rate": 6.070604469367247e-08, "loss": 0.5406, "step": 37687 }, { "epoch": 0.97, "grad_norm": 1.9094350337982178, "learning_rate": 6.061474678845769e-08, "loss": 0.6054, "step": 37688 }, { "epoch": 0.97, "grad_norm": 1.418195366859436, "learning_rate": 6.052351737924178e-08, "loss": 0.464, "step": 37689 }, { "epoch": 0.97, "grad_norm": 3.2292721271514893, "learning_rate": 6.043235646665424e-08, "loss": 0.5338, "step": 37690 }, { "epoch": 0.97, "grad_norm": 3.296424627304077, "learning_rate": 6.034126405132013e-08, "loss": 0.6425, "step": 37691 }, { "epoch": 0.97, "grad_norm": 1.1590814590454102, "learning_rate": 6.025024013387115e-08, "loss": 0.6138, "step": 37692 }, { "epoch": 0.97, "grad_norm": 1.0758895874023438, "learning_rate": 6.015928471493238e-08, "loss": 0.559, "step": 37693 }, { "epoch": 0.97, "grad_norm": 1.278641700744629, "learning_rate": 6.006839779513107e-08, "loss": 0.3995, "step": 37694 }, { "epoch": 0.97, "grad_norm": 0.8483319878578186, "learning_rate": 5.99775793750934e-08, "loss": 0.3417, "step": 37695 }, { "epoch": 0.97, "grad_norm": 1.115919589996338, "learning_rate": 5.988682945544555e-08, "loss": 0.4656, "step": 37696 }, { "epoch": 0.97, "grad_norm": 1.2574310302734375, "learning_rate": 5.979614803681255e-08, "loss": 0.4704, "step": 37697 }, { "epoch": 0.97, "grad_norm": 1.262960433959961, "learning_rate": 5.970553511981836e-08, "loss": 0.6376, "step": 37698 }, { "epoch": 0.97, "grad_norm": 2.290189027786255, "learning_rate": 5.961499070509025e-08, "loss": 0.5442, "step": 37699 }, { "epoch": 0.97, "grad_norm": 1.381367564201355, "learning_rate": 5.9524514793249944e-08, "loss": 0.4165, "step": 37700 }, { "epoch": 0.97, "grad_norm": 8.70352554321289, "learning_rate": 5.943410738492028e-08, "loss": 0.5019, "step": 37701 }, { "epoch": 0.97, "grad_norm": 1.2722643613815308, "learning_rate": 5.9343768480726314e-08, "loss": 0.4673, "step": 37702 }, { "epoch": 0.97, "grad_norm": 1.648860216140747, "learning_rate": 5.925349808128977e-08, "loss": 0.6472, "step": 37703 }, { "epoch": 0.97, "grad_norm": 1.5981568098068237, "learning_rate": 5.916329618723349e-08, "loss": 0.5999, "step": 37704 }, { "epoch": 0.97, "grad_norm": 3.1029162406921387, "learning_rate": 5.907316279917696e-08, "loss": 0.4911, "step": 37705 }, { "epoch": 0.97, "grad_norm": 1.4405285120010376, "learning_rate": 5.8983097917744145e-08, "loss": 0.4089, "step": 37706 }, { "epoch": 0.97, "grad_norm": 1.2978359460830688, "learning_rate": 5.8893101543553436e-08, "loss": 0.56, "step": 37707 }, { "epoch": 0.97, "grad_norm": 2.440321922302246, "learning_rate": 5.880317367722543e-08, "loss": 0.6198, "step": 37708 }, { "epoch": 0.97, "grad_norm": 1.1934293508529663, "learning_rate": 5.8713314319380766e-08, "loss": 0.4555, "step": 37709 }, { "epoch": 0.97, "grad_norm": 1.6997060775756836, "learning_rate": 5.862352347063782e-08, "loss": 0.401, "step": 37710 }, { "epoch": 0.97, "grad_norm": 1.5248503684997559, "learning_rate": 5.853380113161611e-08, "loss": 0.4915, "step": 37711 }, { "epoch": 0.97, "grad_norm": 1.1321046352386475, "learning_rate": 5.8444147302934016e-08, "loss": 0.5062, "step": 37712 }, { "epoch": 0.97, "grad_norm": 1.5039366483688354, "learning_rate": 5.8354561985208834e-08, "loss": 0.5466, "step": 37713 }, { "epoch": 0.97, "grad_norm": 1.2517659664154053, "learning_rate": 5.8265045179057845e-08, "loss": 0.3922, "step": 37714 }, { "epoch": 0.97, "grad_norm": 1.3526934385299683, "learning_rate": 5.817559688509722e-08, "loss": 0.4882, "step": 37715 }, { "epoch": 0.97, "grad_norm": 1.8748844861984253, "learning_rate": 5.808621710394535e-08, "loss": 0.603, "step": 37716 }, { "epoch": 0.97, "grad_norm": 2.201244354248047, "learning_rate": 5.799690583621731e-08, "loss": 0.7441, "step": 37717 }, { "epoch": 0.97, "grad_norm": 8.134135246276855, "learning_rate": 5.7907663082528154e-08, "loss": 0.6173, "step": 37718 }, { "epoch": 0.97, "grad_norm": 1.4295380115509033, "learning_rate": 5.781848884349295e-08, "loss": 0.5282, "step": 37719 }, { "epoch": 0.97, "grad_norm": 1.2187504768371582, "learning_rate": 5.772938311972676e-08, "loss": 0.5092, "step": 37720 }, { "epoch": 0.97, "grad_norm": 1.3365824222564697, "learning_rate": 5.7640345911843534e-08, "loss": 0.4844, "step": 37721 }, { "epoch": 0.97, "grad_norm": 1.235109567642212, "learning_rate": 5.755137722045501e-08, "loss": 0.5713, "step": 37722 }, { "epoch": 0.97, "grad_norm": 3.2666287422180176, "learning_rate": 5.7462477046177356e-08, "loss": 0.3924, "step": 37723 }, { "epoch": 0.97, "grad_norm": 1.0719062089920044, "learning_rate": 5.7373645389622315e-08, "loss": 0.4416, "step": 37724 }, { "epoch": 0.97, "grad_norm": 2.2803730964660645, "learning_rate": 5.72848822514005e-08, "loss": 0.6373, "step": 37725 }, { "epoch": 0.97, "grad_norm": 1.5538322925567627, "learning_rate": 5.7196187632124755e-08, "loss": 0.5563, "step": 37726 }, { "epoch": 0.97, "grad_norm": 7.573861122131348, "learning_rate": 5.710756153240682e-08, "loss": 0.541, "step": 37727 }, { "epoch": 0.97, "grad_norm": 1.7386242151260376, "learning_rate": 5.70190039528562e-08, "loss": 0.486, "step": 37728 }, { "epoch": 0.97, "grad_norm": 2.0547800064086914, "learning_rate": 5.693051489408352e-08, "loss": 0.5003, "step": 37729 }, { "epoch": 0.97, "grad_norm": 1.3936147689819336, "learning_rate": 5.6842094356700514e-08, "loss": 0.6635, "step": 37730 }, { "epoch": 0.97, "grad_norm": 1.4026445150375366, "learning_rate": 5.6753742341313366e-08, "loss": 0.5372, "step": 37731 }, { "epoch": 0.97, "grad_norm": 0.9372673630714417, "learning_rate": 5.66654588485327e-08, "loss": 0.4824, "step": 37732 }, { "epoch": 0.97, "grad_norm": 1.7839733362197876, "learning_rate": 5.65772438789669e-08, "loss": 0.4635, "step": 37733 }, { "epoch": 0.97, "grad_norm": 1.1869299411773682, "learning_rate": 5.6489097433223284e-08, "loss": 0.4271, "step": 37734 }, { "epoch": 0.97, "grad_norm": 2.0438833236694336, "learning_rate": 5.640101951191024e-08, "loss": 0.564, "step": 37735 }, { "epoch": 0.97, "grad_norm": 2.5273685455322266, "learning_rate": 5.6313010115633946e-08, "loss": 0.4804, "step": 37736 }, { "epoch": 0.97, "grad_norm": 1.0567110776901245, "learning_rate": 5.622506924500171e-08, "loss": 0.4266, "step": 37737 }, { "epoch": 0.97, "grad_norm": 1.2861576080322266, "learning_rate": 5.6137196900618585e-08, "loss": 0.6618, "step": 37738 }, { "epoch": 0.97, "grad_norm": 1.2947120666503906, "learning_rate": 5.6049393083089655e-08, "loss": 0.4398, "step": 37739 }, { "epoch": 0.97, "grad_norm": 1.7329738140106201, "learning_rate": 5.5961657793022204e-08, "loss": 0.5045, "step": 37740 }, { "epoch": 0.97, "grad_norm": 0.9666070342063904, "learning_rate": 5.587399103101909e-08, "loss": 0.3724, "step": 37741 }, { "epoch": 0.97, "grad_norm": 1.3160828351974487, "learning_rate": 5.578639279768427e-08, "loss": 0.5467, "step": 37742 }, { "epoch": 0.97, "grad_norm": 1.4663372039794922, "learning_rate": 5.5698863093622824e-08, "loss": 0.6695, "step": 37743 }, { "epoch": 0.97, "grad_norm": 1.2827142477035522, "learning_rate": 5.561140191943759e-08, "loss": 0.4535, "step": 37744 }, { "epoch": 0.97, "grad_norm": 1.340583324432373, "learning_rate": 5.5524009275730315e-08, "loss": 0.4315, "step": 37745 }, { "epoch": 0.97, "grad_norm": 1.3182826042175293, "learning_rate": 5.5436685163103855e-08, "loss": 0.5059, "step": 37746 }, { "epoch": 0.97, "grad_norm": 1.4728286266326904, "learning_rate": 5.5349429582159944e-08, "loss": 0.455, "step": 37747 }, { "epoch": 0.97, "grad_norm": 1.3283766508102417, "learning_rate": 5.526224253350032e-08, "loss": 0.5161, "step": 37748 }, { "epoch": 0.97, "grad_norm": 1.6066415309906006, "learning_rate": 5.517512401772451e-08, "loss": 0.4576, "step": 37749 }, { "epoch": 0.97, "grad_norm": 2.2529022693634033, "learning_rate": 5.508807403543426e-08, "loss": 0.675, "step": 37750 }, { "epoch": 0.97, "grad_norm": 1.1246228218078613, "learning_rate": 5.5001092587229076e-08, "loss": 0.4896, "step": 37751 }, { "epoch": 0.97, "grad_norm": 1.957626223564148, "learning_rate": 5.491417967370849e-08, "loss": 0.454, "step": 37752 }, { "epoch": 0.97, "grad_norm": 1.3369337320327759, "learning_rate": 5.482733529547091e-08, "loss": 0.5724, "step": 37753 }, { "epoch": 0.97, "grad_norm": 2.2198972702026367, "learning_rate": 5.474055945311585e-08, "loss": 0.3911, "step": 37754 }, { "epoch": 0.97, "grad_norm": 1.5523591041564941, "learning_rate": 5.4653852147239506e-08, "loss": 0.5412, "step": 37755 }, { "epoch": 0.97, "grad_norm": 2.1947803497314453, "learning_rate": 5.45672133784414e-08, "loss": 0.3632, "step": 37756 }, { "epoch": 0.97, "grad_norm": 1.5790354013442993, "learning_rate": 5.4480643147317716e-08, "loss": 0.5799, "step": 37757 }, { "epoch": 0.97, "grad_norm": 1.6737083196640015, "learning_rate": 5.439414145446464e-08, "loss": 0.5369, "step": 37758 }, { "epoch": 0.97, "grad_norm": 0.9766805768013, "learning_rate": 5.430770830047838e-08, "loss": 0.4317, "step": 37759 }, { "epoch": 0.97, "grad_norm": 1.569825530052185, "learning_rate": 5.422134368595511e-08, "loss": 0.6073, "step": 37760 }, { "epoch": 0.97, "grad_norm": 1.434504508972168, "learning_rate": 5.413504761149102e-08, "loss": 0.6851, "step": 37761 }, { "epoch": 0.97, "grad_norm": 2.382049083709717, "learning_rate": 5.404882007767786e-08, "loss": 0.757, "step": 37762 }, { "epoch": 0.97, "grad_norm": 1.6725237369537354, "learning_rate": 5.396266108511295e-08, "loss": 0.642, "step": 37763 }, { "epoch": 0.97, "grad_norm": 1.2703213691711426, "learning_rate": 5.3876570634388e-08, "loss": 0.4822, "step": 37764 }, { "epoch": 0.97, "grad_norm": 1.158236026763916, "learning_rate": 5.3790548726097014e-08, "loss": 0.4441, "step": 37765 }, { "epoch": 0.97, "grad_norm": 1.080528736114502, "learning_rate": 5.3704595360832836e-08, "loss": 0.5462, "step": 37766 }, { "epoch": 0.97, "grad_norm": 1.0095553398132324, "learning_rate": 5.361871053918721e-08, "loss": 0.4216, "step": 37767 }, { "epoch": 0.97, "grad_norm": 7.447912216186523, "learning_rate": 5.353289426175301e-08, "loss": 0.6387, "step": 37768 }, { "epoch": 0.97, "grad_norm": 1.7381465435028076, "learning_rate": 5.3447146529120864e-08, "loss": 0.4602, "step": 37769 }, { "epoch": 0.97, "grad_norm": 1.1848965883255005, "learning_rate": 5.3361467341882526e-08, "loss": 0.3737, "step": 37770 }, { "epoch": 0.97, "grad_norm": 1.9552640914916992, "learning_rate": 5.327585670062752e-08, "loss": 0.6387, "step": 37771 }, { "epoch": 0.97, "grad_norm": 1.6425219774246216, "learning_rate": 5.319031460594537e-08, "loss": 0.555, "step": 37772 }, { "epoch": 0.97, "grad_norm": 4.301064968109131, "learning_rate": 5.3104841058427833e-08, "loss": 0.6574, "step": 37773 }, { "epoch": 0.97, "grad_norm": 1.53719162940979, "learning_rate": 5.3019436058661114e-08, "loss": 0.4959, "step": 37774 }, { "epoch": 0.97, "grad_norm": 1.281518578529358, "learning_rate": 5.293409960723583e-08, "loss": 0.4528, "step": 37775 }, { "epoch": 0.97, "grad_norm": 6.505570411682129, "learning_rate": 5.2848831704739315e-08, "loss": 0.6813, "step": 37776 }, { "epoch": 0.97, "grad_norm": 1.1478036642074585, "learning_rate": 5.276363235175885e-08, "loss": 0.4197, "step": 37777 }, { "epoch": 0.97, "grad_norm": 1.3134018182754517, "learning_rate": 5.2678501548882877e-08, "loss": 0.4826, "step": 37778 }, { "epoch": 0.97, "grad_norm": 1.2533687353134155, "learning_rate": 5.259343929669647e-08, "loss": 0.4328, "step": 37779 }, { "epoch": 0.97, "grad_norm": 19.155208587646484, "learning_rate": 5.250844559578694e-08, "loss": 0.4607, "step": 37780 }, { "epoch": 0.97, "grad_norm": 1.9218157529830933, "learning_rate": 5.242352044673937e-08, "loss": 0.49, "step": 37781 }, { "epoch": 0.97, "grad_norm": 1.2752277851104736, "learning_rate": 5.233866385013886e-08, "loss": 0.5924, "step": 37782 }, { "epoch": 0.97, "grad_norm": 1.65861177444458, "learning_rate": 5.2253875806570484e-08, "loss": 0.5808, "step": 37783 }, { "epoch": 0.97, "grad_norm": 1.7499349117279053, "learning_rate": 5.216915631661934e-08, "loss": 0.4036, "step": 37784 }, { "epoch": 0.97, "grad_norm": 1.3258774280548096, "learning_rate": 5.208450538086829e-08, "loss": 0.364, "step": 37785 }, { "epoch": 0.97, "grad_norm": 1.147290587425232, "learning_rate": 5.199992299990131e-08, "loss": 0.5175, "step": 37786 }, { "epoch": 0.97, "grad_norm": 3.271962881088257, "learning_rate": 5.191540917430016e-08, "loss": 0.5527, "step": 37787 }, { "epoch": 0.97, "grad_norm": 1.4102652072906494, "learning_rate": 5.18309639046477e-08, "loss": 0.4518, "step": 37788 }, { "epoch": 0.97, "grad_norm": 2.244034767150879, "learning_rate": 5.1746587191526806e-08, "loss": 0.5687, "step": 37789 }, { "epoch": 0.97, "grad_norm": 2.437453269958496, "learning_rate": 5.1662279035518126e-08, "loss": 0.6319, "step": 37790 }, { "epoch": 0.97, "grad_norm": 1.0156714916229248, "learning_rate": 5.157803943720341e-08, "loss": 0.4629, "step": 37791 }, { "epoch": 0.97, "grad_norm": 2.6677780151367188, "learning_rate": 5.14938683971622e-08, "loss": 0.4068, "step": 37792 }, { "epoch": 0.97, "grad_norm": 1.0610431432724, "learning_rate": 5.140976591597513e-08, "loss": 0.4843, "step": 37793 }, { "epoch": 0.97, "grad_norm": 1.2893277406692505, "learning_rate": 5.132573199422064e-08, "loss": 0.5874, "step": 37794 }, { "epoch": 0.97, "grad_norm": 1.0450977087020874, "learning_rate": 5.124176663248048e-08, "loss": 0.4663, "step": 37795 }, { "epoch": 0.97, "grad_norm": 2.17006778717041, "learning_rate": 5.1157869831330864e-08, "loss": 0.5812, "step": 37796 }, { "epoch": 0.97, "grad_norm": 1.4711394309997559, "learning_rate": 5.107404159135021e-08, "loss": 0.5623, "step": 37797 }, { "epoch": 0.97, "grad_norm": 7.960158824920654, "learning_rate": 5.0990281913116945e-08, "loss": 0.584, "step": 37798 }, { "epoch": 0.97, "grad_norm": 4.212800025939941, "learning_rate": 5.09065907972095e-08, "loss": 0.6872, "step": 37799 }, { "epoch": 0.97, "grad_norm": 1.0134384632110596, "learning_rate": 5.0822968244200744e-08, "loss": 0.6439, "step": 37800 }, { "epoch": 0.97, "grad_norm": 1.4447369575500488, "learning_rate": 5.073941425467133e-08, "loss": 0.5998, "step": 37801 }, { "epoch": 0.97, "grad_norm": 1.5954800844192505, "learning_rate": 5.065592882919523e-08, "loss": 0.6385, "step": 37802 }, { "epoch": 0.97, "grad_norm": 1.3685117959976196, "learning_rate": 5.057251196834645e-08, "loss": 0.4558, "step": 37803 }, { "epoch": 0.97, "grad_norm": 1.2221437692642212, "learning_rate": 5.048916367270118e-08, "loss": 0.4451, "step": 37804 }, { "epoch": 0.97, "grad_norm": 2.309295654296875, "learning_rate": 5.040588394283452e-08, "loss": 0.7202, "step": 37805 }, { "epoch": 0.97, "grad_norm": 1.2599271535873413, "learning_rate": 5.032267277931935e-08, "loss": 0.5931, "step": 37806 }, { "epoch": 0.97, "grad_norm": 2.023751735687256, "learning_rate": 5.023953018272853e-08, "loss": 0.528, "step": 37807 }, { "epoch": 0.97, "grad_norm": 1.6823095083236694, "learning_rate": 5.015645615363718e-08, "loss": 0.4899, "step": 37808 }, { "epoch": 0.97, "grad_norm": 1.7210713624954224, "learning_rate": 5.0073450692615935e-08, "loss": 0.4512, "step": 37809 }, { "epoch": 0.97, "grad_norm": 1.2170164585113525, "learning_rate": 4.999051380023656e-08, "loss": 0.479, "step": 37810 }, { "epoch": 0.97, "grad_norm": 3.4854493141174316, "learning_rate": 4.990764547707194e-08, "loss": 0.4267, "step": 37811 }, { "epoch": 0.97, "grad_norm": 2.11757493019104, "learning_rate": 4.9824845723692726e-08, "loss": 0.6059, "step": 37812 }, { "epoch": 0.97, "grad_norm": 1.668101191520691, "learning_rate": 4.974211454066846e-08, "loss": 0.5251, "step": 37813 }, { "epoch": 0.97, "grad_norm": 2.845724582672119, "learning_rate": 4.965945192857091e-08, "loss": 0.4627, "step": 37814 }, { "epoch": 0.97, "grad_norm": 1.3874421119689941, "learning_rate": 4.957685788796962e-08, "loss": 0.4572, "step": 37815 }, { "epoch": 0.97, "grad_norm": 1.4835484027862549, "learning_rate": 4.9494332419431915e-08, "loss": 0.565, "step": 37816 }, { "epoch": 0.97, "grad_norm": 1.5683701038360596, "learning_rate": 4.941187552352844e-08, "loss": 0.5515, "step": 37817 }, { "epoch": 0.97, "grad_norm": 1.0354039669036865, "learning_rate": 4.932948720082653e-08, "loss": 0.4503, "step": 37818 }, { "epoch": 0.97, "grad_norm": 9.779228210449219, "learning_rate": 4.924716745189462e-08, "loss": 0.5106, "step": 37819 }, { "epoch": 0.97, "grad_norm": 1.6779305934906006, "learning_rate": 4.916491627729891e-08, "loss": 0.4006, "step": 37820 }, { "epoch": 0.97, "grad_norm": 1.038064956665039, "learning_rate": 4.9082733677606745e-08, "loss": 0.3493, "step": 37821 }, { "epoch": 0.97, "grad_norm": 1.6727337837219238, "learning_rate": 4.900061965338543e-08, "loss": 0.5525, "step": 37822 }, { "epoch": 0.97, "grad_norm": 1.975152850151062, "learning_rate": 4.891857420520008e-08, "loss": 0.5295, "step": 37823 }, { "epoch": 0.97, "grad_norm": 1.3255515098571777, "learning_rate": 4.8836597333614676e-08, "loss": 0.5169, "step": 37824 }, { "epoch": 0.97, "grad_norm": 5.112898349761963, "learning_rate": 4.8754689039196554e-08, "loss": 0.4366, "step": 37825 }, { "epoch": 0.97, "grad_norm": 1.5700688362121582, "learning_rate": 4.867284932250971e-08, "loss": 0.4787, "step": 37826 }, { "epoch": 0.97, "grad_norm": 2.5325162410736084, "learning_rate": 4.8591078184115905e-08, "loss": 0.4911, "step": 37827 }, { "epoch": 0.97, "grad_norm": 1.845093846321106, "learning_rate": 4.8509375624581354e-08, "loss": 0.7094, "step": 37828 }, { "epoch": 0.97, "grad_norm": 1.2686067819595337, "learning_rate": 4.842774164446784e-08, "loss": 0.4945, "step": 37829 }, { "epoch": 0.97, "grad_norm": 1.3613532781600952, "learning_rate": 4.834617624433824e-08, "loss": 0.408, "step": 37830 }, { "epoch": 0.97, "grad_norm": 2.151293992996216, "learning_rate": 4.826467942475432e-08, "loss": 0.6006, "step": 37831 }, { "epoch": 0.97, "grad_norm": 1.2405425310134888, "learning_rate": 4.818325118627787e-08, "loss": 0.5574, "step": 37832 }, { "epoch": 0.97, "grad_norm": 1.2968482971191406, "learning_rate": 4.8101891529470645e-08, "loss": 0.4814, "step": 37833 }, { "epoch": 0.97, "grad_norm": 1.1052416563034058, "learning_rate": 4.8020600454893315e-08, "loss": 0.4399, "step": 37834 }, { "epoch": 0.97, "grad_norm": 2.6610140800476074, "learning_rate": 4.793937796310433e-08, "loss": 0.608, "step": 37835 }, { "epoch": 0.97, "grad_norm": 1.2300945520401, "learning_rate": 4.7858224054665444e-08, "loss": 0.4238, "step": 37836 }, { "epoch": 0.97, "grad_norm": 0.9998540282249451, "learning_rate": 4.777713873013401e-08, "loss": 0.5277, "step": 37837 }, { "epoch": 0.97, "grad_norm": 1.2863918542861938, "learning_rate": 4.7696121990070674e-08, "loss": 0.6162, "step": 37838 }, { "epoch": 0.97, "grad_norm": 4.77451753616333, "learning_rate": 4.761517383503278e-08, "loss": 0.5634, "step": 37839 }, { "epoch": 0.97, "grad_norm": 1.5756587982177734, "learning_rate": 4.7534294265578765e-08, "loss": 0.4078, "step": 37840 }, { "epoch": 0.97, "grad_norm": 1.44343101978302, "learning_rate": 4.7453483282265954e-08, "loss": 0.6013, "step": 37841 }, { "epoch": 0.97, "grad_norm": 1.1036732196807861, "learning_rate": 4.737274088565058e-08, "loss": 0.3984, "step": 37842 }, { "epoch": 0.97, "grad_norm": 4.311278343200684, "learning_rate": 4.729206707628886e-08, "loss": 0.6033, "step": 37843 }, { "epoch": 0.97, "grad_norm": 0.8995924592018127, "learning_rate": 4.7211461854738124e-08, "loss": 0.3477, "step": 37844 }, { "epoch": 0.97, "grad_norm": 1.2713408470153809, "learning_rate": 4.713092522155238e-08, "loss": 0.5205, "step": 37845 }, { "epoch": 0.97, "grad_norm": 4.761396884918213, "learning_rate": 4.7050457177287845e-08, "loss": 0.6047, "step": 37846 }, { "epoch": 0.97, "grad_norm": 5.183210849761963, "learning_rate": 4.697005772249852e-08, "loss": 0.66, "step": 37847 }, { "epoch": 0.97, "grad_norm": 1.3111459016799927, "learning_rate": 4.68897268577384e-08, "loss": 0.5104, "step": 37848 }, { "epoch": 0.97, "grad_norm": 1.7311100959777832, "learning_rate": 4.680946458356039e-08, "loss": 0.4969, "step": 37849 }, { "epoch": 0.97, "grad_norm": 1.7140358686447144, "learning_rate": 4.67292709005196e-08, "loss": 0.6045, "step": 37850 }, { "epoch": 0.97, "grad_norm": 1.2916889190673828, "learning_rate": 4.664914580916668e-08, "loss": 0.5582, "step": 37851 }, { "epoch": 0.97, "grad_norm": 1.1404775381088257, "learning_rate": 4.656908931005455e-08, "loss": 0.3714, "step": 37852 }, { "epoch": 0.97, "grad_norm": 1.9031065702438354, "learning_rate": 4.648910140373497e-08, "loss": 0.6535, "step": 37853 }, { "epoch": 0.97, "grad_norm": 3.738605260848999, "learning_rate": 4.640918209075862e-08, "loss": 0.3857, "step": 37854 }, { "epoch": 0.97, "grad_norm": 1.5356708765029907, "learning_rate": 4.632933137167728e-08, "loss": 0.5327, "step": 37855 }, { "epoch": 0.97, "grad_norm": 1.5671757459640503, "learning_rate": 4.6249549247040507e-08, "loss": 0.4562, "step": 37856 }, { "epoch": 0.97, "grad_norm": 1.4982335567474365, "learning_rate": 4.6169835717397856e-08, "loss": 0.4945, "step": 37857 }, { "epoch": 0.97, "grad_norm": 17.827327728271484, "learning_rate": 4.6090190783300017e-08, "loss": 0.6416, "step": 37858 }, { "epoch": 0.97, "grad_norm": 1.6183770895004272, "learning_rate": 4.601061444529431e-08, "loss": 0.5936, "step": 37859 }, { "epoch": 0.97, "grad_norm": 3.161717653274536, "learning_rate": 4.593110670393031e-08, "loss": 0.6206, "step": 37860 }, { "epoch": 0.97, "grad_norm": 1.4480321407318115, "learning_rate": 4.585166755975534e-08, "loss": 0.4571, "step": 37861 }, { "epoch": 0.97, "grad_norm": 1.5051162242889404, "learning_rate": 4.577229701331565e-08, "loss": 0.5737, "step": 37862 }, { "epoch": 0.97, "grad_norm": 0.8563435673713684, "learning_rate": 4.569299506516078e-08, "loss": 0.2327, "step": 37863 }, { "epoch": 0.97, "grad_norm": 4.079192161560059, "learning_rate": 4.561376171583476e-08, "loss": 0.5195, "step": 37864 }, { "epoch": 0.97, "grad_norm": 1.487604022026062, "learning_rate": 4.553459696588491e-08, "loss": 0.5385, "step": 37865 }, { "epoch": 0.97, "grad_norm": 7.576411724090576, "learning_rate": 4.5455500815857476e-08, "loss": 0.548, "step": 37866 }, { "epoch": 0.97, "grad_norm": 1.0026907920837402, "learning_rate": 4.537647326629646e-08, "loss": 0.4733, "step": 37867 }, { "epoch": 0.97, "grad_norm": 9.354074478149414, "learning_rate": 4.5297514317746983e-08, "loss": 0.5839, "step": 37868 }, { "epoch": 0.97, "grad_norm": 1.4762941598892212, "learning_rate": 4.521862397075194e-08, "loss": 0.4881, "step": 37869 }, { "epoch": 0.97, "grad_norm": 1.9106884002685547, "learning_rate": 4.5139802225856454e-08, "loss": 0.5409, "step": 37870 }, { "epoch": 0.97, "grad_norm": 0.9670442938804626, "learning_rate": 4.5061049083603424e-08, "loss": 0.5191, "step": 37871 }, { "epoch": 0.97, "grad_norm": 12.84903621673584, "learning_rate": 4.498236454453464e-08, "loss": 0.542, "step": 37872 }, { "epoch": 0.97, "grad_norm": 1.6998342275619507, "learning_rate": 4.4903748609193e-08, "loss": 0.623, "step": 37873 }, { "epoch": 0.97, "grad_norm": 1.2726120948791504, "learning_rate": 4.48252012781214e-08, "loss": 0.4, "step": 37874 }, { "epoch": 0.97, "grad_norm": 4.644513130187988, "learning_rate": 4.474672255185941e-08, "loss": 0.7012, "step": 37875 }, { "epoch": 0.97, "grad_norm": 1.5328364372253418, "learning_rate": 4.466831243094771e-08, "loss": 0.4202, "step": 37876 }, { "epoch": 0.97, "grad_norm": 1.780890703201294, "learning_rate": 4.4589970915929206e-08, "loss": 0.5511, "step": 37877 }, { "epoch": 0.97, "grad_norm": 1.6715781688690186, "learning_rate": 4.451169800734012e-08, "loss": 0.46, "step": 37878 }, { "epoch": 0.97, "grad_norm": 7.3377366065979, "learning_rate": 4.443349370572336e-08, "loss": 0.6543, "step": 37879 }, { "epoch": 0.97, "grad_norm": 1.1362544298171997, "learning_rate": 4.435535801161517e-08, "loss": 0.4666, "step": 37880 }, { "epoch": 0.97, "grad_norm": 1.496756672859192, "learning_rate": 4.4277290925556217e-08, "loss": 0.5085, "step": 37881 }, { "epoch": 0.97, "grad_norm": 2.1947624683380127, "learning_rate": 4.419929244808274e-08, "loss": 0.4092, "step": 37882 }, { "epoch": 0.97, "grad_norm": 1.093066930770874, "learning_rate": 4.41213625797321e-08, "loss": 0.5606, "step": 37883 }, { "epoch": 0.97, "grad_norm": 1.3949521780014038, "learning_rate": 4.4043501321042736e-08, "loss": 0.4333, "step": 37884 }, { "epoch": 0.97, "grad_norm": 1.5821014642715454, "learning_rate": 4.39657086725509e-08, "loss": 0.4883, "step": 37885 }, { "epoch": 0.97, "grad_norm": 8.134370803833008, "learning_rate": 4.3887984634791714e-08, "loss": 0.5396, "step": 37886 }, { "epoch": 0.97, "grad_norm": 5.574471950531006, "learning_rate": 4.381032920830253e-08, "loss": 0.7353, "step": 37887 }, { "epoch": 0.97, "grad_norm": 1.2320280075073242, "learning_rate": 4.373274239361736e-08, "loss": 0.4914, "step": 37888 }, { "epoch": 0.97, "grad_norm": 1.5365808010101318, "learning_rate": 4.365522419127022e-08, "loss": 0.4942, "step": 37889 }, { "epoch": 0.97, "grad_norm": 1.8076013326644897, "learning_rate": 4.357777460179624e-08, "loss": 0.5556, "step": 37890 }, { "epoch": 0.97, "grad_norm": 1.354581594467163, "learning_rate": 4.3500393625729445e-08, "loss": 0.445, "step": 37891 }, { "epoch": 0.97, "grad_norm": 1.423745036125183, "learning_rate": 4.342308126360273e-08, "loss": 0.6195, "step": 37892 }, { "epoch": 0.97, "grad_norm": 1.6135026216506958, "learning_rate": 4.33458375159479e-08, "loss": 0.5637, "step": 37893 }, { "epoch": 0.97, "grad_norm": 1.4857772588729858, "learning_rate": 4.3268662383298965e-08, "loss": 0.5951, "step": 37894 }, { "epoch": 0.97, "grad_norm": 1.594012975692749, "learning_rate": 4.319155586618662e-08, "loss": 0.4324, "step": 37895 }, { "epoch": 0.97, "grad_norm": 1.2695950269699097, "learning_rate": 4.311451796514265e-08, "loss": 0.4706, "step": 37896 }, { "epoch": 0.97, "grad_norm": 0.9898406267166138, "learning_rate": 4.303754868069887e-08, "loss": 0.4905, "step": 37897 }, { "epoch": 0.97, "grad_norm": 2.0301318168640137, "learning_rate": 4.296064801338373e-08, "loss": 0.4452, "step": 37898 }, { "epoch": 0.97, "grad_norm": 0.9439999461174011, "learning_rate": 4.288381596372793e-08, "loss": 0.6442, "step": 37899 }, { "epoch": 0.97, "grad_norm": 2.778115749359131, "learning_rate": 4.280705253226214e-08, "loss": 0.5737, "step": 37900 }, { "epoch": 0.97, "grad_norm": 2.184662103652954, "learning_rate": 4.273035771951373e-08, "loss": 0.5253, "step": 37901 }, { "epoch": 0.97, "grad_norm": 1.7588869333267212, "learning_rate": 4.265373152601227e-08, "loss": 0.5463, "step": 37902 }, { "epoch": 0.97, "grad_norm": 1.572852611541748, "learning_rate": 4.257717395228511e-08, "loss": 0.5291, "step": 37903 }, { "epoch": 0.97, "grad_norm": 1.0967068672180176, "learning_rate": 4.250068499885962e-08, "loss": 0.5266, "step": 37904 }, { "epoch": 0.97, "grad_norm": 1.2418032884597778, "learning_rate": 4.242426466626426e-08, "loss": 0.5711, "step": 37905 }, { "epoch": 0.97, "grad_norm": 1.7144412994384766, "learning_rate": 4.2347912955025275e-08, "loss": 0.5984, "step": 37906 }, { "epoch": 0.97, "grad_norm": 1.8559463024139404, "learning_rate": 4.22716298656678e-08, "loss": 0.4654, "step": 37907 }, { "epoch": 0.97, "grad_norm": 2.1357531547546387, "learning_rate": 4.2195415398719185e-08, "loss": 0.5647, "step": 37908 }, { "epoch": 0.97, "grad_norm": 16.95083236694336, "learning_rate": 4.211926955470236e-08, "loss": 0.5011, "step": 37909 }, { "epoch": 0.97, "grad_norm": 1.9824036359786987, "learning_rate": 4.2043192334143555e-08, "loss": 0.4676, "step": 37910 }, { "epoch": 0.97, "grad_norm": 1.382792592048645, "learning_rate": 4.196718373756792e-08, "loss": 0.497, "step": 37911 }, { "epoch": 0.97, "grad_norm": 1.7011497020721436, "learning_rate": 4.189124376549725e-08, "loss": 0.4361, "step": 37912 }, { "epoch": 0.97, "grad_norm": 0.8376826047897339, "learning_rate": 4.181537241845557e-08, "loss": 0.4265, "step": 37913 }, { "epoch": 0.97, "grad_norm": 1.5874686241149902, "learning_rate": 4.1739569696966906e-08, "loss": 0.5515, "step": 37914 }, { "epoch": 0.97, "grad_norm": 1.1196892261505127, "learning_rate": 4.166383560155307e-08, "loss": 0.4372, "step": 37915 }, { "epoch": 0.97, "grad_norm": 1.8763558864593506, "learning_rate": 4.158817013273475e-08, "loss": 0.5127, "step": 37916 }, { "epoch": 0.97, "grad_norm": 1.220409870147705, "learning_rate": 4.151257329103375e-08, "loss": 0.4651, "step": 37917 }, { "epoch": 0.97, "grad_norm": 2.503018617630005, "learning_rate": 4.1437045076972994e-08, "loss": 0.5623, "step": 37918 }, { "epoch": 0.97, "grad_norm": 1.5009509325027466, "learning_rate": 4.1361585491070945e-08, "loss": 0.4524, "step": 37919 }, { "epoch": 0.97, "grad_norm": 1.0121865272521973, "learning_rate": 4.12861945338483e-08, "loss": 0.5716, "step": 37920 }, { "epoch": 0.97, "grad_norm": 3.839337110519409, "learning_rate": 4.1210872205823537e-08, "loss": 0.7722, "step": 37921 }, { "epoch": 0.97, "grad_norm": 2.394197463989258, "learning_rate": 4.113561850751846e-08, "loss": 0.4309, "step": 37922 }, { "epoch": 0.97, "grad_norm": 1.4985127449035645, "learning_rate": 4.106043343944821e-08, "loss": 0.5474, "step": 37923 }, { "epoch": 0.97, "grad_norm": 8.52540397644043, "learning_rate": 4.098531700213348e-08, "loss": 0.2984, "step": 37924 }, { "epoch": 0.97, "grad_norm": 2.0841217041015625, "learning_rate": 4.0910269196091645e-08, "loss": 0.5296, "step": 37925 }, { "epoch": 0.97, "grad_norm": 1.7259607315063477, "learning_rate": 4.0835290021838946e-08, "loss": 0.6096, "step": 37926 }, { "epoch": 0.97, "grad_norm": 1.7442582845687866, "learning_rate": 4.0760379479891646e-08, "loss": 0.6223, "step": 37927 }, { "epoch": 0.97, "grad_norm": 2.054270029067993, "learning_rate": 4.0685537570767096e-08, "loss": 0.51, "step": 37928 }, { "epoch": 0.97, "grad_norm": 1.3087035417556763, "learning_rate": 4.061076429498156e-08, "loss": 0.5197, "step": 37929 }, { "epoch": 0.97, "grad_norm": 1.2851167917251587, "learning_rate": 4.053605965304908e-08, "loss": 0.6416, "step": 37930 }, { "epoch": 0.97, "grad_norm": 2.177855968475342, "learning_rate": 4.046142364548478e-08, "loss": 0.6369, "step": 37931 }, { "epoch": 0.97, "grad_norm": 1.853100299835205, "learning_rate": 4.038685627280381e-08, "loss": 0.5516, "step": 37932 }, { "epoch": 0.97, "grad_norm": 1.6501436233520508, "learning_rate": 4.031235753551799e-08, "loss": 0.6068, "step": 37933 }, { "epoch": 0.97, "grad_norm": 1.1370083093643188, "learning_rate": 4.0237927434143567e-08, "loss": 0.4546, "step": 37934 }, { "epoch": 0.97, "grad_norm": 4.848893642425537, "learning_rate": 4.0163565969191243e-08, "loss": 0.6533, "step": 37935 }, { "epoch": 0.97, "grad_norm": 1.5685060024261475, "learning_rate": 4.008927314117394e-08, "loss": 0.6069, "step": 37936 }, { "epoch": 0.97, "grad_norm": 1.7611953020095825, "learning_rate": 4.0015048950604596e-08, "loss": 0.6014, "step": 37937 }, { "epoch": 0.97, "grad_norm": 1.3548405170440674, "learning_rate": 3.99408933979939e-08, "loss": 0.4949, "step": 37938 }, { "epoch": 0.97, "grad_norm": 2.2737746238708496, "learning_rate": 3.986680648385255e-08, "loss": 0.5267, "step": 37939 }, { "epoch": 0.97, "grad_norm": 1.6468229293823242, "learning_rate": 3.979278820869237e-08, "loss": 0.727, "step": 37940 }, { "epoch": 0.97, "grad_norm": 12.311640739440918, "learning_rate": 3.971883857302183e-08, "loss": 0.6615, "step": 37941 }, { "epoch": 0.97, "grad_norm": 1.8400191068649292, "learning_rate": 3.964495757735276e-08, "loss": 0.6028, "step": 37942 }, { "epoch": 0.97, "grad_norm": 1.4603585004806519, "learning_rate": 3.957114522219141e-08, "loss": 0.5197, "step": 37943 }, { "epoch": 0.97, "grad_norm": 1.2704213857650757, "learning_rate": 3.949740150804848e-08, "loss": 0.5346, "step": 37944 }, { "epoch": 0.97, "grad_norm": 1.1851061582565308, "learning_rate": 3.942372643543246e-08, "loss": 0.5506, "step": 37945 }, { "epoch": 0.97, "grad_norm": 4.1706624031066895, "learning_rate": 3.935012000484961e-08, "loss": 0.5152, "step": 37946 }, { "epoch": 0.97, "grad_norm": 2.045468807220459, "learning_rate": 3.92765822168073e-08, "loss": 0.6102, "step": 37947 }, { "epoch": 0.97, "grad_norm": 2.524381160736084, "learning_rate": 3.920311307181401e-08, "loss": 0.4964, "step": 37948 }, { "epoch": 0.97, "grad_norm": 1.258965253829956, "learning_rate": 3.912971257037379e-08, "loss": 0.4073, "step": 37949 }, { "epoch": 0.97, "grad_norm": 1.7008203268051147, "learning_rate": 3.9056380712994005e-08, "loss": 0.5279, "step": 37950 }, { "epoch": 0.97, "grad_norm": 1.7847692966461182, "learning_rate": 3.8983117500179804e-08, "loss": 0.5661, "step": 37951 }, { "epoch": 0.97, "grad_norm": 1.7848546504974365, "learning_rate": 3.890992293243634e-08, "loss": 0.4083, "step": 37952 }, { "epoch": 0.97, "grad_norm": 1.9682449102401733, "learning_rate": 3.8836797010266546e-08, "loss": 0.526, "step": 37953 }, { "epoch": 0.97, "grad_norm": 1.460525631904602, "learning_rate": 3.8763739734175576e-08, "loss": 0.5702, "step": 37954 }, { "epoch": 0.97, "grad_norm": 1.6162564754486084, "learning_rate": 3.8690751104666355e-08, "loss": 0.661, "step": 37955 }, { "epoch": 0.97, "grad_norm": 1.2597817182540894, "learning_rate": 3.861783112224293e-08, "loss": 0.5258, "step": 37956 }, { "epoch": 0.97, "grad_norm": 1.1666226387023926, "learning_rate": 3.854497978740601e-08, "loss": 0.5534, "step": 37957 }, { "epoch": 0.97, "grad_norm": 1.5115644931793213, "learning_rate": 3.847219710065964e-08, "loss": 0.4393, "step": 37958 }, { "epoch": 0.97, "grad_norm": 1.2294095754623413, "learning_rate": 3.839948306250452e-08, "loss": 0.3917, "step": 37959 }, { "epoch": 0.97, "grad_norm": 1.2913981676101685, "learning_rate": 3.8326837673440255e-08, "loss": 0.5346, "step": 37960 }, { "epoch": 0.97, "grad_norm": 1.5942732095718384, "learning_rate": 3.825426093396978e-08, "loss": 0.5668, "step": 37961 }, { "epoch": 0.97, "grad_norm": 1.13685142993927, "learning_rate": 3.818175284459269e-08, "loss": 0.457, "step": 37962 }, { "epoch": 0.97, "grad_norm": 2.8558878898620605, "learning_rate": 3.810931340580859e-08, "loss": 0.496, "step": 37963 }, { "epoch": 0.97, "grad_norm": 1.6072620153427124, "learning_rate": 3.803694261811597e-08, "loss": 0.4811, "step": 37964 }, { "epoch": 0.97, "grad_norm": 12.411211013793945, "learning_rate": 3.796464048201332e-08, "loss": 0.4758, "step": 37965 }, { "epoch": 0.97, "grad_norm": 1.5685045719146729, "learning_rate": 3.789240699800134e-08, "loss": 0.6505, "step": 37966 }, { "epoch": 0.97, "grad_norm": 1.7605830430984497, "learning_rate": 3.782024216657409e-08, "loss": 0.5432, "step": 37967 }, { "epoch": 0.97, "grad_norm": 8.283255577087402, "learning_rate": 3.7748145988232286e-08, "loss": 0.4642, "step": 37968 }, { "epoch": 0.97, "grad_norm": 1.7265574932098389, "learning_rate": 3.767611846347108e-08, "loss": 0.5173, "step": 37969 }, { "epoch": 0.97, "grad_norm": 1.2943557500839233, "learning_rate": 3.7604159592786735e-08, "loss": 0.4573, "step": 37970 }, { "epoch": 0.97, "grad_norm": 1.4341744184494019, "learning_rate": 3.7532269376675536e-08, "loss": 0.5235, "step": 37971 }, { "epoch": 0.97, "grad_norm": 1.3100526332855225, "learning_rate": 3.746044781563374e-08, "loss": 0.4291, "step": 37972 }, { "epoch": 0.97, "grad_norm": 1.6002060174942017, "learning_rate": 3.7388694910154286e-08, "loss": 0.6692, "step": 37973 }, { "epoch": 0.97, "grad_norm": 2.2540531158447266, "learning_rate": 3.7317010660733456e-08, "loss": 0.6976, "step": 37974 }, { "epoch": 0.97, "grad_norm": 1.3289496898651123, "learning_rate": 3.724539506786418e-08, "loss": 0.5512, "step": 37975 }, { "epoch": 0.97, "grad_norm": 1.4074840545654297, "learning_rate": 3.71738481320405e-08, "loss": 0.3482, "step": 37976 }, { "epoch": 0.97, "grad_norm": 1.7881025075912476, "learning_rate": 3.710236985375648e-08, "loss": 0.4482, "step": 37977 }, { "epoch": 0.97, "grad_norm": 1.1756772994995117, "learning_rate": 3.703096023350283e-08, "loss": 0.4414, "step": 37978 }, { "epoch": 0.97, "grad_norm": 2.8451783657073975, "learning_rate": 3.6959619271772496e-08, "loss": 0.7457, "step": 37979 }, { "epoch": 0.97, "grad_norm": 1.4961096048355103, "learning_rate": 3.6888346969056186e-08, "loss": 0.5191, "step": 37980 }, { "epoch": 0.97, "grad_norm": 1.2519683837890625, "learning_rate": 3.681714332584685e-08, "loss": 0.3669, "step": 37981 }, { "epoch": 0.97, "grad_norm": 1.4365730285644531, "learning_rate": 3.6746008342634084e-08, "loss": 0.4706, "step": 37982 }, { "epoch": 0.97, "grad_norm": 2.503392457962036, "learning_rate": 3.667494201990862e-08, "loss": 0.5747, "step": 37983 }, { "epoch": 0.97, "grad_norm": 1.7700347900390625, "learning_rate": 3.660394435815895e-08, "loss": 0.6206, "step": 37984 }, { "epoch": 0.97, "grad_norm": 8.503168106079102, "learning_rate": 3.65330153578769e-08, "loss": 0.4148, "step": 37985 }, { "epoch": 0.97, "grad_norm": 1.8030049800872803, "learning_rate": 3.646215501954875e-08, "loss": 0.3829, "step": 37986 }, { "epoch": 0.97, "grad_norm": 7.547427177429199, "learning_rate": 3.639136334366411e-08, "loss": 0.7331, "step": 37987 }, { "epoch": 0.97, "grad_norm": 1.7680156230926514, "learning_rate": 3.632064033070925e-08, "loss": 0.4634, "step": 37988 }, { "epoch": 0.97, "grad_norm": 9.739522933959961, "learning_rate": 3.62499859811738e-08, "loss": 0.6448, "step": 37989 }, { "epoch": 0.97, "grad_norm": 1.5379741191864014, "learning_rate": 3.61794002955429e-08, "loss": 0.5687, "step": 37990 }, { "epoch": 0.97, "grad_norm": 1.0681354999542236, "learning_rate": 3.610888327430395e-08, "loss": 0.4735, "step": 37991 }, { "epoch": 0.97, "grad_norm": 1.2870416641235352, "learning_rate": 3.603843491794323e-08, "loss": 0.4382, "step": 37992 }, { "epoch": 0.97, "grad_norm": 2.249166965484619, "learning_rate": 3.59680552269448e-08, "loss": 0.5727, "step": 37993 }, { "epoch": 0.97, "grad_norm": 1.4966388940811157, "learning_rate": 3.589774420179604e-08, "loss": 0.6097, "step": 37994 }, { "epoch": 0.97, "grad_norm": 2.7393245697021484, "learning_rate": 3.5827501842977676e-08, "loss": 0.4305, "step": 37995 }, { "epoch": 0.97, "grad_norm": 1.847952961921692, "learning_rate": 3.575732815097821e-08, "loss": 0.5362, "step": 37996 }, { "epoch": 0.97, "grad_norm": 3.7564659118652344, "learning_rate": 3.568722312627726e-08, "loss": 0.7067, "step": 37997 }, { "epoch": 0.97, "grad_norm": 1.754370927810669, "learning_rate": 3.56171867693611e-08, "loss": 0.4879, "step": 37998 }, { "epoch": 0.97, "grad_norm": 4.606520175933838, "learning_rate": 3.554721908071046e-08, "loss": 0.5218, "step": 37999 }, { "epoch": 0.97, "grad_norm": 1.3651366233825684, "learning_rate": 3.547732006080829e-08, "loss": 0.5538, "step": 38000 }, { "epoch": 0.97, "grad_norm": 3.0303897857666016, "learning_rate": 3.54074897101353e-08, "loss": 0.5893, "step": 38001 }, { "epoch": 0.97, "grad_norm": 5.0499444007873535, "learning_rate": 3.5337728029174456e-08, "loss": 0.6492, "step": 38002 }, { "epoch": 0.97, "grad_norm": 2.128706693649292, "learning_rate": 3.526803501840537e-08, "loss": 0.7242, "step": 38003 }, { "epoch": 0.97, "grad_norm": 1.1405378580093384, "learning_rate": 3.5198410678308756e-08, "loss": 0.5722, "step": 38004 }, { "epoch": 0.97, "grad_norm": 1.419731855392456, "learning_rate": 3.512885500936425e-08, "loss": 0.3657, "step": 38005 }, { "epoch": 0.97, "grad_norm": 1.3696666955947876, "learning_rate": 3.505936801205034e-08, "loss": 0.4138, "step": 38006 }, { "epoch": 0.97, "grad_norm": 1.5621311664581299, "learning_rate": 3.4989949686847765e-08, "loss": 0.4804, "step": 38007 }, { "epoch": 0.97, "grad_norm": 0.8951558470726013, "learning_rate": 3.492060003423281e-08, "loss": 0.4078, "step": 38008 }, { "epoch": 0.97, "grad_norm": 11.712407112121582, "learning_rate": 3.485131905468509e-08, "loss": 0.5011, "step": 38009 }, { "epoch": 0.97, "grad_norm": 1.5730705261230469, "learning_rate": 3.4782106748680875e-08, "loss": 0.4955, "step": 38010 }, { "epoch": 0.97, "grad_norm": 1.4740275144577026, "learning_rate": 3.471296311669869e-08, "loss": 0.5941, "step": 38011 }, { "epoch": 0.97, "grad_norm": 2.5830063819885254, "learning_rate": 3.46438881592126e-08, "loss": 0.5965, "step": 38012 }, { "epoch": 0.97, "grad_norm": 1.8775324821472168, "learning_rate": 3.4574881876701105e-08, "loss": 0.5999, "step": 38013 }, { "epoch": 0.97, "grad_norm": 1.5895017385482788, "learning_rate": 3.4505944269637156e-08, "loss": 0.5998, "step": 38014 }, { "epoch": 0.97, "grad_norm": 1.3072904348373413, "learning_rate": 3.443707533849816e-08, "loss": 0.4796, "step": 38015 }, { "epoch": 0.97, "grad_norm": 1.44380784034729, "learning_rate": 3.436827508375818e-08, "loss": 0.4221, "step": 38016 }, { "epoch": 0.97, "grad_norm": 1.2348988056182861, "learning_rate": 3.429954350589126e-08, "loss": 0.4326, "step": 38017 }, { "epoch": 0.97, "grad_norm": 1.6291574239730835, "learning_rate": 3.423088060537039e-08, "loss": 0.4888, "step": 38018 }, { "epoch": 0.97, "grad_norm": 1.239694356918335, "learning_rate": 3.41622863826685e-08, "loss": 0.443, "step": 38019 }, { "epoch": 0.97, "grad_norm": 1.2307173013687134, "learning_rate": 3.409376083825966e-08, "loss": 0.5281, "step": 38020 }, { "epoch": 0.97, "grad_norm": 1.2894906997680664, "learning_rate": 3.402530397261572e-08, "loss": 0.5894, "step": 38021 }, { "epoch": 0.97, "grad_norm": 1.367996096611023, "learning_rate": 3.395691578620741e-08, "loss": 0.5409, "step": 38022 }, { "epoch": 0.97, "grad_norm": 2.5911707878112793, "learning_rate": 3.388859627950658e-08, "loss": 0.5163, "step": 38023 }, { "epoch": 0.97, "grad_norm": 1.6129214763641357, "learning_rate": 3.382034545298507e-08, "loss": 0.4469, "step": 38024 }, { "epoch": 0.97, "grad_norm": 1.1479761600494385, "learning_rate": 3.375216330711251e-08, "loss": 0.4461, "step": 38025 }, { "epoch": 0.97, "grad_norm": 1.2847782373428345, "learning_rate": 3.36840498423574e-08, "loss": 0.5449, "step": 38026 }, { "epoch": 0.97, "grad_norm": 1.7201263904571533, "learning_rate": 3.36160050591916e-08, "loss": 0.522, "step": 38027 }, { "epoch": 0.97, "grad_norm": 1.2845265865325928, "learning_rate": 3.354802895808251e-08, "loss": 0.405, "step": 38028 }, { "epoch": 0.97, "grad_norm": 3.008589744567871, "learning_rate": 3.3480121539498645e-08, "loss": 0.5006, "step": 38029 }, { "epoch": 0.97, "grad_norm": 1.2556946277618408, "learning_rate": 3.3412282803908514e-08, "loss": 0.4608, "step": 38030 }, { "epoch": 0.97, "grad_norm": 1.798417568206787, "learning_rate": 3.334451275177841e-08, "loss": 0.4237, "step": 38031 }, { "epoch": 0.97, "grad_norm": 1.3968602418899536, "learning_rate": 3.3276811383576855e-08, "loss": 0.4131, "step": 38032 }, { "epoch": 0.97, "grad_norm": 1.8301478624343872, "learning_rate": 3.320917869977014e-08, "loss": 0.503, "step": 38033 }, { "epoch": 0.97, "grad_norm": 1.2863980531692505, "learning_rate": 3.314161470082344e-08, "loss": 0.5054, "step": 38034 }, { "epoch": 0.97, "grad_norm": 1.301171064376831, "learning_rate": 3.307411938720306e-08, "loss": 0.4775, "step": 38035 }, { "epoch": 0.97, "grad_norm": 2.4446420669555664, "learning_rate": 3.300669275937418e-08, "loss": 0.5678, "step": 38036 }, { "epoch": 0.97, "grad_norm": 1.2854743003845215, "learning_rate": 3.293933481780087e-08, "loss": 0.4997, "step": 38037 }, { "epoch": 0.97, "grad_norm": 1.5307116508483887, "learning_rate": 3.2872045562948317e-08, "loss": 0.3137, "step": 38038 }, { "epoch": 0.97, "grad_norm": 1.3744806051254272, "learning_rate": 3.280482499527948e-08, "loss": 0.3974, "step": 38039 }, { "epoch": 0.97, "grad_norm": 1.1031852960586548, "learning_rate": 3.273767311525844e-08, "loss": 0.451, "step": 38040 }, { "epoch": 0.98, "grad_norm": 12.928657531738281, "learning_rate": 3.267058992334593e-08, "loss": 0.4853, "step": 38041 }, { "epoch": 0.98, "grad_norm": 1.4277546405792236, "learning_rate": 3.2603575420007136e-08, "loss": 0.4917, "step": 38042 }, { "epoch": 0.98, "grad_norm": 11.613537788391113, "learning_rate": 3.25366296057017e-08, "loss": 0.389, "step": 38043 }, { "epoch": 0.98, "grad_norm": 1.364720106124878, "learning_rate": 3.246975248089146e-08, "loss": 0.6235, "step": 38044 }, { "epoch": 0.98, "grad_norm": 0.9579476714134216, "learning_rate": 3.2402944046037165e-08, "loss": 0.3216, "step": 38045 }, { "epoch": 0.98, "grad_norm": 1.9785841703414917, "learning_rate": 3.233620430159956e-08, "loss": 0.4578, "step": 38046 }, { "epoch": 0.98, "grad_norm": 1.3001066446304321, "learning_rate": 3.226953324803939e-08, "loss": 0.5881, "step": 38047 }, { "epoch": 0.98, "grad_norm": 1.1542128324508667, "learning_rate": 3.220293088581405e-08, "loss": 0.4874, "step": 38048 }, { "epoch": 0.98, "grad_norm": 3.0726070404052734, "learning_rate": 3.213639721538431e-08, "loss": 0.547, "step": 38049 }, { "epoch": 0.98, "grad_norm": 1.2232550382614136, "learning_rate": 3.2069932237208665e-08, "loss": 0.6613, "step": 38050 }, { "epoch": 0.98, "grad_norm": 1.2820944786071777, "learning_rate": 3.2003535951744544e-08, "loss": 0.4826, "step": 38051 }, { "epoch": 0.98, "grad_norm": 1.8292068243026733, "learning_rate": 3.1937208359449353e-08, "loss": 0.602, "step": 38052 }, { "epoch": 0.98, "grad_norm": 1.4882256984710693, "learning_rate": 3.18709494607794e-08, "loss": 0.4764, "step": 38053 }, { "epoch": 0.98, "grad_norm": 2.570631980895996, "learning_rate": 3.1804759256193193e-08, "loss": 0.4872, "step": 38054 }, { "epoch": 0.98, "grad_norm": 1.8759739398956299, "learning_rate": 3.173863774614594e-08, "loss": 0.5651, "step": 38055 }, { "epoch": 0.98, "grad_norm": 1.2735967636108398, "learning_rate": 3.167258493109282e-08, "loss": 0.5936, "step": 38056 }, { "epoch": 0.98, "grad_norm": 1.5169061422348022, "learning_rate": 3.160660081149014e-08, "loss": 0.5358, "step": 38057 }, { "epoch": 0.98, "grad_norm": 2.1766388416290283, "learning_rate": 3.154068538779198e-08, "loss": 0.5065, "step": 38058 }, { "epoch": 0.98, "grad_norm": 1.2209820747375488, "learning_rate": 3.1474838660452424e-08, "loss": 0.4417, "step": 38059 }, { "epoch": 0.98, "grad_norm": 1.469939947128296, "learning_rate": 3.140906062992444e-08, "loss": 0.4686, "step": 38060 }, { "epoch": 0.98, "grad_norm": 1.4975204467773438, "learning_rate": 3.134335129666322e-08, "loss": 0.5068, "step": 38061 }, { "epoch": 0.98, "grad_norm": 3.8006033897399902, "learning_rate": 3.127771066112062e-08, "loss": 0.6347, "step": 38062 }, { "epoch": 0.98, "grad_norm": 1.8352665901184082, "learning_rate": 3.121213872374962e-08, "loss": 0.7187, "step": 38063 }, { "epoch": 0.98, "grad_norm": 1.3775742053985596, "learning_rate": 3.114663548500097e-08, "loss": 0.4167, "step": 38064 }, { "epoch": 0.98, "grad_norm": 1.7461224794387817, "learning_rate": 3.108120094532652e-08, "loss": 0.5537, "step": 38065 }, { "epoch": 0.98, "grad_norm": 1.1705803871154785, "learning_rate": 3.101583510517814e-08, "loss": 0.5577, "step": 38066 }, { "epoch": 0.98, "grad_norm": 3.366778612136841, "learning_rate": 3.095053796500436e-08, "loss": 0.5708, "step": 38067 }, { "epoch": 0.98, "grad_norm": 40.53988265991211, "learning_rate": 3.088530952525703e-08, "loss": 0.469, "step": 38068 }, { "epoch": 0.98, "grad_norm": 2.008641004562378, "learning_rate": 3.082014978638581e-08, "loss": 0.3596, "step": 38069 }, { "epoch": 0.98, "grad_norm": 1.7424023151397705, "learning_rate": 3.0755058748837e-08, "loss": 0.559, "step": 38070 }, { "epoch": 0.98, "grad_norm": 1.5297499895095825, "learning_rate": 3.069003641306245e-08, "loss": 0.5974, "step": 38071 }, { "epoch": 0.98, "grad_norm": 1.6029691696166992, "learning_rate": 3.0625082779508484e-08, "loss": 0.6514, "step": 38072 }, { "epoch": 0.98, "grad_norm": 1.0304021835327148, "learning_rate": 3.056019784862252e-08, "loss": 0.3596, "step": 38073 }, { "epoch": 0.98, "grad_norm": 1.5428460836410522, "learning_rate": 3.049538162085308e-08, "loss": 0.4112, "step": 38074 }, { "epoch": 0.98, "grad_norm": 2.744757652282715, "learning_rate": 3.043063409664537e-08, "loss": 0.6633, "step": 38075 }, { "epoch": 0.98, "grad_norm": 1.1819850206375122, "learning_rate": 3.0365955276446816e-08, "loss": 0.4903, "step": 38076 }, { "epoch": 0.98, "grad_norm": 1.2022674083709717, "learning_rate": 3.030134516070149e-08, "loss": 0.4146, "step": 38077 }, { "epoch": 0.98, "grad_norm": 1.9293369054794312, "learning_rate": 3.023680374985682e-08, "loss": 0.4907, "step": 38078 }, { "epoch": 0.98, "grad_norm": 8.615938186645508, "learning_rate": 3.01723310443558e-08, "loss": 0.5563, "step": 38079 }, { "epoch": 0.98, "grad_norm": 1.2989264726638794, "learning_rate": 3.0107927044643607e-08, "loss": 0.4655, "step": 38080 }, { "epoch": 0.98, "grad_norm": 1.5292757749557495, "learning_rate": 3.0043591751164335e-08, "loss": 0.3944, "step": 38081 }, { "epoch": 0.98, "grad_norm": 1.2922019958496094, "learning_rate": 2.997932516435986e-08, "loss": 0.3806, "step": 38082 }, { "epoch": 0.98, "grad_norm": 2.19091534614563, "learning_rate": 2.991512728467427e-08, "loss": 0.5537, "step": 38083 }, { "epoch": 0.98, "grad_norm": 1.6360805034637451, "learning_rate": 2.985099811255054e-08, "loss": 0.5434, "step": 38084 }, { "epoch": 0.98, "grad_norm": 1.7702054977416992, "learning_rate": 2.978693764843055e-08, "loss": 0.5462, "step": 38085 }, { "epoch": 0.98, "grad_norm": 1.2227098941802979, "learning_rate": 2.972294589275393e-08, "loss": 0.4979, "step": 38086 }, { "epoch": 0.98, "grad_norm": 2.023702621459961, "learning_rate": 2.965902284596256e-08, "loss": 0.6317, "step": 38087 }, { "epoch": 0.98, "grad_norm": 2.2448337078094482, "learning_rate": 2.9595168508498305e-08, "loss": 0.6154, "step": 38088 }, { "epoch": 0.98, "grad_norm": 1.1211084127426147, "learning_rate": 2.95313828807986e-08, "loss": 0.3897, "step": 38089 }, { "epoch": 0.98, "grad_norm": 1.488676905632019, "learning_rate": 2.9467665963305304e-08, "loss": 0.5271, "step": 38090 }, { "epoch": 0.98, "grad_norm": 5.069465637207031, "learning_rate": 2.940401775645696e-08, "loss": 0.4931, "step": 38091 }, { "epoch": 0.98, "grad_norm": 1.0318737030029297, "learning_rate": 2.9340438260692105e-08, "loss": 0.4741, "step": 38092 }, { "epoch": 0.98, "grad_norm": 6.922301769256592, "learning_rate": 2.927692747644928e-08, "loss": 0.6524, "step": 38093 }, { "epoch": 0.98, "grad_norm": 1.2669869661331177, "learning_rate": 2.92134854041648e-08, "loss": 0.5137, "step": 38094 }, { "epoch": 0.98, "grad_norm": 1.3619766235351562, "learning_rate": 2.91501120442772e-08, "loss": 0.4201, "step": 38095 }, { "epoch": 0.98, "grad_norm": 1.556975245475769, "learning_rate": 2.9086807397221695e-08, "loss": 0.3818, "step": 38096 }, { "epoch": 0.98, "grad_norm": 1.8453091382980347, "learning_rate": 2.9023571463436817e-08, "loss": 0.5446, "step": 38097 }, { "epoch": 0.98, "grad_norm": 1.4987406730651855, "learning_rate": 2.8960404243356664e-08, "loss": 0.3562, "step": 38098 }, { "epoch": 0.98, "grad_norm": 1.1755300760269165, "learning_rate": 2.8897305737416447e-08, "loss": 0.514, "step": 38099 }, { "epoch": 0.98, "grad_norm": 1.5403982400894165, "learning_rate": 2.8834275946052482e-08, "loss": 0.4239, "step": 38100 }, { "epoch": 0.98, "grad_norm": 1.1124012470245361, "learning_rate": 2.877131486969664e-08, "loss": 0.4942, "step": 38101 }, { "epoch": 0.98, "grad_norm": 1.2081331014633179, "learning_rate": 2.870842250878636e-08, "loss": 0.4526, "step": 38102 }, { "epoch": 0.98, "grad_norm": 1.9086425304412842, "learning_rate": 2.8645598863751288e-08, "loss": 0.6143, "step": 38103 }, { "epoch": 0.98, "grad_norm": 1.5814011096954346, "learning_rate": 2.858284393502664e-08, "loss": 0.6055, "step": 38104 }, { "epoch": 0.98, "grad_norm": 1.9659624099731445, "learning_rate": 2.8520157723045393e-08, "loss": 0.5221, "step": 38105 }, { "epoch": 0.98, "grad_norm": 1.6050307750701904, "learning_rate": 2.845754022823721e-08, "loss": 0.6676, "step": 38106 }, { "epoch": 0.98, "grad_norm": 1.5554341077804565, "learning_rate": 2.839499145103619e-08, "loss": 0.4516, "step": 38107 }, { "epoch": 0.98, "grad_norm": 1.5338987112045288, "learning_rate": 2.8332511391870875e-08, "loss": 0.4945, "step": 38108 }, { "epoch": 0.98, "grad_norm": 1.6344921588897705, "learning_rate": 2.8270100051174253e-08, "loss": 0.4778, "step": 38109 }, { "epoch": 0.98, "grad_norm": 3.6805121898651123, "learning_rate": 2.820775742937487e-08, "loss": 0.5044, "step": 38110 }, { "epoch": 0.98, "grad_norm": 1.2521235942840576, "learning_rate": 2.8145483526902384e-08, "loss": 0.5999, "step": 38111 }, { "epoch": 0.98, "grad_norm": 1.5775736570358276, "learning_rate": 2.8083278344186448e-08, "loss": 0.4539, "step": 38112 }, { "epoch": 0.98, "grad_norm": 1.926456093788147, "learning_rate": 2.8021141881655613e-08, "loss": 0.5206, "step": 38113 }, { "epoch": 0.98, "grad_norm": 2.6414332389831543, "learning_rate": 2.7959074139737308e-08, "loss": 0.6795, "step": 38114 }, { "epoch": 0.98, "grad_norm": 1.2220685482025146, "learning_rate": 2.7897075118861194e-08, "loss": 0.389, "step": 38115 }, { "epoch": 0.98, "grad_norm": 2.0946528911590576, "learning_rate": 2.7835144819452486e-08, "loss": 0.5591, "step": 38116 }, { "epoch": 0.98, "grad_norm": 1.4762200117111206, "learning_rate": 2.777328324193973e-08, "loss": 0.5141, "step": 38117 }, { "epoch": 0.98, "grad_norm": 1.1505507230758667, "learning_rate": 2.771149038674703e-08, "loss": 0.4647, "step": 38118 }, { "epoch": 0.98, "grad_norm": 2.203779697418213, "learning_rate": 2.7649766254302936e-08, "loss": 0.5771, "step": 38119 }, { "epoch": 0.98, "grad_norm": 4.323018550872803, "learning_rate": 2.7588110845030436e-08, "loss": 0.5232, "step": 38120 }, { "epoch": 0.98, "grad_norm": 1.2655287981033325, "learning_rate": 2.752652415935586e-08, "loss": 0.5987, "step": 38121 }, { "epoch": 0.98, "grad_norm": 0.9815625548362732, "learning_rate": 2.7465006197702205e-08, "loss": 0.5114, "step": 38122 }, { "epoch": 0.98, "grad_norm": 1.4942853450775146, "learning_rate": 2.7403556960495792e-08, "loss": 0.5683, "step": 38123 }, { "epoch": 0.98, "grad_norm": 1.089569091796875, "learning_rate": 2.734217644815851e-08, "loss": 0.4347, "step": 38124 }, { "epoch": 0.98, "grad_norm": 1.4239109754562378, "learning_rate": 2.7280864661112237e-08, "loss": 0.5733, "step": 38125 }, { "epoch": 0.98, "grad_norm": 2.1722795963287354, "learning_rate": 2.7219621599782197e-08, "loss": 0.4764, "step": 38126 }, { "epoch": 0.98, "grad_norm": 1.7984691858291626, "learning_rate": 2.715844726458805e-08, "loss": 0.4877, "step": 38127 }, { "epoch": 0.98, "grad_norm": 1.7606303691864014, "learning_rate": 2.709734165595279e-08, "loss": 0.4543, "step": 38128 }, { "epoch": 0.98, "grad_norm": 1.02919340133667, "learning_rate": 2.7036304774297195e-08, "loss": 0.511, "step": 38129 }, { "epoch": 0.98, "grad_norm": 4.542165279388428, "learning_rate": 2.6975336620040925e-08, "loss": 0.5651, "step": 38130 }, { "epoch": 0.98, "grad_norm": 1.3507349491119385, "learning_rate": 2.691443719360587e-08, "loss": 0.5161, "step": 38131 }, { "epoch": 0.98, "grad_norm": 1.388901710510254, "learning_rate": 2.685360649541058e-08, "loss": 0.3331, "step": 38132 }, { "epoch": 0.98, "grad_norm": 1.1516406536102295, "learning_rate": 2.6792844525874718e-08, "loss": 0.4414, "step": 38133 }, { "epoch": 0.98, "grad_norm": 1.5012892484664917, "learning_rate": 2.673215128541573e-08, "loss": 0.4174, "step": 38134 }, { "epoch": 0.98, "grad_norm": 1.4778820276260376, "learning_rate": 2.6671526774454393e-08, "loss": 0.5845, "step": 38135 }, { "epoch": 0.98, "grad_norm": 1.375449299812317, "learning_rate": 2.6610970993405928e-08, "loss": 0.6193, "step": 38136 }, { "epoch": 0.98, "grad_norm": 1.6518231630325317, "learning_rate": 2.655048394268889e-08, "loss": 0.647, "step": 38137 }, { "epoch": 0.98, "grad_norm": 1.672766089439392, "learning_rate": 2.6490065622720718e-08, "loss": 0.589, "step": 38138 }, { "epoch": 0.98, "grad_norm": 1.1516603231430054, "learning_rate": 2.6429716033916643e-08, "loss": 0.5164, "step": 38139 }, { "epoch": 0.98, "grad_norm": 1.248770833015442, "learning_rate": 2.636943517669299e-08, "loss": 0.4575, "step": 38140 }, { "epoch": 0.98, "grad_norm": 1.7020331621170044, "learning_rate": 2.6309223051464993e-08, "loss": 0.4608, "step": 38141 }, { "epoch": 0.98, "grad_norm": 1.2311593294143677, "learning_rate": 2.6249079658647868e-08, "loss": 0.524, "step": 38142 }, { "epoch": 0.98, "grad_norm": 2.1084518432617188, "learning_rate": 2.618900499865573e-08, "loss": 0.5402, "step": 38143 }, { "epoch": 0.98, "grad_norm": 1.3466248512268066, "learning_rate": 2.6128999071902693e-08, "loss": 0.4225, "step": 38144 }, { "epoch": 0.98, "grad_norm": 1.3817006349563599, "learning_rate": 2.606906187880287e-08, "loss": 0.5347, "step": 38145 }, { "epoch": 0.98, "grad_norm": 1.5983935594558716, "learning_rate": 2.6009193419768154e-08, "loss": 0.4038, "step": 38146 }, { "epoch": 0.98, "grad_norm": 1.592910885810852, "learning_rate": 2.5949393695212654e-08, "loss": 0.4406, "step": 38147 }, { "epoch": 0.98, "grad_norm": 1.115784764289856, "learning_rate": 2.5889662705547158e-08, "loss": 0.4692, "step": 38148 }, { "epoch": 0.98, "grad_norm": 5.075392246246338, "learning_rate": 2.5830000451183558e-08, "loss": 0.4442, "step": 38149 }, { "epoch": 0.98, "grad_norm": 1.5299521684646606, "learning_rate": 2.5770406932533742e-08, "loss": 0.5132, "step": 38150 }, { "epoch": 0.98, "grad_norm": 2.961085557937622, "learning_rate": 2.5710882150007387e-08, "loss": 0.6564, "step": 38151 }, { "epoch": 0.98, "grad_norm": 1.9278045892715454, "learning_rate": 2.5651426104015275e-08, "loss": 0.6174, "step": 38152 }, { "epoch": 0.98, "grad_norm": 1.2947570085525513, "learning_rate": 2.5592038794967077e-08, "loss": 0.6376, "step": 38153 }, { "epoch": 0.98, "grad_norm": 10.784594535827637, "learning_rate": 2.5532720223272466e-08, "loss": 0.6004, "step": 38154 }, { "epoch": 0.98, "grad_norm": 1.8970075845718384, "learning_rate": 2.5473470389338896e-08, "loss": 0.604, "step": 38155 }, { "epoch": 0.98, "grad_norm": 0.928939938545227, "learning_rate": 2.5414289293576032e-08, "loss": 0.5424, "step": 38156 }, { "epoch": 0.98, "grad_norm": 1.4112803936004639, "learning_rate": 2.5355176936391334e-08, "loss": 0.5748, "step": 38157 }, { "epoch": 0.98, "grad_norm": 1.169414758682251, "learning_rate": 2.529613331819225e-08, "loss": 0.4873, "step": 38158 }, { "epoch": 0.98, "grad_norm": 1.8295021057128906, "learning_rate": 2.5237158439385123e-08, "loss": 0.6767, "step": 38159 }, { "epoch": 0.98, "grad_norm": 1.3188389539718628, "learning_rate": 2.5178252300377404e-08, "loss": 0.4072, "step": 38160 }, { "epoch": 0.98, "grad_norm": 1.0391385555267334, "learning_rate": 2.5119414901573215e-08, "loss": 0.4144, "step": 38161 }, { "epoch": 0.98, "grad_norm": 1.853267788887024, "learning_rate": 2.5060646243380003e-08, "loss": 0.6147, "step": 38162 }, { "epoch": 0.98, "grad_norm": 4.473154544830322, "learning_rate": 2.5001946326201898e-08, "loss": 0.4354, "step": 38163 }, { "epoch": 0.98, "grad_norm": 10.248289108276367, "learning_rate": 2.4943315150443016e-08, "loss": 0.4305, "step": 38164 }, { "epoch": 0.98, "grad_norm": 1.34834885597229, "learning_rate": 2.4884752716508586e-08, "loss": 0.4761, "step": 38165 }, { "epoch": 0.98, "grad_norm": 1.3875938653945923, "learning_rate": 2.4826259024800516e-08, "loss": 0.5099, "step": 38166 }, { "epoch": 0.98, "grad_norm": 1.5478217601776123, "learning_rate": 2.4767834075724028e-08, "loss": 0.6219, "step": 38167 }, { "epoch": 0.98, "grad_norm": 4.095362186431885, "learning_rate": 2.470947786967992e-08, "loss": 0.6051, "step": 38168 }, { "epoch": 0.98, "grad_norm": 1.9348222017288208, "learning_rate": 2.4651190407070093e-08, "loss": 0.5774, "step": 38169 }, { "epoch": 0.98, "grad_norm": 1.1655665636062622, "learning_rate": 2.459297168829866e-08, "loss": 0.4906, "step": 38170 }, { "epoch": 0.98, "grad_norm": 2.7537994384765625, "learning_rate": 2.45348217137642e-08, "loss": 0.7673, "step": 38171 }, { "epoch": 0.98, "grad_norm": 1.1242884397506714, "learning_rate": 2.447674048386972e-08, "loss": 0.5821, "step": 38172 }, { "epoch": 0.98, "grad_norm": 1.4142179489135742, "learning_rate": 2.441872799901379e-08, "loss": 0.5572, "step": 38173 }, { "epoch": 0.98, "grad_norm": 1.128566861152649, "learning_rate": 2.4360784259596093e-08, "loss": 0.4962, "step": 38174 }, { "epoch": 0.98, "grad_norm": 1.5406227111816406, "learning_rate": 2.4302909266016305e-08, "loss": 0.5049, "step": 38175 }, { "epoch": 0.98, "grad_norm": 1.5552881956100464, "learning_rate": 2.424510301867522e-08, "loss": 0.5494, "step": 38176 }, { "epoch": 0.98, "grad_norm": 1.8183459043502808, "learning_rate": 2.4187365517968076e-08, "loss": 0.628, "step": 38177 }, { "epoch": 0.98, "grad_norm": 1.2590340375900269, "learning_rate": 2.412969676429455e-08, "loss": 0.5209, "step": 38178 }, { "epoch": 0.98, "grad_norm": 1.1572279930114746, "learning_rate": 2.407209675805211e-08, "loss": 0.5563, "step": 38179 }, { "epoch": 0.98, "grad_norm": 2.3981974124908447, "learning_rate": 2.40145654996371e-08, "loss": 0.5659, "step": 38180 }, { "epoch": 0.98, "grad_norm": 1.7385573387145996, "learning_rate": 2.3957102989445867e-08, "loss": 0.517, "step": 38181 }, { "epoch": 0.98, "grad_norm": 1.457047939300537, "learning_rate": 2.3899709227874768e-08, "loss": 0.6003, "step": 38182 }, { "epoch": 0.98, "grad_norm": 1.2372366189956665, "learning_rate": 2.3842384215319037e-08, "loss": 0.4251, "step": 38183 }, { "epoch": 0.98, "grad_norm": 1.520729660987854, "learning_rate": 2.3785127952173915e-08, "loss": 0.5669, "step": 38184 }, { "epoch": 0.98, "grad_norm": 1.3889201879501343, "learning_rate": 2.372794043883464e-08, "loss": 0.4634, "step": 38185 }, { "epoch": 0.98, "grad_norm": 1.2356984615325928, "learning_rate": 2.3670821675694232e-08, "loss": 0.429, "step": 38186 }, { "epoch": 0.98, "grad_norm": 1.9489620923995972, "learning_rate": 2.361377166314682e-08, "loss": 0.392, "step": 38187 }, { "epoch": 0.98, "grad_norm": 1.1548259258270264, "learning_rate": 2.355679040158654e-08, "loss": 0.2991, "step": 38188 }, { "epoch": 0.98, "grad_norm": 1.0268659591674805, "learning_rate": 2.3499877891403067e-08, "loss": 0.4867, "step": 38189 }, { "epoch": 0.98, "grad_norm": 1.4983981847763062, "learning_rate": 2.344303413299276e-08, "loss": 0.4866, "step": 38190 }, { "epoch": 0.98, "grad_norm": 3.94952392578125, "learning_rate": 2.3386259126744192e-08, "loss": 0.5479, "step": 38191 }, { "epoch": 0.98, "grad_norm": 1.4136236906051636, "learning_rate": 2.3329552873049277e-08, "loss": 0.5554, "step": 38192 }, { "epoch": 0.98, "grad_norm": 3.231483221054077, "learning_rate": 2.327291537229881e-08, "loss": 0.6408, "step": 38193 }, { "epoch": 0.98, "grad_norm": 1.8958579301834106, "learning_rate": 2.3216346624884702e-08, "loss": 0.3363, "step": 38194 }, { "epoch": 0.98, "grad_norm": 1.1752960681915283, "learning_rate": 2.3159846631194415e-08, "loss": 0.5325, "step": 38195 }, { "epoch": 0.98, "grad_norm": 1.466416597366333, "learning_rate": 2.3103415391618756e-08, "loss": 0.3594, "step": 38196 }, { "epoch": 0.98, "grad_norm": 7.991842746734619, "learning_rate": 2.3047052906546297e-08, "loss": 0.511, "step": 38197 }, { "epoch": 0.98, "grad_norm": 1.5298372507095337, "learning_rate": 2.2990759176365617e-08, "loss": 0.466, "step": 38198 }, { "epoch": 0.98, "grad_norm": 1.260029911994934, "learning_rate": 2.2934534201463078e-08, "loss": 0.472, "step": 38199 }, { "epoch": 0.98, "grad_norm": 1.689620018005371, "learning_rate": 2.287837798222947e-08, "loss": 0.5756, "step": 38200 }, { "epoch": 0.98, "grad_norm": 2.0400350093841553, "learning_rate": 2.282229051904894e-08, "loss": 0.5344, "step": 38201 }, { "epoch": 0.98, "grad_norm": 1.2187960147857666, "learning_rate": 2.2766271812308948e-08, "loss": 0.4751, "step": 38202 }, { "epoch": 0.98, "grad_norm": 1.5780963897705078, "learning_rate": 2.2710321862395858e-08, "loss": 0.4877, "step": 38203 }, { "epoch": 0.98, "grad_norm": 1.3108134269714355, "learning_rate": 2.2654440669693802e-08, "loss": 0.4951, "step": 38204 }, { "epoch": 0.98, "grad_norm": 7.622490406036377, "learning_rate": 2.2598628234590248e-08, "loss": 0.5762, "step": 38205 }, { "epoch": 0.98, "grad_norm": 1.6668730974197388, "learning_rate": 2.2542884557468224e-08, "loss": 0.387, "step": 38206 }, { "epoch": 0.98, "grad_norm": 1.0726431608200073, "learning_rate": 2.248720963871187e-08, "loss": 0.4447, "step": 38207 }, { "epoch": 0.98, "grad_norm": 3.6296257972717285, "learning_rate": 2.243160347870532e-08, "loss": 0.5865, "step": 38208 }, { "epoch": 0.98, "grad_norm": 1.1925327777862549, "learning_rate": 2.2376066077831603e-08, "loss": 0.5935, "step": 38209 }, { "epoch": 0.98, "grad_norm": 0.9810964465141296, "learning_rate": 2.2320597436472636e-08, "loss": 0.4143, "step": 38210 }, { "epoch": 0.98, "grad_norm": 1.4948475360870361, "learning_rate": 2.2265197555012553e-08, "loss": 0.4416, "step": 38211 }, { "epoch": 0.98, "grad_norm": 2.0954368114471436, "learning_rate": 2.2209866433832162e-08, "loss": 0.4451, "step": 38212 }, { "epoch": 0.98, "grad_norm": 1.3013087511062622, "learning_rate": 2.2154604073312268e-08, "loss": 0.5318, "step": 38213 }, { "epoch": 0.98, "grad_norm": 2.3127658367156982, "learning_rate": 2.2099410473834793e-08, "loss": 0.5617, "step": 38214 }, { "epoch": 0.98, "grad_norm": 1.956314206123352, "learning_rate": 2.2044285635778318e-08, "loss": 0.6024, "step": 38215 }, { "epoch": 0.98, "grad_norm": 12.208487510681152, "learning_rate": 2.1989229559524762e-08, "loss": 0.4492, "step": 38216 }, { "epoch": 0.98, "grad_norm": 12.761354446411133, "learning_rate": 2.193424224545271e-08, "loss": 0.6103, "step": 38217 }, { "epoch": 0.98, "grad_norm": 1.6719282865524292, "learning_rate": 2.1879323693940746e-08, "loss": 0.4093, "step": 38218 }, { "epoch": 0.98, "grad_norm": 1.0038577318191528, "learning_rate": 2.182447390536746e-08, "loss": 0.3346, "step": 38219 }, { "epoch": 0.98, "grad_norm": 1.6235079765319824, "learning_rate": 2.1769692880111437e-08, "loss": 0.4336, "step": 38220 }, { "epoch": 0.98, "grad_norm": 1.5014631748199463, "learning_rate": 2.1714980618549042e-08, "loss": 0.5605, "step": 38221 }, { "epoch": 0.98, "grad_norm": 1.6497604846954346, "learning_rate": 2.166033712105886e-08, "loss": 0.5454, "step": 38222 }, { "epoch": 0.98, "grad_norm": 1.5226337909698486, "learning_rate": 2.160576238801615e-08, "loss": 0.5121, "step": 38223 }, { "epoch": 0.98, "grad_norm": 1.650145411491394, "learning_rate": 2.1551256419797272e-08, "loss": 0.4943, "step": 38224 }, { "epoch": 0.98, "grad_norm": 1.112143635749817, "learning_rate": 2.1496819216778597e-08, "loss": 0.2985, "step": 38225 }, { "epoch": 0.98, "grad_norm": 1.8341126441955566, "learning_rate": 2.1442450779334268e-08, "loss": 0.5442, "step": 38226 }, { "epoch": 0.98, "grad_norm": 1.6744356155395508, "learning_rate": 2.1388151107839538e-08, "loss": 0.6131, "step": 38227 }, { "epoch": 0.98, "grad_norm": 1.1160262823104858, "learning_rate": 2.1333920202668556e-08, "loss": 0.455, "step": 38228 }, { "epoch": 0.98, "grad_norm": 10.383610725402832, "learning_rate": 2.1279758064194356e-08, "loss": 0.5449, "step": 38229 }, { "epoch": 0.98, "grad_norm": 1.636521816253662, "learning_rate": 2.122566469279219e-08, "loss": 0.5565, "step": 38230 }, { "epoch": 0.98, "grad_norm": 0.81727534532547, "learning_rate": 2.117164008883288e-08, "loss": 0.4108, "step": 38231 }, { "epoch": 0.98, "grad_norm": 1.8358228206634521, "learning_rate": 2.111768425268834e-08, "loss": 0.4026, "step": 38232 }, { "epoch": 0.98, "grad_norm": 1.248133659362793, "learning_rate": 2.1063797184732725e-08, "loss": 0.4008, "step": 38233 }, { "epoch": 0.98, "grad_norm": 1.8980128765106201, "learning_rate": 2.1009978885335735e-08, "loss": 0.531, "step": 38234 }, { "epoch": 0.98, "grad_norm": 1.025046944618225, "learning_rate": 2.0956229354868185e-08, "loss": 0.5139, "step": 38235 }, { "epoch": 0.98, "grad_norm": 21.020843505859375, "learning_rate": 2.0902548593700888e-08, "loss": 0.6906, "step": 38236 }, { "epoch": 0.98, "grad_norm": 5.233006477355957, "learning_rate": 2.084893660220355e-08, "loss": 0.543, "step": 38237 }, { "epoch": 0.98, "grad_norm": 6.884825706481934, "learning_rate": 2.0795393380745877e-08, "loss": 0.6834, "step": 38238 }, { "epoch": 0.98, "grad_norm": 4.369068145751953, "learning_rate": 2.0741918929697568e-08, "loss": 0.5213, "step": 38239 }, { "epoch": 0.98, "grad_norm": 1.3917651176452637, "learning_rate": 2.0688513249425002e-08, "loss": 0.4239, "step": 38240 }, { "epoch": 0.98, "grad_norm": 4.4223713874816895, "learning_rate": 2.063517634029899e-08, "loss": 0.5231, "step": 38241 }, { "epoch": 0.98, "grad_norm": 1.205003261566162, "learning_rate": 2.0581908202684796e-08, "loss": 0.4778, "step": 38242 }, { "epoch": 0.98, "grad_norm": 2.9278252124786377, "learning_rate": 2.0528708836951016e-08, "loss": 0.5911, "step": 38243 }, { "epoch": 0.98, "grad_norm": 1.3567326068878174, "learning_rate": 2.0475578243462914e-08, "loss": 0.5966, "step": 38244 }, { "epoch": 0.98, "grad_norm": 2.332270383834839, "learning_rate": 2.0422516422587968e-08, "loss": 0.495, "step": 38245 }, { "epoch": 0.98, "grad_norm": 1.6227682828903198, "learning_rate": 2.0369523374691446e-08, "loss": 0.5258, "step": 38246 }, { "epoch": 0.98, "grad_norm": 2.283473253250122, "learning_rate": 2.0316599100137503e-08, "loss": 0.6433, "step": 38247 }, { "epoch": 0.98, "grad_norm": 1.99821138381958, "learning_rate": 2.0263743599292505e-08, "loss": 0.4799, "step": 38248 }, { "epoch": 0.98, "grad_norm": 1.4807672500610352, "learning_rate": 2.0210956872519504e-08, "loss": 0.5923, "step": 38249 }, { "epoch": 0.98, "grad_norm": 1.3222907781600952, "learning_rate": 2.015823892018265e-08, "loss": 0.5121, "step": 38250 }, { "epoch": 0.98, "grad_norm": 1.4659299850463867, "learning_rate": 2.0105589742646092e-08, "loss": 0.6216, "step": 38251 }, { "epoch": 0.98, "grad_norm": 1.263979434967041, "learning_rate": 2.005300934027066e-08, "loss": 0.5248, "step": 38252 }, { "epoch": 0.98, "grad_norm": 8.519166946411133, "learning_rate": 2.0000497713420498e-08, "loss": 0.4881, "step": 38253 }, { "epoch": 0.98, "grad_norm": 1.2514325380325317, "learning_rate": 1.9948054862456433e-08, "loss": 0.5258, "step": 38254 }, { "epoch": 0.98, "grad_norm": 1.2908521890640259, "learning_rate": 1.989568078774151e-08, "loss": 0.4592, "step": 38255 }, { "epoch": 0.98, "grad_norm": 2.9985568523406982, "learning_rate": 1.9843375489634332e-08, "loss": 0.6086, "step": 38256 }, { "epoch": 0.98, "grad_norm": 1.4852851629257202, "learning_rate": 1.9791138968495714e-08, "loss": 0.4993, "step": 38257 }, { "epoch": 0.98, "grad_norm": 1.638532042503357, "learning_rate": 1.97389712246876e-08, "loss": 0.6921, "step": 38258 }, { "epoch": 0.98, "grad_norm": 1.152082920074463, "learning_rate": 1.9686872258568578e-08, "loss": 0.4751, "step": 38259 }, { "epoch": 0.98, "grad_norm": 2.1691412925720215, "learning_rate": 1.9634842070496153e-08, "loss": 0.561, "step": 38260 }, { "epoch": 0.98, "grad_norm": 1.2741917371749878, "learning_rate": 1.958288066083114e-08, "loss": 0.5225, "step": 38261 }, { "epoch": 0.98, "grad_norm": 4.022562503814697, "learning_rate": 1.953098802992992e-08, "loss": 0.757, "step": 38262 }, { "epoch": 0.98, "grad_norm": 2.306692361831665, "learning_rate": 1.94791641781511e-08, "loss": 0.5423, "step": 38263 }, { "epoch": 0.98, "grad_norm": 2.058479070663452, "learning_rate": 1.9427409105851058e-08, "loss": 0.6003, "step": 38264 }, { "epoch": 0.98, "grad_norm": 1.3851890563964844, "learning_rate": 1.9375722813388397e-08, "loss": 0.5829, "step": 38265 }, { "epoch": 0.98, "grad_norm": 1.0682446956634521, "learning_rate": 1.9324105301116168e-08, "loss": 0.5039, "step": 38266 }, { "epoch": 0.98, "grad_norm": 0.8554060459136963, "learning_rate": 1.9272556569391866e-08, "loss": 0.4681, "step": 38267 }, { "epoch": 0.98, "grad_norm": 1.882444143295288, "learning_rate": 1.9221076618571864e-08, "loss": 0.6773, "step": 38268 }, { "epoch": 0.98, "grad_norm": 1.3766727447509766, "learning_rate": 1.9169665449008113e-08, "loss": 0.4285, "step": 38269 }, { "epoch": 0.98, "grad_norm": 1.3375391960144043, "learning_rate": 1.91183230610581e-08, "loss": 0.6189, "step": 38270 }, { "epoch": 0.98, "grad_norm": 7.59076452255249, "learning_rate": 1.906704945507265e-08, "loss": 0.6405, "step": 38271 }, { "epoch": 0.98, "grad_norm": 1.8732950687408447, "learning_rate": 1.9015844631407042e-08, "loss": 0.4902, "step": 38272 }, { "epoch": 0.98, "grad_norm": 2.309701919555664, "learning_rate": 1.8964708590413216e-08, "loss": 0.6397, "step": 38273 }, { "epoch": 0.98, "grad_norm": 1.7036067247390747, "learning_rate": 1.891364133244422e-08, "loss": 0.6259, "step": 38274 }, { "epoch": 0.98, "grad_norm": 1.5443625450134277, "learning_rate": 1.8862642857850888e-08, "loss": 0.6021, "step": 38275 }, { "epoch": 0.98, "grad_norm": 1.9625928401947021, "learning_rate": 1.8811713166986267e-08, "loss": 0.6765, "step": 38276 }, { "epoch": 0.98, "grad_norm": 1.2452197074890137, "learning_rate": 1.876085226020119e-08, "loss": 0.4648, "step": 38277 }, { "epoch": 0.98, "grad_norm": 0.9909485578536987, "learning_rate": 1.8710060137844265e-08, "loss": 0.4895, "step": 38278 }, { "epoch": 0.98, "grad_norm": 1.5079349279403687, "learning_rate": 1.8659336800267434e-08, "loss": 0.5683, "step": 38279 }, { "epoch": 0.98, "grad_norm": 1.356026291847229, "learning_rate": 1.860868224781931e-08, "loss": 0.398, "step": 38280 }, { "epoch": 0.98, "grad_norm": 3.3537235260009766, "learning_rate": 1.8558096480848498e-08, "loss": 0.4842, "step": 38281 }, { "epoch": 0.98, "grad_norm": 1.3766297101974487, "learning_rate": 1.850757949970583e-08, "loss": 0.5528, "step": 38282 }, { "epoch": 0.98, "grad_norm": 1.3776092529296875, "learning_rate": 1.8457131304736587e-08, "loss": 0.5651, "step": 38283 }, { "epoch": 0.98, "grad_norm": 1.8129202127456665, "learning_rate": 1.8406751896290488e-08, "loss": 0.5735, "step": 38284 }, { "epoch": 0.98, "grad_norm": 1.1754705905914307, "learning_rate": 1.835644127471392e-08, "loss": 0.4149, "step": 38285 }, { "epoch": 0.98, "grad_norm": 1.9923259019851685, "learning_rate": 1.8306199440353278e-08, "loss": 0.6005, "step": 38286 }, { "epoch": 0.98, "grad_norm": 1.6959002017974854, "learning_rate": 1.8256026393556058e-08, "loss": 0.4431, "step": 38287 }, { "epoch": 0.98, "grad_norm": 0.9099721312522888, "learning_rate": 1.8205922134666432e-08, "loss": 0.3682, "step": 38288 }, { "epoch": 0.98, "grad_norm": 2.356468677520752, "learning_rate": 1.8155886664030785e-08, "loss": 0.7223, "step": 38289 }, { "epoch": 0.98, "grad_norm": 5.248746871948242, "learning_rate": 1.810591998199329e-08, "loss": 0.4911, "step": 38290 }, { "epoch": 0.98, "grad_norm": 5.782477378845215, "learning_rate": 1.8056022088898116e-08, "loss": 0.5813, "step": 38291 }, { "epoch": 0.98, "grad_norm": 7.016438007354736, "learning_rate": 1.8006192985090544e-08, "loss": 0.518, "step": 38292 }, { "epoch": 0.98, "grad_norm": 1.8525753021240234, "learning_rate": 1.7956432670912517e-08, "loss": 0.5728, "step": 38293 }, { "epoch": 0.98, "grad_norm": 0.914762020111084, "learning_rate": 1.7906741146707095e-08, "loss": 0.3302, "step": 38294 }, { "epoch": 0.98, "grad_norm": 1.0580648183822632, "learning_rate": 1.7857118412817343e-08, "loss": 0.4185, "step": 38295 }, { "epoch": 0.98, "grad_norm": 1.4217909574508667, "learning_rate": 1.7807564469585202e-08, "loss": 0.6311, "step": 38296 }, { "epoch": 0.98, "grad_norm": 3.148864984512329, "learning_rate": 1.7758079317351516e-08, "loss": 0.5478, "step": 38297 }, { "epoch": 0.98, "grad_norm": 1.031048059463501, "learning_rate": 1.7708662956458233e-08, "loss": 0.5067, "step": 38298 }, { "epoch": 0.98, "grad_norm": 1.366965413093567, "learning_rate": 1.765931538724508e-08, "loss": 0.4981, "step": 38299 }, { "epoch": 0.98, "grad_norm": 1.600220799446106, "learning_rate": 1.7610036610051785e-08, "loss": 0.5537, "step": 38300 }, { "epoch": 0.98, "grad_norm": 1.2711011171340942, "learning_rate": 1.7560826625219184e-08, "loss": 0.4479, "step": 38301 }, { "epoch": 0.98, "grad_norm": 1.2421512603759766, "learning_rate": 1.75116854330859e-08, "loss": 0.6423, "step": 38302 }, { "epoch": 0.98, "grad_norm": 1.2587443590164185, "learning_rate": 1.746261303399055e-08, "loss": 0.5888, "step": 38303 }, { "epoch": 0.98, "grad_norm": 1.6015063524246216, "learning_rate": 1.741360942827064e-08, "loss": 0.53, "step": 38304 }, { "epoch": 0.98, "grad_norm": 1.4498876333236694, "learning_rate": 1.736467461626479e-08, "loss": 0.4947, "step": 38305 }, { "epoch": 0.98, "grad_norm": 2.989471673965454, "learning_rate": 1.731580859830939e-08, "loss": 0.5934, "step": 38306 }, { "epoch": 0.98, "grad_norm": 1.659179449081421, "learning_rate": 1.726701137474307e-08, "loss": 0.542, "step": 38307 }, { "epoch": 0.98, "grad_norm": 1.0330809354782104, "learning_rate": 1.721828294589889e-08, "loss": 0.5403, "step": 38308 }, { "epoch": 0.98, "grad_norm": 1.5422848463058472, "learning_rate": 1.7169623312115468e-08, "loss": 0.4787, "step": 38309 }, { "epoch": 0.98, "grad_norm": 1.1770391464233398, "learning_rate": 1.7121032473726984e-08, "loss": 0.3689, "step": 38310 }, { "epoch": 0.98, "grad_norm": 1.576467514038086, "learning_rate": 1.707251043106872e-08, "loss": 0.5999, "step": 38311 }, { "epoch": 0.98, "grad_norm": 3.057948589324951, "learning_rate": 1.702405718447486e-08, "loss": 0.5249, "step": 38312 }, { "epoch": 0.98, "grad_norm": 1.9263750314712524, "learning_rate": 1.697567273427958e-08, "loss": 0.653, "step": 38313 }, { "epoch": 0.98, "grad_norm": 1.5022119283676147, "learning_rate": 1.6927357080815944e-08, "loss": 0.618, "step": 38314 }, { "epoch": 0.98, "grad_norm": 1.1636848449707031, "learning_rate": 1.6879110224415907e-08, "loss": 0.4994, "step": 38315 }, { "epoch": 0.98, "grad_norm": 1.6481555700302124, "learning_rate": 1.6830932165413648e-08, "loss": 0.4819, "step": 38316 }, { "epoch": 0.98, "grad_norm": 2.4016780853271484, "learning_rate": 1.6782822904141128e-08, "loss": 0.4791, "step": 38317 }, { "epoch": 0.98, "grad_norm": 3.9130771160125732, "learning_rate": 1.673478244092919e-08, "loss": 0.7385, "step": 38318 }, { "epoch": 0.98, "grad_norm": 0.9665387272834778, "learning_rate": 1.6686810776108676e-08, "loss": 0.4206, "step": 38319 }, { "epoch": 0.98, "grad_norm": 1.46573007106781, "learning_rate": 1.6638907910010437e-08, "loss": 0.6536, "step": 38320 }, { "epoch": 0.98, "grad_norm": 10.123779296875, "learning_rate": 1.659107384296421e-08, "loss": 0.6151, "step": 38321 }, { "epoch": 0.98, "grad_norm": 1.5495896339416504, "learning_rate": 1.6543308575300844e-08, "loss": 0.4281, "step": 38322 }, { "epoch": 0.98, "grad_norm": 1.7493326663970947, "learning_rate": 1.649561210734896e-08, "loss": 0.5845, "step": 38323 }, { "epoch": 0.98, "grad_norm": 1.9997472763061523, "learning_rate": 1.644798443943718e-08, "loss": 0.7248, "step": 38324 }, { "epoch": 0.98, "grad_norm": 6.6511125564575195, "learning_rate": 1.640042557189303e-08, "loss": 0.6187, "step": 38325 }, { "epoch": 0.98, "grad_norm": 1.297724723815918, "learning_rate": 1.6352935505045132e-08, "loss": 0.5867, "step": 38326 }, { "epoch": 0.98, "grad_norm": 1.2864100933074951, "learning_rate": 1.630551423921989e-08, "loss": 0.4347, "step": 38327 }, { "epoch": 0.98, "grad_norm": 1.2902016639709473, "learning_rate": 1.6258161774744817e-08, "loss": 0.4962, "step": 38328 }, { "epoch": 0.98, "grad_norm": 1.2689180374145508, "learning_rate": 1.6210878111946326e-08, "loss": 0.3937, "step": 38329 }, { "epoch": 0.98, "grad_norm": 1.1445857286453247, "learning_rate": 1.6163663251150818e-08, "loss": 0.3821, "step": 38330 }, { "epoch": 0.98, "grad_norm": 1.839573860168457, "learning_rate": 1.611651719268137e-08, "loss": 0.5623, "step": 38331 }, { "epoch": 0.98, "grad_norm": 1.415210485458374, "learning_rate": 1.6069439936865494e-08, "loss": 0.6136, "step": 38332 }, { "epoch": 0.98, "grad_norm": 2.199223518371582, "learning_rate": 1.602243148402627e-08, "loss": 0.4282, "step": 38333 }, { "epoch": 0.98, "grad_norm": 1.0583746433258057, "learning_rate": 1.5975491834487877e-08, "loss": 0.3739, "step": 38334 }, { "epoch": 0.98, "grad_norm": 1.2251911163330078, "learning_rate": 1.5928620988573395e-08, "loss": 0.4088, "step": 38335 }, { "epoch": 0.98, "grad_norm": 1.0645238161087036, "learning_rate": 1.5881818946607008e-08, "loss": 0.5682, "step": 38336 }, { "epoch": 0.98, "grad_norm": 0.9620593786239624, "learning_rate": 1.583508570890957e-08, "loss": 0.4726, "step": 38337 }, { "epoch": 0.98, "grad_norm": 1.6522256135940552, "learning_rate": 1.5788421275805267e-08, "loss": 0.5795, "step": 38338 }, { "epoch": 0.98, "grad_norm": 1.490929365158081, "learning_rate": 1.574182564761384e-08, "loss": 0.4195, "step": 38339 }, { "epoch": 0.98, "grad_norm": 1.2395036220550537, "learning_rate": 1.5695298824656148e-08, "loss": 0.5796, "step": 38340 }, { "epoch": 0.98, "grad_norm": 1.1940253973007202, "learning_rate": 1.564884080725526e-08, "loss": 0.5639, "step": 38341 }, { "epoch": 0.98, "grad_norm": 1.219283103942871, "learning_rate": 1.560245159572871e-08, "loss": 0.4986, "step": 38342 }, { "epoch": 0.98, "grad_norm": 1.4736756086349487, "learning_rate": 1.555613119039734e-08, "loss": 0.4415, "step": 38343 }, { "epoch": 0.98, "grad_norm": 1.8057785034179688, "learning_rate": 1.5509879591580902e-08, "loss": 0.6513, "step": 38344 }, { "epoch": 0.98, "grad_norm": 7.048114776611328, "learning_rate": 1.5463696799596916e-08, "loss": 0.5624, "step": 38345 }, { "epoch": 0.98, "grad_norm": 4.907357215881348, "learning_rate": 1.541758281476402e-08, "loss": 0.6906, "step": 38346 }, { "epoch": 0.98, "grad_norm": 6.377290725708008, "learning_rate": 1.5371537637400847e-08, "loss": 0.5798, "step": 38347 }, { "epoch": 0.98, "grad_norm": 1.9001637697219849, "learning_rate": 1.5325561267823808e-08, "loss": 0.4951, "step": 38348 }, { "epoch": 0.98, "grad_norm": 12.081847190856934, "learning_rate": 1.5279653706350427e-08, "loss": 0.6133, "step": 38349 }, { "epoch": 0.98, "grad_norm": 1.6926218271255493, "learning_rate": 1.5233814953296013e-08, "loss": 0.5365, "step": 38350 }, { "epoch": 0.98, "grad_norm": 1.1834388971328735, "learning_rate": 1.518804500897808e-08, "loss": 0.4249, "step": 38351 }, { "epoch": 0.98, "grad_norm": 1.7625916004180908, "learning_rate": 1.5142343873709718e-08, "loss": 0.5988, "step": 38352 }, { "epoch": 0.98, "grad_norm": 1.921306848526001, "learning_rate": 1.509671154780845e-08, "loss": 0.5908, "step": 38353 }, { "epoch": 0.98, "grad_norm": 1.3435368537902832, "learning_rate": 1.505114803158625e-08, "loss": 0.5855, "step": 38354 }, { "epoch": 0.98, "grad_norm": 1.0424203872680664, "learning_rate": 1.5005653325359527e-08, "loss": 0.4921, "step": 38355 }, { "epoch": 0.98, "grad_norm": 1.037938117980957, "learning_rate": 1.4960227429440254e-08, "loss": 0.4875, "step": 38356 }, { "epoch": 0.98, "grad_norm": 0.9653990268707275, "learning_rate": 1.491487034414152e-08, "loss": 0.4196, "step": 38357 }, { "epoch": 0.98, "grad_norm": 8.650046348571777, "learning_rate": 1.48695820697764e-08, "loss": 0.5233, "step": 38358 }, { "epoch": 0.98, "grad_norm": 2.090850591659546, "learning_rate": 1.4824362606656872e-08, "loss": 0.4817, "step": 38359 }, { "epoch": 0.98, "grad_norm": 1.905760645866394, "learning_rate": 1.4779211955094908e-08, "loss": 0.5628, "step": 38360 }, { "epoch": 0.98, "grad_norm": 2.7174289226531982, "learning_rate": 1.4734130115401368e-08, "loss": 0.7612, "step": 38361 }, { "epoch": 0.98, "grad_norm": 1.024401068687439, "learning_rate": 1.4689117087886007e-08, "loss": 0.5127, "step": 38362 }, { "epoch": 0.98, "grad_norm": 9.119732856750488, "learning_rate": 1.4644172872860796e-08, "loss": 0.4978, "step": 38363 }, { "epoch": 0.98, "grad_norm": 1.7732632160186768, "learning_rate": 1.4599297470634376e-08, "loss": 0.5838, "step": 38364 }, { "epoch": 0.98, "grad_norm": 1.5678831338882446, "learning_rate": 1.4554490881515394e-08, "loss": 0.516, "step": 38365 }, { "epoch": 0.98, "grad_norm": 1.78528892993927, "learning_rate": 1.4509753105813595e-08, "loss": 0.6266, "step": 38366 }, { "epoch": 0.98, "grad_norm": 1.2141910791397095, "learning_rate": 1.4465084143837627e-08, "loss": 0.5988, "step": 38367 }, { "epoch": 0.98, "grad_norm": 0.881373941898346, "learning_rate": 1.4420483995895018e-08, "loss": 0.3961, "step": 38368 }, { "epoch": 0.98, "grad_norm": 1.4598881006240845, "learning_rate": 1.4375952662292192e-08, "loss": 0.6656, "step": 38369 }, { "epoch": 0.98, "grad_norm": 3.331400156021118, "learning_rate": 1.4331490143336679e-08, "loss": 0.639, "step": 38370 }, { "epoch": 0.98, "grad_norm": 1.8119875192642212, "learning_rate": 1.4287096439334902e-08, "loss": 0.59, "step": 38371 }, { "epoch": 0.98, "grad_norm": 1.2126202583312988, "learning_rate": 1.4242771550593282e-08, "loss": 0.3977, "step": 38372 }, { "epoch": 0.98, "grad_norm": 1.23770272731781, "learning_rate": 1.419851547741602e-08, "loss": 0.6094, "step": 38373 }, { "epoch": 0.98, "grad_norm": 1.3660732507705688, "learning_rate": 1.4154328220109536e-08, "loss": 0.5873, "step": 38374 }, { "epoch": 0.98, "grad_norm": 1.4369993209838867, "learning_rate": 1.4110209778978035e-08, "loss": 0.5948, "step": 38375 }, { "epoch": 0.98, "grad_norm": 1.532508134841919, "learning_rate": 1.4066160154324603e-08, "loss": 0.6128, "step": 38376 }, { "epoch": 0.98, "grad_norm": 4.666624546051025, "learning_rate": 1.4022179346453446e-08, "loss": 0.5105, "step": 38377 }, { "epoch": 0.98, "grad_norm": 1.1694436073303223, "learning_rate": 1.3978267355667652e-08, "loss": 0.5557, "step": 38378 }, { "epoch": 0.98, "grad_norm": 6.748875141143799, "learning_rate": 1.3934424182270312e-08, "loss": 0.4353, "step": 38379 }, { "epoch": 0.98, "grad_norm": 1.7771145105361938, "learning_rate": 1.3890649826562297e-08, "loss": 0.5321, "step": 38380 }, { "epoch": 0.98, "grad_norm": 1.3440792560577393, "learning_rate": 1.3846944288846697e-08, "loss": 0.4147, "step": 38381 }, { "epoch": 0.98, "grad_norm": 1.4932736158370972, "learning_rate": 1.3803307569424384e-08, "loss": 0.432, "step": 38382 }, { "epoch": 0.98, "grad_norm": 1.382722020149231, "learning_rate": 1.3759739668595117e-08, "loss": 0.4645, "step": 38383 }, { "epoch": 0.98, "grad_norm": 0.9737610816955566, "learning_rate": 1.3716240586658658e-08, "loss": 0.2891, "step": 38384 }, { "epoch": 0.98, "grad_norm": 1.0004504919052124, "learning_rate": 1.3672810323918095e-08, "loss": 0.452, "step": 38385 }, { "epoch": 0.98, "grad_norm": 1.305306315422058, "learning_rate": 1.362944888066875e-08, "loss": 0.4829, "step": 38386 }, { "epoch": 0.98, "grad_norm": 10.073637962341309, "learning_rate": 1.358615625721149e-08, "loss": 0.4989, "step": 38387 }, { "epoch": 0.98, "grad_norm": 1.1625304222106934, "learning_rate": 1.354293245384497e-08, "loss": 0.5143, "step": 38388 }, { "epoch": 0.98, "grad_norm": 1.5072137117385864, "learning_rate": 1.3499777470866726e-08, "loss": 0.5572, "step": 38389 }, { "epoch": 0.98, "grad_norm": 1.5909148454666138, "learning_rate": 1.3456691308573188e-08, "loss": 0.4285, "step": 38390 }, { "epoch": 0.98, "grad_norm": 5.890847206115723, "learning_rate": 1.3413673967261898e-08, "loss": 0.4777, "step": 38391 }, { "epoch": 0.98, "grad_norm": 3.4432451725006104, "learning_rate": 1.3370725447229283e-08, "loss": 0.5771, "step": 38392 }, { "epoch": 0.98, "grad_norm": 2.0392091274261475, "learning_rate": 1.3327845748771772e-08, "loss": 0.6399, "step": 38393 }, { "epoch": 0.98, "grad_norm": 1.440872073173523, "learning_rate": 1.3285034872184688e-08, "loss": 0.4529, "step": 38394 }, { "epoch": 0.98, "grad_norm": 1.0293288230895996, "learning_rate": 1.3242292817762237e-08, "loss": 0.5525, "step": 38395 }, { "epoch": 0.98, "grad_norm": 1.2539218664169312, "learning_rate": 1.319961958580085e-08, "loss": 0.3859, "step": 38396 }, { "epoch": 0.98, "grad_norm": 12.200758934020996, "learning_rate": 1.3157015176591404e-08, "loss": 0.3256, "step": 38397 }, { "epoch": 0.98, "grad_norm": 5.62337589263916, "learning_rate": 1.311447959043144e-08, "loss": 0.4356, "step": 38398 }, { "epoch": 0.98, "grad_norm": 1.5340721607208252, "learning_rate": 1.3072012827610725e-08, "loss": 0.568, "step": 38399 }, { "epoch": 0.98, "grad_norm": 1.8549057245254517, "learning_rate": 1.302961488842458e-08, "loss": 0.499, "step": 38400 }, { "epoch": 0.98, "grad_norm": 1.322101354598999, "learning_rate": 1.2987285773162773e-08, "loss": 0.5131, "step": 38401 }, { "epoch": 0.98, "grad_norm": 1.6818761825561523, "learning_rate": 1.2945025482118401e-08, "loss": 0.6321, "step": 38402 }, { "epoch": 0.98, "grad_norm": 1.4433563947677612, "learning_rate": 1.2902834015582344e-08, "loss": 0.4934, "step": 38403 }, { "epoch": 0.98, "grad_norm": 1.1088963747024536, "learning_rate": 1.286071137384548e-08, "loss": 0.4081, "step": 38404 }, { "epoch": 0.98, "grad_norm": 1.6811749935150146, "learning_rate": 1.2818657557198688e-08, "loss": 0.4213, "step": 38405 }, { "epoch": 0.98, "grad_norm": 1.6518056392669678, "learning_rate": 1.2776672565930625e-08, "loss": 0.6059, "step": 38406 }, { "epoch": 0.98, "grad_norm": 1.4722009897232056, "learning_rate": 1.273475640033106e-08, "loss": 0.5604, "step": 38407 }, { "epoch": 0.98, "grad_norm": 1.398055076599121, "learning_rate": 1.2692909060688652e-08, "loss": 0.5478, "step": 38408 }, { "epoch": 0.98, "grad_norm": 1.9556351900100708, "learning_rate": 1.2651130547293167e-08, "loss": 0.7148, "step": 38409 }, { "epoch": 0.98, "grad_norm": 1.3024582862854004, "learning_rate": 1.2609420860429934e-08, "loss": 0.3954, "step": 38410 }, { "epoch": 0.98, "grad_norm": 1.7106678485870361, "learning_rate": 1.2567780000388718e-08, "loss": 0.479, "step": 38411 }, { "epoch": 0.98, "grad_norm": 1.624834656715393, "learning_rate": 1.2526207967455961e-08, "loss": 0.5609, "step": 38412 }, { "epoch": 0.98, "grad_norm": 3.1116456985473633, "learning_rate": 1.2484704761918098e-08, "loss": 0.5543, "step": 38413 }, { "epoch": 0.98, "grad_norm": 1.0923992395401, "learning_rate": 1.2443270384059347e-08, "loss": 0.5064, "step": 38414 }, { "epoch": 0.98, "grad_norm": 1.9629085063934326, "learning_rate": 1.2401904834168366e-08, "loss": 0.5387, "step": 38415 }, { "epoch": 0.98, "grad_norm": 2.1633858680725098, "learning_rate": 1.236060811252826e-08, "loss": 0.4867, "step": 38416 }, { "epoch": 0.98, "grad_norm": 0.9016611576080322, "learning_rate": 1.231938021942325e-08, "loss": 0.3807, "step": 38417 }, { "epoch": 0.98, "grad_norm": 2.7732465267181396, "learning_rate": 1.2278221155138659e-08, "loss": 0.6959, "step": 38418 }, { "epoch": 0.98, "grad_norm": 1.3524425029754639, "learning_rate": 1.2237130919957596e-08, "loss": 0.5882, "step": 38419 }, { "epoch": 0.98, "grad_norm": 1.0888962745666504, "learning_rate": 1.2196109514163167e-08, "loss": 0.386, "step": 38420 }, { "epoch": 0.98, "grad_norm": 0.8640494346618652, "learning_rate": 1.2155156938038482e-08, "loss": 0.4239, "step": 38421 }, { "epoch": 0.98, "grad_norm": 1.6234463453292847, "learning_rate": 1.2114273191865534e-08, "loss": 0.615, "step": 38422 }, { "epoch": 0.98, "grad_norm": 1.461187481880188, "learning_rate": 1.207345827592632e-08, "loss": 0.4889, "step": 38423 }, { "epoch": 0.98, "grad_norm": 3.42024564743042, "learning_rate": 1.2032712190500617e-08, "loss": 0.5936, "step": 38424 }, { "epoch": 0.98, "grad_norm": 1.0733052492141724, "learning_rate": 1.1992034935871533e-08, "loss": 0.3441, "step": 38425 }, { "epoch": 0.98, "grad_norm": 1.0166196823120117, "learning_rate": 1.1951426512317733e-08, "loss": 0.4417, "step": 38426 }, { "epoch": 0.98, "grad_norm": 3.3244335651397705, "learning_rate": 1.1910886920118992e-08, "loss": 0.4856, "step": 38427 }, { "epoch": 0.98, "grad_norm": 2.709529399871826, "learning_rate": 1.1870416159556197e-08, "loss": 0.5812, "step": 38428 }, { "epoch": 0.98, "grad_norm": 3.730550527572632, "learning_rate": 1.1830014230908016e-08, "loss": 0.5516, "step": 38429 }, { "epoch": 0.98, "grad_norm": 2.9787824153900146, "learning_rate": 1.1789681134450892e-08, "loss": 0.6269, "step": 38430 }, { "epoch": 0.99, "grad_norm": 1.0330065488815308, "learning_rate": 1.1749416870464603e-08, "loss": 0.4204, "step": 38431 }, { "epoch": 0.99, "grad_norm": 2.2922890186309814, "learning_rate": 1.1709221439225593e-08, "loss": 0.6046, "step": 38432 }, { "epoch": 0.99, "grad_norm": 1.2735650539398193, "learning_rate": 1.1669094841012529e-08, "loss": 0.4776, "step": 38433 }, { "epoch": 0.99, "grad_norm": 1.488426685333252, "learning_rate": 1.1629037076099637e-08, "loss": 0.4453, "step": 38434 }, { "epoch": 0.99, "grad_norm": 3.3369102478027344, "learning_rate": 1.158904814476447e-08, "loss": 0.5353, "step": 38435 }, { "epoch": 0.99, "grad_norm": 1.261675238609314, "learning_rate": 1.1549128047282366e-08, "loss": 0.5687, "step": 38436 }, { "epoch": 0.99, "grad_norm": 4.087310791015625, "learning_rate": 1.1509276783928658e-08, "loss": 0.455, "step": 38437 }, { "epoch": 0.99, "grad_norm": 1.3012319803237915, "learning_rate": 1.1469494354976462e-08, "loss": 0.4851, "step": 38438 }, { "epoch": 0.99, "grad_norm": 1.4366991519927979, "learning_rate": 1.1429780760702224e-08, "loss": 0.546, "step": 38439 }, { "epoch": 0.99, "grad_norm": 1.429714560508728, "learning_rate": 1.1390136001377949e-08, "loss": 0.6751, "step": 38440 }, { "epoch": 0.99, "grad_norm": 10.853346824645996, "learning_rate": 1.1350560077277862e-08, "loss": 0.6835, "step": 38441 }, { "epoch": 0.99, "grad_norm": 2.0275721549987793, "learning_rate": 1.1311052988673965e-08, "loss": 0.6342, "step": 38442 }, { "epoch": 0.99, "grad_norm": 3.906106472015381, "learning_rate": 1.1271614735838266e-08, "loss": 0.6023, "step": 38443 }, { "epoch": 0.99, "grad_norm": 1.3431684970855713, "learning_rate": 1.1232245319043878e-08, "loss": 0.5346, "step": 38444 }, { "epoch": 0.99, "grad_norm": 3.284735918045044, "learning_rate": 1.1192944738560585e-08, "loss": 0.5603, "step": 38445 }, { "epoch": 0.99, "grad_norm": 2.2019412517547607, "learning_rate": 1.1153712994660393e-08, "loss": 0.5134, "step": 38446 }, { "epoch": 0.99, "grad_norm": 1.6841429471969604, "learning_rate": 1.1114550087613086e-08, "loss": 0.5502, "step": 38447 }, { "epoch": 0.99, "grad_norm": 3.6139650344848633, "learning_rate": 1.1075456017688446e-08, "loss": 0.4241, "step": 38448 }, { "epoch": 0.99, "grad_norm": 2.201777935028076, "learning_rate": 1.1036430785157371e-08, "loss": 0.7554, "step": 38449 }, { "epoch": 0.99, "grad_norm": 8.742813110351562, "learning_rate": 1.0997474390286312e-08, "loss": 0.4838, "step": 38450 }, { "epoch": 0.99, "grad_norm": 1.7549833059310913, "learning_rate": 1.0958586833345053e-08, "loss": 0.4679, "step": 38451 }, { "epoch": 0.99, "grad_norm": 1.7256829738616943, "learning_rate": 1.0919768114601159e-08, "loss": 0.3859, "step": 38452 }, { "epoch": 0.99, "grad_norm": 1.367488980293274, "learning_rate": 1.0881018234323304e-08, "loss": 0.5154, "step": 38453 }, { "epoch": 0.99, "grad_norm": 1.0955586433410645, "learning_rate": 1.0842337192776831e-08, "loss": 0.5287, "step": 38454 }, { "epoch": 0.99, "grad_norm": 1.3087879419326782, "learning_rate": 1.0803724990230413e-08, "loss": 0.5188, "step": 38455 }, { "epoch": 0.99, "grad_norm": 1.4223939180374146, "learning_rate": 1.0765181626948284e-08, "loss": 0.4988, "step": 38456 }, { "epoch": 0.99, "grad_norm": 2.0951173305511475, "learning_rate": 1.0726707103195787e-08, "loss": 0.6521, "step": 38457 }, { "epoch": 0.99, "grad_norm": 1.1982301473617554, "learning_rate": 1.0688301419239377e-08, "loss": 0.5351, "step": 38458 }, { "epoch": 0.99, "grad_norm": 0.9930201172828674, "learning_rate": 1.0649964575344396e-08, "loss": 0.5285, "step": 38459 }, { "epoch": 0.99, "grad_norm": 1.402748703956604, "learning_rate": 1.0611696571772856e-08, "loss": 0.3861, "step": 38460 }, { "epoch": 0.99, "grad_norm": 1.4064269065856934, "learning_rate": 1.05734974087901e-08, "loss": 0.4466, "step": 38461 }, { "epoch": 0.99, "grad_norm": 1.5700806379318237, "learning_rate": 1.0535367086658143e-08, "loss": 0.5804, "step": 38462 }, { "epoch": 0.99, "grad_norm": 1.2061851024627686, "learning_rate": 1.0497305605641217e-08, "loss": 0.49, "step": 38463 }, { "epoch": 0.99, "grad_norm": 1.70026695728302, "learning_rate": 1.0459312966000223e-08, "loss": 0.6119, "step": 38464 }, { "epoch": 0.99, "grad_norm": 3.7865958213806152, "learning_rate": 1.0421389167998286e-08, "loss": 0.4654, "step": 38465 }, { "epoch": 0.99, "grad_norm": 1.7470784187316895, "learning_rate": 1.0383534211895197e-08, "loss": 0.4182, "step": 38466 }, { "epoch": 0.99, "grad_norm": 1.3061209917068481, "learning_rate": 1.0345748097952967e-08, "loss": 0.62, "step": 38467 }, { "epoch": 0.99, "grad_norm": 11.444849014282227, "learning_rate": 1.0308030826431392e-08, "loss": 0.5473, "step": 38468 }, { "epoch": 0.99, "grad_norm": 1.3619670867919922, "learning_rate": 1.0270382397591372e-08, "loss": 0.5901, "step": 38469 }, { "epoch": 0.99, "grad_norm": 1.52959406375885, "learning_rate": 1.0232802811691589e-08, "loss": 0.5472, "step": 38470 }, { "epoch": 0.99, "grad_norm": 1.844610571861267, "learning_rate": 1.0195292068991836e-08, "loss": 0.4462, "step": 38471 }, { "epoch": 0.99, "grad_norm": 1.4277684688568115, "learning_rate": 1.0157850169748573e-08, "loss": 0.4949, "step": 38472 }, { "epoch": 0.99, "grad_norm": 1.3748232126235962, "learning_rate": 1.0120477114222704e-08, "loss": 0.4755, "step": 38473 }, { "epoch": 0.99, "grad_norm": 1.8893018960952759, "learning_rate": 1.008317290266847e-08, "loss": 0.6419, "step": 38474 }, { "epoch": 0.99, "grad_norm": 15.852627754211426, "learning_rate": 1.0045937535345662e-08, "loss": 0.3351, "step": 38475 }, { "epoch": 0.99, "grad_norm": 1.3779404163360596, "learning_rate": 1.0008771012510743e-08, "loss": 0.4868, "step": 38476 }, { "epoch": 0.99, "grad_norm": 1.9483870267868042, "learning_rate": 9.971673334417953e-09, "loss": 0.5694, "step": 38477 }, { "epoch": 0.99, "grad_norm": 1.3775362968444824, "learning_rate": 9.934644501323754e-09, "loss": 0.4842, "step": 38478 }, { "epoch": 0.99, "grad_norm": 11.534632682800293, "learning_rate": 9.897684513484606e-09, "loss": 0.4917, "step": 38479 }, { "epoch": 0.99, "grad_norm": 2.4314420223236084, "learning_rate": 9.860793371153643e-09, "loss": 0.5909, "step": 38480 }, { "epoch": 0.99, "grad_norm": 1.7688632011413574, "learning_rate": 9.823971074586214e-09, "loss": 0.6585, "step": 38481 }, { "epoch": 0.99, "grad_norm": 6.993359088897705, "learning_rate": 9.78721762403434e-09, "loss": 0.6294, "step": 38482 }, { "epoch": 0.99, "grad_norm": 1.6581764221191406, "learning_rate": 9.750533019753372e-09, "loss": 0.5244, "step": 38483 }, { "epoch": 0.99, "grad_norm": 1.125815510749817, "learning_rate": 9.713917261995331e-09, "loss": 0.4645, "step": 38484 }, { "epoch": 0.99, "grad_norm": 1.4422131776809692, "learning_rate": 9.677370351012238e-09, "loss": 0.5546, "step": 38485 }, { "epoch": 0.99, "grad_norm": 8.31079387664795, "learning_rate": 9.640892287056115e-09, "loss": 0.6993, "step": 38486 }, { "epoch": 0.99, "grad_norm": 1.309685468673706, "learning_rate": 9.604483070378978e-09, "loss": 0.399, "step": 38487 }, { "epoch": 0.99, "grad_norm": 1.5717582702636719, "learning_rate": 9.568142701230632e-09, "loss": 0.547, "step": 38488 }, { "epoch": 0.99, "grad_norm": 2.2695116996765137, "learning_rate": 9.531871179861984e-09, "loss": 0.6561, "step": 38489 }, { "epoch": 0.99, "grad_norm": 1.7440828084945679, "learning_rate": 9.495668506522837e-09, "loss": 0.5942, "step": 38490 }, { "epoch": 0.99, "grad_norm": 1.6629406213760376, "learning_rate": 9.459534681462989e-09, "loss": 0.5481, "step": 38491 }, { "epoch": 0.99, "grad_norm": 7.622034072875977, "learning_rate": 9.423469704931132e-09, "loss": 0.5816, "step": 38492 }, { "epoch": 0.99, "grad_norm": 1.2289379835128784, "learning_rate": 9.387473577177065e-09, "loss": 0.5416, "step": 38493 }, { "epoch": 0.99, "grad_norm": 1.3706480264663696, "learning_rate": 9.351546298447256e-09, "loss": 0.6479, "step": 38494 }, { "epoch": 0.99, "grad_norm": 1.2648813724517822, "learning_rate": 9.315687868989287e-09, "loss": 0.5162, "step": 38495 }, { "epoch": 0.99, "grad_norm": 1.2334038019180298, "learning_rate": 9.279898289051848e-09, "loss": 0.5566, "step": 38496 }, { "epoch": 0.99, "grad_norm": 1.6523957252502441, "learning_rate": 9.244177558880296e-09, "loss": 0.4743, "step": 38497 }, { "epoch": 0.99, "grad_norm": 4.180507659912109, "learning_rate": 9.208525678721102e-09, "loss": 0.6065, "step": 38498 }, { "epoch": 0.99, "grad_norm": 1.22809636592865, "learning_rate": 9.172942648818518e-09, "loss": 0.3463, "step": 38499 }, { "epoch": 0.99, "grad_norm": 1.4197640419006348, "learning_rate": 9.137428469421227e-09, "loss": 0.4552, "step": 38500 }, { "epoch": 0.99, "grad_norm": 1.4493684768676758, "learning_rate": 9.101983140770155e-09, "loss": 0.5515, "step": 38501 }, { "epoch": 0.99, "grad_norm": 1.3484197854995728, "learning_rate": 9.066606663112876e-09, "loss": 0.4032, "step": 38502 }, { "epoch": 0.99, "grad_norm": 1.2744982242584229, "learning_rate": 9.03129903669031e-09, "loss": 0.4585, "step": 38503 }, { "epoch": 0.99, "grad_norm": 2.0197291374206543, "learning_rate": 8.99606026174782e-09, "loss": 0.416, "step": 38504 }, { "epoch": 0.99, "grad_norm": 1.128170132637024, "learning_rate": 8.960890338527427e-09, "loss": 0.479, "step": 38505 }, { "epoch": 0.99, "grad_norm": 1.221088171005249, "learning_rate": 8.925789267271167e-09, "loss": 0.5913, "step": 38506 }, { "epoch": 0.99, "grad_norm": 2.896238327026367, "learning_rate": 8.890757048222177e-09, "loss": 0.4982, "step": 38507 }, { "epoch": 0.99, "grad_norm": 1.644038200378418, "learning_rate": 8.855793681620262e-09, "loss": 0.7679, "step": 38508 }, { "epoch": 0.99, "grad_norm": 1.1613816022872925, "learning_rate": 8.820899167707452e-09, "loss": 0.437, "step": 38509 }, { "epoch": 0.99, "grad_norm": 15.966156959533691, "learning_rate": 8.786073506724668e-09, "loss": 0.5352, "step": 38510 }, { "epoch": 0.99, "grad_norm": 4.97352933883667, "learning_rate": 8.751316698910607e-09, "loss": 0.5797, "step": 38511 }, { "epoch": 0.99, "grad_norm": 2.0087482929229736, "learning_rate": 8.716628744505073e-09, "loss": 0.6949, "step": 38512 }, { "epoch": 0.99, "grad_norm": 1.1293052434921265, "learning_rate": 8.68200964374677e-09, "loss": 0.472, "step": 38513 }, { "epoch": 0.99, "grad_norm": 1.6800134181976318, "learning_rate": 8.647459396876611e-09, "loss": 0.5964, "step": 38514 }, { "epoch": 0.99, "grad_norm": 1.7002379894256592, "learning_rate": 8.612978004129968e-09, "loss": 0.5158, "step": 38515 }, { "epoch": 0.99, "grad_norm": 7.031637668609619, "learning_rate": 8.578565465745537e-09, "loss": 0.5502, "step": 38516 }, { "epoch": 0.99, "grad_norm": 1.8423869609832764, "learning_rate": 8.544221781959795e-09, "loss": 0.5508, "step": 38517 }, { "epoch": 0.99, "grad_norm": 8.687843322753906, "learning_rate": 8.509946953011439e-09, "loss": 0.5617, "step": 38518 }, { "epoch": 0.99, "grad_norm": 1.7135696411132812, "learning_rate": 8.475740979133617e-09, "loss": 0.5387, "step": 38519 }, { "epoch": 0.99, "grad_norm": 1.1349775791168213, "learning_rate": 8.441603860565028e-09, "loss": 0.4131, "step": 38520 }, { "epoch": 0.99, "grad_norm": 2.513622760772705, "learning_rate": 8.407535597538819e-09, "loss": 0.5183, "step": 38521 }, { "epoch": 0.99, "grad_norm": 1.5460177659988403, "learning_rate": 8.373536190291464e-09, "loss": 0.4884, "step": 38522 }, { "epoch": 0.99, "grad_norm": 1.4114347696304321, "learning_rate": 8.339605639055004e-09, "loss": 0.4968, "step": 38523 }, { "epoch": 0.99, "grad_norm": 3.9617435932159424, "learning_rate": 8.305743944065914e-09, "loss": 0.4496, "step": 38524 }, { "epoch": 0.99, "grad_norm": 2.1951379776000977, "learning_rate": 8.27195110555512e-09, "loss": 0.4696, "step": 38525 }, { "epoch": 0.99, "grad_norm": 1.4814081192016602, "learning_rate": 8.23822712375688e-09, "loss": 0.5815, "step": 38526 }, { "epoch": 0.99, "grad_norm": 1.1895796060562134, "learning_rate": 8.204571998904342e-09, "loss": 0.4778, "step": 38527 }, { "epoch": 0.99, "grad_norm": 1.190020203590393, "learning_rate": 8.170985731227322e-09, "loss": 0.5271, "step": 38528 }, { "epoch": 0.99, "grad_norm": 1.519116997718811, "learning_rate": 8.137468320958964e-09, "loss": 0.4379, "step": 38529 }, { "epoch": 0.99, "grad_norm": 1.1915104389190674, "learning_rate": 8.104019768329085e-09, "loss": 0.4454, "step": 38530 }, { "epoch": 0.99, "grad_norm": 1.4725532531738281, "learning_rate": 8.070640073569724e-09, "loss": 0.5383, "step": 38531 }, { "epoch": 0.99, "grad_norm": 1.7617048025131226, "learning_rate": 8.037329236909585e-09, "loss": 0.5814, "step": 38532 }, { "epoch": 0.99, "grad_norm": 1.1591888666152954, "learning_rate": 8.004087258578486e-09, "loss": 0.3924, "step": 38533 }, { "epoch": 0.99, "grad_norm": 1.5396027565002441, "learning_rate": 7.97091413880513e-09, "loss": 0.4102, "step": 38534 }, { "epoch": 0.99, "grad_norm": 1.2871358394622803, "learning_rate": 7.937809877819336e-09, "loss": 0.402, "step": 38535 }, { "epoch": 0.99, "grad_norm": 4.443397521972656, "learning_rate": 7.904774475848698e-09, "loss": 0.5025, "step": 38536 }, { "epoch": 0.99, "grad_norm": 1.6223562955856323, "learning_rate": 7.871807933119702e-09, "loss": 0.522, "step": 38537 }, { "epoch": 0.99, "grad_norm": 1.0722242593765259, "learning_rate": 7.838910249861054e-09, "loss": 0.55, "step": 38538 }, { "epoch": 0.99, "grad_norm": 5.063685894012451, "learning_rate": 7.80608142629924e-09, "loss": 0.6351, "step": 38539 }, { "epoch": 0.99, "grad_norm": 1.3892804384231567, "learning_rate": 7.773321462659634e-09, "loss": 0.312, "step": 38540 }, { "epoch": 0.99, "grad_norm": 1.4457955360412598, "learning_rate": 7.740630359168721e-09, "loss": 0.5302, "step": 38541 }, { "epoch": 0.99, "grad_norm": 14.999938011169434, "learning_rate": 7.70800811605188e-09, "loss": 0.4383, "step": 38542 }, { "epoch": 0.99, "grad_norm": 1.1490812301635742, "learning_rate": 7.675454733534481e-09, "loss": 0.4863, "step": 38543 }, { "epoch": 0.99, "grad_norm": 1.1786632537841797, "learning_rate": 7.642970211838574e-09, "loss": 0.4547, "step": 38544 }, { "epoch": 0.99, "grad_norm": 8.387768745422363, "learning_rate": 7.610554551190641e-09, "loss": 0.6196, "step": 38545 }, { "epoch": 0.99, "grad_norm": 1.2583889961242676, "learning_rate": 7.578207751811618e-09, "loss": 0.4306, "step": 38546 }, { "epoch": 0.99, "grad_norm": 1.2772029638290405, "learning_rate": 7.545929813926877e-09, "loss": 0.4736, "step": 38547 }, { "epoch": 0.99, "grad_norm": 2.3105316162109375, "learning_rate": 7.513720737757358e-09, "loss": 0.6789, "step": 38548 }, { "epoch": 0.99, "grad_norm": 1.711787223815918, "learning_rate": 7.4815805235251e-09, "loss": 0.5386, "step": 38549 }, { "epoch": 0.99, "grad_norm": 1.2632381916046143, "learning_rate": 7.449509171452152e-09, "loss": 0.5203, "step": 38550 }, { "epoch": 0.99, "grad_norm": 9.900394439697266, "learning_rate": 7.4175066817594455e-09, "loss": 0.6317, "step": 38551 }, { "epoch": 0.99, "grad_norm": 2.8830149173736572, "learning_rate": 7.385573054666806e-09, "loss": 0.6104, "step": 38552 }, { "epoch": 0.99, "grad_norm": 1.3372989892959595, "learning_rate": 7.353708290395167e-09, "loss": 0.4973, "step": 38553 }, { "epoch": 0.99, "grad_norm": 1.5087453126907349, "learning_rate": 7.321912389164354e-09, "loss": 0.3859, "step": 38554 }, { "epoch": 0.99, "grad_norm": 1.347630262374878, "learning_rate": 7.2901853511919695e-09, "loss": 0.4372, "step": 38555 }, { "epoch": 0.99, "grad_norm": 2.173018455505371, "learning_rate": 7.258527176697838e-09, "loss": 0.631, "step": 38556 }, { "epoch": 0.99, "grad_norm": 3.4955079555511475, "learning_rate": 7.226937865899564e-09, "loss": 0.4636, "step": 38557 }, { "epoch": 0.99, "grad_norm": 1.1874538660049438, "learning_rate": 7.19541741901586e-09, "loss": 0.5259, "step": 38558 }, { "epoch": 0.99, "grad_norm": 1.2096881866455078, "learning_rate": 7.16396583626322e-09, "loss": 0.5117, "step": 38559 }, { "epoch": 0.99, "grad_norm": 2.084230422973633, "learning_rate": 7.132583117858138e-09, "loss": 0.5188, "step": 38560 }, { "epoch": 0.99, "grad_norm": 1.3127360343933105, "learning_rate": 7.101269264017107e-09, "loss": 0.5467, "step": 38561 }, { "epoch": 0.99, "grad_norm": 1.1554762125015259, "learning_rate": 7.070024274955511e-09, "loss": 0.4005, "step": 38562 }, { "epoch": 0.99, "grad_norm": 1.2355115413665771, "learning_rate": 7.0388481508898434e-09, "loss": 0.3187, "step": 38563 }, { "epoch": 0.99, "grad_norm": 1.400251865386963, "learning_rate": 7.0077408920343755e-09, "loss": 0.3961, "step": 38564 }, { "epoch": 0.99, "grad_norm": 6.754087924957275, "learning_rate": 6.976702498603383e-09, "loss": 0.5919, "step": 38565 }, { "epoch": 0.99, "grad_norm": 1.1747326850891113, "learning_rate": 6.945732970810026e-09, "loss": 0.5092, "step": 38566 }, { "epoch": 0.99, "grad_norm": 2.968327760696411, "learning_rate": 6.914832308868579e-09, "loss": 0.5811, "step": 38567 }, { "epoch": 0.99, "grad_norm": 2.1934914588928223, "learning_rate": 6.884000512992206e-09, "loss": 0.5322, "step": 38568 }, { "epoch": 0.99, "grad_norm": 2.0372095108032227, "learning_rate": 6.853237583392958e-09, "loss": 0.5025, "step": 38569 }, { "epoch": 0.99, "grad_norm": 1.0022207498550415, "learning_rate": 6.822543520282887e-09, "loss": 0.469, "step": 38570 }, { "epoch": 0.99, "grad_norm": 2.3047733306884766, "learning_rate": 6.791918323872937e-09, "loss": 0.5085, "step": 38571 }, { "epoch": 0.99, "grad_norm": 1.518713116645813, "learning_rate": 6.76136199437516e-09, "loss": 0.6212, "step": 38572 }, { "epoch": 0.99, "grad_norm": 1.3238458633422852, "learning_rate": 6.7308745319993874e-09, "loss": 0.4571, "step": 38573 }, { "epoch": 0.99, "grad_norm": 1.5675441026687622, "learning_rate": 6.700455936956562e-09, "loss": 0.4573, "step": 38574 }, { "epoch": 0.99, "grad_norm": 12.040407180786133, "learning_rate": 6.670106209455407e-09, "loss": 0.5891, "step": 38575 }, { "epoch": 0.99, "grad_norm": 2.2200541496276855, "learning_rate": 6.639825349705753e-09, "loss": 0.5929, "step": 38576 }, { "epoch": 0.99, "grad_norm": 1.6879628896713257, "learning_rate": 6.609613357916322e-09, "loss": 0.6865, "step": 38577 }, { "epoch": 0.99, "grad_norm": 1.5256983041763306, "learning_rate": 6.579470234294727e-09, "loss": 0.3009, "step": 38578 }, { "epoch": 0.99, "grad_norm": 1.5558044910430908, "learning_rate": 6.549395979048578e-09, "loss": 0.6597, "step": 38579 }, { "epoch": 0.99, "grad_norm": 5.155849933624268, "learning_rate": 6.519390592385488e-09, "loss": 0.6357, "step": 38580 }, { "epoch": 0.99, "grad_norm": 1.777940034866333, "learning_rate": 6.489454074511958e-09, "loss": 0.5274, "step": 38581 }, { "epoch": 0.99, "grad_norm": 1.200621247291565, "learning_rate": 6.4595864256356e-09, "loss": 0.458, "step": 38582 }, { "epoch": 0.99, "grad_norm": 1.4571501016616821, "learning_rate": 6.429787645960695e-09, "loss": 0.4316, "step": 38583 }, { "epoch": 0.99, "grad_norm": 1.4759163856506348, "learning_rate": 6.400057735692633e-09, "loss": 0.556, "step": 38584 }, { "epoch": 0.99, "grad_norm": 8.633981704711914, "learning_rate": 6.370396695037917e-09, "loss": 0.5002, "step": 38585 }, { "epoch": 0.99, "grad_norm": 1.4933745861053467, "learning_rate": 6.340804524198607e-09, "loss": 0.4231, "step": 38586 }, { "epoch": 0.99, "grad_norm": 5.02151346206665, "learning_rate": 6.311281223380094e-09, "loss": 0.5318, "step": 38587 }, { "epoch": 0.99, "grad_norm": 1.763593316078186, "learning_rate": 6.2818267927855505e-09, "loss": 0.4633, "step": 38588 }, { "epoch": 0.99, "grad_norm": 1.4256248474121094, "learning_rate": 6.252441232619255e-09, "loss": 0.4687, "step": 38589 }, { "epoch": 0.99, "grad_norm": 1.5076919794082642, "learning_rate": 6.223124543081049e-09, "loss": 0.4241, "step": 38590 }, { "epoch": 0.99, "grad_norm": 1.2543991804122925, "learning_rate": 6.193876724375214e-09, "loss": 0.5937, "step": 38591 }, { "epoch": 0.99, "grad_norm": 1.7925902605056763, "learning_rate": 6.164697776701589e-09, "loss": 0.554, "step": 38592 }, { "epoch": 0.99, "grad_norm": 1.7626935243606567, "learning_rate": 6.135587700262236e-09, "loss": 0.4847, "step": 38593 }, { "epoch": 0.99, "grad_norm": 3.079789161682129, "learning_rate": 6.1065464952581035e-09, "loss": 0.472, "step": 38594 }, { "epoch": 0.99, "grad_norm": 1.424875259399414, "learning_rate": 6.077574161889033e-09, "loss": 0.5462, "step": 38595 }, { "epoch": 0.99, "grad_norm": 3.8186938762664795, "learning_rate": 6.048670700353754e-09, "loss": 0.6036, "step": 38596 }, { "epoch": 0.99, "grad_norm": 1.297254204750061, "learning_rate": 6.019836110853217e-09, "loss": 0.5186, "step": 38597 }, { "epoch": 0.99, "grad_norm": 1.6319652795791626, "learning_rate": 5.991070393583931e-09, "loss": 0.6416, "step": 38598 }, { "epoch": 0.99, "grad_norm": 1.4382140636444092, "learning_rate": 5.962373548746847e-09, "loss": 0.4867, "step": 38599 }, { "epoch": 0.99, "grad_norm": 1.0371767282485962, "learning_rate": 5.933745576537364e-09, "loss": 0.5601, "step": 38600 }, { "epoch": 0.99, "grad_norm": 3.5412003993988037, "learning_rate": 5.905186477153102e-09, "loss": 0.5409, "step": 38601 }, { "epoch": 0.99, "grad_norm": 1.7388825416564941, "learning_rate": 5.87669625079168e-09, "loss": 0.4616, "step": 38602 }, { "epoch": 0.99, "grad_norm": 3.3560285568237305, "learning_rate": 5.848274897649608e-09, "loss": 0.5971, "step": 38603 }, { "epoch": 0.99, "grad_norm": 1.2292038202285767, "learning_rate": 5.819922417922286e-09, "loss": 0.4633, "step": 38604 }, { "epoch": 0.99, "grad_norm": 1.2957144975662231, "learning_rate": 5.791638811805111e-09, "loss": 0.4253, "step": 38605 }, { "epoch": 0.99, "grad_norm": 3.180716037750244, "learning_rate": 5.763424079492375e-09, "loss": 0.5105, "step": 38606 }, { "epoch": 0.99, "grad_norm": 1.9535919427871704, "learning_rate": 5.735278221179475e-09, "loss": 0.4986, "step": 38607 }, { "epoch": 0.99, "grad_norm": 1.4716836214065552, "learning_rate": 5.70720123705959e-09, "loss": 0.4685, "step": 38608 }, { "epoch": 0.99, "grad_norm": 1.574012279510498, "learning_rate": 5.6791931273281196e-09, "loss": 0.4695, "step": 38609 }, { "epoch": 0.99, "grad_norm": 2.1620304584503174, "learning_rate": 5.651253892174913e-09, "loss": 0.5225, "step": 38610 }, { "epoch": 0.99, "grad_norm": 1.3654052019119263, "learning_rate": 5.623383531795368e-09, "loss": 0.531, "step": 38611 }, { "epoch": 0.99, "grad_norm": 1.6633731126785278, "learning_rate": 5.595582046379333e-09, "loss": 0.706, "step": 38612 }, { "epoch": 0.99, "grad_norm": 2.010770797729492, "learning_rate": 5.567849436119988e-09, "loss": 0.5839, "step": 38613 }, { "epoch": 0.99, "grad_norm": 2.8189358711242676, "learning_rate": 5.540185701208289e-09, "loss": 0.472, "step": 38614 }, { "epoch": 0.99, "grad_norm": 1.1926857233047485, "learning_rate": 5.512590841834087e-09, "loss": 0.4573, "step": 38615 }, { "epoch": 0.99, "grad_norm": 1.4654555320739746, "learning_rate": 5.485064858188338e-09, "loss": 0.5289, "step": 38616 }, { "epoch": 0.99, "grad_norm": 1.8264095783233643, "learning_rate": 5.4576077504597816e-09, "loss": 0.4391, "step": 38617 }, { "epoch": 0.99, "grad_norm": 1.4513323307037354, "learning_rate": 5.430219518838264e-09, "loss": 0.5025, "step": 38618 }, { "epoch": 0.99, "grad_norm": 49.65858459472656, "learning_rate": 5.402900163512526e-09, "loss": 0.4974, "step": 38619 }, { "epoch": 0.99, "grad_norm": 4.838986396789551, "learning_rate": 5.375649684671302e-09, "loss": 0.6396, "step": 38620 }, { "epoch": 0.99, "grad_norm": 1.1790661811828613, "learning_rate": 5.348468082501113e-09, "loss": 0.5108, "step": 38621 }, { "epoch": 0.99, "grad_norm": 1.551841139793396, "learning_rate": 5.3213553571906936e-09, "loss": 0.521, "step": 38622 }, { "epoch": 0.99, "grad_norm": 2.235013246536255, "learning_rate": 5.2943115089254535e-09, "loss": 0.45, "step": 38623 }, { "epoch": 0.99, "grad_norm": 2.0269055366516113, "learning_rate": 5.26733653789302e-09, "loss": 0.674, "step": 38624 }, { "epoch": 0.99, "grad_norm": 1.1469814777374268, "learning_rate": 5.240430444278799e-09, "loss": 0.4161, "step": 38625 }, { "epoch": 0.99, "grad_norm": 1.672794222831726, "learning_rate": 5.213593228268199e-09, "loss": 0.4618, "step": 38626 }, { "epoch": 0.99, "grad_norm": 1.2114152908325195, "learning_rate": 5.186824890046627e-09, "loss": 0.5078, "step": 38627 }, { "epoch": 0.99, "grad_norm": 1.469128131866455, "learning_rate": 5.160125429797269e-09, "loss": 0.3589, "step": 38628 }, { "epoch": 0.99, "grad_norm": 1.8163485527038574, "learning_rate": 5.1334948477055335e-09, "loss": 0.3802, "step": 38629 }, { "epoch": 0.99, "grad_norm": 5.090967178344727, "learning_rate": 5.106933143953497e-09, "loss": 0.6293, "step": 38630 }, { "epoch": 0.99, "grad_norm": 1.7264097929000854, "learning_rate": 5.080440318726565e-09, "loss": 0.4668, "step": 38631 }, { "epoch": 0.99, "grad_norm": 1.4577548503875732, "learning_rate": 5.054016372204595e-09, "loss": 0.4456, "step": 38632 }, { "epoch": 0.99, "grad_norm": 1.2190030813217163, "learning_rate": 5.027661304570774e-09, "loss": 0.5524, "step": 38633 }, { "epoch": 0.99, "grad_norm": 2.0881052017211914, "learning_rate": 5.001375116007179e-09, "loss": 0.6148, "step": 38634 }, { "epoch": 0.99, "grad_norm": 1.5170297622680664, "learning_rate": 4.975157806695885e-09, "loss": 0.3754, "step": 38635 }, { "epoch": 0.99, "grad_norm": 3.2758278846740723, "learning_rate": 4.9490093768145285e-09, "loss": 0.5246, "step": 38636 }, { "epoch": 0.99, "grad_norm": 3.085625171661377, "learning_rate": 4.922929826546297e-09, "loss": 0.4615, "step": 38637 }, { "epoch": 0.99, "grad_norm": 1.888168454170227, "learning_rate": 4.896919156068825e-09, "loss": 0.6084, "step": 38638 }, { "epoch": 0.99, "grad_norm": 1.904466152191162, "learning_rate": 4.8709773655619685e-09, "loss": 0.4807, "step": 38639 }, { "epoch": 0.99, "grad_norm": 1.972151517868042, "learning_rate": 4.845104455205585e-09, "loss": 0.6805, "step": 38640 }, { "epoch": 0.99, "grad_norm": 1.2551312446594238, "learning_rate": 4.819300425177309e-09, "loss": 0.5037, "step": 38641 }, { "epoch": 0.99, "grad_norm": 1.2480700016021729, "learning_rate": 4.793565275654777e-09, "loss": 0.5481, "step": 38642 }, { "epoch": 0.99, "grad_norm": 1.3363595008850098, "learning_rate": 4.767899006814513e-09, "loss": 0.5364, "step": 38643 }, { "epoch": 0.99, "grad_norm": 1.9721261262893677, "learning_rate": 4.7423016188352654e-09, "loss": 0.4914, "step": 38644 }, { "epoch": 0.99, "grad_norm": 1.2481718063354492, "learning_rate": 4.7167731118924474e-09, "loss": 0.4455, "step": 38645 }, { "epoch": 0.99, "grad_norm": 1.3116958141326904, "learning_rate": 4.6913134861614754e-09, "loss": 0.5405, "step": 38646 }, { "epoch": 0.99, "grad_norm": 2.000866651535034, "learning_rate": 4.665922741818873e-09, "loss": 0.5646, "step": 38647 }, { "epoch": 0.99, "grad_norm": 1.260528564453125, "learning_rate": 4.640600879037837e-09, "loss": 0.548, "step": 38648 }, { "epoch": 0.99, "grad_norm": 1.248629093170166, "learning_rate": 4.615347897994893e-09, "loss": 0.5425, "step": 38649 }, { "epoch": 0.99, "grad_norm": 1.5157716274261475, "learning_rate": 4.590163798863234e-09, "loss": 0.4345, "step": 38650 }, { "epoch": 0.99, "grad_norm": 1.3968194723129272, "learning_rate": 4.565048581817166e-09, "loss": 0.4008, "step": 38651 }, { "epoch": 0.99, "grad_norm": 2.6947762966156006, "learning_rate": 4.540002247027664e-09, "loss": 0.425, "step": 38652 }, { "epoch": 0.99, "grad_norm": 7.050130844116211, "learning_rate": 4.515024794669032e-09, "loss": 0.5939, "step": 38653 }, { "epoch": 0.99, "grad_norm": 4.107833385467529, "learning_rate": 4.490116224913355e-09, "loss": 0.6919, "step": 38654 }, { "epoch": 0.99, "grad_norm": 3.210871458053589, "learning_rate": 4.4652765379327164e-09, "loss": 0.6568, "step": 38655 }, { "epoch": 0.99, "grad_norm": 1.5894896984100342, "learning_rate": 4.4405057338969825e-09, "loss": 0.4485, "step": 38656 }, { "epoch": 0.99, "grad_norm": 1.569242000579834, "learning_rate": 4.415803812977126e-09, "loss": 0.5745, "step": 38657 }, { "epoch": 0.99, "grad_norm": 3.265814781188965, "learning_rate": 4.3911707753441225e-09, "loss": 0.5094, "step": 38658 }, { "epoch": 0.99, "grad_norm": 1.561793565750122, "learning_rate": 4.3666066211667245e-09, "loss": 0.5545, "step": 38659 }, { "epoch": 0.99, "grad_norm": 2.56615948677063, "learning_rate": 4.342111350614797e-09, "loss": 0.6656, "step": 38660 }, { "epoch": 0.99, "grad_norm": 1.1897647380828857, "learning_rate": 4.317684963858204e-09, "loss": 0.515, "step": 38661 }, { "epoch": 0.99, "grad_norm": 1.3541767597198486, "learning_rate": 4.293327461063479e-09, "loss": 0.4416, "step": 38662 }, { "epoch": 0.99, "grad_norm": 12.706974983215332, "learning_rate": 4.269038842399376e-09, "loss": 0.4711, "step": 38663 }, { "epoch": 0.99, "grad_norm": 1.1019370555877686, "learning_rate": 4.244819108032428e-09, "loss": 0.4637, "step": 38664 }, { "epoch": 0.99, "grad_norm": 5.535733222961426, "learning_rate": 4.2206682581313884e-09, "loss": 0.5686, "step": 38665 }, { "epoch": 0.99, "grad_norm": 1.3678492307662964, "learning_rate": 4.196586292860572e-09, "loss": 0.556, "step": 38666 }, { "epoch": 0.99, "grad_norm": 1.8567310571670532, "learning_rate": 4.17257321238762e-09, "loss": 0.5751, "step": 38667 }, { "epoch": 0.99, "grad_norm": 7.036158561706543, "learning_rate": 4.148629016876848e-09, "loss": 0.5632, "step": 38668 }, { "epoch": 0.99, "grad_norm": 1.1000359058380127, "learning_rate": 4.124753706493679e-09, "loss": 0.5255, "step": 38669 }, { "epoch": 0.99, "grad_norm": 1.7949031591415405, "learning_rate": 4.1009472814035335e-09, "loss": 0.5402, "step": 38670 }, { "epoch": 0.99, "grad_norm": 6.424732208251953, "learning_rate": 4.077209741768506e-09, "loss": 0.5895, "step": 38671 }, { "epoch": 0.99, "grad_norm": 1.0482293367385864, "learning_rate": 4.053541087754021e-09, "loss": 0.5292, "step": 38672 }, { "epoch": 0.99, "grad_norm": 18.212785720825195, "learning_rate": 4.029941319522168e-09, "loss": 0.7719, "step": 38673 }, { "epoch": 0.99, "grad_norm": 3.2592248916625977, "learning_rate": 4.006410437236152e-09, "loss": 0.4533, "step": 38674 }, { "epoch": 0.99, "grad_norm": 2.234595537185669, "learning_rate": 3.982948441058065e-09, "loss": 0.643, "step": 38675 }, { "epoch": 0.99, "grad_norm": 2.565110206604004, "learning_rate": 3.95955533114889e-09, "loss": 0.5017, "step": 38676 }, { "epoch": 0.99, "grad_norm": 1.6958448886871338, "learning_rate": 3.936231107669608e-09, "loss": 0.5412, "step": 38677 }, { "epoch": 0.99, "grad_norm": 1.592523455619812, "learning_rate": 3.912975770783423e-09, "loss": 0.4661, "step": 38678 }, { "epoch": 0.99, "grad_norm": 1.6023200750350952, "learning_rate": 3.889789320647985e-09, "loss": 0.6525, "step": 38679 }, { "epoch": 0.99, "grad_norm": 11.095710754394531, "learning_rate": 3.866671757423168e-09, "loss": 0.66, "step": 38680 }, { "epoch": 0.99, "grad_norm": 1.2349109649658203, "learning_rate": 3.8436230812699535e-09, "loss": 0.5456, "step": 38681 }, { "epoch": 0.99, "grad_norm": 2.268059492111206, "learning_rate": 3.820643292345993e-09, "loss": 0.4891, "step": 38682 }, { "epoch": 0.99, "grad_norm": 1.248643159866333, "learning_rate": 3.797732390810049e-09, "loss": 0.4745, "step": 38683 }, { "epoch": 0.99, "grad_norm": 6.060177326202393, "learning_rate": 3.7748903768197735e-09, "loss": 0.6278, "step": 38684 }, { "epoch": 0.99, "grad_norm": 1.188214898109436, "learning_rate": 3.7521172505328164e-09, "loss": 0.4301, "step": 38685 }, { "epoch": 0.99, "grad_norm": 2.6477675437927246, "learning_rate": 3.72941301210572e-09, "loss": 0.5632, "step": 38686 }, { "epoch": 0.99, "grad_norm": 4.972602367401123, "learning_rate": 3.706777661696137e-09, "loss": 0.5443, "step": 38687 }, { "epoch": 0.99, "grad_norm": 1.179075002670288, "learning_rate": 3.6842111994583874e-09, "loss": 0.4531, "step": 38688 }, { "epoch": 0.99, "grad_norm": 1.7445366382598877, "learning_rate": 3.6617136255490127e-09, "loss": 0.6177, "step": 38689 }, { "epoch": 0.99, "grad_norm": 9.054930686950684, "learning_rate": 3.639284940122334e-09, "loss": 0.6171, "step": 38690 }, { "epoch": 0.99, "grad_norm": 1.125007152557373, "learning_rate": 3.616925143333783e-09, "loss": 0.3924, "step": 38691 }, { "epoch": 0.99, "grad_norm": 15.40774154663086, "learning_rate": 3.5946342353376795e-09, "loss": 0.5259, "step": 38692 }, { "epoch": 0.99, "grad_norm": 1.2260315418243408, "learning_rate": 3.5724122162861254e-09, "loss": 0.5029, "step": 38693 }, { "epoch": 0.99, "grad_norm": 1.2491528987884521, "learning_rate": 3.550259086333441e-09, "loss": 0.5436, "step": 38694 }, { "epoch": 0.99, "grad_norm": 1.2804548740386963, "learning_rate": 3.5281748456317267e-09, "loss": 0.5851, "step": 38695 }, { "epoch": 0.99, "grad_norm": 1.6796256303787231, "learning_rate": 3.5061594943330833e-09, "loss": 0.5827, "step": 38696 }, { "epoch": 0.99, "grad_norm": 2.1274728775024414, "learning_rate": 3.4842130325907218e-09, "loss": 0.6067, "step": 38697 }, { "epoch": 0.99, "grad_norm": 1.4864985942840576, "learning_rate": 3.4623354605534123e-09, "loss": 0.4611, "step": 38698 }, { "epoch": 0.99, "grad_norm": 1.3953418731689453, "learning_rate": 3.4405267783743647e-09, "loss": 0.4999, "step": 38699 }, { "epoch": 0.99, "grad_norm": 1.3194864988327026, "learning_rate": 3.4187869862023493e-09, "loss": 0.5396, "step": 38700 }, { "epoch": 0.99, "grad_norm": 11.421183586120605, "learning_rate": 3.3971160841872464e-09, "loss": 0.6184, "step": 38701 }, { "epoch": 0.99, "grad_norm": 1.3860398530960083, "learning_rate": 3.375514072478936e-09, "loss": 0.4907, "step": 38702 }, { "epoch": 0.99, "grad_norm": 1.1497730016708374, "learning_rate": 3.3539809512261876e-09, "loss": 0.5127, "step": 38703 }, { "epoch": 0.99, "grad_norm": 1.1884435415267944, "learning_rate": 3.332516720577772e-09, "loss": 0.5616, "step": 38704 }, { "epoch": 0.99, "grad_norm": 1.173688530921936, "learning_rate": 3.3111213806802377e-09, "loss": 0.3898, "step": 38705 }, { "epoch": 0.99, "grad_norm": 6.095064163208008, "learning_rate": 3.289794931682355e-09, "loss": 0.5819, "step": 38706 }, { "epoch": 0.99, "grad_norm": 16.254430770874023, "learning_rate": 3.2685373737306735e-09, "loss": 0.5241, "step": 38707 }, { "epoch": 0.99, "grad_norm": 1.7225115299224854, "learning_rate": 3.2473487069717424e-09, "loss": 0.5483, "step": 38708 }, { "epoch": 0.99, "grad_norm": 1.1861674785614014, "learning_rate": 3.2262289315521113e-09, "loss": 0.5132, "step": 38709 }, { "epoch": 0.99, "grad_norm": 1.3805404901504517, "learning_rate": 3.2051780476149985e-09, "loss": 0.4364, "step": 38710 }, { "epoch": 0.99, "grad_norm": 5.585461139678955, "learning_rate": 3.1841960553091746e-09, "loss": 0.4681, "step": 38711 }, { "epoch": 0.99, "grad_norm": 1.1342443227767944, "learning_rate": 3.163282954775637e-09, "loss": 0.4414, "step": 38712 }, { "epoch": 0.99, "grad_norm": 1.1387481689453125, "learning_rate": 3.1424387461609363e-09, "loss": 0.429, "step": 38713 }, { "epoch": 0.99, "grad_norm": 1.422417402267456, "learning_rate": 3.12166342960829e-09, "loss": 0.5532, "step": 38714 }, { "epoch": 0.99, "grad_norm": 1.4123364686965942, "learning_rate": 3.100957005259808e-09, "loss": 0.6251, "step": 38715 }, { "epoch": 0.99, "grad_norm": 9.55024242401123, "learning_rate": 3.080319473258708e-09, "loss": 0.7252, "step": 38716 }, { "epoch": 0.99, "grad_norm": 1.298895001411438, "learning_rate": 3.0597508337470993e-09, "loss": 0.5593, "step": 38717 }, { "epoch": 0.99, "grad_norm": 2.4037232398986816, "learning_rate": 3.0392510868682002e-09, "loss": 0.7984, "step": 38718 }, { "epoch": 0.99, "grad_norm": 1.5052783489227295, "learning_rate": 3.0188202327618987e-09, "loss": 0.5683, "step": 38719 }, { "epoch": 0.99, "grad_norm": 1.4554635286331177, "learning_rate": 2.9984582715680833e-09, "loss": 0.6169, "step": 38720 }, { "epoch": 0.99, "grad_norm": 1.657274842262268, "learning_rate": 2.978165203428862e-09, "loss": 0.5059, "step": 38721 }, { "epoch": 0.99, "grad_norm": 2.047654151916504, "learning_rate": 2.9579410284830133e-09, "loss": 0.6547, "step": 38722 }, { "epoch": 0.99, "grad_norm": 1.0344181060791016, "learning_rate": 2.9377857468704254e-09, "loss": 0.4703, "step": 38723 }, { "epoch": 0.99, "grad_norm": 1.4681923389434814, "learning_rate": 2.9176993587298753e-09, "loss": 0.5648, "step": 38724 }, { "epoch": 0.99, "grad_norm": 1.4020816087722778, "learning_rate": 2.8976818642001414e-09, "loss": 0.4666, "step": 38725 }, { "epoch": 0.99, "grad_norm": 2.284594774246216, "learning_rate": 2.8777332634188915e-09, "loss": 0.6968, "step": 38726 }, { "epoch": 0.99, "grad_norm": 1.2911453247070312, "learning_rate": 2.857853556522683e-09, "loss": 0.4482, "step": 38727 }, { "epoch": 0.99, "grad_norm": 1.9371471405029297, "learning_rate": 2.8380427436502935e-09, "loss": 0.5608, "step": 38728 }, { "epoch": 0.99, "grad_norm": 1.4078893661499023, "learning_rate": 2.81830082493717e-09, "loss": 0.497, "step": 38729 }, { "epoch": 0.99, "grad_norm": 1.01823890209198, "learning_rate": 2.7986278005187605e-09, "loss": 0.355, "step": 38730 }, { "epoch": 0.99, "grad_norm": 1.6460601091384888, "learning_rate": 2.779023670531622e-09, "loss": 0.6777, "step": 38731 }, { "epoch": 0.99, "grad_norm": 1.4760187864303589, "learning_rate": 2.7594884351100916e-09, "loss": 0.6245, "step": 38732 }, { "epoch": 0.99, "grad_norm": 1.7885483503341675, "learning_rate": 2.740022094390726e-09, "loss": 0.5702, "step": 38733 }, { "epoch": 0.99, "grad_norm": 1.5936466455459595, "learning_rate": 2.720624648504533e-09, "loss": 0.4717, "step": 38734 }, { "epoch": 0.99, "grad_norm": 1.5183625221252441, "learning_rate": 2.7012960975880685e-09, "loss": 0.5344, "step": 38735 }, { "epoch": 0.99, "grad_norm": 1.7211120128631592, "learning_rate": 2.6820364417723398e-09, "loss": 0.4745, "step": 38736 }, { "epoch": 0.99, "grad_norm": 7.179574966430664, "learning_rate": 2.6628456811916835e-09, "loss": 0.3081, "step": 38737 }, { "epoch": 0.99, "grad_norm": 1.2978334426879883, "learning_rate": 2.6437238159782165e-09, "loss": 0.5366, "step": 38738 }, { "epoch": 0.99, "grad_norm": 2.1813480854034424, "learning_rate": 2.6246708462629445e-09, "loss": 0.5718, "step": 38739 }, { "epoch": 0.99, "grad_norm": 1.0889250040054321, "learning_rate": 2.605686772176874e-09, "loss": 0.4673, "step": 38740 }, { "epoch": 0.99, "grad_norm": 2.1007518768310547, "learning_rate": 2.5867715938521222e-09, "loss": 0.6147, "step": 38741 }, { "epoch": 0.99, "grad_norm": 2.0582752227783203, "learning_rate": 2.5679253114185844e-09, "loss": 0.5607, "step": 38742 }, { "epoch": 0.99, "grad_norm": 1.1505359411239624, "learning_rate": 2.5491479250050464e-09, "loss": 0.525, "step": 38743 }, { "epoch": 0.99, "grad_norm": 2.229612112045288, "learning_rate": 2.5304394347425155e-09, "loss": 0.8107, "step": 38744 }, { "epoch": 0.99, "grad_norm": 2.6838176250457764, "learning_rate": 2.5117998407586665e-09, "loss": 0.532, "step": 38745 }, { "epoch": 0.99, "grad_norm": 7.234950065612793, "learning_rate": 2.4932291431822854e-09, "loss": 0.8228, "step": 38746 }, { "epoch": 0.99, "grad_norm": 1.4454692602157593, "learning_rate": 2.474727342142158e-09, "loss": 0.5165, "step": 38747 }, { "epoch": 0.99, "grad_norm": 1.271366000175476, "learning_rate": 2.45629443776485e-09, "loss": 0.5021, "step": 38748 }, { "epoch": 0.99, "grad_norm": 1.3468468189239502, "learning_rate": 2.437930430178037e-09, "loss": 0.4705, "step": 38749 }, { "epoch": 0.99, "grad_norm": 1.8531306982040405, "learning_rate": 2.419635319507174e-09, "loss": 0.6902, "step": 38750 }, { "epoch": 0.99, "grad_norm": 1.4777209758758545, "learning_rate": 2.401409105878827e-09, "loss": 0.5356, "step": 38751 }, { "epoch": 0.99, "grad_norm": 1.7423181533813477, "learning_rate": 2.3832517894195607e-09, "loss": 0.5156, "step": 38752 }, { "epoch": 0.99, "grad_norm": 1.353022575378418, "learning_rate": 2.3651633702537203e-09, "loss": 0.511, "step": 38753 }, { "epoch": 0.99, "grad_norm": 1.4257493019104004, "learning_rate": 2.347143848506761e-09, "loss": 0.5703, "step": 38754 }, { "epoch": 0.99, "grad_norm": 2.052081346511841, "learning_rate": 2.3291932243008075e-09, "loss": 0.5136, "step": 38755 }, { "epoch": 0.99, "grad_norm": 1.2665421962738037, "learning_rate": 2.311311497762425e-09, "loss": 0.4882, "step": 38756 }, { "epoch": 0.99, "grad_norm": 2.256258249282837, "learning_rate": 2.293498669011518e-09, "loss": 0.521, "step": 38757 }, { "epoch": 0.99, "grad_norm": 1.1470898389816284, "learning_rate": 2.275754738173541e-09, "loss": 0.4467, "step": 38758 }, { "epoch": 0.99, "grad_norm": 1.2414950132369995, "learning_rate": 2.2580797053706195e-09, "loss": 0.3501, "step": 38759 }, { "epoch": 0.99, "grad_norm": 1.1181318759918213, "learning_rate": 2.2404735707226566e-09, "loss": 0.4315, "step": 38760 }, { "epoch": 0.99, "grad_norm": 1.046889305114746, "learning_rate": 2.2229363343528876e-09, "loss": 0.5551, "step": 38761 }, { "epoch": 0.99, "grad_norm": 1.866331934928894, "learning_rate": 2.2054679963812164e-09, "loss": 0.522, "step": 38762 }, { "epoch": 0.99, "grad_norm": 7.608800888061523, "learning_rate": 2.1880685569275473e-09, "loss": 0.4383, "step": 38763 }, { "epoch": 0.99, "grad_norm": 1.6142747402191162, "learning_rate": 2.1707380161140045e-09, "loss": 0.4367, "step": 38764 }, { "epoch": 0.99, "grad_norm": 1.427915096282959, "learning_rate": 2.1534763740571617e-09, "loss": 0.4174, "step": 38765 }, { "epoch": 0.99, "grad_norm": 0.9899865388870239, "learning_rate": 2.1362836308780334e-09, "loss": 0.552, "step": 38766 }, { "epoch": 0.99, "grad_norm": 3.3222358226776123, "learning_rate": 2.1191597866931925e-09, "loss": 0.4298, "step": 38767 }, { "epoch": 0.99, "grad_norm": 1.5539101362228394, "learning_rate": 2.102104841622543e-09, "loss": 0.4778, "step": 38768 }, { "epoch": 0.99, "grad_norm": 1.3122859001159668, "learning_rate": 2.0851187957837694e-09, "loss": 0.4828, "step": 38769 }, { "epoch": 0.99, "grad_norm": 1.0223759412765503, "learning_rate": 2.0682016492923342e-09, "loss": 0.4496, "step": 38770 }, { "epoch": 0.99, "grad_norm": 1.6259037256240845, "learning_rate": 2.051353402264811e-09, "loss": 0.5604, "step": 38771 }, { "epoch": 0.99, "grad_norm": 1.2429805994033813, "learning_rate": 2.034574054818883e-09, "loss": 0.4141, "step": 38772 }, { "epoch": 0.99, "grad_norm": 1.6784065961837769, "learning_rate": 2.0178636070689038e-09, "loss": 0.4465, "step": 38773 }, { "epoch": 0.99, "grad_norm": 1.4431990385055542, "learning_rate": 2.001222059131447e-09, "loss": 0.5234, "step": 38774 }, { "epoch": 0.99, "grad_norm": 1.7919039726257324, "learning_rate": 1.984649411118644e-09, "loss": 0.4214, "step": 38775 }, { "epoch": 0.99, "grad_norm": 1.1021620035171509, "learning_rate": 1.9681456631470696e-09, "loss": 0.4422, "step": 38776 }, { "epoch": 0.99, "grad_norm": 1.7352783679962158, "learning_rate": 1.9517108153299657e-09, "loss": 0.5705, "step": 38777 }, { "epoch": 0.99, "grad_norm": 1.2102385759353638, "learning_rate": 1.935344867780575e-09, "loss": 0.538, "step": 38778 }, { "epoch": 0.99, "grad_norm": 2.6968271732330322, "learning_rate": 1.9190478206099205e-09, "loss": 0.5992, "step": 38779 }, { "epoch": 0.99, "grad_norm": 1.445356845855713, "learning_rate": 1.902819673933465e-09, "loss": 0.3757, "step": 38780 }, { "epoch": 0.99, "grad_norm": 1.2227057218551636, "learning_rate": 1.88666042786001e-09, "loss": 0.5925, "step": 38781 }, { "epoch": 0.99, "grad_norm": 4.556734085083008, "learning_rate": 1.8705700825027983e-09, "loss": 0.5155, "step": 38782 }, { "epoch": 0.99, "grad_norm": 4.802799701690674, "learning_rate": 1.8545486379728528e-09, "loss": 0.767, "step": 38783 }, { "epoch": 0.99, "grad_norm": 1.2981292009353638, "learning_rate": 1.8385960943789748e-09, "loss": 0.6187, "step": 38784 }, { "epoch": 0.99, "grad_norm": 4.281484127044678, "learning_rate": 1.822712451832187e-09, "loss": 0.4363, "step": 38785 }, { "epoch": 0.99, "grad_norm": 1.9346345663070679, "learning_rate": 1.8068977104412911e-09, "loss": 0.4507, "step": 38786 }, { "epoch": 0.99, "grad_norm": 9.110925674438477, "learning_rate": 1.7911518703173092e-09, "loss": 0.5813, "step": 38787 }, { "epoch": 0.99, "grad_norm": 1.333125352859497, "learning_rate": 1.7754749315657128e-09, "loss": 0.4078, "step": 38788 }, { "epoch": 0.99, "grad_norm": 1.179555058479309, "learning_rate": 1.7598668942964136e-09, "loss": 0.4433, "step": 38789 }, { "epoch": 0.99, "grad_norm": 1.0904196500778198, "learning_rate": 1.7443277586171036e-09, "loss": 0.4533, "step": 38790 }, { "epoch": 0.99, "grad_norm": 1.7323569059371948, "learning_rate": 1.7288575246343642e-09, "loss": 0.5315, "step": 38791 }, { "epoch": 0.99, "grad_norm": 3.6803970336914062, "learning_rate": 1.7134561924547765e-09, "loss": 0.5595, "step": 38792 }, { "epoch": 0.99, "grad_norm": 1.467297077178955, "learning_rate": 1.698123762183812e-09, "loss": 0.6538, "step": 38793 }, { "epoch": 0.99, "grad_norm": 1.360564112663269, "learning_rate": 1.682860233928052e-09, "loss": 0.4973, "step": 38794 }, { "epoch": 0.99, "grad_norm": 1.4188557863235474, "learning_rate": 1.6676656077929675e-09, "loss": 0.5021, "step": 38795 }, { "epoch": 0.99, "grad_norm": 1.0981953144073486, "learning_rate": 1.6525398838829198e-09, "loss": 0.559, "step": 38796 }, { "epoch": 0.99, "grad_norm": 1.3444340229034424, "learning_rate": 1.6374830623011596e-09, "loss": 0.5738, "step": 38797 }, { "epoch": 0.99, "grad_norm": 1.654894471168518, "learning_rate": 1.6224951431531577e-09, "loss": 0.5242, "step": 38798 }, { "epoch": 0.99, "grad_norm": 1.6514320373535156, "learning_rate": 1.6075761265399447e-09, "loss": 0.5915, "step": 38799 }, { "epoch": 0.99, "grad_norm": 1.7021591663360596, "learning_rate": 1.5927260125669918e-09, "loss": 0.6532, "step": 38800 }, { "epoch": 0.99, "grad_norm": 3.6577436923980713, "learning_rate": 1.5779448013353294e-09, "loss": 0.3894, "step": 38801 }, { "epoch": 0.99, "grad_norm": 1.2004437446594238, "learning_rate": 1.5632324929459875e-09, "loss": 0.6318, "step": 38802 }, { "epoch": 0.99, "grad_norm": 4.169992923736572, "learning_rate": 1.5485890875022169e-09, "loss": 0.6839, "step": 38803 }, { "epoch": 0.99, "grad_norm": 1.3259416818618774, "learning_rate": 1.5340145851028277e-09, "loss": 0.4981, "step": 38804 }, { "epoch": 0.99, "grad_norm": 1.1088457107543945, "learning_rate": 1.5195089858499601e-09, "loss": 0.4991, "step": 38805 }, { "epoch": 0.99, "grad_norm": 1.2857418060302734, "learning_rate": 1.5050722898435344e-09, "loss": 0.4727, "step": 38806 }, { "epoch": 0.99, "grad_norm": 7.817850589752197, "learning_rate": 1.4907044971812501e-09, "loss": 0.643, "step": 38807 }, { "epoch": 0.99, "grad_norm": 2.367278814315796, "learning_rate": 1.4764056079641377e-09, "loss": 0.5416, "step": 38808 }, { "epoch": 0.99, "grad_norm": 1.1289252042770386, "learning_rate": 1.4621756222898964e-09, "loss": 0.5413, "step": 38809 }, { "epoch": 0.99, "grad_norm": 2.216060161590576, "learning_rate": 1.4480145402562262e-09, "loss": 0.699, "step": 38810 }, { "epoch": 0.99, "grad_norm": 1.2183382511138916, "learning_rate": 1.4339223619619369e-09, "loss": 0.5036, "step": 38811 }, { "epoch": 0.99, "grad_norm": 1.2791950702667236, "learning_rate": 1.4198990875025075e-09, "loss": 0.4196, "step": 38812 }, { "epoch": 0.99, "grad_norm": 1.6361501216888428, "learning_rate": 1.405944716975638e-09, "loss": 0.6714, "step": 38813 }, { "epoch": 0.99, "grad_norm": 2.2067506313323975, "learning_rate": 1.3920592504779173e-09, "loss": 0.492, "step": 38814 }, { "epoch": 0.99, "grad_norm": 2.2328941822052, "learning_rate": 1.3782426881037147e-09, "loss": 0.6235, "step": 38815 }, { "epoch": 0.99, "grad_norm": 1.2305735349655151, "learning_rate": 1.3644950299496195e-09, "loss": 0.6549, "step": 38816 }, { "epoch": 0.99, "grad_norm": 1.399380087852478, "learning_rate": 1.3508162761088906e-09, "loss": 0.5848, "step": 38817 }, { "epoch": 0.99, "grad_norm": 1.4295668601989746, "learning_rate": 1.3372064266770068e-09, "loss": 0.5871, "step": 38818 }, { "epoch": 0.99, "grad_norm": 1.611964225769043, "learning_rate": 1.3236654817472271e-09, "loss": 0.5755, "step": 38819 }, { "epoch": 0.99, "grad_norm": 1.5151432752609253, "learning_rate": 1.3101934414128104e-09, "loss": 0.5432, "step": 38820 }, { "epoch": 1.0, "grad_norm": 1.0874830484390259, "learning_rate": 1.2967903057659048e-09, "loss": 0.4272, "step": 38821 }, { "epoch": 1.0, "grad_norm": 6.0826215744018555, "learning_rate": 1.2834560749008796e-09, "loss": 0.4933, "step": 38822 }, { "epoch": 1.0, "grad_norm": 1.5951848030090332, "learning_rate": 1.2701907489087728e-09, "loss": 0.5273, "step": 38823 }, { "epoch": 1.0, "grad_norm": 1.2572094202041626, "learning_rate": 1.2569943278795126e-09, "loss": 0.5299, "step": 38824 }, { "epoch": 1.0, "grad_norm": 1.6517292261123657, "learning_rate": 1.2438668119063579e-09, "loss": 0.65, "step": 38825 }, { "epoch": 1.0, "grad_norm": 3.6600959300994873, "learning_rate": 1.2308082010781264e-09, "loss": 0.5201, "step": 38826 }, { "epoch": 1.0, "grad_norm": 1.2248939275741577, "learning_rate": 1.217818495484746e-09, "loss": 0.4499, "step": 38827 }, { "epoch": 1.0, "grad_norm": 1.1627835035324097, "learning_rate": 1.2048976952172553e-09, "loss": 0.6734, "step": 38828 }, { "epoch": 1.0, "grad_norm": 1.7306894063949585, "learning_rate": 1.1920458003633618e-09, "loss": 0.534, "step": 38829 }, { "epoch": 1.0, "grad_norm": 5.4759111404418945, "learning_rate": 1.179262811011883e-09, "loss": 0.48, "step": 38830 }, { "epoch": 1.0, "grad_norm": 1.139440894126892, "learning_rate": 1.1665487272516374e-09, "loss": 0.4818, "step": 38831 }, { "epoch": 1.0, "grad_norm": 1.1025111675262451, "learning_rate": 1.1539035491692218e-09, "loss": 0.3623, "step": 38832 }, { "epoch": 1.0, "grad_norm": 8.04212474822998, "learning_rate": 1.141327276851234e-09, "loss": 0.631, "step": 38833 }, { "epoch": 1.0, "grad_norm": 1.0671257972717285, "learning_rate": 1.1288199103864917e-09, "loss": 0.362, "step": 38834 }, { "epoch": 1.0, "grad_norm": 2.071561336517334, "learning_rate": 1.116381449860482e-09, "loss": 0.7351, "step": 38835 }, { "epoch": 1.0, "grad_norm": 1.4639346599578857, "learning_rate": 1.1040118953575818e-09, "loss": 0.6027, "step": 38836 }, { "epoch": 1.0, "grad_norm": 1.031441330909729, "learning_rate": 1.0917112469643887e-09, "loss": 0.3912, "step": 38837 }, { "epoch": 1.0, "grad_norm": 1.179291844367981, "learning_rate": 1.0794795047641692e-09, "loss": 0.5033, "step": 38838 }, { "epoch": 1.0, "grad_norm": 1.2837986946105957, "learning_rate": 1.0673166688435211e-09, "loss": 0.4372, "step": 38839 }, { "epoch": 1.0, "grad_norm": 1.2867501974105835, "learning_rate": 1.0552227392846004e-09, "loss": 0.449, "step": 38840 }, { "epoch": 1.0, "grad_norm": 1.2353190183639526, "learning_rate": 1.0431977161706741e-09, "loss": 0.5072, "step": 38841 }, { "epoch": 1.0, "grad_norm": 2.0086684226989746, "learning_rate": 1.031241599585009e-09, "loss": 0.6135, "step": 38842 }, { "epoch": 1.0, "grad_norm": 1.261983036994934, "learning_rate": 1.0193543896108716e-09, "loss": 0.5471, "step": 38843 }, { "epoch": 1.0, "grad_norm": 1.540995478630066, "learning_rate": 1.0075360863293082e-09, "loss": 0.5844, "step": 38844 }, { "epoch": 1.0, "grad_norm": 1.846030831336975, "learning_rate": 9.957866898213653e-10, "loss": 0.5862, "step": 38845 }, { "epoch": 1.0, "grad_norm": 1.5836753845214844, "learning_rate": 9.84106200168089e-10, "loss": 0.561, "step": 38846 }, { "epoch": 1.0, "grad_norm": 3.2649128437042236, "learning_rate": 9.724946174505257e-10, "loss": 0.4804, "step": 38847 }, { "epoch": 1.0, "grad_norm": 0.9049289226531982, "learning_rate": 9.609519417497215e-10, "loss": 0.6046, "step": 38848 }, { "epoch": 1.0, "grad_norm": 5.580356121063232, "learning_rate": 9.49478173142282e-10, "loss": 0.5794, "step": 38849 }, { "epoch": 1.0, "grad_norm": 1.5522875785827637, "learning_rate": 9.380733117103635e-10, "loss": 0.5814, "step": 38850 }, { "epoch": 1.0, "grad_norm": 1.6805931329727173, "learning_rate": 9.267373575305716e-10, "loss": 0.5179, "step": 38851 }, { "epoch": 1.0, "grad_norm": 1.555249810218811, "learning_rate": 9.154703106817319e-10, "loss": 0.6708, "step": 38852 }, { "epoch": 1.0, "grad_norm": 1.104007363319397, "learning_rate": 9.042721712415603e-10, "loss": 0.428, "step": 38853 }, { "epoch": 1.0, "grad_norm": 2.091603994369507, "learning_rate": 8.931429392866619e-10, "loss": 0.6008, "step": 38854 }, { "epoch": 1.0, "grad_norm": 1.5278440713882446, "learning_rate": 8.820826148947525e-10, "loss": 0.4867, "step": 38855 }, { "epoch": 1.0, "grad_norm": 2.5330257415771484, "learning_rate": 8.710911981413272e-10, "loss": 0.642, "step": 38856 }, { "epoch": 1.0, "grad_norm": 2.4498531818389893, "learning_rate": 8.601686891018812e-10, "loss": 0.599, "step": 38857 }, { "epoch": 1.0, "grad_norm": 1.1761162281036377, "learning_rate": 8.493150878530199e-10, "loss": 0.5148, "step": 38858 }, { "epoch": 1.0, "grad_norm": 2.01951265335083, "learning_rate": 8.385303944680179e-10, "loss": 0.5567, "step": 38859 }, { "epoch": 1.0, "grad_norm": 1.9001001119613647, "learning_rate": 8.278146090212602e-10, "loss": 0.4171, "step": 38860 }, { "epoch": 1.0, "grad_norm": 1.531692624092102, "learning_rate": 8.17167731588242e-10, "loss": 0.4343, "step": 38861 }, { "epoch": 1.0, "grad_norm": 1.1435028314590454, "learning_rate": 8.065897622400176e-10, "loss": 0.4448, "step": 38862 }, { "epoch": 1.0, "grad_norm": 1.3607521057128906, "learning_rate": 7.960807010509719e-10, "loss": 0.5778, "step": 38863 }, { "epoch": 1.0, "grad_norm": 1.2208858728408813, "learning_rate": 7.856405480932694e-10, "loss": 0.6145, "step": 38864 }, { "epoch": 1.0, "grad_norm": 1.668275237083435, "learning_rate": 7.752693034390746e-10, "loss": 0.5391, "step": 38865 }, { "epoch": 1.0, "grad_norm": 1.4101494550704956, "learning_rate": 7.649669671594418e-10, "loss": 0.5732, "step": 38866 }, { "epoch": 1.0, "grad_norm": 1.1746169328689575, "learning_rate": 7.547335393265354e-10, "loss": 0.5133, "step": 38867 }, { "epoch": 1.0, "grad_norm": 1.2544643878936768, "learning_rate": 7.445690200080791e-10, "loss": 0.5758, "step": 38868 }, { "epoch": 1.0, "grad_norm": 1.4124990701675415, "learning_rate": 7.344734092773476e-10, "loss": 0.553, "step": 38869 }, { "epoch": 1.0, "grad_norm": 2.339390754699707, "learning_rate": 7.244467072020644e-10, "loss": 0.3648, "step": 38870 }, { "epoch": 1.0, "grad_norm": 1.6501388549804688, "learning_rate": 7.144889138510636e-10, "loss": 0.5963, "step": 38871 }, { "epoch": 1.0, "grad_norm": 2.259165048599243, "learning_rate": 7.04600029294289e-10, "loss": 0.498, "step": 38872 }, { "epoch": 1.0, "grad_norm": 1.5878490209579468, "learning_rate": 6.947800535994642e-10, "loss": 0.5996, "step": 38873 }, { "epoch": 1.0, "grad_norm": 1.8813821077346802, "learning_rate": 6.850289868332027e-10, "loss": 0.5526, "step": 38874 }, { "epoch": 1.0, "grad_norm": 1.756340742111206, "learning_rate": 6.753468290643384e-10, "loss": 0.5423, "step": 38875 }, { "epoch": 1.0, "grad_norm": 3.7248730659484863, "learning_rate": 6.657335803583742e-10, "loss": 0.705, "step": 38876 }, { "epoch": 1.0, "grad_norm": 2.704411029815674, "learning_rate": 6.561892407819237e-10, "loss": 0.6266, "step": 38877 }, { "epoch": 1.0, "grad_norm": 2.4842584133148193, "learning_rate": 6.4671381040049e-10, "loss": 0.5507, "step": 38878 }, { "epoch": 1.0, "grad_norm": 2.080451250076294, "learning_rate": 6.373072892806864e-10, "loss": 0.6688, "step": 38879 }, { "epoch": 1.0, "grad_norm": 2.302725076675415, "learning_rate": 6.279696774857958e-10, "loss": 0.6392, "step": 38880 }, { "epoch": 1.0, "grad_norm": 1.7940232753753662, "learning_rate": 6.187009750802109e-10, "loss": 0.6061, "step": 38881 }, { "epoch": 1.0, "grad_norm": 1.1580952405929565, "learning_rate": 6.09501182129435e-10, "loss": 0.3794, "step": 38882 }, { "epoch": 1.0, "grad_norm": 1.522947072982788, "learning_rate": 6.003702986945304e-10, "loss": 0.4434, "step": 38883 }, { "epoch": 1.0, "grad_norm": 1.2436721324920654, "learning_rate": 5.913083248398899e-10, "loss": 0.4266, "step": 38884 }, { "epoch": 1.0, "grad_norm": 1.3683358430862427, "learning_rate": 5.823152606287963e-10, "loss": 0.3545, "step": 38885 }, { "epoch": 1.0, "grad_norm": 1.4615917205810547, "learning_rate": 5.733911061212016e-10, "loss": 0.5379, "step": 38886 }, { "epoch": 1.0, "grad_norm": 21.139711380004883, "learning_rate": 5.645358613792784e-10, "loss": 0.7029, "step": 38887 }, { "epoch": 1.0, "grad_norm": 1.324957013130188, "learning_rate": 5.557495264651991e-10, "loss": 0.4707, "step": 38888 }, { "epoch": 1.0, "grad_norm": 3.5471010208129883, "learning_rate": 5.470321014378055e-10, "loss": 0.6411, "step": 38889 }, { "epoch": 1.0, "grad_norm": 1.540165901184082, "learning_rate": 5.383835863581599e-10, "loss": 0.674, "step": 38890 }, { "epoch": 1.0, "grad_norm": 1.47736656665802, "learning_rate": 5.298039812862144e-10, "loss": 0.4453, "step": 38891 }, { "epoch": 1.0, "grad_norm": 8.207088470458984, "learning_rate": 5.212932862808106e-10, "loss": 0.65, "step": 38892 }, { "epoch": 1.0, "grad_norm": 1.365370750427246, "learning_rate": 5.128515013996804e-10, "loss": 0.3854, "step": 38893 }, { "epoch": 1.0, "grad_norm": 1.2540854215621948, "learning_rate": 5.044786267016655e-10, "loss": 0.4162, "step": 38894 }, { "epoch": 1.0, "grad_norm": 1.7721573114395142, "learning_rate": 4.961746622444974e-10, "loss": 0.449, "step": 38895 }, { "epoch": 1.0, "grad_norm": 2.904217004776001, "learning_rate": 4.879396080859078e-10, "loss": 0.4589, "step": 38896 }, { "epoch": 1.0, "grad_norm": 2.41644024848938, "learning_rate": 4.797734642814079e-10, "loss": 0.5513, "step": 38897 }, { "epoch": 1.0, "grad_norm": 1.9902153015136719, "learning_rate": 4.716762308887291e-10, "loss": 0.495, "step": 38898 }, { "epoch": 1.0, "grad_norm": 1.1153591871261597, "learning_rate": 4.6364790796227245e-10, "loss": 0.4435, "step": 38899 }, { "epoch": 1.0, "grad_norm": 1.0406008958816528, "learning_rate": 4.5568849555865935e-10, "loss": 0.4959, "step": 38900 }, { "epoch": 1.0, "grad_norm": 1.6812347173690796, "learning_rate": 4.477979937311805e-10, "loss": 0.5752, "step": 38901 }, { "epoch": 1.0, "grad_norm": 1.9099302291870117, "learning_rate": 4.3997640253645723e-10, "loss": 0.4607, "step": 38902 }, { "epoch": 1.0, "grad_norm": 6.0943803787231445, "learning_rate": 4.3222372202555983e-10, "loss": 0.49, "step": 38903 }, { "epoch": 1.0, "grad_norm": 1.408218502998352, "learning_rate": 4.2453995225399944e-10, "loss": 0.5792, "step": 38904 }, { "epoch": 1.0, "grad_norm": 1.832855463027954, "learning_rate": 4.169250932750668e-10, "loss": 0.6549, "step": 38905 }, { "epoch": 1.0, "grad_norm": 1.6979281902313232, "learning_rate": 4.0937914513983215e-10, "loss": 0.3859, "step": 38906 }, { "epoch": 1.0, "grad_norm": 1.2640413045883179, "learning_rate": 4.019021079004759e-10, "loss": 0.5002, "step": 38907 }, { "epoch": 1.0, "grad_norm": 1.4984756708145142, "learning_rate": 3.944939816091786e-10, "loss": 0.5455, "step": 38908 }, { "epoch": 1.0, "grad_norm": 2.54388427734375, "learning_rate": 3.871547663159003e-10, "loss": 0.3975, "step": 38909 }, { "epoch": 1.0, "grad_norm": 1.113991141319275, "learning_rate": 3.798844620728215e-10, "loss": 0.603, "step": 38910 }, { "epoch": 1.0, "grad_norm": 3.933459520339966, "learning_rate": 3.7268306892879193e-10, "loss": 0.409, "step": 38911 }, { "epoch": 1.0, "grad_norm": 1.6208550930023193, "learning_rate": 3.6555058693377163e-10, "loss": 0.6065, "step": 38912 }, { "epoch": 1.0, "grad_norm": 3.6099514961242676, "learning_rate": 3.5848701613772077e-10, "loss": 0.6213, "step": 38913 }, { "epoch": 1.0, "grad_norm": 10.45859432220459, "learning_rate": 3.514923565883788e-10, "loss": 0.5483, "step": 38914 }, { "epoch": 1.0, "grad_norm": 2.108637809753418, "learning_rate": 3.445666083334853e-10, "loss": 0.629, "step": 38915 }, { "epoch": 1.0, "grad_norm": 1.2164456844329834, "learning_rate": 3.377097714218902e-10, "loss": 0.5057, "step": 38916 }, { "epoch": 1.0, "grad_norm": 5.69153356552124, "learning_rate": 3.309218459002228e-10, "loss": 0.5619, "step": 38917 }, { "epoch": 1.0, "grad_norm": 1.1270427703857422, "learning_rate": 3.242028318151125e-10, "loss": 0.5297, "step": 38918 }, { "epoch": 1.0, "grad_norm": 1.5724042654037476, "learning_rate": 3.175527292142988e-10, "loss": 0.5222, "step": 38919 }, { "epoch": 1.0, "grad_norm": 0.953561007976532, "learning_rate": 3.109715381410805e-10, "loss": 0.4701, "step": 38920 }, { "epoch": 1.0, "grad_norm": 1.2355897426605225, "learning_rate": 3.0445925864319716e-10, "loss": 0.5582, "step": 38921 }, { "epoch": 1.0, "grad_norm": 1.6622788906097412, "learning_rate": 2.980158907639474e-10, "loss": 0.5016, "step": 38922 }, { "epoch": 1.0, "grad_norm": 1.3895649909973145, "learning_rate": 2.9164143454885055e-10, "loss": 0.5188, "step": 38923 }, { "epoch": 1.0, "grad_norm": 1.412329912185669, "learning_rate": 2.853358900412051e-10, "loss": 0.4718, "step": 38924 }, { "epoch": 1.0, "grad_norm": 1.6631760597229004, "learning_rate": 2.7909925728430986e-10, "loss": 0.4276, "step": 38925 }, { "epoch": 1.0, "grad_norm": 1.7668559551239014, "learning_rate": 2.7293153632257373e-10, "loss": 0.6115, "step": 38926 }, { "epoch": 1.0, "grad_norm": 0.92037433385849, "learning_rate": 2.6683272719596475e-10, "loss": 0.5287, "step": 38927 }, { "epoch": 1.0, "grad_norm": 1.2851365804672241, "learning_rate": 2.6080282994889183e-10, "loss": 0.461, "step": 38928 }, { "epoch": 1.0, "grad_norm": 1.2551517486572266, "learning_rate": 2.54841844621323e-10, "loss": 0.6476, "step": 38929 }, { "epoch": 1.0, "grad_norm": 1.6619480848312378, "learning_rate": 2.489497712543365e-10, "loss": 0.6918, "step": 38930 }, { "epoch": 1.0, "grad_norm": 5.212338447570801, "learning_rate": 2.431266098901208e-10, "loss": 0.8057, "step": 38931 }, { "epoch": 1.0, "grad_norm": 1.254227876663208, "learning_rate": 2.373723605675338e-10, "loss": 0.3412, "step": 38932 }, { "epoch": 1.0, "grad_norm": 1.694922924041748, "learning_rate": 2.3168702332654335e-10, "loss": 0.508, "step": 38933 }, { "epoch": 1.0, "grad_norm": 10.846832275390625, "learning_rate": 2.2607059820600742e-10, "loss": 0.4438, "step": 38934 }, { "epoch": 1.0, "grad_norm": 1.8747010231018066, "learning_rate": 2.2052308524478373e-10, "loss": 0.5496, "step": 38935 }, { "epoch": 1.0, "grad_norm": 1.9103394746780396, "learning_rate": 2.150444844817301e-10, "loss": 0.4005, "step": 38936 }, { "epoch": 1.0, "grad_norm": 0.9605774283409119, "learning_rate": 2.0963479595459412e-10, "loss": 0.4142, "step": 38937 }, { "epoch": 1.0, "grad_norm": 1.1655941009521484, "learning_rate": 2.0429401969890294e-10, "loss": 0.3672, "step": 38938 }, { "epoch": 1.0, "grad_norm": 1.4480550289154053, "learning_rate": 1.9902215575351435e-10, "loss": 0.4541, "step": 38939 }, { "epoch": 1.0, "grad_norm": 1.6052545309066772, "learning_rate": 1.938192041539555e-10, "loss": 0.5269, "step": 38940 }, { "epoch": 1.0, "grad_norm": 1.3576716184616089, "learning_rate": 1.8868516493575352e-10, "loss": 0.4477, "step": 38941 }, { "epoch": 1.0, "grad_norm": 1.240419864654541, "learning_rate": 1.8362003813554574e-10, "loss": 0.4718, "step": 38942 }, { "epoch": 1.0, "grad_norm": 1.093379020690918, "learning_rate": 1.786238237866389e-10, "loss": 0.4396, "step": 38943 }, { "epoch": 1.0, "grad_norm": 1.8536180257797241, "learning_rate": 1.736965219234499e-10, "loss": 0.7184, "step": 38944 }, { "epoch": 1.0, "grad_norm": 1.4416736364364624, "learning_rate": 1.6883813258150584e-10, "loss": 0.4012, "step": 38945 }, { "epoch": 1.0, "grad_norm": 1.913781762123108, "learning_rate": 1.6404865579300321e-10, "loss": 0.5149, "step": 38946 }, { "epoch": 1.0, "grad_norm": 2.804459810256958, "learning_rate": 1.5932809159235895e-10, "loss": 0.3823, "step": 38947 }, { "epoch": 1.0, "grad_norm": 2.7639739513397217, "learning_rate": 1.546764400106593e-10, "loss": 0.5084, "step": 38948 }, { "epoch": 1.0, "grad_norm": 2.7762603759765625, "learning_rate": 1.5009370108010069e-10, "loss": 0.6753, "step": 38949 }, { "epoch": 1.0, "grad_norm": 1.7336066961288452, "learning_rate": 1.455798748317694e-10, "loss": 0.586, "step": 38950 }, { "epoch": 1.0, "grad_norm": 1.4732036590576172, "learning_rate": 1.4113496129897207e-10, "loss": 0.4848, "step": 38951 }, { "epoch": 1.0, "grad_norm": 2.338303804397583, "learning_rate": 1.3675896051057457e-10, "loss": 0.5136, "step": 38952 }, { "epoch": 1.0, "grad_norm": 1.4837745428085327, "learning_rate": 1.324518724965529e-10, "loss": 0.4261, "step": 38953 }, { "epoch": 1.0, "grad_norm": 0.9179459810256958, "learning_rate": 1.2821369728799327e-10, "loss": 0.3938, "step": 38954 }, { "epoch": 1.0, "grad_norm": 1.3868683576583862, "learning_rate": 1.2404443491265128e-10, "loss": 0.4774, "step": 38955 }, { "epoch": 1.0, "grad_norm": 1.6206995248794556, "learning_rate": 1.1994408539939273e-10, "loss": 0.5571, "step": 38956 }, { "epoch": 1.0, "grad_norm": 1.1937505006790161, "learning_rate": 1.1591264877819364e-10, "loss": 0.4923, "step": 38957 }, { "epoch": 1.0, "grad_norm": 1.2383848428726196, "learning_rate": 1.1195012507458914e-10, "loss": 0.3966, "step": 38958 }, { "epoch": 1.0, "grad_norm": 1.4311960935592651, "learning_rate": 1.0805651431744501e-10, "loss": 0.4645, "step": 38959 }, { "epoch": 1.0, "grad_norm": 3.056215763092041, "learning_rate": 1.0423181653229641e-10, "loss": 0.5809, "step": 38960 }, { "epoch": 1.0, "grad_norm": 1.7961372137069702, "learning_rate": 1.0047603174689891e-10, "loss": 0.6019, "step": 38961 }, { "epoch": 1.0, "grad_norm": 16.851593017578125, "learning_rate": 9.67891599856774e-11, "loss": 0.5008, "step": 38962 }, { "epoch": 1.0, "grad_norm": 1.7813512086868286, "learning_rate": 9.317120127638746e-11, "loss": 0.5391, "step": 38963 }, { "epoch": 1.0, "grad_norm": 17.89112663269043, "learning_rate": 8.962215564123356e-11, "loss": 0.4404, "step": 38964 }, { "epoch": 1.0, "grad_norm": 1.1221121549606323, "learning_rate": 8.614202310575081e-11, "loss": 0.5016, "step": 38965 }, { "epoch": 1.0, "grad_norm": 1.5897091627120972, "learning_rate": 8.273080369436415e-11, "loss": 0.6154, "step": 38966 }, { "epoch": 1.0, "grad_norm": 3.6902921199798584, "learning_rate": 7.938849743038824e-11, "loss": 0.5369, "step": 38967 }, { "epoch": 1.0, "grad_norm": 1.2950823307037354, "learning_rate": 7.611510433602754e-11, "loss": 0.4576, "step": 38968 }, { "epoch": 1.0, "grad_norm": 2.038729190826416, "learning_rate": 7.291062443570695e-11, "loss": 0.6106, "step": 38969 }, { "epoch": 1.0, "grad_norm": 2.130936622619629, "learning_rate": 6.977505774941051e-11, "loss": 0.54, "step": 38970 }, { "epoch": 1.0, "grad_norm": 1.35580575466156, "learning_rate": 6.670840429934267e-11, "loss": 0.4226, "step": 38971 }, { "epoch": 1.0, "grad_norm": 2.175191879272461, "learning_rate": 6.371066410770787e-11, "loss": 0.6058, "step": 38972 }, { "epoch": 1.0, "grad_norm": 1.3260893821716309, "learning_rate": 6.078183719449016e-11, "loss": 0.5609, "step": 38973 }, { "epoch": 1.0, "grad_norm": 1.1994094848632812, "learning_rate": 5.79219235785633e-11, "loss": 0.4914, "step": 38974 }, { "epoch": 1.0, "grad_norm": 1.3970363140106201, "learning_rate": 5.513092328213176e-11, "loss": 0.4453, "step": 38975 }, { "epoch": 1.0, "grad_norm": 1.8121787309646606, "learning_rate": 5.240883632295912e-11, "loss": 0.4372, "step": 38976 }, { "epoch": 1.0, "grad_norm": 1.3004897832870483, "learning_rate": 4.975566271991916e-11, "loss": 0.3999, "step": 38977 }, { "epoch": 1.0, "grad_norm": 3.143728256225586, "learning_rate": 4.717140249077545e-11, "loss": 0.5276, "step": 38978 }, { "epoch": 1.0, "grad_norm": 1.1197160482406616, "learning_rate": 4.4656055654401784e-11, "loss": 0.583, "step": 38979 }, { "epoch": 1.0, "grad_norm": 2.3203787803649902, "learning_rate": 4.220962222856173e-11, "loss": 0.5964, "step": 38980 }, { "epoch": 1.0, "grad_norm": 1.6226567029953003, "learning_rate": 3.983210222768818e-11, "loss": 0.3764, "step": 38981 }, { "epoch": 1.0, "grad_norm": 0.9319593906402588, "learning_rate": 3.752349567065494e-11, "loss": 0.3869, "step": 38982 }, { "epoch": 1.0, "grad_norm": 1.4989314079284668, "learning_rate": 3.52838025718949e-11, "loss": 0.5548, "step": 38983 }, { "epoch": 1.0, "grad_norm": 1.6261223554611206, "learning_rate": 3.31130229480614e-11, "loss": 0.5804, "step": 38984 }, { "epoch": 1.0, "grad_norm": 1.0012257099151611, "learning_rate": 3.101115681247713e-11, "loss": 0.5407, "step": 38985 }, { "epoch": 1.0, "grad_norm": 1.435221552848816, "learning_rate": 2.8978204180685197e-11, "loss": 0.5067, "step": 38986 }, { "epoch": 1.0, "grad_norm": 1.6376019716262817, "learning_rate": 2.7014165067118513e-11, "loss": 0.5295, "step": 38987 }, { "epoch": 1.0, "grad_norm": 1.7296502590179443, "learning_rate": 2.511903948509975e-11, "loss": 0.6733, "step": 38988 }, { "epoch": 1.0, "grad_norm": 1.6365643739700317, "learning_rate": 2.329282744684136e-11, "loss": 0.6141, "step": 38989 }, { "epoch": 1.0, "grad_norm": 1.0913448333740234, "learning_rate": 2.153552896566602e-11, "loss": 0.5521, "step": 38990 }, { "epoch": 1.0, "grad_norm": 1.389140009880066, "learning_rate": 1.984714405267596e-11, "loss": 0.4028, "step": 38991 }, { "epoch": 1.0, "grad_norm": 1.6802141666412354, "learning_rate": 1.8227672721193856e-11, "loss": 0.4619, "step": 38992 }, { "epoch": 1.0, "grad_norm": 1.4907159805297852, "learning_rate": 1.6677114981211716e-11, "loss": 0.6194, "step": 38993 }, { "epoch": 1.0, "grad_norm": 1.9406495094299316, "learning_rate": 1.5195470843831773e-11, "loss": 0.5708, "step": 38994 }, { "epoch": 1.0, "grad_norm": 1.4126396179199219, "learning_rate": 1.3782740319046029e-11, "loss": 0.4964, "step": 38995 }, { "epoch": 1.0, "grad_norm": 2.6313204765319824, "learning_rate": 1.2438923415736271e-11, "loss": 0.3654, "step": 38996 }, { "epoch": 1.0, "grad_norm": 1.3852535486221313, "learning_rate": 1.1164020145004728e-11, "loss": 0.4646, "step": 38997 }, { "epoch": 1.0, "grad_norm": 1.1846591234207153, "learning_rate": 9.958030514622963e-12, "loss": 0.4909, "step": 38998 }, { "epoch": 1.0, "grad_norm": 1.7924268245697021, "learning_rate": 8.820954533472759e-12, "loss": 0.4398, "step": 38999 }, { "epoch": 1.0, "grad_norm": 1.4228426218032837, "learning_rate": 7.752792208215455e-12, "loss": 0.4714, "step": 39000 }, { "epoch": 1.0, "grad_norm": 1.0532944202423096, "learning_rate": 6.753543547732832e-12, "loss": 0.5711, "step": 39001 }, { "epoch": 1.0, "grad_norm": 1.596328854560852, "learning_rate": 5.82320855757601e-12, "loss": 0.5197, "step": 39002 }, { "epoch": 1.0, "grad_norm": 6.371967792510986, "learning_rate": 4.9617872444063244e-12, "loss": 0.6106, "step": 39003 }, { "epoch": 1.0, "grad_norm": 9.992445945739746, "learning_rate": 4.169279614885113e-12, "loss": 0.6674, "step": 39004 }, { "epoch": 1.0, "grad_norm": 1.8931881189346313, "learning_rate": 3.4456856734532696e-12, "loss": 0.6623, "step": 39005 }, { "epoch": 1.0, "grad_norm": 1.3694530725479126, "learning_rate": 2.791005425661908e-12, "loss": 0.5264, "step": 39006 }, { "epoch": 1.0, "grad_norm": 2.0961904525756836, "learning_rate": 2.2052388770621437e-12, "loss": 0.5759, "step": 39007 }, { "epoch": 1.0, "grad_norm": 0.963153064250946, "learning_rate": 1.6883860298744226e-12, "loss": 0.3736, "step": 39008 }, { "epoch": 1.0, "grad_norm": 2.0593433380126953, "learning_rate": 1.240446888539637e-12, "loss": 0.4948, "step": 39009 }, { "epoch": 1.0, "grad_norm": 1.4505186080932617, "learning_rate": 8.614214552782329e-13, "loss": 0.5559, "step": 39010 }, { "epoch": 1.0, "grad_norm": 1.2437456846237183, "learning_rate": 5.513097345311025e-13, "loss": 0.5562, "step": 39011 }, { "epoch": 1.0, "grad_norm": 1.6914098262786865, "learning_rate": 3.101117262982456e-13, "loss": 0.4397, "step": 39012 }, { "epoch": 1.0, "grad_norm": 1.558327078819275, "learning_rate": 1.378274350205544e-13, "loss": 0.5675, "step": 39013 }, { "epoch": 1.0, "grad_norm": 1.3717198371887207, "learning_rate": 3.4456858477582845e-14, "loss": 0.5883, "step": 39014 }, { "epoch": 1.0, "grad_norm": 1.220456600189209, "learning_rate": 0.0, "loss": 0.5715, "step": 39015 }, { "epoch": 1.0, "step": 39015, "total_flos": 2.77077130157328e+18, "train_loss": 0.5994104020147618, "train_runtime": 134959.7775, "train_samples_per_second": 4.625, "train_steps_per_second": 0.289 } ], "logging_steps": 1, "max_steps": 39015, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 24000, "total_flos": 2.77077130157328e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }