{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9887464832760238, "eval_steps": 1600, "global_step": 6398, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00031259768677711783, "grad_norm": 0.21875, "learning_rate": 2e-05, "loss": 2.2083, "step": 1 }, { "epoch": 0.00031259768677711783, "eval_loss": 2.0116653442382812, "eval_runtime": 1898.4475, "eval_samples_per_second": 4.813, "eval_steps_per_second": 2.407, "step": 1 }, { "epoch": 0.0006251953735542357, "grad_norm": 0.216796875, "learning_rate": 4e-05, "loss": 2.0017, "step": 2 }, { "epoch": 0.0009377930603313535, "grad_norm": 0.216796875, "learning_rate": 6e-05, "loss": 2.2668, "step": 3 }, { "epoch": 0.0012503907471084713, "grad_norm": 0.220703125, "learning_rate": 8e-05, "loss": 1.9291, "step": 4 }, { "epoch": 0.0015629884338855893, "grad_norm": 0.22265625, "learning_rate": 0.0001, "loss": 1.8984, "step": 5 }, { "epoch": 0.001875586120662707, "grad_norm": 0.1845703125, "learning_rate": 0.00012, "loss": 2.1924, "step": 6 }, { "epoch": 0.002188183807439825, "grad_norm": 0.1982421875, "learning_rate": 0.00014, "loss": 2.135, "step": 7 }, { "epoch": 0.0025007814942169426, "grad_norm": 0.1865234375, "learning_rate": 0.00016, "loss": 2.0434, "step": 8 }, { "epoch": 0.002813379180994061, "grad_norm": 0.1826171875, "learning_rate": 0.00018, "loss": 1.892, "step": 9 }, { "epoch": 0.0031259768677711786, "grad_norm": 0.1953125, "learning_rate": 0.0002, "loss": 2.0976, "step": 10 }, { "epoch": 0.0034385745545482964, "grad_norm": 0.2060546875, "learning_rate": 0.0001999999969814363, "loss": 2.257, "step": 11 }, { "epoch": 0.003751172241325414, "grad_norm": 0.185546875, "learning_rate": 0.00019999998792574533, "loss": 2.0452, "step": 12 }, { "epoch": 0.004063769928102532, "grad_norm": 0.1865234375, "learning_rate": 0.00019999997283292765, "loss": 1.9728, "step": 13 }, { "epoch": 0.00437636761487965, "grad_norm": 0.1806640625, "learning_rate": 0.0001999999517029842, "loss": 2.0065, "step": 14 }, { "epoch": 0.004688965301656768, "grad_norm": 0.1826171875, "learning_rate": 0.00019999992453591622, "loss": 2.0628, "step": 15 }, { "epoch": 0.005001562988433885, "grad_norm": 0.1728515625, "learning_rate": 0.00019999989133172538, "loss": 2.0292, "step": 16 }, { "epoch": 0.0053141606752110035, "grad_norm": 0.1669921875, "learning_rate": 0.00019999985209041366, "loss": 1.9125, "step": 17 }, { "epoch": 0.005626758361988122, "grad_norm": 0.1748046875, "learning_rate": 0.00019999980681198345, "loss": 1.8677, "step": 18 }, { "epoch": 0.005939356048765239, "grad_norm": 0.16796875, "learning_rate": 0.00019999975549643746, "loss": 1.984, "step": 19 }, { "epoch": 0.006251953735542357, "grad_norm": 0.177734375, "learning_rate": 0.00019999969814377878, "loss": 1.9886, "step": 20 }, { "epoch": 0.006564551422319475, "grad_norm": 0.1689453125, "learning_rate": 0.0001999996347540109, "loss": 2.0277, "step": 21 }, { "epoch": 0.006877149109096593, "grad_norm": 0.1728515625, "learning_rate": 0.0001999995653271377, "loss": 2.0216, "step": 22 }, { "epoch": 0.00718974679587371, "grad_norm": 0.1689453125, "learning_rate": 0.00019999948986316324, "loss": 1.8909, "step": 23 }, { "epoch": 0.007502344482650828, "grad_norm": 0.1748046875, "learning_rate": 0.00019999940836209215, "loss": 1.9551, "step": 24 }, { "epoch": 0.007814942169427946, "grad_norm": 0.1826171875, "learning_rate": 0.00019999932082392937, "loss": 1.9186, "step": 25 }, { "epoch": 0.008127539856205065, "grad_norm": 0.18359375, "learning_rate": 0.00019999922724868015, "loss": 2.076, "step": 26 }, { "epoch": 0.008440137542982182, "grad_norm": 0.181640625, "learning_rate": 0.00019999912763635016, "loss": 1.9682, "step": 27 }, { "epoch": 0.0087527352297593, "grad_norm": 0.1630859375, "learning_rate": 0.00019999902198694543, "loss": 1.8056, "step": 28 }, { "epoch": 0.009065332916536417, "grad_norm": 0.1728515625, "learning_rate": 0.00019999891030047227, "loss": 1.8404, "step": 29 }, { "epoch": 0.009377930603313536, "grad_norm": 0.16015625, "learning_rate": 0.0001999987925769375, "loss": 2.113, "step": 30 }, { "epoch": 0.009690528290090653, "grad_norm": 0.166015625, "learning_rate": 0.00019999866881634815, "loss": 1.8715, "step": 31 }, { "epoch": 0.01000312597686777, "grad_norm": 0.8359375, "learning_rate": 0.00019999853901871175, "loss": 3.09, "step": 32 }, { "epoch": 0.01031572366364489, "grad_norm": 0.1640625, "learning_rate": 0.00019999840318403613, "loss": 2.1366, "step": 33 }, { "epoch": 0.010628321350422007, "grad_norm": 0.1787109375, "learning_rate": 0.00019999826131232947, "loss": 1.9007, "step": 34 }, { "epoch": 0.010940919037199124, "grad_norm": 0.173828125, "learning_rate": 0.00019999811340360034, "loss": 1.9831, "step": 35 }, { "epoch": 0.011253516723976243, "grad_norm": 0.1611328125, "learning_rate": 0.0001999979594578577, "loss": 1.9656, "step": 36 }, { "epoch": 0.01156611441075336, "grad_norm": 0.48828125, "learning_rate": 0.0001999977994751108, "loss": 2.8559, "step": 37 }, { "epoch": 0.011878712097530478, "grad_norm": 0.177734375, "learning_rate": 0.00019999763345536934, "loss": 2.0553, "step": 38 }, { "epoch": 0.012191309784307595, "grad_norm": 0.18359375, "learning_rate": 0.0001999974613986433, "loss": 1.8912, "step": 39 }, { "epoch": 0.012503907471084715, "grad_norm": 0.169921875, "learning_rate": 0.0001999972833049431, "loss": 1.8342, "step": 40 }, { "epoch": 0.012816505157861832, "grad_norm": 0.1669921875, "learning_rate": 0.00019999709917427946, "loss": 1.9032, "step": 41 }, { "epoch": 0.01312910284463895, "grad_norm": 0.1806640625, "learning_rate": 0.00019999690900666353, "loss": 1.9228, "step": 42 }, { "epoch": 0.013441700531416068, "grad_norm": 0.1845703125, "learning_rate": 0.00019999671280210676, "loss": 1.9666, "step": 43 }, { "epoch": 0.013754298218193186, "grad_norm": 0.1787109375, "learning_rate": 0.00019999651056062102, "loss": 1.901, "step": 44 }, { "epoch": 0.014066895904970303, "grad_norm": 0.1806640625, "learning_rate": 0.00019999630228221852, "loss": 2.1406, "step": 45 }, { "epoch": 0.01437949359174742, "grad_norm": 0.16796875, "learning_rate": 0.0001999960879669118, "loss": 1.9389, "step": 46 }, { "epoch": 0.01469209127852454, "grad_norm": 0.1689453125, "learning_rate": 0.00019999586761471384, "loss": 1.9625, "step": 47 }, { "epoch": 0.015004688965301657, "grad_norm": 0.1767578125, "learning_rate": 0.00019999564122563795, "loss": 2.0456, "step": 48 }, { "epoch": 0.015317286652078774, "grad_norm": 0.171875, "learning_rate": 0.00019999540879969775, "loss": 1.9955, "step": 49 }, { "epoch": 0.01562988433885589, "grad_norm": 0.1669921875, "learning_rate": 0.00019999517033690727, "loss": 1.8969, "step": 50 }, { "epoch": 0.01594248202563301, "grad_norm": 0.166015625, "learning_rate": 0.00019999492583728097, "loss": 1.8544, "step": 51 }, { "epoch": 0.01625507971241013, "grad_norm": 0.1748046875, "learning_rate": 0.00019999467530083356, "loss": 1.779, "step": 52 }, { "epoch": 0.016567677399187245, "grad_norm": 0.177734375, "learning_rate": 0.00019999441872758017, "loss": 1.8127, "step": 53 }, { "epoch": 0.016880275085964364, "grad_norm": 0.1796875, "learning_rate": 0.0001999941561175363, "loss": 2.0516, "step": 54 }, { "epoch": 0.017192872772741483, "grad_norm": 0.173828125, "learning_rate": 0.0001999938874707178, "loss": 2.1641, "step": 55 }, { "epoch": 0.0175054704595186, "grad_norm": 0.1708984375, "learning_rate": 0.00019999361278714092, "loss": 2.0483, "step": 56 }, { "epoch": 0.017818068146295718, "grad_norm": 0.1748046875, "learning_rate": 0.00019999333206682218, "loss": 1.9931, "step": 57 }, { "epoch": 0.018130665833072834, "grad_norm": 0.181640625, "learning_rate": 0.00019999304530977856, "loss": 1.9795, "step": 58 }, { "epoch": 0.018443263519849953, "grad_norm": 0.1669921875, "learning_rate": 0.00019999275251602738, "loss": 2.13, "step": 59 }, { "epoch": 0.018755861206627072, "grad_norm": 0.1669921875, "learning_rate": 0.0001999924536855863, "loss": 2.0652, "step": 60 }, { "epoch": 0.019068458893404187, "grad_norm": 0.1689453125, "learning_rate": 0.00019999214881847338, "loss": 1.9731, "step": 61 }, { "epoch": 0.019381056580181306, "grad_norm": 0.1708984375, "learning_rate": 0.00019999183791470702, "loss": 1.9303, "step": 62 }, { "epoch": 0.019693654266958426, "grad_norm": 0.169921875, "learning_rate": 0.000199991520974306, "loss": 1.9115, "step": 63 }, { "epoch": 0.02000625195373554, "grad_norm": 0.1669921875, "learning_rate": 0.0001999911979972894, "loss": 2.1622, "step": 64 }, { "epoch": 0.02031884964051266, "grad_norm": 0.169921875, "learning_rate": 0.00019999086898367678, "loss": 1.9662, "step": 65 }, { "epoch": 0.02063144732728978, "grad_norm": 0.16796875, "learning_rate": 0.00019999053393348796, "loss": 1.8382, "step": 66 }, { "epoch": 0.020944045014066895, "grad_norm": 0.17578125, "learning_rate": 0.00019999019284674317, "loss": 1.9147, "step": 67 }, { "epoch": 0.021256642700844014, "grad_norm": 0.171875, "learning_rate": 0.00019998984572346308, "loss": 2.0712, "step": 68 }, { "epoch": 0.021569240387621133, "grad_norm": 0.1767578125, "learning_rate": 0.00019998949256366854, "loss": 2.0207, "step": 69 }, { "epoch": 0.02188183807439825, "grad_norm": 0.1806640625, "learning_rate": 0.00019998913336738094, "loss": 2.1334, "step": 70 }, { "epoch": 0.022194435761175368, "grad_norm": 0.7109375, "learning_rate": 0.00019998876813462192, "loss": 2.7085, "step": 71 }, { "epoch": 0.022507033447952487, "grad_norm": 0.17578125, "learning_rate": 0.00019998839686541356, "loss": 1.7364, "step": 72 }, { "epoch": 0.022819631134729602, "grad_norm": 0.18359375, "learning_rate": 0.0001999880195597783, "loss": 1.9281, "step": 73 }, { "epoch": 0.02313222882150672, "grad_norm": 0.1787109375, "learning_rate": 0.00019998763621773883, "loss": 1.9648, "step": 74 }, { "epoch": 0.023444826508283837, "grad_norm": 0.1806640625, "learning_rate": 0.00019998724683931838, "loss": 1.9874, "step": 75 }, { "epoch": 0.023757424195060956, "grad_norm": 0.171875, "learning_rate": 0.0001999868514245404, "loss": 1.785, "step": 76 }, { "epoch": 0.024070021881838075, "grad_norm": 0.181640625, "learning_rate": 0.0001999864499734288, "loss": 1.9094, "step": 77 }, { "epoch": 0.02438261956861519, "grad_norm": 0.162109375, "learning_rate": 0.00019998604248600777, "loss": 1.9723, "step": 78 }, { "epoch": 0.02469521725539231, "grad_norm": 0.1689453125, "learning_rate": 0.00019998562896230196, "loss": 1.8739, "step": 79 }, { "epoch": 0.02500781494216943, "grad_norm": 0.1845703125, "learning_rate": 0.00019998520940233636, "loss": 1.936, "step": 80 }, { "epoch": 0.025320412628946545, "grad_norm": 0.16015625, "learning_rate": 0.0001999847838061362, "loss": 1.8807, "step": 81 }, { "epoch": 0.025633010315723664, "grad_norm": 0.173828125, "learning_rate": 0.00019998435217372728, "loss": 1.7412, "step": 82 }, { "epoch": 0.025945608002500783, "grad_norm": 0.17578125, "learning_rate": 0.00019998391450513556, "loss": 1.8404, "step": 83 }, { "epoch": 0.0262582056892779, "grad_norm": 0.169921875, "learning_rate": 0.00019998347080038754, "loss": 1.8108, "step": 84 }, { "epoch": 0.026570803376055017, "grad_norm": 0.181640625, "learning_rate": 0.00019998302105950994, "loss": 2.0934, "step": 85 }, { "epoch": 0.026883401062832137, "grad_norm": 0.19140625, "learning_rate": 0.00019998256528252998, "loss": 2.0021, "step": 86 }, { "epoch": 0.027195998749609252, "grad_norm": 0.1630859375, "learning_rate": 0.00019998210346947515, "loss": 1.9675, "step": 87 }, { "epoch": 0.02750859643638637, "grad_norm": 0.1650390625, "learning_rate": 0.00019998163562037332, "loss": 1.8488, "step": 88 }, { "epoch": 0.02782119412316349, "grad_norm": 0.1806640625, "learning_rate": 0.00019998116173525272, "loss": 1.9255, "step": 89 }, { "epoch": 0.028133791809940606, "grad_norm": 0.16796875, "learning_rate": 0.000199980681814142, "loss": 2.1055, "step": 90 }, { "epoch": 0.028446389496717725, "grad_norm": 0.1748046875, "learning_rate": 0.0001999801958570701, "loss": 2.1303, "step": 91 }, { "epoch": 0.02875898718349484, "grad_norm": 0.1728515625, "learning_rate": 0.00019997970386406637, "loss": 2.0517, "step": 92 }, { "epoch": 0.02907158487027196, "grad_norm": 0.181640625, "learning_rate": 0.00019997920583516053, "loss": 1.8314, "step": 93 }, { "epoch": 0.02938418255704908, "grad_norm": 0.1845703125, "learning_rate": 0.0001999787017703826, "loss": 1.7953, "step": 94 }, { "epoch": 0.029696780243826194, "grad_norm": 0.169921875, "learning_rate": 0.00019997819166976308, "loss": 1.8238, "step": 95 }, { "epoch": 0.030009377930603313, "grad_norm": 0.173828125, "learning_rate": 0.0001999776755333327, "loss": 1.8905, "step": 96 }, { "epoch": 0.030321975617380433, "grad_norm": 0.169921875, "learning_rate": 0.00019997715336112263, "loss": 1.7594, "step": 97 }, { "epoch": 0.030634573304157548, "grad_norm": 0.171875, "learning_rate": 0.0001999766251531644, "loss": 1.9648, "step": 98 }, { "epoch": 0.030947170990934667, "grad_norm": 0.1669921875, "learning_rate": 0.00019997609090948996, "loss": 2.1577, "step": 99 }, { "epoch": 0.03125976867771178, "grad_norm": 0.1806640625, "learning_rate": 0.0001999755506301315, "loss": 2.0563, "step": 100 }, { "epoch": 0.0315723663644889, "grad_norm": 0.1669921875, "learning_rate": 0.0001999750043151216, "loss": 1.8857, "step": 101 }, { "epoch": 0.03188496405126602, "grad_norm": 0.1748046875, "learning_rate": 0.00019997445196449337, "loss": 1.8832, "step": 102 }, { "epoch": 0.03219756173804314, "grad_norm": 0.189453125, "learning_rate": 0.00019997389357828, "loss": 2.0352, "step": 103 }, { "epoch": 0.03251015942482026, "grad_norm": 0.18359375, "learning_rate": 0.00019997332915651532, "loss": 2.0126, "step": 104 }, { "epoch": 0.03282275711159737, "grad_norm": 0.1650390625, "learning_rate": 0.00019997275869923335, "loss": 2.0201, "step": 105 }, { "epoch": 0.03313535479837449, "grad_norm": 0.177734375, "learning_rate": 0.00019997218220646853, "loss": 2.3295, "step": 106 }, { "epoch": 0.03344795248515161, "grad_norm": 0.16796875, "learning_rate": 0.0001999715996782557, "loss": 1.8919, "step": 107 }, { "epoch": 0.03376055017192873, "grad_norm": 0.1806640625, "learning_rate": 0.00019997101111462998, "loss": 1.7797, "step": 108 }, { "epoch": 0.03407314785870585, "grad_norm": 0.193359375, "learning_rate": 0.00019997041651562695, "loss": 1.8956, "step": 109 }, { "epoch": 0.03438574554548297, "grad_norm": 0.1640625, "learning_rate": 0.00019996981588128244, "loss": 1.9683, "step": 110 }, { "epoch": 0.03469834323226008, "grad_norm": 0.1748046875, "learning_rate": 0.00019996920921163278, "loss": 1.727, "step": 111 }, { "epoch": 0.0350109409190372, "grad_norm": 0.1630859375, "learning_rate": 0.00019996859650671457, "loss": 1.8966, "step": 112 }, { "epoch": 0.03532353860581432, "grad_norm": 0.16796875, "learning_rate": 0.0001999679777665648, "loss": 1.8195, "step": 113 }, { "epoch": 0.035636136292591436, "grad_norm": 0.173828125, "learning_rate": 0.0001999673529912208, "loss": 1.8785, "step": 114 }, { "epoch": 0.035948733979368555, "grad_norm": 0.17578125, "learning_rate": 0.0001999667221807203, "loss": 1.9363, "step": 115 }, { "epoch": 0.03626133166614567, "grad_norm": 0.1689453125, "learning_rate": 0.00019996608533510144, "loss": 2.0314, "step": 116 }, { "epoch": 0.036573929352922786, "grad_norm": 0.17578125, "learning_rate": 0.0001999654424544026, "loss": 1.9304, "step": 117 }, { "epoch": 0.036886527039699905, "grad_norm": 0.181640625, "learning_rate": 0.0001999647935386626, "loss": 2.1456, "step": 118 }, { "epoch": 0.037199124726477024, "grad_norm": 0.1806640625, "learning_rate": 0.0001999641385879206, "loss": 1.6728, "step": 119 }, { "epoch": 0.037511722413254144, "grad_norm": 0.1650390625, "learning_rate": 0.00019996347760221624, "loss": 1.8201, "step": 120 }, { "epoch": 0.03782432010003126, "grad_norm": 0.1708984375, "learning_rate": 0.0001999628105815893, "loss": 2.096, "step": 121 }, { "epoch": 0.038136917786808375, "grad_norm": 0.171875, "learning_rate": 0.0001999621375260801, "loss": 1.9948, "step": 122 }, { "epoch": 0.038449515473585494, "grad_norm": 0.1748046875, "learning_rate": 0.0001999614584357293, "loss": 1.9562, "step": 123 }, { "epoch": 0.03876211316036261, "grad_norm": 0.173828125, "learning_rate": 0.00019996077331057788, "loss": 1.8452, "step": 124 }, { "epoch": 0.03907471084713973, "grad_norm": 0.1689453125, "learning_rate": 0.00019996008215066716, "loss": 1.7615, "step": 125 }, { "epoch": 0.03938730853391685, "grad_norm": 0.1728515625, "learning_rate": 0.00019995938495603893, "loss": 1.7628, "step": 126 }, { "epoch": 0.03969990622069397, "grad_norm": 0.1845703125, "learning_rate": 0.00019995868172673523, "loss": 2.0241, "step": 127 }, { "epoch": 0.04001250390747108, "grad_norm": 0.1689453125, "learning_rate": 0.00019995797246279856, "loss": 2.0807, "step": 128 }, { "epoch": 0.0403251015942482, "grad_norm": 0.1884765625, "learning_rate": 0.00019995725716427169, "loss": 1.8564, "step": 129 }, { "epoch": 0.04063769928102532, "grad_norm": 0.1767578125, "learning_rate": 0.00019995653583119785, "loss": 2.1278, "step": 130 }, { "epoch": 0.04095029696780244, "grad_norm": 0.1689453125, "learning_rate": 0.00019995580846362055, "loss": 2.095, "step": 131 }, { "epoch": 0.04126289465457956, "grad_norm": 0.1748046875, "learning_rate": 0.00019995507506158372, "loss": 1.6848, "step": 132 }, { "epoch": 0.04157549234135667, "grad_norm": 0.1748046875, "learning_rate": 0.00019995433562513163, "loss": 1.8979, "step": 133 }, { "epoch": 0.04188809002813379, "grad_norm": 0.427734375, "learning_rate": 0.00019995359015430894, "loss": 2.9492, "step": 134 }, { "epoch": 0.04220068771491091, "grad_norm": 0.173828125, "learning_rate": 0.0001999528386491606, "loss": 1.8951, "step": 135 }, { "epoch": 0.04251328540168803, "grad_norm": 0.17578125, "learning_rate": 0.00019995208110973206, "loss": 1.7656, "step": 136 }, { "epoch": 0.04282588308846515, "grad_norm": 0.1787109375, "learning_rate": 0.00019995131753606902, "loss": 2.0607, "step": 137 }, { "epoch": 0.043138480775242266, "grad_norm": 0.1611328125, "learning_rate": 0.00019995054792821754, "loss": 1.6803, "step": 138 }, { "epoch": 0.04345107846201938, "grad_norm": 0.1767578125, "learning_rate": 0.00019994977228622414, "loss": 2.0165, "step": 139 }, { "epoch": 0.0437636761487965, "grad_norm": 0.1787109375, "learning_rate": 0.0001999489906101356, "loss": 1.9388, "step": 140 }, { "epoch": 0.044076273835573616, "grad_norm": 0.1884765625, "learning_rate": 0.00019994820289999913, "loss": 1.6209, "step": 141 }, { "epoch": 0.044388871522350735, "grad_norm": 0.1826171875, "learning_rate": 0.0001999474091558623, "loss": 1.8157, "step": 142 }, { "epoch": 0.044701469209127855, "grad_norm": 0.171875, "learning_rate": 0.00019994660937777301, "loss": 1.7581, "step": 143 }, { "epoch": 0.045014066895904974, "grad_norm": 0.279296875, "learning_rate": 0.00019994580356577957, "loss": 2.6888, "step": 144 }, { "epoch": 0.045326664582682086, "grad_norm": 0.1796875, "learning_rate": 0.00019994499171993056, "loss": 2.0103, "step": 145 }, { "epoch": 0.045639262269459205, "grad_norm": 0.171875, "learning_rate": 0.00019994417384027507, "loss": 1.7455, "step": 146 }, { "epoch": 0.045951859956236324, "grad_norm": 0.1884765625, "learning_rate": 0.00019994334992686245, "loss": 1.9287, "step": 147 }, { "epoch": 0.04626445764301344, "grad_norm": 0.1875, "learning_rate": 0.00019994251997974241, "loss": 1.8521, "step": 148 }, { "epoch": 0.04657705532979056, "grad_norm": 0.1806640625, "learning_rate": 0.00019994168399896508, "loss": 2.0915, "step": 149 }, { "epoch": 0.046889653016567674, "grad_norm": 0.1787109375, "learning_rate": 0.00019994084198458097, "loss": 2.0972, "step": 150 }, { "epoch": 0.04720225070334479, "grad_norm": 0.1884765625, "learning_rate": 0.00019993999393664083, "loss": 2.2031, "step": 151 }, { "epoch": 0.04751484839012191, "grad_norm": 0.1748046875, "learning_rate": 0.00019993913985519592, "loss": 1.8532, "step": 152 }, { "epoch": 0.04782744607689903, "grad_norm": 0.18359375, "learning_rate": 0.0001999382797402978, "loss": 1.9278, "step": 153 }, { "epoch": 0.04814004376367615, "grad_norm": 0.1796875, "learning_rate": 0.00019993741359199834, "loss": 1.6459, "step": 154 }, { "epoch": 0.04845264145045327, "grad_norm": 0.1826171875, "learning_rate": 0.0001999365414103499, "loss": 1.8459, "step": 155 }, { "epoch": 0.04876523913723038, "grad_norm": 0.1748046875, "learning_rate": 0.0001999356631954051, "loss": 1.9833, "step": 156 }, { "epoch": 0.0490778368240075, "grad_norm": 0.177734375, "learning_rate": 0.00019993477894721698, "loss": 1.8361, "step": 157 }, { "epoch": 0.04939043451078462, "grad_norm": 0.17578125, "learning_rate": 0.0001999338886658389, "loss": 2.0878, "step": 158 }, { "epoch": 0.04970303219756174, "grad_norm": 0.1943359375, "learning_rate": 0.0001999329923513246, "loss": 1.9454, "step": 159 }, { "epoch": 0.05001562988433886, "grad_norm": 0.177734375, "learning_rate": 0.00019993209000372818, "loss": 1.982, "step": 160 }, { "epoch": 0.05032822757111598, "grad_norm": 0.1796875, "learning_rate": 0.00019993118162310415, "loss": 1.9192, "step": 161 }, { "epoch": 0.05064082525789309, "grad_norm": 0.1826171875, "learning_rate": 0.0001999302672095074, "loss": 1.8865, "step": 162 }, { "epoch": 0.05095342294467021, "grad_norm": 0.1748046875, "learning_rate": 0.00019992934676299302, "loss": 1.6733, "step": 163 }, { "epoch": 0.05126602063144733, "grad_norm": 0.169921875, "learning_rate": 0.00019992842028361665, "loss": 1.9374, "step": 164 }, { "epoch": 0.051578618318224446, "grad_norm": 0.1953125, "learning_rate": 0.0001999274877714342, "loss": 1.9537, "step": 165 }, { "epoch": 0.051891216005001566, "grad_norm": 0.17578125, "learning_rate": 0.000199926549226502, "loss": 1.8767, "step": 166 }, { "epoch": 0.05220381369177868, "grad_norm": 0.1728515625, "learning_rate": 0.00019992560464887667, "loss": 1.8994, "step": 167 }, { "epoch": 0.0525164113785558, "grad_norm": 0.185546875, "learning_rate": 0.00019992465403861524, "loss": 1.7415, "step": 168 }, { "epoch": 0.052829009065332916, "grad_norm": 0.1826171875, "learning_rate": 0.00019992369739577512, "loss": 1.7688, "step": 169 }, { "epoch": 0.053141606752110035, "grad_norm": 0.1826171875, "learning_rate": 0.00019992273472041404, "loss": 1.7507, "step": 170 }, { "epoch": 0.053454204438887154, "grad_norm": 0.1728515625, "learning_rate": 0.00019992176601259015, "loss": 1.995, "step": 171 }, { "epoch": 0.05376680212566427, "grad_norm": 0.1806640625, "learning_rate": 0.00019992079127236192, "loss": 1.9025, "step": 172 }, { "epoch": 0.054079399812441385, "grad_norm": 0.1689453125, "learning_rate": 0.0001999198104997882, "loss": 1.7634, "step": 173 }, { "epoch": 0.054391997499218504, "grad_norm": 0.1708984375, "learning_rate": 0.00019991882369492815, "loss": 1.8371, "step": 174 }, { "epoch": 0.05470459518599562, "grad_norm": 0.17578125, "learning_rate": 0.0001999178308578414, "loss": 1.7978, "step": 175 }, { "epoch": 0.05501719287277274, "grad_norm": 0.1748046875, "learning_rate": 0.0001999168319885879, "loss": 2.0066, "step": 176 }, { "epoch": 0.05532979055954986, "grad_norm": 0.17578125, "learning_rate": 0.00019991582708722792, "loss": 1.6957, "step": 177 }, { "epoch": 0.05564238824632698, "grad_norm": 0.1767578125, "learning_rate": 0.0001999148161538221, "loss": 1.8989, "step": 178 }, { "epoch": 0.05595498593310409, "grad_norm": 0.1767578125, "learning_rate": 0.00019991379918843155, "loss": 2.0687, "step": 179 }, { "epoch": 0.05626758361988121, "grad_norm": 0.1865234375, "learning_rate": 0.00019991277619111763, "loss": 1.9398, "step": 180 }, { "epoch": 0.05658018130665833, "grad_norm": 0.1904296875, "learning_rate": 0.00019991174716194203, "loss": 1.7309, "step": 181 }, { "epoch": 0.05689277899343545, "grad_norm": 0.1845703125, "learning_rate": 0.00019991071210096698, "loss": 1.8865, "step": 182 }, { "epoch": 0.05720537668021257, "grad_norm": 0.173828125, "learning_rate": 0.00019990967100825491, "loss": 1.8802, "step": 183 }, { "epoch": 0.05751797436698968, "grad_norm": 0.1826171875, "learning_rate": 0.0001999086238838687, "loss": 1.814, "step": 184 }, { "epoch": 0.0578305720537668, "grad_norm": 0.16796875, "learning_rate": 0.00019990757072787152, "loss": 1.6507, "step": 185 }, { "epoch": 0.05814316974054392, "grad_norm": 0.17578125, "learning_rate": 0.000199906511540327, "loss": 1.921, "step": 186 }, { "epoch": 0.05845576742732104, "grad_norm": 0.1923828125, "learning_rate": 0.0001999054463212991, "loss": 1.9151, "step": 187 }, { "epoch": 0.05876836511409816, "grad_norm": 0.1748046875, "learning_rate": 0.00019990437507085202, "loss": 2.0727, "step": 188 }, { "epoch": 0.05908096280087528, "grad_norm": 0.17578125, "learning_rate": 0.00019990329778905058, "loss": 2.0359, "step": 189 }, { "epoch": 0.05939356048765239, "grad_norm": 0.19921875, "learning_rate": 0.00019990221447595968, "loss": 1.9311, "step": 190 }, { "epoch": 0.05970615817442951, "grad_norm": 0.1767578125, "learning_rate": 0.00019990112513164484, "loss": 1.8018, "step": 191 }, { "epoch": 0.06001875586120663, "grad_norm": 0.1826171875, "learning_rate": 0.00019990002975617174, "loss": 1.9104, "step": 192 }, { "epoch": 0.060331353547983746, "grad_norm": 0.1806640625, "learning_rate": 0.00019989892834960656, "loss": 1.7227, "step": 193 }, { "epoch": 0.060643951234760865, "grad_norm": 0.1787109375, "learning_rate": 0.00019989782091201573, "loss": 1.7287, "step": 194 }, { "epoch": 0.06095654892153798, "grad_norm": 0.181640625, "learning_rate": 0.0001998967074434662, "loss": 1.8525, "step": 195 }, { "epoch": 0.061269146608315096, "grad_norm": 0.439453125, "learning_rate": 0.00019989558794402515, "loss": 2.4259, "step": 196 }, { "epoch": 0.061581744295092215, "grad_norm": 0.1865234375, "learning_rate": 0.0001998944624137601, "loss": 2.1134, "step": 197 }, { "epoch": 0.061894341981869334, "grad_norm": 0.169921875, "learning_rate": 0.0001998933308527391, "loss": 1.9239, "step": 198 }, { "epoch": 0.06220693966864645, "grad_norm": 0.1806640625, "learning_rate": 0.0001998921932610304, "loss": 1.7292, "step": 199 }, { "epoch": 0.06251953735542357, "grad_norm": 0.1826171875, "learning_rate": 0.0001998910496387027, "loss": 1.7629, "step": 200 }, { "epoch": 0.06283213504220068, "grad_norm": 0.1962890625, "learning_rate": 0.00019988989998582506, "loss": 2.005, "step": 201 }, { "epoch": 0.0631447327289778, "grad_norm": 0.173828125, "learning_rate": 0.00019988874430246686, "loss": 1.7605, "step": 202 }, { "epoch": 0.06345733041575492, "grad_norm": 0.1787109375, "learning_rate": 0.0001998875825886979, "loss": 1.748, "step": 203 }, { "epoch": 0.06376992810253204, "grad_norm": 0.1787109375, "learning_rate": 0.00019988641484458826, "loss": 2.1037, "step": 204 }, { "epoch": 0.06408252578930916, "grad_norm": 0.181640625, "learning_rate": 0.00019988524107020846, "loss": 1.9274, "step": 205 }, { "epoch": 0.06439512347608628, "grad_norm": 0.173828125, "learning_rate": 0.00019988406126562937, "loss": 1.7823, "step": 206 }, { "epoch": 0.0647077211628634, "grad_norm": 0.1796875, "learning_rate": 0.00019988287543092225, "loss": 2.06, "step": 207 }, { "epoch": 0.06502031884964052, "grad_norm": 0.193359375, "learning_rate": 0.00019988168356615865, "loss": 1.9327, "step": 208 }, { "epoch": 0.06533291653641764, "grad_norm": 0.17578125, "learning_rate": 0.00019988048567141052, "loss": 1.9889, "step": 209 }, { "epoch": 0.06564551422319474, "grad_norm": 0.1787109375, "learning_rate": 0.00019987928174675023, "loss": 1.6262, "step": 210 }, { "epoch": 0.06595811190997186, "grad_norm": 0.173828125, "learning_rate": 0.00019987807179225035, "loss": 1.8805, "step": 211 }, { "epoch": 0.06627070959674898, "grad_norm": 0.181640625, "learning_rate": 0.00019987685580798403, "loss": 1.7265, "step": 212 }, { "epoch": 0.0665833072835261, "grad_norm": 0.17578125, "learning_rate": 0.0001998756337940247, "loss": 1.7049, "step": 213 }, { "epoch": 0.06689590497030322, "grad_norm": 0.173828125, "learning_rate": 0.00019987440575044602, "loss": 1.7256, "step": 214 }, { "epoch": 0.06720850265708034, "grad_norm": 0.1748046875, "learning_rate": 0.00019987317167732222, "loss": 1.9469, "step": 215 }, { "epoch": 0.06752110034385746, "grad_norm": 0.177734375, "learning_rate": 0.00019987193157472777, "loss": 2.0254, "step": 216 }, { "epoch": 0.06783369803063458, "grad_norm": 0.1904296875, "learning_rate": 0.00019987068544273756, "loss": 2.1006, "step": 217 }, { "epoch": 0.0681462957174117, "grad_norm": 0.1767578125, "learning_rate": 0.00019986943328142678, "loss": 1.9486, "step": 218 }, { "epoch": 0.06845889340418881, "grad_norm": 0.181640625, "learning_rate": 0.00019986817509087107, "loss": 1.9707, "step": 219 }, { "epoch": 0.06877149109096593, "grad_norm": 0.169921875, "learning_rate": 0.00019986691087114635, "loss": 1.868, "step": 220 }, { "epoch": 0.06908408877774304, "grad_norm": 0.181640625, "learning_rate": 0.00019986564062232897, "loss": 1.9028, "step": 221 }, { "epoch": 0.06939668646452016, "grad_norm": 0.1787109375, "learning_rate": 0.0001998643643444956, "loss": 1.9136, "step": 222 }, { "epoch": 0.06970928415129728, "grad_norm": 0.181640625, "learning_rate": 0.0001998630820377233, "loss": 1.8039, "step": 223 }, { "epoch": 0.0700218818380744, "grad_norm": 0.1884765625, "learning_rate": 0.00019986179370208947, "loss": 1.7326, "step": 224 }, { "epoch": 0.07033447952485151, "grad_norm": 0.169921875, "learning_rate": 0.0001998604993376719, "loss": 1.7712, "step": 225 }, { "epoch": 0.07064707721162863, "grad_norm": 0.1826171875, "learning_rate": 0.00019985919894454875, "loss": 1.9061, "step": 226 }, { "epoch": 0.07095967489840575, "grad_norm": 0.181640625, "learning_rate": 0.00019985789252279846, "loss": 1.8444, "step": 227 }, { "epoch": 0.07127227258518287, "grad_norm": 0.1787109375, "learning_rate": 0.0001998565800725, "loss": 2.1696, "step": 228 }, { "epoch": 0.07158487027195999, "grad_norm": 0.19140625, "learning_rate": 0.00019985526159373255, "loss": 1.9888, "step": 229 }, { "epoch": 0.07189746795873711, "grad_norm": 0.1865234375, "learning_rate": 0.00019985393708657568, "loss": 2.018, "step": 230 }, { "epoch": 0.07221006564551423, "grad_norm": 0.18359375, "learning_rate": 0.0001998526065511094, "loss": 1.7847, "step": 231 }, { "epoch": 0.07252266333229133, "grad_norm": 0.1826171875, "learning_rate": 0.00019985126998741404, "loss": 1.879, "step": 232 }, { "epoch": 0.07283526101906845, "grad_norm": 0.177734375, "learning_rate": 0.00019984992739557024, "loss": 1.7065, "step": 233 }, { "epoch": 0.07314785870584557, "grad_norm": 0.1806640625, "learning_rate": 0.00019984857877565907, "loss": 1.7451, "step": 234 }, { "epoch": 0.07346045639262269, "grad_norm": 0.173828125, "learning_rate": 0.000199847224127762, "loss": 1.8228, "step": 235 }, { "epoch": 0.07377305407939981, "grad_norm": 0.1806640625, "learning_rate": 0.00019984586345196074, "loss": 1.9904, "step": 236 }, { "epoch": 0.07408565176617693, "grad_norm": 0.1767578125, "learning_rate": 0.0001998444967483375, "loss": 1.8958, "step": 237 }, { "epoch": 0.07439824945295405, "grad_norm": 0.1845703125, "learning_rate": 0.00019984312401697473, "loss": 1.8913, "step": 238 }, { "epoch": 0.07471084713973117, "grad_norm": 0.193359375, "learning_rate": 0.00019984174525795536, "loss": 1.9273, "step": 239 }, { "epoch": 0.07502344482650829, "grad_norm": 0.189453125, "learning_rate": 0.00019984036047136257, "loss": 1.8831, "step": 240 }, { "epoch": 0.0753360425132854, "grad_norm": 0.19140625, "learning_rate": 0.00019983896965728001, "loss": 1.9506, "step": 241 }, { "epoch": 0.07564864020006253, "grad_norm": 0.173828125, "learning_rate": 0.00019983757281579162, "loss": 1.971, "step": 242 }, { "epoch": 0.07596123788683964, "grad_norm": 0.1865234375, "learning_rate": 0.00019983616994698173, "loss": 1.8156, "step": 243 }, { "epoch": 0.07627383557361675, "grad_norm": 0.1826171875, "learning_rate": 0.00019983476105093505, "loss": 1.9397, "step": 244 }, { "epoch": 0.07658643326039387, "grad_norm": 0.177734375, "learning_rate": 0.00019983334612773662, "loss": 1.7567, "step": 245 }, { "epoch": 0.07689903094717099, "grad_norm": 0.1767578125, "learning_rate": 0.00019983192517747186, "loss": 1.8685, "step": 246 }, { "epoch": 0.0772116286339481, "grad_norm": 0.1767578125, "learning_rate": 0.00019983049820022656, "loss": 2.2285, "step": 247 }, { "epoch": 0.07752422632072523, "grad_norm": 0.193359375, "learning_rate": 0.00019982906519608687, "loss": 1.9532, "step": 248 }, { "epoch": 0.07783682400750234, "grad_norm": 0.1767578125, "learning_rate": 0.0001998276261651393, "loss": 1.8775, "step": 249 }, { "epoch": 0.07814942169427946, "grad_norm": 0.18359375, "learning_rate": 0.00019982618110747074, "loss": 1.892, "step": 250 }, { "epoch": 0.07846201938105658, "grad_norm": 0.1787109375, "learning_rate": 0.00019982473002316838, "loss": 2.2827, "step": 251 }, { "epoch": 0.0787746170678337, "grad_norm": 0.185546875, "learning_rate": 0.0001998232729123199, "loss": 2.1452, "step": 252 }, { "epoch": 0.07908721475461082, "grad_norm": 0.1845703125, "learning_rate": 0.00019982180977501322, "loss": 1.7888, "step": 253 }, { "epoch": 0.07939981244138794, "grad_norm": 0.1845703125, "learning_rate": 0.00019982034061133666, "loss": 1.7486, "step": 254 }, { "epoch": 0.07971241012816505, "grad_norm": 0.1728515625, "learning_rate": 0.00019981886542137892, "loss": 1.8143, "step": 255 }, { "epoch": 0.08002500781494216, "grad_norm": 0.18359375, "learning_rate": 0.00019981738420522913, "loss": 1.839, "step": 256 }, { "epoch": 0.08033760550171928, "grad_norm": 0.169921875, "learning_rate": 0.00019981589696297663, "loss": 1.918, "step": 257 }, { "epoch": 0.0806502031884964, "grad_norm": 0.19140625, "learning_rate": 0.00019981440369471124, "loss": 1.9144, "step": 258 }, { "epoch": 0.08096280087527352, "grad_norm": 0.1826171875, "learning_rate": 0.00019981290440052306, "loss": 1.7846, "step": 259 }, { "epoch": 0.08127539856205064, "grad_norm": 0.1826171875, "learning_rate": 0.0001998113990805027, "loss": 1.9837, "step": 260 }, { "epoch": 0.08158799624882776, "grad_norm": 0.1875, "learning_rate": 0.00019980988773474098, "loss": 1.9422, "step": 261 }, { "epoch": 0.08190059393560488, "grad_norm": 0.1845703125, "learning_rate": 0.00019980837036332917, "loss": 1.7637, "step": 262 }, { "epoch": 0.082213191622382, "grad_norm": 0.67578125, "learning_rate": 0.0001998068469663588, "loss": 2.5924, "step": 263 }, { "epoch": 0.08252578930915912, "grad_norm": 0.185546875, "learning_rate": 0.0001998053175439219, "loss": 1.8041, "step": 264 }, { "epoch": 0.08283838699593624, "grad_norm": 0.19921875, "learning_rate": 0.00019980378209611083, "loss": 2.139, "step": 265 }, { "epoch": 0.08315098468271334, "grad_norm": 0.1904296875, "learning_rate": 0.0001998022406230182, "loss": 1.8233, "step": 266 }, { "epoch": 0.08346358236949046, "grad_norm": 0.2021484375, "learning_rate": 0.0001998006931247372, "loss": 1.9227, "step": 267 }, { "epoch": 0.08377618005626758, "grad_norm": 0.19140625, "learning_rate": 0.00019979913960136114, "loss": 1.7389, "step": 268 }, { "epoch": 0.0840887777430447, "grad_norm": 0.1826171875, "learning_rate": 0.00019979758005298385, "loss": 1.6342, "step": 269 }, { "epoch": 0.08440137542982182, "grad_norm": 0.181640625, "learning_rate": 0.0001997960144796995, "loss": 1.9472, "step": 270 }, { "epoch": 0.08471397311659894, "grad_norm": 0.18359375, "learning_rate": 0.00019979444288160253, "loss": 1.7985, "step": 271 }, { "epoch": 0.08502657080337606, "grad_norm": 0.1806640625, "learning_rate": 0.00019979286525878792, "loss": 1.8546, "step": 272 }, { "epoch": 0.08533916849015317, "grad_norm": 0.19140625, "learning_rate": 0.00019979128161135083, "loss": 1.9697, "step": 273 }, { "epoch": 0.0856517661769303, "grad_norm": 0.2001953125, "learning_rate": 0.00019978969193938694, "loss": 2.095, "step": 274 }, { "epoch": 0.08596436386370741, "grad_norm": 0.1884765625, "learning_rate": 0.00019978809624299218, "loss": 1.9491, "step": 275 }, { "epoch": 0.08627696155048453, "grad_norm": 0.1787109375, "learning_rate": 0.00019978649452226285, "loss": 1.9463, "step": 276 }, { "epoch": 0.08658955923726164, "grad_norm": 0.1865234375, "learning_rate": 0.00019978488677729574, "loss": 1.8981, "step": 277 }, { "epoch": 0.08690215692403876, "grad_norm": 0.1923828125, "learning_rate": 0.00019978327300818784, "loss": 1.9126, "step": 278 }, { "epoch": 0.08721475461081588, "grad_norm": 0.18359375, "learning_rate": 0.0001997816532150366, "loss": 1.8987, "step": 279 }, { "epoch": 0.087527352297593, "grad_norm": 0.201171875, "learning_rate": 0.00019978002739793978, "loss": 1.7486, "step": 280 }, { "epoch": 0.08783994998437011, "grad_norm": 0.2041015625, "learning_rate": 0.00019977839555699553, "loss": 1.9603, "step": 281 }, { "epoch": 0.08815254767114723, "grad_norm": 0.19140625, "learning_rate": 0.00019977675769230246, "loss": 1.8714, "step": 282 }, { "epoch": 0.08846514535792435, "grad_norm": 0.2001953125, "learning_rate": 0.00019977511380395933, "loss": 2.0087, "step": 283 }, { "epoch": 0.08877774304470147, "grad_norm": 0.177734375, "learning_rate": 0.00019977346389206545, "loss": 2.1653, "step": 284 }, { "epoch": 0.08909034073147859, "grad_norm": 0.1845703125, "learning_rate": 0.00019977180795672044, "loss": 2.0311, "step": 285 }, { "epoch": 0.08940293841825571, "grad_norm": 0.1826171875, "learning_rate": 0.00019977014599802418, "loss": 1.8212, "step": 286 }, { "epoch": 0.08971553610503283, "grad_norm": 0.193359375, "learning_rate": 0.00019976847801607712, "loss": 2.0245, "step": 287 }, { "epoch": 0.09002813379180995, "grad_norm": 0.1806640625, "learning_rate": 0.0001997668040109799, "loss": 1.8573, "step": 288 }, { "epoch": 0.09034073147858705, "grad_norm": 0.1806640625, "learning_rate": 0.00019976512398283357, "loss": 1.7208, "step": 289 }, { "epoch": 0.09065332916536417, "grad_norm": 0.181640625, "learning_rate": 0.00019976343793173958, "loss": 1.7056, "step": 290 }, { "epoch": 0.09096592685214129, "grad_norm": 0.1806640625, "learning_rate": 0.00019976174585779972, "loss": 1.8874, "step": 291 }, { "epoch": 0.09127852453891841, "grad_norm": 0.181640625, "learning_rate": 0.00019976004776111613, "loss": 1.5886, "step": 292 }, { "epoch": 0.09159112222569553, "grad_norm": 0.181640625, "learning_rate": 0.00019975834364179134, "loss": 1.7725, "step": 293 }, { "epoch": 0.09190371991247265, "grad_norm": 0.189453125, "learning_rate": 0.0001997566334999282, "loss": 1.7855, "step": 294 }, { "epoch": 0.09221631759924977, "grad_norm": 0.1875, "learning_rate": 0.00019975491733563, "loss": 1.7919, "step": 295 }, { "epoch": 0.09252891528602689, "grad_norm": 0.185546875, "learning_rate": 0.00019975319514900028, "loss": 1.7353, "step": 296 }, { "epoch": 0.092841512972804, "grad_norm": 0.1826171875, "learning_rate": 0.00019975146694014312, "loss": 1.8983, "step": 297 }, { "epoch": 0.09315411065958112, "grad_norm": 0.185546875, "learning_rate": 0.00019974973270916273, "loss": 2.115, "step": 298 }, { "epoch": 0.09346670834635824, "grad_norm": 0.177734375, "learning_rate": 0.00019974799245616387, "loss": 1.9605, "step": 299 }, { "epoch": 0.09377930603313535, "grad_norm": 0.1953125, "learning_rate": 0.0001997462461812516, "loss": 1.9963, "step": 300 }, { "epoch": 0.09409190371991247, "grad_norm": 0.189453125, "learning_rate": 0.00019974449388453135, "loss": 1.8288, "step": 301 }, { "epoch": 0.09440450140668959, "grad_norm": 0.1904296875, "learning_rate": 0.0001997427355661089, "loss": 1.7948, "step": 302 }, { "epoch": 0.0947170990934667, "grad_norm": 0.1826171875, "learning_rate": 0.0001997409712260904, "loss": 1.868, "step": 303 }, { "epoch": 0.09502969678024382, "grad_norm": 0.1708984375, "learning_rate": 0.00019973920086458237, "loss": 1.8929, "step": 304 }, { "epoch": 0.09534229446702094, "grad_norm": 0.1796875, "learning_rate": 0.00019973742448169165, "loss": 1.6884, "step": 305 }, { "epoch": 0.09565489215379806, "grad_norm": 0.1826171875, "learning_rate": 0.00019973564207752554, "loss": 1.6901, "step": 306 }, { "epoch": 0.09596748984057518, "grad_norm": 0.1875, "learning_rate": 0.00019973385365219164, "loss": 1.7943, "step": 307 }, { "epoch": 0.0962800875273523, "grad_norm": 0.1875, "learning_rate": 0.0001997320592057979, "loss": 1.9581, "step": 308 }, { "epoch": 0.09659268521412942, "grad_norm": 0.1845703125, "learning_rate": 0.00019973025873845263, "loss": 1.6522, "step": 309 }, { "epoch": 0.09690528290090654, "grad_norm": 0.189453125, "learning_rate": 0.00019972845225026456, "loss": 1.9327, "step": 310 }, { "epoch": 0.09721788058768364, "grad_norm": 0.18359375, "learning_rate": 0.00019972663974134275, "loss": 1.9542, "step": 311 }, { "epoch": 0.09753047827446076, "grad_norm": 0.189453125, "learning_rate": 0.00019972482121179664, "loss": 2.0571, "step": 312 }, { "epoch": 0.09784307596123788, "grad_norm": 0.181640625, "learning_rate": 0.00019972299666173594, "loss": 2.2707, "step": 313 }, { "epoch": 0.098155673648015, "grad_norm": 0.185546875, "learning_rate": 0.0001997211660912709, "loss": 1.9587, "step": 314 }, { "epoch": 0.09846827133479212, "grad_norm": 0.189453125, "learning_rate": 0.00019971932950051198, "loss": 2.0126, "step": 315 }, { "epoch": 0.09878086902156924, "grad_norm": 0.1806640625, "learning_rate": 0.00019971748688957003, "loss": 1.7935, "step": 316 }, { "epoch": 0.09909346670834636, "grad_norm": 0.18359375, "learning_rate": 0.00019971563825855638, "loss": 1.8761, "step": 317 }, { "epoch": 0.09940606439512348, "grad_norm": 0.19921875, "learning_rate": 0.00019971378360758254, "loss": 2.2404, "step": 318 }, { "epoch": 0.0997186620819006, "grad_norm": 0.177734375, "learning_rate": 0.0001997119229367605, "loss": 1.8394, "step": 319 }, { "epoch": 0.10003125976867772, "grad_norm": 0.1845703125, "learning_rate": 0.00019971005624620265, "loss": 1.8923, "step": 320 }, { "epoch": 0.10034385745545484, "grad_norm": 0.1953125, "learning_rate": 0.00019970818353602163, "loss": 1.6077, "step": 321 }, { "epoch": 0.10065645514223195, "grad_norm": 0.1806640625, "learning_rate": 0.00019970630480633047, "loss": 1.8617, "step": 322 }, { "epoch": 0.10096905282900906, "grad_norm": 0.1845703125, "learning_rate": 0.0001997044200572427, "loss": 1.892, "step": 323 }, { "epoch": 0.10128165051578618, "grad_norm": 0.181640625, "learning_rate": 0.000199702529288872, "loss": 1.7457, "step": 324 }, { "epoch": 0.1015942482025633, "grad_norm": 0.173828125, "learning_rate": 0.00019970063250133256, "loss": 1.9309, "step": 325 }, { "epoch": 0.10190684588934042, "grad_norm": 0.1923828125, "learning_rate": 0.00019969872969473888, "loss": 1.905, "step": 326 }, { "epoch": 0.10221944357611754, "grad_norm": 0.1826171875, "learning_rate": 0.00019969682086920585, "loss": 1.697, "step": 327 }, { "epoch": 0.10253204126289465, "grad_norm": 0.1865234375, "learning_rate": 0.0001996949060248487, "loss": 1.8728, "step": 328 }, { "epoch": 0.10284463894967177, "grad_norm": 0.1796875, "learning_rate": 0.00019969298516178303, "loss": 1.7783, "step": 329 }, { "epoch": 0.10315723663644889, "grad_norm": 0.1806640625, "learning_rate": 0.0001996910582801248, "loss": 1.8591, "step": 330 }, { "epoch": 0.10346983432322601, "grad_norm": 0.181640625, "learning_rate": 0.00019968912537999034, "loss": 1.8009, "step": 331 }, { "epoch": 0.10378243201000313, "grad_norm": 0.177734375, "learning_rate": 0.00019968718646149635, "loss": 1.6679, "step": 332 }, { "epoch": 0.10409502969678025, "grad_norm": 0.1787109375, "learning_rate": 0.00019968524152475986, "loss": 1.9598, "step": 333 }, { "epoch": 0.10440762738355736, "grad_norm": 0.185546875, "learning_rate": 0.00019968329056989836, "loss": 1.7525, "step": 334 }, { "epoch": 0.10472022507033447, "grad_norm": 0.1875, "learning_rate": 0.00019968133359702956, "loss": 1.9891, "step": 335 }, { "epoch": 0.1050328227571116, "grad_norm": 0.27734375, "learning_rate": 0.00019967937060627163, "loss": 2.6398, "step": 336 }, { "epoch": 0.10534542044388871, "grad_norm": 0.1884765625, "learning_rate": 0.00019967740159774304, "loss": 1.8126, "step": 337 }, { "epoch": 0.10565801813066583, "grad_norm": 0.1845703125, "learning_rate": 0.0001996754265715627, "loss": 1.5844, "step": 338 }, { "epoch": 0.10597061581744295, "grad_norm": 0.443359375, "learning_rate": 0.00019967344552784987, "loss": 2.6948, "step": 339 }, { "epoch": 0.10628321350422007, "grad_norm": 0.1796875, "learning_rate": 0.00019967145846672412, "loss": 1.8124, "step": 340 }, { "epoch": 0.10659581119099719, "grad_norm": 0.17578125, "learning_rate": 0.00019966946538830537, "loss": 1.7512, "step": 341 }, { "epoch": 0.10690840887777431, "grad_norm": 0.203125, "learning_rate": 0.00019966746629271402, "loss": 1.886, "step": 342 }, { "epoch": 0.10722100656455143, "grad_norm": 0.1865234375, "learning_rate": 0.0001996654611800707, "loss": 1.8067, "step": 343 }, { "epoch": 0.10753360425132855, "grad_norm": 0.185546875, "learning_rate": 0.0001996634500504965, "loss": 1.8013, "step": 344 }, { "epoch": 0.10784620193810565, "grad_norm": 0.1904296875, "learning_rate": 0.00019966143290411282, "loss": 1.701, "step": 345 }, { "epoch": 0.10815879962488277, "grad_norm": 0.1953125, "learning_rate": 0.00019965940974104145, "loss": 1.6386, "step": 346 }, { "epoch": 0.10847139731165989, "grad_norm": 0.1962890625, "learning_rate": 0.0001996573805614045, "loss": 1.9652, "step": 347 }, { "epoch": 0.10878399499843701, "grad_norm": 0.189453125, "learning_rate": 0.0001996553453653245, "loss": 1.8178, "step": 348 }, { "epoch": 0.10909659268521413, "grad_norm": 0.177734375, "learning_rate": 0.00019965330415292428, "loss": 1.8802, "step": 349 }, { "epoch": 0.10940919037199125, "grad_norm": 0.1767578125, "learning_rate": 0.0001996512569243271, "loss": 1.6879, "step": 350 }, { "epoch": 0.10972178805876837, "grad_norm": 0.1923828125, "learning_rate": 0.0001996492036796566, "loss": 1.8288, "step": 351 }, { "epoch": 0.11003438574554548, "grad_norm": 0.1826171875, "learning_rate": 0.00019964714441903663, "loss": 1.8453, "step": 352 }, { "epoch": 0.1103469834323226, "grad_norm": 0.1904296875, "learning_rate": 0.00019964507914259157, "loss": 1.8259, "step": 353 }, { "epoch": 0.11065958111909972, "grad_norm": 0.1884765625, "learning_rate": 0.00019964300785044615, "loss": 1.9748, "step": 354 }, { "epoch": 0.11097217880587684, "grad_norm": 0.18359375, "learning_rate": 0.00019964093054272535, "loss": 2.0296, "step": 355 }, { "epoch": 0.11128477649265396, "grad_norm": 0.1806640625, "learning_rate": 0.0001996388472195546, "loss": 2.1065, "step": 356 }, { "epoch": 0.11159737417943107, "grad_norm": 0.19140625, "learning_rate": 0.00019963675788105967, "loss": 1.712, "step": 357 }, { "epoch": 0.11190997186620819, "grad_norm": 0.173828125, "learning_rate": 0.0001996346625273667, "loss": 2.178, "step": 358 }, { "epoch": 0.1122225695529853, "grad_norm": 0.2001953125, "learning_rate": 0.00019963256115860219, "loss": 1.6854, "step": 359 }, { "epoch": 0.11253516723976242, "grad_norm": 0.1845703125, "learning_rate": 0.00019963045377489297, "loss": 1.7912, "step": 360 }, { "epoch": 0.11284776492653954, "grad_norm": 0.181640625, "learning_rate": 0.00019962834037636634, "loss": 1.7385, "step": 361 }, { "epoch": 0.11316036261331666, "grad_norm": 0.19140625, "learning_rate": 0.00019962622096314983, "loss": 1.787, "step": 362 }, { "epoch": 0.11347296030009378, "grad_norm": 0.185546875, "learning_rate": 0.00019962409553537141, "loss": 1.7083, "step": 363 }, { "epoch": 0.1137855579868709, "grad_norm": 0.177734375, "learning_rate": 0.00019962196409315937, "loss": 1.7489, "step": 364 }, { "epoch": 0.11409815567364802, "grad_norm": 0.1875, "learning_rate": 0.00019961982663664244, "loss": 1.8184, "step": 365 }, { "epoch": 0.11441075336042514, "grad_norm": 0.181640625, "learning_rate": 0.0001996176831659496, "loss": 1.924, "step": 366 }, { "epoch": 0.11472335104720226, "grad_norm": 0.1904296875, "learning_rate": 0.0001996155336812103, "loss": 2.1837, "step": 367 }, { "epoch": 0.11503594873397936, "grad_norm": 0.1806640625, "learning_rate": 0.00019961337818255424, "loss": 1.9305, "step": 368 }, { "epoch": 0.11534854642075648, "grad_norm": 0.1923828125, "learning_rate": 0.00019961121667011166, "loss": 1.9867, "step": 369 }, { "epoch": 0.1156611441075336, "grad_norm": 0.1904296875, "learning_rate": 0.00019960904914401298, "loss": 1.968, "step": 370 }, { "epoch": 0.11597374179431072, "grad_norm": 0.1845703125, "learning_rate": 0.00019960687560438908, "loss": 1.6922, "step": 371 }, { "epoch": 0.11628633948108784, "grad_norm": 0.169921875, "learning_rate": 0.00019960469605137114, "loss": 1.7978, "step": 372 }, { "epoch": 0.11659893716786496, "grad_norm": 0.189453125, "learning_rate": 0.0001996025104850908, "loss": 1.8674, "step": 373 }, { "epoch": 0.11691153485464208, "grad_norm": 0.1796875, "learning_rate": 0.00019960031890567997, "loss": 1.7445, "step": 374 }, { "epoch": 0.1172241325414192, "grad_norm": 0.185546875, "learning_rate": 0.00019959812131327095, "loss": 1.7513, "step": 375 }, { "epoch": 0.11753673022819631, "grad_norm": 0.1923828125, "learning_rate": 0.00019959591770799643, "loss": 1.7463, "step": 376 }, { "epoch": 0.11784932791497343, "grad_norm": 0.189453125, "learning_rate": 0.00019959370808998945, "loss": 1.6496, "step": 377 }, { "epoch": 0.11816192560175055, "grad_norm": 0.18359375, "learning_rate": 0.0001995914924593834, "loss": 1.6407, "step": 378 }, { "epoch": 0.11847452328852766, "grad_norm": 0.19140625, "learning_rate": 0.00019958927081631205, "loss": 1.9992, "step": 379 }, { "epoch": 0.11878712097530478, "grad_norm": 0.1875, "learning_rate": 0.0001995870431609095, "loss": 1.7538, "step": 380 }, { "epoch": 0.1190997186620819, "grad_norm": 0.1845703125, "learning_rate": 0.00019958480949331024, "loss": 1.6851, "step": 381 }, { "epoch": 0.11941231634885902, "grad_norm": 0.189453125, "learning_rate": 0.00019958256981364916, "loss": 1.7887, "step": 382 }, { "epoch": 0.11972491403563613, "grad_norm": 0.181640625, "learning_rate": 0.00019958032412206142, "loss": 1.8162, "step": 383 }, { "epoch": 0.12003751172241325, "grad_norm": 0.1875, "learning_rate": 0.0001995780724186826, "loss": 1.8541, "step": 384 }, { "epoch": 0.12035010940919037, "grad_norm": 0.1875, "learning_rate": 0.00019957581470364869, "loss": 1.8194, "step": 385 }, { "epoch": 0.12066270709596749, "grad_norm": 0.20703125, "learning_rate": 0.0001995735509770959, "loss": 1.7891, "step": 386 }, { "epoch": 0.12097530478274461, "grad_norm": 0.1884765625, "learning_rate": 0.00019957128123916103, "loss": 1.992, "step": 387 }, { "epoch": 0.12128790246952173, "grad_norm": 0.1845703125, "learning_rate": 0.00019956900548998097, "loss": 1.9259, "step": 388 }, { "epoch": 0.12160050015629885, "grad_norm": 0.1845703125, "learning_rate": 0.00019956672372969315, "loss": 2.0642, "step": 389 }, { "epoch": 0.12191309784307595, "grad_norm": 0.1826171875, "learning_rate": 0.0001995644359584354, "loss": 1.6211, "step": 390 }, { "epoch": 0.12222569552985307, "grad_norm": 0.1904296875, "learning_rate": 0.00019956214217634575, "loss": 1.7604, "step": 391 }, { "epoch": 0.12253829321663019, "grad_norm": 0.177734375, "learning_rate": 0.00019955984238356268, "loss": 1.8761, "step": 392 }, { "epoch": 0.12285089090340731, "grad_norm": 0.193359375, "learning_rate": 0.0001995575365802251, "loss": 2.0069, "step": 393 }, { "epoch": 0.12316348859018443, "grad_norm": 0.1845703125, "learning_rate": 0.0001995552247664721, "loss": 1.7372, "step": 394 }, { "epoch": 0.12347608627696155, "grad_norm": 0.1923828125, "learning_rate": 0.00019955290694244338, "loss": 1.8025, "step": 395 }, { "epoch": 0.12378868396373867, "grad_norm": 0.1884765625, "learning_rate": 0.00019955058310827878, "loss": 1.8633, "step": 396 }, { "epoch": 0.12410128165051579, "grad_norm": 0.1787109375, "learning_rate": 0.00019954825326411863, "loss": 1.9765, "step": 397 }, { "epoch": 0.1244138793372929, "grad_norm": 0.197265625, "learning_rate": 0.0001995459174101036, "loss": 1.6959, "step": 398 }, { "epoch": 0.12472647702407003, "grad_norm": 0.1865234375, "learning_rate": 0.0001995435755463746, "loss": 1.6401, "step": 399 }, { "epoch": 0.12503907471084713, "grad_norm": 0.185546875, "learning_rate": 0.00019954122767307318, "loss": 2.1424, "step": 400 }, { "epoch": 0.12535167239762426, "grad_norm": 0.17578125, "learning_rate": 0.00019953887379034094, "loss": 1.9393, "step": 401 }, { "epoch": 0.12566427008440137, "grad_norm": 0.193359375, "learning_rate": 0.00019953651389832008, "loss": 1.8414, "step": 402 }, { "epoch": 0.1259768677711785, "grad_norm": 0.2001953125, "learning_rate": 0.00019953414799715304, "loss": 1.9348, "step": 403 }, { "epoch": 0.1262894654579556, "grad_norm": 0.1904296875, "learning_rate": 0.00019953177608698263, "loss": 1.6774, "step": 404 }, { "epoch": 0.12660206314473274, "grad_norm": 0.18359375, "learning_rate": 0.00019952939816795205, "loss": 1.9635, "step": 405 }, { "epoch": 0.12691466083150985, "grad_norm": 0.189453125, "learning_rate": 0.0001995270142402049, "loss": 1.788, "step": 406 }, { "epoch": 0.12722725851828695, "grad_norm": 0.177734375, "learning_rate": 0.00019952462430388506, "loss": 1.7256, "step": 407 }, { "epoch": 0.12753985620506408, "grad_norm": 0.1884765625, "learning_rate": 0.00019952222835913682, "loss": 1.8476, "step": 408 }, { "epoch": 0.1278524538918412, "grad_norm": 0.19140625, "learning_rate": 0.00019951982640610484, "loss": 1.9212, "step": 409 }, { "epoch": 0.12816505157861832, "grad_norm": 0.2001953125, "learning_rate": 0.00019951741844493413, "loss": 1.807, "step": 410 }, { "epoch": 0.12847764926539543, "grad_norm": 0.1865234375, "learning_rate": 0.00019951500447577003, "loss": 1.6015, "step": 411 }, { "epoch": 0.12879024695217256, "grad_norm": 0.1845703125, "learning_rate": 0.00019951258449875828, "loss": 1.8802, "step": 412 }, { "epoch": 0.12910284463894967, "grad_norm": 0.1884765625, "learning_rate": 0.00019951015851404504, "loss": 1.9614, "step": 413 }, { "epoch": 0.1294154423257268, "grad_norm": 0.197265625, "learning_rate": 0.0001995077265217767, "loss": 1.8907, "step": 414 }, { "epoch": 0.1297280400125039, "grad_norm": 0.197265625, "learning_rate": 0.00019950528852210014, "loss": 1.8123, "step": 415 }, { "epoch": 0.13004063769928104, "grad_norm": 0.18359375, "learning_rate": 0.00019950284451516245, "loss": 1.6966, "step": 416 }, { "epoch": 0.13035323538605814, "grad_norm": 0.1865234375, "learning_rate": 0.00019950039450111127, "loss": 2.0439, "step": 417 }, { "epoch": 0.13066583307283527, "grad_norm": 0.185546875, "learning_rate": 0.00019949793848009448, "loss": 1.9781, "step": 418 }, { "epoch": 0.13097843075961238, "grad_norm": 0.1884765625, "learning_rate": 0.00019949547645226035, "loss": 1.9264, "step": 419 }, { "epoch": 0.13129102844638948, "grad_norm": 0.197265625, "learning_rate": 0.00019949300841775753, "loss": 2.0297, "step": 420 }, { "epoch": 0.13160362613316662, "grad_norm": 0.19140625, "learning_rate": 0.000199490534376735, "loss": 1.9136, "step": 421 }, { "epoch": 0.13191622381994372, "grad_norm": 0.1904296875, "learning_rate": 0.00019948805432934213, "loss": 1.8224, "step": 422 }, { "epoch": 0.13222882150672086, "grad_norm": 0.1923828125, "learning_rate": 0.00019948556827572862, "loss": 1.7871, "step": 423 }, { "epoch": 0.13254141919349796, "grad_norm": 0.1962890625, "learning_rate": 0.00019948307621604457, "loss": 1.7048, "step": 424 }, { "epoch": 0.1328540168802751, "grad_norm": 0.1904296875, "learning_rate": 0.00019948057815044048, "loss": 1.9041, "step": 425 }, { "epoch": 0.1331666145670522, "grad_norm": 0.1796875, "learning_rate": 0.0001994780740790671, "loss": 1.7443, "step": 426 }, { "epoch": 0.13347921225382933, "grad_norm": 0.189453125, "learning_rate": 0.0001994755640020756, "loss": 1.6474, "step": 427 }, { "epoch": 0.13379180994060644, "grad_norm": 0.1962890625, "learning_rate": 0.00019947304791961758, "loss": 1.8303, "step": 428 }, { "epoch": 0.13410440762738357, "grad_norm": 0.1962890625, "learning_rate": 0.00019947052583184488, "loss": 1.64, "step": 429 }, { "epoch": 0.13441700531416068, "grad_norm": 0.189453125, "learning_rate": 0.00019946799773890974, "loss": 1.7586, "step": 430 }, { "epoch": 0.13472960300093778, "grad_norm": 0.1826171875, "learning_rate": 0.00019946546364096488, "loss": 1.8402, "step": 431 }, { "epoch": 0.13504220068771491, "grad_norm": 0.64453125, "learning_rate": 0.00019946292353816318, "loss": 2.2409, "step": 432 }, { "epoch": 0.13535479837449202, "grad_norm": 0.193359375, "learning_rate": 0.0001994603774306581, "loss": 1.8416, "step": 433 }, { "epoch": 0.13566739606126915, "grad_norm": 0.181640625, "learning_rate": 0.00019945782531860325, "loss": 1.7372, "step": 434 }, { "epoch": 0.13597999374804626, "grad_norm": 0.1923828125, "learning_rate": 0.00019945526720215273, "loss": 1.9704, "step": 435 }, { "epoch": 0.1362925914348234, "grad_norm": 0.185546875, "learning_rate": 0.00019945270308146103, "loss": 1.6651, "step": 436 }, { "epoch": 0.1366051891216005, "grad_norm": 0.19921875, "learning_rate": 0.00019945013295668288, "loss": 1.7958, "step": 437 }, { "epoch": 0.13691778680837763, "grad_norm": 0.1904296875, "learning_rate": 0.0001994475568279735, "loss": 2.0826, "step": 438 }, { "epoch": 0.13723038449515473, "grad_norm": 0.19140625, "learning_rate": 0.00019944497469548837, "loss": 1.8808, "step": 439 }, { "epoch": 0.13754298218193187, "grad_norm": 0.2041015625, "learning_rate": 0.00019944238655938339, "loss": 2.257, "step": 440 }, { "epoch": 0.13785557986870897, "grad_norm": 0.1796875, "learning_rate": 0.0001994397924198148, "loss": 2.0791, "step": 441 }, { "epoch": 0.13816817755548608, "grad_norm": 0.193359375, "learning_rate": 0.00019943719227693928, "loss": 1.8917, "step": 442 }, { "epoch": 0.1384807752422632, "grad_norm": 0.2001953125, "learning_rate": 0.0001994345861309137, "loss": 1.8261, "step": 443 }, { "epoch": 0.13879337292904032, "grad_norm": 0.189453125, "learning_rate": 0.00019943197398189546, "loss": 1.626, "step": 444 }, { "epoch": 0.13910597061581745, "grad_norm": 0.193359375, "learning_rate": 0.00019942935583004223, "loss": 1.7819, "step": 445 }, { "epoch": 0.13941856830259455, "grad_norm": 0.19921875, "learning_rate": 0.0001994267316755121, "loss": 1.8149, "step": 446 }, { "epoch": 0.1397311659893717, "grad_norm": 0.1796875, "learning_rate": 0.00019942410151846347, "loss": 1.9703, "step": 447 }, { "epoch": 0.1400437636761488, "grad_norm": 0.1884765625, "learning_rate": 0.00019942146535905514, "loss": 1.7519, "step": 448 }, { "epoch": 0.14035636136292592, "grad_norm": 0.201171875, "learning_rate": 0.00019941882319744625, "loss": 1.8088, "step": 449 }, { "epoch": 0.14066895904970303, "grad_norm": 0.1953125, "learning_rate": 0.0001994161750337963, "loss": 2.0352, "step": 450 }, { "epoch": 0.14098155673648016, "grad_norm": 0.19921875, "learning_rate": 0.0001994135208682652, "loss": 1.7832, "step": 451 }, { "epoch": 0.14129415442325727, "grad_norm": 0.1865234375, "learning_rate": 0.00019941086070101314, "loss": 1.7351, "step": 452 }, { "epoch": 0.14160675211003437, "grad_norm": 0.1845703125, "learning_rate": 0.00019940819453220074, "loss": 1.9127, "step": 453 }, { "epoch": 0.1419193497968115, "grad_norm": 0.478515625, "learning_rate": 0.00019940552236198897, "loss": 2.6953, "step": 454 }, { "epoch": 0.1422319474835886, "grad_norm": 0.1962890625, "learning_rate": 0.00019940284419053914, "loss": 2.0053, "step": 455 }, { "epoch": 0.14254454517036574, "grad_norm": 0.1865234375, "learning_rate": 0.00019940016001801294, "loss": 1.7283, "step": 456 }, { "epoch": 0.14285714285714285, "grad_norm": 0.1923828125, "learning_rate": 0.0001993974698445724, "loss": 1.7655, "step": 457 }, { "epoch": 0.14316974054391998, "grad_norm": 0.19921875, "learning_rate": 0.00019939477367037994, "loss": 1.8373, "step": 458 }, { "epoch": 0.1434823382306971, "grad_norm": 0.1962890625, "learning_rate": 0.00019939207149559835, "loss": 1.8626, "step": 459 }, { "epoch": 0.14379493591747422, "grad_norm": 0.1943359375, "learning_rate": 0.00019938936332039077, "loss": 1.6125, "step": 460 }, { "epoch": 0.14410753360425133, "grad_norm": 0.2021484375, "learning_rate": 0.00019938664914492062, "loss": 2.0307, "step": 461 }, { "epoch": 0.14442013129102846, "grad_norm": 0.193359375, "learning_rate": 0.00019938392896935183, "loss": 1.84, "step": 462 }, { "epoch": 0.14473272897780556, "grad_norm": 0.19921875, "learning_rate": 0.0001993812027938486, "loss": 1.9634, "step": 463 }, { "epoch": 0.14504532666458267, "grad_norm": 0.1953125, "learning_rate": 0.00019937847061857552, "loss": 2.0152, "step": 464 }, { "epoch": 0.1453579243513598, "grad_norm": 0.201171875, "learning_rate": 0.00019937573244369753, "loss": 1.8692, "step": 465 }, { "epoch": 0.1456705220381369, "grad_norm": 0.19140625, "learning_rate": 0.00019937298826937995, "loss": 1.7805, "step": 466 }, { "epoch": 0.14598311972491404, "grad_norm": 0.197265625, "learning_rate": 0.00019937023809578843, "loss": 1.9569, "step": 467 }, { "epoch": 0.14629571741169115, "grad_norm": 0.1865234375, "learning_rate": 0.000199367481923089, "loss": 1.9791, "step": 468 }, { "epoch": 0.14660831509846828, "grad_norm": 0.189453125, "learning_rate": 0.00019936471975144805, "loss": 1.7193, "step": 469 }, { "epoch": 0.14692091278524538, "grad_norm": 0.19140625, "learning_rate": 0.00019936195158103237, "loss": 1.7506, "step": 470 }, { "epoch": 0.14723351047202252, "grad_norm": 0.1865234375, "learning_rate": 0.00019935917741200902, "loss": 1.9867, "step": 471 }, { "epoch": 0.14754610815879962, "grad_norm": 0.1953125, "learning_rate": 0.00019935639724454556, "loss": 1.8894, "step": 472 }, { "epoch": 0.14785870584557675, "grad_norm": 0.197265625, "learning_rate": 0.00019935361107880977, "loss": 1.7917, "step": 473 }, { "epoch": 0.14817130353235386, "grad_norm": 0.1884765625, "learning_rate": 0.00019935081891496985, "loss": 1.9643, "step": 474 }, { "epoch": 0.14848390121913096, "grad_norm": 0.1962890625, "learning_rate": 0.0001993480207531944, "loss": 1.6624, "step": 475 }, { "epoch": 0.1487964989059081, "grad_norm": 0.1943359375, "learning_rate": 0.00019934521659365235, "loss": 1.5768, "step": 476 }, { "epoch": 0.1491090965926852, "grad_norm": 0.1884765625, "learning_rate": 0.00019934240643651298, "loss": 1.8556, "step": 477 }, { "epoch": 0.14942169427946234, "grad_norm": 0.189453125, "learning_rate": 0.00019933959028194592, "loss": 1.9329, "step": 478 }, { "epoch": 0.14973429196623944, "grad_norm": 0.203125, "learning_rate": 0.0001993367681301212, "loss": 1.7054, "step": 479 }, { "epoch": 0.15004688965301657, "grad_norm": 0.1904296875, "learning_rate": 0.0001993339399812092, "loss": 1.8809, "step": 480 }, { "epoch": 0.15035948733979368, "grad_norm": 0.1865234375, "learning_rate": 0.0001993311058353807, "loss": 1.5983, "step": 481 }, { "epoch": 0.1506720850265708, "grad_norm": 0.1865234375, "learning_rate": 0.00019932826569280673, "loss": 1.7169, "step": 482 }, { "epoch": 0.15098468271334792, "grad_norm": 0.1953125, "learning_rate": 0.00019932541955365883, "loss": 1.9345, "step": 483 }, { "epoch": 0.15129728040012505, "grad_norm": 0.1982421875, "learning_rate": 0.00019932256741810874, "loss": 2.1597, "step": 484 }, { "epoch": 0.15160987808690216, "grad_norm": 0.1826171875, "learning_rate": 0.0001993197092863287, "loss": 1.5661, "step": 485 }, { "epoch": 0.1519224757736793, "grad_norm": 0.19140625, "learning_rate": 0.0001993168451584912, "loss": 1.8121, "step": 486 }, { "epoch": 0.1522350734604564, "grad_norm": 0.18359375, "learning_rate": 0.00019931397503476924, "loss": 1.7365, "step": 487 }, { "epoch": 0.1525476711472335, "grad_norm": 0.1962890625, "learning_rate": 0.00019931109891533605, "loss": 1.6982, "step": 488 }, { "epoch": 0.15286026883401063, "grad_norm": 0.189453125, "learning_rate": 0.00019930821680036527, "loss": 1.9638, "step": 489 }, { "epoch": 0.15317286652078774, "grad_norm": 0.201171875, "learning_rate": 0.00019930532869003086, "loss": 2.1991, "step": 490 }, { "epoch": 0.15348546420756487, "grad_norm": 0.1923828125, "learning_rate": 0.00019930243458450724, "loss": 1.8095, "step": 491 }, { "epoch": 0.15379806189434198, "grad_norm": 0.177734375, "learning_rate": 0.0001992995344839691, "loss": 1.9021, "step": 492 }, { "epoch": 0.1541106595811191, "grad_norm": 0.19921875, "learning_rate": 0.0001992966283885915, "loss": 1.9448, "step": 493 }, { "epoch": 0.1544232572678962, "grad_norm": 0.19921875, "learning_rate": 0.00019929371629854992, "loss": 1.9806, "step": 494 }, { "epoch": 0.15473585495467335, "grad_norm": 0.1982421875, "learning_rate": 0.0001992907982140202, "loss": 1.7495, "step": 495 }, { "epoch": 0.15504845264145045, "grad_norm": 0.203125, "learning_rate": 0.00019928787413517842, "loss": 2.0022, "step": 496 }, { "epoch": 0.15536105032822758, "grad_norm": 0.193359375, "learning_rate": 0.00019928494406220115, "loss": 1.7185, "step": 497 }, { "epoch": 0.1556736480150047, "grad_norm": 0.1943359375, "learning_rate": 0.00019928200799526532, "loss": 2.0288, "step": 498 }, { "epoch": 0.1559862457017818, "grad_norm": 0.1923828125, "learning_rate": 0.00019927906593454812, "loss": 1.7969, "step": 499 }, { "epoch": 0.15629884338855893, "grad_norm": 0.1943359375, "learning_rate": 0.0001992761178802272, "loss": 2.1816, "step": 500 }, { "epoch": 0.15661144107533603, "grad_norm": 0.1953125, "learning_rate": 0.00019927316383248054, "loss": 1.8524, "step": 501 }, { "epoch": 0.15692403876211317, "grad_norm": 0.1923828125, "learning_rate": 0.00019927020379148646, "loss": 1.6543, "step": 502 }, { "epoch": 0.15723663644889027, "grad_norm": 0.203125, "learning_rate": 0.0001992672377574237, "loss": 1.7662, "step": 503 }, { "epoch": 0.1575492341356674, "grad_norm": 0.1953125, "learning_rate": 0.0001992642657304713, "loss": 1.8305, "step": 504 }, { "epoch": 0.1578618318224445, "grad_norm": 0.1845703125, "learning_rate": 0.00019926128771080868, "loss": 1.6887, "step": 505 }, { "epoch": 0.15817442950922164, "grad_norm": 0.1953125, "learning_rate": 0.00019925830369861564, "loss": 1.9668, "step": 506 }, { "epoch": 0.15848702719599875, "grad_norm": 0.2021484375, "learning_rate": 0.00019925531369407228, "loss": 1.8739, "step": 507 }, { "epoch": 0.15879962488277588, "grad_norm": 0.1962890625, "learning_rate": 0.00019925231769735917, "loss": 1.8289, "step": 508 }, { "epoch": 0.15911222256955299, "grad_norm": 0.185546875, "learning_rate": 0.0001992493157086572, "loss": 1.9057, "step": 509 }, { "epoch": 0.1594248202563301, "grad_norm": 0.1904296875, "learning_rate": 0.00019924630772814753, "loss": 1.8643, "step": 510 }, { "epoch": 0.15973741794310722, "grad_norm": 0.2080078125, "learning_rate": 0.00019924329375601177, "loss": 1.8911, "step": 511 }, { "epoch": 0.16005001562988433, "grad_norm": 0.1962890625, "learning_rate": 0.00019924027379243192, "loss": 1.6922, "step": 512 }, { "epoch": 0.16036261331666146, "grad_norm": 0.1923828125, "learning_rate": 0.0001992372478375903, "loss": 1.9621, "step": 513 }, { "epoch": 0.16067521100343857, "grad_norm": 0.1962890625, "learning_rate": 0.00019923421589166954, "loss": 1.8731, "step": 514 }, { "epoch": 0.1609878086902157, "grad_norm": 0.201171875, "learning_rate": 0.00019923117795485272, "loss": 1.6659, "step": 515 }, { "epoch": 0.1613004063769928, "grad_norm": 0.2080078125, "learning_rate": 0.00019922813402732325, "loss": 1.9896, "step": 516 }, { "epoch": 0.16161300406376994, "grad_norm": 0.1982421875, "learning_rate": 0.00019922508410926489, "loss": 1.8087, "step": 517 }, { "epoch": 0.16192560175054704, "grad_norm": 0.19921875, "learning_rate": 0.00019922202820086171, "loss": 2.0338, "step": 518 }, { "epoch": 0.16223819943732418, "grad_norm": 0.1884765625, "learning_rate": 0.00019921896630229827, "loss": 1.8984, "step": 519 }, { "epoch": 0.16255079712410128, "grad_norm": 0.205078125, "learning_rate": 0.0001992158984137594, "loss": 1.7892, "step": 520 }, { "epoch": 0.1628633948108784, "grad_norm": 0.19921875, "learning_rate": 0.00019921282453543032, "loss": 1.6763, "step": 521 }, { "epoch": 0.16317599249765552, "grad_norm": 0.185546875, "learning_rate": 0.0001992097446674966, "loss": 1.8474, "step": 522 }, { "epoch": 0.16348859018443263, "grad_norm": 0.193359375, "learning_rate": 0.00019920665881014416, "loss": 1.9876, "step": 523 }, { "epoch": 0.16380118787120976, "grad_norm": 0.1904296875, "learning_rate": 0.0001992035669635593, "loss": 1.7454, "step": 524 }, { "epoch": 0.16411378555798686, "grad_norm": 0.1904296875, "learning_rate": 0.0001992004691279287, "loss": 1.9164, "step": 525 }, { "epoch": 0.164426383244764, "grad_norm": 0.1923828125, "learning_rate": 0.00019919736530343935, "loss": 1.9096, "step": 526 }, { "epoch": 0.1647389809315411, "grad_norm": 0.1953125, "learning_rate": 0.00019919425549027865, "loss": 1.9148, "step": 527 }, { "epoch": 0.16505157861831823, "grad_norm": 0.1953125, "learning_rate": 0.00019919113968863437, "loss": 1.9967, "step": 528 }, { "epoch": 0.16536417630509534, "grad_norm": 0.2109375, "learning_rate": 0.00019918801789869453, "loss": 1.9329, "step": 529 }, { "epoch": 0.16567677399187247, "grad_norm": 0.19921875, "learning_rate": 0.00019918489012064772, "loss": 1.9399, "step": 530 }, { "epoch": 0.16598937167864958, "grad_norm": 0.1904296875, "learning_rate": 0.00019918175635468265, "loss": 1.9082, "step": 531 }, { "epoch": 0.16630196936542668, "grad_norm": 0.193359375, "learning_rate": 0.00019917861660098858, "loss": 1.9138, "step": 532 }, { "epoch": 0.16661456705220382, "grad_norm": 0.1904296875, "learning_rate": 0.00019917547085975505, "loss": 1.7534, "step": 533 }, { "epoch": 0.16692716473898092, "grad_norm": 0.181640625, "learning_rate": 0.00019917231913117197, "loss": 1.8574, "step": 534 }, { "epoch": 0.16723976242575805, "grad_norm": 0.19921875, "learning_rate": 0.0001991691614154296, "loss": 1.7967, "step": 535 }, { "epoch": 0.16755236011253516, "grad_norm": 0.1943359375, "learning_rate": 0.00019916599771271855, "loss": 1.765, "step": 536 }, { "epoch": 0.1678649577993123, "grad_norm": 0.1962890625, "learning_rate": 0.00019916282802322989, "loss": 1.9999, "step": 537 }, { "epoch": 0.1681775554860894, "grad_norm": 0.197265625, "learning_rate": 0.00019915965234715491, "loss": 1.9353, "step": 538 }, { "epoch": 0.16849015317286653, "grad_norm": 0.1962890625, "learning_rate": 0.00019915647068468538, "loss": 1.8003, "step": 539 }, { "epoch": 0.16880275085964364, "grad_norm": 0.19921875, "learning_rate": 0.00019915328303601334, "loss": 2.1542, "step": 540 }, { "epoch": 0.16911534854642077, "grad_norm": 0.2158203125, "learning_rate": 0.00019915008940133127, "loss": 1.9446, "step": 541 }, { "epoch": 0.16942794623319787, "grad_norm": 0.1982421875, "learning_rate": 0.00019914688978083192, "loss": 2.0184, "step": 542 }, { "epoch": 0.16974054391997498, "grad_norm": 0.1875, "learning_rate": 0.00019914368417470852, "loss": 1.8707, "step": 543 }, { "epoch": 0.1700531416067521, "grad_norm": 0.1982421875, "learning_rate": 0.00019914047258315457, "loss": 1.8503, "step": 544 }, { "epoch": 0.17036573929352922, "grad_norm": 0.1884765625, "learning_rate": 0.00019913725500636393, "loss": 1.9382, "step": 545 }, { "epoch": 0.17067833698030635, "grad_norm": 0.1962890625, "learning_rate": 0.00019913403144453088, "loss": 1.6436, "step": 546 }, { "epoch": 0.17099093466708346, "grad_norm": 0.197265625, "learning_rate": 0.00019913080189785002, "loss": 2.0155, "step": 547 }, { "epoch": 0.1713035323538606, "grad_norm": 0.1875, "learning_rate": 0.00019912756636651638, "loss": 1.9679, "step": 548 }, { "epoch": 0.1716161300406377, "grad_norm": 0.197265625, "learning_rate": 0.00019912432485072516, "loss": 1.619, "step": 549 }, { "epoch": 0.17192872772741483, "grad_norm": 0.1943359375, "learning_rate": 0.0001991210773506722, "loss": 1.8251, "step": 550 }, { "epoch": 0.17224132541419193, "grad_norm": 0.197265625, "learning_rate": 0.00019911782386655341, "loss": 1.9356, "step": 551 }, { "epoch": 0.17255392310096906, "grad_norm": 0.193359375, "learning_rate": 0.00019911456439856536, "loss": 1.7967, "step": 552 }, { "epoch": 0.17286652078774617, "grad_norm": 0.1953125, "learning_rate": 0.00019911129894690475, "loss": 1.7887, "step": 553 }, { "epoch": 0.17317911847452327, "grad_norm": 0.201171875, "learning_rate": 0.00019910802751176867, "loss": 1.8225, "step": 554 }, { "epoch": 0.1734917161613004, "grad_norm": 0.1943359375, "learning_rate": 0.00019910475009335472, "loss": 1.7761, "step": 555 }, { "epoch": 0.1738043138480775, "grad_norm": 0.1943359375, "learning_rate": 0.0001991014666918607, "loss": 1.917, "step": 556 }, { "epoch": 0.17411691153485465, "grad_norm": 0.1865234375, "learning_rate": 0.00019909817730748487, "loss": 1.707, "step": 557 }, { "epoch": 0.17442950922163175, "grad_norm": 0.1953125, "learning_rate": 0.00019909488194042575, "loss": 2.2473, "step": 558 }, { "epoch": 0.17474210690840888, "grad_norm": 0.1953125, "learning_rate": 0.00019909158059088235, "loss": 1.5952, "step": 559 }, { "epoch": 0.175054704595186, "grad_norm": 0.1923828125, "learning_rate": 0.000199088273259054, "loss": 1.6575, "step": 560 }, { "epoch": 0.17536730228196312, "grad_norm": 0.2001953125, "learning_rate": 0.00019908495994514026, "loss": 1.9749, "step": 561 }, { "epoch": 0.17567989996874023, "grad_norm": 0.1923828125, "learning_rate": 0.00019908164064934126, "loss": 1.681, "step": 562 }, { "epoch": 0.17599249765551736, "grad_norm": 0.1806640625, "learning_rate": 0.00019907831537185734, "loss": 1.7532, "step": 563 }, { "epoch": 0.17630509534229447, "grad_norm": 0.19140625, "learning_rate": 0.00019907498411288925, "loss": 2.0639, "step": 564 }, { "epoch": 0.1766176930290716, "grad_norm": 0.2119140625, "learning_rate": 0.00019907164687263813, "loss": 2.1285, "step": 565 }, { "epoch": 0.1769302907158487, "grad_norm": 0.189453125, "learning_rate": 0.00019906830365130546, "loss": 1.7988, "step": 566 }, { "epoch": 0.1772428884026258, "grad_norm": 0.1923828125, "learning_rate": 0.00019906495444909302, "loss": 1.6593, "step": 567 }, { "epoch": 0.17755548608940294, "grad_norm": 0.1904296875, "learning_rate": 0.00019906159926620306, "loss": 1.8094, "step": 568 }, { "epoch": 0.17786808377618005, "grad_norm": 0.1826171875, "learning_rate": 0.00019905823810283812, "loss": 1.6249, "step": 569 }, { "epoch": 0.17818068146295718, "grad_norm": 0.185546875, "learning_rate": 0.0001990548709592011, "loss": 1.6268, "step": 570 }, { "epoch": 0.17849327914973429, "grad_norm": 0.1962890625, "learning_rate": 0.00019905149783549532, "loss": 1.5067, "step": 571 }, { "epoch": 0.17880587683651142, "grad_norm": 0.19140625, "learning_rate": 0.00019904811873192437, "loss": 1.7792, "step": 572 }, { "epoch": 0.17911847452328852, "grad_norm": 0.1845703125, "learning_rate": 0.0001990447336486923, "loss": 1.7893, "step": 573 }, { "epoch": 0.17943107221006566, "grad_norm": 0.2001953125, "learning_rate": 0.0001990413425860034, "loss": 1.7304, "step": 574 }, { "epoch": 0.17974366989684276, "grad_norm": 0.193359375, "learning_rate": 0.00019903794554406248, "loss": 1.9092, "step": 575 }, { "epoch": 0.1800562675836199, "grad_norm": 0.193359375, "learning_rate": 0.00019903454252307454, "loss": 1.6916, "step": 576 }, { "epoch": 0.180368865270397, "grad_norm": 0.1943359375, "learning_rate": 0.0001990311335232451, "loss": 1.7437, "step": 577 }, { "epoch": 0.1806814629571741, "grad_norm": 0.203125, "learning_rate": 0.00019902771854477994, "loss": 1.7296, "step": 578 }, { "epoch": 0.18099406064395124, "grad_norm": 0.2001953125, "learning_rate": 0.0001990242975878852, "loss": 1.6257, "step": 579 }, { "epoch": 0.18130665833072834, "grad_norm": 0.2041015625, "learning_rate": 0.0001990208706527674, "loss": 1.6635, "step": 580 }, { "epoch": 0.18161925601750548, "grad_norm": 0.208984375, "learning_rate": 0.00019901743773963353, "loss": 1.8428, "step": 581 }, { "epoch": 0.18193185370428258, "grad_norm": 0.1953125, "learning_rate": 0.00019901399884869072, "loss": 1.7945, "step": 582 }, { "epoch": 0.18224445139105971, "grad_norm": 0.1923828125, "learning_rate": 0.00019901055398014662, "loss": 1.7858, "step": 583 }, { "epoch": 0.18255704907783682, "grad_norm": 0.19921875, "learning_rate": 0.0001990071031342092, "loss": 1.62, "step": 584 }, { "epoch": 0.18286964676461395, "grad_norm": 0.201171875, "learning_rate": 0.00019900364631108682, "loss": 1.8136, "step": 585 }, { "epoch": 0.18318224445139106, "grad_norm": 0.1962890625, "learning_rate": 0.00019900018351098813, "loss": 1.9074, "step": 586 }, { "epoch": 0.1834948421381682, "grad_norm": 0.2099609375, "learning_rate": 0.0001989967147341222, "loss": 1.8761, "step": 587 }, { "epoch": 0.1838074398249453, "grad_norm": 0.19921875, "learning_rate": 0.00019899323998069846, "loss": 1.8516, "step": 588 }, { "epoch": 0.1841200375117224, "grad_norm": 0.1865234375, "learning_rate": 0.0001989897592509267, "loss": 1.7505, "step": 589 }, { "epoch": 0.18443263519849953, "grad_norm": 0.189453125, "learning_rate": 0.00019898627254501697, "loss": 1.9066, "step": 590 }, { "epoch": 0.18474523288527664, "grad_norm": 0.1982421875, "learning_rate": 0.0001989827798631799, "loss": 1.926, "step": 591 }, { "epoch": 0.18505783057205377, "grad_norm": 0.2138671875, "learning_rate": 0.00019897928120562623, "loss": 1.9225, "step": 592 }, { "epoch": 0.18537042825883088, "grad_norm": 0.2041015625, "learning_rate": 0.00019897577657256724, "loss": 2.0965, "step": 593 }, { "epoch": 0.185683025945608, "grad_norm": 0.20703125, "learning_rate": 0.00019897226596421447, "loss": 1.7195, "step": 594 }, { "epoch": 0.18599562363238512, "grad_norm": 0.197265625, "learning_rate": 0.00019896874938077992, "loss": 1.8197, "step": 595 }, { "epoch": 0.18630822131916225, "grad_norm": 0.1884765625, "learning_rate": 0.0001989652268224758, "loss": 2.2171, "step": 596 }, { "epoch": 0.18662081900593935, "grad_norm": 0.1875, "learning_rate": 0.00019896169828951488, "loss": 1.8195, "step": 597 }, { "epoch": 0.1869334166927165, "grad_norm": 0.1826171875, "learning_rate": 0.00019895816378211008, "loss": 1.6969, "step": 598 }, { "epoch": 0.1872460143794936, "grad_norm": 0.19921875, "learning_rate": 0.00019895462330047484, "loss": 1.8099, "step": 599 }, { "epoch": 0.1875586120662707, "grad_norm": 0.189453125, "learning_rate": 0.00019895107684482293, "loss": 1.7597, "step": 600 }, { "epoch": 0.18787120975304783, "grad_norm": 0.1962890625, "learning_rate": 0.00019894752441536838, "loss": 1.7928, "step": 601 }, { "epoch": 0.18818380743982493, "grad_norm": 0.2021484375, "learning_rate": 0.00019894396601232567, "loss": 1.7385, "step": 602 }, { "epoch": 0.18849640512660207, "grad_norm": 0.1982421875, "learning_rate": 0.0001989404016359097, "loss": 1.7216, "step": 603 }, { "epoch": 0.18880900281337917, "grad_norm": 0.19140625, "learning_rate": 0.00019893683128633557, "loss": 1.749, "step": 604 }, { "epoch": 0.1891216005001563, "grad_norm": 0.189453125, "learning_rate": 0.00019893325496381884, "loss": 1.8708, "step": 605 }, { "epoch": 0.1894341981869334, "grad_norm": 0.197265625, "learning_rate": 0.00019892967266857547, "loss": 1.9852, "step": 606 }, { "epoch": 0.18974679587371054, "grad_norm": 0.203125, "learning_rate": 0.0001989260844008217, "loss": 1.7595, "step": 607 }, { "epoch": 0.19005939356048765, "grad_norm": 0.197265625, "learning_rate": 0.00019892249016077412, "loss": 1.7231, "step": 608 }, { "epoch": 0.19037199124726478, "grad_norm": 0.212890625, "learning_rate": 0.0001989188899486498, "loss": 1.7735, "step": 609 }, { "epoch": 0.1906845889340419, "grad_norm": 0.1953125, "learning_rate": 0.00019891528376466598, "loss": 1.8502, "step": 610 }, { "epoch": 0.190997186620819, "grad_norm": 0.19921875, "learning_rate": 0.00019891167160904046, "loss": 1.8522, "step": 611 }, { "epoch": 0.19130978430759613, "grad_norm": 0.19921875, "learning_rate": 0.0001989080534819913, "loss": 2.0308, "step": 612 }, { "epoch": 0.19162238199437323, "grad_norm": 0.2001953125, "learning_rate": 0.00019890442938373686, "loss": 1.7471, "step": 613 }, { "epoch": 0.19193497968115036, "grad_norm": 0.1962890625, "learning_rate": 0.000198900799314496, "loss": 1.5426, "step": 614 }, { "epoch": 0.19224757736792747, "grad_norm": 0.197265625, "learning_rate": 0.0001988971632744879, "loss": 2.0733, "step": 615 }, { "epoch": 0.1925601750547046, "grad_norm": 0.193359375, "learning_rate": 0.00019889352126393198, "loss": 1.8229, "step": 616 }, { "epoch": 0.1928727727414817, "grad_norm": 0.19921875, "learning_rate": 0.00019888987328304817, "loss": 1.9119, "step": 617 }, { "epoch": 0.19318537042825884, "grad_norm": 0.2021484375, "learning_rate": 0.0001988862193320567, "loss": 1.6569, "step": 618 }, { "epoch": 0.19349796811503595, "grad_norm": 0.189453125, "learning_rate": 0.00019888255941117816, "loss": 2.0652, "step": 619 }, { "epoch": 0.19381056580181308, "grad_norm": 0.2021484375, "learning_rate": 0.0001988788935206335, "loss": 1.6115, "step": 620 }, { "epoch": 0.19412316348859018, "grad_norm": 0.197265625, "learning_rate": 0.00019887522166064402, "loss": 1.6017, "step": 621 }, { "epoch": 0.1944357611753673, "grad_norm": 0.1875, "learning_rate": 0.00019887154383143143, "loss": 1.9108, "step": 622 }, { "epoch": 0.19474835886214442, "grad_norm": 0.1943359375, "learning_rate": 0.00019886786003321772, "loss": 1.6372, "step": 623 }, { "epoch": 0.19506095654892153, "grad_norm": 0.296875, "learning_rate": 0.0001988641702662253, "loss": 2.4569, "step": 624 }, { "epoch": 0.19537355423569866, "grad_norm": 0.1875, "learning_rate": 0.000198860474530677, "loss": 1.6954, "step": 625 }, { "epoch": 0.19568615192247577, "grad_norm": 0.2060546875, "learning_rate": 0.00019885677282679585, "loss": 1.8825, "step": 626 }, { "epoch": 0.1959987496092529, "grad_norm": 0.1943359375, "learning_rate": 0.00019885306515480533, "loss": 1.7887, "step": 627 }, { "epoch": 0.19631134729603, "grad_norm": 0.2109375, "learning_rate": 0.00019884935151492933, "loss": 1.8936, "step": 628 }, { "epoch": 0.19662394498280714, "grad_norm": 0.19921875, "learning_rate": 0.00019884563190739196, "loss": 1.7583, "step": 629 }, { "epoch": 0.19693654266958424, "grad_norm": 0.201171875, "learning_rate": 0.0001988419063324179, "loss": 1.898, "step": 630 }, { "epoch": 0.19724914035636137, "grad_norm": 0.193359375, "learning_rate": 0.0001988381747902319, "loss": 2.0045, "step": 631 }, { "epoch": 0.19756173804313848, "grad_norm": 0.1923828125, "learning_rate": 0.00019883443728105943, "loss": 1.9453, "step": 632 }, { "epoch": 0.1978743357299156, "grad_norm": 0.2001953125, "learning_rate": 0.000198830693805126, "loss": 1.8481, "step": 633 }, { "epoch": 0.19818693341669272, "grad_norm": 0.203125, "learning_rate": 0.00019882694436265764, "loss": 1.8409, "step": 634 }, { "epoch": 0.19849953110346982, "grad_norm": 0.208984375, "learning_rate": 0.00019882318895388072, "loss": 1.8232, "step": 635 }, { "epoch": 0.19881212879024696, "grad_norm": 0.193359375, "learning_rate": 0.00019881942757902197, "loss": 1.7768, "step": 636 }, { "epoch": 0.19912472647702406, "grad_norm": 0.2099609375, "learning_rate": 0.0001988156602383084, "loss": 1.7056, "step": 637 }, { "epoch": 0.1994373241638012, "grad_norm": 0.2041015625, "learning_rate": 0.00019881188693196756, "loss": 1.5243, "step": 638 }, { "epoch": 0.1997499218505783, "grad_norm": 0.2001953125, "learning_rate": 0.00019880810766022714, "loss": 2.0564, "step": 639 }, { "epoch": 0.20006251953735543, "grad_norm": 0.203125, "learning_rate": 0.00019880432242331536, "loss": 1.8789, "step": 640 }, { "epoch": 0.20037511722413254, "grad_norm": 0.1923828125, "learning_rate": 0.00019880053122146073, "loss": 1.8037, "step": 641 }, { "epoch": 0.20068771491090967, "grad_norm": 0.205078125, "learning_rate": 0.00019879673405489215, "loss": 1.7692, "step": 642 }, { "epoch": 0.20100031259768678, "grad_norm": 0.1923828125, "learning_rate": 0.00019879293092383882, "loss": 1.7066, "step": 643 }, { "epoch": 0.2013129102844639, "grad_norm": 0.197265625, "learning_rate": 0.00019878912182853036, "loss": 1.8715, "step": 644 }, { "epoch": 0.201625507971241, "grad_norm": 0.19140625, "learning_rate": 0.0001987853067691967, "loss": 1.6647, "step": 645 }, { "epoch": 0.20193810565801812, "grad_norm": 0.2001953125, "learning_rate": 0.00019878148574606824, "loss": 1.6027, "step": 646 }, { "epoch": 0.20225070334479525, "grad_norm": 0.1904296875, "learning_rate": 0.00019877765875937558, "loss": 1.6788, "step": 647 }, { "epoch": 0.20256330103157236, "grad_norm": 0.205078125, "learning_rate": 0.00019877382580934977, "loss": 1.7934, "step": 648 }, { "epoch": 0.2028758987183495, "grad_norm": 0.1904296875, "learning_rate": 0.00019876998689622225, "loss": 1.6556, "step": 649 }, { "epoch": 0.2031884964051266, "grad_norm": 0.1953125, "learning_rate": 0.00019876614202022475, "loss": 1.7103, "step": 650 }, { "epoch": 0.20350109409190373, "grad_norm": 0.193359375, "learning_rate": 0.0001987622911815894, "loss": 1.7654, "step": 651 }, { "epoch": 0.20381369177868083, "grad_norm": 0.1875, "learning_rate": 0.00019875843438054864, "loss": 1.7043, "step": 652 }, { "epoch": 0.20412628946545797, "grad_norm": 0.1943359375, "learning_rate": 0.0001987545716173354, "loss": 1.966, "step": 653 }, { "epoch": 0.20443888715223507, "grad_norm": 0.19921875, "learning_rate": 0.0001987507028921828, "loss": 1.7629, "step": 654 }, { "epoch": 0.2047514848390122, "grad_norm": 0.2060546875, "learning_rate": 0.00019874682820532444, "loss": 1.766, "step": 655 }, { "epoch": 0.2050640825257893, "grad_norm": 0.201171875, "learning_rate": 0.00019874294755699423, "loss": 1.6821, "step": 656 }, { "epoch": 0.20537668021256641, "grad_norm": 0.21875, "learning_rate": 0.00019873906094742644, "loss": 1.806, "step": 657 }, { "epoch": 0.20568927789934355, "grad_norm": 0.1962890625, "learning_rate": 0.0001987351683768557, "loss": 1.8864, "step": 658 }, { "epoch": 0.20600187558612065, "grad_norm": 0.19921875, "learning_rate": 0.00019873126984551703, "loss": 1.7406, "step": 659 }, { "epoch": 0.20631447327289779, "grad_norm": 0.201171875, "learning_rate": 0.0001987273653536458, "loss": 1.7246, "step": 660 }, { "epoch": 0.2066270709596749, "grad_norm": 0.1875, "learning_rate": 0.00019872345490147772, "loss": 1.9874, "step": 661 }, { "epoch": 0.20693966864645202, "grad_norm": 0.2001953125, "learning_rate": 0.00019871953848924886, "loss": 1.7792, "step": 662 }, { "epoch": 0.20725226633322913, "grad_norm": 0.1982421875, "learning_rate": 0.00019871561611719564, "loss": 1.8759, "step": 663 }, { "epoch": 0.20756486402000626, "grad_norm": 0.2001953125, "learning_rate": 0.00019871168778555492, "loss": 1.9906, "step": 664 }, { "epoch": 0.20787746170678337, "grad_norm": 0.2021484375, "learning_rate": 0.0001987077534945638, "loss": 1.8973, "step": 665 }, { "epoch": 0.2081900593935605, "grad_norm": 0.1982421875, "learning_rate": 0.00019870381324445978, "loss": 1.6312, "step": 666 }, { "epoch": 0.2085026570803376, "grad_norm": 0.208984375, "learning_rate": 0.0001986998670354808, "loss": 1.8406, "step": 667 }, { "epoch": 0.2088152547671147, "grad_norm": 0.2099609375, "learning_rate": 0.0001986959148678651, "loss": 1.7828, "step": 668 }, { "epoch": 0.20912785245389184, "grad_norm": 0.201171875, "learning_rate": 0.00019869195674185122, "loss": 1.9185, "step": 669 }, { "epoch": 0.20944045014066895, "grad_norm": 0.201171875, "learning_rate": 0.00019868799265767816, "loss": 1.7588, "step": 670 }, { "epoch": 0.20975304782744608, "grad_norm": 0.203125, "learning_rate": 0.00019868402261558524, "loss": 1.7387, "step": 671 }, { "epoch": 0.2100656455142232, "grad_norm": 0.1904296875, "learning_rate": 0.00019868004661581208, "loss": 1.6164, "step": 672 }, { "epoch": 0.21037824320100032, "grad_norm": 0.19140625, "learning_rate": 0.0001986760646585988, "loss": 1.8667, "step": 673 }, { "epoch": 0.21069084088777743, "grad_norm": 0.189453125, "learning_rate": 0.00019867207674418568, "loss": 1.9312, "step": 674 }, { "epoch": 0.21100343857455456, "grad_norm": 0.19921875, "learning_rate": 0.0001986680828728136, "loss": 1.7665, "step": 675 }, { "epoch": 0.21131603626133166, "grad_norm": 0.2099609375, "learning_rate": 0.00019866408304472364, "loss": 1.6056, "step": 676 }, { "epoch": 0.2116286339481088, "grad_norm": 0.2001953125, "learning_rate": 0.00019866007726015723, "loss": 1.5752, "step": 677 }, { "epoch": 0.2119412316348859, "grad_norm": 0.208984375, "learning_rate": 0.00019865606551935626, "loss": 1.8815, "step": 678 }, { "epoch": 0.212253829321663, "grad_norm": 0.203125, "learning_rate": 0.00019865204782256287, "loss": 1.7828, "step": 679 }, { "epoch": 0.21256642700844014, "grad_norm": 0.2080078125, "learning_rate": 0.0001986480241700196, "loss": 1.9457, "step": 680 }, { "epoch": 0.21287902469521724, "grad_norm": 0.1953125, "learning_rate": 0.00019864399456196946, "loss": 1.9523, "step": 681 }, { "epoch": 0.21319162238199438, "grad_norm": 0.19140625, "learning_rate": 0.00019863995899865565, "loss": 1.5974, "step": 682 }, { "epoch": 0.21350422006877148, "grad_norm": 0.2021484375, "learning_rate": 0.00019863591748032184, "loss": 1.8886, "step": 683 }, { "epoch": 0.21381681775554862, "grad_norm": 0.201171875, "learning_rate": 0.00019863187000721197, "loss": 1.8564, "step": 684 }, { "epoch": 0.21412941544232572, "grad_norm": 0.203125, "learning_rate": 0.00019862781657957045, "loss": 1.8022, "step": 685 }, { "epoch": 0.21444201312910285, "grad_norm": 0.2041015625, "learning_rate": 0.00019862375719764192, "loss": 1.855, "step": 686 }, { "epoch": 0.21475461081587996, "grad_norm": 0.208984375, "learning_rate": 0.0001986196918616715, "loss": 2.0019, "step": 687 }, { "epoch": 0.2150672085026571, "grad_norm": 0.1953125, "learning_rate": 0.00019861562057190462, "loss": 1.8597, "step": 688 }, { "epoch": 0.2153798061894342, "grad_norm": 0.1962890625, "learning_rate": 0.00019861154332858708, "loss": 1.9685, "step": 689 }, { "epoch": 0.2156924038762113, "grad_norm": 0.197265625, "learning_rate": 0.00019860746013196495, "loss": 1.8702, "step": 690 }, { "epoch": 0.21600500156298844, "grad_norm": 0.2138671875, "learning_rate": 0.00019860337098228485, "loss": 1.6556, "step": 691 }, { "epoch": 0.21631759924976554, "grad_norm": 0.2060546875, "learning_rate": 0.00019859927587979358, "loss": 2.0366, "step": 692 }, { "epoch": 0.21663019693654267, "grad_norm": 0.1982421875, "learning_rate": 0.00019859517482473838, "loss": 1.9303, "step": 693 }, { "epoch": 0.21694279462331978, "grad_norm": 0.1982421875, "learning_rate": 0.00019859106781736682, "loss": 1.6981, "step": 694 }, { "epoch": 0.2172553923100969, "grad_norm": 0.205078125, "learning_rate": 0.00019858695485792686, "loss": 1.4825, "step": 695 }, { "epoch": 0.21756798999687402, "grad_norm": 0.19921875, "learning_rate": 0.0001985828359466668, "loss": 1.779, "step": 696 }, { "epoch": 0.21788058768365115, "grad_norm": 0.2001953125, "learning_rate": 0.00019857871108383532, "loss": 1.7535, "step": 697 }, { "epoch": 0.21819318537042826, "grad_norm": 0.2060546875, "learning_rate": 0.00019857458026968143, "loss": 1.7039, "step": 698 }, { "epoch": 0.2185057830572054, "grad_norm": 0.2001953125, "learning_rate": 0.0001985704435044545, "loss": 1.7501, "step": 699 }, { "epoch": 0.2188183807439825, "grad_norm": 0.2041015625, "learning_rate": 0.0001985663007884043, "loss": 1.8791, "step": 700 }, { "epoch": 0.2191309784307596, "grad_norm": 0.20703125, "learning_rate": 0.00019856215212178094, "loss": 1.904, "step": 701 }, { "epoch": 0.21944357611753673, "grad_norm": 0.2109375, "learning_rate": 0.00019855799750483484, "loss": 1.4772, "step": 702 }, { "epoch": 0.21975617380431384, "grad_norm": 0.3984375, "learning_rate": 0.00019855383693781682, "loss": 2.4316, "step": 703 }, { "epoch": 0.22006877149109097, "grad_norm": 0.2109375, "learning_rate": 0.0001985496704209781, "loss": 1.6331, "step": 704 }, { "epoch": 0.22038136917786808, "grad_norm": 0.22265625, "learning_rate": 0.0001985454979545702, "loss": 1.7665, "step": 705 }, { "epoch": 0.2206939668646452, "grad_norm": 0.1923828125, "learning_rate": 0.00019854131953884495, "loss": 1.9052, "step": 706 }, { "epoch": 0.2210065645514223, "grad_norm": 0.1923828125, "learning_rate": 0.00019853713517405472, "loss": 1.8316, "step": 707 }, { "epoch": 0.22131916223819945, "grad_norm": 0.1962890625, "learning_rate": 0.00019853294486045208, "loss": 1.6123, "step": 708 }, { "epoch": 0.22163175992497655, "grad_norm": 0.2060546875, "learning_rate": 0.00019852874859828997, "loss": 1.8111, "step": 709 }, { "epoch": 0.22194435761175368, "grad_norm": 0.2060546875, "learning_rate": 0.00019852454638782176, "loss": 1.8234, "step": 710 }, { "epoch": 0.2222569552985308, "grad_norm": 0.2060546875, "learning_rate": 0.00019852033822930114, "loss": 1.6664, "step": 711 }, { "epoch": 0.22256955298530792, "grad_norm": 0.201171875, "learning_rate": 0.00019851612412298214, "loss": 1.9896, "step": 712 }, { "epoch": 0.22288215067208503, "grad_norm": 0.2001953125, "learning_rate": 0.0001985119040691192, "loss": 1.6152, "step": 713 }, { "epoch": 0.22319474835886213, "grad_norm": 0.19140625, "learning_rate": 0.00019850767806796707, "loss": 2.165, "step": 714 }, { "epoch": 0.22350734604563927, "grad_norm": 0.1982421875, "learning_rate": 0.00019850344611978087, "loss": 2.1852, "step": 715 }, { "epoch": 0.22381994373241637, "grad_norm": 0.2001953125, "learning_rate": 0.00019849920822481614, "loss": 1.7914, "step": 716 }, { "epoch": 0.2241325414191935, "grad_norm": 0.2158203125, "learning_rate": 0.00019849496438332866, "loss": 2.0296, "step": 717 }, { "epoch": 0.2244451391059706, "grad_norm": 0.201171875, "learning_rate": 0.0001984907145955747, "loss": 1.7981, "step": 718 }, { "epoch": 0.22475773679274774, "grad_norm": 0.2080078125, "learning_rate": 0.00019848645886181074, "loss": 1.7928, "step": 719 }, { "epoch": 0.22507033447952485, "grad_norm": 0.2080078125, "learning_rate": 0.00019848219718229378, "loss": 1.8671, "step": 720 }, { "epoch": 0.22538293216630198, "grad_norm": 0.203125, "learning_rate": 0.00019847792955728107, "loss": 1.8564, "step": 721 }, { "epoch": 0.22569552985307909, "grad_norm": 0.20703125, "learning_rate": 0.0001984736559870303, "loss": 1.6293, "step": 722 }, { "epoch": 0.22600812753985622, "grad_norm": 0.201171875, "learning_rate": 0.0001984693764717994, "loss": 1.9545, "step": 723 }, { "epoch": 0.22632072522663332, "grad_norm": 0.2021484375, "learning_rate": 0.00019846509101184679, "loss": 1.8173, "step": 724 }, { "epoch": 0.22663332291341043, "grad_norm": 0.197265625, "learning_rate": 0.00019846079960743112, "loss": 1.649, "step": 725 }, { "epoch": 0.22694592060018756, "grad_norm": 0.1923828125, "learning_rate": 0.00019845650225881154, "loss": 1.8916, "step": 726 }, { "epoch": 0.22725851828696467, "grad_norm": 0.2099609375, "learning_rate": 0.00019845219896624743, "loss": 1.7553, "step": 727 }, { "epoch": 0.2275711159737418, "grad_norm": 0.19140625, "learning_rate": 0.0001984478897299986, "loss": 1.6849, "step": 728 }, { "epoch": 0.2278837136605189, "grad_norm": 0.19921875, "learning_rate": 0.00019844357455032526, "loss": 1.8667, "step": 729 }, { "epoch": 0.22819631134729604, "grad_norm": 0.203125, "learning_rate": 0.00019843925342748783, "loss": 1.651, "step": 730 }, { "epoch": 0.22850890903407314, "grad_norm": 0.19921875, "learning_rate": 0.00019843492636174728, "loss": 1.6074, "step": 731 }, { "epoch": 0.22882150672085028, "grad_norm": 0.189453125, "learning_rate": 0.00019843059335336474, "loss": 1.8431, "step": 732 }, { "epoch": 0.22913410440762738, "grad_norm": 0.1904296875, "learning_rate": 0.00019842625440260188, "loss": 1.7872, "step": 733 }, { "epoch": 0.22944670209440451, "grad_norm": 0.197265625, "learning_rate": 0.0001984219095097206, "loss": 1.6808, "step": 734 }, { "epoch": 0.22975929978118162, "grad_norm": 0.1962890625, "learning_rate": 0.00019841755867498322, "loss": 1.7461, "step": 735 }, { "epoch": 0.23007189746795872, "grad_norm": 0.2109375, "learning_rate": 0.00019841320189865243, "loss": 1.7838, "step": 736 }, { "epoch": 0.23038449515473586, "grad_norm": 0.20703125, "learning_rate": 0.0001984088391809912, "loss": 1.9104, "step": 737 }, { "epoch": 0.23069709284151296, "grad_norm": 0.197265625, "learning_rate": 0.00019840447052226298, "loss": 1.9961, "step": 738 }, { "epoch": 0.2310096905282901, "grad_norm": 0.2060546875, "learning_rate": 0.00019840009592273143, "loss": 1.987, "step": 739 }, { "epoch": 0.2313222882150672, "grad_norm": 0.1982421875, "learning_rate": 0.00019839571538266072, "loss": 1.6382, "step": 740 }, { "epoch": 0.23163488590184433, "grad_norm": 0.19921875, "learning_rate": 0.0001983913289023153, "loss": 1.6738, "step": 741 }, { "epoch": 0.23194748358862144, "grad_norm": 0.2119140625, "learning_rate": 0.00019838693648195995, "loss": 1.8182, "step": 742 }, { "epoch": 0.23226008127539857, "grad_norm": 0.2041015625, "learning_rate": 0.00019838253812185988, "loss": 1.598, "step": 743 }, { "epoch": 0.23257267896217568, "grad_norm": 0.19140625, "learning_rate": 0.00019837813382228063, "loss": 1.7465, "step": 744 }, { "epoch": 0.2328852766489528, "grad_norm": 0.2119140625, "learning_rate": 0.00019837372358348806, "loss": 1.8831, "step": 745 }, { "epoch": 0.23319787433572992, "grad_norm": 0.1962890625, "learning_rate": 0.00019836930740574845, "loss": 1.525, "step": 746 }, { "epoch": 0.23351047202250702, "grad_norm": 0.84375, "learning_rate": 0.00019836488528932836, "loss": 3.4084, "step": 747 }, { "epoch": 0.23382306970928415, "grad_norm": 0.1982421875, "learning_rate": 0.00019836045723449483, "loss": 1.7993, "step": 748 }, { "epoch": 0.23413566739606126, "grad_norm": 0.2314453125, "learning_rate": 0.00019835602324151514, "loss": 1.8971, "step": 749 }, { "epoch": 0.2344482650828384, "grad_norm": 0.1962890625, "learning_rate": 0.00019835158331065703, "loss": 1.7286, "step": 750 }, { "epoch": 0.2347608627696155, "grad_norm": 0.1982421875, "learning_rate": 0.00019834713744218844, "loss": 1.6018, "step": 751 }, { "epoch": 0.23507346045639263, "grad_norm": 0.2109375, "learning_rate": 0.00019834268563637787, "loss": 1.8705, "step": 752 }, { "epoch": 0.23538605814316974, "grad_norm": 0.2001953125, "learning_rate": 0.00019833822789349409, "loss": 1.8121, "step": 753 }, { "epoch": 0.23569865582994687, "grad_norm": 0.1953125, "learning_rate": 0.00019833376421380612, "loss": 1.6886, "step": 754 }, { "epoch": 0.23601125351672397, "grad_norm": 0.205078125, "learning_rate": 0.00019832929459758352, "loss": 1.6922, "step": 755 }, { "epoch": 0.2363238512035011, "grad_norm": 0.208984375, "learning_rate": 0.0001983248190450961, "loss": 1.7953, "step": 756 }, { "epoch": 0.2366364488902782, "grad_norm": 0.2021484375, "learning_rate": 0.00019832033755661405, "loss": 1.7892, "step": 757 }, { "epoch": 0.23694904657705532, "grad_norm": 0.208984375, "learning_rate": 0.00019831585013240793, "loss": 1.5738, "step": 758 }, { "epoch": 0.23726164426383245, "grad_norm": 0.2041015625, "learning_rate": 0.0001983113567727487, "loss": 1.6719, "step": 759 }, { "epoch": 0.23757424195060955, "grad_norm": 0.203125, "learning_rate": 0.00019830685747790748, "loss": 1.9564, "step": 760 }, { "epoch": 0.2378868396373867, "grad_norm": 0.2138671875, "learning_rate": 0.0001983023522481561, "loss": 2.1432, "step": 761 }, { "epoch": 0.2381994373241638, "grad_norm": 0.2099609375, "learning_rate": 0.0001982978410837664, "loss": 1.8179, "step": 762 }, { "epoch": 0.23851203501094093, "grad_norm": 0.2119140625, "learning_rate": 0.0001982933239850108, "loss": 1.9499, "step": 763 }, { "epoch": 0.23882463269771803, "grad_norm": 0.2060546875, "learning_rate": 0.00019828880095216193, "loss": 1.5989, "step": 764 }, { "epoch": 0.23913723038449516, "grad_norm": 0.2109375, "learning_rate": 0.00019828427198549293, "loss": 1.8161, "step": 765 }, { "epoch": 0.23944982807127227, "grad_norm": 0.208984375, "learning_rate": 0.0001982797370852772, "loss": 1.9312, "step": 766 }, { "epoch": 0.2397624257580494, "grad_norm": 0.2041015625, "learning_rate": 0.00019827519625178846, "loss": 1.7275, "step": 767 }, { "epoch": 0.2400750234448265, "grad_norm": 0.203125, "learning_rate": 0.0001982706494853009, "loss": 2.0002, "step": 768 }, { "epoch": 0.2403876211316036, "grad_norm": 0.2080078125, "learning_rate": 0.00019826609678608902, "loss": 1.8021, "step": 769 }, { "epoch": 0.24070021881838075, "grad_norm": 0.205078125, "learning_rate": 0.00019826153815442763, "loss": 1.5546, "step": 770 }, { "epoch": 0.24101281650515785, "grad_norm": 0.212890625, "learning_rate": 0.000198256973590592, "loss": 1.5848, "step": 771 }, { "epoch": 0.24132541419193498, "grad_norm": 0.2021484375, "learning_rate": 0.00019825240309485765, "loss": 1.7763, "step": 772 }, { "epoch": 0.2416380118787121, "grad_norm": 0.2119140625, "learning_rate": 0.0001982478266675005, "loss": 1.6906, "step": 773 }, { "epoch": 0.24195060956548922, "grad_norm": 0.21484375, "learning_rate": 0.00019824324430879687, "loss": 1.9644, "step": 774 }, { "epoch": 0.24226320725226633, "grad_norm": 0.2041015625, "learning_rate": 0.00019823865601902341, "loss": 1.9122, "step": 775 }, { "epoch": 0.24257580493904346, "grad_norm": 0.2158203125, "learning_rate": 0.00019823406179845707, "loss": 1.9017, "step": 776 }, { "epoch": 0.24288840262582057, "grad_norm": 0.21875, "learning_rate": 0.00019822946164737526, "loss": 1.8361, "step": 777 }, { "epoch": 0.2432010003125977, "grad_norm": 0.1982421875, "learning_rate": 0.00019822485556605566, "loss": 1.7349, "step": 778 }, { "epoch": 0.2435135979993748, "grad_norm": 0.212890625, "learning_rate": 0.00019822024355477637, "loss": 1.6017, "step": 779 }, { "epoch": 0.2438261956861519, "grad_norm": 0.2060546875, "learning_rate": 0.0001982156256138158, "loss": 1.8296, "step": 780 }, { "epoch": 0.24413879337292904, "grad_norm": 0.2041015625, "learning_rate": 0.00019821100174345277, "loss": 1.6754, "step": 781 }, { "epoch": 0.24445139105970615, "grad_norm": 0.216796875, "learning_rate": 0.0001982063719439664, "loss": 2.0037, "step": 782 }, { "epoch": 0.24476398874648328, "grad_norm": 0.2109375, "learning_rate": 0.00019820173621563623, "loss": 1.887, "step": 783 }, { "epoch": 0.24507658643326038, "grad_norm": 0.19140625, "learning_rate": 0.0001981970945587421, "loss": 1.5708, "step": 784 }, { "epoch": 0.24538918412003752, "grad_norm": 0.68359375, "learning_rate": 0.0001981924469735642, "loss": 2.3282, "step": 785 }, { "epoch": 0.24570178180681462, "grad_norm": 0.203125, "learning_rate": 0.00019818779346038318, "loss": 1.7515, "step": 786 }, { "epoch": 0.24601437949359176, "grad_norm": 0.20703125, "learning_rate": 0.00019818313401947997, "loss": 1.7623, "step": 787 }, { "epoch": 0.24632697718036886, "grad_norm": 0.2099609375, "learning_rate": 0.00019817846865113577, "loss": 1.8036, "step": 788 }, { "epoch": 0.246639574867146, "grad_norm": 0.2001953125, "learning_rate": 0.0001981737973556324, "loss": 1.8455, "step": 789 }, { "epoch": 0.2469521725539231, "grad_norm": 0.2099609375, "learning_rate": 0.0001981691201332517, "loss": 1.7791, "step": 790 }, { "epoch": 0.24726477024070023, "grad_norm": 0.205078125, "learning_rate": 0.00019816443698427615, "loss": 2.0416, "step": 791 }, { "epoch": 0.24757736792747734, "grad_norm": 0.2265625, "learning_rate": 0.00019815974790898846, "loss": 2.2271, "step": 792 }, { "epoch": 0.24788996561425444, "grad_norm": 0.203125, "learning_rate": 0.00019815505290767172, "loss": 1.5433, "step": 793 }, { "epoch": 0.24820256330103158, "grad_norm": 0.2138671875, "learning_rate": 0.0001981503519806093, "loss": 1.7228, "step": 794 }, { "epoch": 0.24851516098780868, "grad_norm": 0.20703125, "learning_rate": 0.00019814564512808512, "loss": 1.8217, "step": 795 }, { "epoch": 0.2488277586745858, "grad_norm": 0.203125, "learning_rate": 0.00019814093235038323, "loss": 1.8205, "step": 796 }, { "epoch": 0.24914035636136292, "grad_norm": 0.2109375, "learning_rate": 0.00019813621364778817, "loss": 1.8541, "step": 797 }, { "epoch": 0.24945295404814005, "grad_norm": 0.2080078125, "learning_rate": 0.0001981314890205849, "loss": 1.8656, "step": 798 }, { "epoch": 0.24976555173491716, "grad_norm": 0.2109375, "learning_rate": 0.00019812675846905855, "loss": 1.809, "step": 799 }, { "epoch": 0.25007814942169426, "grad_norm": 0.20703125, "learning_rate": 0.00019812202199349476, "loss": 2.0585, "step": 800 }, { "epoch": 0.2503907471084714, "grad_norm": 0.2138671875, "learning_rate": 0.00019811727959417945, "loss": 1.9492, "step": 801 }, { "epoch": 0.25070334479524853, "grad_norm": 0.2080078125, "learning_rate": 0.00019811253127139896, "loss": 1.8192, "step": 802 }, { "epoch": 0.25101594248202563, "grad_norm": 0.2109375, "learning_rate": 0.0001981077770254399, "loss": 1.4981, "step": 803 }, { "epoch": 0.25132854016880274, "grad_norm": 0.205078125, "learning_rate": 0.00019810301685658935, "loss": 1.8598, "step": 804 }, { "epoch": 0.25164113785557984, "grad_norm": 0.2119140625, "learning_rate": 0.00019809825076513464, "loss": 1.7946, "step": 805 }, { "epoch": 0.251953735542357, "grad_norm": 0.23046875, "learning_rate": 0.00019809347875136352, "loss": 1.784, "step": 806 }, { "epoch": 0.2522663332291341, "grad_norm": 0.1962890625, "learning_rate": 0.00019808870081556413, "loss": 1.9401, "step": 807 }, { "epoch": 0.2525789309159112, "grad_norm": 0.2080078125, "learning_rate": 0.00019808391695802483, "loss": 2.0217, "step": 808 }, { "epoch": 0.2528915286026883, "grad_norm": 0.20703125, "learning_rate": 0.0001980791271790345, "loss": 1.7354, "step": 809 }, { "epoch": 0.2532041262894655, "grad_norm": 0.20703125, "learning_rate": 0.00019807433147888225, "loss": 2.1094, "step": 810 }, { "epoch": 0.2535167239762426, "grad_norm": 0.8125, "learning_rate": 0.00019806952985785764, "loss": 2.8019, "step": 811 }, { "epoch": 0.2538293216630197, "grad_norm": 0.193359375, "learning_rate": 0.00019806472231625056, "loss": 1.554, "step": 812 }, { "epoch": 0.2541419193497968, "grad_norm": 0.1953125, "learning_rate": 0.0001980599088543512, "loss": 1.7158, "step": 813 }, { "epoch": 0.2544545170365739, "grad_norm": 0.2060546875, "learning_rate": 0.00019805508947245021, "loss": 1.934, "step": 814 }, { "epoch": 0.25476711472335106, "grad_norm": 0.2119140625, "learning_rate": 0.0001980502641708385, "loss": 2.0267, "step": 815 }, { "epoch": 0.25507971241012817, "grad_norm": 0.20703125, "learning_rate": 0.0001980454329498074, "loss": 1.6819, "step": 816 }, { "epoch": 0.2553923100969053, "grad_norm": 0.203125, "learning_rate": 0.00019804059580964855, "loss": 1.7279, "step": 817 }, { "epoch": 0.2557049077836824, "grad_norm": 0.2109375, "learning_rate": 0.00019803575275065404, "loss": 1.6234, "step": 818 }, { "epoch": 0.25601750547045954, "grad_norm": 0.205078125, "learning_rate": 0.0001980309037731162, "loss": 1.4631, "step": 819 }, { "epoch": 0.25633010315723664, "grad_norm": 0.2109375, "learning_rate": 0.00019802604887732774, "loss": 1.7769, "step": 820 }, { "epoch": 0.25664270084401375, "grad_norm": 0.2119140625, "learning_rate": 0.00019802118806358182, "loss": 1.7928, "step": 821 }, { "epoch": 0.25695529853079085, "grad_norm": 0.2158203125, "learning_rate": 0.00019801632133217189, "loss": 1.639, "step": 822 }, { "epoch": 0.257267896217568, "grad_norm": 0.212890625, "learning_rate": 0.0001980114486833917, "loss": 1.6918, "step": 823 }, { "epoch": 0.2575804939043451, "grad_norm": 0.2138671875, "learning_rate": 0.00019800657011753548, "loss": 1.8273, "step": 824 }, { "epoch": 0.2578930915911222, "grad_norm": 0.2041015625, "learning_rate": 0.0001980016856348977, "loss": 1.9625, "step": 825 }, { "epoch": 0.25820568927789933, "grad_norm": 0.201171875, "learning_rate": 0.00019799679523577332, "loss": 1.741, "step": 826 }, { "epoch": 0.25851828696467644, "grad_norm": 0.19921875, "learning_rate": 0.00019799189892045748, "loss": 2.0397, "step": 827 }, { "epoch": 0.2588308846514536, "grad_norm": 0.2001953125, "learning_rate": 0.00019798699668924585, "loss": 1.7246, "step": 828 }, { "epoch": 0.2591434823382307, "grad_norm": 0.203125, "learning_rate": 0.00019798208854243437, "loss": 1.5622, "step": 829 }, { "epoch": 0.2594560800250078, "grad_norm": 0.1953125, "learning_rate": 0.00019797717448031936, "loss": 1.4121, "step": 830 }, { "epoch": 0.2597686777117849, "grad_norm": 0.20703125, "learning_rate": 0.00019797225450319744, "loss": 1.6693, "step": 831 }, { "epoch": 0.2600812753985621, "grad_norm": 0.208984375, "learning_rate": 0.0001979673286113657, "loss": 1.6021, "step": 832 }, { "epoch": 0.2603938730853392, "grad_norm": 0.2119140625, "learning_rate": 0.0001979623968051215, "loss": 1.9199, "step": 833 }, { "epoch": 0.2607064707721163, "grad_norm": 0.2080078125, "learning_rate": 0.00019795745908476254, "loss": 2.0403, "step": 834 }, { "epoch": 0.2610190684588934, "grad_norm": 0.2138671875, "learning_rate": 0.00019795251545058694, "loss": 1.8294, "step": 835 }, { "epoch": 0.26133166614567055, "grad_norm": 0.20703125, "learning_rate": 0.00019794756590289317, "loss": 1.8625, "step": 836 }, { "epoch": 0.26164426383244765, "grad_norm": 0.21484375, "learning_rate": 0.00019794261044198003, "loss": 1.8086, "step": 837 }, { "epoch": 0.26195686151922476, "grad_norm": 0.2119140625, "learning_rate": 0.0001979376490681467, "loss": 1.6601, "step": 838 }, { "epoch": 0.26226945920600186, "grad_norm": 0.2060546875, "learning_rate": 0.00019793268178169268, "loss": 1.5396, "step": 839 }, { "epoch": 0.26258205689277897, "grad_norm": 0.20703125, "learning_rate": 0.00019792770858291788, "loss": 1.7095, "step": 840 }, { "epoch": 0.26289465457955613, "grad_norm": 0.2177734375, "learning_rate": 0.00019792272947212253, "loss": 1.8782, "step": 841 }, { "epoch": 0.26320725226633324, "grad_norm": 0.19921875, "learning_rate": 0.00019791774444960717, "loss": 1.9358, "step": 842 }, { "epoch": 0.26351984995311034, "grad_norm": 0.2001953125, "learning_rate": 0.00019791275351567286, "loss": 1.7342, "step": 843 }, { "epoch": 0.26383244763988745, "grad_norm": 0.2001953125, "learning_rate": 0.00019790775667062081, "loss": 1.7846, "step": 844 }, { "epoch": 0.2641450453266646, "grad_norm": 0.201171875, "learning_rate": 0.0001979027539147527, "loss": 1.845, "step": 845 }, { "epoch": 0.2644576430134417, "grad_norm": 0.2060546875, "learning_rate": 0.0001978977452483706, "loss": 1.9035, "step": 846 }, { "epoch": 0.2647702407002188, "grad_norm": 0.1982421875, "learning_rate": 0.0001978927306717769, "loss": 1.5762, "step": 847 }, { "epoch": 0.2650828383869959, "grad_norm": 0.1953125, "learning_rate": 0.0001978877101852743, "loss": 2.1721, "step": 848 }, { "epoch": 0.26539543607377303, "grad_norm": 0.2080078125, "learning_rate": 0.00019788268378916586, "loss": 1.7108, "step": 849 }, { "epoch": 0.2657080337605502, "grad_norm": 1.0234375, "learning_rate": 0.00019787765148375508, "loss": 2.5699, "step": 850 }, { "epoch": 0.2660206314473273, "grad_norm": 0.203125, "learning_rate": 0.00019787261326934577, "loss": 1.6568, "step": 851 }, { "epoch": 0.2663332291341044, "grad_norm": 0.2001953125, "learning_rate": 0.00019786756914624208, "loss": 1.8594, "step": 852 }, { "epoch": 0.2666458268208815, "grad_norm": 0.2041015625, "learning_rate": 0.00019786251911474849, "loss": 1.8597, "step": 853 }, { "epoch": 0.26695842450765866, "grad_norm": 0.216796875, "learning_rate": 0.00019785746317516994, "loss": 2.0457, "step": 854 }, { "epoch": 0.26727102219443577, "grad_norm": 0.2275390625, "learning_rate": 0.00019785240132781163, "loss": 1.832, "step": 855 }, { "epoch": 0.2675836198812129, "grad_norm": 0.2080078125, "learning_rate": 0.00019784733357297915, "loss": 1.68, "step": 856 }, { "epoch": 0.26789621756799, "grad_norm": 0.2109375, "learning_rate": 0.00019784225991097848, "loss": 1.8997, "step": 857 }, { "epoch": 0.26820881525476714, "grad_norm": 0.2119140625, "learning_rate": 0.00019783718034211586, "loss": 1.7594, "step": 858 }, { "epoch": 0.26852141294154425, "grad_norm": 0.2138671875, "learning_rate": 0.000197832094866698, "loss": 1.7918, "step": 859 }, { "epoch": 0.26883401062832135, "grad_norm": 0.203125, "learning_rate": 0.00019782700348503193, "loss": 1.6616, "step": 860 }, { "epoch": 0.26914660831509846, "grad_norm": 0.201171875, "learning_rate": 0.00019782190619742495, "loss": 1.8357, "step": 861 }, { "epoch": 0.26945920600187556, "grad_norm": 0.2119140625, "learning_rate": 0.0001978168030041849, "loss": 1.74, "step": 862 }, { "epoch": 0.2697718036886527, "grad_norm": 0.203125, "learning_rate": 0.00019781169390561975, "loss": 1.4934, "step": 863 }, { "epoch": 0.27008440137542983, "grad_norm": 0.216796875, "learning_rate": 0.000197806578902038, "loss": 1.6285, "step": 864 }, { "epoch": 0.27039699906220693, "grad_norm": 0.2060546875, "learning_rate": 0.00019780145799374848, "loss": 1.5881, "step": 865 }, { "epoch": 0.27070959674898404, "grad_norm": 0.2080078125, "learning_rate": 0.00019779633118106028, "loss": 1.714, "step": 866 }, { "epoch": 0.2710221944357612, "grad_norm": 0.2119140625, "learning_rate": 0.000197791198464283, "loss": 1.9303, "step": 867 }, { "epoch": 0.2713347921225383, "grad_norm": 0.2060546875, "learning_rate": 0.0001977860598437264, "loss": 1.6095, "step": 868 }, { "epoch": 0.2716473898093154, "grad_norm": 0.2216796875, "learning_rate": 0.00019778091531970072, "loss": 1.7565, "step": 869 }, { "epoch": 0.2719599874960925, "grad_norm": 0.2099609375, "learning_rate": 0.00019777576489251664, "loss": 1.5668, "step": 870 }, { "epoch": 0.2722725851828696, "grad_norm": 0.20703125, "learning_rate": 0.00019777060856248504, "loss": 1.6762, "step": 871 }, { "epoch": 0.2725851828696468, "grad_norm": 0.2158203125, "learning_rate": 0.00019776544632991717, "loss": 1.7808, "step": 872 }, { "epoch": 0.2728977805564239, "grad_norm": 0.203125, "learning_rate": 0.00019776027819512474, "loss": 1.8983, "step": 873 }, { "epoch": 0.273210378243201, "grad_norm": 0.20703125, "learning_rate": 0.00019775510415841977, "loss": 1.837, "step": 874 }, { "epoch": 0.2735229759299781, "grad_norm": 0.20703125, "learning_rate": 0.00019774992422011452, "loss": 1.7363, "step": 875 }, { "epoch": 0.27383557361675526, "grad_norm": 0.2109375, "learning_rate": 0.00019774473838052184, "loss": 1.8509, "step": 876 }, { "epoch": 0.27414817130353236, "grad_norm": 0.23046875, "learning_rate": 0.00019773954663995476, "loss": 1.8239, "step": 877 }, { "epoch": 0.27446076899030947, "grad_norm": 0.205078125, "learning_rate": 0.00019773434899872665, "loss": 2.0633, "step": 878 }, { "epoch": 0.2747733666770866, "grad_norm": 0.21484375, "learning_rate": 0.00019772914545715135, "loss": 2.0269, "step": 879 }, { "epoch": 0.27508596436386373, "grad_norm": 0.2021484375, "learning_rate": 0.00019772393601554303, "loss": 1.7389, "step": 880 }, { "epoch": 0.27539856205064084, "grad_norm": 0.212890625, "learning_rate": 0.00019771872067421615, "loss": 2.0936, "step": 881 }, { "epoch": 0.27571115973741794, "grad_norm": 0.2080078125, "learning_rate": 0.00019771349943348558, "loss": 1.7132, "step": 882 }, { "epoch": 0.27602375742419505, "grad_norm": 0.21484375, "learning_rate": 0.00019770827229366654, "loss": 1.6179, "step": 883 }, { "epoch": 0.27633635511097215, "grad_norm": 0.2021484375, "learning_rate": 0.00019770303925507456, "loss": 1.9907, "step": 884 }, { "epoch": 0.2766489527977493, "grad_norm": 0.2080078125, "learning_rate": 0.0001976978003180256, "loss": 1.5918, "step": 885 }, { "epoch": 0.2769615504845264, "grad_norm": 0.2060546875, "learning_rate": 0.000197692555482836, "loss": 1.6069, "step": 886 }, { "epoch": 0.2772741481713035, "grad_norm": 0.2060546875, "learning_rate": 0.00019768730474982227, "loss": 1.9966, "step": 887 }, { "epoch": 0.27758674585808063, "grad_norm": 0.2236328125, "learning_rate": 0.00019768204811930148, "loss": 1.7923, "step": 888 }, { "epoch": 0.2778993435448578, "grad_norm": 0.2001953125, "learning_rate": 0.00019767678559159098, "loss": 1.6497, "step": 889 }, { "epoch": 0.2782119412316349, "grad_norm": 0.2109375, "learning_rate": 0.00019767151716700845, "loss": 1.9629, "step": 890 }, { "epoch": 0.278524538918412, "grad_norm": 0.20703125, "learning_rate": 0.00019766624284587195, "loss": 1.8348, "step": 891 }, { "epoch": 0.2788371366051891, "grad_norm": 0.2158203125, "learning_rate": 0.00019766096262849994, "loss": 1.8409, "step": 892 }, { "epoch": 0.27914973429196627, "grad_norm": 0.208984375, "learning_rate": 0.00019765567651521115, "loss": 1.7796, "step": 893 }, { "epoch": 0.2794623319787434, "grad_norm": 0.201171875, "learning_rate": 0.00019765038450632476, "loss": 1.9009, "step": 894 }, { "epoch": 0.2797749296655205, "grad_norm": 0.203125, "learning_rate": 0.00019764508660216019, "loss": 1.4491, "step": 895 }, { "epoch": 0.2800875273522976, "grad_norm": 0.2275390625, "learning_rate": 0.0001976397828030373, "loss": 1.5436, "step": 896 }, { "epoch": 0.2804001250390747, "grad_norm": 0.2109375, "learning_rate": 0.0001976344731092763, "loss": 1.8577, "step": 897 }, { "epoch": 0.28071272272585185, "grad_norm": 0.3828125, "learning_rate": 0.0001976291575211978, "loss": 2.6341, "step": 898 }, { "epoch": 0.28102532041262895, "grad_norm": 0.203125, "learning_rate": 0.00019762383603912258, "loss": 1.6624, "step": 899 }, { "epoch": 0.28133791809940606, "grad_norm": 0.205078125, "learning_rate": 0.000197618508663372, "loss": 1.6193, "step": 900 }, { "epoch": 0.28165051578618316, "grad_norm": 0.1982421875, "learning_rate": 0.00019761317539426765, "loss": 1.6416, "step": 901 }, { "epoch": 0.2819631134729603, "grad_norm": 0.2138671875, "learning_rate": 0.00019760783623213153, "loss": 1.5813, "step": 902 }, { "epoch": 0.28227571115973743, "grad_norm": 0.2119140625, "learning_rate": 0.00019760249117728592, "loss": 2.1245, "step": 903 }, { "epoch": 0.28258830884651454, "grad_norm": 0.236328125, "learning_rate": 0.00019759714023005357, "loss": 2.0305, "step": 904 }, { "epoch": 0.28290090653329164, "grad_norm": 0.2060546875, "learning_rate": 0.0001975917833907575, "loss": 1.4689, "step": 905 }, { "epoch": 0.28321350422006875, "grad_norm": 0.1962890625, "learning_rate": 0.00019758642065972112, "loss": 1.9306, "step": 906 }, { "epoch": 0.2835261019068459, "grad_norm": 0.212890625, "learning_rate": 0.0001975810520372681, "loss": 1.8309, "step": 907 }, { "epoch": 0.283838699593623, "grad_norm": 0.216796875, "learning_rate": 0.0001975756775237227, "loss": 1.732, "step": 908 }, { "epoch": 0.2841512972804001, "grad_norm": 0.208984375, "learning_rate": 0.00019757029711940923, "loss": 1.5233, "step": 909 }, { "epoch": 0.2844638949671772, "grad_norm": 0.2099609375, "learning_rate": 0.00019756491082465263, "loss": 1.6491, "step": 910 }, { "epoch": 0.2847764926539544, "grad_norm": 0.201171875, "learning_rate": 0.00019755951863977805, "loss": 2.2236, "step": 911 }, { "epoch": 0.2850890903407315, "grad_norm": 0.2060546875, "learning_rate": 0.00019755412056511097, "loss": 1.8299, "step": 912 }, { "epoch": 0.2854016880275086, "grad_norm": 0.2119140625, "learning_rate": 0.00019754871660097734, "loss": 1.5403, "step": 913 }, { "epoch": 0.2857142857142857, "grad_norm": 0.2001953125, "learning_rate": 0.00019754330674770339, "loss": 1.5712, "step": 914 }, { "epoch": 0.28602688340106286, "grad_norm": 0.2080078125, "learning_rate": 0.00019753789100561569, "loss": 1.8814, "step": 915 }, { "epoch": 0.28633948108783996, "grad_norm": 0.2080078125, "learning_rate": 0.0001975324693750412, "loss": 1.7153, "step": 916 }, { "epoch": 0.28665207877461707, "grad_norm": 0.2060546875, "learning_rate": 0.0001975270418563073, "loss": 2.0221, "step": 917 }, { "epoch": 0.2869646764613942, "grad_norm": 0.2109375, "learning_rate": 0.00019752160844974158, "loss": 1.7176, "step": 918 }, { "epoch": 0.2872772741481713, "grad_norm": 0.208984375, "learning_rate": 0.0001975161691556721, "loss": 1.8581, "step": 919 }, { "epoch": 0.28758987183494844, "grad_norm": 0.2021484375, "learning_rate": 0.00019751072397442715, "loss": 1.8127, "step": 920 }, { "epoch": 0.28790246952172555, "grad_norm": 0.2041015625, "learning_rate": 0.0001975052729063356, "loss": 1.5183, "step": 921 }, { "epoch": 0.28821506720850265, "grad_norm": 0.2060546875, "learning_rate": 0.00019749981595172647, "loss": 1.7887, "step": 922 }, { "epoch": 0.28852766489527976, "grad_norm": 0.2001953125, "learning_rate": 0.00019749435311092919, "loss": 1.7053, "step": 923 }, { "epoch": 0.2888402625820569, "grad_norm": 0.212890625, "learning_rate": 0.00019748888438427358, "loss": 1.7008, "step": 924 }, { "epoch": 0.289152860268834, "grad_norm": 0.2099609375, "learning_rate": 0.00019748340977208977, "loss": 1.888, "step": 925 }, { "epoch": 0.2894654579556111, "grad_norm": 0.2001953125, "learning_rate": 0.0001974779292747083, "loss": 1.708, "step": 926 }, { "epoch": 0.28977805564238823, "grad_norm": 0.21484375, "learning_rate": 0.00019747244289246006, "loss": 1.8244, "step": 927 }, { "epoch": 0.29009065332916534, "grad_norm": 0.1982421875, "learning_rate": 0.0001974669506256762, "loss": 1.5614, "step": 928 }, { "epoch": 0.2904032510159425, "grad_norm": 0.38671875, "learning_rate": 0.00019746145247468832, "loss": 2.2925, "step": 929 }, { "epoch": 0.2907158487027196, "grad_norm": 0.2001953125, "learning_rate": 0.00019745594843982836, "loss": 1.7933, "step": 930 }, { "epoch": 0.2910284463894967, "grad_norm": 0.2041015625, "learning_rate": 0.0001974504385214286, "loss": 1.8521, "step": 931 }, { "epoch": 0.2913410440762738, "grad_norm": 0.2119140625, "learning_rate": 0.00019744492271982168, "loss": 1.6939, "step": 932 }, { "epoch": 0.291653641763051, "grad_norm": 0.2158203125, "learning_rate": 0.00019743940103534062, "loss": 1.6783, "step": 933 }, { "epoch": 0.2919662394498281, "grad_norm": 0.203125, "learning_rate": 0.00019743387346831876, "loss": 2.0204, "step": 934 }, { "epoch": 0.2922788371366052, "grad_norm": 0.197265625, "learning_rate": 0.00019742834001908977, "loss": 1.7812, "step": 935 }, { "epoch": 0.2925914348233823, "grad_norm": 0.2041015625, "learning_rate": 0.00019742280068798775, "loss": 1.7483, "step": 936 }, { "epoch": 0.29290403251015945, "grad_norm": 0.21484375, "learning_rate": 0.00019741725547534712, "loss": 1.8223, "step": 937 }, { "epoch": 0.29321663019693656, "grad_norm": 0.20703125, "learning_rate": 0.0001974117043815026, "loss": 1.8306, "step": 938 }, { "epoch": 0.29352922788371366, "grad_norm": 0.2080078125, "learning_rate": 0.00019740614740678937, "loss": 1.9111, "step": 939 }, { "epoch": 0.29384182557049077, "grad_norm": 0.224609375, "learning_rate": 0.0001974005845515429, "loss": 1.7384, "step": 940 }, { "epoch": 0.29415442325726787, "grad_norm": 0.2197265625, "learning_rate": 0.00019739501581609903, "loss": 1.7809, "step": 941 }, { "epoch": 0.29446702094404503, "grad_norm": 0.212890625, "learning_rate": 0.00019738944120079393, "loss": 1.8266, "step": 942 }, { "epoch": 0.29477961863082214, "grad_norm": 0.203125, "learning_rate": 0.0001973838607059642, "loss": 2.0459, "step": 943 }, { "epoch": 0.29509221631759924, "grad_norm": 0.2197265625, "learning_rate": 0.00019737827433194665, "loss": 1.7519, "step": 944 }, { "epoch": 0.29540481400437635, "grad_norm": 0.2080078125, "learning_rate": 0.0001973726820790786, "loss": 1.6264, "step": 945 }, { "epoch": 0.2957174116911535, "grad_norm": 0.2041015625, "learning_rate": 0.00019736708394769764, "loss": 1.6892, "step": 946 }, { "epoch": 0.2960300093779306, "grad_norm": 0.2158203125, "learning_rate": 0.00019736147993814176, "loss": 1.9491, "step": 947 }, { "epoch": 0.2963426070647077, "grad_norm": 0.1953125, "learning_rate": 0.00019735587005074927, "loss": 1.7754, "step": 948 }, { "epoch": 0.2966552047514848, "grad_norm": 0.2236328125, "learning_rate": 0.00019735025428585886, "loss": 1.9126, "step": 949 }, { "epoch": 0.29696780243826193, "grad_norm": 0.2080078125, "learning_rate": 0.00019734463264380953, "loss": 2.071, "step": 950 }, { "epoch": 0.2972804001250391, "grad_norm": 0.2041015625, "learning_rate": 0.0001973390051249407, "loss": 1.6336, "step": 951 }, { "epoch": 0.2975929978118162, "grad_norm": 0.2080078125, "learning_rate": 0.00019733337172959204, "loss": 1.4598, "step": 952 }, { "epoch": 0.2979055954985933, "grad_norm": 0.2041015625, "learning_rate": 0.0001973277324581037, "loss": 1.5984, "step": 953 }, { "epoch": 0.2982181931853704, "grad_norm": 0.21875, "learning_rate": 0.00019732208731081615, "loss": 1.9082, "step": 954 }, { "epoch": 0.29853079087214757, "grad_norm": 0.2099609375, "learning_rate": 0.00019731643628807018, "loss": 1.6075, "step": 955 }, { "epoch": 0.29884338855892467, "grad_norm": 0.2021484375, "learning_rate": 0.00019731077939020693, "loss": 1.9933, "step": 956 }, { "epoch": 0.2991559862457018, "grad_norm": 0.20703125, "learning_rate": 0.00019730511661756792, "loss": 1.5719, "step": 957 }, { "epoch": 0.2994685839324789, "grad_norm": 0.20703125, "learning_rate": 0.00019729944797049502, "loss": 1.6318, "step": 958 }, { "epoch": 0.29978118161925604, "grad_norm": 0.1982421875, "learning_rate": 0.00019729377344933043, "loss": 1.8574, "step": 959 }, { "epoch": 0.30009377930603315, "grad_norm": 0.208984375, "learning_rate": 0.0001972880930544168, "loss": 1.9144, "step": 960 }, { "epoch": 0.30040637699281025, "grad_norm": 0.2001953125, "learning_rate": 0.000197282406786097, "loss": 1.7335, "step": 961 }, { "epoch": 0.30071897467958736, "grad_norm": 0.203125, "learning_rate": 0.00019727671464471436, "loss": 1.7289, "step": 962 }, { "epoch": 0.30103157236636446, "grad_norm": 0.2119140625, "learning_rate": 0.00019727101663061247, "loss": 1.994, "step": 963 }, { "epoch": 0.3013441700531416, "grad_norm": 0.205078125, "learning_rate": 0.00019726531274413532, "loss": 1.7233, "step": 964 }, { "epoch": 0.30165676773991873, "grad_norm": 0.2099609375, "learning_rate": 0.00019725960298562733, "loss": 1.8961, "step": 965 }, { "epoch": 0.30196936542669583, "grad_norm": 0.21484375, "learning_rate": 0.00019725388735543318, "loss": 1.6978, "step": 966 }, { "epoch": 0.30228196311347294, "grad_norm": 0.2119140625, "learning_rate": 0.0001972481658538979, "loss": 1.752, "step": 967 }, { "epoch": 0.3025945608002501, "grad_norm": 0.205078125, "learning_rate": 0.00019724243848136692, "loss": 2.0531, "step": 968 }, { "epoch": 0.3029071584870272, "grad_norm": 0.208984375, "learning_rate": 0.000197236705238186, "loss": 1.7117, "step": 969 }, { "epoch": 0.3032197561738043, "grad_norm": 0.20703125, "learning_rate": 0.00019723096612470133, "loss": 1.5911, "step": 970 }, { "epoch": 0.3035323538605814, "grad_norm": 0.20703125, "learning_rate": 0.00019722522114125929, "loss": 1.8811, "step": 971 }, { "epoch": 0.3038449515473586, "grad_norm": 0.22265625, "learning_rate": 0.00019721947028820676, "loss": 1.6444, "step": 972 }, { "epoch": 0.3041575492341357, "grad_norm": 0.2080078125, "learning_rate": 0.0001972137135658909, "loss": 1.5187, "step": 973 }, { "epoch": 0.3044701469209128, "grad_norm": 0.2138671875, "learning_rate": 0.0001972079509746593, "loss": 1.6957, "step": 974 }, { "epoch": 0.3047827446076899, "grad_norm": 0.2099609375, "learning_rate": 0.00019720218251485983, "loss": 1.5887, "step": 975 }, { "epoch": 0.305095342294467, "grad_norm": 0.216796875, "learning_rate": 0.0001971964081868407, "loss": 1.7837, "step": 976 }, { "epoch": 0.30540793998124416, "grad_norm": 0.2099609375, "learning_rate": 0.0001971906279909506, "loss": 1.8848, "step": 977 }, { "epoch": 0.30572053766802126, "grad_norm": 0.224609375, "learning_rate": 0.0001971848419275384, "loss": 1.8966, "step": 978 }, { "epoch": 0.30603313535479837, "grad_norm": 0.1982421875, "learning_rate": 0.00019717904999695348, "loss": 1.6581, "step": 979 }, { "epoch": 0.3063457330415755, "grad_norm": 0.2021484375, "learning_rate": 0.00019717325219954543, "loss": 1.6071, "step": 980 }, { "epoch": 0.30665833072835263, "grad_norm": 0.2060546875, "learning_rate": 0.00019716744853566436, "loss": 1.8169, "step": 981 }, { "epoch": 0.30697092841512974, "grad_norm": 0.197265625, "learning_rate": 0.0001971616390056606, "loss": 1.6017, "step": 982 }, { "epoch": 0.30728352610190685, "grad_norm": 0.1982421875, "learning_rate": 0.00019715582360988482, "loss": 1.6999, "step": 983 }, { "epoch": 0.30759612378868395, "grad_norm": 0.2099609375, "learning_rate": 0.00019715000234868821, "loss": 1.7758, "step": 984 }, { "epoch": 0.30790872147546106, "grad_norm": 0.2080078125, "learning_rate": 0.00019714417522242214, "loss": 1.9776, "step": 985 }, { "epoch": 0.3082213191622382, "grad_norm": 0.2265625, "learning_rate": 0.00019713834223143844, "loss": 1.7776, "step": 986 }, { "epoch": 0.3085339168490153, "grad_norm": 0.2197265625, "learning_rate": 0.00019713250337608922, "loss": 1.8847, "step": 987 }, { "epoch": 0.3088465145357924, "grad_norm": 0.2109375, "learning_rate": 0.000197126658656727, "loss": 1.8091, "step": 988 }, { "epoch": 0.30915911222256953, "grad_norm": 0.212890625, "learning_rate": 0.00019712080807370464, "loss": 1.804, "step": 989 }, { "epoch": 0.3094717099093467, "grad_norm": 0.22265625, "learning_rate": 0.00019711495162737529, "loss": 1.782, "step": 990 }, { "epoch": 0.3097843075961238, "grad_norm": 0.201171875, "learning_rate": 0.0001971090893180926, "loss": 1.5211, "step": 991 }, { "epoch": 0.3100969052829009, "grad_norm": 0.2001953125, "learning_rate": 0.0001971032211462104, "loss": 1.4168, "step": 992 }, { "epoch": 0.310409502969678, "grad_norm": 0.212890625, "learning_rate": 0.00019709734711208303, "loss": 1.5656, "step": 993 }, { "epoch": 0.31072210065645517, "grad_norm": 0.2080078125, "learning_rate": 0.00019709146721606509, "loss": 1.818, "step": 994 }, { "epoch": 0.3110346983432323, "grad_norm": 0.205078125, "learning_rate": 0.00019708558145851152, "loss": 1.7158, "step": 995 }, { "epoch": 0.3113472960300094, "grad_norm": 0.2197265625, "learning_rate": 0.0001970796898397777, "loss": 1.6944, "step": 996 }, { "epoch": 0.3116598937167865, "grad_norm": 0.2109375, "learning_rate": 0.0001970737923602193, "loss": 1.7961, "step": 997 }, { "epoch": 0.3119724914035636, "grad_norm": 0.201171875, "learning_rate": 0.00019706788902019233, "loss": 1.8871, "step": 998 }, { "epoch": 0.31228508909034075, "grad_norm": 0.205078125, "learning_rate": 0.00019706197982005322, "loss": 1.8513, "step": 999 }, { "epoch": 0.31259768677711786, "grad_norm": 0.2138671875, "learning_rate": 0.0001970560647601587, "loss": 1.6529, "step": 1000 }, { "epoch": 0.31291028446389496, "grad_norm": 0.21875, "learning_rate": 0.0001970501438408659, "loss": 1.7564, "step": 1001 }, { "epoch": 0.31322288215067207, "grad_norm": 0.2041015625, "learning_rate": 0.0001970442170625322, "loss": 1.5718, "step": 1002 }, { "epoch": 0.3135354798374492, "grad_norm": 0.201171875, "learning_rate": 0.00019703828442551547, "loss": 1.9791, "step": 1003 }, { "epoch": 0.31384807752422633, "grad_norm": 0.2158203125, "learning_rate": 0.00019703234593017386, "loss": 1.5583, "step": 1004 }, { "epoch": 0.31416067521100344, "grad_norm": 0.19921875, "learning_rate": 0.00019702640157686586, "loss": 1.8005, "step": 1005 }, { "epoch": 0.31447327289778054, "grad_norm": 0.216796875, "learning_rate": 0.00019702045136595032, "loss": 2.0622, "step": 1006 }, { "epoch": 0.31478587058455765, "grad_norm": 0.19921875, "learning_rate": 0.00019701449529778656, "loss": 1.6313, "step": 1007 }, { "epoch": 0.3150984682713348, "grad_norm": 0.2138671875, "learning_rate": 0.00019700853337273406, "loss": 1.7088, "step": 1008 }, { "epoch": 0.3154110659581119, "grad_norm": 0.2041015625, "learning_rate": 0.0001970025655911528, "loss": 1.7942, "step": 1009 }, { "epoch": 0.315723663644889, "grad_norm": 0.2041015625, "learning_rate": 0.00019699659195340303, "loss": 1.8139, "step": 1010 }, { "epoch": 0.3160362613316661, "grad_norm": 0.212890625, "learning_rate": 0.0001969906124598454, "loss": 1.6704, "step": 1011 }, { "epoch": 0.3163488590184433, "grad_norm": 0.2109375, "learning_rate": 0.00019698462711084091, "loss": 1.9731, "step": 1012 }, { "epoch": 0.3166614567052204, "grad_norm": 0.2109375, "learning_rate": 0.00019697863590675086, "loss": 1.6923, "step": 1013 }, { "epoch": 0.3169740543919975, "grad_norm": 0.21484375, "learning_rate": 0.00019697263884793702, "loss": 1.8974, "step": 1014 }, { "epoch": 0.3172866520787746, "grad_norm": 0.212890625, "learning_rate": 0.0001969666359347614, "loss": 2.0298, "step": 1015 }, { "epoch": 0.31759924976555176, "grad_norm": 0.2001953125, "learning_rate": 0.00019696062716758638, "loss": 1.6155, "step": 1016 }, { "epoch": 0.31791184745232887, "grad_norm": 0.212890625, "learning_rate": 0.00019695461254677475, "loss": 1.6622, "step": 1017 }, { "epoch": 0.31822444513910597, "grad_norm": 0.201171875, "learning_rate": 0.00019694859207268958, "loss": 2.0245, "step": 1018 }, { "epoch": 0.3185370428258831, "grad_norm": 0.205078125, "learning_rate": 0.0001969425657456944, "loss": 1.7654, "step": 1019 }, { "epoch": 0.3188496405126602, "grad_norm": 0.203125, "learning_rate": 0.00019693653356615297, "loss": 1.6629, "step": 1020 }, { "epoch": 0.31916223819943734, "grad_norm": 0.2099609375, "learning_rate": 0.00019693049553442952, "loss": 1.7823, "step": 1021 }, { "epoch": 0.31947483588621445, "grad_norm": 0.2099609375, "learning_rate": 0.0001969244516508885, "loss": 1.5993, "step": 1022 }, { "epoch": 0.31978743357299155, "grad_norm": 0.2109375, "learning_rate": 0.0001969184019158948, "loss": 1.7385, "step": 1023 }, { "epoch": 0.32010003125976866, "grad_norm": 0.220703125, "learning_rate": 0.00019691234632981372, "loss": 2.0781, "step": 1024 }, { "epoch": 0.3204126289465458, "grad_norm": 0.2080078125, "learning_rate": 0.00019690628489301077, "loss": 1.6396, "step": 1025 }, { "epoch": 0.3207252266333229, "grad_norm": 0.2099609375, "learning_rate": 0.00019690021760585192, "loss": 1.7066, "step": 1026 }, { "epoch": 0.32103782432010003, "grad_norm": 0.216796875, "learning_rate": 0.00019689414446870344, "loss": 1.6741, "step": 1027 }, { "epoch": 0.32135042200687713, "grad_norm": 0.2109375, "learning_rate": 0.000196888065481932, "loss": 1.8628, "step": 1028 }, { "epoch": 0.32166301969365424, "grad_norm": 0.234375, "learning_rate": 0.00019688198064590458, "loss": 1.8129, "step": 1029 }, { "epoch": 0.3219756173804314, "grad_norm": 0.203125, "learning_rate": 0.00019687588996098853, "loss": 1.9068, "step": 1030 }, { "epoch": 0.3222882150672085, "grad_norm": 0.2109375, "learning_rate": 0.00019686979342755154, "loss": 1.8664, "step": 1031 }, { "epoch": 0.3226008127539856, "grad_norm": 0.2236328125, "learning_rate": 0.0001968636910459617, "loss": 1.7239, "step": 1032 }, { "epoch": 0.3229134104407627, "grad_norm": 0.201171875, "learning_rate": 0.00019685758281658738, "loss": 1.9294, "step": 1033 }, { "epoch": 0.3232260081275399, "grad_norm": 0.20703125, "learning_rate": 0.00019685146873979736, "loss": 1.7469, "step": 1034 }, { "epoch": 0.323538605814317, "grad_norm": 0.208984375, "learning_rate": 0.00019684534881596078, "loss": 1.8425, "step": 1035 }, { "epoch": 0.3238512035010941, "grad_norm": 0.208984375, "learning_rate": 0.00019683922304544705, "loss": 1.5658, "step": 1036 }, { "epoch": 0.3241638011878712, "grad_norm": 0.20703125, "learning_rate": 0.000196833091428626, "loss": 1.7025, "step": 1037 }, { "epoch": 0.32447639887464835, "grad_norm": 0.20703125, "learning_rate": 0.00019682695396586785, "loss": 1.7166, "step": 1038 }, { "epoch": 0.32478899656142546, "grad_norm": 0.220703125, "learning_rate": 0.00019682081065754313, "loss": 1.8159, "step": 1039 }, { "epoch": 0.32510159424820256, "grad_norm": 0.2080078125, "learning_rate": 0.00019681466150402266, "loss": 1.7957, "step": 1040 }, { "epoch": 0.32541419193497967, "grad_norm": 0.2373046875, "learning_rate": 0.0001968085065056777, "loss": 1.6375, "step": 1041 }, { "epoch": 0.3257267896217568, "grad_norm": 0.21484375, "learning_rate": 0.00019680234566287985, "loss": 2.1855, "step": 1042 }, { "epoch": 0.32603938730853393, "grad_norm": 0.20703125, "learning_rate": 0.00019679617897600102, "loss": 1.8348, "step": 1043 }, { "epoch": 0.32635198499531104, "grad_norm": 0.2021484375, "learning_rate": 0.00019679000644541356, "loss": 1.6444, "step": 1044 }, { "epoch": 0.32666458268208814, "grad_norm": 0.205078125, "learning_rate": 0.00019678382807149003, "loss": 1.8918, "step": 1045 }, { "epoch": 0.32697718036886525, "grad_norm": 0.2080078125, "learning_rate": 0.00019677764385460348, "loss": 1.6544, "step": 1046 }, { "epoch": 0.3272897780556424, "grad_norm": 0.2099609375, "learning_rate": 0.00019677145379512723, "loss": 1.8734, "step": 1047 }, { "epoch": 0.3276023757424195, "grad_norm": 0.208984375, "learning_rate": 0.00019676525789343502, "loss": 1.8792, "step": 1048 }, { "epoch": 0.3279149734291966, "grad_norm": 0.1982421875, "learning_rate": 0.00019675905614990085, "loss": 1.8914, "step": 1049 }, { "epoch": 0.3282275711159737, "grad_norm": 0.20703125, "learning_rate": 0.0001967528485648992, "loss": 1.6186, "step": 1050 }, { "epoch": 0.3285401688027509, "grad_norm": 0.2236328125, "learning_rate": 0.00019674663513880475, "loss": 1.7937, "step": 1051 }, { "epoch": 0.328852766489528, "grad_norm": 0.203125, "learning_rate": 0.00019674041587199268, "loss": 1.7155, "step": 1052 }, { "epoch": 0.3291653641763051, "grad_norm": 0.2099609375, "learning_rate": 0.0001967341907648384, "loss": 1.8787, "step": 1053 }, { "epoch": 0.3294779618630822, "grad_norm": 0.220703125, "learning_rate": 0.00019672795981771777, "loss": 1.6195, "step": 1054 }, { "epoch": 0.3297905595498593, "grad_norm": 0.203125, "learning_rate": 0.00019672172303100696, "loss": 1.9987, "step": 1055 }, { "epoch": 0.33010315723663647, "grad_norm": 0.2099609375, "learning_rate": 0.00019671548040508244, "loss": 1.6107, "step": 1056 }, { "epoch": 0.3304157549234136, "grad_norm": 0.20703125, "learning_rate": 0.00019670923194032116, "loss": 1.6394, "step": 1057 }, { "epoch": 0.3307283526101907, "grad_norm": 0.19921875, "learning_rate": 0.00019670297763710028, "loss": 1.7142, "step": 1058 }, { "epoch": 0.3310409502969678, "grad_norm": 0.205078125, "learning_rate": 0.00019669671749579742, "loss": 1.8344, "step": 1059 }, { "epoch": 0.33135354798374494, "grad_norm": 0.212890625, "learning_rate": 0.0001966904515167905, "loss": 1.933, "step": 1060 }, { "epoch": 0.33166614567052205, "grad_norm": 0.2041015625, "learning_rate": 0.0001966841797004578, "loss": 1.763, "step": 1061 }, { "epoch": 0.33197874335729916, "grad_norm": 0.2041015625, "learning_rate": 0.000196677902047178, "loss": 1.8741, "step": 1062 }, { "epoch": 0.33229134104407626, "grad_norm": 0.2099609375, "learning_rate": 0.00019667161855733002, "loss": 1.8624, "step": 1063 }, { "epoch": 0.33260393873085337, "grad_norm": 0.216796875, "learning_rate": 0.00019666532923129327, "loss": 1.899, "step": 1064 }, { "epoch": 0.3329165364176305, "grad_norm": 0.30078125, "learning_rate": 0.00019665903406944737, "loss": 2.3084, "step": 1065 }, { "epoch": 0.33322913410440763, "grad_norm": 0.197265625, "learning_rate": 0.00019665273307217245, "loss": 1.6737, "step": 1066 }, { "epoch": 0.33354173179118474, "grad_norm": 0.216796875, "learning_rate": 0.00019664642623984886, "loss": 1.6899, "step": 1067 }, { "epoch": 0.33385432947796184, "grad_norm": 0.2060546875, "learning_rate": 0.00019664011357285735, "loss": 1.8702, "step": 1068 }, { "epoch": 0.334166927164739, "grad_norm": 0.2060546875, "learning_rate": 0.00019663379507157903, "loss": 2.0766, "step": 1069 }, { "epoch": 0.3344795248515161, "grad_norm": 0.1953125, "learning_rate": 0.00019662747073639537, "loss": 1.9336, "step": 1070 }, { "epoch": 0.3347921225382932, "grad_norm": 0.2041015625, "learning_rate": 0.00019662114056768815, "loss": 1.8872, "step": 1071 }, { "epoch": 0.3351047202250703, "grad_norm": 0.2216796875, "learning_rate": 0.00019661480456583958, "loss": 1.7719, "step": 1072 }, { "epoch": 0.3354173179118475, "grad_norm": 0.220703125, "learning_rate": 0.00019660846273123213, "loss": 1.695, "step": 1073 }, { "epoch": 0.3357299155986246, "grad_norm": 0.208984375, "learning_rate": 0.00019660211506424867, "loss": 1.8269, "step": 1074 }, { "epoch": 0.3360425132854017, "grad_norm": 0.2119140625, "learning_rate": 0.0001965957615652724, "loss": 1.8746, "step": 1075 }, { "epoch": 0.3363551109721788, "grad_norm": 0.2216796875, "learning_rate": 0.00019658940223468693, "loss": 1.5041, "step": 1076 }, { "epoch": 0.3366677086589559, "grad_norm": 0.22265625, "learning_rate": 0.00019658303707287617, "loss": 1.8079, "step": 1077 }, { "epoch": 0.33698030634573306, "grad_norm": 0.2060546875, "learning_rate": 0.00019657666608022438, "loss": 1.7644, "step": 1078 }, { "epoch": 0.33729290403251017, "grad_norm": 0.212890625, "learning_rate": 0.00019657028925711617, "loss": 1.759, "step": 1079 }, { "epoch": 0.33760550171928727, "grad_norm": 0.220703125, "learning_rate": 0.00019656390660393659, "loss": 1.9192, "step": 1080 }, { "epoch": 0.3379180994060644, "grad_norm": 0.2099609375, "learning_rate": 0.00019655751812107085, "loss": 1.9153, "step": 1081 }, { "epoch": 0.33823069709284154, "grad_norm": 0.2158203125, "learning_rate": 0.00019655112380890475, "loss": 1.688, "step": 1082 }, { "epoch": 0.33854329477961864, "grad_norm": 0.2197265625, "learning_rate": 0.00019654472366782425, "loss": 1.907, "step": 1083 }, { "epoch": 0.33885589246639575, "grad_norm": 0.212890625, "learning_rate": 0.00019653831769821575, "loss": 1.9453, "step": 1084 }, { "epoch": 0.33916849015317285, "grad_norm": 0.2099609375, "learning_rate": 0.000196531905900466, "loss": 1.6311, "step": 1085 }, { "epoch": 0.33948108783994996, "grad_norm": 0.2119140625, "learning_rate": 0.00019652548827496207, "loss": 1.9493, "step": 1086 }, { "epoch": 0.3397936855267271, "grad_norm": 0.208984375, "learning_rate": 0.0001965190648220914, "loss": 1.8175, "step": 1087 }, { "epoch": 0.3401062832135042, "grad_norm": 0.19921875, "learning_rate": 0.0001965126355422418, "loss": 1.8018, "step": 1088 }, { "epoch": 0.34041888090028133, "grad_norm": 0.2099609375, "learning_rate": 0.0001965062004358014, "loss": 1.6674, "step": 1089 }, { "epoch": 0.34073147858705843, "grad_norm": 0.2158203125, "learning_rate": 0.0001964997595031587, "loss": 1.9538, "step": 1090 }, { "epoch": 0.3410440762738356, "grad_norm": 0.203125, "learning_rate": 0.00019649331274470256, "loss": 1.8417, "step": 1091 }, { "epoch": 0.3413566739606127, "grad_norm": 0.21484375, "learning_rate": 0.00019648686016082216, "loss": 2.0019, "step": 1092 }, { "epoch": 0.3416692716473898, "grad_norm": 0.21484375, "learning_rate": 0.00019648040175190707, "loss": 1.7955, "step": 1093 }, { "epoch": 0.3419818693341669, "grad_norm": 0.22265625, "learning_rate": 0.00019647393751834718, "loss": 1.6747, "step": 1094 }, { "epoch": 0.34229446702094407, "grad_norm": 0.2177734375, "learning_rate": 0.00019646746746053274, "loss": 1.7818, "step": 1095 }, { "epoch": 0.3426070647077212, "grad_norm": 0.20703125, "learning_rate": 0.00019646099157885437, "loss": 1.7983, "step": 1096 }, { "epoch": 0.3429196623944983, "grad_norm": 0.2265625, "learning_rate": 0.00019645450987370298, "loss": 1.677, "step": 1097 }, { "epoch": 0.3432322600812754, "grad_norm": 0.2119140625, "learning_rate": 0.00019644802234546993, "loss": 1.9241, "step": 1098 }, { "epoch": 0.3435448577680525, "grad_norm": 0.212890625, "learning_rate": 0.0001964415289945469, "loss": 1.9008, "step": 1099 }, { "epoch": 0.34385745545482965, "grad_norm": 0.2021484375, "learning_rate": 0.00019643502982132581, "loss": 1.6438, "step": 1100 }, { "epoch": 0.34417005314160676, "grad_norm": 0.2177734375, "learning_rate": 0.0001964285248261991, "loss": 1.7665, "step": 1101 }, { "epoch": 0.34448265082838386, "grad_norm": 0.2109375, "learning_rate": 0.0001964220140095595, "loss": 1.7259, "step": 1102 }, { "epoch": 0.34479524851516097, "grad_norm": 0.2099609375, "learning_rate": 0.00019641549737180002, "loss": 1.7119, "step": 1103 }, { "epoch": 0.34510784620193813, "grad_norm": 0.2314453125, "learning_rate": 0.00019640897491331408, "loss": 1.6551, "step": 1104 }, { "epoch": 0.34542044388871523, "grad_norm": 0.2138671875, "learning_rate": 0.0001964024466344955, "loss": 1.9882, "step": 1105 }, { "epoch": 0.34573304157549234, "grad_norm": 0.2275390625, "learning_rate": 0.00019639591253573836, "loss": 1.7573, "step": 1106 }, { "epoch": 0.34604563926226944, "grad_norm": 0.208984375, "learning_rate": 0.00019638937261743714, "loss": 1.6814, "step": 1107 }, { "epoch": 0.34635823694904655, "grad_norm": 0.2060546875, "learning_rate": 0.00019638282687998667, "loss": 1.943, "step": 1108 }, { "epoch": 0.3466708346358237, "grad_norm": 0.220703125, "learning_rate": 0.00019637627532378212, "loss": 1.6896, "step": 1109 }, { "epoch": 0.3469834323226008, "grad_norm": 0.2197265625, "learning_rate": 0.000196369717949219, "loss": 1.8984, "step": 1110 }, { "epoch": 0.3472960300093779, "grad_norm": 0.201171875, "learning_rate": 0.00019636315475669324, "loss": 1.4845, "step": 1111 }, { "epoch": 0.347608627696155, "grad_norm": 0.2080078125, "learning_rate": 0.00019635658574660098, "loss": 1.7234, "step": 1112 }, { "epoch": 0.3479212253829322, "grad_norm": 0.201171875, "learning_rate": 0.0001963500109193389, "loss": 1.5583, "step": 1113 }, { "epoch": 0.3482338230697093, "grad_norm": 0.220703125, "learning_rate": 0.00019634343027530383, "loss": 1.8789, "step": 1114 }, { "epoch": 0.3485464207564864, "grad_norm": 0.21484375, "learning_rate": 0.00019633684381489315, "loss": 2.0262, "step": 1115 }, { "epoch": 0.3488590184432635, "grad_norm": 0.2080078125, "learning_rate": 0.00019633025153850442, "loss": 1.7877, "step": 1116 }, { "epoch": 0.34917161613004066, "grad_norm": 0.216796875, "learning_rate": 0.00019632365344653563, "loss": 1.7381, "step": 1117 }, { "epoch": 0.34948421381681777, "grad_norm": 0.2138671875, "learning_rate": 0.00019631704953938518, "loss": 1.7758, "step": 1118 }, { "epoch": 0.3497968115035949, "grad_norm": 0.212890625, "learning_rate": 0.0001963104398174517, "loss": 1.8063, "step": 1119 }, { "epoch": 0.350109409190372, "grad_norm": 0.2041015625, "learning_rate": 0.00019630382428113417, "loss": 1.8691, "step": 1120 }, { "epoch": 0.3504220068771491, "grad_norm": 0.203125, "learning_rate": 0.00019629720293083214, "loss": 1.7844, "step": 1121 }, { "epoch": 0.35073460456392624, "grad_norm": 0.2001953125, "learning_rate": 0.00019629057576694522, "loss": 1.6097, "step": 1122 }, { "epoch": 0.35104720225070335, "grad_norm": 0.21875, "learning_rate": 0.00019628394278987355, "loss": 1.9393, "step": 1123 }, { "epoch": 0.35135979993748045, "grad_norm": 0.2060546875, "learning_rate": 0.0001962773040000175, "loss": 1.7556, "step": 1124 }, { "epoch": 0.35167239762425756, "grad_norm": 0.220703125, "learning_rate": 0.000196270659397778, "loss": 1.7145, "step": 1125 }, { "epoch": 0.3519849953110347, "grad_norm": 0.220703125, "learning_rate": 0.0001962640089835561, "loss": 1.6505, "step": 1126 }, { "epoch": 0.3522975929978118, "grad_norm": 0.2197265625, "learning_rate": 0.00019625735275775327, "loss": 1.6953, "step": 1127 }, { "epoch": 0.35261019068458893, "grad_norm": 0.224609375, "learning_rate": 0.00019625069072077138, "loss": 1.7897, "step": 1128 }, { "epoch": 0.35292278837136604, "grad_norm": 0.2109375, "learning_rate": 0.0001962440228730127, "loss": 1.8916, "step": 1129 }, { "epoch": 0.3532353860581432, "grad_norm": 0.2138671875, "learning_rate": 0.00019623734921487965, "loss": 1.5444, "step": 1130 }, { "epoch": 0.3535479837449203, "grad_norm": 0.2138671875, "learning_rate": 0.00019623066974677525, "loss": 1.6391, "step": 1131 }, { "epoch": 0.3538605814316974, "grad_norm": 0.2138671875, "learning_rate": 0.00019622398446910263, "loss": 1.6171, "step": 1132 }, { "epoch": 0.3541731791184745, "grad_norm": 0.216796875, "learning_rate": 0.0001962172933822655, "loss": 1.6352, "step": 1133 }, { "epoch": 0.3544857768052516, "grad_norm": 0.220703125, "learning_rate": 0.00019621059648666772, "loss": 1.8147, "step": 1134 }, { "epoch": 0.3547983744920288, "grad_norm": 0.2099609375, "learning_rate": 0.00019620389378271366, "loss": 1.7773, "step": 1135 }, { "epoch": 0.3551109721788059, "grad_norm": 0.212890625, "learning_rate": 0.0001961971852708079, "loss": 1.7441, "step": 1136 }, { "epoch": 0.355423569865583, "grad_norm": 0.2197265625, "learning_rate": 0.00019619047095135553, "loss": 1.9931, "step": 1137 }, { "epoch": 0.3557361675523601, "grad_norm": 0.2177734375, "learning_rate": 0.00019618375082476182, "loss": 1.6723, "step": 1138 }, { "epoch": 0.35604876523913725, "grad_norm": 0.2119140625, "learning_rate": 0.0001961770248914325, "loss": 1.8312, "step": 1139 }, { "epoch": 0.35636136292591436, "grad_norm": 0.2216796875, "learning_rate": 0.00019617029315177365, "loss": 2.0553, "step": 1140 }, { "epoch": 0.35667396061269147, "grad_norm": 0.20703125, "learning_rate": 0.00019616355560619163, "loss": 1.6513, "step": 1141 }, { "epoch": 0.35698655829946857, "grad_norm": 0.2041015625, "learning_rate": 0.00019615681225509325, "loss": 1.8244, "step": 1142 }, { "epoch": 0.3572991559862457, "grad_norm": 0.20703125, "learning_rate": 0.00019615006309888552, "loss": 1.9322, "step": 1143 }, { "epoch": 0.35761175367302284, "grad_norm": 0.2138671875, "learning_rate": 0.000196143308137976, "loss": 1.7572, "step": 1144 }, { "epoch": 0.35792435135979994, "grad_norm": 0.2099609375, "learning_rate": 0.00019613654737277245, "loss": 1.5536, "step": 1145 }, { "epoch": 0.35823694904657705, "grad_norm": 0.2197265625, "learning_rate": 0.000196129780803683, "loss": 1.9036, "step": 1146 }, { "epoch": 0.35854954673335415, "grad_norm": 0.2158203125, "learning_rate": 0.00019612300843111622, "loss": 2.1856, "step": 1147 }, { "epoch": 0.3588621444201313, "grad_norm": 0.212890625, "learning_rate": 0.0001961162302554809, "loss": 1.6396, "step": 1148 }, { "epoch": 0.3591747421069084, "grad_norm": 0.2177734375, "learning_rate": 0.00019610944627718627, "loss": 1.8837, "step": 1149 }, { "epoch": 0.3594873397936855, "grad_norm": 0.2158203125, "learning_rate": 0.00019610265649664193, "loss": 1.7418, "step": 1150 }, { "epoch": 0.35979993748046263, "grad_norm": 0.2109375, "learning_rate": 0.00019609586091425774, "loss": 1.8848, "step": 1151 }, { "epoch": 0.3601125351672398, "grad_norm": 0.2099609375, "learning_rate": 0.00019608905953044396, "loss": 1.4857, "step": 1152 }, { "epoch": 0.3604251328540169, "grad_norm": 0.2119140625, "learning_rate": 0.00019608225234561121, "loss": 1.6741, "step": 1153 }, { "epoch": 0.360737730540794, "grad_norm": 0.19921875, "learning_rate": 0.00019607543936017046, "loss": 1.6363, "step": 1154 }, { "epoch": 0.3610503282275711, "grad_norm": 0.2109375, "learning_rate": 0.00019606862057453298, "loss": 1.8323, "step": 1155 }, { "epoch": 0.3613629259143482, "grad_norm": 0.21484375, "learning_rate": 0.00019606179598911049, "loss": 1.6778, "step": 1156 }, { "epoch": 0.36167552360112537, "grad_norm": 0.208984375, "learning_rate": 0.00019605496560431496, "loss": 1.8691, "step": 1157 }, { "epoch": 0.3619881212879025, "grad_norm": 0.2109375, "learning_rate": 0.00019604812942055873, "loss": 1.6175, "step": 1158 }, { "epoch": 0.3623007189746796, "grad_norm": 0.212890625, "learning_rate": 0.00019604128743825453, "loss": 1.717, "step": 1159 }, { "epoch": 0.3626133166614567, "grad_norm": 0.201171875, "learning_rate": 0.00019603443965781543, "loss": 1.773, "step": 1160 }, { "epoch": 0.36292591434823385, "grad_norm": 0.212890625, "learning_rate": 0.00019602758607965484, "loss": 1.8844, "step": 1161 }, { "epoch": 0.36323851203501095, "grad_norm": 0.2109375, "learning_rate": 0.00019602072670418647, "loss": 1.9545, "step": 1162 }, { "epoch": 0.36355110972178806, "grad_norm": 0.2021484375, "learning_rate": 0.00019601386153182451, "loss": 1.523, "step": 1163 }, { "epoch": 0.36386370740856516, "grad_norm": 0.224609375, "learning_rate": 0.00019600699056298337, "loss": 2.0468, "step": 1164 }, { "epoch": 0.36417630509534227, "grad_norm": 0.2275390625, "learning_rate": 0.00019600011379807786, "loss": 1.9032, "step": 1165 }, { "epoch": 0.36448890278211943, "grad_norm": 0.2099609375, "learning_rate": 0.00019599323123752315, "loss": 1.3631, "step": 1166 }, { "epoch": 0.36480150046889653, "grad_norm": 0.21875, "learning_rate": 0.00019598634288173474, "loss": 1.6805, "step": 1167 }, { "epoch": 0.36511409815567364, "grad_norm": 0.2158203125, "learning_rate": 0.00019597944873112852, "loss": 1.4813, "step": 1168 }, { "epoch": 0.36542669584245074, "grad_norm": 0.216796875, "learning_rate": 0.00019597254878612065, "loss": 1.7945, "step": 1169 }, { "epoch": 0.3657392935292279, "grad_norm": 0.2177734375, "learning_rate": 0.0001959656430471277, "loss": 1.5851, "step": 1170 }, { "epoch": 0.366051891216005, "grad_norm": 0.21875, "learning_rate": 0.0001959587315145666, "loss": 1.8493, "step": 1171 }, { "epoch": 0.3663644889027821, "grad_norm": 0.1982421875, "learning_rate": 0.0001959518141888546, "loss": 1.7852, "step": 1172 }, { "epoch": 0.3666770865895592, "grad_norm": 0.22265625, "learning_rate": 0.00019594489107040928, "loss": 1.9668, "step": 1173 }, { "epoch": 0.3669896842763364, "grad_norm": 0.216796875, "learning_rate": 0.00019593796215964867, "loss": 1.656, "step": 1174 }, { "epoch": 0.3673022819631135, "grad_norm": 0.20703125, "learning_rate": 0.000195931027456991, "loss": 1.5947, "step": 1175 }, { "epoch": 0.3676148796498906, "grad_norm": 0.2255859375, "learning_rate": 0.00019592408696285496, "loss": 1.7685, "step": 1176 }, { "epoch": 0.3679274773366677, "grad_norm": 0.220703125, "learning_rate": 0.00019591714067765953, "loss": 1.6027, "step": 1177 }, { "epoch": 0.3682400750234448, "grad_norm": 0.205078125, "learning_rate": 0.0001959101886018241, "loss": 2.2013, "step": 1178 }, { "epoch": 0.36855267271022196, "grad_norm": 0.208984375, "learning_rate": 0.0001959032307357684, "loss": 1.6995, "step": 1179 }, { "epoch": 0.36886527039699907, "grad_norm": 0.20703125, "learning_rate": 0.00019589626707991242, "loss": 1.7104, "step": 1180 }, { "epoch": 0.3691778680837762, "grad_norm": 0.2041015625, "learning_rate": 0.00019588929763467657, "loss": 1.6798, "step": 1181 }, { "epoch": 0.3694904657705533, "grad_norm": 0.20703125, "learning_rate": 0.00019588232240048167, "loss": 1.5464, "step": 1182 }, { "epoch": 0.36980306345733044, "grad_norm": 0.2216796875, "learning_rate": 0.0001958753413777488, "loss": 1.7789, "step": 1183 }, { "epoch": 0.37011566114410754, "grad_norm": 0.2265625, "learning_rate": 0.00019586835456689934, "loss": 1.7634, "step": 1184 }, { "epoch": 0.37042825883088465, "grad_norm": 0.2109375, "learning_rate": 0.0001958613619683552, "loss": 1.9015, "step": 1185 }, { "epoch": 0.37074085651766175, "grad_norm": 0.318359375, "learning_rate": 0.00019585436358253845, "loss": 2.3964, "step": 1186 }, { "epoch": 0.37105345420443886, "grad_norm": 0.216796875, "learning_rate": 0.00019584735940987163, "loss": 1.7068, "step": 1187 }, { "epoch": 0.371366051891216, "grad_norm": 0.2060546875, "learning_rate": 0.00019584034945077758, "loss": 1.9431, "step": 1188 }, { "epoch": 0.3716786495779931, "grad_norm": 0.2177734375, "learning_rate": 0.0001958333337056795, "loss": 1.6602, "step": 1189 }, { "epoch": 0.37199124726477023, "grad_norm": 0.2060546875, "learning_rate": 0.00019582631217500093, "loss": 1.9655, "step": 1190 }, { "epoch": 0.37230384495154734, "grad_norm": 0.2099609375, "learning_rate": 0.0001958192848591658, "loss": 1.7755, "step": 1191 }, { "epoch": 0.3726164426383245, "grad_norm": 0.2158203125, "learning_rate": 0.00019581225175859833, "loss": 1.7425, "step": 1192 }, { "epoch": 0.3729290403251016, "grad_norm": 0.2080078125, "learning_rate": 0.00019580521287372317, "loss": 1.8308, "step": 1193 }, { "epoch": 0.3732416380118787, "grad_norm": 0.2265625, "learning_rate": 0.00019579816820496516, "loss": 1.7996, "step": 1194 }, { "epoch": 0.3735542356986558, "grad_norm": 0.224609375, "learning_rate": 0.0001957911177527497, "loss": 1.8265, "step": 1195 }, { "epoch": 0.373866833385433, "grad_norm": 0.21484375, "learning_rate": 0.00019578406151750236, "loss": 1.5686, "step": 1196 }, { "epoch": 0.3741794310722101, "grad_norm": 0.212890625, "learning_rate": 0.0001957769994996492, "loss": 1.7951, "step": 1197 }, { "epoch": 0.3744920287589872, "grad_norm": 0.2314453125, "learning_rate": 0.00019576993169961653, "loss": 1.7821, "step": 1198 }, { "epoch": 0.3748046264457643, "grad_norm": 0.2158203125, "learning_rate": 0.000195762858117831, "loss": 1.7286, "step": 1199 }, { "epoch": 0.3751172241325414, "grad_norm": 0.212890625, "learning_rate": 0.00019575577875471974, "loss": 1.707, "step": 1200 }, { "epoch": 0.37542982181931855, "grad_norm": 0.2080078125, "learning_rate": 0.00019574869361071006, "loss": 1.9656, "step": 1201 }, { "epoch": 0.37574241950609566, "grad_norm": 0.2265625, "learning_rate": 0.00019574160268622976, "loss": 1.7242, "step": 1202 }, { "epoch": 0.37605501719287276, "grad_norm": 0.21484375, "learning_rate": 0.00019573450598170687, "loss": 1.7001, "step": 1203 }, { "epoch": 0.37636761487964987, "grad_norm": 0.21875, "learning_rate": 0.00019572740349756992, "loss": 1.8952, "step": 1204 }, { "epoch": 0.37668021256642703, "grad_norm": 0.2255859375, "learning_rate": 0.00019572029523424756, "loss": 1.8052, "step": 1205 }, { "epoch": 0.37699281025320414, "grad_norm": 0.21875, "learning_rate": 0.00019571318119216904, "loss": 1.8727, "step": 1206 }, { "epoch": 0.37730540793998124, "grad_norm": 0.2158203125, "learning_rate": 0.0001957060613717638, "loss": 1.6054, "step": 1207 }, { "epoch": 0.37761800562675835, "grad_norm": 0.2041015625, "learning_rate": 0.00019569893577346168, "loss": 1.8537, "step": 1208 }, { "epoch": 0.3779306033135355, "grad_norm": 0.2060546875, "learning_rate": 0.00019569180439769283, "loss": 1.6096, "step": 1209 }, { "epoch": 0.3782432010003126, "grad_norm": 0.2275390625, "learning_rate": 0.00019568466724488782, "loss": 1.9668, "step": 1210 }, { "epoch": 0.3785557986870897, "grad_norm": 0.20703125, "learning_rate": 0.00019567752431547754, "loss": 1.6992, "step": 1211 }, { "epoch": 0.3788683963738668, "grad_norm": 0.2109375, "learning_rate": 0.00019567037560989315, "loss": 1.6169, "step": 1212 }, { "epoch": 0.37918099406064393, "grad_norm": 0.21875, "learning_rate": 0.00019566322112856633, "loss": 1.7126, "step": 1213 }, { "epoch": 0.3794935917474211, "grad_norm": 0.203125, "learning_rate": 0.0001956560608719289, "loss": 1.6279, "step": 1214 }, { "epoch": 0.3798061894341982, "grad_norm": 0.2177734375, "learning_rate": 0.0001956488948404132, "loss": 2.0578, "step": 1215 }, { "epoch": 0.3801187871209753, "grad_norm": 0.2236328125, "learning_rate": 0.00019564172303445182, "loss": 1.7761, "step": 1216 }, { "epoch": 0.3804313848077524, "grad_norm": 0.2119140625, "learning_rate": 0.00019563454545447773, "loss": 1.6644, "step": 1217 }, { "epoch": 0.38074398249452956, "grad_norm": 0.2109375, "learning_rate": 0.00019562736210092428, "loss": 1.8542, "step": 1218 }, { "epoch": 0.38105658018130667, "grad_norm": 0.208984375, "learning_rate": 0.0001956201729742251, "loss": 1.7917, "step": 1219 }, { "epoch": 0.3813691778680838, "grad_norm": 0.2177734375, "learning_rate": 0.00019561297807481427, "loss": 1.8474, "step": 1220 }, { "epoch": 0.3816817755548609, "grad_norm": 0.2080078125, "learning_rate": 0.0001956057774031261, "loss": 1.627, "step": 1221 }, { "epoch": 0.381994373241638, "grad_norm": 0.2109375, "learning_rate": 0.00019559857095959528, "loss": 1.6842, "step": 1222 }, { "epoch": 0.38230697092841515, "grad_norm": 0.2197265625, "learning_rate": 0.00019559135874465695, "loss": 1.7735, "step": 1223 }, { "epoch": 0.38261956861519225, "grad_norm": 0.2158203125, "learning_rate": 0.00019558414075874646, "loss": 1.8281, "step": 1224 }, { "epoch": 0.38293216630196936, "grad_norm": 0.21484375, "learning_rate": 0.00019557691700229957, "loss": 1.5633, "step": 1225 }, { "epoch": 0.38324476398874646, "grad_norm": 0.212890625, "learning_rate": 0.00019556968747575244, "loss": 1.8649, "step": 1226 }, { "epoch": 0.3835573616755236, "grad_norm": 0.2177734375, "learning_rate": 0.00019556245217954149, "loss": 1.6938, "step": 1227 }, { "epoch": 0.38386995936230073, "grad_norm": 0.2158203125, "learning_rate": 0.0001955552111141035, "loss": 1.6866, "step": 1228 }, { "epoch": 0.38418255704907783, "grad_norm": 0.232421875, "learning_rate": 0.00019554796427987566, "loss": 1.9343, "step": 1229 }, { "epoch": 0.38449515473585494, "grad_norm": 0.2177734375, "learning_rate": 0.00019554071167729545, "loss": 1.9785, "step": 1230 }, { "epoch": 0.3848077524226321, "grad_norm": 0.2080078125, "learning_rate": 0.00019553345330680077, "loss": 1.876, "step": 1231 }, { "epoch": 0.3851203501094092, "grad_norm": 0.2236328125, "learning_rate": 0.00019552618916882973, "loss": 1.671, "step": 1232 }, { "epoch": 0.3854329477961863, "grad_norm": 0.2255859375, "learning_rate": 0.00019551891926382093, "loss": 1.6575, "step": 1233 }, { "epoch": 0.3857455454829634, "grad_norm": 0.216796875, "learning_rate": 0.00019551164359221326, "loss": 1.9775, "step": 1234 }, { "epoch": 0.3860581431697405, "grad_norm": 0.2177734375, "learning_rate": 0.00019550436215444594, "loss": 1.7329, "step": 1235 }, { "epoch": 0.3863707408565177, "grad_norm": 0.2119140625, "learning_rate": 0.0001954970749509586, "loss": 1.6745, "step": 1236 }, { "epoch": 0.3866833385432948, "grad_norm": 0.21484375, "learning_rate": 0.00019548978198219113, "loss": 1.7502, "step": 1237 }, { "epoch": 0.3869959362300719, "grad_norm": 0.228515625, "learning_rate": 0.00019548248324858386, "loss": 1.6299, "step": 1238 }, { "epoch": 0.387308533916849, "grad_norm": 0.21875, "learning_rate": 0.00019547517875057738, "loss": 1.6477, "step": 1239 }, { "epoch": 0.38762113160362616, "grad_norm": 0.2138671875, "learning_rate": 0.00019546786848861268, "loss": 1.8717, "step": 1240 }, { "epoch": 0.38793372929040326, "grad_norm": 0.2080078125, "learning_rate": 0.00019546055246313113, "loss": 1.5382, "step": 1241 }, { "epoch": 0.38824632697718037, "grad_norm": 0.2294921875, "learning_rate": 0.00019545323067457439, "loss": 2.0394, "step": 1242 }, { "epoch": 0.3885589246639575, "grad_norm": 0.2138671875, "learning_rate": 0.00019544590312338444, "loss": 1.8064, "step": 1243 }, { "epoch": 0.3888715223507346, "grad_norm": 0.2265625, "learning_rate": 0.00019543856981000371, "loss": 1.6846, "step": 1244 }, { "epoch": 0.38918412003751174, "grad_norm": 0.203125, "learning_rate": 0.0001954312307348749, "loss": 1.7834, "step": 1245 }, { "epoch": 0.38949671772428884, "grad_norm": 0.21484375, "learning_rate": 0.0001954238858984411, "loss": 1.8043, "step": 1246 }, { "epoch": 0.38980931541106595, "grad_norm": 0.2119140625, "learning_rate": 0.00019541653530114568, "loss": 1.7905, "step": 1247 }, { "epoch": 0.39012191309784305, "grad_norm": 0.2119140625, "learning_rate": 0.00019540917894343246, "loss": 1.6521, "step": 1248 }, { "epoch": 0.3904345107846202, "grad_norm": 0.2177734375, "learning_rate": 0.00019540181682574552, "loss": 1.6881, "step": 1249 }, { "epoch": 0.3907471084713973, "grad_norm": 0.2158203125, "learning_rate": 0.0001953944489485293, "loss": 2.0565, "step": 1250 }, { "epoch": 0.3910597061581744, "grad_norm": 0.2197265625, "learning_rate": 0.00019538707531222867, "loss": 1.7884, "step": 1251 }, { "epoch": 0.39137230384495153, "grad_norm": 0.2041015625, "learning_rate": 0.00019537969591728872, "loss": 1.5153, "step": 1252 }, { "epoch": 0.3916849015317287, "grad_norm": 0.220703125, "learning_rate": 0.000195372310764155, "loss": 1.8401, "step": 1253 }, { "epoch": 0.3919974992185058, "grad_norm": 0.2138671875, "learning_rate": 0.00019536491985327334, "loss": 1.5898, "step": 1254 }, { "epoch": 0.3923100969052829, "grad_norm": 0.208984375, "learning_rate": 0.00019535752318508998, "loss": 1.8118, "step": 1255 }, { "epoch": 0.39262269459206, "grad_norm": 0.2060546875, "learning_rate": 0.00019535012076005138, "loss": 1.4033, "step": 1256 }, { "epoch": 0.3929352922788371, "grad_norm": 0.2099609375, "learning_rate": 0.00019534271257860448, "loss": 1.672, "step": 1257 }, { "epoch": 0.3932478899656143, "grad_norm": 0.2197265625, "learning_rate": 0.00019533529864119658, "loss": 1.752, "step": 1258 }, { "epoch": 0.3935604876523914, "grad_norm": 0.2099609375, "learning_rate": 0.0001953278789482752, "loss": 1.3813, "step": 1259 }, { "epoch": 0.3938730853391685, "grad_norm": 0.2265625, "learning_rate": 0.00019532045350028826, "loss": 1.8827, "step": 1260 }, { "epoch": 0.3941856830259456, "grad_norm": 0.224609375, "learning_rate": 0.00019531302229768404, "loss": 1.9363, "step": 1261 }, { "epoch": 0.39449828071272275, "grad_norm": 0.2138671875, "learning_rate": 0.00019530558534091127, "loss": 1.8975, "step": 1262 }, { "epoch": 0.39481087839949985, "grad_norm": 0.2275390625, "learning_rate": 0.00019529814263041884, "loss": 1.7931, "step": 1263 }, { "epoch": 0.39512347608627696, "grad_norm": 0.2099609375, "learning_rate": 0.0001952906941666561, "loss": 1.7258, "step": 1264 }, { "epoch": 0.39543607377305406, "grad_norm": 0.2119140625, "learning_rate": 0.0001952832399500727, "loss": 1.8547, "step": 1265 }, { "epoch": 0.3957486714598312, "grad_norm": 0.2138671875, "learning_rate": 0.00019527577998111874, "loss": 1.7344, "step": 1266 }, { "epoch": 0.39606126914660833, "grad_norm": 0.2109375, "learning_rate": 0.0001952683142602445, "loss": 1.7313, "step": 1267 }, { "epoch": 0.39637386683338544, "grad_norm": 0.2119140625, "learning_rate": 0.00019526084278790074, "loss": 1.8261, "step": 1268 }, { "epoch": 0.39668646452016254, "grad_norm": 0.2060546875, "learning_rate": 0.00019525336556453852, "loss": 1.7306, "step": 1269 }, { "epoch": 0.39699906220693965, "grad_norm": 0.2021484375, "learning_rate": 0.0001952458825906092, "loss": 1.9536, "step": 1270 }, { "epoch": 0.3973116598937168, "grad_norm": 0.2138671875, "learning_rate": 0.00019523839386656458, "loss": 1.7486, "step": 1271 }, { "epoch": 0.3976242575804939, "grad_norm": 0.2099609375, "learning_rate": 0.00019523089939285675, "loss": 1.9232, "step": 1272 }, { "epoch": 0.397936855267271, "grad_norm": 0.220703125, "learning_rate": 0.0001952233991699382, "loss": 1.5959, "step": 1273 }, { "epoch": 0.3982494529540481, "grad_norm": 0.224609375, "learning_rate": 0.00019521589319826168, "loss": 1.9811, "step": 1274 }, { "epoch": 0.3985620506408253, "grad_norm": 0.2255859375, "learning_rate": 0.00019520838147828035, "loss": 1.6908, "step": 1275 }, { "epoch": 0.3988746483276024, "grad_norm": 0.208984375, "learning_rate": 0.00019520086401044772, "loss": 1.7011, "step": 1276 }, { "epoch": 0.3991872460143795, "grad_norm": 0.2197265625, "learning_rate": 0.0001951933407952176, "loss": 1.6478, "step": 1277 }, { "epoch": 0.3994998437011566, "grad_norm": 0.2255859375, "learning_rate": 0.0001951858118330442, "loss": 1.5169, "step": 1278 }, { "epoch": 0.3998124413879337, "grad_norm": 0.2216796875, "learning_rate": 0.00019517827712438207, "loss": 1.7061, "step": 1279 }, { "epoch": 0.40012503907471086, "grad_norm": 0.212890625, "learning_rate": 0.00019517073666968604, "loss": 1.7499, "step": 1280 }, { "epoch": 0.40043763676148797, "grad_norm": 0.212890625, "learning_rate": 0.00019516319046941134, "loss": 2.132, "step": 1281 }, { "epoch": 0.4007502344482651, "grad_norm": 0.20703125, "learning_rate": 0.00019515563852401358, "loss": 1.56, "step": 1282 }, { "epoch": 0.4010628321350422, "grad_norm": 0.216796875, "learning_rate": 0.00019514808083394866, "loss": 1.86, "step": 1283 }, { "epoch": 0.40137542982181934, "grad_norm": 0.22265625, "learning_rate": 0.00019514051739967286, "loss": 1.6877, "step": 1284 }, { "epoch": 0.40168802750859645, "grad_norm": 0.2099609375, "learning_rate": 0.00019513294822164277, "loss": 1.5612, "step": 1285 }, { "epoch": 0.40200062519537355, "grad_norm": 0.2099609375, "learning_rate": 0.00019512537330031537, "loss": 1.7812, "step": 1286 }, { "epoch": 0.40231322288215066, "grad_norm": 0.2119140625, "learning_rate": 0.00019511779263614798, "loss": 1.5228, "step": 1287 }, { "epoch": 0.4026258205689278, "grad_norm": 0.2119140625, "learning_rate": 0.00019511020622959823, "loss": 1.4276, "step": 1288 }, { "epoch": 0.4029384182557049, "grad_norm": 0.2080078125, "learning_rate": 0.00019510261408112414, "loss": 1.8561, "step": 1289 }, { "epoch": 0.403251015942482, "grad_norm": 0.21875, "learning_rate": 0.00019509501619118403, "loss": 1.8674, "step": 1290 }, { "epoch": 0.40356361362925913, "grad_norm": 0.20703125, "learning_rate": 0.0001950874125602366, "loss": 1.8583, "step": 1291 }, { "epoch": 0.40387621131603624, "grad_norm": 0.2099609375, "learning_rate": 0.00019507980318874096, "loss": 1.686, "step": 1292 }, { "epoch": 0.4041888090028134, "grad_norm": 0.21484375, "learning_rate": 0.00019507218807715638, "loss": 1.7897, "step": 1293 }, { "epoch": 0.4045014066895905, "grad_norm": 0.228515625, "learning_rate": 0.00019506456722594265, "loss": 1.7626, "step": 1294 }, { "epoch": 0.4048140043763676, "grad_norm": 0.212890625, "learning_rate": 0.0001950569406355599, "loss": 1.9098, "step": 1295 }, { "epoch": 0.4051266020631447, "grad_norm": 0.2080078125, "learning_rate": 0.0001950493083064685, "loss": 1.5848, "step": 1296 }, { "epoch": 0.4054391997499219, "grad_norm": 0.220703125, "learning_rate": 0.00019504167023912922, "loss": 1.6362, "step": 1297 }, { "epoch": 0.405751797436699, "grad_norm": 0.2177734375, "learning_rate": 0.0001950340264340032, "loss": 1.9604, "step": 1298 }, { "epoch": 0.4060643951234761, "grad_norm": 0.306640625, "learning_rate": 0.0001950263768915519, "loss": 2.5325, "step": 1299 }, { "epoch": 0.4063769928102532, "grad_norm": 0.21484375, "learning_rate": 0.00019501872161223712, "loss": 1.9979, "step": 1300 }, { "epoch": 0.4066895904970303, "grad_norm": 0.2177734375, "learning_rate": 0.00019501106059652108, "loss": 1.714, "step": 1301 }, { "epoch": 0.40700218818380746, "grad_norm": 0.220703125, "learning_rate": 0.0001950033938448662, "loss": 1.7827, "step": 1302 }, { "epoch": 0.40731478587058456, "grad_norm": 0.21484375, "learning_rate": 0.00019499572135773537, "loss": 1.6062, "step": 1303 }, { "epoch": 0.40762738355736167, "grad_norm": 0.21484375, "learning_rate": 0.0001949880431355918, "loss": 1.6599, "step": 1304 }, { "epoch": 0.40793998124413877, "grad_norm": 0.2197265625, "learning_rate": 0.000194980359178899, "loss": 1.5345, "step": 1305 }, { "epoch": 0.40825257893091593, "grad_norm": 0.220703125, "learning_rate": 0.0001949726694881209, "loss": 1.8149, "step": 1306 }, { "epoch": 0.40856517661769304, "grad_norm": 0.220703125, "learning_rate": 0.00019496497406372174, "loss": 1.6207, "step": 1307 }, { "epoch": 0.40887777430447014, "grad_norm": 0.2099609375, "learning_rate": 0.00019495727290616606, "loss": 1.7058, "step": 1308 }, { "epoch": 0.40919037199124725, "grad_norm": 0.216796875, "learning_rate": 0.0001949495660159188, "loss": 1.5045, "step": 1309 }, { "epoch": 0.4095029696780244, "grad_norm": 0.21875, "learning_rate": 0.00019494185339344523, "loss": 1.8221, "step": 1310 }, { "epoch": 0.4098155673648015, "grad_norm": 0.224609375, "learning_rate": 0.000194934135039211, "loss": 1.4478, "step": 1311 }, { "epoch": 0.4101281650515786, "grad_norm": 0.228515625, "learning_rate": 0.0001949264109536821, "loss": 1.4922, "step": 1312 }, { "epoch": 0.4104407627383557, "grad_norm": 0.2275390625, "learning_rate": 0.00019491868113732474, "loss": 1.8462, "step": 1313 }, { "epoch": 0.41075336042513283, "grad_norm": 0.2138671875, "learning_rate": 0.0001949109455906057, "loss": 1.831, "step": 1314 }, { "epoch": 0.41106595811191, "grad_norm": 0.2099609375, "learning_rate": 0.0001949032043139919, "loss": 1.5742, "step": 1315 }, { "epoch": 0.4113785557986871, "grad_norm": 0.2099609375, "learning_rate": 0.0001948954573079507, "loss": 1.7099, "step": 1316 }, { "epoch": 0.4116911534854642, "grad_norm": 0.21875, "learning_rate": 0.00019488770457294985, "loss": 1.8771, "step": 1317 }, { "epoch": 0.4120037511722413, "grad_norm": 0.21875, "learning_rate": 0.00019487994610945734, "loss": 1.9056, "step": 1318 }, { "epoch": 0.41231634885901847, "grad_norm": 0.20703125, "learning_rate": 0.00019487218191794158, "loss": 1.7384, "step": 1319 }, { "epoch": 0.41262894654579557, "grad_norm": 0.212890625, "learning_rate": 0.00019486441199887132, "loss": 1.9079, "step": 1320 }, { "epoch": 0.4129415442325727, "grad_norm": 0.224609375, "learning_rate": 0.00019485663635271562, "loss": 1.8313, "step": 1321 }, { "epoch": 0.4132541419193498, "grad_norm": 0.2216796875, "learning_rate": 0.00019484885497994387, "loss": 1.642, "step": 1322 }, { "epoch": 0.4135667396061269, "grad_norm": 0.2138671875, "learning_rate": 0.00019484106788102593, "loss": 1.7165, "step": 1323 }, { "epoch": 0.41387933729290405, "grad_norm": 0.2197265625, "learning_rate": 0.0001948332750564318, "loss": 1.6474, "step": 1324 }, { "epoch": 0.41419193497968115, "grad_norm": 0.2099609375, "learning_rate": 0.00019482547650663206, "loss": 1.5541, "step": 1325 }, { "epoch": 0.41450453266645826, "grad_norm": 0.20703125, "learning_rate": 0.00019481767223209745, "loss": 2.0118, "step": 1326 }, { "epoch": 0.41481713035323536, "grad_norm": 0.2138671875, "learning_rate": 0.00019480986223329913, "loss": 1.8306, "step": 1327 }, { "epoch": 0.4151297280400125, "grad_norm": 0.2314453125, "learning_rate": 0.00019480204651070864, "loss": 1.6828, "step": 1328 }, { "epoch": 0.41544232572678963, "grad_norm": 0.2177734375, "learning_rate": 0.00019479422506479775, "loss": 1.6071, "step": 1329 }, { "epoch": 0.41575492341356673, "grad_norm": 0.203125, "learning_rate": 0.00019478639789603872, "loss": 1.6847, "step": 1330 }, { "epoch": 0.41606752110034384, "grad_norm": 0.21484375, "learning_rate": 0.00019477856500490405, "loss": 1.6309, "step": 1331 }, { "epoch": 0.416380118787121, "grad_norm": 0.21484375, "learning_rate": 0.00019477072639186664, "loss": 1.9451, "step": 1332 }, { "epoch": 0.4166927164738981, "grad_norm": 0.220703125, "learning_rate": 0.0001947628820573997, "loss": 1.8675, "step": 1333 }, { "epoch": 0.4170053141606752, "grad_norm": 0.2236328125, "learning_rate": 0.00019475503200197685, "loss": 1.5601, "step": 1334 }, { "epoch": 0.4173179118474523, "grad_norm": 0.2119140625, "learning_rate": 0.00019474717622607195, "loss": 1.5294, "step": 1335 }, { "epoch": 0.4176305095342294, "grad_norm": 0.2177734375, "learning_rate": 0.00019473931473015926, "loss": 1.7433, "step": 1336 }, { "epoch": 0.4179431072210066, "grad_norm": 0.2138671875, "learning_rate": 0.00019473144751471345, "loss": 1.6771, "step": 1337 }, { "epoch": 0.4182557049077837, "grad_norm": 0.212890625, "learning_rate": 0.0001947235745802094, "loss": 1.9994, "step": 1338 }, { "epoch": 0.4185683025945608, "grad_norm": 0.2216796875, "learning_rate": 0.0001947156959271225, "loss": 1.726, "step": 1339 }, { "epoch": 0.4188809002813379, "grad_norm": 0.2080078125, "learning_rate": 0.00019470781155592827, "loss": 1.8079, "step": 1340 }, { "epoch": 0.41919349796811506, "grad_norm": 0.2099609375, "learning_rate": 0.00019469992146710282, "loss": 1.8046, "step": 1341 }, { "epoch": 0.41950609565489216, "grad_norm": 0.2177734375, "learning_rate": 0.0001946920256611224, "loss": 1.619, "step": 1342 }, { "epoch": 0.41981869334166927, "grad_norm": 0.2158203125, "learning_rate": 0.00019468412413846373, "loss": 1.6015, "step": 1343 }, { "epoch": 0.4201312910284464, "grad_norm": 0.212890625, "learning_rate": 0.00019467621689960385, "loss": 1.7538, "step": 1344 }, { "epoch": 0.42044388871522353, "grad_norm": 0.20703125, "learning_rate": 0.00019466830394502009, "loss": 1.8732, "step": 1345 }, { "epoch": 0.42075648640200064, "grad_norm": 0.2294921875, "learning_rate": 0.0001946603852751902, "loss": 1.7492, "step": 1346 }, { "epoch": 0.42106908408877775, "grad_norm": 0.224609375, "learning_rate": 0.0001946524608905922, "loss": 1.6893, "step": 1347 }, { "epoch": 0.42138168177555485, "grad_norm": 0.220703125, "learning_rate": 0.00019464453079170454, "loss": 1.5848, "step": 1348 }, { "epoch": 0.42169427946233196, "grad_norm": 0.2080078125, "learning_rate": 0.00019463659497900593, "loss": 1.5974, "step": 1349 }, { "epoch": 0.4220068771491091, "grad_norm": 0.2216796875, "learning_rate": 0.0001946286534529755, "loss": 1.9757, "step": 1350 }, { "epoch": 0.4223194748358862, "grad_norm": 0.2275390625, "learning_rate": 0.0001946207062140927, "loss": 1.9514, "step": 1351 }, { "epoch": 0.4226320725226633, "grad_norm": 0.2158203125, "learning_rate": 0.00019461275326283724, "loss": 1.894, "step": 1352 }, { "epoch": 0.42294467020944043, "grad_norm": 0.2236328125, "learning_rate": 0.00019460479459968932, "loss": 1.5872, "step": 1353 }, { "epoch": 0.4232572678962176, "grad_norm": 0.21484375, "learning_rate": 0.0001945968302251294, "loss": 1.5275, "step": 1354 }, { "epoch": 0.4235698655829947, "grad_norm": 0.220703125, "learning_rate": 0.0001945888601396383, "loss": 1.6427, "step": 1355 }, { "epoch": 0.4238824632697718, "grad_norm": 0.21875, "learning_rate": 0.00019458088434369715, "loss": 1.6407, "step": 1356 }, { "epoch": 0.4241950609565489, "grad_norm": 0.224609375, "learning_rate": 0.00019457290283778747, "loss": 1.9373, "step": 1357 }, { "epoch": 0.424507658643326, "grad_norm": 0.21484375, "learning_rate": 0.0001945649156223912, "loss": 1.7385, "step": 1358 }, { "epoch": 0.4248202563301032, "grad_norm": 0.2236328125, "learning_rate": 0.0001945569226979904, "loss": 1.8262, "step": 1359 }, { "epoch": 0.4251328540168803, "grad_norm": 0.2158203125, "learning_rate": 0.00019454892406506775, "loss": 1.6286, "step": 1360 }, { "epoch": 0.4254454517036574, "grad_norm": 0.22265625, "learning_rate": 0.00019454091972410603, "loss": 1.7992, "step": 1361 }, { "epoch": 0.4257580493904345, "grad_norm": 0.2197265625, "learning_rate": 0.0001945329096755885, "loss": 1.9609, "step": 1362 }, { "epoch": 0.42607064707721165, "grad_norm": 0.2138671875, "learning_rate": 0.00019452489391999874, "loss": 1.9051, "step": 1363 }, { "epoch": 0.42638324476398876, "grad_norm": 0.2294921875, "learning_rate": 0.00019451687245782072, "loss": 1.7331, "step": 1364 }, { "epoch": 0.42669584245076586, "grad_norm": 0.2158203125, "learning_rate": 0.00019450884528953864, "loss": 2.1455, "step": 1365 }, { "epoch": 0.42700844013754297, "grad_norm": 0.2353515625, "learning_rate": 0.00019450081241563716, "loss": 1.8298, "step": 1366 }, { "epoch": 0.4273210378243201, "grad_norm": 0.2216796875, "learning_rate": 0.00019449277383660118, "loss": 1.8084, "step": 1367 }, { "epoch": 0.42763363551109723, "grad_norm": 0.2236328125, "learning_rate": 0.00019448472955291605, "loss": 1.6876, "step": 1368 }, { "epoch": 0.42794623319787434, "grad_norm": 0.2080078125, "learning_rate": 0.0001944766795650674, "loss": 1.7431, "step": 1369 }, { "epoch": 0.42825883088465144, "grad_norm": 0.228515625, "learning_rate": 0.0001944686238735412, "loss": 1.7904, "step": 1370 }, { "epoch": 0.42857142857142855, "grad_norm": 0.2099609375, "learning_rate": 0.00019446056247882378, "loss": 1.8465, "step": 1371 }, { "epoch": 0.4288840262582057, "grad_norm": 0.2197265625, "learning_rate": 0.00019445249538140185, "loss": 1.6672, "step": 1372 }, { "epoch": 0.4291966239449828, "grad_norm": 0.2275390625, "learning_rate": 0.0001944444225817624, "loss": 1.9209, "step": 1373 }, { "epoch": 0.4295092216317599, "grad_norm": 0.220703125, "learning_rate": 0.00019443634408039282, "loss": 1.8336, "step": 1374 }, { "epoch": 0.429821819318537, "grad_norm": 0.22265625, "learning_rate": 0.0001944282598777808, "loss": 1.9261, "step": 1375 }, { "epoch": 0.4301344170053142, "grad_norm": 0.2255859375, "learning_rate": 0.0001944201699744144, "loss": 1.6371, "step": 1376 }, { "epoch": 0.4304470146920913, "grad_norm": 0.220703125, "learning_rate": 0.00019441207437078203, "loss": 1.4774, "step": 1377 }, { "epoch": 0.4307596123788684, "grad_norm": 0.2255859375, "learning_rate": 0.0001944039730673724, "loss": 1.5849, "step": 1378 }, { "epoch": 0.4310722100656455, "grad_norm": 0.2255859375, "learning_rate": 0.0001943958660646746, "loss": 1.8103, "step": 1379 }, { "epoch": 0.4313848077524226, "grad_norm": 0.2158203125, "learning_rate": 0.00019438775336317812, "loss": 1.8946, "step": 1380 }, { "epoch": 0.43169740543919977, "grad_norm": 0.20703125, "learning_rate": 0.00019437963496337266, "loss": 1.6056, "step": 1381 }, { "epoch": 0.43201000312597687, "grad_norm": 0.220703125, "learning_rate": 0.00019437151086574837, "loss": 1.6991, "step": 1382 }, { "epoch": 0.432322600812754, "grad_norm": 0.2265625, "learning_rate": 0.00019436338107079574, "loss": 1.6126, "step": 1383 }, { "epoch": 0.4326351984995311, "grad_norm": 0.216796875, "learning_rate": 0.00019435524557900551, "loss": 1.4967, "step": 1384 }, { "epoch": 0.43294779618630824, "grad_norm": 0.212890625, "learning_rate": 0.00019434710439086888, "loss": 1.5868, "step": 1385 }, { "epoch": 0.43326039387308535, "grad_norm": 0.2265625, "learning_rate": 0.00019433895750687734, "loss": 1.7528, "step": 1386 }, { "epoch": 0.43357299155986245, "grad_norm": 0.2255859375, "learning_rate": 0.00019433080492752268, "loss": 1.899, "step": 1387 }, { "epoch": 0.43388558924663956, "grad_norm": 0.2275390625, "learning_rate": 0.00019432264665329715, "loss": 2.0873, "step": 1388 }, { "epoch": 0.4341981869334167, "grad_norm": 0.216796875, "learning_rate": 0.00019431448268469325, "loss": 1.4453, "step": 1389 }, { "epoch": 0.4345107846201938, "grad_norm": 0.2177734375, "learning_rate": 0.00019430631302220385, "loss": 1.9314, "step": 1390 }, { "epoch": 0.43482338230697093, "grad_norm": 0.21875, "learning_rate": 0.0001942981376663221, "loss": 1.5989, "step": 1391 }, { "epoch": 0.43513597999374803, "grad_norm": 0.2216796875, "learning_rate": 0.00019428995661754171, "loss": 1.8037, "step": 1392 }, { "epoch": 0.43544857768052514, "grad_norm": 0.20703125, "learning_rate": 0.0001942817698763564, "loss": 1.7903, "step": 1393 }, { "epoch": 0.4357611753673023, "grad_norm": 0.2216796875, "learning_rate": 0.00019427357744326057, "loss": 1.7809, "step": 1394 }, { "epoch": 0.4360737730540794, "grad_norm": 0.2099609375, "learning_rate": 0.0001942653793187487, "loss": 1.552, "step": 1395 }, { "epoch": 0.4363863707408565, "grad_norm": 0.2138671875, "learning_rate": 0.00019425717550331572, "loss": 1.7079, "step": 1396 }, { "epoch": 0.4366989684276336, "grad_norm": 0.2119140625, "learning_rate": 0.000194248965997457, "loss": 1.8321, "step": 1397 }, { "epoch": 0.4370115661144108, "grad_norm": 0.2255859375, "learning_rate": 0.00019424075080166805, "loss": 1.6185, "step": 1398 }, { "epoch": 0.4373241638011879, "grad_norm": 0.2216796875, "learning_rate": 0.00019423252991644492, "loss": 1.7149, "step": 1399 }, { "epoch": 0.437636761487965, "grad_norm": 0.2236328125, "learning_rate": 0.00019422430334228386, "loss": 1.7048, "step": 1400 }, { "epoch": 0.4379493591747421, "grad_norm": 0.2158203125, "learning_rate": 0.00019421607107968154, "loss": 1.8062, "step": 1401 }, { "epoch": 0.4382619568615192, "grad_norm": 0.2265625, "learning_rate": 0.00019420783312913494, "loss": 1.8332, "step": 1402 }, { "epoch": 0.43857455454829636, "grad_norm": 0.306640625, "learning_rate": 0.0001941995894911414, "loss": 2.397, "step": 1403 }, { "epoch": 0.43888715223507346, "grad_norm": 0.2216796875, "learning_rate": 0.00019419134016619865, "loss": 1.6672, "step": 1404 }, { "epoch": 0.43919974992185057, "grad_norm": 0.2265625, "learning_rate": 0.0001941830851548046, "loss": 1.6112, "step": 1405 }, { "epoch": 0.4395123476086277, "grad_norm": 0.2197265625, "learning_rate": 0.0001941748244574577, "loss": 1.7182, "step": 1406 }, { "epoch": 0.43982494529540483, "grad_norm": 0.2236328125, "learning_rate": 0.00019416655807465667, "loss": 1.7438, "step": 1407 }, { "epoch": 0.44013754298218194, "grad_norm": 0.216796875, "learning_rate": 0.0001941582860069005, "loss": 1.8327, "step": 1408 }, { "epoch": 0.44045014066895904, "grad_norm": 0.224609375, "learning_rate": 0.00019415000825468863, "loss": 2.0563, "step": 1409 }, { "epoch": 0.44076273835573615, "grad_norm": 0.2158203125, "learning_rate": 0.0001941417248185208, "loss": 1.9451, "step": 1410 }, { "epoch": 0.4410753360425133, "grad_norm": 0.224609375, "learning_rate": 0.00019413343569889702, "loss": 1.8786, "step": 1411 }, { "epoch": 0.4413879337292904, "grad_norm": 0.2294921875, "learning_rate": 0.00019412514089631785, "loss": 1.7905, "step": 1412 }, { "epoch": 0.4417005314160675, "grad_norm": 0.2255859375, "learning_rate": 0.00019411684041128392, "loss": 1.7573, "step": 1413 }, { "epoch": 0.4420131291028446, "grad_norm": 0.220703125, "learning_rate": 0.00019410853424429642, "loss": 1.6898, "step": 1414 }, { "epoch": 0.44232572678962173, "grad_norm": 0.220703125, "learning_rate": 0.00019410022239585678, "loss": 1.7676, "step": 1415 }, { "epoch": 0.4426383244763989, "grad_norm": 0.2470703125, "learning_rate": 0.0001940919048664668, "loss": 1.7774, "step": 1416 }, { "epoch": 0.442950922163176, "grad_norm": 0.2119140625, "learning_rate": 0.00019408358165662866, "loss": 1.6328, "step": 1417 }, { "epoch": 0.4432635198499531, "grad_norm": 0.2265625, "learning_rate": 0.00019407525276684474, "loss": 1.7037, "step": 1418 }, { "epoch": 0.4435761175367302, "grad_norm": 0.2080078125, "learning_rate": 0.00019406691819761796, "loss": 1.81, "step": 1419 }, { "epoch": 0.44388871522350737, "grad_norm": 0.2421875, "learning_rate": 0.00019405857794945147, "loss": 1.8474, "step": 1420 }, { "epoch": 0.4442013129102845, "grad_norm": 0.2197265625, "learning_rate": 0.00019405023202284874, "loss": 1.6398, "step": 1421 }, { "epoch": 0.4445139105970616, "grad_norm": 0.22265625, "learning_rate": 0.0001940418804183137, "loss": 1.5592, "step": 1422 }, { "epoch": 0.4448265082838387, "grad_norm": 0.2099609375, "learning_rate": 0.00019403352313635046, "loss": 1.6566, "step": 1423 }, { "epoch": 0.44513910597061584, "grad_norm": 0.21484375, "learning_rate": 0.0001940251601774636, "loss": 1.6928, "step": 1424 }, { "epoch": 0.44545170365739295, "grad_norm": 0.2158203125, "learning_rate": 0.00019401679154215802, "loss": 2.029, "step": 1425 }, { "epoch": 0.44576430134417006, "grad_norm": 0.220703125, "learning_rate": 0.0001940084172309389, "loss": 1.9225, "step": 1426 }, { "epoch": 0.44607689903094716, "grad_norm": 0.2431640625, "learning_rate": 0.00019400003724431185, "loss": 1.9033, "step": 1427 }, { "epoch": 0.44638949671772427, "grad_norm": 0.2197265625, "learning_rate": 0.00019399165158278279, "loss": 1.9373, "step": 1428 }, { "epoch": 0.4467020944045014, "grad_norm": 0.2216796875, "learning_rate": 0.00019398326024685792, "loss": 1.8287, "step": 1429 }, { "epoch": 0.44701469209127853, "grad_norm": 0.2353515625, "learning_rate": 0.00019397486323704388, "loss": 1.4876, "step": 1430 }, { "epoch": 0.44732728977805564, "grad_norm": 0.2109375, "learning_rate": 0.0001939664605538476, "loss": 1.7532, "step": 1431 }, { "epoch": 0.44763988746483274, "grad_norm": 0.2216796875, "learning_rate": 0.0001939580521977763, "loss": 1.8811, "step": 1432 }, { "epoch": 0.4479524851516099, "grad_norm": 0.22265625, "learning_rate": 0.00019394963816933772, "loss": 1.8956, "step": 1433 }, { "epoch": 0.448265082838387, "grad_norm": 0.2236328125, "learning_rate": 0.00019394121846903975, "loss": 1.7634, "step": 1434 }, { "epoch": 0.4485776805251641, "grad_norm": 0.2158203125, "learning_rate": 0.0001939327930973907, "loss": 1.5284, "step": 1435 }, { "epoch": 0.4488902782119412, "grad_norm": 0.234375, "learning_rate": 0.00019392436205489924, "loss": 1.8581, "step": 1436 }, { "epoch": 0.4492028758987183, "grad_norm": 0.2216796875, "learning_rate": 0.00019391592534207436, "loss": 1.4981, "step": 1437 }, { "epoch": 0.4495154735854955, "grad_norm": 0.220703125, "learning_rate": 0.00019390748295942535, "loss": 1.6315, "step": 1438 }, { "epoch": 0.4498280712722726, "grad_norm": 0.216796875, "learning_rate": 0.00019389903490746194, "loss": 1.755, "step": 1439 }, { "epoch": 0.4501406689590497, "grad_norm": 0.2177734375, "learning_rate": 0.00019389058118669418, "loss": 1.6564, "step": 1440 }, { "epoch": 0.4504532666458268, "grad_norm": 0.232421875, "learning_rate": 0.00019388212179763235, "loss": 1.8079, "step": 1441 }, { "epoch": 0.45076586433260396, "grad_norm": 0.2197265625, "learning_rate": 0.0001938736567407872, "loss": 1.7621, "step": 1442 }, { "epoch": 0.45107846201938107, "grad_norm": 0.2216796875, "learning_rate": 0.00019386518601666977, "loss": 2.0246, "step": 1443 }, { "epoch": 0.45139105970615817, "grad_norm": 0.228515625, "learning_rate": 0.0001938567096257914, "loss": 1.7006, "step": 1444 }, { "epoch": 0.4517036573929353, "grad_norm": 0.23046875, "learning_rate": 0.00019384822756866394, "loss": 1.7433, "step": 1445 }, { "epoch": 0.45201625507971244, "grad_norm": 0.220703125, "learning_rate": 0.00019383973984579936, "loss": 1.6673, "step": 1446 }, { "epoch": 0.45232885276648954, "grad_norm": 0.20703125, "learning_rate": 0.00019383124645771008, "loss": 1.7402, "step": 1447 }, { "epoch": 0.45264145045326665, "grad_norm": 0.220703125, "learning_rate": 0.00019382274740490892, "loss": 1.7445, "step": 1448 }, { "epoch": 0.45295404814004375, "grad_norm": 0.2216796875, "learning_rate": 0.0001938142426879089, "loss": 1.752, "step": 1449 }, { "epoch": 0.45326664582682086, "grad_norm": 0.224609375, "learning_rate": 0.00019380573230722353, "loss": 1.7653, "step": 1450 }, { "epoch": 0.453579243513598, "grad_norm": 0.224609375, "learning_rate": 0.00019379721626336656, "loss": 1.4672, "step": 1451 }, { "epoch": 0.4538918412003751, "grad_norm": 0.224609375, "learning_rate": 0.0001937886945568521, "loss": 1.6907, "step": 1452 }, { "epoch": 0.45420443888715223, "grad_norm": 0.2265625, "learning_rate": 0.00019378016718819466, "loss": 1.7775, "step": 1453 }, { "epoch": 0.45451703657392933, "grad_norm": 0.2216796875, "learning_rate": 0.00019377163415790902, "loss": 1.913, "step": 1454 }, { "epoch": 0.4548296342607065, "grad_norm": 0.216796875, "learning_rate": 0.00019376309546651033, "loss": 1.8471, "step": 1455 }, { "epoch": 0.4551422319474836, "grad_norm": 0.228515625, "learning_rate": 0.00019375455111451405, "loss": 1.5682, "step": 1456 }, { "epoch": 0.4554548296342607, "grad_norm": 0.220703125, "learning_rate": 0.00019374600110243608, "loss": 1.7008, "step": 1457 }, { "epoch": 0.4557674273210378, "grad_norm": 0.21875, "learning_rate": 0.00019373744543079257, "loss": 1.7075, "step": 1458 }, { "epoch": 0.4560800250078149, "grad_norm": 0.2158203125, "learning_rate": 0.0001937288841001, "loss": 1.6143, "step": 1459 }, { "epoch": 0.4563926226945921, "grad_norm": 0.21484375, "learning_rate": 0.00019372031711087527, "loss": 1.6665, "step": 1460 }, { "epoch": 0.4567052203813692, "grad_norm": 0.2158203125, "learning_rate": 0.00019371174446363557, "loss": 1.6533, "step": 1461 }, { "epoch": 0.4570178180681463, "grad_norm": 0.2294921875, "learning_rate": 0.00019370316615889842, "loss": 1.5501, "step": 1462 }, { "epoch": 0.4573304157549234, "grad_norm": 0.232421875, "learning_rate": 0.00019369458219718175, "loss": 1.8101, "step": 1463 }, { "epoch": 0.45764301344170055, "grad_norm": 0.216796875, "learning_rate": 0.00019368599257900372, "loss": 1.6708, "step": 1464 }, { "epoch": 0.45795561112847766, "grad_norm": 0.2158203125, "learning_rate": 0.00019367739730488296, "loss": 1.6922, "step": 1465 }, { "epoch": 0.45826820881525476, "grad_norm": 0.220703125, "learning_rate": 0.00019366879637533834, "loss": 1.6808, "step": 1466 }, { "epoch": 0.45858080650203187, "grad_norm": 0.2216796875, "learning_rate": 0.00019366018979088913, "loss": 1.654, "step": 1467 }, { "epoch": 0.45889340418880903, "grad_norm": 0.2099609375, "learning_rate": 0.0001936515775520549, "loss": 1.7892, "step": 1468 }, { "epoch": 0.45920600187558613, "grad_norm": 0.2158203125, "learning_rate": 0.00019364295965935562, "loss": 1.6039, "step": 1469 }, { "epoch": 0.45951859956236324, "grad_norm": 0.2294921875, "learning_rate": 0.0001936343361133115, "loss": 1.6348, "step": 1470 }, { "epoch": 0.45983119724914034, "grad_norm": 0.2177734375, "learning_rate": 0.0001936257069144432, "loss": 2.0579, "step": 1471 }, { "epoch": 0.46014379493591745, "grad_norm": 0.2275390625, "learning_rate": 0.00019361707206327168, "loss": 1.5824, "step": 1472 }, { "epoch": 0.4604563926226946, "grad_norm": 0.2197265625, "learning_rate": 0.0001936084315603182, "loss": 1.6563, "step": 1473 }, { "epoch": 0.4607689903094717, "grad_norm": 0.21484375, "learning_rate": 0.0001935997854061044, "loss": 1.7782, "step": 1474 }, { "epoch": 0.4610815879962488, "grad_norm": 0.22265625, "learning_rate": 0.00019359113360115234, "loss": 1.7625, "step": 1475 }, { "epoch": 0.4613941856830259, "grad_norm": 0.2294921875, "learning_rate": 0.00019358247614598427, "loss": 1.5607, "step": 1476 }, { "epoch": 0.4617067833698031, "grad_norm": 0.2197265625, "learning_rate": 0.00019357381304112281, "loss": 1.6091, "step": 1477 }, { "epoch": 0.4620193810565802, "grad_norm": 0.21875, "learning_rate": 0.00019356514428709104, "loss": 1.5822, "step": 1478 }, { "epoch": 0.4623319787433573, "grad_norm": 0.21875, "learning_rate": 0.0001935564698844123, "loss": 1.8785, "step": 1479 }, { "epoch": 0.4626445764301344, "grad_norm": 0.2197265625, "learning_rate": 0.0001935477898336102, "loss": 1.4933, "step": 1480 }, { "epoch": 0.4629571741169115, "grad_norm": 0.341796875, "learning_rate": 0.00019353910413520887, "loss": 2.2543, "step": 1481 }, { "epoch": 0.46326977180368867, "grad_norm": 0.2060546875, "learning_rate": 0.0001935304127897326, "loss": 1.6022, "step": 1482 }, { "epoch": 0.4635823694904658, "grad_norm": 0.224609375, "learning_rate": 0.00019352171579770615, "loss": 1.9542, "step": 1483 }, { "epoch": 0.4638949671772429, "grad_norm": 0.2158203125, "learning_rate": 0.00019351301315965452, "loss": 1.5863, "step": 1484 }, { "epoch": 0.46420756486402, "grad_norm": 0.2216796875, "learning_rate": 0.00019350430487610312, "loss": 1.9259, "step": 1485 }, { "epoch": 0.46452016255079714, "grad_norm": 0.2197265625, "learning_rate": 0.0001934955909475777, "loss": 1.9044, "step": 1486 }, { "epoch": 0.46483276023757425, "grad_norm": 0.2255859375, "learning_rate": 0.00019348687137460432, "loss": 1.829, "step": 1487 }, { "epoch": 0.46514535792435135, "grad_norm": 0.2294921875, "learning_rate": 0.00019347814615770933, "loss": 1.5524, "step": 1488 }, { "epoch": 0.46545795561112846, "grad_norm": 0.2236328125, "learning_rate": 0.00019346941529741954, "loss": 1.683, "step": 1489 }, { "epoch": 0.4657705532979056, "grad_norm": 0.220703125, "learning_rate": 0.0001934606787942621, "loss": 1.8919, "step": 1490 }, { "epoch": 0.4660831509846827, "grad_norm": 0.2275390625, "learning_rate": 0.00019345193664876433, "loss": 1.7553, "step": 1491 }, { "epoch": 0.46639574867145983, "grad_norm": 0.21875, "learning_rate": 0.0001934431888614541, "loss": 1.9543, "step": 1492 }, { "epoch": 0.46670834635823694, "grad_norm": 0.2197265625, "learning_rate": 0.00019343443543285945, "loss": 1.6919, "step": 1493 }, { "epoch": 0.46702094404501404, "grad_norm": 0.2412109375, "learning_rate": 0.00019342567636350887, "loss": 1.6121, "step": 1494 }, { "epoch": 0.4673335417317912, "grad_norm": 0.2255859375, "learning_rate": 0.00019341691165393116, "loss": 1.5772, "step": 1495 }, { "epoch": 0.4676461394185683, "grad_norm": 0.2216796875, "learning_rate": 0.00019340814130465548, "loss": 1.9449, "step": 1496 }, { "epoch": 0.4679587371053454, "grad_norm": 0.224609375, "learning_rate": 0.00019339936531621122, "loss": 1.7063, "step": 1497 }, { "epoch": 0.4682713347921225, "grad_norm": 0.212890625, "learning_rate": 0.0001933905836891283, "loss": 1.7768, "step": 1498 }, { "epoch": 0.4685839324788997, "grad_norm": 0.21875, "learning_rate": 0.00019338179642393685, "loss": 1.7279, "step": 1499 }, { "epoch": 0.4688965301656768, "grad_norm": 0.2197265625, "learning_rate": 0.0001933730035211673, "loss": 1.7344, "step": 1500 }, { "epoch": 0.4692091278524539, "grad_norm": 0.21484375, "learning_rate": 0.00019336420498135057, "loss": 1.6349, "step": 1501 }, { "epoch": 0.469521725539231, "grad_norm": 0.2255859375, "learning_rate": 0.0001933554008050178, "loss": 1.703, "step": 1502 }, { "epoch": 0.46983432322600815, "grad_norm": 0.21875, "learning_rate": 0.00019334659099270053, "loss": 1.6039, "step": 1503 }, { "epoch": 0.47014692091278526, "grad_norm": 0.2236328125, "learning_rate": 0.0001933377755449306, "loss": 1.7018, "step": 1504 }, { "epoch": 0.47045951859956237, "grad_norm": 0.22265625, "learning_rate": 0.00019332895446224022, "loss": 1.5957, "step": 1505 }, { "epoch": 0.47077211628633947, "grad_norm": 0.2265625, "learning_rate": 0.00019332012774516191, "loss": 1.6054, "step": 1506 }, { "epoch": 0.4710847139731166, "grad_norm": 0.216796875, "learning_rate": 0.0001933112953942286, "loss": 1.6822, "step": 1507 }, { "epoch": 0.47139731165989374, "grad_norm": 0.2216796875, "learning_rate": 0.00019330245740997346, "loss": 1.6045, "step": 1508 }, { "epoch": 0.47170990934667084, "grad_norm": 0.2294921875, "learning_rate": 0.00019329361379293006, "loss": 1.6817, "step": 1509 }, { "epoch": 0.47202250703344795, "grad_norm": 0.26953125, "learning_rate": 0.00019328476454363237, "loss": 1.6334, "step": 1510 }, { "epoch": 0.47233510472022505, "grad_norm": 0.236328125, "learning_rate": 0.00019327590966261452, "loss": 1.9416, "step": 1511 }, { "epoch": 0.4726477024070022, "grad_norm": 0.21484375, "learning_rate": 0.00019326704915041115, "loss": 1.8148, "step": 1512 }, { "epoch": 0.4729603000937793, "grad_norm": 0.2158203125, "learning_rate": 0.0001932581830075572, "loss": 1.6804, "step": 1513 }, { "epoch": 0.4732728977805564, "grad_norm": 0.224609375, "learning_rate": 0.00019324931123458784, "loss": 1.6578, "step": 1514 }, { "epoch": 0.47358549546733353, "grad_norm": 0.232421875, "learning_rate": 0.00019324043383203875, "loss": 1.7513, "step": 1515 }, { "epoch": 0.47389809315411063, "grad_norm": 0.2060546875, "learning_rate": 0.00019323155080044587, "loss": 1.8009, "step": 1516 }, { "epoch": 0.4742106908408878, "grad_norm": 0.23828125, "learning_rate": 0.00019322266214034546, "loss": 1.5399, "step": 1517 }, { "epoch": 0.4745232885276649, "grad_norm": 0.2216796875, "learning_rate": 0.00019321376785227416, "loss": 1.6751, "step": 1518 }, { "epoch": 0.474835886214442, "grad_norm": 0.2236328125, "learning_rate": 0.00019320486793676889, "loss": 1.5572, "step": 1519 }, { "epoch": 0.4751484839012191, "grad_norm": 0.228515625, "learning_rate": 0.00019319596239436698, "loss": 1.6178, "step": 1520 }, { "epoch": 0.47546108158799627, "grad_norm": 0.2294921875, "learning_rate": 0.00019318705122560602, "loss": 1.5581, "step": 1521 }, { "epoch": 0.4757736792747734, "grad_norm": 0.2265625, "learning_rate": 0.00019317813443102408, "loss": 1.6904, "step": 1522 }, { "epoch": 0.4760862769615505, "grad_norm": 0.2373046875, "learning_rate": 0.0001931692120111594, "loss": 1.9162, "step": 1523 }, { "epoch": 0.4763988746483276, "grad_norm": 0.2265625, "learning_rate": 0.0001931602839665507, "loss": 1.6703, "step": 1524 }, { "epoch": 0.47671147233510475, "grad_norm": 0.2216796875, "learning_rate": 0.0001931513502977369, "loss": 1.6865, "step": 1525 }, { "epoch": 0.47702407002188185, "grad_norm": 0.2177734375, "learning_rate": 0.00019314241100525738, "loss": 1.7221, "step": 1526 }, { "epoch": 0.47733666770865896, "grad_norm": 0.2216796875, "learning_rate": 0.00019313346608965183, "loss": 1.6306, "step": 1527 }, { "epoch": 0.47764926539543606, "grad_norm": 0.224609375, "learning_rate": 0.00019312451555146022, "loss": 2.0435, "step": 1528 }, { "epoch": 0.47796186308221317, "grad_norm": 0.2265625, "learning_rate": 0.00019311555939122298, "loss": 1.4892, "step": 1529 }, { "epoch": 0.47827446076899033, "grad_norm": 0.2236328125, "learning_rate": 0.00019310659760948075, "loss": 1.7291, "step": 1530 }, { "epoch": 0.47858705845576743, "grad_norm": 0.2119140625, "learning_rate": 0.00019309763020677458, "loss": 1.7014, "step": 1531 }, { "epoch": 0.47889965614254454, "grad_norm": 0.23828125, "learning_rate": 0.00019308865718364583, "loss": 2.0065, "step": 1532 }, { "epoch": 0.47921225382932164, "grad_norm": 0.23046875, "learning_rate": 0.00019307967854063622, "loss": 1.5883, "step": 1533 }, { "epoch": 0.4795248515160988, "grad_norm": 0.236328125, "learning_rate": 0.0001930706942782878, "loss": 1.7971, "step": 1534 }, { "epoch": 0.4798374492028759, "grad_norm": 0.224609375, "learning_rate": 0.00019306170439714298, "loss": 1.6701, "step": 1535 }, { "epoch": 0.480150046889653, "grad_norm": 0.2275390625, "learning_rate": 0.00019305270889774444, "loss": 1.611, "step": 1536 }, { "epoch": 0.4804626445764301, "grad_norm": 0.240234375, "learning_rate": 0.00019304370778063534, "loss": 1.8515, "step": 1537 }, { "epoch": 0.4807752422632072, "grad_norm": 0.2216796875, "learning_rate": 0.00019303470104635898, "loss": 1.64, "step": 1538 }, { "epoch": 0.4810878399499844, "grad_norm": 0.228515625, "learning_rate": 0.0001930256886954592, "loss": 1.7283, "step": 1539 }, { "epoch": 0.4814004376367615, "grad_norm": 0.244140625, "learning_rate": 0.00019301667072848004, "loss": 1.8076, "step": 1540 }, { "epoch": 0.4817130353235386, "grad_norm": 0.2294921875, "learning_rate": 0.00019300764714596594, "loss": 1.9384, "step": 1541 }, { "epoch": 0.4820256330103157, "grad_norm": 0.2294921875, "learning_rate": 0.00019299861794846166, "loss": 1.8492, "step": 1542 }, { "epoch": 0.48233823069709286, "grad_norm": 0.21875, "learning_rate": 0.00019298958313651227, "loss": 1.744, "step": 1543 }, { "epoch": 0.48265082838386997, "grad_norm": 0.220703125, "learning_rate": 0.0001929805427106633, "loss": 1.7691, "step": 1544 }, { "epoch": 0.4829634260706471, "grad_norm": 0.2216796875, "learning_rate": 0.00019297149667146045, "loss": 1.6095, "step": 1545 }, { "epoch": 0.4832760237574242, "grad_norm": 0.2099609375, "learning_rate": 0.0001929624450194499, "loss": 1.8153, "step": 1546 }, { "epoch": 0.48358862144420134, "grad_norm": 0.2236328125, "learning_rate": 0.00019295338775517803, "loss": 1.8315, "step": 1547 }, { "epoch": 0.48390121913097844, "grad_norm": 0.208984375, "learning_rate": 0.00019294432487919173, "loss": 1.6651, "step": 1548 }, { "epoch": 0.48421381681775555, "grad_norm": 0.20703125, "learning_rate": 0.0001929352563920381, "loss": 1.632, "step": 1549 }, { "epoch": 0.48452641450453265, "grad_norm": 0.2080078125, "learning_rate": 0.0001929261822942646, "loss": 1.5682, "step": 1550 }, { "epoch": 0.48483901219130976, "grad_norm": 0.220703125, "learning_rate": 0.00019291710258641907, "loss": 1.7631, "step": 1551 }, { "epoch": 0.4851516098780869, "grad_norm": 0.212890625, "learning_rate": 0.00019290801726904962, "loss": 1.6418, "step": 1552 }, { "epoch": 0.485464207564864, "grad_norm": 0.2255859375, "learning_rate": 0.0001928989263427048, "loss": 1.4744, "step": 1553 }, { "epoch": 0.48577680525164113, "grad_norm": 0.2080078125, "learning_rate": 0.0001928898298079334, "loss": 1.7507, "step": 1554 }, { "epoch": 0.48608940293841824, "grad_norm": 0.2275390625, "learning_rate": 0.00019288072766528462, "loss": 1.5483, "step": 1555 }, { "epoch": 0.4864020006251954, "grad_norm": 0.228515625, "learning_rate": 0.00019287161991530792, "loss": 1.7318, "step": 1556 }, { "epoch": 0.4867145983119725, "grad_norm": 0.228515625, "learning_rate": 0.0001928625065585532, "loss": 1.8483, "step": 1557 }, { "epoch": 0.4870271959987496, "grad_norm": 0.21875, "learning_rate": 0.00019285338759557065, "loss": 1.6431, "step": 1558 }, { "epoch": 0.4873397936855267, "grad_norm": 0.2294921875, "learning_rate": 0.00019284426302691073, "loss": 1.6648, "step": 1559 }, { "epoch": 0.4876523913723038, "grad_norm": 0.21484375, "learning_rate": 0.00019283513285312437, "loss": 1.5061, "step": 1560 }, { "epoch": 0.487964989059081, "grad_norm": 0.310546875, "learning_rate": 0.0001928259970747627, "loss": 2.72, "step": 1561 }, { "epoch": 0.4882775867458581, "grad_norm": 0.2314453125, "learning_rate": 0.00019281685569237734, "loss": 1.6893, "step": 1562 }, { "epoch": 0.4885901844326352, "grad_norm": 0.216796875, "learning_rate": 0.0001928077087065201, "loss": 1.6951, "step": 1563 }, { "epoch": 0.4889027821194123, "grad_norm": 0.220703125, "learning_rate": 0.0001927985561177432, "loss": 1.7366, "step": 1564 }, { "epoch": 0.48921537980618945, "grad_norm": 0.2060546875, "learning_rate": 0.00019278939792659924, "loss": 1.7637, "step": 1565 }, { "epoch": 0.48952797749296656, "grad_norm": 0.212890625, "learning_rate": 0.00019278023413364106, "loss": 1.5522, "step": 1566 }, { "epoch": 0.48984057517974366, "grad_norm": 0.2373046875, "learning_rate": 0.00019277106473942194, "loss": 1.8184, "step": 1567 }, { "epoch": 0.49015317286652077, "grad_norm": 0.2255859375, "learning_rate": 0.00019276188974449543, "loss": 1.5573, "step": 1568 }, { "epoch": 0.49046577055329793, "grad_norm": 0.2255859375, "learning_rate": 0.00019275270914941538, "loss": 1.5074, "step": 1569 }, { "epoch": 0.49077836824007504, "grad_norm": 0.2275390625, "learning_rate": 0.00019274352295473612, "loss": 1.9685, "step": 1570 }, { "epoch": 0.49109096592685214, "grad_norm": 0.23046875, "learning_rate": 0.00019273433116101217, "loss": 1.8918, "step": 1571 }, { "epoch": 0.49140356361362925, "grad_norm": 0.240234375, "learning_rate": 0.00019272513376879854, "loss": 1.8173, "step": 1572 }, { "epoch": 0.49171616130040635, "grad_norm": 0.220703125, "learning_rate": 0.00019271593077865035, "loss": 1.7093, "step": 1573 }, { "epoch": 0.4920287589871835, "grad_norm": 0.2255859375, "learning_rate": 0.00019270672219112332, "loss": 1.7993, "step": 1574 }, { "epoch": 0.4923413566739606, "grad_norm": 0.2080078125, "learning_rate": 0.00019269750800677331, "loss": 1.7468, "step": 1575 }, { "epoch": 0.4926539543607377, "grad_norm": 0.208984375, "learning_rate": 0.00019268828822615661, "loss": 1.4455, "step": 1576 }, { "epoch": 0.4929665520475148, "grad_norm": 0.2392578125, "learning_rate": 0.00019267906284982985, "loss": 1.9409, "step": 1577 }, { "epoch": 0.493279149734292, "grad_norm": 0.220703125, "learning_rate": 0.00019266983187834995, "loss": 1.8848, "step": 1578 }, { "epoch": 0.4935917474210691, "grad_norm": 0.220703125, "learning_rate": 0.0001926605953122742, "loss": 1.5927, "step": 1579 }, { "epoch": 0.4939043451078462, "grad_norm": 0.2216796875, "learning_rate": 0.00019265135315216028, "loss": 1.7506, "step": 1580 }, { "epoch": 0.4942169427946233, "grad_norm": 0.22265625, "learning_rate": 0.00019264210539856607, "loss": 1.7024, "step": 1581 }, { "epoch": 0.49452954048140046, "grad_norm": 0.228515625, "learning_rate": 0.0001926328520520499, "loss": 1.8899, "step": 1582 }, { "epoch": 0.49484213816817757, "grad_norm": 0.2138671875, "learning_rate": 0.0001926235931131704, "loss": 1.7209, "step": 1583 }, { "epoch": 0.4951547358549547, "grad_norm": 0.22265625, "learning_rate": 0.00019261432858248657, "loss": 1.582, "step": 1584 }, { "epoch": 0.4954673335417318, "grad_norm": 0.216796875, "learning_rate": 0.0001926050584605577, "loss": 1.7583, "step": 1585 }, { "epoch": 0.4957799312285089, "grad_norm": 0.2353515625, "learning_rate": 0.00019259578274794344, "loss": 1.7366, "step": 1586 }, { "epoch": 0.49609252891528605, "grad_norm": 0.2294921875, "learning_rate": 0.0001925865014452038, "loss": 1.7721, "step": 1587 }, { "epoch": 0.49640512660206315, "grad_norm": 0.2236328125, "learning_rate": 0.00019257721455289906, "loss": 1.9818, "step": 1588 }, { "epoch": 0.49671772428884026, "grad_norm": 0.220703125, "learning_rate": 0.00019256792207158991, "loss": 1.719, "step": 1589 }, { "epoch": 0.49703032197561736, "grad_norm": 0.220703125, "learning_rate": 0.00019255862400183733, "loss": 1.7085, "step": 1590 }, { "epoch": 0.4973429196623945, "grad_norm": 0.2197265625, "learning_rate": 0.00019254932034420266, "loss": 1.5593, "step": 1591 }, { "epoch": 0.4976555173491716, "grad_norm": 0.220703125, "learning_rate": 0.00019254001109924763, "loss": 1.6743, "step": 1592 }, { "epoch": 0.49796811503594873, "grad_norm": 0.220703125, "learning_rate": 0.0001925306962675342, "loss": 1.5977, "step": 1593 }, { "epoch": 0.49828071272272584, "grad_norm": 0.216796875, "learning_rate": 0.00019252137584962472, "loss": 1.6007, "step": 1594 }, { "epoch": 0.49859331040950294, "grad_norm": 0.2177734375, "learning_rate": 0.00019251204984608184, "loss": 1.5078, "step": 1595 }, { "epoch": 0.4989059080962801, "grad_norm": 0.2216796875, "learning_rate": 0.00019250271825746866, "loss": 1.9624, "step": 1596 }, { "epoch": 0.4992185057830572, "grad_norm": 0.21484375, "learning_rate": 0.0001924933810843485, "loss": 1.6749, "step": 1597 }, { "epoch": 0.4995311034698343, "grad_norm": 0.23046875, "learning_rate": 0.00019248403832728504, "loss": 1.7965, "step": 1598 }, { "epoch": 0.4998437011566114, "grad_norm": 0.2255859375, "learning_rate": 0.00019247468998684233, "loss": 1.7333, "step": 1599 }, { "epoch": 0.5001562988433885, "grad_norm": 0.21484375, "learning_rate": 0.00019246533606358476, "loss": 1.9014, "step": 1600 }, { "epoch": 0.5001562988433885, "eval_loss": 1.6468836069107056, "eval_runtime": 1904.4552, "eval_samples_per_second": 4.798, "eval_steps_per_second": 2.399, "step": 1600 }, { "epoch": 0.5004688965301657, "grad_norm": 0.2060546875, "learning_rate": 0.000192455976558077, "loss": 1.8399, "step": 1601 }, { "epoch": 0.5007814942169428, "grad_norm": 0.22265625, "learning_rate": 0.00019244661147088413, "loss": 1.7516, "step": 1602 }, { "epoch": 0.5010940919037199, "grad_norm": 0.2236328125, "learning_rate": 0.00019243724080257154, "loss": 1.6023, "step": 1603 }, { "epoch": 0.5014066895904971, "grad_norm": 0.240234375, "learning_rate": 0.0001924278645537049, "loss": 1.8678, "step": 1604 }, { "epoch": 0.5017192872772741, "grad_norm": 0.2138671875, "learning_rate": 0.0001924184827248503, "loss": 1.8877, "step": 1605 }, { "epoch": 0.5020318849640513, "grad_norm": 0.2197265625, "learning_rate": 0.00019240909531657415, "loss": 1.7109, "step": 1606 }, { "epoch": 0.5023444826508284, "grad_norm": 0.21484375, "learning_rate": 0.00019239970232944314, "loss": 1.9394, "step": 1607 }, { "epoch": 0.5026570803376055, "grad_norm": 0.2265625, "learning_rate": 0.00019239030376402437, "loss": 1.6907, "step": 1608 }, { "epoch": 0.5029696780243826, "grad_norm": 0.21875, "learning_rate": 0.00019238089962088522, "loss": 1.3726, "step": 1609 }, { "epoch": 0.5032822757111597, "grad_norm": 0.2197265625, "learning_rate": 0.00019237148990059342, "loss": 1.4186, "step": 1610 }, { "epoch": 0.5035948733979368, "grad_norm": 0.232421875, "learning_rate": 0.00019236207460371707, "loss": 1.8961, "step": 1611 }, { "epoch": 0.503907471084714, "grad_norm": 0.2216796875, "learning_rate": 0.0001923526537308246, "loss": 1.5122, "step": 1612 }, { "epoch": 0.5042200687714911, "grad_norm": 0.2099609375, "learning_rate": 0.00019234322728248473, "loss": 1.6718, "step": 1613 }, { "epoch": 0.5045326664582682, "grad_norm": 0.2177734375, "learning_rate": 0.00019233379525926652, "loss": 1.5157, "step": 1614 }, { "epoch": 0.5048452641450454, "grad_norm": 0.2197265625, "learning_rate": 0.00019232435766173946, "loss": 1.8013, "step": 1615 }, { "epoch": 0.5051578618318224, "grad_norm": 0.2138671875, "learning_rate": 0.00019231491449047327, "loss": 1.6126, "step": 1616 }, { "epoch": 0.5054704595185996, "grad_norm": 0.2314453125, "learning_rate": 0.00019230546574603805, "loss": 1.9199, "step": 1617 }, { "epoch": 0.5057830572053766, "grad_norm": 0.216796875, "learning_rate": 0.00019229601142900426, "loss": 1.8629, "step": 1618 }, { "epoch": 0.5060956548921538, "grad_norm": 0.2177734375, "learning_rate": 0.0001922865515399426, "loss": 1.9572, "step": 1619 }, { "epoch": 0.506408252578931, "grad_norm": 0.212890625, "learning_rate": 0.0001922770860794243, "loss": 1.8666, "step": 1620 }, { "epoch": 0.506720850265708, "grad_norm": 0.2158203125, "learning_rate": 0.00019226761504802066, "loss": 1.6269, "step": 1621 }, { "epoch": 0.5070334479524852, "grad_norm": 0.212890625, "learning_rate": 0.00019225813844630355, "loss": 1.4542, "step": 1622 }, { "epoch": 0.5073460456392622, "grad_norm": 0.232421875, "learning_rate": 0.00019224865627484502, "loss": 1.726, "step": 1623 }, { "epoch": 0.5076586433260394, "grad_norm": 0.2197265625, "learning_rate": 0.00019223916853421756, "loss": 1.9227, "step": 1624 }, { "epoch": 0.5079712410128165, "grad_norm": 0.232421875, "learning_rate": 0.000192229675224994, "loss": 1.7876, "step": 1625 }, { "epoch": 0.5082838386995936, "grad_norm": 0.2119140625, "learning_rate": 0.0001922201763477474, "loss": 1.9213, "step": 1626 }, { "epoch": 0.5085964363863708, "grad_norm": 0.2294921875, "learning_rate": 0.00019221067190305121, "loss": 1.8536, "step": 1627 }, { "epoch": 0.5089090340731478, "grad_norm": 0.236328125, "learning_rate": 0.00019220116189147928, "loss": 1.7391, "step": 1628 }, { "epoch": 0.509221631759925, "grad_norm": 0.22265625, "learning_rate": 0.00019219164631360572, "loss": 1.5871, "step": 1629 }, { "epoch": 0.5095342294467021, "grad_norm": 0.236328125, "learning_rate": 0.00019218212517000497, "loss": 1.7358, "step": 1630 }, { "epoch": 0.5098468271334792, "grad_norm": 0.2197265625, "learning_rate": 0.00019217259846125186, "loss": 1.7538, "step": 1631 }, { "epoch": 0.5101594248202563, "grad_norm": 0.228515625, "learning_rate": 0.00019216306618792151, "loss": 2.0148, "step": 1632 }, { "epoch": 0.5104720225070335, "grad_norm": 0.2216796875, "learning_rate": 0.00019215352835058944, "loss": 1.655, "step": 1633 }, { "epoch": 0.5107846201938105, "grad_norm": 0.2353515625, "learning_rate": 0.0001921439849498314, "loss": 1.8552, "step": 1634 }, { "epoch": 0.5110972178805877, "grad_norm": 0.2333984375, "learning_rate": 0.0001921344359862236, "loss": 2.0283, "step": 1635 }, { "epoch": 0.5114098155673648, "grad_norm": 0.2158203125, "learning_rate": 0.00019212488146034247, "loss": 1.8859, "step": 1636 }, { "epoch": 0.5117224132541419, "grad_norm": 0.2041015625, "learning_rate": 0.00019211532137276485, "loss": 1.7173, "step": 1637 }, { "epoch": 0.5120350109409191, "grad_norm": 0.2119140625, "learning_rate": 0.0001921057557240679, "loss": 1.6262, "step": 1638 }, { "epoch": 0.5123476086276961, "grad_norm": 0.234375, "learning_rate": 0.00019209618451482911, "loss": 1.6141, "step": 1639 }, { "epoch": 0.5126602063144733, "grad_norm": 0.224609375, "learning_rate": 0.0001920866077456263, "loss": 1.7475, "step": 1640 }, { "epoch": 0.5129728040012503, "grad_norm": 0.23828125, "learning_rate": 0.0001920770254170376, "loss": 1.7333, "step": 1641 }, { "epoch": 0.5132854016880275, "grad_norm": 0.2314453125, "learning_rate": 0.0001920674375296416, "loss": 1.8058, "step": 1642 }, { "epoch": 0.5135979993748047, "grad_norm": 0.216796875, "learning_rate": 0.00019205784408401705, "loss": 1.5659, "step": 1643 }, { "epoch": 0.5139105970615817, "grad_norm": 0.2421875, "learning_rate": 0.00019204824508074314, "loss": 1.6922, "step": 1644 }, { "epoch": 0.5142231947483589, "grad_norm": 0.20703125, "learning_rate": 0.00019203864052039937, "loss": 1.5329, "step": 1645 }, { "epoch": 0.514535792435136, "grad_norm": 0.2265625, "learning_rate": 0.00019202903040356557, "loss": 1.5799, "step": 1646 }, { "epoch": 0.5148483901219131, "grad_norm": 0.216796875, "learning_rate": 0.00019201941473082196, "loss": 1.7131, "step": 1647 }, { "epoch": 0.5151609878086902, "grad_norm": 0.2236328125, "learning_rate": 0.00019200979350274898, "loss": 1.668, "step": 1648 }, { "epoch": 0.5154735854954673, "grad_norm": 0.22265625, "learning_rate": 0.00019200016671992755, "loss": 1.8212, "step": 1649 }, { "epoch": 0.5157861831822445, "grad_norm": 0.2353515625, "learning_rate": 0.00019199053438293884, "loss": 1.745, "step": 1650 }, { "epoch": 0.5160987808690216, "grad_norm": 0.224609375, "learning_rate": 0.0001919808964923643, "loss": 1.9392, "step": 1651 }, { "epoch": 0.5164113785557987, "grad_norm": 0.2255859375, "learning_rate": 0.00019197125304878587, "loss": 1.8001, "step": 1652 }, { "epoch": 0.5167239762425758, "grad_norm": 0.2119140625, "learning_rate": 0.00019196160405278567, "loss": 1.6449, "step": 1653 }, { "epoch": 0.5170365739293529, "grad_norm": 0.2177734375, "learning_rate": 0.00019195194950494623, "loss": 1.7974, "step": 1654 }, { "epoch": 0.51734917161613, "grad_norm": 0.2333984375, "learning_rate": 0.00019194228940585043, "loss": 1.6213, "step": 1655 }, { "epoch": 0.5176617693029072, "grad_norm": 0.2216796875, "learning_rate": 0.0001919326237560815, "loss": 1.7459, "step": 1656 }, { "epoch": 0.5179743669896842, "grad_norm": 0.23046875, "learning_rate": 0.00019192295255622286, "loss": 2.0187, "step": 1657 }, { "epoch": 0.5182869646764614, "grad_norm": 0.22265625, "learning_rate": 0.00019191327580685846, "loss": 1.465, "step": 1658 }, { "epoch": 0.5185995623632386, "grad_norm": 0.21875, "learning_rate": 0.0001919035935085725, "loss": 1.7626, "step": 1659 }, { "epoch": 0.5189121600500156, "grad_norm": 0.228515625, "learning_rate": 0.00019189390566194943, "loss": 1.6333, "step": 1660 }, { "epoch": 0.5192247577367928, "grad_norm": 0.2236328125, "learning_rate": 0.00019188421226757423, "loss": 1.6854, "step": 1661 }, { "epoch": 0.5195373554235698, "grad_norm": 0.2177734375, "learning_rate": 0.00019187451332603202, "loss": 1.5598, "step": 1662 }, { "epoch": 0.519849953110347, "grad_norm": 0.224609375, "learning_rate": 0.00019186480883790836, "loss": 1.7953, "step": 1663 }, { "epoch": 0.5201625507971241, "grad_norm": 0.22265625, "learning_rate": 0.00019185509880378912, "loss": 1.7901, "step": 1664 }, { "epoch": 0.5204751484839012, "grad_norm": 0.23046875, "learning_rate": 0.00019184538322426054, "loss": 1.6819, "step": 1665 }, { "epoch": 0.5207877461706784, "grad_norm": 0.2236328125, "learning_rate": 0.00019183566209990911, "loss": 1.8034, "step": 1666 }, { "epoch": 0.5211003438574554, "grad_norm": 0.2255859375, "learning_rate": 0.00019182593543132174, "loss": 2.0384, "step": 1667 }, { "epoch": 0.5214129415442326, "grad_norm": 0.2080078125, "learning_rate": 0.00019181620321908564, "loss": 1.9369, "step": 1668 }, { "epoch": 0.5217255392310097, "grad_norm": 0.2333984375, "learning_rate": 0.00019180646546378832, "loss": 1.8764, "step": 1669 }, { "epoch": 0.5220381369177868, "grad_norm": 0.220703125, "learning_rate": 0.00019179672216601773, "loss": 1.6419, "step": 1670 }, { "epoch": 0.5223507346045639, "grad_norm": 0.408203125, "learning_rate": 0.00019178697332636202, "loss": 2.427, "step": 1671 }, { "epoch": 0.5226633322913411, "grad_norm": 0.2314453125, "learning_rate": 0.00019177721894540975, "loss": 1.81, "step": 1672 }, { "epoch": 0.5229759299781181, "grad_norm": 0.216796875, "learning_rate": 0.0001917674590237499, "loss": 1.67, "step": 1673 }, { "epoch": 0.5232885276648953, "grad_norm": 0.2255859375, "learning_rate": 0.00019175769356197153, "loss": 1.6198, "step": 1674 }, { "epoch": 0.5236011253516724, "grad_norm": 0.2314453125, "learning_rate": 0.0001917479225606643, "loss": 1.8033, "step": 1675 }, { "epoch": 0.5239137230384495, "grad_norm": 0.220703125, "learning_rate": 0.00019173814602041803, "loss": 1.6005, "step": 1676 }, { "epoch": 0.5242263207252267, "grad_norm": 0.22265625, "learning_rate": 0.00019172836394182303, "loss": 1.6983, "step": 1677 }, { "epoch": 0.5245389184120037, "grad_norm": 0.2216796875, "learning_rate": 0.00019171857632546978, "loss": 1.8186, "step": 1678 }, { "epoch": 0.5248515160987809, "grad_norm": 0.220703125, "learning_rate": 0.00019170878317194924, "loss": 1.6052, "step": 1679 }, { "epoch": 0.5251641137855579, "grad_norm": 0.23828125, "learning_rate": 0.00019169898448185256, "loss": 1.7156, "step": 1680 }, { "epoch": 0.5254767114723351, "grad_norm": 0.2177734375, "learning_rate": 0.00019168918025577134, "loss": 1.7039, "step": 1681 }, { "epoch": 0.5257893091591123, "grad_norm": 0.2275390625, "learning_rate": 0.00019167937049429745, "loss": 1.8326, "step": 1682 }, { "epoch": 0.5261019068458893, "grad_norm": 0.228515625, "learning_rate": 0.00019166955519802316, "loss": 1.6872, "step": 1683 }, { "epoch": 0.5264145045326665, "grad_norm": 0.2138671875, "learning_rate": 0.00019165973436754098, "loss": 1.6172, "step": 1684 }, { "epoch": 0.5267271022194435, "grad_norm": 0.2255859375, "learning_rate": 0.00019164990800344387, "loss": 1.7482, "step": 1685 }, { "epoch": 0.5270396999062207, "grad_norm": 0.224609375, "learning_rate": 0.000191640076106325, "loss": 1.6177, "step": 1686 }, { "epoch": 0.5273522975929978, "grad_norm": 0.2333984375, "learning_rate": 0.00019163023867677797, "loss": 1.6793, "step": 1687 }, { "epoch": 0.5276648952797749, "grad_norm": 0.2275390625, "learning_rate": 0.00019162039571539666, "loss": 1.6634, "step": 1688 }, { "epoch": 0.527977492966552, "grad_norm": 0.2197265625, "learning_rate": 0.0001916105472227753, "loss": 1.7808, "step": 1689 }, { "epoch": 0.5282900906533292, "grad_norm": 0.228515625, "learning_rate": 0.00019160069319950845, "loss": 1.7203, "step": 1690 }, { "epoch": 0.5286026883401063, "grad_norm": 0.23046875, "learning_rate": 0.00019159083364619103, "loss": 1.6893, "step": 1691 }, { "epoch": 0.5289152860268834, "grad_norm": 0.349609375, "learning_rate": 0.0001915809685634183, "loss": 2.3232, "step": 1692 }, { "epoch": 0.5292278837136605, "grad_norm": 0.2138671875, "learning_rate": 0.0001915710979517858, "loss": 1.554, "step": 1693 }, { "epoch": 0.5295404814004376, "grad_norm": 0.234375, "learning_rate": 0.0001915612218118894, "loss": 1.6621, "step": 1694 }, { "epoch": 0.5298530790872148, "grad_norm": 0.2197265625, "learning_rate": 0.00019155134014432534, "loss": 1.8881, "step": 1695 }, { "epoch": 0.5301656767739918, "grad_norm": 0.22265625, "learning_rate": 0.00019154145294969022, "loss": 1.8313, "step": 1696 }, { "epoch": 0.530478274460769, "grad_norm": 0.2158203125, "learning_rate": 0.00019153156022858094, "loss": 1.7908, "step": 1697 }, { "epoch": 0.5307908721475461, "grad_norm": 0.224609375, "learning_rate": 0.00019152166198159476, "loss": 1.6425, "step": 1698 }, { "epoch": 0.5311034698343232, "grad_norm": 0.21484375, "learning_rate": 0.00019151175820932917, "loss": 1.7114, "step": 1699 }, { "epoch": 0.5314160675211004, "grad_norm": 0.2109375, "learning_rate": 0.00019150184891238216, "loss": 1.5121, "step": 1700 }, { "epoch": 0.5317286652078774, "grad_norm": 0.2353515625, "learning_rate": 0.00019149193409135192, "loss": 1.7762, "step": 1701 }, { "epoch": 0.5320412628946546, "grad_norm": 0.2216796875, "learning_rate": 0.00019148201374683704, "loss": 1.8021, "step": 1702 }, { "epoch": 0.5323538605814317, "grad_norm": 0.2392578125, "learning_rate": 0.00019147208787943638, "loss": 1.8559, "step": 1703 }, { "epoch": 0.5326664582682088, "grad_norm": 0.33984375, "learning_rate": 0.00019146215648974924, "loss": 2.3382, "step": 1704 }, { "epoch": 0.532979055954986, "grad_norm": 0.23046875, "learning_rate": 0.00019145221957837515, "loss": 1.6269, "step": 1705 }, { "epoch": 0.533291653641763, "grad_norm": 0.2197265625, "learning_rate": 0.00019144227714591402, "loss": 1.8329, "step": 1706 }, { "epoch": 0.5336042513285402, "grad_norm": 0.23046875, "learning_rate": 0.0001914323291929661, "loss": 1.7395, "step": 1707 }, { "epoch": 0.5339168490153173, "grad_norm": 0.2216796875, "learning_rate": 0.00019142237572013197, "loss": 1.4983, "step": 1708 }, { "epoch": 0.5342294467020944, "grad_norm": 0.220703125, "learning_rate": 0.00019141241672801247, "loss": 1.7625, "step": 1709 }, { "epoch": 0.5345420443888715, "grad_norm": 0.23046875, "learning_rate": 0.0001914024522172089, "loss": 1.8429, "step": 1710 }, { "epoch": 0.5348546420756486, "grad_norm": 0.2294921875, "learning_rate": 0.00019139248218832285, "loss": 1.9247, "step": 1711 }, { "epoch": 0.5351672397624258, "grad_norm": 0.2216796875, "learning_rate": 0.00019138250664195615, "loss": 1.6563, "step": 1712 }, { "epoch": 0.5354798374492029, "grad_norm": 0.216796875, "learning_rate": 0.0001913725255787111, "loss": 1.5108, "step": 1713 }, { "epoch": 0.53579243513598, "grad_norm": 0.2197265625, "learning_rate": 0.00019136253899919024, "loss": 1.8109, "step": 1714 }, { "epoch": 0.5361050328227571, "grad_norm": 0.21875, "learning_rate": 0.00019135254690399648, "loss": 1.7063, "step": 1715 }, { "epoch": 0.5364176305095343, "grad_norm": 0.2294921875, "learning_rate": 0.00019134254929373303, "loss": 1.7218, "step": 1716 }, { "epoch": 0.5367302281963113, "grad_norm": 0.232421875, "learning_rate": 0.00019133254616900347, "loss": 1.6555, "step": 1717 }, { "epoch": 0.5370428258830885, "grad_norm": 0.2216796875, "learning_rate": 0.00019132253753041174, "loss": 1.9246, "step": 1718 }, { "epoch": 0.5373554235698655, "grad_norm": 0.2216796875, "learning_rate": 0.00019131252337856205, "loss": 1.818, "step": 1719 }, { "epoch": 0.5376680212566427, "grad_norm": 0.2216796875, "learning_rate": 0.00019130250371405895, "loss": 1.6691, "step": 1720 }, { "epoch": 0.5379806189434199, "grad_norm": 0.2255859375, "learning_rate": 0.00019129247853750733, "loss": 1.6272, "step": 1721 }, { "epoch": 0.5382932166301969, "grad_norm": 0.2109375, "learning_rate": 0.0001912824478495125, "loss": 1.529, "step": 1722 }, { "epoch": 0.5386058143169741, "grad_norm": 0.224609375, "learning_rate": 0.00019127241165067994, "loss": 1.8957, "step": 1723 }, { "epoch": 0.5389184120037511, "grad_norm": 0.216796875, "learning_rate": 0.00019126236994161558, "loss": 1.6643, "step": 1724 }, { "epoch": 0.5392310096905283, "grad_norm": 0.2216796875, "learning_rate": 0.00019125232272292563, "loss": 1.8746, "step": 1725 }, { "epoch": 0.5395436073773054, "grad_norm": 0.2392578125, "learning_rate": 0.00019124226999521672, "loss": 1.5691, "step": 1726 }, { "epoch": 0.5398562050640825, "grad_norm": 0.2158203125, "learning_rate": 0.00019123221175909567, "loss": 1.7902, "step": 1727 }, { "epoch": 0.5401688027508597, "grad_norm": 0.2314453125, "learning_rate": 0.00019122214801516973, "loss": 1.6767, "step": 1728 }, { "epoch": 0.5404814004376368, "grad_norm": 0.2177734375, "learning_rate": 0.00019121207876404648, "loss": 1.727, "step": 1729 }, { "epoch": 0.5407939981244139, "grad_norm": 0.228515625, "learning_rate": 0.0001912020040063338, "loss": 1.6355, "step": 1730 }, { "epoch": 0.541106595811191, "grad_norm": 0.2255859375, "learning_rate": 0.00019119192374263992, "loss": 1.9062, "step": 1731 }, { "epoch": 0.5414191934979681, "grad_norm": 0.2353515625, "learning_rate": 0.00019118183797357338, "loss": 1.5986, "step": 1732 }, { "epoch": 0.5417317911847452, "grad_norm": 0.2119140625, "learning_rate": 0.00019117174669974312, "loss": 1.5961, "step": 1733 }, { "epoch": 0.5420443888715224, "grad_norm": 0.2294921875, "learning_rate": 0.00019116164992175828, "loss": 1.8585, "step": 1734 }, { "epoch": 0.5423569865582994, "grad_norm": 0.2412109375, "learning_rate": 0.00019115154764022852, "loss": 1.731, "step": 1735 }, { "epoch": 0.5426695842450766, "grad_norm": 0.2138671875, "learning_rate": 0.00019114143985576366, "loss": 1.9891, "step": 1736 }, { "epoch": 0.5429821819318537, "grad_norm": 0.23046875, "learning_rate": 0.0001911313265689739, "loss": 1.6551, "step": 1737 }, { "epoch": 0.5432947796186308, "grad_norm": 0.2158203125, "learning_rate": 0.00019112120778046987, "loss": 2.0219, "step": 1738 }, { "epoch": 0.543607377305408, "grad_norm": 0.21875, "learning_rate": 0.0001911110834908624, "loss": 1.7808, "step": 1739 }, { "epoch": 0.543919974992185, "grad_norm": 0.23046875, "learning_rate": 0.0001911009537007627, "loss": 1.7043, "step": 1740 }, { "epoch": 0.5442325726789622, "grad_norm": 0.2255859375, "learning_rate": 0.00019109081841078233, "loss": 1.7296, "step": 1741 }, { "epoch": 0.5445451703657392, "grad_norm": 0.2177734375, "learning_rate": 0.0001910806776215332, "loss": 1.6465, "step": 1742 }, { "epoch": 0.5448577680525164, "grad_norm": 0.2216796875, "learning_rate": 0.00019107053133362749, "loss": 1.8411, "step": 1743 }, { "epoch": 0.5451703657392936, "grad_norm": 0.220703125, "learning_rate": 0.00019106037954767774, "loss": 1.4522, "step": 1744 }, { "epoch": 0.5454829634260706, "grad_norm": 0.212890625, "learning_rate": 0.00019105022226429682, "loss": 1.7463, "step": 1745 }, { "epoch": 0.5457955611128478, "grad_norm": 0.22265625, "learning_rate": 0.00019104005948409797, "loss": 1.622, "step": 1746 }, { "epoch": 0.5461081587996249, "grad_norm": 0.234375, "learning_rate": 0.00019102989120769475, "loss": 1.8334, "step": 1747 }, { "epoch": 0.546420756486402, "grad_norm": 0.236328125, "learning_rate": 0.00019101971743570094, "loss": 1.6375, "step": 1748 }, { "epoch": 0.5467333541731791, "grad_norm": 0.224609375, "learning_rate": 0.00019100953816873084, "loss": 1.4945, "step": 1749 }, { "epoch": 0.5470459518599562, "grad_norm": 0.21875, "learning_rate": 0.00019099935340739893, "loss": 1.687, "step": 1750 }, { "epoch": 0.5473585495467334, "grad_norm": 0.251953125, "learning_rate": 0.0001909891631523201, "loss": 1.8769, "step": 1751 }, { "epoch": 0.5476711472335105, "grad_norm": 0.228515625, "learning_rate": 0.00019097896740410955, "loss": 1.814, "step": 1752 }, { "epoch": 0.5479837449202876, "grad_norm": 0.224609375, "learning_rate": 0.00019096876616338278, "loss": 1.8215, "step": 1753 }, { "epoch": 0.5482963426070647, "grad_norm": 0.2470703125, "learning_rate": 0.00019095855943075568, "loss": 1.6682, "step": 1754 }, { "epoch": 0.5486089402938418, "grad_norm": 0.234375, "learning_rate": 0.00019094834720684447, "loss": 1.8052, "step": 1755 }, { "epoch": 0.5489215379806189, "grad_norm": 0.2255859375, "learning_rate": 0.0001909381294922656, "loss": 1.7685, "step": 1756 }, { "epoch": 0.5492341356673961, "grad_norm": 0.2392578125, "learning_rate": 0.000190927906287636, "loss": 1.6704, "step": 1757 }, { "epoch": 0.5495467333541731, "grad_norm": 0.2197265625, "learning_rate": 0.0001909176775935728, "loss": 1.75, "step": 1758 }, { "epoch": 0.5498593310409503, "grad_norm": 0.240234375, "learning_rate": 0.00019090744341069356, "loss": 1.5139, "step": 1759 }, { "epoch": 0.5501719287277275, "grad_norm": 0.2275390625, "learning_rate": 0.00019089720373961612, "loss": 1.5844, "step": 1760 }, { "epoch": 0.5504845264145045, "grad_norm": 0.2314453125, "learning_rate": 0.00019088695858095864, "loss": 1.7899, "step": 1761 }, { "epoch": 0.5507971241012817, "grad_norm": 0.2275390625, "learning_rate": 0.00019087670793533967, "loss": 1.7717, "step": 1762 }, { "epoch": 0.5511097217880587, "grad_norm": 0.2255859375, "learning_rate": 0.00019086645180337803, "loss": 1.7754, "step": 1763 }, { "epoch": 0.5514223194748359, "grad_norm": 0.2177734375, "learning_rate": 0.0001908561901856929, "loss": 1.8412, "step": 1764 }, { "epoch": 0.551734917161613, "grad_norm": 0.2294921875, "learning_rate": 0.0001908459230829038, "loss": 1.7254, "step": 1765 }, { "epoch": 0.5520475148483901, "grad_norm": 0.2314453125, "learning_rate": 0.00019083565049563057, "loss": 1.8097, "step": 1766 }, { "epoch": 0.5523601125351673, "grad_norm": 0.2255859375, "learning_rate": 0.00019082537242449333, "loss": 1.8441, "step": 1767 }, { "epoch": 0.5526727102219443, "grad_norm": 0.328125, "learning_rate": 0.00019081508887011263, "loss": 2.4757, "step": 1768 }, { "epoch": 0.5529853079087215, "grad_norm": 0.21875, "learning_rate": 0.0001908047998331093, "loss": 1.5833, "step": 1769 }, { "epoch": 0.5532979055954986, "grad_norm": 0.359375, "learning_rate": 0.0001907945053141045, "loss": 2.4293, "step": 1770 }, { "epoch": 0.5536105032822757, "grad_norm": 0.2236328125, "learning_rate": 0.0001907842053137197, "loss": 1.9397, "step": 1771 }, { "epoch": 0.5539231009690528, "grad_norm": 0.2353515625, "learning_rate": 0.0001907738998325767, "loss": 2.0662, "step": 1772 }, { "epoch": 0.55423569865583, "grad_norm": 0.228515625, "learning_rate": 0.00019076358887129774, "loss": 1.8447, "step": 1773 }, { "epoch": 0.554548296342607, "grad_norm": 0.32421875, "learning_rate": 0.00019075327243050526, "loss": 2.3451, "step": 1774 }, { "epoch": 0.5548608940293842, "grad_norm": 0.228515625, "learning_rate": 0.00019074295051082205, "loss": 1.623, "step": 1775 }, { "epoch": 0.5551734917161613, "grad_norm": 0.240234375, "learning_rate": 0.0001907326231128713, "loss": 2.0579, "step": 1776 }, { "epoch": 0.5554860894029384, "grad_norm": 0.216796875, "learning_rate": 0.00019072229023727645, "loss": 1.6111, "step": 1777 }, { "epoch": 0.5557986870897156, "grad_norm": 0.224609375, "learning_rate": 0.00019071195188466135, "loss": 1.87, "step": 1778 }, { "epoch": 0.5561112847764926, "grad_norm": 0.2041015625, "learning_rate": 0.00019070160805565012, "loss": 1.6437, "step": 1779 }, { "epoch": 0.5564238824632698, "grad_norm": 0.2314453125, "learning_rate": 0.00019069125875086722, "loss": 1.6752, "step": 1780 }, { "epoch": 0.5567364801500468, "grad_norm": 0.236328125, "learning_rate": 0.00019068090397093745, "loss": 1.7323, "step": 1781 }, { "epoch": 0.557049077836824, "grad_norm": 0.228515625, "learning_rate": 0.000190670543716486, "loss": 1.7324, "step": 1782 }, { "epoch": 0.5573616755236012, "grad_norm": 0.22265625, "learning_rate": 0.00019066017798813825, "loss": 1.5224, "step": 1783 }, { "epoch": 0.5576742732103782, "grad_norm": 0.326171875, "learning_rate": 0.00019064980678652, "loss": 2.3167, "step": 1784 }, { "epoch": 0.5579868708971554, "grad_norm": 0.212890625, "learning_rate": 0.00019063943011225743, "loss": 1.7731, "step": 1785 }, { "epoch": 0.5582994685839325, "grad_norm": 0.23828125, "learning_rate": 0.00019062904796597697, "loss": 1.6789, "step": 1786 }, { "epoch": 0.5586120662707096, "grad_norm": 0.224609375, "learning_rate": 0.00019061866034830534, "loss": 1.7119, "step": 1787 }, { "epoch": 0.5589246639574867, "grad_norm": 0.2294921875, "learning_rate": 0.00019060826725986977, "loss": 1.6962, "step": 1788 }, { "epoch": 0.5592372616442638, "grad_norm": 0.2294921875, "learning_rate": 0.00019059786870129761, "loss": 1.6318, "step": 1789 }, { "epoch": 0.559549859331041, "grad_norm": 0.21875, "learning_rate": 0.0001905874646732167, "loss": 1.8541, "step": 1790 }, { "epoch": 0.5598624570178181, "grad_norm": 0.2265625, "learning_rate": 0.00019057705517625505, "loss": 1.8081, "step": 1791 }, { "epoch": 0.5601750547045952, "grad_norm": 0.2333984375, "learning_rate": 0.0001905666402110412, "loss": 1.4779, "step": 1792 }, { "epoch": 0.5604876523913723, "grad_norm": 0.2060546875, "learning_rate": 0.00019055621977820387, "loss": 1.6657, "step": 1793 }, { "epoch": 0.5608002500781494, "grad_norm": 0.2255859375, "learning_rate": 0.00019054579387837214, "loss": 1.5665, "step": 1794 }, { "epoch": 0.5611128477649265, "grad_norm": 0.2392578125, "learning_rate": 0.00019053536251217545, "loss": 1.5586, "step": 1795 }, { "epoch": 0.5614254454517037, "grad_norm": 0.2294921875, "learning_rate": 0.00019052492568024355, "loss": 1.5323, "step": 1796 }, { "epoch": 0.5617380431384807, "grad_norm": 0.2255859375, "learning_rate": 0.00019051448338320656, "loss": 1.7868, "step": 1797 }, { "epoch": 0.5620506408252579, "grad_norm": 0.2255859375, "learning_rate": 0.00019050403562169486, "loss": 1.6351, "step": 1798 }, { "epoch": 0.562363238512035, "grad_norm": 0.2216796875, "learning_rate": 0.00019049358239633916, "loss": 2.0889, "step": 1799 }, { "epoch": 0.5626758361988121, "grad_norm": 0.2255859375, "learning_rate": 0.00019048312370777062, "loss": 1.5398, "step": 1800 }, { "epoch": 0.5629884338855893, "grad_norm": 0.2275390625, "learning_rate": 0.00019047265955662054, "loss": 1.6967, "step": 1801 }, { "epoch": 0.5633010315723663, "grad_norm": 0.2177734375, "learning_rate": 0.00019046218994352076, "loss": 1.6917, "step": 1802 }, { "epoch": 0.5636136292591435, "grad_norm": 0.2216796875, "learning_rate": 0.0001904517148691033, "loss": 1.4587, "step": 1803 }, { "epoch": 0.5639262269459207, "grad_norm": 0.23046875, "learning_rate": 0.00019044123433400052, "loss": 1.8214, "step": 1804 }, { "epoch": 0.5642388246326977, "grad_norm": 0.2314453125, "learning_rate": 0.0001904307483388452, "loss": 1.6375, "step": 1805 }, { "epoch": 0.5645514223194749, "grad_norm": 0.2265625, "learning_rate": 0.00019042025688427035, "loss": 1.5963, "step": 1806 }, { "epoch": 0.5648640200062519, "grad_norm": 0.2314453125, "learning_rate": 0.00019040975997090936, "loss": 1.8623, "step": 1807 }, { "epoch": 0.5651766176930291, "grad_norm": 0.220703125, "learning_rate": 0.00019039925759939597, "loss": 1.6458, "step": 1808 }, { "epoch": 0.5654892153798062, "grad_norm": 0.2255859375, "learning_rate": 0.0001903887497703642, "loss": 1.6367, "step": 1809 }, { "epoch": 0.5658018130665833, "grad_norm": 0.216796875, "learning_rate": 0.00019037823648444842, "loss": 1.6211, "step": 1810 }, { "epoch": 0.5661144107533604, "grad_norm": 0.220703125, "learning_rate": 0.0001903677177422833, "loss": 1.5955, "step": 1811 }, { "epoch": 0.5664270084401375, "grad_norm": 0.2353515625, "learning_rate": 0.00019035719354450393, "loss": 1.6509, "step": 1812 }, { "epoch": 0.5667396061269147, "grad_norm": 0.2373046875, "learning_rate": 0.00019034666389174568, "loss": 1.5193, "step": 1813 }, { "epoch": 0.5670522038136918, "grad_norm": 0.23046875, "learning_rate": 0.00019033612878464412, "loss": 1.8779, "step": 1814 }, { "epoch": 0.5673648015004689, "grad_norm": 0.232421875, "learning_rate": 0.00019032558822383542, "loss": 1.746, "step": 1815 }, { "epoch": 0.567677399187246, "grad_norm": 0.21875, "learning_rate": 0.0001903150422099558, "loss": 1.6802, "step": 1816 }, { "epoch": 0.5679899968740232, "grad_norm": 0.2373046875, "learning_rate": 0.00019030449074364204, "loss": 1.8168, "step": 1817 }, { "epoch": 0.5683025945608002, "grad_norm": 0.23046875, "learning_rate": 0.00019029393382553108, "loss": 1.6261, "step": 1818 }, { "epoch": 0.5686151922475774, "grad_norm": 0.2197265625, "learning_rate": 0.00019028337145626028, "loss": 1.6126, "step": 1819 }, { "epoch": 0.5689277899343544, "grad_norm": 0.2333984375, "learning_rate": 0.00019027280363646728, "loss": 1.7607, "step": 1820 }, { "epoch": 0.5692403876211316, "grad_norm": 0.2275390625, "learning_rate": 0.0001902622303667901, "loss": 1.6267, "step": 1821 }, { "epoch": 0.5695529853079088, "grad_norm": 0.22265625, "learning_rate": 0.00019025165164786705, "loss": 1.7209, "step": 1822 }, { "epoch": 0.5698655829946858, "grad_norm": 0.2314453125, "learning_rate": 0.00019024106748033679, "loss": 1.4932, "step": 1823 }, { "epoch": 0.570178180681463, "grad_norm": 0.236328125, "learning_rate": 0.00019023047786483828, "loss": 1.4764, "step": 1824 }, { "epoch": 0.57049077836824, "grad_norm": 0.228515625, "learning_rate": 0.00019021988280201084, "loss": 1.6664, "step": 1825 }, { "epoch": 0.5708033760550172, "grad_norm": 0.2490234375, "learning_rate": 0.0001902092822924941, "loss": 1.5628, "step": 1826 }, { "epoch": 0.5711159737417943, "grad_norm": 0.2412109375, "learning_rate": 0.00019019867633692802, "loss": 1.8942, "step": 1827 }, { "epoch": 0.5714285714285714, "grad_norm": 0.2275390625, "learning_rate": 0.00019018806493595293, "loss": 1.5664, "step": 1828 }, { "epoch": 0.5717411691153486, "grad_norm": 0.2314453125, "learning_rate": 0.00019017744809020942, "loss": 1.4663, "step": 1829 }, { "epoch": 0.5720537668021257, "grad_norm": 0.22265625, "learning_rate": 0.00019016682580033848, "loss": 1.8574, "step": 1830 }, { "epoch": 0.5723663644889028, "grad_norm": 0.2216796875, "learning_rate": 0.00019015619806698135, "loss": 1.7824, "step": 1831 }, { "epoch": 0.5726789621756799, "grad_norm": 0.224609375, "learning_rate": 0.00019014556489077965, "loss": 1.5226, "step": 1832 }, { "epoch": 0.572991559862457, "grad_norm": 0.2216796875, "learning_rate": 0.00019013492627237532, "loss": 1.8333, "step": 1833 }, { "epoch": 0.5733041575492341, "grad_norm": 0.2236328125, "learning_rate": 0.00019012428221241065, "loss": 1.5824, "step": 1834 }, { "epoch": 0.5736167552360113, "grad_norm": 0.232421875, "learning_rate": 0.00019011363271152822, "loss": 1.7483, "step": 1835 }, { "epoch": 0.5739293529227883, "grad_norm": 0.2099609375, "learning_rate": 0.00019010297777037093, "loss": 1.6215, "step": 1836 }, { "epoch": 0.5742419506095655, "grad_norm": 0.2314453125, "learning_rate": 0.00019009231738958206, "loss": 1.6124, "step": 1837 }, { "epoch": 0.5745545482963426, "grad_norm": 0.234375, "learning_rate": 0.00019008165156980517, "loss": 1.8104, "step": 1838 }, { "epoch": 0.5748671459831197, "grad_norm": 0.2333984375, "learning_rate": 0.0001900709803116842, "loss": 1.7839, "step": 1839 }, { "epoch": 0.5751797436698969, "grad_norm": 0.216796875, "learning_rate": 0.0001900603036158634, "loss": 1.6926, "step": 1840 }, { "epoch": 0.5754923413566739, "grad_norm": 0.2275390625, "learning_rate": 0.00019004962148298725, "loss": 1.8372, "step": 1841 }, { "epoch": 0.5758049390434511, "grad_norm": 0.2275390625, "learning_rate": 0.0001900389339137007, "loss": 1.5496, "step": 1842 }, { "epoch": 0.5761175367302281, "grad_norm": 0.2275390625, "learning_rate": 0.000190028240908649, "loss": 1.7024, "step": 1843 }, { "epoch": 0.5764301344170053, "grad_norm": 0.236328125, "learning_rate": 0.00019001754246847767, "loss": 1.6237, "step": 1844 }, { "epoch": 0.5767427321037825, "grad_norm": 0.23046875, "learning_rate": 0.00019000683859383258, "loss": 1.6012, "step": 1845 }, { "epoch": 0.5770553297905595, "grad_norm": 0.2119140625, "learning_rate": 0.00018999612928535995, "loss": 1.7586, "step": 1846 }, { "epoch": 0.5773679274773367, "grad_norm": 0.2373046875, "learning_rate": 0.00018998541454370632, "loss": 1.4823, "step": 1847 }, { "epoch": 0.5776805251641138, "grad_norm": 0.2265625, "learning_rate": 0.00018997469436951854, "loss": 1.5688, "step": 1848 }, { "epoch": 0.5779931228508909, "grad_norm": 0.318359375, "learning_rate": 0.0001899639687634438, "loss": 2.5108, "step": 1849 }, { "epoch": 0.578305720537668, "grad_norm": 0.2490234375, "learning_rate": 0.00018995323772612964, "loss": 1.6868, "step": 1850 }, { "epoch": 0.5786183182244451, "grad_norm": 0.2275390625, "learning_rate": 0.00018994250125822386, "loss": 1.6238, "step": 1851 }, { "epoch": 0.5789309159112223, "grad_norm": 0.220703125, "learning_rate": 0.0001899317593603747, "loss": 1.5826, "step": 1852 }, { "epoch": 0.5792435135979994, "grad_norm": 0.2265625, "learning_rate": 0.0001899210120332306, "loss": 1.6792, "step": 1853 }, { "epoch": 0.5795561112847765, "grad_norm": 0.2275390625, "learning_rate": 0.00018991025927744042, "loss": 1.8574, "step": 1854 }, { "epoch": 0.5798687089715536, "grad_norm": 0.2412109375, "learning_rate": 0.0001898995010936533, "loss": 1.7686, "step": 1855 }, { "epoch": 0.5801813066583307, "grad_norm": 0.2255859375, "learning_rate": 0.00018988873748251877, "loss": 1.7198, "step": 1856 }, { "epoch": 0.5804939043451078, "grad_norm": 0.2177734375, "learning_rate": 0.00018987796844468658, "loss": 1.7134, "step": 1857 }, { "epoch": 0.580806502031885, "grad_norm": 0.212890625, "learning_rate": 0.00018986719398080695, "loss": 1.5788, "step": 1858 }, { "epoch": 0.581119099718662, "grad_norm": 0.2265625, "learning_rate": 0.00018985641409153026, "loss": 1.6557, "step": 1859 }, { "epoch": 0.5814316974054392, "grad_norm": 0.23046875, "learning_rate": 0.00018984562877750737, "loss": 1.719, "step": 1860 }, { "epoch": 0.5817442950922164, "grad_norm": 0.2265625, "learning_rate": 0.00018983483803938932, "loss": 1.7116, "step": 1861 }, { "epoch": 0.5820568927789934, "grad_norm": 0.236328125, "learning_rate": 0.0001898240418778277, "loss": 1.9006, "step": 1862 }, { "epoch": 0.5823694904657706, "grad_norm": 0.2255859375, "learning_rate": 0.00018981324029347416, "loss": 1.3191, "step": 1863 }, { "epoch": 0.5826820881525476, "grad_norm": 0.2255859375, "learning_rate": 0.00018980243328698088, "loss": 1.7602, "step": 1864 }, { "epoch": 0.5829946858393248, "grad_norm": 0.2392578125, "learning_rate": 0.00018979162085900025, "loss": 2.0473, "step": 1865 }, { "epoch": 0.583307283526102, "grad_norm": 0.234375, "learning_rate": 0.00018978080301018503, "loss": 1.7591, "step": 1866 }, { "epoch": 0.583619881212879, "grad_norm": 0.2275390625, "learning_rate": 0.00018976997974118836, "loss": 1.9532, "step": 1867 }, { "epoch": 0.5839324788996562, "grad_norm": 0.234375, "learning_rate": 0.0001897591510526636, "loss": 1.8456, "step": 1868 }, { "epoch": 0.5842450765864332, "grad_norm": 0.23046875, "learning_rate": 0.00018974831694526452, "loss": 1.7148, "step": 1869 }, { "epoch": 0.5845576742732104, "grad_norm": 0.2158203125, "learning_rate": 0.00018973747741964515, "loss": 1.6221, "step": 1870 }, { "epoch": 0.5848702719599875, "grad_norm": 0.2236328125, "learning_rate": 0.00018972663247645994, "loss": 2.0677, "step": 1871 }, { "epoch": 0.5851828696467646, "grad_norm": 0.2421875, "learning_rate": 0.00018971578211636359, "loss": 1.4428, "step": 1872 }, { "epoch": 0.5854954673335417, "grad_norm": 0.224609375, "learning_rate": 0.00018970492634001114, "loss": 1.6225, "step": 1873 }, { "epoch": 0.5858080650203189, "grad_norm": 0.2236328125, "learning_rate": 0.00018969406514805797, "loss": 1.5286, "step": 1874 }, { "epoch": 0.586120662707096, "grad_norm": 0.2421875, "learning_rate": 0.00018968319854115978, "loss": 1.7499, "step": 1875 }, { "epoch": 0.5864332603938731, "grad_norm": 0.2197265625, "learning_rate": 0.00018967232651997265, "loss": 1.9038, "step": 1876 }, { "epoch": 0.5867458580806502, "grad_norm": 0.21484375, "learning_rate": 0.00018966144908515284, "loss": 1.5464, "step": 1877 }, { "epoch": 0.5870584557674273, "grad_norm": 0.23046875, "learning_rate": 0.00018965056623735713, "loss": 1.6405, "step": 1878 }, { "epoch": 0.5873710534542045, "grad_norm": 0.2099609375, "learning_rate": 0.00018963967797724248, "loss": 1.727, "step": 1879 }, { "epoch": 0.5876836511409815, "grad_norm": 0.2333984375, "learning_rate": 0.00018962878430546626, "loss": 1.7438, "step": 1880 }, { "epoch": 0.5879962488277587, "grad_norm": 0.2314453125, "learning_rate": 0.0001896178852226861, "loss": 1.6973, "step": 1881 }, { "epoch": 0.5883088465145357, "grad_norm": 0.228515625, "learning_rate": 0.00018960698072956, "loss": 1.7813, "step": 1882 }, { "epoch": 0.5886214442013129, "grad_norm": 0.224609375, "learning_rate": 0.00018959607082674632, "loss": 1.8691, "step": 1883 }, { "epoch": 0.5889340418880901, "grad_norm": 0.2265625, "learning_rate": 0.00018958515551490364, "loss": 1.8186, "step": 1884 }, { "epoch": 0.5892466395748671, "grad_norm": 0.224609375, "learning_rate": 0.00018957423479469096, "loss": 1.6628, "step": 1885 }, { "epoch": 0.5895592372616443, "grad_norm": 0.2314453125, "learning_rate": 0.0001895633086667676, "loss": 1.8004, "step": 1886 }, { "epoch": 0.5898718349484214, "grad_norm": 0.24609375, "learning_rate": 0.00018955237713179314, "loss": 1.781, "step": 1887 }, { "epoch": 0.5901844326351985, "grad_norm": 0.2275390625, "learning_rate": 0.00018954144019042759, "loss": 1.7539, "step": 1888 }, { "epoch": 0.5904970303219756, "grad_norm": 0.23046875, "learning_rate": 0.00018953049784333116, "loss": 1.6668, "step": 1889 }, { "epoch": 0.5908096280087527, "grad_norm": 0.228515625, "learning_rate": 0.00018951955009116449, "loss": 1.954, "step": 1890 }, { "epoch": 0.5911222256955299, "grad_norm": 0.2275390625, "learning_rate": 0.0001895085969345885, "loss": 1.8232, "step": 1891 }, { "epoch": 0.591434823382307, "grad_norm": 0.2216796875, "learning_rate": 0.00018949763837426445, "loss": 1.5966, "step": 1892 }, { "epoch": 0.5917474210690841, "grad_norm": 0.2236328125, "learning_rate": 0.00018948667441085398, "loss": 1.5623, "step": 1893 }, { "epoch": 0.5920600187558612, "grad_norm": 0.2216796875, "learning_rate": 0.00018947570504501888, "loss": 1.689, "step": 1894 }, { "epoch": 0.5923726164426383, "grad_norm": 0.2216796875, "learning_rate": 0.00018946473027742146, "loss": 1.6939, "step": 1895 }, { "epoch": 0.5926852141294154, "grad_norm": 0.228515625, "learning_rate": 0.00018945375010872426, "loss": 1.7252, "step": 1896 }, { "epoch": 0.5929978118161926, "grad_norm": 0.220703125, "learning_rate": 0.0001894427645395902, "loss": 1.7894, "step": 1897 }, { "epoch": 0.5933104095029696, "grad_norm": 0.234375, "learning_rate": 0.00018943177357068244, "loss": 1.8643, "step": 1898 }, { "epoch": 0.5936230071897468, "grad_norm": 0.2158203125, "learning_rate": 0.00018942077720266454, "loss": 1.6017, "step": 1899 }, { "epoch": 0.5939356048765239, "grad_norm": 0.22265625, "learning_rate": 0.0001894097754362004, "loss": 1.514, "step": 1900 }, { "epoch": 0.594248202563301, "grad_norm": 0.2275390625, "learning_rate": 0.00018939876827195418, "loss": 1.8716, "step": 1901 }, { "epoch": 0.5945608002500782, "grad_norm": 0.232421875, "learning_rate": 0.00018938775571059039, "loss": 1.8103, "step": 1902 }, { "epoch": 0.5948733979368552, "grad_norm": 0.2216796875, "learning_rate": 0.00018937673775277388, "loss": 1.5777, "step": 1903 }, { "epoch": 0.5951859956236324, "grad_norm": 0.2314453125, "learning_rate": 0.0001893657143991698, "loss": 1.6428, "step": 1904 }, { "epoch": 0.5954985933104096, "grad_norm": 0.224609375, "learning_rate": 0.00018935468565044368, "loss": 2.0165, "step": 1905 }, { "epoch": 0.5958111909971866, "grad_norm": 0.22265625, "learning_rate": 0.00018934365150726133, "loss": 1.5724, "step": 1906 }, { "epoch": 0.5961237886839638, "grad_norm": 0.2216796875, "learning_rate": 0.00018933261197028885, "loss": 1.9301, "step": 1907 }, { "epoch": 0.5964363863707408, "grad_norm": 0.2275390625, "learning_rate": 0.0001893215670401928, "loss": 1.6571, "step": 1908 }, { "epoch": 0.596748984057518, "grad_norm": 0.2255859375, "learning_rate": 0.00018931051671763988, "loss": 1.7479, "step": 1909 }, { "epoch": 0.5970615817442951, "grad_norm": 0.228515625, "learning_rate": 0.00018929946100329725, "loss": 1.6891, "step": 1910 }, { "epoch": 0.5973741794310722, "grad_norm": 0.23046875, "learning_rate": 0.0001892883998978324, "loss": 1.646, "step": 1911 }, { "epoch": 0.5976867771178493, "grad_norm": 0.2197265625, "learning_rate": 0.00018927733340191308, "loss": 1.6963, "step": 1912 }, { "epoch": 0.5979993748046264, "grad_norm": 0.2265625, "learning_rate": 0.00018926626151620732, "loss": 1.9789, "step": 1913 }, { "epoch": 0.5983119724914036, "grad_norm": 0.2236328125, "learning_rate": 0.00018925518424138361, "loss": 1.9244, "step": 1914 }, { "epoch": 0.5986245701781807, "grad_norm": 0.2275390625, "learning_rate": 0.00018924410157811073, "loss": 1.5019, "step": 1915 }, { "epoch": 0.5989371678649578, "grad_norm": 0.2265625, "learning_rate": 0.0001892330135270577, "loss": 1.7337, "step": 1916 }, { "epoch": 0.5992497655517349, "grad_norm": 0.2236328125, "learning_rate": 0.0001892219200888939, "loss": 1.6027, "step": 1917 }, { "epoch": 0.5995623632385121, "grad_norm": 0.2470703125, "learning_rate": 0.00018921082126428912, "loss": 1.6431, "step": 1918 }, { "epoch": 0.5998749609252891, "grad_norm": 0.22265625, "learning_rate": 0.00018919971705391335, "loss": 1.822, "step": 1919 }, { "epoch": 0.6001875586120663, "grad_norm": 0.2275390625, "learning_rate": 0.00018918860745843703, "loss": 1.656, "step": 1920 }, { "epoch": 0.6005001562988433, "grad_norm": 0.2314453125, "learning_rate": 0.00018917749247853078, "loss": 1.6685, "step": 1921 }, { "epoch": 0.6008127539856205, "grad_norm": 0.21875, "learning_rate": 0.0001891663721148657, "loss": 1.8229, "step": 1922 }, { "epoch": 0.6011253516723977, "grad_norm": 0.228515625, "learning_rate": 0.0001891552463681131, "loss": 1.7224, "step": 1923 }, { "epoch": 0.6014379493591747, "grad_norm": 0.23046875, "learning_rate": 0.00018914411523894467, "loss": 1.9986, "step": 1924 }, { "epoch": 0.6017505470459519, "grad_norm": 0.22265625, "learning_rate": 0.0001891329787280324, "loss": 1.4848, "step": 1925 }, { "epoch": 0.6020631447327289, "grad_norm": 0.224609375, "learning_rate": 0.00018912183683604864, "loss": 1.7737, "step": 1926 }, { "epoch": 0.6023757424195061, "grad_norm": 0.2255859375, "learning_rate": 0.00018911068956366597, "loss": 1.7155, "step": 1927 }, { "epoch": 0.6026883401062832, "grad_norm": 0.2353515625, "learning_rate": 0.00018909953691155745, "loss": 1.7669, "step": 1928 }, { "epoch": 0.6030009377930603, "grad_norm": 0.232421875, "learning_rate": 0.00018908837888039637, "loss": 1.8628, "step": 1929 }, { "epoch": 0.6033135354798375, "grad_norm": 0.23046875, "learning_rate": 0.0001890772154708563, "loss": 1.7606, "step": 1930 }, { "epoch": 0.6036261331666146, "grad_norm": 0.2275390625, "learning_rate": 0.0001890660466836112, "loss": 1.5453, "step": 1931 }, { "epoch": 0.6039387308533917, "grad_norm": 0.236328125, "learning_rate": 0.00018905487251933542, "loss": 1.7034, "step": 1932 }, { "epoch": 0.6042513285401688, "grad_norm": 0.23046875, "learning_rate": 0.00018904369297870349, "loss": 1.6582, "step": 1933 }, { "epoch": 0.6045639262269459, "grad_norm": 0.2421875, "learning_rate": 0.0001890325080623903, "loss": 1.5893, "step": 1934 }, { "epoch": 0.604876523913723, "grad_norm": 0.220703125, "learning_rate": 0.00018902131777107117, "loss": 1.602, "step": 1935 }, { "epoch": 0.6051891216005002, "grad_norm": 0.2275390625, "learning_rate": 0.00018901012210542165, "loss": 1.636, "step": 1936 }, { "epoch": 0.6055017192872773, "grad_norm": 0.2373046875, "learning_rate": 0.00018899892106611762, "loss": 1.8495, "step": 1937 }, { "epoch": 0.6058143169740544, "grad_norm": 0.2373046875, "learning_rate": 0.00018898771465383532, "loss": 1.7294, "step": 1938 }, { "epoch": 0.6061269146608315, "grad_norm": 0.23046875, "learning_rate": 0.0001889765028692513, "loss": 1.6063, "step": 1939 }, { "epoch": 0.6064395123476086, "grad_norm": 0.23046875, "learning_rate": 0.0001889652857130424, "loss": 1.5972, "step": 1940 }, { "epoch": 0.6067521100343858, "grad_norm": 0.224609375, "learning_rate": 0.00018895406318588585, "loss": 1.9705, "step": 1941 }, { "epoch": 0.6070647077211628, "grad_norm": 0.2294921875, "learning_rate": 0.00018894283528845914, "loss": 1.9463, "step": 1942 }, { "epoch": 0.60737730540794, "grad_norm": 0.2333984375, "learning_rate": 0.00018893160202144012, "loss": 1.7365, "step": 1943 }, { "epoch": 0.6076899030947172, "grad_norm": 0.2275390625, "learning_rate": 0.00018892036338550696, "loss": 1.6313, "step": 1944 }, { "epoch": 0.6080025007814942, "grad_norm": 0.228515625, "learning_rate": 0.00018890911938133814, "loss": 1.7297, "step": 1945 }, { "epoch": 0.6083150984682714, "grad_norm": 0.2275390625, "learning_rate": 0.0001888978700096125, "loss": 1.5932, "step": 1946 }, { "epoch": 0.6086276961550484, "grad_norm": 0.255859375, "learning_rate": 0.00018888661527100914, "loss": 1.7416, "step": 1947 }, { "epoch": 0.6089402938418256, "grad_norm": 0.2177734375, "learning_rate": 0.0001888753551662076, "loss": 1.5615, "step": 1948 }, { "epoch": 0.6092528915286027, "grad_norm": 0.21875, "learning_rate": 0.00018886408969588756, "loss": 1.9525, "step": 1949 }, { "epoch": 0.6095654892153798, "grad_norm": 0.2275390625, "learning_rate": 0.0001888528188607292, "loss": 1.4709, "step": 1950 }, { "epoch": 0.609878086902157, "grad_norm": 0.2265625, "learning_rate": 0.00018884154266141296, "loss": 1.6341, "step": 1951 }, { "epoch": 0.610190684588934, "grad_norm": 0.2314453125, "learning_rate": 0.00018883026109861955, "loss": 1.6915, "step": 1952 }, { "epoch": 0.6105032822757112, "grad_norm": 0.2216796875, "learning_rate": 0.0001888189741730301, "loss": 1.7387, "step": 1953 }, { "epoch": 0.6108158799624883, "grad_norm": 0.23828125, "learning_rate": 0.000188807681885326, "loss": 1.4454, "step": 1954 }, { "epoch": 0.6111284776492654, "grad_norm": 0.22265625, "learning_rate": 0.00018879638423618893, "loss": 1.644, "step": 1955 }, { "epoch": 0.6114410753360425, "grad_norm": 0.2265625, "learning_rate": 0.00018878508122630106, "loss": 1.6955, "step": 1956 }, { "epoch": 0.6117536730228196, "grad_norm": 0.228515625, "learning_rate": 0.00018877377285634464, "loss": 1.5826, "step": 1957 }, { "epoch": 0.6120662707095967, "grad_norm": 0.2255859375, "learning_rate": 0.00018876245912700243, "loss": 1.7957, "step": 1958 }, { "epoch": 0.6123788683963739, "grad_norm": 0.23046875, "learning_rate": 0.00018875114003895748, "loss": 1.5181, "step": 1959 }, { "epoch": 0.612691466083151, "grad_norm": 0.23046875, "learning_rate": 0.00018873981559289308, "loss": 1.7115, "step": 1960 }, { "epoch": 0.6130040637699281, "grad_norm": 0.236328125, "learning_rate": 0.00018872848578949296, "loss": 1.9347, "step": 1961 }, { "epoch": 0.6133166614567053, "grad_norm": 0.23046875, "learning_rate": 0.00018871715062944108, "loss": 1.7506, "step": 1962 }, { "epoch": 0.6136292591434823, "grad_norm": 0.29296875, "learning_rate": 0.00018870581011342174, "loss": 2.3271, "step": 1963 }, { "epoch": 0.6139418568302595, "grad_norm": 0.228515625, "learning_rate": 0.00018869446424211962, "loss": 2.0109, "step": 1964 }, { "epoch": 0.6142544545170365, "grad_norm": 0.23046875, "learning_rate": 0.00018868311301621968, "loss": 1.5306, "step": 1965 }, { "epoch": 0.6145670522038137, "grad_norm": 0.224609375, "learning_rate": 0.00018867175643640717, "loss": 1.7745, "step": 1966 }, { "epoch": 0.6148796498905909, "grad_norm": 0.23046875, "learning_rate": 0.00018866039450336777, "loss": 1.7684, "step": 1967 }, { "epoch": 0.6151922475773679, "grad_norm": 0.2373046875, "learning_rate": 0.00018864902721778734, "loss": 1.738, "step": 1968 }, { "epoch": 0.6155048452641451, "grad_norm": 0.2314453125, "learning_rate": 0.00018863765458035218, "loss": 1.6707, "step": 1969 }, { "epoch": 0.6158174429509221, "grad_norm": 0.2255859375, "learning_rate": 0.00018862627659174886, "loss": 1.5577, "step": 1970 }, { "epoch": 0.6161300406376993, "grad_norm": 0.2275390625, "learning_rate": 0.00018861489325266425, "loss": 1.6428, "step": 1971 }, { "epoch": 0.6164426383244764, "grad_norm": 0.2421875, "learning_rate": 0.00018860350456378566, "loss": 1.5885, "step": 1972 }, { "epoch": 0.6167552360112535, "grad_norm": 0.21875, "learning_rate": 0.00018859211052580057, "loss": 1.3899, "step": 1973 }, { "epoch": 0.6170678336980306, "grad_norm": 0.23046875, "learning_rate": 0.0001885807111393969, "loss": 1.8002, "step": 1974 }, { "epoch": 0.6173804313848078, "grad_norm": 0.2265625, "learning_rate": 0.0001885693064052628, "loss": 1.7554, "step": 1975 }, { "epoch": 0.6176930290715849, "grad_norm": 0.22265625, "learning_rate": 0.0001885578963240868, "loss": 1.5717, "step": 1976 }, { "epoch": 0.618005626758362, "grad_norm": 0.228515625, "learning_rate": 0.00018854648089655776, "loss": 1.6693, "step": 1977 }, { "epoch": 0.6183182244451391, "grad_norm": 0.2265625, "learning_rate": 0.00018853506012336482, "loss": 1.8787, "step": 1978 }, { "epoch": 0.6186308221319162, "grad_norm": 0.220703125, "learning_rate": 0.00018852363400519745, "loss": 1.6435, "step": 1979 }, { "epoch": 0.6189434198186934, "grad_norm": 0.224609375, "learning_rate": 0.00018851220254274554, "loss": 1.7522, "step": 1980 }, { "epoch": 0.6192560175054704, "grad_norm": 0.2333984375, "learning_rate": 0.00018850076573669915, "loss": 1.5828, "step": 1981 }, { "epoch": 0.6195686151922476, "grad_norm": 0.2294921875, "learning_rate": 0.0001884893235877488, "loss": 1.457, "step": 1982 }, { "epoch": 0.6198812128790246, "grad_norm": 0.22265625, "learning_rate": 0.00018847787609658516, "loss": 1.5991, "step": 1983 }, { "epoch": 0.6201938105658018, "grad_norm": 0.2392578125, "learning_rate": 0.0001884664232638994, "loss": 1.598, "step": 1984 }, { "epoch": 0.620506408252579, "grad_norm": 0.228515625, "learning_rate": 0.00018845496509038294, "loss": 1.6774, "step": 1985 }, { "epoch": 0.620819005939356, "grad_norm": 0.220703125, "learning_rate": 0.00018844350157672755, "loss": 1.7232, "step": 1986 }, { "epoch": 0.6211316036261332, "grad_norm": 0.228515625, "learning_rate": 0.00018843203272362523, "loss": 1.7184, "step": 1987 }, { "epoch": 0.6214442013129103, "grad_norm": 0.22265625, "learning_rate": 0.00018842055853176838, "loss": 1.6561, "step": 1988 }, { "epoch": 0.6217567989996874, "grad_norm": 0.2294921875, "learning_rate": 0.0001884090790018498, "loss": 1.5792, "step": 1989 }, { "epoch": 0.6220693966864645, "grad_norm": 0.2255859375, "learning_rate": 0.0001883975941345624, "loss": 1.9449, "step": 1990 }, { "epoch": 0.6223819943732416, "grad_norm": 0.251953125, "learning_rate": 0.00018838610393059964, "loss": 2.1031, "step": 1991 }, { "epoch": 0.6226945920600188, "grad_norm": 0.228515625, "learning_rate": 0.00018837460839065515, "loss": 1.9063, "step": 1992 }, { "epoch": 0.6230071897467959, "grad_norm": 0.25390625, "learning_rate": 0.0001883631075154229, "loss": 2.1289, "step": 1993 }, { "epoch": 0.623319787433573, "grad_norm": 0.23828125, "learning_rate": 0.0001883516013055973, "loss": 2.0025, "step": 1994 }, { "epoch": 0.6236323851203501, "grad_norm": 0.2353515625, "learning_rate": 0.0001883400897618729, "loss": 1.8512, "step": 1995 }, { "epoch": 0.6239449828071272, "grad_norm": 0.236328125, "learning_rate": 0.0001883285728849447, "loss": 1.8326, "step": 1996 }, { "epoch": 0.6242575804939043, "grad_norm": 0.224609375, "learning_rate": 0.00018831705067550805, "loss": 1.6852, "step": 1997 }, { "epoch": 0.6245701781806815, "grad_norm": 0.2197265625, "learning_rate": 0.00018830552313425845, "loss": 1.8256, "step": 1998 }, { "epoch": 0.6248827758674586, "grad_norm": 0.23046875, "learning_rate": 0.0001882939902618919, "loss": 1.6083, "step": 1999 }, { "epoch": 0.6251953735542357, "grad_norm": 0.224609375, "learning_rate": 0.00018828245205910465, "loss": 1.7561, "step": 2000 }, { "epoch": 0.6255079712410128, "grad_norm": 0.2421875, "learning_rate": 0.0001882709085265933, "loss": 1.7635, "step": 2001 }, { "epoch": 0.6258205689277899, "grad_norm": 0.2333984375, "learning_rate": 0.0001882593596650547, "loss": 1.8553, "step": 2002 }, { "epoch": 0.6261331666145671, "grad_norm": 0.2353515625, "learning_rate": 0.0001882478054751861, "loss": 1.6012, "step": 2003 }, { "epoch": 0.6264457643013441, "grad_norm": 0.2333984375, "learning_rate": 0.00018823624595768498, "loss": 1.8742, "step": 2004 }, { "epoch": 0.6267583619881213, "grad_norm": 0.23828125, "learning_rate": 0.0001882246811132493, "loss": 1.2608, "step": 2005 }, { "epoch": 0.6270709596748985, "grad_norm": 0.2216796875, "learning_rate": 0.00018821311094257716, "loss": 1.5808, "step": 2006 }, { "epoch": 0.6273835573616755, "grad_norm": 0.2392578125, "learning_rate": 0.00018820153544636713, "loss": 1.6451, "step": 2007 }, { "epoch": 0.6276961550484527, "grad_norm": 0.2392578125, "learning_rate": 0.000188189954625318, "loss": 1.6479, "step": 2008 }, { "epoch": 0.6280087527352297, "grad_norm": 0.23046875, "learning_rate": 0.0001881783684801289, "loss": 1.6755, "step": 2009 }, { "epoch": 0.6283213504220069, "grad_norm": 0.228515625, "learning_rate": 0.00018816677701149939, "loss": 1.6337, "step": 2010 }, { "epoch": 0.628633948108784, "grad_norm": 0.2373046875, "learning_rate": 0.00018815518022012915, "loss": 1.648, "step": 2011 }, { "epoch": 0.6289465457955611, "grad_norm": 0.234375, "learning_rate": 0.00018814357810671833, "loss": 1.586, "step": 2012 }, { "epoch": 0.6292591434823382, "grad_norm": 0.232421875, "learning_rate": 0.0001881319706719674, "loss": 1.5722, "step": 2013 }, { "epoch": 0.6295717411691153, "grad_norm": 0.251953125, "learning_rate": 0.0001881203579165771, "loss": 1.946, "step": 2014 }, { "epoch": 0.6298843388558925, "grad_norm": 0.228515625, "learning_rate": 0.0001881087398412485, "loss": 1.7869, "step": 2015 }, { "epoch": 0.6301969365426696, "grad_norm": 0.21875, "learning_rate": 0.000188097116446683, "loss": 1.7194, "step": 2016 }, { "epoch": 0.6305095342294467, "grad_norm": 0.2353515625, "learning_rate": 0.0001880854877335823, "loss": 2.0099, "step": 2017 }, { "epoch": 0.6308221319162238, "grad_norm": 0.228515625, "learning_rate": 0.00018807385370264848, "loss": 1.8415, "step": 2018 }, { "epoch": 0.631134729603001, "grad_norm": 0.2314453125, "learning_rate": 0.00018806221435458388, "loss": 1.6398, "step": 2019 }, { "epoch": 0.631447327289778, "grad_norm": 0.2216796875, "learning_rate": 0.00018805056969009115, "loss": 1.8436, "step": 2020 }, { "epoch": 0.6317599249765552, "grad_norm": 0.22265625, "learning_rate": 0.00018803891970987333, "loss": 1.5016, "step": 2021 }, { "epoch": 0.6320725226633322, "grad_norm": 0.2275390625, "learning_rate": 0.00018802726441463375, "loss": 1.5147, "step": 2022 }, { "epoch": 0.6323851203501094, "grad_norm": 0.2294921875, "learning_rate": 0.00018801560380507604, "loss": 1.5146, "step": 2023 }, { "epoch": 0.6326977180368866, "grad_norm": 0.224609375, "learning_rate": 0.00018800393788190415, "loss": 1.8504, "step": 2024 }, { "epoch": 0.6330103157236636, "grad_norm": 0.2333984375, "learning_rate": 0.00018799226664582245, "loss": 1.6024, "step": 2025 }, { "epoch": 0.6333229134104408, "grad_norm": 0.2294921875, "learning_rate": 0.00018798059009753542, "loss": 1.8456, "step": 2026 }, { "epoch": 0.6336355110972178, "grad_norm": 0.2236328125, "learning_rate": 0.00018796890823774806, "loss": 1.5829, "step": 2027 }, { "epoch": 0.633948108783995, "grad_norm": 0.2333984375, "learning_rate": 0.00018795722106716562, "loss": 1.8332, "step": 2028 }, { "epoch": 0.6342607064707722, "grad_norm": 0.2255859375, "learning_rate": 0.00018794552858649366, "loss": 1.8867, "step": 2029 }, { "epoch": 0.6345733041575492, "grad_norm": 0.23828125, "learning_rate": 0.00018793383079643804, "loss": 1.7046, "step": 2030 }, { "epoch": 0.6348859018443264, "grad_norm": 0.2353515625, "learning_rate": 0.00018792212769770507, "loss": 1.4539, "step": 2031 }, { "epoch": 0.6351984995311035, "grad_norm": 0.224609375, "learning_rate": 0.00018791041929100115, "loss": 1.7966, "step": 2032 }, { "epoch": 0.6355110972178806, "grad_norm": 0.2373046875, "learning_rate": 0.0001878987055770332, "loss": 1.7888, "step": 2033 }, { "epoch": 0.6358236949046577, "grad_norm": 0.24609375, "learning_rate": 0.0001878869865565084, "loss": 1.5578, "step": 2034 }, { "epoch": 0.6361362925914348, "grad_norm": 0.228515625, "learning_rate": 0.0001878752622301342, "loss": 1.7211, "step": 2035 }, { "epoch": 0.6364488902782119, "grad_norm": 0.228515625, "learning_rate": 0.00018786353259861847, "loss": 1.5837, "step": 2036 }, { "epoch": 0.6367614879649891, "grad_norm": 0.2333984375, "learning_rate": 0.0001878517976626693, "loss": 1.6654, "step": 2037 }, { "epoch": 0.6370740856517662, "grad_norm": 0.2255859375, "learning_rate": 0.00018784005742299514, "loss": 1.9085, "step": 2038 }, { "epoch": 0.6373866833385433, "grad_norm": 0.275390625, "learning_rate": 0.0001878283118803048, "loss": 1.6215, "step": 2039 }, { "epoch": 0.6376992810253204, "grad_norm": 0.240234375, "learning_rate": 0.00018781656103530737, "loss": 1.9168, "step": 2040 }, { "epoch": 0.6380118787120975, "grad_norm": 0.224609375, "learning_rate": 0.0001878048048887122, "loss": 1.8944, "step": 2041 }, { "epoch": 0.6383244763988747, "grad_norm": 0.2275390625, "learning_rate": 0.00018779304344122908, "loss": 1.7528, "step": 2042 }, { "epoch": 0.6386370740856517, "grad_norm": 0.228515625, "learning_rate": 0.00018778127669356805, "loss": 1.8204, "step": 2043 }, { "epoch": 0.6389496717724289, "grad_norm": 0.2314453125, "learning_rate": 0.0001877695046464395, "loss": 1.7069, "step": 2044 }, { "epoch": 0.6392622694592061, "grad_norm": 0.2197265625, "learning_rate": 0.0001877577273005541, "loss": 1.3533, "step": 2045 }, { "epoch": 0.6395748671459831, "grad_norm": 0.22265625, "learning_rate": 0.00018774594465662288, "loss": 1.6023, "step": 2046 }, { "epoch": 0.6398874648327603, "grad_norm": 0.2255859375, "learning_rate": 0.00018773415671535714, "loss": 1.9426, "step": 2047 }, { "epoch": 0.6402000625195373, "grad_norm": 0.2216796875, "learning_rate": 0.00018772236347746856, "loss": 1.7982, "step": 2048 }, { "epoch": 0.6405126602063145, "grad_norm": 0.2255859375, "learning_rate": 0.00018771056494366913, "loss": 1.7041, "step": 2049 }, { "epoch": 0.6408252578930916, "grad_norm": 0.2216796875, "learning_rate": 0.00018769876111467113, "loss": 1.7406, "step": 2050 }, { "epoch": 0.6411378555798687, "grad_norm": 0.240234375, "learning_rate": 0.00018768695199118717, "loss": 1.6077, "step": 2051 }, { "epoch": 0.6414504532666458, "grad_norm": 0.2373046875, "learning_rate": 0.00018767513757393016, "loss": 1.7813, "step": 2052 }, { "epoch": 0.6417630509534229, "grad_norm": 0.2294921875, "learning_rate": 0.00018766331786361338, "loss": 1.6976, "step": 2053 }, { "epoch": 0.6420756486402001, "grad_norm": 0.2421875, "learning_rate": 0.00018765149286095037, "loss": 1.6368, "step": 2054 }, { "epoch": 0.6423882463269772, "grad_norm": 0.2353515625, "learning_rate": 0.00018763966256665505, "loss": 1.6045, "step": 2055 }, { "epoch": 0.6427008440137543, "grad_norm": 0.22265625, "learning_rate": 0.00018762782698144163, "loss": 1.5185, "step": 2056 }, { "epoch": 0.6430134417005314, "grad_norm": 0.23828125, "learning_rate": 0.00018761598610602463, "loss": 1.5806, "step": 2057 }, { "epoch": 0.6433260393873085, "grad_norm": 0.2470703125, "learning_rate": 0.0001876041399411189, "loss": 1.6609, "step": 2058 }, { "epoch": 0.6436386370740856, "grad_norm": 0.2197265625, "learning_rate": 0.0001875922884874396, "loss": 1.6643, "step": 2059 }, { "epoch": 0.6439512347608628, "grad_norm": 0.236328125, "learning_rate": 0.00018758043174570222, "loss": 1.5697, "step": 2060 }, { "epoch": 0.6442638324476399, "grad_norm": 0.22265625, "learning_rate": 0.00018756856971662258, "loss": 1.6761, "step": 2061 }, { "epoch": 0.644576430134417, "grad_norm": 0.2255859375, "learning_rate": 0.00018755670240091677, "loss": 1.5763, "step": 2062 }, { "epoch": 0.6448890278211942, "grad_norm": 0.240234375, "learning_rate": 0.0001875448297993013, "loss": 1.7233, "step": 2063 }, { "epoch": 0.6452016255079712, "grad_norm": 0.2353515625, "learning_rate": 0.00018753295191249286, "loss": 1.623, "step": 2064 }, { "epoch": 0.6455142231947484, "grad_norm": 0.2236328125, "learning_rate": 0.00018752106874120862, "loss": 1.5065, "step": 2065 }, { "epoch": 0.6458268208815254, "grad_norm": 0.251953125, "learning_rate": 0.0001875091802861659, "loss": 2.0689, "step": 2066 }, { "epoch": 0.6461394185683026, "grad_norm": 0.2216796875, "learning_rate": 0.00018749728654808242, "loss": 1.7316, "step": 2067 }, { "epoch": 0.6464520162550798, "grad_norm": 0.224609375, "learning_rate": 0.0001874853875276763, "loss": 1.7759, "step": 2068 }, { "epoch": 0.6467646139418568, "grad_norm": 0.2236328125, "learning_rate": 0.00018747348322566582, "loss": 1.6177, "step": 2069 }, { "epoch": 0.647077211628634, "grad_norm": 0.244140625, "learning_rate": 0.0001874615736427697, "loss": 1.8813, "step": 2070 }, { "epoch": 0.647389809315411, "grad_norm": 0.2255859375, "learning_rate": 0.00018744965877970696, "loss": 1.6428, "step": 2071 }, { "epoch": 0.6477024070021882, "grad_norm": 0.2890625, "learning_rate": 0.00018743773863719683, "loss": 2.3381, "step": 2072 }, { "epoch": 0.6480150046889653, "grad_norm": 0.2265625, "learning_rate": 0.00018742581321595902, "loss": 1.4568, "step": 2073 }, { "epoch": 0.6483276023757424, "grad_norm": 0.220703125, "learning_rate": 0.00018741388251671345, "loss": 1.5651, "step": 2074 }, { "epoch": 0.6486402000625195, "grad_norm": 0.2421875, "learning_rate": 0.0001874019465401804, "loss": 1.8459, "step": 2075 }, { "epoch": 0.6489527977492967, "grad_norm": 0.2265625, "learning_rate": 0.00018739000528708046, "loss": 1.6691, "step": 2076 }, { "epoch": 0.6492653954360738, "grad_norm": 0.2255859375, "learning_rate": 0.00018737805875813454, "loss": 1.8378, "step": 2077 }, { "epoch": 0.6495779931228509, "grad_norm": 0.2353515625, "learning_rate": 0.00018736610695406386, "loss": 1.8245, "step": 2078 }, { "epoch": 0.649890590809628, "grad_norm": 0.2373046875, "learning_rate": 0.00018735414987559, "loss": 1.7107, "step": 2079 }, { "epoch": 0.6502031884964051, "grad_norm": 0.2333984375, "learning_rate": 0.00018734218752343478, "loss": 1.7694, "step": 2080 }, { "epoch": 0.6505157861831823, "grad_norm": 0.2236328125, "learning_rate": 0.00018733021989832035, "loss": 1.7134, "step": 2081 }, { "epoch": 0.6508283838699593, "grad_norm": 0.216796875, "learning_rate": 0.00018731824700096933, "loss": 1.8064, "step": 2082 }, { "epoch": 0.6511409815567365, "grad_norm": 0.23828125, "learning_rate": 0.00018730626883210443, "loss": 1.694, "step": 2083 }, { "epoch": 0.6514535792435135, "grad_norm": 0.2158203125, "learning_rate": 0.00018729428539244884, "loss": 1.7573, "step": 2084 }, { "epoch": 0.6517661769302907, "grad_norm": 0.228515625, "learning_rate": 0.00018728229668272598, "loss": 1.6263, "step": 2085 }, { "epoch": 0.6520787746170679, "grad_norm": 0.2158203125, "learning_rate": 0.00018727030270365965, "loss": 1.846, "step": 2086 }, { "epoch": 0.6523913723038449, "grad_norm": 0.244140625, "learning_rate": 0.00018725830345597396, "loss": 1.7912, "step": 2087 }, { "epoch": 0.6527039699906221, "grad_norm": 0.2236328125, "learning_rate": 0.0001872462989403933, "loss": 1.777, "step": 2088 }, { "epoch": 0.6530165676773992, "grad_norm": 0.2216796875, "learning_rate": 0.00018723428915764237, "loss": 1.675, "step": 2089 }, { "epoch": 0.6533291653641763, "grad_norm": 0.2314453125, "learning_rate": 0.00018722227410844625, "loss": 1.5869, "step": 2090 }, { "epoch": 0.6536417630509535, "grad_norm": 0.244140625, "learning_rate": 0.00018721025379353026, "loss": 1.8295, "step": 2091 }, { "epoch": 0.6539543607377305, "grad_norm": 0.23046875, "learning_rate": 0.00018719822821362017, "loss": 1.6437, "step": 2092 }, { "epoch": 0.6542669584245077, "grad_norm": 0.2421875, "learning_rate": 0.0001871861973694419, "loss": 1.8373, "step": 2093 }, { "epoch": 0.6545795561112848, "grad_norm": 0.2353515625, "learning_rate": 0.00018717416126172177, "loss": 1.3641, "step": 2094 }, { "epoch": 0.6548921537980619, "grad_norm": 0.2373046875, "learning_rate": 0.00018716211989118646, "loss": 1.7446, "step": 2095 }, { "epoch": 0.655204751484839, "grad_norm": 0.234375, "learning_rate": 0.00018715007325856292, "loss": 1.7373, "step": 2096 }, { "epoch": 0.6555173491716161, "grad_norm": 0.23828125, "learning_rate": 0.00018713802136457837, "loss": 1.6263, "step": 2097 }, { "epoch": 0.6558299468583932, "grad_norm": 0.23046875, "learning_rate": 0.00018712596420996045, "loss": 1.7508, "step": 2098 }, { "epoch": 0.6561425445451704, "grad_norm": 0.232421875, "learning_rate": 0.00018711390179543703, "loss": 1.8481, "step": 2099 }, { "epoch": 0.6564551422319475, "grad_norm": 0.232421875, "learning_rate": 0.00018710183412173635, "loss": 1.7739, "step": 2100 }, { "epoch": 0.6567677399187246, "grad_norm": 0.2265625, "learning_rate": 0.00018708976118958693, "loss": 1.989, "step": 2101 }, { "epoch": 0.6570803376055018, "grad_norm": 0.2216796875, "learning_rate": 0.0001870776829997177, "loss": 1.8054, "step": 2102 }, { "epoch": 0.6573929352922788, "grad_norm": 0.2265625, "learning_rate": 0.00018706559955285773, "loss": 1.665, "step": 2103 }, { "epoch": 0.657705532979056, "grad_norm": 0.22265625, "learning_rate": 0.0001870535108497366, "loss": 1.703, "step": 2104 }, { "epoch": 0.658018130665833, "grad_norm": 0.2333984375, "learning_rate": 0.0001870414168910841, "loss": 1.7818, "step": 2105 }, { "epoch": 0.6583307283526102, "grad_norm": 0.2275390625, "learning_rate": 0.00018702931767763028, "loss": 1.5893, "step": 2106 }, { "epoch": 0.6586433260393874, "grad_norm": 0.2236328125, "learning_rate": 0.0001870172132101057, "loss": 1.6743, "step": 2107 }, { "epoch": 0.6589559237261644, "grad_norm": 0.2255859375, "learning_rate": 0.00018700510348924106, "loss": 1.5062, "step": 2108 }, { "epoch": 0.6592685214129416, "grad_norm": 0.2314453125, "learning_rate": 0.00018699298851576743, "loss": 1.4517, "step": 2109 }, { "epoch": 0.6595811190997186, "grad_norm": 0.23828125, "learning_rate": 0.00018698086829041627, "loss": 1.7555, "step": 2110 }, { "epoch": 0.6598937167864958, "grad_norm": 0.2333984375, "learning_rate": 0.0001869687428139192, "loss": 1.7701, "step": 2111 }, { "epoch": 0.6602063144732729, "grad_norm": 0.228515625, "learning_rate": 0.00018695661208700836, "loss": 1.5693, "step": 2112 }, { "epoch": 0.66051891216005, "grad_norm": 0.2265625, "learning_rate": 0.000186944476110416, "loss": 1.473, "step": 2113 }, { "epoch": 0.6608315098468271, "grad_norm": 0.2353515625, "learning_rate": 0.00018693233488487483, "loss": 1.4396, "step": 2114 }, { "epoch": 0.6611441075336042, "grad_norm": 0.236328125, "learning_rate": 0.00018692018841111782, "loss": 1.9964, "step": 2115 }, { "epoch": 0.6614567052203814, "grad_norm": 0.2255859375, "learning_rate": 0.00018690803668987827, "loss": 1.6639, "step": 2116 }, { "epoch": 0.6617693029071585, "grad_norm": 0.2353515625, "learning_rate": 0.0001868958797218898, "loss": 1.7607, "step": 2117 }, { "epoch": 0.6620819005939356, "grad_norm": 0.2314453125, "learning_rate": 0.00018688371750788635, "loss": 1.5137, "step": 2118 }, { "epoch": 0.6623944982807127, "grad_norm": 0.21875, "learning_rate": 0.00018687155004860215, "loss": 1.5756, "step": 2119 }, { "epoch": 0.6627070959674899, "grad_norm": 0.2314453125, "learning_rate": 0.00018685937734477177, "loss": 1.7926, "step": 2120 }, { "epoch": 0.6630196936542669, "grad_norm": 0.2333984375, "learning_rate": 0.0001868471993971301, "loss": 1.7269, "step": 2121 }, { "epoch": 0.6633322913410441, "grad_norm": 0.232421875, "learning_rate": 0.0001868350162064123, "loss": 1.6515, "step": 2122 }, { "epoch": 0.6636448890278211, "grad_norm": 0.2255859375, "learning_rate": 0.00018682282777335397, "loss": 1.5462, "step": 2123 }, { "epoch": 0.6639574867145983, "grad_norm": 0.2470703125, "learning_rate": 0.00018681063409869085, "loss": 1.7719, "step": 2124 }, { "epoch": 0.6642700844013755, "grad_norm": 0.2294921875, "learning_rate": 0.00018679843518315913, "loss": 1.9495, "step": 2125 }, { "epoch": 0.6645826820881525, "grad_norm": 0.232421875, "learning_rate": 0.0001867862310274953, "loss": 1.5323, "step": 2126 }, { "epoch": 0.6648952797749297, "grad_norm": 0.2470703125, "learning_rate": 0.00018677402163243606, "loss": 1.5997, "step": 2127 }, { "epoch": 0.6652078774617067, "grad_norm": 0.2265625, "learning_rate": 0.0001867618069987186, "loss": 1.891, "step": 2128 }, { "epoch": 0.6655204751484839, "grad_norm": 0.2275390625, "learning_rate": 0.00018674958712708027, "loss": 1.7805, "step": 2129 }, { "epoch": 0.665833072835261, "grad_norm": 0.228515625, "learning_rate": 0.00018673736201825882, "loss": 1.7896, "step": 2130 }, { "epoch": 0.6661456705220381, "grad_norm": 0.2314453125, "learning_rate": 0.0001867251316729923, "loss": 1.8483, "step": 2131 }, { "epoch": 0.6664582682088153, "grad_norm": 0.234375, "learning_rate": 0.00018671289609201907, "loss": 1.8642, "step": 2132 }, { "epoch": 0.6667708658955924, "grad_norm": 0.2294921875, "learning_rate": 0.0001867006552760778, "loss": 1.4944, "step": 2133 }, { "epoch": 0.6670834635823695, "grad_norm": 0.2265625, "learning_rate": 0.00018668840922590746, "loss": 1.4096, "step": 2134 }, { "epoch": 0.6673960612691466, "grad_norm": 0.2216796875, "learning_rate": 0.00018667615794224743, "loss": 1.8447, "step": 2135 }, { "epoch": 0.6677086589559237, "grad_norm": 0.228515625, "learning_rate": 0.00018666390142583724, "loss": 1.7672, "step": 2136 }, { "epoch": 0.6680212566427008, "grad_norm": 0.224609375, "learning_rate": 0.00018665163967741694, "loss": 1.4677, "step": 2137 }, { "epoch": 0.668333854329478, "grad_norm": 0.248046875, "learning_rate": 0.0001866393726977267, "loss": 1.9113, "step": 2138 }, { "epoch": 0.668646452016255, "grad_norm": 0.2294921875, "learning_rate": 0.00018662710048750712, "loss": 1.6074, "step": 2139 }, { "epoch": 0.6689590497030322, "grad_norm": 0.2412109375, "learning_rate": 0.00018661482304749915, "loss": 1.9865, "step": 2140 }, { "epoch": 0.6692716473898093, "grad_norm": 0.234375, "learning_rate": 0.00018660254037844388, "loss": 1.5433, "step": 2141 }, { "epoch": 0.6695842450765864, "grad_norm": 0.2412109375, "learning_rate": 0.00018659025248108288, "loss": 1.7213, "step": 2142 }, { "epoch": 0.6698968427633636, "grad_norm": 0.220703125, "learning_rate": 0.00018657795935615802, "loss": 1.7668, "step": 2143 }, { "epoch": 0.6702094404501406, "grad_norm": 0.240234375, "learning_rate": 0.00018656566100441144, "loss": 1.7344, "step": 2144 }, { "epoch": 0.6705220381369178, "grad_norm": 0.21875, "learning_rate": 0.00018655335742658556, "loss": 1.6451, "step": 2145 }, { "epoch": 0.670834635823695, "grad_norm": 0.224609375, "learning_rate": 0.00018654104862342324, "loss": 1.6888, "step": 2146 }, { "epoch": 0.671147233510472, "grad_norm": 0.2294921875, "learning_rate": 0.00018652873459566749, "loss": 1.426, "step": 2147 }, { "epoch": 0.6714598311972492, "grad_norm": 0.2353515625, "learning_rate": 0.00018651641534406178, "loss": 1.6177, "step": 2148 }, { "epoch": 0.6717724288840262, "grad_norm": 0.240234375, "learning_rate": 0.00018650409086934985, "loss": 1.6962, "step": 2149 }, { "epoch": 0.6720850265708034, "grad_norm": 0.2275390625, "learning_rate": 0.0001864917611722757, "loss": 1.6879, "step": 2150 }, { "epoch": 0.6723976242575805, "grad_norm": 0.22265625, "learning_rate": 0.0001864794262535837, "loss": 1.9992, "step": 2151 }, { "epoch": 0.6727102219443576, "grad_norm": 0.23046875, "learning_rate": 0.0001864670861140186, "loss": 1.9401, "step": 2152 }, { "epoch": 0.6730228196311347, "grad_norm": 0.2421875, "learning_rate": 0.00018645474075432524, "loss": 1.8057, "step": 2153 }, { "epoch": 0.6733354173179118, "grad_norm": 0.2412109375, "learning_rate": 0.00018644239017524906, "loss": 2.0631, "step": 2154 }, { "epoch": 0.673648015004689, "grad_norm": 0.234375, "learning_rate": 0.00018643003437753558, "loss": 1.6794, "step": 2155 }, { "epoch": 0.6739606126914661, "grad_norm": 0.2236328125, "learning_rate": 0.00018641767336193086, "loss": 1.7738, "step": 2156 }, { "epoch": 0.6742732103782432, "grad_norm": 0.240234375, "learning_rate": 0.000186405307129181, "loss": 1.8517, "step": 2157 }, { "epoch": 0.6745858080650203, "grad_norm": 0.2353515625, "learning_rate": 0.00018639293568003268, "loss": 1.5776, "step": 2158 }, { "epoch": 0.6748984057517975, "grad_norm": 0.2236328125, "learning_rate": 0.00018638055901523277, "loss": 1.6955, "step": 2159 }, { "epoch": 0.6752110034385745, "grad_norm": 0.23828125, "learning_rate": 0.00018636817713552837, "loss": 1.6111, "step": 2160 }, { "epoch": 0.6755236011253517, "grad_norm": 0.2421875, "learning_rate": 0.00018635579004166712, "loss": 1.8155, "step": 2161 }, { "epoch": 0.6758361988121288, "grad_norm": 0.2177734375, "learning_rate": 0.00018634339773439674, "loss": 1.6656, "step": 2162 }, { "epoch": 0.6761487964989059, "grad_norm": 0.2314453125, "learning_rate": 0.0001863310002144654, "loss": 1.5922, "step": 2163 }, { "epoch": 0.6764613941856831, "grad_norm": 0.220703125, "learning_rate": 0.0001863185974826216, "loss": 1.7238, "step": 2164 }, { "epoch": 0.6767739918724601, "grad_norm": 0.2431640625, "learning_rate": 0.00018630618953961408, "loss": 1.6582, "step": 2165 }, { "epoch": 0.6770865895592373, "grad_norm": 0.283203125, "learning_rate": 0.0001862937763861919, "loss": 2.3931, "step": 2166 }, { "epoch": 0.6773991872460143, "grad_norm": 0.2294921875, "learning_rate": 0.00018628135802310446, "loss": 1.7434, "step": 2167 }, { "epoch": 0.6777117849327915, "grad_norm": 0.251953125, "learning_rate": 0.0001862689344511015, "loss": 2.0366, "step": 2168 }, { "epoch": 0.6780243826195687, "grad_norm": 0.232421875, "learning_rate": 0.000186256505670933, "loss": 1.6197, "step": 2169 }, { "epoch": 0.6783369803063457, "grad_norm": 0.22265625, "learning_rate": 0.0001862440716833494, "loss": 1.5561, "step": 2170 }, { "epoch": 0.6786495779931229, "grad_norm": 0.248046875, "learning_rate": 0.00018623163248910127, "loss": 1.8304, "step": 2171 }, { "epoch": 0.6789621756798999, "grad_norm": 0.2255859375, "learning_rate": 0.00018621918808893958, "loss": 1.3873, "step": 2172 }, { "epoch": 0.6792747733666771, "grad_norm": 0.23046875, "learning_rate": 0.00018620673848361566, "loss": 1.4493, "step": 2173 }, { "epoch": 0.6795873710534542, "grad_norm": 0.25390625, "learning_rate": 0.00018619428367388103, "loss": 1.7057, "step": 2174 }, { "epoch": 0.6798999687402313, "grad_norm": 0.232421875, "learning_rate": 0.0001861818236604877, "loss": 1.5443, "step": 2175 }, { "epoch": 0.6802125664270084, "grad_norm": 0.228515625, "learning_rate": 0.00018616935844418785, "loss": 1.651, "step": 2176 }, { "epoch": 0.6805251641137856, "grad_norm": 0.2470703125, "learning_rate": 0.000186156888025734, "loss": 1.7987, "step": 2177 }, { "epoch": 0.6808377618005627, "grad_norm": 0.234375, "learning_rate": 0.00018614441240587907, "loss": 1.8154, "step": 2178 }, { "epoch": 0.6811503594873398, "grad_norm": 0.232421875, "learning_rate": 0.0001861319315853762, "loss": 1.7168, "step": 2179 }, { "epoch": 0.6814629571741169, "grad_norm": 0.234375, "learning_rate": 0.0001861194455649788, "loss": 1.4816, "step": 2180 }, { "epoch": 0.681775554860894, "grad_norm": 0.2236328125, "learning_rate": 0.00018610695434544074, "loss": 1.5243, "step": 2181 }, { "epoch": 0.6820881525476712, "grad_norm": 0.2255859375, "learning_rate": 0.00018609445792751618, "loss": 1.7344, "step": 2182 }, { "epoch": 0.6824007502344482, "grad_norm": 0.228515625, "learning_rate": 0.00018608195631195939, "loss": 1.8136, "step": 2183 }, { "epoch": 0.6827133479212254, "grad_norm": 0.2353515625, "learning_rate": 0.00018606944949952524, "loss": 1.7538, "step": 2184 }, { "epoch": 0.6830259456080024, "grad_norm": 0.236328125, "learning_rate": 0.00018605693749096876, "loss": 1.8747, "step": 2185 }, { "epoch": 0.6833385432947796, "grad_norm": 0.2353515625, "learning_rate": 0.00018604442028704533, "loss": 1.6926, "step": 2186 }, { "epoch": 0.6836511409815568, "grad_norm": 0.228515625, "learning_rate": 0.00018603189788851055, "loss": 1.7869, "step": 2187 }, { "epoch": 0.6839637386683338, "grad_norm": 0.2236328125, "learning_rate": 0.00018601937029612048, "loss": 1.6719, "step": 2188 }, { "epoch": 0.684276336355111, "grad_norm": 0.23828125, "learning_rate": 0.0001860068375106314, "loss": 1.7719, "step": 2189 }, { "epoch": 0.6845889340418881, "grad_norm": 0.2265625, "learning_rate": 0.00018599429953279994, "loss": 1.618, "step": 2190 }, { "epoch": 0.6849015317286652, "grad_norm": 0.2294921875, "learning_rate": 0.00018598175636338305, "loss": 1.7768, "step": 2191 }, { "epoch": 0.6852141294154424, "grad_norm": 0.234375, "learning_rate": 0.00018596920800313798, "loss": 1.9978, "step": 2192 }, { "epoch": 0.6855267271022194, "grad_norm": 0.22265625, "learning_rate": 0.0001859566544528222, "loss": 1.3867, "step": 2193 }, { "epoch": 0.6858393247889966, "grad_norm": 0.2294921875, "learning_rate": 0.0001859440957131937, "loss": 1.5844, "step": 2194 }, { "epoch": 0.6861519224757737, "grad_norm": 0.234375, "learning_rate": 0.00018593153178501063, "loss": 1.7227, "step": 2195 }, { "epoch": 0.6864645201625508, "grad_norm": 0.25390625, "learning_rate": 0.0001859189626690315, "loss": 1.8812, "step": 2196 }, { "epoch": 0.6867771178493279, "grad_norm": 0.2236328125, "learning_rate": 0.00018590638836601505, "loss": 1.5477, "step": 2197 }, { "epoch": 0.687089715536105, "grad_norm": 0.2255859375, "learning_rate": 0.0001858938088767205, "loss": 1.8684, "step": 2198 }, { "epoch": 0.6874023132228821, "grad_norm": 0.236328125, "learning_rate": 0.00018588122420190722, "loss": 1.8864, "step": 2199 }, { "epoch": 0.6877149109096593, "grad_norm": 0.2333984375, "learning_rate": 0.00018586863434233504, "loss": 1.7888, "step": 2200 }, { "epoch": 0.6880275085964364, "grad_norm": 0.2353515625, "learning_rate": 0.00018585603929876395, "loss": 1.6452, "step": 2201 }, { "epoch": 0.6883401062832135, "grad_norm": 0.2373046875, "learning_rate": 0.00018584343907195437, "loss": 1.585, "step": 2202 }, { "epoch": 0.6886527039699907, "grad_norm": 0.23828125, "learning_rate": 0.000185830833662667, "loss": 1.7144, "step": 2203 }, { "epoch": 0.6889653016567677, "grad_norm": 0.2275390625, "learning_rate": 0.00018581822307166281, "loss": 1.7379, "step": 2204 }, { "epoch": 0.6892778993435449, "grad_norm": 0.2333984375, "learning_rate": 0.00018580560729970313, "loss": 1.777, "step": 2205 }, { "epoch": 0.6895904970303219, "grad_norm": 0.2451171875, "learning_rate": 0.00018579298634754962, "loss": 1.902, "step": 2206 }, { "epoch": 0.6899030947170991, "grad_norm": 0.220703125, "learning_rate": 0.00018578036021596415, "loss": 1.6602, "step": 2207 }, { "epoch": 0.6902156924038763, "grad_norm": 0.2412109375, "learning_rate": 0.00018576772890570905, "loss": 1.8837, "step": 2208 }, { "epoch": 0.6905282900906533, "grad_norm": 0.251953125, "learning_rate": 0.00018575509241754685, "loss": 1.6694, "step": 2209 }, { "epoch": 0.6908408877774305, "grad_norm": 0.2294921875, "learning_rate": 0.00018574245075224046, "loss": 1.7201, "step": 2210 }, { "epoch": 0.6911534854642075, "grad_norm": 0.2392578125, "learning_rate": 0.00018572980391055305, "loss": 1.4998, "step": 2211 }, { "epoch": 0.6914660831509847, "grad_norm": 0.2255859375, "learning_rate": 0.00018571715189324813, "loss": 1.4607, "step": 2212 }, { "epoch": 0.6917786808377618, "grad_norm": 0.2158203125, "learning_rate": 0.00018570449470108952, "loss": 1.8028, "step": 2213 }, { "epoch": 0.6920912785245389, "grad_norm": 0.234375, "learning_rate": 0.00018569183233484133, "loss": 1.5558, "step": 2214 }, { "epoch": 0.692403876211316, "grad_norm": 0.2275390625, "learning_rate": 0.00018567916479526804, "loss": 1.5834, "step": 2215 }, { "epoch": 0.6927164738980931, "grad_norm": 0.232421875, "learning_rate": 0.0001856664920831344, "loss": 1.6607, "step": 2216 }, { "epoch": 0.6930290715848703, "grad_norm": 0.236328125, "learning_rate": 0.00018565381419920546, "loss": 1.5378, "step": 2217 }, { "epoch": 0.6933416692716474, "grad_norm": 0.2412109375, "learning_rate": 0.00018564113114424662, "loss": 1.8949, "step": 2218 }, { "epoch": 0.6936542669584245, "grad_norm": 0.234375, "learning_rate": 0.00018562844291902353, "loss": 1.9261, "step": 2219 }, { "epoch": 0.6939668646452016, "grad_norm": 0.2353515625, "learning_rate": 0.00018561574952430222, "loss": 2.0413, "step": 2220 }, { "epoch": 0.6942794623319788, "grad_norm": 0.2216796875, "learning_rate": 0.00018560305096084904, "loss": 1.7628, "step": 2221 }, { "epoch": 0.6945920600187558, "grad_norm": 0.240234375, "learning_rate": 0.00018559034722943056, "loss": 1.6226, "step": 2222 }, { "epoch": 0.694904657705533, "grad_norm": 0.22265625, "learning_rate": 0.00018557763833081377, "loss": 1.8693, "step": 2223 }, { "epoch": 0.69521725539231, "grad_norm": 0.2216796875, "learning_rate": 0.0001855649242657659, "loss": 1.4996, "step": 2224 }, { "epoch": 0.6955298530790872, "grad_norm": 0.25390625, "learning_rate": 0.00018555220503505452, "loss": 2.2346, "step": 2225 }, { "epoch": 0.6958424507658644, "grad_norm": 0.2255859375, "learning_rate": 0.00018553948063944749, "loss": 1.773, "step": 2226 }, { "epoch": 0.6961550484526414, "grad_norm": 0.2373046875, "learning_rate": 0.000185526751079713, "loss": 1.8362, "step": 2227 }, { "epoch": 0.6964676461394186, "grad_norm": 0.2373046875, "learning_rate": 0.00018551401635661958, "loss": 1.6007, "step": 2228 }, { "epoch": 0.6967802438261956, "grad_norm": 0.234375, "learning_rate": 0.00018550127647093601, "loss": 1.5875, "step": 2229 }, { "epoch": 0.6970928415129728, "grad_norm": 0.228515625, "learning_rate": 0.00018548853142343142, "loss": 1.7156, "step": 2230 }, { "epoch": 0.69740543919975, "grad_norm": 0.2412109375, "learning_rate": 0.00018547578121487528, "loss": 1.784, "step": 2231 }, { "epoch": 0.697718036886527, "grad_norm": 0.2333984375, "learning_rate": 0.00018546302584603727, "loss": 1.6756, "step": 2232 }, { "epoch": 0.6980306345733042, "grad_norm": 0.25, "learning_rate": 0.0001854502653176875, "loss": 1.8622, "step": 2233 }, { "epoch": 0.6983432322600813, "grad_norm": 0.2197265625, "learning_rate": 0.0001854374996305963, "loss": 1.383, "step": 2234 }, { "epoch": 0.6986558299468584, "grad_norm": 0.2314453125, "learning_rate": 0.0001854247287855344, "loss": 1.516, "step": 2235 }, { "epoch": 0.6989684276336355, "grad_norm": 0.2294921875, "learning_rate": 0.00018541195278327276, "loss": 1.5284, "step": 2236 }, { "epoch": 0.6992810253204126, "grad_norm": 0.2255859375, "learning_rate": 0.0001853991716245827, "loss": 1.4208, "step": 2237 }, { "epoch": 0.6995936230071897, "grad_norm": 0.2314453125, "learning_rate": 0.0001853863853102358, "loss": 1.8169, "step": 2238 }, { "epoch": 0.6999062206939669, "grad_norm": 0.23046875, "learning_rate": 0.000185373593841004, "loss": 1.686, "step": 2239 }, { "epoch": 0.700218818380744, "grad_norm": 0.234375, "learning_rate": 0.00018536079721765956, "loss": 1.4067, "step": 2240 }, { "epoch": 0.7005314160675211, "grad_norm": 0.228515625, "learning_rate": 0.00018534799544097505, "loss": 1.7239, "step": 2241 }, { "epoch": 0.7008440137542982, "grad_norm": 0.2255859375, "learning_rate": 0.00018533518851172325, "loss": 1.6176, "step": 2242 }, { "epoch": 0.7011566114410753, "grad_norm": 0.2333984375, "learning_rate": 0.0001853223764306774, "loss": 1.6086, "step": 2243 }, { "epoch": 0.7014692091278525, "grad_norm": 0.2255859375, "learning_rate": 0.00018530955919861096, "loss": 1.5131, "step": 2244 }, { "epoch": 0.7017818068146295, "grad_norm": 0.224609375, "learning_rate": 0.0001852967368162977, "loss": 1.685, "step": 2245 }, { "epoch": 0.7020944045014067, "grad_norm": 0.232421875, "learning_rate": 0.00018528390928451173, "loss": 1.8137, "step": 2246 }, { "epoch": 0.7024070021881839, "grad_norm": 0.2333984375, "learning_rate": 0.00018527107660402752, "loss": 1.7175, "step": 2247 }, { "epoch": 0.7027195998749609, "grad_norm": 0.2255859375, "learning_rate": 0.00018525823877561974, "loss": 1.6921, "step": 2248 }, { "epoch": 0.7030321975617381, "grad_norm": 0.23046875, "learning_rate": 0.0001852453958000634, "loss": 1.9215, "step": 2249 }, { "epoch": 0.7033447952485151, "grad_norm": 0.228515625, "learning_rate": 0.00018523254767813393, "loss": 1.5655, "step": 2250 }, { "epoch": 0.7036573929352923, "grad_norm": 0.2177734375, "learning_rate": 0.00018521969441060695, "loss": 1.6418, "step": 2251 }, { "epoch": 0.7039699906220694, "grad_norm": 0.240234375, "learning_rate": 0.0001852068359982584, "loss": 1.8771, "step": 2252 }, { "epoch": 0.7042825883088465, "grad_norm": 0.2412109375, "learning_rate": 0.00018519397244186458, "loss": 1.7217, "step": 2253 }, { "epoch": 0.7045951859956237, "grad_norm": 0.228515625, "learning_rate": 0.0001851811037422021, "loss": 1.8586, "step": 2254 }, { "epoch": 0.7049077836824007, "grad_norm": 0.2236328125, "learning_rate": 0.00018516822990004782, "loss": 1.5904, "step": 2255 }, { "epoch": 0.7052203813691779, "grad_norm": 0.2255859375, "learning_rate": 0.00018515535091617898, "loss": 1.6428, "step": 2256 }, { "epoch": 0.705532979055955, "grad_norm": 0.234375, "learning_rate": 0.0001851424667913731, "loss": 1.7164, "step": 2257 }, { "epoch": 0.7058455767427321, "grad_norm": 0.23046875, "learning_rate": 0.00018512957752640799, "loss": 1.7193, "step": 2258 }, { "epoch": 0.7061581744295092, "grad_norm": 0.2294921875, "learning_rate": 0.00018511668312206177, "loss": 1.5025, "step": 2259 }, { "epoch": 0.7064707721162864, "grad_norm": 0.2294921875, "learning_rate": 0.00018510378357911296, "loss": 1.612, "step": 2260 }, { "epoch": 0.7067833698030634, "grad_norm": 0.2412109375, "learning_rate": 0.00018509087889834031, "loss": 1.5849, "step": 2261 }, { "epoch": 0.7070959674898406, "grad_norm": 0.25, "learning_rate": 0.00018507796908052285, "loss": 1.6807, "step": 2262 }, { "epoch": 0.7074085651766177, "grad_norm": 0.228515625, "learning_rate": 0.00018506505412643995, "loss": 1.6728, "step": 2263 }, { "epoch": 0.7077211628633948, "grad_norm": 0.234375, "learning_rate": 0.00018505213403687137, "loss": 1.7322, "step": 2264 }, { "epoch": 0.708033760550172, "grad_norm": 0.2265625, "learning_rate": 0.00018503920881259703, "loss": 1.6204, "step": 2265 }, { "epoch": 0.708346358236949, "grad_norm": 0.228515625, "learning_rate": 0.00018502627845439732, "loss": 1.5918, "step": 2266 }, { "epoch": 0.7086589559237262, "grad_norm": 0.2421875, "learning_rate": 0.00018501334296305285, "loss": 1.8249, "step": 2267 }, { "epoch": 0.7089715536105032, "grad_norm": 0.2431640625, "learning_rate": 0.00018500040233934454, "loss": 1.974, "step": 2268 }, { "epoch": 0.7092841512972804, "grad_norm": 0.2255859375, "learning_rate": 0.00018498745658405356, "loss": 1.6999, "step": 2269 }, { "epoch": 0.7095967489840576, "grad_norm": 0.23046875, "learning_rate": 0.00018497450569796158, "loss": 1.9307, "step": 2270 }, { "epoch": 0.7099093466708346, "grad_norm": 0.240234375, "learning_rate": 0.00018496154968185036, "loss": 1.7392, "step": 2271 }, { "epoch": 0.7102219443576118, "grad_norm": 0.2353515625, "learning_rate": 0.00018494858853650213, "loss": 1.7068, "step": 2272 }, { "epoch": 0.7105345420443888, "grad_norm": 0.232421875, "learning_rate": 0.0001849356222626994, "loss": 1.8758, "step": 2273 }, { "epoch": 0.710847139731166, "grad_norm": 0.236328125, "learning_rate": 0.00018492265086122488, "loss": 1.6345, "step": 2274 }, { "epoch": 0.7111597374179431, "grad_norm": 0.24609375, "learning_rate": 0.0001849096743328617, "loss": 1.7491, "step": 2275 }, { "epoch": 0.7114723351047202, "grad_norm": 0.2265625, "learning_rate": 0.0001848966926783933, "loss": 1.5166, "step": 2276 }, { "epoch": 0.7117849327914973, "grad_norm": 0.234375, "learning_rate": 0.0001848837058986034, "loss": 1.7068, "step": 2277 }, { "epoch": 0.7120975304782745, "grad_norm": 0.23046875, "learning_rate": 0.00018487071399427599, "loss": 1.7652, "step": 2278 }, { "epoch": 0.7124101281650516, "grad_norm": 0.2392578125, "learning_rate": 0.00018485771696619542, "loss": 1.7871, "step": 2279 }, { "epoch": 0.7127227258518287, "grad_norm": 0.228515625, "learning_rate": 0.00018484471481514635, "loss": 1.9055, "step": 2280 }, { "epoch": 0.7130353235386058, "grad_norm": 0.2275390625, "learning_rate": 0.0001848317075419137, "loss": 1.8693, "step": 2281 }, { "epoch": 0.7133479212253829, "grad_norm": 0.2294921875, "learning_rate": 0.00018481869514728279, "loss": 1.548, "step": 2282 }, { "epoch": 0.7136605189121601, "grad_norm": 0.2392578125, "learning_rate": 0.00018480567763203918, "loss": 1.614, "step": 2283 }, { "epoch": 0.7139731165989371, "grad_norm": 0.2314453125, "learning_rate": 0.0001847926549969687, "loss": 1.4828, "step": 2284 }, { "epoch": 0.7142857142857143, "grad_norm": 0.2294921875, "learning_rate": 0.00018477962724285763, "loss": 1.8229, "step": 2285 }, { "epoch": 0.7145983119724914, "grad_norm": 0.2314453125, "learning_rate": 0.00018476659437049238, "loss": 1.877, "step": 2286 }, { "epoch": 0.7149109096592685, "grad_norm": 0.23828125, "learning_rate": 0.00018475355638065984, "loss": 1.5996, "step": 2287 }, { "epoch": 0.7152235073460457, "grad_norm": 0.2373046875, "learning_rate": 0.00018474051327414709, "loss": 1.6033, "step": 2288 }, { "epoch": 0.7155361050328227, "grad_norm": 0.248046875, "learning_rate": 0.00018472746505174156, "loss": 1.6509, "step": 2289 }, { "epoch": 0.7158487027195999, "grad_norm": 0.2353515625, "learning_rate": 0.00018471441171423103, "loss": 1.8609, "step": 2290 }, { "epoch": 0.716161300406377, "grad_norm": 0.224609375, "learning_rate": 0.00018470135326240347, "loss": 1.8864, "step": 2291 }, { "epoch": 0.7164738980931541, "grad_norm": 0.2412109375, "learning_rate": 0.0001846882896970473, "loss": 1.5743, "step": 2292 }, { "epoch": 0.7167864957799313, "grad_norm": 0.2314453125, "learning_rate": 0.00018467522101895116, "loss": 1.8124, "step": 2293 }, { "epoch": 0.7170990934667083, "grad_norm": 0.2373046875, "learning_rate": 0.00018466214722890402, "loss": 1.4247, "step": 2294 }, { "epoch": 0.7174116911534855, "grad_norm": 0.2275390625, "learning_rate": 0.00018464906832769517, "loss": 1.5627, "step": 2295 }, { "epoch": 0.7177242888402626, "grad_norm": 0.2314453125, "learning_rate": 0.0001846359843161142, "loss": 1.8247, "step": 2296 }, { "epoch": 0.7180368865270397, "grad_norm": 0.234375, "learning_rate": 0.000184622895194951, "loss": 1.6003, "step": 2297 }, { "epoch": 0.7183494842138168, "grad_norm": 0.236328125, "learning_rate": 0.0001846098009649958, "loss": 1.6546, "step": 2298 }, { "epoch": 0.7186620819005939, "grad_norm": 0.23046875, "learning_rate": 0.00018459670162703905, "loss": 1.8521, "step": 2299 }, { "epoch": 0.718974679587371, "grad_norm": 0.2421875, "learning_rate": 0.00018458359718187165, "loss": 1.7397, "step": 2300 }, { "epoch": 0.7192872772741482, "grad_norm": 0.232421875, "learning_rate": 0.0001845704876302847, "loss": 1.7336, "step": 2301 }, { "epoch": 0.7195998749609253, "grad_norm": 0.2490234375, "learning_rate": 0.00018455737297306963, "loss": 1.6112, "step": 2302 }, { "epoch": 0.7199124726477024, "grad_norm": 0.2275390625, "learning_rate": 0.00018454425321101826, "loss": 1.8522, "step": 2303 }, { "epoch": 0.7202250703344796, "grad_norm": 0.236328125, "learning_rate": 0.0001845311283449225, "loss": 1.6348, "step": 2304 }, { "epoch": 0.7205376680212566, "grad_norm": 0.2275390625, "learning_rate": 0.00018451799837557485, "loss": 1.7101, "step": 2305 }, { "epoch": 0.7208502657080338, "grad_norm": 0.2392578125, "learning_rate": 0.00018450486330376793, "loss": 1.4738, "step": 2306 }, { "epoch": 0.7211628633948108, "grad_norm": 0.23828125, "learning_rate": 0.00018449172313029472, "loss": 1.6334, "step": 2307 }, { "epoch": 0.721475461081588, "grad_norm": 0.2373046875, "learning_rate": 0.00018447857785594852, "loss": 1.5218, "step": 2308 }, { "epoch": 0.7217880587683652, "grad_norm": 0.2421875, "learning_rate": 0.00018446542748152292, "loss": 1.8324, "step": 2309 }, { "epoch": 0.7221006564551422, "grad_norm": 0.234375, "learning_rate": 0.00018445227200781185, "loss": 1.8051, "step": 2310 }, { "epoch": 0.7224132541419194, "grad_norm": 0.228515625, "learning_rate": 0.0001844391114356095, "loss": 1.65, "step": 2311 }, { "epoch": 0.7227258518286964, "grad_norm": 0.2373046875, "learning_rate": 0.00018442594576571035, "loss": 1.8499, "step": 2312 }, { "epoch": 0.7230384495154736, "grad_norm": 0.2353515625, "learning_rate": 0.0001844127749989093, "loss": 1.671, "step": 2313 }, { "epoch": 0.7233510472022507, "grad_norm": 0.22265625, "learning_rate": 0.0001843995991360014, "loss": 1.7405, "step": 2314 }, { "epoch": 0.7236636448890278, "grad_norm": 0.2451171875, "learning_rate": 0.0001843864181777822, "loss": 1.8025, "step": 2315 }, { "epoch": 0.723976242575805, "grad_norm": 0.244140625, "learning_rate": 0.00018437323212504742, "loss": 1.5695, "step": 2316 }, { "epoch": 0.7242888402625821, "grad_norm": 0.22265625, "learning_rate": 0.00018436004097859308, "loss": 1.2384, "step": 2317 }, { "epoch": 0.7246014379493592, "grad_norm": 0.2265625, "learning_rate": 0.00018434684473921556, "loss": 1.6555, "step": 2318 }, { "epoch": 0.7249140356361363, "grad_norm": 0.2294921875, "learning_rate": 0.00018433364340771153, "loss": 1.6447, "step": 2319 }, { "epoch": 0.7252266333229134, "grad_norm": 0.232421875, "learning_rate": 0.00018432043698487797, "loss": 1.6859, "step": 2320 }, { "epoch": 0.7255392310096905, "grad_norm": 0.232421875, "learning_rate": 0.0001843072254715122, "loss": 1.7087, "step": 2321 }, { "epoch": 0.7258518286964677, "grad_norm": 0.2353515625, "learning_rate": 0.0001842940088684118, "loss": 1.7149, "step": 2322 }, { "epoch": 0.7261644263832447, "grad_norm": 0.2294921875, "learning_rate": 0.00018428078717637467, "loss": 1.8408, "step": 2323 }, { "epoch": 0.7264770240700219, "grad_norm": 0.2294921875, "learning_rate": 0.000184267560396199, "loss": 1.7943, "step": 2324 }, { "epoch": 0.726789621756799, "grad_norm": 0.2333984375, "learning_rate": 0.00018425432852868333, "loss": 1.7252, "step": 2325 }, { "epoch": 0.7271022194435761, "grad_norm": 0.2392578125, "learning_rate": 0.0001842410915746265, "loss": 1.6914, "step": 2326 }, { "epoch": 0.7274148171303533, "grad_norm": 0.232421875, "learning_rate": 0.0001842278495348276, "loss": 1.9011, "step": 2327 }, { "epoch": 0.7277274148171303, "grad_norm": 0.236328125, "learning_rate": 0.00018421460241008607, "loss": 1.8245, "step": 2328 }, { "epoch": 0.7280400125039075, "grad_norm": 0.2451171875, "learning_rate": 0.00018420135020120172, "loss": 1.8638, "step": 2329 }, { "epoch": 0.7283526101906845, "grad_norm": 0.2294921875, "learning_rate": 0.00018418809290897455, "loss": 1.7493, "step": 2330 }, { "epoch": 0.7286652078774617, "grad_norm": 0.2353515625, "learning_rate": 0.0001841748305342049, "loss": 1.5843, "step": 2331 }, { "epoch": 0.7289778055642389, "grad_norm": 0.236328125, "learning_rate": 0.0001841615630776935, "loss": 1.5289, "step": 2332 }, { "epoch": 0.7292904032510159, "grad_norm": 0.240234375, "learning_rate": 0.00018414829054024128, "loss": 1.6851, "step": 2333 }, { "epoch": 0.7296030009377931, "grad_norm": 0.21875, "learning_rate": 0.0001841350129226495, "loss": 1.3236, "step": 2334 }, { "epoch": 0.7299155986245702, "grad_norm": 0.2431640625, "learning_rate": 0.00018412173022571982, "loss": 1.9465, "step": 2335 }, { "epoch": 0.7302281963113473, "grad_norm": 0.2412109375, "learning_rate": 0.00018410844245025408, "loss": 1.7362, "step": 2336 }, { "epoch": 0.7305407939981244, "grad_norm": 0.228515625, "learning_rate": 0.00018409514959705448, "loss": 1.7688, "step": 2337 }, { "epoch": 0.7308533916849015, "grad_norm": 0.24609375, "learning_rate": 0.0001840818516669235, "loss": 1.658, "step": 2338 }, { "epoch": 0.7311659893716786, "grad_norm": 0.2373046875, "learning_rate": 0.00018406854866066403, "loss": 1.6786, "step": 2339 }, { "epoch": 0.7314785870584558, "grad_norm": 0.23828125, "learning_rate": 0.00018405524057907915, "loss": 1.6658, "step": 2340 }, { "epoch": 0.7317911847452329, "grad_norm": 0.2578125, "learning_rate": 0.0001840419274229723, "loss": 1.6022, "step": 2341 }, { "epoch": 0.73210378243201, "grad_norm": 0.2294921875, "learning_rate": 0.00018402860919314713, "loss": 1.7735, "step": 2342 }, { "epoch": 0.7324163801187871, "grad_norm": 0.2275390625, "learning_rate": 0.0001840152858904078, "loss": 1.4977, "step": 2343 }, { "epoch": 0.7327289778055642, "grad_norm": 0.236328125, "learning_rate": 0.00018400195751555858, "loss": 1.7735, "step": 2344 }, { "epoch": 0.7330415754923414, "grad_norm": 0.2421875, "learning_rate": 0.00018398862406940412, "loss": 1.5705, "step": 2345 }, { "epoch": 0.7333541731791184, "grad_norm": 0.228515625, "learning_rate": 0.00018397528555274943, "loss": 1.9914, "step": 2346 }, { "epoch": 0.7336667708658956, "grad_norm": 0.224609375, "learning_rate": 0.00018396194196639972, "loss": 1.6567, "step": 2347 }, { "epoch": 0.7339793685526728, "grad_norm": 0.2255859375, "learning_rate": 0.0001839485933111606, "loss": 1.5779, "step": 2348 }, { "epoch": 0.7342919662394498, "grad_norm": 0.2294921875, "learning_rate": 0.00018393523958783788, "loss": 1.6902, "step": 2349 }, { "epoch": 0.734604563926227, "grad_norm": 0.23046875, "learning_rate": 0.00018392188079723786, "loss": 1.8415, "step": 2350 }, { "epoch": 0.734917161613004, "grad_norm": 0.2421875, "learning_rate": 0.0001839085169401669, "loss": 1.7724, "step": 2351 }, { "epoch": 0.7352297592997812, "grad_norm": 0.23046875, "learning_rate": 0.00018389514801743186, "loss": 1.4619, "step": 2352 }, { "epoch": 0.7355423569865583, "grad_norm": 0.2392578125, "learning_rate": 0.00018388177402983984, "loss": 1.7035, "step": 2353 }, { "epoch": 0.7358549546733354, "grad_norm": 0.23828125, "learning_rate": 0.00018386839497819821, "loss": 1.6311, "step": 2354 }, { "epoch": 0.7361675523601126, "grad_norm": 0.2353515625, "learning_rate": 0.00018385501086331472, "loss": 1.4891, "step": 2355 }, { "epoch": 0.7364801500468896, "grad_norm": 0.23046875, "learning_rate": 0.00018384162168599735, "loss": 1.7706, "step": 2356 }, { "epoch": 0.7367927477336668, "grad_norm": 0.259765625, "learning_rate": 0.00018382822744705444, "loss": 1.7342, "step": 2357 }, { "epoch": 0.7371053454204439, "grad_norm": 0.2451171875, "learning_rate": 0.0001838148281472946, "loss": 1.7338, "step": 2358 }, { "epoch": 0.737417943107221, "grad_norm": 0.244140625, "learning_rate": 0.0001838014237875268, "loss": 1.6715, "step": 2359 }, { "epoch": 0.7377305407939981, "grad_norm": 0.2294921875, "learning_rate": 0.00018378801436856027, "loss": 1.8231, "step": 2360 }, { "epoch": 0.7380431384807753, "grad_norm": 0.25, "learning_rate": 0.00018377459989120452, "loss": 1.6681, "step": 2361 }, { "epoch": 0.7383557361675523, "grad_norm": 0.2265625, "learning_rate": 0.00018376118035626942, "loss": 1.6599, "step": 2362 }, { "epoch": 0.7386683338543295, "grad_norm": 0.265625, "learning_rate": 0.00018374775576456513, "loss": 1.8036, "step": 2363 }, { "epoch": 0.7389809315411066, "grad_norm": 0.232421875, "learning_rate": 0.00018373432611690208, "loss": 1.8082, "step": 2364 }, { "epoch": 0.7392935292278837, "grad_norm": 0.2216796875, "learning_rate": 0.0001837208914140911, "loss": 1.4781, "step": 2365 }, { "epoch": 0.7396061269146609, "grad_norm": 0.2236328125, "learning_rate": 0.00018370745165694318, "loss": 1.3993, "step": 2366 }, { "epoch": 0.7399187246014379, "grad_norm": 0.23046875, "learning_rate": 0.00018369400684626976, "loss": 1.5936, "step": 2367 }, { "epoch": 0.7402313222882151, "grad_norm": 0.2265625, "learning_rate": 0.00018368055698288248, "loss": 1.4418, "step": 2368 }, { "epoch": 0.7405439199749921, "grad_norm": 0.2412109375, "learning_rate": 0.00018366710206759335, "loss": 1.5162, "step": 2369 }, { "epoch": 0.7408565176617693, "grad_norm": 0.2353515625, "learning_rate": 0.00018365364210121466, "loss": 1.9776, "step": 2370 }, { "epoch": 0.7411691153485465, "grad_norm": 0.2265625, "learning_rate": 0.00018364017708455895, "loss": 1.3729, "step": 2371 }, { "epoch": 0.7414817130353235, "grad_norm": 0.2353515625, "learning_rate": 0.0001836267070184392, "loss": 1.6031, "step": 2372 }, { "epoch": 0.7417943107221007, "grad_norm": 0.251953125, "learning_rate": 0.0001836132319036686, "loss": 1.5944, "step": 2373 }, { "epoch": 0.7421069084088777, "grad_norm": 0.2353515625, "learning_rate": 0.0001835997517410606, "loss": 1.3653, "step": 2374 }, { "epoch": 0.7424195060956549, "grad_norm": 0.232421875, "learning_rate": 0.0001835862665314291, "loss": 1.7253, "step": 2375 }, { "epoch": 0.742732103782432, "grad_norm": 0.234375, "learning_rate": 0.00018357277627558815, "loss": 1.326, "step": 2376 }, { "epoch": 0.7430447014692091, "grad_norm": 0.2294921875, "learning_rate": 0.00018355928097435218, "loss": 1.8161, "step": 2377 }, { "epoch": 0.7433572991559863, "grad_norm": 0.2216796875, "learning_rate": 0.00018354578062853595, "loss": 1.8656, "step": 2378 }, { "epoch": 0.7436698968427634, "grad_norm": 0.23046875, "learning_rate": 0.0001835322752389545, "loss": 1.8657, "step": 2379 }, { "epoch": 0.7439824945295405, "grad_norm": 0.2275390625, "learning_rate": 0.0001835187648064231, "loss": 1.5715, "step": 2380 }, { "epoch": 0.7442950922163176, "grad_norm": 0.2392578125, "learning_rate": 0.0001835052493317575, "loss": 1.7185, "step": 2381 }, { "epoch": 0.7446076899030947, "grad_norm": 0.234375, "learning_rate": 0.00018349172881577356, "loss": 1.7779, "step": 2382 }, { "epoch": 0.7449202875898718, "grad_norm": 0.2275390625, "learning_rate": 0.00018347820325928754, "loss": 1.9479, "step": 2383 }, { "epoch": 0.745232885276649, "grad_norm": 0.234375, "learning_rate": 0.00018346467266311604, "loss": 1.7667, "step": 2384 }, { "epoch": 0.745545482963426, "grad_norm": 0.248046875, "learning_rate": 0.00018345113702807585, "loss": 1.4014, "step": 2385 }, { "epoch": 0.7458580806502032, "grad_norm": 0.240234375, "learning_rate": 0.00018343759635498422, "loss": 1.8576, "step": 2386 }, { "epoch": 0.7461706783369803, "grad_norm": 0.23828125, "learning_rate": 0.00018342405064465856, "loss": 1.6006, "step": 2387 }, { "epoch": 0.7464832760237574, "grad_norm": 0.2333984375, "learning_rate": 0.00018341049989791666, "loss": 1.5874, "step": 2388 }, { "epoch": 0.7467958737105346, "grad_norm": 0.251953125, "learning_rate": 0.00018339694411557655, "loss": 1.6729, "step": 2389 }, { "epoch": 0.7471084713973116, "grad_norm": 0.2333984375, "learning_rate": 0.00018338338329845668, "loss": 1.5282, "step": 2390 }, { "epoch": 0.7474210690840888, "grad_norm": 0.2373046875, "learning_rate": 0.00018336981744737573, "loss": 1.5829, "step": 2391 }, { "epoch": 0.747733666770866, "grad_norm": 0.2373046875, "learning_rate": 0.0001833562465631526, "loss": 1.6278, "step": 2392 }, { "epoch": 0.748046264457643, "grad_norm": 0.2412109375, "learning_rate": 0.00018334267064660668, "loss": 1.6944, "step": 2393 }, { "epoch": 0.7483588621444202, "grad_norm": 0.2353515625, "learning_rate": 0.00018332908969855753, "loss": 1.8641, "step": 2394 }, { "epoch": 0.7486714598311972, "grad_norm": 0.2421875, "learning_rate": 0.00018331550371982505, "loss": 1.6727, "step": 2395 }, { "epoch": 0.7489840575179744, "grad_norm": 0.2314453125, "learning_rate": 0.00018330191271122943, "loss": 1.6077, "step": 2396 }, { "epoch": 0.7492966552047515, "grad_norm": 0.244140625, "learning_rate": 0.0001832883166735912, "loss": 1.4713, "step": 2397 }, { "epoch": 0.7496092528915286, "grad_norm": 0.248046875, "learning_rate": 0.00018327471560773112, "loss": 1.9724, "step": 2398 }, { "epoch": 0.7499218505783057, "grad_norm": 0.2470703125, "learning_rate": 0.00018326110951447037, "loss": 1.852, "step": 2399 }, { "epoch": 0.7502344482650828, "grad_norm": 0.2353515625, "learning_rate": 0.00018324749839463035, "loss": 1.7013, "step": 2400 }, { "epoch": 0.75054704595186, "grad_norm": 0.234375, "learning_rate": 0.00018323388224903274, "loss": 2.0012, "step": 2401 }, { "epoch": 0.7508596436386371, "grad_norm": 0.2421875, "learning_rate": 0.0001832202610784996, "loss": 1.7133, "step": 2402 }, { "epoch": 0.7511722413254142, "grad_norm": 0.2392578125, "learning_rate": 0.00018320663488385327, "loss": 1.7841, "step": 2403 }, { "epoch": 0.7514848390121913, "grad_norm": 0.2275390625, "learning_rate": 0.00018319300366591637, "loss": 1.8134, "step": 2404 }, { "epoch": 0.7517974366989685, "grad_norm": 0.23046875, "learning_rate": 0.00018317936742551178, "loss": 1.5865, "step": 2405 }, { "epoch": 0.7521100343857455, "grad_norm": 0.2333984375, "learning_rate": 0.0001831657261634628, "loss": 1.6447, "step": 2406 }, { "epoch": 0.7524226320725227, "grad_norm": 0.2451171875, "learning_rate": 0.00018315207988059298, "loss": 1.4747, "step": 2407 }, { "epoch": 0.7527352297592997, "grad_norm": 0.2451171875, "learning_rate": 0.0001831384285777261, "loss": 1.7538, "step": 2408 }, { "epoch": 0.7530478274460769, "grad_norm": 0.2275390625, "learning_rate": 0.00018312477225568635, "loss": 1.6004, "step": 2409 }, { "epoch": 0.7533604251328541, "grad_norm": 0.25, "learning_rate": 0.00018311111091529818, "loss": 1.6864, "step": 2410 }, { "epoch": 0.7536730228196311, "grad_norm": 0.2412109375, "learning_rate": 0.00018309744455738633, "loss": 1.8215, "step": 2411 }, { "epoch": 0.7539856205064083, "grad_norm": 0.236328125, "learning_rate": 0.00018308377318277587, "loss": 1.672, "step": 2412 }, { "epoch": 0.7542982181931853, "grad_norm": 0.2451171875, "learning_rate": 0.0001830700967922921, "loss": 1.7247, "step": 2413 }, { "epoch": 0.7546108158799625, "grad_norm": 0.24609375, "learning_rate": 0.00018305641538676079, "loss": 1.6188, "step": 2414 }, { "epoch": 0.7549234135667396, "grad_norm": 0.251953125, "learning_rate": 0.00018304272896700784, "loss": 1.8593, "step": 2415 }, { "epoch": 0.7552360112535167, "grad_norm": 0.2490234375, "learning_rate": 0.0001830290375338595, "loss": 1.5332, "step": 2416 }, { "epoch": 0.7555486089402939, "grad_norm": 0.224609375, "learning_rate": 0.00018301534108814234, "loss": 1.5756, "step": 2417 }, { "epoch": 0.755861206627071, "grad_norm": 0.244140625, "learning_rate": 0.0001830016396306833, "loss": 2.0714, "step": 2418 }, { "epoch": 0.7561738043138481, "grad_norm": 0.2451171875, "learning_rate": 0.00018298793316230948, "loss": 1.64, "step": 2419 }, { "epoch": 0.7564864020006252, "grad_norm": 0.220703125, "learning_rate": 0.00018297422168384836, "loss": 1.5317, "step": 2420 }, { "epoch": 0.7567989996874023, "grad_norm": 0.2451171875, "learning_rate": 0.00018296050519612777, "loss": 1.8879, "step": 2421 }, { "epoch": 0.7571115973741794, "grad_norm": 0.2421875, "learning_rate": 0.00018294678369997578, "loss": 1.7005, "step": 2422 }, { "epoch": 0.7574241950609566, "grad_norm": 0.376953125, "learning_rate": 0.00018293305719622072, "loss": 2.244, "step": 2423 }, { "epoch": 0.7577367927477336, "grad_norm": 0.2431640625, "learning_rate": 0.00018291932568569134, "loss": 1.5323, "step": 2424 }, { "epoch": 0.7580493904345108, "grad_norm": 0.2451171875, "learning_rate": 0.00018290558916921659, "loss": 1.6395, "step": 2425 }, { "epoch": 0.7583619881212879, "grad_norm": 0.25, "learning_rate": 0.00018289184764762575, "loss": 1.648, "step": 2426 }, { "epoch": 0.758674585808065, "grad_norm": 0.234375, "learning_rate": 0.0001828781011217485, "loss": 1.5622, "step": 2427 }, { "epoch": 0.7589871834948422, "grad_norm": 0.2275390625, "learning_rate": 0.00018286434959241462, "loss": 1.5481, "step": 2428 }, { "epoch": 0.7592997811816192, "grad_norm": 0.2294921875, "learning_rate": 0.00018285059306045437, "loss": 1.77, "step": 2429 }, { "epoch": 0.7596123788683964, "grad_norm": 0.2314453125, "learning_rate": 0.00018283683152669824, "loss": 1.4071, "step": 2430 }, { "epoch": 0.7599249765551734, "grad_norm": 0.2490234375, "learning_rate": 0.00018282306499197703, "loss": 2.0644, "step": 2431 }, { "epoch": 0.7602375742419506, "grad_norm": 0.2421875, "learning_rate": 0.00018280929345712186, "loss": 1.7075, "step": 2432 }, { "epoch": 0.7605501719287278, "grad_norm": 0.2353515625, "learning_rate": 0.0001827955169229641, "loss": 1.3107, "step": 2433 }, { "epoch": 0.7608627696155048, "grad_norm": 0.23046875, "learning_rate": 0.00018278173539033548, "loss": 1.7646, "step": 2434 }, { "epoch": 0.761175367302282, "grad_norm": 0.23828125, "learning_rate": 0.00018276794886006804, "loss": 2.0252, "step": 2435 }, { "epoch": 0.7614879649890591, "grad_norm": 0.2294921875, "learning_rate": 0.00018275415733299402, "loss": 1.5208, "step": 2436 }, { "epoch": 0.7618005626758362, "grad_norm": 0.2412109375, "learning_rate": 0.00018274036080994605, "loss": 1.8906, "step": 2437 }, { "epoch": 0.7621131603626133, "grad_norm": 0.2275390625, "learning_rate": 0.00018272655929175708, "loss": 1.8472, "step": 2438 }, { "epoch": 0.7624257580493904, "grad_norm": 0.2392578125, "learning_rate": 0.0001827127527792603, "loss": 1.7364, "step": 2439 }, { "epoch": 0.7627383557361676, "grad_norm": 0.2373046875, "learning_rate": 0.00018269894127328926, "loss": 1.8149, "step": 2440 }, { "epoch": 0.7630509534229447, "grad_norm": 0.232421875, "learning_rate": 0.00018268512477467774, "loss": 1.8335, "step": 2441 }, { "epoch": 0.7633635511097218, "grad_norm": 0.2353515625, "learning_rate": 0.00018267130328425985, "loss": 1.7762, "step": 2442 }, { "epoch": 0.7636761487964989, "grad_norm": 0.2265625, "learning_rate": 0.00018265747680287008, "loss": 1.5251, "step": 2443 }, { "epoch": 0.763988746483276, "grad_norm": 0.2333984375, "learning_rate": 0.00018264364533134304, "loss": 1.5232, "step": 2444 }, { "epoch": 0.7643013441700531, "grad_norm": 0.2333984375, "learning_rate": 0.00018262980887051385, "loss": 1.5101, "step": 2445 }, { "epoch": 0.7646139418568303, "grad_norm": 0.2275390625, "learning_rate": 0.00018261596742121777, "loss": 1.6831, "step": 2446 }, { "epoch": 0.7649265395436073, "grad_norm": 0.24609375, "learning_rate": 0.00018260212098429054, "loss": 1.8748, "step": 2447 }, { "epoch": 0.7652391372303845, "grad_norm": 0.2451171875, "learning_rate": 0.00018258826956056793, "loss": 1.7539, "step": 2448 }, { "epoch": 0.7655517349171617, "grad_norm": 0.2412109375, "learning_rate": 0.00018257441315088627, "loss": 1.5779, "step": 2449 }, { "epoch": 0.7658643326039387, "grad_norm": 0.2421875, "learning_rate": 0.00018256055175608205, "loss": 1.7147, "step": 2450 }, { "epoch": 0.7661769302907159, "grad_norm": 0.2265625, "learning_rate": 0.00018254668537699212, "loss": 1.682, "step": 2451 }, { "epoch": 0.7664895279774929, "grad_norm": 0.2412109375, "learning_rate": 0.0001825328140144536, "loss": 1.8002, "step": 2452 }, { "epoch": 0.7668021256642701, "grad_norm": 0.234375, "learning_rate": 0.0001825189376693039, "loss": 1.3419, "step": 2453 }, { "epoch": 0.7671147233510472, "grad_norm": 0.24609375, "learning_rate": 0.0001825050563423808, "loss": 1.7038, "step": 2454 }, { "epoch": 0.7674273210378243, "grad_norm": 0.232421875, "learning_rate": 0.00018249117003452234, "loss": 1.6925, "step": 2455 }, { "epoch": 0.7677399187246015, "grad_norm": 0.2333984375, "learning_rate": 0.00018247727874656683, "loss": 1.7601, "step": 2456 }, { "epoch": 0.7680525164113785, "grad_norm": 0.23828125, "learning_rate": 0.00018246338247935285, "loss": 1.6095, "step": 2457 }, { "epoch": 0.7683651140981557, "grad_norm": 0.2431640625, "learning_rate": 0.0001824494812337194, "loss": 1.4805, "step": 2458 }, { "epoch": 0.7686777117849328, "grad_norm": 0.23046875, "learning_rate": 0.00018243557501050573, "loss": 1.6642, "step": 2459 }, { "epoch": 0.7689903094717099, "grad_norm": 0.2431640625, "learning_rate": 0.00018242166381055133, "loss": 1.4541, "step": 2460 }, { "epoch": 0.769302907158487, "grad_norm": 0.2470703125, "learning_rate": 0.00018240774763469606, "loss": 1.5884, "step": 2461 }, { "epoch": 0.7696155048452642, "grad_norm": 0.224609375, "learning_rate": 0.00018239382648378006, "loss": 1.6074, "step": 2462 }, { "epoch": 0.7699281025320412, "grad_norm": 0.232421875, "learning_rate": 0.00018237990035864372, "loss": 1.7759, "step": 2463 }, { "epoch": 0.7702407002188184, "grad_norm": 0.2236328125, "learning_rate": 0.00018236596926012787, "loss": 1.6379, "step": 2464 }, { "epoch": 0.7705532979055955, "grad_norm": 0.248046875, "learning_rate": 0.00018235203318907347, "loss": 1.7159, "step": 2465 }, { "epoch": 0.7708658955923726, "grad_norm": 0.251953125, "learning_rate": 0.00018233809214632184, "loss": 1.6911, "step": 2466 }, { "epoch": 0.7711784932791498, "grad_norm": 0.2392578125, "learning_rate": 0.00018232414613271475, "loss": 1.422, "step": 2467 }, { "epoch": 0.7714910909659268, "grad_norm": 0.2314453125, "learning_rate": 0.00018231019514909397, "loss": 1.551, "step": 2468 }, { "epoch": 0.771803688652704, "grad_norm": 0.2412109375, "learning_rate": 0.00018229623919630188, "loss": 1.8121, "step": 2469 }, { "epoch": 0.772116286339481, "grad_norm": 0.2265625, "learning_rate": 0.00018228227827518095, "loss": 1.9086, "step": 2470 }, { "epoch": 0.7724288840262582, "grad_norm": 0.330078125, "learning_rate": 0.000182268312386574, "loss": 2.5265, "step": 2471 }, { "epoch": 0.7727414817130354, "grad_norm": 0.234375, "learning_rate": 0.0001822543415313242, "loss": 1.8133, "step": 2472 }, { "epoch": 0.7730540793998124, "grad_norm": 0.255859375, "learning_rate": 0.00018224036571027501, "loss": 1.9486, "step": 2473 }, { "epoch": 0.7733666770865896, "grad_norm": 0.240234375, "learning_rate": 0.0001822263849242701, "loss": 1.7464, "step": 2474 }, { "epoch": 0.7736792747733667, "grad_norm": 0.2431640625, "learning_rate": 0.0001822123991741536, "loss": 1.743, "step": 2475 }, { "epoch": 0.7739918724601438, "grad_norm": 0.2353515625, "learning_rate": 0.00018219840846076977, "loss": 1.4856, "step": 2476 }, { "epoch": 0.7743044701469209, "grad_norm": 0.2451171875, "learning_rate": 0.00018218441278496328, "loss": 1.7813, "step": 2477 }, { "epoch": 0.774617067833698, "grad_norm": 0.234375, "learning_rate": 0.00018217041214757903, "loss": 1.7274, "step": 2478 }, { "epoch": 0.7749296655204752, "grad_norm": 0.2333984375, "learning_rate": 0.00018215640654946233, "loss": 1.5569, "step": 2479 }, { "epoch": 0.7752422632072523, "grad_norm": 0.2353515625, "learning_rate": 0.00018214239599145866, "loss": 1.5575, "step": 2480 }, { "epoch": 0.7755548608940294, "grad_norm": 0.2294921875, "learning_rate": 0.00018212838047441387, "loss": 1.5972, "step": 2481 }, { "epoch": 0.7758674585808065, "grad_norm": 0.2275390625, "learning_rate": 0.0001821143599991741, "loss": 1.668, "step": 2482 }, { "epoch": 0.7761800562675836, "grad_norm": 0.2373046875, "learning_rate": 0.00018210033456658576, "loss": 1.646, "step": 2483 }, { "epoch": 0.7764926539543607, "grad_norm": 0.26171875, "learning_rate": 0.00018208630417749561, "loss": 2.3322, "step": 2484 }, { "epoch": 0.7768052516411379, "grad_norm": 0.2353515625, "learning_rate": 0.00018207226883275069, "loss": 1.5657, "step": 2485 }, { "epoch": 0.777117849327915, "grad_norm": 0.2275390625, "learning_rate": 0.0001820582285331983, "loss": 1.4964, "step": 2486 }, { "epoch": 0.7774304470146921, "grad_norm": 0.2333984375, "learning_rate": 0.00018204418327968607, "loss": 1.5711, "step": 2487 }, { "epoch": 0.7777430447014692, "grad_norm": 0.26171875, "learning_rate": 0.00018203013307306195, "loss": 1.999, "step": 2488 }, { "epoch": 0.7780556423882463, "grad_norm": 0.244140625, "learning_rate": 0.00018201607791417418, "loss": 1.5581, "step": 2489 }, { "epoch": 0.7783682400750235, "grad_norm": 0.23046875, "learning_rate": 0.00018200201780387126, "loss": 1.5618, "step": 2490 }, { "epoch": 0.7786808377618005, "grad_norm": 0.251953125, "learning_rate": 0.00018198795274300205, "loss": 1.6855, "step": 2491 }, { "epoch": 0.7789934354485777, "grad_norm": 0.23046875, "learning_rate": 0.00018197388273241563, "loss": 1.4388, "step": 2492 }, { "epoch": 0.7793060331353548, "grad_norm": 0.23046875, "learning_rate": 0.00018195980777296146, "loss": 1.3961, "step": 2493 }, { "epoch": 0.7796186308221319, "grad_norm": 0.2373046875, "learning_rate": 0.00018194572786548924, "loss": 1.3543, "step": 2494 }, { "epoch": 0.7799312285089091, "grad_norm": 0.2451171875, "learning_rate": 0.00018193164301084905, "loss": 1.6291, "step": 2495 }, { "epoch": 0.7802438261956861, "grad_norm": 0.244140625, "learning_rate": 0.00018191755320989112, "loss": 1.8612, "step": 2496 }, { "epoch": 0.7805564238824633, "grad_norm": 0.2431640625, "learning_rate": 0.00018190345846346613, "loss": 1.507, "step": 2497 }, { "epoch": 0.7808690215692404, "grad_norm": 0.2392578125, "learning_rate": 0.00018188935877242496, "loss": 1.4034, "step": 2498 }, { "epoch": 0.7811816192560175, "grad_norm": 0.2431640625, "learning_rate": 0.00018187525413761887, "loss": 1.3682, "step": 2499 }, { "epoch": 0.7814942169427946, "grad_norm": 0.25, "learning_rate": 0.00018186114455989936, "loss": 1.3907, "step": 2500 }, { "epoch": 0.7818068146295717, "grad_norm": 0.251953125, "learning_rate": 0.00018184703004011822, "loss": 1.506, "step": 2501 }, { "epoch": 0.7821194123163488, "grad_norm": 0.2294921875, "learning_rate": 0.00018183291057912758, "loss": 1.6376, "step": 2502 }, { "epoch": 0.782432010003126, "grad_norm": 0.23046875, "learning_rate": 0.00018181878617777985, "loss": 1.6524, "step": 2503 }, { "epoch": 0.7827446076899031, "grad_norm": 0.251953125, "learning_rate": 0.00018180465683692774, "loss": 1.6575, "step": 2504 }, { "epoch": 0.7830572053766802, "grad_norm": 0.2353515625, "learning_rate": 0.00018179052255742423, "loss": 1.6608, "step": 2505 }, { "epoch": 0.7833698030634574, "grad_norm": 0.2255859375, "learning_rate": 0.00018177638334012267, "loss": 1.7274, "step": 2506 }, { "epoch": 0.7836824007502344, "grad_norm": 0.2451171875, "learning_rate": 0.00018176223918587664, "loss": 1.7459, "step": 2507 }, { "epoch": 0.7839949984370116, "grad_norm": 0.232421875, "learning_rate": 0.00018174809009554005, "loss": 1.366, "step": 2508 }, { "epoch": 0.7843075961237886, "grad_norm": 0.234375, "learning_rate": 0.00018173393606996707, "loss": 1.7907, "step": 2509 }, { "epoch": 0.7846201938105658, "grad_norm": 0.240234375, "learning_rate": 0.0001817197771100122, "loss": 1.7705, "step": 2510 }, { "epoch": 0.784932791497343, "grad_norm": 0.248046875, "learning_rate": 0.00018170561321653026, "loss": 1.4995, "step": 2511 }, { "epoch": 0.78524538918412, "grad_norm": 0.24609375, "learning_rate": 0.00018169144439037632, "loss": 1.6226, "step": 2512 }, { "epoch": 0.7855579868708972, "grad_norm": 0.240234375, "learning_rate": 0.00018167727063240582, "loss": 1.619, "step": 2513 }, { "epoch": 0.7858705845576742, "grad_norm": 0.2275390625, "learning_rate": 0.00018166309194347438, "loss": 1.9021, "step": 2514 }, { "epoch": 0.7861831822444514, "grad_norm": 0.2275390625, "learning_rate": 0.000181648908324438, "loss": 1.9489, "step": 2515 }, { "epoch": 0.7864957799312285, "grad_norm": 0.2421875, "learning_rate": 0.00018163471977615303, "loss": 1.5399, "step": 2516 }, { "epoch": 0.7868083776180056, "grad_norm": 0.236328125, "learning_rate": 0.000181620526299476, "loss": 1.5515, "step": 2517 }, { "epoch": 0.7871209753047828, "grad_norm": 0.240234375, "learning_rate": 0.00018160632789526374, "loss": 1.4493, "step": 2518 }, { "epoch": 0.7874335729915599, "grad_norm": 0.2392578125, "learning_rate": 0.00018159212456437347, "loss": 1.6494, "step": 2519 }, { "epoch": 0.787746170678337, "grad_norm": 0.23828125, "learning_rate": 0.0001815779163076627, "loss": 1.7547, "step": 2520 }, { "epoch": 0.7880587683651141, "grad_norm": 0.2294921875, "learning_rate": 0.00018156370312598914, "loss": 1.7275, "step": 2521 }, { "epoch": 0.7883713660518912, "grad_norm": 0.2294921875, "learning_rate": 0.0001815494850202109, "loss": 1.3418, "step": 2522 }, { "epoch": 0.7886839637386683, "grad_norm": 0.2431640625, "learning_rate": 0.00018153526199118634, "loss": 1.5102, "step": 2523 }, { "epoch": 0.7889965614254455, "grad_norm": 0.248046875, "learning_rate": 0.0001815210340397741, "loss": 2.0452, "step": 2524 }, { "epoch": 0.7893091591122225, "grad_norm": 0.25, "learning_rate": 0.00018150680116683313, "loss": 1.5017, "step": 2525 }, { "epoch": 0.7896217567989997, "grad_norm": 0.2373046875, "learning_rate": 0.00018149256337322275, "loss": 2.0215, "step": 2526 }, { "epoch": 0.7899343544857768, "grad_norm": 0.2392578125, "learning_rate": 0.00018147832065980245, "loss": 1.7694, "step": 2527 }, { "epoch": 0.7902469521725539, "grad_norm": 0.240234375, "learning_rate": 0.00018146407302743208, "loss": 1.6186, "step": 2528 }, { "epoch": 0.7905595498593311, "grad_norm": 0.232421875, "learning_rate": 0.00018144982047697185, "loss": 1.7227, "step": 2529 }, { "epoch": 0.7908721475461081, "grad_norm": 0.2470703125, "learning_rate": 0.00018143556300928215, "loss": 1.6313, "step": 2530 }, { "epoch": 0.7911847452328853, "grad_norm": 0.232421875, "learning_rate": 0.00018142130062522377, "loss": 1.4294, "step": 2531 }, { "epoch": 0.7914973429196624, "grad_norm": 0.23828125, "learning_rate": 0.00018140703332565768, "loss": 1.5747, "step": 2532 }, { "epoch": 0.7918099406064395, "grad_norm": 0.25390625, "learning_rate": 0.00018139276111144525, "loss": 1.6087, "step": 2533 }, { "epoch": 0.7921225382932167, "grad_norm": 0.248046875, "learning_rate": 0.0001813784839834481, "loss": 1.6986, "step": 2534 }, { "epoch": 0.7924351359799937, "grad_norm": 0.251953125, "learning_rate": 0.00018136420194252818, "loss": 1.5952, "step": 2535 }, { "epoch": 0.7927477336667709, "grad_norm": 0.2392578125, "learning_rate": 0.00018134991498954773, "loss": 1.7808, "step": 2536 }, { "epoch": 0.793060331353548, "grad_norm": 0.2333984375, "learning_rate": 0.0001813356231253692, "loss": 1.518, "step": 2537 }, { "epoch": 0.7933729290403251, "grad_norm": 0.2421875, "learning_rate": 0.0001813213263508555, "loss": 1.82, "step": 2538 }, { "epoch": 0.7936855267271022, "grad_norm": 0.2353515625, "learning_rate": 0.0001813070246668697, "loss": 1.5595, "step": 2539 }, { "epoch": 0.7939981244138793, "grad_norm": 0.2333984375, "learning_rate": 0.00018129271807427517, "loss": 1.8371, "step": 2540 }, { "epoch": 0.7943107221006565, "grad_norm": 0.2265625, "learning_rate": 0.0001812784065739357, "loss": 1.5297, "step": 2541 }, { "epoch": 0.7946233197874336, "grad_norm": 0.236328125, "learning_rate": 0.0001812640901667152, "loss": 1.6262, "step": 2542 }, { "epoch": 0.7949359174742107, "grad_norm": 0.2373046875, "learning_rate": 0.00018124976885347806, "loss": 1.7128, "step": 2543 }, { "epoch": 0.7952485151609878, "grad_norm": 0.2314453125, "learning_rate": 0.00018123544263508884, "loss": 1.9219, "step": 2544 }, { "epoch": 0.7955611128477649, "grad_norm": 0.228515625, "learning_rate": 0.00018122111151241241, "loss": 1.5844, "step": 2545 }, { "epoch": 0.795873710534542, "grad_norm": 0.2333984375, "learning_rate": 0.000181206775486314, "loss": 1.806, "step": 2546 }, { "epoch": 0.7961863082213192, "grad_norm": 0.2470703125, "learning_rate": 0.00018119243455765903, "loss": 1.648, "step": 2547 }, { "epoch": 0.7964989059080962, "grad_norm": 0.251953125, "learning_rate": 0.00018117808872731336, "loss": 1.5256, "step": 2548 }, { "epoch": 0.7968115035948734, "grad_norm": 0.2421875, "learning_rate": 0.000181163737996143, "loss": 1.491, "step": 2549 }, { "epoch": 0.7971241012816506, "grad_norm": 0.2294921875, "learning_rate": 0.00018114938236501438, "loss": 1.8205, "step": 2550 }, { "epoch": 0.7974366989684276, "grad_norm": 0.234375, "learning_rate": 0.0001811350218347941, "loss": 1.6017, "step": 2551 }, { "epoch": 0.7977492966552048, "grad_norm": 0.240234375, "learning_rate": 0.0001811206564063492, "loss": 1.4423, "step": 2552 }, { "epoch": 0.7980618943419818, "grad_norm": 0.255859375, "learning_rate": 0.00018110628608054686, "loss": 1.8525, "step": 2553 }, { "epoch": 0.798374492028759, "grad_norm": 0.2373046875, "learning_rate": 0.0001810919108582547, "loss": 1.7098, "step": 2554 }, { "epoch": 0.7986870897155361, "grad_norm": 0.2275390625, "learning_rate": 0.00018107753074034054, "loss": 1.7347, "step": 2555 }, { "epoch": 0.7989996874023132, "grad_norm": 0.244140625, "learning_rate": 0.00018106314572767252, "loss": 1.6353, "step": 2556 }, { "epoch": 0.7993122850890904, "grad_norm": 0.244140625, "learning_rate": 0.00018104875582111913, "loss": 1.7014, "step": 2557 }, { "epoch": 0.7996248827758674, "grad_norm": 0.23046875, "learning_rate": 0.00018103436102154903, "loss": 1.5313, "step": 2558 }, { "epoch": 0.7999374804626446, "grad_norm": 0.24609375, "learning_rate": 0.0001810199613298313, "loss": 1.671, "step": 2559 }, { "epoch": 0.8002500781494217, "grad_norm": 0.240234375, "learning_rate": 0.00018100555674683527, "loss": 1.5859, "step": 2560 }, { "epoch": 0.8005626758361988, "grad_norm": 0.232421875, "learning_rate": 0.00018099114727343057, "loss": 1.4992, "step": 2561 }, { "epoch": 0.8008752735229759, "grad_norm": 0.232421875, "learning_rate": 0.00018097673291048706, "loss": 1.6654, "step": 2562 }, { "epoch": 0.8011878712097531, "grad_norm": 0.236328125, "learning_rate": 0.000180962313658875, "loss": 1.6192, "step": 2563 }, { "epoch": 0.8015004688965301, "grad_norm": 0.2333984375, "learning_rate": 0.0001809478895194649, "loss": 1.7311, "step": 2564 }, { "epoch": 0.8018130665833073, "grad_norm": 0.2373046875, "learning_rate": 0.00018093346049312758, "loss": 1.5685, "step": 2565 }, { "epoch": 0.8021256642700844, "grad_norm": 0.2412109375, "learning_rate": 0.0001809190265807341, "loss": 1.9562, "step": 2566 }, { "epoch": 0.8024382619568615, "grad_norm": 0.251953125, "learning_rate": 0.00018090458778315588, "loss": 1.662, "step": 2567 }, { "epoch": 0.8027508596436387, "grad_norm": 0.251953125, "learning_rate": 0.00018089014410126457, "loss": 1.611, "step": 2568 }, { "epoch": 0.8030634573304157, "grad_norm": 0.2265625, "learning_rate": 0.0001808756955359322, "loss": 1.7113, "step": 2569 }, { "epoch": 0.8033760550171929, "grad_norm": 0.234375, "learning_rate": 0.00018086124208803103, "loss": 1.3589, "step": 2570 }, { "epoch": 0.8036886527039699, "grad_norm": 0.23828125, "learning_rate": 0.00018084678375843364, "loss": 1.819, "step": 2571 }, { "epoch": 0.8040012503907471, "grad_norm": 0.2412109375, "learning_rate": 0.00018083232054801288, "loss": 1.6764, "step": 2572 }, { "epoch": 0.8043138480775243, "grad_norm": 0.2451171875, "learning_rate": 0.0001808178524576419, "loss": 1.5922, "step": 2573 }, { "epoch": 0.8046264457643013, "grad_norm": 0.251953125, "learning_rate": 0.0001808033794881942, "loss": 1.5336, "step": 2574 }, { "epoch": 0.8049390434510785, "grad_norm": 0.2275390625, "learning_rate": 0.0001807889016405435, "loss": 1.443, "step": 2575 }, { "epoch": 0.8052516411378556, "grad_norm": 0.2431640625, "learning_rate": 0.0001807744189155639, "loss": 1.7123, "step": 2576 }, { "epoch": 0.8055642388246327, "grad_norm": 0.24609375, "learning_rate": 0.00018075993131412966, "loss": 1.9127, "step": 2577 }, { "epoch": 0.8058768365114098, "grad_norm": 0.2412109375, "learning_rate": 0.00018074543883711547, "loss": 1.7716, "step": 2578 }, { "epoch": 0.8061894341981869, "grad_norm": 0.2470703125, "learning_rate": 0.00018073094148539625, "loss": 1.7905, "step": 2579 }, { "epoch": 0.806502031884964, "grad_norm": 0.236328125, "learning_rate": 0.00018071643925984717, "loss": 1.5217, "step": 2580 }, { "epoch": 0.8068146295717412, "grad_norm": 0.2412109375, "learning_rate": 0.00018070193216134384, "loss": 1.6451, "step": 2581 }, { "epoch": 0.8071272272585183, "grad_norm": 0.2353515625, "learning_rate": 0.00018068742019076203, "loss": 1.7439, "step": 2582 }, { "epoch": 0.8074398249452954, "grad_norm": 0.25, "learning_rate": 0.0001806729033489778, "loss": 2.0439, "step": 2583 }, { "epoch": 0.8077524226320725, "grad_norm": 0.263671875, "learning_rate": 0.0001806583816368676, "loss": 1.7726, "step": 2584 }, { "epoch": 0.8080650203188496, "grad_norm": 0.2333984375, "learning_rate": 0.00018064385505530813, "loss": 1.8142, "step": 2585 }, { "epoch": 0.8083776180056268, "grad_norm": 0.234375, "learning_rate": 0.00018062932360517637, "loss": 1.8507, "step": 2586 }, { "epoch": 0.8086902156924038, "grad_norm": 0.2412109375, "learning_rate": 0.0001806147872873496, "loss": 1.8861, "step": 2587 }, { "epoch": 0.809002813379181, "grad_norm": 0.24609375, "learning_rate": 0.00018060024610270538, "loss": 2.04, "step": 2588 }, { "epoch": 0.8093154110659581, "grad_norm": 0.23828125, "learning_rate": 0.0001805857000521216, "loss": 1.5433, "step": 2589 }, { "epoch": 0.8096280087527352, "grad_norm": 0.23828125, "learning_rate": 0.00018057114913647642, "loss": 1.5803, "step": 2590 }, { "epoch": 0.8099406064395124, "grad_norm": 0.244140625, "learning_rate": 0.0001805565933566483, "loss": 1.7928, "step": 2591 }, { "epoch": 0.8102532041262894, "grad_norm": 0.25390625, "learning_rate": 0.00018054203271351599, "loss": 1.8568, "step": 2592 }, { "epoch": 0.8105658018130666, "grad_norm": 0.2353515625, "learning_rate": 0.00018052746720795848, "loss": 1.5727, "step": 2593 }, { "epoch": 0.8108783994998437, "grad_norm": 0.251953125, "learning_rate": 0.00018051289684085518, "loss": 1.543, "step": 2594 }, { "epoch": 0.8111909971866208, "grad_norm": 0.2421875, "learning_rate": 0.00018049832161308574, "loss": 1.5196, "step": 2595 }, { "epoch": 0.811503594873398, "grad_norm": 0.2412109375, "learning_rate": 0.00018048374152553, "loss": 1.592, "step": 2596 }, { "epoch": 0.811816192560175, "grad_norm": 0.2421875, "learning_rate": 0.00018046915657906826, "loss": 1.6238, "step": 2597 }, { "epoch": 0.8121287902469522, "grad_norm": 0.2421875, "learning_rate": 0.00018045456677458094, "loss": 1.6494, "step": 2598 }, { "epoch": 0.8124413879337293, "grad_norm": 0.2373046875, "learning_rate": 0.00018043997211294896, "loss": 1.7159, "step": 2599 }, { "epoch": 0.8127539856205064, "grad_norm": 0.244140625, "learning_rate": 0.00018042537259505332, "loss": 1.7333, "step": 2600 }, { "epoch": 0.8130665833072835, "grad_norm": 0.2275390625, "learning_rate": 0.00018041076822177546, "loss": 1.7428, "step": 2601 }, { "epoch": 0.8133791809940606, "grad_norm": 0.244140625, "learning_rate": 0.00018039615899399704, "loss": 1.5266, "step": 2602 }, { "epoch": 0.8136917786808378, "grad_norm": 0.2451171875, "learning_rate": 0.00018038154491260006, "loss": 1.4482, "step": 2603 }, { "epoch": 0.8140043763676149, "grad_norm": 0.25, "learning_rate": 0.0001803669259784668, "loss": 1.8164, "step": 2604 }, { "epoch": 0.814316974054392, "grad_norm": 0.24609375, "learning_rate": 0.00018035230219247978, "loss": 1.7801, "step": 2605 }, { "epoch": 0.8146295717411691, "grad_norm": 0.224609375, "learning_rate": 0.0001803376735555219, "loss": 1.5818, "step": 2606 }, { "epoch": 0.8149421694279463, "grad_norm": 0.236328125, "learning_rate": 0.0001803230400684763, "loss": 2.0025, "step": 2607 }, { "epoch": 0.8152547671147233, "grad_norm": 0.240234375, "learning_rate": 0.0001803084017322264, "loss": 1.6328, "step": 2608 }, { "epoch": 0.8155673648015005, "grad_norm": 0.30859375, "learning_rate": 0.00018029375854765597, "loss": 2.289, "step": 2609 }, { "epoch": 0.8158799624882775, "grad_norm": 0.2333984375, "learning_rate": 0.00018027911051564897, "loss": 1.4681, "step": 2610 }, { "epoch": 0.8161925601750547, "grad_norm": 0.2373046875, "learning_rate": 0.0001802644576370898, "loss": 1.7437, "step": 2611 }, { "epoch": 0.8165051578618319, "grad_norm": 0.228515625, "learning_rate": 0.00018024979991286303, "loss": 2.0136, "step": 2612 }, { "epoch": 0.8168177555486089, "grad_norm": 0.240234375, "learning_rate": 0.0001802351373438536, "loss": 1.6401, "step": 2613 }, { "epoch": 0.8171303532353861, "grad_norm": 0.2373046875, "learning_rate": 0.00018022046993094665, "loss": 1.5986, "step": 2614 }, { "epoch": 0.8174429509221631, "grad_norm": 0.228515625, "learning_rate": 0.00018020579767502774, "loss": 1.7392, "step": 2615 }, { "epoch": 0.8177555486089403, "grad_norm": 0.2333984375, "learning_rate": 0.0001801911205769826, "loss": 1.6622, "step": 2616 }, { "epoch": 0.8180681462957174, "grad_norm": 0.232421875, "learning_rate": 0.0001801764386376973, "loss": 1.6786, "step": 2617 }, { "epoch": 0.8183807439824945, "grad_norm": 0.2490234375, "learning_rate": 0.0001801617518580583, "loss": 1.6723, "step": 2618 }, { "epoch": 0.8186933416692717, "grad_norm": 0.232421875, "learning_rate": 0.0001801470602389521, "loss": 1.6344, "step": 2619 }, { "epoch": 0.8190059393560488, "grad_norm": 0.2431640625, "learning_rate": 0.0001801323637812658, "loss": 1.8773, "step": 2620 }, { "epoch": 0.8193185370428259, "grad_norm": 0.2412109375, "learning_rate": 0.00018011766248588655, "loss": 1.7633, "step": 2621 }, { "epoch": 0.819631134729603, "grad_norm": 0.24609375, "learning_rate": 0.00018010295635370192, "loss": 1.7818, "step": 2622 }, { "epoch": 0.8199437324163801, "grad_norm": 0.2314453125, "learning_rate": 0.00018008824538559977, "loss": 1.5338, "step": 2623 }, { "epoch": 0.8202563301031572, "grad_norm": 0.2890625, "learning_rate": 0.00018007352958246818, "loss": 2.1521, "step": 2624 }, { "epoch": 0.8205689277899344, "grad_norm": 0.2421875, "learning_rate": 0.00018005880894519555, "loss": 1.6819, "step": 2625 }, { "epoch": 0.8208815254767114, "grad_norm": 0.2314453125, "learning_rate": 0.00018004408347467062, "loss": 1.7966, "step": 2626 }, { "epoch": 0.8211941231634886, "grad_norm": 0.2275390625, "learning_rate": 0.00018002935317178235, "loss": 1.5681, "step": 2627 }, { "epoch": 0.8215067208502657, "grad_norm": 0.2490234375, "learning_rate": 0.00018001461803742008, "loss": 1.8119, "step": 2628 }, { "epoch": 0.8218193185370428, "grad_norm": 0.259765625, "learning_rate": 0.00017999987807247334, "loss": 2.2241, "step": 2629 }, { "epoch": 0.82213191622382, "grad_norm": 0.2470703125, "learning_rate": 0.00017998513327783199, "loss": 1.5033, "step": 2630 }, { "epoch": 0.822444513910597, "grad_norm": 0.2431640625, "learning_rate": 0.00017997038365438628, "loss": 1.481, "step": 2631 }, { "epoch": 0.8227571115973742, "grad_norm": 0.2578125, "learning_rate": 0.00017995562920302652, "loss": 1.7684, "step": 2632 }, { "epoch": 0.8230697092841514, "grad_norm": 0.251953125, "learning_rate": 0.0001799408699246436, "loss": 1.6599, "step": 2633 }, { "epoch": 0.8233823069709284, "grad_norm": 0.255859375, "learning_rate": 0.00017992610582012847, "loss": 1.3327, "step": 2634 }, { "epoch": 0.8236949046577056, "grad_norm": 0.236328125, "learning_rate": 0.0001799113368903725, "loss": 1.7121, "step": 2635 }, { "epoch": 0.8240075023444826, "grad_norm": 0.22265625, "learning_rate": 0.00017989656313626727, "loss": 1.766, "step": 2636 }, { "epoch": 0.8243201000312598, "grad_norm": 0.2265625, "learning_rate": 0.0001798817845587047, "loss": 1.85, "step": 2637 }, { "epoch": 0.8246326977180369, "grad_norm": 0.240234375, "learning_rate": 0.000179867001158577, "loss": 1.8962, "step": 2638 }, { "epoch": 0.824945295404814, "grad_norm": 0.2236328125, "learning_rate": 0.0001798522129367767, "loss": 1.4497, "step": 2639 }, { "epoch": 0.8252578930915911, "grad_norm": 0.2421875, "learning_rate": 0.00017983741989419655, "loss": 1.6794, "step": 2640 }, { "epoch": 0.8255704907783682, "grad_norm": 0.2490234375, "learning_rate": 0.0001798226220317296, "loss": 1.718, "step": 2641 }, { "epoch": 0.8258830884651454, "grad_norm": 0.23046875, "learning_rate": 0.00017980781935026925, "loss": 1.7489, "step": 2642 }, { "epoch": 0.8261956861519225, "grad_norm": 0.25390625, "learning_rate": 0.0001797930118507091, "loss": 1.7344, "step": 2643 }, { "epoch": 0.8265082838386996, "grad_norm": 0.25, "learning_rate": 0.0001797781995339432, "loss": 1.7674, "step": 2644 }, { "epoch": 0.8268208815254767, "grad_norm": 0.2373046875, "learning_rate": 0.0001797633824008657, "loss": 2.0352, "step": 2645 }, { "epoch": 0.8271334792122538, "grad_norm": 0.2265625, "learning_rate": 0.00017974856045237117, "loss": 1.6354, "step": 2646 }, { "epoch": 0.8274460768990309, "grad_norm": 0.2294921875, "learning_rate": 0.00017973373368935445, "loss": 1.737, "step": 2647 }, { "epoch": 0.8277586745858081, "grad_norm": 0.25, "learning_rate": 0.00017971890211271059, "loss": 1.7081, "step": 2648 }, { "epoch": 0.8280712722725851, "grad_norm": 0.251953125, "learning_rate": 0.000179704065723335, "loss": 1.3865, "step": 2649 }, { "epoch": 0.8283838699593623, "grad_norm": 0.2373046875, "learning_rate": 0.00017968922452212343, "loss": 1.5347, "step": 2650 }, { "epoch": 0.8286964676461395, "grad_norm": 0.2255859375, "learning_rate": 0.00017967437850997185, "loss": 1.7372, "step": 2651 }, { "epoch": 0.8290090653329165, "grad_norm": 0.2392578125, "learning_rate": 0.00017965952768777649, "loss": 1.5994, "step": 2652 }, { "epoch": 0.8293216630196937, "grad_norm": 0.2431640625, "learning_rate": 0.0001796446720564339, "loss": 1.8905, "step": 2653 }, { "epoch": 0.8296342607064707, "grad_norm": 0.3359375, "learning_rate": 0.00017962981161684098, "loss": 2.5074, "step": 2654 }, { "epoch": 0.8299468583932479, "grad_norm": 0.2421875, "learning_rate": 0.00017961494636989486, "loss": 1.9347, "step": 2655 }, { "epoch": 0.830259456080025, "grad_norm": 0.24609375, "learning_rate": 0.00017960007631649298, "loss": 1.8819, "step": 2656 }, { "epoch": 0.8305720537668021, "grad_norm": 0.240234375, "learning_rate": 0.00017958520145753307, "loss": 1.6299, "step": 2657 }, { "epoch": 0.8308846514535793, "grad_norm": 0.25390625, "learning_rate": 0.00017957032179391312, "loss": 1.7028, "step": 2658 }, { "epoch": 0.8311972491403563, "grad_norm": 0.23046875, "learning_rate": 0.00017955543732653143, "loss": 1.8788, "step": 2659 }, { "epoch": 0.8315098468271335, "grad_norm": 0.228515625, "learning_rate": 0.0001795405480562866, "loss": 1.7432, "step": 2660 }, { "epoch": 0.8318224445139106, "grad_norm": 0.24609375, "learning_rate": 0.00017952565398407757, "loss": 1.583, "step": 2661 }, { "epoch": 0.8321350422006877, "grad_norm": 0.240234375, "learning_rate": 0.00017951075511080347, "loss": 1.7078, "step": 2662 }, { "epoch": 0.8324476398874648, "grad_norm": 0.234375, "learning_rate": 0.0001794958514373637, "loss": 1.4488, "step": 2663 }, { "epoch": 0.832760237574242, "grad_norm": 0.2421875, "learning_rate": 0.00017948094296465814, "loss": 1.6082, "step": 2664 }, { "epoch": 0.833072835261019, "grad_norm": 0.244140625, "learning_rate": 0.00017946602969358673, "loss": 1.6088, "step": 2665 }, { "epoch": 0.8333854329477962, "grad_norm": 0.248046875, "learning_rate": 0.00017945111162504987, "loss": 1.7525, "step": 2666 }, { "epoch": 0.8336980306345733, "grad_norm": 0.2470703125, "learning_rate": 0.00017943618875994815, "loss": 1.8168, "step": 2667 }, { "epoch": 0.8340106283213504, "grad_norm": 0.244140625, "learning_rate": 0.00017942126109918248, "loss": 1.7631, "step": 2668 }, { "epoch": 0.8343232260081276, "grad_norm": 0.234375, "learning_rate": 0.00017940632864365408, "loss": 1.665, "step": 2669 }, { "epoch": 0.8346358236949046, "grad_norm": 0.2265625, "learning_rate": 0.00017939139139426443, "loss": 1.7743, "step": 2670 }, { "epoch": 0.8349484213816818, "grad_norm": 0.2353515625, "learning_rate": 0.0001793764493519153, "loss": 1.6251, "step": 2671 }, { "epoch": 0.8352610190684588, "grad_norm": 0.2294921875, "learning_rate": 0.00017936150251750876, "loss": 1.5676, "step": 2672 }, { "epoch": 0.835573616755236, "grad_norm": 0.244140625, "learning_rate": 0.0001793465508919472, "loss": 1.9198, "step": 2673 }, { "epoch": 0.8358862144420132, "grad_norm": 0.2333984375, "learning_rate": 0.00017933159447613325, "loss": 1.8999, "step": 2674 }, { "epoch": 0.8361988121287902, "grad_norm": 0.232421875, "learning_rate": 0.00017931663327096985, "loss": 1.5773, "step": 2675 }, { "epoch": 0.8365114098155674, "grad_norm": 0.251953125, "learning_rate": 0.00017930166727736022, "loss": 1.5615, "step": 2676 }, { "epoch": 0.8368240075023445, "grad_norm": 0.2265625, "learning_rate": 0.0001792866964962079, "loss": 1.7466, "step": 2677 }, { "epoch": 0.8371366051891216, "grad_norm": 0.236328125, "learning_rate": 0.00017927172092841665, "loss": 1.5719, "step": 2678 }, { "epoch": 0.8374492028758987, "grad_norm": 0.236328125, "learning_rate": 0.00017925674057489062, "loss": 1.8351, "step": 2679 }, { "epoch": 0.8377618005626758, "grad_norm": 0.2421875, "learning_rate": 0.00017924175543653412, "loss": 1.3423, "step": 2680 }, { "epoch": 0.838074398249453, "grad_norm": 0.2314453125, "learning_rate": 0.0001792267655142519, "loss": 1.8691, "step": 2681 }, { "epoch": 0.8383869959362301, "grad_norm": 0.23046875, "learning_rate": 0.00017921177080894887, "loss": 1.5727, "step": 2682 }, { "epoch": 0.8386995936230072, "grad_norm": 0.2236328125, "learning_rate": 0.0001791967713215303, "loss": 1.5138, "step": 2683 }, { "epoch": 0.8390121913097843, "grad_norm": 0.244140625, "learning_rate": 0.00017918176705290174, "loss": 1.7783, "step": 2684 }, { "epoch": 0.8393247889965614, "grad_norm": 0.23828125, "learning_rate": 0.00017916675800396897, "loss": 1.8948, "step": 2685 }, { "epoch": 0.8396373866833385, "grad_norm": 0.240234375, "learning_rate": 0.00017915174417563816, "loss": 1.6654, "step": 2686 }, { "epoch": 0.8399499843701157, "grad_norm": 0.2451171875, "learning_rate": 0.00017913672556881566, "loss": 1.8393, "step": 2687 }, { "epoch": 0.8402625820568927, "grad_norm": 0.232421875, "learning_rate": 0.00017912170218440822, "loss": 1.5724, "step": 2688 }, { "epoch": 0.8405751797436699, "grad_norm": 0.236328125, "learning_rate": 0.0001791066740233228, "loss": 1.5801, "step": 2689 }, { "epoch": 0.8408877774304471, "grad_norm": 0.259765625, "learning_rate": 0.00017909164108646667, "loss": 1.6645, "step": 2690 }, { "epoch": 0.8412003751172241, "grad_norm": 0.2255859375, "learning_rate": 0.00017907660337474735, "loss": 1.6794, "step": 2691 }, { "epoch": 0.8415129728040013, "grad_norm": 0.2392578125, "learning_rate": 0.0001790615608890727, "loss": 1.6382, "step": 2692 }, { "epoch": 0.8418255704907783, "grad_norm": 0.24609375, "learning_rate": 0.00017904651363035093, "loss": 1.6977, "step": 2693 }, { "epoch": 0.8421381681775555, "grad_norm": 0.23828125, "learning_rate": 0.00017903146159949036, "loss": 1.4432, "step": 2694 }, { "epoch": 0.8424507658643327, "grad_norm": 0.22265625, "learning_rate": 0.00017901640479739975, "loss": 1.7628, "step": 2695 }, { "epoch": 0.8427633635511097, "grad_norm": 0.2470703125, "learning_rate": 0.0001790013432249881, "loss": 1.6406, "step": 2696 }, { "epoch": 0.8430759612378869, "grad_norm": 0.37890625, "learning_rate": 0.00017898627688316468, "loss": 2.2605, "step": 2697 }, { "epoch": 0.8433885589246639, "grad_norm": 0.255859375, "learning_rate": 0.00017897120577283908, "loss": 1.6559, "step": 2698 }, { "epoch": 0.8437011566114411, "grad_norm": 0.2373046875, "learning_rate": 0.00017895612989492113, "loss": 1.7878, "step": 2699 }, { "epoch": 0.8440137542982182, "grad_norm": 0.25, "learning_rate": 0.000178941049250321, "loss": 1.7082, "step": 2700 }, { "epoch": 0.8443263519849953, "grad_norm": 0.228515625, "learning_rate": 0.00017892596383994915, "loss": 1.6265, "step": 2701 }, { "epoch": 0.8446389496717724, "grad_norm": 0.2421875, "learning_rate": 0.00017891087366471632, "loss": 1.6036, "step": 2702 }, { "epoch": 0.8449515473585495, "grad_norm": 0.2470703125, "learning_rate": 0.00017889577872553343, "loss": 1.4701, "step": 2703 }, { "epoch": 0.8452641450453267, "grad_norm": 0.236328125, "learning_rate": 0.00017888067902331186, "loss": 1.7345, "step": 2704 }, { "epoch": 0.8455767427321038, "grad_norm": 0.2373046875, "learning_rate": 0.0001788655745589632, "loss": 1.7042, "step": 2705 }, { "epoch": 0.8458893404188809, "grad_norm": 0.2392578125, "learning_rate": 0.0001788504653333993, "loss": 1.9033, "step": 2706 }, { "epoch": 0.846201938105658, "grad_norm": 0.216796875, "learning_rate": 0.0001788353513475323, "loss": 1.6525, "step": 2707 }, { "epoch": 0.8465145357924352, "grad_norm": 0.251953125, "learning_rate": 0.0001788202326022747, "loss": 1.6119, "step": 2708 }, { "epoch": 0.8468271334792122, "grad_norm": 0.2392578125, "learning_rate": 0.0001788051090985392, "loss": 1.7473, "step": 2709 }, { "epoch": 0.8471397311659894, "grad_norm": 0.244140625, "learning_rate": 0.00017878998083723885, "loss": 1.8992, "step": 2710 }, { "epoch": 0.8474523288527664, "grad_norm": 0.2353515625, "learning_rate": 0.00017877484781928698, "loss": 1.6285, "step": 2711 }, { "epoch": 0.8477649265395436, "grad_norm": 0.24609375, "learning_rate": 0.00017875971004559712, "loss": 1.671, "step": 2712 }, { "epoch": 0.8480775242263208, "grad_norm": 0.2314453125, "learning_rate": 0.0001787445675170832, "loss": 1.639, "step": 2713 }, { "epoch": 0.8483901219130978, "grad_norm": 0.263671875, "learning_rate": 0.00017872942023465944, "loss": 2.2887, "step": 2714 }, { "epoch": 0.848702719599875, "grad_norm": 0.2392578125, "learning_rate": 0.00017871426819924025, "loss": 1.6424, "step": 2715 }, { "epoch": 0.849015317286652, "grad_norm": 0.2373046875, "learning_rate": 0.00017869911141174034, "loss": 1.6615, "step": 2716 }, { "epoch": 0.8493279149734292, "grad_norm": 0.251953125, "learning_rate": 0.00017868394987307482, "loss": 1.8865, "step": 2717 }, { "epoch": 0.8496405126602063, "grad_norm": 0.251953125, "learning_rate": 0.00017866878358415895, "loss": 1.4584, "step": 2718 }, { "epoch": 0.8499531103469834, "grad_norm": 0.236328125, "learning_rate": 0.0001786536125459084, "loss": 1.7852, "step": 2719 }, { "epoch": 0.8502657080337606, "grad_norm": 0.2392578125, "learning_rate": 0.000178638436759239, "loss": 1.5773, "step": 2720 }, { "epoch": 0.8505783057205377, "grad_norm": 0.255859375, "learning_rate": 0.00017862325622506698, "loss": 1.5571, "step": 2721 }, { "epoch": 0.8508909034073148, "grad_norm": 0.2490234375, "learning_rate": 0.00017860807094430877, "loss": 1.6325, "step": 2722 }, { "epoch": 0.8512035010940919, "grad_norm": 0.2421875, "learning_rate": 0.0001785928809178812, "loss": 2.1872, "step": 2723 }, { "epoch": 0.851516098780869, "grad_norm": 0.2421875, "learning_rate": 0.0001785776861467012, "loss": 1.7218, "step": 2724 }, { "epoch": 0.8518286964676461, "grad_norm": 0.232421875, "learning_rate": 0.00017856248663168618, "loss": 1.8967, "step": 2725 }, { "epoch": 0.8521412941544233, "grad_norm": 0.234375, "learning_rate": 0.00017854728237375373, "loss": 1.412, "step": 2726 }, { "epoch": 0.8524538918412004, "grad_norm": 0.2490234375, "learning_rate": 0.00017853207337382174, "loss": 1.5824, "step": 2727 }, { "epoch": 0.8527664895279775, "grad_norm": 0.2392578125, "learning_rate": 0.0001785168596328084, "loss": 1.6068, "step": 2728 }, { "epoch": 0.8530790872147546, "grad_norm": 0.2314453125, "learning_rate": 0.0001785016411516322, "loss": 1.5164, "step": 2729 }, { "epoch": 0.8533916849015317, "grad_norm": 0.2412109375, "learning_rate": 0.00017848641793121188, "loss": 1.8491, "step": 2730 }, { "epoch": 0.8537042825883089, "grad_norm": 0.24609375, "learning_rate": 0.0001784711899724665, "loss": 1.6247, "step": 2731 }, { "epoch": 0.8540168802750859, "grad_norm": 0.25390625, "learning_rate": 0.0001784559572763154, "loss": 1.4966, "step": 2732 }, { "epoch": 0.8543294779618631, "grad_norm": 0.2275390625, "learning_rate": 0.00017844071984367816, "loss": 1.5311, "step": 2733 }, { "epoch": 0.8546420756486403, "grad_norm": 0.2255859375, "learning_rate": 0.0001784254776754747, "loss": 1.521, "step": 2734 }, { "epoch": 0.8549546733354173, "grad_norm": 0.2353515625, "learning_rate": 0.00017841023077262523, "loss": 1.7637, "step": 2735 }, { "epoch": 0.8552672710221945, "grad_norm": 0.2353515625, "learning_rate": 0.0001783949791360502, "loss": 1.3663, "step": 2736 }, { "epoch": 0.8555798687089715, "grad_norm": 0.2431640625, "learning_rate": 0.0001783797227666704, "loss": 1.6854, "step": 2737 }, { "epoch": 0.8558924663957487, "grad_norm": 0.232421875, "learning_rate": 0.00017836446166540683, "loss": 1.7461, "step": 2738 }, { "epoch": 0.8562050640825258, "grad_norm": 0.2373046875, "learning_rate": 0.00017834919583318087, "loss": 1.5579, "step": 2739 }, { "epoch": 0.8565176617693029, "grad_norm": 0.2333984375, "learning_rate": 0.00017833392527091412, "loss": 1.8503, "step": 2740 }, { "epoch": 0.85683025945608, "grad_norm": 0.2373046875, "learning_rate": 0.00017831864997952846, "loss": 1.7036, "step": 2741 }, { "epoch": 0.8571428571428571, "grad_norm": 0.2392578125, "learning_rate": 0.00017830336995994608, "loss": 1.546, "step": 2742 }, { "epoch": 0.8574554548296343, "grad_norm": 0.2421875, "learning_rate": 0.00017828808521308949, "loss": 1.7367, "step": 2743 }, { "epoch": 0.8577680525164114, "grad_norm": 0.2294921875, "learning_rate": 0.00017827279573988145, "loss": 1.6342, "step": 2744 }, { "epoch": 0.8580806502031885, "grad_norm": 0.2421875, "learning_rate": 0.00017825750154124497, "loss": 1.4992, "step": 2745 }, { "epoch": 0.8583932478899656, "grad_norm": 0.2333984375, "learning_rate": 0.00017824220261810337, "loss": 1.6274, "step": 2746 }, { "epoch": 0.8587058455767427, "grad_norm": 0.25, "learning_rate": 0.00017822689897138035, "loss": 1.4625, "step": 2747 }, { "epoch": 0.8590184432635198, "grad_norm": 0.232421875, "learning_rate": 0.00017821159060199974, "loss": 1.4388, "step": 2748 }, { "epoch": 0.859331040950297, "grad_norm": 0.23828125, "learning_rate": 0.00017819627751088573, "loss": 1.4505, "step": 2749 }, { "epoch": 0.859643638637074, "grad_norm": 0.2255859375, "learning_rate": 0.0001781809596989628, "loss": 1.4593, "step": 2750 }, { "epoch": 0.8599562363238512, "grad_norm": 0.224609375, "learning_rate": 0.0001781656371671557, "loss": 1.5498, "step": 2751 }, { "epoch": 0.8602688340106284, "grad_norm": 0.30859375, "learning_rate": 0.00017815030991638947, "loss": 2.1876, "step": 2752 }, { "epoch": 0.8605814316974054, "grad_norm": 0.2392578125, "learning_rate": 0.00017813497794758946, "loss": 1.4955, "step": 2753 }, { "epoch": 0.8608940293841826, "grad_norm": 0.236328125, "learning_rate": 0.00017811964126168123, "loss": 1.6525, "step": 2754 }, { "epoch": 0.8612066270709596, "grad_norm": 0.24609375, "learning_rate": 0.00017810429985959077, "loss": 1.7273, "step": 2755 }, { "epoch": 0.8615192247577368, "grad_norm": 0.26171875, "learning_rate": 0.00017808895374224414, "loss": 1.6337, "step": 2756 }, { "epoch": 0.861831822444514, "grad_norm": 0.232421875, "learning_rate": 0.0001780736029105679, "loss": 1.572, "step": 2757 }, { "epoch": 0.862144420131291, "grad_norm": 0.2421875, "learning_rate": 0.00017805824736548872, "loss": 1.7677, "step": 2758 }, { "epoch": 0.8624570178180682, "grad_norm": 0.2353515625, "learning_rate": 0.00017804288710793374, "loss": 1.4813, "step": 2759 }, { "epoch": 0.8627696155048452, "grad_norm": 0.255859375, "learning_rate": 0.00017802752213883017, "loss": 1.863, "step": 2760 }, { "epoch": 0.8630822131916224, "grad_norm": 0.232421875, "learning_rate": 0.00017801215245910569, "loss": 1.7106, "step": 2761 }, { "epoch": 0.8633948108783995, "grad_norm": 0.2392578125, "learning_rate": 0.00017799677806968811, "loss": 1.5748, "step": 2762 }, { "epoch": 0.8637074085651766, "grad_norm": 0.263671875, "learning_rate": 0.00017798139897150564, "loss": 1.7248, "step": 2763 }, { "epoch": 0.8640200062519537, "grad_norm": 0.2451171875, "learning_rate": 0.00017796601516548676, "loss": 1.7132, "step": 2764 }, { "epoch": 0.8643326039387309, "grad_norm": 0.2373046875, "learning_rate": 0.0001779506266525602, "loss": 1.742, "step": 2765 }, { "epoch": 0.864645201625508, "grad_norm": 0.2431640625, "learning_rate": 0.000177935233433655, "loss": 1.8706, "step": 2766 }, { "epoch": 0.8649577993122851, "grad_norm": 0.2412109375, "learning_rate": 0.0001779198355097004, "loss": 1.5686, "step": 2767 }, { "epoch": 0.8652703969990622, "grad_norm": 0.234375, "learning_rate": 0.00017790443288162605, "loss": 1.7863, "step": 2768 }, { "epoch": 0.8655829946858393, "grad_norm": 0.248046875, "learning_rate": 0.00017788902555036182, "loss": 1.6466, "step": 2769 }, { "epoch": 0.8658955923726165, "grad_norm": 0.26171875, "learning_rate": 0.00017787361351683786, "loss": 1.7133, "step": 2770 }, { "epoch": 0.8662081900593935, "grad_norm": 0.2314453125, "learning_rate": 0.00017785819678198462, "loss": 1.7669, "step": 2771 }, { "epoch": 0.8665207877461707, "grad_norm": 0.23046875, "learning_rate": 0.0001778427753467328, "loss": 1.7054, "step": 2772 }, { "epoch": 0.8668333854329477, "grad_norm": 0.240234375, "learning_rate": 0.00017782734921201348, "loss": 1.5878, "step": 2773 }, { "epoch": 0.8671459831197249, "grad_norm": 0.2490234375, "learning_rate": 0.00017781191837875788, "loss": 1.5847, "step": 2774 }, { "epoch": 0.8674585808065021, "grad_norm": 0.240234375, "learning_rate": 0.0001777964828478976, "loss": 1.556, "step": 2775 }, { "epoch": 0.8677711784932791, "grad_norm": 0.236328125, "learning_rate": 0.00017778104262036455, "loss": 1.481, "step": 2776 }, { "epoch": 0.8680837761800563, "grad_norm": 0.2373046875, "learning_rate": 0.0001777655976970908, "loss": 1.5842, "step": 2777 }, { "epoch": 0.8683963738668334, "grad_norm": 0.251953125, "learning_rate": 0.00017775014807900884, "loss": 1.6188, "step": 2778 }, { "epoch": 0.8687089715536105, "grad_norm": 0.2431640625, "learning_rate": 0.00017773469376705138, "loss": 1.7405, "step": 2779 }, { "epoch": 0.8690215692403876, "grad_norm": 0.234375, "learning_rate": 0.00017771923476215138, "loss": 2.009, "step": 2780 }, { "epoch": 0.8693341669271647, "grad_norm": 0.2373046875, "learning_rate": 0.00017770377106524215, "loss": 1.5022, "step": 2781 }, { "epoch": 0.8696467646139419, "grad_norm": 0.298828125, "learning_rate": 0.0001776883026772572, "loss": 2.3243, "step": 2782 }, { "epoch": 0.869959362300719, "grad_norm": 0.25, "learning_rate": 0.00017767282959913047, "loss": 1.5778, "step": 2783 }, { "epoch": 0.8702719599874961, "grad_norm": 0.244140625, "learning_rate": 0.00017765735183179602, "loss": 1.648, "step": 2784 }, { "epoch": 0.8705845576742732, "grad_norm": 0.23828125, "learning_rate": 0.00017764186937618828, "loss": 1.9461, "step": 2785 }, { "epoch": 0.8708971553610503, "grad_norm": 0.23828125, "learning_rate": 0.00017762638223324192, "loss": 1.6331, "step": 2786 }, { "epoch": 0.8712097530478274, "grad_norm": 0.23046875, "learning_rate": 0.00017761089040389198, "loss": 1.5506, "step": 2787 }, { "epoch": 0.8715223507346046, "grad_norm": 0.236328125, "learning_rate": 0.00017759539388907366, "loss": 1.4817, "step": 2788 }, { "epoch": 0.8718349484213817, "grad_norm": 0.2412109375, "learning_rate": 0.00017757989268972257, "loss": 1.4606, "step": 2789 }, { "epoch": 0.8721475461081588, "grad_norm": 0.2373046875, "learning_rate": 0.00017756438680677445, "loss": 1.4484, "step": 2790 }, { "epoch": 0.872460143794936, "grad_norm": 0.234375, "learning_rate": 0.00017754887624116548, "loss": 1.5865, "step": 2791 }, { "epoch": 0.872772741481713, "grad_norm": 0.2353515625, "learning_rate": 0.00017753336099383203, "loss": 1.514, "step": 2792 }, { "epoch": 0.8730853391684902, "grad_norm": 0.2373046875, "learning_rate": 0.00017751784106571079, "loss": 1.3963, "step": 2793 }, { "epoch": 0.8733979368552672, "grad_norm": 0.2392578125, "learning_rate": 0.00017750231645773869, "loss": 1.8982, "step": 2794 }, { "epoch": 0.8737105345420444, "grad_norm": 0.232421875, "learning_rate": 0.00017748678717085297, "loss": 1.7107, "step": 2795 }, { "epoch": 0.8740231322288216, "grad_norm": 0.2431640625, "learning_rate": 0.00017747125320599118, "loss": 1.5219, "step": 2796 }, { "epoch": 0.8743357299155986, "grad_norm": 0.236328125, "learning_rate": 0.0001774557145640911, "loss": 1.6148, "step": 2797 }, { "epoch": 0.8746483276023758, "grad_norm": 0.2412109375, "learning_rate": 0.00017744017124609083, "loss": 1.4968, "step": 2798 }, { "epoch": 0.8749609252891528, "grad_norm": 0.24609375, "learning_rate": 0.00017742462325292873, "loss": 1.6438, "step": 2799 }, { "epoch": 0.87527352297593, "grad_norm": 0.2431640625, "learning_rate": 0.0001774090705855435, "loss": 1.8157, "step": 2800 }, { "epoch": 0.8755861206627071, "grad_norm": 0.2314453125, "learning_rate": 0.000177393513244874, "loss": 1.8969, "step": 2801 }, { "epoch": 0.8758987183494842, "grad_norm": 0.2353515625, "learning_rate": 0.0001773779512318595, "loss": 1.7561, "step": 2802 }, { "epoch": 0.8762113160362613, "grad_norm": 0.2421875, "learning_rate": 0.00017736238454743946, "loss": 1.8387, "step": 2803 }, { "epoch": 0.8765239137230384, "grad_norm": 0.2421875, "learning_rate": 0.0001773468131925537, "loss": 1.8426, "step": 2804 }, { "epoch": 0.8768365114098156, "grad_norm": 0.25, "learning_rate": 0.00017733123716814225, "loss": 1.5613, "step": 2805 }, { "epoch": 0.8771491090965927, "grad_norm": 0.255859375, "learning_rate": 0.0001773156564751455, "loss": 1.9907, "step": 2806 }, { "epoch": 0.8774617067833698, "grad_norm": 0.232421875, "learning_rate": 0.00017730007111450402, "loss": 1.3814, "step": 2807 }, { "epoch": 0.8777743044701469, "grad_norm": 0.23046875, "learning_rate": 0.00017728448108715874, "loss": 1.459, "step": 2808 }, { "epoch": 0.8780869021569241, "grad_norm": 0.2451171875, "learning_rate": 0.00017726888639405086, "loss": 1.6541, "step": 2809 }, { "epoch": 0.8783994998437011, "grad_norm": 0.2294921875, "learning_rate": 0.00017725328703612183, "loss": 1.6136, "step": 2810 }, { "epoch": 0.8787120975304783, "grad_norm": 0.2392578125, "learning_rate": 0.00017723768301431344, "loss": 1.9023, "step": 2811 }, { "epoch": 0.8790246952172553, "grad_norm": 0.3203125, "learning_rate": 0.00017722207432956767, "loss": 2.4062, "step": 2812 }, { "epoch": 0.8793372929040325, "grad_norm": 0.232421875, "learning_rate": 0.00017720646098282687, "loss": 1.6481, "step": 2813 }, { "epoch": 0.8796498905908097, "grad_norm": 0.232421875, "learning_rate": 0.00017719084297503367, "loss": 1.7955, "step": 2814 }, { "epoch": 0.8799624882775867, "grad_norm": 0.2333984375, "learning_rate": 0.0001771752203071309, "loss": 1.7442, "step": 2815 }, { "epoch": 0.8802750859643639, "grad_norm": 0.2451171875, "learning_rate": 0.0001771595929800617, "loss": 1.9734, "step": 2816 }, { "epoch": 0.8805876836511409, "grad_norm": 0.244140625, "learning_rate": 0.0001771439609947696, "loss": 1.651, "step": 2817 }, { "epoch": 0.8809002813379181, "grad_norm": 0.240234375, "learning_rate": 0.00017712832435219823, "loss": 1.6914, "step": 2818 }, { "epoch": 0.8812128790246953, "grad_norm": 0.2490234375, "learning_rate": 0.00017711268305329166, "loss": 1.9028, "step": 2819 }, { "epoch": 0.8815254767114723, "grad_norm": 0.2412109375, "learning_rate": 0.00017709703709899413, "loss": 1.7345, "step": 2820 }, { "epoch": 0.8818380743982495, "grad_norm": 0.2255859375, "learning_rate": 0.00017708138649025023, "loss": 1.8512, "step": 2821 }, { "epoch": 0.8821506720850266, "grad_norm": 0.2412109375, "learning_rate": 0.0001770657312280048, "loss": 1.6781, "step": 2822 }, { "epoch": 0.8824632697718037, "grad_norm": 0.23828125, "learning_rate": 0.00017705007131320298, "loss": 1.5084, "step": 2823 }, { "epoch": 0.8827758674585808, "grad_norm": 0.2431640625, "learning_rate": 0.00017703440674679015, "loss": 1.5801, "step": 2824 }, { "epoch": 0.8830884651453579, "grad_norm": 0.2265625, "learning_rate": 0.00017701873752971206, "loss": 1.7738, "step": 2825 }, { "epoch": 0.883401062832135, "grad_norm": 0.232421875, "learning_rate": 0.00017700306366291458, "loss": 1.7093, "step": 2826 }, { "epoch": 0.8837136605189122, "grad_norm": 0.23046875, "learning_rate": 0.00017698738514734406, "loss": 1.7994, "step": 2827 }, { "epoch": 0.8840262582056893, "grad_norm": 0.2216796875, "learning_rate": 0.00017697170198394696, "loss": 1.7524, "step": 2828 }, { "epoch": 0.8843388558924664, "grad_norm": 0.2470703125, "learning_rate": 0.0001769560141736702, "loss": 1.4667, "step": 2829 }, { "epoch": 0.8846514535792435, "grad_norm": 0.23828125, "learning_rate": 0.00017694032171746072, "loss": 1.4843, "step": 2830 }, { "epoch": 0.8849640512660206, "grad_norm": 0.240234375, "learning_rate": 0.000176924624616266, "loss": 1.4988, "step": 2831 }, { "epoch": 0.8852766489527978, "grad_norm": 0.2412109375, "learning_rate": 0.00017690892287103367, "loss": 1.5816, "step": 2832 }, { "epoch": 0.8855892466395748, "grad_norm": 0.2333984375, "learning_rate": 0.00017689321648271166, "loss": 1.7245, "step": 2833 }, { "epoch": 0.885901844326352, "grad_norm": 0.2216796875, "learning_rate": 0.00017687750545224815, "loss": 1.7804, "step": 2834 }, { "epoch": 0.8862144420131292, "grad_norm": 0.251953125, "learning_rate": 0.0001768617897805917, "loss": 1.5097, "step": 2835 }, { "epoch": 0.8865270396999062, "grad_norm": 0.2333984375, "learning_rate": 0.00017684606946869106, "loss": 1.5496, "step": 2836 }, { "epoch": 0.8868396373866834, "grad_norm": 0.236328125, "learning_rate": 0.00017683034451749526, "loss": 1.829, "step": 2837 }, { "epoch": 0.8871522350734604, "grad_norm": 0.251953125, "learning_rate": 0.0001768146149279537, "loss": 1.4844, "step": 2838 }, { "epoch": 0.8874648327602376, "grad_norm": 0.2333984375, "learning_rate": 0.00017679888070101592, "loss": 1.7066, "step": 2839 }, { "epoch": 0.8877774304470147, "grad_norm": 0.2333984375, "learning_rate": 0.00017678314183763183, "loss": 1.5307, "step": 2840 }, { "epoch": 0.8880900281337918, "grad_norm": 0.240234375, "learning_rate": 0.00017676739833875164, "loss": 1.4304, "step": 2841 }, { "epoch": 0.888402625820569, "grad_norm": 0.22265625, "learning_rate": 0.00017675165020532578, "loss": 1.6068, "step": 2842 }, { "epoch": 0.888715223507346, "grad_norm": 0.244140625, "learning_rate": 0.000176735897438305, "loss": 1.4709, "step": 2843 }, { "epoch": 0.8890278211941232, "grad_norm": 0.23828125, "learning_rate": 0.00017672014003864033, "loss": 1.6562, "step": 2844 }, { "epoch": 0.8893404188809003, "grad_norm": 0.2265625, "learning_rate": 0.000176704378007283, "loss": 1.8352, "step": 2845 }, { "epoch": 0.8896530165676774, "grad_norm": 0.259765625, "learning_rate": 0.0001766886113451846, "loss": 1.8639, "step": 2846 }, { "epoch": 0.8899656142544545, "grad_norm": 0.2353515625, "learning_rate": 0.00017667284005329708, "loss": 1.6163, "step": 2847 }, { "epoch": 0.8902782119412317, "grad_norm": 0.2412109375, "learning_rate": 0.00017665706413257245, "loss": 1.7933, "step": 2848 }, { "epoch": 0.8905908096280087, "grad_norm": 0.232421875, "learning_rate": 0.0001766412835839632, "loss": 1.6013, "step": 2849 }, { "epoch": 0.8909034073147859, "grad_norm": 0.248046875, "learning_rate": 0.000176625498408422, "loss": 1.6694, "step": 2850 }, { "epoch": 0.891216005001563, "grad_norm": 0.25390625, "learning_rate": 0.0001766097086069018, "loss": 1.6816, "step": 2851 }, { "epoch": 0.8915286026883401, "grad_norm": 0.2451171875, "learning_rate": 0.00017659391418035588, "loss": 1.7289, "step": 2852 }, { "epoch": 0.8918412003751173, "grad_norm": 0.2353515625, "learning_rate": 0.0001765781151297377, "loss": 1.4146, "step": 2853 }, { "epoch": 0.8921537980618943, "grad_norm": 0.2373046875, "learning_rate": 0.0001765623114560012, "loss": 1.6338, "step": 2854 }, { "epoch": 0.8924663957486715, "grad_norm": 0.2412109375, "learning_rate": 0.00017654650316010036, "loss": 1.623, "step": 2855 }, { "epoch": 0.8927789934354485, "grad_norm": 0.24609375, "learning_rate": 0.00017653069024298957, "loss": 1.6547, "step": 2856 }, { "epoch": 0.8930915911222257, "grad_norm": 0.2314453125, "learning_rate": 0.0001765148727056235, "loss": 1.7697, "step": 2857 }, { "epoch": 0.8934041888090029, "grad_norm": 0.2412109375, "learning_rate": 0.00017649905054895705, "loss": 1.7488, "step": 2858 }, { "epoch": 0.8937167864957799, "grad_norm": 0.3046875, "learning_rate": 0.00017648322377394546, "loss": 2.1237, "step": 2859 }, { "epoch": 0.8940293841825571, "grad_norm": 0.2373046875, "learning_rate": 0.00017646739238154417, "loss": 1.6839, "step": 2860 }, { "epoch": 0.8943419818693341, "grad_norm": 0.2373046875, "learning_rate": 0.00017645155637270897, "loss": 1.6423, "step": 2861 }, { "epoch": 0.8946545795561113, "grad_norm": 0.2294921875, "learning_rate": 0.00017643571574839587, "loss": 1.7184, "step": 2862 }, { "epoch": 0.8949671772428884, "grad_norm": 0.251953125, "learning_rate": 0.00017641987050956122, "loss": 1.8355, "step": 2863 }, { "epoch": 0.8952797749296655, "grad_norm": 0.2333984375, "learning_rate": 0.0001764040206571616, "loss": 1.6686, "step": 2864 }, { "epoch": 0.8955923726164426, "grad_norm": 0.23046875, "learning_rate": 0.00017638816619215388, "loss": 1.7545, "step": 2865 }, { "epoch": 0.8959049703032198, "grad_norm": 0.2392578125, "learning_rate": 0.00017637230711549525, "loss": 1.7738, "step": 2866 }, { "epoch": 0.8962175679899969, "grad_norm": 0.2431640625, "learning_rate": 0.0001763564434281431, "loss": 1.7099, "step": 2867 }, { "epoch": 0.896530165676774, "grad_norm": 0.23046875, "learning_rate": 0.00017634057513105515, "loss": 1.6731, "step": 2868 }, { "epoch": 0.8968427633635511, "grad_norm": 0.244140625, "learning_rate": 0.0001763247022251894, "loss": 1.4654, "step": 2869 }, { "epoch": 0.8971553610503282, "grad_norm": 0.2470703125, "learning_rate": 0.00017630882471150413, "loss": 1.7359, "step": 2870 }, { "epoch": 0.8974679587371054, "grad_norm": 0.2421875, "learning_rate": 0.00017629294259095785, "loss": 1.5702, "step": 2871 }, { "epoch": 0.8977805564238824, "grad_norm": 0.26171875, "learning_rate": 0.00017627705586450944, "loss": 2.429, "step": 2872 }, { "epoch": 0.8980931541106596, "grad_norm": 0.25, "learning_rate": 0.00017626116453311794, "loss": 1.8714, "step": 2873 }, { "epoch": 0.8984057517974366, "grad_norm": 0.2421875, "learning_rate": 0.00017624526859774274, "loss": 1.592, "step": 2874 }, { "epoch": 0.8987183494842138, "grad_norm": 0.234375, "learning_rate": 0.00017622936805934355, "loss": 1.9351, "step": 2875 }, { "epoch": 0.899030947170991, "grad_norm": 0.244140625, "learning_rate": 0.00017621346291888025, "loss": 1.5676, "step": 2876 }, { "epoch": 0.899343544857768, "grad_norm": 0.2490234375, "learning_rate": 0.0001761975531773131, "loss": 2.0676, "step": 2877 }, { "epoch": 0.8996561425445452, "grad_norm": 0.234375, "learning_rate": 0.00017618163883560255, "loss": 1.8676, "step": 2878 }, { "epoch": 0.8999687402313223, "grad_norm": 0.2412109375, "learning_rate": 0.00017616571989470937, "loss": 1.6823, "step": 2879 }, { "epoch": 0.9002813379180994, "grad_norm": 0.24609375, "learning_rate": 0.00017614979635559462, "loss": 1.6829, "step": 2880 }, { "epoch": 0.9005939356048765, "grad_norm": 0.2333984375, "learning_rate": 0.00017613386821921964, "loss": 1.3811, "step": 2881 }, { "epoch": 0.9009065332916536, "grad_norm": 0.259765625, "learning_rate": 0.00017611793548654602, "loss": 1.3734, "step": 2882 }, { "epoch": 0.9012191309784308, "grad_norm": 0.220703125, "learning_rate": 0.00017610199815853563, "loss": 1.8464, "step": 2883 }, { "epoch": 0.9015317286652079, "grad_norm": 0.2392578125, "learning_rate": 0.00017608605623615063, "loss": 1.4275, "step": 2884 }, { "epoch": 0.901844326351985, "grad_norm": 0.232421875, "learning_rate": 0.00017607010972035348, "loss": 1.5875, "step": 2885 }, { "epoch": 0.9021569240387621, "grad_norm": 0.236328125, "learning_rate": 0.00017605415861210685, "loss": 1.8575, "step": 2886 }, { "epoch": 0.9024695217255392, "grad_norm": 0.2451171875, "learning_rate": 0.00017603820291237375, "loss": 1.8156, "step": 2887 }, { "epoch": 0.9027821194123163, "grad_norm": 0.240234375, "learning_rate": 0.00017602224262211743, "loss": 1.4908, "step": 2888 }, { "epoch": 0.9030947170990935, "grad_norm": 0.244140625, "learning_rate": 0.00017600627774230144, "loss": 1.7584, "step": 2889 }, { "epoch": 0.9034073147858706, "grad_norm": 0.25, "learning_rate": 0.00017599030827388965, "loss": 1.7706, "step": 2890 }, { "epoch": 0.9037199124726477, "grad_norm": 0.2431640625, "learning_rate": 0.0001759743342178461, "loss": 1.6771, "step": 2891 }, { "epoch": 0.9040325101594249, "grad_norm": 0.251953125, "learning_rate": 0.00017595835557513516, "loss": 1.838, "step": 2892 }, { "epoch": 0.9043451078462019, "grad_norm": 0.2373046875, "learning_rate": 0.00017594237234672152, "loss": 1.7833, "step": 2893 }, { "epoch": 0.9046577055329791, "grad_norm": 0.244140625, "learning_rate": 0.00017592638453357005, "loss": 1.8564, "step": 2894 }, { "epoch": 0.9049703032197561, "grad_norm": 0.236328125, "learning_rate": 0.000175910392136646, "loss": 1.4054, "step": 2895 }, { "epoch": 0.9052829009065333, "grad_norm": 0.234375, "learning_rate": 0.00017589439515691487, "loss": 1.7344, "step": 2896 }, { "epoch": 0.9055954985933105, "grad_norm": 0.2412109375, "learning_rate": 0.0001758783935953424, "loss": 1.6391, "step": 2897 }, { "epoch": 0.9059080962800875, "grad_norm": 0.236328125, "learning_rate": 0.00017586238745289457, "loss": 1.6244, "step": 2898 }, { "epoch": 0.9062206939668647, "grad_norm": 0.2451171875, "learning_rate": 0.00017584637673053778, "loss": 1.6056, "step": 2899 }, { "epoch": 0.9065332916536417, "grad_norm": 0.2333984375, "learning_rate": 0.00017583036142923856, "loss": 1.7858, "step": 2900 }, { "epoch": 0.9068458893404189, "grad_norm": 0.2373046875, "learning_rate": 0.0001758143415499638, "loss": 1.6028, "step": 2901 }, { "epoch": 0.907158487027196, "grad_norm": 0.23828125, "learning_rate": 0.0001757983170936806, "loss": 1.6918, "step": 2902 }, { "epoch": 0.9074710847139731, "grad_norm": 0.2490234375, "learning_rate": 0.00017578228806135643, "loss": 1.9901, "step": 2903 }, { "epoch": 0.9077836824007502, "grad_norm": 0.2392578125, "learning_rate": 0.00017576625445395893, "loss": 1.5383, "step": 2904 }, { "epoch": 0.9080962800875274, "grad_norm": 0.2412109375, "learning_rate": 0.00017575021627245612, "loss": 1.5068, "step": 2905 }, { "epoch": 0.9084088777743045, "grad_norm": 0.2333984375, "learning_rate": 0.00017573417351781625, "loss": 1.8062, "step": 2906 }, { "epoch": 0.9087214754610816, "grad_norm": 0.2275390625, "learning_rate": 0.00017571812619100778, "loss": 1.4791, "step": 2907 }, { "epoch": 0.9090340731478587, "grad_norm": 0.255859375, "learning_rate": 0.00017570207429299956, "loss": 1.7496, "step": 2908 }, { "epoch": 0.9093466708346358, "grad_norm": 0.244140625, "learning_rate": 0.00017568601782476064, "loss": 1.5202, "step": 2909 }, { "epoch": 0.909659268521413, "grad_norm": 0.2451171875, "learning_rate": 0.00017566995678726038, "loss": 1.6579, "step": 2910 }, { "epoch": 0.90997186620819, "grad_norm": 0.251953125, "learning_rate": 0.0001756538911814684, "loss": 1.606, "step": 2911 }, { "epoch": 0.9102844638949672, "grad_norm": 0.244140625, "learning_rate": 0.0001756378210083546, "loss": 1.6417, "step": 2912 }, { "epoch": 0.9105970615817442, "grad_norm": 0.232421875, "learning_rate": 0.00017562174626888918, "loss": 1.6654, "step": 2913 }, { "epoch": 0.9109096592685214, "grad_norm": 0.2392578125, "learning_rate": 0.00017560566696404254, "loss": 1.676, "step": 2914 }, { "epoch": 0.9112222569552986, "grad_norm": 0.240234375, "learning_rate": 0.00017558958309478543, "loss": 1.5845, "step": 2915 }, { "epoch": 0.9115348546420756, "grad_norm": 0.2412109375, "learning_rate": 0.0001755734946620889, "loss": 1.5907, "step": 2916 }, { "epoch": 0.9118474523288528, "grad_norm": 0.23828125, "learning_rate": 0.00017555740166692418, "loss": 1.8526, "step": 2917 }, { "epoch": 0.9121600500156298, "grad_norm": 0.255859375, "learning_rate": 0.00017554130411026283, "loss": 1.4743, "step": 2918 }, { "epoch": 0.912472647702407, "grad_norm": 0.2421875, "learning_rate": 0.0001755252019930767, "loss": 1.4929, "step": 2919 }, { "epoch": 0.9127852453891842, "grad_norm": 0.2392578125, "learning_rate": 0.0001755090953163379, "loss": 1.4583, "step": 2920 }, { "epoch": 0.9130978430759612, "grad_norm": 0.2451171875, "learning_rate": 0.00017549298408101876, "loss": 1.7967, "step": 2921 }, { "epoch": 0.9134104407627384, "grad_norm": 0.244140625, "learning_rate": 0.00017547686828809196, "loss": 1.9172, "step": 2922 }, { "epoch": 0.9137230384495155, "grad_norm": 0.244140625, "learning_rate": 0.00017546074793853048, "loss": 1.5975, "step": 2923 }, { "epoch": 0.9140356361362926, "grad_norm": 0.2255859375, "learning_rate": 0.00017544462303330748, "loss": 1.8838, "step": 2924 }, { "epoch": 0.9143482338230697, "grad_norm": 0.2421875, "learning_rate": 0.00017542849357339644, "loss": 1.8619, "step": 2925 }, { "epoch": 0.9146608315098468, "grad_norm": 0.2333984375, "learning_rate": 0.00017541235955977112, "loss": 1.6366, "step": 2926 }, { "epoch": 0.9149734291966239, "grad_norm": 0.2431640625, "learning_rate": 0.00017539622099340554, "loss": 1.5817, "step": 2927 }, { "epoch": 0.9152860268834011, "grad_norm": 0.248046875, "learning_rate": 0.000175380077875274, "loss": 1.5323, "step": 2928 }, { "epoch": 0.9155986245701782, "grad_norm": 0.240234375, "learning_rate": 0.00017536393020635118, "loss": 1.762, "step": 2929 }, { "epoch": 0.9159112222569553, "grad_norm": 0.2373046875, "learning_rate": 0.0001753477779876118, "loss": 1.5217, "step": 2930 }, { "epoch": 0.9162238199437324, "grad_norm": 0.2392578125, "learning_rate": 0.00017533162122003107, "loss": 1.6377, "step": 2931 }, { "epoch": 0.9165364176305095, "grad_norm": 0.25, "learning_rate": 0.00017531545990458436, "loss": 1.5614, "step": 2932 }, { "epoch": 0.9168490153172867, "grad_norm": 0.25390625, "learning_rate": 0.00017529929404224733, "loss": 1.9785, "step": 2933 }, { "epoch": 0.9171616130040637, "grad_norm": 0.2333984375, "learning_rate": 0.00017528312363399598, "loss": 1.6278, "step": 2934 }, { "epoch": 0.9174742106908409, "grad_norm": 0.2421875, "learning_rate": 0.00017526694868080656, "loss": 1.62, "step": 2935 }, { "epoch": 0.9177868083776181, "grad_norm": 0.244140625, "learning_rate": 0.0001752507691836555, "loss": 1.66, "step": 2936 }, { "epoch": 0.9180994060643951, "grad_norm": 0.234375, "learning_rate": 0.00017523458514351963, "loss": 1.711, "step": 2937 }, { "epoch": 0.9184120037511723, "grad_norm": 0.236328125, "learning_rate": 0.00017521839656137598, "loss": 1.606, "step": 2938 }, { "epoch": 0.9187246014379493, "grad_norm": 0.2451171875, "learning_rate": 0.00017520220343820184, "loss": 1.8548, "step": 2939 }, { "epoch": 0.9190371991247265, "grad_norm": 0.26171875, "learning_rate": 0.00017518600577497487, "loss": 1.6217, "step": 2940 }, { "epoch": 0.9193497968115036, "grad_norm": 0.326171875, "learning_rate": 0.00017516980357267295, "loss": 2.4887, "step": 2941 }, { "epoch": 0.9196623944982807, "grad_norm": 0.2431640625, "learning_rate": 0.00017515359683227416, "loss": 1.7841, "step": 2942 }, { "epoch": 0.9199749921850578, "grad_norm": 0.2314453125, "learning_rate": 0.00017513738555475697, "loss": 1.7065, "step": 2943 }, { "epoch": 0.9202875898718349, "grad_norm": 0.2373046875, "learning_rate": 0.0001751211697411001, "loss": 1.7469, "step": 2944 }, { "epoch": 0.9206001875586121, "grad_norm": 0.228515625, "learning_rate": 0.00017510494939228246, "loss": 1.5839, "step": 2945 }, { "epoch": 0.9209127852453892, "grad_norm": 0.24609375, "learning_rate": 0.0001750887245092833, "loss": 1.7413, "step": 2946 }, { "epoch": 0.9212253829321663, "grad_norm": 0.2470703125, "learning_rate": 0.00017507249509308217, "loss": 1.433, "step": 2947 }, { "epoch": 0.9215379806189434, "grad_norm": 0.244140625, "learning_rate": 0.00017505626114465886, "loss": 1.5907, "step": 2948 }, { "epoch": 0.9218505783057206, "grad_norm": 0.25, "learning_rate": 0.0001750400226649934, "loss": 1.6737, "step": 2949 }, { "epoch": 0.9221631759924976, "grad_norm": 0.25390625, "learning_rate": 0.00017502377965506613, "loss": 1.5084, "step": 2950 }, { "epoch": 0.9224757736792748, "grad_norm": 0.2392578125, "learning_rate": 0.00017500753211585772, "loss": 1.4999, "step": 2951 }, { "epoch": 0.9227883713660519, "grad_norm": 0.2451171875, "learning_rate": 0.000174991280048349, "loss": 1.6843, "step": 2952 }, { "epoch": 0.923100969052829, "grad_norm": 0.2421875, "learning_rate": 0.00017497502345352112, "loss": 1.6222, "step": 2953 }, { "epoch": 0.9234135667396062, "grad_norm": 0.23828125, "learning_rate": 0.00017495876233235554, "loss": 1.5935, "step": 2954 }, { "epoch": 0.9237261644263832, "grad_norm": 0.25390625, "learning_rate": 0.000174942496685834, "loss": 1.9363, "step": 2955 }, { "epoch": 0.9240387621131604, "grad_norm": 0.2431640625, "learning_rate": 0.00017492622651493837, "loss": 1.8212, "step": 2956 }, { "epoch": 0.9243513597999374, "grad_norm": 0.255859375, "learning_rate": 0.000174909951820651, "loss": 1.8014, "step": 2957 }, { "epoch": 0.9246639574867146, "grad_norm": 0.251953125, "learning_rate": 0.00017489367260395438, "loss": 1.7982, "step": 2958 }, { "epoch": 0.9249765551734918, "grad_norm": 0.240234375, "learning_rate": 0.0001748773888658313, "loss": 1.6039, "step": 2959 }, { "epoch": 0.9252891528602688, "grad_norm": 0.24609375, "learning_rate": 0.00017486110060726485, "loss": 1.8941, "step": 2960 }, { "epoch": 0.925601750547046, "grad_norm": 0.25, "learning_rate": 0.00017484480782923835, "loss": 2.0574, "step": 2961 }, { "epoch": 0.925914348233823, "grad_norm": 0.244140625, "learning_rate": 0.00017482851053273542, "loss": 1.404, "step": 2962 }, { "epoch": 0.9262269459206002, "grad_norm": 0.23046875, "learning_rate": 0.00017481220871873996, "loss": 1.6843, "step": 2963 }, { "epoch": 0.9265395436073773, "grad_norm": 0.263671875, "learning_rate": 0.00017479590238823613, "loss": 1.61, "step": 2964 }, { "epoch": 0.9268521412941544, "grad_norm": 0.388671875, "learning_rate": 0.00017477959154220835, "loss": 2.4723, "step": 2965 }, { "epoch": 0.9271647389809315, "grad_norm": 0.240234375, "learning_rate": 0.0001747632761816413, "loss": 1.6597, "step": 2966 }, { "epoch": 0.9274773366677087, "grad_norm": 0.2412109375, "learning_rate": 0.00017474695630752008, "loss": 1.5784, "step": 2967 }, { "epoch": 0.9277899343544858, "grad_norm": 0.2275390625, "learning_rate": 0.00017473063192082982, "loss": 1.8403, "step": 2968 }, { "epoch": 0.9281025320412629, "grad_norm": 0.25, "learning_rate": 0.00017471430302255604, "loss": 1.8024, "step": 2969 }, { "epoch": 0.92841512972804, "grad_norm": 0.2431640625, "learning_rate": 0.00017469796961368462, "loss": 1.714, "step": 2970 }, { "epoch": 0.9287277274148171, "grad_norm": 0.251953125, "learning_rate": 0.00017468163169520156, "loss": 1.4359, "step": 2971 }, { "epoch": 0.9290403251015943, "grad_norm": 0.23828125, "learning_rate": 0.00017466528926809324, "loss": 1.6177, "step": 2972 }, { "epoch": 0.9293529227883713, "grad_norm": 0.2578125, "learning_rate": 0.00017464894233334627, "loss": 1.9172, "step": 2973 }, { "epoch": 0.9296655204751485, "grad_norm": 0.2353515625, "learning_rate": 0.00017463259089194752, "loss": 2.023, "step": 2974 }, { "epoch": 0.9299781181619255, "grad_norm": 0.2392578125, "learning_rate": 0.00017461623494488416, "loss": 1.3345, "step": 2975 }, { "epoch": 0.9302907158487027, "grad_norm": 0.2373046875, "learning_rate": 0.0001745998744931436, "loss": 1.6451, "step": 2976 }, { "epoch": 0.9306033135354799, "grad_norm": 0.2431640625, "learning_rate": 0.00017458350953771355, "loss": 1.4398, "step": 2977 }, { "epoch": 0.9309159112222569, "grad_norm": 0.236328125, "learning_rate": 0.000174567140079582, "loss": 1.4698, "step": 2978 }, { "epoch": 0.9312285089090341, "grad_norm": 0.2421875, "learning_rate": 0.00017455076611973716, "loss": 1.586, "step": 2979 }, { "epoch": 0.9315411065958112, "grad_norm": 0.2412109375, "learning_rate": 0.00017453438765916758, "loss": 1.4608, "step": 2980 }, { "epoch": 0.9318537042825883, "grad_norm": 0.2392578125, "learning_rate": 0.00017451800469886207, "loss": 1.7327, "step": 2981 }, { "epoch": 0.9321663019693655, "grad_norm": 0.232421875, "learning_rate": 0.0001745016172398096, "loss": 1.7701, "step": 2982 }, { "epoch": 0.9324788996561425, "grad_norm": 0.2421875, "learning_rate": 0.0001744852252829996, "loss": 1.6054, "step": 2983 }, { "epoch": 0.9327914973429197, "grad_norm": 0.2392578125, "learning_rate": 0.00017446882882942162, "loss": 1.7484, "step": 2984 }, { "epoch": 0.9331040950296968, "grad_norm": 0.2373046875, "learning_rate": 0.00017445242788006552, "loss": 1.6647, "step": 2985 }, { "epoch": 0.9334166927164739, "grad_norm": 0.248046875, "learning_rate": 0.0001744360224359215, "loss": 1.6536, "step": 2986 }, { "epoch": 0.933729290403251, "grad_norm": 0.25, "learning_rate": 0.00017441961249797995, "loss": 1.9033, "step": 2987 }, { "epoch": 0.9340418880900281, "grad_norm": 0.24609375, "learning_rate": 0.00017440319806723157, "loss": 1.5145, "step": 2988 }, { "epoch": 0.9343544857768052, "grad_norm": 0.25390625, "learning_rate": 0.0001743867791446673, "loss": 1.6766, "step": 2989 }, { "epoch": 0.9346670834635824, "grad_norm": 0.232421875, "learning_rate": 0.00017437035573127836, "loss": 1.5665, "step": 2990 }, { "epoch": 0.9349796811503595, "grad_norm": 0.2353515625, "learning_rate": 0.00017435392782805628, "loss": 1.7932, "step": 2991 }, { "epoch": 0.9352922788371366, "grad_norm": 0.2490234375, "learning_rate": 0.00017433749543599287, "loss": 1.595, "step": 2992 }, { "epoch": 0.9356048765239138, "grad_norm": 0.228515625, "learning_rate": 0.00017432105855608008, "loss": 1.7333, "step": 2993 }, { "epoch": 0.9359174742106908, "grad_norm": 0.240234375, "learning_rate": 0.0001743046171893103, "loss": 1.6385, "step": 2994 }, { "epoch": 0.936230071897468, "grad_norm": 0.25390625, "learning_rate": 0.0001742881713366761, "loss": 1.7989, "step": 2995 }, { "epoch": 0.936542669584245, "grad_norm": 0.2353515625, "learning_rate": 0.00017427172099917032, "loss": 1.5065, "step": 2996 }, { "epoch": 0.9368552672710222, "grad_norm": 0.244140625, "learning_rate": 0.0001742552661777861, "loss": 1.6564, "step": 2997 }, { "epoch": 0.9371678649577994, "grad_norm": 0.2373046875, "learning_rate": 0.00017423880687351685, "loss": 1.5779, "step": 2998 }, { "epoch": 0.9374804626445764, "grad_norm": 0.2451171875, "learning_rate": 0.0001742223430873562, "loss": 1.7974, "step": 2999 }, { "epoch": 0.9377930603313536, "grad_norm": 0.234375, "learning_rate": 0.0001742058748202981, "loss": 1.4744, "step": 3000 }, { "epoch": 0.9381056580181306, "grad_norm": 0.236328125, "learning_rate": 0.0001741894020733368, "loss": 1.6008, "step": 3001 }, { "epoch": 0.9384182557049078, "grad_norm": 0.248046875, "learning_rate": 0.00017417292484746676, "loss": 1.5435, "step": 3002 }, { "epoch": 0.9387308533916849, "grad_norm": 0.2470703125, "learning_rate": 0.00017415644314368274, "loss": 1.6641, "step": 3003 }, { "epoch": 0.939043451078462, "grad_norm": 0.244140625, "learning_rate": 0.00017413995696297972, "loss": 1.661, "step": 3004 }, { "epoch": 0.9393560487652391, "grad_norm": 0.248046875, "learning_rate": 0.00017412346630635303, "loss": 1.5462, "step": 3005 }, { "epoch": 0.9396686464520163, "grad_norm": 0.263671875, "learning_rate": 0.00017410697117479823, "loss": 1.7804, "step": 3006 }, { "epoch": 0.9399812441387934, "grad_norm": 0.2333984375, "learning_rate": 0.00017409047156931114, "loss": 1.8893, "step": 3007 }, { "epoch": 0.9402938418255705, "grad_norm": 0.248046875, "learning_rate": 0.00017407396749088787, "loss": 1.5371, "step": 3008 }, { "epoch": 0.9406064395123476, "grad_norm": 0.2392578125, "learning_rate": 0.00017405745894052477, "loss": 1.5866, "step": 3009 }, { "epoch": 0.9409190371991247, "grad_norm": 0.24609375, "learning_rate": 0.00017404094591921853, "loss": 1.5388, "step": 3010 }, { "epoch": 0.9412316348859019, "grad_norm": 0.25390625, "learning_rate": 0.00017402442842796604, "loss": 1.438, "step": 3011 }, { "epoch": 0.9415442325726789, "grad_norm": 0.251953125, "learning_rate": 0.00017400790646776443, "loss": 1.892, "step": 3012 }, { "epoch": 0.9418568302594561, "grad_norm": 0.24609375, "learning_rate": 0.00017399138003961124, "loss": 1.4763, "step": 3013 }, { "epoch": 0.9421694279462332, "grad_norm": 0.25, "learning_rate": 0.0001739748491445041, "loss": 1.6418, "step": 3014 }, { "epoch": 0.9424820256330103, "grad_norm": 0.240234375, "learning_rate": 0.00017395831378344112, "loss": 1.7746, "step": 3015 }, { "epoch": 0.9427946233197875, "grad_norm": 0.236328125, "learning_rate": 0.00017394177395742047, "loss": 1.8002, "step": 3016 }, { "epoch": 0.9431072210065645, "grad_norm": 0.2412109375, "learning_rate": 0.00017392522966744068, "loss": 1.686, "step": 3017 }, { "epoch": 0.9434198186933417, "grad_norm": 0.2392578125, "learning_rate": 0.00017390868091450055, "loss": 1.6964, "step": 3018 }, { "epoch": 0.9437324163801187, "grad_norm": 0.240234375, "learning_rate": 0.00017389212769959922, "loss": 1.656, "step": 3019 }, { "epoch": 0.9440450140668959, "grad_norm": 0.240234375, "learning_rate": 0.00017387557002373596, "loss": 1.6357, "step": 3020 }, { "epoch": 0.944357611753673, "grad_norm": 0.236328125, "learning_rate": 0.00017385900788791038, "loss": 1.8136, "step": 3021 }, { "epoch": 0.9446702094404501, "grad_norm": 0.236328125, "learning_rate": 0.00017384244129312239, "loss": 1.5841, "step": 3022 }, { "epoch": 0.9449828071272273, "grad_norm": 0.23828125, "learning_rate": 0.00017382587024037212, "loss": 1.5595, "step": 3023 }, { "epoch": 0.9452954048140044, "grad_norm": 0.248046875, "learning_rate": 0.00017380929473066, "loss": 1.6447, "step": 3024 }, { "epoch": 0.9456080025007815, "grad_norm": 0.2421875, "learning_rate": 0.00017379271476498665, "loss": 1.6323, "step": 3025 }, { "epoch": 0.9459206001875586, "grad_norm": 0.25, "learning_rate": 0.00017377613034435315, "loss": 1.62, "step": 3026 }, { "epoch": 0.9462331978743357, "grad_norm": 0.2451171875, "learning_rate": 0.00017375954146976058, "loss": 1.4751, "step": 3027 }, { "epoch": 0.9465457955611128, "grad_norm": 0.2431640625, "learning_rate": 0.00017374294814221055, "loss": 2.2368, "step": 3028 }, { "epoch": 0.94685839324789, "grad_norm": 0.2412109375, "learning_rate": 0.00017372635036270472, "loss": 1.7495, "step": 3029 }, { "epoch": 0.9471709909346671, "grad_norm": 0.2373046875, "learning_rate": 0.0001737097481322452, "loss": 1.9299, "step": 3030 }, { "epoch": 0.9474835886214442, "grad_norm": 0.2392578125, "learning_rate": 0.00017369314145183426, "loss": 1.5842, "step": 3031 }, { "epoch": 0.9477961863082213, "grad_norm": 0.240234375, "learning_rate": 0.00017367653032247446, "loss": 1.6439, "step": 3032 }, { "epoch": 0.9481087839949984, "grad_norm": 0.24609375, "learning_rate": 0.0001736599147451686, "loss": 1.7489, "step": 3033 }, { "epoch": 0.9484213816817756, "grad_norm": 0.2431640625, "learning_rate": 0.00017364329472091986, "loss": 1.6981, "step": 3034 }, { "epoch": 0.9487339793685526, "grad_norm": 0.24609375, "learning_rate": 0.0001736266702507316, "loss": 1.98, "step": 3035 }, { "epoch": 0.9490465770553298, "grad_norm": 0.24609375, "learning_rate": 0.0001736100413356074, "loss": 1.5686, "step": 3036 }, { "epoch": 0.949359174742107, "grad_norm": 0.2421875, "learning_rate": 0.00017359340797655116, "loss": 1.6756, "step": 3037 }, { "epoch": 0.949671772428884, "grad_norm": 0.2431640625, "learning_rate": 0.00017357677017456715, "loss": 1.6345, "step": 3038 }, { "epoch": 0.9499843701156612, "grad_norm": 0.2451171875, "learning_rate": 0.00017356012793065976, "loss": 1.6958, "step": 3039 }, { "epoch": 0.9502969678024382, "grad_norm": 0.234375, "learning_rate": 0.0001735434812458337, "loss": 1.6856, "step": 3040 }, { "epoch": 0.9506095654892154, "grad_norm": 0.2412109375, "learning_rate": 0.00017352683012109395, "loss": 1.6888, "step": 3041 }, { "epoch": 0.9509221631759925, "grad_norm": 0.25, "learning_rate": 0.0001735101745574458, "loss": 1.7944, "step": 3042 }, { "epoch": 0.9512347608627696, "grad_norm": 0.244140625, "learning_rate": 0.0001734935145558947, "loss": 1.4633, "step": 3043 }, { "epoch": 0.9515473585495468, "grad_norm": 0.251953125, "learning_rate": 0.0001734768501174465, "loss": 1.5549, "step": 3044 }, { "epoch": 0.9518599562363238, "grad_norm": 0.24609375, "learning_rate": 0.00017346018124310723, "loss": 1.6942, "step": 3045 }, { "epoch": 0.952172553923101, "grad_norm": 0.232421875, "learning_rate": 0.0001734435079338832, "loss": 1.8094, "step": 3046 }, { "epoch": 0.9524851516098781, "grad_norm": 0.244140625, "learning_rate": 0.00017342683019078102, "loss": 1.6422, "step": 3047 }, { "epoch": 0.9527977492966552, "grad_norm": 0.2470703125, "learning_rate": 0.00017341014801480748, "loss": 1.4798, "step": 3048 }, { "epoch": 0.9531103469834323, "grad_norm": 0.2353515625, "learning_rate": 0.0001733934614069698, "loss": 1.6282, "step": 3049 }, { "epoch": 0.9534229446702095, "grad_norm": 0.23828125, "learning_rate": 0.00017337677036827534, "loss": 1.5165, "step": 3050 }, { "epoch": 0.9537355423569865, "grad_norm": 0.248046875, "learning_rate": 0.00017336007489973171, "loss": 1.6635, "step": 3051 }, { "epoch": 0.9540481400437637, "grad_norm": 0.2578125, "learning_rate": 0.00017334337500234687, "loss": 1.7504, "step": 3052 }, { "epoch": 0.9543607377305408, "grad_norm": 0.25, "learning_rate": 0.00017332667067712905, "loss": 1.8412, "step": 3053 }, { "epoch": 0.9546733354173179, "grad_norm": 0.2421875, "learning_rate": 0.0001733099619250867, "loss": 1.616, "step": 3054 }, { "epoch": 0.9549859331040951, "grad_norm": 0.2353515625, "learning_rate": 0.00017329324874722847, "loss": 1.7954, "step": 3055 }, { "epoch": 0.9552985307908721, "grad_norm": 0.2421875, "learning_rate": 0.00017327653114456343, "loss": 1.6591, "step": 3056 }, { "epoch": 0.9556111284776493, "grad_norm": 0.240234375, "learning_rate": 0.00017325980911810085, "loss": 1.6327, "step": 3057 }, { "epoch": 0.9559237261644263, "grad_norm": 0.24609375, "learning_rate": 0.00017324308266885026, "loss": 1.5621, "step": 3058 }, { "epoch": 0.9562363238512035, "grad_norm": 0.2578125, "learning_rate": 0.00017322635179782138, "loss": 2.0408, "step": 3059 }, { "epoch": 0.9565489215379807, "grad_norm": 0.23828125, "learning_rate": 0.00017320961650602436, "loss": 1.5293, "step": 3060 }, { "epoch": 0.9568615192247577, "grad_norm": 0.2373046875, "learning_rate": 0.00017319287679446949, "loss": 1.5787, "step": 3061 }, { "epoch": 0.9571741169115349, "grad_norm": 0.23828125, "learning_rate": 0.0001731761326641674, "loss": 1.6182, "step": 3062 }, { "epoch": 0.957486714598312, "grad_norm": 0.244140625, "learning_rate": 0.0001731593841161289, "loss": 1.6671, "step": 3063 }, { "epoch": 0.9577993122850891, "grad_norm": 0.23828125, "learning_rate": 0.00017314263115136516, "loss": 1.6618, "step": 3064 }, { "epoch": 0.9581119099718662, "grad_norm": 0.25, "learning_rate": 0.00017312587377088756, "loss": 1.6887, "step": 3065 }, { "epoch": 0.9584245076586433, "grad_norm": 0.2490234375, "learning_rate": 0.00017310911197570777, "loss": 1.6217, "step": 3066 }, { "epoch": 0.9587371053454204, "grad_norm": 0.240234375, "learning_rate": 0.00017309234576683778, "loss": 1.7303, "step": 3067 }, { "epoch": 0.9590497030321976, "grad_norm": 0.25, "learning_rate": 0.0001730755751452897, "loss": 1.6497, "step": 3068 }, { "epoch": 0.9593623007189747, "grad_norm": 0.228515625, "learning_rate": 0.000173058800112076, "loss": 1.8203, "step": 3069 }, { "epoch": 0.9596748984057518, "grad_norm": 0.255859375, "learning_rate": 0.00017304202066820948, "loss": 2.1236, "step": 3070 }, { "epoch": 0.9599874960925289, "grad_norm": 0.2392578125, "learning_rate": 0.0001730252368147031, "loss": 1.7534, "step": 3071 }, { "epoch": 0.960300093779306, "grad_norm": 0.25390625, "learning_rate": 0.00017300844855257008, "loss": 1.6816, "step": 3072 }, { "epoch": 0.9606126914660832, "grad_norm": 0.2373046875, "learning_rate": 0.000172991655882824, "loss": 1.5992, "step": 3073 }, { "epoch": 0.9609252891528602, "grad_norm": 0.24609375, "learning_rate": 0.00017297485880647862, "loss": 1.8889, "step": 3074 }, { "epoch": 0.9612378868396374, "grad_norm": 0.240234375, "learning_rate": 0.00017295805732454804, "loss": 1.6511, "step": 3075 }, { "epoch": 0.9615504845264145, "grad_norm": 0.265625, "learning_rate": 0.00017294125143804657, "loss": 1.7686, "step": 3076 }, { "epoch": 0.9618630822131916, "grad_norm": 0.26953125, "learning_rate": 0.0001729244411479888, "loss": 1.7564, "step": 3077 }, { "epoch": 0.9621756798999688, "grad_norm": 0.23046875, "learning_rate": 0.0001729076264553896, "loss": 1.6458, "step": 3078 }, { "epoch": 0.9624882775867458, "grad_norm": 0.2412109375, "learning_rate": 0.00017289080736126409, "loss": 1.698, "step": 3079 }, { "epoch": 0.962800875273523, "grad_norm": 0.25, "learning_rate": 0.00017287398386662764, "loss": 1.684, "step": 3080 }, { "epoch": 0.9631134729603001, "grad_norm": 0.234375, "learning_rate": 0.0001728571559724959, "loss": 1.7003, "step": 3081 }, { "epoch": 0.9634260706470772, "grad_norm": 0.2373046875, "learning_rate": 0.00017284032367988482, "loss": 1.5827, "step": 3082 }, { "epoch": 0.9637386683338544, "grad_norm": 0.240234375, "learning_rate": 0.0001728234869898106, "loss": 1.7952, "step": 3083 }, { "epoch": 0.9640512660206314, "grad_norm": 0.2470703125, "learning_rate": 0.00017280664590328966, "loss": 1.5528, "step": 3084 }, { "epoch": 0.9643638637074086, "grad_norm": 0.25, "learning_rate": 0.0001727898004213387, "loss": 1.8732, "step": 3085 }, { "epoch": 0.9646764613941857, "grad_norm": 0.234375, "learning_rate": 0.00017277295054497478, "loss": 1.5453, "step": 3086 }, { "epoch": 0.9649890590809628, "grad_norm": 0.25, "learning_rate": 0.00017275609627521508, "loss": 1.8652, "step": 3087 }, { "epoch": 0.9653016567677399, "grad_norm": 0.23828125, "learning_rate": 0.00017273923761307712, "loss": 1.5761, "step": 3088 }, { "epoch": 0.965614254454517, "grad_norm": 0.232421875, "learning_rate": 0.00017272237455957868, "loss": 1.3679, "step": 3089 }, { "epoch": 0.9659268521412941, "grad_norm": 0.365234375, "learning_rate": 0.00017270550711573788, "loss": 2.1864, "step": 3090 }, { "epoch": 0.9662394498280713, "grad_norm": 0.25390625, "learning_rate": 0.0001726886352825729, "loss": 1.8203, "step": 3091 }, { "epoch": 0.9665520475148484, "grad_norm": 0.236328125, "learning_rate": 0.0001726717590611024, "loss": 1.6397, "step": 3092 }, { "epoch": 0.9668646452016255, "grad_norm": 0.2412109375, "learning_rate": 0.00017265487845234524, "loss": 1.7298, "step": 3093 }, { "epoch": 0.9671772428884027, "grad_norm": 0.2392578125, "learning_rate": 0.00017263799345732043, "loss": 1.4412, "step": 3094 }, { "epoch": 0.9674898405751797, "grad_norm": 0.2578125, "learning_rate": 0.0001726211040770474, "loss": 1.6235, "step": 3095 }, { "epoch": 0.9678024382619569, "grad_norm": 0.2392578125, "learning_rate": 0.0001726042103125458, "loss": 1.4866, "step": 3096 }, { "epoch": 0.9681150359487339, "grad_norm": 0.25, "learning_rate": 0.0001725873121648355, "loss": 1.8129, "step": 3097 }, { "epoch": 0.9684276336355111, "grad_norm": 0.244140625, "learning_rate": 0.00017257040963493663, "loss": 1.7193, "step": 3098 }, { "epoch": 0.9687402313222883, "grad_norm": 0.2275390625, "learning_rate": 0.00017255350272386968, "loss": 1.6863, "step": 3099 }, { "epoch": 0.9690528290090653, "grad_norm": 0.2353515625, "learning_rate": 0.00017253659143265534, "loss": 1.5868, "step": 3100 }, { "epoch": 0.9693654266958425, "grad_norm": 0.26171875, "learning_rate": 0.00017251967576231448, "loss": 1.9038, "step": 3101 }, { "epoch": 0.9696780243826195, "grad_norm": 0.2490234375, "learning_rate": 0.0001725027557138684, "loss": 1.5963, "step": 3102 }, { "epoch": 0.9699906220693967, "grad_norm": 0.25390625, "learning_rate": 0.0001724858312883386, "loss": 1.9158, "step": 3103 }, { "epoch": 0.9703032197561738, "grad_norm": 0.2431640625, "learning_rate": 0.0001724689024867468, "loss": 1.7879, "step": 3104 }, { "epoch": 0.9706158174429509, "grad_norm": 0.349609375, "learning_rate": 0.00017245196931011495, "loss": 2.2104, "step": 3105 }, { "epoch": 0.970928415129728, "grad_norm": 0.25390625, "learning_rate": 0.00017243503175946542, "loss": 1.3733, "step": 3106 }, { "epoch": 0.9712410128165052, "grad_norm": 0.26171875, "learning_rate": 0.0001724180898358207, "loss": 1.8072, "step": 3107 }, { "epoch": 0.9715536105032823, "grad_norm": 0.25, "learning_rate": 0.00017240114354020368, "loss": 1.6554, "step": 3108 }, { "epoch": 0.9718662081900594, "grad_norm": 0.232421875, "learning_rate": 0.0001723841928736373, "loss": 1.8434, "step": 3109 }, { "epoch": 0.9721788058768365, "grad_norm": 0.251953125, "learning_rate": 0.00017236723783714496, "loss": 1.7078, "step": 3110 }, { "epoch": 0.9724914035636136, "grad_norm": 0.248046875, "learning_rate": 0.00017235027843175027, "loss": 1.4973, "step": 3111 }, { "epoch": 0.9728040012503908, "grad_norm": 0.23828125, "learning_rate": 0.00017233331465847705, "loss": 2.0236, "step": 3112 }, { "epoch": 0.9731165989371678, "grad_norm": 0.248046875, "learning_rate": 0.00017231634651834946, "loss": 1.55, "step": 3113 }, { "epoch": 0.973429196623945, "grad_norm": 0.240234375, "learning_rate": 0.00017229937401239188, "loss": 1.5074, "step": 3114 }, { "epoch": 0.973741794310722, "grad_norm": 0.232421875, "learning_rate": 0.00017228239714162896, "loss": 1.4308, "step": 3115 }, { "epoch": 0.9740543919974992, "grad_norm": 0.2412109375, "learning_rate": 0.00017226541590708566, "loss": 1.8249, "step": 3116 }, { "epoch": 0.9743669896842764, "grad_norm": 0.2451171875, "learning_rate": 0.00017224843030978705, "loss": 1.8337, "step": 3117 }, { "epoch": 0.9746795873710534, "grad_norm": 0.26171875, "learning_rate": 0.00017223144035075864, "loss": 1.7211, "step": 3118 }, { "epoch": 0.9749921850578306, "grad_norm": 0.25, "learning_rate": 0.00017221444603102617, "loss": 1.7391, "step": 3119 }, { "epoch": 0.9753047827446076, "grad_norm": 0.2470703125, "learning_rate": 0.00017219744735161554, "loss": 2.0078, "step": 3120 }, { "epoch": 0.9756173804313848, "grad_norm": 0.2412109375, "learning_rate": 0.000172180444313553, "loss": 1.7833, "step": 3121 }, { "epoch": 0.975929978118162, "grad_norm": 0.251953125, "learning_rate": 0.00017216343691786509, "loss": 1.508, "step": 3122 }, { "epoch": 0.976242575804939, "grad_norm": 0.251953125, "learning_rate": 0.0001721464251655785, "loss": 2.0652, "step": 3123 }, { "epoch": 0.9765551734917162, "grad_norm": 0.2421875, "learning_rate": 0.0001721294090577203, "loss": 1.5267, "step": 3124 }, { "epoch": 0.9768677711784933, "grad_norm": 0.2373046875, "learning_rate": 0.00017211238859531774, "loss": 1.838, "step": 3125 }, { "epoch": 0.9771803688652704, "grad_norm": 0.234375, "learning_rate": 0.00017209536377939846, "loss": 1.7286, "step": 3126 }, { "epoch": 0.9774929665520475, "grad_norm": 0.2412109375, "learning_rate": 0.0001720783346109901, "loss": 1.8045, "step": 3127 }, { "epoch": 0.9778055642388246, "grad_norm": 0.2353515625, "learning_rate": 0.0001720613010911209, "loss": 1.712, "step": 3128 }, { "epoch": 0.9781181619256017, "grad_norm": 0.234375, "learning_rate": 0.0001720442632208191, "loss": 1.5521, "step": 3129 }, { "epoch": 0.9784307596123789, "grad_norm": 0.23828125, "learning_rate": 0.0001720272210011133, "loss": 1.7718, "step": 3130 }, { "epoch": 0.978743357299156, "grad_norm": 0.2578125, "learning_rate": 0.00017201017443303242, "loss": 1.4686, "step": 3131 }, { "epoch": 0.9790559549859331, "grad_norm": 0.251953125, "learning_rate": 0.00017199312351760555, "loss": 1.6478, "step": 3132 }, { "epoch": 0.9793685526727102, "grad_norm": 0.228515625, "learning_rate": 0.00017197606825586204, "loss": 1.4012, "step": 3133 }, { "epoch": 0.9796811503594873, "grad_norm": 0.24609375, "learning_rate": 0.00017195900864883158, "loss": 1.6166, "step": 3134 }, { "epoch": 0.9799937480462645, "grad_norm": 0.255859375, "learning_rate": 0.00017194194469754407, "loss": 1.7632, "step": 3135 }, { "epoch": 0.9803063457330415, "grad_norm": 0.248046875, "learning_rate": 0.00017192487640302969, "loss": 1.497, "step": 3136 }, { "epoch": 0.9806189434198187, "grad_norm": 0.25, "learning_rate": 0.00017190780376631886, "loss": 1.756, "step": 3137 }, { "epoch": 0.9809315411065959, "grad_norm": 0.2451171875, "learning_rate": 0.0001718907267884423, "loss": 1.5489, "step": 3138 }, { "epoch": 0.9812441387933729, "grad_norm": 0.26171875, "learning_rate": 0.00017187364547043091, "loss": 1.5929, "step": 3139 }, { "epoch": 0.9815567364801501, "grad_norm": 0.244140625, "learning_rate": 0.000171856559813316, "loss": 1.7889, "step": 3140 }, { "epoch": 0.9818693341669271, "grad_norm": 0.234375, "learning_rate": 0.00017183946981812897, "loss": 1.4263, "step": 3141 }, { "epoch": 0.9821819318537043, "grad_norm": 0.259765625, "learning_rate": 0.00017182237548590162, "loss": 1.8588, "step": 3142 }, { "epoch": 0.9824945295404814, "grad_norm": 0.2431640625, "learning_rate": 0.00017180527681766593, "loss": 1.7062, "step": 3143 }, { "epoch": 0.9828071272272585, "grad_norm": 0.2373046875, "learning_rate": 0.00017178817381445418, "loss": 1.5145, "step": 3144 }, { "epoch": 0.9831197249140357, "grad_norm": 0.2431640625, "learning_rate": 0.0001717710664772989, "loss": 1.6806, "step": 3145 }, { "epoch": 0.9834323226008127, "grad_norm": 0.2373046875, "learning_rate": 0.00017175395480723286, "loss": 1.9361, "step": 3146 }, { "epoch": 0.9837449202875899, "grad_norm": 0.23828125, "learning_rate": 0.00017173683880528917, "loss": 1.5781, "step": 3147 }, { "epoch": 0.984057517974367, "grad_norm": 0.2451171875, "learning_rate": 0.00017171971847250106, "loss": 1.5337, "step": 3148 }, { "epoch": 0.9843701156611441, "grad_norm": 0.24609375, "learning_rate": 0.00017170259380990216, "loss": 1.8557, "step": 3149 }, { "epoch": 0.9846827133479212, "grad_norm": 0.2470703125, "learning_rate": 0.00017168546481852634, "loss": 1.735, "step": 3150 }, { "epoch": 0.9849953110346984, "grad_norm": 0.234375, "learning_rate": 0.00017166833149940763, "loss": 1.6696, "step": 3151 }, { "epoch": 0.9853079087214754, "grad_norm": 0.2392578125, "learning_rate": 0.00017165119385358045, "loss": 1.5103, "step": 3152 }, { "epoch": 0.9856205064082526, "grad_norm": 0.232421875, "learning_rate": 0.00017163405188207932, "loss": 1.3137, "step": 3153 }, { "epoch": 0.9859331040950297, "grad_norm": 0.3125, "learning_rate": 0.00017161690558593925, "loss": 2.1945, "step": 3154 }, { "epoch": 0.9862457017818068, "grad_norm": 0.251953125, "learning_rate": 0.0001715997549661953, "loss": 1.8129, "step": 3155 }, { "epoch": 0.986558299468584, "grad_norm": 0.2431640625, "learning_rate": 0.00017158260002388294, "loss": 1.7308, "step": 3156 }, { "epoch": 0.986870897155361, "grad_norm": 0.240234375, "learning_rate": 0.00017156544076003778, "loss": 1.7969, "step": 3157 }, { "epoch": 0.9871834948421382, "grad_norm": 0.25, "learning_rate": 0.00017154827717569577, "loss": 1.5541, "step": 3158 }, { "epoch": 0.9874960925289152, "grad_norm": 0.2431640625, "learning_rate": 0.00017153110927189307, "loss": 1.6279, "step": 3159 }, { "epoch": 0.9878086902156924, "grad_norm": 0.2578125, "learning_rate": 0.00017151393704966617, "loss": 1.7777, "step": 3160 }, { "epoch": 0.9881212879024696, "grad_norm": 0.240234375, "learning_rate": 0.00017149676051005176, "loss": 1.7864, "step": 3161 }, { "epoch": 0.9884338855892466, "grad_norm": 0.263671875, "learning_rate": 0.0001714795796540868, "loss": 1.8507, "step": 3162 }, { "epoch": 0.9887464832760238, "grad_norm": 0.2373046875, "learning_rate": 0.00017146239448280853, "loss": 1.5787, "step": 3163 }, { "epoch": 0.9890590809628009, "grad_norm": 0.25, "learning_rate": 0.00017144520499725444, "loss": 1.6532, "step": 3164 }, { "epoch": 0.989371678649578, "grad_norm": 0.265625, "learning_rate": 0.00017142801119846227, "loss": 1.4543, "step": 3165 }, { "epoch": 0.9896842763363551, "grad_norm": 0.240234375, "learning_rate": 0.00017141081308747003, "loss": 1.639, "step": 3166 }, { "epoch": 0.9899968740231322, "grad_norm": 0.26171875, "learning_rate": 0.00017139361066531605, "loss": 1.6788, "step": 3167 }, { "epoch": 0.9903094717099094, "grad_norm": 0.25390625, "learning_rate": 0.00017137640393303878, "loss": 1.5768, "step": 3168 }, { "epoch": 0.9906220693966865, "grad_norm": 0.2294921875, "learning_rate": 0.00017135919289167707, "loss": 1.6102, "step": 3169 }, { "epoch": 0.9909346670834636, "grad_norm": 0.255859375, "learning_rate": 0.00017134197754226996, "loss": 1.5106, "step": 3170 }, { "epoch": 0.9912472647702407, "grad_norm": 0.24609375, "learning_rate": 0.00017132475788585674, "loss": 1.4294, "step": 3171 }, { "epoch": 0.9915598624570178, "grad_norm": 0.2490234375, "learning_rate": 0.00017130753392347698, "loss": 1.552, "step": 3172 }, { "epoch": 0.9918724601437949, "grad_norm": 0.2421875, "learning_rate": 0.00017129030565617053, "loss": 1.4553, "step": 3173 }, { "epoch": 0.9921850578305721, "grad_norm": 0.2333984375, "learning_rate": 0.00017127307308497752, "loss": 1.6594, "step": 3174 }, { "epoch": 0.9924976555173491, "grad_norm": 0.2373046875, "learning_rate": 0.0001712558362109382, "loss": 1.7315, "step": 3175 }, { "epoch": 0.9928102532041263, "grad_norm": 0.248046875, "learning_rate": 0.0001712385950350933, "loss": 1.5794, "step": 3176 }, { "epoch": 0.9931228508909034, "grad_norm": 0.240234375, "learning_rate": 0.0001712213495584836, "loss": 1.7619, "step": 3177 }, { "epoch": 0.9934354485776805, "grad_norm": 0.228515625, "learning_rate": 0.00017120409978215034, "loss": 1.6773, "step": 3178 }, { "epoch": 0.9937480462644577, "grad_norm": 0.2294921875, "learning_rate": 0.00017118684570713476, "loss": 1.5635, "step": 3179 }, { "epoch": 0.9940606439512347, "grad_norm": 0.25390625, "learning_rate": 0.00017116958733447862, "loss": 1.8061, "step": 3180 }, { "epoch": 0.9943732416380119, "grad_norm": 0.2451171875, "learning_rate": 0.00017115232466522379, "loss": 1.496, "step": 3181 }, { "epoch": 0.994685839324789, "grad_norm": 0.283203125, "learning_rate": 0.0001711350577004125, "loss": 1.9932, "step": 3182 }, { "epoch": 0.9949984370115661, "grad_norm": 0.2373046875, "learning_rate": 0.00017111778644108707, "loss": 1.7719, "step": 3183 }, { "epoch": 0.9953110346983433, "grad_norm": 0.234375, "learning_rate": 0.00017110051088829023, "loss": 1.9202, "step": 3184 }, { "epoch": 0.9956236323851203, "grad_norm": 0.2392578125, "learning_rate": 0.000171083231043065, "loss": 1.7274, "step": 3185 }, { "epoch": 0.9959362300718975, "grad_norm": 0.2412109375, "learning_rate": 0.00017106594690645454, "loss": 1.6006, "step": 3186 }, { "epoch": 0.9962488277586746, "grad_norm": 0.25, "learning_rate": 0.00017104865847950224, "loss": 1.8627, "step": 3187 }, { "epoch": 0.9965614254454517, "grad_norm": 0.234375, "learning_rate": 0.00017103136576325194, "loss": 1.6147, "step": 3188 }, { "epoch": 0.9968740231322288, "grad_norm": 0.2412109375, "learning_rate": 0.00017101406875874754, "loss": 1.8255, "step": 3189 }, { "epoch": 0.9971866208190059, "grad_norm": 0.25390625, "learning_rate": 0.0001709967674670333, "loss": 1.6937, "step": 3190 }, { "epoch": 0.997499218505783, "grad_norm": 0.2431640625, "learning_rate": 0.0001709794618891538, "loss": 1.7125, "step": 3191 }, { "epoch": 0.9978118161925602, "grad_norm": 0.2470703125, "learning_rate": 0.0001709621520261537, "loss": 1.7602, "step": 3192 }, { "epoch": 0.9981244138793373, "grad_norm": 0.2412109375, "learning_rate": 0.00017094483787907804, "loss": 1.8293, "step": 3193 }, { "epoch": 0.9984370115661144, "grad_norm": 0.2314453125, "learning_rate": 0.00017092751944897214, "loss": 2.0362, "step": 3194 }, { "epoch": 0.9987496092528916, "grad_norm": 0.25, "learning_rate": 0.00017091019673688148, "loss": 1.8003, "step": 3195 }, { "epoch": 0.9990622069396686, "grad_norm": 0.2353515625, "learning_rate": 0.0001708928697438519, "loss": 1.6969, "step": 3196 }, { "epoch": 0.9993748046264458, "grad_norm": 0.2470703125, "learning_rate": 0.00017087553847092943, "loss": 1.4631, "step": 3197 }, { "epoch": 0.9996874023132228, "grad_norm": 0.248046875, "learning_rate": 0.0001708582029191604, "loss": 2.0063, "step": 3198 }, { "epoch": 1.0, "grad_norm": 0.2255859375, "learning_rate": 0.00017084086308959132, "loss": 1.4657, "step": 3199 }, { "epoch": 1.000312597686777, "grad_norm": 0.24609375, "learning_rate": 0.0001708235189832691, "loss": 1.6879, "step": 3200 }, { "epoch": 1.000312597686777, "eval_loss": 1.5738756656646729, "eval_runtime": 1912.6507, "eval_samples_per_second": 4.777, "eval_steps_per_second": 2.389, "step": 3200 }, { "epoch": 1.0006251953735543, "grad_norm": 0.244140625, "learning_rate": 0.0001708061706012408, "loss": 1.8955, "step": 3201 }, { "epoch": 1.0009377930603314, "grad_norm": 0.232421875, "learning_rate": 0.00017078881794455373, "loss": 1.4155, "step": 3202 }, { "epoch": 1.0012503907471084, "grad_norm": 0.2470703125, "learning_rate": 0.0001707714610142555, "loss": 1.3249, "step": 3203 }, { "epoch": 1.0015629884338857, "grad_norm": 0.24609375, "learning_rate": 0.000170754099811394, "loss": 1.8316, "step": 3204 }, { "epoch": 1.0018755861206627, "grad_norm": 0.265625, "learning_rate": 0.00017073673433701733, "loss": 1.7691, "step": 3205 }, { "epoch": 1.0021881838074398, "grad_norm": 0.25, "learning_rate": 0.00017071936459217386, "loss": 1.7469, "step": 3206 }, { "epoch": 1.0025007814942168, "grad_norm": 0.25390625, "learning_rate": 0.00017070199057791222, "loss": 1.6787, "step": 3207 }, { "epoch": 1.0028133791809941, "grad_norm": 0.234375, "learning_rate": 0.00017068461229528134, "loss": 1.601, "step": 3208 }, { "epoch": 1.0031259768677712, "grad_norm": 0.2412109375, "learning_rate": 0.00017066722974533032, "loss": 1.7489, "step": 3209 }, { "epoch": 1.0034385745545482, "grad_norm": 0.2451171875, "learning_rate": 0.0001706498429291086, "loss": 1.5783, "step": 3210 }, { "epoch": 1.0037511722413255, "grad_norm": 0.2451171875, "learning_rate": 0.00017063245184766585, "loss": 1.6569, "step": 3211 }, { "epoch": 1.0040637699281025, "grad_norm": 0.2373046875, "learning_rate": 0.00017061505650205194, "loss": 1.6837, "step": 3212 }, { "epoch": 1.0043763676148796, "grad_norm": 0.2392578125, "learning_rate": 0.0001705976568933171, "loss": 1.6423, "step": 3213 }, { "epoch": 1.0046889653016569, "grad_norm": 0.2451171875, "learning_rate": 0.00017058025302251175, "loss": 1.5818, "step": 3214 }, { "epoch": 1.005001562988434, "grad_norm": 0.2392578125, "learning_rate": 0.0001705628448906866, "loss": 1.4841, "step": 3215 }, { "epoch": 1.005314160675211, "grad_norm": 0.25390625, "learning_rate": 0.00017054543249889258, "loss": 1.5093, "step": 3216 }, { "epoch": 1.0056267583619882, "grad_norm": 0.2392578125, "learning_rate": 0.00017052801584818085, "loss": 1.6689, "step": 3217 }, { "epoch": 1.0059393560487653, "grad_norm": 0.2421875, "learning_rate": 0.00017051059493960297, "loss": 1.5929, "step": 3218 }, { "epoch": 1.0062519537355423, "grad_norm": 0.2451171875, "learning_rate": 0.0001704931697742106, "loss": 1.7321, "step": 3219 }, { "epoch": 1.0065645514223194, "grad_norm": 0.2392578125, "learning_rate": 0.00017047574035305578, "loss": 1.7554, "step": 3220 }, { "epoch": 1.0068771491090966, "grad_norm": 0.2314453125, "learning_rate": 0.00017045830667719068, "loss": 1.5192, "step": 3221 }, { "epoch": 1.0071897467958737, "grad_norm": 0.23046875, "learning_rate": 0.00017044086874766783, "loss": 1.5958, "step": 3222 }, { "epoch": 1.0075023444826507, "grad_norm": 0.24609375, "learning_rate": 0.00017042342656553995, "loss": 1.6242, "step": 3223 }, { "epoch": 1.007814942169428, "grad_norm": 0.2421875, "learning_rate": 0.00017040598013186008, "loss": 1.8164, "step": 3224 }, { "epoch": 1.008127539856205, "grad_norm": 0.251953125, "learning_rate": 0.00017038852944768152, "loss": 1.5937, "step": 3225 }, { "epoch": 1.0084401375429821, "grad_norm": 0.2451171875, "learning_rate": 0.00017037107451405768, "loss": 1.7428, "step": 3226 }, { "epoch": 1.0087527352297594, "grad_norm": 0.263671875, "learning_rate": 0.00017035361533204239, "loss": 1.6019, "step": 3227 }, { "epoch": 1.0090653329165364, "grad_norm": 0.24609375, "learning_rate": 0.00017033615190268972, "loss": 1.5994, "step": 3228 }, { "epoch": 1.0093779306033135, "grad_norm": 0.240234375, "learning_rate": 0.00017031868422705393, "loss": 1.6935, "step": 3229 }, { "epoch": 1.0096905282900908, "grad_norm": 0.2333984375, "learning_rate": 0.00017030121230618954, "loss": 1.4656, "step": 3230 }, { "epoch": 1.0100031259768678, "grad_norm": 0.244140625, "learning_rate": 0.0001702837361411514, "loss": 1.755, "step": 3231 }, { "epoch": 1.0103157236636449, "grad_norm": 0.23828125, "learning_rate": 0.00017026625573299454, "loss": 1.8469, "step": 3232 }, { "epoch": 1.010628321350422, "grad_norm": 0.2451171875, "learning_rate": 0.00017024877108277425, "loss": 1.6266, "step": 3233 }, { "epoch": 1.0109409190371992, "grad_norm": 0.25390625, "learning_rate": 0.00017023128219154616, "loss": 1.7379, "step": 3234 }, { "epoch": 1.0112535167239762, "grad_norm": 0.25, "learning_rate": 0.0001702137890603661, "loss": 1.6069, "step": 3235 }, { "epoch": 1.000312597686777, "grad_norm": 0.232421875, "learning_rate": 0.00017019629169029007, "loss": 1.7767, "step": 3236 }, { "epoch": 1.0006251953735543, "grad_norm": 0.251953125, "learning_rate": 0.0001701787900823745, "loss": 1.8416, "step": 3237 }, { "epoch": 1.0009377930603314, "grad_norm": 0.25, "learning_rate": 0.0001701612842376759, "loss": 1.3778, "step": 3238 }, { "epoch": 1.0012503907471084, "grad_norm": 0.2421875, "learning_rate": 0.00017014377415725118, "loss": 1.7909, "step": 3239 }, { "epoch": 1.0015629884338857, "grad_norm": 0.25, "learning_rate": 0.0001701262598421574, "loss": 1.6575, "step": 3240 }, { "epoch": 1.0018755861206627, "grad_norm": 0.2353515625, "learning_rate": 0.000170108741293452, "loss": 1.8209, "step": 3241 }, { "epoch": 1.0021881838074398, "grad_norm": 0.26953125, "learning_rate": 0.00017009121851219253, "loss": 1.6912, "step": 3242 }, { "epoch": 1.0025007814942168, "grad_norm": 0.263671875, "learning_rate": 0.0001700736914994369, "loss": 1.5038, "step": 3243 }, { "epoch": 1.0028133791809941, "grad_norm": 0.259765625, "learning_rate": 0.00017005616025624317, "loss": 1.364, "step": 3244 }, { "epoch": 1.0031259768677712, "grad_norm": 0.25390625, "learning_rate": 0.0001700386247836698, "loss": 1.8364, "step": 3245 }, { "epoch": 1.0034385745545482, "grad_norm": 0.265625, "learning_rate": 0.00017002108508277542, "loss": 1.6403, "step": 3246 }, { "epoch": 1.0037511722413255, "grad_norm": 0.259765625, "learning_rate": 0.00017000354115461887, "loss": 1.8994, "step": 3247 }, { "epoch": 1.0040637699281025, "grad_norm": 0.24609375, "learning_rate": 0.00016998599300025938, "loss": 1.6125, "step": 3248 }, { "epoch": 1.0043763676148796, "grad_norm": 0.267578125, "learning_rate": 0.00016996844062075624, "loss": 1.7269, "step": 3249 }, { "epoch": 1.0046889653016569, "grad_norm": 0.25390625, "learning_rate": 0.00016995088401716924, "loss": 1.4252, "step": 3250 }, { "epoch": 1.005001562988434, "grad_norm": 0.248046875, "learning_rate": 0.00016993332319055818, "loss": 1.5086, "step": 3251 }, { "epoch": 1.005314160675211, "grad_norm": 0.26171875, "learning_rate": 0.00016991575814198333, "loss": 1.702, "step": 3252 }, { "epoch": 1.0056267583619882, "grad_norm": 0.2451171875, "learning_rate": 0.00016989818887250504, "loss": 1.635, "step": 3253 }, { "epoch": 1.0059393560487653, "grad_norm": 0.25, "learning_rate": 0.00016988061538318402, "loss": 1.7163, "step": 3254 }, { "epoch": 1.0062519537355423, "grad_norm": 0.23828125, "learning_rate": 0.0001698630376750812, "loss": 1.5299, "step": 3255 }, { "epoch": 1.0065645514223194, "grad_norm": 0.2490234375, "learning_rate": 0.0001698454557492578, "loss": 2.0135, "step": 3256 }, { "epoch": 1.0068771491090966, "grad_norm": 0.2490234375, "learning_rate": 0.00016982786960677518, "loss": 1.4811, "step": 3257 }, { "epoch": 1.0071897467958737, "grad_norm": 0.36328125, "learning_rate": 0.0001698102792486951, "loss": 2.2739, "step": 3258 }, { "epoch": 1.0075023444826507, "grad_norm": 0.251953125, "learning_rate": 0.00016979268467607952, "loss": 1.3993, "step": 3259 }, { "epoch": 1.007814942169428, "grad_norm": 0.2431640625, "learning_rate": 0.00016977508588999062, "loss": 1.4281, "step": 3260 }, { "epoch": 1.008127539856205, "grad_norm": 0.255859375, "learning_rate": 0.00016975748289149088, "loss": 1.4793, "step": 3261 }, { "epoch": 1.0084401375429821, "grad_norm": 0.26171875, "learning_rate": 0.00016973987568164297, "loss": 1.8675, "step": 3262 }, { "epoch": 1.0087527352297594, "grad_norm": 0.2490234375, "learning_rate": 0.00016972226426150994, "loss": 1.5013, "step": 3263 }, { "epoch": 1.0090653329165364, "grad_norm": 0.255859375, "learning_rate": 0.00016970464863215495, "loss": 1.6938, "step": 3264 }, { "epoch": 1.0093779306033135, "grad_norm": 0.255859375, "learning_rate": 0.0001696870287946415, "loss": 1.4364, "step": 3265 }, { "epoch": 1.0096905282900908, "grad_norm": 0.279296875, "learning_rate": 0.0001696694047500333, "loss": 1.6278, "step": 3266 }, { "epoch": 1.0100031259768678, "grad_norm": 0.263671875, "learning_rate": 0.0001696517764993944, "loss": 2.03, "step": 3267 }, { "epoch": 1.0103157236636449, "grad_norm": 0.263671875, "learning_rate": 0.0001696341440437889, "loss": 1.6937, "step": 3268 }, { "epoch": 1.010628321350422, "grad_norm": 0.2578125, "learning_rate": 0.00016961650738428146, "loss": 1.6141, "step": 3269 }, { "epoch": 1.0109409190371992, "grad_norm": 0.259765625, "learning_rate": 0.00016959886652193678, "loss": 1.5632, "step": 3270 }, { "epoch": 1.0112535167239762, "grad_norm": 0.25390625, "learning_rate": 0.0001695812214578198, "loss": 1.8156, "step": 3271 }, { "epoch": 1.0115661144107533, "grad_norm": 0.2353515625, "learning_rate": 0.00016956357219299583, "loss": 1.5758, "step": 3272 }, { "epoch": 1.0118787120975306, "grad_norm": 0.25, "learning_rate": 0.00016954591872853035, "loss": 1.3459, "step": 3273 }, { "epoch": 1.0121913097843076, "grad_norm": 0.244140625, "learning_rate": 0.0001695282610654891, "loss": 1.4463, "step": 3274 }, { "epoch": 1.0125039074710847, "grad_norm": 0.25390625, "learning_rate": 0.00016951059920493816, "loss": 1.4842, "step": 3275 }, { "epoch": 1.012816505157862, "grad_norm": 0.2734375, "learning_rate": 0.0001694929331479438, "loss": 1.8982, "step": 3276 }, { "epoch": 1.013129102844639, "grad_norm": 0.2421875, "learning_rate": 0.00016947526289557246, "loss": 1.5836, "step": 3277 }, { "epoch": 1.013441700531416, "grad_norm": 0.263671875, "learning_rate": 0.000169457588448891, "loss": 1.659, "step": 3278 }, { "epoch": 1.0137542982181933, "grad_norm": 0.2578125, "learning_rate": 0.00016943990980896638, "loss": 1.8123, "step": 3279 }, { "epoch": 1.0140668959049703, "grad_norm": 0.275390625, "learning_rate": 0.00016942222697686595, "loss": 1.9833, "step": 3280 }, { "epoch": 1.0143794935917474, "grad_norm": 0.2734375, "learning_rate": 0.0001694045399536572, "loss": 1.414, "step": 3281 }, { "epoch": 1.0146920912785244, "grad_norm": 0.26171875, "learning_rate": 0.00016938684874040792, "loss": 1.8165, "step": 3282 }, { "epoch": 1.0150046889653017, "grad_norm": 0.23828125, "learning_rate": 0.00016936915333818617, "loss": 1.5251, "step": 3283 }, { "epoch": 1.0153172866520788, "grad_norm": 0.265625, "learning_rate": 0.00016935145374806024, "loss": 1.4809, "step": 3284 }, { "epoch": 1.0156298843388558, "grad_norm": 0.2490234375, "learning_rate": 0.00016933374997109865, "loss": 1.3905, "step": 3285 }, { "epoch": 1.015942482025633, "grad_norm": 0.251953125, "learning_rate": 0.00016931604200837023, "loss": 1.7941, "step": 3286 }, { "epoch": 1.0162550797124101, "grad_norm": 0.2470703125, "learning_rate": 0.00016929832986094402, "loss": 1.547, "step": 3287 }, { "epoch": 1.0165676773991872, "grad_norm": 0.24609375, "learning_rate": 0.00016928061352988936, "loss": 1.6516, "step": 3288 }, { "epoch": 1.0168802750859645, "grad_norm": 0.2373046875, "learning_rate": 0.00016926289301627575, "loss": 1.6519, "step": 3289 }, { "epoch": 1.0171928727727415, "grad_norm": 0.255859375, "learning_rate": 0.000169245168321173, "loss": 1.6238, "step": 3290 }, { "epoch": 1.0175054704595186, "grad_norm": 0.26171875, "learning_rate": 0.00016922743944565124, "loss": 1.6475, "step": 3291 }, { "epoch": 1.0178180681462958, "grad_norm": 0.25390625, "learning_rate": 0.00016920970639078075, "loss": 1.7816, "step": 3292 }, { "epoch": 1.0181306658330729, "grad_norm": 0.259765625, "learning_rate": 0.00016919196915763206, "loss": 1.612, "step": 3293 }, { "epoch": 1.01844326351985, "grad_norm": 0.275390625, "learning_rate": 0.00016917422774727602, "loss": 1.6574, "step": 3294 }, { "epoch": 1.018755861206627, "grad_norm": 0.26953125, "learning_rate": 0.00016915648216078374, "loss": 1.698, "step": 3295 }, { "epoch": 1.0190684588934042, "grad_norm": 0.25, "learning_rate": 0.0001691387323992265, "loss": 1.3921, "step": 3296 }, { "epoch": 1.0193810565801813, "grad_norm": 0.259765625, "learning_rate": 0.00016912097846367584, "loss": 1.5295, "step": 3297 }, { "epoch": 1.0196936542669583, "grad_norm": 0.240234375, "learning_rate": 0.00016910322035520363, "loss": 1.5175, "step": 3298 }, { "epoch": 1.0200062519537356, "grad_norm": 0.2451171875, "learning_rate": 0.00016908545807488195, "loss": 1.8336, "step": 3299 }, { "epoch": 1.0203188496405127, "grad_norm": 0.25, "learning_rate": 0.00016906769162378316, "loss": 1.6812, "step": 3300 }, { "epoch": 1.0206314473272897, "grad_norm": 0.251953125, "learning_rate": 0.0001690499210029798, "loss": 1.4225, "step": 3301 }, { "epoch": 1.020944045014067, "grad_norm": 0.26171875, "learning_rate": 0.00016903214621354472, "loss": 1.5756, "step": 3302 }, { "epoch": 1.021256642700844, "grad_norm": 0.2578125, "learning_rate": 0.000169014367256551, "loss": 1.7286, "step": 3303 }, { "epoch": 1.021569240387621, "grad_norm": 0.255859375, "learning_rate": 0.00016899658413307197, "loss": 1.462, "step": 3304 }, { "epoch": 1.0218818380743981, "grad_norm": 0.25, "learning_rate": 0.00016897879684418126, "loss": 1.6298, "step": 3305 }, { "epoch": 1.0221944357611754, "grad_norm": 0.2490234375, "learning_rate": 0.00016896100539095266, "loss": 1.5951, "step": 3306 }, { "epoch": 1.0225070334479525, "grad_norm": 0.25, "learning_rate": 0.00016894320977446032, "loss": 1.6417, "step": 3307 }, { "epoch": 1.0228196311347295, "grad_norm": 0.25390625, "learning_rate": 0.0001689254099957785, "loss": 1.5105, "step": 3308 }, { "epoch": 1.0231322288215068, "grad_norm": 0.259765625, "learning_rate": 0.0001689076060559819, "loss": 1.6498, "step": 3309 }, { "epoch": 1.0234448265082838, "grad_norm": 0.3203125, "learning_rate": 0.00016888979795614525, "loss": 2.3866, "step": 3310 }, { "epoch": 1.0237574241950609, "grad_norm": 0.2421875, "learning_rate": 0.00016887198569734375, "loss": 1.6526, "step": 3311 }, { "epoch": 1.0240700218818382, "grad_norm": 0.251953125, "learning_rate": 0.00016885416928065272, "loss": 1.6504, "step": 3312 }, { "epoch": 1.0243826195686152, "grad_norm": 0.26953125, "learning_rate": 0.00016883634870714772, "loss": 1.5203, "step": 3313 }, { "epoch": 1.0246952172553923, "grad_norm": 0.2578125, "learning_rate": 0.00016881852397790465, "loss": 1.3883, "step": 3314 }, { "epoch": 1.0250078149421695, "grad_norm": 0.24609375, "learning_rate": 0.0001688006950939996, "loss": 1.419, "step": 3315 }, { "epoch": 1.0253204126289466, "grad_norm": 0.30859375, "learning_rate": 0.00016878286205650888, "loss": 2.3922, "step": 3316 }, { "epoch": 1.0256330103157236, "grad_norm": 0.251953125, "learning_rate": 0.00016876502486650914, "loss": 1.4724, "step": 3317 }, { "epoch": 1.0259456080025007, "grad_norm": 0.25390625, "learning_rate": 0.0001687471835250772, "loss": 1.541, "step": 3318 }, { "epoch": 1.026258205689278, "grad_norm": 0.25390625, "learning_rate": 0.00016872933803329025, "loss": 1.6618, "step": 3319 }, { "epoch": 1.026570803376055, "grad_norm": 0.251953125, "learning_rate": 0.00016871148839222552, "loss": 1.8275, "step": 3320 }, { "epoch": 1.026883401062832, "grad_norm": 0.2578125, "learning_rate": 0.0001686936346029607, "loss": 1.7057, "step": 3321 }, { "epoch": 1.0271959987496093, "grad_norm": 0.259765625, "learning_rate": 0.00016867577666657363, "loss": 1.5628, "step": 3322 }, { "epoch": 1.0275085964363864, "grad_norm": 0.255859375, "learning_rate": 0.0001686579145841424, "loss": 1.5199, "step": 3323 }, { "epoch": 1.0278211941231634, "grad_norm": 0.255859375, "learning_rate": 0.00016864004835674535, "loss": 1.1948, "step": 3324 }, { "epoch": 1.0281337918099407, "grad_norm": 0.2451171875, "learning_rate": 0.00016862217798546117, "loss": 1.569, "step": 3325 }, { "epoch": 1.0284463894967177, "grad_norm": 0.26171875, "learning_rate": 0.0001686043034713686, "loss": 1.5452, "step": 3326 }, { "epoch": 1.0287589871834948, "grad_norm": 0.255859375, "learning_rate": 0.00016858642481554684, "loss": 1.5146, "step": 3327 }, { "epoch": 1.029071584870272, "grad_norm": 0.24609375, "learning_rate": 0.0001685685420190752, "loss": 1.7647, "step": 3328 }, { "epoch": 1.029384182557049, "grad_norm": 0.26171875, "learning_rate": 0.0001685506550830333, "loss": 1.705, "step": 3329 }, { "epoch": 1.0296967802438262, "grad_norm": 0.25, "learning_rate": 0.000168532764008501, "loss": 1.7709, "step": 3330 }, { "epoch": 1.0300093779306032, "grad_norm": 0.263671875, "learning_rate": 0.0001685148687965584, "loss": 1.3867, "step": 3331 }, { "epoch": 1.0303219756173805, "grad_norm": 0.2578125, "learning_rate": 0.00016849696944828586, "loss": 1.5167, "step": 3332 }, { "epoch": 1.0306345733041575, "grad_norm": 0.26953125, "learning_rate": 0.000168479065964764, "loss": 1.7646, "step": 3333 }, { "epoch": 1.0309471709909346, "grad_norm": 0.279296875, "learning_rate": 0.00016846115834707367, "loss": 1.7008, "step": 3334 }, { "epoch": 1.0312597686777119, "grad_norm": 0.2578125, "learning_rate": 0.00016844324659629595, "loss": 1.3537, "step": 3335 }, { "epoch": 1.031572366364489, "grad_norm": 0.24609375, "learning_rate": 0.00016842533071351223, "loss": 1.2803, "step": 3336 }, { "epoch": 1.031884964051266, "grad_norm": 0.26171875, "learning_rate": 0.00016840741069980408, "loss": 1.6595, "step": 3337 }, { "epoch": 1.0321975617380432, "grad_norm": 0.263671875, "learning_rate": 0.00016838948655625337, "loss": 2.0696, "step": 3338 }, { "epoch": 1.0325101594248203, "grad_norm": 0.240234375, "learning_rate": 0.00016837155828394225, "loss": 1.5362, "step": 3339 }, { "epoch": 1.0328227571115973, "grad_norm": 0.2470703125, "learning_rate": 0.000168353625883953, "loss": 1.4373, "step": 3340 }, { "epoch": 1.0331353547983746, "grad_norm": 0.2431640625, "learning_rate": 0.00016833568935736826, "loss": 1.2943, "step": 3341 }, { "epoch": 1.0334479524851516, "grad_norm": 0.271484375, "learning_rate": 0.00016831774870527087, "loss": 1.6662, "step": 3342 }, { "epoch": 1.0337605501719287, "grad_norm": 0.2412109375, "learning_rate": 0.00016829980392874392, "loss": 1.5353, "step": 3343 }, { "epoch": 1.0340731478587057, "grad_norm": 0.25390625, "learning_rate": 0.00016828185502887078, "loss": 1.6163, "step": 3344 }, { "epoch": 1.034385745545483, "grad_norm": 0.25390625, "learning_rate": 0.00016826390200673502, "loss": 1.3735, "step": 3345 }, { "epoch": 1.03469834323226, "grad_norm": 0.255859375, "learning_rate": 0.00016824594486342052, "loss": 1.6119, "step": 3346 }, { "epoch": 1.0350109409190371, "grad_norm": 0.255859375, "learning_rate": 0.00016822798360001138, "loss": 1.8152, "step": 3347 }, { "epoch": 1.0353235386058144, "grad_norm": 0.2578125, "learning_rate": 0.00016821001821759192, "loss": 1.61, "step": 3348 }, { "epoch": 1.0356361362925914, "grad_norm": 0.2490234375, "learning_rate": 0.0001681920487172467, "loss": 1.6514, "step": 3349 }, { "epoch": 1.0359487339793685, "grad_norm": 0.24609375, "learning_rate": 0.00016817407510006066, "loss": 1.6709, "step": 3350 }, { "epoch": 1.0362613316661458, "grad_norm": 0.265625, "learning_rate": 0.00016815609736711882, "loss": 1.7892, "step": 3351 }, { "epoch": 1.0365739293529228, "grad_norm": 0.26171875, "learning_rate": 0.0001681381155195065, "loss": 1.4647, "step": 3352 }, { "epoch": 1.0368865270396999, "grad_norm": 0.2490234375, "learning_rate": 0.00016812012955830935, "loss": 1.5796, "step": 3353 }, { "epoch": 1.0371991247264771, "grad_norm": 0.255859375, "learning_rate": 0.00016810213948461315, "loss": 1.525, "step": 3354 }, { "epoch": 1.0375117224132542, "grad_norm": 0.2578125, "learning_rate": 0.000168084145299504, "loss": 1.6335, "step": 3355 }, { "epoch": 1.0378243201000312, "grad_norm": 0.26171875, "learning_rate": 0.00016806614700406826, "loss": 1.5538, "step": 3356 }, { "epoch": 1.0381369177868083, "grad_norm": 0.24609375, "learning_rate": 0.00016804814459939248, "loss": 1.7529, "step": 3357 }, { "epoch": 1.0384495154735855, "grad_norm": 0.25390625, "learning_rate": 0.00016803013808656348, "loss": 1.9922, "step": 3358 }, { "epoch": 1.0387621131603626, "grad_norm": 0.2734375, "learning_rate": 0.00016801212746666834, "loss": 1.807, "step": 3359 }, { "epoch": 1.0390747108471396, "grad_norm": 0.2578125, "learning_rate": 0.00016799411274079446, "loss": 1.7129, "step": 3360 }, { "epoch": 1.039387308533917, "grad_norm": 0.255859375, "learning_rate": 0.00016797609391002932, "loss": 1.6807, "step": 3361 }, { "epoch": 1.039699906220694, "grad_norm": 0.255859375, "learning_rate": 0.00016795807097546073, "loss": 1.5031, "step": 3362 }, { "epoch": 1.040012503907471, "grad_norm": 0.2470703125, "learning_rate": 0.00016794004393817682, "loss": 1.4893, "step": 3363 }, { "epoch": 1.0403251015942483, "grad_norm": 0.248046875, "learning_rate": 0.00016792201279926586, "loss": 1.8596, "step": 3364 }, { "epoch": 1.0406376992810253, "grad_norm": 0.25390625, "learning_rate": 0.0001679039775598165, "loss": 1.5344, "step": 3365 }, { "epoch": 1.0409502969678024, "grad_norm": 0.27734375, "learning_rate": 0.00016788593822091743, "loss": 1.5466, "step": 3366 }, { "epoch": 1.0412628946545797, "grad_norm": 0.265625, "learning_rate": 0.00016786789478365777, "loss": 1.5067, "step": 3367 }, { "epoch": 1.0415754923413567, "grad_norm": 0.25, "learning_rate": 0.0001678498472491268, "loss": 1.5952, "step": 3368 }, { "epoch": 1.0418880900281338, "grad_norm": 0.255859375, "learning_rate": 0.0001678317956184141, "loss": 1.3462, "step": 3369 }, { "epoch": 1.0422006877149108, "grad_norm": 0.265625, "learning_rate": 0.0001678137398926095, "loss": 1.9309, "step": 3370 }, { "epoch": 1.042513285401688, "grad_norm": 0.2431640625, "learning_rate": 0.00016779568007280294, "loss": 1.3741, "step": 3371 }, { "epoch": 1.0428258830884651, "grad_norm": 0.255859375, "learning_rate": 0.00016777761616008482, "loss": 1.6094, "step": 3372 }, { "epoch": 1.0431384807752422, "grad_norm": 0.25390625, "learning_rate": 0.00016775954815554563, "loss": 1.5275, "step": 3373 }, { "epoch": 1.0434510784620195, "grad_norm": 0.259765625, "learning_rate": 0.0001677414760602762, "loss": 1.6487, "step": 3374 }, { "epoch": 1.0437636761487965, "grad_norm": 0.2470703125, "learning_rate": 0.0001677233998753675, "loss": 1.7877, "step": 3375 }, { "epoch": 1.0440762738355736, "grad_norm": 0.259765625, "learning_rate": 0.00016770531960191086, "loss": 1.8232, "step": 3376 }, { "epoch": 1.0443888715223508, "grad_norm": 0.251953125, "learning_rate": 0.00016768723524099782, "loss": 1.5395, "step": 3377 }, { "epoch": 1.0447014692091279, "grad_norm": 0.251953125, "learning_rate": 0.0001676691467937201, "loss": 1.5682, "step": 3378 }, { "epoch": 1.045014066895905, "grad_norm": 0.25390625, "learning_rate": 0.00016765105426116977, "loss": 1.9006, "step": 3379 }, { "epoch": 1.045326664582682, "grad_norm": 0.2490234375, "learning_rate": 0.0001676329576444391, "loss": 1.7142, "step": 3380 }, { "epoch": 1.0456392622694592, "grad_norm": 0.26171875, "learning_rate": 0.00016761485694462058, "loss": 1.502, "step": 3381 }, { "epoch": 1.0459518599562363, "grad_norm": 0.25, "learning_rate": 0.00016759675216280697, "loss": 1.5799, "step": 3382 }, { "epoch": 1.0462644576430133, "grad_norm": 0.283203125, "learning_rate": 0.00016757864330009132, "loss": 1.3851, "step": 3383 }, { "epoch": 1.0465770553297906, "grad_norm": 0.2451171875, "learning_rate": 0.0001675605303575668, "loss": 1.747, "step": 3384 }, { "epoch": 1.0468896530165677, "grad_norm": 0.26171875, "learning_rate": 0.00016754241333632704, "loss": 1.5268, "step": 3385 }, { "epoch": 1.0472022507033447, "grad_norm": 0.25390625, "learning_rate": 0.0001675242922374657, "loss": 1.8177, "step": 3386 }, { "epoch": 1.047514848390122, "grad_norm": 0.23828125, "learning_rate": 0.00016750616706207678, "loss": 1.841, "step": 3387 }, { "epoch": 1.047827446076899, "grad_norm": 0.25390625, "learning_rate": 0.00016748803781125454, "loss": 1.6151, "step": 3388 }, { "epoch": 1.048140043763676, "grad_norm": 0.255859375, "learning_rate": 0.00016746990448609342, "loss": 1.5231, "step": 3389 }, { "epoch": 1.0484526414504534, "grad_norm": 0.26171875, "learning_rate": 0.00016745176708768823, "loss": 1.5842, "step": 3390 }, { "epoch": 1.0487652391372304, "grad_norm": 0.263671875, "learning_rate": 0.00016743362561713387, "loss": 1.5161, "step": 3391 }, { "epoch": 1.0490778368240075, "grad_norm": 0.25390625, "learning_rate": 0.00016741548007552566, "loss": 1.5454, "step": 3392 }, { "epoch": 1.0493904345107845, "grad_norm": 0.271484375, "learning_rate": 0.00016739733046395894, "loss": 1.5117, "step": 3393 }, { "epoch": 1.0497030321975618, "grad_norm": 0.26171875, "learning_rate": 0.00016737917678352954, "loss": 1.7243, "step": 3394 }, { "epoch": 1.0500156298843388, "grad_norm": 0.265625, "learning_rate": 0.00016736101903533335, "loss": 1.7013, "step": 3395 }, { "epoch": 1.0503282275711159, "grad_norm": 0.267578125, "learning_rate": 0.00016734285722046663, "loss": 1.6861, "step": 3396 }, { "epoch": 1.0506408252578932, "grad_norm": 0.244140625, "learning_rate": 0.00016732469134002578, "loss": 1.5176, "step": 3397 }, { "epoch": 1.0509534229446702, "grad_norm": 0.2734375, "learning_rate": 0.00016730652139510752, "loss": 1.4484, "step": 3398 }, { "epoch": 1.0512660206314473, "grad_norm": 0.25390625, "learning_rate": 0.0001672883473868088, "loss": 1.6714, "step": 3399 }, { "epoch": 1.0515786183182245, "grad_norm": 0.2578125, "learning_rate": 0.0001672701693162268, "loss": 1.7261, "step": 3400 }, { "epoch": 1.0518912160050016, "grad_norm": 0.263671875, "learning_rate": 0.00016725198718445898, "loss": 1.7724, "step": 3401 }, { "epoch": 1.0522038136917786, "grad_norm": 0.26171875, "learning_rate": 0.00016723380099260295, "loss": 1.6345, "step": 3402 }, { "epoch": 1.052516411378556, "grad_norm": 0.265625, "learning_rate": 0.00016721561074175672, "loss": 1.7645, "step": 3403 }, { "epoch": 1.052829009065333, "grad_norm": 0.251953125, "learning_rate": 0.0001671974164330184, "loss": 1.4015, "step": 3404 }, { "epoch": 1.05314160675211, "grad_norm": 0.2490234375, "learning_rate": 0.0001671792180674864, "loss": 1.6586, "step": 3405 }, { "epoch": 1.053454204438887, "grad_norm": 0.25, "learning_rate": 0.0001671610156462594, "loss": 1.7158, "step": 3406 }, { "epoch": 1.0537668021256643, "grad_norm": 0.25, "learning_rate": 0.0001671428091704363, "loss": 1.615, "step": 3407 }, { "epoch": 1.0540793998124414, "grad_norm": 0.2578125, "learning_rate": 0.0001671245986411163, "loss": 1.9367, "step": 3408 }, { "epoch": 1.0543919974992184, "grad_norm": 0.25390625, "learning_rate": 0.00016710638405939866, "loss": 1.6939, "step": 3409 }, { "epoch": 1.0547045951859957, "grad_norm": 0.25390625, "learning_rate": 0.00016708816542638317, "loss": 1.4502, "step": 3410 }, { "epoch": 1.0550171928727727, "grad_norm": 0.265625, "learning_rate": 0.00016706994274316963, "loss": 1.4356, "step": 3411 }, { "epoch": 1.0553297905595498, "grad_norm": 0.255859375, "learning_rate": 0.00016705171601085814, "loss": 1.672, "step": 3412 }, { "epoch": 1.055642388246327, "grad_norm": 0.263671875, "learning_rate": 0.00016703348523054915, "loss": 1.6083, "step": 3413 }, { "epoch": 1.055954985933104, "grad_norm": 0.26953125, "learning_rate": 0.00016701525040334323, "loss": 1.3438, "step": 3414 }, { "epoch": 1.0562675836198812, "grad_norm": 0.2470703125, "learning_rate": 0.00016699701153034122, "loss": 1.5055, "step": 3415 }, { "epoch": 1.0565801813066584, "grad_norm": 0.267578125, "learning_rate": 0.00016697876861264426, "loss": 1.6327, "step": 3416 }, { "epoch": 1.0568927789934355, "grad_norm": 0.26171875, "learning_rate": 0.0001669605216513537, "loss": 1.4462, "step": 3417 }, { "epoch": 1.0572053766802125, "grad_norm": 0.248046875, "learning_rate": 0.0001669422706475711, "loss": 1.4378, "step": 3418 }, { "epoch": 1.0575179743669896, "grad_norm": 0.240234375, "learning_rate": 0.00016692401560239835, "loss": 1.7001, "step": 3419 }, { "epoch": 1.0578305720537668, "grad_norm": 0.2451171875, "learning_rate": 0.00016690575651693746, "loss": 1.5977, "step": 3420 }, { "epoch": 1.058143169740544, "grad_norm": 0.2470703125, "learning_rate": 0.00016688749339229079, "loss": 1.5273, "step": 3421 }, { "epoch": 1.058455767427321, "grad_norm": 0.26171875, "learning_rate": 0.00016686922622956093, "loss": 1.6957, "step": 3422 }, { "epoch": 1.0587683651140982, "grad_norm": 0.287109375, "learning_rate": 0.00016685095502985066, "loss": 2.3469, "step": 3423 }, { "epoch": 1.0590809628008753, "grad_norm": 0.267578125, "learning_rate": 0.000166832679794263, "loss": 2.0036, "step": 3424 }, { "epoch": 1.0593935604876523, "grad_norm": 0.275390625, "learning_rate": 0.0001668144005239014, "loss": 1.606, "step": 3425 }, { "epoch": 1.0597061581744296, "grad_norm": 0.26171875, "learning_rate": 0.00016679611721986923, "loss": 1.5605, "step": 3426 }, { "epoch": 1.0600187558612066, "grad_norm": 0.2421875, "learning_rate": 0.00016677782988327032, "loss": 1.4457, "step": 3427 }, { "epoch": 1.0603313535479837, "grad_norm": 0.259765625, "learning_rate": 0.00016675953851520876, "loss": 1.6452, "step": 3428 }, { "epoch": 1.060643951234761, "grad_norm": 0.294921875, "learning_rate": 0.00016674124311678878, "loss": 2.1626, "step": 3429 }, { "epoch": 1.060956548921538, "grad_norm": 0.259765625, "learning_rate": 0.00016672294368911495, "loss": 1.6135, "step": 3430 }, { "epoch": 1.061269146608315, "grad_norm": 0.263671875, "learning_rate": 0.00016670464023329194, "loss": 1.6071, "step": 3431 }, { "epoch": 1.0615817442950921, "grad_norm": 0.2734375, "learning_rate": 0.00016668633275042477, "loss": 1.8194, "step": 3432 }, { "epoch": 1.0618943419818694, "grad_norm": 0.26171875, "learning_rate": 0.00016666802124161876, "loss": 1.3389, "step": 3433 }, { "epoch": 1.0622069396686464, "grad_norm": 0.248046875, "learning_rate": 0.0001666497057079793, "loss": 1.832, "step": 3434 }, { "epoch": 1.0625195373554235, "grad_norm": 0.26171875, "learning_rate": 0.00016663138615061223, "loss": 1.5173, "step": 3435 }, { "epoch": 1.0628321350422008, "grad_norm": 0.2490234375, "learning_rate": 0.00016661306257062346, "loss": 1.5316, "step": 3436 }, { "epoch": 1.0631447327289778, "grad_norm": 0.2470703125, "learning_rate": 0.0001665947349691192, "loss": 1.8877, "step": 3437 }, { "epoch": 1.0634573304157549, "grad_norm": 0.2578125, "learning_rate": 0.00016657640334720594, "loss": 1.6651, "step": 3438 }, { "epoch": 1.0637699281025321, "grad_norm": 0.263671875, "learning_rate": 0.00016655806770599034, "loss": 1.3243, "step": 3439 }, { "epoch": 1.0640825257893092, "grad_norm": 0.251953125, "learning_rate": 0.00016653972804657938, "loss": 1.704, "step": 3440 }, { "epoch": 1.0643951234760862, "grad_norm": 0.2490234375, "learning_rate": 0.00016652138437008027, "loss": 1.4222, "step": 3441 }, { "epoch": 1.0647077211628635, "grad_norm": 0.251953125, "learning_rate": 0.00016650303667760044, "loss": 1.7757, "step": 3442 }, { "epoch": 1.0650203188496405, "grad_norm": 0.267578125, "learning_rate": 0.0001664846849702475, "loss": 1.7258, "step": 3443 }, { "epoch": 1.0653329165364176, "grad_norm": 0.263671875, "learning_rate": 0.00016646632924912939, "loss": 1.7039, "step": 3444 }, { "epoch": 1.0656455142231946, "grad_norm": 0.25390625, "learning_rate": 0.00016644796951535432, "loss": 1.6286, "step": 3445 }, { "epoch": 1.065958111909972, "grad_norm": 0.263671875, "learning_rate": 0.00016642960577003066, "loss": 1.4467, "step": 3446 }, { "epoch": 1.066270709596749, "grad_norm": 0.275390625, "learning_rate": 0.000166411238014267, "loss": 1.5944, "step": 3447 }, { "epoch": 1.066583307283526, "grad_norm": 0.24609375, "learning_rate": 0.00016639286624917232, "loss": 1.7422, "step": 3448 }, { "epoch": 1.0668959049703033, "grad_norm": 0.251953125, "learning_rate": 0.00016637449047585568, "loss": 1.6975, "step": 3449 }, { "epoch": 1.0672085026570803, "grad_norm": 0.248046875, "learning_rate": 0.00016635611069542648, "loss": 1.5701, "step": 3450 }, { "epoch": 1.0675211003438574, "grad_norm": 0.24609375, "learning_rate": 0.0001663377269089943, "loss": 1.7853, "step": 3451 }, { "epoch": 1.0678336980306347, "grad_norm": 0.2578125, "learning_rate": 0.00016631933911766904, "loss": 1.5712, "step": 3452 }, { "epoch": 1.0681462957174117, "grad_norm": 0.25390625, "learning_rate": 0.00016630094732256073, "loss": 1.6186, "step": 3453 }, { "epoch": 1.0684588934041888, "grad_norm": 0.275390625, "learning_rate": 0.00016628255152477978, "loss": 1.5015, "step": 3454 }, { "epoch": 1.068771491090966, "grad_norm": 0.25390625, "learning_rate": 0.00016626415172543672, "loss": 1.6665, "step": 3455 }, { "epoch": 1.069084088777743, "grad_norm": 0.255859375, "learning_rate": 0.00016624574792564235, "loss": 1.7477, "step": 3456 }, { "epoch": 1.0693966864645201, "grad_norm": 0.26953125, "learning_rate": 0.0001662273401265078, "loss": 1.7283, "step": 3457 }, { "epoch": 1.0697092841512972, "grad_norm": 0.263671875, "learning_rate": 0.0001662089283291443, "loss": 1.5704, "step": 3458 }, { "epoch": 1.0700218818380745, "grad_norm": 0.26953125, "learning_rate": 0.00016619051253466343, "loss": 1.5737, "step": 3459 }, { "epoch": 1.0703344795248515, "grad_norm": 0.2578125, "learning_rate": 0.000166172092744177, "loss": 1.4126, "step": 3460 }, { "epoch": 1.0706470772116286, "grad_norm": 0.265625, "learning_rate": 0.000166153668958797, "loss": 1.5957, "step": 3461 }, { "epoch": 1.0709596748984058, "grad_norm": 0.2734375, "learning_rate": 0.00016613524117963565, "loss": 1.8575, "step": 3462 }, { "epoch": 1.0712722725851829, "grad_norm": 0.265625, "learning_rate": 0.00016611680940780557, "loss": 1.4722, "step": 3463 }, { "epoch": 1.07158487027196, "grad_norm": 0.267578125, "learning_rate": 0.00016609837364441944, "loss": 1.3546, "step": 3464 }, { "epoch": 1.0718974679587372, "grad_norm": 0.265625, "learning_rate": 0.00016607993389059023, "loss": 1.7442, "step": 3465 }, { "epoch": 1.0722100656455142, "grad_norm": 0.26171875, "learning_rate": 0.00016606149014743125, "loss": 1.58, "step": 3466 }, { "epoch": 1.0725226633322913, "grad_norm": 0.25390625, "learning_rate": 0.0001660430424160559, "loss": 1.4769, "step": 3467 }, { "epoch": 1.0728352610190686, "grad_norm": 0.255859375, "learning_rate": 0.00016602459069757795, "loss": 1.6728, "step": 3468 }, { "epoch": 1.0731478587058456, "grad_norm": 0.275390625, "learning_rate": 0.0001660061349931113, "loss": 1.6091, "step": 3469 }, { "epoch": 1.0734604563926227, "grad_norm": 0.2578125, "learning_rate": 0.00016598767530377016, "loss": 1.5068, "step": 3470 }, { "epoch": 1.0737730540793997, "grad_norm": 0.255859375, "learning_rate": 0.00016596921163066899, "loss": 1.4278, "step": 3471 }, { "epoch": 1.074085651766177, "grad_norm": 0.27734375, "learning_rate": 0.00016595074397492246, "loss": 1.458, "step": 3472 }, { "epoch": 1.074398249452954, "grad_norm": 0.2734375, "learning_rate": 0.00016593227233764545, "loss": 1.6513, "step": 3473 }, { "epoch": 1.074710847139731, "grad_norm": 0.25390625, "learning_rate": 0.00016591379671995315, "loss": 1.397, "step": 3474 }, { "epoch": 1.0750234448265084, "grad_norm": 0.265625, "learning_rate": 0.00016589531712296092, "loss": 1.5697, "step": 3475 }, { "epoch": 1.0753360425132854, "grad_norm": 0.255859375, "learning_rate": 0.00016587683354778445, "loss": 1.4832, "step": 3476 }, { "epoch": 1.0756486402000625, "grad_norm": 0.25390625, "learning_rate": 0.00016585834599553958, "loss": 1.6339, "step": 3477 }, { "epoch": 1.0759612378868397, "grad_norm": 0.263671875, "learning_rate": 0.00016583985446734246, "loss": 1.6584, "step": 3478 }, { "epoch": 1.0762738355736168, "grad_norm": 0.26953125, "learning_rate": 0.00016582135896430945, "loss": 1.5755, "step": 3479 }, { "epoch": 1.0765864332603938, "grad_norm": 0.26171875, "learning_rate": 0.00016580285948755705, "loss": 1.4067, "step": 3480 }, { "epoch": 1.076899030947171, "grad_norm": 0.263671875, "learning_rate": 0.0001657843560382022, "loss": 1.5585, "step": 3481 }, { "epoch": 1.0772116286339481, "grad_norm": 0.27734375, "learning_rate": 0.00016576584861736197, "loss": 1.7006, "step": 3482 }, { "epoch": 1.0775242263207252, "grad_norm": 0.259765625, "learning_rate": 0.00016574733722615363, "loss": 1.2913, "step": 3483 }, { "epoch": 1.0778368240075022, "grad_norm": 0.2578125, "learning_rate": 0.00016572882186569477, "loss": 1.6498, "step": 3484 }, { "epoch": 1.0781494216942795, "grad_norm": 0.2470703125, "learning_rate": 0.00016571030253710315, "loss": 1.6436, "step": 3485 }, { "epoch": 1.0784620193810566, "grad_norm": 0.25390625, "learning_rate": 0.00016569177924149686, "loss": 1.5416, "step": 3486 }, { "epoch": 1.0787746170678336, "grad_norm": 0.271484375, "learning_rate": 0.00016567325197999413, "loss": 1.3999, "step": 3487 }, { "epoch": 1.079087214754611, "grad_norm": 0.25390625, "learning_rate": 0.0001656547207537135, "loss": 1.4576, "step": 3488 }, { "epoch": 1.079399812441388, "grad_norm": 0.275390625, "learning_rate": 0.00016563618556377372, "loss": 1.7535, "step": 3489 }, { "epoch": 1.079712410128165, "grad_norm": 0.259765625, "learning_rate": 0.00016561764641129372, "loss": 1.5851, "step": 3490 }, { "epoch": 1.0800250078149423, "grad_norm": 0.2578125, "learning_rate": 0.00016559910329739285, "loss": 1.4995, "step": 3491 }, { "epoch": 1.0803376055017193, "grad_norm": 0.2734375, "learning_rate": 0.0001655805562231905, "loss": 1.6751, "step": 3492 }, { "epoch": 1.0806502031884964, "grad_norm": 0.259765625, "learning_rate": 0.00016556200518980641, "loss": 1.4372, "step": 3493 }, { "epoch": 1.0809628008752736, "grad_norm": 0.267578125, "learning_rate": 0.0001655434501983605, "loss": 1.7528, "step": 3494 }, { "epoch": 1.0812753985620507, "grad_norm": 0.275390625, "learning_rate": 0.000165524891249973, "loss": 1.5594, "step": 3495 }, { "epoch": 1.0815879962488277, "grad_norm": 0.259765625, "learning_rate": 0.0001655063283457643, "loss": 1.5924, "step": 3496 }, { "epoch": 1.0819005939356048, "grad_norm": 0.26953125, "learning_rate": 0.00016548776148685512, "loss": 1.7186, "step": 3497 }, { "epoch": 1.082213191622382, "grad_norm": 0.265625, "learning_rate": 0.00016546919067436628, "loss": 1.6862, "step": 3498 }, { "epoch": 1.082525789309159, "grad_norm": 0.25, "learning_rate": 0.000165450615909419, "loss": 1.4892, "step": 3499 }, { "epoch": 1.0828383869959362, "grad_norm": 0.24609375, "learning_rate": 0.0001654320371931346, "loss": 1.43, "step": 3500 }, { "epoch": 1.0831509846827134, "grad_norm": 0.33984375, "learning_rate": 0.00016541345452663478, "loss": 2.2191, "step": 3501 }, { "epoch": 1.0834635823694905, "grad_norm": 0.255859375, "learning_rate": 0.00016539486791104132, "loss": 1.3285, "step": 3502 }, { "epoch": 1.0837761800562675, "grad_norm": 0.2578125, "learning_rate": 0.00016537627734747635, "loss": 1.4255, "step": 3503 }, { "epoch": 1.0840887777430448, "grad_norm": 0.26171875, "learning_rate": 0.0001653576828370622, "loss": 1.9487, "step": 3504 }, { "epoch": 1.0844013754298218, "grad_norm": 0.2578125, "learning_rate": 0.0001653390843809215, "loss": 1.5777, "step": 3505 }, { "epoch": 1.084713973116599, "grad_norm": 0.255859375, "learning_rate": 0.00016532048198017696, "loss": 1.3895, "step": 3506 }, { "epoch": 1.0850265708033762, "grad_norm": 0.255859375, "learning_rate": 0.0001653018756359517, "loss": 1.5765, "step": 3507 }, { "epoch": 1.0853391684901532, "grad_norm": 0.263671875, "learning_rate": 0.000165283265349369, "loss": 1.7731, "step": 3508 }, { "epoch": 1.0856517661769303, "grad_norm": 0.26953125, "learning_rate": 0.00016526465112155238, "loss": 1.6334, "step": 3509 }, { "epoch": 1.0859643638637073, "grad_norm": 0.265625, "learning_rate": 0.00016524603295362558, "loss": 1.3732, "step": 3510 }, { "epoch": 1.0862769615504846, "grad_norm": 0.255859375, "learning_rate": 0.00016522741084671268, "loss": 1.7101, "step": 3511 }, { "epoch": 1.0865895592372616, "grad_norm": 0.26171875, "learning_rate": 0.0001652087848019378, "loss": 1.6682, "step": 3512 }, { "epoch": 1.0869021569240387, "grad_norm": 0.25390625, "learning_rate": 0.00016519015482042556, "loss": 1.8203, "step": 3513 }, { "epoch": 1.087214754610816, "grad_norm": 0.26953125, "learning_rate": 0.00016517152090330054, "loss": 1.544, "step": 3514 }, { "epoch": 1.087527352297593, "grad_norm": 0.265625, "learning_rate": 0.0001651528830516878, "loss": 1.5914, "step": 3515 }, { "epoch": 1.08783994998437, "grad_norm": 0.251953125, "learning_rate": 0.00016513424126671241, "loss": 1.8966, "step": 3516 }, { "epoch": 1.0881525476711473, "grad_norm": 0.263671875, "learning_rate": 0.00016511559554949993, "loss": 1.3955, "step": 3517 }, { "epoch": 1.0884651453579244, "grad_norm": 0.25390625, "learning_rate": 0.00016509694590117598, "loss": 1.6077, "step": 3518 }, { "epoch": 1.0887777430447014, "grad_norm": 0.267578125, "learning_rate": 0.00016507829232286644, "loss": 1.5418, "step": 3519 }, { "epoch": 1.0890903407314787, "grad_norm": 0.2578125, "learning_rate": 0.00016505963481569747, "loss": 1.638, "step": 3520 }, { "epoch": 1.0894029384182558, "grad_norm": 0.26953125, "learning_rate": 0.0001650409733807954, "loss": 1.6119, "step": 3521 }, { "epoch": 1.0897155361050328, "grad_norm": 0.24609375, "learning_rate": 0.00016502230801928694, "loss": 1.5711, "step": 3522 }, { "epoch": 1.0900281337918099, "grad_norm": 0.259765625, "learning_rate": 0.00016500363873229882, "loss": 1.5858, "step": 3523 }, { "epoch": 1.0903407314785871, "grad_norm": 0.271484375, "learning_rate": 0.00016498496552095823, "loss": 1.5925, "step": 3524 }, { "epoch": 1.0906533291653642, "grad_norm": 0.25, "learning_rate": 0.0001649662883863925, "loss": 1.5732, "step": 3525 }, { "epoch": 1.0909659268521412, "grad_norm": 0.26171875, "learning_rate": 0.0001649476073297291, "loss": 1.5185, "step": 3526 }, { "epoch": 1.0912785245389185, "grad_norm": 0.251953125, "learning_rate": 0.00016492892235209588, "loss": 1.5267, "step": 3527 }, { "epoch": 1.0915911222256955, "grad_norm": 0.2578125, "learning_rate": 0.00016491023345462091, "loss": 1.6801, "step": 3528 }, { "epoch": 1.0919037199124726, "grad_norm": 0.25390625, "learning_rate": 0.00016489154063843242, "loss": 1.5361, "step": 3529 }, { "epoch": 1.0922163175992499, "grad_norm": 0.255859375, "learning_rate": 0.00016487284390465893, "loss": 1.5234, "step": 3530 }, { "epoch": 1.092528915286027, "grad_norm": 0.26953125, "learning_rate": 0.00016485414325442918, "loss": 1.4755, "step": 3531 }, { "epoch": 1.092841512972804, "grad_norm": 0.263671875, "learning_rate": 0.00016483543868887215, "loss": 1.3309, "step": 3532 }, { "epoch": 1.0931541106595812, "grad_norm": 0.2578125, "learning_rate": 0.00016481673020911708, "loss": 1.9242, "step": 3533 }, { "epoch": 1.0934667083463583, "grad_norm": 0.2734375, "learning_rate": 0.00016479801781629338, "loss": 1.4854, "step": 3534 }, { "epoch": 1.0937793060331353, "grad_norm": 0.255859375, "learning_rate": 0.0001647793015115308, "loss": 1.3303, "step": 3535 }, { "epoch": 1.0940919037199124, "grad_norm": 0.255859375, "learning_rate": 0.00016476058129595927, "loss": 1.4886, "step": 3536 }, { "epoch": 1.0944045014066897, "grad_norm": 0.259765625, "learning_rate": 0.00016474185717070886, "loss": 1.5427, "step": 3537 }, { "epoch": 1.0947170990934667, "grad_norm": 0.267578125, "learning_rate": 0.00016472312913691007, "loss": 1.3118, "step": 3538 }, { "epoch": 1.0950296967802438, "grad_norm": 0.26953125, "learning_rate": 0.00016470439719569346, "loss": 1.4825, "step": 3539 }, { "epoch": 1.095342294467021, "grad_norm": 0.26171875, "learning_rate": 0.00016468566134818997, "loss": 1.672, "step": 3540 }, { "epoch": 1.095654892153798, "grad_norm": 0.30859375, "learning_rate": 0.00016466692159553066, "loss": 2.2183, "step": 3541 }, { "epoch": 1.0959674898405751, "grad_norm": 0.259765625, "learning_rate": 0.0001646481779388469, "loss": 1.6677, "step": 3542 }, { "epoch": 1.0962800875273524, "grad_norm": 0.25, "learning_rate": 0.00016462943037927024, "loss": 1.3428, "step": 3543 }, { "epoch": 1.0965926852141294, "grad_norm": 0.27734375, "learning_rate": 0.00016461067891793252, "loss": 1.3268, "step": 3544 }, { "epoch": 1.0969052829009065, "grad_norm": 0.265625, "learning_rate": 0.00016459192355596576, "loss": 1.3895, "step": 3545 }, { "epoch": 1.0972178805876835, "grad_norm": 0.2734375, "learning_rate": 0.0001645731642945023, "loss": 1.5051, "step": 3546 }, { "epoch": 1.0975304782744608, "grad_norm": 0.2578125, "learning_rate": 0.0001645544011346746, "loss": 1.5549, "step": 3547 }, { "epoch": 1.0978430759612379, "grad_norm": 0.28515625, "learning_rate": 0.00016453563407761544, "loss": 1.6162, "step": 3548 }, { "epoch": 1.098155673648015, "grad_norm": 0.255859375, "learning_rate": 0.00016451686312445783, "loss": 1.575, "step": 3549 }, { "epoch": 1.0984682713347922, "grad_norm": 0.2734375, "learning_rate": 0.00016449808827633498, "loss": 1.641, "step": 3550 }, { "epoch": 1.0987808690215692, "grad_norm": 0.2470703125, "learning_rate": 0.00016447930953438034, "loss": 1.4562, "step": 3551 }, { "epoch": 1.0990934667083463, "grad_norm": 0.271484375, "learning_rate": 0.0001644605268997276, "loss": 1.6116, "step": 3552 }, { "epoch": 1.0994060643951236, "grad_norm": 0.265625, "learning_rate": 0.00016444174037351074, "loss": 1.408, "step": 3553 }, { "epoch": 1.0997186620819006, "grad_norm": 0.263671875, "learning_rate": 0.00016442294995686388, "loss": 1.4525, "step": 3554 }, { "epoch": 1.1000312597686777, "grad_norm": 0.271484375, "learning_rate": 0.00016440415565092145, "loss": 1.6818, "step": 3555 }, { "epoch": 1.100343857455455, "grad_norm": 0.251953125, "learning_rate": 0.00016438535745681802, "loss": 1.4404, "step": 3556 }, { "epoch": 1.100656455142232, "grad_norm": 0.2578125, "learning_rate": 0.00016436655537568857, "loss": 1.3231, "step": 3557 }, { "epoch": 1.100969052829009, "grad_norm": 0.25390625, "learning_rate": 0.00016434774940866814, "loss": 1.5111, "step": 3558 }, { "epoch": 1.101281650515786, "grad_norm": 0.275390625, "learning_rate": 0.00016432893955689205, "loss": 1.4369, "step": 3559 }, { "epoch": 1.1015942482025634, "grad_norm": 0.2734375, "learning_rate": 0.00016431012582149594, "loss": 1.5618, "step": 3560 }, { "epoch": 1.1019068458893404, "grad_norm": 0.26953125, "learning_rate": 0.00016429130820361555, "loss": 1.536, "step": 3561 }, { "epoch": 1.1022194435761175, "grad_norm": 0.25, "learning_rate": 0.00016427248670438697, "loss": 1.5354, "step": 3562 }, { "epoch": 1.1025320412628947, "grad_norm": 0.259765625, "learning_rate": 0.0001642536613249465, "loss": 1.9262, "step": 3563 }, { "epoch": 1.1028446389496718, "grad_norm": 0.25, "learning_rate": 0.00016423483206643057, "loss": 1.5448, "step": 3564 }, { "epoch": 1.1031572366364488, "grad_norm": 0.263671875, "learning_rate": 0.00016421599892997595, "loss": 1.5508, "step": 3565 }, { "epoch": 1.103469834323226, "grad_norm": 0.267578125, "learning_rate": 0.0001641971619167197, "loss": 1.4567, "step": 3566 }, { "epoch": 1.1037824320100031, "grad_norm": 0.26171875, "learning_rate": 0.00016417832102779895, "loss": 1.532, "step": 3567 }, { "epoch": 1.1040950296967802, "grad_norm": 0.279296875, "learning_rate": 0.0001641594762643512, "loss": 1.8043, "step": 3568 }, { "epoch": 1.1044076273835572, "grad_norm": 0.251953125, "learning_rate": 0.00016414062762751407, "loss": 1.5684, "step": 3569 }, { "epoch": 1.1047202250703345, "grad_norm": 0.255859375, "learning_rate": 0.00016412177511842554, "loss": 1.5399, "step": 3570 }, { "epoch": 1.1050328227571116, "grad_norm": 0.2890625, "learning_rate": 0.00016410291873822375, "loss": 1.8069, "step": 3571 }, { "epoch": 1.1053454204438886, "grad_norm": 0.25390625, "learning_rate": 0.00016408405848804703, "loss": 1.4957, "step": 3572 }, { "epoch": 1.1056580181306659, "grad_norm": 0.26171875, "learning_rate": 0.00016406519436903407, "loss": 1.589, "step": 3573 }, { "epoch": 1.105970615817443, "grad_norm": 0.2578125, "learning_rate": 0.00016404632638232367, "loss": 1.3094, "step": 3574 }, { "epoch": 1.10628321350422, "grad_norm": 0.287109375, "learning_rate": 0.00016402745452905496, "loss": 2.02, "step": 3575 }, { "epoch": 1.1065958111909973, "grad_norm": 0.26953125, "learning_rate": 0.00016400857881036717, "loss": 1.2923, "step": 3576 }, { "epoch": 1.1069084088777743, "grad_norm": 0.251953125, "learning_rate": 0.00016398969922739996, "loss": 1.4637, "step": 3577 }, { "epoch": 1.1072210065645514, "grad_norm": 0.259765625, "learning_rate": 0.00016397081578129304, "loss": 1.547, "step": 3578 }, { "epoch": 1.1075336042513286, "grad_norm": 0.26171875, "learning_rate": 0.00016395192847318648, "loss": 1.507, "step": 3579 }, { "epoch": 1.1078462019381057, "grad_norm": 0.251953125, "learning_rate": 0.00016393303730422048, "loss": 1.78, "step": 3580 }, { "epoch": 1.1081587996248827, "grad_norm": 0.2578125, "learning_rate": 0.00016391414227553554, "loss": 1.4306, "step": 3581 }, { "epoch": 1.1084713973116598, "grad_norm": 0.26953125, "learning_rate": 0.00016389524338827237, "loss": 1.7353, "step": 3582 }, { "epoch": 1.108783994998437, "grad_norm": 0.265625, "learning_rate": 0.00016387634064357197, "loss": 1.3343, "step": 3583 }, { "epoch": 1.109096592685214, "grad_norm": 0.251953125, "learning_rate": 0.0001638574340425755, "loss": 1.4785, "step": 3584 }, { "epoch": 1.1094091903719911, "grad_norm": 0.2578125, "learning_rate": 0.00016383852358642432, "loss": 1.7248, "step": 3585 }, { "epoch": 1.1097217880587684, "grad_norm": 0.26953125, "learning_rate": 0.00016381960927626014, "loss": 1.6223, "step": 3586 }, { "epoch": 1.1100343857455455, "grad_norm": 0.2890625, "learning_rate": 0.00016380069111322483, "loss": 1.7505, "step": 3587 }, { "epoch": 1.1103469834323225, "grad_norm": 0.251953125, "learning_rate": 0.00016378176909846048, "loss": 1.4425, "step": 3588 }, { "epoch": 1.1106595811190998, "grad_norm": 0.26953125, "learning_rate": 0.0001637628432331095, "loss": 1.4442, "step": 3589 }, { "epoch": 1.1109721788058768, "grad_norm": 0.265625, "learning_rate": 0.00016374391351831435, "loss": 1.5085, "step": 3590 }, { "epoch": 1.111284776492654, "grad_norm": 0.265625, "learning_rate": 0.00016372497995521793, "loss": 1.7437, "step": 3591 }, { "epoch": 1.1115973741794312, "grad_norm": 0.2578125, "learning_rate": 0.0001637060425449633, "loss": 1.7599, "step": 3592 }, { "epoch": 1.1119099718662082, "grad_norm": 0.251953125, "learning_rate": 0.00016368710128869367, "loss": 1.6399, "step": 3593 }, { "epoch": 1.1122225695529853, "grad_norm": 0.259765625, "learning_rate": 0.00016366815618755256, "loss": 1.7215, "step": 3594 }, { "epoch": 1.1125351672397623, "grad_norm": 0.28515625, "learning_rate": 0.00016364920724268377, "loss": 1.5205, "step": 3595 }, { "epoch": 1.1128477649265396, "grad_norm": 0.265625, "learning_rate": 0.0001636302544552312, "loss": 1.5398, "step": 3596 }, { "epoch": 1.1131603626133166, "grad_norm": 0.24609375, "learning_rate": 0.00016361129782633911, "loss": 1.6437, "step": 3597 }, { "epoch": 1.1134729603000937, "grad_norm": 0.259765625, "learning_rate": 0.0001635923373571519, "loss": 1.5017, "step": 3598 }, { "epoch": 1.113785557986871, "grad_norm": 0.265625, "learning_rate": 0.00016357337304881423, "loss": 1.724, "step": 3599 }, { "epoch": 1.114098155673648, "grad_norm": 0.271484375, "learning_rate": 0.00016355440490247103, "loss": 1.4658, "step": 3600 }, { "epoch": 1.114410753360425, "grad_norm": 0.263671875, "learning_rate": 0.0001635354329192674, "loss": 1.8218, "step": 3601 }, { "epoch": 1.1147233510472023, "grad_norm": 0.267578125, "learning_rate": 0.00016351645710034873, "loss": 1.6092, "step": 3602 }, { "epoch": 1.1150359487339794, "grad_norm": 0.267578125, "learning_rate": 0.00016349747744686064, "loss": 1.7769, "step": 3603 }, { "epoch": 1.1153485464207564, "grad_norm": 0.259765625, "learning_rate": 0.00016347849395994887, "loss": 1.4195, "step": 3604 }, { "epoch": 1.1156611441075337, "grad_norm": 0.271484375, "learning_rate": 0.00016345950664075956, "loss": 1.5754, "step": 3605 }, { "epoch": 1.1159737417943107, "grad_norm": 0.259765625, "learning_rate": 0.00016344051549043896, "loss": 1.5302, "step": 3606 }, { "epoch": 1.1162863394810878, "grad_norm": 0.259765625, "learning_rate": 0.0001634215205101336, "loss": 1.5346, "step": 3607 }, { "epoch": 1.1165989371678648, "grad_norm": 0.25390625, "learning_rate": 0.0001634025217009902, "loss": 1.5775, "step": 3608 }, { "epoch": 1.1169115348546421, "grad_norm": 0.267578125, "learning_rate": 0.0001633835190641558, "loss": 1.4287, "step": 3609 }, { "epoch": 1.1172241325414192, "grad_norm": 0.26171875, "learning_rate": 0.0001633645126007776, "loss": 1.6013, "step": 3610 }, { "epoch": 1.1175367302281962, "grad_norm": 0.26953125, "learning_rate": 0.000163345502312003, "loss": 1.7343, "step": 3611 }, { "epoch": 1.1178493279149735, "grad_norm": 0.265625, "learning_rate": 0.00016332648819897968, "loss": 1.8112, "step": 3612 }, { "epoch": 1.1181619256017505, "grad_norm": 0.265625, "learning_rate": 0.00016330747026285563, "loss": 1.6694, "step": 3613 }, { "epoch": 1.1184745232885276, "grad_norm": 0.259765625, "learning_rate": 0.0001632884485047789, "loss": 1.6057, "step": 3614 }, { "epoch": 1.1187871209753049, "grad_norm": 0.271484375, "learning_rate": 0.00016326942292589785, "loss": 1.2595, "step": 3615 }, { "epoch": 1.119099718662082, "grad_norm": 0.279296875, "learning_rate": 0.00016325039352736113, "loss": 1.6727, "step": 3616 }, { "epoch": 1.119412316348859, "grad_norm": 0.2890625, "learning_rate": 0.0001632313603103176, "loss": 1.6012, "step": 3617 }, { "epoch": 1.1197249140356362, "grad_norm": 0.2578125, "learning_rate": 0.00016321232327591622, "loss": 1.5811, "step": 3618 }, { "epoch": 1.1200375117224133, "grad_norm": 0.26953125, "learning_rate": 0.00016319328242530635, "loss": 1.5852, "step": 3619 }, { "epoch": 1.1203501094091903, "grad_norm": 0.265625, "learning_rate": 0.00016317423775963748, "loss": 1.9542, "step": 3620 }, { "epoch": 1.1206627070959674, "grad_norm": 0.283203125, "learning_rate": 0.0001631551892800594, "loss": 1.5583, "step": 3621 }, { "epoch": 1.1209753047827447, "grad_norm": 0.2578125, "learning_rate": 0.000163136136987722, "loss": 1.5256, "step": 3622 }, { "epoch": 1.1212879024695217, "grad_norm": 0.263671875, "learning_rate": 0.00016311708088377562, "loss": 1.6357, "step": 3623 }, { "epoch": 1.1216005001562988, "grad_norm": 0.28125, "learning_rate": 0.0001630980209693706, "loss": 1.3957, "step": 3624 }, { "epoch": 1.121913097843076, "grad_norm": 0.26171875, "learning_rate": 0.0001630789572456577, "loss": 1.6948, "step": 3625 }, { "epoch": 1.122225695529853, "grad_norm": 0.271484375, "learning_rate": 0.0001630598897137877, "loss": 1.5104, "step": 3626 }, { "epoch": 1.1225382932166301, "grad_norm": 0.25390625, "learning_rate": 0.00016304081837491185, "loss": 1.6538, "step": 3627 }, { "epoch": 1.1228508909034074, "grad_norm": 0.2734375, "learning_rate": 0.00016302174323018146, "loss": 1.495, "step": 3628 }, { "epoch": 1.1231634885901844, "grad_norm": 0.26171875, "learning_rate": 0.0001630026642807481, "loss": 1.4547, "step": 3629 }, { "epoch": 1.1234760862769615, "grad_norm": 0.267578125, "learning_rate": 0.00016298358152776361, "loss": 1.6914, "step": 3630 }, { "epoch": 1.1237886839637388, "grad_norm": 0.259765625, "learning_rate": 0.00016296449497238004, "loss": 1.7995, "step": 3631 }, { "epoch": 1.1241012816505158, "grad_norm": 0.259765625, "learning_rate": 0.00016294540461574968, "loss": 1.3836, "step": 3632 }, { "epoch": 1.1244138793372929, "grad_norm": 0.255859375, "learning_rate": 0.00016292631045902506, "loss": 1.4697, "step": 3633 }, { "epoch": 1.12472647702407, "grad_norm": 0.259765625, "learning_rate": 0.00016290721250335883, "loss": 1.5252, "step": 3634 }, { "epoch": 1.1250390747108472, "grad_norm": 0.26171875, "learning_rate": 0.00016288811074990407, "loss": 1.6655, "step": 3635 }, { "epoch": 1.1253516723976242, "grad_norm": 0.265625, "learning_rate": 0.0001628690051998139, "loss": 1.4658, "step": 3636 }, { "epoch": 1.1256642700844013, "grad_norm": 0.26171875, "learning_rate": 0.0001628498958542418, "loss": 1.6769, "step": 3637 }, { "epoch": 1.1259768677711786, "grad_norm": 0.263671875, "learning_rate": 0.00016283078271434135, "loss": 1.71, "step": 3638 }, { "epoch": 1.1262894654579556, "grad_norm": 0.26171875, "learning_rate": 0.00016281166578126653, "loss": 1.4462, "step": 3639 }, { "epoch": 1.1266020631447327, "grad_norm": 0.294921875, "learning_rate": 0.00016279254505617138, "loss": 2.2732, "step": 3640 }, { "epoch": 1.12691466083151, "grad_norm": 0.2578125, "learning_rate": 0.00016277342054021022, "loss": 1.4325, "step": 3641 }, { "epoch": 1.127227258518287, "grad_norm": 0.28125, "learning_rate": 0.00016275429223453776, "loss": 1.6099, "step": 3642 }, { "epoch": 1.127539856205064, "grad_norm": 0.271484375, "learning_rate": 0.00016273516014030865, "loss": 1.7282, "step": 3643 }, { "epoch": 1.1278524538918413, "grad_norm": 0.265625, "learning_rate": 0.000162716024258678, "loss": 1.5569, "step": 3644 }, { "epoch": 1.1281650515786183, "grad_norm": 0.25, "learning_rate": 0.00016269688459080104, "loss": 1.4227, "step": 3645 }, { "epoch": 1.1284776492653954, "grad_norm": 0.275390625, "learning_rate": 0.00016267774113783325, "loss": 1.5628, "step": 3646 }, { "epoch": 1.1287902469521724, "grad_norm": 0.259765625, "learning_rate": 0.00016265859390093037, "loss": 1.9694, "step": 3647 }, { "epoch": 1.1291028446389497, "grad_norm": 0.275390625, "learning_rate": 0.00016263944288124832, "loss": 1.3973, "step": 3648 }, { "epoch": 1.1294154423257268, "grad_norm": 0.244140625, "learning_rate": 0.00016262028807994332, "loss": 1.5886, "step": 3649 }, { "epoch": 1.1297280400125038, "grad_norm": 0.26953125, "learning_rate": 0.0001626011294981717, "loss": 1.8958, "step": 3650 }, { "epoch": 1.130040637699281, "grad_norm": 0.2490234375, "learning_rate": 0.0001625819671370901, "loss": 1.5393, "step": 3651 }, { "epoch": 1.1303532353860581, "grad_norm": 0.259765625, "learning_rate": 0.0001625628009978554, "loss": 1.4789, "step": 3652 }, { "epoch": 1.1306658330728352, "grad_norm": 0.251953125, "learning_rate": 0.00016254363108162472, "loss": 1.5094, "step": 3653 }, { "epoch": 1.1309784307596125, "grad_norm": 0.263671875, "learning_rate": 0.00016252445738955529, "loss": 1.6394, "step": 3654 }, { "epoch": 1.1312910284463895, "grad_norm": 0.251953125, "learning_rate": 0.0001625052799228047, "loss": 1.4005, "step": 3655 }, { "epoch": 1.1316036261331666, "grad_norm": 0.26171875, "learning_rate": 0.00016248609868253072, "loss": 1.3923, "step": 3656 }, { "epoch": 1.1319162238199438, "grad_norm": 0.25390625, "learning_rate": 0.00016246691366989132, "loss": 1.4759, "step": 3657 }, { "epoch": 1.1322288215067209, "grad_norm": 0.265625, "learning_rate": 0.00016244772488604477, "loss": 1.4589, "step": 3658 }, { "epoch": 1.132541419193498, "grad_norm": 0.265625, "learning_rate": 0.00016242853233214944, "loss": 1.6923, "step": 3659 }, { "epoch": 1.132854016880275, "grad_norm": 0.263671875, "learning_rate": 0.00016240933600936413, "loss": 1.5223, "step": 3660 }, { "epoch": 1.1331666145670523, "grad_norm": 0.251953125, "learning_rate": 0.00016239013591884765, "loss": 1.8563, "step": 3661 }, { "epoch": 1.1334792122538293, "grad_norm": 0.26171875, "learning_rate": 0.0001623709320617591, "loss": 1.7337, "step": 3662 }, { "epoch": 1.1337918099406064, "grad_norm": 0.26171875, "learning_rate": 0.00016235172443925796, "loss": 1.313, "step": 3663 }, { "epoch": 1.1341044076273836, "grad_norm": 0.265625, "learning_rate": 0.00016233251305250375, "loss": 1.8978, "step": 3664 }, { "epoch": 1.1344170053141607, "grad_norm": 0.26171875, "learning_rate": 0.0001623132979026563, "loss": 1.7873, "step": 3665 }, { "epoch": 1.1347296030009377, "grad_norm": 0.259765625, "learning_rate": 0.00016229407899087566, "loss": 1.6152, "step": 3666 }, { "epoch": 1.135042200687715, "grad_norm": 0.25, "learning_rate": 0.00016227485631832206, "loss": 1.3307, "step": 3667 }, { "epoch": 1.135354798374492, "grad_norm": 0.275390625, "learning_rate": 0.00016225562988615605, "loss": 1.5199, "step": 3668 }, { "epoch": 1.135667396061269, "grad_norm": 0.25390625, "learning_rate": 0.0001622363996955383, "loss": 1.5746, "step": 3669 }, { "epoch": 1.1359799937480464, "grad_norm": 0.267578125, "learning_rate": 0.00016221716574762982, "loss": 1.734, "step": 3670 }, { "epoch": 1.1362925914348234, "grad_norm": 0.2734375, "learning_rate": 0.00016219792804359173, "loss": 1.5776, "step": 3671 }, { "epoch": 1.1366051891216005, "grad_norm": 0.267578125, "learning_rate": 0.00016217868658458554, "loss": 1.3582, "step": 3672 }, { "epoch": 1.1369177868083775, "grad_norm": 0.248046875, "learning_rate": 0.00016215944137177273, "loss": 1.5522, "step": 3673 }, { "epoch": 1.1372303844951548, "grad_norm": 0.26171875, "learning_rate": 0.00016214019240631523, "loss": 1.5427, "step": 3674 }, { "epoch": 1.1375429821819318, "grad_norm": 0.26171875, "learning_rate": 0.00016212093968937517, "loss": 1.5371, "step": 3675 }, { "epoch": 1.1378555798687089, "grad_norm": 0.25390625, "learning_rate": 0.0001621016832221148, "loss": 1.5362, "step": 3676 }, { "epoch": 1.1381681775554862, "grad_norm": 0.259765625, "learning_rate": 0.00016208242300569668, "loss": 1.5966, "step": 3677 }, { "epoch": 1.1384807752422632, "grad_norm": 0.267578125, "learning_rate": 0.00016206315904128358, "loss": 1.5752, "step": 3678 }, { "epoch": 1.1387933729290403, "grad_norm": 0.26171875, "learning_rate": 0.00016204389133003848, "loss": 1.6001, "step": 3679 }, { "epoch": 1.1391059706158175, "grad_norm": 0.26171875, "learning_rate": 0.00016202461987312457, "loss": 1.7705, "step": 3680 }, { "epoch": 1.1394185683025946, "grad_norm": 0.26171875, "learning_rate": 0.00016200534467170533, "loss": 1.9231, "step": 3681 }, { "epoch": 1.1397311659893716, "grad_norm": 0.255859375, "learning_rate": 0.00016198606572694443, "loss": 1.4175, "step": 3682 }, { "epoch": 1.140043763676149, "grad_norm": 0.267578125, "learning_rate": 0.00016196678304000573, "loss": 1.6075, "step": 3683 }, { "epoch": 1.140356361362926, "grad_norm": 0.25390625, "learning_rate": 0.00016194749661205341, "loss": 1.3712, "step": 3684 }, { "epoch": 1.140668959049703, "grad_norm": 0.275390625, "learning_rate": 0.00016192820644425176, "loss": 1.6146, "step": 3685 }, { "epoch": 1.14098155673648, "grad_norm": 0.267578125, "learning_rate": 0.0001619089125377654, "loss": 1.5327, "step": 3686 }, { "epoch": 1.1412941544232573, "grad_norm": 0.26953125, "learning_rate": 0.00016188961489375903, "loss": 1.4308, "step": 3687 }, { "epoch": 1.1416067521100344, "grad_norm": 0.26171875, "learning_rate": 0.0001618703135133978, "loss": 1.693, "step": 3688 }, { "epoch": 1.1419193497968114, "grad_norm": 0.259765625, "learning_rate": 0.0001618510083978469, "loss": 1.2902, "step": 3689 }, { "epoch": 1.1422319474835887, "grad_norm": 0.267578125, "learning_rate": 0.00016183169954827177, "loss": 1.4811, "step": 3690 }, { "epoch": 1.1425445451703657, "grad_norm": 0.26953125, "learning_rate": 0.00016181238696583815, "loss": 1.4203, "step": 3691 }, { "epoch": 1.1428571428571428, "grad_norm": 0.28515625, "learning_rate": 0.000161793070651712, "loss": 1.515, "step": 3692 }, { "epoch": 1.14316974054392, "grad_norm": 0.26953125, "learning_rate": 0.0001617737506070594, "loss": 1.4142, "step": 3693 }, { "epoch": 1.1434823382306971, "grad_norm": 0.25, "learning_rate": 0.00016175442683304673, "loss": 1.3988, "step": 3694 }, { "epoch": 1.1437949359174742, "grad_norm": 0.263671875, "learning_rate": 0.00016173509933084068, "loss": 1.5393, "step": 3695 }, { "epoch": 1.1441075336042514, "grad_norm": 0.271484375, "learning_rate": 0.00016171576810160797, "loss": 1.6045, "step": 3696 }, { "epoch": 1.1444201312910285, "grad_norm": 0.279296875, "learning_rate": 0.00016169643314651572, "loss": 1.5186, "step": 3697 }, { "epoch": 1.1447327289778055, "grad_norm": 0.2578125, "learning_rate": 0.0001616770944667312, "loss": 1.6072, "step": 3698 }, { "epoch": 1.1450453266645826, "grad_norm": 0.2734375, "learning_rate": 0.00016165775206342185, "loss": 1.6905, "step": 3699 }, { "epoch": 1.1453579243513599, "grad_norm": 0.267578125, "learning_rate": 0.00016163840593775541, "loss": 1.5007, "step": 3700 }, { "epoch": 1.145670522038137, "grad_norm": 0.265625, "learning_rate": 0.0001616190560908999, "loss": 1.6387, "step": 3701 }, { "epoch": 1.145983119724914, "grad_norm": 0.265625, "learning_rate": 0.00016159970252402345, "loss": 1.347, "step": 3702 }, { "epoch": 1.1462957174116912, "grad_norm": 0.2578125, "learning_rate": 0.00016158034523829445, "loss": 1.5434, "step": 3703 }, { "epoch": 1.1466083150984683, "grad_norm": 0.28515625, "learning_rate": 0.00016156098423488155, "loss": 1.5995, "step": 3704 }, { "epoch": 1.1469209127852453, "grad_norm": 0.25390625, "learning_rate": 0.0001615416195149536, "loss": 1.7075, "step": 3705 }, { "epoch": 1.1472335104720226, "grad_norm": 0.275390625, "learning_rate": 0.00016152225107967963, "loss": 1.6339, "step": 3706 }, { "epoch": 1.1475461081587996, "grad_norm": 0.2734375, "learning_rate": 0.00016150287893022894, "loss": 1.6718, "step": 3707 }, { "epoch": 1.1478587058455767, "grad_norm": 0.255859375, "learning_rate": 0.00016148350306777111, "loss": 1.7101, "step": 3708 }, { "epoch": 1.148171303532354, "grad_norm": 0.259765625, "learning_rate": 0.00016146412349347583, "loss": 1.8711, "step": 3709 }, { "epoch": 1.148483901219131, "grad_norm": 0.259765625, "learning_rate": 0.00016144474020851312, "loss": 1.4087, "step": 3710 }, { "epoch": 1.148796498905908, "grad_norm": 0.265625, "learning_rate": 0.00016142535321405312, "loss": 1.2498, "step": 3711 }, { "epoch": 1.1491090965926851, "grad_norm": 0.2578125, "learning_rate": 0.00016140596251126626, "loss": 1.6021, "step": 3712 }, { "epoch": 1.1494216942794624, "grad_norm": 0.2578125, "learning_rate": 0.00016138656810132322, "loss": 1.5073, "step": 3713 }, { "epoch": 1.1497342919662394, "grad_norm": 0.251953125, "learning_rate": 0.00016136716998539483, "loss": 1.4993, "step": 3714 }, { "epoch": 1.1500468896530165, "grad_norm": 0.26171875, "learning_rate": 0.0001613477681646522, "loss": 1.5221, "step": 3715 }, { "epoch": 1.1503594873397938, "grad_norm": 0.259765625, "learning_rate": 0.0001613283626402666, "loss": 1.3269, "step": 3716 }, { "epoch": 1.1506720850265708, "grad_norm": 0.259765625, "learning_rate": 0.00016130895341340962, "loss": 1.6539, "step": 3717 }, { "epoch": 1.1509846827133479, "grad_norm": 0.279296875, "learning_rate": 0.00016128954048525297, "loss": 1.6418, "step": 3718 }, { "epoch": 1.1512972804001251, "grad_norm": 0.26171875, "learning_rate": 0.0001612701238569687, "loss": 1.4092, "step": 3719 }, { "epoch": 1.1516098780869022, "grad_norm": 0.2578125, "learning_rate": 0.00016125070352972896, "loss": 1.4823, "step": 3720 }, { "epoch": 1.1519224757736792, "grad_norm": 0.275390625, "learning_rate": 0.00016123127950470618, "loss": 1.5155, "step": 3721 }, { "epoch": 1.1522350734604565, "grad_norm": 0.267578125, "learning_rate": 0.000161211851783073, "loss": 1.4642, "step": 3722 }, { "epoch": 1.1525476711472336, "grad_norm": 0.275390625, "learning_rate": 0.00016119242036600237, "loss": 1.8195, "step": 3723 }, { "epoch": 1.1528602688340106, "grad_norm": 0.259765625, "learning_rate": 0.00016117298525466733, "loss": 1.3579, "step": 3724 }, { "epoch": 1.1531728665207877, "grad_norm": 0.28515625, "learning_rate": 0.00016115354645024126, "loss": 1.6316, "step": 3725 }, { "epoch": 1.153485464207565, "grad_norm": 0.26953125, "learning_rate": 0.0001611341039538976, "loss": 1.7061, "step": 3726 }, { "epoch": 1.153798061894342, "grad_norm": 0.25390625, "learning_rate": 0.00016111465776681022, "loss": 1.5465, "step": 3727 }, { "epoch": 1.154110659581119, "grad_norm": 0.2578125, "learning_rate": 0.00016109520789015305, "loss": 1.4982, "step": 3728 }, { "epoch": 1.1544232572678963, "grad_norm": 0.267578125, "learning_rate": 0.0001610757543251003, "loss": 1.7491, "step": 3729 }, { "epoch": 1.1547358549546733, "grad_norm": 0.26953125, "learning_rate": 0.0001610562970728265, "loss": 1.6059, "step": 3730 }, { "epoch": 1.1550484526414504, "grad_norm": 0.2734375, "learning_rate": 0.00016103683613450618, "loss": 1.3079, "step": 3731 }, { "epoch": 1.1553610503282277, "grad_norm": 0.263671875, "learning_rate": 0.0001610173715113143, "loss": 1.5474, "step": 3732 }, { "epoch": 1.1556736480150047, "grad_norm": 0.2734375, "learning_rate": 0.00016099790320442593, "loss": 1.6536, "step": 3733 }, { "epoch": 1.1559862457017818, "grad_norm": 0.265625, "learning_rate": 0.00016097843121501646, "loss": 1.6882, "step": 3734 }, { "epoch": 1.156298843388559, "grad_norm": 0.26953125, "learning_rate": 0.00016095895554426134, "loss": 1.4942, "step": 3735 }, { "epoch": 1.156611441075336, "grad_norm": 0.25390625, "learning_rate": 0.00016093947619333644, "loss": 1.6029, "step": 3736 }, { "epoch": 1.1569240387621131, "grad_norm": 0.28125, "learning_rate": 0.00016091999316341767, "loss": 1.5016, "step": 3737 }, { "epoch": 1.1572366364488902, "grad_norm": 0.24609375, "learning_rate": 0.0001609005064556813, "loss": 1.8828, "step": 3738 }, { "epoch": 1.1575492341356675, "grad_norm": 0.25, "learning_rate": 0.00016088101607130377, "loss": 1.2023, "step": 3739 }, { "epoch": 1.1578618318224445, "grad_norm": 0.28125, "learning_rate": 0.00016086152201146166, "loss": 1.6386, "step": 3740 }, { "epoch": 1.1581744295092216, "grad_norm": 0.271484375, "learning_rate": 0.00016084202427733198, "loss": 1.413, "step": 3741 }, { "epoch": 1.1584870271959988, "grad_norm": 0.24609375, "learning_rate": 0.00016082252287009173, "loss": 1.6755, "step": 3742 }, { "epoch": 1.1587996248827759, "grad_norm": 0.27734375, "learning_rate": 0.00016080301779091826, "loss": 1.8094, "step": 3743 }, { "epoch": 1.159112222569553, "grad_norm": 0.27734375, "learning_rate": 0.00016078350904098914, "loss": 1.5788, "step": 3744 }, { "epoch": 1.15942482025633, "grad_norm": 0.25, "learning_rate": 0.00016076399662148208, "loss": 1.2638, "step": 3745 }, { "epoch": 1.1597374179431073, "grad_norm": 0.25390625, "learning_rate": 0.00016074448053357516, "loss": 1.6745, "step": 3746 }, { "epoch": 1.1600500156298843, "grad_norm": 0.259765625, "learning_rate": 0.0001607249607784465, "loss": 1.5774, "step": 3747 }, { "epoch": 1.1603626133166616, "grad_norm": 0.271484375, "learning_rate": 0.00016070543735727464, "loss": 1.7768, "step": 3748 }, { "epoch": 1.1606752110034386, "grad_norm": 0.25, "learning_rate": 0.00016068591027123812, "loss": 1.4988, "step": 3749 }, { "epoch": 1.1609878086902157, "grad_norm": 0.271484375, "learning_rate": 0.00016066637952151587, "loss": 1.7819, "step": 3750 }, { "epoch": 1.1613004063769927, "grad_norm": 0.25, "learning_rate": 0.000160646845109287, "loss": 1.7408, "step": 3751 }, { "epoch": 1.16161300406377, "grad_norm": 0.267578125, "learning_rate": 0.00016062730703573076, "loss": 1.6412, "step": 3752 }, { "epoch": 1.161925601750547, "grad_norm": 0.27734375, "learning_rate": 0.00016060776530202678, "loss": 1.6517, "step": 3753 }, { "epoch": 1.162238199437324, "grad_norm": 0.275390625, "learning_rate": 0.00016058821990935475, "loss": 1.4441, "step": 3754 }, { "epoch": 1.1625507971241014, "grad_norm": 0.259765625, "learning_rate": 0.0001605686708588947, "loss": 1.4506, "step": 3755 }, { "epoch": 1.1628633948108784, "grad_norm": 0.26171875, "learning_rate": 0.0001605491181518268, "loss": 1.5241, "step": 3756 }, { "epoch": 1.1631759924976555, "grad_norm": 0.27734375, "learning_rate": 0.00016052956178933147, "loss": 1.722, "step": 3757 }, { "epoch": 1.1634885901844325, "grad_norm": 0.25390625, "learning_rate": 0.00016051000177258934, "loss": 1.6285, "step": 3758 }, { "epoch": 1.1638011878712098, "grad_norm": 0.337890625, "learning_rate": 0.00016049043810278132, "loss": 2.0735, "step": 3759 }, { "epoch": 1.1641137855579868, "grad_norm": 0.265625, "learning_rate": 0.0001604708707810885, "loss": 1.5888, "step": 3760 }, { "epoch": 1.164426383244764, "grad_norm": 0.28515625, "learning_rate": 0.0001604512998086921, "loss": 1.3625, "step": 3761 }, { "epoch": 1.1647389809315412, "grad_norm": 0.2734375, "learning_rate": 0.00016043172518677372, "loss": 1.5767, "step": 3762 }, { "epoch": 1.1650515786183182, "grad_norm": 0.26171875, "learning_rate": 0.00016041214691651508, "loss": 1.6241, "step": 3763 }, { "epoch": 1.1653641763050953, "grad_norm": 0.271484375, "learning_rate": 0.00016039256499909813, "loss": 1.7103, "step": 3764 }, { "epoch": 1.1656767739918725, "grad_norm": 0.259765625, "learning_rate": 0.00016037297943570508, "loss": 1.7265, "step": 3765 }, { "epoch": 1.1659893716786496, "grad_norm": 0.263671875, "learning_rate": 0.00016035339022751836, "loss": 1.6959, "step": 3766 }, { "epoch": 1.1663019693654266, "grad_norm": 0.26171875, "learning_rate": 0.00016033379737572054, "loss": 1.408, "step": 3767 }, { "epoch": 1.166614567052204, "grad_norm": 0.26171875, "learning_rate": 0.0001603142008814945, "loss": 1.4535, "step": 3768 }, { "epoch": 1.166927164738981, "grad_norm": 0.259765625, "learning_rate": 0.00016029460074602325, "loss": 1.5532, "step": 3769 }, { "epoch": 1.167239762425758, "grad_norm": 0.2734375, "learning_rate": 0.00016027499697049015, "loss": 1.5624, "step": 3770 }, { "epoch": 1.167552360112535, "grad_norm": 0.2734375, "learning_rate": 0.00016025538955607865, "loss": 1.7583, "step": 3771 }, { "epoch": 1.1678649577993123, "grad_norm": 0.267578125, "learning_rate": 0.00016023577850397252, "loss": 1.6003, "step": 3772 }, { "epoch": 1.1681775554860894, "grad_norm": 0.267578125, "learning_rate": 0.0001602161638153557, "loss": 1.528, "step": 3773 }, { "epoch": 1.1684901531728666, "grad_norm": 0.271484375, "learning_rate": 0.00016019654549141233, "loss": 1.4343, "step": 3774 }, { "epoch": 1.1688027508596437, "grad_norm": 0.271484375, "learning_rate": 0.0001601769235333268, "loss": 1.38, "step": 3775 }, { "epoch": 1.1691153485464207, "grad_norm": 0.26171875, "learning_rate": 0.00016015729794228366, "loss": 1.6298, "step": 3776 }, { "epoch": 1.1694279462331978, "grad_norm": 0.26171875, "learning_rate": 0.00016013766871946785, "loss": 1.5653, "step": 3777 }, { "epoch": 1.169740543919975, "grad_norm": 0.259765625, "learning_rate": 0.0001601180358660643, "loss": 1.3986, "step": 3778 }, { "epoch": 1.1700531416067521, "grad_norm": 0.267578125, "learning_rate": 0.00016009839938325836, "loss": 1.5358, "step": 3779 }, { "epoch": 1.1703657392935292, "grad_norm": 0.26171875, "learning_rate": 0.00016007875927223544, "loss": 1.7622, "step": 3780 }, { "epoch": 1.1706783369803064, "grad_norm": 0.265625, "learning_rate": 0.00016005911553418126, "loss": 1.4816, "step": 3781 }, { "epoch": 1.1709909346670835, "grad_norm": 0.265625, "learning_rate": 0.00016003946817028173, "loss": 1.5785, "step": 3782 }, { "epoch": 1.1713035323538605, "grad_norm": 0.27734375, "learning_rate": 0.00016001981718172302, "loss": 1.6437, "step": 3783 }, { "epoch": 1.1716161300406376, "grad_norm": 0.263671875, "learning_rate": 0.00016000016256969145, "loss": 1.5012, "step": 3784 }, { "epoch": 1.1719287277274149, "grad_norm": 0.265625, "learning_rate": 0.00015998050433537362, "loss": 1.8112, "step": 3785 }, { "epoch": 1.172241325414192, "grad_norm": 0.2734375, "learning_rate": 0.0001599608424799563, "loss": 1.7065, "step": 3786 }, { "epoch": 1.1725539231009692, "grad_norm": 0.287109375, "learning_rate": 0.00015994117700462648, "loss": 1.6396, "step": 3787 }, { "epoch": 1.1728665207877462, "grad_norm": 0.26953125, "learning_rate": 0.00015992150791057147, "loss": 1.5667, "step": 3788 }, { "epoch": 1.1731791184745233, "grad_norm": 0.2734375, "learning_rate": 0.00015990183519897866, "loss": 1.5552, "step": 3789 }, { "epoch": 1.1734917161613003, "grad_norm": 0.251953125, "learning_rate": 0.0001598821588710357, "loss": 1.3596, "step": 3790 }, { "epoch": 1.1738043138480776, "grad_norm": 0.267578125, "learning_rate": 0.00015986247892793053, "loss": 1.6007, "step": 3791 }, { "epoch": 1.1741169115348546, "grad_norm": 0.26171875, "learning_rate": 0.0001598427953708512, "loss": 1.6813, "step": 3792 }, { "epoch": 1.1744295092216317, "grad_norm": 0.251953125, "learning_rate": 0.00015982310820098608, "loss": 1.6009, "step": 3793 }, { "epoch": 1.174742106908409, "grad_norm": 0.248046875, "learning_rate": 0.00015980341741952367, "loss": 1.4627, "step": 3794 }, { "epoch": 1.175054704595186, "grad_norm": 0.263671875, "learning_rate": 0.00015978372302765273, "loss": 1.5302, "step": 3795 }, { "epoch": 1.175367302281963, "grad_norm": 0.2734375, "learning_rate": 0.00015976402502656227, "loss": 1.6636, "step": 3796 }, { "epoch": 1.1756798999687401, "grad_norm": 0.255859375, "learning_rate": 0.00015974432341744142, "loss": 1.4919, "step": 3797 }, { "epoch": 1.1759924976555174, "grad_norm": 0.26953125, "learning_rate": 0.00015972461820147968, "loss": 1.5591, "step": 3798 }, { "epoch": 1.1763050953422944, "grad_norm": 0.302734375, "learning_rate": 0.00015970490937986662, "loss": 2.0899, "step": 3799 }, { "epoch": 1.1766176930290717, "grad_norm": 0.26171875, "learning_rate": 0.0001596851969537921, "loss": 1.6985, "step": 3800 }, { "epoch": 1.1769302907158488, "grad_norm": 0.26171875, "learning_rate": 0.00015966548092444618, "loss": 1.448, "step": 3801 }, { "epoch": 1.1772428884026258, "grad_norm": 0.287109375, "learning_rate": 0.0001596457612930191, "loss": 1.5943, "step": 3802 }, { "epoch": 1.1775554860894029, "grad_norm": 0.255859375, "learning_rate": 0.00015962603806070146, "loss": 1.6764, "step": 3803 }, { "epoch": 1.1778680837761801, "grad_norm": 0.263671875, "learning_rate": 0.0001596063112286839, "loss": 1.6969, "step": 3804 }, { "epoch": 1.1781806814629572, "grad_norm": 0.271484375, "learning_rate": 0.00015958658079815737, "loss": 1.4794, "step": 3805 }, { "epoch": 1.1784932791497342, "grad_norm": 0.255859375, "learning_rate": 0.00015956684677031303, "loss": 1.6924, "step": 3806 }, { "epoch": 1.1788058768365115, "grad_norm": 0.2578125, "learning_rate": 0.00015954710914634226, "loss": 1.487, "step": 3807 }, { "epoch": 1.1791184745232886, "grad_norm": 0.26171875, "learning_rate": 0.0001595273679274366, "loss": 1.7106, "step": 3808 }, { "epoch": 1.1794310722100656, "grad_norm": 0.265625, "learning_rate": 0.0001595076231147879, "loss": 1.5495, "step": 3809 }, { "epoch": 1.1797436698968427, "grad_norm": 0.2578125, "learning_rate": 0.00015948787470958817, "loss": 1.7602, "step": 3810 }, { "epoch": 1.18005626758362, "grad_norm": 0.328125, "learning_rate": 0.0001594681227130296, "loss": 2.4393, "step": 3811 }, { "epoch": 1.180368865270397, "grad_norm": 0.267578125, "learning_rate": 0.00015944836712630472, "loss": 1.4862, "step": 3812 }, { "epoch": 1.180681462957174, "grad_norm": 0.271484375, "learning_rate": 0.00015942860795060618, "loss": 1.5807, "step": 3813 }, { "epoch": 1.1809940606439513, "grad_norm": 0.267578125, "learning_rate": 0.00015940884518712676, "loss": 1.6587, "step": 3814 }, { "epoch": 1.1813066583307283, "grad_norm": 0.259765625, "learning_rate": 0.00015938907883705973, "loss": 1.6634, "step": 3815 }, { "epoch": 1.1816192560175054, "grad_norm": 0.263671875, "learning_rate": 0.0001593693089015983, "loss": 1.4222, "step": 3816 }, { "epoch": 1.1819318537042827, "grad_norm": 0.271484375, "learning_rate": 0.00015934953538193603, "loss": 1.5701, "step": 3817 }, { "epoch": 1.1822444513910597, "grad_norm": 0.259765625, "learning_rate": 0.0001593297582792667, "loss": 1.5497, "step": 3818 }, { "epoch": 1.1825570490778368, "grad_norm": 0.25390625, "learning_rate": 0.00015930997759478426, "loss": 1.4805, "step": 3819 }, { "epoch": 1.182869646764614, "grad_norm": 0.265625, "learning_rate": 0.00015929019332968286, "loss": 1.6392, "step": 3820 }, { "epoch": 1.183182244451391, "grad_norm": 0.287109375, "learning_rate": 0.00015927040548515696, "loss": 1.6403, "step": 3821 }, { "epoch": 1.1834948421381681, "grad_norm": 0.259765625, "learning_rate": 0.00015925061406240116, "loss": 1.5835, "step": 3822 }, { "epoch": 1.1838074398249452, "grad_norm": 0.287109375, "learning_rate": 0.00015923081906261025, "loss": 1.5995, "step": 3823 }, { "epoch": 1.1841200375117225, "grad_norm": 0.26171875, "learning_rate": 0.00015921102048697936, "loss": 1.7164, "step": 3824 }, { "epoch": 1.1844326351984995, "grad_norm": 0.271484375, "learning_rate": 0.00015919121833670368, "loss": 1.3208, "step": 3825 }, { "epoch": 1.1847452328852766, "grad_norm": 0.26171875, "learning_rate": 0.00015917141261297875, "loss": 1.6998, "step": 3826 }, { "epoch": 1.1850578305720538, "grad_norm": 0.259765625, "learning_rate": 0.0001591516033170002, "loss": 1.7813, "step": 3827 }, { "epoch": 1.1853704282588309, "grad_norm": 0.2470703125, "learning_rate": 0.000159131790449964, "loss": 1.249, "step": 3828 }, { "epoch": 1.185683025945608, "grad_norm": 0.259765625, "learning_rate": 0.00015911197401306625, "loss": 1.2939, "step": 3829 }, { "epoch": 1.1859956236323852, "grad_norm": 0.251953125, "learning_rate": 0.0001590921540075033, "loss": 1.6855, "step": 3830 }, { "epoch": 1.1863082213191622, "grad_norm": 0.26953125, "learning_rate": 0.00015907233043447173, "loss": 1.8501, "step": 3831 }, { "epoch": 1.1866208190059393, "grad_norm": 0.263671875, "learning_rate": 0.00015905250329516829, "loss": 1.496, "step": 3832 }, { "epoch": 1.1869334166927166, "grad_norm": 0.25390625, "learning_rate": 0.00015903267259078995, "loss": 1.6494, "step": 3833 }, { "epoch": 1.1872460143794936, "grad_norm": 0.283203125, "learning_rate": 0.00015901283832253397, "loss": 1.6233, "step": 3834 }, { "epoch": 1.1875586120662707, "grad_norm": 0.287109375, "learning_rate": 0.00015899300049159772, "loss": 1.8152, "step": 3835 }, { "epoch": 1.1878712097530477, "grad_norm": 0.26953125, "learning_rate": 0.00015897315909917887, "loss": 1.652, "step": 3836 }, { "epoch": 1.188183807439825, "grad_norm": 0.271484375, "learning_rate": 0.00015895331414647523, "loss": 1.4338, "step": 3837 }, { "epoch": 1.188496405126602, "grad_norm": 0.259765625, "learning_rate": 0.0001589334656346849, "loss": 1.5069, "step": 3838 }, { "epoch": 1.188809002813379, "grad_norm": 0.283203125, "learning_rate": 0.00015891361356500618, "loss": 1.5154, "step": 3839 }, { "epoch": 1.1891216005001564, "grad_norm": 0.28125, "learning_rate": 0.0001588937579386375, "loss": 1.7832, "step": 3840 }, { "epoch": 1.1894341981869334, "grad_norm": 0.263671875, "learning_rate": 0.0001588738987567776, "loss": 1.5498, "step": 3841 }, { "epoch": 1.1897467958737105, "grad_norm": 0.271484375, "learning_rate": 0.00015885403602062544, "loss": 1.5846, "step": 3842 }, { "epoch": 1.1900593935604877, "grad_norm": 0.265625, "learning_rate": 0.00015883416973138013, "loss": 1.6232, "step": 3843 }, { "epoch": 1.1903719912472648, "grad_norm": 0.26171875, "learning_rate": 0.00015881429989024096, "loss": 1.7651, "step": 3844 }, { "epoch": 1.1906845889340418, "grad_norm": 0.271484375, "learning_rate": 0.0001587944264984076, "loss": 1.3701, "step": 3845 }, { "epoch": 1.190997186620819, "grad_norm": 0.259765625, "learning_rate": 0.0001587745495570798, "loss": 1.449, "step": 3846 }, { "epoch": 1.1913097843075962, "grad_norm": 0.26171875, "learning_rate": 0.00015875466906745752, "loss": 1.3349, "step": 3847 }, { "epoch": 1.1916223819943732, "grad_norm": 0.265625, "learning_rate": 0.00015873478503074102, "loss": 1.5907, "step": 3848 }, { "epoch": 1.1919349796811503, "grad_norm": 0.26171875, "learning_rate": 0.0001587148974481307, "loss": 1.5187, "step": 3849 }, { "epoch": 1.1922475773679275, "grad_norm": 0.275390625, "learning_rate": 0.0001586950063208272, "loss": 1.5337, "step": 3850 }, { "epoch": 1.1925601750547046, "grad_norm": 0.25390625, "learning_rate": 0.00015867511165003134, "loss": 1.5069, "step": 3851 }, { "epoch": 1.1928727727414816, "grad_norm": 0.275390625, "learning_rate": 0.00015865521343694426, "loss": 1.5727, "step": 3852 }, { "epoch": 1.193185370428259, "grad_norm": 0.283203125, "learning_rate": 0.00015863531168276718, "loss": 1.7666, "step": 3853 }, { "epoch": 1.193497968115036, "grad_norm": 0.28125, "learning_rate": 0.00015861540638870163, "loss": 1.82, "step": 3854 }, { "epoch": 1.193810565801813, "grad_norm": 0.275390625, "learning_rate": 0.0001585954975559493, "loss": 1.6074, "step": 3855 }, { "epoch": 1.1941231634885903, "grad_norm": 0.25390625, "learning_rate": 0.00015857558518571208, "loss": 1.38, "step": 3856 }, { "epoch": 1.1944357611753673, "grad_norm": 0.271484375, "learning_rate": 0.00015855566927919216, "loss": 1.3888, "step": 3857 }, { "epoch": 1.1947483588621444, "grad_norm": 0.259765625, "learning_rate": 0.00015853574983759185, "loss": 1.5808, "step": 3858 }, { "epoch": 1.1950609565489216, "grad_norm": 0.283203125, "learning_rate": 0.00015851582686211377, "loss": 1.333, "step": 3859 }, { "epoch": 1.1953735542356987, "grad_norm": 0.26953125, "learning_rate": 0.00015849590035396064, "loss": 1.389, "step": 3860 }, { "epoch": 1.1956861519224757, "grad_norm": 0.26953125, "learning_rate": 0.00015847597031433546, "loss": 1.6015, "step": 3861 }, { "epoch": 1.1959987496092528, "grad_norm": 0.265625, "learning_rate": 0.00015845603674444144, "loss": 1.5003, "step": 3862 }, { "epoch": 1.19631134729603, "grad_norm": 0.2734375, "learning_rate": 0.00015843609964548197, "loss": 1.3325, "step": 3863 }, { "epoch": 1.196623944982807, "grad_norm": 0.2734375, "learning_rate": 0.0001584161590186607, "loss": 1.5014, "step": 3864 }, { "epoch": 1.1969365426695842, "grad_norm": 0.25, "learning_rate": 0.00015839621486518147, "loss": 1.5025, "step": 3865 }, { "epoch": 1.1972491403563614, "grad_norm": 0.263671875, "learning_rate": 0.00015837626718624836, "loss": 1.509, "step": 3866 }, { "epoch": 1.1975617380431385, "grad_norm": 0.267578125, "learning_rate": 0.0001583563159830656, "loss": 1.5751, "step": 3867 }, { "epoch": 1.1978743357299155, "grad_norm": 0.28125, "learning_rate": 0.00015833636125683767, "loss": 1.7019, "step": 3868 }, { "epoch": 1.1981869334166928, "grad_norm": 0.275390625, "learning_rate": 0.00015831640300876927, "loss": 1.2996, "step": 3869 }, { "epoch": 1.1984995311034699, "grad_norm": 0.2734375, "learning_rate": 0.0001582964412400653, "loss": 1.8939, "step": 3870 }, { "epoch": 1.198812128790247, "grad_norm": 0.27734375, "learning_rate": 0.0001582764759519309, "loss": 1.6502, "step": 3871 }, { "epoch": 1.1991247264770242, "grad_norm": 0.265625, "learning_rate": 0.0001582565071455714, "loss": 1.5804, "step": 3872 }, { "epoch": 1.1994373241638012, "grad_norm": 0.26171875, "learning_rate": 0.0001582365348221923, "loss": 1.4524, "step": 3873 }, { "epoch": 1.1997499218505783, "grad_norm": 0.283203125, "learning_rate": 0.0001582165589829994, "loss": 1.5058, "step": 3874 }, { "epoch": 1.2000625195373553, "grad_norm": 0.263671875, "learning_rate": 0.00015819657962919863, "loss": 1.4116, "step": 3875 }, { "epoch": 1.2003751172241326, "grad_norm": 0.265625, "learning_rate": 0.00015817659676199618, "loss": 1.7421, "step": 3876 }, { "epoch": 1.2006877149109096, "grad_norm": 0.26953125, "learning_rate": 0.00015815661038259848, "loss": 1.4993, "step": 3877 }, { "epoch": 1.2010003125976867, "grad_norm": 0.2578125, "learning_rate": 0.0001581366204922121, "loss": 1.5964, "step": 3878 }, { "epoch": 1.201312910284464, "grad_norm": 0.267578125, "learning_rate": 0.00015811662709204382, "loss": 1.6863, "step": 3879 }, { "epoch": 1.201625507971241, "grad_norm": 0.259765625, "learning_rate": 0.0001580966301833007, "loss": 1.3213, "step": 3880 }, { "epoch": 1.201938105658018, "grad_norm": 0.248046875, "learning_rate": 0.00015807662976719005, "loss": 1.5946, "step": 3881 }, { "epoch": 1.2022507033447953, "grad_norm": 0.2734375, "learning_rate": 0.00015805662584491922, "loss": 1.5478, "step": 3882 }, { "epoch": 1.2025633010315724, "grad_norm": 0.2734375, "learning_rate": 0.0001580366184176959, "loss": 1.4345, "step": 3883 }, { "epoch": 1.2028758987183494, "grad_norm": 0.271484375, "learning_rate": 0.00015801660748672794, "loss": 1.3025, "step": 3884 }, { "epoch": 1.2031884964051267, "grad_norm": 0.27734375, "learning_rate": 0.00015799659305322348, "loss": 1.5366, "step": 3885 }, { "epoch": 1.2035010940919038, "grad_norm": 0.26953125, "learning_rate": 0.0001579765751183908, "loss": 1.5513, "step": 3886 }, { "epoch": 1.2038136917786808, "grad_norm": 0.271484375, "learning_rate": 0.00015795655368343838, "loss": 1.4599, "step": 3887 }, { "epoch": 1.2041262894654579, "grad_norm": 0.2490234375, "learning_rate": 0.00015793652874957498, "loss": 1.1852, "step": 3888 }, { "epoch": 1.2044388871522351, "grad_norm": 0.279296875, "learning_rate": 0.0001579165003180095, "loss": 1.5249, "step": 3889 }, { "epoch": 1.2047514848390122, "grad_norm": 0.263671875, "learning_rate": 0.0001578964683899511, "loss": 1.5092, "step": 3890 }, { "epoch": 1.2050640825257892, "grad_norm": 0.283203125, "learning_rate": 0.00015787643296660912, "loss": 1.8863, "step": 3891 }, { "epoch": 1.2053766802125665, "grad_norm": 0.2734375, "learning_rate": 0.00015785639404919315, "loss": 1.4129, "step": 3892 }, { "epoch": 1.2056892778993435, "grad_norm": 0.255859375, "learning_rate": 0.00015783635163891288, "loss": 1.5204, "step": 3893 }, { "epoch": 1.2060018755861206, "grad_norm": 0.263671875, "learning_rate": 0.0001578163057369784, "loss": 1.4376, "step": 3894 }, { "epoch": 1.2063144732728979, "grad_norm": 0.265625, "learning_rate": 0.0001577962563445999, "loss": 1.3734, "step": 3895 }, { "epoch": 1.206627070959675, "grad_norm": 0.267578125, "learning_rate": 0.0001577762034629877, "loss": 1.4842, "step": 3896 }, { "epoch": 1.206939668646452, "grad_norm": 0.26171875, "learning_rate": 0.00015775614709335253, "loss": 1.9202, "step": 3897 }, { "epoch": 1.2072522663332292, "grad_norm": 0.322265625, "learning_rate": 0.0001577360872369051, "loss": 2.3281, "step": 3898 }, { "epoch": 1.2075648640200063, "grad_norm": 0.265625, "learning_rate": 0.00015771602389485654, "loss": 1.658, "step": 3899 }, { "epoch": 1.2078774617067833, "grad_norm": 0.271484375, "learning_rate": 0.00015769595706841807, "loss": 1.6683, "step": 3900 }, { "epoch": 1.2081900593935604, "grad_norm": 0.265625, "learning_rate": 0.00015767588675880115, "loss": 1.2917, "step": 3901 }, { "epoch": 1.2085026570803377, "grad_norm": 0.263671875, "learning_rate": 0.00015765581296721742, "loss": 1.6233, "step": 3902 }, { "epoch": 1.2088152547671147, "grad_norm": 0.25390625, "learning_rate": 0.00015763573569487881, "loss": 1.5035, "step": 3903 }, { "epoch": 1.2091278524538918, "grad_norm": 0.2734375, "learning_rate": 0.0001576156549429974, "loss": 1.5276, "step": 3904 }, { "epoch": 1.209440450140669, "grad_norm": 0.25390625, "learning_rate": 0.00015759557071278547, "loss": 1.6975, "step": 3905 }, { "epoch": 1.209753047827446, "grad_norm": 0.26171875, "learning_rate": 0.00015757548300545556, "loss": 1.6439, "step": 3906 }, { "epoch": 1.2100656455142231, "grad_norm": 0.26171875, "learning_rate": 0.00015755539182222034, "loss": 1.3377, "step": 3907 }, { "epoch": 1.2103782432010004, "grad_norm": 0.26953125, "learning_rate": 0.0001575352971642928, "loss": 1.8499, "step": 3908 }, { "epoch": 1.2106908408877775, "grad_norm": 0.2578125, "learning_rate": 0.00015751519903288604, "loss": 1.694, "step": 3909 }, { "epoch": 1.2110034385745545, "grad_norm": 0.2578125, "learning_rate": 0.00015749509742921341, "loss": 1.6426, "step": 3910 }, { "epoch": 1.2113160362613318, "grad_norm": 0.26953125, "learning_rate": 0.00015747499235448852, "loss": 1.4628, "step": 3911 }, { "epoch": 1.2116286339481088, "grad_norm": 0.2734375, "learning_rate": 0.00015745488380992505, "loss": 1.3588, "step": 3912 }, { "epoch": 1.2119412316348859, "grad_norm": 0.259765625, "learning_rate": 0.00015743477179673709, "loss": 1.5574, "step": 3913 }, { "epoch": 1.212253829321663, "grad_norm": 0.26953125, "learning_rate": 0.00015741465631613873, "loss": 1.481, "step": 3914 }, { "epoch": 1.2125664270084402, "grad_norm": 0.271484375, "learning_rate": 0.0001573945373693444, "loss": 1.7328, "step": 3915 }, { "epoch": 1.2128790246952172, "grad_norm": 0.28515625, "learning_rate": 0.00015737441495756871, "loss": 1.4424, "step": 3916 }, { "epoch": 1.2131916223819943, "grad_norm": 0.271484375, "learning_rate": 0.00015735428908202645, "loss": 1.5498, "step": 3917 }, { "epoch": 1.2135042200687716, "grad_norm": 0.26171875, "learning_rate": 0.0001573341597439327, "loss": 1.673, "step": 3918 }, { "epoch": 1.2138168177555486, "grad_norm": 0.279296875, "learning_rate": 0.00015731402694450268, "loss": 1.3998, "step": 3919 }, { "epoch": 1.2141294154423257, "grad_norm": 0.267578125, "learning_rate": 0.00015729389068495182, "loss": 1.6412, "step": 3920 }, { "epoch": 1.214442013129103, "grad_norm": 0.259765625, "learning_rate": 0.00015727375096649576, "loss": 1.4878, "step": 3921 }, { "epoch": 1.21475461081588, "grad_norm": 0.283203125, "learning_rate": 0.00015725360779035035, "loss": 1.5505, "step": 3922 }, { "epoch": 1.215067208502657, "grad_norm": 0.263671875, "learning_rate": 0.0001572334611577317, "loss": 1.6475, "step": 3923 }, { "epoch": 1.2153798061894343, "grad_norm": 0.2734375, "learning_rate": 0.000157213311069856, "loss": 1.5155, "step": 3924 }, { "epoch": 1.2156924038762114, "grad_norm": 0.263671875, "learning_rate": 0.0001571931575279399, "loss": 1.5424, "step": 3925 }, { "epoch": 1.2160050015629884, "grad_norm": 0.263671875, "learning_rate": 0.00015717300053319996, "loss": 1.797, "step": 3926 }, { "epoch": 1.2163175992497655, "grad_norm": 0.265625, "learning_rate": 0.0001571528400868531, "loss": 1.382, "step": 3927 }, { "epoch": 1.2166301969365427, "grad_norm": 0.2578125, "learning_rate": 0.0001571326761901165, "loss": 1.3145, "step": 3928 }, { "epoch": 1.2169427946233198, "grad_norm": 0.259765625, "learning_rate": 0.0001571125088442074, "loss": 1.5764, "step": 3929 }, { "epoch": 1.2172553923100968, "grad_norm": 0.2578125, "learning_rate": 0.00015709233805034337, "loss": 1.4217, "step": 3930 }, { "epoch": 1.217567989996874, "grad_norm": 0.259765625, "learning_rate": 0.00015707216380974215, "loss": 1.4045, "step": 3931 }, { "epoch": 1.2178805876836512, "grad_norm": 0.26953125, "learning_rate": 0.00015705198612362165, "loss": 1.4157, "step": 3932 }, { "epoch": 1.2181931853704282, "grad_norm": 0.275390625, "learning_rate": 0.00015703180499320008, "loss": 1.6023, "step": 3933 }, { "epoch": 1.2185057830572055, "grad_norm": 0.263671875, "learning_rate": 0.00015701162041969574, "loss": 1.5164, "step": 3934 }, { "epoch": 1.2188183807439825, "grad_norm": 0.275390625, "learning_rate": 0.0001569914324043272, "loss": 2.0203, "step": 3935 }, { "epoch": 1.2191309784307596, "grad_norm": 0.2734375, "learning_rate": 0.0001569712409483133, "loss": 1.3004, "step": 3936 }, { "epoch": 1.2194435761175368, "grad_norm": 0.267578125, "learning_rate": 0.00015695104605287295, "loss": 1.7227, "step": 3937 }, { "epoch": 1.219756173804314, "grad_norm": 0.2578125, "learning_rate": 0.0001569308477192254, "loss": 1.5001, "step": 3938 }, { "epoch": 1.220068771491091, "grad_norm": 0.26953125, "learning_rate": 0.00015691064594859004, "loss": 1.3581, "step": 3939 }, { "epoch": 1.220381369177868, "grad_norm": 0.26953125, "learning_rate": 0.00015689044074218645, "loss": 1.7562, "step": 3940 }, { "epoch": 1.2206939668646453, "grad_norm": 0.2734375, "learning_rate": 0.00015687023210123443, "loss": 1.8915, "step": 3941 }, { "epoch": 1.2210065645514223, "grad_norm": 0.26171875, "learning_rate": 0.00015685002002695407, "loss": 1.4896, "step": 3942 }, { "epoch": 1.2213191622381994, "grad_norm": 0.26953125, "learning_rate": 0.00015682980452056552, "loss": 1.3309, "step": 3943 }, { "epoch": 1.2216317599249766, "grad_norm": 0.2578125, "learning_rate": 0.0001568095855832893, "loss": 1.673, "step": 3944 }, { "epoch": 1.2219443576117537, "grad_norm": 0.267578125, "learning_rate": 0.00015678936321634598, "loss": 1.2337, "step": 3945 }, { "epoch": 1.2222569552985307, "grad_norm": 0.283203125, "learning_rate": 0.0001567691374209564, "loss": 1.3578, "step": 3946 }, { "epoch": 1.222569552985308, "grad_norm": 0.279296875, "learning_rate": 0.00015674890819834168, "loss": 1.561, "step": 3947 }, { "epoch": 1.222882150672085, "grad_norm": 0.27734375, "learning_rate": 0.00015672867554972306, "loss": 1.3768, "step": 3948 }, { "epoch": 1.223194748358862, "grad_norm": 0.271484375, "learning_rate": 0.000156708439476322, "loss": 1.6251, "step": 3949 }, { "epoch": 1.2235073460456394, "grad_norm": 0.2734375, "learning_rate": 0.0001566881999793602, "loss": 1.3627, "step": 3950 }, { "epoch": 1.2238199437324164, "grad_norm": 0.255859375, "learning_rate": 0.0001566679570600595, "loss": 1.6194, "step": 3951 }, { "epoch": 1.2241325414191935, "grad_norm": 0.2734375, "learning_rate": 0.00015664771071964207, "loss": 1.4797, "step": 3952 }, { "epoch": 1.2244451391059705, "grad_norm": 0.263671875, "learning_rate": 0.0001566274609593301, "loss": 1.5906, "step": 3953 }, { "epoch": 1.2247577367927478, "grad_norm": 0.279296875, "learning_rate": 0.00015660720778034616, "loss": 1.4935, "step": 3954 }, { "epoch": 1.2250703344795248, "grad_norm": 0.26171875, "learning_rate": 0.000156586951183913, "loss": 1.5746, "step": 3955 }, { "epoch": 1.225382932166302, "grad_norm": 0.28515625, "learning_rate": 0.00015656669117125344, "loss": 1.7911, "step": 3956 }, { "epoch": 1.2256955298530792, "grad_norm": 0.26171875, "learning_rate": 0.00015654642774359068, "loss": 1.5117, "step": 3957 }, { "epoch": 1.2260081275398562, "grad_norm": 0.2734375, "learning_rate": 0.000156526160902148, "loss": 1.6573, "step": 3958 }, { "epoch": 1.2263207252266333, "grad_norm": 0.263671875, "learning_rate": 0.00015650589064814896, "loss": 1.5288, "step": 3959 }, { "epoch": 1.2266333229134103, "grad_norm": 0.279296875, "learning_rate": 0.00015648561698281728, "loss": 1.458, "step": 3960 }, { "epoch": 1.2269459206001876, "grad_norm": 0.265625, "learning_rate": 0.00015646533990737696, "loss": 1.2534, "step": 3961 }, { "epoch": 1.2272585182869646, "grad_norm": 0.25390625, "learning_rate": 0.00015644505942305207, "loss": 1.6487, "step": 3962 }, { "epoch": 1.227571115973742, "grad_norm": 0.26171875, "learning_rate": 0.00015642477553106702, "loss": 1.637, "step": 3963 }, { "epoch": 1.227883713660519, "grad_norm": 0.263671875, "learning_rate": 0.00015640448823264638, "loss": 1.6421, "step": 3964 }, { "epoch": 1.228196311347296, "grad_norm": 0.2734375, "learning_rate": 0.00015638419752901493, "loss": 1.4292, "step": 3965 }, { "epoch": 1.228508909034073, "grad_norm": 0.2578125, "learning_rate": 0.0001563639034213976, "loss": 1.542, "step": 3966 }, { "epoch": 1.2288215067208503, "grad_norm": 0.267578125, "learning_rate": 0.0001563436059110196, "loss": 1.4943, "step": 3967 }, { "epoch": 1.2291341044076274, "grad_norm": 0.2578125, "learning_rate": 0.00015632330499910633, "loss": 1.4636, "step": 3968 }, { "epoch": 1.2294467020944044, "grad_norm": 0.2578125, "learning_rate": 0.00015630300068688333, "loss": 1.5381, "step": 3969 }, { "epoch": 1.2297592997811817, "grad_norm": 0.271484375, "learning_rate": 0.00015628269297557646, "loss": 1.4995, "step": 3970 }, { "epoch": 1.2300718974679588, "grad_norm": 0.267578125, "learning_rate": 0.00015626238186641168, "loss": 1.4239, "step": 3971 }, { "epoch": 1.2303844951547358, "grad_norm": 0.271484375, "learning_rate": 0.0001562420673606152, "loss": 1.633, "step": 3972 }, { "epoch": 1.2306970928415129, "grad_norm": 0.29296875, "learning_rate": 0.00015622174945941346, "loss": 1.6459, "step": 3973 }, { "epoch": 1.2310096905282901, "grad_norm": 0.279296875, "learning_rate": 0.00015620142816403308, "loss": 1.6795, "step": 3974 }, { "epoch": 1.2313222882150672, "grad_norm": 0.279296875, "learning_rate": 0.0001561811034757008, "loss": 1.4258, "step": 3975 }, { "epoch": 1.2316348859018444, "grad_norm": 0.2734375, "learning_rate": 0.00015616077539564377, "loss": 1.66, "step": 3976 }, { "epoch": 1.2319474835886215, "grad_norm": 0.275390625, "learning_rate": 0.00015614044392508913, "loss": 1.5547, "step": 3977 }, { "epoch": 1.2322600812753985, "grad_norm": 0.26953125, "learning_rate": 0.00015612010906526438, "loss": 1.6773, "step": 3978 }, { "epoch": 1.2325726789621756, "grad_norm": 0.255859375, "learning_rate": 0.00015609977081739712, "loss": 1.2064, "step": 3979 }, { "epoch": 1.2328852766489529, "grad_norm": 0.27734375, "learning_rate": 0.00015607942918271519, "loss": 1.4814, "step": 3980 }, { "epoch": 1.23319787433573, "grad_norm": 0.259765625, "learning_rate": 0.00015605908416244666, "loss": 1.7813, "step": 3981 }, { "epoch": 1.233510472022507, "grad_norm": 0.26953125, "learning_rate": 0.00015603873575781977, "loss": 1.5441, "step": 3982 }, { "epoch": 1.2338230697092842, "grad_norm": 0.267578125, "learning_rate": 0.00015601838397006303, "loss": 1.7521, "step": 3983 }, { "epoch": 1.2341356673960613, "grad_norm": 0.271484375, "learning_rate": 0.00015599802880040503, "loss": 1.4041, "step": 3984 }, { "epoch": 1.2344482650828383, "grad_norm": 0.259765625, "learning_rate": 0.00015597767025007472, "loss": 1.814, "step": 3985 }, { "epoch": 1.2347608627696154, "grad_norm": 0.265625, "learning_rate": 0.00015595730832030106, "loss": 1.6398, "step": 3986 }, { "epoch": 1.2350734604563927, "grad_norm": 0.2734375, "learning_rate": 0.00015593694301231347, "loss": 1.5699, "step": 3987 }, { "epoch": 1.2353860581431697, "grad_norm": 0.267578125, "learning_rate": 0.00015591657432734128, "loss": 1.526, "step": 3988 }, { "epoch": 1.235698655829947, "grad_norm": 0.26953125, "learning_rate": 0.00015589620226661425, "loss": 1.6643, "step": 3989 }, { "epoch": 1.236011253516724, "grad_norm": 0.267578125, "learning_rate": 0.0001558758268313623, "loss": 1.6431, "step": 3990 }, { "epoch": 1.236323851203501, "grad_norm": 0.279296875, "learning_rate": 0.00015585544802281545, "loss": 1.4348, "step": 3991 }, { "epoch": 1.2366364488902781, "grad_norm": 0.263671875, "learning_rate": 0.00015583506584220403, "loss": 1.6411, "step": 3992 }, { "epoch": 1.2369490465770554, "grad_norm": 0.287109375, "learning_rate": 0.00015581468029075854, "loss": 1.6184, "step": 3993 }, { "epoch": 1.2372616442638324, "grad_norm": 0.27734375, "learning_rate": 0.00015579429136970967, "loss": 1.5082, "step": 3994 }, { "epoch": 1.2375742419506095, "grad_norm": 0.279296875, "learning_rate": 0.00015577389908028836, "loss": 1.5839, "step": 3995 }, { "epoch": 1.2378868396373868, "grad_norm": 0.267578125, "learning_rate": 0.00015575350342372562, "loss": 1.5707, "step": 3996 }, { "epoch": 1.2381994373241638, "grad_norm": 0.28515625, "learning_rate": 0.00015573310440125288, "loss": 1.7643, "step": 3997 }, { "epoch": 1.2385120350109409, "grad_norm": 0.26953125, "learning_rate": 0.0001557127020141016, "loss": 1.4169, "step": 3998 }, { "epoch": 1.238824632697718, "grad_norm": 0.26953125, "learning_rate": 0.0001556922962635035, "loss": 1.6456, "step": 3999 }, { "epoch": 1.2391372303844952, "grad_norm": 0.265625, "learning_rate": 0.0001556718871506905, "loss": 1.8587, "step": 4000 }, { "epoch": 1.2394498280712722, "grad_norm": 0.27734375, "learning_rate": 0.00015565147467689477, "loss": 1.7377, "step": 4001 }, { "epoch": 1.2397624257580495, "grad_norm": 0.2734375, "learning_rate": 0.00015563105884334853, "loss": 1.7558, "step": 4002 }, { "epoch": 1.2400750234448266, "grad_norm": 0.275390625, "learning_rate": 0.00015561063965128442, "loss": 1.4029, "step": 4003 }, { "epoch": 1.2403876211316036, "grad_norm": 0.26171875, "learning_rate": 0.0001555902171019351, "loss": 1.7066, "step": 4004 }, { "epoch": 1.2407002188183807, "grad_norm": 0.287109375, "learning_rate": 0.00015556979119653357, "loss": 1.6246, "step": 4005 }, { "epoch": 1.241012816505158, "grad_norm": 0.255859375, "learning_rate": 0.00015554936193631292, "loss": 1.3696, "step": 4006 }, { "epoch": 1.241325414191935, "grad_norm": 0.255859375, "learning_rate": 0.0001555289293225065, "loss": 1.4733, "step": 4007 }, { "epoch": 1.241638011878712, "grad_norm": 0.267578125, "learning_rate": 0.00015550849335634786, "loss": 1.6661, "step": 4008 }, { "epoch": 1.2419506095654893, "grad_norm": 0.26171875, "learning_rate": 0.00015548805403907073, "loss": 1.3743, "step": 4009 }, { "epoch": 1.2422632072522664, "grad_norm": 0.28515625, "learning_rate": 0.00015546761137190905, "loss": 1.4139, "step": 4010 }, { "epoch": 1.2425758049390434, "grad_norm": 0.26171875, "learning_rate": 0.000155447165356097, "loss": 1.7938, "step": 4011 }, { "epoch": 1.2428884026258205, "grad_norm": 0.28515625, "learning_rate": 0.00015542671599286893, "loss": 1.4203, "step": 4012 }, { "epoch": 1.2432010003125977, "grad_norm": 0.267578125, "learning_rate": 0.0001554062632834594, "loss": 1.5658, "step": 4013 }, { "epoch": 1.2435135979993748, "grad_norm": 0.28125, "learning_rate": 0.00015538580722910313, "loss": 1.726, "step": 4014 }, { "epoch": 1.2438261956861518, "grad_norm": 0.259765625, "learning_rate": 0.0001553653478310351, "loss": 1.3631, "step": 4015 }, { "epoch": 1.244138793372929, "grad_norm": 0.275390625, "learning_rate": 0.00015534488509049048, "loss": 1.6755, "step": 4016 }, { "epoch": 1.2444513910597061, "grad_norm": 0.275390625, "learning_rate": 0.0001553244190087046, "loss": 1.4839, "step": 4017 }, { "epoch": 1.2447639887464832, "grad_norm": 0.265625, "learning_rate": 0.0001553039495869131, "loss": 1.6007, "step": 4018 }, { "epoch": 1.2450765864332605, "grad_norm": 0.275390625, "learning_rate": 0.00015528347682635163, "loss": 1.6159, "step": 4019 }, { "epoch": 1.2453891841200375, "grad_norm": 0.26171875, "learning_rate": 0.0001552630007282562, "loss": 1.4538, "step": 4020 }, { "epoch": 1.2457017818068146, "grad_norm": 0.2734375, "learning_rate": 0.00015524252129386302, "loss": 1.4477, "step": 4021 }, { "epoch": 1.2460143794935918, "grad_norm": 0.275390625, "learning_rate": 0.00015522203852440843, "loss": 1.6675, "step": 4022 }, { "epoch": 1.246326977180369, "grad_norm": 0.25390625, "learning_rate": 0.00015520155242112904, "loss": 1.5112, "step": 4023 }, { "epoch": 1.246639574867146, "grad_norm": 0.263671875, "learning_rate": 0.00015518106298526157, "loss": 1.4082, "step": 4024 }, { "epoch": 1.246952172553923, "grad_norm": 0.2734375, "learning_rate": 0.000155160570218043, "loss": 1.5276, "step": 4025 }, { "epoch": 1.2472647702407003, "grad_norm": 0.267578125, "learning_rate": 0.00015514007412071053, "loss": 1.597, "step": 4026 }, { "epoch": 1.2475773679274773, "grad_norm": 0.2734375, "learning_rate": 0.00015511957469450146, "loss": 1.5105, "step": 4027 }, { "epoch": 1.2478899656142544, "grad_norm": 0.279296875, "learning_rate": 0.0001550990719406535, "loss": 1.4222, "step": 4028 }, { "epoch": 1.2482025633010316, "grad_norm": 0.251953125, "learning_rate": 0.0001550785658604043, "loss": 1.6306, "step": 4029 }, { "epoch": 1.2485151609878087, "grad_norm": 0.263671875, "learning_rate": 0.00015505805645499193, "loss": 1.3462, "step": 4030 }, { "epoch": 1.2488277586745857, "grad_norm": 0.263671875, "learning_rate": 0.00015503754372565452, "loss": 1.5608, "step": 4031 }, { "epoch": 1.249140356361363, "grad_norm": 0.279296875, "learning_rate": 0.00015501702767363045, "loss": 1.4008, "step": 4032 }, { "epoch": 1.24945295404814, "grad_norm": 0.271484375, "learning_rate": 0.0001549965083001583, "loss": 1.6971, "step": 4033 }, { "epoch": 1.249765551734917, "grad_norm": 0.265625, "learning_rate": 0.00015497598560647687, "loss": 1.3958, "step": 4034 }, { "epoch": 1.2500781494216944, "grad_norm": 0.265625, "learning_rate": 0.00015495545959382512, "loss": 1.6028, "step": 4035 }, { "epoch": 1.2503907471084714, "grad_norm": 0.26953125, "learning_rate": 0.0001549349302634423, "loss": 1.578, "step": 4036 }, { "epoch": 1.2507033447952485, "grad_norm": 0.283203125, "learning_rate": 0.0001549143976165677, "loss": 1.6286, "step": 4037 }, { "epoch": 1.2510159424820255, "grad_norm": 0.265625, "learning_rate": 0.00015489386165444094, "loss": 1.6549, "step": 4038 }, { "epoch": 1.2513285401688028, "grad_norm": 0.314453125, "learning_rate": 0.0001548733223783018, "loss": 2.2127, "step": 4039 }, { "epoch": 1.2516411378555798, "grad_norm": 0.259765625, "learning_rate": 0.00015485277978939026, "loss": 1.9045, "step": 4040 }, { "epoch": 1.2519537355423571, "grad_norm": 0.263671875, "learning_rate": 0.00015483223388894647, "loss": 1.4649, "step": 4041 }, { "epoch": 1.2522663332291342, "grad_norm": 0.28515625, "learning_rate": 0.00015481168467821092, "loss": 1.5532, "step": 4042 }, { "epoch": 1.2525789309159112, "grad_norm": 0.27734375, "learning_rate": 0.00015479113215842406, "loss": 1.4651, "step": 4043 }, { "epoch": 1.2528915286026883, "grad_norm": 0.271484375, "learning_rate": 0.00015477057633082674, "loss": 1.9448, "step": 4044 }, { "epoch": 1.2532041262894655, "grad_norm": 0.263671875, "learning_rate": 0.00015475001719665997, "loss": 1.6166, "step": 4045 }, { "epoch": 1.2535167239762426, "grad_norm": 0.259765625, "learning_rate": 0.00015472945475716486, "loss": 1.6965, "step": 4046 }, { "epoch": 1.2538293216630196, "grad_norm": 0.2890625, "learning_rate": 0.0001547088890135828, "loss": 1.4891, "step": 4047 }, { "epoch": 1.254141919349797, "grad_norm": 0.271484375, "learning_rate": 0.00015468831996715544, "loss": 1.332, "step": 4048 }, { "epoch": 1.254454517036574, "grad_norm": 0.28515625, "learning_rate": 0.0001546677476191245, "loss": 1.8816, "step": 4049 }, { "epoch": 1.254767114723351, "grad_norm": 0.271484375, "learning_rate": 0.00015464717197073195, "loss": 1.9537, "step": 4050 }, { "epoch": 1.255079712410128, "grad_norm": 0.271484375, "learning_rate": 0.00015462659302322001, "loss": 1.6064, "step": 4051 }, { "epoch": 1.2553923100969053, "grad_norm": 0.294921875, "learning_rate": 0.00015460601077783102, "loss": 1.8206, "step": 4052 }, { "epoch": 1.2557049077836824, "grad_norm": 0.271484375, "learning_rate": 0.0001545854252358076, "loss": 1.5358, "step": 4053 }, { "epoch": 1.2560175054704596, "grad_norm": 0.27734375, "learning_rate": 0.00015456483639839251, "loss": 1.7593, "step": 4054 }, { "epoch": 1.2563301031572367, "grad_norm": 0.275390625, "learning_rate": 0.0001545442442668287, "loss": 1.5966, "step": 4055 }, { "epoch": 1.2566427008440137, "grad_norm": 0.26953125, "learning_rate": 0.00015452364884235931, "loss": 1.6445, "step": 4056 }, { "epoch": 1.2569552985307908, "grad_norm": 0.26171875, "learning_rate": 0.00015450305012622783, "loss": 1.6093, "step": 4057 }, { "epoch": 1.257267896217568, "grad_norm": 0.26953125, "learning_rate": 0.00015448244811967773, "loss": 1.337, "step": 4058 }, { "epoch": 1.2575804939043451, "grad_norm": 0.2578125, "learning_rate": 0.00015446184282395282, "loss": 1.3599, "step": 4059 }, { "epoch": 1.2578930915911222, "grad_norm": 0.275390625, "learning_rate": 0.00015444123424029703, "loss": 1.6384, "step": 4060 }, { "epoch": 1.2582056892778994, "grad_norm": 0.263671875, "learning_rate": 0.0001544206223699546, "loss": 1.456, "step": 4061 }, { "epoch": 1.2585182869646765, "grad_norm": 0.2734375, "learning_rate": 0.0001544000072141698, "loss": 1.5647, "step": 4062 }, { "epoch": 1.2588308846514535, "grad_norm": 0.259765625, "learning_rate": 0.00015437938877418725, "loss": 1.326, "step": 4063 }, { "epoch": 1.2591434823382306, "grad_norm": 0.271484375, "learning_rate": 0.00015435876705125173, "loss": 1.4247, "step": 4064 }, { "epoch": 1.2594560800250079, "grad_norm": 0.267578125, "learning_rate": 0.00015433814204660816, "loss": 1.6699, "step": 4065 }, { "epoch": 1.259768677711785, "grad_norm": 0.267578125, "learning_rate": 0.0001543175137615017, "loss": 1.4876, "step": 4066 }, { "epoch": 1.2600812753985622, "grad_norm": 0.302734375, "learning_rate": 0.00015429688219717772, "loss": 1.3906, "step": 4067 }, { "epoch": 1.2603938730853392, "grad_norm": 0.265625, "learning_rate": 0.0001542762473548818, "loss": 1.5093, "step": 4068 }, { "epoch": 1.2607064707721163, "grad_norm": 0.27734375, "learning_rate": 0.00015425560923585963, "loss": 1.4614, "step": 4069 }, { "epoch": 1.2610190684588933, "grad_norm": 0.263671875, "learning_rate": 0.0001542349678413572, "loss": 1.6383, "step": 4070 }, { "epoch": 1.2613316661456706, "grad_norm": 0.279296875, "learning_rate": 0.00015421432317262065, "loss": 1.3294, "step": 4071 }, { "epoch": 1.2616442638324477, "grad_norm": 0.271484375, "learning_rate": 0.00015419367523089635, "loss": 1.4523, "step": 4072 }, { "epoch": 1.2619568615192247, "grad_norm": 0.2890625, "learning_rate": 0.00015417302401743075, "loss": 1.793, "step": 4073 }, { "epoch": 1.262269459206002, "grad_norm": 0.267578125, "learning_rate": 0.0001541523695334707, "loss": 1.5598, "step": 4074 }, { "epoch": 1.262582056892779, "grad_norm": 0.26171875, "learning_rate": 0.0001541317117802631, "loss": 1.4749, "step": 4075 }, { "epoch": 1.262894654579556, "grad_norm": 0.28125, "learning_rate": 0.00015411105075905504, "loss": 1.4417, "step": 4076 }, { "epoch": 1.2632072522663331, "grad_norm": 0.279296875, "learning_rate": 0.00015409038647109396, "loss": 1.5956, "step": 4077 }, { "epoch": 1.2635198499531104, "grad_norm": 0.26171875, "learning_rate": 0.00015406971891762726, "loss": 1.5047, "step": 4078 }, { "epoch": 1.2638324476398874, "grad_norm": 0.279296875, "learning_rate": 0.00015404904809990274, "loss": 1.718, "step": 4079 }, { "epoch": 1.2641450453266647, "grad_norm": 0.2890625, "learning_rate": 0.00015402837401916835, "loss": 1.5212, "step": 4080 }, { "epoch": 1.2644576430134418, "grad_norm": 0.275390625, "learning_rate": 0.00015400769667667214, "loss": 1.564, "step": 4081 }, { "epoch": 1.2647702407002188, "grad_norm": 0.275390625, "learning_rate": 0.00015398701607366246, "loss": 1.3742, "step": 4082 }, { "epoch": 1.2650828383869959, "grad_norm": 0.271484375, "learning_rate": 0.00015396633221138783, "loss": 1.6599, "step": 4083 }, { "epoch": 1.265395436073773, "grad_norm": 0.265625, "learning_rate": 0.00015394564509109692, "loss": 1.5583, "step": 4084 }, { "epoch": 1.2657080337605502, "grad_norm": 0.275390625, "learning_rate": 0.00015392495471403873, "loss": 1.5196, "step": 4085 }, { "epoch": 1.2660206314473272, "grad_norm": 0.279296875, "learning_rate": 0.00015390426108146226, "loss": 1.5672, "step": 4086 }, { "epoch": 1.2663332291341045, "grad_norm": 0.271484375, "learning_rate": 0.00015388356419461686, "loss": 1.3556, "step": 4087 }, { "epoch": 1.2666458268208816, "grad_norm": 0.26953125, "learning_rate": 0.00015386286405475206, "loss": 1.9784, "step": 4088 }, { "epoch": 1.2669584245076586, "grad_norm": 0.2734375, "learning_rate": 0.00015384216066311753, "loss": 1.4852, "step": 4089 }, { "epoch": 1.2672710221944357, "grad_norm": 0.26171875, "learning_rate": 0.00015382145402096308, "loss": 1.5272, "step": 4090 }, { "epoch": 1.267583619881213, "grad_norm": 0.267578125, "learning_rate": 0.0001538007441295389, "loss": 1.5805, "step": 4091 }, { "epoch": 1.26789621756799, "grad_norm": 0.26171875, "learning_rate": 0.00015378003099009524, "loss": 1.4972, "step": 4092 }, { "epoch": 1.2682088152547673, "grad_norm": 0.259765625, "learning_rate": 0.00015375931460388258, "loss": 1.7508, "step": 4093 }, { "epoch": 1.2685214129415443, "grad_norm": 0.2734375, "learning_rate": 0.0001537385949721516, "loss": 1.477, "step": 4094 }, { "epoch": 1.2688340106283214, "grad_norm": 0.2734375, "learning_rate": 0.00015371787209615312, "loss": 1.69, "step": 4095 }, { "epoch": 1.2691466083150984, "grad_norm": 0.298828125, "learning_rate": 0.0001536971459771383, "loss": 1.5547, "step": 4096 }, { "epoch": 1.2694592060018755, "grad_norm": 0.2578125, "learning_rate": 0.00015367641661635833, "loss": 1.4637, "step": 4097 }, { "epoch": 1.2697718036886527, "grad_norm": 0.2734375, "learning_rate": 0.0001536556840150647, "loss": 1.4967, "step": 4098 }, { "epoch": 1.2700844013754298, "grad_norm": 0.251953125, "learning_rate": 0.000153634948174509, "loss": 1.4932, "step": 4099 }, { "epoch": 1.270396999062207, "grad_norm": 0.28125, "learning_rate": 0.0001536142090959432, "loss": 1.7785, "step": 4100 }, { "epoch": 1.270709596748984, "grad_norm": 0.25390625, "learning_rate": 0.00015359346678061925, "loss": 1.6011, "step": 4101 }, { "epoch": 1.2710221944357611, "grad_norm": 0.248046875, "learning_rate": 0.00015357272122978942, "loss": 1.3854, "step": 4102 }, { "epoch": 1.2713347921225382, "grad_norm": 0.28125, "learning_rate": 0.00015355197244470612, "loss": 1.7274, "step": 4103 }, { "epoch": 1.2716473898093155, "grad_norm": 0.279296875, "learning_rate": 0.00015353122042662201, "loss": 1.3538, "step": 4104 }, { "epoch": 1.2719599874960925, "grad_norm": 0.2734375, "learning_rate": 0.0001535104651767899, "loss": 1.4942, "step": 4105 }, { "epoch": 1.2722725851828696, "grad_norm": 0.27734375, "learning_rate": 0.00015348970669646286, "loss": 1.4381, "step": 4106 }, { "epoch": 1.2725851828696468, "grad_norm": 0.275390625, "learning_rate": 0.00015346894498689402, "loss": 1.7603, "step": 4107 }, { "epoch": 1.2728977805564239, "grad_norm": 0.255859375, "learning_rate": 0.00015344818004933686, "loss": 1.6616, "step": 4108 }, { "epoch": 1.273210378243201, "grad_norm": 0.279296875, "learning_rate": 0.00015342741188504496, "loss": 1.5816, "step": 4109 }, { "epoch": 1.273522975929978, "grad_norm": 0.279296875, "learning_rate": 0.00015340664049527208, "loss": 1.5475, "step": 4110 }, { "epoch": 1.2738355736167553, "grad_norm": 0.267578125, "learning_rate": 0.0001533858658812723, "loss": 1.3622, "step": 4111 }, { "epoch": 1.2741481713035323, "grad_norm": 0.263671875, "learning_rate": 0.00015336508804429975, "loss": 1.608, "step": 4112 }, { "epoch": 1.2744607689903096, "grad_norm": 0.255859375, "learning_rate": 0.00015334430698560884, "loss": 1.5008, "step": 4113 }, { "epoch": 1.2747733666770866, "grad_norm": 0.265625, "learning_rate": 0.00015332352270645412, "loss": 1.6675, "step": 4114 }, { "epoch": 1.2750859643638637, "grad_norm": 0.267578125, "learning_rate": 0.00015330273520809042, "loss": 1.5997, "step": 4115 }, { "epoch": 1.2753985620506407, "grad_norm": 0.2734375, "learning_rate": 0.00015328194449177262, "loss": 1.7242, "step": 4116 }, { "epoch": 1.275711159737418, "grad_norm": 0.26953125, "learning_rate": 0.00015326115055875597, "loss": 1.6441, "step": 4117 }, { "epoch": 1.276023757424195, "grad_norm": 0.267578125, "learning_rate": 0.00015324035341029578, "loss": 1.5648, "step": 4118 }, { "epoch": 1.276336355110972, "grad_norm": 0.26953125, "learning_rate": 0.00015321955304764765, "loss": 1.7804, "step": 4119 }, { "epoch": 1.2766489527977494, "grad_norm": 0.259765625, "learning_rate": 0.0001531987494720672, "loss": 1.4064, "step": 4120 }, { "epoch": 1.2769615504845264, "grad_norm": 0.291015625, "learning_rate": 0.0001531779426848105, "loss": 1.6266, "step": 4121 }, { "epoch": 1.2772741481713035, "grad_norm": 0.28125, "learning_rate": 0.00015315713268713364, "loss": 1.5447, "step": 4122 }, { "epoch": 1.2775867458580805, "grad_norm": 0.26953125, "learning_rate": 0.00015313631948029292, "loss": 1.3518, "step": 4123 }, { "epoch": 1.2778993435448578, "grad_norm": 0.265625, "learning_rate": 0.00015311550306554492, "loss": 1.4158, "step": 4124 }, { "epoch": 1.2782119412316348, "grad_norm": 0.275390625, "learning_rate": 0.00015309468344414627, "loss": 1.6997, "step": 4125 }, { "epoch": 1.2785245389184121, "grad_norm": 0.259765625, "learning_rate": 0.00015307386061735393, "loss": 1.3474, "step": 4126 }, { "epoch": 1.2788371366051892, "grad_norm": 0.255859375, "learning_rate": 0.00015305303458642503, "loss": 1.5881, "step": 4127 }, { "epoch": 1.2791497342919662, "grad_norm": 0.275390625, "learning_rate": 0.0001530322053526168, "loss": 1.767, "step": 4128 }, { "epoch": 1.2794623319787433, "grad_norm": 0.265625, "learning_rate": 0.00015301137291718676, "loss": 1.4263, "step": 4129 }, { "epoch": 1.2797749296655205, "grad_norm": 0.259765625, "learning_rate": 0.00015299053728139256, "loss": 1.6506, "step": 4130 }, { "epoch": 1.2800875273522976, "grad_norm": 0.271484375, "learning_rate": 0.0001529696984464921, "loss": 1.5171, "step": 4131 }, { "epoch": 1.2804001250390746, "grad_norm": 0.275390625, "learning_rate": 0.00015294885641374347, "loss": 1.4701, "step": 4132 }, { "epoch": 1.280712722725852, "grad_norm": 0.251953125, "learning_rate": 0.0001529280111844049, "loss": 1.5263, "step": 4133 }, { "epoch": 1.281025320412629, "grad_norm": 0.2734375, "learning_rate": 0.0001529071627597348, "loss": 1.4748, "step": 4134 }, { "epoch": 1.281337918099406, "grad_norm": 0.26953125, "learning_rate": 0.00015288631114099196, "loss": 1.5851, "step": 4135 }, { "epoch": 1.281650515786183, "grad_norm": 0.267578125, "learning_rate": 0.00015286545632943506, "loss": 1.6182, "step": 4136 }, { "epoch": 1.2819631134729603, "grad_norm": 0.271484375, "learning_rate": 0.00015284459832632318, "loss": 1.6872, "step": 4137 }, { "epoch": 1.2822757111597374, "grad_norm": 0.26953125, "learning_rate": 0.0001528237371329156, "loss": 1.2909, "step": 4138 }, { "epoch": 1.2825883088465146, "grad_norm": 0.271484375, "learning_rate": 0.00015280287275047167, "loss": 1.4174, "step": 4139 }, { "epoch": 1.2829009065332917, "grad_norm": 0.27734375, "learning_rate": 0.00015278200518025102, "loss": 1.5288, "step": 4140 }, { "epoch": 1.2832135042200687, "grad_norm": 0.26171875, "learning_rate": 0.00015276113442351345, "loss": 1.4028, "step": 4141 }, { "epoch": 1.2835261019068458, "grad_norm": 0.259765625, "learning_rate": 0.00015274026048151898, "loss": 1.6801, "step": 4142 }, { "epoch": 1.283838699593623, "grad_norm": 0.283203125, "learning_rate": 0.0001527193833555278, "loss": 1.6331, "step": 4143 }, { "epoch": 1.2841512972804001, "grad_norm": 0.26953125, "learning_rate": 0.00015269850304680023, "loss": 1.4745, "step": 4144 }, { "epoch": 1.2844638949671772, "grad_norm": 0.26171875, "learning_rate": 0.00015267761955659688, "loss": 1.4401, "step": 4145 }, { "epoch": 1.2847764926539544, "grad_norm": 0.275390625, "learning_rate": 0.0001526567328861785, "loss": 1.408, "step": 4146 }, { "epoch": 1.2850890903407315, "grad_norm": 0.275390625, "learning_rate": 0.00015263584303680609, "loss": 1.4133, "step": 4147 }, { "epoch": 1.2854016880275085, "grad_norm": 0.27734375, "learning_rate": 0.00015261495000974074, "loss": 1.5989, "step": 4148 }, { "epoch": 1.2857142857142856, "grad_norm": 0.271484375, "learning_rate": 0.00015259405380624384, "loss": 1.7035, "step": 4149 }, { "epoch": 1.2860268834010629, "grad_norm": 0.263671875, "learning_rate": 0.00015257315442757685, "loss": 1.2579, "step": 4150 }, { "epoch": 1.28633948108784, "grad_norm": 0.271484375, "learning_rate": 0.00015255225187500154, "loss": 1.3545, "step": 4151 }, { "epoch": 1.2866520787746172, "grad_norm": 0.267578125, "learning_rate": 0.00015253134614977979, "loss": 1.6228, "step": 4152 }, { "epoch": 1.2869646764613942, "grad_norm": 0.28125, "learning_rate": 0.0001525104372531738, "loss": 1.7938, "step": 4153 }, { "epoch": 1.2872772741481713, "grad_norm": 0.2734375, "learning_rate": 0.00015248952518644577, "loss": 1.5676, "step": 4154 }, { "epoch": 1.2875898718349483, "grad_norm": 0.29296875, "learning_rate": 0.0001524686099508582, "loss": 1.6865, "step": 4155 }, { "epoch": 1.2879024695217256, "grad_norm": 0.283203125, "learning_rate": 0.0001524476915476738, "loss": 1.5875, "step": 4156 }, { "epoch": 1.2882150672085027, "grad_norm": 0.265625, "learning_rate": 0.00015242676997815542, "loss": 1.5304, "step": 4157 }, { "epoch": 1.2885276648952797, "grad_norm": 0.2734375, "learning_rate": 0.00015240584524356613, "loss": 1.5193, "step": 4158 }, { "epoch": 1.288840262582057, "grad_norm": 0.275390625, "learning_rate": 0.00015238491734516916, "loss": 1.6902, "step": 4159 }, { "epoch": 1.289152860268834, "grad_norm": 0.2734375, "learning_rate": 0.000152363986284228, "loss": 1.6487, "step": 4160 }, { "epoch": 1.289465457955611, "grad_norm": 0.296875, "learning_rate": 0.00015234305206200625, "loss": 1.5843, "step": 4161 }, { "epoch": 1.2897780556423881, "grad_norm": 0.27734375, "learning_rate": 0.00015232211467976775, "loss": 1.9194, "step": 4162 }, { "epoch": 1.2900906533291654, "grad_norm": 0.267578125, "learning_rate": 0.00015230117413877654, "loss": 1.6171, "step": 4163 }, { "epoch": 1.2904032510159424, "grad_norm": 0.3359375, "learning_rate": 0.00015228023044029673, "loss": 2.1398, "step": 4164 }, { "epoch": 1.2907158487027197, "grad_norm": 0.296875, "learning_rate": 0.00015225928358559285, "loss": 1.7288, "step": 4165 }, { "epoch": 1.2910284463894968, "grad_norm": 0.275390625, "learning_rate": 0.0001522383335759294, "loss": 1.4731, "step": 4166 }, { "epoch": 1.2913410440762738, "grad_norm": 0.29296875, "learning_rate": 0.0001522173804125712, "loss": 1.8033, "step": 4167 }, { "epoch": 1.2916536417630509, "grad_norm": 0.279296875, "learning_rate": 0.00015219642409678317, "loss": 1.4459, "step": 4168 }, { "epoch": 1.2919662394498281, "grad_norm": 0.2734375, "learning_rate": 0.0001521754646298305, "loss": 1.7603, "step": 4169 }, { "epoch": 1.2922788371366052, "grad_norm": 0.298828125, "learning_rate": 0.0001521545020129786, "loss": 1.405, "step": 4170 }, { "epoch": 1.2925914348233822, "grad_norm": 0.2734375, "learning_rate": 0.0001521335362474929, "loss": 1.6413, "step": 4171 }, { "epoch": 1.2929040325101595, "grad_norm": 0.287109375, "learning_rate": 0.0001521125673346392, "loss": 1.7767, "step": 4172 }, { "epoch": 1.2932166301969366, "grad_norm": 0.275390625, "learning_rate": 0.00015209159527568343, "loss": 1.3952, "step": 4173 }, { "epoch": 1.2935292278837136, "grad_norm": 0.2734375, "learning_rate": 0.00015207062007189165, "loss": 1.7186, "step": 4174 }, { "epoch": 1.2938418255704907, "grad_norm": 0.27734375, "learning_rate": 0.00015204964172453014, "loss": 1.735, "step": 4175 }, { "epoch": 1.294154423257268, "grad_norm": 0.259765625, "learning_rate": 0.0001520286602348655, "loss": 1.6331, "step": 4176 }, { "epoch": 1.294467020944045, "grad_norm": 0.263671875, "learning_rate": 0.0001520076756041643, "loss": 1.2599, "step": 4177 }, { "epoch": 1.2947796186308222, "grad_norm": 0.255859375, "learning_rate": 0.00015198668783369346, "loss": 1.5036, "step": 4178 }, { "epoch": 1.2950922163175993, "grad_norm": 0.259765625, "learning_rate": 0.00015196569692472005, "loss": 1.5502, "step": 4179 }, { "epoch": 1.2954048140043763, "grad_norm": 0.265625, "learning_rate": 0.00015194470287851125, "loss": 1.6561, "step": 4180 }, { "epoch": 1.2957174116911534, "grad_norm": 0.2734375, "learning_rate": 0.00015192370569633458, "loss": 1.5461, "step": 4181 }, { "epoch": 1.2960300093779307, "grad_norm": 0.2734375, "learning_rate": 0.0001519027053794576, "loss": 1.7504, "step": 4182 }, { "epoch": 1.2963426070647077, "grad_norm": 0.283203125, "learning_rate": 0.00015188170192914822, "loss": 1.7119, "step": 4183 }, { "epoch": 1.2966552047514848, "grad_norm": 0.275390625, "learning_rate": 0.0001518606953466743, "loss": 1.2634, "step": 4184 }, { "epoch": 1.296967802438262, "grad_norm": 0.2734375, "learning_rate": 0.00015183968563330414, "loss": 1.4318, "step": 4185 }, { "epoch": 1.297280400125039, "grad_norm": 0.271484375, "learning_rate": 0.0001518186727903061, "loss": 1.5242, "step": 4186 }, { "epoch": 1.2975929978118161, "grad_norm": 0.26953125, "learning_rate": 0.0001517976568189488, "loss": 1.7578, "step": 4187 }, { "epoch": 1.2979055954985932, "grad_norm": 0.271484375, "learning_rate": 0.00015177663772050087, "loss": 1.6491, "step": 4188 }, { "epoch": 1.2982181931853705, "grad_norm": 0.275390625, "learning_rate": 0.0001517556154962314, "loss": 1.7881, "step": 4189 }, { "epoch": 1.2985307908721475, "grad_norm": 0.28515625, "learning_rate": 0.00015173459014740945, "loss": 1.3904, "step": 4190 }, { "epoch": 1.2988433885589248, "grad_norm": 0.29296875, "learning_rate": 0.0001517135616753044, "loss": 1.6517, "step": 4191 }, { "epoch": 1.2991559862457018, "grad_norm": 0.3515625, "learning_rate": 0.00015169253008118566, "loss": 2.2789, "step": 4192 }, { "epoch": 1.2994685839324789, "grad_norm": 0.2734375, "learning_rate": 0.00015167149536632305, "loss": 1.7229, "step": 4193 }, { "epoch": 1.299781181619256, "grad_norm": 0.291015625, "learning_rate": 0.00015165045753198642, "loss": 1.6937, "step": 4194 }, { "epoch": 1.3000937793060332, "grad_norm": 0.283203125, "learning_rate": 0.00015162941657944585, "loss": 1.3607, "step": 4195 }, { "epoch": 1.3004063769928103, "grad_norm": 0.275390625, "learning_rate": 0.0001516083725099716, "loss": 1.5234, "step": 4196 }, { "epoch": 1.3007189746795873, "grad_norm": 0.267578125, "learning_rate": 0.00015158732532483414, "loss": 1.6549, "step": 4197 }, { "epoch": 1.3010315723663646, "grad_norm": 0.283203125, "learning_rate": 0.0001515662750253041, "loss": 1.5429, "step": 4198 }, { "epoch": 1.3013441700531416, "grad_norm": 0.2734375, "learning_rate": 0.00015154522161265236, "loss": 1.5854, "step": 4199 }, { "epoch": 1.3016567677399187, "grad_norm": 0.265625, "learning_rate": 0.00015152416508814985, "loss": 1.8248, "step": 4200 }, { "epoch": 1.3019693654266957, "grad_norm": 0.271484375, "learning_rate": 0.00015150310545306793, "loss": 1.6711, "step": 4201 }, { "epoch": 1.302281963113473, "grad_norm": 0.279296875, "learning_rate": 0.00015148204270867783, "loss": 1.5947, "step": 4202 }, { "epoch": 1.30259456080025, "grad_norm": 0.26953125, "learning_rate": 0.0001514609768562512, "loss": 1.3376, "step": 4203 }, { "epoch": 1.3029071584870273, "grad_norm": 0.275390625, "learning_rate": 0.00015143990789705984, "loss": 1.6146, "step": 4204 }, { "epoch": 1.3032197561738044, "grad_norm": 0.287109375, "learning_rate": 0.00015141883583237568, "loss": 1.4284, "step": 4205 }, { "epoch": 1.3035323538605814, "grad_norm": 0.255859375, "learning_rate": 0.00015139776066347088, "loss": 1.4654, "step": 4206 }, { "epoch": 1.3038449515473585, "grad_norm": 0.283203125, "learning_rate": 0.00015137668239161782, "loss": 1.4928, "step": 4207 }, { "epoch": 1.3041575492341357, "grad_norm": 0.2890625, "learning_rate": 0.0001513556010180889, "loss": 1.5986, "step": 4208 }, { "epoch": 1.3044701469209128, "grad_norm": 0.275390625, "learning_rate": 0.00015133451654415696, "loss": 1.4188, "step": 4209 }, { "epoch": 1.3047827446076898, "grad_norm": 0.2890625, "learning_rate": 0.00015131342897109482, "loss": 1.5309, "step": 4210 }, { "epoch": 1.305095342294467, "grad_norm": 0.291015625, "learning_rate": 0.00015129233830017558, "loss": 1.7263, "step": 4211 }, { "epoch": 1.3054079399812442, "grad_norm": 0.28515625, "learning_rate": 0.0001512712445326725, "loss": 1.3039, "step": 4212 }, { "epoch": 1.3057205376680212, "grad_norm": 0.28515625, "learning_rate": 0.00015125014766985908, "loss": 1.888, "step": 4213 }, { "epoch": 1.3060331353547983, "grad_norm": 0.279296875, "learning_rate": 0.0001512290477130089, "loss": 1.4978, "step": 4214 }, { "epoch": 1.3063457330415755, "grad_norm": 0.2578125, "learning_rate": 0.00015120794466339587, "loss": 1.6995, "step": 4215 }, { "epoch": 1.3066583307283526, "grad_norm": 0.255859375, "learning_rate": 0.00015118683852229393, "loss": 1.5695, "step": 4216 }, { "epoch": 1.3069709284151299, "grad_norm": 0.328125, "learning_rate": 0.00015116572929097733, "loss": 2.5649, "step": 4217 }, { "epoch": 1.307283526101907, "grad_norm": 0.275390625, "learning_rate": 0.00015114461697072047, "loss": 1.3824, "step": 4218 }, { "epoch": 1.307596123788684, "grad_norm": 0.275390625, "learning_rate": 0.0001511235015627979, "loss": 1.5618, "step": 4219 }, { "epoch": 1.307908721475461, "grad_norm": 0.28515625, "learning_rate": 0.0001511023830684844, "loss": 1.9096, "step": 4220 }, { "epoch": 1.3082213191622383, "grad_norm": 0.267578125, "learning_rate": 0.0001510812614890549, "loss": 1.4379, "step": 4221 }, { "epoch": 1.3085339168490153, "grad_norm": 0.279296875, "learning_rate": 0.00015106013682578454, "loss": 1.3855, "step": 4222 }, { "epoch": 1.3088465145357924, "grad_norm": 0.26171875, "learning_rate": 0.00015103900907994868, "loss": 1.6518, "step": 4223 }, { "epoch": 1.3091591122225696, "grad_norm": 0.279296875, "learning_rate": 0.0001510178782528228, "loss": 1.6682, "step": 4224 }, { "epoch": 1.3094717099093467, "grad_norm": 0.263671875, "learning_rate": 0.00015099674434568261, "loss": 1.4411, "step": 4225 }, { "epoch": 1.3097843075961237, "grad_norm": 0.259765625, "learning_rate": 0.0001509756073598039, "loss": 1.7525, "step": 4226 }, { "epoch": 1.3100969052829008, "grad_norm": 0.28515625, "learning_rate": 0.0001509544672964629, "loss": 1.5352, "step": 4227 }, { "epoch": 1.310409502969678, "grad_norm": 0.267578125, "learning_rate": 0.00015093332415693574, "loss": 1.4847, "step": 4228 }, { "epoch": 1.3107221006564551, "grad_norm": 0.27734375, "learning_rate": 0.0001509121779424989, "loss": 1.6143, "step": 4229 }, { "epoch": 1.3110346983432324, "grad_norm": 0.283203125, "learning_rate": 0.00015089102865442904, "loss": 1.6455, "step": 4230 }, { "epoch": 1.3113472960300094, "grad_norm": 0.267578125, "learning_rate": 0.0001508698762940029, "loss": 1.5467, "step": 4231 }, { "epoch": 1.3116598937167865, "grad_norm": 0.263671875, "learning_rate": 0.00015084872086249746, "loss": 1.4301, "step": 4232 }, { "epoch": 1.3119724914035635, "grad_norm": 0.2578125, "learning_rate": 0.00015082756236118998, "loss": 1.7866, "step": 4233 }, { "epoch": 1.3122850890903408, "grad_norm": 0.26953125, "learning_rate": 0.0001508064007913578, "loss": 1.4359, "step": 4234 }, { "epoch": 1.3125976867771179, "grad_norm": 0.265625, "learning_rate": 0.00015078523615427844, "loss": 1.2895, "step": 4235 }, { "epoch": 1.312910284463895, "grad_norm": 0.2734375, "learning_rate": 0.0001507640684512297, "loss": 1.673, "step": 4236 }, { "epoch": 1.3132228821506722, "grad_norm": 0.26953125, "learning_rate": 0.0001507428976834894, "loss": 1.4894, "step": 4237 }, { "epoch": 1.3135354798374492, "grad_norm": 0.26953125, "learning_rate": 0.00015072172385233575, "loss": 1.5068, "step": 4238 }, { "epoch": 1.3138480775242263, "grad_norm": 0.265625, "learning_rate": 0.00015070054695904696, "loss": 1.4145, "step": 4239 }, { "epoch": 1.3141606752110033, "grad_norm": 0.26953125, "learning_rate": 0.00015067936700490154, "loss": 1.4855, "step": 4240 }, { "epoch": 1.3144732728977806, "grad_norm": 0.275390625, "learning_rate": 0.0001506581839911782, "loss": 1.5471, "step": 4241 }, { "epoch": 1.3147858705845576, "grad_norm": 0.2734375, "learning_rate": 0.0001506369979191557, "loss": 1.5411, "step": 4242 }, { "epoch": 1.315098468271335, "grad_norm": 0.271484375, "learning_rate": 0.00015061580879011314, "loss": 1.4331, "step": 4243 }, { "epoch": 1.315411065958112, "grad_norm": 0.2734375, "learning_rate": 0.00015059461660532966, "loss": 1.5143, "step": 4244 }, { "epoch": 1.315723663644889, "grad_norm": 0.2734375, "learning_rate": 0.00015057342136608472, "loss": 1.5241, "step": 4245 }, { "epoch": 1.316036261331666, "grad_norm": 0.26171875, "learning_rate": 0.00015055222307365788, "loss": 1.7544, "step": 4246 }, { "epoch": 1.3163488590184433, "grad_norm": 0.263671875, "learning_rate": 0.00015053102172932895, "loss": 1.4044, "step": 4247 }, { "epoch": 1.3166614567052204, "grad_norm": 0.28515625, "learning_rate": 0.0001505098173343778, "loss": 1.6263, "step": 4248 }, { "epoch": 1.3169740543919974, "grad_norm": 0.2734375, "learning_rate": 0.00015048860989008467, "loss": 1.3161, "step": 4249 }, { "epoch": 1.3172866520787747, "grad_norm": 0.30078125, "learning_rate": 0.00015046739939772973, "loss": 1.6841, "step": 4250 }, { "epoch": 1.3175992497655518, "grad_norm": 0.263671875, "learning_rate": 0.00015044618585859367, "loss": 1.5257, "step": 4251 }, { "epoch": 1.3179118474523288, "grad_norm": 0.27734375, "learning_rate": 0.00015042496927395702, "loss": 1.4318, "step": 4252 }, { "epoch": 1.3182244451391059, "grad_norm": 0.271484375, "learning_rate": 0.00015040374964510076, "loss": 1.6423, "step": 4253 }, { "epoch": 1.3185370428258831, "grad_norm": 0.271484375, "learning_rate": 0.00015038252697330588, "loss": 1.6024, "step": 4254 }, { "epoch": 1.3188496405126602, "grad_norm": 0.263671875, "learning_rate": 0.00015036130125985366, "loss": 1.4965, "step": 4255 }, { "epoch": 1.3191622381994375, "grad_norm": 0.283203125, "learning_rate": 0.0001503400725060255, "loss": 1.6, "step": 4256 }, { "epoch": 1.3194748358862145, "grad_norm": 0.27734375, "learning_rate": 0.000150318840713103, "loss": 1.8479, "step": 4257 }, { "epoch": 1.3197874335729916, "grad_norm": 0.27734375, "learning_rate": 0.00015029760588236796, "loss": 1.567, "step": 4258 }, { "epoch": 1.3201000312597686, "grad_norm": 0.263671875, "learning_rate": 0.00015027636801510238, "loss": 1.4983, "step": 4259 }, { "epoch": 1.3204126289465459, "grad_norm": 0.28125, "learning_rate": 0.0001502551271125884, "loss": 1.448, "step": 4260 }, { "epoch": 1.320725226633323, "grad_norm": 0.271484375, "learning_rate": 0.00015023388317610833, "loss": 1.8337, "step": 4261 }, { "epoch": 1.3210378243201, "grad_norm": 0.267578125, "learning_rate": 0.00015021263620694476, "loss": 1.4686, "step": 4262 }, { "epoch": 1.3213504220068772, "grad_norm": 0.267578125, "learning_rate": 0.0001501913862063803, "loss": 1.6757, "step": 4263 }, { "epoch": 1.3216630196936543, "grad_norm": 0.2734375, "learning_rate": 0.00015017013317569793, "loss": 1.6048, "step": 4264 }, { "epoch": 1.3219756173804313, "grad_norm": 0.271484375, "learning_rate": 0.0001501488771161807, "loss": 1.2832, "step": 4265 }, { "epoch": 1.3222882150672084, "grad_norm": 0.265625, "learning_rate": 0.00015012761802911184, "loss": 1.4612, "step": 4266 }, { "epoch": 1.3226008127539857, "grad_norm": 0.263671875, "learning_rate": 0.00015010635591577477, "loss": 1.7251, "step": 4267 }, { "epoch": 1.3229134104407627, "grad_norm": 0.259765625, "learning_rate": 0.00015008509077745318, "loss": 1.6412, "step": 4268 }, { "epoch": 1.32322600812754, "grad_norm": 0.2734375, "learning_rate": 0.00015006382261543083, "loss": 1.4868, "step": 4269 }, { "epoch": 1.323538605814317, "grad_norm": 0.28125, "learning_rate": 0.0001500425514309917, "loss": 1.5384, "step": 4270 }, { "epoch": 1.323851203501094, "grad_norm": 0.28125, "learning_rate": 0.00015002127722542, "loss": 1.7487, "step": 4271 }, { "epoch": 1.3241638011878711, "grad_norm": 0.28125, "learning_rate": 0.00015000000000000001, "loss": 1.6189, "step": 4272 }, { "epoch": 1.3244763988746484, "grad_norm": 0.27734375, "learning_rate": 0.0001499787197560163, "loss": 1.5159, "step": 4273 }, { "epoch": 1.3247889965614255, "grad_norm": 0.26953125, "learning_rate": 0.00014995743649475363, "loss": 1.4979, "step": 4274 }, { "epoch": 1.3251015942482025, "grad_norm": 0.275390625, "learning_rate": 0.00014993615021749684, "loss": 1.6714, "step": 4275 }, { "epoch": 1.3254141919349798, "grad_norm": 0.27734375, "learning_rate": 0.00014991486092553102, "loss": 1.4344, "step": 4276 }, { "epoch": 1.3257267896217568, "grad_norm": 0.265625, "learning_rate": 0.00014989356862014146, "loss": 1.7938, "step": 4277 }, { "epoch": 1.3260393873085339, "grad_norm": 0.248046875, "learning_rate": 0.00014987227330261356, "loss": 1.6565, "step": 4278 }, { "epoch": 1.326351984995311, "grad_norm": 0.263671875, "learning_rate": 0.00014985097497423298, "loss": 1.2759, "step": 4279 }, { "epoch": 1.3266645826820882, "grad_norm": 0.263671875, "learning_rate": 0.0001498296736362855, "loss": 1.799, "step": 4280 }, { "epoch": 1.3269771803688653, "grad_norm": 0.263671875, "learning_rate": 0.00014980836929005714, "loss": 1.4938, "step": 4281 }, { "epoch": 1.3272897780556425, "grad_norm": 0.26171875, "learning_rate": 0.00014978706193683405, "loss": 1.422, "step": 4282 }, { "epoch": 1.3276023757424196, "grad_norm": 0.26171875, "learning_rate": 0.00014976575157790262, "loss": 1.3706, "step": 4283 }, { "epoch": 1.3279149734291966, "grad_norm": 0.26953125, "learning_rate": 0.00014974443821454928, "loss": 1.4645, "step": 4284 }, { "epoch": 1.3282275711159737, "grad_norm": 0.275390625, "learning_rate": 0.00014972312184806085, "loss": 1.4608, "step": 4285 }, { "epoch": 1.328540168802751, "grad_norm": 0.26953125, "learning_rate": 0.0001497018024797242, "loss": 1.4634, "step": 4286 }, { "epoch": 1.328852766489528, "grad_norm": 0.2734375, "learning_rate": 0.00014968048011082638, "loss": 1.6588, "step": 4287 }, { "epoch": 1.329165364176305, "grad_norm": 0.265625, "learning_rate": 0.00014965915474265468, "loss": 1.528, "step": 4288 }, { "epoch": 1.3294779618630823, "grad_norm": 0.28125, "learning_rate": 0.00014963782637649652, "loss": 1.3804, "step": 4289 }, { "epoch": 1.3297905595498594, "grad_norm": 0.26953125, "learning_rate": 0.00014961649501363955, "loss": 1.6437, "step": 4290 }, { "epoch": 1.3301031572366364, "grad_norm": 0.263671875, "learning_rate": 0.0001495951606553715, "loss": 1.6061, "step": 4291 }, { "epoch": 1.3304157549234135, "grad_norm": 0.275390625, "learning_rate": 0.00014957382330298046, "loss": 1.3768, "step": 4292 }, { "epoch": 1.3307283526101907, "grad_norm": 0.265625, "learning_rate": 0.0001495524829577545, "loss": 1.6755, "step": 4293 }, { "epoch": 1.3310409502969678, "grad_norm": 0.279296875, "learning_rate": 0.00014953113962098201, "loss": 1.3769, "step": 4294 }, { "epoch": 1.331353547983745, "grad_norm": 0.267578125, "learning_rate": 0.0001495097932939515, "loss": 1.3683, "step": 4295 }, { "epoch": 1.331666145670522, "grad_norm": 0.283203125, "learning_rate": 0.00014948844397795168, "loss": 1.4352, "step": 4296 }, { "epoch": 1.3319787433572992, "grad_norm": 0.26953125, "learning_rate": 0.00014946709167427142, "loss": 1.4394, "step": 4297 }, { "epoch": 1.3322913410440762, "grad_norm": 0.2734375, "learning_rate": 0.0001494457363841998, "loss": 1.5841, "step": 4298 }, { "epoch": 1.3326039387308533, "grad_norm": 0.2734375, "learning_rate": 0.00014942437810902607, "loss": 1.46, "step": 4299 }, { "epoch": 1.3329165364176305, "grad_norm": 0.275390625, "learning_rate": 0.00014940301685003967, "loss": 1.7945, "step": 4300 }, { "epoch": 1.3332291341044076, "grad_norm": 0.271484375, "learning_rate": 0.00014938165260853018, "loss": 1.557, "step": 4301 }, { "epoch": 1.3335417317911848, "grad_norm": 0.28515625, "learning_rate": 0.0001493602853857874, "loss": 1.6546, "step": 4302 }, { "epoch": 1.333854329477962, "grad_norm": 0.26953125, "learning_rate": 0.00014933891518310126, "loss": 1.204, "step": 4303 }, { "epoch": 1.334166927164739, "grad_norm": 0.263671875, "learning_rate": 0.000149317542001762, "loss": 1.4083, "step": 4304 }, { "epoch": 1.334479524851516, "grad_norm": 0.271484375, "learning_rate": 0.0001492961658430598, "loss": 1.5677, "step": 4305 }, { "epoch": 1.3347921225382933, "grad_norm": 0.27734375, "learning_rate": 0.0001492747867082853, "loss": 1.5351, "step": 4306 }, { "epoch": 1.3351047202250703, "grad_norm": 0.267578125, "learning_rate": 0.00014925340459872913, "loss": 1.635, "step": 4307 }, { "epoch": 1.3354173179118476, "grad_norm": 0.259765625, "learning_rate": 0.00014923201951568216, "loss": 1.5736, "step": 4308 }, { "epoch": 1.3357299155986246, "grad_norm": 0.263671875, "learning_rate": 0.00014921063146043542, "loss": 1.6811, "step": 4309 }, { "epoch": 1.3360425132854017, "grad_norm": 0.267578125, "learning_rate": 0.00014918924043428016, "loss": 1.673, "step": 4310 }, { "epoch": 1.3363551109721787, "grad_norm": 0.2734375, "learning_rate": 0.00014916784643850773, "loss": 1.6695, "step": 4311 }, { "epoch": 1.3366677086589558, "grad_norm": 0.283203125, "learning_rate": 0.00014914644947440982, "loss": 1.6808, "step": 4312 }, { "epoch": 1.336980306345733, "grad_norm": 0.267578125, "learning_rate": 0.0001491250495432781, "loss": 1.2547, "step": 4313 }, { "epoch": 1.33729290403251, "grad_norm": 0.2734375, "learning_rate": 0.00014910364664640454, "loss": 1.5897, "step": 4314 }, { "epoch": 1.3376055017192874, "grad_norm": 0.2890625, "learning_rate": 0.00014908224078508125, "loss": 1.5416, "step": 4315 }, { "epoch": 1.3379180994060644, "grad_norm": 0.259765625, "learning_rate": 0.00014906083196060058, "loss": 1.3056, "step": 4316 }, { "epoch": 1.3382306970928415, "grad_norm": 0.275390625, "learning_rate": 0.0001490394201742549, "loss": 1.3732, "step": 4317 }, { "epoch": 1.3385432947796185, "grad_norm": 0.2734375, "learning_rate": 0.000149018005427337, "loss": 1.51, "step": 4318 }, { "epoch": 1.3388558924663958, "grad_norm": 0.2890625, "learning_rate": 0.0001489965877211396, "loss": 1.6639, "step": 4319 }, { "epoch": 1.3391684901531729, "grad_norm": 0.26171875, "learning_rate": 0.00014897516705695578, "loss": 1.6686, "step": 4320 }, { "epoch": 1.33948108783995, "grad_norm": 0.275390625, "learning_rate": 0.0001489537434360787, "loss": 1.5546, "step": 4321 }, { "epoch": 1.3397936855267272, "grad_norm": 0.28125, "learning_rate": 0.00014893231685980175, "loss": 1.5238, "step": 4322 }, { "epoch": 1.3401062832135042, "grad_norm": 0.263671875, "learning_rate": 0.0001489108873294185, "loss": 1.7619, "step": 4323 }, { "epoch": 1.3404188809002813, "grad_norm": 0.263671875, "learning_rate": 0.00014888945484622265, "loss": 1.4808, "step": 4324 }, { "epoch": 1.3407314785870583, "grad_norm": 0.275390625, "learning_rate": 0.0001488680194115081, "loss": 1.5137, "step": 4325 }, { "epoch": 1.3410440762738356, "grad_norm": 0.275390625, "learning_rate": 0.00014884658102656893, "loss": 1.4063, "step": 4326 }, { "epoch": 1.3413566739606126, "grad_norm": 0.279296875, "learning_rate": 0.0001488251396926994, "loss": 1.6675, "step": 4327 }, { "epoch": 1.34166927164739, "grad_norm": 0.267578125, "learning_rate": 0.00014880369541119402, "loss": 1.415, "step": 4328 }, { "epoch": 1.341981869334167, "grad_norm": 0.271484375, "learning_rate": 0.00014878224818334733, "loss": 1.5076, "step": 4329 }, { "epoch": 1.342294467020944, "grad_norm": 0.27734375, "learning_rate": 0.0001487607980104542, "loss": 1.3998, "step": 4330 }, { "epoch": 1.342607064707721, "grad_norm": 0.2890625, "learning_rate": 0.0001487393448938095, "loss": 1.7118, "step": 4331 }, { "epoch": 1.3429196623944983, "grad_norm": 0.25390625, "learning_rate": 0.00014871788883470845, "loss": 1.5633, "step": 4332 }, { "epoch": 1.3432322600812754, "grad_norm": 0.275390625, "learning_rate": 0.00014869642983444638, "loss": 1.5311, "step": 4333 }, { "epoch": 1.3435448577680524, "grad_norm": 0.283203125, "learning_rate": 0.0001486749678943188, "loss": 1.5362, "step": 4334 }, { "epoch": 1.3438574554548297, "grad_norm": 0.271484375, "learning_rate": 0.00014865350301562134, "loss": 1.4536, "step": 4335 }, { "epoch": 1.3441700531416068, "grad_norm": 0.275390625, "learning_rate": 0.00014863203519964994, "loss": 1.5429, "step": 4336 }, { "epoch": 1.3444826508283838, "grad_norm": 0.27734375, "learning_rate": 0.00014861056444770058, "loss": 1.5523, "step": 4337 }, { "epoch": 1.3447952485151609, "grad_norm": 0.263671875, "learning_rate": 0.0001485890907610695, "loss": 1.7627, "step": 4338 }, { "epoch": 1.3451078462019381, "grad_norm": 0.267578125, "learning_rate": 0.00014856761414105312, "loss": 1.5484, "step": 4339 }, { "epoch": 1.3454204438887152, "grad_norm": 0.26953125, "learning_rate": 0.00014854613458894795, "loss": 1.4169, "step": 4340 }, { "epoch": 1.3457330415754925, "grad_norm": 0.271484375, "learning_rate": 0.00014852465210605077, "loss": 1.368, "step": 4341 }, { "epoch": 1.3460456392622695, "grad_norm": 0.271484375, "learning_rate": 0.00014850316669365855, "loss": 1.6043, "step": 4342 }, { "epoch": 1.3463582369490465, "grad_norm": 0.27734375, "learning_rate": 0.00014848167835306833, "loss": 1.5628, "step": 4343 }, { "epoch": 1.3466708346358236, "grad_norm": 0.26171875, "learning_rate": 0.0001484601870855774, "loss": 1.5112, "step": 4344 }, { "epoch": 1.3469834323226009, "grad_norm": 0.28515625, "learning_rate": 0.0001484386928924832, "loss": 1.5425, "step": 4345 }, { "epoch": 1.347296030009378, "grad_norm": 0.259765625, "learning_rate": 0.00014841719577508343, "loss": 1.6774, "step": 4346 }, { "epoch": 1.347608627696155, "grad_norm": 0.271484375, "learning_rate": 0.0001483956957346758, "loss": 1.4458, "step": 4347 }, { "epoch": 1.3479212253829322, "grad_norm": 0.26953125, "learning_rate": 0.0001483741927725584, "loss": 1.6429, "step": 4348 }, { "epoch": 1.3482338230697093, "grad_norm": 0.263671875, "learning_rate": 0.0001483526868900293, "loss": 1.5116, "step": 4349 }, { "epoch": 1.3485464207564863, "grad_norm": 0.279296875, "learning_rate": 0.0001483311780883869, "loss": 1.5369, "step": 4350 }, { "epoch": 1.3488590184432634, "grad_norm": 0.267578125, "learning_rate": 0.00014830966636892966, "loss": 1.2129, "step": 4351 }, { "epoch": 1.3491716161300407, "grad_norm": 0.27734375, "learning_rate": 0.00014828815173295633, "loss": 1.5552, "step": 4352 }, { "epoch": 1.3494842138168177, "grad_norm": 0.26953125, "learning_rate": 0.00014826663418176573, "loss": 1.6241, "step": 4353 }, { "epoch": 1.349796811503595, "grad_norm": 0.27734375, "learning_rate": 0.00014824511371665694, "loss": 1.8416, "step": 4354 }, { "epoch": 1.350109409190372, "grad_norm": 0.267578125, "learning_rate": 0.00014822359033892914, "loss": 1.8527, "step": 4355 }, { "epoch": 1.350422006877149, "grad_norm": 0.287109375, "learning_rate": 0.00014820206404988175, "loss": 1.669, "step": 4356 }, { "epoch": 1.3507346045639261, "grad_norm": 0.275390625, "learning_rate": 0.00014818053485081428, "loss": 1.414, "step": 4357 }, { "epoch": 1.3510472022507034, "grad_norm": 0.275390625, "learning_rate": 0.0001481590027430266, "loss": 1.3761, "step": 4358 }, { "epoch": 1.3513597999374805, "grad_norm": 0.263671875, "learning_rate": 0.00014813746772781848, "loss": 1.5144, "step": 4359 }, { "epoch": 1.3516723976242575, "grad_norm": 0.27734375, "learning_rate": 0.00014811592980649017, "loss": 1.5364, "step": 4360 }, { "epoch": 1.3519849953110348, "grad_norm": 0.26171875, "learning_rate": 0.00014809438898034181, "loss": 1.6639, "step": 4361 }, { "epoch": 1.3522975929978118, "grad_norm": 0.27734375, "learning_rate": 0.00014807284525067388, "loss": 1.4128, "step": 4362 }, { "epoch": 1.3526101906845889, "grad_norm": 0.267578125, "learning_rate": 0.00014805129861878707, "loss": 1.553, "step": 4363 }, { "epoch": 1.352922788371366, "grad_norm": 0.2734375, "learning_rate": 0.00014802974908598212, "loss": 1.6525, "step": 4364 }, { "epoch": 1.3532353860581432, "grad_norm": 0.279296875, "learning_rate": 0.00014800819665355998, "loss": 1.3909, "step": 4365 }, { "epoch": 1.3535479837449202, "grad_norm": 0.306640625, "learning_rate": 0.00014798664132282188, "loss": 1.8208, "step": 4366 }, { "epoch": 1.3538605814316975, "grad_norm": 0.275390625, "learning_rate": 0.00014796508309506906, "loss": 1.6694, "step": 4367 }, { "epoch": 1.3541731791184746, "grad_norm": 0.2734375, "learning_rate": 0.00014794352197160306, "loss": 1.5647, "step": 4368 }, { "epoch": 1.3544857768052516, "grad_norm": 0.28515625, "learning_rate": 0.0001479219579537255, "loss": 1.5541, "step": 4369 }, { "epoch": 1.3547983744920287, "grad_norm": 0.267578125, "learning_rate": 0.00014790039104273833, "loss": 1.579, "step": 4370 }, { "epoch": 1.355110972178806, "grad_norm": 0.267578125, "learning_rate": 0.0001478788212399435, "loss": 1.6018, "step": 4371 }, { "epoch": 1.355423569865583, "grad_norm": 0.26171875, "learning_rate": 0.00014785724854664325, "loss": 1.5671, "step": 4372 }, { "epoch": 1.35573616755236, "grad_norm": 0.279296875, "learning_rate": 0.00014783567296413987, "loss": 1.8291, "step": 4373 }, { "epoch": 1.3560487652391373, "grad_norm": 0.267578125, "learning_rate": 0.000147814094493736, "loss": 1.2627, "step": 4374 }, { "epoch": 1.3563613629259144, "grad_norm": 0.2890625, "learning_rate": 0.00014779251313673425, "loss": 1.4725, "step": 4375 }, { "epoch": 1.3566739606126914, "grad_norm": 0.275390625, "learning_rate": 0.0001477709288944376, "loss": 1.2851, "step": 4376 }, { "epoch": 1.3569865582994685, "grad_norm": 0.271484375, "learning_rate": 0.00014774934176814915, "loss": 1.2496, "step": 4377 }, { "epoch": 1.3572991559862457, "grad_norm": 0.27734375, "learning_rate": 0.00014772775175917206, "loss": 1.6098, "step": 4378 }, { "epoch": 1.3576117536730228, "grad_norm": 0.279296875, "learning_rate": 0.00014770615886880973, "loss": 1.5777, "step": 4379 }, { "epoch": 1.3579243513598, "grad_norm": 0.279296875, "learning_rate": 0.00014768456309836587, "loss": 1.6135, "step": 4380 }, { "epoch": 1.358236949046577, "grad_norm": 0.259765625, "learning_rate": 0.00014766296444914412, "loss": 1.6868, "step": 4381 }, { "epoch": 1.3585495467333542, "grad_norm": 0.26953125, "learning_rate": 0.00014764136292244847, "loss": 1.5089, "step": 4382 }, { "epoch": 1.3588621444201312, "grad_norm": 0.275390625, "learning_rate": 0.00014761975851958307, "loss": 1.2554, "step": 4383 }, { "epoch": 1.3591747421069085, "grad_norm": 0.2734375, "learning_rate": 0.00014759815124185214, "loss": 1.3712, "step": 4384 }, { "epoch": 1.3594873397936855, "grad_norm": 0.267578125, "learning_rate": 0.00014757654109056017, "loss": 1.5727, "step": 4385 }, { "epoch": 1.3597999374804626, "grad_norm": 0.28125, "learning_rate": 0.0001475549280670118, "loss": 1.5521, "step": 4386 }, { "epoch": 1.3601125351672398, "grad_norm": 0.2734375, "learning_rate": 0.00014753331217251176, "loss": 1.6843, "step": 4387 }, { "epoch": 1.360425132854017, "grad_norm": 0.263671875, "learning_rate": 0.00014751169340836514, "loss": 1.4582, "step": 4388 }, { "epoch": 1.360737730540794, "grad_norm": 0.26953125, "learning_rate": 0.00014749007177587706, "loss": 1.5996, "step": 4389 }, { "epoch": 1.361050328227571, "grad_norm": 0.26171875, "learning_rate": 0.00014746844727635283, "loss": 1.5941, "step": 4390 }, { "epoch": 1.3613629259143483, "grad_norm": 0.26953125, "learning_rate": 0.00014744681991109792, "loss": 1.2932, "step": 4391 }, { "epoch": 1.3616755236011253, "grad_norm": 0.283203125, "learning_rate": 0.00014742518968141803, "loss": 1.499, "step": 4392 }, { "epoch": 1.3619881212879026, "grad_norm": 0.2734375, "learning_rate": 0.00014740355658861902, "loss": 1.3968, "step": 4393 }, { "epoch": 1.3623007189746796, "grad_norm": 0.279296875, "learning_rate": 0.00014738192063400689, "loss": 1.7609, "step": 4394 }, { "epoch": 1.3626133166614567, "grad_norm": 0.265625, "learning_rate": 0.0001473602818188878, "loss": 1.361, "step": 4395 }, { "epoch": 1.3629259143482337, "grad_norm": 0.263671875, "learning_rate": 0.0001473386401445682, "loss": 1.4909, "step": 4396 }, { "epoch": 1.363238512035011, "grad_norm": 0.27734375, "learning_rate": 0.00014731699561235453, "loss": 1.7449, "step": 4397 }, { "epoch": 1.363551109721788, "grad_norm": 0.2734375, "learning_rate": 0.00014729534822355354, "loss": 1.5876, "step": 4398 }, { "epoch": 1.363863707408565, "grad_norm": 0.265625, "learning_rate": 0.00014727369797947214, "loss": 1.7072, "step": 4399 }, { "epoch": 1.3641763050953424, "grad_norm": 0.28125, "learning_rate": 0.00014725204488141733, "loss": 1.603, "step": 4400 }, { "epoch": 1.3644889027821194, "grad_norm": 0.287109375, "learning_rate": 0.00014723038893069636, "loss": 1.7367, "step": 4401 }, { "epoch": 1.3648015004688965, "grad_norm": 0.279296875, "learning_rate": 0.00014720873012861663, "loss": 1.6072, "step": 4402 }, { "epoch": 1.3651140981556735, "grad_norm": 0.267578125, "learning_rate": 0.0001471870684764857, "loss": 1.4582, "step": 4403 }, { "epoch": 1.3654266958424508, "grad_norm": 0.26953125, "learning_rate": 0.00014716540397561128, "loss": 1.7162, "step": 4404 }, { "epoch": 1.3657392935292278, "grad_norm": 0.275390625, "learning_rate": 0.00014714373662730136, "loss": 1.465, "step": 4405 }, { "epoch": 1.3660518912160051, "grad_norm": 0.275390625, "learning_rate": 0.00014712206643286398, "loss": 1.4183, "step": 4406 }, { "epoch": 1.3663644889027822, "grad_norm": 0.287109375, "learning_rate": 0.0001471003933936074, "loss": 1.8007, "step": 4407 }, { "epoch": 1.3666770865895592, "grad_norm": 0.2734375, "learning_rate": 0.00014707871751084003, "loss": 1.6014, "step": 4408 }, { "epoch": 1.3669896842763363, "grad_norm": 0.28125, "learning_rate": 0.0001470570387858705, "loss": 1.5298, "step": 4409 }, { "epoch": 1.3673022819631135, "grad_norm": 0.265625, "learning_rate": 0.00014703535722000762, "loss": 1.5156, "step": 4410 }, { "epoch": 1.3676148796498906, "grad_norm": 0.326171875, "learning_rate": 0.0001470136728145602, "loss": 2.1164, "step": 4411 }, { "epoch": 1.3679274773366676, "grad_norm": 0.2734375, "learning_rate": 0.0001469919855708375, "loss": 1.5791, "step": 4412 }, { "epoch": 1.368240075023445, "grad_norm": 0.26171875, "learning_rate": 0.00014697029549014874, "loss": 1.5723, "step": 4413 }, { "epoch": 1.368552672710222, "grad_norm": 0.26953125, "learning_rate": 0.00014694860257380337, "loss": 1.7224, "step": 4414 }, { "epoch": 1.368865270396999, "grad_norm": 0.2734375, "learning_rate": 0.00014692690682311106, "loss": 1.4698, "step": 4415 }, { "epoch": 1.369177868083776, "grad_norm": 0.27734375, "learning_rate": 0.0001469052082393816, "loss": 1.6278, "step": 4416 }, { "epoch": 1.3694904657705533, "grad_norm": 0.259765625, "learning_rate": 0.0001468835068239249, "loss": 1.4451, "step": 4417 }, { "epoch": 1.3698030634573304, "grad_norm": 0.279296875, "learning_rate": 0.00014686180257805117, "loss": 1.556, "step": 4418 }, { "epoch": 1.3701156611441077, "grad_norm": 0.28515625, "learning_rate": 0.0001468400955030707, "loss": 1.5471, "step": 4419 }, { "epoch": 1.3704282588308847, "grad_norm": 0.275390625, "learning_rate": 0.00014681838560029397, "loss": 1.4154, "step": 4420 }, { "epoch": 1.3707408565176618, "grad_norm": 0.263671875, "learning_rate": 0.00014679667287103166, "loss": 1.5814, "step": 4421 }, { "epoch": 1.3710534542044388, "grad_norm": 0.28125, "learning_rate": 0.00014677495731659453, "loss": 1.4309, "step": 4422 }, { "epoch": 1.371366051891216, "grad_norm": 0.283203125, "learning_rate": 0.00014675323893829364, "loss": 1.4414, "step": 4423 }, { "epoch": 1.3716786495779931, "grad_norm": 0.2734375, "learning_rate": 0.0001467315177374402, "loss": 1.4838, "step": 4424 }, { "epoch": 1.3719912472647702, "grad_norm": 0.275390625, "learning_rate": 0.00014670979371534542, "loss": 1.5512, "step": 4425 }, { "epoch": 1.3723038449515474, "grad_norm": 0.287109375, "learning_rate": 0.0001466880668733209, "loss": 1.5403, "step": 4426 }, { "epoch": 1.3726164426383245, "grad_norm": 0.267578125, "learning_rate": 0.00014666633721267824, "loss": 1.2641, "step": 4427 }, { "epoch": 1.3729290403251015, "grad_norm": 0.279296875, "learning_rate": 0.00014664460473472936, "loss": 1.4777, "step": 4428 }, { "epoch": 1.3732416380118786, "grad_norm": 0.271484375, "learning_rate": 0.00014662286944078625, "loss": 1.4161, "step": 4429 }, { "epoch": 1.3735542356986559, "grad_norm": 0.271484375, "learning_rate": 0.00014660113133216113, "loss": 1.2669, "step": 4430 }, { "epoch": 1.373866833385433, "grad_norm": 0.26953125, "learning_rate": 0.0001465793904101663, "loss": 1.7179, "step": 4431 }, { "epoch": 1.3741794310722102, "grad_norm": 0.275390625, "learning_rate": 0.00014655764667611434, "loss": 1.9041, "step": 4432 }, { "epoch": 1.3744920287589872, "grad_norm": 0.271484375, "learning_rate": 0.0001465359001313179, "loss": 1.5532, "step": 4433 }, { "epoch": 1.3748046264457643, "grad_norm": 0.26171875, "learning_rate": 0.00014651415077708986, "loss": 1.4049, "step": 4434 }, { "epoch": 1.3751172241325413, "grad_norm": 0.28515625, "learning_rate": 0.00014649239861474323, "loss": 1.6996, "step": 4435 }, { "epoch": 1.3754298218193186, "grad_norm": 0.28125, "learning_rate": 0.00014647064364559133, "loss": 1.716, "step": 4436 }, { "epoch": 1.3757424195060957, "grad_norm": 0.27734375, "learning_rate": 0.0001464488858709474, "loss": 1.4723, "step": 4437 }, { "epoch": 1.3760550171928727, "grad_norm": 0.26171875, "learning_rate": 0.0001464271252921251, "loss": 1.6228, "step": 4438 }, { "epoch": 1.37636761487965, "grad_norm": 0.27734375, "learning_rate": 0.000146405361910438, "loss": 1.4123, "step": 4439 }, { "epoch": 1.376680212566427, "grad_norm": 0.27734375, "learning_rate": 0.00014638359572720014, "loss": 1.7919, "step": 4440 }, { "epoch": 1.376992810253204, "grad_norm": 0.27734375, "learning_rate": 0.00014636182674372542, "loss": 1.5778, "step": 4441 }, { "epoch": 1.3773054079399811, "grad_norm": 0.259765625, "learning_rate": 0.0001463400549613282, "loss": 1.4883, "step": 4442 }, { "epoch": 1.3776180056267584, "grad_norm": 0.28515625, "learning_rate": 0.0001463182803813228, "loss": 1.6173, "step": 4443 }, { "epoch": 1.3779306033135355, "grad_norm": 0.287109375, "learning_rate": 0.0001462965030050238, "loss": 1.7092, "step": 4444 }, { "epoch": 1.3782432010003127, "grad_norm": 0.28515625, "learning_rate": 0.0001462747228337459, "loss": 1.3684, "step": 4445 }, { "epoch": 1.3785557986870898, "grad_norm": 0.2734375, "learning_rate": 0.00014625293986880402, "loss": 1.684, "step": 4446 }, { "epoch": 1.3788683963738668, "grad_norm": 0.263671875, "learning_rate": 0.0001462311541115132, "loss": 1.4425, "step": 4447 }, { "epoch": 1.3791809940606439, "grad_norm": 0.287109375, "learning_rate": 0.00014620936556318872, "loss": 1.4826, "step": 4448 }, { "epoch": 1.3794935917474211, "grad_norm": 0.28515625, "learning_rate": 0.00014618757422514596, "loss": 1.5377, "step": 4449 }, { "epoch": 1.3798061894341982, "grad_norm": 0.27734375, "learning_rate": 0.00014616578009870045, "loss": 1.7899, "step": 4450 }, { "epoch": 1.3801187871209752, "grad_norm": 0.27734375, "learning_rate": 0.000146143983185168, "loss": 1.4614, "step": 4451 }, { "epoch": 1.3804313848077525, "grad_norm": 0.291015625, "learning_rate": 0.00014612218348586446, "loss": 1.6547, "step": 4452 }, { "epoch": 1.3807439824945296, "grad_norm": 0.2890625, "learning_rate": 0.00014610038100210592, "loss": 1.6675, "step": 4453 }, { "epoch": 1.3810565801813066, "grad_norm": 0.259765625, "learning_rate": 0.00014607857573520867, "loss": 1.3751, "step": 4454 }, { "epoch": 1.3813691778680837, "grad_norm": 0.287109375, "learning_rate": 0.00014605676768648907, "loss": 1.5265, "step": 4455 }, { "epoch": 1.381681775554861, "grad_norm": 0.27734375, "learning_rate": 0.00014603495685726372, "loss": 1.6233, "step": 4456 }, { "epoch": 1.381994373241638, "grad_norm": 0.271484375, "learning_rate": 0.00014601314324884935, "loss": 1.6946, "step": 4457 }, { "epoch": 1.3823069709284153, "grad_norm": 0.263671875, "learning_rate": 0.0001459913268625629, "loss": 1.523, "step": 4458 }, { "epoch": 1.3826195686151923, "grad_norm": 0.26953125, "learning_rate": 0.0001459695076997214, "loss": 1.3612, "step": 4459 }, { "epoch": 1.3829321663019694, "grad_norm": 0.28125, "learning_rate": 0.00014594768576164216, "loss": 1.7067, "step": 4460 }, { "epoch": 1.3832447639887464, "grad_norm": 0.279296875, "learning_rate": 0.00014592586104964262, "loss": 1.5837, "step": 4461 }, { "epoch": 1.3835573616755237, "grad_norm": 0.34765625, "learning_rate": 0.0001459040335650403, "loss": 2.4029, "step": 4462 }, { "epoch": 1.3838699593623007, "grad_norm": 0.26953125, "learning_rate": 0.00014588220330915297, "loss": 1.672, "step": 4463 }, { "epoch": 1.3841825570490778, "grad_norm": 0.275390625, "learning_rate": 0.00014586037028329856, "loss": 1.3609, "step": 4464 }, { "epoch": 1.384495154735855, "grad_norm": 0.291015625, "learning_rate": 0.00014583853448879515, "loss": 1.5009, "step": 4465 }, { "epoch": 1.384807752422632, "grad_norm": 0.267578125, "learning_rate": 0.000145816695926961, "loss": 1.5723, "step": 4466 }, { "epoch": 1.3851203501094091, "grad_norm": 0.28125, "learning_rate": 0.00014579485459911452, "loss": 1.3228, "step": 4467 }, { "epoch": 1.3854329477961862, "grad_norm": 0.2890625, "learning_rate": 0.00014577301050657432, "loss": 1.6831, "step": 4468 }, { "epoch": 1.3857455454829635, "grad_norm": 0.27734375, "learning_rate": 0.00014575116365065917, "loss": 1.5932, "step": 4469 }, { "epoch": 1.3860581431697405, "grad_norm": 0.271484375, "learning_rate": 0.00014572931403268794, "loss": 1.6896, "step": 4470 }, { "epoch": 1.3863707408565178, "grad_norm": 0.255859375, "learning_rate": 0.0001457074616539798, "loss": 1.5783, "step": 4471 }, { "epoch": 1.3866833385432948, "grad_norm": 0.283203125, "learning_rate": 0.00014568560651585387, "loss": 1.7211, "step": 4472 }, { "epoch": 1.386995936230072, "grad_norm": 0.28515625, "learning_rate": 0.00014566374861962972, "loss": 1.4425, "step": 4473 }, { "epoch": 1.387308533916849, "grad_norm": 0.28515625, "learning_rate": 0.00014564188796662686, "loss": 1.4965, "step": 4474 }, { "epoch": 1.3876211316036262, "grad_norm": 0.283203125, "learning_rate": 0.00014562002455816505, "loss": 1.8338, "step": 4475 }, { "epoch": 1.3879337292904033, "grad_norm": 0.26171875, "learning_rate": 0.00014559815839556425, "loss": 1.59, "step": 4476 }, { "epoch": 1.3882463269771803, "grad_norm": 0.287109375, "learning_rate": 0.0001455762894801445, "loss": 1.5326, "step": 4477 }, { "epoch": 1.3885589246639576, "grad_norm": 0.271484375, "learning_rate": 0.00014555441781322607, "loss": 1.6449, "step": 4478 }, { "epoch": 1.3888715223507346, "grad_norm": 0.279296875, "learning_rate": 0.00014553254339612942, "loss": 1.646, "step": 4479 }, { "epoch": 1.3891841200375117, "grad_norm": 0.27734375, "learning_rate": 0.00014551066623017507, "loss": 1.4298, "step": 4480 }, { "epoch": 1.3894967177242887, "grad_norm": 0.275390625, "learning_rate": 0.00014548878631668382, "loss": 1.7506, "step": 4481 }, { "epoch": 1.389809315411066, "grad_norm": 0.283203125, "learning_rate": 0.00014546690365697658, "loss": 1.4685, "step": 4482 }, { "epoch": 1.390121913097843, "grad_norm": 0.271484375, "learning_rate": 0.0001454450182523744, "loss": 1.756, "step": 4483 }, { "epoch": 1.3904345107846203, "grad_norm": 0.283203125, "learning_rate": 0.00014542313010419857, "loss": 1.3334, "step": 4484 }, { "epoch": 1.3907471084713974, "grad_norm": 0.2734375, "learning_rate": 0.00014540123921377048, "loss": 1.6596, "step": 4485 }, { "epoch": 1.3910597061581744, "grad_norm": 0.271484375, "learning_rate": 0.00014537934558241173, "loss": 1.3967, "step": 4486 }, { "epoch": 1.3913723038449515, "grad_norm": 0.2734375, "learning_rate": 0.00014535744921144407, "loss": 1.6193, "step": 4487 }, { "epoch": 1.3916849015317287, "grad_norm": 0.287109375, "learning_rate": 0.00014533555010218943, "loss": 1.5161, "step": 4488 }, { "epoch": 1.3919974992185058, "grad_norm": 0.271484375, "learning_rate": 0.00014531364825596981, "loss": 1.3273, "step": 4489 }, { "epoch": 1.3923100969052828, "grad_norm": 0.271484375, "learning_rate": 0.0001452917436741075, "loss": 1.5277, "step": 4490 }, { "epoch": 1.3926226945920601, "grad_norm": 0.275390625, "learning_rate": 0.00014526983635792495, "loss": 1.6286, "step": 4491 }, { "epoch": 1.3929352922788372, "grad_norm": 0.263671875, "learning_rate": 0.00014524792630874465, "loss": 1.7798, "step": 4492 }, { "epoch": 1.3932478899656142, "grad_norm": 0.267578125, "learning_rate": 0.0001452260135278894, "loss": 1.3343, "step": 4493 }, { "epoch": 1.3935604876523913, "grad_norm": 0.26953125, "learning_rate": 0.00014520409801668208, "loss": 1.5668, "step": 4494 }, { "epoch": 1.3938730853391685, "grad_norm": 0.26953125, "learning_rate": 0.00014518217977644576, "loss": 1.8438, "step": 4495 }, { "epoch": 1.3941856830259456, "grad_norm": 0.27734375, "learning_rate": 0.00014516025880850365, "loss": 1.4986, "step": 4496 }, { "epoch": 1.3944982807127229, "grad_norm": 0.271484375, "learning_rate": 0.0001451383351141792, "loss": 1.5536, "step": 4497 }, { "epoch": 1.3948108783995, "grad_norm": 0.267578125, "learning_rate": 0.00014511640869479593, "loss": 1.317, "step": 4498 }, { "epoch": 1.395123476086277, "grad_norm": 0.271484375, "learning_rate": 0.00014509447955167757, "loss": 1.4538, "step": 4499 }, { "epoch": 1.395436073773054, "grad_norm": 0.29296875, "learning_rate": 0.000145072547686148, "loss": 1.4144, "step": 4500 }, { "epoch": 1.3957486714598313, "grad_norm": 0.27734375, "learning_rate": 0.00014505061309953132, "loss": 1.673, "step": 4501 }, { "epoch": 1.3960612691466083, "grad_norm": 0.30078125, "learning_rate": 0.0001450286757931517, "loss": 1.5612, "step": 4502 }, { "epoch": 1.3963738668333854, "grad_norm": 0.2890625, "learning_rate": 0.00014500673576833353, "loss": 1.8646, "step": 4503 }, { "epoch": 1.3966864645201627, "grad_norm": 0.267578125, "learning_rate": 0.00014498479302640138, "loss": 1.4747, "step": 4504 }, { "epoch": 1.3969990622069397, "grad_norm": 0.271484375, "learning_rate": 0.00014496284756867995, "loss": 1.5751, "step": 4505 }, { "epoch": 1.3973116598937168, "grad_norm": 0.28125, "learning_rate": 0.0001449408993964941, "loss": 1.631, "step": 4506 }, { "epoch": 1.3976242575804938, "grad_norm": 0.291015625, "learning_rate": 0.00014491894851116895, "loss": 1.366, "step": 4507 }, { "epoch": 1.397936855267271, "grad_norm": 0.263671875, "learning_rate": 0.00014489699491402957, "loss": 1.5104, "step": 4508 }, { "epoch": 1.3982494529540481, "grad_norm": 0.296875, "learning_rate": 0.00014487503860640143, "loss": 1.9793, "step": 4509 }, { "epoch": 1.3985620506408254, "grad_norm": 0.275390625, "learning_rate": 0.00014485307958960999, "loss": 1.5269, "step": 4510 }, { "epoch": 1.3988746483276024, "grad_norm": 0.2734375, "learning_rate": 0.000144831117864981, "loss": 1.8076, "step": 4511 }, { "epoch": 1.3991872460143795, "grad_norm": 0.275390625, "learning_rate": 0.00014480915343384026, "loss": 1.5925, "step": 4512 }, { "epoch": 1.3994998437011565, "grad_norm": 0.2890625, "learning_rate": 0.0001447871862975139, "loss": 1.7906, "step": 4513 }, { "epoch": 1.3998124413879336, "grad_norm": 0.27734375, "learning_rate": 0.00014476521645732796, "loss": 1.5804, "step": 4514 }, { "epoch": 1.4001250390747109, "grad_norm": 0.279296875, "learning_rate": 0.0001447432439146089, "loss": 1.552, "step": 4515 }, { "epoch": 1.400437636761488, "grad_norm": 0.27734375, "learning_rate": 0.00014472126867068317, "loss": 1.3919, "step": 4516 }, { "epoch": 1.4007502344482652, "grad_norm": 0.2734375, "learning_rate": 0.00014469929072687747, "loss": 1.7584, "step": 4517 }, { "epoch": 1.4010628321350422, "grad_norm": 0.279296875, "learning_rate": 0.00014467731008451862, "loss": 1.5451, "step": 4518 }, { "epoch": 1.4013754298218193, "grad_norm": 0.26953125, "learning_rate": 0.00014465532674493364, "loss": 1.3442, "step": 4519 }, { "epoch": 1.4016880275085963, "grad_norm": 0.271484375, "learning_rate": 0.00014463334070944966, "loss": 1.6606, "step": 4520 }, { "epoch": 1.4020006251953736, "grad_norm": 0.2734375, "learning_rate": 0.00014461135197939406, "loss": 1.7989, "step": 4521 }, { "epoch": 1.4023132228821507, "grad_norm": 0.279296875, "learning_rate": 0.00014458936055609424, "loss": 1.5901, "step": 4522 }, { "epoch": 1.402625820568928, "grad_norm": 0.283203125, "learning_rate": 0.00014456736644087793, "loss": 1.5985, "step": 4523 }, { "epoch": 1.402938418255705, "grad_norm": 0.2890625, "learning_rate": 0.00014454536963507292, "loss": 1.7222, "step": 4524 }, { "epoch": 1.403251015942482, "grad_norm": 0.271484375, "learning_rate": 0.0001445233701400072, "loss": 1.6968, "step": 4525 }, { "epoch": 1.403563613629259, "grad_norm": 0.279296875, "learning_rate": 0.00014450136795700886, "loss": 1.4162, "step": 4526 }, { "epoch": 1.4038762113160361, "grad_norm": 0.267578125, "learning_rate": 0.0001444793630874062, "loss": 1.4792, "step": 4527 }, { "epoch": 1.4041888090028134, "grad_norm": 0.271484375, "learning_rate": 0.00014445735553252775, "loss": 1.5032, "step": 4528 }, { "epoch": 1.4045014066895904, "grad_norm": 0.27734375, "learning_rate": 0.0001444353452937021, "loss": 1.4928, "step": 4529 }, { "epoch": 1.4048140043763677, "grad_norm": 0.265625, "learning_rate": 0.00014441333237225803, "loss": 1.5954, "step": 4530 }, { "epoch": 1.4051266020631448, "grad_norm": 0.275390625, "learning_rate": 0.00014439131676952446, "loss": 1.4347, "step": 4531 }, { "epoch": 1.4054391997499218, "grad_norm": 0.2734375, "learning_rate": 0.00014436929848683053, "loss": 1.4861, "step": 4532 }, { "epoch": 1.4057517974366989, "grad_norm": 0.28125, "learning_rate": 0.00014434727752550555, "loss": 1.8632, "step": 4533 }, { "epoch": 1.4060643951234761, "grad_norm": 0.287109375, "learning_rate": 0.00014432525388687886, "loss": 1.7035, "step": 4534 }, { "epoch": 1.4063769928102532, "grad_norm": 0.283203125, "learning_rate": 0.00014430322757228014, "loss": 1.4831, "step": 4535 }, { "epoch": 1.4066895904970302, "grad_norm": 0.279296875, "learning_rate": 0.00014428119858303913, "loss": 1.7658, "step": 4536 }, { "epoch": 1.4070021881838075, "grad_norm": 0.275390625, "learning_rate": 0.00014425916692048572, "loss": 1.6409, "step": 4537 }, { "epoch": 1.4073147858705846, "grad_norm": 0.271484375, "learning_rate": 0.00014423713258594997, "loss": 1.4858, "step": 4538 }, { "epoch": 1.4076273835573616, "grad_norm": 0.2734375, "learning_rate": 0.0001442150955807622, "loss": 1.3965, "step": 4539 }, { "epoch": 1.4079399812441387, "grad_norm": 0.26953125, "learning_rate": 0.00014419305590625272, "loss": 1.4889, "step": 4540 }, { "epoch": 1.408252578930916, "grad_norm": 0.265625, "learning_rate": 0.00014417101356375218, "loss": 1.4343, "step": 4541 }, { "epoch": 1.408565176617693, "grad_norm": 0.275390625, "learning_rate": 0.00014414896855459124, "loss": 1.3446, "step": 4542 }, { "epoch": 1.4088777743044703, "grad_norm": 0.271484375, "learning_rate": 0.00014412692088010083, "loss": 1.5692, "step": 4543 }, { "epoch": 1.4091903719912473, "grad_norm": 0.267578125, "learning_rate": 0.00014410487054161196, "loss": 1.4879, "step": 4544 }, { "epoch": 1.4095029696780244, "grad_norm": 0.28125, "learning_rate": 0.00014408281754045585, "loss": 1.5013, "step": 4545 }, { "epoch": 1.4098155673648014, "grad_norm": 0.265625, "learning_rate": 0.00014406076187796387, "loss": 1.4588, "step": 4546 }, { "epoch": 1.4101281650515787, "grad_norm": 0.275390625, "learning_rate": 0.00014403870355546755, "loss": 1.6314, "step": 4547 }, { "epoch": 1.4104407627383557, "grad_norm": 0.26953125, "learning_rate": 0.00014401664257429858, "loss": 1.5477, "step": 4548 }, { "epoch": 1.4107533604251328, "grad_norm": 0.267578125, "learning_rate": 0.00014399457893578884, "loss": 1.5232, "step": 4549 }, { "epoch": 1.41106595811191, "grad_norm": 0.27734375, "learning_rate": 0.00014397251264127022, "loss": 1.7653, "step": 4550 }, { "epoch": 1.411378555798687, "grad_norm": 0.275390625, "learning_rate": 0.00014395044369207505, "loss": 1.8346, "step": 4551 }, { "epoch": 1.4116911534854641, "grad_norm": 0.26953125, "learning_rate": 0.00014392837208953557, "loss": 1.6111, "step": 4552 }, { "epoch": 1.4120037511722412, "grad_norm": 0.298828125, "learning_rate": 0.00014390629783498428, "loss": 1.6006, "step": 4553 }, { "epoch": 1.4123163488590185, "grad_norm": 0.263671875, "learning_rate": 0.00014388422092975387, "loss": 1.3409, "step": 4554 }, { "epoch": 1.4126289465457955, "grad_norm": 0.2734375, "learning_rate": 0.00014386214137517708, "loss": 1.4263, "step": 4555 }, { "epoch": 1.4129415442325728, "grad_norm": 0.27734375, "learning_rate": 0.00014384005917258695, "loss": 1.4879, "step": 4556 }, { "epoch": 1.4132541419193498, "grad_norm": 0.275390625, "learning_rate": 0.00014381797432331658, "loss": 1.6945, "step": 4557 }, { "epoch": 1.4135667396061269, "grad_norm": 0.255859375, "learning_rate": 0.0001437958868286992, "loss": 1.3465, "step": 4558 }, { "epoch": 1.413879337292904, "grad_norm": 0.263671875, "learning_rate": 0.0001437737966900684, "loss": 1.3708, "step": 4559 }, { "epoch": 1.4141919349796812, "grad_norm": 0.287109375, "learning_rate": 0.00014375170390875768, "loss": 1.5018, "step": 4560 }, { "epoch": 1.4145045326664583, "grad_norm": 0.2890625, "learning_rate": 0.00014372960848610085, "loss": 1.428, "step": 4561 }, { "epoch": 1.4148171303532353, "grad_norm": 0.26953125, "learning_rate": 0.00014370751042343182, "loss": 1.3923, "step": 4562 }, { "epoch": 1.4151297280400126, "grad_norm": 0.251953125, "learning_rate": 0.00014368540972208471, "loss": 1.3904, "step": 4563 }, { "epoch": 1.4154423257267896, "grad_norm": 0.2734375, "learning_rate": 0.0001436633063833937, "loss": 1.7971, "step": 4564 }, { "epoch": 1.4157549234135667, "grad_norm": 0.271484375, "learning_rate": 0.0001436412004086933, "loss": 1.3722, "step": 4565 }, { "epoch": 1.4160675211003437, "grad_norm": 0.275390625, "learning_rate": 0.000143619091799318, "loss": 1.7189, "step": 4566 }, { "epoch": 1.416380118787121, "grad_norm": 0.27734375, "learning_rate": 0.00014359698055660256, "loss": 1.4373, "step": 4567 }, { "epoch": 1.416692716473898, "grad_norm": 0.2734375, "learning_rate": 0.0001435748666818818, "loss": 1.5061, "step": 4568 }, { "epoch": 1.4170053141606753, "grad_norm": 0.27734375, "learning_rate": 0.00014355275017649083, "loss": 1.5032, "step": 4569 }, { "epoch": 1.4173179118474524, "grad_norm": 0.283203125, "learning_rate": 0.00014353063104176483, "loss": 1.5084, "step": 4570 }, { "epoch": 1.4176305095342294, "grad_norm": 0.29296875, "learning_rate": 0.0001435085092790392, "loss": 1.4757, "step": 4571 }, { "epoch": 1.4179431072210065, "grad_norm": 0.291015625, "learning_rate": 0.00014348638488964938, "loss": 1.5824, "step": 4572 }, { "epoch": 1.4182557049077837, "grad_norm": 0.2734375, "learning_rate": 0.00014346425787493112, "loss": 1.7873, "step": 4573 }, { "epoch": 1.4185683025945608, "grad_norm": 0.28515625, "learning_rate": 0.00014344212823622017, "loss": 1.6968, "step": 4574 }, { "epoch": 1.4188809002813378, "grad_norm": 0.2578125, "learning_rate": 0.00014341999597485266, "loss": 1.776, "step": 4575 }, { "epoch": 1.4191934979681151, "grad_norm": 0.2890625, "learning_rate": 0.00014339786109216458, "loss": 1.7142, "step": 4576 }, { "epoch": 1.4195060956548922, "grad_norm": 0.255859375, "learning_rate": 0.00014337572358949242, "loss": 1.4147, "step": 4577 }, { "epoch": 1.4198186933416692, "grad_norm": 0.271484375, "learning_rate": 0.00014335358346817244, "loss": 1.6323, "step": 4578 }, { "epoch": 1.4201312910284463, "grad_norm": 0.275390625, "learning_rate": 0.00014333144072954144, "loss": 1.6688, "step": 4579 }, { "epoch": 1.4204438887152235, "grad_norm": 0.28125, "learning_rate": 0.00014330929537493615, "loss": 1.6052, "step": 4580 }, { "epoch": 1.4207564864020006, "grad_norm": 0.275390625, "learning_rate": 0.0001432871474056935, "loss": 1.4074, "step": 4581 }, { "epoch": 1.4210690840887779, "grad_norm": 0.26171875, "learning_rate": 0.00014326499682315057, "loss": 1.4452, "step": 4582 }, { "epoch": 1.421381681775555, "grad_norm": 0.275390625, "learning_rate": 0.00014324284362864472, "loss": 1.6482, "step": 4583 }, { "epoch": 1.421694279462332, "grad_norm": 0.279296875, "learning_rate": 0.00014322068782351323, "loss": 1.5454, "step": 4584 }, { "epoch": 1.422006877149109, "grad_norm": 0.291015625, "learning_rate": 0.00014319852940909377, "loss": 1.4762, "step": 4585 }, { "epoch": 1.4223194748358863, "grad_norm": 0.287109375, "learning_rate": 0.00014317636838672402, "loss": 1.4904, "step": 4586 }, { "epoch": 1.4226320725226633, "grad_norm": 0.28515625, "learning_rate": 0.0001431542047577419, "loss": 1.4558, "step": 4587 }, { "epoch": 1.4229446702094404, "grad_norm": 0.28125, "learning_rate": 0.00014313203852348545, "loss": 1.5608, "step": 4588 }, { "epoch": 1.4232572678962176, "grad_norm": 0.38671875, "learning_rate": 0.0001431098696852929, "loss": 2.2835, "step": 4589 }, { "epoch": 1.4235698655829947, "grad_norm": 0.2734375, "learning_rate": 0.00014308769824450252, "loss": 1.5749, "step": 4590 }, { "epoch": 1.4238824632697717, "grad_norm": 0.279296875, "learning_rate": 0.00014306552420245293, "loss": 1.7626, "step": 4591 }, { "epoch": 1.4241950609565488, "grad_norm": 0.2734375, "learning_rate": 0.00014304334756048273, "loss": 1.4544, "step": 4592 }, { "epoch": 1.424507658643326, "grad_norm": 0.27734375, "learning_rate": 0.00014302116831993084, "loss": 1.4499, "step": 4593 }, { "epoch": 1.4248202563301031, "grad_norm": 0.287109375, "learning_rate": 0.00014299898648213616, "loss": 1.5541, "step": 4594 }, { "epoch": 1.4251328540168804, "grad_norm": 0.26953125, "learning_rate": 0.0001429768020484379, "loss": 1.3729, "step": 4595 }, { "epoch": 1.4254454517036574, "grad_norm": 0.271484375, "learning_rate": 0.00014295461502017532, "loss": 1.5893, "step": 4596 }, { "epoch": 1.4257580493904345, "grad_norm": 0.271484375, "learning_rate": 0.0001429324253986879, "loss": 1.4734, "step": 4597 }, { "epoch": 1.4260706470772115, "grad_norm": 0.28125, "learning_rate": 0.0001429102331853152, "loss": 1.7623, "step": 4598 }, { "epoch": 1.4263832447639888, "grad_norm": 0.28125, "learning_rate": 0.0001428880383813971, "loss": 1.4604, "step": 4599 }, { "epoch": 1.4266958424507659, "grad_norm": 0.291015625, "learning_rate": 0.00014286584098827345, "loss": 1.4894, "step": 4600 }, { "epoch": 1.427008440137543, "grad_norm": 0.267578125, "learning_rate": 0.0001428436410072844, "loss": 1.743, "step": 4601 }, { "epoch": 1.4273210378243202, "grad_norm": 0.267578125, "learning_rate": 0.00014282143843977004, "loss": 1.3815, "step": 4602 }, { "epoch": 1.4276336355110972, "grad_norm": 0.265625, "learning_rate": 0.00014279923328707096, "loss": 1.4671, "step": 4603 }, { "epoch": 1.4279462331978743, "grad_norm": 0.291015625, "learning_rate": 0.0001427770255505276, "loss": 1.9332, "step": 4604 }, { "epoch": 1.4282588308846513, "grad_norm": 0.283203125, "learning_rate": 0.0001427548152314807, "loss": 1.6423, "step": 4605 }, { "epoch": 1.4285714285714286, "grad_norm": 0.2890625, "learning_rate": 0.0001427326023312711, "loss": 1.7537, "step": 4606 }, { "epoch": 1.4288840262582057, "grad_norm": 0.283203125, "learning_rate": 0.0001427103868512399, "loss": 1.4938, "step": 4607 }, { "epoch": 1.429196623944983, "grad_norm": 0.283203125, "learning_rate": 0.00014268816879272817, "loss": 1.434, "step": 4608 }, { "epoch": 1.42950922163176, "grad_norm": 0.279296875, "learning_rate": 0.0001426659481570773, "loss": 1.5592, "step": 4609 }, { "epoch": 1.429821819318537, "grad_norm": 0.28125, "learning_rate": 0.00014264372494562878, "loss": 1.7722, "step": 4610 }, { "epoch": 1.430134417005314, "grad_norm": 0.29296875, "learning_rate": 0.00014262149915972422, "loss": 1.5044, "step": 4611 }, { "epoch": 1.4304470146920913, "grad_norm": 0.2578125, "learning_rate": 0.00014259927080070546, "loss": 1.804, "step": 4612 }, { "epoch": 1.4307596123788684, "grad_norm": 0.271484375, "learning_rate": 0.00014257703986991446, "loss": 1.2922, "step": 4613 }, { "epoch": 1.4310722100656454, "grad_norm": 0.283203125, "learning_rate": 0.00014255480636869328, "loss": 1.646, "step": 4614 }, { "epoch": 1.4313848077524227, "grad_norm": 0.279296875, "learning_rate": 0.0001425325702983842, "loss": 1.4114, "step": 4615 }, { "epoch": 1.4316974054391998, "grad_norm": 0.291015625, "learning_rate": 0.00014251033166032964, "loss": 1.7375, "step": 4616 }, { "epoch": 1.4320100031259768, "grad_norm": 0.2578125, "learning_rate": 0.00014248809045587223, "loss": 1.4065, "step": 4617 }, { "epoch": 1.4323226008127539, "grad_norm": 0.275390625, "learning_rate": 0.00014246584668635464, "loss": 1.4145, "step": 4618 }, { "epoch": 1.4326351984995311, "grad_norm": 0.2734375, "learning_rate": 0.00014244360035311977, "loss": 1.576, "step": 4619 }, { "epoch": 1.4329477961863082, "grad_norm": 0.28125, "learning_rate": 0.0001424213514575107, "loss": 1.581, "step": 4620 }, { "epoch": 1.4332603938730855, "grad_norm": 0.28125, "learning_rate": 0.00014239910000087052, "loss": 1.7384, "step": 4621 }, { "epoch": 1.4335729915598625, "grad_norm": 0.27734375, "learning_rate": 0.00014237684598454267, "loss": 1.353, "step": 4622 }, { "epoch": 1.4338855892466396, "grad_norm": 0.27734375, "learning_rate": 0.0001423545894098706, "loss": 1.1421, "step": 4623 }, { "epoch": 1.4341981869334166, "grad_norm": 0.26953125, "learning_rate": 0.00014233233027819803, "loss": 1.594, "step": 4624 }, { "epoch": 1.4345107846201939, "grad_norm": 0.26953125, "learning_rate": 0.00014231006859086875, "loss": 1.3478, "step": 4625 }, { "epoch": 1.434823382306971, "grad_norm": 0.27734375, "learning_rate": 0.00014228780434922666, "loss": 1.642, "step": 4626 }, { "epoch": 1.435135979993748, "grad_norm": 0.28125, "learning_rate": 0.00014226553755461598, "loss": 1.6561, "step": 4627 }, { "epoch": 1.4354485776805253, "grad_norm": 0.28515625, "learning_rate": 0.0001422432682083809, "loss": 1.4408, "step": 4628 }, { "epoch": 1.4357611753673023, "grad_norm": 0.26953125, "learning_rate": 0.0001422209963118659, "loss": 1.8463, "step": 4629 }, { "epoch": 1.4360737730540793, "grad_norm": 0.2734375, "learning_rate": 0.00014219872186641558, "loss": 1.6674, "step": 4630 }, { "epoch": 1.4363863707408564, "grad_norm": 0.275390625, "learning_rate": 0.00014217644487337462, "loss": 1.5182, "step": 4631 }, { "epoch": 1.4366989684276337, "grad_norm": 0.267578125, "learning_rate": 0.00014215416533408794, "loss": 1.541, "step": 4632 }, { "epoch": 1.4370115661144107, "grad_norm": 0.27734375, "learning_rate": 0.00014213188324990058, "loss": 1.5117, "step": 4633 }, { "epoch": 1.437324163801188, "grad_norm": 0.2734375, "learning_rate": 0.00014210959862215775, "loss": 1.9105, "step": 4634 }, { "epoch": 1.437636761487965, "grad_norm": 0.267578125, "learning_rate": 0.00014208731145220476, "loss": 1.8691, "step": 4635 }, { "epoch": 1.437949359174742, "grad_norm": 0.2734375, "learning_rate": 0.00014206502174138718, "loss": 1.4839, "step": 4636 }, { "epoch": 1.4382619568615191, "grad_norm": 0.2734375, "learning_rate": 0.0001420427294910506, "loss": 1.6767, "step": 4637 }, { "epoch": 1.4385745545482964, "grad_norm": 0.283203125, "learning_rate": 0.0001420204347025409, "loss": 1.6293, "step": 4638 }, { "epoch": 1.4388871522350735, "grad_norm": 0.28515625, "learning_rate": 0.000141998137377204, "loss": 1.6073, "step": 4639 }, { "epoch": 1.4391997499218505, "grad_norm": 0.263671875, "learning_rate": 0.00014197583751638602, "loss": 1.7099, "step": 4640 }, { "epoch": 1.4395123476086278, "grad_norm": 0.27734375, "learning_rate": 0.00014195353512143322, "loss": 1.5057, "step": 4641 }, { "epoch": 1.4398249452954048, "grad_norm": 0.291015625, "learning_rate": 0.0001419312301936921, "loss": 1.6119, "step": 4642 }, { "epoch": 1.4401375429821819, "grad_norm": 0.279296875, "learning_rate": 0.00014190892273450913, "loss": 1.4598, "step": 4643 }, { "epoch": 1.440450140668959, "grad_norm": 0.275390625, "learning_rate": 0.0001418866127452311, "loss": 1.3623, "step": 4644 }, { "epoch": 1.4407627383557362, "grad_norm": 0.298828125, "learning_rate": 0.0001418643002272049, "loss": 1.5883, "step": 4645 }, { "epoch": 1.4410753360425133, "grad_norm": 0.279296875, "learning_rate": 0.00014184198518177752, "loss": 1.7833, "step": 4646 }, { "epoch": 1.4413879337292905, "grad_norm": 0.296875, "learning_rate": 0.00014181966761029618, "loss": 1.8222, "step": 4647 }, { "epoch": 1.4417005314160676, "grad_norm": 0.263671875, "learning_rate": 0.0001417973475141082, "loss": 1.6506, "step": 4648 }, { "epoch": 1.4420131291028446, "grad_norm": 0.287109375, "learning_rate": 0.00014177502489456112, "loss": 1.4809, "step": 4649 }, { "epoch": 1.4423257267896217, "grad_norm": 0.267578125, "learning_rate": 0.00014175269975300257, "loss": 1.6961, "step": 4650 }, { "epoch": 1.442638324476399, "grad_norm": 0.294921875, "learning_rate": 0.0001417303720907803, "loss": 1.7772, "step": 4651 }, { "epoch": 1.442950922163176, "grad_norm": 0.26953125, "learning_rate": 0.00014170804190924226, "loss": 1.4788, "step": 4652 }, { "epoch": 1.443263519849953, "grad_norm": 0.26953125, "learning_rate": 0.00014168570920973663, "loss": 1.6487, "step": 4653 }, { "epoch": 1.4435761175367303, "grad_norm": 0.265625, "learning_rate": 0.0001416633739936116, "loss": 1.3923, "step": 4654 }, { "epoch": 1.4438887152235074, "grad_norm": 0.275390625, "learning_rate": 0.00014164103626221558, "loss": 1.5774, "step": 4655 }, { "epoch": 1.4442013129102844, "grad_norm": 0.265625, "learning_rate": 0.00014161869601689715, "loss": 1.5656, "step": 4656 }, { "epoch": 1.4445139105970615, "grad_norm": 0.279296875, "learning_rate": 0.000141596353259005, "loss": 1.3998, "step": 4657 }, { "epoch": 1.4448265082838387, "grad_norm": 0.271484375, "learning_rate": 0.000141574007989888, "loss": 1.4597, "step": 4658 }, { "epoch": 1.4451391059706158, "grad_norm": 0.279296875, "learning_rate": 0.00014155166021089514, "loss": 1.5555, "step": 4659 }, { "epoch": 1.445451703657393, "grad_norm": 0.25390625, "learning_rate": 0.00014152930992337564, "loss": 1.5083, "step": 4660 }, { "epoch": 1.44576430134417, "grad_norm": 0.26171875, "learning_rate": 0.00014150695712867873, "loss": 1.526, "step": 4661 }, { "epoch": 1.4460768990309472, "grad_norm": 0.283203125, "learning_rate": 0.00014148460182815398, "loss": 1.6716, "step": 4662 }, { "epoch": 1.4463894967177242, "grad_norm": 0.279296875, "learning_rate": 0.0001414622440231509, "loss": 1.4131, "step": 4663 }, { "epoch": 1.4467020944045015, "grad_norm": 0.265625, "learning_rate": 0.00014143988371501936, "loss": 1.52, "step": 4664 }, { "epoch": 1.4470146920912785, "grad_norm": 0.271484375, "learning_rate": 0.0001414175209051092, "loss": 1.5605, "step": 4665 }, { "epoch": 1.4473272897780556, "grad_norm": 0.265625, "learning_rate": 0.00014139515559477057, "loss": 1.3161, "step": 4666 }, { "epoch": 1.4476398874648329, "grad_norm": 0.271484375, "learning_rate": 0.0001413727877853536, "loss": 1.3972, "step": 4667 }, { "epoch": 1.44795248515161, "grad_norm": 0.25390625, "learning_rate": 0.0001413504174782087, "loss": 1.4792, "step": 4668 }, { "epoch": 1.448265082838387, "grad_norm": 0.26953125, "learning_rate": 0.00014132804467468642, "loss": 1.474, "step": 4669 }, { "epoch": 1.448577680525164, "grad_norm": 0.2890625, "learning_rate": 0.00014130566937613743, "loss": 1.4871, "step": 4670 }, { "epoch": 1.4488902782119413, "grad_norm": 0.2890625, "learning_rate": 0.00014128329158391253, "loss": 1.5615, "step": 4671 }, { "epoch": 1.4492028758987183, "grad_norm": 0.2734375, "learning_rate": 0.00014126091129936275, "loss": 1.6239, "step": 4672 }, { "epoch": 1.4495154735854956, "grad_norm": 0.275390625, "learning_rate": 0.0001412385285238391, "loss": 1.4107, "step": 4673 }, { "epoch": 1.4498280712722726, "grad_norm": 0.283203125, "learning_rate": 0.00014121614325869302, "loss": 1.4174, "step": 4674 }, { "epoch": 1.4501406689590497, "grad_norm": 0.2890625, "learning_rate": 0.00014119375550527581, "loss": 1.6103, "step": 4675 }, { "epoch": 1.4504532666458267, "grad_norm": 0.26171875, "learning_rate": 0.00014117136526493903, "loss": 1.4591, "step": 4676 }, { "epoch": 1.450765864332604, "grad_norm": 0.27734375, "learning_rate": 0.00014114897253903453, "loss": 1.7177, "step": 4677 }, { "epoch": 1.451078462019381, "grad_norm": 0.271484375, "learning_rate": 0.00014112657732891413, "loss": 1.6403, "step": 4678 }, { "epoch": 1.4513910597061581, "grad_norm": 0.27734375, "learning_rate": 0.00014110417963592983, "loss": 1.3365, "step": 4679 }, { "epoch": 1.4517036573929354, "grad_norm": 0.279296875, "learning_rate": 0.0001410817794614338, "loss": 1.7381, "step": 4680 }, { "epoch": 1.4520162550797124, "grad_norm": 0.2734375, "learning_rate": 0.0001410593768067784, "loss": 1.7538, "step": 4681 }, { "epoch": 1.4523288527664895, "grad_norm": 0.26171875, "learning_rate": 0.00014103697167331612, "loss": 1.5666, "step": 4682 }, { "epoch": 1.4526414504532665, "grad_norm": 0.26171875, "learning_rate": 0.00014101456406239956, "loss": 1.3204, "step": 4683 }, { "epoch": 1.4529540481400438, "grad_norm": 0.28125, "learning_rate": 0.0001409921539753815, "loss": 1.5704, "step": 4684 }, { "epoch": 1.4532666458268209, "grad_norm": 0.357421875, "learning_rate": 0.00014096974141361484, "loss": 2.0966, "step": 4685 }, { "epoch": 1.4535792435135981, "grad_norm": 0.271484375, "learning_rate": 0.00014094732637845274, "loss": 1.3494, "step": 4686 }, { "epoch": 1.4538918412003752, "grad_norm": 0.2890625, "learning_rate": 0.00014092490887124832, "loss": 1.5677, "step": 4687 }, { "epoch": 1.4542044388871522, "grad_norm": 0.279296875, "learning_rate": 0.000140902488893355, "loss": 1.6437, "step": 4688 }, { "epoch": 1.4545170365739293, "grad_norm": 0.267578125, "learning_rate": 0.00014088006644612634, "loss": 1.4874, "step": 4689 }, { "epoch": 1.4548296342607066, "grad_norm": 0.287109375, "learning_rate": 0.00014085764153091597, "loss": 1.4281, "step": 4690 }, { "epoch": 1.4551422319474836, "grad_norm": 0.28515625, "learning_rate": 0.00014083521414907766, "loss": 1.7058, "step": 4691 }, { "epoch": 1.4554548296342606, "grad_norm": 0.27734375, "learning_rate": 0.00014081278430196553, "loss": 1.5046, "step": 4692 }, { "epoch": 1.455767427321038, "grad_norm": 0.271484375, "learning_rate": 0.0001407903519909335, "loss": 1.5766, "step": 4693 }, { "epoch": 1.456080025007815, "grad_norm": 0.28515625, "learning_rate": 0.000140767917217336, "loss": 1.5645, "step": 4694 }, { "epoch": 1.456392622694592, "grad_norm": 0.265625, "learning_rate": 0.00014074547998252742, "loss": 1.4359, "step": 4695 }, { "epoch": 1.456705220381369, "grad_norm": 0.28515625, "learning_rate": 0.00014072304028786223, "loss": 1.5674, "step": 4696 }, { "epoch": 1.4570178180681463, "grad_norm": 0.267578125, "learning_rate": 0.0001407005981346952, "loss": 1.4606, "step": 4697 }, { "epoch": 1.4573304157549234, "grad_norm": 0.28125, "learning_rate": 0.00014067815352438123, "loss": 1.4547, "step": 4698 }, { "epoch": 1.4576430134417007, "grad_norm": 0.287109375, "learning_rate": 0.00014065570645827526, "loss": 1.688, "step": 4699 }, { "epoch": 1.4579556111284777, "grad_norm": 0.28125, "learning_rate": 0.00014063325693773252, "loss": 1.6072, "step": 4700 }, { "epoch": 1.4582682088152548, "grad_norm": 0.29296875, "learning_rate": 0.00014061080496410825, "loss": 1.5536, "step": 4701 }, { "epoch": 1.4585808065020318, "grad_norm": 0.2734375, "learning_rate": 0.00014058835053875796, "loss": 1.676, "step": 4702 }, { "epoch": 1.458893404188809, "grad_norm": 0.28515625, "learning_rate": 0.0001405658936630372, "loss": 1.7725, "step": 4703 }, { "epoch": 1.4592060018755861, "grad_norm": 0.291015625, "learning_rate": 0.00014054343433830176, "loss": 1.7091, "step": 4704 }, { "epoch": 1.4595185995623632, "grad_norm": 0.349609375, "learning_rate": 0.00014052097256590752, "loss": 2.3213, "step": 4705 }, { "epoch": 1.4598311972491405, "grad_norm": 0.275390625, "learning_rate": 0.00014049850834721054, "loss": 1.3884, "step": 4706 }, { "epoch": 1.4601437949359175, "grad_norm": 0.28515625, "learning_rate": 0.000140476041683567, "loss": 1.5953, "step": 4707 }, { "epoch": 1.4604563926226946, "grad_norm": 0.26171875, "learning_rate": 0.00014045357257633323, "loss": 1.5026, "step": 4708 }, { "epoch": 1.4607689903094716, "grad_norm": 0.28125, "learning_rate": 0.00014043110102686574, "loss": 1.512, "step": 4709 }, { "epoch": 1.4610815879962489, "grad_norm": 0.287109375, "learning_rate": 0.0001404086270365212, "loss": 1.5442, "step": 4710 }, { "epoch": 1.461394185683026, "grad_norm": 0.2890625, "learning_rate": 0.00014038615060665626, "loss": 1.5655, "step": 4711 }, { "epoch": 1.4617067833698032, "grad_norm": 0.275390625, "learning_rate": 0.00014036367173862803, "loss": 1.6321, "step": 4712 }, { "epoch": 1.4620193810565802, "grad_norm": 0.2734375, "learning_rate": 0.00014034119043379348, "loss": 1.3139, "step": 4713 }, { "epoch": 1.4623319787433573, "grad_norm": 0.283203125, "learning_rate": 0.00014031870669350984, "loss": 1.6902, "step": 4714 }, { "epoch": 1.4626445764301343, "grad_norm": 0.271484375, "learning_rate": 0.0001402962205191345, "loss": 1.5025, "step": 4715 }, { "epoch": 1.4629571741169114, "grad_norm": 0.2734375, "learning_rate": 0.000140273731912025, "loss": 1.6473, "step": 4716 }, { "epoch": 1.4632697718036887, "grad_norm": 0.279296875, "learning_rate": 0.00014025124087353892, "loss": 1.4133, "step": 4717 }, { "epoch": 1.4635823694904657, "grad_norm": 0.265625, "learning_rate": 0.0001402287474050342, "loss": 1.3423, "step": 4718 }, { "epoch": 1.463894967177243, "grad_norm": 0.2890625, "learning_rate": 0.00014020625150786874, "loss": 1.6132, "step": 4719 }, { "epoch": 1.46420756486402, "grad_norm": 0.271484375, "learning_rate": 0.0001401837531834006, "loss": 1.4984, "step": 4720 }, { "epoch": 1.464520162550797, "grad_norm": 0.275390625, "learning_rate": 0.0001401612524329881, "loss": 1.5169, "step": 4721 }, { "epoch": 1.4648327602375741, "grad_norm": 0.275390625, "learning_rate": 0.0001401387492579896, "loss": 1.5537, "step": 4722 }, { "epoch": 1.4651453579243514, "grad_norm": 0.291015625, "learning_rate": 0.00014011624365976364, "loss": 1.8074, "step": 4723 }, { "epoch": 1.4654579556111285, "grad_norm": 0.26953125, "learning_rate": 0.000140093735639669, "loss": 1.6142, "step": 4724 }, { "epoch": 1.4657705532979057, "grad_norm": 0.296875, "learning_rate": 0.00014007122519906436, "loss": 1.7389, "step": 4725 }, { "epoch": 1.4660831509846828, "grad_norm": 0.271484375, "learning_rate": 0.00014004871233930883, "loss": 1.8496, "step": 4726 }, { "epoch": 1.4663957486714598, "grad_norm": 0.291015625, "learning_rate": 0.0001400261970617615, "loss": 1.3808, "step": 4727 }, { "epoch": 1.4667083463582369, "grad_norm": 0.296875, "learning_rate": 0.00014000367936778166, "loss": 1.4361, "step": 4728 }, { "epoch": 1.467020944045014, "grad_norm": 0.275390625, "learning_rate": 0.00013998115925872867, "loss": 1.2945, "step": 4729 }, { "epoch": 1.4673335417317912, "grad_norm": 0.28515625, "learning_rate": 0.00013995863673596225, "loss": 1.8561, "step": 4730 }, { "epoch": 1.4676461394185683, "grad_norm": 0.279296875, "learning_rate": 0.0001399361118008419, "loss": 1.3074, "step": 4731 }, { "epoch": 1.4679587371053455, "grad_norm": 0.279296875, "learning_rate": 0.00013991358445472764, "loss": 1.5621, "step": 4732 }, { "epoch": 1.4682713347921226, "grad_norm": 0.27734375, "learning_rate": 0.0001398910546989794, "loss": 1.3776, "step": 4733 }, { "epoch": 1.4685839324788996, "grad_norm": 0.265625, "learning_rate": 0.00013986852253495738, "loss": 1.4626, "step": 4734 }, { "epoch": 1.4688965301656767, "grad_norm": 0.28515625, "learning_rate": 0.00013984598796402183, "loss": 1.7205, "step": 4735 }, { "epoch": 1.469209127852454, "grad_norm": 0.29296875, "learning_rate": 0.00013982345098753325, "loss": 1.3697, "step": 4736 }, { "epoch": 1.469521725539231, "grad_norm": 0.396484375, "learning_rate": 0.00013980091160685216, "loss": 2.0519, "step": 4737 }, { "epoch": 1.4698343232260083, "grad_norm": 0.287109375, "learning_rate": 0.0001397783698233393, "loss": 1.5893, "step": 4738 }, { "epoch": 1.4701469209127853, "grad_norm": 0.2890625, "learning_rate": 0.0001397558256383556, "loss": 1.7312, "step": 4739 }, { "epoch": 1.4704595185995624, "grad_norm": 0.28125, "learning_rate": 0.00013973327905326204, "loss": 1.4599, "step": 4740 }, { "epoch": 1.4707721162863394, "grad_norm": 0.283203125, "learning_rate": 0.00013971073006941974, "loss": 1.5603, "step": 4741 }, { "epoch": 1.4710847139731165, "grad_norm": 0.271484375, "learning_rate": 0.00013968817868819012, "loss": 1.5871, "step": 4742 }, { "epoch": 1.4713973116598937, "grad_norm": 0.279296875, "learning_rate": 0.00013966562491093455, "loss": 1.7609, "step": 4743 }, { "epoch": 1.4717099093466708, "grad_norm": 0.271484375, "learning_rate": 0.00013964306873901467, "loss": 1.5724, "step": 4744 }, { "epoch": 1.472022507033448, "grad_norm": 0.27734375, "learning_rate": 0.00013962051017379218, "loss": 1.6633, "step": 4745 }, { "epoch": 1.472335104720225, "grad_norm": 0.265625, "learning_rate": 0.00013959794921662906, "loss": 1.5911, "step": 4746 }, { "epoch": 1.4726477024070022, "grad_norm": 0.291015625, "learning_rate": 0.00013957538586888724, "loss": 1.642, "step": 4747 }, { "epoch": 1.4729603000937792, "grad_norm": 0.283203125, "learning_rate": 0.00013955282013192898, "loss": 1.4164, "step": 4748 }, { "epoch": 1.4732728977805565, "grad_norm": 0.357421875, "learning_rate": 0.00013953025200711652, "loss": 2.1398, "step": 4749 }, { "epoch": 1.4735854954673335, "grad_norm": 0.283203125, "learning_rate": 0.00013950768149581242, "loss": 1.5324, "step": 4750 }, { "epoch": 1.4738980931541106, "grad_norm": 0.287109375, "learning_rate": 0.00013948510859937917, "loss": 1.5179, "step": 4751 }, { "epoch": 1.4742106908408878, "grad_norm": 0.28515625, "learning_rate": 0.00013946253331917967, "loss": 1.6216, "step": 4752 }, { "epoch": 1.474523288527665, "grad_norm": 0.2890625, "learning_rate": 0.0001394399556565767, "loss": 1.7629, "step": 4753 }, { "epoch": 1.474835886214442, "grad_norm": 0.26953125, "learning_rate": 0.00013941737561293339, "loss": 1.3801, "step": 4754 }, { "epoch": 1.475148483901219, "grad_norm": 0.267578125, "learning_rate": 0.00013939479318961286, "loss": 1.2722, "step": 4755 }, { "epoch": 1.4754610815879963, "grad_norm": 0.271484375, "learning_rate": 0.0001393722083879785, "loss": 1.5343, "step": 4756 }, { "epoch": 1.4757736792747733, "grad_norm": 0.271484375, "learning_rate": 0.00013934962120939367, "loss": 1.4174, "step": 4757 }, { "epoch": 1.4760862769615506, "grad_norm": 0.271484375, "learning_rate": 0.0001393270316552221, "loss": 1.6169, "step": 4758 }, { "epoch": 1.4763988746483276, "grad_norm": 0.287109375, "learning_rate": 0.00013930443972682755, "loss": 1.5633, "step": 4759 }, { "epoch": 1.4767114723351047, "grad_norm": 0.287109375, "learning_rate": 0.00013928184542557386, "loss": 1.5436, "step": 4760 }, { "epoch": 1.4770240700218817, "grad_norm": 0.283203125, "learning_rate": 0.0001392592487528251, "loss": 1.4456, "step": 4761 }, { "epoch": 1.477336667708659, "grad_norm": 0.291015625, "learning_rate": 0.00013923664970994548, "loss": 1.7102, "step": 4762 }, { "epoch": 1.477649265395436, "grad_norm": 0.283203125, "learning_rate": 0.0001392140482982993, "loss": 1.2565, "step": 4763 }, { "epoch": 1.4779618630822131, "grad_norm": 0.28125, "learning_rate": 0.00013919144451925107, "loss": 1.6703, "step": 4764 }, { "epoch": 1.4782744607689904, "grad_norm": 0.27734375, "learning_rate": 0.00013916883837416537, "loss": 1.6054, "step": 4765 }, { "epoch": 1.4785870584557674, "grad_norm": 0.2734375, "learning_rate": 0.000139146229864407, "loss": 1.7249, "step": 4766 }, { "epoch": 1.4788996561425445, "grad_norm": 0.2890625, "learning_rate": 0.00013912361899134083, "loss": 1.3501, "step": 4767 }, { "epoch": 1.4792122538293215, "grad_norm": 0.26953125, "learning_rate": 0.00013910100575633197, "loss": 1.3183, "step": 4768 }, { "epoch": 1.4795248515160988, "grad_norm": 0.27734375, "learning_rate": 0.0001390783901607455, "loss": 1.4104, "step": 4769 }, { "epoch": 1.4798374492028759, "grad_norm": 0.291015625, "learning_rate": 0.00013905577220594688, "loss": 1.5617, "step": 4770 }, { "epoch": 1.4801500468896531, "grad_norm": 0.296875, "learning_rate": 0.0001390331518933015, "loss": 1.5939, "step": 4771 }, { "epoch": 1.4804626445764302, "grad_norm": 0.275390625, "learning_rate": 0.000139010529224175, "loss": 1.8937, "step": 4772 }, { "epoch": 1.4807752422632072, "grad_norm": 0.2890625, "learning_rate": 0.00013898790419993314, "loss": 1.5376, "step": 4773 }, { "epoch": 1.4810878399499843, "grad_norm": 0.275390625, "learning_rate": 0.00013896527682194182, "loss": 1.6127, "step": 4774 }, { "epoch": 1.4814004376367615, "grad_norm": 0.271484375, "learning_rate": 0.00013894264709156704, "loss": 1.6904, "step": 4775 }, { "epoch": 1.4817130353235386, "grad_norm": 0.265625, "learning_rate": 0.00013892001501017507, "loss": 1.2735, "step": 4776 }, { "epoch": 1.4820256330103156, "grad_norm": 0.30078125, "learning_rate": 0.00013889738057913222, "loss": 1.5661, "step": 4777 }, { "epoch": 1.482338230697093, "grad_norm": 0.29296875, "learning_rate": 0.0001388747437998049, "loss": 1.5189, "step": 4778 }, { "epoch": 1.48265082838387, "grad_norm": 0.26171875, "learning_rate": 0.00013885210467355977, "loss": 1.6821, "step": 4779 }, { "epoch": 1.482963426070647, "grad_norm": 0.294921875, "learning_rate": 0.0001388294632017636, "loss": 1.539, "step": 4780 }, { "epoch": 1.483276023757424, "grad_norm": 0.287109375, "learning_rate": 0.0001388068193857832, "loss": 1.3788, "step": 4781 }, { "epoch": 1.4835886214442013, "grad_norm": 0.287109375, "learning_rate": 0.0001387841732269857, "loss": 1.6355, "step": 4782 }, { "epoch": 1.4839012191309784, "grad_norm": 0.265625, "learning_rate": 0.00013876152472673824, "loss": 1.5469, "step": 4783 }, { "epoch": 1.4842138168177557, "grad_norm": 0.275390625, "learning_rate": 0.00013873887388640813, "loss": 1.4548, "step": 4784 }, { "epoch": 1.4845264145045327, "grad_norm": 0.2734375, "learning_rate": 0.00013871622070736283, "loss": 1.6311, "step": 4785 }, { "epoch": 1.4848390121913098, "grad_norm": 0.271484375, "learning_rate": 0.00013869356519096996, "loss": 1.6191, "step": 4786 }, { "epoch": 1.4851516098780868, "grad_norm": 0.28125, "learning_rate": 0.00013867090733859724, "loss": 1.6485, "step": 4787 }, { "epoch": 1.485464207564864, "grad_norm": 0.291015625, "learning_rate": 0.00013864824715161258, "loss": 1.4496, "step": 4788 }, { "epoch": 1.4857768052516411, "grad_norm": 0.28125, "learning_rate": 0.000138625584631384, "loss": 1.4758, "step": 4789 }, { "epoch": 1.4860894029384182, "grad_norm": 0.2890625, "learning_rate": 0.00013860291977927963, "loss": 1.5562, "step": 4790 }, { "epoch": 1.4864020006251955, "grad_norm": 0.294921875, "learning_rate": 0.00013858025259666778, "loss": 1.4644, "step": 4791 }, { "epoch": 1.4867145983119725, "grad_norm": 0.26953125, "learning_rate": 0.00013855758308491697, "loss": 1.4079, "step": 4792 }, { "epoch": 1.4870271959987496, "grad_norm": 0.28125, "learning_rate": 0.0001385349112453957, "loss": 1.7239, "step": 4793 }, { "epoch": 1.4873397936855266, "grad_norm": 0.2734375, "learning_rate": 0.00013851223707947273, "loss": 1.4636, "step": 4794 }, { "epoch": 1.4876523913723039, "grad_norm": 0.287109375, "learning_rate": 0.00013848956058851695, "loss": 1.7056, "step": 4795 }, { "epoch": 1.487964989059081, "grad_norm": 0.27734375, "learning_rate": 0.00013846688177389735, "loss": 1.6746, "step": 4796 }, { "epoch": 1.4882775867458582, "grad_norm": 0.28515625, "learning_rate": 0.00013844420063698307, "loss": 1.9306, "step": 4797 }, { "epoch": 1.4885901844326352, "grad_norm": 0.275390625, "learning_rate": 0.00013842151717914343, "loss": 1.5027, "step": 4798 }, { "epoch": 1.4889027821194123, "grad_norm": 0.275390625, "learning_rate": 0.0001383988314017478, "loss": 1.5049, "step": 4799 }, { "epoch": 1.4892153798061893, "grad_norm": 0.2734375, "learning_rate": 0.0001383761433061658, "loss": 1.4956, "step": 4800 }, { "epoch": 1.4892153798061893, "eval_loss": 1.540488600730896, "eval_runtime": 1909.2842, "eval_samples_per_second": 4.786, "eval_steps_per_second": 2.393, "step": 4800 }, { "epoch": 1.4895279774929666, "grad_norm": 0.271484375, "learning_rate": 0.00013835345289376713, "loss": 1.6199, "step": 4801 }, { "epoch": 1.4898405751797437, "grad_norm": 0.279296875, "learning_rate": 0.00013833076016592162, "loss": 1.5649, "step": 4802 }, { "epoch": 1.4901531728665207, "grad_norm": 0.265625, "learning_rate": 0.0001383080651239993, "loss": 1.2639, "step": 4803 }, { "epoch": 1.490465770553298, "grad_norm": 0.26953125, "learning_rate": 0.00013828536776937025, "loss": 1.4622, "step": 4804 }, { "epoch": 1.490778368240075, "grad_norm": 0.279296875, "learning_rate": 0.00013826266810340477, "loss": 1.711, "step": 4805 }, { "epoch": 1.491090965926852, "grad_norm": 0.28125, "learning_rate": 0.00013823996612747326, "loss": 1.8856, "step": 4806 }, { "epoch": 1.4914035636136291, "grad_norm": 0.275390625, "learning_rate": 0.00013821726184294625, "loss": 1.3782, "step": 4807 }, { "epoch": 1.4917161613004064, "grad_norm": 0.34765625, "learning_rate": 0.00013819455525119448, "loss": 2.2278, "step": 4808 }, { "epoch": 1.4920287589871835, "grad_norm": 0.279296875, "learning_rate": 0.0001381718463535887, "loss": 1.5884, "step": 4809 }, { "epoch": 1.4923413566739607, "grad_norm": 0.287109375, "learning_rate": 0.00013814913515149992, "loss": 1.7534, "step": 4810 }, { "epoch": 1.4926539543607378, "grad_norm": 0.267578125, "learning_rate": 0.00013812642164629923, "loss": 1.5207, "step": 4811 }, { "epoch": 1.4929665520475148, "grad_norm": 0.2734375, "learning_rate": 0.0001381037058393579, "loss": 1.9244, "step": 4812 }, { "epoch": 1.4932791497342919, "grad_norm": 0.275390625, "learning_rate": 0.00013808098773204728, "loss": 1.5499, "step": 4813 }, { "epoch": 1.4935917474210691, "grad_norm": 0.275390625, "learning_rate": 0.0001380582673257389, "loss": 1.4949, "step": 4814 }, { "epoch": 1.4939043451078462, "grad_norm": 0.28125, "learning_rate": 0.00013803554462180442, "loss": 1.6817, "step": 4815 }, { "epoch": 1.4942169427946232, "grad_norm": 0.2734375, "learning_rate": 0.0001380128196216157, "loss": 1.5036, "step": 4816 }, { "epoch": 1.4945295404814005, "grad_norm": 0.2734375, "learning_rate": 0.00013799009232654452, "loss": 1.7792, "step": 4817 }, { "epoch": 1.4948421381681776, "grad_norm": 0.279296875, "learning_rate": 0.0001379673627379631, "loss": 1.7147, "step": 4818 }, { "epoch": 1.4951547358549546, "grad_norm": 0.27734375, "learning_rate": 0.00013794463085724362, "loss": 1.6417, "step": 4819 }, { "epoch": 1.4954673335417317, "grad_norm": 0.28125, "learning_rate": 0.00013792189668575844, "loss": 1.6789, "step": 4820 }, { "epoch": 1.495779931228509, "grad_norm": 0.26171875, "learning_rate": 0.00013789916022488, "loss": 1.5019, "step": 4821 }, { "epoch": 1.496092528915286, "grad_norm": 0.279296875, "learning_rate": 0.00013787642147598098, "loss": 1.3454, "step": 4822 }, { "epoch": 1.4964051266020633, "grad_norm": 0.291015625, "learning_rate": 0.0001378536804404341, "loss": 1.3393, "step": 4823 }, { "epoch": 1.4967177242888403, "grad_norm": 0.283203125, "learning_rate": 0.00013783093711961232, "loss": 1.7143, "step": 4824 }, { "epoch": 1.4970303219756174, "grad_norm": 0.265625, "learning_rate": 0.00013780819151488867, "loss": 1.7801, "step": 4825 }, { "epoch": 1.4973429196623944, "grad_norm": 0.275390625, "learning_rate": 0.0001377854436276363, "loss": 1.4511, "step": 4826 }, { "epoch": 1.4976555173491717, "grad_norm": 0.2578125, "learning_rate": 0.00013776269345922853, "loss": 1.5489, "step": 4827 }, { "epoch": 1.4979681150359487, "grad_norm": 0.275390625, "learning_rate": 0.00013773994101103887, "loss": 1.4367, "step": 4828 }, { "epoch": 1.4982807127227258, "grad_norm": 0.27734375, "learning_rate": 0.00013771718628444084, "loss": 1.3745, "step": 4829 }, { "epoch": 1.498593310409503, "grad_norm": 0.27734375, "learning_rate": 0.00013769442928080825, "loss": 1.4794, "step": 4830 }, { "epoch": 1.49890590809628, "grad_norm": 0.271484375, "learning_rate": 0.0001376716700015149, "loss": 1.4688, "step": 4831 }, { "epoch": 1.4992185057830572, "grad_norm": 0.28515625, "learning_rate": 0.00013764890844793486, "loss": 1.6018, "step": 4832 }, { "epoch": 1.4995311034698342, "grad_norm": 0.27734375, "learning_rate": 0.00013762614462144222, "loss": 1.5939, "step": 4833 }, { "epoch": 1.4998437011566115, "grad_norm": 0.2890625, "learning_rate": 0.00013760337852341127, "loss": 1.4394, "step": 4834 }, { "epoch": 1.5001562988433885, "grad_norm": 0.28515625, "learning_rate": 0.00013758061015521644, "loss": 1.7241, "step": 4835 }, { "epoch": 1.5004688965301658, "grad_norm": 0.28515625, "learning_rate": 0.00013755783951823231, "loss": 1.3598, "step": 4836 }, { "epoch": 1.5007814942169428, "grad_norm": 0.291015625, "learning_rate": 0.00013753506661383353, "loss": 1.6162, "step": 4837 }, { "epoch": 1.50109409190372, "grad_norm": 0.271484375, "learning_rate": 0.00013751229144339498, "loss": 1.8877, "step": 4838 }, { "epoch": 1.501406689590497, "grad_norm": 0.28515625, "learning_rate": 0.00013748951400829152, "loss": 1.4028, "step": 4839 }, { "epoch": 1.501719287277274, "grad_norm": 0.2734375, "learning_rate": 0.0001374667343098984, "loss": 1.4623, "step": 4840 }, { "epoch": 1.5020318849640513, "grad_norm": 0.271484375, "learning_rate": 0.00013744395234959074, "loss": 1.4917, "step": 4841 }, { "epoch": 1.5023444826508285, "grad_norm": 0.283203125, "learning_rate": 0.00013742116812874398, "loss": 1.7893, "step": 4842 }, { "epoch": 1.5026570803376056, "grad_norm": 0.279296875, "learning_rate": 0.00013739838164873361, "loss": 1.8276, "step": 4843 }, { "epoch": 1.5029696780243826, "grad_norm": 0.2890625, "learning_rate": 0.00013737559291093532, "loss": 1.3587, "step": 4844 }, { "epoch": 1.5032822757111597, "grad_norm": 0.275390625, "learning_rate": 0.0001373528019167248, "loss": 1.4224, "step": 4845 }, { "epoch": 1.5035948733979367, "grad_norm": 0.28125, "learning_rate": 0.00013733000866747805, "loss": 1.5353, "step": 4846 }, { "epoch": 1.503907471084714, "grad_norm": 0.26171875, "learning_rate": 0.0001373072131645711, "loss": 1.4161, "step": 4847 }, { "epoch": 1.504220068771491, "grad_norm": 0.28125, "learning_rate": 0.00013728441540938015, "loss": 1.6566, "step": 4848 }, { "epoch": 1.5045326664582683, "grad_norm": 0.298828125, "learning_rate": 0.00013726161540328154, "loss": 1.7235, "step": 4849 }, { "epoch": 1.5048452641450454, "grad_norm": 0.275390625, "learning_rate": 0.00013723881314765173, "loss": 1.4253, "step": 4850 }, { "epoch": 1.5051578618318224, "grad_norm": 0.265625, "learning_rate": 0.0001372160086438673, "loss": 1.4144, "step": 4851 }, { "epoch": 1.5054704595185995, "grad_norm": 0.29296875, "learning_rate": 0.00013719320189330502, "loss": 1.4077, "step": 4852 }, { "epoch": 1.5057830572053765, "grad_norm": 0.28515625, "learning_rate": 0.00013717039289734174, "loss": 1.3204, "step": 4853 }, { "epoch": 1.5060956548921538, "grad_norm": 0.279296875, "learning_rate": 0.00013714758165735445, "loss": 1.701, "step": 4854 }, { "epoch": 1.506408252578931, "grad_norm": 0.26171875, "learning_rate": 0.00013712476817472036, "loss": 1.6931, "step": 4855 }, { "epoch": 1.5067208502657081, "grad_norm": 0.28125, "learning_rate": 0.00013710195245081666, "loss": 1.3077, "step": 4856 }, { "epoch": 1.5070334479524852, "grad_norm": 0.26953125, "learning_rate": 0.00013707913448702085, "loss": 1.6396, "step": 4857 }, { "epoch": 1.5073460456392622, "grad_norm": 0.28125, "learning_rate": 0.00013705631428471046, "loss": 1.3695, "step": 4858 }, { "epoch": 1.5076586433260393, "grad_norm": 0.279296875, "learning_rate": 0.0001370334918452631, "loss": 1.6431, "step": 4859 }, { "epoch": 1.5079712410128165, "grad_norm": 0.275390625, "learning_rate": 0.00013701066717005669, "loss": 1.639, "step": 4860 }, { "epoch": 1.5082838386995936, "grad_norm": 0.275390625, "learning_rate": 0.00013698784026046912, "loss": 1.587, "step": 4861 }, { "epoch": 1.5085964363863709, "grad_norm": 0.279296875, "learning_rate": 0.00013696501111787847, "loss": 1.3576, "step": 4862 }, { "epoch": 1.508909034073148, "grad_norm": 0.2734375, "learning_rate": 0.00013694217974366305, "loss": 1.8408, "step": 4863 }, { "epoch": 1.509221631759925, "grad_norm": 0.279296875, "learning_rate": 0.00013691934613920112, "loss": 1.4075, "step": 4864 }, { "epoch": 1.509534229446702, "grad_norm": 0.296875, "learning_rate": 0.00013689651030587122, "loss": 1.5765, "step": 4865 }, { "epoch": 1.509846827133479, "grad_norm": 0.28125, "learning_rate": 0.000136873672245052, "loss": 1.4989, "step": 4866 }, { "epoch": 1.5101594248202563, "grad_norm": 0.26953125, "learning_rate": 0.00013685083195812218, "loss": 1.2015, "step": 4867 }, { "epoch": 1.5104720225070336, "grad_norm": 0.2890625, "learning_rate": 0.00013682798944646067, "loss": 1.5619, "step": 4868 }, { "epoch": 1.5107846201938107, "grad_norm": 0.279296875, "learning_rate": 0.00013680514471144654, "loss": 1.8652, "step": 4869 }, { "epoch": 1.5110972178805877, "grad_norm": 0.29296875, "learning_rate": 0.0001367822977544589, "loss": 1.4711, "step": 4870 }, { "epoch": 1.5114098155673648, "grad_norm": 0.265625, "learning_rate": 0.00013675944857687708, "loss": 1.6081, "step": 4871 }, { "epoch": 1.5117224132541418, "grad_norm": 0.28515625, "learning_rate": 0.00013673659718008046, "loss": 1.7525, "step": 4872 }, { "epoch": 1.512035010940919, "grad_norm": 0.271484375, "learning_rate": 0.00013671374356544872, "loss": 1.422, "step": 4873 }, { "epoch": 1.5123476086276961, "grad_norm": 0.27734375, "learning_rate": 0.00013669088773436144, "loss": 1.496, "step": 4874 }, { "epoch": 1.5126602063144734, "grad_norm": 0.287109375, "learning_rate": 0.00013666802968819857, "loss": 2.1415, "step": 4875 }, { "epoch": 1.5129728040012504, "grad_norm": 0.28125, "learning_rate": 0.00013664516942833997, "loss": 1.4566, "step": 4876 }, { "epoch": 1.5132854016880275, "grad_norm": 0.26953125, "learning_rate": 0.00013662230695616584, "loss": 1.3616, "step": 4877 }, { "epoch": 1.5135979993748045, "grad_norm": 0.2890625, "learning_rate": 0.00013659944227305634, "loss": 1.3566, "step": 4878 }, { "epoch": 1.5139105970615816, "grad_norm": 0.279296875, "learning_rate": 0.0001365765753803919, "loss": 1.4325, "step": 4879 }, { "epoch": 1.5142231947483589, "grad_norm": 0.27734375, "learning_rate": 0.00013655370627955294, "loss": 1.4522, "step": 4880 }, { "epoch": 1.5145357924351361, "grad_norm": 0.28125, "learning_rate": 0.0001365308349719202, "loss": 1.3943, "step": 4881 }, { "epoch": 1.5148483901219132, "grad_norm": 0.2734375, "learning_rate": 0.0001365079614588744, "loss": 1.5389, "step": 4882 }, { "epoch": 1.5151609878086902, "grad_norm": 0.294921875, "learning_rate": 0.0001364850857417964, "loss": 1.6869, "step": 4883 }, { "epoch": 1.5154735854954673, "grad_norm": 0.28125, "learning_rate": 0.00013646220782206732, "loss": 1.5724, "step": 4884 }, { "epoch": 1.5157861831822443, "grad_norm": 0.27734375, "learning_rate": 0.00013643932770106827, "loss": 1.6731, "step": 4885 }, { "epoch": 1.5160987808690216, "grad_norm": 0.28125, "learning_rate": 0.00013641644538018056, "loss": 1.5248, "step": 4886 }, { "epoch": 1.5164113785557987, "grad_norm": 0.279296875, "learning_rate": 0.00013639356086078566, "loss": 1.5507, "step": 4887 }, { "epoch": 1.516723976242576, "grad_norm": 0.2890625, "learning_rate": 0.00013637067414426512, "loss": 1.5547, "step": 4888 }, { "epoch": 1.517036573929353, "grad_norm": 0.27734375, "learning_rate": 0.00013634778523200064, "loss": 1.4582, "step": 4889 }, { "epoch": 1.51734917161613, "grad_norm": 0.26953125, "learning_rate": 0.000136324894125374, "loss": 1.5639, "step": 4890 }, { "epoch": 1.517661769302907, "grad_norm": 0.28515625, "learning_rate": 0.00013630200082576728, "loss": 1.7309, "step": 4891 }, { "epoch": 1.5179743669896841, "grad_norm": 0.283203125, "learning_rate": 0.00013627910533456244, "loss": 1.5389, "step": 4892 }, { "epoch": 1.5182869646764614, "grad_norm": 0.28515625, "learning_rate": 0.00013625620765314182, "loss": 1.4838, "step": 4893 }, { "epoch": 1.5185995623632387, "grad_norm": 0.28125, "learning_rate": 0.00013623330778288775, "loss": 1.484, "step": 4894 }, { "epoch": 1.5189121600500157, "grad_norm": 0.29296875, "learning_rate": 0.0001362104057251827, "loss": 1.5483, "step": 4895 }, { "epoch": 1.5192247577367928, "grad_norm": 0.3359375, "learning_rate": 0.0001361875014814093, "loss": 2.0742, "step": 4896 }, { "epoch": 1.5195373554235698, "grad_norm": 0.287109375, "learning_rate": 0.00013616459505295036, "loss": 1.7894, "step": 4897 }, { "epoch": 1.5198499531103469, "grad_norm": 0.30078125, "learning_rate": 0.0001361416864411887, "loss": 1.6285, "step": 4898 }, { "epoch": 1.5201625507971241, "grad_norm": 0.279296875, "learning_rate": 0.0001361187756475074, "loss": 1.4062, "step": 4899 }, { "epoch": 1.5204751484839012, "grad_norm": 0.28515625, "learning_rate": 0.00013609586267328955, "loss": 1.5702, "step": 4900 }, { "epoch": 1.5207877461706785, "grad_norm": 0.279296875, "learning_rate": 0.0001360729475199185, "loss": 1.7382, "step": 4901 }, { "epoch": 1.5211003438574555, "grad_norm": 0.296875, "learning_rate": 0.0001360500301887776, "loss": 1.634, "step": 4902 }, { "epoch": 1.5214129415442326, "grad_norm": 0.3046875, "learning_rate": 0.0001360271106812505, "loss": 1.8597, "step": 4903 }, { "epoch": 1.5217255392310096, "grad_norm": 0.279296875, "learning_rate": 0.0001360041889987208, "loss": 1.5893, "step": 4904 }, { "epoch": 1.5220381369177867, "grad_norm": 0.28125, "learning_rate": 0.0001359812651425723, "loss": 1.3249, "step": 4905 }, { "epoch": 1.522350734604564, "grad_norm": 0.2734375, "learning_rate": 0.00013595833911418897, "loss": 1.4003, "step": 4906 }, { "epoch": 1.5226633322913412, "grad_norm": 0.294921875, "learning_rate": 0.00013593541091495495, "loss": 1.4114, "step": 4907 }, { "epoch": 1.5229759299781183, "grad_norm": 0.298828125, "learning_rate": 0.00013591248054625434, "loss": 1.6106, "step": 4908 }, { "epoch": 1.5232885276648953, "grad_norm": 0.26953125, "learning_rate": 0.0001358895480094715, "loss": 1.6379, "step": 4909 }, { "epoch": 1.5236011253516724, "grad_norm": 0.29296875, "learning_rate": 0.00013586661330599093, "loss": 1.5513, "step": 4910 }, { "epoch": 1.5239137230384494, "grad_norm": 0.275390625, "learning_rate": 0.0001358436764371972, "loss": 1.776, "step": 4911 }, { "epoch": 1.5242263207252267, "grad_norm": 0.26953125, "learning_rate": 0.00013582073740447506, "loss": 1.4984, "step": 4912 }, { "epoch": 1.5245389184120037, "grad_norm": 0.279296875, "learning_rate": 0.00013579779620920935, "loss": 1.4635, "step": 4913 }, { "epoch": 1.524851516098781, "grad_norm": 0.28125, "learning_rate": 0.00013577485285278505, "loss": 1.6161, "step": 4914 }, { "epoch": 1.525164113785558, "grad_norm": 0.271484375, "learning_rate": 0.0001357519073365873, "loss": 1.6316, "step": 4915 }, { "epoch": 1.525476711472335, "grad_norm": 0.283203125, "learning_rate": 0.00013572895966200137, "loss": 1.6609, "step": 4916 }, { "epoch": 1.5257893091591122, "grad_norm": 0.30078125, "learning_rate": 0.00013570600983041258, "loss": 1.7015, "step": 4917 }, { "epoch": 1.5261019068458892, "grad_norm": 0.271484375, "learning_rate": 0.0001356830578432065, "loss": 1.6981, "step": 4918 }, { "epoch": 1.5264145045326665, "grad_norm": 0.291015625, "learning_rate": 0.00013566010370176876, "loss": 1.9733, "step": 4919 }, { "epoch": 1.5267271022194435, "grad_norm": 0.27734375, "learning_rate": 0.00013563714740748507, "loss": 1.6131, "step": 4920 }, { "epoch": 1.5270396999062208, "grad_norm": 0.294921875, "learning_rate": 0.00013561418896174143, "loss": 1.6744, "step": 4921 }, { "epoch": 1.5273522975929978, "grad_norm": 0.283203125, "learning_rate": 0.00013559122836592378, "loss": 1.6179, "step": 4922 }, { "epoch": 1.527664895279775, "grad_norm": 0.2734375, "learning_rate": 0.00013556826562141833, "loss": 1.6257, "step": 4923 }, { "epoch": 1.527977492966552, "grad_norm": 0.275390625, "learning_rate": 0.00013554530072961137, "loss": 1.4059, "step": 4924 }, { "epoch": 1.5282900906533292, "grad_norm": 0.28125, "learning_rate": 0.00013552233369188934, "loss": 1.6328, "step": 4925 }, { "epoch": 1.5286026883401063, "grad_norm": 0.291015625, "learning_rate": 0.0001354993645096387, "loss": 1.4355, "step": 4926 }, { "epoch": 1.5289152860268835, "grad_norm": 0.27734375, "learning_rate": 0.00013547639318424622, "loss": 1.3874, "step": 4927 }, { "epoch": 1.5292278837136606, "grad_norm": 0.279296875, "learning_rate": 0.00013545341971709865, "loss": 1.658, "step": 4928 }, { "epoch": 1.5295404814004376, "grad_norm": 0.291015625, "learning_rate": 0.00013543044410958295, "loss": 1.6181, "step": 4929 }, { "epoch": 1.5298530790872147, "grad_norm": 0.279296875, "learning_rate": 0.00013540746636308624, "loss": 1.5662, "step": 4930 }, { "epoch": 1.5301656767739917, "grad_norm": 0.28515625, "learning_rate": 0.00013538448647899563, "loss": 1.5986, "step": 4931 }, { "epoch": 1.530478274460769, "grad_norm": 0.275390625, "learning_rate": 0.00013536150445869847, "loss": 1.4048, "step": 4932 }, { "epoch": 1.530790872147546, "grad_norm": 0.2890625, "learning_rate": 0.00013533852030358224, "loss": 1.7174, "step": 4933 }, { "epoch": 1.5311034698343233, "grad_norm": 0.287109375, "learning_rate": 0.0001353155340150345, "loss": 1.513, "step": 4934 }, { "epoch": 1.5314160675211004, "grad_norm": 0.26953125, "learning_rate": 0.000135292545594443, "loss": 1.6644, "step": 4935 }, { "epoch": 1.5317286652078774, "grad_norm": 0.283203125, "learning_rate": 0.0001352695550431955, "loss": 1.6251, "step": 4936 }, { "epoch": 1.5320412628946545, "grad_norm": 0.27734375, "learning_rate": 0.00013524656236268005, "loss": 1.4987, "step": 4937 }, { "epoch": 1.5323538605814317, "grad_norm": 0.28125, "learning_rate": 0.0001352235675542847, "loss": 1.4943, "step": 4938 }, { "epoch": 1.5326664582682088, "grad_norm": 0.279296875, "learning_rate": 0.0001352005706193977, "loss": 1.571, "step": 4939 }, { "epoch": 1.532979055954986, "grad_norm": 0.28515625, "learning_rate": 0.00013517757155940736, "loss": 1.687, "step": 4940 }, { "epoch": 1.5332916536417631, "grad_norm": 0.28125, "learning_rate": 0.00013515457037570222, "loss": 1.5219, "step": 4941 }, { "epoch": 1.5336042513285402, "grad_norm": 0.283203125, "learning_rate": 0.0001351315670696709, "loss": 1.7764, "step": 4942 }, { "epoch": 1.5339168490153172, "grad_norm": 0.28125, "learning_rate": 0.0001351085616427021, "loss": 1.6955, "step": 4943 }, { "epoch": 1.5342294467020943, "grad_norm": 0.27734375, "learning_rate": 0.00013508555409618466, "loss": 1.4089, "step": 4944 }, { "epoch": 1.5345420443888715, "grad_norm": 0.302734375, "learning_rate": 0.00013506254443150764, "loss": 1.5426, "step": 4945 }, { "epoch": 1.5348546420756486, "grad_norm": 0.28125, "learning_rate": 0.0001350395326500601, "loss": 1.8171, "step": 4946 }, { "epoch": 1.5351672397624259, "grad_norm": 0.287109375, "learning_rate": 0.00013501651875323133, "loss": 1.4028, "step": 4947 }, { "epoch": 1.535479837449203, "grad_norm": 0.291015625, "learning_rate": 0.00013499350274241074, "loss": 1.8051, "step": 4948 }, { "epoch": 1.53579243513598, "grad_norm": 0.291015625, "learning_rate": 0.00013497048461898775, "loss": 1.8259, "step": 4949 }, { "epoch": 1.536105032822757, "grad_norm": 0.294921875, "learning_rate": 0.00013494746438435205, "loss": 1.4848, "step": 4950 }, { "epoch": 1.5364176305095343, "grad_norm": 0.2890625, "learning_rate": 0.0001349244420398934, "loss": 1.506, "step": 4951 }, { "epoch": 1.5367302281963113, "grad_norm": 0.265625, "learning_rate": 0.00013490141758700167, "loss": 1.4098, "step": 4952 }, { "epoch": 1.5370428258830886, "grad_norm": 0.2890625, "learning_rate": 0.00013487839102706693, "loss": 1.6048, "step": 4953 }, { "epoch": 1.5373554235698657, "grad_norm": 0.2734375, "learning_rate": 0.00013485536236147925, "loss": 1.7807, "step": 4954 }, { "epoch": 1.5376680212566427, "grad_norm": 0.279296875, "learning_rate": 0.00013483233159162892, "loss": 1.6532, "step": 4955 }, { "epoch": 1.5379806189434198, "grad_norm": 0.279296875, "learning_rate": 0.00013480929871890633, "loss": 1.5882, "step": 4956 }, { "epoch": 1.5382932166301968, "grad_norm": 0.28125, "learning_rate": 0.00013478626374470202, "loss": 1.3998, "step": 4957 }, { "epoch": 1.538605814316974, "grad_norm": 0.28125, "learning_rate": 0.00013476322667040663, "loss": 1.5972, "step": 4958 }, { "epoch": 1.5389184120037511, "grad_norm": 0.27734375, "learning_rate": 0.000134740187497411, "loss": 1.8103, "step": 4959 }, { "epoch": 1.5392310096905284, "grad_norm": 0.283203125, "learning_rate": 0.00013471714622710596, "loss": 1.5846, "step": 4960 }, { "epoch": 1.5395436073773054, "grad_norm": 0.28515625, "learning_rate": 0.00013469410286088255, "loss": 1.2591, "step": 4961 }, { "epoch": 1.5398562050640825, "grad_norm": 0.291015625, "learning_rate": 0.00013467105740013193, "loss": 1.6226, "step": 4962 }, { "epoch": 1.5401688027508595, "grad_norm": 0.2890625, "learning_rate": 0.00013464800984624542, "loss": 1.5573, "step": 4963 }, { "epoch": 1.5404814004376368, "grad_norm": 0.263671875, "learning_rate": 0.00013462496020061438, "loss": 1.7478, "step": 4964 }, { "epoch": 1.5407939981244139, "grad_norm": 0.2890625, "learning_rate": 0.00013460190846463035, "loss": 1.522, "step": 4965 }, { "epoch": 1.5411065958111911, "grad_norm": 0.265625, "learning_rate": 0.00013457885463968508, "loss": 1.8658, "step": 4966 }, { "epoch": 1.5414191934979682, "grad_norm": 0.28515625, "learning_rate": 0.00013455579872717025, "loss": 1.6142, "step": 4967 }, { "epoch": 1.5417317911847452, "grad_norm": 0.28125, "learning_rate": 0.0001345327407284778, "loss": 1.5669, "step": 4968 }, { "epoch": 1.5420443888715223, "grad_norm": 0.28125, "learning_rate": 0.0001345096806449998, "loss": 1.5322, "step": 4969 }, { "epoch": 1.5423569865582993, "grad_norm": 0.267578125, "learning_rate": 0.0001344866184781284, "loss": 1.3081, "step": 4970 }, { "epoch": 1.5426695842450766, "grad_norm": 0.28515625, "learning_rate": 0.00013446355422925592, "loss": 1.6975, "step": 4971 }, { "epoch": 1.5429821819318537, "grad_norm": 0.294921875, "learning_rate": 0.00013444048789977472, "loss": 1.695, "step": 4972 }, { "epoch": 1.543294779618631, "grad_norm": 0.26171875, "learning_rate": 0.0001344174194910774, "loss": 1.7535, "step": 4973 }, { "epoch": 1.543607377305408, "grad_norm": 0.29296875, "learning_rate": 0.0001343943490045566, "loss": 1.4863, "step": 4974 }, { "epoch": 1.543919974992185, "grad_norm": 0.2734375, "learning_rate": 0.0001343712764416051, "loss": 1.4841, "step": 4975 }, { "epoch": 1.544232572678962, "grad_norm": 0.26171875, "learning_rate": 0.00013434820180361587, "loss": 1.6745, "step": 4976 }, { "epoch": 1.5445451703657391, "grad_norm": 0.291015625, "learning_rate": 0.00013432512509198196, "loss": 1.4136, "step": 4977 }, { "epoch": 1.5448577680525164, "grad_norm": 0.28125, "learning_rate": 0.00013430204630809645, "loss": 1.6532, "step": 4978 }, { "epoch": 1.5451703657392937, "grad_norm": 0.294921875, "learning_rate": 0.00013427896545335273, "loss": 1.765, "step": 4979 }, { "epoch": 1.5454829634260707, "grad_norm": 0.26953125, "learning_rate": 0.00013425588252914415, "loss": 1.6788, "step": 4980 }, { "epoch": 1.5457955611128478, "grad_norm": 0.28515625, "learning_rate": 0.0001342327975368643, "loss": 1.5697, "step": 4981 }, { "epoch": 1.5461081587996248, "grad_norm": 0.275390625, "learning_rate": 0.00013420971047790683, "loss": 1.566, "step": 4982 }, { "epoch": 1.5464207564864019, "grad_norm": 0.291015625, "learning_rate": 0.00013418662135366557, "loss": 1.6018, "step": 4983 }, { "epoch": 1.5467333541731791, "grad_norm": 0.279296875, "learning_rate": 0.0001341635301655344, "loss": 1.5691, "step": 4984 }, { "epoch": 1.5470459518599562, "grad_norm": 0.28515625, "learning_rate": 0.0001341404369149074, "loss": 1.509, "step": 4985 }, { "epoch": 1.5473585495467335, "grad_norm": 0.2890625, "learning_rate": 0.00013411734160317866, "loss": 1.6771, "step": 4986 }, { "epoch": 1.5476711472335105, "grad_norm": 0.28125, "learning_rate": 0.00013409424423174257, "loss": 1.4246, "step": 4987 }, { "epoch": 1.5479837449202876, "grad_norm": 0.279296875, "learning_rate": 0.00013407114480199349, "loss": 1.589, "step": 4988 }, { "epoch": 1.5482963426070646, "grad_norm": 0.26953125, "learning_rate": 0.00013404804331532605, "loss": 1.4727, "step": 4989 }, { "epoch": 1.5486089402938417, "grad_norm": 0.2734375, "learning_rate": 0.00013402493977313478, "loss": 1.6676, "step": 4990 }, { "epoch": 1.548921537980619, "grad_norm": 0.279296875, "learning_rate": 0.00013400183417681456, "loss": 1.4347, "step": 4991 }, { "epoch": 1.5492341356673962, "grad_norm": 0.265625, "learning_rate": 0.00013397872652776025, "loss": 1.4311, "step": 4992 }, { "epoch": 1.5495467333541733, "grad_norm": 0.28515625, "learning_rate": 0.00013395561682736694, "loss": 1.3854, "step": 4993 }, { "epoch": 1.5498593310409503, "grad_norm": 0.275390625, "learning_rate": 0.00013393250507702978, "loss": 1.513, "step": 4994 }, { "epoch": 1.5501719287277274, "grad_norm": 0.291015625, "learning_rate": 0.0001339093912781441, "loss": 1.6341, "step": 4995 }, { "epoch": 1.5504845264145044, "grad_norm": 0.279296875, "learning_rate": 0.00013388627543210515, "loss": 1.5025, "step": 4996 }, { "epoch": 1.5507971241012817, "grad_norm": 0.279296875, "learning_rate": 0.00013386315754030864, "loss": 1.6215, "step": 4997 }, { "epoch": 1.5511097217880587, "grad_norm": 0.275390625, "learning_rate": 0.0001338400376041501, "loss": 1.6972, "step": 4998 }, { "epoch": 1.551422319474836, "grad_norm": 0.279296875, "learning_rate": 0.00013381691562502543, "loss": 1.7307, "step": 4999 }, { "epoch": 1.551734917161613, "grad_norm": 0.296875, "learning_rate": 0.00013379379160433045, "loss": 1.9134, "step": 5000 }, { "epoch": 1.55204751484839, "grad_norm": 0.28515625, "learning_rate": 0.00013377066554346123, "loss": 1.6386, "step": 5001 }, { "epoch": 1.5523601125351671, "grad_norm": 0.291015625, "learning_rate": 0.00013374753744381385, "loss": 1.2689, "step": 5002 }, { "epoch": 1.5526727102219442, "grad_norm": 0.28515625, "learning_rate": 0.00013372440730678465, "loss": 1.5805, "step": 5003 }, { "epoch": 1.5529853079087215, "grad_norm": 0.2890625, "learning_rate": 0.00013370127513377, "loss": 1.4609, "step": 5004 }, { "epoch": 1.5532979055954987, "grad_norm": 0.283203125, "learning_rate": 0.00013367814092616644, "loss": 1.6764, "step": 5005 }, { "epoch": 1.5536105032822758, "grad_norm": 0.296875, "learning_rate": 0.00013365500468537057, "loss": 1.4582, "step": 5006 }, { "epoch": 1.5539231009690528, "grad_norm": 0.28125, "learning_rate": 0.00013363186641277922, "loss": 1.6675, "step": 5007 }, { "epoch": 1.55423569865583, "grad_norm": 0.2890625, "learning_rate": 0.0001336087261097892, "loss": 1.5632, "step": 5008 }, { "epoch": 1.554548296342607, "grad_norm": 0.287109375, "learning_rate": 0.0001335855837777976, "loss": 1.6013, "step": 5009 }, { "epoch": 1.5548608940293842, "grad_norm": 0.267578125, "learning_rate": 0.00013356243941820144, "loss": 1.8472, "step": 5010 }, { "epoch": 1.5551734917161613, "grad_norm": 0.275390625, "learning_rate": 0.0001335392930323981, "loss": 1.6139, "step": 5011 }, { "epoch": 1.5554860894029385, "grad_norm": 0.28125, "learning_rate": 0.00013351614462178487, "loss": 1.5984, "step": 5012 }, { "epoch": 1.5557986870897156, "grad_norm": 0.279296875, "learning_rate": 0.0001334929941877593, "loss": 1.5614, "step": 5013 }, { "epoch": 1.5561112847764926, "grad_norm": 0.32421875, "learning_rate": 0.00013346984173171896, "loss": 2.3071, "step": 5014 }, { "epoch": 1.5564238824632697, "grad_norm": 0.296875, "learning_rate": 0.00013344668725506165, "loss": 1.3938, "step": 5015 }, { "epoch": 1.5567364801500467, "grad_norm": 0.267578125, "learning_rate": 0.00013342353075918522, "loss": 1.7061, "step": 5016 }, { "epoch": 1.557049077836824, "grad_norm": 0.28125, "learning_rate": 0.00013340037224548765, "loss": 1.7717, "step": 5017 }, { "epoch": 1.5573616755236013, "grad_norm": 0.28125, "learning_rate": 0.000133377211715367, "loss": 1.478, "step": 5018 }, { "epoch": 1.5576742732103783, "grad_norm": 0.279296875, "learning_rate": 0.0001333540491702216, "loss": 1.64, "step": 5019 }, { "epoch": 1.5579868708971554, "grad_norm": 0.2890625, "learning_rate": 0.00013333088461144968, "loss": 1.7206, "step": 5020 }, { "epoch": 1.5582994685839324, "grad_norm": 0.275390625, "learning_rate": 0.00013330771804044984, "loss": 1.5077, "step": 5021 }, { "epoch": 1.5586120662707095, "grad_norm": 0.294921875, "learning_rate": 0.0001332845494586206, "loss": 1.9248, "step": 5022 }, { "epoch": 1.5589246639574867, "grad_norm": 0.2890625, "learning_rate": 0.00013326137886736069, "loss": 1.6623, "step": 5023 }, { "epoch": 1.5592372616442638, "grad_norm": 0.287109375, "learning_rate": 0.00013323820626806896, "loss": 1.4811, "step": 5024 }, { "epoch": 1.559549859331041, "grad_norm": 0.265625, "learning_rate": 0.00013321503166214435, "loss": 1.5143, "step": 5025 }, { "epoch": 1.5598624570178181, "grad_norm": 0.279296875, "learning_rate": 0.00013319185505098597, "loss": 1.5872, "step": 5026 }, { "epoch": 1.5601750547045952, "grad_norm": 0.267578125, "learning_rate": 0.000133168676435993, "loss": 1.4517, "step": 5027 }, { "epoch": 1.5604876523913722, "grad_norm": 0.27734375, "learning_rate": 0.00013314549581856474, "loss": 1.3147, "step": 5028 }, { "epoch": 1.5608002500781493, "grad_norm": 0.275390625, "learning_rate": 0.00013312231320010068, "loss": 1.5296, "step": 5029 }, { "epoch": 1.5611128477649265, "grad_norm": 0.2578125, "learning_rate": 0.00013309912858200037, "loss": 1.4328, "step": 5030 }, { "epoch": 1.5614254454517038, "grad_norm": 0.28125, "learning_rate": 0.00013307594196566348, "loss": 1.3094, "step": 5031 }, { "epoch": 1.5617380431384809, "grad_norm": 0.275390625, "learning_rate": 0.00013305275335248983, "loss": 1.3946, "step": 5032 }, { "epoch": 1.562050640825258, "grad_norm": 0.298828125, "learning_rate": 0.00013302956274387933, "loss": 1.3143, "step": 5033 }, { "epoch": 1.562363238512035, "grad_norm": 0.28125, "learning_rate": 0.00013300637014123206, "loss": 1.4089, "step": 5034 }, { "epoch": 1.562675836198812, "grad_norm": 0.2734375, "learning_rate": 0.00013298317554594815, "loss": 1.808, "step": 5035 }, { "epoch": 1.5629884338855893, "grad_norm": 0.279296875, "learning_rate": 0.00013295997895942788, "loss": 1.4552, "step": 5036 }, { "epoch": 1.5633010315723663, "grad_norm": 0.291015625, "learning_rate": 0.00013293678038307172, "loss": 1.5808, "step": 5037 }, { "epoch": 1.5636136292591436, "grad_norm": 0.28515625, "learning_rate": 0.00013291357981828013, "loss": 1.6731, "step": 5038 }, { "epoch": 1.5639262269459207, "grad_norm": 0.265625, "learning_rate": 0.0001328903772664538, "loss": 1.7043, "step": 5039 }, { "epoch": 1.5642388246326977, "grad_norm": 0.29296875, "learning_rate": 0.00013286717272899346, "loss": 1.4325, "step": 5040 }, { "epoch": 1.5645514223194747, "grad_norm": 0.283203125, "learning_rate": 0.00013284396620730002, "loss": 1.4672, "step": 5041 }, { "epoch": 1.5648640200062518, "grad_norm": 0.26171875, "learning_rate": 0.0001328207577027745, "loss": 1.602, "step": 5042 }, { "epoch": 1.565176617693029, "grad_norm": 0.2734375, "learning_rate": 0.000132797547216818, "loss": 1.2986, "step": 5043 }, { "epoch": 1.5654892153798063, "grad_norm": 0.271484375, "learning_rate": 0.00013277433475083182, "loss": 1.3907, "step": 5044 }, { "epoch": 1.5658018130665834, "grad_norm": 0.2890625, "learning_rate": 0.00013275112030621724, "loss": 1.4888, "step": 5045 }, { "epoch": 1.5661144107533604, "grad_norm": 0.287109375, "learning_rate": 0.00013272790388437579, "loss": 1.5324, "step": 5046 }, { "epoch": 1.5664270084401375, "grad_norm": 0.267578125, "learning_rate": 0.00013270468548670913, "loss": 1.4203, "step": 5047 }, { "epoch": 1.5667396061269145, "grad_norm": 0.26953125, "learning_rate": 0.0001326814651146189, "loss": 1.6704, "step": 5048 }, { "epoch": 1.5670522038136918, "grad_norm": 0.271484375, "learning_rate": 0.00013265824276950696, "loss": 1.4298, "step": 5049 }, { "epoch": 1.5673648015004689, "grad_norm": 0.267578125, "learning_rate": 0.00013263501845277528, "loss": 1.2549, "step": 5050 }, { "epoch": 1.5676773991872461, "grad_norm": 0.279296875, "learning_rate": 0.000132611792165826, "loss": 1.4979, "step": 5051 }, { "epoch": 1.5679899968740232, "grad_norm": 0.2890625, "learning_rate": 0.0001325885639100612, "loss": 1.5943, "step": 5052 }, { "epoch": 1.5683025945608002, "grad_norm": 0.265625, "learning_rate": 0.00013256533368688334, "loss": 1.5182, "step": 5053 }, { "epoch": 1.5686151922475773, "grad_norm": 0.302734375, "learning_rate": 0.00013254210149769475, "loss": 1.413, "step": 5054 }, { "epoch": 1.5689277899343543, "grad_norm": 0.28125, "learning_rate": 0.000132518867343898, "loss": 1.7251, "step": 5055 }, { "epoch": 1.5692403876211316, "grad_norm": 0.306640625, "learning_rate": 0.00013249563122689584, "loss": 1.774, "step": 5056 }, { "epoch": 1.5695529853079089, "grad_norm": 0.263671875, "learning_rate": 0.000132472393148091, "loss": 1.4336, "step": 5057 }, { "epoch": 1.569865582994686, "grad_norm": 0.2734375, "learning_rate": 0.00013244915310888636, "loss": 1.551, "step": 5058 }, { "epoch": 1.570178180681463, "grad_norm": 0.279296875, "learning_rate": 0.00013242591111068506, "loss": 1.5781, "step": 5059 }, { "epoch": 1.57049077836824, "grad_norm": 0.279296875, "learning_rate": 0.00013240266715489017, "loss": 1.2934, "step": 5060 }, { "epoch": 1.570803376055017, "grad_norm": 0.27734375, "learning_rate": 0.0001323794212429049, "loss": 1.2185, "step": 5061 }, { "epoch": 1.5711159737417943, "grad_norm": 0.28515625, "learning_rate": 0.0001323561733761328, "loss": 1.4913, "step": 5062 }, { "epoch": 1.5714285714285714, "grad_norm": 0.26953125, "learning_rate": 0.00013233292355597725, "loss": 1.7294, "step": 5063 }, { "epoch": 1.5717411691153487, "grad_norm": 0.271484375, "learning_rate": 0.0001323096717838419, "loss": 1.6615, "step": 5064 }, { "epoch": 1.5720537668021257, "grad_norm": 0.392578125, "learning_rate": 0.0001322864180611305, "loss": 2.0632, "step": 5065 }, { "epoch": 1.5723663644889028, "grad_norm": 0.27734375, "learning_rate": 0.0001322631623892469, "loss": 1.6026, "step": 5066 }, { "epoch": 1.5726789621756798, "grad_norm": 0.279296875, "learning_rate": 0.00013223990476959505, "loss": 1.3494, "step": 5067 }, { "epoch": 1.5729915598624569, "grad_norm": 0.28125, "learning_rate": 0.0001322166452035791, "loss": 1.6345, "step": 5068 }, { "epoch": 1.5733041575492341, "grad_norm": 0.275390625, "learning_rate": 0.00013219338369260317, "loss": 1.3887, "step": 5069 }, { "epoch": 1.5736167552360114, "grad_norm": 0.27734375, "learning_rate": 0.0001321701202380717, "loss": 1.525, "step": 5070 }, { "epoch": 1.5739293529227885, "grad_norm": 0.28125, "learning_rate": 0.00013214685484138903, "loss": 1.5575, "step": 5071 }, { "epoch": 1.5742419506095655, "grad_norm": 0.279296875, "learning_rate": 0.00013212358750395984, "loss": 1.4927, "step": 5072 }, { "epoch": 1.5745545482963426, "grad_norm": 0.26171875, "learning_rate": 0.00013210031822718867, "loss": 1.7897, "step": 5073 }, { "epoch": 1.5748671459831196, "grad_norm": 0.27734375, "learning_rate": 0.0001320770470124804, "loss": 1.4913, "step": 5074 }, { "epoch": 1.5751797436698969, "grad_norm": 0.27734375, "learning_rate": 0.0001320537738612399, "loss": 1.5544, "step": 5075 }, { "epoch": 1.575492341356674, "grad_norm": 0.28125, "learning_rate": 0.00013203049877487226, "loss": 1.5843, "step": 5076 }, { "epoch": 1.5758049390434512, "grad_norm": 0.28125, "learning_rate": 0.0001320072217547826, "loss": 1.4814, "step": 5077 }, { "epoch": 1.5761175367302283, "grad_norm": 0.30078125, "learning_rate": 0.00013198394280237617, "loss": 1.5352, "step": 5078 }, { "epoch": 1.5764301344170053, "grad_norm": 0.26953125, "learning_rate": 0.00013196066191905833, "loss": 1.5874, "step": 5079 }, { "epoch": 1.5767427321037824, "grad_norm": 0.279296875, "learning_rate": 0.00013193737910623463, "loss": 1.3757, "step": 5080 }, { "epoch": 1.5770553297905594, "grad_norm": 0.294921875, "learning_rate": 0.00013191409436531063, "loss": 1.9468, "step": 5081 }, { "epoch": 1.5773679274773367, "grad_norm": 0.28125, "learning_rate": 0.00013189080769769208, "loss": 1.6272, "step": 5082 }, { "epoch": 1.577680525164114, "grad_norm": 0.2734375, "learning_rate": 0.00013186751910478488, "loss": 1.3636, "step": 5083 }, { "epoch": 1.577993122850891, "grad_norm": 0.275390625, "learning_rate": 0.00013184422858799493, "loss": 1.6307, "step": 5084 }, { "epoch": 1.578305720537668, "grad_norm": 0.283203125, "learning_rate": 0.00013182093614872827, "loss": 1.7675, "step": 5085 }, { "epoch": 1.578618318224445, "grad_norm": 0.296875, "learning_rate": 0.0001317976417883912, "loss": 1.6317, "step": 5086 }, { "epoch": 1.5789309159112221, "grad_norm": 0.298828125, "learning_rate": 0.0001317743455083899, "loss": 1.7721, "step": 5087 }, { "epoch": 1.5792435135979994, "grad_norm": 0.28125, "learning_rate": 0.00013175104731013096, "loss": 1.5939, "step": 5088 }, { "epoch": 1.5795561112847765, "grad_norm": 0.28125, "learning_rate": 0.0001317277471950208, "loss": 1.3737, "step": 5089 }, { "epoch": 1.5798687089715537, "grad_norm": 0.28125, "learning_rate": 0.0001317044451644661, "loss": 1.7188, "step": 5090 }, { "epoch": 1.5801813066583308, "grad_norm": 0.283203125, "learning_rate": 0.00013168114121987366, "loss": 1.4267, "step": 5091 }, { "epoch": 1.5804939043451078, "grad_norm": 0.26953125, "learning_rate": 0.00013165783536265034, "loss": 1.8701, "step": 5092 }, { "epoch": 1.5808065020318849, "grad_norm": 0.279296875, "learning_rate": 0.00013163452759420313, "loss": 1.7387, "step": 5093 }, { "epoch": 1.581119099718662, "grad_norm": 0.275390625, "learning_rate": 0.0001316112179159392, "loss": 1.541, "step": 5094 }, { "epoch": 1.5814316974054392, "grad_norm": 0.29296875, "learning_rate": 0.0001315879063292658, "loss": 1.7659, "step": 5095 }, { "epoch": 1.5817442950922165, "grad_norm": 0.28125, "learning_rate": 0.00013156459283559022, "loss": 1.6057, "step": 5096 }, { "epoch": 1.5820568927789935, "grad_norm": 0.28515625, "learning_rate": 0.00013154127743631992, "loss": 1.5351, "step": 5097 }, { "epoch": 1.5823694904657706, "grad_norm": 0.283203125, "learning_rate": 0.00013151796013286253, "loss": 1.2022, "step": 5098 }, { "epoch": 1.5826820881525476, "grad_norm": 0.302734375, "learning_rate": 0.00013149464092662572, "loss": 1.5904, "step": 5099 }, { "epoch": 1.5829946858393247, "grad_norm": 0.267578125, "learning_rate": 0.0001314713198190173, "loss": 1.641, "step": 5100 }, { "epoch": 1.583307283526102, "grad_norm": 0.283203125, "learning_rate": 0.0001314479968114452, "loss": 1.502, "step": 5101 }, { "epoch": 1.583619881212879, "grad_norm": 0.28515625, "learning_rate": 0.00013142467190531746, "loss": 1.3542, "step": 5102 }, { "epoch": 1.5839324788996563, "grad_norm": 0.287109375, "learning_rate": 0.00013140134510204222, "loss": 1.6327, "step": 5103 }, { "epoch": 1.5842450765864333, "grad_norm": 0.275390625, "learning_rate": 0.00013137801640302778, "loss": 1.8839, "step": 5104 }, { "epoch": 1.5845576742732104, "grad_norm": 0.2890625, "learning_rate": 0.00013135468580968248, "loss": 1.7045, "step": 5105 }, { "epoch": 1.5848702719599874, "grad_norm": 0.2890625, "learning_rate": 0.0001313313533234149, "loss": 1.5658, "step": 5106 }, { "epoch": 1.5851828696467645, "grad_norm": 0.28515625, "learning_rate": 0.00013130801894563354, "loss": 1.4013, "step": 5107 }, { "epoch": 1.5854954673335417, "grad_norm": 0.283203125, "learning_rate": 0.00013128468267774722, "loss": 1.6291, "step": 5108 }, { "epoch": 1.585808065020319, "grad_norm": 0.265625, "learning_rate": 0.00013126134452116466, "loss": 1.5818, "step": 5109 }, { "epoch": 1.586120662707096, "grad_norm": 0.27734375, "learning_rate": 0.00013123800447729497, "loss": 1.6866, "step": 5110 }, { "epoch": 1.5864332603938731, "grad_norm": 0.275390625, "learning_rate": 0.00013121466254754712, "loss": 1.697, "step": 5111 }, { "epoch": 1.5867458580806502, "grad_norm": 0.2734375, "learning_rate": 0.0001311913187333303, "loss": 1.6696, "step": 5112 }, { "epoch": 1.5870584557674272, "grad_norm": 0.2890625, "learning_rate": 0.00013116797303605387, "loss": 1.3883, "step": 5113 }, { "epoch": 1.5873710534542045, "grad_norm": 0.27734375, "learning_rate": 0.00013114462545712715, "loss": 1.7757, "step": 5114 }, { "epoch": 1.5876836511409815, "grad_norm": 0.2890625, "learning_rate": 0.0001311212759979597, "loss": 1.6715, "step": 5115 }, { "epoch": 1.5879962488277588, "grad_norm": 0.2890625, "learning_rate": 0.00013109792465996117, "loss": 1.423, "step": 5116 }, { "epoch": 1.5883088465145359, "grad_norm": 0.283203125, "learning_rate": 0.00013107457144454128, "loss": 1.6068, "step": 5117 }, { "epoch": 1.588621444201313, "grad_norm": 0.318359375, "learning_rate": 0.00013105121635310996, "loss": 2.3311, "step": 5118 }, { "epoch": 1.58893404188809, "grad_norm": 0.275390625, "learning_rate": 0.00013102785938707708, "loss": 1.6465, "step": 5119 }, { "epoch": 1.589246639574867, "grad_norm": 0.28125, "learning_rate": 0.00013100450054785284, "loss": 1.6986, "step": 5120 }, { "epoch": 1.5895592372616443, "grad_norm": 0.28515625, "learning_rate": 0.00013098113983684735, "loss": 1.3467, "step": 5121 }, { "epoch": 1.5898718349484215, "grad_norm": 0.279296875, "learning_rate": 0.000130957777255471, "loss": 1.6384, "step": 5122 }, { "epoch": 1.5901844326351986, "grad_norm": 0.27734375, "learning_rate": 0.00013093441280513415, "loss": 1.4282, "step": 5123 }, { "epoch": 1.5904970303219756, "grad_norm": 0.283203125, "learning_rate": 0.00013091104648724745, "loss": 1.5334, "step": 5124 }, { "epoch": 1.5908096280087527, "grad_norm": 0.279296875, "learning_rate": 0.00013088767830322145, "loss": 1.5452, "step": 5125 }, { "epoch": 1.5911222256955297, "grad_norm": 0.283203125, "learning_rate": 0.00013086430825446694, "loss": 1.4513, "step": 5126 }, { "epoch": 1.591434823382307, "grad_norm": 0.259765625, "learning_rate": 0.0001308409363423948, "loss": 1.5732, "step": 5127 }, { "epoch": 1.591747421069084, "grad_norm": 0.275390625, "learning_rate": 0.00013081756256841604, "loss": 1.9518, "step": 5128 }, { "epoch": 1.5920600187558613, "grad_norm": 0.267578125, "learning_rate": 0.00013079418693394174, "loss": 1.9115, "step": 5129 }, { "epoch": 1.5923726164426384, "grad_norm": 0.275390625, "learning_rate": 0.00013077080944038318, "loss": 2.0819, "step": 5130 }, { "epoch": 1.5926852141294154, "grad_norm": 0.291015625, "learning_rate": 0.0001307474300891516, "loss": 1.6396, "step": 5131 }, { "epoch": 1.5929978118161925, "grad_norm": 0.26953125, "learning_rate": 0.00013072404888165852, "loss": 1.4752, "step": 5132 }, { "epoch": 1.5933104095029695, "grad_norm": 0.271484375, "learning_rate": 0.0001307006658193154, "loss": 1.5275, "step": 5133 }, { "epoch": 1.5936230071897468, "grad_norm": 0.291015625, "learning_rate": 0.00013067728090353402, "loss": 1.7718, "step": 5134 }, { "epoch": 1.5939356048765239, "grad_norm": 0.2734375, "learning_rate": 0.00013065389413572607, "loss": 1.3918, "step": 5135 }, { "epoch": 1.5942482025633011, "grad_norm": 0.27734375, "learning_rate": 0.00013063050551730351, "loss": 1.3909, "step": 5136 }, { "epoch": 1.5945608002500782, "grad_norm": 0.28125, "learning_rate": 0.00013060711504967823, "loss": 1.6993, "step": 5137 }, { "epoch": 1.5948733979368552, "grad_norm": 0.283203125, "learning_rate": 0.00013058372273426247, "loss": 1.6457, "step": 5138 }, { "epoch": 1.5951859956236323, "grad_norm": 0.291015625, "learning_rate": 0.00013056032857246836, "loss": 1.4885, "step": 5139 }, { "epoch": 1.5954985933104096, "grad_norm": 0.2734375, "learning_rate": 0.0001305369325657083, "loss": 1.5207, "step": 5140 }, { "epoch": 1.5958111909971866, "grad_norm": 0.275390625, "learning_rate": 0.00013051353471539465, "loss": 1.6183, "step": 5141 }, { "epoch": 1.5961237886839639, "grad_norm": 0.291015625, "learning_rate": 0.0001304901350229401, "loss": 1.3675, "step": 5142 }, { "epoch": 1.596436386370741, "grad_norm": 0.2890625, "learning_rate": 0.0001304667334897572, "loss": 1.4596, "step": 5143 }, { "epoch": 1.596748984057518, "grad_norm": 0.298828125, "learning_rate": 0.00013044333011725878, "loss": 1.5626, "step": 5144 }, { "epoch": 1.597061581744295, "grad_norm": 0.28515625, "learning_rate": 0.00013041992490685773, "loss": 1.4892, "step": 5145 }, { "epoch": 1.597374179431072, "grad_norm": 0.28125, "learning_rate": 0.00013039651785996706, "loss": 1.3213, "step": 5146 }, { "epoch": 1.5976867771178493, "grad_norm": 0.283203125, "learning_rate": 0.00013037310897799986, "loss": 1.3564, "step": 5147 }, { "epoch": 1.5979993748046264, "grad_norm": 0.30859375, "learning_rate": 0.00013034969826236937, "loss": 1.6341, "step": 5148 }, { "epoch": 1.5983119724914037, "grad_norm": 0.279296875, "learning_rate": 0.0001303262857144889, "loss": 1.611, "step": 5149 }, { "epoch": 1.5986245701781807, "grad_norm": 0.2734375, "learning_rate": 0.00013030287133577195, "loss": 1.4833, "step": 5150 }, { "epoch": 1.5989371678649578, "grad_norm": 0.291015625, "learning_rate": 0.00013027945512763202, "loss": 1.4268, "step": 5151 }, { "epoch": 1.5992497655517348, "grad_norm": 0.287109375, "learning_rate": 0.0001302560370914828, "loss": 1.5177, "step": 5152 }, { "epoch": 1.599562363238512, "grad_norm": 0.28125, "learning_rate": 0.00013023261722873807, "loss": 1.6612, "step": 5153 }, { "epoch": 1.5998749609252891, "grad_norm": 0.28515625, "learning_rate": 0.00013020919554081173, "loss": 1.698, "step": 5154 }, { "epoch": 1.6001875586120664, "grad_norm": 0.275390625, "learning_rate": 0.00013018577202911775, "loss": 1.5155, "step": 5155 }, { "epoch": 1.6005001562988435, "grad_norm": 0.30078125, "learning_rate": 0.00013016234669507024, "loss": 1.5116, "step": 5156 }, { "epoch": 1.6008127539856205, "grad_norm": 0.2890625, "learning_rate": 0.00013013891954008342, "loss": 1.55, "step": 5157 }, { "epoch": 1.6011253516723976, "grad_norm": 0.287109375, "learning_rate": 0.00013011549056557163, "loss": 1.5349, "step": 5158 }, { "epoch": 1.6014379493591746, "grad_norm": 0.28515625, "learning_rate": 0.00013009205977294926, "loss": 1.561, "step": 5159 }, { "epoch": 1.6017505470459519, "grad_norm": 0.2890625, "learning_rate": 0.00013006862716363098, "loss": 1.6057, "step": 5160 }, { "epoch": 1.602063144732729, "grad_norm": 0.275390625, "learning_rate": 0.0001300451927390313, "loss": 1.3553, "step": 5161 }, { "epoch": 1.6023757424195062, "grad_norm": 0.271484375, "learning_rate": 0.00013002175650056504, "loss": 1.7408, "step": 5162 }, { "epoch": 1.6026883401062832, "grad_norm": 0.283203125, "learning_rate": 0.0001299983184496471, "loss": 1.5022, "step": 5163 }, { "epoch": 1.6030009377930603, "grad_norm": 0.27734375, "learning_rate": 0.00012997487858769244, "loss": 1.6159, "step": 5164 }, { "epoch": 1.6033135354798373, "grad_norm": 0.2890625, "learning_rate": 0.00012995143691611616, "loss": 1.3795, "step": 5165 }, { "epoch": 1.6036261331666146, "grad_norm": 0.27734375, "learning_rate": 0.0001299279934363335, "loss": 1.6353, "step": 5166 }, { "epoch": 1.6039387308533917, "grad_norm": 0.2734375, "learning_rate": 0.0001299045481497597, "loss": 1.6614, "step": 5167 }, { "epoch": 1.604251328540169, "grad_norm": 0.29296875, "learning_rate": 0.00012988110105781024, "loss": 1.5381, "step": 5168 }, { "epoch": 1.604563926226946, "grad_norm": 0.28515625, "learning_rate": 0.0001298576521619006, "loss": 1.5451, "step": 5169 }, { "epoch": 1.604876523913723, "grad_norm": 0.298828125, "learning_rate": 0.0001298342014634465, "loss": 1.6875, "step": 5170 }, { "epoch": 1.6051891216005, "grad_norm": 0.28515625, "learning_rate": 0.00012981074896386362, "loss": 1.6455, "step": 5171 }, { "epoch": 1.6055017192872771, "grad_norm": 0.279296875, "learning_rate": 0.00012978729466456783, "loss": 1.6731, "step": 5172 }, { "epoch": 1.6058143169740544, "grad_norm": 0.271484375, "learning_rate": 0.0001297638385669751, "loss": 1.2996, "step": 5173 }, { "epoch": 1.6061269146608315, "grad_norm": 0.302734375, "learning_rate": 0.0001297403806725015, "loss": 1.5165, "step": 5174 }, { "epoch": 1.6064395123476087, "grad_norm": 0.275390625, "learning_rate": 0.00012971692098256323, "loss": 1.585, "step": 5175 }, { "epoch": 1.6067521100343858, "grad_norm": 0.302734375, "learning_rate": 0.0001296934594985766, "loss": 1.3452, "step": 5176 }, { "epoch": 1.6070647077211628, "grad_norm": 0.27734375, "learning_rate": 0.00012966999622195794, "loss": 1.5152, "step": 5177 }, { "epoch": 1.6073773054079399, "grad_norm": 0.2890625, "learning_rate": 0.00012964653115412383, "loss": 1.3423, "step": 5178 }, { "epoch": 1.6076899030947172, "grad_norm": 0.28125, "learning_rate": 0.00012962306429649084, "loss": 1.5529, "step": 5179 }, { "epoch": 1.6080025007814942, "grad_norm": 0.271484375, "learning_rate": 0.0001295995956504757, "loss": 1.6913, "step": 5180 }, { "epoch": 1.6083150984682715, "grad_norm": 0.26953125, "learning_rate": 0.00012957612521749523, "loss": 1.7764, "step": 5181 }, { "epoch": 1.6086276961550485, "grad_norm": 0.283203125, "learning_rate": 0.00012955265299896646, "loss": 1.4887, "step": 5182 }, { "epoch": 1.6089402938418256, "grad_norm": 0.27734375, "learning_rate": 0.00012952917899630633, "loss": 1.4849, "step": 5183 }, { "epoch": 1.6092528915286026, "grad_norm": 0.275390625, "learning_rate": 0.00012950570321093206, "loss": 1.6065, "step": 5184 }, { "epoch": 1.6095654892153797, "grad_norm": 0.287109375, "learning_rate": 0.00012948222564426084, "loss": 1.5114, "step": 5185 }, { "epoch": 1.609878086902157, "grad_norm": 0.283203125, "learning_rate": 0.00012945874629771012, "loss": 1.3768, "step": 5186 }, { "epoch": 1.610190684588934, "grad_norm": 0.279296875, "learning_rate": 0.00012943526517269734, "loss": 1.3997, "step": 5187 }, { "epoch": 1.6105032822757113, "grad_norm": 0.27734375, "learning_rate": 0.00012941178227064007, "loss": 1.6198, "step": 5188 }, { "epoch": 1.6108158799624883, "grad_norm": 0.271484375, "learning_rate": 0.00012938829759295606, "loss": 1.3631, "step": 5189 }, { "epoch": 1.6111284776492654, "grad_norm": 0.275390625, "learning_rate": 0.00012936481114106307, "loss": 1.4819, "step": 5190 }, { "epoch": 1.6114410753360424, "grad_norm": 0.287109375, "learning_rate": 0.000129341322916379, "loss": 1.4377, "step": 5191 }, { "epoch": 1.6117536730228195, "grad_norm": 0.271484375, "learning_rate": 0.00012931783292032187, "loss": 1.3551, "step": 5192 }, { "epoch": 1.6120662707095967, "grad_norm": 0.283203125, "learning_rate": 0.0001292943411543098, "loss": 1.5592, "step": 5193 }, { "epoch": 1.612378868396374, "grad_norm": 0.26953125, "learning_rate": 0.00012927084761976104, "loss": 1.6148, "step": 5194 }, { "epoch": 1.612691466083151, "grad_norm": 0.26953125, "learning_rate": 0.0001292473523180939, "loss": 1.5892, "step": 5195 }, { "epoch": 1.613004063769928, "grad_norm": 0.283203125, "learning_rate": 0.00012922385525072685, "loss": 1.5989, "step": 5196 }, { "epoch": 1.6133166614567052, "grad_norm": 0.291015625, "learning_rate": 0.00012920035641907838, "loss": 1.7232, "step": 5197 }, { "epoch": 1.6136292591434822, "grad_norm": 0.275390625, "learning_rate": 0.00012917685582456722, "loss": 1.6316, "step": 5198 }, { "epoch": 1.6139418568302595, "grad_norm": 0.271484375, "learning_rate": 0.0001291533534686121, "loss": 1.3499, "step": 5199 }, { "epoch": 1.6142544545170365, "grad_norm": 0.26171875, "learning_rate": 0.00012912984935263184, "loss": 1.6058, "step": 5200 }, { "epoch": 1.6145670522038138, "grad_norm": 0.294921875, "learning_rate": 0.0001291063434780455, "loss": 1.2795, "step": 5201 }, { "epoch": 1.6148796498905909, "grad_norm": 0.294921875, "learning_rate": 0.0001290828358462721, "loss": 1.438, "step": 5202 }, { "epoch": 1.615192247577368, "grad_norm": 0.275390625, "learning_rate": 0.0001290593264587308, "loss": 1.5131, "step": 5203 }, { "epoch": 1.615504845264145, "grad_norm": 0.3203125, "learning_rate": 0.00012903581531684098, "loss": 2.1409, "step": 5204 }, { "epoch": 1.615817442950922, "grad_norm": 0.263671875, "learning_rate": 0.00012901230242202193, "loss": 1.5455, "step": 5205 }, { "epoch": 1.6161300406376993, "grad_norm": 0.283203125, "learning_rate": 0.00012898878777569328, "loss": 1.4208, "step": 5206 }, { "epoch": 1.6164426383244765, "grad_norm": 0.2890625, "learning_rate": 0.00012896527137927453, "loss": 1.6268, "step": 5207 }, { "epoch": 1.6167552360112536, "grad_norm": 0.2890625, "learning_rate": 0.00012894175323418546, "loss": 1.524, "step": 5208 }, { "epoch": 1.6170678336980306, "grad_norm": 0.310546875, "learning_rate": 0.00012891823334184585, "loss": 1.6437, "step": 5209 }, { "epoch": 1.6173804313848077, "grad_norm": 0.2734375, "learning_rate": 0.00012889471170367565, "loss": 1.4686, "step": 5210 }, { "epoch": 1.6176930290715847, "grad_norm": 0.275390625, "learning_rate": 0.00012887118832109485, "loss": 1.5133, "step": 5211 }, { "epoch": 1.618005626758362, "grad_norm": 0.283203125, "learning_rate": 0.00012884766319552367, "loss": 1.5129, "step": 5212 }, { "epoch": 1.618318224445139, "grad_norm": 0.283203125, "learning_rate": 0.00012882413632838227, "loss": 1.5047, "step": 5213 }, { "epoch": 1.6186308221319163, "grad_norm": 0.34765625, "learning_rate": 0.00012880060772109105, "loss": 2.2321, "step": 5214 }, { "epoch": 1.6189434198186934, "grad_norm": 0.283203125, "learning_rate": 0.00012877707737507044, "loss": 1.5715, "step": 5215 }, { "epoch": 1.6192560175054704, "grad_norm": 0.275390625, "learning_rate": 0.000128753545291741, "loss": 1.6164, "step": 5216 }, { "epoch": 1.6195686151922475, "grad_norm": 0.294921875, "learning_rate": 0.00012873001147252334, "loss": 1.6011, "step": 5217 }, { "epoch": 1.6198812128790245, "grad_norm": 0.275390625, "learning_rate": 0.00012870647591883833, "loss": 1.4171, "step": 5218 }, { "epoch": 1.6201938105658018, "grad_norm": 0.283203125, "learning_rate": 0.0001286829386321068, "loss": 1.6437, "step": 5219 }, { "epoch": 1.620506408252579, "grad_norm": 0.279296875, "learning_rate": 0.00012865939961374969, "loss": 1.6432, "step": 5220 }, { "epoch": 1.6208190059393561, "grad_norm": 0.28515625, "learning_rate": 0.00012863585886518808, "loss": 1.8146, "step": 5221 }, { "epoch": 1.6211316036261332, "grad_norm": 0.279296875, "learning_rate": 0.0001286123163878432, "loss": 1.6181, "step": 5222 }, { "epoch": 1.6214442013129102, "grad_norm": 0.2734375, "learning_rate": 0.0001285887721831363, "loss": 1.7212, "step": 5223 }, { "epoch": 1.6217567989996873, "grad_norm": 0.291015625, "learning_rate": 0.00012856522625248883, "loss": 1.4609, "step": 5224 }, { "epoch": 1.6220693966864645, "grad_norm": 0.2734375, "learning_rate": 0.00012854167859732222, "loss": 1.6521, "step": 5225 }, { "epoch": 1.6223819943732416, "grad_norm": 0.27734375, "learning_rate": 0.00012851812921905813, "loss": 1.6265, "step": 5226 }, { "epoch": 1.6226945920600189, "grad_norm": 0.283203125, "learning_rate": 0.0001284945781191182, "loss": 1.3805, "step": 5227 }, { "epoch": 1.623007189746796, "grad_norm": 0.2734375, "learning_rate": 0.00012847102529892432, "loss": 1.5773, "step": 5228 }, { "epoch": 1.623319787433573, "grad_norm": 0.27734375, "learning_rate": 0.00012844747075989833, "loss": 1.3183, "step": 5229 }, { "epoch": 1.62363238512035, "grad_norm": 0.2734375, "learning_rate": 0.00012842391450346228, "loss": 1.6123, "step": 5230 }, { "epoch": 1.623944982807127, "grad_norm": 0.27734375, "learning_rate": 0.0001284003565310383, "loss": 1.763, "step": 5231 }, { "epoch": 1.6242575804939043, "grad_norm": 0.3046875, "learning_rate": 0.00012837679684404862, "loss": 1.5674, "step": 5232 }, { "epoch": 1.6245701781806816, "grad_norm": 0.271484375, "learning_rate": 0.00012835323544391553, "loss": 1.6564, "step": 5233 }, { "epoch": 1.6248827758674587, "grad_norm": 0.271484375, "learning_rate": 0.0001283296723320615, "loss": 1.7379, "step": 5234 }, { "epoch": 1.6251953735542357, "grad_norm": 0.2890625, "learning_rate": 0.00012830610750990906, "loss": 1.4359, "step": 5235 }, { "epoch": 1.6255079712410128, "grad_norm": 0.283203125, "learning_rate": 0.00012828254097888082, "loss": 1.461, "step": 5236 }, { "epoch": 1.6258205689277898, "grad_norm": 0.2890625, "learning_rate": 0.00012825897274039956, "loss": 1.5576, "step": 5237 }, { "epoch": 1.626133166614567, "grad_norm": 0.271484375, "learning_rate": 0.00012823540279588807, "loss": 1.9163, "step": 5238 }, { "epoch": 1.6264457643013441, "grad_norm": 0.265625, "learning_rate": 0.00012821183114676937, "loss": 1.4001, "step": 5239 }, { "epoch": 1.6267583619881214, "grad_norm": 0.294921875, "learning_rate": 0.00012818825779446644, "loss": 1.5625, "step": 5240 }, { "epoch": 1.6270709596748985, "grad_norm": 0.298828125, "learning_rate": 0.00012816468274040246, "loss": 1.6887, "step": 5241 }, { "epoch": 1.6273835573616755, "grad_norm": 0.28125, "learning_rate": 0.00012814110598600073, "loss": 1.3923, "step": 5242 }, { "epoch": 1.6276961550484526, "grad_norm": 0.294921875, "learning_rate": 0.00012811752753268455, "loss": 1.4533, "step": 5243 }, { "epoch": 1.6280087527352296, "grad_norm": 0.279296875, "learning_rate": 0.00012809394738187742, "loss": 1.2255, "step": 5244 }, { "epoch": 1.6283213504220069, "grad_norm": 0.27734375, "learning_rate": 0.00012807036553500286, "loss": 1.4549, "step": 5245 }, { "epoch": 1.6286339481087841, "grad_norm": 0.287109375, "learning_rate": 0.00012804678199348457, "loss": 1.4125, "step": 5246 }, { "epoch": 1.6289465457955612, "grad_norm": 0.267578125, "learning_rate": 0.00012802319675874632, "loss": 1.7342, "step": 5247 }, { "epoch": 1.6292591434823382, "grad_norm": 0.2734375, "learning_rate": 0.00012799960983221197, "loss": 1.3691, "step": 5248 }, { "epoch": 1.6295717411691153, "grad_norm": 0.279296875, "learning_rate": 0.0001279760212153055, "loss": 1.5872, "step": 5249 }, { "epoch": 1.6298843388558923, "grad_norm": 0.294921875, "learning_rate": 0.00012795243090945094, "loss": 1.7517, "step": 5250 }, { "epoch": 1.6301969365426696, "grad_norm": 0.3125, "learning_rate": 0.00012792883891607257, "loss": 1.5518, "step": 5251 }, { "epoch": 1.6305095342294467, "grad_norm": 0.263671875, "learning_rate": 0.00012790524523659458, "loss": 1.6365, "step": 5252 }, { "epoch": 1.630822131916224, "grad_norm": 0.283203125, "learning_rate": 0.00012788164987244133, "loss": 1.6613, "step": 5253 }, { "epoch": 1.631134729603001, "grad_norm": 0.29296875, "learning_rate": 0.0001278580528250374, "loss": 1.6701, "step": 5254 }, { "epoch": 1.631447327289778, "grad_norm": 0.326171875, "learning_rate": 0.00012783445409580733, "loss": 2.2097, "step": 5255 }, { "epoch": 1.631759924976555, "grad_norm": 0.28125, "learning_rate": 0.00012781085368617574, "loss": 1.5379, "step": 5256 }, { "epoch": 1.6320725226633321, "grad_norm": 0.287109375, "learning_rate": 0.00012778725159756752, "loss": 1.5629, "step": 5257 }, { "epoch": 1.6323851203501094, "grad_norm": 0.283203125, "learning_rate": 0.0001277636478314075, "loss": 1.5427, "step": 5258 }, { "epoch": 1.6326977180368867, "grad_norm": 0.2734375, "learning_rate": 0.00012774004238912066, "loss": 1.4046, "step": 5259 }, { "epoch": 1.6330103157236637, "grad_norm": 0.275390625, "learning_rate": 0.00012771643527213213, "loss": 1.405, "step": 5260 }, { "epoch": 1.6333229134104408, "grad_norm": 0.279296875, "learning_rate": 0.0001276928264818671, "loss": 1.6566, "step": 5261 }, { "epoch": 1.6336355110972178, "grad_norm": 0.283203125, "learning_rate": 0.00012766921601975082, "loss": 1.3281, "step": 5262 }, { "epoch": 1.6339481087839949, "grad_norm": 0.28125, "learning_rate": 0.00012764560388720873, "loss": 1.5187, "step": 5263 }, { "epoch": 1.6342607064707722, "grad_norm": 0.287109375, "learning_rate": 0.00012762199008566627, "loss": 1.47, "step": 5264 }, { "epoch": 1.6345733041575492, "grad_norm": 0.283203125, "learning_rate": 0.0001275983746165491, "loss": 1.565, "step": 5265 }, { "epoch": 1.6348859018443265, "grad_norm": 0.2734375, "learning_rate": 0.00012757475748128287, "loss": 1.6159, "step": 5266 }, { "epoch": 1.6351984995311035, "grad_norm": 0.291015625, "learning_rate": 0.0001275511386812934, "loss": 1.446, "step": 5267 }, { "epoch": 1.6355110972178806, "grad_norm": 0.27734375, "learning_rate": 0.00012752751821800657, "loss": 1.4458, "step": 5268 }, { "epoch": 1.6358236949046576, "grad_norm": 0.26953125, "learning_rate": 0.0001275038960928484, "loss": 1.6677, "step": 5269 }, { "epoch": 1.6361362925914347, "grad_norm": 0.27734375, "learning_rate": 0.00012748027230724497, "loss": 1.7076, "step": 5270 }, { "epoch": 1.636448890278212, "grad_norm": 0.294921875, "learning_rate": 0.0001274566468626225, "loss": 1.4324, "step": 5271 }, { "epoch": 1.6367614879649892, "grad_norm": 0.28125, "learning_rate": 0.00012743301976040722, "loss": 1.3423, "step": 5272 }, { "epoch": 1.6370740856517663, "grad_norm": 0.275390625, "learning_rate": 0.00012740939100202564, "loss": 1.5177, "step": 5273 }, { "epoch": 1.6373866833385433, "grad_norm": 0.263671875, "learning_rate": 0.00012738576058890413, "loss": 1.4188, "step": 5274 }, { "epoch": 1.6376992810253204, "grad_norm": 0.271484375, "learning_rate": 0.0001273621285224694, "loss": 1.4759, "step": 5275 }, { "epoch": 1.6380118787120974, "grad_norm": 0.279296875, "learning_rate": 0.00012733849480414807, "loss": 1.5118, "step": 5276 }, { "epoch": 1.6383244763988747, "grad_norm": 0.294921875, "learning_rate": 0.00012731485943536704, "loss": 1.3826, "step": 5277 }, { "epoch": 1.6386370740856517, "grad_norm": 0.294921875, "learning_rate": 0.0001272912224175531, "loss": 1.417, "step": 5278 }, { "epoch": 1.638949671772429, "grad_norm": 0.27734375, "learning_rate": 0.00012726758375213327, "loss": 1.4968, "step": 5279 }, { "epoch": 1.639262269459206, "grad_norm": 0.28125, "learning_rate": 0.00012724394344053465, "loss": 1.4183, "step": 5280 }, { "epoch": 1.639574867145983, "grad_norm": 0.279296875, "learning_rate": 0.00012722030148418448, "loss": 1.573, "step": 5281 }, { "epoch": 1.6398874648327602, "grad_norm": 0.267578125, "learning_rate": 0.00012719665788451, "loss": 1.5447, "step": 5282 }, { "epoch": 1.6402000625195372, "grad_norm": 0.28125, "learning_rate": 0.00012717301264293865, "loss": 1.6017, "step": 5283 }, { "epoch": 1.6405126602063145, "grad_norm": 0.29296875, "learning_rate": 0.0001271493657608979, "loss": 1.5853, "step": 5284 }, { "epoch": 1.6408252578930917, "grad_norm": 0.2890625, "learning_rate": 0.00012712571723981532, "loss": 1.737, "step": 5285 }, { "epoch": 1.6411378555798688, "grad_norm": 0.29296875, "learning_rate": 0.00012710206708111863, "loss": 1.4768, "step": 5286 }, { "epoch": 1.6414504532666458, "grad_norm": 0.2734375, "learning_rate": 0.0001270784152862356, "loss": 1.5415, "step": 5287 }, { "epoch": 1.641763050953423, "grad_norm": 0.28515625, "learning_rate": 0.00012705476185659412, "loss": 1.4512, "step": 5288 }, { "epoch": 1.6420756486402, "grad_norm": 0.294921875, "learning_rate": 0.00012703110679362226, "loss": 1.4359, "step": 5289 }, { "epoch": 1.6423882463269772, "grad_norm": 0.2890625, "learning_rate": 0.00012700745009874799, "loss": 2.0473, "step": 5290 }, { "epoch": 1.6427008440137543, "grad_norm": 0.27734375, "learning_rate": 0.00012698379177339956, "loss": 1.7714, "step": 5291 }, { "epoch": 1.6430134417005315, "grad_norm": 0.380859375, "learning_rate": 0.00012696013181900522, "loss": 2.3856, "step": 5292 }, { "epoch": 1.6433260393873086, "grad_norm": 0.29296875, "learning_rate": 0.00012693647023699335, "loss": 1.3935, "step": 5293 }, { "epoch": 1.6436386370740856, "grad_norm": 0.267578125, "learning_rate": 0.00012691280702879247, "loss": 1.5267, "step": 5294 }, { "epoch": 1.6439512347608627, "grad_norm": 0.28125, "learning_rate": 0.00012688914219583116, "loss": 1.7312, "step": 5295 }, { "epoch": 1.6442638324476397, "grad_norm": 0.283203125, "learning_rate": 0.00012686547573953803, "loss": 1.5775, "step": 5296 }, { "epoch": 1.644576430134417, "grad_norm": 0.37890625, "learning_rate": 0.0001268418076613419, "loss": 1.9265, "step": 5297 }, { "epoch": 1.6448890278211943, "grad_norm": 0.287109375, "learning_rate": 0.00012681813796267162, "loss": 1.5896, "step": 5298 }, { "epoch": 1.6452016255079713, "grad_norm": 0.26953125, "learning_rate": 0.00012679446664495622, "loss": 1.5002, "step": 5299 }, { "epoch": 1.6455142231947484, "grad_norm": 0.2890625, "learning_rate": 0.00012677079370962467, "loss": 1.3045, "step": 5300 }, { "epoch": 1.6458268208815254, "grad_norm": 0.291015625, "learning_rate": 0.00012674711915810626, "loss": 1.4689, "step": 5301 }, { "epoch": 1.6461394185683025, "grad_norm": 0.2890625, "learning_rate": 0.00012672344299183012, "loss": 1.4018, "step": 5302 }, { "epoch": 1.6464520162550798, "grad_norm": 0.283203125, "learning_rate": 0.0001266997652122257, "loss": 1.5373, "step": 5303 }, { "epoch": 1.6467646139418568, "grad_norm": 0.271484375, "learning_rate": 0.0001266760858207224, "loss": 1.5459, "step": 5304 }, { "epoch": 1.647077211628634, "grad_norm": 0.275390625, "learning_rate": 0.00012665240481874986, "loss": 1.7597, "step": 5305 }, { "epoch": 1.6473898093154111, "grad_norm": 0.28515625, "learning_rate": 0.00012662872220773762, "loss": 1.628, "step": 5306 }, { "epoch": 1.6477024070021882, "grad_norm": 0.27734375, "learning_rate": 0.00012660503798911555, "loss": 1.5585, "step": 5307 }, { "epoch": 1.6480150046889652, "grad_norm": 0.279296875, "learning_rate": 0.0001265813521643134, "loss": 1.5892, "step": 5308 }, { "epoch": 1.6483276023757423, "grad_norm": 0.294921875, "learning_rate": 0.00012655766473476115, "loss": 1.54, "step": 5309 }, { "epoch": 1.6486402000625195, "grad_norm": 0.287109375, "learning_rate": 0.00012653397570188882, "loss": 1.9819, "step": 5310 }, { "epoch": 1.6489527977492968, "grad_norm": 0.275390625, "learning_rate": 0.0001265102850671266, "loss": 1.4561, "step": 5311 }, { "epoch": 1.6492653954360739, "grad_norm": 0.2734375, "learning_rate": 0.00012648659283190464, "loss": 1.5799, "step": 5312 }, { "epoch": 1.649577993122851, "grad_norm": 0.294921875, "learning_rate": 0.00012646289899765338, "loss": 1.3482, "step": 5313 }, { "epoch": 1.649890590809628, "grad_norm": 0.291015625, "learning_rate": 0.00012643920356580313, "loss": 1.8221, "step": 5314 }, { "epoch": 1.650203188496405, "grad_norm": 0.30078125, "learning_rate": 0.00012641550653778448, "loss": 1.4486, "step": 5315 }, { "epoch": 1.6505157861831823, "grad_norm": 0.306640625, "learning_rate": 0.00012639180791502804, "loss": 1.445, "step": 5316 }, { "epoch": 1.6508283838699593, "grad_norm": 0.283203125, "learning_rate": 0.00012636810769896454, "loss": 1.48, "step": 5317 }, { "epoch": 1.6511409815567366, "grad_norm": 0.283203125, "learning_rate": 0.00012634440589102478, "loss": 1.4193, "step": 5318 }, { "epoch": 1.6514535792435137, "grad_norm": 0.296875, "learning_rate": 0.00012632070249263969, "loss": 1.7591, "step": 5319 }, { "epoch": 1.6517661769302907, "grad_norm": 0.287109375, "learning_rate": 0.0001262969975052402, "loss": 1.5557, "step": 5320 }, { "epoch": 1.6520787746170678, "grad_norm": 0.27734375, "learning_rate": 0.00012627329093025747, "loss": 1.6633, "step": 5321 }, { "epoch": 1.6523913723038448, "grad_norm": 0.275390625, "learning_rate": 0.00012624958276912266, "loss": 1.6311, "step": 5322 }, { "epoch": 1.652703969990622, "grad_norm": 0.2890625, "learning_rate": 0.00012622587302326714, "loss": 1.6457, "step": 5323 }, { "epoch": 1.6530165676773994, "grad_norm": 0.2734375, "learning_rate": 0.0001262021616941222, "loss": 1.5174, "step": 5324 }, { "epoch": 1.6533291653641764, "grad_norm": 0.271484375, "learning_rate": 0.00012617844878311943, "loss": 1.922, "step": 5325 }, { "epoch": 1.6536417630509535, "grad_norm": 0.28125, "learning_rate": 0.0001261547342916903, "loss": 1.6622, "step": 5326 }, { "epoch": 1.6539543607377305, "grad_norm": 0.279296875, "learning_rate": 0.00012613101822126654, "loss": 1.6665, "step": 5327 }, { "epoch": 1.6542669584245075, "grad_norm": 0.28125, "learning_rate": 0.00012610730057327992, "loss": 1.781, "step": 5328 }, { "epoch": 1.6545795561112848, "grad_norm": 0.287109375, "learning_rate": 0.00012608358134916228, "loss": 1.7471, "step": 5329 }, { "epoch": 1.6548921537980619, "grad_norm": 0.28515625, "learning_rate": 0.00012605986055034562, "loss": 1.6828, "step": 5330 }, { "epoch": 1.6552047514848391, "grad_norm": 0.275390625, "learning_rate": 0.00012603613817826193, "loss": 1.7583, "step": 5331 }, { "epoch": 1.6555173491716162, "grad_norm": 0.294921875, "learning_rate": 0.0001260124142343434, "loss": 1.5421, "step": 5332 }, { "epoch": 1.6558299468583932, "grad_norm": 0.296875, "learning_rate": 0.00012598868872002234, "loss": 1.4766, "step": 5333 }, { "epoch": 1.6561425445451703, "grad_norm": 0.287109375, "learning_rate": 0.00012596496163673097, "loss": 1.6682, "step": 5334 }, { "epoch": 1.6564551422319473, "grad_norm": 0.287109375, "learning_rate": 0.0001259412329859018, "loss": 2.1212, "step": 5335 }, { "epoch": 1.6567677399187246, "grad_norm": 0.283203125, "learning_rate": 0.00012591750276896732, "loss": 1.5356, "step": 5336 }, { "epoch": 1.6570803376055019, "grad_norm": 0.287109375, "learning_rate": 0.00012589377098736019, "loss": 1.4372, "step": 5337 }, { "epoch": 1.657392935292279, "grad_norm": 0.279296875, "learning_rate": 0.0001258700376425131, "loss": 1.4218, "step": 5338 }, { "epoch": 1.657705532979056, "grad_norm": 0.3046875, "learning_rate": 0.00012584630273585886, "loss": 1.753, "step": 5339 }, { "epoch": 1.658018130665833, "grad_norm": 0.287109375, "learning_rate": 0.0001258225662688304, "loss": 1.2731, "step": 5340 }, { "epoch": 1.65833072835261, "grad_norm": 0.26953125, "learning_rate": 0.0001257988282428607, "loss": 1.879, "step": 5341 }, { "epoch": 1.6586433260393874, "grad_norm": 0.28515625, "learning_rate": 0.00012577508865938288, "loss": 1.5619, "step": 5342 }, { "epoch": 1.6589559237261644, "grad_norm": 0.2890625, "learning_rate": 0.00012575134751983012, "loss": 1.514, "step": 5343 }, { "epoch": 1.6592685214129417, "grad_norm": 0.2890625, "learning_rate": 0.00012572760482563566, "loss": 1.4545, "step": 5344 }, { "epoch": 1.6595811190997187, "grad_norm": 0.2890625, "learning_rate": 0.00012570386057823293, "loss": 1.4924, "step": 5345 }, { "epoch": 1.6598937167864958, "grad_norm": 0.263671875, "learning_rate": 0.00012568011477905538, "loss": 1.4768, "step": 5346 }, { "epoch": 1.6602063144732728, "grad_norm": 0.283203125, "learning_rate": 0.00012565636742953656, "loss": 1.6362, "step": 5347 }, { "epoch": 1.6605189121600499, "grad_norm": 0.28515625, "learning_rate": 0.00012563261853111018, "loss": 1.4759, "step": 5348 }, { "epoch": 1.6608315098468271, "grad_norm": 0.29296875, "learning_rate": 0.00012560886808520992, "loss": 1.7087, "step": 5349 }, { "epoch": 1.6611441075336042, "grad_norm": 0.29296875, "learning_rate": 0.00012558511609326968, "loss": 1.6689, "step": 5350 }, { "epoch": 1.6614567052203815, "grad_norm": 0.26953125, "learning_rate": 0.00012556136255672339, "loss": 1.6328, "step": 5351 }, { "epoch": 1.6617693029071585, "grad_norm": 0.28515625, "learning_rate": 0.00012553760747700503, "loss": 1.5469, "step": 5352 }, { "epoch": 1.6620819005939356, "grad_norm": 0.27734375, "learning_rate": 0.00012551385085554878, "loss": 1.9411, "step": 5353 }, { "epoch": 1.6623944982807126, "grad_norm": 0.27734375, "learning_rate": 0.00012549009269378886, "loss": 1.6302, "step": 5354 }, { "epoch": 1.66270709596749, "grad_norm": 0.294921875, "learning_rate": 0.00012546633299315954, "loss": 1.3778, "step": 5355 }, { "epoch": 1.663019693654267, "grad_norm": 0.29296875, "learning_rate": 0.00012544257175509525, "loss": 1.6108, "step": 5356 }, { "epoch": 1.6633322913410442, "grad_norm": 0.28125, "learning_rate": 0.00012541880898103052, "loss": 1.4986, "step": 5357 }, { "epoch": 1.6636448890278213, "grad_norm": 0.2890625, "learning_rate": 0.00012539504467239985, "loss": 1.6511, "step": 5358 }, { "epoch": 1.6639574867145983, "grad_norm": 0.287109375, "learning_rate": 0.00012537127883063798, "loss": 1.7448, "step": 5359 }, { "epoch": 1.6642700844013754, "grad_norm": 0.31640625, "learning_rate": 0.00012534751145717969, "loss": 1.4229, "step": 5360 }, { "epoch": 1.6645826820881524, "grad_norm": 0.28515625, "learning_rate": 0.00012532374255345982, "loss": 1.8999, "step": 5361 }, { "epoch": 1.6648952797749297, "grad_norm": 0.27734375, "learning_rate": 0.00012529997212091336, "loss": 1.4283, "step": 5362 }, { "epoch": 1.6652078774617067, "grad_norm": 0.28125, "learning_rate": 0.00012527620016097532, "loss": 1.5923, "step": 5363 }, { "epoch": 1.665520475148484, "grad_norm": 0.27734375, "learning_rate": 0.00012525242667508089, "loss": 1.2956, "step": 5364 }, { "epoch": 1.665833072835261, "grad_norm": 0.29296875, "learning_rate": 0.00012522865166466528, "loss": 1.3285, "step": 5365 }, { "epoch": 1.666145670522038, "grad_norm": 0.2890625, "learning_rate": 0.0001252048751311638, "loss": 1.4168, "step": 5366 }, { "epoch": 1.6664582682088152, "grad_norm": 0.275390625, "learning_rate": 0.00012518109707601192, "loss": 1.4103, "step": 5367 }, { "epoch": 1.6667708658955924, "grad_norm": 0.28515625, "learning_rate": 0.0001251573175006451, "loss": 1.5798, "step": 5368 }, { "epoch": 1.6670834635823695, "grad_norm": 0.279296875, "learning_rate": 0.00012513353640649895, "loss": 1.488, "step": 5369 }, { "epoch": 1.6673960612691467, "grad_norm": 0.267578125, "learning_rate": 0.00012510975379500917, "loss": 1.2702, "step": 5370 }, { "epoch": 1.6677086589559238, "grad_norm": 0.30078125, "learning_rate": 0.00012508596966761163, "loss": 1.5452, "step": 5371 }, { "epoch": 1.6680212566427008, "grad_norm": 0.2734375, "learning_rate": 0.00012506218402574202, "loss": 1.4895, "step": 5372 }, { "epoch": 1.668333854329478, "grad_norm": 0.279296875, "learning_rate": 0.00012503839687083652, "loss": 1.7254, "step": 5373 }, { "epoch": 1.668646452016255, "grad_norm": 0.296875, "learning_rate": 0.00012501460820433103, "loss": 1.6656, "step": 5374 }, { "epoch": 1.6689590497030322, "grad_norm": 0.283203125, "learning_rate": 0.0001249908180276618, "loss": 1.7435, "step": 5375 }, { "epoch": 1.6692716473898093, "grad_norm": 0.2734375, "learning_rate": 0.00012496702634226504, "loss": 1.667, "step": 5376 }, { "epoch": 1.6695842450765865, "grad_norm": 0.291015625, "learning_rate": 0.00012494323314957712, "loss": 1.7495, "step": 5377 }, { "epoch": 1.6698968427633636, "grad_norm": 0.279296875, "learning_rate": 0.00012491943845103438, "loss": 1.4274, "step": 5378 }, { "epoch": 1.6702094404501406, "grad_norm": 0.283203125, "learning_rate": 0.00012489564224807338, "loss": 1.6359, "step": 5379 }, { "epoch": 1.6705220381369177, "grad_norm": 0.283203125, "learning_rate": 0.00012487184454213073, "loss": 1.5136, "step": 5380 }, { "epoch": 1.670834635823695, "grad_norm": 0.298828125, "learning_rate": 0.00012484804533464315, "loss": 1.4721, "step": 5381 }, { "epoch": 1.671147233510472, "grad_norm": 0.271484375, "learning_rate": 0.0001248242446270474, "loss": 1.6776, "step": 5382 }, { "epoch": 1.6714598311972493, "grad_norm": 0.275390625, "learning_rate": 0.00012480044242078043, "loss": 1.5605, "step": 5383 }, { "epoch": 1.6717724288840263, "grad_norm": 0.29296875, "learning_rate": 0.00012477663871727907, "loss": 1.3675, "step": 5384 }, { "epoch": 1.6720850265708034, "grad_norm": 0.279296875, "learning_rate": 0.0001247528335179805, "loss": 1.4221, "step": 5385 }, { "epoch": 1.6723976242575804, "grad_norm": 0.2734375, "learning_rate": 0.00012472902682432178, "loss": 1.4183, "step": 5386 }, { "epoch": 1.6727102219443575, "grad_norm": 0.30078125, "learning_rate": 0.00012470521863774023, "loss": 1.6643, "step": 5387 }, { "epoch": 1.6730228196311347, "grad_norm": 0.283203125, "learning_rate": 0.00012468140895967314, "loss": 1.5463, "step": 5388 }, { "epoch": 1.6733354173179118, "grad_norm": 0.26953125, "learning_rate": 0.00012465759779155798, "loss": 1.3038, "step": 5389 }, { "epoch": 1.673648015004689, "grad_norm": 0.27734375, "learning_rate": 0.00012463378513483215, "loss": 1.6816, "step": 5390 }, { "epoch": 1.6739606126914661, "grad_norm": 0.2734375, "learning_rate": 0.00012460997099093335, "loss": 1.8097, "step": 5391 }, { "epoch": 1.6742732103782432, "grad_norm": 0.28515625, "learning_rate": 0.00012458615536129925, "loss": 1.2823, "step": 5392 }, { "epoch": 1.6745858080650202, "grad_norm": 0.283203125, "learning_rate": 0.0001245623382473676, "loss": 1.3995, "step": 5393 }, { "epoch": 1.6748984057517975, "grad_norm": 0.275390625, "learning_rate": 0.0001245385196505763, "loss": 1.4217, "step": 5394 }, { "epoch": 1.6752110034385745, "grad_norm": 0.275390625, "learning_rate": 0.00012451469957236336, "loss": 1.466, "step": 5395 }, { "epoch": 1.6755236011253518, "grad_norm": 0.283203125, "learning_rate": 0.00012449087801416668, "loss": 1.4582, "step": 5396 }, { "epoch": 1.6758361988121289, "grad_norm": 0.296875, "learning_rate": 0.00012446705497742454, "loss": 1.6292, "step": 5397 }, { "epoch": 1.676148796498906, "grad_norm": 0.2890625, "learning_rate": 0.00012444323046357505, "loss": 1.6885, "step": 5398 }, { "epoch": 1.676461394185683, "grad_norm": 0.27734375, "learning_rate": 0.00012441940447405664, "loss": 1.5673, "step": 5399 }, { "epoch": 1.67677399187246, "grad_norm": 0.2734375, "learning_rate": 0.00012439557701030763, "loss": 1.3717, "step": 5400 }, { "epoch": 1.6770865895592373, "grad_norm": 0.283203125, "learning_rate": 0.00012437174807376658, "loss": 1.873, "step": 5401 }, { "epoch": 1.6773991872460143, "grad_norm": 0.294921875, "learning_rate": 0.00012434791766587205, "loss": 1.7381, "step": 5402 }, { "epoch": 1.6777117849327916, "grad_norm": 0.287109375, "learning_rate": 0.00012432408578806267, "loss": 1.4546, "step": 5403 }, { "epoch": 1.6780243826195687, "grad_norm": 0.291015625, "learning_rate": 0.00012430025244177727, "loss": 1.4063, "step": 5404 }, { "epoch": 1.6783369803063457, "grad_norm": 0.27734375, "learning_rate": 0.00012427641762845465, "loss": 1.4917, "step": 5405 }, { "epoch": 1.6786495779931228, "grad_norm": 0.283203125, "learning_rate": 0.00012425258134953371, "loss": 1.6743, "step": 5406 }, { "epoch": 1.6789621756798998, "grad_norm": 0.2734375, "learning_rate": 0.00012422874360645358, "loss": 1.7126, "step": 5407 }, { "epoch": 1.679274773366677, "grad_norm": 0.2734375, "learning_rate": 0.00012420490440065335, "loss": 1.4323, "step": 5408 }, { "epoch": 1.6795873710534543, "grad_norm": 0.294921875, "learning_rate": 0.00012418106373357217, "loss": 1.4172, "step": 5409 }, { "epoch": 1.6798999687402314, "grad_norm": 0.3046875, "learning_rate": 0.00012415722160664933, "loss": 1.8328, "step": 5410 }, { "epoch": 1.6802125664270084, "grad_norm": 0.275390625, "learning_rate": 0.00012413337802132424, "loss": 1.5253, "step": 5411 }, { "epoch": 1.6805251641137855, "grad_norm": 0.27734375, "learning_rate": 0.00012410953297903643, "loss": 1.4609, "step": 5412 }, { "epoch": 1.6808377618005625, "grad_norm": 0.275390625, "learning_rate": 0.00012408568648122531, "loss": 1.6043, "step": 5413 }, { "epoch": 1.6811503594873398, "grad_norm": 0.2890625, "learning_rate": 0.00012406183852933068, "loss": 1.4588, "step": 5414 }, { "epoch": 1.6814629571741169, "grad_norm": 0.279296875, "learning_rate": 0.00012403798912479216, "loss": 1.3241, "step": 5415 }, { "epoch": 1.6817755548608941, "grad_norm": 0.287109375, "learning_rate": 0.00012401413826904957, "loss": 1.2465, "step": 5416 }, { "epoch": 1.6820881525476712, "grad_norm": 0.2734375, "learning_rate": 0.0001239902859635429, "loss": 1.6052, "step": 5417 }, { "epoch": 1.6824007502344482, "grad_norm": 0.28125, "learning_rate": 0.00012396643220971207, "loss": 1.7932, "step": 5418 }, { "epoch": 1.6827133479212253, "grad_norm": 0.29296875, "learning_rate": 0.00012394257700899718, "loss": 1.3542, "step": 5419 }, { "epoch": 1.6830259456080023, "grad_norm": 0.271484375, "learning_rate": 0.00012391872036283843, "loss": 1.7026, "step": 5420 }, { "epoch": 1.6833385432947796, "grad_norm": 0.27734375, "learning_rate": 0.00012389486227267605, "loss": 1.519, "step": 5421 }, { "epoch": 1.6836511409815569, "grad_norm": 0.291015625, "learning_rate": 0.00012387100273995036, "loss": 1.4518, "step": 5422 }, { "epoch": 1.683963738668334, "grad_norm": 0.306640625, "learning_rate": 0.00012384714176610185, "loss": 1.7116, "step": 5423 }, { "epoch": 1.684276336355111, "grad_norm": 0.29296875, "learning_rate": 0.00012382327935257098, "loss": 1.656, "step": 5424 }, { "epoch": 1.684588934041888, "grad_norm": 0.2734375, "learning_rate": 0.00012379941550079836, "loss": 1.7991, "step": 5425 }, { "epoch": 1.684901531728665, "grad_norm": 0.30078125, "learning_rate": 0.00012377555021222473, "loss": 1.425, "step": 5426 }, { "epoch": 1.6852141294154424, "grad_norm": 0.263671875, "learning_rate": 0.00012375168348829085, "loss": 1.6756, "step": 5427 }, { "epoch": 1.6855267271022194, "grad_norm": 0.279296875, "learning_rate": 0.00012372781533043754, "loss": 1.5876, "step": 5428 }, { "epoch": 1.6858393247889967, "grad_norm": 0.287109375, "learning_rate": 0.00012370394574010577, "loss": 1.6633, "step": 5429 }, { "epoch": 1.6861519224757737, "grad_norm": 0.28125, "learning_rate": 0.00012368007471873663, "loss": 1.4921, "step": 5430 }, { "epoch": 1.6864645201625508, "grad_norm": 0.283203125, "learning_rate": 0.00012365620226777117, "loss": 1.5537, "step": 5431 }, { "epoch": 1.6867771178493278, "grad_norm": 0.291015625, "learning_rate": 0.00012363232838865065, "loss": 1.3548, "step": 5432 }, { "epoch": 1.6870897155361049, "grad_norm": 0.28515625, "learning_rate": 0.00012360845308281634, "loss": 1.4696, "step": 5433 }, { "epoch": 1.6874023132228821, "grad_norm": 0.287109375, "learning_rate": 0.00012358457635170965, "loss": 1.6547, "step": 5434 }, { "epoch": 1.6877149109096594, "grad_norm": 0.283203125, "learning_rate": 0.000123560698196772, "loss": 1.4953, "step": 5435 }, { "epoch": 1.6880275085964365, "grad_norm": 0.28515625, "learning_rate": 0.000123536818619445, "loss": 2.0835, "step": 5436 }, { "epoch": 1.6883401062832135, "grad_norm": 0.2890625, "learning_rate": 0.00012351293762117026, "loss": 1.351, "step": 5437 }, { "epoch": 1.6886527039699906, "grad_norm": 0.296875, "learning_rate": 0.00012348905520338953, "loss": 1.482, "step": 5438 }, { "epoch": 1.6889653016567676, "grad_norm": 0.27734375, "learning_rate": 0.0001234651713675446, "loss": 1.6435, "step": 5439 }, { "epoch": 1.6892778993435449, "grad_norm": 0.291015625, "learning_rate": 0.00012344128611507734, "loss": 1.4134, "step": 5440 }, { "epoch": 1.689590497030322, "grad_norm": 0.2734375, "learning_rate": 0.0001234173994474298, "loss": 1.5871, "step": 5441 }, { "epoch": 1.6899030947170992, "grad_norm": 0.283203125, "learning_rate": 0.00012339351136604403, "loss": 1.4284, "step": 5442 }, { "epoch": 1.6902156924038763, "grad_norm": 0.41015625, "learning_rate": 0.0001233696218723621, "loss": 2.1214, "step": 5443 }, { "epoch": 1.6905282900906533, "grad_norm": 0.306640625, "learning_rate": 0.00012334573096782638, "loss": 1.8422, "step": 5444 }, { "epoch": 1.6908408877774304, "grad_norm": 0.28125, "learning_rate": 0.00012332183865387908, "loss": 1.6021, "step": 5445 }, { "epoch": 1.6911534854642074, "grad_norm": 0.275390625, "learning_rate": 0.00012329794493196272, "loss": 1.5454, "step": 5446 }, { "epoch": 1.6914660831509847, "grad_norm": 0.298828125, "learning_rate": 0.00012327404980351972, "loss": 1.5053, "step": 5447 }, { "epoch": 1.691778680837762, "grad_norm": 0.29296875, "learning_rate": 0.00012325015326999269, "loss": 1.449, "step": 5448 }, { "epoch": 1.692091278524539, "grad_norm": 0.28515625, "learning_rate": 0.0001232262553328242, "loss": 1.4623, "step": 5449 }, { "epoch": 1.692403876211316, "grad_norm": 0.287109375, "learning_rate": 0.00012320235599345714, "loss": 1.6873, "step": 5450 }, { "epoch": 1.692716473898093, "grad_norm": 0.28515625, "learning_rate": 0.00012317845525333426, "loss": 1.7797, "step": 5451 }, { "epoch": 1.6930290715848701, "grad_norm": 0.29296875, "learning_rate": 0.0001231545531138985, "loss": 1.4857, "step": 5452 }, { "epoch": 1.6933416692716474, "grad_norm": 0.28515625, "learning_rate": 0.0001231306495765929, "loss": 1.6462, "step": 5453 }, { "epoch": 1.6936542669584245, "grad_norm": 0.283203125, "learning_rate": 0.0001231067446428605, "loss": 1.4372, "step": 5454 }, { "epoch": 1.6939668646452017, "grad_norm": 0.287109375, "learning_rate": 0.00012308283831414445, "loss": 1.2759, "step": 5455 }, { "epoch": 1.6942794623319788, "grad_norm": 0.265625, "learning_rate": 0.00012305893059188805, "loss": 1.3461, "step": 5456 }, { "epoch": 1.6945920600187558, "grad_norm": 0.294921875, "learning_rate": 0.0001230350214775346, "loss": 1.544, "step": 5457 }, { "epoch": 1.694904657705533, "grad_norm": 0.34765625, "learning_rate": 0.0001230111109725276, "loss": 2.1854, "step": 5458 }, { "epoch": 1.69521725539231, "grad_norm": 0.291015625, "learning_rate": 0.0001229871990783105, "loss": 1.6051, "step": 5459 }, { "epoch": 1.6955298530790872, "grad_norm": 0.283203125, "learning_rate": 0.00012296328579632687, "loss": 1.603, "step": 5460 }, { "epoch": 1.6958424507658645, "grad_norm": 0.306640625, "learning_rate": 0.0001229393711280204, "loss": 1.6455, "step": 5461 }, { "epoch": 1.6961550484526415, "grad_norm": 0.28125, "learning_rate": 0.0001229154550748349, "loss": 1.3636, "step": 5462 }, { "epoch": 1.6964676461394186, "grad_norm": 0.28125, "learning_rate": 0.00012289153763821417, "loss": 1.6271, "step": 5463 }, { "epoch": 1.6967802438261956, "grad_norm": 0.287109375, "learning_rate": 0.00012286761881960214, "loss": 1.7037, "step": 5464 }, { "epoch": 1.6970928415129727, "grad_norm": 0.291015625, "learning_rate": 0.00012284369862044283, "loss": 1.6694, "step": 5465 }, { "epoch": 1.69740543919975, "grad_norm": 0.294921875, "learning_rate": 0.0001228197770421803, "loss": 1.4603, "step": 5466 }, { "epoch": 1.697718036886527, "grad_norm": 0.2734375, "learning_rate": 0.00012279585408625875, "loss": 1.4662, "step": 5467 }, { "epoch": 1.6980306345733043, "grad_norm": 0.291015625, "learning_rate": 0.00012277192975412247, "loss": 1.3703, "step": 5468 }, { "epoch": 1.6983432322600813, "grad_norm": 0.2890625, "learning_rate": 0.00012274800404721574, "loss": 1.5891, "step": 5469 }, { "epoch": 1.6986558299468584, "grad_norm": 0.2890625, "learning_rate": 0.00012272407696698305, "loss": 1.8182, "step": 5470 }, { "epoch": 1.6989684276336354, "grad_norm": 0.275390625, "learning_rate": 0.00012270014851486886, "loss": 1.6121, "step": 5471 }, { "epoch": 1.6992810253204125, "grad_norm": 0.287109375, "learning_rate": 0.00012267621869231778, "loss": 1.3421, "step": 5472 }, { "epoch": 1.6995936230071897, "grad_norm": 0.283203125, "learning_rate": 0.00012265228750077447, "loss": 1.5966, "step": 5473 }, { "epoch": 1.699906220693967, "grad_norm": 0.283203125, "learning_rate": 0.0001226283549416837, "loss": 1.38, "step": 5474 }, { "epoch": 1.700218818380744, "grad_norm": 0.28125, "learning_rate": 0.00012260442101649031, "loss": 1.7053, "step": 5475 }, { "epoch": 1.7005314160675211, "grad_norm": 0.279296875, "learning_rate": 0.0001225804857266392, "loss": 1.483, "step": 5476 }, { "epoch": 1.7008440137542982, "grad_norm": 0.3046875, "learning_rate": 0.00012255654907357544, "loss": 1.6787, "step": 5477 }, { "epoch": 1.7011566114410752, "grad_norm": 0.28515625, "learning_rate": 0.00012253261105874404, "loss": 1.5571, "step": 5478 }, { "epoch": 1.7014692091278525, "grad_norm": 0.28125, "learning_rate": 0.00012250867168359016, "loss": 1.8794, "step": 5479 }, { "epoch": 1.7017818068146295, "grad_norm": 0.26953125, "learning_rate": 0.00012248473094955914, "loss": 1.4557, "step": 5480 }, { "epoch": 1.7020944045014068, "grad_norm": 0.27734375, "learning_rate": 0.00012246078885809624, "loss": 1.8304, "step": 5481 }, { "epoch": 1.7024070021881839, "grad_norm": 0.279296875, "learning_rate": 0.00012243684541064692, "loss": 1.7549, "step": 5482 }, { "epoch": 1.702719599874961, "grad_norm": 0.28515625, "learning_rate": 0.0001224129006086566, "loss": 1.728, "step": 5483 }, { "epoch": 1.703032197561738, "grad_norm": 0.287109375, "learning_rate": 0.00012238895445357094, "loss": 1.6094, "step": 5484 }, { "epoch": 1.703344795248515, "grad_norm": 0.2890625, "learning_rate": 0.00012236500694683556, "loss": 1.5115, "step": 5485 }, { "epoch": 1.7036573929352923, "grad_norm": 0.28125, "learning_rate": 0.00012234105808989621, "loss": 1.2878, "step": 5486 }, { "epoch": 1.7039699906220696, "grad_norm": 0.287109375, "learning_rate": 0.00012231710788419871, "loss": 1.3691, "step": 5487 }, { "epoch": 1.7042825883088466, "grad_norm": 0.296875, "learning_rate": 0.000122293156331189, "loss": 1.6046, "step": 5488 }, { "epoch": 1.7045951859956237, "grad_norm": 0.283203125, "learning_rate": 0.00012226920343231303, "loss": 1.3087, "step": 5489 }, { "epoch": 1.7049077836824007, "grad_norm": 0.27734375, "learning_rate": 0.00012224524918901685, "loss": 1.5949, "step": 5490 }, { "epoch": 1.7052203813691778, "grad_norm": 0.2734375, "learning_rate": 0.0001222212936027466, "loss": 1.404, "step": 5491 }, { "epoch": 1.705532979055955, "grad_norm": 0.2890625, "learning_rate": 0.0001221973366749486, "loss": 1.4464, "step": 5492 }, { "epoch": 1.705845576742732, "grad_norm": 0.294921875, "learning_rate": 0.00012217337840706905, "loss": 1.6028, "step": 5493 }, { "epoch": 1.7061581744295093, "grad_norm": 0.333984375, "learning_rate": 0.00012214941880055443, "loss": 1.9414, "step": 5494 }, { "epoch": 1.7064707721162864, "grad_norm": 0.2890625, "learning_rate": 0.00012212545785685115, "loss": 1.6799, "step": 5495 }, { "epoch": 1.7067833698030634, "grad_norm": 0.2734375, "learning_rate": 0.0001221014955774058, "loss": 1.4497, "step": 5496 }, { "epoch": 1.7070959674898405, "grad_norm": 0.28515625, "learning_rate": 0.00012207753196366496, "loss": 1.3977, "step": 5497 }, { "epoch": 1.7074085651766175, "grad_norm": 0.30078125, "learning_rate": 0.00012205356701707541, "loss": 1.5275, "step": 5498 }, { "epoch": 1.7077211628633948, "grad_norm": 0.29296875, "learning_rate": 0.00012202960073908389, "loss": 1.6336, "step": 5499 }, { "epoch": 1.708033760550172, "grad_norm": 0.298828125, "learning_rate": 0.00012200563313113733, "loss": 1.4646, "step": 5500 }, { "epoch": 1.7083463582369491, "grad_norm": 0.294921875, "learning_rate": 0.00012198166419468266, "loss": 1.5693, "step": 5501 }, { "epoch": 1.7086589559237262, "grad_norm": 0.267578125, "learning_rate": 0.00012195769393116692, "loss": 1.7618, "step": 5502 }, { "epoch": 1.7089715536105032, "grad_norm": 0.271484375, "learning_rate": 0.00012193372234203718, "loss": 1.634, "step": 5503 }, { "epoch": 1.7092841512972803, "grad_norm": 0.330078125, "learning_rate": 0.00012190974942874069, "loss": 2.2696, "step": 5504 }, { "epoch": 1.7095967489840576, "grad_norm": 0.287109375, "learning_rate": 0.00012188577519272468, "loss": 1.6976, "step": 5505 }, { "epoch": 1.7099093466708346, "grad_norm": 0.28125, "learning_rate": 0.00012186179963543655, "loss": 1.6067, "step": 5506 }, { "epoch": 1.7102219443576119, "grad_norm": 0.28125, "learning_rate": 0.00012183782275832374, "loss": 1.4573, "step": 5507 }, { "epoch": 1.710534542044389, "grad_norm": 0.287109375, "learning_rate": 0.00012181384456283374, "loss": 1.6158, "step": 5508 }, { "epoch": 1.710847139731166, "grad_norm": 0.3046875, "learning_rate": 0.00012178986505041412, "loss": 1.4864, "step": 5509 }, { "epoch": 1.711159737417943, "grad_norm": 0.275390625, "learning_rate": 0.0001217658842225126, "loss": 1.45, "step": 5510 }, { "epoch": 1.71147233510472, "grad_norm": 0.33203125, "learning_rate": 0.00012174190208057687, "loss": 2.2129, "step": 5511 }, { "epoch": 1.7117849327914973, "grad_norm": 0.294921875, "learning_rate": 0.00012171791862605488, "loss": 1.7555, "step": 5512 }, { "epoch": 1.7120975304782746, "grad_norm": 0.3359375, "learning_rate": 0.0001216939338603944, "loss": 2.254, "step": 5513 }, { "epoch": 1.7124101281650517, "grad_norm": 0.27734375, "learning_rate": 0.00012166994778504355, "loss": 1.5429, "step": 5514 }, { "epoch": 1.7127227258518287, "grad_norm": 0.275390625, "learning_rate": 0.0001216459604014503, "loss": 1.6032, "step": 5515 }, { "epoch": 1.7130353235386058, "grad_norm": 0.29296875, "learning_rate": 0.00012162197171106282, "loss": 1.3861, "step": 5516 }, { "epoch": 1.7133479212253828, "grad_norm": 0.26953125, "learning_rate": 0.00012159798171532937, "loss": 1.5142, "step": 5517 }, { "epoch": 1.71366051891216, "grad_norm": 0.287109375, "learning_rate": 0.00012157399041569826, "loss": 1.4573, "step": 5518 }, { "epoch": 1.7139731165989371, "grad_norm": 0.302734375, "learning_rate": 0.00012154999781361782, "loss": 1.5277, "step": 5519 }, { "epoch": 1.7142857142857144, "grad_norm": 0.29296875, "learning_rate": 0.0001215260039105366, "loss": 1.747, "step": 5520 }, { "epoch": 1.7145983119724915, "grad_norm": 0.267578125, "learning_rate": 0.00012150200870790305, "loss": 1.3299, "step": 5521 }, { "epoch": 1.7149109096592685, "grad_norm": 0.2890625, "learning_rate": 0.00012147801220716585, "loss": 1.4527, "step": 5522 }, { "epoch": 1.7152235073460456, "grad_norm": 0.271484375, "learning_rate": 0.00012145401440977366, "loss": 1.3729, "step": 5523 }, { "epoch": 1.7155361050328226, "grad_norm": 0.28515625, "learning_rate": 0.00012143001531717535, "loss": 1.4216, "step": 5524 }, { "epoch": 1.7158487027195999, "grad_norm": 0.3046875, "learning_rate": 0.00012140601493081964, "loss": 1.5151, "step": 5525 }, { "epoch": 1.7161613004063772, "grad_norm": 0.2734375, "learning_rate": 0.00012138201325215557, "loss": 1.671, "step": 5526 }, { "epoch": 1.7164738980931542, "grad_norm": 0.287109375, "learning_rate": 0.00012135801028263207, "loss": 1.379, "step": 5527 }, { "epoch": 1.7167864957799313, "grad_norm": 0.28125, "learning_rate": 0.0001213340060236983, "loss": 1.6137, "step": 5528 }, { "epoch": 1.7170990934667083, "grad_norm": 0.263671875, "learning_rate": 0.00012131000047680339, "loss": 1.4956, "step": 5529 }, { "epoch": 1.7174116911534854, "grad_norm": 0.291015625, "learning_rate": 0.00012128599364339664, "loss": 1.5429, "step": 5530 }, { "epoch": 1.7177242888402626, "grad_norm": 0.287109375, "learning_rate": 0.00012126198552492727, "loss": 1.4727, "step": 5531 }, { "epoch": 1.7180368865270397, "grad_norm": 0.2890625, "learning_rate": 0.00012123797612284475, "loss": 1.5302, "step": 5532 }, { "epoch": 1.718349484213817, "grad_norm": 0.27734375, "learning_rate": 0.00012121396543859855, "loss": 1.7077, "step": 5533 }, { "epoch": 1.718662081900594, "grad_norm": 0.302734375, "learning_rate": 0.00012118995347363824, "loss": 1.517, "step": 5534 }, { "epoch": 1.718974679587371, "grad_norm": 0.2890625, "learning_rate": 0.0001211659402294134, "loss": 1.7013, "step": 5535 }, { "epoch": 1.719287277274148, "grad_norm": 0.279296875, "learning_rate": 0.00012114192570737383, "loss": 1.4074, "step": 5536 }, { "epoch": 1.7195998749609251, "grad_norm": 0.287109375, "learning_rate": 0.00012111790990896923, "loss": 1.7123, "step": 5537 }, { "epoch": 1.7199124726477024, "grad_norm": 0.30859375, "learning_rate": 0.00012109389283564952, "loss": 1.513, "step": 5538 }, { "epoch": 1.7202250703344797, "grad_norm": 0.2890625, "learning_rate": 0.0001210698744888646, "loss": 1.7622, "step": 5539 }, { "epoch": 1.7205376680212567, "grad_norm": 0.2890625, "learning_rate": 0.0001210458548700645, "loss": 1.5918, "step": 5540 }, { "epoch": 1.7208502657080338, "grad_norm": 0.294921875, "learning_rate": 0.00012102183398069932, "loss": 1.4455, "step": 5541 }, { "epoch": 1.7211628633948108, "grad_norm": 0.29296875, "learning_rate": 0.00012099781182221929, "loss": 1.3134, "step": 5542 }, { "epoch": 1.7214754610815879, "grad_norm": 0.291015625, "learning_rate": 0.00012097378839607453, "loss": 1.68, "step": 5543 }, { "epoch": 1.7217880587683652, "grad_norm": 0.287109375, "learning_rate": 0.00012094976370371548, "loss": 1.5016, "step": 5544 }, { "epoch": 1.7221006564551422, "grad_norm": 0.27734375, "learning_rate": 0.00012092573774659248, "loss": 1.6668, "step": 5545 }, { "epoch": 1.7224132541419195, "grad_norm": 0.283203125, "learning_rate": 0.00012090171052615605, "loss": 1.3413, "step": 5546 }, { "epoch": 1.7227258518286965, "grad_norm": 0.275390625, "learning_rate": 0.00012087768204385668, "loss": 1.7143, "step": 5547 }, { "epoch": 1.7230384495154736, "grad_norm": 0.287109375, "learning_rate": 0.0001208536523011451, "loss": 1.3945, "step": 5548 }, { "epoch": 1.7233510472022506, "grad_norm": 0.291015625, "learning_rate": 0.0001208296212994719, "loss": 1.4571, "step": 5549 }, { "epoch": 1.7236636448890277, "grad_norm": 0.2890625, "learning_rate": 0.00012080558904028796, "loss": 1.7265, "step": 5550 }, { "epoch": 1.723976242575805, "grad_norm": 0.291015625, "learning_rate": 0.00012078155552504404, "loss": 1.2998, "step": 5551 }, { "epoch": 1.7242888402625822, "grad_norm": 0.294921875, "learning_rate": 0.00012075752075519118, "loss": 1.5475, "step": 5552 }, { "epoch": 1.7246014379493593, "grad_norm": 0.2890625, "learning_rate": 0.00012073348473218031, "loss": 1.5446, "step": 5553 }, { "epoch": 1.7249140356361363, "grad_norm": 0.30078125, "learning_rate": 0.0001207094474574626, "loss": 1.8914, "step": 5554 }, { "epoch": 1.7252266333229134, "grad_norm": 0.283203125, "learning_rate": 0.0001206854089324891, "loss": 1.8127, "step": 5555 }, { "epoch": 1.7255392310096904, "grad_norm": 0.287109375, "learning_rate": 0.00012066136915871113, "loss": 1.435, "step": 5556 }, { "epoch": 1.7258518286964677, "grad_norm": 0.294921875, "learning_rate": 0.00012063732813757995, "loss": 1.6434, "step": 5557 }, { "epoch": 1.7261644263832447, "grad_norm": 0.294921875, "learning_rate": 0.00012061328587054701, "loss": 1.4652, "step": 5558 }, { "epoch": 1.726477024070022, "grad_norm": 0.2734375, "learning_rate": 0.0001205892423590637, "loss": 1.3189, "step": 5559 }, { "epoch": 1.726789621756799, "grad_norm": 0.279296875, "learning_rate": 0.00012056519760458163, "loss": 1.3971, "step": 5560 }, { "epoch": 1.7271022194435761, "grad_norm": 0.294921875, "learning_rate": 0.00012054115160855233, "loss": 1.2699, "step": 5561 }, { "epoch": 1.7274148171303532, "grad_norm": 0.275390625, "learning_rate": 0.00012051710437242756, "loss": 1.3872, "step": 5562 }, { "epoch": 1.7277274148171302, "grad_norm": 0.283203125, "learning_rate": 0.00012049305589765905, "loss": 1.6135, "step": 5563 }, { "epoch": 1.7280400125039075, "grad_norm": 0.2890625, "learning_rate": 0.00012046900618569863, "loss": 1.5243, "step": 5564 }, { "epoch": 1.7283526101906845, "grad_norm": 0.302734375, "learning_rate": 0.00012044495523799824, "loss": 1.4615, "step": 5565 }, { "epoch": 1.7286652078774618, "grad_norm": 0.27734375, "learning_rate": 0.00012042090305600984, "loss": 1.6188, "step": 5566 }, { "epoch": 1.7289778055642389, "grad_norm": 0.294921875, "learning_rate": 0.00012039684964118547, "loss": 1.6997, "step": 5567 }, { "epoch": 1.729290403251016, "grad_norm": 0.283203125, "learning_rate": 0.00012037279499497734, "loss": 1.6044, "step": 5568 }, { "epoch": 1.729603000937793, "grad_norm": 0.294921875, "learning_rate": 0.00012034873911883759, "loss": 1.4182, "step": 5569 }, { "epoch": 1.7299155986245702, "grad_norm": 0.298828125, "learning_rate": 0.00012032468201421853, "loss": 1.4877, "step": 5570 }, { "epoch": 1.7302281963113473, "grad_norm": 0.279296875, "learning_rate": 0.00012030062368257253, "loss": 1.6926, "step": 5571 }, { "epoch": 1.7305407939981245, "grad_norm": 0.275390625, "learning_rate": 0.00012027656412535202, "loss": 1.8898, "step": 5572 }, { "epoch": 1.7308533916849016, "grad_norm": 0.287109375, "learning_rate": 0.00012025250334400946, "loss": 1.5818, "step": 5573 }, { "epoch": 1.7311659893716786, "grad_norm": 0.310546875, "learning_rate": 0.0001202284413399975, "loss": 1.716, "step": 5574 }, { "epoch": 1.7314785870584557, "grad_norm": 0.2734375, "learning_rate": 0.00012020437811476872, "loss": 1.6398, "step": 5575 }, { "epoch": 1.7317911847452327, "grad_norm": 0.27734375, "learning_rate": 0.00012018031366977591, "loss": 1.4197, "step": 5576 }, { "epoch": 1.73210378243201, "grad_norm": 0.29296875, "learning_rate": 0.00012015624800647185, "loss": 1.7078, "step": 5577 }, { "epoch": 1.732416380118787, "grad_norm": 0.275390625, "learning_rate": 0.00012013218112630942, "loss": 1.6071, "step": 5578 }, { "epoch": 1.7327289778055643, "grad_norm": 0.283203125, "learning_rate": 0.00012010811303074154, "loss": 1.4001, "step": 5579 }, { "epoch": 1.7330415754923414, "grad_norm": 0.287109375, "learning_rate": 0.00012008404372122126, "loss": 1.4807, "step": 5580 }, { "epoch": 1.7333541731791184, "grad_norm": 0.29296875, "learning_rate": 0.00012005997319920166, "loss": 1.4388, "step": 5581 }, { "epoch": 1.7336667708658955, "grad_norm": 0.2734375, "learning_rate": 0.00012003590146613592, "loss": 1.571, "step": 5582 }, { "epoch": 1.7339793685526728, "grad_norm": 0.2578125, "learning_rate": 0.0001200118285234773, "loss": 1.3583, "step": 5583 }, { "epoch": 1.7342919662394498, "grad_norm": 0.275390625, "learning_rate": 0.00011998775437267906, "loss": 1.5505, "step": 5584 }, { "epoch": 1.734604563926227, "grad_norm": 0.279296875, "learning_rate": 0.00011996367901519463, "loss": 1.297, "step": 5585 }, { "epoch": 1.7349171616130041, "grad_norm": 0.314453125, "learning_rate": 0.00011993960245247746, "loss": 1.6469, "step": 5586 }, { "epoch": 1.7352297592997812, "grad_norm": 0.287109375, "learning_rate": 0.00011991552468598106, "loss": 1.4381, "step": 5587 }, { "epoch": 1.7355423569865582, "grad_norm": 0.314453125, "learning_rate": 0.00011989144571715908, "loss": 1.4866, "step": 5588 }, { "epoch": 1.7358549546733353, "grad_norm": 0.275390625, "learning_rate": 0.00011986736554746518, "loss": 1.5193, "step": 5589 }, { "epoch": 1.7361675523601126, "grad_norm": 0.291015625, "learning_rate": 0.00011984328417835309, "loss": 1.3002, "step": 5590 }, { "epoch": 1.7364801500468896, "grad_norm": 0.28125, "learning_rate": 0.00011981920161127665, "loss": 1.2616, "step": 5591 }, { "epoch": 1.7367927477336669, "grad_norm": 0.287109375, "learning_rate": 0.00011979511784768975, "loss": 1.5705, "step": 5592 }, { "epoch": 1.737105345420444, "grad_norm": 0.28125, "learning_rate": 0.00011977103288904635, "loss": 1.627, "step": 5593 }, { "epoch": 1.737417943107221, "grad_norm": 0.283203125, "learning_rate": 0.00011974694673680052, "loss": 1.4498, "step": 5594 }, { "epoch": 1.737730540793998, "grad_norm": 0.29296875, "learning_rate": 0.00011972285939240635, "loss": 1.4407, "step": 5595 }, { "epoch": 1.7380431384807753, "grad_norm": 0.279296875, "learning_rate": 0.00011969877085731802, "loss": 1.3994, "step": 5596 }, { "epoch": 1.7383557361675523, "grad_norm": 0.296875, "learning_rate": 0.00011967468113298978, "loss": 1.4255, "step": 5597 }, { "epoch": 1.7386683338543296, "grad_norm": 0.28515625, "learning_rate": 0.00011965059022087599, "loss": 1.3308, "step": 5598 }, { "epoch": 1.7389809315411067, "grad_norm": 0.294921875, "learning_rate": 0.00011962649812243101, "loss": 1.576, "step": 5599 }, { "epoch": 1.7392935292278837, "grad_norm": 0.291015625, "learning_rate": 0.00011960240483910934, "loss": 1.5344, "step": 5600 }, { "epoch": 1.7396061269146608, "grad_norm": 0.302734375, "learning_rate": 0.0001195783103723655, "loss": 1.3961, "step": 5601 }, { "epoch": 1.7399187246014378, "grad_norm": 0.283203125, "learning_rate": 0.00011955421472365413, "loss": 1.4467, "step": 5602 }, { "epoch": 1.740231322288215, "grad_norm": 0.2890625, "learning_rate": 0.00011953011789442987, "loss": 1.5196, "step": 5603 }, { "epoch": 1.7405439199749921, "grad_norm": 0.283203125, "learning_rate": 0.00011950601988614754, "loss": 1.8276, "step": 5604 }, { "epoch": 1.7408565176617694, "grad_norm": 0.29296875, "learning_rate": 0.0001194819207002619, "loss": 1.4798, "step": 5605 }, { "epoch": 1.7411691153485465, "grad_norm": 0.28515625, "learning_rate": 0.0001194578203382279, "loss": 1.6241, "step": 5606 }, { "epoch": 1.7414817130353235, "grad_norm": 0.27734375, "learning_rate": 0.00011943371880150049, "loss": 1.336, "step": 5607 }, { "epoch": 1.7417943107221006, "grad_norm": 0.27734375, "learning_rate": 0.00011940961609153472, "loss": 1.5331, "step": 5608 }, { "epoch": 1.7421069084088776, "grad_norm": 0.287109375, "learning_rate": 0.00011938551220978566, "loss": 1.4683, "step": 5609 }, { "epoch": 1.7424195060956549, "grad_norm": 0.291015625, "learning_rate": 0.00011936140715770856, "loss": 1.5798, "step": 5610 }, { "epoch": 1.7427321037824322, "grad_norm": 0.27734375, "learning_rate": 0.00011933730093675861, "loss": 1.7233, "step": 5611 }, { "epoch": 1.7430447014692092, "grad_norm": 0.310546875, "learning_rate": 0.00011931319354839118, "loss": 1.5721, "step": 5612 }, { "epoch": 1.7433572991559863, "grad_norm": 0.28515625, "learning_rate": 0.00011928908499406164, "loss": 1.7042, "step": 5613 }, { "epoch": 1.7436698968427633, "grad_norm": 0.30078125, "learning_rate": 0.00011926497527522546, "loss": 1.662, "step": 5614 }, { "epoch": 1.7439824945295404, "grad_norm": 0.2890625, "learning_rate": 0.00011924086439333817, "loss": 1.6444, "step": 5615 }, { "epoch": 1.7442950922163176, "grad_norm": 0.2890625, "learning_rate": 0.0001192167523498554, "loss": 1.5565, "step": 5616 }, { "epoch": 1.7446076899030947, "grad_norm": 0.28515625, "learning_rate": 0.00011919263914623276, "loss": 1.6223, "step": 5617 }, { "epoch": 1.744920287589872, "grad_norm": 0.287109375, "learning_rate": 0.00011916852478392607, "loss": 1.8656, "step": 5618 }, { "epoch": 1.745232885276649, "grad_norm": 0.29296875, "learning_rate": 0.0001191444092643911, "loss": 1.4532, "step": 5619 }, { "epoch": 1.745545482963426, "grad_norm": 0.291015625, "learning_rate": 0.00011912029258908372, "loss": 1.5138, "step": 5620 }, { "epoch": 1.745858080650203, "grad_norm": 0.283203125, "learning_rate": 0.00011909617475945995, "loss": 1.8386, "step": 5621 }, { "epoch": 1.7461706783369801, "grad_norm": 0.27734375, "learning_rate": 0.00011907205577697577, "loss": 1.6579, "step": 5622 }, { "epoch": 1.7464832760237574, "grad_norm": 0.287109375, "learning_rate": 0.00011904793564308727, "loss": 1.4472, "step": 5623 }, { "epoch": 1.7467958737105347, "grad_norm": 0.27734375, "learning_rate": 0.00011902381435925064, "loss": 1.7691, "step": 5624 }, { "epoch": 1.7471084713973117, "grad_norm": 0.287109375, "learning_rate": 0.00011899969192692207, "loss": 1.7234, "step": 5625 }, { "epoch": 1.7474210690840888, "grad_norm": 0.2890625, "learning_rate": 0.00011897556834755788, "loss": 1.8648, "step": 5626 }, { "epoch": 1.7477336667708658, "grad_norm": 0.296875, "learning_rate": 0.00011895144362261448, "loss": 1.605, "step": 5627 }, { "epoch": 1.7480462644576429, "grad_norm": 0.283203125, "learning_rate": 0.00011892731775354827, "loss": 1.3145, "step": 5628 }, { "epoch": 1.7483588621444202, "grad_norm": 0.326171875, "learning_rate": 0.00011890319074181576, "loss": 1.5106, "step": 5629 }, { "epoch": 1.7486714598311972, "grad_norm": 0.28125, "learning_rate": 0.00011887906258887351, "loss": 1.7029, "step": 5630 }, { "epoch": 1.7489840575179745, "grad_norm": 0.28125, "learning_rate": 0.00011885493329617825, "loss": 1.7823, "step": 5631 }, { "epoch": 1.7492966552047515, "grad_norm": 0.2890625, "learning_rate": 0.00011883080286518659, "loss": 1.4526, "step": 5632 }, { "epoch": 1.7496092528915286, "grad_norm": 0.271484375, "learning_rate": 0.00011880667129735539, "loss": 1.6187, "step": 5633 }, { "epoch": 1.7499218505783056, "grad_norm": 0.28515625, "learning_rate": 0.00011878253859414148, "loss": 1.6686, "step": 5634 }, { "epoch": 1.7502344482650827, "grad_norm": 0.275390625, "learning_rate": 0.00011875840475700175, "loss": 1.4003, "step": 5635 }, { "epoch": 1.75054704595186, "grad_norm": 0.279296875, "learning_rate": 0.00011873426978739326, "loss": 1.7285, "step": 5636 }, { "epoch": 1.7508596436386372, "grad_norm": 0.27734375, "learning_rate": 0.00011871013368677302, "loss": 1.4314, "step": 5637 }, { "epoch": 1.7511722413254143, "grad_norm": 0.291015625, "learning_rate": 0.00011868599645659815, "loss": 1.7216, "step": 5638 }, { "epoch": 1.7514848390121913, "grad_norm": 0.291015625, "learning_rate": 0.00011866185809832588, "loss": 1.6379, "step": 5639 }, { "epoch": 1.7517974366989684, "grad_norm": 0.29296875, "learning_rate": 0.00011863771861341347, "loss": 1.8494, "step": 5640 }, { "epoch": 1.7521100343857454, "grad_norm": 0.271484375, "learning_rate": 0.00011861357800331818, "loss": 1.6464, "step": 5641 }, { "epoch": 1.7524226320725227, "grad_norm": 0.287109375, "learning_rate": 0.00011858943626949752, "loss": 1.4425, "step": 5642 }, { "epoch": 1.7527352297592997, "grad_norm": 0.275390625, "learning_rate": 0.0001185652934134089, "loss": 1.258, "step": 5643 }, { "epoch": 1.753047827446077, "grad_norm": 0.298828125, "learning_rate": 0.00011854114943650983, "loss": 1.5252, "step": 5644 }, { "epoch": 1.753360425132854, "grad_norm": 0.283203125, "learning_rate": 0.00011851700434025795, "loss": 1.5082, "step": 5645 }, { "epoch": 1.753673022819631, "grad_norm": 0.2890625, "learning_rate": 0.00011849285812611093, "loss": 1.2727, "step": 5646 }, { "epoch": 1.7539856205064082, "grad_norm": 0.29296875, "learning_rate": 0.00011846871079552649, "loss": 1.5176, "step": 5647 }, { "epoch": 1.7542982181931852, "grad_norm": 0.28515625, "learning_rate": 0.00011844456234996246, "loss": 1.5197, "step": 5648 }, { "epoch": 1.7546108158799625, "grad_norm": 0.287109375, "learning_rate": 0.0001184204127908767, "loss": 1.4711, "step": 5649 }, { "epoch": 1.7549234135667398, "grad_norm": 0.29296875, "learning_rate": 0.00011839626211972712, "loss": 1.5836, "step": 5650 }, { "epoch": 1.7552360112535168, "grad_norm": 0.291015625, "learning_rate": 0.00011837211033797175, "loss": 1.4072, "step": 5651 }, { "epoch": 1.7555486089402939, "grad_norm": 0.28125, "learning_rate": 0.0001183479574470687, "loss": 1.6971, "step": 5652 }, { "epoch": 1.755861206627071, "grad_norm": 0.283203125, "learning_rate": 0.00011832380344847604, "loss": 1.5817, "step": 5653 }, { "epoch": 1.756173804313848, "grad_norm": 0.2890625, "learning_rate": 0.00011829964834365205, "loss": 1.5695, "step": 5654 }, { "epoch": 1.7564864020006252, "grad_norm": 0.28515625, "learning_rate": 0.00011827549213405497, "loss": 1.5927, "step": 5655 }, { "epoch": 1.7567989996874023, "grad_norm": 0.265625, "learning_rate": 0.00011825133482114312, "loss": 1.3896, "step": 5656 }, { "epoch": 1.7571115973741795, "grad_norm": 0.279296875, "learning_rate": 0.00011822717640637493, "loss": 1.6788, "step": 5657 }, { "epoch": 1.7574241950609566, "grad_norm": 0.275390625, "learning_rate": 0.00011820301689120887, "loss": 1.2537, "step": 5658 }, { "epoch": 1.7577367927477336, "grad_norm": 0.287109375, "learning_rate": 0.0001181788562771035, "loss": 1.4692, "step": 5659 }, { "epoch": 1.7580493904345107, "grad_norm": 0.29296875, "learning_rate": 0.00011815469456551743, "loss": 1.6666, "step": 5660 }, { "epoch": 1.7583619881212877, "grad_norm": 0.28125, "learning_rate": 0.0001181305317579093, "loss": 1.69, "step": 5661 }, { "epoch": 1.758674585808065, "grad_norm": 0.2734375, "learning_rate": 0.00011810636785573784, "loss": 1.5877, "step": 5662 }, { "epoch": 1.7589871834948423, "grad_norm": 0.28515625, "learning_rate": 0.0001180822028604619, "loss": 1.3923, "step": 5663 }, { "epoch": 1.7592997811816193, "grad_norm": 0.283203125, "learning_rate": 0.00011805803677354031, "loss": 1.6914, "step": 5664 }, { "epoch": 1.7596123788683964, "grad_norm": 0.267578125, "learning_rate": 0.00011803386959643205, "loss": 1.4907, "step": 5665 }, { "epoch": 1.7599249765551734, "grad_norm": 0.279296875, "learning_rate": 0.0001180097013305961, "loss": 1.583, "step": 5666 }, { "epoch": 1.7602375742419505, "grad_norm": 0.2890625, "learning_rate": 0.00011798553197749152, "loss": 1.4735, "step": 5667 }, { "epoch": 1.7605501719287278, "grad_norm": 0.28125, "learning_rate": 0.00011796136153857744, "loss": 1.7344, "step": 5668 }, { "epoch": 1.7608627696155048, "grad_norm": 0.287109375, "learning_rate": 0.00011793719001531311, "loss": 1.4779, "step": 5669 }, { "epoch": 1.761175367302282, "grad_norm": 0.265625, "learning_rate": 0.00011791301740915774, "loss": 1.5088, "step": 5670 }, { "epoch": 1.7614879649890591, "grad_norm": 0.287109375, "learning_rate": 0.00011788884372157072, "loss": 1.4494, "step": 5671 }, { "epoch": 1.7618005626758362, "grad_norm": 0.296875, "learning_rate": 0.00011786466895401136, "loss": 1.5749, "step": 5672 }, { "epoch": 1.7621131603626132, "grad_norm": 0.28515625, "learning_rate": 0.00011784049310793919, "loss": 1.3202, "step": 5673 }, { "epoch": 1.7624257580493903, "grad_norm": 0.283203125, "learning_rate": 0.00011781631618481371, "loss": 1.4288, "step": 5674 }, { "epoch": 1.7627383557361676, "grad_norm": 0.302734375, "learning_rate": 0.00011779213818609451, "loss": 1.4675, "step": 5675 }, { "epoch": 1.7630509534229448, "grad_norm": 0.287109375, "learning_rate": 0.00011776795911324127, "loss": 1.6259, "step": 5676 }, { "epoch": 1.7633635511097219, "grad_norm": 0.28515625, "learning_rate": 0.00011774377896771371, "loss": 1.7223, "step": 5677 }, { "epoch": 1.763676148796499, "grad_norm": 0.2890625, "learning_rate": 0.0001177195977509716, "loss": 1.5379, "step": 5678 }, { "epoch": 1.763988746483276, "grad_norm": 0.27734375, "learning_rate": 0.00011769541546447478, "loss": 1.4777, "step": 5679 }, { "epoch": 1.764301344170053, "grad_norm": 0.28125, "learning_rate": 0.00011767123210968316, "loss": 1.8492, "step": 5680 }, { "epoch": 1.7646139418568303, "grad_norm": 0.26953125, "learning_rate": 0.00011764704768805677, "loss": 1.3462, "step": 5681 }, { "epoch": 1.7649265395436073, "grad_norm": 0.287109375, "learning_rate": 0.00011762286220105558, "loss": 1.5042, "step": 5682 }, { "epoch": 1.7652391372303846, "grad_norm": 0.298828125, "learning_rate": 0.0001175986756501398, "loss": 1.7829, "step": 5683 }, { "epoch": 1.7655517349171617, "grad_norm": 0.283203125, "learning_rate": 0.00011757448803676951, "loss": 1.6488, "step": 5684 }, { "epoch": 1.7658643326039387, "grad_norm": 0.28125, "learning_rate": 0.00011755029936240501, "loss": 1.5213, "step": 5685 }, { "epoch": 1.7661769302907158, "grad_norm": 0.287109375, "learning_rate": 0.00011752610962850652, "loss": 1.7373, "step": 5686 }, { "epoch": 1.7664895279774928, "grad_norm": 0.294921875, "learning_rate": 0.0001175019188365345, "loss": 1.5355, "step": 5687 }, { "epoch": 1.76680212566427, "grad_norm": 0.28125, "learning_rate": 0.00011747772698794932, "loss": 1.3043, "step": 5688 }, { "epoch": 1.7671147233510474, "grad_norm": 0.298828125, "learning_rate": 0.00011745353408421154, "loss": 1.5752, "step": 5689 }, { "epoch": 1.7674273210378244, "grad_norm": 0.279296875, "learning_rate": 0.0001174293401267816, "loss": 1.5955, "step": 5690 }, { "epoch": 1.7677399187246015, "grad_norm": 0.26171875, "learning_rate": 0.00011740514511712022, "loss": 1.3573, "step": 5691 }, { "epoch": 1.7680525164113785, "grad_norm": 0.28515625, "learning_rate": 0.00011738094905668802, "loss": 1.5345, "step": 5692 }, { "epoch": 1.7683651140981556, "grad_norm": 0.287109375, "learning_rate": 0.0001173567519469458, "loss": 1.5061, "step": 5693 }, { "epoch": 1.7686777117849328, "grad_norm": 0.296875, "learning_rate": 0.00011733255378935432, "loss": 1.4444, "step": 5694 }, { "epoch": 1.7689903094717099, "grad_norm": 0.287109375, "learning_rate": 0.00011730835458537455, "loss": 1.524, "step": 5695 }, { "epoch": 1.7693029071584871, "grad_norm": 0.287109375, "learning_rate": 0.00011728415433646728, "loss": 1.5181, "step": 5696 }, { "epoch": 1.7696155048452642, "grad_norm": 0.287109375, "learning_rate": 0.00011725995304409363, "loss": 1.43, "step": 5697 }, { "epoch": 1.7699281025320412, "grad_norm": 0.310546875, "learning_rate": 0.00011723575070971459, "loss": 1.4528, "step": 5698 }, { "epoch": 1.7702407002188183, "grad_norm": 0.279296875, "learning_rate": 0.00011721154733479135, "loss": 1.8226, "step": 5699 }, { "epoch": 1.7705532979055953, "grad_norm": 0.291015625, "learning_rate": 0.00011718734292078503, "loss": 1.4686, "step": 5700 }, { "epoch": 1.7708658955923726, "grad_norm": 0.298828125, "learning_rate": 0.00011716313746915696, "loss": 1.4765, "step": 5701 }, { "epoch": 1.77117849327915, "grad_norm": 0.29296875, "learning_rate": 0.00011713893098136839, "loss": 1.6228, "step": 5702 }, { "epoch": 1.771491090965927, "grad_norm": 0.279296875, "learning_rate": 0.00011711472345888071, "loss": 1.4686, "step": 5703 }, { "epoch": 1.771803688652704, "grad_norm": 0.287109375, "learning_rate": 0.00011709051490315534, "loss": 1.5617, "step": 5704 }, { "epoch": 1.772116286339481, "grad_norm": 0.2890625, "learning_rate": 0.00011706630531565386, "loss": 1.4651, "step": 5705 }, { "epoch": 1.772428884026258, "grad_norm": 0.26953125, "learning_rate": 0.00011704209469783773, "loss": 1.6951, "step": 5706 }, { "epoch": 1.7727414817130354, "grad_norm": 0.298828125, "learning_rate": 0.00011701788305116868, "loss": 1.3827, "step": 5707 }, { "epoch": 1.7730540793998124, "grad_norm": 0.27734375, "learning_rate": 0.00011699367037710829, "loss": 1.63, "step": 5708 }, { "epoch": 1.7733666770865897, "grad_norm": 0.279296875, "learning_rate": 0.0001169694566771184, "loss": 1.3955, "step": 5709 }, { "epoch": 1.7736792747733667, "grad_norm": 0.29296875, "learning_rate": 0.00011694524195266077, "loss": 1.5899, "step": 5710 }, { "epoch": 1.7739918724601438, "grad_norm": 0.271484375, "learning_rate": 0.00011692102620519729, "loss": 1.1515, "step": 5711 }, { "epoch": 1.7743044701469208, "grad_norm": 0.28125, "learning_rate": 0.0001168968094361899, "loss": 1.8175, "step": 5712 }, { "epoch": 1.7746170678336979, "grad_norm": 0.28515625, "learning_rate": 0.0001168725916471006, "loss": 1.6077, "step": 5713 }, { "epoch": 1.7749296655204752, "grad_norm": 0.28125, "learning_rate": 0.00011684837283939143, "loss": 1.61, "step": 5714 }, { "epoch": 1.7752422632072524, "grad_norm": 0.283203125, "learning_rate": 0.00011682415301452452, "loss": 1.3214, "step": 5715 }, { "epoch": 1.7755548608940295, "grad_norm": 0.296875, "learning_rate": 0.00011679993217396206, "loss": 1.6774, "step": 5716 }, { "epoch": 1.7758674585808065, "grad_norm": 0.2890625, "learning_rate": 0.00011677571031916627, "loss": 1.6886, "step": 5717 }, { "epoch": 1.7761800562675836, "grad_norm": 0.2890625, "learning_rate": 0.00011675148745159949, "loss": 1.554, "step": 5718 }, { "epoch": 1.7764926539543606, "grad_norm": 0.283203125, "learning_rate": 0.00011672726357272408, "loss": 1.3875, "step": 5719 }, { "epoch": 1.776805251641138, "grad_norm": 0.287109375, "learning_rate": 0.00011670303868400241, "loss": 1.2662, "step": 5720 }, { "epoch": 1.777117849327915, "grad_norm": 0.287109375, "learning_rate": 0.00011667881278689705, "loss": 1.4726, "step": 5721 }, { "epoch": 1.7774304470146922, "grad_norm": 0.287109375, "learning_rate": 0.00011665458588287048, "loss": 1.4821, "step": 5722 }, { "epoch": 1.7777430447014693, "grad_norm": 0.275390625, "learning_rate": 0.00011663035797338535, "loss": 1.5008, "step": 5723 }, { "epoch": 1.7780556423882463, "grad_norm": 0.279296875, "learning_rate": 0.00011660612905990431, "loss": 1.3734, "step": 5724 }, { "epoch": 1.7783682400750234, "grad_norm": 0.27734375, "learning_rate": 0.00011658189914389011, "loss": 1.869, "step": 5725 }, { "epoch": 1.7786808377618004, "grad_norm": 0.275390625, "learning_rate": 0.00011655766822680552, "loss": 1.5493, "step": 5726 }, { "epoch": 1.7789934354485777, "grad_norm": 0.27734375, "learning_rate": 0.0001165334363101134, "loss": 1.5773, "step": 5727 }, { "epoch": 1.779306033135355, "grad_norm": 0.28125, "learning_rate": 0.00011650920339527665, "loss": 1.2535, "step": 5728 }, { "epoch": 1.779618630822132, "grad_norm": 0.314453125, "learning_rate": 0.00011648496948375827, "loss": 1.5035, "step": 5729 }, { "epoch": 1.779931228508909, "grad_norm": 0.29296875, "learning_rate": 0.00011646073457702127, "loss": 1.5281, "step": 5730 }, { "epoch": 1.780243826195686, "grad_norm": 0.287109375, "learning_rate": 0.00011643649867652875, "loss": 1.5663, "step": 5731 }, { "epoch": 1.7805564238824632, "grad_norm": 0.287109375, "learning_rate": 0.00011641226178374385, "loss": 1.4889, "step": 5732 }, { "epoch": 1.7808690215692404, "grad_norm": 0.2734375, "learning_rate": 0.0001163880239001298, "loss": 1.5487, "step": 5733 }, { "epoch": 1.7811816192560175, "grad_norm": 0.287109375, "learning_rate": 0.00011636378502714984, "loss": 1.4356, "step": 5734 }, { "epoch": 1.7814942169427948, "grad_norm": 0.283203125, "learning_rate": 0.00011633954516626735, "loss": 1.821, "step": 5735 }, { "epoch": 1.7818068146295718, "grad_norm": 0.32421875, "learning_rate": 0.00011631530431894571, "loss": 1.3116, "step": 5736 }, { "epoch": 1.7821194123163488, "grad_norm": 0.26953125, "learning_rate": 0.00011629106248664834, "loss": 1.5322, "step": 5737 }, { "epoch": 1.782432010003126, "grad_norm": 0.279296875, "learning_rate": 0.00011626681967083877, "loss": 1.7254, "step": 5738 }, { "epoch": 1.782744607689903, "grad_norm": 0.28125, "learning_rate": 0.00011624257587298056, "loss": 1.6379, "step": 5739 }, { "epoch": 1.7830572053766802, "grad_norm": 0.27734375, "learning_rate": 0.00011621833109453734, "loss": 1.4709, "step": 5740 }, { "epoch": 1.7833698030634575, "grad_norm": 0.287109375, "learning_rate": 0.00011619408533697282, "loss": 1.604, "step": 5741 }, { "epoch": 1.7836824007502345, "grad_norm": 0.283203125, "learning_rate": 0.00011616983860175075, "loss": 1.3792, "step": 5742 }, { "epoch": 1.7839949984370116, "grad_norm": 0.283203125, "learning_rate": 0.0001161455908903349, "loss": 1.5704, "step": 5743 }, { "epoch": 1.7843075961237886, "grad_norm": 0.2734375, "learning_rate": 0.00011612134220418913, "loss": 1.5962, "step": 5744 }, { "epoch": 1.7846201938105657, "grad_norm": 0.283203125, "learning_rate": 0.00011609709254477741, "loss": 1.4397, "step": 5745 }, { "epoch": 1.784932791497343, "grad_norm": 0.287109375, "learning_rate": 0.0001160728419135637, "loss": 1.5748, "step": 5746 }, { "epoch": 1.78524538918412, "grad_norm": 0.28515625, "learning_rate": 0.00011604859031201202, "loss": 1.5306, "step": 5747 }, { "epoch": 1.7855579868708973, "grad_norm": 0.27734375, "learning_rate": 0.00011602433774158654, "loss": 1.6912, "step": 5748 }, { "epoch": 1.7858705845576743, "grad_norm": 0.3203125, "learning_rate": 0.00011600008420375135, "loss": 1.5453, "step": 5749 }, { "epoch": 1.7861831822444514, "grad_norm": 0.287109375, "learning_rate": 0.00011597582969997067, "loss": 1.3837, "step": 5750 }, { "epoch": 1.7864957799312284, "grad_norm": 0.34765625, "learning_rate": 0.00011595157423170879, "loss": 1.457, "step": 5751 }, { "epoch": 1.7868083776180055, "grad_norm": 0.287109375, "learning_rate": 0.00011592731780043005, "loss": 1.3404, "step": 5752 }, { "epoch": 1.7871209753047828, "grad_norm": 0.29296875, "learning_rate": 0.00011590306040759886, "loss": 1.4875, "step": 5753 }, { "epoch": 1.78743357299156, "grad_norm": 0.27734375, "learning_rate": 0.00011587880205467965, "loss": 1.4653, "step": 5754 }, { "epoch": 1.787746170678337, "grad_norm": 0.345703125, "learning_rate": 0.00011585454274313692, "loss": 2.3537, "step": 5755 }, { "epoch": 1.7880587683651141, "grad_norm": 0.2890625, "learning_rate": 0.00011583028247443522, "loss": 1.8865, "step": 5756 }, { "epoch": 1.7883713660518912, "grad_norm": 0.28515625, "learning_rate": 0.00011580602125003921, "loss": 1.7538, "step": 5757 }, { "epoch": 1.7886839637386682, "grad_norm": 0.3125, "learning_rate": 0.00011578175907141354, "loss": 1.6741, "step": 5758 }, { "epoch": 1.7889965614254455, "grad_norm": 0.275390625, "learning_rate": 0.00011575749594002299, "loss": 1.5212, "step": 5759 }, { "epoch": 1.7893091591122225, "grad_norm": 0.296875, "learning_rate": 0.00011573323185733231, "loss": 1.6821, "step": 5760 }, { "epoch": 1.7896217567989998, "grad_norm": 0.27734375, "learning_rate": 0.00011570896682480638, "loss": 1.3915, "step": 5761 }, { "epoch": 1.7899343544857769, "grad_norm": 0.27734375, "learning_rate": 0.0001156847008439101, "loss": 1.7713, "step": 5762 }, { "epoch": 1.790246952172554, "grad_norm": 0.291015625, "learning_rate": 0.00011566043391610843, "loss": 1.6889, "step": 5763 }, { "epoch": 1.790559549859331, "grad_norm": 0.302734375, "learning_rate": 0.00011563616604286643, "loss": 1.4813, "step": 5764 }, { "epoch": 1.790872147546108, "grad_norm": 0.279296875, "learning_rate": 0.00011561189722564918, "loss": 1.5978, "step": 5765 }, { "epoch": 1.7911847452328853, "grad_norm": 0.2890625, "learning_rate": 0.00011558762746592175, "loss": 1.4352, "step": 5766 }, { "epoch": 1.7914973429196626, "grad_norm": 0.2734375, "learning_rate": 0.00011556335676514942, "loss": 1.4835, "step": 5767 }, { "epoch": 1.7918099406064396, "grad_norm": 0.3125, "learning_rate": 0.00011553908512479738, "loss": 1.5135, "step": 5768 }, { "epoch": 1.7921225382932167, "grad_norm": 0.287109375, "learning_rate": 0.000115514812546331, "loss": 1.3119, "step": 5769 }, { "epoch": 1.7924351359799937, "grad_norm": 0.294921875, "learning_rate": 0.00011549053903121561, "loss": 1.9613, "step": 5770 }, { "epoch": 1.7927477336667708, "grad_norm": 0.287109375, "learning_rate": 0.00011546626458091665, "loss": 2.0067, "step": 5771 }, { "epoch": 1.793060331353548, "grad_norm": 0.279296875, "learning_rate": 0.00011544198919689957, "loss": 1.2376, "step": 5772 }, { "epoch": 1.793372929040325, "grad_norm": 0.291015625, "learning_rate": 0.00011541771288062993, "loss": 1.6036, "step": 5773 }, { "epoch": 1.7936855267271024, "grad_norm": 0.294921875, "learning_rate": 0.0001153934356335733, "loss": 1.7418, "step": 5774 }, { "epoch": 1.7939981244138794, "grad_norm": 0.2734375, "learning_rate": 0.00011536915745719537, "loss": 1.5465, "step": 5775 }, { "epoch": 1.7943107221006565, "grad_norm": 0.27734375, "learning_rate": 0.00011534487835296179, "loss": 1.543, "step": 5776 }, { "epoch": 1.7946233197874335, "grad_norm": 0.291015625, "learning_rate": 0.0001153205983223384, "loss": 1.5265, "step": 5777 }, { "epoch": 1.7949359174742106, "grad_norm": 0.298828125, "learning_rate": 0.00011529631736679092, "loss": 1.351, "step": 5778 }, { "epoch": 1.7952485151609878, "grad_norm": 0.291015625, "learning_rate": 0.0001152720354877853, "loss": 1.6525, "step": 5779 }, { "epoch": 1.7955611128477649, "grad_norm": 0.291015625, "learning_rate": 0.00011524775268678739, "loss": 1.6968, "step": 5780 }, { "epoch": 1.7958737105345421, "grad_norm": 0.27734375, "learning_rate": 0.00011522346896526326, "loss": 1.4864, "step": 5781 }, { "epoch": 1.7961863082213192, "grad_norm": 0.302734375, "learning_rate": 0.00011519918432467887, "loss": 1.6597, "step": 5782 }, { "epoch": 1.7964989059080962, "grad_norm": 0.26953125, "learning_rate": 0.00011517489876650041, "loss": 1.5225, "step": 5783 }, { "epoch": 1.7968115035948733, "grad_norm": 0.275390625, "learning_rate": 0.00011515061229219392, "loss": 1.6224, "step": 5784 }, { "epoch": 1.7971241012816506, "grad_norm": 0.283203125, "learning_rate": 0.0001151263249032257, "loss": 1.3991, "step": 5785 }, { "epoch": 1.7974366989684276, "grad_norm": 0.279296875, "learning_rate": 0.00011510203660106195, "loss": 1.5783, "step": 5786 }, { "epoch": 1.7977492966552049, "grad_norm": 0.279296875, "learning_rate": 0.00011507774738716901, "loss": 1.7398, "step": 5787 }, { "epoch": 1.798061894341982, "grad_norm": 0.287109375, "learning_rate": 0.00011505345726301325, "loss": 1.5499, "step": 5788 }, { "epoch": 1.798374492028759, "grad_norm": 0.271484375, "learning_rate": 0.00011502916623006107, "loss": 1.4048, "step": 5789 }, { "epoch": 1.798687089715536, "grad_norm": 0.29296875, "learning_rate": 0.00011500487428977901, "loss": 1.6217, "step": 5790 }, { "epoch": 1.798999687402313, "grad_norm": 0.287109375, "learning_rate": 0.00011498058144363356, "loss": 1.4457, "step": 5791 }, { "epoch": 1.7993122850890904, "grad_norm": 0.29296875, "learning_rate": 0.00011495628769309128, "loss": 1.7167, "step": 5792 }, { "epoch": 1.7996248827758674, "grad_norm": 0.279296875, "learning_rate": 0.0001149319930396189, "loss": 1.4423, "step": 5793 }, { "epoch": 1.7999374804626447, "grad_norm": 0.271484375, "learning_rate": 0.00011490769748468304, "loss": 1.3099, "step": 5794 }, { "epoch": 1.8002500781494217, "grad_norm": 0.29296875, "learning_rate": 0.00011488340102975051, "loss": 1.3818, "step": 5795 }, { "epoch": 1.8005626758361988, "grad_norm": 0.30078125, "learning_rate": 0.00011485910367628809, "loss": 1.5885, "step": 5796 }, { "epoch": 1.8008752735229758, "grad_norm": 0.283203125, "learning_rate": 0.00011483480542576264, "loss": 1.6998, "step": 5797 }, { "epoch": 1.801187871209753, "grad_norm": 0.2890625, "learning_rate": 0.00011481050627964107, "loss": 1.4423, "step": 5798 }, { "epoch": 1.8015004688965301, "grad_norm": 0.28125, "learning_rate": 0.00011478620623939039, "loss": 1.533, "step": 5799 }, { "epoch": 1.8018130665833074, "grad_norm": 0.291015625, "learning_rate": 0.00011476190530647755, "loss": 1.3115, "step": 5800 }, { "epoch": 1.8021256642700845, "grad_norm": 0.29296875, "learning_rate": 0.00011473760348236973, "loss": 1.9355, "step": 5801 }, { "epoch": 1.8024382619568615, "grad_norm": 0.291015625, "learning_rate": 0.00011471330076853398, "loss": 1.6219, "step": 5802 }, { "epoch": 1.8027508596436386, "grad_norm": 0.2890625, "learning_rate": 0.00011468899716643753, "loss": 1.6172, "step": 5803 }, { "epoch": 1.8030634573304156, "grad_norm": 0.28515625, "learning_rate": 0.00011466469267754756, "loss": 1.6974, "step": 5804 }, { "epoch": 1.803376055017193, "grad_norm": 0.306640625, "learning_rate": 0.00011464038730333144, "loss": 1.465, "step": 5805 }, { "epoch": 1.80368865270397, "grad_norm": 0.2890625, "learning_rate": 0.00011461608104525647, "loss": 1.5435, "step": 5806 }, { "epoch": 1.8040012503907472, "grad_norm": 0.302734375, "learning_rate": 0.00011459177390479008, "loss": 1.537, "step": 5807 }, { "epoch": 1.8043138480775243, "grad_norm": 0.28515625, "learning_rate": 0.0001145674658833997, "loss": 1.6696, "step": 5808 }, { "epoch": 1.8046264457643013, "grad_norm": 0.283203125, "learning_rate": 0.00011454315698255284, "loss": 1.3391, "step": 5809 }, { "epoch": 1.8049390434510784, "grad_norm": 0.294921875, "learning_rate": 0.00011451884720371704, "loss": 1.4825, "step": 5810 }, { "epoch": 1.8052516411378556, "grad_norm": 0.291015625, "learning_rate": 0.00011449453654835996, "loss": 1.9999, "step": 5811 }, { "epoch": 1.8055642388246327, "grad_norm": 0.26953125, "learning_rate": 0.00011447022501794924, "loss": 1.5462, "step": 5812 }, { "epoch": 1.80587683651141, "grad_norm": 0.298828125, "learning_rate": 0.00011444591261395254, "loss": 1.3408, "step": 5813 }, { "epoch": 1.806189434198187, "grad_norm": 0.28515625, "learning_rate": 0.00011442159933783776, "loss": 1.5263, "step": 5814 }, { "epoch": 1.806502031884964, "grad_norm": 0.287109375, "learning_rate": 0.00011439728519107262, "loss": 1.3573, "step": 5815 }, { "epoch": 1.806814629571741, "grad_norm": 0.279296875, "learning_rate": 0.000114372970175125, "loss": 1.8411, "step": 5816 }, { "epoch": 1.8071272272585182, "grad_norm": 0.298828125, "learning_rate": 0.00011434865429146291, "loss": 1.6114, "step": 5817 }, { "epoch": 1.8074398249452954, "grad_norm": 0.294921875, "learning_rate": 0.00011432433754155425, "loss": 1.5172, "step": 5818 }, { "epoch": 1.8077524226320725, "grad_norm": 0.275390625, "learning_rate": 0.00011430001992686705, "loss": 1.4666, "step": 5819 }, { "epoch": 1.8080650203188497, "grad_norm": 0.29296875, "learning_rate": 0.00011427570144886946, "loss": 1.7839, "step": 5820 }, { "epoch": 1.8083776180056268, "grad_norm": 0.294921875, "learning_rate": 0.00011425138210902957, "loss": 1.5083, "step": 5821 }, { "epoch": 1.8086902156924038, "grad_norm": 0.291015625, "learning_rate": 0.00011422706190881557, "loss": 1.3222, "step": 5822 }, { "epoch": 1.809002813379181, "grad_norm": 0.275390625, "learning_rate": 0.00011420274084969573, "loss": 1.3257, "step": 5823 }, { "epoch": 1.809315411065958, "grad_norm": 0.28125, "learning_rate": 0.00011417841893313835, "loss": 1.6301, "step": 5824 }, { "epoch": 1.8096280087527352, "grad_norm": 0.294921875, "learning_rate": 0.0001141540961606117, "loss": 1.3007, "step": 5825 }, { "epoch": 1.8099406064395125, "grad_norm": 0.279296875, "learning_rate": 0.00011412977253358425, "loss": 1.9514, "step": 5826 }, { "epoch": 1.8102532041262895, "grad_norm": 0.2890625, "learning_rate": 0.00011410544805352444, "loss": 1.4303, "step": 5827 }, { "epoch": 1.8105658018130666, "grad_norm": 0.2890625, "learning_rate": 0.00011408112272190072, "loss": 1.8044, "step": 5828 }, { "epoch": 1.8108783994998436, "grad_norm": 0.296875, "learning_rate": 0.00011405679654018171, "loss": 1.5914, "step": 5829 }, { "epoch": 1.8111909971866207, "grad_norm": 0.294921875, "learning_rate": 0.000114032469509836, "loss": 1.6985, "step": 5830 }, { "epoch": 1.811503594873398, "grad_norm": 0.3046875, "learning_rate": 0.00011400814163233217, "loss": 1.4168, "step": 5831 }, { "epoch": 1.811816192560175, "grad_norm": 0.296875, "learning_rate": 0.00011398381290913902, "loss": 1.5645, "step": 5832 }, { "epoch": 1.8121287902469523, "grad_norm": 0.275390625, "learning_rate": 0.00011395948334172524, "loss": 1.7897, "step": 5833 }, { "epoch": 1.8124413879337293, "grad_norm": 0.30859375, "learning_rate": 0.00011393515293155967, "loss": 1.7071, "step": 5834 }, { "epoch": 1.8127539856205064, "grad_norm": 0.2890625, "learning_rate": 0.00011391082168011116, "loss": 1.659, "step": 5835 }, { "epoch": 1.8130665833072834, "grad_norm": 0.279296875, "learning_rate": 0.00011388648958884862, "loss": 1.4107, "step": 5836 }, { "epoch": 1.8133791809940605, "grad_norm": 0.291015625, "learning_rate": 0.000113862156659241, "loss": 1.3115, "step": 5837 }, { "epoch": 1.8136917786808378, "grad_norm": 0.291015625, "learning_rate": 0.00011383782289275733, "loss": 1.3619, "step": 5838 }, { "epoch": 1.814004376367615, "grad_norm": 0.357421875, "learning_rate": 0.00011381348829086665, "loss": 2.0378, "step": 5839 }, { "epoch": 1.814316974054392, "grad_norm": 0.291015625, "learning_rate": 0.00011378915285503808, "loss": 1.4385, "step": 5840 }, { "epoch": 1.8146295717411691, "grad_norm": 0.294921875, "learning_rate": 0.00011376481658674079, "loss": 1.6725, "step": 5841 }, { "epoch": 1.8149421694279462, "grad_norm": 0.2734375, "learning_rate": 0.00011374047948744398, "loss": 1.6127, "step": 5842 }, { "epoch": 1.8152547671147232, "grad_norm": 0.296875, "learning_rate": 0.0001137161415586169, "loss": 1.7975, "step": 5843 }, { "epoch": 1.8155673648015005, "grad_norm": 0.28125, "learning_rate": 0.00011369180280172888, "loss": 1.6108, "step": 5844 }, { "epoch": 1.8158799624882775, "grad_norm": 0.27734375, "learning_rate": 0.00011366746321824927, "loss": 1.5536, "step": 5845 }, { "epoch": 1.8161925601750548, "grad_norm": 0.296875, "learning_rate": 0.00011364312280964751, "loss": 1.4006, "step": 5846 }, { "epoch": 1.8165051578618319, "grad_norm": 0.294921875, "learning_rate": 0.00011361878157739302, "loss": 1.3971, "step": 5847 }, { "epoch": 1.816817755548609, "grad_norm": 0.279296875, "learning_rate": 0.00011359443952295537, "loss": 1.6335, "step": 5848 }, { "epoch": 1.817130353235386, "grad_norm": 0.296875, "learning_rate": 0.00011357009664780404, "loss": 1.5704, "step": 5849 }, { "epoch": 1.817442950922163, "grad_norm": 0.296875, "learning_rate": 0.0001135457529534087, "loss": 1.7051, "step": 5850 }, { "epoch": 1.8177555486089403, "grad_norm": 0.2890625, "learning_rate": 0.00011352140844123897, "loss": 1.7543, "step": 5851 }, { "epoch": 1.8180681462957176, "grad_norm": 0.2890625, "learning_rate": 0.00011349706311276462, "loss": 1.7139, "step": 5852 }, { "epoch": 1.8183807439824946, "grad_norm": 0.296875, "learning_rate": 0.00011347271696945536, "loss": 1.5561, "step": 5853 }, { "epoch": 1.8186933416692717, "grad_norm": 0.2734375, "learning_rate": 0.00011344837001278101, "loss": 1.7376, "step": 5854 }, { "epoch": 1.8190059393560487, "grad_norm": 0.291015625, "learning_rate": 0.0001134240222442114, "loss": 1.4673, "step": 5855 }, { "epoch": 1.8193185370428258, "grad_norm": 0.2890625, "learning_rate": 0.00011339967366521648, "loss": 1.9459, "step": 5856 }, { "epoch": 1.819631134729603, "grad_norm": 0.28515625, "learning_rate": 0.00011337532427726616, "loss": 1.5291, "step": 5857 }, { "epoch": 1.81994373241638, "grad_norm": 0.291015625, "learning_rate": 0.00011335097408183051, "loss": 1.5755, "step": 5858 }, { "epoch": 1.8202563301031573, "grad_norm": 0.28515625, "learning_rate": 0.0001133266230803795, "loss": 1.6344, "step": 5859 }, { "epoch": 1.8205689277899344, "grad_norm": 0.28125, "learning_rate": 0.00011330227127438333, "loss": 1.5934, "step": 5860 }, { "epoch": 1.8208815254767114, "grad_norm": 0.265625, "learning_rate": 0.00011327791866531203, "loss": 1.6294, "step": 5861 }, { "epoch": 1.8211941231634885, "grad_norm": 0.28125, "learning_rate": 0.00011325356525463591, "loss": 1.8007, "step": 5862 }, { "epoch": 1.8215067208502655, "grad_norm": 0.28515625, "learning_rate": 0.00011322921104382511, "loss": 1.33, "step": 5863 }, { "epoch": 1.8218193185370428, "grad_norm": 0.291015625, "learning_rate": 0.00011320485603435002, "loss": 1.7424, "step": 5864 }, { "epoch": 1.82213191622382, "grad_norm": 0.283203125, "learning_rate": 0.00011318050022768096, "loss": 1.6708, "step": 5865 }, { "epoch": 1.8224445139105971, "grad_norm": 0.28515625, "learning_rate": 0.00011315614362528828, "loss": 1.6851, "step": 5866 }, { "epoch": 1.8227571115973742, "grad_norm": 0.28125, "learning_rate": 0.00011313178622864245, "loss": 1.4527, "step": 5867 }, { "epoch": 1.8230697092841512, "grad_norm": 0.27734375, "learning_rate": 0.00011310742803921395, "loss": 1.3163, "step": 5868 }, { "epoch": 1.8233823069709283, "grad_norm": 0.291015625, "learning_rate": 0.0001130830690584733, "loss": 1.9049, "step": 5869 }, { "epoch": 1.8236949046577056, "grad_norm": 0.28125, "learning_rate": 0.0001130587092878911, "loss": 1.4765, "step": 5870 }, { "epoch": 1.8240075023444826, "grad_norm": 0.296875, "learning_rate": 0.00011303434872893801, "loss": 1.6214, "step": 5871 }, { "epoch": 1.8243201000312599, "grad_norm": 0.291015625, "learning_rate": 0.00011300998738308468, "loss": 1.6034, "step": 5872 }, { "epoch": 1.824632697718037, "grad_norm": 0.279296875, "learning_rate": 0.00011298562525180178, "loss": 1.298, "step": 5873 }, { "epoch": 1.824945295404814, "grad_norm": 0.291015625, "learning_rate": 0.0001129612623365602, "loss": 1.6643, "step": 5874 }, { "epoch": 1.825257893091591, "grad_norm": 0.2890625, "learning_rate": 0.00011293689863883063, "loss": 1.494, "step": 5875 }, { "epoch": 1.825570490778368, "grad_norm": 0.306640625, "learning_rate": 0.00011291253416008405, "loss": 1.6174, "step": 5876 }, { "epoch": 1.8258830884651454, "grad_norm": 0.26953125, "learning_rate": 0.00011288816890179132, "loss": 1.6457, "step": 5877 }, { "epoch": 1.8261956861519226, "grad_norm": 0.28125, "learning_rate": 0.00011286380286542342, "loss": 1.4945, "step": 5878 }, { "epoch": 1.8265082838386997, "grad_norm": 0.279296875, "learning_rate": 0.00011283943605245133, "loss": 1.5943, "step": 5879 }, { "epoch": 1.8268208815254767, "grad_norm": 0.3046875, "learning_rate": 0.00011281506846434613, "loss": 1.4638, "step": 5880 }, { "epoch": 1.8271334792122538, "grad_norm": 0.298828125, "learning_rate": 0.0001127907001025789, "loss": 1.7105, "step": 5881 }, { "epoch": 1.8274460768990308, "grad_norm": 0.291015625, "learning_rate": 0.00011276633096862083, "loss": 1.4812, "step": 5882 }, { "epoch": 1.827758674585808, "grad_norm": 0.271484375, "learning_rate": 0.00011274196106394308, "loss": 1.2702, "step": 5883 }, { "epoch": 1.8280712722725851, "grad_norm": 0.291015625, "learning_rate": 0.00011271759039001693, "loss": 1.578, "step": 5884 }, { "epoch": 1.8283838699593624, "grad_norm": 0.28125, "learning_rate": 0.0001126932189483136, "loss": 1.7245, "step": 5885 }, { "epoch": 1.8286964676461395, "grad_norm": 0.28515625, "learning_rate": 0.00011266884674030449, "loss": 1.7638, "step": 5886 }, { "epoch": 1.8290090653329165, "grad_norm": 0.30078125, "learning_rate": 0.00011264447376746095, "loss": 1.7444, "step": 5887 }, { "epoch": 1.8293216630196936, "grad_norm": 0.294921875, "learning_rate": 0.00011262010003125443, "loss": 1.4711, "step": 5888 }, { "epoch": 1.8296342607064706, "grad_norm": 0.283203125, "learning_rate": 0.00011259572553315636, "loss": 1.6199, "step": 5889 }, { "epoch": 1.8299468583932479, "grad_norm": 0.298828125, "learning_rate": 0.00011257135027463832, "loss": 1.4648, "step": 5890 }, { "epoch": 1.8302594560800252, "grad_norm": 0.296875, "learning_rate": 0.00011254697425717182, "loss": 1.5086, "step": 5891 }, { "epoch": 1.8305720537668022, "grad_norm": 0.287109375, "learning_rate": 0.0001125225974822285, "loss": 1.6374, "step": 5892 }, { "epoch": 1.8308846514535793, "grad_norm": 0.283203125, "learning_rate": 0.00011249821995128003, "loss": 1.5425, "step": 5893 }, { "epoch": 1.8311972491403563, "grad_norm": 0.2734375, "learning_rate": 0.00011247384166579808, "loss": 1.3473, "step": 5894 }, { "epoch": 1.8315098468271334, "grad_norm": 0.275390625, "learning_rate": 0.00011244946262725442, "loss": 1.4025, "step": 5895 }, { "epoch": 1.8318224445139106, "grad_norm": 0.28515625, "learning_rate": 0.00011242508283712084, "loss": 1.6087, "step": 5896 }, { "epoch": 1.8321350422006877, "grad_norm": 0.30078125, "learning_rate": 0.00011240070229686916, "loss": 1.544, "step": 5897 }, { "epoch": 1.832447639887465, "grad_norm": 0.279296875, "learning_rate": 0.00011237632100797129, "loss": 1.6119, "step": 5898 }, { "epoch": 1.832760237574242, "grad_norm": 0.30078125, "learning_rate": 0.00011235193897189913, "loss": 1.4214, "step": 5899 }, { "epoch": 1.833072835261019, "grad_norm": 0.283203125, "learning_rate": 0.00011232755619012469, "loss": 1.4287, "step": 5900 }, { "epoch": 1.833385432947796, "grad_norm": 0.283203125, "learning_rate": 0.00011230317266412, "loss": 1.2955, "step": 5901 }, { "epoch": 1.8336980306345732, "grad_norm": 0.279296875, "learning_rate": 0.00011227878839535708, "loss": 1.7294, "step": 5902 }, { "epoch": 1.8340106283213504, "grad_norm": 0.283203125, "learning_rate": 0.00011225440338530804, "loss": 1.3389, "step": 5903 }, { "epoch": 1.8343232260081277, "grad_norm": 0.291015625, "learning_rate": 0.00011223001763544507, "loss": 1.572, "step": 5904 }, { "epoch": 1.8346358236949047, "grad_norm": 0.275390625, "learning_rate": 0.00011220563114724032, "loss": 1.491, "step": 5905 }, { "epoch": 1.8349484213816818, "grad_norm": 0.283203125, "learning_rate": 0.00011218124392216609, "loss": 1.7087, "step": 5906 }, { "epoch": 1.8352610190684588, "grad_norm": 0.294921875, "learning_rate": 0.00011215685596169465, "loss": 1.8135, "step": 5907 }, { "epoch": 1.835573616755236, "grad_norm": 0.2734375, "learning_rate": 0.00011213246726729832, "loss": 1.5235, "step": 5908 }, { "epoch": 1.8358862144420132, "grad_norm": 0.28125, "learning_rate": 0.00011210807784044945, "loss": 1.5631, "step": 5909 }, { "epoch": 1.8361988121287902, "grad_norm": 0.28515625, "learning_rate": 0.00011208368768262054, "loss": 1.6162, "step": 5910 }, { "epoch": 1.8365114098155675, "grad_norm": 0.294921875, "learning_rate": 0.00011205929679528395, "loss": 1.5487, "step": 5911 }, { "epoch": 1.8368240075023445, "grad_norm": 0.28515625, "learning_rate": 0.00011203490517991231, "loss": 1.7964, "step": 5912 }, { "epoch": 1.8371366051891216, "grad_norm": 0.291015625, "learning_rate": 0.00011201051283797808, "loss": 1.8237, "step": 5913 }, { "epoch": 1.8374492028758986, "grad_norm": 0.3046875, "learning_rate": 0.00011198611977095388, "loss": 1.7002, "step": 5914 }, { "epoch": 1.8377618005626757, "grad_norm": 0.2890625, "learning_rate": 0.00011196172598031233, "loss": 1.6531, "step": 5915 }, { "epoch": 1.838074398249453, "grad_norm": 0.28515625, "learning_rate": 0.00011193733146752618, "loss": 1.4178, "step": 5916 }, { "epoch": 1.8383869959362302, "grad_norm": 0.26171875, "learning_rate": 0.00011191293623406809, "loss": 1.6298, "step": 5917 }, { "epoch": 1.8386995936230073, "grad_norm": 0.30078125, "learning_rate": 0.00011188854028141089, "loss": 1.5161, "step": 5918 }, { "epoch": 1.8390121913097843, "grad_norm": 0.287109375, "learning_rate": 0.00011186414361102732, "loss": 1.4156, "step": 5919 }, { "epoch": 1.8393247889965614, "grad_norm": 0.294921875, "learning_rate": 0.00011183974622439033, "loss": 1.4677, "step": 5920 }, { "epoch": 1.8396373866833384, "grad_norm": 0.294921875, "learning_rate": 0.00011181534812297272, "loss": 1.4706, "step": 5921 }, { "epoch": 1.8399499843701157, "grad_norm": 0.283203125, "learning_rate": 0.00011179094930824754, "loss": 1.5606, "step": 5922 }, { "epoch": 1.8402625820568927, "grad_norm": 0.27734375, "learning_rate": 0.00011176654978168768, "loss": 1.4768, "step": 5923 }, { "epoch": 1.84057517974367, "grad_norm": 0.29296875, "learning_rate": 0.00011174214954476625, "loss": 1.597, "step": 5924 }, { "epoch": 1.840887777430447, "grad_norm": 0.291015625, "learning_rate": 0.00011171774859895628, "loss": 1.5222, "step": 5925 }, { "epoch": 1.8412003751172241, "grad_norm": 0.271484375, "learning_rate": 0.00011169334694573088, "loss": 1.6, "step": 5926 }, { "epoch": 1.8415129728040012, "grad_norm": 0.29296875, "learning_rate": 0.00011166894458656323, "loss": 1.6754, "step": 5927 }, { "epoch": 1.8418255704907782, "grad_norm": 0.29296875, "learning_rate": 0.00011164454152292653, "loss": 1.5128, "step": 5928 }, { "epoch": 1.8421381681775555, "grad_norm": 0.29296875, "learning_rate": 0.00011162013775629401, "loss": 1.4511, "step": 5929 }, { "epoch": 1.8424507658643328, "grad_norm": 0.298828125, "learning_rate": 0.000111595733288139, "loss": 1.6394, "step": 5930 }, { "epoch": 1.8427633635511098, "grad_norm": 0.283203125, "learning_rate": 0.00011157132811993475, "loss": 1.6909, "step": 5931 }, { "epoch": 1.8430759612378869, "grad_norm": 0.279296875, "learning_rate": 0.00011154692225315471, "loss": 1.4136, "step": 5932 }, { "epoch": 1.843388558924664, "grad_norm": 0.28125, "learning_rate": 0.00011152251568927223, "loss": 1.742, "step": 5933 }, { "epoch": 1.843701156611441, "grad_norm": 0.306640625, "learning_rate": 0.00011149810842976081, "loss": 1.4884, "step": 5934 }, { "epoch": 1.8440137542982182, "grad_norm": 0.294921875, "learning_rate": 0.00011147370047609391, "loss": 1.5825, "step": 5935 }, { "epoch": 1.8443263519849953, "grad_norm": 0.271484375, "learning_rate": 0.00011144929182974515, "loss": 1.4863, "step": 5936 }, { "epoch": 1.8446389496717726, "grad_norm": 0.29296875, "learning_rate": 0.000111424882492188, "loss": 1.4343, "step": 5937 }, { "epoch": 1.8449515473585496, "grad_norm": 0.279296875, "learning_rate": 0.00011140047246489616, "loss": 1.487, "step": 5938 }, { "epoch": 1.8452641450453267, "grad_norm": 0.30078125, "learning_rate": 0.00011137606174934323, "loss": 1.6822, "step": 5939 }, { "epoch": 1.8455767427321037, "grad_norm": 0.28515625, "learning_rate": 0.00011135165034700299, "loss": 1.6089, "step": 5940 }, { "epoch": 1.8458893404188808, "grad_norm": 0.27734375, "learning_rate": 0.00011132723825934914, "loss": 1.6872, "step": 5941 }, { "epoch": 1.846201938105658, "grad_norm": 0.29296875, "learning_rate": 0.0001113028254878555, "loss": 1.5907, "step": 5942 }, { "epoch": 1.8465145357924353, "grad_norm": 0.298828125, "learning_rate": 0.00011127841203399587, "loss": 1.5709, "step": 5943 }, { "epoch": 1.8468271334792123, "grad_norm": 0.30078125, "learning_rate": 0.00011125399789924415, "loss": 1.5323, "step": 5944 }, { "epoch": 1.8471397311659894, "grad_norm": 0.27734375, "learning_rate": 0.0001112295830850742, "loss": 1.4699, "step": 5945 }, { "epoch": 1.8474523288527664, "grad_norm": 0.2890625, "learning_rate": 0.00011120516759296005, "loss": 1.8765, "step": 5946 }, { "epoch": 1.8477649265395435, "grad_norm": 0.279296875, "learning_rate": 0.0001111807514243756, "loss": 1.3026, "step": 5947 }, { "epoch": 1.8480775242263208, "grad_norm": 0.287109375, "learning_rate": 0.00011115633458079501, "loss": 1.4757, "step": 5948 }, { "epoch": 1.8483901219130978, "grad_norm": 0.279296875, "learning_rate": 0.00011113191706369224, "loss": 1.5611, "step": 5949 }, { "epoch": 1.848702719599875, "grad_norm": 0.318359375, "learning_rate": 0.00011110749887454145, "loss": 2.0363, "step": 5950 }, { "epoch": 1.8490153172866521, "grad_norm": 0.296875, "learning_rate": 0.0001110830800148168, "loss": 1.5084, "step": 5951 }, { "epoch": 1.8493279149734292, "grad_norm": 0.2890625, "learning_rate": 0.0001110586604859925, "loss": 1.335, "step": 5952 }, { "epoch": 1.8496405126602062, "grad_norm": 0.28515625, "learning_rate": 0.00011103424028954275, "loss": 1.5252, "step": 5953 }, { "epoch": 1.8499531103469833, "grad_norm": 0.294921875, "learning_rate": 0.0001110098194269419, "loss": 1.6241, "step": 5954 }, { "epoch": 1.8502657080337606, "grad_norm": 0.287109375, "learning_rate": 0.00011098539789966418, "loss": 1.674, "step": 5955 }, { "epoch": 1.8505783057205378, "grad_norm": 0.283203125, "learning_rate": 0.000110960975709184, "loss": 1.265, "step": 5956 }, { "epoch": 1.8508909034073149, "grad_norm": 0.271484375, "learning_rate": 0.00011093655285697573, "loss": 1.4216, "step": 5957 }, { "epoch": 1.851203501094092, "grad_norm": 0.294921875, "learning_rate": 0.00011091212934451387, "loss": 1.6109, "step": 5958 }, { "epoch": 1.851516098780869, "grad_norm": 0.283203125, "learning_rate": 0.00011088770517327283, "loss": 1.5298, "step": 5959 }, { "epoch": 1.851828696467646, "grad_norm": 0.259765625, "learning_rate": 0.00011086328034472717, "loss": 1.6639, "step": 5960 }, { "epoch": 1.8521412941544233, "grad_norm": 0.294921875, "learning_rate": 0.0001108388548603514, "loss": 1.5286, "step": 5961 }, { "epoch": 1.8524538918412004, "grad_norm": 0.29296875, "learning_rate": 0.00011081442872162018, "loss": 1.6964, "step": 5962 }, { "epoch": 1.8527664895279776, "grad_norm": 0.26953125, "learning_rate": 0.00011079000193000808, "loss": 1.3428, "step": 5963 }, { "epoch": 1.8530790872147547, "grad_norm": 0.287109375, "learning_rate": 0.00011076557448698985, "loss": 1.4291, "step": 5964 }, { "epoch": 1.8533916849015317, "grad_norm": 0.279296875, "learning_rate": 0.00011074114639404016, "loss": 1.6868, "step": 5965 }, { "epoch": 1.8537042825883088, "grad_norm": 0.279296875, "learning_rate": 0.0001107167176526338, "loss": 1.5273, "step": 5966 }, { "epoch": 1.8540168802750858, "grad_norm": 0.271484375, "learning_rate": 0.00011069228826424548, "loss": 1.2799, "step": 5967 }, { "epoch": 1.854329477961863, "grad_norm": 0.27734375, "learning_rate": 0.00011066785823035015, "loss": 1.8009, "step": 5968 }, { "epoch": 1.8546420756486404, "grad_norm": 0.35546875, "learning_rate": 0.00011064342755242259, "loss": 2.1242, "step": 5969 }, { "epoch": 1.8549546733354174, "grad_norm": 0.27734375, "learning_rate": 0.00011061899623193774, "loss": 1.8559, "step": 5970 }, { "epoch": 1.8552672710221945, "grad_norm": 0.2890625, "learning_rate": 0.00011059456427037059, "loss": 1.5747, "step": 5971 }, { "epoch": 1.8555798687089715, "grad_norm": 0.3203125, "learning_rate": 0.00011057013166919607, "loss": 1.7598, "step": 5972 }, { "epoch": 1.8558924663957486, "grad_norm": 0.28515625, "learning_rate": 0.00011054569842988925, "loss": 1.7396, "step": 5973 }, { "epoch": 1.8562050640825258, "grad_norm": 0.28125, "learning_rate": 0.00011052126455392515, "loss": 1.4872, "step": 5974 }, { "epoch": 1.8565176617693029, "grad_norm": 0.287109375, "learning_rate": 0.00011049683004277892, "loss": 1.3617, "step": 5975 }, { "epoch": 1.8568302594560802, "grad_norm": 0.28125, "learning_rate": 0.00011047239489792567, "loss": 1.3131, "step": 5976 }, { "epoch": 1.8571428571428572, "grad_norm": 0.296875, "learning_rate": 0.00011044795912084063, "loss": 1.7335, "step": 5977 }, { "epoch": 1.8574554548296343, "grad_norm": 0.314453125, "learning_rate": 0.00011042352271299897, "loss": 1.5964, "step": 5978 }, { "epoch": 1.8577680525164113, "grad_norm": 0.291015625, "learning_rate": 0.00011039908567587591, "loss": 1.5923, "step": 5979 }, { "epoch": 1.8580806502031884, "grad_norm": 0.2890625, "learning_rate": 0.00011037464801094686, "loss": 1.4119, "step": 5980 }, { "epoch": 1.8583932478899656, "grad_norm": 0.28515625, "learning_rate": 0.00011035020971968704, "loss": 1.6322, "step": 5981 }, { "epoch": 1.8587058455767427, "grad_norm": 0.287109375, "learning_rate": 0.00011032577080357189, "loss": 1.7092, "step": 5982 }, { "epoch": 1.85901844326352, "grad_norm": 0.30078125, "learning_rate": 0.00011030133126407682, "loss": 1.6851, "step": 5983 }, { "epoch": 1.859331040950297, "grad_norm": 0.37109375, "learning_rate": 0.00011027689110267724, "loss": 2.1295, "step": 5984 }, { "epoch": 1.859643638637074, "grad_norm": 0.30078125, "learning_rate": 0.00011025245032084863, "loss": 1.6702, "step": 5985 }, { "epoch": 1.859956236323851, "grad_norm": 0.29296875, "learning_rate": 0.00011022800892006655, "loss": 1.544, "step": 5986 }, { "epoch": 1.8602688340106284, "grad_norm": 0.2890625, "learning_rate": 0.00011020356690180653, "loss": 1.38, "step": 5987 }, { "epoch": 1.8605814316974054, "grad_norm": 0.2734375, "learning_rate": 0.00011017912426754417, "loss": 1.6264, "step": 5988 }, { "epoch": 1.8608940293841827, "grad_norm": 0.28515625, "learning_rate": 0.00011015468101875512, "loss": 1.4654, "step": 5989 }, { "epoch": 1.8612066270709597, "grad_norm": 0.29296875, "learning_rate": 0.00011013023715691504, "loss": 1.5018, "step": 5990 }, { "epoch": 1.8615192247577368, "grad_norm": 0.296875, "learning_rate": 0.00011010579268349961, "loss": 1.4131, "step": 5991 }, { "epoch": 1.8618318224445138, "grad_norm": 0.2734375, "learning_rate": 0.00011008134759998464, "loss": 1.6969, "step": 5992 }, { "epoch": 1.862144420131291, "grad_norm": 0.2890625, "learning_rate": 0.00011005690190784582, "loss": 1.7404, "step": 5993 }, { "epoch": 1.8624570178180682, "grad_norm": 0.296875, "learning_rate": 0.00011003245560855901, "loss": 1.2808, "step": 5994 }, { "epoch": 1.8627696155048452, "grad_norm": 0.29296875, "learning_rate": 0.00011000800870360013, "loss": 1.7481, "step": 5995 }, { "epoch": 1.8630822131916225, "grad_norm": 0.30078125, "learning_rate": 0.00010998356119444497, "loss": 1.3703, "step": 5996 }, { "epoch": 1.8633948108783995, "grad_norm": 0.283203125, "learning_rate": 0.0001099591130825695, "loss": 1.651, "step": 5997 }, { "epoch": 1.8637074085651766, "grad_norm": 0.2890625, "learning_rate": 0.00010993466436944968, "loss": 1.6761, "step": 5998 }, { "epoch": 1.8640200062519536, "grad_norm": 0.296875, "learning_rate": 0.00010991021505656152, "loss": 1.4074, "step": 5999 }, { "epoch": 1.864332603938731, "grad_norm": 0.28515625, "learning_rate": 0.00010988576514538105, "loss": 1.3369, "step": 6000 }, { "epoch": 1.864645201625508, "grad_norm": 0.28125, "learning_rate": 0.00010986131463738434, "loss": 1.3919, "step": 6001 }, { "epoch": 1.8649577993122852, "grad_norm": 0.27734375, "learning_rate": 0.0001098368635340475, "loss": 1.6825, "step": 6002 }, { "epoch": 1.8652703969990623, "grad_norm": 0.2890625, "learning_rate": 0.00010981241183684667, "loss": 1.5925, "step": 6003 }, { "epoch": 1.8655829946858393, "grad_norm": 0.27734375, "learning_rate": 0.00010978795954725804, "loss": 1.5749, "step": 6004 }, { "epoch": 1.8658955923726164, "grad_norm": 0.28515625, "learning_rate": 0.00010976350666675781, "loss": 1.7939, "step": 6005 }, { "epoch": 1.8662081900593934, "grad_norm": 0.287109375, "learning_rate": 0.00010973905319682223, "loss": 1.806, "step": 6006 }, { "epoch": 1.8665207877461707, "grad_norm": 0.279296875, "learning_rate": 0.00010971459913892763, "loss": 1.5721, "step": 6007 }, { "epoch": 1.8668333854329477, "grad_norm": 0.283203125, "learning_rate": 0.00010969014449455026, "loss": 1.5858, "step": 6008 }, { "epoch": 1.867145983119725, "grad_norm": 0.3046875, "learning_rate": 0.00010966568926516656, "loss": 1.7618, "step": 6009 }, { "epoch": 1.867458580806502, "grad_norm": 0.28125, "learning_rate": 0.00010964123345225285, "loss": 1.4188, "step": 6010 }, { "epoch": 1.8677711784932791, "grad_norm": 0.28125, "learning_rate": 0.00010961677705728561, "loss": 1.5006, "step": 6011 }, { "epoch": 1.8680837761800562, "grad_norm": 0.294921875, "learning_rate": 0.00010959232008174127, "loss": 1.554, "step": 6012 }, { "epoch": 1.8683963738668334, "grad_norm": 0.291015625, "learning_rate": 0.00010956786252709637, "loss": 1.3672, "step": 6013 }, { "epoch": 1.8687089715536105, "grad_norm": 0.271484375, "learning_rate": 0.00010954340439482738, "loss": 1.2607, "step": 6014 }, { "epoch": 1.8690215692403878, "grad_norm": 0.28515625, "learning_rate": 0.00010951894568641093, "loss": 1.4708, "step": 6015 }, { "epoch": 1.8693341669271648, "grad_norm": 0.2890625, "learning_rate": 0.0001094944864033236, "loss": 1.6332, "step": 6016 }, { "epoch": 1.8696467646139419, "grad_norm": 0.283203125, "learning_rate": 0.00010947002654704201, "loss": 1.5205, "step": 6017 }, { "epoch": 1.869959362300719, "grad_norm": 0.296875, "learning_rate": 0.00010944556611904286, "loss": 1.5209, "step": 6018 }, { "epoch": 1.870271959987496, "grad_norm": 0.298828125, "learning_rate": 0.00010942110512080286, "loss": 1.4901, "step": 6019 }, { "epoch": 1.8705845576742732, "grad_norm": 0.28125, "learning_rate": 0.0001093966435537987, "loss": 1.582, "step": 6020 }, { "epoch": 1.8708971553610503, "grad_norm": 0.283203125, "learning_rate": 0.00010937218141950722, "loss": 1.868, "step": 6021 }, { "epoch": 1.8712097530478276, "grad_norm": 0.3046875, "learning_rate": 0.00010934771871940521, "loss": 1.5889, "step": 6022 }, { "epoch": 1.8715223507346046, "grad_norm": 0.28125, "learning_rate": 0.00010932325545496948, "loss": 1.6362, "step": 6023 }, { "epoch": 1.8718349484213817, "grad_norm": 0.296875, "learning_rate": 0.00010929879162767698, "loss": 1.4245, "step": 6024 }, { "epoch": 1.8721475461081587, "grad_norm": 0.28125, "learning_rate": 0.00010927432723900457, "loss": 1.3501, "step": 6025 }, { "epoch": 1.872460143794936, "grad_norm": 0.283203125, "learning_rate": 0.00010924986229042917, "loss": 1.6494, "step": 6026 }, { "epoch": 1.872772741481713, "grad_norm": 0.287109375, "learning_rate": 0.00010922539678342785, "loss": 1.6763, "step": 6027 }, { "epoch": 1.8730853391684903, "grad_norm": 0.271484375, "learning_rate": 0.00010920093071947755, "loss": 1.3311, "step": 6028 }, { "epoch": 1.8733979368552673, "grad_norm": 0.283203125, "learning_rate": 0.00010917646410005532, "loss": 1.4565, "step": 6029 }, { "epoch": 1.8737105345420444, "grad_norm": 0.28515625, "learning_rate": 0.00010915199692663827, "loss": 1.4469, "step": 6030 }, { "epoch": 1.8740231322288214, "grad_norm": 0.265625, "learning_rate": 0.00010912752920070351, "loss": 1.3675, "step": 6031 }, { "epoch": 1.8743357299155985, "grad_norm": 0.283203125, "learning_rate": 0.00010910306092372815, "loss": 1.7303, "step": 6032 }, { "epoch": 1.8746483276023758, "grad_norm": 0.275390625, "learning_rate": 0.00010907859209718942, "loss": 1.3998, "step": 6033 }, { "epoch": 1.8749609252891528, "grad_norm": 0.291015625, "learning_rate": 0.00010905412272256452, "loss": 1.4973, "step": 6034 }, { "epoch": 1.87527352297593, "grad_norm": 0.279296875, "learning_rate": 0.00010902965280133068, "loss": 1.5443, "step": 6035 }, { "epoch": 1.8755861206627071, "grad_norm": 0.28125, "learning_rate": 0.00010900518233496522, "loss": 1.582, "step": 6036 }, { "epoch": 1.8758987183494842, "grad_norm": 0.29296875, "learning_rate": 0.00010898071132494543, "loss": 1.5631, "step": 6037 }, { "epoch": 1.8762113160362612, "grad_norm": 0.2734375, "learning_rate": 0.00010895623977274863, "loss": 1.2673, "step": 6038 }, { "epoch": 1.8765239137230383, "grad_norm": 0.294921875, "learning_rate": 0.00010893176767985223, "loss": 1.6438, "step": 6039 }, { "epoch": 1.8768365114098156, "grad_norm": 0.283203125, "learning_rate": 0.0001089072950477336, "loss": 1.4649, "step": 6040 }, { "epoch": 1.8771491090965928, "grad_norm": 0.287109375, "learning_rate": 0.00010888282187787026, "loss": 1.6903, "step": 6041 }, { "epoch": 1.8774617067833699, "grad_norm": 0.28125, "learning_rate": 0.00010885834817173966, "loss": 1.4262, "step": 6042 }, { "epoch": 1.877774304470147, "grad_norm": 0.279296875, "learning_rate": 0.00010883387393081928, "loss": 1.4242, "step": 6043 }, { "epoch": 1.878086902156924, "grad_norm": 0.291015625, "learning_rate": 0.00010880939915658663, "loss": 1.3279, "step": 6044 }, { "epoch": 1.878399499843701, "grad_norm": 0.3046875, "learning_rate": 0.00010878492385051937, "loss": 1.785, "step": 6045 }, { "epoch": 1.8787120975304783, "grad_norm": 0.294921875, "learning_rate": 0.00010876044801409502, "loss": 1.7062, "step": 6046 }, { "epoch": 1.8790246952172553, "grad_norm": 0.28515625, "learning_rate": 0.0001087359716487913, "loss": 1.3882, "step": 6047 }, { "epoch": 1.8793372929040326, "grad_norm": 0.279296875, "learning_rate": 0.00010871149475608584, "loss": 1.3782, "step": 6048 }, { "epoch": 1.8796498905908097, "grad_norm": 0.28125, "learning_rate": 0.00010868701733745634, "loss": 1.8456, "step": 6049 }, { "epoch": 1.8799624882775867, "grad_norm": 0.298828125, "learning_rate": 0.00010866253939438049, "loss": 1.6852, "step": 6050 }, { "epoch": 1.8802750859643638, "grad_norm": 0.298828125, "learning_rate": 0.00010863806092833615, "loss": 1.7426, "step": 6051 }, { "epoch": 1.8805876836511408, "grad_norm": 0.287109375, "learning_rate": 0.00010861358194080102, "loss": 1.3551, "step": 6052 }, { "epoch": 1.880900281337918, "grad_norm": 0.283203125, "learning_rate": 0.00010858910243325301, "loss": 1.8453, "step": 6053 }, { "epoch": 1.8812128790246954, "grad_norm": 0.275390625, "learning_rate": 0.00010856462240716994, "loss": 1.5246, "step": 6054 }, { "epoch": 1.8815254767114724, "grad_norm": 0.291015625, "learning_rate": 0.0001085401418640297, "loss": 1.7768, "step": 6055 }, { "epoch": 1.8818380743982495, "grad_norm": 0.27734375, "learning_rate": 0.00010851566080531017, "loss": 1.4684, "step": 6056 }, { "epoch": 1.8821506720850265, "grad_norm": 0.3203125, "learning_rate": 0.0001084911792324894, "loss": 1.6255, "step": 6057 }, { "epoch": 1.8824632697718036, "grad_norm": 0.291015625, "learning_rate": 0.00010846669714704527, "loss": 1.5263, "step": 6058 }, { "epoch": 1.8827758674585808, "grad_norm": 0.3046875, "learning_rate": 0.00010844221455045591, "loss": 1.6249, "step": 6059 }, { "epoch": 1.8830884651453579, "grad_norm": 0.2890625, "learning_rate": 0.00010841773144419924, "loss": 1.6932, "step": 6060 }, { "epoch": 1.8834010628321352, "grad_norm": 0.302734375, "learning_rate": 0.00010839324782975341, "loss": 1.5831, "step": 6061 }, { "epoch": 1.8837136605189122, "grad_norm": 0.28515625, "learning_rate": 0.00010836876370859652, "loss": 1.747, "step": 6062 }, { "epoch": 1.8840262582056893, "grad_norm": 0.271484375, "learning_rate": 0.0001083442790822067, "loss": 1.7763, "step": 6063 }, { "epoch": 1.8843388558924663, "grad_norm": 0.28515625, "learning_rate": 0.00010831979395206211, "loss": 1.7771, "step": 6064 }, { "epoch": 1.8846514535792434, "grad_norm": 0.27734375, "learning_rate": 0.00010829530831964098, "loss": 1.6032, "step": 6065 }, { "epoch": 1.8849640512660206, "grad_norm": 0.30859375, "learning_rate": 0.00010827082218642149, "loss": 1.6424, "step": 6066 }, { "epoch": 1.885276648952798, "grad_norm": 0.28125, "learning_rate": 0.00010824633555388196, "loss": 1.6392, "step": 6067 }, { "epoch": 1.885589246639575, "grad_norm": 0.291015625, "learning_rate": 0.00010822184842350058, "loss": 1.2719, "step": 6068 }, { "epoch": 1.885901844326352, "grad_norm": 0.287109375, "learning_rate": 0.00010819736079675577, "loss": 1.6951, "step": 6069 }, { "epoch": 1.886214442013129, "grad_norm": 0.287109375, "learning_rate": 0.00010817287267512584, "loss": 1.5512, "step": 6070 }, { "epoch": 1.886527039699906, "grad_norm": 0.28515625, "learning_rate": 0.0001081483840600892, "loss": 1.4913, "step": 6071 }, { "epoch": 1.8868396373866834, "grad_norm": 0.279296875, "learning_rate": 0.0001081238949531242, "loss": 1.4639, "step": 6072 }, { "epoch": 1.8871522350734604, "grad_norm": 0.306640625, "learning_rate": 0.00010809940535570932, "loss": 1.6983, "step": 6073 }, { "epoch": 1.8874648327602377, "grad_norm": 0.294921875, "learning_rate": 0.000108074915269323, "loss": 1.7414, "step": 6074 }, { "epoch": 1.8877774304470147, "grad_norm": 0.306640625, "learning_rate": 0.0001080504246954438, "loss": 1.6636, "step": 6075 }, { "epoch": 1.8880900281337918, "grad_norm": 0.298828125, "learning_rate": 0.00010802593363555013, "loss": 1.6001, "step": 6076 }, { "epoch": 1.8884026258205688, "grad_norm": 0.29296875, "learning_rate": 0.00010800144209112071, "loss": 1.49, "step": 6077 }, { "epoch": 1.8887152235073459, "grad_norm": 0.27734375, "learning_rate": 0.00010797695006363398, "loss": 1.5432, "step": 6078 }, { "epoch": 1.8890278211941232, "grad_norm": 0.287109375, "learning_rate": 0.00010795245755456861, "loss": 1.5761, "step": 6079 }, { "epoch": 1.8893404188809004, "grad_norm": 0.28515625, "learning_rate": 0.00010792796456540324, "loss": 1.4362, "step": 6080 }, { "epoch": 1.8896530165676775, "grad_norm": 0.2890625, "learning_rate": 0.00010790347109761656, "loss": 1.4679, "step": 6081 }, { "epoch": 1.8899656142544545, "grad_norm": 0.28125, "learning_rate": 0.00010787897715268724, "loss": 1.4464, "step": 6082 }, { "epoch": 1.8902782119412316, "grad_norm": 0.28125, "learning_rate": 0.00010785448273209406, "loss": 1.5998, "step": 6083 }, { "epoch": 1.8905908096280086, "grad_norm": 0.28515625, "learning_rate": 0.00010782998783731573, "loss": 1.6596, "step": 6084 }, { "epoch": 1.890903407314786, "grad_norm": 0.2734375, "learning_rate": 0.00010780549246983107, "loss": 1.7341, "step": 6085 }, { "epoch": 1.891216005001563, "grad_norm": 0.283203125, "learning_rate": 0.00010778099663111885, "loss": 1.4339, "step": 6086 }, { "epoch": 1.8915286026883402, "grad_norm": 0.283203125, "learning_rate": 0.00010775650032265798, "loss": 1.5452, "step": 6087 }, { "epoch": 1.8918412003751173, "grad_norm": 0.283203125, "learning_rate": 0.00010773200354592727, "loss": 1.4955, "step": 6088 }, { "epoch": 1.8921537980618943, "grad_norm": 0.302734375, "learning_rate": 0.0001077075063024057, "loss": 1.4702, "step": 6089 }, { "epoch": 1.8924663957486714, "grad_norm": 0.2890625, "learning_rate": 0.00010768300859357212, "loss": 1.3393, "step": 6090 }, { "epoch": 1.8927789934354484, "grad_norm": 0.28125, "learning_rate": 0.00010765851042090554, "loss": 1.7447, "step": 6091 }, { "epoch": 1.8930915911222257, "grad_norm": 0.28515625, "learning_rate": 0.00010763401178588488, "loss": 1.6784, "step": 6092 }, { "epoch": 1.893404188809003, "grad_norm": 0.287109375, "learning_rate": 0.00010760951268998925, "loss": 1.6135, "step": 6093 }, { "epoch": 1.89371678649578, "grad_norm": 0.287109375, "learning_rate": 0.0001075850131346976, "loss": 1.5082, "step": 6094 }, { "epoch": 1.894029384182557, "grad_norm": 0.279296875, "learning_rate": 0.0001075605131214891, "loss": 1.462, "step": 6095 }, { "epoch": 1.8943419818693341, "grad_norm": 0.287109375, "learning_rate": 0.00010753601265184274, "loss": 1.396, "step": 6096 }, { "epoch": 1.8946545795561112, "grad_norm": 0.30078125, "learning_rate": 0.00010751151172723773, "loss": 1.6575, "step": 6097 }, { "epoch": 1.8949671772428884, "grad_norm": 0.29296875, "learning_rate": 0.00010748701034915314, "loss": 1.7544, "step": 6098 }, { "epoch": 1.8952797749296655, "grad_norm": 0.29296875, "learning_rate": 0.00010746250851906823, "loss": 1.3533, "step": 6099 }, { "epoch": 1.8955923726164428, "grad_norm": 0.2890625, "learning_rate": 0.00010743800623846214, "loss": 1.9792, "step": 6100 }, { "epoch": 1.8959049703032198, "grad_norm": 0.291015625, "learning_rate": 0.00010741350350881419, "loss": 1.4636, "step": 6101 }, { "epoch": 1.8962175679899969, "grad_norm": 0.287109375, "learning_rate": 0.00010738900033160353, "loss": 1.6061, "step": 6102 }, { "epoch": 1.896530165676774, "grad_norm": 0.298828125, "learning_rate": 0.00010736449670830953, "loss": 1.7217, "step": 6103 }, { "epoch": 1.896842763363551, "grad_norm": 0.279296875, "learning_rate": 0.00010733999264041146, "loss": 1.708, "step": 6104 }, { "epoch": 1.8971553610503282, "grad_norm": 0.267578125, "learning_rate": 0.0001073154881293887, "loss": 1.5846, "step": 6105 }, { "epoch": 1.8974679587371055, "grad_norm": 0.2890625, "learning_rate": 0.00010729098317672059, "loss": 1.5484, "step": 6106 }, { "epoch": 1.8977805564238825, "grad_norm": 0.287109375, "learning_rate": 0.00010726647778388654, "loss": 1.4086, "step": 6107 }, { "epoch": 1.8980931541106596, "grad_norm": 0.283203125, "learning_rate": 0.00010724197195236596, "loss": 1.7811, "step": 6108 }, { "epoch": 1.8984057517974366, "grad_norm": 0.287109375, "learning_rate": 0.00010721746568363831, "loss": 1.2625, "step": 6109 }, { "epoch": 1.8987183494842137, "grad_norm": 0.27734375, "learning_rate": 0.00010719295897918305, "loss": 1.8101, "step": 6110 }, { "epoch": 1.899030947170991, "grad_norm": 0.29296875, "learning_rate": 0.0001071684518404797, "loss": 1.4197, "step": 6111 }, { "epoch": 1.899343544857768, "grad_norm": 0.28125, "learning_rate": 0.00010714394426900778, "loss": 1.3144, "step": 6112 }, { "epoch": 1.8996561425445453, "grad_norm": 0.294921875, "learning_rate": 0.00010711943626624686, "loss": 1.5552, "step": 6113 }, { "epoch": 1.8999687402313223, "grad_norm": 0.294921875, "learning_rate": 0.00010709492783367645, "loss": 1.5563, "step": 6114 }, { "epoch": 1.9002813379180994, "grad_norm": 0.28515625, "learning_rate": 0.00010707041897277623, "loss": 1.6451, "step": 6115 }, { "epoch": 1.9005939356048764, "grad_norm": 0.294921875, "learning_rate": 0.00010704590968502581, "loss": 1.5293, "step": 6116 }, { "epoch": 1.9009065332916535, "grad_norm": 0.29296875, "learning_rate": 0.00010702139997190483, "loss": 1.4838, "step": 6117 }, { "epoch": 1.9012191309784308, "grad_norm": 0.2734375, "learning_rate": 0.00010699688983489302, "loss": 1.5037, "step": 6118 }, { "epoch": 1.901531728665208, "grad_norm": 0.2890625, "learning_rate": 0.00010697237927547003, "loss": 1.3853, "step": 6119 }, { "epoch": 1.901844326351985, "grad_norm": 0.27734375, "learning_rate": 0.00010694786829511562, "loss": 1.3902, "step": 6120 }, { "epoch": 1.9021569240387621, "grad_norm": 0.30859375, "learning_rate": 0.00010692335689530955, "loss": 1.5142, "step": 6121 }, { "epoch": 1.9024695217255392, "grad_norm": 0.30078125, "learning_rate": 0.00010689884507753159, "loss": 1.6241, "step": 6122 }, { "epoch": 1.9027821194123162, "grad_norm": 0.294921875, "learning_rate": 0.00010687433284326159, "loss": 1.451, "step": 6123 }, { "epoch": 1.9030947170990935, "grad_norm": 0.2890625, "learning_rate": 0.00010684982019397934, "loss": 1.3795, "step": 6124 }, { "epoch": 1.9034073147858706, "grad_norm": 0.28515625, "learning_rate": 0.00010682530713116472, "loss": 1.4519, "step": 6125 }, { "epoch": 1.9037199124726478, "grad_norm": 0.2734375, "learning_rate": 0.00010680079365629758, "loss": 1.6463, "step": 6126 }, { "epoch": 1.9040325101594249, "grad_norm": 0.283203125, "learning_rate": 0.00010677627977085788, "loss": 1.305, "step": 6127 }, { "epoch": 1.904345107846202, "grad_norm": 0.2890625, "learning_rate": 0.00010675176547632555, "loss": 1.6685, "step": 6128 }, { "epoch": 1.904657705532979, "grad_norm": 0.2890625, "learning_rate": 0.00010672725077418054, "loss": 1.8257, "step": 6129 }, { "epoch": 1.904970303219756, "grad_norm": 0.287109375, "learning_rate": 0.00010670273566590281, "loss": 1.4169, "step": 6130 }, { "epoch": 1.9052829009065333, "grad_norm": 0.28515625, "learning_rate": 0.0001066782201529724, "loss": 1.7248, "step": 6131 }, { "epoch": 1.9055954985933106, "grad_norm": 0.28125, "learning_rate": 0.00010665370423686931, "loss": 1.6358, "step": 6132 }, { "epoch": 1.9059080962800876, "grad_norm": 0.27734375, "learning_rate": 0.00010662918791907364, "loss": 1.3639, "step": 6133 }, { "epoch": 1.9062206939668647, "grad_norm": 0.298828125, "learning_rate": 0.00010660467120106541, "loss": 1.4551, "step": 6134 }, { "epoch": 1.9065332916536417, "grad_norm": 0.283203125, "learning_rate": 0.00010658015408432478, "loss": 1.7171, "step": 6135 }, { "epoch": 1.9068458893404188, "grad_norm": 0.287109375, "learning_rate": 0.00010655563657033187, "loss": 1.6136, "step": 6136 }, { "epoch": 1.907158487027196, "grad_norm": 0.3046875, "learning_rate": 0.00010653111866056685, "loss": 1.5219, "step": 6137 }, { "epoch": 1.907471084713973, "grad_norm": 0.30078125, "learning_rate": 0.00010650660035650984, "loss": 1.3852, "step": 6138 }, { "epoch": 1.9077836824007504, "grad_norm": 0.283203125, "learning_rate": 0.00010648208165964109, "loss": 1.6926, "step": 6139 }, { "epoch": 1.9080962800875274, "grad_norm": 0.275390625, "learning_rate": 0.0001064575625714408, "loss": 1.4854, "step": 6140 }, { "epoch": 1.9084088777743045, "grad_norm": 0.294921875, "learning_rate": 0.00010643304309338921, "loss": 1.4488, "step": 6141 }, { "epoch": 1.9087214754610815, "grad_norm": 0.2890625, "learning_rate": 0.00010640852322696666, "loss": 1.7811, "step": 6142 }, { "epoch": 1.9090340731478586, "grad_norm": 0.27734375, "learning_rate": 0.00010638400297365336, "loss": 1.506, "step": 6143 }, { "epoch": 1.9093466708346358, "grad_norm": 0.357421875, "learning_rate": 0.00010635948233492968, "loss": 1.994, "step": 6144 }, { "epoch": 1.909659268521413, "grad_norm": 0.28125, "learning_rate": 0.00010633496131227593, "loss": 1.4202, "step": 6145 }, { "epoch": 1.9099718662081901, "grad_norm": 0.283203125, "learning_rate": 0.00010631043990717251, "loss": 1.2593, "step": 6146 }, { "epoch": 1.9102844638949672, "grad_norm": 0.29296875, "learning_rate": 0.00010628591812109978, "loss": 1.5465, "step": 6147 }, { "epoch": 1.9105970615817442, "grad_norm": 0.291015625, "learning_rate": 0.00010626139595553819, "loss": 1.4455, "step": 6148 }, { "epoch": 1.9109096592685213, "grad_norm": 0.28515625, "learning_rate": 0.00010623687341196813, "loss": 1.5223, "step": 6149 }, { "epoch": 1.9112222569552986, "grad_norm": 0.28515625, "learning_rate": 0.00010621235049187006, "loss": 1.5872, "step": 6150 }, { "epoch": 1.9115348546420756, "grad_norm": 0.283203125, "learning_rate": 0.0001061878271967245, "loss": 1.5521, "step": 6151 }, { "epoch": 1.911847452328853, "grad_norm": 0.275390625, "learning_rate": 0.0001061633035280119, "loss": 1.3491, "step": 6152 }, { "epoch": 1.91216005001563, "grad_norm": 0.296875, "learning_rate": 0.00010613877948721282, "loss": 1.84, "step": 6153 }, { "epoch": 1.912472647702407, "grad_norm": 0.283203125, "learning_rate": 0.00010611425507580781, "loss": 1.7756, "step": 6154 }, { "epoch": 1.912785245389184, "grad_norm": 0.28125, "learning_rate": 0.00010608973029527742, "loss": 1.4592, "step": 6155 }, { "epoch": 1.913097843075961, "grad_norm": 0.271484375, "learning_rate": 0.00010606520514710225, "loss": 1.4681, "step": 6156 }, { "epoch": 1.9134104407627384, "grad_norm": 0.28515625, "learning_rate": 0.00010604067963276294, "loss": 1.2755, "step": 6157 }, { "epoch": 1.9137230384495156, "grad_norm": 0.271484375, "learning_rate": 0.00010601615375374006, "loss": 1.4351, "step": 6158 }, { "epoch": 1.9140356361362927, "grad_norm": 0.271484375, "learning_rate": 0.00010599162751151437, "loss": 1.3687, "step": 6159 }, { "epoch": 1.9143482338230697, "grad_norm": 0.310546875, "learning_rate": 0.00010596710090756643, "loss": 1.8149, "step": 6160 }, { "epoch": 1.9146608315098468, "grad_norm": 0.279296875, "learning_rate": 0.00010594257394337706, "loss": 1.5358, "step": 6161 }, { "epoch": 1.9149734291966238, "grad_norm": 0.296875, "learning_rate": 0.00010591804662042688, "loss": 1.851, "step": 6162 }, { "epoch": 1.915286026883401, "grad_norm": 0.29296875, "learning_rate": 0.0001058935189401967, "loss": 1.5856, "step": 6163 }, { "epoch": 1.9155986245701782, "grad_norm": 0.298828125, "learning_rate": 0.00010586899090416727, "loss": 1.681, "step": 6164 }, { "epoch": 1.9159112222569554, "grad_norm": 0.283203125, "learning_rate": 0.00010584446251381941, "loss": 1.5954, "step": 6165 }, { "epoch": 1.9162238199437325, "grad_norm": 0.291015625, "learning_rate": 0.00010581993377063387, "loss": 1.2033, "step": 6166 }, { "epoch": 1.9165364176305095, "grad_norm": 0.2890625, "learning_rate": 0.00010579540467609153, "loss": 1.549, "step": 6167 }, { "epoch": 1.9168490153172866, "grad_norm": 0.26953125, "learning_rate": 0.0001057708752316732, "loss": 1.6916, "step": 6168 }, { "epoch": 1.9171616130040636, "grad_norm": 0.27734375, "learning_rate": 0.0001057463454388598, "loss": 1.5429, "step": 6169 }, { "epoch": 1.917474210690841, "grad_norm": 0.28515625, "learning_rate": 0.00010572181529913216, "loss": 1.7526, "step": 6170 }, { "epoch": 1.9177868083776182, "grad_norm": 0.28515625, "learning_rate": 0.00010569728481397132, "loss": 1.5457, "step": 6171 }, { "epoch": 1.9180994060643952, "grad_norm": 0.29296875, "learning_rate": 0.00010567275398485807, "loss": 2.0223, "step": 6172 }, { "epoch": 1.9184120037511723, "grad_norm": 0.2890625, "learning_rate": 0.00010564822281327346, "loss": 1.2732, "step": 6173 }, { "epoch": 1.9187246014379493, "grad_norm": 0.3046875, "learning_rate": 0.00010562369130069842, "loss": 1.4948, "step": 6174 }, { "epoch": 1.9190371991247264, "grad_norm": 0.27734375, "learning_rate": 0.00010559915944861398, "loss": 1.5727, "step": 6175 }, { "epoch": 1.9193497968115036, "grad_norm": 0.28515625, "learning_rate": 0.00010557462725850114, "loss": 1.6334, "step": 6176 }, { "epoch": 1.9196623944982807, "grad_norm": 0.29296875, "learning_rate": 0.00010555009473184096, "loss": 1.3827, "step": 6177 }, { "epoch": 1.919974992185058, "grad_norm": 0.287109375, "learning_rate": 0.00010552556187011451, "loss": 1.3521, "step": 6178 }, { "epoch": 1.920287589871835, "grad_norm": 0.296875, "learning_rate": 0.00010550102867480283, "loss": 1.5802, "step": 6179 }, { "epoch": 1.920600187558612, "grad_norm": 0.27734375, "learning_rate": 0.00010547649514738702, "loss": 1.5329, "step": 6180 }, { "epoch": 1.920912785245389, "grad_norm": 0.279296875, "learning_rate": 0.00010545196128934824, "loss": 1.7083, "step": 6181 }, { "epoch": 1.9212253829321662, "grad_norm": 0.28515625, "learning_rate": 0.0001054274271021676, "loss": 1.4001, "step": 6182 }, { "epoch": 1.9215379806189434, "grad_norm": 0.287109375, "learning_rate": 0.00010540289258732627, "loss": 1.3963, "step": 6183 }, { "epoch": 1.9218505783057207, "grad_norm": 0.298828125, "learning_rate": 0.00010537835774630547, "loss": 1.6051, "step": 6184 }, { "epoch": 1.9221631759924978, "grad_norm": 0.294921875, "learning_rate": 0.00010535382258058632, "loss": 1.5225, "step": 6185 }, { "epoch": 1.9224757736792748, "grad_norm": 0.296875, "learning_rate": 0.00010532928709165006, "loss": 1.3148, "step": 6186 }, { "epoch": 1.9227883713660519, "grad_norm": 0.2734375, "learning_rate": 0.00010530475128097799, "loss": 1.4773, "step": 6187 }, { "epoch": 1.923100969052829, "grad_norm": 0.28515625, "learning_rate": 0.0001052802151500513, "loss": 1.4304, "step": 6188 }, { "epoch": 1.9234135667396062, "grad_norm": 0.29296875, "learning_rate": 0.00010525567870035129, "loss": 1.6024, "step": 6189 }, { "epoch": 1.9237261644263832, "grad_norm": 0.279296875, "learning_rate": 0.00010523114193335928, "loss": 1.57, "step": 6190 }, { "epoch": 1.9240387621131605, "grad_norm": 0.275390625, "learning_rate": 0.00010520660485055656, "loss": 1.622, "step": 6191 }, { "epoch": 1.9243513597999375, "grad_norm": 0.294921875, "learning_rate": 0.00010518206745342445, "loss": 1.465, "step": 6192 }, { "epoch": 1.9246639574867146, "grad_norm": 0.27734375, "learning_rate": 0.00010515752974344432, "loss": 1.4871, "step": 6193 }, { "epoch": 1.9249765551734916, "grad_norm": 0.302734375, "learning_rate": 0.00010513299172209756, "loss": 1.5585, "step": 6194 }, { "epoch": 1.9252891528602687, "grad_norm": 0.30859375, "learning_rate": 0.00010510845339086557, "loss": 1.7355, "step": 6195 }, { "epoch": 1.925601750547046, "grad_norm": 0.283203125, "learning_rate": 0.00010508391475122972, "loss": 1.6971, "step": 6196 }, { "epoch": 1.925914348233823, "grad_norm": 0.3046875, "learning_rate": 0.00010505937580467146, "loss": 1.394, "step": 6197 }, { "epoch": 1.9262269459206003, "grad_norm": 0.302734375, "learning_rate": 0.00010503483655267224, "loss": 1.9124, "step": 6198 }, { "epoch": 1.9265395436073773, "grad_norm": 0.30078125, "learning_rate": 0.00010501029699671352, "loss": 1.6373, "step": 6199 }, { "epoch": 1.9268521412941544, "grad_norm": 0.29296875, "learning_rate": 0.00010498575713827677, "loss": 1.6814, "step": 6200 }, { "epoch": 1.9271647389809314, "grad_norm": 0.302734375, "learning_rate": 0.00010496121697884352, "loss": 1.6828, "step": 6201 }, { "epoch": 1.9274773366677087, "grad_norm": 0.296875, "learning_rate": 0.00010493667651989529, "loss": 1.5589, "step": 6202 }, { "epoch": 1.9277899343544858, "grad_norm": 0.28125, "learning_rate": 0.0001049121357629136, "loss": 1.6377, "step": 6203 }, { "epoch": 1.928102532041263, "grad_norm": 0.291015625, "learning_rate": 0.00010488759470937998, "loss": 1.3152, "step": 6204 }, { "epoch": 1.92841512972804, "grad_norm": 0.3046875, "learning_rate": 0.00010486305336077609, "loss": 1.4533, "step": 6205 }, { "epoch": 1.9287277274148171, "grad_norm": 0.298828125, "learning_rate": 0.00010483851171858346, "loss": 1.4259, "step": 6206 }, { "epoch": 1.9290403251015942, "grad_norm": 0.275390625, "learning_rate": 0.00010481396978428368, "loss": 1.5113, "step": 6207 }, { "epoch": 1.9293529227883712, "grad_norm": 0.279296875, "learning_rate": 0.00010478942755935846, "loss": 1.7184, "step": 6208 }, { "epoch": 1.9296655204751485, "grad_norm": 0.294921875, "learning_rate": 0.00010476488504528936, "loss": 1.3053, "step": 6209 }, { "epoch": 1.9299781181619255, "grad_norm": 0.296875, "learning_rate": 0.00010474034224355808, "loss": 1.5313, "step": 6210 }, { "epoch": 1.9302907158487028, "grad_norm": 0.294921875, "learning_rate": 0.00010471579915564631, "loss": 1.5783, "step": 6211 }, { "epoch": 1.9306033135354799, "grad_norm": 0.29296875, "learning_rate": 0.00010469125578303573, "loss": 1.4423, "step": 6212 }, { "epoch": 1.930915911222257, "grad_norm": 0.28125, "learning_rate": 0.00010466671212720805, "loss": 1.7387, "step": 6213 }, { "epoch": 1.931228508909034, "grad_norm": 0.296875, "learning_rate": 0.00010464216818964502, "loss": 1.6482, "step": 6214 }, { "epoch": 1.9315411065958112, "grad_norm": 0.28125, "learning_rate": 0.00010461762397182837, "loss": 1.6284, "step": 6215 }, { "epoch": 1.9318537042825883, "grad_norm": 0.298828125, "learning_rate": 0.00010459307947523991, "loss": 1.5385, "step": 6216 }, { "epoch": 1.9321663019693656, "grad_norm": 0.30078125, "learning_rate": 0.00010456853470136136, "loss": 1.7142, "step": 6217 }, { "epoch": 1.9324788996561426, "grad_norm": 0.291015625, "learning_rate": 0.00010454398965167458, "loss": 1.5259, "step": 6218 }, { "epoch": 1.9327914973429197, "grad_norm": 0.283203125, "learning_rate": 0.00010451944432766131, "loss": 1.5003, "step": 6219 }, { "epoch": 1.9331040950296967, "grad_norm": 0.291015625, "learning_rate": 0.00010449489873080345, "loss": 1.3931, "step": 6220 }, { "epoch": 1.9334166927164738, "grad_norm": 0.287109375, "learning_rate": 0.00010447035286258282, "loss": 1.4621, "step": 6221 }, { "epoch": 1.933729290403251, "grad_norm": 0.283203125, "learning_rate": 0.0001044458067244813, "loss": 1.3083, "step": 6222 }, { "epoch": 1.934041888090028, "grad_norm": 0.294921875, "learning_rate": 0.00010442126031798076, "loss": 1.7117, "step": 6223 }, { "epoch": 1.9343544857768054, "grad_norm": 0.373046875, "learning_rate": 0.00010439671364456312, "loss": 2.2069, "step": 6224 }, { "epoch": 1.9346670834635824, "grad_norm": 0.2890625, "learning_rate": 0.00010437216670571021, "loss": 1.3417, "step": 6225 }, { "epoch": 1.9349796811503595, "grad_norm": 0.29296875, "learning_rate": 0.00010434761950290408, "loss": 1.4831, "step": 6226 }, { "epoch": 1.9352922788371365, "grad_norm": 0.322265625, "learning_rate": 0.0001043230720376266, "loss": 1.6731, "step": 6227 }, { "epoch": 1.9356048765239138, "grad_norm": 0.296875, "learning_rate": 0.00010429852431135976, "loss": 1.6335, "step": 6228 }, { "epoch": 1.9359174742106908, "grad_norm": 0.28125, "learning_rate": 0.00010427397632558556, "loss": 1.5968, "step": 6229 }, { "epoch": 1.936230071897468, "grad_norm": 0.291015625, "learning_rate": 0.00010424942808178593, "loss": 1.3624, "step": 6230 }, { "epoch": 1.9365426695842451, "grad_norm": 0.2734375, "learning_rate": 0.00010422487958144289, "loss": 1.5697, "step": 6231 }, { "epoch": 1.9368552672710222, "grad_norm": 0.291015625, "learning_rate": 0.0001042003308260385, "loss": 1.4961, "step": 6232 }, { "epoch": 1.9371678649577992, "grad_norm": 0.2890625, "learning_rate": 0.0001041757818170548, "loss": 1.7063, "step": 6233 }, { "epoch": 1.9374804626445763, "grad_norm": 0.30078125, "learning_rate": 0.00010415123255597383, "loss": 1.6431, "step": 6234 }, { "epoch": 1.9377930603313536, "grad_norm": 0.294921875, "learning_rate": 0.00010412668304427766, "loss": 1.6485, "step": 6235 }, { "epoch": 1.9381056580181306, "grad_norm": 0.30078125, "learning_rate": 0.00010410213328344838, "loss": 1.4691, "step": 6236 }, { "epoch": 1.9384182557049079, "grad_norm": 0.3046875, "learning_rate": 0.00010407758327496807, "loss": 1.5958, "step": 6237 }, { "epoch": 1.938730853391685, "grad_norm": 0.28515625, "learning_rate": 0.00010405303302031888, "loss": 1.3413, "step": 6238 }, { "epoch": 1.939043451078462, "grad_norm": 0.27734375, "learning_rate": 0.0001040284825209829, "loss": 1.2792, "step": 6239 }, { "epoch": 1.939356048765239, "grad_norm": 0.294921875, "learning_rate": 0.0001040039317784423, "loss": 1.3078, "step": 6240 }, { "epoch": 1.9396686464520163, "grad_norm": 0.2890625, "learning_rate": 0.00010397938079417926, "loss": 1.7102, "step": 6241 }, { "epoch": 1.9399812441387934, "grad_norm": 0.2890625, "learning_rate": 0.00010395482956967592, "loss": 1.5055, "step": 6242 }, { "epoch": 1.9402938418255706, "grad_norm": 0.291015625, "learning_rate": 0.00010393027810641445, "loss": 1.7202, "step": 6243 }, { "epoch": 1.9406064395123477, "grad_norm": 0.296875, "learning_rate": 0.00010390572640587713, "loss": 1.5117, "step": 6244 }, { "epoch": 1.9409190371991247, "grad_norm": 0.291015625, "learning_rate": 0.00010388117446954609, "loss": 1.581, "step": 6245 }, { "epoch": 1.9412316348859018, "grad_norm": 0.28515625, "learning_rate": 0.0001038566222989036, "loss": 1.7444, "step": 6246 }, { "epoch": 1.9415442325726788, "grad_norm": 0.28515625, "learning_rate": 0.00010383206989543195, "loss": 1.4812, "step": 6247 }, { "epoch": 1.941856830259456, "grad_norm": 0.296875, "learning_rate": 0.00010380751726061333, "loss": 1.6901, "step": 6248 }, { "epoch": 1.9421694279462332, "grad_norm": 0.283203125, "learning_rate": 0.00010378296439593002, "loss": 1.5478, "step": 6249 }, { "epoch": 1.9424820256330104, "grad_norm": 0.29296875, "learning_rate": 0.00010375841130286437, "loss": 1.305, "step": 6250 }, { "epoch": 1.9427946233197875, "grad_norm": 0.28515625, "learning_rate": 0.00010373385798289861, "loss": 1.6149, "step": 6251 }, { "epoch": 1.9431072210065645, "grad_norm": 0.298828125, "learning_rate": 0.0001037093044375151, "loss": 1.5205, "step": 6252 }, { "epoch": 1.9434198186933416, "grad_norm": 0.28515625, "learning_rate": 0.00010368475066819613, "loss": 1.4071, "step": 6253 }, { "epoch": 1.9437324163801186, "grad_norm": 0.306640625, "learning_rate": 0.00010366019667642412, "loss": 1.4452, "step": 6254 }, { "epoch": 1.944045014066896, "grad_norm": 0.287109375, "learning_rate": 0.00010363564246368134, "loss": 1.4957, "step": 6255 }, { "epoch": 1.9443576117536732, "grad_norm": 0.2890625, "learning_rate": 0.00010361108803145019, "loss": 1.6569, "step": 6256 }, { "epoch": 1.9446702094404502, "grad_norm": 0.28125, "learning_rate": 0.00010358653338121305, "loss": 1.4824, "step": 6257 }, { "epoch": 1.9449828071272273, "grad_norm": 0.29296875, "learning_rate": 0.00010356197851445233, "loss": 1.4794, "step": 6258 }, { "epoch": 1.9452954048140043, "grad_norm": 0.29296875, "learning_rate": 0.00010353742343265043, "loss": 1.7687, "step": 6259 }, { "epoch": 1.9456080025007814, "grad_norm": 0.314453125, "learning_rate": 0.00010351286813728978, "loss": 1.3405, "step": 6260 }, { "epoch": 1.9459206001875586, "grad_norm": 0.29296875, "learning_rate": 0.00010348831262985277, "loss": 1.4164, "step": 6261 }, { "epoch": 1.9462331978743357, "grad_norm": 0.302734375, "learning_rate": 0.00010346375691182191, "loss": 1.5927, "step": 6262 }, { "epoch": 1.946545795561113, "grad_norm": 0.28515625, "learning_rate": 0.00010343920098467958, "loss": 1.7809, "step": 6263 }, { "epoch": 1.94685839324789, "grad_norm": 0.33203125, "learning_rate": 0.00010341464484990837, "loss": 1.4906, "step": 6264 }, { "epoch": 1.947170990934667, "grad_norm": 0.306640625, "learning_rate": 0.00010339008850899068, "loss": 1.5188, "step": 6265 }, { "epoch": 1.947483588621444, "grad_norm": 0.28515625, "learning_rate": 0.00010336553196340902, "loss": 1.3992, "step": 6266 }, { "epoch": 1.9477961863082212, "grad_norm": 0.291015625, "learning_rate": 0.00010334097521464589, "loss": 1.4773, "step": 6267 }, { "epoch": 1.9481087839949984, "grad_norm": 0.271484375, "learning_rate": 0.00010331641826418385, "loss": 1.3414, "step": 6268 }, { "epoch": 1.9484213816817757, "grad_norm": 0.29296875, "learning_rate": 0.0001032918611135054, "loss": 1.617, "step": 6269 }, { "epoch": 1.9487339793685527, "grad_norm": 0.294921875, "learning_rate": 0.0001032673037640931, "loss": 1.2843, "step": 6270 }, { "epoch": 1.9490465770553298, "grad_norm": 0.2890625, "learning_rate": 0.00010324274621742953, "loss": 1.5678, "step": 6271 }, { "epoch": 1.9493591747421068, "grad_norm": 0.2734375, "learning_rate": 0.00010321818847499725, "loss": 1.3233, "step": 6272 }, { "epoch": 1.949671772428884, "grad_norm": 0.30078125, "learning_rate": 0.00010319363053827878, "loss": 1.3855, "step": 6273 }, { "epoch": 1.9499843701156612, "grad_norm": 0.29296875, "learning_rate": 0.0001031690724087568, "loss": 1.5765, "step": 6274 }, { "epoch": 1.9502969678024382, "grad_norm": 0.30078125, "learning_rate": 0.00010314451408791385, "loss": 1.3791, "step": 6275 }, { "epoch": 1.9506095654892155, "grad_norm": 0.267578125, "learning_rate": 0.00010311995557723262, "loss": 1.3775, "step": 6276 }, { "epoch": 1.9509221631759925, "grad_norm": 0.296875, "learning_rate": 0.00010309539687819567, "loss": 1.611, "step": 6277 }, { "epoch": 1.9512347608627696, "grad_norm": 0.29296875, "learning_rate": 0.00010307083799228567, "loss": 1.4502, "step": 6278 }, { "epoch": 1.9515473585495466, "grad_norm": 0.298828125, "learning_rate": 0.00010304627892098526, "loss": 1.521, "step": 6279 }, { "epoch": 1.9518599562363237, "grad_norm": 0.302734375, "learning_rate": 0.00010302171966577711, "loss": 1.5794, "step": 6280 }, { "epoch": 1.952172553923101, "grad_norm": 0.296875, "learning_rate": 0.0001029971602281439, "loss": 1.5355, "step": 6281 }, { "epoch": 1.9524851516098782, "grad_norm": 0.29296875, "learning_rate": 0.00010297260060956831, "loss": 1.3984, "step": 6282 }, { "epoch": 1.9527977492966553, "grad_norm": 0.294921875, "learning_rate": 0.00010294804081153304, "loss": 1.3863, "step": 6283 }, { "epoch": 1.9531103469834323, "grad_norm": 0.28515625, "learning_rate": 0.00010292348083552079, "loss": 1.571, "step": 6284 }, { "epoch": 1.9534229446702094, "grad_norm": 0.298828125, "learning_rate": 0.00010289892068301426, "loss": 1.3656, "step": 6285 }, { "epoch": 1.9537355423569864, "grad_norm": 0.291015625, "learning_rate": 0.00010287436035549621, "loss": 1.8751, "step": 6286 }, { "epoch": 1.9540481400437637, "grad_norm": 0.30078125, "learning_rate": 0.00010284979985444933, "loss": 1.3707, "step": 6287 }, { "epoch": 1.9543607377305408, "grad_norm": 0.271484375, "learning_rate": 0.00010282523918135642, "loss": 1.4945, "step": 6288 }, { "epoch": 1.954673335417318, "grad_norm": 0.29296875, "learning_rate": 0.00010280067833770024, "loss": 1.4987, "step": 6289 }, { "epoch": 1.954985933104095, "grad_norm": 0.287109375, "learning_rate": 0.00010277611732496353, "loss": 1.461, "step": 6290 }, { "epoch": 1.9552985307908721, "grad_norm": 0.302734375, "learning_rate": 0.00010275155614462905, "loss": 1.6741, "step": 6291 }, { "epoch": 1.9556111284776492, "grad_norm": 0.28515625, "learning_rate": 0.00010272699479817967, "loss": 1.4173, "step": 6292 }, { "epoch": 1.9559237261644262, "grad_norm": 0.298828125, "learning_rate": 0.0001027024332870981, "loss": 1.8938, "step": 6293 }, { "epoch": 1.9562363238512035, "grad_norm": 0.279296875, "learning_rate": 0.00010267787161286719, "loss": 1.5966, "step": 6294 }, { "epoch": 1.9565489215379808, "grad_norm": 0.291015625, "learning_rate": 0.00010265330977696979, "loss": 1.4133, "step": 6295 }, { "epoch": 1.9568615192247578, "grad_norm": 0.279296875, "learning_rate": 0.00010262874778088869, "loss": 1.3799, "step": 6296 }, { "epoch": 1.9571741169115349, "grad_norm": 0.310546875, "learning_rate": 0.0001026041856261067, "loss": 1.4744, "step": 6297 }, { "epoch": 1.957486714598312, "grad_norm": 0.287109375, "learning_rate": 0.00010257962331410673, "loss": 1.3745, "step": 6298 }, { "epoch": 1.957799312285089, "grad_norm": 0.287109375, "learning_rate": 0.00010255506084637161, "loss": 1.5977, "step": 6299 }, { "epoch": 1.9581119099718662, "grad_norm": 0.296875, "learning_rate": 0.0001025304982243842, "loss": 1.4493, "step": 6300 }, { "epoch": 1.9584245076586433, "grad_norm": 0.298828125, "learning_rate": 0.00010250593544962744, "loss": 1.6393, "step": 6301 }, { "epoch": 1.9587371053454206, "grad_norm": 0.279296875, "learning_rate": 0.00010248137252358412, "loss": 1.7133, "step": 6302 }, { "epoch": 1.9590497030321976, "grad_norm": 0.2890625, "learning_rate": 0.00010245680944773717, "loss": 1.4606, "step": 6303 }, { "epoch": 1.9593623007189747, "grad_norm": 0.275390625, "learning_rate": 0.00010243224622356951, "loss": 1.3692, "step": 6304 }, { "epoch": 1.9596748984057517, "grad_norm": 0.302734375, "learning_rate": 0.00010240768285256404, "loss": 1.5115, "step": 6305 }, { "epoch": 1.9599874960925288, "grad_norm": 0.294921875, "learning_rate": 0.00010238311933620373, "loss": 1.3411, "step": 6306 }, { "epoch": 1.960300093779306, "grad_norm": 0.283203125, "learning_rate": 0.00010235855567597143, "loss": 1.7619, "step": 6307 }, { "epoch": 1.9606126914660833, "grad_norm": 0.27734375, "learning_rate": 0.00010233399187335013, "loss": 1.4151, "step": 6308 }, { "epoch": 1.9609252891528604, "grad_norm": 0.27734375, "learning_rate": 0.00010230942792982275, "loss": 1.4594, "step": 6309 }, { "epoch": 1.9612378868396374, "grad_norm": 0.30078125, "learning_rate": 0.00010228486384687227, "loss": 1.3581, "step": 6310 }, { "epoch": 1.9615504845264145, "grad_norm": 0.28125, "learning_rate": 0.00010226029962598165, "loss": 1.5394, "step": 6311 }, { "epoch": 1.9618630822131915, "grad_norm": 0.275390625, "learning_rate": 0.0001022357352686339, "loss": 1.7881, "step": 6312 }, { "epoch": 1.9621756798999688, "grad_norm": 0.294921875, "learning_rate": 0.0001022111707763119, "loss": 1.352, "step": 6313 }, { "epoch": 1.9624882775867458, "grad_norm": 0.287109375, "learning_rate": 0.00010218660615049876, "loss": 1.5106, "step": 6314 }, { "epoch": 1.962800875273523, "grad_norm": 0.2890625, "learning_rate": 0.00010216204139267737, "loss": 1.7533, "step": 6315 }, { "epoch": 1.9631134729603001, "grad_norm": 0.29296875, "learning_rate": 0.00010213747650433081, "loss": 1.5134, "step": 6316 }, { "epoch": 1.9634260706470772, "grad_norm": 0.287109375, "learning_rate": 0.00010211291148694204, "loss": 1.4083, "step": 6317 }, { "epoch": 1.9637386683338542, "grad_norm": 0.3046875, "learning_rate": 0.00010208834634199418, "loss": 1.4584, "step": 6318 }, { "epoch": 1.9640512660206313, "grad_norm": 0.30859375, "learning_rate": 0.00010206378107097012, "loss": 1.2652, "step": 6319 }, { "epoch": 1.9643638637074086, "grad_norm": 0.2890625, "learning_rate": 0.00010203921567535301, "loss": 1.562, "step": 6320 }, { "epoch": 1.9646764613941858, "grad_norm": 0.283203125, "learning_rate": 0.00010201465015662583, "loss": 1.5787, "step": 6321 }, { "epoch": 1.9649890590809629, "grad_norm": 0.298828125, "learning_rate": 0.00010199008451627166, "loss": 1.546, "step": 6322 }, { "epoch": 1.96530165676774, "grad_norm": 0.2890625, "learning_rate": 0.00010196551875577354, "loss": 1.2498, "step": 6323 }, { "epoch": 1.965614254454517, "grad_norm": 0.271484375, "learning_rate": 0.00010194095287661458, "loss": 1.639, "step": 6324 }, { "epoch": 1.965926852141294, "grad_norm": 0.310546875, "learning_rate": 0.00010191638688027778, "loss": 1.601, "step": 6325 }, { "epoch": 1.9662394498280713, "grad_norm": 0.376953125, "learning_rate": 0.0001018918207682463, "loss": 2.1508, "step": 6326 }, { "epoch": 1.9665520475148484, "grad_norm": 0.3046875, "learning_rate": 0.00010186725454200316, "loss": 1.4983, "step": 6327 }, { "epoch": 1.9668646452016256, "grad_norm": 0.29296875, "learning_rate": 0.00010184268820303149, "loss": 1.6153, "step": 6328 }, { "epoch": 1.9671772428884027, "grad_norm": 0.294921875, "learning_rate": 0.00010181812175281438, "loss": 1.541, "step": 6329 }, { "epoch": 1.9674898405751797, "grad_norm": 0.287109375, "learning_rate": 0.00010179355519283498, "loss": 1.4212, "step": 6330 }, { "epoch": 1.9678024382619568, "grad_norm": 0.298828125, "learning_rate": 0.00010176898852457633, "loss": 1.4843, "step": 6331 }, { "epoch": 1.9681150359487338, "grad_norm": 0.2890625, "learning_rate": 0.00010174442174952161, "loss": 1.5049, "step": 6332 }, { "epoch": 1.968427633635511, "grad_norm": 0.28515625, "learning_rate": 0.00010171985486915389, "loss": 1.4579, "step": 6333 }, { "epoch": 1.9687402313222884, "grad_norm": 0.287109375, "learning_rate": 0.00010169528788495637, "loss": 1.3979, "step": 6334 }, { "epoch": 1.9690528290090654, "grad_norm": 0.294921875, "learning_rate": 0.00010167072079841216, "loss": 1.4632, "step": 6335 }, { "epoch": 1.9693654266958425, "grad_norm": 0.28125, "learning_rate": 0.00010164615361100442, "loss": 1.5255, "step": 6336 }, { "epoch": 1.9696780243826195, "grad_norm": 0.28515625, "learning_rate": 0.00010162158632421625, "loss": 1.7949, "step": 6337 }, { "epoch": 1.9699906220693966, "grad_norm": 0.306640625, "learning_rate": 0.00010159701893953089, "loss": 1.482, "step": 6338 }, { "epoch": 1.9703032197561738, "grad_norm": 0.296875, "learning_rate": 0.00010157245145843141, "loss": 1.892, "step": 6339 }, { "epoch": 1.970615817442951, "grad_norm": 0.302734375, "learning_rate": 0.00010154788388240106, "loss": 1.4037, "step": 6340 }, { "epoch": 1.9709284151297282, "grad_norm": 0.27734375, "learning_rate": 0.00010152331621292299, "loss": 1.4799, "step": 6341 }, { "epoch": 1.9712410128165052, "grad_norm": 0.291015625, "learning_rate": 0.0001014987484514804, "loss": 1.6288, "step": 6342 }, { "epoch": 1.9715536105032823, "grad_norm": 0.30078125, "learning_rate": 0.00010147418059955643, "loss": 1.8096, "step": 6343 }, { "epoch": 1.9718662081900593, "grad_norm": 0.291015625, "learning_rate": 0.00010144961265863431, "loss": 1.6755, "step": 6344 }, { "epoch": 1.9721788058768364, "grad_norm": 0.296875, "learning_rate": 0.0001014250446301972, "loss": 1.594, "step": 6345 }, { "epoch": 1.9724914035636136, "grad_norm": 0.310546875, "learning_rate": 0.00010140047651572835, "loss": 1.4913, "step": 6346 }, { "epoch": 1.972804001250391, "grad_norm": 0.298828125, "learning_rate": 0.00010137590831671093, "loss": 1.6336, "step": 6347 }, { "epoch": 1.973116598937168, "grad_norm": 0.34765625, "learning_rate": 0.00010135134003462823, "loss": 1.7255, "step": 6348 }, { "epoch": 1.973429196623945, "grad_norm": 0.28515625, "learning_rate": 0.00010132677167096333, "loss": 1.7847, "step": 6349 }, { "epoch": 1.973741794310722, "grad_norm": 0.40625, "learning_rate": 0.00010130220322719958, "loss": 2.4353, "step": 6350 }, { "epoch": 1.974054391997499, "grad_norm": 0.296875, "learning_rate": 0.00010127763470482014, "loss": 1.3342, "step": 6351 }, { "epoch": 1.9743669896842764, "grad_norm": 0.28515625, "learning_rate": 0.0001012530661053083, "loss": 1.5728, "step": 6352 }, { "epoch": 1.9746795873710534, "grad_norm": 0.296875, "learning_rate": 0.00010122849743014722, "loss": 1.7076, "step": 6353 }, { "epoch": 1.9749921850578307, "grad_norm": 0.2890625, "learning_rate": 0.00010120392868082022, "loss": 1.4061, "step": 6354 }, { "epoch": 1.9753047827446077, "grad_norm": 0.291015625, "learning_rate": 0.00010117935985881049, "loss": 1.5505, "step": 6355 }, { "epoch": 1.9756173804313848, "grad_norm": 0.287109375, "learning_rate": 0.00010115479096560133, "loss": 1.484, "step": 6356 }, { "epoch": 1.9759299781181618, "grad_norm": 0.283203125, "learning_rate": 0.00010113022200267593, "loss": 1.3421, "step": 6357 }, { "epoch": 1.976242575804939, "grad_norm": 0.275390625, "learning_rate": 0.0001011056529715176, "loss": 1.566, "step": 6358 }, { "epoch": 1.9765551734917162, "grad_norm": 0.29296875, "learning_rate": 0.00010108108387360961, "loss": 1.6118, "step": 6359 }, { "epoch": 1.9768677711784934, "grad_norm": 0.3046875, "learning_rate": 0.0001010565147104352, "loss": 1.6341, "step": 6360 }, { "epoch": 1.9771803688652705, "grad_norm": 0.296875, "learning_rate": 0.00010103194548347763, "loss": 1.5569, "step": 6361 }, { "epoch": 1.9774929665520475, "grad_norm": 0.28515625, "learning_rate": 0.00010100737619422023, "loss": 1.7467, "step": 6362 }, { "epoch": 1.9778055642388246, "grad_norm": 0.283203125, "learning_rate": 0.0001009828068441462, "loss": 1.3301, "step": 6363 }, { "epoch": 1.9781181619256016, "grad_norm": 0.291015625, "learning_rate": 0.00010095823743473891, "loss": 1.5612, "step": 6364 }, { "epoch": 1.978430759612379, "grad_norm": 0.279296875, "learning_rate": 0.00010093366796748158, "loss": 1.7422, "step": 6365 }, { "epoch": 1.978743357299156, "grad_norm": 0.27734375, "learning_rate": 0.00010090909844385754, "loss": 1.6783, "step": 6366 }, { "epoch": 1.9790559549859332, "grad_norm": 0.291015625, "learning_rate": 0.00010088452886535005, "loss": 1.5214, "step": 6367 }, { "epoch": 1.9793685526727103, "grad_norm": 0.302734375, "learning_rate": 0.00010085995923344245, "loss": 1.7021, "step": 6368 }, { "epoch": 1.9796811503594873, "grad_norm": 0.294921875, "learning_rate": 0.00010083538954961799, "loss": 1.5531, "step": 6369 }, { "epoch": 1.9799937480462644, "grad_norm": 0.287109375, "learning_rate": 0.00010081081981536003, "loss": 1.1152, "step": 6370 }, { "epoch": 1.9803063457330414, "grad_norm": 0.298828125, "learning_rate": 0.00010078625003215182, "loss": 1.5247, "step": 6371 }, { "epoch": 1.9806189434198187, "grad_norm": 0.314453125, "learning_rate": 0.00010076168020147672, "loss": 2.2323, "step": 6372 }, { "epoch": 1.980931541106596, "grad_norm": 0.271484375, "learning_rate": 0.00010073711032481799, "loss": 1.494, "step": 6373 }, { "epoch": 1.981244138793373, "grad_norm": 0.283203125, "learning_rate": 0.00010071254040365896, "loss": 1.4078, "step": 6374 }, { "epoch": 1.98155673648015, "grad_norm": 0.279296875, "learning_rate": 0.00010068797043948297, "loss": 1.5707, "step": 6375 }, { "epoch": 1.9818693341669271, "grad_norm": 0.28125, "learning_rate": 0.00010066340043377333, "loss": 1.4195, "step": 6376 }, { "epoch": 1.9821819318537042, "grad_norm": 0.302734375, "learning_rate": 0.00010063883038801338, "loss": 1.3336, "step": 6377 }, { "epoch": 1.9824945295404814, "grad_norm": 0.294921875, "learning_rate": 0.00010061426030368641, "loss": 1.5426, "step": 6378 }, { "epoch": 1.9828071272272585, "grad_norm": 0.28515625, "learning_rate": 0.00010058969018227575, "loss": 1.4451, "step": 6379 }, { "epoch": 1.9831197249140358, "grad_norm": 0.28515625, "learning_rate": 0.00010056512002526475, "loss": 1.5482, "step": 6380 }, { "epoch": 1.9834323226008128, "grad_norm": 0.29296875, "learning_rate": 0.00010054054983413673, "loss": 1.3617, "step": 6381 }, { "epoch": 1.9837449202875899, "grad_norm": 0.283203125, "learning_rate": 0.00010051597961037505, "loss": 1.5777, "step": 6382 }, { "epoch": 1.984057517974367, "grad_norm": 0.28515625, "learning_rate": 0.00010049140935546299, "loss": 1.6081, "step": 6383 }, { "epoch": 1.984370115661144, "grad_norm": 0.310546875, "learning_rate": 0.00010046683907088395, "loss": 1.4387, "step": 6384 }, { "epoch": 1.9846827133479212, "grad_norm": 0.28125, "learning_rate": 0.00010044226875812121, "loss": 1.6327, "step": 6385 }, { "epoch": 1.9849953110346985, "grad_norm": 0.287109375, "learning_rate": 0.00010041769841865818, "loss": 1.6473, "step": 6386 }, { "epoch": 1.9853079087214756, "grad_norm": 0.3046875, "learning_rate": 0.00010039312805397813, "loss": 1.3955, "step": 6387 }, { "epoch": 1.9856205064082526, "grad_norm": 0.296875, "learning_rate": 0.00010036855766556446, "loss": 1.2537, "step": 6388 }, { "epoch": 1.9859331040950297, "grad_norm": 0.291015625, "learning_rate": 0.00010034398725490051, "loss": 1.8257, "step": 6389 }, { "epoch": 1.9862457017818067, "grad_norm": 0.310546875, "learning_rate": 0.00010031941682346957, "loss": 1.5564, "step": 6390 }, { "epoch": 1.986558299468584, "grad_norm": 0.291015625, "learning_rate": 0.00010029484637275504, "loss": 1.4865, "step": 6391 }, { "epoch": 1.986870897155361, "grad_norm": 0.296875, "learning_rate": 0.00010027027590424028, "loss": 1.834, "step": 6392 }, { "epoch": 1.9871834948421383, "grad_norm": 0.30078125, "learning_rate": 0.00010024570541940858, "loss": 1.6117, "step": 6393 }, { "epoch": 1.9874960925289153, "grad_norm": 0.28125, "learning_rate": 0.00010022113491974336, "loss": 1.4604, "step": 6394 }, { "epoch": 1.9878086902156924, "grad_norm": 0.2890625, "learning_rate": 0.00010019656440672795, "loss": 1.7833, "step": 6395 }, { "epoch": 1.9881212879024694, "grad_norm": 0.2890625, "learning_rate": 0.00010017199388184568, "loss": 1.4786, "step": 6396 }, { "epoch": 1.9884338855892465, "grad_norm": 0.29296875, "learning_rate": 0.00010014742334657993, "loss": 1.3509, "step": 6397 }, { "epoch": 1.9887464832760238, "grad_norm": 0.302734375, "learning_rate": 0.00010012285280241404, "loss": 1.3208, "step": 6398 } ], "logging_steps": 1, "max_steps": 12796, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 3199, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.3076986683457536e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }