{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999288104221542, "eval_steps": 352, "global_step": 7023, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 3.5899, "step": 1 }, { "epoch": 0.0, "eval_loss": 4.178114414215088, "eval_runtime": 473.0271, "eval_samples_per_second": 43.31, "eval_steps_per_second": 14.437, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 4.1967, "step": 2 }, { "epoch": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 4.2584, "step": 3 }, { "epoch": 0.0, "learning_rate": 6e-06, "loss": 4.2861, "step": 4 }, { "epoch": 0.0, "learning_rate": 8.000000000000001e-06, "loss": 3.449, "step": 5 }, { "epoch": 0.0, "learning_rate": 1e-05, "loss": 3.6379, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.2e-05, "loss": 3.5514, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.4000000000000001e-05, "loss": 3.3639, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.6000000000000003e-05, "loss": 3.2568, "step": 9 }, { "epoch": 0.0, "learning_rate": 1.8e-05, "loss": 3.3578, "step": 10 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 3.3858, "step": 11 }, { "epoch": 0.0, "learning_rate": 2.2000000000000003e-05, "loss": 3.4463, "step": 12 }, { "epoch": 0.0, "learning_rate": 2.4e-05, "loss": 3.3023, "step": 13 }, { "epoch": 0.0, "learning_rate": 2.6000000000000002e-05, "loss": 3.3967, "step": 14 }, { "epoch": 0.0, "learning_rate": 2.8000000000000003e-05, "loss": 3.3402, "step": 15 }, { "epoch": 0.0, "learning_rate": 3e-05, "loss": 3.2368, "step": 16 }, { "epoch": 0.0, "learning_rate": 3.2000000000000005e-05, "loss": 3.3075, "step": 17 }, { "epoch": 0.0, "learning_rate": 3.4000000000000007e-05, "loss": 3.3434, "step": 18 }, { "epoch": 0.0, "learning_rate": 3.6e-05, "loss": 3.3037, "step": 19 }, { "epoch": 0.0, "learning_rate": 3.8e-05, "loss": 3.2338, "step": 20 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 3.1854, "step": 21 }, { "epoch": 0.0, "learning_rate": 4.2e-05, "loss": 3.2481, "step": 22 }, { "epoch": 0.0, "learning_rate": 4.4000000000000006e-05, "loss": 3.2006, "step": 23 }, { "epoch": 0.0, "learning_rate": 4.600000000000001e-05, "loss": 3.2349, "step": 24 }, { "epoch": 0.0, "learning_rate": 4.8e-05, "loss": 3.2476, "step": 25 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 3.169, "step": 26 }, { "epoch": 0.0, "learning_rate": 5.2000000000000004e-05, "loss": 3.2349, "step": 27 }, { "epoch": 0.0, "learning_rate": 5.4000000000000005e-05, "loss": 3.2007, "step": 28 }, { "epoch": 0.0, "learning_rate": 5.6000000000000006e-05, "loss": 3.2467, "step": 29 }, { "epoch": 0.0, "learning_rate": 5.8e-05, "loss": 3.2974, "step": 30 }, { "epoch": 0.0, "learning_rate": 6e-05, "loss": 3.0882, "step": 31 }, { "epoch": 0.0, "learning_rate": 6.2e-05, "loss": 3.1722, "step": 32 }, { "epoch": 0.0, "learning_rate": 6.400000000000001e-05, "loss": 3.1964, "step": 33 }, { "epoch": 0.0, "learning_rate": 6.6e-05, "loss": 3.2419, "step": 34 }, { "epoch": 0.0, "learning_rate": 6.800000000000001e-05, "loss": 3.1016, "step": 35 }, { "epoch": 0.01, "learning_rate": 7e-05, "loss": 3.1917, "step": 36 }, { "epoch": 0.01, "learning_rate": 7.2e-05, "loss": 3.2126, "step": 37 }, { "epoch": 0.01, "learning_rate": 7.4e-05, "loss": 3.2239, "step": 38 }, { "epoch": 0.01, "learning_rate": 7.6e-05, "loss": 3.1186, "step": 39 }, { "epoch": 0.01, "learning_rate": 7.800000000000001e-05, "loss": 3.2016, "step": 40 }, { "epoch": 0.01, "learning_rate": 8e-05, "loss": 3.2209, "step": 41 }, { "epoch": 0.01, "learning_rate": 8.2e-05, "loss": 3.2119, "step": 42 }, { "epoch": 0.01, "learning_rate": 8.4e-05, "loss": 3.3246, "step": 43 }, { "epoch": 0.01, "learning_rate": 8.6e-05, "loss": 3.1526, "step": 44 }, { "epoch": 0.01, "learning_rate": 8.800000000000001e-05, "loss": 3.22, "step": 45 }, { "epoch": 0.01, "learning_rate": 9e-05, "loss": 3.1782, "step": 46 }, { "epoch": 0.01, "learning_rate": 9.200000000000001e-05, "loss": 3.1501, "step": 47 }, { "epoch": 0.01, "learning_rate": 9.4e-05, "loss": 3.2063, "step": 48 }, { "epoch": 0.01, "learning_rate": 9.6e-05, "loss": 3.2535, "step": 49 }, { "epoch": 0.01, "learning_rate": 9.8e-05, "loss": 3.1148, "step": 50 }, { "epoch": 0.01, "learning_rate": 0.0001, "loss": 3.232, "step": 51 }, { "epoch": 0.01, "learning_rate": 0.00010200000000000001, "loss": 3.1592, "step": 52 }, { "epoch": 0.01, "learning_rate": 0.00010400000000000001, "loss": 3.3169, "step": 53 }, { "epoch": 0.01, "learning_rate": 0.00010600000000000002, "loss": 3.2689, "step": 54 }, { "epoch": 0.01, "learning_rate": 0.00010800000000000001, "loss": 3.2544, "step": 55 }, { "epoch": 0.01, "learning_rate": 0.00011000000000000002, "loss": 3.2462, "step": 56 }, { "epoch": 0.01, "learning_rate": 0.00011200000000000001, "loss": 3.2163, "step": 57 }, { "epoch": 0.01, "learning_rate": 0.00011399999999999999, "loss": 3.1604, "step": 58 }, { "epoch": 0.01, "learning_rate": 0.000116, "loss": 3.2952, "step": 59 }, { "epoch": 0.01, "learning_rate": 0.000118, "loss": 3.2749, "step": 60 }, { "epoch": 0.01, "learning_rate": 0.00012, "loss": 3.2169, "step": 61 }, { "epoch": 0.01, "learning_rate": 0.000122, "loss": 3.2749, "step": 62 }, { "epoch": 0.01, "learning_rate": 0.000124, "loss": 3.26, "step": 63 }, { "epoch": 0.01, "learning_rate": 0.000126, "loss": 3.2608, "step": 64 }, { "epoch": 0.01, "learning_rate": 0.00012800000000000002, "loss": 3.1959, "step": 65 }, { "epoch": 0.01, "learning_rate": 0.00013000000000000002, "loss": 3.1977, "step": 66 }, { "epoch": 0.01, "learning_rate": 0.000132, "loss": 3.2184, "step": 67 }, { "epoch": 0.01, "learning_rate": 0.000134, "loss": 3.1538, "step": 68 }, { "epoch": 0.01, "learning_rate": 0.00013600000000000003, "loss": 3.3864, "step": 69 }, { "epoch": 0.01, "learning_rate": 0.000138, "loss": 3.2612, "step": 70 }, { "epoch": 0.01, "learning_rate": 0.00014, "loss": 3.3267, "step": 71 }, { "epoch": 0.01, "learning_rate": 0.000142, "loss": 3.3697, "step": 72 }, { "epoch": 0.01, "learning_rate": 0.000144, "loss": 3.2061, "step": 73 }, { "epoch": 0.01, "learning_rate": 0.000146, "loss": 3.1879, "step": 74 }, { "epoch": 0.01, "learning_rate": 0.000148, "loss": 3.2455, "step": 75 }, { "epoch": 0.01, "learning_rate": 0.00015000000000000001, "loss": 3.2665, "step": 76 }, { "epoch": 0.01, "learning_rate": 0.000152, "loss": 3.3222, "step": 77 }, { "epoch": 0.01, "learning_rate": 0.000154, "loss": 3.2912, "step": 78 }, { "epoch": 0.01, "learning_rate": 0.00015600000000000002, "loss": 3.3734, "step": 79 }, { "epoch": 0.01, "learning_rate": 0.00015800000000000002, "loss": 3.2328, "step": 80 }, { "epoch": 0.01, "learning_rate": 0.00016, "loss": 3.3124, "step": 81 }, { "epoch": 0.01, "learning_rate": 0.000162, "loss": 3.2042, "step": 82 }, { "epoch": 0.01, "learning_rate": 0.000164, "loss": 3.3126, "step": 83 }, { "epoch": 0.01, "learning_rate": 0.000166, "loss": 3.3218, "step": 84 }, { "epoch": 0.01, "learning_rate": 0.000168, "loss": 3.3561, "step": 85 }, { "epoch": 0.01, "learning_rate": 0.00017, "loss": 3.4065, "step": 86 }, { "epoch": 0.01, "learning_rate": 0.000172, "loss": 3.4102, "step": 87 }, { "epoch": 0.01, "learning_rate": 0.000174, "loss": 3.4352, "step": 88 }, { "epoch": 0.01, "learning_rate": 0.00017600000000000002, "loss": 3.2285, "step": 89 }, { "epoch": 0.01, "learning_rate": 0.00017800000000000002, "loss": 3.3196, "step": 90 }, { "epoch": 0.01, "learning_rate": 0.00018, "loss": 3.4187, "step": 91 }, { "epoch": 0.01, "learning_rate": 0.000182, "loss": 3.505, "step": 92 }, { "epoch": 0.01, "learning_rate": 0.00018400000000000003, "loss": 3.4067, "step": 93 }, { "epoch": 0.01, "learning_rate": 0.00018600000000000002, "loss": 3.4128, "step": 94 }, { "epoch": 0.01, "learning_rate": 0.000188, "loss": 3.4732, "step": 95 }, { "epoch": 0.01, "learning_rate": 0.00019, "loss": 3.3481, "step": 96 }, { "epoch": 0.01, "learning_rate": 0.000192, "loss": 3.4073, "step": 97 }, { "epoch": 0.01, "learning_rate": 0.000194, "loss": 3.5063, "step": 98 }, { "epoch": 0.01, "learning_rate": 0.000196, "loss": 3.3983, "step": 99 }, { "epoch": 0.01, "learning_rate": 0.00019800000000000002, "loss": 3.4428, "step": 100 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 3.3764, "step": 101 }, { "epoch": 0.01, "learning_rate": 0.0001999711107901199, "loss": 3.3813, "step": 102 }, { "epoch": 0.01, "learning_rate": 0.0001999422215802398, "loss": 3.4354, "step": 103 }, { "epoch": 0.01, "learning_rate": 0.00019991333237035966, "loss": 3.2258, "step": 104 }, { "epoch": 0.01, "learning_rate": 0.00019988444316047956, "loss": 3.3943, "step": 105 }, { "epoch": 0.02, "learning_rate": 0.00019985555395059945, "loss": 3.3837, "step": 106 }, { "epoch": 0.02, "learning_rate": 0.00019982666474071937, "loss": 3.3826, "step": 107 }, { "epoch": 0.02, "learning_rate": 0.00019979777553083924, "loss": 3.4589, "step": 108 }, { "epoch": 0.02, "learning_rate": 0.00019976888632095913, "loss": 3.3665, "step": 109 }, { "epoch": 0.02, "learning_rate": 0.00019973999711107903, "loss": 3.4585, "step": 110 }, { "epoch": 0.02, "learning_rate": 0.00019971110790119892, "loss": 3.5142, "step": 111 }, { "epoch": 0.02, "learning_rate": 0.0001996822186913188, "loss": 3.4871, "step": 112 }, { "epoch": 0.02, "learning_rate": 0.00019965332948143868, "loss": 3.4486, "step": 113 }, { "epoch": 0.02, "learning_rate": 0.00019962444027155858, "loss": 3.5011, "step": 114 }, { "epoch": 0.02, "learning_rate": 0.00019959555106167847, "loss": 3.3932, "step": 115 }, { "epoch": 0.02, "learning_rate": 0.00019956666185179836, "loss": 3.5907, "step": 116 }, { "epoch": 0.02, "learning_rate": 0.00019953777264191826, "loss": 3.4969, "step": 117 }, { "epoch": 0.02, "learning_rate": 0.00019950888343203815, "loss": 3.5815, "step": 118 }, { "epoch": 0.02, "learning_rate": 0.00019947999422215804, "loss": 3.4717, "step": 119 }, { "epoch": 0.02, "learning_rate": 0.00019945110501227794, "loss": 3.4238, "step": 120 }, { "epoch": 0.02, "learning_rate": 0.0001994222158023978, "loss": 3.5099, "step": 121 }, { "epoch": 0.02, "learning_rate": 0.0001993933265925177, "loss": 3.5688, "step": 122 }, { "epoch": 0.02, "learning_rate": 0.0001993644373826376, "loss": 3.4204, "step": 123 }, { "epoch": 0.02, "learning_rate": 0.00019933554817275749, "loss": 3.5261, "step": 124 }, { "epoch": 0.02, "learning_rate": 0.00019930665896287738, "loss": 3.6011, "step": 125 }, { "epoch": 0.02, "learning_rate": 0.00019927776975299725, "loss": 3.5179, "step": 126 }, { "epoch": 0.02, "learning_rate": 0.00019924888054311714, "loss": 3.5101, "step": 127 }, { "epoch": 0.02, "learning_rate": 0.00019921999133323706, "loss": 3.5243, "step": 128 }, { "epoch": 0.02, "learning_rate": 0.00019919110212335696, "loss": 3.6438, "step": 129 }, { "epoch": 0.02, "learning_rate": 0.00019916221291347682, "loss": 3.4501, "step": 130 }, { "epoch": 0.02, "learning_rate": 0.00019913332370359672, "loss": 3.4317, "step": 131 }, { "epoch": 0.02, "learning_rate": 0.0001991044344937166, "loss": 3.5163, "step": 132 }, { "epoch": 0.02, "learning_rate": 0.0001990755452838365, "loss": 3.4548, "step": 133 }, { "epoch": 0.02, "learning_rate": 0.00019904665607395637, "loss": 3.5115, "step": 134 }, { "epoch": 0.02, "learning_rate": 0.00019901776686407626, "loss": 3.5406, "step": 135 }, { "epoch": 0.02, "learning_rate": 0.00019898887765419616, "loss": 3.4877, "step": 136 }, { "epoch": 0.02, "learning_rate": 0.00019895998844431605, "loss": 3.567, "step": 137 }, { "epoch": 0.02, "learning_rate": 0.00019893109923443595, "loss": 3.5098, "step": 138 }, { "epoch": 0.02, "learning_rate": 0.00019890221002455584, "loss": 3.4999, "step": 139 }, { "epoch": 0.02, "learning_rate": 0.00019887332081467573, "loss": 3.515, "step": 140 }, { "epoch": 0.02, "learning_rate": 0.00019884443160479563, "loss": 3.6411, "step": 141 }, { "epoch": 0.02, "learning_rate": 0.00019881554239491552, "loss": 3.6045, "step": 142 }, { "epoch": 0.02, "learning_rate": 0.0001987866531850354, "loss": 3.5639, "step": 143 }, { "epoch": 0.02, "learning_rate": 0.00019875776397515528, "loss": 3.3698, "step": 144 }, { "epoch": 0.02, "learning_rate": 0.00019872887476527518, "loss": 3.5527, "step": 145 }, { "epoch": 0.02, "learning_rate": 0.00019869998555539507, "loss": 3.4809, "step": 146 }, { "epoch": 0.02, "learning_rate": 0.00019867109634551496, "loss": 3.5931, "step": 147 }, { "epoch": 0.02, "learning_rate": 0.00019864220713563483, "loss": 3.4159, "step": 148 }, { "epoch": 0.02, "learning_rate": 0.00019861331792575475, "loss": 3.5496, "step": 149 }, { "epoch": 0.02, "learning_rate": 0.00019858442871587464, "loss": 3.5335, "step": 150 }, { "epoch": 0.02, "learning_rate": 0.00019855553950599454, "loss": 3.4063, "step": 151 }, { "epoch": 0.02, "learning_rate": 0.0001985266502961144, "loss": 3.4679, "step": 152 }, { "epoch": 0.02, "learning_rate": 0.0001984977610862343, "loss": 3.4876, "step": 153 }, { "epoch": 0.02, "learning_rate": 0.0001984688718763542, "loss": 3.5482, "step": 154 }, { "epoch": 0.02, "learning_rate": 0.00019843998266647409, "loss": 3.6088, "step": 155 }, { "epoch": 0.02, "learning_rate": 0.00019841109345659395, "loss": 3.5708, "step": 156 }, { "epoch": 0.02, "learning_rate": 0.00019838220424671385, "loss": 3.4716, "step": 157 }, { "epoch": 0.02, "learning_rate": 0.00019835331503683374, "loss": 3.6032, "step": 158 }, { "epoch": 0.02, "learning_rate": 0.00019832442582695366, "loss": 3.4731, "step": 159 }, { "epoch": 0.02, "learning_rate": 0.00019829553661707353, "loss": 3.4766, "step": 160 }, { "epoch": 0.02, "learning_rate": 0.00019826664740719342, "loss": 3.4977, "step": 161 }, { "epoch": 0.02, "learning_rate": 0.00019823775819731332, "loss": 3.4669, "step": 162 }, { "epoch": 0.02, "learning_rate": 0.0001982088689874332, "loss": 3.6169, "step": 163 }, { "epoch": 0.02, "learning_rate": 0.0001981799797775531, "loss": 3.4544, "step": 164 }, { "epoch": 0.02, "learning_rate": 0.00019815109056767297, "loss": 3.5422, "step": 165 }, { "epoch": 0.02, "learning_rate": 0.00019812220135779286, "loss": 3.6311, "step": 166 }, { "epoch": 0.02, "learning_rate": 0.00019809331214791276, "loss": 3.513, "step": 167 }, { "epoch": 0.02, "learning_rate": 0.00019806442293803265, "loss": 3.5973, "step": 168 }, { "epoch": 0.02, "learning_rate": 0.00019803553372815255, "loss": 3.4974, "step": 169 }, { "epoch": 0.02, "learning_rate": 0.00019800664451827244, "loss": 3.6382, "step": 170 }, { "epoch": 0.02, "learning_rate": 0.00019797775530839233, "loss": 3.5406, "step": 171 }, { "epoch": 0.02, "learning_rate": 0.00019794886609851223, "loss": 3.596, "step": 172 }, { "epoch": 0.02, "learning_rate": 0.00019791997688863212, "loss": 3.5675, "step": 173 }, { "epoch": 0.02, "learning_rate": 0.000197891087678752, "loss": 3.4857, "step": 174 }, { "epoch": 0.02, "learning_rate": 0.00019786219846887188, "loss": 3.5948, "step": 175 }, { "epoch": 0.03, "learning_rate": 0.00019783330925899178, "loss": 3.5377, "step": 176 }, { "epoch": 0.03, "learning_rate": 0.00019780442004911167, "loss": 3.4115, "step": 177 }, { "epoch": 0.03, "learning_rate": 0.00019777553083923154, "loss": 3.5925, "step": 178 }, { "epoch": 0.03, "learning_rate": 0.00019774664162935143, "loss": 3.6072, "step": 179 }, { "epoch": 0.03, "learning_rate": 0.00019771775241947135, "loss": 3.4459, "step": 180 }, { "epoch": 0.03, "learning_rate": 0.00019768886320959124, "loss": 3.675, "step": 181 }, { "epoch": 0.03, "learning_rate": 0.0001976599739997111, "loss": 3.5013, "step": 182 }, { "epoch": 0.03, "learning_rate": 0.000197631084789831, "loss": 3.5503, "step": 183 }, { "epoch": 0.03, "learning_rate": 0.0001976021955799509, "loss": 3.4639, "step": 184 }, { "epoch": 0.03, "learning_rate": 0.0001975733063700708, "loss": 3.6222, "step": 185 }, { "epoch": 0.03, "learning_rate": 0.0001975444171601907, "loss": 3.6222, "step": 186 }, { "epoch": 0.03, "learning_rate": 0.00019751552795031055, "loss": 3.5586, "step": 187 }, { "epoch": 0.03, "learning_rate": 0.00019748663874043045, "loss": 3.353, "step": 188 }, { "epoch": 0.03, "learning_rate": 0.00019745774953055034, "loss": 3.5714, "step": 189 }, { "epoch": 0.03, "learning_rate": 0.00019742886032067023, "loss": 3.5703, "step": 190 }, { "epoch": 0.03, "learning_rate": 0.00019739997111079013, "loss": 3.4602, "step": 191 }, { "epoch": 0.03, "learning_rate": 0.00019737108190091002, "loss": 3.5076, "step": 192 }, { "epoch": 0.03, "learning_rate": 0.00019734219269102992, "loss": 3.6046, "step": 193 }, { "epoch": 0.03, "learning_rate": 0.0001973133034811498, "loss": 3.4482, "step": 194 }, { "epoch": 0.03, "learning_rate": 0.0001972844142712697, "loss": 3.4445, "step": 195 }, { "epoch": 0.03, "learning_rate": 0.00019725552506138957, "loss": 3.5888, "step": 196 }, { "epoch": 0.03, "learning_rate": 0.00019722663585150946, "loss": 3.2793, "step": 197 }, { "epoch": 0.03, "learning_rate": 0.00019719774664162936, "loss": 3.4957, "step": 198 }, { "epoch": 0.03, "learning_rate": 0.00019716885743174925, "loss": 3.6092, "step": 199 }, { "epoch": 0.03, "learning_rate": 0.00019713996822186912, "loss": 3.5575, "step": 200 }, { "epoch": 0.03, "learning_rate": 0.00019711107901198904, "loss": 3.4428, "step": 201 }, { "epoch": 0.03, "learning_rate": 0.00019708218980210893, "loss": 3.4696, "step": 202 }, { "epoch": 0.03, "learning_rate": 0.00019705330059222883, "loss": 3.6291, "step": 203 }, { "epoch": 0.03, "learning_rate": 0.0001970244113823487, "loss": 3.5864, "step": 204 }, { "epoch": 0.03, "learning_rate": 0.0001969955221724686, "loss": 3.6399, "step": 205 }, { "epoch": 0.03, "learning_rate": 0.00019696663296258848, "loss": 3.547, "step": 206 }, { "epoch": 0.03, "learning_rate": 0.00019693774375270838, "loss": 3.5143, "step": 207 }, { "epoch": 0.03, "learning_rate": 0.00019690885454282827, "loss": 3.4557, "step": 208 }, { "epoch": 0.03, "learning_rate": 0.00019687996533294814, "loss": 3.4584, "step": 209 }, { "epoch": 0.03, "learning_rate": 0.00019685107612306803, "loss": 3.4184, "step": 210 }, { "epoch": 0.03, "learning_rate": 0.00019682218691318795, "loss": 3.5372, "step": 211 }, { "epoch": 0.03, "learning_rate": 0.00019679329770330784, "loss": 3.4767, "step": 212 }, { "epoch": 0.03, "learning_rate": 0.0001967644084934277, "loss": 3.5187, "step": 213 }, { "epoch": 0.03, "learning_rate": 0.0001967355192835476, "loss": 3.5174, "step": 214 }, { "epoch": 0.03, "learning_rate": 0.0001967066300736675, "loss": 3.5481, "step": 215 }, { "epoch": 0.03, "learning_rate": 0.0001966777408637874, "loss": 3.5149, "step": 216 }, { "epoch": 0.03, "learning_rate": 0.0001966488516539073, "loss": 3.5833, "step": 217 }, { "epoch": 0.03, "learning_rate": 0.00019661996244402715, "loss": 3.3972, "step": 218 }, { "epoch": 0.03, "learning_rate": 0.00019659107323414705, "loss": 3.5472, "step": 219 }, { "epoch": 0.03, "learning_rate": 0.00019656218402426694, "loss": 3.5609, "step": 220 }, { "epoch": 0.03, "learning_rate": 0.00019653329481438683, "loss": 3.6299, "step": 221 }, { "epoch": 0.03, "learning_rate": 0.00019650440560450673, "loss": 3.4379, "step": 222 }, { "epoch": 0.03, "learning_rate": 0.00019647551639462662, "loss": 3.5986, "step": 223 }, { "epoch": 0.03, "learning_rate": 0.00019644662718474652, "loss": 3.5838, "step": 224 }, { "epoch": 0.03, "learning_rate": 0.0001964177379748664, "loss": 3.4134, "step": 225 }, { "epoch": 0.03, "learning_rate": 0.00019638884876498628, "loss": 3.3992, "step": 226 }, { "epoch": 0.03, "learning_rate": 0.00019635995955510617, "loss": 3.5541, "step": 227 }, { "epoch": 0.03, "learning_rate": 0.00019633107034522606, "loss": 3.5869, "step": 228 }, { "epoch": 0.03, "learning_rate": 0.00019630218113534596, "loss": 3.5119, "step": 229 }, { "epoch": 0.03, "learning_rate": 0.00019627329192546585, "loss": 3.4892, "step": 230 }, { "epoch": 0.03, "learning_rate": 0.00019624440271558572, "loss": 3.6, "step": 231 }, { "epoch": 0.03, "learning_rate": 0.00019621551350570564, "loss": 3.4632, "step": 232 }, { "epoch": 0.03, "learning_rate": 0.00019618662429582553, "loss": 3.5046, "step": 233 }, { "epoch": 0.03, "learning_rate": 0.00019615773508594543, "loss": 3.5646, "step": 234 }, { "epoch": 0.03, "learning_rate": 0.0001961288458760653, "loss": 3.5583, "step": 235 }, { "epoch": 0.03, "learning_rate": 0.0001960999566661852, "loss": 3.5467, "step": 236 }, { "epoch": 0.03, "learning_rate": 0.00019607106745630508, "loss": 3.3647, "step": 237 }, { "epoch": 0.03, "learning_rate": 0.00019604217824642498, "loss": 3.6019, "step": 238 }, { "epoch": 0.03, "learning_rate": 0.00019601328903654484, "loss": 3.3073, "step": 239 }, { "epoch": 0.03, "learning_rate": 0.00019598439982666474, "loss": 3.5884, "step": 240 }, { "epoch": 0.03, "learning_rate": 0.00019595551061678463, "loss": 3.509, "step": 241 }, { "epoch": 0.03, "learning_rate": 0.00019592662140690452, "loss": 3.3664, "step": 242 }, { "epoch": 0.03, "learning_rate": 0.00019589773219702442, "loss": 3.5424, "step": 243 }, { "epoch": 0.03, "learning_rate": 0.0001958688429871443, "loss": 3.3911, "step": 244 }, { "epoch": 0.03, "learning_rate": 0.0001958399537772642, "loss": 3.4583, "step": 245 }, { "epoch": 0.04, "learning_rate": 0.0001958110645673841, "loss": 3.5823, "step": 246 }, { "epoch": 0.04, "learning_rate": 0.000195782175357504, "loss": 3.5662, "step": 247 }, { "epoch": 0.04, "learning_rate": 0.00019575328614762386, "loss": 3.472, "step": 248 }, { "epoch": 0.04, "learning_rate": 0.00019572439693774375, "loss": 3.5061, "step": 249 }, { "epoch": 0.04, "learning_rate": 0.00019569550772786365, "loss": 3.5083, "step": 250 }, { "epoch": 0.04, "learning_rate": 0.00019566661851798354, "loss": 3.4507, "step": 251 }, { "epoch": 0.04, "learning_rate": 0.00019563772930810344, "loss": 3.4526, "step": 252 }, { "epoch": 0.04, "learning_rate": 0.00019560884009822333, "loss": 3.5103, "step": 253 }, { "epoch": 0.04, "learning_rate": 0.00019557995088834322, "loss": 3.5585, "step": 254 }, { "epoch": 0.04, "learning_rate": 0.00019555106167846312, "loss": 3.505, "step": 255 }, { "epoch": 0.04, "learning_rate": 0.000195522172468583, "loss": 3.4499, "step": 256 }, { "epoch": 0.04, "learning_rate": 0.00019549328325870288, "loss": 3.3568, "step": 257 }, { "epoch": 0.04, "learning_rate": 0.00019546439404882277, "loss": 3.7057, "step": 258 }, { "epoch": 0.04, "learning_rate": 0.00019543550483894266, "loss": 3.517, "step": 259 }, { "epoch": 0.04, "learning_rate": 0.00019540661562906256, "loss": 3.4745, "step": 260 }, { "epoch": 0.04, "learning_rate": 0.00019537772641918243, "loss": 3.4541, "step": 261 }, { "epoch": 0.04, "learning_rate": 0.00019534883720930232, "loss": 3.5701, "step": 262 }, { "epoch": 0.04, "learning_rate": 0.0001953199479994222, "loss": 3.6835, "step": 263 }, { "epoch": 0.04, "learning_rate": 0.00019529105878954213, "loss": 3.5116, "step": 264 }, { "epoch": 0.04, "learning_rate": 0.000195262169579662, "loss": 3.3648, "step": 265 }, { "epoch": 0.04, "learning_rate": 0.0001952332803697819, "loss": 3.3722, "step": 266 }, { "epoch": 0.04, "learning_rate": 0.0001952043911599018, "loss": 3.3931, "step": 267 }, { "epoch": 0.04, "learning_rate": 0.00019517550195002168, "loss": 3.5807, "step": 268 }, { "epoch": 0.04, "learning_rate": 0.00019514661274014158, "loss": 3.5416, "step": 269 }, { "epoch": 0.04, "learning_rate": 0.00019511772353026144, "loss": 3.582, "step": 270 }, { "epoch": 0.04, "learning_rate": 0.00019508883432038134, "loss": 3.4652, "step": 271 }, { "epoch": 0.04, "learning_rate": 0.00019505994511050123, "loss": 3.5732, "step": 272 }, { "epoch": 0.04, "learning_rate": 0.00019503105590062112, "loss": 3.5297, "step": 273 }, { "epoch": 0.04, "learning_rate": 0.00019500216669074102, "loss": 3.7003, "step": 274 }, { "epoch": 0.04, "learning_rate": 0.0001949732774808609, "loss": 3.5983, "step": 275 }, { "epoch": 0.04, "learning_rate": 0.0001949443882709808, "loss": 3.6118, "step": 276 }, { "epoch": 0.04, "learning_rate": 0.0001949154990611007, "loss": 3.4791, "step": 277 }, { "epoch": 0.04, "learning_rate": 0.0001948866098512206, "loss": 3.6391, "step": 278 }, { "epoch": 0.04, "learning_rate": 0.00019485772064134046, "loss": 3.5959, "step": 279 }, { "epoch": 0.04, "learning_rate": 0.00019482883143146035, "loss": 3.5101, "step": 280 }, { "epoch": 0.04, "learning_rate": 0.00019479994222158025, "loss": 3.5986, "step": 281 }, { "epoch": 0.04, "learning_rate": 0.00019477105301170014, "loss": 3.5414, "step": 282 }, { "epoch": 0.04, "learning_rate": 0.00019474216380182, "loss": 3.6653, "step": 283 }, { "epoch": 0.04, "learning_rate": 0.0001947132745919399, "loss": 3.5027, "step": 284 }, { "epoch": 0.04, "learning_rate": 0.00019468438538205982, "loss": 3.4249, "step": 285 }, { "epoch": 0.04, "learning_rate": 0.00019465549617217972, "loss": 3.4433, "step": 286 }, { "epoch": 0.04, "learning_rate": 0.00019462660696229958, "loss": 3.6223, "step": 287 }, { "epoch": 0.04, "learning_rate": 0.00019459771775241948, "loss": 3.5467, "step": 288 }, { "epoch": 0.04, "learning_rate": 0.00019456882854253937, "loss": 3.61, "step": 289 }, { "epoch": 0.04, "learning_rate": 0.00019453993933265926, "loss": 3.6137, "step": 290 }, { "epoch": 0.04, "learning_rate": 0.00019451105012277916, "loss": 3.5088, "step": 291 }, { "epoch": 0.04, "learning_rate": 0.00019448216091289903, "loss": 3.5359, "step": 292 }, { "epoch": 0.04, "learning_rate": 0.00019445327170301892, "loss": 3.5458, "step": 293 }, { "epoch": 0.04, "learning_rate": 0.0001944243824931388, "loss": 3.5681, "step": 294 }, { "epoch": 0.04, "learning_rate": 0.00019439549328325873, "loss": 3.4299, "step": 295 }, { "epoch": 0.04, "learning_rate": 0.0001943666040733786, "loss": 3.5176, "step": 296 }, { "epoch": 0.04, "learning_rate": 0.0001943377148634985, "loss": 3.641, "step": 297 }, { "epoch": 0.04, "learning_rate": 0.0001943088256536184, "loss": 3.4167, "step": 298 }, { "epoch": 0.04, "learning_rate": 0.00019427993644373828, "loss": 3.4261, "step": 299 }, { "epoch": 0.04, "learning_rate": 0.00019425104723385818, "loss": 3.4559, "step": 300 }, { "epoch": 0.04, "learning_rate": 0.00019422215802397804, "loss": 3.5327, "step": 301 }, { "epoch": 0.04, "learning_rate": 0.00019419326881409794, "loss": 3.4398, "step": 302 }, { "epoch": 0.04, "learning_rate": 0.00019416437960421783, "loss": 3.5033, "step": 303 }, { "epoch": 0.04, "learning_rate": 0.00019413549039433772, "loss": 3.6118, "step": 304 }, { "epoch": 0.04, "learning_rate": 0.0001941066011844576, "loss": 3.4967, "step": 305 }, { "epoch": 0.04, "learning_rate": 0.0001940777119745775, "loss": 3.4717, "step": 306 }, { "epoch": 0.04, "learning_rate": 0.0001940488227646974, "loss": 3.355, "step": 307 }, { "epoch": 0.04, "learning_rate": 0.0001940199335548173, "loss": 3.5887, "step": 308 }, { "epoch": 0.04, "learning_rate": 0.00019399104434493717, "loss": 3.6378, "step": 309 }, { "epoch": 0.04, "learning_rate": 0.00019396215513505706, "loss": 3.5147, "step": 310 }, { "epoch": 0.04, "learning_rate": 0.00019393326592517695, "loss": 3.4631, "step": 311 }, { "epoch": 0.04, "learning_rate": 0.00019390437671529685, "loss": 3.5992, "step": 312 }, { "epoch": 0.04, "learning_rate": 0.00019387548750541674, "loss": 3.5586, "step": 313 }, { "epoch": 0.04, "learning_rate": 0.0001938465982955366, "loss": 3.5399, "step": 314 }, { "epoch": 0.04, "learning_rate": 0.0001938177090856565, "loss": 3.4912, "step": 315 }, { "epoch": 0.04, "learning_rate": 0.00019378881987577642, "loss": 3.6064, "step": 316 }, { "epoch": 0.05, "learning_rate": 0.00019375993066589632, "loss": 3.7006, "step": 317 }, { "epoch": 0.05, "learning_rate": 0.00019373104145601618, "loss": 3.5503, "step": 318 }, { "epoch": 0.05, "learning_rate": 0.00019370215224613608, "loss": 3.5618, "step": 319 }, { "epoch": 0.05, "learning_rate": 0.00019367326303625597, "loss": 3.651, "step": 320 }, { "epoch": 0.05, "learning_rate": 0.00019364437382637587, "loss": 3.5181, "step": 321 }, { "epoch": 0.05, "learning_rate": 0.00019361548461649576, "loss": 3.706, "step": 322 }, { "epoch": 0.05, "learning_rate": 0.00019358659540661563, "loss": 3.601, "step": 323 }, { "epoch": 0.05, "learning_rate": 0.00019355770619673552, "loss": 3.5891, "step": 324 }, { "epoch": 0.05, "learning_rate": 0.0001935288169868554, "loss": 3.3504, "step": 325 }, { "epoch": 0.05, "learning_rate": 0.0001934999277769753, "loss": 3.4464, "step": 326 }, { "epoch": 0.05, "learning_rate": 0.0001934710385670952, "loss": 3.5102, "step": 327 }, { "epoch": 0.05, "learning_rate": 0.0001934421493572151, "loss": 3.5796, "step": 328 }, { "epoch": 0.05, "learning_rate": 0.000193413260147335, "loss": 3.5751, "step": 329 }, { "epoch": 0.05, "learning_rate": 0.00019338437093745488, "loss": 3.4911, "step": 330 }, { "epoch": 0.05, "learning_rate": 0.00019335548172757475, "loss": 3.5161, "step": 331 }, { "epoch": 0.05, "learning_rate": 0.00019332659251769464, "loss": 3.4412, "step": 332 }, { "epoch": 0.05, "learning_rate": 0.00019329770330781454, "loss": 3.4896, "step": 333 }, { "epoch": 0.05, "learning_rate": 0.00019326881409793443, "loss": 3.5323, "step": 334 }, { "epoch": 0.05, "learning_rate": 0.00019323992488805432, "loss": 3.633, "step": 335 }, { "epoch": 0.05, "learning_rate": 0.0001932110356781742, "loss": 3.5541, "step": 336 }, { "epoch": 0.05, "learning_rate": 0.0001931821464682941, "loss": 3.5204, "step": 337 }, { "epoch": 0.05, "learning_rate": 0.000193153257258414, "loss": 3.7043, "step": 338 }, { "epoch": 0.05, "learning_rate": 0.0001931243680485339, "loss": 3.5533, "step": 339 }, { "epoch": 0.05, "learning_rate": 0.00019309547883865377, "loss": 3.5269, "step": 340 }, { "epoch": 0.05, "learning_rate": 0.00019306658962877366, "loss": 3.4716, "step": 341 }, { "epoch": 0.05, "learning_rate": 0.00019303770041889355, "loss": 3.4324, "step": 342 }, { "epoch": 0.05, "learning_rate": 0.00019300881120901345, "loss": 3.5459, "step": 343 }, { "epoch": 0.05, "learning_rate": 0.00019297992199913334, "loss": 3.543, "step": 344 }, { "epoch": 0.05, "learning_rate": 0.0001929510327892532, "loss": 3.5318, "step": 345 }, { "epoch": 0.05, "learning_rate": 0.0001929221435793731, "loss": 3.6708, "step": 346 }, { "epoch": 0.05, "learning_rate": 0.000192893254369493, "loss": 3.5583, "step": 347 }, { "epoch": 0.05, "learning_rate": 0.0001928643651596129, "loss": 3.5955, "step": 348 }, { "epoch": 0.05, "learning_rate": 0.00019283547594973278, "loss": 3.4893, "step": 349 }, { "epoch": 0.05, "learning_rate": 0.00019280658673985268, "loss": 3.6199, "step": 350 }, { "epoch": 0.05, "learning_rate": 0.00019277769752997257, "loss": 3.4667, "step": 351 }, { "epoch": 0.05, "learning_rate": 0.00019274880832009247, "loss": 3.4689, "step": 352 }, { "epoch": 0.05, "eval_loss": 3.59875750541687, "eval_runtime": 472.4811, "eval_samples_per_second": 43.36, "eval_steps_per_second": 14.453, "step": 352 }, { "epoch": 0.05, "learning_rate": 0.00019271991911021233, "loss": 3.5819, "step": 353 }, { "epoch": 0.05, "learning_rate": 0.00019269102990033223, "loss": 3.5459, "step": 354 }, { "epoch": 0.05, "learning_rate": 0.00019266214069045212, "loss": 3.5412, "step": 355 }, { "epoch": 0.05, "learning_rate": 0.000192633251480572, "loss": 3.5603, "step": 356 }, { "epoch": 0.05, "learning_rate": 0.0001926043622706919, "loss": 3.3498, "step": 357 }, { "epoch": 0.05, "learning_rate": 0.0001925754730608118, "loss": 3.2999, "step": 358 }, { "epoch": 0.05, "learning_rate": 0.0001925465838509317, "loss": 3.5822, "step": 359 }, { "epoch": 0.05, "learning_rate": 0.0001925176946410516, "loss": 3.583, "step": 360 }, { "epoch": 0.05, "learning_rate": 0.00019248880543117148, "loss": 3.6066, "step": 361 }, { "epoch": 0.05, "learning_rate": 0.00019245991622129135, "loss": 3.6497, "step": 362 }, { "epoch": 0.05, "learning_rate": 0.00019243102701141124, "loss": 3.551, "step": 363 }, { "epoch": 0.05, "learning_rate": 0.00019240213780153114, "loss": 3.3119, "step": 364 }, { "epoch": 0.05, "learning_rate": 0.00019237324859165103, "loss": 3.5062, "step": 365 }, { "epoch": 0.05, "learning_rate": 0.0001923443593817709, "loss": 3.5501, "step": 366 }, { "epoch": 0.05, "learning_rate": 0.0001923154701718908, "loss": 3.5859, "step": 367 }, { "epoch": 0.05, "learning_rate": 0.00019228658096201069, "loss": 3.5105, "step": 368 }, { "epoch": 0.05, "learning_rate": 0.0001922576917521306, "loss": 3.5117, "step": 369 }, { "epoch": 0.05, "learning_rate": 0.00019222880254225047, "loss": 3.5607, "step": 370 }, { "epoch": 0.05, "learning_rate": 0.00019219991333237037, "loss": 3.5971, "step": 371 }, { "epoch": 0.05, "learning_rate": 0.00019217102412249026, "loss": 3.573, "step": 372 }, { "epoch": 0.05, "learning_rate": 0.00019214213491261015, "loss": 3.5984, "step": 373 }, { "epoch": 0.05, "learning_rate": 0.00019211324570273005, "loss": 3.4534, "step": 374 }, { "epoch": 0.05, "learning_rate": 0.00019208435649284991, "loss": 3.5523, "step": 375 }, { "epoch": 0.05, "learning_rate": 0.0001920554672829698, "loss": 3.5324, "step": 376 }, { "epoch": 0.05, "learning_rate": 0.0001920265780730897, "loss": 3.5271, "step": 377 }, { "epoch": 0.05, "learning_rate": 0.0001919976888632096, "loss": 3.5481, "step": 378 }, { "epoch": 0.05, "learning_rate": 0.0001919687996533295, "loss": 3.5117, "step": 379 }, { "epoch": 0.05, "learning_rate": 0.00019193991044344938, "loss": 3.3971, "step": 380 }, { "epoch": 0.05, "learning_rate": 0.00019191102123356928, "loss": 3.5115, "step": 381 }, { "epoch": 0.05, "learning_rate": 0.00019188213202368917, "loss": 3.51, "step": 382 }, { "epoch": 0.05, "learning_rate": 0.00019185324281380907, "loss": 3.6974, "step": 383 }, { "epoch": 0.05, "learning_rate": 0.00019182435360392893, "loss": 3.5188, "step": 384 }, { "epoch": 0.05, "learning_rate": 0.00019179546439404883, "loss": 3.5516, "step": 385 }, { "epoch": 0.05, "learning_rate": 0.00019176657518416872, "loss": 3.6164, "step": 386 }, { "epoch": 0.06, "learning_rate": 0.00019173768597428861, "loss": 3.6524, "step": 387 }, { "epoch": 0.06, "learning_rate": 0.00019170879676440848, "loss": 3.6024, "step": 388 }, { "epoch": 0.06, "learning_rate": 0.00019167990755452837, "loss": 3.4035, "step": 389 }, { "epoch": 0.06, "learning_rate": 0.0001916510183446483, "loss": 3.3601, "step": 390 }, { "epoch": 0.06, "learning_rate": 0.0001916221291347682, "loss": 3.5517, "step": 391 }, { "epoch": 0.06, "learning_rate": 0.00019159323992488806, "loss": 3.4734, "step": 392 }, { "epoch": 0.06, "learning_rate": 0.00019156435071500795, "loss": 3.4639, "step": 393 }, { "epoch": 0.06, "learning_rate": 0.00019153546150512784, "loss": 3.4812, "step": 394 }, { "epoch": 0.06, "learning_rate": 0.00019150657229524774, "loss": 3.7109, "step": 395 }, { "epoch": 0.06, "learning_rate": 0.00019147768308536763, "loss": 3.4681, "step": 396 }, { "epoch": 0.06, "learning_rate": 0.0001914487938754875, "loss": 3.6094, "step": 397 }, { "epoch": 0.06, "learning_rate": 0.0001914199046656074, "loss": 3.5162, "step": 398 }, { "epoch": 0.06, "learning_rate": 0.00019139101545572729, "loss": 3.4222, "step": 399 }, { "epoch": 0.06, "learning_rate": 0.0001913621262458472, "loss": 3.577, "step": 400 }, { "epoch": 0.06, "learning_rate": 0.00019133323703596707, "loss": 3.6015, "step": 401 }, { "epoch": 0.06, "learning_rate": 0.00019130434782608697, "loss": 3.6354, "step": 402 }, { "epoch": 0.06, "learning_rate": 0.00019127545861620686, "loss": 3.4482, "step": 403 }, { "epoch": 0.06, "learning_rate": 0.00019124656940632675, "loss": 3.5481, "step": 404 }, { "epoch": 0.06, "learning_rate": 0.00019121768019644665, "loss": 3.4145, "step": 405 }, { "epoch": 0.06, "learning_rate": 0.00019118879098656652, "loss": 3.365, "step": 406 }, { "epoch": 0.06, "learning_rate": 0.0001911599017766864, "loss": 3.5916, "step": 407 }, { "epoch": 0.06, "learning_rate": 0.0001911310125668063, "loss": 3.6518, "step": 408 }, { "epoch": 0.06, "learning_rate": 0.0001911021233569262, "loss": 3.6107, "step": 409 }, { "epoch": 0.06, "learning_rate": 0.0001910732341470461, "loss": 3.549, "step": 410 }, { "epoch": 0.06, "learning_rate": 0.00019104434493716598, "loss": 3.6271, "step": 411 }, { "epoch": 0.06, "learning_rate": 0.00019101545572728588, "loss": 3.5452, "step": 412 }, { "epoch": 0.06, "learning_rate": 0.00019098656651740577, "loss": 3.6264, "step": 413 }, { "epoch": 0.06, "learning_rate": 0.00019095767730752564, "loss": 3.5031, "step": 414 }, { "epoch": 0.06, "learning_rate": 0.00019092878809764553, "loss": 3.5594, "step": 415 }, { "epoch": 0.06, "learning_rate": 0.00019089989888776543, "loss": 3.3923, "step": 416 }, { "epoch": 0.06, "learning_rate": 0.00019087100967788532, "loss": 3.3375, "step": 417 }, { "epoch": 0.06, "learning_rate": 0.00019084212046800521, "loss": 3.5472, "step": 418 }, { "epoch": 0.06, "learning_rate": 0.00019081323125812508, "loss": 3.6317, "step": 419 }, { "epoch": 0.06, "learning_rate": 0.00019078434204824497, "loss": 3.6466, "step": 420 }, { "epoch": 0.06, "learning_rate": 0.0001907554528383649, "loss": 3.5929, "step": 421 }, { "epoch": 0.06, "learning_rate": 0.0001907265636284848, "loss": 3.4864, "step": 422 }, { "epoch": 0.06, "learning_rate": 0.00019069767441860466, "loss": 3.5866, "step": 423 }, { "epoch": 0.06, "learning_rate": 0.00019066878520872455, "loss": 3.3665, "step": 424 }, { "epoch": 0.06, "learning_rate": 0.00019063989599884444, "loss": 3.5244, "step": 425 }, { "epoch": 0.06, "learning_rate": 0.00019061100678896434, "loss": 3.3427, "step": 426 }, { "epoch": 0.06, "learning_rate": 0.00019058211757908423, "loss": 3.6314, "step": 427 }, { "epoch": 0.06, "learning_rate": 0.0001905532283692041, "loss": 3.5961, "step": 428 }, { "epoch": 0.06, "learning_rate": 0.000190524339159324, "loss": 3.6028, "step": 429 }, { "epoch": 0.06, "learning_rate": 0.00019049544994944389, "loss": 3.565, "step": 430 }, { "epoch": 0.06, "learning_rate": 0.0001904665607395638, "loss": 3.6294, "step": 431 }, { "epoch": 0.06, "learning_rate": 0.00019043767152968367, "loss": 3.5588, "step": 432 }, { "epoch": 0.06, "learning_rate": 0.00019040878231980357, "loss": 3.5346, "step": 433 }, { "epoch": 0.06, "learning_rate": 0.00019037989310992346, "loss": 3.5715, "step": 434 }, { "epoch": 0.06, "learning_rate": 0.00019035100390004335, "loss": 3.6058, "step": 435 }, { "epoch": 0.06, "learning_rate": 0.00019032211469016322, "loss": 3.6087, "step": 436 }, { "epoch": 0.06, "learning_rate": 0.00019029322548028312, "loss": 3.5346, "step": 437 }, { "epoch": 0.06, "learning_rate": 0.000190264336270403, "loss": 3.5482, "step": 438 }, { "epoch": 0.06, "learning_rate": 0.0001902354470605229, "loss": 3.553, "step": 439 }, { "epoch": 0.06, "learning_rate": 0.0001902065578506428, "loss": 3.5424, "step": 440 }, { "epoch": 0.06, "learning_rate": 0.00019017766864076266, "loss": 3.3804, "step": 441 }, { "epoch": 0.06, "learning_rate": 0.00019014877943088258, "loss": 3.5441, "step": 442 }, { "epoch": 0.06, "learning_rate": 0.00019011989022100248, "loss": 3.5132, "step": 443 }, { "epoch": 0.06, "learning_rate": 0.00019009100101112237, "loss": 3.5497, "step": 444 }, { "epoch": 0.06, "learning_rate": 0.00019006211180124224, "loss": 3.5957, "step": 445 }, { "epoch": 0.06, "learning_rate": 0.00019003322259136213, "loss": 3.5656, "step": 446 }, { "epoch": 0.06, "learning_rate": 0.00019000433338148203, "loss": 3.5914, "step": 447 }, { "epoch": 0.06, "learning_rate": 0.00018997544417160192, "loss": 3.5543, "step": 448 }, { "epoch": 0.06, "learning_rate": 0.00018994655496172181, "loss": 3.6238, "step": 449 }, { "epoch": 0.06, "learning_rate": 0.00018991766575184168, "loss": 3.4488, "step": 450 }, { "epoch": 0.06, "learning_rate": 0.00018988877654196157, "loss": 3.5148, "step": 451 }, { "epoch": 0.06, "learning_rate": 0.0001898598873320815, "loss": 3.6172, "step": 452 }, { "epoch": 0.06, "learning_rate": 0.00018983099812220136, "loss": 3.5025, "step": 453 }, { "epoch": 0.06, "learning_rate": 0.00018980210891232126, "loss": 3.4935, "step": 454 }, { "epoch": 0.06, "learning_rate": 0.00018977321970244115, "loss": 3.4364, "step": 455 }, { "epoch": 0.06, "learning_rate": 0.00018974433049256104, "loss": 3.5489, "step": 456 }, { "epoch": 0.07, "learning_rate": 0.00018971544128268094, "loss": 3.5546, "step": 457 }, { "epoch": 0.07, "learning_rate": 0.0001896865520728008, "loss": 3.4535, "step": 458 }, { "epoch": 0.07, "learning_rate": 0.0001896576628629207, "loss": 3.545, "step": 459 }, { "epoch": 0.07, "learning_rate": 0.0001896287736530406, "loss": 3.5972, "step": 460 }, { "epoch": 0.07, "learning_rate": 0.00018959988444316049, "loss": 3.6178, "step": 461 }, { "epoch": 0.07, "learning_rate": 0.00018957099523328038, "loss": 3.3621, "step": 462 }, { "epoch": 0.07, "learning_rate": 0.00018954210602340027, "loss": 3.4738, "step": 463 }, { "epoch": 0.07, "learning_rate": 0.00018951321681352017, "loss": 3.6158, "step": 464 }, { "epoch": 0.07, "learning_rate": 0.00018948432760364006, "loss": 3.4559, "step": 465 }, { "epoch": 0.07, "learning_rate": 0.00018945543839375995, "loss": 3.4635, "step": 466 }, { "epoch": 0.07, "learning_rate": 0.00018942654918387982, "loss": 3.7056, "step": 467 }, { "epoch": 0.07, "learning_rate": 0.00018939765997399972, "loss": 3.5761, "step": 468 }, { "epoch": 0.07, "learning_rate": 0.0001893687707641196, "loss": 3.537, "step": 469 }, { "epoch": 0.07, "learning_rate": 0.0001893398815542395, "loss": 3.6607, "step": 470 }, { "epoch": 0.07, "learning_rate": 0.0001893109923443594, "loss": 3.4395, "step": 471 }, { "epoch": 0.07, "learning_rate": 0.00018928210313447926, "loss": 3.4993, "step": 472 }, { "epoch": 0.07, "learning_rate": 0.00018925321392459918, "loss": 3.4373, "step": 473 }, { "epoch": 0.07, "learning_rate": 0.00018922432471471908, "loss": 3.5638, "step": 474 }, { "epoch": 0.07, "learning_rate": 0.00018919543550483895, "loss": 3.5201, "step": 475 }, { "epoch": 0.07, "learning_rate": 0.00018916654629495884, "loss": 3.3825, "step": 476 }, { "epoch": 0.07, "learning_rate": 0.00018913765708507873, "loss": 3.4802, "step": 477 }, { "epoch": 0.07, "learning_rate": 0.00018910876787519863, "loss": 3.4747, "step": 478 }, { "epoch": 0.07, "learning_rate": 0.00018907987866531852, "loss": 3.4511, "step": 479 }, { "epoch": 0.07, "learning_rate": 0.0001890509894554384, "loss": 3.5461, "step": 480 }, { "epoch": 0.07, "learning_rate": 0.00018902210024555828, "loss": 3.455, "step": 481 }, { "epoch": 0.07, "learning_rate": 0.00018899321103567817, "loss": 3.5123, "step": 482 }, { "epoch": 0.07, "learning_rate": 0.00018896432182579807, "loss": 3.5906, "step": 483 }, { "epoch": 0.07, "learning_rate": 0.00018893543261591796, "loss": 3.421, "step": 484 }, { "epoch": 0.07, "learning_rate": 0.00018890654340603786, "loss": 3.5119, "step": 485 }, { "epoch": 0.07, "learning_rate": 0.00018887765419615775, "loss": 3.6598, "step": 486 }, { "epoch": 0.07, "learning_rate": 0.00018884876498627764, "loss": 3.5187, "step": 487 }, { "epoch": 0.07, "learning_rate": 0.00018881987577639754, "loss": 3.5036, "step": 488 }, { "epoch": 0.07, "learning_rate": 0.0001887909865665174, "loss": 3.6146, "step": 489 }, { "epoch": 0.07, "learning_rate": 0.0001887620973566373, "loss": 3.6127, "step": 490 }, { "epoch": 0.07, "learning_rate": 0.0001887332081467572, "loss": 3.494, "step": 491 }, { "epoch": 0.07, "learning_rate": 0.00018870431893687709, "loss": 3.5811, "step": 492 }, { "epoch": 0.07, "learning_rate": 0.00018867542972699695, "loss": 3.6014, "step": 493 }, { "epoch": 0.07, "learning_rate": 0.00018864654051711687, "loss": 3.4939, "step": 494 }, { "epoch": 0.07, "learning_rate": 0.00018861765130723677, "loss": 3.5964, "step": 495 }, { "epoch": 0.07, "learning_rate": 0.00018858876209735666, "loss": 3.5323, "step": 496 }, { "epoch": 0.07, "learning_rate": 0.00018855987288747653, "loss": 3.5668, "step": 497 }, { "epoch": 0.07, "learning_rate": 0.00018853098367759642, "loss": 3.5236, "step": 498 }, { "epoch": 0.07, "learning_rate": 0.00018850209446771632, "loss": 3.6417, "step": 499 }, { "epoch": 0.07, "learning_rate": 0.0001884732052578362, "loss": 3.7174, "step": 500 }, { "epoch": 0.07, "learning_rate": 0.0001884443160479561, "loss": 3.4265, "step": 501 }, { "epoch": 0.07, "learning_rate": 0.00018841542683807597, "loss": 3.4595, "step": 502 }, { "epoch": 0.07, "learning_rate": 0.00018838653762819586, "loss": 3.3742, "step": 503 }, { "epoch": 0.07, "learning_rate": 0.00018835764841831576, "loss": 3.6812, "step": 504 }, { "epoch": 0.07, "learning_rate": 0.00018832875920843568, "loss": 3.64, "step": 505 }, { "epoch": 0.07, "learning_rate": 0.00018829986999855555, "loss": 3.4502, "step": 506 }, { "epoch": 0.07, "learning_rate": 0.00018827098078867544, "loss": 3.4054, "step": 507 }, { "epoch": 0.07, "learning_rate": 0.00018824209157879533, "loss": 3.4928, "step": 508 }, { "epoch": 0.07, "learning_rate": 0.00018821320236891523, "loss": 3.4226, "step": 509 }, { "epoch": 0.07, "learning_rate": 0.00018818431315903512, "loss": 3.5288, "step": 510 }, { "epoch": 0.07, "learning_rate": 0.000188155423949155, "loss": 3.5659, "step": 511 }, { "epoch": 0.07, "learning_rate": 0.00018812653473927488, "loss": 3.5009, "step": 512 }, { "epoch": 0.07, "learning_rate": 0.00018809764552939477, "loss": 3.4401, "step": 513 }, { "epoch": 0.07, "learning_rate": 0.00018806875631951467, "loss": 3.5149, "step": 514 }, { "epoch": 0.07, "learning_rate": 0.00018803986710963456, "loss": 3.5233, "step": 515 }, { "epoch": 0.07, "learning_rate": 0.00018801097789975446, "loss": 3.4313, "step": 516 }, { "epoch": 0.07, "learning_rate": 0.00018798208868987435, "loss": 3.5574, "step": 517 }, { "epoch": 0.07, "learning_rate": 0.00018795319947999424, "loss": 3.5375, "step": 518 }, { "epoch": 0.07, "learning_rate": 0.0001879243102701141, "loss": 3.5875, "step": 519 }, { "epoch": 0.07, "learning_rate": 0.000187895421060234, "loss": 3.4169, "step": 520 }, { "epoch": 0.07, "learning_rate": 0.0001878665318503539, "loss": 3.4563, "step": 521 }, { "epoch": 0.07, "learning_rate": 0.0001878376426404738, "loss": 3.3797, "step": 522 }, { "epoch": 0.07, "learning_rate": 0.00018780875343059369, "loss": 3.4075, "step": 523 }, { "epoch": 0.07, "learning_rate": 0.00018777986422071355, "loss": 3.5773, "step": 524 }, { "epoch": 0.07, "learning_rate": 0.00018775097501083345, "loss": 3.5523, "step": 525 }, { "epoch": 0.07, "learning_rate": 0.00018772208580095337, "loss": 3.5288, "step": 526 }, { "epoch": 0.08, "learning_rate": 0.00018769319659107326, "loss": 3.6048, "step": 527 }, { "epoch": 0.08, "learning_rate": 0.00018766430738119313, "loss": 3.56, "step": 528 }, { "epoch": 0.08, "learning_rate": 0.00018763541817131302, "loss": 3.5448, "step": 529 }, { "epoch": 0.08, "learning_rate": 0.00018760652896143292, "loss": 3.6165, "step": 530 }, { "epoch": 0.08, "learning_rate": 0.0001875776397515528, "loss": 3.495, "step": 531 }, { "epoch": 0.08, "learning_rate": 0.0001875487505416727, "loss": 3.6955, "step": 532 }, { "epoch": 0.08, "learning_rate": 0.00018751986133179257, "loss": 3.4293, "step": 533 }, { "epoch": 0.08, "learning_rate": 0.00018749097212191246, "loss": 3.4176, "step": 534 }, { "epoch": 0.08, "learning_rate": 0.00018746208291203236, "loss": 3.5641, "step": 535 }, { "epoch": 0.08, "learning_rate": 0.00018743319370215228, "loss": 3.6007, "step": 536 }, { "epoch": 0.08, "learning_rate": 0.00018740430449227215, "loss": 3.4111, "step": 537 }, { "epoch": 0.08, "learning_rate": 0.00018737541528239204, "loss": 3.456, "step": 538 }, { "epoch": 0.08, "learning_rate": 0.00018734652607251193, "loss": 3.507, "step": 539 }, { "epoch": 0.08, "learning_rate": 0.00018731763686263183, "loss": 3.6267, "step": 540 }, { "epoch": 0.08, "learning_rate": 0.0001872887476527517, "loss": 3.5538, "step": 541 }, { "epoch": 0.08, "learning_rate": 0.0001872598584428716, "loss": 3.4112, "step": 542 }, { "epoch": 0.08, "learning_rate": 0.00018723096923299148, "loss": 3.5022, "step": 543 }, { "epoch": 0.08, "learning_rate": 0.00018720208002311138, "loss": 3.6393, "step": 544 }, { "epoch": 0.08, "learning_rate": 0.00018717319081323127, "loss": 3.6233, "step": 545 }, { "epoch": 0.08, "learning_rate": 0.00018714430160335114, "loss": 3.651, "step": 546 }, { "epoch": 0.08, "learning_rate": 0.00018711541239347106, "loss": 3.6567, "step": 547 }, { "epoch": 0.08, "learning_rate": 0.00018708652318359095, "loss": 3.3348, "step": 548 }, { "epoch": 0.08, "learning_rate": 0.00018705763397371084, "loss": 3.5277, "step": 549 }, { "epoch": 0.08, "learning_rate": 0.0001870287447638307, "loss": 3.4866, "step": 550 }, { "epoch": 0.08, "learning_rate": 0.0001869998555539506, "loss": 3.5642, "step": 551 }, { "epoch": 0.08, "learning_rate": 0.0001869709663440705, "loss": 3.4392, "step": 552 }, { "epoch": 0.08, "learning_rate": 0.0001869420771341904, "loss": 3.6856, "step": 553 }, { "epoch": 0.08, "learning_rate": 0.00018691318792431029, "loss": 3.4941, "step": 554 }, { "epoch": 0.08, "learning_rate": 0.00018688429871443015, "loss": 3.5972, "step": 555 }, { "epoch": 0.08, "learning_rate": 0.00018685540950455005, "loss": 3.5312, "step": 556 }, { "epoch": 0.08, "learning_rate": 0.00018682652029466997, "loss": 3.5929, "step": 557 }, { "epoch": 0.08, "learning_rate": 0.00018679763108478986, "loss": 3.6768, "step": 558 }, { "epoch": 0.08, "learning_rate": 0.00018676874187490973, "loss": 3.5386, "step": 559 }, { "epoch": 0.08, "learning_rate": 0.00018673985266502962, "loss": 3.4734, "step": 560 }, { "epoch": 0.08, "learning_rate": 0.00018671096345514952, "loss": 3.5429, "step": 561 }, { "epoch": 0.08, "learning_rate": 0.0001866820742452694, "loss": 3.4172, "step": 562 }, { "epoch": 0.08, "learning_rate": 0.00018665318503538928, "loss": 3.5298, "step": 563 }, { "epoch": 0.08, "learning_rate": 0.00018662429582550917, "loss": 3.547, "step": 564 }, { "epoch": 0.08, "learning_rate": 0.00018659540661562906, "loss": 3.4804, "step": 565 }, { "epoch": 0.08, "learning_rate": 0.00018656651740574896, "loss": 3.5119, "step": 566 }, { "epoch": 0.08, "learning_rate": 0.00018653762819586885, "loss": 3.5192, "step": 567 }, { "epoch": 0.08, "learning_rate": 0.00018650873898598875, "loss": 3.5618, "step": 568 }, { "epoch": 0.08, "learning_rate": 0.00018647984977610864, "loss": 3.5257, "step": 569 }, { "epoch": 0.08, "learning_rate": 0.00018645096056622853, "loss": 3.5611, "step": 570 }, { "epoch": 0.08, "learning_rate": 0.00018642207135634843, "loss": 3.3794, "step": 571 }, { "epoch": 0.08, "learning_rate": 0.0001863931821464683, "loss": 3.4684, "step": 572 }, { "epoch": 0.08, "learning_rate": 0.0001863642929365882, "loss": 3.5756, "step": 573 }, { "epoch": 0.08, "learning_rate": 0.00018633540372670808, "loss": 3.6312, "step": 574 }, { "epoch": 0.08, "learning_rate": 0.00018630651451682798, "loss": 3.447, "step": 575 }, { "epoch": 0.08, "learning_rate": 0.00018627762530694787, "loss": 3.5265, "step": 576 }, { "epoch": 0.08, "learning_rate": 0.00018624873609706774, "loss": 3.5244, "step": 577 }, { "epoch": 0.08, "learning_rate": 0.00018621984688718766, "loss": 3.5743, "step": 578 }, { "epoch": 0.08, "learning_rate": 0.00018619095767730755, "loss": 3.4801, "step": 579 }, { "epoch": 0.08, "learning_rate": 0.00018616206846742742, "loss": 3.5325, "step": 580 }, { "epoch": 0.08, "learning_rate": 0.0001861331792575473, "loss": 3.603, "step": 581 }, { "epoch": 0.08, "learning_rate": 0.0001861042900476672, "loss": 3.4021, "step": 582 }, { "epoch": 0.08, "learning_rate": 0.0001860754008377871, "loss": 3.5223, "step": 583 }, { "epoch": 0.08, "learning_rate": 0.000186046511627907, "loss": 3.5119, "step": 584 }, { "epoch": 0.08, "learning_rate": 0.00018601762241802686, "loss": 3.4831, "step": 585 }, { "epoch": 0.08, "learning_rate": 0.00018598873320814675, "loss": 3.4985, "step": 586 }, { "epoch": 0.08, "learning_rate": 0.00018595984399826665, "loss": 3.6217, "step": 587 }, { "epoch": 0.08, "learning_rate": 0.00018593095478838654, "loss": 3.5672, "step": 588 }, { "epoch": 0.08, "learning_rate": 0.00018590206557850643, "loss": 3.4158, "step": 589 }, { "epoch": 0.08, "learning_rate": 0.00018587317636862633, "loss": 3.5823, "step": 590 }, { "epoch": 0.08, "learning_rate": 0.00018584428715874622, "loss": 3.5354, "step": 591 }, { "epoch": 0.08, "learning_rate": 0.00018581539794886612, "loss": 3.479, "step": 592 }, { "epoch": 0.08, "learning_rate": 0.000185786508738986, "loss": 3.4195, "step": 593 }, { "epoch": 0.08, "learning_rate": 0.00018575761952910588, "loss": 3.6027, "step": 594 }, { "epoch": 0.08, "learning_rate": 0.00018572873031922577, "loss": 3.6142, "step": 595 }, { "epoch": 0.08, "learning_rate": 0.00018569984110934566, "loss": 3.5468, "step": 596 }, { "epoch": 0.09, "learning_rate": 0.00018567095189946556, "loss": 3.504, "step": 597 }, { "epoch": 0.09, "learning_rate": 0.00018564206268958545, "loss": 3.5126, "step": 598 }, { "epoch": 0.09, "learning_rate": 0.00018561317347970535, "loss": 3.4528, "step": 599 }, { "epoch": 0.09, "learning_rate": 0.00018558428426982524, "loss": 3.3295, "step": 600 }, { "epoch": 0.09, "learning_rate": 0.00018555539505994513, "loss": 3.5697, "step": 601 }, { "epoch": 0.09, "learning_rate": 0.000185526505850065, "loss": 3.5069, "step": 602 }, { "epoch": 0.09, "learning_rate": 0.0001854976166401849, "loss": 3.5449, "step": 603 }, { "epoch": 0.09, "learning_rate": 0.0001854687274303048, "loss": 3.4998, "step": 604 }, { "epoch": 0.09, "learning_rate": 0.00018543983822042468, "loss": 3.2614, "step": 605 }, { "epoch": 0.09, "learning_rate": 0.00018541094901054458, "loss": 3.5806, "step": 606 }, { "epoch": 0.09, "learning_rate": 0.00018538205980066444, "loss": 3.568, "step": 607 }, { "epoch": 0.09, "learning_rate": 0.00018535317059078434, "loss": 3.6575, "step": 608 }, { "epoch": 0.09, "learning_rate": 0.00018532428138090426, "loss": 3.4329, "step": 609 }, { "epoch": 0.09, "learning_rate": 0.00018529539217102415, "loss": 3.5881, "step": 610 }, { "epoch": 0.09, "learning_rate": 0.00018526650296114402, "loss": 3.4916, "step": 611 }, { "epoch": 0.09, "learning_rate": 0.0001852376137512639, "loss": 3.5186, "step": 612 }, { "epoch": 0.09, "learning_rate": 0.0001852087245413838, "loss": 3.528, "step": 613 }, { "epoch": 0.09, "learning_rate": 0.0001851798353315037, "loss": 3.4139, "step": 614 }, { "epoch": 0.09, "learning_rate": 0.0001851509461216236, "loss": 3.5184, "step": 615 }, { "epoch": 0.09, "learning_rate": 0.00018512205691174346, "loss": 3.5681, "step": 616 }, { "epoch": 0.09, "learning_rate": 0.00018509316770186335, "loss": 3.5774, "step": 617 }, { "epoch": 0.09, "learning_rate": 0.00018506427849198325, "loss": 3.5222, "step": 618 }, { "epoch": 0.09, "learning_rate": 0.00018503538928210314, "loss": 3.561, "step": 619 }, { "epoch": 0.09, "learning_rate": 0.00018500650007222303, "loss": 3.4017, "step": 620 }, { "epoch": 0.09, "learning_rate": 0.00018497761086234293, "loss": 3.4531, "step": 621 }, { "epoch": 0.09, "learning_rate": 0.00018494872165246282, "loss": 3.5615, "step": 622 }, { "epoch": 0.09, "learning_rate": 0.00018491983244258272, "loss": 3.5878, "step": 623 }, { "epoch": 0.09, "learning_rate": 0.00018489094323270258, "loss": 3.6159, "step": 624 }, { "epoch": 0.09, "learning_rate": 0.00018486205402282248, "loss": 3.496, "step": 625 }, { "epoch": 0.09, "learning_rate": 0.00018483316481294237, "loss": 3.4758, "step": 626 }, { "epoch": 0.09, "learning_rate": 0.00018480427560306226, "loss": 3.4669, "step": 627 }, { "epoch": 0.09, "learning_rate": 0.00018477538639318216, "loss": 3.6198, "step": 628 }, { "epoch": 0.09, "learning_rate": 0.00018474649718330203, "loss": 3.5373, "step": 629 }, { "epoch": 0.09, "learning_rate": 0.00018471760797342195, "loss": 3.5283, "step": 630 }, { "epoch": 0.09, "learning_rate": 0.00018468871876354184, "loss": 3.5688, "step": 631 }, { "epoch": 0.09, "learning_rate": 0.00018465982955366173, "loss": 3.439, "step": 632 }, { "epoch": 0.09, "learning_rate": 0.0001846309403437816, "loss": 3.4053, "step": 633 }, { "epoch": 0.09, "learning_rate": 0.0001846020511339015, "loss": 3.3194, "step": 634 }, { "epoch": 0.09, "learning_rate": 0.0001845731619240214, "loss": 3.4613, "step": 635 }, { "epoch": 0.09, "learning_rate": 0.00018454427271414128, "loss": 3.5199, "step": 636 }, { "epoch": 0.09, "learning_rate": 0.00018451538350426118, "loss": 3.5138, "step": 637 }, { "epoch": 0.09, "learning_rate": 0.00018448649429438104, "loss": 3.5518, "step": 638 }, { "epoch": 0.09, "learning_rate": 0.00018445760508450094, "loss": 3.5416, "step": 639 }, { "epoch": 0.09, "learning_rate": 0.00018442871587462083, "loss": 3.4615, "step": 640 }, { "epoch": 0.09, "learning_rate": 0.00018439982666474075, "loss": 3.638, "step": 641 }, { "epoch": 0.09, "learning_rate": 0.00018437093745486062, "loss": 3.5444, "step": 642 }, { "epoch": 0.09, "learning_rate": 0.0001843420482449805, "loss": 3.638, "step": 643 }, { "epoch": 0.09, "learning_rate": 0.0001843131590351004, "loss": 3.451, "step": 644 }, { "epoch": 0.09, "learning_rate": 0.0001842842698252203, "loss": 3.5095, "step": 645 }, { "epoch": 0.09, "learning_rate": 0.00018425538061534017, "loss": 3.6475, "step": 646 }, { "epoch": 0.09, "learning_rate": 0.00018422649140546006, "loss": 3.5567, "step": 647 }, { "epoch": 0.09, "learning_rate": 0.00018419760219557995, "loss": 3.4693, "step": 648 }, { "epoch": 0.09, "learning_rate": 0.00018416871298569985, "loss": 3.6224, "step": 649 }, { "epoch": 0.09, "learning_rate": 0.00018413982377581974, "loss": 3.4074, "step": 650 }, { "epoch": 0.09, "learning_rate": 0.00018411093456593963, "loss": 3.4079, "step": 651 }, { "epoch": 0.09, "learning_rate": 0.00018408204535605953, "loss": 3.6761, "step": 652 }, { "epoch": 0.09, "learning_rate": 0.00018405315614617942, "loss": 3.4546, "step": 653 }, { "epoch": 0.09, "learning_rate": 0.00018402426693629932, "loss": 3.4474, "step": 654 }, { "epoch": 0.09, "learning_rate": 0.00018399537772641918, "loss": 3.5165, "step": 655 }, { "epoch": 0.09, "learning_rate": 0.00018396648851653908, "loss": 3.5854, "step": 656 }, { "epoch": 0.09, "learning_rate": 0.00018393759930665897, "loss": 3.5001, "step": 657 }, { "epoch": 0.09, "learning_rate": 0.00018390871009677886, "loss": 3.5853, "step": 658 }, { "epoch": 0.09, "learning_rate": 0.00018387982088689876, "loss": 3.5338, "step": 659 }, { "epoch": 0.09, "learning_rate": 0.00018385093167701863, "loss": 3.5332, "step": 660 }, { "epoch": 0.09, "learning_rate": 0.00018382204246713852, "loss": 3.5204, "step": 661 }, { "epoch": 0.09, "learning_rate": 0.00018379315325725844, "loss": 3.4821, "step": 662 }, { "epoch": 0.09, "learning_rate": 0.00018376426404737833, "loss": 3.4963, "step": 663 }, { "epoch": 0.09, "learning_rate": 0.0001837353748374982, "loss": 3.5454, "step": 664 }, { "epoch": 0.09, "learning_rate": 0.0001837064856276181, "loss": 3.5903, "step": 665 }, { "epoch": 0.09, "learning_rate": 0.000183677596417738, "loss": 3.5212, "step": 666 }, { "epoch": 0.09, "learning_rate": 0.00018364870720785788, "loss": 3.5175, "step": 667 }, { "epoch": 0.1, "learning_rate": 0.00018361981799797775, "loss": 3.5781, "step": 668 }, { "epoch": 0.1, "learning_rate": 0.00018359092878809764, "loss": 3.4063, "step": 669 }, { "epoch": 0.1, "learning_rate": 0.00018356203957821754, "loss": 3.3629, "step": 670 }, { "epoch": 0.1, "learning_rate": 0.00018353315036833743, "loss": 3.512, "step": 671 }, { "epoch": 0.1, "learning_rate": 0.00018350426115845732, "loss": 3.5793, "step": 672 }, { "epoch": 0.1, "learning_rate": 0.00018347537194857722, "loss": 3.4232, "step": 673 }, { "epoch": 0.1, "learning_rate": 0.0001834464827386971, "loss": 3.608, "step": 674 }, { "epoch": 0.1, "learning_rate": 0.000183417593528817, "loss": 3.4862, "step": 675 }, { "epoch": 0.1, "learning_rate": 0.0001833887043189369, "loss": 3.5062, "step": 676 }, { "epoch": 0.1, "learning_rate": 0.00018335981510905677, "loss": 3.5194, "step": 677 }, { "epoch": 0.1, "learning_rate": 0.00018333092589917666, "loss": 3.4268, "step": 678 }, { "epoch": 0.1, "learning_rate": 0.00018330203668929655, "loss": 3.4015, "step": 679 }, { "epoch": 0.1, "learning_rate": 0.00018327314747941645, "loss": 3.5977, "step": 680 }, { "epoch": 0.1, "learning_rate": 0.00018324425826953634, "loss": 3.4977, "step": 681 }, { "epoch": 0.1, "learning_rate": 0.0001832153690596562, "loss": 3.5338, "step": 682 }, { "epoch": 0.1, "learning_rate": 0.00018318647984977613, "loss": 3.5607, "step": 683 }, { "epoch": 0.1, "learning_rate": 0.00018315759063989602, "loss": 3.562, "step": 684 }, { "epoch": 0.1, "learning_rate": 0.00018312870143001592, "loss": 3.5139, "step": 685 }, { "epoch": 0.1, "learning_rate": 0.00018309981222013578, "loss": 3.5853, "step": 686 }, { "epoch": 0.1, "learning_rate": 0.00018307092301025568, "loss": 3.5065, "step": 687 }, { "epoch": 0.1, "learning_rate": 0.00018304203380037557, "loss": 3.5055, "step": 688 }, { "epoch": 0.1, "learning_rate": 0.00018301314459049546, "loss": 3.494, "step": 689 }, { "epoch": 0.1, "learning_rate": 0.00018298425538061533, "loss": 3.563, "step": 690 }, { "epoch": 0.1, "learning_rate": 0.00018295536617073523, "loss": 3.5116, "step": 691 }, { "epoch": 0.1, "learning_rate": 0.00018292647696085512, "loss": 3.5446, "step": 692 }, { "epoch": 0.1, "learning_rate": 0.00018289758775097504, "loss": 3.4678, "step": 693 }, { "epoch": 0.1, "learning_rate": 0.0001828686985410949, "loss": 3.5063, "step": 694 }, { "epoch": 0.1, "learning_rate": 0.0001828398093312148, "loss": 3.49, "step": 695 }, { "epoch": 0.1, "learning_rate": 0.0001828109201213347, "loss": 3.5029, "step": 696 }, { "epoch": 0.1, "learning_rate": 0.0001827820309114546, "loss": 3.504, "step": 697 }, { "epoch": 0.1, "learning_rate": 0.00018275314170157448, "loss": 3.5638, "step": 698 }, { "epoch": 0.1, "learning_rate": 0.00018272425249169435, "loss": 3.5541, "step": 699 }, { "epoch": 0.1, "learning_rate": 0.00018269536328181424, "loss": 3.5315, "step": 700 }, { "epoch": 0.1, "learning_rate": 0.00018266647407193414, "loss": 3.5832, "step": 701 }, { "epoch": 0.1, "learning_rate": 0.00018263758486205403, "loss": 3.5319, "step": 702 }, { "epoch": 0.1, "learning_rate": 0.00018260869565217392, "loss": 3.5295, "step": 703 }, { "epoch": 0.1, "learning_rate": 0.00018257980644229382, "loss": 3.519, "step": 704 }, { "epoch": 0.1, "eval_loss": 3.620858669281006, "eval_runtime": 471.8255, "eval_samples_per_second": 43.421, "eval_steps_per_second": 14.474, "step": 704 }, { "epoch": 0.1, "learning_rate": 0.0001825509172324137, "loss": 3.4535, "step": 705 }, { "epoch": 0.1, "learning_rate": 0.0001825220280225336, "loss": 3.33, "step": 706 }, { "epoch": 0.1, "learning_rate": 0.00018249313881265347, "loss": 3.4858, "step": 707 }, { "epoch": 0.1, "learning_rate": 0.00018246424960277337, "loss": 3.4598, "step": 708 }, { "epoch": 0.1, "learning_rate": 0.00018243536039289326, "loss": 3.5521, "step": 709 }, { "epoch": 0.1, "learning_rate": 0.00018240647118301315, "loss": 3.5254, "step": 710 }, { "epoch": 0.1, "learning_rate": 0.00018237758197313305, "loss": 3.5396, "step": 711 }, { "epoch": 0.1, "learning_rate": 0.00018234869276325291, "loss": 3.5268, "step": 712 }, { "epoch": 0.1, "learning_rate": 0.0001823198035533728, "loss": 3.6661, "step": 713 }, { "epoch": 0.1, "learning_rate": 0.00018229091434349273, "loss": 3.5742, "step": 714 }, { "epoch": 0.1, "learning_rate": 0.00018226202513361262, "loss": 3.6033, "step": 715 }, { "epoch": 0.1, "learning_rate": 0.0001822331359237325, "loss": 3.5636, "step": 716 }, { "epoch": 0.1, "learning_rate": 0.00018220424671385238, "loss": 3.5835, "step": 717 }, { "epoch": 0.1, "learning_rate": 0.00018217535750397228, "loss": 3.5446, "step": 718 }, { "epoch": 0.1, "learning_rate": 0.00018214646829409217, "loss": 3.5002, "step": 719 }, { "epoch": 0.1, "learning_rate": 0.00018211757908421206, "loss": 3.3511, "step": 720 }, { "epoch": 0.1, "learning_rate": 0.00018208868987433193, "loss": 3.4461, "step": 721 }, { "epoch": 0.1, "learning_rate": 0.00018205980066445183, "loss": 3.4288, "step": 722 }, { "epoch": 0.1, "learning_rate": 0.00018203091145457172, "loss": 3.4757, "step": 723 }, { "epoch": 0.1, "learning_rate": 0.0001820020222446916, "loss": 3.4678, "step": 724 }, { "epoch": 0.1, "learning_rate": 0.0001819731330348115, "loss": 3.3662, "step": 725 }, { "epoch": 0.1, "learning_rate": 0.0001819442438249314, "loss": 3.6651, "step": 726 }, { "epoch": 0.1, "learning_rate": 0.0001819153546150513, "loss": 3.5741, "step": 727 }, { "epoch": 0.1, "learning_rate": 0.0001818864654051712, "loss": 3.636, "step": 728 }, { "epoch": 0.1, "learning_rate": 0.00018185757619529106, "loss": 3.5109, "step": 729 }, { "epoch": 0.1, "learning_rate": 0.00018182868698541095, "loss": 3.4351, "step": 730 }, { "epoch": 0.1, "learning_rate": 0.00018179979777553084, "loss": 3.495, "step": 731 }, { "epoch": 0.1, "learning_rate": 0.00018177090856565074, "loss": 3.5606, "step": 732 }, { "epoch": 0.1, "learning_rate": 0.00018174201935577063, "loss": 3.511, "step": 733 }, { "epoch": 0.1, "learning_rate": 0.0001817131301458905, "loss": 3.4717, "step": 734 }, { "epoch": 0.1, "learning_rate": 0.00018168424093601042, "loss": 3.5888, "step": 735 }, { "epoch": 0.1, "learning_rate": 0.0001816553517261303, "loss": 3.3341, "step": 736 }, { "epoch": 0.1, "learning_rate": 0.0001816264625162502, "loss": 3.4462, "step": 737 }, { "epoch": 0.11, "learning_rate": 0.00018159757330637007, "loss": 3.5978, "step": 738 }, { "epoch": 0.11, "learning_rate": 0.00018156868409648997, "loss": 3.5568, "step": 739 }, { "epoch": 0.11, "learning_rate": 0.00018153979488660986, "loss": 3.4909, "step": 740 }, { "epoch": 0.11, "learning_rate": 0.00018151090567672975, "loss": 3.4277, "step": 741 }, { "epoch": 0.11, "learning_rate": 0.00018148201646684965, "loss": 3.5154, "step": 742 }, { "epoch": 0.11, "learning_rate": 0.00018145312725696951, "loss": 3.5742, "step": 743 }, { "epoch": 0.11, "learning_rate": 0.0001814242380470894, "loss": 3.4974, "step": 744 }, { "epoch": 0.11, "learning_rate": 0.0001813953488372093, "loss": 3.4369, "step": 745 }, { "epoch": 0.11, "learning_rate": 0.00018136645962732922, "loss": 3.5458, "step": 746 }, { "epoch": 0.11, "learning_rate": 0.0001813375704174491, "loss": 3.5238, "step": 747 }, { "epoch": 0.11, "learning_rate": 0.00018130868120756898, "loss": 3.4968, "step": 748 }, { "epoch": 0.11, "learning_rate": 0.00018127979199768888, "loss": 3.5419, "step": 749 }, { "epoch": 0.11, "learning_rate": 0.00018125090278780877, "loss": 3.3761, "step": 750 }, { "epoch": 0.11, "learning_rate": 0.00018122201357792864, "loss": 3.6181, "step": 751 }, { "epoch": 0.11, "learning_rate": 0.00018119312436804853, "loss": 3.5131, "step": 752 }, { "epoch": 0.11, "learning_rate": 0.00018116423515816843, "loss": 3.4547, "step": 753 }, { "epoch": 0.11, "learning_rate": 0.00018113534594828832, "loss": 3.5306, "step": 754 }, { "epoch": 0.11, "learning_rate": 0.0001811064567384082, "loss": 3.5011, "step": 755 }, { "epoch": 0.11, "learning_rate": 0.0001810775675285281, "loss": 3.5871, "step": 756 }, { "epoch": 0.11, "learning_rate": 0.000181048678318648, "loss": 3.4811, "step": 757 }, { "epoch": 0.11, "learning_rate": 0.0001810197891087679, "loss": 3.544, "step": 758 }, { "epoch": 0.11, "learning_rate": 0.0001809908998988878, "loss": 3.5499, "step": 759 }, { "epoch": 0.11, "learning_rate": 0.00018096201068900766, "loss": 3.6015, "step": 760 }, { "epoch": 0.11, "learning_rate": 0.00018093312147912755, "loss": 3.4873, "step": 761 }, { "epoch": 0.11, "learning_rate": 0.00018090423226924744, "loss": 3.5266, "step": 762 }, { "epoch": 0.11, "learning_rate": 0.00018087534305936734, "loss": 3.5291, "step": 763 }, { "epoch": 0.11, "learning_rate": 0.00018084645384948723, "loss": 3.465, "step": 764 }, { "epoch": 0.11, "learning_rate": 0.0001808175646396071, "loss": 3.6005, "step": 765 }, { "epoch": 0.11, "learning_rate": 0.000180788675429727, "loss": 3.5141, "step": 766 }, { "epoch": 0.11, "learning_rate": 0.0001807597862198469, "loss": 3.5216, "step": 767 }, { "epoch": 0.11, "learning_rate": 0.0001807308970099668, "loss": 3.4973, "step": 768 }, { "epoch": 0.11, "learning_rate": 0.00018070200780008667, "loss": 3.3215, "step": 769 }, { "epoch": 0.11, "learning_rate": 0.00018067311859020657, "loss": 3.4744, "step": 770 }, { "epoch": 0.11, "learning_rate": 0.00018064422938032646, "loss": 3.5823, "step": 771 }, { "epoch": 0.11, "learning_rate": 0.00018061534017044635, "loss": 3.5777, "step": 772 }, { "epoch": 0.11, "learning_rate": 0.00018058645096056622, "loss": 3.506, "step": 773 }, { "epoch": 0.11, "learning_rate": 0.00018055756175068611, "loss": 3.4742, "step": 774 }, { "epoch": 0.11, "learning_rate": 0.000180528672540806, "loss": 3.5748, "step": 775 }, { "epoch": 0.11, "learning_rate": 0.0001804997833309259, "loss": 3.6195, "step": 776 }, { "epoch": 0.11, "learning_rate": 0.0001804708941210458, "loss": 3.3161, "step": 777 }, { "epoch": 0.11, "learning_rate": 0.0001804420049111657, "loss": 3.5649, "step": 778 }, { "epoch": 0.11, "learning_rate": 0.00018041311570128558, "loss": 3.5283, "step": 779 }, { "epoch": 0.11, "learning_rate": 0.00018038422649140548, "loss": 3.6222, "step": 780 }, { "epoch": 0.11, "learning_rate": 0.00018035533728152537, "loss": 3.5523, "step": 781 }, { "epoch": 0.11, "learning_rate": 0.00018032644807164524, "loss": 3.5396, "step": 782 }, { "epoch": 0.11, "learning_rate": 0.00018029755886176513, "loss": 3.5446, "step": 783 }, { "epoch": 0.11, "learning_rate": 0.00018026866965188503, "loss": 3.6128, "step": 784 }, { "epoch": 0.11, "learning_rate": 0.00018023978044200492, "loss": 3.4507, "step": 785 }, { "epoch": 0.11, "learning_rate": 0.0001802108912321248, "loss": 3.4899, "step": 786 }, { "epoch": 0.11, "learning_rate": 0.00018018200202224468, "loss": 3.6131, "step": 787 }, { "epoch": 0.11, "learning_rate": 0.0001801531128123646, "loss": 3.5835, "step": 788 }, { "epoch": 0.11, "learning_rate": 0.0001801242236024845, "loss": 3.5657, "step": 789 }, { "epoch": 0.11, "learning_rate": 0.0001800953343926044, "loss": 3.5208, "step": 790 }, { "epoch": 0.11, "learning_rate": 0.00018006644518272426, "loss": 3.3454, "step": 791 }, { "epoch": 0.11, "learning_rate": 0.00018003755597284415, "loss": 3.5561, "step": 792 }, { "epoch": 0.11, "learning_rate": 0.00018000866676296404, "loss": 3.5146, "step": 793 }, { "epoch": 0.11, "learning_rate": 0.00017997977755308394, "loss": 3.5223, "step": 794 }, { "epoch": 0.11, "learning_rate": 0.0001799508883432038, "loss": 3.4998, "step": 795 }, { "epoch": 0.11, "learning_rate": 0.0001799219991333237, "loss": 3.3141, "step": 796 }, { "epoch": 0.11, "learning_rate": 0.0001798931099234436, "loss": 3.3507, "step": 797 }, { "epoch": 0.11, "learning_rate": 0.0001798642207135635, "loss": 3.6048, "step": 798 }, { "epoch": 0.11, "learning_rate": 0.00017983533150368338, "loss": 3.6106, "step": 799 }, { "epoch": 0.11, "learning_rate": 0.00017980644229380327, "loss": 3.4502, "step": 800 }, { "epoch": 0.11, "learning_rate": 0.00017977755308392317, "loss": 3.514, "step": 801 }, { "epoch": 0.11, "learning_rate": 0.00017974866387404306, "loss": 3.6228, "step": 802 }, { "epoch": 0.11, "learning_rate": 0.00017971977466416295, "loss": 3.5165, "step": 803 }, { "epoch": 0.11, "learning_rate": 0.00017969088545428282, "loss": 3.4641, "step": 804 }, { "epoch": 0.11, "learning_rate": 0.00017966199624440271, "loss": 3.5434, "step": 805 }, { "epoch": 0.11, "learning_rate": 0.0001796331070345226, "loss": 3.5272, "step": 806 }, { "epoch": 0.11, "learning_rate": 0.0001796042178246425, "loss": 3.4221, "step": 807 }, { "epoch": 0.12, "learning_rate": 0.0001795753286147624, "loss": 3.4645, "step": 808 }, { "epoch": 0.12, "learning_rate": 0.0001795464394048823, "loss": 3.6072, "step": 809 }, { "epoch": 0.12, "learning_rate": 0.00017951755019500218, "loss": 3.6087, "step": 810 }, { "epoch": 0.12, "learning_rate": 0.00017948866098512208, "loss": 3.5988, "step": 811 }, { "epoch": 0.12, "learning_rate": 0.00017945977177524197, "loss": 3.6459, "step": 812 }, { "epoch": 0.12, "learning_rate": 0.00017943088256536184, "loss": 3.4854, "step": 813 }, { "epoch": 0.12, "learning_rate": 0.00017940199335548173, "loss": 3.4805, "step": 814 }, { "epoch": 0.12, "learning_rate": 0.00017937310414560163, "loss": 3.435, "step": 815 }, { "epoch": 0.12, "learning_rate": 0.00017934421493572152, "loss": 3.5022, "step": 816 }, { "epoch": 0.12, "learning_rate": 0.0001793153257258414, "loss": 3.6624, "step": 817 }, { "epoch": 0.12, "learning_rate": 0.00017928643651596128, "loss": 3.5145, "step": 818 }, { "epoch": 0.12, "learning_rate": 0.0001792575473060812, "loss": 3.4939, "step": 819 }, { "epoch": 0.12, "learning_rate": 0.0001792286580962011, "loss": 3.4377, "step": 820 }, { "epoch": 0.12, "learning_rate": 0.00017919976888632096, "loss": 3.5569, "step": 821 }, { "epoch": 0.12, "learning_rate": 0.00017917087967644086, "loss": 3.5119, "step": 822 }, { "epoch": 0.12, "learning_rate": 0.00017914199046656075, "loss": 3.5576, "step": 823 }, { "epoch": 0.12, "learning_rate": 0.00017911310125668064, "loss": 3.4825, "step": 824 }, { "epoch": 0.12, "learning_rate": 0.00017908421204680054, "loss": 3.5241, "step": 825 }, { "epoch": 0.12, "learning_rate": 0.0001790553228369204, "loss": 3.4232, "step": 826 }, { "epoch": 0.12, "learning_rate": 0.0001790264336270403, "loss": 3.429, "step": 827 }, { "epoch": 0.12, "learning_rate": 0.0001789975444171602, "loss": 3.5378, "step": 828 }, { "epoch": 0.12, "learning_rate": 0.0001789686552072801, "loss": 3.604, "step": 829 }, { "epoch": 0.12, "learning_rate": 0.00017893976599739998, "loss": 3.6098, "step": 830 }, { "epoch": 0.12, "learning_rate": 0.00017891087678751987, "loss": 3.4572, "step": 831 }, { "epoch": 0.12, "learning_rate": 0.00017888198757763977, "loss": 3.4124, "step": 832 }, { "epoch": 0.12, "learning_rate": 0.00017885309836775966, "loss": 3.5226, "step": 833 }, { "epoch": 0.12, "learning_rate": 0.00017882420915787953, "loss": 3.5553, "step": 834 }, { "epoch": 0.12, "learning_rate": 0.00017879531994799942, "loss": 3.5273, "step": 835 }, { "epoch": 0.12, "learning_rate": 0.00017876643073811932, "loss": 3.5051, "step": 836 }, { "epoch": 0.12, "learning_rate": 0.0001787375415282392, "loss": 3.5559, "step": 837 }, { "epoch": 0.12, "learning_rate": 0.0001787086523183591, "loss": 3.605, "step": 838 }, { "epoch": 0.12, "learning_rate": 0.00017867976310847897, "loss": 3.4009, "step": 839 }, { "epoch": 0.12, "learning_rate": 0.0001786508738985989, "loss": 3.4324, "step": 840 }, { "epoch": 0.12, "learning_rate": 0.00017862198468871878, "loss": 3.5615, "step": 841 }, { "epoch": 0.12, "learning_rate": 0.00017859309547883868, "loss": 3.458, "step": 842 }, { "epoch": 0.12, "learning_rate": 0.00017856420626895854, "loss": 3.4252, "step": 843 }, { "epoch": 0.12, "learning_rate": 0.00017853531705907844, "loss": 3.4661, "step": 844 }, { "epoch": 0.12, "learning_rate": 0.00017850642784919833, "loss": 3.4558, "step": 845 }, { "epoch": 0.12, "learning_rate": 0.00017847753863931823, "loss": 3.5175, "step": 846 }, { "epoch": 0.12, "learning_rate": 0.00017844864942943812, "loss": 3.5751, "step": 847 }, { "epoch": 0.12, "learning_rate": 0.000178419760219558, "loss": 3.5493, "step": 848 }, { "epoch": 0.12, "learning_rate": 0.00017839087100967788, "loss": 3.6275, "step": 849 }, { "epoch": 0.12, "learning_rate": 0.0001783619817997978, "loss": 3.5884, "step": 850 }, { "epoch": 0.12, "learning_rate": 0.0001783330925899177, "loss": 3.519, "step": 851 }, { "epoch": 0.12, "learning_rate": 0.00017830420338003756, "loss": 3.6041, "step": 852 }, { "epoch": 0.12, "learning_rate": 0.00017827531417015746, "loss": 3.4436, "step": 853 }, { "epoch": 0.12, "learning_rate": 0.00017824642496027735, "loss": 3.5615, "step": 854 }, { "epoch": 0.12, "learning_rate": 0.00017821753575039724, "loss": 3.4636, "step": 855 }, { "epoch": 0.12, "learning_rate": 0.0001781886465405171, "loss": 3.4988, "step": 856 }, { "epoch": 0.12, "learning_rate": 0.000178159757330637, "loss": 3.4776, "step": 857 }, { "epoch": 0.12, "learning_rate": 0.0001781308681207569, "loss": 3.5114, "step": 858 }, { "epoch": 0.12, "learning_rate": 0.0001781019789108768, "loss": 3.5242, "step": 859 }, { "epoch": 0.12, "learning_rate": 0.00017807308970099669, "loss": 3.3103, "step": 860 }, { "epoch": 0.12, "learning_rate": 0.00017804420049111658, "loss": 3.5707, "step": 861 }, { "epoch": 0.12, "learning_rate": 0.00017801531128123647, "loss": 3.5229, "step": 862 }, { "epoch": 0.12, "learning_rate": 0.00017798642207135637, "loss": 3.548, "step": 863 }, { "epoch": 0.12, "learning_rate": 0.00017795753286147626, "loss": 3.4077, "step": 864 }, { "epoch": 0.12, "learning_rate": 0.00017792864365159613, "loss": 3.5361, "step": 865 }, { "epoch": 0.12, "learning_rate": 0.00017789975444171602, "loss": 3.569, "step": 866 }, { "epoch": 0.12, "learning_rate": 0.00017787086523183592, "loss": 3.5805, "step": 867 }, { "epoch": 0.12, "learning_rate": 0.0001778419760219558, "loss": 3.5906, "step": 868 }, { "epoch": 0.12, "learning_rate": 0.0001778130868120757, "loss": 3.5841, "step": 869 }, { "epoch": 0.12, "learning_rate": 0.00017778419760219557, "loss": 3.3957, "step": 870 }, { "epoch": 0.12, "learning_rate": 0.0001777553083923155, "loss": 3.4768, "step": 871 }, { "epoch": 0.12, "learning_rate": 0.00017772641918243538, "loss": 3.4825, "step": 872 }, { "epoch": 0.12, "learning_rate": 0.00017769752997255528, "loss": 3.4059, "step": 873 }, { "epoch": 0.12, "learning_rate": 0.00017766864076267514, "loss": 3.4614, "step": 874 }, { "epoch": 0.12, "learning_rate": 0.00017763975155279504, "loss": 3.4827, "step": 875 }, { "epoch": 0.12, "learning_rate": 0.00017761086234291493, "loss": 3.559, "step": 876 }, { "epoch": 0.12, "learning_rate": 0.00017758197313303483, "loss": 3.5241, "step": 877 }, { "epoch": 0.13, "learning_rate": 0.0001775530839231547, "loss": 3.5867, "step": 878 }, { "epoch": 0.13, "learning_rate": 0.0001775241947132746, "loss": 3.5094, "step": 879 }, { "epoch": 0.13, "learning_rate": 0.00017749530550339448, "loss": 3.3131, "step": 880 }, { "epoch": 0.13, "learning_rate": 0.00017746641629351437, "loss": 3.4676, "step": 881 }, { "epoch": 0.13, "learning_rate": 0.00017743752708363427, "loss": 3.6489, "step": 882 }, { "epoch": 0.13, "learning_rate": 0.00017740863787375416, "loss": 3.5398, "step": 883 }, { "epoch": 0.13, "learning_rate": 0.00017737974866387406, "loss": 3.5125, "step": 884 }, { "epoch": 0.13, "learning_rate": 0.00017735085945399395, "loss": 3.5213, "step": 885 }, { "epoch": 0.13, "learning_rate": 0.00017732197024411384, "loss": 3.4799, "step": 886 }, { "epoch": 0.13, "learning_rate": 0.0001772930810342337, "loss": 3.4404, "step": 887 }, { "epoch": 0.13, "learning_rate": 0.0001772641918243536, "loss": 3.478, "step": 888 }, { "epoch": 0.13, "learning_rate": 0.0001772353026144735, "loss": 3.6084, "step": 889 }, { "epoch": 0.13, "learning_rate": 0.0001772064134045934, "loss": 3.5364, "step": 890 }, { "epoch": 0.13, "learning_rate": 0.00017717752419471329, "loss": 3.6162, "step": 891 }, { "epoch": 0.13, "learning_rate": 0.00017714863498483318, "loss": 3.4878, "step": 892 }, { "epoch": 0.13, "learning_rate": 0.00017711974577495307, "loss": 3.444, "step": 893 }, { "epoch": 0.13, "learning_rate": 0.00017709085656507297, "loss": 3.5809, "step": 894 }, { "epoch": 0.13, "learning_rate": 0.00017706196735519286, "loss": 3.5722, "step": 895 }, { "epoch": 0.13, "learning_rate": 0.00017703307814531273, "loss": 3.5038, "step": 896 }, { "epoch": 0.13, "learning_rate": 0.00017700418893543262, "loss": 3.5314, "step": 897 }, { "epoch": 0.13, "learning_rate": 0.00017697529972555252, "loss": 3.3688, "step": 898 }, { "epoch": 0.13, "learning_rate": 0.0001769464105156724, "loss": 3.6036, "step": 899 }, { "epoch": 0.13, "learning_rate": 0.00017691752130579228, "loss": 3.615, "step": 900 }, { "epoch": 0.13, "learning_rate": 0.00017688863209591217, "loss": 3.3827, "step": 901 }, { "epoch": 0.13, "learning_rate": 0.00017685974288603206, "loss": 3.538, "step": 902 }, { "epoch": 0.13, "learning_rate": 0.00017683085367615198, "loss": 3.4969, "step": 903 }, { "epoch": 0.13, "learning_rate": 0.00017680196446627185, "loss": 3.5288, "step": 904 }, { "epoch": 0.13, "learning_rate": 0.00017677307525639175, "loss": 3.6388, "step": 905 }, { "epoch": 0.13, "learning_rate": 0.00017674418604651164, "loss": 3.5413, "step": 906 }, { "epoch": 0.13, "learning_rate": 0.00017671529683663153, "loss": 3.5144, "step": 907 }, { "epoch": 0.13, "learning_rate": 0.00017668640762675143, "loss": 3.3827, "step": 908 }, { "epoch": 0.13, "learning_rate": 0.0001766575184168713, "loss": 3.4117, "step": 909 }, { "epoch": 0.13, "learning_rate": 0.0001766286292069912, "loss": 3.5603, "step": 910 }, { "epoch": 0.13, "learning_rate": 0.00017659973999711108, "loss": 3.446, "step": 911 }, { "epoch": 0.13, "learning_rate": 0.00017657085078723097, "loss": 3.5327, "step": 912 }, { "epoch": 0.13, "learning_rate": 0.00017654196157735087, "loss": 3.3939, "step": 913 }, { "epoch": 0.13, "learning_rate": 0.00017651307236747076, "loss": 3.3279, "step": 914 }, { "epoch": 0.13, "learning_rate": 0.00017648418315759066, "loss": 3.5229, "step": 915 }, { "epoch": 0.13, "learning_rate": 0.00017645529394771055, "loss": 3.4836, "step": 916 }, { "epoch": 0.13, "learning_rate": 0.00017642640473783044, "loss": 3.5151, "step": 917 }, { "epoch": 0.13, "learning_rate": 0.0001763975155279503, "loss": 3.5036, "step": 918 }, { "epoch": 0.13, "learning_rate": 0.0001763686263180702, "loss": 3.3613, "step": 919 }, { "epoch": 0.13, "learning_rate": 0.0001763397371081901, "loss": 3.416, "step": 920 }, { "epoch": 0.13, "learning_rate": 0.00017631084789831, "loss": 3.5282, "step": 921 }, { "epoch": 0.13, "learning_rate": 0.00017628195868842986, "loss": 3.4353, "step": 922 }, { "epoch": 0.13, "learning_rate": 0.00017625306947854975, "loss": 3.5206, "step": 923 }, { "epoch": 0.13, "learning_rate": 0.00017622418026866967, "loss": 3.5056, "step": 924 }, { "epoch": 0.13, "learning_rate": 0.00017619529105878957, "loss": 3.6753, "step": 925 }, { "epoch": 0.13, "learning_rate": 0.00017616640184890943, "loss": 3.5445, "step": 926 }, { "epoch": 0.13, "learning_rate": 0.00017613751263902933, "loss": 3.54, "step": 927 }, { "epoch": 0.13, "learning_rate": 0.00017610862342914922, "loss": 3.5076, "step": 928 }, { "epoch": 0.13, "learning_rate": 0.00017607973421926912, "loss": 3.341, "step": 929 }, { "epoch": 0.13, "learning_rate": 0.000176050845009389, "loss": 3.5414, "step": 930 }, { "epoch": 0.13, "learning_rate": 0.00017602195579950888, "loss": 3.5658, "step": 931 }, { "epoch": 0.13, "learning_rate": 0.00017599306658962877, "loss": 3.4999, "step": 932 }, { "epoch": 0.13, "learning_rate": 0.00017596417737974866, "loss": 3.5007, "step": 933 }, { "epoch": 0.13, "learning_rate": 0.00017593528816986858, "loss": 3.5981, "step": 934 }, { "epoch": 0.13, "learning_rate": 0.00017590639895998845, "loss": 3.4566, "step": 935 }, { "epoch": 0.13, "learning_rate": 0.00017587750975010835, "loss": 3.6219, "step": 936 }, { "epoch": 0.13, "learning_rate": 0.00017584862054022824, "loss": 3.4128, "step": 937 }, { "epoch": 0.13, "learning_rate": 0.00017581973133034813, "loss": 3.3593, "step": 938 }, { "epoch": 0.13, "learning_rate": 0.00017579084212046803, "loss": 3.4622, "step": 939 }, { "epoch": 0.13, "learning_rate": 0.0001757619529105879, "loss": 3.6345, "step": 940 }, { "epoch": 0.13, "learning_rate": 0.0001757330637007078, "loss": 3.5757, "step": 941 }, { "epoch": 0.13, "learning_rate": 0.00017570417449082768, "loss": 3.4232, "step": 942 }, { "epoch": 0.13, "learning_rate": 0.00017567528528094757, "loss": 3.2727, "step": 943 }, { "epoch": 0.13, "learning_rate": 0.00017564639607106744, "loss": 3.5383, "step": 944 }, { "epoch": 0.13, "learning_rate": 0.00017561750686118736, "loss": 3.4219, "step": 945 }, { "epoch": 0.13, "learning_rate": 0.00017558861765130726, "loss": 3.3755, "step": 946 }, { "epoch": 0.13, "learning_rate": 0.00017555972844142715, "loss": 3.4804, "step": 947 }, { "epoch": 0.13, "learning_rate": 0.00017553083923154702, "loss": 3.5328, "step": 948 }, { "epoch": 0.14, "learning_rate": 0.0001755019500216669, "loss": 3.4785, "step": 949 }, { "epoch": 0.14, "learning_rate": 0.0001754730608117868, "loss": 3.4262, "step": 950 }, { "epoch": 0.14, "learning_rate": 0.0001754441716019067, "loss": 3.5284, "step": 951 }, { "epoch": 0.14, "learning_rate": 0.0001754152823920266, "loss": 3.5174, "step": 952 }, { "epoch": 0.14, "learning_rate": 0.00017538639318214646, "loss": 3.5466, "step": 953 }, { "epoch": 0.14, "learning_rate": 0.00017535750397226635, "loss": 3.5061, "step": 954 }, { "epoch": 0.14, "learning_rate": 0.00017532861476238627, "loss": 3.3919, "step": 955 }, { "epoch": 0.14, "learning_rate": 0.00017529972555250617, "loss": 3.4249, "step": 956 }, { "epoch": 0.14, "learning_rate": 0.00017527083634262603, "loss": 3.5617, "step": 957 }, { "epoch": 0.14, "learning_rate": 0.00017524194713274593, "loss": 3.5016, "step": 958 }, { "epoch": 0.14, "learning_rate": 0.00017521305792286582, "loss": 3.4626, "step": 959 }, { "epoch": 0.14, "learning_rate": 0.00017518416871298572, "loss": 3.3283, "step": 960 }, { "epoch": 0.14, "learning_rate": 0.00017515527950310558, "loss": 3.5361, "step": 961 }, { "epoch": 0.14, "learning_rate": 0.00017512639029322548, "loss": 3.5714, "step": 962 }, { "epoch": 0.14, "learning_rate": 0.00017509750108334537, "loss": 3.6037, "step": 963 }, { "epoch": 0.14, "learning_rate": 0.00017506861187346526, "loss": 3.4375, "step": 964 }, { "epoch": 0.14, "learning_rate": 0.00017503972266358516, "loss": 3.4013, "step": 965 }, { "epoch": 0.14, "learning_rate": 0.00017501083345370505, "loss": 3.3052, "step": 966 }, { "epoch": 0.14, "learning_rate": 0.00017498194424382495, "loss": 3.5285, "step": 967 }, { "epoch": 0.14, "learning_rate": 0.00017495305503394484, "loss": 3.477, "step": 968 }, { "epoch": 0.14, "learning_rate": 0.00017492416582406473, "loss": 3.4421, "step": 969 }, { "epoch": 0.14, "learning_rate": 0.0001748952766141846, "loss": 3.5492, "step": 970 }, { "epoch": 0.14, "learning_rate": 0.0001748663874043045, "loss": 3.5483, "step": 971 }, { "epoch": 0.14, "learning_rate": 0.0001748374981944244, "loss": 3.4165, "step": 972 }, { "epoch": 0.14, "learning_rate": 0.00017480860898454428, "loss": 3.5144, "step": 973 }, { "epoch": 0.14, "learning_rate": 0.00017477971977466418, "loss": 3.6121, "step": 974 }, { "epoch": 0.14, "learning_rate": 0.00017475083056478404, "loss": 3.6716, "step": 975 }, { "epoch": 0.14, "learning_rate": 0.00017472194135490396, "loss": 3.5422, "step": 976 }, { "epoch": 0.14, "learning_rate": 0.00017469305214502386, "loss": 3.5777, "step": 977 }, { "epoch": 0.14, "learning_rate": 0.00017466416293514375, "loss": 3.406, "step": 978 }, { "epoch": 0.14, "learning_rate": 0.00017463527372526362, "loss": 3.5384, "step": 979 }, { "epoch": 0.14, "learning_rate": 0.0001746063845153835, "loss": 3.4563, "step": 980 }, { "epoch": 0.14, "learning_rate": 0.0001745774953055034, "loss": 3.481, "step": 981 }, { "epoch": 0.14, "learning_rate": 0.0001745486060956233, "loss": 3.3857, "step": 982 }, { "epoch": 0.14, "learning_rate": 0.00017451971688574317, "loss": 3.43, "step": 983 }, { "epoch": 0.14, "learning_rate": 0.00017449082767586306, "loss": 3.4773, "step": 984 }, { "epoch": 0.14, "learning_rate": 0.00017446193846598295, "loss": 3.5036, "step": 985 }, { "epoch": 0.14, "learning_rate": 0.00017443304925610285, "loss": 3.4525, "step": 986 }, { "epoch": 0.14, "learning_rate": 0.00017440416004622274, "loss": 3.3283, "step": 987 }, { "epoch": 0.14, "learning_rate": 0.00017437527083634263, "loss": 3.5363, "step": 988 }, { "epoch": 0.14, "learning_rate": 0.00017434638162646253, "loss": 3.5391, "step": 989 }, { "epoch": 0.14, "learning_rate": 0.00017431749241658242, "loss": 3.582, "step": 990 }, { "epoch": 0.14, "learning_rate": 0.00017428860320670232, "loss": 3.6314, "step": 991 }, { "epoch": 0.14, "learning_rate": 0.00017425971399682218, "loss": 3.53, "step": 992 }, { "epoch": 0.14, "learning_rate": 0.00017423082478694208, "loss": 3.3956, "step": 993 }, { "epoch": 0.14, "learning_rate": 0.00017420193557706197, "loss": 3.4508, "step": 994 }, { "epoch": 0.14, "learning_rate": 0.00017417304636718186, "loss": 3.512, "step": 995 }, { "epoch": 0.14, "learning_rate": 0.00017414415715730176, "loss": 3.3145, "step": 996 }, { "epoch": 0.14, "learning_rate": 0.00017411526794742165, "loss": 3.5319, "step": 997 }, { "epoch": 0.14, "learning_rate": 0.00017408637873754155, "loss": 3.4918, "step": 998 }, { "epoch": 0.14, "learning_rate": 0.00017405748952766144, "loss": 3.4096, "step": 999 }, { "epoch": 0.14, "learning_rate": 0.00017402860031778133, "loss": 3.6008, "step": 1000 }, { "epoch": 0.14, "learning_rate": 0.0001739997111079012, "loss": 3.2475, "step": 1001 }, { "epoch": 0.14, "learning_rate": 0.0001739708218980211, "loss": 3.6255, "step": 1002 }, { "epoch": 0.14, "learning_rate": 0.000173941932688141, "loss": 3.491, "step": 1003 }, { "epoch": 0.14, "learning_rate": 0.00017391304347826088, "loss": 3.3948, "step": 1004 }, { "epoch": 0.14, "learning_rate": 0.00017388415426838075, "loss": 3.5865, "step": 1005 }, { "epoch": 0.14, "learning_rate": 0.00017385526505850064, "loss": 3.5071, "step": 1006 }, { "epoch": 0.14, "learning_rate": 0.00017382637584862056, "loss": 3.546, "step": 1007 }, { "epoch": 0.14, "learning_rate": 0.00017379748663874046, "loss": 3.5369, "step": 1008 }, { "epoch": 0.14, "learning_rate": 0.00017376859742886032, "loss": 3.4211, "step": 1009 }, { "epoch": 0.14, "learning_rate": 0.00017373970821898022, "loss": 3.4982, "step": 1010 }, { "epoch": 0.14, "learning_rate": 0.0001737108190091001, "loss": 3.4012, "step": 1011 }, { "epoch": 0.14, "learning_rate": 0.00017368192979922, "loss": 3.4594, "step": 1012 }, { "epoch": 0.14, "learning_rate": 0.0001736530405893399, "loss": 3.5041, "step": 1013 }, { "epoch": 0.14, "learning_rate": 0.00017362415137945977, "loss": 3.497, "step": 1014 }, { "epoch": 0.14, "learning_rate": 0.00017359526216957966, "loss": 3.3829, "step": 1015 }, { "epoch": 0.14, "learning_rate": 0.00017356637295969955, "loss": 3.4956, "step": 1016 }, { "epoch": 0.14, "learning_rate": 0.00017353748374981945, "loss": 3.6394, "step": 1017 }, { "epoch": 0.14, "learning_rate": 0.00017350859453993934, "loss": 3.629, "step": 1018 }, { "epoch": 0.15, "learning_rate": 0.00017347970533005923, "loss": 3.5818, "step": 1019 }, { "epoch": 0.15, "learning_rate": 0.00017345081612017913, "loss": 3.4767, "step": 1020 }, { "epoch": 0.15, "learning_rate": 0.00017342192691029902, "loss": 3.6218, "step": 1021 }, { "epoch": 0.15, "learning_rate": 0.00017339303770041892, "loss": 3.555, "step": 1022 }, { "epoch": 0.15, "learning_rate": 0.00017336414849053878, "loss": 3.697, "step": 1023 }, { "epoch": 0.15, "learning_rate": 0.00017333525928065868, "loss": 3.5714, "step": 1024 }, { "epoch": 0.15, "learning_rate": 0.00017330637007077857, "loss": 3.4656, "step": 1025 }, { "epoch": 0.15, "learning_rate": 0.00017327748086089846, "loss": 3.488, "step": 1026 }, { "epoch": 0.15, "learning_rate": 0.00017324859165101833, "loss": 3.5766, "step": 1027 }, { "epoch": 0.15, "learning_rate": 0.00017321970244113825, "loss": 3.6, "step": 1028 }, { "epoch": 0.15, "learning_rate": 0.00017319081323125815, "loss": 3.662, "step": 1029 }, { "epoch": 0.15, "learning_rate": 0.00017316192402137804, "loss": 3.4725, "step": 1030 }, { "epoch": 0.15, "learning_rate": 0.0001731330348114979, "loss": 3.4792, "step": 1031 }, { "epoch": 0.15, "learning_rate": 0.0001731041456016178, "loss": 3.143, "step": 1032 }, { "epoch": 0.15, "learning_rate": 0.0001730752563917377, "loss": 3.5497, "step": 1033 }, { "epoch": 0.15, "learning_rate": 0.0001730463671818576, "loss": 3.362, "step": 1034 }, { "epoch": 0.15, "learning_rate": 0.00017301747797197748, "loss": 3.5541, "step": 1035 }, { "epoch": 0.15, "learning_rate": 0.00017298858876209735, "loss": 3.4184, "step": 1036 }, { "epoch": 0.15, "learning_rate": 0.00017295969955221724, "loss": 3.4563, "step": 1037 }, { "epoch": 0.15, "learning_rate": 0.00017293081034233714, "loss": 3.4454, "step": 1038 }, { "epoch": 0.15, "learning_rate": 0.00017290192113245706, "loss": 3.497, "step": 1039 }, { "epoch": 0.15, "learning_rate": 0.00017287303192257692, "loss": 3.5273, "step": 1040 }, { "epoch": 0.15, "learning_rate": 0.00017284414271269682, "loss": 3.3877, "step": 1041 }, { "epoch": 0.15, "learning_rate": 0.0001728152535028167, "loss": 3.5384, "step": 1042 }, { "epoch": 0.15, "learning_rate": 0.0001727863642929366, "loss": 3.5564, "step": 1043 }, { "epoch": 0.15, "learning_rate": 0.0001727574750830565, "loss": 3.5066, "step": 1044 }, { "epoch": 0.15, "learning_rate": 0.00017272858587317637, "loss": 3.3032, "step": 1045 }, { "epoch": 0.15, "learning_rate": 0.00017269969666329626, "loss": 3.4389, "step": 1046 }, { "epoch": 0.15, "learning_rate": 0.00017267080745341615, "loss": 3.5787, "step": 1047 }, { "epoch": 0.15, "learning_rate": 0.00017264191824353605, "loss": 3.442, "step": 1048 }, { "epoch": 0.15, "learning_rate": 0.00017261302903365594, "loss": 3.5452, "step": 1049 }, { "epoch": 0.15, "learning_rate": 0.00017258413982377583, "loss": 3.3751, "step": 1050 }, { "epoch": 0.15, "learning_rate": 0.00017255525061389573, "loss": 3.316, "step": 1051 }, { "epoch": 0.15, "learning_rate": 0.00017252636140401562, "loss": 3.5407, "step": 1052 }, { "epoch": 0.15, "learning_rate": 0.0001724974721941355, "loss": 3.3702, "step": 1053 }, { "epoch": 0.15, "learning_rate": 0.00017246858298425538, "loss": 3.4369, "step": 1054 }, { "epoch": 0.15, "learning_rate": 0.00017243969377437528, "loss": 3.438, "step": 1055 }, { "epoch": 0.15, "learning_rate": 0.00017241080456449517, "loss": 3.4168, "step": 1056 }, { "epoch": 0.15, "eval_loss": 3.6160545349121094, "eval_runtime": 472.1217, "eval_samples_per_second": 43.393, "eval_steps_per_second": 14.464, "step": 1056 }, { "epoch": 0.15, "learning_rate": 0.00017238191535461506, "loss": 3.4049, "step": 1057 }, { "epoch": 0.15, "learning_rate": 0.00017235302614473493, "loss": 3.5561, "step": 1058 }, { "epoch": 0.15, "learning_rate": 0.00017232413693485482, "loss": 3.5493, "step": 1059 }, { "epoch": 0.15, "learning_rate": 0.00017229524772497475, "loss": 3.5245, "step": 1060 }, { "epoch": 0.15, "learning_rate": 0.00017226635851509464, "loss": 3.565, "step": 1061 }, { "epoch": 0.15, "learning_rate": 0.0001722374693052145, "loss": 3.5078, "step": 1062 }, { "epoch": 0.15, "learning_rate": 0.0001722085800953344, "loss": 3.5372, "step": 1063 }, { "epoch": 0.15, "learning_rate": 0.0001721796908854543, "loss": 3.4221, "step": 1064 }, { "epoch": 0.15, "learning_rate": 0.0001721508016755742, "loss": 3.5337, "step": 1065 }, { "epoch": 0.15, "learning_rate": 0.00017212191246569408, "loss": 3.3991, "step": 1066 }, { "epoch": 0.15, "learning_rate": 0.00017209302325581395, "loss": 3.5483, "step": 1067 }, { "epoch": 0.15, "learning_rate": 0.00017206413404593384, "loss": 3.4667, "step": 1068 }, { "epoch": 0.15, "learning_rate": 0.00017203524483605374, "loss": 3.612, "step": 1069 }, { "epoch": 0.15, "learning_rate": 0.00017200635562617363, "loss": 3.485, "step": 1070 }, { "epoch": 0.15, "learning_rate": 0.00017197746641629352, "loss": 3.516, "step": 1071 }, { "epoch": 0.15, "learning_rate": 0.00017194857720641342, "loss": 3.5625, "step": 1072 }, { "epoch": 0.15, "learning_rate": 0.0001719196879965333, "loss": 3.4549, "step": 1073 }, { "epoch": 0.15, "learning_rate": 0.0001718907987866532, "loss": 3.6844, "step": 1074 }, { "epoch": 0.15, "learning_rate": 0.00017186190957677307, "loss": 3.5497, "step": 1075 }, { "epoch": 0.15, "learning_rate": 0.00017183302036689297, "loss": 3.3467, "step": 1076 }, { "epoch": 0.15, "learning_rate": 0.00017180413115701286, "loss": 3.5433, "step": 1077 }, { "epoch": 0.15, "learning_rate": 0.00017177524194713275, "loss": 3.42, "step": 1078 }, { "epoch": 0.15, "learning_rate": 0.00017174635273725265, "loss": 3.4682, "step": 1079 }, { "epoch": 0.15, "learning_rate": 0.00017171746352737251, "loss": 3.4201, "step": 1080 }, { "epoch": 0.15, "learning_rate": 0.00017168857431749243, "loss": 3.5717, "step": 1081 }, { "epoch": 0.15, "learning_rate": 0.00017165968510761233, "loss": 3.453, "step": 1082 }, { "epoch": 0.15, "learning_rate": 0.00017163079589773222, "loss": 3.6388, "step": 1083 }, { "epoch": 0.15, "learning_rate": 0.0001716019066878521, "loss": 3.5073, "step": 1084 }, { "epoch": 0.15, "learning_rate": 0.00017157301747797198, "loss": 3.529, "step": 1085 }, { "epoch": 0.15, "learning_rate": 0.00017154412826809188, "loss": 3.5095, "step": 1086 }, { "epoch": 0.15, "learning_rate": 0.00017151523905821177, "loss": 3.4548, "step": 1087 }, { "epoch": 0.15, "learning_rate": 0.00017148634984833164, "loss": 3.3342, "step": 1088 }, { "epoch": 0.16, "learning_rate": 0.00017145746063845153, "loss": 3.3946, "step": 1089 }, { "epoch": 0.16, "learning_rate": 0.00017142857142857143, "loss": 3.5994, "step": 1090 }, { "epoch": 0.16, "learning_rate": 0.00017139968221869135, "loss": 3.5959, "step": 1091 }, { "epoch": 0.16, "learning_rate": 0.0001713707930088112, "loss": 3.507, "step": 1092 }, { "epoch": 0.16, "learning_rate": 0.0001713419037989311, "loss": 3.6017, "step": 1093 }, { "epoch": 0.16, "learning_rate": 0.000171313014589051, "loss": 3.3432, "step": 1094 }, { "epoch": 0.16, "learning_rate": 0.0001712841253791709, "loss": 3.4669, "step": 1095 }, { "epoch": 0.16, "learning_rate": 0.0001712552361692908, "loss": 3.4304, "step": 1096 }, { "epoch": 0.16, "learning_rate": 0.00017122634695941065, "loss": 3.5036, "step": 1097 }, { "epoch": 0.16, "learning_rate": 0.00017119745774953055, "loss": 3.3196, "step": 1098 }, { "epoch": 0.16, "learning_rate": 0.00017116856853965044, "loss": 3.4165, "step": 1099 }, { "epoch": 0.16, "learning_rate": 0.00017113967932977034, "loss": 3.3619, "step": 1100 }, { "epoch": 0.16, "learning_rate": 0.00017111079011989023, "loss": 3.546, "step": 1101 }, { "epoch": 0.16, "learning_rate": 0.00017108190091001012, "loss": 3.3974, "step": 1102 }, { "epoch": 0.16, "learning_rate": 0.00017105301170013002, "loss": 3.4734, "step": 1103 }, { "epoch": 0.16, "learning_rate": 0.0001710241224902499, "loss": 3.4852, "step": 1104 }, { "epoch": 0.16, "learning_rate": 0.0001709952332803698, "loss": 3.4784, "step": 1105 }, { "epoch": 0.16, "learning_rate": 0.00017096634407048967, "loss": 3.5011, "step": 1106 }, { "epoch": 0.16, "learning_rate": 0.00017093745486060957, "loss": 3.6002, "step": 1107 }, { "epoch": 0.16, "learning_rate": 0.00017090856565072946, "loss": 3.4947, "step": 1108 }, { "epoch": 0.16, "learning_rate": 0.00017087967644084935, "loss": 3.5663, "step": 1109 }, { "epoch": 0.16, "learning_rate": 0.00017085078723096922, "loss": 3.411, "step": 1110 }, { "epoch": 0.16, "learning_rate": 0.00017082189802108911, "loss": 3.5298, "step": 1111 }, { "epoch": 0.16, "learning_rate": 0.00017079300881120904, "loss": 3.4539, "step": 1112 }, { "epoch": 0.16, "learning_rate": 0.00017076411960132893, "loss": 3.3371, "step": 1113 }, { "epoch": 0.16, "learning_rate": 0.0001707352303914488, "loss": 3.4276, "step": 1114 }, { "epoch": 0.16, "learning_rate": 0.0001707063411815687, "loss": 3.5796, "step": 1115 }, { "epoch": 0.16, "learning_rate": 0.00017067745197168858, "loss": 3.4728, "step": 1116 }, { "epoch": 0.16, "learning_rate": 0.00017064856276180848, "loss": 3.4893, "step": 1117 }, { "epoch": 0.16, "learning_rate": 0.00017061967355192837, "loss": 3.575, "step": 1118 }, { "epoch": 0.16, "learning_rate": 0.00017059078434204824, "loss": 3.5082, "step": 1119 }, { "epoch": 0.16, "learning_rate": 0.00017056189513216813, "loss": 3.3607, "step": 1120 }, { "epoch": 0.16, "learning_rate": 0.00017053300592228803, "loss": 3.49, "step": 1121 }, { "epoch": 0.16, "learning_rate": 0.00017050411671240792, "loss": 3.4581, "step": 1122 }, { "epoch": 0.16, "learning_rate": 0.0001704752275025278, "loss": 3.5894, "step": 1123 }, { "epoch": 0.16, "learning_rate": 0.0001704463382926477, "loss": 3.3818, "step": 1124 }, { "epoch": 0.16, "learning_rate": 0.0001704174490827676, "loss": 3.7091, "step": 1125 }, { "epoch": 0.16, "learning_rate": 0.0001703885598728875, "loss": 3.5166, "step": 1126 }, { "epoch": 0.16, "learning_rate": 0.0001703596706630074, "loss": 3.547, "step": 1127 }, { "epoch": 0.16, "learning_rate": 0.00017033078145312726, "loss": 3.392, "step": 1128 }, { "epoch": 0.16, "learning_rate": 0.00017030189224324715, "loss": 3.6263, "step": 1129 }, { "epoch": 0.16, "learning_rate": 0.00017027300303336704, "loss": 3.5654, "step": 1130 }, { "epoch": 0.16, "learning_rate": 0.00017024411382348694, "loss": 3.547, "step": 1131 }, { "epoch": 0.16, "learning_rate": 0.0001702152246136068, "loss": 3.5592, "step": 1132 }, { "epoch": 0.16, "learning_rate": 0.00017018633540372672, "loss": 3.4518, "step": 1133 }, { "epoch": 0.16, "learning_rate": 0.00017015744619384662, "loss": 3.55, "step": 1134 }, { "epoch": 0.16, "learning_rate": 0.0001701285569839665, "loss": 3.3945, "step": 1135 }, { "epoch": 0.16, "learning_rate": 0.00017009966777408638, "loss": 3.4937, "step": 1136 }, { "epoch": 0.16, "learning_rate": 0.00017007077856420627, "loss": 3.5241, "step": 1137 }, { "epoch": 0.16, "learning_rate": 0.00017004188935432617, "loss": 3.4706, "step": 1138 }, { "epoch": 0.16, "learning_rate": 0.00017001300014444606, "loss": 3.4276, "step": 1139 }, { "epoch": 0.16, "learning_rate": 0.00016998411093456595, "loss": 3.569, "step": 1140 }, { "epoch": 0.16, "learning_rate": 0.00016995522172468582, "loss": 3.5449, "step": 1141 }, { "epoch": 0.16, "learning_rate": 0.00016992633251480571, "loss": 3.3942, "step": 1142 }, { "epoch": 0.16, "learning_rate": 0.0001698974433049256, "loss": 3.5298, "step": 1143 }, { "epoch": 0.16, "learning_rate": 0.00016986855409504553, "loss": 3.5164, "step": 1144 }, { "epoch": 0.16, "learning_rate": 0.0001698396648851654, "loss": 3.5107, "step": 1145 }, { "epoch": 0.16, "learning_rate": 0.0001698107756752853, "loss": 3.5288, "step": 1146 }, { "epoch": 0.16, "learning_rate": 0.00016978188646540518, "loss": 3.2389, "step": 1147 }, { "epoch": 0.16, "learning_rate": 0.00016975299725552508, "loss": 3.3991, "step": 1148 }, { "epoch": 0.16, "learning_rate": 0.00016972410804564497, "loss": 3.3714, "step": 1149 }, { "epoch": 0.16, "learning_rate": 0.00016969521883576484, "loss": 3.5075, "step": 1150 }, { "epoch": 0.16, "learning_rate": 0.00016966632962588473, "loss": 3.4373, "step": 1151 }, { "epoch": 0.16, "learning_rate": 0.00016963744041600463, "loss": 3.5701, "step": 1152 }, { "epoch": 0.16, "learning_rate": 0.00016960855120612452, "loss": 3.4354, "step": 1153 }, { "epoch": 0.16, "learning_rate": 0.0001695796619962444, "loss": 3.3822, "step": 1154 }, { "epoch": 0.16, "learning_rate": 0.0001695507727863643, "loss": 3.3253, "step": 1155 }, { "epoch": 0.16, "learning_rate": 0.0001695218835764842, "loss": 3.5329, "step": 1156 }, { "epoch": 0.16, "learning_rate": 0.0001694929943666041, "loss": 3.5455, "step": 1157 }, { "epoch": 0.16, "learning_rate": 0.00016946410515672396, "loss": 3.4045, "step": 1158 }, { "epoch": 0.17, "learning_rate": 0.00016943521594684386, "loss": 3.5935, "step": 1159 }, { "epoch": 0.17, "learning_rate": 0.00016940632673696375, "loss": 3.4058, "step": 1160 }, { "epoch": 0.17, "learning_rate": 0.00016937743752708364, "loss": 3.5842, "step": 1161 }, { "epoch": 0.17, "learning_rate": 0.00016934854831720354, "loss": 3.429, "step": 1162 }, { "epoch": 0.17, "learning_rate": 0.0001693196591073234, "loss": 3.4422, "step": 1163 }, { "epoch": 0.17, "learning_rate": 0.0001692907698974433, "loss": 3.5908, "step": 1164 }, { "epoch": 0.17, "learning_rate": 0.00016926188068756322, "loss": 3.6408, "step": 1165 }, { "epoch": 0.17, "learning_rate": 0.0001692329914776831, "loss": 3.4388, "step": 1166 }, { "epoch": 0.17, "learning_rate": 0.00016920410226780298, "loss": 3.5309, "step": 1167 }, { "epoch": 0.17, "learning_rate": 0.00016917521305792287, "loss": 3.3306, "step": 1168 }, { "epoch": 0.17, "learning_rate": 0.00016914632384804277, "loss": 3.5048, "step": 1169 }, { "epoch": 0.17, "learning_rate": 0.00016911743463816266, "loss": 3.6547, "step": 1170 }, { "epoch": 0.17, "learning_rate": 0.00016908854542828255, "loss": 3.5393, "step": 1171 }, { "epoch": 0.17, "learning_rate": 0.00016905965621840242, "loss": 3.4526, "step": 1172 }, { "epoch": 0.17, "learning_rate": 0.00016903076700852231, "loss": 3.5734, "step": 1173 }, { "epoch": 0.17, "learning_rate": 0.0001690018777986422, "loss": 3.4974, "step": 1174 }, { "epoch": 0.17, "learning_rate": 0.0001689729885887621, "loss": 3.5531, "step": 1175 }, { "epoch": 0.17, "learning_rate": 0.000168944099378882, "loss": 3.6911, "step": 1176 }, { "epoch": 0.17, "learning_rate": 0.0001689152101690019, "loss": 3.5546, "step": 1177 }, { "epoch": 0.17, "learning_rate": 0.00016888632095912178, "loss": 3.5638, "step": 1178 }, { "epoch": 0.17, "learning_rate": 0.00016885743174924168, "loss": 3.5208, "step": 1179 }, { "epoch": 0.17, "learning_rate": 0.00016882854253936154, "loss": 3.5458, "step": 1180 }, { "epoch": 0.17, "learning_rate": 0.00016879965332948144, "loss": 3.4993, "step": 1181 }, { "epoch": 0.17, "learning_rate": 0.00016877076411960133, "loss": 3.5327, "step": 1182 }, { "epoch": 0.17, "learning_rate": 0.00016874187490972123, "loss": 3.6325, "step": 1183 }, { "epoch": 0.17, "learning_rate": 0.00016871298569984112, "loss": 3.5104, "step": 1184 }, { "epoch": 0.17, "learning_rate": 0.00016868409648996099, "loss": 3.3472, "step": 1185 }, { "epoch": 0.17, "learning_rate": 0.0001686552072800809, "loss": 3.2308, "step": 1186 }, { "epoch": 0.17, "learning_rate": 0.0001686263180702008, "loss": 3.5748, "step": 1187 }, { "epoch": 0.17, "learning_rate": 0.0001685974288603207, "loss": 3.5129, "step": 1188 }, { "epoch": 0.17, "learning_rate": 0.00016856853965044056, "loss": 3.2358, "step": 1189 }, { "epoch": 0.17, "learning_rate": 0.00016853965044056046, "loss": 3.6463, "step": 1190 }, { "epoch": 0.17, "learning_rate": 0.00016851076123068035, "loss": 3.4268, "step": 1191 }, { "epoch": 0.17, "learning_rate": 0.00016848187202080024, "loss": 3.5219, "step": 1192 }, { "epoch": 0.17, "learning_rate": 0.00016845298281092014, "loss": 3.5516, "step": 1193 }, { "epoch": 0.17, "learning_rate": 0.00016842409360104, "loss": 3.5834, "step": 1194 }, { "epoch": 0.17, "learning_rate": 0.0001683952043911599, "loss": 3.4691, "step": 1195 }, { "epoch": 0.17, "learning_rate": 0.00016836631518127982, "loss": 3.5435, "step": 1196 }, { "epoch": 0.17, "learning_rate": 0.00016833742597139969, "loss": 3.4956, "step": 1197 }, { "epoch": 0.17, "learning_rate": 0.00016830853676151958, "loss": 3.5069, "step": 1198 }, { "epoch": 0.17, "learning_rate": 0.00016827964755163947, "loss": 3.411, "step": 1199 }, { "epoch": 0.17, "learning_rate": 0.00016825075834175937, "loss": 3.5438, "step": 1200 }, { "epoch": 0.17, "learning_rate": 0.00016822186913187926, "loss": 3.5069, "step": 1201 }, { "epoch": 0.17, "learning_rate": 0.00016819297992199913, "loss": 3.5991, "step": 1202 }, { "epoch": 0.17, "learning_rate": 0.00016816409071211902, "loss": 3.3631, "step": 1203 }, { "epoch": 0.17, "learning_rate": 0.00016813520150223891, "loss": 3.5341, "step": 1204 }, { "epoch": 0.17, "learning_rate": 0.0001681063122923588, "loss": 3.4704, "step": 1205 }, { "epoch": 0.17, "learning_rate": 0.0001680774230824787, "loss": 3.3993, "step": 1206 }, { "epoch": 0.17, "learning_rate": 0.0001680485338725986, "loss": 3.3834, "step": 1207 }, { "epoch": 0.17, "learning_rate": 0.0001680196446627185, "loss": 3.5037, "step": 1208 }, { "epoch": 0.17, "learning_rate": 0.00016799075545283838, "loss": 3.5927, "step": 1209 }, { "epoch": 0.17, "learning_rate": 0.00016796186624295828, "loss": 3.625, "step": 1210 }, { "epoch": 0.17, "learning_rate": 0.00016793297703307814, "loss": 3.5295, "step": 1211 }, { "epoch": 0.17, "learning_rate": 0.00016790408782319804, "loss": 3.3316, "step": 1212 }, { "epoch": 0.17, "learning_rate": 0.00016787519861331793, "loss": 3.5151, "step": 1213 }, { "epoch": 0.17, "learning_rate": 0.00016784630940343783, "loss": 3.4836, "step": 1214 }, { "epoch": 0.17, "learning_rate": 0.00016781742019355772, "loss": 3.5249, "step": 1215 }, { "epoch": 0.17, "learning_rate": 0.00016778853098367759, "loss": 3.3766, "step": 1216 }, { "epoch": 0.17, "learning_rate": 0.0001677596417737975, "loss": 3.5304, "step": 1217 }, { "epoch": 0.17, "learning_rate": 0.0001677307525639174, "loss": 3.4835, "step": 1218 }, { "epoch": 0.17, "learning_rate": 0.00016770186335403727, "loss": 3.7327, "step": 1219 }, { "epoch": 0.17, "learning_rate": 0.00016767297414415716, "loss": 3.4814, "step": 1220 }, { "epoch": 0.17, "learning_rate": 0.00016764408493427706, "loss": 3.4791, "step": 1221 }, { "epoch": 0.17, "learning_rate": 0.00016761519572439695, "loss": 3.5619, "step": 1222 }, { "epoch": 0.17, "learning_rate": 0.00016758630651451684, "loss": 3.5676, "step": 1223 }, { "epoch": 0.17, "learning_rate": 0.0001675574173046367, "loss": 3.3664, "step": 1224 }, { "epoch": 0.17, "learning_rate": 0.0001675285280947566, "loss": 3.3978, "step": 1225 }, { "epoch": 0.17, "learning_rate": 0.0001674996388848765, "loss": 3.5553, "step": 1226 }, { "epoch": 0.17, "learning_rate": 0.00016747074967499642, "loss": 3.383, "step": 1227 }, { "epoch": 0.17, "learning_rate": 0.00016744186046511629, "loss": 3.5232, "step": 1228 }, { "epoch": 0.17, "learning_rate": 0.00016741297125523618, "loss": 3.3742, "step": 1229 }, { "epoch": 0.18, "learning_rate": 0.00016738408204535607, "loss": 3.4612, "step": 1230 }, { "epoch": 0.18, "learning_rate": 0.00016735519283547597, "loss": 3.5645, "step": 1231 }, { "epoch": 0.18, "learning_rate": 0.00016732630362559586, "loss": 3.4287, "step": 1232 }, { "epoch": 0.18, "learning_rate": 0.00016729741441571573, "loss": 3.5151, "step": 1233 }, { "epoch": 0.18, "learning_rate": 0.00016726852520583562, "loss": 3.5459, "step": 1234 }, { "epoch": 0.18, "learning_rate": 0.00016723963599595551, "loss": 3.6089, "step": 1235 }, { "epoch": 0.18, "learning_rate": 0.0001672107467860754, "loss": 3.5577, "step": 1236 }, { "epoch": 0.18, "learning_rate": 0.00016718185757619528, "loss": 3.6152, "step": 1237 }, { "epoch": 0.18, "learning_rate": 0.0001671529683663152, "loss": 3.5544, "step": 1238 }, { "epoch": 0.18, "learning_rate": 0.0001671240791564351, "loss": 3.519, "step": 1239 }, { "epoch": 0.18, "learning_rate": 0.00016709518994655498, "loss": 3.4751, "step": 1240 }, { "epoch": 0.18, "learning_rate": 0.00016706630073667485, "loss": 3.4899, "step": 1241 }, { "epoch": 0.18, "learning_rate": 0.00016703741152679474, "loss": 3.3328, "step": 1242 }, { "epoch": 0.18, "learning_rate": 0.00016700852231691464, "loss": 3.3789, "step": 1243 }, { "epoch": 0.18, "learning_rate": 0.00016697963310703453, "loss": 3.4779, "step": 1244 }, { "epoch": 0.18, "learning_rate": 0.00016695074389715443, "loss": 3.4797, "step": 1245 }, { "epoch": 0.18, "learning_rate": 0.0001669218546872743, "loss": 3.4116, "step": 1246 }, { "epoch": 0.18, "learning_rate": 0.0001668929654773942, "loss": 3.5402, "step": 1247 }, { "epoch": 0.18, "learning_rate": 0.0001668640762675141, "loss": 3.4977, "step": 1248 }, { "epoch": 0.18, "learning_rate": 0.000166835187057634, "loss": 3.5899, "step": 1249 }, { "epoch": 0.18, "learning_rate": 0.00016680629784775387, "loss": 3.4811, "step": 1250 }, { "epoch": 0.18, "learning_rate": 0.00016677740863787376, "loss": 3.5071, "step": 1251 }, { "epoch": 0.18, "learning_rate": 0.00016674851942799366, "loss": 3.5884, "step": 1252 }, { "epoch": 0.18, "learning_rate": 0.00016671963021811355, "loss": 3.5284, "step": 1253 }, { "epoch": 0.18, "learning_rate": 0.00016669074100823344, "loss": 3.3557, "step": 1254 }, { "epoch": 0.18, "learning_rate": 0.0001666618517983533, "loss": 3.3826, "step": 1255 }, { "epoch": 0.18, "learning_rate": 0.0001666329625884732, "loss": 3.5119, "step": 1256 }, { "epoch": 0.18, "learning_rate": 0.0001666040733785931, "loss": 3.4175, "step": 1257 }, { "epoch": 0.18, "learning_rate": 0.000166575184168713, "loss": 3.4374, "step": 1258 }, { "epoch": 0.18, "learning_rate": 0.00016654629495883289, "loss": 3.4862, "step": 1259 }, { "epoch": 0.18, "learning_rate": 0.00016651740574895278, "loss": 3.3808, "step": 1260 }, { "epoch": 0.18, "learning_rate": 0.00016648851653907267, "loss": 3.2596, "step": 1261 }, { "epoch": 0.18, "learning_rate": 0.00016645962732919257, "loss": 3.4759, "step": 1262 }, { "epoch": 0.18, "learning_rate": 0.00016643073811931243, "loss": 3.5809, "step": 1263 }, { "epoch": 0.18, "learning_rate": 0.00016640184890943233, "loss": 3.5571, "step": 1264 }, { "epoch": 0.18, "learning_rate": 0.00016637295969955222, "loss": 3.4506, "step": 1265 }, { "epoch": 0.18, "learning_rate": 0.00016634407048967212, "loss": 3.496, "step": 1266 }, { "epoch": 0.18, "learning_rate": 0.000166315181279792, "loss": 3.5072, "step": 1267 }, { "epoch": 0.18, "learning_rate": 0.00016628629206991188, "loss": 3.4787, "step": 1268 }, { "epoch": 0.18, "learning_rate": 0.0001662574028600318, "loss": 3.4776, "step": 1269 }, { "epoch": 0.18, "learning_rate": 0.0001662285136501517, "loss": 3.4604, "step": 1270 }, { "epoch": 0.18, "learning_rate": 0.00016619962444027158, "loss": 3.4504, "step": 1271 }, { "epoch": 0.18, "learning_rate": 0.00016617073523039145, "loss": 3.4307, "step": 1272 }, { "epoch": 0.18, "learning_rate": 0.00016614184602051134, "loss": 3.4938, "step": 1273 }, { "epoch": 0.18, "learning_rate": 0.00016611295681063124, "loss": 3.3841, "step": 1274 }, { "epoch": 0.18, "learning_rate": 0.00016608406760075113, "loss": 3.0949, "step": 1275 }, { "epoch": 0.18, "learning_rate": 0.00016605517839087103, "loss": 3.5167, "step": 1276 }, { "epoch": 0.18, "learning_rate": 0.0001660262891809909, "loss": 3.3093, "step": 1277 }, { "epoch": 0.18, "learning_rate": 0.0001659973999711108, "loss": 3.3595, "step": 1278 }, { "epoch": 0.18, "learning_rate": 0.00016596851076123068, "loss": 3.4556, "step": 1279 }, { "epoch": 0.18, "learning_rate": 0.0001659396215513506, "loss": 3.564, "step": 1280 }, { "epoch": 0.18, "learning_rate": 0.00016591073234147047, "loss": 3.6636, "step": 1281 }, { "epoch": 0.18, "learning_rate": 0.00016588184313159036, "loss": 3.5133, "step": 1282 }, { "epoch": 0.18, "learning_rate": 0.00016585295392171026, "loss": 3.5039, "step": 1283 }, { "epoch": 0.18, "learning_rate": 0.00016582406471183015, "loss": 3.4521, "step": 1284 }, { "epoch": 0.18, "learning_rate": 0.00016579517550195002, "loss": 3.4918, "step": 1285 }, { "epoch": 0.18, "learning_rate": 0.0001657662862920699, "loss": 3.3308, "step": 1286 }, { "epoch": 0.18, "learning_rate": 0.0001657373970821898, "loss": 3.5266, "step": 1287 }, { "epoch": 0.18, "learning_rate": 0.0001657085078723097, "loss": 3.5503, "step": 1288 }, { "epoch": 0.18, "learning_rate": 0.0001656796186624296, "loss": 3.4942, "step": 1289 }, { "epoch": 0.18, "learning_rate": 0.00016565072945254949, "loss": 3.3854, "step": 1290 }, { "epoch": 0.18, "learning_rate": 0.00016562184024266938, "loss": 3.5463, "step": 1291 }, { "epoch": 0.18, "learning_rate": 0.00016559295103278927, "loss": 3.3519, "step": 1292 }, { "epoch": 0.18, "learning_rate": 0.00016556406182290917, "loss": 3.3959, "step": 1293 }, { "epoch": 0.18, "learning_rate": 0.00016553517261302903, "loss": 3.3282, "step": 1294 }, { "epoch": 0.18, "learning_rate": 0.00016550628340314893, "loss": 3.4241, "step": 1295 }, { "epoch": 0.18, "learning_rate": 0.00016547739419326882, "loss": 3.5177, "step": 1296 }, { "epoch": 0.18, "learning_rate": 0.00016544850498338872, "loss": 3.4661, "step": 1297 }, { "epoch": 0.18, "learning_rate": 0.0001654196157735086, "loss": 3.4302, "step": 1298 }, { "epoch": 0.18, "learning_rate": 0.00016539072656362848, "loss": 3.5691, "step": 1299 }, { "epoch": 0.19, "learning_rate": 0.00016536183735374837, "loss": 3.47, "step": 1300 }, { "epoch": 0.19, "learning_rate": 0.0001653329481438683, "loss": 3.4886, "step": 1301 }, { "epoch": 0.19, "learning_rate": 0.00016530405893398816, "loss": 3.6058, "step": 1302 }, { "epoch": 0.19, "learning_rate": 0.00016527516972410805, "loss": 3.2861, "step": 1303 }, { "epoch": 0.19, "learning_rate": 0.00016524628051422794, "loss": 3.3734, "step": 1304 }, { "epoch": 0.19, "learning_rate": 0.00016521739130434784, "loss": 3.581, "step": 1305 }, { "epoch": 0.19, "learning_rate": 0.00016518850209446773, "loss": 3.4347, "step": 1306 }, { "epoch": 0.19, "learning_rate": 0.0001651596128845876, "loss": 3.4264, "step": 1307 }, { "epoch": 0.19, "learning_rate": 0.0001651307236747075, "loss": 3.5391, "step": 1308 }, { "epoch": 0.19, "learning_rate": 0.0001651018344648274, "loss": 3.3605, "step": 1309 }, { "epoch": 0.19, "learning_rate": 0.00016507294525494728, "loss": 3.4065, "step": 1310 }, { "epoch": 0.19, "learning_rate": 0.00016504405604506717, "loss": 3.4079, "step": 1311 }, { "epoch": 0.19, "learning_rate": 0.00016501516683518707, "loss": 3.5497, "step": 1312 }, { "epoch": 0.19, "learning_rate": 0.00016498627762530696, "loss": 3.4103, "step": 1313 }, { "epoch": 0.19, "learning_rate": 0.00016495738841542686, "loss": 3.4212, "step": 1314 }, { "epoch": 0.19, "learning_rate": 0.00016492849920554675, "loss": 3.5213, "step": 1315 }, { "epoch": 0.19, "learning_rate": 0.00016489960999566662, "loss": 3.3493, "step": 1316 }, { "epoch": 0.19, "learning_rate": 0.0001648707207857865, "loss": 3.5141, "step": 1317 }, { "epoch": 0.19, "learning_rate": 0.0001648418315759064, "loss": 3.3057, "step": 1318 }, { "epoch": 0.19, "learning_rate": 0.0001648129423660263, "loss": 3.5041, "step": 1319 }, { "epoch": 0.19, "learning_rate": 0.0001647840531561462, "loss": 3.5368, "step": 1320 }, { "epoch": 0.19, "learning_rate": 0.00016475516394626606, "loss": 3.422, "step": 1321 }, { "epoch": 0.19, "learning_rate": 0.00016472627473638598, "loss": 3.5449, "step": 1322 }, { "epoch": 0.19, "learning_rate": 0.00016469738552650587, "loss": 3.5309, "step": 1323 }, { "epoch": 0.19, "learning_rate": 0.00016466849631662574, "loss": 3.4928, "step": 1324 }, { "epoch": 0.19, "learning_rate": 0.00016463960710674563, "loss": 3.5239, "step": 1325 }, { "epoch": 0.19, "learning_rate": 0.00016461071789686553, "loss": 3.4141, "step": 1326 }, { "epoch": 0.19, "learning_rate": 0.00016458182868698542, "loss": 3.5027, "step": 1327 }, { "epoch": 0.19, "learning_rate": 0.00016455293947710532, "loss": 3.4508, "step": 1328 }, { "epoch": 0.19, "learning_rate": 0.00016452405026722518, "loss": 3.4924, "step": 1329 }, { "epoch": 0.19, "learning_rate": 0.00016449516105734508, "loss": 3.4917, "step": 1330 }, { "epoch": 0.19, "learning_rate": 0.00016446627184746497, "loss": 3.5257, "step": 1331 }, { "epoch": 0.19, "learning_rate": 0.0001644373826375849, "loss": 3.5661, "step": 1332 }, { "epoch": 0.19, "learning_rate": 0.00016440849342770476, "loss": 3.4663, "step": 1333 }, { "epoch": 0.19, "learning_rate": 0.00016437960421782465, "loss": 3.4531, "step": 1334 }, { "epoch": 0.19, "learning_rate": 0.00016435071500794455, "loss": 3.4828, "step": 1335 }, { "epoch": 0.19, "learning_rate": 0.00016432182579806444, "loss": 3.4431, "step": 1336 }, { "epoch": 0.19, "learning_rate": 0.00016429293658818433, "loss": 3.4394, "step": 1337 }, { "epoch": 0.19, "learning_rate": 0.0001642640473783042, "loss": 3.4915, "step": 1338 }, { "epoch": 0.19, "learning_rate": 0.0001642351581684241, "loss": 3.5472, "step": 1339 }, { "epoch": 0.19, "learning_rate": 0.000164206268958544, "loss": 3.3655, "step": 1340 }, { "epoch": 0.19, "learning_rate": 0.00016417737974866388, "loss": 3.4857, "step": 1341 }, { "epoch": 0.19, "learning_rate": 0.00016414849053878377, "loss": 3.4078, "step": 1342 }, { "epoch": 0.19, "learning_rate": 0.00016411960132890367, "loss": 3.5025, "step": 1343 }, { "epoch": 0.19, "learning_rate": 0.00016409071211902356, "loss": 3.4253, "step": 1344 }, { "epoch": 0.19, "learning_rate": 0.00016406182290914346, "loss": 3.5449, "step": 1345 }, { "epoch": 0.19, "learning_rate": 0.00016403293369926332, "loss": 3.5968, "step": 1346 }, { "epoch": 0.19, "learning_rate": 0.00016400404448938322, "loss": 3.2201, "step": 1347 }, { "epoch": 0.19, "learning_rate": 0.0001639751552795031, "loss": 3.5532, "step": 1348 }, { "epoch": 0.19, "learning_rate": 0.000163946266069623, "loss": 3.51, "step": 1349 }, { "epoch": 0.19, "learning_rate": 0.0001639173768597429, "loss": 3.4733, "step": 1350 }, { "epoch": 0.19, "learning_rate": 0.00016388848764986276, "loss": 3.603, "step": 1351 }, { "epoch": 0.19, "learning_rate": 0.00016385959843998266, "loss": 3.4945, "step": 1352 }, { "epoch": 0.19, "learning_rate": 0.00016383070923010258, "loss": 3.5108, "step": 1353 }, { "epoch": 0.19, "learning_rate": 0.00016380182002022247, "loss": 3.5857, "step": 1354 }, { "epoch": 0.19, "learning_rate": 0.00016377293081034234, "loss": 3.376, "step": 1355 }, { "epoch": 0.19, "learning_rate": 0.00016374404160046223, "loss": 3.5224, "step": 1356 }, { "epoch": 0.19, "learning_rate": 0.00016371515239058213, "loss": 3.4412, "step": 1357 }, { "epoch": 0.19, "learning_rate": 0.00016368626318070202, "loss": 3.4796, "step": 1358 }, { "epoch": 0.19, "learning_rate": 0.00016365737397082192, "loss": 3.5574, "step": 1359 }, { "epoch": 0.19, "learning_rate": 0.00016362848476094178, "loss": 3.3681, "step": 1360 }, { "epoch": 0.19, "learning_rate": 0.00016359959555106168, "loss": 3.462, "step": 1361 }, { "epoch": 0.19, "learning_rate": 0.00016357070634118157, "loss": 3.3938, "step": 1362 }, { "epoch": 0.19, "learning_rate": 0.00016354181713130146, "loss": 3.628, "step": 1363 }, { "epoch": 0.19, "learning_rate": 0.00016351292792142136, "loss": 3.5116, "step": 1364 }, { "epoch": 0.19, "learning_rate": 0.00016348403871154125, "loss": 3.5233, "step": 1365 }, { "epoch": 0.19, "learning_rate": 0.00016345514950166115, "loss": 3.2241, "step": 1366 }, { "epoch": 0.19, "learning_rate": 0.00016342626029178104, "loss": 3.4676, "step": 1367 }, { "epoch": 0.19, "learning_rate": 0.0001633973710819009, "loss": 3.5448, "step": 1368 }, { "epoch": 0.19, "learning_rate": 0.0001633684818720208, "loss": 3.5828, "step": 1369 }, { "epoch": 0.2, "learning_rate": 0.0001633395926621407, "loss": 3.5154, "step": 1370 }, { "epoch": 0.2, "learning_rate": 0.0001633107034522606, "loss": 3.4541, "step": 1371 }, { "epoch": 0.2, "learning_rate": 0.00016328181424238048, "loss": 3.4759, "step": 1372 }, { "epoch": 0.2, "learning_rate": 0.00016325292503250035, "loss": 3.4711, "step": 1373 }, { "epoch": 0.2, "learning_rate": 0.00016322403582262027, "loss": 3.563, "step": 1374 }, { "epoch": 0.2, "learning_rate": 0.00016319514661274016, "loss": 3.3796, "step": 1375 }, { "epoch": 0.2, "learning_rate": 0.00016316625740286006, "loss": 3.4592, "step": 1376 }, { "epoch": 0.2, "learning_rate": 0.00016313736819297992, "loss": 3.4986, "step": 1377 }, { "epoch": 0.2, "learning_rate": 0.00016310847898309982, "loss": 3.4143, "step": 1378 }, { "epoch": 0.2, "learning_rate": 0.0001630795897732197, "loss": 3.6112, "step": 1379 }, { "epoch": 0.2, "learning_rate": 0.0001630507005633396, "loss": 3.3096, "step": 1380 }, { "epoch": 0.2, "learning_rate": 0.0001630218113534595, "loss": 3.4938, "step": 1381 }, { "epoch": 0.2, "learning_rate": 0.00016299292214357937, "loss": 3.4479, "step": 1382 }, { "epoch": 0.2, "learning_rate": 0.00016296403293369926, "loss": 3.4046, "step": 1383 }, { "epoch": 0.2, "learning_rate": 0.00016293514372381915, "loss": 3.4154, "step": 1384 }, { "epoch": 0.2, "learning_rate": 0.00016290625451393907, "loss": 3.5072, "step": 1385 }, { "epoch": 0.2, "learning_rate": 0.00016287736530405894, "loss": 3.4145, "step": 1386 }, { "epoch": 0.2, "learning_rate": 0.00016284847609417883, "loss": 3.4102, "step": 1387 }, { "epoch": 0.2, "learning_rate": 0.00016281958688429873, "loss": 3.5517, "step": 1388 }, { "epoch": 0.2, "learning_rate": 0.00016279069767441862, "loss": 3.4998, "step": 1389 }, { "epoch": 0.2, "learning_rate": 0.0001627618084645385, "loss": 3.5467, "step": 1390 }, { "epoch": 0.2, "learning_rate": 0.00016273291925465838, "loss": 3.4933, "step": 1391 }, { "epoch": 0.2, "learning_rate": 0.00016270403004477828, "loss": 3.5235, "step": 1392 }, { "epoch": 0.2, "learning_rate": 0.00016267514083489817, "loss": 3.5461, "step": 1393 }, { "epoch": 0.2, "learning_rate": 0.00016264625162501806, "loss": 3.3848, "step": 1394 }, { "epoch": 0.2, "learning_rate": 0.00016261736241513796, "loss": 3.3974, "step": 1395 }, { "epoch": 0.2, "learning_rate": 0.00016258847320525785, "loss": 3.37, "step": 1396 }, { "epoch": 0.2, "learning_rate": 0.00016255958399537775, "loss": 3.6363, "step": 1397 }, { "epoch": 0.2, "learning_rate": 0.00016253069478549764, "loss": 3.4066, "step": 1398 }, { "epoch": 0.2, "learning_rate": 0.0001625018055756175, "loss": 3.345, "step": 1399 }, { "epoch": 0.2, "learning_rate": 0.0001624729163657374, "loss": 3.4497, "step": 1400 }, { "epoch": 0.2, "learning_rate": 0.0001624440271558573, "loss": 3.5384, "step": 1401 }, { "epoch": 0.2, "learning_rate": 0.0001624151379459772, "loss": 3.5991, "step": 1402 }, { "epoch": 0.2, "learning_rate": 0.00016238624873609708, "loss": 3.4692, "step": 1403 }, { "epoch": 0.2, "learning_rate": 0.00016235735952621695, "loss": 3.4696, "step": 1404 }, { "epoch": 0.2, "learning_rate": 0.00016232847031633687, "loss": 3.5949, "step": 1405 }, { "epoch": 0.2, "learning_rate": 0.00016229958110645676, "loss": 3.3968, "step": 1406 }, { "epoch": 0.2, "learning_rate": 0.00016227069189657666, "loss": 3.5565, "step": 1407 }, { "epoch": 0.2, "learning_rate": 0.00016224180268669652, "loss": 3.4906, "step": 1408 }, { "epoch": 0.2, "eval_loss": 3.605428695678711, "eval_runtime": 471.784, "eval_samples_per_second": 43.425, "eval_steps_per_second": 14.475, "step": 1408 }, { "epoch": 0.2, "learning_rate": 0.00016221291347681642, "loss": 3.3684, "step": 1409 }, { "epoch": 0.2, "learning_rate": 0.0001621840242669363, "loss": 3.4983, "step": 1410 }, { "epoch": 0.2, "learning_rate": 0.0001621551350570562, "loss": 3.5674, "step": 1411 }, { "epoch": 0.2, "learning_rate": 0.00016212624584717607, "loss": 3.3104, "step": 1412 }, { "epoch": 0.2, "learning_rate": 0.00016209735663729597, "loss": 3.5129, "step": 1413 }, { "epoch": 0.2, "learning_rate": 0.00016206846742741586, "loss": 3.2757, "step": 1414 }, { "epoch": 0.2, "learning_rate": 0.00016203957821753575, "loss": 3.5879, "step": 1415 }, { "epoch": 0.2, "learning_rate": 0.00016201068900765565, "loss": 3.5024, "step": 1416 }, { "epoch": 0.2, "learning_rate": 0.00016198179979777554, "loss": 3.6142, "step": 1417 }, { "epoch": 0.2, "learning_rate": 0.00016195291058789543, "loss": 3.5676, "step": 1418 }, { "epoch": 0.2, "learning_rate": 0.00016192402137801533, "loss": 3.5295, "step": 1419 }, { "epoch": 0.2, "learning_rate": 0.00016189513216813522, "loss": 3.5358, "step": 1420 }, { "epoch": 0.2, "learning_rate": 0.0001618662429582551, "loss": 3.4825, "step": 1421 }, { "epoch": 0.2, "learning_rate": 0.00016183735374837498, "loss": 3.3527, "step": 1422 }, { "epoch": 0.2, "learning_rate": 0.00016180846453849488, "loss": 3.5883, "step": 1423 }, { "epoch": 0.2, "learning_rate": 0.00016177957532861477, "loss": 3.407, "step": 1424 }, { "epoch": 0.2, "learning_rate": 0.00016175068611873466, "loss": 3.5245, "step": 1425 }, { "epoch": 0.2, "learning_rate": 0.00016172179690885456, "loss": 3.4443, "step": 1426 }, { "epoch": 0.2, "learning_rate": 0.00016169290769897445, "loss": 3.3916, "step": 1427 }, { "epoch": 0.2, "learning_rate": 0.00016166401848909435, "loss": 3.3604, "step": 1428 }, { "epoch": 0.2, "learning_rate": 0.0001616351292792142, "loss": 3.475, "step": 1429 }, { "epoch": 0.2, "learning_rate": 0.0001616062400693341, "loss": 3.6084, "step": 1430 }, { "epoch": 0.2, "learning_rate": 0.000161577350859454, "loss": 3.5751, "step": 1431 }, { "epoch": 0.2, "learning_rate": 0.0001615484616495739, "loss": 3.485, "step": 1432 }, { "epoch": 0.2, "learning_rate": 0.0001615195724396938, "loss": 3.5544, "step": 1433 }, { "epoch": 0.2, "learning_rate": 0.00016149068322981365, "loss": 3.5604, "step": 1434 }, { "epoch": 0.2, "learning_rate": 0.00016146179401993355, "loss": 3.5081, "step": 1435 }, { "epoch": 0.2, "learning_rate": 0.00016143290481005344, "loss": 3.4819, "step": 1436 }, { "epoch": 0.2, "learning_rate": 0.00016140401560017336, "loss": 3.4616, "step": 1437 }, { "epoch": 0.2, "learning_rate": 0.00016137512639029323, "loss": 3.5589, "step": 1438 }, { "epoch": 0.2, "learning_rate": 0.00016134623718041312, "loss": 3.4721, "step": 1439 }, { "epoch": 0.21, "learning_rate": 0.00016131734797053302, "loss": 3.5011, "step": 1440 }, { "epoch": 0.21, "learning_rate": 0.0001612884587606529, "loss": 3.5185, "step": 1441 }, { "epoch": 0.21, "learning_rate": 0.0001612595695507728, "loss": 3.5604, "step": 1442 }, { "epoch": 0.21, "learning_rate": 0.00016123068034089267, "loss": 3.577, "step": 1443 }, { "epoch": 0.21, "learning_rate": 0.00016120179113101257, "loss": 3.3364, "step": 1444 }, { "epoch": 0.21, "learning_rate": 0.00016117290192113246, "loss": 3.4346, "step": 1445 }, { "epoch": 0.21, "learning_rate": 0.00016114401271125235, "loss": 3.5262, "step": 1446 }, { "epoch": 0.21, "learning_rate": 0.00016111512350137225, "loss": 3.4903, "step": 1447 }, { "epoch": 0.21, "learning_rate": 0.00016108623429149214, "loss": 3.2734, "step": 1448 }, { "epoch": 0.21, "learning_rate": 0.00016105734508161203, "loss": 3.5319, "step": 1449 }, { "epoch": 0.21, "learning_rate": 0.00016102845587173193, "loss": 3.4755, "step": 1450 }, { "epoch": 0.21, "learning_rate": 0.0001609995666618518, "loss": 3.3927, "step": 1451 }, { "epoch": 0.21, "learning_rate": 0.0001609706774519717, "loss": 3.523, "step": 1452 }, { "epoch": 0.21, "learning_rate": 0.00016094178824209158, "loss": 3.448, "step": 1453 }, { "epoch": 0.21, "learning_rate": 0.00016091289903221148, "loss": 3.5125, "step": 1454 }, { "epoch": 0.21, "learning_rate": 0.00016088400982233137, "loss": 3.3653, "step": 1455 }, { "epoch": 0.21, "learning_rate": 0.00016085512061245124, "loss": 3.5488, "step": 1456 }, { "epoch": 0.21, "learning_rate": 0.00016082623140257113, "loss": 3.3857, "step": 1457 }, { "epoch": 0.21, "learning_rate": 0.00016079734219269105, "loss": 3.4681, "step": 1458 }, { "epoch": 0.21, "learning_rate": 0.00016076845298281095, "loss": 3.5075, "step": 1459 }, { "epoch": 0.21, "learning_rate": 0.0001607395637729308, "loss": 3.4557, "step": 1460 }, { "epoch": 0.21, "learning_rate": 0.0001607106745630507, "loss": 3.5808, "step": 1461 }, { "epoch": 0.21, "learning_rate": 0.0001606817853531706, "loss": 3.6319, "step": 1462 }, { "epoch": 0.21, "learning_rate": 0.0001606528961432905, "loss": 3.3235, "step": 1463 }, { "epoch": 0.21, "learning_rate": 0.0001606240069334104, "loss": 3.4253, "step": 1464 }, { "epoch": 0.21, "learning_rate": 0.00016059511772353025, "loss": 3.4317, "step": 1465 }, { "epoch": 0.21, "learning_rate": 0.00016056622851365015, "loss": 3.3315, "step": 1466 }, { "epoch": 0.21, "learning_rate": 0.00016053733930377004, "loss": 3.389, "step": 1467 }, { "epoch": 0.21, "learning_rate": 0.00016050845009388996, "loss": 3.5869, "step": 1468 }, { "epoch": 0.21, "learning_rate": 0.00016047956088400983, "loss": 3.4756, "step": 1469 }, { "epoch": 0.21, "learning_rate": 0.00016045067167412972, "loss": 3.5431, "step": 1470 }, { "epoch": 0.21, "learning_rate": 0.00016042178246424962, "loss": 3.5496, "step": 1471 }, { "epoch": 0.21, "learning_rate": 0.0001603928932543695, "loss": 3.4815, "step": 1472 }, { "epoch": 0.21, "learning_rate": 0.00016036400404448938, "loss": 3.4222, "step": 1473 }, { "epoch": 0.21, "learning_rate": 0.00016033511483460927, "loss": 3.3973, "step": 1474 }, { "epoch": 0.21, "learning_rate": 0.00016030622562472917, "loss": 3.5076, "step": 1475 }, { "epoch": 0.21, "learning_rate": 0.00016027733641484906, "loss": 3.5157, "step": 1476 }, { "epoch": 0.21, "learning_rate": 0.00016024844720496895, "loss": 3.3839, "step": 1477 }, { "epoch": 0.21, "learning_rate": 0.00016021955799508882, "loss": 3.4792, "step": 1478 }, { "epoch": 0.21, "learning_rate": 0.00016019066878520874, "loss": 3.4945, "step": 1479 }, { "epoch": 0.21, "learning_rate": 0.00016016177957532863, "loss": 3.5246, "step": 1480 }, { "epoch": 0.21, "learning_rate": 0.00016013289036544853, "loss": 3.3951, "step": 1481 }, { "epoch": 0.21, "learning_rate": 0.0001601040011555684, "loss": 3.548, "step": 1482 }, { "epoch": 0.21, "learning_rate": 0.0001600751119456883, "loss": 3.5085, "step": 1483 }, { "epoch": 0.21, "learning_rate": 0.00016004622273580818, "loss": 3.3503, "step": 1484 }, { "epoch": 0.21, "learning_rate": 0.00016001733352592808, "loss": 3.5932, "step": 1485 }, { "epoch": 0.21, "learning_rate": 0.00015998844431604797, "loss": 3.4716, "step": 1486 }, { "epoch": 0.21, "learning_rate": 0.00015995955510616784, "loss": 3.6247, "step": 1487 }, { "epoch": 0.21, "learning_rate": 0.00015993066589628773, "loss": 3.5502, "step": 1488 }, { "epoch": 0.21, "learning_rate": 0.00015990177668640765, "loss": 3.4689, "step": 1489 }, { "epoch": 0.21, "learning_rate": 0.00015987288747652755, "loss": 3.4942, "step": 1490 }, { "epoch": 0.21, "learning_rate": 0.0001598439982666474, "loss": 3.4845, "step": 1491 }, { "epoch": 0.21, "learning_rate": 0.0001598151090567673, "loss": 3.4916, "step": 1492 }, { "epoch": 0.21, "learning_rate": 0.0001597862198468872, "loss": 3.3864, "step": 1493 }, { "epoch": 0.21, "learning_rate": 0.0001597573306370071, "loss": 3.4457, "step": 1494 }, { "epoch": 0.21, "learning_rate": 0.00015972844142712696, "loss": 3.3781, "step": 1495 }, { "epoch": 0.21, "learning_rate": 0.00015969955221724685, "loss": 3.3533, "step": 1496 }, { "epoch": 0.21, "learning_rate": 0.00015967066300736675, "loss": 3.5256, "step": 1497 }, { "epoch": 0.21, "learning_rate": 0.00015964177379748664, "loss": 3.5881, "step": 1498 }, { "epoch": 0.21, "learning_rate": 0.00015961288458760654, "loss": 3.4377, "step": 1499 }, { "epoch": 0.21, "learning_rate": 0.00015958399537772643, "loss": 3.4979, "step": 1500 }, { "epoch": 0.21, "learning_rate": 0.00015955510616784632, "loss": 3.2714, "step": 1501 }, { "epoch": 0.21, "learning_rate": 0.00015952621695796622, "loss": 3.5759, "step": 1502 }, { "epoch": 0.21, "learning_rate": 0.0001594973277480861, "loss": 3.5093, "step": 1503 }, { "epoch": 0.21, "learning_rate": 0.00015946843853820598, "loss": 3.2263, "step": 1504 }, { "epoch": 0.21, "learning_rate": 0.00015943954932832587, "loss": 3.417, "step": 1505 }, { "epoch": 0.21, "learning_rate": 0.00015941066011844577, "loss": 3.3492, "step": 1506 }, { "epoch": 0.21, "learning_rate": 0.00015938177090856566, "loss": 3.4252, "step": 1507 }, { "epoch": 0.21, "learning_rate": 0.00015935288169868555, "loss": 3.3727, "step": 1508 }, { "epoch": 0.21, "learning_rate": 0.00015932399248880542, "loss": 3.5917, "step": 1509 }, { "epoch": 0.21, "learning_rate": 0.00015929510327892534, "loss": 3.4071, "step": 1510 }, { "epoch": 0.22, "learning_rate": 0.00015926621406904523, "loss": 3.4108, "step": 1511 }, { "epoch": 0.22, "learning_rate": 0.00015923732485916513, "loss": 3.5325, "step": 1512 }, { "epoch": 0.22, "learning_rate": 0.000159208435649285, "loss": 3.4885, "step": 1513 }, { "epoch": 0.22, "learning_rate": 0.0001591795464394049, "loss": 3.4643, "step": 1514 }, { "epoch": 0.22, "learning_rate": 0.00015915065722952478, "loss": 3.6002, "step": 1515 }, { "epoch": 0.22, "learning_rate": 0.00015912176801964468, "loss": 3.3506, "step": 1516 }, { "epoch": 0.22, "learning_rate": 0.00015909287880976454, "loss": 3.3864, "step": 1517 }, { "epoch": 0.22, "learning_rate": 0.00015906398959988444, "loss": 3.4929, "step": 1518 }, { "epoch": 0.22, "learning_rate": 0.00015903510039000433, "loss": 3.508, "step": 1519 }, { "epoch": 0.22, "learning_rate": 0.00015900621118012423, "loss": 3.4869, "step": 1520 }, { "epoch": 0.22, "learning_rate": 0.00015897732197024412, "loss": 3.4724, "step": 1521 }, { "epoch": 0.22, "learning_rate": 0.000158948432760364, "loss": 3.4725, "step": 1522 }, { "epoch": 0.22, "learning_rate": 0.0001589195435504839, "loss": 3.4134, "step": 1523 }, { "epoch": 0.22, "learning_rate": 0.0001588906543406038, "loss": 3.3603, "step": 1524 }, { "epoch": 0.22, "learning_rate": 0.0001588617651307237, "loss": 3.4585, "step": 1525 }, { "epoch": 0.22, "learning_rate": 0.00015883287592084356, "loss": 3.5225, "step": 1526 }, { "epoch": 0.22, "learning_rate": 0.00015880398671096345, "loss": 3.4008, "step": 1527 }, { "epoch": 0.22, "learning_rate": 0.00015877509750108335, "loss": 3.5919, "step": 1528 }, { "epoch": 0.22, "learning_rate": 0.00015874620829120324, "loss": 3.5007, "step": 1529 }, { "epoch": 0.22, "learning_rate": 0.00015871731908132314, "loss": 3.538, "step": 1530 }, { "epoch": 0.22, "learning_rate": 0.00015868842987144303, "loss": 3.413, "step": 1531 }, { "epoch": 0.22, "learning_rate": 0.00015865954066156292, "loss": 3.5105, "step": 1532 }, { "epoch": 0.22, "learning_rate": 0.00015863065145168282, "loss": 3.3884, "step": 1533 }, { "epoch": 0.22, "learning_rate": 0.0001586017622418027, "loss": 3.5485, "step": 1534 }, { "epoch": 0.22, "learning_rate": 0.00015857287303192258, "loss": 3.5247, "step": 1535 }, { "epoch": 0.22, "learning_rate": 0.00015854398382204247, "loss": 3.441, "step": 1536 }, { "epoch": 0.22, "learning_rate": 0.00015851509461216237, "loss": 3.351, "step": 1537 }, { "epoch": 0.22, "learning_rate": 0.00015848620540228226, "loss": 3.3742, "step": 1538 }, { "epoch": 0.22, "learning_rate": 0.00015845731619240213, "loss": 3.4401, "step": 1539 }, { "epoch": 0.22, "learning_rate": 0.00015842842698252202, "loss": 3.5104, "step": 1540 }, { "epoch": 0.22, "learning_rate": 0.00015839953777264191, "loss": 3.3604, "step": 1541 }, { "epoch": 0.22, "learning_rate": 0.00015837064856276184, "loss": 3.5065, "step": 1542 }, { "epoch": 0.22, "learning_rate": 0.0001583417593528817, "loss": 3.3764, "step": 1543 }, { "epoch": 0.22, "learning_rate": 0.0001583128701430016, "loss": 3.5065, "step": 1544 }, { "epoch": 0.22, "learning_rate": 0.0001582839809331215, "loss": 3.4282, "step": 1545 }, { "epoch": 0.22, "learning_rate": 0.00015825509172324138, "loss": 3.4819, "step": 1546 }, { "epoch": 0.22, "learning_rate": 0.00015822620251336128, "loss": 3.4324, "step": 1547 }, { "epoch": 0.22, "learning_rate": 0.00015819731330348114, "loss": 3.4524, "step": 1548 }, { "epoch": 0.22, "learning_rate": 0.00015816842409360104, "loss": 3.5542, "step": 1549 }, { "epoch": 0.22, "learning_rate": 0.00015813953488372093, "loss": 3.6157, "step": 1550 }, { "epoch": 0.22, "learning_rate": 0.00015811064567384083, "loss": 3.4819, "step": 1551 }, { "epoch": 0.22, "learning_rate": 0.00015808175646396072, "loss": 3.3803, "step": 1552 }, { "epoch": 0.22, "learning_rate": 0.0001580528672540806, "loss": 3.2802, "step": 1553 }, { "epoch": 0.22, "learning_rate": 0.0001580239780442005, "loss": 3.4394, "step": 1554 }, { "epoch": 0.22, "learning_rate": 0.0001579950888343204, "loss": 3.3469, "step": 1555 }, { "epoch": 0.22, "learning_rate": 0.00015796619962444027, "loss": 3.495, "step": 1556 }, { "epoch": 0.22, "learning_rate": 0.00015793731041456016, "loss": 3.5226, "step": 1557 }, { "epoch": 0.22, "learning_rate": 0.00015790842120468006, "loss": 3.4985, "step": 1558 }, { "epoch": 0.22, "learning_rate": 0.00015787953199479995, "loss": 3.5033, "step": 1559 }, { "epoch": 0.22, "learning_rate": 0.00015785064278491984, "loss": 3.3505, "step": 1560 }, { "epoch": 0.22, "learning_rate": 0.0001578217535750397, "loss": 3.39, "step": 1561 }, { "epoch": 0.22, "learning_rate": 0.0001577928643651596, "loss": 3.5471, "step": 1562 }, { "epoch": 0.22, "learning_rate": 0.00015776397515527952, "loss": 3.4033, "step": 1563 }, { "epoch": 0.22, "learning_rate": 0.00015773508594539942, "loss": 3.5035, "step": 1564 }, { "epoch": 0.22, "learning_rate": 0.00015770619673551928, "loss": 3.5532, "step": 1565 }, { "epoch": 0.22, "learning_rate": 0.00015767730752563918, "loss": 3.3897, "step": 1566 }, { "epoch": 0.22, "learning_rate": 0.00015764841831575907, "loss": 3.5054, "step": 1567 }, { "epoch": 0.22, "learning_rate": 0.00015761952910587897, "loss": 3.3783, "step": 1568 }, { "epoch": 0.22, "learning_rate": 0.00015759063989599886, "loss": 3.588, "step": 1569 }, { "epoch": 0.22, "learning_rate": 0.00015756175068611873, "loss": 3.4535, "step": 1570 }, { "epoch": 0.22, "learning_rate": 0.00015753286147623862, "loss": 3.3799, "step": 1571 }, { "epoch": 0.22, "learning_rate": 0.00015750397226635851, "loss": 3.396, "step": 1572 }, { "epoch": 0.22, "learning_rate": 0.00015747508305647844, "loss": 3.385, "step": 1573 }, { "epoch": 0.22, "learning_rate": 0.0001574461938465983, "loss": 3.1487, "step": 1574 }, { "epoch": 0.22, "learning_rate": 0.0001574173046367182, "loss": 3.3268, "step": 1575 }, { "epoch": 0.22, "learning_rate": 0.0001573884154268381, "loss": 3.4515, "step": 1576 }, { "epoch": 0.22, "learning_rate": 0.00015735952621695798, "loss": 3.5814, "step": 1577 }, { "epoch": 0.22, "learning_rate": 0.00015733063700707785, "loss": 3.4728, "step": 1578 }, { "epoch": 0.22, "learning_rate": 0.00015730174779719774, "loss": 3.5349, "step": 1579 }, { "epoch": 0.22, "learning_rate": 0.00015727285858731764, "loss": 3.5144, "step": 1580 }, { "epoch": 0.23, "learning_rate": 0.00015724396937743753, "loss": 3.4857, "step": 1581 }, { "epoch": 0.23, "learning_rate": 0.00015721508016755743, "loss": 3.5092, "step": 1582 }, { "epoch": 0.23, "learning_rate": 0.0001571861909576773, "loss": 3.3719, "step": 1583 }, { "epoch": 0.23, "learning_rate": 0.0001571573017477972, "loss": 3.4875, "step": 1584 }, { "epoch": 0.23, "learning_rate": 0.0001571284125379171, "loss": 3.1237, "step": 1585 }, { "epoch": 0.23, "learning_rate": 0.000157099523328037, "loss": 3.4557, "step": 1586 }, { "epoch": 0.23, "learning_rate": 0.00015707063411815687, "loss": 3.3924, "step": 1587 }, { "epoch": 0.23, "learning_rate": 0.00015704174490827676, "loss": 3.4948, "step": 1588 }, { "epoch": 0.23, "learning_rate": 0.00015701285569839666, "loss": 3.505, "step": 1589 }, { "epoch": 0.23, "learning_rate": 0.00015698396648851655, "loss": 3.4542, "step": 1590 }, { "epoch": 0.23, "learning_rate": 0.00015695507727863644, "loss": 3.4866, "step": 1591 }, { "epoch": 0.23, "learning_rate": 0.0001569261880687563, "loss": 3.435, "step": 1592 }, { "epoch": 0.23, "learning_rate": 0.0001568972988588762, "loss": 3.4887, "step": 1593 }, { "epoch": 0.23, "learning_rate": 0.00015686840964899612, "loss": 3.3567, "step": 1594 }, { "epoch": 0.23, "learning_rate": 0.00015683952043911602, "loss": 3.4085, "step": 1595 }, { "epoch": 0.23, "learning_rate": 0.00015681063122923588, "loss": 3.4561, "step": 1596 }, { "epoch": 0.23, "learning_rate": 0.00015678174201935578, "loss": 3.5834, "step": 1597 }, { "epoch": 0.23, "learning_rate": 0.00015675285280947567, "loss": 3.3371, "step": 1598 }, { "epoch": 0.23, "learning_rate": 0.00015672396359959557, "loss": 3.374, "step": 1599 }, { "epoch": 0.23, "learning_rate": 0.00015669507438971543, "loss": 3.419, "step": 1600 }, { "epoch": 0.23, "learning_rate": 0.00015666618517983533, "loss": 3.5542, "step": 1601 }, { "epoch": 0.23, "learning_rate": 0.00015663729596995522, "loss": 3.3417, "step": 1602 }, { "epoch": 0.23, "learning_rate": 0.00015660840676007511, "loss": 3.3868, "step": 1603 }, { "epoch": 0.23, "learning_rate": 0.000156579517550195, "loss": 3.4926, "step": 1604 }, { "epoch": 0.23, "learning_rate": 0.0001565506283403149, "loss": 3.3727, "step": 1605 }, { "epoch": 0.23, "learning_rate": 0.0001565217391304348, "loss": 3.4854, "step": 1606 }, { "epoch": 0.23, "learning_rate": 0.0001564928499205547, "loss": 3.4923, "step": 1607 }, { "epoch": 0.23, "learning_rate": 0.00015646396071067458, "loss": 3.4982, "step": 1608 }, { "epoch": 0.23, "learning_rate": 0.00015643507150079445, "loss": 3.5484, "step": 1609 }, { "epoch": 0.23, "learning_rate": 0.00015640618229091434, "loss": 3.4584, "step": 1610 }, { "epoch": 0.23, "learning_rate": 0.00015637729308103424, "loss": 3.4635, "step": 1611 }, { "epoch": 0.23, "learning_rate": 0.00015634840387115413, "loss": 3.336, "step": 1612 }, { "epoch": 0.23, "learning_rate": 0.00015631951466127403, "loss": 3.4563, "step": 1613 }, { "epoch": 0.23, "learning_rate": 0.0001562906254513939, "loss": 3.4882, "step": 1614 }, { "epoch": 0.23, "learning_rate": 0.0001562617362415138, "loss": 3.4069, "step": 1615 }, { "epoch": 0.23, "learning_rate": 0.0001562328470316337, "loss": 3.3776, "step": 1616 }, { "epoch": 0.23, "learning_rate": 0.0001562039578217536, "loss": 3.4601, "step": 1617 }, { "epoch": 0.23, "learning_rate": 0.00015617506861187347, "loss": 3.4655, "step": 1618 }, { "epoch": 0.23, "learning_rate": 0.00015614617940199336, "loss": 3.4411, "step": 1619 }, { "epoch": 0.23, "learning_rate": 0.00015611729019211326, "loss": 3.424, "step": 1620 }, { "epoch": 0.23, "learning_rate": 0.00015608840098223315, "loss": 3.5681, "step": 1621 }, { "epoch": 0.23, "learning_rate": 0.00015605951177235302, "loss": 3.5628, "step": 1622 }, { "epoch": 0.23, "learning_rate": 0.0001560306225624729, "loss": 3.6016, "step": 1623 }, { "epoch": 0.23, "learning_rate": 0.0001560017333525928, "loss": 3.3233, "step": 1624 }, { "epoch": 0.23, "learning_rate": 0.00015597284414271272, "loss": 3.4938, "step": 1625 }, { "epoch": 0.23, "learning_rate": 0.0001559439549328326, "loss": 3.5279, "step": 1626 }, { "epoch": 0.23, "learning_rate": 0.00015591506572295249, "loss": 3.351, "step": 1627 }, { "epoch": 0.23, "learning_rate": 0.00015588617651307238, "loss": 3.4287, "step": 1628 }, { "epoch": 0.23, "learning_rate": 0.00015585728730319227, "loss": 3.5784, "step": 1629 }, { "epoch": 0.23, "learning_rate": 0.00015582839809331217, "loss": 3.5075, "step": 1630 }, { "epoch": 0.23, "learning_rate": 0.00015579950888343203, "loss": 3.2393, "step": 1631 }, { "epoch": 0.23, "learning_rate": 0.00015577061967355193, "loss": 3.44, "step": 1632 }, { "epoch": 0.23, "learning_rate": 0.00015574173046367182, "loss": 3.4562, "step": 1633 }, { "epoch": 0.23, "learning_rate": 0.00015571284125379171, "loss": 3.4367, "step": 1634 }, { "epoch": 0.23, "learning_rate": 0.0001556839520439116, "loss": 3.3648, "step": 1635 }, { "epoch": 0.23, "learning_rate": 0.0001556550628340315, "loss": 3.4151, "step": 1636 }, { "epoch": 0.23, "learning_rate": 0.0001556261736241514, "loss": 3.4663, "step": 1637 }, { "epoch": 0.23, "learning_rate": 0.0001555972844142713, "loss": 3.3799, "step": 1638 }, { "epoch": 0.23, "learning_rate": 0.00015556839520439118, "loss": 3.4718, "step": 1639 }, { "epoch": 0.23, "learning_rate": 0.00015553950599451105, "loss": 3.3207, "step": 1640 }, { "epoch": 0.23, "learning_rate": 0.00015551061678463094, "loss": 3.5074, "step": 1641 }, { "epoch": 0.23, "learning_rate": 0.00015548172757475084, "loss": 3.4117, "step": 1642 }, { "epoch": 0.23, "learning_rate": 0.00015545283836487073, "loss": 3.4783, "step": 1643 }, { "epoch": 0.23, "learning_rate": 0.0001554239491549906, "loss": 3.477, "step": 1644 }, { "epoch": 0.23, "learning_rate": 0.0001553950599451105, "loss": 3.41, "step": 1645 }, { "epoch": 0.23, "learning_rate": 0.0001553661707352304, "loss": 3.4416, "step": 1646 }, { "epoch": 0.23, "learning_rate": 0.0001553372815253503, "loss": 3.5455, "step": 1647 }, { "epoch": 0.23, "learning_rate": 0.00015530839231547017, "loss": 3.3198, "step": 1648 }, { "epoch": 0.23, "learning_rate": 0.00015527950310559007, "loss": 3.5073, "step": 1649 }, { "epoch": 0.23, "learning_rate": 0.00015525061389570996, "loss": 3.3613, "step": 1650 }, { "epoch": 0.24, "learning_rate": 0.00015522172468582986, "loss": 3.4919, "step": 1651 }, { "epoch": 0.24, "learning_rate": 0.00015519283547594975, "loss": 3.371, "step": 1652 }, { "epoch": 0.24, "learning_rate": 0.00015516394626606962, "loss": 3.5187, "step": 1653 }, { "epoch": 0.24, "learning_rate": 0.0001551350570561895, "loss": 3.3989, "step": 1654 }, { "epoch": 0.24, "learning_rate": 0.0001551061678463094, "loss": 3.4935, "step": 1655 }, { "epoch": 0.24, "learning_rate": 0.0001550772786364293, "loss": 3.5323, "step": 1656 }, { "epoch": 0.24, "learning_rate": 0.0001550483894265492, "loss": 3.5203, "step": 1657 }, { "epoch": 0.24, "learning_rate": 0.00015501950021666909, "loss": 3.395, "step": 1658 }, { "epoch": 0.24, "learning_rate": 0.00015499061100678898, "loss": 3.4209, "step": 1659 }, { "epoch": 0.24, "learning_rate": 0.00015496172179690887, "loss": 3.5168, "step": 1660 }, { "epoch": 0.24, "learning_rate": 0.00015493283258702877, "loss": 3.5469, "step": 1661 }, { "epoch": 0.24, "learning_rate": 0.00015490394337714863, "loss": 3.4716, "step": 1662 }, { "epoch": 0.24, "learning_rate": 0.00015487505416726853, "loss": 3.4165, "step": 1663 }, { "epoch": 0.24, "learning_rate": 0.00015484616495738842, "loss": 3.3731, "step": 1664 }, { "epoch": 0.24, "learning_rate": 0.00015481727574750831, "loss": 3.5362, "step": 1665 }, { "epoch": 0.24, "learning_rate": 0.00015478838653762818, "loss": 3.5687, "step": 1666 }, { "epoch": 0.24, "learning_rate": 0.0001547594973277481, "loss": 3.5811, "step": 1667 }, { "epoch": 0.24, "learning_rate": 0.000154730608117868, "loss": 3.4635, "step": 1668 }, { "epoch": 0.24, "learning_rate": 0.0001547017189079879, "loss": 3.3834, "step": 1669 }, { "epoch": 0.24, "learning_rate": 0.00015467282969810776, "loss": 3.4669, "step": 1670 }, { "epoch": 0.24, "learning_rate": 0.00015464394048822765, "loss": 3.4839, "step": 1671 }, { "epoch": 0.24, "learning_rate": 0.00015461505127834754, "loss": 3.4275, "step": 1672 }, { "epoch": 0.24, "learning_rate": 0.00015458616206846744, "loss": 3.5108, "step": 1673 }, { "epoch": 0.24, "learning_rate": 0.00015455727285858733, "loss": 3.5041, "step": 1674 }, { "epoch": 0.24, "learning_rate": 0.0001545283836487072, "loss": 3.4974, "step": 1675 }, { "epoch": 0.24, "learning_rate": 0.0001544994944388271, "loss": 3.3863, "step": 1676 }, { "epoch": 0.24, "learning_rate": 0.000154470605228947, "loss": 3.4526, "step": 1677 }, { "epoch": 0.24, "learning_rate": 0.0001544417160190669, "loss": 3.5786, "step": 1678 }, { "epoch": 0.24, "learning_rate": 0.00015441282680918677, "loss": 3.3061, "step": 1679 }, { "epoch": 0.24, "learning_rate": 0.00015438393759930667, "loss": 3.4446, "step": 1680 }, { "epoch": 0.24, "learning_rate": 0.00015435504838942656, "loss": 3.486, "step": 1681 }, { "epoch": 0.24, "learning_rate": 0.00015432615917954646, "loss": 3.4333, "step": 1682 }, { "epoch": 0.24, "learning_rate": 0.00015429726996966632, "loss": 3.3177, "step": 1683 }, { "epoch": 0.24, "learning_rate": 0.00015426838075978622, "loss": 3.5748, "step": 1684 }, { "epoch": 0.24, "learning_rate": 0.0001542394915499061, "loss": 3.495, "step": 1685 }, { "epoch": 0.24, "learning_rate": 0.000154210602340026, "loss": 3.4927, "step": 1686 }, { "epoch": 0.24, "learning_rate": 0.0001541817131301459, "loss": 3.4502, "step": 1687 }, { "epoch": 0.24, "learning_rate": 0.0001541528239202658, "loss": 3.4559, "step": 1688 }, { "epoch": 0.24, "learning_rate": 0.00015412393471038569, "loss": 3.3388, "step": 1689 }, { "epoch": 0.24, "learning_rate": 0.00015409504550050558, "loss": 3.437, "step": 1690 }, { "epoch": 0.24, "learning_rate": 0.00015406615629062547, "loss": 3.3642, "step": 1691 }, { "epoch": 0.24, "learning_rate": 0.00015403726708074534, "loss": 3.4297, "step": 1692 }, { "epoch": 0.24, "learning_rate": 0.00015400837787086523, "loss": 3.3197, "step": 1693 }, { "epoch": 0.24, "learning_rate": 0.00015397948866098513, "loss": 3.5311, "step": 1694 }, { "epoch": 0.24, "learning_rate": 0.00015395059945110502, "loss": 3.3404, "step": 1695 }, { "epoch": 0.24, "learning_rate": 0.00015392171024122492, "loss": 3.4481, "step": 1696 }, { "epoch": 0.24, "learning_rate": 0.00015389282103134478, "loss": 3.4041, "step": 1697 }, { "epoch": 0.24, "learning_rate": 0.00015386393182146468, "loss": 3.4734, "step": 1698 }, { "epoch": 0.24, "learning_rate": 0.0001538350426115846, "loss": 3.5042, "step": 1699 }, { "epoch": 0.24, "learning_rate": 0.0001538061534017045, "loss": 3.3428, "step": 1700 }, { "epoch": 0.24, "learning_rate": 0.00015377726419182436, "loss": 3.5199, "step": 1701 }, { "epoch": 0.24, "learning_rate": 0.00015374837498194425, "loss": 3.3486, "step": 1702 }, { "epoch": 0.24, "learning_rate": 0.00015371948577206414, "loss": 3.3362, "step": 1703 }, { "epoch": 0.24, "learning_rate": 0.00015369059656218404, "loss": 3.3543, "step": 1704 }, { "epoch": 0.24, "learning_rate": 0.0001536617073523039, "loss": 3.4948, "step": 1705 }, { "epoch": 0.24, "learning_rate": 0.0001536328181424238, "loss": 3.4326, "step": 1706 }, { "epoch": 0.24, "learning_rate": 0.0001536039289325437, "loss": 3.3218, "step": 1707 }, { "epoch": 0.24, "learning_rate": 0.0001535750397226636, "loss": 3.4999, "step": 1708 }, { "epoch": 0.24, "learning_rate": 0.00015354615051278348, "loss": 3.4222, "step": 1709 }, { "epoch": 0.24, "learning_rate": 0.00015351726130290337, "loss": 3.0983, "step": 1710 }, { "epoch": 0.24, "learning_rate": 0.00015348837209302327, "loss": 3.447, "step": 1711 }, { "epoch": 0.24, "learning_rate": 0.00015345948288314316, "loss": 3.4486, "step": 1712 }, { "epoch": 0.24, "learning_rate": 0.00015343059367326306, "loss": 3.398, "step": 1713 }, { "epoch": 0.24, "learning_rate": 0.00015340170446338292, "loss": 3.3366, "step": 1714 }, { "epoch": 0.24, "learning_rate": 0.00015337281525350282, "loss": 3.5535, "step": 1715 }, { "epoch": 0.24, "learning_rate": 0.0001533439260436227, "loss": 3.4162, "step": 1716 }, { "epoch": 0.24, "learning_rate": 0.0001533150368337426, "loss": 3.3429, "step": 1717 }, { "epoch": 0.24, "learning_rate": 0.0001532861476238625, "loss": 3.4877, "step": 1718 }, { "epoch": 0.24, "learning_rate": 0.00015325725841398236, "loss": 3.4085, "step": 1719 }, { "epoch": 0.24, "learning_rate": 0.00015322836920410229, "loss": 3.5294, "step": 1720 }, { "epoch": 0.25, "learning_rate": 0.00015319947999422218, "loss": 3.3905, "step": 1721 }, { "epoch": 0.25, "learning_rate": 0.00015317059078434207, "loss": 3.4958, "step": 1722 }, { "epoch": 0.25, "learning_rate": 0.00015314170157446194, "loss": 3.4739, "step": 1723 }, { "epoch": 0.25, "learning_rate": 0.00015311281236458183, "loss": 3.4774, "step": 1724 }, { "epoch": 0.25, "learning_rate": 0.00015308392315470173, "loss": 3.4016, "step": 1725 }, { "epoch": 0.25, "learning_rate": 0.00015305503394482162, "loss": 3.4329, "step": 1726 }, { "epoch": 0.25, "learning_rate": 0.0001530261447349415, "loss": 3.4621, "step": 1727 }, { "epoch": 0.25, "learning_rate": 0.00015299725552506138, "loss": 3.4899, "step": 1728 }, { "epoch": 0.25, "learning_rate": 0.00015296836631518128, "loss": 3.397, "step": 1729 }, { "epoch": 0.25, "learning_rate": 0.0001529394771053012, "loss": 3.4158, "step": 1730 }, { "epoch": 0.25, "learning_rate": 0.00015291058789542106, "loss": 3.455, "step": 1731 }, { "epoch": 0.25, "learning_rate": 0.00015288169868554096, "loss": 3.5955, "step": 1732 }, { "epoch": 0.25, "learning_rate": 0.00015285280947566085, "loss": 3.4992, "step": 1733 }, { "epoch": 0.25, "learning_rate": 0.00015282392026578074, "loss": 3.1957, "step": 1734 }, { "epoch": 0.25, "learning_rate": 0.00015279503105590064, "loss": 3.4251, "step": 1735 }, { "epoch": 0.25, "learning_rate": 0.0001527661418460205, "loss": 3.3176, "step": 1736 }, { "epoch": 0.25, "learning_rate": 0.0001527372526361404, "loss": 3.5188, "step": 1737 }, { "epoch": 0.25, "learning_rate": 0.0001527083634262603, "loss": 3.4461, "step": 1738 }, { "epoch": 0.25, "learning_rate": 0.0001526794742163802, "loss": 3.3749, "step": 1739 }, { "epoch": 0.25, "learning_rate": 0.00015265058500650008, "loss": 3.4516, "step": 1740 }, { "epoch": 0.25, "learning_rate": 0.00015262169579661997, "loss": 3.5357, "step": 1741 }, { "epoch": 0.25, "learning_rate": 0.00015259280658673987, "loss": 3.4015, "step": 1742 }, { "epoch": 0.25, "learning_rate": 0.00015256391737685976, "loss": 3.5292, "step": 1743 }, { "epoch": 0.25, "learning_rate": 0.00015253502816697966, "loss": 3.4333, "step": 1744 }, { "epoch": 0.25, "learning_rate": 0.00015250613895709952, "loss": 3.3426, "step": 1745 }, { "epoch": 0.25, "learning_rate": 0.00015247724974721942, "loss": 3.4477, "step": 1746 }, { "epoch": 0.25, "learning_rate": 0.0001524483605373393, "loss": 3.4709, "step": 1747 }, { "epoch": 0.25, "learning_rate": 0.0001524194713274592, "loss": 3.4584, "step": 1748 }, { "epoch": 0.25, "learning_rate": 0.00015239058211757907, "loss": 3.448, "step": 1749 }, { "epoch": 0.25, "learning_rate": 0.00015236169290769896, "loss": 3.4082, "step": 1750 }, { "epoch": 0.25, "learning_rate": 0.00015233280369781889, "loss": 3.4354, "step": 1751 }, { "epoch": 0.25, "learning_rate": 0.00015230391448793878, "loss": 3.4206, "step": 1752 }, { "epoch": 0.25, "learning_rate": 0.00015227502527805865, "loss": 3.3987, "step": 1753 }, { "epoch": 0.25, "learning_rate": 0.00015224613606817854, "loss": 3.3487, "step": 1754 }, { "epoch": 0.25, "learning_rate": 0.00015221724685829843, "loss": 3.5518, "step": 1755 }, { "epoch": 0.25, "learning_rate": 0.00015218835764841833, "loss": 3.341, "step": 1756 }, { "epoch": 0.25, "learning_rate": 0.00015215946843853822, "loss": 3.4783, "step": 1757 }, { "epoch": 0.25, "learning_rate": 0.0001521305792286581, "loss": 3.3948, "step": 1758 }, { "epoch": 0.25, "learning_rate": 0.00015210169001877798, "loss": 3.2644, "step": 1759 }, { "epoch": 0.25, "learning_rate": 0.00015207280080889788, "loss": 3.4853, "step": 1760 }, { "epoch": 0.25, "eval_loss": 3.5950064659118652, "eval_runtime": 471.9526, "eval_samples_per_second": 43.409, "eval_steps_per_second": 14.47, "step": 1760 }, { "epoch": 0.25, "learning_rate": 0.00015204391159901777, "loss": 3.4051, "step": 1761 }, { "epoch": 0.25, "learning_rate": 0.00015201502238913766, "loss": 3.3596, "step": 1762 }, { "epoch": 0.25, "learning_rate": 0.00015198613317925756, "loss": 3.4631, "step": 1763 }, { "epoch": 0.25, "learning_rate": 0.00015195724396937745, "loss": 3.416, "step": 1764 }, { "epoch": 0.25, "learning_rate": 0.00015192835475949735, "loss": 3.4119, "step": 1765 }, { "epoch": 0.25, "learning_rate": 0.00015189946554961724, "loss": 3.3318, "step": 1766 }, { "epoch": 0.25, "learning_rate": 0.0001518705763397371, "loss": 3.4591, "step": 1767 }, { "epoch": 0.25, "learning_rate": 0.000151841687129857, "loss": 3.483, "step": 1768 }, { "epoch": 0.25, "learning_rate": 0.0001518127979199769, "loss": 3.4671, "step": 1769 }, { "epoch": 0.25, "learning_rate": 0.0001517839087100968, "loss": 3.3936, "step": 1770 }, { "epoch": 0.25, "learning_rate": 0.00015175501950021665, "loss": 3.4857, "step": 1771 }, { "epoch": 0.25, "learning_rate": 0.00015172613029033657, "loss": 3.3232, "step": 1772 }, { "epoch": 0.25, "learning_rate": 0.00015169724108045647, "loss": 3.4324, "step": 1773 }, { "epoch": 0.25, "learning_rate": 0.00015166835187057636, "loss": 3.4477, "step": 1774 }, { "epoch": 0.25, "learning_rate": 0.00015163946266069623, "loss": 3.593, "step": 1775 }, { "epoch": 0.25, "learning_rate": 0.00015161057345081612, "loss": 3.2523, "step": 1776 }, { "epoch": 0.25, "learning_rate": 0.00015158168424093602, "loss": 3.4394, "step": 1777 }, { "epoch": 0.25, "learning_rate": 0.0001515527950310559, "loss": 3.4362, "step": 1778 }, { "epoch": 0.25, "learning_rate": 0.0001515239058211758, "loss": 3.5046, "step": 1779 }, { "epoch": 0.25, "learning_rate": 0.00015149501661129567, "loss": 3.35, "step": 1780 }, { "epoch": 0.25, "learning_rate": 0.00015146612740141556, "loss": 3.4668, "step": 1781 }, { "epoch": 0.25, "learning_rate": 0.00015143723819153546, "loss": 3.2848, "step": 1782 }, { "epoch": 0.25, "learning_rate": 0.00015140834898165538, "loss": 3.5605, "step": 1783 }, { "epoch": 0.25, "learning_rate": 0.00015137945977177525, "loss": 3.3905, "step": 1784 }, { "epoch": 0.25, "learning_rate": 0.00015135057056189514, "loss": 3.4506, "step": 1785 }, { "epoch": 0.25, "learning_rate": 0.00015132168135201503, "loss": 3.4539, "step": 1786 }, { "epoch": 0.25, "learning_rate": 0.00015129279214213493, "loss": 3.532, "step": 1787 }, { "epoch": 0.25, "learning_rate": 0.00015126390293225482, "loss": 3.4072, "step": 1788 }, { "epoch": 0.25, "learning_rate": 0.0001512350137223747, "loss": 3.5478, "step": 1789 }, { "epoch": 0.25, "learning_rate": 0.00015120612451249458, "loss": 3.5377, "step": 1790 }, { "epoch": 0.26, "learning_rate": 0.00015117723530261448, "loss": 3.3438, "step": 1791 }, { "epoch": 0.26, "learning_rate": 0.00015114834609273437, "loss": 3.4662, "step": 1792 }, { "epoch": 0.26, "learning_rate": 0.00015111945688285426, "loss": 3.3743, "step": 1793 }, { "epoch": 0.26, "learning_rate": 0.00015109056767297416, "loss": 3.4603, "step": 1794 }, { "epoch": 0.26, "learning_rate": 0.00015106167846309405, "loss": 3.5661, "step": 1795 }, { "epoch": 0.26, "learning_rate": 0.00015103278925321395, "loss": 3.6666, "step": 1796 }, { "epoch": 0.26, "learning_rate": 0.0001510039000433338, "loss": 3.5021, "step": 1797 }, { "epoch": 0.26, "learning_rate": 0.0001509750108334537, "loss": 3.4549, "step": 1798 }, { "epoch": 0.26, "learning_rate": 0.0001509461216235736, "loss": 3.4754, "step": 1799 }, { "epoch": 0.26, "learning_rate": 0.0001509172324136935, "loss": 3.3963, "step": 1800 }, { "epoch": 0.26, "learning_rate": 0.0001508883432038134, "loss": 3.5162, "step": 1801 }, { "epoch": 0.26, "learning_rate": 0.00015085945399393325, "loss": 3.4586, "step": 1802 }, { "epoch": 0.26, "learning_rate": 0.00015083056478405317, "loss": 3.4553, "step": 1803 }, { "epoch": 0.26, "learning_rate": 0.00015080167557417307, "loss": 3.4765, "step": 1804 }, { "epoch": 0.26, "learning_rate": 0.00015077278636429296, "loss": 3.5084, "step": 1805 }, { "epoch": 0.26, "learning_rate": 0.00015074389715441283, "loss": 3.3074, "step": 1806 }, { "epoch": 0.26, "learning_rate": 0.00015071500794453272, "loss": 3.4487, "step": 1807 }, { "epoch": 0.26, "learning_rate": 0.00015068611873465262, "loss": 3.4417, "step": 1808 }, { "epoch": 0.26, "learning_rate": 0.0001506572295247725, "loss": 3.364, "step": 1809 }, { "epoch": 0.26, "learning_rate": 0.0001506283403148924, "loss": 3.4581, "step": 1810 }, { "epoch": 0.26, "learning_rate": 0.00015059945110501227, "loss": 3.4035, "step": 1811 }, { "epoch": 0.26, "learning_rate": 0.00015057056189513217, "loss": 3.4665, "step": 1812 }, { "epoch": 0.26, "learning_rate": 0.00015054167268525206, "loss": 3.4461, "step": 1813 }, { "epoch": 0.26, "learning_rate": 0.00015051278347537195, "loss": 3.4914, "step": 1814 }, { "epoch": 0.26, "learning_rate": 0.00015048389426549185, "loss": 3.3571, "step": 1815 }, { "epoch": 0.26, "learning_rate": 0.00015045500505561174, "loss": 3.3196, "step": 1816 }, { "epoch": 0.26, "learning_rate": 0.00015042611584573163, "loss": 3.4052, "step": 1817 }, { "epoch": 0.26, "learning_rate": 0.00015039722663585153, "loss": 3.341, "step": 1818 }, { "epoch": 0.26, "learning_rate": 0.0001503683374259714, "loss": 3.5346, "step": 1819 }, { "epoch": 0.26, "learning_rate": 0.0001503394482160913, "loss": 3.2565, "step": 1820 }, { "epoch": 0.26, "learning_rate": 0.00015031055900621118, "loss": 3.5817, "step": 1821 }, { "epoch": 0.26, "learning_rate": 0.00015028166979633108, "loss": 3.5139, "step": 1822 }, { "epoch": 0.26, "learning_rate": 0.00015025278058645097, "loss": 3.5845, "step": 1823 }, { "epoch": 0.26, "learning_rate": 0.00015022389137657086, "loss": 3.4028, "step": 1824 }, { "epoch": 0.26, "learning_rate": 0.00015019500216669076, "loss": 3.555, "step": 1825 }, { "epoch": 0.26, "learning_rate": 0.00015016611295681065, "loss": 3.3109, "step": 1826 }, { "epoch": 0.26, "learning_rate": 0.00015013722374693055, "loss": 3.5086, "step": 1827 }, { "epoch": 0.26, "learning_rate": 0.0001501083345370504, "loss": 3.3717, "step": 1828 }, { "epoch": 0.26, "learning_rate": 0.0001500794453271703, "loss": 3.375, "step": 1829 }, { "epoch": 0.26, "learning_rate": 0.0001500505561172902, "loss": 3.4272, "step": 1830 }, { "epoch": 0.26, "learning_rate": 0.0001500216669074101, "loss": 3.5006, "step": 1831 }, { "epoch": 0.26, "learning_rate": 0.00014999277769752996, "loss": 3.4847, "step": 1832 }, { "epoch": 0.26, "learning_rate": 0.00014996388848764985, "loss": 3.4681, "step": 1833 }, { "epoch": 0.26, "learning_rate": 0.00014993499927776975, "loss": 3.342, "step": 1834 }, { "epoch": 0.26, "learning_rate": 0.00014990611006788967, "loss": 3.3611, "step": 1835 }, { "epoch": 0.26, "learning_rate": 0.00014987722085800954, "loss": 3.3995, "step": 1836 }, { "epoch": 0.26, "learning_rate": 0.00014984833164812943, "loss": 3.4514, "step": 1837 }, { "epoch": 0.26, "learning_rate": 0.00014981944243824932, "loss": 3.5673, "step": 1838 }, { "epoch": 0.26, "learning_rate": 0.00014979055322836922, "loss": 3.4605, "step": 1839 }, { "epoch": 0.26, "learning_rate": 0.0001497616640184891, "loss": 3.5023, "step": 1840 }, { "epoch": 0.26, "learning_rate": 0.00014973277480860898, "loss": 3.3265, "step": 1841 }, { "epoch": 0.26, "learning_rate": 0.00014970388559872887, "loss": 3.4755, "step": 1842 }, { "epoch": 0.26, "learning_rate": 0.00014967499638884877, "loss": 3.3242, "step": 1843 }, { "epoch": 0.26, "learning_rate": 0.00014964610717896866, "loss": 3.3834, "step": 1844 }, { "epoch": 0.26, "learning_rate": 0.00014961721796908855, "loss": 3.4341, "step": 1845 }, { "epoch": 0.26, "learning_rate": 0.00014958832875920845, "loss": 3.5213, "step": 1846 }, { "epoch": 0.26, "learning_rate": 0.00014955943954932834, "loss": 3.543, "step": 1847 }, { "epoch": 0.26, "learning_rate": 0.00014953055033944823, "loss": 3.4442, "step": 1848 }, { "epoch": 0.26, "learning_rate": 0.00014950166112956813, "loss": 3.5597, "step": 1849 }, { "epoch": 0.26, "learning_rate": 0.000149472771919688, "loss": 3.3681, "step": 1850 }, { "epoch": 0.26, "learning_rate": 0.0001494438827098079, "loss": 3.3805, "step": 1851 }, { "epoch": 0.26, "learning_rate": 0.00014941499349992778, "loss": 3.4571, "step": 1852 }, { "epoch": 0.26, "learning_rate": 0.00014938610429004768, "loss": 3.444, "step": 1853 }, { "epoch": 0.26, "learning_rate": 0.00014935721508016754, "loss": 3.4871, "step": 1854 }, { "epoch": 0.26, "learning_rate": 0.00014932832587028744, "loss": 3.4849, "step": 1855 }, { "epoch": 0.26, "learning_rate": 0.00014929943666040736, "loss": 3.2477, "step": 1856 }, { "epoch": 0.26, "learning_rate": 0.00014927054745052725, "loss": 3.3903, "step": 1857 }, { "epoch": 0.26, "learning_rate": 0.00014924165824064712, "loss": 3.5671, "step": 1858 }, { "epoch": 0.26, "learning_rate": 0.000149212769030767, "loss": 3.2997, "step": 1859 }, { "epoch": 0.26, "learning_rate": 0.0001491838798208869, "loss": 3.5099, "step": 1860 }, { "epoch": 0.26, "learning_rate": 0.0001491549906110068, "loss": 3.3259, "step": 1861 }, { "epoch": 0.27, "learning_rate": 0.0001491261014011267, "loss": 3.4265, "step": 1862 }, { "epoch": 0.27, "learning_rate": 0.00014909721219124656, "loss": 3.357, "step": 1863 }, { "epoch": 0.27, "learning_rate": 0.00014906832298136645, "loss": 3.6089, "step": 1864 }, { "epoch": 0.27, "learning_rate": 0.00014903943377148635, "loss": 3.4923, "step": 1865 }, { "epoch": 0.27, "learning_rate": 0.00014901054456160627, "loss": 3.5827, "step": 1866 }, { "epoch": 0.27, "learning_rate": 0.00014898165535172614, "loss": 3.4838, "step": 1867 }, { "epoch": 0.27, "learning_rate": 0.00014895276614184603, "loss": 3.3623, "step": 1868 }, { "epoch": 0.27, "learning_rate": 0.00014892387693196592, "loss": 3.4234, "step": 1869 }, { "epoch": 0.27, "learning_rate": 0.00014889498772208582, "loss": 3.42, "step": 1870 }, { "epoch": 0.27, "learning_rate": 0.0001488660985122057, "loss": 3.4147, "step": 1871 }, { "epoch": 0.27, "learning_rate": 0.00014883720930232558, "loss": 3.4886, "step": 1872 }, { "epoch": 0.27, "learning_rate": 0.00014880832009244547, "loss": 3.2949, "step": 1873 }, { "epoch": 0.27, "learning_rate": 0.00014877943088256537, "loss": 3.4814, "step": 1874 }, { "epoch": 0.27, "learning_rate": 0.00014875054167268526, "loss": 3.4084, "step": 1875 }, { "epoch": 0.27, "learning_rate": 0.00014872165246280513, "loss": 3.4247, "step": 1876 }, { "epoch": 0.27, "learning_rate": 0.00014869276325292505, "loss": 3.4906, "step": 1877 }, { "epoch": 0.27, "learning_rate": 0.00014866387404304494, "loss": 3.4889, "step": 1878 }, { "epoch": 0.27, "learning_rate": 0.00014863498483316483, "loss": 3.4885, "step": 1879 }, { "epoch": 0.27, "learning_rate": 0.0001486060956232847, "loss": 3.4409, "step": 1880 }, { "epoch": 0.27, "learning_rate": 0.0001485772064134046, "loss": 3.2834, "step": 1881 }, { "epoch": 0.27, "learning_rate": 0.0001485483172035245, "loss": 3.4257, "step": 1882 }, { "epoch": 0.27, "learning_rate": 0.00014851942799364438, "loss": 3.5979, "step": 1883 }, { "epoch": 0.27, "learning_rate": 0.00014849053878376428, "loss": 3.3264, "step": 1884 }, { "epoch": 0.27, "learning_rate": 0.00014846164957388414, "loss": 3.4631, "step": 1885 }, { "epoch": 0.27, "learning_rate": 0.00014843276036400404, "loss": 3.4964, "step": 1886 }, { "epoch": 0.27, "learning_rate": 0.00014840387115412396, "loss": 3.5215, "step": 1887 }, { "epoch": 0.27, "learning_rate": 0.00014837498194424385, "loss": 3.4671, "step": 1888 }, { "epoch": 0.27, "learning_rate": 0.00014834609273436372, "loss": 3.4544, "step": 1889 }, { "epoch": 0.27, "learning_rate": 0.0001483172035244836, "loss": 3.4728, "step": 1890 }, { "epoch": 0.27, "learning_rate": 0.0001482883143146035, "loss": 3.5478, "step": 1891 }, { "epoch": 0.27, "learning_rate": 0.0001482594251047234, "loss": 3.3491, "step": 1892 }, { "epoch": 0.27, "learning_rate": 0.0001482305358948433, "loss": 3.5396, "step": 1893 }, { "epoch": 0.27, "learning_rate": 0.00014820164668496316, "loss": 3.4655, "step": 1894 }, { "epoch": 0.27, "learning_rate": 0.00014817275747508305, "loss": 3.3832, "step": 1895 }, { "epoch": 0.27, "learning_rate": 0.00014814386826520295, "loss": 3.485, "step": 1896 }, { "epoch": 0.27, "learning_rate": 0.00014811497905532284, "loss": 3.5032, "step": 1897 }, { "epoch": 0.27, "learning_rate": 0.00014808608984544274, "loss": 3.4827, "step": 1898 }, { "epoch": 0.27, "learning_rate": 0.00014805720063556263, "loss": 3.4626, "step": 1899 }, { "epoch": 0.27, "learning_rate": 0.00014802831142568252, "loss": 3.3802, "step": 1900 }, { "epoch": 0.27, "learning_rate": 0.00014799942221580242, "loss": 3.5136, "step": 1901 }, { "epoch": 0.27, "learning_rate": 0.00014797053300592228, "loss": 3.5037, "step": 1902 }, { "epoch": 0.27, "learning_rate": 0.00014794164379604218, "loss": 3.5207, "step": 1903 }, { "epoch": 0.27, "learning_rate": 0.00014791275458616207, "loss": 3.4731, "step": 1904 }, { "epoch": 0.27, "learning_rate": 0.00014788386537628197, "loss": 3.4823, "step": 1905 }, { "epoch": 0.27, "learning_rate": 0.00014785497616640186, "loss": 3.4794, "step": 1906 }, { "epoch": 0.27, "learning_rate": 0.00014782608695652173, "loss": 3.4531, "step": 1907 }, { "epoch": 0.27, "learning_rate": 0.00014779719774664165, "loss": 3.3548, "step": 1908 }, { "epoch": 0.27, "learning_rate": 0.00014776830853676154, "loss": 3.4536, "step": 1909 }, { "epoch": 0.27, "learning_rate": 0.00014773941932688143, "loss": 3.5259, "step": 1910 }, { "epoch": 0.27, "learning_rate": 0.0001477105301170013, "loss": 3.4277, "step": 1911 }, { "epoch": 0.27, "learning_rate": 0.0001476816409071212, "loss": 3.3827, "step": 1912 }, { "epoch": 0.27, "learning_rate": 0.0001476527516972411, "loss": 3.4711, "step": 1913 }, { "epoch": 0.27, "learning_rate": 0.00014762386248736098, "loss": 3.4863, "step": 1914 }, { "epoch": 0.27, "learning_rate": 0.00014759497327748088, "loss": 3.5021, "step": 1915 }, { "epoch": 0.27, "learning_rate": 0.00014756608406760074, "loss": 3.3306, "step": 1916 }, { "epoch": 0.27, "learning_rate": 0.00014753719485772064, "loss": 3.5057, "step": 1917 }, { "epoch": 0.27, "learning_rate": 0.00014750830564784053, "loss": 3.4257, "step": 1918 }, { "epoch": 0.27, "learning_rate": 0.00014747941643796042, "loss": 3.3859, "step": 1919 }, { "epoch": 0.27, "learning_rate": 0.00014745052722808032, "loss": 3.488, "step": 1920 }, { "epoch": 0.27, "learning_rate": 0.0001474216380182002, "loss": 3.2959, "step": 1921 }, { "epoch": 0.27, "learning_rate": 0.0001473927488083201, "loss": 3.5709, "step": 1922 }, { "epoch": 0.27, "learning_rate": 0.00014736385959844, "loss": 3.3738, "step": 1923 }, { "epoch": 0.27, "learning_rate": 0.00014733497038855987, "loss": 3.4031, "step": 1924 }, { "epoch": 0.27, "learning_rate": 0.00014730608117867976, "loss": 3.3902, "step": 1925 }, { "epoch": 0.27, "learning_rate": 0.00014727719196879965, "loss": 3.3969, "step": 1926 }, { "epoch": 0.27, "learning_rate": 0.00014724830275891955, "loss": 3.3293, "step": 1927 }, { "epoch": 0.27, "learning_rate": 0.00014721941354903944, "loss": 3.442, "step": 1928 }, { "epoch": 0.27, "learning_rate": 0.00014719052433915934, "loss": 3.4116, "step": 1929 }, { "epoch": 0.27, "learning_rate": 0.00014716163512927923, "loss": 3.5437, "step": 1930 }, { "epoch": 0.27, "learning_rate": 0.00014713274591939912, "loss": 3.4743, "step": 1931 }, { "epoch": 0.28, "learning_rate": 0.00014710385670951902, "loss": 3.281, "step": 1932 }, { "epoch": 0.28, "learning_rate": 0.00014707496749963888, "loss": 3.4105, "step": 1933 }, { "epoch": 0.28, "learning_rate": 0.00014704607828975878, "loss": 3.6074, "step": 1934 }, { "epoch": 0.28, "learning_rate": 0.00014701718907987867, "loss": 3.5347, "step": 1935 }, { "epoch": 0.28, "learning_rate": 0.00014698829986999857, "loss": 3.4239, "step": 1936 }, { "epoch": 0.28, "learning_rate": 0.00014695941066011846, "loss": 3.4338, "step": 1937 }, { "epoch": 0.28, "learning_rate": 0.00014693052145023833, "loss": 3.4685, "step": 1938 }, { "epoch": 0.28, "learning_rate": 0.00014690163224035822, "loss": 3.3967, "step": 1939 }, { "epoch": 0.28, "learning_rate": 0.00014687274303047814, "loss": 3.4177, "step": 1940 }, { "epoch": 0.28, "learning_rate": 0.000146843853820598, "loss": 3.4269, "step": 1941 }, { "epoch": 0.28, "learning_rate": 0.0001468149646107179, "loss": 3.4652, "step": 1942 }, { "epoch": 0.28, "learning_rate": 0.0001467860754008378, "loss": 3.4627, "step": 1943 }, { "epoch": 0.28, "learning_rate": 0.0001467571861909577, "loss": 3.5027, "step": 1944 }, { "epoch": 0.28, "learning_rate": 0.00014672829698107758, "loss": 3.5213, "step": 1945 }, { "epoch": 0.28, "learning_rate": 0.00014669940777119745, "loss": 3.5287, "step": 1946 }, { "epoch": 0.28, "learning_rate": 0.00014667051856131734, "loss": 3.5134, "step": 1947 }, { "epoch": 0.28, "learning_rate": 0.00014664162935143724, "loss": 3.439, "step": 1948 }, { "epoch": 0.28, "learning_rate": 0.00014661274014155713, "loss": 3.4804, "step": 1949 }, { "epoch": 0.28, "learning_rate": 0.00014658385093167703, "loss": 3.2259, "step": 1950 }, { "epoch": 0.28, "learning_rate": 0.00014655496172179692, "loss": 3.4906, "step": 1951 }, { "epoch": 0.28, "learning_rate": 0.0001465260725119168, "loss": 3.4134, "step": 1952 }, { "epoch": 0.28, "learning_rate": 0.0001464971833020367, "loss": 3.5899, "step": 1953 }, { "epoch": 0.28, "learning_rate": 0.0001464682940921566, "loss": 3.4322, "step": 1954 }, { "epoch": 0.28, "learning_rate": 0.00014643940488227647, "loss": 3.2959, "step": 1955 }, { "epoch": 0.28, "learning_rate": 0.00014641051567239636, "loss": 3.3999, "step": 1956 }, { "epoch": 0.28, "learning_rate": 0.00014638162646251625, "loss": 3.444, "step": 1957 }, { "epoch": 0.28, "learning_rate": 0.00014635273725263615, "loss": 3.4835, "step": 1958 }, { "epoch": 0.28, "learning_rate": 0.00014632384804275602, "loss": 3.4171, "step": 1959 }, { "epoch": 0.28, "learning_rate": 0.0001462949588328759, "loss": 3.3343, "step": 1960 }, { "epoch": 0.28, "learning_rate": 0.00014626606962299583, "loss": 3.4857, "step": 1961 }, { "epoch": 0.28, "learning_rate": 0.00014623718041311572, "loss": 3.2975, "step": 1962 }, { "epoch": 0.28, "learning_rate": 0.0001462082912032356, "loss": 3.3762, "step": 1963 }, { "epoch": 0.28, "learning_rate": 0.00014617940199335548, "loss": 3.5087, "step": 1964 }, { "epoch": 0.28, "learning_rate": 0.00014615051278347538, "loss": 3.5289, "step": 1965 }, { "epoch": 0.28, "learning_rate": 0.00014612162357359527, "loss": 3.4127, "step": 1966 }, { "epoch": 0.28, "learning_rate": 0.00014609273436371517, "loss": 3.4432, "step": 1967 }, { "epoch": 0.28, "learning_rate": 0.00014606384515383503, "loss": 3.4058, "step": 1968 }, { "epoch": 0.28, "learning_rate": 0.00014603495594395493, "loss": 3.5486, "step": 1969 }, { "epoch": 0.28, "learning_rate": 0.00014600606673407482, "loss": 3.491, "step": 1970 }, { "epoch": 0.28, "learning_rate": 0.00014597717752419474, "loss": 3.3858, "step": 1971 }, { "epoch": 0.28, "learning_rate": 0.0001459482883143146, "loss": 3.3609, "step": 1972 }, { "epoch": 0.28, "learning_rate": 0.0001459193991044345, "loss": 3.4312, "step": 1973 }, { "epoch": 0.28, "learning_rate": 0.0001458905098945544, "loss": 3.403, "step": 1974 }, { "epoch": 0.28, "learning_rate": 0.0001458616206846743, "loss": 3.2376, "step": 1975 }, { "epoch": 0.28, "learning_rate": 0.00014583273147479418, "loss": 3.4582, "step": 1976 }, { "epoch": 0.28, "learning_rate": 0.00014580384226491405, "loss": 3.5404, "step": 1977 }, { "epoch": 0.28, "learning_rate": 0.00014577495305503394, "loss": 3.5013, "step": 1978 }, { "epoch": 0.28, "learning_rate": 0.00014574606384515384, "loss": 3.4204, "step": 1979 }, { "epoch": 0.28, "learning_rate": 0.00014571717463527373, "loss": 3.3112, "step": 1980 }, { "epoch": 0.28, "learning_rate": 0.00014568828542539363, "loss": 3.562, "step": 1981 }, { "epoch": 0.28, "learning_rate": 0.00014565939621551352, "loss": 3.2597, "step": 1982 }, { "epoch": 0.28, "learning_rate": 0.0001456305070056334, "loss": 3.4568, "step": 1983 }, { "epoch": 0.28, "learning_rate": 0.0001456016177957533, "loss": 3.5787, "step": 1984 }, { "epoch": 0.28, "learning_rate": 0.00014557272858587317, "loss": 3.4654, "step": 1985 }, { "epoch": 0.28, "learning_rate": 0.00014554383937599307, "loss": 3.5657, "step": 1986 }, { "epoch": 0.28, "learning_rate": 0.00014551495016611296, "loss": 3.4461, "step": 1987 }, { "epoch": 0.28, "learning_rate": 0.00014548606095623285, "loss": 3.5129, "step": 1988 }, { "epoch": 0.28, "learning_rate": 0.00014545717174635275, "loss": 3.5618, "step": 1989 }, { "epoch": 0.28, "learning_rate": 0.00014542828253647262, "loss": 3.324, "step": 1990 }, { "epoch": 0.28, "learning_rate": 0.0001453993933265925, "loss": 3.3, "step": 1991 }, { "epoch": 0.28, "learning_rate": 0.00014537050411671243, "loss": 3.395, "step": 1992 }, { "epoch": 0.28, "learning_rate": 0.00014534161490683232, "loss": 3.5426, "step": 1993 }, { "epoch": 0.28, "learning_rate": 0.0001453127256969522, "loss": 3.4614, "step": 1994 }, { "epoch": 0.28, "learning_rate": 0.00014528383648707208, "loss": 3.3017, "step": 1995 }, { "epoch": 0.28, "learning_rate": 0.00014525494727719198, "loss": 3.4191, "step": 1996 }, { "epoch": 0.28, "learning_rate": 0.00014522605806731187, "loss": 3.4152, "step": 1997 }, { "epoch": 0.28, "learning_rate": 0.00014519716885743177, "loss": 3.2961, "step": 1998 }, { "epoch": 0.28, "learning_rate": 0.00014516827964755163, "loss": 3.3462, "step": 1999 }, { "epoch": 0.28, "learning_rate": 0.00014513939043767153, "loss": 3.1992, "step": 2000 }, { "epoch": 0.28, "learning_rate": 0.00014511050122779142, "loss": 3.4572, "step": 2001 }, { "epoch": 0.29, "learning_rate": 0.00014508161201791134, "loss": 3.3327, "step": 2002 }, { "epoch": 0.29, "learning_rate": 0.0001450527228080312, "loss": 3.4493, "step": 2003 }, { "epoch": 0.29, "learning_rate": 0.0001450238335981511, "loss": 3.4782, "step": 2004 }, { "epoch": 0.29, "learning_rate": 0.000144994944388271, "loss": 3.5013, "step": 2005 }, { "epoch": 0.29, "learning_rate": 0.0001449660551783909, "loss": 3.3044, "step": 2006 }, { "epoch": 0.29, "learning_rate": 0.00014493716596851076, "loss": 3.65, "step": 2007 }, { "epoch": 0.29, "learning_rate": 0.00014490827675863065, "loss": 3.4377, "step": 2008 }, { "epoch": 0.29, "learning_rate": 0.00014487938754875054, "loss": 3.2571, "step": 2009 }, { "epoch": 0.29, "learning_rate": 0.00014485049833887044, "loss": 3.4659, "step": 2010 }, { "epoch": 0.29, "learning_rate": 0.00014482160912899033, "loss": 3.3477, "step": 2011 }, { "epoch": 0.29, "learning_rate": 0.0001447927199191102, "loss": 3.4271, "step": 2012 }, { "epoch": 0.29, "learning_rate": 0.00014476383070923012, "loss": 3.5187, "step": 2013 }, { "epoch": 0.29, "learning_rate": 0.00014473494149935, "loss": 3.4195, "step": 2014 }, { "epoch": 0.29, "learning_rate": 0.0001447060522894699, "loss": 3.4035, "step": 2015 }, { "epoch": 0.29, "learning_rate": 0.00014467716307958977, "loss": 3.3835, "step": 2016 }, { "epoch": 0.29, "learning_rate": 0.00014464827386970967, "loss": 3.5223, "step": 2017 }, { "epoch": 0.29, "learning_rate": 0.00014461938465982956, "loss": 3.412, "step": 2018 }, { "epoch": 0.29, "learning_rate": 0.00014459049544994946, "loss": 3.3719, "step": 2019 }, { "epoch": 0.29, "learning_rate": 0.00014456160624006935, "loss": 3.5132, "step": 2020 }, { "epoch": 0.29, "learning_rate": 0.00014453271703018922, "loss": 3.4407, "step": 2021 }, { "epoch": 0.29, "learning_rate": 0.0001445038278203091, "loss": 3.5427, "step": 2022 }, { "epoch": 0.29, "learning_rate": 0.00014447493861042903, "loss": 3.3198, "step": 2023 }, { "epoch": 0.29, "learning_rate": 0.0001444460494005489, "loss": 3.4871, "step": 2024 }, { "epoch": 0.29, "learning_rate": 0.0001444171601906688, "loss": 3.3314, "step": 2025 }, { "epoch": 0.29, "learning_rate": 0.00014438827098078868, "loss": 3.4372, "step": 2026 }, { "epoch": 0.29, "learning_rate": 0.00014435938177090858, "loss": 3.2933, "step": 2027 }, { "epoch": 0.29, "learning_rate": 0.00014433049256102847, "loss": 3.4497, "step": 2028 }, { "epoch": 0.29, "learning_rate": 0.00014430160335114834, "loss": 3.501, "step": 2029 }, { "epoch": 0.29, "learning_rate": 0.00014427271414126823, "loss": 3.3996, "step": 2030 }, { "epoch": 0.29, "learning_rate": 0.00014424382493138813, "loss": 3.5048, "step": 2031 }, { "epoch": 0.29, "learning_rate": 0.00014421493572150802, "loss": 3.4347, "step": 2032 }, { "epoch": 0.29, "learning_rate": 0.00014418604651162791, "loss": 3.2714, "step": 2033 }, { "epoch": 0.29, "learning_rate": 0.0001441571573017478, "loss": 3.5157, "step": 2034 }, { "epoch": 0.29, "learning_rate": 0.0001441282680918677, "loss": 3.4685, "step": 2035 }, { "epoch": 0.29, "learning_rate": 0.0001440993788819876, "loss": 3.4565, "step": 2036 }, { "epoch": 0.29, "learning_rate": 0.0001440704896721075, "loss": 3.4407, "step": 2037 }, { "epoch": 0.29, "learning_rate": 0.00014404160046222736, "loss": 3.4024, "step": 2038 }, { "epoch": 0.29, "learning_rate": 0.00014401271125234725, "loss": 3.4478, "step": 2039 }, { "epoch": 0.29, "learning_rate": 0.00014398382204246714, "loss": 3.5271, "step": 2040 }, { "epoch": 0.29, "learning_rate": 0.00014395493283258704, "loss": 3.5119, "step": 2041 }, { "epoch": 0.29, "learning_rate": 0.00014392604362270693, "loss": 3.2467, "step": 2042 }, { "epoch": 0.29, "learning_rate": 0.0001438971544128268, "loss": 3.504, "step": 2043 }, { "epoch": 0.29, "learning_rate": 0.00014386826520294672, "loss": 3.4109, "step": 2044 }, { "epoch": 0.29, "learning_rate": 0.0001438393759930666, "loss": 3.3956, "step": 2045 }, { "epoch": 0.29, "learning_rate": 0.00014381048678318648, "loss": 3.4761, "step": 2046 }, { "epoch": 0.29, "learning_rate": 0.00014378159757330637, "loss": 3.5453, "step": 2047 }, { "epoch": 0.29, "learning_rate": 0.00014375270836342627, "loss": 3.317, "step": 2048 }, { "epoch": 0.29, "learning_rate": 0.00014372381915354616, "loss": 3.4624, "step": 2049 }, { "epoch": 0.29, "learning_rate": 0.00014369492994366606, "loss": 3.3344, "step": 2050 }, { "epoch": 0.29, "learning_rate": 0.00014366604073378592, "loss": 3.3991, "step": 2051 }, { "epoch": 0.29, "learning_rate": 0.00014363715152390582, "loss": 3.3717, "step": 2052 }, { "epoch": 0.29, "learning_rate": 0.0001436082623140257, "loss": 3.4737, "step": 2053 }, { "epoch": 0.29, "learning_rate": 0.0001435793731041456, "loss": 3.4253, "step": 2054 }, { "epoch": 0.29, "learning_rate": 0.0001435504838942655, "loss": 3.25, "step": 2055 }, { "epoch": 0.29, "learning_rate": 0.0001435215946843854, "loss": 3.4574, "step": 2056 }, { "epoch": 0.29, "learning_rate": 0.00014349270547450529, "loss": 3.2754, "step": 2057 }, { "epoch": 0.29, "learning_rate": 0.00014346381626462518, "loss": 3.391, "step": 2058 }, { "epoch": 0.29, "learning_rate": 0.00014343492705474507, "loss": 3.5016, "step": 2059 }, { "epoch": 0.29, "learning_rate": 0.00014340603784486494, "loss": 3.3742, "step": 2060 }, { "epoch": 0.29, "learning_rate": 0.00014337714863498483, "loss": 3.3986, "step": 2061 }, { "epoch": 0.29, "learning_rate": 0.00014334825942510473, "loss": 3.2683, "step": 2062 }, { "epoch": 0.29, "learning_rate": 0.00014331937021522462, "loss": 3.3845, "step": 2063 }, { "epoch": 0.29, "learning_rate": 0.00014329048100534451, "loss": 3.2227, "step": 2064 }, { "epoch": 0.29, "learning_rate": 0.0001432615917954644, "loss": 3.3658, "step": 2065 }, { "epoch": 0.29, "learning_rate": 0.0001432327025855843, "loss": 3.455, "step": 2066 }, { "epoch": 0.29, "learning_rate": 0.0001432038133757042, "loss": 3.3727, "step": 2067 }, { "epoch": 0.29, "learning_rate": 0.00014317492416582406, "loss": 3.2789, "step": 2068 }, { "epoch": 0.29, "learning_rate": 0.00014314603495594396, "loss": 3.4471, "step": 2069 }, { "epoch": 0.29, "learning_rate": 0.00014311714574606385, "loss": 3.412, "step": 2070 }, { "epoch": 0.29, "learning_rate": 0.00014308825653618374, "loss": 3.573, "step": 2071 }, { "epoch": 0.3, "learning_rate": 0.00014305936732630364, "loss": 3.3721, "step": 2072 }, { "epoch": 0.3, "learning_rate": 0.0001430304781164235, "loss": 3.5033, "step": 2073 }, { "epoch": 0.3, "learning_rate": 0.0001430015889065434, "loss": 3.4696, "step": 2074 }, { "epoch": 0.3, "learning_rate": 0.0001429726996966633, "loss": 3.4221, "step": 2075 }, { "epoch": 0.3, "learning_rate": 0.0001429438104867832, "loss": 3.5093, "step": 2076 }, { "epoch": 0.3, "learning_rate": 0.00014291492127690308, "loss": 3.4657, "step": 2077 }, { "epoch": 0.3, "learning_rate": 0.00014288603206702297, "loss": 3.3822, "step": 2078 }, { "epoch": 0.3, "learning_rate": 0.00014285714285714287, "loss": 3.4654, "step": 2079 }, { "epoch": 0.3, "learning_rate": 0.00014282825364726276, "loss": 3.3517, "step": 2080 }, { "epoch": 0.3, "learning_rate": 0.00014279936443738266, "loss": 3.4709, "step": 2081 }, { "epoch": 0.3, "learning_rate": 0.00014277047522750252, "loss": 3.4689, "step": 2082 }, { "epoch": 0.3, "learning_rate": 0.00014274158601762242, "loss": 3.4866, "step": 2083 }, { "epoch": 0.3, "learning_rate": 0.0001427126968077423, "loss": 3.266, "step": 2084 }, { "epoch": 0.3, "learning_rate": 0.0001426838075978622, "loss": 3.4905, "step": 2085 }, { "epoch": 0.3, "learning_rate": 0.0001426549183879821, "loss": 3.5003, "step": 2086 }, { "epoch": 0.3, "learning_rate": 0.000142626029178102, "loss": 3.4256, "step": 2087 }, { "epoch": 0.3, "learning_rate": 0.00014259713996822189, "loss": 3.4049, "step": 2088 }, { "epoch": 0.3, "learning_rate": 0.00014256825075834178, "loss": 3.5358, "step": 2089 }, { "epoch": 0.3, "learning_rate": 0.00014253936154846165, "loss": 3.4838, "step": 2090 }, { "epoch": 0.3, "learning_rate": 0.00014251047233858154, "loss": 3.3808, "step": 2091 }, { "epoch": 0.3, "learning_rate": 0.00014248158312870143, "loss": 3.3242, "step": 2092 }, { "epoch": 0.3, "learning_rate": 0.00014245269391882133, "loss": 3.336, "step": 2093 }, { "epoch": 0.3, "learning_rate": 0.00014242380470894122, "loss": 3.3955, "step": 2094 }, { "epoch": 0.3, "learning_rate": 0.0001423949154990611, "loss": 3.5222, "step": 2095 }, { "epoch": 0.3, "learning_rate": 0.00014236602628918098, "loss": 3.4184, "step": 2096 }, { "epoch": 0.3, "learning_rate": 0.0001423371370793009, "loss": 3.4204, "step": 2097 }, { "epoch": 0.3, "learning_rate": 0.0001423082478694208, "loss": 3.4188, "step": 2098 }, { "epoch": 0.3, "learning_rate": 0.00014227935865954066, "loss": 3.4482, "step": 2099 }, { "epoch": 0.3, "learning_rate": 0.00014225046944966056, "loss": 3.4424, "step": 2100 }, { "epoch": 0.3, "learning_rate": 0.00014222158023978045, "loss": 3.3663, "step": 2101 }, { "epoch": 0.3, "learning_rate": 0.00014219269102990034, "loss": 3.3478, "step": 2102 }, { "epoch": 0.3, "learning_rate": 0.00014216380182002024, "loss": 3.2278, "step": 2103 }, { "epoch": 0.3, "learning_rate": 0.0001421349126101401, "loss": 3.4806, "step": 2104 }, { "epoch": 0.3, "learning_rate": 0.00014210602340026, "loss": 3.5108, "step": 2105 }, { "epoch": 0.3, "learning_rate": 0.0001420771341903799, "loss": 3.5468, "step": 2106 }, { "epoch": 0.3, "learning_rate": 0.00014204824498049981, "loss": 3.4712, "step": 2107 }, { "epoch": 0.3, "learning_rate": 0.00014201935577061968, "loss": 3.4026, "step": 2108 }, { "epoch": 0.3, "learning_rate": 0.00014199046656073957, "loss": 3.4801, "step": 2109 }, { "epoch": 0.3, "learning_rate": 0.00014196157735085947, "loss": 3.4062, "step": 2110 }, { "epoch": 0.3, "learning_rate": 0.00014193268814097936, "loss": 3.4245, "step": 2111 }, { "epoch": 0.3, "learning_rate": 0.00014190379893109923, "loss": 3.475, "step": 2112 }, { "epoch": 0.3, "eval_loss": 3.580632209777832, "eval_runtime": 471.7483, "eval_samples_per_second": 43.428, "eval_steps_per_second": 14.476, "step": 2112 }, { "epoch": 0.3, "learning_rate": 0.00014187490972121912, "loss": 3.4145, "step": 2113 }, { "epoch": 0.3, "learning_rate": 0.00014184602051133902, "loss": 3.3238, "step": 2114 }, { "epoch": 0.3, "learning_rate": 0.0001418171313014589, "loss": 3.3204, "step": 2115 }, { "epoch": 0.3, "learning_rate": 0.0001417882420915788, "loss": 3.2878, "step": 2116 }, { "epoch": 0.3, "learning_rate": 0.00014175935288169867, "loss": 3.4137, "step": 2117 }, { "epoch": 0.3, "learning_rate": 0.0001417304636718186, "loss": 3.4031, "step": 2118 }, { "epoch": 0.3, "learning_rate": 0.00014170157446193849, "loss": 3.4523, "step": 2119 }, { "epoch": 0.3, "learning_rate": 0.00014167268525205838, "loss": 3.3077, "step": 2120 }, { "epoch": 0.3, "learning_rate": 0.00014164379604217825, "loss": 3.4158, "step": 2121 }, { "epoch": 0.3, "learning_rate": 0.00014161490683229814, "loss": 3.3561, "step": 2122 }, { "epoch": 0.3, "learning_rate": 0.00014158601762241803, "loss": 3.3478, "step": 2123 }, { "epoch": 0.3, "learning_rate": 0.00014155712841253793, "loss": 3.4943, "step": 2124 }, { "epoch": 0.3, "learning_rate": 0.00014152823920265782, "loss": 3.387, "step": 2125 }, { "epoch": 0.3, "learning_rate": 0.0001414993499927777, "loss": 3.5522, "step": 2126 }, { "epoch": 0.3, "learning_rate": 0.00014147046078289758, "loss": 3.4068, "step": 2127 }, { "epoch": 0.3, "learning_rate": 0.0001414415715730175, "loss": 3.3601, "step": 2128 }, { "epoch": 0.3, "learning_rate": 0.0001414126823631374, "loss": 3.4129, "step": 2129 }, { "epoch": 0.3, "learning_rate": 0.00014138379315325726, "loss": 3.4063, "step": 2130 }, { "epoch": 0.3, "learning_rate": 0.00014135490394337716, "loss": 3.2681, "step": 2131 }, { "epoch": 0.3, "learning_rate": 0.00014132601473349705, "loss": 3.456, "step": 2132 }, { "epoch": 0.3, "learning_rate": 0.00014129712552361694, "loss": 3.4014, "step": 2133 }, { "epoch": 0.3, "learning_rate": 0.0001412682363137368, "loss": 3.2214, "step": 2134 }, { "epoch": 0.3, "learning_rate": 0.0001412393471038567, "loss": 3.2428, "step": 2135 }, { "epoch": 0.3, "learning_rate": 0.0001412104578939766, "loss": 3.3887, "step": 2136 }, { "epoch": 0.3, "learning_rate": 0.0001411815686840965, "loss": 3.5157, "step": 2137 }, { "epoch": 0.3, "learning_rate": 0.0001411526794742164, "loss": 3.4656, "step": 2138 }, { "epoch": 0.3, "learning_rate": 0.00014112379026433628, "loss": 3.4659, "step": 2139 }, { "epoch": 0.3, "learning_rate": 0.00014109490105445617, "loss": 3.4634, "step": 2140 }, { "epoch": 0.3, "learning_rate": 0.00014106601184457607, "loss": 3.4335, "step": 2141 }, { "epoch": 0.3, "learning_rate": 0.00014103712263469596, "loss": 3.4129, "step": 2142 }, { "epoch": 0.31, "learning_rate": 0.00014100823342481583, "loss": 3.38, "step": 2143 }, { "epoch": 0.31, "learning_rate": 0.00014097934421493572, "loss": 3.5137, "step": 2144 }, { "epoch": 0.31, "learning_rate": 0.00014095045500505562, "loss": 3.5617, "step": 2145 }, { "epoch": 0.31, "learning_rate": 0.0001409215657951755, "loss": 3.395, "step": 2146 }, { "epoch": 0.31, "learning_rate": 0.0001408926765852954, "loss": 3.5115, "step": 2147 }, { "epoch": 0.31, "learning_rate": 0.00014086378737541527, "loss": 3.4942, "step": 2148 }, { "epoch": 0.31, "learning_rate": 0.0001408348981655352, "loss": 3.5737, "step": 2149 }, { "epoch": 0.31, "learning_rate": 0.00014080600895565509, "loss": 3.5475, "step": 2150 }, { "epoch": 0.31, "learning_rate": 0.00014077711974577495, "loss": 3.3564, "step": 2151 }, { "epoch": 0.31, "learning_rate": 0.00014074823053589485, "loss": 3.4725, "step": 2152 }, { "epoch": 0.31, "learning_rate": 0.00014071934132601474, "loss": 3.407, "step": 2153 }, { "epoch": 0.31, "learning_rate": 0.00014069045211613463, "loss": 3.5516, "step": 2154 }, { "epoch": 0.31, "learning_rate": 0.00014066156290625453, "loss": 3.5385, "step": 2155 }, { "epoch": 0.31, "learning_rate": 0.0001406326736963744, "loss": 3.35, "step": 2156 }, { "epoch": 0.31, "learning_rate": 0.0001406037844864943, "loss": 3.3215, "step": 2157 }, { "epoch": 0.31, "learning_rate": 0.00014057489527661418, "loss": 3.4591, "step": 2158 }, { "epoch": 0.31, "learning_rate": 0.00014054600606673408, "loss": 3.4866, "step": 2159 }, { "epoch": 0.31, "learning_rate": 0.00014051711685685397, "loss": 3.4184, "step": 2160 }, { "epoch": 0.31, "learning_rate": 0.00014048822764697386, "loss": 3.3715, "step": 2161 }, { "epoch": 0.31, "learning_rate": 0.00014045933843709376, "loss": 3.5308, "step": 2162 }, { "epoch": 0.31, "learning_rate": 0.00014043044922721365, "loss": 3.3307, "step": 2163 }, { "epoch": 0.31, "learning_rate": 0.00014040156001733354, "loss": 3.4918, "step": 2164 }, { "epoch": 0.31, "learning_rate": 0.0001403726708074534, "loss": 3.2267, "step": 2165 }, { "epoch": 0.31, "learning_rate": 0.0001403437815975733, "loss": 3.3426, "step": 2166 }, { "epoch": 0.31, "learning_rate": 0.0001403148923876932, "loss": 3.3965, "step": 2167 }, { "epoch": 0.31, "learning_rate": 0.0001402860031778131, "loss": 3.5155, "step": 2168 }, { "epoch": 0.31, "learning_rate": 0.000140257113967933, "loss": 3.3793, "step": 2169 }, { "epoch": 0.31, "learning_rate": 0.00014022822475805288, "loss": 3.5449, "step": 2170 }, { "epoch": 0.31, "learning_rate": 0.00014019933554817277, "loss": 3.4593, "step": 2171 }, { "epoch": 0.31, "learning_rate": 0.00014017044633829267, "loss": 3.3046, "step": 2172 }, { "epoch": 0.31, "learning_rate": 0.00014014155712841254, "loss": 3.1801, "step": 2173 }, { "epoch": 0.31, "learning_rate": 0.00014011266791853243, "loss": 3.4525, "step": 2174 }, { "epoch": 0.31, "learning_rate": 0.00014008377870865232, "loss": 3.4199, "step": 2175 }, { "epoch": 0.31, "learning_rate": 0.00014005488949877222, "loss": 3.5184, "step": 2176 }, { "epoch": 0.31, "learning_rate": 0.0001400260002888921, "loss": 3.4598, "step": 2177 }, { "epoch": 0.31, "learning_rate": 0.00013999711107901198, "loss": 3.4862, "step": 2178 }, { "epoch": 0.31, "learning_rate": 0.00013996822186913187, "loss": 3.4429, "step": 2179 }, { "epoch": 0.31, "learning_rate": 0.0001399393326592518, "loss": 3.5418, "step": 2180 }, { "epoch": 0.31, "learning_rate": 0.00013991044344937169, "loss": 3.4268, "step": 2181 }, { "epoch": 0.31, "learning_rate": 0.00013988155423949155, "loss": 3.2958, "step": 2182 }, { "epoch": 0.31, "learning_rate": 0.00013985266502961145, "loss": 3.4215, "step": 2183 }, { "epoch": 0.31, "learning_rate": 0.00013982377581973134, "loss": 3.3225, "step": 2184 }, { "epoch": 0.31, "learning_rate": 0.00013979488660985123, "loss": 3.3378, "step": 2185 }, { "epoch": 0.31, "learning_rate": 0.00013976599739997113, "loss": 3.4642, "step": 2186 }, { "epoch": 0.31, "learning_rate": 0.000139737108190091, "loss": 3.4571, "step": 2187 }, { "epoch": 0.31, "learning_rate": 0.0001397082189802109, "loss": 3.3121, "step": 2188 }, { "epoch": 0.31, "learning_rate": 0.00013967932977033078, "loss": 3.4845, "step": 2189 }, { "epoch": 0.31, "learning_rate": 0.00013965044056045068, "loss": 3.3634, "step": 2190 }, { "epoch": 0.31, "learning_rate": 0.00013962155135057057, "loss": 3.3096, "step": 2191 }, { "epoch": 0.31, "learning_rate": 0.00013959266214069046, "loss": 3.3263, "step": 2192 }, { "epoch": 0.31, "learning_rate": 0.00013956377293081036, "loss": 3.12, "step": 2193 }, { "epoch": 0.31, "learning_rate": 0.00013953488372093025, "loss": 3.5795, "step": 2194 }, { "epoch": 0.31, "learning_rate": 0.00013950599451105012, "loss": 3.4364, "step": 2195 }, { "epoch": 0.31, "learning_rate": 0.00013947710530117, "loss": 3.3533, "step": 2196 }, { "epoch": 0.31, "learning_rate": 0.0001394482160912899, "loss": 3.5455, "step": 2197 }, { "epoch": 0.31, "learning_rate": 0.0001394193268814098, "loss": 3.3429, "step": 2198 }, { "epoch": 0.31, "learning_rate": 0.0001393904376715297, "loss": 3.4561, "step": 2199 }, { "epoch": 0.31, "learning_rate": 0.00013936154846164956, "loss": 3.2739, "step": 2200 }, { "epoch": 0.31, "learning_rate": 0.00013933265925176948, "loss": 3.3866, "step": 2201 }, { "epoch": 0.31, "learning_rate": 0.00013930377004188937, "loss": 3.369, "step": 2202 }, { "epoch": 0.31, "learning_rate": 0.00013927488083200927, "loss": 3.4938, "step": 2203 }, { "epoch": 0.31, "learning_rate": 0.00013924599162212914, "loss": 3.4248, "step": 2204 }, { "epoch": 0.31, "learning_rate": 0.00013921710241224903, "loss": 3.4534, "step": 2205 }, { "epoch": 0.31, "learning_rate": 0.00013918821320236892, "loss": 3.4099, "step": 2206 }, { "epoch": 0.31, "learning_rate": 0.00013915932399248882, "loss": 3.4923, "step": 2207 }, { "epoch": 0.31, "learning_rate": 0.0001391304347826087, "loss": 3.4412, "step": 2208 }, { "epoch": 0.31, "learning_rate": 0.00013910154557272858, "loss": 3.4822, "step": 2209 }, { "epoch": 0.31, "learning_rate": 0.00013907265636284847, "loss": 3.3436, "step": 2210 }, { "epoch": 0.31, "learning_rate": 0.00013904376715296836, "loss": 3.3572, "step": 2211 }, { "epoch": 0.31, "learning_rate": 0.00013901487794308829, "loss": 3.3505, "step": 2212 }, { "epoch": 0.32, "learning_rate": 0.00013898598873320815, "loss": 3.408, "step": 2213 }, { "epoch": 0.32, "learning_rate": 0.00013895709952332805, "loss": 3.3081, "step": 2214 }, { "epoch": 0.32, "learning_rate": 0.00013892821031344794, "loss": 3.3781, "step": 2215 }, { "epoch": 0.32, "learning_rate": 0.00013889932110356783, "loss": 3.4703, "step": 2216 }, { "epoch": 0.32, "learning_rate": 0.0001388704318936877, "loss": 3.6122, "step": 2217 }, { "epoch": 0.32, "learning_rate": 0.0001388415426838076, "loss": 3.3728, "step": 2218 }, { "epoch": 0.32, "learning_rate": 0.0001388126534739275, "loss": 3.5355, "step": 2219 }, { "epoch": 0.32, "learning_rate": 0.00013878376426404738, "loss": 3.4331, "step": 2220 }, { "epoch": 0.32, "learning_rate": 0.00013875487505416728, "loss": 3.377, "step": 2221 }, { "epoch": 0.32, "learning_rate": 0.00013872598584428717, "loss": 3.5025, "step": 2222 }, { "epoch": 0.32, "learning_rate": 0.00013869709663440706, "loss": 3.5106, "step": 2223 }, { "epoch": 0.32, "learning_rate": 0.00013866820742452696, "loss": 3.5311, "step": 2224 }, { "epoch": 0.32, "learning_rate": 0.00013863931821464685, "loss": 3.4397, "step": 2225 }, { "epoch": 0.32, "learning_rate": 0.00013861042900476672, "loss": 3.3628, "step": 2226 }, { "epoch": 0.32, "learning_rate": 0.0001385815397948866, "loss": 3.4772, "step": 2227 }, { "epoch": 0.32, "learning_rate": 0.0001385526505850065, "loss": 3.5013, "step": 2228 }, { "epoch": 0.32, "learning_rate": 0.0001385237613751264, "loss": 3.5671, "step": 2229 }, { "epoch": 0.32, "learning_rate": 0.0001384948721652463, "loss": 3.498, "step": 2230 }, { "epoch": 0.32, "learning_rate": 0.00013846598295536616, "loss": 3.3978, "step": 2231 }, { "epoch": 0.32, "learning_rate": 0.00013843709374548605, "loss": 3.3056, "step": 2232 }, { "epoch": 0.32, "learning_rate": 0.00013840820453560597, "loss": 3.3484, "step": 2233 }, { "epoch": 0.32, "learning_rate": 0.00013837931532572587, "loss": 3.4505, "step": 2234 }, { "epoch": 0.32, "learning_rate": 0.00013835042611584574, "loss": 3.4581, "step": 2235 }, { "epoch": 0.32, "learning_rate": 0.00013832153690596563, "loss": 3.4996, "step": 2236 }, { "epoch": 0.32, "learning_rate": 0.00013829264769608552, "loss": 3.2491, "step": 2237 }, { "epoch": 0.32, "learning_rate": 0.00013826375848620542, "loss": 3.3702, "step": 2238 }, { "epoch": 0.32, "learning_rate": 0.00013823486927632528, "loss": 3.4381, "step": 2239 }, { "epoch": 0.32, "learning_rate": 0.00013820598006644518, "loss": 3.4453, "step": 2240 }, { "epoch": 0.32, "learning_rate": 0.00013817709085656507, "loss": 3.4205, "step": 2241 }, { "epoch": 0.32, "learning_rate": 0.00013814820164668497, "loss": 3.5151, "step": 2242 }, { "epoch": 0.32, "learning_rate": 0.00013811931243680486, "loss": 3.4931, "step": 2243 }, { "epoch": 0.32, "learning_rate": 0.00013809042322692475, "loss": 3.4466, "step": 2244 }, { "epoch": 0.32, "learning_rate": 0.00013806153401704465, "loss": 3.3914, "step": 2245 }, { "epoch": 0.32, "learning_rate": 0.00013803264480716454, "loss": 3.4542, "step": 2246 }, { "epoch": 0.32, "learning_rate": 0.00013800375559728443, "loss": 3.3532, "step": 2247 }, { "epoch": 0.32, "learning_rate": 0.0001379748663874043, "loss": 3.4223, "step": 2248 }, { "epoch": 0.32, "learning_rate": 0.0001379459771775242, "loss": 3.3836, "step": 2249 }, { "epoch": 0.32, "learning_rate": 0.0001379170879676441, "loss": 3.4089, "step": 2250 }, { "epoch": 0.32, "learning_rate": 0.00013788819875776398, "loss": 3.4003, "step": 2251 }, { "epoch": 0.32, "learning_rate": 0.00013785930954788388, "loss": 3.3875, "step": 2252 }, { "epoch": 0.32, "learning_rate": 0.00013783042033800374, "loss": 3.512, "step": 2253 }, { "epoch": 0.32, "learning_rate": 0.00013780153112812366, "loss": 3.3483, "step": 2254 }, { "epoch": 0.32, "learning_rate": 0.00013777264191824356, "loss": 3.4381, "step": 2255 }, { "epoch": 0.32, "learning_rate": 0.00013774375270836345, "loss": 3.4253, "step": 2256 }, { "epoch": 0.32, "learning_rate": 0.00013771486349848332, "loss": 3.4555, "step": 2257 }, { "epoch": 0.32, "learning_rate": 0.0001376859742886032, "loss": 3.4515, "step": 2258 }, { "epoch": 0.32, "learning_rate": 0.0001376570850787231, "loss": 3.4872, "step": 2259 }, { "epoch": 0.32, "learning_rate": 0.000137628195868843, "loss": 3.2705, "step": 2260 }, { "epoch": 0.32, "learning_rate": 0.00013759930665896287, "loss": 3.4221, "step": 2261 }, { "epoch": 0.32, "learning_rate": 0.00013757041744908276, "loss": 3.3614, "step": 2262 }, { "epoch": 0.32, "learning_rate": 0.00013754152823920265, "loss": 3.3684, "step": 2263 }, { "epoch": 0.32, "learning_rate": 0.00013751263902932258, "loss": 3.4306, "step": 2264 }, { "epoch": 0.32, "learning_rate": 0.00013748374981944244, "loss": 3.2772, "step": 2265 }, { "epoch": 0.32, "learning_rate": 0.00013745486060956234, "loss": 3.4369, "step": 2266 }, { "epoch": 0.32, "learning_rate": 0.00013742597139968223, "loss": 3.3247, "step": 2267 }, { "epoch": 0.32, "learning_rate": 0.00013739708218980212, "loss": 3.5047, "step": 2268 }, { "epoch": 0.32, "learning_rate": 0.00013736819297992202, "loss": 3.3833, "step": 2269 }, { "epoch": 0.32, "learning_rate": 0.00013733930377004188, "loss": 3.2106, "step": 2270 }, { "epoch": 0.32, "learning_rate": 0.00013731041456016178, "loss": 3.3727, "step": 2271 }, { "epoch": 0.32, "learning_rate": 0.00013728152535028167, "loss": 3.3919, "step": 2272 }, { "epoch": 0.32, "learning_rate": 0.00013725263614040157, "loss": 3.3544, "step": 2273 }, { "epoch": 0.32, "learning_rate": 0.00013722374693052146, "loss": 3.3658, "step": 2274 }, { "epoch": 0.32, "learning_rate": 0.00013719485772064135, "loss": 3.4047, "step": 2275 }, { "epoch": 0.32, "learning_rate": 0.00013716596851076125, "loss": 3.3533, "step": 2276 }, { "epoch": 0.32, "learning_rate": 0.00013713707930088114, "loss": 3.3707, "step": 2277 }, { "epoch": 0.32, "learning_rate": 0.00013710819009100103, "loss": 3.2264, "step": 2278 }, { "epoch": 0.32, "learning_rate": 0.0001370793008811209, "loss": 3.39, "step": 2279 }, { "epoch": 0.32, "learning_rate": 0.0001370504116712408, "loss": 3.457, "step": 2280 }, { "epoch": 0.32, "learning_rate": 0.0001370215224613607, "loss": 3.2658, "step": 2281 }, { "epoch": 0.32, "learning_rate": 0.00013699263325148058, "loss": 3.3738, "step": 2282 }, { "epoch": 0.33, "learning_rate": 0.00013696374404160045, "loss": 3.3054, "step": 2283 }, { "epoch": 0.33, "learning_rate": 0.00013693485483172034, "loss": 3.4937, "step": 2284 }, { "epoch": 0.33, "learning_rate": 0.00013690596562184026, "loss": 3.3848, "step": 2285 }, { "epoch": 0.33, "learning_rate": 0.00013687707641196016, "loss": 3.5145, "step": 2286 }, { "epoch": 0.33, "learning_rate": 0.00013684818720208002, "loss": 3.4478, "step": 2287 }, { "epoch": 0.33, "learning_rate": 0.00013681929799219992, "loss": 3.5072, "step": 2288 }, { "epoch": 0.33, "learning_rate": 0.0001367904087823198, "loss": 3.4196, "step": 2289 }, { "epoch": 0.33, "learning_rate": 0.0001367615195724397, "loss": 3.3121, "step": 2290 }, { "epoch": 0.33, "learning_rate": 0.0001367326303625596, "loss": 3.4513, "step": 2291 }, { "epoch": 0.33, "learning_rate": 0.00013670374115267947, "loss": 3.4903, "step": 2292 }, { "epoch": 0.33, "learning_rate": 0.00013667485194279936, "loss": 3.39, "step": 2293 }, { "epoch": 0.33, "learning_rate": 0.00013664596273291925, "loss": 3.4293, "step": 2294 }, { "epoch": 0.33, "learning_rate": 0.00013661707352303915, "loss": 3.3947, "step": 2295 }, { "epoch": 0.33, "learning_rate": 0.00013658818431315904, "loss": 3.376, "step": 2296 }, { "epoch": 0.33, "learning_rate": 0.00013655929510327894, "loss": 3.3409, "step": 2297 }, { "epoch": 0.33, "learning_rate": 0.00013653040589339883, "loss": 3.2811, "step": 2298 }, { "epoch": 0.33, "learning_rate": 0.00013650151668351872, "loss": 3.4879, "step": 2299 }, { "epoch": 0.33, "learning_rate": 0.0001364726274736386, "loss": 3.5118, "step": 2300 }, { "epoch": 0.33, "learning_rate": 0.00013644373826375848, "loss": 3.4129, "step": 2301 }, { "epoch": 0.33, "learning_rate": 0.00013641484905387838, "loss": 3.4909, "step": 2302 }, { "epoch": 0.33, "learning_rate": 0.00013638595984399827, "loss": 3.3344, "step": 2303 }, { "epoch": 0.33, "learning_rate": 0.00013635707063411817, "loss": 3.2765, "step": 2304 }, { "epoch": 0.33, "learning_rate": 0.00013632818142423803, "loss": 3.4694, "step": 2305 }, { "epoch": 0.33, "learning_rate": 0.00013629929221435795, "loss": 3.3702, "step": 2306 }, { "epoch": 0.33, "learning_rate": 0.00013627040300447785, "loss": 3.3879, "step": 2307 }, { "epoch": 0.33, "learning_rate": 0.00013624151379459774, "loss": 3.4246, "step": 2308 }, { "epoch": 0.33, "learning_rate": 0.0001362126245847176, "loss": 3.3766, "step": 2309 }, { "epoch": 0.33, "learning_rate": 0.0001361837353748375, "loss": 3.4544, "step": 2310 }, { "epoch": 0.33, "learning_rate": 0.0001361548461649574, "loss": 3.3527, "step": 2311 }, { "epoch": 0.33, "learning_rate": 0.0001361259569550773, "loss": 3.3898, "step": 2312 }, { "epoch": 0.33, "learning_rate": 0.00013609706774519718, "loss": 3.3793, "step": 2313 }, { "epoch": 0.33, "learning_rate": 0.00013606817853531705, "loss": 3.3719, "step": 2314 }, { "epoch": 0.33, "learning_rate": 0.00013603928932543694, "loss": 3.5055, "step": 2315 }, { "epoch": 0.33, "learning_rate": 0.00013601040011555684, "loss": 3.4176, "step": 2316 }, { "epoch": 0.33, "learning_rate": 0.00013598151090567676, "loss": 3.1551, "step": 2317 }, { "epoch": 0.33, "learning_rate": 0.00013595262169579662, "loss": 3.4452, "step": 2318 }, { "epoch": 0.33, "learning_rate": 0.00013592373248591652, "loss": 3.351, "step": 2319 }, { "epoch": 0.33, "learning_rate": 0.0001358948432760364, "loss": 3.3997, "step": 2320 }, { "epoch": 0.33, "learning_rate": 0.0001358659540661563, "loss": 3.2877, "step": 2321 }, { "epoch": 0.33, "learning_rate": 0.00013583706485627617, "loss": 3.3394, "step": 2322 }, { "epoch": 0.33, "learning_rate": 0.00013580817564639607, "loss": 3.5017, "step": 2323 }, { "epoch": 0.33, "learning_rate": 0.00013577928643651596, "loss": 3.306, "step": 2324 }, { "epoch": 0.33, "learning_rate": 0.00013575039722663585, "loss": 3.2135, "step": 2325 }, { "epoch": 0.33, "learning_rate": 0.00013572150801675575, "loss": 3.1845, "step": 2326 }, { "epoch": 0.33, "learning_rate": 0.00013569261880687564, "loss": 3.1219, "step": 2327 }, { "epoch": 0.33, "learning_rate": 0.00013566372959699554, "loss": 3.4218, "step": 2328 }, { "epoch": 0.33, "learning_rate": 0.00013563484038711543, "loss": 3.425, "step": 2329 }, { "epoch": 0.33, "learning_rate": 0.00013560595117723532, "loss": 3.3029, "step": 2330 }, { "epoch": 0.33, "learning_rate": 0.0001355770619673552, "loss": 3.3517, "step": 2331 }, { "epoch": 0.33, "learning_rate": 0.00013554817275747508, "loss": 3.3517, "step": 2332 }, { "epoch": 0.33, "learning_rate": 0.00013551928354759498, "loss": 3.444, "step": 2333 }, { "epoch": 0.33, "learning_rate": 0.00013549039433771487, "loss": 3.3205, "step": 2334 }, { "epoch": 0.33, "learning_rate": 0.00013546150512783477, "loss": 3.4313, "step": 2335 }, { "epoch": 0.33, "learning_rate": 0.00013543261591795463, "loss": 3.4755, "step": 2336 }, { "epoch": 0.33, "learning_rate": 0.00013540372670807453, "loss": 3.3698, "step": 2337 }, { "epoch": 0.33, "learning_rate": 0.00013537483749819445, "loss": 3.5406, "step": 2338 }, { "epoch": 0.33, "learning_rate": 0.00013534594828831434, "loss": 3.4552, "step": 2339 }, { "epoch": 0.33, "learning_rate": 0.0001353170590784342, "loss": 3.3954, "step": 2340 }, { "epoch": 0.33, "learning_rate": 0.0001352881698685541, "loss": 3.3485, "step": 2341 }, { "epoch": 0.33, "learning_rate": 0.000135259280658674, "loss": 3.4813, "step": 2342 }, { "epoch": 0.33, "learning_rate": 0.0001352303914487939, "loss": 3.4158, "step": 2343 }, { "epoch": 0.33, "learning_rate": 0.00013520150223891376, "loss": 3.416, "step": 2344 }, { "epoch": 0.33, "learning_rate": 0.00013517261302903365, "loss": 3.4413, "step": 2345 }, { "epoch": 0.33, "learning_rate": 0.00013514372381915354, "loss": 3.4087, "step": 2346 }, { "epoch": 0.33, "learning_rate": 0.00013511483460927344, "loss": 3.4527, "step": 2347 }, { "epoch": 0.33, "learning_rate": 0.00013508594539939333, "loss": 3.4639, "step": 2348 }, { "epoch": 0.33, "learning_rate": 0.00013505705618951322, "loss": 3.2068, "step": 2349 }, { "epoch": 0.33, "learning_rate": 0.00013502816697963312, "loss": 3.5575, "step": 2350 }, { "epoch": 0.33, "learning_rate": 0.000134999277769753, "loss": 3.1795, "step": 2351 }, { "epoch": 0.33, "learning_rate": 0.0001349703885598729, "loss": 3.4778, "step": 2352 }, { "epoch": 0.34, "learning_rate": 0.00013494149934999277, "loss": 3.4111, "step": 2353 }, { "epoch": 0.34, "learning_rate": 0.00013491261014011267, "loss": 3.1266, "step": 2354 }, { "epoch": 0.34, "learning_rate": 0.00013488372093023256, "loss": 3.3929, "step": 2355 }, { "epoch": 0.34, "learning_rate": 0.00013485483172035245, "loss": 3.3329, "step": 2356 }, { "epoch": 0.34, "learning_rate": 0.00013482594251047235, "loss": 3.4474, "step": 2357 }, { "epoch": 0.34, "learning_rate": 0.00013479705330059222, "loss": 3.4609, "step": 2358 }, { "epoch": 0.34, "learning_rate": 0.00013476816409071214, "loss": 3.3622, "step": 2359 }, { "epoch": 0.34, "learning_rate": 0.00013473927488083203, "loss": 3.3481, "step": 2360 }, { "epoch": 0.34, "learning_rate": 0.00013471038567095192, "loss": 3.4563, "step": 2361 }, { "epoch": 0.34, "learning_rate": 0.0001346814964610718, "loss": 3.295, "step": 2362 }, { "epoch": 0.34, "learning_rate": 0.00013465260725119168, "loss": 3.4981, "step": 2363 }, { "epoch": 0.34, "learning_rate": 0.00013462371804131158, "loss": 3.4384, "step": 2364 }, { "epoch": 0.34, "learning_rate": 0.00013459482883143147, "loss": 3.3174, "step": 2365 }, { "epoch": 0.34, "learning_rate": 0.00013456593962155134, "loss": 3.431, "step": 2366 }, { "epoch": 0.34, "learning_rate": 0.00013453705041167123, "loss": 3.4281, "step": 2367 }, { "epoch": 0.34, "learning_rate": 0.00013450816120179113, "loss": 3.4862, "step": 2368 }, { "epoch": 0.34, "learning_rate": 0.00013447927199191105, "loss": 3.4451, "step": 2369 }, { "epoch": 0.34, "learning_rate": 0.00013445038278203091, "loss": 3.3016, "step": 2370 }, { "epoch": 0.34, "learning_rate": 0.0001344214935721508, "loss": 3.3705, "step": 2371 }, { "epoch": 0.34, "learning_rate": 0.0001343926043622707, "loss": 3.3258, "step": 2372 }, { "epoch": 0.34, "learning_rate": 0.0001343637151523906, "loss": 3.3826, "step": 2373 }, { "epoch": 0.34, "learning_rate": 0.0001343348259425105, "loss": 3.3718, "step": 2374 }, { "epoch": 0.34, "learning_rate": 0.00013430593673263036, "loss": 3.4815, "step": 2375 }, { "epoch": 0.34, "learning_rate": 0.00013427704752275025, "loss": 3.3265, "step": 2376 }, { "epoch": 0.34, "learning_rate": 0.00013424815831287014, "loss": 3.4326, "step": 2377 }, { "epoch": 0.34, "learning_rate": 0.00013421926910299004, "loss": 3.3151, "step": 2378 }, { "epoch": 0.34, "learning_rate": 0.00013419037989310993, "loss": 3.324, "step": 2379 }, { "epoch": 0.34, "learning_rate": 0.00013416149068322983, "loss": 3.2986, "step": 2380 }, { "epoch": 0.34, "learning_rate": 0.00013413260147334972, "loss": 3.3388, "step": 2381 }, { "epoch": 0.34, "learning_rate": 0.0001341037122634696, "loss": 3.2725, "step": 2382 }, { "epoch": 0.34, "learning_rate": 0.0001340748230535895, "loss": 3.4595, "step": 2383 }, { "epoch": 0.34, "learning_rate": 0.00013404593384370937, "loss": 3.4725, "step": 2384 }, { "epoch": 0.34, "learning_rate": 0.00013401704463382927, "loss": 3.4385, "step": 2385 }, { "epoch": 0.34, "learning_rate": 0.00013398815542394916, "loss": 3.3203, "step": 2386 }, { "epoch": 0.34, "learning_rate": 0.00013395926621406905, "loss": 3.5133, "step": 2387 }, { "epoch": 0.34, "learning_rate": 0.00013393037700418892, "loss": 3.4137, "step": 2388 }, { "epoch": 0.34, "learning_rate": 0.00013390148779430882, "loss": 3.4752, "step": 2389 }, { "epoch": 0.34, "learning_rate": 0.00013387259858442874, "loss": 3.3159, "step": 2390 }, { "epoch": 0.34, "learning_rate": 0.00013384370937454863, "loss": 3.37, "step": 2391 }, { "epoch": 0.34, "learning_rate": 0.0001338148201646685, "loss": 3.4084, "step": 2392 }, { "epoch": 0.34, "learning_rate": 0.0001337859309547884, "loss": 3.4467, "step": 2393 }, { "epoch": 0.34, "learning_rate": 0.00013375704174490828, "loss": 3.3266, "step": 2394 }, { "epoch": 0.34, "learning_rate": 0.00013372815253502818, "loss": 3.2987, "step": 2395 }, { "epoch": 0.34, "learning_rate": 0.00013369926332514807, "loss": 3.3875, "step": 2396 }, { "epoch": 0.34, "learning_rate": 0.00013367037411526794, "loss": 3.417, "step": 2397 }, { "epoch": 0.34, "learning_rate": 0.00013364148490538783, "loss": 3.4194, "step": 2398 }, { "epoch": 0.34, "learning_rate": 0.00013361259569550773, "loss": 3.4125, "step": 2399 }, { "epoch": 0.34, "learning_rate": 0.00013358370648562765, "loss": 3.3874, "step": 2400 }, { "epoch": 0.34, "learning_rate": 0.00013355481727574751, "loss": 3.4618, "step": 2401 }, { "epoch": 0.34, "learning_rate": 0.0001335259280658674, "loss": 3.4174, "step": 2402 }, { "epoch": 0.34, "learning_rate": 0.0001334970388559873, "loss": 3.3945, "step": 2403 }, { "epoch": 0.34, "learning_rate": 0.0001334681496461072, "loss": 3.4469, "step": 2404 }, { "epoch": 0.34, "learning_rate": 0.0001334392604362271, "loss": 3.5016, "step": 2405 }, { "epoch": 0.34, "learning_rate": 0.00013341037122634696, "loss": 3.4477, "step": 2406 }, { "epoch": 0.34, "learning_rate": 0.00013338148201646685, "loss": 3.3503, "step": 2407 }, { "epoch": 0.34, "learning_rate": 0.00013335259280658674, "loss": 3.3697, "step": 2408 }, { "epoch": 0.34, "learning_rate": 0.00013332370359670664, "loss": 3.2187, "step": 2409 }, { "epoch": 0.34, "learning_rate": 0.0001332948143868265, "loss": 3.3541, "step": 2410 }, { "epoch": 0.34, "learning_rate": 0.00013326592517694643, "loss": 3.4176, "step": 2411 }, { "epoch": 0.34, "learning_rate": 0.00013323703596706632, "loss": 3.3427, "step": 2412 }, { "epoch": 0.34, "learning_rate": 0.0001332081467571862, "loss": 3.3391, "step": 2413 }, { "epoch": 0.34, "learning_rate": 0.00013317925754730608, "loss": 3.392, "step": 2414 }, { "epoch": 0.34, "learning_rate": 0.00013315036833742597, "loss": 3.3353, "step": 2415 }, { "epoch": 0.34, "learning_rate": 0.00013312147912754587, "loss": 3.4673, "step": 2416 }, { "epoch": 0.34, "learning_rate": 0.00013309258991766576, "loss": 3.315, "step": 2417 }, { "epoch": 0.34, "learning_rate": 0.00013306370070778565, "loss": 3.1815, "step": 2418 }, { "epoch": 0.34, "learning_rate": 0.00013303481149790552, "loss": 3.3533, "step": 2419 }, { "epoch": 0.34, "learning_rate": 0.00013300592228802542, "loss": 3.2504, "step": 2420 }, { "epoch": 0.34, "learning_rate": 0.00013297703307814534, "loss": 3.4406, "step": 2421 }, { "epoch": 0.34, "learning_rate": 0.00013294814386826523, "loss": 3.3511, "step": 2422 }, { "epoch": 0.34, "learning_rate": 0.0001329192546583851, "loss": 3.4214, "step": 2423 }, { "epoch": 0.35, "learning_rate": 0.000132890365448505, "loss": 3.3204, "step": 2424 }, { "epoch": 0.35, "learning_rate": 0.00013286147623862488, "loss": 3.4374, "step": 2425 }, { "epoch": 0.35, "learning_rate": 0.00013283258702874478, "loss": 3.405, "step": 2426 }, { "epoch": 0.35, "learning_rate": 0.00013280369781886465, "loss": 3.2408, "step": 2427 }, { "epoch": 0.35, "learning_rate": 0.00013277480860898454, "loss": 3.3318, "step": 2428 }, { "epoch": 0.35, "learning_rate": 0.00013274591939910443, "loss": 3.4731, "step": 2429 }, { "epoch": 0.35, "learning_rate": 0.00013271703018922433, "loss": 3.4582, "step": 2430 }, { "epoch": 0.35, "learning_rate": 0.00013268814097934422, "loss": 3.4412, "step": 2431 }, { "epoch": 0.35, "learning_rate": 0.00013265925176946411, "loss": 3.3804, "step": 2432 }, { "epoch": 0.35, "learning_rate": 0.000132630362559584, "loss": 3.4339, "step": 2433 }, { "epoch": 0.35, "learning_rate": 0.0001326014733497039, "loss": 3.2806, "step": 2434 }, { "epoch": 0.35, "learning_rate": 0.0001325725841398238, "loss": 3.2909, "step": 2435 }, { "epoch": 0.35, "learning_rate": 0.00013254369492994366, "loss": 3.4289, "step": 2436 }, { "epoch": 0.35, "learning_rate": 0.00013251480572006356, "loss": 3.1086, "step": 2437 }, { "epoch": 0.35, "learning_rate": 0.00013248591651018345, "loss": 3.4366, "step": 2438 }, { "epoch": 0.35, "learning_rate": 0.00013245702730030334, "loss": 3.4952, "step": 2439 }, { "epoch": 0.35, "learning_rate": 0.00013242813809042324, "loss": 3.2876, "step": 2440 }, { "epoch": 0.35, "learning_rate": 0.0001323992488805431, "loss": 3.3862, "step": 2441 }, { "epoch": 0.35, "learning_rate": 0.00013237035967066303, "loss": 3.3508, "step": 2442 }, { "epoch": 0.35, "learning_rate": 0.00013234147046078292, "loss": 3.5075, "step": 2443 }, { "epoch": 0.35, "learning_rate": 0.0001323125812509028, "loss": 3.0807, "step": 2444 }, { "epoch": 0.35, "learning_rate": 0.00013228369204102268, "loss": 3.3371, "step": 2445 }, { "epoch": 0.35, "learning_rate": 0.00013225480283114257, "loss": 3.3085, "step": 2446 }, { "epoch": 0.35, "learning_rate": 0.00013222591362126247, "loss": 3.4499, "step": 2447 }, { "epoch": 0.35, "learning_rate": 0.00013219702441138236, "loss": 3.474, "step": 2448 }, { "epoch": 0.35, "learning_rate": 0.00013216813520150223, "loss": 3.4154, "step": 2449 }, { "epoch": 0.35, "learning_rate": 0.00013213924599162212, "loss": 3.5276, "step": 2450 }, { "epoch": 0.35, "learning_rate": 0.00013211035678174202, "loss": 3.432, "step": 2451 }, { "epoch": 0.35, "learning_rate": 0.0001320814675718619, "loss": 3.3362, "step": 2452 }, { "epoch": 0.35, "learning_rate": 0.0001320525783619818, "loss": 3.3559, "step": 2453 }, { "epoch": 0.35, "learning_rate": 0.0001320236891521017, "loss": 3.2714, "step": 2454 }, { "epoch": 0.35, "learning_rate": 0.0001319947999422216, "loss": 3.2934, "step": 2455 }, { "epoch": 0.35, "learning_rate": 0.00013196591073234148, "loss": 3.3839, "step": 2456 }, { "epoch": 0.35, "learning_rate": 0.00013193702152246138, "loss": 3.3512, "step": 2457 }, { "epoch": 0.35, "learning_rate": 0.00013190813231258125, "loss": 3.2705, "step": 2458 }, { "epoch": 0.35, "learning_rate": 0.00013187924310270114, "loss": 3.4098, "step": 2459 }, { "epoch": 0.35, "learning_rate": 0.00013185035389282103, "loss": 3.2906, "step": 2460 }, { "epoch": 0.35, "learning_rate": 0.00013182146468294093, "loss": 3.389, "step": 2461 }, { "epoch": 0.35, "learning_rate": 0.00013179257547306082, "loss": 3.354, "step": 2462 }, { "epoch": 0.35, "learning_rate": 0.00013176368626318071, "loss": 3.4475, "step": 2463 }, { "epoch": 0.35, "learning_rate": 0.0001317347970533006, "loss": 3.2476, "step": 2464 }, { "epoch": 0.35, "eval_loss": 3.5674850940704346, "eval_runtime": 473.0302, "eval_samples_per_second": 43.31, "eval_steps_per_second": 14.437, "step": 2464 }, { "epoch": 0.35, "learning_rate": 0.0001317059078434205, "loss": 3.4002, "step": 2465 }, { "epoch": 0.35, "learning_rate": 0.0001316770186335404, "loss": 3.429, "step": 2466 }, { "epoch": 0.35, "learning_rate": 0.00013164812942366026, "loss": 3.3282, "step": 2467 }, { "epoch": 0.35, "learning_rate": 0.00013161924021378016, "loss": 3.1434, "step": 2468 }, { "epoch": 0.35, "learning_rate": 0.00013159035100390005, "loss": 3.3893, "step": 2469 }, { "epoch": 0.35, "learning_rate": 0.00013156146179401994, "loss": 3.3807, "step": 2470 }, { "epoch": 0.35, "learning_rate": 0.0001315325725841398, "loss": 3.374, "step": 2471 }, { "epoch": 0.35, "learning_rate": 0.0001315036833742597, "loss": 3.3385, "step": 2472 }, { "epoch": 0.35, "learning_rate": 0.0001314747941643796, "loss": 3.3285, "step": 2473 }, { "epoch": 0.35, "learning_rate": 0.00013144590495449952, "loss": 3.4922, "step": 2474 }, { "epoch": 0.35, "learning_rate": 0.00013141701574461939, "loss": 3.4045, "step": 2475 }, { "epoch": 0.35, "learning_rate": 0.00013138812653473928, "loss": 3.3272, "step": 2476 }, { "epoch": 0.35, "learning_rate": 0.00013135923732485917, "loss": 3.373, "step": 2477 }, { "epoch": 0.35, "learning_rate": 0.00013133034811497907, "loss": 3.3136, "step": 2478 }, { "epoch": 0.35, "learning_rate": 0.00013130145890509896, "loss": 3.2604, "step": 2479 }, { "epoch": 0.35, "learning_rate": 0.00013127256969521883, "loss": 3.4602, "step": 2480 }, { "epoch": 0.35, "learning_rate": 0.00013124368048533872, "loss": 3.361, "step": 2481 }, { "epoch": 0.35, "learning_rate": 0.00013121479127545862, "loss": 3.3301, "step": 2482 }, { "epoch": 0.35, "learning_rate": 0.0001311859020655785, "loss": 3.3257, "step": 2483 }, { "epoch": 0.35, "learning_rate": 0.0001311570128556984, "loss": 3.3816, "step": 2484 }, { "epoch": 0.35, "learning_rate": 0.0001311281236458183, "loss": 3.4853, "step": 2485 }, { "epoch": 0.35, "learning_rate": 0.0001310992344359382, "loss": 3.4588, "step": 2486 }, { "epoch": 0.35, "learning_rate": 0.00013107034522605809, "loss": 3.369, "step": 2487 }, { "epoch": 0.35, "learning_rate": 0.00013104145601617798, "loss": 3.3041, "step": 2488 }, { "epoch": 0.35, "learning_rate": 0.00013101256680629785, "loss": 3.4367, "step": 2489 }, { "epoch": 0.35, "learning_rate": 0.00013098367759641774, "loss": 3.2189, "step": 2490 }, { "epoch": 0.35, "learning_rate": 0.00013095478838653763, "loss": 3.2695, "step": 2491 }, { "epoch": 0.35, "learning_rate": 0.00013092589917665753, "loss": 3.4266, "step": 2492 }, { "epoch": 0.35, "learning_rate": 0.0001308970099667774, "loss": 3.4851, "step": 2493 }, { "epoch": 0.36, "learning_rate": 0.0001308681207568973, "loss": 3.4867, "step": 2494 }, { "epoch": 0.36, "learning_rate": 0.0001308392315470172, "loss": 3.5863, "step": 2495 }, { "epoch": 0.36, "learning_rate": 0.0001308103423371371, "loss": 3.3697, "step": 2496 }, { "epoch": 0.36, "learning_rate": 0.00013078145312725697, "loss": 3.4082, "step": 2497 }, { "epoch": 0.36, "learning_rate": 0.00013075256391737686, "loss": 3.4489, "step": 2498 }, { "epoch": 0.36, "learning_rate": 0.00013072367470749676, "loss": 3.4199, "step": 2499 }, { "epoch": 0.36, "learning_rate": 0.00013069478549761665, "loss": 3.354, "step": 2500 }, { "epoch": 0.36, "learning_rate": 0.00013066589628773654, "loss": 3.4063, "step": 2501 }, { "epoch": 0.36, "learning_rate": 0.0001306370070778564, "loss": 3.4643, "step": 2502 }, { "epoch": 0.36, "learning_rate": 0.0001306081178679763, "loss": 3.3565, "step": 2503 }, { "epoch": 0.36, "learning_rate": 0.0001305792286580962, "loss": 3.4586, "step": 2504 }, { "epoch": 0.36, "learning_rate": 0.00013055033944821612, "loss": 3.4305, "step": 2505 }, { "epoch": 0.36, "learning_rate": 0.00013052145023833599, "loss": 3.4861, "step": 2506 }, { "epoch": 0.36, "learning_rate": 0.00013049256102845588, "loss": 3.2312, "step": 2507 }, { "epoch": 0.36, "learning_rate": 0.00013046367181857577, "loss": 3.2431, "step": 2508 }, { "epoch": 0.36, "learning_rate": 0.00013043478260869567, "loss": 3.5262, "step": 2509 }, { "epoch": 0.36, "learning_rate": 0.00013040589339881556, "loss": 3.3597, "step": 2510 }, { "epoch": 0.36, "learning_rate": 0.00013037700418893543, "loss": 3.3521, "step": 2511 }, { "epoch": 0.36, "learning_rate": 0.00013034811497905532, "loss": 3.2393, "step": 2512 }, { "epoch": 0.36, "learning_rate": 0.00013031922576917522, "loss": 3.494, "step": 2513 }, { "epoch": 0.36, "learning_rate": 0.0001302903365592951, "loss": 3.5039, "step": 2514 }, { "epoch": 0.36, "learning_rate": 0.00013026144734941498, "loss": 3.3433, "step": 2515 }, { "epoch": 0.36, "learning_rate": 0.0001302325581395349, "loss": 3.4088, "step": 2516 }, { "epoch": 0.36, "learning_rate": 0.0001302036689296548, "loss": 3.2848, "step": 2517 }, { "epoch": 0.36, "learning_rate": 0.00013017477971977469, "loss": 3.4788, "step": 2518 }, { "epoch": 0.36, "learning_rate": 0.00013014589050989455, "loss": 3.2091, "step": 2519 }, { "epoch": 0.36, "learning_rate": 0.00013011700130001445, "loss": 3.3265, "step": 2520 }, { "epoch": 0.36, "learning_rate": 0.00013008811209013434, "loss": 3.4179, "step": 2521 }, { "epoch": 0.36, "learning_rate": 0.00013005922288025423, "loss": 3.4583, "step": 2522 }, { "epoch": 0.36, "learning_rate": 0.00013003033367037413, "loss": 3.4588, "step": 2523 }, { "epoch": 0.36, "learning_rate": 0.000130001444460494, "loss": 3.3187, "step": 2524 }, { "epoch": 0.36, "learning_rate": 0.0001299725552506139, "loss": 3.3585, "step": 2525 }, { "epoch": 0.36, "learning_rate": 0.0001299436660407338, "loss": 3.2796, "step": 2526 }, { "epoch": 0.36, "learning_rate": 0.0001299147768308537, "loss": 3.3668, "step": 2527 }, { "epoch": 0.36, "learning_rate": 0.00012988588762097357, "loss": 3.2533, "step": 2528 }, { "epoch": 0.36, "learning_rate": 0.00012985699841109346, "loss": 3.5402, "step": 2529 }, { "epoch": 0.36, "learning_rate": 0.00012982810920121336, "loss": 3.336, "step": 2530 }, { "epoch": 0.36, "learning_rate": 0.00012979921999133325, "loss": 3.4623, "step": 2531 }, { "epoch": 0.36, "learning_rate": 0.00012977033078145314, "loss": 3.4201, "step": 2532 }, { "epoch": 0.36, "learning_rate": 0.000129741441571573, "loss": 3.3559, "step": 2533 }, { "epoch": 0.36, "learning_rate": 0.0001297125523616929, "loss": 3.3933, "step": 2534 }, { "epoch": 0.36, "learning_rate": 0.0001296836631518128, "loss": 3.4174, "step": 2535 }, { "epoch": 0.36, "learning_rate": 0.0001296547739419327, "loss": 3.3951, "step": 2536 }, { "epoch": 0.36, "learning_rate": 0.0001296258847320526, "loss": 3.2896, "step": 2537 }, { "epoch": 0.36, "learning_rate": 0.00012959699552217248, "loss": 3.4864, "step": 2538 }, { "epoch": 0.36, "learning_rate": 0.00012956810631229237, "loss": 3.4116, "step": 2539 }, { "epoch": 0.36, "learning_rate": 0.00012953921710241227, "loss": 3.4263, "step": 2540 }, { "epoch": 0.36, "learning_rate": 0.00012951032789253213, "loss": 3.4341, "step": 2541 }, { "epoch": 0.36, "learning_rate": 0.00012948143868265203, "loss": 3.2924, "step": 2542 }, { "epoch": 0.36, "learning_rate": 0.00012945254947277192, "loss": 3.3333, "step": 2543 }, { "epoch": 0.36, "learning_rate": 0.00012942366026289182, "loss": 3.3644, "step": 2544 }, { "epoch": 0.36, "learning_rate": 0.0001293947710530117, "loss": 3.3937, "step": 2545 }, { "epoch": 0.36, "learning_rate": 0.00012936588184313158, "loss": 3.3669, "step": 2546 }, { "epoch": 0.36, "learning_rate": 0.0001293369926332515, "loss": 3.4506, "step": 2547 }, { "epoch": 0.36, "learning_rate": 0.0001293081034233714, "loss": 3.4005, "step": 2548 }, { "epoch": 0.36, "learning_rate": 0.00012927921421349129, "loss": 3.347, "step": 2549 }, { "epoch": 0.36, "learning_rate": 0.00012925032500361115, "loss": 3.3494, "step": 2550 }, { "epoch": 0.36, "learning_rate": 0.00012922143579373105, "loss": 3.3931, "step": 2551 }, { "epoch": 0.36, "learning_rate": 0.00012919254658385094, "loss": 3.4486, "step": 2552 }, { "epoch": 0.36, "learning_rate": 0.00012916365737397083, "loss": 3.3955, "step": 2553 }, { "epoch": 0.36, "learning_rate": 0.0001291347681640907, "loss": 3.323, "step": 2554 }, { "epoch": 0.36, "learning_rate": 0.0001291058789542106, "loss": 3.4641, "step": 2555 }, { "epoch": 0.36, "learning_rate": 0.0001290769897443305, "loss": 3.4741, "step": 2556 }, { "epoch": 0.36, "learning_rate": 0.00012904810053445038, "loss": 3.3527, "step": 2557 }, { "epoch": 0.36, "learning_rate": 0.00012901921132457028, "loss": 3.4107, "step": 2558 }, { "epoch": 0.36, "learning_rate": 0.00012899032211469017, "loss": 3.3844, "step": 2559 }, { "epoch": 0.36, "learning_rate": 0.00012896143290481006, "loss": 3.3398, "step": 2560 }, { "epoch": 0.36, "learning_rate": 0.00012893254369492996, "loss": 3.3155, "step": 2561 }, { "epoch": 0.36, "learning_rate": 0.00012890365448504985, "loss": 3.4171, "step": 2562 }, { "epoch": 0.36, "learning_rate": 0.00012887476527516972, "loss": 3.3677, "step": 2563 }, { "epoch": 0.37, "learning_rate": 0.0001288458760652896, "loss": 3.1637, "step": 2564 }, { "epoch": 0.37, "learning_rate": 0.0001288169868554095, "loss": 3.3501, "step": 2565 }, { "epoch": 0.37, "learning_rate": 0.0001287880976455294, "loss": 3.2347, "step": 2566 }, { "epoch": 0.37, "learning_rate": 0.0001287592084356493, "loss": 3.3296, "step": 2567 }, { "epoch": 0.37, "learning_rate": 0.0001287303192257692, "loss": 3.4267, "step": 2568 }, { "epoch": 0.37, "learning_rate": 0.00012870143001588908, "loss": 3.3299, "step": 2569 }, { "epoch": 0.37, "learning_rate": 0.00012867254080600897, "loss": 3.3744, "step": 2570 }, { "epoch": 0.37, "learning_rate": 0.00012864365159612887, "loss": 3.4453, "step": 2571 }, { "epoch": 0.37, "learning_rate": 0.00012861476238624873, "loss": 3.4061, "step": 2572 }, { "epoch": 0.37, "learning_rate": 0.00012858587317636863, "loss": 3.3093, "step": 2573 }, { "epoch": 0.37, "learning_rate": 0.00012855698396648852, "loss": 3.1693, "step": 2574 }, { "epoch": 0.37, "learning_rate": 0.00012852809475660842, "loss": 3.1817, "step": 2575 }, { "epoch": 0.37, "learning_rate": 0.00012849920554672828, "loss": 3.4069, "step": 2576 }, { "epoch": 0.37, "learning_rate": 0.00012847031633684818, "loss": 3.5068, "step": 2577 }, { "epoch": 0.37, "learning_rate": 0.0001284414271269681, "loss": 3.3693, "step": 2578 }, { "epoch": 0.37, "learning_rate": 0.000128412537917088, "loss": 3.3957, "step": 2579 }, { "epoch": 0.37, "learning_rate": 0.00012838364870720786, "loss": 3.36, "step": 2580 }, { "epoch": 0.37, "learning_rate": 0.00012835475949732775, "loss": 3.3984, "step": 2581 }, { "epoch": 0.37, "learning_rate": 0.00012832587028744765, "loss": 3.1863, "step": 2582 }, { "epoch": 0.37, "learning_rate": 0.00012829698107756754, "loss": 3.3402, "step": 2583 }, { "epoch": 0.37, "learning_rate": 0.00012826809186768743, "loss": 3.3369, "step": 2584 }, { "epoch": 0.37, "learning_rate": 0.0001282392026578073, "loss": 3.3675, "step": 2585 }, { "epoch": 0.37, "learning_rate": 0.0001282103134479272, "loss": 3.3442, "step": 2586 }, { "epoch": 0.37, "learning_rate": 0.0001281814242380471, "loss": 3.4422, "step": 2587 }, { "epoch": 0.37, "learning_rate": 0.00012815253502816698, "loss": 3.2933, "step": 2588 }, { "epoch": 0.37, "learning_rate": 0.00012812364581828688, "loss": 3.4378, "step": 2589 }, { "epoch": 0.37, "learning_rate": 0.00012809475660840677, "loss": 3.4258, "step": 2590 }, { "epoch": 0.37, "learning_rate": 0.00012806586739852666, "loss": 3.2842, "step": 2591 }, { "epoch": 0.37, "learning_rate": 0.00012803697818864656, "loss": 3.0408, "step": 2592 }, { "epoch": 0.37, "learning_rate": 0.00012800808897876645, "loss": 3.2241, "step": 2593 }, { "epoch": 0.37, "learning_rate": 0.00012797919976888632, "loss": 3.4057, "step": 2594 }, { "epoch": 0.37, "learning_rate": 0.0001279503105590062, "loss": 3.3407, "step": 2595 }, { "epoch": 0.37, "learning_rate": 0.0001279214213491261, "loss": 3.4498, "step": 2596 }, { "epoch": 0.37, "learning_rate": 0.000127892532139246, "loss": 3.4478, "step": 2597 }, { "epoch": 0.37, "learning_rate": 0.00012786364292936587, "loss": 3.3925, "step": 2598 }, { "epoch": 0.37, "learning_rate": 0.0001278347537194858, "loss": 3.3481, "step": 2599 }, { "epoch": 0.37, "learning_rate": 0.00012780586450960568, "loss": 3.3213, "step": 2600 }, { "epoch": 0.37, "learning_rate": 0.00012777697529972557, "loss": 3.3323, "step": 2601 }, { "epoch": 0.37, "learning_rate": 0.00012774808608984544, "loss": 3.3765, "step": 2602 }, { "epoch": 0.37, "learning_rate": 0.00012771919687996534, "loss": 3.3472, "step": 2603 }, { "epoch": 0.37, "learning_rate": 0.00012769030767008523, "loss": 3.376, "step": 2604 }, { "epoch": 0.37, "learning_rate": 0.00012766141846020512, "loss": 3.356, "step": 2605 }, { "epoch": 0.37, "learning_rate": 0.00012763252925032502, "loss": 3.4621, "step": 2606 }, { "epoch": 0.37, "learning_rate": 0.00012760364004044488, "loss": 3.3554, "step": 2607 }, { "epoch": 0.37, "learning_rate": 0.00012757475083056478, "loss": 3.439, "step": 2608 }, { "epoch": 0.37, "learning_rate": 0.00012754586162068467, "loss": 3.3964, "step": 2609 }, { "epoch": 0.37, "learning_rate": 0.0001275169724108046, "loss": 3.2209, "step": 2610 }, { "epoch": 0.37, "learning_rate": 0.00012748808320092446, "loss": 3.471, "step": 2611 }, { "epoch": 0.37, "learning_rate": 0.00012745919399104435, "loss": 3.2721, "step": 2612 }, { "epoch": 0.37, "learning_rate": 0.00012743030478116425, "loss": 3.3398, "step": 2613 }, { "epoch": 0.37, "learning_rate": 0.00012740141557128414, "loss": 3.3427, "step": 2614 }, { "epoch": 0.37, "learning_rate": 0.00012737252636140403, "loss": 3.4032, "step": 2615 }, { "epoch": 0.37, "learning_rate": 0.0001273436371515239, "loss": 3.2084, "step": 2616 }, { "epoch": 0.37, "learning_rate": 0.0001273147479416438, "loss": 3.348, "step": 2617 }, { "epoch": 0.37, "learning_rate": 0.0001272858587317637, "loss": 3.2441, "step": 2618 }, { "epoch": 0.37, "learning_rate": 0.00012725696952188358, "loss": 3.391, "step": 2619 }, { "epoch": 0.37, "learning_rate": 0.00012722808031200348, "loss": 3.4738, "step": 2620 }, { "epoch": 0.37, "learning_rate": 0.00012719919110212337, "loss": 3.4117, "step": 2621 }, { "epoch": 0.37, "learning_rate": 0.00012717030189224326, "loss": 3.2362, "step": 2622 }, { "epoch": 0.37, "learning_rate": 0.00012714141268236316, "loss": 3.3843, "step": 2623 }, { "epoch": 0.37, "learning_rate": 0.00012711252347248302, "loss": 3.2797, "step": 2624 }, { "epoch": 0.37, "learning_rate": 0.00012708363426260292, "loss": 3.3762, "step": 2625 }, { "epoch": 0.37, "learning_rate": 0.0001270547450527228, "loss": 3.3659, "step": 2626 }, { "epoch": 0.37, "learning_rate": 0.0001270258558428427, "loss": 3.2781, "step": 2627 }, { "epoch": 0.37, "learning_rate": 0.0001269969666329626, "loss": 3.3275, "step": 2628 }, { "epoch": 0.37, "learning_rate": 0.00012696807742308247, "loss": 3.3819, "step": 2629 }, { "epoch": 0.37, "learning_rate": 0.00012693918821320236, "loss": 3.4597, "step": 2630 }, { "epoch": 0.37, "learning_rate": 0.00012691029900332228, "loss": 3.4048, "step": 2631 }, { "epoch": 0.37, "learning_rate": 0.00012688140979344217, "loss": 3.3594, "step": 2632 }, { "epoch": 0.37, "learning_rate": 0.00012685252058356204, "loss": 3.4678, "step": 2633 }, { "epoch": 0.38, "learning_rate": 0.00012682363137368194, "loss": 3.1888, "step": 2634 }, { "epoch": 0.38, "learning_rate": 0.00012679474216380183, "loss": 3.4054, "step": 2635 }, { "epoch": 0.38, "learning_rate": 0.00012676585295392172, "loss": 3.2659, "step": 2636 }, { "epoch": 0.38, "learning_rate": 0.00012673696374404162, "loss": 3.1928, "step": 2637 }, { "epoch": 0.38, "learning_rate": 0.00012670807453416148, "loss": 3.3957, "step": 2638 }, { "epoch": 0.38, "learning_rate": 0.00012667918532428138, "loss": 3.4169, "step": 2639 }, { "epoch": 0.38, "learning_rate": 0.00012665029611440127, "loss": 3.4016, "step": 2640 }, { "epoch": 0.38, "learning_rate": 0.00012662140690452116, "loss": 3.3982, "step": 2641 }, { "epoch": 0.38, "learning_rate": 0.00012659251769464106, "loss": 3.3423, "step": 2642 }, { "epoch": 0.38, "learning_rate": 0.00012656362848476095, "loss": 3.439, "step": 2643 }, { "epoch": 0.38, "learning_rate": 0.00012653473927488085, "loss": 3.3031, "step": 2644 }, { "epoch": 0.38, "learning_rate": 0.00012650585006500074, "loss": 3.4652, "step": 2645 }, { "epoch": 0.38, "learning_rate": 0.0001264769608551206, "loss": 3.3652, "step": 2646 }, { "epoch": 0.38, "learning_rate": 0.0001264480716452405, "loss": 3.3633, "step": 2647 }, { "epoch": 0.38, "learning_rate": 0.0001264191824353604, "loss": 3.3795, "step": 2648 }, { "epoch": 0.38, "learning_rate": 0.0001263902932254803, "loss": 3.5622, "step": 2649 }, { "epoch": 0.38, "learning_rate": 0.00012636140401560018, "loss": 3.3977, "step": 2650 }, { "epoch": 0.38, "learning_rate": 0.00012633251480572005, "loss": 3.4627, "step": 2651 }, { "epoch": 0.38, "learning_rate": 0.00012630362559583997, "loss": 3.4903, "step": 2652 }, { "epoch": 0.38, "learning_rate": 0.00012627473638595986, "loss": 3.4199, "step": 2653 }, { "epoch": 0.38, "learning_rate": 0.00012624584717607976, "loss": 3.3015, "step": 2654 }, { "epoch": 0.38, "learning_rate": 0.00012621695796619962, "loss": 3.2783, "step": 2655 }, { "epoch": 0.38, "learning_rate": 0.00012618806875631952, "loss": 3.3574, "step": 2656 }, { "epoch": 0.38, "learning_rate": 0.0001261591795464394, "loss": 3.3708, "step": 2657 }, { "epoch": 0.38, "learning_rate": 0.0001261302903365593, "loss": 3.5374, "step": 2658 }, { "epoch": 0.38, "learning_rate": 0.0001261014011266792, "loss": 3.4267, "step": 2659 }, { "epoch": 0.38, "learning_rate": 0.00012607251191679907, "loss": 3.4563, "step": 2660 }, { "epoch": 0.38, "learning_rate": 0.00012604362270691896, "loss": 3.3468, "step": 2661 }, { "epoch": 0.38, "learning_rate": 0.00012601473349703888, "loss": 3.2677, "step": 2662 }, { "epoch": 0.38, "learning_rate": 0.00012598584428715875, "loss": 3.3923, "step": 2663 }, { "epoch": 0.38, "learning_rate": 0.00012595695507727864, "loss": 3.3761, "step": 2664 }, { "epoch": 0.38, "learning_rate": 0.00012592806586739854, "loss": 3.4044, "step": 2665 }, { "epoch": 0.38, "learning_rate": 0.00012589917665751843, "loss": 3.2343, "step": 2666 }, { "epoch": 0.38, "learning_rate": 0.00012587028744763832, "loss": 3.4267, "step": 2667 }, { "epoch": 0.38, "learning_rate": 0.0001258413982377582, "loss": 3.4803, "step": 2668 }, { "epoch": 0.38, "learning_rate": 0.00012581250902787808, "loss": 3.4012, "step": 2669 }, { "epoch": 0.38, "learning_rate": 0.00012578361981799798, "loss": 3.3552, "step": 2670 }, { "epoch": 0.38, "learning_rate": 0.00012575473060811787, "loss": 3.204, "step": 2671 }, { "epoch": 0.38, "learning_rate": 0.00012572584139823777, "loss": 3.3883, "step": 2672 }, { "epoch": 0.38, "learning_rate": 0.00012569695218835766, "loss": 3.4008, "step": 2673 }, { "epoch": 0.38, "learning_rate": 0.00012566806297847755, "loss": 3.3178, "step": 2674 }, { "epoch": 0.38, "learning_rate": 0.00012563917376859745, "loss": 3.0956, "step": 2675 }, { "epoch": 0.38, "learning_rate": 0.00012561028455871734, "loss": 3.325, "step": 2676 }, { "epoch": 0.38, "learning_rate": 0.0001255813953488372, "loss": 3.417, "step": 2677 }, { "epoch": 0.38, "learning_rate": 0.0001255525061389571, "loss": 3.4145, "step": 2678 }, { "epoch": 0.38, "learning_rate": 0.000125523616929077, "loss": 3.5485, "step": 2679 }, { "epoch": 0.38, "learning_rate": 0.0001254947277191969, "loss": 3.293, "step": 2680 }, { "epoch": 0.38, "learning_rate": 0.00012546583850931676, "loss": 3.4376, "step": 2681 }, { "epoch": 0.38, "learning_rate": 0.00012543694929943665, "loss": 3.427, "step": 2682 }, { "epoch": 0.38, "learning_rate": 0.00012540806008955657, "loss": 3.4608, "step": 2683 }, { "epoch": 0.38, "learning_rate": 0.00012537917087967646, "loss": 3.4159, "step": 2684 }, { "epoch": 0.38, "learning_rate": 0.00012535028166979633, "loss": 3.3954, "step": 2685 }, { "epoch": 0.38, "learning_rate": 0.00012532139245991622, "loss": 3.4478, "step": 2686 }, { "epoch": 0.38, "learning_rate": 0.00012529250325003612, "loss": 3.5245, "step": 2687 }, { "epoch": 0.38, "learning_rate": 0.000125263614040156, "loss": 3.43, "step": 2688 }, { "epoch": 0.38, "learning_rate": 0.0001252347248302759, "loss": 3.5288, "step": 2689 }, { "epoch": 0.38, "learning_rate": 0.00012520583562039577, "loss": 3.4635, "step": 2690 }, { "epoch": 0.38, "learning_rate": 0.00012517694641051567, "loss": 3.2762, "step": 2691 }, { "epoch": 0.38, "learning_rate": 0.00012514805720063556, "loss": 3.207, "step": 2692 }, { "epoch": 0.38, "learning_rate": 0.00012511916799075545, "loss": 3.318, "step": 2693 }, { "epoch": 0.38, "learning_rate": 0.00012509027878087535, "loss": 3.4903, "step": 2694 }, { "epoch": 0.38, "learning_rate": 0.00012506138957099524, "loss": 3.4562, "step": 2695 }, { "epoch": 0.38, "learning_rate": 0.00012503250036111514, "loss": 3.3621, "step": 2696 }, { "epoch": 0.38, "learning_rate": 0.00012500361115123503, "loss": 3.3899, "step": 2697 }, { "epoch": 0.38, "learning_rate": 0.00012497472194135492, "loss": 3.3823, "step": 2698 }, { "epoch": 0.38, "learning_rate": 0.0001249458327314748, "loss": 3.4669, "step": 2699 }, { "epoch": 0.38, "learning_rate": 0.00012491694352159468, "loss": 3.4761, "step": 2700 }, { "epoch": 0.38, "learning_rate": 0.00012488805431171458, "loss": 3.3686, "step": 2701 }, { "epoch": 0.38, "learning_rate": 0.00012485916510183447, "loss": 3.2844, "step": 2702 }, { "epoch": 0.38, "learning_rate": 0.00012483027589195434, "loss": 3.4819, "step": 2703 }, { "epoch": 0.38, "learning_rate": 0.00012480138668207426, "loss": 3.3464, "step": 2704 }, { "epoch": 0.39, "learning_rate": 0.00012477249747219415, "loss": 3.3565, "step": 2705 }, { "epoch": 0.39, "learning_rate": 0.00012474360826231405, "loss": 3.6032, "step": 2706 }, { "epoch": 0.39, "learning_rate": 0.0001247147190524339, "loss": 3.3503, "step": 2707 }, { "epoch": 0.39, "learning_rate": 0.0001246858298425538, "loss": 3.462, "step": 2708 }, { "epoch": 0.39, "learning_rate": 0.0001246569406326737, "loss": 3.4015, "step": 2709 }, { "epoch": 0.39, "learning_rate": 0.0001246280514227936, "loss": 3.3831, "step": 2710 }, { "epoch": 0.39, "learning_rate": 0.0001245991622129135, "loss": 3.4502, "step": 2711 }, { "epoch": 0.39, "learning_rate": 0.00012457027300303336, "loss": 3.5121, "step": 2712 }, { "epoch": 0.39, "learning_rate": 0.00012454138379315325, "loss": 3.3088, "step": 2713 }, { "epoch": 0.39, "learning_rate": 0.00012451249458327314, "loss": 3.405, "step": 2714 }, { "epoch": 0.39, "learning_rate": 0.00012448360537339306, "loss": 3.4379, "step": 2715 }, { "epoch": 0.39, "learning_rate": 0.00012445471616351293, "loss": 3.4802, "step": 2716 }, { "epoch": 0.39, "learning_rate": 0.00012442582695363282, "loss": 3.3756, "step": 2717 }, { "epoch": 0.39, "learning_rate": 0.00012439693774375272, "loss": 3.3297, "step": 2718 }, { "epoch": 0.39, "learning_rate": 0.0001243680485338726, "loss": 3.4115, "step": 2719 }, { "epoch": 0.39, "learning_rate": 0.0001243391593239925, "loss": 3.256, "step": 2720 }, { "epoch": 0.39, "learning_rate": 0.00012431027011411237, "loss": 3.3898, "step": 2721 }, { "epoch": 0.39, "learning_rate": 0.00012428138090423227, "loss": 3.2545, "step": 2722 }, { "epoch": 0.39, "learning_rate": 0.00012425249169435216, "loss": 3.2719, "step": 2723 }, { "epoch": 0.39, "learning_rate": 0.00012422360248447205, "loss": 3.3735, "step": 2724 }, { "epoch": 0.39, "learning_rate": 0.00012419471327459195, "loss": 3.3794, "step": 2725 }, { "epoch": 0.39, "learning_rate": 0.00012416582406471184, "loss": 3.4381, "step": 2726 }, { "epoch": 0.39, "learning_rate": 0.00012413693485483174, "loss": 3.3723, "step": 2727 }, { "epoch": 0.39, "learning_rate": 0.00012410804564495163, "loss": 3.153, "step": 2728 }, { "epoch": 0.39, "learning_rate": 0.0001240791564350715, "loss": 3.3809, "step": 2729 }, { "epoch": 0.39, "learning_rate": 0.0001240502672251914, "loss": 3.3281, "step": 2730 }, { "epoch": 0.39, "learning_rate": 0.00012402137801531128, "loss": 3.1046, "step": 2731 }, { "epoch": 0.39, "learning_rate": 0.00012399248880543118, "loss": 3.3417, "step": 2732 }, { "epoch": 0.39, "learning_rate": 0.00012396359959555107, "loss": 3.3571, "step": 2733 }, { "epoch": 0.39, "learning_rate": 0.00012393471038567094, "loss": 3.493, "step": 2734 }, { "epoch": 0.39, "learning_rate": 0.00012390582117579083, "loss": 3.3214, "step": 2735 }, { "epoch": 0.39, "learning_rate": 0.00012387693196591075, "loss": 3.4362, "step": 2736 }, { "epoch": 0.39, "learning_rate": 0.00012384804275603065, "loss": 3.4042, "step": 2737 }, { "epoch": 0.39, "learning_rate": 0.00012381915354615051, "loss": 3.4319, "step": 2738 }, { "epoch": 0.39, "learning_rate": 0.0001237902643362704, "loss": 3.4458, "step": 2739 }, { "epoch": 0.39, "learning_rate": 0.0001237613751263903, "loss": 3.4421, "step": 2740 }, { "epoch": 0.39, "learning_rate": 0.0001237324859165102, "loss": 3.3754, "step": 2741 }, { "epoch": 0.39, "learning_rate": 0.0001237035967066301, "loss": 3.347, "step": 2742 }, { "epoch": 0.39, "learning_rate": 0.00012367470749674996, "loss": 3.4606, "step": 2743 }, { "epoch": 0.39, "learning_rate": 0.00012364581828686985, "loss": 3.3032, "step": 2744 }, { "epoch": 0.39, "learning_rate": 0.00012361692907698974, "loss": 3.3964, "step": 2745 }, { "epoch": 0.39, "learning_rate": 0.00012358803986710964, "loss": 3.355, "step": 2746 }, { "epoch": 0.39, "learning_rate": 0.00012355915065722953, "loss": 3.3482, "step": 2747 }, { "epoch": 0.39, "learning_rate": 0.00012353026144734942, "loss": 3.4613, "step": 2748 }, { "epoch": 0.39, "learning_rate": 0.00012350137223746932, "loss": 3.3546, "step": 2749 }, { "epoch": 0.39, "learning_rate": 0.0001234724830275892, "loss": 3.2237, "step": 2750 }, { "epoch": 0.39, "learning_rate": 0.00012344359381770908, "loss": 3.3636, "step": 2751 }, { "epoch": 0.39, "learning_rate": 0.00012341470460782897, "loss": 3.2352, "step": 2752 }, { "epoch": 0.39, "learning_rate": 0.00012338581539794887, "loss": 3.4965, "step": 2753 }, { "epoch": 0.39, "learning_rate": 0.00012335692618806876, "loss": 3.2916, "step": 2754 }, { "epoch": 0.39, "learning_rate": 0.00012332803697818865, "loss": 3.3528, "step": 2755 }, { "epoch": 0.39, "learning_rate": 0.00012329914776830852, "loss": 3.2192, "step": 2756 }, { "epoch": 0.39, "learning_rate": 0.00012327025855842844, "loss": 3.3678, "step": 2757 }, { "epoch": 0.39, "learning_rate": 0.00012324136934854834, "loss": 3.2173, "step": 2758 }, { "epoch": 0.39, "learning_rate": 0.00012321248013866823, "loss": 3.2887, "step": 2759 }, { "epoch": 0.39, "learning_rate": 0.0001231835909287881, "loss": 3.3319, "step": 2760 }, { "epoch": 0.39, "learning_rate": 0.000123154701718908, "loss": 3.4748, "step": 2761 }, { "epoch": 0.39, "learning_rate": 0.00012312581250902788, "loss": 3.2698, "step": 2762 }, { "epoch": 0.39, "learning_rate": 0.00012309692329914778, "loss": 3.4107, "step": 2763 }, { "epoch": 0.39, "learning_rate": 0.00012306803408926767, "loss": 3.3461, "step": 2764 }, { "epoch": 0.39, "learning_rate": 0.00012303914487938754, "loss": 3.2252, "step": 2765 }, { "epoch": 0.39, "learning_rate": 0.00012301025566950743, "loss": 3.225, "step": 2766 }, { "epoch": 0.39, "learning_rate": 0.00012298136645962735, "loss": 3.4453, "step": 2767 }, { "epoch": 0.39, "learning_rate": 0.00012295247724974722, "loss": 3.4721, "step": 2768 }, { "epoch": 0.39, "learning_rate": 0.00012292358803986711, "loss": 3.3088, "step": 2769 }, { "epoch": 0.39, "learning_rate": 0.000122894698829987, "loss": 3.4014, "step": 2770 }, { "epoch": 0.39, "learning_rate": 0.0001228658096201069, "loss": 3.4508, "step": 2771 }, { "epoch": 0.39, "learning_rate": 0.0001228369204102268, "loss": 3.4795, "step": 2772 }, { "epoch": 0.39, "learning_rate": 0.00012280803120034666, "loss": 3.3166, "step": 2773 }, { "epoch": 0.39, "learning_rate": 0.00012277914199046656, "loss": 3.3391, "step": 2774 }, { "epoch": 0.4, "learning_rate": 0.00012275025278058645, "loss": 3.2663, "step": 2775 }, { "epoch": 0.4, "learning_rate": 0.00012272136357070634, "loss": 3.3781, "step": 2776 }, { "epoch": 0.4, "learning_rate": 0.00012269247436082624, "loss": 3.3014, "step": 2777 }, { "epoch": 0.4, "learning_rate": 0.00012266358515094613, "loss": 3.2865, "step": 2778 }, { "epoch": 0.4, "learning_rate": 0.00012263469594106602, "loss": 3.2788, "step": 2779 }, { "epoch": 0.4, "learning_rate": 0.00012260580673118592, "loss": 3.2579, "step": 2780 }, { "epoch": 0.4, "learning_rate": 0.0001225769175213058, "loss": 3.3732, "step": 2781 }, { "epoch": 0.4, "learning_rate": 0.00012254802831142568, "loss": 3.3919, "step": 2782 }, { "epoch": 0.4, "learning_rate": 0.00012251913910154557, "loss": 3.3965, "step": 2783 }, { "epoch": 0.4, "learning_rate": 0.00012249024989166547, "loss": 3.3287, "step": 2784 }, { "epoch": 0.4, "learning_rate": 0.00012246136068178536, "loss": 3.4794, "step": 2785 }, { "epoch": 0.4, "learning_rate": 0.00012243247147190525, "loss": 3.3119, "step": 2786 }, { "epoch": 0.4, "learning_rate": 0.00012240358226202512, "loss": 3.2659, "step": 2787 }, { "epoch": 0.4, "learning_rate": 0.00012237469305214504, "loss": 3.4235, "step": 2788 }, { "epoch": 0.4, "learning_rate": 0.00012234580384226494, "loss": 3.4274, "step": 2789 }, { "epoch": 0.4, "learning_rate": 0.0001223169146323848, "loss": 3.1281, "step": 2790 }, { "epoch": 0.4, "learning_rate": 0.0001222880254225047, "loss": 3.4995, "step": 2791 }, { "epoch": 0.4, "learning_rate": 0.0001222591362126246, "loss": 3.5272, "step": 2792 }, { "epoch": 0.4, "learning_rate": 0.00012223024700274448, "loss": 3.2849, "step": 2793 }, { "epoch": 0.4, "learning_rate": 0.00012220135779286438, "loss": 3.4098, "step": 2794 }, { "epoch": 0.4, "learning_rate": 0.00012217246858298424, "loss": 3.42, "step": 2795 }, { "epoch": 0.4, "learning_rate": 0.00012214357937310414, "loss": 3.2024, "step": 2796 }, { "epoch": 0.4, "learning_rate": 0.00012211469016322403, "loss": 3.4517, "step": 2797 }, { "epoch": 0.4, "learning_rate": 0.00012208580095334395, "loss": 3.2603, "step": 2798 }, { "epoch": 0.4, "learning_rate": 0.00012205691174346383, "loss": 3.0616, "step": 2799 }, { "epoch": 0.4, "learning_rate": 0.00012202802253358371, "loss": 3.447, "step": 2800 }, { "epoch": 0.4, "learning_rate": 0.00012199913332370361, "loss": 3.2967, "step": 2801 }, { "epoch": 0.4, "learning_rate": 0.0001219702441138235, "loss": 3.2181, "step": 2802 }, { "epoch": 0.4, "learning_rate": 0.00012194135490394338, "loss": 3.2925, "step": 2803 }, { "epoch": 0.4, "learning_rate": 0.00012191246569406328, "loss": 3.4124, "step": 2804 }, { "epoch": 0.4, "learning_rate": 0.00012188357648418316, "loss": 3.2474, "step": 2805 }, { "epoch": 0.4, "learning_rate": 0.00012185468727430305, "loss": 3.3938, "step": 2806 }, { "epoch": 0.4, "learning_rate": 0.00012182579806442293, "loss": 3.3929, "step": 2807 }, { "epoch": 0.4, "learning_rate": 0.00012179690885454282, "loss": 3.463, "step": 2808 }, { "epoch": 0.4, "learning_rate": 0.00012176801964466273, "loss": 3.2457, "step": 2809 }, { "epoch": 0.4, "learning_rate": 0.00012173913043478263, "loss": 3.3338, "step": 2810 }, { "epoch": 0.4, "learning_rate": 0.0001217102412249025, "loss": 3.4464, "step": 2811 }, { "epoch": 0.4, "learning_rate": 0.0001216813520150224, "loss": 3.3941, "step": 2812 }, { "epoch": 0.4, "learning_rate": 0.00012165246280514229, "loss": 3.4187, "step": 2813 }, { "epoch": 0.4, "learning_rate": 0.00012162357359526217, "loss": 3.4447, "step": 2814 }, { "epoch": 0.4, "learning_rate": 0.00012159468438538207, "loss": 3.3622, "step": 2815 }, { "epoch": 0.4, "learning_rate": 0.00012156579517550195, "loss": 3.2339, "step": 2816 }, { "epoch": 0.4, "eval_loss": 3.5397603511810303, "eval_runtime": 473.3374, "eval_samples_per_second": 43.282, "eval_steps_per_second": 14.427, "step": 2816 }, { "epoch": 0.4, "learning_rate": 0.00012153690596562184, "loss": 3.2623, "step": 2817 }, { "epoch": 0.4, "learning_rate": 0.00012150801675574172, "loss": 3.2769, "step": 2818 }, { "epoch": 0.4, "learning_rate": 0.00012147912754586164, "loss": 3.2308, "step": 2819 }, { "epoch": 0.4, "learning_rate": 0.00012145023833598152, "loss": 3.3513, "step": 2820 }, { "epoch": 0.4, "learning_rate": 0.00012142134912610142, "loss": 3.3549, "step": 2821 }, { "epoch": 0.4, "learning_rate": 0.0001213924599162213, "loss": 3.2788, "step": 2822 }, { "epoch": 0.4, "learning_rate": 0.00012136357070634119, "loss": 3.5271, "step": 2823 }, { "epoch": 0.4, "learning_rate": 0.00012133468149646108, "loss": 3.1716, "step": 2824 }, { "epoch": 0.4, "learning_rate": 0.00012130579228658096, "loss": 3.3601, "step": 2825 }, { "epoch": 0.4, "learning_rate": 0.00012127690307670086, "loss": 3.4085, "step": 2826 }, { "epoch": 0.4, "learning_rate": 0.00012124801386682074, "loss": 3.2235, "step": 2827 }, { "epoch": 0.4, "learning_rate": 0.00012121912465694063, "loss": 3.4152, "step": 2828 }, { "epoch": 0.4, "learning_rate": 0.00012119023544706051, "loss": 3.291, "step": 2829 }, { "epoch": 0.4, "learning_rate": 0.00012116134623718043, "loss": 3.322, "step": 2830 }, { "epoch": 0.4, "learning_rate": 0.00012113245702730031, "loss": 3.3296, "step": 2831 }, { "epoch": 0.4, "learning_rate": 0.00012110356781742021, "loss": 3.2322, "step": 2832 }, { "epoch": 0.4, "learning_rate": 0.00012107467860754009, "loss": 3.4277, "step": 2833 }, { "epoch": 0.4, "learning_rate": 0.00012104578939765998, "loss": 3.1975, "step": 2834 }, { "epoch": 0.4, "learning_rate": 0.00012101690018777988, "loss": 3.3079, "step": 2835 }, { "epoch": 0.4, "learning_rate": 0.00012098801097789976, "loss": 3.4171, "step": 2836 }, { "epoch": 0.4, "learning_rate": 0.00012095912176801965, "loss": 3.206, "step": 2837 }, { "epoch": 0.4, "learning_rate": 0.00012093023255813953, "loss": 3.3719, "step": 2838 }, { "epoch": 0.4, "learning_rate": 0.00012090134334825942, "loss": 3.536, "step": 2839 }, { "epoch": 0.4, "learning_rate": 0.00012087245413837933, "loss": 3.3528, "step": 2840 }, { "epoch": 0.4, "learning_rate": 0.00012084356492849923, "loss": 3.3847, "step": 2841 }, { "epoch": 0.4, "learning_rate": 0.0001208146757186191, "loss": 2.9773, "step": 2842 }, { "epoch": 0.4, "learning_rate": 0.000120785786508739, "loss": 3.3853, "step": 2843 }, { "epoch": 0.4, "learning_rate": 0.00012075689729885888, "loss": 3.3528, "step": 2844 }, { "epoch": 0.41, "learning_rate": 0.00012072800808897877, "loss": 3.3483, "step": 2845 }, { "epoch": 0.41, "learning_rate": 0.00012069911887909865, "loss": 3.4658, "step": 2846 }, { "epoch": 0.41, "learning_rate": 0.00012067022966921855, "loss": 3.346, "step": 2847 }, { "epoch": 0.41, "learning_rate": 0.00012064134045933844, "loss": 3.2132, "step": 2848 }, { "epoch": 0.41, "learning_rate": 0.00012061245124945832, "loss": 3.4038, "step": 2849 }, { "epoch": 0.41, "learning_rate": 0.00012058356203957822, "loss": 3.3602, "step": 2850 }, { "epoch": 0.41, "learning_rate": 0.00012055467282969812, "loss": 3.4381, "step": 2851 }, { "epoch": 0.41, "learning_rate": 0.00012052578361981802, "loss": 3.4591, "step": 2852 }, { "epoch": 0.41, "learning_rate": 0.0001204968944099379, "loss": 3.3465, "step": 2853 }, { "epoch": 0.41, "learning_rate": 0.00012046800520005779, "loss": 3.3748, "step": 2854 }, { "epoch": 0.41, "learning_rate": 0.00012043911599017767, "loss": 3.3602, "step": 2855 }, { "epoch": 0.41, "learning_rate": 0.00012041022678029756, "loss": 3.3046, "step": 2856 }, { "epoch": 0.41, "learning_rate": 0.00012038133757041745, "loss": 3.4129, "step": 2857 }, { "epoch": 0.41, "learning_rate": 0.00012035244836053734, "loss": 3.3221, "step": 2858 }, { "epoch": 0.41, "learning_rate": 0.00012032355915065723, "loss": 3.351, "step": 2859 }, { "epoch": 0.41, "learning_rate": 0.00012029466994077711, "loss": 3.4366, "step": 2860 }, { "epoch": 0.41, "learning_rate": 0.00012026578073089702, "loss": 3.3561, "step": 2861 }, { "epoch": 0.41, "learning_rate": 0.00012023689152101691, "loss": 3.4635, "step": 2862 }, { "epoch": 0.41, "learning_rate": 0.00012020800231113681, "loss": 3.3082, "step": 2863 }, { "epoch": 0.41, "learning_rate": 0.00012017911310125669, "loss": 3.4234, "step": 2864 }, { "epoch": 0.41, "learning_rate": 0.00012015022389137658, "loss": 3.3442, "step": 2865 }, { "epoch": 0.41, "learning_rate": 0.00012012133468149646, "loss": 3.3602, "step": 2866 }, { "epoch": 0.41, "learning_rate": 0.00012009244547161636, "loss": 3.0126, "step": 2867 }, { "epoch": 0.41, "learning_rate": 0.00012006355626173624, "loss": 3.2896, "step": 2868 }, { "epoch": 0.41, "learning_rate": 0.00012003466705185613, "loss": 3.4567, "step": 2869 }, { "epoch": 0.41, "learning_rate": 0.00012000577784197602, "loss": 3.2361, "step": 2870 }, { "epoch": 0.41, "learning_rate": 0.0001199768886320959, "loss": 3.4387, "step": 2871 }, { "epoch": 0.41, "learning_rate": 0.00011994799942221581, "loss": 3.395, "step": 2872 }, { "epoch": 0.41, "learning_rate": 0.0001199191102123357, "loss": 3.3643, "step": 2873 }, { "epoch": 0.41, "learning_rate": 0.0001198902210024556, "loss": 3.4342, "step": 2874 }, { "epoch": 0.41, "learning_rate": 0.00011986133179257548, "loss": 3.4494, "step": 2875 }, { "epoch": 0.41, "learning_rate": 0.00011983244258269537, "loss": 3.3813, "step": 2876 }, { "epoch": 0.41, "learning_rate": 0.00011980355337281525, "loss": 3.1537, "step": 2877 }, { "epoch": 0.41, "learning_rate": 0.00011977466416293515, "loss": 3.4073, "step": 2878 }, { "epoch": 0.41, "learning_rate": 0.00011974577495305503, "loss": 3.402, "step": 2879 }, { "epoch": 0.41, "learning_rate": 0.00011971688574317492, "loss": 3.2713, "step": 2880 }, { "epoch": 0.41, "learning_rate": 0.00011968799653329482, "loss": 3.454, "step": 2881 }, { "epoch": 0.41, "learning_rate": 0.00011965910732341472, "loss": 3.2313, "step": 2882 }, { "epoch": 0.41, "learning_rate": 0.0001196302181135346, "loss": 3.3605, "step": 2883 }, { "epoch": 0.41, "learning_rate": 0.0001196013289036545, "loss": 3.3283, "step": 2884 }, { "epoch": 0.41, "learning_rate": 0.00011957243969377439, "loss": 3.4797, "step": 2885 }, { "epoch": 0.41, "learning_rate": 0.00011954355048389427, "loss": 3.4396, "step": 2886 }, { "epoch": 0.41, "learning_rate": 0.00011951466127401417, "loss": 3.4563, "step": 2887 }, { "epoch": 0.41, "learning_rate": 0.00011948577206413405, "loss": 3.4401, "step": 2888 }, { "epoch": 0.41, "learning_rate": 0.00011945688285425394, "loss": 3.4105, "step": 2889 }, { "epoch": 0.41, "learning_rate": 0.00011942799364437382, "loss": 3.2352, "step": 2890 }, { "epoch": 0.41, "learning_rate": 0.00011939910443449371, "loss": 3.5398, "step": 2891 }, { "epoch": 0.41, "learning_rate": 0.00011937021522461361, "loss": 3.2313, "step": 2892 }, { "epoch": 0.41, "learning_rate": 0.00011934132601473351, "loss": 3.0719, "step": 2893 }, { "epoch": 0.41, "learning_rate": 0.0001193124368048534, "loss": 3.3121, "step": 2894 }, { "epoch": 0.41, "learning_rate": 0.00011928354759497329, "loss": 3.2686, "step": 2895 }, { "epoch": 0.41, "learning_rate": 0.00011925465838509318, "loss": 3.3653, "step": 2896 }, { "epoch": 0.41, "learning_rate": 0.00011922576917521306, "loss": 3.4363, "step": 2897 }, { "epoch": 0.41, "learning_rate": 0.00011919687996533296, "loss": 3.525, "step": 2898 }, { "epoch": 0.41, "learning_rate": 0.00011916799075545284, "loss": 3.3923, "step": 2899 }, { "epoch": 0.41, "learning_rate": 0.00011913910154557273, "loss": 3.4238, "step": 2900 }, { "epoch": 0.41, "learning_rate": 0.00011911021233569261, "loss": 3.2937, "step": 2901 }, { "epoch": 0.41, "learning_rate": 0.0001190813231258125, "loss": 3.37, "step": 2902 }, { "epoch": 0.41, "learning_rate": 0.00011905243391593241, "loss": 3.3827, "step": 2903 }, { "epoch": 0.41, "learning_rate": 0.0001190235447060523, "loss": 3.4186, "step": 2904 }, { "epoch": 0.41, "learning_rate": 0.00011899465549617219, "loss": 3.2927, "step": 2905 }, { "epoch": 0.41, "learning_rate": 0.00011896576628629208, "loss": 3.2217, "step": 2906 }, { "epoch": 0.41, "learning_rate": 0.00011893687707641197, "loss": 3.3386, "step": 2907 }, { "epoch": 0.41, "learning_rate": 0.00011890798786653185, "loss": 3.3211, "step": 2908 }, { "epoch": 0.41, "learning_rate": 0.00011887909865665175, "loss": 3.4936, "step": 2909 }, { "epoch": 0.41, "learning_rate": 0.00011885020944677163, "loss": 3.3862, "step": 2910 }, { "epoch": 0.41, "learning_rate": 0.00011882132023689152, "loss": 3.3776, "step": 2911 }, { "epoch": 0.41, "learning_rate": 0.0001187924310270114, "loss": 3.3151, "step": 2912 }, { "epoch": 0.41, "learning_rate": 0.0001187635418171313, "loss": 3.1704, "step": 2913 }, { "epoch": 0.41, "learning_rate": 0.0001187346526072512, "loss": 3.3042, "step": 2914 }, { "epoch": 0.42, "learning_rate": 0.0001187057633973711, "loss": 3.0431, "step": 2915 }, { "epoch": 0.42, "learning_rate": 0.00011867687418749098, "loss": 3.2402, "step": 2916 }, { "epoch": 0.42, "learning_rate": 0.00011864798497761087, "loss": 3.3413, "step": 2917 }, { "epoch": 0.42, "learning_rate": 0.00011861909576773077, "loss": 3.4283, "step": 2918 }, { "epoch": 0.42, "learning_rate": 0.00011859020655785065, "loss": 3.2246, "step": 2919 }, { "epoch": 0.42, "learning_rate": 0.00011856131734797054, "loss": 3.5293, "step": 2920 }, { "epoch": 0.42, "learning_rate": 0.00011853242813809042, "loss": 3.309, "step": 2921 }, { "epoch": 0.42, "learning_rate": 0.00011850353892821031, "loss": 3.4406, "step": 2922 }, { "epoch": 0.42, "learning_rate": 0.0001184746497183302, "loss": 3.3076, "step": 2923 }, { "epoch": 0.42, "learning_rate": 0.00011844576050845011, "loss": 3.3118, "step": 2924 }, { "epoch": 0.42, "learning_rate": 0.00011841687129857, "loss": 3.3957, "step": 2925 }, { "epoch": 0.42, "learning_rate": 0.00011838798208868989, "loss": 3.3812, "step": 2926 }, { "epoch": 0.42, "learning_rate": 0.00011835909287880977, "loss": 3.3103, "step": 2927 }, { "epoch": 0.42, "learning_rate": 0.00011833020366892966, "loss": 3.3207, "step": 2928 }, { "epoch": 0.42, "learning_rate": 0.00011830131445904956, "loss": 3.3636, "step": 2929 }, { "epoch": 0.42, "learning_rate": 0.00011827242524916944, "loss": 3.2148, "step": 2930 }, { "epoch": 0.42, "learning_rate": 0.00011824353603928933, "loss": 3.4342, "step": 2931 }, { "epoch": 0.42, "learning_rate": 0.00011821464682940921, "loss": 3.418, "step": 2932 }, { "epoch": 0.42, "learning_rate": 0.0001181857576195291, "loss": 3.1815, "step": 2933 }, { "epoch": 0.42, "learning_rate": 0.00011815686840964899, "loss": 3.3941, "step": 2934 }, { "epoch": 0.42, "learning_rate": 0.0001181279791997689, "loss": 3.2534, "step": 2935 }, { "epoch": 0.42, "learning_rate": 0.00011809908998988879, "loss": 3.2224, "step": 2936 }, { "epoch": 0.42, "learning_rate": 0.00011807020078000868, "loss": 3.2472, "step": 2937 }, { "epoch": 0.42, "learning_rate": 0.00011804131157012856, "loss": 3.3013, "step": 2938 }, { "epoch": 0.42, "learning_rate": 0.00011801242236024845, "loss": 3.4542, "step": 2939 }, { "epoch": 0.42, "learning_rate": 0.00011798353315036835, "loss": 3.3669, "step": 2940 }, { "epoch": 0.42, "learning_rate": 0.00011795464394048823, "loss": 3.2054, "step": 2941 }, { "epoch": 0.42, "learning_rate": 0.00011792575473060812, "loss": 3.4628, "step": 2942 }, { "epoch": 0.42, "learning_rate": 0.000117896865520728, "loss": 3.314, "step": 2943 }, { "epoch": 0.42, "learning_rate": 0.0001178679763108479, "loss": 3.2845, "step": 2944 }, { "epoch": 0.42, "learning_rate": 0.0001178390871009678, "loss": 3.3936, "step": 2945 }, { "epoch": 0.42, "learning_rate": 0.0001178101978910877, "loss": 3.3622, "step": 2946 }, { "epoch": 0.42, "learning_rate": 0.00011778130868120758, "loss": 3.4131, "step": 2947 }, { "epoch": 0.42, "learning_rate": 0.00011775241947132747, "loss": 3.2771, "step": 2948 }, { "epoch": 0.42, "learning_rate": 0.00011772353026144735, "loss": 3.4104, "step": 2949 }, { "epoch": 0.42, "learning_rate": 0.00011769464105156725, "loss": 3.4032, "step": 2950 }, { "epoch": 0.42, "learning_rate": 0.00011766575184168714, "loss": 3.2302, "step": 2951 }, { "epoch": 0.42, "learning_rate": 0.00011763686263180702, "loss": 3.3607, "step": 2952 }, { "epoch": 0.42, "learning_rate": 0.00011760797342192691, "loss": 3.2546, "step": 2953 }, { "epoch": 0.42, "learning_rate": 0.0001175790842120468, "loss": 3.2651, "step": 2954 }, { "epoch": 0.42, "learning_rate": 0.00011755019500216669, "loss": 3.2851, "step": 2955 }, { "epoch": 0.42, "learning_rate": 0.0001175213057922866, "loss": 3.4795, "step": 2956 }, { "epoch": 0.42, "learning_rate": 0.00011749241658240649, "loss": 3.4549, "step": 2957 }, { "epoch": 0.42, "learning_rate": 0.00011746352737252637, "loss": 3.3106, "step": 2958 }, { "epoch": 0.42, "learning_rate": 0.00011743463816264626, "loss": 3.363, "step": 2959 }, { "epoch": 0.42, "learning_rate": 0.00011740574895276614, "loss": 3.2841, "step": 2960 }, { "epoch": 0.42, "learning_rate": 0.00011737685974288604, "loss": 3.4212, "step": 2961 }, { "epoch": 0.42, "learning_rate": 0.00011734797053300593, "loss": 3.3983, "step": 2962 }, { "epoch": 0.42, "learning_rate": 0.00011731908132312581, "loss": 3.2952, "step": 2963 }, { "epoch": 0.42, "learning_rate": 0.0001172901921132457, "loss": 3.4007, "step": 2964 }, { "epoch": 0.42, "learning_rate": 0.00011726130290336559, "loss": 3.3081, "step": 2965 }, { "epoch": 0.42, "learning_rate": 0.00011723241369348549, "loss": 3.4531, "step": 2966 }, { "epoch": 0.42, "learning_rate": 0.00011720352448360539, "loss": 3.3969, "step": 2967 }, { "epoch": 0.42, "learning_rate": 0.00011717463527372528, "loss": 3.3213, "step": 2968 }, { "epoch": 0.42, "learning_rate": 0.00011714574606384516, "loss": 3.3514, "step": 2969 }, { "epoch": 0.42, "learning_rate": 0.00011711685685396505, "loss": 3.3203, "step": 2970 }, { "epoch": 0.42, "learning_rate": 0.00011708796764408493, "loss": 3.3769, "step": 2971 }, { "epoch": 0.42, "learning_rate": 0.00011705907843420483, "loss": 3.3929, "step": 2972 }, { "epoch": 0.42, "learning_rate": 0.00011703018922432471, "loss": 3.4084, "step": 2973 }, { "epoch": 0.42, "learning_rate": 0.0001170013000144446, "loss": 3.2764, "step": 2974 }, { "epoch": 0.42, "learning_rate": 0.0001169724108045645, "loss": 3.3365, "step": 2975 }, { "epoch": 0.42, "learning_rate": 0.0001169435215946844, "loss": 3.137, "step": 2976 }, { "epoch": 0.42, "learning_rate": 0.00011691463238480428, "loss": 3.3314, "step": 2977 }, { "epoch": 0.42, "learning_rate": 0.00011688574317492418, "loss": 3.2965, "step": 2978 }, { "epoch": 0.42, "learning_rate": 0.00011685685396504407, "loss": 3.4489, "step": 2979 }, { "epoch": 0.42, "learning_rate": 0.00011682796475516395, "loss": 3.2876, "step": 2980 }, { "epoch": 0.42, "learning_rate": 0.00011679907554528385, "loss": 3.3736, "step": 2981 }, { "epoch": 0.42, "learning_rate": 0.00011677018633540373, "loss": 3.4143, "step": 2982 }, { "epoch": 0.42, "learning_rate": 0.00011674129712552362, "loss": 3.3062, "step": 2983 }, { "epoch": 0.42, "learning_rate": 0.0001167124079156435, "loss": 3.3008, "step": 2984 }, { "epoch": 0.43, "learning_rate": 0.0001166835187057634, "loss": 3.5034, "step": 2985 }, { "epoch": 0.43, "learning_rate": 0.00011665462949588329, "loss": 3.2881, "step": 2986 }, { "epoch": 0.43, "learning_rate": 0.0001166257402860032, "loss": 3.3307, "step": 2987 }, { "epoch": 0.43, "learning_rate": 0.00011659685107612308, "loss": 3.3547, "step": 2988 }, { "epoch": 0.43, "learning_rate": 0.00011656796186624297, "loss": 3.1519, "step": 2989 }, { "epoch": 0.43, "learning_rate": 0.00011653907265636286, "loss": 3.4225, "step": 2990 }, { "epoch": 0.43, "learning_rate": 0.00011651018344648274, "loss": 3.3672, "step": 2991 }, { "epoch": 0.43, "learning_rate": 0.00011648129423660264, "loss": 3.4003, "step": 2992 }, { "epoch": 0.43, "learning_rate": 0.00011645240502672252, "loss": 3.4046, "step": 2993 }, { "epoch": 0.43, "learning_rate": 0.00011642351581684241, "loss": 3.3142, "step": 2994 }, { "epoch": 0.43, "learning_rate": 0.00011639462660696229, "loss": 3.3948, "step": 2995 }, { "epoch": 0.43, "learning_rate": 0.00011636573739708219, "loss": 3.3539, "step": 2996 }, { "epoch": 0.43, "learning_rate": 0.00011633684818720209, "loss": 3.4269, "step": 2997 }, { "epoch": 0.43, "learning_rate": 0.00011630795897732199, "loss": 3.4374, "step": 2998 }, { "epoch": 0.43, "learning_rate": 0.00011627906976744187, "loss": 3.212, "step": 2999 }, { "epoch": 0.43, "learning_rate": 0.00011625018055756176, "loss": 3.3384, "step": 3000 }, { "epoch": 0.43, "learning_rate": 0.00011622129134768165, "loss": 3.5069, "step": 3001 }, { "epoch": 0.43, "learning_rate": 0.00011619240213780153, "loss": 3.2896, "step": 3002 }, { "epoch": 0.43, "learning_rate": 0.00011616351292792143, "loss": 3.2173, "step": 3003 }, { "epoch": 0.43, "learning_rate": 0.00011613462371804131, "loss": 3.2881, "step": 3004 }, { "epoch": 0.43, "learning_rate": 0.0001161057345081612, "loss": 3.4238, "step": 3005 }, { "epoch": 0.43, "learning_rate": 0.00011607684529828108, "loss": 3.3646, "step": 3006 }, { "epoch": 0.43, "learning_rate": 0.00011604795608840098, "loss": 3.2412, "step": 3007 }, { "epoch": 0.43, "learning_rate": 0.00011601906687852088, "loss": 3.3506, "step": 3008 }, { "epoch": 0.43, "learning_rate": 0.00011599017766864078, "loss": 3.3297, "step": 3009 }, { "epoch": 0.43, "learning_rate": 0.00011596128845876066, "loss": 3.3462, "step": 3010 }, { "epoch": 0.43, "learning_rate": 0.00011593239924888055, "loss": 3.3547, "step": 3011 }, { "epoch": 0.43, "learning_rate": 0.00011590351003900045, "loss": 3.4532, "step": 3012 }, { "epoch": 0.43, "learning_rate": 0.00011587462082912033, "loss": 3.4124, "step": 3013 }, { "epoch": 0.43, "learning_rate": 0.00011584573161924022, "loss": 3.3584, "step": 3014 }, { "epoch": 0.43, "learning_rate": 0.0001158168424093601, "loss": 3.3526, "step": 3015 }, { "epoch": 0.43, "learning_rate": 0.00011578795319948, "loss": 3.338, "step": 3016 }, { "epoch": 0.43, "learning_rate": 0.00011575906398959987, "loss": 3.4493, "step": 3017 }, { "epoch": 0.43, "learning_rate": 0.0001157301747797198, "loss": 3.3514, "step": 3018 }, { "epoch": 0.43, "learning_rate": 0.00011570128556983968, "loss": 3.423, "step": 3019 }, { "epoch": 0.43, "learning_rate": 0.00011567239635995957, "loss": 3.3087, "step": 3020 }, { "epoch": 0.43, "learning_rate": 0.00011564350715007945, "loss": 3.3231, "step": 3021 }, { "epoch": 0.43, "learning_rate": 0.00011561461794019934, "loss": 3.2946, "step": 3022 }, { "epoch": 0.43, "learning_rate": 0.00011558572873031924, "loss": 3.3502, "step": 3023 }, { "epoch": 0.43, "learning_rate": 0.00011555683952043912, "loss": 3.3376, "step": 3024 }, { "epoch": 0.43, "learning_rate": 0.00011552795031055901, "loss": 3.4109, "step": 3025 }, { "epoch": 0.43, "learning_rate": 0.00011549906110067889, "loss": 3.3582, "step": 3026 }, { "epoch": 0.43, "learning_rate": 0.00011547017189079879, "loss": 3.2769, "step": 3027 }, { "epoch": 0.43, "learning_rate": 0.00011544128268091867, "loss": 3.4471, "step": 3028 }, { "epoch": 0.43, "learning_rate": 0.00011541239347103859, "loss": 3.3088, "step": 3029 }, { "epoch": 0.43, "learning_rate": 0.00011538350426115847, "loss": 3.434, "step": 3030 }, { "epoch": 0.43, "learning_rate": 0.00011535461505127836, "loss": 3.4577, "step": 3031 }, { "epoch": 0.43, "learning_rate": 0.00011532572584139824, "loss": 3.4747, "step": 3032 }, { "epoch": 0.43, "learning_rate": 0.00011529683663151814, "loss": 3.339, "step": 3033 }, { "epoch": 0.43, "learning_rate": 0.00011526794742163803, "loss": 3.2112, "step": 3034 }, { "epoch": 0.43, "learning_rate": 0.00011523905821175791, "loss": 3.2662, "step": 3035 }, { "epoch": 0.43, "learning_rate": 0.0001152101690018778, "loss": 3.3228, "step": 3036 }, { "epoch": 0.43, "learning_rate": 0.00011518127979199768, "loss": 3.4458, "step": 3037 }, { "epoch": 0.43, "learning_rate": 0.00011515239058211758, "loss": 3.4888, "step": 3038 }, { "epoch": 0.43, "learning_rate": 0.00011512350137223748, "loss": 3.4086, "step": 3039 }, { "epoch": 0.43, "learning_rate": 0.00011509461216235738, "loss": 3.3899, "step": 3040 }, { "epoch": 0.43, "learning_rate": 0.00011506572295247726, "loss": 3.2718, "step": 3041 }, { "epoch": 0.43, "learning_rate": 0.00011503683374259715, "loss": 3.3835, "step": 3042 }, { "epoch": 0.43, "learning_rate": 0.00011500794453271703, "loss": 3.3354, "step": 3043 }, { "epoch": 0.43, "learning_rate": 0.00011497905532283693, "loss": 3.3969, "step": 3044 }, { "epoch": 0.43, "learning_rate": 0.00011495016611295682, "loss": 3.2528, "step": 3045 }, { "epoch": 0.43, "learning_rate": 0.0001149212769030767, "loss": 3.2295, "step": 3046 }, { "epoch": 0.43, "learning_rate": 0.0001148923876931966, "loss": 3.4159, "step": 3047 }, { "epoch": 0.43, "learning_rate": 0.00011486349848331647, "loss": 3.4099, "step": 3048 }, { "epoch": 0.43, "learning_rate": 0.00011483460927343637, "loss": 3.3859, "step": 3049 }, { "epoch": 0.43, "learning_rate": 0.00011480572006355628, "loss": 3.381, "step": 3050 }, { "epoch": 0.43, "learning_rate": 0.00011477683085367617, "loss": 3.1571, "step": 3051 }, { "epoch": 0.43, "learning_rate": 0.00011474794164379605, "loss": 3.3107, "step": 3052 }, { "epoch": 0.43, "learning_rate": 0.00011471905243391594, "loss": 3.3559, "step": 3053 }, { "epoch": 0.43, "learning_rate": 0.00011469016322403582, "loss": 3.3384, "step": 3054 }, { "epoch": 0.43, "learning_rate": 0.00011466127401415572, "loss": 3.2054, "step": 3055 }, { "epoch": 0.44, "learning_rate": 0.00011463238480427561, "loss": 3.3614, "step": 3056 }, { "epoch": 0.44, "learning_rate": 0.00011460349559439549, "loss": 3.418, "step": 3057 }, { "epoch": 0.44, "learning_rate": 0.00011457460638451539, "loss": 3.2436, "step": 3058 }, { "epoch": 0.44, "learning_rate": 0.00011454571717463527, "loss": 3.3524, "step": 3059 }, { "epoch": 0.44, "learning_rate": 0.00011451682796475517, "loss": 3.5018, "step": 3060 }, { "epoch": 0.44, "learning_rate": 0.00011448793875487507, "loss": 3.4198, "step": 3061 }, { "epoch": 0.44, "learning_rate": 0.00011445904954499496, "loss": 3.183, "step": 3062 }, { "epoch": 0.44, "learning_rate": 0.00011443016033511484, "loss": 3.4058, "step": 3063 }, { "epoch": 0.44, "learning_rate": 0.00011440127112523474, "loss": 3.3522, "step": 3064 }, { "epoch": 0.44, "learning_rate": 0.00011437238191535462, "loss": 3.2152, "step": 3065 }, { "epoch": 0.44, "learning_rate": 0.00011434349270547451, "loss": 3.2856, "step": 3066 }, { "epoch": 0.44, "learning_rate": 0.0001143146034955944, "loss": 3.3922, "step": 3067 }, { "epoch": 0.44, "learning_rate": 0.00011428571428571428, "loss": 3.3337, "step": 3068 }, { "epoch": 0.44, "learning_rate": 0.00011425682507583418, "loss": 3.3856, "step": 3069 }, { "epoch": 0.44, "learning_rate": 0.00011422793586595406, "loss": 3.4281, "step": 3070 }, { "epoch": 0.44, "learning_rate": 0.00011419904665607396, "loss": 3.2898, "step": 3071 }, { "epoch": 0.44, "learning_rate": 0.00011417015744619386, "loss": 3.2965, "step": 3072 }, { "epoch": 0.44, "learning_rate": 0.00011414126823631375, "loss": 3.3825, "step": 3073 }, { "epoch": 0.44, "learning_rate": 0.00011411237902643363, "loss": 3.4231, "step": 3074 }, { "epoch": 0.44, "learning_rate": 0.00011408348981655353, "loss": 3.4035, "step": 3075 }, { "epoch": 0.44, "learning_rate": 0.00011405460060667341, "loss": 3.4066, "step": 3076 }, { "epoch": 0.44, "learning_rate": 0.0001140257113967933, "loss": 3.2967, "step": 3077 }, { "epoch": 0.44, "learning_rate": 0.0001139968221869132, "loss": 3.3218, "step": 3078 }, { "epoch": 0.44, "learning_rate": 0.00011396793297703307, "loss": 3.461, "step": 3079 }, { "epoch": 0.44, "learning_rate": 0.00011393904376715297, "loss": 3.4681, "step": 3080 }, { "epoch": 0.44, "learning_rate": 0.00011391015455727288, "loss": 3.2937, "step": 3081 }, { "epoch": 0.44, "learning_rate": 0.00011388126534739276, "loss": 3.3003, "step": 3082 }, { "epoch": 0.44, "learning_rate": 0.00011385237613751265, "loss": 3.4808, "step": 3083 }, { "epoch": 0.44, "learning_rate": 0.00011382348692763254, "loss": 3.3351, "step": 3084 }, { "epoch": 0.44, "learning_rate": 0.00011379459771775242, "loss": 3.3666, "step": 3085 }, { "epoch": 0.44, "learning_rate": 0.00011376570850787232, "loss": 3.3265, "step": 3086 }, { "epoch": 0.44, "learning_rate": 0.0001137368192979922, "loss": 3.1792, "step": 3087 }, { "epoch": 0.44, "learning_rate": 0.00011370793008811209, "loss": 3.3736, "step": 3088 }, { "epoch": 0.44, "learning_rate": 0.00011367904087823199, "loss": 3.2193, "step": 3089 }, { "epoch": 0.44, "learning_rate": 0.00011365015166835187, "loss": 3.3988, "step": 3090 }, { "epoch": 0.44, "learning_rate": 0.00011362126245847176, "loss": 3.3794, "step": 3091 }, { "epoch": 0.44, "learning_rate": 0.00011359237324859167, "loss": 3.2979, "step": 3092 }, { "epoch": 0.44, "learning_rate": 0.00011356348403871155, "loss": 3.3949, "step": 3093 }, { "epoch": 0.44, "learning_rate": 0.00011353459482883144, "loss": 3.3369, "step": 3094 }, { "epoch": 0.44, "learning_rate": 0.00011350570561895134, "loss": 3.2751, "step": 3095 }, { "epoch": 0.44, "learning_rate": 0.00011347681640907122, "loss": 3.4321, "step": 3096 }, { "epoch": 0.44, "learning_rate": 0.00011344792719919111, "loss": 3.3877, "step": 3097 }, { "epoch": 0.44, "learning_rate": 0.00011341903798931099, "loss": 3.3482, "step": 3098 }, { "epoch": 0.44, "learning_rate": 0.00011339014877943088, "loss": 3.2839, "step": 3099 }, { "epoch": 0.44, "learning_rate": 0.00011336125956955076, "loss": 3.3243, "step": 3100 }, { "epoch": 0.44, "learning_rate": 0.00011333237035967066, "loss": 3.4563, "step": 3101 }, { "epoch": 0.44, "learning_rate": 0.00011330348114979057, "loss": 3.4088, "step": 3102 }, { "epoch": 0.44, "learning_rate": 0.00011327459193991046, "loss": 3.281, "step": 3103 }, { "epoch": 0.44, "learning_rate": 0.00011324570273003034, "loss": 3.2996, "step": 3104 }, { "epoch": 0.44, "learning_rate": 0.00011321681352015023, "loss": 3.1804, "step": 3105 }, { "epoch": 0.44, "learning_rate": 0.00011318792431027013, "loss": 3.3312, "step": 3106 }, { "epoch": 0.44, "learning_rate": 0.00011315903510039001, "loss": 3.4377, "step": 3107 }, { "epoch": 0.44, "learning_rate": 0.0001131301458905099, "loss": 3.4095, "step": 3108 }, { "epoch": 0.44, "learning_rate": 0.00011310125668062978, "loss": 3.3624, "step": 3109 }, { "epoch": 0.44, "learning_rate": 0.00011307236747074968, "loss": 3.3228, "step": 3110 }, { "epoch": 0.44, "learning_rate": 0.00011304347826086956, "loss": 3.43, "step": 3111 }, { "epoch": 0.44, "learning_rate": 0.00011301458905098945, "loss": 3.3281, "step": 3112 }, { "epoch": 0.44, "learning_rate": 0.00011298569984110936, "loss": 3.243, "step": 3113 }, { "epoch": 0.44, "learning_rate": 0.00011295681063122925, "loss": 3.4537, "step": 3114 }, { "epoch": 0.44, "learning_rate": 0.00011292792142134913, "loss": 3.393, "step": 3115 }, { "epoch": 0.44, "learning_rate": 0.00011289903221146902, "loss": 3.3423, "step": 3116 }, { "epoch": 0.44, "learning_rate": 0.00011287014300158892, "loss": 3.4138, "step": 3117 }, { "epoch": 0.44, "learning_rate": 0.0001128412537917088, "loss": 3.2931, "step": 3118 }, { "epoch": 0.44, "learning_rate": 0.00011281236458182869, "loss": 3.4065, "step": 3119 }, { "epoch": 0.44, "learning_rate": 0.00011278347537194857, "loss": 3.2166, "step": 3120 }, { "epoch": 0.44, "learning_rate": 0.00011275458616206847, "loss": 3.3086, "step": 3121 }, { "epoch": 0.44, "learning_rate": 0.00011272569695218835, "loss": 3.3501, "step": 3122 }, { "epoch": 0.44, "learning_rate": 0.00011269680774230827, "loss": 3.5366, "step": 3123 }, { "epoch": 0.44, "learning_rate": 0.00011266791853242815, "loss": 3.4074, "step": 3124 }, { "epoch": 0.44, "learning_rate": 0.00011263902932254804, "loss": 3.1839, "step": 3125 }, { "epoch": 0.45, "learning_rate": 0.00011261014011266792, "loss": 3.3956, "step": 3126 }, { "epoch": 0.45, "learning_rate": 0.00011258125090278782, "loss": 3.3401, "step": 3127 }, { "epoch": 0.45, "learning_rate": 0.00011255236169290771, "loss": 3.3493, "step": 3128 }, { "epoch": 0.45, "learning_rate": 0.00011252347248302759, "loss": 3.38, "step": 3129 }, { "epoch": 0.45, "learning_rate": 0.00011249458327314748, "loss": 3.2987, "step": 3130 }, { "epoch": 0.45, "learning_rate": 0.00011246569406326736, "loss": 3.2619, "step": 3131 }, { "epoch": 0.45, "learning_rate": 0.00011243680485338726, "loss": 3.3791, "step": 3132 }, { "epoch": 0.45, "learning_rate": 0.00011240791564350714, "loss": 3.401, "step": 3133 }, { "epoch": 0.45, "learning_rate": 0.00011237902643362706, "loss": 3.1383, "step": 3134 }, { "epoch": 0.45, "learning_rate": 0.00011235013722374694, "loss": 3.3987, "step": 3135 }, { "epoch": 0.45, "learning_rate": 0.00011232124801386683, "loss": 3.248, "step": 3136 }, { "epoch": 0.45, "learning_rate": 0.00011229235880398671, "loss": 3.2962, "step": 3137 }, { "epoch": 0.45, "learning_rate": 0.00011226346959410661, "loss": 3.2754, "step": 3138 }, { "epoch": 0.45, "learning_rate": 0.0001122345803842265, "loss": 3.4079, "step": 3139 }, { "epoch": 0.45, "learning_rate": 0.00011220569117434638, "loss": 3.4831, "step": 3140 }, { "epoch": 0.45, "learning_rate": 0.00011217680196446628, "loss": 3.4135, "step": 3141 }, { "epoch": 0.45, "learning_rate": 0.00011214791275458616, "loss": 3.3875, "step": 3142 }, { "epoch": 0.45, "learning_rate": 0.00011211902354470605, "loss": 3.2953, "step": 3143 }, { "epoch": 0.45, "learning_rate": 0.00011209013433482596, "loss": 3.4627, "step": 3144 }, { "epoch": 0.45, "learning_rate": 0.00011206124512494585, "loss": 3.2988, "step": 3145 }, { "epoch": 0.45, "learning_rate": 0.00011203235591506573, "loss": 3.2434, "step": 3146 }, { "epoch": 0.45, "learning_rate": 0.00011200346670518562, "loss": 3.4888, "step": 3147 }, { "epoch": 0.45, "learning_rate": 0.0001119745774953055, "loss": 3.3249, "step": 3148 }, { "epoch": 0.45, "learning_rate": 0.0001119456882854254, "loss": 3.4025, "step": 3149 }, { "epoch": 0.45, "learning_rate": 0.00011191679907554529, "loss": 3.4046, "step": 3150 }, { "epoch": 0.45, "learning_rate": 0.00011188790986566517, "loss": 3.2726, "step": 3151 }, { "epoch": 0.45, "learning_rate": 0.00011185902065578507, "loss": 3.3567, "step": 3152 }, { "epoch": 0.45, "learning_rate": 0.00011183013144590495, "loss": 3.3017, "step": 3153 }, { "epoch": 0.45, "learning_rate": 0.00011180124223602484, "loss": 3.3534, "step": 3154 }, { "epoch": 0.45, "learning_rate": 0.00011177235302614475, "loss": 3.3808, "step": 3155 }, { "epoch": 0.45, "learning_rate": 0.00011174346381626464, "loss": 3.2337, "step": 3156 }, { "epoch": 0.45, "learning_rate": 0.00011171457460638452, "loss": 3.3304, "step": 3157 }, { "epoch": 0.45, "learning_rate": 0.00011168568539650442, "loss": 3.2466, "step": 3158 }, { "epoch": 0.45, "learning_rate": 0.0001116567961866243, "loss": 3.4152, "step": 3159 }, { "epoch": 0.45, "learning_rate": 0.00011162790697674419, "loss": 3.2942, "step": 3160 }, { "epoch": 0.45, "learning_rate": 0.00011159901776686408, "loss": 3.3444, "step": 3161 }, { "epoch": 0.45, "learning_rate": 0.00011157012855698396, "loss": 3.3307, "step": 3162 }, { "epoch": 0.45, "learning_rate": 0.00011154123934710386, "loss": 3.3356, "step": 3163 }, { "epoch": 0.45, "learning_rate": 0.00011151235013722374, "loss": 3.4839, "step": 3164 }, { "epoch": 0.45, "learning_rate": 0.00011148346092734366, "loss": 3.4511, "step": 3165 }, { "epoch": 0.45, "learning_rate": 0.00011145457171746354, "loss": 3.3769, "step": 3166 }, { "epoch": 0.45, "learning_rate": 0.00011142568250758343, "loss": 3.2655, "step": 3167 }, { "epoch": 0.45, "learning_rate": 0.00011139679329770331, "loss": 3.3711, "step": 3168 }, { "epoch": 0.45, "eval_loss": 3.5232768058776855, "eval_runtime": 472.4363, "eval_samples_per_second": 43.365, "eval_steps_per_second": 14.455, "step": 3168 }, { "epoch": 0.45, "learning_rate": 0.00011136790408782321, "loss": 3.407, "step": 3169 }, { "epoch": 0.45, "learning_rate": 0.00011133901487794309, "loss": 3.4157, "step": 3170 }, { "epoch": 0.45, "learning_rate": 0.00011131012566806298, "loss": 3.2203, "step": 3171 }, { "epoch": 0.45, "learning_rate": 0.00011128123645818288, "loss": 3.3327, "step": 3172 }, { "epoch": 0.45, "learning_rate": 0.00011125234724830276, "loss": 3.2629, "step": 3173 }, { "epoch": 0.45, "learning_rate": 0.00011122345803842265, "loss": 3.3861, "step": 3174 }, { "epoch": 0.45, "learning_rate": 0.00011119456882854256, "loss": 3.3472, "step": 3175 }, { "epoch": 0.45, "learning_rate": 0.00011116567961866244, "loss": 3.4091, "step": 3176 }, { "epoch": 0.45, "learning_rate": 0.00011113679040878233, "loss": 3.2157, "step": 3177 }, { "epoch": 0.45, "learning_rate": 0.00011110790119890222, "loss": 3.3241, "step": 3178 }, { "epoch": 0.45, "learning_rate": 0.0001110790119890221, "loss": 3.3768, "step": 3179 }, { "epoch": 0.45, "learning_rate": 0.000111050122779142, "loss": 3.3307, "step": 3180 }, { "epoch": 0.45, "learning_rate": 0.00011102123356926188, "loss": 3.4113, "step": 3181 }, { "epoch": 0.45, "learning_rate": 0.00011099234435938177, "loss": 3.2805, "step": 3182 }, { "epoch": 0.45, "learning_rate": 0.00011096345514950167, "loss": 3.4032, "step": 3183 }, { "epoch": 0.45, "learning_rate": 0.00011093456593962155, "loss": 3.0521, "step": 3184 }, { "epoch": 0.45, "learning_rate": 0.00011090567672974144, "loss": 3.2905, "step": 3185 }, { "epoch": 0.45, "learning_rate": 0.00011087678751986135, "loss": 3.2763, "step": 3186 }, { "epoch": 0.45, "learning_rate": 0.00011084789830998123, "loss": 3.2978, "step": 3187 }, { "epoch": 0.45, "learning_rate": 0.00011081900910010112, "loss": 3.5079, "step": 3188 }, { "epoch": 0.45, "learning_rate": 0.00011079011989022102, "loss": 3.3663, "step": 3189 }, { "epoch": 0.45, "learning_rate": 0.0001107612306803409, "loss": 3.3789, "step": 3190 }, { "epoch": 0.45, "learning_rate": 0.00011073234147046079, "loss": 3.3341, "step": 3191 }, { "epoch": 0.45, "learning_rate": 0.00011070345226058067, "loss": 3.3016, "step": 3192 }, { "epoch": 0.45, "learning_rate": 0.00011067456305070056, "loss": 3.3406, "step": 3193 }, { "epoch": 0.45, "learning_rate": 0.00011064567384082046, "loss": 3.239, "step": 3194 }, { "epoch": 0.45, "learning_rate": 0.00011061678463094034, "loss": 3.3135, "step": 3195 }, { "epoch": 0.46, "learning_rate": 0.00011058789542106025, "loss": 3.4249, "step": 3196 }, { "epoch": 0.46, "learning_rate": 0.00011055900621118014, "loss": 3.3227, "step": 3197 }, { "epoch": 0.46, "learning_rate": 0.00011053011700130002, "loss": 3.2321, "step": 3198 }, { "epoch": 0.46, "learning_rate": 0.00011050122779141991, "loss": 3.3633, "step": 3199 }, { "epoch": 0.46, "learning_rate": 0.00011047233858153981, "loss": 3.1829, "step": 3200 }, { "epoch": 0.46, "learning_rate": 0.00011044344937165969, "loss": 3.4205, "step": 3201 }, { "epoch": 0.46, "learning_rate": 0.00011041456016177958, "loss": 3.3592, "step": 3202 }, { "epoch": 0.46, "learning_rate": 0.00011038567095189946, "loss": 3.295, "step": 3203 }, { "epoch": 0.46, "learning_rate": 0.00011035678174201936, "loss": 3.4284, "step": 3204 }, { "epoch": 0.46, "learning_rate": 0.00011032789253213925, "loss": 3.2576, "step": 3205 }, { "epoch": 0.46, "learning_rate": 0.00011029900332225913, "loss": 3.2283, "step": 3206 }, { "epoch": 0.46, "learning_rate": 0.00011027011411237904, "loss": 3.3789, "step": 3207 }, { "epoch": 0.46, "learning_rate": 0.00011024122490249893, "loss": 3.3567, "step": 3208 }, { "epoch": 0.46, "learning_rate": 0.00011021233569261881, "loss": 3.2874, "step": 3209 }, { "epoch": 0.46, "learning_rate": 0.0001101834464827387, "loss": 3.2771, "step": 3210 }, { "epoch": 0.46, "learning_rate": 0.0001101545572728586, "loss": 3.4531, "step": 3211 }, { "epoch": 0.46, "learning_rate": 0.00011012566806297848, "loss": 3.4304, "step": 3212 }, { "epoch": 0.46, "learning_rate": 0.00011009677885309837, "loss": 3.3653, "step": 3213 }, { "epoch": 0.46, "learning_rate": 0.00011006788964321825, "loss": 3.198, "step": 3214 }, { "epoch": 0.46, "learning_rate": 0.00011003900043333815, "loss": 3.4138, "step": 3215 }, { "epoch": 0.46, "learning_rate": 0.00011001011122345804, "loss": 3.3546, "step": 3216 }, { "epoch": 0.46, "learning_rate": 0.00010998122201357795, "loss": 3.361, "step": 3217 }, { "epoch": 0.46, "learning_rate": 0.00010995233280369783, "loss": 3.4561, "step": 3218 }, { "epoch": 0.46, "learning_rate": 0.00010992344359381772, "loss": 3.3528, "step": 3219 }, { "epoch": 0.46, "learning_rate": 0.0001098945543839376, "loss": 3.2822, "step": 3220 }, { "epoch": 0.46, "learning_rate": 0.0001098656651740575, "loss": 3.138, "step": 3221 }, { "epoch": 0.46, "learning_rate": 0.00010983677596417739, "loss": 3.3132, "step": 3222 }, { "epoch": 0.46, "learning_rate": 0.00010980788675429727, "loss": 3.1742, "step": 3223 }, { "epoch": 0.46, "learning_rate": 0.00010977899754441716, "loss": 3.4067, "step": 3224 }, { "epoch": 0.46, "learning_rate": 0.00010975010833453704, "loss": 3.121, "step": 3225 }, { "epoch": 0.46, "learning_rate": 0.00010972121912465694, "loss": 3.2398, "step": 3226 }, { "epoch": 0.46, "learning_rate": 0.00010969232991477683, "loss": 3.3237, "step": 3227 }, { "epoch": 0.46, "learning_rate": 0.00010966344070489674, "loss": 3.2556, "step": 3228 }, { "epoch": 0.46, "learning_rate": 0.00010963455149501662, "loss": 3.3521, "step": 3229 }, { "epoch": 0.46, "learning_rate": 0.00010960566228513651, "loss": 3.2995, "step": 3230 }, { "epoch": 0.46, "learning_rate": 0.0001095767730752564, "loss": 3.2207, "step": 3231 }, { "epoch": 0.46, "learning_rate": 0.00010954788386537629, "loss": 3.2197, "step": 3232 }, { "epoch": 0.46, "learning_rate": 0.00010951899465549618, "loss": 3.2293, "step": 3233 }, { "epoch": 0.46, "learning_rate": 0.00010949010544561606, "loss": 3.2608, "step": 3234 }, { "epoch": 0.46, "learning_rate": 0.00010946121623573596, "loss": 3.0432, "step": 3235 }, { "epoch": 0.46, "learning_rate": 0.00010943232702585584, "loss": 3.3144, "step": 3236 }, { "epoch": 0.46, "learning_rate": 0.00010940343781597573, "loss": 3.304, "step": 3237 }, { "epoch": 0.46, "learning_rate": 0.00010937454860609564, "loss": 3.3748, "step": 3238 }, { "epoch": 0.46, "learning_rate": 0.00010934565939621553, "loss": 3.3572, "step": 3239 }, { "epoch": 0.46, "learning_rate": 0.00010931677018633541, "loss": 3.3201, "step": 3240 }, { "epoch": 0.46, "learning_rate": 0.0001092878809764553, "loss": 3.3465, "step": 3241 }, { "epoch": 0.46, "learning_rate": 0.00010925899176657519, "loss": 3.4183, "step": 3242 }, { "epoch": 0.46, "learning_rate": 0.00010923010255669508, "loss": 3.2054, "step": 3243 }, { "epoch": 0.46, "learning_rate": 0.00010920121334681497, "loss": 3.3875, "step": 3244 }, { "epoch": 0.46, "learning_rate": 0.00010917232413693485, "loss": 3.2239, "step": 3245 }, { "epoch": 0.46, "learning_rate": 0.00010914343492705475, "loss": 3.4065, "step": 3246 }, { "epoch": 0.46, "learning_rate": 0.00010911454571717463, "loss": 3.3298, "step": 3247 }, { "epoch": 0.46, "learning_rate": 0.00010908565650729452, "loss": 3.503, "step": 3248 }, { "epoch": 0.46, "learning_rate": 0.00010905676729741443, "loss": 3.1974, "step": 3249 }, { "epoch": 0.46, "learning_rate": 0.00010902787808753432, "loss": 3.2414, "step": 3250 }, { "epoch": 0.46, "learning_rate": 0.0001089989888776542, "loss": 3.2385, "step": 3251 }, { "epoch": 0.46, "learning_rate": 0.0001089700996677741, "loss": 3.3298, "step": 3252 }, { "epoch": 0.46, "learning_rate": 0.00010894121045789398, "loss": 3.3801, "step": 3253 }, { "epoch": 0.46, "learning_rate": 0.00010891232124801387, "loss": 3.1857, "step": 3254 }, { "epoch": 0.46, "learning_rate": 0.00010888343203813376, "loss": 3.2238, "step": 3255 }, { "epoch": 0.46, "learning_rate": 0.00010885454282825365, "loss": 3.3985, "step": 3256 }, { "epoch": 0.46, "learning_rate": 0.00010882565361837354, "loss": 3.4367, "step": 3257 }, { "epoch": 0.46, "learning_rate": 0.00010879676440849342, "loss": 3.287, "step": 3258 }, { "epoch": 0.46, "learning_rate": 0.00010876787519861334, "loss": 3.4034, "step": 3259 }, { "epoch": 0.46, "learning_rate": 0.00010873898598873322, "loss": 3.3362, "step": 3260 }, { "epoch": 0.46, "learning_rate": 0.00010871009677885311, "loss": 3.3303, "step": 3261 }, { "epoch": 0.46, "learning_rate": 0.000108681207568973, "loss": 3.3775, "step": 3262 }, { "epoch": 0.46, "learning_rate": 0.00010865231835909289, "loss": 3.3738, "step": 3263 }, { "epoch": 0.46, "learning_rate": 0.00010862342914921277, "loss": 3.2712, "step": 3264 }, { "epoch": 0.46, "learning_rate": 0.00010859453993933266, "loss": 3.3352, "step": 3265 }, { "epoch": 0.47, "learning_rate": 0.00010856565072945256, "loss": 3.3474, "step": 3266 }, { "epoch": 0.47, "learning_rate": 0.00010853676151957244, "loss": 3.2761, "step": 3267 }, { "epoch": 0.47, "learning_rate": 0.00010850787230969233, "loss": 3.2135, "step": 3268 }, { "epoch": 0.47, "learning_rate": 0.00010847898309981221, "loss": 3.3481, "step": 3269 }, { "epoch": 0.47, "learning_rate": 0.00010845009388993213, "loss": 3.3173, "step": 3270 }, { "epoch": 0.47, "learning_rate": 0.00010842120468005201, "loss": 3.2898, "step": 3271 }, { "epoch": 0.47, "learning_rate": 0.0001083923154701719, "loss": 3.2506, "step": 3272 }, { "epoch": 0.47, "learning_rate": 0.00010836342626029179, "loss": 3.2026, "step": 3273 }, { "epoch": 0.47, "learning_rate": 0.00010833453705041168, "loss": 3.3621, "step": 3274 }, { "epoch": 0.47, "learning_rate": 0.00010830564784053156, "loss": 3.2253, "step": 3275 }, { "epoch": 0.47, "learning_rate": 0.00010827675863065145, "loss": 3.3631, "step": 3276 }, { "epoch": 0.47, "learning_rate": 0.00010824786942077135, "loss": 3.4034, "step": 3277 }, { "epoch": 0.47, "learning_rate": 0.00010821898021089123, "loss": 3.3907, "step": 3278 }, { "epoch": 0.47, "learning_rate": 0.00010819009100101112, "loss": 3.3238, "step": 3279 }, { "epoch": 0.47, "learning_rate": 0.00010816120179113103, "loss": 3.4008, "step": 3280 }, { "epoch": 0.47, "learning_rate": 0.00010813231258125092, "loss": 3.1676, "step": 3281 }, { "epoch": 0.47, "learning_rate": 0.0001081034233713708, "loss": 3.1822, "step": 3282 }, { "epoch": 0.47, "learning_rate": 0.0001080745341614907, "loss": 3.2965, "step": 3283 }, { "epoch": 0.47, "learning_rate": 0.00010804564495161058, "loss": 3.406, "step": 3284 }, { "epoch": 0.47, "learning_rate": 0.00010801675574173047, "loss": 3.2503, "step": 3285 }, { "epoch": 0.47, "learning_rate": 0.00010798786653185035, "loss": 3.3469, "step": 3286 }, { "epoch": 0.47, "learning_rate": 0.00010795897732197025, "loss": 3.4601, "step": 3287 }, { "epoch": 0.47, "learning_rate": 0.00010793008811209014, "loss": 3.3476, "step": 3288 }, { "epoch": 0.47, "learning_rate": 0.00010790119890221002, "loss": 3.1634, "step": 3289 }, { "epoch": 0.47, "learning_rate": 0.00010787230969232991, "loss": 3.2858, "step": 3290 }, { "epoch": 0.47, "learning_rate": 0.00010784342048244982, "loss": 3.3034, "step": 3291 }, { "epoch": 0.47, "learning_rate": 0.00010781453127256971, "loss": 3.2368, "step": 3292 }, { "epoch": 0.47, "learning_rate": 0.0001077856420626896, "loss": 3.2122, "step": 3293 }, { "epoch": 0.47, "learning_rate": 0.00010775675285280949, "loss": 3.2798, "step": 3294 }, { "epoch": 0.47, "learning_rate": 0.00010772786364292937, "loss": 3.2974, "step": 3295 }, { "epoch": 0.47, "learning_rate": 0.00010769897443304926, "loss": 3.3698, "step": 3296 }, { "epoch": 0.47, "learning_rate": 0.00010767008522316914, "loss": 3.3504, "step": 3297 }, { "epoch": 0.47, "learning_rate": 0.00010764119601328904, "loss": 3.1445, "step": 3298 }, { "epoch": 0.47, "learning_rate": 0.00010761230680340893, "loss": 3.3549, "step": 3299 }, { "epoch": 0.47, "learning_rate": 0.00010758341759352881, "loss": 3.1997, "step": 3300 }, { "epoch": 0.47, "learning_rate": 0.00010755452838364872, "loss": 3.2635, "step": 3301 }, { "epoch": 0.47, "learning_rate": 0.00010752563917376861, "loss": 3.3632, "step": 3302 }, { "epoch": 0.47, "learning_rate": 0.00010749674996388849, "loss": 3.2498, "step": 3303 }, { "epoch": 0.47, "learning_rate": 0.00010746786075400839, "loss": 3.2627, "step": 3304 }, { "epoch": 0.47, "learning_rate": 0.00010743897154412828, "loss": 3.3686, "step": 3305 }, { "epoch": 0.47, "learning_rate": 0.00010741008233424816, "loss": 3.2816, "step": 3306 }, { "epoch": 0.47, "learning_rate": 0.00010738119312436805, "loss": 3.2955, "step": 3307 }, { "epoch": 0.47, "learning_rate": 0.00010735230391448793, "loss": 3.3908, "step": 3308 }, { "epoch": 0.47, "learning_rate": 0.00010732341470460783, "loss": 3.282, "step": 3309 }, { "epoch": 0.47, "learning_rate": 0.00010729452549472772, "loss": 3.3469, "step": 3310 }, { "epoch": 0.47, "learning_rate": 0.0001072656362848476, "loss": 3.2559, "step": 3311 }, { "epoch": 0.47, "learning_rate": 0.00010723674707496751, "loss": 3.3632, "step": 3312 }, { "epoch": 0.47, "learning_rate": 0.0001072078578650874, "loss": 3.3206, "step": 3313 }, { "epoch": 0.47, "learning_rate": 0.00010717896865520728, "loss": 3.3854, "step": 3314 }, { "epoch": 0.47, "learning_rate": 0.00010715007944532718, "loss": 3.2409, "step": 3315 }, { "epoch": 0.47, "learning_rate": 0.00010712119023544707, "loss": 3.3704, "step": 3316 }, { "epoch": 0.47, "learning_rate": 0.00010709230102556695, "loss": 3.202, "step": 3317 }, { "epoch": 0.47, "learning_rate": 0.00010706341181568685, "loss": 3.3145, "step": 3318 }, { "epoch": 0.47, "learning_rate": 0.00010703452260580673, "loss": 3.2447, "step": 3319 }, { "epoch": 0.47, "learning_rate": 0.00010700563339592662, "loss": 3.4438, "step": 3320 }, { "epoch": 0.47, "learning_rate": 0.00010697674418604651, "loss": 3.2226, "step": 3321 }, { "epoch": 0.47, "learning_rate": 0.00010694785497616642, "loss": 3.3073, "step": 3322 }, { "epoch": 0.47, "learning_rate": 0.0001069189657662863, "loss": 3.31, "step": 3323 }, { "epoch": 0.47, "learning_rate": 0.0001068900765564062, "loss": 3.1201, "step": 3324 }, { "epoch": 0.47, "learning_rate": 0.00010686118734652608, "loss": 3.4242, "step": 3325 }, { "epoch": 0.47, "learning_rate": 0.00010683229813664597, "loss": 3.2957, "step": 3326 }, { "epoch": 0.47, "learning_rate": 0.00010680340892676586, "loss": 3.3738, "step": 3327 }, { "epoch": 0.47, "learning_rate": 0.00010677451971688574, "loss": 3.1818, "step": 3328 }, { "epoch": 0.47, "learning_rate": 0.00010674563050700564, "loss": 3.3446, "step": 3329 }, { "epoch": 0.47, "learning_rate": 0.00010671674129712552, "loss": 3.3844, "step": 3330 }, { "epoch": 0.47, "learning_rate": 0.00010668785208724541, "loss": 3.3296, "step": 3331 }, { "epoch": 0.47, "learning_rate": 0.0001066589628773653, "loss": 3.3716, "step": 3332 }, { "epoch": 0.47, "learning_rate": 0.00010663007366748521, "loss": 3.1275, "step": 3333 }, { "epoch": 0.47, "learning_rate": 0.00010660118445760509, "loss": 3.0919, "step": 3334 }, { "epoch": 0.47, "learning_rate": 0.00010657229524772499, "loss": 3.3065, "step": 3335 }, { "epoch": 0.47, "learning_rate": 0.00010654340603784487, "loss": 3.3737, "step": 3336 }, { "epoch": 0.48, "learning_rate": 0.00010651451682796476, "loss": 3.3247, "step": 3337 }, { "epoch": 0.48, "learning_rate": 0.00010648562761808465, "loss": 3.0945, "step": 3338 }, { "epoch": 0.48, "learning_rate": 0.00010645673840820453, "loss": 3.3309, "step": 3339 }, { "epoch": 0.48, "learning_rate": 0.00010642784919832443, "loss": 3.3925, "step": 3340 }, { "epoch": 0.48, "learning_rate": 0.00010639895998844431, "loss": 3.3906, "step": 3341 }, { "epoch": 0.48, "learning_rate": 0.0001063700707785642, "loss": 3.3281, "step": 3342 }, { "epoch": 0.48, "learning_rate": 0.00010634118156868411, "loss": 3.3451, "step": 3343 }, { "epoch": 0.48, "learning_rate": 0.000106312292358804, "loss": 3.4958, "step": 3344 }, { "epoch": 0.48, "learning_rate": 0.00010628340314892388, "loss": 3.3795, "step": 3345 }, { "epoch": 0.48, "learning_rate": 0.00010625451393904378, "loss": 3.1946, "step": 3346 }, { "epoch": 0.48, "learning_rate": 0.00010622562472916366, "loss": 3.2881, "step": 3347 }, { "epoch": 0.48, "learning_rate": 0.00010619673551928355, "loss": 3.3135, "step": 3348 }, { "epoch": 0.48, "learning_rate": 0.00010616784630940345, "loss": 3.3223, "step": 3349 }, { "epoch": 0.48, "learning_rate": 0.00010613895709952333, "loss": 3.4084, "step": 3350 }, { "epoch": 0.48, "learning_rate": 0.00010611006788964322, "loss": 3.1788, "step": 3351 }, { "epoch": 0.48, "learning_rate": 0.0001060811786797631, "loss": 3.2679, "step": 3352 }, { "epoch": 0.48, "learning_rate": 0.000106052289469883, "loss": 3.2749, "step": 3353 }, { "epoch": 0.48, "learning_rate": 0.0001060234002600029, "loss": 3.2444, "step": 3354 }, { "epoch": 0.48, "learning_rate": 0.0001059945110501228, "loss": 3.3425, "step": 3355 }, { "epoch": 0.48, "learning_rate": 0.00010596562184024268, "loss": 3.2667, "step": 3356 }, { "epoch": 0.48, "learning_rate": 0.00010593673263036257, "loss": 3.4639, "step": 3357 }, { "epoch": 0.48, "learning_rate": 0.00010590784342048245, "loss": 3.283, "step": 3358 }, { "epoch": 0.48, "learning_rate": 0.00010587895421060234, "loss": 3.3782, "step": 3359 }, { "epoch": 0.48, "learning_rate": 0.00010585006500072224, "loss": 3.3153, "step": 3360 }, { "epoch": 0.48, "learning_rate": 0.00010582117579084212, "loss": 3.3003, "step": 3361 }, { "epoch": 0.48, "learning_rate": 0.00010579228658096201, "loss": 3.3667, "step": 3362 }, { "epoch": 0.48, "learning_rate": 0.00010576339737108189, "loss": 3.1026, "step": 3363 }, { "epoch": 0.48, "learning_rate": 0.00010573450816120181, "loss": 3.3164, "step": 3364 }, { "epoch": 0.48, "learning_rate": 0.00010570561895132169, "loss": 3.3183, "step": 3365 }, { "epoch": 0.48, "learning_rate": 0.00010567672974144159, "loss": 3.2863, "step": 3366 }, { "epoch": 0.48, "learning_rate": 0.00010564784053156147, "loss": 3.4557, "step": 3367 }, { "epoch": 0.48, "learning_rate": 0.00010561895132168136, "loss": 3.3675, "step": 3368 }, { "epoch": 0.48, "learning_rate": 0.00010559006211180124, "loss": 3.2719, "step": 3369 }, { "epoch": 0.48, "learning_rate": 0.00010556117290192113, "loss": 3.3039, "step": 3370 }, { "epoch": 0.48, "learning_rate": 0.00010553228369204103, "loss": 3.3442, "step": 3371 }, { "epoch": 0.48, "learning_rate": 0.00010550339448216091, "loss": 3.3411, "step": 3372 }, { "epoch": 0.48, "learning_rate": 0.0001054745052722808, "loss": 3.2531, "step": 3373 }, { "epoch": 0.48, "learning_rate": 0.00010544561606240071, "loss": 3.3079, "step": 3374 }, { "epoch": 0.48, "learning_rate": 0.0001054167268525206, "loss": 3.2034, "step": 3375 }, { "epoch": 0.48, "learning_rate": 0.00010538783764264048, "loss": 3.1568, "step": 3376 }, { "epoch": 0.48, "learning_rate": 0.00010535894843276038, "loss": 3.3041, "step": 3377 }, { "epoch": 0.48, "learning_rate": 0.00010533005922288026, "loss": 3.2839, "step": 3378 }, { "epoch": 0.48, "learning_rate": 0.00010530117001300015, "loss": 3.2965, "step": 3379 }, { "epoch": 0.48, "learning_rate": 0.00010527228080312003, "loss": 3.2, "step": 3380 }, { "epoch": 0.48, "learning_rate": 0.00010524339159323993, "loss": 3.2672, "step": 3381 }, { "epoch": 0.48, "learning_rate": 0.00010521450238335982, "loss": 3.3248, "step": 3382 }, { "epoch": 0.48, "learning_rate": 0.0001051856131734797, "loss": 3.4735, "step": 3383 }, { "epoch": 0.48, "learning_rate": 0.0001051567239635996, "loss": 3.4084, "step": 3384 }, { "epoch": 0.48, "learning_rate": 0.0001051278347537195, "loss": 3.1859, "step": 3385 }, { "epoch": 0.48, "learning_rate": 0.0001050989455438394, "loss": 3.2975, "step": 3386 }, { "epoch": 0.48, "learning_rate": 0.00010507005633395928, "loss": 3.4183, "step": 3387 }, { "epoch": 0.48, "learning_rate": 0.00010504116712407917, "loss": 3.276, "step": 3388 }, { "epoch": 0.48, "learning_rate": 0.00010501227791419905, "loss": 3.2373, "step": 3389 }, { "epoch": 0.48, "learning_rate": 0.00010498338870431894, "loss": 3.3277, "step": 3390 }, { "epoch": 0.48, "learning_rate": 0.00010495449949443882, "loss": 3.2967, "step": 3391 }, { "epoch": 0.48, "learning_rate": 0.00010492561028455872, "loss": 3.476, "step": 3392 }, { "epoch": 0.48, "learning_rate": 0.00010489672107467861, "loss": 3.1755, "step": 3393 }, { "epoch": 0.48, "learning_rate": 0.00010486783186479849, "loss": 3.3237, "step": 3394 }, { "epoch": 0.48, "learning_rate": 0.0001048389426549184, "loss": 3.3224, "step": 3395 }, { "epoch": 0.48, "learning_rate": 0.00010481005344503829, "loss": 3.212, "step": 3396 }, { "epoch": 0.48, "learning_rate": 0.00010478116423515819, "loss": 3.2954, "step": 3397 }, { "epoch": 0.48, "learning_rate": 0.00010475227502527807, "loss": 3.4549, "step": 3398 }, { "epoch": 0.48, "learning_rate": 0.00010472338581539796, "loss": 3.2889, "step": 3399 }, { "epoch": 0.48, "learning_rate": 0.00010469449660551784, "loss": 3.3868, "step": 3400 }, { "epoch": 0.48, "learning_rate": 0.00010466560739563773, "loss": 3.3967, "step": 3401 }, { "epoch": 0.48, "learning_rate": 0.00010463671818575762, "loss": 3.2687, "step": 3402 }, { "epoch": 0.48, "learning_rate": 0.00010460782897587751, "loss": 3.2324, "step": 3403 }, { "epoch": 0.48, "learning_rate": 0.0001045789397659974, "loss": 3.3779, "step": 3404 }, { "epoch": 0.48, "learning_rate": 0.00010455005055611728, "loss": 3.2455, "step": 3405 }, { "epoch": 0.48, "learning_rate": 0.00010452116134623719, "loss": 3.3532, "step": 3406 }, { "epoch": 0.49, "learning_rate": 0.00010449227213635708, "loss": 3.2354, "step": 3407 }, { "epoch": 0.49, "learning_rate": 0.00010446338292647698, "loss": 3.3761, "step": 3408 }, { "epoch": 0.49, "learning_rate": 0.00010443449371659686, "loss": 3.2817, "step": 3409 }, { "epoch": 0.49, "learning_rate": 0.00010440560450671675, "loss": 3.2361, "step": 3410 }, { "epoch": 0.49, "learning_rate": 0.00010437671529683663, "loss": 3.2874, "step": 3411 }, { "epoch": 0.49, "learning_rate": 0.00010434782608695653, "loss": 3.3086, "step": 3412 }, { "epoch": 0.49, "learning_rate": 0.0001043189368770764, "loss": 3.3358, "step": 3413 }, { "epoch": 0.49, "learning_rate": 0.0001042900476671963, "loss": 3.2998, "step": 3414 }, { "epoch": 0.49, "learning_rate": 0.0001042611584573162, "loss": 3.3185, "step": 3415 }, { "epoch": 0.49, "learning_rate": 0.0001042322692474361, "loss": 3.2077, "step": 3416 }, { "epoch": 0.49, "learning_rate": 0.00010420338003755598, "loss": 3.3911, "step": 3417 }, { "epoch": 0.49, "learning_rate": 0.00010417449082767588, "loss": 3.3192, "step": 3418 }, { "epoch": 0.49, "learning_rate": 0.00010414560161779577, "loss": 3.3712, "step": 3419 }, { "epoch": 0.49, "learning_rate": 0.00010411671240791565, "loss": 3.2438, "step": 3420 }, { "epoch": 0.49, "learning_rate": 0.00010408782319803554, "loss": 3.3327, "step": 3421 }, { "epoch": 0.49, "learning_rate": 0.00010405893398815542, "loss": 3.2294, "step": 3422 }, { "epoch": 0.49, "learning_rate": 0.00010403004477827532, "loss": 3.4021, "step": 3423 }, { "epoch": 0.49, "learning_rate": 0.0001040011555683952, "loss": 3.3835, "step": 3424 }, { "epoch": 0.49, "learning_rate": 0.00010397226635851509, "loss": 3.4587, "step": 3425 }, { "epoch": 0.49, "learning_rate": 0.00010394337714863499, "loss": 3.4684, "step": 3426 }, { "epoch": 0.49, "learning_rate": 0.00010391448793875489, "loss": 3.3067, "step": 3427 }, { "epoch": 0.49, "learning_rate": 0.00010388559872887477, "loss": 3.1844, "step": 3428 }, { "epoch": 0.49, "learning_rate": 0.00010385670951899467, "loss": 3.3517, "step": 3429 }, { "epoch": 0.49, "learning_rate": 0.00010382782030911456, "loss": 3.3095, "step": 3430 }, { "epoch": 0.49, "learning_rate": 0.00010379893109923444, "loss": 3.3435, "step": 3431 }, { "epoch": 0.49, "learning_rate": 0.00010377004188935433, "loss": 3.2317, "step": 3432 }, { "epoch": 0.49, "learning_rate": 0.00010374115267947422, "loss": 3.2868, "step": 3433 }, { "epoch": 0.49, "learning_rate": 0.00010371226346959411, "loss": 3.3199, "step": 3434 }, { "epoch": 0.49, "learning_rate": 0.00010368337425971399, "loss": 3.167, "step": 3435 }, { "epoch": 0.49, "learning_rate": 0.00010365448504983388, "loss": 3.3248, "step": 3436 }, { "epoch": 0.49, "learning_rate": 0.00010362559583995379, "loss": 3.262, "step": 3437 }, { "epoch": 0.49, "learning_rate": 0.00010359670663007368, "loss": 3.3651, "step": 3438 }, { "epoch": 0.49, "learning_rate": 0.00010356781742019356, "loss": 3.3711, "step": 3439 }, { "epoch": 0.49, "learning_rate": 0.00010353892821031346, "loss": 3.2943, "step": 3440 }, { "epoch": 0.49, "learning_rate": 0.00010351003900043334, "loss": 3.2476, "step": 3441 }, { "epoch": 0.49, "learning_rate": 0.00010348114979055323, "loss": 3.1206, "step": 3442 }, { "epoch": 0.49, "learning_rate": 0.00010345226058067313, "loss": 3.3442, "step": 3443 }, { "epoch": 0.49, "learning_rate": 0.000103423371370793, "loss": 3.378, "step": 3444 }, { "epoch": 0.49, "learning_rate": 0.0001033944821609129, "loss": 3.4165, "step": 3445 }, { "epoch": 0.49, "learning_rate": 0.00010336559295103278, "loss": 3.3018, "step": 3446 }, { "epoch": 0.49, "learning_rate": 0.00010333670374115267, "loss": 3.4229, "step": 3447 }, { "epoch": 0.49, "learning_rate": 0.00010330781453127258, "loss": 3.3003, "step": 3448 }, { "epoch": 0.49, "learning_rate": 0.00010327892532139248, "loss": 3.2404, "step": 3449 }, { "epoch": 0.49, "learning_rate": 0.00010325003611151236, "loss": 3.361, "step": 3450 }, { "epoch": 0.49, "learning_rate": 0.00010322114690163225, "loss": 3.2063, "step": 3451 }, { "epoch": 0.49, "learning_rate": 0.00010319225769175213, "loss": 3.3266, "step": 3452 }, { "epoch": 0.49, "learning_rate": 0.00010316336848187202, "loss": 3.3469, "step": 3453 }, { "epoch": 0.49, "learning_rate": 0.00010313447927199192, "loss": 3.2305, "step": 3454 }, { "epoch": 0.49, "learning_rate": 0.0001031055900621118, "loss": 3.2308, "step": 3455 }, { "epoch": 0.49, "learning_rate": 0.00010307670085223169, "loss": 3.3362, "step": 3456 }, { "epoch": 0.49, "learning_rate": 0.00010304781164235157, "loss": 3.3026, "step": 3457 }, { "epoch": 0.49, "learning_rate": 0.00010301892243247149, "loss": 3.2693, "step": 3458 }, { "epoch": 0.49, "learning_rate": 0.00010299003322259137, "loss": 3.36, "step": 3459 }, { "epoch": 0.49, "learning_rate": 0.00010296114401271127, "loss": 3.1219, "step": 3460 }, { "epoch": 0.49, "learning_rate": 0.00010293225480283115, "loss": 3.2911, "step": 3461 }, { "epoch": 0.49, "learning_rate": 0.00010290336559295104, "loss": 3.4447, "step": 3462 }, { "epoch": 0.49, "learning_rate": 0.00010287447638307092, "loss": 3.2468, "step": 3463 }, { "epoch": 0.49, "learning_rate": 0.00010284558717319082, "loss": 3.3301, "step": 3464 }, { "epoch": 0.49, "learning_rate": 0.00010281669796331071, "loss": 3.2482, "step": 3465 }, { "epoch": 0.49, "learning_rate": 0.00010278780875343059, "loss": 3.4332, "step": 3466 }, { "epoch": 0.49, "learning_rate": 0.00010275891954355048, "loss": 3.3375, "step": 3467 }, { "epoch": 0.49, "learning_rate": 0.00010273003033367036, "loss": 3.2605, "step": 3468 }, { "epoch": 0.49, "learning_rate": 0.00010270114112379028, "loss": 3.3278, "step": 3469 }, { "epoch": 0.49, "learning_rate": 0.00010267225191391016, "loss": 3.1607, "step": 3470 }, { "epoch": 0.49, "learning_rate": 0.00010264336270403006, "loss": 3.1191, "step": 3471 }, { "epoch": 0.49, "learning_rate": 0.00010261447349414994, "loss": 3.3582, "step": 3472 }, { "epoch": 0.49, "learning_rate": 0.00010258558428426983, "loss": 3.3878, "step": 3473 }, { "epoch": 0.49, "learning_rate": 0.00010255669507438971, "loss": 3.3283, "step": 3474 }, { "epoch": 0.49, "learning_rate": 0.0001025278058645096, "loss": 3.3213, "step": 3475 }, { "epoch": 0.49, "learning_rate": 0.0001024989166546295, "loss": 3.2, "step": 3476 }, { "epoch": 0.5, "learning_rate": 0.00010247002744474938, "loss": 3.3431, "step": 3477 }, { "epoch": 0.5, "learning_rate": 0.00010244113823486927, "loss": 3.3376, "step": 3478 }, { "epoch": 0.5, "learning_rate": 0.00010241224902498918, "loss": 3.3633, "step": 3479 }, { "epoch": 0.5, "learning_rate": 0.00010238335981510908, "loss": 3.3458, "step": 3480 }, { "epoch": 0.5, "learning_rate": 0.00010235447060522896, "loss": 3.3621, "step": 3481 }, { "epoch": 0.5, "learning_rate": 0.00010232558139534885, "loss": 3.3093, "step": 3482 }, { "epoch": 0.5, "learning_rate": 0.00010229669218546873, "loss": 3.2172, "step": 3483 }, { "epoch": 0.5, "learning_rate": 0.00010226780297558862, "loss": 3.3148, "step": 3484 }, { "epoch": 0.5, "learning_rate": 0.0001022389137657085, "loss": 3.2161, "step": 3485 }, { "epoch": 0.5, "learning_rate": 0.0001022100245558284, "loss": 3.3887, "step": 3486 }, { "epoch": 0.5, "learning_rate": 0.00010218113534594829, "loss": 3.2185, "step": 3487 }, { "epoch": 0.5, "learning_rate": 0.00010215224613606817, "loss": 3.2962, "step": 3488 }, { "epoch": 0.5, "learning_rate": 0.00010212335692618807, "loss": 3.4699, "step": 3489 }, { "epoch": 0.5, "learning_rate": 0.00010209446771630797, "loss": 3.3173, "step": 3490 }, { "epoch": 0.5, "learning_rate": 0.00010206557850642787, "loss": 3.3064, "step": 3491 }, { "epoch": 0.5, "learning_rate": 0.00010203668929654775, "loss": 3.2422, "step": 3492 }, { "epoch": 0.5, "learning_rate": 0.00010200780008666764, "loss": 3.454, "step": 3493 }, { "epoch": 0.5, "learning_rate": 0.00010197891087678752, "loss": 3.3527, "step": 3494 }, { "epoch": 0.5, "learning_rate": 0.00010195002166690742, "loss": 3.2701, "step": 3495 }, { "epoch": 0.5, "learning_rate": 0.0001019211324570273, "loss": 3.3632, "step": 3496 }, { "epoch": 0.5, "learning_rate": 0.00010189224324714719, "loss": 3.2785, "step": 3497 }, { "epoch": 0.5, "learning_rate": 0.00010186335403726708, "loss": 3.2947, "step": 3498 }, { "epoch": 0.5, "learning_rate": 0.00010183446482738696, "loss": 3.31, "step": 3499 }, { "epoch": 0.5, "learning_rate": 0.00010180557561750687, "loss": 3.2879, "step": 3500 }, { "epoch": 0.5, "learning_rate": 0.00010177668640762676, "loss": 3.3353, "step": 3501 }, { "epoch": 0.5, "learning_rate": 0.00010174779719774666, "loss": 3.2496, "step": 3502 }, { "epoch": 0.5, "learning_rate": 0.00010171890798786654, "loss": 3.3229, "step": 3503 }, { "epoch": 0.5, "learning_rate": 0.00010169001877798643, "loss": 3.2976, "step": 3504 }, { "epoch": 0.5, "learning_rate": 0.00010166112956810631, "loss": 3.4325, "step": 3505 }, { "epoch": 0.5, "learning_rate": 0.00010163224035822621, "loss": 3.1573, "step": 3506 }, { "epoch": 0.5, "learning_rate": 0.00010160335114834609, "loss": 3.1567, "step": 3507 }, { "epoch": 0.5, "learning_rate": 0.00010157446193846598, "loss": 3.2892, "step": 3508 }, { "epoch": 0.5, "learning_rate": 0.00010154557272858587, "loss": 3.3654, "step": 3509 }, { "epoch": 0.5, "learning_rate": 0.00010151668351870576, "loss": 3.2534, "step": 3510 }, { "epoch": 0.5, "learning_rate": 0.00010148779430882566, "loss": 3.1937, "step": 3511 }, { "epoch": 0.5, "learning_rate": 0.00010145890509894556, "loss": 3.2221, "step": 3512 }, { "epoch": 0.5, "learning_rate": 0.00010143001588906545, "loss": 3.145, "step": 3513 }, { "epoch": 0.5, "learning_rate": 0.00010140112667918533, "loss": 3.1539, "step": 3514 }, { "epoch": 0.5, "learning_rate": 0.00010137223746930522, "loss": 3.34, "step": 3515 }, { "epoch": 0.5, "learning_rate": 0.0001013433482594251, "loss": 3.4149, "step": 3516 }, { "epoch": 0.5, "learning_rate": 0.000101314459049545, "loss": 3.3189, "step": 3517 }, { "epoch": 0.5, "learning_rate": 0.00010128556983966488, "loss": 3.2401, "step": 3518 }, { "epoch": 0.5, "learning_rate": 0.00010125668062978477, "loss": 3.2709, "step": 3519 }, { "epoch": 0.5, "learning_rate": 0.00010122779141990467, "loss": 3.4195, "step": 3520 }, { "epoch": 0.5, "eval_loss": 3.5005860328674316, "eval_runtime": 472.0927, "eval_samples_per_second": 43.396, "eval_steps_per_second": 14.465, "step": 3520 }, { "epoch": 0.5, "learning_rate": 0.00010119890221002457, "loss": 3.2958, "step": 3521 }, { "epoch": 0.5, "learning_rate": 0.00010117001300014445, "loss": 3.4184, "step": 3522 }, { "epoch": 0.5, "learning_rate": 0.00010114112379026435, "loss": 3.2575, "step": 3523 }, { "epoch": 0.5, "learning_rate": 0.00010111223458038424, "loss": 3.3084, "step": 3524 }, { "epoch": 0.5, "learning_rate": 0.00010108334537050412, "loss": 3.1578, "step": 3525 }, { "epoch": 0.5, "learning_rate": 0.00010105445616062402, "loss": 3.3057, "step": 3526 }, { "epoch": 0.5, "learning_rate": 0.0001010255669507439, "loss": 3.3618, "step": 3527 }, { "epoch": 0.5, "learning_rate": 0.00010099667774086379, "loss": 3.4165, "step": 3528 }, { "epoch": 0.5, "learning_rate": 0.00010096778853098367, "loss": 3.0307, "step": 3529 }, { "epoch": 0.5, "learning_rate": 0.00010093889932110356, "loss": 3.4035, "step": 3530 }, { "epoch": 0.5, "learning_rate": 0.00010091001011122346, "loss": 3.2687, "step": 3531 }, { "epoch": 0.5, "learning_rate": 0.00010088112090134337, "loss": 3.2818, "step": 3532 }, { "epoch": 0.5, "learning_rate": 0.00010085223169146325, "loss": 3.4536, "step": 3533 }, { "epoch": 0.5, "learning_rate": 0.00010082334248158314, "loss": 3.2161, "step": 3534 }, { "epoch": 0.5, "learning_rate": 0.00010079445327170303, "loss": 3.1778, "step": 3535 }, { "epoch": 0.5, "learning_rate": 0.00010076556406182291, "loss": 3.2902, "step": 3536 }, { "epoch": 0.5, "learning_rate": 0.00010073667485194281, "loss": 3.3542, "step": 3537 }, { "epoch": 0.5, "learning_rate": 0.00010070778564206269, "loss": 3.3223, "step": 3538 }, { "epoch": 0.5, "learning_rate": 0.00010067889643218258, "loss": 3.2997, "step": 3539 }, { "epoch": 0.5, "learning_rate": 0.00010065000722230246, "loss": 3.297, "step": 3540 }, { "epoch": 0.5, "learning_rate": 0.00010062111801242236, "loss": 3.2426, "step": 3541 }, { "epoch": 0.5, "learning_rate": 0.00010059222880254226, "loss": 3.3119, "step": 3542 }, { "epoch": 0.5, "learning_rate": 0.00010056333959266216, "loss": 3.3011, "step": 3543 }, { "epoch": 0.5, "learning_rate": 0.00010053445038278204, "loss": 3.4242, "step": 3544 }, { "epoch": 0.5, "learning_rate": 0.00010050556117290193, "loss": 3.3547, "step": 3545 }, { "epoch": 0.5, "learning_rate": 0.00010047667196302182, "loss": 3.1823, "step": 3546 }, { "epoch": 0.51, "learning_rate": 0.0001004477827531417, "loss": 3.3671, "step": 3547 }, { "epoch": 0.51, "learning_rate": 0.0001004188935432616, "loss": 3.2729, "step": 3548 }, { "epoch": 0.51, "learning_rate": 0.00010039000433338148, "loss": 3.2731, "step": 3549 }, { "epoch": 0.51, "learning_rate": 0.00010036111512350137, "loss": 3.3776, "step": 3550 }, { "epoch": 0.51, "learning_rate": 0.00010033222591362125, "loss": 3.3522, "step": 3551 }, { "epoch": 0.51, "learning_rate": 0.00010030333670374115, "loss": 3.2845, "step": 3552 }, { "epoch": 0.51, "learning_rate": 0.00010027444749386105, "loss": 3.2711, "step": 3553 }, { "epoch": 0.51, "learning_rate": 0.00010024555828398095, "loss": 3.3528, "step": 3554 }, { "epoch": 0.51, "learning_rate": 0.00010021666907410083, "loss": 3.3548, "step": 3555 }, { "epoch": 0.51, "learning_rate": 0.00010018777986422072, "loss": 3.1627, "step": 3556 }, { "epoch": 0.51, "learning_rate": 0.00010015889065434062, "loss": 3.2985, "step": 3557 }, { "epoch": 0.51, "learning_rate": 0.0001001300014444605, "loss": 3.4028, "step": 3558 }, { "epoch": 0.51, "learning_rate": 0.00010010111223458039, "loss": 3.318, "step": 3559 }, { "epoch": 0.51, "learning_rate": 0.00010007222302470027, "loss": 3.2277, "step": 3560 }, { "epoch": 0.51, "learning_rate": 0.00010004333381482016, "loss": 3.3456, "step": 3561 }, { "epoch": 0.51, "learning_rate": 0.00010001444460494004, "loss": 3.2859, "step": 3562 }, { "epoch": 0.51, "learning_rate": 9.998555539505995e-05, "loss": 3.3805, "step": 3563 }, { "epoch": 0.51, "learning_rate": 9.995666618517983e-05, "loss": 3.2169, "step": 3564 }, { "epoch": 0.51, "learning_rate": 9.992777697529973e-05, "loss": 3.2084, "step": 3565 }, { "epoch": 0.51, "learning_rate": 9.989888776541962e-05, "loss": 3.4749, "step": 3566 }, { "epoch": 0.51, "learning_rate": 9.986999855553951e-05, "loss": 3.2897, "step": 3567 }, { "epoch": 0.51, "learning_rate": 9.98411093456594e-05, "loss": 3.3012, "step": 3568 }, { "epoch": 0.51, "learning_rate": 9.981222013577929e-05, "loss": 3.3644, "step": 3569 }, { "epoch": 0.51, "learning_rate": 9.978333092589918e-05, "loss": 3.2685, "step": 3570 }, { "epoch": 0.51, "learning_rate": 9.975444171601908e-05, "loss": 3.4293, "step": 3571 }, { "epoch": 0.51, "learning_rate": 9.972555250613897e-05, "loss": 3.4324, "step": 3572 }, { "epoch": 0.51, "learning_rate": 9.969666329625885e-05, "loss": 3.3411, "step": 3573 }, { "epoch": 0.51, "learning_rate": 9.966777408637874e-05, "loss": 3.3643, "step": 3574 }, { "epoch": 0.51, "learning_rate": 9.963888487649862e-05, "loss": 3.2441, "step": 3575 }, { "epoch": 0.51, "learning_rate": 9.960999566661853e-05, "loss": 3.3386, "step": 3576 }, { "epoch": 0.51, "learning_rate": 9.958110645673841e-05, "loss": 3.3044, "step": 3577 }, { "epoch": 0.51, "learning_rate": 9.95522172468583e-05, "loss": 3.2719, "step": 3578 }, { "epoch": 0.51, "learning_rate": 9.952332803697819e-05, "loss": 3.4724, "step": 3579 }, { "epoch": 0.51, "learning_rate": 9.949443882709808e-05, "loss": 3.4531, "step": 3580 }, { "epoch": 0.51, "learning_rate": 9.946554961721797e-05, "loss": 3.2413, "step": 3581 }, { "epoch": 0.51, "learning_rate": 9.943666040733787e-05, "loss": 3.3331, "step": 3582 }, { "epoch": 0.51, "learning_rate": 9.940777119745776e-05, "loss": 3.2825, "step": 3583 }, { "epoch": 0.51, "learning_rate": 9.937888198757764e-05, "loss": 3.1648, "step": 3584 }, { "epoch": 0.51, "learning_rate": 9.934999277769753e-05, "loss": 3.3269, "step": 3585 }, { "epoch": 0.51, "learning_rate": 9.932110356781741e-05, "loss": 3.2855, "step": 3586 }, { "epoch": 0.51, "learning_rate": 9.929221435793732e-05, "loss": 3.2763, "step": 3587 }, { "epoch": 0.51, "learning_rate": 9.92633251480572e-05, "loss": 3.3442, "step": 3588 }, { "epoch": 0.51, "learning_rate": 9.92344359381771e-05, "loss": 3.3143, "step": 3589 }, { "epoch": 0.51, "learning_rate": 9.920554672829698e-05, "loss": 3.1068, "step": 3590 }, { "epoch": 0.51, "learning_rate": 9.917665751841687e-05, "loss": 3.3014, "step": 3591 }, { "epoch": 0.51, "learning_rate": 9.914776830853676e-05, "loss": 3.2842, "step": 3592 }, { "epoch": 0.51, "learning_rate": 9.911887909865666e-05, "loss": 3.3375, "step": 3593 }, { "epoch": 0.51, "learning_rate": 9.908998988877655e-05, "loss": 3.2577, "step": 3594 }, { "epoch": 0.51, "learning_rate": 9.906110067889643e-05, "loss": 3.3795, "step": 3595 }, { "epoch": 0.51, "learning_rate": 9.903221146901633e-05, "loss": 3.3381, "step": 3596 }, { "epoch": 0.51, "learning_rate": 9.900332225913622e-05, "loss": 3.305, "step": 3597 }, { "epoch": 0.51, "learning_rate": 9.897443304925611e-05, "loss": 3.3266, "step": 3598 }, { "epoch": 0.51, "learning_rate": 9.8945543839376e-05, "loss": 3.3164, "step": 3599 }, { "epoch": 0.51, "learning_rate": 9.891665462949589e-05, "loss": 3.4941, "step": 3600 }, { "epoch": 0.51, "learning_rate": 9.888776541961577e-05, "loss": 3.3784, "step": 3601 }, { "epoch": 0.51, "learning_rate": 9.885887620973568e-05, "loss": 3.3169, "step": 3602 }, { "epoch": 0.51, "learning_rate": 9.882998699985556e-05, "loss": 3.3696, "step": 3603 }, { "epoch": 0.51, "learning_rate": 9.880109778997545e-05, "loss": 3.2779, "step": 3604 }, { "epoch": 0.51, "learning_rate": 9.877220858009534e-05, "loss": 3.4196, "step": 3605 }, { "epoch": 0.51, "learning_rate": 9.874331937021522e-05, "loss": 3.364, "step": 3606 }, { "epoch": 0.51, "learning_rate": 9.871443016033512e-05, "loss": 3.365, "step": 3607 }, { "epoch": 0.51, "learning_rate": 9.868554095045501e-05, "loss": 3.2851, "step": 3608 }, { "epoch": 0.51, "learning_rate": 9.86566517405749e-05, "loss": 3.4338, "step": 3609 }, { "epoch": 0.51, "learning_rate": 9.862776253069479e-05, "loss": 3.3938, "step": 3610 }, { "epoch": 0.51, "learning_rate": 9.859887332081468e-05, "loss": 3.1677, "step": 3611 }, { "epoch": 0.51, "learning_rate": 9.856998411093456e-05, "loss": 3.2969, "step": 3612 }, { "epoch": 0.51, "learning_rate": 9.854109490105447e-05, "loss": 3.1708, "step": 3613 }, { "epoch": 0.51, "learning_rate": 9.851220569117435e-05, "loss": 3.3359, "step": 3614 }, { "epoch": 0.51, "learning_rate": 9.848331648129424e-05, "loss": 3.3044, "step": 3615 }, { "epoch": 0.51, "learning_rate": 9.845442727141413e-05, "loss": 3.3036, "step": 3616 }, { "epoch": 0.51, "learning_rate": 9.842553806153401e-05, "loss": 3.1884, "step": 3617 }, { "epoch": 0.52, "learning_rate": 9.839664885165392e-05, "loss": 3.3224, "step": 3618 }, { "epoch": 0.52, "learning_rate": 9.83677596417738e-05, "loss": 3.3364, "step": 3619 }, { "epoch": 0.52, "learning_rate": 9.83388704318937e-05, "loss": 3.1456, "step": 3620 }, { "epoch": 0.52, "learning_rate": 9.830998122201358e-05, "loss": 3.3708, "step": 3621 }, { "epoch": 0.52, "learning_rate": 9.828109201213347e-05, "loss": 3.351, "step": 3622 }, { "epoch": 0.52, "learning_rate": 9.825220280225336e-05, "loss": 3.4185, "step": 3623 }, { "epoch": 0.52, "learning_rate": 9.822331359237326e-05, "loss": 3.2952, "step": 3624 }, { "epoch": 0.52, "learning_rate": 9.819442438249314e-05, "loss": 3.3222, "step": 3625 }, { "epoch": 0.52, "learning_rate": 9.816553517261303e-05, "loss": 3.2495, "step": 3626 }, { "epoch": 0.52, "learning_rate": 9.813664596273293e-05, "loss": 3.3842, "step": 3627 }, { "epoch": 0.52, "learning_rate": 9.810775675285282e-05, "loss": 3.3091, "step": 3628 }, { "epoch": 0.52, "learning_rate": 9.807886754297271e-05, "loss": 3.1676, "step": 3629 }, { "epoch": 0.52, "learning_rate": 9.80499783330926e-05, "loss": 3.2684, "step": 3630 }, { "epoch": 0.52, "learning_rate": 9.802108912321249e-05, "loss": 3.2345, "step": 3631 }, { "epoch": 0.52, "learning_rate": 9.799219991333237e-05, "loss": 3.2182, "step": 3632 }, { "epoch": 0.52, "learning_rate": 9.796331070345226e-05, "loss": 3.3009, "step": 3633 }, { "epoch": 0.52, "learning_rate": 9.793442149357216e-05, "loss": 3.1447, "step": 3634 }, { "epoch": 0.52, "learning_rate": 9.790553228369205e-05, "loss": 3.1446, "step": 3635 }, { "epoch": 0.52, "learning_rate": 9.787664307381193e-05, "loss": 3.2587, "step": 3636 }, { "epoch": 0.52, "learning_rate": 9.784775386393182e-05, "loss": 3.3582, "step": 3637 }, { "epoch": 0.52, "learning_rate": 9.781886465405172e-05, "loss": 3.3572, "step": 3638 }, { "epoch": 0.52, "learning_rate": 9.778997544417161e-05, "loss": 3.3, "step": 3639 }, { "epoch": 0.52, "learning_rate": 9.77610862342915e-05, "loss": 3.276, "step": 3640 }, { "epoch": 0.52, "learning_rate": 9.773219702441139e-05, "loss": 3.1791, "step": 3641 }, { "epoch": 0.52, "learning_rate": 9.770330781453128e-05, "loss": 3.3582, "step": 3642 }, { "epoch": 0.52, "learning_rate": 9.767441860465116e-05, "loss": 3.4701, "step": 3643 }, { "epoch": 0.52, "learning_rate": 9.764552939477107e-05, "loss": 3.4079, "step": 3644 }, { "epoch": 0.52, "learning_rate": 9.761664018489095e-05, "loss": 3.1479, "step": 3645 }, { "epoch": 0.52, "learning_rate": 9.758775097501084e-05, "loss": 3.3389, "step": 3646 }, { "epoch": 0.52, "learning_rate": 9.755886176513072e-05, "loss": 3.2733, "step": 3647 }, { "epoch": 0.52, "learning_rate": 9.752997255525062e-05, "loss": 3.3105, "step": 3648 }, { "epoch": 0.52, "learning_rate": 9.750108334537051e-05, "loss": 3.3759, "step": 3649 }, { "epoch": 0.52, "learning_rate": 9.74721941354904e-05, "loss": 3.1932, "step": 3650 }, { "epoch": 0.52, "learning_rate": 9.74433049256103e-05, "loss": 3.2846, "step": 3651 }, { "epoch": 0.52, "learning_rate": 9.741441571573018e-05, "loss": 3.3393, "step": 3652 }, { "epoch": 0.52, "learning_rate": 9.738552650585007e-05, "loss": 3.062, "step": 3653 }, { "epoch": 0.52, "learning_rate": 9.735663729596995e-05, "loss": 3.3248, "step": 3654 }, { "epoch": 0.52, "learning_rate": 9.732774808608986e-05, "loss": 3.3914, "step": 3655 }, { "epoch": 0.52, "learning_rate": 9.729885887620974e-05, "loss": 3.3856, "step": 3656 }, { "epoch": 0.52, "learning_rate": 9.726996966632963e-05, "loss": 3.2255, "step": 3657 }, { "epoch": 0.52, "learning_rate": 9.724108045644951e-05, "loss": 3.2713, "step": 3658 }, { "epoch": 0.52, "learning_rate": 9.72121912465694e-05, "loss": 3.2563, "step": 3659 }, { "epoch": 0.52, "learning_rate": 9.71833020366893e-05, "loss": 3.351, "step": 3660 }, { "epoch": 0.52, "learning_rate": 9.71544128268092e-05, "loss": 3.2914, "step": 3661 }, { "epoch": 0.52, "learning_rate": 9.712552361692909e-05, "loss": 3.2493, "step": 3662 }, { "epoch": 0.52, "learning_rate": 9.709663440704897e-05, "loss": 3.2843, "step": 3663 }, { "epoch": 0.52, "learning_rate": 9.706774519716886e-05, "loss": 3.3055, "step": 3664 }, { "epoch": 0.52, "learning_rate": 9.703885598728876e-05, "loss": 3.3493, "step": 3665 }, { "epoch": 0.52, "learning_rate": 9.700996677740865e-05, "loss": 3.4551, "step": 3666 }, { "epoch": 0.52, "learning_rate": 9.698107756752853e-05, "loss": 3.2609, "step": 3667 }, { "epoch": 0.52, "learning_rate": 9.695218835764842e-05, "loss": 3.2567, "step": 3668 }, { "epoch": 0.52, "learning_rate": 9.69232991477683e-05, "loss": 3.3386, "step": 3669 }, { "epoch": 0.52, "learning_rate": 9.689440993788821e-05, "loss": 3.343, "step": 3670 }, { "epoch": 0.52, "learning_rate": 9.686552072800809e-05, "loss": 3.2134, "step": 3671 }, { "epoch": 0.52, "learning_rate": 9.683663151812799e-05, "loss": 3.4653, "step": 3672 }, { "epoch": 0.52, "learning_rate": 9.680774230824788e-05, "loss": 3.305, "step": 3673 }, { "epoch": 0.52, "learning_rate": 9.677885309836776e-05, "loss": 3.2433, "step": 3674 }, { "epoch": 0.52, "learning_rate": 9.674996388848765e-05, "loss": 3.3959, "step": 3675 }, { "epoch": 0.52, "learning_rate": 9.672107467860755e-05, "loss": 3.3096, "step": 3676 }, { "epoch": 0.52, "learning_rate": 9.669218546872744e-05, "loss": 3.2794, "step": 3677 }, { "epoch": 0.52, "learning_rate": 9.666329625884732e-05, "loss": 3.2811, "step": 3678 }, { "epoch": 0.52, "learning_rate": 9.663440704896722e-05, "loss": 3.2929, "step": 3679 }, { "epoch": 0.52, "learning_rate": 9.66055178390871e-05, "loss": 3.1585, "step": 3680 }, { "epoch": 0.52, "learning_rate": 9.6576628629207e-05, "loss": 3.2812, "step": 3681 }, { "epoch": 0.52, "learning_rate": 9.654773941932688e-05, "loss": 3.192, "step": 3682 }, { "epoch": 0.52, "learning_rate": 9.651885020944678e-05, "loss": 3.3528, "step": 3683 }, { "epoch": 0.52, "learning_rate": 9.648996099956667e-05, "loss": 3.041, "step": 3684 }, { "epoch": 0.52, "learning_rate": 9.646107178968655e-05, "loss": 3.3006, "step": 3685 }, { "epoch": 0.52, "learning_rate": 9.643218257980645e-05, "loss": 3.4209, "step": 3686 }, { "epoch": 0.52, "learning_rate": 9.640329336992634e-05, "loss": 3.3817, "step": 3687 }, { "epoch": 0.53, "learning_rate": 9.637440416004623e-05, "loss": 3.3264, "step": 3688 }, { "epoch": 0.53, "learning_rate": 9.634551495016611e-05, "loss": 3.2459, "step": 3689 }, { "epoch": 0.53, "learning_rate": 9.6316625740286e-05, "loss": 3.3551, "step": 3690 }, { "epoch": 0.53, "learning_rate": 9.62877365304059e-05, "loss": 3.2263, "step": 3691 }, { "epoch": 0.53, "learning_rate": 9.62588473205258e-05, "loss": 3.1857, "step": 3692 }, { "epoch": 0.53, "learning_rate": 9.622995811064567e-05, "loss": 3.3901, "step": 3693 }, { "epoch": 0.53, "learning_rate": 9.620106890076557e-05, "loss": 3.348, "step": 3694 }, { "epoch": 0.53, "learning_rate": 9.617217969088545e-05, "loss": 3.2752, "step": 3695 }, { "epoch": 0.53, "learning_rate": 9.614329048100534e-05, "loss": 3.3257, "step": 3696 }, { "epoch": 0.53, "learning_rate": 9.611440127112524e-05, "loss": 3.2138, "step": 3697 }, { "epoch": 0.53, "learning_rate": 9.608551206124513e-05, "loss": 3.2224, "step": 3698 }, { "epoch": 0.53, "learning_rate": 9.605662285136502e-05, "loss": 3.3144, "step": 3699 }, { "epoch": 0.53, "learning_rate": 9.60277336414849e-05, "loss": 2.9823, "step": 3700 }, { "epoch": 0.53, "learning_rate": 9.59988444316048e-05, "loss": 3.3617, "step": 3701 }, { "epoch": 0.53, "learning_rate": 9.596995522172469e-05, "loss": 3.1095, "step": 3702 }, { "epoch": 0.53, "learning_rate": 9.594106601184459e-05, "loss": 3.3501, "step": 3703 }, { "epoch": 0.53, "learning_rate": 9.591217680196447e-05, "loss": 3.3588, "step": 3704 }, { "epoch": 0.53, "learning_rate": 9.588328759208436e-05, "loss": 3.2918, "step": 3705 }, { "epoch": 0.53, "learning_rate": 9.585439838220424e-05, "loss": 3.2578, "step": 3706 }, { "epoch": 0.53, "learning_rate": 9.582550917232415e-05, "loss": 3.2849, "step": 3707 }, { "epoch": 0.53, "learning_rate": 9.579661996244403e-05, "loss": 3.3586, "step": 3708 }, { "epoch": 0.53, "learning_rate": 9.576773075256392e-05, "loss": 3.0917, "step": 3709 }, { "epoch": 0.53, "learning_rate": 9.573884154268382e-05, "loss": 3.2508, "step": 3710 }, { "epoch": 0.53, "learning_rate": 9.57099523328037e-05, "loss": 3.3001, "step": 3711 }, { "epoch": 0.53, "learning_rate": 9.56810631229236e-05, "loss": 3.2744, "step": 3712 }, { "epoch": 0.53, "learning_rate": 9.565217391304348e-05, "loss": 3.283, "step": 3713 }, { "epoch": 0.53, "learning_rate": 9.562328470316338e-05, "loss": 3.2927, "step": 3714 }, { "epoch": 0.53, "learning_rate": 9.559439549328326e-05, "loss": 3.3497, "step": 3715 }, { "epoch": 0.53, "learning_rate": 9.556550628340315e-05, "loss": 3.1693, "step": 3716 }, { "epoch": 0.53, "learning_rate": 9.553661707352305e-05, "loss": 3.2967, "step": 3717 }, { "epoch": 0.53, "learning_rate": 9.550772786364294e-05, "loss": 3.2895, "step": 3718 }, { "epoch": 0.53, "learning_rate": 9.547883865376282e-05, "loss": 3.2079, "step": 3719 }, { "epoch": 0.53, "learning_rate": 9.544994944388271e-05, "loss": 3.3238, "step": 3720 }, { "epoch": 0.53, "learning_rate": 9.542106023400261e-05, "loss": 3.2215, "step": 3721 }, { "epoch": 0.53, "learning_rate": 9.539217102412249e-05, "loss": 3.2758, "step": 3722 }, { "epoch": 0.53, "learning_rate": 9.53632818142424e-05, "loss": 3.4565, "step": 3723 }, { "epoch": 0.53, "learning_rate": 9.533439260436227e-05, "loss": 3.1476, "step": 3724 }, { "epoch": 0.53, "learning_rate": 9.530550339448217e-05, "loss": 3.1799, "step": 3725 }, { "epoch": 0.53, "learning_rate": 9.527661418460205e-05, "loss": 3.2619, "step": 3726 }, { "epoch": 0.53, "learning_rate": 9.524772497472194e-05, "loss": 3.3384, "step": 3727 }, { "epoch": 0.53, "learning_rate": 9.521883576484184e-05, "loss": 3.3392, "step": 3728 }, { "epoch": 0.53, "learning_rate": 9.518994655496173e-05, "loss": 3.2635, "step": 3729 }, { "epoch": 0.53, "learning_rate": 9.516105734508161e-05, "loss": 3.2973, "step": 3730 }, { "epoch": 0.53, "learning_rate": 9.51321681352015e-05, "loss": 3.1182, "step": 3731 }, { "epoch": 0.53, "learning_rate": 9.51032789253214e-05, "loss": 3.366, "step": 3732 }, { "epoch": 0.53, "learning_rate": 9.507438971544129e-05, "loss": 3.2735, "step": 3733 }, { "epoch": 0.53, "learning_rate": 9.504550050556119e-05, "loss": 3.1093, "step": 3734 }, { "epoch": 0.53, "learning_rate": 9.501661129568107e-05, "loss": 3.2478, "step": 3735 }, { "epoch": 0.53, "learning_rate": 9.498772208580096e-05, "loss": 3.2874, "step": 3736 }, { "epoch": 0.53, "learning_rate": 9.495883287592084e-05, "loss": 3.1797, "step": 3737 }, { "epoch": 0.53, "learning_rate": 9.492994366604075e-05, "loss": 3.3518, "step": 3738 }, { "epoch": 0.53, "learning_rate": 9.490105445616063e-05, "loss": 3.3398, "step": 3739 }, { "epoch": 0.53, "learning_rate": 9.487216524628052e-05, "loss": 3.093, "step": 3740 }, { "epoch": 0.53, "learning_rate": 9.48432760364004e-05, "loss": 3.443, "step": 3741 }, { "epoch": 0.53, "learning_rate": 9.48143868265203e-05, "loss": 3.2541, "step": 3742 }, { "epoch": 0.53, "learning_rate": 9.478549761664019e-05, "loss": 3.2678, "step": 3743 }, { "epoch": 0.53, "learning_rate": 9.475660840676008e-05, "loss": 3.3353, "step": 3744 }, { "epoch": 0.53, "learning_rate": 9.472771919687998e-05, "loss": 3.2779, "step": 3745 }, { "epoch": 0.53, "learning_rate": 9.469882998699986e-05, "loss": 3.386, "step": 3746 }, { "epoch": 0.53, "learning_rate": 9.466994077711975e-05, "loss": 3.1789, "step": 3747 }, { "epoch": 0.53, "learning_rate": 9.464105156723963e-05, "loss": 3.2896, "step": 3748 }, { "epoch": 0.53, "learning_rate": 9.461216235735954e-05, "loss": 3.3565, "step": 3749 }, { "epoch": 0.53, "learning_rate": 9.458327314747942e-05, "loss": 3.1587, "step": 3750 }, { "epoch": 0.53, "learning_rate": 9.455438393759931e-05, "loss": 3.3827, "step": 3751 }, { "epoch": 0.53, "learning_rate": 9.45254947277192e-05, "loss": 3.3155, "step": 3752 }, { "epoch": 0.53, "learning_rate": 9.449660551783909e-05, "loss": 3.2712, "step": 3753 }, { "epoch": 0.53, "learning_rate": 9.446771630795898e-05, "loss": 3.2463, "step": 3754 }, { "epoch": 0.53, "learning_rate": 9.443882709807888e-05, "loss": 3.1915, "step": 3755 }, { "epoch": 0.53, "learning_rate": 9.440993788819877e-05, "loss": 3.1901, "step": 3756 }, { "epoch": 0.53, "learning_rate": 9.438104867831865e-05, "loss": 3.2365, "step": 3757 }, { "epoch": 0.54, "learning_rate": 9.435215946843854e-05, "loss": 3.3428, "step": 3758 }, { "epoch": 0.54, "learning_rate": 9.432327025855844e-05, "loss": 3.2661, "step": 3759 }, { "epoch": 0.54, "learning_rate": 9.429438104867833e-05, "loss": 3.3797, "step": 3760 }, { "epoch": 0.54, "learning_rate": 9.426549183879821e-05, "loss": 3.4258, "step": 3761 }, { "epoch": 0.54, "learning_rate": 9.42366026289181e-05, "loss": 3.2706, "step": 3762 }, { "epoch": 0.54, "learning_rate": 9.420771341903798e-05, "loss": 3.2997, "step": 3763 }, { "epoch": 0.54, "learning_rate": 9.417882420915788e-05, "loss": 3.3774, "step": 3764 }, { "epoch": 0.54, "learning_rate": 9.414993499927777e-05, "loss": 3.2793, "step": 3765 }, { "epoch": 0.54, "learning_rate": 9.412104578939767e-05, "loss": 3.2433, "step": 3766 }, { "epoch": 0.54, "learning_rate": 9.409215657951756e-05, "loss": 3.328, "step": 3767 }, { "epoch": 0.54, "learning_rate": 9.406326736963744e-05, "loss": 3.2594, "step": 3768 }, { "epoch": 0.54, "learning_rate": 9.403437815975733e-05, "loss": 3.3453, "step": 3769 }, { "epoch": 0.54, "learning_rate": 9.400548894987723e-05, "loss": 3.2623, "step": 3770 }, { "epoch": 0.54, "learning_rate": 9.397659973999712e-05, "loss": 3.3762, "step": 3771 }, { "epoch": 0.54, "learning_rate": 9.3947710530117e-05, "loss": 3.2156, "step": 3772 }, { "epoch": 0.54, "learning_rate": 9.39188213202369e-05, "loss": 3.2412, "step": 3773 }, { "epoch": 0.54, "learning_rate": 9.388993211035678e-05, "loss": 3.3074, "step": 3774 }, { "epoch": 0.54, "learning_rate": 9.386104290047668e-05, "loss": 3.2955, "step": 3775 }, { "epoch": 0.54, "learning_rate": 9.383215369059656e-05, "loss": 3.3387, "step": 3776 }, { "epoch": 0.54, "learning_rate": 9.380326448071646e-05, "loss": 3.2532, "step": 3777 }, { "epoch": 0.54, "learning_rate": 9.377437527083635e-05, "loss": 3.4034, "step": 3778 }, { "epoch": 0.54, "learning_rate": 9.374548606095623e-05, "loss": 3.2695, "step": 3779 }, { "epoch": 0.54, "learning_rate": 9.371659685107614e-05, "loss": 3.3823, "step": 3780 }, { "epoch": 0.54, "learning_rate": 9.368770764119602e-05, "loss": 3.3463, "step": 3781 }, { "epoch": 0.54, "learning_rate": 9.365881843131591e-05, "loss": 3.2318, "step": 3782 }, { "epoch": 0.54, "learning_rate": 9.36299292214358e-05, "loss": 3.1585, "step": 3783 }, { "epoch": 0.54, "learning_rate": 9.360104001155569e-05, "loss": 3.2317, "step": 3784 }, { "epoch": 0.54, "learning_rate": 9.357215080167557e-05, "loss": 3.2546, "step": 3785 }, { "epoch": 0.54, "learning_rate": 9.354326159179548e-05, "loss": 3.3546, "step": 3786 }, { "epoch": 0.54, "learning_rate": 9.351437238191536e-05, "loss": 3.291, "step": 3787 }, { "epoch": 0.54, "learning_rate": 9.348548317203525e-05, "loss": 3.2058, "step": 3788 }, { "epoch": 0.54, "learning_rate": 9.345659396215514e-05, "loss": 3.3634, "step": 3789 }, { "epoch": 0.54, "learning_rate": 9.342770475227502e-05, "loss": 3.2855, "step": 3790 }, { "epoch": 0.54, "learning_rate": 9.339881554239493e-05, "loss": 3.3437, "step": 3791 }, { "epoch": 0.54, "learning_rate": 9.336992633251481e-05, "loss": 3.3228, "step": 3792 }, { "epoch": 0.54, "learning_rate": 9.33410371226347e-05, "loss": 3.4097, "step": 3793 }, { "epoch": 0.54, "learning_rate": 9.331214791275459e-05, "loss": 3.2167, "step": 3794 }, { "epoch": 0.54, "learning_rate": 9.328325870287448e-05, "loss": 3.2635, "step": 3795 }, { "epoch": 0.54, "learning_rate": 9.325436949299437e-05, "loss": 3.1602, "step": 3796 }, { "epoch": 0.54, "learning_rate": 9.322548028311427e-05, "loss": 3.2665, "step": 3797 }, { "epoch": 0.54, "learning_rate": 9.319659107323415e-05, "loss": 3.2843, "step": 3798 }, { "epoch": 0.54, "learning_rate": 9.316770186335404e-05, "loss": 3.2841, "step": 3799 }, { "epoch": 0.54, "learning_rate": 9.313881265347393e-05, "loss": 3.0637, "step": 3800 }, { "epoch": 0.54, "learning_rate": 9.310992344359383e-05, "loss": 3.301, "step": 3801 }, { "epoch": 0.54, "learning_rate": 9.308103423371371e-05, "loss": 3.3319, "step": 3802 }, { "epoch": 0.54, "learning_rate": 9.30521450238336e-05, "loss": 3.1172, "step": 3803 }, { "epoch": 0.54, "learning_rate": 9.30232558139535e-05, "loss": 3.3949, "step": 3804 }, { "epoch": 0.54, "learning_rate": 9.299436660407338e-05, "loss": 3.3156, "step": 3805 }, { "epoch": 0.54, "learning_rate": 9.296547739419327e-05, "loss": 3.2566, "step": 3806 }, { "epoch": 0.54, "learning_rate": 9.293658818431316e-05, "loss": 3.171, "step": 3807 }, { "epoch": 0.54, "learning_rate": 9.290769897443306e-05, "loss": 3.1897, "step": 3808 }, { "epoch": 0.54, "learning_rate": 9.287880976455294e-05, "loss": 3.3065, "step": 3809 }, { "epoch": 0.54, "learning_rate": 9.284992055467283e-05, "loss": 3.3067, "step": 3810 }, { "epoch": 0.54, "learning_rate": 9.282103134479273e-05, "loss": 3.2692, "step": 3811 }, { "epoch": 0.54, "learning_rate": 9.279214213491262e-05, "loss": 3.1924, "step": 3812 }, { "epoch": 0.54, "learning_rate": 9.27632529250325e-05, "loss": 3.241, "step": 3813 }, { "epoch": 0.54, "learning_rate": 9.27343637151524e-05, "loss": 3.3199, "step": 3814 }, { "epoch": 0.54, "learning_rate": 9.270547450527229e-05, "loss": 3.2505, "step": 3815 }, { "epoch": 0.54, "learning_rate": 9.267658529539217e-05, "loss": 3.3046, "step": 3816 }, { "epoch": 0.54, "learning_rate": 9.264769608551208e-05, "loss": 3.2151, "step": 3817 }, { "epoch": 0.54, "learning_rate": 9.261880687563196e-05, "loss": 3.2005, "step": 3818 }, { "epoch": 0.54, "learning_rate": 9.258991766575185e-05, "loss": 3.3655, "step": 3819 }, { "epoch": 0.54, "learning_rate": 9.256102845587173e-05, "loss": 3.1541, "step": 3820 }, { "epoch": 0.54, "learning_rate": 9.253213924599162e-05, "loss": 3.2828, "step": 3821 }, { "epoch": 0.54, "learning_rate": 9.250325003611152e-05, "loss": 3.3187, "step": 3822 }, { "epoch": 0.54, "learning_rate": 9.247436082623141e-05, "loss": 3.2536, "step": 3823 }, { "epoch": 0.54, "learning_rate": 9.244547161635129e-05, "loss": 3.3031, "step": 3824 }, { "epoch": 0.54, "learning_rate": 9.241658240647119e-05, "loss": 3.1891, "step": 3825 }, { "epoch": 0.54, "learning_rate": 9.238769319659108e-05, "loss": 3.0456, "step": 3826 }, { "epoch": 0.54, "learning_rate": 9.235880398671097e-05, "loss": 3.3972, "step": 3827 }, { "epoch": 0.55, "learning_rate": 9.232991477683087e-05, "loss": 3.3103, "step": 3828 }, { "epoch": 0.55, "learning_rate": 9.230102556695075e-05, "loss": 3.0148, "step": 3829 }, { "epoch": 0.55, "learning_rate": 9.227213635707064e-05, "loss": 3.2918, "step": 3830 }, { "epoch": 0.55, "learning_rate": 9.224324714719052e-05, "loss": 3.3179, "step": 3831 }, { "epoch": 0.55, "learning_rate": 9.221435793731041e-05, "loss": 3.3337, "step": 3832 }, { "epoch": 0.55, "learning_rate": 9.218546872743031e-05, "loss": 3.3814, "step": 3833 }, { "epoch": 0.55, "learning_rate": 9.21565795175502e-05, "loss": 3.3344, "step": 3834 }, { "epoch": 0.55, "learning_rate": 9.212769030767008e-05, "loss": 3.1587, "step": 3835 }, { "epoch": 0.55, "learning_rate": 9.209880109778998e-05, "loss": 3.2824, "step": 3836 }, { "epoch": 0.55, "learning_rate": 9.206991188790987e-05, "loss": 3.2514, "step": 3837 }, { "epoch": 0.55, "learning_rate": 9.204102267802976e-05, "loss": 3.245, "step": 3838 }, { "epoch": 0.55, "learning_rate": 9.201213346814966e-05, "loss": 3.1114, "step": 3839 }, { "epoch": 0.55, "learning_rate": 9.198324425826954e-05, "loss": 3.1667, "step": 3840 }, { "epoch": 0.55, "learning_rate": 9.195435504838943e-05, "loss": 3.2927, "step": 3841 }, { "epoch": 0.55, "learning_rate": 9.192546583850931e-05, "loss": 3.1644, "step": 3842 }, { "epoch": 0.55, "learning_rate": 9.189657662862922e-05, "loss": 3.3846, "step": 3843 }, { "epoch": 0.55, "learning_rate": 9.18676874187491e-05, "loss": 3.3639, "step": 3844 }, { "epoch": 0.55, "learning_rate": 9.1838798208869e-05, "loss": 3.2076, "step": 3845 }, { "epoch": 0.55, "learning_rate": 9.180990899898887e-05, "loss": 3.2439, "step": 3846 }, { "epoch": 0.55, "learning_rate": 9.178101978910877e-05, "loss": 3.2574, "step": 3847 }, { "epoch": 0.55, "learning_rate": 9.175213057922866e-05, "loss": 3.3944, "step": 3848 }, { "epoch": 0.55, "learning_rate": 9.172324136934856e-05, "loss": 3.3432, "step": 3849 }, { "epoch": 0.55, "learning_rate": 9.169435215946845e-05, "loss": 3.451, "step": 3850 }, { "epoch": 0.55, "learning_rate": 9.166546294958833e-05, "loss": 3.4377, "step": 3851 }, { "epoch": 0.55, "learning_rate": 9.163657373970822e-05, "loss": 3.2292, "step": 3852 }, { "epoch": 0.55, "learning_rate": 9.16076845298281e-05, "loss": 3.2505, "step": 3853 }, { "epoch": 0.55, "learning_rate": 9.157879531994801e-05, "loss": 3.1625, "step": 3854 }, { "epoch": 0.55, "learning_rate": 9.154990611006789e-05, "loss": 3.3417, "step": 3855 }, { "epoch": 0.55, "learning_rate": 9.152101690018779e-05, "loss": 3.3231, "step": 3856 }, { "epoch": 0.55, "learning_rate": 9.149212769030767e-05, "loss": 3.4253, "step": 3857 }, { "epoch": 0.55, "learning_rate": 9.146323848042756e-05, "loss": 3.2481, "step": 3858 }, { "epoch": 0.55, "learning_rate": 9.143434927054745e-05, "loss": 3.2928, "step": 3859 }, { "epoch": 0.55, "learning_rate": 9.140546006066735e-05, "loss": 3.3022, "step": 3860 }, { "epoch": 0.55, "learning_rate": 9.137657085078724e-05, "loss": 3.2664, "step": 3861 }, { "epoch": 0.55, "learning_rate": 9.134768164090712e-05, "loss": 3.3661, "step": 3862 }, { "epoch": 0.55, "learning_rate": 9.131879243102702e-05, "loss": 3.2142, "step": 3863 }, { "epoch": 0.55, "learning_rate": 9.128990322114691e-05, "loss": 3.0478, "step": 3864 }, { "epoch": 0.55, "learning_rate": 9.12610140112668e-05, "loss": 3.285, "step": 3865 }, { "epoch": 0.55, "learning_rate": 9.123212480138668e-05, "loss": 3.1, "step": 3866 }, { "epoch": 0.55, "learning_rate": 9.120323559150658e-05, "loss": 3.2835, "step": 3867 }, { "epoch": 0.55, "learning_rate": 9.117434638162646e-05, "loss": 3.3589, "step": 3868 }, { "epoch": 0.55, "learning_rate": 9.114545717174636e-05, "loss": 3.2728, "step": 3869 }, { "epoch": 0.55, "learning_rate": 9.111656796186624e-05, "loss": 3.2323, "step": 3870 }, { "epoch": 0.55, "learning_rate": 9.108767875198614e-05, "loss": 3.2699, "step": 3871 }, { "epoch": 0.55, "learning_rate": 9.105878954210603e-05, "loss": 3.1105, "step": 3872 }, { "epoch": 0.55, "eval_loss": 3.4803874492645264, "eval_runtime": 471.7317, "eval_samples_per_second": 43.429, "eval_steps_per_second": 14.476, "step": 3872 }, { "epoch": 0.55, "learning_rate": 9.102990033222591e-05, "loss": 3.3334, "step": 3873 }, { "epoch": 0.55, "learning_rate": 9.10010111223458e-05, "loss": 3.2822, "step": 3874 }, { "epoch": 0.55, "learning_rate": 9.09721219124657e-05, "loss": 3.1631, "step": 3875 }, { "epoch": 0.55, "learning_rate": 9.09432327025856e-05, "loss": 3.2771, "step": 3876 }, { "epoch": 0.55, "learning_rate": 9.091434349270547e-05, "loss": 3.2999, "step": 3877 }, { "epoch": 0.55, "learning_rate": 9.088545428282537e-05, "loss": 3.19, "step": 3878 }, { "epoch": 0.55, "learning_rate": 9.085656507294525e-05, "loss": 3.405, "step": 3879 }, { "epoch": 0.55, "learning_rate": 9.082767586306516e-05, "loss": 3.3027, "step": 3880 }, { "epoch": 0.55, "learning_rate": 9.079878665318504e-05, "loss": 3.1167, "step": 3881 }, { "epoch": 0.55, "learning_rate": 9.076989744330493e-05, "loss": 3.2915, "step": 3882 }, { "epoch": 0.55, "learning_rate": 9.074100823342482e-05, "loss": 3.2832, "step": 3883 }, { "epoch": 0.55, "learning_rate": 9.07121190235447e-05, "loss": 3.032, "step": 3884 }, { "epoch": 0.55, "learning_rate": 9.068322981366461e-05, "loss": 3.1557, "step": 3885 }, { "epoch": 0.55, "learning_rate": 9.065434060378449e-05, "loss": 3.199, "step": 3886 }, { "epoch": 0.55, "learning_rate": 9.062545139390439e-05, "loss": 3.2869, "step": 3887 }, { "epoch": 0.55, "learning_rate": 9.059656218402427e-05, "loss": 3.3465, "step": 3888 }, { "epoch": 0.55, "learning_rate": 9.056767297414416e-05, "loss": 3.2342, "step": 3889 }, { "epoch": 0.55, "learning_rate": 9.053878376426405e-05, "loss": 3.2444, "step": 3890 }, { "epoch": 0.55, "learning_rate": 9.050989455438395e-05, "loss": 3.198, "step": 3891 }, { "epoch": 0.55, "learning_rate": 9.048100534450383e-05, "loss": 3.2915, "step": 3892 }, { "epoch": 0.55, "learning_rate": 9.045211613462372e-05, "loss": 3.181, "step": 3893 }, { "epoch": 0.55, "learning_rate": 9.042322692474362e-05, "loss": 3.3077, "step": 3894 }, { "epoch": 0.55, "learning_rate": 9.03943377148635e-05, "loss": 3.298, "step": 3895 }, { "epoch": 0.55, "learning_rate": 9.03654485049834e-05, "loss": 3.032, "step": 3896 }, { "epoch": 0.55, "learning_rate": 9.033655929510328e-05, "loss": 3.2134, "step": 3897 }, { "epoch": 0.55, "learning_rate": 9.030767008522318e-05, "loss": 3.2998, "step": 3898 }, { "epoch": 0.56, "learning_rate": 9.027878087534306e-05, "loss": 3.1998, "step": 3899 }, { "epoch": 0.56, "learning_rate": 9.024989166546295e-05, "loss": 3.1195, "step": 3900 }, { "epoch": 0.56, "learning_rate": 9.022100245558285e-05, "loss": 3.2613, "step": 3901 }, { "epoch": 0.56, "learning_rate": 9.019211324570274e-05, "loss": 3.364, "step": 3902 }, { "epoch": 0.56, "learning_rate": 9.016322403582262e-05, "loss": 3.2895, "step": 3903 }, { "epoch": 0.56, "learning_rate": 9.013433482594251e-05, "loss": 3.2736, "step": 3904 }, { "epoch": 0.56, "learning_rate": 9.01054456160624e-05, "loss": 3.3313, "step": 3905 }, { "epoch": 0.56, "learning_rate": 9.00765564061823e-05, "loss": 3.2997, "step": 3906 }, { "epoch": 0.56, "learning_rate": 9.00476671963022e-05, "loss": 3.3214, "step": 3907 }, { "epoch": 0.56, "learning_rate": 9.001877798642207e-05, "loss": 3.3161, "step": 3908 }, { "epoch": 0.56, "learning_rate": 8.998988877654197e-05, "loss": 3.3021, "step": 3909 }, { "epoch": 0.56, "learning_rate": 8.996099956666185e-05, "loss": 3.3702, "step": 3910 }, { "epoch": 0.56, "learning_rate": 8.993211035678176e-05, "loss": 3.2741, "step": 3911 }, { "epoch": 0.56, "learning_rate": 8.990322114690164e-05, "loss": 3.289, "step": 3912 }, { "epoch": 0.56, "learning_rate": 8.987433193702153e-05, "loss": 3.3696, "step": 3913 }, { "epoch": 0.56, "learning_rate": 8.984544272714141e-05, "loss": 3.3681, "step": 3914 }, { "epoch": 0.56, "learning_rate": 8.98165535172613e-05, "loss": 3.2758, "step": 3915 }, { "epoch": 0.56, "learning_rate": 8.97876643073812e-05, "loss": 3.2286, "step": 3916 }, { "epoch": 0.56, "learning_rate": 8.975877509750109e-05, "loss": 3.2878, "step": 3917 }, { "epoch": 0.56, "learning_rate": 8.972988588762099e-05, "loss": 3.4119, "step": 3918 }, { "epoch": 0.56, "learning_rate": 8.970099667774087e-05, "loss": 3.2123, "step": 3919 }, { "epoch": 0.56, "learning_rate": 8.967210746786076e-05, "loss": 3.0235, "step": 3920 }, { "epoch": 0.56, "learning_rate": 8.964321825798064e-05, "loss": 3.3914, "step": 3921 }, { "epoch": 0.56, "learning_rate": 8.961432904810055e-05, "loss": 3.1288, "step": 3922 }, { "epoch": 0.56, "learning_rate": 8.958543983822043e-05, "loss": 3.238, "step": 3923 }, { "epoch": 0.56, "learning_rate": 8.955655062834032e-05, "loss": 3.1957, "step": 3924 }, { "epoch": 0.56, "learning_rate": 8.95276614184602e-05, "loss": 3.249, "step": 3925 }, { "epoch": 0.56, "learning_rate": 8.94987722085801e-05, "loss": 3.181, "step": 3926 }, { "epoch": 0.56, "learning_rate": 8.946988299869999e-05, "loss": 3.281, "step": 3927 }, { "epoch": 0.56, "learning_rate": 8.944099378881988e-05, "loss": 3.2452, "step": 3928 }, { "epoch": 0.56, "learning_rate": 8.941210457893976e-05, "loss": 3.2608, "step": 3929 }, { "epoch": 0.56, "learning_rate": 8.938321536905966e-05, "loss": 3.3254, "step": 3930 }, { "epoch": 0.56, "learning_rate": 8.935432615917955e-05, "loss": 3.3185, "step": 3931 }, { "epoch": 0.56, "learning_rate": 8.932543694929945e-05, "loss": 3.2227, "step": 3932 }, { "epoch": 0.56, "learning_rate": 8.929654773941934e-05, "loss": 3.2378, "step": 3933 }, { "epoch": 0.56, "learning_rate": 8.926765852953922e-05, "loss": 3.3324, "step": 3934 }, { "epoch": 0.56, "learning_rate": 8.923876931965911e-05, "loss": 3.4224, "step": 3935 }, { "epoch": 0.56, "learning_rate": 8.9209880109779e-05, "loss": 3.2827, "step": 3936 }, { "epoch": 0.56, "learning_rate": 8.91809908998989e-05, "loss": 3.2855, "step": 3937 }, { "epoch": 0.56, "learning_rate": 8.915210169001878e-05, "loss": 3.0395, "step": 3938 }, { "epoch": 0.56, "learning_rate": 8.912321248013867e-05, "loss": 3.3829, "step": 3939 }, { "epoch": 0.56, "learning_rate": 8.909432327025856e-05, "loss": 3.2371, "step": 3940 }, { "epoch": 0.56, "learning_rate": 8.906543406037845e-05, "loss": 3.2505, "step": 3941 }, { "epoch": 0.56, "learning_rate": 8.903654485049834e-05, "loss": 3.3156, "step": 3942 }, { "epoch": 0.56, "learning_rate": 8.900765564061824e-05, "loss": 3.0712, "step": 3943 }, { "epoch": 0.56, "learning_rate": 8.897876643073813e-05, "loss": 3.3497, "step": 3944 }, { "epoch": 0.56, "learning_rate": 8.894987722085801e-05, "loss": 3.2546, "step": 3945 }, { "epoch": 0.56, "learning_rate": 8.89209880109779e-05, "loss": 3.153, "step": 3946 }, { "epoch": 0.56, "learning_rate": 8.889209880109778e-05, "loss": 3.0865, "step": 3947 }, { "epoch": 0.56, "learning_rate": 8.886320959121769e-05, "loss": 3.2698, "step": 3948 }, { "epoch": 0.56, "learning_rate": 8.883432038133757e-05, "loss": 3.2959, "step": 3949 }, { "epoch": 0.56, "learning_rate": 8.880543117145747e-05, "loss": 3.2559, "step": 3950 }, { "epoch": 0.56, "learning_rate": 8.877654196157735e-05, "loss": 3.3895, "step": 3951 }, { "epoch": 0.56, "learning_rate": 8.874765275169724e-05, "loss": 3.3017, "step": 3952 }, { "epoch": 0.56, "learning_rate": 8.871876354181713e-05, "loss": 3.1077, "step": 3953 }, { "epoch": 0.56, "learning_rate": 8.868987433193703e-05, "loss": 3.1145, "step": 3954 }, { "epoch": 0.56, "learning_rate": 8.866098512205692e-05, "loss": 3.0593, "step": 3955 }, { "epoch": 0.56, "learning_rate": 8.86320959121768e-05, "loss": 3.1752, "step": 3956 }, { "epoch": 0.56, "learning_rate": 8.86032067022967e-05, "loss": 3.1846, "step": 3957 }, { "epoch": 0.56, "learning_rate": 8.857431749241659e-05, "loss": 3.3596, "step": 3958 }, { "epoch": 0.56, "learning_rate": 8.854542828253648e-05, "loss": 3.2812, "step": 3959 }, { "epoch": 0.56, "learning_rate": 8.851653907265636e-05, "loss": 3.1094, "step": 3960 }, { "epoch": 0.56, "learning_rate": 8.848764986277626e-05, "loss": 3.2295, "step": 3961 }, { "epoch": 0.56, "learning_rate": 8.845876065289614e-05, "loss": 3.2622, "step": 3962 }, { "epoch": 0.56, "learning_rate": 8.842987144301603e-05, "loss": 3.0867, "step": 3963 }, { "epoch": 0.56, "learning_rate": 8.840098223313593e-05, "loss": 3.2774, "step": 3964 }, { "epoch": 0.56, "learning_rate": 8.837209302325582e-05, "loss": 3.3415, "step": 3965 }, { "epoch": 0.56, "learning_rate": 8.834320381337571e-05, "loss": 3.2803, "step": 3966 }, { "epoch": 0.56, "learning_rate": 8.83143146034956e-05, "loss": 3.3382, "step": 3967 }, { "epoch": 0.56, "learning_rate": 8.828542539361549e-05, "loss": 3.2908, "step": 3968 }, { "epoch": 0.57, "learning_rate": 8.825653618373538e-05, "loss": 3.2303, "step": 3969 }, { "epoch": 0.57, "learning_rate": 8.822764697385528e-05, "loss": 3.2081, "step": 3970 }, { "epoch": 0.57, "learning_rate": 8.819875776397516e-05, "loss": 3.1726, "step": 3971 }, { "epoch": 0.57, "learning_rate": 8.816986855409505e-05, "loss": 3.2753, "step": 3972 }, { "epoch": 0.57, "learning_rate": 8.814097934421493e-05, "loss": 3.1992, "step": 3973 }, { "epoch": 0.57, "learning_rate": 8.811209013433484e-05, "loss": 3.1367, "step": 3974 }, { "epoch": 0.57, "learning_rate": 8.808320092445472e-05, "loss": 3.3159, "step": 3975 }, { "epoch": 0.57, "learning_rate": 8.805431171457461e-05, "loss": 3.3195, "step": 3976 }, { "epoch": 0.57, "learning_rate": 8.80254225046945e-05, "loss": 3.3893, "step": 3977 }, { "epoch": 0.57, "learning_rate": 8.799653329481438e-05, "loss": 3.2885, "step": 3978 }, { "epoch": 0.57, "learning_rate": 8.796764408493429e-05, "loss": 3.2368, "step": 3979 }, { "epoch": 0.57, "learning_rate": 8.793875487505417e-05, "loss": 3.2786, "step": 3980 }, { "epoch": 0.57, "learning_rate": 8.790986566517407e-05, "loss": 3.1629, "step": 3981 }, { "epoch": 0.57, "learning_rate": 8.788097645529395e-05, "loss": 3.1387, "step": 3982 }, { "epoch": 0.57, "learning_rate": 8.785208724541384e-05, "loss": 3.2488, "step": 3983 }, { "epoch": 0.57, "learning_rate": 8.782319803553372e-05, "loss": 3.2105, "step": 3984 }, { "epoch": 0.57, "learning_rate": 8.779430882565363e-05, "loss": 3.2135, "step": 3985 }, { "epoch": 0.57, "learning_rate": 8.776541961577351e-05, "loss": 3.1729, "step": 3986 }, { "epoch": 0.57, "learning_rate": 8.77365304058934e-05, "loss": 3.286, "step": 3987 }, { "epoch": 0.57, "learning_rate": 8.77076411960133e-05, "loss": 3.224, "step": 3988 }, { "epoch": 0.57, "learning_rate": 8.767875198613318e-05, "loss": 3.298, "step": 3989 }, { "epoch": 0.57, "learning_rate": 8.764986277625308e-05, "loss": 3.1093, "step": 3990 }, { "epoch": 0.57, "learning_rate": 8.762097356637296e-05, "loss": 3.1439, "step": 3991 }, { "epoch": 0.57, "learning_rate": 8.759208435649286e-05, "loss": 3.3523, "step": 3992 }, { "epoch": 0.57, "learning_rate": 8.756319514661274e-05, "loss": 3.172, "step": 3993 }, { "epoch": 0.57, "learning_rate": 8.753430593673263e-05, "loss": 3.3619, "step": 3994 }, { "epoch": 0.57, "learning_rate": 8.750541672685253e-05, "loss": 3.366, "step": 3995 }, { "epoch": 0.57, "learning_rate": 8.747652751697242e-05, "loss": 3.3856, "step": 3996 }, { "epoch": 0.57, "learning_rate": 8.74476383070923e-05, "loss": 3.2243, "step": 3997 }, { "epoch": 0.57, "learning_rate": 8.74187490972122e-05, "loss": 3.2988, "step": 3998 }, { "epoch": 0.57, "learning_rate": 8.738985988733209e-05, "loss": 3.3289, "step": 3999 }, { "epoch": 0.57, "learning_rate": 8.736097067745198e-05, "loss": 3.259, "step": 4000 }, { "epoch": 0.57, "learning_rate": 8.733208146757188e-05, "loss": 3.258, "step": 4001 }, { "epoch": 0.57, "learning_rate": 8.730319225769176e-05, "loss": 3.406, "step": 4002 }, { "epoch": 0.57, "learning_rate": 8.727430304781165e-05, "loss": 3.3033, "step": 4003 }, { "epoch": 0.57, "learning_rate": 8.724541383793153e-05, "loss": 3.2368, "step": 4004 }, { "epoch": 0.57, "learning_rate": 8.721652462805142e-05, "loss": 3.0177, "step": 4005 }, { "epoch": 0.57, "learning_rate": 8.718763541817132e-05, "loss": 3.2402, "step": 4006 }, { "epoch": 0.57, "learning_rate": 8.715874620829121e-05, "loss": 3.1792, "step": 4007 }, { "epoch": 0.57, "learning_rate": 8.712985699841109e-05, "loss": 3.092, "step": 4008 }, { "epoch": 0.57, "learning_rate": 8.710096778853099e-05, "loss": 3.124, "step": 4009 }, { "epoch": 0.57, "learning_rate": 8.707207857865088e-05, "loss": 3.3448, "step": 4010 }, { "epoch": 0.57, "learning_rate": 8.704318936877077e-05, "loss": 3.331, "step": 4011 }, { "epoch": 0.57, "learning_rate": 8.701430015889067e-05, "loss": 3.339, "step": 4012 }, { "epoch": 0.57, "learning_rate": 8.698541094901055e-05, "loss": 3.1541, "step": 4013 }, { "epoch": 0.57, "learning_rate": 8.695652173913044e-05, "loss": 3.3561, "step": 4014 }, { "epoch": 0.57, "learning_rate": 8.692763252925032e-05, "loss": 3.1285, "step": 4015 }, { "epoch": 0.57, "learning_rate": 8.689874331937023e-05, "loss": 3.4157, "step": 4016 }, { "epoch": 0.57, "learning_rate": 8.686985410949011e-05, "loss": 3.2565, "step": 4017 }, { "epoch": 0.57, "learning_rate": 8.684096489961e-05, "loss": 3.2633, "step": 4018 }, { "epoch": 0.57, "learning_rate": 8.681207568972988e-05, "loss": 3.2736, "step": 4019 }, { "epoch": 0.57, "learning_rate": 8.678318647984978e-05, "loss": 3.2032, "step": 4020 }, { "epoch": 0.57, "learning_rate": 8.675429726996967e-05, "loss": 3.2755, "step": 4021 }, { "epoch": 0.57, "learning_rate": 8.672540806008956e-05, "loss": 3.0297, "step": 4022 }, { "epoch": 0.57, "learning_rate": 8.669651885020946e-05, "loss": 3.1787, "step": 4023 }, { "epoch": 0.57, "learning_rate": 8.666762964032934e-05, "loss": 3.2807, "step": 4024 }, { "epoch": 0.57, "learning_rate": 8.663874043044923e-05, "loss": 3.252, "step": 4025 }, { "epoch": 0.57, "learning_rate": 8.660985122056913e-05, "loss": 3.2084, "step": 4026 }, { "epoch": 0.57, "learning_rate": 8.658096201068902e-05, "loss": 3.2354, "step": 4027 }, { "epoch": 0.57, "learning_rate": 8.65520728008089e-05, "loss": 3.2249, "step": 4028 }, { "epoch": 0.57, "learning_rate": 8.65231835909288e-05, "loss": 3.2624, "step": 4029 }, { "epoch": 0.57, "learning_rate": 8.649429438104867e-05, "loss": 3.2647, "step": 4030 }, { "epoch": 0.57, "learning_rate": 8.646540517116857e-05, "loss": 3.3059, "step": 4031 }, { "epoch": 0.57, "learning_rate": 8.643651596128846e-05, "loss": 3.3355, "step": 4032 }, { "epoch": 0.57, "learning_rate": 8.640762675140836e-05, "loss": 3.2986, "step": 4033 }, { "epoch": 0.57, "learning_rate": 8.637873754152825e-05, "loss": 3.4018, "step": 4034 }, { "epoch": 0.57, "learning_rate": 8.634984833164813e-05, "loss": 3.2099, "step": 4035 }, { "epoch": 0.57, "learning_rate": 8.632095912176802e-05, "loss": 3.2506, "step": 4036 }, { "epoch": 0.57, "learning_rate": 8.629206991188792e-05, "loss": 3.0363, "step": 4037 }, { "epoch": 0.57, "learning_rate": 8.626318070200781e-05, "loss": 3.3057, "step": 4038 }, { "epoch": 0.58, "learning_rate": 8.623429149212769e-05, "loss": 3.174, "step": 4039 }, { "epoch": 0.58, "learning_rate": 8.620540228224759e-05, "loss": 3.1564, "step": 4040 }, { "epoch": 0.58, "learning_rate": 8.617651307236747e-05, "loss": 3.2195, "step": 4041 }, { "epoch": 0.58, "learning_rate": 8.614762386248737e-05, "loss": 3.3382, "step": 4042 }, { "epoch": 0.58, "learning_rate": 8.611873465260725e-05, "loss": 3.2327, "step": 4043 }, { "epoch": 0.58, "learning_rate": 8.608984544272715e-05, "loss": 3.2639, "step": 4044 }, { "epoch": 0.58, "learning_rate": 8.606095623284704e-05, "loss": 3.3016, "step": 4045 }, { "epoch": 0.58, "learning_rate": 8.603206702296692e-05, "loss": 3.2926, "step": 4046 }, { "epoch": 0.58, "learning_rate": 8.600317781308681e-05, "loss": 3.277, "step": 4047 }, { "epoch": 0.58, "learning_rate": 8.597428860320671e-05, "loss": 3.0697, "step": 4048 }, { "epoch": 0.58, "learning_rate": 8.59453993933266e-05, "loss": 3.2062, "step": 4049 }, { "epoch": 0.58, "learning_rate": 8.591651018344648e-05, "loss": 3.2135, "step": 4050 }, { "epoch": 0.58, "learning_rate": 8.588762097356638e-05, "loss": 3.3941, "step": 4051 }, { "epoch": 0.58, "learning_rate": 8.585873176368626e-05, "loss": 3.2611, "step": 4052 }, { "epoch": 0.58, "learning_rate": 8.582984255380616e-05, "loss": 3.1288, "step": 4053 }, { "epoch": 0.58, "learning_rate": 8.580095334392604e-05, "loss": 3.1586, "step": 4054 }, { "epoch": 0.58, "learning_rate": 8.577206413404594e-05, "loss": 3.2621, "step": 4055 }, { "epoch": 0.58, "learning_rate": 8.574317492416582e-05, "loss": 3.1419, "step": 4056 }, { "epoch": 0.58, "learning_rate": 8.571428571428571e-05, "loss": 3.2366, "step": 4057 }, { "epoch": 0.58, "learning_rate": 8.56853965044056e-05, "loss": 3.0991, "step": 4058 }, { "epoch": 0.58, "learning_rate": 8.56565072945255e-05, "loss": 3.2461, "step": 4059 }, { "epoch": 0.58, "learning_rate": 8.56276180846454e-05, "loss": 3.157, "step": 4060 }, { "epoch": 0.58, "learning_rate": 8.559872887476527e-05, "loss": 3.279, "step": 4061 }, { "epoch": 0.58, "learning_rate": 8.556983966488517e-05, "loss": 3.3706, "step": 4062 }, { "epoch": 0.58, "learning_rate": 8.554095045500506e-05, "loss": 3.2767, "step": 4063 }, { "epoch": 0.58, "learning_rate": 8.551206124512496e-05, "loss": 3.2843, "step": 4064 }, { "epoch": 0.58, "learning_rate": 8.548317203524484e-05, "loss": 3.2912, "step": 4065 }, { "epoch": 0.58, "learning_rate": 8.545428282536473e-05, "loss": 3.3588, "step": 4066 }, { "epoch": 0.58, "learning_rate": 8.542539361548461e-05, "loss": 3.1223, "step": 4067 }, { "epoch": 0.58, "learning_rate": 8.539650440560452e-05, "loss": 3.1939, "step": 4068 }, { "epoch": 0.58, "learning_rate": 8.53676151957244e-05, "loss": 3.2885, "step": 4069 }, { "epoch": 0.58, "learning_rate": 8.533872598584429e-05, "loss": 3.1524, "step": 4070 }, { "epoch": 0.58, "learning_rate": 8.530983677596419e-05, "loss": 3.2319, "step": 4071 }, { "epoch": 0.58, "learning_rate": 8.528094756608407e-05, "loss": 3.2579, "step": 4072 }, { "epoch": 0.58, "learning_rate": 8.525205835620396e-05, "loss": 3.2752, "step": 4073 }, { "epoch": 0.58, "learning_rate": 8.522316914632385e-05, "loss": 3.1005, "step": 4074 }, { "epoch": 0.58, "learning_rate": 8.519427993644375e-05, "loss": 3.2904, "step": 4075 }, { "epoch": 0.58, "learning_rate": 8.516539072656363e-05, "loss": 3.2652, "step": 4076 }, { "epoch": 0.58, "learning_rate": 8.513650151668352e-05, "loss": 3.3645, "step": 4077 }, { "epoch": 0.58, "learning_rate": 8.51076123068034e-05, "loss": 3.3647, "step": 4078 }, { "epoch": 0.58, "learning_rate": 8.507872309692331e-05, "loss": 3.197, "step": 4079 }, { "epoch": 0.58, "learning_rate": 8.504983388704319e-05, "loss": 3.2224, "step": 4080 }, { "epoch": 0.58, "learning_rate": 8.502094467716308e-05, "loss": 3.2377, "step": 4081 }, { "epoch": 0.58, "learning_rate": 8.499205546728298e-05, "loss": 3.2946, "step": 4082 }, { "epoch": 0.58, "learning_rate": 8.496316625740286e-05, "loss": 3.2442, "step": 4083 }, { "epoch": 0.58, "learning_rate": 8.493427704752276e-05, "loss": 3.2016, "step": 4084 }, { "epoch": 0.58, "learning_rate": 8.490538783764264e-05, "loss": 3.2061, "step": 4085 }, { "epoch": 0.58, "learning_rate": 8.487649862776254e-05, "loss": 3.3251, "step": 4086 }, { "epoch": 0.58, "learning_rate": 8.484760941788242e-05, "loss": 3.3625, "step": 4087 }, { "epoch": 0.58, "learning_rate": 8.481872020800231e-05, "loss": 3.0163, "step": 4088 }, { "epoch": 0.58, "learning_rate": 8.47898309981222e-05, "loss": 3.1424, "step": 4089 }, { "epoch": 0.58, "learning_rate": 8.47609417882421e-05, "loss": 3.3728, "step": 4090 }, { "epoch": 0.58, "learning_rate": 8.473205257836198e-05, "loss": 3.2705, "step": 4091 }, { "epoch": 0.58, "learning_rate": 8.470316336848187e-05, "loss": 3.3873, "step": 4092 }, { "epoch": 0.58, "learning_rate": 8.467427415860177e-05, "loss": 3.3183, "step": 4093 }, { "epoch": 0.58, "learning_rate": 8.464538494872165e-05, "loss": 3.2233, "step": 4094 }, { "epoch": 0.58, "learning_rate": 8.461649573884156e-05, "loss": 3.2937, "step": 4095 }, { "epoch": 0.58, "learning_rate": 8.458760652896144e-05, "loss": 3.3, "step": 4096 }, { "epoch": 0.58, "learning_rate": 8.455871731908133e-05, "loss": 3.148, "step": 4097 }, { "epoch": 0.58, "learning_rate": 8.452982810920121e-05, "loss": 3.2463, "step": 4098 }, { "epoch": 0.58, "learning_rate": 8.45009388993211e-05, "loss": 3.2626, "step": 4099 }, { "epoch": 0.58, "learning_rate": 8.4472049689441e-05, "loss": 3.2149, "step": 4100 }, { "epoch": 0.58, "learning_rate": 8.444316047956089e-05, "loss": 3.2738, "step": 4101 }, { "epoch": 0.58, "learning_rate": 8.441427126968077e-05, "loss": 3.258, "step": 4102 }, { "epoch": 0.58, "learning_rate": 8.438538205980067e-05, "loss": 3.2738, "step": 4103 }, { "epoch": 0.58, "learning_rate": 8.435649284992056e-05, "loss": 3.14, "step": 4104 }, { "epoch": 0.58, "learning_rate": 8.432760364004045e-05, "loss": 3.409, "step": 4105 }, { "epoch": 0.58, "learning_rate": 8.429871443016035e-05, "loss": 3.2291, "step": 4106 }, { "epoch": 0.58, "learning_rate": 8.426982522028023e-05, "loss": 3.0735, "step": 4107 }, { "epoch": 0.58, "learning_rate": 8.424093601040012e-05, "loss": 3.3047, "step": 4108 }, { "epoch": 0.59, "learning_rate": 8.421204680052e-05, "loss": 3.0758, "step": 4109 }, { "epoch": 0.59, "learning_rate": 8.418315759063991e-05, "loss": 3.1981, "step": 4110 }, { "epoch": 0.59, "learning_rate": 8.415426838075979e-05, "loss": 3.2431, "step": 4111 }, { "epoch": 0.59, "learning_rate": 8.412537917087968e-05, "loss": 3.2672, "step": 4112 }, { "epoch": 0.59, "learning_rate": 8.409648996099956e-05, "loss": 3.1203, "step": 4113 }, { "epoch": 0.59, "learning_rate": 8.406760075111946e-05, "loss": 3.1629, "step": 4114 }, { "epoch": 0.59, "learning_rate": 8.403871154123935e-05, "loss": 3.2416, "step": 4115 }, { "epoch": 0.59, "learning_rate": 8.400982233135925e-05, "loss": 3.074, "step": 4116 }, { "epoch": 0.59, "learning_rate": 8.398093312147914e-05, "loss": 3.3175, "step": 4117 }, { "epoch": 0.59, "learning_rate": 8.395204391159902e-05, "loss": 3.2744, "step": 4118 }, { "epoch": 0.59, "learning_rate": 8.392315470171891e-05, "loss": 3.289, "step": 4119 }, { "epoch": 0.59, "learning_rate": 8.389426549183879e-05, "loss": 3.2475, "step": 4120 }, { "epoch": 0.59, "learning_rate": 8.38653762819587e-05, "loss": 3.3102, "step": 4121 }, { "epoch": 0.59, "learning_rate": 8.383648707207858e-05, "loss": 3.3482, "step": 4122 }, { "epoch": 0.59, "learning_rate": 8.380759786219847e-05, "loss": 3.2501, "step": 4123 }, { "epoch": 0.59, "learning_rate": 8.377870865231835e-05, "loss": 3.0875, "step": 4124 }, { "epoch": 0.59, "learning_rate": 8.374981944243825e-05, "loss": 3.3484, "step": 4125 }, { "epoch": 0.59, "learning_rate": 8.372093023255814e-05, "loss": 3.3203, "step": 4126 }, { "epoch": 0.59, "learning_rate": 8.369204102267804e-05, "loss": 2.9373, "step": 4127 }, { "epoch": 0.59, "learning_rate": 8.366315181279793e-05, "loss": 3.3178, "step": 4128 }, { "epoch": 0.59, "learning_rate": 8.363426260291781e-05, "loss": 3.1093, "step": 4129 }, { "epoch": 0.59, "learning_rate": 8.36053733930377e-05, "loss": 3.1304, "step": 4130 }, { "epoch": 0.59, "learning_rate": 8.35764841831576e-05, "loss": 3.1829, "step": 4131 }, { "epoch": 0.59, "learning_rate": 8.354759497327749e-05, "loss": 3.3268, "step": 4132 }, { "epoch": 0.59, "learning_rate": 8.351870576339737e-05, "loss": 3.2223, "step": 4133 }, { "epoch": 0.59, "learning_rate": 8.348981655351727e-05, "loss": 3.1491, "step": 4134 }, { "epoch": 0.59, "learning_rate": 8.346092734363715e-05, "loss": 3.3163, "step": 4135 }, { "epoch": 0.59, "learning_rate": 8.343203813375705e-05, "loss": 3.1891, "step": 4136 }, { "epoch": 0.59, "learning_rate": 8.340314892387693e-05, "loss": 3.2958, "step": 4137 }, { "epoch": 0.59, "learning_rate": 8.337425971399683e-05, "loss": 3.2614, "step": 4138 }, { "epoch": 0.59, "learning_rate": 8.334537050411672e-05, "loss": 3.1945, "step": 4139 }, { "epoch": 0.59, "learning_rate": 8.33164812942366e-05, "loss": 3.2473, "step": 4140 }, { "epoch": 0.59, "learning_rate": 8.32875920843565e-05, "loss": 3.3148, "step": 4141 }, { "epoch": 0.59, "learning_rate": 8.325870287447639e-05, "loss": 3.2322, "step": 4142 }, { "epoch": 0.59, "learning_rate": 8.322981366459628e-05, "loss": 3.1535, "step": 4143 }, { "epoch": 0.59, "learning_rate": 8.320092445471616e-05, "loss": 3.348, "step": 4144 }, { "epoch": 0.59, "learning_rate": 8.317203524483606e-05, "loss": 3.3239, "step": 4145 }, { "epoch": 0.59, "learning_rate": 8.314314603495594e-05, "loss": 3.2792, "step": 4146 }, { "epoch": 0.59, "learning_rate": 8.311425682507585e-05, "loss": 3.249, "step": 4147 }, { "epoch": 0.59, "learning_rate": 8.308536761519573e-05, "loss": 3.2805, "step": 4148 }, { "epoch": 0.59, "learning_rate": 8.305647840531562e-05, "loss": 3.2893, "step": 4149 }, { "epoch": 0.59, "learning_rate": 8.302758919543551e-05, "loss": 3.2423, "step": 4150 }, { "epoch": 0.59, "learning_rate": 8.29986999855554e-05, "loss": 3.2868, "step": 4151 }, { "epoch": 0.59, "learning_rate": 8.29698107756753e-05, "loss": 3.3046, "step": 4152 }, { "epoch": 0.59, "learning_rate": 8.294092156579518e-05, "loss": 3.0475, "step": 4153 }, { "epoch": 0.59, "learning_rate": 8.291203235591507e-05, "loss": 3.4062, "step": 4154 }, { "epoch": 0.59, "learning_rate": 8.288314314603496e-05, "loss": 3.2152, "step": 4155 }, { "epoch": 0.59, "learning_rate": 8.285425393615485e-05, "loss": 3.3383, "step": 4156 }, { "epoch": 0.59, "learning_rate": 8.282536472627474e-05, "loss": 3.2178, "step": 4157 }, { "epoch": 0.59, "learning_rate": 8.279647551639464e-05, "loss": 3.221, "step": 4158 }, { "epoch": 0.59, "learning_rate": 8.276758630651452e-05, "loss": 3.2245, "step": 4159 }, { "epoch": 0.59, "learning_rate": 8.273869709663441e-05, "loss": 3.2269, "step": 4160 }, { "epoch": 0.59, "learning_rate": 8.27098078867543e-05, "loss": 3.2183, "step": 4161 }, { "epoch": 0.59, "learning_rate": 8.268091867687418e-05, "loss": 3.1523, "step": 4162 }, { "epoch": 0.59, "learning_rate": 8.265202946699408e-05, "loss": 3.1758, "step": 4163 }, { "epoch": 0.59, "learning_rate": 8.262314025711397e-05, "loss": 3.2164, "step": 4164 }, { "epoch": 0.59, "learning_rate": 8.259425104723387e-05, "loss": 3.2022, "step": 4165 }, { "epoch": 0.59, "learning_rate": 8.256536183735375e-05, "loss": 3.2189, "step": 4166 }, { "epoch": 0.59, "learning_rate": 8.253647262747364e-05, "loss": 3.2521, "step": 4167 }, { "epoch": 0.59, "learning_rate": 8.250758341759353e-05, "loss": 3.2237, "step": 4168 }, { "epoch": 0.59, "learning_rate": 8.247869420771343e-05, "loss": 3.2787, "step": 4169 }, { "epoch": 0.59, "learning_rate": 8.244980499783331e-05, "loss": 3.1623, "step": 4170 }, { "epoch": 0.59, "learning_rate": 8.24209157879532e-05, "loss": 3.2421, "step": 4171 }, { "epoch": 0.59, "learning_rate": 8.23920265780731e-05, "loss": 3.239, "step": 4172 }, { "epoch": 0.59, "learning_rate": 8.236313736819299e-05, "loss": 3.2289, "step": 4173 }, { "epoch": 0.59, "learning_rate": 8.233424815831287e-05, "loss": 3.2067, "step": 4174 }, { "epoch": 0.59, "learning_rate": 8.230535894843276e-05, "loss": 3.1906, "step": 4175 }, { "epoch": 0.59, "learning_rate": 8.227646973855266e-05, "loss": 3.3086, "step": 4176 }, { "epoch": 0.59, "learning_rate": 8.224758052867254e-05, "loss": 3.205, "step": 4177 }, { "epoch": 0.59, "learning_rate": 8.221869131879245e-05, "loss": 3.1586, "step": 4178 }, { "epoch": 0.6, "learning_rate": 8.218980210891233e-05, "loss": 3.2744, "step": 4179 }, { "epoch": 0.6, "learning_rate": 8.216091289903222e-05, "loss": 3.1454, "step": 4180 }, { "epoch": 0.6, "learning_rate": 8.21320236891521e-05, "loss": 3.2559, "step": 4181 }, { "epoch": 0.6, "learning_rate": 8.2103134479272e-05, "loss": 3.2432, "step": 4182 }, { "epoch": 0.6, "learning_rate": 8.207424526939189e-05, "loss": 3.2066, "step": 4183 }, { "epoch": 0.6, "learning_rate": 8.204535605951178e-05, "loss": 3.1526, "step": 4184 }, { "epoch": 0.6, "learning_rate": 8.201646684963166e-05, "loss": 3.1424, "step": 4185 }, { "epoch": 0.6, "learning_rate": 8.198757763975156e-05, "loss": 3.1921, "step": 4186 }, { "epoch": 0.6, "learning_rate": 8.195868842987145e-05, "loss": 3.1171, "step": 4187 }, { "epoch": 0.6, "learning_rate": 8.192979921999133e-05, "loss": 3.1883, "step": 4188 }, { "epoch": 0.6, "learning_rate": 8.190091001011124e-05, "loss": 3.3401, "step": 4189 }, { "epoch": 0.6, "learning_rate": 8.187202080023112e-05, "loss": 3.1864, "step": 4190 }, { "epoch": 0.6, "learning_rate": 8.184313159035101e-05, "loss": 3.259, "step": 4191 }, { "epoch": 0.6, "learning_rate": 8.181424238047089e-05, "loss": 3.1787, "step": 4192 }, { "epoch": 0.6, "learning_rate": 8.178535317059078e-05, "loss": 3.1158, "step": 4193 }, { "epoch": 0.6, "learning_rate": 8.175646396071068e-05, "loss": 3.2656, "step": 4194 }, { "epoch": 0.6, "learning_rate": 8.172757475083057e-05, "loss": 3.2065, "step": 4195 }, { "epoch": 0.6, "learning_rate": 8.169868554095045e-05, "loss": 3.2744, "step": 4196 }, { "epoch": 0.6, "learning_rate": 8.166979633107035e-05, "loss": 3.0871, "step": 4197 }, { "epoch": 0.6, "learning_rate": 8.164090712119024e-05, "loss": 3.3143, "step": 4198 }, { "epoch": 0.6, "learning_rate": 8.161201791131013e-05, "loss": 3.1667, "step": 4199 }, { "epoch": 0.6, "learning_rate": 8.158312870143003e-05, "loss": 3.2188, "step": 4200 }, { "epoch": 0.6, "learning_rate": 8.155423949154991e-05, "loss": 3.1783, "step": 4201 }, { "epoch": 0.6, "learning_rate": 8.15253502816698e-05, "loss": 3.2553, "step": 4202 }, { "epoch": 0.6, "learning_rate": 8.149646107178968e-05, "loss": 3.3402, "step": 4203 }, { "epoch": 0.6, "learning_rate": 8.146757186190958e-05, "loss": 3.2964, "step": 4204 }, { "epoch": 0.6, "learning_rate": 8.143868265202947e-05, "loss": 3.258, "step": 4205 }, { "epoch": 0.6, "learning_rate": 8.140979344214936e-05, "loss": 3.1899, "step": 4206 }, { "epoch": 0.6, "learning_rate": 8.138090423226924e-05, "loss": 3.2875, "step": 4207 }, { "epoch": 0.6, "learning_rate": 8.135201502238914e-05, "loss": 3.2679, "step": 4208 }, { "epoch": 0.6, "learning_rate": 8.132312581250903e-05, "loss": 3.0585, "step": 4209 }, { "epoch": 0.6, "learning_rate": 8.129423660262893e-05, "loss": 3.2907, "step": 4210 }, { "epoch": 0.6, "learning_rate": 8.126534739274882e-05, "loss": 3.1095, "step": 4211 }, { "epoch": 0.6, "learning_rate": 8.12364581828687e-05, "loss": 3.2634, "step": 4212 }, { "epoch": 0.6, "learning_rate": 8.12075689729886e-05, "loss": 3.3216, "step": 4213 }, { "epoch": 0.6, "learning_rate": 8.117867976310847e-05, "loss": 3.2927, "step": 4214 }, { "epoch": 0.6, "learning_rate": 8.114979055322838e-05, "loss": 3.2301, "step": 4215 }, { "epoch": 0.6, "learning_rate": 8.112090134334826e-05, "loss": 3.2072, "step": 4216 }, { "epoch": 0.6, "learning_rate": 8.109201213346816e-05, "loss": 3.283, "step": 4217 }, { "epoch": 0.6, "learning_rate": 8.106312292358804e-05, "loss": 3.1787, "step": 4218 }, { "epoch": 0.6, "learning_rate": 8.103423371370793e-05, "loss": 3.2416, "step": 4219 }, { "epoch": 0.6, "learning_rate": 8.100534450382782e-05, "loss": 3.0936, "step": 4220 }, { "epoch": 0.6, "learning_rate": 8.097645529394772e-05, "loss": 3.2907, "step": 4221 }, { "epoch": 0.6, "learning_rate": 8.094756608406761e-05, "loss": 3.1733, "step": 4222 }, { "epoch": 0.6, "learning_rate": 8.091867687418749e-05, "loss": 3.381, "step": 4223 }, { "epoch": 0.6, "learning_rate": 8.088978766430739e-05, "loss": 3.2477, "step": 4224 }, { "epoch": 0.6, "eval_loss": 3.454590082168579, "eval_runtime": 471.9004, "eval_samples_per_second": 43.414, "eval_steps_per_second": 14.471, "step": 4224 }, { "epoch": 0.6, "learning_rate": 8.086089845442728e-05, "loss": 3.1189, "step": 4225 }, { "epoch": 0.6, "learning_rate": 8.083200924454717e-05, "loss": 3.2583, "step": 4226 }, { "epoch": 0.6, "learning_rate": 8.080312003466705e-05, "loss": 3.3358, "step": 4227 }, { "epoch": 0.6, "learning_rate": 8.077423082478695e-05, "loss": 3.2218, "step": 4228 }, { "epoch": 0.6, "learning_rate": 8.074534161490683e-05, "loss": 3.2638, "step": 4229 }, { "epoch": 0.6, "learning_rate": 8.071645240502672e-05, "loss": 3.1001, "step": 4230 }, { "epoch": 0.6, "learning_rate": 8.068756319514661e-05, "loss": 3.1898, "step": 4231 }, { "epoch": 0.6, "learning_rate": 8.065867398526651e-05, "loss": 3.2099, "step": 4232 }, { "epoch": 0.6, "learning_rate": 8.06297847753864e-05, "loss": 3.2063, "step": 4233 }, { "epoch": 0.6, "learning_rate": 8.060089556550628e-05, "loss": 3.223, "step": 4234 }, { "epoch": 0.6, "learning_rate": 8.057200635562618e-05, "loss": 3.2809, "step": 4235 }, { "epoch": 0.6, "learning_rate": 8.054311714574607e-05, "loss": 3.2364, "step": 4236 }, { "epoch": 0.6, "learning_rate": 8.051422793586596e-05, "loss": 3.132, "step": 4237 }, { "epoch": 0.6, "learning_rate": 8.048533872598584e-05, "loss": 3.1899, "step": 4238 }, { "epoch": 0.6, "learning_rate": 8.045644951610574e-05, "loss": 3.2914, "step": 4239 }, { "epoch": 0.6, "learning_rate": 8.042756030622562e-05, "loss": 3.1883, "step": 4240 }, { "epoch": 0.6, "learning_rate": 8.039867109634553e-05, "loss": 3.1069, "step": 4241 }, { "epoch": 0.6, "learning_rate": 8.03697818864654e-05, "loss": 3.2493, "step": 4242 }, { "epoch": 0.6, "learning_rate": 8.03408926765853e-05, "loss": 3.1019, "step": 4243 }, { "epoch": 0.6, "learning_rate": 8.03120034667052e-05, "loss": 3.4186, "step": 4244 }, { "epoch": 0.6, "learning_rate": 8.028311425682507e-05, "loss": 2.9726, "step": 4245 }, { "epoch": 0.6, "learning_rate": 8.025422504694498e-05, "loss": 3.1249, "step": 4246 }, { "epoch": 0.6, "learning_rate": 8.022533583706486e-05, "loss": 3.0704, "step": 4247 }, { "epoch": 0.6, "learning_rate": 8.019644662718476e-05, "loss": 3.1305, "step": 4248 }, { "epoch": 0.6, "learning_rate": 8.016755741730464e-05, "loss": 3.2742, "step": 4249 }, { "epoch": 0.61, "learning_rate": 8.013866820742453e-05, "loss": 3.2609, "step": 4250 }, { "epoch": 0.61, "learning_rate": 8.010977899754441e-05, "loss": 3.2674, "step": 4251 }, { "epoch": 0.61, "learning_rate": 8.008088978766432e-05, "loss": 3.274, "step": 4252 }, { "epoch": 0.61, "learning_rate": 8.00520005777842e-05, "loss": 3.1981, "step": 4253 }, { "epoch": 0.61, "learning_rate": 8.002311136790409e-05, "loss": 3.1098, "step": 4254 }, { "epoch": 0.61, "learning_rate": 7.999422215802399e-05, "loss": 3.3066, "step": 4255 }, { "epoch": 0.61, "learning_rate": 7.996533294814387e-05, "loss": 3.2974, "step": 4256 }, { "epoch": 0.61, "learning_rate": 7.993644373826377e-05, "loss": 3.2273, "step": 4257 }, { "epoch": 0.61, "learning_rate": 7.990755452838365e-05, "loss": 3.3769, "step": 4258 }, { "epoch": 0.61, "learning_rate": 7.987866531850355e-05, "loss": 3.2165, "step": 4259 }, { "epoch": 0.61, "learning_rate": 7.984977610862343e-05, "loss": 3.2988, "step": 4260 }, { "epoch": 0.61, "learning_rate": 7.982088689874332e-05, "loss": 3.1474, "step": 4261 }, { "epoch": 0.61, "learning_rate": 7.979199768886321e-05, "loss": 3.1661, "step": 4262 }, { "epoch": 0.61, "learning_rate": 7.976310847898311e-05, "loss": 2.9669, "step": 4263 }, { "epoch": 0.61, "learning_rate": 7.973421926910299e-05, "loss": 3.1976, "step": 4264 }, { "epoch": 0.61, "learning_rate": 7.970533005922288e-05, "loss": 3.2131, "step": 4265 }, { "epoch": 0.61, "learning_rate": 7.967644084934278e-05, "loss": 3.2106, "step": 4266 }, { "epoch": 0.61, "learning_rate": 7.964755163946267e-05, "loss": 3.2483, "step": 4267 }, { "epoch": 0.61, "learning_rate": 7.961866242958256e-05, "loss": 3.3123, "step": 4268 }, { "epoch": 0.61, "learning_rate": 7.958977321970244e-05, "loss": 3.2434, "step": 4269 }, { "epoch": 0.61, "learning_rate": 7.956088400982234e-05, "loss": 3.2349, "step": 4270 }, { "epoch": 0.61, "learning_rate": 7.953199479994222e-05, "loss": 3.2684, "step": 4271 }, { "epoch": 0.61, "learning_rate": 7.950310559006211e-05, "loss": 3.3313, "step": 4272 }, { "epoch": 0.61, "learning_rate": 7.9474216380182e-05, "loss": 3.1969, "step": 4273 }, { "epoch": 0.61, "learning_rate": 7.94453271703019e-05, "loss": 3.1348, "step": 4274 }, { "epoch": 0.61, "learning_rate": 7.941643796042178e-05, "loss": 3.153, "step": 4275 }, { "epoch": 0.61, "learning_rate": 7.938754875054167e-05, "loss": 3.3107, "step": 4276 }, { "epoch": 0.61, "learning_rate": 7.935865954066157e-05, "loss": 2.9463, "step": 4277 }, { "epoch": 0.61, "learning_rate": 7.932977033078146e-05, "loss": 3.2237, "step": 4278 }, { "epoch": 0.61, "learning_rate": 7.930088112090136e-05, "loss": 3.2952, "step": 4279 }, { "epoch": 0.61, "learning_rate": 7.927199191102124e-05, "loss": 3.112, "step": 4280 }, { "epoch": 0.61, "learning_rate": 7.924310270114113e-05, "loss": 3.2988, "step": 4281 }, { "epoch": 0.61, "learning_rate": 7.921421349126101e-05, "loss": 3.2512, "step": 4282 }, { "epoch": 0.61, "learning_rate": 7.918532428138092e-05, "loss": 3.35, "step": 4283 }, { "epoch": 0.61, "learning_rate": 7.91564350715008e-05, "loss": 3.2903, "step": 4284 }, { "epoch": 0.61, "learning_rate": 7.912754586162069e-05, "loss": 3.2621, "step": 4285 }, { "epoch": 0.61, "learning_rate": 7.909865665174057e-05, "loss": 3.3227, "step": 4286 }, { "epoch": 0.61, "learning_rate": 7.906976744186047e-05, "loss": 3.1815, "step": 4287 }, { "epoch": 0.61, "learning_rate": 7.904087823198036e-05, "loss": 3.2774, "step": 4288 }, { "epoch": 0.61, "learning_rate": 7.901198902210025e-05, "loss": 3.2155, "step": 4289 }, { "epoch": 0.61, "learning_rate": 7.898309981222013e-05, "loss": 3.3794, "step": 4290 }, { "epoch": 0.61, "learning_rate": 7.895421060234003e-05, "loss": 3.351, "step": 4291 }, { "epoch": 0.61, "learning_rate": 7.892532139245992e-05, "loss": 3.1811, "step": 4292 }, { "epoch": 0.61, "learning_rate": 7.88964321825798e-05, "loss": 3.2645, "step": 4293 }, { "epoch": 0.61, "learning_rate": 7.886754297269971e-05, "loss": 3.2618, "step": 4294 }, { "epoch": 0.61, "learning_rate": 7.883865376281959e-05, "loss": 3.2929, "step": 4295 }, { "epoch": 0.61, "learning_rate": 7.880976455293948e-05, "loss": 3.3232, "step": 4296 }, { "epoch": 0.61, "learning_rate": 7.878087534305936e-05, "loss": 3.2274, "step": 4297 }, { "epoch": 0.61, "learning_rate": 7.875198613317926e-05, "loss": 3.2881, "step": 4298 }, { "epoch": 0.61, "learning_rate": 7.872309692329915e-05, "loss": 3.2647, "step": 4299 }, { "epoch": 0.61, "learning_rate": 7.869420771341904e-05, "loss": 3.3605, "step": 4300 }, { "epoch": 0.61, "learning_rate": 7.866531850353893e-05, "loss": 3.2639, "step": 4301 }, { "epoch": 0.61, "learning_rate": 7.863642929365882e-05, "loss": 3.3084, "step": 4302 }, { "epoch": 0.61, "learning_rate": 7.860754008377871e-05, "loss": 3.0701, "step": 4303 }, { "epoch": 0.61, "learning_rate": 7.85786508738986e-05, "loss": 3.2976, "step": 4304 }, { "epoch": 0.61, "learning_rate": 7.85497616640185e-05, "loss": 3.1141, "step": 4305 }, { "epoch": 0.61, "learning_rate": 7.852087245413838e-05, "loss": 3.0212, "step": 4306 }, { "epoch": 0.61, "learning_rate": 7.849198324425827e-05, "loss": 3.327, "step": 4307 }, { "epoch": 0.61, "learning_rate": 7.846309403437815e-05, "loss": 3.2982, "step": 4308 }, { "epoch": 0.61, "learning_rate": 7.843420482449806e-05, "loss": 3.1753, "step": 4309 }, { "epoch": 0.61, "learning_rate": 7.840531561461794e-05, "loss": 3.0708, "step": 4310 }, { "epoch": 0.61, "learning_rate": 7.837642640473784e-05, "loss": 3.3137, "step": 4311 }, { "epoch": 0.61, "learning_rate": 7.834753719485772e-05, "loss": 3.1944, "step": 4312 }, { "epoch": 0.61, "learning_rate": 7.831864798497761e-05, "loss": 3.2517, "step": 4313 }, { "epoch": 0.61, "learning_rate": 7.82897587750975e-05, "loss": 3.2313, "step": 4314 }, { "epoch": 0.61, "learning_rate": 7.82608695652174e-05, "loss": 3.1304, "step": 4315 }, { "epoch": 0.61, "learning_rate": 7.823198035533729e-05, "loss": 3.2009, "step": 4316 }, { "epoch": 0.61, "learning_rate": 7.820309114545717e-05, "loss": 3.3346, "step": 4317 }, { "epoch": 0.61, "learning_rate": 7.817420193557707e-05, "loss": 3.2339, "step": 4318 }, { "epoch": 0.61, "learning_rate": 7.814531272569695e-05, "loss": 3.3033, "step": 4319 }, { "epoch": 0.62, "learning_rate": 7.811642351581685e-05, "loss": 3.2246, "step": 4320 }, { "epoch": 0.62, "learning_rate": 7.808753430593673e-05, "loss": 3.3153, "step": 4321 }, { "epoch": 0.62, "learning_rate": 7.805864509605663e-05, "loss": 3.2447, "step": 4322 }, { "epoch": 0.62, "learning_rate": 7.802975588617651e-05, "loss": 3.2197, "step": 4323 }, { "epoch": 0.62, "learning_rate": 7.80008666762964e-05, "loss": 3.257, "step": 4324 }, { "epoch": 0.62, "learning_rate": 7.79719774664163e-05, "loss": 3.1468, "step": 4325 }, { "epoch": 0.62, "learning_rate": 7.794308825653619e-05, "loss": 3.2364, "step": 4326 }, { "epoch": 0.62, "learning_rate": 7.791419904665608e-05, "loss": 3.3317, "step": 4327 }, { "epoch": 0.62, "learning_rate": 7.788530983677596e-05, "loss": 3.3097, "step": 4328 }, { "epoch": 0.62, "learning_rate": 7.785642062689586e-05, "loss": 3.265, "step": 4329 }, { "epoch": 0.62, "learning_rate": 7.782753141701575e-05, "loss": 3.2576, "step": 4330 }, { "epoch": 0.62, "learning_rate": 7.779864220713564e-05, "loss": 3.2598, "step": 4331 }, { "epoch": 0.62, "learning_rate": 7.776975299725553e-05, "loss": 3.2786, "step": 4332 }, { "epoch": 0.62, "learning_rate": 7.774086378737542e-05, "loss": 3.1964, "step": 4333 }, { "epoch": 0.62, "learning_rate": 7.77119745774953e-05, "loss": 3.3129, "step": 4334 }, { "epoch": 0.62, "learning_rate": 7.76830853676152e-05, "loss": 3.333, "step": 4335 }, { "epoch": 0.62, "learning_rate": 7.765419615773509e-05, "loss": 3.2189, "step": 4336 }, { "epoch": 0.62, "learning_rate": 7.762530694785498e-05, "loss": 3.2821, "step": 4337 }, { "epoch": 0.62, "learning_rate": 7.759641773797487e-05, "loss": 3.2736, "step": 4338 }, { "epoch": 0.62, "learning_rate": 7.756752852809475e-05, "loss": 3.1567, "step": 4339 }, { "epoch": 0.62, "learning_rate": 7.753863931821465e-05, "loss": 3.1936, "step": 4340 }, { "epoch": 0.62, "learning_rate": 7.750975010833454e-05, "loss": 3.2622, "step": 4341 }, { "epoch": 0.62, "learning_rate": 7.748086089845444e-05, "loss": 3.3282, "step": 4342 }, { "epoch": 0.62, "learning_rate": 7.745197168857432e-05, "loss": 3.2657, "step": 4343 }, { "epoch": 0.62, "learning_rate": 7.742308247869421e-05, "loss": 3.1931, "step": 4344 }, { "epoch": 0.62, "learning_rate": 7.739419326881409e-05, "loss": 3.3269, "step": 4345 }, { "epoch": 0.62, "learning_rate": 7.7365304058934e-05, "loss": 3.2184, "step": 4346 }, { "epoch": 0.62, "learning_rate": 7.733641484905388e-05, "loss": 3.1989, "step": 4347 }, { "epoch": 0.62, "learning_rate": 7.730752563917377e-05, "loss": 3.1841, "step": 4348 }, { "epoch": 0.62, "learning_rate": 7.727863642929367e-05, "loss": 3.155, "step": 4349 }, { "epoch": 0.62, "learning_rate": 7.724974721941355e-05, "loss": 3.2665, "step": 4350 }, { "epoch": 0.62, "learning_rate": 7.722085800953345e-05, "loss": 3.2832, "step": 4351 }, { "epoch": 0.62, "learning_rate": 7.719196879965333e-05, "loss": 3.127, "step": 4352 }, { "epoch": 0.62, "learning_rate": 7.716307958977323e-05, "loss": 3.2496, "step": 4353 }, { "epoch": 0.62, "learning_rate": 7.713419037989311e-05, "loss": 3.0619, "step": 4354 }, { "epoch": 0.62, "learning_rate": 7.7105301170013e-05, "loss": 3.2216, "step": 4355 }, { "epoch": 0.62, "learning_rate": 7.70764119601329e-05, "loss": 3.1777, "step": 4356 }, { "epoch": 0.62, "learning_rate": 7.704752275025279e-05, "loss": 3.2618, "step": 4357 }, { "epoch": 0.62, "learning_rate": 7.701863354037267e-05, "loss": 3.328, "step": 4358 }, { "epoch": 0.62, "learning_rate": 7.698974433049256e-05, "loss": 3.1858, "step": 4359 }, { "epoch": 0.62, "learning_rate": 7.696085512061246e-05, "loss": 3.3507, "step": 4360 }, { "epoch": 0.62, "learning_rate": 7.693196591073234e-05, "loss": 3.1983, "step": 4361 }, { "epoch": 0.62, "learning_rate": 7.690307670085225e-05, "loss": 3.0417, "step": 4362 }, { "epoch": 0.62, "learning_rate": 7.687418749097213e-05, "loss": 3.1231, "step": 4363 }, { "epoch": 0.62, "learning_rate": 7.684529828109202e-05, "loss": 3.1928, "step": 4364 }, { "epoch": 0.62, "learning_rate": 7.68164090712119e-05, "loss": 3.1603, "step": 4365 }, { "epoch": 0.62, "learning_rate": 7.67875198613318e-05, "loss": 3.1971, "step": 4366 }, { "epoch": 0.62, "learning_rate": 7.675863065145169e-05, "loss": 3.3221, "step": 4367 }, { "epoch": 0.62, "learning_rate": 7.672974144157158e-05, "loss": 3.2545, "step": 4368 }, { "epoch": 0.62, "learning_rate": 7.670085223169146e-05, "loss": 3.2316, "step": 4369 }, { "epoch": 0.62, "learning_rate": 7.667196302181136e-05, "loss": 3.2825, "step": 4370 }, { "epoch": 0.62, "learning_rate": 7.664307381193125e-05, "loss": 3.1697, "step": 4371 }, { "epoch": 0.62, "learning_rate": 7.661418460205114e-05, "loss": 3.3155, "step": 4372 }, { "epoch": 0.62, "learning_rate": 7.658529539217104e-05, "loss": 3.2699, "step": 4373 }, { "epoch": 0.62, "learning_rate": 7.655640618229092e-05, "loss": 3.1791, "step": 4374 }, { "epoch": 0.62, "learning_rate": 7.652751697241081e-05, "loss": 3.2443, "step": 4375 }, { "epoch": 0.62, "learning_rate": 7.649862776253069e-05, "loss": 3.1897, "step": 4376 }, { "epoch": 0.62, "learning_rate": 7.64697385526506e-05, "loss": 3.1354, "step": 4377 }, { "epoch": 0.62, "learning_rate": 7.644084934277048e-05, "loss": 3.2865, "step": 4378 }, { "epoch": 0.62, "learning_rate": 7.641196013289037e-05, "loss": 3.1777, "step": 4379 }, { "epoch": 0.62, "learning_rate": 7.638307092301025e-05, "loss": 3.1771, "step": 4380 }, { "epoch": 0.62, "learning_rate": 7.635418171313015e-05, "loss": 3.1502, "step": 4381 }, { "epoch": 0.62, "learning_rate": 7.632529250325004e-05, "loss": 3.0661, "step": 4382 }, { "epoch": 0.62, "learning_rate": 7.629640329336993e-05, "loss": 3.1649, "step": 4383 }, { "epoch": 0.62, "learning_rate": 7.626751408348983e-05, "loss": 3.2292, "step": 4384 }, { "epoch": 0.62, "learning_rate": 7.623862487360971e-05, "loss": 3.0547, "step": 4385 }, { "epoch": 0.62, "learning_rate": 7.62097356637296e-05, "loss": 3.2867, "step": 4386 }, { "epoch": 0.62, "learning_rate": 7.618084645384948e-05, "loss": 3.2806, "step": 4387 }, { "epoch": 0.62, "learning_rate": 7.615195724396939e-05, "loss": 3.1973, "step": 4388 }, { "epoch": 0.62, "learning_rate": 7.612306803408927e-05, "loss": 3.1027, "step": 4389 }, { "epoch": 0.63, "learning_rate": 7.609417882420916e-05, "loss": 3.3486, "step": 4390 }, { "epoch": 0.63, "learning_rate": 7.606528961432904e-05, "loss": 3.2801, "step": 4391 }, { "epoch": 0.63, "learning_rate": 7.603640040444894e-05, "loss": 3.1514, "step": 4392 }, { "epoch": 0.63, "learning_rate": 7.600751119456883e-05, "loss": 3.1717, "step": 4393 }, { "epoch": 0.63, "learning_rate": 7.597862198468873e-05, "loss": 3.2219, "step": 4394 }, { "epoch": 0.63, "learning_rate": 7.594973277480862e-05, "loss": 3.2382, "step": 4395 }, { "epoch": 0.63, "learning_rate": 7.59208435649285e-05, "loss": 3.0522, "step": 4396 }, { "epoch": 0.63, "learning_rate": 7.58919543550484e-05, "loss": 3.1792, "step": 4397 }, { "epoch": 0.63, "learning_rate": 7.586306514516829e-05, "loss": 3.2079, "step": 4398 }, { "epoch": 0.63, "learning_rate": 7.583417593528818e-05, "loss": 3.2015, "step": 4399 }, { "epoch": 0.63, "learning_rate": 7.580528672540806e-05, "loss": 3.1522, "step": 4400 }, { "epoch": 0.63, "learning_rate": 7.577639751552796e-05, "loss": 3.2472, "step": 4401 }, { "epoch": 0.63, "learning_rate": 7.574750830564784e-05, "loss": 3.2378, "step": 4402 }, { "epoch": 0.63, "learning_rate": 7.571861909576773e-05, "loss": 3.2979, "step": 4403 }, { "epoch": 0.63, "learning_rate": 7.568972988588762e-05, "loss": 3.1266, "step": 4404 }, { "epoch": 0.63, "learning_rate": 7.566084067600752e-05, "loss": 2.9579, "step": 4405 }, { "epoch": 0.63, "learning_rate": 7.563195146612741e-05, "loss": 3.2063, "step": 4406 }, { "epoch": 0.63, "learning_rate": 7.560306225624729e-05, "loss": 3.2874, "step": 4407 }, { "epoch": 0.63, "learning_rate": 7.557417304636718e-05, "loss": 3.2408, "step": 4408 }, { "epoch": 0.63, "learning_rate": 7.554528383648708e-05, "loss": 3.2621, "step": 4409 }, { "epoch": 0.63, "learning_rate": 7.551639462660697e-05, "loss": 3.0716, "step": 4410 }, { "epoch": 0.63, "learning_rate": 7.548750541672685e-05, "loss": 3.2889, "step": 4411 }, { "epoch": 0.63, "learning_rate": 7.545861620684675e-05, "loss": 3.2795, "step": 4412 }, { "epoch": 0.63, "learning_rate": 7.542972699696663e-05, "loss": 3.088, "step": 4413 }, { "epoch": 0.63, "learning_rate": 7.540083778708653e-05, "loss": 3.2983, "step": 4414 }, { "epoch": 0.63, "learning_rate": 7.537194857720641e-05, "loss": 3.19, "step": 4415 }, { "epoch": 0.63, "learning_rate": 7.534305936732631e-05, "loss": 3.1916, "step": 4416 }, { "epoch": 0.63, "learning_rate": 7.53141701574462e-05, "loss": 3.298, "step": 4417 }, { "epoch": 0.63, "learning_rate": 7.528528094756608e-05, "loss": 3.1589, "step": 4418 }, { "epoch": 0.63, "learning_rate": 7.525639173768598e-05, "loss": 3.306, "step": 4419 }, { "epoch": 0.63, "learning_rate": 7.522750252780587e-05, "loss": 3.2359, "step": 4420 }, { "epoch": 0.63, "learning_rate": 7.519861331792576e-05, "loss": 3.278, "step": 4421 }, { "epoch": 0.63, "learning_rate": 7.516972410804564e-05, "loss": 3.3765, "step": 4422 }, { "epoch": 0.63, "learning_rate": 7.514083489816554e-05, "loss": 3.2689, "step": 4423 }, { "epoch": 0.63, "learning_rate": 7.511194568828543e-05, "loss": 3.3587, "step": 4424 }, { "epoch": 0.63, "learning_rate": 7.508305647840533e-05, "loss": 3.3364, "step": 4425 }, { "epoch": 0.63, "learning_rate": 7.50541672685252e-05, "loss": 3.2131, "step": 4426 }, { "epoch": 0.63, "learning_rate": 7.50252780586451e-05, "loss": 3.2932, "step": 4427 }, { "epoch": 0.63, "learning_rate": 7.499638884876498e-05, "loss": 3.2665, "step": 4428 }, { "epoch": 0.63, "learning_rate": 7.496749963888487e-05, "loss": 3.381, "step": 4429 }, { "epoch": 0.63, "learning_rate": 7.493861042900477e-05, "loss": 3.2373, "step": 4430 }, { "epoch": 0.63, "learning_rate": 7.490972121912466e-05, "loss": 3.1545, "step": 4431 }, { "epoch": 0.63, "learning_rate": 7.488083200924456e-05, "loss": 3.2665, "step": 4432 }, { "epoch": 0.63, "learning_rate": 7.485194279936444e-05, "loss": 3.3637, "step": 4433 }, { "epoch": 0.63, "learning_rate": 7.482305358948433e-05, "loss": 3.3338, "step": 4434 }, { "epoch": 0.63, "learning_rate": 7.479416437960422e-05, "loss": 3.117, "step": 4435 }, { "epoch": 0.63, "learning_rate": 7.476527516972412e-05, "loss": 2.8719, "step": 4436 }, { "epoch": 0.63, "learning_rate": 7.4736385959844e-05, "loss": 3.2194, "step": 4437 }, { "epoch": 0.63, "learning_rate": 7.470749674996389e-05, "loss": 3.3096, "step": 4438 }, { "epoch": 0.63, "learning_rate": 7.467860754008377e-05, "loss": 3.3919, "step": 4439 }, { "epoch": 0.63, "learning_rate": 7.464971833020368e-05, "loss": 3.2348, "step": 4440 }, { "epoch": 0.63, "learning_rate": 7.462082912032356e-05, "loss": 3.2321, "step": 4441 }, { "epoch": 0.63, "learning_rate": 7.459193991044345e-05, "loss": 3.0917, "step": 4442 }, { "epoch": 0.63, "learning_rate": 7.456305070056335e-05, "loss": 3.1843, "step": 4443 }, { "epoch": 0.63, "learning_rate": 7.453416149068323e-05, "loss": 3.1699, "step": 4444 }, { "epoch": 0.63, "learning_rate": 7.450527228080313e-05, "loss": 3.2327, "step": 4445 }, { "epoch": 0.63, "learning_rate": 7.447638307092301e-05, "loss": 3.2365, "step": 4446 }, { "epoch": 0.63, "learning_rate": 7.444749386104291e-05, "loss": 3.1183, "step": 4447 }, { "epoch": 0.63, "learning_rate": 7.441860465116279e-05, "loss": 3.2592, "step": 4448 }, { "epoch": 0.63, "learning_rate": 7.438971544128268e-05, "loss": 3.095, "step": 4449 }, { "epoch": 0.63, "learning_rate": 7.436082623140256e-05, "loss": 3.2686, "step": 4450 }, { "epoch": 0.63, "learning_rate": 7.433193702152247e-05, "loss": 3.2056, "step": 4451 }, { "epoch": 0.63, "learning_rate": 7.430304781164235e-05, "loss": 3.2602, "step": 4452 }, { "epoch": 0.63, "learning_rate": 7.427415860176224e-05, "loss": 3.1665, "step": 4453 }, { "epoch": 0.63, "learning_rate": 7.424526939188214e-05, "loss": 3.0854, "step": 4454 }, { "epoch": 0.63, "learning_rate": 7.421638018200202e-05, "loss": 3.0639, "step": 4455 }, { "epoch": 0.63, "learning_rate": 7.418749097212193e-05, "loss": 3.1661, "step": 4456 }, { "epoch": 0.63, "learning_rate": 7.41586017622418e-05, "loss": 3.1701, "step": 4457 }, { "epoch": 0.63, "learning_rate": 7.41297125523617e-05, "loss": 3.1404, "step": 4458 }, { "epoch": 0.63, "learning_rate": 7.410082334248158e-05, "loss": 3.1746, "step": 4459 }, { "epoch": 0.64, "learning_rate": 7.407193413260147e-05, "loss": 3.2634, "step": 4460 }, { "epoch": 0.64, "learning_rate": 7.404304492272137e-05, "loss": 3.058, "step": 4461 }, { "epoch": 0.64, "learning_rate": 7.401415571284126e-05, "loss": 3.1933, "step": 4462 }, { "epoch": 0.64, "learning_rate": 7.398526650296114e-05, "loss": 3.2946, "step": 4463 }, { "epoch": 0.64, "learning_rate": 7.395637729308104e-05, "loss": 3.3623, "step": 4464 }, { "epoch": 0.64, "learning_rate": 7.392748808320093e-05, "loss": 3.1468, "step": 4465 }, { "epoch": 0.64, "learning_rate": 7.389859887332082e-05, "loss": 3.2427, "step": 4466 }, { "epoch": 0.64, "learning_rate": 7.386970966344072e-05, "loss": 3.2678, "step": 4467 }, { "epoch": 0.64, "learning_rate": 7.38408204535606e-05, "loss": 3.2054, "step": 4468 }, { "epoch": 0.64, "learning_rate": 7.381193124368049e-05, "loss": 3.1882, "step": 4469 }, { "epoch": 0.64, "learning_rate": 7.378304203380037e-05, "loss": 3.1352, "step": 4470 }, { "epoch": 0.64, "learning_rate": 7.375415282392027e-05, "loss": 3.2657, "step": 4471 }, { "epoch": 0.64, "learning_rate": 7.372526361404016e-05, "loss": 3.2018, "step": 4472 }, { "epoch": 0.64, "learning_rate": 7.369637440416005e-05, "loss": 3.1582, "step": 4473 }, { "epoch": 0.64, "learning_rate": 7.366748519427993e-05, "loss": 3.2181, "step": 4474 }, { "epoch": 0.64, "learning_rate": 7.363859598439983e-05, "loss": 3.1828, "step": 4475 }, { "epoch": 0.64, "learning_rate": 7.360970677451972e-05, "loss": 3.3757, "step": 4476 }, { "epoch": 0.64, "learning_rate": 7.358081756463961e-05, "loss": 3.2835, "step": 4477 }, { "epoch": 0.64, "learning_rate": 7.355192835475951e-05, "loss": 3.2192, "step": 4478 }, { "epoch": 0.64, "learning_rate": 7.352303914487939e-05, "loss": 3.1814, "step": 4479 }, { "epoch": 0.64, "learning_rate": 7.349414993499928e-05, "loss": 3.0554, "step": 4480 }, { "epoch": 0.64, "learning_rate": 7.346526072511916e-05, "loss": 3.3356, "step": 4481 }, { "epoch": 0.64, "learning_rate": 7.343637151523907e-05, "loss": 3.2975, "step": 4482 }, { "epoch": 0.64, "learning_rate": 7.340748230535895e-05, "loss": 3.392, "step": 4483 }, { "epoch": 0.64, "learning_rate": 7.337859309547884e-05, "loss": 3.2069, "step": 4484 }, { "epoch": 0.64, "learning_rate": 7.334970388559872e-05, "loss": 3.2134, "step": 4485 }, { "epoch": 0.64, "learning_rate": 7.332081467571862e-05, "loss": 3.2042, "step": 4486 }, { "epoch": 0.64, "learning_rate": 7.329192546583851e-05, "loss": 3.3986, "step": 4487 }, { "epoch": 0.64, "learning_rate": 7.32630362559584e-05, "loss": 3.3561, "step": 4488 }, { "epoch": 0.64, "learning_rate": 7.32341470460783e-05, "loss": 3.2048, "step": 4489 }, { "epoch": 0.64, "learning_rate": 7.320525783619818e-05, "loss": 3.2647, "step": 4490 }, { "epoch": 0.64, "learning_rate": 7.317636862631807e-05, "loss": 3.2076, "step": 4491 }, { "epoch": 0.64, "learning_rate": 7.314747941643795e-05, "loss": 3.2791, "step": 4492 }, { "epoch": 0.64, "learning_rate": 7.311859020655786e-05, "loss": 3.2383, "step": 4493 }, { "epoch": 0.64, "learning_rate": 7.308970099667774e-05, "loss": 3.27, "step": 4494 }, { "epoch": 0.64, "learning_rate": 7.306081178679764e-05, "loss": 3.1158, "step": 4495 }, { "epoch": 0.64, "learning_rate": 7.303192257691752e-05, "loss": 3.2556, "step": 4496 }, { "epoch": 0.64, "learning_rate": 7.300303336703741e-05, "loss": 3.281, "step": 4497 }, { "epoch": 0.64, "learning_rate": 7.29741441571573e-05, "loss": 3.1698, "step": 4498 }, { "epoch": 0.64, "learning_rate": 7.29452549472772e-05, "loss": 3.2967, "step": 4499 }, { "epoch": 0.64, "learning_rate": 7.291636573739709e-05, "loss": 3.2783, "step": 4500 }, { "epoch": 0.64, "learning_rate": 7.288747652751697e-05, "loss": 3.315, "step": 4501 }, { "epoch": 0.64, "learning_rate": 7.285858731763687e-05, "loss": 3.2573, "step": 4502 }, { "epoch": 0.64, "learning_rate": 7.282969810775676e-05, "loss": 3.1854, "step": 4503 }, { "epoch": 0.64, "learning_rate": 7.280080889787665e-05, "loss": 3.235, "step": 4504 }, { "epoch": 0.64, "learning_rate": 7.277191968799653e-05, "loss": 3.2233, "step": 4505 }, { "epoch": 0.64, "learning_rate": 7.274303047811643e-05, "loss": 3.2246, "step": 4506 }, { "epoch": 0.64, "learning_rate": 7.271414126823631e-05, "loss": 3.2204, "step": 4507 }, { "epoch": 0.64, "learning_rate": 7.268525205835622e-05, "loss": 3.2522, "step": 4508 }, { "epoch": 0.64, "learning_rate": 7.26563628484761e-05, "loss": 3.0833, "step": 4509 }, { "epoch": 0.64, "learning_rate": 7.262747363859599e-05, "loss": 3.07, "step": 4510 }, { "epoch": 0.64, "learning_rate": 7.259858442871588e-05, "loss": 3.1149, "step": 4511 }, { "epoch": 0.64, "learning_rate": 7.256969521883576e-05, "loss": 3.2049, "step": 4512 }, { "epoch": 0.64, "learning_rate": 7.254080600895567e-05, "loss": 3.3168, "step": 4513 }, { "epoch": 0.64, "learning_rate": 7.251191679907555e-05, "loss": 3.2935, "step": 4514 }, { "epoch": 0.64, "learning_rate": 7.248302758919544e-05, "loss": 3.1564, "step": 4515 }, { "epoch": 0.64, "learning_rate": 7.245413837931533e-05, "loss": 3.2737, "step": 4516 }, { "epoch": 0.64, "learning_rate": 7.242524916943522e-05, "loss": 3.1057, "step": 4517 }, { "epoch": 0.64, "learning_rate": 7.23963599595551e-05, "loss": 3.2497, "step": 4518 }, { "epoch": 0.64, "learning_rate": 7.2367470749675e-05, "loss": 3.2421, "step": 4519 }, { "epoch": 0.64, "learning_rate": 7.233858153979489e-05, "loss": 3.2509, "step": 4520 }, { "epoch": 0.64, "learning_rate": 7.230969232991478e-05, "loss": 3.2018, "step": 4521 }, { "epoch": 0.64, "learning_rate": 7.228080312003467e-05, "loss": 3.1461, "step": 4522 }, { "epoch": 0.64, "learning_rate": 7.225191391015455e-05, "loss": 3.1516, "step": 4523 }, { "epoch": 0.64, "learning_rate": 7.222302470027445e-05, "loss": 3.2691, "step": 4524 }, { "epoch": 0.64, "learning_rate": 7.219413549039434e-05, "loss": 3.4174, "step": 4525 }, { "epoch": 0.64, "learning_rate": 7.216524628051424e-05, "loss": 3.2661, "step": 4526 }, { "epoch": 0.64, "learning_rate": 7.213635707063412e-05, "loss": 3.1742, "step": 4527 }, { "epoch": 0.64, "learning_rate": 7.210746786075401e-05, "loss": 3.2276, "step": 4528 }, { "epoch": 0.64, "learning_rate": 7.20785786508739e-05, "loss": 3.2369, "step": 4529 }, { "epoch": 0.64, "learning_rate": 7.20496894409938e-05, "loss": 3.2119, "step": 4530 }, { "epoch": 0.65, "learning_rate": 7.202080023111368e-05, "loss": 3.2396, "step": 4531 }, { "epoch": 0.65, "learning_rate": 7.199191102123357e-05, "loss": 3.1502, "step": 4532 }, { "epoch": 0.65, "learning_rate": 7.196302181135347e-05, "loss": 3.1763, "step": 4533 }, { "epoch": 0.65, "learning_rate": 7.193413260147336e-05, "loss": 3.1303, "step": 4534 }, { "epoch": 0.65, "learning_rate": 7.190524339159324e-05, "loss": 3.0942, "step": 4535 }, { "epoch": 0.65, "learning_rate": 7.187635418171313e-05, "loss": 3.1065, "step": 4536 }, { "epoch": 0.65, "learning_rate": 7.184746497183303e-05, "loss": 3.2077, "step": 4537 }, { "epoch": 0.65, "learning_rate": 7.181857576195291e-05, "loss": 3.2625, "step": 4538 }, { "epoch": 0.65, "learning_rate": 7.17896865520728e-05, "loss": 3.237, "step": 4539 }, { "epoch": 0.65, "learning_rate": 7.17607973421927e-05, "loss": 3.3133, "step": 4540 }, { "epoch": 0.65, "learning_rate": 7.173190813231259e-05, "loss": 3.2684, "step": 4541 }, { "epoch": 0.65, "learning_rate": 7.170301892243247e-05, "loss": 3.2643, "step": 4542 }, { "epoch": 0.65, "learning_rate": 7.167412971255236e-05, "loss": 3.2464, "step": 4543 }, { "epoch": 0.65, "learning_rate": 7.164524050267226e-05, "loss": 3.2603, "step": 4544 }, { "epoch": 0.65, "learning_rate": 7.161635129279215e-05, "loss": 3.1629, "step": 4545 }, { "epoch": 0.65, "learning_rate": 7.158746208291203e-05, "loss": 3.2386, "step": 4546 }, { "epoch": 0.65, "learning_rate": 7.155857287303193e-05, "loss": 3.322, "step": 4547 }, { "epoch": 0.65, "learning_rate": 7.152968366315182e-05, "loss": 2.9695, "step": 4548 }, { "epoch": 0.65, "learning_rate": 7.15007944532717e-05, "loss": 3.1846, "step": 4549 }, { "epoch": 0.65, "learning_rate": 7.14719052433916e-05, "loss": 3.2918, "step": 4550 }, { "epoch": 0.65, "learning_rate": 7.144301603351149e-05, "loss": 3.2516, "step": 4551 }, { "epoch": 0.65, "learning_rate": 7.141412682363138e-05, "loss": 3.2238, "step": 4552 }, { "epoch": 0.65, "learning_rate": 7.138523761375126e-05, "loss": 3.2354, "step": 4553 }, { "epoch": 0.65, "learning_rate": 7.135634840387115e-05, "loss": 3.1473, "step": 4554 }, { "epoch": 0.65, "learning_rate": 7.132745919399105e-05, "loss": 3.3592, "step": 4555 }, { "epoch": 0.65, "learning_rate": 7.129856998411094e-05, "loss": 3.2755, "step": 4556 }, { "epoch": 0.65, "learning_rate": 7.126968077423082e-05, "loss": 3.1127, "step": 4557 }, { "epoch": 0.65, "learning_rate": 7.124079156435072e-05, "loss": 3.1792, "step": 4558 }, { "epoch": 0.65, "learning_rate": 7.121190235447061e-05, "loss": 3.3286, "step": 4559 }, { "epoch": 0.65, "learning_rate": 7.118301314459049e-05, "loss": 3.2957, "step": 4560 }, { "epoch": 0.65, "learning_rate": 7.11541239347104e-05, "loss": 3.025, "step": 4561 }, { "epoch": 0.65, "learning_rate": 7.112523472483028e-05, "loss": 3.189, "step": 4562 }, { "epoch": 0.65, "learning_rate": 7.109634551495017e-05, "loss": 3.1633, "step": 4563 }, { "epoch": 0.65, "learning_rate": 7.106745630507005e-05, "loss": 3.0469, "step": 4564 }, { "epoch": 0.65, "learning_rate": 7.103856709518995e-05, "loss": 3.3885, "step": 4565 }, { "epoch": 0.65, "learning_rate": 7.100967788530984e-05, "loss": 3.293, "step": 4566 }, { "epoch": 0.65, "learning_rate": 7.098078867542973e-05, "loss": 3.1842, "step": 4567 }, { "epoch": 0.65, "learning_rate": 7.095189946554961e-05, "loss": 3.2436, "step": 4568 }, { "epoch": 0.65, "learning_rate": 7.092301025566951e-05, "loss": 3.2228, "step": 4569 }, { "epoch": 0.65, "learning_rate": 7.08941210457894e-05, "loss": 3.3918, "step": 4570 }, { "epoch": 0.65, "learning_rate": 7.08652318359093e-05, "loss": 3.0825, "step": 4571 }, { "epoch": 0.65, "learning_rate": 7.083634262602919e-05, "loss": 3.2249, "step": 4572 }, { "epoch": 0.65, "learning_rate": 7.080745341614907e-05, "loss": 3.0199, "step": 4573 }, { "epoch": 0.65, "learning_rate": 7.077856420626896e-05, "loss": 3.2107, "step": 4574 }, { "epoch": 0.65, "learning_rate": 7.074967499638884e-05, "loss": 3.1974, "step": 4575 }, { "epoch": 0.65, "learning_rate": 7.072078578650875e-05, "loss": 3.2753, "step": 4576 }, { "epoch": 0.65, "eval_loss": 3.4311070442199707, "eval_runtime": 471.4356, "eval_samples_per_second": 43.457, "eval_steps_per_second": 14.486, "step": 4576 }, { "epoch": 0.65, "learning_rate": 7.069189657662863e-05, "loss": 3.0329, "step": 4577 }, { "epoch": 0.65, "learning_rate": 7.066300736674853e-05, "loss": 3.3126, "step": 4578 }, { "epoch": 0.65, "learning_rate": 7.06341181568684e-05, "loss": 3.1901, "step": 4579 }, { "epoch": 0.65, "learning_rate": 7.06052289469883e-05, "loss": 3.163, "step": 4580 }, { "epoch": 0.65, "learning_rate": 7.05763397371082e-05, "loss": 3.1173, "step": 4581 }, { "epoch": 0.65, "learning_rate": 7.054745052722809e-05, "loss": 3.244, "step": 4582 }, { "epoch": 0.65, "learning_rate": 7.051856131734798e-05, "loss": 3.2602, "step": 4583 }, { "epoch": 0.65, "learning_rate": 7.048967210746786e-05, "loss": 2.991, "step": 4584 }, { "epoch": 0.65, "learning_rate": 7.046078289758776e-05, "loss": 3.2214, "step": 4585 }, { "epoch": 0.65, "learning_rate": 7.043189368770764e-05, "loss": 3.2427, "step": 4586 }, { "epoch": 0.65, "learning_rate": 7.040300447782754e-05, "loss": 3.2009, "step": 4587 }, { "epoch": 0.65, "learning_rate": 7.037411526794742e-05, "loss": 3.191, "step": 4588 }, { "epoch": 0.65, "learning_rate": 7.034522605806732e-05, "loss": 3.2651, "step": 4589 }, { "epoch": 0.65, "learning_rate": 7.03163368481872e-05, "loss": 3.3027, "step": 4590 }, { "epoch": 0.65, "learning_rate": 7.028744763830709e-05, "loss": 3.2212, "step": 4591 }, { "epoch": 0.65, "learning_rate": 7.025855842842698e-05, "loss": 3.2133, "step": 4592 }, { "epoch": 0.65, "learning_rate": 7.022966921854688e-05, "loss": 3.1425, "step": 4593 }, { "epoch": 0.65, "learning_rate": 7.020078000866677e-05, "loss": 3.2199, "step": 4594 }, { "epoch": 0.65, "learning_rate": 7.017189079878665e-05, "loss": 3.2051, "step": 4595 }, { "epoch": 0.65, "learning_rate": 7.014300158890655e-05, "loss": 3.1071, "step": 4596 }, { "epoch": 0.65, "learning_rate": 7.011411237902644e-05, "loss": 3.2108, "step": 4597 }, { "epoch": 0.65, "learning_rate": 7.008522316914633e-05, "loss": 3.1735, "step": 4598 }, { "epoch": 0.65, "learning_rate": 7.005633395926621e-05, "loss": 3.1795, "step": 4599 }, { "epoch": 0.65, "learning_rate": 7.002744474938611e-05, "loss": 3.1864, "step": 4600 }, { "epoch": 0.66, "learning_rate": 6.999855553950599e-05, "loss": 3.3092, "step": 4601 }, { "epoch": 0.66, "learning_rate": 6.99696663296259e-05, "loss": 3.1383, "step": 4602 }, { "epoch": 0.66, "learning_rate": 6.994077711974578e-05, "loss": 3.1475, "step": 4603 }, { "epoch": 0.66, "learning_rate": 6.991188790986567e-05, "loss": 3.2344, "step": 4604 }, { "epoch": 0.66, "learning_rate": 6.988299869998556e-05, "loss": 3.1652, "step": 4605 }, { "epoch": 0.66, "learning_rate": 6.985410949010544e-05, "loss": 3.2111, "step": 4606 }, { "epoch": 0.66, "learning_rate": 6.982522028022534e-05, "loss": 3.224, "step": 4607 }, { "epoch": 0.66, "learning_rate": 6.979633107034523e-05, "loss": 3.1232, "step": 4608 }, { "epoch": 0.66, "learning_rate": 6.976744186046513e-05, "loss": 3.2263, "step": 4609 }, { "epoch": 0.66, "learning_rate": 6.9738552650585e-05, "loss": 3.1037, "step": 4610 }, { "epoch": 0.66, "learning_rate": 6.97096634407049e-05, "loss": 3.2389, "step": 4611 }, { "epoch": 0.66, "learning_rate": 6.968077423082478e-05, "loss": 3.185, "step": 4612 }, { "epoch": 0.66, "learning_rate": 6.965188502094469e-05, "loss": 3.2433, "step": 4613 }, { "epoch": 0.66, "learning_rate": 6.962299581106457e-05, "loss": 3.2258, "step": 4614 }, { "epoch": 0.66, "learning_rate": 6.959410660118446e-05, "loss": 3.1148, "step": 4615 }, { "epoch": 0.66, "learning_rate": 6.956521739130436e-05, "loss": 3.2667, "step": 4616 }, { "epoch": 0.66, "learning_rate": 6.953632818142424e-05, "loss": 3.1186, "step": 4617 }, { "epoch": 0.66, "learning_rate": 6.950743897154414e-05, "loss": 3.085, "step": 4618 }, { "epoch": 0.66, "learning_rate": 6.947854976166402e-05, "loss": 3.1934, "step": 4619 }, { "epoch": 0.66, "learning_rate": 6.944966055178392e-05, "loss": 3.0886, "step": 4620 }, { "epoch": 0.66, "learning_rate": 6.94207713419038e-05, "loss": 3.1788, "step": 4621 }, { "epoch": 0.66, "learning_rate": 6.939188213202369e-05, "loss": 3.2761, "step": 4622 }, { "epoch": 0.66, "learning_rate": 6.936299292214358e-05, "loss": 3.237, "step": 4623 }, { "epoch": 0.66, "learning_rate": 6.933410371226348e-05, "loss": 3.1859, "step": 4624 }, { "epoch": 0.66, "learning_rate": 6.930521450238336e-05, "loss": 3.1281, "step": 4625 }, { "epoch": 0.66, "learning_rate": 6.927632529250325e-05, "loss": 3.1964, "step": 4626 }, { "epoch": 0.66, "learning_rate": 6.924743608262315e-05, "loss": 3.3033, "step": 4627 }, { "epoch": 0.66, "learning_rate": 6.921854687274303e-05, "loss": 3.1437, "step": 4628 }, { "epoch": 0.66, "learning_rate": 6.918965766286293e-05, "loss": 3.1905, "step": 4629 }, { "epoch": 0.66, "learning_rate": 6.916076845298281e-05, "loss": 3.2168, "step": 4630 }, { "epoch": 0.66, "learning_rate": 6.913187924310271e-05, "loss": 3.0697, "step": 4631 }, { "epoch": 0.66, "learning_rate": 6.910299003322259e-05, "loss": 3.2662, "step": 4632 }, { "epoch": 0.66, "learning_rate": 6.907410082334248e-05, "loss": 3.0232, "step": 4633 }, { "epoch": 0.66, "learning_rate": 6.904521161346238e-05, "loss": 3.0126, "step": 4634 }, { "epoch": 0.66, "learning_rate": 6.901632240358227e-05, "loss": 3.2893, "step": 4635 }, { "epoch": 0.66, "learning_rate": 6.898743319370215e-05, "loss": 3.0733, "step": 4636 }, { "epoch": 0.66, "learning_rate": 6.895854398382204e-05, "loss": 3.175, "step": 4637 }, { "epoch": 0.66, "learning_rate": 6.892965477394194e-05, "loss": 3.0857, "step": 4638 }, { "epoch": 0.66, "learning_rate": 6.890076556406183e-05, "loss": 3.2243, "step": 4639 }, { "epoch": 0.66, "learning_rate": 6.887187635418173e-05, "loss": 3.2138, "step": 4640 }, { "epoch": 0.66, "learning_rate": 6.88429871443016e-05, "loss": 3.2269, "step": 4641 }, { "epoch": 0.66, "learning_rate": 6.88140979344215e-05, "loss": 3.1455, "step": 4642 }, { "epoch": 0.66, "learning_rate": 6.878520872454138e-05, "loss": 3.24, "step": 4643 }, { "epoch": 0.66, "learning_rate": 6.875631951466129e-05, "loss": 3.0995, "step": 4644 }, { "epoch": 0.66, "learning_rate": 6.872743030478117e-05, "loss": 3.1545, "step": 4645 }, { "epoch": 0.66, "learning_rate": 6.869854109490106e-05, "loss": 3.2984, "step": 4646 }, { "epoch": 0.66, "learning_rate": 6.866965188502094e-05, "loss": 3.1757, "step": 4647 }, { "epoch": 0.66, "learning_rate": 6.864076267514084e-05, "loss": 3.2271, "step": 4648 }, { "epoch": 0.66, "learning_rate": 6.861187346526073e-05, "loss": 3.2733, "step": 4649 }, { "epoch": 0.66, "learning_rate": 6.858298425538062e-05, "loss": 3.1991, "step": 4650 }, { "epoch": 0.66, "learning_rate": 6.855409504550052e-05, "loss": 3.2941, "step": 4651 }, { "epoch": 0.66, "learning_rate": 6.85252058356204e-05, "loss": 3.1293, "step": 4652 }, { "epoch": 0.66, "learning_rate": 6.849631662574029e-05, "loss": 3.1675, "step": 4653 }, { "epoch": 0.66, "learning_rate": 6.846742741586017e-05, "loss": 3.1558, "step": 4654 }, { "epoch": 0.66, "learning_rate": 6.843853820598008e-05, "loss": 3.1124, "step": 4655 }, { "epoch": 0.66, "learning_rate": 6.840964899609996e-05, "loss": 2.9734, "step": 4656 }, { "epoch": 0.66, "learning_rate": 6.838075978621985e-05, "loss": 3.2422, "step": 4657 }, { "epoch": 0.66, "learning_rate": 6.835187057633973e-05, "loss": 3.0933, "step": 4658 }, { "epoch": 0.66, "learning_rate": 6.832298136645963e-05, "loss": 3.1473, "step": 4659 }, { "epoch": 0.66, "learning_rate": 6.829409215657952e-05, "loss": 3.221, "step": 4660 }, { "epoch": 0.66, "learning_rate": 6.826520294669941e-05, "loss": 3.2445, "step": 4661 }, { "epoch": 0.66, "learning_rate": 6.82363137368193e-05, "loss": 3.1867, "step": 4662 }, { "epoch": 0.66, "learning_rate": 6.820742452693919e-05, "loss": 3.2059, "step": 4663 }, { "epoch": 0.66, "learning_rate": 6.817853531705908e-05, "loss": 3.1624, "step": 4664 }, { "epoch": 0.66, "learning_rate": 6.814964610717898e-05, "loss": 3.0782, "step": 4665 }, { "epoch": 0.66, "learning_rate": 6.812075689729887e-05, "loss": 3.2645, "step": 4666 }, { "epoch": 0.66, "learning_rate": 6.809186768741875e-05, "loss": 3.1695, "step": 4667 }, { "epoch": 0.66, "learning_rate": 6.806297847753864e-05, "loss": 3.2342, "step": 4668 }, { "epoch": 0.66, "learning_rate": 6.803408926765852e-05, "loss": 3.159, "step": 4669 }, { "epoch": 0.66, "learning_rate": 6.800520005777842e-05, "loss": 3.1207, "step": 4670 }, { "epoch": 0.67, "learning_rate": 6.797631084789831e-05, "loss": 3.1898, "step": 4671 }, { "epoch": 0.67, "learning_rate": 6.79474216380182e-05, "loss": 3.2039, "step": 4672 }, { "epoch": 0.67, "learning_rate": 6.791853242813809e-05, "loss": 3.0811, "step": 4673 }, { "epoch": 0.67, "learning_rate": 6.788964321825798e-05, "loss": 3.2256, "step": 4674 }, { "epoch": 0.67, "learning_rate": 6.786075400837787e-05, "loss": 3.1116, "step": 4675 }, { "epoch": 0.67, "learning_rate": 6.783186479849777e-05, "loss": 3.0967, "step": 4676 }, { "epoch": 0.67, "learning_rate": 6.780297558861766e-05, "loss": 3.2738, "step": 4677 }, { "epoch": 0.67, "learning_rate": 6.777408637873754e-05, "loss": 3.2311, "step": 4678 }, { "epoch": 0.67, "learning_rate": 6.774519716885744e-05, "loss": 3.3502, "step": 4679 }, { "epoch": 0.67, "learning_rate": 6.771630795897732e-05, "loss": 3.2233, "step": 4680 }, { "epoch": 0.67, "learning_rate": 6.768741874909722e-05, "loss": 3.283, "step": 4681 }, { "epoch": 0.67, "learning_rate": 6.76585295392171e-05, "loss": 3.2997, "step": 4682 }, { "epoch": 0.67, "learning_rate": 6.7629640329337e-05, "loss": 3.1227, "step": 4683 }, { "epoch": 0.67, "learning_rate": 6.760075111945688e-05, "loss": 3.256, "step": 4684 }, { "epoch": 0.67, "learning_rate": 6.757186190957677e-05, "loss": 3.1657, "step": 4685 }, { "epoch": 0.67, "learning_rate": 6.754297269969667e-05, "loss": 3.2956, "step": 4686 }, { "epoch": 0.67, "learning_rate": 6.751408348981656e-05, "loss": 3.1841, "step": 4687 }, { "epoch": 0.67, "learning_rate": 6.748519427993645e-05, "loss": 3.2518, "step": 4688 }, { "epoch": 0.67, "learning_rate": 6.745630507005633e-05, "loss": 3.0815, "step": 4689 }, { "epoch": 0.67, "learning_rate": 6.742741586017623e-05, "loss": 3.1521, "step": 4690 }, { "epoch": 0.67, "learning_rate": 6.739852665029611e-05, "loss": 3.2677, "step": 4691 }, { "epoch": 0.67, "learning_rate": 6.736963744041601e-05, "loss": 3.1445, "step": 4692 }, { "epoch": 0.67, "learning_rate": 6.73407482305359e-05, "loss": 3.217, "step": 4693 }, { "epoch": 0.67, "learning_rate": 6.731185902065579e-05, "loss": 3.1792, "step": 4694 }, { "epoch": 0.67, "learning_rate": 6.728296981077567e-05, "loss": 2.9674, "step": 4695 }, { "epoch": 0.67, "learning_rate": 6.725408060089556e-05, "loss": 3.1301, "step": 4696 }, { "epoch": 0.67, "learning_rate": 6.722519139101546e-05, "loss": 3.2711, "step": 4697 }, { "epoch": 0.67, "learning_rate": 6.719630218113535e-05, "loss": 3.3011, "step": 4698 }, { "epoch": 0.67, "learning_rate": 6.716741297125524e-05, "loss": 3.1289, "step": 4699 }, { "epoch": 0.67, "learning_rate": 6.713852376137512e-05, "loss": 3.322, "step": 4700 }, { "epoch": 0.67, "learning_rate": 6.710963455149502e-05, "loss": 3.2275, "step": 4701 }, { "epoch": 0.67, "learning_rate": 6.708074534161491e-05, "loss": 3.2683, "step": 4702 }, { "epoch": 0.67, "learning_rate": 6.70518561317348e-05, "loss": 3.3123, "step": 4703 }, { "epoch": 0.67, "learning_rate": 6.702296692185469e-05, "loss": 3.1394, "step": 4704 }, { "epoch": 0.67, "learning_rate": 6.699407771197458e-05, "loss": 3.1832, "step": 4705 }, { "epoch": 0.67, "learning_rate": 6.696518850209446e-05, "loss": 3.2946, "step": 4706 }, { "epoch": 0.67, "learning_rate": 6.693629929221437e-05, "loss": 3.1702, "step": 4707 }, { "epoch": 0.67, "learning_rate": 6.690741008233425e-05, "loss": 3.2187, "step": 4708 }, { "epoch": 0.67, "learning_rate": 6.687852087245414e-05, "loss": 3.2411, "step": 4709 }, { "epoch": 0.67, "learning_rate": 6.684963166257404e-05, "loss": 3.2604, "step": 4710 }, { "epoch": 0.67, "learning_rate": 6.682074245269392e-05, "loss": 3.261, "step": 4711 }, { "epoch": 0.67, "learning_rate": 6.679185324281382e-05, "loss": 3.3329, "step": 4712 }, { "epoch": 0.67, "learning_rate": 6.67629640329337e-05, "loss": 3.1801, "step": 4713 }, { "epoch": 0.67, "learning_rate": 6.67340748230536e-05, "loss": 3.0755, "step": 4714 }, { "epoch": 0.67, "learning_rate": 6.670518561317348e-05, "loss": 3.2324, "step": 4715 }, { "epoch": 0.67, "learning_rate": 6.667629640329337e-05, "loss": 3.2004, "step": 4716 }, { "epoch": 0.67, "learning_rate": 6.664740719341325e-05, "loss": 3.0358, "step": 4717 }, { "epoch": 0.67, "learning_rate": 6.661851798353316e-05, "loss": 3.093, "step": 4718 }, { "epoch": 0.67, "learning_rate": 6.658962877365304e-05, "loss": 3.2078, "step": 4719 }, { "epoch": 0.67, "learning_rate": 6.656073956377293e-05, "loss": 3.3535, "step": 4720 }, { "epoch": 0.67, "learning_rate": 6.653185035389283e-05, "loss": 3.2595, "step": 4721 }, { "epoch": 0.67, "learning_rate": 6.650296114401271e-05, "loss": 3.1418, "step": 4722 }, { "epoch": 0.67, "learning_rate": 6.647407193413262e-05, "loss": 2.9937, "step": 4723 }, { "epoch": 0.67, "learning_rate": 6.64451827242525e-05, "loss": 3.2423, "step": 4724 }, { "epoch": 0.67, "learning_rate": 6.641629351437239e-05, "loss": 3.3218, "step": 4725 }, { "epoch": 0.67, "learning_rate": 6.638740430449227e-05, "loss": 3.1807, "step": 4726 }, { "epoch": 0.67, "learning_rate": 6.635851509461216e-05, "loss": 3.1824, "step": 4727 }, { "epoch": 0.67, "learning_rate": 6.632962588473206e-05, "loss": 3.1208, "step": 4728 }, { "epoch": 0.67, "learning_rate": 6.630073667485195e-05, "loss": 3.2159, "step": 4729 }, { "epoch": 0.67, "learning_rate": 6.627184746497183e-05, "loss": 3.1804, "step": 4730 }, { "epoch": 0.67, "learning_rate": 6.624295825509173e-05, "loss": 3.1377, "step": 4731 }, { "epoch": 0.67, "learning_rate": 6.621406904521162e-05, "loss": 3.2806, "step": 4732 }, { "epoch": 0.67, "learning_rate": 6.618517983533151e-05, "loss": 3.2248, "step": 4733 }, { "epoch": 0.67, "learning_rate": 6.61562906254514e-05, "loss": 3.1957, "step": 4734 }, { "epoch": 0.67, "learning_rate": 6.612740141557129e-05, "loss": 3.1038, "step": 4735 }, { "epoch": 0.67, "learning_rate": 6.609851220569118e-05, "loss": 3.2546, "step": 4736 }, { "epoch": 0.67, "learning_rate": 6.606962299581106e-05, "loss": 3.1971, "step": 4737 }, { "epoch": 0.67, "learning_rate": 6.604073378593095e-05, "loss": 3.2071, "step": 4738 }, { "epoch": 0.67, "learning_rate": 6.601184457605085e-05, "loss": 3.3514, "step": 4739 }, { "epoch": 0.67, "learning_rate": 6.598295536617074e-05, "loss": 3.2246, "step": 4740 }, { "epoch": 0.68, "learning_rate": 6.595406615629062e-05, "loss": 3.0967, "step": 4741 }, { "epoch": 0.68, "learning_rate": 6.592517694641052e-05, "loss": 3.1957, "step": 4742 }, { "epoch": 0.68, "learning_rate": 6.589628773653041e-05, "loss": 3.242, "step": 4743 }, { "epoch": 0.68, "learning_rate": 6.58673985266503e-05, "loss": 3.1726, "step": 4744 }, { "epoch": 0.68, "learning_rate": 6.58385093167702e-05, "loss": 3.1347, "step": 4745 }, { "epoch": 0.68, "learning_rate": 6.580962010689008e-05, "loss": 3.0802, "step": 4746 }, { "epoch": 0.68, "learning_rate": 6.578073089700997e-05, "loss": 3.2203, "step": 4747 }, { "epoch": 0.68, "learning_rate": 6.575184168712985e-05, "loss": 3.27, "step": 4748 }, { "epoch": 0.68, "learning_rate": 6.572295247724976e-05, "loss": 3.3669, "step": 4749 }, { "epoch": 0.68, "learning_rate": 6.569406326736964e-05, "loss": 3.0405, "step": 4750 }, { "epoch": 0.68, "learning_rate": 6.566517405748953e-05, "loss": 3.1361, "step": 4751 }, { "epoch": 0.68, "learning_rate": 6.563628484760941e-05, "loss": 3.2701, "step": 4752 }, { "epoch": 0.68, "learning_rate": 6.560739563772931e-05, "loss": 3.2333, "step": 4753 }, { "epoch": 0.68, "learning_rate": 6.55785064278492e-05, "loss": 3.1564, "step": 4754 }, { "epoch": 0.68, "learning_rate": 6.55496172179691e-05, "loss": 3.0823, "step": 4755 }, { "epoch": 0.68, "learning_rate": 6.552072800808899e-05, "loss": 3.1326, "step": 4756 }, { "epoch": 0.68, "learning_rate": 6.549183879820887e-05, "loss": 3.0293, "step": 4757 }, { "epoch": 0.68, "learning_rate": 6.546294958832876e-05, "loss": 3.1572, "step": 4758 }, { "epoch": 0.68, "learning_rate": 6.543406037844864e-05, "loss": 3.1817, "step": 4759 }, { "epoch": 0.68, "learning_rate": 6.540517116856855e-05, "loss": 3.2333, "step": 4760 }, { "epoch": 0.68, "learning_rate": 6.537628195868843e-05, "loss": 3.2094, "step": 4761 }, { "epoch": 0.68, "learning_rate": 6.534739274880833e-05, "loss": 3.1592, "step": 4762 }, { "epoch": 0.68, "learning_rate": 6.53185035389282e-05, "loss": 3.084, "step": 4763 }, { "epoch": 0.68, "learning_rate": 6.52896143290481e-05, "loss": 3.2225, "step": 4764 }, { "epoch": 0.68, "learning_rate": 6.526072511916799e-05, "loss": 3.0779, "step": 4765 }, { "epoch": 0.68, "learning_rate": 6.523183590928789e-05, "loss": 3.1048, "step": 4766 }, { "epoch": 0.68, "learning_rate": 6.520294669940778e-05, "loss": 3.2311, "step": 4767 }, { "epoch": 0.68, "learning_rate": 6.517405748952766e-05, "loss": 3.187, "step": 4768 }, { "epoch": 0.68, "learning_rate": 6.514516827964755e-05, "loss": 3.1343, "step": 4769 }, { "epoch": 0.68, "learning_rate": 6.511627906976745e-05, "loss": 3.1523, "step": 4770 }, { "epoch": 0.68, "learning_rate": 6.508738985988734e-05, "loss": 3.0041, "step": 4771 }, { "epoch": 0.68, "learning_rate": 6.505850065000722e-05, "loss": 3.1949, "step": 4772 }, { "epoch": 0.68, "learning_rate": 6.502961144012712e-05, "loss": 3.1414, "step": 4773 }, { "epoch": 0.68, "learning_rate": 6.5000722230247e-05, "loss": 3.0958, "step": 4774 }, { "epoch": 0.68, "learning_rate": 6.49718330203669e-05, "loss": 3.2459, "step": 4775 }, { "epoch": 0.68, "learning_rate": 6.494294381048678e-05, "loss": 3.1617, "step": 4776 }, { "epoch": 0.68, "learning_rate": 6.491405460060668e-05, "loss": 3.1454, "step": 4777 }, { "epoch": 0.68, "learning_rate": 6.488516539072657e-05, "loss": 3.1938, "step": 4778 }, { "epoch": 0.68, "learning_rate": 6.485627618084645e-05, "loss": 3.3808, "step": 4779 }, { "epoch": 0.68, "learning_rate": 6.482738697096635e-05, "loss": 3.0696, "step": 4780 }, { "epoch": 0.68, "learning_rate": 6.479849776108624e-05, "loss": 3.2487, "step": 4781 }, { "epoch": 0.68, "learning_rate": 6.476960855120613e-05, "loss": 3.3306, "step": 4782 }, { "epoch": 0.68, "learning_rate": 6.474071934132601e-05, "loss": 3.1838, "step": 4783 }, { "epoch": 0.68, "learning_rate": 6.471183013144591e-05, "loss": 3.1287, "step": 4784 }, { "epoch": 0.68, "learning_rate": 6.468294092156579e-05, "loss": 3.1744, "step": 4785 }, { "epoch": 0.68, "learning_rate": 6.46540517116857e-05, "loss": 3.2149, "step": 4786 }, { "epoch": 0.68, "learning_rate": 6.462516250180558e-05, "loss": 3.0864, "step": 4787 }, { "epoch": 0.68, "learning_rate": 6.459627329192547e-05, "loss": 3.2708, "step": 4788 }, { "epoch": 0.68, "learning_rate": 6.456738408204535e-05, "loss": 3.0833, "step": 4789 }, { "epoch": 0.68, "learning_rate": 6.453849487216524e-05, "loss": 3.1123, "step": 4790 }, { "epoch": 0.68, "learning_rate": 6.450960566228514e-05, "loss": 3.1003, "step": 4791 }, { "epoch": 0.68, "learning_rate": 6.448071645240503e-05, "loss": 3.1161, "step": 4792 }, { "epoch": 0.68, "learning_rate": 6.445182724252493e-05, "loss": 3.1413, "step": 4793 }, { "epoch": 0.68, "learning_rate": 6.44229380326448e-05, "loss": 3.1732, "step": 4794 }, { "epoch": 0.68, "learning_rate": 6.43940488227647e-05, "loss": 3.1983, "step": 4795 }, { "epoch": 0.68, "learning_rate": 6.43651596128846e-05, "loss": 3.1057, "step": 4796 }, { "epoch": 0.68, "learning_rate": 6.433627040300449e-05, "loss": 3.2081, "step": 4797 }, { "epoch": 0.68, "learning_rate": 6.430738119312437e-05, "loss": 3.2113, "step": 4798 }, { "epoch": 0.68, "learning_rate": 6.427849198324426e-05, "loss": 3.3364, "step": 4799 }, { "epoch": 0.68, "learning_rate": 6.424960277336414e-05, "loss": 3.321, "step": 4800 }, { "epoch": 0.68, "learning_rate": 6.422071356348405e-05, "loss": 3.0925, "step": 4801 }, { "epoch": 0.68, "learning_rate": 6.419182435360393e-05, "loss": 3.1668, "step": 4802 }, { "epoch": 0.68, "learning_rate": 6.416293514372382e-05, "loss": 3.1238, "step": 4803 }, { "epoch": 0.68, "learning_rate": 6.413404593384372e-05, "loss": 3.1487, "step": 4804 }, { "epoch": 0.68, "learning_rate": 6.41051567239636e-05, "loss": 2.9601, "step": 4805 }, { "epoch": 0.68, "learning_rate": 6.407626751408349e-05, "loss": 3.0914, "step": 4806 }, { "epoch": 0.68, "learning_rate": 6.404737830420338e-05, "loss": 3.3274, "step": 4807 }, { "epoch": 0.68, "learning_rate": 6.401848909432328e-05, "loss": 3.3154, "step": 4808 }, { "epoch": 0.68, "learning_rate": 6.398959988444316e-05, "loss": 3.0696, "step": 4809 }, { "epoch": 0.68, "learning_rate": 6.396071067456305e-05, "loss": 3.0216, "step": 4810 }, { "epoch": 0.68, "learning_rate": 6.393182146468293e-05, "loss": 3.2504, "step": 4811 }, { "epoch": 0.69, "learning_rate": 6.390293225480284e-05, "loss": 3.142, "step": 4812 }, { "epoch": 0.69, "learning_rate": 6.387404304492272e-05, "loss": 3.1038, "step": 4813 }, { "epoch": 0.69, "learning_rate": 6.384515383504261e-05, "loss": 3.1674, "step": 4814 }, { "epoch": 0.69, "learning_rate": 6.381626462516251e-05, "loss": 3.2564, "step": 4815 }, { "epoch": 0.69, "learning_rate": 6.378737541528239e-05, "loss": 3.144, "step": 4816 }, { "epoch": 0.69, "learning_rate": 6.37584862054023e-05, "loss": 3.2916, "step": 4817 }, { "epoch": 0.69, "learning_rate": 6.372959699552218e-05, "loss": 3.3567, "step": 4818 }, { "epoch": 0.69, "learning_rate": 6.370070778564207e-05, "loss": 3.1593, "step": 4819 }, { "epoch": 0.69, "learning_rate": 6.367181857576195e-05, "loss": 3.218, "step": 4820 }, { "epoch": 0.69, "learning_rate": 6.364292936588184e-05, "loss": 3.347, "step": 4821 }, { "epoch": 0.69, "learning_rate": 6.361404015600174e-05, "loss": 3.205, "step": 4822 }, { "epoch": 0.69, "learning_rate": 6.358515094612163e-05, "loss": 3.1968, "step": 4823 }, { "epoch": 0.69, "learning_rate": 6.355626173624151e-05, "loss": 3.1964, "step": 4824 }, { "epoch": 0.69, "learning_rate": 6.35273725263614e-05, "loss": 3.2347, "step": 4825 }, { "epoch": 0.69, "learning_rate": 6.34984833164813e-05, "loss": 3.3071, "step": 4826 }, { "epoch": 0.69, "learning_rate": 6.346959410660118e-05, "loss": 3.2471, "step": 4827 }, { "epoch": 0.69, "learning_rate": 6.344070489672109e-05, "loss": 3.2021, "step": 4828 }, { "epoch": 0.69, "learning_rate": 6.341181568684097e-05, "loss": 3.1955, "step": 4829 }, { "epoch": 0.69, "learning_rate": 6.338292647696086e-05, "loss": 3.0071, "step": 4830 }, { "epoch": 0.69, "learning_rate": 6.335403726708074e-05, "loss": 3.2419, "step": 4831 }, { "epoch": 0.69, "learning_rate": 6.332514805720064e-05, "loss": 3.0888, "step": 4832 }, { "epoch": 0.69, "learning_rate": 6.329625884732053e-05, "loss": 3.1945, "step": 4833 }, { "epoch": 0.69, "learning_rate": 6.326736963744042e-05, "loss": 3.0264, "step": 4834 }, { "epoch": 0.69, "learning_rate": 6.32384804275603e-05, "loss": 3.1711, "step": 4835 }, { "epoch": 0.69, "learning_rate": 6.32095912176802e-05, "loss": 3.1518, "step": 4836 }, { "epoch": 0.69, "learning_rate": 6.318070200780009e-05, "loss": 3.0763, "step": 4837 }, { "epoch": 0.69, "learning_rate": 6.315181279791998e-05, "loss": 3.1948, "step": 4838 }, { "epoch": 0.69, "learning_rate": 6.312292358803988e-05, "loss": 3.2302, "step": 4839 }, { "epoch": 0.69, "learning_rate": 6.309403437815976e-05, "loss": 3.1315, "step": 4840 }, { "epoch": 0.69, "learning_rate": 6.306514516827965e-05, "loss": 3.0718, "step": 4841 }, { "epoch": 0.69, "learning_rate": 6.303625595839953e-05, "loss": 3.2503, "step": 4842 }, { "epoch": 0.69, "learning_rate": 6.300736674851944e-05, "loss": 3.1226, "step": 4843 }, { "epoch": 0.69, "learning_rate": 6.297847753863932e-05, "loss": 3.2664, "step": 4844 }, { "epoch": 0.69, "learning_rate": 6.294958832875921e-05, "loss": 2.9803, "step": 4845 }, { "epoch": 0.69, "learning_rate": 6.29206991188791e-05, "loss": 3.2725, "step": 4846 }, { "epoch": 0.69, "learning_rate": 6.289180990899899e-05, "loss": 3.1647, "step": 4847 }, { "epoch": 0.69, "learning_rate": 6.286292069911888e-05, "loss": 3.164, "step": 4848 }, { "epoch": 0.69, "learning_rate": 6.283403148923878e-05, "loss": 3.2146, "step": 4849 }, { "epoch": 0.69, "learning_rate": 6.280514227935867e-05, "loss": 3.2211, "step": 4850 }, { "epoch": 0.69, "learning_rate": 6.277625306947855e-05, "loss": 3.0367, "step": 4851 }, { "epoch": 0.69, "learning_rate": 6.274736385959844e-05, "loss": 3.1545, "step": 4852 }, { "epoch": 0.69, "learning_rate": 6.271847464971832e-05, "loss": 3.185, "step": 4853 }, { "epoch": 0.69, "learning_rate": 6.268958543983823e-05, "loss": 3.0758, "step": 4854 }, { "epoch": 0.69, "learning_rate": 6.266069622995811e-05, "loss": 3.1946, "step": 4855 }, { "epoch": 0.69, "learning_rate": 6.2631807020078e-05, "loss": 3.0128, "step": 4856 }, { "epoch": 0.69, "learning_rate": 6.260291781019789e-05, "loss": 3.2722, "step": 4857 }, { "epoch": 0.69, "learning_rate": 6.257402860031778e-05, "loss": 3.0703, "step": 4858 }, { "epoch": 0.69, "learning_rate": 6.254513939043767e-05, "loss": 3.3005, "step": 4859 }, { "epoch": 0.69, "learning_rate": 6.251625018055757e-05, "loss": 3.2755, "step": 4860 }, { "epoch": 0.69, "learning_rate": 6.248736097067746e-05, "loss": 3.1733, "step": 4861 }, { "epoch": 0.69, "learning_rate": 6.245847176079734e-05, "loss": 3.0191, "step": 4862 }, { "epoch": 0.69, "learning_rate": 6.242958255091724e-05, "loss": 3.1909, "step": 4863 }, { "epoch": 0.69, "learning_rate": 6.240069334103713e-05, "loss": 3.344, "step": 4864 }, { "epoch": 0.69, "learning_rate": 6.237180413115702e-05, "loss": 3.2325, "step": 4865 }, { "epoch": 0.69, "learning_rate": 6.23429149212769e-05, "loss": 3.1482, "step": 4866 }, { "epoch": 0.69, "learning_rate": 6.23140257113968e-05, "loss": 3.2666, "step": 4867 }, { "epoch": 0.69, "learning_rate": 6.228513650151668e-05, "loss": 3.1017, "step": 4868 }, { "epoch": 0.69, "learning_rate": 6.225624729163657e-05, "loss": 3.0212, "step": 4869 }, { "epoch": 0.69, "learning_rate": 6.222735808175647e-05, "loss": 3.141, "step": 4870 }, { "epoch": 0.69, "learning_rate": 6.219846887187636e-05, "loss": 3.1939, "step": 4871 }, { "epoch": 0.69, "learning_rate": 6.216957966199625e-05, "loss": 3.1901, "step": 4872 }, { "epoch": 0.69, "learning_rate": 6.214069045211613e-05, "loss": 2.9811, "step": 4873 }, { "epoch": 0.69, "learning_rate": 6.211180124223603e-05, "loss": 3.1416, "step": 4874 }, { "epoch": 0.69, "learning_rate": 6.208291203235592e-05, "loss": 3.2035, "step": 4875 }, { "epoch": 0.69, "learning_rate": 6.205402282247581e-05, "loss": 3.3768, "step": 4876 }, { "epoch": 0.69, "learning_rate": 6.20251336125957e-05, "loss": 3.1864, "step": 4877 }, { "epoch": 0.69, "learning_rate": 6.199624440271559e-05, "loss": 3.2768, "step": 4878 }, { "epoch": 0.69, "learning_rate": 6.196735519283547e-05, "loss": 3.2096, "step": 4879 }, { "epoch": 0.69, "learning_rate": 6.193846598295538e-05, "loss": 3.2389, "step": 4880 }, { "epoch": 0.69, "learning_rate": 6.190957677307526e-05, "loss": 3.1871, "step": 4881 }, { "epoch": 0.7, "learning_rate": 6.188068756319515e-05, "loss": 3.1195, "step": 4882 }, { "epoch": 0.7, "learning_rate": 6.185179835331504e-05, "loss": 3.1172, "step": 4883 }, { "epoch": 0.7, "learning_rate": 6.182290914343492e-05, "loss": 3.2083, "step": 4884 }, { "epoch": 0.7, "learning_rate": 6.179401993355482e-05, "loss": 3.2845, "step": 4885 }, { "epoch": 0.7, "learning_rate": 6.176513072367471e-05, "loss": 3.2313, "step": 4886 }, { "epoch": 0.7, "learning_rate": 6.17362415137946e-05, "loss": 3.2976, "step": 4887 }, { "epoch": 0.7, "learning_rate": 6.170735230391449e-05, "loss": 3.2979, "step": 4888 }, { "epoch": 0.7, "learning_rate": 6.167846309403438e-05, "loss": 3.2395, "step": 4889 }, { "epoch": 0.7, "learning_rate": 6.164957388415426e-05, "loss": 3.2283, "step": 4890 }, { "epoch": 0.7, "learning_rate": 6.162068467427417e-05, "loss": 3.1972, "step": 4891 }, { "epoch": 0.7, "learning_rate": 6.159179546439405e-05, "loss": 3.2107, "step": 4892 }, { "epoch": 0.7, "learning_rate": 6.156290625451394e-05, "loss": 3.1887, "step": 4893 }, { "epoch": 0.7, "learning_rate": 6.153401704463384e-05, "loss": 3.0887, "step": 4894 }, { "epoch": 0.7, "learning_rate": 6.150512783475372e-05, "loss": 3.2432, "step": 4895 }, { "epoch": 0.7, "learning_rate": 6.147623862487361e-05, "loss": 3.0324, "step": 4896 }, { "epoch": 0.7, "learning_rate": 6.14473494149935e-05, "loss": 3.1536, "step": 4897 }, { "epoch": 0.7, "learning_rate": 6.14184602051134e-05, "loss": 3.0445, "step": 4898 }, { "epoch": 0.7, "learning_rate": 6.138957099523328e-05, "loss": 3.224, "step": 4899 }, { "epoch": 0.7, "learning_rate": 6.136068178535317e-05, "loss": 3.297, "step": 4900 }, { "epoch": 0.7, "learning_rate": 6.133179257547307e-05, "loss": 3.2922, "step": 4901 }, { "epoch": 0.7, "learning_rate": 6.130290336559296e-05, "loss": 3.2604, "step": 4902 }, { "epoch": 0.7, "learning_rate": 6.127401415571284e-05, "loss": 3.1391, "step": 4903 }, { "epoch": 0.7, "learning_rate": 6.124512494583273e-05, "loss": 3.2075, "step": 4904 }, { "epoch": 0.7, "learning_rate": 6.121623573595263e-05, "loss": 3.2132, "step": 4905 }, { "epoch": 0.7, "learning_rate": 6.118734652607252e-05, "loss": 3.0788, "step": 4906 }, { "epoch": 0.7, "learning_rate": 6.11584573161924e-05, "loss": 3.1895, "step": 4907 }, { "epoch": 0.7, "learning_rate": 6.11295681063123e-05, "loss": 3.2684, "step": 4908 }, { "epoch": 0.7, "learning_rate": 6.110067889643219e-05, "loss": 3.1677, "step": 4909 }, { "epoch": 0.7, "learning_rate": 6.107178968655207e-05, "loss": 3.1967, "step": 4910 }, { "epoch": 0.7, "learning_rate": 6.104290047667198e-05, "loss": 3.1881, "step": 4911 }, { "epoch": 0.7, "learning_rate": 6.101401126679186e-05, "loss": 3.2648, "step": 4912 }, { "epoch": 0.7, "learning_rate": 6.098512205691175e-05, "loss": 3.2341, "step": 4913 }, { "epoch": 0.7, "learning_rate": 6.095623284703164e-05, "loss": 3.1041, "step": 4914 }, { "epoch": 0.7, "learning_rate": 6.0927343637151525e-05, "loss": 3.1151, "step": 4915 }, { "epoch": 0.7, "learning_rate": 6.089845442727141e-05, "loss": 3.178, "step": 4916 }, { "epoch": 0.7, "learning_rate": 6.086956521739131e-05, "loss": 3.0908, "step": 4917 }, { "epoch": 0.7, "learning_rate": 6.08406760075112e-05, "loss": 3.1385, "step": 4918 }, { "epoch": 0.7, "learning_rate": 6.081178679763109e-05, "loss": 3.1282, "step": 4919 }, { "epoch": 0.7, "learning_rate": 6.0782897587750974e-05, "loss": 3.2666, "step": 4920 }, { "epoch": 0.7, "learning_rate": 6.075400837787086e-05, "loss": 3.1628, "step": 4921 }, { "epoch": 0.7, "learning_rate": 6.072511916799076e-05, "loss": 3.1016, "step": 4922 }, { "epoch": 0.7, "learning_rate": 6.069622995811065e-05, "loss": 3.1877, "step": 4923 }, { "epoch": 0.7, "learning_rate": 6.066734074823054e-05, "loss": 3.241, "step": 4924 }, { "epoch": 0.7, "learning_rate": 6.063845153835043e-05, "loss": 3.2473, "step": 4925 }, { "epoch": 0.7, "learning_rate": 6.0609562328470316e-05, "loss": 3.3245, "step": 4926 }, { "epoch": 0.7, "learning_rate": 6.058067311859022e-05, "loss": 3.0849, "step": 4927 }, { "epoch": 0.7, "learning_rate": 6.0551783908710104e-05, "loss": 3.1867, "step": 4928 }, { "epoch": 0.7, "eval_loss": 3.405430316925049, "eval_runtime": 471.5274, "eval_samples_per_second": 43.448, "eval_steps_per_second": 14.483, "step": 4928 }, { "epoch": 0.7, "learning_rate": 6.052289469882999e-05, "loss": 3.2446, "step": 4929 }, { "epoch": 0.7, "learning_rate": 6.049400548894988e-05, "loss": 3.0791, "step": 4930 }, { "epoch": 0.7, "learning_rate": 6.0465116279069765e-05, "loss": 3.1938, "step": 4931 }, { "epoch": 0.7, "learning_rate": 6.0436227069189666e-05, "loss": 3.1848, "step": 4932 }, { "epoch": 0.7, "learning_rate": 6.040733785930955e-05, "loss": 3.236, "step": 4933 }, { "epoch": 0.7, "learning_rate": 6.037844864942944e-05, "loss": 3.0853, "step": 4934 }, { "epoch": 0.7, "learning_rate": 6.034955943954933e-05, "loss": 3.2123, "step": 4935 }, { "epoch": 0.7, "learning_rate": 6.032067022966922e-05, "loss": 3.2857, "step": 4936 }, { "epoch": 0.7, "learning_rate": 6.029178101978911e-05, "loss": 3.1235, "step": 4937 }, { "epoch": 0.7, "learning_rate": 6.026289180990901e-05, "loss": 2.9182, "step": 4938 }, { "epoch": 0.7, "learning_rate": 6.0234002600028895e-05, "loss": 3.1968, "step": 4939 }, { "epoch": 0.7, "learning_rate": 6.020511339014878e-05, "loss": 3.1346, "step": 4940 }, { "epoch": 0.7, "learning_rate": 6.017622418026867e-05, "loss": 3.0872, "step": 4941 }, { "epoch": 0.7, "learning_rate": 6.0147334970388557e-05, "loss": 3.1449, "step": 4942 }, { "epoch": 0.7, "learning_rate": 6.011844576050846e-05, "loss": 3.015, "step": 4943 }, { "epoch": 0.7, "learning_rate": 6.0089556550628344e-05, "loss": 3.2121, "step": 4944 }, { "epoch": 0.7, "learning_rate": 6.006066734074823e-05, "loss": 3.1967, "step": 4945 }, { "epoch": 0.7, "learning_rate": 6.003177813086812e-05, "loss": 3.0941, "step": 4946 }, { "epoch": 0.7, "learning_rate": 6.000288892098801e-05, "loss": 3.1809, "step": 4947 }, { "epoch": 0.7, "learning_rate": 5.9973999711107906e-05, "loss": 2.9571, "step": 4948 }, { "epoch": 0.7, "learning_rate": 5.99451105012278e-05, "loss": 3.1754, "step": 4949 }, { "epoch": 0.7, "learning_rate": 5.991622129134769e-05, "loss": 3.143, "step": 4950 }, { "epoch": 0.7, "learning_rate": 5.9887332081467574e-05, "loss": 3.0815, "step": 4951 }, { "epoch": 0.71, "learning_rate": 5.985844287158746e-05, "loss": 3.2158, "step": 4952 }, { "epoch": 0.71, "learning_rate": 5.982955366170736e-05, "loss": 3.1671, "step": 4953 }, { "epoch": 0.71, "learning_rate": 5.980066445182725e-05, "loss": 3.2071, "step": 4954 }, { "epoch": 0.71, "learning_rate": 5.9771775241947136e-05, "loss": 3.1664, "step": 4955 }, { "epoch": 0.71, "learning_rate": 5.974288603206702e-05, "loss": 3.1808, "step": 4956 }, { "epoch": 0.71, "learning_rate": 5.971399682218691e-05, "loss": 3.1447, "step": 4957 }, { "epoch": 0.71, "learning_rate": 5.9685107612306804e-05, "loss": 3.2338, "step": 4958 }, { "epoch": 0.71, "learning_rate": 5.96562184024267e-05, "loss": 3.191, "step": 4959 }, { "epoch": 0.71, "learning_rate": 5.962732919254659e-05, "loss": 3.1847, "step": 4960 }, { "epoch": 0.71, "learning_rate": 5.959843998266648e-05, "loss": 3.018, "step": 4961 }, { "epoch": 0.71, "learning_rate": 5.9569550772786365e-05, "loss": 3.0103, "step": 4962 }, { "epoch": 0.71, "learning_rate": 5.954066156290625e-05, "loss": 3.1028, "step": 4963 }, { "epoch": 0.71, "learning_rate": 5.951177235302615e-05, "loss": 3.2361, "step": 4964 }, { "epoch": 0.71, "learning_rate": 5.948288314314604e-05, "loss": 3.2101, "step": 4965 }, { "epoch": 0.71, "learning_rate": 5.945399393326593e-05, "loss": 3.3008, "step": 4966 }, { "epoch": 0.71, "learning_rate": 5.9425104723385814e-05, "loss": 3.0315, "step": 4967 }, { "epoch": 0.71, "learning_rate": 5.93962155135057e-05, "loss": 3.2395, "step": 4968 }, { "epoch": 0.71, "learning_rate": 5.93673263036256e-05, "loss": 3.1923, "step": 4969 }, { "epoch": 0.71, "learning_rate": 5.933843709374549e-05, "loss": 3.185, "step": 4970 }, { "epoch": 0.71, "learning_rate": 5.930954788386538e-05, "loss": 2.9979, "step": 4971 }, { "epoch": 0.71, "learning_rate": 5.928065867398527e-05, "loss": 3.169, "step": 4972 }, { "epoch": 0.71, "learning_rate": 5.925176946410516e-05, "loss": 3.1904, "step": 4973 }, { "epoch": 0.71, "learning_rate": 5.922288025422506e-05, "loss": 3.1637, "step": 4974 }, { "epoch": 0.71, "learning_rate": 5.9193991044344944e-05, "loss": 2.9797, "step": 4975 }, { "epoch": 0.71, "learning_rate": 5.916510183446483e-05, "loss": 3.1532, "step": 4976 }, { "epoch": 0.71, "learning_rate": 5.913621262458472e-05, "loss": 3.1193, "step": 4977 }, { "epoch": 0.71, "learning_rate": 5.9107323414704606e-05, "loss": 3.1299, "step": 4978 }, { "epoch": 0.71, "learning_rate": 5.907843420482449e-05, "loss": 3.0867, "step": 4979 }, { "epoch": 0.71, "learning_rate": 5.904954499494439e-05, "loss": 3.1889, "step": 4980 }, { "epoch": 0.71, "learning_rate": 5.902065578506428e-05, "loss": 3.192, "step": 4981 }, { "epoch": 0.71, "learning_rate": 5.8991766575184174e-05, "loss": 3.2168, "step": 4982 }, { "epoch": 0.71, "learning_rate": 5.896287736530406e-05, "loss": 3.2185, "step": 4983 }, { "epoch": 0.71, "learning_rate": 5.893398815542395e-05, "loss": 3.176, "step": 4984 }, { "epoch": 0.71, "learning_rate": 5.890509894554385e-05, "loss": 3.1294, "step": 4985 }, { "epoch": 0.71, "learning_rate": 5.8876209735663736e-05, "loss": 3.1345, "step": 4986 }, { "epoch": 0.71, "learning_rate": 5.884732052578362e-05, "loss": 3.3109, "step": 4987 }, { "epoch": 0.71, "learning_rate": 5.881843131590351e-05, "loss": 3.0086, "step": 4988 }, { "epoch": 0.71, "learning_rate": 5.87895421060234e-05, "loss": 3.2247, "step": 4989 }, { "epoch": 0.71, "learning_rate": 5.87606528961433e-05, "loss": 3.1655, "step": 4990 }, { "epoch": 0.71, "learning_rate": 5.8731763686263185e-05, "loss": 3.1094, "step": 4991 }, { "epoch": 0.71, "learning_rate": 5.870287447638307e-05, "loss": 3.1659, "step": 4992 }, { "epoch": 0.71, "learning_rate": 5.8673985266502965e-05, "loss": 3.1532, "step": 4993 }, { "epoch": 0.71, "learning_rate": 5.864509605662285e-05, "loss": 3.2451, "step": 4994 }, { "epoch": 0.71, "learning_rate": 5.8616206846742746e-05, "loss": 3.1007, "step": 4995 }, { "epoch": 0.71, "learning_rate": 5.858731763686264e-05, "loss": 3.1713, "step": 4996 }, { "epoch": 0.71, "learning_rate": 5.855842842698253e-05, "loss": 3.1186, "step": 4997 }, { "epoch": 0.71, "learning_rate": 5.8529539217102414e-05, "loss": 3.1076, "step": 4998 }, { "epoch": 0.71, "learning_rate": 5.85006500072223e-05, "loss": 3.2508, "step": 4999 }, { "epoch": 0.71, "learning_rate": 5.84717607973422e-05, "loss": 3.0121, "step": 5000 }, { "epoch": 0.71, "learning_rate": 5.844287158746209e-05, "loss": 3.1222, "step": 5001 }, { "epoch": 0.71, "learning_rate": 5.8413982377581976e-05, "loss": 3.19, "step": 5002 }, { "epoch": 0.71, "learning_rate": 5.838509316770186e-05, "loss": 3.1305, "step": 5003 }, { "epoch": 0.71, "learning_rate": 5.835620395782175e-05, "loss": 3.1201, "step": 5004 }, { "epoch": 0.71, "learning_rate": 5.8327314747941644e-05, "loss": 3.2659, "step": 5005 }, { "epoch": 0.71, "learning_rate": 5.829842553806154e-05, "loss": 3.1431, "step": 5006 }, { "epoch": 0.71, "learning_rate": 5.826953632818143e-05, "loss": 3.2168, "step": 5007 }, { "epoch": 0.71, "learning_rate": 5.824064711830132e-05, "loss": 3.1828, "step": 5008 }, { "epoch": 0.71, "learning_rate": 5.8211757908421206e-05, "loss": 3.1976, "step": 5009 }, { "epoch": 0.71, "learning_rate": 5.818286869854109e-05, "loss": 3.0626, "step": 5010 }, { "epoch": 0.71, "learning_rate": 5.815397948866099e-05, "loss": 3.1023, "step": 5011 }, { "epoch": 0.71, "learning_rate": 5.812509027878088e-05, "loss": 3.1348, "step": 5012 }, { "epoch": 0.71, "learning_rate": 5.809620106890077e-05, "loss": 3.2077, "step": 5013 }, { "epoch": 0.71, "learning_rate": 5.8067311859020655e-05, "loss": 3.312, "step": 5014 }, { "epoch": 0.71, "learning_rate": 5.803842264914054e-05, "loss": 3.3103, "step": 5015 }, { "epoch": 0.71, "learning_rate": 5.800953343926044e-05, "loss": 3.1398, "step": 5016 }, { "epoch": 0.71, "learning_rate": 5.798064422938033e-05, "loss": 3.1509, "step": 5017 }, { "epoch": 0.71, "learning_rate": 5.795175501950022e-05, "loss": 3.2007, "step": 5018 }, { "epoch": 0.71, "learning_rate": 5.792286580962011e-05, "loss": 3.2066, "step": 5019 }, { "epoch": 0.71, "learning_rate": 5.789397659974e-05, "loss": 3.1849, "step": 5020 }, { "epoch": 0.71, "learning_rate": 5.78650873898599e-05, "loss": 3.2473, "step": 5021 }, { "epoch": 0.72, "learning_rate": 5.7836198179979785e-05, "loss": 3.1957, "step": 5022 }, { "epoch": 0.72, "learning_rate": 5.780730897009967e-05, "loss": 3.1588, "step": 5023 }, { "epoch": 0.72, "learning_rate": 5.777841976021956e-05, "loss": 3.1026, "step": 5024 }, { "epoch": 0.72, "learning_rate": 5.7749530550339446e-05, "loss": 3.0227, "step": 5025 }, { "epoch": 0.72, "learning_rate": 5.772064134045933e-05, "loss": 3.1725, "step": 5026 }, { "epoch": 0.72, "learning_rate": 5.7691752130579234e-05, "loss": 3.3349, "step": 5027 }, { "epoch": 0.72, "learning_rate": 5.766286292069912e-05, "loss": 3.179, "step": 5028 }, { "epoch": 0.72, "learning_rate": 5.7633973710819014e-05, "loss": 3.2301, "step": 5029 }, { "epoch": 0.72, "learning_rate": 5.76050845009389e-05, "loss": 3.1841, "step": 5030 }, { "epoch": 0.72, "learning_rate": 5.757619529105879e-05, "loss": 3.1111, "step": 5031 }, { "epoch": 0.72, "learning_rate": 5.754730608117869e-05, "loss": 3.1174, "step": 5032 }, { "epoch": 0.72, "learning_rate": 5.7518416871298576e-05, "loss": 3.1633, "step": 5033 }, { "epoch": 0.72, "learning_rate": 5.748952766141846e-05, "loss": 2.9734, "step": 5034 }, { "epoch": 0.72, "learning_rate": 5.746063845153835e-05, "loss": 3.1083, "step": 5035 }, { "epoch": 0.72, "learning_rate": 5.743174924165824e-05, "loss": 3.211, "step": 5036 }, { "epoch": 0.72, "learning_rate": 5.740286003177814e-05, "loss": 3.1702, "step": 5037 }, { "epoch": 0.72, "learning_rate": 5.7373970821898025e-05, "loss": 3.2022, "step": 5038 }, { "epoch": 0.72, "learning_rate": 5.734508161201791e-05, "loss": 3.1054, "step": 5039 }, { "epoch": 0.72, "learning_rate": 5.7316192402137806e-05, "loss": 3.1327, "step": 5040 }, { "epoch": 0.72, "learning_rate": 5.728730319225769e-05, "loss": 3.0072, "step": 5041 }, { "epoch": 0.72, "learning_rate": 5.725841398237759e-05, "loss": 3.1727, "step": 5042 }, { "epoch": 0.72, "learning_rate": 5.722952477249748e-05, "loss": 3.247, "step": 5043 }, { "epoch": 0.72, "learning_rate": 5.720063556261737e-05, "loss": 3.1123, "step": 5044 }, { "epoch": 0.72, "learning_rate": 5.7171746352737255e-05, "loss": 3.1567, "step": 5045 }, { "epoch": 0.72, "learning_rate": 5.714285714285714e-05, "loss": 3.1468, "step": 5046 }, { "epoch": 0.72, "learning_rate": 5.711396793297703e-05, "loss": 3.1196, "step": 5047 }, { "epoch": 0.72, "learning_rate": 5.708507872309693e-05, "loss": 3.193, "step": 5048 }, { "epoch": 0.72, "learning_rate": 5.7056189513216816e-05, "loss": 3.0999, "step": 5049 }, { "epoch": 0.72, "learning_rate": 5.7027300303336703e-05, "loss": 3.2171, "step": 5050 }, { "epoch": 0.72, "learning_rate": 5.69984110934566e-05, "loss": 3.2542, "step": 5051 }, { "epoch": 0.72, "learning_rate": 5.6969521883576484e-05, "loss": 3.2912, "step": 5052 }, { "epoch": 0.72, "learning_rate": 5.694063267369638e-05, "loss": 3.1588, "step": 5053 }, { "epoch": 0.72, "learning_rate": 5.691174346381627e-05, "loss": 3.2516, "step": 5054 }, { "epoch": 0.72, "learning_rate": 5.688285425393616e-05, "loss": 3.1515, "step": 5055 }, { "epoch": 0.72, "learning_rate": 5.6853965044056046e-05, "loss": 3.2265, "step": 5056 }, { "epoch": 0.72, "learning_rate": 5.682507583417593e-05, "loss": 3.1729, "step": 5057 }, { "epoch": 0.72, "learning_rate": 5.6796186624295834e-05, "loss": 3.2611, "step": 5058 }, { "epoch": 0.72, "learning_rate": 5.676729741441572e-05, "loss": 3.2122, "step": 5059 }, { "epoch": 0.72, "learning_rate": 5.673840820453561e-05, "loss": 3.3132, "step": 5060 }, { "epoch": 0.72, "learning_rate": 5.6709518994655495e-05, "loss": 3.195, "step": 5061 }, { "epoch": 0.72, "learning_rate": 5.668062978477538e-05, "loss": 3.208, "step": 5062 }, { "epoch": 0.72, "learning_rate": 5.665174057489528e-05, "loss": 3.2916, "step": 5063 }, { "epoch": 0.72, "learning_rate": 5.662285136501517e-05, "loss": 3.1674, "step": 5064 }, { "epoch": 0.72, "learning_rate": 5.6593962155135063e-05, "loss": 3.1717, "step": 5065 }, { "epoch": 0.72, "learning_rate": 5.656507294525495e-05, "loss": 3.1739, "step": 5066 }, { "epoch": 0.72, "learning_rate": 5.653618373537484e-05, "loss": 3.0539, "step": 5067 }, { "epoch": 0.72, "learning_rate": 5.6507294525494725e-05, "loss": 3.1643, "step": 5068 }, { "epoch": 0.72, "learning_rate": 5.6478405315614625e-05, "loss": 3.1571, "step": 5069 }, { "epoch": 0.72, "learning_rate": 5.644951610573451e-05, "loss": 3.1703, "step": 5070 }, { "epoch": 0.72, "learning_rate": 5.64206268958544e-05, "loss": 3.1374, "step": 5071 }, { "epoch": 0.72, "learning_rate": 5.6391737685974286e-05, "loss": 3.1227, "step": 5072 }, { "epoch": 0.72, "learning_rate": 5.636284847609417e-05, "loss": 3.1776, "step": 5073 }, { "epoch": 0.72, "learning_rate": 5.6333959266214074e-05, "loss": 3.2057, "step": 5074 }, { "epoch": 0.72, "learning_rate": 5.630507005633396e-05, "loss": 3.2371, "step": 5075 }, { "epoch": 0.72, "learning_rate": 5.6276180846453855e-05, "loss": 3.2429, "step": 5076 }, { "epoch": 0.72, "learning_rate": 5.624729163657374e-05, "loss": 3.1389, "step": 5077 }, { "epoch": 0.72, "learning_rate": 5.621840242669363e-05, "loss": 3.2158, "step": 5078 }, { "epoch": 0.72, "learning_rate": 5.618951321681353e-05, "loss": 3.2406, "step": 5079 }, { "epoch": 0.72, "learning_rate": 5.6160624006933417e-05, "loss": 2.9815, "step": 5080 }, { "epoch": 0.72, "learning_rate": 5.6131734797053304e-05, "loss": 3.1771, "step": 5081 }, { "epoch": 0.72, "learning_rate": 5.610284558717319e-05, "loss": 3.0717, "step": 5082 }, { "epoch": 0.72, "learning_rate": 5.607395637729308e-05, "loss": 3.1711, "step": 5083 }, { "epoch": 0.72, "learning_rate": 5.604506716741298e-05, "loss": 3.1081, "step": 5084 }, { "epoch": 0.72, "learning_rate": 5.6016177957532865e-05, "loss": 3.1248, "step": 5085 }, { "epoch": 0.72, "learning_rate": 5.598728874765275e-05, "loss": 3.0333, "step": 5086 }, { "epoch": 0.72, "learning_rate": 5.5958399537772646e-05, "loss": 3.2139, "step": 5087 }, { "epoch": 0.72, "learning_rate": 5.592951032789253e-05, "loss": 3.2611, "step": 5088 }, { "epoch": 0.72, "learning_rate": 5.590062111801242e-05, "loss": 3.1232, "step": 5089 }, { "epoch": 0.72, "learning_rate": 5.587173190813232e-05, "loss": 3.109, "step": 5090 }, { "epoch": 0.72, "learning_rate": 5.584284269825221e-05, "loss": 3.1966, "step": 5091 }, { "epoch": 0.72, "learning_rate": 5.5813953488372095e-05, "loss": 3.1803, "step": 5092 }, { "epoch": 0.73, "learning_rate": 5.578506427849198e-05, "loss": 3.0442, "step": 5093 }, { "epoch": 0.73, "learning_rate": 5.575617506861187e-05, "loss": 3.1491, "step": 5094 }, { "epoch": 0.73, "learning_rate": 5.572728585873177e-05, "loss": 3.3084, "step": 5095 }, { "epoch": 0.73, "learning_rate": 5.569839664885166e-05, "loss": 3.2134, "step": 5096 }, { "epoch": 0.73, "learning_rate": 5.5669507438971544e-05, "loss": 3.219, "step": 5097 }, { "epoch": 0.73, "learning_rate": 5.564061822909144e-05, "loss": 2.9762, "step": 5098 }, { "epoch": 0.73, "learning_rate": 5.5611729019211325e-05, "loss": 2.9179, "step": 5099 }, { "epoch": 0.73, "learning_rate": 5.558283980933122e-05, "loss": 3.2013, "step": 5100 }, { "epoch": 0.73, "learning_rate": 5.555395059945111e-05, "loss": 2.9588, "step": 5101 }, { "epoch": 0.73, "learning_rate": 5.5525061389571e-05, "loss": 3.0973, "step": 5102 }, { "epoch": 0.73, "learning_rate": 5.5496172179690886e-05, "loss": 3.1533, "step": 5103 }, { "epoch": 0.73, "learning_rate": 5.5467282969810774e-05, "loss": 3.1741, "step": 5104 }, { "epoch": 0.73, "learning_rate": 5.5438393759930674e-05, "loss": 3.0876, "step": 5105 }, { "epoch": 0.73, "learning_rate": 5.540950455005056e-05, "loss": 3.2014, "step": 5106 }, { "epoch": 0.73, "learning_rate": 5.538061534017045e-05, "loss": 3.1576, "step": 5107 }, { "epoch": 0.73, "learning_rate": 5.5351726130290335e-05, "loss": 3.1465, "step": 5108 }, { "epoch": 0.73, "learning_rate": 5.532283692041023e-05, "loss": 3.2683, "step": 5109 }, { "epoch": 0.73, "learning_rate": 5.529394771053012e-05, "loss": 3.266, "step": 5110 }, { "epoch": 0.73, "learning_rate": 5.526505850065001e-05, "loss": 3.1291, "step": 5111 }, { "epoch": 0.73, "learning_rate": 5.5236169290769904e-05, "loss": 3.2675, "step": 5112 }, { "epoch": 0.73, "learning_rate": 5.520728008088979e-05, "loss": 3.2307, "step": 5113 }, { "epoch": 0.73, "learning_rate": 5.517839087100968e-05, "loss": 2.9991, "step": 5114 }, { "epoch": 0.73, "learning_rate": 5.5149501661129565e-05, "loss": 3.1703, "step": 5115 }, { "epoch": 0.73, "learning_rate": 5.5120612451249466e-05, "loss": 3.059, "step": 5116 }, { "epoch": 0.73, "learning_rate": 5.509172324136935e-05, "loss": 3.1885, "step": 5117 }, { "epoch": 0.73, "learning_rate": 5.506283403148924e-05, "loss": 3.2284, "step": 5118 }, { "epoch": 0.73, "learning_rate": 5.503394482160913e-05, "loss": 3.1545, "step": 5119 }, { "epoch": 0.73, "learning_rate": 5.500505561172902e-05, "loss": 3.1512, "step": 5120 }, { "epoch": 0.73, "learning_rate": 5.4976166401848914e-05, "loss": 3.2021, "step": 5121 }, { "epoch": 0.73, "learning_rate": 5.49472771919688e-05, "loss": 3.1952, "step": 5122 }, { "epoch": 0.73, "learning_rate": 5.4918387982088695e-05, "loss": 3.1533, "step": 5123 }, { "epoch": 0.73, "learning_rate": 5.488949877220858e-05, "loss": 3.2113, "step": 5124 }, { "epoch": 0.73, "learning_rate": 5.486060956232847e-05, "loss": 3.1937, "step": 5125 }, { "epoch": 0.73, "learning_rate": 5.483172035244837e-05, "loss": 3.1545, "step": 5126 }, { "epoch": 0.73, "learning_rate": 5.480283114256826e-05, "loss": 3.2336, "step": 5127 }, { "epoch": 0.73, "learning_rate": 5.4773941932688144e-05, "loss": 3.1997, "step": 5128 }, { "epoch": 0.73, "learning_rate": 5.474505272280803e-05, "loss": 3.0685, "step": 5129 }, { "epoch": 0.73, "learning_rate": 5.471616351292792e-05, "loss": 3.0653, "step": 5130 }, { "epoch": 0.73, "learning_rate": 5.468727430304782e-05, "loss": 3.2056, "step": 5131 }, { "epoch": 0.73, "learning_rate": 5.4658385093167706e-05, "loss": 3.2199, "step": 5132 }, { "epoch": 0.73, "learning_rate": 5.462949588328759e-05, "loss": 3.2525, "step": 5133 }, { "epoch": 0.73, "learning_rate": 5.460060667340749e-05, "loss": 3.2594, "step": 5134 }, { "epoch": 0.73, "learning_rate": 5.4571717463527374e-05, "loss": 3.2341, "step": 5135 }, { "epoch": 0.73, "learning_rate": 5.454282825364726e-05, "loss": 3.2136, "step": 5136 }, { "epoch": 0.73, "learning_rate": 5.451393904376716e-05, "loss": 3.1541, "step": 5137 }, { "epoch": 0.73, "learning_rate": 5.448504983388705e-05, "loss": 3.1913, "step": 5138 }, { "epoch": 0.73, "learning_rate": 5.4456160624006935e-05, "loss": 3.0299, "step": 5139 }, { "epoch": 0.73, "learning_rate": 5.442727141412682e-05, "loss": 3.1656, "step": 5140 }, { "epoch": 0.73, "learning_rate": 5.439838220424671e-05, "loss": 3.237, "step": 5141 }, { "epoch": 0.73, "learning_rate": 5.436949299436661e-05, "loss": 3.2268, "step": 5142 }, { "epoch": 0.73, "learning_rate": 5.43406037844865e-05, "loss": 3.1631, "step": 5143 }, { "epoch": 0.73, "learning_rate": 5.4311714574606384e-05, "loss": 3.1169, "step": 5144 }, { "epoch": 0.73, "learning_rate": 5.428282536472628e-05, "loss": 3.0628, "step": 5145 }, { "epoch": 0.73, "learning_rate": 5.4253936154846165e-05, "loss": 3.2385, "step": 5146 }, { "epoch": 0.73, "learning_rate": 5.4225046944966066e-05, "loss": 3.1183, "step": 5147 }, { "epoch": 0.73, "learning_rate": 5.419615773508595e-05, "loss": 3.0587, "step": 5148 }, { "epoch": 0.73, "learning_rate": 5.416726852520584e-05, "loss": 3.1037, "step": 5149 }, { "epoch": 0.73, "learning_rate": 5.413837931532573e-05, "loss": 3.0909, "step": 5150 }, { "epoch": 0.73, "learning_rate": 5.4109490105445614e-05, "loss": 2.9987, "step": 5151 }, { "epoch": 0.73, "learning_rate": 5.4080600895565515e-05, "loss": 3.2461, "step": 5152 }, { "epoch": 0.73, "learning_rate": 5.40517116856854e-05, "loss": 3.1846, "step": 5153 }, { "epoch": 0.73, "learning_rate": 5.402282247580529e-05, "loss": 3.1396, "step": 5154 }, { "epoch": 0.73, "learning_rate": 5.3993933265925176e-05, "loss": 3.2256, "step": 5155 }, { "epoch": 0.73, "learning_rate": 5.396504405604507e-05, "loss": 3.1149, "step": 5156 }, { "epoch": 0.73, "learning_rate": 5.3936154846164957e-05, "loss": 3.1215, "step": 5157 }, { "epoch": 0.73, "learning_rate": 5.390726563628486e-05, "loss": 3.1063, "step": 5158 }, { "epoch": 0.73, "learning_rate": 5.3878376426404744e-05, "loss": 3.1478, "step": 5159 }, { "epoch": 0.73, "learning_rate": 5.384948721652463e-05, "loss": 3.1244, "step": 5160 }, { "epoch": 0.73, "learning_rate": 5.382059800664452e-05, "loss": 3.1537, "step": 5161 }, { "epoch": 0.73, "learning_rate": 5.3791708796764405e-05, "loss": 3.1722, "step": 5162 }, { "epoch": 0.74, "learning_rate": 5.3762819586884306e-05, "loss": 3.1656, "step": 5163 }, { "epoch": 0.74, "learning_rate": 5.373393037700419e-05, "loss": 3.2235, "step": 5164 }, { "epoch": 0.74, "learning_rate": 5.370504116712408e-05, "loss": 3.2493, "step": 5165 }, { "epoch": 0.74, "learning_rate": 5.367615195724397e-05, "loss": 3.0734, "step": 5166 }, { "epoch": 0.74, "learning_rate": 5.364726274736386e-05, "loss": 3.2295, "step": 5167 }, { "epoch": 0.74, "learning_rate": 5.3618373537483755e-05, "loss": 3.1616, "step": 5168 }, { "epoch": 0.74, "learning_rate": 5.358948432760364e-05, "loss": 3.2689, "step": 5169 }, { "epoch": 0.74, "learning_rate": 5.3560595117723536e-05, "loss": 3.252, "step": 5170 }, { "epoch": 0.74, "learning_rate": 5.353170590784342e-05, "loss": 3.1139, "step": 5171 }, { "epoch": 0.74, "learning_rate": 5.350281669796331e-05, "loss": 3.2654, "step": 5172 }, { "epoch": 0.74, "learning_rate": 5.347392748808321e-05, "loss": 3.2094, "step": 5173 }, { "epoch": 0.74, "learning_rate": 5.34450382782031e-05, "loss": 3.0202, "step": 5174 }, { "epoch": 0.74, "learning_rate": 5.3416149068322984e-05, "loss": 3.2164, "step": 5175 }, { "epoch": 0.74, "learning_rate": 5.338725985844287e-05, "loss": 3.143, "step": 5176 }, { "epoch": 0.74, "learning_rate": 5.335837064856276e-05, "loss": 3.2617, "step": 5177 }, { "epoch": 0.74, "learning_rate": 5.332948143868265e-05, "loss": 3.0766, "step": 5178 }, { "epoch": 0.74, "learning_rate": 5.3300592228802546e-05, "loss": 3.2323, "step": 5179 }, { "epoch": 0.74, "learning_rate": 5.327170301892243e-05, "loss": 3.1394, "step": 5180 }, { "epoch": 0.74, "learning_rate": 5.324281380904233e-05, "loss": 3.1591, "step": 5181 }, { "epoch": 0.74, "learning_rate": 5.3213924599162214e-05, "loss": 3.0493, "step": 5182 }, { "epoch": 0.74, "learning_rate": 5.31850353892821e-05, "loss": 3.2181, "step": 5183 }, { "epoch": 0.74, "learning_rate": 5.3156146179402e-05, "loss": 3.153, "step": 5184 }, { "epoch": 0.74, "learning_rate": 5.312725696952189e-05, "loss": 3.163, "step": 5185 }, { "epoch": 0.74, "learning_rate": 5.3098367759641776e-05, "loss": 3.1937, "step": 5186 }, { "epoch": 0.74, "learning_rate": 5.306947854976166e-05, "loss": 3.1086, "step": 5187 }, { "epoch": 0.74, "learning_rate": 5.304058933988155e-05, "loss": 3.1899, "step": 5188 }, { "epoch": 0.74, "learning_rate": 5.301170013000145e-05, "loss": 2.8025, "step": 5189 }, { "epoch": 0.74, "learning_rate": 5.298281092012134e-05, "loss": 3.2051, "step": 5190 }, { "epoch": 0.74, "learning_rate": 5.2953921710241225e-05, "loss": 3.1696, "step": 5191 }, { "epoch": 0.74, "learning_rate": 5.292503250036112e-05, "loss": 2.8641, "step": 5192 }, { "epoch": 0.74, "learning_rate": 5.2896143290481006e-05, "loss": 3.0356, "step": 5193 }, { "epoch": 0.74, "learning_rate": 5.2867254080600906e-05, "loss": 2.9993, "step": 5194 }, { "epoch": 0.74, "learning_rate": 5.283836487072079e-05, "loss": 3.1589, "step": 5195 }, { "epoch": 0.74, "learning_rate": 5.280947566084068e-05, "loss": 2.8759, "step": 5196 }, { "epoch": 0.74, "learning_rate": 5.278058645096057e-05, "loss": 3.2366, "step": 5197 }, { "epoch": 0.74, "learning_rate": 5.2751697241080454e-05, "loss": 3.2196, "step": 5198 }, { "epoch": 0.74, "learning_rate": 5.2722808031200355e-05, "loss": 3.126, "step": 5199 }, { "epoch": 0.74, "learning_rate": 5.269391882132024e-05, "loss": 3.0893, "step": 5200 }, { "epoch": 0.74, "learning_rate": 5.266502961144013e-05, "loss": 3.2265, "step": 5201 }, { "epoch": 0.74, "learning_rate": 5.2636140401560016e-05, "loss": 3.0793, "step": 5202 }, { "epoch": 0.74, "learning_rate": 5.260725119167991e-05, "loss": 3.1028, "step": 5203 }, { "epoch": 0.74, "learning_rate": 5.25783619817998e-05, "loss": 3.12, "step": 5204 }, { "epoch": 0.74, "learning_rate": 5.25494727719197e-05, "loss": 3.0613, "step": 5205 }, { "epoch": 0.74, "learning_rate": 5.2520583562039585e-05, "loss": 3.0577, "step": 5206 }, { "epoch": 0.74, "learning_rate": 5.249169435215947e-05, "loss": 3.079, "step": 5207 }, { "epoch": 0.74, "learning_rate": 5.246280514227936e-05, "loss": 3.2207, "step": 5208 }, { "epoch": 0.74, "learning_rate": 5.2433915932399246e-05, "loss": 3.1076, "step": 5209 }, { "epoch": 0.74, "learning_rate": 5.2405026722519146e-05, "loss": 3.1991, "step": 5210 }, { "epoch": 0.74, "learning_rate": 5.2376137512639033e-05, "loss": 3.2086, "step": 5211 }, { "epoch": 0.74, "learning_rate": 5.234724830275892e-05, "loss": 2.8687, "step": 5212 }, { "epoch": 0.74, "learning_rate": 5.231835909287881e-05, "loss": 3.1321, "step": 5213 }, { "epoch": 0.74, "learning_rate": 5.22894698829987e-05, "loss": 3.2268, "step": 5214 }, { "epoch": 0.74, "learning_rate": 5.2260580673118595e-05, "loss": 3.2203, "step": 5215 }, { "epoch": 0.74, "learning_rate": 5.223169146323849e-05, "loss": 3.1536, "step": 5216 }, { "epoch": 0.74, "learning_rate": 5.2202802253358376e-05, "loss": 3.2663, "step": 5217 }, { "epoch": 0.74, "learning_rate": 5.217391304347826e-05, "loss": 3.2019, "step": 5218 }, { "epoch": 0.74, "learning_rate": 5.214502383359815e-05, "loss": 3.0641, "step": 5219 }, { "epoch": 0.74, "learning_rate": 5.211613462371805e-05, "loss": 3.0838, "step": 5220 }, { "epoch": 0.74, "learning_rate": 5.208724541383794e-05, "loss": 3.1684, "step": 5221 }, { "epoch": 0.74, "learning_rate": 5.2058356203957825e-05, "loss": 3.149, "step": 5222 }, { "epoch": 0.74, "learning_rate": 5.202946699407771e-05, "loss": 3.0806, "step": 5223 }, { "epoch": 0.74, "learning_rate": 5.20005777841976e-05, "loss": 3.2016, "step": 5224 }, { "epoch": 0.74, "learning_rate": 5.197168857431749e-05, "loss": 3.1248, "step": 5225 }, { "epoch": 0.74, "learning_rate": 5.1942799364437387e-05, "loss": 3.2296, "step": 5226 }, { "epoch": 0.74, "learning_rate": 5.191391015455728e-05, "loss": 3.1643, "step": 5227 }, { "epoch": 0.74, "learning_rate": 5.188502094467717e-05, "loss": 3.119, "step": 5228 }, { "epoch": 0.74, "learning_rate": 5.1856131734797054e-05, "loss": 3.0145, "step": 5229 }, { "epoch": 0.74, "learning_rate": 5.182724252491694e-05, "loss": 3.1054, "step": 5230 }, { "epoch": 0.74, "learning_rate": 5.179835331503684e-05, "loss": 2.9761, "step": 5231 }, { "epoch": 0.74, "learning_rate": 5.176946410515673e-05, "loss": 3.1658, "step": 5232 }, { "epoch": 0.75, "learning_rate": 5.1740574895276616e-05, "loss": 3.1663, "step": 5233 }, { "epoch": 0.75, "learning_rate": 5.17116856853965e-05, "loss": 3.1418, "step": 5234 }, { "epoch": 0.75, "learning_rate": 5.168279647551639e-05, "loss": 3.1757, "step": 5235 }, { "epoch": 0.75, "learning_rate": 5.165390726563629e-05, "loss": 3.2377, "step": 5236 }, { "epoch": 0.75, "learning_rate": 5.162501805575618e-05, "loss": 3.1921, "step": 5237 }, { "epoch": 0.75, "learning_rate": 5.1596128845876065e-05, "loss": 3.0378, "step": 5238 }, { "epoch": 0.75, "learning_rate": 5.156723963599596e-05, "loss": 3.1379, "step": 5239 }, { "epoch": 0.75, "learning_rate": 5.1538350426115846e-05, "loss": 3.1644, "step": 5240 }, { "epoch": 0.75, "learning_rate": 5.1509461216235747e-05, "loss": 3.212, "step": 5241 }, { "epoch": 0.75, "learning_rate": 5.1480572006355634e-05, "loss": 2.8968, "step": 5242 }, { "epoch": 0.75, "learning_rate": 5.145168279647552e-05, "loss": 3.2158, "step": 5243 }, { "epoch": 0.75, "learning_rate": 5.142279358659541e-05, "loss": 2.9899, "step": 5244 }, { "epoch": 0.75, "learning_rate": 5.1393904376715295e-05, "loss": 2.9403, "step": 5245 }, { "epoch": 0.75, "learning_rate": 5.136501516683518e-05, "loss": 3.2206, "step": 5246 }, { "epoch": 0.75, "learning_rate": 5.133612595695508e-05, "loss": 3.1934, "step": 5247 }, { "epoch": 0.75, "learning_rate": 5.130723674707497e-05, "loss": 3.1608, "step": 5248 }, { "epoch": 0.75, "learning_rate": 5.1278347537194856e-05, "loss": 3.0498, "step": 5249 }, { "epoch": 0.75, "learning_rate": 5.124945832731475e-05, "loss": 3.1471, "step": 5250 }, { "epoch": 0.75, "learning_rate": 5.122056911743464e-05, "loss": 3.0589, "step": 5251 }, { "epoch": 0.75, "learning_rate": 5.119167990755454e-05, "loss": 3.2602, "step": 5252 }, { "epoch": 0.75, "learning_rate": 5.1162790697674425e-05, "loss": 3.211, "step": 5253 }, { "epoch": 0.75, "learning_rate": 5.113390148779431e-05, "loss": 3.0975, "step": 5254 }, { "epoch": 0.75, "learning_rate": 5.11050122779142e-05, "loss": 3.1378, "step": 5255 }, { "epoch": 0.75, "learning_rate": 5.1076123068034086e-05, "loss": 3.0698, "step": 5256 }, { "epoch": 0.75, "learning_rate": 5.104723385815399e-05, "loss": 3.2347, "step": 5257 }, { "epoch": 0.75, "learning_rate": 5.1018344648273874e-05, "loss": 3.2977, "step": 5258 }, { "epoch": 0.75, "learning_rate": 5.098945543839376e-05, "loss": 3.1856, "step": 5259 }, { "epoch": 0.75, "learning_rate": 5.096056622851365e-05, "loss": 3.0422, "step": 5260 }, { "epoch": 0.75, "learning_rate": 5.093167701863354e-05, "loss": 3.1303, "step": 5261 }, { "epoch": 0.75, "learning_rate": 5.0902787808753436e-05, "loss": 3.1463, "step": 5262 }, { "epoch": 0.75, "learning_rate": 5.087389859887333e-05, "loss": 3.1041, "step": 5263 }, { "epoch": 0.75, "learning_rate": 5.0845009388993216e-05, "loss": 3.029, "step": 5264 }, { "epoch": 0.75, "learning_rate": 5.0816120179113103e-05, "loss": 3.1371, "step": 5265 }, { "epoch": 0.75, "learning_rate": 5.078723096923299e-05, "loss": 3.002, "step": 5266 }, { "epoch": 0.75, "learning_rate": 5.075834175935288e-05, "loss": 3.1604, "step": 5267 }, { "epoch": 0.75, "learning_rate": 5.072945254947278e-05, "loss": 3.1258, "step": 5268 }, { "epoch": 0.75, "learning_rate": 5.0700563339592665e-05, "loss": 3.0734, "step": 5269 }, { "epoch": 0.75, "learning_rate": 5.067167412971255e-05, "loss": 3.2201, "step": 5270 }, { "epoch": 0.75, "learning_rate": 5.064278491983244e-05, "loss": 3.1679, "step": 5271 }, { "epoch": 0.75, "learning_rate": 5.061389570995233e-05, "loss": 3.1449, "step": 5272 }, { "epoch": 0.75, "learning_rate": 5.058500650007223e-05, "loss": 3.2038, "step": 5273 }, { "epoch": 0.75, "learning_rate": 5.055611729019212e-05, "loss": 3.28, "step": 5274 }, { "epoch": 0.75, "learning_rate": 5.052722808031201e-05, "loss": 3.1376, "step": 5275 }, { "epoch": 0.75, "learning_rate": 5.0498338870431895e-05, "loss": 3.0618, "step": 5276 }, { "epoch": 0.75, "learning_rate": 5.046944966055178e-05, "loss": 3.1943, "step": 5277 }, { "epoch": 0.75, "learning_rate": 5.044056045067168e-05, "loss": 3.162, "step": 5278 }, { "epoch": 0.75, "learning_rate": 5.041167124079157e-05, "loss": 3.1661, "step": 5279 }, { "epoch": 0.75, "learning_rate": 5.038278203091146e-05, "loss": 3.2009, "step": 5280 }, { "epoch": 0.75, "eval_loss": 3.3782970905303955, "eval_runtime": 472.5543, "eval_samples_per_second": 43.354, "eval_steps_per_second": 14.451, "step": 5280 }, { "epoch": 0.75, "learning_rate": 5.0353892821031344e-05, "loss": 2.8974, "step": 5281 }, { "epoch": 0.75, "learning_rate": 5.032500361115123e-05, "loss": 3.1747, "step": 5282 }, { "epoch": 0.75, "learning_rate": 5.029611440127113e-05, "loss": 3.2308, "step": 5283 }, { "epoch": 0.75, "learning_rate": 5.026722519139102e-05, "loss": 3.224, "step": 5284 }, { "epoch": 0.75, "learning_rate": 5.023833598151091e-05, "loss": 3.0434, "step": 5285 }, { "epoch": 0.75, "learning_rate": 5.02094467716308e-05, "loss": 3.265, "step": 5286 }, { "epoch": 0.75, "learning_rate": 5.0180557561750686e-05, "loss": 3.176, "step": 5287 }, { "epoch": 0.75, "learning_rate": 5.015166835187057e-05, "loss": 3.1993, "step": 5288 }, { "epoch": 0.75, "learning_rate": 5.0122779141990474e-05, "loss": 3.1893, "step": 5289 }, { "epoch": 0.75, "learning_rate": 5.009388993211036e-05, "loss": 3.1407, "step": 5290 }, { "epoch": 0.75, "learning_rate": 5.006500072223025e-05, "loss": 3.1743, "step": 5291 }, { "epoch": 0.75, "learning_rate": 5.0036111512350135e-05, "loss": 3.0759, "step": 5292 }, { "epoch": 0.75, "learning_rate": 5.000722230247002e-05, "loss": 3.1738, "step": 5293 }, { "epoch": 0.75, "learning_rate": 4.9978333092589916e-05, "loss": 3.016, "step": 5294 }, { "epoch": 0.75, "learning_rate": 4.994944388270981e-05, "loss": 3.1617, "step": 5295 }, { "epoch": 0.75, "learning_rate": 4.99205546728297e-05, "loss": 3.0395, "step": 5296 }, { "epoch": 0.75, "learning_rate": 4.989166546294959e-05, "loss": 3.0232, "step": 5297 }, { "epoch": 0.75, "learning_rate": 4.9862776253069485e-05, "loss": 3.1016, "step": 5298 }, { "epoch": 0.75, "learning_rate": 4.983388704318937e-05, "loss": 3.1152, "step": 5299 }, { "epoch": 0.75, "learning_rate": 4.9804997833309265e-05, "loss": 3.0738, "step": 5300 }, { "epoch": 0.75, "learning_rate": 4.977610862342915e-05, "loss": 3.1971, "step": 5301 }, { "epoch": 0.75, "learning_rate": 4.974721941354904e-05, "loss": 3.1543, "step": 5302 }, { "epoch": 0.76, "learning_rate": 4.971833020366893e-05, "loss": 3.2111, "step": 5303 }, { "epoch": 0.76, "learning_rate": 4.968944099378882e-05, "loss": 3.056, "step": 5304 }, { "epoch": 0.76, "learning_rate": 4.966055178390871e-05, "loss": 3.0425, "step": 5305 }, { "epoch": 0.76, "learning_rate": 4.96316625740286e-05, "loss": 3.1703, "step": 5306 }, { "epoch": 0.76, "learning_rate": 4.960277336414849e-05, "loss": 3.1008, "step": 5307 }, { "epoch": 0.76, "learning_rate": 4.957388415426838e-05, "loss": 3.2465, "step": 5308 }, { "epoch": 0.76, "learning_rate": 4.9544994944388276e-05, "loss": 3.1674, "step": 5309 }, { "epoch": 0.76, "learning_rate": 4.951610573450816e-05, "loss": 3.0894, "step": 5310 }, { "epoch": 0.76, "learning_rate": 4.948721652462806e-05, "loss": 3.2196, "step": 5311 }, { "epoch": 0.76, "learning_rate": 4.9458327314747944e-05, "loss": 3.2325, "step": 5312 }, { "epoch": 0.76, "learning_rate": 4.942943810486784e-05, "loss": 3.2285, "step": 5313 }, { "epoch": 0.76, "learning_rate": 4.9400548894987725e-05, "loss": 3.1028, "step": 5314 }, { "epoch": 0.76, "learning_rate": 4.937165968510761e-05, "loss": 3.1758, "step": 5315 }, { "epoch": 0.76, "learning_rate": 4.9342770475227506e-05, "loss": 3.1665, "step": 5316 }, { "epoch": 0.76, "learning_rate": 4.931388126534739e-05, "loss": 3.0878, "step": 5317 }, { "epoch": 0.76, "learning_rate": 4.928499205546728e-05, "loss": 3.2068, "step": 5318 }, { "epoch": 0.76, "learning_rate": 4.9256102845587174e-05, "loss": 3.0782, "step": 5319 }, { "epoch": 0.76, "learning_rate": 4.922721363570707e-05, "loss": 3.2516, "step": 5320 }, { "epoch": 0.76, "learning_rate": 4.919832442582696e-05, "loss": 3.1966, "step": 5321 }, { "epoch": 0.76, "learning_rate": 4.916943521594685e-05, "loss": 3.1208, "step": 5322 }, { "epoch": 0.76, "learning_rate": 4.9140546006066735e-05, "loss": 2.9893, "step": 5323 }, { "epoch": 0.76, "learning_rate": 4.911165679618663e-05, "loss": 2.9886, "step": 5324 }, { "epoch": 0.76, "learning_rate": 4.9082767586306516e-05, "loss": 3.2782, "step": 5325 }, { "epoch": 0.76, "learning_rate": 4.905387837642641e-05, "loss": 2.8643, "step": 5326 }, { "epoch": 0.76, "learning_rate": 4.90249891665463e-05, "loss": 3.2197, "step": 5327 }, { "epoch": 0.76, "learning_rate": 4.8996099956666184e-05, "loss": 3.1602, "step": 5328 }, { "epoch": 0.76, "learning_rate": 4.896721074678608e-05, "loss": 3.2031, "step": 5329 }, { "epoch": 0.76, "learning_rate": 4.8938321536905965e-05, "loss": 3.152, "step": 5330 }, { "epoch": 0.76, "learning_rate": 4.890943232702586e-05, "loss": 3.1258, "step": 5331 }, { "epoch": 0.76, "learning_rate": 4.888054311714575e-05, "loss": 3.1495, "step": 5332 }, { "epoch": 0.76, "learning_rate": 4.885165390726564e-05, "loss": 3.0992, "step": 5333 }, { "epoch": 0.76, "learning_rate": 4.8822764697385533e-05, "loss": 3.0927, "step": 5334 }, { "epoch": 0.76, "learning_rate": 4.879387548750542e-05, "loss": 3.1619, "step": 5335 }, { "epoch": 0.76, "learning_rate": 4.876498627762531e-05, "loss": 3.1023, "step": 5336 }, { "epoch": 0.76, "learning_rate": 4.87360970677452e-05, "loss": 3.1128, "step": 5337 }, { "epoch": 0.76, "learning_rate": 4.870720785786509e-05, "loss": 3.104, "step": 5338 }, { "epoch": 0.76, "learning_rate": 4.8678318647984976e-05, "loss": 2.8925, "step": 5339 }, { "epoch": 0.76, "learning_rate": 4.864942943810487e-05, "loss": 3.1436, "step": 5340 }, { "epoch": 0.76, "learning_rate": 4.8620540228224756e-05, "loss": 3.2427, "step": 5341 }, { "epoch": 0.76, "learning_rate": 4.859165101834465e-05, "loss": 3.2214, "step": 5342 }, { "epoch": 0.76, "learning_rate": 4.8562761808464544e-05, "loss": 3.1747, "step": 5343 }, { "epoch": 0.76, "learning_rate": 4.853387259858443e-05, "loss": 3.2018, "step": 5344 }, { "epoch": 0.76, "learning_rate": 4.8504983388704325e-05, "loss": 3.1354, "step": 5345 }, { "epoch": 0.76, "learning_rate": 4.847609417882421e-05, "loss": 3.0884, "step": 5346 }, { "epoch": 0.76, "learning_rate": 4.8447204968944106e-05, "loss": 3.1204, "step": 5347 }, { "epoch": 0.76, "learning_rate": 4.841831575906399e-05, "loss": 3.0706, "step": 5348 }, { "epoch": 0.76, "learning_rate": 4.838942654918388e-05, "loss": 3.154, "step": 5349 }, { "epoch": 0.76, "learning_rate": 4.8360537339303774e-05, "loss": 3.1685, "step": 5350 }, { "epoch": 0.76, "learning_rate": 4.833164812942366e-05, "loss": 3.2801, "step": 5351 }, { "epoch": 0.76, "learning_rate": 4.830275891954355e-05, "loss": 3.0305, "step": 5352 }, { "epoch": 0.76, "learning_rate": 4.827386970966344e-05, "loss": 2.9677, "step": 5353 }, { "epoch": 0.76, "learning_rate": 4.8244980499783335e-05, "loss": 3.1084, "step": 5354 }, { "epoch": 0.76, "learning_rate": 4.821609128990322e-05, "loss": 2.9526, "step": 5355 }, { "epoch": 0.76, "learning_rate": 4.8187202080023116e-05, "loss": 3.0777, "step": 5356 }, { "epoch": 0.76, "learning_rate": 4.8158312870143e-05, "loss": 3.1239, "step": 5357 }, { "epoch": 0.76, "learning_rate": 4.81294236602629e-05, "loss": 3.1601, "step": 5358 }, { "epoch": 0.76, "learning_rate": 4.8100534450382784e-05, "loss": 3.2667, "step": 5359 }, { "epoch": 0.76, "learning_rate": 4.807164524050267e-05, "loss": 3.0284, "step": 5360 }, { "epoch": 0.76, "learning_rate": 4.8042756030622565e-05, "loss": 3.1923, "step": 5361 }, { "epoch": 0.76, "learning_rate": 4.801386682074245e-05, "loss": 3.1982, "step": 5362 }, { "epoch": 0.76, "learning_rate": 4.7984977610862346e-05, "loss": 3.2425, "step": 5363 }, { "epoch": 0.76, "learning_rate": 4.795608840098223e-05, "loss": 3.1461, "step": 5364 }, { "epoch": 0.76, "learning_rate": 4.792719919110212e-05, "loss": 3.1147, "step": 5365 }, { "epoch": 0.76, "learning_rate": 4.7898309981222014e-05, "loss": 3.2444, "step": 5366 }, { "epoch": 0.76, "learning_rate": 4.786942077134191e-05, "loss": 3.2765, "step": 5367 }, { "epoch": 0.76, "learning_rate": 4.78405315614618e-05, "loss": 3.2053, "step": 5368 }, { "epoch": 0.76, "learning_rate": 4.781164235158169e-05, "loss": 3.1532, "step": 5369 }, { "epoch": 0.76, "learning_rate": 4.7782753141701576e-05, "loss": 3.1784, "step": 5370 }, { "epoch": 0.76, "learning_rate": 4.775386393182147e-05, "loss": 3.062, "step": 5371 }, { "epoch": 0.76, "learning_rate": 4.7724974721941357e-05, "loss": 3.0357, "step": 5372 }, { "epoch": 0.77, "learning_rate": 4.7696085512061244e-05, "loss": 3.2979, "step": 5373 }, { "epoch": 0.77, "learning_rate": 4.766719630218114e-05, "loss": 3.0243, "step": 5374 }, { "epoch": 0.77, "learning_rate": 4.7638307092301024e-05, "loss": 3.2944, "step": 5375 }, { "epoch": 0.77, "learning_rate": 4.760941788242092e-05, "loss": 3.2016, "step": 5376 }, { "epoch": 0.77, "learning_rate": 4.7580528672540805e-05, "loss": 3.1149, "step": 5377 }, { "epoch": 0.77, "learning_rate": 4.75516394626607e-05, "loss": 3.2387, "step": 5378 }, { "epoch": 0.77, "learning_rate": 4.752275025278059e-05, "loss": 3.1331, "step": 5379 }, { "epoch": 0.77, "learning_rate": 4.749386104290048e-05, "loss": 3.1386, "step": 5380 }, { "epoch": 0.77, "learning_rate": 4.7464971833020374e-05, "loss": 3.1489, "step": 5381 }, { "epoch": 0.77, "learning_rate": 4.743608262314026e-05, "loss": 2.882, "step": 5382 }, { "epoch": 0.77, "learning_rate": 4.740719341326015e-05, "loss": 3.078, "step": 5383 }, { "epoch": 0.77, "learning_rate": 4.737830420338004e-05, "loss": 3.0792, "step": 5384 }, { "epoch": 0.77, "learning_rate": 4.734941499349993e-05, "loss": 3.0568, "step": 5385 }, { "epoch": 0.77, "learning_rate": 4.7320525783619816e-05, "loss": 3.1153, "step": 5386 }, { "epoch": 0.77, "learning_rate": 4.729163657373971e-05, "loss": 3.2472, "step": 5387 }, { "epoch": 0.77, "learning_rate": 4.72627473638596e-05, "loss": 3.1513, "step": 5388 }, { "epoch": 0.77, "learning_rate": 4.723385815397949e-05, "loss": 3.273, "step": 5389 }, { "epoch": 0.77, "learning_rate": 4.7204968944099384e-05, "loss": 3.1339, "step": 5390 }, { "epoch": 0.77, "learning_rate": 4.717607973421927e-05, "loss": 3.2117, "step": 5391 }, { "epoch": 0.77, "learning_rate": 4.7147190524339165e-05, "loss": 3.0859, "step": 5392 }, { "epoch": 0.77, "learning_rate": 4.711830131445905e-05, "loss": 3.0746, "step": 5393 }, { "epoch": 0.77, "learning_rate": 4.708941210457894e-05, "loss": 3.1658, "step": 5394 }, { "epoch": 0.77, "learning_rate": 4.706052289469883e-05, "loss": 3.1445, "step": 5395 }, { "epoch": 0.77, "learning_rate": 4.703163368481872e-05, "loss": 3.0719, "step": 5396 }, { "epoch": 0.77, "learning_rate": 4.7002744474938614e-05, "loss": 3.182, "step": 5397 }, { "epoch": 0.77, "learning_rate": 4.69738552650585e-05, "loss": 3.207, "step": 5398 }, { "epoch": 0.77, "learning_rate": 4.694496605517839e-05, "loss": 3.1019, "step": 5399 }, { "epoch": 0.77, "learning_rate": 4.691607684529828e-05, "loss": 3.1441, "step": 5400 }, { "epoch": 0.77, "learning_rate": 4.6887187635418176e-05, "loss": 3.1312, "step": 5401 }, { "epoch": 0.77, "learning_rate": 4.685829842553807e-05, "loss": 3.0884, "step": 5402 }, { "epoch": 0.77, "learning_rate": 4.682940921565796e-05, "loss": 3.0809, "step": 5403 }, { "epoch": 0.77, "learning_rate": 4.6800520005777844e-05, "loss": 2.9426, "step": 5404 }, { "epoch": 0.77, "learning_rate": 4.677163079589774e-05, "loss": 3.1012, "step": 5405 }, { "epoch": 0.77, "learning_rate": 4.6742741586017625e-05, "loss": 3.2305, "step": 5406 }, { "epoch": 0.77, "learning_rate": 4.671385237613751e-05, "loss": 3.169, "step": 5407 }, { "epoch": 0.77, "learning_rate": 4.6684963166257406e-05, "loss": 3.237, "step": 5408 }, { "epoch": 0.77, "learning_rate": 4.665607395637729e-05, "loss": 3.2113, "step": 5409 }, { "epoch": 0.77, "learning_rate": 4.6627184746497186e-05, "loss": 3.3055, "step": 5410 }, { "epoch": 0.77, "learning_rate": 4.6598295536617073e-05, "loss": 2.98, "step": 5411 }, { "epoch": 0.77, "learning_rate": 4.656940632673697e-05, "loss": 3.0582, "step": 5412 }, { "epoch": 0.77, "learning_rate": 4.6540517116856854e-05, "loss": 3.2416, "step": 5413 }, { "epoch": 0.77, "learning_rate": 4.651162790697675e-05, "loss": 3.1169, "step": 5414 }, { "epoch": 0.77, "learning_rate": 4.6482738697096635e-05, "loss": 3.0852, "step": 5415 }, { "epoch": 0.77, "learning_rate": 4.645384948721653e-05, "loss": 3.0634, "step": 5416 }, { "epoch": 0.77, "learning_rate": 4.6424960277336416e-05, "loss": 3.0914, "step": 5417 }, { "epoch": 0.77, "learning_rate": 4.639607106745631e-05, "loss": 3.1037, "step": 5418 }, { "epoch": 0.77, "learning_rate": 4.63671818575762e-05, "loss": 3.0749, "step": 5419 }, { "epoch": 0.77, "learning_rate": 4.6338292647696084e-05, "loss": 2.9571, "step": 5420 }, { "epoch": 0.77, "learning_rate": 4.630940343781598e-05, "loss": 3.0772, "step": 5421 }, { "epoch": 0.77, "learning_rate": 4.6280514227935865e-05, "loss": 3.1799, "step": 5422 }, { "epoch": 0.77, "learning_rate": 4.625162501805576e-05, "loss": 3.3737, "step": 5423 }, { "epoch": 0.77, "learning_rate": 4.6222735808175646e-05, "loss": 2.9617, "step": 5424 }, { "epoch": 0.77, "learning_rate": 4.619384659829554e-05, "loss": 3.2483, "step": 5425 }, { "epoch": 0.77, "learning_rate": 4.6164957388415433e-05, "loss": 3.2585, "step": 5426 }, { "epoch": 0.77, "learning_rate": 4.613606817853532e-05, "loss": 3.2683, "step": 5427 }, { "epoch": 0.77, "learning_rate": 4.610717896865521e-05, "loss": 3.1022, "step": 5428 }, { "epoch": 0.77, "learning_rate": 4.60782897587751e-05, "loss": 3.2188, "step": 5429 }, { "epoch": 0.77, "learning_rate": 4.604940054889499e-05, "loss": 3.1245, "step": 5430 }, { "epoch": 0.77, "learning_rate": 4.602051133901488e-05, "loss": 3.2285, "step": 5431 }, { "epoch": 0.77, "learning_rate": 4.599162212913477e-05, "loss": 3.1879, "step": 5432 }, { "epoch": 0.77, "learning_rate": 4.5962732919254656e-05, "loss": 3.0554, "step": 5433 }, { "epoch": 0.77, "learning_rate": 4.593384370937455e-05, "loss": 3.1055, "step": 5434 }, { "epoch": 0.77, "learning_rate": 4.590495449949444e-05, "loss": 3.2009, "step": 5435 }, { "epoch": 0.77, "learning_rate": 4.587606528961433e-05, "loss": 3.1747, "step": 5436 }, { "epoch": 0.77, "learning_rate": 4.5847176079734225e-05, "loss": 3.0973, "step": 5437 }, { "epoch": 0.77, "learning_rate": 4.581828686985411e-05, "loss": 3.0826, "step": 5438 }, { "epoch": 0.77, "learning_rate": 4.5789397659974006e-05, "loss": 3.0979, "step": 5439 }, { "epoch": 0.77, "learning_rate": 4.576050845009389e-05, "loss": 3.0841, "step": 5440 }, { "epoch": 0.77, "learning_rate": 4.573161924021378e-05, "loss": 2.9547, "step": 5441 }, { "epoch": 0.77, "learning_rate": 4.5702730030333674e-05, "loss": 3.1232, "step": 5442 }, { "epoch": 0.77, "learning_rate": 4.567384082045356e-05, "loss": 3.2677, "step": 5443 }, { "epoch": 0.78, "learning_rate": 4.5644951610573454e-05, "loss": 3.2299, "step": 5444 }, { "epoch": 0.78, "learning_rate": 4.561606240069334e-05, "loss": 3.1185, "step": 5445 }, { "epoch": 0.78, "learning_rate": 4.558717319081323e-05, "loss": 3.0334, "step": 5446 }, { "epoch": 0.78, "learning_rate": 4.555828398093312e-05, "loss": 3.1425, "step": 5447 }, { "epoch": 0.78, "learning_rate": 4.5529394771053016e-05, "loss": 3.058, "step": 5448 }, { "epoch": 0.78, "learning_rate": 4.55005055611729e-05, "loss": 3.0042, "step": 5449 }, { "epoch": 0.78, "learning_rate": 4.54716163512928e-05, "loss": 3.1667, "step": 5450 }, { "epoch": 0.78, "learning_rate": 4.5442727141412684e-05, "loss": 3.1671, "step": 5451 }, { "epoch": 0.78, "learning_rate": 4.541383793153258e-05, "loss": 3.0781, "step": 5452 }, { "epoch": 0.78, "learning_rate": 4.5384948721652465e-05, "loss": 3.0669, "step": 5453 }, { "epoch": 0.78, "learning_rate": 4.535605951177235e-05, "loss": 3.0942, "step": 5454 }, { "epoch": 0.78, "learning_rate": 4.5327170301892246e-05, "loss": 3.0422, "step": 5455 }, { "epoch": 0.78, "learning_rate": 4.529828109201213e-05, "loss": 3.1276, "step": 5456 }, { "epoch": 0.78, "learning_rate": 4.526939188213203e-05, "loss": 2.9173, "step": 5457 }, { "epoch": 0.78, "learning_rate": 4.5240502672251914e-05, "loss": 3.0792, "step": 5458 }, { "epoch": 0.78, "learning_rate": 4.521161346237181e-05, "loss": 3.1322, "step": 5459 }, { "epoch": 0.78, "learning_rate": 4.51827242524917e-05, "loss": 3.112, "step": 5460 }, { "epoch": 0.78, "learning_rate": 4.515383504261159e-05, "loss": 3.1221, "step": 5461 }, { "epoch": 0.78, "learning_rate": 4.5124945832731476e-05, "loss": 3.1167, "step": 5462 }, { "epoch": 0.78, "learning_rate": 4.509605662285137e-05, "loss": 3.2434, "step": 5463 }, { "epoch": 0.78, "learning_rate": 4.5067167412971256e-05, "loss": 3.0045, "step": 5464 }, { "epoch": 0.78, "learning_rate": 4.503827820309115e-05, "loss": 3.0463, "step": 5465 }, { "epoch": 0.78, "learning_rate": 4.500938899321104e-05, "loss": 2.8559, "step": 5466 }, { "epoch": 0.78, "learning_rate": 4.4980499783330924e-05, "loss": 3.0624, "step": 5467 }, { "epoch": 0.78, "learning_rate": 4.495161057345082e-05, "loss": 3.1129, "step": 5468 }, { "epoch": 0.78, "learning_rate": 4.4922721363570705e-05, "loss": 3.1786, "step": 5469 }, { "epoch": 0.78, "learning_rate": 4.48938321536906e-05, "loss": 2.9897, "step": 5470 }, { "epoch": 0.78, "learning_rate": 4.486494294381049e-05, "loss": 3.013, "step": 5471 }, { "epoch": 0.78, "learning_rate": 4.483605373393038e-05, "loss": 3.2182, "step": 5472 }, { "epoch": 0.78, "learning_rate": 4.4807164524050274e-05, "loss": 3.0045, "step": 5473 }, { "epoch": 0.78, "learning_rate": 4.477827531417016e-05, "loss": 2.9859, "step": 5474 }, { "epoch": 0.78, "learning_rate": 4.474938610429005e-05, "loss": 3.1631, "step": 5475 }, { "epoch": 0.78, "learning_rate": 4.472049689440994e-05, "loss": 3.3028, "step": 5476 }, { "epoch": 0.78, "learning_rate": 4.469160768452983e-05, "loss": 3.1968, "step": 5477 }, { "epoch": 0.78, "learning_rate": 4.466271847464972e-05, "loss": 3.1583, "step": 5478 }, { "epoch": 0.78, "learning_rate": 4.463382926476961e-05, "loss": 3.1995, "step": 5479 }, { "epoch": 0.78, "learning_rate": 4.46049400548895e-05, "loss": 3.1569, "step": 5480 }, { "epoch": 0.78, "learning_rate": 4.457605084500939e-05, "loss": 3.0926, "step": 5481 }, { "epoch": 0.78, "learning_rate": 4.454716163512928e-05, "loss": 3.1807, "step": 5482 }, { "epoch": 0.78, "learning_rate": 4.451827242524917e-05, "loss": 3.1208, "step": 5483 }, { "epoch": 0.78, "learning_rate": 4.4489383215369065e-05, "loss": 3.1641, "step": 5484 }, { "epoch": 0.78, "learning_rate": 4.446049400548895e-05, "loss": 3.1255, "step": 5485 }, { "epoch": 0.78, "learning_rate": 4.4431604795608846e-05, "loss": 3.2746, "step": 5486 }, { "epoch": 0.78, "learning_rate": 4.440271558572873e-05, "loss": 3.1542, "step": 5487 }, { "epoch": 0.78, "learning_rate": 4.437382637584862e-05, "loss": 2.9932, "step": 5488 }, { "epoch": 0.78, "learning_rate": 4.4344937165968514e-05, "loss": 3.2184, "step": 5489 }, { "epoch": 0.78, "learning_rate": 4.43160479560884e-05, "loss": 3.1889, "step": 5490 }, { "epoch": 0.78, "learning_rate": 4.4287158746208295e-05, "loss": 3.1113, "step": 5491 }, { "epoch": 0.78, "learning_rate": 4.425826953632818e-05, "loss": 3.135, "step": 5492 }, { "epoch": 0.78, "learning_rate": 4.422938032644807e-05, "loss": 3.2097, "step": 5493 }, { "epoch": 0.78, "learning_rate": 4.420049111656796e-05, "loss": 3.2467, "step": 5494 }, { "epoch": 0.78, "learning_rate": 4.417160190668786e-05, "loss": 3.2019, "step": 5495 }, { "epoch": 0.78, "learning_rate": 4.4142712696807744e-05, "loss": 3.1264, "step": 5496 }, { "epoch": 0.78, "learning_rate": 4.411382348692764e-05, "loss": 3.0359, "step": 5497 }, { "epoch": 0.78, "learning_rate": 4.4084934277047525e-05, "loss": 3.1811, "step": 5498 }, { "epoch": 0.78, "learning_rate": 4.405604506716742e-05, "loss": 3.1627, "step": 5499 }, { "epoch": 0.78, "learning_rate": 4.4027155857287305e-05, "loss": 3.0806, "step": 5500 }, { "epoch": 0.78, "learning_rate": 4.399826664740719e-05, "loss": 3.117, "step": 5501 }, { "epoch": 0.78, "learning_rate": 4.3969377437527086e-05, "loss": 3.1178, "step": 5502 }, { "epoch": 0.78, "learning_rate": 4.394048822764697e-05, "loss": 3.1311, "step": 5503 }, { "epoch": 0.78, "learning_rate": 4.391159901776686e-05, "loss": 3.1437, "step": 5504 }, { "epoch": 0.78, "learning_rate": 4.3882709807886754e-05, "loss": 3.1687, "step": 5505 }, { "epoch": 0.78, "learning_rate": 4.385382059800665e-05, "loss": 3.1019, "step": 5506 }, { "epoch": 0.78, "learning_rate": 4.382493138812654e-05, "loss": 3.0737, "step": 5507 }, { "epoch": 0.78, "learning_rate": 4.379604217824643e-05, "loss": 3.1824, "step": 5508 }, { "epoch": 0.78, "learning_rate": 4.3767152968366316e-05, "loss": 3.1009, "step": 5509 }, { "epoch": 0.78, "learning_rate": 4.373826375848621e-05, "loss": 3.096, "step": 5510 }, { "epoch": 0.78, "learning_rate": 4.37093745486061e-05, "loss": 3.1401, "step": 5511 }, { "epoch": 0.78, "learning_rate": 4.368048533872599e-05, "loss": 3.092, "step": 5512 }, { "epoch": 0.78, "learning_rate": 4.365159612884588e-05, "loss": 3.1106, "step": 5513 }, { "epoch": 0.79, "learning_rate": 4.3622706918965765e-05, "loss": 3.0776, "step": 5514 }, { "epoch": 0.79, "learning_rate": 4.359381770908566e-05, "loss": 2.9404, "step": 5515 }, { "epoch": 0.79, "learning_rate": 4.3564928499205546e-05, "loss": 3.2286, "step": 5516 }, { "epoch": 0.79, "learning_rate": 4.353603928932544e-05, "loss": 3.0828, "step": 5517 }, { "epoch": 0.79, "learning_rate": 4.350715007944533e-05, "loss": 2.9596, "step": 5518 }, { "epoch": 0.79, "learning_rate": 4.347826086956522e-05, "loss": 3.0728, "step": 5519 }, { "epoch": 0.79, "learning_rate": 4.3449371659685114e-05, "loss": 3.1399, "step": 5520 }, { "epoch": 0.79, "learning_rate": 4.3420482449805e-05, "loss": 3.0716, "step": 5521 }, { "epoch": 0.79, "learning_rate": 4.339159323992489e-05, "loss": 3.1167, "step": 5522 }, { "epoch": 0.79, "learning_rate": 4.336270403004478e-05, "loss": 3.0735, "step": 5523 }, { "epoch": 0.79, "learning_rate": 4.333381482016467e-05, "loss": 3.0712, "step": 5524 }, { "epoch": 0.79, "learning_rate": 4.330492561028456e-05, "loss": 3.264, "step": 5525 }, { "epoch": 0.79, "learning_rate": 4.327603640040445e-05, "loss": 3.0417, "step": 5526 }, { "epoch": 0.79, "learning_rate": 4.324714719052434e-05, "loss": 3.154, "step": 5527 }, { "epoch": 0.79, "learning_rate": 4.321825798064423e-05, "loss": 3.085, "step": 5528 }, { "epoch": 0.79, "learning_rate": 4.3189368770764125e-05, "loss": 3.2064, "step": 5529 }, { "epoch": 0.79, "learning_rate": 4.316047956088401e-05, "loss": 3.1593, "step": 5530 }, { "epoch": 0.79, "learning_rate": 4.3131590351003906e-05, "loss": 3.197, "step": 5531 }, { "epoch": 0.79, "learning_rate": 4.310270114112379e-05, "loss": 2.9919, "step": 5532 }, { "epoch": 0.79, "learning_rate": 4.3073811931243686e-05, "loss": 3.201, "step": 5533 }, { "epoch": 0.79, "learning_rate": 4.3044922721363574e-05, "loss": 2.9411, "step": 5534 }, { "epoch": 0.79, "learning_rate": 4.301603351148346e-05, "loss": 3.051, "step": 5535 }, { "epoch": 0.79, "learning_rate": 4.2987144301603354e-05, "loss": 3.1746, "step": 5536 }, { "epoch": 0.79, "learning_rate": 4.295825509172324e-05, "loss": 3.1132, "step": 5537 }, { "epoch": 0.79, "learning_rate": 4.292936588184313e-05, "loss": 3.1978, "step": 5538 }, { "epoch": 0.79, "learning_rate": 4.290047667196302e-05, "loss": 2.9316, "step": 5539 }, { "epoch": 0.79, "learning_rate": 4.287158746208291e-05, "loss": 3.1084, "step": 5540 }, { "epoch": 0.79, "learning_rate": 4.28426982522028e-05, "loss": 3.1946, "step": 5541 }, { "epoch": 0.79, "learning_rate": 4.28138090423227e-05, "loss": 3.1314, "step": 5542 }, { "epoch": 0.79, "learning_rate": 4.2784919832442584e-05, "loss": 2.9638, "step": 5543 }, { "epoch": 0.79, "learning_rate": 4.275603062256248e-05, "loss": 3.1317, "step": 5544 }, { "epoch": 0.79, "learning_rate": 4.2727141412682365e-05, "loss": 3.0273, "step": 5545 }, { "epoch": 0.79, "learning_rate": 4.269825220280226e-05, "loss": 3.1777, "step": 5546 }, { "epoch": 0.79, "learning_rate": 4.2669362992922146e-05, "loss": 3.1611, "step": 5547 }, { "epoch": 0.79, "learning_rate": 4.264047378304203e-05, "loss": 3.0826, "step": 5548 }, { "epoch": 0.79, "learning_rate": 4.261158457316193e-05, "loss": 3.1074, "step": 5549 }, { "epoch": 0.79, "learning_rate": 4.2582695363281814e-05, "loss": 3.1507, "step": 5550 }, { "epoch": 0.79, "learning_rate": 4.25538061534017e-05, "loss": 2.9495, "step": 5551 }, { "epoch": 0.79, "learning_rate": 4.2524916943521595e-05, "loss": 3.155, "step": 5552 }, { "epoch": 0.79, "learning_rate": 4.249602773364149e-05, "loss": 3.105, "step": 5553 }, { "epoch": 0.79, "learning_rate": 4.246713852376138e-05, "loss": 3.1968, "step": 5554 }, { "epoch": 0.79, "learning_rate": 4.243824931388127e-05, "loss": 3.0571, "step": 5555 }, { "epoch": 0.79, "learning_rate": 4.2409360104001156e-05, "loss": 3.086, "step": 5556 }, { "epoch": 0.79, "learning_rate": 4.238047089412105e-05, "loss": 3.1495, "step": 5557 }, { "epoch": 0.79, "learning_rate": 4.235158168424094e-05, "loss": 3.0802, "step": 5558 }, { "epoch": 0.79, "learning_rate": 4.2322692474360824e-05, "loss": 3.2062, "step": 5559 }, { "epoch": 0.79, "learning_rate": 4.229380326448072e-05, "loss": 2.9959, "step": 5560 }, { "epoch": 0.79, "learning_rate": 4.2264914054600605e-05, "loss": 3.0546, "step": 5561 }, { "epoch": 0.79, "learning_rate": 4.22360248447205e-05, "loss": 3.0392, "step": 5562 }, { "epoch": 0.79, "learning_rate": 4.2207135634840386e-05, "loss": 3.0652, "step": 5563 }, { "epoch": 0.79, "learning_rate": 4.217824642496028e-05, "loss": 2.9144, "step": 5564 }, { "epoch": 0.79, "learning_rate": 4.2149357215080174e-05, "loss": 2.9547, "step": 5565 }, { "epoch": 0.79, "learning_rate": 4.212046800520006e-05, "loss": 3.2341, "step": 5566 }, { "epoch": 0.79, "learning_rate": 4.2091578795319955e-05, "loss": 3.0326, "step": 5567 }, { "epoch": 0.79, "learning_rate": 4.206268958543984e-05, "loss": 3.1595, "step": 5568 }, { "epoch": 0.79, "learning_rate": 4.203380037555973e-05, "loss": 3.2042, "step": 5569 }, { "epoch": 0.79, "learning_rate": 4.200491116567962e-05, "loss": 3.252, "step": 5570 }, { "epoch": 0.79, "learning_rate": 4.197602195579951e-05, "loss": 3.0884, "step": 5571 }, { "epoch": 0.79, "learning_rate": 4.1947132745919397e-05, "loss": 3.1607, "step": 5572 }, { "epoch": 0.79, "learning_rate": 4.191824353603929e-05, "loss": 3.0982, "step": 5573 }, { "epoch": 0.79, "learning_rate": 4.188935432615918e-05, "loss": 3.1095, "step": 5574 }, { "epoch": 0.79, "learning_rate": 4.186046511627907e-05, "loss": 3.0324, "step": 5575 }, { "epoch": 0.79, "learning_rate": 4.1831575906398965e-05, "loss": 3.0945, "step": 5576 }, { "epoch": 0.79, "learning_rate": 4.180268669651885e-05, "loss": 3.096, "step": 5577 }, { "epoch": 0.79, "learning_rate": 4.1773797486638746e-05, "loss": 3.102, "step": 5578 }, { "epoch": 0.79, "learning_rate": 4.174490827675863e-05, "loss": 3.1228, "step": 5579 }, { "epoch": 0.79, "learning_rate": 4.171601906687853e-05, "loss": 3.1557, "step": 5580 }, { "epoch": 0.79, "learning_rate": 4.1687129856998414e-05, "loss": 3.12, "step": 5581 }, { "epoch": 0.79, "learning_rate": 4.16582406471183e-05, "loss": 3.3135, "step": 5582 }, { "epoch": 0.79, "learning_rate": 4.1629351437238195e-05, "loss": 3.0905, "step": 5583 }, { "epoch": 0.8, "learning_rate": 4.160046222735808e-05, "loss": 3.2271, "step": 5584 }, { "epoch": 0.8, "learning_rate": 4.157157301747797e-05, "loss": 3.1322, "step": 5585 }, { "epoch": 0.8, "learning_rate": 4.154268380759786e-05, "loss": 3.0577, "step": 5586 }, { "epoch": 0.8, "learning_rate": 4.1513794597717757e-05, "loss": 2.9445, "step": 5587 }, { "epoch": 0.8, "learning_rate": 4.148490538783765e-05, "loss": 3.1296, "step": 5588 }, { "epoch": 0.8, "learning_rate": 4.145601617795754e-05, "loss": 3.1376, "step": 5589 }, { "epoch": 0.8, "learning_rate": 4.1427126968077424e-05, "loss": 3.1108, "step": 5590 }, { "epoch": 0.8, "learning_rate": 4.139823775819732e-05, "loss": 3.1172, "step": 5591 }, { "epoch": 0.8, "learning_rate": 4.1369348548317205e-05, "loss": 3.1765, "step": 5592 }, { "epoch": 0.8, "learning_rate": 4.134045933843709e-05, "loss": 3.1219, "step": 5593 }, { "epoch": 0.8, "learning_rate": 4.1311570128556986e-05, "loss": 3.1936, "step": 5594 }, { "epoch": 0.8, "learning_rate": 4.128268091867687e-05, "loss": 3.1505, "step": 5595 }, { "epoch": 0.8, "learning_rate": 4.125379170879677e-05, "loss": 3.1478, "step": 5596 }, { "epoch": 0.8, "learning_rate": 4.1224902498916654e-05, "loss": 3.1023, "step": 5597 }, { "epoch": 0.8, "learning_rate": 4.119601328903655e-05, "loss": 3.0069, "step": 5598 }, { "epoch": 0.8, "learning_rate": 4.1167124079156435e-05, "loss": 3.054, "step": 5599 }, { "epoch": 0.8, "learning_rate": 4.113823486927633e-05, "loss": 2.9706, "step": 5600 }, { "epoch": 0.8, "learning_rate": 4.110934565939622e-05, "loss": 3.152, "step": 5601 }, { "epoch": 0.8, "learning_rate": 4.108045644951611e-05, "loss": 3.076, "step": 5602 }, { "epoch": 0.8, "learning_rate": 4.1051567239636e-05, "loss": 3.2783, "step": 5603 }, { "epoch": 0.8, "learning_rate": 4.102267802975589e-05, "loss": 3.0151, "step": 5604 }, { "epoch": 0.8, "learning_rate": 4.099378881987578e-05, "loss": 3.1927, "step": 5605 }, { "epoch": 0.8, "learning_rate": 4.0964899609995665e-05, "loss": 3.0818, "step": 5606 }, { "epoch": 0.8, "learning_rate": 4.093601040011556e-05, "loss": 3.0521, "step": 5607 }, { "epoch": 0.8, "learning_rate": 4.0907121190235446e-05, "loss": 3.1366, "step": 5608 }, { "epoch": 0.8, "learning_rate": 4.087823198035534e-05, "loss": 3.1397, "step": 5609 }, { "epoch": 0.8, "learning_rate": 4.0849342770475226e-05, "loss": 3.1935, "step": 5610 }, { "epoch": 0.8, "learning_rate": 4.082045356059512e-05, "loss": 3.1474, "step": 5611 }, { "epoch": 0.8, "learning_rate": 4.0791564350715014e-05, "loss": 3.0972, "step": 5612 }, { "epoch": 0.8, "learning_rate": 4.07626751408349e-05, "loss": 3.0839, "step": 5613 }, { "epoch": 0.8, "learning_rate": 4.073378593095479e-05, "loss": 3.0802, "step": 5614 }, { "epoch": 0.8, "learning_rate": 4.070489672107468e-05, "loss": 3.189, "step": 5615 }, { "epoch": 0.8, "learning_rate": 4.067600751119457e-05, "loss": 3.0959, "step": 5616 }, { "epoch": 0.8, "learning_rate": 4.064711830131446e-05, "loss": 3.2138, "step": 5617 }, { "epoch": 0.8, "learning_rate": 4.061822909143435e-05, "loss": 2.9459, "step": 5618 }, { "epoch": 0.8, "learning_rate": 4.058933988155424e-05, "loss": 3.188, "step": 5619 }, { "epoch": 0.8, "learning_rate": 4.056045067167413e-05, "loss": 3.2137, "step": 5620 }, { "epoch": 0.8, "learning_rate": 4.053156146179402e-05, "loss": 3.0826, "step": 5621 }, { "epoch": 0.8, "learning_rate": 4.050267225191391e-05, "loss": 3.2074, "step": 5622 }, { "epoch": 0.8, "learning_rate": 4.0473783042033806e-05, "loss": 3.1752, "step": 5623 }, { "epoch": 0.8, "learning_rate": 4.044489383215369e-05, "loss": 3.0614, "step": 5624 }, { "epoch": 0.8, "learning_rate": 4.0416004622273586e-05, "loss": 3.1443, "step": 5625 }, { "epoch": 0.8, "learning_rate": 4.0387115412393473e-05, "loss": 3.1138, "step": 5626 }, { "epoch": 0.8, "learning_rate": 4.035822620251336e-05, "loss": 3.0843, "step": 5627 }, { "epoch": 0.8, "learning_rate": 4.0329336992633254e-05, "loss": 3.1314, "step": 5628 }, { "epoch": 0.8, "learning_rate": 4.030044778275314e-05, "loss": 3.0496, "step": 5629 }, { "epoch": 0.8, "learning_rate": 4.0271558572873035e-05, "loss": 3.2638, "step": 5630 }, { "epoch": 0.8, "learning_rate": 4.024266936299292e-05, "loss": 3.176, "step": 5631 }, { "epoch": 0.8, "learning_rate": 4.021378015311281e-05, "loss": 3.1037, "step": 5632 }, { "epoch": 0.8, "eval_loss": 3.35094952583313, "eval_runtime": 472.6533, "eval_samples_per_second": 43.345, "eval_steps_per_second": 14.448, "step": 5632 }, { "epoch": 0.8, "learning_rate": 4.01848909432327e-05, "loss": 3.1948, "step": 5633 }, { "epoch": 0.8, "learning_rate": 4.01560017333526e-05, "loss": 3.1198, "step": 5634 }, { "epoch": 0.8, "learning_rate": 4.012711252347249e-05, "loss": 3.2102, "step": 5635 }, { "epoch": 0.8, "learning_rate": 4.009822331359238e-05, "loss": 3.1267, "step": 5636 }, { "epoch": 0.8, "learning_rate": 4.0069334103712265e-05, "loss": 3.1205, "step": 5637 }, { "epoch": 0.8, "learning_rate": 4.004044489383216e-05, "loss": 3.1644, "step": 5638 }, { "epoch": 0.8, "learning_rate": 4.0011555683952046e-05, "loss": 3.211, "step": 5639 }, { "epoch": 0.8, "learning_rate": 3.998266647407193e-05, "loss": 3.1573, "step": 5640 }, { "epoch": 0.8, "learning_rate": 3.995377726419183e-05, "loss": 3.0154, "step": 5641 }, { "epoch": 0.8, "learning_rate": 3.9924888054311714e-05, "loss": 2.8478, "step": 5642 }, { "epoch": 0.8, "learning_rate": 3.989599884443161e-05, "loss": 3.2054, "step": 5643 }, { "epoch": 0.8, "learning_rate": 3.9867109634551495e-05, "loss": 3.1459, "step": 5644 }, { "epoch": 0.8, "learning_rate": 3.983822042467139e-05, "loss": 3.2107, "step": 5645 }, { "epoch": 0.8, "learning_rate": 3.980933121479128e-05, "loss": 3.0071, "step": 5646 }, { "epoch": 0.8, "learning_rate": 3.978044200491117e-05, "loss": 3.0957, "step": 5647 }, { "epoch": 0.8, "learning_rate": 3.9751552795031056e-05, "loss": 3.1346, "step": 5648 }, { "epoch": 0.8, "learning_rate": 3.972266358515095e-05, "loss": 3.0385, "step": 5649 }, { "epoch": 0.8, "learning_rate": 3.969377437527084e-05, "loss": 2.986, "step": 5650 }, { "epoch": 0.8, "learning_rate": 3.966488516539073e-05, "loss": 3.1062, "step": 5651 }, { "epoch": 0.8, "learning_rate": 3.963599595551062e-05, "loss": 3.1744, "step": 5652 }, { "epoch": 0.8, "learning_rate": 3.9607106745630505e-05, "loss": 3.0897, "step": 5653 }, { "epoch": 0.81, "learning_rate": 3.95782175357504e-05, "loss": 3.2293, "step": 5654 }, { "epoch": 0.81, "learning_rate": 3.9549328325870286e-05, "loss": 3.1602, "step": 5655 }, { "epoch": 0.81, "learning_rate": 3.952043911599018e-05, "loss": 3.1055, "step": 5656 }, { "epoch": 0.81, "learning_rate": 3.949154990611007e-05, "loss": 2.9947, "step": 5657 }, { "epoch": 0.81, "learning_rate": 3.946266069622996e-05, "loss": 3.0883, "step": 5658 }, { "epoch": 0.81, "learning_rate": 3.9433771486349854e-05, "loss": 3.0944, "step": 5659 }, { "epoch": 0.81, "learning_rate": 3.940488227646974e-05, "loss": 3.1408, "step": 5660 }, { "epoch": 0.81, "learning_rate": 3.937599306658963e-05, "loss": 3.1542, "step": 5661 }, { "epoch": 0.81, "learning_rate": 3.934710385670952e-05, "loss": 3.1758, "step": 5662 }, { "epoch": 0.81, "learning_rate": 3.931821464682941e-05, "loss": 3.0643, "step": 5663 }, { "epoch": 0.81, "learning_rate": 3.92893254369493e-05, "loss": 3.2559, "step": 5664 }, { "epoch": 0.81, "learning_rate": 3.926043622706919e-05, "loss": 3.1238, "step": 5665 }, { "epoch": 0.81, "learning_rate": 3.923154701718908e-05, "loss": 3.1534, "step": 5666 }, { "epoch": 0.81, "learning_rate": 3.920265780730897e-05, "loss": 3.1156, "step": 5667 }, { "epoch": 0.81, "learning_rate": 3.917376859742886e-05, "loss": 3.1027, "step": 5668 }, { "epoch": 0.81, "learning_rate": 3.914487938754875e-05, "loss": 3.1216, "step": 5669 }, { "epoch": 0.81, "learning_rate": 3.9115990177668646e-05, "loss": 3.0997, "step": 5670 }, { "epoch": 0.81, "learning_rate": 3.908710096778853e-05, "loss": 3.183, "step": 5671 }, { "epoch": 0.81, "learning_rate": 3.905821175790843e-05, "loss": 3.1182, "step": 5672 }, { "epoch": 0.81, "learning_rate": 3.9029322548028314e-05, "loss": 3.1436, "step": 5673 }, { "epoch": 0.81, "learning_rate": 3.90004333381482e-05, "loss": 3.1885, "step": 5674 }, { "epoch": 0.81, "learning_rate": 3.8971544128268095e-05, "loss": 3.1091, "step": 5675 }, { "epoch": 0.81, "learning_rate": 3.894265491838798e-05, "loss": 3.0086, "step": 5676 }, { "epoch": 0.81, "learning_rate": 3.8913765708507876e-05, "loss": 3.1322, "step": 5677 }, { "epoch": 0.81, "learning_rate": 3.888487649862776e-05, "loss": 3.086, "step": 5678 }, { "epoch": 0.81, "learning_rate": 3.885598728874765e-05, "loss": 3.1215, "step": 5679 }, { "epoch": 0.81, "learning_rate": 3.8827098078867544e-05, "loss": 3.0616, "step": 5680 }, { "epoch": 0.81, "learning_rate": 3.879820886898744e-05, "loss": 3.3159, "step": 5681 }, { "epoch": 0.81, "learning_rate": 3.8769319659107324e-05, "loss": 3.1251, "step": 5682 }, { "epoch": 0.81, "learning_rate": 3.874043044922722e-05, "loss": 3.1339, "step": 5683 }, { "epoch": 0.81, "learning_rate": 3.8711541239347105e-05, "loss": 3.2388, "step": 5684 }, { "epoch": 0.81, "learning_rate": 3.8682652029467e-05, "loss": 3.132, "step": 5685 }, { "epoch": 0.81, "learning_rate": 3.8653762819586886e-05, "loss": 3.1474, "step": 5686 }, { "epoch": 0.81, "learning_rate": 3.862487360970677e-05, "loss": 3.1231, "step": 5687 }, { "epoch": 0.81, "learning_rate": 3.859598439982667e-05, "loss": 3.1913, "step": 5688 }, { "epoch": 0.81, "learning_rate": 3.8567095189946554e-05, "loss": 3.2096, "step": 5689 }, { "epoch": 0.81, "learning_rate": 3.853820598006645e-05, "loss": 3.144, "step": 5690 }, { "epoch": 0.81, "learning_rate": 3.8509316770186335e-05, "loss": 3.079, "step": 5691 }, { "epoch": 0.81, "learning_rate": 3.848042756030623e-05, "loss": 3.1755, "step": 5692 }, { "epoch": 0.81, "learning_rate": 3.845153835042612e-05, "loss": 3.1755, "step": 5693 }, { "epoch": 0.81, "learning_rate": 3.842264914054601e-05, "loss": 3.0532, "step": 5694 }, { "epoch": 0.81, "learning_rate": 3.83937599306659e-05, "loss": 3.1325, "step": 5695 }, { "epoch": 0.81, "learning_rate": 3.836487072078579e-05, "loss": 3.0894, "step": 5696 }, { "epoch": 0.81, "learning_rate": 3.833598151090568e-05, "loss": 2.9988, "step": 5697 }, { "epoch": 0.81, "learning_rate": 3.830709230102557e-05, "loss": 2.995, "step": 5698 }, { "epoch": 0.81, "learning_rate": 3.827820309114546e-05, "loss": 3.0924, "step": 5699 }, { "epoch": 0.81, "learning_rate": 3.8249313881265345e-05, "loss": 3.1804, "step": 5700 }, { "epoch": 0.81, "learning_rate": 3.822042467138524e-05, "loss": 3.1738, "step": 5701 }, { "epoch": 0.81, "learning_rate": 3.8191535461505126e-05, "loss": 2.9256, "step": 5702 }, { "epoch": 0.81, "learning_rate": 3.816264625162502e-05, "loss": 3.1161, "step": 5703 }, { "epoch": 0.81, "learning_rate": 3.8133757041744914e-05, "loss": 3.2177, "step": 5704 }, { "epoch": 0.81, "learning_rate": 3.81048678318648e-05, "loss": 2.9096, "step": 5705 }, { "epoch": 0.81, "learning_rate": 3.8075978621984695e-05, "loss": 3.1869, "step": 5706 }, { "epoch": 0.81, "learning_rate": 3.804708941210458e-05, "loss": 2.9959, "step": 5707 }, { "epoch": 0.81, "learning_rate": 3.801820020222447e-05, "loss": 3.15, "step": 5708 }, { "epoch": 0.81, "learning_rate": 3.798931099234436e-05, "loss": 3.203, "step": 5709 }, { "epoch": 0.81, "learning_rate": 3.796042178246425e-05, "loss": 3.1453, "step": 5710 }, { "epoch": 0.81, "learning_rate": 3.7931532572584144e-05, "loss": 3.0416, "step": 5711 }, { "epoch": 0.81, "learning_rate": 3.790264336270403e-05, "loss": 3.1197, "step": 5712 }, { "epoch": 0.81, "learning_rate": 3.787375415282392e-05, "loss": 3.0855, "step": 5713 }, { "epoch": 0.81, "learning_rate": 3.784486494294381e-05, "loss": 3.2529, "step": 5714 }, { "epoch": 0.81, "learning_rate": 3.7815975733063705e-05, "loss": 3.1082, "step": 5715 }, { "epoch": 0.81, "learning_rate": 3.778708652318359e-05, "loss": 3.0494, "step": 5716 }, { "epoch": 0.81, "learning_rate": 3.7758197313303486e-05, "loss": 3.1625, "step": 5717 }, { "epoch": 0.81, "learning_rate": 3.772930810342337e-05, "loss": 3.1332, "step": 5718 }, { "epoch": 0.81, "learning_rate": 3.770041889354327e-05, "loss": 2.9911, "step": 5719 }, { "epoch": 0.81, "learning_rate": 3.7671529683663154e-05, "loss": 3.1648, "step": 5720 }, { "epoch": 0.81, "learning_rate": 3.764264047378304e-05, "loss": 3.1478, "step": 5721 }, { "epoch": 0.81, "learning_rate": 3.7613751263902935e-05, "loss": 3.2288, "step": 5722 }, { "epoch": 0.81, "learning_rate": 3.758486205402282e-05, "loss": 3.0045, "step": 5723 }, { "epoch": 0.81, "learning_rate": 3.7555972844142716e-05, "loss": 3.1102, "step": 5724 }, { "epoch": 0.82, "learning_rate": 3.75270836342626e-05, "loss": 3.0512, "step": 5725 }, { "epoch": 0.82, "learning_rate": 3.749819442438249e-05, "loss": 3.0181, "step": 5726 }, { "epoch": 0.82, "learning_rate": 3.7469305214502384e-05, "loss": 3.0271, "step": 5727 }, { "epoch": 0.82, "learning_rate": 3.744041600462228e-05, "loss": 3.019, "step": 5728 }, { "epoch": 0.82, "learning_rate": 3.7411526794742165e-05, "loss": 3.1775, "step": 5729 }, { "epoch": 0.82, "learning_rate": 3.738263758486206e-05, "loss": 3.1779, "step": 5730 }, { "epoch": 0.82, "learning_rate": 3.7353748374981946e-05, "loss": 2.8991, "step": 5731 }, { "epoch": 0.82, "learning_rate": 3.732485916510184e-05, "loss": 3.1068, "step": 5732 }, { "epoch": 0.82, "learning_rate": 3.7295969955221727e-05, "loss": 2.9594, "step": 5733 }, { "epoch": 0.82, "learning_rate": 3.7267080745341614e-05, "loss": 3.0418, "step": 5734 }, { "epoch": 0.82, "learning_rate": 3.723819153546151e-05, "loss": 3.0018, "step": 5735 }, { "epoch": 0.82, "learning_rate": 3.7209302325581394e-05, "loss": 3.1529, "step": 5736 }, { "epoch": 0.82, "learning_rate": 3.718041311570128e-05, "loss": 3.0914, "step": 5737 }, { "epoch": 0.82, "learning_rate": 3.7151523905821175e-05, "loss": 3.1752, "step": 5738 }, { "epoch": 0.82, "learning_rate": 3.712263469594107e-05, "loss": 3.0554, "step": 5739 }, { "epoch": 0.82, "learning_rate": 3.709374548606096e-05, "loss": 3.147, "step": 5740 }, { "epoch": 0.82, "learning_rate": 3.706485627618085e-05, "loss": 3.1708, "step": 5741 }, { "epoch": 0.82, "learning_rate": 3.703596706630074e-05, "loss": 3.1171, "step": 5742 }, { "epoch": 0.82, "learning_rate": 3.700707785642063e-05, "loss": 3.1213, "step": 5743 }, { "epoch": 0.82, "learning_rate": 3.697818864654052e-05, "loss": 3.188, "step": 5744 }, { "epoch": 0.82, "learning_rate": 3.694929943666041e-05, "loss": 2.901, "step": 5745 }, { "epoch": 0.82, "learning_rate": 3.69204102267803e-05, "loss": 3.0635, "step": 5746 }, { "epoch": 0.82, "learning_rate": 3.6891521016900186e-05, "loss": 3.1305, "step": 5747 }, { "epoch": 0.82, "learning_rate": 3.686263180702008e-05, "loss": 3.0389, "step": 5748 }, { "epoch": 0.82, "learning_rate": 3.683374259713997e-05, "loss": 3.125, "step": 5749 }, { "epoch": 0.82, "learning_rate": 3.680485338725986e-05, "loss": 3.0956, "step": 5750 }, { "epoch": 0.82, "learning_rate": 3.6775964177379754e-05, "loss": 3.1115, "step": 5751 }, { "epoch": 0.82, "learning_rate": 3.674707496749964e-05, "loss": 3.0745, "step": 5752 }, { "epoch": 0.82, "learning_rate": 3.6718185757619535e-05, "loss": 3.2283, "step": 5753 }, { "epoch": 0.82, "learning_rate": 3.668929654773942e-05, "loss": 3.2078, "step": 5754 }, { "epoch": 0.82, "learning_rate": 3.666040733785931e-05, "loss": 3.0762, "step": 5755 }, { "epoch": 0.82, "learning_rate": 3.66315181279792e-05, "loss": 3.127, "step": 5756 }, { "epoch": 0.82, "learning_rate": 3.660262891809909e-05, "loss": 3.0701, "step": 5757 }, { "epoch": 0.82, "learning_rate": 3.657373970821898e-05, "loss": 3.0519, "step": 5758 }, { "epoch": 0.82, "learning_rate": 3.654485049833887e-05, "loss": 3.0686, "step": 5759 }, { "epoch": 0.82, "learning_rate": 3.651596128845876e-05, "loss": 3.1107, "step": 5760 }, { "epoch": 0.82, "learning_rate": 3.648707207857865e-05, "loss": 3.004, "step": 5761 }, { "epoch": 0.82, "learning_rate": 3.6458182868698546e-05, "loss": 2.9062, "step": 5762 }, { "epoch": 0.82, "learning_rate": 3.642929365881843e-05, "loss": 2.9686, "step": 5763 }, { "epoch": 0.82, "learning_rate": 3.640040444893833e-05, "loss": 3.1951, "step": 5764 }, { "epoch": 0.82, "learning_rate": 3.6371515239058214e-05, "loss": 3.1462, "step": 5765 }, { "epoch": 0.82, "learning_rate": 3.634262602917811e-05, "loss": 3.2414, "step": 5766 }, { "epoch": 0.82, "learning_rate": 3.6313736819297995e-05, "loss": 3.0106, "step": 5767 }, { "epoch": 0.82, "learning_rate": 3.628484760941788e-05, "loss": 3.2559, "step": 5768 }, { "epoch": 0.82, "learning_rate": 3.6255958399537775e-05, "loss": 3.1398, "step": 5769 }, { "epoch": 0.82, "learning_rate": 3.622706918965766e-05, "loss": 3.1913, "step": 5770 }, { "epoch": 0.82, "learning_rate": 3.619817997977755e-05, "loss": 3.045, "step": 5771 }, { "epoch": 0.82, "learning_rate": 3.6169290769897443e-05, "loss": 3.1046, "step": 5772 }, { "epoch": 0.82, "learning_rate": 3.614040156001734e-05, "loss": 3.1989, "step": 5773 }, { "epoch": 0.82, "learning_rate": 3.6111512350137224e-05, "loss": 3.2277, "step": 5774 }, { "epoch": 0.82, "learning_rate": 3.608262314025712e-05, "loss": 3.0579, "step": 5775 }, { "epoch": 0.82, "learning_rate": 3.6053733930377005e-05, "loss": 3.0565, "step": 5776 }, { "epoch": 0.82, "learning_rate": 3.60248447204969e-05, "loss": 3.2018, "step": 5777 }, { "epoch": 0.82, "learning_rate": 3.5995955510616786e-05, "loss": 3.2889, "step": 5778 }, { "epoch": 0.82, "learning_rate": 3.596706630073668e-05, "loss": 3.0778, "step": 5779 }, { "epoch": 0.82, "learning_rate": 3.593817709085657e-05, "loss": 3.1019, "step": 5780 }, { "epoch": 0.82, "learning_rate": 3.5909287880976454e-05, "loss": 3.241, "step": 5781 }, { "epoch": 0.82, "learning_rate": 3.588039867109635e-05, "loss": 3.1829, "step": 5782 }, { "epoch": 0.82, "learning_rate": 3.5851509461216235e-05, "loss": 3.1955, "step": 5783 }, { "epoch": 0.82, "learning_rate": 3.582262025133613e-05, "loss": 3.2168, "step": 5784 }, { "epoch": 0.82, "learning_rate": 3.5793731041456016e-05, "loss": 3.1748, "step": 5785 }, { "epoch": 0.82, "learning_rate": 3.576484183157591e-05, "loss": 3.2433, "step": 5786 }, { "epoch": 0.82, "learning_rate": 3.57359526216958e-05, "loss": 3.0951, "step": 5787 }, { "epoch": 0.82, "learning_rate": 3.570706341181569e-05, "loss": 3.1137, "step": 5788 }, { "epoch": 0.82, "learning_rate": 3.567817420193558e-05, "loss": 3.1623, "step": 5789 }, { "epoch": 0.82, "learning_rate": 3.564928499205547e-05, "loss": 3.0718, "step": 5790 }, { "epoch": 0.82, "learning_rate": 3.562039578217536e-05, "loss": 3.1426, "step": 5791 }, { "epoch": 0.82, "learning_rate": 3.5591506572295245e-05, "loss": 3.1258, "step": 5792 }, { "epoch": 0.82, "learning_rate": 3.556261736241514e-05, "loss": 3.1174, "step": 5793 }, { "epoch": 0.82, "learning_rate": 3.5533728152535026e-05, "loss": 3.0803, "step": 5794 }, { "epoch": 0.83, "learning_rate": 3.550483894265492e-05, "loss": 3.174, "step": 5795 }, { "epoch": 0.83, "learning_rate": 3.547594973277481e-05, "loss": 3.209, "step": 5796 }, { "epoch": 0.83, "learning_rate": 3.54470605228947e-05, "loss": 3.1218, "step": 5797 }, { "epoch": 0.83, "learning_rate": 3.5418171313014595e-05, "loss": 3.1554, "step": 5798 }, { "epoch": 0.83, "learning_rate": 3.538928210313448e-05, "loss": 3.1106, "step": 5799 }, { "epoch": 0.83, "learning_rate": 3.5360392893254376e-05, "loss": 3.1266, "step": 5800 }, { "epoch": 0.83, "learning_rate": 3.533150368337426e-05, "loss": 3.1263, "step": 5801 }, { "epoch": 0.83, "learning_rate": 3.530261447349415e-05, "loss": 3.1582, "step": 5802 }, { "epoch": 0.83, "learning_rate": 3.5273725263614044e-05, "loss": 3.1887, "step": 5803 }, { "epoch": 0.83, "learning_rate": 3.524483605373393e-05, "loss": 3.0272, "step": 5804 }, { "epoch": 0.83, "learning_rate": 3.521594684385382e-05, "loss": 3.1066, "step": 5805 }, { "epoch": 0.83, "learning_rate": 3.518705763397371e-05, "loss": 3.1363, "step": 5806 }, { "epoch": 0.83, "learning_rate": 3.51581684240936e-05, "loss": 3.0005, "step": 5807 }, { "epoch": 0.83, "learning_rate": 3.512927921421349e-05, "loss": 3.1468, "step": 5808 }, { "epoch": 0.83, "learning_rate": 3.5100390004333386e-05, "loss": 3.137, "step": 5809 }, { "epoch": 0.83, "learning_rate": 3.507150079445327e-05, "loss": 2.8747, "step": 5810 }, { "epoch": 0.83, "learning_rate": 3.504261158457317e-05, "loss": 3.1055, "step": 5811 }, { "epoch": 0.83, "learning_rate": 3.5013722374693054e-05, "loss": 3.0413, "step": 5812 }, { "epoch": 0.83, "learning_rate": 3.498483316481295e-05, "loss": 3.0186, "step": 5813 }, { "epoch": 0.83, "learning_rate": 3.4955943954932835e-05, "loss": 3.0483, "step": 5814 }, { "epoch": 0.83, "learning_rate": 3.492705474505272e-05, "loss": 2.9472, "step": 5815 }, { "epoch": 0.83, "learning_rate": 3.4898165535172616e-05, "loss": 3.1869, "step": 5816 }, { "epoch": 0.83, "learning_rate": 3.48692763252925e-05, "loss": 3.091, "step": 5817 }, { "epoch": 0.83, "learning_rate": 3.484038711541239e-05, "loss": 3.0505, "step": 5818 }, { "epoch": 0.83, "learning_rate": 3.4811497905532284e-05, "loss": 3.0532, "step": 5819 }, { "epoch": 0.83, "learning_rate": 3.478260869565218e-05, "loss": 2.9293, "step": 5820 }, { "epoch": 0.83, "learning_rate": 3.475371948577207e-05, "loss": 3.1341, "step": 5821 }, { "epoch": 0.83, "learning_rate": 3.472483027589196e-05, "loss": 2.8325, "step": 5822 }, { "epoch": 0.83, "learning_rate": 3.4695941066011846e-05, "loss": 3.2622, "step": 5823 }, { "epoch": 0.83, "learning_rate": 3.466705185613174e-05, "loss": 3.1963, "step": 5824 }, { "epoch": 0.83, "learning_rate": 3.4638162646251626e-05, "loss": 3.0659, "step": 5825 }, { "epoch": 0.83, "learning_rate": 3.4609273436371513e-05, "loss": 3.1355, "step": 5826 }, { "epoch": 0.83, "learning_rate": 3.458038422649141e-05, "loss": 3.0923, "step": 5827 }, { "epoch": 0.83, "learning_rate": 3.4551495016611294e-05, "loss": 3.1681, "step": 5828 }, { "epoch": 0.83, "learning_rate": 3.452260580673119e-05, "loss": 3.0811, "step": 5829 }, { "epoch": 0.83, "learning_rate": 3.4493716596851075e-05, "loss": 3.1874, "step": 5830 }, { "epoch": 0.83, "learning_rate": 3.446482738697097e-05, "loss": 3.1023, "step": 5831 }, { "epoch": 0.83, "learning_rate": 3.443593817709086e-05, "loss": 3.1716, "step": 5832 }, { "epoch": 0.83, "learning_rate": 3.440704896721075e-05, "loss": 2.9498, "step": 5833 }, { "epoch": 0.83, "learning_rate": 3.4378159757330644e-05, "loss": 3.0727, "step": 5834 }, { "epoch": 0.83, "learning_rate": 3.434927054745053e-05, "loss": 3.0121, "step": 5835 }, { "epoch": 0.83, "learning_rate": 3.432038133757042e-05, "loss": 3.1766, "step": 5836 }, { "epoch": 0.83, "learning_rate": 3.429149212769031e-05, "loss": 3.0586, "step": 5837 }, { "epoch": 0.83, "learning_rate": 3.42626029178102e-05, "loss": 3.2201, "step": 5838 }, { "epoch": 0.83, "learning_rate": 3.4233713707930086e-05, "loss": 3.0646, "step": 5839 }, { "epoch": 0.83, "learning_rate": 3.420482449804998e-05, "loss": 3.1721, "step": 5840 }, { "epoch": 0.83, "learning_rate": 3.417593528816987e-05, "loss": 3.0589, "step": 5841 }, { "epoch": 0.83, "learning_rate": 3.414704607828976e-05, "loss": 3.1793, "step": 5842 }, { "epoch": 0.83, "learning_rate": 3.411815686840965e-05, "loss": 3.114, "step": 5843 }, { "epoch": 0.83, "learning_rate": 3.408926765852954e-05, "loss": 3.0718, "step": 5844 }, { "epoch": 0.83, "learning_rate": 3.4060378448649435e-05, "loss": 3.2071, "step": 5845 }, { "epoch": 0.83, "learning_rate": 3.403148923876932e-05, "loss": 3.1263, "step": 5846 }, { "epoch": 0.83, "learning_rate": 3.400260002888921e-05, "loss": 3.1385, "step": 5847 }, { "epoch": 0.83, "learning_rate": 3.39737108190091e-05, "loss": 3.032, "step": 5848 }, { "epoch": 0.83, "learning_rate": 3.394482160912899e-05, "loss": 3.0534, "step": 5849 }, { "epoch": 0.83, "learning_rate": 3.3915932399248884e-05, "loss": 3.0448, "step": 5850 }, { "epoch": 0.83, "learning_rate": 3.388704318936877e-05, "loss": 3.0444, "step": 5851 }, { "epoch": 0.83, "learning_rate": 3.385815397948866e-05, "loss": 3.1049, "step": 5852 }, { "epoch": 0.83, "learning_rate": 3.382926476960855e-05, "loss": 3.0882, "step": 5853 }, { "epoch": 0.83, "learning_rate": 3.380037555972844e-05, "loss": 3.1759, "step": 5854 }, { "epoch": 0.83, "learning_rate": 3.377148634984833e-05, "loss": 3.0127, "step": 5855 }, { "epoch": 0.83, "learning_rate": 3.374259713996823e-05, "loss": 3.1281, "step": 5856 }, { "epoch": 0.83, "learning_rate": 3.3713707930088114e-05, "loss": 3.1889, "step": 5857 }, { "epoch": 0.83, "learning_rate": 3.368481872020801e-05, "loss": 2.9567, "step": 5858 }, { "epoch": 0.83, "learning_rate": 3.3655929510327895e-05, "loss": 3.0444, "step": 5859 }, { "epoch": 0.83, "learning_rate": 3.362704030044778e-05, "loss": 3.0166, "step": 5860 }, { "epoch": 0.83, "learning_rate": 3.3598151090567675e-05, "loss": 3.1204, "step": 5861 }, { "epoch": 0.83, "learning_rate": 3.356926188068756e-05, "loss": 3.0538, "step": 5862 }, { "epoch": 0.83, "learning_rate": 3.3540372670807456e-05, "loss": 3.235, "step": 5863 }, { "epoch": 0.83, "learning_rate": 3.351148346092734e-05, "loss": 3.2553, "step": 5864 }, { "epoch": 0.84, "learning_rate": 3.348259425104723e-05, "loss": 2.9994, "step": 5865 }, { "epoch": 0.84, "learning_rate": 3.3453705041167124e-05, "loss": 3.061, "step": 5866 }, { "epoch": 0.84, "learning_rate": 3.342481583128702e-05, "loss": 3.1033, "step": 5867 }, { "epoch": 0.84, "learning_rate": 3.339592662140691e-05, "loss": 3.0325, "step": 5868 }, { "epoch": 0.84, "learning_rate": 3.33670374115268e-05, "loss": 3.1356, "step": 5869 }, { "epoch": 0.84, "learning_rate": 3.3338148201646686e-05, "loss": 3.1409, "step": 5870 }, { "epoch": 0.84, "learning_rate": 3.330925899176658e-05, "loss": 2.8731, "step": 5871 }, { "epoch": 0.84, "learning_rate": 3.328036978188647e-05, "loss": 2.917, "step": 5872 }, { "epoch": 0.84, "learning_rate": 3.3251480572006354e-05, "loss": 3.1651, "step": 5873 }, { "epoch": 0.84, "learning_rate": 3.322259136212625e-05, "loss": 3.0817, "step": 5874 }, { "epoch": 0.84, "learning_rate": 3.3193702152246135e-05, "loss": 3.1778, "step": 5875 }, { "epoch": 0.84, "learning_rate": 3.316481294236603e-05, "loss": 3.1177, "step": 5876 }, { "epoch": 0.84, "learning_rate": 3.3135923732485916e-05, "loss": 2.9929, "step": 5877 }, { "epoch": 0.84, "learning_rate": 3.310703452260581e-05, "loss": 3.0402, "step": 5878 }, { "epoch": 0.84, "learning_rate": 3.30781453127257e-05, "loss": 3.0111, "step": 5879 }, { "epoch": 0.84, "learning_rate": 3.304925610284559e-05, "loss": 3.1887, "step": 5880 }, { "epoch": 0.84, "learning_rate": 3.302036689296548e-05, "loss": 2.7138, "step": 5881 }, { "epoch": 0.84, "learning_rate": 3.299147768308537e-05, "loss": 3.0009, "step": 5882 }, { "epoch": 0.84, "learning_rate": 3.296258847320526e-05, "loss": 2.9829, "step": 5883 }, { "epoch": 0.84, "learning_rate": 3.293369926332515e-05, "loss": 3.099, "step": 5884 }, { "epoch": 0.84, "learning_rate": 3.290481005344504e-05, "loss": 3.1086, "step": 5885 }, { "epoch": 0.84, "learning_rate": 3.2875920843564926e-05, "loss": 3.0638, "step": 5886 }, { "epoch": 0.84, "learning_rate": 3.284703163368482e-05, "loss": 3.0933, "step": 5887 }, { "epoch": 0.84, "learning_rate": 3.281814242380471e-05, "loss": 3.1918, "step": 5888 }, { "epoch": 0.84, "learning_rate": 3.27892532139246e-05, "loss": 3.117, "step": 5889 }, { "epoch": 0.84, "learning_rate": 3.2760364004044495e-05, "loss": 3.009, "step": 5890 }, { "epoch": 0.84, "learning_rate": 3.273147479416438e-05, "loss": 3.1886, "step": 5891 }, { "epoch": 0.84, "learning_rate": 3.2702585584284276e-05, "loss": 2.9678, "step": 5892 }, { "epoch": 0.84, "learning_rate": 3.267369637440416e-05, "loss": 3.154, "step": 5893 }, { "epoch": 0.84, "learning_rate": 3.264480716452405e-05, "loss": 3.207, "step": 5894 }, { "epoch": 0.84, "learning_rate": 3.2615917954643944e-05, "loss": 3.0728, "step": 5895 }, { "epoch": 0.84, "learning_rate": 3.258702874476383e-05, "loss": 3.0138, "step": 5896 }, { "epoch": 0.84, "learning_rate": 3.2558139534883724e-05, "loss": 3.0363, "step": 5897 }, { "epoch": 0.84, "learning_rate": 3.252925032500361e-05, "loss": 3.1933, "step": 5898 }, { "epoch": 0.84, "learning_rate": 3.25003611151235e-05, "loss": 3.0942, "step": 5899 }, { "epoch": 0.84, "learning_rate": 3.247147190524339e-05, "loss": 3.1072, "step": 5900 }, { "epoch": 0.84, "learning_rate": 3.2442582695363286e-05, "loss": 3.1678, "step": 5901 }, { "epoch": 0.84, "learning_rate": 3.241369348548317e-05, "loss": 3.0824, "step": 5902 }, { "epoch": 0.84, "learning_rate": 3.238480427560307e-05, "loss": 3.0526, "step": 5903 }, { "epoch": 0.84, "learning_rate": 3.2355915065722954e-05, "loss": 3.0983, "step": 5904 }, { "epoch": 0.84, "learning_rate": 3.232702585584285e-05, "loss": 3.2043, "step": 5905 }, { "epoch": 0.84, "learning_rate": 3.2298136645962735e-05, "loss": 3.1118, "step": 5906 }, { "epoch": 0.84, "learning_rate": 3.226924743608262e-05, "loss": 3.1107, "step": 5907 }, { "epoch": 0.84, "learning_rate": 3.2240358226202516e-05, "loss": 3.1229, "step": 5908 }, { "epoch": 0.84, "learning_rate": 3.22114690163224e-05, "loss": 3.1159, "step": 5909 }, { "epoch": 0.84, "learning_rate": 3.21825798064423e-05, "loss": 3.0419, "step": 5910 }, { "epoch": 0.84, "learning_rate": 3.2153690596562184e-05, "loss": 3.2061, "step": 5911 }, { "epoch": 0.84, "learning_rate": 3.212480138668207e-05, "loss": 3.106, "step": 5912 }, { "epoch": 0.84, "learning_rate": 3.2095912176801965e-05, "loss": 3.0385, "step": 5913 }, { "epoch": 0.84, "learning_rate": 3.206702296692186e-05, "loss": 3.1538, "step": 5914 }, { "epoch": 0.84, "learning_rate": 3.2038133757041745e-05, "loss": 3.0929, "step": 5915 }, { "epoch": 0.84, "learning_rate": 3.200924454716164e-05, "loss": 3.0517, "step": 5916 }, { "epoch": 0.84, "learning_rate": 3.1980355337281526e-05, "loss": 2.9422, "step": 5917 }, { "epoch": 0.84, "learning_rate": 3.195146612740142e-05, "loss": 3.1486, "step": 5918 }, { "epoch": 0.84, "learning_rate": 3.192257691752131e-05, "loss": 3.1113, "step": 5919 }, { "epoch": 0.84, "learning_rate": 3.1893687707641194e-05, "loss": 3.1429, "step": 5920 }, { "epoch": 0.84, "learning_rate": 3.186479849776109e-05, "loss": 3.0844, "step": 5921 }, { "epoch": 0.84, "learning_rate": 3.1835909287880975e-05, "loss": 3.0432, "step": 5922 }, { "epoch": 0.84, "learning_rate": 3.180702007800087e-05, "loss": 2.9963, "step": 5923 }, { "epoch": 0.84, "learning_rate": 3.1778130868120756e-05, "loss": 2.9608, "step": 5924 }, { "epoch": 0.84, "learning_rate": 3.174924165824065e-05, "loss": 3.1549, "step": 5925 }, { "epoch": 0.84, "learning_rate": 3.1720352448360544e-05, "loss": 2.9382, "step": 5926 }, { "epoch": 0.84, "learning_rate": 3.169146323848043e-05, "loss": 3.1066, "step": 5927 }, { "epoch": 0.84, "learning_rate": 3.166257402860032e-05, "loss": 3.0371, "step": 5928 }, { "epoch": 0.84, "learning_rate": 3.163368481872021e-05, "loss": 3.04, "step": 5929 }, { "epoch": 0.84, "learning_rate": 3.16047956088401e-05, "loss": 3.1512, "step": 5930 }, { "epoch": 0.84, "learning_rate": 3.157590639895999e-05, "loss": 3.1722, "step": 5931 }, { "epoch": 0.84, "learning_rate": 3.154701718907988e-05, "loss": 2.9079, "step": 5932 }, { "epoch": 0.84, "learning_rate": 3.1518127979199767e-05, "loss": 2.8963, "step": 5933 }, { "epoch": 0.84, "learning_rate": 3.148923876931966e-05, "loss": 3.0942, "step": 5934 }, { "epoch": 0.85, "learning_rate": 3.146034955943955e-05, "loss": 3.1648, "step": 5935 }, { "epoch": 0.85, "learning_rate": 3.143146034955944e-05, "loss": 3.0909, "step": 5936 }, { "epoch": 0.85, "learning_rate": 3.1402571139679335e-05, "loss": 3.0367, "step": 5937 }, { "epoch": 0.85, "learning_rate": 3.137368192979922e-05, "loss": 2.9949, "step": 5938 }, { "epoch": 0.85, "learning_rate": 3.1344792719919116e-05, "loss": 3.0726, "step": 5939 }, { "epoch": 0.85, "learning_rate": 3.1315903510039e-05, "loss": 3.2591, "step": 5940 }, { "epoch": 0.85, "learning_rate": 3.128701430015889e-05, "loss": 2.9792, "step": 5941 }, { "epoch": 0.85, "learning_rate": 3.1258125090278784e-05, "loss": 3.0628, "step": 5942 }, { "epoch": 0.85, "learning_rate": 3.122923588039867e-05, "loss": 2.9426, "step": 5943 }, { "epoch": 0.85, "learning_rate": 3.1200346670518565e-05, "loss": 3.031, "step": 5944 }, { "epoch": 0.85, "learning_rate": 3.117145746063845e-05, "loss": 3.1403, "step": 5945 }, { "epoch": 0.85, "learning_rate": 3.114256825075834e-05, "loss": 3.1605, "step": 5946 }, { "epoch": 0.85, "learning_rate": 3.111367904087823e-05, "loss": 2.9976, "step": 5947 }, { "epoch": 0.85, "learning_rate": 3.1084789830998127e-05, "loss": 3.139, "step": 5948 }, { "epoch": 0.85, "learning_rate": 3.1055900621118014e-05, "loss": 3.1015, "step": 5949 }, { "epoch": 0.85, "learning_rate": 3.102701141123791e-05, "loss": 2.9959, "step": 5950 }, { "epoch": 0.85, "learning_rate": 3.0998122201357794e-05, "loss": 3.1794, "step": 5951 }, { "epoch": 0.85, "learning_rate": 3.096923299147769e-05, "loss": 3.0914, "step": 5952 }, { "epoch": 0.85, "learning_rate": 3.0940343781597575e-05, "loss": 3.0307, "step": 5953 }, { "epoch": 0.85, "learning_rate": 3.091145457171746e-05, "loss": 2.9845, "step": 5954 }, { "epoch": 0.85, "learning_rate": 3.0882565361837356e-05, "loss": 3.0399, "step": 5955 }, { "epoch": 0.85, "learning_rate": 3.085367615195724e-05, "loss": 3.2198, "step": 5956 }, { "epoch": 0.85, "learning_rate": 3.082478694207713e-05, "loss": 3.1431, "step": 5957 }, { "epoch": 0.85, "learning_rate": 3.0795897732197024e-05, "loss": 2.9562, "step": 5958 }, { "epoch": 0.85, "learning_rate": 3.076700852231692e-05, "loss": 3.0612, "step": 5959 }, { "epoch": 0.85, "learning_rate": 3.0738119312436805e-05, "loss": 2.9404, "step": 5960 }, { "epoch": 0.85, "learning_rate": 3.07092301025567e-05, "loss": 3.0973, "step": 5961 }, { "epoch": 0.85, "learning_rate": 3.0680340892676586e-05, "loss": 3.1981, "step": 5962 }, { "epoch": 0.85, "learning_rate": 3.065145168279648e-05, "loss": 3.0318, "step": 5963 }, { "epoch": 0.85, "learning_rate": 3.062256247291637e-05, "loss": 3.0276, "step": 5964 }, { "epoch": 0.85, "learning_rate": 3.059367326303626e-05, "loss": 2.9113, "step": 5965 }, { "epoch": 0.85, "learning_rate": 3.056478405315615e-05, "loss": 3.1369, "step": 5966 }, { "epoch": 0.85, "learning_rate": 3.0535894843276035e-05, "loss": 3.108, "step": 5967 }, { "epoch": 0.85, "learning_rate": 3.050700563339593e-05, "loss": 3.0283, "step": 5968 }, { "epoch": 0.85, "learning_rate": 3.047811642351582e-05, "loss": 3.0095, "step": 5969 }, { "epoch": 0.85, "learning_rate": 3.0449227213635706e-05, "loss": 3.19, "step": 5970 }, { "epoch": 0.85, "learning_rate": 3.04203380037556e-05, "loss": 3.0185, "step": 5971 }, { "epoch": 0.85, "learning_rate": 3.0391448793875487e-05, "loss": 3.0372, "step": 5972 }, { "epoch": 0.85, "learning_rate": 3.036255958399538e-05, "loss": 2.9137, "step": 5973 }, { "epoch": 0.85, "learning_rate": 3.033367037411527e-05, "loss": 3.1789, "step": 5974 }, { "epoch": 0.85, "learning_rate": 3.0304781164235158e-05, "loss": 3.2761, "step": 5975 }, { "epoch": 0.85, "learning_rate": 3.0275891954355052e-05, "loss": 2.9749, "step": 5976 }, { "epoch": 0.85, "learning_rate": 3.024700274447494e-05, "loss": 2.9962, "step": 5977 }, { "epoch": 0.85, "learning_rate": 3.0218113534594833e-05, "loss": 3.0592, "step": 5978 }, { "epoch": 0.85, "learning_rate": 3.018922432471472e-05, "loss": 3.1442, "step": 5979 }, { "epoch": 0.85, "learning_rate": 3.016033511483461e-05, "loss": 3.0294, "step": 5980 }, { "epoch": 0.85, "learning_rate": 3.0131445904954504e-05, "loss": 3.1141, "step": 5981 }, { "epoch": 0.85, "learning_rate": 3.010255669507439e-05, "loss": 2.9618, "step": 5982 }, { "epoch": 0.85, "learning_rate": 3.0073667485194278e-05, "loss": 2.9459, "step": 5983 }, { "epoch": 0.85, "learning_rate": 3.0044778275314172e-05, "loss": 3.0394, "step": 5984 }, { "epoch": 0.85, "eval_loss": 3.3314239978790283, "eval_runtime": 472.5204, "eval_samples_per_second": 43.357, "eval_steps_per_second": 14.452, "step": 5984 }, { "epoch": 0.85, "learning_rate": 3.001588906543406e-05, "loss": 3.1192, "step": 5985 }, { "epoch": 0.85, "learning_rate": 2.9986999855553953e-05, "loss": 2.9188, "step": 5986 }, { "epoch": 0.85, "learning_rate": 2.9958110645673843e-05, "loss": 2.913, "step": 5987 }, { "epoch": 0.85, "learning_rate": 2.992922143579373e-05, "loss": 3.1198, "step": 5988 }, { "epoch": 0.85, "learning_rate": 2.9900332225913624e-05, "loss": 2.9629, "step": 5989 }, { "epoch": 0.85, "learning_rate": 2.987144301603351e-05, "loss": 3.0012, "step": 5990 }, { "epoch": 0.85, "learning_rate": 2.9842553806153402e-05, "loss": 3.0383, "step": 5991 }, { "epoch": 0.85, "learning_rate": 2.9813664596273296e-05, "loss": 3.0143, "step": 5992 }, { "epoch": 0.85, "learning_rate": 2.9784775386393183e-05, "loss": 3.0603, "step": 5993 }, { "epoch": 0.85, "learning_rate": 2.9755886176513076e-05, "loss": 2.8586, "step": 5994 }, { "epoch": 0.85, "learning_rate": 2.9726996966632964e-05, "loss": 3.0506, "step": 5995 }, { "epoch": 0.85, "learning_rate": 2.969810775675285e-05, "loss": 3.1137, "step": 5996 }, { "epoch": 0.85, "learning_rate": 2.9669218546872744e-05, "loss": 2.989, "step": 5997 }, { "epoch": 0.85, "learning_rate": 2.9640329336992635e-05, "loss": 3.1343, "step": 5998 }, { "epoch": 0.85, "learning_rate": 2.961144012711253e-05, "loss": 2.9752, "step": 5999 }, { "epoch": 0.85, "learning_rate": 2.9582550917232416e-05, "loss": 3.0701, "step": 6000 }, { "epoch": 0.85, "learning_rate": 2.9553661707352303e-05, "loss": 3.1102, "step": 6001 }, { "epoch": 0.85, "learning_rate": 2.9524772497472197e-05, "loss": 3.1009, "step": 6002 }, { "epoch": 0.85, "learning_rate": 2.9495883287592087e-05, "loss": 3.004, "step": 6003 }, { "epoch": 0.85, "learning_rate": 2.9466994077711974e-05, "loss": 3.1166, "step": 6004 }, { "epoch": 0.85, "learning_rate": 2.9438104867831868e-05, "loss": 3.0657, "step": 6005 }, { "epoch": 0.86, "learning_rate": 2.9409215657951755e-05, "loss": 2.9392, "step": 6006 }, { "epoch": 0.86, "learning_rate": 2.938032644807165e-05, "loss": 3.1314, "step": 6007 }, { "epoch": 0.86, "learning_rate": 2.9351437238191536e-05, "loss": 3.0163, "step": 6008 }, { "epoch": 0.86, "learning_rate": 2.9322548028311426e-05, "loss": 3.2141, "step": 6009 }, { "epoch": 0.86, "learning_rate": 2.929365881843132e-05, "loss": 2.911, "step": 6010 }, { "epoch": 0.86, "learning_rate": 2.9264769608551207e-05, "loss": 2.9646, "step": 6011 }, { "epoch": 0.86, "learning_rate": 2.92358803986711e-05, "loss": 3.0786, "step": 6012 }, { "epoch": 0.86, "learning_rate": 2.9206991188790988e-05, "loss": 3.0391, "step": 6013 }, { "epoch": 0.86, "learning_rate": 2.9178101978910875e-05, "loss": 3.1584, "step": 6014 }, { "epoch": 0.86, "learning_rate": 2.914921276903077e-05, "loss": 3.0752, "step": 6015 }, { "epoch": 0.86, "learning_rate": 2.912032355915066e-05, "loss": 3.1557, "step": 6016 }, { "epoch": 0.86, "learning_rate": 2.9091434349270546e-05, "loss": 3.0763, "step": 6017 }, { "epoch": 0.86, "learning_rate": 2.906254513939044e-05, "loss": 3.075, "step": 6018 }, { "epoch": 0.86, "learning_rate": 2.9033655929510327e-05, "loss": 3.0939, "step": 6019 }, { "epoch": 0.86, "learning_rate": 2.900476671963022e-05, "loss": 3.0377, "step": 6020 }, { "epoch": 0.86, "learning_rate": 2.897587750975011e-05, "loss": 3.0022, "step": 6021 }, { "epoch": 0.86, "learning_rate": 2.894698829987e-05, "loss": 3.0906, "step": 6022 }, { "epoch": 0.86, "learning_rate": 2.8918099089989892e-05, "loss": 3.1968, "step": 6023 }, { "epoch": 0.86, "learning_rate": 2.888920988010978e-05, "loss": 3.0314, "step": 6024 }, { "epoch": 0.86, "learning_rate": 2.8860320670229666e-05, "loss": 3.1483, "step": 6025 }, { "epoch": 0.86, "learning_rate": 2.883143146034956e-05, "loss": 3.1315, "step": 6026 }, { "epoch": 0.86, "learning_rate": 2.880254225046945e-05, "loss": 3.1747, "step": 6027 }, { "epoch": 0.86, "learning_rate": 2.8773653040589345e-05, "loss": 3.1125, "step": 6028 }, { "epoch": 0.86, "learning_rate": 2.874476383070923e-05, "loss": 3.2254, "step": 6029 }, { "epoch": 0.86, "learning_rate": 2.871587462082912e-05, "loss": 3.0684, "step": 6030 }, { "epoch": 0.86, "learning_rate": 2.8686985410949013e-05, "loss": 3.0463, "step": 6031 }, { "epoch": 0.86, "learning_rate": 2.8658096201068903e-05, "loss": 2.9878, "step": 6032 }, { "epoch": 0.86, "learning_rate": 2.8629206991188793e-05, "loss": 3.1846, "step": 6033 }, { "epoch": 0.86, "learning_rate": 2.8600317781308684e-05, "loss": 3.2068, "step": 6034 }, { "epoch": 0.86, "learning_rate": 2.857142857142857e-05, "loss": 2.9923, "step": 6035 }, { "epoch": 0.86, "learning_rate": 2.8542539361548465e-05, "loss": 2.9978, "step": 6036 }, { "epoch": 0.86, "learning_rate": 2.8513650151668352e-05, "loss": 3.0313, "step": 6037 }, { "epoch": 0.86, "learning_rate": 2.8484760941788242e-05, "loss": 3.124, "step": 6038 }, { "epoch": 0.86, "learning_rate": 2.8455871731908136e-05, "loss": 2.8379, "step": 6039 }, { "epoch": 0.86, "learning_rate": 2.8426982522028023e-05, "loss": 3.0884, "step": 6040 }, { "epoch": 0.86, "learning_rate": 2.8398093312147917e-05, "loss": 2.9867, "step": 6041 }, { "epoch": 0.86, "learning_rate": 2.8369204102267804e-05, "loss": 3.1433, "step": 6042 }, { "epoch": 0.86, "learning_rate": 2.834031489238769e-05, "loss": 3.1496, "step": 6043 }, { "epoch": 0.86, "learning_rate": 2.8311425682507585e-05, "loss": 3.0847, "step": 6044 }, { "epoch": 0.86, "learning_rate": 2.8282536472627475e-05, "loss": 3.1325, "step": 6045 }, { "epoch": 0.86, "learning_rate": 2.8253647262747362e-05, "loss": 3.072, "step": 6046 }, { "epoch": 0.86, "learning_rate": 2.8224758052867256e-05, "loss": 3.1329, "step": 6047 }, { "epoch": 0.86, "learning_rate": 2.8195868842987143e-05, "loss": 2.9888, "step": 6048 }, { "epoch": 0.86, "learning_rate": 2.8166979633107037e-05, "loss": 3.1742, "step": 6049 }, { "epoch": 0.86, "learning_rate": 2.8138090423226927e-05, "loss": 3.077, "step": 6050 }, { "epoch": 0.86, "learning_rate": 2.8109201213346814e-05, "loss": 2.8865, "step": 6051 }, { "epoch": 0.86, "learning_rate": 2.8080312003466708e-05, "loss": 3.0586, "step": 6052 }, { "epoch": 0.86, "learning_rate": 2.8051422793586595e-05, "loss": 2.852, "step": 6053 }, { "epoch": 0.86, "learning_rate": 2.802253358370649e-05, "loss": 3.1931, "step": 6054 }, { "epoch": 0.86, "learning_rate": 2.7993644373826376e-05, "loss": 3.0282, "step": 6055 }, { "epoch": 0.86, "learning_rate": 2.7964755163946267e-05, "loss": 3.0781, "step": 6056 }, { "epoch": 0.86, "learning_rate": 2.793586595406616e-05, "loss": 3.1172, "step": 6057 }, { "epoch": 0.86, "learning_rate": 2.7906976744186048e-05, "loss": 3.1747, "step": 6058 }, { "epoch": 0.86, "learning_rate": 2.7878087534305935e-05, "loss": 3.1268, "step": 6059 }, { "epoch": 0.86, "learning_rate": 2.784919832442583e-05, "loss": 3.1027, "step": 6060 }, { "epoch": 0.86, "learning_rate": 2.782030911454572e-05, "loss": 3.1948, "step": 6061 }, { "epoch": 0.86, "learning_rate": 2.779141990466561e-05, "loss": 2.917, "step": 6062 }, { "epoch": 0.86, "learning_rate": 2.77625306947855e-05, "loss": 3.135, "step": 6063 }, { "epoch": 0.86, "learning_rate": 2.7733641484905387e-05, "loss": 3.2288, "step": 6064 }, { "epoch": 0.86, "learning_rate": 2.770475227502528e-05, "loss": 2.9871, "step": 6065 }, { "epoch": 0.86, "learning_rate": 2.7675863065145168e-05, "loss": 3.0479, "step": 6066 }, { "epoch": 0.86, "learning_rate": 2.764697385526506e-05, "loss": 3.2121, "step": 6067 }, { "epoch": 0.86, "learning_rate": 2.7618084645384952e-05, "loss": 2.9775, "step": 6068 }, { "epoch": 0.86, "learning_rate": 2.758919543550484e-05, "loss": 3.032, "step": 6069 }, { "epoch": 0.86, "learning_rate": 2.7560306225624733e-05, "loss": 2.9528, "step": 6070 }, { "epoch": 0.86, "learning_rate": 2.753141701574462e-05, "loss": 2.944, "step": 6071 }, { "epoch": 0.86, "learning_rate": 2.750252780586451e-05, "loss": 3.0658, "step": 6072 }, { "epoch": 0.86, "learning_rate": 2.74736385959844e-05, "loss": 3.151, "step": 6073 }, { "epoch": 0.86, "learning_rate": 2.744474938610429e-05, "loss": 3.1508, "step": 6074 }, { "epoch": 0.86, "learning_rate": 2.7415860176224185e-05, "loss": 3.1112, "step": 6075 }, { "epoch": 0.87, "learning_rate": 2.7386970966344072e-05, "loss": 3.1306, "step": 6076 }, { "epoch": 0.87, "learning_rate": 2.735808175646396e-05, "loss": 3.0435, "step": 6077 }, { "epoch": 0.87, "learning_rate": 2.7329192546583853e-05, "loss": 3.1812, "step": 6078 }, { "epoch": 0.87, "learning_rate": 2.7300303336703743e-05, "loss": 3.0725, "step": 6079 }, { "epoch": 0.87, "learning_rate": 2.727141412682363e-05, "loss": 3.0793, "step": 6080 }, { "epoch": 0.87, "learning_rate": 2.7242524916943524e-05, "loss": 3.0949, "step": 6081 }, { "epoch": 0.87, "learning_rate": 2.721363570706341e-05, "loss": 3.0673, "step": 6082 }, { "epoch": 0.87, "learning_rate": 2.7184746497183305e-05, "loss": 3.1137, "step": 6083 }, { "epoch": 0.87, "learning_rate": 2.7155857287303192e-05, "loss": 3.1059, "step": 6084 }, { "epoch": 0.87, "learning_rate": 2.7126968077423083e-05, "loss": 3.1905, "step": 6085 }, { "epoch": 0.87, "learning_rate": 2.7098078867542976e-05, "loss": 3.1762, "step": 6086 }, { "epoch": 0.87, "learning_rate": 2.7069189657662863e-05, "loss": 3.1323, "step": 6087 }, { "epoch": 0.87, "learning_rate": 2.7040300447782757e-05, "loss": 2.8869, "step": 6088 }, { "epoch": 0.87, "learning_rate": 2.7011411237902644e-05, "loss": 3.0922, "step": 6089 }, { "epoch": 0.87, "learning_rate": 2.6982522028022535e-05, "loss": 3.1239, "step": 6090 }, { "epoch": 0.87, "learning_rate": 2.695363281814243e-05, "loss": 3.0454, "step": 6091 }, { "epoch": 0.87, "learning_rate": 2.6924743608262316e-05, "loss": 3.1074, "step": 6092 }, { "epoch": 0.87, "learning_rate": 2.6895854398382203e-05, "loss": 3.1191, "step": 6093 }, { "epoch": 0.87, "learning_rate": 2.6866965188502097e-05, "loss": 3.0851, "step": 6094 }, { "epoch": 0.87, "learning_rate": 2.6838075978621984e-05, "loss": 2.9811, "step": 6095 }, { "epoch": 0.87, "learning_rate": 2.6809186768741877e-05, "loss": 3.1226, "step": 6096 }, { "epoch": 0.87, "learning_rate": 2.6780297558861768e-05, "loss": 3.1572, "step": 6097 }, { "epoch": 0.87, "learning_rate": 2.6751408348981655e-05, "loss": 3.0967, "step": 6098 }, { "epoch": 0.87, "learning_rate": 2.672251913910155e-05, "loss": 3.071, "step": 6099 }, { "epoch": 0.87, "learning_rate": 2.6693629929221436e-05, "loss": 3.1452, "step": 6100 }, { "epoch": 0.87, "learning_rate": 2.6664740719341326e-05, "loss": 3.0124, "step": 6101 }, { "epoch": 0.87, "learning_rate": 2.6635851509461217e-05, "loss": 3.021, "step": 6102 }, { "epoch": 0.87, "learning_rate": 2.6606962299581107e-05, "loss": 3.0597, "step": 6103 }, { "epoch": 0.87, "learning_rate": 2.6578073089701e-05, "loss": 3.1272, "step": 6104 }, { "epoch": 0.87, "learning_rate": 2.6549183879820888e-05, "loss": 3.0598, "step": 6105 }, { "epoch": 0.87, "learning_rate": 2.6520294669940775e-05, "loss": 3.0873, "step": 6106 }, { "epoch": 0.87, "learning_rate": 2.649140546006067e-05, "loss": 3.3011, "step": 6107 }, { "epoch": 0.87, "learning_rate": 2.646251625018056e-05, "loss": 2.9501, "step": 6108 }, { "epoch": 0.87, "learning_rate": 2.6433627040300453e-05, "loss": 3.1859, "step": 6109 }, { "epoch": 0.87, "learning_rate": 2.640473783042034e-05, "loss": 3.1604, "step": 6110 }, { "epoch": 0.87, "learning_rate": 2.6375848620540227e-05, "loss": 3.0502, "step": 6111 }, { "epoch": 0.87, "learning_rate": 2.634695941066012e-05, "loss": 3.1266, "step": 6112 }, { "epoch": 0.87, "learning_rate": 2.6318070200780008e-05, "loss": 3.1003, "step": 6113 }, { "epoch": 0.87, "learning_rate": 2.62891809908999e-05, "loss": 2.9669, "step": 6114 }, { "epoch": 0.87, "learning_rate": 2.6260291781019792e-05, "loss": 3.0027, "step": 6115 }, { "epoch": 0.87, "learning_rate": 2.623140257113968e-05, "loss": 3.2412, "step": 6116 }, { "epoch": 0.87, "learning_rate": 2.6202513361259573e-05, "loss": 3.1539, "step": 6117 }, { "epoch": 0.87, "learning_rate": 2.617362415137946e-05, "loss": 3.1696, "step": 6118 }, { "epoch": 0.87, "learning_rate": 2.614473494149935e-05, "loss": 3.1418, "step": 6119 }, { "epoch": 0.87, "learning_rate": 2.6115845731619244e-05, "loss": 2.9554, "step": 6120 }, { "epoch": 0.87, "learning_rate": 2.608695652173913e-05, "loss": 2.9984, "step": 6121 }, { "epoch": 0.87, "learning_rate": 2.6058067311859025e-05, "loss": 3.2487, "step": 6122 }, { "epoch": 0.87, "learning_rate": 2.6029178101978912e-05, "loss": 3.0161, "step": 6123 }, { "epoch": 0.87, "learning_rate": 2.60002888920988e-05, "loss": 3.0282, "step": 6124 }, { "epoch": 0.87, "learning_rate": 2.5971399682218693e-05, "loss": 3.081, "step": 6125 }, { "epoch": 0.87, "learning_rate": 2.5942510472338584e-05, "loss": 3.0999, "step": 6126 }, { "epoch": 0.87, "learning_rate": 2.591362126245847e-05, "loss": 3.1143, "step": 6127 }, { "epoch": 0.87, "learning_rate": 2.5884732052578365e-05, "loss": 3.0133, "step": 6128 }, { "epoch": 0.87, "learning_rate": 2.585584284269825e-05, "loss": 3.0934, "step": 6129 }, { "epoch": 0.87, "learning_rate": 2.5826953632818145e-05, "loss": 2.9122, "step": 6130 }, { "epoch": 0.87, "learning_rate": 2.5798064422938033e-05, "loss": 3.0558, "step": 6131 }, { "epoch": 0.87, "learning_rate": 2.5769175213057923e-05, "loss": 3.1037, "step": 6132 }, { "epoch": 0.87, "learning_rate": 2.5740286003177817e-05, "loss": 3.083, "step": 6133 }, { "epoch": 0.87, "learning_rate": 2.5711396793297704e-05, "loss": 3.2274, "step": 6134 }, { "epoch": 0.87, "learning_rate": 2.568250758341759e-05, "loss": 3.0305, "step": 6135 }, { "epoch": 0.87, "learning_rate": 2.5653618373537485e-05, "loss": 3.1308, "step": 6136 }, { "epoch": 0.87, "learning_rate": 2.5624729163657375e-05, "loss": 3.1218, "step": 6137 }, { "epoch": 0.87, "learning_rate": 2.559583995377727e-05, "loss": 3.1344, "step": 6138 }, { "epoch": 0.87, "learning_rate": 2.5566950743897156e-05, "loss": 3.2072, "step": 6139 }, { "epoch": 0.87, "learning_rate": 2.5538061534017043e-05, "loss": 3.0067, "step": 6140 }, { "epoch": 0.87, "learning_rate": 2.5509172324136937e-05, "loss": 3.1007, "step": 6141 }, { "epoch": 0.87, "learning_rate": 2.5480283114256824e-05, "loss": 3.1694, "step": 6142 }, { "epoch": 0.87, "learning_rate": 2.5451393904376718e-05, "loss": 3.1094, "step": 6143 }, { "epoch": 0.87, "learning_rate": 2.5422504694496608e-05, "loss": 3.0832, "step": 6144 }, { "epoch": 0.87, "learning_rate": 2.5393615484616495e-05, "loss": 3.2072, "step": 6145 }, { "epoch": 0.88, "learning_rate": 2.536472627473639e-05, "loss": 3.1934, "step": 6146 }, { "epoch": 0.88, "learning_rate": 2.5335837064856276e-05, "loss": 3.0024, "step": 6147 }, { "epoch": 0.88, "learning_rate": 2.5306947854976167e-05, "loss": 3.0558, "step": 6148 }, { "epoch": 0.88, "learning_rate": 2.527805864509606e-05, "loss": 3.0448, "step": 6149 }, { "epoch": 0.88, "learning_rate": 2.5249169435215947e-05, "loss": 3.2103, "step": 6150 }, { "epoch": 0.88, "learning_rate": 2.522028022533584e-05, "loss": 3.0402, "step": 6151 }, { "epoch": 0.88, "learning_rate": 2.519139101545573e-05, "loss": 3.0199, "step": 6152 }, { "epoch": 0.88, "learning_rate": 2.5162501805575615e-05, "loss": 2.9997, "step": 6153 }, { "epoch": 0.88, "learning_rate": 2.513361259569551e-05, "loss": 3.0992, "step": 6154 }, { "epoch": 0.88, "learning_rate": 2.51047233858154e-05, "loss": 3.0769, "step": 6155 }, { "epoch": 0.88, "learning_rate": 2.5075834175935287e-05, "loss": 3.0348, "step": 6156 }, { "epoch": 0.88, "learning_rate": 2.504694496605518e-05, "loss": 3.0612, "step": 6157 }, { "epoch": 0.88, "learning_rate": 2.5018055756175068e-05, "loss": 3.1116, "step": 6158 }, { "epoch": 0.88, "learning_rate": 2.4989166546294958e-05, "loss": 3.1809, "step": 6159 }, { "epoch": 0.88, "learning_rate": 2.496027733641485e-05, "loss": 2.9785, "step": 6160 }, { "epoch": 0.88, "learning_rate": 2.4931388126534742e-05, "loss": 3.1658, "step": 6161 }, { "epoch": 0.88, "learning_rate": 2.4902498916654633e-05, "loss": 3.0982, "step": 6162 }, { "epoch": 0.88, "learning_rate": 2.487360970677452e-05, "loss": 3.153, "step": 6163 }, { "epoch": 0.88, "learning_rate": 2.484472049689441e-05, "loss": 3.1239, "step": 6164 }, { "epoch": 0.88, "learning_rate": 2.48158312870143e-05, "loss": 3.0978, "step": 6165 }, { "epoch": 0.88, "learning_rate": 2.478694207713419e-05, "loss": 3.008, "step": 6166 }, { "epoch": 0.88, "learning_rate": 2.475805286725408e-05, "loss": 3.0674, "step": 6167 }, { "epoch": 0.88, "learning_rate": 2.4729163657373972e-05, "loss": 2.8771, "step": 6168 }, { "epoch": 0.88, "learning_rate": 2.4700274447493862e-05, "loss": 3.0125, "step": 6169 }, { "epoch": 0.88, "learning_rate": 2.4671385237613753e-05, "loss": 3.1738, "step": 6170 }, { "epoch": 0.88, "learning_rate": 2.464249602773364e-05, "loss": 3.0806, "step": 6171 }, { "epoch": 0.88, "learning_rate": 2.4613606817853534e-05, "loss": 2.9466, "step": 6172 }, { "epoch": 0.88, "learning_rate": 2.4584717607973424e-05, "loss": 3.0043, "step": 6173 }, { "epoch": 0.88, "learning_rate": 2.4555828398093315e-05, "loss": 3.0152, "step": 6174 }, { "epoch": 0.88, "learning_rate": 2.4526939188213205e-05, "loss": 3.0443, "step": 6175 }, { "epoch": 0.88, "learning_rate": 2.4498049978333092e-05, "loss": 3.0692, "step": 6176 }, { "epoch": 0.88, "learning_rate": 2.4469160768452982e-05, "loss": 3.1542, "step": 6177 }, { "epoch": 0.88, "learning_rate": 2.4440271558572876e-05, "loss": 3.1115, "step": 6178 }, { "epoch": 0.88, "learning_rate": 2.4411382348692767e-05, "loss": 3.1023, "step": 6179 }, { "epoch": 0.88, "learning_rate": 2.4382493138812654e-05, "loss": 3.1114, "step": 6180 }, { "epoch": 0.88, "learning_rate": 2.4353603928932544e-05, "loss": 3.1446, "step": 6181 }, { "epoch": 0.88, "learning_rate": 2.4324714719052435e-05, "loss": 3.0185, "step": 6182 }, { "epoch": 0.88, "learning_rate": 2.4295825509172325e-05, "loss": 2.9706, "step": 6183 }, { "epoch": 0.88, "learning_rate": 2.4266936299292216e-05, "loss": 3.0785, "step": 6184 }, { "epoch": 0.88, "learning_rate": 2.4238047089412106e-05, "loss": 2.9412, "step": 6185 }, { "epoch": 0.88, "learning_rate": 2.4209157879531996e-05, "loss": 3.1385, "step": 6186 }, { "epoch": 0.88, "learning_rate": 2.4180268669651887e-05, "loss": 3.2132, "step": 6187 }, { "epoch": 0.88, "learning_rate": 2.4151379459771774e-05, "loss": 3.1793, "step": 6188 }, { "epoch": 0.88, "learning_rate": 2.4122490249891668e-05, "loss": 3.1368, "step": 6189 }, { "epoch": 0.88, "learning_rate": 2.4093601040011558e-05, "loss": 2.9862, "step": 6190 }, { "epoch": 0.88, "learning_rate": 2.406471183013145e-05, "loss": 3.1003, "step": 6191 }, { "epoch": 0.88, "learning_rate": 2.4035822620251336e-05, "loss": 3.0735, "step": 6192 }, { "epoch": 0.88, "learning_rate": 2.4006933410371226e-05, "loss": 2.8659, "step": 6193 }, { "epoch": 0.88, "learning_rate": 2.3978044200491117e-05, "loss": 3.0916, "step": 6194 }, { "epoch": 0.88, "learning_rate": 2.3949154990611007e-05, "loss": 3.1532, "step": 6195 }, { "epoch": 0.88, "learning_rate": 2.39202657807309e-05, "loss": 3.1385, "step": 6196 }, { "epoch": 0.88, "learning_rate": 2.3891376570850788e-05, "loss": 2.9107, "step": 6197 }, { "epoch": 0.88, "learning_rate": 2.3862487360970678e-05, "loss": 3.0304, "step": 6198 }, { "epoch": 0.88, "learning_rate": 2.383359815109057e-05, "loss": 3.1211, "step": 6199 }, { "epoch": 0.88, "learning_rate": 2.380470894121046e-05, "loss": 3.0548, "step": 6200 }, { "epoch": 0.88, "learning_rate": 2.377581973133035e-05, "loss": 3.1243, "step": 6201 }, { "epoch": 0.88, "learning_rate": 2.374693052145024e-05, "loss": 3.1833, "step": 6202 }, { "epoch": 0.88, "learning_rate": 2.371804131157013e-05, "loss": 2.9298, "step": 6203 }, { "epoch": 0.88, "learning_rate": 2.368915210169002e-05, "loss": 3.1059, "step": 6204 }, { "epoch": 0.88, "learning_rate": 2.3660262891809908e-05, "loss": 3.1305, "step": 6205 }, { "epoch": 0.88, "learning_rate": 2.36313736819298e-05, "loss": 3.0229, "step": 6206 }, { "epoch": 0.88, "learning_rate": 2.3602484472049692e-05, "loss": 3.1692, "step": 6207 }, { "epoch": 0.88, "learning_rate": 2.3573595262169583e-05, "loss": 3.1101, "step": 6208 }, { "epoch": 0.88, "learning_rate": 2.354470605228947e-05, "loss": 2.9789, "step": 6209 }, { "epoch": 0.88, "learning_rate": 2.351581684240936e-05, "loss": 3.1158, "step": 6210 }, { "epoch": 0.88, "learning_rate": 2.348692763252925e-05, "loss": 3.0718, "step": 6211 }, { "epoch": 0.88, "learning_rate": 2.345803842264914e-05, "loss": 3.157, "step": 6212 }, { "epoch": 0.88, "learning_rate": 2.3429149212769035e-05, "loss": 3.1713, "step": 6213 }, { "epoch": 0.88, "learning_rate": 2.3400260002888922e-05, "loss": 2.9399, "step": 6214 }, { "epoch": 0.88, "learning_rate": 2.3371370793008812e-05, "loss": 3.1105, "step": 6215 }, { "epoch": 0.89, "learning_rate": 2.3342481583128703e-05, "loss": 3.0805, "step": 6216 }, { "epoch": 0.89, "learning_rate": 2.3313592373248593e-05, "loss": 3.1651, "step": 6217 }, { "epoch": 0.89, "learning_rate": 2.3284703163368484e-05, "loss": 2.9673, "step": 6218 }, { "epoch": 0.89, "learning_rate": 2.3255813953488374e-05, "loss": 3.0281, "step": 6219 }, { "epoch": 0.89, "learning_rate": 2.3226924743608265e-05, "loss": 3.0074, "step": 6220 }, { "epoch": 0.89, "learning_rate": 2.3198035533728155e-05, "loss": 3.175, "step": 6221 }, { "epoch": 0.89, "learning_rate": 2.3169146323848042e-05, "loss": 3.1241, "step": 6222 }, { "epoch": 0.89, "learning_rate": 2.3140257113967932e-05, "loss": 2.9221, "step": 6223 }, { "epoch": 0.89, "learning_rate": 2.3111367904087823e-05, "loss": 3.089, "step": 6224 }, { "epoch": 0.89, "learning_rate": 2.3082478694207717e-05, "loss": 3.1303, "step": 6225 }, { "epoch": 0.89, "learning_rate": 2.3053589484327604e-05, "loss": 3.0978, "step": 6226 }, { "epoch": 0.89, "learning_rate": 2.3024700274447494e-05, "loss": 2.8762, "step": 6227 }, { "epoch": 0.89, "learning_rate": 2.2995811064567385e-05, "loss": 2.7943, "step": 6228 }, { "epoch": 0.89, "learning_rate": 2.2966921854687275e-05, "loss": 3.1311, "step": 6229 }, { "epoch": 0.89, "learning_rate": 2.2938032644807165e-05, "loss": 3.0437, "step": 6230 }, { "epoch": 0.89, "learning_rate": 2.2909143434927056e-05, "loss": 3.0388, "step": 6231 }, { "epoch": 0.89, "learning_rate": 2.2880254225046946e-05, "loss": 3.1408, "step": 6232 }, { "epoch": 0.89, "learning_rate": 2.2851365015166837e-05, "loss": 3.0528, "step": 6233 }, { "epoch": 0.89, "learning_rate": 2.2822475805286727e-05, "loss": 3.1269, "step": 6234 }, { "epoch": 0.89, "learning_rate": 2.2793586595406614e-05, "loss": 3.1778, "step": 6235 }, { "epoch": 0.89, "learning_rate": 2.2764697385526508e-05, "loss": 3.1633, "step": 6236 }, { "epoch": 0.89, "learning_rate": 2.27358081756464e-05, "loss": 3.0909, "step": 6237 }, { "epoch": 0.89, "learning_rate": 2.270691896576629e-05, "loss": 3.0567, "step": 6238 }, { "epoch": 0.89, "learning_rate": 2.2678029755886176e-05, "loss": 2.9684, "step": 6239 }, { "epoch": 0.89, "learning_rate": 2.2649140546006066e-05, "loss": 3.0475, "step": 6240 }, { "epoch": 0.89, "learning_rate": 2.2620251336125957e-05, "loss": 3.0568, "step": 6241 }, { "epoch": 0.89, "learning_rate": 2.259136212624585e-05, "loss": 3.1187, "step": 6242 }, { "epoch": 0.89, "learning_rate": 2.2562472916365738e-05, "loss": 3.0751, "step": 6243 }, { "epoch": 0.89, "learning_rate": 2.2533583706485628e-05, "loss": 3.0804, "step": 6244 }, { "epoch": 0.89, "learning_rate": 2.250469449660552e-05, "loss": 3.1316, "step": 6245 }, { "epoch": 0.89, "learning_rate": 2.247580528672541e-05, "loss": 2.9817, "step": 6246 }, { "epoch": 0.89, "learning_rate": 2.24469160768453e-05, "loss": 3.0022, "step": 6247 }, { "epoch": 0.89, "learning_rate": 2.241802686696519e-05, "loss": 3.0777, "step": 6248 }, { "epoch": 0.89, "learning_rate": 2.238913765708508e-05, "loss": 3.0788, "step": 6249 }, { "epoch": 0.89, "learning_rate": 2.236024844720497e-05, "loss": 3.1099, "step": 6250 }, { "epoch": 0.89, "learning_rate": 2.233135923732486e-05, "loss": 3.0913, "step": 6251 }, { "epoch": 0.89, "learning_rate": 2.230247002744475e-05, "loss": 3.0541, "step": 6252 }, { "epoch": 0.89, "learning_rate": 2.227358081756464e-05, "loss": 2.9763, "step": 6253 }, { "epoch": 0.89, "learning_rate": 2.2244691607684533e-05, "loss": 3.0568, "step": 6254 }, { "epoch": 0.89, "learning_rate": 2.2215802397804423e-05, "loss": 3.2268, "step": 6255 }, { "epoch": 0.89, "learning_rate": 2.218691318792431e-05, "loss": 3.0721, "step": 6256 }, { "epoch": 0.89, "learning_rate": 2.21580239780442e-05, "loss": 3.1068, "step": 6257 }, { "epoch": 0.89, "learning_rate": 2.212913476816409e-05, "loss": 3.0956, "step": 6258 }, { "epoch": 0.89, "learning_rate": 2.210024555828398e-05, "loss": 2.968, "step": 6259 }, { "epoch": 0.89, "learning_rate": 2.2071356348403872e-05, "loss": 3.0614, "step": 6260 }, { "epoch": 0.89, "learning_rate": 2.2042467138523762e-05, "loss": 2.9459, "step": 6261 }, { "epoch": 0.89, "learning_rate": 2.2013577928643653e-05, "loss": 3.0805, "step": 6262 }, { "epoch": 0.89, "learning_rate": 2.1984688718763543e-05, "loss": 2.9983, "step": 6263 }, { "epoch": 0.89, "learning_rate": 2.195579950888343e-05, "loss": 3.1816, "step": 6264 }, { "epoch": 0.89, "learning_rate": 2.1926910299003324e-05, "loss": 3.1293, "step": 6265 }, { "epoch": 0.89, "learning_rate": 2.1898021089123214e-05, "loss": 3.0368, "step": 6266 }, { "epoch": 0.89, "learning_rate": 2.1869131879243105e-05, "loss": 3.0531, "step": 6267 }, { "epoch": 0.89, "learning_rate": 2.1840242669362995e-05, "loss": 3.1396, "step": 6268 }, { "epoch": 0.89, "learning_rate": 2.1811353459482882e-05, "loss": 2.9849, "step": 6269 }, { "epoch": 0.89, "learning_rate": 2.1782464249602773e-05, "loss": 3.1006, "step": 6270 }, { "epoch": 0.89, "learning_rate": 2.1753575039722667e-05, "loss": 3.1267, "step": 6271 }, { "epoch": 0.89, "learning_rate": 2.1724685829842557e-05, "loss": 3.1102, "step": 6272 }, { "epoch": 0.89, "learning_rate": 2.1695796619962444e-05, "loss": 3.1123, "step": 6273 }, { "epoch": 0.89, "learning_rate": 2.1666907410082335e-05, "loss": 3.0632, "step": 6274 }, { "epoch": 0.89, "learning_rate": 2.1638018200202225e-05, "loss": 3.1324, "step": 6275 }, { "epoch": 0.89, "learning_rate": 2.1609128990322115e-05, "loss": 2.9165, "step": 6276 }, { "epoch": 0.89, "learning_rate": 2.1580239780442006e-05, "loss": 3.1479, "step": 6277 }, { "epoch": 0.89, "learning_rate": 2.1551350570561896e-05, "loss": 3.0407, "step": 6278 }, { "epoch": 0.89, "learning_rate": 2.1522461360681787e-05, "loss": 3.1366, "step": 6279 }, { "epoch": 0.89, "learning_rate": 2.1493572150801677e-05, "loss": 3.132, "step": 6280 }, { "epoch": 0.89, "learning_rate": 2.1464682940921564e-05, "loss": 3.1849, "step": 6281 }, { "epoch": 0.89, "learning_rate": 2.1435793731041455e-05, "loss": 3.0809, "step": 6282 }, { "epoch": 0.89, "learning_rate": 2.140690452116135e-05, "loss": 3.1511, "step": 6283 }, { "epoch": 0.89, "learning_rate": 2.137801531128124e-05, "loss": 3.1877, "step": 6284 }, { "epoch": 0.89, "learning_rate": 2.134912610140113e-05, "loss": 3.0118, "step": 6285 }, { "epoch": 0.89, "learning_rate": 2.1320236891521016e-05, "loss": 3.0608, "step": 6286 }, { "epoch": 0.9, "learning_rate": 2.1291347681640907e-05, "loss": 2.9224, "step": 6287 }, { "epoch": 0.9, "learning_rate": 2.1262458471760797e-05, "loss": 3.0317, "step": 6288 }, { "epoch": 0.9, "learning_rate": 2.123356926188069e-05, "loss": 3.1024, "step": 6289 }, { "epoch": 0.9, "learning_rate": 2.1204680052000578e-05, "loss": 2.9715, "step": 6290 }, { "epoch": 0.9, "learning_rate": 2.117579084212047e-05, "loss": 3.0075, "step": 6291 }, { "epoch": 0.9, "learning_rate": 2.114690163224036e-05, "loss": 3.0364, "step": 6292 }, { "epoch": 0.9, "learning_rate": 2.111801242236025e-05, "loss": 3.0769, "step": 6293 }, { "epoch": 0.9, "learning_rate": 2.108912321248014e-05, "loss": 2.9219, "step": 6294 }, { "epoch": 0.9, "learning_rate": 2.106023400260003e-05, "loss": 3.1058, "step": 6295 }, { "epoch": 0.9, "learning_rate": 2.103134479271992e-05, "loss": 3.0322, "step": 6296 }, { "epoch": 0.9, "learning_rate": 2.100245558283981e-05, "loss": 2.9246, "step": 6297 }, { "epoch": 0.9, "learning_rate": 2.0973566372959698e-05, "loss": 3.0454, "step": 6298 }, { "epoch": 0.9, "learning_rate": 2.094467716307959e-05, "loss": 3.1417, "step": 6299 }, { "epoch": 0.9, "learning_rate": 2.0915787953199483e-05, "loss": 2.9522, "step": 6300 }, { "epoch": 0.9, "learning_rate": 2.0886898743319373e-05, "loss": 3.0798, "step": 6301 }, { "epoch": 0.9, "learning_rate": 2.0858009533439263e-05, "loss": 3.0087, "step": 6302 }, { "epoch": 0.9, "learning_rate": 2.082912032355915e-05, "loss": 3.03, "step": 6303 }, { "epoch": 0.9, "learning_rate": 2.080023111367904e-05, "loss": 3.1378, "step": 6304 }, { "epoch": 0.9, "learning_rate": 2.077134190379893e-05, "loss": 3.0412, "step": 6305 }, { "epoch": 0.9, "learning_rate": 2.0742452693918825e-05, "loss": 3.1259, "step": 6306 }, { "epoch": 0.9, "learning_rate": 2.0713563484038712e-05, "loss": 2.9775, "step": 6307 }, { "epoch": 0.9, "learning_rate": 2.0684674274158603e-05, "loss": 2.9517, "step": 6308 }, { "epoch": 0.9, "learning_rate": 2.0655785064278493e-05, "loss": 3.0625, "step": 6309 }, { "epoch": 0.9, "learning_rate": 2.0626895854398384e-05, "loss": 2.9316, "step": 6310 }, { "epoch": 0.9, "learning_rate": 2.0598006644518274e-05, "loss": 3.1149, "step": 6311 }, { "epoch": 0.9, "learning_rate": 2.0569117434638164e-05, "loss": 3.2036, "step": 6312 }, { "epoch": 0.9, "learning_rate": 2.0540228224758055e-05, "loss": 3.1569, "step": 6313 }, { "epoch": 0.9, "learning_rate": 2.0511339014877945e-05, "loss": 3.1524, "step": 6314 }, { "epoch": 0.9, "learning_rate": 2.0482449804997832e-05, "loss": 3.0428, "step": 6315 }, { "epoch": 0.9, "learning_rate": 2.0453560595117723e-05, "loss": 3.0148, "step": 6316 }, { "epoch": 0.9, "learning_rate": 2.0424671385237613e-05, "loss": 3.051, "step": 6317 }, { "epoch": 0.9, "learning_rate": 2.0395782175357507e-05, "loss": 3.0329, "step": 6318 }, { "epoch": 0.9, "learning_rate": 2.0366892965477394e-05, "loss": 2.9549, "step": 6319 }, { "epoch": 0.9, "learning_rate": 2.0338003755597285e-05, "loss": 3.1519, "step": 6320 }, { "epoch": 0.9, "learning_rate": 2.0309114545717175e-05, "loss": 2.9162, "step": 6321 }, { "epoch": 0.9, "learning_rate": 2.0280225335837065e-05, "loss": 3.012, "step": 6322 }, { "epoch": 0.9, "learning_rate": 2.0251336125956956e-05, "loss": 2.8915, "step": 6323 }, { "epoch": 0.9, "learning_rate": 2.0222446916076846e-05, "loss": 3.0236, "step": 6324 }, { "epoch": 0.9, "learning_rate": 2.0193557706196737e-05, "loss": 3.0933, "step": 6325 }, { "epoch": 0.9, "learning_rate": 2.0164668496316627e-05, "loss": 3.1941, "step": 6326 }, { "epoch": 0.9, "learning_rate": 2.0135779286436518e-05, "loss": 3.1418, "step": 6327 }, { "epoch": 0.9, "learning_rate": 2.0106890076556405e-05, "loss": 3.0804, "step": 6328 }, { "epoch": 0.9, "learning_rate": 2.00780008666763e-05, "loss": 3.0721, "step": 6329 }, { "epoch": 0.9, "learning_rate": 2.004911165679619e-05, "loss": 3.2047, "step": 6330 }, { "epoch": 0.9, "learning_rate": 2.002022244691608e-05, "loss": 3.0389, "step": 6331 }, { "epoch": 0.9, "learning_rate": 1.9991333237035966e-05, "loss": 3.1679, "step": 6332 }, { "epoch": 0.9, "learning_rate": 1.9962444027155857e-05, "loss": 3.0885, "step": 6333 }, { "epoch": 0.9, "learning_rate": 1.9933554817275747e-05, "loss": 3.0729, "step": 6334 }, { "epoch": 0.9, "learning_rate": 1.990466560739564e-05, "loss": 2.9343, "step": 6335 }, { "epoch": 0.9, "learning_rate": 1.9875776397515528e-05, "loss": 3.0759, "step": 6336 }, { "epoch": 0.9, "eval_loss": 3.3094747066497803, "eval_runtime": 472.097, "eval_samples_per_second": 43.396, "eval_steps_per_second": 14.465, "step": 6336 }, { "epoch": 0.9, "learning_rate": 1.984688718763542e-05, "loss": 3.1271, "step": 6337 }, { "epoch": 0.9, "learning_rate": 1.981799797775531e-05, "loss": 2.8808, "step": 6338 }, { "epoch": 0.9, "learning_rate": 1.97891087678752e-05, "loss": 2.9369, "step": 6339 }, { "epoch": 0.9, "learning_rate": 1.976021955799509e-05, "loss": 3.0485, "step": 6340 }, { "epoch": 0.9, "learning_rate": 1.973133034811498e-05, "loss": 3.0099, "step": 6341 }, { "epoch": 0.9, "learning_rate": 1.970244113823487e-05, "loss": 3.0134, "step": 6342 }, { "epoch": 0.9, "learning_rate": 1.967355192835476e-05, "loss": 3.1073, "step": 6343 }, { "epoch": 0.9, "learning_rate": 1.964466271847465e-05, "loss": 3.1248, "step": 6344 }, { "epoch": 0.9, "learning_rate": 1.961577350859454e-05, "loss": 2.9222, "step": 6345 }, { "epoch": 0.9, "learning_rate": 1.958688429871443e-05, "loss": 3.0365, "step": 6346 }, { "epoch": 0.9, "learning_rate": 1.9557995088834323e-05, "loss": 3.0688, "step": 6347 }, { "epoch": 0.9, "learning_rate": 1.9529105878954213e-05, "loss": 3.073, "step": 6348 }, { "epoch": 0.9, "learning_rate": 1.95002166690741e-05, "loss": 2.8182, "step": 6349 }, { "epoch": 0.9, "learning_rate": 1.947132745919399e-05, "loss": 3.1043, "step": 6350 }, { "epoch": 0.9, "learning_rate": 1.944243824931388e-05, "loss": 3.1836, "step": 6351 }, { "epoch": 0.9, "learning_rate": 1.9413549039433772e-05, "loss": 3.1243, "step": 6352 }, { "epoch": 0.9, "learning_rate": 1.9384659829553662e-05, "loss": 3.1342, "step": 6353 }, { "epoch": 0.9, "learning_rate": 1.9355770619673553e-05, "loss": 3.1523, "step": 6354 }, { "epoch": 0.9, "learning_rate": 1.9326881409793443e-05, "loss": 3.1038, "step": 6355 }, { "epoch": 0.9, "learning_rate": 1.9297992199913334e-05, "loss": 2.985, "step": 6356 }, { "epoch": 0.91, "learning_rate": 1.9269102990033224e-05, "loss": 2.9722, "step": 6357 }, { "epoch": 0.91, "learning_rate": 1.9240213780153114e-05, "loss": 3.0341, "step": 6358 }, { "epoch": 0.91, "learning_rate": 1.9211324570273005e-05, "loss": 3.0669, "step": 6359 }, { "epoch": 0.91, "learning_rate": 1.9182435360392895e-05, "loss": 2.9711, "step": 6360 }, { "epoch": 0.91, "learning_rate": 1.9153546150512786e-05, "loss": 3.0699, "step": 6361 }, { "epoch": 0.91, "learning_rate": 1.9124656940632673e-05, "loss": 2.9977, "step": 6362 }, { "epoch": 0.91, "learning_rate": 1.9095767730752563e-05, "loss": 3.0904, "step": 6363 }, { "epoch": 0.91, "learning_rate": 1.9066878520872457e-05, "loss": 3.2007, "step": 6364 }, { "epoch": 0.91, "learning_rate": 1.9037989310992347e-05, "loss": 3.0675, "step": 6365 }, { "epoch": 0.91, "learning_rate": 1.9009100101112234e-05, "loss": 2.9933, "step": 6366 }, { "epoch": 0.91, "learning_rate": 1.8980210891232125e-05, "loss": 3.0551, "step": 6367 }, { "epoch": 0.91, "learning_rate": 1.8951321681352015e-05, "loss": 3.1746, "step": 6368 }, { "epoch": 0.91, "learning_rate": 1.8922432471471906e-05, "loss": 2.9598, "step": 6369 }, { "epoch": 0.91, "learning_rate": 1.8893543261591796e-05, "loss": 2.9074, "step": 6370 }, { "epoch": 0.91, "learning_rate": 1.8864654051711687e-05, "loss": 3.0811, "step": 6371 }, { "epoch": 0.91, "learning_rate": 1.8835764841831577e-05, "loss": 2.9267, "step": 6372 }, { "epoch": 0.91, "learning_rate": 1.8806875631951468e-05, "loss": 2.8797, "step": 6373 }, { "epoch": 0.91, "learning_rate": 1.8777986422071358e-05, "loss": 2.917, "step": 6374 }, { "epoch": 0.91, "learning_rate": 1.8749097212191245e-05, "loss": 3.0835, "step": 6375 }, { "epoch": 0.91, "learning_rate": 1.872020800231114e-05, "loss": 2.9676, "step": 6376 }, { "epoch": 0.91, "learning_rate": 1.869131879243103e-05, "loss": 3.1162, "step": 6377 }, { "epoch": 0.91, "learning_rate": 1.866242958255092e-05, "loss": 3.0682, "step": 6378 }, { "epoch": 0.91, "learning_rate": 1.8633540372670807e-05, "loss": 3.166, "step": 6379 }, { "epoch": 0.91, "learning_rate": 1.8604651162790697e-05, "loss": 3.1365, "step": 6380 }, { "epoch": 0.91, "learning_rate": 1.8575761952910588e-05, "loss": 3.1739, "step": 6381 }, { "epoch": 0.91, "learning_rate": 1.854687274303048e-05, "loss": 3.0823, "step": 6382 }, { "epoch": 0.91, "learning_rate": 1.851798353315037e-05, "loss": 3.1357, "step": 6383 }, { "epoch": 0.91, "learning_rate": 1.848909432327026e-05, "loss": 3.0598, "step": 6384 }, { "epoch": 0.91, "learning_rate": 1.846020511339015e-05, "loss": 2.9985, "step": 6385 }, { "epoch": 0.91, "learning_rate": 1.843131590351004e-05, "loss": 3.013, "step": 6386 }, { "epoch": 0.91, "learning_rate": 1.840242669362993e-05, "loss": 3.0844, "step": 6387 }, { "epoch": 0.91, "learning_rate": 1.837353748374982e-05, "loss": 2.892, "step": 6388 }, { "epoch": 0.91, "learning_rate": 1.834464827386971e-05, "loss": 2.9809, "step": 6389 }, { "epoch": 0.91, "learning_rate": 1.83157590639896e-05, "loss": 3.1594, "step": 6390 }, { "epoch": 0.91, "learning_rate": 1.828686985410949e-05, "loss": 3.0109, "step": 6391 }, { "epoch": 0.91, "learning_rate": 1.825798064422938e-05, "loss": 3.0524, "step": 6392 }, { "epoch": 0.91, "learning_rate": 1.8229091434349273e-05, "loss": 3.11, "step": 6393 }, { "epoch": 0.91, "learning_rate": 1.8200202224469163e-05, "loss": 3.0801, "step": 6394 }, { "epoch": 0.91, "learning_rate": 1.8171313014589054e-05, "loss": 3.0253, "step": 6395 }, { "epoch": 0.91, "learning_rate": 1.814242380470894e-05, "loss": 3.1307, "step": 6396 }, { "epoch": 0.91, "learning_rate": 1.811353459482883e-05, "loss": 3.0491, "step": 6397 }, { "epoch": 0.91, "learning_rate": 1.8084645384948722e-05, "loss": 2.8271, "step": 6398 }, { "epoch": 0.91, "learning_rate": 1.8055756175068612e-05, "loss": 3.1495, "step": 6399 }, { "epoch": 0.91, "learning_rate": 1.8026866965188503e-05, "loss": 3.1997, "step": 6400 }, { "epoch": 0.91, "learning_rate": 1.7997977755308393e-05, "loss": 3.0777, "step": 6401 }, { "epoch": 0.91, "learning_rate": 1.7969088545428283e-05, "loss": 3.0433, "step": 6402 }, { "epoch": 0.91, "learning_rate": 1.7940199335548174e-05, "loss": 2.9814, "step": 6403 }, { "epoch": 0.91, "learning_rate": 1.7911310125668064e-05, "loss": 3.0668, "step": 6404 }, { "epoch": 0.91, "learning_rate": 1.7882420915787955e-05, "loss": 3.0224, "step": 6405 }, { "epoch": 0.91, "learning_rate": 1.7853531705907845e-05, "loss": 2.9634, "step": 6406 }, { "epoch": 0.91, "learning_rate": 1.7824642496027736e-05, "loss": 3.064, "step": 6407 }, { "epoch": 0.91, "learning_rate": 1.7795753286147623e-05, "loss": 3.1138, "step": 6408 }, { "epoch": 0.91, "learning_rate": 1.7766864076267513e-05, "loss": 2.8335, "step": 6409 }, { "epoch": 0.91, "learning_rate": 1.7737974866387404e-05, "loss": 3.0095, "step": 6410 }, { "epoch": 0.91, "learning_rate": 1.7709085656507297e-05, "loss": 3.1095, "step": 6411 }, { "epoch": 0.91, "learning_rate": 1.7680196446627188e-05, "loss": 2.9441, "step": 6412 }, { "epoch": 0.91, "learning_rate": 1.7651307236747075e-05, "loss": 3.0799, "step": 6413 }, { "epoch": 0.91, "learning_rate": 1.7622418026866965e-05, "loss": 3.0277, "step": 6414 }, { "epoch": 0.91, "learning_rate": 1.7593528816986856e-05, "loss": 2.9749, "step": 6415 }, { "epoch": 0.91, "learning_rate": 1.7564639607106746e-05, "loss": 3.1017, "step": 6416 }, { "epoch": 0.91, "learning_rate": 1.7535750397226637e-05, "loss": 3.1977, "step": 6417 }, { "epoch": 0.91, "learning_rate": 1.7506861187346527e-05, "loss": 3.0746, "step": 6418 }, { "epoch": 0.91, "learning_rate": 1.7477971977466418e-05, "loss": 3.0608, "step": 6419 }, { "epoch": 0.91, "learning_rate": 1.7449082767586308e-05, "loss": 3.1119, "step": 6420 }, { "epoch": 0.91, "learning_rate": 1.7420193557706195e-05, "loss": 2.9672, "step": 6421 }, { "epoch": 0.91, "learning_rate": 1.739130434782609e-05, "loss": 3.1024, "step": 6422 }, { "epoch": 0.91, "learning_rate": 1.736241513794598e-05, "loss": 3.1323, "step": 6423 }, { "epoch": 0.91, "learning_rate": 1.733352592806587e-05, "loss": 3.093, "step": 6424 }, { "epoch": 0.91, "learning_rate": 1.7304636718185757e-05, "loss": 3.0176, "step": 6425 }, { "epoch": 0.91, "learning_rate": 1.7275747508305647e-05, "loss": 3.1097, "step": 6426 }, { "epoch": 0.92, "learning_rate": 1.7246858298425538e-05, "loss": 2.881, "step": 6427 }, { "epoch": 0.92, "learning_rate": 1.721796908854543e-05, "loss": 3.1614, "step": 6428 }, { "epoch": 0.92, "learning_rate": 1.7189079878665322e-05, "loss": 3.0169, "step": 6429 }, { "epoch": 0.92, "learning_rate": 1.716019066878521e-05, "loss": 3.0446, "step": 6430 }, { "epoch": 0.92, "learning_rate": 1.71313014589051e-05, "loss": 2.9543, "step": 6431 }, { "epoch": 0.92, "learning_rate": 1.710241224902499e-05, "loss": 3.0359, "step": 6432 }, { "epoch": 0.92, "learning_rate": 1.707352303914488e-05, "loss": 3.0817, "step": 6433 }, { "epoch": 0.92, "learning_rate": 1.704463382926477e-05, "loss": 3.0606, "step": 6434 }, { "epoch": 0.92, "learning_rate": 1.701574461938466e-05, "loss": 3.0164, "step": 6435 }, { "epoch": 0.92, "learning_rate": 1.698685540950455e-05, "loss": 3.1532, "step": 6436 }, { "epoch": 0.92, "learning_rate": 1.6957966199624442e-05, "loss": 2.9962, "step": 6437 }, { "epoch": 0.92, "learning_rate": 1.692907698974433e-05, "loss": 3.1381, "step": 6438 }, { "epoch": 0.92, "learning_rate": 1.690018777986422e-05, "loss": 3.0888, "step": 6439 }, { "epoch": 0.92, "learning_rate": 1.6871298569984113e-05, "loss": 3.0824, "step": 6440 }, { "epoch": 0.92, "learning_rate": 1.6842409360104004e-05, "loss": 2.7761, "step": 6441 }, { "epoch": 0.92, "learning_rate": 1.681352015022389e-05, "loss": 3.1271, "step": 6442 }, { "epoch": 0.92, "learning_rate": 1.678463094034378e-05, "loss": 3.1044, "step": 6443 }, { "epoch": 0.92, "learning_rate": 1.675574173046367e-05, "loss": 3.0952, "step": 6444 }, { "epoch": 0.92, "learning_rate": 1.6726852520583562e-05, "loss": 2.8992, "step": 6445 }, { "epoch": 0.92, "learning_rate": 1.6697963310703456e-05, "loss": 3.1165, "step": 6446 }, { "epoch": 0.92, "learning_rate": 1.6669074100823343e-05, "loss": 3.2024, "step": 6447 }, { "epoch": 0.92, "learning_rate": 1.6640184890943233e-05, "loss": 2.7159, "step": 6448 }, { "epoch": 0.92, "learning_rate": 1.6611295681063124e-05, "loss": 3.1082, "step": 6449 }, { "epoch": 0.92, "learning_rate": 1.6582406471183014e-05, "loss": 3.1386, "step": 6450 }, { "epoch": 0.92, "learning_rate": 1.6553517261302905e-05, "loss": 3.0407, "step": 6451 }, { "epoch": 0.92, "learning_rate": 1.6524628051422795e-05, "loss": 3.0281, "step": 6452 }, { "epoch": 0.92, "learning_rate": 1.6495738841542686e-05, "loss": 3.0393, "step": 6453 }, { "epoch": 0.92, "learning_rate": 1.6466849631662576e-05, "loss": 3.0324, "step": 6454 }, { "epoch": 0.92, "learning_rate": 1.6437960421782463e-05, "loss": 2.9705, "step": 6455 }, { "epoch": 0.92, "learning_rate": 1.6409071211902354e-05, "loss": 2.9463, "step": 6456 }, { "epoch": 0.92, "learning_rate": 1.6380182002022247e-05, "loss": 3.0211, "step": 6457 }, { "epoch": 0.92, "learning_rate": 1.6351292792142138e-05, "loss": 2.9502, "step": 6458 }, { "epoch": 0.92, "learning_rate": 1.6322403582262025e-05, "loss": 2.9689, "step": 6459 }, { "epoch": 0.92, "learning_rate": 1.6293514372381915e-05, "loss": 3.1397, "step": 6460 }, { "epoch": 0.92, "learning_rate": 1.6264625162501806e-05, "loss": 3.0759, "step": 6461 }, { "epoch": 0.92, "learning_rate": 1.6235735952621696e-05, "loss": 2.9888, "step": 6462 }, { "epoch": 0.92, "learning_rate": 1.6206846742741587e-05, "loss": 3.0348, "step": 6463 }, { "epoch": 0.92, "learning_rate": 1.6177957532861477e-05, "loss": 3.0546, "step": 6464 }, { "epoch": 0.92, "learning_rate": 1.6149068322981367e-05, "loss": 3.0652, "step": 6465 }, { "epoch": 0.92, "learning_rate": 1.6120179113101258e-05, "loss": 2.9981, "step": 6466 }, { "epoch": 0.92, "learning_rate": 1.609128990322115e-05, "loss": 3.0046, "step": 6467 }, { "epoch": 0.92, "learning_rate": 1.6062400693341035e-05, "loss": 2.9306, "step": 6468 }, { "epoch": 0.92, "learning_rate": 1.603351148346093e-05, "loss": 3.0131, "step": 6469 }, { "epoch": 0.92, "learning_rate": 1.600462227358082e-05, "loss": 3.0597, "step": 6470 }, { "epoch": 0.92, "learning_rate": 1.597573306370071e-05, "loss": 2.9273, "step": 6471 }, { "epoch": 0.92, "learning_rate": 1.5946843853820597e-05, "loss": 3.0676, "step": 6472 }, { "epoch": 0.92, "learning_rate": 1.5917954643940488e-05, "loss": 3.0043, "step": 6473 }, { "epoch": 0.92, "learning_rate": 1.5889065434060378e-05, "loss": 3.0176, "step": 6474 }, { "epoch": 0.92, "learning_rate": 1.5860176224180272e-05, "loss": 3.1659, "step": 6475 }, { "epoch": 0.92, "learning_rate": 1.583128701430016e-05, "loss": 3.2128, "step": 6476 }, { "epoch": 0.92, "learning_rate": 1.580239780442005e-05, "loss": 3.0655, "step": 6477 }, { "epoch": 0.92, "learning_rate": 1.577350859453994e-05, "loss": 2.962, "step": 6478 }, { "epoch": 0.92, "learning_rate": 1.574461938465983e-05, "loss": 3.2248, "step": 6479 }, { "epoch": 0.92, "learning_rate": 1.571573017477972e-05, "loss": 3.1467, "step": 6480 }, { "epoch": 0.92, "learning_rate": 1.568684096489961e-05, "loss": 2.7129, "step": 6481 }, { "epoch": 0.92, "learning_rate": 1.56579517550195e-05, "loss": 3.1727, "step": 6482 }, { "epoch": 0.92, "learning_rate": 1.5629062545139392e-05, "loss": 2.9672, "step": 6483 }, { "epoch": 0.92, "learning_rate": 1.5600173335259282e-05, "loss": 3.0074, "step": 6484 }, { "epoch": 0.92, "learning_rate": 1.557128412537917e-05, "loss": 3.0807, "step": 6485 }, { "epoch": 0.92, "learning_rate": 1.5542394915499063e-05, "loss": 3.1327, "step": 6486 }, { "epoch": 0.92, "learning_rate": 1.5513505705618954e-05, "loss": 2.9074, "step": 6487 }, { "epoch": 0.92, "learning_rate": 1.5484616495738844e-05, "loss": 3.0862, "step": 6488 }, { "epoch": 0.92, "learning_rate": 1.545572728585873e-05, "loss": 3.0671, "step": 6489 }, { "epoch": 0.92, "learning_rate": 1.542683807597862e-05, "loss": 3.0044, "step": 6490 }, { "epoch": 0.92, "learning_rate": 1.5397948866098512e-05, "loss": 3.1161, "step": 6491 }, { "epoch": 0.92, "learning_rate": 1.5369059656218402e-05, "loss": 2.9369, "step": 6492 }, { "epoch": 0.92, "learning_rate": 1.5340170446338293e-05, "loss": 3.1136, "step": 6493 }, { "epoch": 0.92, "learning_rate": 1.5311281236458183e-05, "loss": 3.0749, "step": 6494 }, { "epoch": 0.92, "learning_rate": 1.5282392026578074e-05, "loss": 3.1212, "step": 6495 }, { "epoch": 0.92, "learning_rate": 1.5253502816697964e-05, "loss": 3.0919, "step": 6496 }, { "epoch": 0.93, "learning_rate": 1.5224613606817853e-05, "loss": 3.0911, "step": 6497 }, { "epoch": 0.93, "learning_rate": 1.5195724396937743e-05, "loss": 3.2119, "step": 6498 }, { "epoch": 0.93, "learning_rate": 1.5166835187057636e-05, "loss": 3.1722, "step": 6499 }, { "epoch": 0.93, "learning_rate": 1.5137945977177526e-05, "loss": 3.1116, "step": 6500 }, { "epoch": 0.93, "learning_rate": 1.5109056767297416e-05, "loss": 2.9683, "step": 6501 }, { "epoch": 0.93, "learning_rate": 1.5080167557417305e-05, "loss": 3.0567, "step": 6502 }, { "epoch": 0.93, "learning_rate": 1.5051278347537196e-05, "loss": 3.0653, "step": 6503 }, { "epoch": 0.93, "learning_rate": 1.5022389137657086e-05, "loss": 2.9544, "step": 6504 }, { "epoch": 0.93, "learning_rate": 1.4993499927776976e-05, "loss": 3.0834, "step": 6505 }, { "epoch": 0.93, "learning_rate": 1.4964610717896865e-05, "loss": 3.1934, "step": 6506 }, { "epoch": 0.93, "learning_rate": 1.4935721508016756e-05, "loss": 3.0452, "step": 6507 }, { "epoch": 0.93, "learning_rate": 1.4906832298136648e-05, "loss": 3.1946, "step": 6508 }, { "epoch": 0.93, "learning_rate": 1.4877943088256538e-05, "loss": 2.9808, "step": 6509 }, { "epoch": 0.93, "learning_rate": 1.4849053878376425e-05, "loss": 3.0162, "step": 6510 }, { "epoch": 0.93, "learning_rate": 1.4820164668496317e-05, "loss": 3.0212, "step": 6511 }, { "epoch": 0.93, "learning_rate": 1.4791275458616208e-05, "loss": 2.8753, "step": 6512 }, { "epoch": 0.93, "learning_rate": 1.4762386248736098e-05, "loss": 2.995, "step": 6513 }, { "epoch": 0.93, "learning_rate": 1.4733497038855987e-05, "loss": 2.9755, "step": 6514 }, { "epoch": 0.93, "learning_rate": 1.4704607828975877e-05, "loss": 2.9062, "step": 6515 }, { "epoch": 0.93, "learning_rate": 1.4675718619095768e-05, "loss": 3.1349, "step": 6516 }, { "epoch": 0.93, "learning_rate": 1.464682940921566e-05, "loss": 2.9564, "step": 6517 }, { "epoch": 0.93, "learning_rate": 1.461794019933555e-05, "loss": 3.0887, "step": 6518 }, { "epoch": 0.93, "learning_rate": 1.4589050989455438e-05, "loss": 3.1461, "step": 6519 }, { "epoch": 0.93, "learning_rate": 1.456016177957533e-05, "loss": 2.9868, "step": 6520 }, { "epoch": 0.93, "learning_rate": 1.453127256969522e-05, "loss": 2.8988, "step": 6521 }, { "epoch": 0.93, "learning_rate": 1.450238335981511e-05, "loss": 2.965, "step": 6522 }, { "epoch": 0.93, "learning_rate": 1.4473494149935e-05, "loss": 2.9583, "step": 6523 }, { "epoch": 0.93, "learning_rate": 1.444460494005489e-05, "loss": 3.0323, "step": 6524 }, { "epoch": 0.93, "learning_rate": 1.441571573017478e-05, "loss": 3.0841, "step": 6525 }, { "epoch": 0.93, "learning_rate": 1.4386826520294672e-05, "loss": 3.0853, "step": 6526 }, { "epoch": 0.93, "learning_rate": 1.435793731041456e-05, "loss": 3.1281, "step": 6527 }, { "epoch": 0.93, "learning_rate": 1.4329048100534451e-05, "loss": 2.9758, "step": 6528 }, { "epoch": 0.93, "learning_rate": 1.4300158890654342e-05, "loss": 3.1012, "step": 6529 }, { "epoch": 0.93, "learning_rate": 1.4271269680774232e-05, "loss": 3.1736, "step": 6530 }, { "epoch": 0.93, "learning_rate": 1.4242380470894121e-05, "loss": 2.9168, "step": 6531 }, { "epoch": 0.93, "learning_rate": 1.4213491261014012e-05, "loss": 2.9395, "step": 6532 }, { "epoch": 0.93, "learning_rate": 1.4184602051133902e-05, "loss": 3.179, "step": 6533 }, { "epoch": 0.93, "learning_rate": 1.4155712841253792e-05, "loss": 3.0333, "step": 6534 }, { "epoch": 0.93, "learning_rate": 1.4126823631373681e-05, "loss": 3.0926, "step": 6535 }, { "epoch": 0.93, "learning_rate": 1.4097934421493572e-05, "loss": 3.0646, "step": 6536 }, { "epoch": 0.93, "learning_rate": 1.4069045211613464e-05, "loss": 3.0286, "step": 6537 }, { "epoch": 0.93, "learning_rate": 1.4040156001733354e-05, "loss": 3.095, "step": 6538 }, { "epoch": 0.93, "learning_rate": 1.4011266791853245e-05, "loss": 3.0268, "step": 6539 }, { "epoch": 0.93, "learning_rate": 1.3982377581973133e-05, "loss": 3.0744, "step": 6540 }, { "epoch": 0.93, "learning_rate": 1.3953488372093024e-05, "loss": 3.1404, "step": 6541 }, { "epoch": 0.93, "learning_rate": 1.3924599162212914e-05, "loss": 3.1033, "step": 6542 }, { "epoch": 0.93, "learning_rate": 1.3895709952332805e-05, "loss": 3.0797, "step": 6543 }, { "epoch": 0.93, "learning_rate": 1.3866820742452693e-05, "loss": 3.1705, "step": 6544 }, { "epoch": 0.93, "learning_rate": 1.3837931532572584e-05, "loss": 3.0224, "step": 6545 }, { "epoch": 0.93, "learning_rate": 1.3809042322692476e-05, "loss": 2.8371, "step": 6546 }, { "epoch": 0.93, "learning_rate": 1.3780153112812366e-05, "loss": 2.915, "step": 6547 }, { "epoch": 0.93, "learning_rate": 1.3751263902932255e-05, "loss": 2.9611, "step": 6548 }, { "epoch": 0.93, "learning_rate": 1.3722374693052146e-05, "loss": 3.1396, "step": 6549 }, { "epoch": 0.93, "learning_rate": 1.3693485483172036e-05, "loss": 2.9345, "step": 6550 }, { "epoch": 0.93, "learning_rate": 1.3664596273291926e-05, "loss": 3.0005, "step": 6551 }, { "epoch": 0.93, "learning_rate": 1.3635707063411815e-05, "loss": 2.9932, "step": 6552 }, { "epoch": 0.93, "learning_rate": 1.3606817853531706e-05, "loss": 3.0044, "step": 6553 }, { "epoch": 0.93, "learning_rate": 1.3577928643651596e-05, "loss": 2.9093, "step": 6554 }, { "epoch": 0.93, "learning_rate": 1.3549039433771488e-05, "loss": 3.0998, "step": 6555 }, { "epoch": 0.93, "learning_rate": 1.3520150223891379e-05, "loss": 3.0678, "step": 6556 }, { "epoch": 0.93, "learning_rate": 1.3491261014011267e-05, "loss": 3.1757, "step": 6557 }, { "epoch": 0.93, "learning_rate": 1.3462371804131158e-05, "loss": 2.8707, "step": 6558 }, { "epoch": 0.93, "learning_rate": 1.3433482594251048e-05, "loss": 3.0569, "step": 6559 }, { "epoch": 0.93, "learning_rate": 1.3404593384370939e-05, "loss": 3.0525, "step": 6560 }, { "epoch": 0.93, "learning_rate": 1.3375704174490827e-05, "loss": 3.019, "step": 6561 }, { "epoch": 0.93, "learning_rate": 1.3346814964610718e-05, "loss": 3.0802, "step": 6562 }, { "epoch": 0.93, "learning_rate": 1.3317925754730608e-05, "loss": 3.1213, "step": 6563 }, { "epoch": 0.93, "learning_rate": 1.32890365448505e-05, "loss": 3.1184, "step": 6564 }, { "epoch": 0.93, "learning_rate": 1.3260147334970387e-05, "loss": 3.0327, "step": 6565 }, { "epoch": 0.93, "learning_rate": 1.323125812509028e-05, "loss": 3.0315, "step": 6566 }, { "epoch": 0.94, "learning_rate": 1.320236891521017e-05, "loss": 3.0842, "step": 6567 }, { "epoch": 0.94, "learning_rate": 1.317347970533006e-05, "loss": 3.0889, "step": 6568 }, { "epoch": 0.94, "learning_rate": 1.314459049544995e-05, "loss": 3.0679, "step": 6569 }, { "epoch": 0.94, "learning_rate": 1.311570128556984e-05, "loss": 3.0597, "step": 6570 }, { "epoch": 0.94, "learning_rate": 1.308681207568973e-05, "loss": 2.9619, "step": 6571 }, { "epoch": 0.94, "learning_rate": 1.3057922865809622e-05, "loss": 3.0097, "step": 6572 }, { "epoch": 0.94, "learning_rate": 1.3029033655929513e-05, "loss": 3.1552, "step": 6573 }, { "epoch": 0.94, "learning_rate": 1.30001444460494e-05, "loss": 3.0397, "step": 6574 }, { "epoch": 0.94, "learning_rate": 1.2971255236169292e-05, "loss": 3.1342, "step": 6575 }, { "epoch": 0.94, "learning_rate": 1.2942366026289182e-05, "loss": 3.0991, "step": 6576 }, { "epoch": 0.94, "learning_rate": 1.2913476816409073e-05, "loss": 3.0415, "step": 6577 }, { "epoch": 0.94, "learning_rate": 1.2884587606528961e-05, "loss": 3.0041, "step": 6578 }, { "epoch": 0.94, "learning_rate": 1.2855698396648852e-05, "loss": 2.9867, "step": 6579 }, { "epoch": 0.94, "learning_rate": 1.2826809186768742e-05, "loss": 2.9954, "step": 6580 }, { "epoch": 0.94, "learning_rate": 1.2797919976888634e-05, "loss": 3.0415, "step": 6581 }, { "epoch": 0.94, "learning_rate": 1.2769030767008522e-05, "loss": 3.0023, "step": 6582 }, { "epoch": 0.94, "learning_rate": 1.2740141557128412e-05, "loss": 2.8956, "step": 6583 }, { "epoch": 0.94, "learning_rate": 1.2711252347248304e-05, "loss": 2.8938, "step": 6584 }, { "epoch": 0.94, "learning_rate": 1.2682363137368195e-05, "loss": 3.0022, "step": 6585 }, { "epoch": 0.94, "learning_rate": 1.2653473927488083e-05, "loss": 3.0178, "step": 6586 }, { "epoch": 0.94, "learning_rate": 1.2624584717607974e-05, "loss": 3.0648, "step": 6587 }, { "epoch": 0.94, "learning_rate": 1.2595695507727864e-05, "loss": 3.0671, "step": 6588 }, { "epoch": 0.94, "learning_rate": 1.2566806297847755e-05, "loss": 3.0802, "step": 6589 }, { "epoch": 0.94, "learning_rate": 1.2537917087967643e-05, "loss": 3.0391, "step": 6590 }, { "epoch": 0.94, "learning_rate": 1.2509027878087534e-05, "loss": 3.1359, "step": 6591 }, { "epoch": 0.94, "learning_rate": 1.2480138668207424e-05, "loss": 2.9749, "step": 6592 }, { "epoch": 0.94, "learning_rate": 1.2451249458327316e-05, "loss": 3.0734, "step": 6593 }, { "epoch": 0.94, "learning_rate": 1.2422360248447205e-05, "loss": 3.0312, "step": 6594 }, { "epoch": 0.94, "learning_rate": 1.2393471038567096e-05, "loss": 3.1918, "step": 6595 }, { "epoch": 0.94, "learning_rate": 1.2364581828686986e-05, "loss": 2.9507, "step": 6596 }, { "epoch": 0.94, "learning_rate": 1.2335692618806876e-05, "loss": 3.1098, "step": 6597 }, { "epoch": 0.94, "learning_rate": 1.2306803408926767e-05, "loss": 2.9422, "step": 6598 }, { "epoch": 0.94, "learning_rate": 1.2277914199046657e-05, "loss": 3.2444, "step": 6599 }, { "epoch": 0.94, "learning_rate": 1.2249024989166546e-05, "loss": 3.0561, "step": 6600 }, { "epoch": 0.94, "learning_rate": 1.2220135779286438e-05, "loss": 3.0919, "step": 6601 }, { "epoch": 0.94, "learning_rate": 1.2191246569406327e-05, "loss": 2.9444, "step": 6602 }, { "epoch": 0.94, "learning_rate": 1.2162357359526217e-05, "loss": 3.1651, "step": 6603 }, { "epoch": 0.94, "learning_rate": 1.2133468149646108e-05, "loss": 3.0717, "step": 6604 }, { "epoch": 0.94, "learning_rate": 1.2104578939765998e-05, "loss": 2.861, "step": 6605 }, { "epoch": 0.94, "learning_rate": 1.2075689729885887e-05, "loss": 3.1206, "step": 6606 }, { "epoch": 0.94, "learning_rate": 1.2046800520005779e-05, "loss": 3.0615, "step": 6607 }, { "epoch": 0.94, "learning_rate": 1.2017911310125668e-05, "loss": 3.1829, "step": 6608 }, { "epoch": 0.94, "learning_rate": 1.1989022100245558e-05, "loss": 3.0526, "step": 6609 }, { "epoch": 0.94, "learning_rate": 1.196013289036545e-05, "loss": 3.0154, "step": 6610 }, { "epoch": 0.94, "learning_rate": 1.1931243680485339e-05, "loss": 3.0023, "step": 6611 }, { "epoch": 0.94, "learning_rate": 1.190235447060523e-05, "loss": 2.9746, "step": 6612 }, { "epoch": 0.94, "learning_rate": 1.187346526072512e-05, "loss": 3.0954, "step": 6613 }, { "epoch": 0.94, "learning_rate": 1.184457605084501e-05, "loss": 3.0068, "step": 6614 }, { "epoch": 0.94, "learning_rate": 1.18156868409649e-05, "loss": 3.1252, "step": 6615 }, { "epoch": 0.94, "learning_rate": 1.1786797631084791e-05, "loss": 3.038, "step": 6616 }, { "epoch": 0.94, "learning_rate": 1.175790842120468e-05, "loss": 3.0386, "step": 6617 }, { "epoch": 0.94, "learning_rate": 1.172901921132457e-05, "loss": 3.0446, "step": 6618 }, { "epoch": 0.94, "learning_rate": 1.1700130001444461e-05, "loss": 3.0548, "step": 6619 }, { "epoch": 0.94, "learning_rate": 1.1671240791564351e-05, "loss": 3.117, "step": 6620 }, { "epoch": 0.94, "learning_rate": 1.1642351581684242e-05, "loss": 2.9915, "step": 6621 }, { "epoch": 0.94, "learning_rate": 1.1613462371804132e-05, "loss": 3.1946, "step": 6622 }, { "epoch": 0.94, "learning_rate": 1.1584573161924021e-05, "loss": 2.9643, "step": 6623 }, { "epoch": 0.94, "learning_rate": 1.1555683952043911e-05, "loss": 3.0892, "step": 6624 }, { "epoch": 0.94, "learning_rate": 1.1526794742163802e-05, "loss": 2.959, "step": 6625 }, { "epoch": 0.94, "learning_rate": 1.1497905532283692e-05, "loss": 2.9867, "step": 6626 }, { "epoch": 0.94, "learning_rate": 1.1469016322403583e-05, "loss": 3.0531, "step": 6627 }, { "epoch": 0.94, "learning_rate": 1.1440127112523473e-05, "loss": 3.1347, "step": 6628 }, { "epoch": 0.94, "learning_rate": 1.1411237902643364e-05, "loss": 3.1322, "step": 6629 }, { "epoch": 0.94, "learning_rate": 1.1382348692763254e-05, "loss": 2.8694, "step": 6630 }, { "epoch": 0.94, "learning_rate": 1.1353459482883144e-05, "loss": 3.0304, "step": 6631 }, { "epoch": 0.94, "learning_rate": 1.1324570273003033e-05, "loss": 3.0735, "step": 6632 }, { "epoch": 0.94, "learning_rate": 1.1295681063122925e-05, "loss": 3.0781, "step": 6633 }, { "epoch": 0.94, "learning_rate": 1.1266791853242814e-05, "loss": 3.2113, "step": 6634 }, { "epoch": 0.94, "learning_rate": 1.1237902643362705e-05, "loss": 3.1034, "step": 6635 }, { "epoch": 0.94, "learning_rate": 1.1209013433482595e-05, "loss": 3.0906, "step": 6636 }, { "epoch": 0.94, "learning_rate": 1.1180124223602485e-05, "loss": 3.0979, "step": 6637 }, { "epoch": 0.95, "learning_rate": 1.1151235013722374e-05, "loss": 3.0023, "step": 6638 }, { "epoch": 0.95, "learning_rate": 1.1122345803842266e-05, "loss": 3.1516, "step": 6639 }, { "epoch": 0.95, "learning_rate": 1.1093456593962155e-05, "loss": 3.141, "step": 6640 }, { "epoch": 0.95, "learning_rate": 1.1064567384082045e-05, "loss": 3.0904, "step": 6641 }, { "epoch": 0.95, "learning_rate": 1.1035678174201936e-05, "loss": 3.1283, "step": 6642 }, { "epoch": 0.95, "learning_rate": 1.1006788964321826e-05, "loss": 2.9994, "step": 6643 }, { "epoch": 0.95, "learning_rate": 1.0977899754441715e-05, "loss": 3.1035, "step": 6644 }, { "epoch": 0.95, "learning_rate": 1.0949010544561607e-05, "loss": 3.1685, "step": 6645 }, { "epoch": 0.95, "learning_rate": 1.0920121334681498e-05, "loss": 2.9159, "step": 6646 }, { "epoch": 0.95, "learning_rate": 1.0891232124801386e-05, "loss": 3.0004, "step": 6647 }, { "epoch": 0.95, "learning_rate": 1.0862342914921279e-05, "loss": 3.0733, "step": 6648 }, { "epoch": 0.95, "learning_rate": 1.0833453705041167e-05, "loss": 2.9666, "step": 6649 }, { "epoch": 0.95, "learning_rate": 1.0804564495161058e-05, "loss": 3.0647, "step": 6650 }, { "epoch": 0.95, "learning_rate": 1.0775675285280948e-05, "loss": 3.0639, "step": 6651 }, { "epoch": 0.95, "learning_rate": 1.0746786075400839e-05, "loss": 3.1523, "step": 6652 }, { "epoch": 0.95, "learning_rate": 1.0717896865520727e-05, "loss": 3.0446, "step": 6653 }, { "epoch": 0.95, "learning_rate": 1.068900765564062e-05, "loss": 2.9252, "step": 6654 }, { "epoch": 0.95, "learning_rate": 1.0660118445760508e-05, "loss": 3.0308, "step": 6655 }, { "epoch": 0.95, "learning_rate": 1.0631229235880399e-05, "loss": 3.2152, "step": 6656 }, { "epoch": 0.95, "learning_rate": 1.0602340026000289e-05, "loss": 3.0464, "step": 6657 }, { "epoch": 0.95, "learning_rate": 1.057345081612018e-05, "loss": 3.028, "step": 6658 }, { "epoch": 0.95, "learning_rate": 1.054456160624007e-05, "loss": 3.1068, "step": 6659 }, { "epoch": 0.95, "learning_rate": 1.051567239635996e-05, "loss": 2.9705, "step": 6660 }, { "epoch": 0.95, "learning_rate": 1.0486783186479849e-05, "loss": 2.9226, "step": 6661 }, { "epoch": 0.95, "learning_rate": 1.0457893976599741e-05, "loss": 2.8683, "step": 6662 }, { "epoch": 0.95, "learning_rate": 1.0429004766719632e-05, "loss": 3.086, "step": 6663 }, { "epoch": 0.95, "learning_rate": 1.040011555683952e-05, "loss": 2.85, "step": 6664 }, { "epoch": 0.95, "learning_rate": 1.0371226346959413e-05, "loss": 3.0683, "step": 6665 }, { "epoch": 0.95, "learning_rate": 1.0342337137079301e-05, "loss": 3.0026, "step": 6666 }, { "epoch": 0.95, "learning_rate": 1.0313447927199192e-05, "loss": 3.0649, "step": 6667 }, { "epoch": 0.95, "learning_rate": 1.0284558717319082e-05, "loss": 3.0641, "step": 6668 }, { "epoch": 0.95, "learning_rate": 1.0255669507438973e-05, "loss": 3.0668, "step": 6669 }, { "epoch": 0.95, "learning_rate": 1.0226780297558861e-05, "loss": 3.0345, "step": 6670 }, { "epoch": 0.95, "learning_rate": 1.0197891087678754e-05, "loss": 3.0709, "step": 6671 }, { "epoch": 0.95, "learning_rate": 1.0169001877798642e-05, "loss": 2.8739, "step": 6672 }, { "epoch": 0.95, "learning_rate": 1.0140112667918533e-05, "loss": 3.0179, "step": 6673 }, { "epoch": 0.95, "learning_rate": 1.0111223458038423e-05, "loss": 3.0285, "step": 6674 }, { "epoch": 0.95, "learning_rate": 1.0082334248158314e-05, "loss": 3.0476, "step": 6675 }, { "epoch": 0.95, "learning_rate": 1.0053445038278202e-05, "loss": 3.0647, "step": 6676 }, { "epoch": 0.95, "learning_rate": 1.0024555828398094e-05, "loss": 3.0383, "step": 6677 }, { "epoch": 0.95, "learning_rate": 9.995666618517983e-06, "loss": 3.0514, "step": 6678 }, { "epoch": 0.95, "learning_rate": 9.966777408637874e-06, "loss": 3.198, "step": 6679 }, { "epoch": 0.95, "learning_rate": 9.937888198757764e-06, "loss": 3.1583, "step": 6680 }, { "epoch": 0.95, "learning_rate": 9.908998988877655e-06, "loss": 2.9471, "step": 6681 }, { "epoch": 0.95, "learning_rate": 9.880109778997545e-06, "loss": 3.0897, "step": 6682 }, { "epoch": 0.95, "learning_rate": 9.851220569117435e-06, "loss": 3.1209, "step": 6683 }, { "epoch": 0.95, "learning_rate": 9.822331359237326e-06, "loss": 2.9804, "step": 6684 }, { "epoch": 0.95, "learning_rate": 9.793442149357215e-06, "loss": 2.9855, "step": 6685 }, { "epoch": 0.95, "learning_rate": 9.764552939477107e-06, "loss": 3.0023, "step": 6686 }, { "epoch": 0.95, "learning_rate": 9.735663729596995e-06, "loss": 3.104, "step": 6687 }, { "epoch": 0.95, "learning_rate": 9.706774519716886e-06, "loss": 3.1989, "step": 6688 }, { "epoch": 0.95, "eval_loss": 3.2940800189971924, "eval_runtime": 471.7893, "eval_samples_per_second": 43.424, "eval_steps_per_second": 14.475, "step": 6688 }, { "epoch": 0.95, "learning_rate": 9.677885309836776e-06, "loss": 3.0624, "step": 6689 }, { "epoch": 0.95, "learning_rate": 9.648996099956667e-06, "loss": 3.0859, "step": 6690 }, { "epoch": 0.95, "learning_rate": 9.620106890076557e-06, "loss": 3.1474, "step": 6691 }, { "epoch": 0.95, "learning_rate": 9.591217680196448e-06, "loss": 2.9431, "step": 6692 }, { "epoch": 0.95, "learning_rate": 9.562328470316336e-06, "loss": 3.0516, "step": 6693 }, { "epoch": 0.95, "learning_rate": 9.533439260436229e-06, "loss": 3.0875, "step": 6694 }, { "epoch": 0.95, "learning_rate": 9.504550050556117e-06, "loss": 3.0427, "step": 6695 }, { "epoch": 0.95, "learning_rate": 9.475660840676008e-06, "loss": 2.9797, "step": 6696 }, { "epoch": 0.95, "learning_rate": 9.446771630795898e-06, "loss": 3.033, "step": 6697 }, { "epoch": 0.95, "learning_rate": 9.417882420915789e-06, "loss": 2.9754, "step": 6698 }, { "epoch": 0.95, "learning_rate": 9.388993211035679e-06, "loss": 2.9346, "step": 6699 }, { "epoch": 0.95, "learning_rate": 9.36010400115557e-06, "loss": 3.1591, "step": 6700 }, { "epoch": 0.95, "learning_rate": 9.33121479127546e-06, "loss": 3.022, "step": 6701 }, { "epoch": 0.95, "learning_rate": 9.302325581395349e-06, "loss": 3.0103, "step": 6702 }, { "epoch": 0.95, "learning_rate": 9.27343637151524e-06, "loss": 3.0677, "step": 6703 }, { "epoch": 0.95, "learning_rate": 9.24454716163513e-06, "loss": 3.1395, "step": 6704 }, { "epoch": 0.95, "learning_rate": 9.21565795175502e-06, "loss": 2.9199, "step": 6705 }, { "epoch": 0.95, "learning_rate": 9.18676874187491e-06, "loss": 3.1351, "step": 6706 }, { "epoch": 0.95, "learning_rate": 9.1578795319948e-06, "loss": 3.0665, "step": 6707 }, { "epoch": 0.96, "learning_rate": 9.12899032211469e-06, "loss": 3.0258, "step": 6708 }, { "epoch": 0.96, "learning_rate": 9.100101112234582e-06, "loss": 3.0946, "step": 6709 }, { "epoch": 0.96, "learning_rate": 9.07121190235447e-06, "loss": 3.0898, "step": 6710 }, { "epoch": 0.96, "learning_rate": 9.042322692474361e-06, "loss": 2.9603, "step": 6711 }, { "epoch": 0.96, "learning_rate": 9.013433482594251e-06, "loss": 3.1076, "step": 6712 }, { "epoch": 0.96, "learning_rate": 8.984544272714142e-06, "loss": 3.1297, "step": 6713 }, { "epoch": 0.96, "learning_rate": 8.955655062834032e-06, "loss": 3.1439, "step": 6714 }, { "epoch": 0.96, "learning_rate": 8.926765852953923e-06, "loss": 2.9976, "step": 6715 }, { "epoch": 0.96, "learning_rate": 8.897876643073811e-06, "loss": 3.1051, "step": 6716 }, { "epoch": 0.96, "learning_rate": 8.868987433193702e-06, "loss": 2.979, "step": 6717 }, { "epoch": 0.96, "learning_rate": 8.840098223313594e-06, "loss": 3.129, "step": 6718 }, { "epoch": 0.96, "learning_rate": 8.811209013433483e-06, "loss": 2.9428, "step": 6719 }, { "epoch": 0.96, "learning_rate": 8.782319803553373e-06, "loss": 3.0957, "step": 6720 }, { "epoch": 0.96, "learning_rate": 8.753430593673264e-06, "loss": 3.0369, "step": 6721 }, { "epoch": 0.96, "learning_rate": 8.724541383793154e-06, "loss": 3.0829, "step": 6722 }, { "epoch": 0.96, "learning_rate": 8.695652173913044e-06, "loss": 3.1139, "step": 6723 }, { "epoch": 0.96, "learning_rate": 8.666762964032935e-06, "loss": 2.9817, "step": 6724 }, { "epoch": 0.96, "learning_rate": 8.637873754152824e-06, "loss": 3.0779, "step": 6725 }, { "epoch": 0.96, "learning_rate": 8.608984544272716e-06, "loss": 3.1538, "step": 6726 }, { "epoch": 0.96, "learning_rate": 8.580095334392604e-06, "loss": 3.0275, "step": 6727 }, { "epoch": 0.96, "learning_rate": 8.551206124512495e-06, "loss": 2.9255, "step": 6728 }, { "epoch": 0.96, "learning_rate": 8.522316914632385e-06, "loss": 2.8025, "step": 6729 }, { "epoch": 0.96, "learning_rate": 8.493427704752276e-06, "loss": 3.1091, "step": 6730 }, { "epoch": 0.96, "learning_rate": 8.464538494872165e-06, "loss": 3.1142, "step": 6731 }, { "epoch": 0.96, "learning_rate": 8.435649284992057e-06, "loss": 3.0966, "step": 6732 }, { "epoch": 0.96, "learning_rate": 8.406760075111945e-06, "loss": 3.0464, "step": 6733 }, { "epoch": 0.96, "learning_rate": 8.377870865231836e-06, "loss": 3.0345, "step": 6734 }, { "epoch": 0.96, "learning_rate": 8.348981655351728e-06, "loss": 3.0332, "step": 6735 }, { "epoch": 0.96, "learning_rate": 8.320092445471617e-06, "loss": 2.8829, "step": 6736 }, { "epoch": 0.96, "learning_rate": 8.291203235591507e-06, "loss": 3.0757, "step": 6737 }, { "epoch": 0.96, "learning_rate": 8.262314025711398e-06, "loss": 3.1809, "step": 6738 }, { "epoch": 0.96, "learning_rate": 8.233424815831288e-06, "loss": 3.0978, "step": 6739 }, { "epoch": 0.96, "learning_rate": 8.204535605951177e-06, "loss": 3.0285, "step": 6740 }, { "epoch": 0.96, "learning_rate": 8.175646396071069e-06, "loss": 3.2434, "step": 6741 }, { "epoch": 0.96, "learning_rate": 8.146757186190958e-06, "loss": 2.9622, "step": 6742 }, { "epoch": 0.96, "learning_rate": 8.117867976310848e-06, "loss": 2.9389, "step": 6743 }, { "epoch": 0.96, "learning_rate": 8.088978766430739e-06, "loss": 3.2191, "step": 6744 }, { "epoch": 0.96, "learning_rate": 8.060089556550629e-06, "loss": 3.0754, "step": 6745 }, { "epoch": 0.96, "learning_rate": 8.031200346670518e-06, "loss": 3.0331, "step": 6746 }, { "epoch": 0.96, "learning_rate": 8.00231113679041e-06, "loss": 3.0158, "step": 6747 }, { "epoch": 0.96, "learning_rate": 7.973421926910299e-06, "loss": 3.0167, "step": 6748 }, { "epoch": 0.96, "learning_rate": 7.944532717030189e-06, "loss": 3.1016, "step": 6749 }, { "epoch": 0.96, "learning_rate": 7.91564350715008e-06, "loss": 2.97, "step": 6750 }, { "epoch": 0.96, "learning_rate": 7.88675429726997e-06, "loss": 3.0349, "step": 6751 }, { "epoch": 0.96, "learning_rate": 7.85786508738986e-06, "loss": 3.0124, "step": 6752 }, { "epoch": 0.96, "learning_rate": 7.82897587750975e-06, "loss": 3.0379, "step": 6753 }, { "epoch": 0.96, "learning_rate": 7.800086667629641e-06, "loss": 3.1907, "step": 6754 }, { "epoch": 0.96, "learning_rate": 7.771197457749532e-06, "loss": 3.0362, "step": 6755 }, { "epoch": 0.96, "learning_rate": 7.742308247869422e-06, "loss": 3.059, "step": 6756 }, { "epoch": 0.96, "learning_rate": 7.71341903798931e-06, "loss": 3.0152, "step": 6757 }, { "epoch": 0.96, "learning_rate": 7.684529828109201e-06, "loss": 3.0157, "step": 6758 }, { "epoch": 0.96, "learning_rate": 7.655640618229092e-06, "loss": 3.0653, "step": 6759 }, { "epoch": 0.96, "learning_rate": 7.626751408348982e-06, "loss": 3.1621, "step": 6760 }, { "epoch": 0.96, "learning_rate": 7.597862198468872e-06, "loss": 3.0576, "step": 6761 }, { "epoch": 0.96, "learning_rate": 7.568972988588763e-06, "loss": 3.1017, "step": 6762 }, { "epoch": 0.96, "learning_rate": 7.540083778708653e-06, "loss": 3.0208, "step": 6763 }, { "epoch": 0.96, "learning_rate": 7.511194568828543e-06, "loss": 3.0832, "step": 6764 }, { "epoch": 0.96, "learning_rate": 7.482305358948433e-06, "loss": 3.0535, "step": 6765 }, { "epoch": 0.96, "learning_rate": 7.453416149068324e-06, "loss": 2.9272, "step": 6766 }, { "epoch": 0.96, "learning_rate": 7.424526939188213e-06, "loss": 3.0445, "step": 6767 }, { "epoch": 0.96, "learning_rate": 7.395637729308104e-06, "loss": 3.1716, "step": 6768 }, { "epoch": 0.96, "learning_rate": 7.3667485194279935e-06, "loss": 2.9934, "step": 6769 }, { "epoch": 0.96, "learning_rate": 7.337859309547884e-06, "loss": 3.1079, "step": 6770 }, { "epoch": 0.96, "learning_rate": 7.308970099667775e-06, "loss": 2.8938, "step": 6771 }, { "epoch": 0.96, "learning_rate": 7.280080889787665e-06, "loss": 2.8871, "step": 6772 }, { "epoch": 0.96, "learning_rate": 7.251191679907555e-06, "loss": 2.9292, "step": 6773 }, { "epoch": 0.96, "learning_rate": 7.222302470027445e-06, "loss": 3.0288, "step": 6774 }, { "epoch": 0.96, "learning_rate": 7.193413260147336e-06, "loss": 3.0358, "step": 6775 }, { "epoch": 0.96, "learning_rate": 7.164524050267226e-06, "loss": 3.0653, "step": 6776 }, { "epoch": 0.96, "learning_rate": 7.135634840387116e-06, "loss": 3.0944, "step": 6777 }, { "epoch": 0.97, "learning_rate": 7.106745630507006e-06, "loss": 3.1736, "step": 6778 }, { "epoch": 0.97, "learning_rate": 7.077856420626896e-06, "loss": 2.9908, "step": 6779 }, { "epoch": 0.97, "learning_rate": 7.048967210746786e-06, "loss": 2.9514, "step": 6780 }, { "epoch": 0.97, "learning_rate": 7.020078000866677e-06, "loss": 2.9189, "step": 6781 }, { "epoch": 0.97, "learning_rate": 6.991188790986567e-06, "loss": 3.1035, "step": 6782 }, { "epoch": 0.97, "learning_rate": 6.962299581106457e-06, "loss": 3.0636, "step": 6783 }, { "epoch": 0.97, "learning_rate": 6.933410371226347e-06, "loss": 3.0794, "step": 6784 }, { "epoch": 0.97, "learning_rate": 6.904521161346238e-06, "loss": 3.0011, "step": 6785 }, { "epoch": 0.97, "learning_rate": 6.8756319514661276e-06, "loss": 3.1414, "step": 6786 }, { "epoch": 0.97, "learning_rate": 6.846742741586018e-06, "loss": 3.1243, "step": 6787 }, { "epoch": 0.97, "learning_rate": 6.817853531705908e-06, "loss": 2.9568, "step": 6788 }, { "epoch": 0.97, "learning_rate": 6.788964321825798e-06, "loss": 3.0535, "step": 6789 }, { "epoch": 0.97, "learning_rate": 6.760075111945689e-06, "loss": 3.0086, "step": 6790 }, { "epoch": 0.97, "learning_rate": 6.731185902065579e-06, "loss": 3.0518, "step": 6791 }, { "epoch": 0.97, "learning_rate": 6.702296692185469e-06, "loss": 2.9835, "step": 6792 }, { "epoch": 0.97, "learning_rate": 6.673407482305359e-06, "loss": 2.9584, "step": 6793 }, { "epoch": 0.97, "learning_rate": 6.64451827242525e-06, "loss": 3.036, "step": 6794 }, { "epoch": 0.97, "learning_rate": 6.61562906254514e-06, "loss": 3.0023, "step": 6795 }, { "epoch": 0.97, "learning_rate": 6.58673985266503e-06, "loss": 2.9843, "step": 6796 }, { "epoch": 0.97, "learning_rate": 6.55785064278492e-06, "loss": 3.0756, "step": 6797 }, { "epoch": 0.97, "learning_rate": 6.528961432904811e-06, "loss": 3.0288, "step": 6798 }, { "epoch": 0.97, "learning_rate": 6.5000722230247e-06, "loss": 2.9967, "step": 6799 }, { "epoch": 0.97, "learning_rate": 6.471183013144591e-06, "loss": 3.0806, "step": 6800 }, { "epoch": 0.97, "learning_rate": 6.442293803264481e-06, "loss": 3.0906, "step": 6801 }, { "epoch": 0.97, "learning_rate": 6.413404593384371e-06, "loss": 2.9722, "step": 6802 }, { "epoch": 0.97, "learning_rate": 6.384515383504261e-06, "loss": 3.0042, "step": 6803 }, { "epoch": 0.97, "learning_rate": 6.355626173624152e-06, "loss": 3.1041, "step": 6804 }, { "epoch": 0.97, "learning_rate": 6.326736963744042e-06, "loss": 3.0505, "step": 6805 }, { "epoch": 0.97, "learning_rate": 6.297847753863932e-06, "loss": 3.1167, "step": 6806 }, { "epoch": 0.97, "learning_rate": 6.268958543983822e-06, "loss": 3.0737, "step": 6807 }, { "epoch": 0.97, "learning_rate": 6.240069334103712e-06, "loss": 2.9312, "step": 6808 }, { "epoch": 0.97, "learning_rate": 6.2111801242236025e-06, "loss": 3.0523, "step": 6809 }, { "epoch": 0.97, "learning_rate": 6.182290914343493e-06, "loss": 3.1139, "step": 6810 }, { "epoch": 0.97, "learning_rate": 6.153401704463383e-06, "loss": 3.1251, "step": 6811 }, { "epoch": 0.97, "learning_rate": 6.124512494583273e-06, "loss": 3.1028, "step": 6812 }, { "epoch": 0.97, "learning_rate": 6.0956232847031634e-06, "loss": 3.0303, "step": 6813 }, { "epoch": 0.97, "learning_rate": 6.066734074823054e-06, "loss": 2.9433, "step": 6814 }, { "epoch": 0.97, "learning_rate": 6.0378448649429435e-06, "loss": 3.0504, "step": 6815 }, { "epoch": 0.97, "learning_rate": 6.008955655062834e-06, "loss": 3.0348, "step": 6816 }, { "epoch": 0.97, "learning_rate": 5.980066445182725e-06, "loss": 3.0943, "step": 6817 }, { "epoch": 0.97, "learning_rate": 5.951177235302615e-06, "loss": 3.1233, "step": 6818 }, { "epoch": 0.97, "learning_rate": 5.922288025422505e-06, "loss": 3.0396, "step": 6819 }, { "epoch": 0.97, "learning_rate": 5.893398815542396e-06, "loss": 3.0299, "step": 6820 }, { "epoch": 0.97, "learning_rate": 5.864509605662285e-06, "loss": 3.071, "step": 6821 }, { "epoch": 0.97, "learning_rate": 5.835620395782176e-06, "loss": 3.1082, "step": 6822 }, { "epoch": 0.97, "learning_rate": 5.806731185902066e-06, "loss": 3.0669, "step": 6823 }, { "epoch": 0.97, "learning_rate": 5.777841976021956e-06, "loss": 2.9796, "step": 6824 }, { "epoch": 0.97, "learning_rate": 5.748952766141846e-06, "loss": 2.9577, "step": 6825 }, { "epoch": 0.97, "learning_rate": 5.720063556261737e-06, "loss": 3.0809, "step": 6826 }, { "epoch": 0.97, "learning_rate": 5.691174346381627e-06, "loss": 3.0723, "step": 6827 }, { "epoch": 0.97, "learning_rate": 5.662285136501517e-06, "loss": 2.9341, "step": 6828 }, { "epoch": 0.97, "learning_rate": 5.633395926621407e-06, "loss": 2.9875, "step": 6829 }, { "epoch": 0.97, "learning_rate": 5.6045067167412975e-06, "loss": 2.9204, "step": 6830 }, { "epoch": 0.97, "learning_rate": 5.575617506861187e-06, "loss": 3.0588, "step": 6831 }, { "epoch": 0.97, "learning_rate": 5.5467282969810775e-06, "loss": 2.9367, "step": 6832 }, { "epoch": 0.97, "learning_rate": 5.517839087100968e-06, "loss": 3.0618, "step": 6833 }, { "epoch": 0.97, "learning_rate": 5.4889498772208576e-06, "loss": 3.0858, "step": 6834 }, { "epoch": 0.97, "learning_rate": 5.460060667340749e-06, "loss": 3.086, "step": 6835 }, { "epoch": 0.97, "learning_rate": 5.431171457460639e-06, "loss": 2.8882, "step": 6836 }, { "epoch": 0.97, "learning_rate": 5.402282247580529e-06, "loss": 3.1037, "step": 6837 }, { "epoch": 0.97, "learning_rate": 5.373393037700419e-06, "loss": 2.9876, "step": 6838 }, { "epoch": 0.97, "learning_rate": 5.34450382782031e-06, "loss": 2.9321, "step": 6839 }, { "epoch": 0.97, "learning_rate": 5.315614617940199e-06, "loss": 3.0526, "step": 6840 }, { "epoch": 0.97, "learning_rate": 5.28672540806009e-06, "loss": 2.7436, "step": 6841 }, { "epoch": 0.97, "learning_rate": 5.25783619817998e-06, "loss": 2.9718, "step": 6842 }, { "epoch": 0.97, "learning_rate": 5.228946988299871e-06, "loss": 3.1092, "step": 6843 }, { "epoch": 0.97, "learning_rate": 5.20005777841976e-06, "loss": 2.9495, "step": 6844 }, { "epoch": 0.97, "learning_rate": 5.171168568539651e-06, "loss": 2.9479, "step": 6845 }, { "epoch": 0.97, "learning_rate": 5.142279358659541e-06, "loss": 3.1482, "step": 6846 }, { "epoch": 0.97, "learning_rate": 5.113390148779431e-06, "loss": 2.9749, "step": 6847 }, { "epoch": 0.98, "learning_rate": 5.084500938899321e-06, "loss": 3.0205, "step": 6848 }, { "epoch": 0.98, "learning_rate": 5.0556117290192116e-06, "loss": 2.9451, "step": 6849 }, { "epoch": 0.98, "learning_rate": 5.026722519139101e-06, "loss": 3.0533, "step": 6850 }, { "epoch": 0.98, "learning_rate": 4.997833309258992e-06, "loss": 3.0174, "step": 6851 }, { "epoch": 0.98, "learning_rate": 4.968944099378882e-06, "loss": 2.9644, "step": 6852 }, { "epoch": 0.98, "learning_rate": 4.9400548894987725e-06, "loss": 3.1064, "step": 6853 }, { "epoch": 0.98, "learning_rate": 4.911165679618663e-06, "loss": 3.1061, "step": 6854 }, { "epoch": 0.98, "learning_rate": 4.882276469738553e-06, "loss": 3.0388, "step": 6855 }, { "epoch": 0.98, "learning_rate": 4.853387259858443e-06, "loss": 3.088, "step": 6856 }, { "epoch": 0.98, "learning_rate": 4.824498049978333e-06, "loss": 2.6724, "step": 6857 }, { "epoch": 0.98, "learning_rate": 4.795608840098224e-06, "loss": 3.093, "step": 6858 }, { "epoch": 0.98, "learning_rate": 4.766719630218114e-06, "loss": 3.15, "step": 6859 }, { "epoch": 0.98, "learning_rate": 4.737830420338004e-06, "loss": 3.016, "step": 6860 }, { "epoch": 0.98, "learning_rate": 4.708941210457894e-06, "loss": 2.842, "step": 6861 }, { "epoch": 0.98, "learning_rate": 4.680052000577785e-06, "loss": 3.1013, "step": 6862 }, { "epoch": 0.98, "learning_rate": 4.651162790697674e-06, "loss": 3.0444, "step": 6863 }, { "epoch": 0.98, "learning_rate": 4.622273580817565e-06, "loss": 3.098, "step": 6864 }, { "epoch": 0.98, "learning_rate": 4.593384370937455e-06, "loss": 3.0278, "step": 6865 }, { "epoch": 0.98, "learning_rate": 4.564495161057345e-06, "loss": 3.0869, "step": 6866 }, { "epoch": 0.98, "learning_rate": 4.535605951177235e-06, "loss": 2.7475, "step": 6867 }, { "epoch": 0.98, "learning_rate": 4.506716741297126e-06, "loss": 3.1009, "step": 6868 }, { "epoch": 0.98, "learning_rate": 4.477827531417016e-06, "loss": 3.0051, "step": 6869 }, { "epoch": 0.98, "learning_rate": 4.448938321536906e-06, "loss": 2.9816, "step": 6870 }, { "epoch": 0.98, "learning_rate": 4.420049111656797e-06, "loss": 3.052, "step": 6871 }, { "epoch": 0.98, "learning_rate": 4.3911599017766865e-06, "loss": 3.0582, "step": 6872 }, { "epoch": 0.98, "learning_rate": 4.362270691896577e-06, "loss": 2.9568, "step": 6873 }, { "epoch": 0.98, "learning_rate": 4.3333814820164674e-06, "loss": 2.86, "step": 6874 }, { "epoch": 0.98, "learning_rate": 4.304492272136358e-06, "loss": 3.1061, "step": 6875 }, { "epoch": 0.98, "learning_rate": 4.2756030622562475e-06, "loss": 3.0858, "step": 6876 }, { "epoch": 0.98, "learning_rate": 4.246713852376138e-06, "loss": 2.994, "step": 6877 }, { "epoch": 0.98, "learning_rate": 4.217824642496028e-06, "loss": 3.067, "step": 6878 }, { "epoch": 0.98, "learning_rate": 4.188935432615918e-06, "loss": 2.9959, "step": 6879 }, { "epoch": 0.98, "learning_rate": 4.160046222735808e-06, "loss": 2.8929, "step": 6880 }, { "epoch": 0.98, "learning_rate": 4.131157012855699e-06, "loss": 3.0604, "step": 6881 }, { "epoch": 0.98, "learning_rate": 4.102267802975588e-06, "loss": 3.0014, "step": 6882 }, { "epoch": 0.98, "learning_rate": 4.073378593095479e-06, "loss": 2.997, "step": 6883 }, { "epoch": 0.98, "learning_rate": 4.044489383215369e-06, "loss": 2.9582, "step": 6884 }, { "epoch": 0.98, "learning_rate": 4.015600173335259e-06, "loss": 3.0997, "step": 6885 }, { "epoch": 0.98, "learning_rate": 3.986710963455149e-06, "loss": 3.0248, "step": 6886 }, { "epoch": 0.98, "learning_rate": 3.95782175357504e-06, "loss": 3.1458, "step": 6887 }, { "epoch": 0.98, "learning_rate": 3.92893254369493e-06, "loss": 2.9781, "step": 6888 }, { "epoch": 0.98, "learning_rate": 3.900043333814821e-06, "loss": 3.1087, "step": 6889 }, { "epoch": 0.98, "learning_rate": 3.871154123934711e-06, "loss": 2.7062, "step": 6890 }, { "epoch": 0.98, "learning_rate": 3.842264914054601e-06, "loss": 3.1132, "step": 6891 }, { "epoch": 0.98, "learning_rate": 3.813375704174491e-06, "loss": 3.1139, "step": 6892 }, { "epoch": 0.98, "learning_rate": 3.7844864942943815e-06, "loss": 2.9154, "step": 6893 }, { "epoch": 0.98, "learning_rate": 3.7555972844142715e-06, "loss": 2.9927, "step": 6894 }, { "epoch": 0.98, "learning_rate": 3.726708074534162e-06, "loss": 3.1764, "step": 6895 }, { "epoch": 0.98, "learning_rate": 3.697818864654052e-06, "loss": 3.0163, "step": 6896 }, { "epoch": 0.98, "learning_rate": 3.668929654773942e-06, "loss": 2.9691, "step": 6897 }, { "epoch": 0.98, "learning_rate": 3.6400404448938324e-06, "loss": 2.9428, "step": 6898 }, { "epoch": 0.98, "learning_rate": 3.6111512350137224e-06, "loss": 3.0541, "step": 6899 }, { "epoch": 0.98, "learning_rate": 3.582262025133613e-06, "loss": 3.0231, "step": 6900 }, { "epoch": 0.98, "learning_rate": 3.553372815253503e-06, "loss": 3.0052, "step": 6901 }, { "epoch": 0.98, "learning_rate": 3.524483605373393e-06, "loss": 2.9979, "step": 6902 }, { "epoch": 0.98, "learning_rate": 3.4955943954932833e-06, "loss": 2.8792, "step": 6903 }, { "epoch": 0.98, "learning_rate": 3.4667051856131733e-06, "loss": 3.1465, "step": 6904 }, { "epoch": 0.98, "learning_rate": 3.4378159757330638e-06, "loss": 3.148, "step": 6905 }, { "epoch": 0.98, "learning_rate": 3.408926765852954e-06, "loss": 3.0153, "step": 6906 }, { "epoch": 0.98, "learning_rate": 3.3800375559728447e-06, "loss": 3.0236, "step": 6907 }, { "epoch": 0.98, "learning_rate": 3.3511483460927347e-06, "loss": 3.0493, "step": 6908 }, { "epoch": 0.98, "learning_rate": 3.322259136212625e-06, "loss": 3.0399, "step": 6909 }, { "epoch": 0.98, "learning_rate": 3.293369926332515e-06, "loss": 3.0536, "step": 6910 }, { "epoch": 0.98, "learning_rate": 3.2644807164524056e-06, "loss": 3.0736, "step": 6911 }, { "epoch": 0.98, "learning_rate": 3.2355915065722956e-06, "loss": 3.1185, "step": 6912 }, { "epoch": 0.98, "learning_rate": 3.2067022966921856e-06, "loss": 3.0759, "step": 6913 }, { "epoch": 0.98, "learning_rate": 3.177813086812076e-06, "loss": 2.9949, "step": 6914 }, { "epoch": 0.98, "learning_rate": 3.148923876931966e-06, "loss": 2.9747, "step": 6915 }, { "epoch": 0.98, "learning_rate": 3.120034667051856e-06, "loss": 3.0707, "step": 6916 }, { "epoch": 0.98, "learning_rate": 3.0911454571717465e-06, "loss": 3.0587, "step": 6917 }, { "epoch": 0.98, "learning_rate": 3.0622562472916365e-06, "loss": 3.1504, "step": 6918 }, { "epoch": 0.99, "learning_rate": 3.033367037411527e-06, "loss": 3.1535, "step": 6919 }, { "epoch": 0.99, "learning_rate": 3.004477827531417e-06, "loss": 2.9331, "step": 6920 }, { "epoch": 0.99, "learning_rate": 2.9755886176513074e-06, "loss": 2.8455, "step": 6921 }, { "epoch": 0.99, "learning_rate": 2.946699407771198e-06, "loss": 2.8775, "step": 6922 }, { "epoch": 0.99, "learning_rate": 2.917810197891088e-06, "loss": 2.9542, "step": 6923 }, { "epoch": 0.99, "learning_rate": 2.888920988010978e-06, "loss": 3.0825, "step": 6924 }, { "epoch": 0.99, "learning_rate": 2.8600317781308683e-06, "loss": 2.9671, "step": 6925 }, { "epoch": 0.99, "learning_rate": 2.8311425682507583e-06, "loss": 3.0341, "step": 6926 }, { "epoch": 0.99, "learning_rate": 2.8022533583706487e-06, "loss": 3.0661, "step": 6927 }, { "epoch": 0.99, "learning_rate": 2.7733641484905388e-06, "loss": 3.0443, "step": 6928 }, { "epoch": 0.99, "learning_rate": 2.7444749386104288e-06, "loss": 2.8629, "step": 6929 }, { "epoch": 0.99, "learning_rate": 2.7155857287303196e-06, "loss": 3.1635, "step": 6930 }, { "epoch": 0.99, "learning_rate": 2.6866965188502097e-06, "loss": 3.1478, "step": 6931 }, { "epoch": 0.99, "learning_rate": 2.6578073089700997e-06, "loss": 2.8978, "step": 6932 }, { "epoch": 0.99, "learning_rate": 2.62891809908999e-06, "loss": 2.9421, "step": 6933 }, { "epoch": 0.99, "learning_rate": 2.60002888920988e-06, "loss": 3.0774, "step": 6934 }, { "epoch": 0.99, "learning_rate": 2.5711396793297706e-06, "loss": 3.1546, "step": 6935 }, { "epoch": 0.99, "learning_rate": 2.5422504694496606e-06, "loss": 3.0412, "step": 6936 }, { "epoch": 0.99, "learning_rate": 2.5133612595695506e-06, "loss": 3.2098, "step": 6937 }, { "epoch": 0.99, "learning_rate": 2.484472049689441e-06, "loss": 3.1855, "step": 6938 }, { "epoch": 0.99, "learning_rate": 2.4555828398093315e-06, "loss": 2.9524, "step": 6939 }, { "epoch": 0.99, "learning_rate": 2.4266936299292215e-06, "loss": 3.0207, "step": 6940 }, { "epoch": 0.99, "learning_rate": 2.397804420049112e-06, "loss": 3.0075, "step": 6941 }, { "epoch": 0.99, "learning_rate": 2.368915210169002e-06, "loss": 2.9822, "step": 6942 }, { "epoch": 0.99, "learning_rate": 2.3400260002888924e-06, "loss": 3.1671, "step": 6943 }, { "epoch": 0.99, "learning_rate": 2.3111367904087824e-06, "loss": 3.0587, "step": 6944 }, { "epoch": 0.99, "learning_rate": 2.2822475805286724e-06, "loss": 2.9804, "step": 6945 }, { "epoch": 0.99, "learning_rate": 2.253358370648563e-06, "loss": 3.0, "step": 6946 }, { "epoch": 0.99, "learning_rate": 2.224469160768453e-06, "loss": 3.1084, "step": 6947 }, { "epoch": 0.99, "learning_rate": 2.1955799508883433e-06, "loss": 2.9242, "step": 6948 }, { "epoch": 0.99, "learning_rate": 2.1666907410082337e-06, "loss": 3.1705, "step": 6949 }, { "epoch": 0.99, "learning_rate": 2.1378015311281237e-06, "loss": 2.9616, "step": 6950 }, { "epoch": 0.99, "learning_rate": 2.108912321248014e-06, "loss": 3.0162, "step": 6951 }, { "epoch": 0.99, "learning_rate": 2.080023111367904e-06, "loss": 3.0889, "step": 6952 }, { "epoch": 0.99, "learning_rate": 2.051133901487794e-06, "loss": 3.18, "step": 6953 }, { "epoch": 0.99, "learning_rate": 2.0222446916076846e-06, "loss": 2.9249, "step": 6954 }, { "epoch": 0.99, "learning_rate": 1.9933554817275746e-06, "loss": 3.0066, "step": 6955 }, { "epoch": 0.99, "learning_rate": 1.964466271847465e-06, "loss": 3.0889, "step": 6956 }, { "epoch": 0.99, "learning_rate": 1.9355770619673555e-06, "loss": 3.0842, "step": 6957 }, { "epoch": 0.99, "learning_rate": 1.9066878520872455e-06, "loss": 2.9891, "step": 6958 }, { "epoch": 0.99, "learning_rate": 1.8777986422071358e-06, "loss": 2.9088, "step": 6959 }, { "epoch": 0.99, "learning_rate": 1.848909432327026e-06, "loss": 3.0368, "step": 6960 }, { "epoch": 0.99, "learning_rate": 1.8200202224469162e-06, "loss": 3.0724, "step": 6961 }, { "epoch": 0.99, "learning_rate": 1.7911310125668064e-06, "loss": 3.0317, "step": 6962 }, { "epoch": 0.99, "learning_rate": 1.7622418026866964e-06, "loss": 3.0485, "step": 6963 }, { "epoch": 0.99, "learning_rate": 1.7333525928065867e-06, "loss": 3.0453, "step": 6964 }, { "epoch": 0.99, "learning_rate": 1.704463382926477e-06, "loss": 3.0199, "step": 6965 }, { "epoch": 0.99, "learning_rate": 1.6755741730463673e-06, "loss": 3.0125, "step": 6966 }, { "epoch": 0.99, "learning_rate": 1.6466849631662576e-06, "loss": 2.9638, "step": 6967 }, { "epoch": 0.99, "learning_rate": 1.6177957532861478e-06, "loss": 3.0054, "step": 6968 }, { "epoch": 0.99, "learning_rate": 1.588906543406038e-06, "loss": 2.8664, "step": 6969 }, { "epoch": 0.99, "learning_rate": 1.560017333525928e-06, "loss": 2.8927, "step": 6970 }, { "epoch": 0.99, "learning_rate": 1.5311281236458183e-06, "loss": 2.8634, "step": 6971 }, { "epoch": 0.99, "learning_rate": 1.5022389137657085e-06, "loss": 2.9638, "step": 6972 }, { "epoch": 0.99, "learning_rate": 1.473349703885599e-06, "loss": 3.0843, "step": 6973 }, { "epoch": 0.99, "learning_rate": 1.444460494005489e-06, "loss": 3.1211, "step": 6974 }, { "epoch": 0.99, "learning_rate": 1.4155712841253792e-06, "loss": 3.0891, "step": 6975 }, { "epoch": 0.99, "learning_rate": 1.3866820742452694e-06, "loss": 3.0453, "step": 6976 }, { "epoch": 0.99, "learning_rate": 1.3577928643651598e-06, "loss": 2.896, "step": 6977 }, { "epoch": 0.99, "learning_rate": 1.3289036544850498e-06, "loss": 3.1392, "step": 6978 }, { "epoch": 0.99, "learning_rate": 1.30001444460494e-06, "loss": 2.8895, "step": 6979 }, { "epoch": 0.99, "learning_rate": 1.2711252347248303e-06, "loss": 3.0352, "step": 6980 }, { "epoch": 0.99, "learning_rate": 1.2422360248447205e-06, "loss": 2.9906, "step": 6981 }, { "epoch": 0.99, "learning_rate": 1.2133468149646107e-06, "loss": 2.8133, "step": 6982 }, { "epoch": 0.99, "learning_rate": 1.184457605084501e-06, "loss": 3.0085, "step": 6983 }, { "epoch": 0.99, "learning_rate": 1.1555683952043912e-06, "loss": 2.9782, "step": 6984 }, { "epoch": 0.99, "learning_rate": 1.1266791853242814e-06, "loss": 3.1336, "step": 6985 }, { "epoch": 0.99, "learning_rate": 1.0977899754441716e-06, "loss": 2.9151, "step": 6986 }, { "epoch": 0.99, "learning_rate": 1.0689007655640619e-06, "loss": 3.0004, "step": 6987 }, { "epoch": 0.99, "learning_rate": 1.040011555683952e-06, "loss": 2.9352, "step": 6988 }, { "epoch": 1.0, "learning_rate": 1.0111223458038423e-06, "loss": 3.1152, "step": 6989 }, { "epoch": 1.0, "learning_rate": 9.822331359237325e-07, "loss": 2.9697, "step": 6990 }, { "epoch": 1.0, "learning_rate": 9.533439260436228e-07, "loss": 3.1496, "step": 6991 }, { "epoch": 1.0, "learning_rate": 9.24454716163513e-07, "loss": 2.913, "step": 6992 }, { "epoch": 1.0, "learning_rate": 8.955655062834032e-07, "loss": 3.0368, "step": 6993 }, { "epoch": 1.0, "learning_rate": 8.666762964032933e-07, "loss": 2.9764, "step": 6994 }, { "epoch": 1.0, "learning_rate": 8.377870865231837e-07, "loss": 3.0714, "step": 6995 }, { "epoch": 1.0, "learning_rate": 8.088978766430739e-07, "loss": 3.0611, "step": 6996 }, { "epoch": 1.0, "learning_rate": 7.80008666762964e-07, "loss": 3.0141, "step": 6997 }, { "epoch": 1.0, "learning_rate": 7.511194568828542e-07, "loss": 2.9486, "step": 6998 }, { "epoch": 1.0, "learning_rate": 7.222302470027445e-07, "loss": 3.0568, "step": 6999 }, { "epoch": 1.0, "learning_rate": 6.933410371226347e-07, "loss": 2.9752, "step": 7000 }, { "epoch": 1.0, "learning_rate": 6.644518272425249e-07, "loss": 3.0737, "step": 7001 }, { "epoch": 1.0, "learning_rate": 6.355626173624151e-07, "loss": 3.0703, "step": 7002 }, { "epoch": 1.0, "learning_rate": 6.066734074823054e-07, "loss": 2.9669, "step": 7003 }, { "epoch": 1.0, "learning_rate": 5.777841976021956e-07, "loss": 3.0173, "step": 7004 }, { "epoch": 1.0, "learning_rate": 5.488949877220858e-07, "loss": 3.0607, "step": 7005 }, { "epoch": 1.0, "learning_rate": 5.20005777841976e-07, "loss": 3.0821, "step": 7006 }, { "epoch": 1.0, "learning_rate": 4.911165679618663e-07, "loss": 3.0936, "step": 7007 }, { "epoch": 1.0, "learning_rate": 4.622273580817565e-07, "loss": 3.0225, "step": 7008 }, { "epoch": 1.0, "learning_rate": 4.3333814820164667e-07, "loss": 3.1118, "step": 7009 }, { "epoch": 1.0, "learning_rate": 4.0444893832153695e-07, "loss": 2.9692, "step": 7010 }, { "epoch": 1.0, "learning_rate": 3.755597284414271e-07, "loss": 2.9797, "step": 7011 }, { "epoch": 1.0, "learning_rate": 3.4667051856131735e-07, "loss": 3.0496, "step": 7012 }, { "epoch": 1.0, "learning_rate": 3.1778130868120757e-07, "loss": 3.0703, "step": 7013 }, { "epoch": 1.0, "learning_rate": 2.888920988010978e-07, "loss": 3.0217, "step": 7014 }, { "epoch": 1.0, "learning_rate": 2.60002888920988e-07, "loss": 3.0717, "step": 7015 }, { "epoch": 1.0, "learning_rate": 2.3111367904087825e-07, "loss": 2.8553, "step": 7016 }, { "epoch": 1.0, "learning_rate": 2.0222446916076847e-07, "loss": 3.0506, "step": 7017 }, { "epoch": 1.0, "learning_rate": 1.7333525928065867e-07, "loss": 3.0767, "step": 7018 }, { "epoch": 1.0, "learning_rate": 1.444460494005489e-07, "loss": 2.9865, "step": 7019 }, { "epoch": 1.0, "learning_rate": 1.1555683952043912e-07, "loss": 3.0635, "step": 7020 }, { "epoch": 1.0, "learning_rate": 8.666762964032934e-08, "loss": 3.1211, "step": 7021 }, { "epoch": 1.0, "learning_rate": 5.777841976021956e-08, "loss": 3.0291, "step": 7022 }, { "epoch": 1.0, "learning_rate": 2.888920988010978e-08, "loss": 2.9262, "step": 7023 } ], "logging_steps": 1, "max_steps": 7023, "num_train_epochs": 1, "save_steps": 500, "total_flos": 6.982535499177001e+18, "trial_name": null, "trial_params": null }