{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.14619883040935672, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2e-05, "loss": 1.2695, "step": 1 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 1.4688, "step": 2 }, { "epoch": 0.0, "learning_rate": 6e-05, "loss": 1.2927, "step": 3 }, { "epoch": 0.0, "learning_rate": 8e-05, "loss": 1.2911, "step": 4 }, { "epoch": 0.0, "learning_rate": 0.0001, "loss": 1.1655, "step": 5 }, { "epoch": 0.0, "learning_rate": 0.00012, "loss": 1.3511, "step": 6 }, { "epoch": 0.01, "learning_rate": 0.00014, "loss": 1.2358, "step": 7 }, { "epoch": 0.01, "learning_rate": 0.00016, "loss": 1.287, "step": 8 }, { "epoch": 0.01, "learning_rate": 0.00018, "loss": 1.1919, "step": 9 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.3329, "step": 10 }, { "epoch": 0.01, "learning_rate": 0.00019999993359236733, "loss": 1.0765, "step": 11 }, { "epoch": 0.01, "learning_rate": 0.00019999973436955748, "loss": 1.2112, "step": 12 }, { "epoch": 0.01, "learning_rate": 0.00019999940233183508, "loss": 1.2021, "step": 13 }, { "epoch": 0.01, "learning_rate": 0.0001999989374796411, "loss": 1.306, "step": 14 }, { "epoch": 0.01, "learning_rate": 0.00019999833981359296, "loss": 1.2094, "step": 15 }, { "epoch": 0.01, "learning_rate": 0.00019999760933448442, "loss": 1.3033, "step": 16 }, { "epoch": 0.01, "learning_rate": 0.00019999674604328566, "loss": 1.2345, "step": 17 }, { "epoch": 0.01, "learning_rate": 0.00019999574994114335, "loss": 1.1763, "step": 18 }, { "epoch": 0.01, "learning_rate": 0.00019999462102938037, "loss": 1.2333, "step": 19 }, { "epoch": 0.01, "learning_rate": 0.00019999335930949612, "loss": 1.213, "step": 20 }, { "epoch": 0.01, "eval_loss": 1.181932806968689, "eval_runtime": 76.4827, "eval_samples_per_second": 0.732, "eval_steps_per_second": 0.183, "step": 20 }, { "epoch": 0.02, "learning_rate": 0.00019999196478316637, "loss": 1.1985, "step": 21 }, { "epoch": 0.02, "learning_rate": 0.00019999043745224323, "loss": 1.2438, "step": 22 }, { "epoch": 0.02, "learning_rate": 0.00019998877731875524, "loss": 1.0714, "step": 23 }, { "epoch": 0.02, "learning_rate": 0.00019998698438490736, "loss": 1.2336, "step": 24 }, { "epoch": 0.02, "learning_rate": 0.00019998505865308084, "loss": 1.1357, "step": 25 }, { "epoch": 0.02, "learning_rate": 0.00019998300012583333, "loss": 1.1176, "step": 26 }, { "epoch": 0.02, "learning_rate": 0.0001999808088058989, "loss": 1.2537, "step": 27 }, { "epoch": 0.02, "learning_rate": 0.0001999784846961879, "loss": 1.2098, "step": 28 }, { "epoch": 0.02, "learning_rate": 0.0001999760277997872, "loss": 1.3226, "step": 29 }, { "epoch": 0.02, "learning_rate": 0.00019997343811995984, "loss": 1.271, "step": 30 }, { "epoch": 0.02, "learning_rate": 0.00019997071566014535, "loss": 1.1423, "step": 31 }, { "epoch": 0.02, "learning_rate": 0.0001999678604239596, "loss": 1.1422, "step": 32 }, { "epoch": 0.02, "learning_rate": 0.00019996487241519473, "loss": 1.2029, "step": 33 }, { "epoch": 0.02, "learning_rate": 0.0001999617516378193, "loss": 1.1305, "step": 34 }, { "epoch": 0.03, "learning_rate": 0.00019995849809597814, "loss": 1.3264, "step": 35 }, { "epoch": 0.03, "learning_rate": 0.0001999551117939925, "loss": 1.2266, "step": 36 }, { "epoch": 0.03, "learning_rate": 0.0001999515927363599, "loss": 1.1214, "step": 37 }, { "epoch": 0.03, "learning_rate": 0.00019994794092775418, "loss": 1.2081, "step": 38 }, { "epoch": 0.03, "learning_rate": 0.00019994415637302547, "loss": 1.2039, "step": 39 }, { "epoch": 0.03, "learning_rate": 0.00019994023907720027, "loss": 1.2026, "step": 40 }, { "epoch": 0.03, "eval_loss": 1.152363896369934, "eval_runtime": 76.6763, "eval_samples_per_second": 0.73, "eval_steps_per_second": 0.183, "step": 40 }, { "epoch": 0.03, "learning_rate": 0.00019993618904548131, "loss": 1.2681, "step": 41 }, { "epoch": 0.03, "learning_rate": 0.0001999320062832477, "loss": 1.0463, "step": 42 }, { "epoch": 0.03, "learning_rate": 0.00019992769079605477, "loss": 1.2364, "step": 43 }, { "epoch": 0.03, "learning_rate": 0.00019992324258963413, "loss": 1.1188, "step": 44 }, { "epoch": 0.03, "learning_rate": 0.00019991866166989367, "loss": 1.2932, "step": 45 }, { "epoch": 0.03, "learning_rate": 0.00019991394804291758, "loss": 1.1784, "step": 46 }, { "epoch": 0.03, "learning_rate": 0.00019990910171496627, "loss": 1.2541, "step": 47 }, { "epoch": 0.04, "learning_rate": 0.0001999041226924764, "loss": 1.1132, "step": 48 }, { "epoch": 0.04, "learning_rate": 0.00019989901098206082, "loss": 1.1555, "step": 49 }, { "epoch": 0.04, "learning_rate": 0.00019989376659050877, "loss": 1.3394, "step": 50 }, { "epoch": 0.04, "learning_rate": 0.0001998883895247855, "loss": 1.2951, "step": 51 }, { "epoch": 0.04, "learning_rate": 0.00019988287979203265, "loss": 1.2399, "step": 52 }, { "epoch": 0.04, "learning_rate": 0.0001998772373995679, "loss": 1.1664, "step": 53 }, { "epoch": 0.04, "learning_rate": 0.0001998714623548853, "loss": 1.117, "step": 54 }, { "epoch": 0.04, "learning_rate": 0.00019986555466565493, "loss": 1.1931, "step": 55 }, { "epoch": 0.04, "learning_rate": 0.00019985951433972314, "loss": 1.2058, "step": 56 }, { "epoch": 0.04, "learning_rate": 0.00019985334138511237, "loss": 1.1864, "step": 57 }, { "epoch": 0.04, "learning_rate": 0.0001998470358100213, "loss": 1.2206, "step": 58 }, { "epoch": 0.04, "learning_rate": 0.00019984059762282467, "loss": 1.0549, "step": 59 }, { "epoch": 0.04, "learning_rate": 0.00019983402683207332, "loss": 1.0102, "step": 60 }, { "epoch": 0.04, "eval_loss": 1.1410892009735107, "eval_runtime": 76.6475, "eval_samples_per_second": 0.731, "eval_steps_per_second": 0.183, "step": 60 }, { "epoch": 0.04, "learning_rate": 0.00019982732344649433, "loss": 1.0848, "step": 61 }, { "epoch": 0.05, "learning_rate": 0.00019982048747499081, "loss": 1.3566, "step": 62 }, { "epoch": 0.05, "learning_rate": 0.00019981351892664194, "loss": 1.2252, "step": 63 }, { "epoch": 0.05, "learning_rate": 0.00019980641781070307, "loss": 1.2283, "step": 64 }, { "epoch": 0.05, "learning_rate": 0.00019979918413660553, "loss": 1.1728, "step": 65 }, { "epoch": 0.05, "learning_rate": 0.00019979181791395672, "loss": 1.0926, "step": 66 }, { "epoch": 0.05, "learning_rate": 0.00019978431915254017, "loss": 1.1868, "step": 67 }, { "epoch": 0.05, "learning_rate": 0.00019977668786231534, "loss": 1.0868, "step": 68 }, { "epoch": 0.05, "learning_rate": 0.00019976892405341773, "loss": 1.1116, "step": 69 }, { "epoch": 0.05, "learning_rate": 0.00019976102773615892, "loss": 1.1881, "step": 70 }, { "epoch": 0.05, "learning_rate": 0.00019975299892102636, "loss": 1.1795, "step": 71 }, { "epoch": 0.05, "learning_rate": 0.00019974483761868358, "loss": 1.1028, "step": 72 }, { "epoch": 0.05, "learning_rate": 0.00019973654383997007, "loss": 0.9675, "step": 73 }, { "epoch": 0.05, "learning_rate": 0.00019972811759590118, "loss": 1.2491, "step": 74 }, { "epoch": 0.05, "learning_rate": 0.00019971955889766825, "loss": 1.1393, "step": 75 }, { "epoch": 0.06, "learning_rate": 0.00019971086775663857, "loss": 1.1288, "step": 76 }, { "epoch": 0.06, "learning_rate": 0.00019970204418435526, "loss": 1.1794, "step": 77 }, { "epoch": 0.06, "learning_rate": 0.0001996930881925374, "loss": 0.9819, "step": 78 }, { "epoch": 0.06, "learning_rate": 0.0001996839997930799, "loss": 1.1291, "step": 79 }, { "epoch": 0.06, "learning_rate": 0.0001996747789980536, "loss": 1.2502, "step": 80 }, { "epoch": 0.06, "eval_loss": 1.1336756944656372, "eval_runtime": 76.5931, "eval_samples_per_second": 0.731, "eval_steps_per_second": 0.183, "step": 80 }, { "epoch": 0.06, "learning_rate": 0.000199665425819705, "loss": 1.0629, "step": 81 }, { "epoch": 0.06, "learning_rate": 0.00019965594027045665, "loss": 1.0234, "step": 82 }, { "epoch": 0.06, "learning_rate": 0.00019964632236290681, "loss": 1.1065, "step": 83 }, { "epoch": 0.06, "learning_rate": 0.00019963657210982948, "loss": 1.1702, "step": 84 }, { "epoch": 0.06, "learning_rate": 0.0001996266895241745, "loss": 1.2096, "step": 85 }, { "epoch": 0.06, "learning_rate": 0.00019961667461906743, "loss": 1.232, "step": 86 }, { "epoch": 0.06, "learning_rate": 0.00019960652740780966, "loss": 1.0296, "step": 87 }, { "epoch": 0.06, "learning_rate": 0.0001995962479038782, "loss": 1.1686, "step": 88 }, { "epoch": 0.07, "learning_rate": 0.00019958583612092576, "loss": 1.1196, "step": 89 }, { "epoch": 0.07, "learning_rate": 0.00019957529207278082, "loss": 1.3479, "step": 90 }, { "epoch": 0.07, "learning_rate": 0.0001995646157734475, "loss": 1.1783, "step": 91 }, { "epoch": 0.07, "learning_rate": 0.0001995538072371055, "loss": 1.2225, "step": 92 }, { "epoch": 0.07, "learning_rate": 0.00019954286647811027, "loss": 1.0859, "step": 93 }, { "epoch": 0.07, "learning_rate": 0.00019953179351099275, "loss": 1.0763, "step": 94 }, { "epoch": 0.07, "learning_rate": 0.00019952058835045957, "loss": 1.2688, "step": 95 }, { "epoch": 0.07, "learning_rate": 0.0001995092510113929, "loss": 1.1738, "step": 96 }, { "epoch": 0.07, "learning_rate": 0.00019949778150885042, "loss": 1.1326, "step": 97 }, { "epoch": 0.07, "learning_rate": 0.0001994861798580654, "loss": 1.1722, "step": 98 }, { "epoch": 0.07, "learning_rate": 0.0001994744460744466, "loss": 1.2148, "step": 99 }, { "epoch": 0.07, "learning_rate": 0.00019946258017357828, "loss": 1.2499, "step": 100 }, { "epoch": 0.07, "eval_loss": 1.1291786432266235, "eval_runtime": 76.5934, "eval_samples_per_second": 0.731, "eval_steps_per_second": 0.183, "step": 100 }, { "epoch": 0.07, "learning_rate": 0.00019945058217122016, "loss": 1.1765, "step": 101 }, { "epoch": 0.07, "learning_rate": 0.00019943845208330742, "loss": 1.2101, "step": 102 }, { "epoch": 0.08, "learning_rate": 0.0001994261899259507, "loss": 1.1028, "step": 103 }, { "epoch": 0.08, "learning_rate": 0.00019941379571543596, "loss": 1.2198, "step": 104 }, { "epoch": 0.08, "learning_rate": 0.00019940126946822465, "loss": 1.2474, "step": 105 }, { "epoch": 0.08, "learning_rate": 0.00019938861120095353, "loss": 1.1917, "step": 106 }, { "epoch": 0.08, "learning_rate": 0.0001993758209304347, "loss": 1.2239, "step": 107 }, { "epoch": 0.08, "learning_rate": 0.00019936289867365556, "loss": 1.2406, "step": 108 }, { "epoch": 0.08, "learning_rate": 0.0001993498444477789, "loss": 1.2051, "step": 109 }, { "epoch": 0.08, "learning_rate": 0.00019933665827014273, "loss": 1.146, "step": 110 }, { "epoch": 0.08, "learning_rate": 0.00019932334015826023, "loss": 1.0871, "step": 111 }, { "epoch": 0.08, "learning_rate": 0.00019930989012981992, "loss": 1.1184, "step": 112 }, { "epoch": 0.08, "learning_rate": 0.00019929630820268552, "loss": 1.0224, "step": 113 }, { "epoch": 0.08, "learning_rate": 0.00019928259439489589, "loss": 1.0958, "step": 114 }, { "epoch": 0.08, "learning_rate": 0.0001992687487246651, "loss": 1.1811, "step": 115 }, { "epoch": 0.08, "learning_rate": 0.00019925477121038218, "loss": 1.3126, "step": 116 }, { "epoch": 0.09, "learning_rate": 0.00019924066187061156, "loss": 1.0572, "step": 117 }, { "epoch": 0.09, "learning_rate": 0.0001992264207240925, "loss": 1.0597, "step": 118 }, { "epoch": 0.09, "learning_rate": 0.00019921204778973944, "loss": 1.1559, "step": 119 }, { "epoch": 0.09, "learning_rate": 0.00019919754308664187, "loss": 1.2146, "step": 120 }, { "epoch": 0.09, "eval_loss": 1.1247767210006714, "eval_runtime": 80.1123, "eval_samples_per_second": 0.699, "eval_steps_per_second": 0.175, "step": 120 }, { "epoch": 0.09, "learning_rate": 0.0001991829066340642, "loss": 1.2127, "step": 121 }, { "epoch": 0.09, "learning_rate": 0.00019916813845144587, "loss": 1.1637, "step": 122 }, { "epoch": 0.09, "learning_rate": 0.00019915323855840131, "loss": 1.2383, "step": 123 }, { "epoch": 0.09, "learning_rate": 0.00019913820697471985, "loss": 1.0641, "step": 124 }, { "epoch": 0.09, "learning_rate": 0.00019912304372036573, "loss": 1.115, "step": 125 }, { "epoch": 0.09, "learning_rate": 0.000199107748815478, "loss": 1.0733, "step": 126 }, { "epoch": 0.09, "learning_rate": 0.00019909232228037076, "loss": 1.2392, "step": 127 }, { "epoch": 0.09, "learning_rate": 0.00019907676413553267, "loss": 1.3308, "step": 128 }, { "epoch": 0.09, "learning_rate": 0.00019906107440162743, "loss": 1.1723, "step": 129 }, { "epoch": 0.1, "learning_rate": 0.00019904525309949334, "loss": 1.1327, "step": 130 }, { "epoch": 0.1, "learning_rate": 0.00019902930025014347, "loss": 1.1309, "step": 131 }, { "epoch": 0.1, "learning_rate": 0.00019901321587476574, "loss": 1.1563, "step": 132 }, { "epoch": 0.1, "learning_rate": 0.00019899699999472258, "loss": 1.1824, "step": 133 }, { "epoch": 0.1, "learning_rate": 0.0001989806526315512, "loss": 1.2014, "step": 134 }, { "epoch": 0.1, "learning_rate": 0.00019896417380696333, "loss": 1.182, "step": 135 }, { "epoch": 0.1, "learning_rate": 0.0001989475635428454, "loss": 1.1519, "step": 136 }, { "epoch": 0.1, "learning_rate": 0.0001989308218612584, "loss": 1.2197, "step": 137 }, { "epoch": 0.1, "learning_rate": 0.00019891394878443784, "loss": 1.01, "step": 138 }, { "epoch": 0.1, "learning_rate": 0.0001988969443347937, "loss": 1.2011, "step": 139 }, { "epoch": 0.1, "learning_rate": 0.0001988798085349105, "loss": 1.1338, "step": 140 }, { "epoch": 0.1, "eval_loss": 1.120542287826538, "eval_runtime": 79.9836, "eval_samples_per_second": 0.7, "eval_steps_per_second": 0.175, "step": 140 }, { "epoch": 0.1, "learning_rate": 0.00019886254140754722, "loss": 1.0325, "step": 141 }, { "epoch": 0.1, "learning_rate": 0.0001988451429756372, "loss": 1.2331, "step": 142 }, { "epoch": 0.1, "learning_rate": 0.00019882761326228825, "loss": 1.1751, "step": 143 }, { "epoch": 0.11, "learning_rate": 0.0001988099522907825, "loss": 1.2066, "step": 144 }, { "epoch": 0.11, "learning_rate": 0.00019879216008457642, "loss": 1.1406, "step": 145 }, { "epoch": 0.11, "learning_rate": 0.00019877423666730075, "loss": 1.2226, "step": 146 }, { "epoch": 0.11, "learning_rate": 0.00019875618206276053, "loss": 1.1612, "step": 147 }, { "epoch": 0.11, "learning_rate": 0.00019873799629493508, "loss": 1.203, "step": 148 }, { "epoch": 0.11, "learning_rate": 0.0001987196793879778, "loss": 1.1134, "step": 149 }, { "epoch": 0.11, "learning_rate": 0.00019870123136621638, "loss": 1.164, "step": 150 }, { "epoch": 0.11, "learning_rate": 0.00019868265225415265, "loss": 0.9973, "step": 151 }, { "epoch": 0.11, "learning_rate": 0.0001986639420764624, "loss": 1.0642, "step": 152 }, { "epoch": 0.11, "learning_rate": 0.00019864510085799568, "loss": 1.1626, "step": 153 }, { "epoch": 0.11, "learning_rate": 0.00019862612862377648, "loss": 1.1529, "step": 154 }, { "epoch": 0.11, "learning_rate": 0.00019860702539900287, "loss": 1.2552, "step": 155 }, { "epoch": 0.11, "learning_rate": 0.00019858779120904678, "loss": 1.1867, "step": 156 }, { "epoch": 0.11, "learning_rate": 0.00019856842607945418, "loss": 1.2168, "step": 157 }, { "epoch": 0.12, "learning_rate": 0.00019854893003594493, "loss": 1.1797, "step": 158 }, { "epoch": 0.12, "learning_rate": 0.00019852930310441274, "loss": 1.2606, "step": 159 }, { "epoch": 0.12, "learning_rate": 0.00019850954531092517, "loss": 1.2599, "step": 160 }, { "epoch": 0.12, "eval_loss": 1.1176784038543701, "eval_runtime": 79.9641, "eval_samples_per_second": 0.7, "eval_steps_per_second": 0.175, "step": 160 }, { "epoch": 0.12, "learning_rate": 0.00019848965668172356, "loss": 0.9995, "step": 161 }, { "epoch": 0.12, "learning_rate": 0.0001984696372432231, "loss": 1.2258, "step": 162 }, { "epoch": 0.12, "learning_rate": 0.00019844948702201265, "loss": 1.085, "step": 163 }, { "epoch": 0.12, "learning_rate": 0.00019842920604485473, "loss": 1.125, "step": 164 }, { "epoch": 0.12, "learning_rate": 0.0001984087943386856, "loss": 1.1549, "step": 165 }, { "epoch": 0.12, "learning_rate": 0.00019838825193061518, "loss": 1.1749, "step": 166 }, { "epoch": 0.12, "learning_rate": 0.00019836757884792683, "loss": 1.1702, "step": 167 }, { "epoch": 0.12, "learning_rate": 0.0001983467751180776, "loss": 1.2037, "step": 168 }, { "epoch": 0.12, "learning_rate": 0.00019832584076869805, "loss": 1.0019, "step": 169 }, { "epoch": 0.12, "learning_rate": 0.00019830477582759212, "loss": 1.1183, "step": 170 }, { "epoch": 0.12, "learning_rate": 0.00019828358032273735, "loss": 1.0197, "step": 171 }, { "epoch": 0.13, "learning_rate": 0.00019826225428228457, "loss": 1.107, "step": 172 }, { "epoch": 0.13, "learning_rate": 0.000198240797734558, "loss": 1.1855, "step": 173 }, { "epoch": 0.13, "learning_rate": 0.0001982192107080552, "loss": 1.2044, "step": 174 }, { "epoch": 0.13, "learning_rate": 0.00019819749323144709, "loss": 1.1586, "step": 175 }, { "epoch": 0.13, "learning_rate": 0.00019817564533357773, "loss": 1.1092, "step": 176 }, { "epoch": 0.13, "learning_rate": 0.00019815366704346453, "loss": 1.2592, "step": 177 }, { "epoch": 0.13, "learning_rate": 0.00019813155839029797, "loss": 1.1808, "step": 178 }, { "epoch": 0.13, "learning_rate": 0.00019810931940344176, "loss": 1.1179, "step": 179 }, { "epoch": 0.13, "learning_rate": 0.0001980869501124326, "loss": 1.1333, "step": 180 }, { "epoch": 0.13, "eval_loss": 1.1160587072372437, "eval_runtime": 79.8665, "eval_samples_per_second": 0.701, "eval_steps_per_second": 0.175, "step": 180 }, { "epoch": 0.13, "learning_rate": 0.00019806445054698039, "loss": 1.0633, "step": 181 }, { "epoch": 0.13, "learning_rate": 0.00019804182073696793, "loss": 1.1252, "step": 182 }, { "epoch": 0.13, "learning_rate": 0.00019801906071245111, "loss": 1.1522, "step": 183 }, { "epoch": 0.13, "learning_rate": 0.0001979961705036587, "loss": 1.1706, "step": 184 }, { "epoch": 0.14, "learning_rate": 0.00019797315014099238, "loss": 1.1142, "step": 185 }, { "epoch": 0.14, "learning_rate": 0.00019794999965502672, "loss": 1.0345, "step": 186 }, { "epoch": 0.14, "learning_rate": 0.0001979267190765091, "loss": 1.1029, "step": 187 }, { "epoch": 0.14, "learning_rate": 0.00019790330843635966, "loss": 1.1628, "step": 188 }, { "epoch": 0.14, "learning_rate": 0.00019787976776567133, "loss": 1.1569, "step": 189 }, { "epoch": 0.14, "learning_rate": 0.00019785609709570973, "loss": 1.2024, "step": 190 }, { "epoch": 0.14, "learning_rate": 0.00019783229645791307, "loss": 1.1602, "step": 191 }, { "epoch": 0.14, "learning_rate": 0.00019780836588389225, "loss": 1.1588, "step": 192 }, { "epoch": 0.14, "learning_rate": 0.00019778430540543077, "loss": 1.1104, "step": 193 }, { "epoch": 0.14, "learning_rate": 0.00019776011505448455, "loss": 1.1437, "step": 194 }, { "epoch": 0.14, "learning_rate": 0.00019773579486318213, "loss": 1.2098, "step": 195 }, { "epoch": 0.14, "learning_rate": 0.00019771134486382436, "loss": 1.125, "step": 196 }, { "epoch": 0.14, "learning_rate": 0.00019768676508888467, "loss": 1.0576, "step": 197 }, { "epoch": 0.14, "learning_rate": 0.00019766205557100868, "loss": 1.1982, "step": 198 }, { "epoch": 0.15, "learning_rate": 0.00019763721634301443, "loss": 1.1395, "step": 199 }, { "epoch": 0.15, "learning_rate": 0.0001976122474378922, "loss": 1.2491, "step": 200 }, { "epoch": 0.15, "eval_loss": 1.1136831045150757, "eval_runtime": 79.9646, "eval_samples_per_second": 0.7, "eval_steps_per_second": 0.175, "step": 200 } ], "max_steps": 2736, "num_train_epochs": 2, "total_flos": 1.588426799910912e+16, "trial_name": null, "trial_params": null }