{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 536,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0018656716417910447,
      "grad_norm": 2.57972321339612,
      "learning_rate": 1.8518518518518518e-07,
      "loss": 2.092,
      "step": 1
    },
    {
      "epoch": 0.009328358208955223,
      "grad_norm": 2.3735119503905793,
      "learning_rate": 9.259259259259259e-07,
      "loss": 2.0863,
      "step": 5
    },
    {
      "epoch": 0.018656716417910446,
      "grad_norm": 2.569828354184601,
      "learning_rate": 1.8518518518518519e-06,
      "loss": 2.0955,
      "step": 10
    },
    {
      "epoch": 0.027985074626865673,
      "grad_norm": 2.3365878728458847,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 2.078,
      "step": 15
    },
    {
      "epoch": 0.03731343283582089,
      "grad_norm": 2.258292749629048,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 2.0535,
      "step": 20
    },
    {
      "epoch": 0.04664179104477612,
      "grad_norm": 2.046409624248939,
      "learning_rate": 4.62962962962963e-06,
      "loss": 1.9936,
      "step": 25
    },
    {
      "epoch": 0.055970149253731345,
      "grad_norm": 1.7763941617915107,
      "learning_rate": 5.555555555555557e-06,
      "loss": 1.9172,
      "step": 30
    },
    {
      "epoch": 0.06529850746268656,
      "grad_norm": 1.5289482279435747,
      "learning_rate": 6.481481481481482e-06,
      "loss": 1.8216,
      "step": 35
    },
    {
      "epoch": 0.07462686567164178,
      "grad_norm": 1.3374159627357958,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 1.7237,
      "step": 40
    },
    {
      "epoch": 0.08395522388059702,
      "grad_norm": 1.038988897758925,
      "learning_rate": 8.333333333333334e-06,
      "loss": 1.6205,
      "step": 45
    },
    {
      "epoch": 0.09328358208955224,
      "grad_norm": 0.5713243982808286,
      "learning_rate": 9.25925925925926e-06,
      "loss": 1.5364,
      "step": 50
    },
    {
      "epoch": 0.10261194029850747,
      "grad_norm": 0.3049992132098635,
      "learning_rate": 9.999893795201304e-06,
      "loss": 1.491,
      "step": 55
    },
    {
      "epoch": 0.11194029850746269,
      "grad_norm": 0.36148501744582134,
      "learning_rate": 9.996177100962714e-06,
      "loss": 1.473,
      "step": 60
    },
    {
      "epoch": 0.12126865671641791,
      "grad_norm": 0.37327333184682066,
      "learning_rate": 9.987154677711482e-06,
      "loss": 1.4594,
      "step": 65
    },
    {
      "epoch": 0.13059701492537312,
      "grad_norm": 0.31133363324217617,
      "learning_rate": 9.972836106879936e-06,
      "loss": 1.4442,
      "step": 70
    },
    {
      "epoch": 0.13992537313432835,
      "grad_norm": 0.2375327029162037,
      "learning_rate": 9.953236594185396e-06,
      "loss": 1.4258,
      "step": 75
    },
    {
      "epoch": 0.14925373134328357,
      "grad_norm": 0.19998776915472055,
      "learning_rate": 9.928376953482343e-06,
      "loss": 1.4093,
      "step": 80
    },
    {
      "epoch": 0.15858208955223882,
      "grad_norm": 0.19069793139067803,
      "learning_rate": 9.898283584658988e-06,
      "loss": 1.4029,
      "step": 85
    },
    {
      "epoch": 0.16791044776119404,
      "grad_norm": 0.17979433709822593,
      "learning_rate": 9.86298844560169e-06,
      "loss": 1.3876,
      "step": 90
    },
    {
      "epoch": 0.17723880597014927,
      "grad_norm": 0.1750060584120198,
      "learning_rate": 9.822529018257049e-06,
      "loss": 1.3842,
      "step": 95
    },
    {
      "epoch": 0.1865671641791045,
      "grad_norm": 0.17054790759610952,
      "learning_rate": 9.776948268827658e-06,
      "loss": 1.3756,
      "step": 100
    },
    {
      "epoch": 0.1958955223880597,
      "grad_norm": 0.17397761641849066,
      "learning_rate": 9.726294602143807e-06,
      "loss": 1.3544,
      "step": 105
    },
    {
      "epoch": 0.20522388059701493,
      "grad_norm": 0.17494592787959154,
      "learning_rate": 9.670621810259596e-06,
      "loss": 1.338,
      "step": 110
    },
    {
      "epoch": 0.21455223880597016,
      "grad_norm": 0.17874287198232588,
      "learning_rate": 9.609989015328052e-06,
      "loss": 1.3321,
      "step": 115
    },
    {
      "epoch": 0.22388059701492538,
      "grad_norm": 0.18177787677872814,
      "learning_rate": 9.544460606815901e-06,
      "loss": 1.3231,
      "step": 120
    },
    {
      "epoch": 0.2332089552238806,
      "grad_norm": 0.17485139906929326,
      "learning_rate": 9.474106173124667e-06,
      "loss": 1.3029,
      "step": 125
    },
    {
      "epoch": 0.24253731343283583,
      "grad_norm": 0.16541538036070813,
      "learning_rate": 9.399000427690736e-06,
      "loss": 1.2914,
      "step": 130
    },
    {
      "epoch": 0.251865671641791,
      "grad_norm": 0.15475993597407106,
      "learning_rate": 9.31922312964284e-06,
      "loss": 1.2731,
      "step": 135
    },
    {
      "epoch": 0.26119402985074625,
      "grad_norm": 0.14876033685341322,
      "learning_rate": 9.234858999101232e-06,
      "loss": 1.2612,
      "step": 140
    },
    {
      "epoch": 0.27052238805970147,
      "grad_norm": 0.13102260537341828,
      "learning_rate": 9.1459976272085e-06,
      "loss": 1.2406,
      "step": 145
    },
    {
      "epoch": 0.2798507462686567,
      "grad_norm": 0.1231370916125911,
      "learning_rate": 9.052733380987555e-06,
      "loss": 1.2402,
      "step": 150
    },
    {
      "epoch": 0.2891791044776119,
      "grad_norm": 0.11617076992843688,
      "learning_rate": 8.955165303127841e-06,
      "loss": 1.2287,
      "step": 155
    },
    {
      "epoch": 0.29850746268656714,
      "grad_norm": 0.10711292754756604,
      "learning_rate": 8.853397006806183e-06,
      "loss": 1.2293,
      "step": 160
    },
    {
      "epoch": 0.30783582089552236,
      "grad_norm": 0.10583142709305077,
      "learning_rate": 8.747536565653966e-06,
      "loss": 1.2196,
      "step": 165
    },
    {
      "epoch": 0.31716417910447764,
      "grad_norm": 0.10071371091495551,
      "learning_rate": 8.637696398987517e-06,
      "loss": 1.2218,
      "step": 170
    },
    {
      "epoch": 0.32649253731343286,
      "grad_norm": 0.09786904166381197,
      "learning_rate": 8.523993152423522e-06,
      "loss": 1.2076,
      "step": 175
    },
    {
      "epoch": 0.3358208955223881,
      "grad_norm": 0.09308216883502402,
      "learning_rate": 8.406547574006326e-06,
      "loss": 1.2069,
      "step": 180
    },
    {
      "epoch": 0.3451492537313433,
      "grad_norm": 0.09403316911201794,
      "learning_rate": 8.285484385978598e-06,
      "loss": 1.2151,
      "step": 185
    },
    {
      "epoch": 0.35447761194029853,
      "grad_norm": 0.09276385606464647,
      "learning_rate": 8.160932152331587e-06,
      "loss": 1.2033,
      "step": 190
    },
    {
      "epoch": 0.36380597014925375,
      "grad_norm": 0.09044032546865181,
      "learning_rate": 8.03302314227559e-06,
      "loss": 1.2028,
      "step": 195
    },
    {
      "epoch": 0.373134328358209,
      "grad_norm": 0.08960581233300505,
      "learning_rate": 7.90189318977564e-06,
      "loss": 1.2036,
      "step": 200
    },
    {
      "epoch": 0.3824626865671642,
      "grad_norm": 0.08546838672930177,
      "learning_rate": 7.767681549301576e-06,
      "loss": 1.1932,
      "step": 205
    },
    {
      "epoch": 0.3917910447761194,
      "grad_norm": 0.08706831337193889,
      "learning_rate": 7.630530747945672e-06,
      "loss": 1.2016,
      "step": 210
    },
    {
      "epoch": 0.40111940298507465,
      "grad_norm": 0.08501828118277771,
      "learning_rate": 7.490586434064893e-06,
      "loss": 1.1984,
      "step": 215
    },
    {
      "epoch": 0.41044776119402987,
      "grad_norm": 0.08231749758944934,
      "learning_rate": 7.3479972226084925e-06,
      "loss": 1.1934,
      "step": 220
    },
    {
      "epoch": 0.4197761194029851,
      "grad_norm": 0.0846325939740857,
      "learning_rate": 7.202914537295211e-06,
      "loss": 1.1871,
      "step": 225
    },
    {
      "epoch": 0.4291044776119403,
      "grad_norm": 0.0839943062987978,
      "learning_rate": 7.055492449807684e-06,
      "loss": 1.1847,
      "step": 230
    },
    {
      "epoch": 0.43843283582089554,
      "grad_norm": 0.0842974194320567,
      "learning_rate": 6.905887516174827e-06,
      "loss": 1.1823,
      "step": 235
    },
    {
      "epoch": 0.44776119402985076,
      "grad_norm": 0.08246202494295994,
      "learning_rate": 6.754258610515949e-06,
      "loss": 1.1874,
      "step": 240
    },
    {
      "epoch": 0.457089552238806,
      "grad_norm": 0.08059952388104133,
      "learning_rate": 6.60076675632314e-06,
      "loss": 1.1768,
      "step": 245
    },
    {
      "epoch": 0.4664179104477612,
      "grad_norm": 0.08278287209635887,
      "learning_rate": 6.445574955461134e-06,
      "loss": 1.1743,
      "step": 250
    },
    {
      "epoch": 0.47574626865671643,
      "grad_norm": 0.08064613485931339,
      "learning_rate": 6.288848015066211e-06,
      "loss": 1.1787,
      "step": 255
    },
    {
      "epoch": 0.48507462686567165,
      "grad_norm": 0.08402784054638568,
      "learning_rate": 6.130752372527981e-06,
      "loss": 1.1797,
      "step": 260
    },
    {
      "epoch": 0.4944029850746269,
      "grad_norm": 0.07813087594533498,
      "learning_rate": 5.9714559187399094e-06,
      "loss": 1.1814,
      "step": 265
    },
    {
      "epoch": 0.503731343283582,
      "grad_norm": 0.08253282031035683,
      "learning_rate": 5.811127819806277e-06,
      "loss": 1.1767,
      "step": 270
    },
    {
      "epoch": 0.5130597014925373,
      "grad_norm": 0.0795025665014561,
      "learning_rate": 5.649938337394932e-06,
      "loss": 1.1684,
      "step": 275
    },
    {
      "epoch": 0.5223880597014925,
      "grad_norm": 0.08201269451158993,
      "learning_rate": 5.4880586479265774e-06,
      "loss": 1.1704,
      "step": 280
    },
    {
      "epoch": 0.5317164179104478,
      "grad_norm": 0.08039664736174637,
      "learning_rate": 5.325660660792657e-06,
      "loss": 1.1704,
      "step": 285
    },
    {
      "epoch": 0.5410447761194029,
      "grad_norm": 0.08098088631592301,
      "learning_rate": 5.162916835794843e-06,
      "loss": 1.1722,
      "step": 290
    },
    {
      "epoch": 0.5503731343283582,
      "grad_norm": 0.08014102460089276,
      "learning_rate": 5e-06,
      "loss": 1.1748,
      "step": 295
    },
    {
      "epoch": 0.5597014925373134,
      "grad_norm": 0.08067154136929545,
      "learning_rate": 4.837083164205159e-06,
      "loss": 1.1741,
      "step": 300
    },
    {
      "epoch": 0.5690298507462687,
      "grad_norm": 0.07702944221391651,
      "learning_rate": 4.6743393392073435e-06,
      "loss": 1.1734,
      "step": 305
    },
    {
      "epoch": 0.5783582089552238,
      "grad_norm": 0.08366251841352572,
      "learning_rate": 4.511941352073424e-06,
      "loss": 1.1674,
      "step": 310
    },
    {
      "epoch": 0.5876865671641791,
      "grad_norm": 0.08401515307564614,
      "learning_rate": 4.3500616626050705e-06,
      "loss": 1.171,
      "step": 315
    },
    {
      "epoch": 0.5970149253731343,
      "grad_norm": 0.07880234388575032,
      "learning_rate": 4.188872180193723e-06,
      "loss": 1.1617,
      "step": 320
    },
    {
      "epoch": 0.6063432835820896,
      "grad_norm": 0.08075724129202204,
      "learning_rate": 4.028544081260093e-06,
      "loss": 1.1664,
      "step": 325
    },
    {
      "epoch": 0.6156716417910447,
      "grad_norm": 0.08145891878774718,
      "learning_rate": 3.869247627472021e-06,
      "loss": 1.1647,
      "step": 330
    },
    {
      "epoch": 0.625,
      "grad_norm": 0.07981030112455705,
      "learning_rate": 3.7111519849337908e-06,
      "loss": 1.1661,
      "step": 335
    },
    {
      "epoch": 0.6343283582089553,
      "grad_norm": 0.07719220748480114,
      "learning_rate": 3.554425044538868e-06,
      "loss": 1.164,
      "step": 340
    },
    {
      "epoch": 0.6436567164179104,
      "grad_norm": 0.07965079201392798,
      "learning_rate": 3.3992332436768615e-06,
      "loss": 1.1685,
      "step": 345
    },
    {
      "epoch": 0.6529850746268657,
      "grad_norm": 0.07929508580781615,
      "learning_rate": 3.2457413894840516e-06,
      "loss": 1.166,
      "step": 350
    },
    {
      "epoch": 0.6623134328358209,
      "grad_norm": 0.08260668169292941,
      "learning_rate": 3.0941124838251734e-06,
      "loss": 1.1641,
      "step": 355
    },
    {
      "epoch": 0.6716417910447762,
      "grad_norm": 0.07880705452928782,
      "learning_rate": 2.944507550192318e-06,
      "loss": 1.1697,
      "step": 360
    },
    {
      "epoch": 0.6809701492537313,
      "grad_norm": 0.08617336475255008,
      "learning_rate": 2.7970854627047893e-06,
      "loss": 1.1617,
      "step": 365
    },
    {
      "epoch": 0.6902985074626866,
      "grad_norm": 0.08504447838669933,
      "learning_rate": 2.6520027773915075e-06,
      "loss": 1.1694,
      "step": 370
    },
    {
      "epoch": 0.6996268656716418,
      "grad_norm": 0.08177432381677913,
      "learning_rate": 2.509413565935107e-06,
      "loss": 1.1663,
      "step": 375
    },
    {
      "epoch": 0.7089552238805971,
      "grad_norm": 0.08035338660262055,
      "learning_rate": 2.3694692520543293e-06,
      "loss": 1.1617,
      "step": 380
    },
    {
      "epoch": 0.7182835820895522,
      "grad_norm": 0.08099302263307083,
      "learning_rate": 2.2323184506984257e-06,
      "loss": 1.1601,
      "step": 385
    },
    {
      "epoch": 0.7276119402985075,
      "grad_norm": 0.0788199344055389,
      "learning_rate": 2.098106810224362e-06,
      "loss": 1.1678,
      "step": 390
    },
    {
      "epoch": 0.7369402985074627,
      "grad_norm": 0.0810220420611739,
      "learning_rate": 1.9669768577244107e-06,
      "loss": 1.1659,
      "step": 395
    },
    {
      "epoch": 0.746268656716418,
      "grad_norm": 0.07813643594984888,
      "learning_rate": 1.8390678476684143e-06,
      "loss": 1.1619,
      "step": 400
    },
    {
      "epoch": 0.7555970149253731,
      "grad_norm": 0.07787996158898178,
      "learning_rate": 1.7145156140214032e-06,
      "loss": 1.1647,
      "step": 405
    },
    {
      "epoch": 0.7649253731343284,
      "grad_norm": 0.08149237459503167,
      "learning_rate": 1.5934524259936757e-06,
      "loss": 1.1663,
      "step": 410
    },
    {
      "epoch": 0.7742537313432836,
      "grad_norm": 0.08133121141847001,
      "learning_rate": 1.4760068475764789e-06,
      "loss": 1.1481,
      "step": 415
    },
    {
      "epoch": 0.7835820895522388,
      "grad_norm": 0.07796965232583761,
      "learning_rate": 1.3623036010124845e-06,
      "loss": 1.1592,
      "step": 420
    },
    {
      "epoch": 0.792910447761194,
      "grad_norm": 0.07862074938919475,
      "learning_rate": 1.2524634343460335e-06,
      "loss": 1.1537,
      "step": 425
    },
    {
      "epoch": 0.8022388059701493,
      "grad_norm": 0.08047006906879658,
      "learning_rate": 1.1466029931938182e-06,
      "loss": 1.1575,
      "step": 430
    },
    {
      "epoch": 0.8115671641791045,
      "grad_norm": 0.07919282621278656,
      "learning_rate": 1.0448346968721596e-06,
      "loss": 1.1591,
      "step": 435
    },
    {
      "epoch": 0.8208955223880597,
      "grad_norm": 0.08069925871151505,
      "learning_rate": 9.472666190124457e-07,
      "loss": 1.1588,
      "step": 440
    },
    {
      "epoch": 0.8302238805970149,
      "grad_norm": 0.07925111301368296,
      "learning_rate": 8.540023727915015e-07,
      "loss": 1.1552,
      "step": 445
    },
    {
      "epoch": 0.8395522388059702,
      "grad_norm": 0.08107260891150957,
      "learning_rate": 7.651410008987698e-07,
      "loss": 1.1568,
      "step": 450
    },
    {
      "epoch": 0.8488805970149254,
      "grad_norm": 0.07855238487611285,
      "learning_rate": 6.807768703571616e-07,
      "loss": 1.1498,
      "step": 455
    },
    {
      "epoch": 0.8582089552238806,
      "grad_norm": 0.08067373717155321,
      "learning_rate": 6.009995723092655e-07,
      "loss": 1.163,
      "step": 460
    },
    {
      "epoch": 0.8675373134328358,
      "grad_norm": 0.083123312332487,
      "learning_rate": 5.258938268753344e-07,
      "loss": 1.155,
      "step": 465
    },
    {
      "epoch": 0.8768656716417911,
      "grad_norm": 0.07901297859441447,
      "learning_rate": 4.555393931841001e-07,
      "loss": 1.1615,
      "step": 470
    },
    {
      "epoch": 0.8861940298507462,
      "grad_norm": 0.07759270476319681,
      "learning_rate": 3.9001098467194907e-07,
      "loss": 1.1575,
      "step": 475
    },
    {
      "epoch": 0.8955223880597015,
      "grad_norm": 0.07822412070732043,
      "learning_rate": 3.2937818974040637e-07,
      "loss": 1.1597,
      "step": 480
    },
    {
      "epoch": 0.9048507462686567,
      "grad_norm": 0.0799398408165139,
      "learning_rate": 2.737053978561943e-07,
      "loss": 1.1602,
      "step": 485
    },
    {
      "epoch": 0.914179104477612,
      "grad_norm": 0.07777635865631954,
      "learning_rate": 2.2305173117234236e-07,
      "loss": 1.1568,
      "step": 490
    },
    {
      "epoch": 0.9235074626865671,
      "grad_norm": 0.08036060154131985,
      "learning_rate": 1.7747098174295208e-07,
      "loss": 1.1652,
      "step": 495
    },
    {
      "epoch": 0.9328358208955224,
      "grad_norm": 0.08306752546345667,
      "learning_rate": 1.3701155439831249e-07,
      "loss": 1.1558,
      "step": 500
    },
    {
      "epoch": 0.9421641791044776,
      "grad_norm": 0.07992649084819468,
      "learning_rate": 1.017164153410144e-07,
      "loss": 1.1566,
      "step": 505
    },
    {
      "epoch": 0.9514925373134329,
      "grad_norm": 0.07911970141612473,
      "learning_rate": 7.16230465176565e-08,
      "loss": 1.1559,
      "step": 510
    },
    {
      "epoch": 0.960820895522388,
      "grad_norm": 0.0846383351709255,
      "learning_rate": 4.6763405814604926e-08,
      "loss": 1.1514,
      "step": 515
    },
    {
      "epoch": 0.9701492537313433,
      "grad_norm": 0.07934050493191654,
      "learning_rate": 2.7163893120066288e-08,
      "loss": 1.1542,
      "step": 520
    },
    {
      "epoch": 0.9794776119402985,
      "grad_norm": 0.08167448133397126,
      "learning_rate": 1.284532228851998e-08,
      "loss": 1.161,
      "step": 525
    },
    {
      "epoch": 0.9888059701492538,
      "grad_norm": 0.07857080161491098,
      "learning_rate": 3.822899037286276e-09,
      "loss": 1.1579,
      "step": 530
    },
    {
      "epoch": 0.9981343283582089,
      "grad_norm": 0.07847093836951612,
      "learning_rate": 1.0620479869771772e-10,
      "loss": 1.1582,
      "step": 535
    },
    {
      "epoch": 1.0,
      "eval_runtime": 3.2225,
      "eval_samples_per_second": 3.103,
      "eval_steps_per_second": 0.931,
      "step": 536
    },
    {
      "epoch": 1.0,
      "step": 536,
      "total_flos": 555957746663424.0,
      "train_loss": 1.2752919873194908,
      "train_runtime": 16895.3411,
      "train_samples_per_second": 2.029,
      "train_steps_per_second": 0.032
    }
  ],
  "logging_steps": 5,
  "max_steps": 536,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 555957746663424.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}