|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9990766389658357, |
|
"eval_steps": 500, |
|
"global_step": 541, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0018467220683287165, |
|
"grad_norm": 0.14637957513332367, |
|
"learning_rate": 3.636363636363636e-06, |
|
"loss": 0.7788, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.009233610341643583, |
|
"grad_norm": 0.14028523862361908, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.7893, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.018467220683287166, |
|
"grad_norm": 0.15927864611148834, |
|
"learning_rate": 3.6363636363636364e-05, |
|
"loss": 0.7918, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.027700831024930747, |
|
"grad_norm": 0.15092229843139648, |
|
"learning_rate": 5.4545454545454546e-05, |
|
"loss": 0.7808, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03693444136657433, |
|
"grad_norm": 0.13150204718112946, |
|
"learning_rate": 7.272727272727273e-05, |
|
"loss": 0.7754, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.046168051708217916, |
|
"grad_norm": 0.12067870795726776, |
|
"learning_rate": 9.090909090909092e-05, |
|
"loss": 0.7826, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.055401662049861494, |
|
"grad_norm": 0.11293910443782806, |
|
"learning_rate": 0.00010909090909090909, |
|
"loss": 0.7565, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06463527239150507, |
|
"grad_norm": 0.1150021106004715, |
|
"learning_rate": 0.00012727272727272728, |
|
"loss": 0.757, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.07386888273314866, |
|
"grad_norm": 0.11217568814754486, |
|
"learning_rate": 0.00014545454545454546, |
|
"loss": 0.7454, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08310249307479224, |
|
"grad_norm": 0.12646108865737915, |
|
"learning_rate": 0.00016363636363636366, |
|
"loss": 0.7404, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09233610341643583, |
|
"grad_norm": 0.1182728186249733, |
|
"learning_rate": 0.00018181818181818183, |
|
"loss": 0.7605, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10156971375807941, |
|
"grad_norm": 0.12191201001405716, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7729, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.11080332409972299, |
|
"grad_norm": 0.12068731337785721, |
|
"learning_rate": 0.00019994777247895855, |
|
"loss": 0.7588, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12003693444136658, |
|
"grad_norm": 0.11018814891576767, |
|
"learning_rate": 0.00019979114447011323, |
|
"loss": 0.7459, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.12927054478301014, |
|
"grad_norm": 0.12209581583738327, |
|
"learning_rate": 0.00019953027957931658, |
|
"loss": 0.7549, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.13850415512465375, |
|
"grad_norm": 0.13291500508785248, |
|
"learning_rate": 0.00019916545029310012, |
|
"loss": 0.7473, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.14773776546629733, |
|
"grad_norm": 0.10997017472982407, |
|
"learning_rate": 0.00019869703769404828, |
|
"loss": 0.7471, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1569713758079409, |
|
"grad_norm": 0.11135584861040115, |
|
"learning_rate": 0.00019812553106273847, |
|
"loss": 0.733, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.16620498614958448, |
|
"grad_norm": 0.10819930583238602, |
|
"learning_rate": 0.00019745152736666302, |
|
"loss": 0.7276, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.17543859649122806, |
|
"grad_norm": 0.12196492403745651, |
|
"learning_rate": 0.0001966757306366662, |
|
"loss": 0.7468, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.18467220683287167, |
|
"grad_norm": 0.10541563481092453, |
|
"learning_rate": 0.0001957989512315489, |
|
"loss": 0.728, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19390581717451524, |
|
"grad_norm": 0.11377202719449997, |
|
"learning_rate": 0.00019482210499160765, |
|
"loss": 0.7404, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.20313942751615882, |
|
"grad_norm": 0.10512705147266388, |
|
"learning_rate": 0.0001937462122819935, |
|
"loss": 0.7292, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2123730378578024, |
|
"grad_norm": 0.1054597944021225, |
|
"learning_rate": 0.00019257239692688907, |
|
"loss": 0.7057, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.22160664819944598, |
|
"grad_norm": 0.10640154033899307, |
|
"learning_rate": 0.00019130188503561741, |
|
"loss": 0.746, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.23084025854108955, |
|
"grad_norm": 0.11859942972660065, |
|
"learning_rate": 0.00018993600372190932, |
|
"loss": 0.7538, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.24007386888273316, |
|
"grad_norm": 0.11077919602394104, |
|
"learning_rate": 0.00018847617971766577, |
|
"loss": 0.741, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.24930747922437674, |
|
"grad_norm": 0.10690341889858246, |
|
"learning_rate": 0.00018692393788266479, |
|
"loss": 0.7507, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.2585410895660203, |
|
"grad_norm": 0.1062847375869751, |
|
"learning_rate": 0.0001852808996117683, |
|
"loss": 0.73, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2677746999076639, |
|
"grad_norm": 0.1105961948633194, |
|
"learning_rate": 0.00018354878114129367, |
|
"loss": 0.7134, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2770083102493075, |
|
"grad_norm": 0.10495726019144058, |
|
"learning_rate": 0.00018172939175631808, |
|
"loss": 0.7514, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.28624192059095105, |
|
"grad_norm": 0.1045067086815834, |
|
"learning_rate": 0.0001798246319007893, |
|
"loss": 0.7531, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.29547553093259465, |
|
"grad_norm": 0.11551333218812943, |
|
"learning_rate": 0.00017783649119241602, |
|
"loss": 0.7344, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3047091412742382, |
|
"grad_norm": 0.11248873919248581, |
|
"learning_rate": 0.0001757670463444118, |
|
"loss": 0.7307, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3139427516158818, |
|
"grad_norm": 0.11373342573642731, |
|
"learning_rate": 0.00017361845899626355, |
|
"loss": 0.745, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3231763619575254, |
|
"grad_norm": 0.11083344370126724, |
|
"learning_rate": 0.00017139297345578994, |
|
"loss": 0.7422, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.33240997229916897, |
|
"grad_norm": 0.11349718272686005, |
|
"learning_rate": 0.0001690929143548488, |
|
"loss": 0.7215, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.34164358264081257, |
|
"grad_norm": 0.11683487892150879, |
|
"learning_rate": 0.00016672068422114196, |
|
"loss": 0.7456, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.3508771929824561, |
|
"grad_norm": 0.11161104589700699, |
|
"learning_rate": 0.00016427876096865394, |
|
"loss": 0.7457, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3601108033240997, |
|
"grad_norm": 0.10704860836267471, |
|
"learning_rate": 0.00016176969530934572, |
|
"loss": 0.7598, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.36934441366574333, |
|
"grad_norm": 0.10302165895700455, |
|
"learning_rate": 0.0001591961080888076, |
|
"loss": 0.7605, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3785780240073869, |
|
"grad_norm": 0.10772555321455002, |
|
"learning_rate": 0.00015656068754865387, |
|
"loss": 0.7405, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.3878116343490305, |
|
"grad_norm": 0.11499334126710892, |
|
"learning_rate": 0.0001538661865185188, |
|
"loss": 0.744, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.39704524469067404, |
|
"grad_norm": 0.1089358702301979, |
|
"learning_rate": 0.00015111541954058734, |
|
"loss": 0.7445, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.40627885503231764, |
|
"grad_norm": 0.1021597683429718, |
|
"learning_rate": 0.00014831125992966385, |
|
"loss": 0.728, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4155124653739612, |
|
"grad_norm": 0.10720939934253693, |
|
"learning_rate": 0.00014545663677185006, |
|
"loss": 0.7323, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4247460757156048, |
|
"grad_norm": 0.11227314919233322, |
|
"learning_rate": 0.00014255453186496673, |
|
"loss": 0.7157, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4339796860572484, |
|
"grad_norm": 0.10462162643671036, |
|
"learning_rate": 0.0001396079766039157, |
|
"loss": 0.7293, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.44321329639889195, |
|
"grad_norm": 0.10160651803016663, |
|
"learning_rate": 0.0001366200488142348, |
|
"loss": 0.7226, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.45244690674053556, |
|
"grad_norm": 0.10895903408527374, |
|
"learning_rate": 0.00013359386953715421, |
|
"loss": 0.7511, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.4616805170821791, |
|
"grad_norm": 0.1027149111032486, |
|
"learning_rate": 0.00013053259976951133, |
|
"loss": 0.7123, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4709141274238227, |
|
"grad_norm": 0.1131887212395668, |
|
"learning_rate": 0.00012743943716193016, |
|
"loss": 0.7571, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.4801477377654663, |
|
"grad_norm": 0.11355064809322357, |
|
"learning_rate": 0.00012431761267871417, |
|
"loss": 0.7386, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.48938134810710987, |
|
"grad_norm": 0.11614394187927246, |
|
"learning_rate": 0.0001211703872229411, |
|
"loss": 0.7142, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.4986149584487535, |
|
"grad_norm": 0.1130671575665474, |
|
"learning_rate": 0.00011800104823028515, |
|
"loss": 0.7439, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5078485687903971, |
|
"grad_norm": 0.10703490674495697, |
|
"learning_rate": 0.0001148129062351249, |
|
"loss": 0.7283, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5170821791320406, |
|
"grad_norm": 0.10461420565843582, |
|
"learning_rate": 0.00011160929141252303, |
|
"loss": 0.7469, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 0.10531424731016159, |
|
"learning_rate": 0.00010839355009969068, |
|
"loss": 0.7529, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.5355493998153278, |
|
"grad_norm": 0.11467906832695007, |
|
"learning_rate": 0.00010516904130056946, |
|
"loss": 0.7525, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5447830101569714, |
|
"grad_norm": 0.10647362470626831, |
|
"learning_rate": 0.00010193913317718244, |
|
"loss": 0.7286, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.554016620498615, |
|
"grad_norm": 0.10562632977962494, |
|
"learning_rate": 9.870719953141917e-05, |
|
"loss": 0.7548, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5632502308402585, |
|
"grad_norm": 0.11012793332338333, |
|
"learning_rate": 9.547661628092937e-05, |
|
"loss": 0.7276, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.5724838411819021, |
|
"grad_norm": 0.11083028465509415, |
|
"learning_rate": 9.225075793280692e-05, |
|
"loss": 0.7387, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5817174515235457, |
|
"grad_norm": 0.10291365534067154, |
|
"learning_rate": 8.903299405874684e-05, |
|
"loss": 0.7388, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.5909510618651893, |
|
"grad_norm": 0.10362917929887772, |
|
"learning_rate": 8.582668577535797e-05, |
|
"loss": 0.7478, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6001846722068329, |
|
"grad_norm": 0.10294944047927856, |
|
"learning_rate": 8.263518223330697e-05, |
|
"loss": 0.7367, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6094182825484764, |
|
"grad_norm": 0.10952912271022797, |
|
"learning_rate": 7.94618171189618e-05, |
|
"loss": 0.7486, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.61865189289012, |
|
"grad_norm": 0.09998750686645508, |
|
"learning_rate": 7.630990517218808e-05, |
|
"loss": 0.721, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.6278855032317636, |
|
"grad_norm": 0.10416150093078613, |
|
"learning_rate": 7.318273872393625e-05, |
|
"loss": 0.738, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6371191135734072, |
|
"grad_norm": 0.09991578012704849, |
|
"learning_rate": 7.008358425723585e-05, |
|
"loss": 0.7496, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.6463527239150508, |
|
"grad_norm": 0.11119415611028671, |
|
"learning_rate": 6.701567899518924e-05, |
|
"loss": 0.7455, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6555863342566943, |
|
"grad_norm": 0.10467606037855148, |
|
"learning_rate": 6.398222751952899e-05, |
|
"loss": 0.7404, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.6648199445983379, |
|
"grad_norm": 0.11238551884889603, |
|
"learning_rate": 6.098639842327052e-05, |
|
"loss": 0.7471, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6740535549399815, |
|
"grad_norm": 0.10642090439796448, |
|
"learning_rate": 5.80313210009571e-05, |
|
"loss": 0.7573, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.6832871652816251, |
|
"grad_norm": 0.10575485974550247, |
|
"learning_rate": 5.5120081979953785e-05, |
|
"loss": 0.7315, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6925207756232687, |
|
"grad_norm": 0.1076451987028122, |
|
"learning_rate": 5.22557222962051e-05, |
|
"loss": 0.7292, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 0.10904475301504135, |
|
"learning_rate": 4.9441233917824106e-05, |
|
"loss": 0.7196, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7109879963065558, |
|
"grad_norm": 0.10976432263851166, |
|
"learning_rate": 4.66795567198309e-05, |
|
"loss": 0.7277, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7202216066481995, |
|
"grad_norm": 0.10619588196277618, |
|
"learning_rate": 4.397357541330476e-05, |
|
"loss": 0.7501, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7294552169898431, |
|
"grad_norm": 0.11529234051704407, |
|
"learning_rate": 4.132611653215822e-05, |
|
"loss": 0.7325, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.7386888273314867, |
|
"grad_norm": 0.10589467734098434, |
|
"learning_rate": 3.873994548067972e-05, |
|
"loss": 0.743, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7479224376731302, |
|
"grad_norm": 0.10833586007356644, |
|
"learning_rate": 3.621776364492939e-05, |
|
"loss": 0.7325, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.7571560480147738, |
|
"grad_norm": 0.10319048166275024, |
|
"learning_rate": 3.376220557100523e-05, |
|
"loss": 0.7322, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7663896583564174, |
|
"grad_norm": 0.10046496242284775, |
|
"learning_rate": 3.137583621312665e-05, |
|
"loss": 0.7121, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.775623268698061, |
|
"grad_norm": 0.10600613802671432, |
|
"learning_rate": 2.906114825441072e-05, |
|
"loss": 0.7422, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7848568790397045, |
|
"grad_norm": 0.11138713359832764, |
|
"learning_rate": 2.6820559503138797e-05, |
|
"loss": 0.719, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.7940904893813481, |
|
"grad_norm": 0.10552503913640976, |
|
"learning_rate": 2.465641036723393e-05, |
|
"loss": 0.7346, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8033240997229917, |
|
"grad_norm": 0.10045293718576431, |
|
"learning_rate": 2.2570961409586754e-05, |
|
"loss": 0.7144, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.8125577100646353, |
|
"grad_norm": 0.10837393254041672, |
|
"learning_rate": 2.0566390986783646e-05, |
|
"loss": 0.7312, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8217913204062789, |
|
"grad_norm": 0.1052984818816185, |
|
"learning_rate": 1.864479297370325e-05, |
|
"loss": 0.7329, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.8310249307479224, |
|
"grad_norm": 0.10482414066791534, |
|
"learning_rate": 1.6808174576358848e-05, |
|
"loss": 0.735, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.840258541089566, |
|
"grad_norm": 0.10467493534088135, |
|
"learning_rate": 1.505845423527027e-05, |
|
"loss": 0.7211, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.8494921514312096, |
|
"grad_norm": 0.1039409339427948, |
|
"learning_rate": 1.339745962155613e-05, |
|
"loss": 0.7193, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.8587257617728532, |
|
"grad_norm": 0.10943976044654846, |
|
"learning_rate": 1.18269257278392e-05, |
|
"loss": 0.7511, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.8679593721144968, |
|
"grad_norm": 0.09965581446886063, |
|
"learning_rate": 1.0348493055959062e-05, |
|
"loss": 0.7138, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8771929824561403, |
|
"grad_norm": 0.11060912162065506, |
|
"learning_rate": 8.963705903385345e-06, |
|
"loss": 0.7349, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.8864265927977839, |
|
"grad_norm": 0.10819496214389801, |
|
"learning_rate": 7.674010750120964e-06, |
|
"loss": 0.7309, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8956602031394275, |
|
"grad_norm": 0.10742160677909851, |
|
"learning_rate": 6.480754747781037e-06, |
|
"loss": 0.716, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.9048938134810711, |
|
"grad_norm": 0.10659054666757584, |
|
"learning_rate": 5.385184312424974e-06, |
|
"loss": 0.7406, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.9141274238227147, |
|
"grad_norm": 0.10261913388967514, |
|
"learning_rate": 4.3884438226120424e-06, |
|
"loss": 0.7282, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.9233610341643582, |
|
"grad_norm": 0.10389312356710434, |
|
"learning_rate": 3.4915744240403558e-06, |
|
"loss": 0.7363, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9325946445060018, |
|
"grad_norm": 0.1055300384759903, |
|
"learning_rate": 2.6955129420176196e-06, |
|
"loss": 0.7312, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.9418282548476454, |
|
"grad_norm": 0.10658875107765198, |
|
"learning_rate": 2.0010909028998827e-06, |
|
"loss": 0.7383, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.951061865189289, |
|
"grad_norm": 0.10060267895460129, |
|
"learning_rate": 1.409033665520354e-06, |
|
"loss": 0.7119, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.9602954755309326, |
|
"grad_norm": 0.103419728577137, |
|
"learning_rate": 9.199596635154683e-07, |
|
"loss": 0.7294, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.9695290858725761, |
|
"grad_norm": 0.10551342368125916, |
|
"learning_rate": 5.343797593398536e-07, |
|
"loss": 0.7168, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.9787626962142197, |
|
"grad_norm": 0.11032087355852127, |
|
"learning_rate": 2.5269671064467313e-07, |
|
"loss": 0.7491, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.9879963065558633, |
|
"grad_norm": 0.10744015127420425, |
|
"learning_rate": 7.520474957699586e-08, |
|
"loss": 0.7463, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.997229916897507, |
|
"grad_norm": 0.10710299015045166, |
|
"learning_rate": 2.0892754394208346e-09, |
|
"loss": 0.7327, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9990766389658357, |
|
"eval_loss": 0.9449617266654968, |
|
"eval_runtime": 130.1564, |
|
"eval_samples_per_second": 8.874, |
|
"eval_steps_per_second": 0.561, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.9990766389658357, |
|
"step": 541, |
|
"total_flos": 2.226930684415443e+18, |
|
"train_loss": 0.7396127296243269, |
|
"train_runtime": 26304.3051, |
|
"train_samples_per_second": 3.951, |
|
"train_steps_per_second": 0.021 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 541, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.226930684415443e+18, |
|
"train_batch_size": 24, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|