{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 2187,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0004572473708276177,
      "grad_norm": 8.096893602247533,
      "learning_rate": 4.5662100456621004e-08,
      "loss": 4.0564,
      "step": 1
    },
    {
      "epoch": 0.002286236854138089,
      "grad_norm": 9.362922497604572,
      "learning_rate": 2.2831050228310502e-07,
      "loss": 4.0286,
      "step": 5
    },
    {
      "epoch": 0.004572473708276178,
      "grad_norm": 8.436200820951127,
      "learning_rate": 4.5662100456621004e-07,
      "loss": 4.0215,
      "step": 10
    },
    {
      "epoch": 0.006858710562414266,
      "grad_norm": 8.311166463949595,
      "learning_rate": 6.849315068493151e-07,
      "loss": 3.995,
      "step": 15
    },
    {
      "epoch": 0.009144947416552356,
      "grad_norm": 8.314737539002055,
      "learning_rate": 9.132420091324201e-07,
      "loss": 4.0089,
      "step": 20
    },
    {
      "epoch": 0.011431184270690443,
      "grad_norm": 8.207038819761589,
      "learning_rate": 1.1415525114155251e-06,
      "loss": 4.0363,
      "step": 25
    },
    {
      "epoch": 0.013717421124828532,
      "grad_norm": 7.559088257570073,
      "learning_rate": 1.3698630136986302e-06,
      "loss": 3.9854,
      "step": 30
    },
    {
      "epoch": 0.01600365797896662,
      "grad_norm": 7.1469550333759315,
      "learning_rate": 1.5981735159817353e-06,
      "loss": 3.9172,
      "step": 35
    },
    {
      "epoch": 0.01828989483310471,
      "grad_norm": 5.971779564925808,
      "learning_rate": 1.8264840182648401e-06,
      "loss": 3.7833,
      "step": 40
    },
    {
      "epoch": 0.0205761316872428,
      "grad_norm": 5.297232176162252,
      "learning_rate": 2.0547945205479454e-06,
      "loss": 3.7258,
      "step": 45
    },
    {
      "epoch": 0.022862368541380886,
      "grad_norm": 4.217560181118984,
      "learning_rate": 2.2831050228310503e-06,
      "loss": 3.6069,
      "step": 50
    },
    {
      "epoch": 0.025148605395518976,
      "grad_norm": 3.786598130029432,
      "learning_rate": 2.511415525114155e-06,
      "loss": 3.553,
      "step": 55
    },
    {
      "epoch": 0.027434842249657063,
      "grad_norm": 2.6863763381878782,
      "learning_rate": 2.7397260273972604e-06,
      "loss": 3.4564,
      "step": 60
    },
    {
      "epoch": 0.029721079103795154,
      "grad_norm": 2.322206779034821,
      "learning_rate": 2.9680365296803653e-06,
      "loss": 3.331,
      "step": 65
    },
    {
      "epoch": 0.03200731595793324,
      "grad_norm": 1.7328570105860337,
      "learning_rate": 3.1963470319634706e-06,
      "loss": 3.2806,
      "step": 70
    },
    {
      "epoch": 0.03429355281207133,
      "grad_norm": 1.4846651112766411,
      "learning_rate": 3.4246575342465754e-06,
      "loss": 3.2356,
      "step": 75
    },
    {
      "epoch": 0.03657978966620942,
      "grad_norm": 1.1848731903024705,
      "learning_rate": 3.6529680365296803e-06,
      "loss": 3.1934,
      "step": 80
    },
    {
      "epoch": 0.038866026520347506,
      "grad_norm": 1.0381920877926754,
      "learning_rate": 3.881278538812785e-06,
      "loss": 3.1245,
      "step": 85
    },
    {
      "epoch": 0.0411522633744856,
      "grad_norm": 0.9141218079482407,
      "learning_rate": 4.109589041095891e-06,
      "loss": 3.0468,
      "step": 90
    },
    {
      "epoch": 0.04343850022862369,
      "grad_norm": 1.0161753499176187,
      "learning_rate": 4.337899543378996e-06,
      "loss": 3.0329,
      "step": 95
    },
    {
      "epoch": 0.04572473708276177,
      "grad_norm": 0.8996453649527762,
      "learning_rate": 4.566210045662101e-06,
      "loss": 2.9958,
      "step": 100
    },
    {
      "epoch": 0.04801097393689986,
      "grad_norm": 0.8082797335102323,
      "learning_rate": 4.7945205479452054e-06,
      "loss": 2.9651,
      "step": 105
    },
    {
      "epoch": 0.05029721079103795,
      "grad_norm": 0.7090431405422901,
      "learning_rate": 5.02283105022831e-06,
      "loss": 2.927,
      "step": 110
    },
    {
      "epoch": 0.05258344764517604,
      "grad_norm": 1.2265537925663061,
      "learning_rate": 5.251141552511416e-06,
      "loss": 2.862,
      "step": 115
    },
    {
      "epoch": 0.05486968449931413,
      "grad_norm": 0.650571444620453,
      "learning_rate": 5.479452054794521e-06,
      "loss": 2.857,
      "step": 120
    },
    {
      "epoch": 0.05715592135345222,
      "grad_norm": 0.7089485111846239,
      "learning_rate": 5.7077625570776266e-06,
      "loss": 2.8209,
      "step": 125
    },
    {
      "epoch": 0.05944215820759031,
      "grad_norm": 0.5922494361050838,
      "learning_rate": 5.936073059360731e-06,
      "loss": 2.8037,
      "step": 130
    },
    {
      "epoch": 0.06172839506172839,
      "grad_norm": 0.5597217919230902,
      "learning_rate": 6.164383561643836e-06,
      "loss": 2.7487,
      "step": 135
    },
    {
      "epoch": 0.06401463191586648,
      "grad_norm": 0.6045746583730743,
      "learning_rate": 6.392694063926941e-06,
      "loss": 2.6981,
      "step": 140
    },
    {
      "epoch": 0.06630086877000457,
      "grad_norm": 0.6479924774135967,
      "learning_rate": 6.621004566210046e-06,
      "loss": 2.7036,
      "step": 145
    },
    {
      "epoch": 0.06858710562414266,
      "grad_norm": 0.760723993748018,
      "learning_rate": 6.849315068493151e-06,
      "loss": 2.6821,
      "step": 150
    },
    {
      "epoch": 0.07087334247828075,
      "grad_norm": 0.5889973577684341,
      "learning_rate": 7.077625570776257e-06,
      "loss": 2.6882,
      "step": 155
    },
    {
      "epoch": 0.07315957933241884,
      "grad_norm": 0.6201384588278992,
      "learning_rate": 7.305936073059361e-06,
      "loss": 2.6441,
      "step": 160
    },
    {
      "epoch": 0.07544581618655692,
      "grad_norm": 0.645862573214957,
      "learning_rate": 7.534246575342466e-06,
      "loss": 2.5878,
      "step": 165
    },
    {
      "epoch": 0.07773205304069501,
      "grad_norm": 0.880791499233313,
      "learning_rate": 7.76255707762557e-06,
      "loss": 2.5665,
      "step": 170
    },
    {
      "epoch": 0.0800182898948331,
      "grad_norm": 0.7581098091472079,
      "learning_rate": 7.990867579908676e-06,
      "loss": 2.5423,
      "step": 175
    },
    {
      "epoch": 0.0823045267489712,
      "grad_norm": 0.7502504535360037,
      "learning_rate": 8.219178082191782e-06,
      "loss": 2.5348,
      "step": 180
    },
    {
      "epoch": 0.08459076360310928,
      "grad_norm": 0.9587325899501735,
      "learning_rate": 8.447488584474887e-06,
      "loss": 2.4652,
      "step": 185
    },
    {
      "epoch": 0.08687700045724737,
      "grad_norm": 1.0327228370595574,
      "learning_rate": 8.675799086757991e-06,
      "loss": 2.4066,
      "step": 190
    },
    {
      "epoch": 0.08916323731138547,
      "grad_norm": 0.8853835960264104,
      "learning_rate": 8.904109589041097e-06,
      "loss": 2.3642,
      "step": 195
    },
    {
      "epoch": 0.09144947416552354,
      "grad_norm": 1.0446953486337078,
      "learning_rate": 9.132420091324201e-06,
      "loss": 2.3237,
      "step": 200
    },
    {
      "epoch": 0.09373571101966163,
      "grad_norm": 1.1013758488210148,
      "learning_rate": 9.360730593607307e-06,
      "loss": 2.2331,
      "step": 205
    },
    {
      "epoch": 0.09602194787379972,
      "grad_norm": 1.2192543249794923,
      "learning_rate": 9.589041095890411e-06,
      "loss": 2.1264,
      "step": 210
    },
    {
      "epoch": 0.09830818472793781,
      "grad_norm": 1.3533953895273099,
      "learning_rate": 9.817351598173517e-06,
      "loss": 2.0554,
      "step": 215
    },
    {
      "epoch": 0.1005944215820759,
      "grad_norm": 1.1876482609404326,
      "learning_rate": 9.999993629265979e-06,
      "loss": 1.9859,
      "step": 220
    },
    {
      "epoch": 0.102880658436214,
      "grad_norm": 1.1847416528253172,
      "learning_rate": 9.999770655279843e-06,
      "loss": 1.8986,
      "step": 225
    },
    {
      "epoch": 0.10516689529035209,
      "grad_norm": 1.3137466650624998,
      "learning_rate": 9.999229160826947e-06,
      "loss": 1.8,
      "step": 230
    },
    {
      "epoch": 0.10745313214449016,
      "grad_norm": 1.830150495140023,
      "learning_rate": 9.998369180404283e-06,
      "loss": 1.7138,
      "step": 235
    },
    {
      "epoch": 0.10973936899862825,
      "grad_norm": 1.1159850299398295,
      "learning_rate": 9.997190768798639e-06,
      "loss": 1.6867,
      "step": 240
    },
    {
      "epoch": 0.11202560585276634,
      "grad_norm": 0.9727694366367986,
      "learning_rate": 9.995694001083103e-06,
      "loss": 1.6469,
      "step": 245
    },
    {
      "epoch": 0.11431184270690443,
      "grad_norm": 1.135743426814773,
      "learning_rate": 9.993878972612276e-06,
      "loss": 1.5607,
      "step": 250
    },
    {
      "epoch": 0.11659807956104253,
      "grad_norm": 1.0363437963731608,
      "learning_rate": 9.991745799016206e-06,
      "loss": 1.5332,
      "step": 255
    },
    {
      "epoch": 0.11888431641518062,
      "grad_norm": 1.018006180331875,
      "learning_rate": 9.989294616193018e-06,
      "loss": 1.4962,
      "step": 260
    },
    {
      "epoch": 0.1211705532693187,
      "grad_norm": 0.9493951106581935,
      "learning_rate": 9.986525580300253e-06,
      "loss": 1.4403,
      "step": 265
    },
    {
      "epoch": 0.12345679012345678,
      "grad_norm": 4.150830186272059,
      "learning_rate": 9.983438867744923e-06,
      "loss": 1.4382,
      "step": 270
    },
    {
      "epoch": 0.12574302697759487,
      "grad_norm": 0.8458476848705546,
      "learning_rate": 9.980034675172274e-06,
      "loss": 1.4248,
      "step": 275
    },
    {
      "epoch": 0.12802926383173296,
      "grad_norm": 1.8171861028727991,
      "learning_rate": 9.976313219453255e-06,
      "loss": 1.4055,
      "step": 280
    },
    {
      "epoch": 0.13031550068587106,
      "grad_norm": 0.7389926811741014,
      "learning_rate": 9.972274737670702e-06,
      "loss": 1.4033,
      "step": 285
    },
    {
      "epoch": 0.13260173754000915,
      "grad_norm": 0.8834746515415843,
      "learning_rate": 9.967919487104237e-06,
      "loss": 1.3724,
      "step": 290
    },
    {
      "epoch": 0.13488797439414724,
      "grad_norm": 0.8166186304734012,
      "learning_rate": 9.963247745213876e-06,
      "loss": 1.3721,
      "step": 295
    },
    {
      "epoch": 0.13717421124828533,
      "grad_norm": 0.6771475216933378,
      "learning_rate": 9.958259809622353e-06,
      "loss": 1.3555,
      "step": 300
    },
    {
      "epoch": 0.13946044810242342,
      "grad_norm": 0.60525762012324,
      "learning_rate": 9.952955998096155e-06,
      "loss": 1.36,
      "step": 305
    },
    {
      "epoch": 0.1417466849565615,
      "grad_norm": 0.6126617626167846,
      "learning_rate": 9.94733664852529e-06,
      "loss": 1.353,
      "step": 310
    },
    {
      "epoch": 0.1440329218106996,
      "grad_norm": 0.6630794657190928,
      "learning_rate": 9.941402118901743e-06,
      "loss": 1.3359,
      "step": 315
    },
    {
      "epoch": 0.1463191586648377,
      "grad_norm": 0.6758533351396738,
      "learning_rate": 9.935152787296689e-06,
      "loss": 1.3402,
      "step": 320
    },
    {
      "epoch": 0.14860539551897575,
      "grad_norm": 0.739719330356037,
      "learning_rate": 9.928589051836392e-06,
      "loss": 1.3346,
      "step": 325
    },
    {
      "epoch": 0.15089163237311384,
      "grad_norm": 0.7258290118963521,
      "learning_rate": 9.921711330676848e-06,
      "loss": 1.3356,
      "step": 330
    },
    {
      "epoch": 0.15317786922725193,
      "grad_norm": 0.6274092924270468,
      "learning_rate": 9.91452006197715e-06,
      "loss": 1.3362,
      "step": 335
    },
    {
      "epoch": 0.15546410608139002,
      "grad_norm": 0.768028072114212,
      "learning_rate": 9.907015703871558e-06,
      "loss": 1.3214,
      "step": 340
    },
    {
      "epoch": 0.15775034293552812,
      "grad_norm": 0.7738373400419118,
      "learning_rate": 9.899198734440335e-06,
      "loss": 1.331,
      "step": 345
    },
    {
      "epoch": 0.1600365797896662,
      "grad_norm": 0.6855410863811031,
      "learning_rate": 9.891069651679273e-06,
      "loss": 1.3142,
      "step": 350
    },
    {
      "epoch": 0.1623228166438043,
      "grad_norm": 0.6405023247699122,
      "learning_rate": 9.882628973467972e-06,
      "loss": 1.3171,
      "step": 355
    },
    {
      "epoch": 0.1646090534979424,
      "grad_norm": 0.6764400756880153,
      "learning_rate": 9.873877237536854e-06,
      "loss": 1.3189,
      "step": 360
    },
    {
      "epoch": 0.16689529035208048,
      "grad_norm": 0.6298462983903607,
      "learning_rate": 9.86481500143289e-06,
      "loss": 1.3059,
      "step": 365
    },
    {
      "epoch": 0.16918152720621857,
      "grad_norm": 0.6606697771559132,
      "learning_rate": 9.855442842484101e-06,
      "loss": 1.3267,
      "step": 370
    },
    {
      "epoch": 0.17146776406035666,
      "grad_norm": 0.5895037669135822,
      "learning_rate": 9.84576135776276e-06,
      "loss": 1.3057,
      "step": 375
    },
    {
      "epoch": 0.17375400091449475,
      "grad_norm": 0.5762405642901876,
      "learning_rate": 9.835771164047365e-06,
      "loss": 1.3016,
      "step": 380
    },
    {
      "epoch": 0.17604023776863284,
      "grad_norm": 0.6301891918568133,
      "learning_rate": 9.825472897783344e-06,
      "loss": 1.3046,
      "step": 385
    },
    {
      "epoch": 0.17832647462277093,
      "grad_norm": 0.6189017845225122,
      "learning_rate": 9.814867215042503e-06,
      "loss": 1.3089,
      "step": 390
    },
    {
      "epoch": 0.18061271147690902,
      "grad_norm": 0.6279515665165573,
      "learning_rate": 9.803954791481239e-06,
      "loss": 1.3011,
      "step": 395
    },
    {
      "epoch": 0.18289894833104708,
      "grad_norm": 0.6380039476156935,
      "learning_rate": 9.792736322297489e-06,
      "loss": 1.2758,
      "step": 400
    },
    {
      "epoch": 0.18518518518518517,
      "grad_norm": 0.7506004279154695,
      "learning_rate": 9.781212522186442e-06,
      "loss": 1.312,
      "step": 405
    },
    {
      "epoch": 0.18747142203932327,
      "grad_norm": 0.7054181242720778,
      "learning_rate": 9.769384125295012e-06,
      "loss": 1.3112,
      "step": 410
    },
    {
      "epoch": 0.18975765889346136,
      "grad_norm": 0.5797880483237029,
      "learning_rate": 9.757251885175063e-06,
      "loss": 1.2998,
      "step": 415
    },
    {
      "epoch": 0.19204389574759945,
      "grad_norm": 0.6040659600524477,
      "learning_rate": 9.744816574735405e-06,
      "loss": 1.3018,
      "step": 420
    },
    {
      "epoch": 0.19433013260173754,
      "grad_norm": 0.7044299546094256,
      "learning_rate": 9.732078986192552e-06,
      "loss": 1.2818,
      "step": 425
    },
    {
      "epoch": 0.19661636945587563,
      "grad_norm": 0.567841572649114,
      "learning_rate": 9.719039931020258e-06,
      "loss": 1.2733,
      "step": 430
    },
    {
      "epoch": 0.19890260631001372,
      "grad_norm": 0.5378351616772565,
      "learning_rate": 9.705700239897809e-06,
      "loss": 1.2861,
      "step": 435
    },
    {
      "epoch": 0.2011888431641518,
      "grad_norm": 0.5372339490006793,
      "learning_rate": 9.692060762657118e-06,
      "loss": 1.2821,
      "step": 440
    },
    {
      "epoch": 0.2034750800182899,
      "grad_norm": 0.6353680076674888,
      "learning_rate": 9.678122368228571e-06,
      "loss": 1.2643,
      "step": 445
    },
    {
      "epoch": 0.205761316872428,
      "grad_norm": 0.6263499547366734,
      "learning_rate": 9.66388594458568e-06,
      "loss": 1.2826,
      "step": 450
    },
    {
      "epoch": 0.20804755372656608,
      "grad_norm": 0.6119180746423146,
      "learning_rate": 9.649352398688506e-06,
      "loss": 1.2856,
      "step": 455
    },
    {
      "epoch": 0.21033379058070417,
      "grad_norm": 0.6640618234127624,
      "learning_rate": 9.634522656425885e-06,
      "loss": 1.2765,
      "step": 460
    },
    {
      "epoch": 0.21262002743484226,
      "grad_norm": 0.6253602428713037,
      "learning_rate": 9.619397662556434e-06,
      "loss": 1.2661,
      "step": 465
    },
    {
      "epoch": 0.21490626428898033,
      "grad_norm": 0.6463257272674591,
      "learning_rate": 9.603978380648375e-06,
      "loss": 1.2838,
      "step": 470
    },
    {
      "epoch": 0.21719250114311842,
      "grad_norm": 0.6916869993480118,
      "learning_rate": 9.588265793018141e-06,
      "loss": 1.2785,
      "step": 475
    },
    {
      "epoch": 0.2194787379972565,
      "grad_norm": 0.578420093141111,
      "learning_rate": 9.572260900667794e-06,
      "loss": 1.2627,
      "step": 480
    },
    {
      "epoch": 0.2217649748513946,
      "grad_norm": 0.6016744117162259,
      "learning_rate": 9.555964723221258e-06,
      "loss": 1.2672,
      "step": 485
    },
    {
      "epoch": 0.2240512117055327,
      "grad_norm": 0.6325422647436533,
      "learning_rate": 9.539378298859365e-06,
      "loss": 1.2667,
      "step": 490
    },
    {
      "epoch": 0.22633744855967078,
      "grad_norm": 0.674420764332063,
      "learning_rate": 9.522502684253709e-06,
      "loss": 1.2601,
      "step": 495
    },
    {
      "epoch": 0.22862368541380887,
      "grad_norm": 0.6942742236531446,
      "learning_rate": 9.505338954499332e-06,
      "loss": 1.275,
      "step": 500
    },
    {
      "epoch": 0.23090992226794696,
      "grad_norm": 0.5661617220667517,
      "learning_rate": 9.487888203046232e-06,
      "loss": 1.2683,
      "step": 505
    },
    {
      "epoch": 0.23319615912208505,
      "grad_norm": 0.6389133947347537,
      "learning_rate": 9.4701515416297e-06,
      "loss": 1.2659,
      "step": 510
    },
    {
      "epoch": 0.23548239597622314,
      "grad_norm": 0.561786602813537,
      "learning_rate": 9.452130100199504e-06,
      "loss": 1.2664,
      "step": 515
    },
    {
      "epoch": 0.23776863283036123,
      "grad_norm": 0.5666699221383189,
      "learning_rate": 9.433825026847891e-06,
      "loss": 1.2573,
      "step": 520
    },
    {
      "epoch": 0.24005486968449932,
      "grad_norm": 0.6718711112993888,
      "learning_rate": 9.415237487736452e-06,
      "loss": 1.2545,
      "step": 525
    },
    {
      "epoch": 0.2423411065386374,
      "grad_norm": 0.5637527283960878,
      "learning_rate": 9.396368667021835e-06,
      "loss": 1.2723,
      "step": 530
    },
    {
      "epoch": 0.2446273433927755,
      "grad_norm": 0.583426898925874,
      "learning_rate": 9.377219766780288e-06,
      "loss": 1.2473,
      "step": 535
    },
    {
      "epoch": 0.24691358024691357,
      "grad_norm": 0.7422622561747031,
      "learning_rate": 9.3577920069311e-06,
      "loss": 1.2609,
      "step": 540
    },
    {
      "epoch": 0.24919981710105166,
      "grad_norm": 0.7536416453907702,
      "learning_rate": 9.338086625158867e-06,
      "loss": 1.2655,
      "step": 545
    },
    {
      "epoch": 0.25148605395518975,
      "grad_norm": 0.5911621999933799,
      "learning_rate": 9.318104876834652e-06,
      "loss": 1.2652,
      "step": 550
    },
    {
      "epoch": 0.25377229080932784,
      "grad_norm": 0.6482915887304207,
      "learning_rate": 9.297848034936007e-06,
      "loss": 1.2488,
      "step": 555
    },
    {
      "epoch": 0.25605852766346593,
      "grad_norm": 0.7813862221549358,
      "learning_rate": 9.277317389965871e-06,
      "loss": 1.2678,
      "step": 560
    },
    {
      "epoch": 0.258344764517604,
      "grad_norm": 0.601959447185496,
      "learning_rate": 9.256514249870366e-06,
      "loss": 1.2549,
      "step": 565
    },
    {
      "epoch": 0.2606310013717421,
      "grad_norm": 0.5439593292691556,
      "learning_rate": 9.235439939955458e-06,
      "loss": 1.2311,
      "step": 570
    },
    {
      "epoch": 0.2629172382258802,
      "grad_norm": 0.6462948109732727,
      "learning_rate": 9.214095802802533e-06,
      "loss": 1.2605,
      "step": 575
    },
    {
      "epoch": 0.2652034750800183,
      "grad_norm": 0.6523908850821281,
      "learning_rate": 9.192483198182876e-06,
      "loss": 1.2577,
      "step": 580
    },
    {
      "epoch": 0.2674897119341564,
      "grad_norm": 0.6285230592028435,
      "learning_rate": 9.170603502971017e-06,
      "loss": 1.233,
      "step": 585
    },
    {
      "epoch": 0.2697759487882945,
      "grad_norm": 0.5990676661488948,
      "learning_rate": 9.148458111057043e-06,
      "loss": 1.2444,
      "step": 590
    },
    {
      "epoch": 0.27206218564243256,
      "grad_norm": 0.5443537881683997,
      "learning_rate": 9.12604843325778e-06,
      "loss": 1.2282,
      "step": 595
    },
    {
      "epoch": 0.27434842249657065,
      "grad_norm": 0.5804764131758829,
      "learning_rate": 9.103375897226919e-06,
      "loss": 1.253,
      "step": 600
    },
    {
      "epoch": 0.27663465935070874,
      "grad_norm": 0.5905170219986889,
      "learning_rate": 9.080441947364065e-06,
      "loss": 1.2472,
      "step": 605
    },
    {
      "epoch": 0.27892089620484684,
      "grad_norm": 0.6003218456115103,
      "learning_rate": 9.057248044722718e-06,
      "loss": 1.2421,
      "step": 610
    },
    {
      "epoch": 0.2812071330589849,
      "grad_norm": 0.5683857920528798,
      "learning_rate": 9.033795666917191e-06,
      "loss": 1.2551,
      "step": 615
    },
    {
      "epoch": 0.283493369913123,
      "grad_norm": 0.5908776822300396,
      "learning_rate": 9.010086308028487e-06,
      "loss": 1.2375,
      "step": 620
    },
    {
      "epoch": 0.2857796067672611,
      "grad_norm": 0.6118010788168986,
      "learning_rate": 8.986121478509096e-06,
      "loss": 1.2347,
      "step": 625
    },
    {
      "epoch": 0.2880658436213992,
      "grad_norm": 0.5787813457678733,
      "learning_rate": 8.961902705086785e-06,
      "loss": 1.2395,
      "step": 630
    },
    {
      "epoch": 0.2903520804755373,
      "grad_norm": 0.6290839595278495,
      "learning_rate": 8.937431530667329e-06,
      "loss": 1.2263,
      "step": 635
    },
    {
      "epoch": 0.2926383173296754,
      "grad_norm": 0.5459763353494508,
      "learning_rate": 8.912709514236218e-06,
      "loss": 1.2285,
      "step": 640
    },
    {
      "epoch": 0.29492455418381347,
      "grad_norm": 0.6301840515917086,
      "learning_rate": 8.887738230759334e-06,
      "loss": 1.2374,
      "step": 645
    },
    {
      "epoch": 0.2972107910379515,
      "grad_norm": 0.5413584040020849,
      "learning_rate": 8.862519271082624e-06,
      "loss": 1.2505,
      "step": 650
    },
    {
      "epoch": 0.2994970278920896,
      "grad_norm": 0.5979355091788396,
      "learning_rate": 8.83705424183074e-06,
      "loss": 1.2238,
      "step": 655
    },
    {
      "epoch": 0.3017832647462277,
      "grad_norm": 0.6873493941298675,
      "learning_rate": 8.811344765304698e-06,
      "loss": 1.2262,
      "step": 660
    },
    {
      "epoch": 0.3040695016003658,
      "grad_norm": 0.6699975954695512,
      "learning_rate": 8.785392479378522e-06,
      "loss": 1.23,
      "step": 665
    },
    {
      "epoch": 0.30635573845450387,
      "grad_norm": 0.6860546025784545,
      "learning_rate": 8.759199037394888e-06,
      "loss": 1.2424,
      "step": 670
    },
    {
      "epoch": 0.30864197530864196,
      "grad_norm": 0.7598573834174616,
      "learning_rate": 8.732766108059814e-06,
      "loss": 1.2138,
      "step": 675
    },
    {
      "epoch": 0.31092821216278005,
      "grad_norm": 0.723323270057115,
      "learning_rate": 8.70609537533634e-06,
      "loss": 1.2373,
      "step": 680
    },
    {
      "epoch": 0.31321444901691814,
      "grad_norm": 0.6170455054157933,
      "learning_rate": 8.679188538337248e-06,
      "loss": 1.2257,
      "step": 685
    },
    {
      "epoch": 0.31550068587105623,
      "grad_norm": 0.7413957440287698,
      "learning_rate": 8.652047311216823e-06,
      "loss": 1.2075,
      "step": 690
    },
    {
      "epoch": 0.3177869227251943,
      "grad_norm": 0.7424365012242525,
      "learning_rate": 8.62467342306164e-06,
      "loss": 1.2238,
      "step": 695
    },
    {
      "epoch": 0.3200731595793324,
      "grad_norm": 0.8566227798899636,
      "learning_rate": 8.597068617780419e-06,
      "loss": 1.2278,
      "step": 700
    },
    {
      "epoch": 0.3223593964334705,
      "grad_norm": 0.647075376724737,
      "learning_rate": 8.569234653992916e-06,
      "loss": 1.2407,
      "step": 705
    },
    {
      "epoch": 0.3246456332876086,
      "grad_norm": 0.6249088936722902,
      "learning_rate": 8.541173304917895e-06,
      "loss": 1.2231,
      "step": 710
    },
    {
      "epoch": 0.3269318701417467,
      "grad_norm": 0.70817264277616,
      "learning_rate": 8.512886358260162e-06,
      "loss": 1.2345,
      "step": 715
    },
    {
      "epoch": 0.3292181069958848,
      "grad_norm": 0.5956107721750036,
      "learning_rate": 8.484375616096658e-06,
      "loss": 1.225,
      "step": 720
    },
    {
      "epoch": 0.33150434385002286,
      "grad_norm": 0.6062042871270218,
      "learning_rate": 8.455642894761684e-06,
      "loss": 1.2185,
      "step": 725
    },
    {
      "epoch": 0.33379058070416096,
      "grad_norm": 0.66611343630398,
      "learning_rate": 8.426690024731161e-06,
      "loss": 1.2171,
      "step": 730
    },
    {
      "epoch": 0.33607681755829905,
      "grad_norm": 0.6006939272932527,
      "learning_rate": 8.39751885050603e-06,
      "loss": 1.2168,
      "step": 735
    },
    {
      "epoch": 0.33836305441243714,
      "grad_norm": 0.5888998376074026,
      "learning_rate": 8.36813123049474e-06,
      "loss": 1.2447,
      "step": 740
    },
    {
      "epoch": 0.3406492912665752,
      "grad_norm": 0.6170255283448466,
      "learning_rate": 8.338529036894855e-06,
      "loss": 1.2386,
      "step": 745
    },
    {
      "epoch": 0.3429355281207133,
      "grad_norm": 0.6592250171561639,
      "learning_rate": 8.308714155573785e-06,
      "loss": 1.2095,
      "step": 750
    },
    {
      "epoch": 0.3452217649748514,
      "grad_norm": 0.5948350472440084,
      "learning_rate": 8.278688485948634e-06,
      "loss": 1.2204,
      "step": 755
    },
    {
      "epoch": 0.3475080018289895,
      "grad_norm": 0.6884759018973265,
      "learning_rate": 8.248453940865204e-06,
      "loss": 1.2205,
      "step": 760
    },
    {
      "epoch": 0.3497942386831276,
      "grad_norm": 0.5629453296642776,
      "learning_rate": 8.218012446476128e-06,
      "loss": 1.2087,
      "step": 765
    },
    {
      "epoch": 0.3520804755372657,
      "grad_norm": 0.5703699859674032,
      "learning_rate": 8.187365942118162e-06,
      "loss": 1.2038,
      "step": 770
    },
    {
      "epoch": 0.35436671239140377,
      "grad_norm": 0.5758055939006159,
      "learning_rate": 8.156516380188635e-06,
      "loss": 1.2015,
      "step": 775
    },
    {
      "epoch": 0.35665294924554186,
      "grad_norm": 0.6814380489670292,
      "learning_rate": 8.125465726021068e-06,
      "loss": 1.2267,
      "step": 780
    },
    {
      "epoch": 0.35893918609967995,
      "grad_norm": 0.58819101648096,
      "learning_rate": 8.09421595775997e-06,
      "loss": 1.2065,
      "step": 785
    },
    {
      "epoch": 0.36122542295381804,
      "grad_norm": 0.599220106737159,
      "learning_rate": 8.062769066234807e-06,
      "loss": 1.2084,
      "step": 790
    },
    {
      "epoch": 0.3635116598079561,
      "grad_norm": 0.5687079813226833,
      "learning_rate": 8.031127054833192e-06,
      "loss": 1.2311,
      "step": 795
    },
    {
      "epoch": 0.36579789666209417,
      "grad_norm": 0.6076443328436887,
      "learning_rate": 7.999291939373232e-06,
      "loss": 1.209,
      "step": 800
    },
    {
      "epoch": 0.36808413351623226,
      "grad_norm": 0.5767468288489239,
      "learning_rate": 7.967265747975124e-06,
      "loss": 1.2153,
      "step": 805
    },
    {
      "epoch": 0.37037037037037035,
      "grad_norm": 0.6275130557605428,
      "learning_rate": 7.93505052093194e-06,
      "loss": 1.2206,
      "step": 810
    },
    {
      "epoch": 0.37265660722450844,
      "grad_norm": 0.5920904031157348,
      "learning_rate": 7.90264831057965e-06,
      "loss": 1.2149,
      "step": 815
    },
    {
      "epoch": 0.37494284407864653,
      "grad_norm": 0.5841477404583847,
      "learning_rate": 7.870061181166372e-06,
      "loss": 1.2134,
      "step": 820
    },
    {
      "epoch": 0.3772290809327846,
      "grad_norm": 0.545565275285448,
      "learning_rate": 7.837291208720867e-06,
      "loss": 1.2185,
      "step": 825
    },
    {
      "epoch": 0.3795153177869227,
      "grad_norm": 0.6183231148929101,
      "learning_rate": 7.804340480920274e-06,
      "loss": 1.2064,
      "step": 830
    },
    {
      "epoch": 0.3818015546410608,
      "grad_norm": 0.5801259298558049,
      "learning_rate": 7.771211096957125e-06,
      "loss": 1.2049,
      "step": 835
    },
    {
      "epoch": 0.3840877914951989,
      "grad_norm": 0.579347207611424,
      "learning_rate": 7.737905167405596e-06,
      "loss": 1.2185,
      "step": 840
    },
    {
      "epoch": 0.386374028349337,
      "grad_norm": 0.6262921976973932,
      "learning_rate": 7.704424814087056e-06,
      "loss": 1.2137,
      "step": 845
    },
    {
      "epoch": 0.3886602652034751,
      "grad_norm": 0.6070706881138944,
      "learning_rate": 7.670772169934902e-06,
      "loss": 1.2177,
      "step": 850
    },
    {
      "epoch": 0.39094650205761317,
      "grad_norm": 0.5688216055326876,
      "learning_rate": 7.636949378858647e-06,
      "loss": 1.2016,
      "step": 855
    },
    {
      "epoch": 0.39323273891175126,
      "grad_norm": 0.6166249078020826,
      "learning_rate": 7.602958595607375e-06,
      "loss": 1.1957,
      "step": 860
    },
    {
      "epoch": 0.39551897576588935,
      "grad_norm": 0.5778886288472463,
      "learning_rate": 7.568801985632439e-06,
      "loss": 1.2105,
      "step": 865
    },
    {
      "epoch": 0.39780521262002744,
      "grad_norm": 0.6732218435967291,
      "learning_rate": 7.5344817249495195e-06,
      "loss": 1.2047,
      "step": 870
    },
    {
      "epoch": 0.40009144947416553,
      "grad_norm": 0.672208759556888,
      "learning_rate": 7.500000000000001e-06,
      "loss": 1.1854,
      "step": 875
    },
    {
      "epoch": 0.4023776863283036,
      "grad_norm": 0.6180565492464766,
      "learning_rate": 7.465359007511667e-06,
      "loss": 1.185,
      "step": 880
    },
    {
      "epoch": 0.4046639231824417,
      "grad_norm": 0.6266745151721254,
      "learning_rate": 7.430560954358764e-06,
      "loss": 1.2082,
      "step": 885
    },
    {
      "epoch": 0.4069501600365798,
      "grad_norm": 0.6163182978581346,
      "learning_rate": 7.395608057421406e-06,
      "loss": 1.2194,
      "step": 890
    },
    {
      "epoch": 0.4092363968907179,
      "grad_norm": 0.6262674693601461,
      "learning_rate": 7.360502543444339e-06,
      "loss": 1.2188,
      "step": 895
    },
    {
      "epoch": 0.411522633744856,
      "grad_norm": 0.5549642780561265,
      "learning_rate": 7.325246648895089e-06,
      "loss": 1.1986,
      "step": 900
    },
    {
      "epoch": 0.41380887059899407,
      "grad_norm": 0.5540368046559051,
      "learning_rate": 7.289842619821475e-06,
      "loss": 1.2175,
      "step": 905
    },
    {
      "epoch": 0.41609510745313216,
      "grad_norm": 0.587023330497459,
      "learning_rate": 7.254292711708529e-06,
      "loss": 1.2029,
      "step": 910
    },
    {
      "epoch": 0.41838134430727025,
      "grad_norm": 0.5513581130094706,
      "learning_rate": 7.218599189334799e-06,
      "loss": 1.2009,
      "step": 915
    },
    {
      "epoch": 0.42066758116140834,
      "grad_norm": 0.7237520794327035,
      "learning_rate": 7.182764326628068e-06,
      "loss": 1.2063,
      "step": 920
    },
    {
      "epoch": 0.42295381801554643,
      "grad_norm": 0.5476819110298711,
      "learning_rate": 7.146790406520491e-06,
      "loss": 1.2107,
      "step": 925
    },
    {
      "epoch": 0.4252400548696845,
      "grad_norm": 0.5753924094787153,
      "learning_rate": 7.1106797208031554e-06,
      "loss": 1.2133,
      "step": 930
    },
    {
      "epoch": 0.4275262917238226,
      "grad_norm": 0.6489054914059448,
      "learning_rate": 7.0744345699800755e-06,
      "loss": 1.1991,
      "step": 935
    },
    {
      "epoch": 0.42981252857796065,
      "grad_norm": 0.6239602498665449,
      "learning_rate": 7.038057263121639e-06,
      "loss": 1.1937,
      "step": 940
    },
    {
      "epoch": 0.43209876543209874,
      "grad_norm": 0.5954140813357963,
      "learning_rate": 7.001550117717499e-06,
      "loss": 1.2092,
      "step": 945
    },
    {
      "epoch": 0.43438500228623683,
      "grad_norm": 0.5953175778315464,
      "learning_rate": 6.9649154595289326e-06,
      "loss": 1.1957,
      "step": 950
    },
    {
      "epoch": 0.4366712391403749,
      "grad_norm": 0.6030938627687562,
      "learning_rate": 6.92815562244068e-06,
      "loss": 1.1827,
      "step": 955
    },
    {
      "epoch": 0.438957475994513,
      "grad_norm": 0.6882999466791362,
      "learning_rate": 6.891272948312251e-06,
      "loss": 1.2102,
      "step": 960
    },
    {
      "epoch": 0.4412437128486511,
      "grad_norm": 0.6080281045836577,
      "learning_rate": 6.854269786828741e-06,
      "loss": 1.2093,
      "step": 965
    },
    {
      "epoch": 0.4435299497027892,
      "grad_norm": 0.756192409869553,
      "learning_rate": 6.817148495351131e-06,
      "loss": 1.2159,
      "step": 970
    },
    {
      "epoch": 0.4458161865569273,
      "grad_norm": 0.5892520162590819,
      "learning_rate": 6.779911438766117e-06,
      "loss": 1.193,
      "step": 975
    },
    {
      "epoch": 0.4481024234110654,
      "grad_norm": 0.6265917897470434,
      "learning_rate": 6.742560989335438e-06,
      "loss": 1.1951,
      "step": 980
    },
    {
      "epoch": 0.45038866026520347,
      "grad_norm": 0.5927415516536023,
      "learning_rate": 6.705099526544757e-06,
      "loss": 1.1973,
      "step": 985
    },
    {
      "epoch": 0.45267489711934156,
      "grad_norm": 0.5602604942191215,
      "learning_rate": 6.667529436952064e-06,
      "loss": 1.1945,
      "step": 990
    },
    {
      "epoch": 0.45496113397347965,
      "grad_norm": 0.751574883051813,
      "learning_rate": 6.629853114035643e-06,
      "loss": 1.2134,
      "step": 995
    },
    {
      "epoch": 0.45724737082761774,
      "grad_norm": 0.6000318274839507,
      "learning_rate": 6.5920729580415795e-06,
      "loss": 1.2104,
      "step": 1000
    },
    {
      "epoch": 0.45953360768175583,
      "grad_norm": 0.5783065549399249,
      "learning_rate": 6.554191375830861e-06,
      "loss": 1.2016,
      "step": 1005
    },
    {
      "epoch": 0.4618198445358939,
      "grad_norm": 0.5751980188798808,
      "learning_rate": 6.516210780726032e-06,
      "loss": 1.1794,
      "step": 1010
    },
    {
      "epoch": 0.464106081390032,
      "grad_norm": 0.6096335885035103,
      "learning_rate": 6.478133592357455e-06,
      "loss": 1.1816,
      "step": 1015
    },
    {
      "epoch": 0.4663923182441701,
      "grad_norm": 0.5848690144740822,
      "learning_rate": 6.43996223650916e-06,
      "loss": 1.1735,
      "step": 1020
    },
    {
      "epoch": 0.4686785550983082,
      "grad_norm": 0.6273777569367492,
      "learning_rate": 6.401699144964306e-06,
      "loss": 1.1864,
      "step": 1025
    },
    {
      "epoch": 0.4709647919524463,
      "grad_norm": 0.5772389229176554,
      "learning_rate": 6.3633467553502625e-06,
      "loss": 1.1953,
      "step": 1030
    },
    {
      "epoch": 0.4732510288065844,
      "grad_norm": 0.6320660706578101,
      "learning_rate": 6.32490751098331e-06,
      "loss": 1.1778,
      "step": 1035
    },
    {
      "epoch": 0.47553726566072246,
      "grad_norm": 0.628014857385664,
      "learning_rate": 6.286383860712982e-06,
      "loss": 1.1978,
      "step": 1040
    },
    {
      "epoch": 0.47782350251486055,
      "grad_norm": 0.6165011857453245,
      "learning_rate": 6.247778258766069e-06,
      "loss": 1.1783,
      "step": 1045
    },
    {
      "epoch": 0.48010973936899864,
      "grad_norm": 0.6680859473813631,
      "learning_rate": 6.209093164590253e-06,
      "loss": 1.1883,
      "step": 1050
    },
    {
      "epoch": 0.48239597622313674,
      "grad_norm": 0.6230269069079273,
      "learning_rate": 6.170331042697425e-06,
      "loss": 1.1923,
      "step": 1055
    },
    {
      "epoch": 0.4846822130772748,
      "grad_norm": 0.6472681484163015,
      "learning_rate": 6.131494362506693e-06,
      "loss": 1.1826,
      "step": 1060
    },
    {
      "epoch": 0.4869684499314129,
      "grad_norm": 0.6799978087591872,
      "learning_rate": 6.09258559818704e-06,
      "loss": 1.1829,
      "step": 1065
    },
    {
      "epoch": 0.489254686785551,
      "grad_norm": 0.5617426984448537,
      "learning_rate": 6.053607228499719e-06,
      "loss": 1.1941,
      "step": 1070
    },
    {
      "epoch": 0.4915409236396891,
      "grad_norm": 0.6444058153599652,
      "learning_rate": 6.014561736640334e-06,
      "loss": 1.2,
      "step": 1075
    },
    {
      "epoch": 0.49382716049382713,
      "grad_norm": 0.6016265988080601,
      "learning_rate": 5.975451610080643e-06,
      "loss": 1.1655,
      "step": 1080
    },
    {
      "epoch": 0.4961133973479652,
      "grad_norm": 0.7053148286233416,
      "learning_rate": 5.936279340410082e-06,
      "loss": 1.172,
      "step": 1085
    },
    {
      "epoch": 0.4983996342021033,
      "grad_norm": 0.5586357561653685,
      "learning_rate": 5.8970474231770445e-06,
      "loss": 1.1922,
      "step": 1090
    },
    {
      "epoch": 0.5006858710562414,
      "grad_norm": 0.7895760074140119,
      "learning_rate": 5.857758357729892e-06,
      "loss": 1.1839,
      "step": 1095
    },
    {
      "epoch": 0.5029721079103795,
      "grad_norm": 0.7313666592611404,
      "learning_rate": 5.8184146470577265e-06,
      "loss": 1.1813,
      "step": 1100
    },
    {
      "epoch": 0.5052583447645176,
      "grad_norm": 0.6067591576327228,
      "learning_rate": 5.779018797630934e-06,
      "loss": 1.1855,
      "step": 1105
    },
    {
      "epoch": 0.5075445816186557,
      "grad_norm": 0.6144330199450508,
      "learning_rate": 5.739573319241505e-06,
      "loss": 1.1924,
      "step": 1110
    },
    {
      "epoch": 0.5098308184727938,
      "grad_norm": 0.6075048668745815,
      "learning_rate": 5.7000807248431466e-06,
      "loss": 1.1783,
      "step": 1115
    },
    {
      "epoch": 0.5121170553269319,
      "grad_norm": 0.6763365315316732,
      "learning_rate": 5.66054353039118e-06,
      "loss": 1.1873,
      "step": 1120
    },
    {
      "epoch": 0.51440329218107,
      "grad_norm": 0.652936999197392,
      "learning_rate": 5.620964254682267e-06,
      "loss": 1.2019,
      "step": 1125
    },
    {
      "epoch": 0.516689529035208,
      "grad_norm": 0.7510930690144121,
      "learning_rate": 5.58134541919394e-06,
      "loss": 1.1863,
      "step": 1130
    },
    {
      "epoch": 0.5189757658893461,
      "grad_norm": 0.7485282723991191,
      "learning_rate": 5.5416895479239665e-06,
      "loss": 1.1878,
      "step": 1135
    },
    {
      "epoch": 0.5212620027434842,
      "grad_norm": 0.6650793765929232,
      "learning_rate": 5.501999167229554e-06,
      "loss": 1.1844,
      "step": 1140
    },
    {
      "epoch": 0.5235482395976223,
      "grad_norm": 0.6617004106280673,
      "learning_rate": 5.4622768056664e-06,
      "loss": 1.1819,
      "step": 1145
    },
    {
      "epoch": 0.5258344764517604,
      "grad_norm": 0.639306148093516,
      "learning_rate": 5.42252499382761e-06,
      "loss": 1.1844,
      "step": 1150
    },
    {
      "epoch": 0.5281207133058985,
      "grad_norm": 0.590573720499581,
      "learning_rate": 5.38274626418248e-06,
      "loss": 1.1848,
      "step": 1155
    },
    {
      "epoch": 0.5304069501600366,
      "grad_norm": 0.625235396788826,
      "learning_rate": 5.3429431509151515e-06,
      "loss": 1.1904,
      "step": 1160
    },
    {
      "epoch": 0.5326931870141747,
      "grad_norm": 0.5840052674712635,
      "learning_rate": 5.303118189763187e-06,
      "loss": 1.1829,
      "step": 1165
    },
    {
      "epoch": 0.5349794238683128,
      "grad_norm": 0.5940842973816081,
      "learning_rate": 5.263273917856e-06,
      "loss": 1.1774,
      "step": 1170
    },
    {
      "epoch": 0.5372656607224509,
      "grad_norm": 0.5991239115995499,
      "learning_rate": 5.22341287355324e-06,
      "loss": 1.1857,
      "step": 1175
    },
    {
      "epoch": 0.539551897576589,
      "grad_norm": 0.6248756548437343,
      "learning_rate": 5.183537596283075e-06,
      "loss": 1.1799,
      "step": 1180
    },
    {
      "epoch": 0.541838134430727,
      "grad_norm": 0.6023807247895316,
      "learning_rate": 5.143650626380417e-06,
      "loss": 1.1858,
      "step": 1185
    },
    {
      "epoch": 0.5441243712848651,
      "grad_norm": 0.6101959497751839,
      "learning_rate": 5.103754504925071e-06,
      "loss": 1.1961,
      "step": 1190
    },
    {
      "epoch": 0.5464106081390032,
      "grad_norm": 0.569676114190435,
      "learning_rate": 5.06385177357987e-06,
      "loss": 1.1766,
      "step": 1195
    },
    {
      "epoch": 0.5486968449931413,
      "grad_norm": 0.5819652008689743,
      "learning_rate": 5.023944974428739e-06,
      "loss": 1.1734,
      "step": 1200
    },
    {
      "epoch": 0.5509830818472794,
      "grad_norm": 0.5661449507234365,
      "learning_rate": 4.9840366498147495e-06,
      "loss": 1.1908,
      "step": 1205
    },
    {
      "epoch": 0.5532693187014175,
      "grad_norm": 0.6109491726102372,
      "learning_rate": 4.944129342178156e-06,
      "loss": 1.1784,
      "step": 1210
    },
    {
      "epoch": 0.5555555555555556,
      "grad_norm": 0.5811074689104263,
      "learning_rate": 4.90422559389443e-06,
      "loss": 1.1746,
      "step": 1215
    },
    {
      "epoch": 0.5578417924096937,
      "grad_norm": 0.6060458081756667,
      "learning_rate": 4.864327947112281e-06,
      "loss": 1.195,
      "step": 1220
    },
    {
      "epoch": 0.5601280292638318,
      "grad_norm": 0.6226718536570417,
      "learning_rate": 4.82443894359171e-06,
      "loss": 1.1786,
      "step": 1225
    },
    {
      "epoch": 0.5624142661179699,
      "grad_norm": 0.5995864510713481,
      "learning_rate": 4.784561124542088e-06,
      "loss": 1.1791,
      "step": 1230
    },
    {
      "epoch": 0.5647005029721079,
      "grad_norm": 0.5701958838449743,
      "learning_rate": 4.744697030460248e-06,
      "loss": 1.1647,
      "step": 1235
    },
    {
      "epoch": 0.566986739826246,
      "grad_norm": 0.6293939505655973,
      "learning_rate": 4.7048492009686525e-06,
      "loss": 1.1692,
      "step": 1240
    },
    {
      "epoch": 0.5692729766803841,
      "grad_norm": 0.6850447194966206,
      "learning_rate": 4.6650201746535926e-06,
      "loss": 1.1673,
      "step": 1245
    },
    {
      "epoch": 0.5715592135345222,
      "grad_norm": 0.6040120516739561,
      "learning_rate": 4.625212488903467e-06,
      "loss": 1.1834,
      "step": 1250
    },
    {
      "epoch": 0.5738454503886603,
      "grad_norm": 0.5686706476550618,
      "learning_rate": 4.585428679747133e-06,
      "loss": 1.1716,
      "step": 1255
    },
    {
      "epoch": 0.5761316872427984,
      "grad_norm": 0.5946931657837966,
      "learning_rate": 4.545671281692331e-06,
      "loss": 1.1705,
      "step": 1260
    },
    {
      "epoch": 0.5784179240969365,
      "grad_norm": 0.6120143356512502,
      "learning_rate": 4.505942827564242e-06,
      "loss": 1.1807,
      "step": 1265
    },
    {
      "epoch": 0.5807041609510746,
      "grad_norm": 0.6341171747185648,
      "learning_rate": 4.466245848344106e-06,
      "loss": 1.1839,
      "step": 1270
    },
    {
      "epoch": 0.5829903978052127,
      "grad_norm": 0.6494090868678567,
      "learning_rate": 4.426582873007999e-06,
      "loss": 1.1684,
      "step": 1275
    },
    {
      "epoch": 0.5852766346593508,
      "grad_norm": 0.6252524175950205,
      "learning_rate": 4.386956428365701e-06,
      "loss": 1.1878,
      "step": 1280
    },
    {
      "epoch": 0.5875628715134888,
      "grad_norm": 0.5911175497758677,
      "learning_rate": 4.347369038899744e-06,
      "loss": 1.1828,
      "step": 1285
    },
    {
      "epoch": 0.5898491083676269,
      "grad_norm": 0.5988939599453593,
      "learning_rate": 4.307823226604555e-06,
      "loss": 1.1735,
      "step": 1290
    },
    {
      "epoch": 0.5921353452217649,
      "grad_norm": 0.5813355536422021,
      "learning_rate": 4.2683215108258145e-06,
      "loss": 1.1706,
      "step": 1295
    },
    {
      "epoch": 0.594421582075903,
      "grad_norm": 0.6208043705991068,
      "learning_rate": 4.228866408099945e-06,
      "loss": 1.1907,
      "step": 1300
    },
    {
      "epoch": 0.5967078189300411,
      "grad_norm": 0.6512006631857741,
      "learning_rate": 4.189460431993788e-06,
      "loss": 1.1951,
      "step": 1305
    },
    {
      "epoch": 0.5989940557841792,
      "grad_norm": 0.5845471180993255,
      "learning_rate": 4.150106092944475e-06,
      "loss": 1.1717,
      "step": 1310
    },
    {
      "epoch": 0.6012802926383173,
      "grad_norm": 0.5949045334275538,
      "learning_rate": 4.110805898099492e-06,
      "loss": 1.1833,
      "step": 1315
    },
    {
      "epoch": 0.6035665294924554,
      "grad_norm": 0.5971913414181261,
      "learning_rate": 4.071562351156966e-06,
      "loss": 1.1786,
      "step": 1320
    },
    {
      "epoch": 0.6058527663465935,
      "grad_norm": 0.6178601149254982,
      "learning_rate": 4.032377952206148e-06,
      "loss": 1.1793,
      "step": 1325
    },
    {
      "epoch": 0.6081390032007316,
      "grad_norm": 0.6046188006147395,
      "learning_rate": 3.993255197568154e-06,
      "loss": 1.169,
      "step": 1330
    },
    {
      "epoch": 0.6104252400548696,
      "grad_norm": 0.5919458656130715,
      "learning_rate": 3.954196579636918e-06,
      "loss": 1.1692,
      "step": 1335
    },
    {
      "epoch": 0.6127114769090077,
      "grad_norm": 0.5727049539306068,
      "learning_rate": 3.91520458672042e-06,
      "loss": 1.1747,
      "step": 1340
    },
    {
      "epoch": 0.6149977137631458,
      "grad_norm": 0.6040809405921704,
      "learning_rate": 3.876281702882156e-06,
      "loss": 1.1935,
      "step": 1345
    },
    {
      "epoch": 0.6172839506172839,
      "grad_norm": 0.5747789602798682,
      "learning_rate": 3.837430407782896e-06,
      "loss": 1.175,
      "step": 1350
    },
    {
      "epoch": 0.619570187471422,
      "grad_norm": 0.6001909994942644,
      "learning_rate": 3.7986531765226965e-06,
      "loss": 1.1718,
      "step": 1355
    },
    {
      "epoch": 0.6218564243255601,
      "grad_norm": 0.5499338552551708,
      "learning_rate": 3.759952479483232e-06,
      "loss": 1.1615,
      "step": 1360
    },
    {
      "epoch": 0.6241426611796982,
      "grad_norm": 0.62697610396954,
      "learning_rate": 3.7213307821704115e-06,
      "loss": 1.1616,
      "step": 1365
    },
    {
      "epoch": 0.6264288980338363,
      "grad_norm": 0.637904015143814,
      "learning_rate": 3.6827905450573022e-06,
      "loss": 1.1784,
      "step": 1370
    },
    {
      "epoch": 0.6287151348879744,
      "grad_norm": 0.6235229612947039,
      "learning_rate": 3.6443342234273905e-06,
      "loss": 1.1674,
      "step": 1375
    },
    {
      "epoch": 0.6310013717421125,
      "grad_norm": 0.744429415227132,
      "learning_rate": 3.6059642672181537e-06,
      "loss": 1.1678,
      "step": 1380
    },
    {
      "epoch": 0.6332876085962506,
      "grad_norm": 0.5903117671660288,
      "learning_rate": 3.5676831208649887e-06,
      "loss": 1.1661,
      "step": 1385
    },
    {
      "epoch": 0.6355738454503886,
      "grad_norm": 0.5977435348831742,
      "learning_rate": 3.5294932231454838e-06,
      "loss": 1.1655,
      "step": 1390
    },
    {
      "epoch": 0.6378600823045267,
      "grad_norm": 0.6262251229258455,
      "learning_rate": 3.4913970070240388e-06,
      "loss": 1.1827,
      "step": 1395
    },
    {
      "epoch": 0.6401463191586648,
      "grad_norm": 0.6039362156672261,
      "learning_rate": 3.4533968994968913e-06,
      "loss": 1.162,
      "step": 1400
    },
    {
      "epoch": 0.6424325560128029,
      "grad_norm": 0.610471777862986,
      "learning_rate": 3.41549532143748e-06,
      "loss": 1.1719,
      "step": 1405
    },
    {
      "epoch": 0.644718792866941,
      "grad_norm": 0.6124948412563855,
      "learning_rate": 3.3776946874422268e-06,
      "loss": 1.161,
      "step": 1410
    },
    {
      "epoch": 0.6470050297210791,
      "grad_norm": 0.596054515528405,
      "learning_rate": 3.3399974056767095e-06,
      "loss": 1.1677,
      "step": 1415
    },
    {
      "epoch": 0.6492912665752172,
      "grad_norm": 0.6199519548446956,
      "learning_rate": 3.30240587772224e-06,
      "loss": 1.1731,
      "step": 1420
    },
    {
      "epoch": 0.6515775034293553,
      "grad_norm": 0.6123382818220521,
      "learning_rate": 3.2649224984228756e-06,
      "loss": 1.1751,
      "step": 1425
    },
    {
      "epoch": 0.6538637402834934,
      "grad_norm": 0.6521756883889377,
      "learning_rate": 3.227549655732843e-06,
      "loss": 1.1746,
      "step": 1430
    },
    {
      "epoch": 0.6561499771376315,
      "grad_norm": 0.6292502440238857,
      "learning_rate": 3.19028973056441e-06,
      "loss": 1.1796,
      "step": 1435
    },
    {
      "epoch": 0.6584362139917695,
      "grad_norm": 0.7223300006546375,
      "learning_rate": 3.153145096636211e-06,
      "loss": 1.1769,
      "step": 1440
    },
    {
      "epoch": 0.6607224508459076,
      "grad_norm": 0.6123252900962536,
      "learning_rate": 3.1161181203220146e-06,
      "loss": 1.1798,
      "step": 1445
    },
    {
      "epoch": 0.6630086877000457,
      "grad_norm": 0.6176590524451245,
      "learning_rate": 3.079211160499975e-06,
      "loss": 1.1628,
      "step": 1450
    },
    {
      "epoch": 0.6652949245541838,
      "grad_norm": 0.6851380779593121,
      "learning_rate": 3.0424265684023556e-06,
      "loss": 1.1621,
      "step": 1455
    },
    {
      "epoch": 0.6675811614083219,
      "grad_norm": 0.6135186798564677,
      "learning_rate": 3.0057666874657365e-06,
      "loss": 1.1817,
      "step": 1460
    },
    {
      "epoch": 0.66986739826246,
      "grad_norm": 0.6162664151552476,
      "learning_rate": 2.9692338531817205e-06,
      "loss": 1.1621,
      "step": 1465
    },
    {
      "epoch": 0.6721536351165981,
      "grad_norm": 0.6209879083469707,
      "learning_rate": 2.9328303929481507e-06,
      "loss": 1.1788,
      "step": 1470
    },
    {
      "epoch": 0.6744398719707362,
      "grad_norm": 0.6564960801220917,
      "learning_rate": 2.8965586259208295e-06,
      "loss": 1.1497,
      "step": 1475
    },
    {
      "epoch": 0.6767261088248743,
      "grad_norm": 0.6100366044161921,
      "learning_rate": 2.860420862865787e-06,
      "loss": 1.1641,
      "step": 1480
    },
    {
      "epoch": 0.6790123456790124,
      "grad_norm": 0.6401282278697755,
      "learning_rate": 2.82441940601205e-06,
      "loss": 1.1647,
      "step": 1485
    },
    {
      "epoch": 0.6812985825331505,
      "grad_norm": 0.5948814066139619,
      "learning_rate": 2.7885565489049948e-06,
      "loss": 1.1862,
      "step": 1490
    },
    {
      "epoch": 0.6835848193872885,
      "grad_norm": 0.575891260626997,
      "learning_rate": 2.7528345762602125e-06,
      "loss": 1.149,
      "step": 1495
    },
    {
      "epoch": 0.6858710562414266,
      "grad_norm": 0.6321328549868929,
      "learning_rate": 2.7172557638179674e-06,
      "loss": 1.1722,
      "step": 1500
    },
    {
      "epoch": 0.6881572930955647,
      "grad_norm": 0.620537429422375,
      "learning_rate": 2.681822378198221e-06,
      "loss": 1.1667,
      "step": 1505
    },
    {
      "epoch": 0.6904435299497028,
      "grad_norm": 0.5916688359774108,
      "learning_rate": 2.6465366767562162e-06,
      "loss": 1.1742,
      "step": 1510
    },
    {
      "epoch": 0.6927297668038409,
      "grad_norm": 0.649532932905328,
      "learning_rate": 2.611400907438685e-06,
      "loss": 1.1664,
      "step": 1515
    },
    {
      "epoch": 0.695016003657979,
      "grad_norm": 0.5887639490410209,
      "learning_rate": 2.5764173086406306e-06,
      "loss": 1.1684,
      "step": 1520
    },
    {
      "epoch": 0.6973022405121171,
      "grad_norm": 0.5909674256777088,
      "learning_rate": 2.5415881090627227e-06,
      "loss": 1.1681,
      "step": 1525
    },
    {
      "epoch": 0.6995884773662552,
      "grad_norm": 0.6669572713903603,
      "learning_rate": 2.506915527569318e-06,
      "loss": 1.1692,
      "step": 1530
    },
    {
      "epoch": 0.7018747142203933,
      "grad_norm": 0.6291006193664693,
      "learning_rate": 2.472401773047107e-06,
      "loss": 1.1707,
      "step": 1535
    },
    {
      "epoch": 0.7041609510745314,
      "grad_norm": 0.6241336853751712,
      "learning_rate": 2.438049044264382e-06,
      "loss": 1.1763,
      "step": 1540
    },
    {
      "epoch": 0.7064471879286695,
      "grad_norm": 0.6233093811845397,
      "learning_rate": 2.4038595297309712e-06,
      "loss": 1.1595,
      "step": 1545
    },
    {
      "epoch": 0.7087334247828075,
      "grad_norm": 0.6099376654855213,
      "learning_rate": 2.3698354075588105e-06,
      "loss": 1.1815,
      "step": 1550
    },
    {
      "epoch": 0.7110196616369456,
      "grad_norm": 0.608739940642273,
      "learning_rate": 2.3359788453231723e-06,
      "loss": 1.1558,
      "step": 1555
    },
    {
      "epoch": 0.7133058984910837,
      "grad_norm": 0.6060804682823651,
      "learning_rate": 2.3022919999245964e-06,
      "loss": 1.1737,
      "step": 1560
    },
    {
      "epoch": 0.7155921353452218,
      "grad_norm": 0.6554029837627439,
      "learning_rate": 2.2687770174514674e-06,
      "loss": 1.1763,
      "step": 1565
    },
    {
      "epoch": 0.7178783721993599,
      "grad_norm": 0.6199763037940721,
      "learning_rate": 2.23543603304329e-06,
      "loss": 1.1668,
      "step": 1570
    },
    {
      "epoch": 0.720164609053498,
      "grad_norm": 0.7002533112076955,
      "learning_rate": 2.20227117075468e-06,
      "loss": 1.1717,
      "step": 1575
    },
    {
      "epoch": 0.7224508459076361,
      "grad_norm": 0.5685258465602809,
      "learning_rate": 2.1692845434200323e-06,
      "loss": 1.1793,
      "step": 1580
    },
    {
      "epoch": 0.7247370827617741,
      "grad_norm": 0.5988803647429354,
      "learning_rate": 2.136478252518924e-06,
      "loss": 1.1762,
      "step": 1585
    },
    {
      "epoch": 0.7270233196159122,
      "grad_norm": 0.6220944262982843,
      "learning_rate": 2.103854388042243e-06,
      "loss": 1.1732,
      "step": 1590
    },
    {
      "epoch": 0.7293095564700502,
      "grad_norm": 0.5872374752551915,
      "learning_rate": 2.071415028359026e-06,
      "loss": 1.1653,
      "step": 1595
    },
    {
      "epoch": 0.7315957933241883,
      "grad_norm": 0.6315378201627972,
      "learning_rate": 2.0391622400840665e-06,
      "loss": 1.1631,
      "step": 1600
    },
    {
      "epoch": 0.7338820301783264,
      "grad_norm": 0.6166479295990325,
      "learning_rate": 2.0070980779462513e-06,
      "loss": 1.1632,
      "step": 1605
    },
    {
      "epoch": 0.7361682670324645,
      "grad_norm": 0.6082820756952414,
      "learning_rate": 1.975224584657648e-06,
      "loss": 1.1609,
      "step": 1610
    },
    {
      "epoch": 0.7384545038866026,
      "grad_norm": 0.5711567863660318,
      "learning_rate": 1.943543790783392e-06,
      "loss": 1.1629,
      "step": 1615
    },
    {
      "epoch": 0.7407407407407407,
      "grad_norm": 0.5934876997772376,
      "learning_rate": 1.9120577146123125e-06,
      "loss": 1.1711,
      "step": 1620
    },
    {
      "epoch": 0.7430269775948788,
      "grad_norm": 0.6044258229955937,
      "learning_rate": 1.8807683620283496e-06,
      "loss": 1.1792,
      "step": 1625
    },
    {
      "epoch": 0.7453132144490169,
      "grad_norm": 0.6414108282805848,
      "learning_rate": 1.8496777263827775e-06,
      "loss": 1.1909,
      "step": 1630
    },
    {
      "epoch": 0.747599451303155,
      "grad_norm": 0.5928077840962543,
      "learning_rate": 1.8187877883672024e-06,
      "loss": 1.177,
      "step": 1635
    },
    {
      "epoch": 0.7498856881572931,
      "grad_norm": 0.5674967348667851,
      "learning_rate": 1.7881005158873826e-06,
      "loss": 1.1698,
      "step": 1640
    },
    {
      "epoch": 0.7521719250114312,
      "grad_norm": 0.6190325214784786,
      "learning_rate": 1.757617863937865e-06,
      "loss": 1.1564,
      "step": 1645
    },
    {
      "epoch": 0.7544581618655692,
      "grad_norm": 0.5994621485851359,
      "learning_rate": 1.7273417744774323e-06,
      "loss": 1.1682,
      "step": 1650
    },
    {
      "epoch": 0.7567443987197073,
      "grad_norm": 0.6486512119864596,
      "learning_rate": 1.6972741763053835e-06,
      "loss": 1.1695,
      "step": 1655
    },
    {
      "epoch": 0.7590306355738454,
      "grad_norm": 0.6124244446703457,
      "learning_rate": 1.6674169849386606e-06,
      "loss": 1.1735,
      "step": 1660
    },
    {
      "epoch": 0.7613168724279835,
      "grad_norm": 0.6215393083401685,
      "learning_rate": 1.6377721024898214e-06,
      "loss": 1.1611,
      "step": 1665
    },
    {
      "epoch": 0.7636031092821216,
      "grad_norm": 0.6379465283211975,
      "learning_rate": 1.608341417545849e-06,
      "loss": 1.1481,
      "step": 1670
    },
    {
      "epoch": 0.7658893461362597,
      "grad_norm": 0.5646658898706897,
      "learning_rate": 1.5791268050478487e-06,
      "loss": 1.1732,
      "step": 1675
    },
    {
      "epoch": 0.7681755829903978,
      "grad_norm": 0.6028441016085894,
      "learning_rate": 1.5501301261715896e-06,
      "loss": 1.1703,
      "step": 1680
    },
    {
      "epoch": 0.7704618198445359,
      "grad_norm": 0.6313316478647917,
      "learning_rate": 1.5213532282089466e-06,
      "loss": 1.1631,
      "step": 1685
    },
    {
      "epoch": 0.772748056698674,
      "grad_norm": 0.600237347487572,
      "learning_rate": 1.4927979444502028e-06,
      "loss": 1.1642,
      "step": 1690
    },
    {
      "epoch": 0.7750342935528121,
      "grad_norm": 0.5957448361281138,
      "learning_rate": 1.4644660940672628e-06,
      "loss": 1.1668,
      "step": 1695
    },
    {
      "epoch": 0.7773205304069501,
      "grad_norm": 0.5872437663700951,
      "learning_rate": 1.4363594819977606e-06,
      "loss": 1.1707,
      "step": 1700
    },
    {
      "epoch": 0.7796067672610882,
      "grad_norm": 0.7075549655922131,
      "learning_rate": 1.4084798988300684e-06,
      "loss": 1.1723,
      "step": 1705
    },
    {
      "epoch": 0.7818930041152263,
      "grad_norm": 0.6203199463017092,
      "learning_rate": 1.3808291206892232e-06,
      "loss": 1.1668,
      "step": 1710
    },
    {
      "epoch": 0.7841792409693644,
      "grad_norm": 0.5759538308213393,
      "learning_rate": 1.3534089091237757e-06,
      "loss": 1.1598,
      "step": 1715
    },
    {
      "epoch": 0.7864654778235025,
      "grad_norm": 0.5942123152988342,
      "learning_rate": 1.3262210109935719e-06,
      "loss": 1.1699,
      "step": 1720
    },
    {
      "epoch": 0.7887517146776406,
      "grad_norm": 0.6597153339968819,
      "learning_rate": 1.2992671583584587e-06,
      "loss": 1.163,
      "step": 1725
    },
    {
      "epoch": 0.7910379515317787,
      "grad_norm": 0.5994756887911626,
      "learning_rate": 1.2725490683679458e-06,
      "loss": 1.1797,
      "step": 1730
    },
    {
      "epoch": 0.7933241883859168,
      "grad_norm": 0.5942174681280669,
      "learning_rate": 1.2460684431518055e-06,
      "loss": 1.1649,
      "step": 1735
    },
    {
      "epoch": 0.7956104252400549,
      "grad_norm": 0.5884403788886147,
      "learning_rate": 1.2198269697116416e-06,
      "loss": 1.1627,
      "step": 1740
    },
    {
      "epoch": 0.797896662094193,
      "grad_norm": 0.5917506875732326,
      "learning_rate": 1.1938263198134087e-06,
      "loss": 1.1729,
      "step": 1745
    },
    {
      "epoch": 0.8001828989483311,
      "grad_norm": 0.5689945244963683,
      "learning_rate": 1.168068149880912e-06,
      "loss": 1.1639,
      "step": 1750
    },
    {
      "epoch": 0.8024691358024691,
      "grad_norm": 0.5945700377730089,
      "learning_rate": 1.1425541008902852e-06,
      "loss": 1.1616,
      "step": 1755
    },
    {
      "epoch": 0.8047553726566072,
      "grad_norm": 0.5960318855848052,
      "learning_rate": 1.1172857982654445e-06,
      "loss": 1.1796,
      "step": 1760
    },
    {
      "epoch": 0.8070416095107453,
      "grad_norm": 0.606906781862042,
      "learning_rate": 1.092264851774536e-06,
      "loss": 1.1524,
      "step": 1765
    },
    {
      "epoch": 0.8093278463648834,
      "grad_norm": 0.6686014083887466,
      "learning_rate": 1.067492855427385e-06,
      "loss": 1.1681,
      "step": 1770
    },
    {
      "epoch": 0.8116140832190215,
      "grad_norm": 0.6637295349703526,
      "learning_rate": 1.0429713873739505e-06,
      "loss": 1.1603,
      "step": 1775
    },
    {
      "epoch": 0.8139003200731596,
      "grad_norm": 0.5937746781646984,
      "learning_rate": 1.0187020098037759e-06,
      "loss": 1.1577,
      "step": 1780
    },
    {
      "epoch": 0.8161865569272977,
      "grad_norm": 0.6154438358761861,
      "learning_rate": 9.946862688464753e-07,
      "loss": 1.1596,
      "step": 1785
    },
    {
      "epoch": 0.8184727937814358,
      "grad_norm": 0.6511739287376433,
      "learning_rate": 9.709256944732343e-07,
      "loss": 1.1707,
      "step": 1790
    },
    {
      "epoch": 0.8207590306355739,
      "grad_norm": 0.6174881374069865,
      "learning_rate": 9.474218003993275e-07,
      "loss": 1.1775,
      "step": 1795
    },
{ |
|
"epoch": 0.823045267489712, |
|
"grad_norm": 0.5791204684491382, |
|
"learning_rate": 9.241760839877023e-07, |
|
"loss": 1.1571, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.82533150434385, |
|
"grad_norm": 0.6464260391976697, |
|
"learning_rate": 9.011900261535767e-07, |
|
"loss": 1.1713, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.8276177411979881, |
|
"grad_norm": 0.6102288143326278, |
|
"learning_rate": 8.784650912700909e-07, |
|
"loss": 1.1654, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.8299039780521262, |
|
"grad_norm": 0.6226743471510658, |
|
"learning_rate": 8.560027270750276e-07, |
|
"loss": 1.1655, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.8321902149062643, |
|
"grad_norm": 0.6079710775307922, |
|
"learning_rate": 8.338043645785698e-07, |
|
"loss": 1.1669, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.8344764517604024, |
|
"grad_norm": 0.6077180347148399, |
|
"learning_rate": 8.118714179721404e-07, |
|
"loss": 1.1529, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.8367626886145405, |
|
"grad_norm": 0.6420590181680129, |
|
"learning_rate": 7.902052845383112e-07, |
|
"loss": 1.1662, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.8390489254686786, |
|
"grad_norm": 0.5675937752707487, |
|
"learning_rate": 7.6880734456178e-07, |
|
"loss": 1.1638, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.8413351623228167, |
|
"grad_norm": 0.5963600943686237, |
|
"learning_rate": 7.476789612414414e-07, |
|
"loss": 1.1648, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.8436213991769548, |
|
"grad_norm": 0.6248451529177521, |
|
"learning_rate": 7.268214806035423e-07, |
|
"loss": 1.1704, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.8459076360310929, |
|
"grad_norm": 0.6582130785897107, |
|
"learning_rate": 7.062362314159211e-07, |
|
"loss": 1.1716, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.848193872885231, |
|
"grad_norm": 0.6104979563533071, |
|
"learning_rate": 6.859245251033697e-07, |
|
"loss": 1.1551, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.850480109739369, |
|
"grad_norm": 0.6291505363028616, |
|
"learning_rate": 6.658876556640781e-07, |
|
"loss": 1.1606, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.8527663465935071, |
|
"grad_norm": 0.626351910055198, |
|
"learning_rate": 6.461268995871967e-07, |
|
"loss": 1.1648, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.8550525834476452, |
|
"grad_norm": 0.5991977091276379, |
|
"learning_rate": 6.266435157715222e-07, |
|
"loss": 1.1403, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.8573388203017832, |
|
"grad_norm": 0.6133109082285381, |
|
"learning_rate": 6.074387454452891e-07, |
|
"loss": 1.1578, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.8596250571559213, |
|
"grad_norm": 0.6062420232877472, |
|
"learning_rate": 5.885138120870965e-07, |
|
"loss": 1.1422, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.8619112940100594, |
|
"grad_norm": 0.5920619164293491, |
|
"learning_rate": 5.698699213479697e-07, |
|
"loss": 1.1503, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.8641975308641975, |
|
"grad_norm": 0.6179934405963249, |
|
"learning_rate": 5.515082609745465e-07, |
|
"loss": 1.1728, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.8664837677183356, |
|
"grad_norm": 0.6191884681224713, |
|
"learning_rate": 5.334300007334065e-07, |
|
"loss": 1.1514, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.8687700045724737, |
|
"grad_norm": 0.6148818189812965, |
|
"learning_rate": 5.156362923365587e-07, |
|
"loss": 1.1772, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.8710562414266118, |
|
"grad_norm": 0.5927964681781609, |
|
"learning_rate": 4.981282693680584e-07, |
|
"loss": 1.1747, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.8733424782807498, |
|
"grad_norm": 0.630038523819453, |
|
"learning_rate": 4.80907047211796e-07, |
|
"loss": 1.1638, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.8756287151348879, |
|
"grad_norm": 0.5822419290829026, |
|
"learning_rate": 4.639737229804403e-07, |
|
"loss": 1.1667, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.877914951989026, |
|
"grad_norm": 0.6169634205827448, |
|
"learning_rate": 4.473293754455399e-07, |
|
"loss": 1.1695, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.8802011888431641, |
|
"grad_norm": 0.5892947845386679, |
|
"learning_rate": 4.3097506496880325e-07, |
|
"loss": 1.1684, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.8824874256973022, |
|
"grad_norm": 0.6796811793089527, |
|
"learning_rate": 4.149118334345403e-07, |
|
"loss": 1.1604, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.8847736625514403, |
|
"grad_norm": 0.5951100132603444, |
|
"learning_rate": 3.9914070418329123e-07, |
|
"loss": 1.1632, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.8870598994055784, |
|
"grad_norm": 0.6710610553022762, |
|
"learning_rate": 3.836626819466338e-07, |
|
"loss": 1.1455, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.8893461362597165, |
|
"grad_norm": 0.6128779790737046, |
|
"learning_rate": 3.684787527831707e-07, |
|
"loss": 1.1609, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.8916323731138546, |
|
"grad_norm": 0.5800567298586133, |
|
"learning_rate": 3.53589884015712e-07, |
|
"loss": 1.1636, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.8939186099679927, |
|
"grad_norm": 0.5600191099569565, |
|
"learning_rate": 3.3899702416965166e-07, |
|
"loss": 1.1721, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.8962048468221308, |
|
"grad_norm": 0.5964683215562515, |
|
"learning_rate": 3.247011029125391e-07, |
|
"loss": 1.1508, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.8984910836762688, |
|
"grad_norm": 0.6125213377358303, |
|
"learning_rate": 3.1070303099485055e-07, |
|
"loss": 1.1716, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.9007773205304069, |
|
"grad_norm": 0.5812964318078312, |
|
"learning_rate": 2.9700370019197287e-07, |
|
"loss": 1.1495, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.903063557384545, |
|
"grad_norm": 0.5947330421470328, |
|
"learning_rate": 2.8360398324738415e-07, |
|
"loss": 1.1446, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.9053497942386831, |
|
"grad_norm": 0.5936630268160432, |
|
"learning_rate": 2.7050473381706186e-07, |
|
"loss": 1.1519, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.9076360310928212, |
|
"grad_norm": 0.6228979256825669, |
|
"learning_rate": 2.577067864150906e-07, |
|
"loss": 1.1688, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.9099222679469593, |
|
"grad_norm": 0.6500515468078818, |
|
"learning_rate": 2.452109563605065e-07, |
|
"loss": 1.1718, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.9122085048010974, |
|
"grad_norm": 0.568112374463465, |
|
"learning_rate": 2.330180397253473e-07, |
|
"loss": 1.169, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.9144947416552355, |
|
"grad_norm": 0.6014335143268985, |
|
"learning_rate": 2.2112881328394287e-07, |
|
"loss": 1.1556, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.9167809785093736, |
|
"grad_norm": 0.5814781144236604, |
|
"learning_rate": 2.0954403446342753e-07, |
|
"loss": 1.1688, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.9190672153635117, |
|
"grad_norm": 0.6269697024329176, |
|
"learning_rate": 1.9826444129548317e-07, |
|
"loss": 1.1791, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.9213534522176497, |
|
"grad_norm": 0.5793724546294099, |
|
"learning_rate": 1.8729075236932903e-07, |
|
"loss": 1.1736, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.9236396890717878, |
|
"grad_norm": 0.5757028817840649, |
|
"learning_rate": 1.7662366678593502e-07, |
|
"loss": 1.1674, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.9259259259259259, |
|
"grad_norm": 0.6383512892284545, |
|
"learning_rate": 1.6626386411348783e-07, |
|
"loss": 1.1725, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.928212162780064, |
|
"grad_norm": 0.6064267969457637, |
|
"learning_rate": 1.56212004344099e-07, |
|
"loss": 1.1596, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.9304983996342021, |
|
"grad_norm": 0.6046327277263103, |
|
"learning_rate": 1.4646872785175182e-07, |
|
"loss": 1.1616, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.9327846364883402, |
|
"grad_norm": 0.611959733363112, |
|
"learning_rate": 1.3703465535151505e-07, |
|
"loss": 1.1614, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.9350708733424783, |
|
"grad_norm": 0.6153837948383357, |
|
"learning_rate": 1.2791038785999243e-07, |
|
"loss": 1.1494, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.9373571101966164, |
|
"grad_norm": 0.5507733416769363, |
|
"learning_rate": 1.1909650665703265e-07, |
|
"loss": 1.1331, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.9396433470507545, |
|
"grad_norm": 0.5787602661155832, |
|
"learning_rate": 1.1059357324870456e-07, |
|
"loss": 1.1548, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.9419295839048926, |
|
"grad_norm": 0.5848374134615248, |
|
"learning_rate": 1.024021293315175e-07, |
|
"loss": 1.1628, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.9442158207590307, |
|
"grad_norm": 0.585861722501522, |
|
"learning_rate": 9.452269675791603e-08, |
|
"loss": 1.1424, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.9465020576131687, |
|
"grad_norm": 0.5870866242087308, |
|
"learning_rate": 8.69557775030344e-08, |
|
"loss": 1.181, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.9487882944673068, |
|
"grad_norm": 0.5917858310575264, |
|
"learning_rate": 7.970185363271432e-08, |
|
"loss": 1.1564, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.9510745313214449, |
|
"grad_norm": 0.6272259568011471, |
|
"learning_rate": 7.276138727279669e-08, |
|
"loss": 1.1659, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.953360768175583, |
|
"grad_norm": 0.607366888512829, |
|
"learning_rate": 6.613482057968023e-08, |
|
"loss": 1.1612, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.9556470050297211, |
|
"grad_norm": 0.61579614820576, |
|
"learning_rate": 5.982257571215178e-08, |
|
"loss": 1.1644, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.9579332418838592, |
|
"grad_norm": 0.6162342496797737, |
|
"learning_rate": 5.382505480449274e-08, |
|
"loss": 1.1439, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.9602194787379973, |
|
"grad_norm": 0.5880335959078453, |
|
"learning_rate": 4.814263994086077e-08, |
|
"loss": 1.1405, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.9625057155921354, |
|
"grad_norm": 0.5978901392727579, |
|
"learning_rate": 4.2775693130948094e-08, |
|
"loss": 1.1792, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.9647919524462735, |
|
"grad_norm": 0.5725207858399001, |
|
"learning_rate": 3.772455628691829e-08, |
|
"loss": 1.1679, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.9670781893004116, |
|
"grad_norm": 0.6126681514493614, |
|
"learning_rate": 3.2989551201624836e-08, |
|
"loss": 1.1621, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.9693644261545497, |
|
"grad_norm": 0.6026354249744876, |
|
"learning_rate": 2.857097952810972e-08, |
|
"loss": 1.1728, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.9716506630086877, |
|
"grad_norm": 0.5876159431495082, |
|
"learning_rate": 2.4469122760388264e-08, |
|
"loss": 1.1552, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.9739368998628258, |
|
"grad_norm": 0.5795939734314318, |
|
"learning_rate": 2.0684242215511797e-08, |
|
"loss": 1.1586, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.9762231367169639, |
|
"grad_norm": 0.6100064497073957, |
|
"learning_rate": 1.7216579016925415e-08, |
|
"loss": 1.1585, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.978509373571102, |
|
"grad_norm": 0.6410024148442394, |
|
"learning_rate": 1.4066354079101396e-08, |
|
"loss": 1.1576, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.9807956104252401, |
|
"grad_norm": 0.5946394925998356, |
|
"learning_rate": 1.1233768093468766e-08, |
|
"loss": 1.1565, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.9830818472793782, |
|
"grad_norm": 0.5993080705042445, |
|
"learning_rate": 8.719001515627434e-09, |
|
"loss": 1.1649, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.9853680841335163, |
|
"grad_norm": 0.5857680491868433, |
|
"learning_rate": 6.5222145538501595e-09, |
|
"loss": 1.176, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.9876543209876543, |
|
"grad_norm": 0.6157142971328977, |
|
"learning_rate": 4.643547158878492e-09, |
|
"loss": 1.146, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.9899405578417924, |
|
"grad_norm": 0.6005659801135901, |
|
"learning_rate": 3.0831190150054646e-09, |
|
"loss": 1.1607, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.9922267946959304, |
|
"grad_norm": 0.5963682235084494, |
|
"learning_rate": 1.8410295324505778e-09, |
|
"loss": 1.1668, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.9945130315500685, |
|
"grad_norm": 0.649218390898171, |
|
"learning_rate": 9.173578410281992e-10, |
|
"loss": 1.1602, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.9967992684042066, |
|
"grad_norm": 0.612662110275474, |
|
"learning_rate": 3.1216278510493027e-10, |
|
"loss": 1.1596, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.9990855052583447, |
|
"grad_norm": 0.6025732837303296, |
|
"learning_rate": 2.548291985149387e-11, |
|
"loss": 1.147, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_runtime": 4.0833, |
|
"eval_samples_per_second": 2.449, |
|
"eval_steps_per_second": 0.735, |
|
"step": 2187 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2187, |
|
"total_flos": 9703359095242752.0, |
|
"train_loss": 1.3940648635773556, |
|
"train_runtime": 19118.6692, |
|
"train_samples_per_second": 1.83, |
|
"train_steps_per_second": 0.114 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 2187, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9703359095242752.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |