|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.4935064935064934, |
|
"eval_steps": 500, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006493506493506494, |
|
"grad_norm": 33.13806915283203, |
|
"learning_rate": 9.999999013039593e-05, |
|
"loss": 5.645, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.012987012987012988, |
|
"grad_norm": 23.576648712158203, |
|
"learning_rate": 9.99999605215876e-05, |
|
"loss": 3.9143, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01948051948051948, |
|
"grad_norm": 8.88882827758789, |
|
"learning_rate": 9.999991117358668e-05, |
|
"loss": 3.524, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.025974025974025976, |
|
"grad_norm": 7.060589790344238, |
|
"learning_rate": 9.999984208641271e-05, |
|
"loss": 3.3346, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.032467532467532464, |
|
"grad_norm": 6.797203063964844, |
|
"learning_rate": 9.999975326009292e-05, |
|
"loss": 3.097, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03896103896103896, |
|
"grad_norm": 5.928432464599609, |
|
"learning_rate": 9.999964469466236e-05, |
|
"loss": 3.0141, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.045454545454545456, |
|
"grad_norm": 5.673449516296387, |
|
"learning_rate": 9.999951639016395e-05, |
|
"loss": 3.1291, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.05194805194805195, |
|
"grad_norm": 5.7318596839904785, |
|
"learning_rate": 9.99993683466483e-05, |
|
"loss": 2.9019, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.05844155844155844, |
|
"grad_norm": 6.2937493324279785, |
|
"learning_rate": 9.999920056417385e-05, |
|
"loss": 2.9567, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.06493506493506493, |
|
"grad_norm": 5.941080570220947, |
|
"learning_rate": 9.999901304280685e-05, |
|
"loss": 3.2404, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07142857142857142, |
|
"grad_norm": 5.431388854980469, |
|
"learning_rate": 9.999880578262135e-05, |
|
"loss": 2.8637, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.07792207792207792, |
|
"grad_norm": 5.87606143951416, |
|
"learning_rate": 9.999857878369916e-05, |
|
"loss": 2.9333, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.08441558441558442, |
|
"grad_norm": 5.996065616607666, |
|
"learning_rate": 9.999833204612988e-05, |
|
"loss": 3.2531, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.09090909090909091, |
|
"grad_norm": 5.6484832763671875, |
|
"learning_rate": 9.999806557001093e-05, |
|
"loss": 2.8224, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.09740259740259741, |
|
"grad_norm": 5.376333713531494, |
|
"learning_rate": 9.99977793554475e-05, |
|
"loss": 2.7981, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.1038961038961039, |
|
"grad_norm": 5.346993446350098, |
|
"learning_rate": 9.999747340255259e-05, |
|
"loss": 2.6952, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.11038961038961038, |
|
"grad_norm": 5.37903356552124, |
|
"learning_rate": 9.999714771144701e-05, |
|
"loss": 2.7209, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.11688311688311688, |
|
"grad_norm": 5.450172424316406, |
|
"learning_rate": 9.99968022822593e-05, |
|
"loss": 2.7421, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.12337662337662338, |
|
"grad_norm": 5.602138042449951, |
|
"learning_rate": 9.999643711512586e-05, |
|
"loss": 2.4858, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.12987012987012986, |
|
"grad_norm": 5.360761642456055, |
|
"learning_rate": 9.999605221019081e-05, |
|
"loss": 2.7158, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13636363636363635, |
|
"grad_norm": 6.177570343017578, |
|
"learning_rate": 9.999564756760615e-05, |
|
"loss": 2.6152, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 6.498959541320801, |
|
"learning_rate": 9.99952231875316e-05, |
|
"loss": 2.7611, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.14935064935064934, |
|
"grad_norm": 5.865311622619629, |
|
"learning_rate": 9.999477907013473e-05, |
|
"loss": 2.6758, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.15584415584415584, |
|
"grad_norm": 6.0329508781433105, |
|
"learning_rate": 9.999431521559082e-05, |
|
"loss": 2.6535, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.16233766233766234, |
|
"grad_norm": 5.814820766448975, |
|
"learning_rate": 9.999383162408304e-05, |
|
"loss": 2.6313, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.16883116883116883, |
|
"grad_norm": 6.224546432495117, |
|
"learning_rate": 9.999332829580226e-05, |
|
"loss": 2.627, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.17532467532467533, |
|
"grad_norm": 5.967427730560303, |
|
"learning_rate": 9.999280523094724e-05, |
|
"loss": 2.5997, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.18181818181818182, |
|
"grad_norm": 5.6386213302612305, |
|
"learning_rate": 9.999226242972444e-05, |
|
"loss": 2.4397, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.18831168831168832, |
|
"grad_norm": 5.742101669311523, |
|
"learning_rate": 9.999169989234815e-05, |
|
"loss": 2.6343, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.19480519480519481, |
|
"grad_norm": 5.685164928436279, |
|
"learning_rate": 9.999111761904046e-05, |
|
"loss": 2.3948, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2012987012987013, |
|
"grad_norm": 5.332027912139893, |
|
"learning_rate": 9.999051561003123e-05, |
|
"loss": 2.4506, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.2077922077922078, |
|
"grad_norm": 5.785808086395264, |
|
"learning_rate": 9.998989386555814e-05, |
|
"loss": 2.7135, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.21428571428571427, |
|
"grad_norm": 6.223892688751221, |
|
"learning_rate": 9.998925238586665e-05, |
|
"loss": 2.5245, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.22077922077922077, |
|
"grad_norm": 5.84872579574585, |
|
"learning_rate": 9.998859117121e-05, |
|
"loss": 2.4981, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.22727272727272727, |
|
"grad_norm": 5.572268486022949, |
|
"learning_rate": 9.998791022184922e-05, |
|
"loss": 2.608, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.23376623376623376, |
|
"grad_norm": 5.090015411376953, |
|
"learning_rate": 9.998720953805312e-05, |
|
"loss": 2.4585, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.24025974025974026, |
|
"grad_norm": 5.478267192840576, |
|
"learning_rate": 9.998648912009835e-05, |
|
"loss": 2.3653, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.24675324675324675, |
|
"grad_norm": 5.772948741912842, |
|
"learning_rate": 9.998574896826931e-05, |
|
"loss": 2.438, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.2532467532467532, |
|
"grad_norm": 4.907359600067139, |
|
"learning_rate": 9.998498908285819e-05, |
|
"loss": 2.2816, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.2597402597402597, |
|
"grad_norm": 5.5392045974731445, |
|
"learning_rate": 9.9984209464165e-05, |
|
"loss": 2.3041, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2662337662337662, |
|
"grad_norm": 4.999871253967285, |
|
"learning_rate": 9.99834101124975e-05, |
|
"loss": 2.1686, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.2727272727272727, |
|
"grad_norm": 6.051657676696777, |
|
"learning_rate": 9.998259102817129e-05, |
|
"loss": 2.3825, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.2792207792207792, |
|
"grad_norm": 4.7146687507629395, |
|
"learning_rate": 9.99817522115097e-05, |
|
"loss": 2.2417, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 4.671162128448486, |
|
"learning_rate": 9.998089366284391e-05, |
|
"loss": 2.241, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.2922077922077922, |
|
"grad_norm": 6.131312847137451, |
|
"learning_rate": 9.998001538251282e-05, |
|
"loss": 2.5224, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2987012987012987, |
|
"grad_norm": 5.212986469268799, |
|
"learning_rate": 9.997911737086322e-05, |
|
"loss": 2.452, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.3051948051948052, |
|
"grad_norm": 4.900334358215332, |
|
"learning_rate": 9.997819962824957e-05, |
|
"loss": 2.4364, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.3116883116883117, |
|
"grad_norm": 5.208124160766602, |
|
"learning_rate": 9.997726215503422e-05, |
|
"loss": 2.3829, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.3181818181818182, |
|
"grad_norm": 4.627975940704346, |
|
"learning_rate": 9.997630495158728e-05, |
|
"loss": 2.0702, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.3246753246753247, |
|
"grad_norm": 5.099819183349609, |
|
"learning_rate": 9.997532801828658e-05, |
|
"loss": 2.3067, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.33116883116883117, |
|
"grad_norm": 4.694891929626465, |
|
"learning_rate": 9.997433135551786e-05, |
|
"loss": 2.3014, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.33766233766233766, |
|
"grad_norm": 5.41646146774292, |
|
"learning_rate": 9.997331496367455e-05, |
|
"loss": 2.4805, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.34415584415584416, |
|
"grad_norm": 5.233139514923096, |
|
"learning_rate": 9.997227884315791e-05, |
|
"loss": 2.2605, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.35064935064935066, |
|
"grad_norm": 5.671755313873291, |
|
"learning_rate": 9.9971222994377e-05, |
|
"loss": 2.3303, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 5.070577144622803, |
|
"learning_rate": 9.997014741774866e-05, |
|
"loss": 2.3019, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.36363636363636365, |
|
"grad_norm": 4.925657272338867, |
|
"learning_rate": 9.996905211369748e-05, |
|
"loss": 2.3937, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.37012987012987014, |
|
"grad_norm": 5.051799774169922, |
|
"learning_rate": 9.996793708265586e-05, |
|
"loss": 2.358, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.37662337662337664, |
|
"grad_norm": 4.3391828536987305, |
|
"learning_rate": 9.996680232506405e-05, |
|
"loss": 2.0576, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.38311688311688313, |
|
"grad_norm": 4.852685451507568, |
|
"learning_rate": 9.996564784137e-05, |
|
"loss": 2.3678, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.38961038961038963, |
|
"grad_norm": 4.842132091522217, |
|
"learning_rate": 9.996447363202946e-05, |
|
"loss": 2.3493, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3961038961038961, |
|
"grad_norm": 4.50392484664917, |
|
"learning_rate": 9.996327969750605e-05, |
|
"loss": 2.516, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.4025974025974026, |
|
"grad_norm": 5.139745712280273, |
|
"learning_rate": 9.996206603827105e-05, |
|
"loss": 2.2679, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.4090909090909091, |
|
"grad_norm": 4.663613319396973, |
|
"learning_rate": 9.996083265480365e-05, |
|
"loss": 2.2269, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.4155844155844156, |
|
"grad_norm": 4.955394744873047, |
|
"learning_rate": 9.995957954759071e-05, |
|
"loss": 2.5408, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.42207792207792205, |
|
"grad_norm": 4.935218334197998, |
|
"learning_rate": 9.9958306717127e-05, |
|
"loss": 2.4971, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 4.313268661499023, |
|
"learning_rate": 9.995701416391499e-05, |
|
"loss": 2.2366, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.43506493506493504, |
|
"grad_norm": 4.81383752822876, |
|
"learning_rate": 9.995570188846495e-05, |
|
"loss": 2.7036, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.44155844155844154, |
|
"grad_norm": 5.073368549346924, |
|
"learning_rate": 9.995436989129495e-05, |
|
"loss": 2.2867, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.44805194805194803, |
|
"grad_norm": 4.764294147491455, |
|
"learning_rate": 9.995301817293084e-05, |
|
"loss": 2.5181, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.45454545454545453, |
|
"grad_norm": 4.322338104248047, |
|
"learning_rate": 9.995164673390625e-05, |
|
"loss": 2.3062, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.461038961038961, |
|
"grad_norm": 4.6427764892578125, |
|
"learning_rate": 9.995025557476261e-05, |
|
"loss": 2.2503, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.4675324675324675, |
|
"grad_norm": 4.552596092224121, |
|
"learning_rate": 9.994884469604912e-05, |
|
"loss": 2.268, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.474025974025974, |
|
"grad_norm": 3.8830368518829346, |
|
"learning_rate": 9.99474140983228e-05, |
|
"loss": 2.0846, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.4805194805194805, |
|
"grad_norm": 4.275032043457031, |
|
"learning_rate": 9.994596378214841e-05, |
|
"loss": 2.2989, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.487012987012987, |
|
"grad_norm": 4.56163215637207, |
|
"learning_rate": 9.994449374809851e-05, |
|
"loss": 2.0471, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.4935064935064935, |
|
"grad_norm": 4.3414626121521, |
|
"learning_rate": 9.994300399675342e-05, |
|
"loss": 2.2404, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 4.33914041519165, |
|
"learning_rate": 9.994149452870133e-05, |
|
"loss": 2.419, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.5064935064935064, |
|
"grad_norm": 4.387986660003662, |
|
"learning_rate": 9.99399653445381e-05, |
|
"loss": 2.3415, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.512987012987013, |
|
"grad_norm": 4.46196985244751, |
|
"learning_rate": 9.993841644486747e-05, |
|
"loss": 2.0966, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.5194805194805194, |
|
"grad_norm": 4.377128601074219, |
|
"learning_rate": 9.993684783030088e-05, |
|
"loss": 2.1728, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.525974025974026, |
|
"grad_norm": 4.3036789894104, |
|
"learning_rate": 9.99352595014576e-05, |
|
"loss": 2.3914, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.5324675324675324, |
|
"grad_norm": 3.62605619430542, |
|
"learning_rate": 9.993365145896473e-05, |
|
"loss": 1.768, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.538961038961039, |
|
"grad_norm": 4.524649143218994, |
|
"learning_rate": 9.993202370345705e-05, |
|
"loss": 2.3089, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.5454545454545454, |
|
"grad_norm": 4.413171291351318, |
|
"learning_rate": 9.993037623557716e-05, |
|
"loss": 2.3156, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.551948051948052, |
|
"grad_norm": 4.606533527374268, |
|
"learning_rate": 9.992870905597548e-05, |
|
"loss": 2.4141, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.5584415584415584, |
|
"grad_norm": 4.309783935546875, |
|
"learning_rate": 9.99270221653102e-05, |
|
"loss": 2.1423, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.564935064935065, |
|
"grad_norm": 4.384764671325684, |
|
"learning_rate": 9.992531556424726e-05, |
|
"loss": 2.5358, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 4.653176784515381, |
|
"learning_rate": 9.99235892534604e-05, |
|
"loss": 2.5152, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.577922077922078, |
|
"grad_norm": 4.79496955871582, |
|
"learning_rate": 9.992184323363112e-05, |
|
"loss": 2.1936, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.5844155844155844, |
|
"grad_norm": 3.893005847930908, |
|
"learning_rate": 9.992007750544876e-05, |
|
"loss": 1.916, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5909090909090909, |
|
"grad_norm": 4.456315040588379, |
|
"learning_rate": 9.991829206961037e-05, |
|
"loss": 2.6147, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.5974025974025974, |
|
"grad_norm": 4.070108890533447, |
|
"learning_rate": 9.991648692682083e-05, |
|
"loss": 2.3899, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.6038961038961039, |
|
"grad_norm": 4.310725212097168, |
|
"learning_rate": 9.991466207779278e-05, |
|
"loss": 1.9895, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.6103896103896104, |
|
"grad_norm": 4.388233184814453, |
|
"learning_rate": 9.991281752324664e-05, |
|
"loss": 2.43, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.6168831168831169, |
|
"grad_norm": 4.071033000946045, |
|
"learning_rate": 9.99109532639106e-05, |
|
"loss": 2.6234, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.6233766233766234, |
|
"grad_norm": 4.230044841766357, |
|
"learning_rate": 9.990906930052064e-05, |
|
"loss": 2.2542, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6298701298701299, |
|
"grad_norm": 4.155112266540527, |
|
"learning_rate": 9.990716563382055e-05, |
|
"loss": 2.386, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.6363636363636364, |
|
"grad_norm": 3.8967134952545166, |
|
"learning_rate": 9.990524226456182e-05, |
|
"loss": 2.1869, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.6428571428571429, |
|
"grad_norm": 3.701253890991211, |
|
"learning_rate": 9.99032991935038e-05, |
|
"loss": 2.0778, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.6493506493506493, |
|
"grad_norm": 3.9027299880981445, |
|
"learning_rate": 9.990133642141359e-05, |
|
"loss": 2.3151, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6558441558441559, |
|
"grad_norm": 3.9109201431274414, |
|
"learning_rate": 9.989935394906602e-05, |
|
"loss": 2.2484, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.6623376623376623, |
|
"grad_norm": 3.9390170574188232, |
|
"learning_rate": 9.989735177724378e-05, |
|
"loss": 2.1411, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.6688311688311688, |
|
"grad_norm": 3.8148396015167236, |
|
"learning_rate": 9.989532990673728e-05, |
|
"loss": 2.238, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.6753246753246753, |
|
"grad_norm": 3.8671321868896484, |
|
"learning_rate": 9.989328833834471e-05, |
|
"loss": 2.1264, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.6818181818181818, |
|
"grad_norm": 4.0604448318481445, |
|
"learning_rate": 9.989122707287208e-05, |
|
"loss": 2.3146, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.6883116883116883, |
|
"grad_norm": 4.460545539855957, |
|
"learning_rate": 9.988914611113311e-05, |
|
"loss": 2.2619, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.6948051948051948, |
|
"grad_norm": 3.8163511753082275, |
|
"learning_rate": 9.988704545394936e-05, |
|
"loss": 2.3224, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.7012987012987013, |
|
"grad_norm": 3.963921070098877, |
|
"learning_rate": 9.988492510215011e-05, |
|
"loss": 2.0558, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.7077922077922078, |
|
"grad_norm": 3.638936996459961, |
|
"learning_rate": 9.988278505657247e-05, |
|
"loss": 2.2582, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 3.886962890625, |
|
"learning_rate": 9.988062531806126e-05, |
|
"loss": 2.369, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7207792207792207, |
|
"grad_norm": 3.7281506061553955, |
|
"learning_rate": 9.987844588746915e-05, |
|
"loss": 2.3923, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 4.045536041259766, |
|
"learning_rate": 9.987624676565652e-05, |
|
"loss": 2.2701, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.7337662337662337, |
|
"grad_norm": 3.914747953414917, |
|
"learning_rate": 9.987402795349154e-05, |
|
"loss": 2.3457, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.7402597402597403, |
|
"grad_norm": 3.742039203643799, |
|
"learning_rate": 9.98717894518502e-05, |
|
"loss": 2.1281, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.7467532467532467, |
|
"grad_norm": 3.6615986824035645, |
|
"learning_rate": 9.986953126161619e-05, |
|
"loss": 2.2539, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.7532467532467533, |
|
"grad_norm": 4.145374298095703, |
|
"learning_rate": 9.986725338368102e-05, |
|
"loss": 2.168, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.7597402597402597, |
|
"grad_norm": 3.4575271606445312, |
|
"learning_rate": 9.986495581894395e-05, |
|
"loss": 2.2219, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.7662337662337663, |
|
"grad_norm": 3.4362294673919678, |
|
"learning_rate": 9.986263856831204e-05, |
|
"loss": 2.1648, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.7727272727272727, |
|
"grad_norm": 3.6311452388763428, |
|
"learning_rate": 9.986030163270011e-05, |
|
"loss": 2.2759, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.7792207792207793, |
|
"grad_norm": 3.4129133224487305, |
|
"learning_rate": 9.98579450130307e-05, |
|
"loss": 1.9268, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7857142857142857, |
|
"grad_norm": 3.2100963592529297, |
|
"learning_rate": 9.98555687102342e-05, |
|
"loss": 2.0587, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.7922077922077922, |
|
"grad_norm": 3.662796974182129, |
|
"learning_rate": 9.985317272524876e-05, |
|
"loss": 2.0628, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.7987012987012987, |
|
"grad_norm": 3.4176554679870605, |
|
"learning_rate": 9.985075705902022e-05, |
|
"loss": 2.1833, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.8051948051948052, |
|
"grad_norm": 3.2369673252105713, |
|
"learning_rate": 9.98483217125023e-05, |
|
"loss": 2.226, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.8116883116883117, |
|
"grad_norm": 3.5990474224090576, |
|
"learning_rate": 9.98458666866564e-05, |
|
"loss": 2.2601, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.8181818181818182, |
|
"grad_norm": 2.904496431350708, |
|
"learning_rate": 9.984339198245175e-05, |
|
"loss": 1.7978, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.8246753246753247, |
|
"grad_norm": 3.4239206314086914, |
|
"learning_rate": 9.98408976008653e-05, |
|
"loss": 1.8985, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.8311688311688312, |
|
"grad_norm": 3.4764034748077393, |
|
"learning_rate": 9.983838354288181e-05, |
|
"loss": 2.0324, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.8376623376623377, |
|
"grad_norm": 3.777717351913452, |
|
"learning_rate": 9.98358498094938e-05, |
|
"loss": 2.35, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.8441558441558441, |
|
"grad_norm": 3.3230550289154053, |
|
"learning_rate": 9.983329640170149e-05, |
|
"loss": 2.0381, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8506493506493507, |
|
"grad_norm": 3.5832202434539795, |
|
"learning_rate": 9.9830723320513e-05, |
|
"loss": 2.1329, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 3.628079414367676, |
|
"learning_rate": 9.982813056694412e-05, |
|
"loss": 2.1468, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.8636363636363636, |
|
"grad_norm": 3.3164730072021484, |
|
"learning_rate": 9.982551814201839e-05, |
|
"loss": 2.1018, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.8701298701298701, |
|
"grad_norm": 3.202061414718628, |
|
"learning_rate": 9.98228860467672e-05, |
|
"loss": 2.1209, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.8766233766233766, |
|
"grad_norm": 3.5353541374206543, |
|
"learning_rate": 9.982023428222962e-05, |
|
"loss": 2.3157, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.8831168831168831, |
|
"grad_norm": 3.284064292907715, |
|
"learning_rate": 9.981756284945256e-05, |
|
"loss": 2.1389, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.8896103896103896, |
|
"grad_norm": 3.548656463623047, |
|
"learning_rate": 9.981487174949065e-05, |
|
"loss": 2.0996, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.8961038961038961, |
|
"grad_norm": 3.6342179775238037, |
|
"learning_rate": 9.981216098340629e-05, |
|
"loss": 2.2534, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.9025974025974026, |
|
"grad_norm": 3.3490617275238037, |
|
"learning_rate": 9.980943055226964e-05, |
|
"loss": 2.0916, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 3.469787120819092, |
|
"learning_rate": 9.980668045715864e-05, |
|
"loss": 2.0929, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9155844155844156, |
|
"grad_norm": 3.651165723800659, |
|
"learning_rate": 9.980391069915897e-05, |
|
"loss": 2.3875, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.922077922077922, |
|
"grad_norm": 3.3916146755218506, |
|
"learning_rate": 9.980112127936409e-05, |
|
"loss": 2.2071, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.9285714285714286, |
|
"grad_norm": 3.484081745147705, |
|
"learning_rate": 9.979831219887525e-05, |
|
"loss": 2.2033, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.935064935064935, |
|
"grad_norm": 3.538928270339966, |
|
"learning_rate": 9.979548345880141e-05, |
|
"loss": 2.2221, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.9415584415584416, |
|
"grad_norm": 3.1394541263580322, |
|
"learning_rate": 9.979263506025929e-05, |
|
"loss": 2.1657, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.948051948051948, |
|
"grad_norm": 3.273376941680908, |
|
"learning_rate": 9.978976700437342e-05, |
|
"loss": 2.0307, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.9545454545454546, |
|
"grad_norm": 3.474287748336792, |
|
"learning_rate": 9.978687929227606e-05, |
|
"loss": 2.2569, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.961038961038961, |
|
"grad_norm": 3.40504789352417, |
|
"learning_rate": 9.978397192510721e-05, |
|
"loss": 2.2194, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.9675324675324676, |
|
"grad_norm": 3.5432350635528564, |
|
"learning_rate": 9.978104490401467e-05, |
|
"loss": 2.208, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.974025974025974, |
|
"grad_norm": 3.4987633228302, |
|
"learning_rate": 9.977809823015401e-05, |
|
"loss": 2.108, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9805194805194806, |
|
"grad_norm": 2.981435775756836, |
|
"learning_rate": 9.977513190468848e-05, |
|
"loss": 1.9615, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.987012987012987, |
|
"grad_norm": 3.688192129135132, |
|
"learning_rate": 9.977214592878916e-05, |
|
"loss": 2.3847, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.9935064935064936, |
|
"grad_norm": 3.7138729095458984, |
|
"learning_rate": 9.976914030363487e-05, |
|
"loss": 2.1349, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 798.8204956054688, |
|
"learning_rate": 9.976611503041218e-05, |
|
"loss": 1.9848, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.0064935064935066, |
|
"grad_norm": 3.2269575595855713, |
|
"learning_rate": 9.976307011031542e-05, |
|
"loss": 1.985, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.0129870129870129, |
|
"grad_norm": 3.4152016639709473, |
|
"learning_rate": 9.976000554454668e-05, |
|
"loss": 1.8731, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.0194805194805194, |
|
"grad_norm": 2.8445022106170654, |
|
"learning_rate": 9.975692133431579e-05, |
|
"loss": 1.6258, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 1.025974025974026, |
|
"grad_norm": 3.289297580718994, |
|
"learning_rate": 9.975381748084035e-05, |
|
"loss": 1.9762, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.0324675324675325, |
|
"grad_norm": 3.303457260131836, |
|
"learning_rate": 9.975069398534574e-05, |
|
"loss": 1.8754, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 1.0389610389610389, |
|
"grad_norm": 3.0584588050842285, |
|
"learning_rate": 9.974755084906502e-05, |
|
"loss": 1.7928, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0454545454545454, |
|
"grad_norm": 3.4114456176757812, |
|
"learning_rate": 9.974438807323907e-05, |
|
"loss": 1.9171, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 1.051948051948052, |
|
"grad_norm": 3.154326915740967, |
|
"learning_rate": 9.974120565911652e-05, |
|
"loss": 1.8924, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.0584415584415585, |
|
"grad_norm": 4.066158771514893, |
|
"learning_rate": 9.973800360795372e-05, |
|
"loss": 2.1918, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 1.0649350649350648, |
|
"grad_norm": 3.5308237075805664, |
|
"learning_rate": 9.97347819210148e-05, |
|
"loss": 2.0059, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.0714285714285714, |
|
"grad_norm": 3.352773427963257, |
|
"learning_rate": 9.973154059957162e-05, |
|
"loss": 2.0407, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.077922077922078, |
|
"grad_norm": 3.2745213508605957, |
|
"learning_rate": 9.972827964490381e-05, |
|
"loss": 1.9063, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.0844155844155845, |
|
"grad_norm": 3.0867488384246826, |
|
"learning_rate": 9.972499905829875e-05, |
|
"loss": 1.7633, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.0909090909090908, |
|
"grad_norm": 3.094118118286133, |
|
"learning_rate": 9.972169884105153e-05, |
|
"loss": 1.7058, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.0974025974025974, |
|
"grad_norm": 3.409409284591675, |
|
"learning_rate": 9.971837899446505e-05, |
|
"loss": 1.9263, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 1.103896103896104, |
|
"grad_norm": 3.7799603939056396, |
|
"learning_rate": 9.971503951984995e-05, |
|
"loss": 2.0513, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.1103896103896105, |
|
"grad_norm": 3.531250238418579, |
|
"learning_rate": 9.971168041852456e-05, |
|
"loss": 1.7284, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.1168831168831168, |
|
"grad_norm": 3.0355734825134277, |
|
"learning_rate": 9.970830169181505e-05, |
|
"loss": 1.7999, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.1233766233766234, |
|
"grad_norm": 3.7033843994140625, |
|
"learning_rate": 9.970490334105524e-05, |
|
"loss": 2.1174, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.12987012987013, |
|
"grad_norm": 3.9485671520233154, |
|
"learning_rate": 9.970148536758677e-05, |
|
"loss": 2.0429, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.1363636363636362, |
|
"grad_norm": 3.2483878135681152, |
|
"learning_rate": 9.9698047772759e-05, |
|
"loss": 1.8513, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.1428571428571428, |
|
"grad_norm": 3.4199907779693604, |
|
"learning_rate": 9.969459055792903e-05, |
|
"loss": 1.7395, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.1493506493506493, |
|
"grad_norm": 3.7996020317077637, |
|
"learning_rate": 9.969111372446171e-05, |
|
"loss": 1.9037, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.155844155844156, |
|
"grad_norm": 3.3956806659698486, |
|
"learning_rate": 9.968761727372964e-05, |
|
"loss": 1.6993, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.1623376623376624, |
|
"grad_norm": 3.3464372158050537, |
|
"learning_rate": 9.96841012071132e-05, |
|
"loss": 1.7862, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.1688311688311688, |
|
"grad_norm": 3.4040322303771973, |
|
"learning_rate": 9.968056552600043e-05, |
|
"loss": 2.0307, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.1753246753246753, |
|
"grad_norm": 3.239704132080078, |
|
"learning_rate": 9.967701023178717e-05, |
|
"loss": 1.7453, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.1818181818181819, |
|
"grad_norm": 3.682248115539551, |
|
"learning_rate": 9.967343532587702e-05, |
|
"loss": 1.8286, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.1883116883116882, |
|
"grad_norm": 3.8498799800872803, |
|
"learning_rate": 9.966984080968128e-05, |
|
"loss": 2.072, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.1948051948051948, |
|
"grad_norm": 3.3957226276397705, |
|
"learning_rate": 9.9666226684619e-05, |
|
"loss": 1.8372, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.2012987012987013, |
|
"grad_norm": 3.5456008911132812, |
|
"learning_rate": 9.966259295211697e-05, |
|
"loss": 1.9703, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.2077922077922079, |
|
"grad_norm": 3.291201591491699, |
|
"learning_rate": 9.965893961360976e-05, |
|
"loss": 1.9931, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.2142857142857142, |
|
"grad_norm": 3.3770711421966553, |
|
"learning_rate": 9.965526667053963e-05, |
|
"loss": 1.9248, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.2207792207792207, |
|
"grad_norm": 3.346139669418335, |
|
"learning_rate": 9.965157412435663e-05, |
|
"loss": 1.9951, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.2272727272727273, |
|
"grad_norm": 2.8916828632354736, |
|
"learning_rate": 9.964786197651847e-05, |
|
"loss": 1.7675, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.2337662337662338, |
|
"grad_norm": 3.3172147274017334, |
|
"learning_rate": 9.964413022849068e-05, |
|
"loss": 1.7783, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.2402597402597402, |
|
"grad_norm": 3.2727859020233154, |
|
"learning_rate": 9.96403788817465e-05, |
|
"loss": 2.0687, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.2467532467532467, |
|
"grad_norm": 2.8867673873901367, |
|
"learning_rate": 9.963660793776688e-05, |
|
"loss": 1.467, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.2532467532467533, |
|
"grad_norm": 3.398193359375, |
|
"learning_rate": 9.963281739804054e-05, |
|
"loss": 2.084, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.2597402597402598, |
|
"grad_norm": 3.4608664512634277, |
|
"learning_rate": 9.962900726406391e-05, |
|
"loss": 2.0284, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.2662337662337662, |
|
"grad_norm": 2.9325497150421143, |
|
"learning_rate": 9.96251775373412e-05, |
|
"loss": 1.7167, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.2727272727272727, |
|
"grad_norm": 3.263169765472412, |
|
"learning_rate": 9.96213282193843e-05, |
|
"loss": 1.9843, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.2792207792207793, |
|
"grad_norm": 3.2453436851501465, |
|
"learning_rate": 9.961745931171287e-05, |
|
"loss": 1.804, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.2857142857142856, |
|
"grad_norm": 3.2929818630218506, |
|
"learning_rate": 9.96135708158543e-05, |
|
"loss": 2.0279, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.2922077922077921, |
|
"grad_norm": 3.565657377243042, |
|
"learning_rate": 9.96096627333437e-05, |
|
"loss": 1.9242, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.2987012987012987, |
|
"grad_norm": 3.3671059608459473, |
|
"learning_rate": 9.96057350657239e-05, |
|
"loss": 1.7902, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.3051948051948052, |
|
"grad_norm": 3.2640137672424316, |
|
"learning_rate": 9.96017878145455e-05, |
|
"loss": 1.9485, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.3116883116883118, |
|
"grad_norm": 3.6283884048461914, |
|
"learning_rate": 9.959782098136683e-05, |
|
"loss": 2.204, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.3181818181818181, |
|
"grad_norm": 3.5066027641296387, |
|
"learning_rate": 9.959383456775391e-05, |
|
"loss": 2.0808, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.3246753246753247, |
|
"grad_norm": 3.4553568363189697, |
|
"learning_rate": 9.958982857528052e-05, |
|
"loss": 1.8267, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.3311688311688312, |
|
"grad_norm": 3.434098482131958, |
|
"learning_rate": 9.958580300552815e-05, |
|
"loss": 2.0149, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.3376623376623376, |
|
"grad_norm": 3.090224504470825, |
|
"learning_rate": 9.958175786008604e-05, |
|
"loss": 1.6188, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.344155844155844, |
|
"grad_norm": 3.104416847229004, |
|
"learning_rate": 9.957769314055117e-05, |
|
"loss": 1.8435, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.3506493506493507, |
|
"grad_norm": 3.1267154216766357, |
|
"learning_rate": 9.957360884852817e-05, |
|
"loss": 1.839, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.3571428571428572, |
|
"grad_norm": 3.384131908416748, |
|
"learning_rate": 9.956950498562953e-05, |
|
"loss": 2.0758, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 3.1655869483947754, |
|
"learning_rate": 9.956538155347534e-05, |
|
"loss": 1.6829, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.37012987012987, |
|
"grad_norm": 3.373323917388916, |
|
"learning_rate": 9.956123855369346e-05, |
|
"loss": 1.8551, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.3766233766233766, |
|
"grad_norm": 3.1319708824157715, |
|
"learning_rate": 9.955707598791952e-05, |
|
"loss": 1.7109, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.3831168831168832, |
|
"grad_norm": 3.38543963432312, |
|
"learning_rate": 9.95528938577968e-05, |
|
"loss": 1.9705, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.3896103896103895, |
|
"grad_norm": 3.442453145980835, |
|
"learning_rate": 9.954869216497635e-05, |
|
"loss": 1.7815, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.396103896103896, |
|
"grad_norm": 3.13667368888855, |
|
"learning_rate": 9.954447091111694e-05, |
|
"loss": 1.9754, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.4025974025974026, |
|
"grad_norm": 3.447659492492676, |
|
"learning_rate": 9.954023009788504e-05, |
|
"loss": 2.269, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.4090909090909092, |
|
"grad_norm": 3.6094558238983154, |
|
"learning_rate": 9.953596972695487e-05, |
|
"loss": 2.0854, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.4155844155844157, |
|
"grad_norm": 3.1890039443969727, |
|
"learning_rate": 9.953168980000835e-05, |
|
"loss": 1.993, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.422077922077922, |
|
"grad_norm": 3.3757541179656982, |
|
"learning_rate": 9.952739031873512e-05, |
|
"loss": 2.0556, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 3.047400712966919, |
|
"learning_rate": 9.952307128483256e-05, |
|
"loss": 1.9276, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.435064935064935, |
|
"grad_norm": 3.213284730911255, |
|
"learning_rate": 9.951873270000576e-05, |
|
"loss": 2.0443, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.4415584415584415, |
|
"grad_norm": 3.250971794128418, |
|
"learning_rate": 9.95143745659675e-05, |
|
"loss": 1.9392, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.448051948051948, |
|
"grad_norm": 3.3131914138793945, |
|
"learning_rate": 9.950999688443833e-05, |
|
"loss": 1.8581, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.4545454545454546, |
|
"grad_norm": 2.970548629760742, |
|
"learning_rate": 9.950559965714648e-05, |
|
"loss": 1.7365, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.4610389610389611, |
|
"grad_norm": 3.0069758892059326, |
|
"learning_rate": 9.950118288582788e-05, |
|
"loss": 1.4736, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.4675324675324675, |
|
"grad_norm": 2.8806533813476562, |
|
"learning_rate": 9.949674657222624e-05, |
|
"loss": 1.6931, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.474025974025974, |
|
"grad_norm": 2.8493435382843018, |
|
"learning_rate": 9.949229071809293e-05, |
|
"loss": 1.6771, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.4805194805194806, |
|
"grad_norm": 3.4888033866882324, |
|
"learning_rate": 9.948781532518705e-05, |
|
"loss": 1.9593, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.487012987012987, |
|
"grad_norm": 3.370680332183838, |
|
"learning_rate": 9.948332039527541e-05, |
|
"loss": 1.9331, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.4935064935064934, |
|
"grad_norm": 3.5115890502929688, |
|
"learning_rate": 9.947880593013255e-05, |
|
"loss": 2.0011, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 3.091661214828491, |
|
"learning_rate": 9.947427193154071e-05, |
|
"loss": 1.8513, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.5064935064935066, |
|
"grad_norm": 3.062349796295166, |
|
"learning_rate": 9.946971840128981e-05, |
|
"loss": 1.7876, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.512987012987013, |
|
"grad_norm": 3.1313397884368896, |
|
"learning_rate": 9.946514534117754e-05, |
|
"loss": 1.8766, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.5194805194805194, |
|
"grad_norm": 3.2378571033477783, |
|
"learning_rate": 9.946055275300928e-05, |
|
"loss": 1.9461, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.525974025974026, |
|
"grad_norm": 3.385910987854004, |
|
"learning_rate": 9.945594063859809e-05, |
|
"loss": 1.8297, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.5324675324675323, |
|
"grad_norm": 3.524550676345825, |
|
"learning_rate": 9.945130899976477e-05, |
|
"loss": 1.9914, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.5389610389610389, |
|
"grad_norm": 3.3748812675476074, |
|
"learning_rate": 9.944665783833782e-05, |
|
"loss": 1.8368, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.5454545454545454, |
|
"grad_norm": 3.096031904220581, |
|
"learning_rate": 9.944198715615342e-05, |
|
"loss": 1.783, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.551948051948052, |
|
"grad_norm": 3.375197410583496, |
|
"learning_rate": 9.943729695505552e-05, |
|
"loss": 2.073, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.5584415584415585, |
|
"grad_norm": 2.887650966644287, |
|
"learning_rate": 9.94325872368957e-05, |
|
"loss": 1.8864, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.564935064935065, |
|
"grad_norm": 3.060448169708252, |
|
"learning_rate": 9.942785800353332e-05, |
|
"loss": 1.7054, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.5714285714285714, |
|
"grad_norm": 3.2923696041107178, |
|
"learning_rate": 9.942310925683538e-05, |
|
"loss": 1.999, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.577922077922078, |
|
"grad_norm": 2.896110773086548, |
|
"learning_rate": 9.941834099867659e-05, |
|
"loss": 1.832, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.5844155844155843, |
|
"grad_norm": 2.9442901611328125, |
|
"learning_rate": 9.941355323093943e-05, |
|
"loss": 2.1465, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.5909090909090908, |
|
"grad_norm": 2.99206280708313, |
|
"learning_rate": 9.940874595551404e-05, |
|
"loss": 1.7449, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.5974025974025974, |
|
"grad_norm": 3.049651622772217, |
|
"learning_rate": 9.940391917429818e-05, |
|
"loss": 1.5516, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.603896103896104, |
|
"grad_norm": 3.3082385063171387, |
|
"learning_rate": 9.939907288919747e-05, |
|
"loss": 2.0167, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.6103896103896105, |
|
"grad_norm": 3.1149022579193115, |
|
"learning_rate": 9.939420710212511e-05, |
|
"loss": 2.0022, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.616883116883117, |
|
"grad_norm": 2.9124245643615723, |
|
"learning_rate": 9.938932181500205e-05, |
|
"loss": 1.9863, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.6233766233766234, |
|
"grad_norm": 3.180870294570923, |
|
"learning_rate": 9.938441702975689e-05, |
|
"loss": 1.9872, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.62987012987013, |
|
"grad_norm": 3.2378902435302734, |
|
"learning_rate": 9.9379492748326e-05, |
|
"loss": 1.9957, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.6363636363636362, |
|
"grad_norm": 2.9901888370513916, |
|
"learning_rate": 9.937454897265337e-05, |
|
"loss": 1.7305, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.6428571428571428, |
|
"grad_norm": 3.472109794616699, |
|
"learning_rate": 9.936958570469077e-05, |
|
"loss": 2.1072, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.6493506493506493, |
|
"grad_norm": 3.1424944400787354, |
|
"learning_rate": 9.93646029463976e-05, |
|
"loss": 1.961, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.655844155844156, |
|
"grad_norm": 3.177325487136841, |
|
"learning_rate": 9.935960069974096e-05, |
|
"loss": 1.7897, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.6623376623376624, |
|
"grad_norm": 3.207707166671753, |
|
"learning_rate": 9.935457896669568e-05, |
|
"loss": 1.8971, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.6688311688311688, |
|
"grad_norm": 3.142314910888672, |
|
"learning_rate": 9.934953774924424e-05, |
|
"loss": 1.8993, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.6753246753246753, |
|
"grad_norm": 3.1800239086151123, |
|
"learning_rate": 9.934447704937684e-05, |
|
"loss": 1.9482, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.6818181818181817, |
|
"grad_norm": 2.71478271484375, |
|
"learning_rate": 9.933939686909137e-05, |
|
"loss": 1.7173, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.6883116883116882, |
|
"grad_norm": 3.2946617603302, |
|
"learning_rate": 9.93342972103934e-05, |
|
"loss": 2.0823, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.6948051948051948, |
|
"grad_norm": 3.1132423877716064, |
|
"learning_rate": 9.93291780752962e-05, |
|
"loss": 1.9475, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.7012987012987013, |
|
"grad_norm": 3.383772373199463, |
|
"learning_rate": 9.932403946582072e-05, |
|
"loss": 2.1187, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.7077922077922079, |
|
"grad_norm": 3.0686593055725098, |
|
"learning_rate": 9.931888138399561e-05, |
|
"loss": 1.7843, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 3.1363070011138916, |
|
"learning_rate": 9.931370383185718e-05, |
|
"loss": 2.0091, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.7207792207792207, |
|
"grad_norm": 3.5003199577331543, |
|
"learning_rate": 9.930850681144945e-05, |
|
"loss": 2.1529, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.7272727272727273, |
|
"grad_norm": 3.456496000289917, |
|
"learning_rate": 9.930329032482413e-05, |
|
"loss": 2.0149, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.7337662337662336, |
|
"grad_norm": 2.9732820987701416, |
|
"learning_rate": 9.92980543740406e-05, |
|
"loss": 1.7583, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.7402597402597402, |
|
"grad_norm": 3.0580830574035645, |
|
"learning_rate": 9.929279896116594e-05, |
|
"loss": 1.9094, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.7467532467532467, |
|
"grad_norm": 2.9448652267456055, |
|
"learning_rate": 9.92875240882749e-05, |
|
"loss": 1.8479, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.7532467532467533, |
|
"grad_norm": 3.3782246112823486, |
|
"learning_rate": 9.928222975744991e-05, |
|
"loss": 1.7582, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.7597402597402598, |
|
"grad_norm": 3.2021212577819824, |
|
"learning_rate": 9.927691597078108e-05, |
|
"loss": 2.0634, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.7662337662337664, |
|
"grad_norm": 3.008000135421753, |
|
"learning_rate": 9.927158273036625e-05, |
|
"loss": 1.8569, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.7727272727272727, |
|
"grad_norm": 3.255671501159668, |
|
"learning_rate": 9.926623003831084e-05, |
|
"loss": 1.8678, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.7792207792207793, |
|
"grad_norm": 3.4665205478668213, |
|
"learning_rate": 9.926085789672806e-05, |
|
"loss": 2.1407, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 3.0679426193237305, |
|
"learning_rate": 9.92554663077387e-05, |
|
"loss": 2.0964, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.7922077922077921, |
|
"grad_norm": 3.3258745670318604, |
|
"learning_rate": 9.92500552734713e-05, |
|
"loss": 2.0542, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.7987012987012987, |
|
"grad_norm": 3.138080596923828, |
|
"learning_rate": 9.924462479606207e-05, |
|
"loss": 1.9672, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.8051948051948052, |
|
"grad_norm": 3.225432872772217, |
|
"learning_rate": 9.923917487765484e-05, |
|
"loss": 1.9354, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.8116883116883118, |
|
"grad_norm": 2.762915849685669, |
|
"learning_rate": 9.923370552040116e-05, |
|
"loss": 1.7821, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": 3.107593059539795, |
|
"learning_rate": 9.922821672646027e-05, |
|
"loss": 1.8868, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.8246753246753247, |
|
"grad_norm": 3.052213668823242, |
|
"learning_rate": 9.922270849799905e-05, |
|
"loss": 1.8148, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.8311688311688312, |
|
"grad_norm": 3.04420804977417, |
|
"learning_rate": 9.921718083719203e-05, |
|
"loss": 2.0534, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.8376623376623376, |
|
"grad_norm": 3.083042860031128, |
|
"learning_rate": 9.921163374622147e-05, |
|
"loss": 2.0427, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.844155844155844, |
|
"grad_norm": 2.7837564945220947, |
|
"learning_rate": 9.920606722727725e-05, |
|
"loss": 1.5982, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.8506493506493507, |
|
"grad_norm": 2.737576961517334, |
|
"learning_rate": 9.920048128255699e-05, |
|
"loss": 1.6942, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.8571428571428572, |
|
"grad_norm": 2.9239234924316406, |
|
"learning_rate": 9.919487591426591e-05, |
|
"loss": 1.9588, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.8636363636363638, |
|
"grad_norm": 2.5686419010162354, |
|
"learning_rate": 9.918925112461688e-05, |
|
"loss": 1.798, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.87012987012987, |
|
"grad_norm": 3.106424570083618, |
|
"learning_rate": 9.918360691583056e-05, |
|
"loss": 1.9961, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.8766233766233766, |
|
"grad_norm": 3.469996452331543, |
|
"learning_rate": 9.91779432901351e-05, |
|
"loss": 2.1417, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.883116883116883, |
|
"grad_norm": 3.0456113815307617, |
|
"learning_rate": 9.917226024976649e-05, |
|
"loss": 1.922, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.8896103896103895, |
|
"grad_norm": 3.158688545227051, |
|
"learning_rate": 9.916655779696826e-05, |
|
"loss": 1.8516, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.896103896103896, |
|
"grad_norm": 2.9286715984344482, |
|
"learning_rate": 9.916083593399166e-05, |
|
"loss": 2.0368, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.9025974025974026, |
|
"grad_norm": 2.7478342056274414, |
|
"learning_rate": 9.91550946630956e-05, |
|
"loss": 1.7571, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.9090909090909092, |
|
"grad_norm": 2.5094292163848877, |
|
"learning_rate": 9.914933398654663e-05, |
|
"loss": 1.7007, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.9155844155844157, |
|
"grad_norm": 2.9641265869140625, |
|
"learning_rate": 9.914355390661896e-05, |
|
"loss": 1.7764, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.922077922077922, |
|
"grad_norm": 3.029686689376831, |
|
"learning_rate": 9.913775442559452e-05, |
|
"loss": 1.9938, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.9285714285714286, |
|
"grad_norm": 3.169807195663452, |
|
"learning_rate": 9.91319355457628e-05, |
|
"loss": 1.9341, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.935064935064935, |
|
"grad_norm": 2.8388686180114746, |
|
"learning_rate": 9.912609726942103e-05, |
|
"loss": 1.96, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.9415584415584415, |
|
"grad_norm": 3.2075181007385254, |
|
"learning_rate": 9.912023959887408e-05, |
|
"loss": 2.0209, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.948051948051948, |
|
"grad_norm": 2.870790481567383, |
|
"learning_rate": 9.911436253643445e-05, |
|
"loss": 1.7023, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.9545454545454546, |
|
"grad_norm": 3.17559814453125, |
|
"learning_rate": 9.910846608442229e-05, |
|
"loss": 1.8861, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.9610389610389611, |
|
"grad_norm": 3.2682406902313232, |
|
"learning_rate": 9.910255024516546e-05, |
|
"loss": 1.9807, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.9675324675324677, |
|
"grad_norm": 3.0713870525360107, |
|
"learning_rate": 9.909661502099943e-05, |
|
"loss": 1.7816, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.974025974025974, |
|
"grad_norm": 2.817713975906372, |
|
"learning_rate": 9.909066041426733e-05, |
|
"loss": 1.802, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.9805194805194806, |
|
"grad_norm": 3.0280492305755615, |
|
"learning_rate": 9.908468642731995e-05, |
|
"loss": 2.1874, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.987012987012987, |
|
"grad_norm": 2.799920082092285, |
|
"learning_rate": 9.907869306251572e-05, |
|
"loss": 1.6937, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.9935064935064934, |
|
"grad_norm": 2.9983012676239014, |
|
"learning_rate": 9.907268032222071e-05, |
|
"loss": 1.9305, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4888.9140625, |
|
"learning_rate": 9.90666482088087e-05, |
|
"loss": 1.771, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 2.0064935064935066, |
|
"grad_norm": 3.2918434143066406, |
|
"learning_rate": 9.906059672466101e-05, |
|
"loss": 1.651, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 2.012987012987013, |
|
"grad_norm": 3.1957743167877197, |
|
"learning_rate": 9.90545258721667e-05, |
|
"loss": 1.7395, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.0194805194805197, |
|
"grad_norm": 3.090162515640259, |
|
"learning_rate": 9.904843565372248e-05, |
|
"loss": 1.6515, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 2.0259740259740258, |
|
"grad_norm": 3.333552598953247, |
|
"learning_rate": 9.904232607173262e-05, |
|
"loss": 1.6349, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 2.0324675324675323, |
|
"grad_norm": 3.266960382461548, |
|
"learning_rate": 9.903619712860912e-05, |
|
"loss": 1.6787, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 2.038961038961039, |
|
"grad_norm": 3.407783031463623, |
|
"learning_rate": 9.903004882677156e-05, |
|
"loss": 1.5224, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 2.0454545454545454, |
|
"grad_norm": 3.5633609294891357, |
|
"learning_rate": 9.902388116864722e-05, |
|
"loss": 1.7562, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.051948051948052, |
|
"grad_norm": 3.1980199813842773, |
|
"learning_rate": 9.901769415667099e-05, |
|
"loss": 1.6488, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 2.0584415584415585, |
|
"grad_norm": 2.946018695831299, |
|
"learning_rate": 9.90114877932854e-05, |
|
"loss": 1.3956, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 2.064935064935065, |
|
"grad_norm": 3.0959718227386475, |
|
"learning_rate": 9.900526208094061e-05, |
|
"loss": 1.6679, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 2.0714285714285716, |
|
"grad_norm": 3.6357126235961914, |
|
"learning_rate": 9.899901702209445e-05, |
|
"loss": 1.6934, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 2.0779220779220777, |
|
"grad_norm": 3.2969744205474854, |
|
"learning_rate": 9.899275261921234e-05, |
|
"loss": 1.7037, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.0844155844155843, |
|
"grad_norm": 3.406505584716797, |
|
"learning_rate": 9.898646887476741e-05, |
|
"loss": 1.6802, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 2.090909090909091, |
|
"grad_norm": 3.356058120727539, |
|
"learning_rate": 9.898016579124037e-05, |
|
"loss": 1.6779, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 2.0974025974025974, |
|
"grad_norm": 3.2132608890533447, |
|
"learning_rate": 9.897384337111957e-05, |
|
"loss": 1.6753, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 2.103896103896104, |
|
"grad_norm": 3.860316514968872, |
|
"learning_rate": 9.8967501616901e-05, |
|
"loss": 1.7702, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 2.1103896103896105, |
|
"grad_norm": 3.283928871154785, |
|
"learning_rate": 9.896114053108829e-05, |
|
"loss": 1.874, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.116883116883117, |
|
"grad_norm": 3.458469867706299, |
|
"learning_rate": 9.895476011619269e-05, |
|
"loss": 1.6123, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 2.1233766233766236, |
|
"grad_norm": 3.1832275390625, |
|
"learning_rate": 9.89483603747331e-05, |
|
"loss": 1.5368, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 2.1298701298701297, |
|
"grad_norm": 3.380354166030884, |
|
"learning_rate": 9.894194130923602e-05, |
|
"loss": 1.8633, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 2.1363636363636362, |
|
"grad_norm": 3.0025081634521484, |
|
"learning_rate": 9.89355029222356e-05, |
|
"loss": 1.693, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 2.142857142857143, |
|
"grad_norm": 3.223750591278076, |
|
"learning_rate": 9.892904521627361e-05, |
|
"loss": 1.6421, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.1493506493506493, |
|
"grad_norm": 2.978912830352783, |
|
"learning_rate": 9.892256819389947e-05, |
|
"loss": 1.6442, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 2.155844155844156, |
|
"grad_norm": 3.126190185546875, |
|
"learning_rate": 9.891607185767018e-05, |
|
"loss": 1.642, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 2.1623376623376624, |
|
"grad_norm": 3.196380615234375, |
|
"learning_rate": 9.890955621015039e-05, |
|
"loss": 1.8094, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 2.168831168831169, |
|
"grad_norm": 3.2463197708129883, |
|
"learning_rate": 9.890302125391239e-05, |
|
"loss": 1.7948, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 2.175324675324675, |
|
"grad_norm": 3.05271053314209, |
|
"learning_rate": 9.88964669915361e-05, |
|
"loss": 1.7538, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.1818181818181817, |
|
"grad_norm": 2.841326951980591, |
|
"learning_rate": 9.888989342560899e-05, |
|
"loss": 1.7039, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 2.188311688311688, |
|
"grad_norm": 2.989499807357788, |
|
"learning_rate": 9.888330055872623e-05, |
|
"loss": 1.6692, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 2.1948051948051948, |
|
"grad_norm": 3.3557872772216797, |
|
"learning_rate": 9.887668839349057e-05, |
|
"loss": 1.6721, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 2.2012987012987013, |
|
"grad_norm": 3.342548370361328, |
|
"learning_rate": 9.88700569325124e-05, |
|
"loss": 1.83, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 2.207792207792208, |
|
"grad_norm": 2.9942660331726074, |
|
"learning_rate": 9.886340617840968e-05, |
|
"loss": 1.8081, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.2142857142857144, |
|
"grad_norm": 3.1562952995300293, |
|
"learning_rate": 9.885673613380806e-05, |
|
"loss": 1.7479, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 2.220779220779221, |
|
"grad_norm": 2.7380170822143555, |
|
"learning_rate": 9.885004680134076e-05, |
|
"loss": 1.3577, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 2.227272727272727, |
|
"grad_norm": 2.7151942253112793, |
|
"learning_rate": 9.884333818364861e-05, |
|
"loss": 1.3956, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 2.2337662337662336, |
|
"grad_norm": 3.0294029712677, |
|
"learning_rate": 9.883661028338008e-05, |
|
"loss": 1.5567, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 2.24025974025974, |
|
"grad_norm": 3.0623183250427246, |
|
"learning_rate": 9.882986310319124e-05, |
|
"loss": 1.6769, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.2467532467532467, |
|
"grad_norm": 3.1786789894104004, |
|
"learning_rate": 9.882309664574575e-05, |
|
"loss": 1.595, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 2.2532467532467533, |
|
"grad_norm": 3.2232789993286133, |
|
"learning_rate": 9.881631091371491e-05, |
|
"loss": 1.6652, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 2.25974025974026, |
|
"grad_norm": 2.559070110321045, |
|
"learning_rate": 9.880950590977765e-05, |
|
"loss": 1.3795, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 2.2662337662337664, |
|
"grad_norm": 3.3524162769317627, |
|
"learning_rate": 9.880268163662042e-05, |
|
"loss": 1.8738, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 2.2727272727272725, |
|
"grad_norm": 3.0645384788513184, |
|
"learning_rate": 9.879583809693738e-05, |
|
"loss": 1.6108, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.279220779220779, |
|
"grad_norm": 3.547124147415161, |
|
"learning_rate": 9.878897529343023e-05, |
|
"loss": 1.9958, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 2.2857142857142856, |
|
"grad_norm": 2.875279188156128, |
|
"learning_rate": 9.87820932288083e-05, |
|
"loss": 1.5304, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 2.292207792207792, |
|
"grad_norm": 3.039005756378174, |
|
"learning_rate": 9.877519190578852e-05, |
|
"loss": 1.6787, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 2.2987012987012987, |
|
"grad_norm": 3.231783151626587, |
|
"learning_rate": 9.876827132709544e-05, |
|
"loss": 1.8304, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 2.3051948051948052, |
|
"grad_norm": 3.3293938636779785, |
|
"learning_rate": 9.876133149546118e-05, |
|
"loss": 1.8101, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.311688311688312, |
|
"grad_norm": 3.1368279457092285, |
|
"learning_rate": 9.875437241362546e-05, |
|
"loss": 1.7841, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 2.3181818181818183, |
|
"grad_norm": 3.4561681747436523, |
|
"learning_rate": 9.874739408433565e-05, |
|
"loss": 1.8775, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 2.324675324675325, |
|
"grad_norm": 2.352166175842285, |
|
"learning_rate": 9.874039651034666e-05, |
|
"loss": 1.4749, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 2.331168831168831, |
|
"grad_norm": 2.7010676860809326, |
|
"learning_rate": 9.873337969442101e-05, |
|
"loss": 1.6247, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 2.3376623376623376, |
|
"grad_norm": 2.420346260070801, |
|
"learning_rate": 9.872634363932887e-05, |
|
"loss": 1.2657, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.344155844155844, |
|
"grad_norm": 3.2130322456359863, |
|
"learning_rate": 9.871928834784792e-05, |
|
"loss": 1.796, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 2.3506493506493507, |
|
"grad_norm": 2.915154457092285, |
|
"learning_rate": 9.87122138227635e-05, |
|
"loss": 1.6665, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.357142857142857, |
|
"grad_norm": 3.347503185272217, |
|
"learning_rate": 9.870512006686851e-05, |
|
"loss": 1.599, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 2.3636363636363638, |
|
"grad_norm": 3.082561492919922, |
|
"learning_rate": 9.869800708296346e-05, |
|
"loss": 1.9103, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 2.3701298701298703, |
|
"grad_norm": 2.9106686115264893, |
|
"learning_rate": 9.869087487385644e-05, |
|
"loss": 1.8083, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 2.3766233766233764, |
|
"grad_norm": 3.256690740585327, |
|
"learning_rate": 9.868372344236313e-05, |
|
"loss": 1.6163, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 2.383116883116883, |
|
"grad_norm": 3.0932395458221436, |
|
"learning_rate": 9.867655279130683e-05, |
|
"loss": 1.7318, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 2.3896103896103895, |
|
"grad_norm": 2.975876569747925, |
|
"learning_rate": 9.866936292351836e-05, |
|
"loss": 1.8791, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 2.396103896103896, |
|
"grad_norm": 3.221210241317749, |
|
"learning_rate": 9.866215384183619e-05, |
|
"loss": 1.8514, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 2.4025974025974026, |
|
"grad_norm": 3.188079833984375, |
|
"learning_rate": 9.865492554910633e-05, |
|
"loss": 1.8689, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.409090909090909, |
|
"grad_norm": 3.175229072570801, |
|
"learning_rate": 9.864767804818243e-05, |
|
"loss": 1.8805, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 2.4155844155844157, |
|
"grad_norm": 3.077758312225342, |
|
"learning_rate": 9.864041134192563e-05, |
|
"loss": 1.9167, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 2.4220779220779223, |
|
"grad_norm": 2.952587366104126, |
|
"learning_rate": 9.863312543320477e-05, |
|
"loss": 1.7636, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 2.4285714285714284, |
|
"grad_norm": 2.8241958618164062, |
|
"learning_rate": 9.86258203248962e-05, |
|
"loss": 1.583, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 2.435064935064935, |
|
"grad_norm": 3.0791385173797607, |
|
"learning_rate": 9.861849601988383e-05, |
|
"loss": 1.5679, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.4415584415584415, |
|
"grad_norm": 3.0636885166168213, |
|
"learning_rate": 9.861115252105921e-05, |
|
"loss": 1.6682, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 2.448051948051948, |
|
"grad_norm": 2.9893672466278076, |
|
"learning_rate": 9.860378983132143e-05, |
|
"loss": 1.8759, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 2.4545454545454546, |
|
"grad_norm": 2.585669755935669, |
|
"learning_rate": 9.859640795357716e-05, |
|
"loss": 1.602, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.461038961038961, |
|
"grad_norm": 2.8519511222839355, |
|
"learning_rate": 9.858900689074064e-05, |
|
"loss": 1.8531, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 2.4675324675324677, |
|
"grad_norm": 2.6581485271453857, |
|
"learning_rate": 9.85815866457337e-05, |
|
"loss": 1.6298, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.474025974025974, |
|
"grad_norm": 2.6713006496429443, |
|
"learning_rate": 9.857414722148574e-05, |
|
"loss": 1.7688, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 2.4805194805194803, |
|
"grad_norm": 2.8108949661254883, |
|
"learning_rate": 9.856668862093372e-05, |
|
"loss": 1.7104, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 2.487012987012987, |
|
"grad_norm": 2.4003746509552, |
|
"learning_rate": 9.855921084702219e-05, |
|
"loss": 1.4872, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 2.4935064935064934, |
|
"grad_norm": 2.9604358673095703, |
|
"learning_rate": 9.855171390270324e-05, |
|
"loss": 1.6916, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 2.9553635120391846, |
|
"learning_rate": 9.854419779093655e-05, |
|
"loss": 1.65, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.5064935064935066, |
|
"grad_norm": 2.6719701290130615, |
|
"learning_rate": 9.853666251468937e-05, |
|
"loss": 1.5492, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 2.512987012987013, |
|
"grad_norm": 2.918327808380127, |
|
"learning_rate": 9.85291080769365e-05, |
|
"loss": 1.8282, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 2.5194805194805197, |
|
"grad_norm": 3.0886974334716797, |
|
"learning_rate": 9.852153448066032e-05, |
|
"loss": 1.7593, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 2.525974025974026, |
|
"grad_norm": 2.843238592147827, |
|
"learning_rate": 9.851394172885074e-05, |
|
"loss": 1.7201, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 2.5324675324675323, |
|
"grad_norm": 2.817976951599121, |
|
"learning_rate": 9.85063298245053e-05, |
|
"loss": 1.7933, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.538961038961039, |
|
"grad_norm": 2.8269622325897217, |
|
"learning_rate": 9.849869877062902e-05, |
|
"loss": 1.7015, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 2.5454545454545454, |
|
"grad_norm": 2.727435827255249, |
|
"learning_rate": 9.849104857023455e-05, |
|
"loss": 1.481, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 2.551948051948052, |
|
"grad_norm": 2.7080495357513428, |
|
"learning_rate": 9.848337922634206e-05, |
|
"loss": 1.4346, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 2.5584415584415585, |
|
"grad_norm": 3.0474908351898193, |
|
"learning_rate": 9.847569074197926e-05, |
|
"loss": 1.6259, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 2.564935064935065, |
|
"grad_norm": 2.7546396255493164, |
|
"learning_rate": 9.846798312018146e-05, |
|
"loss": 1.5626, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.571428571428571, |
|
"grad_norm": 2.9604411125183105, |
|
"learning_rate": 9.846025636399152e-05, |
|
"loss": 2.0086, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 2.5779220779220777, |
|
"grad_norm": 3.0381553173065186, |
|
"learning_rate": 9.845251047645983e-05, |
|
"loss": 2.0266, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 2.5844155844155843, |
|
"grad_norm": 2.540466070175171, |
|
"learning_rate": 9.844474546064435e-05, |
|
"loss": 1.4928, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 2.590909090909091, |
|
"grad_norm": 2.5260584354400635, |
|
"learning_rate": 9.843696131961058e-05, |
|
"loss": 1.5486, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 2.5974025974025974, |
|
"grad_norm": 2.7667641639709473, |
|
"learning_rate": 9.842915805643155e-05, |
|
"loss": 1.6139, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.603896103896104, |
|
"grad_norm": 2.7333874702453613, |
|
"learning_rate": 9.842133567418792e-05, |
|
"loss": 1.747, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 2.6103896103896105, |
|
"grad_norm": 2.620067596435547, |
|
"learning_rate": 9.841349417596779e-05, |
|
"loss": 1.6353, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 2.616883116883117, |
|
"grad_norm": 2.723745584487915, |
|
"learning_rate": 9.84056335648669e-05, |
|
"loss": 1.5413, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 2.6233766233766236, |
|
"grad_norm": 2.789571523666382, |
|
"learning_rate": 9.839775384398847e-05, |
|
"loss": 1.9104, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 2.62987012987013, |
|
"grad_norm": 2.624312162399292, |
|
"learning_rate": 9.838985501644328e-05, |
|
"loss": 1.7595, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.6363636363636362, |
|
"grad_norm": 2.454328775405884, |
|
"learning_rate": 9.838193708534968e-05, |
|
"loss": 1.5174, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 2.642857142857143, |
|
"grad_norm": 2.664504051208496, |
|
"learning_rate": 9.837400005383354e-05, |
|
"loss": 1.5799, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 2.6493506493506493, |
|
"grad_norm": 2.553387403488159, |
|
"learning_rate": 9.83660439250283e-05, |
|
"loss": 1.6922, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.655844155844156, |
|
"grad_norm": 2.5766408443450928, |
|
"learning_rate": 9.835806870207487e-05, |
|
"loss": 1.8922, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 2.6623376623376624, |
|
"grad_norm": 2.8266332149505615, |
|
"learning_rate": 9.835007438812177e-05, |
|
"loss": 1.6223, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.6688311688311686, |
|
"grad_norm": 2.6766135692596436, |
|
"learning_rate": 9.834206098632499e-05, |
|
"loss": 1.6929, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 2.675324675324675, |
|
"grad_norm": 2.5043816566467285, |
|
"learning_rate": 9.833402849984815e-05, |
|
"loss": 1.5207, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 2.6818181818181817, |
|
"grad_norm": 2.6566975116729736, |
|
"learning_rate": 9.832597693186232e-05, |
|
"loss": 1.597, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 2.688311688311688, |
|
"grad_norm": 2.4868650436401367, |
|
"learning_rate": 9.831790628554612e-05, |
|
"loss": 1.6636, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 2.6948051948051948, |
|
"grad_norm": 2.5328221321105957, |
|
"learning_rate": 9.830981656408574e-05, |
|
"loss": 1.7525, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.7012987012987013, |
|
"grad_norm": 2.5664961338043213, |
|
"learning_rate": 9.830170777067485e-05, |
|
"loss": 1.6539, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 2.707792207792208, |
|
"grad_norm": 2.7295408248901367, |
|
"learning_rate": 9.829357990851468e-05, |
|
"loss": 1.8107, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 2.7142857142857144, |
|
"grad_norm": 2.501190662384033, |
|
"learning_rate": 9.8285432980814e-05, |
|
"loss": 1.4882, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 2.720779220779221, |
|
"grad_norm": 2.678788185119629, |
|
"learning_rate": 9.827726699078908e-05, |
|
"loss": 1.711, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 2.7272727272727275, |
|
"grad_norm": 2.4673514366149902, |
|
"learning_rate": 9.82690819416637e-05, |
|
"loss": 1.6805, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.7337662337662336, |
|
"grad_norm": 2.555209159851074, |
|
"learning_rate": 9.826087783666921e-05, |
|
"loss": 1.705, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 2.74025974025974, |
|
"grad_norm": 2.5114858150482178, |
|
"learning_rate": 9.825265467904445e-05, |
|
"loss": 1.6545, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 2.7467532467532467, |
|
"grad_norm": 2.7648122310638428, |
|
"learning_rate": 9.824441247203579e-05, |
|
"loss": 1.741, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 2.7532467532467533, |
|
"grad_norm": 2.949734687805176, |
|
"learning_rate": 9.823615121889716e-05, |
|
"loss": 1.8766, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 2.75974025974026, |
|
"grad_norm": 2.765125036239624, |
|
"learning_rate": 9.822787092288991e-05, |
|
"loss": 1.674, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.7662337662337664, |
|
"grad_norm": 2.5355770587921143, |
|
"learning_rate": 9.821957158728301e-05, |
|
"loss": 1.7228, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 2.7727272727272725, |
|
"grad_norm": 2.7192254066467285, |
|
"learning_rate": 9.82112532153529e-05, |
|
"loss": 1.8595, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 2.779220779220779, |
|
"grad_norm": 2.6829566955566406, |
|
"learning_rate": 9.820291581038355e-05, |
|
"loss": 1.758, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.7857142857142856, |
|
"grad_norm": 2.2499940395355225, |
|
"learning_rate": 9.819455937566642e-05, |
|
"loss": 1.427, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 2.792207792207792, |
|
"grad_norm": 2.475795030593872, |
|
"learning_rate": 9.81861839145005e-05, |
|
"loss": 1.7117, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.7987012987012987, |
|
"grad_norm": 2.8484835624694824, |
|
"learning_rate": 9.817778943019228e-05, |
|
"loss": 1.8138, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 2.8051948051948052, |
|
"grad_norm": 2.821648359298706, |
|
"learning_rate": 9.816937592605579e-05, |
|
"loss": 1.6179, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 2.811688311688312, |
|
"grad_norm": 2.4450900554656982, |
|
"learning_rate": 9.816094340541256e-05, |
|
"loss": 1.4323, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 2.8181818181818183, |
|
"grad_norm": 2.6112494468688965, |
|
"learning_rate": 9.815249187159157e-05, |
|
"loss": 1.8037, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 2.824675324675325, |
|
"grad_norm": 2.6128196716308594, |
|
"learning_rate": 9.814402132792939e-05, |
|
"loss": 1.6578, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 2.8311688311688314, |
|
"grad_norm": 2.560913562774658, |
|
"learning_rate": 9.813553177777003e-05, |
|
"loss": 1.8626, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 2.8376623376623376, |
|
"grad_norm": 2.2726757526397705, |
|
"learning_rate": 9.812702322446505e-05, |
|
"loss": 1.3101, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 2.844155844155844, |
|
"grad_norm": 2.4498019218444824, |
|
"learning_rate": 9.81184956713735e-05, |
|
"loss": 1.6713, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 2.8506493506493507, |
|
"grad_norm": 2.671379804611206, |
|
"learning_rate": 9.810994912186189e-05, |
|
"loss": 1.7618, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 2.9830217361450195, |
|
"learning_rate": 9.81013835793043e-05, |
|
"loss": 1.8335, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.8636363636363638, |
|
"grad_norm": 2.728466749191284, |
|
"learning_rate": 9.809279904708224e-05, |
|
"loss": 1.6927, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 2.87012987012987, |
|
"grad_norm": 2.514505624771118, |
|
"learning_rate": 9.808419552858477e-05, |
|
"loss": 1.7542, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 2.8766233766233764, |
|
"grad_norm": 2.831462860107422, |
|
"learning_rate": 9.80755730272084e-05, |
|
"loss": 1.6759, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 2.883116883116883, |
|
"grad_norm": 2.8181638717651367, |
|
"learning_rate": 9.806693154635718e-05, |
|
"loss": 1.8944, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 2.8896103896103895, |
|
"grad_norm": 2.7978835105895996, |
|
"learning_rate": 9.80582710894426e-05, |
|
"loss": 1.7783, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 2.896103896103896, |
|
"grad_norm": 2.823338747024536, |
|
"learning_rate": 9.80495916598837e-05, |
|
"loss": 1.7405, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 2.9025974025974026, |
|
"grad_norm": 2.665881395339966, |
|
"learning_rate": 9.804089326110697e-05, |
|
"loss": 1.6666, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 2.909090909090909, |
|
"grad_norm": 2.69315767288208, |
|
"learning_rate": 9.80321758965464e-05, |
|
"loss": 1.759, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 2.9155844155844157, |
|
"grad_norm": 2.6156058311462402, |
|
"learning_rate": 9.802343956964348e-05, |
|
"loss": 1.6456, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 2.9220779220779223, |
|
"grad_norm": 2.9610443115234375, |
|
"learning_rate": 9.801468428384716e-05, |
|
"loss": 1.8471, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.928571428571429, |
|
"grad_norm": 2.6673457622528076, |
|
"learning_rate": 9.800591004261388e-05, |
|
"loss": 1.6938, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 2.935064935064935, |
|
"grad_norm": 2.542597532272339, |
|
"learning_rate": 9.79971168494076e-05, |
|
"loss": 1.5416, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 2.9415584415584415, |
|
"grad_norm": 2.7253355979919434, |
|
"learning_rate": 9.79883047076997e-05, |
|
"loss": 1.7721, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 2.948051948051948, |
|
"grad_norm": 2.6640748977661133, |
|
"learning_rate": 9.797947362096908e-05, |
|
"loss": 1.9404, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 2.9545454545454546, |
|
"grad_norm": 2.5272445678710938, |
|
"learning_rate": 9.797062359270215e-05, |
|
"loss": 1.6494, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 2.961038961038961, |
|
"grad_norm": 2.679551839828491, |
|
"learning_rate": 9.796175462639272e-05, |
|
"loss": 1.5135, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 2.9675324675324677, |
|
"grad_norm": 2.832418918609619, |
|
"learning_rate": 9.795286672554213e-05, |
|
"loss": 1.7057, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 2.974025974025974, |
|
"grad_norm": 2.6729843616485596, |
|
"learning_rate": 9.794395989365918e-05, |
|
"loss": 1.7592, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 2.9805194805194803, |
|
"grad_norm": 2.605210065841675, |
|
"learning_rate": 9.793503413426015e-05, |
|
"loss": 1.7291, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 2.987012987012987, |
|
"grad_norm": 2.906468391418457, |
|
"learning_rate": 9.79260894508688e-05, |
|
"loss": 1.6162, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.9935064935064934, |
|
"grad_norm": 2.4108238220214844, |
|
"learning_rate": 9.791712584701634e-05, |
|
"loss": 1.5179, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3461.14306640625, |
|
"learning_rate": 9.790814332624143e-05, |
|
"loss": 1.6267, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 3.0064935064935066, |
|
"grad_norm": 2.691493511199951, |
|
"learning_rate": 9.789914189209029e-05, |
|
"loss": 1.4611, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 3.012987012987013, |
|
"grad_norm": 2.130276918411255, |
|
"learning_rate": 9.789012154811647e-05, |
|
"loss": 1.0821, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 3.0194805194805197, |
|
"grad_norm": 2.652697801589966, |
|
"learning_rate": 9.788108229788111e-05, |
|
"loss": 1.4394, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 3.0259740259740258, |
|
"grad_norm": 2.6161065101623535, |
|
"learning_rate": 9.787202414495276e-05, |
|
"loss": 1.5084, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 3.0324675324675323, |
|
"grad_norm": 2.4646644592285156, |
|
"learning_rate": 9.786294709290741e-05, |
|
"loss": 1.375, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 3.038961038961039, |
|
"grad_norm": 2.6410462856292725, |
|
"learning_rate": 9.785385114532857e-05, |
|
"loss": 1.4476, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 3.0454545454545454, |
|
"grad_norm": 2.5742151737213135, |
|
"learning_rate": 9.784473630580713e-05, |
|
"loss": 1.4318, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 3.051948051948052, |
|
"grad_norm": 2.7610526084899902, |
|
"learning_rate": 9.783560257794154e-05, |
|
"loss": 1.5746, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.0584415584415585, |
|
"grad_norm": 2.6887526512145996, |
|
"learning_rate": 9.78264499653376e-05, |
|
"loss": 1.5727, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 3.064935064935065, |
|
"grad_norm": 2.533906936645508, |
|
"learning_rate": 9.781727847160865e-05, |
|
"loss": 1.2746, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 3.0714285714285716, |
|
"grad_norm": 2.960747241973877, |
|
"learning_rate": 9.780808810037543e-05, |
|
"loss": 1.5435, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 3.0779220779220777, |
|
"grad_norm": 2.580984115600586, |
|
"learning_rate": 9.779887885526615e-05, |
|
"loss": 1.4734, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 3.0844155844155843, |
|
"grad_norm": 2.5366058349609375, |
|
"learning_rate": 9.778965073991651e-05, |
|
"loss": 1.2849, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 3.090909090909091, |
|
"grad_norm": 2.808206796646118, |
|
"learning_rate": 9.778040375796959e-05, |
|
"loss": 1.3771, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 3.0974025974025974, |
|
"grad_norm": 3.073631763458252, |
|
"learning_rate": 9.777113791307598e-05, |
|
"loss": 1.3693, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 3.103896103896104, |
|
"grad_norm": 2.810788869857788, |
|
"learning_rate": 9.776185320889363e-05, |
|
"loss": 1.6245, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 3.1103896103896105, |
|
"grad_norm": 2.9978721141815186, |
|
"learning_rate": 9.775254964908807e-05, |
|
"loss": 1.5436, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 3.116883116883117, |
|
"grad_norm": 2.744044065475464, |
|
"learning_rate": 9.774322723733216e-05, |
|
"loss": 1.6038, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.1233766233766236, |
|
"grad_norm": 2.7648191452026367, |
|
"learning_rate": 9.773388597730623e-05, |
|
"loss": 1.3454, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 3.1298701298701297, |
|
"grad_norm": 2.9355039596557617, |
|
"learning_rate": 9.772452587269808e-05, |
|
"loss": 1.5168, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 3.1363636363636362, |
|
"grad_norm": 2.6012725830078125, |
|
"learning_rate": 9.771514692720293e-05, |
|
"loss": 1.4007, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 3.142857142857143, |
|
"grad_norm": 2.895432949066162, |
|
"learning_rate": 9.770574914452343e-05, |
|
"loss": 1.5449, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 3.1493506493506493, |
|
"grad_norm": 3.0441057682037354, |
|
"learning_rate": 9.769633252836969e-05, |
|
"loss": 1.7258, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 3.155844155844156, |
|
"grad_norm": 2.6639978885650635, |
|
"learning_rate": 9.768689708245922e-05, |
|
"loss": 1.2904, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 3.1623376623376624, |
|
"grad_norm": 2.972113847732544, |
|
"learning_rate": 9.767744281051701e-05, |
|
"loss": 1.5279, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 3.168831168831169, |
|
"grad_norm": 2.602752685546875, |
|
"learning_rate": 9.766796971627543e-05, |
|
"loss": 1.4921, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 3.175324675324675, |
|
"grad_norm": 2.472797393798828, |
|
"learning_rate": 9.765847780347432e-05, |
|
"loss": 1.4381, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 3.1818181818181817, |
|
"grad_norm": 2.773179531097412, |
|
"learning_rate": 9.764896707586096e-05, |
|
"loss": 1.471, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.188311688311688, |
|
"grad_norm": 2.8864376544952393, |
|
"learning_rate": 9.763943753718998e-05, |
|
"loss": 1.4751, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 3.1948051948051948, |
|
"grad_norm": 2.8007543087005615, |
|
"learning_rate": 9.762988919122355e-05, |
|
"loss": 1.3703, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 3.2012987012987013, |
|
"grad_norm": 2.5158145427703857, |
|
"learning_rate": 9.762032204173116e-05, |
|
"loss": 1.2792, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 3.207792207792208, |
|
"grad_norm": 2.662209987640381, |
|
"learning_rate": 9.761073609248981e-05, |
|
"loss": 1.5026, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 3.2142857142857144, |
|
"grad_norm": 2.8098185062408447, |
|
"learning_rate": 9.760113134728384e-05, |
|
"loss": 1.5946, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 3.220779220779221, |
|
"grad_norm": 2.5345005989074707, |
|
"learning_rate": 9.759150780990507e-05, |
|
"loss": 1.4414, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 3.227272727272727, |
|
"grad_norm": 2.4922263622283936, |
|
"learning_rate": 9.758186548415273e-05, |
|
"loss": 1.4034, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 3.2337662337662336, |
|
"grad_norm": 2.613332509994507, |
|
"learning_rate": 9.757220437383346e-05, |
|
"loss": 1.5602, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 3.24025974025974, |
|
"grad_norm": 2.9960076808929443, |
|
"learning_rate": 9.756252448276127e-05, |
|
"loss": 1.4437, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 3.2467532467532467, |
|
"grad_norm": 2.567506790161133, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 1.5557, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.2532467532467533, |
|
"grad_norm": 2.7986555099487305, |
|
"learning_rate": 9.754310837365155e-05, |
|
"loss": 1.458, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 3.25974025974026, |
|
"grad_norm": 2.6835241317749023, |
|
"learning_rate": 9.753337216327917e-05, |
|
"loss": 1.4782, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 3.2662337662337664, |
|
"grad_norm": 2.7536139488220215, |
|
"learning_rate": 9.752361718748423e-05, |
|
"loss": 1.5209, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 3.2727272727272725, |
|
"grad_norm": 2.784055709838867, |
|
"learning_rate": 9.751384345011787e-05, |
|
"loss": 1.5919, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 3.279220779220779, |
|
"grad_norm": 2.5098867416381836, |
|
"learning_rate": 9.750405095503859e-05, |
|
"loss": 1.5246, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 3.2857142857142856, |
|
"grad_norm": 2.397177219390869, |
|
"learning_rate": 9.749423970611231e-05, |
|
"loss": 1.3737, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 3.292207792207792, |
|
"grad_norm": 2.790895938873291, |
|
"learning_rate": 9.748440970721236e-05, |
|
"loss": 1.5794, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 3.2987012987012987, |
|
"grad_norm": 2.7680368423461914, |
|
"learning_rate": 9.747456096221945e-05, |
|
"loss": 1.5599, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 3.3051948051948052, |
|
"grad_norm": 2.880476951599121, |
|
"learning_rate": 9.746469347502174e-05, |
|
"loss": 1.6697, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 3.311688311688312, |
|
"grad_norm": 2.7532639503479004, |
|
"learning_rate": 9.745480724951473e-05, |
|
"loss": 1.5952, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 3.3181818181818183, |
|
"grad_norm": 2.685209035873413, |
|
"learning_rate": 9.744490228960138e-05, |
|
"loss": 1.5274, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 3.324675324675325, |
|
"grad_norm": 2.940244674682617, |
|
"learning_rate": 9.743497859919196e-05, |
|
"loss": 1.5009, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 3.331168831168831, |
|
"grad_norm": 2.9467644691467285, |
|
"learning_rate": 9.742503618220422e-05, |
|
"loss": 1.6353, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 3.3376623376623376, |
|
"grad_norm": 2.8322834968566895, |
|
"learning_rate": 9.741507504256327e-05, |
|
"loss": 1.653, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 3.344155844155844, |
|
"grad_norm": 3.078629493713379, |
|
"learning_rate": 9.74050951842016e-05, |
|
"loss": 1.6408, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 3.3506493506493507, |
|
"grad_norm": 2.8227908611297607, |
|
"learning_rate": 9.739509661105912e-05, |
|
"loss": 1.6134, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 3.357142857142857, |
|
"grad_norm": 2.8084356784820557, |
|
"learning_rate": 9.738507932708307e-05, |
|
"loss": 1.6953, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 3.3636363636363638, |
|
"grad_norm": 2.492335557937622, |
|
"learning_rate": 9.737504333622813e-05, |
|
"loss": 1.534, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 3.3701298701298703, |
|
"grad_norm": 2.6720969676971436, |
|
"learning_rate": 9.736498864245638e-05, |
|
"loss": 1.4763, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 3.3766233766233764, |
|
"grad_norm": 2.631711006164551, |
|
"learning_rate": 9.735491524973722e-05, |
|
"loss": 1.5452, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.383116883116883, |
|
"grad_norm": 3.2638559341430664, |
|
"learning_rate": 9.734482316204747e-05, |
|
"loss": 1.5939, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 3.3896103896103895, |
|
"grad_norm": 2.783334255218506, |
|
"learning_rate": 9.733471238337136e-05, |
|
"loss": 1.448, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 3.396103896103896, |
|
"grad_norm": 2.8106093406677246, |
|
"learning_rate": 9.73245829177004e-05, |
|
"loss": 1.378, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 3.4025974025974026, |
|
"grad_norm": 2.796281099319458, |
|
"learning_rate": 9.73144347690336e-05, |
|
"loss": 1.5344, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 3.409090909090909, |
|
"grad_norm": 2.7715847492218018, |
|
"learning_rate": 9.730426794137727e-05, |
|
"loss": 1.3997, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 3.4155844155844157, |
|
"grad_norm": 2.7550740242004395, |
|
"learning_rate": 9.729408243874511e-05, |
|
"loss": 1.6974, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 3.4220779220779223, |
|
"grad_norm": 2.8042678833007812, |
|
"learning_rate": 9.728387826515819e-05, |
|
"loss": 1.432, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 3.4285714285714284, |
|
"grad_norm": 2.7147512435913086, |
|
"learning_rate": 9.727365542464497e-05, |
|
"loss": 1.6309, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 3.435064935064935, |
|
"grad_norm": 2.6216771602630615, |
|
"learning_rate": 9.726341392124127e-05, |
|
"loss": 1.4175, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 3.4415584415584415, |
|
"grad_norm": 2.668849468231201, |
|
"learning_rate": 9.725315375899024e-05, |
|
"loss": 1.6321, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 3.448051948051948, |
|
"grad_norm": 2.8898913860321045, |
|
"learning_rate": 9.724287494194247e-05, |
|
"loss": 1.5724, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 3.4545454545454546, |
|
"grad_norm": 2.313223361968994, |
|
"learning_rate": 9.723257747415584e-05, |
|
"loss": 1.357, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 3.461038961038961, |
|
"grad_norm": 2.627986431121826, |
|
"learning_rate": 9.722226135969566e-05, |
|
"loss": 1.5693, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 3.4675324675324677, |
|
"grad_norm": 2.4512863159179688, |
|
"learning_rate": 9.721192660263453e-05, |
|
"loss": 1.6062, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 3.474025974025974, |
|
"grad_norm": 2.7816243171691895, |
|
"learning_rate": 9.72015732070525e-05, |
|
"loss": 1.6978, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 3.4805194805194803, |
|
"grad_norm": 2.9086880683898926, |
|
"learning_rate": 9.719120117703687e-05, |
|
"loss": 1.5865, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 3.487012987012987, |
|
"grad_norm": 2.2172327041625977, |
|
"learning_rate": 9.718081051668239e-05, |
|
"loss": 1.3989, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 3.4935064935064934, |
|
"grad_norm": 2.8032913208007812, |
|
"learning_rate": 9.717040123009111e-05, |
|
"loss": 1.6331, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"grad_norm": 2.4231390953063965, |
|
"learning_rate": 9.715997332137248e-05, |
|
"loss": 1.5196, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 3.5064935064935066, |
|
"grad_norm": 2.8790225982666016, |
|
"learning_rate": 9.714952679464323e-05, |
|
"loss": 1.5077, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.512987012987013, |
|
"grad_norm": 2.6722092628479004, |
|
"learning_rate": 9.713906165402751e-05, |
|
"loss": 1.6147, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 3.5194805194805197, |
|
"grad_norm": 2.7589612007141113, |
|
"learning_rate": 9.71285779036568e-05, |
|
"loss": 1.428, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 3.525974025974026, |
|
"grad_norm": 2.4026832580566406, |
|
"learning_rate": 9.71180755476699e-05, |
|
"loss": 1.2818, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 3.5324675324675323, |
|
"grad_norm": 2.9387869834899902, |
|
"learning_rate": 9.710755459021296e-05, |
|
"loss": 1.5423, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 3.538961038961039, |
|
"grad_norm": 2.7179949283599854, |
|
"learning_rate": 9.709701503543954e-05, |
|
"loss": 1.5384, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 3.5454545454545454, |
|
"grad_norm": 2.889209508895874, |
|
"learning_rate": 9.708645688751044e-05, |
|
"loss": 1.6434, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 3.551948051948052, |
|
"grad_norm": 2.6073267459869385, |
|
"learning_rate": 9.707588015059386e-05, |
|
"loss": 1.729, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 3.5584415584415585, |
|
"grad_norm": 2.7963855266571045, |
|
"learning_rate": 9.706528482886535e-05, |
|
"loss": 1.7076, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 3.564935064935065, |
|
"grad_norm": 2.9273459911346436, |
|
"learning_rate": 9.705467092650775e-05, |
|
"loss": 1.5023, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 3.571428571428571, |
|
"grad_norm": 2.3651621341705322, |
|
"learning_rate": 9.704403844771128e-05, |
|
"loss": 1.3576, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.5779220779220777, |
|
"grad_norm": 2.7819669246673584, |
|
"learning_rate": 9.703338739667346e-05, |
|
"loss": 1.7064, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 3.5844155844155843, |
|
"grad_norm": 2.7776331901550293, |
|
"learning_rate": 9.702271777759916e-05, |
|
"loss": 1.6858, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 3.590909090909091, |
|
"grad_norm": 2.456737995147705, |
|
"learning_rate": 9.701202959470058e-05, |
|
"loss": 1.5001, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 3.5974025974025974, |
|
"grad_norm": 2.6922426223754883, |
|
"learning_rate": 9.700132285219724e-05, |
|
"loss": 1.6717, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 3.603896103896104, |
|
"grad_norm": 2.968545913696289, |
|
"learning_rate": 9.699059755431598e-05, |
|
"loss": 1.6364, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 3.6103896103896105, |
|
"grad_norm": 2.709141731262207, |
|
"learning_rate": 9.697985370529101e-05, |
|
"loss": 1.7164, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 3.616883116883117, |
|
"grad_norm": 2.5271835327148438, |
|
"learning_rate": 9.696909130936382e-05, |
|
"loss": 1.3943, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 3.6233766233766236, |
|
"grad_norm": 2.51953125, |
|
"learning_rate": 9.695831037078322e-05, |
|
"loss": 1.5367, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 3.62987012987013, |
|
"grad_norm": 2.145310401916504, |
|
"learning_rate": 9.694751089380536e-05, |
|
"loss": 1.2712, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 3.6363636363636362, |
|
"grad_norm": 2.6842525005340576, |
|
"learning_rate": 9.693669288269372e-05, |
|
"loss": 1.5694, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.642857142857143, |
|
"grad_norm": 2.610161781311035, |
|
"learning_rate": 9.692585634171905e-05, |
|
"loss": 1.6228, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 3.6493506493506493, |
|
"grad_norm": 2.376155376434326, |
|
"learning_rate": 9.691500127515945e-05, |
|
"loss": 1.438, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 3.655844155844156, |
|
"grad_norm": 3.225393533706665, |
|
"learning_rate": 9.690412768730035e-05, |
|
"loss": 1.7239, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 3.6623376623376624, |
|
"grad_norm": 2.3960697650909424, |
|
"learning_rate": 9.689323558243446e-05, |
|
"loss": 1.4696, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 3.6688311688311686, |
|
"grad_norm": 2.8606388568878174, |
|
"learning_rate": 9.688232496486178e-05, |
|
"loss": 1.5147, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 3.675324675324675, |
|
"grad_norm": 2.504814863204956, |
|
"learning_rate": 9.687139583888972e-05, |
|
"loss": 1.5208, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 3.6818181818181817, |
|
"grad_norm": 2.563558340072632, |
|
"learning_rate": 9.686044820883285e-05, |
|
"loss": 1.3693, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 3.688311688311688, |
|
"grad_norm": 2.8867974281311035, |
|
"learning_rate": 9.684948207901315e-05, |
|
"loss": 1.6681, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 3.6948051948051948, |
|
"grad_norm": 2.5309319496154785, |
|
"learning_rate": 9.68384974537599e-05, |
|
"loss": 1.5182, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 3.7012987012987013, |
|
"grad_norm": 2.7548654079437256, |
|
"learning_rate": 9.682749433740962e-05, |
|
"loss": 1.7318, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.707792207792208, |
|
"grad_norm": 2.2360949516296387, |
|
"learning_rate": 9.681647273430618e-05, |
|
"loss": 1.223, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 3.7142857142857144, |
|
"grad_norm": 2.6854865550994873, |
|
"learning_rate": 9.680543264880076e-05, |
|
"loss": 1.6428, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 3.720779220779221, |
|
"grad_norm": 2.4563329219818115, |
|
"learning_rate": 9.679437408525174e-05, |
|
"loss": 1.5267, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 3.7272727272727275, |
|
"grad_norm": 2.505563735961914, |
|
"learning_rate": 9.678329704802494e-05, |
|
"loss": 1.4726, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 3.7337662337662336, |
|
"grad_norm": 2.7705132961273193, |
|
"learning_rate": 9.677220154149336e-05, |
|
"loss": 1.6072, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 3.74025974025974, |
|
"grad_norm": 2.2069296836853027, |
|
"learning_rate": 9.676108757003735e-05, |
|
"loss": 1.1699, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 3.7467532467532467, |
|
"grad_norm": 2.6705710887908936, |
|
"learning_rate": 9.674995513804452e-05, |
|
"loss": 1.5892, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 3.7532467532467533, |
|
"grad_norm": 2.477724313735962, |
|
"learning_rate": 9.673880424990977e-05, |
|
"loss": 1.4687, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 3.75974025974026, |
|
"grad_norm": 2.465447425842285, |
|
"learning_rate": 9.672763491003531e-05, |
|
"loss": 1.5392, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 3.7662337662337664, |
|
"grad_norm": 2.462146282196045, |
|
"learning_rate": 9.671644712283061e-05, |
|
"loss": 1.5552, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.7727272727272725, |
|
"grad_norm": 2.4764628410339355, |
|
"learning_rate": 9.670524089271242e-05, |
|
"loss": 1.6651, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 3.779220779220779, |
|
"grad_norm": 2.6047585010528564, |
|
"learning_rate": 9.669401622410482e-05, |
|
"loss": 1.4174, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 3.7857142857142856, |
|
"grad_norm": 2.6200366020202637, |
|
"learning_rate": 9.668277312143907e-05, |
|
"loss": 1.5273, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 3.792207792207792, |
|
"grad_norm": 3.028610944747925, |
|
"learning_rate": 9.667151158915382e-05, |
|
"loss": 1.698, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 3.7987012987012987, |
|
"grad_norm": 2.632977247238159, |
|
"learning_rate": 9.666023163169493e-05, |
|
"loss": 1.6539, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 3.8051948051948052, |
|
"grad_norm": 2.5790421962738037, |
|
"learning_rate": 9.664893325351555e-05, |
|
"loss": 1.5818, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 3.811688311688312, |
|
"grad_norm": 2.546786069869995, |
|
"learning_rate": 9.663761645907609e-05, |
|
"loss": 1.6406, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 3.8181818181818183, |
|
"grad_norm": 2.581007957458496, |
|
"learning_rate": 9.662628125284425e-05, |
|
"loss": 1.4747, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 3.824675324675325, |
|
"grad_norm": 2.5376641750335693, |
|
"learning_rate": 9.6614927639295e-05, |
|
"loss": 1.5814, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 3.8311688311688314, |
|
"grad_norm": 2.6359288692474365, |
|
"learning_rate": 9.660355562291055e-05, |
|
"loss": 1.5488, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.8376623376623376, |
|
"grad_norm": 2.6092121601104736, |
|
"learning_rate": 9.65921652081804e-05, |
|
"loss": 1.4414, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 3.844155844155844, |
|
"grad_norm": 2.4724388122558594, |
|
"learning_rate": 9.65807563996013e-05, |
|
"loss": 1.4849, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 3.8506493506493507, |
|
"grad_norm": 2.303741693496704, |
|
"learning_rate": 9.656932920167727e-05, |
|
"loss": 1.6192, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 3.857142857142857, |
|
"grad_norm": 2.2833895683288574, |
|
"learning_rate": 9.65578836189196e-05, |
|
"loss": 1.389, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 3.8636363636363638, |
|
"grad_norm": 2.262861728668213, |
|
"learning_rate": 9.654641965584678e-05, |
|
"loss": 1.5055, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 3.87012987012987, |
|
"grad_norm": 2.427997350692749, |
|
"learning_rate": 9.653493731698467e-05, |
|
"loss": 1.5428, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 3.8766233766233764, |
|
"grad_norm": 2.410557270050049, |
|
"learning_rate": 9.652343660686626e-05, |
|
"loss": 1.5879, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 3.883116883116883, |
|
"grad_norm": 2.3627400398254395, |
|
"learning_rate": 9.651191753003186e-05, |
|
"loss": 1.4858, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 3.8896103896103895, |
|
"grad_norm": 2.57161283493042, |
|
"learning_rate": 9.650038009102905e-05, |
|
"loss": 1.6244, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 3.896103896103896, |
|
"grad_norm": 2.7876200675964355, |
|
"learning_rate": 9.648882429441257e-05, |
|
"loss": 1.6798, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.9025974025974026, |
|
"grad_norm": 2.5130650997161865, |
|
"learning_rate": 9.647725014474452e-05, |
|
"loss": 1.4941, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 3.909090909090909, |
|
"grad_norm": 2.831350326538086, |
|
"learning_rate": 9.646565764659417e-05, |
|
"loss": 1.6509, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 3.9155844155844157, |
|
"grad_norm": 2.643336534500122, |
|
"learning_rate": 9.645404680453805e-05, |
|
"loss": 1.5174, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 3.9220779220779223, |
|
"grad_norm": 2.471973180770874, |
|
"learning_rate": 9.644241762315995e-05, |
|
"loss": 1.6618, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 3.928571428571429, |
|
"grad_norm": 2.491856575012207, |
|
"learning_rate": 9.643077010705087e-05, |
|
"loss": 1.4538, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 3.935064935064935, |
|
"grad_norm": 2.444056749343872, |
|
"learning_rate": 9.641910426080908e-05, |
|
"loss": 1.6486, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 3.9415584415584415, |
|
"grad_norm": 2.5531413555145264, |
|
"learning_rate": 9.640742008904005e-05, |
|
"loss": 1.5494, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 3.948051948051948, |
|
"grad_norm": 2.229311466217041, |
|
"learning_rate": 9.639571759635654e-05, |
|
"loss": 1.3976, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 3.9545454545454546, |
|
"grad_norm": 2.342977285385132, |
|
"learning_rate": 9.638399678737848e-05, |
|
"loss": 1.6625, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 3.961038961038961, |
|
"grad_norm": 2.172034978866577, |
|
"learning_rate": 9.637225766673307e-05, |
|
"loss": 1.3824, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.9675324675324677, |
|
"grad_norm": 2.5296199321746826, |
|
"learning_rate": 9.636050023905473e-05, |
|
"loss": 1.6165, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 3.974025974025974, |
|
"grad_norm": 2.536747455596924, |
|
"learning_rate": 9.63487245089851e-05, |
|
"loss": 1.7385, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 3.9805194805194803, |
|
"grad_norm": 2.177907705307007, |
|
"learning_rate": 9.633693048117306e-05, |
|
"loss": 1.4146, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 3.987012987012987, |
|
"grad_norm": 2.305320978164673, |
|
"learning_rate": 9.632511816027469e-05, |
|
"loss": 1.4506, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 3.9935064935064934, |
|
"grad_norm": 2.482697010040283, |
|
"learning_rate": 9.631328755095333e-05, |
|
"loss": 1.5763, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 5.43980073928833, |
|
"learning_rate": 9.630143865787951e-05, |
|
"loss": 1.6993, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 4.0064935064935066, |
|
"grad_norm": 2.2191660404205322, |
|
"learning_rate": 9.628957148573098e-05, |
|
"loss": 1.2847, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 4.012987012987013, |
|
"grad_norm": 2.345017671585083, |
|
"learning_rate": 9.62776860391927e-05, |
|
"loss": 1.2782, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 4.01948051948052, |
|
"grad_norm": 2.5423858165740967, |
|
"learning_rate": 9.626578232295689e-05, |
|
"loss": 1.6394, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 4.025974025974026, |
|
"grad_norm": 2.2237322330474854, |
|
"learning_rate": 9.62538603417229e-05, |
|
"loss": 1.2239, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 4.032467532467533, |
|
"grad_norm": 2.228304147720337, |
|
"learning_rate": 9.62419201001974e-05, |
|
"loss": 1.3274, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 4.038961038961039, |
|
"grad_norm": 2.5428571701049805, |
|
"learning_rate": 9.622996160309414e-05, |
|
"loss": 1.2906, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 4.045454545454546, |
|
"grad_norm": 2.316067695617676, |
|
"learning_rate": 9.62179848551342e-05, |
|
"loss": 1.2922, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 4.0519480519480515, |
|
"grad_norm": 2.2502663135528564, |
|
"learning_rate": 9.620598986104578e-05, |
|
"loss": 1.2758, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 4.058441558441558, |
|
"grad_norm": 2.4511966705322266, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 1.3631, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 4.064935064935065, |
|
"grad_norm": 2.5539820194244385, |
|
"learning_rate": 9.61819451534325e-05, |
|
"loss": 1.3669, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 4.071428571428571, |
|
"grad_norm": 2.432616949081421, |
|
"learning_rate": 9.616989544940009e-05, |
|
"loss": 1.4103, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 4.077922077922078, |
|
"grad_norm": 2.256044387817383, |
|
"learning_rate": 9.615782751822413e-05, |
|
"loss": 1.2974, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 4.084415584415584, |
|
"grad_norm": 2.4633290767669678, |
|
"learning_rate": 9.614574136466888e-05, |
|
"loss": 1.3829, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 4.090909090909091, |
|
"grad_norm": 2.8581559658050537, |
|
"learning_rate": 9.613363699350575e-05, |
|
"loss": 1.4883, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 4.097402597402597, |
|
"grad_norm": 2.2781195640563965, |
|
"learning_rate": 9.612151440951334e-05, |
|
"loss": 1.362, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 4.103896103896104, |
|
"grad_norm": 2.3701205253601074, |
|
"learning_rate": 9.610937361747748e-05, |
|
"loss": 1.2678, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 4.1103896103896105, |
|
"grad_norm": 2.225470542907715, |
|
"learning_rate": 9.609721462219114e-05, |
|
"loss": 1.2274, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 4.116883116883117, |
|
"grad_norm": 2.603336811065674, |
|
"learning_rate": 9.60850374284545e-05, |
|
"loss": 1.4969, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 4.123376623376624, |
|
"grad_norm": 2.4754090309143066, |
|
"learning_rate": 9.607284204107493e-05, |
|
"loss": 1.2693, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 4.12987012987013, |
|
"grad_norm": 2.260408639907837, |
|
"learning_rate": 9.606062846486698e-05, |
|
"loss": 1.2367, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 4.136363636363637, |
|
"grad_norm": 2.4500088691711426, |
|
"learning_rate": 9.604839670465236e-05, |
|
"loss": 1.3531, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 4.142857142857143, |
|
"grad_norm": 2.718536853790283, |
|
"learning_rate": 9.603614676526e-05, |
|
"loss": 1.4347, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 4.14935064935065, |
|
"grad_norm": 2.782520294189453, |
|
"learning_rate": 9.602387865152597e-05, |
|
"loss": 1.3553, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 4.1558441558441555, |
|
"grad_norm": 2.552777051925659, |
|
"learning_rate": 9.601159236829352e-05, |
|
"loss": 1.4462, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.162337662337662, |
|
"grad_norm": 2.2942702770233154, |
|
"learning_rate": 9.599928792041308e-05, |
|
"loss": 1.2738, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 4.1688311688311686, |
|
"grad_norm": 2.392411231994629, |
|
"learning_rate": 9.598696531274227e-05, |
|
"loss": 1.3295, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 4.175324675324675, |
|
"grad_norm": 2.619590997695923, |
|
"learning_rate": 9.597462455014585e-05, |
|
"loss": 1.3489, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 4.181818181818182, |
|
"grad_norm": 2.656822443008423, |
|
"learning_rate": 9.596226563749575e-05, |
|
"loss": 1.511, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 4.188311688311688, |
|
"grad_norm": 2.4362051486968994, |
|
"learning_rate": 9.594988857967106e-05, |
|
"loss": 1.2914, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 4.194805194805195, |
|
"grad_norm": 2.1643314361572266, |
|
"learning_rate": 9.593749338155809e-05, |
|
"loss": 1.1751, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 4.201298701298701, |
|
"grad_norm": 2.725790023803711, |
|
"learning_rate": 9.592508004805023e-05, |
|
"loss": 1.5275, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 4.207792207792208, |
|
"grad_norm": 2.422140598297119, |
|
"learning_rate": 9.59126485840481e-05, |
|
"loss": 1.3667, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 4.214285714285714, |
|
"grad_norm": 2.537302017211914, |
|
"learning_rate": 9.59001989944594e-05, |
|
"loss": 1.4091, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 4.220779220779221, |
|
"grad_norm": 2.6183526515960693, |
|
"learning_rate": 9.588773128419906e-05, |
|
"loss": 1.4391, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 4.2272727272727275, |
|
"grad_norm": 2.544766426086426, |
|
"learning_rate": 9.587524545818913e-05, |
|
"loss": 1.4811, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 4.233766233766234, |
|
"grad_norm": 2.4699995517730713, |
|
"learning_rate": 9.586274152135884e-05, |
|
"loss": 1.3246, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 4.240259740259741, |
|
"grad_norm": 2.2006990909576416, |
|
"learning_rate": 9.58502194786445e-05, |
|
"loss": 1.2254, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 4.246753246753247, |
|
"grad_norm": 2.6973538398742676, |
|
"learning_rate": 9.583767933498964e-05, |
|
"loss": 1.4668, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 4.253246753246753, |
|
"grad_norm": 2.2840769290924072, |
|
"learning_rate": 9.58251210953449e-05, |
|
"loss": 1.2807, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 4.259740259740259, |
|
"grad_norm": 2.4915285110473633, |
|
"learning_rate": 9.58125447646681e-05, |
|
"loss": 1.4697, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 4.266233766233766, |
|
"grad_norm": 2.6363418102264404, |
|
"learning_rate": 9.579995034792414e-05, |
|
"loss": 1.3831, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 4.2727272727272725, |
|
"grad_norm": 2.5724446773529053, |
|
"learning_rate": 9.578733785008513e-05, |
|
"loss": 1.4606, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 4.279220779220779, |
|
"grad_norm": 2.6677348613739014, |
|
"learning_rate": 9.577470727613025e-05, |
|
"loss": 1.488, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 4.285714285714286, |
|
"grad_norm": 2.5872716903686523, |
|
"learning_rate": 9.576205863104588e-05, |
|
"loss": 1.4098, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 4.292207792207792, |
|
"grad_norm": 2.636482000350952, |
|
"learning_rate": 9.57493919198255e-05, |
|
"loss": 1.4056, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 4.298701298701299, |
|
"grad_norm": 2.4787089824676514, |
|
"learning_rate": 9.573670714746972e-05, |
|
"loss": 1.3252, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 4.305194805194805, |
|
"grad_norm": 2.544703245162964, |
|
"learning_rate": 9.572400431898627e-05, |
|
"loss": 1.213, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 4.311688311688312, |
|
"grad_norm": 2.3949763774871826, |
|
"learning_rate": 9.571128343939005e-05, |
|
"loss": 1.4002, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 4.318181818181818, |
|
"grad_norm": 2.5433011054992676, |
|
"learning_rate": 9.569854451370307e-05, |
|
"loss": 1.4224, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 4.324675324675325, |
|
"grad_norm": 2.485722780227661, |
|
"learning_rate": 9.568578754695442e-05, |
|
"loss": 1.3944, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 4.3311688311688314, |
|
"grad_norm": 2.7001330852508545, |
|
"learning_rate": 9.567301254418038e-05, |
|
"loss": 1.3847, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 4.337662337662338, |
|
"grad_norm": 2.6091723442077637, |
|
"learning_rate": 9.566021951042433e-05, |
|
"loss": 1.48, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 4.3441558441558445, |
|
"grad_norm": 2.37465238571167, |
|
"learning_rate": 9.56474084507367e-05, |
|
"loss": 1.2859, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 4.35064935064935, |
|
"grad_norm": 2.298042058944702, |
|
"learning_rate": 9.563457937017515e-05, |
|
"loss": 1.2833, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 4.357142857142857, |
|
"grad_norm": 2.418095827102661, |
|
"learning_rate": 9.562173227380436e-05, |
|
"loss": 1.2609, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 4.363636363636363, |
|
"grad_norm": 2.5434913635253906, |
|
"learning_rate": 9.56088671666962e-05, |
|
"loss": 1.2695, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 4.37012987012987, |
|
"grad_norm": 2.607151508331299, |
|
"learning_rate": 9.559598405392958e-05, |
|
"loss": 1.4324, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 4.376623376623376, |
|
"grad_norm": 2.42596173286438, |
|
"learning_rate": 9.558308294059054e-05, |
|
"loss": 1.4373, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 4.383116883116883, |
|
"grad_norm": 2.508871078491211, |
|
"learning_rate": 9.557016383177227e-05, |
|
"loss": 1.4397, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 4.3896103896103895, |
|
"grad_norm": 2.110358715057373, |
|
"learning_rate": 9.555722673257501e-05, |
|
"loss": 1.139, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 4.396103896103896, |
|
"grad_norm": 2.3169806003570557, |
|
"learning_rate": 9.554427164810611e-05, |
|
"loss": 1.231, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 4.402597402597403, |
|
"grad_norm": 2.418757200241089, |
|
"learning_rate": 9.553129858348006e-05, |
|
"loss": 1.4744, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 4.409090909090909, |
|
"grad_norm": 2.496831178665161, |
|
"learning_rate": 9.55183075438184e-05, |
|
"loss": 1.4325, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 4.415584415584416, |
|
"grad_norm": 2.282007932662964, |
|
"learning_rate": 9.550529853424979e-05, |
|
"loss": 1.2713, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 4.422077922077922, |
|
"grad_norm": 2.46980619430542, |
|
"learning_rate": 9.549227155990999e-05, |
|
"loss": 1.426, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 4.428571428571429, |
|
"grad_norm": 2.3510262966156006, |
|
"learning_rate": 9.547922662594183e-05, |
|
"loss": 1.2731, |
|
"step": 682 |
|
}, |
|
    {
      "epoch": 4.435064935064935,
      "grad_norm": 2.6384055614471436,
      "learning_rate": 9.546616373749525e-05,
      "loss": 1.4716,
      "step": 683
    },
    {
      "epoch": 4.441558441558442,
      "grad_norm": 2.60732102394104,
      "learning_rate": 9.545308289972728e-05,
      "loss": 1.4314,
      "step": 684
    },
    {
      "epoch": 4.448051948051948,
      "grad_norm": 2.352541923522949,
      "learning_rate": 9.543998411780201e-05,
      "loss": 1.3909,
      "step": 685
    },
    {
      "epoch": 4.454545454545454,
      "grad_norm": 2.4203426837921143,
      "learning_rate": 9.542686739689065e-05,
      "loss": 1.3476,
      "step": 686
    },
    {
      "epoch": 4.461038961038961,
      "grad_norm": 2.63857364654541,
      "learning_rate": 9.541373274217145e-05,
      "loss": 1.4261,
      "step": 687
    },
    {
      "epoch": 4.467532467532467,
      "grad_norm": 2.3278229236602783,
      "learning_rate": 9.540058015882979e-05,
      "loss": 1.2848,
      "step": 688
    },
    {
      "epoch": 4.474025974025974,
      "grad_norm": 2.334977388381958,
      "learning_rate": 9.538740965205808e-05,
      "loss": 1.3625,
      "step": 689
    },
    {
      "epoch": 4.48051948051948,
      "grad_norm": 2.4834561347961426,
      "learning_rate": 9.537422122705585e-05,
      "loss": 1.302,
      "step": 690
    },
    {
      "epoch": 4.487012987012987,
      "grad_norm": 2.0580313205718994,
      "learning_rate": 9.536101488902966e-05,
      "loss": 1.1823,
      "step": 691
    },
    {
      "epoch": 4.4935064935064934,
      "grad_norm": 2.5352213382720947,
      "learning_rate": 9.534779064319318e-05,
      "loss": 1.3543,
      "step": 692
    },
    {
      "epoch": 4.5,
      "grad_norm": 2.424487829208374,
      "learning_rate": 9.533454849476712e-05,
      "loss": 1.4382,
      "step": 693
    },
    {
      "epoch": 4.5064935064935066,
      "grad_norm": 2.486064910888672,
      "learning_rate": 9.532128844897928e-05,
      "loss": 1.3491,
      "step": 694
    },
    {
      "epoch": 4.512987012987013,
      "grad_norm": 2.4160115718841553,
      "learning_rate": 9.530801051106449e-05,
      "loss": 1.401,
      "step": 695
    },
    {
      "epoch": 4.51948051948052,
      "grad_norm": 2.027177095413208,
      "learning_rate": 9.529471468626472e-05,
      "loss": 1.1387,
      "step": 696
    },
    {
      "epoch": 4.525974025974026,
      "grad_norm": 2.3802859783172607,
      "learning_rate": 9.528140097982889e-05,
      "loss": 1.504,
      "step": 697
    },
    {
      "epoch": 4.532467532467533,
      "grad_norm": 2.4218432903289795,
      "learning_rate": 9.526806939701309e-05,
      "loss": 1.3862,
      "step": 698
    },
    {
      "epoch": 4.538961038961039,
      "grad_norm": 2.2717883586883545,
      "learning_rate": 9.52547199430804e-05,
      "loss": 1.1086,
      "step": 699
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 2.2099993228912354,
      "learning_rate": 9.524135262330098e-05,
      "loss": 1.3446,
      "step": 700
    },
    {
      "epoch": 4.551948051948052,
      "grad_norm": 2.0593318939208984,
      "learning_rate": 9.522796744295202e-05,
      "loss": 1.0246,
      "step": 701
    },
    {
      "epoch": 4.558441558441558,
      "grad_norm": 2.5161662101745605,
      "learning_rate": 9.52145644073178e-05,
      "loss": 1.51,
      "step": 702
    },
    {
      "epoch": 4.564935064935065,
      "grad_norm": 2.523601531982422,
      "learning_rate": 9.520114352168958e-05,
      "loss": 1.4339,
      "step": 703
    },
    {
      "epoch": 4.571428571428571,
      "grad_norm": 2.5355663299560547,
      "learning_rate": 9.518770479136578e-05,
      "loss": 1.3231,
      "step": 704
    },
    {
      "epoch": 4.577922077922078,
      "grad_norm": 2.8767991065979004,
      "learning_rate": 9.517424822165175e-05,
      "loss": 1.5899,
      "step": 705
    },
    {
      "epoch": 4.584415584415584,
      "grad_norm": 2.6482222080230713,
      "learning_rate": 9.516077381785994e-05,
      "loss": 1.337,
      "step": 706
    },
    {
      "epoch": 4.590909090909091,
      "grad_norm": 2.7159810066223145,
      "learning_rate": 9.514728158530983e-05,
      "loss": 1.4175,
      "step": 707
    },
    {
      "epoch": 4.597402597402597,
      "grad_norm": 2.59430193901062,
      "learning_rate": 9.513377152932796e-05,
      "loss": 1.4217,
      "step": 708
    },
    {
      "epoch": 4.603896103896104,
      "grad_norm": 2.4445676803588867,
      "learning_rate": 9.512024365524787e-05,
      "loss": 1.4127,
      "step": 709
    },
    {
      "epoch": 4.6103896103896105,
      "grad_norm": 2.483778953552246,
      "learning_rate": 9.510669796841014e-05,
      "loss": 1.4122,
      "step": 710
    },
    {
      "epoch": 4.616883116883117,
      "grad_norm": 2.4489808082580566,
      "learning_rate": 9.509313447416242e-05,
      "loss": 1.5403,
      "step": 711
    },
    {
      "epoch": 4.623376623376624,
      "grad_norm": 2.4049267768859863,
      "learning_rate": 9.507955317785934e-05,
      "loss": 1.4104,
      "step": 712
    },
    {
      "epoch": 4.62987012987013,
      "grad_norm": 2.906292200088501,
      "learning_rate": 9.506595408486259e-05,
      "loss": 1.7337,
      "step": 713
    },
    {
      "epoch": 4.636363636363637,
      "grad_norm": 2.2860770225524902,
      "learning_rate": 9.505233720054087e-05,
      "loss": 1.3166,
      "step": 714
    },
    {
      "epoch": 4.642857142857143,
      "grad_norm": 2.660557270050049,
      "learning_rate": 9.503870253026991e-05,
      "loss": 1.4689,
      "step": 715
    },
    {
      "epoch": 4.64935064935065,
      "grad_norm": 2.4646763801574707,
      "learning_rate": 9.502505007943248e-05,
      "loss": 1.4584,
      "step": 716
    },
    {
      "epoch": 4.6558441558441555,
      "grad_norm": 2.287764072418213,
      "learning_rate": 9.501137985341832e-05,
      "loss": 1.4074,
      "step": 717
    },
    {
      "epoch": 4.662337662337662,
      "grad_norm": 3.6270928382873535,
      "learning_rate": 9.499769185762425e-05,
      "loss": 1.5872,
      "step": 718
    },
    {
      "epoch": 4.6688311688311686,
      "grad_norm": 2.5962812900543213,
      "learning_rate": 9.498398609745405e-05,
      "loss": 1.5825,
      "step": 719
    },
    {
      "epoch": 4.675324675324675,
      "grad_norm": 2.382645845413208,
      "learning_rate": 9.497026257831855e-05,
      "loss": 1.3595,
      "step": 720
    },
    {
      "epoch": 4.681818181818182,
      "grad_norm": 2.0103063583374023,
      "learning_rate": 9.49565213056356e-05,
      "loss": 1.1057,
      "step": 721
    },
    {
      "epoch": 4.688311688311688,
      "grad_norm": 2.2708091735839844,
      "learning_rate": 9.494276228482998e-05,
      "loss": 1.3579,
      "step": 722
    },
    {
      "epoch": 4.694805194805195,
      "grad_norm": 2.530606508255005,
      "learning_rate": 9.492898552133358e-05,
      "loss": 1.4606,
      "step": 723
    },
    {
      "epoch": 4.701298701298701,
      "grad_norm": 2.033069372177124,
      "learning_rate": 9.491519102058522e-05,
      "loss": 1.1266,
      "step": 724
    },
    {
      "epoch": 4.707792207792208,
      "grad_norm": 2.533560037612915,
      "learning_rate": 9.490137878803079e-05,
      "loss": 1.5121,
      "step": 725
    },
    {
      "epoch": 4.714285714285714,
      "grad_norm": 2.3064355850219727,
      "learning_rate": 9.48875488291231e-05,
      "loss": 1.3564,
      "step": 726
    },
    {
      "epoch": 4.720779220779221,
      "grad_norm": 2.3468894958496094,
      "learning_rate": 9.487370114932202e-05,
      "loss": 1.4063,
      "step": 727
    },
    {
      "epoch": 4.7272727272727275,
      "grad_norm": 2.2847111225128174,
      "learning_rate": 9.485983575409438e-05,
      "loss": 1.3642,
      "step": 728
    },
    {
      "epoch": 4.733766233766234,
      "grad_norm": 2.188500165939331,
      "learning_rate": 9.484595264891402e-05,
      "loss": 1.2824,
      "step": 729
    },
    {
      "epoch": 4.740259740259741,
      "grad_norm": 2.442934036254883,
      "learning_rate": 9.483205183926181e-05,
      "loss": 1.4042,
      "step": 730
    },
    {
      "epoch": 4.746753246753247,
      "grad_norm": 2.4196629524230957,
      "learning_rate": 9.48181333306255e-05,
      "loss": 1.4626,
      "step": 731
    },
    {
      "epoch": 4.753246753246753,
      "grad_norm": 2.290989637374878,
      "learning_rate": 9.480419712849995e-05,
      "loss": 1.2912,
      "step": 732
    },
    {
      "epoch": 4.759740259740259,
      "grad_norm": 2.430495262145996,
      "learning_rate": 9.479024323838693e-05,
      "loss": 1.4117,
      "step": 733
    },
    {
      "epoch": 4.766233766233766,
      "grad_norm": 2.0131633281707764,
      "learning_rate": 9.477627166579522e-05,
      "loss": 1.1689,
      "step": 734
    },
    {
      "epoch": 4.7727272727272725,
      "grad_norm": 2.582155466079712,
      "learning_rate": 9.476228241624059e-05,
      "loss": 1.6276,
      "step": 735
    },
    {
      "epoch": 4.779220779220779,
      "grad_norm": 2.3215701580047607,
      "learning_rate": 9.474827549524574e-05,
      "loss": 1.3326,
      "step": 736
    },
    {
      "epoch": 4.785714285714286,
      "grad_norm": 2.2553138732910156,
      "learning_rate": 9.473425090834041e-05,
      "loss": 1.3568,
      "step": 737
    },
    {
      "epoch": 4.792207792207792,
      "grad_norm": 2.5642342567443848,
      "learning_rate": 9.472020866106128e-05,
      "loss": 1.4779,
      "step": 738
    },
    {
      "epoch": 4.798701298701299,
      "grad_norm": 2.465707778930664,
      "learning_rate": 9.470614875895201e-05,
      "loss": 1.4478,
      "step": 739
    },
    {
      "epoch": 4.805194805194805,
      "grad_norm": 2.395329236984253,
      "learning_rate": 9.46920712075632e-05,
      "loss": 1.3339,
      "step": 740
    },
    {
      "epoch": 4.811688311688312,
      "grad_norm": 2.5474603176116943,
      "learning_rate": 9.467797601245246e-05,
      "loss": 1.4824,
      "step": 741
    },
    {
      "epoch": 4.818181818181818,
      "grad_norm": 2.498077154159546,
      "learning_rate": 9.466386317918436e-05,
      "loss": 1.4558,
      "step": 742
    },
    {
      "epoch": 4.824675324675325,
      "grad_norm": 2.258089065551758,
      "learning_rate": 9.464973271333042e-05,
      "loss": 1.3571,
      "step": 743
    },
    {
      "epoch": 4.8311688311688314,
      "grad_norm": 2.5701329708099365,
      "learning_rate": 9.463558462046912e-05,
      "loss": 1.5302,
      "step": 744
    },
    {
      "epoch": 4.837662337662338,
      "grad_norm": 2.347843885421753,
      "learning_rate": 9.46214189061859e-05,
      "loss": 1.3368,
      "step": 745
    },
    {
      "epoch": 4.8441558441558445,
      "grad_norm": 2.3211004734039307,
      "learning_rate": 9.460723557607316e-05,
      "loss": 1.3966,
      "step": 746
    },
    {
      "epoch": 4.85064935064935,
      "grad_norm": 2.381417989730835,
      "learning_rate": 9.459303463573026e-05,
      "loss": 1.267,
      "step": 747
    },
    {
      "epoch": 4.857142857142857,
      "grad_norm": 2.3821098804473877,
      "learning_rate": 9.457881609076352e-05,
      "loss": 1.4147,
      "step": 748
    },
    {
      "epoch": 4.863636363636363,
      "grad_norm": 2.3875110149383545,
      "learning_rate": 9.456457994678616e-05,
      "loss": 1.3116,
      "step": 749
    },
    {
      "epoch": 4.87012987012987,
      "grad_norm": 2.3099076747894287,
      "learning_rate": 9.45503262094184e-05,
      "loss": 1.2902,
      "step": 750
    },
    {
      "epoch": 4.876623376623376,
      "grad_norm": 2.0546610355377197,
      "learning_rate": 9.45360548842874e-05,
      "loss": 1.1813,
      "step": 751
    },
    {
      "epoch": 4.883116883116883,
      "grad_norm": 2.4475390911102295,
      "learning_rate": 9.452176597702725e-05,
      "loss": 1.5366,
      "step": 752
    },
    {
      "epoch": 4.8896103896103895,
      "grad_norm": 2.145540237426758,
      "learning_rate": 9.450745949327896e-05,
      "loss": 1.1983,
      "step": 753
    },
    {
      "epoch": 4.896103896103896,
      "grad_norm": 2.694192886352539,
      "learning_rate": 9.449313543869055e-05,
      "loss": 1.5095,
      "step": 754
    },
    {
      "epoch": 4.902597402597403,
      "grad_norm": 2.1562998294830322,
      "learning_rate": 9.447879381891692e-05,
      "loss": 1.1382,
      "step": 755
    },
    {
      "epoch": 4.909090909090909,
      "grad_norm": 2.4446427822113037,
      "learning_rate": 9.446443463961986e-05,
      "loss": 1.4053,
      "step": 756
    },
    {
      "epoch": 4.915584415584416,
      "grad_norm": 2.055360794067383,
      "learning_rate": 9.445005790646819e-05,
      "loss": 1.3194,
      "step": 757
    },
    {
      "epoch": 4.922077922077922,
      "grad_norm": 2.397583246231079,
      "learning_rate": 9.443566362513763e-05,
      "loss": 1.5682,
      "step": 758
    },
    {
      "epoch": 4.928571428571429,
      "grad_norm": 2.185490846633911,
      "learning_rate": 9.442125180131078e-05,
      "loss": 1.4359,
      "step": 759
    },
    {
      "epoch": 4.935064935064935,
      "grad_norm": 2.365079402923584,
      "learning_rate": 9.440682244067724e-05,
      "loss": 1.4141,
      "step": 760
    },
    {
      "epoch": 4.941558441558442,
      "grad_norm": 2.510624408721924,
      "learning_rate": 9.439237554893344e-05,
      "loss": 1.5375,
      "step": 761
    },
    {
      "epoch": 4.948051948051948,
      "grad_norm": 2.369779109954834,
      "learning_rate": 9.437791113178282e-05,
      "loss": 1.4662,
      "step": 762
    },
    {
      "epoch": 4.954545454545455,
      "grad_norm": 2.063880443572998,
      "learning_rate": 9.43634291949357e-05,
      "loss": 1.3133,
      "step": 763
    },
    {
      "epoch": 4.961038961038961,
      "grad_norm": 2.3315935134887695,
      "learning_rate": 9.434892974410932e-05,
      "loss": 1.4277,
      "step": 764
    },
    {
      "epoch": 4.967532467532467,
      "grad_norm": 2.5497238636016846,
      "learning_rate": 9.433441278502783e-05,
      "loss": 1.4894,
      "step": 765
    },
    {
      "epoch": 4.974025974025974,
      "grad_norm": 2.4642364978790283,
      "learning_rate": 9.431987832342228e-05,
      "loss": 1.4023,
      "step": 766
    },
    {
      "epoch": 4.98051948051948,
      "grad_norm": 2.380721092224121,
      "learning_rate": 9.430532636503068e-05,
      "loss": 1.3227,
      "step": 767
    },
    {
      "epoch": 4.987012987012987,
      "grad_norm": 2.2699685096740723,
      "learning_rate": 9.429075691559787e-05,
      "loss": 1.352,
      "step": 768
    },
    {
      "epoch": 4.9935064935064934,
      "grad_norm": 2.156005382537842,
      "learning_rate": 9.427616998087568e-05,
      "loss": 1.3047,
      "step": 769
    },
    {
      "epoch": 5.0,
      "grad_norm": 1483.0968017578125,
      "learning_rate": 9.426156556662276e-05,
      "loss": 1.3802,
      "step": 770
    },
    {
      "epoch": 5.0064935064935066,
      "grad_norm": 2.4799044132232666,
      "learning_rate": 9.424694367860473e-05,
      "loss": 1.214,
      "step": 771
    },
    {
      "epoch": 5.012987012987013,
      "grad_norm": 3.611436128616333,
      "learning_rate": 9.423230432259409e-05,
      "loss": 1.1361,
      "step": 772
    },
    {
      "epoch": 5.01948051948052,
      "grad_norm": 2.477595567703247,
      "learning_rate": 9.421764750437019e-05,
      "loss": 1.322,
      "step": 773
    },
    {
      "epoch": 5.025974025974026,
      "grad_norm": 2.0933218002319336,
      "learning_rate": 9.420297322971933e-05,
      "loss": 1.1123,
      "step": 774
    },
    {
      "epoch": 5.032467532467533,
      "grad_norm": 2.1223325729370117,
      "learning_rate": 9.418828150443467e-05,
      "loss": 1.2504,
      "step": 775
    },
    {
      "epoch": 5.038961038961039,
      "grad_norm": 2.640428304672241,
      "learning_rate": 9.41735723343163e-05,
      "loss": 1.3819,
      "step": 776
    },
    {
      "epoch": 5.045454545454546,
      "grad_norm": 2.309741258621216,
      "learning_rate": 9.415884572517113e-05,
      "loss": 1.2354,
      "step": 777
    },
    {
      "epoch": 5.0519480519480515,
      "grad_norm": 2.4606881141662598,
      "learning_rate": 9.414410168281302e-05,
      "loss": 1.4091,
      "step": 778
    },
    {
      "epoch": 5.058441558441558,
      "grad_norm": 2.1588926315307617,
      "learning_rate": 9.412934021306267e-05,
      "loss": 1.1284,
      "step": 779
    },
    {
      "epoch": 5.064935064935065,
      "grad_norm": 2.3062281608581543,
      "learning_rate": 9.411456132174767e-05,
      "loss": 1.2039,
      "step": 780
    },
    {
      "epoch": 5.071428571428571,
      "grad_norm": 2.493637800216675,
      "learning_rate": 9.40997650147025e-05,
      "loss": 1.2556,
      "step": 781
    },
    {
      "epoch": 5.077922077922078,
      "grad_norm": 2.254223346710205,
      "learning_rate": 9.408495129776852e-05,
      "loss": 1.14,
      "step": 782
    },
    {
      "epoch": 5.084415584415584,
      "grad_norm": 2.187127113342285,
      "learning_rate": 9.407012017679393e-05,
      "loss": 1.1906,
      "step": 783
    },
    {
      "epoch": 5.090909090909091,
      "grad_norm": 2.017028570175171,
      "learning_rate": 9.405527165763384e-05,
      "loss": 1.0582,
      "step": 784
    },
    {
      "epoch": 5.097402597402597,
      "grad_norm": 2.3100154399871826,
      "learning_rate": 9.404040574615018e-05,
      "loss": 1.3244,
      "step": 785
    },
    {
      "epoch": 5.103896103896104,
      "grad_norm": 2.231184482574463,
      "learning_rate": 9.402552244821182e-05,
      "loss": 1.0768,
      "step": 786
    },
    {
      "epoch": 5.1103896103896105,
      "grad_norm": 2.422355890274048,
      "learning_rate": 9.401062176969442e-05,
      "loss": 1.1453,
      "step": 787
    },
    {
      "epoch": 5.116883116883117,
      "grad_norm": 2.3468523025512695,
      "learning_rate": 9.399570371648052e-05,
      "loss": 1.1517,
      "step": 788
    },
    {
      "epoch": 5.123376623376624,
      "grad_norm": 2.1444785594940186,
      "learning_rate": 9.398076829445958e-05,
      "loss": 1.0645,
      "step": 789
    },
    {
      "epoch": 5.12987012987013,
      "grad_norm": 2.26538348197937,
      "learning_rate": 9.396581550952781e-05,
      "loss": 1.1867,
      "step": 790
    },
    {
      "epoch": 5.136363636363637,
      "grad_norm": 2.3012781143188477,
      "learning_rate": 9.395084536758838e-05,
      "loss": 1.1908,
      "step": 791
    },
    {
      "epoch": 5.142857142857143,
      "grad_norm": 2.353574514389038,
      "learning_rate": 9.393585787455124e-05,
      "loss": 1.1686,
      "step": 792
    },
    {
      "epoch": 5.14935064935065,
      "grad_norm": 2.434039354324341,
      "learning_rate": 9.392085303633323e-05,
      "loss": 1.1923,
      "step": 793
    },
    {
      "epoch": 5.1558441558441555,
      "grad_norm": 2.4139668941497803,
      "learning_rate": 9.3905830858858e-05,
      "loss": 1.2742,
      "step": 794
    },
    {
      "epoch": 5.162337662337662,
      "grad_norm": 2.5167605876922607,
      "learning_rate": 9.389079134805609e-05,
      "loss": 1.2763,
      "step": 795
    },
    {
      "epoch": 5.1688311688311686,
      "grad_norm": 2.29848313331604,
      "learning_rate": 9.387573450986484e-05,
      "loss": 1.2501,
      "step": 796
    },
    {
      "epoch": 5.175324675324675,
      "grad_norm": 2.4731240272521973,
      "learning_rate": 9.386066035022848e-05,
      "loss": 1.4086,
      "step": 797
    },
    {
      "epoch": 5.181818181818182,
      "grad_norm": 2.226640224456787,
      "learning_rate": 9.384556887509802e-05,
      "loss": 1.1452,
      "step": 798
    },
    {
      "epoch": 5.188311688311688,
      "grad_norm": 2.1757423877716064,
      "learning_rate": 9.383046009043134e-05,
      "loss": 1.2051,
      "step": 799
    },
    {
      "epoch": 5.194805194805195,
      "grad_norm": 2.5131468772888184,
      "learning_rate": 9.381533400219318e-05,
      "loss": 1.3115,
      "step": 800
    },
    {
      "epoch": 5.201298701298701,
      "grad_norm": 2.236072063446045,
      "learning_rate": 9.380019061635506e-05,
      "loss": 1.1337,
      "step": 801
    },
    {
      "epoch": 5.207792207792208,
      "grad_norm": 2.25288987159729,
      "learning_rate": 9.378502993889533e-05,
      "loss": 1.2222,
      "step": 802
    },
    {
      "epoch": 5.214285714285714,
      "grad_norm": 2.3511617183685303,
      "learning_rate": 9.37698519757992e-05,
      "loss": 1.228,
      "step": 803
    },
    {
      "epoch": 5.220779220779221,
      "grad_norm": 2.281393051147461,
      "learning_rate": 9.375465673305869e-05,
      "loss": 1.1854,
      "step": 804
    },
    {
      "epoch": 5.2272727272727275,
      "grad_norm": 2.451622247695923,
      "learning_rate": 9.373944421667265e-05,
      "loss": 1.2707,
      "step": 805
    },
    {
      "epoch": 5.233766233766234,
      "grad_norm": 2.3030169010162354,
      "learning_rate": 9.372421443264671e-05,
      "loss": 1.0945,
      "step": 806
    },
    {
      "epoch": 5.240259740259741,
      "grad_norm": 2.3412272930145264,
      "learning_rate": 9.370896738699339e-05,
      "loss": 1.1891,
      "step": 807
    },
    {
      "epoch": 5.246753246753247,
      "grad_norm": 2.457958936691284,
      "learning_rate": 9.369370308573198e-05,
      "loss": 1.2034,
      "step": 808
    },
    {
      "epoch": 5.253246753246753,
      "grad_norm": 2.3870041370391846,
      "learning_rate": 9.367842153488854e-05,
      "loss": 1.1308,
      "step": 809
    },
    {
      "epoch": 5.259740259740259,
      "grad_norm": 2.373983860015869,
      "learning_rate": 9.366312274049602e-05,
      "loss": 1.2246,
      "step": 810
    },
    {
      "epoch": 5.266233766233766,
      "grad_norm": 2.238525152206421,
      "learning_rate": 9.364780670859412e-05,
      "loss": 1.1794,
      "step": 811
    },
    {
      "epoch": 5.2727272727272725,
      "grad_norm": 2.2688422203063965,
      "learning_rate": 9.363247344522939e-05,
      "loss": 1.3021,
      "step": 812
    },
    {
      "epoch": 5.279220779220779,
      "grad_norm": 2.4562861919403076,
      "learning_rate": 9.361712295645515e-05,
      "loss": 1.2968,
      "step": 813
    },
    {
      "epoch": 5.285714285714286,
      "grad_norm": 1.8892732858657837,
      "learning_rate": 9.360175524833153e-05,
      "loss": 0.895,
      "step": 814
    },
    {
      "epoch": 5.292207792207792,
      "grad_norm": 2.4016454219818115,
      "learning_rate": 9.358637032692545e-05,
      "loss": 1.1788,
      "step": 815
    },
    {
      "epoch": 5.298701298701299,
      "grad_norm": 2.198923349380493,
      "learning_rate": 9.357096819831064e-05,
      "loss": 1.1777,
      "step": 816
    },
    {
      "epoch": 5.305194805194805,
      "grad_norm": 2.5778071880340576,
      "learning_rate": 9.355554886856762e-05,
      "loss": 1.2441,
      "step": 817
    },
    {
      "epoch": 5.311688311688312,
      "grad_norm": 2.578562021255493,
      "learning_rate": 9.354011234378369e-05,
      "loss": 1.3336,
      "step": 818
    },
    {
      "epoch": 5.318181818181818,
      "grad_norm": 2.29508376121521,
      "learning_rate": 9.352465863005296e-05,
      "loss": 1.3143,
      "step": 819
    },
    {
      "epoch": 5.324675324675325,
      "grad_norm": 2.3751208782196045,
      "learning_rate": 9.35091877334763e-05,
      "loss": 1.3194,
      "step": 820
    },
    {
      "epoch": 5.3311688311688314,
      "grad_norm": 2.456490993499756,
      "learning_rate": 9.349369966016134e-05,
      "loss": 1.2553,
      "step": 821
    },
    {
      "epoch": 5.337662337662338,
      "grad_norm": 1.9089747667312622,
      "learning_rate": 9.347819441622261e-05,
      "loss": 0.9778,
      "step": 822
    },
    {
      "epoch": 5.3441558441558445,
      "grad_norm": 2.301745891571045,
      "learning_rate": 9.346267200778126e-05,
      "loss": 1.1925,
      "step": 823
    },
    {
      "epoch": 5.35064935064935,
      "grad_norm": 2.3186118602752686,
      "learning_rate": 9.344713244096533e-05,
      "loss": 1.2568,
      "step": 824
    },
    {
      "epoch": 5.357142857142857,
      "grad_norm": 2.4224462509155273,
      "learning_rate": 9.343157572190957e-05,
      "loss": 1.2227,
      "step": 825
    },
    {
      "epoch": 5.363636363636363,
      "grad_norm": 2.4902286529541016,
      "learning_rate": 9.341600185675554e-05,
      "loss": 1.2466,
      "step": 826
    },
    {
      "epoch": 5.37012987012987,
      "grad_norm": 2.3658058643341064,
      "learning_rate": 9.340041085165155e-05,
      "loss": 1.288,
      "step": 827
    },
    {
      "epoch": 5.376623376623376,
      "grad_norm": 2.256941318511963,
      "learning_rate": 9.33848027127527e-05,
      "loss": 1.2251,
      "step": 828
    },
    {
      "epoch": 5.383116883116883,
      "grad_norm": 2.4632041454315186,
      "learning_rate": 9.336917744622081e-05,
      "loss": 1.3159,
      "step": 829
    },
    {
      "epoch": 5.3896103896103895,
      "grad_norm": 2.1642262935638428,
      "learning_rate": 9.33535350582245e-05,
      "loss": 1.1923,
      "step": 830
    },
    {
      "epoch": 5.396103896103896,
      "grad_norm": 2.154273748397827,
      "learning_rate": 9.333787555493914e-05,
      "loss": 1.1474,
      "step": 831
    },
    {
      "epoch": 5.402597402597403,
      "grad_norm": 2.319180965423584,
      "learning_rate": 9.332219894254686e-05,
      "loss": 1.2664,
      "step": 832
    },
    {
      "epoch": 5.409090909090909,
      "grad_norm": 2.0859668254852295,
      "learning_rate": 9.330650522723652e-05,
      "loss": 1.0003,
      "step": 833
    },
    {
      "epoch": 5.415584415584416,
      "grad_norm": 2.426114082336426,
      "learning_rate": 9.329079441520377e-05,
      "loss": 1.249,
      "step": 834
    },
    {
      "epoch": 5.422077922077922,
      "grad_norm": 2.3975894451141357,
      "learning_rate": 9.327506651265095e-05,
      "loss": 1.2747,
      "step": 835
    },
    {
      "epoch": 5.428571428571429,
      "grad_norm": 2.561692476272583,
      "learning_rate": 9.325932152578725e-05,
      "loss": 1.1971,
      "step": 836
    },
    {
      "epoch": 5.435064935064935,
      "grad_norm": 2.311870813369751,
      "learning_rate": 9.324355946082848e-05,
      "loss": 1.1388,
      "step": 837
    },
    {
      "epoch": 5.441558441558442,
      "grad_norm": 2.2985804080963135,
      "learning_rate": 9.322778032399728e-05,
      "loss": 1.1915,
      "step": 838
    },
    {
      "epoch": 5.448051948051948,
      "grad_norm": 2.5095090866088867,
      "learning_rate": 9.321198412152301e-05,
      "loss": 1.2289,
      "step": 839
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 2.4977407455444336,
      "learning_rate": 9.319617085964176e-05,
      "loss": 1.1548,
      "step": 840
    },
    {
      "epoch": 5.461038961038961,
      "grad_norm": 2.479334831237793,
      "learning_rate": 9.318034054459637e-05,
      "loss": 1.2881,
      "step": 841
    },
    {
      "epoch": 5.467532467532467,
      "grad_norm": 2.316788911819458,
      "learning_rate": 9.316449318263635e-05,
      "loss": 1.0807,
      "step": 842
    },
    {
      "epoch": 5.474025974025974,
      "grad_norm": 2.3556737899780273,
      "learning_rate": 9.314862878001803e-05,
      "loss": 1.3983,
      "step": 843
    },
    {
      "epoch": 5.48051948051948,
      "grad_norm": 2.27823805809021,
      "learning_rate": 9.313274734300439e-05,
      "loss": 1.2266,
      "step": 844
    },
    {
      "epoch": 5.487012987012987,
      "grad_norm": 2.213878870010376,
      "learning_rate": 9.31168488778652e-05,
      "loss": 1.1607,
      "step": 845
    },
    {
      "epoch": 5.4935064935064934,
      "grad_norm": 2.411067485809326,
      "learning_rate": 9.310093339087692e-05,
      "loss": 1.3101,
      "step": 846
    },
    {
      "epoch": 5.5,
      "grad_norm": 2.3228533267974854,
      "learning_rate": 9.308500088832272e-05,
      "loss": 1.1342,
      "step": 847
    },
    {
      "epoch": 5.5064935064935066,
      "grad_norm": 2.511704683303833,
      "learning_rate": 9.30690513764925e-05,
      "loss": 1.3019,
      "step": 848
    },
    {
      "epoch": 5.512987012987013,
      "grad_norm": 2.267284393310547,
      "learning_rate": 9.305308486168288e-05,
      "loss": 1.2113,
      "step": 849
    },
    {
      "epoch": 5.51948051948052,
      "grad_norm": 2.3928427696228027,
      "learning_rate": 9.30371013501972e-05,
      "loss": 1.2584,
      "step": 850
    },
    {
      "epoch": 5.525974025974026,
      "grad_norm": 2.4797680377960205,
      "learning_rate": 9.302110084834545e-05,
      "loss": 1.2511,
      "step": 851
    },
    {
      "epoch": 5.532467532467533,
      "grad_norm": 2.280144214630127,
      "learning_rate": 9.300508336244444e-05,
      "loss": 1.3214,
      "step": 852
    },
    {
      "epoch": 5.538961038961039,
      "grad_norm": 2.3917574882507324,
      "learning_rate": 9.298904889881757e-05,
      "loss": 1.3104,
      "step": 853
    },
    {
      "epoch": 5.545454545454545,
      "grad_norm": 1.8771497011184692,
      "learning_rate": 9.297299746379502e-05,
      "loss": 1.0212,
      "step": 854
    },
    {
      "epoch": 5.551948051948052,
      "grad_norm": 2.3260936737060547,
      "learning_rate": 9.295692906371363e-05,
      "loss": 1.2671,
      "step": 855
    },
    {
      "epoch": 5.558441558441558,
      "grad_norm": 2.335958957672119,
      "learning_rate": 9.294084370491694e-05,
      "loss": 1.2515,
      "step": 856
    },
    {
      "epoch": 5.564935064935065,
      "grad_norm": 2.451331853866577,
      "learning_rate": 9.292474139375522e-05,
      "loss": 1.0896,
      "step": 857
    },
    {
      "epoch": 5.571428571428571,
      "grad_norm": 2.263322353363037,
      "learning_rate": 9.29086221365854e-05,
      "loss": 1.247,
      "step": 858
    },
    {
      "epoch": 5.577922077922078,
      "grad_norm": 2.4265804290771484,
      "learning_rate": 9.289248593977109e-05,
      "loss": 1.2482,
      "step": 859
    },
    {
      "epoch": 5.584415584415584,
      "grad_norm": 2.4868714809417725,
      "learning_rate": 9.287633280968261e-05,
      "loss": 1.2864,
      "step": 860
    },
    {
      "epoch": 5.590909090909091,
      "grad_norm": 2.502161979675293,
      "learning_rate": 9.286016275269698e-05,
      "loss": 1.2699,
      "step": 861
    },
    {
      "epoch": 5.597402597402597,
      "grad_norm": 2.301053762435913,
      "learning_rate": 9.284397577519788e-05,
      "loss": 1.2422,
      "step": 862
    },
    {
      "epoch": 5.603896103896104,
      "grad_norm": 2.7383460998535156,
      "learning_rate": 9.282777188357565e-05,
      "loss": 1.3225,
      "step": 863
    },
    {
      "epoch": 5.6103896103896105,
      "grad_norm": 2.5754849910736084,
      "learning_rate": 9.281155108422733e-05,
      "loss": 1.3382,
      "step": 864
    },
    {
      "epoch": 5.616883116883117,
      "grad_norm": 2.30465030670166,
      "learning_rate": 9.279531338355666e-05,
      "loss": 1.2114,
      "step": 865
    },
    {
      "epoch": 5.623376623376624,
      "grad_norm": 2.1572651863098145,
      "learning_rate": 9.2779058787974e-05,
      "loss": 1.2831,
      "step": 866
    },
    {
      "epoch": 5.62987012987013,
      "grad_norm": 2.3018293380737305,
      "learning_rate": 9.276278730389642e-05,
      "loss": 1.1312,
      "step": 867
    },
    {
      "epoch": 5.636363636363637,
      "grad_norm": 2.3740503787994385,
      "learning_rate": 9.274649893774767e-05,
      "loss": 1.2546,
      "step": 868
    },
    {
      "epoch": 5.642857142857143,
      "grad_norm": 2.359429121017456,
      "learning_rate": 9.273019369595809e-05,
      "loss": 1.2289,
      "step": 869
    },
    {
      "epoch": 5.64935064935065,
      "grad_norm": 2.7115275859832764,
      "learning_rate": 9.271387158496476e-05,
      "loss": 1.3554,
      "step": 870
    },
    {
      "epoch": 5.6558441558441555,
      "grad_norm": 1.951027274131775,
      "learning_rate": 9.269753261121138e-05,
      "loss": 1.1395,
      "step": 871
    },
    {
      "epoch": 5.662337662337662,
      "grad_norm": 2.2826426029205322,
      "learning_rate": 9.268117678114834e-05,
      "loss": 1.2125,
      "step": 872
    },
    {
      "epoch": 5.6688311688311686,
      "grad_norm": 2.1922526359558105,
      "learning_rate": 9.266480410123264e-05,
      "loss": 1.0959,
      "step": 873
    },
    {
      "epoch": 5.675324675324675,
      "grad_norm": 2.3663859367370605,
      "learning_rate": 9.264841457792795e-05,
      "loss": 1.2634,
      "step": 874
    },
    {
      "epoch": 5.681818181818182,
      "grad_norm": 2.077533483505249,
      "learning_rate": 9.263200821770461e-05,
      "loss": 1.1805,
      "step": 875
    },
    {
      "epoch": 5.688311688311688,
      "grad_norm": 2.255629062652588,
      "learning_rate": 9.26155850270396e-05,
      "loss": 1.3269,
      "step": 876
    },
    {
      "epoch": 5.694805194805195,
      "grad_norm": 2.387958288192749,
      "learning_rate": 9.259914501241652e-05,
      "loss": 1.2001,
      "step": 877
    },
    {
      "epoch": 5.701298701298701,
      "grad_norm": 2.3133974075317383,
      "learning_rate": 9.258268818032561e-05,
      "loss": 1.2889,
      "step": 878
    },
    {
      "epoch": 5.707792207792208,
      "grad_norm": 2.17250919342041,
      "learning_rate": 9.256621453726379e-05,
      "loss": 1.2761,
      "step": 879
    },
    {
      "epoch": 5.714285714285714,
      "grad_norm": 2.288163900375366,
      "learning_rate": 9.254972408973461e-05,
      "loss": 1.2545,
      "step": 880
    },
    {
      "epoch": 5.720779220779221,
      "grad_norm": 2.2603843212127686,
      "learning_rate": 9.25332168442482e-05,
      "loss": 1.2539,
      "step": 881
    },
    {
      "epoch": 5.7272727272727275,
      "grad_norm": 2.111243963241577,
      "learning_rate": 9.251669280732137e-05,
      "loss": 1.2284,
      "step": 882
    },
    {
      "epoch": 5.733766233766234,
      "grad_norm": 9.176555633544922,
      "learning_rate": 9.250015198547757e-05,
      "loss": 1.3237,
      "step": 883
    },
    {
      "epoch": 5.740259740259741,
      "grad_norm": 2.652176856994629,
      "learning_rate": 9.248359438524683e-05,
      "loss": 1.3154,
      "step": 884
    },
    {
      "epoch": 5.746753246753247,
      "grad_norm": 2.4932117462158203,
      "learning_rate": 9.246702001316583e-05,
      "loss": 1.2712,
      "step": 885
    },
    {
      "epoch": 5.753246753246753,
      "grad_norm": 2.2778451442718506,
      "learning_rate": 9.245042887577788e-05,
      "loss": 1.3088,
      "step": 886
    },
    {
      "epoch": 5.759740259740259,
      "grad_norm": 2.240194082260132,
      "learning_rate": 9.243382097963291e-05,
      "loss": 1.1532,
      "step": 887
    },
    {
      "epoch": 5.766233766233766,
      "grad_norm": 2.4618711471557617,
      "learning_rate": 9.241719633128743e-05,
      "loss": 1.4605,
      "step": 888
    },
    {
      "epoch": 5.7727272727272725,
      "grad_norm": 2.5479254722595215,
      "learning_rate": 9.24005549373046e-05,
      "loss": 1.3666,
      "step": 889
    },
    {
      "epoch": 5.779220779220779,
      "grad_norm": 2.277982473373413,
      "learning_rate": 9.238389680425416e-05,
      "loss": 1.3209,
      "step": 890
    },
    {
      "epoch": 5.785714285714286,
      "grad_norm": 2.3099091053009033,
      "learning_rate": 9.236722193871252e-05,
      "loss": 1.2911,
      "step": 891
    },
    {
      "epoch": 5.792207792207792,
      "grad_norm": 2.1005160808563232,
      "learning_rate": 9.23505303472626e-05,
      "loss": 1.1609,
      "step": 892
    },
    {
      "epoch": 5.798701298701299,
      "grad_norm": 2.325089693069458,
      "learning_rate": 9.233382203649401e-05,
      "loss": 1.2268,
      "step": 893
    },
    {
      "epoch": 5.805194805194805,
      "grad_norm": 2.2837464809417725,
      "learning_rate": 9.231709701300293e-05,
      "loss": 1.2928,
      "step": 894
    },
    {
      "epoch": 5.811688311688312,
      "grad_norm": 2.1417577266693115,
      "learning_rate": 9.230035528339211e-05,
      "loss": 1.2102,
      "step": 895
    },
    {
      "epoch": 5.818181818181818,
      "grad_norm": 2.3167195320129395,
      "learning_rate": 9.228359685427095e-05,
      "loss": 1.2982,
      "step": 896
    },
    {
      "epoch": 5.824675324675325,
      "grad_norm": 2.281616449356079,
      "learning_rate": 9.226682173225537e-05,
      "loss": 1.2442,
      "step": 897
    },
    {
      "epoch": 5.8311688311688314,
      "grad_norm": 2.245262622833252,
      "learning_rate": 9.225002992396796e-05,
      "loss": 1.2757,
      "step": 898
    },
    {
      "epoch": 5.837662337662338,
      "grad_norm": 2.2756450176239014,
      "learning_rate": 9.223322143603785e-05,
      "loss": 1.1611,
      "step": 899
    },
    {
      "epoch": 5.8441558441558445,
      "grad_norm": 2.2296695709228516,
      "learning_rate": 9.221639627510076e-05,
      "loss": 1.2486,
      "step": 900
    },
    {
      "epoch": 5.85064935064935,
      "grad_norm": 2.456972599029541,
      "learning_rate": 9.2199554447799e-05,
      "loss": 1.2553,
      "step": 901
    },
    {
      "epoch": 5.857142857142857,
      "grad_norm": 2.2772555351257324,
      "learning_rate": 9.218269596078146e-05,
      "loss": 1.1621,
      "step": 902
    },
    {
      "epoch": 5.863636363636363,
      "grad_norm": 2.2198710441589355,
      "learning_rate": 9.216582082070358e-05,
      "loss": 1.1448,
      "step": 903
    },
    {
      "epoch": 5.87012987012987,
      "grad_norm": 2.201732873916626,
      "learning_rate": 9.214892903422744e-05,
      "loss": 1.1754,
      "step": 904
    },
    {
      "epoch": 5.876623376623376,
      "grad_norm": 2.263493061065674,
      "learning_rate": 9.213202060802161e-05,
      "loss": 1.3105,
      "step": 905
    },
    {
      "epoch": 5.883116883116883,
      "grad_norm": 2.3806960582733154,
      "learning_rate": 9.21150955487613e-05,
      "loss": 1.2363,
      "step": 906
    },
    {
      "epoch": 5.8896103896103895,
      "grad_norm": 2.324129104614258,
      "learning_rate": 9.209815386312824e-05,
      "loss": 1.3469,
      "step": 907
    },
    {
      "epoch": 5.896103896103896,
      "grad_norm": 2.4612674713134766,
      "learning_rate": 9.208119555781074e-05,
      "loss": 1.3424,
      "step": 908
    },
    {
      "epoch": 5.902597402597403,
      "grad_norm": 2.341952323913574,
      "learning_rate": 9.206422063950367e-05,
      "loss": 1.2235,
      "step": 909
    },
    {
      "epoch": 5.909090909090909,
      "grad_norm": 2.305021047592163,
      "learning_rate": 9.204722911490846e-05,
      "loss": 1.3399,
      "step": 910
    },
    {
      "epoch": 5.915584415584416,
      "grad_norm": 2.174239158630371,
      "learning_rate": 9.203022099073309e-05,
      "loss": 1.106,
      "step": 911
    },
    {
      "epoch": 5.922077922077922,
      "grad_norm": 2.136263847351074,
      "learning_rate": 9.201319627369211e-05,
      "loss": 1.185,
      "step": 912
    },
    {
      "epoch": 5.928571428571429,
      "grad_norm": 2.0439507961273193,
      "learning_rate": 9.199615497050659e-05,
      "loss": 1.1936,
      "step": 913
    },
    {
      "epoch": 5.935064935064935,
      "grad_norm": 2.331047773361206,
      "learning_rate": 9.19790970879042e-05,
      "loss": 1.2955,
      "step": 914
    },
    {
      "epoch": 5.941558441558442,
      "grad_norm": 2.1538991928100586,
      "learning_rate": 9.19620226326191e-05,
      "loss": 1.2146,
      "step": 915
    },
    {
      "epoch": 5.948051948051948,
      "grad_norm": 2.235663414001465,
      "learning_rate": 9.194493161139199e-05,
      "loss": 1.2987,
      "step": 916
    },
    {
      "epoch": 5.954545454545455,
      "grad_norm": 2.266671657562256,
      "learning_rate": 9.192782403097018e-05,
      "loss": 1.2618,
      "step": 917
    },
    {
      "epoch": 5.961038961038961,
      "grad_norm": 2.287224292755127,
      "learning_rate": 9.191069989810744e-05,
      "loss": 1.2801,
      "step": 918
    },
    {
      "epoch": 5.967532467532467,
      "grad_norm": 2.454397678375244,
      "learning_rate": 9.189355921956412e-05,
      "loss": 1.3529,
      "step": 919
    },
    {
      "epoch": 5.974025974025974,
      "grad_norm": 2.4021215438842773,
      "learning_rate": 9.187640200210708e-05,
      "loss": 1.2859,
      "step": 920
    },
    {
      "epoch": 5.98051948051948,
      "grad_norm": 2.4222583770751953,
      "learning_rate": 9.185922825250974e-05,
      "loss": 1.3745,
      "step": 921
    },
    {
      "epoch": 5.987012987012987,
      "grad_norm": 1.9028486013412476,
      "learning_rate": 9.1842037977552e-05,
      "loss": 1.0603,
      "step": 922
    },
    {
      "epoch": 5.9935064935064934,
      "grad_norm": 2.0316622257232666,
      "learning_rate": 9.182483118402033e-05,
      "loss": 1.1392,
      "step": 923
    },
    {
      "epoch": 6.0,
      "grad_norm": 264.6945495605469,
      "learning_rate": 9.180760787870765e-05,
      "loss": 1.1341,
      "step": 924
    },
    {
      "epoch": 6.0064935064935066,
      "grad_norm": 1.8904297351837158,
      "learning_rate": 9.179036806841352e-05,
      "loss": 0.9749,
      "step": 925
    },
    {
      "epoch": 6.012987012987013,
      "grad_norm": 1.7428091764450073,
      "learning_rate": 9.17731117599439e-05,
      "loss": 0.857,
      "step": 926
    },
    {
      "epoch": 6.01948051948052,
      "grad_norm": 2.1916327476501465,
      "learning_rate": 9.175583896011131e-05,
      "loss": 1.1159,
      "step": 927
    },
    {
      "epoch": 6.025974025974026,
      "grad_norm": 2.009938955307007,
      "learning_rate": 9.173854967573479e-05,
      "loss": 0.985,
      "step": 928
    },
    {
      "epoch": 6.032467532467533,
      "grad_norm": 2.4173030853271484,
      "learning_rate": 9.172124391363985e-05,
      "loss": 1.1468,
      "step": 929
    },
    {
      "epoch": 6.038961038961039,
      "grad_norm": 2.157717227935791,
      "learning_rate": 9.170392168065857e-05,
      "loss": 1.0836,
      "step": 930
    },
    {
      "epoch": 6.045454545454546,
      "grad_norm": 1.9934571981430054,
      "learning_rate": 9.168658298362946e-05,
      "loss": 1.0066,
      "step": 931
    },
    {
      "epoch": 6.0519480519480515,
      "grad_norm": 2.120309352874756,
      "learning_rate": 9.166922782939758e-05,
      "loss": 1.1275,
      "step": 932
    },
    {
      "epoch": 6.058441558441558,
      "grad_norm": 2.1822173595428467,
      "learning_rate": 9.165185622481447e-05,
      "loss": 1.1083,
      "step": 933
    },
    {
      "epoch": 6.064935064935065,
      "grad_norm": 2.1950483322143555,
      "learning_rate": 9.163446817673817e-05,
      "loss": 1.0423,
      "step": 934
    },
    {
      "epoch": 6.071428571428571,
      "grad_norm": 2.231881856918335,
      "learning_rate": 9.161706369203317e-05,
      "loss": 1.086,
      "step": 935
    },
    {
      "epoch": 6.077922077922078,
      "grad_norm": 2.369540214538574,
      "learning_rate": 9.159964277757054e-05,
      "loss": 0.9978,
      "step": 936
    },
    {
      "epoch": 6.084415584415584,
      "grad_norm": 2.1250226497650146,
      "learning_rate": 9.158220544022773e-05,
      "loss": 0.9875,
      "step": 937
    },
    {
      "epoch": 6.090909090909091,
      "grad_norm": 2.21665096282959,
      "learning_rate": 9.156475168688877e-05,
      "loss": 1.0885,
      "step": 938
    },
    {
      "epoch": 6.097402597402597,
      "grad_norm": 1.7771512269973755,
      "learning_rate": 9.154728152444408e-05,
      "loss": 0.789,
      "step": 939
    },
    {
      "epoch": 6.103896103896104,
      "grad_norm": 2.3178699016571045,
      "learning_rate": 9.152979495979063e-05,
      "loss": 1.0956,
      "step": 940
    },
    {
      "epoch": 6.1103896103896105,
      "grad_norm": 2.534238576889038,
      "learning_rate": 9.151229199983184e-05,
      "loss": 1.092,
      "step": 941
    },
    {
      "epoch": 6.116883116883117,
      "grad_norm": 2.4788708686828613,
      "learning_rate": 9.14947726514776e-05,
      "loss": 1.1408,
      "step": 942
    },
    {
      "epoch": 6.123376623376624,
      "grad_norm": 2.188908576965332,
      "learning_rate": 9.147723692164427e-05,
      "loss": 1.0523,
      "step": 943
    },
    {
      "epoch": 6.12987012987013,
      "grad_norm": 1.940743327140808,
      "learning_rate": 9.145968481725467e-05,
      "loss": 0.8914,
      "step": 944
    },
    {
      "epoch": 6.136363636363637,
      "grad_norm": 2.2595057487487793,
      "learning_rate": 9.14421163452381e-05,
      "loss": 1.064,
      "step": 945
    },
    {
      "epoch": 6.142857142857143,
      "grad_norm": 2.3622570037841797,
      "learning_rate": 9.142453151253032e-05,
      "loss": 1.0703,
      "step": 946
    },
    {
      "epoch": 6.14935064935065,
      "grad_norm": 2.097458600997925,
      "learning_rate": 9.140693032607353e-05,
      "loss": 0.9451,
      "step": 947
    },
    {
      "epoch": 6.1558441558441555,
      "grad_norm": 2.064142942428589,
      "learning_rate": 9.138931279281639e-05,
      "loss": 0.9752,
      "step": 948
    },
    {
      "epoch": 6.162337662337662,
      "grad_norm": 2.3236732482910156,
      "learning_rate": 9.137167891971407e-05,
      "loss": 1.0357,
      "step": 949
    },
    {
      "epoch": 6.1688311688311686,
      "grad_norm": 2.375629186630249,
      "learning_rate": 9.135402871372808e-05,
      "loss": 1.1145,
      "step": 950
    },
    {
      "epoch": 6.175324675324675,
      "grad_norm": 2.575246572494507,
      "learning_rate": 9.13363621818265e-05,
      "loss": 1.2448,
      "step": 951
    },
    {
      "epoch": 6.181818181818182,
      "grad_norm": 2.221675395965576,
      "learning_rate": 9.131867933098378e-05,
      "loss": 1.0203,
      "step": 952
    },
    {
      "epoch": 6.188311688311688,
      "grad_norm": 2.4579875469207764,
      "learning_rate": 9.13009801681808e-05,
      "loss": 1.1714,
      "step": 953
    },
    {
      "epoch": 6.194805194805195,
      "grad_norm": 2.4114532470703125,
      "learning_rate": 9.128326470040495e-05,
      "loss": 1.0732,
      "step": 954
    },
    {
      "epoch": 6.201298701298701,
      "grad_norm": 2.374720335006714,
      "learning_rate": 9.126553293464998e-05,
      "loss": 1.0896,
      "step": 955
    },
    {
      "epoch": 6.207792207792208,
      "grad_norm": 2.4675614833831787,
      "learning_rate": 9.124778487791615e-05,
      "loss": 1.1355,
      "step": 956
    },
    {
      "epoch": 6.214285714285714,
      "grad_norm": 2.0805985927581787,
      "learning_rate": 9.123002053721005e-05,
      "loss": 1.0721,
      "step": 957
    },
    {
      "epoch": 6.220779220779221,
      "grad_norm": 2.3115074634552,
      "learning_rate": 9.121223991954484e-05,
      "loss": 1.0833,
      "step": 958
    },
    {
      "epoch": 6.2272727272727275,
      "grad_norm": 2.4905924797058105,
      "learning_rate": 9.119444303193996e-05,
      "loss": 1.0787,
      "step": 959
    },
    {
      "epoch": 6.233766233766234,
      "grad_norm": 2.3526439666748047,
      "learning_rate": 9.117662988142138e-05,
      "loss": 1.1503,
      "step": 960
    },
    {
      "epoch": 6.240259740259741,
      "grad_norm": 2.615757703781128,
      "learning_rate": 9.115880047502142e-05,
      "loss": 1.1554,
      "step": 961
    },
    {
      "epoch": 6.246753246753247,
      "grad_norm": 2.563284158706665,
      "learning_rate": 9.114095481977888e-05,
      "loss": 1.3076,
      "step": 962
    },
    {
      "epoch": 6.253246753246753,
      "grad_norm": 2.29913592338562,
      "learning_rate": 9.112309292273891e-05,
      "loss": 1.1206,
      "step": 963
    },
    {
      "epoch": 6.259740259740259,
      "grad_norm": 2.1598641872406006,
      "learning_rate": 9.110521479095312e-05,
      "loss": 1.0892,
      "step": 964
    },
    {
      "epoch": 6.266233766233766,
      "grad_norm": 2.0765597820281982,
      "learning_rate": 9.108732043147952e-05,
      "loss": 1.013,
      "step": 965
    },
    {
      "epoch": 6.2727272727272725,
      "grad_norm": 2.281052589416504,
      "learning_rate": 9.10694098513825e-05,
      "loss": 1.1901,
      "step": 966
    },
    {
      "epoch": 6.279220779220779,
      "grad_norm": 2.169802665710449,
      "learning_rate": 9.10514830577329e-05,
      "loss": 1.0905,
      "step": 967
    },
    {
      "epoch": 6.285714285714286,
      "grad_norm": 2.2257354259490967,
      "learning_rate": 9.103354005760791e-05,
      "loss": 1.0368,
      "step": 968
    },
    {
      "epoch": 6.292207792207792,
      "grad_norm": 2.169473648071289,
      "learning_rate": 9.101558085809114e-05,
      "loss": 1.0166,
      "step": 969
    },
    {
      "epoch": 6.298701298701299,
      "grad_norm": 2.37520694732666,
      "learning_rate": 9.099760546627261e-05,
      "loss": 1.1822,
      "step": 970
    },
    {
      "epoch": 6.305194805194805,
      "grad_norm": 2.1943604946136475,
      "learning_rate": 9.097961388924873e-05,
      "loss": 1.1304,
      "step": 971
    },
    {
      "epoch": 6.311688311688312,
      "grad_norm": 2.553886651992798,
      "learning_rate": 9.096160613412228e-05,
      "loss": 1.3013,
      "step": 972
    },
    {
      "epoch": 6.318181818181818,
      "grad_norm": 2.3251588344573975,
      "learning_rate": 9.094358220800243e-05,
      "loss": 1.0648,
      "step": 973
    },
    {
      "epoch": 6.324675324675325,
      "grad_norm": 2.508411407470703,
      "learning_rate": 9.092554211800474e-05,
      "loss": 1.1993,
      "step": 974
    },
    {
      "epoch": 6.3311688311688314,
      "grad_norm": 2.10422682762146,
      "learning_rate": 9.090748587125118e-05,
      "loss": 1.1125,
      "step": 975
    },
    {
      "epoch": 6.337662337662338,
      "grad_norm": 2.246373176574707,
      "learning_rate": 9.088941347487003e-05,
      "loss": 1.1525,
      "step": 976
    },
    {
      "epoch": 6.3441558441558445,
      "grad_norm": 2.19040846824646,
      "learning_rate": 9.0871324935996e-05,
      "loss": 1.0981,
      "step": 977
    },
    {
      "epoch": 6.35064935064935,
      "grad_norm": 2.4398250579833984,
      "learning_rate": 9.085322026177017e-05,
      "loss": 1.2318,
      "step": 978
    },
    {
      "epoch": 6.357142857142857,
      "grad_norm": 2.36503267288208,
      "learning_rate": 9.083509945933997e-05,
      "loss": 1.1359,
      "step": 979
    },
    {
      "epoch": 6.363636363636363,
      "grad_norm": 2.545269012451172,
      "learning_rate": 9.081696253585921e-05,
      "loss": 1.0359,
      "step": 980
    },
    {
      "epoch": 6.37012987012987,
      "grad_norm": 2.456841468811035,
      "learning_rate": 9.079880949848805e-05,
      "loss": 1.1641,
      "step": 981
    },
    {
      "epoch": 6.376623376623376,
      "grad_norm": 2.2646665573120117,
      "learning_rate": 9.078064035439301e-05,
      "loss": 1.1425,
      "step": 982
    },
    {
      "epoch": 6.383116883116883,
      "grad_norm": 2.244145154953003,
      "learning_rate": 9.076245511074703e-05,
      "loss": 1.0961,
      "step": 983
    },
    {
      "epoch": 6.3896103896103895,
      "grad_norm": 2.3785531520843506,
      "learning_rate": 9.074425377472931e-05,
      "loss": 1.1008,
      "step": 984
    },
    {
      "epoch": 6.396103896103896,
      "grad_norm": 2.486354351043701,
      "learning_rate": 9.072603635352548e-05,
      "loss": 1.1167,
      "step": 985
    },
    {
      "epoch": 6.402597402597403,
      "grad_norm": 2.5050578117370605,
      "learning_rate": 9.070780285432745e-05,
      "loss": 1.1885,
      "step": 986
    },
    {
      "epoch": 6.409090909090909,
      "grad_norm": 2.260310649871826,
      "learning_rate": 9.068955328433355e-05,
      "loss": 1.1354,
      "step": 987
    },
    {
      "epoch": 6.415584415584416,
      "grad_norm": 2.1151485443115234,
      "learning_rate": 9.067128765074842e-05,
      "loss": 1.0162,
      "step": 988
    },
    {
      "epoch": 6.422077922077922,
      "grad_norm": 2.35497784614563,
      "learning_rate": 9.065300596078303e-05,
      "loss": 1.0928,
      "step": 989
    },
    {
      "epoch": 6.428571428571429,
      "grad_norm": 2.429023504257202,
      "learning_rate": 9.06347082216547e-05,
      "loss": 1.1322,
      "step": 990
    },
    {
      "epoch": 6.435064935064935,
      "grad_norm": 2.556748867034912,
      "learning_rate": 9.06163944405871e-05,
      "loss": 1.208,
      "step": 991
    },
    {
      "epoch": 6.441558441558442,
      "grad_norm": 2.451526403427124,
      "learning_rate": 9.059806462481023e-05,
      "loss": 1.0806,
      "step": 992
    },
    {
      "epoch": 6.448051948051948,
      "grad_norm": 2.4315357208251953,
      "learning_rate": 9.057971878156036e-05,
      "loss": 1.2842,
      "step": 993
    },
    {
      "epoch": 6.454545454545454,
      "grad_norm": 2.3418030738830566,
      "learning_rate": 9.056135691808019e-05,
      "loss": 1.1049,
      "step": 994
    },
    {
      "epoch": 6.461038961038961,
      "grad_norm": 2.454900026321411,
      "learning_rate": 9.054297904161868e-05,
      "loss": 1.2332,
      "step": 995
    },
    {
      "epoch": 6.467532467532467,
      "grad_norm": 2.3480823040008545,
      "learning_rate": 9.052458515943111e-05,
      "loss": 1.0631,
      "step": 996
    },
    {
      "epoch": 6.474025974025974,
      "grad_norm": 2.2208402156829834,
      "learning_rate": 9.050617527877911e-05,
      "loss": 1.1265,
      "step": 997
    },
    {
      "epoch": 6.48051948051948,
      "grad_norm": 2.285851240158081,
      "learning_rate": 9.048774940693062e-05,
      "loss": 1.0793,
      "step": 998
    },
    {
      "epoch": 6.487012987012987,
      "grad_norm": 1.9776757955551147,
      "learning_rate": 9.046930755115985e-05,
      "loss": 0.8993,
      "step": 999
    },
    {
      "epoch": 6.4935064935064934,
      "grad_norm": 2.425506591796875,
      "learning_rate": 9.045084971874738e-05,
      "loss": 1.183,
      "step": 1000
    }
  ],
  "logging_steps": 1,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 33,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 776174615424000.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}