|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 3105, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004830917874396135, |
|
"grad_norm": 0.4590633809566498, |
|
"learning_rate": 1.3986464711569449e-05, |
|
"loss": 3.9553, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.00966183574879227, |
|
"grad_norm": 0.562667965888977, |
|
"learning_rate": 1.396390589751853e-05, |
|
"loss": 3.8462, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.014492753623188406, |
|
"grad_norm": 0.5178580284118652, |
|
"learning_rate": 1.3941347083467613e-05, |
|
"loss": 3.7563, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01932367149758454, |
|
"grad_norm": 0.549095869064331, |
|
"learning_rate": 1.3918788269416693e-05, |
|
"loss": 3.5661, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.024154589371980676, |
|
"grad_norm": 0.5863974094390869, |
|
"learning_rate": 1.3896229455365775e-05, |
|
"loss": 3.3998, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.028985507246376812, |
|
"grad_norm": 0.6717987060546875, |
|
"learning_rate": 1.3873670641314857e-05, |
|
"loss": 3.2327, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.033816425120772944, |
|
"grad_norm": 0.6742504835128784, |
|
"learning_rate": 1.3851111827263939e-05, |
|
"loss": 3.0419, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03864734299516908, |
|
"grad_norm": 0.7488217949867249, |
|
"learning_rate": 1.382855301321302e-05, |
|
"loss": 2.8092, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.043478260869565216, |
|
"grad_norm": 0.7981260418891907, |
|
"learning_rate": 1.3805994199162101e-05, |
|
"loss": 2.5637, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.04830917874396135, |
|
"grad_norm": 0.7837016582489014, |
|
"learning_rate": 1.3783435385111182e-05, |
|
"loss": 2.311, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05314009661835749, |
|
"grad_norm": 0.8491142988204956, |
|
"learning_rate": 1.3760876571060263e-05, |
|
"loss": 2.0814, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.057971014492753624, |
|
"grad_norm": 0.7609491348266602, |
|
"learning_rate": 1.3738317757009345e-05, |
|
"loss": 1.7811, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06280193236714976, |
|
"grad_norm": 0.7346836924552917, |
|
"learning_rate": 1.3715758942958427e-05, |
|
"loss": 1.5668, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.06763285024154589, |
|
"grad_norm": 0.7201610803604126, |
|
"learning_rate": 1.369320012890751e-05, |
|
"loss": 1.3152, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.07246376811594203, |
|
"grad_norm": 0.6400141716003418, |
|
"learning_rate": 1.3670641314856591e-05, |
|
"loss": 1.0742, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.07729468599033816, |
|
"grad_norm": 0.38841813802719116, |
|
"learning_rate": 1.3648082500805672e-05, |
|
"loss": 0.9316, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0821256038647343, |
|
"grad_norm": 0.4104098677635193, |
|
"learning_rate": 1.3625523686754754e-05, |
|
"loss": 0.8594, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.08695652173913043, |
|
"grad_norm": 0.5033922791481018, |
|
"learning_rate": 1.3602964872703834e-05, |
|
"loss": 0.7873, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.09178743961352658, |
|
"grad_norm": 0.3223589360713959, |
|
"learning_rate": 1.3580406058652916e-05, |
|
"loss": 0.7265, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.0966183574879227, |
|
"grad_norm": 0.31837838888168335, |
|
"learning_rate": 1.3557847244601998e-05, |
|
"loss": 0.7056, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10144927536231885, |
|
"grad_norm": 0.35547807812690735, |
|
"learning_rate": 1.353528843055108e-05, |
|
"loss": 0.6684, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.10628019323671498, |
|
"grad_norm": 0.3624265789985657, |
|
"learning_rate": 1.351272961650016e-05, |
|
"loss": 0.6424, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1111111111111111, |
|
"grad_norm": 0.37934672832489014, |
|
"learning_rate": 1.3490170802449242e-05, |
|
"loss": 0.6473, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.11594202898550725, |
|
"grad_norm": 0.43373095989227295, |
|
"learning_rate": 1.3467611988398324e-05, |
|
"loss": 0.6108, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.12077294685990338, |
|
"grad_norm": 0.46837344765663147, |
|
"learning_rate": 1.3445053174347406e-05, |
|
"loss": 0.5718, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.12560386473429952, |
|
"grad_norm": 0.4655485153198242, |
|
"learning_rate": 1.3422494360296488e-05, |
|
"loss": 0.5618, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.13043478260869565, |
|
"grad_norm": 0.5438677072525024, |
|
"learning_rate": 1.3399935546245569e-05, |
|
"loss": 0.5834, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.13526570048309178, |
|
"grad_norm": 0.5986974239349365, |
|
"learning_rate": 1.3377376732194649e-05, |
|
"loss": 0.51, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.14009661835748793, |
|
"grad_norm": 0.7286536693572998, |
|
"learning_rate": 1.3354817918143731e-05, |
|
"loss": 0.4704, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.14492753623188406, |
|
"grad_norm": 0.9337557554244995, |
|
"learning_rate": 1.3332259104092813e-05, |
|
"loss": 0.4379, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1497584541062802, |
|
"grad_norm": 1.0971410274505615, |
|
"learning_rate": 1.3309700290041895e-05, |
|
"loss": 0.3994, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.15458937198067632, |
|
"grad_norm": 1.5142974853515625, |
|
"learning_rate": 1.3287141475990977e-05, |
|
"loss": 0.3397, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.15942028985507245, |
|
"grad_norm": 0.5735320448875427, |
|
"learning_rate": 1.3264582661940057e-05, |
|
"loss": 0.3047, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.1642512077294686, |
|
"grad_norm": 0.31310656666755676, |
|
"learning_rate": 1.324202384788914e-05, |
|
"loss": 0.2761, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.16908212560386474, |
|
"grad_norm": 0.3159743845462799, |
|
"learning_rate": 1.3219465033838221e-05, |
|
"loss": 0.2584, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.17391304347826086, |
|
"grad_norm": 0.2747821509838104, |
|
"learning_rate": 1.3196906219787303e-05, |
|
"loss": 0.2696, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.178743961352657, |
|
"grad_norm": 0.26819175481796265, |
|
"learning_rate": 1.3174347405736383e-05, |
|
"loss": 0.2633, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.18357487922705315, |
|
"grad_norm": 0.27827367186546326, |
|
"learning_rate": 1.3151788591685465e-05, |
|
"loss": 0.2587, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.18840579710144928, |
|
"grad_norm": 0.30673256516456604, |
|
"learning_rate": 1.3129229777634546e-05, |
|
"loss": 0.2749, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.1932367149758454, |
|
"grad_norm": 0.28767552971839905, |
|
"learning_rate": 1.3106670963583628e-05, |
|
"loss": 0.2527, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.19806763285024154, |
|
"grad_norm": 0.2788391709327698, |
|
"learning_rate": 1.308411214953271e-05, |
|
"loss": 0.2548, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.2028985507246377, |
|
"grad_norm": 0.26774516701698303, |
|
"learning_rate": 1.3061553335481792e-05, |
|
"loss": 0.2426, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.20772946859903382, |
|
"grad_norm": 0.3280729651451111, |
|
"learning_rate": 1.3038994521430874e-05, |
|
"loss": 0.2343, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.21256038647342995, |
|
"grad_norm": 0.28210124373435974, |
|
"learning_rate": 1.3016435707379956e-05, |
|
"loss": 0.2385, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.21739130434782608, |
|
"grad_norm": 0.2706020176410675, |
|
"learning_rate": 1.2993876893329036e-05, |
|
"loss": 0.2418, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 0.2814071476459503, |
|
"learning_rate": 1.2971318079278118e-05, |
|
"loss": 0.2309, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.22705314009661837, |
|
"grad_norm": 0.311310738325119, |
|
"learning_rate": 1.2948759265227198e-05, |
|
"loss": 0.239, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.2318840579710145, |
|
"grad_norm": 0.31521573662757874, |
|
"learning_rate": 1.292620045117628e-05, |
|
"loss": 0.2271, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.23671497584541062, |
|
"grad_norm": 0.3365338146686554, |
|
"learning_rate": 1.2903641637125362e-05, |
|
"loss": 0.2418, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.24154589371980675, |
|
"grad_norm": 0.32416385412216187, |
|
"learning_rate": 1.2881082823074444e-05, |
|
"loss": 0.2282, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2463768115942029, |
|
"grad_norm": 0.3272862434387207, |
|
"learning_rate": 1.2858524009023525e-05, |
|
"loss": 0.2319, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.25120772946859904, |
|
"grad_norm": 0.34287795424461365, |
|
"learning_rate": 1.2835965194972607e-05, |
|
"loss": 0.2529, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2560386473429952, |
|
"grad_norm": 0.338498055934906, |
|
"learning_rate": 1.2813406380921689e-05, |
|
"loss": 0.2216, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.2608695652173913, |
|
"grad_norm": 0.34228625893592834, |
|
"learning_rate": 1.279084756687077e-05, |
|
"loss": 0.2306, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.26570048309178745, |
|
"grad_norm": 0.38022157549858093, |
|
"learning_rate": 1.2768288752819853e-05, |
|
"loss": 0.2091, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.27053140096618356, |
|
"grad_norm": 0.35013625025749207, |
|
"learning_rate": 1.2745729938768935e-05, |
|
"loss": 0.2219, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2753623188405797, |
|
"grad_norm": 0.3915255665779114, |
|
"learning_rate": 1.2723171124718013e-05, |
|
"loss": 0.202, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.28019323671497587, |
|
"grad_norm": 0.4278201758861542, |
|
"learning_rate": 1.2700612310667095e-05, |
|
"loss": 0.2223, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.28502415458937197, |
|
"grad_norm": 0.4377511441707611, |
|
"learning_rate": 1.2678053496616177e-05, |
|
"loss": 0.2001, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.2898550724637681, |
|
"grad_norm": 0.44731107354164124, |
|
"learning_rate": 1.265549468256526e-05, |
|
"loss": 0.1884, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2946859903381642, |
|
"grad_norm": 0.4644255042076111, |
|
"learning_rate": 1.2632935868514341e-05, |
|
"loss": 0.195, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.2995169082125604, |
|
"grad_norm": 0.46685394644737244, |
|
"learning_rate": 1.2610377054463423e-05, |
|
"loss": 0.1867, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.30434782608695654, |
|
"grad_norm": 0.484323650598526, |
|
"learning_rate": 1.2587818240412503e-05, |
|
"loss": 0.1855, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.30917874396135264, |
|
"grad_norm": 0.4667232632637024, |
|
"learning_rate": 1.2565259426361585e-05, |
|
"loss": 0.1823, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.3140096618357488, |
|
"grad_norm": 0.5028926134109497, |
|
"learning_rate": 1.2542700612310667e-05, |
|
"loss": 0.1726, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.3188405797101449, |
|
"grad_norm": 0.5125951766967773, |
|
"learning_rate": 1.252014179825975e-05, |
|
"loss": 0.1709, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.32367149758454106, |
|
"grad_norm": 0.4960808753967285, |
|
"learning_rate": 1.249758298420883e-05, |
|
"loss": 0.1539, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.3285024154589372, |
|
"grad_norm": 0.42437031865119934, |
|
"learning_rate": 1.2475024170157912e-05, |
|
"loss": 0.1484, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 0.32370465993881226, |
|
"learning_rate": 1.2452465356106992e-05, |
|
"loss": 0.128, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.33816425120772947, |
|
"grad_norm": 0.2940502166748047, |
|
"learning_rate": 1.2429906542056074e-05, |
|
"loss": 0.1152, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.34299516908212563, |
|
"grad_norm": 0.3140239715576172, |
|
"learning_rate": 1.2407347728005156e-05, |
|
"loss": 0.1466, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.34782608695652173, |
|
"grad_norm": 0.26790526509284973, |
|
"learning_rate": 1.2384788913954238e-05, |
|
"loss": 0.1148, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3526570048309179, |
|
"grad_norm": 0.276149183511734, |
|
"learning_rate": 1.236223009990332e-05, |
|
"loss": 0.0996, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.357487922705314, |
|
"grad_norm": 0.24558521807193756, |
|
"learning_rate": 1.2339671285852402e-05, |
|
"loss": 0.105, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.36231884057971014, |
|
"grad_norm": 0.31819969415664673, |
|
"learning_rate": 1.2317112471801482e-05, |
|
"loss": 0.0968, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.3671497584541063, |
|
"grad_norm": 0.28777143359184265, |
|
"learning_rate": 1.2294553657750564e-05, |
|
"loss": 0.1029, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3719806763285024, |
|
"grad_norm": 0.26321807503700256, |
|
"learning_rate": 1.2271994843699645e-05, |
|
"loss": 0.1018, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.37681159420289856, |
|
"grad_norm": 0.23004086315631866, |
|
"learning_rate": 1.2249436029648727e-05, |
|
"loss": 0.0912, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.38164251207729466, |
|
"grad_norm": 0.21847793459892273, |
|
"learning_rate": 1.2226877215597809e-05, |
|
"loss": 0.0913, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.3864734299516908, |
|
"grad_norm": 0.19117720425128937, |
|
"learning_rate": 1.220431840154689e-05, |
|
"loss": 0.0895, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.391304347826087, |
|
"grad_norm": 0.2535386085510254, |
|
"learning_rate": 1.2181759587495971e-05, |
|
"loss": 0.1103, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.3961352657004831, |
|
"grad_norm": 0.26542574167251587, |
|
"learning_rate": 1.2159200773445053e-05, |
|
"loss": 0.0923, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.40096618357487923, |
|
"grad_norm": 0.20197734236717224, |
|
"learning_rate": 1.2136641959394135e-05, |
|
"loss": 0.0931, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.4057971014492754, |
|
"grad_norm": 0.20673911273479462, |
|
"learning_rate": 1.2114083145343217e-05, |
|
"loss": 0.1138, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4106280193236715, |
|
"grad_norm": 0.24391110241413116, |
|
"learning_rate": 1.2091524331292299e-05, |
|
"loss": 0.094, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.41545893719806765, |
|
"grad_norm": 0.2456451952457428, |
|
"learning_rate": 1.2068965517241379e-05, |
|
"loss": 0.1078, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.42028985507246375, |
|
"grad_norm": 0.29903218150138855, |
|
"learning_rate": 1.204640670319046e-05, |
|
"loss": 0.0999, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.4251207729468599, |
|
"grad_norm": 0.17596346139907837, |
|
"learning_rate": 1.2023847889139541e-05, |
|
"loss": 0.0995, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.42995169082125606, |
|
"grad_norm": 0.14841659367084503, |
|
"learning_rate": 1.2001289075088623e-05, |
|
"loss": 0.0934, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.43478260869565216, |
|
"grad_norm": 0.18399696052074432, |
|
"learning_rate": 1.1978730261037705e-05, |
|
"loss": 0.0967, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4396135265700483, |
|
"grad_norm": 0.1746302992105484, |
|
"learning_rate": 1.1956171446986787e-05, |
|
"loss": 0.0947, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 0.2423829287290573, |
|
"learning_rate": 1.1933612632935868e-05, |
|
"loss": 0.0936, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4492753623188406, |
|
"grad_norm": 0.15260176360607147, |
|
"learning_rate": 1.191105381888495e-05, |
|
"loss": 0.1034, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.45410628019323673, |
|
"grad_norm": 0.2334187626838684, |
|
"learning_rate": 1.1888495004834032e-05, |
|
"loss": 0.1053, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.45893719806763283, |
|
"grad_norm": 0.19356365501880646, |
|
"learning_rate": 1.1865936190783114e-05, |
|
"loss": 0.0842, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.463768115942029, |
|
"grad_norm": 0.20395216345787048, |
|
"learning_rate": 1.1843377376732194e-05, |
|
"loss": 0.0792, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.46859903381642515, |
|
"grad_norm": 0.1807161122560501, |
|
"learning_rate": 1.1820818562681276e-05, |
|
"loss": 0.0882, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.47342995169082125, |
|
"grad_norm": 0.16710110008716583, |
|
"learning_rate": 1.1798259748630358e-05, |
|
"loss": 0.0822, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4782608695652174, |
|
"grad_norm": 0.1776697188615799, |
|
"learning_rate": 1.1775700934579438e-05, |
|
"loss": 0.0922, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.4830917874396135, |
|
"grad_norm": 0.21817447245121002, |
|
"learning_rate": 1.175314212052852e-05, |
|
"loss": 0.0949, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.48792270531400966, |
|
"grad_norm": 0.20692448318004608, |
|
"learning_rate": 1.1730583306477602e-05, |
|
"loss": 0.0907, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.4927536231884058, |
|
"grad_norm": 0.1886768341064453, |
|
"learning_rate": 1.1708024492426684e-05, |
|
"loss": 0.0858, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.4975845410628019, |
|
"grad_norm": 0.19374988973140717, |
|
"learning_rate": 1.1685465678375766e-05, |
|
"loss": 0.084, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.5024154589371981, |
|
"grad_norm": 0.1982010304927826, |
|
"learning_rate": 1.1662906864324847e-05, |
|
"loss": 0.087, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5072463768115942, |
|
"grad_norm": 0.292267769575119, |
|
"learning_rate": 1.1640348050273929e-05, |
|
"loss": 0.0918, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.5120772946859904, |
|
"grad_norm": 0.19581086933612823, |
|
"learning_rate": 1.1617789236223009e-05, |
|
"loss": 0.1012, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5169082125603864, |
|
"grad_norm": 0.1730077862739563, |
|
"learning_rate": 1.159523042217209e-05, |
|
"loss": 0.0853, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.5217391304347826, |
|
"grad_norm": 0.20485533773899078, |
|
"learning_rate": 1.1572671608121173e-05, |
|
"loss": 0.093, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5265700483091788, |
|
"grad_norm": 0.2086704820394516, |
|
"learning_rate": 1.1550112794070255e-05, |
|
"loss": 0.0945, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.5314009661835749, |
|
"grad_norm": 0.15911467373371124, |
|
"learning_rate": 1.1527553980019335e-05, |
|
"loss": 0.1034, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5362318840579711, |
|
"grad_norm": 0.2168796807527542, |
|
"learning_rate": 1.1504995165968417e-05, |
|
"loss": 0.0945, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.5410628019323671, |
|
"grad_norm": 0.20228448510169983, |
|
"learning_rate": 1.1482436351917499e-05, |
|
"loss": 0.1029, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5458937198067633, |
|
"grad_norm": 0.2441129982471466, |
|
"learning_rate": 1.1459877537866581e-05, |
|
"loss": 0.0906, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.5507246376811594, |
|
"grad_norm": 0.22443729639053345, |
|
"learning_rate": 1.1437318723815663e-05, |
|
"loss": 0.0994, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 0.18132899701595306, |
|
"learning_rate": 1.1414759909764745e-05, |
|
"loss": 0.0938, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.5603864734299517, |
|
"grad_norm": 0.19448505342006683, |
|
"learning_rate": 1.1392201095713824e-05, |
|
"loss": 0.0835, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5652173913043478, |
|
"grad_norm": 0.23075686395168304, |
|
"learning_rate": 1.1369642281662906e-05, |
|
"loss": 0.0983, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.5700483091787439, |
|
"grad_norm": 0.22883069515228271, |
|
"learning_rate": 1.1347083467611988e-05, |
|
"loss": 0.0787, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5748792270531401, |
|
"grad_norm": 0.23262719810009003, |
|
"learning_rate": 1.132452465356107e-05, |
|
"loss": 0.0939, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.5797101449275363, |
|
"grad_norm": 0.20541128516197205, |
|
"learning_rate": 1.1301965839510152e-05, |
|
"loss": 0.0776, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5845410628019324, |
|
"grad_norm": 0.21663478016853333, |
|
"learning_rate": 1.1279407025459234e-05, |
|
"loss": 0.0918, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.5893719806763285, |
|
"grad_norm": 0.22586220502853394, |
|
"learning_rate": 1.1256848211408314e-05, |
|
"loss": 0.0824, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5942028985507246, |
|
"grad_norm": 0.1860446035861969, |
|
"learning_rate": 1.1234289397357396e-05, |
|
"loss": 0.0853, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.5990338164251208, |
|
"grad_norm": 0.195932075381279, |
|
"learning_rate": 1.1211730583306478e-05, |
|
"loss": 0.0818, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6038647342995169, |
|
"grad_norm": 0.19570867717266083, |
|
"learning_rate": 1.118917176925556e-05, |
|
"loss": 0.0859, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.6086956521739131, |
|
"grad_norm": 0.16349905729293823, |
|
"learning_rate": 1.116661295520464e-05, |
|
"loss": 0.0938, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6135265700483091, |
|
"grad_norm": 0.1926320493221283, |
|
"learning_rate": 1.1144054141153722e-05, |
|
"loss": 0.0846, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.6183574879227053, |
|
"grad_norm": 0.19020161032676697, |
|
"learning_rate": 1.1121495327102803e-05, |
|
"loss": 0.086, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6231884057971014, |
|
"grad_norm": 0.20265896618366241, |
|
"learning_rate": 1.1098936513051885e-05, |
|
"loss": 0.0793, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.6280193236714976, |
|
"grad_norm": 0.17398878931999207, |
|
"learning_rate": 1.1076377699000967e-05, |
|
"loss": 0.09, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6328502415458938, |
|
"grad_norm": 0.19005955755710602, |
|
"learning_rate": 1.1053818884950049e-05, |
|
"loss": 0.0792, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.6376811594202898, |
|
"grad_norm": 0.18029935657978058, |
|
"learning_rate": 1.103126007089913e-05, |
|
"loss": 0.0923, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.642512077294686, |
|
"grad_norm": 0.1881086826324463, |
|
"learning_rate": 1.1008701256848212e-05, |
|
"loss": 0.0936, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.6473429951690821, |
|
"grad_norm": 0.269255667924881, |
|
"learning_rate": 1.0986142442797293e-05, |
|
"loss": 0.0916, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.6521739130434783, |
|
"grad_norm": 0.18038909137248993, |
|
"learning_rate": 1.0963583628746373e-05, |
|
"loss": 0.0855, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.6570048309178744, |
|
"grad_norm": 0.17990528047084808, |
|
"learning_rate": 1.0941024814695455e-05, |
|
"loss": 0.0926, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.6618357487922706, |
|
"grad_norm": 0.2431405931711197, |
|
"learning_rate": 1.0918466000644537e-05, |
|
"loss": 0.0917, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.23243603110313416, |
|
"learning_rate": 1.0895907186593619e-05, |
|
"loss": 0.1046, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.6714975845410628, |
|
"grad_norm": 0.20667722821235657, |
|
"learning_rate": 1.0873348372542701e-05, |
|
"loss": 0.0952, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.6763285024154589, |
|
"grad_norm": 0.20045587420463562, |
|
"learning_rate": 1.0850789558491781e-05, |
|
"loss": 0.0839, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6811594202898551, |
|
"grad_norm": 0.15829257667064667, |
|
"learning_rate": 1.0828230744440863e-05, |
|
"loss": 0.0931, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.6859903381642513, |
|
"grad_norm": 0.18778935074806213, |
|
"learning_rate": 1.0805671930389945e-05, |
|
"loss": 0.083, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.6908212560386473, |
|
"grad_norm": 0.1949867457151413, |
|
"learning_rate": 1.0783113116339027e-05, |
|
"loss": 0.0797, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.6956521739130435, |
|
"grad_norm": 0.3177832365036011, |
|
"learning_rate": 1.076055430228811e-05, |
|
"loss": 0.081, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7004830917874396, |
|
"grad_norm": 0.1714804619550705, |
|
"learning_rate": 1.073799548823719e-05, |
|
"loss": 0.0908, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.7053140096618358, |
|
"grad_norm": 0.25471800565719604, |
|
"learning_rate": 1.071543667418627e-05, |
|
"loss": 0.0988, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7101449275362319, |
|
"grad_norm": 0.21141599118709564, |
|
"learning_rate": 1.0692877860135352e-05, |
|
"loss": 0.0944, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.714975845410628, |
|
"grad_norm": 0.17371544241905212, |
|
"learning_rate": 1.0670319046084434e-05, |
|
"loss": 0.0836, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7198067632850241, |
|
"grad_norm": 0.19493460655212402, |
|
"learning_rate": 1.0647760232033516e-05, |
|
"loss": 0.0855, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.7246376811594203, |
|
"grad_norm": 0.25241127610206604, |
|
"learning_rate": 1.0625201417982598e-05, |
|
"loss": 0.0844, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7294685990338164, |
|
"grad_norm": 0.2515096664428711, |
|
"learning_rate": 1.060264260393168e-05, |
|
"loss": 0.0883, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.7342995169082126, |
|
"grad_norm": 0.15292327105998993, |
|
"learning_rate": 1.058008378988076e-05, |
|
"loss": 0.0792, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.7391304347826086, |
|
"grad_norm": 0.20495273172855377, |
|
"learning_rate": 1.0557524975829842e-05, |
|
"loss": 0.0789, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.7439613526570048, |
|
"grad_norm": 0.261168897151947, |
|
"learning_rate": 1.0534966161778924e-05, |
|
"loss": 0.0832, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.748792270531401, |
|
"grad_norm": 0.28218600153923035, |
|
"learning_rate": 1.0512407347728004e-05, |
|
"loss": 0.1046, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.7536231884057971, |
|
"grad_norm": 0.1737246960401535, |
|
"learning_rate": 1.0489848533677086e-05, |
|
"loss": 0.0722, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.7584541062801933, |
|
"grad_norm": 0.24183641374111176, |
|
"learning_rate": 1.0467289719626168e-05, |
|
"loss": 0.0752, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.7632850241545893, |
|
"grad_norm": 0.23685990273952484, |
|
"learning_rate": 1.0444730905575249e-05, |
|
"loss": 0.1037, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.7681159420289855, |
|
"grad_norm": 0.22956091165542603, |
|
"learning_rate": 1.042217209152433e-05, |
|
"loss": 0.0761, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.7729468599033816, |
|
"grad_norm": 0.18922095000743866, |
|
"learning_rate": 1.0399613277473413e-05, |
|
"loss": 0.0885, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7777777777777778, |
|
"grad_norm": 0.18391458690166473, |
|
"learning_rate": 1.0377054463422495e-05, |
|
"loss": 0.0859, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.782608695652174, |
|
"grad_norm": 0.27890563011169434, |
|
"learning_rate": 1.0354495649371577e-05, |
|
"loss": 0.0924, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.7874396135265701, |
|
"grad_norm": 0.22491532564163208, |
|
"learning_rate": 1.0331936835320657e-05, |
|
"loss": 0.0811, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.7922705314009661, |
|
"grad_norm": 0.21809989213943481, |
|
"learning_rate": 1.0309378021269739e-05, |
|
"loss": 0.0784, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.7971014492753623, |
|
"grad_norm": 0.27180778980255127, |
|
"learning_rate": 1.028681920721882e-05, |
|
"loss": 0.088, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.8019323671497585, |
|
"grad_norm": 0.22717216610908508, |
|
"learning_rate": 1.0264260393167901e-05, |
|
"loss": 0.0755, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8067632850241546, |
|
"grad_norm": 0.19013768434524536, |
|
"learning_rate": 1.0241701579116983e-05, |
|
"loss": 0.0782, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.8115942028985508, |
|
"grad_norm": 0.2028125375509262, |
|
"learning_rate": 1.0219142765066065e-05, |
|
"loss": 0.1034, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8164251207729468, |
|
"grad_norm": 0.24243703484535217, |
|
"learning_rate": 1.0196583951015146e-05, |
|
"loss": 0.0899, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.821256038647343, |
|
"grad_norm": 0.21742011606693268, |
|
"learning_rate": 1.0174025136964228e-05, |
|
"loss": 0.0898, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.8260869565217391, |
|
"grad_norm": 0.2000913769006729, |
|
"learning_rate": 1.015146632291331e-05, |
|
"loss": 0.0828, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.8309178743961353, |
|
"grad_norm": 0.1902933269739151, |
|
"learning_rate": 1.0128907508862392e-05, |
|
"loss": 0.0934, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.8357487922705314, |
|
"grad_norm": 0.20363092422485352, |
|
"learning_rate": 1.0106348694811474e-05, |
|
"loss": 0.081, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.8405797101449275, |
|
"grad_norm": 0.2238474041223526, |
|
"learning_rate": 1.0083789880760556e-05, |
|
"loss": 0.0963, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.8454106280193237, |
|
"grad_norm": 0.19188345968723297, |
|
"learning_rate": 1.0061231066709634e-05, |
|
"loss": 0.0809, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.8502415458937198, |
|
"grad_norm": 0.18286921083927155, |
|
"learning_rate": 1.0038672252658716e-05, |
|
"loss": 0.0891, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.855072463768116, |
|
"grad_norm": 0.19798459112644196, |
|
"learning_rate": 1.0016113438607798e-05, |
|
"loss": 0.0789, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.8599033816425121, |
|
"grad_norm": 0.1937275230884552, |
|
"learning_rate": 9.99355462455688e-06, |
|
"loss": 0.0748, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.8647342995169082, |
|
"grad_norm": 0.2399519830942154, |
|
"learning_rate": 9.970995810505962e-06, |
|
"loss": 0.0941, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.8695652173913043, |
|
"grad_norm": 0.2435486763715744, |
|
"learning_rate": 9.948436996455044e-06, |
|
"loss": 0.078, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.8743961352657005, |
|
"grad_norm": 0.22818566858768463, |
|
"learning_rate": 9.925878182404124e-06, |
|
"loss": 0.0813, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.8792270531400966, |
|
"grad_norm": 0.19992083311080933, |
|
"learning_rate": 9.903319368353206e-06, |
|
"loss": 0.0757, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.8840579710144928, |
|
"grad_norm": 0.24121499061584473, |
|
"learning_rate": 9.880760554302288e-06, |
|
"loss": 0.0878, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 0.2414121776819229, |
|
"learning_rate": 9.85820174025137e-06, |
|
"loss": 0.076, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.893719806763285, |
|
"grad_norm": 0.17777179181575775, |
|
"learning_rate": 9.83564292620045e-06, |
|
"loss": 0.0903, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.8985507246376812, |
|
"grad_norm": 0.23024319112300873, |
|
"learning_rate": 9.813084112149533e-06, |
|
"loss": 0.0965, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.9033816425120773, |
|
"grad_norm": 0.20664696395397186, |
|
"learning_rate": 9.790525298098613e-06, |
|
"loss": 0.0707, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.9082125603864735, |
|
"grad_norm": 0.1725015491247177, |
|
"learning_rate": 9.767966484047695e-06, |
|
"loss": 0.0821, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9130434782608695, |
|
"grad_norm": 0.2138936072587967, |
|
"learning_rate": 9.745407669996777e-06, |
|
"loss": 0.087, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.9178743961352657, |
|
"grad_norm": 0.24879959225654602, |
|
"learning_rate": 9.722848855945859e-06, |
|
"loss": 0.0782, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9227053140096618, |
|
"grad_norm": 0.24507424235343933, |
|
"learning_rate": 9.700290041894941e-06, |
|
"loss": 0.0976, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.927536231884058, |
|
"grad_norm": 0.21825656294822693, |
|
"learning_rate": 9.677731227844023e-06, |
|
"loss": 0.0846, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.9323671497584541, |
|
"grad_norm": 0.22634956240653992, |
|
"learning_rate": 9.655172413793103e-06, |
|
"loss": 0.0833, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.9371980676328503, |
|
"grad_norm": 0.20103132724761963, |
|
"learning_rate": 9.632613599742184e-06, |
|
"loss": 0.0739, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.9420289855072463, |
|
"grad_norm": 0.19459068775177002, |
|
"learning_rate": 9.610054785691266e-06, |
|
"loss": 0.0841, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.9468599033816425, |
|
"grad_norm": 0.18598385155200958, |
|
"learning_rate": 9.587495971640348e-06, |
|
"loss": 0.0734, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.9516908212560387, |
|
"grad_norm": 0.24302643537521362, |
|
"learning_rate": 9.56493715758943e-06, |
|
"loss": 0.0893, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.9565217391304348, |
|
"grad_norm": 0.23758938908576965, |
|
"learning_rate": 9.542378343538512e-06, |
|
"loss": 0.0763, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.961352657004831, |
|
"grad_norm": 0.2180752158164978, |
|
"learning_rate": 9.519819529487592e-06, |
|
"loss": 0.0792, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.966183574879227, |
|
"grad_norm": 0.22509507834911346, |
|
"learning_rate": 9.497260715436674e-06, |
|
"loss": 0.0682, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9710144927536232, |
|
"grad_norm": 0.197494238615036, |
|
"learning_rate": 9.474701901385756e-06, |
|
"loss": 0.0903, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.9758454106280193, |
|
"grad_norm": 0.1817607879638672, |
|
"learning_rate": 9.452143087334838e-06, |
|
"loss": 0.0846, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.9806763285024155, |
|
"grad_norm": 0.19075438380241394, |
|
"learning_rate": 9.42958427328392e-06, |
|
"loss": 0.0846, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.9855072463768116, |
|
"grad_norm": 0.15087321400642395, |
|
"learning_rate": 9.407025459233e-06, |
|
"loss": 0.0753, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.9903381642512077, |
|
"grad_norm": 0.2226846069097519, |
|
"learning_rate": 9.38446664518208e-06, |
|
"loss": 0.0666, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.9951690821256038, |
|
"grad_norm": 0.30765634775161743, |
|
"learning_rate": 9.361907831131162e-06, |
|
"loss": 0.0779, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.4031631052494049, |
|
"learning_rate": 9.339349017080244e-06, |
|
"loss": 0.095, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_runtime": 338.4602, |
|
"eval_samples_per_second": 3.055, |
|
"eval_steps_per_second": 0.384, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.0048309178743962, |
|
"grad_norm": 0.2173592448234558, |
|
"learning_rate": 9.316790203029326e-06, |
|
"loss": 0.0803, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.0096618357487923, |
|
"grad_norm": 0.22241808474063873, |
|
"learning_rate": 9.294231388978408e-06, |
|
"loss": 0.0909, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 1.0144927536231885, |
|
"grad_norm": 0.2699296474456787, |
|
"learning_rate": 9.27167257492749e-06, |
|
"loss": 0.0842, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.0193236714975846, |
|
"grad_norm": 0.27080684900283813, |
|
"learning_rate": 9.24911376087657e-06, |
|
"loss": 0.0837, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 1.0241545893719808, |
|
"grad_norm": 0.1808546930551529, |
|
"learning_rate": 9.226554946825653e-06, |
|
"loss": 0.0816, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.0289855072463767, |
|
"grad_norm": 0.19763918220996857, |
|
"learning_rate": 9.203996132774735e-06, |
|
"loss": 0.0705, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 1.0338164251207729, |
|
"grad_norm": 0.21294108033180237, |
|
"learning_rate": 9.181437318723815e-06, |
|
"loss": 0.0726, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.038647342995169, |
|
"grad_norm": 0.19769993424415588, |
|
"learning_rate": 9.158878504672897e-06, |
|
"loss": 0.0743, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.0434782608695652, |
|
"grad_norm": 0.23708152770996094, |
|
"learning_rate": 9.136319690621979e-06, |
|
"loss": 0.0785, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.0483091787439613, |
|
"grad_norm": 0.232899010181427, |
|
"learning_rate": 9.11376087657106e-06, |
|
"loss": 0.093, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.0531400966183575, |
|
"grad_norm": 0.267478883266449, |
|
"learning_rate": 9.091202062520141e-06, |
|
"loss": 0.0901, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.0579710144927537, |
|
"grad_norm": 0.23761190474033356, |
|
"learning_rate": 9.068643248469223e-06, |
|
"loss": 0.0898, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 1.0628019323671498, |
|
"grad_norm": 0.19679813086986542, |
|
"learning_rate": 9.046084434418305e-06, |
|
"loss": 0.0877, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.067632850241546, |
|
"grad_norm": 0.20915761590003967, |
|
"learning_rate": 9.023525620367387e-06, |
|
"loss": 0.0731, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 1.0724637681159421, |
|
"grad_norm": 0.18718890845775604, |
|
"learning_rate": 9.000966806316468e-06, |
|
"loss": 0.0885, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.077294685990338, |
|
"grad_norm": 0.29885435104370117, |
|
"learning_rate": 8.97840799226555e-06, |
|
"loss": 0.0861, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.0821256038647342, |
|
"grad_norm": 0.16953594982624054, |
|
"learning_rate": 8.95584917821463e-06, |
|
"loss": 0.0862, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.0869565217391304, |
|
"grad_norm": 0.21629682183265686, |
|
"learning_rate": 8.933290364163712e-06, |
|
"loss": 0.1022, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.0917874396135265, |
|
"grad_norm": 0.26614615321159363, |
|
"learning_rate": 8.910731550112794e-06, |
|
"loss": 0.0825, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.0966183574879227, |
|
"grad_norm": 0.2642468810081482, |
|
"learning_rate": 8.888172736061876e-06, |
|
"loss": 0.0796, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 1.1014492753623188, |
|
"grad_norm": 0.16877882182598114, |
|
"learning_rate": 8.865613922010956e-06, |
|
"loss": 0.0726, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.106280193236715, |
|
"grad_norm": 0.2619246542453766, |
|
"learning_rate": 8.843055107960038e-06, |
|
"loss": 0.0835, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 0.2424723505973816, |
|
"learning_rate": 8.82049629390912e-06, |
|
"loss": 0.0896, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.1159420289855073, |
|
"grad_norm": 0.20973582565784454, |
|
"learning_rate": 8.797937479858202e-06, |
|
"loss": 0.0751, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 1.1207729468599035, |
|
"grad_norm": 0.23418009281158447, |
|
"learning_rate": 8.775378665807284e-06, |
|
"loss": 0.077, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.1256038647342996, |
|
"grad_norm": 0.3117668032646179, |
|
"learning_rate": 8.752819851756366e-06, |
|
"loss": 0.0769, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 1.1304347826086956, |
|
"grad_norm": 0.25092753767967224, |
|
"learning_rate": 8.730261037705446e-06, |
|
"loss": 0.0729, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.1352657004830917, |
|
"grad_norm": 0.1926090270280838, |
|
"learning_rate": 8.707702223654527e-06, |
|
"loss": 0.0772, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.1400966183574879, |
|
"grad_norm": 0.27212995290756226, |
|
"learning_rate": 8.685143409603609e-06, |
|
"loss": 0.0712, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.144927536231884, |
|
"grad_norm": 0.2097581923007965, |
|
"learning_rate": 8.66258459555269e-06, |
|
"loss": 0.0767, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 1.1497584541062802, |
|
"grad_norm": 0.2765638828277588, |
|
"learning_rate": 8.640025781501773e-06, |
|
"loss": 0.0885, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.1545893719806763, |
|
"grad_norm": 0.28414320945739746, |
|
"learning_rate": 8.617466967450855e-06, |
|
"loss": 0.0631, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 1.1594202898550725, |
|
"grad_norm": 0.21230548620224, |
|
"learning_rate": 8.594908153399935e-06, |
|
"loss": 0.0848, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.1642512077294687, |
|
"grad_norm": 0.1870320439338684, |
|
"learning_rate": 8.572349339349017e-06, |
|
"loss": 0.0724, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 1.1690821256038648, |
|
"grad_norm": 0.23322801291942596, |
|
"learning_rate": 8.549790525298099e-06, |
|
"loss": 0.0846, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.1739130434782608, |
|
"grad_norm": 0.22248071432113647, |
|
"learning_rate": 8.52723171124718e-06, |
|
"loss": 0.0624, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 1.178743961352657, |
|
"grad_norm": 0.196117103099823, |
|
"learning_rate": 8.504672897196261e-06, |
|
"loss": 0.093, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.183574879227053, |
|
"grad_norm": 0.2212802767753601, |
|
"learning_rate": 8.482114083145343e-06, |
|
"loss": 0.0853, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.1884057971014492, |
|
"grad_norm": 0.17421841621398926, |
|
"learning_rate": 8.459555269094424e-06, |
|
"loss": 0.0747, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.1932367149758454, |
|
"grad_norm": 0.2645537853240967, |
|
"learning_rate": 8.436996455043506e-06, |
|
"loss": 0.0859, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 1.1980676328502415, |
|
"grad_norm": 0.27182498574256897, |
|
"learning_rate": 8.414437640992588e-06, |
|
"loss": 0.0943, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.2028985507246377, |
|
"grad_norm": 0.20389291644096375, |
|
"learning_rate": 8.39187882694167e-06, |
|
"loss": 0.0783, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 1.2077294685990339, |
|
"grad_norm": 0.3193868398666382, |
|
"learning_rate": 8.369320012890752e-06, |
|
"loss": 0.0917, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.21256038647343, |
|
"grad_norm": 0.2852030098438263, |
|
"learning_rate": 8.346761198839834e-06, |
|
"loss": 0.077, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 1.2173913043478262, |
|
"grad_norm": 0.256452739238739, |
|
"learning_rate": 8.324202384788914e-06, |
|
"loss": 0.0804, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.2222222222222223, |
|
"grad_norm": 0.209047332406044, |
|
"learning_rate": 8.301643570737994e-06, |
|
"loss": 0.0928, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 1.2270531400966185, |
|
"grad_norm": 0.21215900778770447, |
|
"learning_rate": 8.279084756687076e-06, |
|
"loss": 0.0788, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.2318840579710144, |
|
"grad_norm": 0.15550634264945984, |
|
"learning_rate": 8.256525942636158e-06, |
|
"loss": 0.0804, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.2367149758454106, |
|
"grad_norm": 0.16960662603378296, |
|
"learning_rate": 8.23396712858524e-06, |
|
"loss": 0.0737, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.2415458937198067, |
|
"grad_norm": 0.20484741032123566, |
|
"learning_rate": 8.211408314534322e-06, |
|
"loss": 0.0794, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 1.2463768115942029, |
|
"grad_norm": 0.24889996647834778, |
|
"learning_rate": 8.188849500483402e-06, |
|
"loss": 0.0903, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.251207729468599, |
|
"grad_norm": 0.23695576190948486, |
|
"learning_rate": 8.166290686432484e-06, |
|
"loss": 0.0826, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 1.2560386473429952, |
|
"grad_norm": 0.23449349403381348, |
|
"learning_rate": 8.143731872381566e-06, |
|
"loss": 0.0922, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.2608695652173914, |
|
"grad_norm": 0.2362452745437622, |
|
"learning_rate": 8.121173058330648e-06, |
|
"loss": 0.0716, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 1.2657004830917875, |
|
"grad_norm": 0.33280622959136963, |
|
"learning_rate": 8.09861424427973e-06, |
|
"loss": 0.0909, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.2705314009661834, |
|
"grad_norm": 0.22267523407936096, |
|
"learning_rate": 8.07605543022881e-06, |
|
"loss": 0.0816, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 1.2753623188405796, |
|
"grad_norm": 0.23176385462284088, |
|
"learning_rate": 8.053496616177891e-06, |
|
"loss": 0.091, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.2801932367149758, |
|
"grad_norm": 0.21951176226139069, |
|
"learning_rate": 8.030937802126973e-06, |
|
"loss": 0.0752, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 1.285024154589372, |
|
"grad_norm": 0.19361701607704163, |
|
"learning_rate": 8.008378988076055e-06, |
|
"loss": 0.0731, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.289855072463768, |
|
"grad_norm": 0.2284880429506302, |
|
"learning_rate": 7.985820174025137e-06, |
|
"loss": 0.0821, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 1.2946859903381642, |
|
"grad_norm": 0.28775539994239807, |
|
"learning_rate": 7.963261359974219e-06, |
|
"loss": 0.0865, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.2995169082125604, |
|
"grad_norm": 0.22133222222328186, |
|
"learning_rate": 7.940702545923301e-06, |
|
"loss": 0.0722, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 1.3043478260869565, |
|
"grad_norm": 0.2120644450187683, |
|
"learning_rate": 7.918143731872381e-06, |
|
"loss": 0.0642, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.3091787439613527, |
|
"grad_norm": 0.2922479212284088, |
|
"learning_rate": 7.895584917821463e-06, |
|
"loss": 0.0804, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 1.3140096618357489, |
|
"grad_norm": 0.2302795797586441, |
|
"learning_rate": 7.873026103770545e-06, |
|
"loss": 0.0726, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.318840579710145, |
|
"grad_norm": 0.28763264417648315, |
|
"learning_rate": 7.850467289719626e-06, |
|
"loss": 0.0976, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 1.3236714975845412, |
|
"grad_norm": 0.2106347233057022, |
|
"learning_rate": 7.827908475668708e-06, |
|
"loss": 0.0744, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.3285024154589373, |
|
"grad_norm": 0.23215855658054352, |
|
"learning_rate": 7.80534966161779e-06, |
|
"loss": 0.0916, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.20885543525218964, |
|
"learning_rate": 7.78279084756687e-06, |
|
"loss": 0.0774, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.3381642512077294, |
|
"grad_norm": 0.20533576607704163, |
|
"learning_rate": 7.760232033515952e-06, |
|
"loss": 0.0751, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 1.3429951690821256, |
|
"grad_norm": 0.20719490945339203, |
|
"learning_rate": 7.737673219465034e-06, |
|
"loss": 0.0816, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.3478260869565217, |
|
"grad_norm": 0.19761165976524353, |
|
"learning_rate": 7.715114405414116e-06, |
|
"loss": 0.0804, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 1.3526570048309179, |
|
"grad_norm": 0.20369771122932434, |
|
"learning_rate": 7.692555591363198e-06, |
|
"loss": 0.0845, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.357487922705314, |
|
"grad_norm": 0.20887012779712677, |
|
"learning_rate": 7.669996777312278e-06, |
|
"loss": 0.0704, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 1.3623188405797102, |
|
"grad_norm": 0.29784587025642395, |
|
"learning_rate": 7.64743796326136e-06, |
|
"loss": 0.0866, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.3671497584541064, |
|
"grad_norm": 0.31036221981048584, |
|
"learning_rate": 7.62487914921044e-06, |
|
"loss": 0.0862, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 1.3719806763285023, |
|
"grad_norm": 0.25198647379875183, |
|
"learning_rate": 7.602320335159522e-06, |
|
"loss": 0.0825, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.3768115942028984, |
|
"grad_norm": 0.24515630304813385, |
|
"learning_rate": 7.579761521108604e-06, |
|
"loss": 0.0787, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 1.3816425120772946, |
|
"grad_norm": 0.22536733746528625, |
|
"learning_rate": 7.5572027070576855e-06, |
|
"loss": 0.0928, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.3864734299516908, |
|
"grad_norm": 0.23405781388282776, |
|
"learning_rate": 7.5346438930067675e-06, |
|
"loss": 0.0917, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 1.391304347826087, |
|
"grad_norm": 0.24243396520614624, |
|
"learning_rate": 7.5120850789558495e-06, |
|
"loss": 0.0675, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.396135265700483, |
|
"grad_norm": 0.2637854814529419, |
|
"learning_rate": 7.489526264904931e-06, |
|
"loss": 0.0863, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 1.4009661835748792, |
|
"grad_norm": 0.2491244375705719, |
|
"learning_rate": 7.466967450854013e-06, |
|
"loss": 0.0808, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.4057971014492754, |
|
"grad_norm": 0.23132705688476562, |
|
"learning_rate": 7.444408636803095e-06, |
|
"loss": 0.0797, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 1.4106280193236715, |
|
"grad_norm": 0.2987098693847656, |
|
"learning_rate": 7.421849822752176e-06, |
|
"loss": 0.0766, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.4154589371980677, |
|
"grad_norm": 0.23995457589626312, |
|
"learning_rate": 7.399291008701256e-06, |
|
"loss": 0.0764, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 1.4202898550724639, |
|
"grad_norm": 0.21818973124027252, |
|
"learning_rate": 7.376732194650338e-06, |
|
"loss": 0.09, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.42512077294686, |
|
"grad_norm": 0.19304029643535614, |
|
"learning_rate": 7.354173380599419e-06, |
|
"loss": 0.0759, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 1.4299516908212562, |
|
"grad_norm": 0.26081785559654236, |
|
"learning_rate": 7.331614566548501e-06, |
|
"loss": 0.0781, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.434782608695652, |
|
"grad_norm": 0.23940761387348175, |
|
"learning_rate": 7.309055752497583e-06, |
|
"loss": 0.085, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 1.4396135265700483, |
|
"grad_norm": 0.21909761428833008, |
|
"learning_rate": 7.286496938446664e-06, |
|
"loss": 0.0815, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.4444444444444444, |
|
"grad_norm": 0.16527162492275238, |
|
"learning_rate": 7.263938124395746e-06, |
|
"loss": 0.0698, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 1.4492753623188406, |
|
"grad_norm": 0.21258555352687836, |
|
"learning_rate": 7.241379310344828e-06, |
|
"loss": 0.0806, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.4541062801932367, |
|
"grad_norm": 0.18572719395160675, |
|
"learning_rate": 7.2188204962939095e-06, |
|
"loss": 0.0757, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 1.458937198067633, |
|
"grad_norm": 0.16916704177856445, |
|
"learning_rate": 7.19626168224299e-06, |
|
"loss": 0.07, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.463768115942029, |
|
"grad_norm": 0.289044588804245, |
|
"learning_rate": 7.173702868192072e-06, |
|
"loss": 0.0656, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 1.4685990338164252, |
|
"grad_norm": 0.27173757553100586, |
|
"learning_rate": 7.151144054141153e-06, |
|
"loss": 0.0704, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.4734299516908211, |
|
"grad_norm": 0.2929324209690094, |
|
"learning_rate": 7.128585240090235e-06, |
|
"loss": 0.0833, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 1.4782608695652173, |
|
"grad_norm": 0.2387627214193344, |
|
"learning_rate": 7.106026426039317e-06, |
|
"loss": 0.075, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.4830917874396135, |
|
"grad_norm": 0.3277483582496643, |
|
"learning_rate": 7.083467611988398e-06, |
|
"loss": 0.074, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 1.4879227053140096, |
|
"grad_norm": 0.23673392832279205, |
|
"learning_rate": 7.06090879793748e-06, |
|
"loss": 0.0697, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.4927536231884058, |
|
"grad_norm": 0.19109922647476196, |
|
"learning_rate": 7.038349983886562e-06, |
|
"loss": 0.0775, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 1.497584541062802, |
|
"grad_norm": 0.2344091832637787, |
|
"learning_rate": 7.015791169835643e-06, |
|
"loss": 0.0644, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.502415458937198, |
|
"grad_norm": 0.28420698642730713, |
|
"learning_rate": 6.993232355784724e-06, |
|
"loss": 0.0935, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 1.5072463768115942, |
|
"grad_norm": 0.2632888853549957, |
|
"learning_rate": 6.970673541733806e-06, |
|
"loss": 0.083, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.5120772946859904, |
|
"grad_norm": 0.2461112141609192, |
|
"learning_rate": 6.9481147276828875e-06, |
|
"loss": 0.0729, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 1.5169082125603865, |
|
"grad_norm": 0.2015853226184845, |
|
"learning_rate": 6.9255559136319695e-06, |
|
"loss": 0.0836, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.5217391304347827, |
|
"grad_norm": 0.2409069985151291, |
|
"learning_rate": 6.902997099581051e-06, |
|
"loss": 0.0797, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.5265700483091789, |
|
"grad_norm": 0.2014143019914627, |
|
"learning_rate": 6.880438285530132e-06, |
|
"loss": 0.09, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.531400966183575, |
|
"grad_norm": 0.2173725664615631, |
|
"learning_rate": 6.857879471479214e-06, |
|
"loss": 0.0648, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 1.5362318840579712, |
|
"grad_norm": 0.20185904204845428, |
|
"learning_rate": 6.835320657428296e-06, |
|
"loss": 0.0924, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.541062801932367, |
|
"grad_norm": 0.29456228017807007, |
|
"learning_rate": 6.812761843377377e-06, |
|
"loss": 0.0764, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 1.5458937198067633, |
|
"grad_norm": 0.22320301830768585, |
|
"learning_rate": 6.790203029326458e-06, |
|
"loss": 0.0754, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.5507246376811594, |
|
"grad_norm": 0.2032977044582367, |
|
"learning_rate": 6.76764421527554e-06, |
|
"loss": 0.089, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 1.5555555555555556, |
|
"grad_norm": 0.24341309070587158, |
|
"learning_rate": 6.745085401224621e-06, |
|
"loss": 0.0767, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.5603864734299517, |
|
"grad_norm": 0.22675780951976776, |
|
"learning_rate": 6.722526587173703e-06, |
|
"loss": 0.0811, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 1.5652173913043477, |
|
"grad_norm": 0.2980429232120514, |
|
"learning_rate": 6.699967773122784e-06, |
|
"loss": 0.0714, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.5700483091787438, |
|
"grad_norm": 0.2221527248620987, |
|
"learning_rate": 6.6774089590718655e-06, |
|
"loss": 0.0811, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 1.57487922705314, |
|
"grad_norm": 0.29102587699890137, |
|
"learning_rate": 6.6548501450209474e-06, |
|
"loss": 0.0717, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.5797101449275361, |
|
"grad_norm": 0.24565882980823517, |
|
"learning_rate": 6.632291330970029e-06, |
|
"loss": 0.0688, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 1.5845410628019323, |
|
"grad_norm": 0.2056146264076233, |
|
"learning_rate": 6.609732516919111e-06, |
|
"loss": 0.0739, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.5893719806763285, |
|
"grad_norm": 0.25777336955070496, |
|
"learning_rate": 6.587173702868192e-06, |
|
"loss": 0.0746, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 1.5942028985507246, |
|
"grad_norm": 0.20640453696250916, |
|
"learning_rate": 6.564614888817273e-06, |
|
"loss": 0.0757, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.5990338164251208, |
|
"grad_norm": 0.16480913758277893, |
|
"learning_rate": 6.542056074766355e-06, |
|
"loss": 0.0752, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 1.603864734299517, |
|
"grad_norm": 0.23693595826625824, |
|
"learning_rate": 6.519497260715437e-06, |
|
"loss": 0.0813, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.608695652173913, |
|
"grad_norm": 0.24152866005897522, |
|
"learning_rate": 6.496938446664518e-06, |
|
"loss": 0.0784, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 1.6135265700483092, |
|
"grad_norm": 0.23890602588653564, |
|
"learning_rate": 6.474379632613599e-06, |
|
"loss": 0.0813, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.6183574879227054, |
|
"grad_norm": 0.2686842679977417, |
|
"learning_rate": 6.451820818562681e-06, |
|
"loss": 0.0833, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 1.6231884057971016, |
|
"grad_norm": 0.2103358954191208, |
|
"learning_rate": 6.429262004511762e-06, |
|
"loss": 0.0906, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.6280193236714977, |
|
"grad_norm": 0.23938271403312683, |
|
"learning_rate": 6.406703190460844e-06, |
|
"loss": 0.0721, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 1.6328502415458939, |
|
"grad_norm": 0.1797400861978531, |
|
"learning_rate": 6.384144376409926e-06, |
|
"loss": 0.0678, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.6376811594202898, |
|
"grad_norm": 0.23905880749225616, |
|
"learning_rate": 6.361585562359007e-06, |
|
"loss": 0.0886, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 1.642512077294686, |
|
"grad_norm": 0.19138076901435852, |
|
"learning_rate": 6.339026748308089e-06, |
|
"loss": 0.0705, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.6473429951690821, |
|
"grad_norm": 0.19759757816791534, |
|
"learning_rate": 6.3164679342571706e-06, |
|
"loss": 0.0772, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 1.6521739130434783, |
|
"grad_norm": 0.22951267659664154, |
|
"learning_rate": 6.293909120206252e-06, |
|
"loss": 0.0701, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.6570048309178744, |
|
"grad_norm": 0.3317079246044159, |
|
"learning_rate": 6.271350306155334e-06, |
|
"loss": 0.0838, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 1.6618357487922706, |
|
"grad_norm": 0.2875089645385742, |
|
"learning_rate": 6.248791492104415e-06, |
|
"loss": 0.0711, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.22365209460258484, |
|
"learning_rate": 6.226232678053496e-06, |
|
"loss": 0.0913, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 1.6714975845410627, |
|
"grad_norm": 0.26004156470298767, |
|
"learning_rate": 6.203673864002578e-06, |
|
"loss": 0.0749, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.6763285024154588, |
|
"grad_norm": 0.24029529094696045, |
|
"learning_rate": 6.18111504995166e-06, |
|
"loss": 0.0872, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 1.681159420289855, |
|
"grad_norm": 0.2503759562969208, |
|
"learning_rate": 6.158556235900741e-06, |
|
"loss": 0.0662, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.6859903381642511, |
|
"grad_norm": 0.24961721897125244, |
|
"learning_rate": 6.135997421849822e-06, |
|
"loss": 0.0805, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 1.6908212560386473, |
|
"grad_norm": 0.20291025936603546, |
|
"learning_rate": 6.113438607798904e-06, |
|
"loss": 0.0723, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.6956521739130435, |
|
"grad_norm": 0.24923092126846313, |
|
"learning_rate": 6.0908797937479854e-06, |
|
"loss": 0.0766, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 1.7004830917874396, |
|
"grad_norm": 0.3006664514541626, |
|
"learning_rate": 6.068320979697067e-06, |
|
"loss": 0.0767, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.7053140096618358, |
|
"grad_norm": 0.22034914791584015, |
|
"learning_rate": 6.045762165646149e-06, |
|
"loss": 0.0716, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 1.710144927536232, |
|
"grad_norm": 0.22951188683509827, |
|
"learning_rate": 6.02320335159523e-06, |
|
"loss": 0.0713, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.714975845410628, |
|
"grad_norm": 0.22270874679088593, |
|
"learning_rate": 6.000644537544312e-06, |
|
"loss": 0.0671, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.7198067632850242, |
|
"grad_norm": 0.23195502161979675, |
|
"learning_rate": 5.978085723493394e-06, |
|
"loss": 0.0864, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.7246376811594204, |
|
"grad_norm": 0.2421010136604309, |
|
"learning_rate": 5.955526909442475e-06, |
|
"loss": 0.0886, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 1.7294685990338166, |
|
"grad_norm": 0.20693883299827576, |
|
"learning_rate": 5.932968095391557e-06, |
|
"loss": 0.0715, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.7342995169082127, |
|
"grad_norm": 0.32137101888656616, |
|
"learning_rate": 5.910409281340638e-06, |
|
"loss": 0.0639, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 1.7391304347826086, |
|
"grad_norm": 0.21108365058898926, |
|
"learning_rate": 5.887850467289719e-06, |
|
"loss": 0.0786, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.7439613526570048, |
|
"grad_norm": 0.2952270805835724, |
|
"learning_rate": 5.865291653238801e-06, |
|
"loss": 0.0641, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 1.748792270531401, |
|
"grad_norm": 0.26709944009780884, |
|
"learning_rate": 5.842732839187883e-06, |
|
"loss": 0.0698, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.7536231884057971, |
|
"grad_norm": 0.30126988887786865, |
|
"learning_rate": 5.820174025136964e-06, |
|
"loss": 0.0773, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 1.7584541062801933, |
|
"grad_norm": 0.2402152717113495, |
|
"learning_rate": 5.797615211086045e-06, |
|
"loss": 0.0778, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.7632850241545892, |
|
"grad_norm": 0.19652244448661804, |
|
"learning_rate": 5.775056397035127e-06, |
|
"loss": 0.082, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.7681159420289854, |
|
"grad_norm": 0.21389204263687134, |
|
"learning_rate": 5.7524975829842086e-06, |
|
"loss": 0.0727, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.7729468599033815, |
|
"grad_norm": 0.2189796268939972, |
|
"learning_rate": 5.7299387689332905e-06, |
|
"loss": 0.0757, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 1.7777777777777777, |
|
"grad_norm": 0.28000935912132263, |
|
"learning_rate": 5.7073799548823725e-06, |
|
"loss": 0.0803, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.7826086956521738, |
|
"grad_norm": 0.24566881358623505, |
|
"learning_rate": 5.684821140831453e-06, |
|
"loss": 0.0815, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 1.78743961352657, |
|
"grad_norm": 0.22037634253501892, |
|
"learning_rate": 5.662262326780535e-06, |
|
"loss": 0.0871, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.7922705314009661, |
|
"grad_norm": 0.1990278661251068, |
|
"learning_rate": 5.639703512729617e-06, |
|
"loss": 0.0756, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 1.7971014492753623, |
|
"grad_norm": 0.3180176615715027, |
|
"learning_rate": 5.617144698678698e-06, |
|
"loss": 0.0735, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.8019323671497585, |
|
"grad_norm": 0.2075718492269516, |
|
"learning_rate": 5.59458588462778e-06, |
|
"loss": 0.0665, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 1.8067632850241546, |
|
"grad_norm": 0.2611768841743469, |
|
"learning_rate": 5.572027070576861e-06, |
|
"loss": 0.0873, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.8115942028985508, |
|
"grad_norm": 0.22146160900592804, |
|
"learning_rate": 5.549468256525942e-06, |
|
"loss": 0.0638, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.816425120772947, |
|
"grad_norm": 0.29287296533584595, |
|
"learning_rate": 5.526909442475024e-06, |
|
"loss": 0.0812, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.821256038647343, |
|
"grad_norm": 0.2280767410993576, |
|
"learning_rate": 5.504350628424106e-06, |
|
"loss": 0.0766, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 1.8260869565217392, |
|
"grad_norm": 0.20453138649463654, |
|
"learning_rate": 5.4817918143731865e-06, |
|
"loss": 0.0748, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.8309178743961354, |
|
"grad_norm": 0.2855188250541687, |
|
"learning_rate": 5.4592330003222685e-06, |
|
"loss": 0.0901, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 1.8357487922705316, |
|
"grad_norm": 0.21556098759174347, |
|
"learning_rate": 5.4366741862713505e-06, |
|
"loss": 0.0735, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.8405797101449275, |
|
"grad_norm": 0.3091937303543091, |
|
"learning_rate": 5.414115372220432e-06, |
|
"loss": 0.0636, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 1.8454106280193237, |
|
"grad_norm": 0.2939262390136719, |
|
"learning_rate": 5.391556558169514e-06, |
|
"loss": 0.0753, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.8502415458937198, |
|
"grad_norm": 0.2101174294948578, |
|
"learning_rate": 5.368997744118595e-06, |
|
"loss": 0.0714, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 1.855072463768116, |
|
"grad_norm": 0.2570497691631317, |
|
"learning_rate": 5.346438930067676e-06, |
|
"loss": 0.0877, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.8599033816425121, |
|
"grad_norm": 0.2754373848438263, |
|
"learning_rate": 5.323880116016758e-06, |
|
"loss": 0.0729, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.864734299516908, |
|
"grad_norm": 0.2952544391155243, |
|
"learning_rate": 5.30132130196584e-06, |
|
"loss": 0.0714, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.8695652173913042, |
|
"grad_norm": 0.2360425889492035, |
|
"learning_rate": 5.278762487914921e-06, |
|
"loss": 0.0711, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 1.8743961352657004, |
|
"grad_norm": 0.22847935557365417, |
|
"learning_rate": 5.256203673864002e-06, |
|
"loss": 0.07, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.8792270531400965, |
|
"grad_norm": 0.26060476899147034, |
|
"learning_rate": 5.233644859813084e-06, |
|
"loss": 0.086, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 1.8840579710144927, |
|
"grad_norm": 0.28593048453330994, |
|
"learning_rate": 5.211086045762165e-06, |
|
"loss": 0.0782, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.8888888888888888, |
|
"grad_norm": 0.2553214430809021, |
|
"learning_rate": 5.188527231711247e-06, |
|
"loss": 0.0689, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 1.893719806763285, |
|
"grad_norm": 0.38168102502822876, |
|
"learning_rate": 5.1659684176603285e-06, |
|
"loss": 0.0917, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.8985507246376812, |
|
"grad_norm": 0.22879190742969513, |
|
"learning_rate": 5.14340960360941e-06, |
|
"loss": 0.0833, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 1.9033816425120773, |
|
"grad_norm": 0.19676880538463593, |
|
"learning_rate": 5.120850789558492e-06, |
|
"loss": 0.0594, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.9082125603864735, |
|
"grad_norm": 0.36660292744636536, |
|
"learning_rate": 5.098291975507573e-06, |
|
"loss": 0.0932, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.9130434782608696, |
|
"grad_norm": 0.23486468195915222, |
|
"learning_rate": 5.075733161456655e-06, |
|
"loss": 0.0941, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.9178743961352658, |
|
"grad_norm": 0.2950279414653778, |
|
"learning_rate": 5.053174347405737e-06, |
|
"loss": 0.0796, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 1.922705314009662, |
|
"grad_norm": 0.1995108425617218, |
|
"learning_rate": 5.030615533354817e-06, |
|
"loss": 0.0766, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.927536231884058, |
|
"grad_norm": 0.3509507179260254, |
|
"learning_rate": 5.008056719303899e-06, |
|
"loss": 0.0718, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 1.9323671497584543, |
|
"grad_norm": 0.22868584096431732, |
|
"learning_rate": 4.985497905252981e-06, |
|
"loss": 0.0724, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.9371980676328504, |
|
"grad_norm": 0.270059734582901, |
|
"learning_rate": 4.962939091202062e-06, |
|
"loss": 0.0761, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 1.9420289855072463, |
|
"grad_norm": 0.24437829852104187, |
|
"learning_rate": 4.940380277151144e-06, |
|
"loss": 0.0729, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.9468599033816425, |
|
"grad_norm": 0.24446424841880798, |
|
"learning_rate": 4.917821463100225e-06, |
|
"loss": 0.0648, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 1.9516908212560387, |
|
"grad_norm": 0.21626543998718262, |
|
"learning_rate": 4.8952626490493065e-06, |
|
"loss": 0.0739, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.9565217391304348, |
|
"grad_norm": 0.20689117908477783, |
|
"learning_rate": 4.8727038349983885e-06, |
|
"loss": 0.0701, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.961352657004831, |
|
"grad_norm": 0.2660706043243408, |
|
"learning_rate": 4.8501450209474705e-06, |
|
"loss": 0.0571, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.966183574879227, |
|
"grad_norm": 0.24084658920764923, |
|
"learning_rate": 4.827586206896552e-06, |
|
"loss": 0.0764, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 1.971014492753623, |
|
"grad_norm": 0.2771299481391907, |
|
"learning_rate": 4.805027392845633e-06, |
|
"loss": 0.0738, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.9758454106280192, |
|
"grad_norm": 0.2248222976922989, |
|
"learning_rate": 4.782468578794715e-06, |
|
"loss": 0.0774, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 1.9806763285024154, |
|
"grad_norm": 0.22526535391807556, |
|
"learning_rate": 4.759909764743796e-06, |
|
"loss": 0.0678, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.9855072463768115, |
|
"grad_norm": 0.21107898652553558, |
|
"learning_rate": 4.737350950692878e-06, |
|
"loss": 0.1011, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 1.9903381642512077, |
|
"grad_norm": 0.22934384644031525, |
|
"learning_rate": 4.71479213664196e-06, |
|
"loss": 0.0715, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.9951690821256038, |
|
"grad_norm": 0.2517627775669098, |
|
"learning_rate": 4.69223332259104e-06, |
|
"loss": 0.0796, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.40475329756736755, |
|
"learning_rate": 4.669674508540122e-06, |
|
"loss": 0.0919, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_runtime": 339.1035, |
|
"eval_samples_per_second": 3.049, |
|
"eval_steps_per_second": 0.383, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.004830917874396, |
|
"grad_norm": 0.23014891147613525, |
|
"learning_rate": 4.647115694489204e-06, |
|
"loss": 0.0721, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 2.0096618357487923, |
|
"grad_norm": 0.292595773935318, |
|
"learning_rate": 4.624556880438285e-06, |
|
"loss": 0.0797, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.0144927536231885, |
|
"grad_norm": 0.2784234583377838, |
|
"learning_rate": 4.601998066387367e-06, |
|
"loss": 0.0783, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 2.0193236714975846, |
|
"grad_norm": 0.21615320444107056, |
|
"learning_rate": 4.5794392523364485e-06, |
|
"loss": 0.0794, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.024154589371981, |
|
"grad_norm": 0.30054816603660583, |
|
"learning_rate": 4.55688043828553e-06, |
|
"loss": 0.078, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 2.028985507246377, |
|
"grad_norm": 0.21918036043643951, |
|
"learning_rate": 4.534321624234612e-06, |
|
"loss": 0.0706, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.033816425120773, |
|
"grad_norm": 0.22675025463104248, |
|
"learning_rate": 4.511762810183694e-06, |
|
"loss": 0.0578, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 2.0386473429951693, |
|
"grad_norm": 0.3500133454799652, |
|
"learning_rate": 4.489203996132775e-06, |
|
"loss": 0.077, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.0434782608695654, |
|
"grad_norm": 0.2782948315143585, |
|
"learning_rate": 4.466645182081856e-06, |
|
"loss": 0.0747, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 2.0483091787439616, |
|
"grad_norm": 0.3685343265533447, |
|
"learning_rate": 4.444086368030938e-06, |
|
"loss": 0.0775, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.0531400966183573, |
|
"grad_norm": 0.26994946599006653, |
|
"learning_rate": 4.421527553980019e-06, |
|
"loss": 0.076, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 2.0579710144927534, |
|
"grad_norm": 0.2926693856716156, |
|
"learning_rate": 4.398968739929101e-06, |
|
"loss": 0.0797, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.0628019323671496, |
|
"grad_norm": 0.26841118931770325, |
|
"learning_rate": 4.376409925878183e-06, |
|
"loss": 0.0733, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 2.0676328502415457, |
|
"grad_norm": 0.25837743282318115, |
|
"learning_rate": 4.353851111827263e-06, |
|
"loss": 0.0572, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.072463768115942, |
|
"grad_norm": 0.23347356915473938, |
|
"learning_rate": 4.331292297776345e-06, |
|
"loss": 0.0824, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 2.077294685990338, |
|
"grad_norm": 0.31139683723449707, |
|
"learning_rate": 4.308733483725427e-06, |
|
"loss": 0.0801, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.082125603864734, |
|
"grad_norm": 0.33561673760414124, |
|
"learning_rate": 4.2861746696745085e-06, |
|
"loss": 0.0816, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 2.0869565217391304, |
|
"grad_norm": 0.2744121551513672, |
|
"learning_rate": 4.26361585562359e-06, |
|
"loss": 0.0709, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.0917874396135265, |
|
"grad_norm": 0.29332056641578674, |
|
"learning_rate": 4.241057041572672e-06, |
|
"loss": 0.0768, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 2.0966183574879227, |
|
"grad_norm": 0.26820820569992065, |
|
"learning_rate": 4.218498227521753e-06, |
|
"loss": 0.0854, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.101449275362319, |
|
"grad_norm": 0.3563501536846161, |
|
"learning_rate": 4.195939413470835e-06, |
|
"loss": 0.0829, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 2.106280193236715, |
|
"grad_norm": 0.35537421703338623, |
|
"learning_rate": 4.173380599419917e-06, |
|
"loss": 0.0763, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.111111111111111, |
|
"grad_norm": 0.2760440707206726, |
|
"learning_rate": 4.150821785368997e-06, |
|
"loss": 0.092, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 2.1159420289855073, |
|
"grad_norm": 0.21750731766223907, |
|
"learning_rate": 4.128262971318079e-06, |
|
"loss": 0.0756, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.1207729468599035, |
|
"grad_norm": 0.2815890610218048, |
|
"learning_rate": 4.105704157267161e-06, |
|
"loss": 0.0844, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 2.1256038647342996, |
|
"grad_norm": 0.20408152043819427, |
|
"learning_rate": 4.083145343216242e-06, |
|
"loss": 0.0603, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.130434782608696, |
|
"grad_norm": 0.2452622503042221, |
|
"learning_rate": 4.060586529165324e-06, |
|
"loss": 0.0767, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 2.135265700483092, |
|
"grad_norm": 0.3027113080024719, |
|
"learning_rate": 4.038027715114405e-06, |
|
"loss": 0.0716, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.140096618357488, |
|
"grad_norm": 0.23567864298820496, |
|
"learning_rate": 4.0154689010634865e-06, |
|
"loss": 0.0845, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 2.1449275362318843, |
|
"grad_norm": 0.28407129645347595, |
|
"learning_rate": 3.9929100870125685e-06, |
|
"loss": 0.0784, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.14975845410628, |
|
"grad_norm": 0.28088170289993286, |
|
"learning_rate": 3.9703512729616505e-06, |
|
"loss": 0.0771, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 2.154589371980676, |
|
"grad_norm": 0.3641108274459839, |
|
"learning_rate": 3.947792458910732e-06, |
|
"loss": 0.0791, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.1594202898550723, |
|
"grad_norm": 0.23423610627651215, |
|
"learning_rate": 3.925233644859813e-06, |
|
"loss": 0.0735, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 2.1642512077294684, |
|
"grad_norm": 0.21887804567813873, |
|
"learning_rate": 3.902674830808895e-06, |
|
"loss": 0.0795, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.1690821256038646, |
|
"grad_norm": 0.24810364842414856, |
|
"learning_rate": 3.880116016757976e-06, |
|
"loss": 0.076, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 2.1739130434782608, |
|
"grad_norm": 0.217853844165802, |
|
"learning_rate": 3.857557202707058e-06, |
|
"loss": 0.0794, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.178743961352657, |
|
"grad_norm": 0.28543898463249207, |
|
"learning_rate": 3.834998388656139e-06, |
|
"loss": 0.0707, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 2.183574879227053, |
|
"grad_norm": 0.2932458221912384, |
|
"learning_rate": 3.81243957460522e-06, |
|
"loss": 0.0715, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.1884057971014492, |
|
"grad_norm": 0.3077555000782013, |
|
"learning_rate": 3.789880760554302e-06, |
|
"loss": 0.0756, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 2.1932367149758454, |
|
"grad_norm": 0.295901358127594, |
|
"learning_rate": 3.7673219465033837e-06, |
|
"loss": 0.0785, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.1980676328502415, |
|
"grad_norm": 0.2174501270055771, |
|
"learning_rate": 3.7447631324524653e-06, |
|
"loss": 0.0578, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 2.2028985507246377, |
|
"grad_norm": 0.2652744948863983, |
|
"learning_rate": 3.7222043184015473e-06, |
|
"loss": 0.0579, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.207729468599034, |
|
"grad_norm": 0.34323185682296753, |
|
"learning_rate": 3.699645504350628e-06, |
|
"loss": 0.072, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 2.21256038647343, |
|
"grad_norm": 0.3072277903556824, |
|
"learning_rate": 3.6770866902997096e-06, |
|
"loss": 0.0676, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.217391304347826, |
|
"grad_norm": 0.27712109684944153, |
|
"learning_rate": 3.6545278762487916e-06, |
|
"loss": 0.0699, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 0.2862177789211273, |
|
"learning_rate": 3.631969062197873e-06, |
|
"loss": 0.0643, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.2270531400966185, |
|
"grad_norm": 0.2914809286594391, |
|
"learning_rate": 3.6094102481469547e-06, |
|
"loss": 0.0702, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 2.2318840579710146, |
|
"grad_norm": 0.19755889475345612, |
|
"learning_rate": 3.586851434096036e-06, |
|
"loss": 0.0817, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.236714975845411, |
|
"grad_norm": 0.25922340154647827, |
|
"learning_rate": 3.5642926200451175e-06, |
|
"loss": 0.0602, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 2.241545893719807, |
|
"grad_norm": 0.30358242988586426, |
|
"learning_rate": 3.541733805994199e-06, |
|
"loss": 0.0725, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.246376811594203, |
|
"grad_norm": 0.2505339980125427, |
|
"learning_rate": 3.519174991943281e-06, |
|
"loss": 0.079, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 2.2512077294685993, |
|
"grad_norm": 0.2911323308944702, |
|
"learning_rate": 3.496616177892362e-06, |
|
"loss": 0.0673, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.2560386473429954, |
|
"grad_norm": 0.3253360092639923, |
|
"learning_rate": 3.4740573638414437e-06, |
|
"loss": 0.0776, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 2.260869565217391, |
|
"grad_norm": 0.2546384036540985, |
|
"learning_rate": 3.4514985497905253e-06, |
|
"loss": 0.0689, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.2657004830917873, |
|
"grad_norm": 0.29095250368118286, |
|
"learning_rate": 3.428939735739607e-06, |
|
"loss": 0.0812, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 2.2705314009661834, |
|
"grad_norm": 0.29789912700653076, |
|
"learning_rate": 3.4063809216886884e-06, |
|
"loss": 0.087, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.2753623188405796, |
|
"grad_norm": 0.23721112310886383, |
|
"learning_rate": 3.38382210763777e-06, |
|
"loss": 0.075, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 2.2801932367149758, |
|
"grad_norm": 0.2618652284145355, |
|
"learning_rate": 3.3612632935868516e-06, |
|
"loss": 0.0781, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.285024154589372, |
|
"grad_norm": 0.3185523748397827, |
|
"learning_rate": 3.3387044795359327e-06, |
|
"loss": 0.0865, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 2.289855072463768, |
|
"grad_norm": 0.30211564898490906, |
|
"learning_rate": 3.3161456654850143e-06, |
|
"loss": 0.0755, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.2946859903381642, |
|
"grad_norm": 0.18218393623828888, |
|
"learning_rate": 3.293586851434096e-06, |
|
"loss": 0.0695, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 2.2995169082125604, |
|
"grad_norm": 0.20001597702503204, |
|
"learning_rate": 3.2710280373831774e-06, |
|
"loss": 0.0744, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.3043478260869565, |
|
"grad_norm": 0.37984150648117065, |
|
"learning_rate": 3.248469223332259e-06, |
|
"loss": 0.0585, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 2.3091787439613527, |
|
"grad_norm": 0.31228166818618774, |
|
"learning_rate": 3.2259104092813406e-06, |
|
"loss": 0.0731, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.314009661835749, |
|
"grad_norm": 0.27851906418800354, |
|
"learning_rate": 3.203351595230422e-06, |
|
"loss": 0.0767, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 2.318840579710145, |
|
"grad_norm": 0.22976937890052795, |
|
"learning_rate": 3.1807927811795033e-06, |
|
"loss": 0.0738, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.323671497584541, |
|
"grad_norm": 0.24843037128448486, |
|
"learning_rate": 3.1582339671285853e-06, |
|
"loss": 0.0792, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 2.3285024154589373, |
|
"grad_norm": 0.23123487830162048, |
|
"learning_rate": 3.135675153077667e-06, |
|
"loss": 0.0752, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.3333333333333335, |
|
"grad_norm": 0.23363561928272247, |
|
"learning_rate": 3.113116339026748e-06, |
|
"loss": 0.0693, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 2.3381642512077296, |
|
"grad_norm": 0.2371598780155182, |
|
"learning_rate": 3.09055752497583e-06, |
|
"loss": 0.0781, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.342995169082126, |
|
"grad_norm": 0.320534884929657, |
|
"learning_rate": 3.067998710924911e-06, |
|
"loss": 0.0635, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 2.3478260869565215, |
|
"grad_norm": 0.2920200824737549, |
|
"learning_rate": 3.0454398968739927e-06, |
|
"loss": 0.0771, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.3526570048309177, |
|
"grad_norm": 0.32089921832084656, |
|
"learning_rate": 3.0228810828230747e-06, |
|
"loss": 0.0733, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 2.357487922705314, |
|
"grad_norm": 0.2733156979084015, |
|
"learning_rate": 3.000322268772156e-06, |
|
"loss": 0.0681, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.36231884057971, |
|
"grad_norm": 0.24564507603645325, |
|
"learning_rate": 2.9777634547212374e-06, |
|
"loss": 0.0771, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 2.367149758454106, |
|
"grad_norm": 0.24026136100292206, |
|
"learning_rate": 2.955204640670319e-06, |
|
"loss": 0.0748, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.3719806763285023, |
|
"grad_norm": 0.20703287422657013, |
|
"learning_rate": 2.9326458266194006e-06, |
|
"loss": 0.0688, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 2.3768115942028984, |
|
"grad_norm": 0.18269629776477814, |
|
"learning_rate": 2.910087012568482e-06, |
|
"loss": 0.0728, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.3816425120772946, |
|
"grad_norm": 0.3421408236026764, |
|
"learning_rate": 2.8875281985175637e-06, |
|
"loss": 0.0679, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 2.3864734299516908, |
|
"grad_norm": 0.4087986350059509, |
|
"learning_rate": 2.8649693844666453e-06, |
|
"loss": 0.0791, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.391304347826087, |
|
"grad_norm": 0.2629115879535675, |
|
"learning_rate": 2.8424105704157264e-06, |
|
"loss": 0.074, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 2.396135265700483, |
|
"grad_norm": 0.2295183390378952, |
|
"learning_rate": 2.8198517563648084e-06, |
|
"loss": 0.0739, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.4009661835748792, |
|
"grad_norm": 0.31765657663345337, |
|
"learning_rate": 2.79729294231389e-06, |
|
"loss": 0.0708, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 2.4057971014492754, |
|
"grad_norm": 0.31528520584106445, |
|
"learning_rate": 2.774734128262971e-06, |
|
"loss": 0.0673, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.4106280193236715, |
|
"grad_norm": 0.2358902543783188, |
|
"learning_rate": 2.752175314212053e-06, |
|
"loss": 0.0543, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 2.4154589371980677, |
|
"grad_norm": 0.2725466787815094, |
|
"learning_rate": 2.7296165001611343e-06, |
|
"loss": 0.0703, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.420289855072464, |
|
"grad_norm": 0.24531903862953186, |
|
"learning_rate": 2.707057686110216e-06, |
|
"loss": 0.0715, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 2.42512077294686, |
|
"grad_norm": 0.29307085275650024, |
|
"learning_rate": 2.6844988720592974e-06, |
|
"loss": 0.0752, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.429951690821256, |
|
"grad_norm": 0.2959176003932953, |
|
"learning_rate": 2.661940058008379e-06, |
|
"loss": 0.0685, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 2.4347826086956523, |
|
"grad_norm": 0.2573854923248291, |
|
"learning_rate": 2.6393812439574605e-06, |
|
"loss": 0.0664, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.4396135265700485, |
|
"grad_norm": 0.3154689371585846, |
|
"learning_rate": 2.616822429906542e-06, |
|
"loss": 0.0615, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 2.4444444444444446, |
|
"grad_norm": 0.21446138620376587, |
|
"learning_rate": 2.5942636158556237e-06, |
|
"loss": 0.0635, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.449275362318841, |
|
"grad_norm": 0.3040371537208557, |
|
"learning_rate": 2.571704801804705e-06, |
|
"loss": 0.0788, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 2.454106280193237, |
|
"grad_norm": 0.2636314034461975, |
|
"learning_rate": 2.5491459877537864e-06, |
|
"loss": 0.072, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.4589371980676327, |
|
"grad_norm": 0.26327863335609436, |
|
"learning_rate": 2.5265871737028684e-06, |
|
"loss": 0.0777, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 2.463768115942029, |
|
"grad_norm": 0.28980839252471924, |
|
"learning_rate": 2.5040283596519495e-06, |
|
"loss": 0.0694, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.468599033816425, |
|
"grad_norm": 0.2889906167984009, |
|
"learning_rate": 2.481469545601031e-06, |
|
"loss": 0.0703, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 2.473429951690821, |
|
"grad_norm": 0.2539612650871277, |
|
"learning_rate": 2.4589107315501127e-06, |
|
"loss": 0.0894, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.4782608695652173, |
|
"grad_norm": 0.25100603699684143, |
|
"learning_rate": 2.4363519174991943e-06, |
|
"loss": 0.0649, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 2.4830917874396135, |
|
"grad_norm": 0.24855615198612213, |
|
"learning_rate": 2.413793103448276e-06, |
|
"loss": 0.0687, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.4879227053140096, |
|
"grad_norm": 0.2766883671283722, |
|
"learning_rate": 2.3912342893973574e-06, |
|
"loss": 0.0712, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 2.4927536231884058, |
|
"grad_norm": 0.24230973422527313, |
|
"learning_rate": 2.368675475346439e-06, |
|
"loss": 0.0792, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.497584541062802, |
|
"grad_norm": 0.2981168031692505, |
|
"learning_rate": 2.34611666129552e-06, |
|
"loss": 0.0724, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 2.502415458937198, |
|
"grad_norm": 0.26249799132347107, |
|
"learning_rate": 2.323557847244602e-06, |
|
"loss": 0.0727, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.5072463768115942, |
|
"grad_norm": 0.23193541169166565, |
|
"learning_rate": 2.3009990331936837e-06, |
|
"loss": 0.0658, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 2.5120772946859904, |
|
"grad_norm": 0.3478648364543915, |
|
"learning_rate": 2.278440219142765e-06, |
|
"loss": 0.0766, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.5169082125603865, |
|
"grad_norm": 0.2009768933057785, |
|
"learning_rate": 2.255881405091847e-06, |
|
"loss": 0.0735, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 2.5217391304347827, |
|
"grad_norm": 0.2750122547149658, |
|
"learning_rate": 2.233322591040928e-06, |
|
"loss": 0.0778, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.526570048309179, |
|
"grad_norm": 0.22165286540985107, |
|
"learning_rate": 2.2107637769900095e-06, |
|
"loss": 0.0656, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 2.531400966183575, |
|
"grad_norm": 0.26584914326667786, |
|
"learning_rate": 2.1882049629390915e-06, |
|
"loss": 0.0723, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.536231884057971, |
|
"grad_norm": 0.30248183012008667, |
|
"learning_rate": 2.1656461488881727e-06, |
|
"loss": 0.0647, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 2.541062801932367, |
|
"grad_norm": 0.2667482793331146, |
|
"learning_rate": 2.1430873348372542e-06, |
|
"loss": 0.0694, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.545893719806763, |
|
"grad_norm": 0.2767150402069092, |
|
"learning_rate": 2.120528520786336e-06, |
|
"loss": 0.0818, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 2.550724637681159, |
|
"grad_norm": 0.30463531613349915, |
|
"learning_rate": 2.0979697067354174e-06, |
|
"loss": 0.0684, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.5555555555555554, |
|
"grad_norm": 0.2667052447795868, |
|
"learning_rate": 2.0754108926844985e-06, |
|
"loss": 0.068, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 2.5603864734299515, |
|
"grad_norm": 0.37567076086997986, |
|
"learning_rate": 2.0528520786335805e-06, |
|
"loss": 0.0578, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.5652173913043477, |
|
"grad_norm": 0.24227222800254822, |
|
"learning_rate": 2.030293264582662e-06, |
|
"loss": 0.0748, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 2.570048309178744, |
|
"grad_norm": 0.3247409760951996, |
|
"learning_rate": 2.0077344505317432e-06, |
|
"loss": 0.073, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.57487922705314, |
|
"grad_norm": 0.30261141061782837, |
|
"learning_rate": 1.9851756364808252e-06, |
|
"loss": 0.0722, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 2.579710144927536, |
|
"grad_norm": 0.2872192859649658, |
|
"learning_rate": 1.9626168224299064e-06, |
|
"loss": 0.0728, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.5845410628019323, |
|
"grad_norm": 0.3606136441230774, |
|
"learning_rate": 1.940058008378988e-06, |
|
"loss": 0.0735, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 2.5893719806763285, |
|
"grad_norm": 0.21871723234653473, |
|
"learning_rate": 1.9174991943280695e-06, |
|
"loss": 0.0682, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.5942028985507246, |
|
"grad_norm": 0.2941882312297821, |
|
"learning_rate": 1.894940380277151e-06, |
|
"loss": 0.0722, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 2.5990338164251208, |
|
"grad_norm": 0.31706181168556213, |
|
"learning_rate": 1.8723815662262327e-06, |
|
"loss": 0.0698, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.603864734299517, |
|
"grad_norm": 0.25599217414855957, |
|
"learning_rate": 1.849822752175314e-06, |
|
"loss": 0.0691, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 2.608695652173913, |
|
"grad_norm": 0.2954462468624115, |
|
"learning_rate": 1.8272639381243958e-06, |
|
"loss": 0.0831, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.6135265700483092, |
|
"grad_norm": 0.31768399477005005, |
|
"learning_rate": 1.8047051240734774e-06, |
|
"loss": 0.0684, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 2.6183574879227054, |
|
"grad_norm": 0.2380971759557724, |
|
"learning_rate": 1.7821463100225587e-06, |
|
"loss": 0.0604, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.6231884057971016, |
|
"grad_norm": 0.2857172191143036, |
|
"learning_rate": 1.7595874959716405e-06, |
|
"loss": 0.0648, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 2.6280193236714977, |
|
"grad_norm": 0.2866944968700409, |
|
"learning_rate": 1.7370286819207219e-06, |
|
"loss": 0.067, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.632850241545894, |
|
"grad_norm": 0.3259107172489166, |
|
"learning_rate": 1.7144698678698034e-06, |
|
"loss": 0.0789, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 2.63768115942029, |
|
"grad_norm": 0.23563902080059052, |
|
"learning_rate": 1.691911053818885e-06, |
|
"loss": 0.0826, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.642512077294686, |
|
"grad_norm": 0.33754512667655945, |
|
"learning_rate": 1.6693522397679664e-06, |
|
"loss": 0.0756, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 2.6473429951690823, |
|
"grad_norm": 0.22349333763122559, |
|
"learning_rate": 1.646793425717048e-06, |
|
"loss": 0.0773, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.6521739130434785, |
|
"grad_norm": 0.42616990208625793, |
|
"learning_rate": 1.6242346116661295e-06, |
|
"loss": 0.0676, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 2.6570048309178746, |
|
"grad_norm": 0.27920448780059814, |
|
"learning_rate": 1.601675797615211e-06, |
|
"loss": 0.07, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.661835748792271, |
|
"grad_norm": 0.34114235639572144, |
|
"learning_rate": 1.5791169835642926e-06, |
|
"loss": 0.0807, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 0.2515537142753601, |
|
"learning_rate": 1.556558169513374e-06, |
|
"loss": 0.0739, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.6714975845410627, |
|
"grad_norm": 0.24267147481441498, |
|
"learning_rate": 1.5339993554624556e-06, |
|
"loss": 0.0727, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 2.676328502415459, |
|
"grad_norm": 0.290988564491272, |
|
"learning_rate": 1.5114405414115374e-06, |
|
"loss": 0.0739, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.681159420289855, |
|
"grad_norm": 0.3821360766887665, |
|
"learning_rate": 1.4888817273606187e-06, |
|
"loss": 0.0792, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 2.685990338164251, |
|
"grad_norm": 0.284109890460968, |
|
"learning_rate": 1.4663229133097003e-06, |
|
"loss": 0.0767, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.6908212560386473, |
|
"grad_norm": 0.303076833486557, |
|
"learning_rate": 1.4437640992587819e-06, |
|
"loss": 0.0714, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 2.6956521739130435, |
|
"grad_norm": 0.37678495049476624, |
|
"learning_rate": 1.4212052852078632e-06, |
|
"loss": 0.0555, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.7004830917874396, |
|
"grad_norm": 0.23108994960784912, |
|
"learning_rate": 1.398646471156945e-06, |
|
"loss": 0.0833, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 2.7053140096618358, |
|
"grad_norm": 0.3246385157108307, |
|
"learning_rate": 1.3760876571060266e-06, |
|
"loss": 0.076, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.710144927536232, |
|
"grad_norm": 0.2140025794506073, |
|
"learning_rate": 1.353528843055108e-06, |
|
"loss": 0.0791, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 2.714975845410628, |
|
"grad_norm": 0.2923656404018402, |
|
"learning_rate": 1.3309700290041895e-06, |
|
"loss": 0.0892, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.7198067632850242, |
|
"grad_norm": 0.2978055775165558, |
|
"learning_rate": 1.308411214953271e-06, |
|
"loss": 0.0647, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 2.7246376811594204, |
|
"grad_norm": 0.2982514500617981, |
|
"learning_rate": 1.2858524009023524e-06, |
|
"loss": 0.0677, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.7294685990338166, |
|
"grad_norm": 0.2721270024776459, |
|
"learning_rate": 1.2632935868514342e-06, |
|
"loss": 0.0633, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 2.7342995169082127, |
|
"grad_norm": 0.2582114636898041, |
|
"learning_rate": 1.2407347728005156e-06, |
|
"loss": 0.0721, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.7391304347826084, |
|
"grad_norm": 0.2242422103881836, |
|
"learning_rate": 1.2181759587495971e-06, |
|
"loss": 0.0694, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 2.7439613526570046, |
|
"grad_norm": 0.2729090750217438, |
|
"learning_rate": 1.1956171446986787e-06, |
|
"loss": 0.0726, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.7487922705314007, |
|
"grad_norm": 0.34203121066093445, |
|
"learning_rate": 1.17305833064776e-06, |
|
"loss": 0.0796, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 2.753623188405797, |
|
"grad_norm": 0.30749765038490295, |
|
"learning_rate": 1.1504995165968418e-06, |
|
"loss": 0.07, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.758454106280193, |
|
"grad_norm": 0.3750080168247223, |
|
"learning_rate": 1.1279407025459234e-06, |
|
"loss": 0.08, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 2.763285024154589, |
|
"grad_norm": 0.32321617007255554, |
|
"learning_rate": 1.1053818884950048e-06, |
|
"loss": 0.082, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.7681159420289854, |
|
"grad_norm": 0.25304415822029114, |
|
"learning_rate": 1.0828230744440863e-06, |
|
"loss": 0.076, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 2.7729468599033815, |
|
"grad_norm": 0.30696550011634827, |
|
"learning_rate": 1.060264260393168e-06, |
|
"loss": 0.0703, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 0.3218288719654083, |
|
"learning_rate": 1.0377054463422493e-06, |
|
"loss": 0.0696, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 2.782608695652174, |
|
"grad_norm": 0.2573774755001068, |
|
"learning_rate": 1.015146632291331e-06, |
|
"loss": 0.0711, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.78743961352657, |
|
"grad_norm": 0.3438413143157959, |
|
"learning_rate": 9.925878182404126e-07, |
|
"loss": 0.0805, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 2.792270531400966, |
|
"grad_norm": 0.3613496422767639, |
|
"learning_rate": 9.70029004189494e-07, |
|
"loss": 0.0742, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.7971014492753623, |
|
"grad_norm": 0.2860325276851654, |
|
"learning_rate": 9.474701901385755e-07, |
|
"loss": 0.0735, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 2.8019323671497585, |
|
"grad_norm": 0.240507572889328, |
|
"learning_rate": 9.24911376087657e-07, |
|
"loss": 0.0677, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.8067632850241546, |
|
"grad_norm": 0.28737547993659973, |
|
"learning_rate": 9.023525620367387e-07, |
|
"loss": 0.0666, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 2.8115942028985508, |
|
"grad_norm": 0.34197041392326355, |
|
"learning_rate": 8.797937479858203e-07, |
|
"loss": 0.0799, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 2.816425120772947, |
|
"grad_norm": 0.326251745223999, |
|
"learning_rate": 8.572349339349017e-07, |
|
"loss": 0.0691, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 2.821256038647343, |
|
"grad_norm": 0.42289331555366516, |
|
"learning_rate": 8.346761198839832e-07, |
|
"loss": 0.0746, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 2.8260869565217392, |
|
"grad_norm": 0.28735774755477905, |
|
"learning_rate": 8.121173058330648e-07, |
|
"loss": 0.0782, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 2.8309178743961354, |
|
"grad_norm": 0.29395702481269836, |
|
"learning_rate": 7.895584917821463e-07, |
|
"loss": 0.08, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 2.8357487922705316, |
|
"grad_norm": 0.3306836187839508, |
|
"learning_rate": 7.669996777312278e-07, |
|
"loss": 0.0869, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 2.8405797101449277, |
|
"grad_norm": 0.2740659713745117, |
|
"learning_rate": 7.444408636803094e-07, |
|
"loss": 0.064, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.845410628019324, |
|
"grad_norm": 0.28304237127304077, |
|
"learning_rate": 7.218820496293909e-07, |
|
"loss": 0.0769, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 2.85024154589372, |
|
"grad_norm": 0.3081373870372772, |
|
"learning_rate": 6.993232355784725e-07, |
|
"loss": 0.0783, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.855072463768116, |
|
"grad_norm": 0.3063504099845886, |
|
"learning_rate": 6.76764421527554e-07, |
|
"loss": 0.0643, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 2.8599033816425123, |
|
"grad_norm": 0.2641620635986328, |
|
"learning_rate": 6.542056074766355e-07, |
|
"loss": 0.0658, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 2.864734299516908, |
|
"grad_norm": 0.3239176869392395, |
|
"learning_rate": 6.316467934257171e-07, |
|
"loss": 0.0677, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 2.869565217391304, |
|
"grad_norm": 0.23815782368183136, |
|
"learning_rate": 6.090879793747986e-07, |
|
"loss": 0.0686, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 2.8743961352657004, |
|
"grad_norm": 0.26518934965133667, |
|
"learning_rate": 5.8652916532388e-07, |
|
"loss": 0.073, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 2.8792270531400965, |
|
"grad_norm": 0.2455345243215561, |
|
"learning_rate": 5.639703512729617e-07, |
|
"loss": 0.0664, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 2.8840579710144927, |
|
"grad_norm": 0.2730591893196106, |
|
"learning_rate": 5.414115372220432e-07, |
|
"loss": 0.0745, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 2.888888888888889, |
|
"grad_norm": 0.3046686351299286, |
|
"learning_rate": 5.188527231711246e-07, |
|
"loss": 0.0637, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 2.893719806763285, |
|
"grad_norm": 0.26765045523643494, |
|
"learning_rate": 4.962939091202063e-07, |
|
"loss": 0.0818, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 2.898550724637681, |
|
"grad_norm": 0.2611401677131653, |
|
"learning_rate": 4.7373509506928777e-07, |
|
"loss": 0.0871, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.9033816425120773, |
|
"grad_norm": 0.3256029486656189, |
|
"learning_rate": 4.5117628101836934e-07, |
|
"loss": 0.0772, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 2.9082125603864735, |
|
"grad_norm": 0.3779186010360718, |
|
"learning_rate": 4.2861746696745086e-07, |
|
"loss": 0.0709, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 2.9130434782608696, |
|
"grad_norm": 0.248891681432724, |
|
"learning_rate": 4.060586529165324e-07, |
|
"loss": 0.0836, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 2.917874396135266, |
|
"grad_norm": 0.27647843956947327, |
|
"learning_rate": 3.834998388656139e-07, |
|
"loss": 0.0636, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 2.922705314009662, |
|
"grad_norm": 0.28876233100891113, |
|
"learning_rate": 3.6094102481469546e-07, |
|
"loss": 0.0648, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 2.927536231884058, |
|
"grad_norm": 0.26836660504341125, |
|
"learning_rate": 3.38382210763777e-07, |
|
"loss": 0.0726, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 2.9323671497584543, |
|
"grad_norm": 0.2655857503414154, |
|
"learning_rate": 3.1582339671285855e-07, |
|
"loss": 0.0736, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 2.9371980676328504, |
|
"grad_norm": 0.30681997537612915, |
|
"learning_rate": 2.9326458266194e-07, |
|
"loss": 0.0688, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.942028985507246, |
|
"grad_norm": 0.3034045994281769, |
|
"learning_rate": 2.707057686110216e-07, |
|
"loss": 0.0611, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 2.9468599033816423, |
|
"grad_norm": 0.24807259440422058, |
|
"learning_rate": 2.4814695456010315e-07, |
|
"loss": 0.0782, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.9516908212560384, |
|
"grad_norm": 0.34220463037490845, |
|
"learning_rate": 2.2558814050918467e-07, |
|
"loss": 0.0751, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 2.9565217391304346, |
|
"grad_norm": 0.2882407009601593, |
|
"learning_rate": 2.030293264582662e-07, |
|
"loss": 0.0686, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 2.9613526570048307, |
|
"grad_norm": 0.31148266792297363, |
|
"learning_rate": 1.8047051240734773e-07, |
|
"loss": 0.0668, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 2.966183574879227, |
|
"grad_norm": 0.2847365736961365, |
|
"learning_rate": 1.5791169835642927e-07, |
|
"loss": 0.0785, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 2.971014492753623, |
|
"grad_norm": 0.2872695028781891, |
|
"learning_rate": 1.353528843055108e-07, |
|
"loss": 0.0723, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 2.975845410628019, |
|
"grad_norm": 0.24350111186504364, |
|
"learning_rate": 1.1279407025459234e-07, |
|
"loss": 0.0669, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 2.9806763285024154, |
|
"grad_norm": 0.2746003270149231, |
|
"learning_rate": 9.023525620367387e-08, |
|
"loss": 0.0814, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 2.9855072463768115, |
|
"grad_norm": 0.255521684885025, |
|
"learning_rate": 6.76764421527554e-08, |
|
"loss": 0.0774, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 2.9903381642512077, |
|
"grad_norm": 0.35289525985717773, |
|
"learning_rate": 4.511762810183693e-08, |
|
"loss": 0.0645, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 2.995169082125604, |
|
"grad_norm": 0.279884934425354, |
|
"learning_rate": 2.2558814050918466e-08, |
|
"loss": 0.0738, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.4045591652393341, |
|
"learning_rate": 0.0, |
|
"loss": 0.0711, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_runtime": 338.6263, |
|
"eval_samples_per_second": 3.054, |
|
"eval_steps_per_second": 0.384, |
|
"step": 3105 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 3105, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1255832139272192e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|