|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.996258885147775, |
|
"eval_steps": 500, |
|
"global_step": 3340, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.014964459408903853, |
|
"grad_norm": 0.5450117588043213, |
|
"learning_rate": 4.999972352489418e-05, |
|
"loss": 1.3208, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.029928918817807706, |
|
"grad_norm": 0.5009211301803589, |
|
"learning_rate": 4.9998894105691785e-05, |
|
"loss": 1.2903, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04489337822671156, |
|
"grad_norm": 0.45117729902267456, |
|
"learning_rate": 4.9997511760737915e-05, |
|
"loss": 1.2271, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.05985783763561541, |
|
"grad_norm": 0.5625576376914978, |
|
"learning_rate": 4.999557652060729e-05, |
|
"loss": 1.186, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07482229704451927, |
|
"grad_norm": 0.519900381565094, |
|
"learning_rate": 4.999308842810357e-05, |
|
"loss": 1.1302, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.08978675645342311, |
|
"grad_norm": 0.593724250793457, |
|
"learning_rate": 4.999004753825842e-05, |
|
"loss": 1.1372, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.10475121586232697, |
|
"grad_norm": 0.663527250289917, |
|
"learning_rate": 4.998645391833024e-05, |
|
"loss": 1.0359, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.11971567527123082, |
|
"grad_norm": 0.6744909286499023, |
|
"learning_rate": 4.9982307647802765e-05, |
|
"loss": 1.0511, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13468013468013468, |
|
"grad_norm": 0.5474820137023926, |
|
"learning_rate": 4.9977608818383226e-05, |
|
"loss": 0.9909, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.14964459408903855, |
|
"grad_norm": 0.5713778734207153, |
|
"learning_rate": 4.9972357534000394e-05, |
|
"loss": 1.0139, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1646090534979424, |
|
"grad_norm": 0.6148533225059509, |
|
"learning_rate": 4.99665539108022e-05, |
|
"loss": 1.0156, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.17957351290684623, |
|
"grad_norm": 0.6597582697868347, |
|
"learning_rate": 4.996019807715324e-05, |
|
"loss": 0.995, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1945379723157501, |
|
"grad_norm": 0.6145315170288086, |
|
"learning_rate": 4.9953290173631896e-05, |
|
"loss": 0.9641, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.20950243172465394, |
|
"grad_norm": 0.7690613865852356, |
|
"learning_rate": 4.994583035302723e-05, |
|
"loss": 0.9934, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2244668911335578, |
|
"grad_norm": 0.7555385828018188, |
|
"learning_rate": 4.9937818780335646e-05, |
|
"loss": 0.946, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.23943135054246165, |
|
"grad_norm": 0.7178649306297302, |
|
"learning_rate": 4.992925563275714e-05, |
|
"loss": 0.91, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2543958099513655, |
|
"grad_norm": 0.7035357356071472, |
|
"learning_rate": 4.99201410996915e-05, |
|
"loss": 0.9612, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.26936026936026936, |
|
"grad_norm": 0.7382842898368835, |
|
"learning_rate": 4.9910475382734034e-05, |
|
"loss": 0.8687, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2843247287691732, |
|
"grad_norm": 0.8056157231330872, |
|
"learning_rate": 4.990025869567117e-05, |
|
"loss": 0.9038, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2992891881780771, |
|
"grad_norm": 0.9074241518974304, |
|
"learning_rate": 4.988949126447567e-05, |
|
"loss": 0.9063, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.31425364758698093, |
|
"grad_norm": 0.7684347033500671, |
|
"learning_rate": 4.987817332730166e-05, |
|
"loss": 0.9065, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.3292181069958848, |
|
"grad_norm": 0.8751540184020996, |
|
"learning_rate": 4.986630513447938e-05, |
|
"loss": 0.9492, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3441825664047886, |
|
"grad_norm": 0.8586133718490601, |
|
"learning_rate": 4.985388694850963e-05, |
|
"loss": 0.9085, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.35914702581369246, |
|
"grad_norm": 0.7148111462593079, |
|
"learning_rate": 4.984091904405793e-05, |
|
"loss": 0.9125, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.37411148522259635, |
|
"grad_norm": 0.7763463258743286, |
|
"learning_rate": 4.9827401707948504e-05, |
|
"loss": 0.9019, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3890759446315002, |
|
"grad_norm": 0.9074414372444153, |
|
"learning_rate": 4.981333523915792e-05, |
|
"loss": 0.8188, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.40404040404040403, |
|
"grad_norm": 1.0170953273773193, |
|
"learning_rate": 4.979871994880845e-05, |
|
"loss": 0.8757, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.4190048634493079, |
|
"grad_norm": 0.8414100408554077, |
|
"learning_rate": 4.97835561601612e-05, |
|
"loss": 0.8711, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.43396932285821177, |
|
"grad_norm": 0.9481101036071777, |
|
"learning_rate": 4.9767844208608984e-05, |
|
"loss": 0.8371, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.4489337822671156, |
|
"grad_norm": 0.8936446905136108, |
|
"learning_rate": 4.9751584441668874e-05, |
|
"loss": 0.8282, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.46389824167601945, |
|
"grad_norm": 0.906244695186615, |
|
"learning_rate": 4.973477721897454e-05, |
|
"loss": 0.8702, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.4788627010849233, |
|
"grad_norm": 0.9465859532356262, |
|
"learning_rate": 4.971742291226827e-05, |
|
"loss": 0.8779, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.49382716049382713, |
|
"grad_norm": 0.752061128616333, |
|
"learning_rate": 4.969952190539276e-05, |
|
"loss": 0.8855, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.508791619902731, |
|
"grad_norm": 0.9224424958229065, |
|
"learning_rate": 4.968107459428265e-05, |
|
"loss": 0.8211, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5237560793116348, |
|
"grad_norm": 0.8061802387237549, |
|
"learning_rate": 4.9662081386955714e-05, |
|
"loss": 0.84, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.5387205387205387, |
|
"grad_norm": 1.4521487951278687, |
|
"learning_rate": 4.964254270350387e-05, |
|
"loss": 0.8529, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5536849981294426, |
|
"grad_norm": 0.807750403881073, |
|
"learning_rate": 4.9622458976083885e-05, |
|
"loss": 0.8891, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.5686494575383464, |
|
"grad_norm": 0.9400819540023804, |
|
"learning_rate": 4.960183064890782e-05, |
|
"loss": 0.8705, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5836139169472503, |
|
"grad_norm": 0.8715450763702393, |
|
"learning_rate": 4.958065817823318e-05, |
|
"loss": 0.8671, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.5985783763561542, |
|
"grad_norm": 0.9633534550666809, |
|
"learning_rate": 4.955894203235284e-05, |
|
"loss": 0.8379, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.613542835765058, |
|
"grad_norm": 0.8975620865821838, |
|
"learning_rate": 4.953668269158472e-05, |
|
"loss": 0.8086, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.6285072951739619, |
|
"grad_norm": 0.904045045375824, |
|
"learning_rate": 4.9513880648261114e-05, |
|
"loss": 0.8183, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.6434717545828657, |
|
"grad_norm": 1.009617805480957, |
|
"learning_rate": 4.949053640671778e-05, |
|
"loss": 0.8557, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.6584362139917695, |
|
"grad_norm": 1.1475061178207397, |
|
"learning_rate": 4.946665048328287e-05, |
|
"loss": 0.8809, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6734006734006734, |
|
"grad_norm": 1.087480068206787, |
|
"learning_rate": 4.944222340626543e-05, |
|
"loss": 0.7887, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.6883651328095772, |
|
"grad_norm": 0.9593074321746826, |
|
"learning_rate": 4.9417255715943766e-05, |
|
"loss": 0.8965, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.7033295922184811, |
|
"grad_norm": 1.103148102760315, |
|
"learning_rate": 4.939174796455346e-05, |
|
"loss": 0.9189, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.7182940516273849, |
|
"grad_norm": 0.9249380826950073, |
|
"learning_rate": 4.936570071627518e-05, |
|
"loss": 0.8793, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7332585110362888, |
|
"grad_norm": 1.03147554397583, |
|
"learning_rate": 4.933911454722217e-05, |
|
"loss": 0.8052, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.7482229704451927, |
|
"grad_norm": 1.0135599374771118, |
|
"learning_rate": 4.9311990045427553e-05, |
|
"loss": 0.8033, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7631874298540965, |
|
"grad_norm": 1.0295403003692627, |
|
"learning_rate": 4.928432781083128e-05, |
|
"loss": 0.9045, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.7781518892630004, |
|
"grad_norm": 0.9905064105987549, |
|
"learning_rate": 4.92561284552669e-05, |
|
"loss": 0.8486, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7931163486719043, |
|
"grad_norm": 0.9656111598014832, |
|
"learning_rate": 4.9227392602447996e-05, |
|
"loss": 0.8324, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.8080808080808081, |
|
"grad_norm": 0.9249575734138489, |
|
"learning_rate": 4.91981208879544e-05, |
|
"loss": 0.8172, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.823045267489712, |
|
"grad_norm": 0.904988706111908, |
|
"learning_rate": 4.9168313959218135e-05, |
|
"loss": 0.8258, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.8380097268986157, |
|
"grad_norm": 1.060915231704712, |
|
"learning_rate": 4.913797247550912e-05, |
|
"loss": 0.867, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.8529741863075196, |
|
"grad_norm": 1.017268180847168, |
|
"learning_rate": 4.910709710792054e-05, |
|
"loss": 0.7974, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.8679386457164235, |
|
"grad_norm": 1.0362051725387573, |
|
"learning_rate": 4.9075688539354025e-05, |
|
"loss": 0.8596, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.8829031051253273, |
|
"grad_norm": 0.9945353269577026, |
|
"learning_rate": 4.904374746450459e-05, |
|
"loss": 0.8076, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.8978675645342312, |
|
"grad_norm": 0.986596941947937, |
|
"learning_rate": 4.901127458984516e-05, |
|
"loss": 0.8126, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.912832023943135, |
|
"grad_norm": 1.016927719116211, |
|
"learning_rate": 4.8978270633611086e-05, |
|
"loss": 0.817, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.9277964833520389, |
|
"grad_norm": 1.0122638940811157, |
|
"learning_rate": 4.8944736325784136e-05, |
|
"loss": 0.9226, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9427609427609428, |
|
"grad_norm": 1.04526948928833, |
|
"learning_rate": 4.891067240807641e-05, |
|
"loss": 0.7878, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.9577254021698466, |
|
"grad_norm": 0.8837614059448242, |
|
"learning_rate": 4.887607963391394e-05, |
|
"loss": 0.8187, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.9726898615787505, |
|
"grad_norm": 1.1496906280517578, |
|
"learning_rate": 4.884095876841999e-05, |
|
"loss": 0.8531, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.9876543209876543, |
|
"grad_norm": 0.9902486205101013, |
|
"learning_rate": 4.880531058839816e-05, |
|
"loss": 0.7615, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.0026187803965583, |
|
"grad_norm": 1.0682637691497803, |
|
"learning_rate": 4.87691358823152e-05, |
|
"loss": 0.8132, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.017583239805462, |
|
"grad_norm": 1.0381455421447754, |
|
"learning_rate": 4.8732435450283565e-05, |
|
"loss": 0.7877, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.0325476992143658, |
|
"grad_norm": 0.9033912420272827, |
|
"learning_rate": 4.869521010404373e-05, |
|
"loss": 0.7892, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.0475121586232696, |
|
"grad_norm": 0.9539169669151306, |
|
"learning_rate": 4.86574606669462e-05, |
|
"loss": 0.8192, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.0624766180321736, |
|
"grad_norm": 0.8774738907814026, |
|
"learning_rate": 4.861918797393336e-05, |
|
"loss": 0.753, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.0774410774410774, |
|
"grad_norm": 1.1601481437683105, |
|
"learning_rate": 4.8580392871520946e-05, |
|
"loss": 0.8113, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.0924055368499812, |
|
"grad_norm": 0.9350395202636719, |
|
"learning_rate": 4.854107621777938e-05, |
|
"loss": 0.8731, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.1073699962588852, |
|
"grad_norm": 1.0182013511657715, |
|
"learning_rate": 4.8501238882314715e-05, |
|
"loss": 0.8649, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.122334455667789, |
|
"grad_norm": 1.158241629600525, |
|
"learning_rate": 4.84608817462495e-05, |
|
"loss": 0.82, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.1372989150766928, |
|
"grad_norm": 1.0360873937606812, |
|
"learning_rate": 4.8420005702203196e-05, |
|
"loss": 0.8236, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.1522633744855968, |
|
"grad_norm": 1.1079692840576172, |
|
"learning_rate": 4.83786116542725e-05, |
|
"loss": 0.7584, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.1672278338945006, |
|
"grad_norm": 1.0275564193725586, |
|
"learning_rate": 4.833670051801131e-05, |
|
"loss": 0.7847, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.1821922933034044, |
|
"grad_norm": 1.1246060132980347, |
|
"learning_rate": 4.829427322041049e-05, |
|
"loss": 0.7597, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.1971567527123081, |
|
"grad_norm": 1.0290372371673584, |
|
"learning_rate": 4.825133069987737e-05, |
|
"loss": 0.733, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.2121212121212122, |
|
"grad_norm": 1.1085125207901, |
|
"learning_rate": 4.820787390621499e-05, |
|
"loss": 0.7729, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.227085671530116, |
|
"grad_norm": 1.2001007795333862, |
|
"learning_rate": 4.816390380060108e-05, |
|
"loss": 0.769, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.24205013093902, |
|
"grad_norm": 1.1357756853103638, |
|
"learning_rate": 4.8119421355566796e-05, |
|
"loss": 0.8017, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.2570145903479237, |
|
"grad_norm": 1.0524897575378418, |
|
"learning_rate": 4.807442755497524e-05, |
|
"loss": 0.7916, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.2719790497568275, |
|
"grad_norm": 1.1913483142852783, |
|
"learning_rate": 4.802892339399967e-05, |
|
"loss": 0.8058, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.2869435091657313, |
|
"grad_norm": 1.2256290912628174, |
|
"learning_rate": 4.7982909879101515e-05, |
|
"loss": 0.8267, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.3019079685746353, |
|
"grad_norm": 1.0073180198669434, |
|
"learning_rate": 4.7936388028008084e-05, |
|
"loss": 0.8316, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.316872427983539, |
|
"grad_norm": 1.030885934829712, |
|
"learning_rate": 4.7889358869690056e-05, |
|
"loss": 0.7874, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.3318368873924429, |
|
"grad_norm": 1.1515103578567505, |
|
"learning_rate": 4.784182344433878e-05, |
|
"loss": 0.8268, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.3468013468013469, |
|
"grad_norm": 0.8818157315254211, |
|
"learning_rate": 4.779378280334318e-05, |
|
"loss": 0.8366, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.3617658062102507, |
|
"grad_norm": 0.9439221024513245, |
|
"learning_rate": 4.7745238009266556e-05, |
|
"loss": 0.8279, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.3767302656191545, |
|
"grad_norm": 0.9476526379585266, |
|
"learning_rate": 4.7696190135823094e-05, |
|
"loss": 0.807, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.3916947250280582, |
|
"grad_norm": 0.9290974736213684, |
|
"learning_rate": 4.764664026785405e-05, |
|
"loss": 0.8259, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.4066591844369623, |
|
"grad_norm": 1.007653832435608, |
|
"learning_rate": 4.759658950130385e-05, |
|
"loss": 0.8344, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.421623643845866, |
|
"grad_norm": 1.1456594467163086, |
|
"learning_rate": 4.7546038943195736e-05, |
|
"loss": 0.7565, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.43658810325477, |
|
"grad_norm": 1.0801887512207031, |
|
"learning_rate": 4.749498971160742e-05, |
|
"loss": 0.7771, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.4515525626636738, |
|
"grad_norm": 0.9851332306861877, |
|
"learning_rate": 4.744344293564621e-05, |
|
"loss": 0.7803, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.4665170220725776, |
|
"grad_norm": 1.1050950288772583, |
|
"learning_rate": 4.739139975542415e-05, |
|
"loss": 0.8118, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.4814814814814814, |
|
"grad_norm": 1.030402421951294, |
|
"learning_rate": 4.7338861322032726e-05, |
|
"loss": 0.849, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.4964459408903854, |
|
"grad_norm": 0.9448444843292236, |
|
"learning_rate": 4.7285828797517465e-05, |
|
"loss": 0.7255, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.5114104002992892, |
|
"grad_norm": 1.0439302921295166, |
|
"learning_rate": 4.723230335485218e-05, |
|
"loss": 0.7413, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.5263748597081932, |
|
"grad_norm": 1.234944462776184, |
|
"learning_rate": 4.717828617791308e-05, |
|
"loss": 0.7648, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.541339319117097, |
|
"grad_norm": 1.1755177974700928, |
|
"learning_rate": 4.7123778461452536e-05, |
|
"loss": 0.7203, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.5563037785260008, |
|
"grad_norm": 0.9818833470344543, |
|
"learning_rate": 4.7068781411072686e-05, |
|
"loss": 0.7813, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.5712682379349046, |
|
"grad_norm": 1.286037802696228, |
|
"learning_rate": 4.7013296243198746e-05, |
|
"loss": 0.8098, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.5862326973438083, |
|
"grad_norm": 1.2840811014175415, |
|
"learning_rate": 4.695732418505214e-05, |
|
"loss": 0.7752, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.6011971567527123, |
|
"grad_norm": 0.9121096730232239, |
|
"learning_rate": 4.690086647462331e-05, |
|
"loss": 0.8124, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.6161616161616161, |
|
"grad_norm": 1.1097527742385864, |
|
"learning_rate": 4.684392436064439e-05, |
|
"loss": 0.8453, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.6311260755705201, |
|
"grad_norm": 1.0087509155273438, |
|
"learning_rate": 4.678649910256152e-05, |
|
"loss": 0.7736, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.646090534979424, |
|
"grad_norm": 1.028320074081421, |
|
"learning_rate": 4.6728591970507055e-05, |
|
"loss": 0.8248, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.6610549943883277, |
|
"grad_norm": 1.2182048559188843, |
|
"learning_rate": 4.6670204245271444e-05, |
|
"loss": 0.7903, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.6760194537972315, |
|
"grad_norm": 1.0399212837219238, |
|
"learning_rate": 4.661133721827486e-05, |
|
"loss": 0.7495, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.6909839132061353, |
|
"grad_norm": 1.0186119079589844, |
|
"learning_rate": 4.655199219153873e-05, |
|
"loss": 0.76, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.7059483726150393, |
|
"grad_norm": 1.2510963678359985, |
|
"learning_rate": 4.649217047765685e-05, |
|
"loss": 0.7618, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.7209128320239433, |
|
"grad_norm": 1.3667418956756592, |
|
"learning_rate": 4.643187339976639e-05, |
|
"loss": 0.8169, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.735877291432847, |
|
"grad_norm": 1.1502622365951538, |
|
"learning_rate": 4.637110229151863e-05, |
|
"loss": 0.8384, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.7508417508417509, |
|
"grad_norm": 0.9805833697319031, |
|
"learning_rate": 4.6309858497049464e-05, |
|
"loss": 0.757, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.7658062102506547, |
|
"grad_norm": 1.111222743988037, |
|
"learning_rate": 4.6248143370949636e-05, |
|
"loss": 0.8712, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.7807706696595584, |
|
"grad_norm": 1.0927162170410156, |
|
"learning_rate": 4.618595827823486e-05, |
|
"loss": 0.8009, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.7957351290684624, |
|
"grad_norm": 1.218826174736023, |
|
"learning_rate": 4.612330459431552e-05, |
|
"loss": 0.8323, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.8106995884773662, |
|
"grad_norm": 1.233129858970642, |
|
"learning_rate": 4.606018370496633e-05, |
|
"loss": 0.7373, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.8256640478862702, |
|
"grad_norm": 0.9207433462142944, |
|
"learning_rate": 4.5996597006295655e-05, |
|
"loss": 0.7533, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.840628507295174, |
|
"grad_norm": 1.1917186975479126, |
|
"learning_rate": 4.593254590471464e-05, |
|
"loss": 0.7831, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.8555929667040778, |
|
"grad_norm": 0.9462292194366455, |
|
"learning_rate": 4.586803181690609e-05, |
|
"loss": 0.7733, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.8705574261129816, |
|
"grad_norm": 0.9503220915794373, |
|
"learning_rate": 4.580305616979314e-05, |
|
"loss": 0.8178, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.8855218855218854, |
|
"grad_norm": 0.8902071118354797, |
|
"learning_rate": 4.573762040050772e-05, |
|
"loss": 0.8028, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.9004863449307894, |
|
"grad_norm": 1.1471889019012451, |
|
"learning_rate": 4.567172595635871e-05, |
|
"loss": 0.7499, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.9154508043396934, |
|
"grad_norm": 1.063602328300476, |
|
"learning_rate": 4.560537429479998e-05, |
|
"loss": 0.7516, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.9304152637485972, |
|
"grad_norm": 0.9486026763916016, |
|
"learning_rate": 4.553856688339817e-05, |
|
"loss": 0.7598, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.945379723157501, |
|
"grad_norm": 0.9863812923431396, |
|
"learning_rate": 4.547130519980014e-05, |
|
"loss": 0.8039, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.9603441825664047, |
|
"grad_norm": 1.0052098035812378, |
|
"learning_rate": 4.54035907317004e-05, |
|
"loss": 0.7574, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.9753086419753085, |
|
"grad_norm": 1.0039119720458984, |
|
"learning_rate": 4.533542497680812e-05, |
|
"loss": 0.7594, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.9902731013842125, |
|
"grad_norm": 1.0683871507644653, |
|
"learning_rate": 4.5266809442814035e-05, |
|
"loss": 0.7489, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 2.0052375607931165, |
|
"grad_norm": 0.964434802532196, |
|
"learning_rate": 4.519774564735711e-05, |
|
"loss": 0.7376, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.0202020202020203, |
|
"grad_norm": 1.0128512382507324, |
|
"learning_rate": 4.512823511799098e-05, |
|
"loss": 0.7275, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.035166479610924, |
|
"grad_norm": 1.0383211374282837, |
|
"learning_rate": 4.5058279392150096e-05, |
|
"loss": 0.749, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.050130939019828, |
|
"grad_norm": 0.9630417227745056, |
|
"learning_rate": 4.4987880017115793e-05, |
|
"loss": 0.7563, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 2.0650953984287317, |
|
"grad_norm": 1.2037107944488525, |
|
"learning_rate": 4.491703854998207e-05, |
|
"loss": 0.7426, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.0800598578376355, |
|
"grad_norm": 1.144274353981018, |
|
"learning_rate": 4.484575655762107e-05, |
|
"loss": 0.7323, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 2.0950243172465393, |
|
"grad_norm": 1.2053053379058838, |
|
"learning_rate": 4.477403561664852e-05, |
|
"loss": 0.7474, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.1099887766554435, |
|
"grad_norm": 1.039339542388916, |
|
"learning_rate": 4.4701877313388784e-05, |
|
"loss": 0.766, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 2.1249532360643473, |
|
"grad_norm": 1.0573480129241943, |
|
"learning_rate": 4.462928324383985e-05, |
|
"loss": 0.8314, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.139917695473251, |
|
"grad_norm": 1.2173911333084106, |
|
"learning_rate": 4.455625501363794e-05, |
|
"loss": 0.7388, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 2.154882154882155, |
|
"grad_norm": 1.1535879373550415, |
|
"learning_rate": 4.448279423802207e-05, |
|
"loss": 0.7698, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.1698466142910586, |
|
"grad_norm": 0.985844075679779, |
|
"learning_rate": 4.44089025417983e-05, |
|
"loss": 0.7692, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.1848110736999624, |
|
"grad_norm": 1.0249568223953247, |
|
"learning_rate": 4.43345815593038e-05, |
|
"loss": 0.7261, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.1997755331088666, |
|
"grad_norm": 1.305114507675171, |
|
"learning_rate": 4.425983293437069e-05, |
|
"loss": 0.8001, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 2.2147399925177704, |
|
"grad_norm": 1.1478573083877563, |
|
"learning_rate": 4.4184658320289675e-05, |
|
"loss": 0.8036, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.229704451926674, |
|
"grad_norm": 0.9925107359886169, |
|
"learning_rate": 4.410905937977352e-05, |
|
"loss": 0.7775, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 2.244668911335578, |
|
"grad_norm": 1.0552047491073608, |
|
"learning_rate": 4.403303778492022e-05, |
|
"loss": 0.7449, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.259633370744482, |
|
"grad_norm": 1.0449482202529907, |
|
"learning_rate": 4.395659521717607e-05, |
|
"loss": 0.7197, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 2.2745978301533856, |
|
"grad_norm": 1.0985392332077026, |
|
"learning_rate": 4.3879733367298405e-05, |
|
"loss": 0.7691, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.28956228956229, |
|
"grad_norm": 1.1123350858688354, |
|
"learning_rate": 4.3802453935318294e-05, |
|
"loss": 0.7501, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 2.3045267489711936, |
|
"grad_norm": 1.2243092060089111, |
|
"learning_rate": 4.372475863050286e-05, |
|
"loss": 0.7606, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.3194912083800974, |
|
"grad_norm": 0.9873678088188171, |
|
"learning_rate": 4.364664917131751e-05, |
|
"loss": 0.7605, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.334455667789001, |
|
"grad_norm": 1.1722878217697144, |
|
"learning_rate": 4.3568127285387925e-05, |
|
"loss": 0.7186, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.349420127197905, |
|
"grad_norm": 1.1724722385406494, |
|
"learning_rate": 4.348919470946185e-05, |
|
"loss": 0.7614, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 2.3643845866068087, |
|
"grad_norm": 1.0196776390075684, |
|
"learning_rate": 4.340985318937066e-05, |
|
"loss": 0.7537, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.3793490460157125, |
|
"grad_norm": 1.0410211086273193, |
|
"learning_rate": 4.333010447999077e-05, |
|
"loss": 0.7246, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 2.3943135054246163, |
|
"grad_norm": 1.0125558376312256, |
|
"learning_rate": 4.3249950345204806e-05, |
|
"loss": 0.7561, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.4092779648335205, |
|
"grad_norm": 0.9731377363204956, |
|
"learning_rate": 4.31693925578626e-05, |
|
"loss": 0.7418, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 2.4242424242424243, |
|
"grad_norm": 1.1109338998794556, |
|
"learning_rate": 4.3088432899741985e-05, |
|
"loss": 0.6956, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.439206883651328, |
|
"grad_norm": 1.4336915016174316, |
|
"learning_rate": 4.3007073161509345e-05, |
|
"loss": 0.7715, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 2.454171343060232, |
|
"grad_norm": 1.0564075708389282, |
|
"learning_rate": 4.292531514268008e-05, |
|
"loss": 0.7782, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.4691358024691357, |
|
"grad_norm": 1.1472092866897583, |
|
"learning_rate": 4.2843160651578726e-05, |
|
"loss": 0.7125, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.48410026187804, |
|
"grad_norm": 0.8819020390510559, |
|
"learning_rate": 4.276061150529903e-05, |
|
"loss": 0.7647, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.4990647212869437, |
|
"grad_norm": 1.1399636268615723, |
|
"learning_rate": 4.267766952966369e-05, |
|
"loss": 0.7327, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 2.5140291806958475, |
|
"grad_norm": 1.3246550559997559, |
|
"learning_rate": 4.259433655918404e-05, |
|
"loss": 0.7553, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.5289936401047513, |
|
"grad_norm": 1.1041203737258911, |
|
"learning_rate": 4.2510614437019416e-05, |
|
"loss": 0.7685, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 2.543958099513655, |
|
"grad_norm": 1.099228024482727, |
|
"learning_rate": 4.242650501493642e-05, |
|
"loss": 0.7207, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.558922558922559, |
|
"grad_norm": 1.140915036201477, |
|
"learning_rate": 4.2342010153267986e-05, |
|
"loss": 0.7253, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 2.5738870183314626, |
|
"grad_norm": 1.020257830619812, |
|
"learning_rate": 4.2257131720872164e-05, |
|
"loss": 0.8055, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.5888514777403664, |
|
"grad_norm": 1.1489384174346924, |
|
"learning_rate": 4.2171871595090826e-05, |
|
"loss": 0.7747, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 2.6038159371492706, |
|
"grad_norm": 1.0359724760055542, |
|
"learning_rate": 4.2086231661708185e-05, |
|
"loss": 0.7525, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.6187803965581744, |
|
"grad_norm": 1.1845808029174805, |
|
"learning_rate": 4.200021381490899e-05, |
|
"loss": 0.7259, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.633744855967078, |
|
"grad_norm": 1.0849671363830566, |
|
"learning_rate": 4.191381995723672e-05, |
|
"loss": 0.7267, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.648709315375982, |
|
"grad_norm": 1.1475163698196411, |
|
"learning_rate": 4.182705199955144e-05, |
|
"loss": 0.7862, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 2.6636737747848858, |
|
"grad_norm": 1.1838606595993042, |
|
"learning_rate": 4.173991186098757e-05, |
|
"loss": 0.8079, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.67863823419379, |
|
"grad_norm": 0.9914394021034241, |
|
"learning_rate": 4.165240146891145e-05, |
|
"loss": 0.7319, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 2.6936026936026938, |
|
"grad_norm": 1.149774193763733, |
|
"learning_rate": 4.1564522758878656e-05, |
|
"loss": 0.7478, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.7085671530115976, |
|
"grad_norm": 1.1517828702926636, |
|
"learning_rate": 4.147627767459124e-05, |
|
"loss": 0.8038, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 2.7235316124205013, |
|
"grad_norm": 1.1382722854614258, |
|
"learning_rate": 4.138766816785474e-05, |
|
"loss": 0.7596, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.738496071829405, |
|
"grad_norm": 0.9787604808807373, |
|
"learning_rate": 4.1298696198534955e-05, |
|
"loss": 0.6991, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 2.753460531238309, |
|
"grad_norm": 1.0162303447723389, |
|
"learning_rate": 4.1209363734514674e-05, |
|
"loss": 0.7014, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.7684249906472127, |
|
"grad_norm": 1.106070637702942, |
|
"learning_rate": 4.1119672751650074e-05, |
|
"loss": 0.8249, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.7833894500561165, |
|
"grad_norm": 1.2757290601730347, |
|
"learning_rate": 4.102962523372709e-05, |
|
"loss": 0.7091, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.7983539094650207, |
|
"grad_norm": 1.107681155204773, |
|
"learning_rate": 4.093922317241748e-05, |
|
"loss": 0.8038, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 2.8133183688739245, |
|
"grad_norm": 1.2649710178375244, |
|
"learning_rate": 4.0848468567234796e-05, |
|
"loss": 0.7707, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.8282828282828283, |
|
"grad_norm": 1.0683587789535522, |
|
"learning_rate": 4.075736342549018e-05, |
|
"loss": 0.7483, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 2.843247287691732, |
|
"grad_norm": 1.1959580183029175, |
|
"learning_rate": 4.066590976224791e-05, |
|
"loss": 0.7838, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.858211747100636, |
|
"grad_norm": 1.1794955730438232, |
|
"learning_rate": 4.0574109600280886e-05, |
|
"loss": 0.7758, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 2.87317620650954, |
|
"grad_norm": 1.0079686641693115, |
|
"learning_rate": 4.048196497002588e-05, |
|
"loss": 0.7591, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.888140665918444, |
|
"grad_norm": 1.265084981918335, |
|
"learning_rate": 4.038947790953859e-05, |
|
"loss": 0.7012, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 2.9031051253273477, |
|
"grad_norm": 1.0062329769134521, |
|
"learning_rate": 4.0296650464448616e-05, |
|
"loss": 0.8008, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.9180695847362514, |
|
"grad_norm": 1.1500215530395508, |
|
"learning_rate": 4.020348468791416e-05, |
|
"loss": 0.7492, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.9330340441451552, |
|
"grad_norm": 1.2765411138534546, |
|
"learning_rate": 4.0109982640576674e-05, |
|
"loss": 0.7736, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.947998503554059, |
|
"grad_norm": 1.0655264854431152, |
|
"learning_rate": 4.001614639051521e-05, |
|
"loss": 0.7198, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 2.962962962962963, |
|
"grad_norm": 1.269967794418335, |
|
"learning_rate": 3.9921978013200766e-05, |
|
"loss": 0.7513, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.9779274223718666, |
|
"grad_norm": 1.0883420705795288, |
|
"learning_rate": 3.98274795914503e-05, |
|
"loss": 0.804, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 2.992891881780771, |
|
"grad_norm": 1.0706652402877808, |
|
"learning_rate": 3.973265321538069e-05, |
|
"loss": 0.6987, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0078563411896746, |
|
"grad_norm": 1.1653481721878052, |
|
"learning_rate": 3.963750098236253e-05, |
|
"loss": 0.8132, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 3.0228208005985784, |
|
"grad_norm": 1.1991537809371948, |
|
"learning_rate": 3.954202499697373e-05, |
|
"loss": 0.7291, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 3.037785260007482, |
|
"grad_norm": 1.0241738557815552, |
|
"learning_rate": 3.944622737095294e-05, |
|
"loss": 0.7181, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 3.052749719416386, |
|
"grad_norm": 1.00551438331604, |
|
"learning_rate": 3.9350110223152844e-05, |
|
"loss": 0.732, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 3.0677141788252897, |
|
"grad_norm": 1.3171230554580688, |
|
"learning_rate": 3.925367567949335e-05, |
|
"loss": 0.8267, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 3.082678638234194, |
|
"grad_norm": 1.0425944328308105, |
|
"learning_rate": 3.9156925872914506e-05, |
|
"loss": 0.6677, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 3.0976430976430978, |
|
"grad_norm": 1.0785578489303589, |
|
"learning_rate": 3.905986294332935e-05, |
|
"loss": 0.7701, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 3.1126075570520015, |
|
"grad_norm": 1.089606523513794, |
|
"learning_rate": 3.8962489037576586e-05, |
|
"loss": 0.6776, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 3.1275720164609053, |
|
"grad_norm": 1.234337329864502, |
|
"learning_rate": 3.8864806309373076e-05, |
|
"loss": 0.7917, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 3.142536475869809, |
|
"grad_norm": 1.1076655387878418, |
|
"learning_rate": 3.876681691926624e-05, |
|
"loss": 0.7032, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 3.157500935278713, |
|
"grad_norm": 1.0013830661773682, |
|
"learning_rate": 3.866852303458623e-05, |
|
"loss": 0.7442, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 3.1724653946876167, |
|
"grad_norm": 1.2173100709915161, |
|
"learning_rate": 3.856992682939803e-05, |
|
"loss": 0.6668, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.187429854096521, |
|
"grad_norm": 1.1837962865829468, |
|
"learning_rate": 3.847103048445333e-05, |
|
"loss": 0.7408, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 3.2023943135054247, |
|
"grad_norm": 1.1347203254699707, |
|
"learning_rate": 3.837183618714233e-05, |
|
"loss": 0.7615, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 3.2173587729143285, |
|
"grad_norm": 1.1322319507598877, |
|
"learning_rate": 3.827234613144533e-05, |
|
"loss": 0.6853, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 3.2323232323232323, |
|
"grad_norm": 1.24197256565094, |
|
"learning_rate": 3.817256251788425e-05, |
|
"loss": 0.6563, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 3.247287691732136, |
|
"grad_norm": 1.1162407398223877, |
|
"learning_rate": 3.807248755347387e-05, |
|
"loss": 0.7744, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 3.2622521511410403, |
|
"grad_norm": 1.0863921642303467, |
|
"learning_rate": 3.79721234516731e-05, |
|
"loss": 0.7257, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 3.277216610549944, |
|
"grad_norm": 1.1099414825439453, |
|
"learning_rate": 3.787147243233602e-05, |
|
"loss": 0.7711, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 3.292181069958848, |
|
"grad_norm": 1.3686941862106323, |
|
"learning_rate": 3.77705367216627e-05, |
|
"loss": 0.7528, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.3071455293677516, |
|
"grad_norm": 1.1821480989456177, |
|
"learning_rate": 3.766931855215006e-05, |
|
"loss": 0.7642, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 3.3221099887766554, |
|
"grad_norm": 1.0392365455627441, |
|
"learning_rate": 3.756782016254242e-05, |
|
"loss": 0.7566, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 3.337074448185559, |
|
"grad_norm": 1.1076756715774536, |
|
"learning_rate": 3.746604379778203e-05, |
|
"loss": 0.6818, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 3.352038907594463, |
|
"grad_norm": 1.2123860120773315, |
|
"learning_rate": 3.7363991708959386e-05, |
|
"loss": 0.7248, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 3.3670033670033668, |
|
"grad_norm": 1.1529157161712646, |
|
"learning_rate": 3.726166615326344e-05, |
|
"loss": 0.7569, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 3.381967826412271, |
|
"grad_norm": 1.0874592065811157, |
|
"learning_rate": 3.715906939393172e-05, |
|
"loss": 0.7775, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 3.396932285821175, |
|
"grad_norm": 1.1039067506790161, |
|
"learning_rate": 3.70562037002002e-05, |
|
"loss": 0.7637, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 3.4118967452300786, |
|
"grad_norm": 1.1319911479949951, |
|
"learning_rate": 3.695307134725317e-05, |
|
"loss": 0.7701, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 3.4268612046389824, |
|
"grad_norm": 1.3674046993255615, |
|
"learning_rate": 3.684967461617289e-05, |
|
"loss": 0.7202, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 3.441825664047886, |
|
"grad_norm": 1.214239478111267, |
|
"learning_rate": 3.674601579388913e-05, |
|
"loss": 0.736, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.45679012345679, |
|
"grad_norm": 1.1035867929458618, |
|
"learning_rate": 3.66420971731286e-05, |
|
"loss": 0.7361, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 3.471754582865694, |
|
"grad_norm": 1.1282587051391602, |
|
"learning_rate": 3.653792105236422e-05, |
|
"loss": 0.7012, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 3.486719042274598, |
|
"grad_norm": 1.4782813787460327, |
|
"learning_rate": 3.6433489735764334e-05, |
|
"loss": 0.6902, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 3.5016835016835017, |
|
"grad_norm": 1.2365137338638306, |
|
"learning_rate": 3.6328805533141684e-05, |
|
"loss": 0.7524, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 3.5166479610924055, |
|
"grad_norm": 1.1180927753448486, |
|
"learning_rate": 3.622387075990233e-05, |
|
"loss": 0.727, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 3.5316124205013093, |
|
"grad_norm": 1.1830908060073853, |
|
"learning_rate": 3.611868773699449e-05, |
|
"loss": 0.7811, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 3.546576879910213, |
|
"grad_norm": 1.2073471546173096, |
|
"learning_rate": 3.6013258790857154e-05, |
|
"loss": 0.7164, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 3.561541339319117, |
|
"grad_norm": 1.1175339221954346, |
|
"learning_rate": 3.590758625336864e-05, |
|
"loss": 0.7238, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 3.576505798728021, |
|
"grad_norm": 1.2776098251342773, |
|
"learning_rate": 3.5801672461795034e-05, |
|
"loss": 0.7886, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 3.591470258136925, |
|
"grad_norm": 1.021897792816162, |
|
"learning_rate": 3.569551975873847e-05, |
|
"loss": 0.7491, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.6064347175458287, |
|
"grad_norm": 1.1327399015426636, |
|
"learning_rate": 3.558913049208534e-05, |
|
"loss": 0.7499, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 3.6213991769547325, |
|
"grad_norm": 1.105021595954895, |
|
"learning_rate": 3.548250701495432e-05, |
|
"loss": 0.6803, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 3.6363636363636362, |
|
"grad_norm": 1.1674951314926147, |
|
"learning_rate": 3.537565168564442e-05, |
|
"loss": 0.7302, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 3.6513280957725405, |
|
"grad_norm": 1.0947383642196655, |
|
"learning_rate": 3.526856686758269e-05, |
|
"loss": 0.7106, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.6662925551814443, |
|
"grad_norm": 1.1993179321289062, |
|
"learning_rate": 3.5161254929272046e-05, |
|
"loss": 0.793, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 3.681257014590348, |
|
"grad_norm": 1.1340358257293701, |
|
"learning_rate": 3.505371824423885e-05, |
|
"loss": 0.8239, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 3.696221473999252, |
|
"grad_norm": 1.7940102815628052, |
|
"learning_rate": 3.494595919098041e-05, |
|
"loss": 0.6556, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 3.7111859334081556, |
|
"grad_norm": 1.231870174407959, |
|
"learning_rate": 3.483798015291239e-05, |
|
"loss": 0.7934, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.7261503928170594, |
|
"grad_norm": 1.1797089576721191, |
|
"learning_rate": 3.4729783518316056e-05, |
|
"loss": 0.773, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 3.741114852225963, |
|
"grad_norm": 1.148738980293274, |
|
"learning_rate": 3.462137168028549e-05, |
|
"loss": 0.7345, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.756079311634867, |
|
"grad_norm": 1.1555569171905518, |
|
"learning_rate": 3.4512747036674644e-05, |
|
"loss": 0.7036, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 3.771043771043771, |
|
"grad_norm": 1.172443151473999, |
|
"learning_rate": 3.440391199004431e-05, |
|
"loss": 0.8012, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 3.786008230452675, |
|
"grad_norm": 1.4849857091903687, |
|
"learning_rate": 3.4294868947608964e-05, |
|
"loss": 0.7567, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 3.8009726898615788, |
|
"grad_norm": 1.1539514064788818, |
|
"learning_rate": 3.4185620321183545e-05, |
|
"loss": 0.7258, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 3.8159371492704826, |
|
"grad_norm": 1.3457891941070557, |
|
"learning_rate": 3.4076168527130094e-05, |
|
"loss": 0.7048, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 3.8309016086793863, |
|
"grad_norm": 1.1766413450241089, |
|
"learning_rate": 3.396651598630432e-05, |
|
"loss": 0.7275, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 3.8458660680882906, |
|
"grad_norm": 1.2674963474273682, |
|
"learning_rate": 3.3856665124002054e-05, |
|
"loss": 0.6935, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 3.8608305274971944, |
|
"grad_norm": 1.256225347518921, |
|
"learning_rate": 3.37466183699056e-05, |
|
"loss": 0.7128, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 3.875794986906098, |
|
"grad_norm": 1.098443627357483, |
|
"learning_rate": 3.363637815802998e-05, |
|
"loss": 0.6997, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 3.890759446315002, |
|
"grad_norm": 1.1758556365966797, |
|
"learning_rate": 3.352594692666915e-05, |
|
"loss": 0.6989, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.9057239057239057, |
|
"grad_norm": 1.265360951423645, |
|
"learning_rate": 3.3415327118342015e-05, |
|
"loss": 0.7412, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 3.9206883651328095, |
|
"grad_norm": 1.212694525718689, |
|
"learning_rate": 3.3304521179738437e-05, |
|
"loss": 0.7208, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 3.9356528245417133, |
|
"grad_norm": 1.2661161422729492, |
|
"learning_rate": 3.319353156166509e-05, |
|
"loss": 0.7097, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 3.950617283950617, |
|
"grad_norm": 1.0489590167999268, |
|
"learning_rate": 3.3082360718991304e-05, |
|
"loss": 0.7063, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.9655817433595213, |
|
"grad_norm": 1.304627537727356, |
|
"learning_rate": 3.297101111059471e-05, |
|
"loss": 0.7256, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 3.980546202768425, |
|
"grad_norm": 1.1489557027816772, |
|
"learning_rate": 3.2859485199306885e-05, |
|
"loss": 0.7, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 3.995510662177329, |
|
"grad_norm": 1.1346006393432617, |
|
"learning_rate": 3.274778545185888e-05, |
|
"loss": 0.7179, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 4.010475121586233, |
|
"grad_norm": 1.1032986640930176, |
|
"learning_rate": 3.263591433882666e-05, |
|
"loss": 0.7768, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 4.025439580995137, |
|
"grad_norm": 1.5759029388427734, |
|
"learning_rate": 3.252387433457645e-05, |
|
"loss": 0.6737, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 4.040404040404041, |
|
"grad_norm": 1.208949327468872, |
|
"learning_rate": 3.241166791721001e-05, |
|
"loss": 0.648, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 4.0553684998129444, |
|
"grad_norm": 1.332651972770691, |
|
"learning_rate": 3.2299297568509835e-05, |
|
"loss": 0.7591, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 4.070332959221848, |
|
"grad_norm": 1.2246062755584717, |
|
"learning_rate": 3.2186765773884245e-05, |
|
"loss": 0.6756, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 4.085297418630752, |
|
"grad_norm": 1.288155198097229, |
|
"learning_rate": 3.2074075022312417e-05, |
|
"loss": 0.7229, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 4.100261878039656, |
|
"grad_norm": 1.2574666738510132, |
|
"learning_rate": 3.196122780628936e-05, |
|
"loss": 0.7267, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 4.11522633744856, |
|
"grad_norm": 1.1441593170166016, |
|
"learning_rate": 3.1848226621770744e-05, |
|
"loss": 0.7363, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 4.130190796857463, |
|
"grad_norm": 1.3331890106201172, |
|
"learning_rate": 3.173507396811774e-05, |
|
"loss": 0.7083, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 4.145155256266367, |
|
"grad_norm": 1.2898328304290771, |
|
"learning_rate": 3.162177234804168e-05, |
|
"loss": 0.6997, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 4.160119715675271, |
|
"grad_norm": 1.0942869186401367, |
|
"learning_rate": 3.150832426754877e-05, |
|
"loss": 0.7047, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 4.175084175084175, |
|
"grad_norm": 1.1719509363174438, |
|
"learning_rate": 3.1394732235884615e-05, |
|
"loss": 0.6965, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 4.1900486344930785, |
|
"grad_norm": 1.2531814575195312, |
|
"learning_rate": 3.1280998765478727e-05, |
|
"loss": 0.7139, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.205013093901983, |
|
"grad_norm": 1.9164917469024658, |
|
"learning_rate": 3.116712637188897e-05, |
|
"loss": 0.7125, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 4.219977553310887, |
|
"grad_norm": 1.1178081035614014, |
|
"learning_rate": 3.10531175737459e-05, |
|
"loss": 0.7247, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 4.234942012719791, |
|
"grad_norm": 1.3899163007736206, |
|
"learning_rate": 3.0938974892697095e-05, |
|
"loss": 0.6983, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 4.2499064721286945, |
|
"grad_norm": 1.175616979598999, |
|
"learning_rate": 3.082470085335133e-05, |
|
"loss": 0.7491, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 4.264870931537598, |
|
"grad_norm": 1.1819936037063599, |
|
"learning_rate": 3.071029798322279e-05, |
|
"loss": 0.6763, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 4.279835390946502, |
|
"grad_norm": 1.109136939048767, |
|
"learning_rate": 3.0595768812675104e-05, |
|
"loss": 0.7401, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 4.294799850355406, |
|
"grad_norm": 1.1672794818878174, |
|
"learning_rate": 3.048111587486545e-05, |
|
"loss": 0.6849, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 4.30976430976431, |
|
"grad_norm": 1.3153440952301025, |
|
"learning_rate": 3.0366341705688468e-05, |
|
"loss": 0.7617, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 4.3247287691732135, |
|
"grad_norm": 1.1830875873565674, |
|
"learning_rate": 3.025144884372021e-05, |
|
"loss": 0.7097, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 4.339693228582117, |
|
"grad_norm": 1.1907213926315308, |
|
"learning_rate": 3.0136439830161967e-05, |
|
"loss": 0.6899, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 4.354657687991021, |
|
"grad_norm": 1.1969035863876343, |
|
"learning_rate": 3.0021317208784074e-05, |
|
"loss": 0.7034, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 4.369622147399925, |
|
"grad_norm": 1.181558609008789, |
|
"learning_rate": 2.990608352586965e-05, |
|
"loss": 0.7223, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 4.3845866068088295, |
|
"grad_norm": 1.2934932708740234, |
|
"learning_rate": 2.979074133015827e-05, |
|
"loss": 0.7026, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 4.399551066217733, |
|
"grad_norm": 1.2202645540237427, |
|
"learning_rate": 2.9675293172789583e-05, |
|
"loss": 0.734, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 4.414515525626637, |
|
"grad_norm": 1.2204395532608032, |
|
"learning_rate": 2.9559741607246922e-05, |
|
"loss": 0.7691, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 4.429479985035541, |
|
"grad_norm": 1.2286920547485352, |
|
"learning_rate": 2.9444089189300783e-05, |
|
"loss": 0.7691, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 1.1387462615966797, |
|
"learning_rate": 2.932833847695234e-05, |
|
"loss": 0.7064, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 4.459408903853348, |
|
"grad_norm": 1.3988234996795654, |
|
"learning_rate": 2.9212492030376814e-05, |
|
"loss": 0.6983, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 4.474373363262252, |
|
"grad_norm": 1.144126296043396, |
|
"learning_rate": 2.90965524118669e-05, |
|
"loss": 0.7616, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 4.489337822671156, |
|
"grad_norm": 1.073025107383728, |
|
"learning_rate": 2.8980522185776065e-05, |
|
"loss": 0.7386, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.50430228208006, |
|
"grad_norm": 1.3249400854110718, |
|
"learning_rate": 2.8864403918461812e-05, |
|
"loss": 0.6959, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 4.519266741488964, |
|
"grad_norm": 1.3409395217895508, |
|
"learning_rate": 2.874820017822899e-05, |
|
"loss": 0.696, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 4.534231200897867, |
|
"grad_norm": 1.2458475828170776, |
|
"learning_rate": 2.8631913535272888e-05, |
|
"loss": 0.7367, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 4.549195660306771, |
|
"grad_norm": 1.3703510761260986, |
|
"learning_rate": 2.8515546561622462e-05, |
|
"loss": 0.7221, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 4.564160119715675, |
|
"grad_norm": 1.0971307754516602, |
|
"learning_rate": 2.839910183108342e-05, |
|
"loss": 0.7485, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 4.57912457912458, |
|
"grad_norm": 1.198792815208435, |
|
"learning_rate": 2.828258191918131e-05, |
|
"loss": 0.8012, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 4.5940890385334825, |
|
"grad_norm": 1.2157917022705078, |
|
"learning_rate": 2.816598940310452e-05, |
|
"loss": 0.6885, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 4.609053497942387, |
|
"grad_norm": 1.2653173208236694, |
|
"learning_rate": 2.8049326861647302e-05, |
|
"loss": 0.7332, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 4.624017957351291, |
|
"grad_norm": 1.1794272661209106, |
|
"learning_rate": 2.7932596875152744e-05, |
|
"loss": 0.7952, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 4.638982416760195, |
|
"grad_norm": 1.2640421390533447, |
|
"learning_rate": 2.781580202545568e-05, |
|
"loss": 0.7742, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 4.6539468761690985, |
|
"grad_norm": 2.050365686416626, |
|
"learning_rate": 2.7698944895825572e-05, |
|
"loss": 0.7715, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 4.668911335578002, |
|
"grad_norm": 1.2108561992645264, |
|
"learning_rate": 2.7582028070909415e-05, |
|
"loss": 0.7624, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 4.683875794986906, |
|
"grad_norm": 1.2775709629058838, |
|
"learning_rate": 2.746505413667452e-05, |
|
"loss": 0.6833, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 4.69884025439581, |
|
"grad_norm": 1.178462266921997, |
|
"learning_rate": 2.7348025680351363e-05, |
|
"loss": 0.6924, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 4.713804713804714, |
|
"grad_norm": 1.1037096977233887, |
|
"learning_rate": 2.7230945290376325e-05, |
|
"loss": 0.6909, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 4.7287691732136174, |
|
"grad_norm": 1.241242527961731, |
|
"learning_rate": 2.7113815556334478e-05, |
|
"loss": 0.7844, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 4.743733632622521, |
|
"grad_norm": 1.220365047454834, |
|
"learning_rate": 2.6996639068902253e-05, |
|
"loss": 0.7149, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 4.758698092031425, |
|
"grad_norm": 1.3249695301055908, |
|
"learning_rate": 2.6879418419790204e-05, |
|
"loss": 0.6882, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 4.77366255144033, |
|
"grad_norm": 1.3471956253051758, |
|
"learning_rate": 2.6762156201685628e-05, |
|
"loss": 0.7442, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 4.788627010849233, |
|
"grad_norm": 1.2055602073669434, |
|
"learning_rate": 2.6644855008195267e-05, |
|
"loss": 0.7078, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.803591470258137, |
|
"grad_norm": 1.2006497383117676, |
|
"learning_rate": 2.6527517433787913e-05, |
|
"loss": 0.6789, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 4.818555929667041, |
|
"grad_norm": 1.1846423149108887, |
|
"learning_rate": 2.641014607373702e-05, |
|
"loss": 0.6703, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 4.833520389075945, |
|
"grad_norm": 1.2655390501022339, |
|
"learning_rate": 2.6292743524063334e-05, |
|
"loss": 0.6671, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 4.848484848484849, |
|
"grad_norm": 1.250118374824524, |
|
"learning_rate": 2.6175312381477442e-05, |
|
"loss": 0.6936, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 4.863449307893752, |
|
"grad_norm": 1.2387151718139648, |
|
"learning_rate": 2.6057855243322344e-05, |
|
"loss": 0.6755, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 4.878413767302656, |
|
"grad_norm": 1.3161075115203857, |
|
"learning_rate": 2.5940374707516015e-05, |
|
"loss": 0.6515, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 4.89337822671156, |
|
"grad_norm": 1.219498872756958, |
|
"learning_rate": 2.582287337249394e-05, |
|
"loss": 0.7108, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 4.908342686120464, |
|
"grad_norm": 1.4078658819198608, |
|
"learning_rate": 2.570535383715165e-05, |
|
"loss": 0.7038, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 4.9233071455293675, |
|
"grad_norm": 1.140682578086853, |
|
"learning_rate": 2.558781870078722e-05, |
|
"loss": 0.6804, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 4.938271604938271, |
|
"grad_norm": 1.4205572605133057, |
|
"learning_rate": 2.547027056304379e-05, |
|
"loss": 0.7491, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 4.953236064347175, |
|
"grad_norm": 1.2967289686203003, |
|
"learning_rate": 2.5352712023852066e-05, |
|
"loss": 0.7297, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 4.96820052375608, |
|
"grad_norm": 1.2759593725204468, |
|
"learning_rate": 2.5235145683372814e-05, |
|
"loss": 0.6731, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 4.983164983164983, |
|
"grad_norm": 1.1895300149917603, |
|
"learning_rate": 2.5117574141939337e-05, |
|
"loss": 0.7156, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 4.998129442573887, |
|
"grad_norm": 1.1513454914093018, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.7455, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 5.013093901982791, |
|
"grad_norm": 1.231416940689087, |
|
"learning_rate": 2.4882425858060668e-05, |
|
"loss": 0.7206, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 5.028058361391695, |
|
"grad_norm": 1.270216941833496, |
|
"learning_rate": 2.47648543166272e-05, |
|
"loss": 0.6685, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 5.043022820800599, |
|
"grad_norm": 1.4066438674926758, |
|
"learning_rate": 2.4647287976147946e-05, |
|
"loss": 0.6722, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 5.0579872802095025, |
|
"grad_norm": 1.3440229892730713, |
|
"learning_rate": 2.452972943695621e-05, |
|
"loss": 0.7271, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 5.072951739618406, |
|
"grad_norm": 1.1897931098937988, |
|
"learning_rate": 2.441218129921278e-05, |
|
"loss": 0.6775, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 5.08791619902731, |
|
"grad_norm": 1.2431669235229492, |
|
"learning_rate": 2.4294646162848354e-05, |
|
"loss": 0.7324, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.102880658436214, |
|
"grad_norm": 1.4123824834823608, |
|
"learning_rate": 2.4177126627506067e-05, |
|
"loss": 0.7041, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 5.117845117845118, |
|
"grad_norm": 1.3087615966796875, |
|
"learning_rate": 2.405962529248399e-05, |
|
"loss": 0.6902, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 5.132809577254021, |
|
"grad_norm": 1.1675366163253784, |
|
"learning_rate": 2.394214475667767e-05, |
|
"loss": 0.7462, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 5.147774036662925, |
|
"grad_norm": 1.1870967149734497, |
|
"learning_rate": 2.3824687618522567e-05, |
|
"loss": 0.7482, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 5.162738496071829, |
|
"grad_norm": 1.1886534690856934, |
|
"learning_rate": 2.370725647593666e-05, |
|
"loss": 0.7026, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 5.177702955480734, |
|
"grad_norm": 1.3220059871673584, |
|
"learning_rate": 2.3589853926262977e-05, |
|
"loss": 0.681, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 5.1926674148896375, |
|
"grad_norm": 1.2325706481933594, |
|
"learning_rate": 2.3472482566212093e-05, |
|
"loss": 0.7101, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 5.207631874298541, |
|
"grad_norm": 1.1882089376449585, |
|
"learning_rate": 2.3355144991804735e-05, |
|
"loss": 0.6857, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 5.222596333707445, |
|
"grad_norm": 1.3109657764434814, |
|
"learning_rate": 2.323784379831438e-05, |
|
"loss": 0.7127, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 5.237560793116349, |
|
"grad_norm": 1.1432446241378784, |
|
"learning_rate": 2.3120581580209808e-05, |
|
"loss": 0.6823, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 5.252525252525253, |
|
"grad_norm": 1.3307565450668335, |
|
"learning_rate": 2.3003360931097757e-05, |
|
"loss": 0.7118, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 5.267489711934156, |
|
"grad_norm": 1.6253339052200317, |
|
"learning_rate": 2.2886184443665525e-05, |
|
"loss": 0.7521, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 5.28245417134306, |
|
"grad_norm": 1.3010215759277344, |
|
"learning_rate": 2.2769054709623674e-05, |
|
"loss": 0.7331, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 5.297418630751964, |
|
"grad_norm": 1.2219674587249756, |
|
"learning_rate": 2.2651974319648643e-05, |
|
"loss": 0.7031, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 5.312383090160868, |
|
"grad_norm": 1.2299708127975464, |
|
"learning_rate": 2.2534945863325487e-05, |
|
"loss": 0.6622, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 5.3273475495697715, |
|
"grad_norm": 1.1474329233169556, |
|
"learning_rate": 2.241797192909059e-05, |
|
"loss": 0.6662, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 5.342312008978675, |
|
"grad_norm": 1.1639771461486816, |
|
"learning_rate": 2.2301055104174433e-05, |
|
"loss": 0.6913, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 5.357276468387579, |
|
"grad_norm": 1.2043278217315674, |
|
"learning_rate": 2.218419797454433e-05, |
|
"loss": 0.6777, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 5.372240927796484, |
|
"grad_norm": 1.2802300453186035, |
|
"learning_rate": 2.206740312484726e-05, |
|
"loss": 0.6608, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 5.3872053872053876, |
|
"grad_norm": 1.2886018753051758, |
|
"learning_rate": 2.19506731383527e-05, |
|
"loss": 0.6696, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.402169846614291, |
|
"grad_norm": 1.6271384954452515, |
|
"learning_rate": 2.1834010596895487e-05, |
|
"loss": 0.7117, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 5.417134306023195, |
|
"grad_norm": 1.3303827047348022, |
|
"learning_rate": 2.1717418080818696e-05, |
|
"loss": 0.6851, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 5.432098765432099, |
|
"grad_norm": 1.3058645725250244, |
|
"learning_rate": 2.1600898168916584e-05, |
|
"loss": 0.7386, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 5.447063224841003, |
|
"grad_norm": 1.3986623287200928, |
|
"learning_rate": 2.148445343837755e-05, |
|
"loss": 0.6995, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 5.4620276842499065, |
|
"grad_norm": 1.2918411493301392, |
|
"learning_rate": 2.1368086464727125e-05, |
|
"loss": 0.6936, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 5.47699214365881, |
|
"grad_norm": 1.1513465642929077, |
|
"learning_rate": 2.1251799821771012e-05, |
|
"loss": 0.7228, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 5.491956603067714, |
|
"grad_norm": 1.233217716217041, |
|
"learning_rate": 2.1135596081538184e-05, |
|
"loss": 0.77, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 5.506921062476618, |
|
"grad_norm": 1.2311054468154907, |
|
"learning_rate": 2.1019477814223944e-05, |
|
"loss": 0.6844, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 5.521885521885522, |
|
"grad_norm": 1.3642069101333618, |
|
"learning_rate": 2.09034475881331e-05, |
|
"loss": 0.7025, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 5.536849981294425, |
|
"grad_norm": 1.311928391456604, |
|
"learning_rate": 2.0787507969623192e-05, |
|
"loss": 0.6874, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 5.55181444070333, |
|
"grad_norm": 1.2631707191467285, |
|
"learning_rate": 2.0671661523047663e-05, |
|
"loss": 0.7446, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 5.566778900112233, |
|
"grad_norm": 1.1697314977645874, |
|
"learning_rate": 2.0555910810699223e-05, |
|
"loss": 0.7386, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 5.581743359521138, |
|
"grad_norm": 1.2585618495941162, |
|
"learning_rate": 2.0440258392753084e-05, |
|
"loss": 0.7292, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 5.596707818930041, |
|
"grad_norm": 1.357924461364746, |
|
"learning_rate": 2.032470682721042e-05, |
|
"loss": 0.7167, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 5.611672278338945, |
|
"grad_norm": 1.0884181261062622, |
|
"learning_rate": 2.0209258669841737e-05, |
|
"loss": 0.7249, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 5.626636737747849, |
|
"grad_norm": 1.3644371032714844, |
|
"learning_rate": 2.0093916474130353e-05, |
|
"loss": 0.7436, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 5.641601197156753, |
|
"grad_norm": 1.1837425231933594, |
|
"learning_rate": 1.997868279121593e-05, |
|
"loss": 0.6922, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 5.656565656565657, |
|
"grad_norm": 1.3669867515563965, |
|
"learning_rate": 1.9863560169838042e-05, |
|
"loss": 0.7689, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 5.67153011597456, |
|
"grad_norm": 1.3488072156906128, |
|
"learning_rate": 1.97485511562798e-05, |
|
"loss": 0.7074, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 5.686494575383464, |
|
"grad_norm": 1.1839897632598877, |
|
"learning_rate": 1.9633658294311535e-05, |
|
"loss": 0.7115, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.701459034792368, |
|
"grad_norm": 1.3153859376907349, |
|
"learning_rate": 1.9518884125134556e-05, |
|
"loss": 0.723, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 5.716423494201272, |
|
"grad_norm": 1.2922106981277466, |
|
"learning_rate": 1.9404231187324902e-05, |
|
"loss": 0.6543, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 5.7313879536101755, |
|
"grad_norm": 1.3643290996551514, |
|
"learning_rate": 1.928970201677722e-05, |
|
"loss": 0.7399, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 5.74635241301908, |
|
"grad_norm": 1.188324213027954, |
|
"learning_rate": 1.9175299146648674e-05, |
|
"loss": 0.6795, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 5.761316872427983, |
|
"grad_norm": 1.4890059232711792, |
|
"learning_rate": 1.906102510730291e-05, |
|
"loss": 0.721, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 5.776281331836888, |
|
"grad_norm": 1.4943420886993408, |
|
"learning_rate": 1.8946882426254105e-05, |
|
"loss": 0.6991, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 5.7912457912457915, |
|
"grad_norm": 1.2924257516860962, |
|
"learning_rate": 1.8832873628111038e-05, |
|
"loss": 0.7136, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 5.806210250654695, |
|
"grad_norm": 1.3031319379806519, |
|
"learning_rate": 1.8719001234521283e-05, |
|
"loss": 0.6695, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 5.821174710063599, |
|
"grad_norm": 1.2206610441207886, |
|
"learning_rate": 1.860526776411539e-05, |
|
"loss": 0.6473, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 5.836139169472503, |
|
"grad_norm": 1.173349142074585, |
|
"learning_rate": 1.849167573245123e-05, |
|
"loss": 0.6412, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 5.851103628881407, |
|
"grad_norm": 1.5744128227233887, |
|
"learning_rate": 1.8378227651958326e-05, |
|
"loss": 0.6956, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 5.8660680882903105, |
|
"grad_norm": 1.170933723449707, |
|
"learning_rate": 1.8264926031882272e-05, |
|
"loss": 0.7798, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 5.881032547699214, |
|
"grad_norm": 1.5066628456115723, |
|
"learning_rate": 1.8151773378229265e-05, |
|
"loss": 0.7011, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 5.895997007108118, |
|
"grad_norm": 1.2198915481567383, |
|
"learning_rate": 1.8038772193710646e-05, |
|
"loss": 0.724, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 5.910961466517022, |
|
"grad_norm": 1.227023959159851, |
|
"learning_rate": 1.792592497768759e-05, |
|
"loss": 0.6702, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 5.925925925925926, |
|
"grad_norm": 1.3417410850524902, |
|
"learning_rate": 1.7813234226115764e-05, |
|
"loss": 0.747, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 5.94089038533483, |
|
"grad_norm": 1.3337069749832153, |
|
"learning_rate": 1.7700702431490174e-05, |
|
"loss": 0.669, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 5.955854844743733, |
|
"grad_norm": 1.2036738395690918, |
|
"learning_rate": 1.7588332082789993e-05, |
|
"loss": 0.7339, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 5.970819304152638, |
|
"grad_norm": 1.1622107028961182, |
|
"learning_rate": 1.747612566542356e-05, |
|
"loss": 0.6925, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 5.985783763561542, |
|
"grad_norm": 1.3639973402023315, |
|
"learning_rate": 1.7364085661173347e-05, |
|
"loss": 0.6798, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.000748222970445, |
|
"grad_norm": 1.2021132707595825, |
|
"learning_rate": 1.725221454814112e-05, |
|
"loss": 0.7133, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 6.015712682379349, |
|
"grad_norm": 1.4045711755752563, |
|
"learning_rate": 1.7140514800693124e-05, |
|
"loss": 0.6953, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 6.030677141788253, |
|
"grad_norm": 1.2548061609268188, |
|
"learning_rate": 1.7028988889405296e-05, |
|
"loss": 0.6381, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 6.045641601197157, |
|
"grad_norm": 1.1166868209838867, |
|
"learning_rate": 1.69176392810087e-05, |
|
"loss": 0.7127, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 6.0606060606060606, |
|
"grad_norm": 1.2931350469589233, |
|
"learning_rate": 1.6806468438334917e-05, |
|
"loss": 0.7081, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 6.075570520014964, |
|
"grad_norm": 1.365538239479065, |
|
"learning_rate": 1.6695478820261573e-05, |
|
"loss": 0.6766, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 6.090534979423868, |
|
"grad_norm": 1.4035921096801758, |
|
"learning_rate": 1.658467288165799e-05, |
|
"loss": 0.6857, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 6.105499438832772, |
|
"grad_norm": 1.0855042934417725, |
|
"learning_rate": 1.647405307333085e-05, |
|
"loss": 0.7685, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 6.120463898241676, |
|
"grad_norm": 1.4982078075408936, |
|
"learning_rate": 1.6363621841970022e-05, |
|
"loss": 0.7044, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 6.1354283576505795, |
|
"grad_norm": 1.233553171157837, |
|
"learning_rate": 1.625338163009441e-05, |
|
"loss": 0.6415, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 6.150392817059484, |
|
"grad_norm": 2.476423978805542, |
|
"learning_rate": 1.6143334875997952e-05, |
|
"loss": 0.7047, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 6.165357276468388, |
|
"grad_norm": 1.2853014469146729, |
|
"learning_rate": 1.6033484013695687e-05, |
|
"loss": 0.7164, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 6.180321735877292, |
|
"grad_norm": 1.376776933670044, |
|
"learning_rate": 1.5923831472869915e-05, |
|
"loss": 0.6773, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 6.1952861952861955, |
|
"grad_norm": 1.2735328674316406, |
|
"learning_rate": 1.581437967881647e-05, |
|
"loss": 0.6457, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 6.210250654695099, |
|
"grad_norm": 1.3325200080871582, |
|
"learning_rate": 1.5705131052391042e-05, |
|
"loss": 0.7297, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 6.225215114104003, |
|
"grad_norm": 1.1959949731826782, |
|
"learning_rate": 1.5596088009955695e-05, |
|
"loss": 0.7535, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 6.240179573512907, |
|
"grad_norm": 1.307750940322876, |
|
"learning_rate": 1.5487252963325362e-05, |
|
"loss": 0.7605, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 6.255144032921811, |
|
"grad_norm": 1.3463622331619263, |
|
"learning_rate": 1.5378628319714512e-05, |
|
"loss": 0.7251, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 6.270108492330714, |
|
"grad_norm": 1.2366999387741089, |
|
"learning_rate": 1.5270216481683953e-05, |
|
"loss": 0.6835, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 6.285072951739618, |
|
"grad_norm": 1.2593817710876465, |
|
"learning_rate": 1.5162019847087617e-05, |
|
"loss": 0.6598, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.300037411148522, |
|
"grad_norm": 1.3024280071258545, |
|
"learning_rate": 1.5054040809019584e-05, |
|
"loss": 0.6683, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 6.315001870557426, |
|
"grad_norm": 1.4586106538772583, |
|
"learning_rate": 1.4946281755761152e-05, |
|
"loss": 0.6762, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 6.32996632996633, |
|
"grad_norm": 1.338810920715332, |
|
"learning_rate": 1.4838745070727958e-05, |
|
"loss": 0.6821, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 6.344930789375233, |
|
"grad_norm": 1.425808310508728, |
|
"learning_rate": 1.4731433132417316e-05, |
|
"loss": 0.6303, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 6.359895248784138, |
|
"grad_norm": 1.1587165594100952, |
|
"learning_rate": 1.4624348314355585e-05, |
|
"loss": 0.6306, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 6.374859708193042, |
|
"grad_norm": 1.3677455186843872, |
|
"learning_rate": 1.4517492985045678e-05, |
|
"loss": 0.7352, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 6.389824167601946, |
|
"grad_norm": 1.4579230546951294, |
|
"learning_rate": 1.4410869507914669e-05, |
|
"loss": 0.6911, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 6.404788627010849, |
|
"grad_norm": 1.3865454196929932, |
|
"learning_rate": 1.4304480241261528e-05, |
|
"loss": 0.6651, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 6.419753086419753, |
|
"grad_norm": 1.1365728378295898, |
|
"learning_rate": 1.4198327538204961e-05, |
|
"loss": 0.6779, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 6.434717545828657, |
|
"grad_norm": 1.271693229675293, |
|
"learning_rate": 1.409241374663136e-05, |
|
"loss": 0.7289, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 6.449682005237561, |
|
"grad_norm": 1.314024567604065, |
|
"learning_rate": 1.3986741209142845e-05, |
|
"loss": 0.6656, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 6.4646464646464645, |
|
"grad_norm": 1.2013462781906128, |
|
"learning_rate": 1.3881312263005519e-05, |
|
"loss": 0.6836, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 6.479610924055368, |
|
"grad_norm": 1.332503080368042, |
|
"learning_rate": 1.3776129240097673e-05, |
|
"loss": 0.7178, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 6.494575383464272, |
|
"grad_norm": 1.4150094985961914, |
|
"learning_rate": 1.3671194466858334e-05, |
|
"loss": 0.6895, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 6.509539842873176, |
|
"grad_norm": 1.3232195377349854, |
|
"learning_rate": 1.356651026423566e-05, |
|
"loss": 0.7292, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 6.524504302282081, |
|
"grad_norm": 1.324210286140442, |
|
"learning_rate": 1.3462078947635781e-05, |
|
"loss": 0.756, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 6.5394687616909835, |
|
"grad_norm": 1.2665998935699463, |
|
"learning_rate": 1.335790282687141e-05, |
|
"loss": 0.6959, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 6.554433221099888, |
|
"grad_norm": 1.1720548868179321, |
|
"learning_rate": 1.325398420611088e-05, |
|
"loss": 0.7918, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 6.569397680508792, |
|
"grad_norm": 1.0761444568634033, |
|
"learning_rate": 1.3150325383827117e-05, |
|
"loss": 0.679, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 6.584362139917696, |
|
"grad_norm": 1.4445922374725342, |
|
"learning_rate": 1.3046928652746832e-05, |
|
"loss": 0.802, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.5993265993265995, |
|
"grad_norm": 1.2890619039535522, |
|
"learning_rate": 1.2943796299799809e-05, |
|
"loss": 0.747, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 6.614291058735503, |
|
"grad_norm": 1.3807190656661987, |
|
"learning_rate": 1.2840930606068289e-05, |
|
"loss": 0.6693, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 6.629255518144407, |
|
"grad_norm": 1.4410628080368042, |
|
"learning_rate": 1.273833384673656e-05, |
|
"loss": 0.7011, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 6.644219977553311, |
|
"grad_norm": 1.255650520324707, |
|
"learning_rate": 1.2636008291040618e-05, |
|
"loss": 0.7627, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 6.659184436962215, |
|
"grad_norm": 1.2652361392974854, |
|
"learning_rate": 1.2533956202217975e-05, |
|
"loss": 0.6859, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 6.674148896371118, |
|
"grad_norm": 1.2963732481002808, |
|
"learning_rate": 1.243217983745758e-05, |
|
"loss": 0.7204, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 6.689113355780022, |
|
"grad_norm": 1.4088592529296875, |
|
"learning_rate": 1.2330681447849951e-05, |
|
"loss": 0.6392, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 6.704077815188926, |
|
"grad_norm": 1.3027905225753784, |
|
"learning_rate": 1.2229463278337308e-05, |
|
"loss": 0.7128, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 6.71904227459783, |
|
"grad_norm": 1.2761296033859253, |
|
"learning_rate": 1.2128527567663988e-05, |
|
"loss": 0.7145, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 6.7340067340067336, |
|
"grad_norm": 1.4830342531204224, |
|
"learning_rate": 1.2027876548326897e-05, |
|
"loss": 0.6784, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 6.748971193415638, |
|
"grad_norm": 1.2457510232925415, |
|
"learning_rate": 1.1927512446526142e-05, |
|
"loss": 0.6929, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 6.763935652824542, |
|
"grad_norm": 1.4039334058761597, |
|
"learning_rate": 1.1827437482115759e-05, |
|
"loss": 0.7516, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 6.778900112233446, |
|
"grad_norm": 1.3703151941299438, |
|
"learning_rate": 1.172765386855467e-05, |
|
"loss": 0.699, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 6.79386457164235, |
|
"grad_norm": 1.3183362483978271, |
|
"learning_rate": 1.1628163812857674e-05, |
|
"loss": 0.7607, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 6.808829031051253, |
|
"grad_norm": 1.2728744745254517, |
|
"learning_rate": 1.1528969515546672e-05, |
|
"loss": 0.6541, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 6.823793490460157, |
|
"grad_norm": 1.2783997058868408, |
|
"learning_rate": 1.1430073170601968e-05, |
|
"loss": 0.684, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 6.838757949869061, |
|
"grad_norm": 1.145731806755066, |
|
"learning_rate": 1.1331476965413773e-05, |
|
"loss": 0.7134, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 6.853722409277965, |
|
"grad_norm": 1.3381609916687012, |
|
"learning_rate": 1.1233183080733764e-05, |
|
"loss": 0.7275, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 6.8686868686868685, |
|
"grad_norm": 1.2908689975738525, |
|
"learning_rate": 1.1135193690626925e-05, |
|
"loss": 0.6796, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 6.883651328095772, |
|
"grad_norm": 1.5330723524093628, |
|
"learning_rate": 1.1037510962423425e-05, |
|
"loss": 0.674, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.898615787504676, |
|
"grad_norm": 1.3555113077163696, |
|
"learning_rate": 1.0940137056670655e-05, |
|
"loss": 0.6678, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 6.91358024691358, |
|
"grad_norm": 1.2070436477661133, |
|
"learning_rate": 1.0843074127085507e-05, |
|
"loss": 0.6954, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 6.928544706322484, |
|
"grad_norm": 1.4584565162658691, |
|
"learning_rate": 1.074632432050665e-05, |
|
"loss": 0.6517, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 6.943509165731388, |
|
"grad_norm": 1.2838579416275024, |
|
"learning_rate": 1.0649889776847161e-05, |
|
"loss": 0.6424, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 6.958473625140292, |
|
"grad_norm": 1.2093007564544678, |
|
"learning_rate": 1.0553772629047067e-05, |
|
"loss": 0.7396, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 6.973438084549196, |
|
"grad_norm": 1.5044478178024292, |
|
"learning_rate": 1.0457975003026276e-05, |
|
"loss": 0.6806, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 6.9884025439581, |
|
"grad_norm": 1.2098227739334106, |
|
"learning_rate": 1.0362499017637472e-05, |
|
"loss": 0.6835, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 7.0033670033670035, |
|
"grad_norm": 1.259406566619873, |
|
"learning_rate": 1.0267346784619324e-05, |
|
"loss": 0.6672, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 7.018331462775907, |
|
"grad_norm": 1.2552211284637451, |
|
"learning_rate": 1.0172520408549716e-05, |
|
"loss": 0.6341, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 7.033295922184811, |
|
"grad_norm": 1.3525948524475098, |
|
"learning_rate": 1.0078021986799238e-05, |
|
"loss": 0.6665, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 7.048260381593715, |
|
"grad_norm": 1.2309094667434692, |
|
"learning_rate": 9.983853609484786e-06, |
|
"loss": 0.6903, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 7.063224841002619, |
|
"grad_norm": 1.2575538158416748, |
|
"learning_rate": 9.890017359423325e-06, |
|
"loss": 0.7205, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 7.078189300411522, |
|
"grad_norm": 1.2174732685089111, |
|
"learning_rate": 9.796515312085841e-06, |
|
"loss": 0.6929, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 7.093153759820426, |
|
"grad_norm": 1.4941829442977905, |
|
"learning_rate": 9.703349535551387e-06, |
|
"loss": 0.6346, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 7.10811821922933, |
|
"grad_norm": 1.3313934803009033, |
|
"learning_rate": 9.610522090461415e-06, |
|
"loss": 0.6626, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 7.123082678638234, |
|
"grad_norm": 1.1870646476745605, |
|
"learning_rate": 9.518035029974126e-06, |
|
"loss": 0.6738, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 7.138047138047138, |
|
"grad_norm": 1.376810073852539, |
|
"learning_rate": 9.425890399719115e-06, |
|
"loss": 0.657, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 7.153011597456042, |
|
"grad_norm": 1.2887132167816162, |
|
"learning_rate": 9.334090237752094e-06, |
|
"loss": 0.712, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 7.167976056864946, |
|
"grad_norm": 1.4136420488357544, |
|
"learning_rate": 9.242636574509828e-06, |
|
"loss": 0.7623, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 7.18294051627385, |
|
"grad_norm": 1.2454450130462646, |
|
"learning_rate": 9.151531432765203e-06, |
|
"loss": 0.7891, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.197904975682754, |
|
"grad_norm": 1.3656915426254272, |
|
"learning_rate": 9.060776827582529e-06, |
|
"loss": 0.6479, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 7.212869435091657, |
|
"grad_norm": 1.3422670364379883, |
|
"learning_rate": 8.970374766272915e-06, |
|
"loss": 0.7534, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 7.227833894500561, |
|
"grad_norm": 1.4018194675445557, |
|
"learning_rate": 8.880327248349937e-06, |
|
"loss": 0.679, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 7.242798353909465, |
|
"grad_norm": 1.4204267263412476, |
|
"learning_rate": 8.790636265485334e-06, |
|
"loss": 0.6811, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 7.257762813318369, |
|
"grad_norm": 1.3640581369400024, |
|
"learning_rate": 8.701303801465052e-06, |
|
"loss": 0.6518, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 7.2727272727272725, |
|
"grad_norm": 1.255414366722107, |
|
"learning_rate": 8.612331832145268e-06, |
|
"loss": 0.6485, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 7.287691732136176, |
|
"grad_norm": 1.3959693908691406, |
|
"learning_rate": 8.523722325408758e-06, |
|
"loss": 0.6528, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 7.30265619154508, |
|
"grad_norm": 1.3679065704345703, |
|
"learning_rate": 8.435477241121353e-06, |
|
"loss": 0.6834, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 7.317620650953984, |
|
"grad_norm": 1.1936756372451782, |
|
"learning_rate": 8.347598531088554e-06, |
|
"loss": 0.6883, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 7.3325851103628885, |
|
"grad_norm": 1.3999428749084473, |
|
"learning_rate": 8.260088139012435e-06, |
|
"loss": 0.6906, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 7.347549569771792, |
|
"grad_norm": 1.3568490743637085, |
|
"learning_rate": 8.17294800044856e-06, |
|
"loss": 0.7172, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 7.362514029180696, |
|
"grad_norm": 1.362327218055725, |
|
"learning_rate": 8.086180042763283e-06, |
|
"loss": 0.6523, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 7.3774784885896, |
|
"grad_norm": 1.2796952724456787, |
|
"learning_rate": 7.999786185091008e-06, |
|
"loss": 0.7196, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 7.392442947998504, |
|
"grad_norm": 1.339594841003418, |
|
"learning_rate": 7.913768338291821e-06, |
|
"loss": 0.6475, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 7.407407407407407, |
|
"grad_norm": 1.3105710744857788, |
|
"learning_rate": 7.828128404909171e-06, |
|
"loss": 0.6756, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 7.422371866816311, |
|
"grad_norm": 1.3429076671600342, |
|
"learning_rate": 7.742868279127848e-06, |
|
"loss": 0.6886, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 7.437336326225215, |
|
"grad_norm": 1.4829093217849731, |
|
"learning_rate": 7.657989846732019e-06, |
|
"loss": 0.6894, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 7.452300785634119, |
|
"grad_norm": 1.4806331396102905, |
|
"learning_rate": 7.573494985063579e-06, |
|
"loss": 0.6653, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 7.467265245043023, |
|
"grad_norm": 1.2165873050689697, |
|
"learning_rate": 7.489385562980589e-06, |
|
"loss": 0.7941, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 7.482229704451926, |
|
"grad_norm": 1.4139281511306763, |
|
"learning_rate": 7.4056634408159685e-06, |
|
"loss": 0.689, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.49719416386083, |
|
"grad_norm": 1.307259202003479, |
|
"learning_rate": 7.3223304703363135e-06, |
|
"loss": 0.6626, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 7.512158623269734, |
|
"grad_norm": 1.5060079097747803, |
|
"learning_rate": 7.2393884947009745e-06, |
|
"loss": 0.7061, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 7.527123082678639, |
|
"grad_norm": 1.623346209526062, |
|
"learning_rate": 7.156839348421279e-06, |
|
"loss": 0.6958, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 7.542087542087542, |
|
"grad_norm": 1.3768142461776733, |
|
"learning_rate": 7.074684857319927e-06, |
|
"loss": 0.7661, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 7.557052001496446, |
|
"grad_norm": 1.7065874338150024, |
|
"learning_rate": 6.992926838490657e-06, |
|
"loss": 0.6989, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 7.57201646090535, |
|
"grad_norm": 1.4630271196365356, |
|
"learning_rate": 6.91156710025802e-06, |
|
"loss": 0.761, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 7.586980920314254, |
|
"grad_norm": 1.3342783451080322, |
|
"learning_rate": 6.830607442137405e-06, |
|
"loss": 0.6834, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 7.6019453797231575, |
|
"grad_norm": 1.3920519351959229, |
|
"learning_rate": 6.7500496547951984e-06, |
|
"loss": 0.6939, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 7.616909839132061, |
|
"grad_norm": 1.4310715198516846, |
|
"learning_rate": 6.6698955200092396e-06, |
|
"loss": 0.6789, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 7.631874298540965, |
|
"grad_norm": 1.2729769945144653, |
|
"learning_rate": 6.590146810629347e-06, |
|
"loss": 0.6925, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 7.646838757949869, |
|
"grad_norm": 1.2772436141967773, |
|
"learning_rate": 6.510805290538158e-06, |
|
"loss": 0.6714, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 7.661803217358773, |
|
"grad_norm": 1.3461037874221802, |
|
"learning_rate": 6.431872714612072e-06, |
|
"loss": 0.6973, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 7.6767676767676765, |
|
"grad_norm": 1.2915376424789429, |
|
"learning_rate": 6.353350828682494e-06, |
|
"loss": 0.6669, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 7.69173213617658, |
|
"grad_norm": 1.287246584892273, |
|
"learning_rate": 6.275241369497142e-06, |
|
"loss": 0.7157, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 7.706696595585484, |
|
"grad_norm": 1.4065686464309692, |
|
"learning_rate": 6.197546064681714e-06, |
|
"loss": 0.7474, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 7.721661054994389, |
|
"grad_norm": 1.5173590183258057, |
|
"learning_rate": 6.120266632701599e-06, |
|
"loss": 0.6442, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 7.7366255144032925, |
|
"grad_norm": 1.2145261764526367, |
|
"learning_rate": 6.043404782823939e-06, |
|
"loss": 0.6729, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 7.751589973812196, |
|
"grad_norm": 1.3860505819320679, |
|
"learning_rate": 5.966962215079786e-06, |
|
"loss": 0.7085, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 7.7665544332211, |
|
"grad_norm": 1.2852251529693604, |
|
"learning_rate": 5.890940620226479e-06, |
|
"loss": 0.6983, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 7.781518892630004, |
|
"grad_norm": 1.2326298952102661, |
|
"learning_rate": 5.815341679710326e-06, |
|
"loss": 0.6758, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.796483352038908, |
|
"grad_norm": 1.2480541467666626, |
|
"learning_rate": 5.740167065629312e-06, |
|
"loss": 0.6605, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 7.811447811447811, |
|
"grad_norm": 1.2479559183120728, |
|
"learning_rate": 5.665418440696202e-06, |
|
"loss": 0.6348, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 7.826412270856715, |
|
"grad_norm": 1.3373992443084717, |
|
"learning_rate": 5.591097458201699e-06, |
|
"loss": 0.746, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 7.841376730265619, |
|
"grad_norm": 1.3737704753875732, |
|
"learning_rate": 5.51720576197794e-06, |
|
"loss": 0.6511, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 7.856341189674523, |
|
"grad_norm": 1.3783513307571411, |
|
"learning_rate": 5.443744986362071e-06, |
|
"loss": 0.6767, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 7.871305649083427, |
|
"grad_norm": 1.2600133419036865, |
|
"learning_rate": 5.370716756160157e-06, |
|
"loss": 0.6918, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 7.88627010849233, |
|
"grad_norm": 1.254599928855896, |
|
"learning_rate": 5.298122686611212e-06, |
|
"loss": 0.7017, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 7.901234567901234, |
|
"grad_norm": 1.2620840072631836, |
|
"learning_rate": 5.2259643833514896e-06, |
|
"loss": 0.7181, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 7.916199027310139, |
|
"grad_norm": 1.2185419797897339, |
|
"learning_rate": 5.154243442378934e-06, |
|
"loss": 0.7121, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 7.931163486719043, |
|
"grad_norm": 1.360809564590454, |
|
"learning_rate": 5.082961450017943e-06, |
|
"loss": 0.6642, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 7.946127946127946, |
|
"grad_norm": 1.3635886907577515, |
|
"learning_rate": 5.012119982884209e-06, |
|
"loss": 0.7676, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 7.96109240553685, |
|
"grad_norm": 1.37740159034729, |
|
"learning_rate": 4.9417206078499115e-06, |
|
"loss": 0.6912, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 7.976056864945754, |
|
"grad_norm": 1.2868249416351318, |
|
"learning_rate": 4.871764882009025e-06, |
|
"loss": 0.6582, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 7.991021324354658, |
|
"grad_norm": 1.4278684854507446, |
|
"learning_rate": 4.802254352642882e-06, |
|
"loss": 0.6806, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 8.005985783763562, |
|
"grad_norm": 1.2541025876998901, |
|
"learning_rate": 4.7331905571859705e-06, |
|
"loss": 0.6896, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 8.020950243172466, |
|
"grad_norm": 1.2635290622711182, |
|
"learning_rate": 4.664575023191886e-06, |
|
"loss": 0.6491, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 8.035914702581369, |
|
"grad_norm": 1.266473412513733, |
|
"learning_rate": 4.5964092682996065e-06, |
|
"loss": 0.6457, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 8.050879161990274, |
|
"grad_norm": 1.4658360481262207, |
|
"learning_rate": 4.528694800199859e-06, |
|
"loss": 0.673, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 8.065843621399177, |
|
"grad_norm": 1.3015804290771484, |
|
"learning_rate": 4.46143311660184e-06, |
|
"loss": 0.661, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 8.080808080808081, |
|
"grad_norm": 1.334692358970642, |
|
"learning_rate": 4.394625705200011e-06, |
|
"loss": 0.7065, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 8.095772540216984, |
|
"grad_norm": 1.2139922380447388, |
|
"learning_rate": 4.328274043641295e-06, |
|
"loss": 0.7074, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 8.110736999625889, |
|
"grad_norm": 1.2450037002563477, |
|
"learning_rate": 4.262379599492283e-06, |
|
"loss": 0.666, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 8.125701459034792, |
|
"grad_norm": 1.3340483903884888, |
|
"learning_rate": 4.196943830206859e-06, |
|
"loss": 0.6469, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 8.140665918443696, |
|
"grad_norm": 1.3370238542556763, |
|
"learning_rate": 4.131968183093912e-06, |
|
"loss": 0.6642, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 8.1556303778526, |
|
"grad_norm": 1.2851170301437378, |
|
"learning_rate": 4.067454095285362e-06, |
|
"loss": 0.6602, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 8.170594837261504, |
|
"grad_norm": 1.5661766529083252, |
|
"learning_rate": 4.003402993704353e-06, |
|
"loss": 0.6465, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 8.185559296670407, |
|
"grad_norm": 1.2045555114746094, |
|
"learning_rate": 3.939816295033677e-06, |
|
"loss": 0.6823, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 8.200523756079312, |
|
"grad_norm": 1.3167060613632202, |
|
"learning_rate": 3.8766954056844855e-06, |
|
"loss": 0.7163, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 8.215488215488216, |
|
"grad_norm": 1.3332468271255493, |
|
"learning_rate": 3.8140417217651438e-06, |
|
"loss": 0.7558, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 8.23045267489712, |
|
"grad_norm": 1.344228744506836, |
|
"learning_rate": 3.7518566290503626e-06, |
|
"loss": 0.7451, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 8.245417134306024, |
|
"grad_norm": 1.346323847770691, |
|
"learning_rate": 3.690141502950542e-06, |
|
"loss": 0.6998, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 8.260381593714927, |
|
"grad_norm": 1.3617771863937378, |
|
"learning_rate": 3.6288977084813767e-06, |
|
"loss": 0.6885, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 8.275346053123831, |
|
"grad_norm": 1.2529648542404175, |
|
"learning_rate": 3.568126600233615e-06, |
|
"loss": 0.6851, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 8.290310512532734, |
|
"grad_norm": 1.4627494812011719, |
|
"learning_rate": 3.5078295223431536e-06, |
|
"loss": 0.7307, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 8.305274971941639, |
|
"grad_norm": 1.3447396755218506, |
|
"learning_rate": 3.4480078084612677e-06, |
|
"loss": 0.6878, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 8.320239431350542, |
|
"grad_norm": 1.2098687887191772, |
|
"learning_rate": 3.388662781725141e-06, |
|
"loss": 0.6968, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 8.335203890759447, |
|
"grad_norm": 1.2697949409484863, |
|
"learning_rate": 3.3297957547285626e-06, |
|
"loss": 0.7097, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 8.35016835016835, |
|
"grad_norm": 1.344548225402832, |
|
"learning_rate": 3.2714080294929477e-06, |
|
"loss": 0.6899, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 8.365132809577254, |
|
"grad_norm": 1.283050298690796, |
|
"learning_rate": 3.2135008974384874e-06, |
|
"loss": 0.611, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 8.380097268986157, |
|
"grad_norm": 1.4077322483062744, |
|
"learning_rate": 3.1560756393556183e-06, |
|
"loss": 0.6673, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.395061728395062, |
|
"grad_norm": 1.4759045839309692, |
|
"learning_rate": 3.0991335253766934e-06, |
|
"loss": 0.7485, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 8.410026187803966, |
|
"grad_norm": 1.3058305978775024, |
|
"learning_rate": 3.042675814947868e-06, |
|
"loss": 0.6873, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 8.42499064721287, |
|
"grad_norm": 1.3055214881896973, |
|
"learning_rate": 2.986703756801257e-06, |
|
"loss": 0.7064, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 8.439955106621774, |
|
"grad_norm": 1.2436131238937378, |
|
"learning_rate": 2.931218588927315e-06, |
|
"loss": 0.6871, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 8.454919566030677, |
|
"grad_norm": 1.5080686807632446, |
|
"learning_rate": 2.8762215385474633e-06, |
|
"loss": 0.7363, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 8.469884025439582, |
|
"grad_norm": 1.3684037923812866, |
|
"learning_rate": 2.8217138220869187e-06, |
|
"loss": 0.6719, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 8.484848484848484, |
|
"grad_norm": 1.3375248908996582, |
|
"learning_rate": 2.7676966451478214e-06, |
|
"loss": 0.6715, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 8.499812944257389, |
|
"grad_norm": 1.4447715282440186, |
|
"learning_rate": 2.714171202482538e-06, |
|
"loss": 0.6697, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 8.514777403666292, |
|
"grad_norm": 1.4097157716751099, |
|
"learning_rate": 2.661138677967279e-06, |
|
"loss": 0.7199, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 8.529741863075197, |
|
"grad_norm": 1.4371775388717651, |
|
"learning_rate": 2.6086002445758566e-06, |
|
"loss": 0.681, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 8.5447063224841, |
|
"grad_norm": 1.353463053703308, |
|
"learning_rate": 2.5565570643537954e-06, |
|
"loss": 0.6461, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 8.559670781893004, |
|
"grad_norm": 1.2656768560409546, |
|
"learning_rate": 2.505010288392587e-06, |
|
"loss": 0.723, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 8.574635241301909, |
|
"grad_norm": 1.3458527326583862, |
|
"learning_rate": 2.4539610568042657e-06, |
|
"loss": 0.6481, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 8.589599700710812, |
|
"grad_norm": 1.4183650016784668, |
|
"learning_rate": 2.4034104986961627e-06, |
|
"loss": 0.7229, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 8.604564160119716, |
|
"grad_norm": 1.3535906076431274, |
|
"learning_rate": 2.3533597321459516e-06, |
|
"loss": 0.6762, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 8.61952861952862, |
|
"grad_norm": 1.4276947975158691, |
|
"learning_rate": 2.303809864176909e-06, |
|
"loss": 0.6379, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 8.634493078937524, |
|
"grad_norm": 1.312292218208313, |
|
"learning_rate": 2.254761990733445e-06, |
|
"loss": 0.6753, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 8.649457538346427, |
|
"grad_norm": 1.3349074125289917, |
|
"learning_rate": 2.206217196656826e-06, |
|
"loss": 0.7395, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 8.664421997755332, |
|
"grad_norm": 1.367660403251648, |
|
"learning_rate": 2.1581765556612233e-06, |
|
"loss": 0.7564, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 8.679386457164235, |
|
"grad_norm": 1.302215337753296, |
|
"learning_rate": 2.1106411303099455e-06, |
|
"loss": 0.6862, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 8.69435091657314, |
|
"grad_norm": 1.2132118940353394, |
|
"learning_rate": 2.0636119719919246e-06, |
|
"loss": 0.7351, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 8.709315375982042, |
|
"grad_norm": 1.4168857336044312, |
|
"learning_rate": 2.017090120898485e-06, |
|
"loss": 0.6748, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 8.724279835390947, |
|
"grad_norm": 1.5280455350875854, |
|
"learning_rate": 1.971076606000327e-06, |
|
"loss": 0.6935, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 8.73924429479985, |
|
"grad_norm": 1.440262794494629, |
|
"learning_rate": 1.9255724450247674e-06, |
|
"loss": 0.6629, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 8.754208754208754, |
|
"grad_norm": 1.36149263381958, |
|
"learning_rate": 1.8805786444332092e-06, |
|
"loss": 0.6644, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 8.769173213617659, |
|
"grad_norm": 1.3209813833236694, |
|
"learning_rate": 1.836096199398929e-06, |
|
"loss": 0.6469, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 8.784137673026562, |
|
"grad_norm": 1.3598469495773315, |
|
"learning_rate": 1.7921260937850099e-06, |
|
"loss": 0.646, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 8.799102132435467, |
|
"grad_norm": 1.2802332639694214, |
|
"learning_rate": 1.7486693001226268e-06, |
|
"loss": 0.7487, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 8.81406659184437, |
|
"grad_norm": 1.3110156059265137, |
|
"learning_rate": 1.7057267795895115e-06, |
|
"loss": 0.702, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 8.829031051253274, |
|
"grad_norm": 1.324245572090149, |
|
"learning_rate": 1.6632994819886977e-06, |
|
"loss": 0.6807, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 8.843995510662177, |
|
"grad_norm": 1.2745212316513062, |
|
"learning_rate": 1.6213883457275065e-06, |
|
"loss": 0.6846, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 8.858959970071082, |
|
"grad_norm": 1.4197077751159668, |
|
"learning_rate": 1.579994297796808e-06, |
|
"loss": 0.7325, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 8.873924429479985, |
|
"grad_norm": 1.3314228057861328, |
|
"learning_rate": 1.5391182537505072e-06, |
|
"loss": 0.6899, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"grad_norm": 1.3566371202468872, |
|
"learning_rate": 1.4987611176852878e-06, |
|
"loss": 0.6596, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 8.903853348297792, |
|
"grad_norm": 1.3632760047912598, |
|
"learning_rate": 1.4589237822206282e-06, |
|
"loss": 0.7111, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 8.918817807706697, |
|
"grad_norm": 1.4764022827148438, |
|
"learning_rate": 1.419607128479053e-06, |
|
"loss": 0.7168, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 8.9337822671156, |
|
"grad_norm": 1.1871962547302246, |
|
"learning_rate": 1.3808120260666441e-06, |
|
"loss": 0.7182, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 8.948746726524504, |
|
"grad_norm": 1.2561469078063965, |
|
"learning_rate": 1.3425393330538022e-06, |
|
"loss": 0.6455, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 8.963711185933409, |
|
"grad_norm": 1.4918162822723389, |
|
"learning_rate": 1.3047898959562765e-06, |
|
"loss": 0.7042, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 8.978675645342312, |
|
"grad_norm": 1.3534742593765259, |
|
"learning_rate": 1.267564549716435e-06, |
|
"loss": 0.6742, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.993640104751217, |
|
"grad_norm": 1.4959015846252441, |
|
"learning_rate": 1.2308641176848046e-06, |
|
"loss": 0.6838, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 9.00860456416012, |
|
"grad_norm": 1.309097409248352, |
|
"learning_rate": 1.1946894116018404e-06, |
|
"loss": 0.6411, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 9.023569023569024, |
|
"grad_norm": 1.3958250284194946, |
|
"learning_rate": 1.159041231580016e-06, |
|
"loss": 0.7136, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 9.038533482977927, |
|
"grad_norm": 1.307607650756836, |
|
"learning_rate": 1.1239203660860648e-06, |
|
"loss": 0.7436, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 9.053497942386832, |
|
"grad_norm": 1.273493766784668, |
|
"learning_rate": 1.0893275919235945e-06, |
|
"loss": 0.7149, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 9.068462401795735, |
|
"grad_norm": 1.4512149095535278, |
|
"learning_rate": 1.05526367421587e-06, |
|
"loss": 0.7207, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 9.08342686120464, |
|
"grad_norm": 1.3597697019577026, |
|
"learning_rate": 1.0217293663889155e-06, |
|
"loss": 0.6602, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 9.098391320613542, |
|
"grad_norm": 1.4251606464385986, |
|
"learning_rate": 9.88725410154842e-07, |
|
"loss": 0.7312, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 9.113355780022447, |
|
"grad_norm": 1.3595529794692993, |
|
"learning_rate": 9.562525354954193e-07, |
|
"loss": 0.7044, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 9.12832023943135, |
|
"grad_norm": 1.2834125757217407, |
|
"learning_rate": 9.243114606459741e-07, |
|
"loss": 0.7221, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 9.143284698840255, |
|
"grad_norm": 1.3886545896530151, |
|
"learning_rate": 8.92902892079464e-07, |
|
"loss": 0.6504, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 9.158249158249157, |
|
"grad_norm": 1.533457636833191, |
|
"learning_rate": 8.620275244908827e-07, |
|
"loss": 0.6788, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 9.173213617658062, |
|
"grad_norm": 1.493024230003357, |
|
"learning_rate": 8.31686040781865e-07, |
|
"loss": 0.6803, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 9.188178077066967, |
|
"grad_norm": 1.2318785190582275, |
|
"learning_rate": 8.018791120456087e-07, |
|
"loss": 0.6904, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 9.20314253647587, |
|
"grad_norm": 1.4301903247833252, |
|
"learning_rate": 7.726073975520082e-07, |
|
"loss": 0.6777, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 9.218106995884774, |
|
"grad_norm": 1.322068452835083, |
|
"learning_rate": 7.438715447331018e-07, |
|
"loss": 0.685, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 9.233071455293677, |
|
"grad_norm": 1.2603065967559814, |
|
"learning_rate": 7.156721891687202e-07, |
|
"loss": 0.6712, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 9.248035914702582, |
|
"grad_norm": 1.4191964864730835, |
|
"learning_rate": 6.880099545724522e-07, |
|
"loss": 0.7124, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 9.263000374111485, |
|
"grad_norm": 1.411106824874878, |
|
"learning_rate": 6.608854527778319e-07, |
|
"loss": 0.6788, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 9.27796483352039, |
|
"grad_norm": 1.3679730892181396, |
|
"learning_rate": 6.342992837248235e-07, |
|
"loss": 0.69, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 9.292929292929292, |
|
"grad_norm": 1.2826892137527466, |
|
"learning_rate": 6.082520354465382e-07, |
|
"loss": 0.7124, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 9.307893752338197, |
|
"grad_norm": 1.2693568468093872, |
|
"learning_rate": 5.82744284056233e-07, |
|
"loss": 0.6702, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 9.3228582117471, |
|
"grad_norm": 1.512661099433899, |
|
"learning_rate": 5.577765937345686e-07, |
|
"loss": 0.663, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 9.337822671156005, |
|
"grad_norm": 1.5203378200531006, |
|
"learning_rate": 5.333495167171353e-07, |
|
"loss": 0.6927, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 9.352787130564908, |
|
"grad_norm": 1.541284203529358, |
|
"learning_rate": 5.094635932822223e-07, |
|
"loss": 0.6629, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 9.367751589973812, |
|
"grad_norm": 1.2277456521987915, |
|
"learning_rate": 4.861193517388923e-07, |
|
"loss": 0.7342, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 9.382716049382717, |
|
"grad_norm": 1.3728615045547485, |
|
"learning_rate": 4.6331730841527587e-07, |
|
"loss": 0.6597, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 9.39768050879162, |
|
"grad_norm": 1.2458422183990479, |
|
"learning_rate": 4.4105796764715714e-07, |
|
"loss": 0.6654, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 9.412644968200524, |
|
"grad_norm": 1.41146981716156, |
|
"learning_rate": 4.1934182176683045e-07, |
|
"loss": 0.7134, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 9.427609427609427, |
|
"grad_norm": 1.306872010231018, |
|
"learning_rate": 3.9816935109218413e-07, |
|
"loss": 0.6154, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 9.442573887018332, |
|
"grad_norm": 1.3811511993408203, |
|
"learning_rate": 3.7754102391611424e-07, |
|
"loss": 0.6862, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 9.457538346427235, |
|
"grad_norm": 1.2896925210952759, |
|
"learning_rate": 3.5745729649613034e-07, |
|
"loss": 0.6778, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 9.47250280583614, |
|
"grad_norm": 1.1952729225158691, |
|
"learning_rate": 3.3791861304428574e-07, |
|
"loss": 0.6891, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 9.487467265245042, |
|
"grad_norm": 1.3523303270339966, |
|
"learning_rate": 3.189254057173491e-07, |
|
"loss": 0.6576, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 9.502431724653947, |
|
"grad_norm": 1.3650611639022827, |
|
"learning_rate": 3.004780946072372e-07, |
|
"loss": 0.6533, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 9.51739618406285, |
|
"grad_norm": 1.2557883262634277, |
|
"learning_rate": 2.825770877317363e-07, |
|
"loss": 0.7639, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 9.532360643471755, |
|
"grad_norm": 1.24583899974823, |
|
"learning_rate": 2.6522278102546485e-07, |
|
"loss": 0.6856, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 9.547325102880658, |
|
"grad_norm": 1.48171865940094, |
|
"learning_rate": 2.484155583311276e-07, |
|
"loss": 0.6486, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 9.562289562289562, |
|
"grad_norm": 1.2459851503372192, |
|
"learning_rate": 2.3215579139101996e-07, |
|
"loss": 0.6377, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 9.577254021698467, |
|
"grad_norm": 1.1139715909957886, |
|
"learning_rate": 2.1644383983880357e-07, |
|
"loss": 0.6703, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.59221848110737, |
|
"grad_norm": 1.4323070049285889, |
|
"learning_rate": 2.012800511915547e-07, |
|
"loss": 0.6743, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 9.607182940516275, |
|
"grad_norm": 1.3250705003738403, |
|
"learning_rate": 1.8666476084208129e-07, |
|
"loss": 0.7117, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 9.622147399925177, |
|
"grad_norm": 1.4704447984695435, |
|
"learning_rate": 1.7259829205149568e-07, |
|
"loss": 0.6817, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 9.637111859334082, |
|
"grad_norm": 1.2608110904693604, |
|
"learning_rate": 1.5908095594207583e-07, |
|
"loss": 0.7122, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 9.652076318742985, |
|
"grad_norm": 1.4275965690612793, |
|
"learning_rate": 1.4611305149037358e-07, |
|
"loss": 0.6386, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 9.66704077815189, |
|
"grad_norm": 1.163145899772644, |
|
"learning_rate": 1.336948655206144e-07, |
|
"loss": 0.6882, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 9.682005237560793, |
|
"grad_norm": 1.4491647481918335, |
|
"learning_rate": 1.218266726983386e-07, |
|
"loss": 0.6826, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 9.696969696969697, |
|
"grad_norm": 1.3578821420669556, |
|
"learning_rate": 1.1050873552433394e-07, |
|
"loss": 0.7251, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 9.7119341563786, |
|
"grad_norm": 1.2743161916732788, |
|
"learning_rate": 9.974130432883199e-08, |
|
"loss": 0.7072, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 9.726898615787505, |
|
"grad_norm": 1.2915595769882202, |
|
"learning_rate": 8.952461726596528e-08, |
|
"loss": 0.6555, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 9.741863075196408, |
|
"grad_norm": 1.2591161727905273, |
|
"learning_rate": 7.985890030850762e-08, |
|
"loss": 0.6642, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 9.756827534605312, |
|
"grad_norm": 1.373780369758606, |
|
"learning_rate": 7.074436724286704e-08, |
|
"loss": 0.6987, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 9.771791994014217, |
|
"grad_norm": 1.3390823602676392, |
|
"learning_rate": 6.218121966436175e-08, |
|
"loss": 0.7699, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 9.78675645342312, |
|
"grad_norm": 1.4248472452163696, |
|
"learning_rate": 5.416964697276261e-08, |
|
"loss": 0.6654, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 9.801720912832025, |
|
"grad_norm": 1.315335988998413, |
|
"learning_rate": 4.670982636810761e-08, |
|
"loss": 0.6681, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 9.816685372240928, |
|
"grad_norm": 1.28786039352417, |
|
"learning_rate": 3.9801922846766095e-08, |
|
"loss": 0.7033, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 9.831649831649832, |
|
"grad_norm": 1.4623719453811646, |
|
"learning_rate": 3.3446089197805565e-08, |
|
"loss": 0.6899, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 9.846614291058735, |
|
"grad_norm": 1.390443205833435, |
|
"learning_rate": 2.7642465999613842e-08, |
|
"loss": 0.6837, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 9.86157875046764, |
|
"grad_norm": 1.2957769632339478, |
|
"learning_rate": 2.2391181616776556e-08, |
|
"loss": 0.6578, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 9.876543209876543, |
|
"grad_norm": 1.454103708267212, |
|
"learning_rate": 1.7692352197240526e-08, |
|
"loss": 0.6546, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 9.891507669285447, |
|
"grad_norm": 1.2412161827087402, |
|
"learning_rate": 1.354608166976301e-08, |
|
"loss": 0.6437, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 9.90647212869435, |
|
"grad_norm": 1.3440452814102173, |
|
"learning_rate": 9.952461741585817e-09, |
|
"loss": 0.726, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 9.921436588103255, |
|
"grad_norm": 1.402801513671875, |
|
"learning_rate": 6.9115718964257726e-09, |
|
"loss": 0.6458, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 9.936401047512158, |
|
"grad_norm": 1.253630518913269, |
|
"learning_rate": 4.423479392709484e-09, |
|
"loss": 0.6936, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 9.951365506921062, |
|
"grad_norm": 1.3306463956832886, |
|
"learning_rate": 2.48823926208841e-09, |
|
"loss": 0.7048, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 9.966329966329967, |
|
"grad_norm": 1.2049169540405273, |
|
"learning_rate": 1.10589430822039e-09, |
|
"loss": 0.6899, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 9.98129442573887, |
|
"grad_norm": 1.4228817224502563, |
|
"learning_rate": 2.764751058259574e-10, |
|
"loss": 0.7091, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 9.996258885147775, |
|
"grad_norm": 1.234875202178955, |
|
"learning_rate": 0.0, |
|
"loss": 0.6954, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 9.996258885147775, |
|
"step": 3340, |
|
"total_flos": 1.2597949543307674e+18, |
|
"train_loss": 0.7376079930516775, |
|
"train_runtime": 29450.1993, |
|
"train_samples_per_second": 1.815, |
|
"train_steps_per_second": 0.113 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 3340, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 100, |
|
"total_flos": 1.2597949543307674e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|