diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,12768 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.999905824739841, + "eval_steps": 500, + "global_step": 10618, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 9.999994528653223e-06, + "loss": 1.8507, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 9.999978114624865e-06, + "loss": 1.4021, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 9.999950757950849e-06, + "loss": 1.3668, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 9.999912458691045e-06, + "loss": 1.3339, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 9.999863216929276e-06, + "loss": 1.3698, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 9.999803032773304e-06, + "loss": 1.3425, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 9.999731906354849e-06, + "loss": 1.2789, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 9.999649837829572e-06, + "loss": 1.3366, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 9.999556827377083e-06, + "loss": 1.3464, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 9.99945287520094e-06, + "loss": 1.3353, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 9.999337981528646e-06, + "loss": 1.3226, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 9.99921214661165e-06, + "loss": 1.2716, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 9.999075370725346e-06, + "loss": 1.3135, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 9.998927654169074e-06, + "loss": 1.3213, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 9.998768997266117e-06, + "loss": 1.3275, + "step": 75 + }, + { + "epoch": 0.02, + "learning_rate": 9.998599400363704e-06, + "loss": 1.3679, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 9.998418863832999e-06, + "loss": 1.2311, + "step": 85 + }, + { + "epoch": 0.02, + "learning_rate": 9.99822738806912e-06, + "loss": 1.3258, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 9.998024973491114e-06, + "loss": 1.2636, + "step": 95 + }, + { + "epoch": 0.02, + "learning_rate": 9.997811620541976e-06, + "loss": 1.3358, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 9.997587329688637e-06, + "loss": 1.236, + "step": 105 + }, + { + "epoch": 0.02, + "learning_rate": 9.997352101421962e-06, + "loss": 1.347, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 9.997105936256764e-06, + "loss": 1.3071, + "step": 115 + }, + { + "epoch": 0.02, + "learning_rate": 9.99684883473178e-06, + "loss": 1.3087, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 9.996580797409687e-06, + "loss": 1.3576, + "step": 125 + }, + { + "epoch": 0.02, + "learning_rate": 9.996301824877099e-06, + "loss": 1.3194, + "step": 130 + }, + { + "epoch": 0.03, + "learning_rate": 9.996011917744553e-06, + "loss": 1.3217, + "step": 135 + }, + { + "epoch": 0.03, + "learning_rate": 9.995711076646526e-06, + "loss": 1.2644, + "step": 140 + }, + { + "epoch": 0.03, + "learning_rate": 9.995399302241418e-06, + "loss": 1.2433, + "step": 145 + }, + { + "epoch": 0.03, + "learning_rate": 9.99507659521156e-06, + "loss": 1.2806, + "step": 150 + }, + { + "epoch": 0.03, + "learning_rate": 9.994742956263208e-06, + "loss": 1.2965, + "step": 155 + }, + { + "epoch": 0.03, + "learning_rate": 9.994398386126545e-06, + "loss": 1.3131, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 9.994042885555676e-06, + "loss": 1.2713, + "step": 165 + }, + { + "epoch": 0.03, + "learning_rate": 9.993676455328629e-06, + "loss": 1.3255, + "step": 170 + }, + { + "epoch": 0.03, + "learning_rate": 9.993299096247348e-06, + "loss": 1.3215, + "step": 175 + }, + { + "epoch": 0.03, + "learning_rate": 9.9929108091377e-06, + "loss": 1.2773, + "step": 180 + }, + { + "epoch": 0.03, + "learning_rate": 9.992511594849465e-06, + "loss": 1.3232, + "step": 185 + }, + { + "epoch": 0.04, + "learning_rate": 9.99210145425634e-06, + "loss": 1.3358, + "step": 190 + }, + { + "epoch": 0.04, + "learning_rate": 9.991680388255934e-06, + "loss": 1.3245, + "step": 195 + }, + { + "epoch": 0.04, + "learning_rate": 9.991248397769763e-06, + "loss": 1.3153, + "step": 200 + }, + { + "epoch": 0.04, + "learning_rate": 9.990805483743258e-06, + "loss": 1.3301, + "step": 205 + }, + { + "epoch": 0.04, + "learning_rate": 9.990351647145753e-06, + "loss": 1.3016, + "step": 210 + }, + { + "epoch": 0.04, + "learning_rate": 9.989886888970488e-06, + "loss": 1.2853, + "step": 215 + }, + { + "epoch": 0.04, + "learning_rate": 9.9894112102346e-06, + "loss": 1.2947, + "step": 220 + }, + { + "epoch": 0.04, + "learning_rate": 9.988924611979137e-06, + "loss": 1.3119, + "step": 225 + }, + { + "epoch": 0.04, + "learning_rate": 9.988427095269031e-06, + "loss": 1.2601, + "step": 230 + }, + { + "epoch": 0.04, + "learning_rate": 9.987918661193121e-06, + "loss": 1.2447, + "step": 235 + }, + { + "epoch": 0.05, + "learning_rate": 9.987399310864135e-06, + "loss": 1.2965, + "step": 240 + }, + { + "epoch": 0.05, + "learning_rate": 9.986869045418689e-06, + "loss": 1.2928, + "step": 245 + }, + { + "epoch": 0.05, + "learning_rate": 9.98632786601729e-06, + "loss": 1.3183, + "step": 250 + }, + { + "epoch": 0.05, + "learning_rate": 9.98577577384433e-06, + "loss": 1.3156, + "step": 255 + }, + { + "epoch": 0.05, + "learning_rate": 9.985212770108087e-06, + "loss": 1.1941, + "step": 260 + }, + { + "epoch": 0.05, + "learning_rate": 9.984638856040712e-06, + "loss": 1.2811, + "step": 265 + }, + { + "epoch": 0.05, + "learning_rate": 9.98405403289824e-06, + "loss": 1.3099, + "step": 270 + }, + { + "epoch": 0.05, + "learning_rate": 9.98345830196058e-06, + "loss": 1.319, + "step": 275 + }, + { + "epoch": 0.05, + "learning_rate": 9.982851664531511e-06, + "loss": 1.2916, + "step": 280 + }, + { + "epoch": 0.05, + "learning_rate": 9.982234121938684e-06, + "loss": 1.2767, + "step": 285 + }, + { + "epoch": 0.05, + "learning_rate": 9.981605675533612e-06, + "loss": 1.3185, + "step": 290 + }, + { + "epoch": 0.06, + "learning_rate": 9.980966326691677e-06, + "loss": 1.2588, + "step": 295 + }, + { + "epoch": 0.06, + "learning_rate": 9.98031607681212e-06, + "loss": 1.2949, + "step": 300 + }, + { + "epoch": 0.06, + "learning_rate": 9.979654927318033e-06, + "loss": 1.2925, + "step": 305 + }, + { + "epoch": 0.06, + "learning_rate": 9.97898287965637e-06, + "loss": 1.2809, + "step": 310 + }, + { + "epoch": 0.06, + "learning_rate": 9.978299935297935e-06, + "loss": 1.3291, + "step": 315 + }, + { + "epoch": 0.06, + "learning_rate": 9.977606095737375e-06, + "loss": 1.302, + "step": 320 + }, + { + "epoch": 0.06, + "learning_rate": 9.976901362493188e-06, + "loss": 1.2554, + "step": 325 + }, + { + "epoch": 0.06, + "learning_rate": 9.976185737107708e-06, + "loss": 1.2703, + "step": 330 + }, + { + "epoch": 0.06, + "learning_rate": 9.975459221147108e-06, + "loss": 1.2658, + "step": 335 + }, + { + "epoch": 0.06, + "learning_rate": 9.9747218162014e-06, + "loss": 1.2915, + "step": 340 + }, + { + "epoch": 0.06, + "learning_rate": 9.973973523884419e-06, + "loss": 1.2699, + "step": 345 + }, + { + "epoch": 0.07, + "learning_rate": 9.973214345833833e-06, + "loss": 1.3081, + "step": 350 + }, + { + "epoch": 0.07, + "learning_rate": 9.972444283711133e-06, + "loss": 1.2531, + "step": 355 + }, + { + "epoch": 0.07, + "learning_rate": 9.971663339201632e-06, + "loss": 1.2416, + "step": 360 + }, + { + "epoch": 0.07, + "learning_rate": 9.970871514014453e-06, + "loss": 1.2025, + "step": 365 + }, + { + "epoch": 0.07, + "learning_rate": 9.970068809882538e-06, + "loss": 1.3003, + "step": 370 + }, + { + "epoch": 0.07, + "learning_rate": 9.969255228562637e-06, + "loss": 1.2742, + "step": 375 + }, + { + "epoch": 0.07, + "learning_rate": 9.968430771835305e-06, + "loss": 1.3026, + "step": 380 + }, + { + "epoch": 0.07, + "learning_rate": 9.967595441504894e-06, + "loss": 1.306, + "step": 385 + }, + { + "epoch": 0.07, + "learning_rate": 9.966749239399558e-06, + "loss": 1.2697, + "step": 390 + }, + { + "epoch": 0.07, + "learning_rate": 9.965892167371244e-06, + "loss": 1.3176, + "step": 395 + }, + { + "epoch": 0.08, + "learning_rate": 9.965024227295688e-06, + "loss": 1.2822, + "step": 400 + }, + { + "epoch": 0.08, + "learning_rate": 9.964145421072409e-06, + "loss": 1.2875, + "step": 405 + }, + { + "epoch": 0.08, + "learning_rate": 9.96325575062471e-06, + "loss": 1.2609, + "step": 410 + }, + { + "epoch": 0.08, + "learning_rate": 9.962355217899666e-06, + "loss": 1.2669, + "step": 415 + }, + { + "epoch": 0.08, + "learning_rate": 9.961443824868131e-06, + "loss": 1.2581, + "step": 420 + }, + { + "epoch": 0.08, + "learning_rate": 9.960521573524722e-06, + "loss": 1.2905, + "step": 425 + }, + { + "epoch": 0.08, + "learning_rate": 9.959588465887824e-06, + "loss": 1.2999, + "step": 430 + }, + { + "epoch": 0.08, + "learning_rate": 9.958644503999578e-06, + "loss": 1.3133, + "step": 435 + }, + { + "epoch": 0.08, + "learning_rate": 9.95768968992588e-06, + "loss": 1.3436, + "step": 440 + }, + { + "epoch": 0.08, + "learning_rate": 9.956724025756378e-06, + "loss": 1.2272, + "step": 445 + }, + { + "epoch": 0.08, + "learning_rate": 9.955747513604466e-06, + "loss": 1.3054, + "step": 450 + }, + { + "epoch": 0.09, + "learning_rate": 9.95476015560728e-06, + "loss": 1.2063, + "step": 455 + }, + { + "epoch": 0.09, + "learning_rate": 9.953761953925687e-06, + "loss": 1.263, + "step": 460 + }, + { + "epoch": 0.09, + "learning_rate": 9.952752910744295e-06, + "loss": 1.2657, + "step": 465 + }, + { + "epoch": 0.09, + "learning_rate": 9.95173302827143e-06, + "loss": 1.2713, + "step": 470 + }, + { + "epoch": 0.09, + "learning_rate": 9.950702308739149e-06, + "loss": 1.3124, + "step": 475 + }, + { + "epoch": 0.09, + "learning_rate": 9.949660754403216e-06, + "loss": 1.303, + "step": 480 + }, + { + "epoch": 0.09, + "learning_rate": 9.948608367543115e-06, + "loss": 1.2714, + "step": 485 + }, + { + "epoch": 0.09, + "learning_rate": 9.94754515046204e-06, + "loss": 1.2749, + "step": 490 + }, + { + "epoch": 0.09, + "learning_rate": 9.946471105486874e-06, + "loss": 1.281, + "step": 495 + }, + { + "epoch": 0.09, + "learning_rate": 9.945386234968213e-06, + "loss": 1.2042, + "step": 500 + }, + { + "epoch": 0.1, + "learning_rate": 9.944290541280337e-06, + "loss": 1.2807, + "step": 505 + }, + { + "epoch": 0.1, + "learning_rate": 9.943184026821211e-06, + "loss": 1.2698, + "step": 510 + }, + { + "epoch": 0.1, + "learning_rate": 9.94206669401249e-06, + "loss": 1.2716, + "step": 515 + }, + { + "epoch": 0.1, + "learning_rate": 9.940938545299492e-06, + "loss": 1.2998, + "step": 520 + }, + { + "epoch": 0.1, + "learning_rate": 9.939799583151222e-06, + "loss": 1.2695, + "step": 525 + }, + { + "epoch": 0.1, + "learning_rate": 9.93864981006034e-06, + "loss": 1.2706, + "step": 530 + }, + { + "epoch": 0.1, + "learning_rate": 9.937489228543166e-06, + "loss": 1.2435, + "step": 535 + }, + { + "epoch": 0.1, + "learning_rate": 9.936317841139682e-06, + "loss": 1.2424, + "step": 540 + }, + { + "epoch": 0.1, + "learning_rate": 9.935135650413512e-06, + "loss": 1.2579, + "step": 545 + }, + { + "epoch": 0.1, + "learning_rate": 9.933942658951927e-06, + "loss": 1.3081, + "step": 550 + }, + { + "epoch": 0.1, + "learning_rate": 9.932738869365836e-06, + "loss": 1.2869, + "step": 555 + }, + { + "epoch": 0.11, + "learning_rate": 9.931524284289777e-06, + "loss": 1.2493, + "step": 560 + }, + { + "epoch": 0.11, + "learning_rate": 9.930298906381918e-06, + "loss": 1.2675, + "step": 565 + }, + { + "epoch": 0.11, + "learning_rate": 9.929062738324047e-06, + "loss": 1.2846, + "step": 570 + }, + { + "epoch": 0.11, + "learning_rate": 9.927815782821561e-06, + "loss": 1.2714, + "step": 575 + }, + { + "epoch": 0.11, + "learning_rate": 9.926558042603476e-06, + "loss": 1.2439, + "step": 580 + }, + { + "epoch": 0.11, + "learning_rate": 9.925289520422402e-06, + "loss": 1.2844, + "step": 585 + }, + { + "epoch": 0.11, + "learning_rate": 9.924010219054552e-06, + "loss": 1.2298, + "step": 590 + }, + { + "epoch": 0.11, + "learning_rate": 9.922720141299721e-06, + "loss": 1.2103, + "step": 595 + }, + { + "epoch": 0.11, + "learning_rate": 9.921419289981297e-06, + "loss": 1.2766, + "step": 600 + }, + { + "epoch": 0.11, + "learning_rate": 9.920107667946246e-06, + "loss": 1.2139, + "step": 605 + }, + { + "epoch": 0.11, + "learning_rate": 9.918785278065102e-06, + "loss": 1.2914, + "step": 610 + }, + { + "epoch": 0.12, + "learning_rate": 9.917452123231964e-06, + "loss": 1.2117, + "step": 615 + }, + { + "epoch": 0.12, + "learning_rate": 9.916108206364495e-06, + "loss": 1.2564, + "step": 620 + }, + { + "epoch": 0.12, + "learning_rate": 9.914753530403909e-06, + "loss": 1.2346, + "step": 625 + }, + { + "epoch": 0.12, + "learning_rate": 9.913388098314966e-06, + "loss": 1.2285, + "step": 630 + }, + { + "epoch": 0.12, + "learning_rate": 9.91201191308597e-06, + "loss": 1.2825, + "step": 635 + }, + { + "epoch": 0.12, + "learning_rate": 9.91062497772875e-06, + "loss": 1.2464, + "step": 640 + }, + { + "epoch": 0.12, + "learning_rate": 9.909227295278674e-06, + "loss": 1.2736, + "step": 645 + }, + { + "epoch": 0.12, + "learning_rate": 9.907818868794619e-06, + "loss": 1.3481, + "step": 650 + }, + { + "epoch": 0.12, + "learning_rate": 9.906399701358984e-06, + "loss": 1.3355, + "step": 655 + }, + { + "epoch": 0.12, + "learning_rate": 9.904969796077672e-06, + "loss": 1.2001, + "step": 660 + }, + { + "epoch": 0.13, + "learning_rate": 9.903529156080083e-06, + "loss": 1.2797, + "step": 665 + }, + { + "epoch": 0.13, + "learning_rate": 9.902077784519117e-06, + "loss": 1.2513, + "step": 670 + }, + { + "epoch": 0.13, + "learning_rate": 9.900615684571155e-06, + "loss": 1.2671, + "step": 675 + }, + { + "epoch": 0.13, + "learning_rate": 9.899142859436057e-06, + "loss": 1.241, + "step": 680 + }, + { + "epoch": 0.13, + "learning_rate": 9.897659312337163e-06, + "loss": 1.2976, + "step": 685 + }, + { + "epoch": 0.13, + "learning_rate": 9.896165046521272e-06, + "loss": 1.2553, + "step": 690 + }, + { + "epoch": 0.13, + "learning_rate": 9.894660065258638e-06, + "loss": 1.2546, + "step": 695 + }, + { + "epoch": 0.13, + "learning_rate": 9.893144371842975e-06, + "loss": 1.2879, + "step": 700 + }, + { + "epoch": 0.13, + "learning_rate": 9.891617969591435e-06, + "loss": 1.2645, + "step": 705 + }, + { + "epoch": 0.13, + "learning_rate": 9.890080861844612e-06, + "loss": 1.2744, + "step": 710 + }, + { + "epoch": 0.13, + "learning_rate": 9.88853305196652e-06, + "loss": 1.2994, + "step": 715 + }, + { + "epoch": 0.14, + "learning_rate": 9.886974543344604e-06, + "loss": 1.2771, + "step": 720 + }, + { + "epoch": 0.14, + "learning_rate": 9.885405339389721e-06, + "loss": 1.2538, + "step": 725 + }, + { + "epoch": 0.14, + "learning_rate": 9.883825443536133e-06, + "loss": 1.2017, + "step": 730 + }, + { + "epoch": 0.14, + "learning_rate": 9.882234859241506e-06, + "loss": 1.2562, + "step": 735 + }, + { + "epoch": 0.14, + "learning_rate": 9.880633589986891e-06, + "loss": 1.305, + "step": 740 + }, + { + "epoch": 0.14, + "learning_rate": 9.879021639276732e-06, + "loss": 1.2444, + "step": 745 + }, + { + "epoch": 0.14, + "learning_rate": 9.877399010638842e-06, + "loss": 1.2616, + "step": 750 + }, + { + "epoch": 0.14, + "learning_rate": 9.875765707624408e-06, + "loss": 1.3, + "step": 755 + }, + { + "epoch": 0.14, + "learning_rate": 9.874121733807978e-06, + "loss": 1.2517, + "step": 760 + }, + { + "epoch": 0.14, + "learning_rate": 9.872467092787452e-06, + "loss": 1.2363, + "step": 765 + }, + { + "epoch": 0.15, + "learning_rate": 9.870801788184074e-06, + "loss": 1.2549, + "step": 770 + }, + { + "epoch": 0.15, + "learning_rate": 9.86912582364243e-06, + "loss": 1.2361, + "step": 775 + }, + { + "epoch": 0.15, + "learning_rate": 9.867439202830431e-06, + "loss": 1.2699, + "step": 780 + }, + { + "epoch": 0.15, + "learning_rate": 9.865741929439314e-06, + "loss": 1.2812, + "step": 785 + }, + { + "epoch": 0.15, + "learning_rate": 9.864034007183628e-06, + "loss": 1.2381, + "step": 790 + }, + { + "epoch": 0.15, + "learning_rate": 9.862315439801224e-06, + "loss": 1.2707, + "step": 795 + }, + { + "epoch": 0.15, + "learning_rate": 9.860586231053254e-06, + "loss": 1.2527, + "step": 800 + }, + { + "epoch": 0.15, + "learning_rate": 9.85884638472416e-06, + "loss": 1.2304, + "step": 805 + }, + { + "epoch": 0.15, + "learning_rate": 9.857095904621662e-06, + "loss": 1.2258, + "step": 810 + }, + { + "epoch": 0.15, + "learning_rate": 9.855334794576756e-06, + "loss": 1.2851, + "step": 815 + }, + { + "epoch": 0.15, + "learning_rate": 9.853563058443697e-06, + "loss": 1.2311, + "step": 820 + }, + { + "epoch": 0.16, + "learning_rate": 9.851780700099995e-06, + "loss": 1.245, + "step": 825 + }, + { + "epoch": 0.16, + "learning_rate": 9.849987723446415e-06, + "loss": 1.2513, + "step": 830 + }, + { + "epoch": 0.16, + "learning_rate": 9.848184132406955e-06, + "loss": 1.2234, + "step": 835 + }, + { + "epoch": 0.16, + "learning_rate": 9.846369930928841e-06, + "loss": 1.2398, + "step": 840 + }, + { + "epoch": 0.16, + "learning_rate": 9.844545122982528e-06, + "loss": 1.274, + "step": 845 + }, + { + "epoch": 0.16, + "learning_rate": 9.842709712561675e-06, + "loss": 1.2703, + "step": 850 + }, + { + "epoch": 0.16, + "learning_rate": 9.84086370368315e-06, + "loss": 1.2394, + "step": 855 + }, + { + "epoch": 0.16, + "learning_rate": 9.839007100387015e-06, + "loss": 1.2349, + "step": 860 + }, + { + "epoch": 0.16, + "learning_rate": 9.837139906736516e-06, + "loss": 1.2681, + "step": 865 + }, + { + "epoch": 0.16, + "learning_rate": 9.835262126818084e-06, + "loss": 1.2424, + "step": 870 + }, + { + "epoch": 0.16, + "learning_rate": 9.833373764741307e-06, + "loss": 1.2286, + "step": 875 + }, + { + "epoch": 0.17, + "learning_rate": 9.831474824638942e-06, + "loss": 1.2393, + "step": 880 + }, + { + "epoch": 0.17, + "learning_rate": 9.829565310666893e-06, + "loss": 1.2585, + "step": 885 + }, + { + "epoch": 0.17, + "learning_rate": 9.827645227004203e-06, + "loss": 1.1964, + "step": 890 + }, + { + "epoch": 0.17, + "learning_rate": 9.825714577853052e-06, + "loss": 1.2804, + "step": 895 + }, + { + "epoch": 0.17, + "learning_rate": 9.823773367438737e-06, + "loss": 1.2708, + "step": 900 + }, + { + "epoch": 0.17, + "learning_rate": 9.821821600009675e-06, + "loss": 1.2707, + "step": 905 + }, + { + "epoch": 0.17, + "learning_rate": 9.819859279837385e-06, + "loss": 1.2573, + "step": 910 + }, + { + "epoch": 0.17, + "learning_rate": 9.81788641121648e-06, + "loss": 1.2277, + "step": 915 + }, + { + "epoch": 0.17, + "learning_rate": 9.815902998464656e-06, + "loss": 1.2458, + "step": 920 + }, + { + "epoch": 0.17, + "learning_rate": 9.813909045922693e-06, + "loss": 1.2527, + "step": 925 + }, + { + "epoch": 0.18, + "learning_rate": 9.811904557954432e-06, + "loss": 1.2622, + "step": 930 + }, + { + "epoch": 0.18, + "learning_rate": 9.80988953894677e-06, + "loss": 1.2964, + "step": 935 + }, + { + "epoch": 0.18, + "learning_rate": 9.807863993309659e-06, + "loss": 1.2748, + "step": 940 + }, + { + "epoch": 0.18, + "learning_rate": 9.805827925476081e-06, + "loss": 1.2746, + "step": 945 + }, + { + "epoch": 0.18, + "learning_rate": 9.80378133990205e-06, + "loss": 1.2681, + "step": 950 + }, + { + "epoch": 0.18, + "learning_rate": 9.801724241066597e-06, + "loss": 1.2258, + "step": 955 + }, + { + "epoch": 0.18, + "learning_rate": 9.799656633471762e-06, + "loss": 1.2985, + "step": 960 + }, + { + "epoch": 0.18, + "learning_rate": 9.797578521642585e-06, + "loss": 1.2894, + "step": 965 + }, + { + "epoch": 0.18, + "learning_rate": 9.795489910127096e-06, + "loss": 1.254, + "step": 970 + }, + { + "epoch": 0.18, + "learning_rate": 9.793390803496299e-06, + "loss": 1.2879, + "step": 975 + }, + { + "epoch": 0.18, + "learning_rate": 9.791281206344172e-06, + "loss": 1.2504, + "step": 980 + }, + { + "epoch": 0.19, + "learning_rate": 9.789161123287649e-06, + "loss": 1.2561, + "step": 985 + }, + { + "epoch": 0.19, + "learning_rate": 9.787030558966615e-06, + "loss": 1.2503, + "step": 990 + }, + { + "epoch": 0.19, + "learning_rate": 9.784889518043892e-06, + "loss": 1.2776, + "step": 995 + }, + { + "epoch": 0.19, + "learning_rate": 9.782738005205232e-06, + "loss": 1.2433, + "step": 1000 + }, + { + "epoch": 0.19, + "learning_rate": 9.780576025159301e-06, + "loss": 1.2934, + "step": 1005 + }, + { + "epoch": 0.19, + "learning_rate": 9.778403582637679e-06, + "loss": 1.2654, + "step": 1010 + }, + { + "epoch": 0.19, + "learning_rate": 9.776220682394841e-06, + "loss": 1.2501, + "step": 1015 + }, + { + "epoch": 0.19, + "learning_rate": 9.774027329208144e-06, + "loss": 1.2686, + "step": 1020 + }, + { + "epoch": 0.19, + "learning_rate": 9.771823527877832e-06, + "loss": 1.2319, + "step": 1025 + }, + { + "epoch": 0.19, + "learning_rate": 9.769609283227006e-06, + "loss": 1.2093, + "step": 1030 + }, + { + "epoch": 0.19, + "learning_rate": 9.767384600101629e-06, + "loss": 1.2621, + "step": 1035 + }, + { + "epoch": 0.2, + "learning_rate": 9.765149483370503e-06, + "loss": 1.2476, + "step": 1040 + }, + { + "epoch": 0.2, + "learning_rate": 9.76290393792527e-06, + "loss": 1.2902, + "step": 1045 + }, + { + "epoch": 0.2, + "learning_rate": 9.76064796868039e-06, + "loss": 1.2564, + "step": 1050 + }, + { + "epoch": 0.2, + "learning_rate": 9.758381580573142e-06, + "loss": 1.2317, + "step": 1055 + }, + { + "epoch": 0.2, + "learning_rate": 9.756104778563604e-06, + "loss": 1.2248, + "step": 1060 + }, + { + "epoch": 0.2, + "learning_rate": 9.753817567634645e-06, + "loss": 1.2295, + "step": 1065 + }, + { + "epoch": 0.2, + "learning_rate": 9.751519952791912e-06, + "loss": 1.2673, + "step": 1070 + }, + { + "epoch": 0.2, + "learning_rate": 9.749211939063827e-06, + "loss": 1.2297, + "step": 1075 + }, + { + "epoch": 0.2, + "learning_rate": 9.746893531501567e-06, + "loss": 1.2802, + "step": 1080 + }, + { + "epoch": 0.2, + "learning_rate": 9.744564735179058e-06, + "loss": 1.2593, + "step": 1085 + }, + { + "epoch": 0.21, + "learning_rate": 9.742225555192956e-06, + "loss": 1.1905, + "step": 1090 + }, + { + "epoch": 0.21, + "learning_rate": 9.739875996662652e-06, + "loss": 1.2163, + "step": 1095 + }, + { + "epoch": 0.21, + "learning_rate": 9.737516064730244e-06, + "loss": 1.1864, + "step": 1100 + }, + { + "epoch": 0.21, + "learning_rate": 9.735145764560535e-06, + "loss": 1.3442, + "step": 1105 + }, + { + "epoch": 0.21, + "learning_rate": 9.732765101341014e-06, + "loss": 1.302, + "step": 1110 + }, + { + "epoch": 0.21, + "learning_rate": 9.730374080281864e-06, + "loss": 1.1606, + "step": 1115 + }, + { + "epoch": 0.21, + "learning_rate": 9.727972706615917e-06, + "loss": 1.2689, + "step": 1120 + }, + { + "epoch": 0.21, + "learning_rate": 9.72556098559868e-06, + "loss": 1.2357, + "step": 1125 + }, + { + "epoch": 0.21, + "learning_rate": 9.723138922508295e-06, + "loss": 1.2881, + "step": 1130 + }, + { + "epoch": 0.21, + "learning_rate": 9.72070652264554e-06, + "loss": 1.2428, + "step": 1135 + }, + { + "epoch": 0.21, + "learning_rate": 9.718263791333816e-06, + "loss": 1.2177, + "step": 1140 + }, + { + "epoch": 0.22, + "learning_rate": 9.715810733919138e-06, + "loss": 1.2671, + "step": 1145 + }, + { + "epoch": 0.22, + "learning_rate": 9.713347355770114e-06, + "loss": 1.2208, + "step": 1150 + }, + { + "epoch": 0.22, + "learning_rate": 9.710873662277943e-06, + "loss": 1.2466, + "step": 1155 + }, + { + "epoch": 0.22, + "learning_rate": 9.708389658856399e-06, + "loss": 1.2036, + "step": 1160 + }, + { + "epoch": 0.22, + "learning_rate": 9.70589535094182e-06, + "loss": 1.2119, + "step": 1165 + }, + { + "epoch": 0.22, + "learning_rate": 9.703390743993095e-06, + "loss": 1.2693, + "step": 1170 + }, + { + "epoch": 0.22, + "learning_rate": 9.700875843491655e-06, + "loss": 1.2261, + "step": 1175 + }, + { + "epoch": 0.22, + "learning_rate": 9.698350654941457e-06, + "loss": 1.3165, + "step": 1180 + }, + { + "epoch": 0.22, + "learning_rate": 9.695815183868971e-06, + "loss": 1.2342, + "step": 1185 + }, + { + "epoch": 0.22, + "learning_rate": 9.693269435823176e-06, + "loss": 1.1965, + "step": 1190 + }, + { + "epoch": 0.23, + "learning_rate": 9.69071341637554e-06, + "loss": 1.312, + "step": 1195 + }, + { + "epoch": 0.23, + "learning_rate": 9.688147131120009e-06, + "loss": 1.2269, + "step": 1200 + }, + { + "epoch": 0.23, + "learning_rate": 9.685570585672999e-06, + "loss": 1.297, + "step": 1205 + }, + { + "epoch": 0.23, + "learning_rate": 9.68298378567338e-06, + "loss": 1.2408, + "step": 1210 + }, + { + "epoch": 0.23, + "learning_rate": 9.680386736782463e-06, + "loss": 1.2226, + "step": 1215 + }, + { + "epoch": 0.23, + "learning_rate": 9.677779444683988e-06, + "loss": 1.2508, + "step": 1220 + }, + { + "epoch": 0.23, + "learning_rate": 9.67516191508412e-06, + "loss": 1.2752, + "step": 1225 + }, + { + "epoch": 0.23, + "learning_rate": 9.672534153711417e-06, + "loss": 1.222, + "step": 1230 + }, + { + "epoch": 0.23, + "learning_rate": 9.669896166316843e-06, + "loss": 1.306, + "step": 1235 + }, + { + "epoch": 0.23, + "learning_rate": 9.667247958673731e-06, + "loss": 1.2811, + "step": 1240 + }, + { + "epoch": 0.23, + "learning_rate": 9.664589536577788e-06, + "loss": 1.3114, + "step": 1245 + }, + { + "epoch": 0.24, + "learning_rate": 9.661920905847072e-06, + "loss": 1.2425, + "step": 1250 + }, + { + "epoch": 0.24, + "learning_rate": 9.659242072321988e-06, + "loss": 1.2564, + "step": 1255 + }, + { + "epoch": 0.24, + "learning_rate": 9.656553041865264e-06, + "loss": 1.2775, + "step": 1260 + }, + { + "epoch": 0.24, + "learning_rate": 9.653853820361949e-06, + "loss": 1.2803, + "step": 1265 + }, + { + "epoch": 0.24, + "learning_rate": 9.651144413719393e-06, + "loss": 1.2335, + "step": 1270 + }, + { + "epoch": 0.24, + "learning_rate": 9.648424827867236e-06, + "loss": 1.2067, + "step": 1275 + }, + { + "epoch": 0.24, + "learning_rate": 9.645695068757398e-06, + "loss": 1.2266, + "step": 1280 + }, + { + "epoch": 0.24, + "learning_rate": 9.64295514236406e-06, + "loss": 1.263, + "step": 1285 + }, + { + "epoch": 0.24, + "learning_rate": 9.640205054683663e-06, + "loss": 1.2369, + "step": 1290 + }, + { + "epoch": 0.24, + "learning_rate": 9.637444811734876e-06, + "loss": 1.2629, + "step": 1295 + }, + { + "epoch": 0.24, + "learning_rate": 9.634674419558599e-06, + "loss": 1.1917, + "step": 1300 + }, + { + "epoch": 0.25, + "learning_rate": 9.63189388421794e-06, + "loss": 1.2513, + "step": 1305 + }, + { + "epoch": 0.25, + "learning_rate": 9.629103211798212e-06, + "loss": 1.2235, + "step": 1310 + }, + { + "epoch": 0.25, + "learning_rate": 9.626302408406907e-06, + "loss": 1.2851, + "step": 1315 + }, + { + "epoch": 0.25, + "learning_rate": 9.623491480173692e-06, + "loss": 1.2207, + "step": 1320 + }, + { + "epoch": 0.25, + "learning_rate": 9.620670433250394e-06, + "loss": 1.2435, + "step": 1325 + }, + { + "epoch": 0.25, + "learning_rate": 9.61783927381098e-06, + "loss": 1.272, + "step": 1330 + }, + { + "epoch": 0.25, + "learning_rate": 9.614998008051554e-06, + "loss": 1.2395, + "step": 1335 + }, + { + "epoch": 0.25, + "learning_rate": 9.612146642190338e-06, + "loss": 1.2018, + "step": 1340 + }, + { + "epoch": 0.25, + "learning_rate": 9.609285182467653e-06, + "loss": 1.2505, + "step": 1345 + }, + { + "epoch": 0.25, + "learning_rate": 9.606413635145918e-06, + "loss": 1.2256, + "step": 1350 + }, + { + "epoch": 0.26, + "learning_rate": 9.603532006509622e-06, + "loss": 1.2002, + "step": 1355 + }, + { + "epoch": 0.26, + "learning_rate": 9.600640302865325e-06, + "loss": 1.2226, + "step": 1360 + }, + { + "epoch": 0.26, + "learning_rate": 9.597738530541628e-06, + "loss": 1.2916, + "step": 1365 + }, + { + "epoch": 0.26, + "learning_rate": 9.594826695889172e-06, + "loss": 1.256, + "step": 1370 + }, + { + "epoch": 0.26, + "learning_rate": 9.591904805280623e-06, + "loss": 1.1941, + "step": 1375 + }, + { + "epoch": 0.26, + "learning_rate": 9.58897286511065e-06, + "loss": 1.2103, + "step": 1380 + }, + { + "epoch": 0.26, + "learning_rate": 9.58603088179592e-06, + "loss": 1.2854, + "step": 1385 + }, + { + "epoch": 0.26, + "learning_rate": 9.583078861775074e-06, + "loss": 1.2441, + "step": 1390 + }, + { + "epoch": 0.26, + "learning_rate": 9.580116811508725e-06, + "loss": 1.252, + "step": 1395 + }, + { + "epoch": 0.26, + "learning_rate": 9.57714473747943e-06, + "loss": 1.2738, + "step": 1400 + }, + { + "epoch": 0.26, + "learning_rate": 9.574162646191693e-06, + "loss": 1.1774, + "step": 1405 + }, + { + "epoch": 0.27, + "learning_rate": 9.571170544171933e-06, + "loss": 1.3026, + "step": 1410 + }, + { + "epoch": 0.27, + "learning_rate": 9.568168437968484e-06, + "loss": 1.2147, + "step": 1415 + }, + { + "epoch": 0.27, + "learning_rate": 9.56515633415157e-06, + "loss": 1.2113, + "step": 1420 + }, + { + "epoch": 0.27, + "learning_rate": 9.562134239313297e-06, + "loss": 1.2178, + "step": 1425 + }, + { + "epoch": 0.27, + "learning_rate": 9.559102160067635e-06, + "loss": 1.2427, + "step": 1430 + }, + { + "epoch": 0.27, + "learning_rate": 9.55606010305041e-06, + "loss": 1.2265, + "step": 1435 + }, + { + "epoch": 0.27, + "learning_rate": 9.553008074919278e-06, + "loss": 1.2229, + "step": 1440 + }, + { + "epoch": 0.27, + "learning_rate": 9.549946082353724e-06, + "loss": 1.2404, + "step": 1445 + }, + { + "epoch": 0.27, + "learning_rate": 9.546874132055036e-06, + "loss": 1.2487, + "step": 1450 + }, + { + "epoch": 0.27, + "learning_rate": 9.543792230746294e-06, + "loss": 1.1743, + "step": 1455 + }, + { + "epoch": 0.27, + "learning_rate": 9.540700385172363e-06, + "loss": 1.2155, + "step": 1460 + }, + { + "epoch": 0.28, + "learning_rate": 9.537598602099862e-06, + "loss": 1.2592, + "step": 1465 + }, + { + "epoch": 0.28, + "learning_rate": 9.534486888317165e-06, + "loss": 1.2013, + "step": 1470 + }, + { + "epoch": 0.28, + "learning_rate": 9.531365250634378e-06, + "loss": 1.2442, + "step": 1475 + }, + { + "epoch": 0.28, + "learning_rate": 9.52823369588333e-06, + "loss": 1.2093, + "step": 1480 + }, + { + "epoch": 0.28, + "learning_rate": 9.525092230917544e-06, + "loss": 1.268, + "step": 1485 + }, + { + "epoch": 0.28, + "learning_rate": 9.521940862612238e-06, + "loss": 1.2355, + "step": 1490 + }, + { + "epoch": 0.28, + "learning_rate": 9.518779597864308e-06, + "loss": 1.2574, + "step": 1495 + }, + { + "epoch": 0.28, + "learning_rate": 9.515608443592299e-06, + "loss": 1.2691, + "step": 1500 + }, + { + "epoch": 0.28, + "learning_rate": 9.512427406736408e-06, + "loss": 1.2654, + "step": 1505 + }, + { + "epoch": 0.28, + "learning_rate": 9.509236494258459e-06, + "loss": 1.2545, + "step": 1510 + }, + { + "epoch": 0.29, + "learning_rate": 9.506035713141883e-06, + "loss": 1.2126, + "step": 1515 + }, + { + "epoch": 0.29, + "learning_rate": 9.502825070391716e-06, + "loss": 1.2408, + "step": 1520 + }, + { + "epoch": 0.29, + "learning_rate": 9.499604573034573e-06, + "loss": 1.2606, + "step": 1525 + }, + { + "epoch": 0.29, + "learning_rate": 9.49637422811864e-06, + "loss": 1.2559, + "step": 1530 + }, + { + "epoch": 0.29, + "learning_rate": 9.493134042713648e-06, + "loss": 1.2271, + "step": 1535 + }, + { + "epoch": 0.29, + "learning_rate": 9.48988402391087e-06, + "loss": 1.1991, + "step": 1540 + }, + { + "epoch": 0.29, + "learning_rate": 9.486624178823096e-06, + "loss": 1.2146, + "step": 1545 + }, + { + "epoch": 0.29, + "learning_rate": 9.483354514584627e-06, + "loss": 1.2348, + "step": 1550 + }, + { + "epoch": 0.29, + "learning_rate": 9.480075038351247e-06, + "loss": 1.2658, + "step": 1555 + }, + { + "epoch": 0.29, + "learning_rate": 9.476785757300218e-06, + "loss": 1.2381, + "step": 1560 + }, + { + "epoch": 0.29, + "learning_rate": 9.473486678630258e-06, + "loss": 1.2132, + "step": 1565 + }, + { + "epoch": 0.3, + "learning_rate": 9.47017780956153e-06, + "loss": 1.2107, + "step": 1570 + }, + { + "epoch": 0.3, + "learning_rate": 9.466859157335616e-06, + "loss": 1.2225, + "step": 1575 + }, + { + "epoch": 0.3, + "learning_rate": 9.463530729215525e-06, + "loss": 1.2764, + "step": 1580 + }, + { + "epoch": 0.3, + "learning_rate": 9.460192532485642e-06, + "loss": 1.2234, + "step": 1585 + }, + { + "epoch": 0.3, + "learning_rate": 9.456844574451744e-06, + "loss": 1.2226, + "step": 1590 + }, + { + "epoch": 0.3, + "learning_rate": 9.453486862440966e-06, + "loss": 1.238, + "step": 1595 + }, + { + "epoch": 0.3, + "learning_rate": 9.450119403801792e-06, + "loss": 1.2171, + "step": 1600 + }, + { + "epoch": 0.3, + "learning_rate": 9.446742205904033e-06, + "loss": 1.2226, + "step": 1605 + }, + { + "epoch": 0.3, + "learning_rate": 9.443355276138818e-06, + "loss": 1.2035, + "step": 1610 + }, + { + "epoch": 0.3, + "learning_rate": 9.439958621918574e-06, + "loss": 1.2131, + "step": 1615 + }, + { + "epoch": 0.31, + "learning_rate": 9.436552250677013e-06, + "loss": 1.2377, + "step": 1620 + }, + { + "epoch": 0.31, + "learning_rate": 9.433136169869106e-06, + "loss": 1.2182, + "step": 1625 + }, + { + "epoch": 0.31, + "learning_rate": 9.42971038697108e-06, + "loss": 1.2777, + "step": 1630 + }, + { + "epoch": 0.31, + "learning_rate": 9.426274909480396e-06, + "loss": 1.2405, + "step": 1635 + }, + { + "epoch": 0.31, + "learning_rate": 9.422829744915724e-06, + "loss": 1.2263, + "step": 1640 + }, + { + "epoch": 0.31, + "learning_rate": 9.419374900816945e-06, + "loss": 1.2349, + "step": 1645 + }, + { + "epoch": 0.31, + "learning_rate": 9.415910384745116e-06, + "loss": 1.2043, + "step": 1650 + }, + { + "epoch": 0.31, + "learning_rate": 9.412436204282466e-06, + "loss": 1.1849, + "step": 1655 + }, + { + "epoch": 0.31, + "learning_rate": 9.408952367032374e-06, + "loss": 1.247, + "step": 1660 + }, + { + "epoch": 0.31, + "learning_rate": 9.405458880619349e-06, + "loss": 1.1786, + "step": 1665 + }, + { + "epoch": 0.31, + "learning_rate": 9.401955752689028e-06, + "loss": 1.2198, + "step": 1670 + }, + { + "epoch": 0.32, + "learning_rate": 9.398442990908134e-06, + "loss": 1.2289, + "step": 1675 + }, + { + "epoch": 0.32, + "learning_rate": 9.39492060296449e-06, + "loss": 1.2274, + "step": 1680 + }, + { + "epoch": 0.32, + "learning_rate": 9.391388596566973e-06, + "loss": 1.25, + "step": 1685 + }, + { + "epoch": 0.32, + "learning_rate": 9.387846979445516e-06, + "loss": 1.2243, + "step": 1690 + }, + { + "epoch": 0.32, + "learning_rate": 9.384295759351087e-06, + "loss": 1.1992, + "step": 1695 + }, + { + "epoch": 0.32, + "learning_rate": 9.38073494405567e-06, + "loss": 1.2448, + "step": 1700 + }, + { + "epoch": 0.32, + "learning_rate": 9.377164541352244e-06, + "loss": 1.174, + "step": 1705 + }, + { + "epoch": 0.32, + "learning_rate": 9.373584559054775e-06, + "loss": 1.2098, + "step": 1710 + }, + { + "epoch": 0.32, + "learning_rate": 9.369995004998194e-06, + "loss": 1.2776, + "step": 1715 + }, + { + "epoch": 0.32, + "learning_rate": 9.366395887038375e-06, + "loss": 1.2248, + "step": 1720 + }, + { + "epoch": 0.32, + "learning_rate": 9.362787213052131e-06, + "loss": 1.2531, + "step": 1725 + }, + { + "epoch": 0.33, + "learning_rate": 9.359168990937185e-06, + "loss": 1.1699, + "step": 1730 + }, + { + "epoch": 0.33, + "learning_rate": 9.355541228612152e-06, + "loss": 1.2779, + "step": 1735 + }, + { + "epoch": 0.33, + "learning_rate": 9.351903934016534e-06, + "loss": 1.2744, + "step": 1740 + }, + { + "epoch": 0.33, + "learning_rate": 9.348257115110689e-06, + "loss": 1.2604, + "step": 1745 + }, + { + "epoch": 0.33, + "learning_rate": 9.344600779875823e-06, + "loss": 1.2822, + "step": 1750 + }, + { + "epoch": 0.33, + "learning_rate": 9.340934936313966e-06, + "loss": 1.2412, + "step": 1755 + }, + { + "epoch": 0.33, + "learning_rate": 9.337259592447958e-06, + "loss": 1.2014, + "step": 1760 + }, + { + "epoch": 0.33, + "learning_rate": 9.333574756321433e-06, + "loss": 1.2505, + "step": 1765 + }, + { + "epoch": 0.33, + "learning_rate": 9.329880435998797e-06, + "loss": 1.2217, + "step": 1770 + }, + { + "epoch": 0.33, + "learning_rate": 9.326176639565213e-06, + "loss": 1.1982, + "step": 1775 + }, + { + "epoch": 0.34, + "learning_rate": 9.322463375126582e-06, + "loss": 1.2393, + "step": 1780 + }, + { + "epoch": 0.34, + "learning_rate": 9.318740650809527e-06, + "loss": 1.2119, + "step": 1785 + }, + { + "epoch": 0.34, + "learning_rate": 9.315008474761377e-06, + "loss": 1.2616, + "step": 1790 + }, + { + "epoch": 0.34, + "learning_rate": 9.31126685515014e-06, + "loss": 1.212, + "step": 1795 + }, + { + "epoch": 0.34, + "learning_rate": 9.3075158001645e-06, + "loss": 1.2452, + "step": 1800 + }, + { + "epoch": 0.34, + "learning_rate": 9.303755318013781e-06, + "loss": 1.2247, + "step": 1805 + }, + { + "epoch": 0.34, + "learning_rate": 9.299985416927944e-06, + "loss": 1.2688, + "step": 1810 + }, + { + "epoch": 0.34, + "learning_rate": 9.296206105157567e-06, + "loss": 1.2336, + "step": 1815 + }, + { + "epoch": 0.34, + "learning_rate": 9.292417390973818e-06, + "loss": 1.2903, + "step": 1820 + }, + { + "epoch": 0.34, + "learning_rate": 9.288619282668447e-06, + "loss": 1.1809, + "step": 1825 + }, + { + "epoch": 0.34, + "learning_rate": 9.284811788553757e-06, + "loss": 1.1478, + "step": 1830 + }, + { + "epoch": 0.35, + "learning_rate": 9.280994916962598e-06, + "loss": 1.1931, + "step": 1835 + }, + { + "epoch": 0.35, + "learning_rate": 9.277168676248343e-06, + "loss": 1.2113, + "step": 1840 + }, + { + "epoch": 0.35, + "learning_rate": 9.273333074784866e-06, + "loss": 1.2857, + "step": 1845 + }, + { + "epoch": 0.35, + "learning_rate": 9.26948812096653e-06, + "loss": 1.2701, + "step": 1850 + }, + { + "epoch": 0.35, + "learning_rate": 9.265633823208164e-06, + "loss": 1.2603, + "step": 1855 + }, + { + "epoch": 0.35, + "learning_rate": 9.261770189945051e-06, + "loss": 1.2784, + "step": 1860 + }, + { + "epoch": 0.35, + "learning_rate": 9.257897229632899e-06, + "loss": 1.1626, + "step": 1865 + }, + { + "epoch": 0.35, + "learning_rate": 9.254014950747832e-06, + "loss": 1.1917, + "step": 1870 + }, + { + "epoch": 0.35, + "learning_rate": 9.250123361786367e-06, + "loss": 1.2627, + "step": 1875 + }, + { + "epoch": 0.35, + "learning_rate": 9.2462224712654e-06, + "loss": 1.2233, + "step": 1880 + }, + { + "epoch": 0.36, + "learning_rate": 9.24231228772218e-06, + "loss": 1.2783, + "step": 1885 + }, + { + "epoch": 0.36, + "learning_rate": 9.23839281971429e-06, + "loss": 1.2814, + "step": 1890 + }, + { + "epoch": 0.36, + "learning_rate": 9.234464075819646e-06, + "loss": 1.2279, + "step": 1895 + }, + { + "epoch": 0.36, + "learning_rate": 9.23052606463645e-06, + "loss": 1.1962, + "step": 1900 + }, + { + "epoch": 0.36, + "learning_rate": 9.226578794783194e-06, + "loss": 1.2925, + "step": 1905 + }, + { + "epoch": 0.36, + "learning_rate": 9.22262227489863e-06, + "loss": 1.2339, + "step": 1910 + }, + { + "epoch": 0.36, + "learning_rate": 9.218656513641755e-06, + "loss": 1.2711, + "step": 1915 + }, + { + "epoch": 0.36, + "learning_rate": 9.214681519691793e-06, + "loss": 1.2424, + "step": 1920 + }, + { + "epoch": 0.36, + "learning_rate": 9.210697301748169e-06, + "loss": 1.2055, + "step": 1925 + }, + { + "epoch": 0.36, + "learning_rate": 9.2067038685305e-06, + "loss": 1.2522, + "step": 1930 + }, + { + "epoch": 0.36, + "learning_rate": 9.20270122877857e-06, + "loss": 1.2236, + "step": 1935 + }, + { + "epoch": 0.37, + "learning_rate": 9.19868939125231e-06, + "loss": 1.2118, + "step": 1940 + }, + { + "epoch": 0.37, + "learning_rate": 9.19466836473178e-06, + "loss": 1.2734, + "step": 1945 + }, + { + "epoch": 0.37, + "learning_rate": 9.190638158017156e-06, + "loss": 1.1754, + "step": 1950 + }, + { + "epoch": 0.37, + "learning_rate": 9.186598779928699e-06, + "loss": 1.2419, + "step": 1955 + }, + { + "epoch": 0.37, + "learning_rate": 9.182550239306744e-06, + "loss": 1.1804, + "step": 1960 + }, + { + "epoch": 0.37, + "learning_rate": 9.17849254501168e-06, + "loss": 1.2179, + "step": 1965 + }, + { + "epoch": 0.37, + "learning_rate": 9.174425705923926e-06, + "loss": 1.2009, + "step": 1970 + }, + { + "epoch": 0.37, + "learning_rate": 9.170349730943919e-06, + "loss": 1.2527, + "step": 1975 + }, + { + "epoch": 0.37, + "learning_rate": 9.16626462899209e-06, + "loss": 1.2335, + "step": 1980 + }, + { + "epoch": 0.37, + "learning_rate": 9.162170409008837e-06, + "loss": 1.2779, + "step": 1985 + }, + { + "epoch": 0.37, + "learning_rate": 9.158067079954524e-06, + "loss": 1.2157, + "step": 1990 + }, + { + "epoch": 0.38, + "learning_rate": 9.153954650809442e-06, + "loss": 1.1972, + "step": 1995 + }, + { + "epoch": 0.38, + "learning_rate": 9.149833130573804e-06, + "loss": 1.2105, + "step": 2000 + }, + { + "epoch": 0.38, + "learning_rate": 9.145702528267717e-06, + "loss": 1.2661, + "step": 2005 + }, + { + "epoch": 0.38, + "learning_rate": 9.141562852931161e-06, + "loss": 1.2491, + "step": 2010 + }, + { + "epoch": 0.38, + "learning_rate": 9.137414113623978e-06, + "loss": 1.2591, + "step": 2015 + }, + { + "epoch": 0.38, + "learning_rate": 9.133256319425842e-06, + "loss": 1.2602, + "step": 2020 + }, + { + "epoch": 0.38, + "learning_rate": 9.129089479436249e-06, + "loss": 1.205, + "step": 2025 + }, + { + "epoch": 0.38, + "learning_rate": 9.12491360277449e-06, + "loss": 1.2094, + "step": 2030 + }, + { + "epoch": 0.38, + "learning_rate": 9.120728698579631e-06, + "loss": 1.2246, + "step": 2035 + }, + { + "epoch": 0.38, + "learning_rate": 9.116534776010498e-06, + "loss": 1.1941, + "step": 2040 + }, + { + "epoch": 0.39, + "learning_rate": 9.112331844245652e-06, + "loss": 1.2306, + "step": 2045 + }, + { + "epoch": 0.39, + "learning_rate": 9.108119912483373e-06, + "loss": 1.1952, + "step": 2050 + }, + { + "epoch": 0.39, + "learning_rate": 9.103898989941638e-06, + "loss": 1.2694, + "step": 2055 + }, + { + "epoch": 0.39, + "learning_rate": 9.099669085858094e-06, + "loss": 1.2486, + "step": 2060 + }, + { + "epoch": 0.39, + "learning_rate": 9.095430209490055e-06, + "loss": 1.2793, + "step": 2065 + }, + { + "epoch": 0.39, + "learning_rate": 9.091182370114464e-06, + "loss": 1.2487, + "step": 2070 + }, + { + "epoch": 0.39, + "learning_rate": 9.086925577027883e-06, + "loss": 1.2174, + "step": 2075 + }, + { + "epoch": 0.39, + "learning_rate": 9.082659839546467e-06, + "loss": 1.2356, + "step": 2080 + }, + { + "epoch": 0.39, + "learning_rate": 9.078385167005947e-06, + "loss": 1.2258, + "step": 2085 + }, + { + "epoch": 0.39, + "learning_rate": 9.074101568761614e-06, + "loss": 1.1835, + "step": 2090 + }, + { + "epoch": 0.39, + "learning_rate": 9.069809054188281e-06, + "loss": 1.2433, + "step": 2095 + }, + { + "epoch": 0.4, + "learning_rate": 9.065507632680288e-06, + "loss": 1.1928, + "step": 2100 + }, + { + "epoch": 0.4, + "learning_rate": 9.061197313651461e-06, + "loss": 1.2483, + "step": 2105 + }, + { + "epoch": 0.4, + "learning_rate": 9.056878106535103e-06, + "loss": 1.2212, + "step": 2110 + }, + { + "epoch": 0.4, + "learning_rate": 9.05255002078396e-06, + "loss": 1.2793, + "step": 2115 + }, + { + "epoch": 0.4, + "learning_rate": 9.048213065870218e-06, + "loss": 1.2435, + "step": 2120 + }, + { + "epoch": 0.4, + "learning_rate": 9.043867251285471e-06, + "loss": 1.2285, + "step": 2125 + }, + { + "epoch": 0.4, + "learning_rate": 9.039512586540704e-06, + "loss": 1.2551, + "step": 2130 + }, + { + "epoch": 0.4, + "learning_rate": 9.035149081166266e-06, + "loss": 1.2745, + "step": 2135 + }, + { + "epoch": 0.4, + "learning_rate": 9.030776744711863e-06, + "loss": 1.2519, + "step": 2140 + }, + { + "epoch": 0.4, + "learning_rate": 9.026395586746514e-06, + "loss": 1.2055, + "step": 2145 + }, + { + "epoch": 0.4, + "learning_rate": 9.02200561685856e-06, + "loss": 1.2822, + "step": 2150 + }, + { + "epoch": 0.41, + "learning_rate": 9.017606844655617e-06, + "loss": 1.2673, + "step": 2155 + }, + { + "epoch": 0.41, + "learning_rate": 9.013199279764568e-06, + "loss": 1.2026, + "step": 2160 + }, + { + "epoch": 0.41, + "learning_rate": 9.008782931831542e-06, + "loss": 1.2051, + "step": 2165 + }, + { + "epoch": 0.41, + "learning_rate": 9.004357810521884e-06, + "loss": 1.2396, + "step": 2170 + }, + { + "epoch": 0.41, + "learning_rate": 8.999923925520146e-06, + "loss": 1.1841, + "step": 2175 + }, + { + "epoch": 0.41, + "learning_rate": 8.995481286530057e-06, + "loss": 1.2257, + "step": 2180 + }, + { + "epoch": 0.41, + "learning_rate": 8.991029903274502e-06, + "loss": 1.225, + "step": 2185 + }, + { + "epoch": 0.41, + "learning_rate": 8.986569785495507e-06, + "loss": 1.2269, + "step": 2190 + }, + { + "epoch": 0.41, + "learning_rate": 8.982100942954212e-06, + "loss": 1.2354, + "step": 2195 + }, + { + "epoch": 0.41, + "learning_rate": 8.977623385430853e-06, + "loss": 1.2075, + "step": 2200 + }, + { + "epoch": 0.42, + "learning_rate": 8.973137122724737e-06, + "loss": 1.2428, + "step": 2205 + }, + { + "epoch": 0.42, + "learning_rate": 8.968642164654224e-06, + "loss": 1.2487, + "step": 2210 + }, + { + "epoch": 0.42, + "learning_rate": 8.964138521056702e-06, + "loss": 1.2172, + "step": 2215 + }, + { + "epoch": 0.42, + "learning_rate": 8.959626201788572e-06, + "loss": 1.2484, + "step": 2220 + }, + { + "epoch": 0.42, + "learning_rate": 8.955105216725218e-06, + "loss": 1.2855, + "step": 2225 + }, + { + "epoch": 0.42, + "learning_rate": 8.950575575760992e-06, + "loss": 1.1771, + "step": 2230 + }, + { + "epoch": 0.42, + "learning_rate": 8.946037288809187e-06, + "loss": 1.2179, + "step": 2235 + }, + { + "epoch": 0.42, + "learning_rate": 8.941490365802019e-06, + "loss": 1.1735, + "step": 2240 + }, + { + "epoch": 0.42, + "learning_rate": 8.936934816690608e-06, + "loss": 1.2504, + "step": 2245 + }, + { + "epoch": 0.42, + "learning_rate": 8.932370651444948e-06, + "loss": 1.2025, + "step": 2250 + }, + { + "epoch": 0.42, + "learning_rate": 8.927797880053891e-06, + "loss": 1.2622, + "step": 2255 + }, + { + "epoch": 0.43, + "learning_rate": 8.923216512525125e-06, + "loss": 1.1986, + "step": 2260 + }, + { + "epoch": 0.43, + "learning_rate": 8.918626558885148e-06, + "loss": 1.2653, + "step": 2265 + }, + { + "epoch": 0.43, + "learning_rate": 8.914028029179256e-06, + "loss": 1.2053, + "step": 2270 + }, + { + "epoch": 0.43, + "learning_rate": 8.909420933471504e-06, + "loss": 1.2075, + "step": 2275 + }, + { + "epoch": 0.43, + "learning_rate": 8.904805281844701e-06, + "loss": 1.1797, + "step": 2280 + }, + { + "epoch": 0.43, + "learning_rate": 8.900181084400382e-06, + "loss": 1.2713, + "step": 2285 + }, + { + "epoch": 0.43, + "learning_rate": 8.895548351258777e-06, + "loss": 1.1784, + "step": 2290 + }, + { + "epoch": 0.43, + "learning_rate": 8.890907092558808e-06, + "loss": 1.2071, + "step": 2295 + }, + { + "epoch": 0.43, + "learning_rate": 8.886257318458043e-06, + "loss": 1.2129, + "step": 2300 + }, + { + "epoch": 0.43, + "learning_rate": 8.881599039132696e-06, + "loss": 1.2184, + "step": 2305 + }, + { + "epoch": 0.44, + "learning_rate": 8.876932264777592e-06, + "loss": 1.2125, + "step": 2310 + }, + { + "epoch": 0.44, + "learning_rate": 8.872257005606146e-06, + "loss": 1.1682, + "step": 2315 + }, + { + "epoch": 0.44, + "learning_rate": 8.867573271850345e-06, + "loss": 1.2314, + "step": 2320 + }, + { + "epoch": 0.44, + "learning_rate": 8.862881073760721e-06, + "loss": 1.2905, + "step": 2325 + }, + { + "epoch": 0.44, + "learning_rate": 8.858180421606331e-06, + "loss": 1.2555, + "step": 2330 + }, + { + "epoch": 0.44, + "learning_rate": 8.853471325674734e-06, + "loss": 1.2296, + "step": 2335 + }, + { + "epoch": 0.44, + "learning_rate": 8.848753796271969e-06, + "loss": 1.2571, + "step": 2340 + }, + { + "epoch": 0.44, + "learning_rate": 8.844027843722532e-06, + "loss": 1.259, + "step": 2345 + }, + { + "epoch": 0.44, + "learning_rate": 8.839293478369353e-06, + "loss": 1.2189, + "step": 2350 + }, + { + "epoch": 0.44, + "learning_rate": 8.834550710573773e-06, + "loss": 1.26, + "step": 2355 + }, + { + "epoch": 0.44, + "learning_rate": 8.829799550715523e-06, + "loss": 1.2393, + "step": 2360 + }, + { + "epoch": 0.45, + "learning_rate": 8.825040009192702e-06, + "loss": 1.2267, + "step": 2365 + }, + { + "epoch": 0.45, + "learning_rate": 8.820272096421751e-06, + "loss": 1.2461, + "step": 2370 + }, + { + "epoch": 0.45, + "learning_rate": 8.815495822837428e-06, + "loss": 1.1922, + "step": 2375 + }, + { + "epoch": 0.45, + "learning_rate": 8.810711198892796e-06, + "loss": 1.2045, + "step": 2380 + }, + { + "epoch": 0.45, + "learning_rate": 8.805918235059187e-06, + "loss": 1.2488, + "step": 2385 + }, + { + "epoch": 0.45, + "learning_rate": 8.801116941826193e-06, + "loss": 1.2355, + "step": 2390 + }, + { + "epoch": 0.45, + "learning_rate": 8.796307329701625e-06, + "loss": 1.1635, + "step": 2395 + }, + { + "epoch": 0.45, + "learning_rate": 8.791489409211507e-06, + "loss": 1.2351, + "step": 2400 + }, + { + "epoch": 0.45, + "learning_rate": 8.786663190900044e-06, + "loss": 1.2343, + "step": 2405 + }, + { + "epoch": 0.45, + "learning_rate": 8.781828685329604e-06, + "loss": 1.1944, + "step": 2410 + }, + { + "epoch": 0.45, + "learning_rate": 8.776985903080687e-06, + "loss": 1.2083, + "step": 2415 + }, + { + "epoch": 0.46, + "learning_rate": 8.772134854751911e-06, + "loss": 1.1828, + "step": 2420 + }, + { + "epoch": 0.46, + "learning_rate": 8.76727555095998e-06, + "loss": 1.188, + "step": 2425 + }, + { + "epoch": 0.46, + "learning_rate": 8.762408002339672e-06, + "loss": 1.2454, + "step": 2430 + }, + { + "epoch": 0.46, + "learning_rate": 8.757532219543802e-06, + "loss": 1.2282, + "step": 2435 + }, + { + "epoch": 0.46, + "learning_rate": 8.752648213243215e-06, + "loss": 1.2582, + "step": 2440 + }, + { + "epoch": 0.46, + "learning_rate": 8.747755994126744e-06, + "loss": 1.1919, + "step": 2445 + }, + { + "epoch": 0.46, + "learning_rate": 8.7428555729012e-06, + "loss": 1.1555, + "step": 2450 + }, + { + "epoch": 0.46, + "learning_rate": 8.737946960291345e-06, + "loss": 1.2121, + "step": 2455 + }, + { + "epoch": 0.46, + "learning_rate": 8.733030167039867e-06, + "loss": 1.1924, + "step": 2460 + }, + { + "epoch": 0.46, + "learning_rate": 8.72810520390736e-06, + "loss": 1.2168, + "step": 2465 + }, + { + "epoch": 0.47, + "learning_rate": 8.723172081672296e-06, + "loss": 1.2029, + "step": 2470 + }, + { + "epoch": 0.47, + "learning_rate": 8.718230811131001e-06, + "loss": 1.2274, + "step": 2475 + }, + { + "epoch": 0.47, + "learning_rate": 8.713281403097641e-06, + "loss": 1.2323, + "step": 2480 + }, + { + "epoch": 0.47, + "learning_rate": 8.708323868404185e-06, + "loss": 1.2312, + "step": 2485 + }, + { + "epoch": 0.47, + "learning_rate": 8.703358217900393e-06, + "loss": 1.2186, + "step": 2490 + }, + { + "epoch": 0.47, + "learning_rate": 8.698384462453778e-06, + "loss": 1.2791, + "step": 2495 + }, + { + "epoch": 0.47, + "learning_rate": 8.693402612949598e-06, + "loss": 1.2184, + "step": 2500 + }, + { + "epoch": 0.47, + "learning_rate": 8.688412680290824e-06, + "loss": 1.1967, + "step": 2505 + }, + { + "epoch": 0.47, + "learning_rate": 8.683414675398118e-06, + "loss": 1.2046, + "step": 2510 + }, + { + "epoch": 0.47, + "learning_rate": 8.678408609209808e-06, + "loss": 1.2623, + "step": 2515 + }, + { + "epoch": 0.47, + "learning_rate": 8.67339449268186e-06, + "loss": 1.282, + "step": 2520 + }, + { + "epoch": 0.48, + "learning_rate": 8.668372336787862e-06, + "loss": 1.2712, + "step": 2525 + }, + { + "epoch": 0.48, + "learning_rate": 8.663342152519001e-06, + "loss": 1.2462, + "step": 2530 + }, + { + "epoch": 0.48, + "learning_rate": 8.658303950884028e-06, + "loss": 1.2301, + "step": 2535 + }, + { + "epoch": 0.48, + "learning_rate": 8.65325774290924e-06, + "loss": 1.2617, + "step": 2540 + }, + { + "epoch": 0.48, + "learning_rate": 8.648203539638463e-06, + "loss": 1.226, + "step": 2545 + }, + { + "epoch": 0.48, + "learning_rate": 8.643141352133012e-06, + "loss": 1.266, + "step": 2550 + }, + { + "epoch": 0.48, + "learning_rate": 8.638071191471683e-06, + "loss": 1.1974, + "step": 2555 + }, + { + "epoch": 0.48, + "learning_rate": 8.632993068750716e-06, + "loss": 1.2307, + "step": 2560 + }, + { + "epoch": 0.48, + "learning_rate": 8.627906995083783e-06, + "loss": 1.2828, + "step": 2565 + }, + { + "epoch": 0.48, + "learning_rate": 8.622812981601952e-06, + "loss": 1.2055, + "step": 2570 + }, + { + "epoch": 0.49, + "learning_rate": 8.617711039453667e-06, + "loss": 1.1971, + "step": 2575 + }, + { + "epoch": 0.49, + "learning_rate": 8.612601179804726e-06, + "loss": 1.2713, + "step": 2580 + }, + { + "epoch": 0.49, + "learning_rate": 8.607483413838257e-06, + "loss": 1.2934, + "step": 2585 + }, + { + "epoch": 0.49, + "learning_rate": 8.602357752754687e-06, + "loss": 1.2552, + "step": 2590 + }, + { + "epoch": 0.49, + "learning_rate": 8.597224207771724e-06, + "loss": 1.1734, + "step": 2595 + }, + { + "epoch": 0.49, + "learning_rate": 8.59208279012433e-06, + "loss": 1.222, + "step": 2600 + }, + { + "epoch": 0.49, + "learning_rate": 8.586933511064697e-06, + "loss": 1.2552, + "step": 2605 + }, + { + "epoch": 0.49, + "learning_rate": 8.581776381862221e-06, + "loss": 1.2447, + "step": 2610 + }, + { + "epoch": 0.49, + "learning_rate": 8.576611413803481e-06, + "loss": 1.2373, + "step": 2615 + }, + { + "epoch": 0.49, + "learning_rate": 8.571438618192205e-06, + "loss": 1.2734, + "step": 2620 + }, + { + "epoch": 0.49, + "learning_rate": 8.56625800634926e-06, + "loss": 1.2762, + "step": 2625 + }, + { + "epoch": 0.5, + "learning_rate": 8.561069589612616e-06, + "loss": 1.2284, + "step": 2630 + }, + { + "epoch": 0.5, + "learning_rate": 8.55587337933732e-06, + "loss": 1.2375, + "step": 2635 + }, + { + "epoch": 0.5, + "learning_rate": 8.550669386895485e-06, + "loss": 1.2465, + "step": 2640 + }, + { + "epoch": 0.5, + "learning_rate": 8.545457623676245e-06, + "loss": 1.2921, + "step": 2645 + }, + { + "epoch": 0.5, + "learning_rate": 8.540238101085748e-06, + "loss": 1.274, + "step": 2650 + }, + { + "epoch": 0.5, + "learning_rate": 8.535010830547119e-06, + "loss": 1.1872, + "step": 2655 + }, + { + "epoch": 0.5, + "learning_rate": 8.529775823500445e-06, + "loss": 1.2393, + "step": 2660 + }, + { + "epoch": 0.5, + "learning_rate": 8.52453309140274e-06, + "loss": 1.2729, + "step": 2665 + }, + { + "epoch": 0.5, + "learning_rate": 8.519282645727926e-06, + "loss": 1.2459, + "step": 2670 + }, + { + "epoch": 0.5, + "learning_rate": 8.514024497966806e-06, + "loss": 1.2547, + "step": 2675 + }, + { + "epoch": 0.5, + "learning_rate": 8.508758659627044e-06, + "loss": 1.2458, + "step": 2680 + }, + { + "epoch": 0.51, + "learning_rate": 8.503485142233123e-06, + "loss": 1.1926, + "step": 2685 + }, + { + "epoch": 0.51, + "learning_rate": 8.498203957326348e-06, + "loss": 1.2358, + "step": 2690 + }, + { + "epoch": 0.51, + "learning_rate": 8.492915116464793e-06, + "loss": 1.2535, + "step": 2695 + }, + { + "epoch": 0.51, + "learning_rate": 8.487618631223292e-06, + "loss": 1.2301, + "step": 2700 + }, + { + "epoch": 0.51, + "learning_rate": 8.482314513193405e-06, + "loss": 1.2693, + "step": 2705 + }, + { + "epoch": 0.51, + "learning_rate": 8.477002773983402e-06, + "loss": 1.2129, + "step": 2710 + }, + { + "epoch": 0.51, + "learning_rate": 8.471683425218233e-06, + "loss": 1.1915, + "step": 2715 + }, + { + "epoch": 0.51, + "learning_rate": 8.466356478539492e-06, + "loss": 1.2216, + "step": 2720 + }, + { + "epoch": 0.51, + "learning_rate": 8.461021945605414e-06, + "loss": 1.2107, + "step": 2725 + }, + { + "epoch": 0.51, + "learning_rate": 8.45567983809083e-06, + "loss": 1.2027, + "step": 2730 + }, + { + "epoch": 0.52, + "learning_rate": 8.450330167687145e-06, + "loss": 1.24, + "step": 2735 + }, + { + "epoch": 0.52, + "learning_rate": 8.444972946102322e-06, + "loss": 1.2375, + "step": 2740 + }, + { + "epoch": 0.52, + "learning_rate": 8.439608185060847e-06, + "loss": 1.2444, + "step": 2745 + }, + { + "epoch": 0.52, + "learning_rate": 8.43423589630371e-06, + "loss": 1.223, + "step": 2750 + }, + { + "epoch": 0.52, + "learning_rate": 8.428856091588371e-06, + "loss": 1.2897, + "step": 2755 + }, + { + "epoch": 0.52, + "learning_rate": 8.423468782688742e-06, + "loss": 1.2659, + "step": 2760 + }, + { + "epoch": 0.52, + "learning_rate": 8.418073981395154e-06, + "loss": 1.2025, + "step": 2765 + }, + { + "epoch": 0.52, + "learning_rate": 8.412671699514344e-06, + "loss": 1.2103, + "step": 2770 + }, + { + "epoch": 0.52, + "learning_rate": 8.407261948869408e-06, + "loss": 1.2428, + "step": 2775 + }, + { + "epoch": 0.52, + "learning_rate": 8.4018447412998e-06, + "loss": 1.2459, + "step": 2780 + }, + { + "epoch": 0.52, + "learning_rate": 8.396420088661288e-06, + "loss": 1.2237, + "step": 2785 + }, + { + "epoch": 0.53, + "learning_rate": 8.390988002825931e-06, + "loss": 1.2496, + "step": 2790 + }, + { + "epoch": 0.53, + "learning_rate": 8.385548495682064e-06, + "loss": 1.1963, + "step": 2795 + }, + { + "epoch": 0.53, + "learning_rate": 8.380101579134253e-06, + "loss": 1.2577, + "step": 2800 + }, + { + "epoch": 0.53, + "learning_rate": 8.37464726510329e-06, + "loss": 1.1994, + "step": 2805 + }, + { + "epoch": 0.53, + "learning_rate": 8.369185565526152e-06, + "loss": 1.233, + "step": 2810 + }, + { + "epoch": 0.53, + "learning_rate": 8.36371649235598e-06, + "loss": 1.2643, + "step": 2815 + }, + { + "epoch": 0.53, + "learning_rate": 8.35824005756205e-06, + "loss": 1.2162, + "step": 2820 + }, + { + "epoch": 0.53, + "learning_rate": 8.352756273129754e-06, + "loss": 1.221, + "step": 2825 + }, + { + "epoch": 0.53, + "learning_rate": 8.347265151060565e-06, + "loss": 1.2717, + "step": 2830 + }, + { + "epoch": 0.53, + "learning_rate": 8.341766703372018e-06, + "loss": 1.1876, + "step": 2835 + }, + { + "epoch": 0.53, + "learning_rate": 8.336260942097677e-06, + "loss": 1.2074, + "step": 2840 + }, + { + "epoch": 0.54, + "learning_rate": 8.330747879287113e-06, + "loss": 1.241, + "step": 2845 + }, + { + "epoch": 0.54, + "learning_rate": 8.325227527005882e-06, + "loss": 1.1824, + "step": 2850 + }, + { + "epoch": 0.54, + "learning_rate": 8.319699897335481e-06, + "loss": 1.1995, + "step": 2855 + }, + { + "epoch": 0.54, + "learning_rate": 8.314165002373349e-06, + "loss": 1.2089, + "step": 2860 + }, + { + "epoch": 0.54, + "learning_rate": 8.308622854232813e-06, + "loss": 1.2289, + "step": 2865 + }, + { + "epoch": 0.54, + "learning_rate": 8.303073465043082e-06, + "loss": 1.2583, + "step": 2870 + }, + { + "epoch": 0.54, + "learning_rate": 8.297516846949205e-06, + "loss": 1.2434, + "step": 2875 + }, + { + "epoch": 0.54, + "learning_rate": 8.291953012112059e-06, + "loss": 1.237, + "step": 2880 + }, + { + "epoch": 0.54, + "learning_rate": 8.286381972708312e-06, + "loss": 1.1752, + "step": 2885 + }, + { + "epoch": 0.54, + "learning_rate": 8.280803740930397e-06, + "loss": 1.2871, + "step": 2890 + }, + { + "epoch": 0.55, + "learning_rate": 8.275218328986495e-06, + "loss": 1.2307, + "step": 2895 + }, + { + "epoch": 0.55, + "learning_rate": 8.26962574910049e-06, + "loss": 1.2672, + "step": 2900 + }, + { + "epoch": 0.55, + "learning_rate": 8.264026013511963e-06, + "loss": 1.258, + "step": 2905 + }, + { + "epoch": 0.55, + "learning_rate": 8.258419134476153e-06, + "loss": 1.1937, + "step": 2910 + }, + { + "epoch": 0.55, + "learning_rate": 8.25280512426393e-06, + "loss": 1.2247, + "step": 2915 + }, + { + "epoch": 0.55, + "learning_rate": 8.247183995161774e-06, + "loss": 1.2339, + "step": 2920 + }, + { + "epoch": 0.55, + "learning_rate": 8.24155575947174e-06, + "loss": 1.1705, + "step": 2925 + }, + { + "epoch": 0.55, + "learning_rate": 8.235920429511446e-06, + "loss": 1.2424, + "step": 2930 + }, + { + "epoch": 0.55, + "learning_rate": 8.230278017614024e-06, + "loss": 1.2242, + "step": 2935 + }, + { + "epoch": 0.55, + "learning_rate": 8.224628536128114e-06, + "loss": 1.1931, + "step": 2940 + }, + { + "epoch": 0.55, + "learning_rate": 8.218971997417824e-06, + "loss": 1.2189, + "step": 2945 + }, + { + "epoch": 0.56, + "learning_rate": 8.213308413862707e-06, + "loss": 1.2524, + "step": 2950 + }, + { + "epoch": 0.56, + "learning_rate": 8.207637797857737e-06, + "loss": 1.2658, + "step": 2955 + }, + { + "epoch": 0.56, + "learning_rate": 8.201960161813276e-06, + "loss": 1.2396, + "step": 2960 + }, + { + "epoch": 0.56, + "learning_rate": 8.196275518155048e-06, + "loss": 1.2209, + "step": 2965 + }, + { + "epoch": 0.56, + "learning_rate": 8.19058387932412e-06, + "loss": 1.1996, + "step": 2970 + }, + { + "epoch": 0.56, + "learning_rate": 8.18488525777686e-06, + "loss": 1.2233, + "step": 2975 + }, + { + "epoch": 0.56, + "learning_rate": 8.179179665984922e-06, + "loss": 1.1898, + "step": 2980 + }, + { + "epoch": 0.56, + "learning_rate": 8.173467116435218e-06, + "loss": 1.2393, + "step": 2985 + }, + { + "epoch": 0.56, + "learning_rate": 8.16774762162988e-06, + "loss": 1.2518, + "step": 2990 + }, + { + "epoch": 0.56, + "learning_rate": 8.162021194086246e-06, + "loss": 1.2343, + "step": 2995 + }, + { + "epoch": 0.57, + "learning_rate": 8.156287846336823e-06, + "loss": 1.2181, + "step": 3000 + }, + { + "epoch": 0.57, + "learning_rate": 8.150547590929267e-06, + "loss": 1.2471, + "step": 3005 + }, + { + "epoch": 0.57, + "learning_rate": 8.144800440426345e-06, + "loss": 1.2433, + "step": 3010 + }, + { + "epoch": 0.57, + "learning_rate": 8.139046407405921e-06, + "loss": 1.2464, + "step": 3015 + }, + { + "epoch": 0.57, + "learning_rate": 8.13328550446092e-06, + "loss": 1.1921, + "step": 3020 + }, + { + "epoch": 0.57, + "learning_rate": 8.1275177441993e-06, + "loss": 1.1828, + "step": 3025 + }, + { + "epoch": 0.57, + "learning_rate": 8.121743139244027e-06, + "loss": 1.2437, + "step": 3030 + }, + { + "epoch": 0.57, + "learning_rate": 8.115961702233048e-06, + "loss": 1.2355, + "step": 3035 + }, + { + "epoch": 0.57, + "learning_rate": 8.110173445819263e-06, + "loss": 1.2269, + "step": 3040 + }, + { + "epoch": 0.57, + "learning_rate": 8.104378382670491e-06, + "loss": 1.1938, + "step": 3045 + }, + { + "epoch": 0.57, + "learning_rate": 8.098576525469457e-06, + "loss": 1.2, + "step": 3050 + }, + { + "epoch": 0.58, + "learning_rate": 8.092767886913748e-06, + "loss": 1.2103, + "step": 3055 + }, + { + "epoch": 0.58, + "learning_rate": 8.086952479715793e-06, + "loss": 1.2651, + "step": 3060 + }, + { + "epoch": 0.58, + "learning_rate": 8.081130316602838e-06, + "loss": 1.1872, + "step": 3065 + }, + { + "epoch": 0.58, + "learning_rate": 8.075301410316912e-06, + "loss": 1.255, + "step": 3070 + }, + { + "epoch": 0.58, + "learning_rate": 8.0694657736148e-06, + "loss": 1.2627, + "step": 3075 + }, + { + "epoch": 0.58, + "learning_rate": 8.06362341926802e-06, + "loss": 1.1728, + "step": 3080 + }, + { + "epoch": 0.58, + "learning_rate": 8.057774360062793e-06, + "loss": 1.2404, + "step": 3085 + }, + { + "epoch": 0.58, + "learning_rate": 8.051918608800009e-06, + "loss": 1.2136, + "step": 3090 + }, + { + "epoch": 0.58, + "learning_rate": 8.046056178295206e-06, + "loss": 1.2057, + "step": 3095 + }, + { + "epoch": 0.58, + "learning_rate": 8.040187081378542e-06, + "loss": 1.2423, + "step": 3100 + }, + { + "epoch": 0.58, + "learning_rate": 8.034311330894761e-06, + "loss": 1.1803, + "step": 3105 + }, + { + "epoch": 0.59, + "learning_rate": 8.02842893970317e-06, + "loss": 1.2006, + "step": 3110 + }, + { + "epoch": 0.59, + "learning_rate": 8.022539920677613e-06, + "loss": 1.1922, + "step": 3115 + }, + { + "epoch": 0.59, + "learning_rate": 8.016644286706433e-06, + "loss": 1.2263, + "step": 3120 + }, + { + "epoch": 0.59, + "learning_rate": 8.010742050692455e-06, + "loss": 1.2477, + "step": 3125 + }, + { + "epoch": 0.59, + "learning_rate": 8.00483322555295e-06, + "loss": 1.226, + "step": 3130 + }, + { + "epoch": 0.59, + "learning_rate": 7.99891782421961e-06, + "loss": 1.265, + "step": 3135 + }, + { + "epoch": 0.59, + "learning_rate": 7.99299585963852e-06, + "loss": 1.227, + "step": 3140 + }, + { + "epoch": 0.59, + "learning_rate": 7.98706734477013e-06, + "loss": 1.2216, + "step": 3145 + }, + { + "epoch": 0.59, + "learning_rate": 7.981132292589224e-06, + "loss": 1.2616, + "step": 3150 + }, + { + "epoch": 0.59, + "learning_rate": 7.975190716084893e-06, + "loss": 1.1903, + "step": 3155 + }, + { + "epoch": 0.6, + "learning_rate": 7.969242628260507e-06, + "loss": 1.2308, + "step": 3160 + }, + { + "epoch": 0.6, + "learning_rate": 7.963288042133686e-06, + "loss": 1.3016, + "step": 3165 + }, + { + "epoch": 0.6, + "learning_rate": 7.957326970736275e-06, + "loss": 1.1936, + "step": 3170 + }, + { + "epoch": 0.6, + "learning_rate": 7.951359427114306e-06, + "loss": 1.2297, + "step": 3175 + }, + { + "epoch": 0.6, + "learning_rate": 7.945385424327981e-06, + "loss": 1.2291, + "step": 3180 + }, + { + "epoch": 0.6, + "learning_rate": 7.939404975451636e-06, + "loss": 1.2212, + "step": 3185 + }, + { + "epoch": 0.6, + "learning_rate": 7.933418093573714e-06, + "loss": 1.2414, + "step": 3190 + }, + { + "epoch": 0.6, + "learning_rate": 7.927424791796738e-06, + "loss": 1.2382, + "step": 3195 + }, + { + "epoch": 0.6, + "learning_rate": 7.921425083237284e-06, + "loss": 1.2401, + "step": 3200 + }, + { + "epoch": 0.6, + "learning_rate": 7.915418981025943e-06, + "loss": 1.186, + "step": 3205 + }, + { + "epoch": 0.6, + "learning_rate": 7.909406498307303e-06, + "loss": 1.1709, + "step": 3210 + }, + { + "epoch": 0.61, + "learning_rate": 7.903387648239916e-06, + "loss": 1.2169, + "step": 3215 + }, + { + "epoch": 0.61, + "learning_rate": 7.897362443996268e-06, + "loss": 1.2647, + "step": 3220 + }, + { + "epoch": 0.61, + "learning_rate": 7.89133089876275e-06, + "loss": 1.2416, + "step": 3225 + }, + { + "epoch": 0.61, + "learning_rate": 7.885293025739634e-06, + "loss": 1.1713, + "step": 3230 + }, + { + "epoch": 0.61, + "learning_rate": 7.87924883814104e-06, + "loss": 1.2494, + "step": 3235 + }, + { + "epoch": 0.61, + "learning_rate": 7.873198349194903e-06, + "loss": 1.1865, + "step": 3240 + }, + { + "epoch": 0.61, + "learning_rate": 7.867141572142958e-06, + "loss": 1.2434, + "step": 3245 + }, + { + "epoch": 0.61, + "learning_rate": 7.861078520240689e-06, + "loss": 1.2221, + "step": 3250 + }, + { + "epoch": 0.61, + "learning_rate": 7.855009206757325e-06, + "loss": 1.2389, + "step": 3255 + }, + { + "epoch": 0.61, + "learning_rate": 7.84893364497579e-06, + "loss": 1.2234, + "step": 3260 + }, + { + "epoch": 0.61, + "learning_rate": 7.842851848192688e-06, + "loss": 1.2358, + "step": 3265 + }, + { + "epoch": 0.62, + "learning_rate": 7.836763829718269e-06, + "loss": 1.2015, + "step": 3270 + }, + { + "epoch": 0.62, + "learning_rate": 7.830669602876393e-06, + "loss": 1.2222, + "step": 3275 + }, + { + "epoch": 0.62, + "learning_rate": 7.824569181004513e-06, + "loss": 1.2535, + "step": 3280 + }, + { + "epoch": 0.62, + "learning_rate": 7.818462577453639e-06, + "loss": 1.2217, + "step": 3285 + }, + { + "epoch": 0.62, + "learning_rate": 7.812349805588308e-06, + "loss": 1.2082, + "step": 3290 + }, + { + "epoch": 0.62, + "learning_rate": 7.806230878786561e-06, + "loss": 1.2427, + "step": 3295 + }, + { + "epoch": 0.62, + "learning_rate": 7.8001058104399e-06, + "loss": 1.2168, + "step": 3300 + }, + { + "epoch": 0.62, + "learning_rate": 7.793974613953281e-06, + "loss": 1.1697, + "step": 3305 + }, + { + "epoch": 0.62, + "learning_rate": 7.78783730274506e-06, + "loss": 1.2049, + "step": 3310 + }, + { + "epoch": 0.62, + "learning_rate": 7.781693890246982e-06, + "loss": 1.2151, + "step": 3315 + }, + { + "epoch": 0.63, + "learning_rate": 7.775544389904142e-06, + "loss": 1.2278, + "step": 3320 + }, + { + "epoch": 0.63, + "learning_rate": 7.769388815174963e-06, + "loss": 1.1966, + "step": 3325 + }, + { + "epoch": 0.63, + "learning_rate": 7.763227179531155e-06, + "loss": 1.2076, + "step": 3330 + }, + { + "epoch": 0.63, + "learning_rate": 7.757059496457698e-06, + "loss": 1.2531, + "step": 3335 + }, + { + "epoch": 0.63, + "learning_rate": 7.750885779452804e-06, + "loss": 1.2001, + "step": 3340 + }, + { + "epoch": 0.63, + "learning_rate": 7.744706042027891e-06, + "loss": 1.2402, + "step": 3345 + }, + { + "epoch": 0.63, + "learning_rate": 7.738520297707556e-06, + "loss": 1.2372, + "step": 3350 + }, + { + "epoch": 0.63, + "learning_rate": 7.73232856002954e-06, + "loss": 1.2054, + "step": 3355 + }, + { + "epoch": 0.63, + "learning_rate": 7.726130842544697e-06, + "loss": 1.2171, + "step": 3360 + }, + { + "epoch": 0.63, + "learning_rate": 7.719927158816976e-06, + "loss": 1.2503, + "step": 3365 + }, + { + "epoch": 0.63, + "learning_rate": 7.713717522423374e-06, + "loss": 1.2144, + "step": 3370 + }, + { + "epoch": 0.64, + "learning_rate": 7.707501946953926e-06, + "loss": 1.1641, + "step": 3375 + }, + { + "epoch": 0.64, + "learning_rate": 7.701280446011653e-06, + "loss": 1.2765, + "step": 3380 + }, + { + "epoch": 0.64, + "learning_rate": 7.695053033212558e-06, + "loss": 1.261, + "step": 3385 + }, + { + "epoch": 0.64, + "learning_rate": 7.688819722185572e-06, + "loss": 1.2039, + "step": 3390 + }, + { + "epoch": 0.64, + "learning_rate": 7.682580526572535e-06, + "loss": 1.1917, + "step": 3395 + }, + { + "epoch": 0.64, + "learning_rate": 7.67633546002817e-06, + "loss": 1.1857, + "step": 3400 + }, + { + "epoch": 0.64, + "learning_rate": 7.670084536220051e-06, + "loss": 1.2213, + "step": 3405 + }, + { + "epoch": 0.64, + "learning_rate": 7.663827768828558e-06, + "loss": 1.187, + "step": 3410 + }, + { + "epoch": 0.64, + "learning_rate": 7.657565171546878e-06, + "loss": 1.1954, + "step": 3415 + }, + { + "epoch": 0.64, + "learning_rate": 7.65129675808094e-06, + "loss": 1.2657, + "step": 3420 + }, + { + "epoch": 0.65, + "learning_rate": 7.645022542149412e-06, + "loss": 1.2075, + "step": 3425 + }, + { + "epoch": 0.65, + "learning_rate": 7.63874253748366e-06, + "loss": 1.1668, + "step": 3430 + }, + { + "epoch": 0.65, + "learning_rate": 7.632456757827718e-06, + "loss": 1.2017, + "step": 3435 + }, + { + "epoch": 0.65, + "learning_rate": 7.626165216938255e-06, + "loss": 1.1722, + "step": 3440 + }, + { + "epoch": 0.65, + "learning_rate": 7.619867928584552e-06, + "loss": 1.2087, + "step": 3445 + }, + { + "epoch": 0.65, + "learning_rate": 7.61356490654847e-06, + "loss": 1.2347, + "step": 3450 + }, + { + "epoch": 0.65, + "learning_rate": 7.607256164624418e-06, + "loss": 1.189, + "step": 3455 + }, + { + "epoch": 0.65, + "learning_rate": 7.600941716619319e-06, + "loss": 1.2171, + "step": 3460 + }, + { + "epoch": 0.65, + "learning_rate": 7.594621576352589e-06, + "loss": 1.2067, + "step": 3465 + }, + { + "epoch": 0.65, + "learning_rate": 7.588295757656098e-06, + "loss": 1.1861, + "step": 3470 + }, + { + "epoch": 0.65, + "learning_rate": 7.581964274374145e-06, + "loss": 1.2247, + "step": 3475 + }, + { + "epoch": 0.66, + "learning_rate": 7.575627140363429e-06, + "loss": 1.2087, + "step": 3480 + }, + { + "epoch": 0.66, + "learning_rate": 7.5692843694930105e-06, + "loss": 1.2169, + "step": 3485 + }, + { + "epoch": 0.66, + "learning_rate": 7.56293597564429e-06, + "loss": 1.213, + "step": 3490 + }, + { + "epoch": 0.66, + "learning_rate": 7.556581972710972e-06, + "loss": 1.2218, + "step": 3495 + }, + { + "epoch": 0.66, + "learning_rate": 7.550222374599039e-06, + "loss": 1.2029, + "step": 3500 + }, + { + "epoch": 0.66, + "learning_rate": 7.543857195226718e-06, + "loss": 1.2768, + "step": 3505 + }, + { + "epoch": 0.66, + "learning_rate": 7.537486448524449e-06, + "loss": 1.1583, + "step": 3510 + }, + { + "epoch": 0.66, + "learning_rate": 7.5311101484348605e-06, + "loss": 1.2053, + "step": 3515 + }, + { + "epoch": 0.66, + "learning_rate": 7.52472830891273e-06, + "loss": 1.2213, + "step": 3520 + }, + { + "epoch": 0.66, + "learning_rate": 7.51834094392496e-06, + "loss": 1.2203, + "step": 3525 + }, + { + "epoch": 0.66, + "learning_rate": 7.511948067450547e-06, + "loss": 1.2004, + "step": 3530 + }, + { + "epoch": 0.67, + "learning_rate": 7.505549693480548e-06, + "loss": 1.2027, + "step": 3535 + }, + { + "epoch": 0.67, + "learning_rate": 7.499145836018053e-06, + "loss": 1.2387, + "step": 3540 + }, + { + "epoch": 0.67, + "learning_rate": 7.492736509078151e-06, + "loss": 1.2813, + "step": 3545 + }, + { + "epoch": 0.67, + "learning_rate": 7.486321726687902e-06, + "loss": 1.2308, + "step": 3550 + }, + { + "epoch": 0.67, + "learning_rate": 7.479901502886309e-06, + "loss": 1.2244, + "step": 3555 + }, + { + "epoch": 0.67, + "learning_rate": 7.4734758517242744e-06, + "loss": 1.192, + "step": 3560 + }, + { + "epoch": 0.67, + "learning_rate": 7.467044787264588e-06, + "loss": 1.1914, + "step": 3565 + }, + { + "epoch": 0.67, + "learning_rate": 7.460608323581883e-06, + "loss": 1.2356, + "step": 3570 + }, + { + "epoch": 0.67, + "learning_rate": 7.45416647476261e-06, + "loss": 1.196, + "step": 3575 + }, + { + "epoch": 0.67, + "learning_rate": 7.447719254905003e-06, + "loss": 1.2119, + "step": 3580 + }, + { + "epoch": 0.68, + "learning_rate": 7.441266678119052e-06, + "loss": 1.198, + "step": 3585 + }, + { + "epoch": 0.68, + "learning_rate": 7.4348087585264735e-06, + "loss": 1.1974, + "step": 3590 + }, + { + "epoch": 0.68, + "learning_rate": 7.4283455102606725e-06, + "loss": 1.1709, + "step": 3595 + }, + { + "epoch": 0.68, + "learning_rate": 7.421876947466721e-06, + "loss": 1.2211, + "step": 3600 + }, + { + "epoch": 0.68, + "learning_rate": 7.415403084301313e-06, + "loss": 1.2434, + "step": 3605 + }, + { + "epoch": 0.68, + "learning_rate": 7.4089239349327545e-06, + "loss": 1.2492, + "step": 3610 + }, + { + "epoch": 0.68, + "learning_rate": 7.402439513540912e-06, + "loss": 1.2323, + "step": 3615 + }, + { + "epoch": 0.68, + "learning_rate": 7.3959498343171934e-06, + "loss": 1.2079, + "step": 3620 + }, + { + "epoch": 0.68, + "learning_rate": 7.3894549114645135e-06, + "loss": 1.1491, + "step": 3625 + }, + { + "epoch": 0.68, + "learning_rate": 7.38295475919726e-06, + "loss": 1.2059, + "step": 3630 + }, + { + "epoch": 0.68, + "learning_rate": 7.376449391741269e-06, + "loss": 1.2593, + "step": 3635 + }, + { + "epoch": 0.69, + "learning_rate": 7.3699388233337905e-06, + "loss": 1.2003, + "step": 3640 + }, + { + "epoch": 0.69, + "learning_rate": 7.3634230682234565e-06, + "loss": 1.2113, + "step": 3645 + }, + { + "epoch": 0.69, + "learning_rate": 7.356902140670245e-06, + "loss": 1.2085, + "step": 3650 + }, + { + "epoch": 0.69, + "learning_rate": 7.3503760549454604e-06, + "loss": 1.1613, + "step": 3655 + }, + { + "epoch": 0.69, + "learning_rate": 7.343844825331694e-06, + "loss": 1.2167, + "step": 3660 + }, + { + "epoch": 0.69, + "learning_rate": 7.337308466122797e-06, + "loss": 1.2306, + "step": 3665 + }, + { + "epoch": 0.69, + "learning_rate": 7.33076699162384e-06, + "loss": 1.2402, + "step": 3670 + }, + { + "epoch": 0.69, + "learning_rate": 7.324220416151097e-06, + "loss": 1.2608, + "step": 3675 + }, + { + "epoch": 0.69, + "learning_rate": 7.317668754031999e-06, + "loss": 1.1872, + "step": 3680 + }, + { + "epoch": 0.69, + "learning_rate": 7.311112019605113e-06, + "loss": 1.2006, + "step": 3685 + }, + { + "epoch": 0.7, + "learning_rate": 7.304550227220109e-06, + "loss": 1.2038, + "step": 3690 + }, + { + "epoch": 0.7, + "learning_rate": 7.29798339123772e-06, + "loss": 1.2772, + "step": 3695 + }, + { + "epoch": 0.7, + "learning_rate": 7.291411526029721e-06, + "loss": 1.238, + "step": 3700 + }, + { + "epoch": 0.7, + "learning_rate": 7.284834645978895e-06, + "loss": 1.2405, + "step": 3705 + }, + { + "epoch": 0.7, + "learning_rate": 7.278252765479e-06, + "loss": 1.2214, + "step": 3710 + }, + { + "epoch": 0.7, + "learning_rate": 7.271665898934731e-06, + "loss": 1.1704, + "step": 3715 + }, + { + "epoch": 0.7, + "learning_rate": 7.265074060761707e-06, + "loss": 1.1888, + "step": 3720 + }, + { + "epoch": 0.7, + "learning_rate": 7.258477265386416e-06, + "loss": 1.244, + "step": 3725 + }, + { + "epoch": 0.7, + "learning_rate": 7.251875527246202e-06, + "loss": 1.2039, + "step": 3730 + }, + { + "epoch": 0.7, + "learning_rate": 7.245268860789224e-06, + "loss": 1.1615, + "step": 3735 + }, + { + "epoch": 0.7, + "learning_rate": 7.238657280474427e-06, + "loss": 1.2295, + "step": 3740 + }, + { + "epoch": 0.71, + "learning_rate": 7.2320408007715125e-06, + "loss": 1.2395, + "step": 3745 + }, + { + "epoch": 0.71, + "learning_rate": 7.225419436160901e-06, + "loss": 1.2437, + "step": 3750 + }, + { + "epoch": 0.71, + "learning_rate": 7.218793201133703e-06, + "loss": 1.197, + "step": 3755 + }, + { + "epoch": 0.71, + "learning_rate": 7.212162110191694e-06, + "loss": 1.203, + "step": 3760 + }, + { + "epoch": 0.71, + "learning_rate": 7.205526177847273e-06, + "loss": 1.1997, + "step": 3765 + }, + { + "epoch": 0.71, + "learning_rate": 7.198885418623432e-06, + "loss": 1.2124, + "step": 3770 + }, + { + "epoch": 0.71, + "learning_rate": 7.192239847053732e-06, + "loss": 1.2156, + "step": 3775 + }, + { + "epoch": 0.71, + "learning_rate": 7.185589477682262e-06, + "loss": 1.2385, + "step": 3780 + }, + { + "epoch": 0.71, + "learning_rate": 7.178934325063615e-06, + "loss": 1.2015, + "step": 3785 + }, + { + "epoch": 0.71, + "learning_rate": 7.1722744037628475e-06, + "loss": 1.2082, + "step": 3790 + }, + { + "epoch": 0.71, + "learning_rate": 7.165609728355458e-06, + "loss": 1.2339, + "step": 3795 + }, + { + "epoch": 0.72, + "learning_rate": 7.158940313427345e-06, + "loss": 1.2596, + "step": 3800 + }, + { + "epoch": 0.72, + "learning_rate": 7.15226617357478e-06, + "loss": 1.2164, + "step": 3805 + }, + { + "epoch": 0.72, + "learning_rate": 7.145587323404379e-06, + "loss": 1.2783, + "step": 3810 + }, + { + "epoch": 0.72, + "learning_rate": 7.1389037775330615e-06, + "loss": 1.2137, + "step": 3815 + }, + { + "epoch": 0.72, + "learning_rate": 7.132215550588029e-06, + "loss": 1.2264, + "step": 3820 + }, + { + "epoch": 0.72, + "learning_rate": 7.125522657206723e-06, + "loss": 1.1773, + "step": 3825 + }, + { + "epoch": 0.72, + "learning_rate": 7.1188251120368e-06, + "loss": 1.2011, + "step": 3830 + }, + { + "epoch": 0.72, + "learning_rate": 7.112122929736099e-06, + "loss": 1.2112, + "step": 3835 + }, + { + "epoch": 0.72, + "learning_rate": 7.105416124972604e-06, + "loss": 1.2051, + "step": 3840 + }, + { + "epoch": 0.72, + "learning_rate": 7.098704712424416e-06, + "loss": 1.2367, + "step": 3845 + }, + { + "epoch": 0.73, + "learning_rate": 7.091988706779722e-06, + "loss": 1.2704, + "step": 3850 + }, + { + "epoch": 0.73, + "learning_rate": 7.085268122736759e-06, + "loss": 1.2379, + "step": 3855 + }, + { + "epoch": 0.73, + "learning_rate": 7.0785429750037886e-06, + "loss": 1.1949, + "step": 3860 + }, + { + "epoch": 0.73, + "learning_rate": 7.071813278299054e-06, + "loss": 1.223, + "step": 3865 + }, + { + "epoch": 0.73, + "learning_rate": 7.065079047350758e-06, + "loss": 1.2187, + "step": 3870 + }, + { + "epoch": 0.73, + "learning_rate": 7.058340296897026e-06, + "loss": 1.2694, + "step": 3875 + }, + { + "epoch": 0.73, + "learning_rate": 7.0515970416858715e-06, + "loss": 1.2184, + "step": 3880 + }, + { + "epoch": 0.73, + "learning_rate": 7.0448492964751744e-06, + "loss": 1.2705, + "step": 3885 + }, + { + "epoch": 0.73, + "learning_rate": 7.038097076032634e-06, + "loss": 1.1638, + "step": 3890 + }, + { + "epoch": 0.73, + "learning_rate": 7.031340395135744e-06, + "loss": 1.19, + "step": 3895 + }, + { + "epoch": 0.73, + "learning_rate": 7.024579268571765e-06, + "loss": 1.1315, + "step": 3900 + }, + { + "epoch": 0.74, + "learning_rate": 7.017813711137683e-06, + "loss": 1.2275, + "step": 3905 + }, + { + "epoch": 0.74, + "learning_rate": 7.011043737640183e-06, + "loss": 1.1985, + "step": 3910 + }, + { + "epoch": 0.74, + "learning_rate": 7.004269362895615e-06, + "loss": 1.2149, + "step": 3915 + }, + { + "epoch": 0.74, + "learning_rate": 6.997490601729957e-06, + "loss": 1.1797, + "step": 3920 + }, + { + "epoch": 0.74, + "learning_rate": 6.9907074689787926e-06, + "loss": 1.2348, + "step": 3925 + }, + { + "epoch": 0.74, + "learning_rate": 6.98391997948727e-06, + "loss": 1.2054, + "step": 3930 + }, + { + "epoch": 0.74, + "learning_rate": 6.9771281481100725e-06, + "loss": 1.1544, + "step": 3935 + }, + { + "epoch": 0.74, + "learning_rate": 6.9703319897113864e-06, + "loss": 1.2191, + "step": 3940 + }, + { + "epoch": 0.74, + "learning_rate": 6.963531519164868e-06, + "loss": 1.1916, + "step": 3945 + }, + { + "epoch": 0.74, + "learning_rate": 6.956726751353608e-06, + "loss": 1.1639, + "step": 3950 + }, + { + "epoch": 0.74, + "learning_rate": 6.949917701170108e-06, + "loss": 1.2133, + "step": 3955 + }, + { + "epoch": 0.75, + "learning_rate": 6.943104383516235e-06, + "loss": 1.2159, + "step": 3960 + }, + { + "epoch": 0.75, + "learning_rate": 6.936286813303199e-06, + "loss": 1.1678, + "step": 3965 + }, + { + "epoch": 0.75, + "learning_rate": 6.9294650054515155e-06, + "loss": 1.1663, + "step": 3970 + }, + { + "epoch": 0.75, + "learning_rate": 6.922638974890978e-06, + "loss": 1.1957, + "step": 3975 + }, + { + "epoch": 0.75, + "learning_rate": 6.915808736560614e-06, + "loss": 1.1947, + "step": 3980 + }, + { + "epoch": 0.75, + "learning_rate": 6.90897430540867e-06, + "loss": 1.2102, + "step": 3985 + }, + { + "epoch": 0.75, + "learning_rate": 6.902135696392559e-06, + "loss": 1.2129, + "step": 3990 + }, + { + "epoch": 0.75, + "learning_rate": 6.895292924478842e-06, + "loss": 1.2128, + "step": 3995 + }, + { + "epoch": 0.75, + "learning_rate": 6.888446004643192e-06, + "loss": 1.1944, + "step": 4000 + }, + { + "epoch": 0.75, + "learning_rate": 6.881594951870357e-06, + "loss": 1.1894, + "step": 4005 + }, + { + "epoch": 0.76, + "learning_rate": 6.874739781154132e-06, + "loss": 1.2022, + "step": 4010 + }, + { + "epoch": 0.76, + "learning_rate": 6.867880507497322e-06, + "loss": 1.2787, + "step": 4015 + }, + { + "epoch": 0.76, + "learning_rate": 6.861017145911715e-06, + "loss": 1.1975, + "step": 4020 + }, + { + "epoch": 0.76, + "learning_rate": 6.854149711418041e-06, + "loss": 1.2539, + "step": 4025 + }, + { + "epoch": 0.76, + "learning_rate": 6.847278219045948e-06, + "loss": 1.2487, + "step": 4030 + }, + { + "epoch": 0.76, + "learning_rate": 6.8404026838339635e-06, + "loss": 1.2308, + "step": 4035 + }, + { + "epoch": 0.76, + "learning_rate": 6.83352312082946e-06, + "loss": 1.2168, + "step": 4040 + }, + { + "epoch": 0.76, + "learning_rate": 6.82663954508863e-06, + "loss": 1.1773, + "step": 4045 + }, + { + "epoch": 0.76, + "learning_rate": 6.819751971676445e-06, + "loss": 1.2334, + "step": 4050 + }, + { + "epoch": 0.76, + "learning_rate": 6.812860415666625e-06, + "loss": 1.1629, + "step": 4055 + }, + { + "epoch": 0.76, + "learning_rate": 6.805964892141608e-06, + "loss": 1.2163, + "step": 4060 + }, + { + "epoch": 0.77, + "learning_rate": 6.799065416192512e-06, + "loss": 1.2282, + "step": 4065 + }, + { + "epoch": 0.77, + "learning_rate": 6.79216200291911e-06, + "loss": 1.1721, + "step": 4070 + }, + { + "epoch": 0.77, + "learning_rate": 6.785254667429789e-06, + "loss": 1.1628, + "step": 4075 + }, + { + "epoch": 0.77, + "learning_rate": 6.77834342484152e-06, + "loss": 1.2204, + "step": 4080 + }, + { + "epoch": 0.77, + "learning_rate": 6.771428290279823e-06, + "loss": 1.1416, + "step": 4085 + }, + { + "epoch": 0.77, + "learning_rate": 6.764509278878739e-06, + "loss": 1.1805, + "step": 4090 + }, + { + "epoch": 0.77, + "learning_rate": 6.7575864057807925e-06, + "loss": 1.2613, + "step": 4095 + }, + { + "epoch": 0.77, + "learning_rate": 6.7506596861369585e-06, + "loss": 1.2193, + "step": 4100 + }, + { + "epoch": 0.77, + "learning_rate": 6.743729135106634e-06, + "loss": 1.1889, + "step": 4105 + }, + { + "epoch": 0.77, + "learning_rate": 6.736794767857593e-06, + "loss": 1.249, + "step": 4110 + }, + { + "epoch": 0.78, + "learning_rate": 6.72985659956597e-06, + "loss": 1.2622, + "step": 4115 + }, + { + "epoch": 0.78, + "learning_rate": 6.722914645416215e-06, + "loss": 1.2191, + "step": 4120 + }, + { + "epoch": 0.78, + "learning_rate": 6.715968920601063e-06, + "loss": 1.1936, + "step": 4125 + }, + { + "epoch": 0.78, + "learning_rate": 6.7090194403215005e-06, + "loss": 1.3156, + "step": 4130 + }, + { + "epoch": 0.78, + "learning_rate": 6.7020662197867345e-06, + "loss": 1.214, + "step": 4135 + }, + { + "epoch": 0.78, + "learning_rate": 6.695109274214159e-06, + "loss": 1.2034, + "step": 4140 + }, + { + "epoch": 0.78, + "learning_rate": 6.688148618829317e-06, + "loss": 1.2401, + "step": 4145 + }, + { + "epoch": 0.78, + "learning_rate": 6.681184268865872e-06, + "loss": 1.1809, + "step": 4150 + }, + { + "epoch": 0.78, + "learning_rate": 6.674216239565576e-06, + "loss": 1.2267, + "step": 4155 + }, + { + "epoch": 0.78, + "learning_rate": 6.667244546178228e-06, + "loss": 1.2277, + "step": 4160 + }, + { + "epoch": 0.78, + "learning_rate": 6.6602692039616515e-06, + "loss": 1.2366, + "step": 4165 + }, + { + "epoch": 0.79, + "learning_rate": 6.6532902281816505e-06, + "loss": 1.1875, + "step": 4170 + }, + { + "epoch": 0.79, + "learning_rate": 6.6463076341119834e-06, + "loss": 1.1938, + "step": 4175 + }, + { + "epoch": 0.79, + "learning_rate": 6.639321437034331e-06, + "loss": 1.2021, + "step": 4180 + }, + { + "epoch": 0.79, + "learning_rate": 6.632331652238252e-06, + "loss": 1.1998, + "step": 4185 + }, + { + "epoch": 0.79, + "learning_rate": 6.625338295021162e-06, + "loss": 1.2098, + "step": 4190 + }, + { + "epoch": 0.79, + "learning_rate": 6.618341380688297e-06, + "loss": 1.1834, + "step": 4195 + }, + { + "epoch": 0.79, + "learning_rate": 6.611340924552672e-06, + "loss": 1.2557, + "step": 4200 + }, + { + "epoch": 0.79, + "learning_rate": 6.604336941935058e-06, + "loss": 1.1757, + "step": 4205 + }, + { + "epoch": 0.79, + "learning_rate": 6.5973294481639394e-06, + "loss": 1.1653, + "step": 4210 + }, + { + "epoch": 0.79, + "learning_rate": 6.5903184585754895e-06, + "loss": 1.2064, + "step": 4215 + }, + { + "epoch": 0.79, + "learning_rate": 6.5833039885135304e-06, + "loss": 1.2517, + "step": 4220 + }, + { + "epoch": 0.8, + "learning_rate": 6.576286053329504e-06, + "loss": 1.2282, + "step": 4225 + }, + { + "epoch": 0.8, + "learning_rate": 6.569264668382427e-06, + "loss": 1.2097, + "step": 4230 + }, + { + "epoch": 0.8, + "learning_rate": 6.562239849038876e-06, + "loss": 1.2141, + "step": 4235 + }, + { + "epoch": 0.8, + "learning_rate": 6.555211610672939e-06, + "loss": 1.1976, + "step": 4240 + }, + { + "epoch": 0.8, + "learning_rate": 6.5481799686661885e-06, + "loss": 1.1911, + "step": 4245 + }, + { + "epoch": 0.8, + "learning_rate": 6.5411449384076466e-06, + "loss": 1.2159, + "step": 4250 + }, + { + "epoch": 0.8, + "learning_rate": 6.534106535293745e-06, + "loss": 1.2672, + "step": 4255 + }, + { + "epoch": 0.8, + "learning_rate": 6.527064774728307e-06, + "loss": 1.217, + "step": 4260 + }, + { + "epoch": 0.8, + "learning_rate": 6.520019672122493e-06, + "loss": 1.2274, + "step": 4265 + }, + { + "epoch": 0.8, + "learning_rate": 6.512971242894786e-06, + "loss": 1.1312, + "step": 4270 + }, + { + "epoch": 0.81, + "learning_rate": 6.505919502470946e-06, + "loss": 1.2732, + "step": 4275 + }, + { + "epoch": 0.81, + "learning_rate": 6.498864466283978e-06, + "loss": 1.247, + "step": 4280 + }, + { + "epoch": 0.81, + "learning_rate": 6.491806149774104e-06, + "loss": 1.1909, + "step": 4285 + }, + { + "epoch": 0.81, + "learning_rate": 6.484744568388721e-06, + "loss": 1.1798, + "step": 4290 + }, + { + "epoch": 0.81, + "learning_rate": 6.4776797375823745e-06, + "loss": 1.204, + "step": 4295 + }, + { + "epoch": 0.81, + "learning_rate": 6.47061167281672e-06, + "loss": 1.2188, + "step": 4300 + }, + { + "epoch": 0.81, + "learning_rate": 6.46354038956049e-06, + "loss": 1.2311, + "step": 4305 + }, + { + "epoch": 0.81, + "learning_rate": 6.456465903289463e-06, + "loss": 1.2286, + "step": 4310 + }, + { + "epoch": 0.81, + "learning_rate": 6.449388229486424e-06, + "loss": 1.2317, + "step": 4315 + }, + { + "epoch": 0.81, + "learning_rate": 6.44230738364114e-06, + "loss": 1.2469, + "step": 4320 + }, + { + "epoch": 0.81, + "learning_rate": 6.435223381250312e-06, + "loss": 1.1677, + "step": 4325 + }, + { + "epoch": 0.82, + "learning_rate": 6.428136237817555e-06, + "loss": 1.2235, + "step": 4330 + }, + { + "epoch": 0.82, + "learning_rate": 6.421045968853358e-06, + "loss": 1.1933, + "step": 4335 + }, + { + "epoch": 0.82, + "learning_rate": 6.4139525898750475e-06, + "loss": 1.2784, + "step": 4340 + }, + { + "epoch": 0.82, + "learning_rate": 6.406856116406759e-06, + "loss": 1.2077, + "step": 4345 + }, + { + "epoch": 0.82, + "learning_rate": 6.399756563979399e-06, + "loss": 1.233, + "step": 4350 + }, + { + "epoch": 0.82, + "learning_rate": 6.392653948130612e-06, + "loss": 1.2218, + "step": 4355 + }, + { + "epoch": 0.82, + "learning_rate": 6.385548284404749e-06, + "loss": 1.2521, + "step": 4360 + }, + { + "epoch": 0.82, + "learning_rate": 6.37843958835283e-06, + "loss": 1.2754, + "step": 4365 + }, + { + "epoch": 0.82, + "learning_rate": 6.371327875532509e-06, + "loss": 1.2187, + "step": 4370 + }, + { + "epoch": 0.82, + "learning_rate": 6.3642131615080484e-06, + "loss": 1.1939, + "step": 4375 + }, + { + "epoch": 0.82, + "learning_rate": 6.357095461850274e-06, + "loss": 1.17, + "step": 4380 + }, + { + "epoch": 0.83, + "learning_rate": 6.349974792136546e-06, + "loss": 1.2572, + "step": 4385 + }, + { + "epoch": 0.83, + "learning_rate": 6.342851167950728e-06, + "loss": 1.2022, + "step": 4390 + }, + { + "epoch": 0.83, + "learning_rate": 6.335724604883145e-06, + "loss": 1.2181, + "step": 4395 + }, + { + "epoch": 0.83, + "learning_rate": 6.328595118530557e-06, + "loss": 1.1926, + "step": 4400 + }, + { + "epoch": 0.83, + "learning_rate": 6.321462724496121e-06, + "loss": 1.1626, + "step": 4405 + }, + { + "epoch": 0.83, + "learning_rate": 6.314327438389357e-06, + "loss": 1.2403, + "step": 4410 + }, + { + "epoch": 0.83, + "learning_rate": 6.307189275826115e-06, + "loss": 1.1694, + "step": 4415 + }, + { + "epoch": 0.83, + "learning_rate": 6.300048252428541e-06, + "loss": 1.2631, + "step": 4420 + }, + { + "epoch": 0.83, + "learning_rate": 6.29290438382504e-06, + "loss": 1.2437, + "step": 4425 + }, + { + "epoch": 0.83, + "learning_rate": 6.285757685650244e-06, + "loss": 1.2903, + "step": 4430 + }, + { + "epoch": 0.84, + "learning_rate": 6.278608173544981e-06, + "loss": 1.1827, + "step": 4435 + }, + { + "epoch": 0.84, + "learning_rate": 6.271455863156235e-06, + "loss": 1.1708, + "step": 4440 + }, + { + "epoch": 0.84, + "learning_rate": 6.264300770137113e-06, + "loss": 1.2226, + "step": 4445 + }, + { + "epoch": 0.84, + "learning_rate": 6.257142910146812e-06, + "loss": 1.1801, + "step": 4450 + }, + { + "epoch": 0.84, + "learning_rate": 6.249982298850587e-06, + "loss": 1.1773, + "step": 4455 + }, + { + "epoch": 0.84, + "learning_rate": 6.242818951919714e-06, + "loss": 1.2259, + "step": 4460 + }, + { + "epoch": 0.84, + "learning_rate": 6.235652885031453e-06, + "loss": 1.2121, + "step": 4465 + }, + { + "epoch": 0.84, + "learning_rate": 6.22848411386902e-06, + "loss": 1.1843, + "step": 4470 + }, + { + "epoch": 0.84, + "learning_rate": 6.221312654121549e-06, + "loss": 1.1318, + "step": 4475 + }, + { + "epoch": 0.84, + "learning_rate": 6.2141385214840546e-06, + "loss": 1.2114, + "step": 4480 + }, + { + "epoch": 0.84, + "learning_rate": 6.206961731657406e-06, + "loss": 1.2366, + "step": 4485 + }, + { + "epoch": 0.85, + "learning_rate": 6.199782300348285e-06, + "loss": 1.17, + "step": 4490 + }, + { + "epoch": 0.85, + "learning_rate": 6.192600243269156e-06, + "loss": 1.2296, + "step": 4495 + }, + { + "epoch": 0.85, + "learning_rate": 6.1854155761382255e-06, + "loss": 1.2345, + "step": 4500 + }, + { + "epoch": 0.85, + "learning_rate": 6.178228314679417e-06, + "loss": 1.2075, + "step": 4505 + }, + { + "epoch": 0.85, + "learning_rate": 6.171038474622335e-06, + "loss": 1.2097, + "step": 4510 + }, + { + "epoch": 0.85, + "learning_rate": 6.163846071702218e-06, + "loss": 1.1675, + "step": 4515 + }, + { + "epoch": 0.85, + "learning_rate": 6.15665112165992e-06, + "loss": 1.2223, + "step": 4520 + }, + { + "epoch": 0.85, + "learning_rate": 6.149453640241866e-06, + "loss": 1.1885, + "step": 4525 + }, + { + "epoch": 0.85, + "learning_rate": 6.1422536432000236e-06, + "loss": 1.2363, + "step": 4530 + }, + { + "epoch": 0.85, + "learning_rate": 6.135051146291866e-06, + "loss": 1.1956, + "step": 4535 + }, + { + "epoch": 0.86, + "learning_rate": 6.127846165280337e-06, + "loss": 1.1864, + "step": 4540 + }, + { + "epoch": 0.86, + "learning_rate": 6.120638715933813e-06, + "loss": 1.204, + "step": 4545 + }, + { + "epoch": 0.86, + "learning_rate": 6.113428814026079e-06, + "loss": 1.2122, + "step": 4550 + }, + { + "epoch": 0.86, + "learning_rate": 6.106216475336284e-06, + "loss": 1.1645, + "step": 4555 + }, + { + "epoch": 0.86, + "learning_rate": 6.099001715648909e-06, + "loss": 1.2178, + "step": 4560 + }, + { + "epoch": 0.86, + "learning_rate": 6.091784550753736e-06, + "loss": 1.1878, + "step": 4565 + }, + { + "epoch": 0.86, + "learning_rate": 6.08456499644581e-06, + "loss": 1.2439, + "step": 4570 + }, + { + "epoch": 0.86, + "learning_rate": 6.0773430685254046e-06, + "loss": 1.1814, + "step": 4575 + }, + { + "epoch": 0.86, + "learning_rate": 6.070118782797989e-06, + "loss": 1.1224, + "step": 4580 + }, + { + "epoch": 0.86, + "learning_rate": 6.062892155074191e-06, + "loss": 1.1976, + "step": 4585 + }, + { + "epoch": 0.86, + "learning_rate": 6.055663201169768e-06, + "loss": 1.2058, + "step": 4590 + }, + { + "epoch": 0.87, + "learning_rate": 6.0484319369055614e-06, + "loss": 1.1937, + "step": 4595 + }, + { + "epoch": 0.87, + "learning_rate": 6.0411983781074755e-06, + "loss": 1.2392, + "step": 4600 + }, + { + "epoch": 0.87, + "learning_rate": 6.033962540606434e-06, + "loss": 1.2675, + "step": 4605 + }, + { + "epoch": 0.87, + "learning_rate": 6.026724440238345e-06, + "loss": 1.1912, + "step": 4610 + }, + { + "epoch": 0.87, + "learning_rate": 6.019484092844076e-06, + "loss": 1.2325, + "step": 4615 + }, + { + "epoch": 0.87, + "learning_rate": 6.0122415142694025e-06, + "loss": 1.1694, + "step": 4620 + }, + { + "epoch": 0.87, + "learning_rate": 6.004996720364988e-06, + "loss": 1.2358, + "step": 4625 + }, + { + "epoch": 0.87, + "learning_rate": 5.997749726986349e-06, + "loss": 1.18, + "step": 4630 + }, + { + "epoch": 0.87, + "learning_rate": 5.990500549993809e-06, + "loss": 1.1803, + "step": 4635 + }, + { + "epoch": 0.87, + "learning_rate": 5.983249205252473e-06, + "loss": 1.2042, + "step": 4640 + }, + { + "epoch": 0.87, + "learning_rate": 5.9759957086321866e-06, + "loss": 1.222, + "step": 4645 + }, + { + "epoch": 0.88, + "learning_rate": 5.968740076007511e-06, + "loss": 1.2193, + "step": 4650 + }, + { + "epoch": 0.88, + "learning_rate": 5.961482323257677e-06, + "loss": 1.2022, + "step": 4655 + }, + { + "epoch": 0.88, + "learning_rate": 5.95422246626656e-06, + "loss": 1.2196, + "step": 4660 + }, + { + "epoch": 0.88, + "learning_rate": 5.9469605209226365e-06, + "loss": 1.2323, + "step": 4665 + }, + { + "epoch": 0.88, + "learning_rate": 5.9396965031189545e-06, + "loss": 1.1626, + "step": 4670 + }, + { + "epoch": 0.88, + "learning_rate": 5.9324304287530995e-06, + "loss": 1.1781, + "step": 4675 + }, + { + "epoch": 0.88, + "learning_rate": 5.925162313727155e-06, + "loss": 1.2148, + "step": 4680 + }, + { + "epoch": 0.88, + "learning_rate": 5.917892173947673e-06, + "loss": 1.1856, + "step": 4685 + }, + { + "epoch": 0.88, + "learning_rate": 5.9106200253256375e-06, + "loss": 1.1836, + "step": 4690 + }, + { + "epoch": 0.88, + "learning_rate": 5.903345883776424e-06, + "loss": 1.1742, + "step": 4695 + }, + { + "epoch": 0.89, + "learning_rate": 5.896069765219774e-06, + "loss": 1.1929, + "step": 4700 + }, + { + "epoch": 0.89, + "learning_rate": 5.8887916855797564e-06, + "loss": 1.2479, + "step": 4705 + }, + { + "epoch": 0.89, + "learning_rate": 5.881511660784729e-06, + "loss": 1.2069, + "step": 4710 + }, + { + "epoch": 0.89, + "learning_rate": 5.874229706767307e-06, + "loss": 1.2265, + "step": 4715 + }, + { + "epoch": 0.89, + "learning_rate": 5.866945839464329e-06, + "loss": 1.2433, + "step": 4720 + }, + { + "epoch": 0.89, + "learning_rate": 5.859660074816821e-06, + "loss": 1.2233, + "step": 4725 + }, + { + "epoch": 0.89, + "learning_rate": 5.852372428769961e-06, + "loss": 1.2252, + "step": 4730 + }, + { + "epoch": 0.89, + "learning_rate": 5.845082917273046e-06, + "loss": 1.2181, + "step": 4735 + }, + { + "epoch": 0.89, + "learning_rate": 5.837791556279452e-06, + "loss": 1.2103, + "step": 4740 + }, + { + "epoch": 0.89, + "learning_rate": 5.830498361746603e-06, + "loss": 1.1682, + "step": 4745 + }, + { + "epoch": 0.89, + "learning_rate": 5.823203349635942e-06, + "loss": 1.1862, + "step": 4750 + }, + { + "epoch": 0.9, + "learning_rate": 5.815906535912883e-06, + "loss": 1.1779, + "step": 4755 + }, + { + "epoch": 0.9, + "learning_rate": 5.808607936546787e-06, + "loss": 1.2325, + "step": 4760 + }, + { + "epoch": 0.9, + "learning_rate": 5.8013075675109174e-06, + "loss": 1.2146, + "step": 4765 + }, + { + "epoch": 0.9, + "learning_rate": 5.794005444782418e-06, + "loss": 1.1979, + "step": 4770 + }, + { + "epoch": 0.9, + "learning_rate": 5.786701584342265e-06, + "loss": 1.226, + "step": 4775 + }, + { + "epoch": 0.9, + "learning_rate": 5.7793960021752424e-06, + "loss": 1.1729, + "step": 4780 + }, + { + "epoch": 0.9, + "learning_rate": 5.7720887142698966e-06, + "loss": 1.2265, + "step": 4785 + }, + { + "epoch": 0.9, + "learning_rate": 5.764779736618512e-06, + "loss": 1.1881, + "step": 4790 + }, + { + "epoch": 0.9, + "learning_rate": 5.757469085217068e-06, + "loss": 1.2046, + "step": 4795 + }, + { + "epoch": 0.9, + "learning_rate": 5.7501567760652075e-06, + "loss": 1.2291, + "step": 4800 + }, + { + "epoch": 0.91, + "learning_rate": 5.742842825166206e-06, + "loss": 1.2548, + "step": 4805 + }, + { + "epoch": 0.91, + "learning_rate": 5.735527248526922e-06, + "loss": 1.2161, + "step": 4810 + }, + { + "epoch": 0.91, + "learning_rate": 5.728210062157784e-06, + "loss": 1.2241, + "step": 4815 + }, + { + "epoch": 0.91, + "learning_rate": 5.720891282072734e-06, + "loss": 1.1633, + "step": 4820 + }, + { + "epoch": 0.91, + "learning_rate": 5.713570924289209e-06, + "loss": 1.1876, + "step": 4825 + }, + { + "epoch": 0.91, + "learning_rate": 5.706249004828091e-06, + "loss": 1.1748, + "step": 4830 + }, + { + "epoch": 0.91, + "learning_rate": 5.6989255397136864e-06, + "loss": 1.15, + "step": 4835 + }, + { + "epoch": 0.91, + "learning_rate": 5.691600544973682e-06, + "loss": 1.2395, + "step": 4840 + }, + { + "epoch": 0.91, + "learning_rate": 5.684274036639114e-06, + "loss": 1.1762, + "step": 4845 + }, + { + "epoch": 0.91, + "learning_rate": 5.676946030744326e-06, + "loss": 1.2414, + "step": 4850 + }, + { + "epoch": 0.91, + "learning_rate": 5.669616543326946e-06, + "loss": 1.2158, + "step": 4855 + }, + { + "epoch": 0.92, + "learning_rate": 5.6622855904278385e-06, + "loss": 1.1991, + "step": 4860 + }, + { + "epoch": 0.92, + "learning_rate": 5.654953188091078e-06, + "loss": 1.2135, + "step": 4865 + }, + { + "epoch": 0.92, + "learning_rate": 5.6476193523639114e-06, + "loss": 1.2314, + "step": 4870 + }, + { + "epoch": 0.92, + "learning_rate": 5.640284099296723e-06, + "loss": 1.1973, + "step": 4875 + }, + { + "epoch": 0.92, + "learning_rate": 5.632947444942998e-06, + "loss": 1.2072, + "step": 4880 + }, + { + "epoch": 0.92, + "learning_rate": 5.625609405359287e-06, + "loss": 1.2057, + "step": 4885 + }, + { + "epoch": 0.92, + "learning_rate": 5.618269996605174e-06, + "loss": 1.229, + "step": 4890 + }, + { + "epoch": 0.92, + "learning_rate": 5.61092923474324e-06, + "loss": 1.2082, + "step": 4895 + }, + { + "epoch": 0.92, + "learning_rate": 5.6035871358390256e-06, + "loss": 1.2337, + "step": 4900 + }, + { + "epoch": 0.92, + "learning_rate": 5.596243715961e-06, + "loss": 1.2147, + "step": 4905 + }, + { + "epoch": 0.92, + "learning_rate": 5.58889899118052e-06, + "loss": 1.2048, + "step": 4910 + }, + { + "epoch": 0.93, + "learning_rate": 5.581552977571801e-06, + "loss": 1.16, + "step": 4915 + }, + { + "epoch": 0.93, + "learning_rate": 5.574205691211878e-06, + "loss": 1.2421, + "step": 4920 + }, + { + "epoch": 0.93, + "learning_rate": 5.566857148180572e-06, + "loss": 1.2817, + "step": 4925 + }, + { + "epoch": 0.93, + "learning_rate": 5.559507364560454e-06, + "loss": 1.213, + "step": 4930 + }, + { + "epoch": 0.93, + "learning_rate": 5.552156356436808e-06, + "loss": 1.2511, + "step": 4935 + }, + { + "epoch": 0.93, + "learning_rate": 5.544804139897601e-06, + "loss": 1.183, + "step": 4940 + }, + { + "epoch": 0.93, + "learning_rate": 5.537450731033447e-06, + "loss": 1.2229, + "step": 4945 + }, + { + "epoch": 0.93, + "learning_rate": 5.530096145937561e-06, + "loss": 1.2265, + "step": 4950 + }, + { + "epoch": 0.93, + "learning_rate": 5.522740400705739e-06, + "loss": 1.1829, + "step": 4955 + }, + { + "epoch": 0.93, + "learning_rate": 5.515383511436314e-06, + "loss": 1.1802, + "step": 4960 + }, + { + "epoch": 0.94, + "learning_rate": 5.508025494230124e-06, + "loss": 1.199, + "step": 4965 + }, + { + "epoch": 0.94, + "learning_rate": 5.5006663651904735e-06, + "loss": 1.2155, + "step": 4970 + }, + { + "epoch": 0.94, + "learning_rate": 5.4933061404231025e-06, + "loss": 1.1956, + "step": 4975 + }, + { + "epoch": 0.94, + "learning_rate": 5.485944836036145e-06, + "loss": 1.1507, + "step": 4980 + }, + { + "epoch": 0.94, + "learning_rate": 5.478582468140103e-06, + "loss": 1.1442, + "step": 4985 + }, + { + "epoch": 0.94, + "learning_rate": 5.471219052847804e-06, + "loss": 1.2285, + "step": 4990 + }, + { + "epoch": 0.94, + "learning_rate": 5.463854606274364e-06, + "loss": 1.2077, + "step": 4995 + }, + { + "epoch": 0.94, + "learning_rate": 5.4564891445371635e-06, + "loss": 1.1715, + "step": 5000 + }, + { + "epoch": 0.94, + "learning_rate": 5.4491226837558e-06, + "loss": 1.1847, + "step": 5005 + }, + { + "epoch": 0.94, + "learning_rate": 5.441755240052056e-06, + "loss": 1.2311, + "step": 5010 + }, + { + "epoch": 0.94, + "learning_rate": 5.434386829549868e-06, + "loss": 1.2502, + "step": 5015 + }, + { + "epoch": 0.95, + "learning_rate": 5.42701746837529e-06, + "loss": 1.1395, + "step": 5020 + }, + { + "epoch": 0.95, + "learning_rate": 5.419647172656451e-06, + "loss": 1.2054, + "step": 5025 + }, + { + "epoch": 0.95, + "learning_rate": 5.412275958523528e-06, + "loss": 1.2361, + "step": 5030 + }, + { + "epoch": 0.95, + "learning_rate": 5.4049038421087115e-06, + "loss": 1.1857, + "step": 5035 + }, + { + "epoch": 0.95, + "learning_rate": 5.397530839546162e-06, + "loss": 1.2728, + "step": 5040 + }, + { + "epoch": 0.95, + "learning_rate": 5.3901569669719835e-06, + "loss": 1.1826, + "step": 5045 + }, + { + "epoch": 0.95, + "learning_rate": 5.382782240524177e-06, + "loss": 1.1855, + "step": 5050 + }, + { + "epoch": 0.95, + "learning_rate": 5.375406676342618e-06, + "loss": 1.2245, + "step": 5055 + }, + { + "epoch": 0.95, + "learning_rate": 5.368030290569018e-06, + "loss": 1.1587, + "step": 5060 + }, + { + "epoch": 0.95, + "learning_rate": 5.360653099346881e-06, + "loss": 1.2477, + "step": 5065 + }, + { + "epoch": 0.95, + "learning_rate": 5.353275118821473e-06, + "loss": 1.2036, + "step": 5070 + }, + { + "epoch": 0.96, + "learning_rate": 5.345896365139792e-06, + "loss": 1.1283, + "step": 5075 + }, + { + "epoch": 0.96, + "learning_rate": 5.338516854450526e-06, + "loss": 1.1743, + "step": 5080 + }, + { + "epoch": 0.96, + "learning_rate": 5.331136602904019e-06, + "loss": 1.1775, + "step": 5085 + }, + { + "epoch": 0.96, + "learning_rate": 5.3237556266522394e-06, + "loss": 1.2209, + "step": 5090 + }, + { + "epoch": 0.96, + "learning_rate": 5.316373941848738e-06, + "loss": 1.1405, + "step": 5095 + }, + { + "epoch": 0.96, + "learning_rate": 5.308991564648617e-06, + "loss": 1.2079, + "step": 5100 + }, + { + "epoch": 0.96, + "learning_rate": 5.301608511208495e-06, + "loss": 1.1826, + "step": 5105 + }, + { + "epoch": 0.96, + "learning_rate": 5.294224797686471e-06, + "loss": 1.2145, + "step": 5110 + }, + { + "epoch": 0.96, + "learning_rate": 5.286840440242087e-06, + "loss": 1.1783, + "step": 5115 + }, + { + "epoch": 0.96, + "learning_rate": 5.279455455036296e-06, + "loss": 1.1832, + "step": 5120 + }, + { + "epoch": 0.97, + "learning_rate": 5.272069858231424e-06, + "loss": 1.1996, + "step": 5125 + }, + { + "epoch": 0.97, + "learning_rate": 5.2646836659911345e-06, + "loss": 1.2062, + "step": 5130 + }, + { + "epoch": 0.97, + "learning_rate": 5.257296894480397e-06, + "loss": 1.1821, + "step": 5135 + }, + { + "epoch": 0.97, + "learning_rate": 5.249909559865446e-06, + "loss": 1.2551, + "step": 5140 + }, + { + "epoch": 0.97, + "learning_rate": 5.242521678313748e-06, + "loss": 1.2246, + "step": 5145 + }, + { + "epoch": 0.97, + "learning_rate": 5.235133265993968e-06, + "loss": 1.1615, + "step": 5150 + }, + { + "epoch": 0.97, + "learning_rate": 5.227744339075934e-06, + "loss": 1.1644, + "step": 5155 + }, + { + "epoch": 0.97, + "learning_rate": 5.2203549137305975e-06, + "loss": 1.1886, + "step": 5160 + }, + { + "epoch": 0.97, + "learning_rate": 5.212965006130003e-06, + "loss": 1.1512, + "step": 5165 + }, + { + "epoch": 0.97, + "learning_rate": 5.205574632447248e-06, + "loss": 1.159, + "step": 5170 + }, + { + "epoch": 0.97, + "learning_rate": 5.198183808856452e-06, + "loss": 1.2089, + "step": 5175 + }, + { + "epoch": 0.98, + "learning_rate": 5.190792551532716e-06, + "loss": 1.2619, + "step": 5180 + }, + { + "epoch": 0.98, + "learning_rate": 5.183400876652098e-06, + "loss": 1.1157, + "step": 5185 + }, + { + "epoch": 0.98, + "learning_rate": 5.176008800391562e-06, + "loss": 1.2277, + "step": 5190 + }, + { + "epoch": 0.98, + "learning_rate": 5.168616338928951e-06, + "loss": 1.1965, + "step": 5195 + }, + { + "epoch": 0.98, + "learning_rate": 5.161223508442957e-06, + "loss": 1.1968, + "step": 5200 + }, + { + "epoch": 0.98, + "learning_rate": 5.153830325113072e-06, + "loss": 1.2189, + "step": 5205 + }, + { + "epoch": 0.98, + "learning_rate": 5.1464368051195654e-06, + "loss": 1.1644, + "step": 5210 + }, + { + "epoch": 0.98, + "learning_rate": 5.139042964643443e-06, + "loss": 1.2008, + "step": 5215 + }, + { + "epoch": 0.98, + "learning_rate": 5.131648819866411e-06, + "loss": 1.1781, + "step": 5220 + }, + { + "epoch": 0.98, + "learning_rate": 5.124254386970838e-06, + "loss": 1.1887, + "step": 5225 + }, + { + "epoch": 0.99, + "learning_rate": 5.116859682139731e-06, + "loss": 1.2135, + "step": 5230 + }, + { + "epoch": 0.99, + "learning_rate": 5.109464721556685e-06, + "loss": 1.2185, + "step": 5235 + }, + { + "epoch": 0.99, + "learning_rate": 5.102069521405859e-06, + "loss": 1.179, + "step": 5240 + }, + { + "epoch": 0.99, + "learning_rate": 5.094674097871933e-06, + "loss": 1.1761, + "step": 5245 + }, + { + "epoch": 0.99, + "learning_rate": 5.08727846714008e-06, + "loss": 1.162, + "step": 5250 + }, + { + "epoch": 0.99, + "learning_rate": 5.0798826453959226e-06, + "loss": 1.2233, + "step": 5255 + }, + { + "epoch": 0.99, + "learning_rate": 5.072486648825504e-06, + "loss": 1.1984, + "step": 5260 + }, + { + "epoch": 0.99, + "learning_rate": 5.065090493615248e-06, + "loss": 1.2081, + "step": 5265 + }, + { + "epoch": 0.99, + "learning_rate": 5.057694195951926e-06, + "loss": 1.2395, + "step": 5270 + }, + { + "epoch": 0.99, + "learning_rate": 5.050297772022624e-06, + "loss": 1.2568, + "step": 5275 + }, + { + "epoch": 0.99, + "learning_rate": 5.0429012380147e-06, + "loss": 1.2215, + "step": 5280 + }, + { + "epoch": 1.0, + "learning_rate": 5.035504610115758e-06, + "loss": 1.244, + "step": 5285 + }, + { + "epoch": 1.0, + "learning_rate": 5.0281079045136e-06, + "loss": 1.1802, + "step": 5290 + }, + { + "epoch": 1.0, + "learning_rate": 5.020711137396206e-06, + "loss": 1.2338, + "step": 5295 + }, + { + "epoch": 1.0, + "learning_rate": 5.013314324951686e-06, + "loss": 1.2284, + "step": 5300 + }, + { + "epoch": 1.0, + "learning_rate": 5.00591748336825e-06, + "loss": 1.1705, + "step": 5305 + }, + { + "epoch": 1.0, + "learning_rate": 4.998520628834173e-06, + "loss": 1.2354, + "step": 5310 + }, + { + "epoch": 1.0, + "learning_rate": 4.9911237775377555e-06, + "loss": 1.0522, + "step": 5315 + }, + { + "epoch": 1.0, + "learning_rate": 4.983726945667296e-06, + "loss": 0.9854, + "step": 5320 + }, + { + "epoch": 1.0, + "learning_rate": 4.976330149411046e-06, + "loss": 1.0361, + "step": 5325 + }, + { + "epoch": 1.0, + "learning_rate": 4.968933404957179e-06, + "loss": 1.0717, + "step": 5330 + }, + { + "epoch": 1.0, + "learning_rate": 4.96153672849376e-06, + "loss": 1.0425, + "step": 5335 + }, + { + "epoch": 1.01, + "learning_rate": 4.954140136208698e-06, + "loss": 0.9848, + "step": 5340 + }, + { + "epoch": 1.01, + "learning_rate": 4.946743644289724e-06, + "loss": 1.0565, + "step": 5345 + }, + { + "epoch": 1.01, + "learning_rate": 4.939347268924345e-06, + "loss": 1.0274, + "step": 5350 + }, + { + "epoch": 1.01, + "learning_rate": 4.931951026299816e-06, + "loss": 1.0006, + "step": 5355 + }, + { + "epoch": 1.01, + "learning_rate": 4.9245549326031025e-06, + "loss": 1.0687, + "step": 5360 + }, + { + "epoch": 1.01, + "learning_rate": 4.917159004020837e-06, + "loss": 1.0681, + "step": 5365 + }, + { + "epoch": 1.01, + "learning_rate": 4.909763256739299e-06, + "loss": 1.033, + "step": 5370 + }, + { + "epoch": 1.01, + "learning_rate": 4.902367706944367e-06, + "loss": 1.0706, + "step": 5375 + }, + { + "epoch": 1.01, + "learning_rate": 4.8949723708214865e-06, + "loss": 1.0469, + "step": 5380 + }, + { + "epoch": 1.01, + "learning_rate": 4.887577264555641e-06, + "loss": 1.0651, + "step": 5385 + }, + { + "epoch": 1.02, + "learning_rate": 4.880182404331302e-06, + "loss": 1.0429, + "step": 5390 + }, + { + "epoch": 1.02, + "learning_rate": 4.872787806332409e-06, + "loss": 1.0406, + "step": 5395 + }, + { + "epoch": 1.02, + "learning_rate": 4.865393486742326e-06, + "loss": 0.9984, + "step": 5400 + }, + { + "epoch": 1.02, + "learning_rate": 4.857999461743808e-06, + "loss": 0.9838, + "step": 5405 + }, + { + "epoch": 1.02, + "learning_rate": 4.8506057475189665e-06, + "loss": 1.0564, + "step": 5410 + }, + { + "epoch": 1.02, + "learning_rate": 4.843212360249229e-06, + "loss": 1.059, + "step": 5415 + }, + { + "epoch": 1.02, + "learning_rate": 4.83581931611531e-06, + "loss": 0.9688, + "step": 5420 + }, + { + "epoch": 1.02, + "learning_rate": 4.828426631297175e-06, + "loss": 1.0389, + "step": 5425 + }, + { + "epoch": 1.02, + "learning_rate": 4.821034321973998e-06, + "loss": 1.0601, + "step": 5430 + }, + { + "epoch": 1.02, + "learning_rate": 4.813642404324135e-06, + "loss": 1.0204, + "step": 5435 + }, + { + "epoch": 1.02, + "learning_rate": 4.806250894525085e-06, + "loss": 1.0368, + "step": 5440 + }, + { + "epoch": 1.03, + "learning_rate": 4.798859808753452e-06, + "loss": 1.0375, + "step": 5445 + }, + { + "epoch": 1.03, + "learning_rate": 4.791469163184913e-06, + "loss": 1.0709, + "step": 5450 + }, + { + "epoch": 1.03, + "learning_rate": 4.784078973994185e-06, + "loss": 1.0217, + "step": 5455 + }, + { + "epoch": 1.03, + "learning_rate": 4.7766892573549795e-06, + "loss": 1.0082, + "step": 5460 + }, + { + "epoch": 1.03, + "learning_rate": 4.76930002943998e-06, + "loss": 1.034, + "step": 5465 + }, + { + "epoch": 1.03, + "learning_rate": 4.761911306420795e-06, + "loss": 0.9979, + "step": 5470 + }, + { + "epoch": 1.03, + "learning_rate": 4.754523104467933e-06, + "loss": 1.0395, + "step": 5475 + }, + { + "epoch": 1.03, + "learning_rate": 4.747135439750761e-06, + "loss": 1.0463, + "step": 5480 + }, + { + "epoch": 1.03, + "learning_rate": 4.739748328437467e-06, + "loss": 1.0347, + "step": 5485 + }, + { + "epoch": 1.03, + "learning_rate": 4.73236178669503e-06, + "loss": 1.031, + "step": 5490 + }, + { + "epoch": 1.03, + "learning_rate": 4.7249758306891836e-06, + "loss": 1.0639, + "step": 5495 + }, + { + "epoch": 1.04, + "learning_rate": 4.717590476584378e-06, + "loss": 1.0773, + "step": 5500 + }, + { + "epoch": 1.04, + "learning_rate": 4.710205740543749e-06, + "loss": 1.0902, + "step": 5505 + }, + { + "epoch": 1.04, + "learning_rate": 4.702821638729073e-06, + "loss": 1.0695, + "step": 5510 + }, + { + "epoch": 1.04, + "learning_rate": 4.695438187300745e-06, + "loss": 1.0029, + "step": 5515 + }, + { + "epoch": 1.04, + "learning_rate": 4.688055402417732e-06, + "loss": 1.0023, + "step": 5520 + }, + { + "epoch": 1.04, + "learning_rate": 4.680673300237548e-06, + "loss": 1.053, + "step": 5525 + }, + { + "epoch": 1.04, + "learning_rate": 4.6732918969162085e-06, + "loss": 0.9941, + "step": 5530 + }, + { + "epoch": 1.04, + "learning_rate": 4.665911208608199e-06, + "loss": 1.0649, + "step": 5535 + }, + { + "epoch": 1.04, + "learning_rate": 4.658531251466444e-06, + "loss": 1.057, + "step": 5540 + }, + { + "epoch": 1.04, + "learning_rate": 4.651152041642263e-06, + "loss": 0.9844, + "step": 5545 + }, + { + "epoch": 1.05, + "learning_rate": 4.643773595285343e-06, + "loss": 1.0367, + "step": 5550 + }, + { + "epoch": 1.05, + "learning_rate": 4.636395928543699e-06, + "loss": 1.0047, + "step": 5555 + }, + { + "epoch": 1.05, + "learning_rate": 4.629019057563641e-06, + "loss": 1.0491, + "step": 5560 + }, + { + "epoch": 1.05, + "learning_rate": 4.621642998489737e-06, + "loss": 1.0534, + "step": 5565 + }, + { + "epoch": 1.05, + "learning_rate": 4.614267767464778e-06, + "loss": 1.0255, + "step": 5570 + }, + { + "epoch": 1.05, + "learning_rate": 4.606893380629742e-06, + "loss": 1.0086, + "step": 5575 + }, + { + "epoch": 1.05, + "learning_rate": 4.599519854123758e-06, + "loss": 1.0521, + "step": 5580 + }, + { + "epoch": 1.05, + "learning_rate": 4.592147204084078e-06, + "loss": 1.0184, + "step": 5585 + }, + { + "epoch": 1.05, + "learning_rate": 4.58477544664603e-06, + "loss": 1.034, + "step": 5590 + }, + { + "epoch": 1.05, + "learning_rate": 4.577404597942991e-06, + "loss": 1.0741, + "step": 5595 + }, + { + "epoch": 1.05, + "learning_rate": 4.570034674106348e-06, + "loss": 0.9852, + "step": 5600 + }, + { + "epoch": 1.06, + "learning_rate": 4.562665691265466e-06, + "loss": 1.0007, + "step": 5605 + }, + { + "epoch": 1.06, + "learning_rate": 4.555297665547646e-06, + "loss": 1.003, + "step": 5610 + }, + { + "epoch": 1.06, + "learning_rate": 4.547930613078102e-06, + "loss": 1.0468, + "step": 5615 + }, + { + "epoch": 1.06, + "learning_rate": 4.540564549979911e-06, + "loss": 1.0503, + "step": 5620 + }, + { + "epoch": 1.06, + "learning_rate": 4.533199492373987e-06, + "loss": 1.0404, + "step": 5625 + }, + { + "epoch": 1.06, + "learning_rate": 4.525835456379045e-06, + "loss": 0.9809, + "step": 5630 + }, + { + "epoch": 1.06, + "learning_rate": 4.518472458111562e-06, + "loss": 1.0032, + "step": 5635 + }, + { + "epoch": 1.06, + "learning_rate": 4.511110513685744e-06, + "loss": 1.0327, + "step": 5640 + }, + { + "epoch": 1.06, + "learning_rate": 4.503749639213491e-06, + "loss": 1.0155, + "step": 5645 + }, + { + "epoch": 1.06, + "learning_rate": 4.496389850804365e-06, + "loss": 1.0591, + "step": 5650 + }, + { + "epoch": 1.07, + "learning_rate": 4.489031164565545e-06, + "loss": 1.0272, + "step": 5655 + }, + { + "epoch": 1.07, + "learning_rate": 4.481673596601802e-06, + "loss": 1.0068, + "step": 5660 + }, + { + "epoch": 1.07, + "learning_rate": 4.474317163015458e-06, + "loss": 1.0255, + "step": 5665 + }, + { + "epoch": 1.07, + "learning_rate": 4.4669618799063516e-06, + "loss": 1.0601, + "step": 5670 + }, + { + "epoch": 1.07, + "learning_rate": 4.459607763371806e-06, + "loss": 1.0159, + "step": 5675 + }, + { + "epoch": 1.07, + "learning_rate": 4.452254829506589e-06, + "loss": 1.0418, + "step": 5680 + }, + { + "epoch": 1.07, + "learning_rate": 4.444903094402882e-06, + "loss": 1.002, + "step": 5685 + }, + { + "epoch": 1.07, + "learning_rate": 4.4375525741502406e-06, + "loss": 1.0388, + "step": 5690 + }, + { + "epoch": 1.07, + "learning_rate": 4.430203284835565e-06, + "loss": 1.0215, + "step": 5695 + }, + { + "epoch": 1.07, + "learning_rate": 4.422855242543056e-06, + "loss": 1.034, + "step": 5700 + }, + { + "epoch": 1.07, + "learning_rate": 4.415508463354191e-06, + "loss": 1.0031, + "step": 5705 + }, + { + "epoch": 1.08, + "learning_rate": 4.408162963347682e-06, + "loss": 0.9963, + "step": 5710 + }, + { + "epoch": 1.08, + "learning_rate": 4.400818758599436e-06, + "loss": 1.0124, + "step": 5715 + }, + { + "epoch": 1.08, + "learning_rate": 4.3934758651825334e-06, + "loss": 0.9911, + "step": 5720 + }, + { + "epoch": 1.08, + "learning_rate": 4.386134299167178e-06, + "loss": 1.0429, + "step": 5725 + }, + { + "epoch": 1.08, + "learning_rate": 4.378794076620672e-06, + "loss": 1.0365, + "step": 5730 + }, + { + "epoch": 1.08, + "learning_rate": 4.371455213607378e-06, + "loss": 1.0009, + "step": 5735 + }, + { + "epoch": 1.08, + "learning_rate": 4.3641177261886795e-06, + "loss": 1.0523, + "step": 5740 + }, + { + "epoch": 1.08, + "learning_rate": 4.356781630422955e-06, + "loss": 1.0195, + "step": 5745 + }, + { + "epoch": 1.08, + "learning_rate": 4.349446942365529e-06, + "loss": 1.019, + "step": 5750 + }, + { + "epoch": 1.08, + "learning_rate": 4.342113678068655e-06, + "loss": 0.989, + "step": 5755 + }, + { + "epoch": 1.08, + "learning_rate": 4.334781853581463e-06, + "loss": 1.0186, + "step": 5760 + }, + { + "epoch": 1.09, + "learning_rate": 4.327451484949936e-06, + "loss": 0.9944, + "step": 5765 + }, + { + "epoch": 1.09, + "learning_rate": 4.320122588216868e-06, + "loss": 1.0536, + "step": 5770 + }, + { + "epoch": 1.09, + "learning_rate": 4.312795179421836e-06, + "loss": 1.0433, + "step": 5775 + }, + { + "epoch": 1.09, + "learning_rate": 4.305469274601156e-06, + "loss": 1.0048, + "step": 5780 + }, + { + "epoch": 1.09, + "learning_rate": 4.298144889787855e-06, + "loss": 1.0851, + "step": 5785 + }, + { + "epoch": 1.09, + "learning_rate": 4.290822041011632e-06, + "loss": 1.047, + "step": 5790 + }, + { + "epoch": 1.09, + "learning_rate": 4.283500744298825e-06, + "loss": 1.0821, + "step": 5795 + }, + { + "epoch": 1.09, + "learning_rate": 4.276181015672375e-06, + "loss": 1.0109, + "step": 5800 + }, + { + "epoch": 1.09, + "learning_rate": 4.268862871151792e-06, + "loss": 1.0063, + "step": 5805 + }, + { + "epoch": 1.09, + "learning_rate": 4.261546326753119e-06, + "loss": 0.98, + "step": 5810 + }, + { + "epoch": 1.1, + "learning_rate": 4.254231398488897e-06, + "loss": 0.9981, + "step": 5815 + }, + { + "epoch": 1.1, + "learning_rate": 4.246918102368128e-06, + "loss": 1.0593, + "step": 5820 + }, + { + "epoch": 1.1, + "learning_rate": 4.2396064543962445e-06, + "loss": 0.9974, + "step": 5825 + }, + { + "epoch": 1.1, + "learning_rate": 4.232296470575071e-06, + "loss": 1.0358, + "step": 5830 + }, + { + "epoch": 1.1, + "learning_rate": 4.2249881669027904e-06, + "loss": 1.0716, + "step": 5835 + }, + { + "epoch": 1.1, + "learning_rate": 4.217681559373909e-06, + "loss": 1.0109, + "step": 5840 + }, + { + "epoch": 1.1, + "learning_rate": 4.2103766639792185e-06, + "loss": 1.0453, + "step": 5845 + }, + { + "epoch": 1.1, + "learning_rate": 4.203073496705765e-06, + "loss": 0.9672, + "step": 5850 + }, + { + "epoch": 1.1, + "learning_rate": 4.195772073536814e-06, + "loss": 1.0695, + "step": 5855 + }, + { + "epoch": 1.1, + "learning_rate": 4.188472410451814e-06, + "loss": 0.9622, + "step": 5860 + }, + { + "epoch": 1.1, + "learning_rate": 4.181174523426359e-06, + "loss": 0.9904, + "step": 5865 + }, + { + "epoch": 1.11, + "learning_rate": 4.1738784284321554e-06, + "loss": 1.0098, + "step": 5870 + }, + { + "epoch": 1.11, + "learning_rate": 4.166584141436992e-06, + "loss": 1.0066, + "step": 5875 + }, + { + "epoch": 1.11, + "learning_rate": 4.159291678404697e-06, + "loss": 1.0213, + "step": 5880 + }, + { + "epoch": 1.11, + "learning_rate": 4.1520010552951075e-06, + "loss": 1.0107, + "step": 5885 + }, + { + "epoch": 1.11, + "learning_rate": 4.144712288064037e-06, + "loss": 1.0401, + "step": 5890 + }, + { + "epoch": 1.11, + "learning_rate": 4.137425392663231e-06, + "loss": 1.0016, + "step": 5895 + }, + { + "epoch": 1.11, + "learning_rate": 4.130140385040346e-06, + "loss": 1.0589, + "step": 5900 + }, + { + "epoch": 1.11, + "learning_rate": 4.122857281138902e-06, + "loss": 1.061, + "step": 5905 + }, + { + "epoch": 1.11, + "learning_rate": 4.115576096898253e-06, + "loss": 0.9996, + "step": 5910 + }, + { + "epoch": 1.11, + "learning_rate": 4.108296848253552e-06, + "loss": 1.0077, + "step": 5915 + }, + { + "epoch": 1.12, + "learning_rate": 4.101019551135718e-06, + "loss": 1.0388, + "step": 5920 + }, + { + "epoch": 1.12, + "learning_rate": 4.093744221471396e-06, + "loss": 1.0023, + "step": 5925 + }, + { + "epoch": 1.12, + "learning_rate": 4.086470875182929e-06, + "loss": 1.0066, + "step": 5930 + }, + { + "epoch": 1.12, + "learning_rate": 4.079199528188315e-06, + "loss": 0.9858, + "step": 5935 + }, + { + "epoch": 1.12, + "learning_rate": 4.0719301964011774e-06, + "loss": 0.9718, + "step": 5940 + }, + { + "epoch": 1.12, + "learning_rate": 4.064662895730732e-06, + "loss": 1.0067, + "step": 5945 + }, + { + "epoch": 1.12, + "learning_rate": 4.057397642081747e-06, + "loss": 0.9826, + "step": 5950 + }, + { + "epoch": 1.12, + "learning_rate": 4.050134451354511e-06, + "loss": 1.0515, + "step": 5955 + }, + { + "epoch": 1.12, + "learning_rate": 4.0428733394448005e-06, + "loss": 1.0115, + "step": 5960 + }, + { + "epoch": 1.12, + "learning_rate": 4.035614322243836e-06, + "loss": 0.9815, + "step": 5965 + }, + { + "epoch": 1.12, + "learning_rate": 4.02835741563826e-06, + "loss": 0.9842, + "step": 5970 + }, + { + "epoch": 1.13, + "learning_rate": 4.021102635510093e-06, + "loss": 1.0193, + "step": 5975 + }, + { + "epoch": 1.13, + "learning_rate": 4.013849997736703e-06, + "loss": 1.0357, + "step": 5980 + }, + { + "epoch": 1.13, + "learning_rate": 4.006599518190768e-06, + "loss": 1.0696, + "step": 5985 + }, + { + "epoch": 1.13, + "learning_rate": 3.999351212740242e-06, + "loss": 0.9836, + "step": 5990 + }, + { + "epoch": 1.13, + "learning_rate": 3.992105097248324e-06, + "loss": 1.0348, + "step": 5995 + }, + { + "epoch": 1.13, + "learning_rate": 3.984861187573416e-06, + "loss": 0.9809, + "step": 6000 + }, + { + "epoch": 1.13, + "learning_rate": 3.977619499569096e-06, + "loss": 1.0009, + "step": 6005 + }, + { + "epoch": 1.13, + "learning_rate": 3.970380049084081e-06, + "loss": 1.0564, + "step": 6010 + }, + { + "epoch": 1.13, + "learning_rate": 3.9631428519621845e-06, + "loss": 0.9941, + "step": 6015 + }, + { + "epoch": 1.13, + "learning_rate": 3.9559079240422916e-06, + "loss": 1.0416, + "step": 6020 + }, + { + "epoch": 1.13, + "learning_rate": 3.948675281158329e-06, + "loss": 1.0269, + "step": 6025 + }, + { + "epoch": 1.14, + "learning_rate": 3.94144493913921e-06, + "loss": 1.0337, + "step": 6030 + }, + { + "epoch": 1.14, + "learning_rate": 3.9342169138088184e-06, + "loss": 1.0, + "step": 6035 + }, + { + "epoch": 1.14, + "learning_rate": 3.92699122098597e-06, + "loss": 1.0053, + "step": 6040 + }, + { + "epoch": 1.14, + "learning_rate": 3.91976787648437e-06, + "loss": 1.0512, + "step": 6045 + }, + { + "epoch": 1.14, + "learning_rate": 3.91254689611259e-06, + "loss": 0.9962, + "step": 6050 + }, + { + "epoch": 1.14, + "learning_rate": 3.905328295674026e-06, + "loss": 1.0107, + "step": 6055 + }, + { + "epoch": 1.14, + "learning_rate": 3.898112090966861e-06, + "loss": 1.0099, + "step": 6060 + }, + { + "epoch": 1.14, + "learning_rate": 3.89089829778404e-06, + "loss": 0.9886, + "step": 6065 + }, + { + "epoch": 1.14, + "learning_rate": 3.883686931913229e-06, + "loss": 0.9906, + "step": 6070 + }, + { + "epoch": 1.14, + "learning_rate": 3.876478009136781e-06, + "loss": 0.9999, + "step": 6075 + }, + { + "epoch": 1.15, + "learning_rate": 3.869271545231704e-06, + "loss": 0.9761, + "step": 6080 + }, + { + "epoch": 1.15, + "learning_rate": 3.862067555969621e-06, + "loss": 0.9534, + "step": 6085 + }, + { + "epoch": 1.15, + "learning_rate": 3.854866057116742e-06, + "loss": 1.0182, + "step": 6090 + }, + { + "epoch": 1.15, + "learning_rate": 3.847667064433825e-06, + "loss": 1.0035, + "step": 6095 + }, + { + "epoch": 1.15, + "learning_rate": 3.840470593676148e-06, + "loss": 1.0189, + "step": 6100 + }, + { + "epoch": 1.15, + "learning_rate": 3.8332766605934624e-06, + "loss": 0.9904, + "step": 6105 + }, + { + "epoch": 1.15, + "learning_rate": 3.82608528092997e-06, + "loss": 1.0312, + "step": 6110 + }, + { + "epoch": 1.15, + "learning_rate": 3.818896470424283e-06, + "loss": 1.0193, + "step": 6115 + }, + { + "epoch": 1.15, + "learning_rate": 3.8117102448093922e-06, + "loss": 1.0401, + "step": 6120 + }, + { + "epoch": 1.15, + "learning_rate": 3.8045266198126314e-06, + "loss": 1.0227, + "step": 6125 + }, + { + "epoch": 1.15, + "learning_rate": 3.797345611155642e-06, + "loss": 1.0284, + "step": 6130 + }, + { + "epoch": 1.16, + "learning_rate": 3.7901672345543366e-06, + "loss": 0.9987, + "step": 6135 + }, + { + "epoch": 1.16, + "learning_rate": 3.7829915057188727e-06, + "loss": 0.9681, + "step": 6140 + }, + { + "epoch": 1.16, + "learning_rate": 3.775818440353609e-06, + "loss": 1.0071, + "step": 6145 + }, + { + "epoch": 1.16, + "learning_rate": 3.7686480541570796e-06, + "loss": 1.0211, + "step": 6150 + }, + { + "epoch": 1.16, + "learning_rate": 3.7614803628219498e-06, + "loss": 0.9708, + "step": 6155 + }, + { + "epoch": 1.16, + "learning_rate": 3.754315382034991e-06, + "loss": 0.9845, + "step": 6160 + }, + { + "epoch": 1.16, + "learning_rate": 3.7471531274770394e-06, + "loss": 1.0052, + "step": 6165 + }, + { + "epoch": 1.16, + "learning_rate": 3.7399936148229676e-06, + "loss": 1.0226, + "step": 6170 + }, + { + "epoch": 1.16, + "learning_rate": 3.7328368597416477e-06, + "loss": 1.0353, + "step": 6175 + }, + { + "epoch": 1.16, + "learning_rate": 3.7256828778959116e-06, + "loss": 0.9976, + "step": 6180 + }, + { + "epoch": 1.16, + "learning_rate": 3.7185316849425272e-06, + "loss": 0.9702, + "step": 6185 + }, + { + "epoch": 1.17, + "learning_rate": 3.711383296532158e-06, + "loss": 1.0527, + "step": 6190 + }, + { + "epoch": 1.17, + "learning_rate": 3.7042377283093272e-06, + "loss": 1.0742, + "step": 6195 + }, + { + "epoch": 1.17, + "learning_rate": 3.697094995912389e-06, + "loss": 1.0034, + "step": 6200 + }, + { + "epoch": 1.17, + "learning_rate": 3.6899551149734893e-06, + "loss": 1.0184, + "step": 6205 + }, + { + "epoch": 1.17, + "learning_rate": 3.6828181011185326e-06, + "loss": 1.0611, + "step": 6210 + }, + { + "epoch": 1.17, + "learning_rate": 3.6756839699671515e-06, + "loss": 1.0041, + "step": 6215 + }, + { + "epoch": 1.17, + "learning_rate": 3.668552737132668e-06, + "loss": 1.0123, + "step": 6220 + }, + { + "epoch": 1.17, + "learning_rate": 3.6614244182220608e-06, + "loss": 0.9527, + "step": 6225 + }, + { + "epoch": 1.17, + "learning_rate": 3.654299028835932e-06, + "loss": 1.0069, + "step": 6230 + }, + { + "epoch": 1.17, + "learning_rate": 3.6471765845684715e-06, + "loss": 1.0283, + "step": 6235 + }, + { + "epoch": 1.18, + "learning_rate": 3.640057101007425e-06, + "loss": 1.009, + "step": 6240 + }, + { + "epoch": 1.18, + "learning_rate": 3.632940593734058e-06, + "loss": 1.0457, + "step": 6245 + }, + { + "epoch": 1.18, + "learning_rate": 3.625827078323122e-06, + "loss": 0.9835, + "step": 6250 + }, + { + "epoch": 1.18, + "learning_rate": 3.618716570342821e-06, + "loss": 1.0182, + "step": 6255 + }, + { + "epoch": 1.18, + "learning_rate": 3.611609085354776e-06, + "loss": 0.9658, + "step": 6260 + }, + { + "epoch": 1.18, + "learning_rate": 3.6045046389139922e-06, + "loss": 1.0461, + "step": 6265 + }, + { + "epoch": 1.18, + "learning_rate": 3.5974032465688286e-06, + "loss": 1.0559, + "step": 6270 + }, + { + "epoch": 1.18, + "learning_rate": 3.5903049238609555e-06, + "loss": 1.0029, + "step": 6275 + }, + { + "epoch": 1.18, + "learning_rate": 3.583209686325327e-06, + "loss": 0.9966, + "step": 6280 + }, + { + "epoch": 1.18, + "learning_rate": 3.5761175494901466e-06, + "loss": 1.0301, + "step": 6285 + }, + { + "epoch": 1.18, + "learning_rate": 3.5690285288768284e-06, + "loss": 0.9866, + "step": 6290 + }, + { + "epoch": 1.19, + "learning_rate": 3.56194263999997e-06, + "loss": 1.0016, + "step": 6295 + }, + { + "epoch": 1.19, + "learning_rate": 3.554859898367312e-06, + "loss": 1.035, + "step": 6300 + }, + { + "epoch": 1.19, + "learning_rate": 3.547780319479709e-06, + "loss": 0.9535, + "step": 6305 + }, + { + "epoch": 1.19, + "learning_rate": 3.5407039188310933e-06, + "loss": 1.0038, + "step": 6310 + }, + { + "epoch": 1.19, + "learning_rate": 3.5336307119084425e-06, + "loss": 0.9908, + "step": 6315 + }, + { + "epoch": 1.19, + "learning_rate": 3.526560714191745e-06, + "loss": 0.9635, + "step": 6320 + }, + { + "epoch": 1.19, + "learning_rate": 3.5194939411539604e-06, + "loss": 1.0042, + "step": 6325 + }, + { + "epoch": 1.19, + "learning_rate": 3.5124304082609978e-06, + "loss": 1.0142, + "step": 6330 + }, + { + "epoch": 1.19, + "learning_rate": 3.5053701309716724e-06, + "loss": 1.0364, + "step": 6335 + }, + { + "epoch": 1.19, + "learning_rate": 3.498313124737674e-06, + "loss": 0.9686, + "step": 6340 + }, + { + "epoch": 1.2, + "learning_rate": 3.4912594050035346e-06, + "loss": 1.0308, + "step": 6345 + }, + { + "epoch": 1.2, + "learning_rate": 3.4842089872065915e-06, + "loss": 0.9767, + "step": 6350 + }, + { + "epoch": 1.2, + "learning_rate": 3.4771618867769574e-06, + "loss": 1.0148, + "step": 6355 + }, + { + "epoch": 1.2, + "learning_rate": 3.4701181191374855e-06, + "loss": 1.0103, + "step": 6360 + }, + { + "epoch": 1.2, + "learning_rate": 3.4630776997037324e-06, + "loss": 1.0346, + "step": 6365 + }, + { + "epoch": 1.2, + "learning_rate": 3.45604064388393e-06, + "loss": 1.0342, + "step": 6370 + }, + { + "epoch": 1.2, + "learning_rate": 3.4490069670789468e-06, + "loss": 0.9953, + "step": 6375 + }, + { + "epoch": 1.2, + "learning_rate": 3.4419766846822553e-06, + "loss": 0.9898, + "step": 6380 + }, + { + "epoch": 1.2, + "learning_rate": 3.434949812079903e-06, + "loss": 1.0205, + "step": 6385 + }, + { + "epoch": 1.2, + "learning_rate": 3.4279263646504702e-06, + "loss": 0.9656, + "step": 6390 + }, + { + "epoch": 1.2, + "learning_rate": 3.420906357765046e-06, + "loss": 1.0617, + "step": 6395 + }, + { + "epoch": 1.21, + "learning_rate": 3.4138898067871864e-06, + "loss": 1.0423, + "step": 6400 + }, + { + "epoch": 1.21, + "learning_rate": 3.406876727072884e-06, + "loss": 0.9904, + "step": 6405 + }, + { + "epoch": 1.21, + "learning_rate": 3.3998671339705362e-06, + "loss": 0.9708, + "step": 6410 + }, + { + "epoch": 1.21, + "learning_rate": 3.3928610428209097e-06, + "loss": 1.0459, + "step": 6415 + }, + { + "epoch": 1.21, + "learning_rate": 3.3858584689571038e-06, + "loss": 1.0256, + "step": 6420 + }, + { + "epoch": 1.21, + "learning_rate": 3.3788594277045233e-06, + "loss": 0.9976, + "step": 6425 + }, + { + "epoch": 1.21, + "learning_rate": 3.3718639343808414e-06, + "loss": 1.0649, + "step": 6430 + }, + { + "epoch": 1.21, + "learning_rate": 3.364872004295966e-06, + "loss": 1.0167, + "step": 6435 + }, + { + "epoch": 1.21, + "learning_rate": 3.3578836527520078e-06, + "loss": 1.0348, + "step": 6440 + }, + { + "epoch": 1.21, + "learning_rate": 3.3508988950432418e-06, + "loss": 0.9988, + "step": 6445 + }, + { + "epoch": 1.21, + "learning_rate": 3.3439177464560833e-06, + "loss": 1.0256, + "step": 6450 + }, + { + "epoch": 1.22, + "learning_rate": 3.336940222269045e-06, + "loss": 0.9936, + "step": 6455 + }, + { + "epoch": 1.22, + "learning_rate": 3.3299663377527087e-06, + "loss": 0.9776, + "step": 6460 + }, + { + "epoch": 1.22, + "learning_rate": 3.3229961081696917e-06, + "loss": 0.9983, + "step": 6465 + }, + { + "epoch": 1.22, + "learning_rate": 3.3160295487746096e-06, + "loss": 0.9799, + "step": 6470 + }, + { + "epoch": 1.22, + "learning_rate": 3.309066674814049e-06, + "loss": 1.0058, + "step": 6475 + }, + { + "epoch": 1.22, + "learning_rate": 3.3021075015265277e-06, + "loss": 1.0097, + "step": 6480 + }, + { + "epoch": 1.22, + "learning_rate": 3.295152044142466e-06, + "loss": 1.035, + "step": 6485 + }, + { + "epoch": 1.22, + "learning_rate": 3.2882003178841536e-06, + "loss": 1.0444, + "step": 6490 + }, + { + "epoch": 1.22, + "learning_rate": 3.2812523379657103e-06, + "loss": 1.0425, + "step": 6495 + }, + { + "epoch": 1.22, + "learning_rate": 3.274308119593059e-06, + "loss": 1.023, + "step": 6500 + }, + { + "epoch": 1.23, + "learning_rate": 3.267367677963892e-06, + "loss": 1.0267, + "step": 6505 + }, + { + "epoch": 1.23, + "learning_rate": 3.2604310282676332e-06, + "loss": 1.0291, + "step": 6510 + }, + { + "epoch": 1.23, + "learning_rate": 3.253498185685409e-06, + "loss": 0.9999, + "step": 6515 + }, + { + "epoch": 1.23, + "learning_rate": 3.2465691653900144e-06, + "loss": 1.0131, + "step": 6520 + }, + { + "epoch": 1.23, + "learning_rate": 3.2396439825458792e-06, + "loss": 1.0255, + "step": 6525 + }, + { + "epoch": 1.23, + "learning_rate": 3.2327226523090328e-06, + "loss": 0.9865, + "step": 6530 + }, + { + "epoch": 1.23, + "learning_rate": 3.2258051898270766e-06, + "loss": 0.9833, + "step": 6535 + }, + { + "epoch": 1.23, + "learning_rate": 3.218891610239142e-06, + "loss": 1.0122, + "step": 6540 + }, + { + "epoch": 1.23, + "learning_rate": 3.211981928675867e-06, + "loss": 1.014, + "step": 6545 + }, + { + "epoch": 1.23, + "learning_rate": 3.2050761602593568e-06, + "loss": 0.9609, + "step": 6550 + }, + { + "epoch": 1.23, + "learning_rate": 3.1981743201031535e-06, + "loss": 1.0376, + "step": 6555 + }, + { + "epoch": 1.24, + "learning_rate": 3.191276423312202e-06, + "loss": 1.0241, + "step": 6560 + }, + { + "epoch": 1.24, + "learning_rate": 3.1843824849828144e-06, + "loss": 1.0482, + "step": 6565 + }, + { + "epoch": 1.24, + "learning_rate": 3.1774925202026425e-06, + "loss": 0.983, + "step": 6570 + }, + { + "epoch": 1.24, + "learning_rate": 3.1706065440506418e-06, + "loss": 0.9573, + "step": 6575 + }, + { + "epoch": 1.24, + "learning_rate": 3.1637245715970364e-06, + "loss": 1.0136, + "step": 6580 + }, + { + "epoch": 1.24, + "learning_rate": 3.156846617903292e-06, + "loss": 1.0236, + "step": 6585 + }, + { + "epoch": 1.24, + "learning_rate": 3.1499726980220734e-06, + "loss": 1.0485, + "step": 6590 + }, + { + "epoch": 1.24, + "learning_rate": 3.1431028269972215e-06, + "loss": 1.0364, + "step": 6595 + }, + { + "epoch": 1.24, + "learning_rate": 3.1362370198637147e-06, + "loss": 0.9959, + "step": 6600 + }, + { + "epoch": 1.24, + "learning_rate": 3.1293752916476387e-06, + "loss": 1.022, + "step": 6605 + }, + { + "epoch": 1.24, + "learning_rate": 3.1225176573661513e-06, + "loss": 1.0229, + "step": 6610 + }, + { + "epoch": 1.25, + "learning_rate": 3.1156641320274494e-06, + "loss": 1.0539, + "step": 6615 + }, + { + "epoch": 1.25, + "learning_rate": 3.1088147306307385e-06, + "loss": 1.029, + "step": 6620 + }, + { + "epoch": 1.25, + "learning_rate": 3.1019694681662e-06, + "loss": 1.0065, + "step": 6625 + }, + { + "epoch": 1.25, + "learning_rate": 3.0951283596149547e-06, + "loss": 1.0124, + "step": 6630 + }, + { + "epoch": 1.25, + "learning_rate": 3.088291419949033e-06, + "loss": 1.0286, + "step": 6635 + }, + { + "epoch": 1.25, + "learning_rate": 3.081458664131344e-06, + "loss": 0.978, + "step": 6640 + }, + { + "epoch": 1.25, + "learning_rate": 3.0746301071156363e-06, + "loss": 1.0259, + "step": 6645 + }, + { + "epoch": 1.25, + "learning_rate": 3.067805763846473e-06, + "loss": 1.0355, + "step": 6650 + }, + { + "epoch": 1.25, + "learning_rate": 3.0609856492591937e-06, + "loss": 0.9806, + "step": 6655 + }, + { + "epoch": 1.25, + "learning_rate": 3.0541697782798806e-06, + "loss": 0.9485, + "step": 6660 + }, + { + "epoch": 1.26, + "learning_rate": 3.0473581658253337e-06, + "loss": 1.053, + "step": 6665 + }, + { + "epoch": 1.26, + "learning_rate": 3.0405508268030295e-06, + "loss": 1.0301, + "step": 6670 + }, + { + "epoch": 1.26, + "learning_rate": 3.033747776111092e-06, + "loss": 0.9644, + "step": 6675 + }, + { + "epoch": 1.26, + "learning_rate": 3.026949028638264e-06, + "loss": 0.9973, + "step": 6680 + }, + { + "epoch": 1.26, + "learning_rate": 3.020154599263865e-06, + "loss": 0.931, + "step": 6685 + }, + { + "epoch": 1.26, + "learning_rate": 3.013364502857766e-06, + "loss": 1.0684, + "step": 6690 + }, + { + "epoch": 1.26, + "learning_rate": 3.006578754280358e-06, + "loss": 1.04, + "step": 6695 + }, + { + "epoch": 1.26, + "learning_rate": 2.999797368382513e-06, + "loss": 1.0232, + "step": 6700 + }, + { + "epoch": 1.26, + "learning_rate": 2.9930203600055596e-06, + "loss": 0.9895, + "step": 6705 + }, + { + "epoch": 1.26, + "learning_rate": 2.986247743981238e-06, + "loss": 0.9993, + "step": 6710 + }, + { + "epoch": 1.26, + "learning_rate": 2.979479535131684e-06, + "loss": 0.969, + "step": 6715 + }, + { + "epoch": 1.27, + "learning_rate": 2.9727157482693835e-06, + "loss": 1.0122, + "step": 6720 + }, + { + "epoch": 1.27, + "learning_rate": 2.965956398197146e-06, + "loss": 1.0306, + "step": 6725 + }, + { + "epoch": 1.27, + "learning_rate": 2.959201499708072e-06, + "loss": 1.0005, + "step": 6730 + }, + { + "epoch": 1.27, + "learning_rate": 2.9524510675855155e-06, + "loss": 1.0664, + "step": 6735 + }, + { + "epoch": 1.27, + "learning_rate": 2.9457051166030616e-06, + "loss": 1.0063, + "step": 6740 + }, + { + "epoch": 1.27, + "learning_rate": 2.938963661524483e-06, + "loss": 1.022, + "step": 6745 + }, + { + "epoch": 1.27, + "learning_rate": 2.932226717103716e-06, + "loss": 0.9959, + "step": 6750 + }, + { + "epoch": 1.27, + "learning_rate": 2.9254942980848245e-06, + "loss": 1.0167, + "step": 6755 + }, + { + "epoch": 1.27, + "learning_rate": 2.9187664192019676e-06, + "loss": 1.0054, + "step": 6760 + }, + { + "epoch": 1.27, + "learning_rate": 2.9120430951793677e-06, + "loss": 0.9725, + "step": 6765 + }, + { + "epoch": 1.28, + "learning_rate": 2.9053243407312835e-06, + "loss": 1.0028, + "step": 6770 + }, + { + "epoch": 1.28, + "learning_rate": 2.898610170561964e-06, + "loss": 0.9773, + "step": 6775 + }, + { + "epoch": 1.28, + "learning_rate": 2.8919005993656357e-06, + "loss": 1.0138, + "step": 6780 + }, + { + "epoch": 1.28, + "learning_rate": 2.8851956418264523e-06, + "loss": 1.0131, + "step": 6785 + }, + { + "epoch": 1.28, + "learning_rate": 2.8784953126184704e-06, + "loss": 0.9964, + "step": 6790 + }, + { + "epoch": 1.28, + "learning_rate": 2.871799626405625e-06, + "loss": 1.0025, + "step": 6795 + }, + { + "epoch": 1.28, + "learning_rate": 2.8651085978416798e-06, + "loss": 1.0074, + "step": 6800 + }, + { + "epoch": 1.28, + "learning_rate": 2.858422241570214e-06, + "loss": 0.968, + "step": 6805 + }, + { + "epoch": 1.28, + "learning_rate": 2.851740572224575e-06, + "loss": 1.0276, + "step": 6810 + }, + { + "epoch": 1.28, + "learning_rate": 2.8450636044278535e-06, + "loss": 1.0326, + "step": 6815 + }, + { + "epoch": 1.28, + "learning_rate": 2.838391352792855e-06, + "loss": 0.9835, + "step": 6820 + }, + { + "epoch": 1.29, + "learning_rate": 2.8317238319220564e-06, + "loss": 0.9922, + "step": 6825 + }, + { + "epoch": 1.29, + "learning_rate": 2.825061056407591e-06, + "loss": 0.9719, + "step": 6830 + }, + { + "epoch": 1.29, + "learning_rate": 2.8184030408311947e-06, + "loss": 0.9973, + "step": 6835 + }, + { + "epoch": 1.29, + "learning_rate": 2.811749799764198e-06, + "loss": 1.0141, + "step": 6840 + }, + { + "epoch": 1.29, + "learning_rate": 2.8051013477674736e-06, + "loss": 1.0195, + "step": 6845 + }, + { + "epoch": 1.29, + "learning_rate": 2.7984576993914143e-06, + "loss": 1.0166, + "step": 6850 + }, + { + "epoch": 1.29, + "learning_rate": 2.791818869175906e-06, + "loss": 1.0177, + "step": 6855 + }, + { + "epoch": 1.29, + "learning_rate": 2.7851848716502815e-06, + "loss": 1.0475, + "step": 6860 + }, + { + "epoch": 1.29, + "learning_rate": 2.7785557213333047e-06, + "loss": 0.9932, + "step": 6865 + }, + { + "epoch": 1.29, + "learning_rate": 2.771931432733126e-06, + "loss": 1.0372, + "step": 6870 + }, + { + "epoch": 1.29, + "learning_rate": 2.765312020347256e-06, + "loss": 1.0422, + "step": 6875 + }, + { + "epoch": 1.3, + "learning_rate": 2.758697498662538e-06, + "loss": 0.9787, + "step": 6880 + }, + { + "epoch": 1.3, + "learning_rate": 2.7520878821551063e-06, + "loss": 0.9881, + "step": 6885 + }, + { + "epoch": 1.3, + "learning_rate": 2.745483185290363e-06, + "loss": 0.9499, + "step": 6890 + }, + { + "epoch": 1.3, + "learning_rate": 2.7388834225229445e-06, + "loss": 1.0188, + "step": 6895 + }, + { + "epoch": 1.3, + "learning_rate": 2.732288608296685e-06, + "loss": 1.0068, + "step": 6900 + }, + { + "epoch": 1.3, + "learning_rate": 2.7256987570445925e-06, + "loss": 1.03, + "step": 6905 + }, + { + "epoch": 1.3, + "learning_rate": 2.7191138831888113e-06, + "loss": 1.0066, + "step": 6910 + }, + { + "epoch": 1.3, + "learning_rate": 2.71253400114059e-06, + "loss": 0.9573, + "step": 6915 + }, + { + "epoch": 1.3, + "learning_rate": 2.7059591253002595e-06, + "loss": 1.0329, + "step": 6920 + }, + { + "epoch": 1.3, + "learning_rate": 2.699389270057186e-06, + "loss": 1.0484, + "step": 6925 + }, + { + "epoch": 1.31, + "learning_rate": 2.692824449789756e-06, + "loss": 0.9456, + "step": 6930 + }, + { + "epoch": 1.31, + "learning_rate": 2.68626467886533e-06, + "loss": 1.0412, + "step": 6935 + }, + { + "epoch": 1.31, + "learning_rate": 2.67970997164022e-06, + "loss": 1.0089, + "step": 6940 + }, + { + "epoch": 1.31, + "learning_rate": 2.67316034245966e-06, + "loss": 1.0235, + "step": 6945 + }, + { + "epoch": 1.31, + "learning_rate": 2.666615805657762e-06, + "loss": 1.0095, + "step": 6950 + }, + { + "epoch": 1.31, + "learning_rate": 2.6600763755575037e-06, + "loss": 1.0048, + "step": 6955 + }, + { + "epoch": 1.31, + "learning_rate": 2.653542066470678e-06, + "loss": 0.9806, + "step": 6960 + }, + { + "epoch": 1.31, + "learning_rate": 2.6470128926978717e-06, + "loss": 1.0156, + "step": 6965 + }, + { + "epoch": 1.31, + "learning_rate": 2.640488868528438e-06, + "loss": 1.0357, + "step": 6970 + }, + { + "epoch": 1.31, + "learning_rate": 2.633970008240453e-06, + "loss": 1.0101, + "step": 6975 + }, + { + "epoch": 1.31, + "learning_rate": 2.627456326100698e-06, + "loss": 1.0412, + "step": 6980 + }, + { + "epoch": 1.32, + "learning_rate": 2.620947836364617e-06, + "loss": 0.9919, + "step": 6985 + }, + { + "epoch": 1.32, + "learning_rate": 2.614444553276289e-06, + "loss": 0.9888, + "step": 6990 + }, + { + "epoch": 1.32, + "learning_rate": 2.607946491068406e-06, + "loss": 1.0312, + "step": 6995 + }, + { + "epoch": 1.32, + "learning_rate": 2.6014536639622245e-06, + "loss": 1.0067, + "step": 7000 + }, + { + "epoch": 1.32, + "learning_rate": 2.594966086167551e-06, + "loss": 1.0137, + "step": 7005 + }, + { + "epoch": 1.32, + "learning_rate": 2.5884837718826972e-06, + "loss": 0.999, + "step": 7010 + }, + { + "epoch": 1.32, + "learning_rate": 2.5820067352944612e-06, + "loss": 1.0558, + "step": 7015 + }, + { + "epoch": 1.32, + "learning_rate": 2.5755349905780892e-06, + "loss": 0.9686, + "step": 7020 + }, + { + "epoch": 1.32, + "learning_rate": 2.5690685518972447e-06, + "loss": 1.0499, + "step": 7025 + }, + { + "epoch": 1.32, + "learning_rate": 2.562607433403976e-06, + "loss": 1.0096, + "step": 7030 + }, + { + "epoch": 1.33, + "learning_rate": 2.5561516492386943e-06, + "loss": 1.0189, + "step": 7035 + }, + { + "epoch": 1.33, + "learning_rate": 2.5497012135301312e-06, + "loss": 1.0226, + "step": 7040 + }, + { + "epoch": 1.33, + "learning_rate": 2.543256140395317e-06, + "loss": 0.9886, + "step": 7045 + }, + { + "epoch": 1.33, + "learning_rate": 2.5368164439395434e-06, + "loss": 1.0389, + "step": 7050 + }, + { + "epoch": 1.33, + "learning_rate": 2.5303821382563322e-06, + "loss": 0.9977, + "step": 7055 + }, + { + "epoch": 1.33, + "learning_rate": 2.5239532374274145e-06, + "loss": 1.0542, + "step": 7060 + }, + { + "epoch": 1.33, + "learning_rate": 2.517529755522685e-06, + "loss": 0.9817, + "step": 7065 + }, + { + "epoch": 1.33, + "learning_rate": 2.5111117066001867e-06, + "loss": 1.0289, + "step": 7070 + }, + { + "epoch": 1.33, + "learning_rate": 2.5046991047060643e-06, + "loss": 0.961, + "step": 7075 + }, + { + "epoch": 1.33, + "learning_rate": 2.498291963874548e-06, + "loss": 1.0004, + "step": 7080 + }, + { + "epoch": 1.33, + "learning_rate": 2.4918902981279125e-06, + "loss": 1.0639, + "step": 7085 + }, + { + "epoch": 1.34, + "learning_rate": 2.4854941214764496e-06, + "loss": 1.0327, + "step": 7090 + }, + { + "epoch": 1.34, + "learning_rate": 2.479103447918442e-06, + "loss": 1.0185, + "step": 7095 + }, + { + "epoch": 1.34, + "learning_rate": 2.4727182914401243e-06, + "loss": 1.0146, + "step": 7100 + }, + { + "epoch": 1.34, + "learning_rate": 2.4663386660156607e-06, + "loss": 1.0032, + "step": 7105 + }, + { + "epoch": 1.34, + "learning_rate": 2.459964585607107e-06, + "loss": 1.0132, + "step": 7110 + }, + { + "epoch": 1.34, + "learning_rate": 2.453596064164383e-06, + "loss": 0.9677, + "step": 7115 + }, + { + "epoch": 1.34, + "learning_rate": 2.447233115625249e-06, + "loss": 1.0285, + "step": 7120 + }, + { + "epoch": 1.34, + "learning_rate": 2.4408757539152587e-06, + "loss": 1.0293, + "step": 7125 + }, + { + "epoch": 1.34, + "learning_rate": 2.4345239929477498e-06, + "loss": 1.0272, + "step": 7130 + }, + { + "epoch": 1.34, + "learning_rate": 2.4281778466237925e-06, + "loss": 0.9806, + "step": 7135 + }, + { + "epoch": 1.34, + "learning_rate": 2.4218373288321757e-06, + "loss": 0.9967, + "step": 7140 + }, + { + "epoch": 1.35, + "learning_rate": 2.41550245344937e-06, + "loss": 0.993, + "step": 7145 + }, + { + "epoch": 1.35, + "learning_rate": 2.409173234339493e-06, + "loss": 0.9943, + "step": 7150 + }, + { + "epoch": 1.35, + "learning_rate": 2.402849685354285e-06, + "loss": 0.9987, + "step": 7155 + }, + { + "epoch": 1.35, + "learning_rate": 2.39653182033308e-06, + "loss": 0.9289, + "step": 7160 + }, + { + "epoch": 1.35, + "learning_rate": 2.3902196531027676e-06, + "loss": 1.0155, + "step": 7165 + }, + { + "epoch": 1.35, + "learning_rate": 2.3839131974777737e-06, + "loss": 0.9853, + "step": 7170 + }, + { + "epoch": 1.35, + "learning_rate": 2.3776124672600183e-06, + "loss": 0.9608, + "step": 7175 + }, + { + "epoch": 1.35, + "learning_rate": 2.371317476238891e-06, + "loss": 1.0193, + "step": 7180 + }, + { + "epoch": 1.35, + "learning_rate": 2.3650282381912287e-06, + "loss": 1.004, + "step": 7185 + }, + { + "epoch": 1.35, + "learning_rate": 2.3587447668812674e-06, + "loss": 1.0586, + "step": 7190 + }, + { + "epoch": 1.36, + "learning_rate": 2.352467076060631e-06, + "loss": 0.9975, + "step": 7195 + }, + { + "epoch": 1.36, + "learning_rate": 2.3461951794682873e-06, + "loss": 1.0465, + "step": 7200 + }, + { + "epoch": 1.36, + "learning_rate": 2.3399290908305235e-06, + "loss": 0.977, + "step": 7205 + }, + { + "epoch": 1.36, + "learning_rate": 2.3336688238609195e-06, + "loss": 1.0139, + "step": 7210 + }, + { + "epoch": 1.36, + "learning_rate": 2.3274143922603094e-06, + "loss": 1.0116, + "step": 7215 + }, + { + "epoch": 1.36, + "learning_rate": 2.321165809716762e-06, + "loss": 0.9869, + "step": 7220 + }, + { + "epoch": 1.36, + "learning_rate": 2.3149230899055408e-06, + "loss": 1.015, + "step": 7225 + }, + { + "epoch": 1.36, + "learning_rate": 2.3086862464890776e-06, + "loss": 1.0494, + "step": 7230 + }, + { + "epoch": 1.36, + "learning_rate": 2.3024552931169485e-06, + "loss": 1.0413, + "step": 7235 + }, + { + "epoch": 1.36, + "learning_rate": 2.296230243425834e-06, + "loss": 0.9762, + "step": 7240 + }, + { + "epoch": 1.36, + "learning_rate": 2.290011111039499e-06, + "loss": 1.0236, + "step": 7245 + }, + { + "epoch": 1.37, + "learning_rate": 2.2837979095687534e-06, + "loss": 0.9891, + "step": 7250 + }, + { + "epoch": 1.37, + "learning_rate": 2.277590652611428e-06, + "loss": 1.0041, + "step": 7255 + }, + { + "epoch": 1.37, + "learning_rate": 2.2713893537523476e-06, + "loss": 1.0105, + "step": 7260 + }, + { + "epoch": 1.37, + "learning_rate": 2.2651940265632943e-06, + "loss": 1.0194, + "step": 7265 + }, + { + "epoch": 1.37, + "learning_rate": 2.2590046846029794e-06, + "loss": 1.062, + "step": 7270 + }, + { + "epoch": 1.37, + "learning_rate": 2.2528213414170215e-06, + "loss": 0.9766, + "step": 7275 + }, + { + "epoch": 1.37, + "learning_rate": 2.246644010537901e-06, + "loss": 1.0246, + "step": 7280 + }, + { + "epoch": 1.37, + "learning_rate": 2.2404727054849513e-06, + "loss": 0.9829, + "step": 7285 + }, + { + "epoch": 1.37, + "learning_rate": 2.2343074397643095e-06, + "loss": 1.0424, + "step": 7290 + }, + { + "epoch": 1.37, + "learning_rate": 2.2281482268688963e-06, + "loss": 1.0679, + "step": 7295 + }, + { + "epoch": 1.37, + "learning_rate": 2.2219950802783922e-06, + "loss": 1.0045, + "step": 7300 + }, + { + "epoch": 1.38, + "learning_rate": 2.2158480134591927e-06, + "loss": 1.0126, + "step": 7305 + }, + { + "epoch": 1.38, + "learning_rate": 2.2097070398643937e-06, + "loss": 0.9906, + "step": 7310 + }, + { + "epoch": 1.38, + "learning_rate": 2.2035721729337524e-06, + "loss": 0.9894, + "step": 7315 + }, + { + "epoch": 1.38, + "learning_rate": 2.1974434260936637e-06, + "loss": 1.0052, + "step": 7320 + }, + { + "epoch": 1.38, + "learning_rate": 2.191320812757128e-06, + "loss": 0.9687, + "step": 7325 + }, + { + "epoch": 1.38, + "learning_rate": 2.1852043463237184e-06, + "loss": 0.9843, + "step": 7330 + }, + { + "epoch": 1.38, + "learning_rate": 2.1790940401795617e-06, + "loss": 1.0324, + "step": 7335 + }, + { + "epoch": 1.38, + "learning_rate": 2.1729899076972967e-06, + "loss": 1.0006, + "step": 7340 + }, + { + "epoch": 1.38, + "learning_rate": 2.166891962236057e-06, + "loss": 0.9852, + "step": 7345 + }, + { + "epoch": 1.38, + "learning_rate": 2.16080021714143e-06, + "loss": 1.0015, + "step": 7350 + }, + { + "epoch": 1.39, + "learning_rate": 2.1547146857454344e-06, + "loss": 1.0451, + "step": 7355 + }, + { + "epoch": 1.39, + "learning_rate": 2.1486353813664946e-06, + "loss": 0.9813, + "step": 7360 + }, + { + "epoch": 1.39, + "learning_rate": 2.1425623173094e-06, + "loss": 0.9696, + "step": 7365 + }, + { + "epoch": 1.39, + "learning_rate": 2.1364955068652905e-06, + "loss": 1.0468, + "step": 7370 + }, + { + "epoch": 1.39, + "learning_rate": 2.1304349633116135e-06, + "loss": 0.9883, + "step": 7375 + }, + { + "epoch": 1.39, + "learning_rate": 2.1243806999121007e-06, + "loss": 1.0037, + "step": 7380 + }, + { + "epoch": 1.39, + "learning_rate": 2.1183327299167443e-06, + "loss": 1.0428, + "step": 7385 + }, + { + "epoch": 1.39, + "learning_rate": 2.112291066561763e-06, + "loss": 1.0181, + "step": 7390 + }, + { + "epoch": 1.39, + "learning_rate": 2.1062557230695665e-06, + "loss": 0.9839, + "step": 7395 + }, + { + "epoch": 1.39, + "learning_rate": 2.1002267126487414e-06, + "loss": 0.9951, + "step": 7400 + }, + { + "epoch": 1.39, + "learning_rate": 2.0942040484940075e-06, + "loss": 1.0095, + "step": 7405 + }, + { + "epoch": 1.4, + "learning_rate": 2.0881877437862006e-06, + "loss": 1.0413, + "step": 7410 + }, + { + "epoch": 1.4, + "learning_rate": 2.0821778116922362e-06, + "loss": 1.0079, + "step": 7415 + }, + { + "epoch": 1.4, + "learning_rate": 2.076174265365081e-06, + "loss": 1.0003, + "step": 7420 + }, + { + "epoch": 1.4, + "learning_rate": 2.0701771179437323e-06, + "loss": 1.0734, + "step": 7425 + }, + { + "epoch": 1.4, + "learning_rate": 2.0641863825531756e-06, + "loss": 0.9623, + "step": 7430 + }, + { + "epoch": 1.4, + "learning_rate": 2.0582020723043706e-06, + "loss": 1.0149, + "step": 7435 + }, + { + "epoch": 1.4, + "learning_rate": 2.05222420029421e-06, + "loss": 0.9964, + "step": 7440 + }, + { + "epoch": 1.4, + "learning_rate": 2.046252779605497e-06, + "loss": 0.9638, + "step": 7445 + }, + { + "epoch": 1.4, + "learning_rate": 2.04028782330692e-06, + "loss": 0.9799, + "step": 7450 + }, + { + "epoch": 1.4, + "learning_rate": 2.034329344453014e-06, + "loss": 1.0072, + "step": 7455 + }, + { + "epoch": 1.41, + "learning_rate": 2.0283773560841425e-06, + "loss": 1.0516, + "step": 7460 + }, + { + "epoch": 1.41, + "learning_rate": 2.0224318712264623e-06, + "loss": 1.039, + "step": 7465 + }, + { + "epoch": 1.41, + "learning_rate": 2.0164929028918943e-06, + "loss": 1.0007, + "step": 7470 + }, + { + "epoch": 1.41, + "learning_rate": 2.010560464078105e-06, + "loss": 1.0043, + "step": 7475 + }, + { + "epoch": 1.41, + "learning_rate": 2.0046345677684626e-06, + "loss": 0.9936, + "step": 7480 + }, + { + "epoch": 1.41, + "learning_rate": 1.998715226932024e-06, + "loss": 1.0092, + "step": 7485 + }, + { + "epoch": 1.41, + "learning_rate": 1.9928024545234935e-06, + "loss": 1.009, + "step": 7490 + }, + { + "epoch": 1.41, + "learning_rate": 1.986896263483202e-06, + "loss": 1.0476, + "step": 7495 + }, + { + "epoch": 1.41, + "learning_rate": 1.9809966667370793e-06, + "loss": 1.0099, + "step": 7500 + }, + { + "epoch": 1.41, + "learning_rate": 1.975103677196619e-06, + "loss": 1.0572, + "step": 7505 + }, + { + "epoch": 1.41, + "learning_rate": 1.969217307758857e-06, + "loss": 1.0021, + "step": 7510 + }, + { + "epoch": 1.42, + "learning_rate": 1.9633375713063442e-06, + "loss": 0.9912, + "step": 7515 + }, + { + "epoch": 1.42, + "learning_rate": 1.9574644807071063e-06, + "loss": 0.9782, + "step": 7520 + }, + { + "epoch": 1.42, + "learning_rate": 1.951598048814634e-06, + "loss": 1.0154, + "step": 7525 + }, + { + "epoch": 1.42, + "learning_rate": 1.945738288467839e-06, + "loss": 1.0426, + "step": 7530 + }, + { + "epoch": 1.42, + "learning_rate": 1.939885212491031e-06, + "loss": 1.0245, + "step": 7535 + }, + { + "epoch": 1.42, + "learning_rate": 1.9340388336938976e-06, + "loss": 0.9687, + "step": 7540 + }, + { + "epoch": 1.42, + "learning_rate": 1.9281991648714613e-06, + "loss": 0.9701, + "step": 7545 + }, + { + "epoch": 1.42, + "learning_rate": 1.922366218804067e-06, + "loss": 0.977, + "step": 7550 + }, + { + "epoch": 1.42, + "learning_rate": 1.9165400082573393e-06, + "loss": 0.9886, + "step": 7555 + }, + { + "epoch": 1.42, + "learning_rate": 1.910720545982169e-06, + "loss": 1.0078, + "step": 7560 + }, + { + "epoch": 1.42, + "learning_rate": 1.9049078447146729e-06, + "loss": 0.9872, + "step": 7565 + }, + { + "epoch": 1.43, + "learning_rate": 1.8991019171761714e-06, + "loss": 1.0382, + "step": 7570 + }, + { + "epoch": 1.43, + "learning_rate": 1.8933027760731642e-06, + "loss": 0.9738, + "step": 7575 + }, + { + "epoch": 1.43, + "learning_rate": 1.8875104340972939e-06, + "loss": 0.9879, + "step": 7580 + }, + { + "epoch": 1.43, + "learning_rate": 1.8817249039253277e-06, + "loss": 0.9897, + "step": 7585 + }, + { + "epoch": 1.43, + "learning_rate": 1.8759461982191206e-06, + "loss": 0.9712, + "step": 7590 + }, + { + "epoch": 1.43, + "learning_rate": 1.870174329625592e-06, + "loss": 1.0142, + "step": 7595 + }, + { + "epoch": 1.43, + "learning_rate": 1.8644093107767042e-06, + "loss": 1.0416, + "step": 7600 + }, + { + "epoch": 1.43, + "learning_rate": 1.8586511542894192e-06, + "loss": 1.002, + "step": 7605 + }, + { + "epoch": 1.43, + "learning_rate": 1.8528998727656899e-06, + "loss": 0.9846, + "step": 7610 + }, + { + "epoch": 1.43, + "learning_rate": 1.8471554787924162e-06, + "loss": 1.063, + "step": 7615 + }, + { + "epoch": 1.44, + "learning_rate": 1.8414179849414255e-06, + "loss": 0.9915, + "step": 7620 + }, + { + "epoch": 1.44, + "learning_rate": 1.8356874037694483e-06, + "loss": 1.0099, + "step": 7625 + }, + { + "epoch": 1.44, + "learning_rate": 1.8299637478180798e-06, + "loss": 1.0092, + "step": 7630 + }, + { + "epoch": 1.44, + "learning_rate": 1.8242470296137644e-06, + "loss": 1.0749, + "step": 7635 + }, + { + "epoch": 1.44, + "learning_rate": 1.8185372616677633e-06, + "loss": 0.9846, + "step": 7640 + }, + { + "epoch": 1.44, + "learning_rate": 1.8128344564761207e-06, + "loss": 0.9644, + "step": 7645 + }, + { + "epoch": 1.44, + "learning_rate": 1.8071386265196506e-06, + "loss": 0.9569, + "step": 7650 + }, + { + "epoch": 1.44, + "learning_rate": 1.801449784263895e-06, + "loss": 1.0062, + "step": 7655 + }, + { + "epoch": 1.44, + "learning_rate": 1.795767942159104e-06, + "loss": 1.0086, + "step": 7660 + }, + { + "epoch": 1.44, + "learning_rate": 1.790093112640212e-06, + "loss": 0.9769, + "step": 7665 + }, + { + "epoch": 1.44, + "learning_rate": 1.7844253081268003e-06, + "loss": 0.9795, + "step": 7670 + }, + { + "epoch": 1.45, + "learning_rate": 1.7787645410230815e-06, + "loss": 1.0027, + "step": 7675 + }, + { + "epoch": 1.45, + "learning_rate": 1.773110823717862e-06, + "loss": 1.0327, + "step": 7680 + }, + { + "epoch": 1.45, + "learning_rate": 1.767464168584519e-06, + "loss": 1.0469, + "step": 7685 + }, + { + "epoch": 1.45, + "learning_rate": 1.7618245879809797e-06, + "loss": 1.0232, + "step": 7690 + }, + { + "epoch": 1.45, + "learning_rate": 1.7561920942496807e-06, + "loss": 1.0175, + "step": 7695 + }, + { + "epoch": 1.45, + "learning_rate": 1.7505666997175564e-06, + "loss": 0.9814, + "step": 7700 + }, + { + "epoch": 1.45, + "learning_rate": 1.7449484166959984e-06, + "loss": 1.0214, + "step": 7705 + }, + { + "epoch": 1.45, + "learning_rate": 1.7393372574808354e-06, + "loss": 0.9433, + "step": 7710 + }, + { + "epoch": 1.45, + "learning_rate": 1.7337332343523095e-06, + "loss": 1.0303, + "step": 7715 + }, + { + "epoch": 1.45, + "learning_rate": 1.728136359575039e-06, + "loss": 0.9507, + "step": 7720 + }, + { + "epoch": 1.46, + "learning_rate": 1.7225466453980044e-06, + "loss": 0.9991, + "step": 7725 + }, + { + "epoch": 1.46, + "learning_rate": 1.7169641040545098e-06, + "loss": 0.9836, + "step": 7730 + }, + { + "epoch": 1.46, + "learning_rate": 1.7113887477621615e-06, + "loss": 0.996, + "step": 7735 + }, + { + "epoch": 1.46, + "learning_rate": 1.7058205887228458e-06, + "loss": 0.9772, + "step": 7740 + }, + { + "epoch": 1.46, + "learning_rate": 1.7002596391226905e-06, + "loss": 1.0214, + "step": 7745 + }, + { + "epoch": 1.46, + "learning_rate": 1.6947059111320535e-06, + "loss": 0.9767, + "step": 7750 + }, + { + "epoch": 1.46, + "learning_rate": 1.6891594169054797e-06, + "loss": 1.0001, + "step": 7755 + }, + { + "epoch": 1.46, + "learning_rate": 1.6836201685816878e-06, + "loss": 1.0066, + "step": 7760 + }, + { + "epoch": 1.46, + "learning_rate": 1.678088178283539e-06, + "loss": 1.0153, + "step": 7765 + }, + { + "epoch": 1.46, + "learning_rate": 1.6725634581180066e-06, + "loss": 0.9883, + "step": 7770 + }, + { + "epoch": 1.46, + "learning_rate": 1.6670460201761523e-06, + "loss": 0.9997, + "step": 7775 + }, + { + "epoch": 1.47, + "learning_rate": 1.6615358765331075e-06, + "loss": 0.9908, + "step": 7780 + }, + { + "epoch": 1.47, + "learning_rate": 1.65603303924803e-06, + "loss": 1.0155, + "step": 7785 + }, + { + "epoch": 1.47, + "learning_rate": 1.6505375203640961e-06, + "loss": 1.0399, + "step": 7790 + }, + { + "epoch": 1.47, + "learning_rate": 1.6450493319084581e-06, + "loss": 1.0216, + "step": 7795 + }, + { + "epoch": 1.47, + "learning_rate": 1.6395684858922328e-06, + "loss": 1.0126, + "step": 7800 + }, + { + "epoch": 1.47, + "learning_rate": 1.6340949943104617e-06, + "loss": 0.9929, + "step": 7805 + }, + { + "epoch": 1.47, + "learning_rate": 1.6286288691420914e-06, + "loss": 1.0062, + "step": 7810 + }, + { + "epoch": 1.47, + "learning_rate": 1.6231701223499519e-06, + "loss": 0.9688, + "step": 7815 + }, + { + "epoch": 1.47, + "learning_rate": 1.6177187658807182e-06, + "loss": 0.9612, + "step": 7820 + }, + { + "epoch": 1.47, + "learning_rate": 1.6122748116648984e-06, + "loss": 0.9724, + "step": 7825 + }, + { + "epoch": 1.47, + "learning_rate": 1.6068382716167957e-06, + "loss": 1.0499, + "step": 7830 + }, + { + "epoch": 1.48, + "learning_rate": 1.601409157634487e-06, + "loss": 0.97, + "step": 7835 + }, + { + "epoch": 1.48, + "learning_rate": 1.595987481599801e-06, + "loss": 1.0185, + "step": 7840 + }, + { + "epoch": 1.48, + "learning_rate": 1.5905732553782827e-06, + "loss": 0.9706, + "step": 7845 + }, + { + "epoch": 1.48, + "learning_rate": 1.5851664908191788e-06, + "loss": 0.9821, + "step": 7850 + }, + { + "epoch": 1.48, + "learning_rate": 1.5797671997554016e-06, + "loss": 1.0146, + "step": 7855 + }, + { + "epoch": 1.48, + "learning_rate": 1.5743753940035067e-06, + "loss": 0.973, + "step": 7860 + }, + { + "epoch": 1.48, + "learning_rate": 1.568991085363673e-06, + "loss": 1.017, + "step": 7865 + }, + { + "epoch": 1.48, + "learning_rate": 1.5636142856196652e-06, + "loss": 1.0181, + "step": 7870 + }, + { + "epoch": 1.48, + "learning_rate": 1.5582450065388205e-06, + "loss": 0.9986, + "step": 7875 + }, + { + "epoch": 1.48, + "learning_rate": 1.5528832598720117e-06, + "loss": 0.9966, + "step": 7880 + }, + { + "epoch": 1.49, + "learning_rate": 1.54752905735363e-06, + "loss": 1.0512, + "step": 7885 + }, + { + "epoch": 1.49, + "learning_rate": 1.5421824107015565e-06, + "loss": 1.0491, + "step": 7890 + }, + { + "epoch": 1.49, + "learning_rate": 1.5368433316171321e-06, + "loss": 1.0325, + "step": 7895 + }, + { + "epoch": 1.49, + "learning_rate": 1.531511831785138e-06, + "loss": 0.961, + "step": 7900 + }, + { + "epoch": 1.49, + "learning_rate": 1.5261879228737697e-06, + "loss": 0.9795, + "step": 7905 + }, + { + "epoch": 1.49, + "learning_rate": 1.520871616534606e-06, + "loss": 0.9812, + "step": 7910 + }, + { + "epoch": 1.49, + "learning_rate": 1.515562924402591e-06, + "loss": 0.9549, + "step": 7915 + }, + { + "epoch": 1.49, + "learning_rate": 1.5102618580960022e-06, + "loss": 0.9947, + "step": 7920 + }, + { + "epoch": 1.49, + "learning_rate": 1.5049684292164268e-06, + "loss": 0.9944, + "step": 7925 + }, + { + "epoch": 1.49, + "learning_rate": 1.4996826493487422e-06, + "loss": 1.0354, + "step": 7930 + }, + { + "epoch": 1.49, + "learning_rate": 1.4944045300610776e-06, + "loss": 1.0001, + "step": 7935 + }, + { + "epoch": 1.5, + "learning_rate": 1.4891340829048068e-06, + "loss": 0.9898, + "step": 7940 + }, + { + "epoch": 1.5, + "learning_rate": 1.4838713194145044e-06, + "loss": 0.969, + "step": 7945 + }, + { + "epoch": 1.5, + "learning_rate": 1.4786162511079305e-06, + "loss": 0.9292, + "step": 7950 + }, + { + "epoch": 1.5, + "learning_rate": 1.4733688894860087e-06, + "loss": 0.9704, + "step": 7955 + }, + { + "epoch": 1.5, + "learning_rate": 1.4681292460327905e-06, + "loss": 1.013, + "step": 7960 + }, + { + "epoch": 1.5, + "learning_rate": 1.462897332215441e-06, + "loss": 1.0124, + "step": 7965 + }, + { + "epoch": 1.5, + "learning_rate": 1.4576731594842053e-06, + "loss": 1.0041, + "step": 7970 + }, + { + "epoch": 1.5, + "learning_rate": 1.4524567392723848e-06, + "loss": 1.0264, + "step": 7975 + }, + { + "epoch": 1.5, + "learning_rate": 1.4472480829963215e-06, + "loss": 0.976, + "step": 7980 + }, + { + "epoch": 1.5, + "learning_rate": 1.4420472020553572e-06, + "loss": 0.9835, + "step": 7985 + }, + { + "epoch": 1.5, + "learning_rate": 1.436854107831825e-06, + "loss": 0.9903, + "step": 7990 + }, + { + "epoch": 1.51, + "learning_rate": 1.4316688116910104e-06, + "loss": 1.0028, + "step": 7995 + }, + { + "epoch": 1.51, + "learning_rate": 1.426491324981134e-06, + "loss": 1.0626, + "step": 8000 + }, + { + "epoch": 1.51, + "learning_rate": 1.4213216590333267e-06, + "loss": 0.9957, + "step": 8005 + }, + { + "epoch": 1.51, + "learning_rate": 1.4161598251616043e-06, + "loss": 0.9839, + "step": 8010 + }, + { + "epoch": 1.51, + "learning_rate": 1.4110058346628375e-06, + "loss": 1.0247, + "step": 8015 + }, + { + "epoch": 1.51, + "learning_rate": 1.4058596988167361e-06, + "loss": 1.0008, + "step": 8020 + }, + { + "epoch": 1.51, + "learning_rate": 1.4007214288858156e-06, + "loss": 1.0017, + "step": 8025 + }, + { + "epoch": 1.51, + "learning_rate": 1.3955910361153806e-06, + "loss": 1.0104, + "step": 8030 + }, + { + "epoch": 1.51, + "learning_rate": 1.3904685317334926e-06, + "loss": 0.9825, + "step": 8035 + }, + { + "epoch": 1.51, + "learning_rate": 1.3853539269509525e-06, + "loss": 0.9759, + "step": 8040 + }, + { + "epoch": 1.52, + "learning_rate": 1.38024723296127e-06, + "loss": 0.9844, + "step": 8045 + }, + { + "epoch": 1.52, + "learning_rate": 1.3751484609406407e-06, + "loss": 1.0319, + "step": 8050 + }, + { + "epoch": 1.52, + "learning_rate": 1.370057622047928e-06, + "loss": 1.0118, + "step": 8055 + }, + { + "epoch": 1.52, + "learning_rate": 1.3649747274246267e-06, + "loss": 1.0164, + "step": 8060 + }, + { + "epoch": 1.52, + "learning_rate": 1.3598997881948512e-06, + "loss": 1.0283, + "step": 8065 + }, + { + "epoch": 1.52, + "learning_rate": 1.3548328154653013e-06, + "loss": 0.9834, + "step": 8070 + }, + { + "epoch": 1.52, + "learning_rate": 1.3497738203252415e-06, + "loss": 0.9829, + "step": 8075 + }, + { + "epoch": 1.52, + "learning_rate": 1.3447228138464812e-06, + "loss": 0.9738, + "step": 8080 + }, + { + "epoch": 1.52, + "learning_rate": 1.3396798070833406e-06, + "loss": 0.9093, + "step": 8085 + }, + { + "epoch": 1.52, + "learning_rate": 1.3346448110726385e-06, + "loss": 0.9741, + "step": 8090 + }, + { + "epoch": 1.52, + "learning_rate": 1.329617836833657e-06, + "loss": 1.0285, + "step": 8095 + }, + { + "epoch": 1.53, + "learning_rate": 1.3245988953681221e-06, + "loss": 1.067, + "step": 8100 + }, + { + "epoch": 1.53, + "learning_rate": 1.319587997660185e-06, + "loss": 1.0426, + "step": 8105 + }, + { + "epoch": 1.53, + "learning_rate": 1.3145851546763855e-06, + "loss": 0.9884, + "step": 8110 + }, + { + "epoch": 1.53, + "learning_rate": 1.3095903773656432e-06, + "loss": 0.9974, + "step": 8115 + }, + { + "epoch": 1.53, + "learning_rate": 1.304603676659219e-06, + "loss": 1.0406, + "step": 8120 + }, + { + "epoch": 1.53, + "learning_rate": 1.299625063470698e-06, + "loss": 1.0303, + "step": 8125 + }, + { + "epoch": 1.53, + "learning_rate": 1.2946545486959739e-06, + "loss": 1.0013, + "step": 8130 + }, + { + "epoch": 1.53, + "learning_rate": 1.2896921432132076e-06, + "loss": 1.0501, + "step": 8135 + }, + { + "epoch": 1.53, + "learning_rate": 1.2847378578828145e-06, + "loss": 1.035, + "step": 8140 + }, + { + "epoch": 1.53, + "learning_rate": 1.2797917035474421e-06, + "loss": 0.9811, + "step": 8145 + }, + { + "epoch": 1.54, + "learning_rate": 1.2748536910319381e-06, + "loss": 0.9942, + "step": 8150 + }, + { + "epoch": 1.54, + "learning_rate": 1.2699238311433365e-06, + "loss": 0.9914, + "step": 8155 + }, + { + "epoch": 1.54, + "learning_rate": 1.2650021346708258e-06, + "loss": 1.0016, + "step": 8160 + }, + { + "epoch": 1.54, + "learning_rate": 1.2600886123857276e-06, + "loss": 1.0056, + "step": 8165 + }, + { + "epoch": 1.54, + "learning_rate": 1.2551832750414782e-06, + "loss": 1.0331, + "step": 8170 + }, + { + "epoch": 1.54, + "learning_rate": 1.2502861333735954e-06, + "loss": 0.9904, + "step": 8175 + }, + { + "epoch": 1.54, + "learning_rate": 1.2453971980996665e-06, + "loss": 1.0247, + "step": 8180 + }, + { + "epoch": 1.54, + "learning_rate": 1.2405164799193143e-06, + "loss": 1.0195, + "step": 8185 + }, + { + "epoch": 1.54, + "learning_rate": 1.235643989514177e-06, + "loss": 1.0077, + "step": 8190 + }, + { + "epoch": 1.54, + "learning_rate": 1.2307797375478925e-06, + "loss": 0.9906, + "step": 8195 + }, + { + "epoch": 1.54, + "learning_rate": 1.2259237346660613e-06, + "loss": 0.9833, + "step": 8200 + }, + { + "epoch": 1.55, + "learning_rate": 1.2210759914962367e-06, + "loss": 0.9782, + "step": 8205 + }, + { + "epoch": 1.55, + "learning_rate": 1.2162365186478902e-06, + "loss": 1.0459, + "step": 8210 + }, + { + "epoch": 1.55, + "learning_rate": 1.211405326712395e-06, + "loss": 0.9974, + "step": 8215 + }, + { + "epoch": 1.55, + "learning_rate": 1.2065824262630037e-06, + "loss": 0.9904, + "step": 8220 + }, + { + "epoch": 1.55, + "learning_rate": 1.2017678278548184e-06, + "loss": 0.9625, + "step": 8225 + }, + { + "epoch": 1.55, + "learning_rate": 1.196961542024776e-06, + "loss": 0.9987, + "step": 8230 + }, + { + "epoch": 1.55, + "learning_rate": 1.1921635792916185e-06, + "loss": 0.987, + "step": 8235 + }, + { + "epoch": 1.55, + "learning_rate": 1.1873739501558723e-06, + "loss": 1.0117, + "step": 8240 + }, + { + "epoch": 1.55, + "learning_rate": 1.1825926650998276e-06, + "loss": 1.0068, + "step": 8245 + }, + { + "epoch": 1.55, + "learning_rate": 1.1778197345875092e-06, + "loss": 1.0296, + "step": 8250 + }, + { + "epoch": 1.55, + "learning_rate": 1.1730551690646624e-06, + "loss": 0.9519, + "step": 8255 + }, + { + "epoch": 1.56, + "learning_rate": 1.1682989789587246e-06, + "loss": 0.9607, + "step": 8260 + }, + { + "epoch": 1.56, + "learning_rate": 1.1635511746787985e-06, + "loss": 0.9565, + "step": 8265 + }, + { + "epoch": 1.56, + "learning_rate": 1.1588117666156417e-06, + "loss": 1.0062, + "step": 8270 + }, + { + "epoch": 1.56, + "learning_rate": 1.1540807651416274e-06, + "loss": 0.9759, + "step": 8275 + }, + { + "epoch": 1.56, + "learning_rate": 1.14935818061074e-06, + "loss": 0.9834, + "step": 8280 + }, + { + "epoch": 1.56, + "learning_rate": 1.1446440233585365e-06, + "loss": 1.0031, + "step": 8285 + }, + { + "epoch": 1.56, + "learning_rate": 1.1399383037021317e-06, + "loss": 1.0102, + "step": 8290 + }, + { + "epoch": 1.56, + "learning_rate": 1.135241031940177e-06, + "loss": 1.006, + "step": 8295 + }, + { + "epoch": 1.56, + "learning_rate": 1.130552218352831e-06, + "loss": 0.9901, + "step": 8300 + }, + { + "epoch": 1.56, + "learning_rate": 1.125871873201747e-06, + "loss": 1.0177, + "step": 8305 + }, + { + "epoch": 1.57, + "learning_rate": 1.1212000067300399e-06, + "loss": 1.0048, + "step": 8310 + }, + { + "epoch": 1.57, + "learning_rate": 1.1165366291622687e-06, + "loss": 1.0037, + "step": 8315 + }, + { + "epoch": 1.57, + "learning_rate": 1.1118817507044188e-06, + "loss": 0.9942, + "step": 8320 + }, + { + "epoch": 1.57, + "learning_rate": 1.1072353815438691e-06, + "loss": 0.9472, + "step": 8325 + }, + { + "epoch": 1.57, + "learning_rate": 1.1025975318493814e-06, + "loss": 1.0209, + "step": 8330 + }, + { + "epoch": 1.57, + "learning_rate": 1.0979682117710671e-06, + "loss": 1.0373, + "step": 8335 + }, + { + "epoch": 1.57, + "learning_rate": 1.093347431440372e-06, + "loss": 1.0145, + "step": 8340 + }, + { + "epoch": 1.57, + "learning_rate": 1.0887352009700548e-06, + "loss": 1.0336, + "step": 8345 + }, + { + "epoch": 1.57, + "learning_rate": 1.0841315304541578e-06, + "loss": 1.0059, + "step": 8350 + }, + { + "epoch": 1.57, + "learning_rate": 1.079536429967995e-06, + "loss": 0.9977, + "step": 8355 + }, + { + "epoch": 1.57, + "learning_rate": 1.0749499095681204e-06, + "loss": 0.9639, + "step": 8360 + }, + { + "epoch": 1.58, + "learning_rate": 1.07037197929231e-06, + "loss": 1.0021, + "step": 8365 + }, + { + "epoch": 1.58, + "learning_rate": 1.0658026491595442e-06, + "loss": 1.0374, + "step": 8370 + }, + { + "epoch": 1.58, + "learning_rate": 1.061241929169976e-06, + "loss": 1.007, + "step": 8375 + }, + { + "epoch": 1.58, + "learning_rate": 1.0566898293049188e-06, + "loss": 0.9839, + "step": 8380 + }, + { + "epoch": 1.58, + "learning_rate": 1.0521463595268217e-06, + "loss": 0.9828, + "step": 8385 + }, + { + "epoch": 1.58, + "learning_rate": 1.04761152977924e-06, + "loss": 0.9932, + "step": 8390 + }, + { + "epoch": 1.58, + "learning_rate": 1.0430853499868282e-06, + "loss": 0.9747, + "step": 8395 + }, + { + "epoch": 1.58, + "learning_rate": 1.0385678300553043e-06, + "loss": 1.0261, + "step": 8400 + }, + { + "epoch": 1.58, + "learning_rate": 1.0340589798714334e-06, + "loss": 0.9775, + "step": 8405 + }, + { + "epoch": 1.58, + "learning_rate": 1.0295588093030123e-06, + "loss": 1.0098, + "step": 8410 + }, + { + "epoch": 1.58, + "learning_rate": 1.0250673281988355e-06, + "loss": 0.9396, + "step": 8415 + }, + { + "epoch": 1.59, + "learning_rate": 1.0205845463886855e-06, + "loss": 1.0189, + "step": 8420 + }, + { + "epoch": 1.59, + "learning_rate": 1.0161104736833033e-06, + "loss": 0.9858, + "step": 8425 + }, + { + "epoch": 1.59, + "learning_rate": 1.0116451198743682e-06, + "loss": 1.0138, + "step": 8430 + }, + { + "epoch": 1.59, + "learning_rate": 1.0071884947344835e-06, + "loss": 0.958, + "step": 8435 + }, + { + "epoch": 1.59, + "learning_rate": 1.0027406080171421e-06, + "loss": 0.9922, + "step": 8440 + }, + { + "epoch": 1.59, + "learning_rate": 9.983014694567189e-07, + "loss": 0.9678, + "step": 8445 + }, + { + "epoch": 1.59, + "learning_rate": 9.9387108876844e-07, + "loss": 1.0036, + "step": 8450 + }, + { + "epoch": 1.59, + "learning_rate": 9.89449475648363e-07, + "loss": 1.0105, + "step": 8455 + }, + { + "epoch": 1.59, + "learning_rate": 9.850366397733624e-07, + "loss": 0.9776, + "step": 8460 + }, + { + "epoch": 1.59, + "learning_rate": 9.806325908010978e-07, + "loss": 0.9267, + "step": 8465 + }, + { + "epoch": 1.6, + "learning_rate": 9.76237338370003e-07, + "loss": 1.0401, + "step": 8470 + }, + { + "epoch": 1.6, + "learning_rate": 9.718508920992576e-07, + "loss": 1.0059, + "step": 8475 + }, + { + "epoch": 1.6, + "learning_rate": 9.674732615887666e-07, + "loss": 1.0046, + "step": 8480 + }, + { + "epoch": 1.6, + "learning_rate": 9.631044564191473e-07, + "loss": 1.0045, + "step": 8485 + }, + { + "epoch": 1.6, + "learning_rate": 9.587444861516964e-07, + "loss": 0.9767, + "step": 8490 + }, + { + "epoch": 1.6, + "learning_rate": 9.543933603283794e-07, + "loss": 1.0321, + "step": 8495 + }, + { + "epoch": 1.6, + "learning_rate": 9.500510884718017e-07, + "loss": 1.015, + "step": 8500 + }, + { + "epoch": 1.6, + "learning_rate": 9.457176800851948e-07, + "loss": 0.9711, + "step": 8505 + }, + { + "epoch": 1.6, + "learning_rate": 9.413931446523917e-07, + "loss": 1.0119, + "step": 8510 + }, + { + "epoch": 1.6, + "learning_rate": 9.370774916378022e-07, + "loss": 1.0316, + "step": 8515 + }, + { + "epoch": 1.6, + "learning_rate": 9.327707304864042e-07, + "loss": 0.9668, + "step": 8520 + }, + { + "epoch": 1.61, + "learning_rate": 9.284728706237089e-07, + "loss": 0.9267, + "step": 8525 + }, + { + "epoch": 1.61, + "learning_rate": 9.241839214557474e-07, + "loss": 1.0602, + "step": 8530 + }, + { + "epoch": 1.61, + "learning_rate": 9.199038923690545e-07, + "loss": 0.983, + "step": 8535 + }, + { + "epoch": 1.61, + "learning_rate": 9.156327927306363e-07, + "loss": 0.9854, + "step": 8540 + }, + { + "epoch": 1.61, + "learning_rate": 9.113706318879623e-07, + "loss": 0.9946, + "step": 8545 + }, + { + "epoch": 1.61, + "learning_rate": 9.071174191689358e-07, + "loss": 1.0014, + "step": 8550 + }, + { + "epoch": 1.61, + "learning_rate": 9.028731638818756e-07, + "loss": 1.0559, + "step": 8555 + }, + { + "epoch": 1.61, + "learning_rate": 8.986378753155012e-07, + "loss": 0.9842, + "step": 8560 + }, + { + "epoch": 1.61, + "learning_rate": 8.944115627389032e-07, + "loss": 1.0126, + "step": 8565 + }, + { + "epoch": 1.61, + "learning_rate": 8.90194235401533e-07, + "loss": 1.0187, + "step": 8570 + }, + { + "epoch": 1.62, + "learning_rate": 8.859859025331735e-07, + "loss": 0.9868, + "step": 8575 + }, + { + "epoch": 1.62, + "learning_rate": 8.81786573343923e-07, + "loss": 1.0083, + "step": 8580 + }, + { + "epoch": 1.62, + "learning_rate": 8.775962570241775e-07, + "loss": 1.0306, + "step": 8585 + }, + { + "epoch": 1.62, + "learning_rate": 8.734149627446048e-07, + "loss": 0.9807, + "step": 8590 + }, + { + "epoch": 1.62, + "learning_rate": 8.692426996561315e-07, + "loss": 0.9988, + "step": 8595 + }, + { + "epoch": 1.62, + "learning_rate": 8.650794768899162e-07, + "loss": 0.9973, + "step": 8600 + }, + { + "epoch": 1.62, + "learning_rate": 8.609253035573311e-07, + "loss": 0.9622, + "step": 8605 + }, + { + "epoch": 1.62, + "learning_rate": 8.567801887499483e-07, + "loss": 0.9839, + "step": 8610 + }, + { + "epoch": 1.62, + "learning_rate": 8.526441415395093e-07, + "loss": 1.0232, + "step": 8615 + }, + { + "epoch": 1.62, + "learning_rate": 8.485171709779161e-07, + "loss": 1.0403, + "step": 8620 + }, + { + "epoch": 1.62, + "learning_rate": 8.443992860972011e-07, + "loss": 1.0501, + "step": 8625 + }, + { + "epoch": 1.63, + "learning_rate": 8.40290495909516e-07, + "loss": 0.9862, + "step": 8630 + }, + { + "epoch": 1.63, + "learning_rate": 8.36190809407108e-07, + "loss": 0.9605, + "step": 8635 + }, + { + "epoch": 1.63, + "learning_rate": 8.321002355622993e-07, + "loss": 1.0393, + "step": 8640 + }, + { + "epoch": 1.63, + "learning_rate": 8.280187833274672e-07, + "loss": 0.9765, + "step": 8645 + }, + { + "epoch": 1.63, + "learning_rate": 8.239464616350307e-07, + "loss": 1.0238, + "step": 8650 + }, + { + "epoch": 1.63, + "learning_rate": 8.19883279397421e-07, + "loss": 0.9585, + "step": 8655 + }, + { + "epoch": 1.63, + "learning_rate": 8.158292455070721e-07, + "loss": 1.0253, + "step": 8660 + }, + { + "epoch": 1.63, + "learning_rate": 8.117843688363925e-07, + "loss": 0.9903, + "step": 8665 + }, + { + "epoch": 1.63, + "learning_rate": 8.077486582377508e-07, + "loss": 1.0217, + "step": 8670 + }, + { + "epoch": 1.63, + "learning_rate": 8.037221225434583e-07, + "loss": 0.9856, + "step": 8675 + }, + { + "epoch": 1.63, + "learning_rate": 7.997047705657412e-07, + "loss": 0.9672, + "step": 8680 + }, + { + "epoch": 1.64, + "learning_rate": 7.956966110967324e-07, + "loss": 0.9625, + "step": 8685 + }, + { + "epoch": 1.64, + "learning_rate": 7.91697652908443e-07, + "loss": 1.0186, + "step": 8690 + }, + { + "epoch": 1.64, + "learning_rate": 7.877079047527469e-07, + "loss": 1.0295, + "step": 8695 + }, + { + "epoch": 1.64, + "learning_rate": 7.837273753613645e-07, + "loss": 1.0459, + "step": 8700 + }, + { + "epoch": 1.64, + "learning_rate": 7.797560734458359e-07, + "loss": 0.9633, + "step": 8705 + }, + { + "epoch": 1.64, + "learning_rate": 7.757940076975117e-07, + "loss": 0.9896, + "step": 8710 + }, + { + "epoch": 1.64, + "learning_rate": 7.718411867875253e-07, + "loss": 0.97, + "step": 8715 + }, + { + "epoch": 1.64, + "learning_rate": 7.678976193667764e-07, + "loss": 0.9848, + "step": 8720 + }, + { + "epoch": 1.64, + "learning_rate": 7.639633140659175e-07, + "loss": 0.9779, + "step": 8725 + }, + { + "epoch": 1.64, + "learning_rate": 7.600382794953259e-07, + "loss": 0.9598, + "step": 8730 + }, + { + "epoch": 1.65, + "learning_rate": 7.561225242450943e-07, + "loss": 1.0434, + "step": 8735 + }, + { + "epoch": 1.65, + "learning_rate": 7.522160568850018e-07, + "loss": 1.0151, + "step": 8740 + }, + { + "epoch": 1.65, + "learning_rate": 7.483188859645047e-07, + "loss": 0.9849, + "step": 8745 + }, + { + "epoch": 1.65, + "learning_rate": 7.444310200127136e-07, + "loss": 0.9867, + "step": 8750 + }, + { + "epoch": 1.65, + "learning_rate": 7.405524675383713e-07, + "loss": 1.0014, + "step": 8755 + }, + { + "epoch": 1.65, + "learning_rate": 7.366832370298427e-07, + "loss": 1.0249, + "step": 8760 + }, + { + "epoch": 1.65, + "learning_rate": 7.328233369550874e-07, + "loss": 0.968, + "step": 8765 + }, + { + "epoch": 1.65, + "learning_rate": 7.289727757616444e-07, + "loss": 1.0253, + "step": 8770 + }, + { + "epoch": 1.65, + "learning_rate": 7.251315618766186e-07, + "loss": 0.9783, + "step": 8775 + }, + { + "epoch": 1.65, + "learning_rate": 7.212997037066527e-07, + "loss": 1.0391, + "step": 8780 + }, + { + "epoch": 1.65, + "learning_rate": 7.17477209637919e-07, + "loss": 0.9626, + "step": 8785 + }, + { + "epoch": 1.66, + "learning_rate": 7.136640880360935e-07, + "loss": 1.0033, + "step": 8790 + }, + { + "epoch": 1.66, + "learning_rate": 7.098603472463384e-07, + "loss": 0.9924, + "step": 8795 + }, + { + "epoch": 1.66, + "learning_rate": 7.060659955932897e-07, + "loss": 0.9939, + "step": 8800 + }, + { + "epoch": 1.66, + "learning_rate": 7.022810413810311e-07, + "loss": 0.9911, + "step": 8805 + }, + { + "epoch": 1.66, + "learning_rate": 6.985054928930835e-07, + "loss": 0.969, + "step": 8810 + }, + { + "epoch": 1.66, + "learning_rate": 6.9473935839238e-07, + "loss": 0.9968, + "step": 8815 + }, + { + "epoch": 1.66, + "learning_rate": 6.909826461212509e-07, + "loss": 0.9673, + "step": 8820 + }, + { + "epoch": 1.66, + "learning_rate": 6.872353643014079e-07, + "loss": 0.9651, + "step": 8825 + }, + { + "epoch": 1.66, + "learning_rate": 6.834975211339207e-07, + "loss": 0.982, + "step": 8830 + }, + { + "epoch": 1.66, + "learning_rate": 6.797691247992055e-07, + "loss": 1.0256, + "step": 8835 + }, + { + "epoch": 1.67, + "learning_rate": 6.760501834570011e-07, + "loss": 0.9187, + "step": 8840 + }, + { + "epoch": 1.67, + "learning_rate": 6.723407052463538e-07, + "loss": 1.0243, + "step": 8845 + }, + { + "epoch": 1.67, + "learning_rate": 6.68640698285602e-07, + "loss": 0.9892, + "step": 8850 + }, + { + "epoch": 1.67, + "learning_rate": 6.649501706723522e-07, + "loss": 1.0034, + "step": 8855 + }, + { + "epoch": 1.67, + "learning_rate": 6.612691304834695e-07, + "loss": 0.9674, + "step": 8860 + }, + { + "epoch": 1.67, + "learning_rate": 6.57597585775051e-07, + "loss": 1.0126, + "step": 8865 + }, + { + "epoch": 1.67, + "learning_rate": 6.539355445824125e-07, + "loss": 1.0072, + "step": 8870 + }, + { + "epoch": 1.67, + "learning_rate": 6.502830149200773e-07, + "loss": 0.9848, + "step": 8875 + }, + { + "epoch": 1.67, + "learning_rate": 6.466400047817461e-07, + "loss": 0.991, + "step": 8880 + }, + { + "epoch": 1.67, + "learning_rate": 6.430065221402865e-07, + "loss": 0.9503, + "step": 8885 + }, + { + "epoch": 1.67, + "learning_rate": 6.393825749477178e-07, + "loss": 0.9877, + "step": 8890 + }, + { + "epoch": 1.68, + "learning_rate": 6.357681711351865e-07, + "loss": 0.9854, + "step": 8895 + }, + { + "epoch": 1.68, + "learning_rate": 6.321633186129578e-07, + "loss": 1.0191, + "step": 8900 + }, + { + "epoch": 1.68, + "learning_rate": 6.285680252703891e-07, + "loss": 1.0263, + "step": 8905 + }, + { + "epoch": 1.68, + "learning_rate": 6.249822989759191e-07, + "loss": 0.9759, + "step": 8910 + }, + { + "epoch": 1.68, + "learning_rate": 6.214061475770499e-07, + "loss": 0.9643, + "step": 8915 + }, + { + "epoch": 1.68, + "learning_rate": 6.178395789003255e-07, + "loss": 0.9902, + "step": 8920 + }, + { + "epoch": 1.68, + "learning_rate": 6.142826007513214e-07, + "loss": 1.0067, + "step": 8925 + }, + { + "epoch": 1.68, + "learning_rate": 6.107352209146211e-07, + "loss": 1.0193, + "step": 8930 + }, + { + "epoch": 1.68, + "learning_rate": 6.071974471538023e-07, + "loss": 0.9872, + "step": 8935 + }, + { + "epoch": 1.68, + "learning_rate": 6.036692872114203e-07, + "loss": 0.9438, + "step": 8940 + }, + { + "epoch": 1.68, + "learning_rate": 6.001507488089891e-07, + "loss": 0.9497, + "step": 8945 + }, + { + "epoch": 1.69, + "learning_rate": 5.966418396469676e-07, + "loss": 0.9883, + "step": 8950 + }, + { + "epoch": 1.69, + "learning_rate": 5.931425674047386e-07, + "loss": 1.0051, + "step": 8955 + }, + { + "epoch": 1.69, + "learning_rate": 5.896529397405936e-07, + "loss": 0.955, + "step": 8960 + }, + { + "epoch": 1.69, + "learning_rate": 5.861729642917202e-07, + "loss": 0.9636, + "step": 8965 + }, + { + "epoch": 1.69, + "learning_rate": 5.827026486741766e-07, + "loss": 1.007, + "step": 8970 + }, + { + "epoch": 1.69, + "learning_rate": 5.792420004828859e-07, + "loss": 0.9726, + "step": 8975 + }, + { + "epoch": 1.69, + "learning_rate": 5.757910272916078e-07, + "loss": 0.9744, + "step": 8980 + }, + { + "epoch": 1.69, + "learning_rate": 5.723497366529329e-07, + "loss": 1.0138, + "step": 8985 + }, + { + "epoch": 1.69, + "learning_rate": 5.689181360982588e-07, + "loss": 1.0067, + "step": 8990 + }, + { + "epoch": 1.69, + "learning_rate": 5.654962331377728e-07, + "loss": 0.9926, + "step": 8995 + }, + { + "epoch": 1.7, + "learning_rate": 5.620840352604473e-07, + "loss": 1.0074, + "step": 9000 + }, + { + "epoch": 1.7, + "learning_rate": 5.586815499340064e-07, + "loss": 0.9769, + "step": 9005 + }, + { + "epoch": 1.7, + "learning_rate": 5.552887846049204e-07, + "loss": 0.9856, + "step": 9010 + }, + { + "epoch": 1.7, + "learning_rate": 5.519057466983896e-07, + "loss": 1.044, + "step": 9015 + }, + { + "epoch": 1.7, + "learning_rate": 5.485324436183214e-07, + "loss": 0.92, + "step": 9020 + }, + { + "epoch": 1.7, + "learning_rate": 5.451688827473217e-07, + "loss": 0.9903, + "step": 9025 + }, + { + "epoch": 1.7, + "learning_rate": 5.418150714466736e-07, + "loss": 1.021, + "step": 9030 + }, + { + "epoch": 1.7, + "learning_rate": 5.384710170563212e-07, + "loss": 1.0316, + "step": 9035 + }, + { + "epoch": 1.7, + "learning_rate": 5.351367268948593e-07, + "loss": 1.0387, + "step": 9040 + }, + { + "epoch": 1.7, + "learning_rate": 5.318122082595089e-07, + "loss": 0.9826, + "step": 9045 + }, + { + "epoch": 1.7, + "learning_rate": 5.284974684261101e-07, + "loss": 1.0169, + "step": 9050 + }, + { + "epoch": 1.71, + "learning_rate": 5.251925146490977e-07, + "loss": 0.9853, + "step": 9055 + }, + { + "epoch": 1.71, + "learning_rate": 5.218973541614902e-07, + "loss": 0.987, + "step": 9060 + }, + { + "epoch": 1.71, + "learning_rate": 5.186119941748763e-07, + "loss": 0.962, + "step": 9065 + }, + { + "epoch": 1.71, + "learning_rate": 5.15336441879391e-07, + "loss": 1.0041, + "step": 9070 + }, + { + "epoch": 1.71, + "learning_rate": 5.120707044437095e-07, + "loss": 0.9375, + "step": 9075 + }, + { + "epoch": 1.71, + "learning_rate": 5.088147890150236e-07, + "loss": 0.9796, + "step": 9080 + }, + { + "epoch": 1.71, + "learning_rate": 5.055687027190293e-07, + "loss": 0.9812, + "step": 9085 + }, + { + "epoch": 1.71, + "learning_rate": 5.023324526599139e-07, + "loss": 0.9641, + "step": 9090 + }, + { + "epoch": 1.71, + "learning_rate": 4.991060459203339e-07, + "loss": 0.9975, + "step": 9095 + }, + { + "epoch": 1.71, + "learning_rate": 4.95889489561408e-07, + "loss": 1.0031, + "step": 9100 + }, + { + "epoch": 1.71, + "learning_rate": 4.926827906226928e-07, + "loss": 0.9925, + "step": 9105 + }, + { + "epoch": 1.72, + "learning_rate": 4.894859561221715e-07, + "loss": 1.0428, + "step": 9110 + }, + { + "epoch": 1.72, + "learning_rate": 4.862989930562434e-07, + "loss": 0.9984, + "step": 9115 + }, + { + "epoch": 1.72, + "learning_rate": 4.83121908399698e-07, + "loss": 0.9843, + "step": 9120 + }, + { + "epoch": 1.72, + "learning_rate": 4.799547091057083e-07, + "loss": 0.9559, + "step": 9125 + }, + { + "epoch": 1.72, + "learning_rate": 4.767974021058147e-07, + "loss": 0.9861, + "step": 9130 + }, + { + "epoch": 1.72, + "learning_rate": 4.736499943099032e-07, + "loss": 1.0061, + "step": 9135 + }, + { + "epoch": 1.72, + "learning_rate": 4.7051249260619933e-07, + "loss": 1.0023, + "step": 9140 + }, + { + "epoch": 1.72, + "learning_rate": 4.673849038612471e-07, + "loss": 0.9917, + "step": 9145 + }, + { + "epoch": 1.72, + "learning_rate": 4.6426723491989344e-07, + "loss": 0.9885, + "step": 9150 + }, + { + "epoch": 1.72, + "learning_rate": 4.6115949260527984e-07, + "loss": 0.9859, + "step": 9155 + }, + { + "epoch": 1.73, + "learning_rate": 4.58061683718819e-07, + "loss": 0.999, + "step": 9160 + }, + { + "epoch": 1.73, + "learning_rate": 4.5497381504018724e-07, + "loss": 0.976, + "step": 9165 + }, + { + "epoch": 1.73, + "learning_rate": 4.518958933273038e-07, + "loss": 0.99, + "step": 9170 + }, + { + "epoch": 1.73, + "learning_rate": 4.488279253163186e-07, + "loss": 1.0054, + "step": 9175 + }, + { + "epoch": 1.73, + "learning_rate": 4.4576991772160014e-07, + "loss": 0.9667, + "step": 9180 + }, + { + "epoch": 1.73, + "learning_rate": 4.4272187723571446e-07, + "loss": 1.0176, + "step": 9185 + }, + { + "epoch": 1.73, + "learning_rate": 4.3968381052941876e-07, + "loss": 0.9915, + "step": 9190 + }, + { + "epoch": 1.73, + "learning_rate": 4.366557242516378e-07, + "loss": 1.0613, + "step": 9195 + }, + { + "epoch": 1.73, + "learning_rate": 4.3363762502945496e-07, + "loss": 1.0286, + "step": 9200 + }, + { + "epoch": 1.73, + "learning_rate": 4.306295194680987e-07, + "loss": 0.997, + "step": 9205 + }, + { + "epoch": 1.73, + "learning_rate": 4.2763141415092414e-07, + "loss": 0.972, + "step": 9210 + }, + { + "epoch": 1.74, + "learning_rate": 4.2464331563940085e-07, + "loss": 1.0305, + "step": 9215 + }, + { + "epoch": 1.74, + "learning_rate": 4.216652304730973e-07, + "loss": 1.055, + "step": 9220 + }, + { + "epoch": 1.74, + "learning_rate": 4.1869716516966975e-07, + "loss": 1.0262, + "step": 9225 + }, + { + "epoch": 1.74, + "learning_rate": 4.157391262248428e-07, + "loss": 1.0102, + "step": 9230 + }, + { + "epoch": 1.74, + "learning_rate": 4.1279112011239854e-07, + "loss": 0.9798, + "step": 9235 + }, + { + "epoch": 1.74, + "learning_rate": 4.098531532841643e-07, + "loss": 1.0373, + "step": 9240 + }, + { + "epoch": 1.74, + "learning_rate": 4.069252321699929e-07, + "loss": 1.0202, + "step": 9245 + }, + { + "epoch": 1.74, + "learning_rate": 4.0400736317775303e-07, + "loss": 0.9893, + "step": 9250 + }, + { + "epoch": 1.74, + "learning_rate": 4.0109955269331534e-07, + "loss": 0.9882, + "step": 9255 + }, + { + "epoch": 1.74, + "learning_rate": 3.982018070805338e-07, + "loss": 1.014, + "step": 9260 + }, + { + "epoch": 1.75, + "learning_rate": 3.953141326812382e-07, + "loss": 0.9922, + "step": 9265 + }, + { + "epoch": 1.75, + "learning_rate": 3.9243653581521544e-07, + "loss": 0.9957, + "step": 9270 + }, + { + "epoch": 1.75, + "learning_rate": 3.895690227801968e-07, + "loss": 0.9637, + "step": 9275 + }, + { + "epoch": 1.75, + "learning_rate": 3.8671159985184725e-07, + "loss": 0.9959, + "step": 9280 + }, + { + "epoch": 1.75, + "learning_rate": 3.8386427328374496e-07, + "loss": 1.0288, + "step": 9285 + }, + { + "epoch": 1.75, + "learning_rate": 3.81027049307377e-07, + "loss": 0.9789, + "step": 9290 + }, + { + "epoch": 1.75, + "learning_rate": 3.781999341321169e-07, + "loss": 1.0493, + "step": 9295 + }, + { + "epoch": 1.75, + "learning_rate": 3.753829339452147e-07, + "loss": 1.0578, + "step": 9300 + }, + { + "epoch": 1.75, + "learning_rate": 3.725760549117857e-07, + "loss": 0.9472, + "step": 9305 + }, + { + "epoch": 1.75, + "learning_rate": 3.6977930317479206e-07, + "loss": 0.9805, + "step": 9310 + }, + { + "epoch": 1.75, + "learning_rate": 3.6699268485503426e-07, + "loss": 0.9856, + "step": 9315 + }, + { + "epoch": 1.76, + "learning_rate": 3.642162060511345e-07, + "loss": 1.024, + "step": 9320 + }, + { + "epoch": 1.76, + "learning_rate": 3.614498728395227e-07, + "loss": 0.9993, + "step": 9325 + }, + { + "epoch": 1.76, + "learning_rate": 3.5869369127442823e-07, + "loss": 0.9743, + "step": 9330 + }, + { + "epoch": 1.76, + "learning_rate": 3.5594766738785926e-07, + "loss": 0.987, + "step": 9335 + }, + { + "epoch": 1.76, + "learning_rate": 3.5321180718959627e-07, + "loss": 0.9956, + "step": 9340 + }, + { + "epoch": 1.76, + "learning_rate": 3.5048611666717583e-07, + "loss": 0.996, + "step": 9345 + }, + { + "epoch": 1.76, + "learning_rate": 3.4777060178587506e-07, + "loss": 0.9994, + "step": 9350 + }, + { + "epoch": 1.76, + "learning_rate": 3.450652684887057e-07, + "loss": 1.0151, + "step": 9355 + }, + { + "epoch": 1.76, + "learning_rate": 3.4237012269639325e-07, + "loss": 0.9988, + "step": 9360 + }, + { + "epoch": 1.76, + "learning_rate": 3.3968517030736893e-07, + "loss": 0.9518, + "step": 9365 + }, + { + "epoch": 1.76, + "learning_rate": 3.370104171977545e-07, + "loss": 1.0358, + "step": 9370 + }, + { + "epoch": 1.77, + "learning_rate": 3.343458692213508e-07, + "loss": 0.9904, + "step": 9375 + }, + { + "epoch": 1.77, + "learning_rate": 3.316915322096248e-07, + "loss": 0.9831, + "step": 9380 + }, + { + "epoch": 1.77, + "learning_rate": 3.290474119716963e-07, + "loss": 0.9646, + "step": 9385 + }, + { + "epoch": 1.77, + "learning_rate": 3.2641351429432257e-07, + "loss": 0.9685, + "step": 9390 + }, + { + "epoch": 1.77, + "learning_rate": 3.2378984494189305e-07, + "loss": 1.0001, + "step": 9395 + }, + { + "epoch": 1.77, + "learning_rate": 3.2117640965640794e-07, + "loss": 0.9918, + "step": 9400 + }, + { + "epoch": 1.77, + "learning_rate": 3.185732141574732e-07, + "loss": 0.9764, + "step": 9405 + }, + { + "epoch": 1.77, + "learning_rate": 3.159802641422816e-07, + "loss": 1.0041, + "step": 9410 + }, + { + "epoch": 1.77, + "learning_rate": 3.1339756528560473e-07, + "loss": 0.9795, + "step": 9415 + }, + { + "epoch": 1.77, + "learning_rate": 3.1082512323977965e-07, + "loss": 0.9923, + "step": 9420 + }, + { + "epoch": 1.78, + "learning_rate": 3.0826294363469456e-07, + "loss": 0.9992, + "step": 9425 + }, + { + "epoch": 1.78, + "learning_rate": 3.0571103207778006e-07, + "loss": 1.0075, + "step": 9430 + }, + { + "epoch": 1.78, + "learning_rate": 3.03169394153992e-07, + "loss": 1.0872, + "step": 9435 + }, + { + "epoch": 1.78, + "learning_rate": 3.006380354258037e-07, + "loss": 1.0026, + "step": 9440 + }, + { + "epoch": 1.78, + "learning_rate": 2.9811696143319246e-07, + "loss": 0.9731, + "step": 9445 + }, + { + "epoch": 1.78, + "learning_rate": 2.956061776936248e-07, + "loss": 1.0383, + "step": 9450 + }, + { + "epoch": 1.78, + "learning_rate": 2.931056897020501e-07, + "loss": 0.9793, + "step": 9455 + }, + { + "epoch": 1.78, + "learning_rate": 2.9061550293088104e-07, + "loss": 0.9794, + "step": 9460 + }, + { + "epoch": 1.78, + "learning_rate": 2.8813562282998973e-07, + "loss": 0.9626, + "step": 9465 + }, + { + "epoch": 1.78, + "learning_rate": 2.856660548266887e-07, + "loss": 0.9648, + "step": 9470 + }, + { + "epoch": 1.78, + "learning_rate": 2.8320680432572245e-07, + "loss": 0.9974, + "step": 9475 + }, + { + "epoch": 1.79, + "learning_rate": 2.807578767092578e-07, + "loss": 0.9994, + "step": 9480 + }, + { + "epoch": 1.79, + "learning_rate": 2.7831927733686595e-07, + "loss": 0.9218, + "step": 9485 + }, + { + "epoch": 1.79, + "learning_rate": 2.7589101154551657e-07, + "loss": 0.9894, + "step": 9490 + }, + { + "epoch": 1.79, + "learning_rate": 2.7347308464956367e-07, + "loss": 1.0228, + "step": 9495 + }, + { + "epoch": 1.79, + "learning_rate": 2.710655019407332e-07, + "loss": 0.9644, + "step": 9500 + }, + { + "epoch": 1.79, + "learning_rate": 2.686682686881148e-07, + "loss": 0.9731, + "step": 9505 + }, + { + "epoch": 1.79, + "learning_rate": 2.662813901381445e-07, + "loss": 1.0288, + "step": 9510 + }, + { + "epoch": 1.79, + "learning_rate": 2.639048715145981e-07, + "loss": 0.9898, + "step": 9515 + }, + { + "epoch": 1.79, + "learning_rate": 2.615387180185802e-07, + "loss": 0.9612, + "step": 9520 + }, + { + "epoch": 1.79, + "learning_rate": 2.591829348285074e-07, + "loss": 0.9785, + "step": 9525 + }, + { + "epoch": 1.79, + "learning_rate": 2.56837527100105e-07, + "loss": 0.9774, + "step": 9530 + }, + { + "epoch": 1.8, + "learning_rate": 2.545024999663864e-07, + "loss": 0.9833, + "step": 9535 + }, + { + "epoch": 1.8, + "learning_rate": 2.5217785853764944e-07, + "loss": 1.0435, + "step": 9540 + }, + { + "epoch": 1.8, + "learning_rate": 2.498636079014627e-07, + "loss": 1.0122, + "step": 9545 + }, + { + "epoch": 1.8, + "learning_rate": 2.47559753122652e-07, + "loss": 0.9966, + "step": 9550 + }, + { + "epoch": 1.8, + "learning_rate": 2.452662992432936e-07, + "loss": 1.0294, + "step": 9555 + }, + { + "epoch": 1.8, + "learning_rate": 2.4298325128270017e-07, + "loss": 1.0121, + "step": 9560 + }, + { + "epoch": 1.8, + "learning_rate": 2.407106142374094e-07, + "loss": 0.9641, + "step": 9565 + }, + { + "epoch": 1.8, + "learning_rate": 2.3844839308117653e-07, + "loss": 0.9893, + "step": 9570 + }, + { + "epoch": 1.8, + "learning_rate": 2.3619659276495954e-07, + "loss": 0.998, + "step": 9575 + }, + { + "epoch": 1.8, + "learning_rate": 2.339552182169119e-07, + "loss": 0.9964, + "step": 9580 + }, + { + "epoch": 1.81, + "learning_rate": 2.31724274342367e-07, + "loss": 1.0028, + "step": 9585 + }, + { + "epoch": 1.81, + "learning_rate": 2.2950376602383195e-07, + "loss": 0.9907, + "step": 9590 + }, + { + "epoch": 1.81, + "learning_rate": 2.2729369812097667e-07, + "loss": 1.02, + "step": 9595 + }, + { + "epoch": 1.81, + "learning_rate": 2.2509407547061823e-07, + "loss": 0.9512, + "step": 9600 + }, + { + "epoch": 1.81, + "learning_rate": 2.2290490288671807e-07, + "loss": 0.9573, + "step": 9605 + }, + { + "epoch": 1.81, + "learning_rate": 2.2072618516036372e-07, + "loss": 0.9834, + "step": 9610 + }, + { + "epoch": 1.81, + "learning_rate": 2.1855792705976264e-07, + "loss": 1.0152, + "step": 9615 + }, + { + "epoch": 1.81, + "learning_rate": 2.1640013333023402e-07, + "loss": 0.9628, + "step": 9620 + }, + { + "epoch": 1.81, + "learning_rate": 2.1425280869419085e-07, + "loss": 1.0288, + "step": 9625 + }, + { + "epoch": 1.81, + "learning_rate": 2.1211595785113727e-07, + "loss": 0.9941, + "step": 9630 + }, + { + "epoch": 1.81, + "learning_rate": 2.0998958547765347e-07, + "loss": 0.9589, + "step": 9635 + }, + { + "epoch": 1.82, + "learning_rate": 2.0787369622738807e-07, + "loss": 0.9729, + "step": 9640 + }, + { + "epoch": 1.82, + "learning_rate": 2.0576829473104687e-07, + "loss": 0.9687, + "step": 9645 + }, + { + "epoch": 1.82, + "learning_rate": 2.0367338559638184e-07, + "loss": 0.9604, + "step": 9650 + }, + { + "epoch": 1.82, + "learning_rate": 2.0158897340818274e-07, + "loss": 0.9401, + "step": 9655 + }, + { + "epoch": 1.82, + "learning_rate": 1.9951506272826716e-07, + "loss": 0.9812, + "step": 9660 + }, + { + "epoch": 1.82, + "learning_rate": 1.9745165809546772e-07, + "loss": 0.9752, + "step": 9665 + }, + { + "epoch": 1.82, + "learning_rate": 1.9539876402562717e-07, + "loss": 0.9794, + "step": 9670 + }, + { + "epoch": 1.82, + "learning_rate": 1.9335638501158216e-07, + "loss": 1.0045, + "step": 9675 + }, + { + "epoch": 1.82, + "learning_rate": 1.9132452552315772e-07, + "loss": 1.0158, + "step": 9680 + }, + { + "epoch": 1.82, + "learning_rate": 1.8930319000715847e-07, + "loss": 0.9873, + "step": 9685 + }, + { + "epoch": 1.83, + "learning_rate": 1.8729238288735406e-07, + "loss": 0.944, + "step": 9690 + }, + { + "epoch": 1.83, + "learning_rate": 1.8529210856447545e-07, + "loss": 0.9513, + "step": 9695 + }, + { + "epoch": 1.83, + "learning_rate": 1.8330237141619912e-07, + "loss": 1.0099, + "step": 9700 + }, + { + "epoch": 1.83, + "learning_rate": 1.8132317579714232e-07, + "loss": 0.9773, + "step": 9705 + }, + { + "epoch": 1.83, + "learning_rate": 1.7935452603885074e-07, + "loss": 0.9655, + "step": 9710 + }, + { + "epoch": 1.83, + "learning_rate": 1.7739642644979071e-07, + "loss": 0.9862, + "step": 9715 + }, + { + "epoch": 1.83, + "learning_rate": 1.7544888131533987e-07, + "loss": 1.0405, + "step": 9720 + }, + { + "epoch": 1.83, + "learning_rate": 1.7351189489777486e-07, + "loss": 1.0087, + "step": 9725 + }, + { + "epoch": 1.83, + "learning_rate": 1.715854714362669e-07, + "loss": 1.0246, + "step": 9730 + }, + { + "epoch": 1.83, + "learning_rate": 1.6966961514686743e-07, + "loss": 0.9786, + "step": 9735 + }, + { + "epoch": 1.83, + "learning_rate": 1.677643302225007e-07, + "loss": 0.9962, + "step": 9740 + }, + { + "epoch": 1.84, + "learning_rate": 1.658696208329591e-07, + "loss": 0.9492, + "step": 9745 + }, + { + "epoch": 1.84, + "learning_rate": 1.639854911248867e-07, + "loss": 1.0017, + "step": 9750 + }, + { + "epoch": 1.84, + "learning_rate": 1.6211194522177287e-07, + "loss": 0.9738, + "step": 9755 + }, + { + "epoch": 1.84, + "learning_rate": 1.6024898722394765e-07, + "loss": 0.9666, + "step": 9760 + }, + { + "epoch": 1.84, + "learning_rate": 1.583966212085647e-07, + "loss": 0.9243, + "step": 9765 + }, + { + "epoch": 1.84, + "learning_rate": 1.565548512296e-07, + "loss": 1.0617, + "step": 9770 + }, + { + "epoch": 1.84, + "learning_rate": 1.5472368131783766e-07, + "loss": 1.0325, + "step": 9775 + }, + { + "epoch": 1.84, + "learning_rate": 1.5290311548086412e-07, + "loss": 1.0123, + "step": 9780 + }, + { + "epoch": 1.84, + "learning_rate": 1.5109315770305942e-07, + "loss": 0.9989, + "step": 9785 + }, + { + "epoch": 1.84, + "learning_rate": 1.492938119455839e-07, + "loss": 0.9708, + "step": 9790 + }, + { + "epoch": 1.84, + "learning_rate": 1.475050821463775e-07, + "loss": 1.0057, + "step": 9795 + }, + { + "epoch": 1.85, + "learning_rate": 1.457269722201432e-07, + "loss": 0.9792, + "step": 9800 + }, + { + "epoch": 1.85, + "learning_rate": 1.439594860583443e-07, + "loss": 0.9827, + "step": 9805 + }, + { + "epoch": 1.85, + "learning_rate": 1.422026275291921e-07, + "loss": 1.0001, + "step": 9810 + }, + { + "epoch": 1.85, + "learning_rate": 1.4045640047763986e-07, + "loss": 1.008, + "step": 9815 + }, + { + "epoch": 1.85, + "learning_rate": 1.387208087253733e-07, + "loss": 0.9918, + "step": 9820 + }, + { + "epoch": 1.85, + "learning_rate": 1.369958560708018e-07, + "loss": 0.9983, + "step": 9825 + }, + { + "epoch": 1.85, + "learning_rate": 1.3528154628905055e-07, + "loss": 1.0213, + "step": 9830 + }, + { + "epoch": 1.85, + "learning_rate": 1.3357788313195397e-07, + "loss": 1.0056, + "step": 9835 + }, + { + "epoch": 1.85, + "learning_rate": 1.3188487032804397e-07, + "loss": 1.0007, + "step": 9840 + }, + { + "epoch": 1.85, + "learning_rate": 1.3020251158254505e-07, + "loss": 0.971, + "step": 9845 + }, + { + "epoch": 1.86, + "learning_rate": 1.2853081057736415e-07, + "loss": 0.9911, + "step": 9850 + }, + { + "epoch": 1.86, + "learning_rate": 1.2686977097108366e-07, + "loss": 0.9954, + "step": 9855 + }, + { + "epoch": 1.86, + "learning_rate": 1.2521939639895343e-07, + "loss": 1.0268, + "step": 9860 + }, + { + "epoch": 1.86, + "learning_rate": 1.2357969047288143e-07, + "loss": 0.9966, + "step": 9865 + }, + { + "epoch": 1.86, + "learning_rate": 1.2195065678142826e-07, + "loss": 1.0024, + "step": 9870 + }, + { + "epoch": 1.86, + "learning_rate": 1.2033229888979704e-07, + "loss": 1.0428, + "step": 9875 + }, + { + "epoch": 1.86, + "learning_rate": 1.1872462033982624e-07, + "loss": 0.9671, + "step": 9880 + }, + { + "epoch": 1.86, + "learning_rate": 1.1712762464998307e-07, + "loss": 0.9602, + "step": 9885 + }, + { + "epoch": 1.86, + "learning_rate": 1.155413153153545e-07, + "loss": 0.9832, + "step": 9890 + }, + { + "epoch": 1.86, + "learning_rate": 1.1396569580763905e-07, + "loss": 0.9698, + "step": 9895 + }, + { + "epoch": 1.86, + "learning_rate": 1.1240076957514224e-07, + "loss": 0.9876, + "step": 9900 + }, + { + "epoch": 1.87, + "learning_rate": 1.108465400427644e-07, + "loss": 0.9923, + "step": 9905 + }, + { + "epoch": 1.87, + "learning_rate": 1.0930301061199856e-07, + "loss": 0.9866, + "step": 9910 + }, + { + "epoch": 1.87, + "learning_rate": 1.0777018466091749e-07, + "loss": 0.9829, + "step": 9915 + }, + { + "epoch": 1.87, + "learning_rate": 1.0624806554417e-07, + "loss": 1.0067, + "step": 9920 + }, + { + "epoch": 1.87, + "learning_rate": 1.0473665659297361e-07, + "loss": 1.0359, + "step": 9925 + }, + { + "epoch": 1.87, + "learning_rate": 1.032359611151046e-07, + "loss": 0.9923, + "step": 9930 + }, + { + "epoch": 1.87, + "learning_rate": 1.0174598239489353e-07, + "loss": 0.9796, + "step": 9935 + }, + { + "epoch": 1.87, + "learning_rate": 1.0026672369321589e-07, + "loss": 0.9877, + "step": 9940 + }, + { + "epoch": 1.87, + "learning_rate": 9.879818824748755e-08, + "loss": 0.9713, + "step": 9945 + }, + { + "epoch": 1.87, + "learning_rate": 9.734037927165486e-08, + "loss": 0.9924, + "step": 9950 + }, + { + "epoch": 1.88, + "learning_rate": 9.589329995618845e-08, + "loss": 0.9724, + "step": 9955 + }, + { + "epoch": 1.88, + "learning_rate": 9.445695346807837e-08, + "loss": 1.0025, + "step": 9960 + }, + { + "epoch": 1.88, + "learning_rate": 9.303134295082339e-08, + "loss": 0.9767, + "step": 9965 + }, + { + "epoch": 1.88, + "learning_rate": 9.161647152442887e-08, + "loss": 1.0034, + "step": 9970 + }, + { + "epoch": 1.88, + "learning_rate": 9.021234228539511e-08, + "loss": 0.96, + "step": 9975 + }, + { + "epoch": 1.88, + "learning_rate": 8.881895830671227e-08, + "loss": 1.0509, + "step": 9980 + }, + { + "epoch": 1.88, + "learning_rate": 8.743632263785606e-08, + "loss": 0.9745, + "step": 9985 + }, + { + "epoch": 1.88, + "learning_rate": 8.606443830477817e-08, + "loss": 0.9531, + "step": 9990 + }, + { + "epoch": 1.88, + "learning_rate": 8.470330830990026e-08, + "loss": 1.0242, + "step": 9995 + }, + { + "epoch": 1.88, + "learning_rate": 8.335293563210889e-08, + "loss": 0.995, + "step": 10000 + }, + { + "epoch": 1.88, + "learning_rate": 8.201332322674504e-08, + "loss": 1.0034, + "step": 10005 + }, + { + "epoch": 1.89, + "learning_rate": 8.068447402560464e-08, + "loss": 1.0096, + "step": 10010 + }, + { + "epoch": 1.89, + "learning_rate": 7.93663909369241e-08, + "loss": 0.9714, + "step": 10015 + }, + { + "epoch": 1.89, + "learning_rate": 7.805907684537984e-08, + "loss": 1.0069, + "step": 10020 + }, + { + "epoch": 1.89, + "learning_rate": 7.676253461207873e-08, + "loss": 0.9708, + "step": 10025 + }, + { + "epoch": 1.89, + "learning_rate": 7.547676707455431e-08, + "loss": 0.9917, + "step": 10030 + }, + { + "epoch": 1.89, + "learning_rate": 7.420177704675846e-08, + "loss": 1.0077, + "step": 10035 + }, + { + "epoch": 1.89, + "learning_rate": 7.293756731905633e-08, + "loss": 0.9175, + "step": 10040 + }, + { + "epoch": 1.89, + "learning_rate": 7.168414065821917e-08, + "loss": 1.0149, + "step": 10045 + }, + { + "epoch": 1.89, + "learning_rate": 7.044149980742099e-08, + "loss": 0.9853, + "step": 10050 + }, + { + "epoch": 1.89, + "learning_rate": 6.920964748622805e-08, + "loss": 1.0303, + "step": 10055 + }, + { + "epoch": 1.89, + "learning_rate": 6.798858639059768e-08, + "loss": 1.0283, + "step": 10060 + }, + { + "epoch": 1.9, + "learning_rate": 6.677831919286892e-08, + "loss": 0.9688, + "step": 10065 + }, + { + "epoch": 1.9, + "learning_rate": 6.557884854175855e-08, + "loss": 0.994, + "step": 10070 + }, + { + "epoch": 1.9, + "learning_rate": 6.43901770623545e-08, + "loss": 0.9554, + "step": 10075 + }, + { + "epoch": 1.9, + "learning_rate": 6.321230735611083e-08, + "loss": 1.0381, + "step": 10080 + }, + { + "epoch": 1.9, + "learning_rate": 6.204524200083994e-08, + "loss": 0.9653, + "step": 10085 + }, + { + "epoch": 1.9, + "learning_rate": 6.088898355071094e-08, + "loss": 0.9959, + "step": 10090 + }, + { + "epoch": 1.9, + "learning_rate": 5.974353453623849e-08, + "loss": 1.0315, + "step": 10095 + }, + { + "epoch": 1.9, + "learning_rate": 5.8608897464283444e-08, + "loss": 0.9617, + "step": 10100 + }, + { + "epoch": 1.9, + "learning_rate": 5.748507481804222e-08, + "loss": 1.0278, + "step": 10105 + }, + { + "epoch": 1.9, + "learning_rate": 5.637206905704462e-08, + "loss": 1.0349, + "step": 10110 + }, + { + "epoch": 1.91, + "learning_rate": 5.526988261714661e-08, + "loss": 0.9845, + "step": 10115 + }, + { + "epoch": 1.91, + "learning_rate": 5.417851791052642e-08, + "loss": 0.9364, + "step": 10120 + }, + { + "epoch": 1.91, + "learning_rate": 5.309797732567734e-08, + "loss": 1.0102, + "step": 10125 + }, + { + "epoch": 1.91, + "learning_rate": 5.2028263227404393e-08, + "loss": 1.0084, + "step": 10130 + }, + { + "epoch": 1.91, + "learning_rate": 5.0969377956817664e-08, + "loss": 0.9695, + "step": 10135 + }, + { + "epoch": 1.91, + "learning_rate": 4.992132383133008e-08, + "loss": 0.9675, + "step": 10140 + }, + { + "epoch": 1.91, + "learning_rate": 4.888410314464742e-08, + "loss": 0.9477, + "step": 10145 + }, + { + "epoch": 1.91, + "learning_rate": 4.7857718166767764e-08, + "loss": 1.0022, + "step": 10150 + }, + { + "epoch": 1.91, + "learning_rate": 4.6842171143974824e-08, + "loss": 1.0033, + "step": 10155 + }, + { + "epoch": 1.91, + "learning_rate": 4.583746429883129e-08, + "loss": 1.0093, + "step": 10160 + }, + { + "epoch": 1.91, + "learning_rate": 4.484359983017828e-08, + "loss": 1.0255, + "step": 10165 + }, + { + "epoch": 1.92, + "learning_rate": 4.386057991312531e-08, + "loss": 1.0021, + "step": 10170 + }, + { + "epoch": 1.92, + "learning_rate": 4.288840669905148e-08, + "loss": 1.0019, + "step": 10175 + }, + { + "epoch": 1.92, + "learning_rate": 4.1927082315593725e-08, + "loss": 0.9963, + "step": 10180 + }, + { + "epoch": 1.92, + "learning_rate": 4.0976608866648005e-08, + "loss": 1.0084, + "step": 10185 + }, + { + "epoch": 1.92, + "learning_rate": 4.003698843236314e-08, + "loss": 0.9378, + "step": 10190 + }, + { + "epoch": 1.92, + "learning_rate": 3.91082230691342e-08, + "loss": 0.9991, + "step": 10195 + }, + { + "epoch": 1.92, + "learning_rate": 3.8190314809600226e-08, + "loss": 0.981, + "step": 10200 + }, + { + "epoch": 1.92, + "learning_rate": 3.728326566263818e-08, + "loss": 1.0188, + "step": 10205 + }, + { + "epoch": 1.92, + "learning_rate": 3.63870776133618e-08, + "loss": 0.9712, + "step": 10210 + }, + { + "epoch": 1.92, + "learning_rate": 3.5501752623112174e-08, + "loss": 1.0227, + "step": 10215 + }, + { + "epoch": 1.92, + "learning_rate": 3.462729262945774e-08, + "loss": 1.0069, + "step": 10220 + }, + { + "epoch": 1.93, + "learning_rate": 3.376369954618819e-08, + "loss": 1.0695, + "step": 10225 + }, + { + "epoch": 1.93, + "learning_rate": 3.291097526330944e-08, + "loss": 0.9937, + "step": 10230 + }, + { + "epoch": 1.93, + "learning_rate": 3.2069121647043125e-08, + "loss": 1.0103, + "step": 10235 + }, + { + "epoch": 1.93, + "learning_rate": 3.1238140539817105e-08, + "loss": 0.972, + "step": 10240 + }, + { + "epoch": 1.93, + "learning_rate": 3.041803376026664e-08, + "loss": 0.9385, + "step": 10245 + }, + { + "epoch": 1.93, + "learning_rate": 2.960880310322656e-08, + "loss": 1.0596, + "step": 10250 + }, + { + "epoch": 1.93, + "learning_rate": 2.8810450339730177e-08, + "loss": 0.9672, + "step": 10255 + }, + { + "epoch": 1.93, + "learning_rate": 2.8022977217002646e-08, + "loss": 0.9674, + "step": 10260 + }, + { + "epoch": 1.93, + "learning_rate": 2.7246385458459814e-08, + "loss": 0.9845, + "step": 10265 + }, + { + "epoch": 1.93, + "learning_rate": 2.6480676763702696e-08, + "loss": 1.0514, + "step": 10270 + }, + { + "epoch": 1.94, + "learning_rate": 2.572585280851414e-08, + "loss": 0.9756, + "step": 10275 + }, + { + "epoch": 1.94, + "learning_rate": 2.4981915244856047e-08, + "loss": 0.9719, + "step": 10280 + }, + { + "epoch": 1.94, + "learning_rate": 2.4248865700864377e-08, + "loss": 0.9828, + "step": 10285 + }, + { + "epoch": 1.94, + "learning_rate": 2.3526705780846926e-08, + "loss": 1.0349, + "step": 10290 + }, + { + "epoch": 1.94, + "learning_rate": 2.2815437065277223e-08, + "loss": 1.0193, + "step": 10295 + }, + { + "epoch": 1.94, + "learning_rate": 2.21150611107962e-08, + "loss": 0.9598, + "step": 10300 + }, + { + "epoch": 1.94, + "learning_rate": 2.1425579450202185e-08, + "loss": 0.9831, + "step": 10305 + }, + { + "epoch": 1.94, + "learning_rate": 2.074699359245369e-08, + "loss": 1.0146, + "step": 10310 + }, + { + "epoch": 1.94, + "learning_rate": 2.0079305022661088e-08, + "loss": 1.0242, + "step": 10315 + }, + { + "epoch": 1.94, + "learning_rate": 1.942251520208771e-08, + "loss": 0.9737, + "step": 10320 + }, + { + "epoch": 1.94, + "learning_rate": 1.8776625568142637e-08, + "loss": 1.0052, + "step": 10325 + }, + { + "epoch": 1.95, + "learning_rate": 1.8141637534380697e-08, + "loss": 0.9711, + "step": 10330 + }, + { + "epoch": 1.95, + "learning_rate": 1.7517552490498024e-08, + "loss": 0.982, + "step": 10335 + }, + { + "epoch": 1.95, + "learning_rate": 1.690437180232818e-08, + "loss": 1.0527, + "step": 10340 + }, + { + "epoch": 1.95, + "learning_rate": 1.6302096811841584e-08, + "loss": 1.0409, + "step": 10345 + }, + { + "epoch": 1.95, + "learning_rate": 1.5710728837139976e-08, + "loss": 0.9326, + "step": 10350 + }, + { + "epoch": 1.95, + "learning_rate": 1.5130269172455303e-08, + "loss": 0.9897, + "step": 10355 + }, + { + "epoch": 1.95, + "learning_rate": 1.4560719088145825e-08, + "loss": 0.9835, + "step": 10360 + }, + { + "epoch": 1.95, + "learning_rate": 1.4002079830693904e-08, + "loss": 1.0158, + "step": 10365 + }, + { + "epoch": 1.95, + "learning_rate": 1.3454352622703227e-08, + "loss": 0.9689, + "step": 10370 + }, + { + "epoch": 1.95, + "learning_rate": 1.2917538662896023e-08, + "loss": 1.0638, + "step": 10375 + }, + { + "epoch": 1.96, + "learning_rate": 1.23916391261103e-08, + "loss": 0.9282, + "step": 10380 + }, + { + "epoch": 1.96, + "learning_rate": 1.1876655163298167e-08, + "loss": 1.026, + "step": 10385 + }, + { + "epoch": 1.96, + "learning_rate": 1.1372587901521403e-08, + "loss": 1.0539, + "step": 10390 + }, + { + "epoch": 1.96, + "learning_rate": 1.0879438443950341e-08, + "loss": 0.9515, + "step": 10395 + }, + { + "epoch": 1.96, + "learning_rate": 1.0397207869862203e-08, + "loss": 0.9298, + "step": 10400 + }, + { + "epoch": 1.96, + "learning_rate": 9.92589723463777e-09, + "loss": 1.0442, + "step": 10405 + }, + { + "epoch": 1.96, + "learning_rate": 9.465507569757505e-09, + "loss": 1.0101, + "step": 10410 + }, + { + "epoch": 1.96, + "learning_rate": 9.016039882802641e-09, + "loss": 0.9829, + "step": 10415 + }, + { + "epoch": 1.96, + "learning_rate": 8.57749515745021e-09, + "loss": 1.0031, + "step": 10420 + }, + { + "epoch": 1.96, + "learning_rate": 8.149874353473031e-09, + "loss": 0.9802, + "step": 10425 + }, + { + "epoch": 1.96, + "learning_rate": 7.733178406735265e-09, + "loss": 0.9793, + "step": 10430 + }, + { + "epoch": 1.97, + "learning_rate": 7.327408229191868e-09, + "loss": 0.9753, + "step": 10435 + }, + { + "epoch": 1.97, + "learning_rate": 6.932564708887479e-09, + "loss": 1.0311, + "step": 10440 + }, + { + "epoch": 1.97, + "learning_rate": 6.548648709951422e-09, + "loss": 0.9717, + "step": 10445 + }, + { + "epoch": 1.97, + "learning_rate": 6.1756610725993706e-09, + "loss": 0.9608, + "step": 10450 + }, + { + "epoch": 1.97, + "learning_rate": 5.8136026131294655e-09, + "loss": 0.984, + "step": 10455 + }, + { + "epoch": 1.97, + "learning_rate": 5.462474123919536e-09, + "loss": 1.0421, + "step": 10460 + }, + { + "epoch": 1.97, + "learning_rate": 5.122276373428769e-09, + "loss": 1.0026, + "step": 10465 + }, + { + "epoch": 1.97, + "learning_rate": 4.793010106193264e-09, + "loss": 0.9449, + "step": 10470 + }, + { + "epoch": 1.97, + "learning_rate": 4.474676042823811e-09, + "loss": 0.9989, + "step": 10475 + }, + { + "epoch": 1.97, + "learning_rate": 4.167274880008676e-09, + "loss": 1.0245, + "step": 10480 + }, + { + "epoch": 1.97, + "learning_rate": 3.870807290505263e-09, + "loss": 0.9744, + "step": 10485 + }, + { + "epoch": 1.98, + "learning_rate": 3.585273923145671e-09, + "loss": 0.9621, + "step": 10490 + }, + { + "epoch": 1.98, + "learning_rate": 3.310675402830588e-09, + "loss": 0.99, + "step": 10495 + }, + { + "epoch": 1.98, + "learning_rate": 3.0470123305287314e-09, + "loss": 1.0376, + "step": 10500 + }, + { + "epoch": 1.98, + "learning_rate": 2.7942852832779645e-09, + "loss": 1.0177, + "step": 10505 + }, + { + "epoch": 1.98, + "learning_rate": 2.5524948141802952e-09, + "loss": 1.0608, + "step": 10510 + }, + { + "epoch": 1.98, + "learning_rate": 2.3216414524041e-09, + "loss": 1.013, + "step": 10515 + }, + { + "epoch": 1.98, + "learning_rate": 2.10172570318079e-09, + "loss": 1.0446, + "step": 10520 + }, + { + "epoch": 1.98, + "learning_rate": 1.8927480478042605e-09, + "loss": 0.9628, + "step": 10525 + }, + { + "epoch": 1.98, + "learning_rate": 1.694708943630885e-09, + "loss": 0.9825, + "step": 10530 + }, + { + "epoch": 1.98, + "learning_rate": 1.5076088240756347e-09, + "loss": 1.0036, + "step": 10535 + }, + { + "epoch": 1.99, + "learning_rate": 1.3314480986159616e-09, + "loss": 0.9861, + "step": 10540 + }, + { + "epoch": 1.99, + "learning_rate": 1.1662271527851377e-09, + "loss": 1.0007, + "step": 10545 + }, + { + "epoch": 1.99, + "learning_rate": 1.0119463481761405e-09, + "loss": 0.9734, + "step": 10550 + }, + { + "epoch": 1.99, + "learning_rate": 8.68606022438323e-10, + "loss": 1.0294, + "step": 10555 + }, + { + "epoch": 1.99, + "learning_rate": 7.362064892774134e-10, + "loss": 0.9354, + "step": 10560 + }, + { + "epoch": 1.99, + "learning_rate": 6.147480384555149e-10, + "loss": 0.9794, + "step": 10565 + }, + { + "epoch": 1.99, + "learning_rate": 5.042309357888853e-10, + "loss": 1.0016, + "step": 10570 + }, + { + "epoch": 1.99, + "learning_rate": 4.0465542314793715e-10, + "loss": 0.9636, + "step": 10575 + }, + { + "epoch": 1.99, + "learning_rate": 3.1602171845834805e-10, + "loss": 0.9528, + "step": 10580 + }, + { + "epoch": 1.99, + "learning_rate": 2.3833001569772975e-10, + "loss": 0.9932, + "step": 10585 + }, + { + "epoch": 1.99, + "learning_rate": 1.7158048489784863e-10, + "loss": 0.9984, + "step": 10590 + }, + { + "epoch": 2.0, + "learning_rate": 1.1577327214240542e-10, + "loss": 0.9615, + "step": 10595 + }, + { + "epoch": 2.0, + "learning_rate": 7.090849956759016e-11, + "loss": 1.0146, + "step": 10600 + }, + { + "epoch": 2.0, + "learning_rate": 3.6986265362082276e-11, + "loss": 0.9899, + "step": 10605 + }, + { + "epoch": 2.0, + "learning_rate": 1.4006643765385186e-11, + "loss": 1.0174, + "step": 10610 + }, + { + "epoch": 2.0, + "learning_rate": 1.9696850700468007e-12, + "loss": 1.0443, + "step": 10615 + }, + { + "epoch": 2.0, + "step": 10618, + "total_flos": 4.810450444432179e+17, + "train_loss": 1.1182698494837005, + "train_runtime": 99534.9844, + "train_samples_per_second": 13.655, + "train_steps_per_second": 0.107 + } + ], + "logging_steps": 5, + "max_steps": 10618, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 1000, + "total_flos": 4.810450444432179e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}