{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 14968, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.680919294494922e-05, "grad_norm": 0.3203125, "learning_rate": 1.3360053440213764e-07, "loss": 1.5739, "step": 1 }, { "epoch": 0.00033404596472474615, "grad_norm": 0.431640625, "learning_rate": 6.680026720106881e-07, "loss": 1.6599, "step": 5 }, { "epoch": 0.0006680919294494923, "grad_norm": 0.263671875, "learning_rate": 1.3360053440213763e-06, "loss": 1.6221, "step": 10 }, { "epoch": 0.0010021378941742383, "grad_norm": 0.361328125, "learning_rate": 2.004008016032064e-06, "loss": 1.5389, "step": 15 }, { "epoch": 0.0013361838588989846, "grad_norm": 0.318359375, "learning_rate": 2.6720106880427525e-06, "loss": 1.5856, "step": 20 }, { "epoch": 0.0016702298236237307, "grad_norm": 0.33984375, "learning_rate": 3.3400133600534405e-06, "loss": 1.5469, "step": 25 }, { "epoch": 0.0020042757883484766, "grad_norm": 0.361328125, "learning_rate": 4.008016032064128e-06, "loss": 1.5633, "step": 30 }, { "epoch": 0.0023383217530732227, "grad_norm": 0.279296875, "learning_rate": 4.676018704074817e-06, "loss": 1.5587, "step": 35 }, { "epoch": 0.002672367717797969, "grad_norm": 0.353515625, "learning_rate": 5.344021376085505e-06, "loss": 1.6141, "step": 40 }, { "epoch": 0.0030064136825227153, "grad_norm": 0.318359375, "learning_rate": 6.012024048096193e-06, "loss": 1.5299, "step": 45 }, { "epoch": 0.0033404596472474614, "grad_norm": 0.283203125, "learning_rate": 6.680026720106881e-06, "loss": 1.5561, "step": 50 }, { "epoch": 0.0036745056119722075, "grad_norm": 0.35546875, "learning_rate": 7.348029392117569e-06, "loss": 1.5794, "step": 55 }, { "epoch": 0.004008551576696953, "grad_norm": 0.310546875, "learning_rate": 8.016032064128256e-06, "loss": 1.5619, "step": 60 }, { "epoch": 0.004342597541421699, "grad_norm": 0.2265625, "learning_rate": 8.684034736138945e-06, "loss": 1.5249, "step": 65 }, { "epoch": 0.004676643506146445, "grad_norm": 0.77734375, "learning_rate": 9.352037408149633e-06, "loss": 1.5814, "step": 70 }, { "epoch": 0.0050106894708711915, "grad_norm": 0.2138671875, "learning_rate": 1.0020040080160322e-05, "loss": 1.483, "step": 75 }, { "epoch": 0.005344735435595938, "grad_norm": 0.267578125, "learning_rate": 1.068804275217101e-05, "loss": 1.3885, "step": 80 }, { "epoch": 0.0056787814003206845, "grad_norm": 0.640625, "learning_rate": 1.1356045424181697e-05, "loss": 1.4409, "step": 85 }, { "epoch": 0.006012827365045431, "grad_norm": 0.33984375, "learning_rate": 1.2024048096192385e-05, "loss": 1.4832, "step": 90 }, { "epoch": 0.006346873329770177, "grad_norm": 0.236328125, "learning_rate": 1.2692050768203074e-05, "loss": 1.4454, "step": 95 }, { "epoch": 0.006680919294494923, "grad_norm": 0.26171875, "learning_rate": 1.3360053440213762e-05, "loss": 1.5162, "step": 100 }, { "epoch": 0.007014965259219669, "grad_norm": 0.251953125, "learning_rate": 1.4028056112224449e-05, "loss": 1.4367, "step": 105 }, { "epoch": 0.007349011223944415, "grad_norm": 0.212890625, "learning_rate": 1.4696058784235137e-05, "loss": 1.4453, "step": 110 }, { "epoch": 0.007683057188669161, "grad_norm": 0.23046875, "learning_rate": 1.5364061456245828e-05, "loss": 1.4518, "step": 115 }, { "epoch": 0.008017103153393906, "grad_norm": 0.2060546875, "learning_rate": 1.6032064128256513e-05, "loss": 1.4095, "step": 120 }, { "epoch": 0.008351149118118652, "grad_norm": 0.1904296875, "learning_rate": 1.6700066800267204e-05, "loss": 1.3303, "step": 125 }, { "epoch": 0.008685195082843399, "grad_norm": 0.2265625, "learning_rate": 1.736806947227789e-05, "loss": 1.5248, "step": 130 }, { "epoch": 0.009019241047568145, "grad_norm": 0.203125, "learning_rate": 1.8036072144288578e-05, "loss": 1.3274, "step": 135 }, { "epoch": 0.00935328701229289, "grad_norm": 0.177734375, "learning_rate": 1.8704074816299266e-05, "loss": 1.4265, "step": 140 }, { "epoch": 0.009687332977017637, "grad_norm": 0.259765625, "learning_rate": 1.9372077488309955e-05, "loss": 1.3648, "step": 145 }, { "epoch": 0.010021378941742383, "grad_norm": 0.169921875, "learning_rate": 2.0040080160320643e-05, "loss": 1.2991, "step": 150 }, { "epoch": 0.010355424906467129, "grad_norm": 0.1865234375, "learning_rate": 2.070808283233133e-05, "loss": 1.4394, "step": 155 }, { "epoch": 0.010689470871191877, "grad_norm": 0.17578125, "learning_rate": 2.137608550434202e-05, "loss": 1.3224, "step": 160 }, { "epoch": 0.011023516835916623, "grad_norm": 0.1767578125, "learning_rate": 2.2044088176352705e-05, "loss": 1.3659, "step": 165 }, { "epoch": 0.011357562800641369, "grad_norm": 0.1689453125, "learning_rate": 2.2712090848363394e-05, "loss": 1.3688, "step": 170 }, { "epoch": 0.011691608765366115, "grad_norm": 0.1865234375, "learning_rate": 2.3380093520374082e-05, "loss": 1.3416, "step": 175 }, { "epoch": 0.012025654730090861, "grad_norm": 0.16796875, "learning_rate": 2.404809619238477e-05, "loss": 1.3394, "step": 180 }, { "epoch": 0.012359700694815607, "grad_norm": 0.1572265625, "learning_rate": 2.471609886439546e-05, "loss": 1.2949, "step": 185 }, { "epoch": 0.012693746659540353, "grad_norm": 0.1611328125, "learning_rate": 2.5384101536406147e-05, "loss": 1.4109, "step": 190 }, { "epoch": 0.0130277926242651, "grad_norm": 0.1982421875, "learning_rate": 2.6052104208416833e-05, "loss": 1.3706, "step": 195 }, { "epoch": 0.013361838588989846, "grad_norm": 0.166015625, "learning_rate": 2.6720106880427524e-05, "loss": 1.3242, "step": 200 }, { "epoch": 0.013695884553714592, "grad_norm": 0.1962890625, "learning_rate": 2.7388109552438213e-05, "loss": 1.3187, "step": 205 }, { "epoch": 0.014029930518439338, "grad_norm": 0.1806640625, "learning_rate": 2.8056112224448898e-05, "loss": 1.4374, "step": 210 }, { "epoch": 0.014363976483164084, "grad_norm": 0.1572265625, "learning_rate": 2.8724114896459586e-05, "loss": 1.2938, "step": 215 }, { "epoch": 0.01469802244788883, "grad_norm": 0.21875, "learning_rate": 2.9392117568470275e-05, "loss": 1.3603, "step": 220 }, { "epoch": 0.015032068412613576, "grad_norm": 0.20703125, "learning_rate": 3.0060120240480967e-05, "loss": 1.3989, "step": 225 }, { "epoch": 0.015366114377338322, "grad_norm": 0.1884765625, "learning_rate": 3.0728122912491655e-05, "loss": 1.3631, "step": 230 }, { "epoch": 0.015700160342063067, "grad_norm": 0.1748046875, "learning_rate": 3.139612558450234e-05, "loss": 1.318, "step": 235 }, { "epoch": 0.016034206306787813, "grad_norm": 0.2060546875, "learning_rate": 3.2064128256513025e-05, "loss": 1.344, "step": 240 }, { "epoch": 0.01636825227151256, "grad_norm": 0.1875, "learning_rate": 3.273213092852371e-05, "loss": 1.3326, "step": 245 }, { "epoch": 0.016702298236237305, "grad_norm": 0.216796875, "learning_rate": 3.340013360053441e-05, "loss": 1.2978, "step": 250 }, { "epoch": 0.01703634420096205, "grad_norm": 0.228515625, "learning_rate": 3.4068136272545094e-05, "loss": 1.2572, "step": 255 }, { "epoch": 0.017370390165686797, "grad_norm": 0.1943359375, "learning_rate": 3.473613894455578e-05, "loss": 1.2778, "step": 260 }, { "epoch": 0.017704436130411543, "grad_norm": 0.30078125, "learning_rate": 3.5404141616566464e-05, "loss": 1.2495, "step": 265 }, { "epoch": 0.01803848209513629, "grad_norm": 0.1826171875, "learning_rate": 3.6072144288577156e-05, "loss": 1.3069, "step": 270 }, { "epoch": 0.018372528059861035, "grad_norm": 0.2099609375, "learning_rate": 3.674014696058785e-05, "loss": 1.3499, "step": 275 }, { "epoch": 0.01870657402458578, "grad_norm": 0.2255859375, "learning_rate": 3.740814963259853e-05, "loss": 1.3404, "step": 280 }, { "epoch": 0.019040619989310528, "grad_norm": 0.21875, "learning_rate": 3.807615230460922e-05, "loss": 1.2487, "step": 285 }, { "epoch": 0.019374665954035274, "grad_norm": 0.212890625, "learning_rate": 3.874415497661991e-05, "loss": 1.3379, "step": 290 }, { "epoch": 0.01970871191876002, "grad_norm": 0.1953125, "learning_rate": 3.94121576486306e-05, "loss": 1.2742, "step": 295 }, { "epoch": 0.020042757883484766, "grad_norm": 0.193359375, "learning_rate": 4.0080160320641287e-05, "loss": 1.3057, "step": 300 }, { "epoch": 0.020376803848209512, "grad_norm": 0.185546875, "learning_rate": 4.074816299265197e-05, "loss": 1.2915, "step": 305 }, { "epoch": 0.020710849812934258, "grad_norm": 0.1962890625, "learning_rate": 4.141616566466266e-05, "loss": 1.2924, "step": 310 }, { "epoch": 0.021044895777659008, "grad_norm": 0.19140625, "learning_rate": 4.208416833667335e-05, "loss": 1.2266, "step": 315 }, { "epoch": 0.021378941742383754, "grad_norm": 0.2080078125, "learning_rate": 4.275217100868404e-05, "loss": 1.3178, "step": 320 }, { "epoch": 0.0217129877071085, "grad_norm": 0.2060546875, "learning_rate": 4.3420173680694725e-05, "loss": 1.2772, "step": 325 }, { "epoch": 0.022047033671833246, "grad_norm": 0.189453125, "learning_rate": 4.408817635270541e-05, "loss": 1.3171, "step": 330 }, { "epoch": 0.022381079636557992, "grad_norm": 0.236328125, "learning_rate": 4.47561790247161e-05, "loss": 1.3287, "step": 335 }, { "epoch": 0.022715125601282738, "grad_norm": 0.2119140625, "learning_rate": 4.542418169672679e-05, "loss": 1.3499, "step": 340 }, { "epoch": 0.023049171566007484, "grad_norm": 0.2216796875, "learning_rate": 4.609218436873748e-05, "loss": 1.2967, "step": 345 }, { "epoch": 0.02338321753073223, "grad_norm": 0.2236328125, "learning_rate": 4.6760187040748164e-05, "loss": 1.2266, "step": 350 }, { "epoch": 0.023717263495456976, "grad_norm": 0.2021484375, "learning_rate": 4.742818971275885e-05, "loss": 1.265, "step": 355 }, { "epoch": 0.024051309460181722, "grad_norm": 0.2021484375, "learning_rate": 4.809619238476954e-05, "loss": 1.2426, "step": 360 }, { "epoch": 0.02438535542490647, "grad_norm": 0.244140625, "learning_rate": 4.876419505678023e-05, "loss": 1.3144, "step": 365 }, { "epoch": 0.024719401389631215, "grad_norm": 0.208984375, "learning_rate": 4.943219772879092e-05, "loss": 1.2892, "step": 370 }, { "epoch": 0.02505344735435596, "grad_norm": 0.216796875, "learning_rate": 5.01002004008016e-05, "loss": 1.1596, "step": 375 }, { "epoch": 0.025387493319080707, "grad_norm": 0.2119140625, "learning_rate": 5.0768203072812295e-05, "loss": 1.3151, "step": 380 }, { "epoch": 0.025721539283805453, "grad_norm": 0.2236328125, "learning_rate": 5.143620574482299e-05, "loss": 1.2643, "step": 385 }, { "epoch": 0.0260555852485302, "grad_norm": 0.2041015625, "learning_rate": 5.2104208416833665e-05, "loss": 1.2695, "step": 390 }, { "epoch": 0.026389631213254945, "grad_norm": 0.2041015625, "learning_rate": 5.277221108884436e-05, "loss": 1.2472, "step": 395 }, { "epoch": 0.02672367717797969, "grad_norm": 0.2314453125, "learning_rate": 5.344021376085505e-05, "loss": 1.3066, "step": 400 }, { "epoch": 0.027057723142704437, "grad_norm": 0.251953125, "learning_rate": 5.4108216432865734e-05, "loss": 1.2664, "step": 405 }, { "epoch": 0.027391769107429183, "grad_norm": 0.251953125, "learning_rate": 5.4776219104876426e-05, "loss": 1.221, "step": 410 }, { "epoch": 0.02772581507215393, "grad_norm": 0.201171875, "learning_rate": 5.5444221776887104e-05, "loss": 1.3246, "step": 415 }, { "epoch": 0.028059861036878676, "grad_norm": 0.22265625, "learning_rate": 5.6112224448897796e-05, "loss": 1.2621, "step": 420 }, { "epoch": 0.028393907001603422, "grad_norm": 0.2578125, "learning_rate": 5.6780227120908494e-05, "loss": 1.3305, "step": 425 }, { "epoch": 0.028727952966328168, "grad_norm": 0.2080078125, "learning_rate": 5.744822979291917e-05, "loss": 1.2464, "step": 430 }, { "epoch": 0.029061998931052914, "grad_norm": 0.2001953125, "learning_rate": 5.8116232464929865e-05, "loss": 1.284, "step": 435 }, { "epoch": 0.02939604489577766, "grad_norm": 0.2109375, "learning_rate": 5.878423513694055e-05, "loss": 1.3277, "step": 440 }, { "epoch": 0.029730090860502406, "grad_norm": 0.2216796875, "learning_rate": 5.945223780895124e-05, "loss": 1.2383, "step": 445 }, { "epoch": 0.030064136825227152, "grad_norm": 0.2080078125, "learning_rate": 6.012024048096193e-05, "loss": 1.2604, "step": 450 }, { "epoch": 0.0303981827899519, "grad_norm": 0.228515625, "learning_rate": 6.078824315297261e-05, "loss": 1.302, "step": 455 }, { "epoch": 0.030732228754676644, "grad_norm": 0.1982421875, "learning_rate": 6.145624582498331e-05, "loss": 1.2275, "step": 460 }, { "epoch": 0.03106627471940139, "grad_norm": 0.21484375, "learning_rate": 6.212424849699398e-05, "loss": 1.3504, "step": 465 }, { "epoch": 0.03140032068412613, "grad_norm": 0.2080078125, "learning_rate": 6.279225116900468e-05, "loss": 1.2612, "step": 470 }, { "epoch": 0.03173436664885088, "grad_norm": 0.23828125, "learning_rate": 6.346025384101537e-05, "loss": 1.2555, "step": 475 }, { "epoch": 0.032068412613575625, "grad_norm": 0.1943359375, "learning_rate": 6.412825651302605e-05, "loss": 1.2519, "step": 480 }, { "epoch": 0.032402458578300375, "grad_norm": 0.2255859375, "learning_rate": 6.479625918503675e-05, "loss": 1.2233, "step": 485 }, { "epoch": 0.03273650454302512, "grad_norm": 0.2080078125, "learning_rate": 6.546426185704742e-05, "loss": 1.2148, "step": 490 }, { "epoch": 0.03307055050774987, "grad_norm": 0.2021484375, "learning_rate": 6.613226452905812e-05, "loss": 1.2288, "step": 495 }, { "epoch": 0.03340459647247461, "grad_norm": 0.224609375, "learning_rate": 6.680026720106882e-05, "loss": 1.3175, "step": 500 }, { "epoch": 0.03373864243719936, "grad_norm": 0.193359375, "learning_rate": 6.746826987307949e-05, "loss": 1.2848, "step": 505 }, { "epoch": 0.0340726884019241, "grad_norm": 0.2412109375, "learning_rate": 6.813627254509019e-05, "loss": 1.2759, "step": 510 }, { "epoch": 0.03440673436664885, "grad_norm": 0.1943359375, "learning_rate": 6.880427521710087e-05, "loss": 1.3355, "step": 515 }, { "epoch": 0.034740780331373594, "grad_norm": 0.201171875, "learning_rate": 6.947227788911156e-05, "loss": 1.2873, "step": 520 }, { "epoch": 0.035074826296098344, "grad_norm": 0.21484375, "learning_rate": 7.014028056112226e-05, "loss": 1.27, "step": 525 }, { "epoch": 0.035408872260823086, "grad_norm": 0.2060546875, "learning_rate": 7.080828323313293e-05, "loss": 1.1939, "step": 530 }, { "epoch": 0.035742918225547836, "grad_norm": 0.212890625, "learning_rate": 7.147628590514363e-05, "loss": 1.2536, "step": 535 }, { "epoch": 0.03607696419027258, "grad_norm": 0.2001953125, "learning_rate": 7.214428857715431e-05, "loss": 1.276, "step": 540 }, { "epoch": 0.03641101015499733, "grad_norm": 0.2158203125, "learning_rate": 7.2812291249165e-05, "loss": 1.2621, "step": 545 }, { "epoch": 0.03674505611972207, "grad_norm": 0.2451171875, "learning_rate": 7.34802939211757e-05, "loss": 1.2361, "step": 550 }, { "epoch": 0.03707910208444682, "grad_norm": 0.2119140625, "learning_rate": 7.414829659318637e-05, "loss": 1.3944, "step": 555 }, { "epoch": 0.03741314804917156, "grad_norm": 0.2099609375, "learning_rate": 7.481629926519707e-05, "loss": 1.2375, "step": 560 }, { "epoch": 0.03774719401389631, "grad_norm": 0.2255859375, "learning_rate": 7.548430193720776e-05, "loss": 1.2653, "step": 565 }, { "epoch": 0.038081239978621055, "grad_norm": 0.208984375, "learning_rate": 7.615230460921844e-05, "loss": 1.2265, "step": 570 }, { "epoch": 0.038415285943345805, "grad_norm": 0.2001953125, "learning_rate": 7.682030728122913e-05, "loss": 1.263, "step": 575 }, { "epoch": 0.03874933190807055, "grad_norm": 0.2021484375, "learning_rate": 7.748830995323982e-05, "loss": 1.1832, "step": 580 }, { "epoch": 0.0390833778727953, "grad_norm": 0.2138671875, "learning_rate": 7.81563126252505e-05, "loss": 1.2563, "step": 585 }, { "epoch": 0.03941742383752004, "grad_norm": 0.1865234375, "learning_rate": 7.88243152972612e-05, "loss": 1.2236, "step": 590 }, { "epoch": 0.03975146980224479, "grad_norm": 0.1904296875, "learning_rate": 7.949231796927187e-05, "loss": 1.2766, "step": 595 }, { "epoch": 0.04008551576696953, "grad_norm": 0.1982421875, "learning_rate": 8.016032064128257e-05, "loss": 1.2377, "step": 600 }, { "epoch": 0.04041956173169428, "grad_norm": 0.2255859375, "learning_rate": 8.082832331329326e-05, "loss": 1.2083, "step": 605 }, { "epoch": 0.040753607696419024, "grad_norm": 0.201171875, "learning_rate": 8.149632598530394e-05, "loss": 1.3272, "step": 610 }, { "epoch": 0.04108765366114377, "grad_norm": 0.1953125, "learning_rate": 8.216432865731464e-05, "loss": 1.2428, "step": 615 }, { "epoch": 0.041421699625868516, "grad_norm": 0.2197265625, "learning_rate": 8.283233132932531e-05, "loss": 1.3088, "step": 620 }, { "epoch": 0.041755745590593266, "grad_norm": 0.1962890625, "learning_rate": 8.350033400133601e-05, "loss": 1.2174, "step": 625 }, { "epoch": 0.042089791555318015, "grad_norm": 0.2490234375, "learning_rate": 8.41683366733467e-05, "loss": 1.1424, "step": 630 }, { "epoch": 0.04242383752004276, "grad_norm": 0.234375, "learning_rate": 8.483633934535738e-05, "loss": 1.2562, "step": 635 }, { "epoch": 0.04275788348476751, "grad_norm": 0.1982421875, "learning_rate": 8.550434201736808e-05, "loss": 1.2747, "step": 640 }, { "epoch": 0.04309192944949225, "grad_norm": 0.1953125, "learning_rate": 8.617234468937875e-05, "loss": 1.2727, "step": 645 }, { "epoch": 0.043425975414217, "grad_norm": 0.1953125, "learning_rate": 8.684034736138945e-05, "loss": 1.2413, "step": 650 }, { "epoch": 0.04376002137894174, "grad_norm": 0.2158203125, "learning_rate": 8.750835003340014e-05, "loss": 1.2405, "step": 655 }, { "epoch": 0.04409406734366649, "grad_norm": 0.1875, "learning_rate": 8.817635270541082e-05, "loss": 1.2667, "step": 660 }, { "epoch": 0.044428113308391234, "grad_norm": 0.23828125, "learning_rate": 8.884435537742152e-05, "loss": 1.2387, "step": 665 }, { "epoch": 0.044762159273115984, "grad_norm": 0.193359375, "learning_rate": 8.95123580494322e-05, "loss": 1.1747, "step": 670 }, { "epoch": 0.04509620523784073, "grad_norm": 0.1962890625, "learning_rate": 9.018036072144289e-05, "loss": 1.1909, "step": 675 }, { "epoch": 0.045430251202565476, "grad_norm": 0.2470703125, "learning_rate": 9.084836339345357e-05, "loss": 1.2968, "step": 680 }, { "epoch": 0.04576429716729022, "grad_norm": 0.2041015625, "learning_rate": 9.151636606546426e-05, "loss": 1.2266, "step": 685 }, { "epoch": 0.04609834313201497, "grad_norm": 0.2216796875, "learning_rate": 9.218436873747496e-05, "loss": 1.1976, "step": 690 }, { "epoch": 0.04643238909673971, "grad_norm": 0.2080078125, "learning_rate": 9.285237140948564e-05, "loss": 1.2633, "step": 695 }, { "epoch": 0.04676643506146446, "grad_norm": 0.2041015625, "learning_rate": 9.352037408149633e-05, "loss": 1.1995, "step": 700 }, { "epoch": 0.0471004810261892, "grad_norm": 0.2021484375, "learning_rate": 9.418837675350703e-05, "loss": 1.2037, "step": 705 }, { "epoch": 0.04743452699091395, "grad_norm": 0.2060546875, "learning_rate": 9.48563794255177e-05, "loss": 1.2941, "step": 710 }, { "epoch": 0.047768572955638695, "grad_norm": 0.1923828125, "learning_rate": 9.55243820975284e-05, "loss": 1.1902, "step": 715 }, { "epoch": 0.048102618920363445, "grad_norm": 0.205078125, "learning_rate": 9.619238476953908e-05, "loss": 1.214, "step": 720 }, { "epoch": 0.04843666488508819, "grad_norm": 0.1865234375, "learning_rate": 9.686038744154977e-05, "loss": 1.1536, "step": 725 }, { "epoch": 0.04877071084981294, "grad_norm": 0.189453125, "learning_rate": 9.752839011356047e-05, "loss": 1.2421, "step": 730 }, { "epoch": 0.04910475681453768, "grad_norm": 0.2138671875, "learning_rate": 9.819639278557115e-05, "loss": 1.2786, "step": 735 }, { "epoch": 0.04943880277926243, "grad_norm": 0.2001953125, "learning_rate": 9.886439545758184e-05, "loss": 1.2637, "step": 740 }, { "epoch": 0.04977284874398717, "grad_norm": 0.197265625, "learning_rate": 9.953239812959252e-05, "loss": 1.2836, "step": 745 }, { "epoch": 0.05010689470871192, "grad_norm": 0.1826171875, "learning_rate": 0.0001002004008016032, "loss": 1.281, "step": 750 }, { "epoch": 0.050440940673436664, "grad_norm": 0.1904296875, "learning_rate": 0.0001008684034736139, "loss": 1.2277, "step": 755 }, { "epoch": 0.050774986638161414, "grad_norm": 0.2265625, "learning_rate": 0.00010153640614562459, "loss": 1.2501, "step": 760 }, { "epoch": 0.051109032602886156, "grad_norm": 0.181640625, "learning_rate": 0.00010220440881763526, "loss": 1.294, "step": 765 }, { "epoch": 0.051443078567610906, "grad_norm": 0.2099609375, "learning_rate": 0.00010287241148964597, "loss": 1.2892, "step": 770 }, { "epoch": 0.05177712453233565, "grad_norm": 0.1845703125, "learning_rate": 0.00010354041416165665, "loss": 1.2675, "step": 775 }, { "epoch": 0.0521111704970604, "grad_norm": 0.2099609375, "learning_rate": 0.00010420841683366733, "loss": 1.2157, "step": 780 }, { "epoch": 0.05244521646178514, "grad_norm": 0.1884765625, "learning_rate": 0.00010487641950567804, "loss": 1.2635, "step": 785 }, { "epoch": 0.05277926242650989, "grad_norm": 0.17578125, "learning_rate": 0.00010554442217768871, "loss": 1.2452, "step": 790 }, { "epoch": 0.05311330839123463, "grad_norm": 0.1953125, "learning_rate": 0.0001062124248496994, "loss": 1.1851, "step": 795 }, { "epoch": 0.05344735435595938, "grad_norm": 0.251953125, "learning_rate": 0.0001068804275217101, "loss": 1.2847, "step": 800 }, { "epoch": 0.053781400320684125, "grad_norm": 0.201171875, "learning_rate": 0.00010754843019372078, "loss": 1.2923, "step": 805 }, { "epoch": 0.054115446285408875, "grad_norm": 0.1962890625, "learning_rate": 0.00010821643286573147, "loss": 1.1858, "step": 810 }, { "epoch": 0.05444949225013362, "grad_norm": 0.20703125, "learning_rate": 0.00010888443553774215, "loss": 1.2945, "step": 815 }, { "epoch": 0.05478353821485837, "grad_norm": 0.1982421875, "learning_rate": 0.00010955243820975285, "loss": 1.3139, "step": 820 }, { "epoch": 0.05511758417958311, "grad_norm": 0.173828125, "learning_rate": 0.00011022044088176354, "loss": 1.1953, "step": 825 }, { "epoch": 0.05545163014430786, "grad_norm": 0.19921875, "learning_rate": 0.00011088844355377421, "loss": 1.2724, "step": 830 }, { "epoch": 0.0557856761090326, "grad_norm": 0.1904296875, "learning_rate": 0.00011155644622578492, "loss": 1.3072, "step": 835 }, { "epoch": 0.05611972207375735, "grad_norm": 0.2255859375, "learning_rate": 0.00011222444889779559, "loss": 1.2644, "step": 840 }, { "epoch": 0.056453768038482094, "grad_norm": 0.1904296875, "learning_rate": 0.00011289245156980628, "loss": 1.2475, "step": 845 }, { "epoch": 0.056787814003206843, "grad_norm": 0.1796875, "learning_rate": 0.00011356045424181699, "loss": 1.1897, "step": 850 }, { "epoch": 0.057121859967931586, "grad_norm": 0.2080078125, "learning_rate": 0.00011422845691382766, "loss": 1.2661, "step": 855 }, { "epoch": 0.057455905932656336, "grad_norm": 0.2294921875, "learning_rate": 0.00011489645958583835, "loss": 1.2381, "step": 860 }, { "epoch": 0.05778995189738108, "grad_norm": 0.173828125, "learning_rate": 0.00011556446225784903, "loss": 1.2789, "step": 865 }, { "epoch": 0.05812399786210583, "grad_norm": 0.201171875, "learning_rate": 0.00011623246492985973, "loss": 1.2097, "step": 870 }, { "epoch": 0.05845804382683057, "grad_norm": 0.185546875, "learning_rate": 0.00011690046760187041, "loss": 1.2372, "step": 875 }, { "epoch": 0.05879208979155532, "grad_norm": 0.1982421875, "learning_rate": 0.0001175684702738811, "loss": 1.2229, "step": 880 }, { "epoch": 0.05912613575628006, "grad_norm": 0.1748046875, "learning_rate": 0.0001182364729458918, "loss": 1.2605, "step": 885 }, { "epoch": 0.05946018172100481, "grad_norm": 0.173828125, "learning_rate": 0.00011890447561790248, "loss": 1.2223, "step": 890 }, { "epoch": 0.059794227685729555, "grad_norm": 0.1708984375, "learning_rate": 0.00011957247828991315, "loss": 1.2466, "step": 895 }, { "epoch": 0.060128273650454304, "grad_norm": 0.2216796875, "learning_rate": 0.00012024048096192387, "loss": 1.2771, "step": 900 }, { "epoch": 0.06046231961517905, "grad_norm": 0.2109375, "learning_rate": 0.00012090848363393454, "loss": 1.2373, "step": 905 }, { "epoch": 0.0607963655799038, "grad_norm": 0.220703125, "learning_rate": 0.00012157648630594522, "loss": 1.2916, "step": 910 }, { "epoch": 0.06113041154462854, "grad_norm": 0.2021484375, "learning_rate": 0.0001222444889779559, "loss": 1.2504, "step": 915 }, { "epoch": 0.06146445750935329, "grad_norm": 0.1865234375, "learning_rate": 0.00012291249164996662, "loss": 1.2714, "step": 920 }, { "epoch": 0.06179850347407803, "grad_norm": 0.1904296875, "learning_rate": 0.0001235804943219773, "loss": 1.2904, "step": 925 }, { "epoch": 0.06213254943880278, "grad_norm": 0.17578125, "learning_rate": 0.00012424849699398796, "loss": 1.254, "step": 930 }, { "epoch": 0.062466595403527524, "grad_norm": 0.2373046875, "learning_rate": 0.0001249164996659987, "loss": 1.3173, "step": 935 }, { "epoch": 0.06280064136825227, "grad_norm": 0.189453125, "learning_rate": 0.00012558450233800936, "loss": 1.2083, "step": 940 }, { "epoch": 0.06313468733297702, "grad_norm": 0.1845703125, "learning_rate": 0.00012625250501002003, "loss": 1.2594, "step": 945 }, { "epoch": 0.06346873329770177, "grad_norm": 0.189453125, "learning_rate": 0.00012692050768203073, "loss": 1.2027, "step": 950 }, { "epoch": 0.06380277926242651, "grad_norm": 0.171875, "learning_rate": 0.00012758851035404143, "loss": 1.1886, "step": 955 }, { "epoch": 0.06413682522715125, "grad_norm": 0.1767578125, "learning_rate": 0.0001282565130260521, "loss": 1.1855, "step": 960 }, { "epoch": 0.064470871191876, "grad_norm": 0.1787109375, "learning_rate": 0.0001289245156980628, "loss": 1.2326, "step": 965 }, { "epoch": 0.06480491715660075, "grad_norm": 0.19140625, "learning_rate": 0.0001295925183700735, "loss": 1.2502, "step": 970 }, { "epoch": 0.0651389631213255, "grad_norm": 0.18359375, "learning_rate": 0.00013026052104208417, "loss": 1.1563, "step": 975 }, { "epoch": 0.06547300908605024, "grad_norm": 0.1748046875, "learning_rate": 0.00013092852371409484, "loss": 1.2203, "step": 980 }, { "epoch": 0.06580705505077498, "grad_norm": 0.19140625, "learning_rate": 0.00013159652638610557, "loss": 1.3099, "step": 985 }, { "epoch": 0.06614110101549973, "grad_norm": 0.1826171875, "learning_rate": 0.00013226452905811624, "loss": 1.2904, "step": 990 }, { "epoch": 0.06647514698022448, "grad_norm": 0.197265625, "learning_rate": 0.0001329325317301269, "loss": 1.2602, "step": 995 }, { "epoch": 0.06680919294494922, "grad_norm": 0.1650390625, "learning_rate": 0.00013360053440213764, "loss": 1.1857, "step": 1000 }, { "epoch": 0.06714323890967397, "grad_norm": 0.19921875, "learning_rate": 0.0001342685370741483, "loss": 1.2519, "step": 1005 }, { "epoch": 0.06747728487439872, "grad_norm": 0.17578125, "learning_rate": 0.00013493653974615898, "loss": 1.1755, "step": 1010 }, { "epoch": 0.06781133083912347, "grad_norm": 0.173828125, "learning_rate": 0.00013560454241816968, "loss": 1.2496, "step": 1015 }, { "epoch": 0.0681453768038482, "grad_norm": 0.1650390625, "learning_rate": 0.00013627254509018038, "loss": 1.2386, "step": 1020 }, { "epoch": 0.06847942276857295, "grad_norm": 0.1728515625, "learning_rate": 0.00013694054776219105, "loss": 1.2811, "step": 1025 }, { "epoch": 0.0688134687332977, "grad_norm": 0.1787109375, "learning_rate": 0.00013760855043420175, "loss": 1.2174, "step": 1030 }, { "epoch": 0.06914751469802245, "grad_norm": 0.21875, "learning_rate": 0.00013827655310621244, "loss": 1.2521, "step": 1035 }, { "epoch": 0.06948156066274719, "grad_norm": 0.201171875, "learning_rate": 0.00013894455577822312, "loss": 1.2147, "step": 1040 }, { "epoch": 0.06981560662747194, "grad_norm": 0.275390625, "learning_rate": 0.0001396125584502338, "loss": 1.2223, "step": 1045 }, { "epoch": 0.07014965259219669, "grad_norm": 0.177734375, "learning_rate": 0.0001402805611222445, "loss": 1.1894, "step": 1050 }, { "epoch": 0.07048369855692144, "grad_norm": 0.193359375, "learning_rate": 0.00014094856379425518, "loss": 1.2567, "step": 1055 }, { "epoch": 0.07081774452164617, "grad_norm": 0.19921875, "learning_rate": 0.00014161656646626586, "loss": 1.2306, "step": 1060 }, { "epoch": 0.07115179048637092, "grad_norm": 0.1806640625, "learning_rate": 0.00014228456913827658, "loss": 1.19, "step": 1065 }, { "epoch": 0.07148583645109567, "grad_norm": 0.1796875, "learning_rate": 0.00014295257181028725, "loss": 1.2903, "step": 1070 }, { "epoch": 0.07181988241582042, "grad_norm": 0.171875, "learning_rate": 0.00014362057448229792, "loss": 1.1799, "step": 1075 }, { "epoch": 0.07215392838054516, "grad_norm": 0.1748046875, "learning_rate": 0.00014428857715430862, "loss": 1.2499, "step": 1080 }, { "epoch": 0.0724879743452699, "grad_norm": 0.189453125, "learning_rate": 0.00014495657982631932, "loss": 1.256, "step": 1085 }, { "epoch": 0.07282202030999466, "grad_norm": 0.193359375, "learning_rate": 0.00014562458249833, "loss": 1.3308, "step": 1090 }, { "epoch": 0.0731560662747194, "grad_norm": 0.1875, "learning_rate": 0.0001462925851703407, "loss": 1.2816, "step": 1095 }, { "epoch": 0.07349011223944414, "grad_norm": 0.16796875, "learning_rate": 0.0001469605878423514, "loss": 1.2938, "step": 1100 }, { "epoch": 0.07382415820416889, "grad_norm": 0.1875, "learning_rate": 0.00014762859051436206, "loss": 1.3056, "step": 1105 }, { "epoch": 0.07415820416889364, "grad_norm": 0.1533203125, "learning_rate": 0.00014829659318637273, "loss": 1.2525, "step": 1110 }, { "epoch": 0.07449225013361839, "grad_norm": 0.1923828125, "learning_rate": 0.00014896459585838346, "loss": 1.1993, "step": 1115 }, { "epoch": 0.07482629609834313, "grad_norm": 0.193359375, "learning_rate": 0.00014963259853039413, "loss": 1.2613, "step": 1120 }, { "epoch": 0.07516034206306788, "grad_norm": 0.1875, "learning_rate": 0.0001503006012024048, "loss": 1.2884, "step": 1125 }, { "epoch": 0.07549438802779262, "grad_norm": 0.1875, "learning_rate": 0.00015096860387441553, "loss": 1.2162, "step": 1130 }, { "epoch": 0.07582843399251737, "grad_norm": 0.158203125, "learning_rate": 0.0001516366065464262, "loss": 1.2131, "step": 1135 }, { "epoch": 0.07616247995724211, "grad_norm": 0.1728515625, "learning_rate": 0.00015230460921843687, "loss": 1.2212, "step": 1140 }, { "epoch": 0.07649652592196686, "grad_norm": 0.1640625, "learning_rate": 0.00015297261189044757, "loss": 1.1528, "step": 1145 }, { "epoch": 0.07683057188669161, "grad_norm": 0.16796875, "learning_rate": 0.00015364061456245827, "loss": 1.125, "step": 1150 }, { "epoch": 0.07716461785141636, "grad_norm": 0.1943359375, "learning_rate": 0.00015430861723446894, "loss": 1.2868, "step": 1155 }, { "epoch": 0.0774986638161411, "grad_norm": 0.216796875, "learning_rate": 0.00015497661990647964, "loss": 1.2277, "step": 1160 }, { "epoch": 0.07783270978086584, "grad_norm": 0.1728515625, "learning_rate": 0.00015564462257849034, "loss": 1.2085, "step": 1165 }, { "epoch": 0.0781667557455906, "grad_norm": 0.18359375, "learning_rate": 0.000156312625250501, "loss": 1.1623, "step": 1170 }, { "epoch": 0.07850080171031534, "grad_norm": 0.2138671875, "learning_rate": 0.00015698062792251168, "loss": 1.2236, "step": 1175 }, { "epoch": 0.07883484767504008, "grad_norm": 0.1630859375, "learning_rate": 0.0001576486305945224, "loss": 1.2148, "step": 1180 }, { "epoch": 0.07916889363976483, "grad_norm": 0.1630859375, "learning_rate": 0.00015831663326653308, "loss": 1.2213, "step": 1185 }, { "epoch": 0.07950293960448958, "grad_norm": 0.21875, "learning_rate": 0.00015898463593854375, "loss": 1.1879, "step": 1190 }, { "epoch": 0.07983698556921433, "grad_norm": 0.16796875, "learning_rate": 0.00015965263861055445, "loss": 1.2233, "step": 1195 }, { "epoch": 0.08017103153393906, "grad_norm": 0.1650390625, "learning_rate": 0.00016032064128256515, "loss": 1.2497, "step": 1200 }, { "epoch": 0.08050507749866381, "grad_norm": 0.16015625, "learning_rate": 0.00016098864395457582, "loss": 1.1905, "step": 1205 }, { "epoch": 0.08083912346338856, "grad_norm": 0.1796875, "learning_rate": 0.00016165664662658652, "loss": 1.1979, "step": 1210 }, { "epoch": 0.08117316942811331, "grad_norm": 0.1884765625, "learning_rate": 0.00016232464929859721, "loss": 1.2394, "step": 1215 }, { "epoch": 0.08150721539283805, "grad_norm": 0.1787109375, "learning_rate": 0.0001629926519706079, "loss": 1.2193, "step": 1220 }, { "epoch": 0.0818412613575628, "grad_norm": 0.185546875, "learning_rate": 0.00016366065464261859, "loss": 1.169, "step": 1225 }, { "epoch": 0.08217530732228755, "grad_norm": 0.1728515625, "learning_rate": 0.00016432865731462928, "loss": 1.2845, "step": 1230 }, { "epoch": 0.0825093532870123, "grad_norm": 0.1630859375, "learning_rate": 0.00016499665998663996, "loss": 1.298, "step": 1235 }, { "epoch": 0.08284339925173703, "grad_norm": 0.16796875, "learning_rate": 0.00016566466265865063, "loss": 1.2208, "step": 1240 }, { "epoch": 0.08317744521646178, "grad_norm": 0.181640625, "learning_rate": 0.00016633266533066135, "loss": 1.2539, "step": 1245 }, { "epoch": 0.08351149118118653, "grad_norm": 0.1728515625, "learning_rate": 0.00016700066800267202, "loss": 1.2138, "step": 1250 }, { "epoch": 0.08384553714591128, "grad_norm": 0.17578125, "learning_rate": 0.0001676686706746827, "loss": 1.2358, "step": 1255 }, { "epoch": 0.08417958311063603, "grad_norm": 0.177734375, "learning_rate": 0.0001683366733466934, "loss": 1.2247, "step": 1260 }, { "epoch": 0.08451362907536077, "grad_norm": 0.1591796875, "learning_rate": 0.0001690046760187041, "loss": 1.3043, "step": 1265 }, { "epoch": 0.08484767504008552, "grad_norm": 0.193359375, "learning_rate": 0.00016967267869071476, "loss": 1.1915, "step": 1270 }, { "epoch": 0.08518172100481027, "grad_norm": 0.171875, "learning_rate": 0.00017034068136272546, "loss": 1.151, "step": 1275 }, { "epoch": 0.08551576696953501, "grad_norm": 0.1669921875, "learning_rate": 0.00017100868403473616, "loss": 1.2174, "step": 1280 }, { "epoch": 0.08584981293425975, "grad_norm": 0.162109375, "learning_rate": 0.00017167668670674683, "loss": 1.2237, "step": 1285 }, { "epoch": 0.0861838588989845, "grad_norm": 0.171875, "learning_rate": 0.0001723446893787575, "loss": 1.2163, "step": 1290 }, { "epoch": 0.08651790486370925, "grad_norm": 0.1884765625, "learning_rate": 0.00017301269205076823, "loss": 1.3327, "step": 1295 }, { "epoch": 0.086851950828434, "grad_norm": 0.173828125, "learning_rate": 0.0001736806947227789, "loss": 1.2554, "step": 1300 }, { "epoch": 0.08718599679315873, "grad_norm": 0.158203125, "learning_rate": 0.00017434869739478957, "loss": 1.2164, "step": 1305 }, { "epoch": 0.08752004275788348, "grad_norm": 0.1611328125, "learning_rate": 0.00017501670006680027, "loss": 1.1899, "step": 1310 }, { "epoch": 0.08785408872260823, "grad_norm": 0.171875, "learning_rate": 0.00017568470273881097, "loss": 1.2598, "step": 1315 }, { "epoch": 0.08818813468733298, "grad_norm": 0.1748046875, "learning_rate": 0.00017635270541082164, "loss": 1.2093, "step": 1320 }, { "epoch": 0.08852218065205772, "grad_norm": 0.1591796875, "learning_rate": 0.00017702070808283234, "loss": 1.1936, "step": 1325 }, { "epoch": 0.08885622661678247, "grad_norm": 0.1845703125, "learning_rate": 0.00017768871075484304, "loss": 1.3092, "step": 1330 }, { "epoch": 0.08919027258150722, "grad_norm": 0.16796875, "learning_rate": 0.0001783567134268537, "loss": 1.2603, "step": 1335 }, { "epoch": 0.08952431854623197, "grad_norm": 0.169921875, "learning_rate": 0.0001790247160988644, "loss": 1.1828, "step": 1340 }, { "epoch": 0.0898583645109567, "grad_norm": 0.19140625, "learning_rate": 0.0001796927187708751, "loss": 1.2985, "step": 1345 }, { "epoch": 0.09019241047568145, "grad_norm": 0.16796875, "learning_rate": 0.00018036072144288578, "loss": 1.1845, "step": 1350 }, { "epoch": 0.0905264564404062, "grad_norm": 0.2333984375, "learning_rate": 0.00018102872411489645, "loss": 1.1018, "step": 1355 }, { "epoch": 0.09086050240513095, "grad_norm": 0.1748046875, "learning_rate": 0.00018169672678690715, "loss": 1.1901, "step": 1360 }, { "epoch": 0.09119454836985569, "grad_norm": 0.1689453125, "learning_rate": 0.00018236472945891785, "loss": 1.2401, "step": 1365 }, { "epoch": 0.09152859433458044, "grad_norm": 0.1669921875, "learning_rate": 0.00018303273213092852, "loss": 1.2479, "step": 1370 }, { "epoch": 0.09186264029930519, "grad_norm": 0.1650390625, "learning_rate": 0.00018370073480293922, "loss": 1.1413, "step": 1375 }, { "epoch": 0.09219668626402994, "grad_norm": 0.21875, "learning_rate": 0.00018436873747494992, "loss": 1.2119, "step": 1380 }, { "epoch": 0.09253073222875467, "grad_norm": 0.1650390625, "learning_rate": 0.0001850367401469606, "loss": 1.2831, "step": 1385 }, { "epoch": 0.09286477819347942, "grad_norm": 0.158203125, "learning_rate": 0.0001857047428189713, "loss": 1.2524, "step": 1390 }, { "epoch": 0.09319882415820417, "grad_norm": 0.1943359375, "learning_rate": 0.00018637274549098199, "loss": 1.2294, "step": 1395 }, { "epoch": 0.09353287012292892, "grad_norm": 0.2041015625, "learning_rate": 0.00018704074816299266, "loss": 1.1901, "step": 1400 }, { "epoch": 0.09386691608765366, "grad_norm": 0.2021484375, "learning_rate": 0.00018770875083500336, "loss": 1.2334, "step": 1405 }, { "epoch": 0.0942009620523784, "grad_norm": 0.1923828125, "learning_rate": 0.00018837675350701405, "loss": 1.2637, "step": 1410 }, { "epoch": 0.09453500801710316, "grad_norm": 0.171875, "learning_rate": 0.00018904475617902473, "loss": 1.2487, "step": 1415 }, { "epoch": 0.0948690539818279, "grad_norm": 0.197265625, "learning_rate": 0.0001897127588510354, "loss": 1.2222, "step": 1420 }, { "epoch": 0.09520309994655264, "grad_norm": 0.1533203125, "learning_rate": 0.0001903807615230461, "loss": 1.2418, "step": 1425 }, { "epoch": 0.09553714591127739, "grad_norm": 0.173828125, "learning_rate": 0.0001910487641950568, "loss": 1.2197, "step": 1430 }, { "epoch": 0.09587119187600214, "grad_norm": 0.1728515625, "learning_rate": 0.00019171676686706747, "loss": 1.1918, "step": 1435 }, { "epoch": 0.09620523784072689, "grad_norm": 0.173828125, "learning_rate": 0.00019238476953907816, "loss": 1.2533, "step": 1440 }, { "epoch": 0.09653928380545163, "grad_norm": 0.1708984375, "learning_rate": 0.00019305277221108886, "loss": 1.2039, "step": 1445 }, { "epoch": 0.09687332977017638, "grad_norm": 0.169921875, "learning_rate": 0.00019372077488309953, "loss": 1.218, "step": 1450 }, { "epoch": 0.09720737573490112, "grad_norm": 0.2060546875, "learning_rate": 0.00019438877755511023, "loss": 1.2209, "step": 1455 }, { "epoch": 0.09754142169962587, "grad_norm": 0.173828125, "learning_rate": 0.00019505678022712093, "loss": 1.1822, "step": 1460 }, { "epoch": 0.09787546766435061, "grad_norm": 0.1650390625, "learning_rate": 0.0001957247828991316, "loss": 1.2394, "step": 1465 }, { "epoch": 0.09820951362907536, "grad_norm": 0.2001953125, "learning_rate": 0.0001963927855711423, "loss": 1.2377, "step": 1470 }, { "epoch": 0.09854355959380011, "grad_norm": 0.171875, "learning_rate": 0.00019706078824315297, "loss": 1.2419, "step": 1475 }, { "epoch": 0.09887760555852486, "grad_norm": 0.17578125, "learning_rate": 0.00019772879091516367, "loss": 1.2013, "step": 1480 }, { "epoch": 0.0992116515232496, "grad_norm": 0.158203125, "learning_rate": 0.00019839679358717434, "loss": 1.2789, "step": 1485 }, { "epoch": 0.09954569748797434, "grad_norm": 0.1611328125, "learning_rate": 0.00019906479625918504, "loss": 1.2002, "step": 1490 }, { "epoch": 0.0998797434526991, "grad_norm": 0.158203125, "learning_rate": 0.00019973279893119574, "loss": 1.2699, "step": 1495 }, { "epoch": 0.10021378941742384, "grad_norm": 0.16796875, "learning_rate": 0.00019999997552557096, "loss": 1.2202, "step": 1500 }, { "epoch": 0.10054783538214858, "grad_norm": 0.1669921875, "learning_rate": 0.00019999982595965903, "loss": 1.1978, "step": 1505 }, { "epoch": 0.10088188134687333, "grad_norm": 0.1767578125, "learning_rate": 0.00019999954042494334, "loss": 1.1724, "step": 1510 }, { "epoch": 0.10121592731159808, "grad_norm": 0.1669921875, "learning_rate": 0.00019999911892181214, "loss": 1.1925, "step": 1515 }, { "epoch": 0.10154997327632283, "grad_norm": 0.1845703125, "learning_rate": 0.0001999985614508385, "loss": 1.194, "step": 1520 }, { "epoch": 0.10188401924104756, "grad_norm": 0.1826171875, "learning_rate": 0.0001999978680127804, "loss": 1.2429, "step": 1525 }, { "epoch": 0.10221806520577231, "grad_norm": 0.1640625, "learning_rate": 0.00019999703860858073, "loss": 1.2303, "step": 1530 }, { "epoch": 0.10255211117049706, "grad_norm": 0.15234375, "learning_rate": 0.0001999960732393672, "loss": 1.2445, "step": 1535 }, { "epoch": 0.10288615713522181, "grad_norm": 0.1572265625, "learning_rate": 0.0001999949719064525, "loss": 1.2214, "step": 1540 }, { "epoch": 0.10322020309994655, "grad_norm": 0.19140625, "learning_rate": 0.00019999373461133398, "loss": 1.3003, "step": 1545 }, { "epoch": 0.1035542490646713, "grad_norm": 0.16015625, "learning_rate": 0.00019999236135569408, "loss": 1.1941, "step": 1550 }, { "epoch": 0.10388829502939605, "grad_norm": 0.1728515625, "learning_rate": 0.00019999085214139994, "loss": 1.1791, "step": 1555 }, { "epoch": 0.1042223409941208, "grad_norm": 0.16796875, "learning_rate": 0.00019998920697050364, "loss": 1.2118, "step": 1560 }, { "epoch": 0.10455638695884553, "grad_norm": 0.181640625, "learning_rate": 0.0001999874258452421, "loss": 1.252, "step": 1565 }, { "epoch": 0.10489043292357028, "grad_norm": 0.1572265625, "learning_rate": 0.00019998550876803708, "loss": 1.1598, "step": 1570 }, { "epoch": 0.10522447888829503, "grad_norm": 0.169921875, "learning_rate": 0.00019998345574149526, "loss": 1.2936, "step": 1575 }, { "epoch": 0.10555852485301978, "grad_norm": 0.15625, "learning_rate": 0.0001999812667684081, "loss": 1.2732, "step": 1580 }, { "epoch": 0.10589257081774452, "grad_norm": 0.169921875, "learning_rate": 0.0001999789418517519, "loss": 1.2097, "step": 1585 }, { "epoch": 0.10622661678246927, "grad_norm": 0.162109375, "learning_rate": 0.00019997648099468786, "loss": 1.3343, "step": 1590 }, { "epoch": 0.10656066274719402, "grad_norm": 0.205078125, "learning_rate": 0.000199973884200562, "loss": 1.2659, "step": 1595 }, { "epoch": 0.10689470871191876, "grad_norm": 0.166015625, "learning_rate": 0.00019997115147290506, "loss": 1.2585, "step": 1600 }, { "epoch": 0.1072287546766435, "grad_norm": 0.1630859375, "learning_rate": 0.0001999682828154328, "loss": 1.2175, "step": 1605 }, { "epoch": 0.10756280064136825, "grad_norm": 0.154296875, "learning_rate": 0.00019996527823204567, "loss": 1.208, "step": 1610 }, { "epoch": 0.107896846606093, "grad_norm": 0.154296875, "learning_rate": 0.000199962137726829, "loss": 1.2164, "step": 1615 }, { "epoch": 0.10823089257081775, "grad_norm": 0.169921875, "learning_rate": 0.00019995886130405287, "loss": 1.2604, "step": 1620 }, { "epoch": 0.10856493853554249, "grad_norm": 0.1640625, "learning_rate": 0.00019995544896817222, "loss": 1.276, "step": 1625 }, { "epoch": 0.10889898450026723, "grad_norm": 0.171875, "learning_rate": 0.00019995190072382677, "loss": 1.2216, "step": 1630 }, { "epoch": 0.10923303046499198, "grad_norm": 0.1513671875, "learning_rate": 0.000199948216575841, "loss": 1.2231, "step": 1635 }, { "epoch": 0.10956707642971673, "grad_norm": 0.1904296875, "learning_rate": 0.0001999443965292243, "loss": 1.1956, "step": 1640 }, { "epoch": 0.10990112239444147, "grad_norm": 0.1630859375, "learning_rate": 0.00019994044058917063, "loss": 1.2923, "step": 1645 }, { "epoch": 0.11023516835916622, "grad_norm": 0.193359375, "learning_rate": 0.00019993634876105896, "loss": 1.2428, "step": 1650 }, { "epoch": 0.11056921432389097, "grad_norm": 0.15625, "learning_rate": 0.0001999321210504528, "loss": 1.1977, "step": 1655 }, { "epoch": 0.11090326028861572, "grad_norm": 0.169921875, "learning_rate": 0.00019992775746310062, "loss": 1.201, "step": 1660 }, { "epoch": 0.11123730625334045, "grad_norm": 0.162109375, "learning_rate": 0.00019992325800493547, "loss": 1.2272, "step": 1665 }, { "epoch": 0.1115713522180652, "grad_norm": 0.1669921875, "learning_rate": 0.00019991862268207527, "loss": 1.1801, "step": 1670 }, { "epoch": 0.11190539818278995, "grad_norm": 0.1669921875, "learning_rate": 0.00019991385150082265, "loss": 1.2178, "step": 1675 }, { "epoch": 0.1122394441475147, "grad_norm": 0.1572265625, "learning_rate": 0.00019990894446766485, "loss": 1.2739, "step": 1680 }, { "epoch": 0.11257349011223944, "grad_norm": 0.185546875, "learning_rate": 0.00019990390158927402, "loss": 1.2091, "step": 1685 }, { "epoch": 0.11290753607696419, "grad_norm": 0.1611328125, "learning_rate": 0.00019989872287250684, "loss": 1.1765, "step": 1690 }, { "epoch": 0.11324158204168894, "grad_norm": 0.1611328125, "learning_rate": 0.00019989340832440478, "loss": 1.2185, "step": 1695 }, { "epoch": 0.11357562800641369, "grad_norm": 0.18359375, "learning_rate": 0.00019988795795219396, "loss": 1.2335, "step": 1700 }, { "epoch": 0.11390967397113842, "grad_norm": 0.158203125, "learning_rate": 0.00019988237176328527, "loss": 1.2053, "step": 1705 }, { "epoch": 0.11424371993586317, "grad_norm": 0.15625, "learning_rate": 0.00019987664976527412, "loss": 1.2055, "step": 1710 }, { "epoch": 0.11457776590058792, "grad_norm": 0.1611328125, "learning_rate": 0.00019987079196594069, "loss": 1.25, "step": 1715 }, { "epoch": 0.11491181186531267, "grad_norm": 0.1767578125, "learning_rate": 0.0001998647983732498, "loss": 1.2435, "step": 1720 }, { "epoch": 0.11524585783003741, "grad_norm": 0.1572265625, "learning_rate": 0.00019985866899535079, "loss": 1.16, "step": 1725 }, { "epoch": 0.11557990379476216, "grad_norm": 0.1611328125, "learning_rate": 0.0001998524038405778, "loss": 1.2465, "step": 1730 }, { "epoch": 0.1159139497594869, "grad_norm": 0.173828125, "learning_rate": 0.00019984600291744948, "loss": 1.2855, "step": 1735 }, { "epoch": 0.11624799572421166, "grad_norm": 0.166015625, "learning_rate": 0.0001998394662346691, "loss": 1.2028, "step": 1740 }, { "epoch": 0.11658204168893639, "grad_norm": 0.154296875, "learning_rate": 0.00019983279380112454, "loss": 1.2496, "step": 1745 }, { "epoch": 0.11691608765366114, "grad_norm": 0.2021484375, "learning_rate": 0.00019982598562588822, "loss": 1.2577, "step": 1750 }, { "epoch": 0.11725013361838589, "grad_norm": 0.1591796875, "learning_rate": 0.00019981904171821716, "loss": 1.2954, "step": 1755 }, { "epoch": 0.11758417958311064, "grad_norm": 0.17578125, "learning_rate": 0.0001998119620875529, "loss": 1.2704, "step": 1760 }, { "epoch": 0.11791822554783538, "grad_norm": 0.1533203125, "learning_rate": 0.0001998047467435216, "loss": 1.2034, "step": 1765 }, { "epoch": 0.11825227151256013, "grad_norm": 0.1611328125, "learning_rate": 0.00019979739569593385, "loss": 1.2341, "step": 1770 }, { "epoch": 0.11858631747728487, "grad_norm": 0.173828125, "learning_rate": 0.00019978990895478483, "loss": 1.2971, "step": 1775 }, { "epoch": 0.11892036344200962, "grad_norm": 0.1640625, "learning_rate": 0.00019978228653025416, "loss": 1.2409, "step": 1780 }, { "epoch": 0.11925440940673436, "grad_norm": 0.1650390625, "learning_rate": 0.000199774528432706, "loss": 1.2282, "step": 1785 }, { "epoch": 0.11958845537145911, "grad_norm": 0.1630859375, "learning_rate": 0.00019976663467268893, "loss": 1.2715, "step": 1790 }, { "epoch": 0.11992250133618386, "grad_norm": 0.162109375, "learning_rate": 0.00019975860526093604, "loss": 1.2379, "step": 1795 }, { "epoch": 0.12025654730090861, "grad_norm": 0.1689453125, "learning_rate": 0.00019975044020836485, "loss": 1.2745, "step": 1800 }, { "epoch": 0.12059059326563334, "grad_norm": 0.17578125, "learning_rate": 0.0001997421395260773, "loss": 1.2342, "step": 1805 }, { "epoch": 0.1209246392303581, "grad_norm": 0.1748046875, "learning_rate": 0.00019973370322535976, "loss": 1.2811, "step": 1810 }, { "epoch": 0.12125868519508284, "grad_norm": 0.1669921875, "learning_rate": 0.00019972513131768298, "loss": 1.2948, "step": 1815 }, { "epoch": 0.1215927311598076, "grad_norm": 0.16796875, "learning_rate": 0.0001997164238147021, "loss": 1.2323, "step": 1820 }, { "epoch": 0.12192677712453233, "grad_norm": 0.162109375, "learning_rate": 0.00019970758072825658, "loss": 1.2297, "step": 1825 }, { "epoch": 0.12226082308925708, "grad_norm": 0.158203125, "learning_rate": 0.00019969860207037034, "loss": 1.1426, "step": 1830 }, { "epoch": 0.12259486905398183, "grad_norm": 0.1572265625, "learning_rate": 0.00019968948785325158, "loss": 1.3128, "step": 1835 }, { "epoch": 0.12292891501870658, "grad_norm": 0.166015625, "learning_rate": 0.00019968023808929276, "loss": 1.247, "step": 1840 }, { "epoch": 0.12326296098343131, "grad_norm": 0.17578125, "learning_rate": 0.00019967085279107077, "loss": 1.1878, "step": 1845 }, { "epoch": 0.12359700694815606, "grad_norm": 0.1640625, "learning_rate": 0.00019966133197134664, "loss": 1.1013, "step": 1850 }, { "epoch": 0.12393105291288081, "grad_norm": 0.1669921875, "learning_rate": 0.00019965167564306576, "loss": 1.2437, "step": 1855 }, { "epoch": 0.12426509887760556, "grad_norm": 0.1611328125, "learning_rate": 0.00019964188381935776, "loss": 1.233, "step": 1860 }, { "epoch": 0.1245991448423303, "grad_norm": 0.173828125, "learning_rate": 0.00019963195651353645, "loss": 1.2822, "step": 1865 }, { "epoch": 0.12493319080705505, "grad_norm": 0.17578125, "learning_rate": 0.00019962189373909996, "loss": 1.2954, "step": 1870 }, { "epoch": 0.1252672367717798, "grad_norm": 0.15234375, "learning_rate": 0.0001996116955097305, "loss": 1.2188, "step": 1875 }, { "epoch": 0.12560128273650453, "grad_norm": 0.166015625, "learning_rate": 0.0001996013618392945, "loss": 1.2498, "step": 1880 }, { "epoch": 0.12593532870122928, "grad_norm": 0.1650390625, "learning_rate": 0.00019959089274184256, "loss": 1.1953, "step": 1885 }, { "epoch": 0.12626937466595403, "grad_norm": 0.166015625, "learning_rate": 0.00019958028823160946, "loss": 1.2453, "step": 1890 }, { "epoch": 0.12660342063067878, "grad_norm": 0.15625, "learning_rate": 0.00019956954832301397, "loss": 1.1966, "step": 1895 }, { "epoch": 0.12693746659540353, "grad_norm": 0.1611328125, "learning_rate": 0.0001995586730306591, "loss": 1.2389, "step": 1900 }, { "epoch": 0.12727151256012828, "grad_norm": 0.1474609375, "learning_rate": 0.00019954766236933183, "loss": 1.2609, "step": 1905 }, { "epoch": 0.12760555852485303, "grad_norm": 0.1591796875, "learning_rate": 0.00019953651635400333, "loss": 1.2263, "step": 1910 }, { "epoch": 0.12793960448957778, "grad_norm": 0.16015625, "learning_rate": 0.00019952523499982864, "loss": 1.1892, "step": 1915 }, { "epoch": 0.1282736504543025, "grad_norm": 0.16796875, "learning_rate": 0.00019951381832214698, "loss": 1.1666, "step": 1920 }, { "epoch": 0.12860769641902725, "grad_norm": 0.154296875, "learning_rate": 0.00019950226633648144, "loss": 1.2589, "step": 1925 }, { "epoch": 0.128941742383752, "grad_norm": 0.1650390625, "learning_rate": 0.00019949057905853919, "loss": 1.1863, "step": 1930 }, { "epoch": 0.12927578834847675, "grad_norm": 0.150390625, "learning_rate": 0.00019947875650421127, "loss": 1.1823, "step": 1935 }, { "epoch": 0.1296098343132015, "grad_norm": 0.1650390625, "learning_rate": 0.0001994667986895727, "loss": 1.2282, "step": 1940 }, { "epoch": 0.12994388027792625, "grad_norm": 0.1640625, "learning_rate": 0.00019945470563088248, "loss": 1.1948, "step": 1945 }, { "epoch": 0.130277926242651, "grad_norm": 0.166015625, "learning_rate": 0.00019944247734458333, "loss": 1.2441, "step": 1950 }, { "epoch": 0.13061197220737575, "grad_norm": 0.1728515625, "learning_rate": 0.00019943011384730198, "loss": 1.1913, "step": 1955 }, { "epoch": 0.13094601817210047, "grad_norm": 0.169921875, "learning_rate": 0.00019941761515584894, "loss": 1.2447, "step": 1960 }, { "epoch": 0.13128006413682522, "grad_norm": 0.1884765625, "learning_rate": 0.00019940498128721856, "loss": 1.2564, "step": 1965 }, { "epoch": 0.13161411010154997, "grad_norm": 0.1708984375, "learning_rate": 0.00019939221225858902, "loss": 1.193, "step": 1970 }, { "epoch": 0.13194815606627472, "grad_norm": 0.1748046875, "learning_rate": 0.00019937930808732222, "loss": 1.3114, "step": 1975 }, { "epoch": 0.13228220203099947, "grad_norm": 0.193359375, "learning_rate": 0.00019936626879096383, "loss": 1.1698, "step": 1980 }, { "epoch": 0.13261624799572422, "grad_norm": 0.1630859375, "learning_rate": 0.00019935309438724326, "loss": 1.2549, "step": 1985 }, { "epoch": 0.13295029396044897, "grad_norm": 0.1591796875, "learning_rate": 0.00019933978489407366, "loss": 1.1987, "step": 1990 }, { "epoch": 0.13328433992517372, "grad_norm": 0.16015625, "learning_rate": 0.00019932634032955178, "loss": 1.1868, "step": 1995 }, { "epoch": 0.13361838588989844, "grad_norm": 0.173828125, "learning_rate": 0.00019931276071195804, "loss": 1.2345, "step": 2000 }, { "epoch": 0.1339524318546232, "grad_norm": 0.15625, "learning_rate": 0.00019929904605975657, "loss": 1.2149, "step": 2005 }, { "epoch": 0.13428647781934794, "grad_norm": 0.16796875, "learning_rate": 0.00019928519639159507, "loss": 1.2072, "step": 2010 }, { "epoch": 0.1346205237840727, "grad_norm": 31.375, "learning_rate": 0.00019927121172630473, "loss": 1.2617, "step": 2015 }, { "epoch": 0.13495456974879744, "grad_norm": 0.1650390625, "learning_rate": 0.0001992570920829004, "loss": 1.2062, "step": 2020 }, { "epoch": 0.1352886157135222, "grad_norm": 0.1591796875, "learning_rate": 0.0001992428374805804, "loss": 1.1609, "step": 2025 }, { "epoch": 0.13562266167824694, "grad_norm": 0.1591796875, "learning_rate": 0.0001992284479387266, "loss": 1.2839, "step": 2030 }, { "epoch": 0.13595670764297169, "grad_norm": 0.1845703125, "learning_rate": 0.00019921392347690435, "loss": 1.1998, "step": 2035 }, { "epoch": 0.1362907536076964, "grad_norm": 0.162109375, "learning_rate": 0.0001991992641148624, "loss": 1.2281, "step": 2040 }, { "epoch": 0.13662479957242116, "grad_norm": 0.1533203125, "learning_rate": 0.00019918446987253287, "loss": 1.1933, "step": 2045 }, { "epoch": 0.1369588455371459, "grad_norm": 0.1689453125, "learning_rate": 0.00019916954077003147, "loss": 1.1923, "step": 2050 }, { "epoch": 0.13729289150187066, "grad_norm": 0.3359375, "learning_rate": 0.00019915447682765705, "loss": 1.1975, "step": 2055 }, { "epoch": 0.1376269374665954, "grad_norm": 0.1708984375, "learning_rate": 0.000199139278065892, "loss": 1.23, "step": 2060 }, { "epoch": 0.13796098343132016, "grad_norm": 0.181640625, "learning_rate": 0.0001991239445054019, "loss": 1.2278, "step": 2065 }, { "epoch": 0.1382950293960449, "grad_norm": 0.1611328125, "learning_rate": 0.0001991084761670356, "loss": 1.1755, "step": 2070 }, { "epoch": 0.13862907536076965, "grad_norm": 0.1708984375, "learning_rate": 0.00019909287307182534, "loss": 1.1468, "step": 2075 }, { "epoch": 0.13896312132549438, "grad_norm": 0.1630859375, "learning_rate": 0.00019907713524098638, "loss": 1.2159, "step": 2080 }, { "epoch": 0.13929716729021913, "grad_norm": 0.158203125, "learning_rate": 0.0001990612626959174, "loss": 1.1975, "step": 2085 }, { "epoch": 0.13963121325494388, "grad_norm": 0.2080078125, "learning_rate": 0.0001990452554582001, "loss": 1.2577, "step": 2090 }, { "epoch": 0.13996525921966863, "grad_norm": 0.1767578125, "learning_rate": 0.00019902911354959936, "loss": 1.1977, "step": 2095 }, { "epoch": 0.14029930518439337, "grad_norm": 0.1591796875, "learning_rate": 0.00019901283699206323, "loss": 1.2434, "step": 2100 }, { "epoch": 0.14063335114911812, "grad_norm": 0.201171875, "learning_rate": 0.00019899642580772274, "loss": 1.2707, "step": 2105 }, { "epoch": 0.14096739711384287, "grad_norm": 0.16796875, "learning_rate": 0.000198979880018892, "loss": 1.1902, "step": 2110 }, { "epoch": 0.14130144307856762, "grad_norm": 0.16796875, "learning_rate": 0.00019896319964806823, "loss": 1.215, "step": 2115 }, { "epoch": 0.14163548904329235, "grad_norm": 0.166015625, "learning_rate": 0.00019894638471793153, "loss": 1.2495, "step": 2120 }, { "epoch": 0.1419695350080171, "grad_norm": 0.158203125, "learning_rate": 0.000198929435251345, "loss": 1.2628, "step": 2125 }, { "epoch": 0.14230358097274184, "grad_norm": 0.17578125, "learning_rate": 0.00019891235127135465, "loss": 1.2156, "step": 2130 }, { "epoch": 0.1426376269374666, "grad_norm": 0.1875, "learning_rate": 0.00019889513280118946, "loss": 1.1396, "step": 2135 }, { "epoch": 0.14297167290219134, "grad_norm": 0.15234375, "learning_rate": 0.00019887777986426117, "loss": 1.2356, "step": 2140 }, { "epoch": 0.1433057188669161, "grad_norm": 0.1640625, "learning_rate": 0.00019886029248416441, "loss": 1.2302, "step": 2145 }, { "epoch": 0.14363976483164084, "grad_norm": 0.15625, "learning_rate": 0.00019884267068467662, "loss": 1.199, "step": 2150 }, { "epoch": 0.1439738107963656, "grad_norm": 0.16015625, "learning_rate": 0.00019882491448975796, "loss": 1.3024, "step": 2155 }, { "epoch": 0.14430785676109031, "grad_norm": 0.173828125, "learning_rate": 0.00019880702392355138, "loss": 1.188, "step": 2160 }, { "epoch": 0.14464190272581506, "grad_norm": 0.1767578125, "learning_rate": 0.00019878899901038254, "loss": 1.2895, "step": 2165 }, { "epoch": 0.1449759486905398, "grad_norm": 0.1826171875, "learning_rate": 0.00019877083977475968, "loss": 1.2214, "step": 2170 }, { "epoch": 0.14530999465526456, "grad_norm": 0.1767578125, "learning_rate": 0.00019875254624137376, "loss": 1.2429, "step": 2175 }, { "epoch": 0.1456440406199893, "grad_norm": 0.20703125, "learning_rate": 0.00019873411843509832, "loss": 1.2168, "step": 2180 }, { "epoch": 0.14597808658471406, "grad_norm": 0.154296875, "learning_rate": 0.00019871555638098954, "loss": 1.2398, "step": 2185 }, { "epoch": 0.1463121325494388, "grad_norm": 0.1611328125, "learning_rate": 0.00019869686010428597, "loss": 1.2839, "step": 2190 }, { "epoch": 0.14664617851416356, "grad_norm": 0.1650390625, "learning_rate": 0.00019867802963040881, "loss": 1.2139, "step": 2195 }, { "epoch": 0.14698022447888828, "grad_norm": 0.169921875, "learning_rate": 0.00019865906498496162, "loss": 1.2241, "step": 2200 }, { "epoch": 0.14731427044361303, "grad_norm": 0.1630859375, "learning_rate": 0.00019863996619373054, "loss": 1.2731, "step": 2205 }, { "epoch": 0.14764831640833778, "grad_norm": 0.1630859375, "learning_rate": 0.00019862073328268394, "loss": 1.2639, "step": 2210 }, { "epoch": 0.14798236237306253, "grad_norm": 0.1572265625, "learning_rate": 0.00019860136627797262, "loss": 1.2519, "step": 2215 }, { "epoch": 0.14831640833778728, "grad_norm": 0.166015625, "learning_rate": 0.0001985818652059298, "loss": 1.2553, "step": 2220 }, { "epoch": 0.14865045430251203, "grad_norm": 0.1689453125, "learning_rate": 0.0001985622300930708, "loss": 1.2233, "step": 2225 }, { "epoch": 0.14898450026723678, "grad_norm": 0.1875, "learning_rate": 0.0001985424609660933, "loss": 1.1913, "step": 2230 }, { "epoch": 0.14931854623196153, "grad_norm": 0.1611328125, "learning_rate": 0.00019852255785187724, "loss": 1.217, "step": 2235 }, { "epoch": 0.14965259219668625, "grad_norm": 0.1728515625, "learning_rate": 0.00019850252077748467, "loss": 1.2496, "step": 2240 }, { "epoch": 0.149986638161411, "grad_norm": 0.17578125, "learning_rate": 0.00019848234977015984, "loss": 1.251, "step": 2245 }, { "epoch": 0.15032068412613575, "grad_norm": 0.1796875, "learning_rate": 0.00019846204485732903, "loss": 1.2931, "step": 2250 }, { "epoch": 0.1506547300908605, "grad_norm": 0.1708984375, "learning_rate": 0.0001984416060666007, "loss": 1.2315, "step": 2255 }, { "epoch": 0.15098877605558525, "grad_norm": 0.1591796875, "learning_rate": 0.00019842103342576517, "loss": 1.1459, "step": 2260 }, { "epoch": 0.15132282202031, "grad_norm": 0.177734375, "learning_rate": 0.00019840032696279494, "loss": 1.2563, "step": 2265 }, { "epoch": 0.15165686798503475, "grad_norm": 0.15234375, "learning_rate": 0.00019837948670584437, "loss": 1.212, "step": 2270 }, { "epoch": 0.1519909139497595, "grad_norm": 0.1572265625, "learning_rate": 0.00019835851268324982, "loss": 1.3146, "step": 2275 }, { "epoch": 0.15232495991448422, "grad_norm": 0.15625, "learning_rate": 0.00019833740492352934, "loss": 1.2306, "step": 2280 }, { "epoch": 0.15265900587920897, "grad_norm": 0.162109375, "learning_rate": 0.00019831616345538305, "loss": 1.2235, "step": 2285 }, { "epoch": 0.15299305184393372, "grad_norm": 0.1689453125, "learning_rate": 0.0001982947883076927, "loss": 1.1821, "step": 2290 }, { "epoch": 0.15332709780865847, "grad_norm": 0.16796875, "learning_rate": 0.00019827327950952195, "loss": 1.2706, "step": 2295 }, { "epoch": 0.15366114377338322, "grad_norm": 0.16015625, "learning_rate": 0.00019825163709011605, "loss": 1.1829, "step": 2300 }, { "epoch": 0.15399518973810797, "grad_norm": 0.166015625, "learning_rate": 0.000198229861078902, "loss": 1.194, "step": 2305 }, { "epoch": 0.15432923570283272, "grad_norm": 0.15625, "learning_rate": 0.00019820795150548846, "loss": 1.2001, "step": 2310 }, { "epoch": 0.15466328166755747, "grad_norm": 0.1689453125, "learning_rate": 0.00019818590839966563, "loss": 1.1742, "step": 2315 }, { "epoch": 0.1549973276322822, "grad_norm": 0.1572265625, "learning_rate": 0.00019816373179140534, "loss": 1.2324, "step": 2320 }, { "epoch": 0.15533137359700694, "grad_norm": 0.166015625, "learning_rate": 0.00019814142171086088, "loss": 1.1786, "step": 2325 }, { "epoch": 0.1556654195617317, "grad_norm": 0.154296875, "learning_rate": 0.0001981189781883671, "loss": 1.2989, "step": 2330 }, { "epoch": 0.15599946552645644, "grad_norm": 0.162109375, "learning_rate": 0.00019809640125444016, "loss": 1.2714, "step": 2335 }, { "epoch": 0.1563335114911812, "grad_norm": 0.1669921875, "learning_rate": 0.00019807369093977778, "loss": 1.2568, "step": 2340 }, { "epoch": 0.15666755745590594, "grad_norm": 0.154296875, "learning_rate": 0.00019805084727525895, "loss": 1.2131, "step": 2345 }, { "epoch": 0.1570016034206307, "grad_norm": 0.173828125, "learning_rate": 0.00019802787029194393, "loss": 1.1464, "step": 2350 }, { "epoch": 0.15733564938535544, "grad_norm": 0.2265625, "learning_rate": 0.00019800476002107437, "loss": 1.2525, "step": 2355 }, { "epoch": 0.15766969535008016, "grad_norm": 0.1748046875, "learning_rate": 0.000197981516494073, "loss": 1.2007, "step": 2360 }, { "epoch": 0.1580037413148049, "grad_norm": 0.171875, "learning_rate": 0.0001979581397425439, "loss": 1.2535, "step": 2365 }, { "epoch": 0.15833778727952966, "grad_norm": 0.1689453125, "learning_rate": 0.0001979346297982722, "loss": 1.235, "step": 2370 }, { "epoch": 0.1586718332442544, "grad_norm": 0.1728515625, "learning_rate": 0.0001979109866932241, "loss": 1.162, "step": 2375 }, { "epoch": 0.15900587920897916, "grad_norm": 0.1669921875, "learning_rate": 0.00019788721045954692, "loss": 1.2224, "step": 2380 }, { "epoch": 0.1593399251737039, "grad_norm": 0.166015625, "learning_rate": 0.000197863301129569, "loss": 1.3007, "step": 2385 }, { "epoch": 0.15967397113842866, "grad_norm": 0.1591796875, "learning_rate": 0.00019783925873579966, "loss": 1.2242, "step": 2390 }, { "epoch": 0.1600080171031534, "grad_norm": 0.1689453125, "learning_rate": 0.000197815083310929, "loss": 1.2477, "step": 2395 }, { "epoch": 0.16034206306787813, "grad_norm": 0.1611328125, "learning_rate": 0.00019779077488782824, "loss": 1.1975, "step": 2400 }, { "epoch": 0.16067610903260288, "grad_norm": 0.1787109375, "learning_rate": 0.0001977663334995492, "loss": 1.2646, "step": 2405 }, { "epoch": 0.16101015499732763, "grad_norm": 0.1640625, "learning_rate": 0.0001977417591793247, "loss": 1.2167, "step": 2410 }, { "epoch": 0.16134420096205238, "grad_norm": 0.1513671875, "learning_rate": 0.00019771705196056812, "loss": 1.2048, "step": 2415 }, { "epoch": 0.16167824692677712, "grad_norm": 0.1650390625, "learning_rate": 0.00019769221187687368, "loss": 1.183, "step": 2420 }, { "epoch": 0.16201229289150187, "grad_norm": 0.173828125, "learning_rate": 0.0001976672389620162, "loss": 1.281, "step": 2425 }, { "epoch": 0.16234633885622662, "grad_norm": 0.158203125, "learning_rate": 0.0001976421332499511, "loss": 1.2045, "step": 2430 }, { "epoch": 0.16268038482095137, "grad_norm": 0.1650390625, "learning_rate": 0.00019761689477481434, "loss": 1.243, "step": 2435 }, { "epoch": 0.1630144307856761, "grad_norm": 0.1982421875, "learning_rate": 0.0001975915235709225, "loss": 1.1954, "step": 2440 }, { "epoch": 0.16334847675040085, "grad_norm": 0.1767578125, "learning_rate": 0.00019756601967277256, "loss": 1.1924, "step": 2445 }, { "epoch": 0.1636825227151256, "grad_norm": 0.166015625, "learning_rate": 0.00019754038311504187, "loss": 1.1605, "step": 2450 }, { "epoch": 0.16401656867985034, "grad_norm": 0.154296875, "learning_rate": 0.00019751461393258829, "loss": 1.3125, "step": 2455 }, { "epoch": 0.1643506146445751, "grad_norm": 0.1796875, "learning_rate": 0.00019748871216044984, "loss": 1.2302, "step": 2460 }, { "epoch": 0.16468466060929984, "grad_norm": 0.181640625, "learning_rate": 0.00019746267783384496, "loss": 1.2435, "step": 2465 }, { "epoch": 0.1650187065740246, "grad_norm": 0.1748046875, "learning_rate": 0.00019743651098817227, "loss": 1.1917, "step": 2470 }, { "epoch": 0.16535275253874934, "grad_norm": 0.1689453125, "learning_rate": 0.00019741021165901054, "loss": 1.2182, "step": 2475 }, { "epoch": 0.16568679850347406, "grad_norm": 0.16796875, "learning_rate": 0.00019738377988211877, "loss": 1.2176, "step": 2480 }, { "epoch": 0.1660208444681988, "grad_norm": 0.1884765625, "learning_rate": 0.000197357215693436, "loss": 1.2962, "step": 2485 }, { "epoch": 0.16635489043292356, "grad_norm": 0.1787109375, "learning_rate": 0.00019733051912908126, "loss": 1.2024, "step": 2490 }, { "epoch": 0.1666889363976483, "grad_norm": 0.1650390625, "learning_rate": 0.00019730369022535362, "loss": 1.1819, "step": 2495 }, { "epoch": 0.16702298236237306, "grad_norm": 0.1826171875, "learning_rate": 0.0001972767290187321, "loss": 1.1487, "step": 2500 }, { "epoch": 0.1673570283270978, "grad_norm": 0.1796875, "learning_rate": 0.0001972496355458756, "loss": 1.2591, "step": 2505 }, { "epoch": 0.16769107429182256, "grad_norm": 0.1611328125, "learning_rate": 0.00019722240984362284, "loss": 1.2167, "step": 2510 }, { "epoch": 0.1680251202565473, "grad_norm": 0.1728515625, "learning_rate": 0.00019719505194899233, "loss": 1.2727, "step": 2515 }, { "epoch": 0.16835916622127206, "grad_norm": 0.1787109375, "learning_rate": 0.00019716756189918234, "loss": 1.2353, "step": 2520 }, { "epoch": 0.16869321218599678, "grad_norm": 0.16015625, "learning_rate": 0.0001971399397315709, "loss": 1.1215, "step": 2525 }, { "epoch": 0.16902725815072153, "grad_norm": 0.1630859375, "learning_rate": 0.00019711218548371546, "loss": 1.2287, "step": 2530 }, { "epoch": 0.16936130411544628, "grad_norm": 0.162109375, "learning_rate": 0.00019708429919335335, "loss": 1.2297, "step": 2535 }, { "epoch": 0.16969535008017103, "grad_norm": 0.1630859375, "learning_rate": 0.00019705628089840122, "loss": 1.252, "step": 2540 }, { "epoch": 0.17002939604489578, "grad_norm": 0.1787109375, "learning_rate": 0.0001970281306369553, "loss": 1.2608, "step": 2545 }, { "epoch": 0.17036344200962053, "grad_norm": 0.1748046875, "learning_rate": 0.0001969998484472912, "loss": 1.1916, "step": 2550 }, { "epoch": 0.17069748797434528, "grad_norm": 0.1513671875, "learning_rate": 0.00019697143436786397, "loss": 1.1868, "step": 2555 }, { "epoch": 0.17103153393907003, "grad_norm": 0.1611328125, "learning_rate": 0.00019694288843730796, "loss": 1.1567, "step": 2560 }, { "epoch": 0.17136557990379475, "grad_norm": 0.1728515625, "learning_rate": 0.0001969142106944368, "loss": 1.2043, "step": 2565 }, { "epoch": 0.1716996258685195, "grad_norm": 0.15625, "learning_rate": 0.00019688540117824332, "loss": 1.1954, "step": 2570 }, { "epoch": 0.17203367183324425, "grad_norm": 0.1787109375, "learning_rate": 0.00019685645992789956, "loss": 1.1889, "step": 2575 }, { "epoch": 0.172367717797969, "grad_norm": 0.1533203125, "learning_rate": 0.00019682738698275663, "loss": 1.1758, "step": 2580 }, { "epoch": 0.17270176376269375, "grad_norm": 0.1591796875, "learning_rate": 0.0001967981823823448, "loss": 1.2197, "step": 2585 }, { "epoch": 0.1730358097274185, "grad_norm": 0.154296875, "learning_rate": 0.0001967688461663732, "loss": 1.1922, "step": 2590 }, { "epoch": 0.17336985569214325, "grad_norm": 0.1640625, "learning_rate": 0.0001967393783747301, "loss": 1.2168, "step": 2595 }, { "epoch": 0.173703901656868, "grad_norm": 0.19140625, "learning_rate": 0.00019670977904748252, "loss": 1.2387, "step": 2600 }, { "epoch": 0.17403794762159272, "grad_norm": 0.158203125, "learning_rate": 0.00019668004822487634, "loss": 1.186, "step": 2605 }, { "epoch": 0.17437199358631747, "grad_norm": 0.1640625, "learning_rate": 0.00019665018594733634, "loss": 1.2346, "step": 2610 }, { "epoch": 0.17470603955104222, "grad_norm": 0.1533203125, "learning_rate": 0.00019662019225546594, "loss": 1.2375, "step": 2615 }, { "epoch": 0.17504008551576697, "grad_norm": 0.1611328125, "learning_rate": 0.00019659006719004727, "loss": 1.221, "step": 2620 }, { "epoch": 0.17537413148049172, "grad_norm": 0.1513671875, "learning_rate": 0.00019655981079204113, "loss": 1.1965, "step": 2625 }, { "epoch": 0.17570817744521647, "grad_norm": 0.15625, "learning_rate": 0.0001965294231025868, "loss": 1.1957, "step": 2630 }, { "epoch": 0.17604222340994122, "grad_norm": 0.1640625, "learning_rate": 0.00019649890416300217, "loss": 1.2214, "step": 2635 }, { "epoch": 0.17637626937466597, "grad_norm": 0.166015625, "learning_rate": 0.00019646825401478356, "loss": 1.2766, "step": 2640 }, { "epoch": 0.1767103153393907, "grad_norm": 0.16796875, "learning_rate": 0.00019643747269960566, "loss": 1.241, "step": 2645 }, { "epoch": 0.17704436130411544, "grad_norm": 0.154296875, "learning_rate": 0.0001964065602593215, "loss": 1.1926, "step": 2650 }, { "epoch": 0.1773784072688402, "grad_norm": 0.1787109375, "learning_rate": 0.0001963755167359625, "loss": 1.2595, "step": 2655 }, { "epoch": 0.17771245323356494, "grad_norm": 0.169921875, "learning_rate": 0.00019634434217173817, "loss": 1.2557, "step": 2660 }, { "epoch": 0.1780464991982897, "grad_norm": 0.16796875, "learning_rate": 0.0001963130366090363, "loss": 1.2221, "step": 2665 }, { "epoch": 0.17838054516301444, "grad_norm": 0.1591796875, "learning_rate": 0.00019628160009042275, "loss": 1.2297, "step": 2670 }, { "epoch": 0.1787145911277392, "grad_norm": 0.17578125, "learning_rate": 0.00019625003265864147, "loss": 1.2172, "step": 2675 }, { "epoch": 0.17904863709246394, "grad_norm": 0.1669921875, "learning_rate": 0.00019621833435661437, "loss": 1.2086, "step": 2680 }, { "epoch": 0.17938268305718866, "grad_norm": 0.1650390625, "learning_rate": 0.00019618650522744137, "loss": 1.1887, "step": 2685 }, { "epoch": 0.1797167290219134, "grad_norm": 0.1533203125, "learning_rate": 0.00019615454531440017, "loss": 1.2667, "step": 2690 }, { "epoch": 0.18005077498663816, "grad_norm": 0.1708984375, "learning_rate": 0.00019612245466094641, "loss": 1.2165, "step": 2695 }, { "epoch": 0.1803848209513629, "grad_norm": 0.1630859375, "learning_rate": 0.00019609023331071344, "loss": 1.2485, "step": 2700 }, { "epoch": 0.18071886691608766, "grad_norm": 0.171875, "learning_rate": 0.0001960578813075123, "loss": 1.2353, "step": 2705 }, { "epoch": 0.1810529128808124, "grad_norm": 0.169921875, "learning_rate": 0.00019602539869533167, "loss": 1.2373, "step": 2710 }, { "epoch": 0.18138695884553716, "grad_norm": 0.166015625, "learning_rate": 0.00019599278551833788, "loss": 1.1801, "step": 2715 }, { "epoch": 0.1817210048102619, "grad_norm": 0.216796875, "learning_rate": 0.00019596004182087477, "loss": 1.2516, "step": 2720 }, { "epoch": 0.18205505077498663, "grad_norm": 0.1708984375, "learning_rate": 0.00019592716764746363, "loss": 1.2321, "step": 2725 }, { "epoch": 0.18238909673971138, "grad_norm": 0.193359375, "learning_rate": 0.00019589416304280314, "loss": 1.2326, "step": 2730 }, { "epoch": 0.18272314270443613, "grad_norm": 0.169921875, "learning_rate": 0.00019586102805176932, "loss": 1.2251, "step": 2735 }, { "epoch": 0.18305718866916088, "grad_norm": 0.1513671875, "learning_rate": 0.00019582776271941557, "loss": 1.1592, "step": 2740 }, { "epoch": 0.18339123463388562, "grad_norm": 0.15625, "learning_rate": 0.00019579436709097237, "loss": 1.2147, "step": 2745 }, { "epoch": 0.18372528059861037, "grad_norm": 0.1640625, "learning_rate": 0.00019576084121184745, "loss": 1.1742, "step": 2750 }, { "epoch": 0.18405932656333512, "grad_norm": 0.166015625, "learning_rate": 0.00019572718512762566, "loss": 1.1736, "step": 2755 }, { "epoch": 0.18439337252805987, "grad_norm": 0.2138671875, "learning_rate": 0.00019569339888406883, "loss": 1.1304, "step": 2760 }, { "epoch": 0.1847274184927846, "grad_norm": 0.1787109375, "learning_rate": 0.0001956594825271158, "loss": 1.2708, "step": 2765 }, { "epoch": 0.18506146445750934, "grad_norm": 0.1572265625, "learning_rate": 0.00019562543610288232, "loss": 1.1739, "step": 2770 }, { "epoch": 0.1853955104222341, "grad_norm": 0.16015625, "learning_rate": 0.00019559125965766096, "loss": 1.2504, "step": 2775 }, { "epoch": 0.18572955638695884, "grad_norm": 0.15625, "learning_rate": 0.0001955569532379211, "loss": 1.1925, "step": 2780 }, { "epoch": 0.1860636023516836, "grad_norm": 0.166015625, "learning_rate": 0.00019552251689030893, "loss": 1.2344, "step": 2785 }, { "epoch": 0.18639764831640834, "grad_norm": 0.1689453125, "learning_rate": 0.00019548795066164713, "loss": 1.1737, "step": 2790 }, { "epoch": 0.1867316942811331, "grad_norm": 0.17578125, "learning_rate": 0.00019545325459893512, "loss": 1.2059, "step": 2795 }, { "epoch": 0.18706574024585784, "grad_norm": 0.1513671875, "learning_rate": 0.0001954184287493488, "loss": 1.1795, "step": 2800 }, { "epoch": 0.18739978621058256, "grad_norm": 0.16796875, "learning_rate": 0.00019538347316024052, "loss": 1.266, "step": 2805 }, { "epoch": 0.1877338321753073, "grad_norm": 0.1708984375, "learning_rate": 0.00019534838787913902, "loss": 1.2198, "step": 2810 }, { "epoch": 0.18806787814003206, "grad_norm": 0.173828125, "learning_rate": 0.0001953131729537495, "loss": 1.2272, "step": 2815 }, { "epoch": 0.1884019241047568, "grad_norm": 0.1591796875, "learning_rate": 0.00019527782843195335, "loss": 1.1629, "step": 2820 }, { "epoch": 0.18873597006948156, "grad_norm": 0.1630859375, "learning_rate": 0.00019524235436180814, "loss": 1.2099, "step": 2825 }, { "epoch": 0.1890700160342063, "grad_norm": 0.16015625, "learning_rate": 0.00019520675079154763, "loss": 1.2457, "step": 2830 }, { "epoch": 0.18940406199893106, "grad_norm": 0.1845703125, "learning_rate": 0.00019517101776958166, "loss": 1.1824, "step": 2835 }, { "epoch": 0.1897381079636558, "grad_norm": 0.158203125, "learning_rate": 0.00019513515534449606, "loss": 1.2036, "step": 2840 }, { "epoch": 0.19007215392838053, "grad_norm": 0.1669921875, "learning_rate": 0.00019509916356505268, "loss": 1.1686, "step": 2845 }, { "epoch": 0.19040619989310528, "grad_norm": 0.1708984375, "learning_rate": 0.0001950630424801891, "loss": 1.1715, "step": 2850 }, { "epoch": 0.19074024585783003, "grad_norm": 0.1650390625, "learning_rate": 0.00019502679213901893, "loss": 1.3139, "step": 2855 }, { "epoch": 0.19107429182255478, "grad_norm": 0.169921875, "learning_rate": 0.00019499041259083132, "loss": 1.1451, "step": 2860 }, { "epoch": 0.19140833778727953, "grad_norm": 0.1640625, "learning_rate": 0.00019495390388509122, "loss": 1.2861, "step": 2865 }, { "epoch": 0.19174238375200428, "grad_norm": 0.1552734375, "learning_rate": 0.00019491726607143918, "loss": 1.2522, "step": 2870 }, { "epoch": 0.19207642971672903, "grad_norm": 0.1904296875, "learning_rate": 0.00019488049919969127, "loss": 1.2232, "step": 2875 }, { "epoch": 0.19241047568145378, "grad_norm": 0.16796875, "learning_rate": 0.00019484360331983907, "loss": 1.265, "step": 2880 }, { "epoch": 0.1927445216461785, "grad_norm": 0.1630859375, "learning_rate": 0.00019480657848204954, "loss": 1.1885, "step": 2885 }, { "epoch": 0.19307856761090325, "grad_norm": 0.17578125, "learning_rate": 0.00019476942473666497, "loss": 1.2748, "step": 2890 }, { "epoch": 0.193412613575628, "grad_norm": 0.17578125, "learning_rate": 0.000194732142134203, "loss": 1.223, "step": 2895 }, { "epoch": 0.19374665954035275, "grad_norm": 0.1630859375, "learning_rate": 0.00019469473072535642, "loss": 1.1482, "step": 2900 }, { "epoch": 0.1940807055050775, "grad_norm": 0.2041015625, "learning_rate": 0.0001946571905609931, "loss": 1.1468, "step": 2905 }, { "epoch": 0.19441475146980225, "grad_norm": 0.1748046875, "learning_rate": 0.00019461952169215615, "loss": 1.334, "step": 2910 }, { "epoch": 0.194748797434527, "grad_norm": 0.1640625, "learning_rate": 0.00019458172417006347, "loss": 1.2264, "step": 2915 }, { "epoch": 0.19508284339925175, "grad_norm": 0.1708984375, "learning_rate": 0.00019454379804610805, "loss": 1.2272, "step": 2920 }, { "epoch": 0.19541688936397647, "grad_norm": 0.1796875, "learning_rate": 0.00019450574337185765, "loss": 1.235, "step": 2925 }, { "epoch": 0.19575093532870122, "grad_norm": 0.1689453125, "learning_rate": 0.00019446756019905482, "loss": 1.2242, "step": 2930 }, { "epoch": 0.19608498129342597, "grad_norm": 0.1689453125, "learning_rate": 0.00019442924857961694, "loss": 1.2805, "step": 2935 }, { "epoch": 0.19641902725815072, "grad_norm": 0.1611328125, "learning_rate": 0.00019439080856563585, "loss": 1.2176, "step": 2940 }, { "epoch": 0.19675307322287547, "grad_norm": 0.173828125, "learning_rate": 0.00019435224020937812, "loss": 1.1892, "step": 2945 }, { "epoch": 0.19708711918760022, "grad_norm": 0.177734375, "learning_rate": 0.0001943135435632848, "loss": 1.3424, "step": 2950 }, { "epoch": 0.19742116515232497, "grad_norm": 0.1611328125, "learning_rate": 0.00019427471867997128, "loss": 1.2189, "step": 2955 }, { "epoch": 0.19775521111704972, "grad_norm": 0.162109375, "learning_rate": 0.00019423576561222744, "loss": 1.2175, "step": 2960 }, { "epoch": 0.19808925708177444, "grad_norm": 0.1689453125, "learning_rate": 0.00019419668441301733, "loss": 1.2744, "step": 2965 }, { "epoch": 0.1984233030464992, "grad_norm": 0.1591796875, "learning_rate": 0.00019415747513547936, "loss": 1.2161, "step": 2970 }, { "epoch": 0.19875734901122394, "grad_norm": 0.16015625, "learning_rate": 0.00019411813783292594, "loss": 1.1341, "step": 2975 }, { "epoch": 0.1990913949759487, "grad_norm": 0.154296875, "learning_rate": 0.00019407867255884367, "loss": 1.2168, "step": 2980 }, { "epoch": 0.19942544094067344, "grad_norm": 0.166015625, "learning_rate": 0.0001940390793668931, "loss": 1.2508, "step": 2985 }, { "epoch": 0.1997594869053982, "grad_norm": 0.1630859375, "learning_rate": 0.00019399935831090868, "loss": 1.2179, "step": 2990 }, { "epoch": 0.20009353287012294, "grad_norm": 0.1748046875, "learning_rate": 0.00019395950944489876, "loss": 1.292, "step": 2995 }, { "epoch": 0.20042757883484769, "grad_norm": 0.1552734375, "learning_rate": 0.0001939195328230455, "loss": 1.2248, "step": 3000 }, { "epoch": 0.2007616247995724, "grad_norm": 0.1630859375, "learning_rate": 0.00019387942849970465, "loss": 1.2645, "step": 3005 }, { "epoch": 0.20109567076429716, "grad_norm": 0.16796875, "learning_rate": 0.0001938391965294058, "loss": 1.1764, "step": 3010 }, { "epoch": 0.2014297167290219, "grad_norm": 0.16796875, "learning_rate": 0.00019379883696685183, "loss": 1.1976, "step": 3015 }, { "epoch": 0.20176376269374666, "grad_norm": 0.1611328125, "learning_rate": 0.00019375834986691933, "loss": 1.2362, "step": 3020 }, { "epoch": 0.2020978086584714, "grad_norm": 0.16796875, "learning_rate": 0.0001937177352846582, "loss": 1.2767, "step": 3025 }, { "epoch": 0.20243185462319616, "grad_norm": 0.166015625, "learning_rate": 0.0001936769932752917, "loss": 1.2597, "step": 3030 }, { "epoch": 0.2027659005879209, "grad_norm": 0.150390625, "learning_rate": 0.00019363612389421638, "loss": 1.2247, "step": 3035 }, { "epoch": 0.20309994655264565, "grad_norm": 0.166015625, "learning_rate": 0.00019359512719700192, "loss": 1.1646, "step": 3040 }, { "epoch": 0.20343399251737038, "grad_norm": 0.1650390625, "learning_rate": 0.00019355400323939112, "loss": 1.1893, "step": 3045 }, { "epoch": 0.20376803848209513, "grad_norm": 0.2373046875, "learning_rate": 0.00019351275207729984, "loss": 1.2971, "step": 3050 }, { "epoch": 0.20410208444681988, "grad_norm": 0.1787109375, "learning_rate": 0.0001934713737668169, "loss": 1.2324, "step": 3055 }, { "epoch": 0.20443613041154463, "grad_norm": 0.1845703125, "learning_rate": 0.000193429868364204, "loss": 1.1936, "step": 3060 }, { "epoch": 0.20477017637626937, "grad_norm": 0.158203125, "learning_rate": 0.0001933882359258956, "loss": 1.2243, "step": 3065 }, { "epoch": 0.20510422234099412, "grad_norm": 0.146484375, "learning_rate": 0.00019334647650849897, "loss": 1.1963, "step": 3070 }, { "epoch": 0.20543826830571887, "grad_norm": 0.1708984375, "learning_rate": 0.00019330459016879395, "loss": 1.2023, "step": 3075 }, { "epoch": 0.20577231427044362, "grad_norm": 0.166015625, "learning_rate": 0.00019326257696373304, "loss": 1.2018, "step": 3080 }, { "epoch": 0.20610636023516835, "grad_norm": 0.173828125, "learning_rate": 0.00019322043695044116, "loss": 1.1572, "step": 3085 }, { "epoch": 0.2064404061998931, "grad_norm": 0.1650390625, "learning_rate": 0.0001931781701862157, "loss": 1.2123, "step": 3090 }, { "epoch": 0.20677445216461784, "grad_norm": 0.15234375, "learning_rate": 0.00019313577672852632, "loss": 1.2391, "step": 3095 }, { "epoch": 0.2071084981293426, "grad_norm": 0.166015625, "learning_rate": 0.00019309325663501508, "loss": 1.2305, "step": 3100 }, { "epoch": 0.20744254409406734, "grad_norm": 0.1708984375, "learning_rate": 0.00019305060996349606, "loss": 1.1387, "step": 3105 }, { "epoch": 0.2077765900587921, "grad_norm": 0.166015625, "learning_rate": 0.00019300783677195563, "loss": 1.2658, "step": 3110 }, { "epoch": 0.20811063602351684, "grad_norm": 0.1923828125, "learning_rate": 0.00019296493711855198, "loss": 1.2224, "step": 3115 }, { "epoch": 0.2084446819882416, "grad_norm": 0.1630859375, "learning_rate": 0.00019292191106161542, "loss": 1.1874, "step": 3120 }, { "epoch": 0.20877872795296634, "grad_norm": 0.166015625, "learning_rate": 0.00019287875865964808, "loss": 1.1604, "step": 3125 }, { "epoch": 0.20911277391769106, "grad_norm": 0.1591796875, "learning_rate": 0.00019283547997132381, "loss": 1.2352, "step": 3130 }, { "epoch": 0.2094468198824158, "grad_norm": 0.15234375, "learning_rate": 0.00019279207505548825, "loss": 1.2506, "step": 3135 }, { "epoch": 0.20978086584714056, "grad_norm": 0.1572265625, "learning_rate": 0.00019274854397115866, "loss": 1.2556, "step": 3140 }, { "epoch": 0.2101149118118653, "grad_norm": 0.18359375, "learning_rate": 0.00019270488677752387, "loss": 1.2294, "step": 3145 }, { "epoch": 0.21044895777659006, "grad_norm": 0.29296875, "learning_rate": 0.0001926611035339441, "loss": 1.1891, "step": 3150 }, { "epoch": 0.2107830037413148, "grad_norm": 0.16015625, "learning_rate": 0.00019261719429995098, "loss": 1.2341, "step": 3155 }, { "epoch": 0.21111704970603956, "grad_norm": 0.1884765625, "learning_rate": 0.00019257315913524754, "loss": 1.2232, "step": 3160 }, { "epoch": 0.2114510956707643, "grad_norm": 0.1748046875, "learning_rate": 0.00019252899809970794, "loss": 1.3083, "step": 3165 }, { "epoch": 0.21178514163548903, "grad_norm": 0.181640625, "learning_rate": 0.00019248471125337752, "loss": 1.2538, "step": 3170 }, { "epoch": 0.21211918760021378, "grad_norm": 0.1552734375, "learning_rate": 0.00019244029865647267, "loss": 1.2017, "step": 3175 }, { "epoch": 0.21245323356493853, "grad_norm": 0.1689453125, "learning_rate": 0.00019239576036938078, "loss": 1.2138, "step": 3180 }, { "epoch": 0.21278727952966328, "grad_norm": 0.1611328125, "learning_rate": 0.00019235109645266015, "loss": 1.1828, "step": 3185 }, { "epoch": 0.21312132549438803, "grad_norm": 0.158203125, "learning_rate": 0.00019230630696703984, "loss": 1.208, "step": 3190 }, { "epoch": 0.21345537145911278, "grad_norm": 0.1640625, "learning_rate": 0.0001922613919734197, "loss": 1.141, "step": 3195 }, { "epoch": 0.21378941742383753, "grad_norm": 0.15625, "learning_rate": 0.00019221635153287024, "loss": 1.2047, "step": 3200 }, { "epoch": 0.21412346338856228, "grad_norm": 0.1689453125, "learning_rate": 0.00019217118570663249, "loss": 1.2994, "step": 3205 }, { "epoch": 0.214457509353287, "grad_norm": 0.158203125, "learning_rate": 0.000192125894556118, "loss": 1.1787, "step": 3210 }, { "epoch": 0.21479155531801175, "grad_norm": 0.15625, "learning_rate": 0.0001920804781429087, "loss": 1.2072, "step": 3215 }, { "epoch": 0.2151256012827365, "grad_norm": 0.1689453125, "learning_rate": 0.00019203493652875686, "loss": 1.1978, "step": 3220 }, { "epoch": 0.21545964724746125, "grad_norm": 0.154296875, "learning_rate": 0.00019198926977558495, "loss": 1.1938, "step": 3225 }, { "epoch": 0.215793693212186, "grad_norm": 0.1875, "learning_rate": 0.00019194347794548565, "loss": 1.253, "step": 3230 }, { "epoch": 0.21612773917691075, "grad_norm": 0.1591796875, "learning_rate": 0.0001918975611007217, "loss": 1.198, "step": 3235 }, { "epoch": 0.2164617851416355, "grad_norm": 0.17578125, "learning_rate": 0.00019185151930372574, "loss": 1.2049, "step": 3240 }, { "epoch": 0.21679583110636025, "grad_norm": 0.1728515625, "learning_rate": 0.00019180535261710037, "loss": 1.2952, "step": 3245 }, { "epoch": 0.21712987707108497, "grad_norm": 0.2001953125, "learning_rate": 0.000191759061103618, "loss": 1.1782, "step": 3250 }, { "epoch": 0.21746392303580972, "grad_norm": 0.1669921875, "learning_rate": 0.00019171264482622076, "loss": 1.1968, "step": 3255 }, { "epoch": 0.21779796900053447, "grad_norm": 0.1630859375, "learning_rate": 0.00019166610384802038, "loss": 1.2566, "step": 3260 }, { "epoch": 0.21813201496525922, "grad_norm": 0.2001953125, "learning_rate": 0.00019161943823229824, "loss": 1.2565, "step": 3265 }, { "epoch": 0.21846606092998397, "grad_norm": 0.15625, "learning_rate": 0.00019157264804250506, "loss": 1.1353, "step": 3270 }, { "epoch": 0.21880010689470872, "grad_norm": 0.1728515625, "learning_rate": 0.00019152573334226114, "loss": 1.2901, "step": 3275 }, { "epoch": 0.21913415285943347, "grad_norm": 0.1572265625, "learning_rate": 0.00019147869419535577, "loss": 1.1798, "step": 3280 }, { "epoch": 0.21946819882415822, "grad_norm": 0.17578125, "learning_rate": 0.0001914315306657478, "loss": 1.2607, "step": 3285 }, { "epoch": 0.21980224478888294, "grad_norm": 0.1640625, "learning_rate": 0.0001913842428175649, "loss": 1.1978, "step": 3290 }, { "epoch": 0.2201362907536077, "grad_norm": 0.1708984375, "learning_rate": 0.00019133683071510395, "loss": 1.1645, "step": 3295 }, { "epoch": 0.22047033671833244, "grad_norm": 0.1650390625, "learning_rate": 0.00019128929442283074, "loss": 1.1069, "step": 3300 }, { "epoch": 0.2208043826830572, "grad_norm": 0.1494140625, "learning_rate": 0.0001912416340053799, "loss": 1.279, "step": 3305 }, { "epoch": 0.22113842864778194, "grad_norm": 0.1845703125, "learning_rate": 0.00019119384952755484, "loss": 1.2085, "step": 3310 }, { "epoch": 0.2214724746125067, "grad_norm": 0.173828125, "learning_rate": 0.00019114594105432766, "loss": 1.1412, "step": 3315 }, { "epoch": 0.22180652057723144, "grad_norm": 0.1630859375, "learning_rate": 0.00019109790865083905, "loss": 1.1815, "step": 3320 }, { "epoch": 0.22214056654195619, "grad_norm": 0.1796875, "learning_rate": 0.00019104975238239818, "loss": 1.238, "step": 3325 }, { "epoch": 0.2224746125066809, "grad_norm": 0.1865234375, "learning_rate": 0.00019100147231448274, "loss": 1.2348, "step": 3330 }, { "epoch": 0.22280865847140566, "grad_norm": 0.1689453125, "learning_rate": 0.0001909530685127386, "loss": 1.2156, "step": 3335 }, { "epoch": 0.2231427044361304, "grad_norm": 0.1689453125, "learning_rate": 0.00019090454104298004, "loss": 1.1559, "step": 3340 }, { "epoch": 0.22347675040085516, "grad_norm": 0.1572265625, "learning_rate": 0.00019085588997118927, "loss": 1.1645, "step": 3345 }, { "epoch": 0.2238107963655799, "grad_norm": 0.1865234375, "learning_rate": 0.00019080711536351676, "loss": 1.2397, "step": 3350 }, { "epoch": 0.22414484233030466, "grad_norm": 0.1572265625, "learning_rate": 0.00019075821728628087, "loss": 1.2215, "step": 3355 }, { "epoch": 0.2244788882950294, "grad_norm": 0.1767578125, "learning_rate": 0.00019070919580596783, "loss": 1.242, "step": 3360 }, { "epoch": 0.22481293425975415, "grad_norm": 0.19921875, "learning_rate": 0.00019066005098923168, "loss": 1.2694, "step": 3365 }, { "epoch": 0.22514698022447888, "grad_norm": 0.15625, "learning_rate": 0.00019061078290289415, "loss": 1.2075, "step": 3370 }, { "epoch": 0.22548102618920363, "grad_norm": 0.1630859375, "learning_rate": 0.00019056139161394462, "loss": 1.2271, "step": 3375 }, { "epoch": 0.22581507215392838, "grad_norm": 0.177734375, "learning_rate": 0.00019051187718953992, "loss": 1.2696, "step": 3380 }, { "epoch": 0.22614911811865313, "grad_norm": 0.166015625, "learning_rate": 0.00019046223969700434, "loss": 1.2897, "step": 3385 }, { "epoch": 0.22648316408337787, "grad_norm": 0.18359375, "learning_rate": 0.0001904124792038295, "loss": 1.1922, "step": 3390 }, { "epoch": 0.22681721004810262, "grad_norm": 0.16015625, "learning_rate": 0.00019036259577767426, "loss": 1.2858, "step": 3395 }, { "epoch": 0.22715125601282737, "grad_norm": 0.1884765625, "learning_rate": 0.00019031258948636466, "loss": 1.1695, "step": 3400 }, { "epoch": 0.22748530197755212, "grad_norm": 0.150390625, "learning_rate": 0.00019026246039789376, "loss": 1.1508, "step": 3405 }, { "epoch": 0.22781934794227685, "grad_norm": 0.16015625, "learning_rate": 0.0001902122085804216, "loss": 1.1518, "step": 3410 }, { "epoch": 0.2281533939070016, "grad_norm": 0.1640625, "learning_rate": 0.0001901618341022751, "loss": 1.213, "step": 3415 }, { "epoch": 0.22848743987172634, "grad_norm": 0.162109375, "learning_rate": 0.00019011133703194797, "loss": 1.2272, "step": 3420 }, { "epoch": 0.2288214858364511, "grad_norm": 0.1630859375, "learning_rate": 0.00019006071743810055, "loss": 1.2391, "step": 3425 }, { "epoch": 0.22915553180117584, "grad_norm": 0.16796875, "learning_rate": 0.00019000997538955985, "loss": 1.2277, "step": 3430 }, { "epoch": 0.2294895777659006, "grad_norm": 0.150390625, "learning_rate": 0.0001899591109553193, "loss": 1.2127, "step": 3435 }, { "epoch": 0.22982362373062534, "grad_norm": 0.2236328125, "learning_rate": 0.00018990812420453885, "loss": 1.2484, "step": 3440 }, { "epoch": 0.2301576696953501, "grad_norm": 0.1708984375, "learning_rate": 0.00018985701520654466, "loss": 1.262, "step": 3445 }, { "epoch": 0.23049171566007481, "grad_norm": 0.1845703125, "learning_rate": 0.00018980578403082917, "loss": 1.1882, "step": 3450 }, { "epoch": 0.23082576162479956, "grad_norm": 0.1748046875, "learning_rate": 0.00018975443074705086, "loss": 1.217, "step": 3455 }, { "epoch": 0.2311598075895243, "grad_norm": 0.1669921875, "learning_rate": 0.00018970295542503434, "loss": 1.2297, "step": 3460 }, { "epoch": 0.23149385355424906, "grad_norm": 0.154296875, "learning_rate": 0.0001896513581347701, "loss": 1.1437, "step": 3465 }, { "epoch": 0.2318278995189738, "grad_norm": 0.171875, "learning_rate": 0.0001895996389464145, "loss": 1.2221, "step": 3470 }, { "epoch": 0.23216194548369856, "grad_norm": 0.16796875, "learning_rate": 0.0001895477979302896, "loss": 1.2822, "step": 3475 }, { "epoch": 0.2324959914484233, "grad_norm": 0.17578125, "learning_rate": 0.00018949583515688313, "loss": 1.2558, "step": 3480 }, { "epoch": 0.23283003741314806, "grad_norm": 0.1591796875, "learning_rate": 0.0001894437506968484, "loss": 1.2, "step": 3485 }, { "epoch": 0.23316408337787278, "grad_norm": 0.1611328125, "learning_rate": 0.00018939154462100418, "loss": 1.1973, "step": 3490 }, { "epoch": 0.23349812934259753, "grad_norm": 0.171875, "learning_rate": 0.0001893392170003345, "loss": 1.1924, "step": 3495 }, { "epoch": 0.23383217530732228, "grad_norm": 0.166015625, "learning_rate": 0.0001892867679059887, "loss": 1.2833, "step": 3500 }, { "epoch": 0.23416622127204703, "grad_norm": 0.173828125, "learning_rate": 0.0001892341974092814, "loss": 1.2072, "step": 3505 }, { "epoch": 0.23450026723677178, "grad_norm": 0.1611328125, "learning_rate": 0.00018918150558169217, "loss": 1.2041, "step": 3510 }, { "epoch": 0.23483431320149653, "grad_norm": 0.162109375, "learning_rate": 0.00018912869249486556, "loss": 1.1624, "step": 3515 }, { "epoch": 0.23516835916622128, "grad_norm": 0.18359375, "learning_rate": 0.000189075758220611, "loss": 1.2527, "step": 3520 }, { "epoch": 0.23550240513094603, "grad_norm": 0.173828125, "learning_rate": 0.0001890227028309028, "loss": 1.0995, "step": 3525 }, { "epoch": 0.23583645109567075, "grad_norm": 0.181640625, "learning_rate": 0.00018896952639787978, "loss": 1.2456, "step": 3530 }, { "epoch": 0.2361704970603955, "grad_norm": 0.1533203125, "learning_rate": 0.00018891622899384544, "loss": 1.19, "step": 3535 }, { "epoch": 0.23650454302512025, "grad_norm": 0.162109375, "learning_rate": 0.0001888628106912678, "loss": 1.2165, "step": 3540 }, { "epoch": 0.236838588989845, "grad_norm": 0.1650390625, "learning_rate": 0.00018880927156277914, "loss": 1.1449, "step": 3545 }, { "epoch": 0.23717263495456975, "grad_norm": 0.1640625, "learning_rate": 0.00018875561168117617, "loss": 1.2019, "step": 3550 }, { "epoch": 0.2375066809192945, "grad_norm": 0.162109375, "learning_rate": 0.00018870183111941965, "loss": 1.2256, "step": 3555 }, { "epoch": 0.23784072688401925, "grad_norm": 0.16015625, "learning_rate": 0.00018864792995063455, "loss": 1.141, "step": 3560 }, { "epoch": 0.238174772848744, "grad_norm": 0.16796875, "learning_rate": 0.00018859390824810975, "loss": 1.1647, "step": 3565 }, { "epoch": 0.23850881881346872, "grad_norm": 0.1640625, "learning_rate": 0.00018853976608529803, "loss": 1.2427, "step": 3570 }, { "epoch": 0.23884286477819347, "grad_norm": 0.1708984375, "learning_rate": 0.00018848550353581597, "loss": 1.2271, "step": 3575 }, { "epoch": 0.23917691074291822, "grad_norm": 0.166015625, "learning_rate": 0.00018843112067344387, "loss": 1.1665, "step": 3580 }, { "epoch": 0.23951095670764297, "grad_norm": 0.158203125, "learning_rate": 0.00018837661757212555, "loss": 1.1786, "step": 3585 }, { "epoch": 0.23984500267236772, "grad_norm": 0.162109375, "learning_rate": 0.00018832199430596835, "loss": 1.2068, "step": 3590 }, { "epoch": 0.24017904863709247, "grad_norm": 0.1767578125, "learning_rate": 0.000188267250949243, "loss": 1.2176, "step": 3595 }, { "epoch": 0.24051309460181722, "grad_norm": 0.1494140625, "learning_rate": 0.00018821238757638358, "loss": 1.2054, "step": 3600 }, { "epoch": 0.24084714056654197, "grad_norm": 0.1650390625, "learning_rate": 0.00018815740426198724, "loss": 1.1923, "step": 3605 }, { "epoch": 0.2411811865312667, "grad_norm": 0.1767578125, "learning_rate": 0.00018810230108081425, "loss": 1.18, "step": 3610 }, { "epoch": 0.24151523249599144, "grad_norm": 0.169921875, "learning_rate": 0.00018804707810778792, "loss": 1.1351, "step": 3615 }, { "epoch": 0.2418492784607162, "grad_norm": 0.166015625, "learning_rate": 0.00018799173541799436, "loss": 1.2466, "step": 3620 }, { "epoch": 0.24218332442544094, "grad_norm": 0.1640625, "learning_rate": 0.00018793627308668248, "loss": 1.2114, "step": 3625 }, { "epoch": 0.2425173703901657, "grad_norm": 0.16796875, "learning_rate": 0.00018788069118926397, "loss": 1.216, "step": 3630 }, { "epoch": 0.24285141635489044, "grad_norm": 0.1640625, "learning_rate": 0.0001878249898013129, "loss": 1.2487, "step": 3635 }, { "epoch": 0.2431854623196152, "grad_norm": 0.1630859375, "learning_rate": 0.00018776916899856594, "loss": 1.2802, "step": 3640 }, { "epoch": 0.24351950828433994, "grad_norm": 0.154296875, "learning_rate": 0.00018771322885692213, "loss": 1.2153, "step": 3645 }, { "epoch": 0.24385355424906466, "grad_norm": 0.173828125, "learning_rate": 0.00018765716945244273, "loss": 1.1273, "step": 3650 }, { "epoch": 0.2441876002137894, "grad_norm": 0.173828125, "learning_rate": 0.00018760099086135115, "loss": 1.1616, "step": 3655 }, { "epoch": 0.24452164617851416, "grad_norm": 0.1640625, "learning_rate": 0.00018754469316003292, "loss": 1.2522, "step": 3660 }, { "epoch": 0.2448556921432389, "grad_norm": 0.1689453125, "learning_rate": 0.00018748827642503545, "loss": 1.2216, "step": 3665 }, { "epoch": 0.24518973810796366, "grad_norm": 0.162109375, "learning_rate": 0.0001874317407330681, "loss": 1.1305, "step": 3670 }, { "epoch": 0.2455237840726884, "grad_norm": 0.18359375, "learning_rate": 0.0001873750861610018, "loss": 1.179, "step": 3675 }, { "epoch": 0.24585783003741316, "grad_norm": 0.169921875, "learning_rate": 0.00018731831278586932, "loss": 1.2022, "step": 3680 }, { "epoch": 0.2461918760021379, "grad_norm": 0.169921875, "learning_rate": 0.0001872614206848648, "loss": 1.1367, "step": 3685 }, { "epoch": 0.24652592196686263, "grad_norm": 0.1669921875, "learning_rate": 0.00018720440993534395, "loss": 1.1631, "step": 3690 }, { "epoch": 0.24685996793158738, "grad_norm": 0.185546875, "learning_rate": 0.0001871472806148237, "loss": 1.2256, "step": 3695 }, { "epoch": 0.24719401389631213, "grad_norm": 0.158203125, "learning_rate": 0.00018709003280098225, "loss": 1.2209, "step": 3700 }, { "epoch": 0.24752805986103688, "grad_norm": 0.16015625, "learning_rate": 0.0001870326665716589, "loss": 1.1896, "step": 3705 }, { "epoch": 0.24786210582576162, "grad_norm": 0.166015625, "learning_rate": 0.00018697518200485398, "loss": 1.2845, "step": 3710 }, { "epoch": 0.24819615179048637, "grad_norm": 0.1630859375, "learning_rate": 0.00018691757917872867, "loss": 1.1432, "step": 3715 }, { "epoch": 0.24853019775521112, "grad_norm": 0.1767578125, "learning_rate": 0.00018685985817160503, "loss": 1.265, "step": 3720 }, { "epoch": 0.24886424371993587, "grad_norm": 0.201171875, "learning_rate": 0.0001868020190619657, "loss": 1.2452, "step": 3725 }, { "epoch": 0.2491982896846606, "grad_norm": 0.1591796875, "learning_rate": 0.00018674406192845398, "loss": 1.2468, "step": 3730 }, { "epoch": 0.24953233564938535, "grad_norm": 0.1689453125, "learning_rate": 0.00018668598684987368, "loss": 1.1719, "step": 3735 }, { "epoch": 0.2498663816141101, "grad_norm": 0.1650390625, "learning_rate": 0.00018662779390518885, "loss": 1.1946, "step": 3740 }, { "epoch": 0.25020042757883487, "grad_norm": 0.166015625, "learning_rate": 0.00018656948317352393, "loss": 1.3211, "step": 3745 }, { "epoch": 0.2505344735435596, "grad_norm": 0.1708984375, "learning_rate": 0.00018651105473416345, "loss": 1.2501, "step": 3750 }, { "epoch": 0.2508685195082843, "grad_norm": 0.1650390625, "learning_rate": 0.000186452508666552, "loss": 1.2048, "step": 3755 }, { "epoch": 0.25120256547300907, "grad_norm": 0.1611328125, "learning_rate": 0.0001863938450502941, "loss": 1.2069, "step": 3760 }, { "epoch": 0.2515366114377338, "grad_norm": 0.1640625, "learning_rate": 0.00018633506396515407, "loss": 1.1784, "step": 3765 }, { "epoch": 0.25187065740245856, "grad_norm": 0.1728515625, "learning_rate": 0.00018627616549105606, "loss": 1.1697, "step": 3770 }, { "epoch": 0.2522047033671833, "grad_norm": 0.1611328125, "learning_rate": 0.00018621714970808374, "loss": 1.1564, "step": 3775 }, { "epoch": 0.25253874933190806, "grad_norm": 0.185546875, "learning_rate": 0.00018615801669648026, "loss": 1.1939, "step": 3780 }, { "epoch": 0.2528727952966328, "grad_norm": 0.169921875, "learning_rate": 0.00018609876653664825, "loss": 1.2784, "step": 3785 }, { "epoch": 0.25320684126135756, "grad_norm": 0.1650390625, "learning_rate": 0.00018603939930914956, "loss": 1.1739, "step": 3790 }, { "epoch": 0.2535408872260823, "grad_norm": 0.162109375, "learning_rate": 0.00018597991509470524, "loss": 1.2513, "step": 3795 }, { "epoch": 0.25387493319080706, "grad_norm": 0.154296875, "learning_rate": 0.00018592031397419545, "loss": 1.1615, "step": 3800 }, { "epoch": 0.2542089791555318, "grad_norm": 0.1640625, "learning_rate": 0.0001858605960286592, "loss": 1.2272, "step": 3805 }, { "epoch": 0.25454302512025656, "grad_norm": 0.1591796875, "learning_rate": 0.00018580076133929444, "loss": 1.2155, "step": 3810 }, { "epoch": 0.2548770710849813, "grad_norm": 0.1689453125, "learning_rate": 0.00018574080998745785, "loss": 1.2615, "step": 3815 }, { "epoch": 0.25521111704970606, "grad_norm": 0.1669921875, "learning_rate": 0.00018568074205466465, "loss": 1.2998, "step": 3820 }, { "epoch": 0.2555451630144308, "grad_norm": 0.15234375, "learning_rate": 0.00018562055762258862, "loss": 1.1872, "step": 3825 }, { "epoch": 0.25587920897915556, "grad_norm": 0.1796875, "learning_rate": 0.00018556025677306205, "loss": 1.195, "step": 3830 }, { "epoch": 0.25621325494388025, "grad_norm": 0.16015625, "learning_rate": 0.00018549983958807533, "loss": 1.1688, "step": 3835 }, { "epoch": 0.256547300908605, "grad_norm": 0.173828125, "learning_rate": 0.00018543930614977719, "loss": 1.2799, "step": 3840 }, { "epoch": 0.25688134687332975, "grad_norm": 0.1669921875, "learning_rate": 0.00018537865654047432, "loss": 1.1605, "step": 3845 }, { "epoch": 0.2572153928380545, "grad_norm": 0.1689453125, "learning_rate": 0.00018531789084263143, "loss": 1.2179, "step": 3850 }, { "epoch": 0.25754943880277925, "grad_norm": 0.1630859375, "learning_rate": 0.0001852570091388711, "loss": 1.1944, "step": 3855 }, { "epoch": 0.257883484767504, "grad_norm": 0.177734375, "learning_rate": 0.00018519601151197352, "loss": 1.2285, "step": 3860 }, { "epoch": 0.25821753073222875, "grad_norm": 0.1640625, "learning_rate": 0.00018513489804487666, "loss": 1.2673, "step": 3865 }, { "epoch": 0.2585515766969535, "grad_norm": 0.1796875, "learning_rate": 0.00018507366882067583, "loss": 1.2363, "step": 3870 }, { "epoch": 0.25888562266167825, "grad_norm": 0.154296875, "learning_rate": 0.00018501232392262385, "loss": 1.266, "step": 3875 }, { "epoch": 0.259219668626403, "grad_norm": 0.169921875, "learning_rate": 0.00018495086343413083, "loss": 1.2432, "step": 3880 }, { "epoch": 0.25955371459112775, "grad_norm": 0.1845703125, "learning_rate": 0.00018488928743876394, "loss": 1.2437, "step": 3885 }, { "epoch": 0.2598877605558525, "grad_norm": 0.16015625, "learning_rate": 0.00018482759602024752, "loss": 1.2451, "step": 3890 }, { "epoch": 0.26022180652057725, "grad_norm": 0.1689453125, "learning_rate": 0.0001847657892624628, "loss": 1.2399, "step": 3895 }, { "epoch": 0.260555852485302, "grad_norm": 0.1708984375, "learning_rate": 0.0001847038672494478, "loss": 1.2465, "step": 3900 }, { "epoch": 0.26088989845002675, "grad_norm": 0.154296875, "learning_rate": 0.0001846418300653973, "loss": 1.2469, "step": 3905 }, { "epoch": 0.2612239444147515, "grad_norm": 0.169921875, "learning_rate": 0.0001845796777946627, "loss": 1.1983, "step": 3910 }, { "epoch": 0.2615579903794762, "grad_norm": 0.1630859375, "learning_rate": 0.0001845174105217518, "loss": 1.1489, "step": 3915 }, { "epoch": 0.26189203634420094, "grad_norm": 0.1904296875, "learning_rate": 0.00018445502833132883, "loss": 1.1875, "step": 3920 }, { "epoch": 0.2622260823089257, "grad_norm": 0.1787109375, "learning_rate": 0.00018439253130821427, "loss": 1.1905, "step": 3925 }, { "epoch": 0.26256012827365044, "grad_norm": 0.16015625, "learning_rate": 0.0001843299195373847, "loss": 1.2525, "step": 3930 }, { "epoch": 0.2628941742383752, "grad_norm": 0.166015625, "learning_rate": 0.00018426719310397273, "loss": 1.215, "step": 3935 }, { "epoch": 0.26322822020309994, "grad_norm": 0.1669921875, "learning_rate": 0.0001842043520932669, "loss": 1.2433, "step": 3940 }, { "epoch": 0.2635622661678247, "grad_norm": 0.1611328125, "learning_rate": 0.0001841413965907116, "loss": 1.1854, "step": 3945 }, { "epoch": 0.26389631213254944, "grad_norm": 0.1650390625, "learning_rate": 0.00018407832668190671, "loss": 1.1176, "step": 3950 }, { "epoch": 0.2642303580972742, "grad_norm": 0.1630859375, "learning_rate": 0.00018401514245260783, "loss": 1.2202, "step": 3955 }, { "epoch": 0.26456440406199894, "grad_norm": 0.162109375, "learning_rate": 0.000183951843988726, "loss": 1.2002, "step": 3960 }, { "epoch": 0.2648984500267237, "grad_norm": 0.1591796875, "learning_rate": 0.00018388843137632748, "loss": 1.2431, "step": 3965 }, { "epoch": 0.26523249599144844, "grad_norm": 0.162109375, "learning_rate": 0.00018382490470163378, "loss": 1.1313, "step": 3970 }, { "epoch": 0.2655665419561732, "grad_norm": 0.16015625, "learning_rate": 0.00018376126405102153, "loss": 1.2414, "step": 3975 }, { "epoch": 0.26590058792089794, "grad_norm": 0.197265625, "learning_rate": 0.00018369750951102232, "loss": 1.2146, "step": 3980 }, { "epoch": 0.2662346338856227, "grad_norm": 0.1796875, "learning_rate": 0.00018363364116832256, "loss": 1.1797, "step": 3985 }, { "epoch": 0.26656867985034743, "grad_norm": 0.162109375, "learning_rate": 0.00018356965910976348, "loss": 1.236, "step": 3990 }, { "epoch": 0.26690272581507213, "grad_norm": 0.1552734375, "learning_rate": 0.00018350556342234078, "loss": 1.1745, "step": 3995 }, { "epoch": 0.2672367717797969, "grad_norm": 0.1640625, "learning_rate": 0.00018344135419320483, "loss": 1.1889, "step": 4000 }, { "epoch": 0.2675708177445216, "grad_norm": 0.16015625, "learning_rate": 0.00018337703150966025, "loss": 1.2263, "step": 4005 }, { "epoch": 0.2679048637092464, "grad_norm": 0.15625, "learning_rate": 0.000183312595459166, "loss": 1.1455, "step": 4010 }, { "epoch": 0.2682389096739711, "grad_norm": 0.171875, "learning_rate": 0.0001832480461293352, "loss": 1.1681, "step": 4015 }, { "epoch": 0.2685729556386959, "grad_norm": 0.169921875, "learning_rate": 0.0001831833836079349, "loss": 1.2668, "step": 4020 }, { "epoch": 0.2689070016034206, "grad_norm": 0.1650390625, "learning_rate": 0.00018311860798288609, "loss": 1.2165, "step": 4025 }, { "epoch": 0.2692410475681454, "grad_norm": 0.16796875, "learning_rate": 0.00018305371934226362, "loss": 1.2629, "step": 4030 }, { "epoch": 0.2695750935328701, "grad_norm": 0.177734375, "learning_rate": 0.00018298871777429595, "loss": 1.1995, "step": 4035 }, { "epoch": 0.2699091394975949, "grad_norm": 0.16015625, "learning_rate": 0.00018292360336736506, "loss": 1.1677, "step": 4040 }, { "epoch": 0.2702431854623196, "grad_norm": 0.1640625, "learning_rate": 0.00018285837621000636, "loss": 1.2907, "step": 4045 }, { "epoch": 0.2705772314270444, "grad_norm": 0.1689453125, "learning_rate": 0.00018279303639090865, "loss": 1.2465, "step": 4050 }, { "epoch": 0.2709112773917691, "grad_norm": 0.1689453125, "learning_rate": 0.00018272758399891383, "loss": 1.1909, "step": 4055 }, { "epoch": 0.2712453233564939, "grad_norm": 0.1591796875, "learning_rate": 0.00018266201912301688, "loss": 1.2178, "step": 4060 }, { "epoch": 0.2715793693212186, "grad_norm": 0.173828125, "learning_rate": 0.00018259634185236574, "loss": 1.2175, "step": 4065 }, { "epoch": 0.27191341528594337, "grad_norm": 0.169921875, "learning_rate": 0.00018253055227626116, "loss": 1.1551, "step": 4070 }, { "epoch": 0.27224746125066807, "grad_norm": 0.1669921875, "learning_rate": 0.00018246465048415663, "loss": 1.1781, "step": 4075 }, { "epoch": 0.2725815072153928, "grad_norm": 0.1748046875, "learning_rate": 0.00018239863656565813, "loss": 1.2389, "step": 4080 }, { "epoch": 0.27291555318011756, "grad_norm": 0.1640625, "learning_rate": 0.00018233251061052421, "loss": 1.3091, "step": 4085 }, { "epoch": 0.2732495991448423, "grad_norm": 0.16015625, "learning_rate": 0.00018226627270866562, "loss": 1.2638, "step": 4090 }, { "epoch": 0.27358364510956706, "grad_norm": 0.1767578125, "learning_rate": 0.00018219992295014548, "loss": 1.194, "step": 4095 }, { "epoch": 0.2739176910742918, "grad_norm": 0.166015625, "learning_rate": 0.00018213346142517884, "loss": 1.2313, "step": 4100 }, { "epoch": 0.27425173703901656, "grad_norm": 0.1591796875, "learning_rate": 0.00018206688822413288, "loss": 1.1716, "step": 4105 }, { "epoch": 0.2745857830037413, "grad_norm": 0.158203125, "learning_rate": 0.00018200020343752646, "loss": 1.2846, "step": 4110 }, { "epoch": 0.27491982896846606, "grad_norm": 0.1611328125, "learning_rate": 0.00018193340715603033, "loss": 1.154, "step": 4115 }, { "epoch": 0.2752538749331908, "grad_norm": 0.1748046875, "learning_rate": 0.00018186649947046668, "loss": 1.211, "step": 4120 }, { "epoch": 0.27558792089791556, "grad_norm": 0.169921875, "learning_rate": 0.0001817994804718093, "loss": 1.2262, "step": 4125 }, { "epoch": 0.2759219668626403, "grad_norm": 0.16796875, "learning_rate": 0.00018173235025118324, "loss": 1.2007, "step": 4130 }, { "epoch": 0.27625601282736506, "grad_norm": 0.162109375, "learning_rate": 0.0001816651088998649, "loss": 1.228, "step": 4135 }, { "epoch": 0.2765900587920898, "grad_norm": 0.1640625, "learning_rate": 0.00018159775650928155, "loss": 1.2326, "step": 4140 }, { "epoch": 0.27692410475681456, "grad_norm": 0.1689453125, "learning_rate": 0.0001815302931710117, "loss": 1.1988, "step": 4145 }, { "epoch": 0.2772581507215393, "grad_norm": 0.1708984375, "learning_rate": 0.00018146271897678457, "loss": 1.2058, "step": 4150 }, { "epoch": 0.277592196686264, "grad_norm": 0.1708984375, "learning_rate": 0.00018139503401848017, "loss": 1.3223, "step": 4155 }, { "epoch": 0.27792624265098875, "grad_norm": 0.1630859375, "learning_rate": 0.00018132723838812907, "loss": 1.2199, "step": 4160 }, { "epoch": 0.2782602886157135, "grad_norm": 0.2080078125, "learning_rate": 0.00018125933217791234, "loss": 1.2513, "step": 4165 }, { "epoch": 0.27859433458043825, "grad_norm": 0.16015625, "learning_rate": 0.00018119131548016137, "loss": 1.2097, "step": 4170 }, { "epoch": 0.278928380545163, "grad_norm": 0.19140625, "learning_rate": 0.00018112318838735787, "loss": 1.1601, "step": 4175 }, { "epoch": 0.27926242650988775, "grad_norm": 0.15625, "learning_rate": 0.00018105495099213353, "loss": 1.2154, "step": 4180 }, { "epoch": 0.2795964724746125, "grad_norm": 0.166015625, "learning_rate": 0.00018098660338727017, "loss": 1.1971, "step": 4185 }, { "epoch": 0.27993051843933725, "grad_norm": 0.1708984375, "learning_rate": 0.0001809181456656993, "loss": 1.1371, "step": 4190 }, { "epoch": 0.280264564404062, "grad_norm": 0.1708984375, "learning_rate": 0.00018084957792050224, "loss": 1.2547, "step": 4195 }, { "epoch": 0.28059861036878675, "grad_norm": 0.185546875, "learning_rate": 0.0001807809002449099, "loss": 1.201, "step": 4200 }, { "epoch": 0.2809326563335115, "grad_norm": 0.16796875, "learning_rate": 0.00018071211273230263, "loss": 1.2014, "step": 4205 }, { "epoch": 0.28126670229823625, "grad_norm": 0.1669921875, "learning_rate": 0.00018064321547621022, "loss": 1.2495, "step": 4210 }, { "epoch": 0.281600748262961, "grad_norm": 0.1689453125, "learning_rate": 0.00018057420857031157, "loss": 1.1429, "step": 4215 }, { "epoch": 0.28193479422768575, "grad_norm": 0.173828125, "learning_rate": 0.0001805050921084347, "loss": 1.1956, "step": 4220 }, { "epoch": 0.2822688401924105, "grad_norm": 0.1708984375, "learning_rate": 0.0001804358661845566, "loss": 1.1969, "step": 4225 }, { "epoch": 0.28260288615713525, "grad_norm": 0.1640625, "learning_rate": 0.00018036653089280308, "loss": 1.1448, "step": 4230 }, { "epoch": 0.28293693212185994, "grad_norm": 0.1806640625, "learning_rate": 0.00018029708632744871, "loss": 1.2246, "step": 4235 }, { "epoch": 0.2832709780865847, "grad_norm": 0.169921875, "learning_rate": 0.00018022753258291658, "loss": 1.2453, "step": 4240 }, { "epoch": 0.28360502405130944, "grad_norm": 0.1533203125, "learning_rate": 0.00018015786975377824, "loss": 1.1488, "step": 4245 }, { "epoch": 0.2839390700160342, "grad_norm": 0.1708984375, "learning_rate": 0.00018008809793475358, "loss": 1.1986, "step": 4250 }, { "epoch": 0.28427311598075894, "grad_norm": 0.1708984375, "learning_rate": 0.00018001821722071068, "loss": 1.2035, "step": 4255 }, { "epoch": 0.2846071619454837, "grad_norm": 0.1669921875, "learning_rate": 0.00017994822770666565, "loss": 1.2267, "step": 4260 }, { "epoch": 0.28494120791020844, "grad_norm": 0.18359375, "learning_rate": 0.0001798781294877826, "loss": 1.2708, "step": 4265 }, { "epoch": 0.2852752538749332, "grad_norm": 0.166015625, "learning_rate": 0.00017980792265937336, "loss": 1.2181, "step": 4270 }, { "epoch": 0.28560929983965794, "grad_norm": 0.1669921875, "learning_rate": 0.00017973760731689753, "loss": 1.2263, "step": 4275 }, { "epoch": 0.2859433458043827, "grad_norm": 0.2236328125, "learning_rate": 0.00017966718355596218, "loss": 1.1812, "step": 4280 }, { "epoch": 0.28627739176910744, "grad_norm": 0.1708984375, "learning_rate": 0.00017959665147232177, "loss": 1.1684, "step": 4285 }, { "epoch": 0.2866114377338322, "grad_norm": 0.1630859375, "learning_rate": 0.00017952601116187823, "loss": 1.1948, "step": 4290 }, { "epoch": 0.28694548369855694, "grad_norm": 0.166015625, "learning_rate": 0.00017945526272068038, "loss": 1.2083, "step": 4295 }, { "epoch": 0.2872795296632817, "grad_norm": 0.1806640625, "learning_rate": 0.00017938440624492427, "loss": 1.2976, "step": 4300 }, { "epoch": 0.28761357562800643, "grad_norm": 0.1708984375, "learning_rate": 0.00017931344183095272, "loss": 1.1737, "step": 4305 }, { "epoch": 0.2879476215927312, "grad_norm": 0.1708984375, "learning_rate": 0.00017924236957525544, "loss": 1.3016, "step": 4310 }, { "epoch": 0.2882816675574559, "grad_norm": 0.16796875, "learning_rate": 0.00017917118957446864, "loss": 1.183, "step": 4315 }, { "epoch": 0.28861571352218063, "grad_norm": 0.1611328125, "learning_rate": 0.00017909990192537504, "loss": 1.2075, "step": 4320 }, { "epoch": 0.2889497594869054, "grad_norm": 0.166015625, "learning_rate": 0.00017902850672490387, "loss": 1.1567, "step": 4325 }, { "epoch": 0.2892838054516301, "grad_norm": 0.1669921875, "learning_rate": 0.00017895700407013045, "loss": 1.1603, "step": 4330 }, { "epoch": 0.2896178514163549, "grad_norm": 0.171875, "learning_rate": 0.00017888539405827624, "loss": 1.1885, "step": 4335 }, { "epoch": 0.2899518973810796, "grad_norm": 0.16015625, "learning_rate": 0.0001788136767867087, "loss": 1.2023, "step": 4340 }, { "epoch": 0.2902859433458044, "grad_norm": 0.171875, "learning_rate": 0.00017874185235294113, "loss": 1.2503, "step": 4345 }, { "epoch": 0.2906199893105291, "grad_norm": 0.171875, "learning_rate": 0.0001786699208546325, "loss": 1.229, "step": 4350 }, { "epoch": 0.2909540352752539, "grad_norm": 0.1748046875, "learning_rate": 0.00017859788238958738, "loss": 1.2023, "step": 4355 }, { "epoch": 0.2912880812399786, "grad_norm": 0.154296875, "learning_rate": 0.00017852573705575583, "loss": 1.2153, "step": 4360 }, { "epoch": 0.2916221272047034, "grad_norm": 0.173828125, "learning_rate": 0.00017845348495123308, "loss": 1.1974, "step": 4365 }, { "epoch": 0.2919561731694281, "grad_norm": 0.1591796875, "learning_rate": 0.00017838112617425968, "loss": 1.1569, "step": 4370 }, { "epoch": 0.2922902191341529, "grad_norm": 0.1650390625, "learning_rate": 0.00017830866082322116, "loss": 1.2243, "step": 4375 }, { "epoch": 0.2926242650988776, "grad_norm": 0.166015625, "learning_rate": 0.00017823608899664796, "loss": 1.2616, "step": 4380 }, { "epoch": 0.2929583110636024, "grad_norm": 0.173828125, "learning_rate": 0.0001781634107932153, "loss": 1.2427, "step": 4385 }, { "epoch": 0.2932923570283271, "grad_norm": 0.1669921875, "learning_rate": 0.000178090626311743, "loss": 1.2594, "step": 4390 }, { "epoch": 0.29362640299305187, "grad_norm": 0.162109375, "learning_rate": 0.0001780177356511955, "loss": 1.1679, "step": 4395 }, { "epoch": 0.29396044895777657, "grad_norm": 0.16796875, "learning_rate": 0.00017794473891068142, "loss": 1.1783, "step": 4400 }, { "epoch": 0.2942944949225013, "grad_norm": 0.17578125, "learning_rate": 0.0001778716361894538, "loss": 1.2501, "step": 4405 }, { "epoch": 0.29462854088722606, "grad_norm": 0.27734375, "learning_rate": 0.00017779842758690973, "loss": 1.2928, "step": 4410 }, { "epoch": 0.2949625868519508, "grad_norm": 0.1640625, "learning_rate": 0.00017772511320259023, "loss": 1.1966, "step": 4415 }, { "epoch": 0.29529663281667556, "grad_norm": 0.162109375, "learning_rate": 0.0001776516931361801, "loss": 1.28, "step": 4420 }, { "epoch": 0.2956306787814003, "grad_norm": 0.1689453125, "learning_rate": 0.00017757816748750798, "loss": 1.1887, "step": 4425 }, { "epoch": 0.29596472474612506, "grad_norm": 0.1689453125, "learning_rate": 0.00017750453635654591, "loss": 1.2065, "step": 4430 }, { "epoch": 0.2962987707108498, "grad_norm": 0.173828125, "learning_rate": 0.0001774307998434095, "loss": 1.2641, "step": 4435 }, { "epoch": 0.29663281667557456, "grad_norm": 0.16015625, "learning_rate": 0.00017735695804835757, "loss": 1.1513, "step": 4440 }, { "epoch": 0.2969668626402993, "grad_norm": 0.1611328125, "learning_rate": 0.0001772830110717921, "loss": 1.2108, "step": 4445 }, { "epoch": 0.29730090860502406, "grad_norm": 0.1650390625, "learning_rate": 0.00017720895901425805, "loss": 1.1339, "step": 4450 }, { "epoch": 0.2976349545697488, "grad_norm": 0.162109375, "learning_rate": 0.0001771348019764433, "loss": 1.1995, "step": 4455 }, { "epoch": 0.29796900053447356, "grad_norm": 0.1728515625, "learning_rate": 0.0001770605400591785, "loss": 1.2197, "step": 4460 }, { "epoch": 0.2983030464991983, "grad_norm": 0.16015625, "learning_rate": 0.00017698617336343685, "loss": 1.226, "step": 4465 }, { "epoch": 0.29863709246392306, "grad_norm": 0.208984375, "learning_rate": 0.000176911701990334, "loss": 1.1742, "step": 4470 }, { "epoch": 0.2989711384286478, "grad_norm": 0.1640625, "learning_rate": 0.00017683712604112798, "loss": 1.2294, "step": 4475 }, { "epoch": 0.2993051843933725, "grad_norm": 0.1669921875, "learning_rate": 0.00017676244561721905, "loss": 1.1323, "step": 4480 }, { "epoch": 0.29963923035809725, "grad_norm": 0.169921875, "learning_rate": 0.00017668766082014936, "loss": 1.1976, "step": 4485 }, { "epoch": 0.299973276322822, "grad_norm": 0.1572265625, "learning_rate": 0.00017661277175160314, "loss": 1.3004, "step": 4490 }, { "epoch": 0.30030732228754675, "grad_norm": 0.1591796875, "learning_rate": 0.00017653777851340635, "loss": 1.2209, "step": 4495 }, { "epoch": 0.3006413682522715, "grad_norm": 0.169921875, "learning_rate": 0.0001764626812075266, "loss": 1.3061, "step": 4500 }, { "epoch": 0.30097541421699625, "grad_norm": 0.1591796875, "learning_rate": 0.00017638747993607286, "loss": 1.2313, "step": 4505 }, { "epoch": 0.301309460181721, "grad_norm": 0.166015625, "learning_rate": 0.00017631217480129573, "loss": 1.2196, "step": 4510 }, { "epoch": 0.30164350614644575, "grad_norm": 0.16796875, "learning_rate": 0.00017623676590558675, "loss": 1.2387, "step": 4515 }, { "epoch": 0.3019775521111705, "grad_norm": 0.1884765625, "learning_rate": 0.00017616125335147875, "loss": 1.1669, "step": 4520 }, { "epoch": 0.30231159807589525, "grad_norm": 0.1845703125, "learning_rate": 0.00017608563724164536, "loss": 1.2865, "step": 4525 }, { "epoch": 0.30264564404062, "grad_norm": 0.162109375, "learning_rate": 0.0001760099176789012, "loss": 1.191, "step": 4530 }, { "epoch": 0.30297969000534475, "grad_norm": 0.1669921875, "learning_rate": 0.00017593409476620127, "loss": 1.2484, "step": 4535 }, { "epoch": 0.3033137359700695, "grad_norm": 0.166015625, "learning_rate": 0.00017585816860664135, "loss": 1.2039, "step": 4540 }, { "epoch": 0.30364778193479425, "grad_norm": 0.197265625, "learning_rate": 0.00017578213930345753, "loss": 1.2493, "step": 4545 }, { "epoch": 0.303981827899519, "grad_norm": 0.1943359375, "learning_rate": 0.0001757060069600261, "loss": 1.1464, "step": 4550 }, { "epoch": 0.30431587386424375, "grad_norm": 0.1650390625, "learning_rate": 0.00017562977167986344, "loss": 1.2524, "step": 4555 }, { "epoch": 0.30464991982896844, "grad_norm": 0.1767578125, "learning_rate": 0.00017555343356662597, "loss": 1.1804, "step": 4560 }, { "epoch": 0.3049839657936932, "grad_norm": 0.171875, "learning_rate": 0.00017547699272410988, "loss": 1.1869, "step": 4565 }, { "epoch": 0.30531801175841794, "grad_norm": 0.16015625, "learning_rate": 0.00017540044925625102, "loss": 1.1751, "step": 4570 }, { "epoch": 0.3056520577231427, "grad_norm": 0.1630859375, "learning_rate": 0.00017532380326712487, "loss": 1.2452, "step": 4575 }, { "epoch": 0.30598610368786744, "grad_norm": 0.1708984375, "learning_rate": 0.0001752470548609462, "loss": 1.2203, "step": 4580 }, { "epoch": 0.3063201496525922, "grad_norm": 0.171875, "learning_rate": 0.00017517020414206913, "loss": 1.2208, "step": 4585 }, { "epoch": 0.30665419561731694, "grad_norm": 0.1650390625, "learning_rate": 0.00017509325121498677, "loss": 1.1297, "step": 4590 }, { "epoch": 0.3069882415820417, "grad_norm": 0.1650390625, "learning_rate": 0.00017501619618433135, "loss": 1.2869, "step": 4595 }, { "epoch": 0.30732228754676644, "grad_norm": 0.166015625, "learning_rate": 0.00017493903915487377, "loss": 1.2867, "step": 4600 }, { "epoch": 0.3076563335114912, "grad_norm": 0.166015625, "learning_rate": 0.0001748617802315238, "loss": 1.1794, "step": 4605 }, { "epoch": 0.30799037947621594, "grad_norm": 0.154296875, "learning_rate": 0.0001747844195193296, "loss": 1.1931, "step": 4610 }, { "epoch": 0.3083244254409407, "grad_norm": 0.1728515625, "learning_rate": 0.0001747069571234778, "loss": 1.1756, "step": 4615 }, { "epoch": 0.30865847140566544, "grad_norm": 0.1650390625, "learning_rate": 0.00017462939314929327, "loss": 1.2366, "step": 4620 }, { "epoch": 0.3089925173703902, "grad_norm": 0.1806640625, "learning_rate": 0.000174551727702239, "loss": 1.1803, "step": 4625 }, { "epoch": 0.30932656333511493, "grad_norm": 0.1875, "learning_rate": 0.00017447396088791597, "loss": 1.2106, "step": 4630 }, { "epoch": 0.3096606092998397, "grad_norm": 0.15625, "learning_rate": 0.00017439609281206297, "loss": 1.2554, "step": 4635 }, { "epoch": 0.3099946552645644, "grad_norm": 0.1845703125, "learning_rate": 0.00017431812358055645, "loss": 1.1544, "step": 4640 }, { "epoch": 0.31032870122928913, "grad_norm": 0.2021484375, "learning_rate": 0.00017424005329941047, "loss": 1.1944, "step": 4645 }, { "epoch": 0.3106627471940139, "grad_norm": 0.1708984375, "learning_rate": 0.00017416188207477638, "loss": 1.2195, "step": 4650 }, { "epoch": 0.3109967931587386, "grad_norm": 0.1708984375, "learning_rate": 0.00017408361001294292, "loss": 1.2621, "step": 4655 }, { "epoch": 0.3113308391234634, "grad_norm": 0.1630859375, "learning_rate": 0.0001740052372203358, "loss": 1.2301, "step": 4660 }, { "epoch": 0.3116648850881881, "grad_norm": 0.1630859375, "learning_rate": 0.00017392676380351775, "loss": 1.1779, "step": 4665 }, { "epoch": 0.3119989310529129, "grad_norm": 0.1689453125, "learning_rate": 0.00017384818986918837, "loss": 1.1954, "step": 4670 }, { "epoch": 0.3123329770176376, "grad_norm": 0.1923828125, "learning_rate": 0.00017376951552418386, "loss": 1.2073, "step": 4675 }, { "epoch": 0.3126670229823624, "grad_norm": 0.1669921875, "learning_rate": 0.00017369074087547696, "loss": 1.1381, "step": 4680 }, { "epoch": 0.3130010689470871, "grad_norm": 0.1728515625, "learning_rate": 0.00017361186603017685, "loss": 1.2583, "step": 4685 }, { "epoch": 0.3133351149118119, "grad_norm": 0.1748046875, "learning_rate": 0.00017353289109552883, "loss": 1.2078, "step": 4690 }, { "epoch": 0.3136691608765366, "grad_norm": 0.1796875, "learning_rate": 0.00017345381617891442, "loss": 1.2324, "step": 4695 }, { "epoch": 0.3140032068412614, "grad_norm": 0.171875, "learning_rate": 0.000173374641387851, "loss": 1.1969, "step": 4700 }, { "epoch": 0.3143372528059861, "grad_norm": 0.162109375, "learning_rate": 0.0001732953668299918, "loss": 1.1406, "step": 4705 }, { "epoch": 0.31467129877071087, "grad_norm": 0.1650390625, "learning_rate": 0.0001732159926131256, "loss": 1.1391, "step": 4710 }, { "epoch": 0.3150053447354356, "grad_norm": 0.1591796875, "learning_rate": 0.00017313651884517684, "loss": 1.1954, "step": 4715 }, { "epoch": 0.3153393907001603, "grad_norm": 0.171875, "learning_rate": 0.00017305694563420524, "loss": 1.218, "step": 4720 }, { "epoch": 0.31567343666488507, "grad_norm": 0.1640625, "learning_rate": 0.00017297727308840564, "loss": 1.2108, "step": 4725 }, { "epoch": 0.3160074826296098, "grad_norm": 0.16796875, "learning_rate": 0.00017289750131610813, "loss": 1.2214, "step": 4730 }, { "epoch": 0.31634152859433456, "grad_norm": 0.1591796875, "learning_rate": 0.00017281763042577763, "loss": 1.3101, "step": 4735 }, { "epoch": 0.3166755745590593, "grad_norm": 0.1650390625, "learning_rate": 0.00017273766052601378, "loss": 1.2593, "step": 4740 }, { "epoch": 0.31700962052378406, "grad_norm": 0.1728515625, "learning_rate": 0.00017265759172555085, "loss": 1.2348, "step": 4745 }, { "epoch": 0.3173436664885088, "grad_norm": 0.177734375, "learning_rate": 0.0001725774241332577, "loss": 1.1378, "step": 4750 }, { "epoch": 0.31767771245323356, "grad_norm": 0.15234375, "learning_rate": 0.00017249715785813737, "loss": 1.1839, "step": 4755 }, { "epoch": 0.3180117584179583, "grad_norm": 0.1728515625, "learning_rate": 0.00017241679300932717, "loss": 1.2049, "step": 4760 }, { "epoch": 0.31834580438268306, "grad_norm": 0.1650390625, "learning_rate": 0.00017233632969609842, "loss": 1.2061, "step": 4765 }, { "epoch": 0.3186798503474078, "grad_norm": 0.1865234375, "learning_rate": 0.00017225576802785636, "loss": 1.2253, "step": 4770 }, { "epoch": 0.31901389631213256, "grad_norm": 0.154296875, "learning_rate": 0.0001721751081141398, "loss": 1.2239, "step": 4775 }, { "epoch": 0.3193479422768573, "grad_norm": 0.1708984375, "learning_rate": 0.00017209435006462136, "loss": 1.1895, "step": 4780 }, { "epoch": 0.31968198824158206, "grad_norm": 0.1689453125, "learning_rate": 0.00017201349398910694, "loss": 1.3404, "step": 4785 }, { "epoch": 0.3200160342063068, "grad_norm": 0.16015625, "learning_rate": 0.00017193253999753575, "loss": 1.199, "step": 4790 }, { "epoch": 0.32035008017103156, "grad_norm": 0.171875, "learning_rate": 0.00017185148819998022, "loss": 1.2542, "step": 4795 }, { "epoch": 0.32068412613575625, "grad_norm": 0.169921875, "learning_rate": 0.0001717703387066456, "loss": 1.2918, "step": 4800 }, { "epoch": 0.321018172100481, "grad_norm": 0.16796875, "learning_rate": 0.00017168909162787016, "loss": 1.1468, "step": 4805 }, { "epoch": 0.32135221806520575, "grad_norm": 0.1728515625, "learning_rate": 0.00017160774707412476, "loss": 1.2845, "step": 4810 }, { "epoch": 0.3216862640299305, "grad_norm": 0.17578125, "learning_rate": 0.00017152630515601281, "loss": 1.1376, "step": 4815 }, { "epoch": 0.32202030999465525, "grad_norm": 0.1728515625, "learning_rate": 0.0001714447659842701, "loss": 1.2282, "step": 4820 }, { "epoch": 0.32235435595938, "grad_norm": 0.1689453125, "learning_rate": 0.00017136312966976465, "loss": 1.2559, "step": 4825 }, { "epoch": 0.32268840192410475, "grad_norm": 0.1552734375, "learning_rate": 0.00017128139632349658, "loss": 1.1685, "step": 4830 }, { "epoch": 0.3230224478888295, "grad_norm": 0.16015625, "learning_rate": 0.00017119956605659792, "loss": 1.1611, "step": 4835 }, { "epoch": 0.32335649385355425, "grad_norm": 0.16015625, "learning_rate": 0.0001711176389803325, "loss": 1.2007, "step": 4840 }, { "epoch": 0.323690539818279, "grad_norm": 0.1708984375, "learning_rate": 0.0001710356152060958, "loss": 1.1714, "step": 4845 }, { "epoch": 0.32402458578300375, "grad_norm": 0.17578125, "learning_rate": 0.00017095349484541478, "loss": 1.1959, "step": 4850 }, { "epoch": 0.3243586317477285, "grad_norm": 0.1669921875, "learning_rate": 0.00017087127800994767, "loss": 1.2585, "step": 4855 }, { "epoch": 0.32469267771245325, "grad_norm": 0.19140625, "learning_rate": 0.00017078896481148388, "loss": 1.1676, "step": 4860 }, { "epoch": 0.325026723677178, "grad_norm": 0.1708984375, "learning_rate": 0.00017070655536194397, "loss": 1.1425, "step": 4865 }, { "epoch": 0.32536076964190275, "grad_norm": 0.1689453125, "learning_rate": 0.00017062404977337918, "loss": 1.1537, "step": 4870 }, { "epoch": 0.3256948156066275, "grad_norm": 0.1708984375, "learning_rate": 0.00017054144815797164, "loss": 1.1753, "step": 4875 }, { "epoch": 0.3260288615713522, "grad_norm": 0.169921875, "learning_rate": 0.00017045875062803395, "loss": 1.2631, "step": 4880 }, { "epoch": 0.32636290753607694, "grad_norm": 0.154296875, "learning_rate": 0.00017037595729600913, "loss": 1.2179, "step": 4885 }, { "epoch": 0.3266969535008017, "grad_norm": 0.1591796875, "learning_rate": 0.00017029306827447049, "loss": 1.1933, "step": 4890 }, { "epoch": 0.32703099946552644, "grad_norm": 0.1845703125, "learning_rate": 0.00017021008367612144, "loss": 1.2329, "step": 4895 }, { "epoch": 0.3273650454302512, "grad_norm": 0.1708984375, "learning_rate": 0.00017012700361379533, "loss": 1.2341, "step": 4900 }, { "epoch": 0.32769909139497594, "grad_norm": 0.1708984375, "learning_rate": 0.00017004382820045533, "loss": 1.1853, "step": 4905 }, { "epoch": 0.3280331373597007, "grad_norm": 0.166015625, "learning_rate": 0.0001699605575491943, "loss": 1.1806, "step": 4910 }, { "epoch": 0.32836718332442544, "grad_norm": 0.1708984375, "learning_rate": 0.00016987719177323445, "loss": 1.2325, "step": 4915 }, { "epoch": 0.3287012292891502, "grad_norm": 0.16015625, "learning_rate": 0.0001697937309859275, "loss": 1.2619, "step": 4920 }, { "epoch": 0.32903527525387494, "grad_norm": 0.166015625, "learning_rate": 0.00016971017530075427, "loss": 1.2323, "step": 4925 }, { "epoch": 0.3293693212185997, "grad_norm": 0.265625, "learning_rate": 0.0001696265248313246, "loss": 1.2028, "step": 4930 }, { "epoch": 0.32970336718332444, "grad_norm": 0.1796875, "learning_rate": 0.00016954277969137723, "loss": 1.1464, "step": 4935 }, { "epoch": 0.3300374131480492, "grad_norm": 0.1865234375, "learning_rate": 0.00016945893999477965, "loss": 1.2066, "step": 4940 }, { "epoch": 0.33037145911277394, "grad_norm": 0.1796875, "learning_rate": 0.00016937500585552785, "loss": 1.1981, "step": 4945 }, { "epoch": 0.3307055050774987, "grad_norm": 0.1689453125, "learning_rate": 0.00016929097738774634, "loss": 1.1917, "step": 4950 }, { "epoch": 0.33103955104222343, "grad_norm": 0.1708984375, "learning_rate": 0.00016920685470568777, "loss": 1.1599, "step": 4955 }, { "epoch": 0.33137359700694813, "grad_norm": 0.1796875, "learning_rate": 0.00016912263792373295, "loss": 1.2997, "step": 4960 }, { "epoch": 0.3317076429716729, "grad_norm": 0.154296875, "learning_rate": 0.00016903832715639062, "loss": 1.1939, "step": 4965 }, { "epoch": 0.3320416889363976, "grad_norm": 0.171875, "learning_rate": 0.00016895392251829736, "loss": 1.1861, "step": 4970 }, { "epoch": 0.3323757349011224, "grad_norm": 0.158203125, "learning_rate": 0.00016886942412421734, "loss": 1.282, "step": 4975 }, { "epoch": 0.3327097808658471, "grad_norm": 0.1796875, "learning_rate": 0.00016878483208904217, "loss": 1.2393, "step": 4980 }, { "epoch": 0.3330438268305719, "grad_norm": 0.166015625, "learning_rate": 0.00016870014652779086, "loss": 1.2781, "step": 4985 }, { "epoch": 0.3333778727952966, "grad_norm": 0.1923828125, "learning_rate": 0.00016861536755560956, "loss": 1.242, "step": 4990 }, { "epoch": 0.3337119187600214, "grad_norm": 0.1611328125, "learning_rate": 0.0001685304952877714, "loss": 1.1841, "step": 4995 }, { "epoch": 0.3340459647247461, "grad_norm": 0.162109375, "learning_rate": 0.0001684455298396764, "loss": 1.2658, "step": 5000 }, { "epoch": 0.3343800106894709, "grad_norm": 0.1669921875, "learning_rate": 0.00016836047132685132, "loss": 1.3034, "step": 5005 }, { "epoch": 0.3347140566541956, "grad_norm": 0.1640625, "learning_rate": 0.0001682753198649493, "loss": 1.1442, "step": 5010 }, { "epoch": 0.3350481026189204, "grad_norm": 0.16796875, "learning_rate": 0.00016819007556975003, "loss": 1.2386, "step": 5015 }, { "epoch": 0.3353821485836451, "grad_norm": 0.1787109375, "learning_rate": 0.0001681047385571594, "loss": 1.3074, "step": 5020 }, { "epoch": 0.3357161945483699, "grad_norm": 0.169921875, "learning_rate": 0.0001680193089432092, "loss": 1.215, "step": 5025 }, { "epoch": 0.3360502405130946, "grad_norm": 0.1669921875, "learning_rate": 0.00016793378684405735, "loss": 1.2023, "step": 5030 }, { "epoch": 0.33638428647781937, "grad_norm": 0.16015625, "learning_rate": 0.00016784817237598744, "loss": 1.1826, "step": 5035 }, { "epoch": 0.3367183324425441, "grad_norm": 0.1845703125, "learning_rate": 0.0001677624656554086, "loss": 1.2126, "step": 5040 }, { "epoch": 0.3370523784072688, "grad_norm": 0.16015625, "learning_rate": 0.00016767666679885546, "loss": 1.2062, "step": 5045 }, { "epoch": 0.33738642437199357, "grad_norm": 0.1640625, "learning_rate": 0.00016759077592298788, "loss": 1.2673, "step": 5050 }, { "epoch": 0.3377204703367183, "grad_norm": 0.16796875, "learning_rate": 0.00016750479314459087, "loss": 1.2311, "step": 5055 }, { "epoch": 0.33805451630144306, "grad_norm": 0.1669921875, "learning_rate": 0.00016741871858057437, "loss": 1.1168, "step": 5060 }, { "epoch": 0.3383885622661678, "grad_norm": 0.1865234375, "learning_rate": 0.00016733255234797318, "loss": 1.1359, "step": 5065 }, { "epoch": 0.33872260823089256, "grad_norm": 0.171875, "learning_rate": 0.00016724629456394666, "loss": 1.1967, "step": 5070 }, { "epoch": 0.3390566541956173, "grad_norm": 0.169921875, "learning_rate": 0.0001671599453457787, "loss": 1.2424, "step": 5075 }, { "epoch": 0.33939070016034206, "grad_norm": 0.1611328125, "learning_rate": 0.00016707350481087754, "loss": 1.1751, "step": 5080 }, { "epoch": 0.3397247461250668, "grad_norm": 0.1728515625, "learning_rate": 0.00016698697307677548, "loss": 1.1932, "step": 5085 }, { "epoch": 0.34005879208979156, "grad_norm": 0.189453125, "learning_rate": 0.00016690035026112893, "loss": 1.2002, "step": 5090 }, { "epoch": 0.3403928380545163, "grad_norm": 0.1689453125, "learning_rate": 0.0001668136364817181, "loss": 1.2033, "step": 5095 }, { "epoch": 0.34072688401924106, "grad_norm": 0.189453125, "learning_rate": 0.0001667268318564469, "loss": 1.1139, "step": 5100 }, { "epoch": 0.3410609299839658, "grad_norm": 0.1611328125, "learning_rate": 0.0001666399365033427, "loss": 1.2557, "step": 5105 }, { "epoch": 0.34139497594869056, "grad_norm": 0.1669921875, "learning_rate": 0.00016655295054055633, "loss": 1.1648, "step": 5110 }, { "epoch": 0.3417290219134153, "grad_norm": 0.1884765625, "learning_rate": 0.0001664658740863617, "loss": 1.2161, "step": 5115 }, { "epoch": 0.34206306787814006, "grad_norm": 0.158203125, "learning_rate": 0.00016637870725915593, "loss": 1.2772, "step": 5120 }, { "epoch": 0.34239711384286475, "grad_norm": 0.1640625, "learning_rate": 0.00016629145017745878, "loss": 1.2373, "step": 5125 }, { "epoch": 0.3427311598075895, "grad_norm": 0.173828125, "learning_rate": 0.000166204102959913, "loss": 1.1672, "step": 5130 }, { "epoch": 0.34306520577231425, "grad_norm": 0.1787109375, "learning_rate": 0.00016611666572528372, "loss": 1.2734, "step": 5135 }, { "epoch": 0.343399251737039, "grad_norm": 0.20703125, "learning_rate": 0.00016602913859245847, "loss": 1.2255, "step": 5140 }, { "epoch": 0.34373329770176375, "grad_norm": 0.1826171875, "learning_rate": 0.0001659415216804471, "loss": 1.2051, "step": 5145 }, { "epoch": 0.3440673436664885, "grad_norm": 0.1787109375, "learning_rate": 0.00016585381510838144, "loss": 1.203, "step": 5150 }, { "epoch": 0.34440138963121325, "grad_norm": 0.166015625, "learning_rate": 0.00016576601899551534, "loss": 1.1811, "step": 5155 }, { "epoch": 0.344735435595938, "grad_norm": 0.1708984375, "learning_rate": 0.00016567813346122427, "loss": 1.2409, "step": 5160 }, { "epoch": 0.34506948156066275, "grad_norm": 0.162109375, "learning_rate": 0.00016559015862500538, "loss": 1.1553, "step": 5165 }, { "epoch": 0.3454035275253875, "grad_norm": 0.1806640625, "learning_rate": 0.00016550209460647718, "loss": 1.2214, "step": 5170 }, { "epoch": 0.34573757349011225, "grad_norm": 0.173828125, "learning_rate": 0.00016541394152537952, "loss": 1.2735, "step": 5175 }, { "epoch": 0.346071619454837, "grad_norm": 0.1669921875, "learning_rate": 0.00016532569950157325, "loss": 1.2076, "step": 5180 }, { "epoch": 0.34640566541956175, "grad_norm": 0.1708984375, "learning_rate": 0.00016523736865504021, "loss": 1.1431, "step": 5185 }, { "epoch": 0.3467397113842865, "grad_norm": 0.18359375, "learning_rate": 0.00016514894910588305, "loss": 1.2222, "step": 5190 }, { "epoch": 0.34707375734901125, "grad_norm": 0.1650390625, "learning_rate": 0.00016506044097432495, "loss": 1.1915, "step": 5195 }, { "epoch": 0.347407803313736, "grad_norm": 0.1708984375, "learning_rate": 0.00016497184438070956, "loss": 1.2848, "step": 5200 }, { "epoch": 0.3477418492784607, "grad_norm": 0.1845703125, "learning_rate": 0.0001648831594455008, "loss": 1.1968, "step": 5205 }, { "epoch": 0.34807589524318544, "grad_norm": 0.1982421875, "learning_rate": 0.00016479438628928277, "loss": 1.2443, "step": 5210 }, { "epoch": 0.3484099412079102, "grad_norm": 0.1650390625, "learning_rate": 0.00016470552503275947, "loss": 1.1879, "step": 5215 }, { "epoch": 0.34874398717263494, "grad_norm": 0.1650390625, "learning_rate": 0.0001646165757967547, "loss": 1.1934, "step": 5220 }, { "epoch": 0.3490780331373597, "grad_norm": 0.1640625, "learning_rate": 0.00016452753870221183, "loss": 1.2376, "step": 5225 }, { "epoch": 0.34941207910208444, "grad_norm": 0.1689453125, "learning_rate": 0.0001644384138701938, "loss": 1.2077, "step": 5230 }, { "epoch": 0.3497461250668092, "grad_norm": 0.1669921875, "learning_rate": 0.00016434920142188278, "loss": 1.2144, "step": 5235 }, { "epoch": 0.35008017103153394, "grad_norm": 0.16015625, "learning_rate": 0.00016425990147858003, "loss": 1.2423, "step": 5240 }, { "epoch": 0.3504142169962587, "grad_norm": 0.1728515625, "learning_rate": 0.00016417051416170594, "loss": 1.1769, "step": 5245 }, { "epoch": 0.35074826296098344, "grad_norm": 0.1640625, "learning_rate": 0.00016408103959279945, "loss": 1.1844, "step": 5250 }, { "epoch": 0.3510823089257082, "grad_norm": 0.169921875, "learning_rate": 0.00016399147789351837, "loss": 1.1612, "step": 5255 }, { "epoch": 0.35141635489043294, "grad_norm": 0.173828125, "learning_rate": 0.00016390182918563887, "loss": 1.2393, "step": 5260 }, { "epoch": 0.3517504008551577, "grad_norm": 0.1806640625, "learning_rate": 0.0001638120935910554, "loss": 1.187, "step": 5265 }, { "epoch": 0.35208444681988244, "grad_norm": 0.177734375, "learning_rate": 0.00016372227123178057, "loss": 1.1734, "step": 5270 }, { "epoch": 0.3524184927846072, "grad_norm": 0.1640625, "learning_rate": 0.00016363236222994505, "loss": 1.1329, "step": 5275 }, { "epoch": 0.35275253874933193, "grad_norm": 0.1689453125, "learning_rate": 0.0001635423667077972, "loss": 1.2187, "step": 5280 }, { "epoch": 0.35308658471405663, "grad_norm": 0.1708984375, "learning_rate": 0.0001634522847877031, "loss": 1.2226, "step": 5285 }, { "epoch": 0.3534206306787814, "grad_norm": 0.166015625, "learning_rate": 0.00016336211659214621, "loss": 1.1586, "step": 5290 }, { "epoch": 0.3537546766435061, "grad_norm": 0.1767578125, "learning_rate": 0.00016327186224372747, "loss": 1.1645, "step": 5295 }, { "epoch": 0.3540887226082309, "grad_norm": 0.162109375, "learning_rate": 0.00016318152186516472, "loss": 1.2019, "step": 5300 }, { "epoch": 0.3544227685729556, "grad_norm": 0.1689453125, "learning_rate": 0.00016309109557929302, "loss": 1.2287, "step": 5305 }, { "epoch": 0.3547568145376804, "grad_norm": 0.1689453125, "learning_rate": 0.00016300058350906404, "loss": 1.1239, "step": 5310 }, { "epoch": 0.3550908605024051, "grad_norm": 0.17578125, "learning_rate": 0.00016290998577754622, "loss": 1.1858, "step": 5315 }, { "epoch": 0.3554249064671299, "grad_norm": 0.16796875, "learning_rate": 0.00016281930250792442, "loss": 1.2656, "step": 5320 }, { "epoch": 0.3557589524318546, "grad_norm": 0.1669921875, "learning_rate": 0.00016272853382349979, "loss": 1.1655, "step": 5325 }, { "epoch": 0.3560929983965794, "grad_norm": 0.166015625, "learning_rate": 0.00016263767984768965, "loss": 1.2054, "step": 5330 }, { "epoch": 0.3564270443613041, "grad_norm": 0.166015625, "learning_rate": 0.00016254674070402731, "loss": 1.1843, "step": 5335 }, { "epoch": 0.3567610903260289, "grad_norm": 0.173828125, "learning_rate": 0.0001624557165161618, "loss": 1.2551, "step": 5340 }, { "epoch": 0.3570951362907536, "grad_norm": 0.177734375, "learning_rate": 0.00016236460740785784, "loss": 1.179, "step": 5345 }, { "epoch": 0.3574291822554784, "grad_norm": 0.169921875, "learning_rate": 0.00016227341350299568, "loss": 1.2391, "step": 5350 }, { "epoch": 0.3577632282202031, "grad_norm": 0.17578125, "learning_rate": 0.00016218213492557072, "loss": 1.1763, "step": 5355 }, { "epoch": 0.35809727418492787, "grad_norm": 0.1767578125, "learning_rate": 0.00016209077179969356, "loss": 1.1736, "step": 5360 }, { "epoch": 0.35843132014965257, "grad_norm": 0.1748046875, "learning_rate": 0.00016199932424958984, "loss": 1.1226, "step": 5365 }, { "epoch": 0.3587653661143773, "grad_norm": 0.162109375, "learning_rate": 0.00016190779239959988, "loss": 1.1357, "step": 5370 }, { "epoch": 0.35909941207910206, "grad_norm": 0.2080078125, "learning_rate": 0.00016181617637417862, "loss": 1.232, "step": 5375 }, { "epoch": 0.3594334580438268, "grad_norm": 0.1796875, "learning_rate": 0.00016172447629789555, "loss": 1.1751, "step": 5380 }, { "epoch": 0.35976750400855156, "grad_norm": 0.1591796875, "learning_rate": 0.00016163269229543437, "loss": 1.2229, "step": 5385 }, { "epoch": 0.3601015499732763, "grad_norm": 0.16015625, "learning_rate": 0.00016154082449159284, "loss": 1.2067, "step": 5390 }, { "epoch": 0.36043559593800106, "grad_norm": 0.17578125, "learning_rate": 0.00016144887301128283, "loss": 1.2289, "step": 5395 }, { "epoch": 0.3607696419027258, "grad_norm": 0.173828125, "learning_rate": 0.00016135683797952982, "loss": 1.1391, "step": 5400 }, { "epoch": 0.36110368786745056, "grad_norm": 0.1796875, "learning_rate": 0.00016126471952147297, "loss": 1.2043, "step": 5405 }, { "epoch": 0.3614377338321753, "grad_norm": 0.162109375, "learning_rate": 0.00016117251776236492, "loss": 1.2456, "step": 5410 }, { "epoch": 0.36177177979690006, "grad_norm": 0.19921875, "learning_rate": 0.00016108023282757143, "loss": 1.3066, "step": 5415 }, { "epoch": 0.3621058257616248, "grad_norm": 0.166015625, "learning_rate": 0.00016098786484257147, "loss": 1.1601, "step": 5420 }, { "epoch": 0.36243987172634956, "grad_norm": 0.1640625, "learning_rate": 0.00016089541393295696, "loss": 1.2307, "step": 5425 }, { "epoch": 0.3627739176910743, "grad_norm": 0.1875, "learning_rate": 0.00016080288022443241, "loss": 1.2327, "step": 5430 }, { "epoch": 0.36310796365579906, "grad_norm": 0.1708984375, "learning_rate": 0.0001607102638428151, "loss": 1.1261, "step": 5435 }, { "epoch": 0.3634420096205238, "grad_norm": 0.1669921875, "learning_rate": 0.00016061756491403463, "loss": 1.1447, "step": 5440 }, { "epoch": 0.3637760555852485, "grad_norm": 0.1630859375, "learning_rate": 0.00016052478356413282, "loss": 1.2484, "step": 5445 }, { "epoch": 0.36411010154997325, "grad_norm": 0.1640625, "learning_rate": 0.00016043191991926356, "loss": 1.2338, "step": 5450 }, { "epoch": 0.364444147514698, "grad_norm": 0.1630859375, "learning_rate": 0.0001603389741056927, "loss": 1.1808, "step": 5455 }, { "epoch": 0.36477819347942275, "grad_norm": 0.1630859375, "learning_rate": 0.00016024594624979775, "loss": 1.2165, "step": 5460 }, { "epoch": 0.3651122394441475, "grad_norm": 0.1630859375, "learning_rate": 0.0001601528364780678, "loss": 1.1568, "step": 5465 }, { "epoch": 0.36544628540887225, "grad_norm": 0.166015625, "learning_rate": 0.00016005964491710328, "loss": 1.2476, "step": 5470 }, { "epoch": 0.365780331373597, "grad_norm": 0.1806640625, "learning_rate": 0.00015996637169361593, "loss": 1.2275, "step": 5475 }, { "epoch": 0.36611437733832175, "grad_norm": 0.169921875, "learning_rate": 0.00015987301693442838, "loss": 1.1998, "step": 5480 }, { "epoch": 0.3664484233030465, "grad_norm": 0.1611328125, "learning_rate": 0.00015977958076647428, "loss": 1.2051, "step": 5485 }, { "epoch": 0.36678246926777125, "grad_norm": 0.1708984375, "learning_rate": 0.0001596860633167978, "loss": 1.2561, "step": 5490 }, { "epoch": 0.367116515232496, "grad_norm": 0.162109375, "learning_rate": 0.0001595924647125538, "loss": 1.1694, "step": 5495 }, { "epoch": 0.36745056119722075, "grad_norm": 0.1650390625, "learning_rate": 0.00015949878508100733, "loss": 1.2307, "step": 5500 }, { "epoch": 0.3677846071619455, "grad_norm": 0.162109375, "learning_rate": 0.00015940502454953376, "loss": 1.2637, "step": 5505 }, { "epoch": 0.36811865312667025, "grad_norm": 0.1650390625, "learning_rate": 0.00015931118324561837, "loss": 1.1909, "step": 5510 }, { "epoch": 0.368452699091395, "grad_norm": 0.1708984375, "learning_rate": 0.00015921726129685624, "loss": 1.2717, "step": 5515 }, { "epoch": 0.36878674505611975, "grad_norm": 0.1552734375, "learning_rate": 0.00015912325883095217, "loss": 1.138, "step": 5520 }, { "epoch": 0.36912079102084444, "grad_norm": 0.1669921875, "learning_rate": 0.0001590291759757204, "loss": 1.1574, "step": 5525 }, { "epoch": 0.3694548369855692, "grad_norm": 0.17578125, "learning_rate": 0.00015893501285908448, "loss": 1.2592, "step": 5530 }, { "epoch": 0.36978888295029394, "grad_norm": 0.1650390625, "learning_rate": 0.00015884076960907711, "loss": 1.2071, "step": 5535 }, { "epoch": 0.3701229289150187, "grad_norm": 0.162109375, "learning_rate": 0.00015874644635383996, "loss": 1.249, "step": 5540 }, { "epoch": 0.37045697487974344, "grad_norm": 0.1611328125, "learning_rate": 0.00015865204322162337, "loss": 1.1818, "step": 5545 }, { "epoch": 0.3707910208444682, "grad_norm": 0.162109375, "learning_rate": 0.00015855756034078647, "loss": 1.2327, "step": 5550 }, { "epoch": 0.37112506680919294, "grad_norm": 0.1650390625, "learning_rate": 0.00015846299783979669, "loss": 1.2458, "step": 5555 }, { "epoch": 0.3714591127739177, "grad_norm": 0.1650390625, "learning_rate": 0.0001583683558472297, "loss": 1.2344, "step": 5560 }, { "epoch": 0.37179315873864244, "grad_norm": 0.15625, "learning_rate": 0.0001582736344917694, "loss": 1.1543, "step": 5565 }, { "epoch": 0.3721272047033672, "grad_norm": 0.177734375, "learning_rate": 0.00015817883390220746, "loss": 1.223, "step": 5570 }, { "epoch": 0.37246125066809194, "grad_norm": 0.1767578125, "learning_rate": 0.00015808395420744334, "loss": 1.1616, "step": 5575 }, { "epoch": 0.3727952966328167, "grad_norm": 0.171875, "learning_rate": 0.00015798899553648403, "loss": 1.1649, "step": 5580 }, { "epoch": 0.37312934259754144, "grad_norm": 0.1806640625, "learning_rate": 0.00015789395801844397, "loss": 1.1598, "step": 5585 }, { "epoch": 0.3734633885622662, "grad_norm": 0.1767578125, "learning_rate": 0.00015779884178254472, "loss": 1.1505, "step": 5590 }, { "epoch": 0.37379743452699093, "grad_norm": 0.1826171875, "learning_rate": 0.00015770364695811493, "loss": 1.2566, "step": 5595 }, { "epoch": 0.3741314804917157, "grad_norm": 0.1748046875, "learning_rate": 0.0001576083736745901, "loss": 1.1727, "step": 5600 }, { "epoch": 0.3744655264564404, "grad_norm": 0.1640625, "learning_rate": 0.00015751302206151236, "loss": 1.2458, "step": 5605 }, { "epoch": 0.37479957242116513, "grad_norm": 0.1962890625, "learning_rate": 0.0001574175922485304, "loss": 1.1991, "step": 5610 }, { "epoch": 0.3751336183858899, "grad_norm": 0.177734375, "learning_rate": 0.00015732208436539927, "loss": 1.2558, "step": 5615 }, { "epoch": 0.3754676643506146, "grad_norm": 0.166015625, "learning_rate": 0.00015722649854198005, "loss": 1.1166, "step": 5620 }, { "epoch": 0.3758017103153394, "grad_norm": 0.173828125, "learning_rate": 0.0001571308349082399, "loss": 1.152, "step": 5625 }, { "epoch": 0.3761357562800641, "grad_norm": 0.169921875, "learning_rate": 0.00015703509359425176, "loss": 1.2202, "step": 5630 }, { "epoch": 0.3764698022447889, "grad_norm": 0.166015625, "learning_rate": 0.00015693927473019417, "loss": 1.2383, "step": 5635 }, { "epoch": 0.3768038482095136, "grad_norm": 0.1787109375, "learning_rate": 0.0001568433784463511, "loss": 1.1833, "step": 5640 }, { "epoch": 0.3771378941742384, "grad_norm": 0.1767578125, "learning_rate": 0.0001567474048731118, "loss": 1.1411, "step": 5645 }, { "epoch": 0.3774719401389631, "grad_norm": 0.171875, "learning_rate": 0.00015665135414097065, "loss": 1.1714, "step": 5650 }, { "epoch": 0.3778059861036879, "grad_norm": 0.16796875, "learning_rate": 0.0001565552263805269, "loss": 1.2308, "step": 5655 }, { "epoch": 0.3781400320684126, "grad_norm": 0.166015625, "learning_rate": 0.00015645902172248453, "loss": 1.2254, "step": 5660 }, { "epoch": 0.3784740780331374, "grad_norm": 0.1689453125, "learning_rate": 0.00015636274029765207, "loss": 1.2096, "step": 5665 }, { "epoch": 0.3788081239978621, "grad_norm": 0.1845703125, "learning_rate": 0.00015626638223694252, "loss": 1.2076, "step": 5670 }, { "epoch": 0.3791421699625869, "grad_norm": 0.181640625, "learning_rate": 0.00015616994767137294, "loss": 1.2172, "step": 5675 }, { "epoch": 0.3794762159273116, "grad_norm": 0.1650390625, "learning_rate": 0.0001560734367320645, "loss": 1.2691, "step": 5680 }, { "epoch": 0.37981026189203637, "grad_norm": 0.2119140625, "learning_rate": 0.00015597684955024222, "loss": 1.1919, "step": 5685 }, { "epoch": 0.38014430785676107, "grad_norm": 0.162109375, "learning_rate": 0.00015588018625723477, "loss": 1.2075, "step": 5690 }, { "epoch": 0.3804783538214858, "grad_norm": 0.1611328125, "learning_rate": 0.00015578344698447428, "loss": 1.1873, "step": 5695 }, { "epoch": 0.38081239978621056, "grad_norm": 0.169921875, "learning_rate": 0.0001556866318634962, "loss": 1.2008, "step": 5700 }, { "epoch": 0.3811464457509353, "grad_norm": 0.1650390625, "learning_rate": 0.00015558974102593913, "loss": 1.1994, "step": 5705 }, { "epoch": 0.38148049171566006, "grad_norm": 0.193359375, "learning_rate": 0.0001554927746035446, "loss": 1.241, "step": 5710 }, { "epoch": 0.3818145376803848, "grad_norm": 0.162109375, "learning_rate": 0.00015539573272815697, "loss": 1.2538, "step": 5715 }, { "epoch": 0.38214858364510956, "grad_norm": 0.1630859375, "learning_rate": 0.00015529861553172314, "loss": 1.1708, "step": 5720 }, { "epoch": 0.3824826296098343, "grad_norm": 0.1630859375, "learning_rate": 0.00015520142314629239, "loss": 1.1842, "step": 5725 }, { "epoch": 0.38281667557455906, "grad_norm": 0.1796875, "learning_rate": 0.00015510415570401626, "loss": 1.1753, "step": 5730 }, { "epoch": 0.3831507215392838, "grad_norm": 0.1708984375, "learning_rate": 0.0001550068133371484, "loss": 1.2631, "step": 5735 }, { "epoch": 0.38348476750400856, "grad_norm": 0.173828125, "learning_rate": 0.0001549093961780443, "loss": 1.2291, "step": 5740 }, { "epoch": 0.3838188134687333, "grad_norm": 0.166015625, "learning_rate": 0.0001548119043591611, "loss": 1.228, "step": 5745 }, { "epoch": 0.38415285943345806, "grad_norm": 0.2578125, "learning_rate": 0.00015471433801305756, "loss": 1.1866, "step": 5750 }, { "epoch": 0.3844869053981828, "grad_norm": 0.173828125, "learning_rate": 0.00015461669727239363, "loss": 1.1626, "step": 5755 }, { "epoch": 0.38482095136290756, "grad_norm": 0.162109375, "learning_rate": 0.0001545189822699305, "loss": 1.277, "step": 5760 }, { "epoch": 0.3851549973276323, "grad_norm": 0.16015625, "learning_rate": 0.00015442119313853033, "loss": 1.219, "step": 5765 }, { "epoch": 0.385489043292357, "grad_norm": 0.162109375, "learning_rate": 0.0001543233300111561, "loss": 1.2212, "step": 5770 }, { "epoch": 0.38582308925708175, "grad_norm": 0.1669921875, "learning_rate": 0.0001542253930208713, "loss": 1.2303, "step": 5775 }, { "epoch": 0.3861571352218065, "grad_norm": 0.166015625, "learning_rate": 0.00015412738230083993, "loss": 1.2315, "step": 5780 }, { "epoch": 0.38649118118653125, "grad_norm": 0.18359375, "learning_rate": 0.00015402929798432629, "loss": 1.224, "step": 5785 }, { "epoch": 0.386825227151256, "grad_norm": 0.15625, "learning_rate": 0.00015393114020469462, "loss": 1.1687, "step": 5790 }, { "epoch": 0.38715927311598075, "grad_norm": 0.17578125, "learning_rate": 0.0001538329090954091, "loss": 1.1959, "step": 5795 }, { "epoch": 0.3874933190807055, "grad_norm": 0.1796875, "learning_rate": 0.0001537346047900337, "loss": 1.2247, "step": 5800 }, { "epoch": 0.38782736504543025, "grad_norm": 0.1689453125, "learning_rate": 0.00015363622742223175, "loss": 1.2288, "step": 5805 }, { "epoch": 0.388161411010155, "grad_norm": 0.1728515625, "learning_rate": 0.0001535377771257661, "loss": 1.2688, "step": 5810 }, { "epoch": 0.38849545697487975, "grad_norm": 0.1865234375, "learning_rate": 0.0001534392540344986, "loss": 1.1998, "step": 5815 }, { "epoch": 0.3888295029396045, "grad_norm": 0.16796875, "learning_rate": 0.00015334065828239023, "loss": 1.1662, "step": 5820 }, { "epoch": 0.38916354890432925, "grad_norm": 0.162109375, "learning_rate": 0.00015324199000350062, "loss": 1.2877, "step": 5825 }, { "epoch": 0.389497594869054, "grad_norm": 0.173828125, "learning_rate": 0.00015314324933198806, "loss": 1.2341, "step": 5830 }, { "epoch": 0.38983164083377875, "grad_norm": 0.1611328125, "learning_rate": 0.0001530444364021094, "loss": 1.1665, "step": 5835 }, { "epoch": 0.3901656867985035, "grad_norm": 0.1630859375, "learning_rate": 0.00015294555134821956, "loss": 1.2566, "step": 5840 }, { "epoch": 0.39049973276322825, "grad_norm": 0.166015625, "learning_rate": 0.0001528465943047716, "loss": 1.1577, "step": 5845 }, { "epoch": 0.39083377872795294, "grad_norm": 0.1728515625, "learning_rate": 0.00015274756540631644, "loss": 1.1293, "step": 5850 }, { "epoch": 0.3911678246926777, "grad_norm": 0.1796875, "learning_rate": 0.00015264846478750278, "loss": 1.1906, "step": 5855 }, { "epoch": 0.39150187065740244, "grad_norm": 0.1845703125, "learning_rate": 0.00015254929258307678, "loss": 1.2896, "step": 5860 }, { "epoch": 0.3918359166221272, "grad_norm": 0.1708984375, "learning_rate": 0.0001524500489278819, "loss": 1.1557, "step": 5865 }, { "epoch": 0.39216996258685194, "grad_norm": 0.1689453125, "learning_rate": 0.00015235073395685877, "loss": 1.1846, "step": 5870 }, { "epoch": 0.3925040085515767, "grad_norm": 0.1708984375, "learning_rate": 0.00015225134780504505, "loss": 1.2353, "step": 5875 }, { "epoch": 0.39283805451630144, "grad_norm": 0.1650390625, "learning_rate": 0.00015215189060757507, "loss": 1.1962, "step": 5880 }, { "epoch": 0.3931721004810262, "grad_norm": 0.1767578125, "learning_rate": 0.00015205236249967995, "loss": 1.1898, "step": 5885 }, { "epoch": 0.39350614644575094, "grad_norm": 0.1708984375, "learning_rate": 0.000151952763616687, "loss": 1.2293, "step": 5890 }, { "epoch": 0.3938401924104757, "grad_norm": 0.1669921875, "learning_rate": 0.00015185309409401985, "loss": 1.1423, "step": 5895 }, { "epoch": 0.39417423837520044, "grad_norm": 0.16015625, "learning_rate": 0.00015175335406719827, "loss": 1.1552, "step": 5900 }, { "epoch": 0.3945082843399252, "grad_norm": 0.1630859375, "learning_rate": 0.00015165354367183777, "loss": 1.1193, "step": 5905 }, { "epoch": 0.39484233030464994, "grad_norm": 0.1591796875, "learning_rate": 0.00015155366304364962, "loss": 1.1415, "step": 5910 }, { "epoch": 0.3951763762693747, "grad_norm": 0.17578125, "learning_rate": 0.00015145371231844047, "loss": 1.1684, "step": 5915 }, { "epoch": 0.39551042223409943, "grad_norm": 0.177734375, "learning_rate": 0.00015135369163211252, "loss": 1.1763, "step": 5920 }, { "epoch": 0.3958444681988242, "grad_norm": 0.1669921875, "learning_rate": 0.00015125360112066275, "loss": 1.11, "step": 5925 }, { "epoch": 0.3961785141635489, "grad_norm": 0.169921875, "learning_rate": 0.0001511534409201834, "loss": 1.1879, "step": 5930 }, { "epoch": 0.39651256012827363, "grad_norm": 0.1669921875, "learning_rate": 0.00015105321116686132, "loss": 1.2056, "step": 5935 }, { "epoch": 0.3968466060929984, "grad_norm": 0.166015625, "learning_rate": 0.00015095291199697784, "loss": 1.187, "step": 5940 }, { "epoch": 0.3971806520577231, "grad_norm": 0.1728515625, "learning_rate": 0.0001508525435469089, "loss": 1.2731, "step": 5945 }, { "epoch": 0.3975146980224479, "grad_norm": 0.1630859375, "learning_rate": 0.00015075210595312448, "loss": 1.1658, "step": 5950 }, { "epoch": 0.3978487439871726, "grad_norm": 0.1748046875, "learning_rate": 0.0001506515993521886, "loss": 1.154, "step": 5955 }, { "epoch": 0.3981827899518974, "grad_norm": 0.1806640625, "learning_rate": 0.0001505510238807591, "loss": 1.195, "step": 5960 }, { "epoch": 0.3985168359166221, "grad_norm": 0.185546875, "learning_rate": 0.00015045037967558754, "loss": 1.2198, "step": 5965 }, { "epoch": 0.3988508818813469, "grad_norm": 0.1767578125, "learning_rate": 0.00015034966687351884, "loss": 1.1808, "step": 5970 }, { "epoch": 0.3991849278460716, "grad_norm": 0.166015625, "learning_rate": 0.00015024888561149125, "loss": 1.1722, "step": 5975 }, { "epoch": 0.3995189738107964, "grad_norm": 0.17578125, "learning_rate": 0.00015014803602653607, "loss": 1.1929, "step": 5980 }, { "epoch": 0.3998530197755211, "grad_norm": 0.171875, "learning_rate": 0.0001500471182557775, "loss": 1.1078, "step": 5985 }, { "epoch": 0.4001870657402459, "grad_norm": 0.158203125, "learning_rate": 0.00014994613243643248, "loss": 1.193, "step": 5990 }, { "epoch": 0.4005211117049706, "grad_norm": 0.1728515625, "learning_rate": 0.00014984507870581046, "loss": 1.1894, "step": 5995 }, { "epoch": 0.40085515766969537, "grad_norm": 0.1650390625, "learning_rate": 0.00014974395720131328, "loss": 1.2523, "step": 6000 }, { "epoch": 0.4011892036344201, "grad_norm": 0.2119140625, "learning_rate": 0.00014964276806043477, "loss": 1.2658, "step": 6005 }, { "epoch": 0.4015232495991448, "grad_norm": 0.169921875, "learning_rate": 0.0001495415114207609, "loss": 1.2058, "step": 6010 }, { "epoch": 0.40185729556386957, "grad_norm": 0.1728515625, "learning_rate": 0.0001494401874199694, "loss": 1.1817, "step": 6015 }, { "epoch": 0.4021913415285943, "grad_norm": 0.1650390625, "learning_rate": 0.00014933879619582943, "loss": 1.1867, "step": 6020 }, { "epoch": 0.40252538749331906, "grad_norm": 0.1513671875, "learning_rate": 0.00014923733788620175, "loss": 1.2503, "step": 6025 }, { "epoch": 0.4028594334580438, "grad_norm": 0.16796875, "learning_rate": 0.0001491358126290382, "loss": 1.2104, "step": 6030 }, { "epoch": 0.40319347942276856, "grad_norm": 0.1591796875, "learning_rate": 0.0001490342205623817, "loss": 1.2023, "step": 6035 }, { "epoch": 0.4035275253874933, "grad_norm": 0.1708984375, "learning_rate": 0.00014893256182436609, "loss": 1.154, "step": 6040 }, { "epoch": 0.40386157135221806, "grad_norm": 0.1591796875, "learning_rate": 0.00014883083655321567, "loss": 1.0871, "step": 6045 }, { "epoch": 0.4041956173169428, "grad_norm": 0.1689453125, "learning_rate": 0.00014872904488724535, "loss": 1.1745, "step": 6050 }, { "epoch": 0.40452966328166756, "grad_norm": 0.162109375, "learning_rate": 0.0001486271869648603, "loss": 1.2076, "step": 6055 }, { "epoch": 0.4048637092463923, "grad_norm": 0.1640625, "learning_rate": 0.00014852526292455576, "loss": 1.2039, "step": 6060 }, { "epoch": 0.40519775521111706, "grad_norm": 0.17578125, "learning_rate": 0.00014842327290491688, "loss": 1.1722, "step": 6065 }, { "epoch": 0.4055318011758418, "grad_norm": 0.1748046875, "learning_rate": 0.00014832121704461848, "loss": 1.1733, "step": 6070 }, { "epoch": 0.40586584714056656, "grad_norm": 0.1669921875, "learning_rate": 0.00014821909548242497, "loss": 1.2495, "step": 6075 }, { "epoch": 0.4061998931052913, "grad_norm": 0.1630859375, "learning_rate": 0.00014811690835718998, "loss": 1.1307, "step": 6080 }, { "epoch": 0.40653393907001606, "grad_norm": 0.162109375, "learning_rate": 0.00014801465580785648, "loss": 1.2543, "step": 6085 }, { "epoch": 0.40686798503474075, "grad_norm": 0.166015625, "learning_rate": 0.00014791233797345618, "loss": 1.1583, "step": 6090 }, { "epoch": 0.4072020309994655, "grad_norm": 0.1689453125, "learning_rate": 0.00014780995499310973, "loss": 1.2537, "step": 6095 }, { "epoch": 0.40753607696419025, "grad_norm": 0.17578125, "learning_rate": 0.00014770750700602623, "loss": 1.26, "step": 6100 }, { "epoch": 0.407870122928915, "grad_norm": 0.185546875, "learning_rate": 0.00014760499415150327, "loss": 1.2814, "step": 6105 }, { "epoch": 0.40820416889363975, "grad_norm": 0.162109375, "learning_rate": 0.00014750241656892653, "loss": 1.1918, "step": 6110 }, { "epoch": 0.4085382148583645, "grad_norm": 0.1669921875, "learning_rate": 0.00014739977439776983, "loss": 1.2569, "step": 6115 }, { "epoch": 0.40887226082308925, "grad_norm": 0.166015625, "learning_rate": 0.00014729706777759474, "loss": 1.2649, "step": 6120 }, { "epoch": 0.409206306787814, "grad_norm": 0.1689453125, "learning_rate": 0.00014719429684805041, "loss": 1.2275, "step": 6125 }, { "epoch": 0.40954035275253875, "grad_norm": 0.1884765625, "learning_rate": 0.00014709146174887356, "loss": 1.2521, "step": 6130 }, { "epoch": 0.4098743987172635, "grad_norm": 0.193359375, "learning_rate": 0.00014698856261988804, "loss": 1.2256, "step": 6135 }, { "epoch": 0.41020844468198825, "grad_norm": 0.177734375, "learning_rate": 0.00014688559960100483, "loss": 1.1749, "step": 6140 }, { "epoch": 0.410542490646713, "grad_norm": 0.1689453125, "learning_rate": 0.0001467825728322217, "loss": 1.1549, "step": 6145 }, { "epoch": 0.41087653661143775, "grad_norm": 0.1708984375, "learning_rate": 0.00014667948245362329, "loss": 1.2315, "step": 6150 }, { "epoch": 0.4112105825761625, "grad_norm": 0.18359375, "learning_rate": 0.00014657632860538047, "loss": 1.1947, "step": 6155 }, { "epoch": 0.41154462854088725, "grad_norm": 0.1875, "learning_rate": 0.00014647311142775056, "loss": 1.2204, "step": 6160 }, { "epoch": 0.411878674505612, "grad_norm": 0.1787109375, "learning_rate": 0.00014636983106107703, "loss": 1.2413, "step": 6165 }, { "epoch": 0.4122127204703367, "grad_norm": 0.1708984375, "learning_rate": 0.00014626648764578916, "loss": 1.1824, "step": 6170 }, { "epoch": 0.41254676643506144, "grad_norm": 0.17578125, "learning_rate": 0.000146163081322402, "loss": 1.2953, "step": 6175 }, { "epoch": 0.4128808123997862, "grad_norm": 0.2021484375, "learning_rate": 0.00014605961223151614, "loss": 1.1684, "step": 6180 }, { "epoch": 0.41321485836451094, "grad_norm": 0.171875, "learning_rate": 0.00014595608051381752, "loss": 1.1756, "step": 6185 }, { "epoch": 0.4135489043292357, "grad_norm": 0.1748046875, "learning_rate": 0.0001458524863100772, "loss": 1.2342, "step": 6190 }, { "epoch": 0.41388295029396044, "grad_norm": 0.158203125, "learning_rate": 0.00014574882976115124, "loss": 1.2005, "step": 6195 }, { "epoch": 0.4142169962586852, "grad_norm": 0.16796875, "learning_rate": 0.00014564511100798044, "loss": 1.2599, "step": 6200 }, { "epoch": 0.41455104222340994, "grad_norm": 0.1708984375, "learning_rate": 0.00014554133019159022, "loss": 1.2165, "step": 6205 }, { "epoch": 0.4148850881881347, "grad_norm": 0.1650390625, "learning_rate": 0.00014543748745309034, "loss": 1.1746, "step": 6210 }, { "epoch": 0.41521913415285944, "grad_norm": 0.1826171875, "learning_rate": 0.00014533358293367481, "loss": 1.2066, "step": 6215 }, { "epoch": 0.4155531801175842, "grad_norm": 0.169921875, "learning_rate": 0.00014522961677462153, "loss": 1.1775, "step": 6220 }, { "epoch": 0.41588722608230894, "grad_norm": 0.1630859375, "learning_rate": 0.0001451255891172924, "loss": 1.1441, "step": 6225 }, { "epoch": 0.4162212720470337, "grad_norm": 0.169921875, "learning_rate": 0.0001450215001031327, "loss": 1.2242, "step": 6230 }, { "epoch": 0.41655531801175844, "grad_norm": 0.1640625, "learning_rate": 0.00014491734987367137, "loss": 1.2, "step": 6235 }, { "epoch": 0.4168893639764832, "grad_norm": 0.162109375, "learning_rate": 0.00014481313857052044, "loss": 1.2246, "step": 6240 }, { "epoch": 0.41722340994120793, "grad_norm": 0.1787109375, "learning_rate": 0.00014470886633537498, "loss": 1.1961, "step": 6245 }, { "epoch": 0.4175574559059327, "grad_norm": 0.1982421875, "learning_rate": 0.000144604533310013, "loss": 1.2179, "step": 6250 }, { "epoch": 0.4178915018706574, "grad_norm": 0.1640625, "learning_rate": 0.00014450013963629508, "loss": 1.1481, "step": 6255 }, { "epoch": 0.4182255478353821, "grad_norm": 0.1748046875, "learning_rate": 0.00014439568545616437, "loss": 1.1695, "step": 6260 }, { "epoch": 0.4185595938001069, "grad_norm": 0.1650390625, "learning_rate": 0.0001442911709116461, "loss": 1.1626, "step": 6265 }, { "epoch": 0.4188936397648316, "grad_norm": 0.16796875, "learning_rate": 0.0001441865961448478, "loss": 1.1622, "step": 6270 }, { "epoch": 0.4192276857295564, "grad_norm": 0.173828125, "learning_rate": 0.0001440819612979587, "loss": 1.2383, "step": 6275 }, { "epoch": 0.4195617316942811, "grad_norm": 0.1630859375, "learning_rate": 0.00014397726651324983, "loss": 1.2379, "step": 6280 }, { "epoch": 0.4198957776590059, "grad_norm": 0.169921875, "learning_rate": 0.00014387251193307367, "loss": 1.1498, "step": 6285 }, { "epoch": 0.4202298236237306, "grad_norm": 0.1669921875, "learning_rate": 0.00014376769769986405, "loss": 1.2617, "step": 6290 }, { "epoch": 0.4205638695884554, "grad_norm": 0.1767578125, "learning_rate": 0.00014366282395613587, "loss": 1.3093, "step": 6295 }, { "epoch": 0.4208979155531801, "grad_norm": 0.173828125, "learning_rate": 0.0001435578908444849, "loss": 1.1134, "step": 6300 }, { "epoch": 0.4212319615179049, "grad_norm": 0.1767578125, "learning_rate": 0.00014345289850758777, "loss": 1.2403, "step": 6305 }, { "epoch": 0.4215660074826296, "grad_norm": 0.1728515625, "learning_rate": 0.00014334784708820144, "loss": 1.1466, "step": 6310 }, { "epoch": 0.4219000534473544, "grad_norm": 0.1650390625, "learning_rate": 0.00014324273672916343, "loss": 1.1237, "step": 6315 }, { "epoch": 0.4222340994120791, "grad_norm": 0.1611328125, "learning_rate": 0.00014313756757339122, "loss": 1.2737, "step": 6320 }, { "epoch": 0.42256814537680387, "grad_norm": 0.1689453125, "learning_rate": 0.00014303233976388236, "loss": 1.1712, "step": 6325 }, { "epoch": 0.4229021913415286, "grad_norm": 0.19140625, "learning_rate": 0.00014292705344371402, "loss": 1.1599, "step": 6330 }, { "epoch": 0.4232362373062533, "grad_norm": 0.16796875, "learning_rate": 0.00014282170875604307, "loss": 1.163, "step": 6335 }, { "epoch": 0.42357028327097807, "grad_norm": 0.1630859375, "learning_rate": 0.00014271630584410558, "loss": 1.1546, "step": 6340 }, { "epoch": 0.4239043292357028, "grad_norm": 0.16796875, "learning_rate": 0.00014261084485121697, "loss": 1.232, "step": 6345 }, { "epoch": 0.42423837520042756, "grad_norm": 0.1640625, "learning_rate": 0.00014250532592077148, "loss": 1.2507, "step": 6350 }, { "epoch": 0.4245724211651523, "grad_norm": 0.1630859375, "learning_rate": 0.00014239974919624224, "loss": 1.216, "step": 6355 }, { "epoch": 0.42490646712987706, "grad_norm": 0.162109375, "learning_rate": 0.00014229411482118083, "loss": 1.2722, "step": 6360 }, { "epoch": 0.4252405130946018, "grad_norm": 0.1748046875, "learning_rate": 0.00014218842293921738, "loss": 1.1772, "step": 6365 }, { "epoch": 0.42557455905932656, "grad_norm": 0.1982421875, "learning_rate": 0.00014208267369406012, "loss": 1.2117, "step": 6370 }, { "epoch": 0.4259086050240513, "grad_norm": 0.1650390625, "learning_rate": 0.0001419768672294952, "loss": 1.1975, "step": 6375 }, { "epoch": 0.42624265098877606, "grad_norm": 0.1689453125, "learning_rate": 0.00014187100368938678, "loss": 1.1905, "step": 6380 }, { "epoch": 0.4265766969535008, "grad_norm": 0.171875, "learning_rate": 0.00014176508321767637, "loss": 1.1521, "step": 6385 }, { "epoch": 0.42691074291822556, "grad_norm": 0.1572265625, "learning_rate": 0.00014165910595838313, "loss": 1.2083, "step": 6390 }, { "epoch": 0.4272447888829503, "grad_norm": 0.1708984375, "learning_rate": 0.00014155307205560323, "loss": 1.2467, "step": 6395 }, { "epoch": 0.42757883484767506, "grad_norm": 0.1630859375, "learning_rate": 0.00014144698165351, "loss": 1.2489, "step": 6400 }, { "epoch": 0.4279128808123998, "grad_norm": 0.173828125, "learning_rate": 0.00014134083489635355, "loss": 1.2708, "step": 6405 }, { "epoch": 0.42824692677712456, "grad_norm": 0.1728515625, "learning_rate": 0.00014123463192846058, "loss": 1.1689, "step": 6410 }, { "epoch": 0.42858097274184925, "grad_norm": 0.158203125, "learning_rate": 0.00014112837289423426, "loss": 1.1623, "step": 6415 }, { "epoch": 0.428915018706574, "grad_norm": 0.169921875, "learning_rate": 0.00014102205793815398, "loss": 1.1605, "step": 6420 }, { "epoch": 0.42924906467129875, "grad_norm": 0.169921875, "learning_rate": 0.00014091568720477518, "loss": 1.288, "step": 6425 }, { "epoch": 0.4295831106360235, "grad_norm": 0.1806640625, "learning_rate": 0.00014080926083872907, "loss": 1.1916, "step": 6430 }, { "epoch": 0.42991715660074825, "grad_norm": 0.169921875, "learning_rate": 0.00014070277898472263, "loss": 1.2131, "step": 6435 }, { "epoch": 0.430251202565473, "grad_norm": 0.1982421875, "learning_rate": 0.00014059624178753817, "loss": 1.1224, "step": 6440 }, { "epoch": 0.43058524853019775, "grad_norm": 0.1708984375, "learning_rate": 0.0001404896493920333, "loss": 1.244, "step": 6445 }, { "epoch": 0.4309192944949225, "grad_norm": 0.15625, "learning_rate": 0.0001403830019431407, "loss": 1.232, "step": 6450 }, { "epoch": 0.43125334045964725, "grad_norm": 0.166015625, "learning_rate": 0.00014027629958586788, "loss": 1.2099, "step": 6455 }, { "epoch": 0.431587386424372, "grad_norm": 0.1640625, "learning_rate": 0.00014016954246529696, "loss": 1.229, "step": 6460 }, { "epoch": 0.43192143238909675, "grad_norm": 0.1689453125, "learning_rate": 0.00014006273072658462, "loss": 1.239, "step": 6465 }, { "epoch": 0.4322554783538215, "grad_norm": 0.2255859375, "learning_rate": 0.00013995586451496177, "loss": 1.2018, "step": 6470 }, { "epoch": 0.43258952431854625, "grad_norm": 0.1689453125, "learning_rate": 0.0001398489439757333, "loss": 1.1465, "step": 6475 }, { "epoch": 0.432923570283271, "grad_norm": 0.1708984375, "learning_rate": 0.00013974196925427816, "loss": 1.2251, "step": 6480 }, { "epoch": 0.43325761624799575, "grad_norm": 0.1650390625, "learning_rate": 0.00013963494049604871, "loss": 1.2515, "step": 6485 }, { "epoch": 0.4335916622127205, "grad_norm": 0.1748046875, "learning_rate": 0.00013952785784657106, "loss": 1.2291, "step": 6490 }, { "epoch": 0.4339257081774452, "grad_norm": 0.1767578125, "learning_rate": 0.0001394207214514444, "loss": 1.2463, "step": 6495 }, { "epoch": 0.43425975414216994, "grad_norm": 0.1650390625, "learning_rate": 0.00013931353145634102, "loss": 1.152, "step": 6500 }, { "epoch": 0.4345938001068947, "grad_norm": 0.16796875, "learning_rate": 0.0001392062880070062, "loss": 1.2804, "step": 6505 }, { "epoch": 0.43492784607161944, "grad_norm": 0.17578125, "learning_rate": 0.00013909899124925774, "loss": 1.1706, "step": 6510 }, { "epoch": 0.4352618920363442, "grad_norm": 0.1748046875, "learning_rate": 0.0001389916413289861, "loss": 1.1676, "step": 6515 }, { "epoch": 0.43559593800106894, "grad_norm": 0.1826171875, "learning_rate": 0.00013888423839215395, "loss": 1.1549, "step": 6520 }, { "epoch": 0.4359299839657937, "grad_norm": 0.177734375, "learning_rate": 0.0001387767825847959, "loss": 1.2364, "step": 6525 }, { "epoch": 0.43626402993051844, "grad_norm": 0.1591796875, "learning_rate": 0.0001386692740530187, "loss": 1.244, "step": 6530 }, { "epoch": 0.4365980758952432, "grad_norm": 0.177734375, "learning_rate": 0.00013856171294300066, "loss": 1.1887, "step": 6535 }, { "epoch": 0.43693212185996794, "grad_norm": 0.1669921875, "learning_rate": 0.00013845409940099152, "loss": 1.0936, "step": 6540 }, { "epoch": 0.4372661678246927, "grad_norm": 0.166015625, "learning_rate": 0.00013834643357331245, "loss": 1.2448, "step": 6545 }, { "epoch": 0.43760021378941744, "grad_norm": 0.1904296875, "learning_rate": 0.0001382387156063556, "loss": 1.1776, "step": 6550 }, { "epoch": 0.4379342597541422, "grad_norm": 0.1591796875, "learning_rate": 0.0001381309456465841, "loss": 1.2031, "step": 6555 }, { "epoch": 0.43826830571886694, "grad_norm": 0.16015625, "learning_rate": 0.0001380231238405317, "loss": 1.2349, "step": 6560 }, { "epoch": 0.4386023516835917, "grad_norm": 0.177734375, "learning_rate": 0.00013791525033480268, "loss": 1.1826, "step": 6565 }, { "epoch": 0.43893639764831643, "grad_norm": 0.1611328125, "learning_rate": 0.00013780732527607156, "loss": 1.2273, "step": 6570 }, { "epoch": 0.43927044361304113, "grad_norm": 0.1728515625, "learning_rate": 0.00013769934881108312, "loss": 1.1712, "step": 6575 }, { "epoch": 0.4396044895777659, "grad_norm": 0.177734375, "learning_rate": 0.00013759132108665182, "loss": 1.1598, "step": 6580 }, { "epoch": 0.4399385355424906, "grad_norm": 0.1708984375, "learning_rate": 0.000137483242249662, "loss": 1.1881, "step": 6585 }, { "epoch": 0.4402725815072154, "grad_norm": 0.1669921875, "learning_rate": 0.00013737511244706733, "loss": 1.1738, "step": 6590 }, { "epoch": 0.4406066274719401, "grad_norm": 0.1611328125, "learning_rate": 0.00013726693182589093, "loss": 1.2403, "step": 6595 }, { "epoch": 0.4409406734366649, "grad_norm": 0.1669921875, "learning_rate": 0.00013715870053322492, "loss": 1.1563, "step": 6600 }, { "epoch": 0.4412747194013896, "grad_norm": 0.19140625, "learning_rate": 0.0001370504187162304, "loss": 1.1397, "step": 6605 }, { "epoch": 0.4416087653661144, "grad_norm": 0.1630859375, "learning_rate": 0.00013694208652213703, "loss": 1.1837, "step": 6610 }, { "epoch": 0.4419428113308391, "grad_norm": 0.1728515625, "learning_rate": 0.00013683370409824317, "loss": 1.196, "step": 6615 }, { "epoch": 0.4422768572955639, "grad_norm": 0.1767578125, "learning_rate": 0.00013672527159191525, "loss": 1.2161, "step": 6620 }, { "epoch": 0.4426109032602886, "grad_norm": 0.1689453125, "learning_rate": 0.00013661678915058797, "loss": 1.2789, "step": 6625 }, { "epoch": 0.4429449492250134, "grad_norm": 0.171875, "learning_rate": 0.00013650825692176387, "loss": 1.2474, "step": 6630 }, { "epoch": 0.4432789951897381, "grad_norm": 0.1748046875, "learning_rate": 0.00013639967505301313, "loss": 1.2226, "step": 6635 }, { "epoch": 0.4436130411544629, "grad_norm": 0.1748046875, "learning_rate": 0.00013629104369197351, "loss": 1.2119, "step": 6640 }, { "epoch": 0.4439470871191876, "grad_norm": 0.1669921875, "learning_rate": 0.00013618236298635003, "loss": 1.1872, "step": 6645 }, { "epoch": 0.44428113308391237, "grad_norm": 0.197265625, "learning_rate": 0.0001360736330839148, "loss": 1.2023, "step": 6650 }, { "epoch": 0.44461517904863707, "grad_norm": 0.171875, "learning_rate": 0.00013596485413250683, "loss": 1.1973, "step": 6655 }, { "epoch": 0.4449492250133618, "grad_norm": 0.169921875, "learning_rate": 0.0001358560262800318, "loss": 1.1778, "step": 6660 }, { "epoch": 0.44528327097808656, "grad_norm": 0.1669921875, "learning_rate": 0.00013574714967446192, "loss": 1.1713, "step": 6665 }, { "epoch": 0.4456173169428113, "grad_norm": 0.1640625, "learning_rate": 0.00013563822446383564, "loss": 1.2628, "step": 6670 }, { "epoch": 0.44595136290753606, "grad_norm": 0.16796875, "learning_rate": 0.00013552925079625755, "loss": 1.2013, "step": 6675 }, { "epoch": 0.4462854088722608, "grad_norm": 0.177734375, "learning_rate": 0.00013542022881989803, "loss": 1.2194, "step": 6680 }, { "epoch": 0.44661945483698556, "grad_norm": 0.1640625, "learning_rate": 0.00013531115868299336, "loss": 1.1687, "step": 6685 }, { "epoch": 0.4469535008017103, "grad_norm": 0.1650390625, "learning_rate": 0.000135202040533845, "loss": 1.1385, "step": 6690 }, { "epoch": 0.44728754676643506, "grad_norm": 0.1806640625, "learning_rate": 0.00013509287452081995, "loss": 1.225, "step": 6695 }, { "epoch": 0.4476215927311598, "grad_norm": 0.1650390625, "learning_rate": 0.00013498366079235015, "loss": 1.2127, "step": 6700 }, { "epoch": 0.44795563869588456, "grad_norm": 0.166015625, "learning_rate": 0.0001348743994969325, "loss": 1.2052, "step": 6705 }, { "epoch": 0.4482896846606093, "grad_norm": 0.1953125, "learning_rate": 0.00013476509078312845, "loss": 1.2059, "step": 6710 }, { "epoch": 0.44862373062533406, "grad_norm": 0.193359375, "learning_rate": 0.0001346557347995641, "loss": 1.1859, "step": 6715 }, { "epoch": 0.4489577765900588, "grad_norm": 0.1640625, "learning_rate": 0.00013454633169492967, "loss": 1.1002, "step": 6720 }, { "epoch": 0.44929182255478356, "grad_norm": 0.1650390625, "learning_rate": 0.00013443688161797953, "loss": 1.1503, "step": 6725 }, { "epoch": 0.4496258685195083, "grad_norm": 0.158203125, "learning_rate": 0.00013432738471753195, "loss": 1.1687, "step": 6730 }, { "epoch": 0.449959914484233, "grad_norm": 0.1708984375, "learning_rate": 0.00013421784114246873, "loss": 1.2168, "step": 6735 }, { "epoch": 0.45029396044895775, "grad_norm": 0.158203125, "learning_rate": 0.00013410825104173528, "loss": 1.2876, "step": 6740 }, { "epoch": 0.4506280064136825, "grad_norm": 0.16015625, "learning_rate": 0.00013399861456434017, "loss": 1.2127, "step": 6745 }, { "epoch": 0.45096205237840725, "grad_norm": 0.255859375, "learning_rate": 0.00013388893185935512, "loss": 1.229, "step": 6750 }, { "epoch": 0.451296098343132, "grad_norm": 0.1640625, "learning_rate": 0.00013377920307591453, "loss": 1.2186, "step": 6755 }, { "epoch": 0.45163014430785675, "grad_norm": 0.162109375, "learning_rate": 0.00013366942836321575, "loss": 1.1846, "step": 6760 }, { "epoch": 0.4519641902725815, "grad_norm": 0.177734375, "learning_rate": 0.00013355960787051827, "loss": 1.2155, "step": 6765 }, { "epoch": 0.45229823623730625, "grad_norm": 0.1572265625, "learning_rate": 0.000133449741747144, "loss": 1.2372, "step": 6770 }, { "epoch": 0.452632282202031, "grad_norm": 0.1904296875, "learning_rate": 0.00013333983014247687, "loss": 1.2568, "step": 6775 }, { "epoch": 0.45296632816675575, "grad_norm": 0.1689453125, "learning_rate": 0.0001332298732059626, "loss": 1.1603, "step": 6780 }, { "epoch": 0.4533003741314805, "grad_norm": 0.1787109375, "learning_rate": 0.0001331198710871086, "loss": 1.309, "step": 6785 }, { "epoch": 0.45363442009620525, "grad_norm": 0.169921875, "learning_rate": 0.00013300982393548368, "loss": 1.1875, "step": 6790 }, { "epoch": 0.45396846606093, "grad_norm": 0.1650390625, "learning_rate": 0.00013289973190071797, "loss": 1.2092, "step": 6795 }, { "epoch": 0.45430251202565475, "grad_norm": 0.1650390625, "learning_rate": 0.00013278959513250243, "loss": 1.2222, "step": 6800 }, { "epoch": 0.4546365579903795, "grad_norm": 0.1650390625, "learning_rate": 0.0001326794137805891, "loss": 1.1878, "step": 6805 }, { "epoch": 0.45497060395510425, "grad_norm": 0.173828125, "learning_rate": 0.0001325691879947904, "loss": 1.1878, "step": 6810 }, { "epoch": 0.45530464991982894, "grad_norm": 0.1669921875, "learning_rate": 0.0001324589179249793, "loss": 1.2343, "step": 6815 }, { "epoch": 0.4556386958845537, "grad_norm": 0.1845703125, "learning_rate": 0.000132348603721089, "loss": 1.2661, "step": 6820 }, { "epoch": 0.45597274184927844, "grad_norm": 0.1708984375, "learning_rate": 0.00013223824553311263, "loss": 1.3178, "step": 6825 }, { "epoch": 0.4563067878140032, "grad_norm": 0.1767578125, "learning_rate": 0.00013212784351110312, "loss": 1.1792, "step": 6830 }, { "epoch": 0.45664083377872794, "grad_norm": 0.16796875, "learning_rate": 0.00013201739780517311, "loss": 1.1898, "step": 6835 }, { "epoch": 0.4569748797434527, "grad_norm": 0.1875, "learning_rate": 0.00013190690856549456, "loss": 1.2211, "step": 6840 }, { "epoch": 0.45730892570817744, "grad_norm": 0.162109375, "learning_rate": 0.00013179637594229858, "loss": 1.1925, "step": 6845 }, { "epoch": 0.4576429716729022, "grad_norm": 0.1669921875, "learning_rate": 0.00013168580008587536, "loss": 1.2215, "step": 6850 }, { "epoch": 0.45797701763762694, "grad_norm": 0.171875, "learning_rate": 0.0001315751811465738, "loss": 1.1885, "step": 6855 }, { "epoch": 0.4583110636023517, "grad_norm": 0.16796875, "learning_rate": 0.00013146451927480146, "loss": 1.221, "step": 6860 }, { "epoch": 0.45864510956707644, "grad_norm": 0.173828125, "learning_rate": 0.00013135381462102413, "loss": 1.2224, "step": 6865 }, { "epoch": 0.4589791555318012, "grad_norm": 0.1669921875, "learning_rate": 0.000131243067335766, "loss": 1.1484, "step": 6870 }, { "epoch": 0.45931320149652594, "grad_norm": 0.1708984375, "learning_rate": 0.00013113227756960898, "loss": 1.2461, "step": 6875 }, { "epoch": 0.4596472474612507, "grad_norm": 0.1728515625, "learning_rate": 0.00013102144547319286, "loss": 1.2175, "step": 6880 }, { "epoch": 0.45998129342597543, "grad_norm": 0.1689453125, "learning_rate": 0.00013091057119721505, "loss": 1.2129, "step": 6885 }, { "epoch": 0.4603153393907002, "grad_norm": 0.1591796875, "learning_rate": 0.00013079965489243015, "loss": 1.1792, "step": 6890 }, { "epoch": 0.46064938535542493, "grad_norm": 0.1640625, "learning_rate": 0.00013068869670965008, "loss": 1.195, "step": 6895 }, { "epoch": 0.46098343132014963, "grad_norm": 0.1728515625, "learning_rate": 0.00013057769679974358, "loss": 1.1676, "step": 6900 }, { "epoch": 0.4613174772848744, "grad_norm": 0.16015625, "learning_rate": 0.00013046665531363615, "loss": 1.1518, "step": 6905 }, { "epoch": 0.4616515232495991, "grad_norm": 0.1806640625, "learning_rate": 0.00013035557240230982, "loss": 1.2451, "step": 6910 }, { "epoch": 0.4619855692143239, "grad_norm": 0.1953125, "learning_rate": 0.00013024444821680304, "loss": 1.2153, "step": 6915 }, { "epoch": 0.4623196151790486, "grad_norm": 0.1708984375, "learning_rate": 0.0001301332829082102, "loss": 1.2301, "step": 6920 }, { "epoch": 0.4626536611437734, "grad_norm": 0.1591796875, "learning_rate": 0.00013002207662768175, "loss": 1.2094, "step": 6925 }, { "epoch": 0.4629877071084981, "grad_norm": 0.1767578125, "learning_rate": 0.0001299108295264238, "loss": 1.2514, "step": 6930 }, { "epoch": 0.4633217530732229, "grad_norm": 0.1806640625, "learning_rate": 0.00012979954175569797, "loss": 1.2319, "step": 6935 }, { "epoch": 0.4636557990379476, "grad_norm": 0.15625, "learning_rate": 0.00012968821346682113, "loss": 1.1269, "step": 6940 }, { "epoch": 0.4639898450026724, "grad_norm": 0.16796875, "learning_rate": 0.00012957684481116537, "loss": 1.1569, "step": 6945 }, { "epoch": 0.4643238909673971, "grad_norm": 0.197265625, "learning_rate": 0.00012946543594015753, "loss": 1.1848, "step": 6950 }, { "epoch": 0.4646579369321219, "grad_norm": 0.1787109375, "learning_rate": 0.00012935398700527915, "loss": 1.1894, "step": 6955 }, { "epoch": 0.4649919828968466, "grad_norm": 0.1689453125, "learning_rate": 0.00012924249815806632, "loss": 1.2352, "step": 6960 }, { "epoch": 0.4653260288615714, "grad_norm": 0.1748046875, "learning_rate": 0.00012913096955010937, "loss": 1.2918, "step": 6965 }, { "epoch": 0.4656600748262961, "grad_norm": 0.1650390625, "learning_rate": 0.00012901940133305267, "loss": 1.2738, "step": 6970 }, { "epoch": 0.46599412079102087, "grad_norm": 0.1826171875, "learning_rate": 0.00012890779365859443, "loss": 1.0624, "step": 6975 }, { "epoch": 0.46632816675574557, "grad_norm": 0.185546875, "learning_rate": 0.00012879614667848655, "loss": 1.2576, "step": 6980 }, { "epoch": 0.4666622127204703, "grad_norm": 0.1728515625, "learning_rate": 0.00012868446054453434, "loss": 1.2957, "step": 6985 }, { "epoch": 0.46699625868519506, "grad_norm": 0.171875, "learning_rate": 0.00012857273540859643, "loss": 1.1472, "step": 6990 }, { "epoch": 0.4673303046499198, "grad_norm": 0.205078125, "learning_rate": 0.0001284609714225843, "loss": 1.1081, "step": 6995 }, { "epoch": 0.46766435061464456, "grad_norm": 0.1796875, "learning_rate": 0.00012834916873846245, "loss": 1.2406, "step": 7000 }, { "epoch": 0.4679983965793693, "grad_norm": 0.1669921875, "learning_rate": 0.00012823732750824794, "loss": 1.2261, "step": 7005 }, { "epoch": 0.46833244254409406, "grad_norm": 0.228515625, "learning_rate": 0.00012812544788401014, "loss": 1.2323, "step": 7010 }, { "epoch": 0.4686664885088188, "grad_norm": 0.1650390625, "learning_rate": 0.00012801353001787072, "loss": 1.1986, "step": 7015 }, { "epoch": 0.46900053447354356, "grad_norm": 0.166015625, "learning_rate": 0.0001279015740620034, "loss": 1.1924, "step": 7020 }, { "epoch": 0.4693345804382683, "grad_norm": 0.1708984375, "learning_rate": 0.00012778958016863357, "loss": 1.204, "step": 7025 }, { "epoch": 0.46966862640299306, "grad_norm": 0.1865234375, "learning_rate": 0.0001276775484900382, "loss": 1.206, "step": 7030 }, { "epoch": 0.4700026723677178, "grad_norm": 0.18359375, "learning_rate": 0.00012756547917854578, "loss": 1.2013, "step": 7035 }, { "epoch": 0.47033671833244256, "grad_norm": 0.2060546875, "learning_rate": 0.0001274533723865358, "loss": 1.2634, "step": 7040 }, { "epoch": 0.4706707642971673, "grad_norm": 0.1650390625, "learning_rate": 0.00012734122826643884, "loss": 1.302, "step": 7045 }, { "epoch": 0.47100481026189206, "grad_norm": 0.1708984375, "learning_rate": 0.00012722904697073616, "loss": 1.2133, "step": 7050 }, { "epoch": 0.4713388562266168, "grad_norm": 0.1728515625, "learning_rate": 0.00012711682865195964, "loss": 1.1755, "step": 7055 }, { "epoch": 0.4716729021913415, "grad_norm": 0.166015625, "learning_rate": 0.00012700457346269137, "loss": 1.2221, "step": 7060 }, { "epoch": 0.47200694815606625, "grad_norm": 0.1845703125, "learning_rate": 0.00012689228155556373, "loss": 1.2812, "step": 7065 }, { "epoch": 0.472340994120791, "grad_norm": 0.16015625, "learning_rate": 0.00012677995308325887, "loss": 1.2279, "step": 7070 }, { "epoch": 0.47267504008551575, "grad_norm": 0.17578125, "learning_rate": 0.0001266675881985088, "loss": 1.1915, "step": 7075 }, { "epoch": 0.4730090860502405, "grad_norm": 0.1640625, "learning_rate": 0.00012655518705409496, "loss": 1.187, "step": 7080 }, { "epoch": 0.47334313201496525, "grad_norm": 0.1630859375, "learning_rate": 0.00012644274980284806, "loss": 1.1714, "step": 7085 }, { "epoch": 0.47367717797969, "grad_norm": 0.22265625, "learning_rate": 0.00012633027659764804, "loss": 1.2443, "step": 7090 }, { "epoch": 0.47401122394441475, "grad_norm": 0.1689453125, "learning_rate": 0.00012621776759142356, "loss": 1.2601, "step": 7095 }, { "epoch": 0.4743452699091395, "grad_norm": 0.177734375, "learning_rate": 0.0001261052229371521, "loss": 1.2128, "step": 7100 }, { "epoch": 0.47467931587386425, "grad_norm": 0.1689453125, "learning_rate": 0.00012599264278785952, "loss": 1.252, "step": 7105 }, { "epoch": 0.475013361838589, "grad_norm": 0.173828125, "learning_rate": 0.00012588002729661994, "loss": 1.1306, "step": 7110 }, { "epoch": 0.47534740780331375, "grad_norm": 0.166015625, "learning_rate": 0.00012576737661655559, "loss": 1.2137, "step": 7115 }, { "epoch": 0.4756814537680385, "grad_norm": 0.166015625, "learning_rate": 0.00012565469090083651, "loss": 1.2275, "step": 7120 }, { "epoch": 0.47601549973276325, "grad_norm": 0.166015625, "learning_rate": 0.0001255419703026804, "loss": 1.1858, "step": 7125 }, { "epoch": 0.476349545697488, "grad_norm": 0.1669921875, "learning_rate": 0.00012542921497535233, "loss": 1.1734, "step": 7130 }, { "epoch": 0.47668359166221275, "grad_norm": 0.17578125, "learning_rate": 0.00012531642507216474, "loss": 1.1865, "step": 7135 }, { "epoch": 0.47701763762693744, "grad_norm": 0.1669921875, "learning_rate": 0.00012520360074647687, "loss": 1.2398, "step": 7140 }, { "epoch": 0.4773516835916622, "grad_norm": 0.1689453125, "learning_rate": 0.00012509074215169493, "loss": 1.2366, "step": 7145 }, { "epoch": 0.47768572955638694, "grad_norm": 0.166015625, "learning_rate": 0.00012497784944127166, "loss": 1.1808, "step": 7150 }, { "epoch": 0.4780197755211117, "grad_norm": 0.1650390625, "learning_rate": 0.00012486492276870615, "loss": 1.1907, "step": 7155 }, { "epoch": 0.47835382148583644, "grad_norm": 0.1806640625, "learning_rate": 0.00012475196228754374, "loss": 1.1947, "step": 7160 }, { "epoch": 0.4786878674505612, "grad_norm": 0.1640625, "learning_rate": 0.00012463896815137582, "loss": 1.1483, "step": 7165 }, { "epoch": 0.47902191341528594, "grad_norm": 0.1865234375, "learning_rate": 0.00012452594051383923, "loss": 1.1198, "step": 7170 }, { "epoch": 0.4793559593800107, "grad_norm": 0.1728515625, "learning_rate": 0.00012441287952861673, "loss": 1.2169, "step": 7175 }, { "epoch": 0.47969000534473544, "grad_norm": 0.17578125, "learning_rate": 0.00012429978534943617, "loss": 1.2445, "step": 7180 }, { "epoch": 0.4800240513094602, "grad_norm": 0.1787109375, "learning_rate": 0.00012418665813007066, "loss": 1.1686, "step": 7185 }, { "epoch": 0.48035809727418494, "grad_norm": 0.1748046875, "learning_rate": 0.00012407349802433818, "loss": 1.2286, "step": 7190 }, { "epoch": 0.4806921432389097, "grad_norm": 0.1689453125, "learning_rate": 0.00012396030518610143, "loss": 1.2835, "step": 7195 }, { "epoch": 0.48102618920363444, "grad_norm": 0.171875, "learning_rate": 0.00012384707976926767, "loss": 1.2347, "step": 7200 }, { "epoch": 0.4813602351683592, "grad_norm": 0.169921875, "learning_rate": 0.00012373382192778834, "loss": 1.1946, "step": 7205 }, { "epoch": 0.48169428113308393, "grad_norm": 0.1689453125, "learning_rate": 0.00012362053181565912, "loss": 1.1201, "step": 7210 }, { "epoch": 0.4820283270978087, "grad_norm": 0.1708984375, "learning_rate": 0.00012350720958691943, "loss": 1.1542, "step": 7215 }, { "epoch": 0.4823623730625334, "grad_norm": 0.185546875, "learning_rate": 0.00012339385539565244, "loss": 1.1681, "step": 7220 }, { "epoch": 0.48269641902725813, "grad_norm": 0.173828125, "learning_rate": 0.00012328046939598475, "loss": 1.2992, "step": 7225 }, { "epoch": 0.4830304649919829, "grad_norm": 0.1650390625, "learning_rate": 0.00012316705174208624, "loss": 1.1038, "step": 7230 }, { "epoch": 0.4833645109567076, "grad_norm": 0.255859375, "learning_rate": 0.00012305360258816977, "loss": 1.141, "step": 7235 }, { "epoch": 0.4836985569214324, "grad_norm": 0.1591796875, "learning_rate": 0.00012294012208849105, "loss": 1.1671, "step": 7240 }, { "epoch": 0.4840326028861571, "grad_norm": 0.154296875, "learning_rate": 0.00012282661039734848, "loss": 1.2421, "step": 7245 }, { "epoch": 0.4843666488508819, "grad_norm": 0.166015625, "learning_rate": 0.0001227130676690828, "loss": 1.2105, "step": 7250 }, { "epoch": 0.4847006948156066, "grad_norm": 0.173828125, "learning_rate": 0.00012259949405807696, "loss": 1.1635, "step": 7255 }, { "epoch": 0.4850347407803314, "grad_norm": 0.16796875, "learning_rate": 0.00012248588971875587, "loss": 1.2874, "step": 7260 }, { "epoch": 0.4853687867450561, "grad_norm": 0.166015625, "learning_rate": 0.00012237225480558637, "loss": 1.1925, "step": 7265 }, { "epoch": 0.4857028327097809, "grad_norm": 0.173828125, "learning_rate": 0.00012225858947307664, "loss": 1.208, "step": 7270 }, { "epoch": 0.4860368786745056, "grad_norm": 0.162109375, "learning_rate": 0.0001221448938757764, "loss": 1.2509, "step": 7275 }, { "epoch": 0.4863709246392304, "grad_norm": 0.169921875, "learning_rate": 0.00012203116816827646, "loss": 1.1224, "step": 7280 }, { "epoch": 0.4867049706039551, "grad_norm": 0.16796875, "learning_rate": 0.00012191741250520861, "loss": 1.1747, "step": 7285 }, { "epoch": 0.48703901656867987, "grad_norm": 0.16796875, "learning_rate": 0.00012180362704124523, "loss": 1.1535, "step": 7290 }, { "epoch": 0.4873730625334046, "grad_norm": 0.166015625, "learning_rate": 0.00012168981193109945, "loss": 1.213, "step": 7295 }, { "epoch": 0.4877071084981293, "grad_norm": 0.19140625, "learning_rate": 0.00012157596732952448, "loss": 1.1681, "step": 7300 }, { "epoch": 0.48804115446285407, "grad_norm": 0.1650390625, "learning_rate": 0.00012146209339131376, "loss": 1.1547, "step": 7305 }, { "epoch": 0.4883752004275788, "grad_norm": 0.203125, "learning_rate": 0.00012134819027130062, "loss": 1.1921, "step": 7310 }, { "epoch": 0.48870924639230356, "grad_norm": 0.1640625, "learning_rate": 0.00012123425812435803, "loss": 1.2203, "step": 7315 }, { "epoch": 0.4890432923570283, "grad_norm": 0.1630859375, "learning_rate": 0.00012112029710539842, "loss": 1.1512, "step": 7320 }, { "epoch": 0.48937733832175306, "grad_norm": 0.181640625, "learning_rate": 0.00012100630736937348, "loss": 1.192, "step": 7325 }, { "epoch": 0.4897113842864778, "grad_norm": 0.173828125, "learning_rate": 0.00012089228907127403, "loss": 1.1679, "step": 7330 }, { "epoch": 0.49004543025120256, "grad_norm": 0.1591796875, "learning_rate": 0.00012077824236612958, "loss": 1.2825, "step": 7335 }, { "epoch": 0.4903794762159273, "grad_norm": 0.1845703125, "learning_rate": 0.00012066416740900836, "loss": 1.1705, "step": 7340 }, { "epoch": 0.49071352218065206, "grad_norm": 0.1669921875, "learning_rate": 0.00012055006435501703, "loss": 1.1813, "step": 7345 }, { "epoch": 0.4910475681453768, "grad_norm": 0.18359375, "learning_rate": 0.0001204359333593004, "loss": 1.2041, "step": 7350 }, { "epoch": 0.49138161411010156, "grad_norm": 0.16796875, "learning_rate": 0.00012032177457704124, "loss": 1.2289, "step": 7355 }, { "epoch": 0.4917156600748263, "grad_norm": 0.1630859375, "learning_rate": 0.00012020758816346023, "loss": 1.1914, "step": 7360 }, { "epoch": 0.49204970603955106, "grad_norm": 0.1650390625, "learning_rate": 0.00012009337427381549, "loss": 1.1316, "step": 7365 }, { "epoch": 0.4923837520042758, "grad_norm": 0.1533203125, "learning_rate": 0.00011997913306340257, "loss": 1.1829, "step": 7370 }, { "epoch": 0.49271779796900056, "grad_norm": 0.1767578125, "learning_rate": 0.00011986486468755412, "loss": 1.2015, "step": 7375 }, { "epoch": 0.49305184393372525, "grad_norm": 0.1591796875, "learning_rate": 0.00011975056930163978, "loss": 1.169, "step": 7380 }, { "epoch": 0.49338588989845, "grad_norm": 0.25390625, "learning_rate": 0.00011963624706106589, "loss": 1.2223, "step": 7385 }, { "epoch": 0.49371993586317475, "grad_norm": 0.1689453125, "learning_rate": 0.0001195218981212753, "loss": 1.2101, "step": 7390 }, { "epoch": 0.4940539818278995, "grad_norm": 0.173828125, "learning_rate": 0.00011940752263774717, "loss": 1.278, "step": 7395 }, { "epoch": 0.49438802779262425, "grad_norm": 0.1748046875, "learning_rate": 0.0001192931207659967, "loss": 1.173, "step": 7400 }, { "epoch": 0.494722073757349, "grad_norm": 0.1572265625, "learning_rate": 0.00011917869266157513, "loss": 1.1914, "step": 7405 }, { "epoch": 0.49505611972207375, "grad_norm": 0.15234375, "learning_rate": 0.00011906423848006913, "loss": 1.2184, "step": 7410 }, { "epoch": 0.4953901656867985, "grad_norm": 0.1689453125, "learning_rate": 0.000118949758377101, "loss": 1.3098, "step": 7415 }, { "epoch": 0.49572421165152325, "grad_norm": 0.1845703125, "learning_rate": 0.00011883525250832828, "loss": 1.2174, "step": 7420 }, { "epoch": 0.496058257616248, "grad_norm": 0.1630859375, "learning_rate": 0.00011872072102944346, "loss": 1.2241, "step": 7425 }, { "epoch": 0.49639230358097275, "grad_norm": 0.1591796875, "learning_rate": 0.00011860616409617386, "loss": 1.1798, "step": 7430 }, { "epoch": 0.4967263495456975, "grad_norm": 0.1630859375, "learning_rate": 0.0001184915818642815, "loss": 1.1564, "step": 7435 }, { "epoch": 0.49706039551042225, "grad_norm": 0.158203125, "learning_rate": 0.00011837697448956275, "loss": 1.2022, "step": 7440 }, { "epoch": 0.497394441475147, "grad_norm": 0.1640625, "learning_rate": 0.0001182623421278481, "loss": 1.1389, "step": 7445 }, { "epoch": 0.49772848743987175, "grad_norm": 0.1689453125, "learning_rate": 0.00011814768493500213, "loss": 1.1532, "step": 7450 }, { "epoch": 0.4980625334045965, "grad_norm": 0.1669921875, "learning_rate": 0.00011803300306692306, "loss": 1.1738, "step": 7455 }, { "epoch": 0.4983965793693212, "grad_norm": 0.162109375, "learning_rate": 0.00011791829667954277, "loss": 1.243, "step": 7460 }, { "epoch": 0.49873062533404594, "grad_norm": 0.177734375, "learning_rate": 0.00011780356592882645, "loss": 1.1898, "step": 7465 }, { "epoch": 0.4990646712987707, "grad_norm": 0.169921875, "learning_rate": 0.00011768881097077238, "loss": 1.1964, "step": 7470 }, { "epoch": 0.49939871726349544, "grad_norm": 0.1708984375, "learning_rate": 0.00011757403196141172, "loss": 1.1461, "step": 7475 }, { "epoch": 0.4997327632282202, "grad_norm": 0.181640625, "learning_rate": 0.00011745922905680849, "loss": 1.2334, "step": 7480 }, { "epoch": 0.500066809192945, "grad_norm": 0.169921875, "learning_rate": 0.00011734440241305902, "loss": 1.2266, "step": 7485 }, { "epoch": 0.5004008551576697, "grad_norm": 0.1640625, "learning_rate": 0.00011722955218629204, "loss": 1.2048, "step": 7490 }, { "epoch": 0.5007349011223945, "grad_norm": 0.169921875, "learning_rate": 0.00011711467853266826, "loss": 1.2477, "step": 7495 }, { "epoch": 0.5010689470871192, "grad_norm": 0.16796875, "learning_rate": 0.00011699978160838032, "loss": 1.1918, "step": 7500 }, { "epoch": 0.5014029930518439, "grad_norm": 0.177734375, "learning_rate": 0.00011688486156965246, "loss": 1.1969, "step": 7505 }, { "epoch": 0.5017370390165686, "grad_norm": 0.1630859375, "learning_rate": 0.00011676991857274035, "loss": 1.2102, "step": 7510 }, { "epoch": 0.5020710849812934, "grad_norm": 0.1669921875, "learning_rate": 0.0001166549527739309, "loss": 1.2321, "step": 7515 }, { "epoch": 0.5024051309460181, "grad_norm": 0.1650390625, "learning_rate": 0.00011653996432954192, "loss": 1.1598, "step": 7520 }, { "epoch": 0.5027391769107429, "grad_norm": 0.177734375, "learning_rate": 0.00011642495339592224, "loss": 1.2661, "step": 7525 }, { "epoch": 0.5030732228754676, "grad_norm": 0.1669921875, "learning_rate": 0.00011630992012945099, "loss": 1.1201, "step": 7530 }, { "epoch": 0.5034072688401924, "grad_norm": 0.1728515625, "learning_rate": 0.00011619486468653785, "loss": 1.2572, "step": 7535 }, { "epoch": 0.5037413148049171, "grad_norm": 0.1796875, "learning_rate": 0.00011607978722362258, "loss": 1.1534, "step": 7540 }, { "epoch": 0.5040753607696419, "grad_norm": 0.177734375, "learning_rate": 0.00011596468789717494, "loss": 1.1787, "step": 7545 }, { "epoch": 0.5044094067343666, "grad_norm": 0.1767578125, "learning_rate": 0.0001158495668636944, "loss": 1.1895, "step": 7550 }, { "epoch": 0.5047434526990914, "grad_norm": 0.185546875, "learning_rate": 0.00011573442427970982, "loss": 1.2308, "step": 7555 }, { "epoch": 0.5050774986638161, "grad_norm": 0.1689453125, "learning_rate": 0.00011561926030177957, "loss": 1.183, "step": 7560 }, { "epoch": 0.5054115446285409, "grad_norm": 0.1650390625, "learning_rate": 0.00011550407508649095, "loss": 1.1378, "step": 7565 }, { "epoch": 0.5057455905932656, "grad_norm": 0.1806640625, "learning_rate": 0.00011538886879046023, "loss": 1.2704, "step": 7570 }, { "epoch": 0.5060796365579904, "grad_norm": 0.1767578125, "learning_rate": 0.00011527364157033227, "loss": 1.0969, "step": 7575 }, { "epoch": 0.5064136825227151, "grad_norm": 0.17578125, "learning_rate": 0.00011515839358278045, "loss": 1.2111, "step": 7580 }, { "epoch": 0.5067477284874399, "grad_norm": 0.1796875, "learning_rate": 0.0001150431249845063, "loss": 1.2049, "step": 7585 }, { "epoch": 0.5070817744521646, "grad_norm": 0.1708984375, "learning_rate": 0.00011492783593223948, "loss": 1.2238, "step": 7590 }, { "epoch": 0.5074158204168894, "grad_norm": 0.1650390625, "learning_rate": 0.00011481252658273737, "loss": 1.1828, "step": 7595 }, { "epoch": 0.5077498663816141, "grad_norm": 0.173828125, "learning_rate": 0.00011469719709278503, "loss": 1.1336, "step": 7600 }, { "epoch": 0.5080839123463389, "grad_norm": 0.185546875, "learning_rate": 0.00011458184761919481, "loss": 1.2439, "step": 7605 }, { "epoch": 0.5084179583110636, "grad_norm": 0.1787109375, "learning_rate": 0.00011446647831880633, "loss": 1.2414, "step": 7610 }, { "epoch": 0.5087520042757884, "grad_norm": 0.171875, "learning_rate": 0.00011435108934848608, "loss": 1.1807, "step": 7615 }, { "epoch": 0.5090860502405131, "grad_norm": 0.1728515625, "learning_rate": 0.00011423568086512737, "loss": 1.1715, "step": 7620 }, { "epoch": 0.5094200962052379, "grad_norm": 0.1591796875, "learning_rate": 0.00011412025302564999, "loss": 1.2178, "step": 7625 }, { "epoch": 0.5097541421699626, "grad_norm": 0.158203125, "learning_rate": 0.00011400480598700006, "loss": 1.1287, "step": 7630 }, { "epoch": 0.5100881881346874, "grad_norm": 0.177734375, "learning_rate": 0.00011388933990614982, "loss": 1.1457, "step": 7635 }, { "epoch": 0.5104222340994121, "grad_norm": 0.1845703125, "learning_rate": 0.0001137738549400974, "loss": 1.1473, "step": 7640 }, { "epoch": 0.5107562800641369, "grad_norm": 0.1728515625, "learning_rate": 0.00011365835124586657, "loss": 1.2109, "step": 7645 }, { "epoch": 0.5110903260288616, "grad_norm": 0.1640625, "learning_rate": 0.00011354282898050661, "loss": 1.1932, "step": 7650 }, { "epoch": 0.5114243719935864, "grad_norm": 0.1689453125, "learning_rate": 0.00011342728830109209, "loss": 1.2119, "step": 7655 }, { "epoch": 0.5117584179583111, "grad_norm": 0.1728515625, "learning_rate": 0.00011331172936472243, "loss": 1.1444, "step": 7660 }, { "epoch": 0.5120924639230358, "grad_norm": 0.16796875, "learning_rate": 0.00011319615232852217, "loss": 1.2466, "step": 7665 }, { "epoch": 0.5124265098877605, "grad_norm": 0.169921875, "learning_rate": 0.00011308055734964018, "loss": 1.1991, "step": 7670 }, { "epoch": 0.5127605558524853, "grad_norm": 0.1650390625, "learning_rate": 0.00011296494458524986, "loss": 1.149, "step": 7675 }, { "epoch": 0.51309460181721, "grad_norm": 0.169921875, "learning_rate": 0.00011284931419254883, "loss": 1.2253, "step": 7680 }, { "epoch": 0.5134286477819348, "grad_norm": 0.166015625, "learning_rate": 0.00011273366632875859, "loss": 1.2265, "step": 7685 }, { "epoch": 0.5137626937466595, "grad_norm": 0.1572265625, "learning_rate": 0.00011261800115112441, "loss": 1.2184, "step": 7690 }, { "epoch": 0.5140967397113843, "grad_norm": 0.1640625, "learning_rate": 0.00011250231881691517, "loss": 1.1543, "step": 7695 }, { "epoch": 0.514430785676109, "grad_norm": 0.1767578125, "learning_rate": 0.00011238661948342302, "loss": 1.1452, "step": 7700 }, { "epoch": 0.5147648316408338, "grad_norm": 0.17578125, "learning_rate": 0.00011227090330796317, "loss": 1.1757, "step": 7705 }, { "epoch": 0.5150988776055585, "grad_norm": 0.162109375, "learning_rate": 0.00011215517044787387, "loss": 1.2306, "step": 7710 }, { "epoch": 0.5154329235702833, "grad_norm": 0.1650390625, "learning_rate": 0.00011203942106051595, "loss": 1.1709, "step": 7715 }, { "epoch": 0.515766969535008, "grad_norm": 0.171875, "learning_rate": 0.00011192365530327275, "loss": 1.2167, "step": 7720 }, { "epoch": 0.5161010154997328, "grad_norm": 0.1611328125, "learning_rate": 0.00011180787333354983, "loss": 1.1912, "step": 7725 }, { "epoch": 0.5164350614644575, "grad_norm": 0.1640625, "learning_rate": 0.00011169207530877486, "loss": 1.2212, "step": 7730 }, { "epoch": 0.5167691074291823, "grad_norm": 0.1748046875, "learning_rate": 0.00011157626138639727, "loss": 1.2778, "step": 7735 }, { "epoch": 0.517103153393907, "grad_norm": 0.169921875, "learning_rate": 0.00011146043172388819, "loss": 1.2223, "step": 7740 }, { "epoch": 0.5174371993586317, "grad_norm": 0.1689453125, "learning_rate": 0.00011134458647874007, "loss": 1.2156, "step": 7745 }, { "epoch": 0.5177712453233565, "grad_norm": 0.1611328125, "learning_rate": 0.00011122872580846652, "loss": 1.1062, "step": 7750 }, { "epoch": 0.5181052912880812, "grad_norm": 0.171875, "learning_rate": 0.00011111284987060228, "loss": 1.1876, "step": 7755 }, { "epoch": 0.518439337252806, "grad_norm": 0.166015625, "learning_rate": 0.00011099695882270272, "loss": 1.1698, "step": 7760 }, { "epoch": 0.5187733832175307, "grad_norm": 0.16796875, "learning_rate": 0.00011088105282234376, "loss": 1.1686, "step": 7765 }, { "epoch": 0.5191074291822555, "grad_norm": 0.1640625, "learning_rate": 0.00011076513202712167, "loss": 1.1526, "step": 7770 }, { "epoch": 0.5194414751469802, "grad_norm": 0.16796875, "learning_rate": 0.00011064919659465289, "loss": 1.1648, "step": 7775 }, { "epoch": 0.519775521111705, "grad_norm": 0.1708984375, "learning_rate": 0.00011053324668257368, "loss": 1.1674, "step": 7780 }, { "epoch": 0.5201095670764297, "grad_norm": 0.162109375, "learning_rate": 0.00011041728244854004, "loss": 1.1817, "step": 7785 }, { "epoch": 0.5204436130411545, "grad_norm": 0.1611328125, "learning_rate": 0.00011030130405022738, "loss": 1.1222, "step": 7790 }, { "epoch": 0.5207776590058792, "grad_norm": 0.1708984375, "learning_rate": 0.00011018531164533048, "loss": 1.1805, "step": 7795 }, { "epoch": 0.521111704970604, "grad_norm": 0.16796875, "learning_rate": 0.00011006930539156308, "loss": 1.1889, "step": 7800 }, { "epoch": 0.5214457509353287, "grad_norm": 0.16796875, "learning_rate": 0.00010995328544665775, "loss": 1.1897, "step": 7805 }, { "epoch": 0.5217797969000535, "grad_norm": 0.169921875, "learning_rate": 0.00010983725196836574, "loss": 1.1815, "step": 7810 }, { "epoch": 0.5221138428647782, "grad_norm": 0.1640625, "learning_rate": 0.00010972120511445656, "loss": 1.1831, "step": 7815 }, { "epoch": 0.522447888829503, "grad_norm": 0.16796875, "learning_rate": 0.00010960514504271813, "loss": 1.1478, "step": 7820 }, { "epoch": 0.5227819347942276, "grad_norm": 0.1611328125, "learning_rate": 0.00010948907191095612, "loss": 1.2321, "step": 7825 }, { "epoch": 0.5231159807589524, "grad_norm": 0.1826171875, "learning_rate": 0.00010937298587699409, "loss": 1.1213, "step": 7830 }, { "epoch": 0.5234500267236771, "grad_norm": 0.1787109375, "learning_rate": 0.00010925688709867312, "loss": 1.2076, "step": 7835 }, { "epoch": 0.5237840726884019, "grad_norm": 0.1806640625, "learning_rate": 0.0001091407757338516, "loss": 1.2197, "step": 7840 }, { "epoch": 0.5241181186531266, "grad_norm": 0.171875, "learning_rate": 0.00010902465194040501, "loss": 1.2402, "step": 7845 }, { "epoch": 0.5244521646178514, "grad_norm": 0.1708984375, "learning_rate": 0.00010890851587622577, "loss": 1.2424, "step": 7850 }, { "epoch": 0.5247862105825761, "grad_norm": 0.1708984375, "learning_rate": 0.00010879236769922301, "loss": 1.2017, "step": 7855 }, { "epoch": 0.5251202565473009, "grad_norm": 0.1796875, "learning_rate": 0.00010867620756732219, "loss": 1.2269, "step": 7860 }, { "epoch": 0.5254543025120256, "grad_norm": 0.1708984375, "learning_rate": 0.00010856003563846526, "loss": 1.1909, "step": 7865 }, { "epoch": 0.5257883484767504, "grad_norm": 0.1650390625, "learning_rate": 0.00010844385207061001, "loss": 1.1847, "step": 7870 }, { "epoch": 0.5261223944414751, "grad_norm": 0.162109375, "learning_rate": 0.00010832765702173011, "loss": 1.1651, "step": 7875 }, { "epoch": 0.5264564404061999, "grad_norm": 0.166015625, "learning_rate": 0.00010821145064981487, "loss": 1.1334, "step": 7880 }, { "epoch": 0.5267904863709246, "grad_norm": 0.1640625, "learning_rate": 0.00010809523311286897, "loss": 1.1816, "step": 7885 }, { "epoch": 0.5271245323356494, "grad_norm": 0.1748046875, "learning_rate": 0.00010797900456891228, "loss": 1.2023, "step": 7890 }, { "epoch": 0.5274585783003741, "grad_norm": 0.171875, "learning_rate": 0.00010786276517597968, "loss": 1.1492, "step": 7895 }, { "epoch": 0.5277926242650989, "grad_norm": 0.1826171875, "learning_rate": 0.0001077465150921207, "loss": 1.2383, "step": 7900 }, { "epoch": 0.5281266702298236, "grad_norm": 0.162109375, "learning_rate": 0.00010763025447539948, "loss": 1.1999, "step": 7905 }, { "epoch": 0.5284607161945484, "grad_norm": 0.18359375, "learning_rate": 0.00010751398348389446, "loss": 1.1763, "step": 7910 }, { "epoch": 0.5287947621592731, "grad_norm": 0.173828125, "learning_rate": 0.00010739770227569821, "loss": 1.2671, "step": 7915 }, { "epoch": 0.5291288081239979, "grad_norm": 0.1767578125, "learning_rate": 0.00010728141100891716, "loss": 1.2537, "step": 7920 }, { "epoch": 0.5294628540887226, "grad_norm": 0.1708984375, "learning_rate": 0.00010716510984167142, "loss": 1.2583, "step": 7925 }, { "epoch": 0.5297969000534474, "grad_norm": 0.16796875, "learning_rate": 0.00010704879893209463, "loss": 1.177, "step": 7930 }, { "epoch": 0.5301309460181721, "grad_norm": 0.1865234375, "learning_rate": 0.00010693247843833352, "loss": 1.2566, "step": 7935 }, { "epoch": 0.5304649919828969, "grad_norm": 0.1708984375, "learning_rate": 0.00010681614851854802, "loss": 1.1253, "step": 7940 }, { "epoch": 0.5307990379476216, "grad_norm": 0.1845703125, "learning_rate": 0.00010669980933091079, "loss": 1.2334, "step": 7945 }, { "epoch": 0.5311330839123464, "grad_norm": 0.228515625, "learning_rate": 0.0001065834610336071, "loss": 1.2108, "step": 7950 }, { "epoch": 0.5314671298770711, "grad_norm": 0.18359375, "learning_rate": 0.0001064671037848346, "loss": 1.2085, "step": 7955 }, { "epoch": 0.5318011758417959, "grad_norm": 0.1767578125, "learning_rate": 0.00010635073774280315, "loss": 1.1995, "step": 7960 }, { "epoch": 0.5321352218065206, "grad_norm": 0.1748046875, "learning_rate": 0.00010623436306573455, "loss": 1.162, "step": 7965 }, { "epoch": 0.5324692677712454, "grad_norm": 0.193359375, "learning_rate": 0.00010611797991186229, "loss": 1.2132, "step": 7970 }, { "epoch": 0.5328033137359701, "grad_norm": 0.1708984375, "learning_rate": 0.00010600158843943149, "loss": 1.2682, "step": 7975 }, { "epoch": 0.5331373597006949, "grad_norm": 0.17578125, "learning_rate": 0.00010588518880669842, "loss": 1.1729, "step": 7980 }, { "epoch": 0.5334714056654195, "grad_norm": 0.1787109375, "learning_rate": 0.00010576878117193066, "loss": 1.1858, "step": 7985 }, { "epoch": 0.5338054516301443, "grad_norm": 0.185546875, "learning_rate": 0.00010565236569340646, "loss": 1.2354, "step": 7990 }, { "epoch": 0.534139497594869, "grad_norm": 0.171875, "learning_rate": 0.00010553594252941488, "loss": 1.1875, "step": 7995 }, { "epoch": 0.5344735435595938, "grad_norm": 0.1533203125, "learning_rate": 0.00010541951183825536, "loss": 1.1688, "step": 8000 }, { "epoch": 0.5348075895243185, "grad_norm": 0.16015625, "learning_rate": 0.00010530307377823762, "loss": 1.1839, "step": 8005 }, { "epoch": 0.5351416354890433, "grad_norm": 0.1728515625, "learning_rate": 0.00010518662850768133, "loss": 1.1939, "step": 8010 }, { "epoch": 0.535475681453768, "grad_norm": 0.169921875, "learning_rate": 0.00010507017618491603, "loss": 1.2179, "step": 8015 }, { "epoch": 0.5358097274184928, "grad_norm": 0.16796875, "learning_rate": 0.00010495371696828083, "loss": 1.2316, "step": 8020 }, { "epoch": 0.5361437733832175, "grad_norm": 0.1708984375, "learning_rate": 0.00010483725101612419, "loss": 1.2384, "step": 8025 }, { "epoch": 0.5364778193479423, "grad_norm": 0.197265625, "learning_rate": 0.00010472077848680378, "loss": 1.1438, "step": 8030 }, { "epoch": 0.536811865312667, "grad_norm": 0.1708984375, "learning_rate": 0.00010460429953868614, "loss": 1.2008, "step": 8035 }, { "epoch": 0.5371459112773918, "grad_norm": 0.1708984375, "learning_rate": 0.00010448781433014663, "loss": 1.1596, "step": 8040 }, { "epoch": 0.5374799572421165, "grad_norm": 0.1787109375, "learning_rate": 0.00010437132301956897, "loss": 1.156, "step": 8045 }, { "epoch": 0.5378140032068413, "grad_norm": 0.1689453125, "learning_rate": 0.00010425482576534545, "loss": 1.1974, "step": 8050 }, { "epoch": 0.538148049171566, "grad_norm": 0.1630859375, "learning_rate": 0.00010413832272587609, "loss": 1.091, "step": 8055 }, { "epoch": 0.5384820951362908, "grad_norm": 0.173828125, "learning_rate": 0.00010402181405956906, "loss": 1.262, "step": 8060 }, { "epoch": 0.5388161411010155, "grad_norm": 0.177734375, "learning_rate": 0.00010390529992484004, "loss": 1.169, "step": 8065 }, { "epoch": 0.5391501870657402, "grad_norm": 0.1787109375, "learning_rate": 0.00010378878048011218, "loss": 1.1552, "step": 8070 }, { "epoch": 0.539484233030465, "grad_norm": 0.1669921875, "learning_rate": 0.00010367225588381584, "loss": 1.195, "step": 8075 }, { "epoch": 0.5398182789951897, "grad_norm": 0.1669921875, "learning_rate": 0.00010355572629438846, "loss": 1.239, "step": 8080 }, { "epoch": 0.5401523249599145, "grad_norm": 0.1708984375, "learning_rate": 0.00010343919187027413, "loss": 1.2839, "step": 8085 }, { "epoch": 0.5404863709246392, "grad_norm": 0.1572265625, "learning_rate": 0.00010332265276992362, "loss": 1.203, "step": 8090 }, { "epoch": 0.540820416889364, "grad_norm": 0.1689453125, "learning_rate": 0.00010320610915179402, "loss": 1.1622, "step": 8095 }, { "epoch": 0.5411544628540887, "grad_norm": 0.17578125, "learning_rate": 0.00010308956117434858, "loss": 1.1227, "step": 8100 }, { "epoch": 0.5414885088188135, "grad_norm": 0.1728515625, "learning_rate": 0.00010297300899605644, "loss": 1.2179, "step": 8105 }, { "epoch": 0.5418225547835382, "grad_norm": 0.1728515625, "learning_rate": 0.00010285645277539252, "loss": 1.1927, "step": 8110 }, { "epoch": 0.542156600748263, "grad_norm": 0.1669921875, "learning_rate": 0.00010273989267083717, "loss": 1.1551, "step": 8115 }, { "epoch": 0.5424906467129877, "grad_norm": 0.1689453125, "learning_rate": 0.000102623328840876, "loss": 1.2515, "step": 8120 }, { "epoch": 0.5428246926777125, "grad_norm": 0.16796875, "learning_rate": 0.00010250676144399984, "loss": 1.1713, "step": 8125 }, { "epoch": 0.5431587386424372, "grad_norm": 0.1806640625, "learning_rate": 0.00010239019063870416, "loss": 1.1947, "step": 8130 }, { "epoch": 0.543492784607162, "grad_norm": 0.177734375, "learning_rate": 0.00010227361658348922, "loss": 1.2157, "step": 8135 }, { "epoch": 0.5438268305718867, "grad_norm": 0.1630859375, "learning_rate": 0.00010215703943685964, "loss": 1.1747, "step": 8140 }, { "epoch": 0.5441608765366115, "grad_norm": 0.162109375, "learning_rate": 0.0001020404593573242, "loss": 1.2436, "step": 8145 }, { "epoch": 0.5444949225013361, "grad_norm": 0.166015625, "learning_rate": 0.00010192387650339579, "loss": 1.1607, "step": 8150 }, { "epoch": 0.5448289684660609, "grad_norm": 0.16796875, "learning_rate": 0.00010180729103359094, "loss": 1.1594, "step": 8155 }, { "epoch": 0.5451630144307856, "grad_norm": 0.1640625, "learning_rate": 0.00010169070310642983, "loss": 1.1701, "step": 8160 }, { "epoch": 0.5454970603955104, "grad_norm": 0.1728515625, "learning_rate": 0.0001015741128804359, "loss": 1.2794, "step": 8165 }, { "epoch": 0.5458311063602351, "grad_norm": 0.1845703125, "learning_rate": 0.00010145752051413584, "loss": 1.2377, "step": 8170 }, { "epoch": 0.5461651523249599, "grad_norm": 0.185546875, "learning_rate": 0.00010134092616605908, "loss": 1.1754, "step": 8175 }, { "epoch": 0.5464991982896846, "grad_norm": 0.1630859375, "learning_rate": 0.0001012243299947379, "loss": 1.1771, "step": 8180 }, { "epoch": 0.5468332442544094, "grad_norm": 0.1796875, "learning_rate": 0.00010110773215870695, "loss": 1.2177, "step": 8185 }, { "epoch": 0.5471672902191341, "grad_norm": 0.1708984375, "learning_rate": 0.00010099113281650325, "loss": 1.2478, "step": 8190 }, { "epoch": 0.5475013361838589, "grad_norm": 0.1748046875, "learning_rate": 0.00010087453212666574, "loss": 1.1897, "step": 8195 }, { "epoch": 0.5478353821485836, "grad_norm": 0.16796875, "learning_rate": 0.0001007579302477353, "loss": 1.179, "step": 8200 }, { "epoch": 0.5481694281133084, "grad_norm": 0.1650390625, "learning_rate": 0.00010064132733825438, "loss": 1.1982, "step": 8205 }, { "epoch": 0.5485034740780331, "grad_norm": 0.1787109375, "learning_rate": 0.00010052472355676683, "loss": 1.1418, "step": 8210 }, { "epoch": 0.5488375200427579, "grad_norm": 0.1767578125, "learning_rate": 0.00010040811906181769, "loss": 1.1776, "step": 8215 }, { "epoch": 0.5491715660074826, "grad_norm": 0.169921875, "learning_rate": 0.00010029151401195298, "loss": 1.133, "step": 8220 }, { "epoch": 0.5495056119722074, "grad_norm": 0.18359375, "learning_rate": 0.00010017490856571945, "loss": 1.2514, "step": 8225 }, { "epoch": 0.5498396579369321, "grad_norm": 0.158203125, "learning_rate": 0.00010005830288166445, "loss": 1.1875, "step": 8230 }, { "epoch": 0.5501737039016569, "grad_norm": 0.18359375, "learning_rate": 9.994169711833555e-05, "loss": 1.2005, "step": 8235 }, { "epoch": 0.5505077498663816, "grad_norm": 0.173828125, "learning_rate": 9.982509143428054e-05, "loss": 1.2274, "step": 8240 }, { "epoch": 0.5508417958311064, "grad_norm": 0.1611328125, "learning_rate": 9.970848598804705e-05, "loss": 1.2221, "step": 8245 }, { "epoch": 0.5511758417958311, "grad_norm": 0.1796875, "learning_rate": 9.959188093818234e-05, "loss": 1.2734, "step": 8250 }, { "epoch": 0.5515098877605559, "grad_norm": 0.193359375, "learning_rate": 9.947527644323319e-05, "loss": 1.1843, "step": 8255 }, { "epoch": 0.5518439337252806, "grad_norm": 0.1689453125, "learning_rate": 9.935867266174566e-05, "loss": 1.2689, "step": 8260 }, { "epoch": 0.5521779796900054, "grad_norm": 0.18359375, "learning_rate": 9.924206975226471e-05, "loss": 1.3259, "step": 8265 }, { "epoch": 0.5525120256547301, "grad_norm": 0.1787109375, "learning_rate": 9.912546787333427e-05, "loss": 1.1586, "step": 8270 }, { "epoch": 0.5528460716194549, "grad_norm": 0.1669921875, "learning_rate": 9.900886718349676e-05, "loss": 1.1334, "step": 8275 }, { "epoch": 0.5531801175841796, "grad_norm": 0.1669921875, "learning_rate": 9.889226784129306e-05, "loss": 1.2147, "step": 8280 }, { "epoch": 0.5535141635489044, "grad_norm": 0.181640625, "learning_rate": 9.877567000526213e-05, "loss": 1.1871, "step": 8285 }, { "epoch": 0.5538482095136291, "grad_norm": 0.1640625, "learning_rate": 9.865907383394096e-05, "loss": 1.2581, "step": 8290 }, { "epoch": 0.5541822554783539, "grad_norm": 0.16796875, "learning_rate": 9.854247948586417e-05, "loss": 1.1312, "step": 8295 }, { "epoch": 0.5545163014430786, "grad_norm": 0.1806640625, "learning_rate": 9.84258871195641e-05, "loss": 1.1728, "step": 8300 }, { "epoch": 0.5548503474078034, "grad_norm": 0.1669921875, "learning_rate": 9.830929689357019e-05, "loss": 1.2742, "step": 8305 }, { "epoch": 0.555184393372528, "grad_norm": 0.166015625, "learning_rate": 9.819270896640908e-05, "loss": 1.2569, "step": 8310 }, { "epoch": 0.5555184393372528, "grad_norm": 0.17578125, "learning_rate": 9.807612349660423e-05, "loss": 1.2754, "step": 8315 }, { "epoch": 0.5558524853019775, "grad_norm": 0.1669921875, "learning_rate": 9.795954064267581e-05, "loss": 1.1742, "step": 8320 }, { "epoch": 0.5561865312667023, "grad_norm": 0.1728515625, "learning_rate": 9.784296056314037e-05, "loss": 1.1708, "step": 8325 }, { "epoch": 0.556520577231427, "grad_norm": 0.44140625, "learning_rate": 9.772638341651079e-05, "loss": 1.2346, "step": 8330 }, { "epoch": 0.5568546231961518, "grad_norm": 0.1982421875, "learning_rate": 9.760980936129585e-05, "loss": 1.1983, "step": 8335 }, { "epoch": 0.5571886691608765, "grad_norm": 0.166015625, "learning_rate": 9.749323855600017e-05, "loss": 1.2003, "step": 8340 }, { "epoch": 0.5575227151256013, "grad_norm": 0.1689453125, "learning_rate": 9.737667115912402e-05, "loss": 1.2915, "step": 8345 }, { "epoch": 0.557856761090326, "grad_norm": 0.171875, "learning_rate": 9.726010732916288e-05, "loss": 1.2653, "step": 8350 }, { "epoch": 0.5581908070550508, "grad_norm": 0.1806640625, "learning_rate": 9.714354722460753e-05, "loss": 1.2065, "step": 8355 }, { "epoch": 0.5585248530197755, "grad_norm": 0.1865234375, "learning_rate": 9.702699100394355e-05, "loss": 1.1583, "step": 8360 }, { "epoch": 0.5588588989845003, "grad_norm": 0.2060546875, "learning_rate": 9.691043882565145e-05, "loss": 1.189, "step": 8365 }, { "epoch": 0.559192944949225, "grad_norm": 0.162109375, "learning_rate": 9.6793890848206e-05, "loss": 1.2118, "step": 8370 }, { "epoch": 0.5595269909139498, "grad_norm": 0.1640625, "learning_rate": 9.66773472300764e-05, "loss": 1.1771, "step": 8375 }, { "epoch": 0.5598610368786745, "grad_norm": 0.173828125, "learning_rate": 9.656080812972591e-05, "loss": 1.287, "step": 8380 }, { "epoch": 0.5601950828433993, "grad_norm": 0.162109375, "learning_rate": 9.644427370561157e-05, "loss": 1.1679, "step": 8385 }, { "epoch": 0.560529128808124, "grad_norm": 0.1826171875, "learning_rate": 9.632774411618414e-05, "loss": 1.1996, "step": 8390 }, { "epoch": 0.5608631747728487, "grad_norm": 0.1767578125, "learning_rate": 9.621121951988783e-05, "loss": 1.2401, "step": 8395 }, { "epoch": 0.5611972207375735, "grad_norm": 0.1640625, "learning_rate": 9.609470007516e-05, "loss": 1.1879, "step": 8400 }, { "epoch": 0.5615312667022982, "grad_norm": 0.1708984375, "learning_rate": 9.597818594043096e-05, "loss": 1.2456, "step": 8405 }, { "epoch": 0.561865312667023, "grad_norm": 0.18359375, "learning_rate": 9.586167727412395e-05, "loss": 1.1403, "step": 8410 }, { "epoch": 0.5621993586317477, "grad_norm": 0.1748046875, "learning_rate": 9.574517423465462e-05, "loss": 1.2011, "step": 8415 }, { "epoch": 0.5625334045964725, "grad_norm": 0.1708984375, "learning_rate": 9.562867698043101e-05, "loss": 1.1682, "step": 8420 }, { "epoch": 0.5628674505611972, "grad_norm": 0.1767578125, "learning_rate": 9.55121856698534e-05, "loss": 1.1735, "step": 8425 }, { "epoch": 0.563201496525922, "grad_norm": 0.162109375, "learning_rate": 9.539570046131389e-05, "loss": 1.1531, "step": 8430 }, { "epoch": 0.5635355424906467, "grad_norm": 0.1640625, "learning_rate": 9.527922151319626e-05, "loss": 1.1076, "step": 8435 }, { "epoch": 0.5638695884553715, "grad_norm": 0.173828125, "learning_rate": 9.516274898387582e-05, "loss": 1.1787, "step": 8440 }, { "epoch": 0.5642036344200962, "grad_norm": 0.1640625, "learning_rate": 9.504628303171922e-05, "loss": 1.2271, "step": 8445 }, { "epoch": 0.564537680384821, "grad_norm": 0.18359375, "learning_rate": 9.492982381508398e-05, "loss": 1.1947, "step": 8450 }, { "epoch": 0.5648717263495457, "grad_norm": 0.173828125, "learning_rate": 9.481337149231868e-05, "loss": 1.1849, "step": 8455 }, { "epoch": 0.5652057723142705, "grad_norm": 0.2001953125, "learning_rate": 9.469692622176239e-05, "loss": 1.2134, "step": 8460 }, { "epoch": 0.5655398182789952, "grad_norm": 0.1572265625, "learning_rate": 9.458048816174465e-05, "loss": 1.2114, "step": 8465 }, { "epoch": 0.5658738642437199, "grad_norm": 0.1865234375, "learning_rate": 9.446405747058513e-05, "loss": 1.2596, "step": 8470 }, { "epoch": 0.5662079102084446, "grad_norm": 0.169921875, "learning_rate": 9.434763430659357e-05, "loss": 1.1842, "step": 8475 }, { "epoch": 0.5665419561731694, "grad_norm": 0.1611328125, "learning_rate": 9.423121882806934e-05, "loss": 1.1554, "step": 8480 }, { "epoch": 0.5668760021378941, "grad_norm": 0.1611328125, "learning_rate": 9.411481119330156e-05, "loss": 1.1946, "step": 8485 }, { "epoch": 0.5672100481026189, "grad_norm": 0.158203125, "learning_rate": 9.399841156056853e-05, "loss": 1.134, "step": 8490 }, { "epoch": 0.5675440940673436, "grad_norm": 0.193359375, "learning_rate": 9.388202008813772e-05, "loss": 1.1399, "step": 8495 }, { "epoch": 0.5678781400320684, "grad_norm": 0.1669921875, "learning_rate": 9.376563693426548e-05, "loss": 1.1826, "step": 8500 }, { "epoch": 0.5682121859967931, "grad_norm": 0.1640625, "learning_rate": 9.364926225719686e-05, "loss": 1.1893, "step": 8505 }, { "epoch": 0.5685462319615179, "grad_norm": 0.1630859375, "learning_rate": 9.353289621516539e-05, "loss": 1.158, "step": 8510 }, { "epoch": 0.5688802779262426, "grad_norm": 0.1787109375, "learning_rate": 9.341653896639293e-05, "loss": 1.1754, "step": 8515 }, { "epoch": 0.5692143238909674, "grad_norm": 0.1787109375, "learning_rate": 9.330019066908923e-05, "loss": 1.2279, "step": 8520 }, { "epoch": 0.5695483698556921, "grad_norm": 0.181640625, "learning_rate": 9.318385148145199e-05, "loss": 1.2369, "step": 8525 }, { "epoch": 0.5698824158204169, "grad_norm": 0.1689453125, "learning_rate": 9.306752156166652e-05, "loss": 1.1594, "step": 8530 }, { "epoch": 0.5702164617851416, "grad_norm": 0.1826171875, "learning_rate": 9.295120106790542e-05, "loss": 1.2326, "step": 8535 }, { "epoch": 0.5705505077498664, "grad_norm": 0.166015625, "learning_rate": 9.283489015832857e-05, "loss": 1.2411, "step": 8540 }, { "epoch": 0.5708845537145911, "grad_norm": 0.169921875, "learning_rate": 9.271858899108285e-05, "loss": 1.1679, "step": 8545 }, { "epoch": 0.5712185996793159, "grad_norm": 0.1640625, "learning_rate": 9.260229772430181e-05, "loss": 1.194, "step": 8550 }, { "epoch": 0.5715526456440406, "grad_norm": 0.1708984375, "learning_rate": 9.248601651610556e-05, "loss": 1.2654, "step": 8555 }, { "epoch": 0.5718866916087654, "grad_norm": 0.1728515625, "learning_rate": 9.236974552460055e-05, "loss": 1.2777, "step": 8560 }, { "epoch": 0.5722207375734901, "grad_norm": 0.1845703125, "learning_rate": 9.225348490787935e-05, "loss": 1.2713, "step": 8565 }, { "epoch": 0.5725547835382149, "grad_norm": 0.1708984375, "learning_rate": 9.213723482402035e-05, "loss": 1.1932, "step": 8570 }, { "epoch": 0.5728888295029396, "grad_norm": 0.1689453125, "learning_rate": 9.202099543108771e-05, "loss": 1.2309, "step": 8575 }, { "epoch": 0.5732228754676644, "grad_norm": 0.1796875, "learning_rate": 9.190476688713103e-05, "loss": 1.1724, "step": 8580 }, { "epoch": 0.5735569214323891, "grad_norm": 0.1748046875, "learning_rate": 9.178854935018516e-05, "loss": 1.1986, "step": 8585 }, { "epoch": 0.5738909673971139, "grad_norm": 0.173828125, "learning_rate": 9.167234297826992e-05, "loss": 1.1978, "step": 8590 }, { "epoch": 0.5742250133618386, "grad_norm": 0.1689453125, "learning_rate": 9.155614792939004e-05, "loss": 1.1005, "step": 8595 }, { "epoch": 0.5745590593265634, "grad_norm": 0.189453125, "learning_rate": 9.143996436153476e-05, "loss": 1.2731, "step": 8600 }, { "epoch": 0.5748931052912881, "grad_norm": 0.1708984375, "learning_rate": 9.13237924326778e-05, "loss": 1.2199, "step": 8605 }, { "epoch": 0.5752271512560129, "grad_norm": 0.181640625, "learning_rate": 9.120763230077703e-05, "loss": 1.1892, "step": 8610 }, { "epoch": 0.5755611972207376, "grad_norm": 0.208984375, "learning_rate": 9.109148412377426e-05, "loss": 1.261, "step": 8615 }, { "epoch": 0.5758952431854624, "grad_norm": 0.16015625, "learning_rate": 9.097534805959502e-05, "loss": 1.2014, "step": 8620 }, { "epoch": 0.5762292891501871, "grad_norm": 0.1611328125, "learning_rate": 9.085922426614844e-05, "loss": 1.2072, "step": 8625 }, { "epoch": 0.5765633351149118, "grad_norm": 0.166015625, "learning_rate": 9.074311290132693e-05, "loss": 1.154, "step": 8630 }, { "epoch": 0.5768973810796365, "grad_norm": 0.1796875, "learning_rate": 9.062701412300592e-05, "loss": 1.2357, "step": 8635 }, { "epoch": 0.5772314270443613, "grad_norm": 0.201171875, "learning_rate": 9.051092808904389e-05, "loss": 1.1894, "step": 8640 }, { "epoch": 0.577565473009086, "grad_norm": 0.2060546875, "learning_rate": 9.03948549572819e-05, "loss": 1.2215, "step": 8645 }, { "epoch": 0.5778995189738108, "grad_norm": 0.1669921875, "learning_rate": 9.027879488554346e-05, "loss": 1.2391, "step": 8650 }, { "epoch": 0.5782335649385355, "grad_norm": 0.1865234375, "learning_rate": 9.016274803163431e-05, "loss": 1.2081, "step": 8655 }, { "epoch": 0.5785676109032603, "grad_norm": 0.16015625, "learning_rate": 9.004671455334228e-05, "loss": 1.1283, "step": 8660 }, { "epoch": 0.578901656867985, "grad_norm": 0.173828125, "learning_rate": 8.993069460843693e-05, "loss": 1.2581, "step": 8665 }, { "epoch": 0.5792357028327098, "grad_norm": 0.1748046875, "learning_rate": 8.981468835466953e-05, "loss": 1.1975, "step": 8670 }, { "epoch": 0.5795697487974345, "grad_norm": 0.181640625, "learning_rate": 8.969869594977263e-05, "loss": 1.2193, "step": 8675 }, { "epoch": 0.5799037947621593, "grad_norm": 0.1591796875, "learning_rate": 8.958271755145999e-05, "loss": 1.1285, "step": 8680 }, { "epoch": 0.580237840726884, "grad_norm": 0.173828125, "learning_rate": 8.946675331742636e-05, "loss": 1.1692, "step": 8685 }, { "epoch": 0.5805718866916088, "grad_norm": 0.1611328125, "learning_rate": 8.935080340534715e-05, "loss": 1.14, "step": 8690 }, { "epoch": 0.5809059326563335, "grad_norm": 0.1708984375, "learning_rate": 8.923486797287834e-05, "loss": 1.2068, "step": 8695 }, { "epoch": 0.5812399786210583, "grad_norm": 0.16796875, "learning_rate": 8.911894717765625e-05, "loss": 1.2347, "step": 8700 }, { "epoch": 0.581574024585783, "grad_norm": 0.2041015625, "learning_rate": 8.90030411772973e-05, "loss": 1.1138, "step": 8705 }, { "epoch": 0.5819080705505077, "grad_norm": 0.16015625, "learning_rate": 8.888715012939773e-05, "loss": 1.2134, "step": 8710 }, { "epoch": 0.5822421165152325, "grad_norm": 0.1728515625, "learning_rate": 8.87712741915335e-05, "loss": 1.1751, "step": 8715 }, { "epoch": 0.5825761624799572, "grad_norm": 0.173828125, "learning_rate": 8.865541352125998e-05, "loss": 1.2041, "step": 8720 }, { "epoch": 0.582910208444682, "grad_norm": 0.388671875, "learning_rate": 8.853956827611182e-05, "loss": 1.2135, "step": 8725 }, { "epoch": 0.5832442544094067, "grad_norm": 0.1806640625, "learning_rate": 8.842373861360271e-05, "loss": 1.2284, "step": 8730 }, { "epoch": 0.5835783003741315, "grad_norm": 0.1767578125, "learning_rate": 8.830792469122517e-05, "loss": 1.2092, "step": 8735 }, { "epoch": 0.5839123463388562, "grad_norm": 0.1767578125, "learning_rate": 8.819212666645018e-05, "loss": 1.2435, "step": 8740 }, { "epoch": 0.584246392303581, "grad_norm": 0.1904296875, "learning_rate": 8.807634469672727e-05, "loss": 1.1988, "step": 8745 }, { "epoch": 0.5845804382683057, "grad_norm": 0.1611328125, "learning_rate": 8.796057893948409e-05, "loss": 1.264, "step": 8750 }, { "epoch": 0.5849144842330305, "grad_norm": 0.171875, "learning_rate": 8.784482955212614e-05, "loss": 1.1828, "step": 8755 }, { "epoch": 0.5852485301977552, "grad_norm": 0.1650390625, "learning_rate": 8.772909669203684e-05, "loss": 1.1714, "step": 8760 }, { "epoch": 0.58558257616248, "grad_norm": 0.1640625, "learning_rate": 8.7613380516577e-05, "loss": 1.2169, "step": 8765 }, { "epoch": 0.5859166221272047, "grad_norm": 0.158203125, "learning_rate": 8.749768118308485e-05, "loss": 1.1714, "step": 8770 }, { "epoch": 0.5862506680919295, "grad_norm": 0.16796875, "learning_rate": 8.73819988488756e-05, "loss": 1.1609, "step": 8775 }, { "epoch": 0.5865847140566542, "grad_norm": 0.1650390625, "learning_rate": 8.726633367124146e-05, "loss": 1.1455, "step": 8780 }, { "epoch": 0.586918760021379, "grad_norm": 0.1640625, "learning_rate": 8.715068580745116e-05, "loss": 1.2395, "step": 8785 }, { "epoch": 0.5872528059861037, "grad_norm": 0.1865234375, "learning_rate": 8.703505541475015e-05, "loss": 1.2282, "step": 8790 }, { "epoch": 0.5875868519508284, "grad_norm": 0.171875, "learning_rate": 8.691944265035985e-05, "loss": 1.2212, "step": 8795 }, { "epoch": 0.5879208979155531, "grad_norm": 0.16796875, "learning_rate": 8.680384767147785e-05, "loss": 1.2372, "step": 8800 }, { "epoch": 0.5882549438802779, "grad_norm": 0.1708984375, "learning_rate": 8.668827063527758e-05, "loss": 1.1653, "step": 8805 }, { "epoch": 0.5885889898450026, "grad_norm": 0.1962890625, "learning_rate": 8.657271169890797e-05, "loss": 1.1536, "step": 8810 }, { "epoch": 0.5889230358097274, "grad_norm": 0.162109375, "learning_rate": 8.645717101949338e-05, "loss": 1.1778, "step": 8815 }, { "epoch": 0.5892570817744521, "grad_norm": 0.162109375, "learning_rate": 8.634164875413343e-05, "loss": 1.2575, "step": 8820 }, { "epoch": 0.5895911277391769, "grad_norm": 0.1669921875, "learning_rate": 8.622614505990263e-05, "loss": 1.1312, "step": 8825 }, { "epoch": 0.5899251737039016, "grad_norm": 0.1728515625, "learning_rate": 8.61106600938502e-05, "loss": 1.1868, "step": 8830 }, { "epoch": 0.5902592196686264, "grad_norm": 0.1708984375, "learning_rate": 8.599519401299997e-05, "loss": 1.2483, "step": 8835 }, { "epoch": 0.5905932656333511, "grad_norm": 0.20703125, "learning_rate": 8.587974697435005e-05, "loss": 1.2095, "step": 8840 }, { "epoch": 0.5909273115980759, "grad_norm": 0.1640625, "learning_rate": 8.576431913487265e-05, "loss": 1.1909, "step": 8845 }, { "epoch": 0.5912613575628006, "grad_norm": 0.169921875, "learning_rate": 8.564891065151392e-05, "loss": 1.2462, "step": 8850 }, { "epoch": 0.5915954035275254, "grad_norm": 0.1669921875, "learning_rate": 8.553352168119368e-05, "loss": 1.2182, "step": 8855 }, { "epoch": 0.5919294494922501, "grad_norm": 0.1630859375, "learning_rate": 8.54181523808052e-05, "loss": 1.1494, "step": 8860 }, { "epoch": 0.5922634954569749, "grad_norm": 0.16796875, "learning_rate": 8.530280290721499e-05, "loss": 1.1958, "step": 8865 }, { "epoch": 0.5925975414216996, "grad_norm": 0.171875, "learning_rate": 8.518747341726265e-05, "loss": 1.2156, "step": 8870 }, { "epoch": 0.5929315873864244, "grad_norm": 0.171875, "learning_rate": 8.507216406776056e-05, "loss": 1.1586, "step": 8875 }, { "epoch": 0.5932656333511491, "grad_norm": 0.169921875, "learning_rate": 8.495687501549371e-05, "loss": 1.1507, "step": 8880 }, { "epoch": 0.5935996793158739, "grad_norm": 0.173828125, "learning_rate": 8.484160641721958e-05, "loss": 1.1466, "step": 8885 }, { "epoch": 0.5939337252805986, "grad_norm": 0.1689453125, "learning_rate": 8.472635842966776e-05, "loss": 1.178, "step": 8890 }, { "epoch": 0.5942677712453234, "grad_norm": 0.1728515625, "learning_rate": 8.46111312095398e-05, "loss": 1.2257, "step": 8895 }, { "epoch": 0.5946018172100481, "grad_norm": 0.1689453125, "learning_rate": 8.449592491350909e-05, "loss": 1.1655, "step": 8900 }, { "epoch": 0.5949358631747729, "grad_norm": 0.1767578125, "learning_rate": 8.438073969822047e-05, "loss": 1.1703, "step": 8905 }, { "epoch": 0.5952699091394976, "grad_norm": 0.1943359375, "learning_rate": 8.426557572029019e-05, "loss": 1.3285, "step": 8910 }, { "epoch": 0.5956039551042224, "grad_norm": 0.1669921875, "learning_rate": 8.415043313630563e-05, "loss": 1.1821, "step": 8915 }, { "epoch": 0.5959380010689471, "grad_norm": 0.169921875, "learning_rate": 8.403531210282507e-05, "loss": 1.2104, "step": 8920 }, { "epoch": 0.5962720470336719, "grad_norm": 0.1630859375, "learning_rate": 8.392021277637743e-05, "loss": 1.1807, "step": 8925 }, { "epoch": 0.5966060929983966, "grad_norm": 0.1669921875, "learning_rate": 8.380513531346218e-05, "loss": 1.2005, "step": 8930 }, { "epoch": 0.5969401389631214, "grad_norm": 0.171875, "learning_rate": 8.369007987054906e-05, "loss": 1.2966, "step": 8935 }, { "epoch": 0.5972741849278461, "grad_norm": 0.1806640625, "learning_rate": 8.357504660407779e-05, "loss": 1.16, "step": 8940 }, { "epoch": 0.5976082308925709, "grad_norm": 0.1650390625, "learning_rate": 8.346003567045806e-05, "loss": 1.1801, "step": 8945 }, { "epoch": 0.5979422768572956, "grad_norm": 0.1708984375, "learning_rate": 8.334504722606912e-05, "loss": 1.2457, "step": 8950 }, { "epoch": 0.5982763228220203, "grad_norm": 0.15234375, "learning_rate": 8.323008142725967e-05, "loss": 1.1636, "step": 8955 }, { "epoch": 0.598610368786745, "grad_norm": 0.1767578125, "learning_rate": 8.311513843034755e-05, "loss": 1.1566, "step": 8960 }, { "epoch": 0.5989444147514698, "grad_norm": 0.1640625, "learning_rate": 8.300021839161969e-05, "loss": 1.2084, "step": 8965 }, { "epoch": 0.5992784607161945, "grad_norm": 0.169921875, "learning_rate": 8.288532146733173e-05, "loss": 1.1723, "step": 8970 }, { "epoch": 0.5996125066809193, "grad_norm": 0.181640625, "learning_rate": 8.277044781370799e-05, "loss": 1.2041, "step": 8975 }, { "epoch": 0.599946552645644, "grad_norm": 0.1611328125, "learning_rate": 8.265559758694099e-05, "loss": 1.2412, "step": 8980 }, { "epoch": 0.6002805986103688, "grad_norm": 0.181640625, "learning_rate": 8.254077094319153e-05, "loss": 1.1691, "step": 8985 }, { "epoch": 0.6006146445750935, "grad_norm": 0.162109375, "learning_rate": 8.24259680385883e-05, "loss": 1.1641, "step": 8990 }, { "epoch": 0.6009486905398183, "grad_norm": 0.16015625, "learning_rate": 8.231118902922767e-05, "loss": 1.1753, "step": 8995 }, { "epoch": 0.601282736504543, "grad_norm": 0.1640625, "learning_rate": 8.219643407117356e-05, "loss": 1.1704, "step": 9000 }, { "epoch": 0.6016167824692678, "grad_norm": 0.232421875, "learning_rate": 8.208170332045723e-05, "loss": 1.1438, "step": 9005 }, { "epoch": 0.6019508284339925, "grad_norm": 0.1630859375, "learning_rate": 8.196699693307695e-05, "loss": 1.1879, "step": 9010 }, { "epoch": 0.6022848743987173, "grad_norm": 0.1728515625, "learning_rate": 8.18523150649979e-05, "loss": 1.2226, "step": 9015 }, { "epoch": 0.602618920363442, "grad_norm": 0.166015625, "learning_rate": 8.173765787215193e-05, "loss": 1.2468, "step": 9020 }, { "epoch": 0.6029529663281668, "grad_norm": 0.181640625, "learning_rate": 8.162302551043728e-05, "loss": 1.1547, "step": 9025 }, { "epoch": 0.6032870122928915, "grad_norm": 0.201171875, "learning_rate": 8.150841813571849e-05, "loss": 1.1479, "step": 9030 }, { "epoch": 0.6036210582576162, "grad_norm": 0.1826171875, "learning_rate": 8.139383590382614e-05, "loss": 1.2236, "step": 9035 }, { "epoch": 0.603955104222341, "grad_norm": 0.16015625, "learning_rate": 8.127927897055658e-05, "loss": 1.2452, "step": 9040 }, { "epoch": 0.6042891501870657, "grad_norm": 0.1708984375, "learning_rate": 8.116474749167175e-05, "loss": 1.1888, "step": 9045 }, { "epoch": 0.6046231961517905, "grad_norm": 0.1748046875, "learning_rate": 8.105024162289901e-05, "loss": 1.1423, "step": 9050 }, { "epoch": 0.6049572421165152, "grad_norm": 0.193359375, "learning_rate": 8.093576151993092e-05, "loss": 1.1972, "step": 9055 }, { "epoch": 0.60529128808124, "grad_norm": 0.1650390625, "learning_rate": 8.082130733842492e-05, "loss": 1.1641, "step": 9060 }, { "epoch": 0.6056253340459647, "grad_norm": 0.1650390625, "learning_rate": 8.070687923400328e-05, "loss": 1.2438, "step": 9065 }, { "epoch": 0.6059593800106895, "grad_norm": 0.166015625, "learning_rate": 8.059247736225285e-05, "loss": 1.2421, "step": 9070 }, { "epoch": 0.6062934259754142, "grad_norm": 0.173828125, "learning_rate": 8.047810187872473e-05, "loss": 1.2011, "step": 9075 }, { "epoch": 0.606627471940139, "grad_norm": 0.1748046875, "learning_rate": 8.036375293893413e-05, "loss": 1.1614, "step": 9080 }, { "epoch": 0.6069615179048637, "grad_norm": 0.1640625, "learning_rate": 8.024943069836023e-05, "loss": 1.121, "step": 9085 }, { "epoch": 0.6072955638695885, "grad_norm": 0.15625, "learning_rate": 8.013513531244593e-05, "loss": 1.1455, "step": 9090 }, { "epoch": 0.6076296098343132, "grad_norm": 0.1728515625, "learning_rate": 8.002086693659746e-05, "loss": 1.157, "step": 9095 }, { "epoch": 0.607963655799038, "grad_norm": 0.1787109375, "learning_rate": 7.990662572618452e-05, "loss": 1.1843, "step": 9100 }, { "epoch": 0.6082977017637627, "grad_norm": 0.20703125, "learning_rate": 7.979241183653978e-05, "loss": 1.2587, "step": 9105 }, { "epoch": 0.6086317477284875, "grad_norm": 0.2060546875, "learning_rate": 7.967822542295877e-05, "loss": 1.2498, "step": 9110 }, { "epoch": 0.6089657936932121, "grad_norm": 0.171875, "learning_rate": 7.956406664069964e-05, "loss": 1.2265, "step": 9115 }, { "epoch": 0.6092998396579369, "grad_norm": 0.1748046875, "learning_rate": 7.9449935644983e-05, "loss": 1.1527, "step": 9120 }, { "epoch": 0.6096338856226616, "grad_norm": 0.1708984375, "learning_rate": 7.933583259099162e-05, "loss": 1.1783, "step": 9125 }, { "epoch": 0.6099679315873864, "grad_norm": 0.16015625, "learning_rate": 7.922175763387043e-05, "loss": 1.2366, "step": 9130 }, { "epoch": 0.6103019775521111, "grad_norm": 0.1708984375, "learning_rate": 7.910771092872599e-05, "loss": 1.1738, "step": 9135 }, { "epoch": 0.6106360235168359, "grad_norm": 0.1728515625, "learning_rate": 7.899369263062654e-05, "loss": 1.1757, "step": 9140 }, { "epoch": 0.6109700694815606, "grad_norm": 0.1962890625, "learning_rate": 7.887970289460161e-05, "loss": 1.28, "step": 9145 }, { "epoch": 0.6113041154462854, "grad_norm": 0.1708984375, "learning_rate": 7.876574187564201e-05, "loss": 1.1575, "step": 9150 }, { "epoch": 0.6116381614110101, "grad_norm": 0.16796875, "learning_rate": 7.865180972869938e-05, "loss": 1.1488, "step": 9155 }, { "epoch": 0.6119722073757349, "grad_norm": 0.1728515625, "learning_rate": 7.853790660868625e-05, "loss": 1.2271, "step": 9160 }, { "epoch": 0.6123062533404596, "grad_norm": 0.1611328125, "learning_rate": 7.842403267047556e-05, "loss": 1.1493, "step": 9165 }, { "epoch": 0.6126402993051844, "grad_norm": 0.1630859375, "learning_rate": 7.831018806890058e-05, "loss": 1.1521, "step": 9170 }, { "epoch": 0.6129743452699091, "grad_norm": 0.232421875, "learning_rate": 7.819637295875478e-05, "loss": 1.0877, "step": 9175 }, { "epoch": 0.6133083912346339, "grad_norm": 0.1728515625, "learning_rate": 7.808258749479142e-05, "loss": 1.2084, "step": 9180 }, { "epoch": 0.6136424371993586, "grad_norm": 0.1826171875, "learning_rate": 7.796883183172352e-05, "loss": 1.2174, "step": 9185 }, { "epoch": 0.6139764831640834, "grad_norm": 0.17578125, "learning_rate": 7.785510612422359e-05, "loss": 1.2459, "step": 9190 }, { "epoch": 0.6143105291288081, "grad_norm": 0.1640625, "learning_rate": 7.774141052692337e-05, "loss": 1.232, "step": 9195 }, { "epoch": 0.6146445750935329, "grad_norm": 0.169921875, "learning_rate": 7.762774519441366e-05, "loss": 1.1734, "step": 9200 }, { "epoch": 0.6149786210582576, "grad_norm": 0.1728515625, "learning_rate": 7.751411028124414e-05, "loss": 1.1733, "step": 9205 }, { "epoch": 0.6153126670229824, "grad_norm": 0.1826171875, "learning_rate": 7.740050594192308e-05, "loss": 1.2005, "step": 9210 }, { "epoch": 0.6156467129877071, "grad_norm": 0.173828125, "learning_rate": 7.728693233091721e-05, "loss": 1.2093, "step": 9215 }, { "epoch": 0.6159807589524319, "grad_norm": 0.1640625, "learning_rate": 7.717338960265152e-05, "loss": 1.1545, "step": 9220 }, { "epoch": 0.6163148049171566, "grad_norm": 0.173828125, "learning_rate": 7.705987791150895e-05, "loss": 1.2138, "step": 9225 }, { "epoch": 0.6166488508818814, "grad_norm": 0.169921875, "learning_rate": 7.694639741183027e-05, "loss": 1.2281, "step": 9230 }, { "epoch": 0.6169828968466061, "grad_norm": 0.1806640625, "learning_rate": 7.68329482579138e-05, "loss": 1.2119, "step": 9235 }, { "epoch": 0.6173169428113309, "grad_norm": 0.166015625, "learning_rate": 7.671953060401528e-05, "loss": 1.2096, "step": 9240 }, { "epoch": 0.6176509887760556, "grad_norm": 0.1728515625, "learning_rate": 7.660614460434754e-05, "loss": 1.2063, "step": 9245 }, { "epoch": 0.6179850347407804, "grad_norm": 0.1611328125, "learning_rate": 7.649279041308057e-05, "loss": 1.2487, "step": 9250 }, { "epoch": 0.6183190807055051, "grad_norm": 0.173828125, "learning_rate": 7.637946818434087e-05, "loss": 1.2365, "step": 9255 }, { "epoch": 0.6186531266702299, "grad_norm": 0.193359375, "learning_rate": 7.626617807221166e-05, "loss": 1.1823, "step": 9260 }, { "epoch": 0.6189871726349546, "grad_norm": 0.1708984375, "learning_rate": 7.615292023073235e-05, "loss": 1.1834, "step": 9265 }, { "epoch": 0.6193212185996794, "grad_norm": 0.1640625, "learning_rate": 7.603969481389856e-05, "loss": 1.248, "step": 9270 }, { "epoch": 0.619655264564404, "grad_norm": 0.181640625, "learning_rate": 7.592650197566181e-05, "loss": 1.2045, "step": 9275 }, { "epoch": 0.6199893105291288, "grad_norm": 0.16796875, "learning_rate": 7.581334186992934e-05, "loss": 1.184, "step": 9280 }, { "epoch": 0.6203233564938535, "grad_norm": 0.1767578125, "learning_rate": 7.570021465056384e-05, "loss": 1.1881, "step": 9285 }, { "epoch": 0.6206574024585783, "grad_norm": 0.16796875, "learning_rate": 7.558712047138328e-05, "loss": 1.2263, "step": 9290 }, { "epoch": 0.620991448423303, "grad_norm": 0.1826171875, "learning_rate": 7.547405948616079e-05, "loss": 1.2007, "step": 9295 }, { "epoch": 0.6213254943880278, "grad_norm": 0.189453125, "learning_rate": 7.536103184862424e-05, "loss": 1.1391, "step": 9300 }, { "epoch": 0.6216595403527525, "grad_norm": 0.1650390625, "learning_rate": 7.524803771245628e-05, "loss": 1.2094, "step": 9305 }, { "epoch": 0.6219935863174773, "grad_norm": 0.173828125, "learning_rate": 7.513507723129386e-05, "loss": 1.2207, "step": 9310 }, { "epoch": 0.622327632282202, "grad_norm": 0.1689453125, "learning_rate": 7.502215055872838e-05, "loss": 1.2251, "step": 9315 }, { "epoch": 0.6226616782469268, "grad_norm": 0.166015625, "learning_rate": 7.49092578483051e-05, "loss": 1.163, "step": 9320 }, { "epoch": 0.6229957242116515, "grad_norm": 0.205078125, "learning_rate": 7.479639925352318e-05, "loss": 1.2131, "step": 9325 }, { "epoch": 0.6233297701763763, "grad_norm": 0.1669921875, "learning_rate": 7.468357492783531e-05, "loss": 1.1521, "step": 9330 }, { "epoch": 0.623663816141101, "grad_norm": 0.1669921875, "learning_rate": 7.457078502464768e-05, "loss": 1.1685, "step": 9335 }, { "epoch": 0.6239978621058258, "grad_norm": 0.1689453125, "learning_rate": 7.44580296973196e-05, "loss": 1.2071, "step": 9340 }, { "epoch": 0.6243319080705505, "grad_norm": 0.1640625, "learning_rate": 7.434530909916351e-05, "loss": 1.2186, "step": 9345 }, { "epoch": 0.6246659540352753, "grad_norm": 0.1943359375, "learning_rate": 7.423262338344444e-05, "loss": 1.1182, "step": 9350 }, { "epoch": 0.625, "grad_norm": 0.166015625, "learning_rate": 7.411997270338008e-05, "loss": 1.149, "step": 9355 }, { "epoch": 0.6253340459647247, "grad_norm": 0.1748046875, "learning_rate": 7.400735721214052e-05, "loss": 1.15, "step": 9360 }, { "epoch": 0.6256680919294495, "grad_norm": 0.1689453125, "learning_rate": 7.389477706284793e-05, "loss": 1.1869, "step": 9365 }, { "epoch": 0.6260021378941742, "grad_norm": 0.166015625, "learning_rate": 7.378223240857643e-05, "loss": 1.2278, "step": 9370 }, { "epoch": 0.626336183858899, "grad_norm": 0.1708984375, "learning_rate": 7.366972340235197e-05, "loss": 1.1563, "step": 9375 }, { "epoch": 0.6266702298236237, "grad_norm": 0.1708984375, "learning_rate": 7.355725019715195e-05, "loss": 1.1964, "step": 9380 }, { "epoch": 0.6270042757883485, "grad_norm": 0.1884765625, "learning_rate": 7.344481294590509e-05, "loss": 1.1799, "step": 9385 }, { "epoch": 0.6273383217530732, "grad_norm": 0.1689453125, "learning_rate": 7.333241180149123e-05, "loss": 1.2004, "step": 9390 }, { "epoch": 0.627672367717798, "grad_norm": 0.1796875, "learning_rate": 7.322004691674118e-05, "loss": 1.3072, "step": 9395 }, { "epoch": 0.6280064136825227, "grad_norm": 0.169921875, "learning_rate": 7.310771844443631e-05, "loss": 1.3167, "step": 9400 }, { "epoch": 0.6283404596472475, "grad_norm": 0.1591796875, "learning_rate": 7.299542653730865e-05, "loss": 1.1872, "step": 9405 }, { "epoch": 0.6286745056119722, "grad_norm": 0.1650390625, "learning_rate": 7.288317134804038e-05, "loss": 1.1386, "step": 9410 }, { "epoch": 0.629008551576697, "grad_norm": 0.1572265625, "learning_rate": 7.277095302926385e-05, "loss": 1.1987, "step": 9415 }, { "epoch": 0.6293425975414217, "grad_norm": 0.181640625, "learning_rate": 7.265877173356116e-05, "loss": 1.1932, "step": 9420 }, { "epoch": 0.6296766435061465, "grad_norm": 0.177734375, "learning_rate": 7.254662761346423e-05, "loss": 1.1853, "step": 9425 }, { "epoch": 0.6300106894708712, "grad_norm": 0.173828125, "learning_rate": 7.243452082145423e-05, "loss": 1.1307, "step": 9430 }, { "epoch": 0.630344735435596, "grad_norm": 0.173828125, "learning_rate": 7.232245150996181e-05, "loss": 1.1789, "step": 9435 }, { "epoch": 0.6306787814003206, "grad_norm": 0.1611328125, "learning_rate": 7.221041983136646e-05, "loss": 1.2618, "step": 9440 }, { "epoch": 0.6310128273650454, "grad_norm": 0.1728515625, "learning_rate": 7.209842593799662e-05, "loss": 1.2339, "step": 9445 }, { "epoch": 0.6313468733297701, "grad_norm": 0.1796875, "learning_rate": 7.198646998212928e-05, "loss": 1.1588, "step": 9450 }, { "epoch": 0.6316809192944949, "grad_norm": 0.1767578125, "learning_rate": 7.187455211598989e-05, "loss": 1.2706, "step": 9455 }, { "epoch": 0.6320149652592196, "grad_norm": 0.162109375, "learning_rate": 7.176267249175209e-05, "loss": 1.2431, "step": 9460 }, { "epoch": 0.6323490112239444, "grad_norm": 0.1748046875, "learning_rate": 7.165083126153756e-05, "loss": 1.1589, "step": 9465 }, { "epoch": 0.6326830571886691, "grad_norm": 0.1650390625, "learning_rate": 7.153902857741571e-05, "loss": 1.3123, "step": 9470 }, { "epoch": 0.6330171031533939, "grad_norm": 0.2255859375, "learning_rate": 7.14272645914036e-05, "loss": 1.2838, "step": 9475 }, { "epoch": 0.6333511491181186, "grad_norm": 0.169921875, "learning_rate": 7.131553945546568e-05, "loss": 1.2397, "step": 9480 }, { "epoch": 0.6336851950828434, "grad_norm": 0.189453125, "learning_rate": 7.120385332151348e-05, "loss": 1.1422, "step": 9485 }, { "epoch": 0.6340192410475681, "grad_norm": 0.1748046875, "learning_rate": 7.109220634140558e-05, "loss": 1.2258, "step": 9490 }, { "epoch": 0.6343532870122929, "grad_norm": 0.1669921875, "learning_rate": 7.098059866694733e-05, "loss": 1.1345, "step": 9495 }, { "epoch": 0.6346873329770176, "grad_norm": 0.169921875, "learning_rate": 7.086903044989064e-05, "loss": 1.1749, "step": 9500 }, { "epoch": 0.6350213789417424, "grad_norm": 0.1865234375, "learning_rate": 7.075750184193368e-05, "loss": 1.1437, "step": 9505 }, { "epoch": 0.6353554249064671, "grad_norm": 0.1669921875, "learning_rate": 7.064601299472087e-05, "loss": 1.1846, "step": 9510 }, { "epoch": 0.6356894708711919, "grad_norm": 0.16796875, "learning_rate": 7.053456405984253e-05, "loss": 1.2066, "step": 9515 }, { "epoch": 0.6360235168359166, "grad_norm": 0.177734375, "learning_rate": 7.042315518883467e-05, "loss": 1.203, "step": 9520 }, { "epoch": 0.6363575628006414, "grad_norm": 0.173828125, "learning_rate": 7.031178653317886e-05, "loss": 1.2101, "step": 9525 }, { "epoch": 0.6366916087653661, "grad_norm": 0.1904296875, "learning_rate": 7.020045824430205e-05, "loss": 1.1811, "step": 9530 }, { "epoch": 0.6370256547300909, "grad_norm": 0.177734375, "learning_rate": 7.008917047357624e-05, "loss": 1.2073, "step": 9535 }, { "epoch": 0.6373597006948156, "grad_norm": 0.1767578125, "learning_rate": 6.997792337231827e-05, "loss": 1.1933, "step": 9540 }, { "epoch": 0.6376937466595404, "grad_norm": 0.169921875, "learning_rate": 6.986671709178985e-05, "loss": 1.1735, "step": 9545 }, { "epoch": 0.6380277926242651, "grad_norm": 0.1689453125, "learning_rate": 6.975555178319701e-05, "loss": 1.2503, "step": 9550 }, { "epoch": 0.6383618385889899, "grad_norm": 0.177734375, "learning_rate": 6.964442759769017e-05, "loss": 1.2005, "step": 9555 }, { "epoch": 0.6386958845537146, "grad_norm": 0.1884765625, "learning_rate": 6.953334468636386e-05, "loss": 1.2382, "step": 9560 }, { "epoch": 0.6390299305184394, "grad_norm": 0.1689453125, "learning_rate": 6.942230320025645e-05, "loss": 1.0983, "step": 9565 }, { "epoch": 0.6393639764831641, "grad_norm": 0.1796875, "learning_rate": 6.931130329034993e-05, "loss": 1.1712, "step": 9570 }, { "epoch": 0.6396980224478889, "grad_norm": 0.1669921875, "learning_rate": 6.920034510756986e-05, "loss": 1.2003, "step": 9575 }, { "epoch": 0.6400320684126136, "grad_norm": 0.162109375, "learning_rate": 6.9089428802785e-05, "loss": 1.15, "step": 9580 }, { "epoch": 0.6403661143773384, "grad_norm": 0.18359375, "learning_rate": 6.897855452680715e-05, "loss": 1.2444, "step": 9585 }, { "epoch": 0.6407001603420631, "grad_norm": 0.1796875, "learning_rate": 6.886772243039105e-05, "loss": 1.2818, "step": 9590 }, { "epoch": 0.6410342063067879, "grad_norm": 0.181640625, "learning_rate": 6.875693266423404e-05, "loss": 1.1941, "step": 9595 }, { "epoch": 0.6413682522715125, "grad_norm": 0.1689453125, "learning_rate": 6.864618537897588e-05, "loss": 1.184, "step": 9600 }, { "epoch": 0.6417022982362373, "grad_norm": 0.1650390625, "learning_rate": 6.853548072519859e-05, "loss": 1.114, "step": 9605 }, { "epoch": 0.642036344200962, "grad_norm": 0.189453125, "learning_rate": 6.842481885342625e-05, "loss": 1.2692, "step": 9610 }, { "epoch": 0.6423703901656868, "grad_norm": 0.1669921875, "learning_rate": 6.831419991412464e-05, "loss": 1.1408, "step": 9615 }, { "epoch": 0.6427044361304115, "grad_norm": 0.158203125, "learning_rate": 6.820362405770143e-05, "loss": 1.2433, "step": 9620 }, { "epoch": 0.6430384820951363, "grad_norm": 0.1767578125, "learning_rate": 6.809309143450545e-05, "loss": 1.2076, "step": 9625 }, { "epoch": 0.643372528059861, "grad_norm": 0.177734375, "learning_rate": 6.798260219482691e-05, "loss": 1.2467, "step": 9630 }, { "epoch": 0.6437065740245858, "grad_norm": 0.1845703125, "learning_rate": 6.787215648889689e-05, "loss": 1.2093, "step": 9635 }, { "epoch": 0.6440406199893105, "grad_norm": 0.181640625, "learning_rate": 6.77617544668874e-05, "loss": 1.2324, "step": 9640 }, { "epoch": 0.6443746659540353, "grad_norm": 0.171875, "learning_rate": 6.765139627891099e-05, "loss": 1.1822, "step": 9645 }, { "epoch": 0.64470871191876, "grad_norm": 0.1845703125, "learning_rate": 6.754108207502069e-05, "loss": 1.223, "step": 9650 }, { "epoch": 0.6450427578834848, "grad_norm": 0.1611328125, "learning_rate": 6.743081200520962e-05, "loss": 1.1462, "step": 9655 }, { "epoch": 0.6453768038482095, "grad_norm": 0.1630859375, "learning_rate": 6.732058621941092e-05, "loss": 1.2425, "step": 9660 }, { "epoch": 0.6457108498129343, "grad_norm": 0.173828125, "learning_rate": 6.721040486749756e-05, "loss": 1.245, "step": 9665 }, { "epoch": 0.646044895777659, "grad_norm": 0.1689453125, "learning_rate": 6.710026809928206e-05, "loss": 1.1829, "step": 9670 }, { "epoch": 0.6463789417423838, "grad_norm": 0.1875, "learning_rate": 6.69901760645163e-05, "loss": 1.1876, "step": 9675 }, { "epoch": 0.6467129877071085, "grad_norm": 0.1689453125, "learning_rate": 6.68801289128914e-05, "loss": 1.3096, "step": 9680 }, { "epoch": 0.6470470336718332, "grad_norm": 0.1826171875, "learning_rate": 6.677012679403743e-05, "loss": 1.2622, "step": 9685 }, { "epoch": 0.647381079636558, "grad_norm": 0.1748046875, "learning_rate": 6.666016985752316e-05, "loss": 1.2291, "step": 9690 }, { "epoch": 0.6477151256012827, "grad_norm": 0.1806640625, "learning_rate": 6.655025825285601e-05, "loss": 1.2274, "step": 9695 }, { "epoch": 0.6480491715660075, "grad_norm": 0.1767578125, "learning_rate": 6.644039212948177e-05, "loss": 1.2035, "step": 9700 }, { "epoch": 0.6483832175307322, "grad_norm": 0.1884765625, "learning_rate": 6.633057163678426e-05, "loss": 1.229, "step": 9705 }, { "epoch": 0.648717263495457, "grad_norm": 0.1767578125, "learning_rate": 6.622079692408545e-05, "loss": 1.319, "step": 9710 }, { "epoch": 0.6490513094601817, "grad_norm": 0.166015625, "learning_rate": 6.611106814064492e-05, "loss": 1.1854, "step": 9715 }, { "epoch": 0.6493853554249065, "grad_norm": 0.1689453125, "learning_rate": 6.600138543565986e-05, "loss": 1.1671, "step": 9720 }, { "epoch": 0.6497194013896312, "grad_norm": 0.169921875, "learning_rate": 6.589174895826475e-05, "loss": 1.2083, "step": 9725 }, { "epoch": 0.650053447354356, "grad_norm": 0.162109375, "learning_rate": 6.57821588575313e-05, "loss": 1.147, "step": 9730 }, { "epoch": 0.6503874933190807, "grad_norm": 0.1708984375, "learning_rate": 6.567261528246806e-05, "loss": 1.2656, "step": 9735 }, { "epoch": 0.6507215392838055, "grad_norm": 0.228515625, "learning_rate": 6.556311838202046e-05, "loss": 1.2546, "step": 9740 }, { "epoch": 0.6510555852485302, "grad_norm": 0.173828125, "learning_rate": 6.545366830507034e-05, "loss": 1.2111, "step": 9745 }, { "epoch": 0.651389631213255, "grad_norm": 0.16796875, "learning_rate": 6.534426520043594e-05, "loss": 1.2212, "step": 9750 }, { "epoch": 0.6517236771779797, "grad_norm": 0.1689453125, "learning_rate": 6.523490921687157e-05, "loss": 1.128, "step": 9755 }, { "epoch": 0.6520577231427044, "grad_norm": 0.17578125, "learning_rate": 6.512560050306756e-05, "loss": 1.197, "step": 9760 }, { "epoch": 0.6523917691074291, "grad_norm": 0.1650390625, "learning_rate": 6.501633920764989e-05, "loss": 1.2123, "step": 9765 }, { "epoch": 0.6527258150721539, "grad_norm": 0.173828125, "learning_rate": 6.490712547918006e-05, "loss": 1.2868, "step": 9770 }, { "epoch": 0.6530598610368786, "grad_norm": 0.1728515625, "learning_rate": 6.479795946615501e-05, "loss": 1.2247, "step": 9775 }, { "epoch": 0.6533939070016034, "grad_norm": 0.1708984375, "learning_rate": 6.468884131700668e-05, "loss": 1.2421, "step": 9780 }, { "epoch": 0.6537279529663281, "grad_norm": 0.1748046875, "learning_rate": 6.457977118010196e-05, "loss": 1.2825, "step": 9785 }, { "epoch": 0.6540619989310529, "grad_norm": 0.1767578125, "learning_rate": 6.44707492037425e-05, "loss": 1.1924, "step": 9790 }, { "epoch": 0.6543960448957776, "grad_norm": 0.1953125, "learning_rate": 6.436177553616438e-05, "loss": 1.2154, "step": 9795 }, { "epoch": 0.6547300908605024, "grad_norm": 0.1826171875, "learning_rate": 6.42528503255381e-05, "loss": 1.1975, "step": 9800 }, { "epoch": 0.6550641368252271, "grad_norm": 0.1591796875, "learning_rate": 6.414397371996821e-05, "loss": 1.0709, "step": 9805 }, { "epoch": 0.6553981827899519, "grad_norm": 0.1748046875, "learning_rate": 6.403514586749318e-05, "loss": 1.2159, "step": 9810 }, { "epoch": 0.6557322287546766, "grad_norm": 0.1728515625, "learning_rate": 6.392636691608521e-05, "loss": 1.2559, "step": 9815 }, { "epoch": 0.6560662747194014, "grad_norm": 0.169921875, "learning_rate": 6.381763701365e-05, "loss": 1.1734, "step": 9820 }, { "epoch": 0.6564003206841261, "grad_norm": 0.1611328125, "learning_rate": 6.370895630802652e-05, "loss": 1.1594, "step": 9825 }, { "epoch": 0.6567343666488509, "grad_norm": 0.154296875, "learning_rate": 6.36003249469869e-05, "loss": 1.1063, "step": 9830 }, { "epoch": 0.6570684126135756, "grad_norm": 0.17578125, "learning_rate": 6.349174307823616e-05, "loss": 1.1498, "step": 9835 }, { "epoch": 0.6574024585783004, "grad_norm": 0.1630859375, "learning_rate": 6.338321084941205e-05, "loss": 1.2358, "step": 9840 }, { "epoch": 0.6577365045430251, "grad_norm": 0.1787109375, "learning_rate": 6.327472840808478e-05, "loss": 1.2042, "step": 9845 }, { "epoch": 0.6580705505077499, "grad_norm": 0.171875, "learning_rate": 6.316629590175688e-05, "loss": 1.1814, "step": 9850 }, { "epoch": 0.6584045964724746, "grad_norm": 0.1708984375, "learning_rate": 6.305791347786299e-05, "loss": 1.2021, "step": 9855 }, { "epoch": 0.6587386424371994, "grad_norm": 0.1669921875, "learning_rate": 6.294958128376962e-05, "loss": 1.1821, "step": 9860 }, { "epoch": 0.6590726884019241, "grad_norm": 0.17578125, "learning_rate": 6.284129946677508e-05, "loss": 1.1893, "step": 9865 }, { "epoch": 0.6594067343666489, "grad_norm": 0.166015625, "learning_rate": 6.27330681741091e-05, "loss": 1.1647, "step": 9870 }, { "epoch": 0.6597407803313736, "grad_norm": 0.173828125, "learning_rate": 6.26248875529327e-05, "loss": 1.2649, "step": 9875 }, { "epoch": 0.6600748262960984, "grad_norm": 0.1728515625, "learning_rate": 6.251675775033804e-05, "loss": 1.2161, "step": 9880 }, { "epoch": 0.6604088722608231, "grad_norm": 0.1640625, "learning_rate": 6.24086789133482e-05, "loss": 1.1798, "step": 9885 }, { "epoch": 0.6607429182255479, "grad_norm": 0.1767578125, "learning_rate": 6.23006511889169e-05, "loss": 1.1734, "step": 9890 }, { "epoch": 0.6610769641902726, "grad_norm": 0.1748046875, "learning_rate": 6.219267472392843e-05, "loss": 1.215, "step": 9895 }, { "epoch": 0.6614110101549974, "grad_norm": 0.181640625, "learning_rate": 6.208474966519735e-05, "loss": 1.1571, "step": 9900 }, { "epoch": 0.6617450561197221, "grad_norm": 0.1708984375, "learning_rate": 6.197687615946832e-05, "loss": 1.2906, "step": 9905 }, { "epoch": 0.6620791020844469, "grad_norm": 0.1650390625, "learning_rate": 6.186905435341592e-05, "loss": 1.1861, "step": 9910 }, { "epoch": 0.6624131480491716, "grad_norm": 0.166015625, "learning_rate": 6.17612843936444e-05, "loss": 1.1958, "step": 9915 }, { "epoch": 0.6627471940138963, "grad_norm": 0.1708984375, "learning_rate": 6.165356642668754e-05, "loss": 1.2155, "step": 9920 }, { "epoch": 0.663081239978621, "grad_norm": 0.1611328125, "learning_rate": 6.154590059900849e-05, "loss": 1.1376, "step": 9925 }, { "epoch": 0.6634152859433458, "grad_norm": 0.1796875, "learning_rate": 6.143828705699936e-05, "loss": 1.2243, "step": 9930 }, { "epoch": 0.6637493319080705, "grad_norm": 0.17578125, "learning_rate": 6.13307259469813e-05, "loss": 1.2159, "step": 9935 }, { "epoch": 0.6640833778727953, "grad_norm": 0.181640625, "learning_rate": 6.122321741520412e-05, "loss": 1.2191, "step": 9940 }, { "epoch": 0.66441742383752, "grad_norm": 0.1748046875, "learning_rate": 6.111576160784611e-05, "loss": 1.1936, "step": 9945 }, { "epoch": 0.6647514698022448, "grad_norm": 0.205078125, "learning_rate": 6.1008358671013885e-05, "loss": 1.2535, "step": 9950 }, { "epoch": 0.6650855157669695, "grad_norm": 0.169921875, "learning_rate": 6.090100875074225e-05, "loss": 1.1455, "step": 9955 }, { "epoch": 0.6654195617316943, "grad_norm": 0.1748046875, "learning_rate": 6.079371199299384e-05, "loss": 1.1946, "step": 9960 }, { "epoch": 0.665753607696419, "grad_norm": 0.16796875, "learning_rate": 6.0686468543659005e-05, "loss": 1.2062, "step": 9965 }, { "epoch": 0.6660876536611438, "grad_norm": 0.1708984375, "learning_rate": 6.057927854855565e-05, "loss": 1.2187, "step": 9970 }, { "epoch": 0.6664216996258685, "grad_norm": 0.1796875, "learning_rate": 6.0472142153428954e-05, "loss": 1.2224, "step": 9975 }, { "epoch": 0.6667557455905933, "grad_norm": 0.1796875, "learning_rate": 6.036505950395126e-05, "loss": 1.1661, "step": 9980 }, { "epoch": 0.667089791555318, "grad_norm": 0.169921875, "learning_rate": 6.025803074572185e-05, "loss": 1.2129, "step": 9985 }, { "epoch": 0.6674238375200428, "grad_norm": 0.19140625, "learning_rate": 6.0151056024266695e-05, "loss": 1.2235, "step": 9990 }, { "epoch": 0.6677578834847675, "grad_norm": 0.1669921875, "learning_rate": 6.0044135485038265e-05, "loss": 1.1978, "step": 9995 }, { "epoch": 0.6680919294494923, "grad_norm": 0.17578125, "learning_rate": 5.9937269273415386e-05, "loss": 1.2076, "step": 10000 }, { "epoch": 0.668425975414217, "grad_norm": 0.1767578125, "learning_rate": 5.983045753470308e-05, "loss": 1.2702, "step": 10005 }, { "epoch": 0.6687600213789417, "grad_norm": 0.1689453125, "learning_rate": 5.972370041413218e-05, "loss": 1.241, "step": 10010 }, { "epoch": 0.6690940673436665, "grad_norm": 0.1669921875, "learning_rate": 5.961699805685932e-05, "loss": 1.2256, "step": 10015 }, { "epoch": 0.6694281133083912, "grad_norm": 0.1669921875, "learning_rate": 5.95103506079667e-05, "loss": 1.1762, "step": 10020 }, { "epoch": 0.669762159273116, "grad_norm": 0.1728515625, "learning_rate": 5.940375821246186e-05, "loss": 1.1551, "step": 10025 }, { "epoch": 0.6700962052378407, "grad_norm": 0.1689453125, "learning_rate": 5.9297221015277394e-05, "loss": 1.1554, "step": 10030 }, { "epoch": 0.6704302512025655, "grad_norm": 0.17578125, "learning_rate": 5.9190739161270956e-05, "loss": 1.1697, "step": 10035 }, { "epoch": 0.6707642971672902, "grad_norm": 0.1669921875, "learning_rate": 5.9084312795224874e-05, "loss": 1.1717, "step": 10040 }, { "epoch": 0.671098343132015, "grad_norm": 0.169921875, "learning_rate": 5.8977942061846034e-05, "loss": 1.2296, "step": 10045 }, { "epoch": 0.6714323890967397, "grad_norm": 0.177734375, "learning_rate": 5.8871627105765746e-05, "loss": 1.2098, "step": 10050 }, { "epoch": 0.6717664350614645, "grad_norm": 0.166015625, "learning_rate": 5.876536807153943e-05, "loss": 1.1903, "step": 10055 }, { "epoch": 0.6721004810261892, "grad_norm": 0.177734375, "learning_rate": 5.865916510364648e-05, "loss": 1.1912, "step": 10060 }, { "epoch": 0.672434526990914, "grad_norm": 0.16015625, "learning_rate": 5.855301834649003e-05, "loss": 1.1724, "step": 10065 }, { "epoch": 0.6727685729556387, "grad_norm": 0.16796875, "learning_rate": 5.8446927944396815e-05, "loss": 1.2208, "step": 10070 }, { "epoch": 0.6731026189203635, "grad_norm": 0.1689453125, "learning_rate": 5.834089404161689e-05, "loss": 1.2138, "step": 10075 }, { "epoch": 0.6734366648850882, "grad_norm": 0.16796875, "learning_rate": 5.8234916782323646e-05, "loss": 1.2382, "step": 10080 }, { "epoch": 0.6737707108498129, "grad_norm": 0.173828125, "learning_rate": 5.812899631061327e-05, "loss": 1.1906, "step": 10085 }, { "epoch": 0.6741047568145376, "grad_norm": 0.1708984375, "learning_rate": 5.80231327705048e-05, "loss": 1.2238, "step": 10090 }, { "epoch": 0.6744388027792624, "grad_norm": 0.173828125, "learning_rate": 5.791732630593991e-05, "loss": 1.1796, "step": 10095 }, { "epoch": 0.6747728487439871, "grad_norm": 0.1669921875, "learning_rate": 5.781157706078264e-05, "loss": 1.247, "step": 10100 }, { "epoch": 0.6751068947087119, "grad_norm": 0.18359375, "learning_rate": 5.770588517881918e-05, "loss": 1.2604, "step": 10105 }, { "epoch": 0.6754409406734366, "grad_norm": 0.1728515625, "learning_rate": 5.760025080375777e-05, "loss": 1.2595, "step": 10110 }, { "epoch": 0.6757749866381614, "grad_norm": 0.16796875, "learning_rate": 5.749467407922853e-05, "loss": 1.1983, "step": 10115 }, { "epoch": 0.6761090326028861, "grad_norm": 0.1767578125, "learning_rate": 5.738915514878307e-05, "loss": 1.2557, "step": 10120 }, { "epoch": 0.6764430785676109, "grad_norm": 0.1689453125, "learning_rate": 5.728369415589443e-05, "loss": 1.1756, "step": 10125 }, { "epoch": 0.6767771245323356, "grad_norm": 0.173828125, "learning_rate": 5.717829124395699e-05, "loss": 1.2396, "step": 10130 }, { "epoch": 0.6771111704970604, "grad_norm": 0.1689453125, "learning_rate": 5.707294655628599e-05, "loss": 1.1818, "step": 10135 }, { "epoch": 0.6774452164617851, "grad_norm": 0.1767578125, "learning_rate": 5.696766023611768e-05, "loss": 1.1824, "step": 10140 }, { "epoch": 0.6777792624265099, "grad_norm": 0.1552734375, "learning_rate": 5.686243242660876e-05, "loss": 1.1367, "step": 10145 }, { "epoch": 0.6781133083912346, "grad_norm": 0.1591796875, "learning_rate": 5.6757263270836594e-05, "loss": 1.2218, "step": 10150 }, { "epoch": 0.6784473543559594, "grad_norm": 0.17578125, "learning_rate": 5.6652152911798585e-05, "loss": 1.2477, "step": 10155 }, { "epoch": 0.6787814003206841, "grad_norm": 0.1611328125, "learning_rate": 5.6547101492412265e-05, "loss": 1.2255, "step": 10160 }, { "epoch": 0.6791154462854089, "grad_norm": 0.1650390625, "learning_rate": 5.644210915551509e-05, "loss": 1.2308, "step": 10165 }, { "epoch": 0.6794494922501336, "grad_norm": 0.1611328125, "learning_rate": 5.633717604386415e-05, "loss": 1.1479, "step": 10170 }, { "epoch": 0.6797835382148584, "grad_norm": 0.16796875, "learning_rate": 5.6232302300135966e-05, "loss": 1.171, "step": 10175 }, { "epoch": 0.6801175841795831, "grad_norm": 0.1806640625, "learning_rate": 5.612748806692632e-05, "loss": 1.2255, "step": 10180 }, { "epoch": 0.6804516301443079, "grad_norm": 0.171875, "learning_rate": 5.602273348675019e-05, "loss": 1.1911, "step": 10185 }, { "epoch": 0.6807856761090326, "grad_norm": 0.1611328125, "learning_rate": 5.5918038702041343e-05, "loss": 1.1586, "step": 10190 }, { "epoch": 0.6811197220737574, "grad_norm": 0.16796875, "learning_rate": 5.5813403855152224e-05, "loss": 1.2018, "step": 10195 }, { "epoch": 0.6814537680384821, "grad_norm": 0.16796875, "learning_rate": 5.5708829088353875e-05, "loss": 1.1946, "step": 10200 }, { "epoch": 0.6817878140032069, "grad_norm": 0.169921875, "learning_rate": 5.560431454383566e-05, "loss": 1.2084, "step": 10205 }, { "epoch": 0.6821218599679316, "grad_norm": 0.1650390625, "learning_rate": 5.549986036370491e-05, "loss": 1.2056, "step": 10210 }, { "epoch": 0.6824559059326564, "grad_norm": 0.16796875, "learning_rate": 5.5395466689987044e-05, "loss": 1.1836, "step": 10215 }, { "epoch": 0.6827899518973811, "grad_norm": 0.171875, "learning_rate": 5.529113366462504e-05, "loss": 1.1598, "step": 10220 }, { "epoch": 0.6831239978621059, "grad_norm": 0.1748046875, "learning_rate": 5.518686142947962e-05, "loss": 1.1612, "step": 10225 }, { "epoch": 0.6834580438268306, "grad_norm": 0.17578125, "learning_rate": 5.508265012632865e-05, "loss": 1.2547, "step": 10230 }, { "epoch": 0.6837920897915554, "grad_norm": 0.1787109375, "learning_rate": 5.497849989686732e-05, "loss": 1.2068, "step": 10235 }, { "epoch": 0.6841261357562801, "grad_norm": 0.1728515625, "learning_rate": 5.4874410882707635e-05, "loss": 1.2069, "step": 10240 }, { "epoch": 0.6844601817210048, "grad_norm": 0.1640625, "learning_rate": 5.4770383225378486e-05, "loss": 1.1858, "step": 10245 }, { "epoch": 0.6847942276857295, "grad_norm": 0.16796875, "learning_rate": 5.466641706632525e-05, "loss": 1.1981, "step": 10250 }, { "epoch": 0.6851282736504543, "grad_norm": 0.1728515625, "learning_rate": 5.456251254690967e-05, "loss": 1.2036, "step": 10255 }, { "epoch": 0.685462319615179, "grad_norm": 0.1572265625, "learning_rate": 5.4458669808409766e-05, "loss": 1.2206, "step": 10260 }, { "epoch": 0.6857963655799038, "grad_norm": 0.1611328125, "learning_rate": 5.435488899201957e-05, "loss": 1.1534, "step": 10265 }, { "epoch": 0.6861304115446285, "grad_norm": 0.1767578125, "learning_rate": 5.42511702388488e-05, "loss": 1.2095, "step": 10270 }, { "epoch": 0.6864644575093533, "grad_norm": 0.1728515625, "learning_rate": 5.4147513689922815e-05, "loss": 1.1465, "step": 10275 }, { "epoch": 0.686798503474078, "grad_norm": 0.162109375, "learning_rate": 5.404391948618252e-05, "loss": 1.2494, "step": 10280 }, { "epoch": 0.6871325494388028, "grad_norm": 0.166015625, "learning_rate": 5.39403877684839e-05, "loss": 1.2441, "step": 10285 }, { "epoch": 0.6874665954035275, "grad_norm": 0.1689453125, "learning_rate": 5.3836918677598015e-05, "loss": 1.1477, "step": 10290 }, { "epoch": 0.6878006413682523, "grad_norm": 0.1787109375, "learning_rate": 5.373351235421084e-05, "loss": 1.244, "step": 10295 }, { "epoch": 0.688134687332977, "grad_norm": 0.1787109375, "learning_rate": 5.3630168938922984e-05, "loss": 1.252, "step": 10300 }, { "epoch": 0.6884687332977018, "grad_norm": 0.1865234375, "learning_rate": 5.352688857224945e-05, "loss": 1.1898, "step": 10305 }, { "epoch": 0.6888027792624265, "grad_norm": 0.16015625, "learning_rate": 5.3423671394619554e-05, "loss": 1.1629, "step": 10310 }, { "epoch": 0.6891368252271513, "grad_norm": 0.1650390625, "learning_rate": 5.332051754637676e-05, "loss": 1.2108, "step": 10315 }, { "epoch": 0.689470871191876, "grad_norm": 0.15625, "learning_rate": 5.321742716777829e-05, "loss": 1.1881, "step": 10320 }, { "epoch": 0.6898049171566007, "grad_norm": 0.1630859375, "learning_rate": 5.311440039899521e-05, "loss": 1.1336, "step": 10325 }, { "epoch": 0.6901389631213255, "grad_norm": 0.1728515625, "learning_rate": 5.301143738011197e-05, "loss": 1.1887, "step": 10330 }, { "epoch": 0.6904730090860502, "grad_norm": 0.1669921875, "learning_rate": 5.290853825112647e-05, "loss": 1.1783, "step": 10335 }, { "epoch": 0.690807055050775, "grad_norm": 0.17578125, "learning_rate": 5.2805703151949616e-05, "loss": 1.2454, "step": 10340 }, { "epoch": 0.6911411010154997, "grad_norm": 0.1591796875, "learning_rate": 5.2702932222405286e-05, "loss": 1.2103, "step": 10345 }, { "epoch": 0.6914751469802245, "grad_norm": 0.173828125, "learning_rate": 5.2600225602230166e-05, "loss": 1.2498, "step": 10350 }, { "epoch": 0.6918091929449492, "grad_norm": 0.1689453125, "learning_rate": 5.249758343107348e-05, "loss": 1.1575, "step": 10355 }, { "epoch": 0.692143238909674, "grad_norm": 0.1708984375, "learning_rate": 5.239500584849678e-05, "loss": 1.2488, "step": 10360 }, { "epoch": 0.6924772848743987, "grad_norm": 0.16796875, "learning_rate": 5.229249299397378e-05, "loss": 1.2236, "step": 10365 }, { "epoch": 0.6928113308391235, "grad_norm": 0.181640625, "learning_rate": 5.219004500689031e-05, "loss": 1.1939, "step": 10370 }, { "epoch": 0.6931453768038482, "grad_norm": 0.171875, "learning_rate": 5.2087662026543846e-05, "loss": 1.2497, "step": 10375 }, { "epoch": 0.693479422768573, "grad_norm": 0.193359375, "learning_rate": 5.1985344192143534e-05, "loss": 1.1932, "step": 10380 }, { "epoch": 0.6938134687332977, "grad_norm": 0.16796875, "learning_rate": 5.188309164281e-05, "loss": 1.1782, "step": 10385 }, { "epoch": 0.6941475146980225, "grad_norm": 0.177734375, "learning_rate": 5.1780904517575046e-05, "loss": 1.2641, "step": 10390 }, { "epoch": 0.6944815606627472, "grad_norm": 0.169921875, "learning_rate": 5.1678782955381534e-05, "loss": 1.1262, "step": 10395 }, { "epoch": 0.694815606627472, "grad_norm": 0.1826171875, "learning_rate": 5.157672709508312e-05, "loss": 1.2646, "step": 10400 }, { "epoch": 0.6951496525921966, "grad_norm": 0.166015625, "learning_rate": 5.147473707544425e-05, "loss": 1.1851, "step": 10405 }, { "epoch": 0.6954836985569214, "grad_norm": 0.1875, "learning_rate": 5.137281303513969e-05, "loss": 1.1714, "step": 10410 }, { "epoch": 0.6958177445216461, "grad_norm": 0.171875, "learning_rate": 5.127095511275466e-05, "loss": 1.2067, "step": 10415 }, { "epoch": 0.6961517904863709, "grad_norm": 0.1650390625, "learning_rate": 5.116916344678435e-05, "loss": 1.2317, "step": 10420 }, { "epoch": 0.6964858364510956, "grad_norm": 0.1650390625, "learning_rate": 5.106743817563395e-05, "loss": 1.2716, "step": 10425 }, { "epoch": 0.6968198824158204, "grad_norm": 0.177734375, "learning_rate": 5.0965779437618314e-05, "loss": 1.1254, "step": 10430 }, { "epoch": 0.6971539283805451, "grad_norm": 0.1806640625, "learning_rate": 5.086418737096186e-05, "loss": 1.211, "step": 10435 }, { "epoch": 0.6974879743452699, "grad_norm": 0.1728515625, "learning_rate": 5.076266211379826e-05, "loss": 1.2339, "step": 10440 }, { "epoch": 0.6978220203099946, "grad_norm": 0.169921875, "learning_rate": 5.066120380417056e-05, "loss": 1.2717, "step": 10445 }, { "epoch": 0.6981560662747194, "grad_norm": 0.1748046875, "learning_rate": 5.055981258003064e-05, "loss": 1.2531, "step": 10450 }, { "epoch": 0.6984901122394441, "grad_norm": 0.1630859375, "learning_rate": 5.0458488579239116e-05, "loss": 1.1889, "step": 10455 }, { "epoch": 0.6988241582041689, "grad_norm": 0.1767578125, "learning_rate": 5.035723193956523e-05, "loss": 1.1753, "step": 10460 }, { "epoch": 0.6991582041688936, "grad_norm": 0.1689453125, "learning_rate": 5.025604279868676e-05, "loss": 1.303, "step": 10465 }, { "epoch": 0.6994922501336184, "grad_norm": 0.1728515625, "learning_rate": 5.015492129418957e-05, "loss": 1.2091, "step": 10470 }, { "epoch": 0.6998262960983431, "grad_norm": 0.169921875, "learning_rate": 5.005386756356754e-05, "loss": 1.2594, "step": 10475 }, { "epoch": 0.7001603420630679, "grad_norm": 0.171875, "learning_rate": 4.995288174422251e-05, "loss": 1.1741, "step": 10480 }, { "epoch": 0.7004943880277926, "grad_norm": 0.1689453125, "learning_rate": 4.985196397346397e-05, "loss": 1.2114, "step": 10485 }, { "epoch": 0.7008284339925174, "grad_norm": 0.181640625, "learning_rate": 4.97511143885088e-05, "loss": 1.1966, "step": 10490 }, { "epoch": 0.7011624799572421, "grad_norm": 0.1728515625, "learning_rate": 4.9650333126481174e-05, "loss": 1.1899, "step": 10495 }, { "epoch": 0.7014965259219669, "grad_norm": 0.1767578125, "learning_rate": 4.954962032441249e-05, "loss": 1.1736, "step": 10500 }, { "epoch": 0.7018305718866916, "grad_norm": 0.1708984375, "learning_rate": 4.9448976119240895e-05, "loss": 1.1972, "step": 10505 }, { "epoch": 0.7021646178514164, "grad_norm": 0.1591796875, "learning_rate": 4.934840064781143e-05, "loss": 1.2075, "step": 10510 }, { "epoch": 0.7024986638161411, "grad_norm": 0.17578125, "learning_rate": 4.924789404687552e-05, "loss": 1.2075, "step": 10515 }, { "epoch": 0.7028327097808659, "grad_norm": 0.17578125, "learning_rate": 4.914745645309111e-05, "loss": 1.198, "step": 10520 }, { "epoch": 0.7031667557455906, "grad_norm": 0.173828125, "learning_rate": 4.904708800302218e-05, "loss": 1.2011, "step": 10525 }, { "epoch": 0.7035008017103154, "grad_norm": 0.16796875, "learning_rate": 4.8946788833138724e-05, "loss": 1.1074, "step": 10530 }, { "epoch": 0.7038348476750401, "grad_norm": 0.1669921875, "learning_rate": 4.884655907981659e-05, "loss": 1.1243, "step": 10535 }, { "epoch": 0.7041688936397649, "grad_norm": 0.1748046875, "learning_rate": 4.874639887933725e-05, "loss": 1.1672, "step": 10540 }, { "epoch": 0.7045029396044896, "grad_norm": 0.1591796875, "learning_rate": 4.864630836788753e-05, "loss": 1.1574, "step": 10545 }, { "epoch": 0.7048369855692144, "grad_norm": 0.171875, "learning_rate": 4.854628768155951e-05, "loss": 1.1272, "step": 10550 }, { "epoch": 0.7051710315339391, "grad_norm": 0.1689453125, "learning_rate": 4.844633695635041e-05, "loss": 1.2193, "step": 10555 }, { "epoch": 0.7055050774986639, "grad_norm": 0.17578125, "learning_rate": 4.834645632816227e-05, "loss": 1.2342, "step": 10560 }, { "epoch": 0.7058391234633885, "grad_norm": 0.173828125, "learning_rate": 4.824664593280175e-05, "loss": 1.2653, "step": 10565 }, { "epoch": 0.7061731694281133, "grad_norm": 0.1728515625, "learning_rate": 4.8146905905980144e-05, "loss": 1.1794, "step": 10570 }, { "epoch": 0.706507215392838, "grad_norm": 0.169921875, "learning_rate": 4.804723638331303e-05, "loss": 1.2321, "step": 10575 }, { "epoch": 0.7068412613575628, "grad_norm": 0.1787109375, "learning_rate": 4.7947637500320084e-05, "loss": 1.1332, "step": 10580 }, { "epoch": 0.7071753073222875, "grad_norm": 0.177734375, "learning_rate": 4.7848109392424914e-05, "loss": 1.2459, "step": 10585 }, { "epoch": 0.7075093532870123, "grad_norm": 0.18359375, "learning_rate": 4.774865219495498e-05, "loss": 1.254, "step": 10590 }, { "epoch": 0.707843399251737, "grad_norm": 0.1953125, "learning_rate": 4.7649266043141236e-05, "loss": 1.2396, "step": 10595 }, { "epoch": 0.7081774452164618, "grad_norm": 0.1669921875, "learning_rate": 4.7549951072118137e-05, "loss": 1.125, "step": 10600 }, { "epoch": 0.7085114911811865, "grad_norm": 0.1650390625, "learning_rate": 4.7450707416923225e-05, "loss": 1.2224, "step": 10605 }, { "epoch": 0.7088455371459113, "grad_norm": 0.16796875, "learning_rate": 4.7351535212497213e-05, "loss": 1.2234, "step": 10610 }, { "epoch": 0.709179583110636, "grad_norm": 0.173828125, "learning_rate": 4.725243459368358e-05, "loss": 1.1677, "step": 10615 }, { "epoch": 0.7095136290753608, "grad_norm": 0.17578125, "learning_rate": 4.7153405695228457e-05, "loss": 1.2209, "step": 10620 }, { "epoch": 0.7098476750400855, "grad_norm": 0.18359375, "learning_rate": 4.7054448651780434e-05, "loss": 1.1557, "step": 10625 }, { "epoch": 0.7101817210048103, "grad_norm": 0.181640625, "learning_rate": 4.695556359789061e-05, "loss": 1.2074, "step": 10630 }, { "epoch": 0.710515766969535, "grad_norm": 0.1708984375, "learning_rate": 4.685675066801194e-05, "loss": 1.0386, "step": 10635 }, { "epoch": 0.7108498129342598, "grad_norm": 0.17578125, "learning_rate": 4.675800999649943e-05, "loss": 1.2289, "step": 10640 }, { "epoch": 0.7111838588989845, "grad_norm": 0.1640625, "learning_rate": 4.66593417176098e-05, "loss": 1.2262, "step": 10645 }, { "epoch": 0.7115179048637092, "grad_norm": 0.1689453125, "learning_rate": 4.656074596550142e-05, "loss": 1.1148, "step": 10650 }, { "epoch": 0.711851950828434, "grad_norm": 0.158203125, "learning_rate": 4.646222287423391e-05, "loss": 1.2033, "step": 10655 }, { "epoch": 0.7121859967931587, "grad_norm": 0.1708984375, "learning_rate": 4.636377257776826e-05, "loss": 1.1739, "step": 10660 }, { "epoch": 0.7125200427578835, "grad_norm": 0.1669921875, "learning_rate": 4.626539520996632e-05, "loss": 1.1661, "step": 10665 }, { "epoch": 0.7128540887226082, "grad_norm": 0.16015625, "learning_rate": 4.6167090904590916e-05, "loss": 1.2023, "step": 10670 }, { "epoch": 0.713188134687333, "grad_norm": 0.1982421875, "learning_rate": 4.606885979530544e-05, "loss": 1.2215, "step": 10675 }, { "epoch": 0.7135221806520577, "grad_norm": 0.181640625, "learning_rate": 4.597070201567374e-05, "loss": 1.2264, "step": 10680 }, { "epoch": 0.7138562266167825, "grad_norm": 0.17578125, "learning_rate": 4.5872617699160095e-05, "loss": 1.1838, "step": 10685 }, { "epoch": 0.7141902725815072, "grad_norm": 0.1904296875, "learning_rate": 4.577460697912873e-05, "loss": 1.2051, "step": 10690 }, { "epoch": 0.714524318546232, "grad_norm": 0.169921875, "learning_rate": 4.567666998884395e-05, "loss": 1.1821, "step": 10695 }, { "epoch": 0.7148583645109567, "grad_norm": 0.1728515625, "learning_rate": 4.557880686146968e-05, "loss": 1.2537, "step": 10700 }, { "epoch": 0.7151924104756815, "grad_norm": 0.189453125, "learning_rate": 4.548101773006953e-05, "loss": 1.1674, "step": 10705 }, { "epoch": 0.7155264564404062, "grad_norm": 0.1728515625, "learning_rate": 4.5383302727606426e-05, "loss": 1.1552, "step": 10710 }, { "epoch": 0.715860502405131, "grad_norm": 0.1748046875, "learning_rate": 4.528566198694246e-05, "loss": 1.2517, "step": 10715 }, { "epoch": 0.7161945483698557, "grad_norm": 0.193359375, "learning_rate": 4.5188095640838865e-05, "loss": 1.176, "step": 10720 }, { "epoch": 0.7165285943345805, "grad_norm": 0.1611328125, "learning_rate": 4.50906038219557e-05, "loss": 1.1779, "step": 10725 }, { "epoch": 0.7168626402993051, "grad_norm": 0.1923828125, "learning_rate": 4.499318666285162e-05, "loss": 1.1945, "step": 10730 }, { "epoch": 0.7171966862640299, "grad_norm": 0.177734375, "learning_rate": 4.489584429598375e-05, "loss": 1.2406, "step": 10735 }, { "epoch": 0.7175307322287546, "grad_norm": 0.1728515625, "learning_rate": 4.4798576853707664e-05, "loss": 1.207, "step": 10740 }, { "epoch": 0.7178647781934794, "grad_norm": 0.1787109375, "learning_rate": 4.470138446827692e-05, "loss": 1.2622, "step": 10745 }, { "epoch": 0.7181988241582041, "grad_norm": 0.1708984375, "learning_rate": 4.4604267271843046e-05, "loss": 1.1559, "step": 10750 }, { "epoch": 0.7185328701229289, "grad_norm": 0.1728515625, "learning_rate": 4.4507225396455385e-05, "loss": 1.2233, "step": 10755 }, { "epoch": 0.7188669160876536, "grad_norm": 0.169921875, "learning_rate": 4.44102589740609e-05, "loss": 1.2279, "step": 10760 }, { "epoch": 0.7192009620523784, "grad_norm": 0.16796875, "learning_rate": 4.431336813650385e-05, "loss": 1.2143, "step": 10765 }, { "epoch": 0.7195350080171031, "grad_norm": 0.21484375, "learning_rate": 4.421655301552575e-05, "loss": 1.2, "step": 10770 }, { "epoch": 0.7198690539818279, "grad_norm": 0.1689453125, "learning_rate": 4.411981374276527e-05, "loss": 1.2192, "step": 10775 }, { "epoch": 0.7202030999465526, "grad_norm": 0.1669921875, "learning_rate": 4.402315044975778e-05, "loss": 1.1967, "step": 10780 }, { "epoch": 0.7205371459112774, "grad_norm": 0.1884765625, "learning_rate": 4.3926563267935514e-05, "loss": 1.2115, "step": 10785 }, { "epoch": 0.7208711918760021, "grad_norm": 0.1796875, "learning_rate": 4.383005232862707e-05, "loss": 1.2096, "step": 10790 }, { "epoch": 0.7212052378407269, "grad_norm": 0.1826171875, "learning_rate": 4.37336177630575e-05, "loss": 1.1461, "step": 10795 }, { "epoch": 0.7215392838054516, "grad_norm": 0.177734375, "learning_rate": 4.363725970234794e-05, "loss": 1.2384, "step": 10800 }, { "epoch": 0.7218733297701764, "grad_norm": 0.173828125, "learning_rate": 4.354097827751552e-05, "loss": 1.2205, "step": 10805 }, { "epoch": 0.7222073757349011, "grad_norm": 0.2119140625, "learning_rate": 4.344477361947309e-05, "loss": 1.1431, "step": 10810 }, { "epoch": 0.7225414216996259, "grad_norm": 0.177734375, "learning_rate": 4.334864585902935e-05, "loss": 1.1884, "step": 10815 }, { "epoch": 0.7228754676643506, "grad_norm": 0.1650390625, "learning_rate": 4.3252595126888205e-05, "loss": 1.1578, "step": 10820 }, { "epoch": 0.7232095136290754, "grad_norm": 0.1669921875, "learning_rate": 4.31566215536489e-05, "loss": 1.2039, "step": 10825 }, { "epoch": 0.7235435595938001, "grad_norm": 0.181640625, "learning_rate": 4.3060725269805846e-05, "loss": 1.1527, "step": 10830 }, { "epoch": 0.7238776055585249, "grad_norm": 0.173828125, "learning_rate": 4.296490640574826e-05, "loss": 1.174, "step": 10835 }, { "epoch": 0.7242116515232496, "grad_norm": 0.169921875, "learning_rate": 4.2869165091760086e-05, "loss": 1.2951, "step": 10840 }, { "epoch": 0.7245456974879744, "grad_norm": 0.1708984375, "learning_rate": 4.2773501458019936e-05, "loss": 1.2161, "step": 10845 }, { "epoch": 0.7248797434526991, "grad_norm": 0.162109375, "learning_rate": 4.267791563460074e-05, "loss": 1.1598, "step": 10850 }, { "epoch": 0.7252137894174239, "grad_norm": 0.1845703125, "learning_rate": 4.258240775146961e-05, "loss": 1.2698, "step": 10855 }, { "epoch": 0.7255478353821486, "grad_norm": 0.173828125, "learning_rate": 4.248697793848768e-05, "loss": 1.2178, "step": 10860 }, { "epoch": 0.7258818813468734, "grad_norm": 0.1630859375, "learning_rate": 4.239162632540994e-05, "loss": 1.1651, "step": 10865 }, { "epoch": 0.7262159273115981, "grad_norm": 0.1728515625, "learning_rate": 4.229635304188507e-05, "loss": 1.2835, "step": 10870 }, { "epoch": 0.7265499732763229, "grad_norm": 0.1669921875, "learning_rate": 4.2201158217455296e-05, "loss": 1.1627, "step": 10875 }, { "epoch": 0.7268840192410476, "grad_norm": 0.15234375, "learning_rate": 4.210604198155607e-05, "loss": 1.2025, "step": 10880 }, { "epoch": 0.7272180652057724, "grad_norm": 0.1689453125, "learning_rate": 4.201100446351597e-05, "loss": 1.2049, "step": 10885 }, { "epoch": 0.727552111170497, "grad_norm": 0.17578125, "learning_rate": 4.1916045792556694e-05, "loss": 1.2161, "step": 10890 }, { "epoch": 0.7278861571352218, "grad_norm": 0.1708984375, "learning_rate": 4.182116609779259e-05, "loss": 1.1577, "step": 10895 }, { "epoch": 0.7282202030999465, "grad_norm": 0.189453125, "learning_rate": 4.1726365508230616e-05, "loss": 1.2406, "step": 10900 }, { "epoch": 0.7285542490646713, "grad_norm": 0.1650390625, "learning_rate": 4.163164415277029e-05, "loss": 1.173, "step": 10905 }, { "epoch": 0.728888295029396, "grad_norm": 0.2060546875, "learning_rate": 4.1537002160203344e-05, "loss": 1.2019, "step": 10910 }, { "epoch": 0.7292223409941208, "grad_norm": 0.166015625, "learning_rate": 4.1442439659213564e-05, "loss": 1.2311, "step": 10915 }, { "epoch": 0.7295563869588455, "grad_norm": 0.1669921875, "learning_rate": 4.134795677837663e-05, "loss": 1.1683, "step": 10920 }, { "epoch": 0.7298904329235703, "grad_norm": 0.1572265625, "learning_rate": 4.125355364616009e-05, "loss": 1.1664, "step": 10925 }, { "epoch": 0.730224478888295, "grad_norm": 0.171875, "learning_rate": 4.115923039092293e-05, "loss": 1.1981, "step": 10930 }, { "epoch": 0.7305585248530198, "grad_norm": 0.17578125, "learning_rate": 4.1064987140915544e-05, "loss": 1.1545, "step": 10935 }, { "epoch": 0.7308925708177445, "grad_norm": 0.1748046875, "learning_rate": 4.097082402427962e-05, "loss": 1.1627, "step": 10940 }, { "epoch": 0.7312266167824693, "grad_norm": 0.171875, "learning_rate": 4.087674116904786e-05, "loss": 1.2434, "step": 10945 }, { "epoch": 0.731560662747194, "grad_norm": 0.18359375, "learning_rate": 4.07827387031438e-05, "loss": 1.1772, "step": 10950 }, { "epoch": 0.7318947087119188, "grad_norm": 0.1748046875, "learning_rate": 4.068881675438165e-05, "loss": 1.1721, "step": 10955 }, { "epoch": 0.7322287546766435, "grad_norm": 0.1708984375, "learning_rate": 4.0594975450466255e-05, "loss": 1.141, "step": 10960 }, { "epoch": 0.7325628006413683, "grad_norm": 0.1748046875, "learning_rate": 4.050121491899266e-05, "loss": 1.2159, "step": 10965 }, { "epoch": 0.732896846606093, "grad_norm": 0.1708984375, "learning_rate": 4.040753528744623e-05, "loss": 1.219, "step": 10970 }, { "epoch": 0.7332308925708177, "grad_norm": 0.166015625, "learning_rate": 4.0313936683202205e-05, "loss": 1.1963, "step": 10975 }, { "epoch": 0.7335649385355425, "grad_norm": 0.16796875, "learning_rate": 4.0220419233525754e-05, "loss": 1.1704, "step": 10980 }, { "epoch": 0.7338989845002672, "grad_norm": 0.1708984375, "learning_rate": 4.0126983065571643e-05, "loss": 1.1832, "step": 10985 }, { "epoch": 0.734233030464992, "grad_norm": 0.1669921875, "learning_rate": 4.003362830638409e-05, "loss": 1.2385, "step": 10990 }, { "epoch": 0.7345670764297167, "grad_norm": 0.171875, "learning_rate": 3.9940355082896694e-05, "loss": 1.1976, "step": 10995 }, { "epoch": 0.7349011223944415, "grad_norm": 0.1767578125, "learning_rate": 3.984716352193222e-05, "loss": 1.1941, "step": 11000 }, { "epoch": 0.7352351683591662, "grad_norm": 0.1669921875, "learning_rate": 3.975405375020228e-05, "loss": 1.1729, "step": 11005 }, { "epoch": 0.735569214323891, "grad_norm": 0.171875, "learning_rate": 3.96610258943073e-05, "loss": 1.2462, "step": 11010 }, { "epoch": 0.7359032602886157, "grad_norm": 0.1875, "learning_rate": 3.956808008073646e-05, "loss": 1.2269, "step": 11015 }, { "epoch": 0.7362373062533405, "grad_norm": 0.1630859375, "learning_rate": 3.9475216435867225e-05, "loss": 1.1627, "step": 11020 }, { "epoch": 0.7365713522180652, "grad_norm": 0.181640625, "learning_rate": 3.938243508596539e-05, "loss": 1.1939, "step": 11025 }, { "epoch": 0.73690539818279, "grad_norm": 0.1650390625, "learning_rate": 3.9289736157184876e-05, "loss": 1.2428, "step": 11030 }, { "epoch": 0.7372394441475147, "grad_norm": 0.171875, "learning_rate": 3.9197119775567595e-05, "loss": 1.1643, "step": 11035 }, { "epoch": 0.7375734901122395, "grad_norm": 0.181640625, "learning_rate": 3.910458606704309e-05, "loss": 1.1663, "step": 11040 }, { "epoch": 0.7379075360769642, "grad_norm": 0.17578125, "learning_rate": 3.901213515742856e-05, "loss": 1.2113, "step": 11045 }, { "epoch": 0.7382415820416889, "grad_norm": 0.181640625, "learning_rate": 3.891976717242861e-05, "loss": 1.0892, "step": 11050 }, { "epoch": 0.7385756280064136, "grad_norm": 0.1669921875, "learning_rate": 3.8827482237635105e-05, "loss": 1.2396, "step": 11055 }, { "epoch": 0.7389096739711384, "grad_norm": 0.1669921875, "learning_rate": 3.8735280478527035e-05, "loss": 1.1426, "step": 11060 }, { "epoch": 0.7392437199358631, "grad_norm": 0.1796875, "learning_rate": 3.8643162020470224e-05, "loss": 1.1233, "step": 11065 }, { "epoch": 0.7395777659005879, "grad_norm": 0.1669921875, "learning_rate": 3.85511269887172e-05, "loss": 1.2431, "step": 11070 }, { "epoch": 0.7399118118653126, "grad_norm": 0.171875, "learning_rate": 3.8459175508407184e-05, "loss": 1.283, "step": 11075 }, { "epoch": 0.7402458578300374, "grad_norm": 0.1767578125, "learning_rate": 3.8367307704565706e-05, "loss": 1.1919, "step": 11080 }, { "epoch": 0.7405799037947621, "grad_norm": 0.18359375, "learning_rate": 3.827552370210448e-05, "loss": 1.2189, "step": 11085 }, { "epoch": 0.7409139497594869, "grad_norm": 0.1669921875, "learning_rate": 3.818382362582137e-05, "loss": 1.2248, "step": 11090 }, { "epoch": 0.7412479957242116, "grad_norm": 0.1767578125, "learning_rate": 3.809220760040014e-05, "loss": 1.1446, "step": 11095 }, { "epoch": 0.7415820416889364, "grad_norm": 0.1640625, "learning_rate": 3.8000675750410186e-05, "loss": 1.1236, "step": 11100 }, { "epoch": 0.7419160876536611, "grad_norm": 0.1689453125, "learning_rate": 3.7909228200306436e-05, "loss": 1.1346, "step": 11105 }, { "epoch": 0.7422501336183859, "grad_norm": 0.1640625, "learning_rate": 3.7817865074429314e-05, "loss": 1.1425, "step": 11110 }, { "epoch": 0.7425841795831106, "grad_norm": 0.166015625, "learning_rate": 3.7726586497004334e-05, "loss": 1.1919, "step": 11115 }, { "epoch": 0.7429182255478354, "grad_norm": 0.169921875, "learning_rate": 3.7635392592142174e-05, "loss": 1.1797, "step": 11120 }, { "epoch": 0.7432522715125601, "grad_norm": 0.1708984375, "learning_rate": 3.7544283483838215e-05, "loss": 1.2874, "step": 11125 }, { "epoch": 0.7435863174772849, "grad_norm": 0.166015625, "learning_rate": 3.745325929597272e-05, "loss": 1.1107, "step": 11130 }, { "epoch": 0.7439203634420096, "grad_norm": 0.1748046875, "learning_rate": 3.736232015231038e-05, "loss": 1.1931, "step": 11135 }, { "epoch": 0.7442544094067344, "grad_norm": 0.171875, "learning_rate": 3.7271466176500224e-05, "loss": 1.1346, "step": 11140 }, { "epoch": 0.7445884553714591, "grad_norm": 0.171875, "learning_rate": 3.718069749207559e-05, "loss": 1.2507, "step": 11145 }, { "epoch": 0.7449225013361839, "grad_norm": 0.1796875, "learning_rate": 3.7090014222453794e-05, "loss": 1.2236, "step": 11150 }, { "epoch": 0.7452565473009086, "grad_norm": 0.1728515625, "learning_rate": 3.699941649093599e-05, "loss": 1.2067, "step": 11155 }, { "epoch": 0.7455905932656334, "grad_norm": 0.1669921875, "learning_rate": 3.6908904420707e-05, "loss": 1.2222, "step": 11160 }, { "epoch": 0.7459246392303581, "grad_norm": 0.1689453125, "learning_rate": 3.6818478134835285e-05, "loss": 1.1647, "step": 11165 }, { "epoch": 0.7462586851950829, "grad_norm": 0.177734375, "learning_rate": 3.672813775627259e-05, "loss": 1.2522, "step": 11170 }, { "epoch": 0.7465927311598076, "grad_norm": 0.1572265625, "learning_rate": 3.663788340785379e-05, "loss": 1.2151, "step": 11175 }, { "epoch": 0.7469267771245324, "grad_norm": 0.166015625, "learning_rate": 3.6547715212296906e-05, "loss": 1.1831, "step": 11180 }, { "epoch": 0.7472608230892571, "grad_norm": 0.1669921875, "learning_rate": 3.645763329220281e-05, "loss": 1.2159, "step": 11185 }, { "epoch": 0.7475948690539819, "grad_norm": 0.1767578125, "learning_rate": 3.636763777005499e-05, "loss": 1.2279, "step": 11190 }, { "epoch": 0.7479289150187066, "grad_norm": 0.17578125, "learning_rate": 3.627772876821944e-05, "loss": 1.1907, "step": 11195 }, { "epoch": 0.7482629609834314, "grad_norm": 0.1708984375, "learning_rate": 3.618790640894465e-05, "loss": 1.1535, "step": 11200 }, { "epoch": 0.7485970069481561, "grad_norm": 0.1689453125, "learning_rate": 3.609817081436119e-05, "loss": 1.1594, "step": 11205 }, { "epoch": 0.7489310529128808, "grad_norm": 0.1767578125, "learning_rate": 3.600852210648164e-05, "loss": 1.2243, "step": 11210 }, { "epoch": 0.7492650988776055, "grad_norm": 0.205078125, "learning_rate": 3.591896040720054e-05, "loss": 1.2602, "step": 11215 }, { "epoch": 0.7495991448423303, "grad_norm": 0.171875, "learning_rate": 3.5829485838294093e-05, "loss": 1.2086, "step": 11220 }, { "epoch": 0.749933190807055, "grad_norm": 0.1669921875, "learning_rate": 3.5740098521419985e-05, "loss": 1.2441, "step": 11225 }, { "epoch": 0.7502672367717798, "grad_norm": 0.18359375, "learning_rate": 3.565079857811728e-05, "loss": 1.1788, "step": 11230 }, { "epoch": 0.7506012827365045, "grad_norm": 0.1611328125, "learning_rate": 3.556158612980624e-05, "loss": 1.1962, "step": 11235 }, { "epoch": 0.7509353287012293, "grad_norm": 0.1689453125, "learning_rate": 3.5472461297788185e-05, "loss": 1.2109, "step": 11240 }, { "epoch": 0.751269374665954, "grad_norm": 0.1748046875, "learning_rate": 3.538342420324534e-05, "loss": 1.2655, "step": 11245 }, { "epoch": 0.7516034206306788, "grad_norm": 0.1708984375, "learning_rate": 3.529447496724053e-05, "loss": 1.1728, "step": 11250 }, { "epoch": 0.7519374665954035, "grad_norm": 0.1630859375, "learning_rate": 3.5205613710717234e-05, "loss": 1.121, "step": 11255 }, { "epoch": 0.7522715125601283, "grad_norm": 0.18359375, "learning_rate": 3.511684055449922e-05, "loss": 1.1685, "step": 11260 }, { "epoch": 0.752605558524853, "grad_norm": 0.1689453125, "learning_rate": 3.5028155619290495e-05, "loss": 1.162, "step": 11265 }, { "epoch": 0.7529396044895778, "grad_norm": 0.1904296875, "learning_rate": 3.493955902567505e-05, "loss": 1.2662, "step": 11270 }, { "epoch": 0.7532736504543025, "grad_norm": 0.16796875, "learning_rate": 3.4851050894116946e-05, "loss": 1.1836, "step": 11275 }, { "epoch": 0.7536076964190273, "grad_norm": 0.1669921875, "learning_rate": 3.476263134495978e-05, "loss": 1.18, "step": 11280 }, { "epoch": 0.753941742383752, "grad_norm": 0.162109375, "learning_rate": 3.467430049842678e-05, "loss": 1.1368, "step": 11285 }, { "epoch": 0.7542757883484768, "grad_norm": 0.171875, "learning_rate": 3.4586058474620495e-05, "loss": 1.2558, "step": 11290 }, { "epoch": 0.7546098343132015, "grad_norm": 0.1708984375, "learning_rate": 3.4497905393522835e-05, "loss": 1.1602, "step": 11295 }, { "epoch": 0.7549438802779262, "grad_norm": 0.1708984375, "learning_rate": 3.4409841374994634e-05, "loss": 1.264, "step": 11300 }, { "epoch": 0.755277926242651, "grad_norm": 0.1806640625, "learning_rate": 3.432186653877575e-05, "loss": 1.2065, "step": 11305 }, { "epoch": 0.7556119722073757, "grad_norm": 0.1650390625, "learning_rate": 3.423398100448466e-05, "loss": 1.2051, "step": 11310 }, { "epoch": 0.7559460181721005, "grad_norm": 0.166015625, "learning_rate": 3.414618489161856e-05, "loss": 1.112, "step": 11315 }, { "epoch": 0.7562800641368252, "grad_norm": 0.1826171875, "learning_rate": 3.4058478319552936e-05, "loss": 1.2844, "step": 11320 }, { "epoch": 0.75661411010155, "grad_norm": 0.1767578125, "learning_rate": 3.397086140754153e-05, "loss": 1.2591, "step": 11325 }, { "epoch": 0.7569481560662747, "grad_norm": 0.1728515625, "learning_rate": 3.388333427471627e-05, "loss": 1.1908, "step": 11330 }, { "epoch": 0.7572822020309995, "grad_norm": 0.1708984375, "learning_rate": 3.3795897040087e-05, "loss": 1.21, "step": 11335 }, { "epoch": 0.7576162479957242, "grad_norm": 0.1669921875, "learning_rate": 3.3708549822541225e-05, "loss": 1.1525, "step": 11340 }, { "epoch": 0.757950293960449, "grad_norm": 0.1748046875, "learning_rate": 3.36212927408441e-05, "loss": 1.2159, "step": 11345 }, { "epoch": 0.7582843399251737, "grad_norm": 0.166015625, "learning_rate": 3.3534125913638316e-05, "loss": 1.2143, "step": 11350 }, { "epoch": 0.7586183858898985, "grad_norm": 0.1630859375, "learning_rate": 3.344704945944372e-05, "loss": 1.2032, "step": 11355 }, { "epoch": 0.7589524318546232, "grad_norm": 0.169921875, "learning_rate": 3.336006349665731e-05, "loss": 1.1575, "step": 11360 }, { "epoch": 0.759286477819348, "grad_norm": 0.1708984375, "learning_rate": 3.32731681435531e-05, "loss": 1.1845, "step": 11365 }, { "epoch": 0.7596205237840727, "grad_norm": 0.1748046875, "learning_rate": 3.3186363518281907e-05, "loss": 1.1324, "step": 11370 }, { "epoch": 0.7599545697487974, "grad_norm": 0.17578125, "learning_rate": 3.30996497388711e-05, "loss": 1.2672, "step": 11375 }, { "epoch": 0.7602886157135221, "grad_norm": 0.1787109375, "learning_rate": 3.301302692322453e-05, "loss": 1.2024, "step": 11380 }, { "epoch": 0.7606226616782469, "grad_norm": 0.1806640625, "learning_rate": 3.292649518912251e-05, "loss": 1.2042, "step": 11385 }, { "epoch": 0.7609567076429716, "grad_norm": 0.1796875, "learning_rate": 3.284005465422134e-05, "loss": 1.1876, "step": 11390 }, { "epoch": 0.7612907536076964, "grad_norm": 0.1787109375, "learning_rate": 3.275370543605337e-05, "loss": 1.1774, "step": 11395 }, { "epoch": 0.7616247995724211, "grad_norm": 0.19140625, "learning_rate": 3.266744765202684e-05, "loss": 1.1879, "step": 11400 }, { "epoch": 0.7619588455371459, "grad_norm": 0.169921875, "learning_rate": 3.2581281419425644e-05, "loss": 1.1583, "step": 11405 }, { "epoch": 0.7622928915018706, "grad_norm": 0.1728515625, "learning_rate": 3.2495206855409165e-05, "loss": 1.1837, "step": 11410 }, { "epoch": 0.7626269374665954, "grad_norm": 0.1708984375, "learning_rate": 3.2409224077012134e-05, "loss": 1.1778, "step": 11415 }, { "epoch": 0.7629609834313201, "grad_norm": 0.1728515625, "learning_rate": 3.232333320114457e-05, "loss": 1.1836, "step": 11420 }, { "epoch": 0.7632950293960449, "grad_norm": 0.173828125, "learning_rate": 3.223753434459139e-05, "loss": 1.194, "step": 11425 }, { "epoch": 0.7636290753607696, "grad_norm": 0.1640625, "learning_rate": 3.2151827624012574e-05, "loss": 1.2349, "step": 11430 }, { "epoch": 0.7639631213254944, "grad_norm": 0.169921875, "learning_rate": 3.206621315594264e-05, "loss": 1.2218, "step": 11435 }, { "epoch": 0.7642971672902191, "grad_norm": 0.17578125, "learning_rate": 3.1980691056790814e-05, "loss": 1.2077, "step": 11440 }, { "epoch": 0.7646312132549439, "grad_norm": 0.1640625, "learning_rate": 3.189526144284066e-05, "loss": 1.1235, "step": 11445 }, { "epoch": 0.7649652592196686, "grad_norm": 0.18359375, "learning_rate": 3.180992443025001e-05, "loss": 1.2072, "step": 11450 }, { "epoch": 0.7652993051843934, "grad_norm": 0.1669921875, "learning_rate": 3.17246801350507e-05, "loss": 1.2245, "step": 11455 }, { "epoch": 0.7656333511491181, "grad_norm": 0.1943359375, "learning_rate": 3.163952867314871e-05, "loss": 1.2003, "step": 11460 }, { "epoch": 0.7659673971138429, "grad_norm": 0.1806640625, "learning_rate": 3.155447016032361e-05, "loss": 1.2126, "step": 11465 }, { "epoch": 0.7663014430785676, "grad_norm": 0.166015625, "learning_rate": 3.146950471222865e-05, "loss": 1.2016, "step": 11470 }, { "epoch": 0.7666354890432924, "grad_norm": 0.171875, "learning_rate": 3.138463244439048e-05, "loss": 1.2201, "step": 11475 }, { "epoch": 0.7669695350080171, "grad_norm": 0.169921875, "learning_rate": 3.1299853472209186e-05, "loss": 1.2002, "step": 11480 }, { "epoch": 0.7673035809727419, "grad_norm": 0.1826171875, "learning_rate": 3.121516791095787e-05, "loss": 1.2091, "step": 11485 }, { "epoch": 0.7676376269374666, "grad_norm": 0.1767578125, "learning_rate": 3.113057587578271e-05, "loss": 1.2976, "step": 11490 }, { "epoch": 0.7679716729021914, "grad_norm": 0.16796875, "learning_rate": 3.1046077481702654e-05, "loss": 1.1261, "step": 11495 }, { "epoch": 0.7683057188669161, "grad_norm": 0.1630859375, "learning_rate": 3.096167284360939e-05, "loss": 1.2017, "step": 11500 }, { "epoch": 0.7686397648316409, "grad_norm": 0.17578125, "learning_rate": 3.087736207626709e-05, "loss": 1.1789, "step": 11505 }, { "epoch": 0.7689738107963656, "grad_norm": 0.1748046875, "learning_rate": 3.0793145294312255e-05, "loss": 1.2321, "step": 11510 }, { "epoch": 0.7693078567610904, "grad_norm": 0.1728515625, "learning_rate": 3.0709022612253656e-05, "loss": 1.2757, "step": 11515 }, { "epoch": 0.7696419027258151, "grad_norm": 0.181640625, "learning_rate": 3.062499414447215e-05, "loss": 1.2314, "step": 11520 }, { "epoch": 0.7699759486905399, "grad_norm": 0.17578125, "learning_rate": 3.054106000522039e-05, "loss": 1.1666, "step": 11525 }, { "epoch": 0.7703099946552646, "grad_norm": 0.16796875, "learning_rate": 3.0457220308622782e-05, "loss": 1.1709, "step": 11530 }, { "epoch": 0.7706440406199893, "grad_norm": 0.1767578125, "learning_rate": 3.0373475168675435e-05, "loss": 1.2248, "step": 11535 }, { "epoch": 0.770978086584714, "grad_norm": 0.1669921875, "learning_rate": 3.0289824699245784e-05, "loss": 1.2131, "step": 11540 }, { "epoch": 0.7713121325494388, "grad_norm": 0.1787109375, "learning_rate": 3.0206269014072518e-05, "loss": 1.2146, "step": 11545 }, { "epoch": 0.7716461785141635, "grad_norm": 0.1728515625, "learning_rate": 3.0122808226765554e-05, "loss": 1.1952, "step": 11550 }, { "epoch": 0.7719802244788883, "grad_norm": 0.166015625, "learning_rate": 3.003944245080573e-05, "loss": 1.1605, "step": 11555 }, { "epoch": 0.772314270443613, "grad_norm": 0.1669921875, "learning_rate": 2.9956171799544686e-05, "loss": 1.2247, "step": 11560 }, { "epoch": 0.7726483164083378, "grad_norm": 0.171875, "learning_rate": 2.9872996386204678e-05, "loss": 1.1881, "step": 11565 }, { "epoch": 0.7729823623730625, "grad_norm": 0.169921875, "learning_rate": 2.9789916323878597e-05, "loss": 1.2223, "step": 11570 }, { "epoch": 0.7733164083377873, "grad_norm": 0.1806640625, "learning_rate": 2.970693172552953e-05, "loss": 1.2298, "step": 11575 }, { "epoch": 0.773650454302512, "grad_norm": 0.1669921875, "learning_rate": 2.9624042703990896e-05, "loss": 1.2414, "step": 11580 }, { "epoch": 0.7739845002672368, "grad_norm": 0.1708984375, "learning_rate": 2.9541249371966064e-05, "loss": 1.1649, "step": 11585 }, { "epoch": 0.7743185462319615, "grad_norm": 0.169921875, "learning_rate": 2.945855184202837e-05, "loss": 1.1757, "step": 11590 }, { "epoch": 0.7746525921966863, "grad_norm": 0.16796875, "learning_rate": 2.937595022662083e-05, "loss": 1.1983, "step": 11595 }, { "epoch": 0.774986638161411, "grad_norm": 0.1767578125, "learning_rate": 2.9293444638056045e-05, "loss": 1.27, "step": 11600 }, { "epoch": 0.7753206841261358, "grad_norm": 0.1689453125, "learning_rate": 2.921103518851609e-05, "loss": 1.2107, "step": 11605 }, { "epoch": 0.7756547300908605, "grad_norm": 0.177734375, "learning_rate": 2.9128721990052345e-05, "loss": 1.167, "step": 11610 }, { "epoch": 0.7759887760555852, "grad_norm": 0.17578125, "learning_rate": 2.9046505154585235e-05, "loss": 1.2401, "step": 11615 }, { "epoch": 0.77632282202031, "grad_norm": 0.1611328125, "learning_rate": 2.8964384793904188e-05, "loss": 1.1416, "step": 11620 }, { "epoch": 0.7766568679850347, "grad_norm": 0.1728515625, "learning_rate": 2.8882361019667502e-05, "loss": 1.2621, "step": 11625 }, { "epoch": 0.7769909139497595, "grad_norm": 0.1845703125, "learning_rate": 2.8800433943402115e-05, "loss": 1.2146, "step": 11630 }, { "epoch": 0.7773249599144842, "grad_norm": 0.1728515625, "learning_rate": 2.8718603676503475e-05, "loss": 1.2552, "step": 11635 }, { "epoch": 0.777659005879209, "grad_norm": 0.203125, "learning_rate": 2.8636870330235356e-05, "loss": 1.2535, "step": 11640 }, { "epoch": 0.7779930518439337, "grad_norm": 0.1728515625, "learning_rate": 2.8555234015729904e-05, "loss": 1.1534, "step": 11645 }, { "epoch": 0.7783270978086585, "grad_norm": 0.201171875, "learning_rate": 2.8473694843987198e-05, "loss": 1.2312, "step": 11650 }, { "epoch": 0.7786611437733832, "grad_norm": 0.1767578125, "learning_rate": 2.839225292587525e-05, "loss": 1.1423, "step": 11655 }, { "epoch": 0.778995189738108, "grad_norm": 0.181640625, "learning_rate": 2.831090837212984e-05, "loss": 1.2034, "step": 11660 }, { "epoch": 0.7793292357028327, "grad_norm": 0.1630859375, "learning_rate": 2.8229661293354427e-05, "loss": 1.1359, "step": 11665 }, { "epoch": 0.7796632816675575, "grad_norm": 0.181640625, "learning_rate": 2.8148511800019827e-05, "loss": 1.2567, "step": 11670 }, { "epoch": 0.7799973276322822, "grad_norm": 0.1728515625, "learning_rate": 2.8067460002464252e-05, "loss": 1.2415, "step": 11675 }, { "epoch": 0.780331373597007, "grad_norm": 0.1689453125, "learning_rate": 2.7986506010893088e-05, "loss": 1.1646, "step": 11680 }, { "epoch": 0.7806654195617317, "grad_norm": 0.1630859375, "learning_rate": 2.7905649935378673e-05, "loss": 1.1926, "step": 11685 }, { "epoch": 0.7809994655264565, "grad_norm": 0.173828125, "learning_rate": 2.7824891885860227e-05, "loss": 1.2113, "step": 11690 }, { "epoch": 0.7813335114911811, "grad_norm": 0.16796875, "learning_rate": 2.7744231972143687e-05, "loss": 1.1887, "step": 11695 }, { "epoch": 0.7816675574559059, "grad_norm": 0.1669921875, "learning_rate": 2.7663670303901566e-05, "loss": 1.2085, "step": 11700 }, { "epoch": 0.7820016034206306, "grad_norm": 0.16796875, "learning_rate": 2.758320699067284e-05, "loss": 1.2066, "step": 11705 }, { "epoch": 0.7823356493853554, "grad_norm": 0.173828125, "learning_rate": 2.7502842141862672e-05, "loss": 1.2673, "step": 11710 }, { "epoch": 0.7826696953500801, "grad_norm": 0.1767578125, "learning_rate": 2.742257586674233e-05, "loss": 1.2183, "step": 11715 }, { "epoch": 0.7830037413148049, "grad_norm": 0.171875, "learning_rate": 2.7342408274449184e-05, "loss": 1.166, "step": 11720 }, { "epoch": 0.7833377872795296, "grad_norm": 0.1630859375, "learning_rate": 2.7262339473986286e-05, "loss": 1.1372, "step": 11725 }, { "epoch": 0.7836718332442544, "grad_norm": 0.1806640625, "learning_rate": 2.71823695742224e-05, "loss": 1.1694, "step": 11730 }, { "epoch": 0.7840058792089791, "grad_norm": 0.181640625, "learning_rate": 2.710249868389185e-05, "loss": 1.219, "step": 11735 }, { "epoch": 0.7843399251737039, "grad_norm": 0.1767578125, "learning_rate": 2.7022726911594363e-05, "loss": 1.2113, "step": 11740 }, { "epoch": 0.7846739711384286, "grad_norm": 0.158203125, "learning_rate": 2.6943054365794818e-05, "loss": 1.1246, "step": 11745 }, { "epoch": 0.7850080171031534, "grad_norm": 0.1923828125, "learning_rate": 2.6863481154823168e-05, "loss": 1.1696, "step": 11750 }, { "epoch": 0.7853420630678781, "grad_norm": 0.173828125, "learning_rate": 2.678400738687442e-05, "loss": 1.1715, "step": 11755 }, { "epoch": 0.7856761090326029, "grad_norm": 0.1728515625, "learning_rate": 2.6704633170008232e-05, "loss": 1.1638, "step": 11760 }, { "epoch": 0.7860101549973276, "grad_norm": 0.169921875, "learning_rate": 2.662535861214902e-05, "loss": 1.2395, "step": 11765 }, { "epoch": 0.7863442009620524, "grad_norm": 0.1865234375, "learning_rate": 2.654618382108558e-05, "loss": 1.2303, "step": 11770 }, { "epoch": 0.7866782469267771, "grad_norm": 0.1611328125, "learning_rate": 2.6467108904471184e-05, "loss": 1.2723, "step": 11775 }, { "epoch": 0.7870122928915019, "grad_norm": 0.1689453125, "learning_rate": 2.6388133969823193e-05, "loss": 1.1856, "step": 11780 }, { "epoch": 0.7873463388562266, "grad_norm": 0.1640625, "learning_rate": 2.6309259124523046e-05, "loss": 1.2234, "step": 11785 }, { "epoch": 0.7876803848209514, "grad_norm": 0.173828125, "learning_rate": 2.6230484475816132e-05, "loss": 1.1786, "step": 11790 }, { "epoch": 0.7880144307856761, "grad_norm": 0.173828125, "learning_rate": 2.6151810130811638e-05, "loss": 1.2053, "step": 11795 }, { "epoch": 0.7883484767504009, "grad_norm": 0.1640625, "learning_rate": 2.6073236196482263e-05, "loss": 1.1418, "step": 11800 }, { "epoch": 0.7886825227151256, "grad_norm": 0.1962890625, "learning_rate": 2.599476277966423e-05, "loss": 1.1912, "step": 11805 }, { "epoch": 0.7890165686798504, "grad_norm": 0.1650390625, "learning_rate": 2.591638998705711e-05, "loss": 1.1552, "step": 11810 }, { "epoch": 0.7893506146445751, "grad_norm": 0.1572265625, "learning_rate": 2.583811792522365e-05, "loss": 1.148, "step": 11815 }, { "epoch": 0.7896846606092999, "grad_norm": 0.1943359375, "learning_rate": 2.5759946700589556e-05, "loss": 1.2014, "step": 11820 }, { "epoch": 0.7900187065740246, "grad_norm": 0.177734375, "learning_rate": 2.568187641944354e-05, "loss": 1.1741, "step": 11825 }, { "epoch": 0.7903527525387494, "grad_norm": 0.1748046875, "learning_rate": 2.5603907187937038e-05, "loss": 1.1774, "step": 11830 }, { "epoch": 0.7906867985034741, "grad_norm": 0.169921875, "learning_rate": 2.5526039112084044e-05, "loss": 1.2101, "step": 11835 }, { "epoch": 0.7910208444681989, "grad_norm": 0.1796875, "learning_rate": 2.5448272297761e-05, "loss": 1.1802, "step": 11840 }, { "epoch": 0.7913548904329236, "grad_norm": 0.1806640625, "learning_rate": 2.5370606850706757e-05, "loss": 1.194, "step": 11845 }, { "epoch": 0.7916889363976484, "grad_norm": 0.1796875, "learning_rate": 2.5293042876522245e-05, "loss": 1.2415, "step": 11850 }, { "epoch": 0.7920229823623731, "grad_norm": 0.1787109375, "learning_rate": 2.521558048067042e-05, "loss": 1.2278, "step": 11855 }, { "epoch": 0.7923570283270978, "grad_norm": 0.1650390625, "learning_rate": 2.5138219768476203e-05, "loss": 1.2145, "step": 11860 }, { "epoch": 0.7926910742918225, "grad_norm": 0.189453125, "learning_rate": 2.5060960845126235e-05, "loss": 1.179, "step": 11865 }, { "epoch": 0.7930251202565473, "grad_norm": 0.1572265625, "learning_rate": 2.4983803815668694e-05, "loss": 1.1905, "step": 11870 }, { "epoch": 0.793359166221272, "grad_norm": 0.19921875, "learning_rate": 2.4906748785013267e-05, "loss": 1.1982, "step": 11875 }, { "epoch": 0.7936932121859968, "grad_norm": 0.1728515625, "learning_rate": 2.4829795857930904e-05, "loss": 1.2422, "step": 11880 }, { "epoch": 0.7940272581507215, "grad_norm": 0.17578125, "learning_rate": 2.4752945139053785e-05, "loss": 1.2127, "step": 11885 }, { "epoch": 0.7943613041154463, "grad_norm": 0.185546875, "learning_rate": 2.4676196732875144e-05, "loss": 1.1725, "step": 11890 }, { "epoch": 0.794695350080171, "grad_norm": 0.1796875, "learning_rate": 2.4599550743749e-05, "loss": 1.2145, "step": 11895 }, { "epoch": 0.7950293960448958, "grad_norm": 0.1689453125, "learning_rate": 2.4523007275890152e-05, "loss": 1.1748, "step": 11900 }, { "epoch": 0.7953634420096205, "grad_norm": 0.171875, "learning_rate": 2.4446566433374065e-05, "loss": 1.2528, "step": 11905 }, { "epoch": 0.7956974879743453, "grad_norm": 0.1689453125, "learning_rate": 2.4370228320136613e-05, "loss": 1.2329, "step": 11910 }, { "epoch": 0.79603153393907, "grad_norm": 0.1767578125, "learning_rate": 2.429399303997394e-05, "loss": 1.2073, "step": 11915 }, { "epoch": 0.7963655799037948, "grad_norm": 0.18359375, "learning_rate": 2.4217860696542482e-05, "loss": 1.1722, "step": 11920 }, { "epoch": 0.7966996258685195, "grad_norm": 0.173828125, "learning_rate": 2.414183139335866e-05, "loss": 1.24, "step": 11925 }, { "epoch": 0.7970336718332443, "grad_norm": 0.169921875, "learning_rate": 2.406590523379877e-05, "loss": 1.1457, "step": 11930 }, { "epoch": 0.797367717797969, "grad_norm": 0.171875, "learning_rate": 2.399008232109885e-05, "loss": 1.2234, "step": 11935 }, { "epoch": 0.7977017637626937, "grad_norm": 0.1767578125, "learning_rate": 2.3914362758354658e-05, "loss": 1.2298, "step": 11940 }, { "epoch": 0.7980358097274185, "grad_norm": 0.1796875, "learning_rate": 2.383874664852127e-05, "loss": 1.1725, "step": 11945 }, { "epoch": 0.7983698556921432, "grad_norm": 0.169921875, "learning_rate": 2.3763234094413277e-05, "loss": 1.171, "step": 11950 }, { "epoch": 0.798703901656868, "grad_norm": 0.169921875, "learning_rate": 2.3687825198704296e-05, "loss": 1.1286, "step": 11955 }, { "epoch": 0.7990379476215927, "grad_norm": 0.1669921875, "learning_rate": 2.3612520063927145e-05, "loss": 1.22, "step": 11960 }, { "epoch": 0.7993719935863175, "grad_norm": 0.1669921875, "learning_rate": 2.353731879247345e-05, "loss": 1.1227, "step": 11965 }, { "epoch": 0.7997060395510422, "grad_norm": 0.19921875, "learning_rate": 2.346222148659365e-05, "loss": 1.1955, "step": 11970 }, { "epoch": 0.800040085515767, "grad_norm": 0.1630859375, "learning_rate": 2.3387228248396842e-05, "loss": 1.1408, "step": 11975 }, { "epoch": 0.8003741314804917, "grad_norm": 0.177734375, "learning_rate": 2.3312339179850652e-05, "loss": 1.1877, "step": 11980 }, { "epoch": 0.8007081774452165, "grad_norm": 0.1708984375, "learning_rate": 2.3237554382781002e-05, "loss": 1.1573, "step": 11985 }, { "epoch": 0.8010422234099412, "grad_norm": 0.1787109375, "learning_rate": 2.316287395887202e-05, "loss": 1.1767, "step": 11990 }, { "epoch": 0.801376269374666, "grad_norm": 0.171875, "learning_rate": 2.3088298009666033e-05, "loss": 1.3085, "step": 11995 }, { "epoch": 0.8017103153393907, "grad_norm": 0.16015625, "learning_rate": 2.3013826636563198e-05, "loss": 1.1452, "step": 12000 }, { "epoch": 0.8020443613041155, "grad_norm": 0.1865234375, "learning_rate": 2.2939459940821518e-05, "loss": 1.2353, "step": 12005 }, { "epoch": 0.8023784072688402, "grad_norm": 0.181640625, "learning_rate": 2.2865198023556698e-05, "loss": 1.2847, "step": 12010 }, { "epoch": 0.802712453233565, "grad_norm": 0.1728515625, "learning_rate": 2.2791040985741974e-05, "loss": 1.2256, "step": 12015 }, { "epoch": 0.8030464991982896, "grad_norm": 0.185546875, "learning_rate": 2.271698892820794e-05, "loss": 1.1917, "step": 12020 }, { "epoch": 0.8033805451630144, "grad_norm": 0.1826171875, "learning_rate": 2.264304195164243e-05, "loss": 1.2397, "step": 12025 }, { "epoch": 0.8037145911277391, "grad_norm": 0.1708984375, "learning_rate": 2.2569200156590507e-05, "loss": 1.1771, "step": 12030 }, { "epoch": 0.8040486370924639, "grad_norm": 0.177734375, "learning_rate": 2.2495463643454085e-05, "loss": 1.1452, "step": 12035 }, { "epoch": 0.8043826830571886, "grad_norm": 0.1630859375, "learning_rate": 2.2421832512492057e-05, "loss": 1.2384, "step": 12040 }, { "epoch": 0.8047167290219134, "grad_norm": 0.169921875, "learning_rate": 2.23483068638199e-05, "loss": 1.1587, "step": 12045 }, { "epoch": 0.8050507749866381, "grad_norm": 0.1796875, "learning_rate": 2.22748867974098e-05, "loss": 1.2088, "step": 12050 }, { "epoch": 0.8053848209513629, "grad_norm": 0.1748046875, "learning_rate": 2.220157241309028e-05, "loss": 1.2517, "step": 12055 }, { "epoch": 0.8057188669160876, "grad_norm": 0.162109375, "learning_rate": 2.2128363810546205e-05, "loss": 1.0946, "step": 12060 }, { "epoch": 0.8060529128808124, "grad_norm": 0.1669921875, "learning_rate": 2.205526108931857e-05, "loss": 1.1852, "step": 12065 }, { "epoch": 0.8063869588455371, "grad_norm": 0.1640625, "learning_rate": 2.1982264348804525e-05, "loss": 1.1549, "step": 12070 }, { "epoch": 0.8067210048102619, "grad_norm": 0.1748046875, "learning_rate": 2.1909373688257008e-05, "loss": 1.2467, "step": 12075 }, { "epoch": 0.8070550507749866, "grad_norm": 0.173828125, "learning_rate": 2.183658920678474e-05, "loss": 1.1084, "step": 12080 }, { "epoch": 0.8073890967397114, "grad_norm": 0.173828125, "learning_rate": 2.1763911003352055e-05, "loss": 1.2701, "step": 12085 }, { "epoch": 0.8077231427044361, "grad_norm": 0.171875, "learning_rate": 2.1691339176778856e-05, "loss": 1.1389, "step": 12090 }, { "epoch": 0.8080571886691609, "grad_norm": 0.1650390625, "learning_rate": 2.161887382574035e-05, "loss": 1.2096, "step": 12095 }, { "epoch": 0.8083912346338856, "grad_norm": 0.1728515625, "learning_rate": 2.1546515048766914e-05, "loss": 1.2264, "step": 12100 }, { "epoch": 0.8087252805986104, "grad_norm": 0.1669921875, "learning_rate": 2.1474262944244196e-05, "loss": 1.1749, "step": 12105 }, { "epoch": 0.8090593265633351, "grad_norm": 0.16796875, "learning_rate": 2.140211761041262e-05, "loss": 1.2378, "step": 12110 }, { "epoch": 0.8093933725280599, "grad_norm": 0.1669921875, "learning_rate": 2.133007914536753e-05, "loss": 1.1184, "step": 12115 }, { "epoch": 0.8097274184927846, "grad_norm": 0.171875, "learning_rate": 2.125814764705889e-05, "loss": 1.1524, "step": 12120 }, { "epoch": 0.8100614644575094, "grad_norm": 0.171875, "learning_rate": 2.1186323213291316e-05, "loss": 1.2009, "step": 12125 }, { "epoch": 0.8103955104222341, "grad_norm": 0.1962890625, "learning_rate": 2.1114605941723777e-05, "loss": 1.1628, "step": 12130 }, { "epoch": 0.8107295563869589, "grad_norm": 0.1650390625, "learning_rate": 2.104299592986958e-05, "loss": 1.2373, "step": 12135 }, { "epoch": 0.8110636023516836, "grad_norm": 0.1748046875, "learning_rate": 2.0971493275096133e-05, "loss": 1.2578, "step": 12140 }, { "epoch": 0.8113976483164084, "grad_norm": 0.1767578125, "learning_rate": 2.0900098074624952e-05, "loss": 1.1665, "step": 12145 }, { "epoch": 0.8117316942811331, "grad_norm": 0.1845703125, "learning_rate": 2.08288104255314e-05, "loss": 1.1408, "step": 12150 }, { "epoch": 0.8120657402458579, "grad_norm": 0.1650390625, "learning_rate": 2.0757630424744568e-05, "loss": 1.1506, "step": 12155 }, { "epoch": 0.8123997862105826, "grad_norm": 0.173828125, "learning_rate": 2.0686558169047256e-05, "loss": 1.2123, "step": 12160 }, { "epoch": 0.8127338321753074, "grad_norm": 0.1708984375, "learning_rate": 2.0615593755075734e-05, "loss": 1.1628, "step": 12165 }, { "epoch": 0.8130678781400321, "grad_norm": 0.1708984375, "learning_rate": 2.0544737279319636e-05, "loss": 1.2352, "step": 12170 }, { "epoch": 0.8134019241047569, "grad_norm": 0.1572265625, "learning_rate": 2.0473988838121783e-05, "loss": 1.1936, "step": 12175 }, { "epoch": 0.8137359700694815, "grad_norm": 0.1669921875, "learning_rate": 2.0403348527678222e-05, "loss": 1.1521, "step": 12180 }, { "epoch": 0.8140700160342063, "grad_norm": 0.1767578125, "learning_rate": 2.0332816444037873e-05, "loss": 1.2135, "step": 12185 }, { "epoch": 0.814404061998931, "grad_norm": 0.1796875, "learning_rate": 2.0262392683102493e-05, "loss": 1.2112, "step": 12190 }, { "epoch": 0.8147381079636558, "grad_norm": 0.1708984375, "learning_rate": 2.0192077340626636e-05, "loss": 1.1762, "step": 12195 }, { "epoch": 0.8150721539283805, "grad_norm": 0.1708984375, "learning_rate": 2.012187051221742e-05, "loss": 1.1734, "step": 12200 }, { "epoch": 0.8154061998931053, "grad_norm": 0.181640625, "learning_rate": 2.005177229333437e-05, "loss": 1.1807, "step": 12205 }, { "epoch": 0.81574024585783, "grad_norm": 0.1650390625, "learning_rate": 1.998178277928934e-05, "loss": 1.1999, "step": 12210 }, { "epoch": 0.8160742918225548, "grad_norm": 0.1904296875, "learning_rate": 1.9911902065246447e-05, "loss": 1.1723, "step": 12215 }, { "epoch": 0.8164083377872795, "grad_norm": 0.162109375, "learning_rate": 1.9842130246221768e-05, "loss": 1.1958, "step": 12220 }, { "epoch": 0.8167423837520043, "grad_norm": 0.16796875, "learning_rate": 1.977246741708344e-05, "loss": 1.2349, "step": 12225 }, { "epoch": 0.817076429716729, "grad_norm": 0.1689453125, "learning_rate": 1.9702913672551292e-05, "loss": 1.1717, "step": 12230 }, { "epoch": 0.8174104756814538, "grad_norm": 0.169921875, "learning_rate": 1.9633469107196932e-05, "loss": 1.2384, "step": 12235 }, { "epoch": 0.8177445216461785, "grad_norm": 0.1767578125, "learning_rate": 1.956413381544344e-05, "loss": 1.1851, "step": 12240 }, { "epoch": 0.8180785676109033, "grad_norm": 0.173828125, "learning_rate": 1.9494907891565316e-05, "loss": 1.2535, "step": 12245 }, { "epoch": 0.818412613575628, "grad_norm": 0.1611328125, "learning_rate": 1.942579142968842e-05, "loss": 1.1402, "step": 12250 }, { "epoch": 0.8187466595403528, "grad_norm": 0.1875, "learning_rate": 1.9356784523789772e-05, "loss": 1.2167, "step": 12255 }, { "epoch": 0.8190807055050775, "grad_norm": 0.1728515625, "learning_rate": 1.928788726769737e-05, "loss": 1.1381, "step": 12260 }, { "epoch": 0.8194147514698022, "grad_norm": 0.1669921875, "learning_rate": 1.9219099755090107e-05, "loss": 1.1904, "step": 12265 }, { "epoch": 0.819748797434527, "grad_norm": 0.173828125, "learning_rate": 1.915042207949779e-05, "loss": 1.2316, "step": 12270 }, { "epoch": 0.8200828433992517, "grad_norm": 0.1650390625, "learning_rate": 1.908185433430074e-05, "loss": 1.1822, "step": 12275 }, { "epoch": 0.8204168893639765, "grad_norm": 0.19921875, "learning_rate": 1.901339661272985e-05, "loss": 1.2357, "step": 12280 }, { "epoch": 0.8207509353287012, "grad_norm": 0.19140625, "learning_rate": 1.8945049007866446e-05, "loss": 1.1806, "step": 12285 }, { "epoch": 0.821084981293426, "grad_norm": 0.1689453125, "learning_rate": 1.887681161264214e-05, "loss": 1.1728, "step": 12290 }, { "epoch": 0.8214190272581507, "grad_norm": 0.1923828125, "learning_rate": 1.880868451983865e-05, "loss": 1.2062, "step": 12295 }, { "epoch": 0.8217530732228755, "grad_norm": 0.166015625, "learning_rate": 1.874066782208771e-05, "loss": 1.1982, "step": 12300 }, { "epoch": 0.8220871191876002, "grad_norm": 0.1748046875, "learning_rate": 1.8672761611870958e-05, "loss": 1.2017, "step": 12305 }, { "epoch": 0.822421165152325, "grad_norm": 0.1708984375, "learning_rate": 1.8604965981519827e-05, "loss": 1.1544, "step": 12310 }, { "epoch": 0.8227552111170497, "grad_norm": 0.1845703125, "learning_rate": 1.8537281023215436e-05, "loss": 1.0842, "step": 12315 }, { "epoch": 0.8230892570817745, "grad_norm": 0.1943359375, "learning_rate": 1.846970682898833e-05, "loss": 1.2518, "step": 12320 }, { "epoch": 0.8234233030464992, "grad_norm": 0.1689453125, "learning_rate": 1.8402243490718474e-05, "loss": 1.2632, "step": 12325 }, { "epoch": 0.823757349011224, "grad_norm": 0.1787109375, "learning_rate": 1.8334891100135166e-05, "loss": 1.2021, "step": 12330 }, { "epoch": 0.8240913949759487, "grad_norm": 0.1640625, "learning_rate": 1.8267649748816772e-05, "loss": 1.1112, "step": 12335 }, { "epoch": 0.8244254409406734, "grad_norm": 0.16015625, "learning_rate": 1.820051952819072e-05, "loss": 1.2062, "step": 12340 }, { "epoch": 0.8247594869053981, "grad_norm": 0.177734375, "learning_rate": 1.8133500529533308e-05, "loss": 1.2504, "step": 12345 }, { "epoch": 0.8250935328701229, "grad_norm": 0.1728515625, "learning_rate": 1.806659284396969e-05, "loss": 1.2482, "step": 12350 }, { "epoch": 0.8254275788348476, "grad_norm": 0.1708984375, "learning_rate": 1.799979656247355e-05, "loss": 1.1268, "step": 12355 }, { "epoch": 0.8257616247995724, "grad_norm": 0.1650390625, "learning_rate": 1.793311177586714e-05, "loss": 1.1863, "step": 12360 }, { "epoch": 0.8260956707642971, "grad_norm": 0.1904296875, "learning_rate": 1.786653857482118e-05, "loss": 1.2579, "step": 12365 }, { "epoch": 0.8264297167290219, "grad_norm": 0.173828125, "learning_rate": 1.780007704985457e-05, "loss": 1.1932, "step": 12370 }, { "epoch": 0.8267637626937466, "grad_norm": 0.1669921875, "learning_rate": 1.77337272913344e-05, "loss": 1.2517, "step": 12375 }, { "epoch": 0.8270978086584714, "grad_norm": 0.1806640625, "learning_rate": 1.766748938947581e-05, "loss": 1.2773, "step": 12380 }, { "epoch": 0.8274318546231961, "grad_norm": 0.1630859375, "learning_rate": 1.760136343434188e-05, "loss": 1.1525, "step": 12385 }, { "epoch": 0.8277659005879209, "grad_norm": 0.169921875, "learning_rate": 1.7535349515843392e-05, "loss": 1.1876, "step": 12390 }, { "epoch": 0.8280999465526456, "grad_norm": 0.169921875, "learning_rate": 1.746944772373883e-05, "loss": 1.1088, "step": 12395 }, { "epoch": 0.8284339925173704, "grad_norm": 0.19140625, "learning_rate": 1.740365814763427e-05, "loss": 1.2222, "step": 12400 }, { "epoch": 0.8287680384820951, "grad_norm": 0.173828125, "learning_rate": 1.733798087698313e-05, "loss": 1.2101, "step": 12405 }, { "epoch": 0.8291020844468199, "grad_norm": 0.1669921875, "learning_rate": 1.727241600108619e-05, "loss": 1.2127, "step": 12410 }, { "epoch": 0.8294361304115446, "grad_norm": 0.1689453125, "learning_rate": 1.7206963609091352e-05, "loss": 1.1727, "step": 12415 }, { "epoch": 0.8297701763762694, "grad_norm": 0.1669921875, "learning_rate": 1.7141623789993655e-05, "loss": 1.1811, "step": 12420 }, { "epoch": 0.8301042223409941, "grad_norm": 0.181640625, "learning_rate": 1.7076396632634994e-05, "loss": 1.179, "step": 12425 }, { "epoch": 0.8304382683057189, "grad_norm": 0.166015625, "learning_rate": 1.7011282225704074e-05, "loss": 1.1736, "step": 12430 }, { "epoch": 0.8307723142704436, "grad_norm": 0.16015625, "learning_rate": 1.694628065773638e-05, "loss": 1.1563, "step": 12435 }, { "epoch": 0.8311063602351684, "grad_norm": 0.162109375, "learning_rate": 1.6881392017113917e-05, "loss": 1.2117, "step": 12440 }, { "epoch": 0.8314404061998931, "grad_norm": 0.173828125, "learning_rate": 1.6816616392065142e-05, "loss": 1.1943, "step": 12445 }, { "epoch": 0.8317744521646179, "grad_norm": 0.171875, "learning_rate": 1.6751953870664817e-05, "loss": 1.2046, "step": 12450 }, { "epoch": 0.8321084981293426, "grad_norm": 0.1611328125, "learning_rate": 1.6687404540833996e-05, "loss": 1.1596, "step": 12455 }, { "epoch": 0.8324425440940674, "grad_norm": 0.1728515625, "learning_rate": 1.6622968490339773e-05, "loss": 1.265, "step": 12460 }, { "epoch": 0.8327765900587921, "grad_norm": 0.1630859375, "learning_rate": 1.6558645806795193e-05, "loss": 1.2546, "step": 12465 }, { "epoch": 0.8331106360235169, "grad_norm": 0.162109375, "learning_rate": 1.6494436577659222e-05, "loss": 1.1452, "step": 12470 }, { "epoch": 0.8334446819882416, "grad_norm": 0.1767578125, "learning_rate": 1.643034089023655e-05, "loss": 1.2115, "step": 12475 }, { "epoch": 0.8337787279529664, "grad_norm": 0.169921875, "learning_rate": 1.6366358831677454e-05, "loss": 1.156, "step": 12480 }, { "epoch": 0.8341127739176911, "grad_norm": 0.171875, "learning_rate": 1.6302490488977705e-05, "loss": 1.1572, "step": 12485 }, { "epoch": 0.8344468198824159, "grad_norm": 0.1728515625, "learning_rate": 1.623873594897848e-05, "loss": 1.1823, "step": 12490 }, { "epoch": 0.8347808658471406, "grad_norm": 0.1865234375, "learning_rate": 1.6175095298366217e-05, "loss": 1.1672, "step": 12495 }, { "epoch": 0.8351149118118654, "grad_norm": 0.1689453125, "learning_rate": 1.6111568623672533e-05, "loss": 1.1418, "step": 12500 }, { "epoch": 0.83544895777659, "grad_norm": 0.173828125, "learning_rate": 1.6048156011274018e-05, "loss": 1.2192, "step": 12505 }, { "epoch": 0.8357830037413148, "grad_norm": 0.1708984375, "learning_rate": 1.598485754739215e-05, "loss": 1.2165, "step": 12510 }, { "epoch": 0.8361170497060395, "grad_norm": 0.1669921875, "learning_rate": 1.59216733180933e-05, "loss": 1.2849, "step": 12515 }, { "epoch": 0.8364510956707643, "grad_norm": 0.181640625, "learning_rate": 1.585860340928844e-05, "loss": 1.2269, "step": 12520 }, { "epoch": 0.836785141635489, "grad_norm": 0.1669921875, "learning_rate": 1.579564790673308e-05, "loss": 1.2391, "step": 12525 }, { "epoch": 0.8371191876002138, "grad_norm": 0.1669921875, "learning_rate": 1.5732806896027287e-05, "loss": 1.1784, "step": 12530 }, { "epoch": 0.8374532335649385, "grad_norm": 0.1708984375, "learning_rate": 1.5670080462615345e-05, "loss": 1.1549, "step": 12535 }, { "epoch": 0.8377872795296633, "grad_norm": 0.171875, "learning_rate": 1.5607468691785776e-05, "loss": 1.1978, "step": 12540 }, { "epoch": 0.838121325494388, "grad_norm": 0.181640625, "learning_rate": 1.554497166867118e-05, "loss": 1.2579, "step": 12545 }, { "epoch": 0.8384553714591128, "grad_norm": 0.166015625, "learning_rate": 1.5482589478248222e-05, "loss": 1.2783, "step": 12550 }, { "epoch": 0.8387894174238375, "grad_norm": 0.162109375, "learning_rate": 1.5420322205337333e-05, "loss": 1.2231, "step": 12555 }, { "epoch": 0.8391234633885623, "grad_norm": 0.169921875, "learning_rate": 1.5358169934602706e-05, "loss": 1.1347, "step": 12560 }, { "epoch": 0.839457509353287, "grad_norm": 0.1826171875, "learning_rate": 1.5296132750552207e-05, "loss": 1.1165, "step": 12565 }, { "epoch": 0.8397915553180118, "grad_norm": 0.1826171875, "learning_rate": 1.5234210737537225e-05, "loss": 1.2383, "step": 12570 }, { "epoch": 0.8401256012827365, "grad_norm": 0.1689453125, "learning_rate": 1.5172403979752492e-05, "loss": 1.1735, "step": 12575 }, { "epoch": 0.8404596472474613, "grad_norm": 0.1787109375, "learning_rate": 1.5110712561236062e-05, "loss": 1.2072, "step": 12580 }, { "epoch": 0.840793693212186, "grad_norm": 0.1708984375, "learning_rate": 1.5049136565869205e-05, "loss": 1.1671, "step": 12585 }, { "epoch": 0.8411277391769107, "grad_norm": 0.1572265625, "learning_rate": 1.4987676077376156e-05, "loss": 1.2047, "step": 12590 }, { "epoch": 0.8414617851416355, "grad_norm": 0.1669921875, "learning_rate": 1.4926331179324205e-05, "loss": 1.1631, "step": 12595 }, { "epoch": 0.8417958311063602, "grad_norm": 0.16796875, "learning_rate": 1.4865101955123362e-05, "loss": 1.1725, "step": 12600 }, { "epoch": 0.842129877071085, "grad_norm": 0.17578125, "learning_rate": 1.4803988488026487e-05, "loss": 1.2299, "step": 12605 }, { "epoch": 0.8424639230358097, "grad_norm": 0.1640625, "learning_rate": 1.4742990861128924e-05, "loss": 1.1771, "step": 12610 }, { "epoch": 0.8427979690005345, "grad_norm": 0.1728515625, "learning_rate": 1.4682109157368561e-05, "loss": 1.2413, "step": 12615 }, { "epoch": 0.8431320149652592, "grad_norm": 0.1796875, "learning_rate": 1.4621343459525671e-05, "loss": 1.1604, "step": 12620 }, { "epoch": 0.843466060929984, "grad_norm": 0.169921875, "learning_rate": 1.4560693850222828e-05, "loss": 1.1893, "step": 12625 }, { "epoch": 0.8438001068947087, "grad_norm": 0.17578125, "learning_rate": 1.450016041192469e-05, "loss": 1.1732, "step": 12630 }, { "epoch": 0.8441341528594335, "grad_norm": 0.1767578125, "learning_rate": 1.4439743226937975e-05, "loss": 1.1983, "step": 12635 }, { "epoch": 0.8444681988241582, "grad_norm": 0.1748046875, "learning_rate": 1.437944237741139e-05, "loss": 1.2534, "step": 12640 }, { "epoch": 0.844802244788883, "grad_norm": 0.171875, "learning_rate": 1.4319257945335408e-05, "loss": 1.2628, "step": 12645 }, { "epoch": 0.8451362907536077, "grad_norm": 0.1767578125, "learning_rate": 1.425919001254219e-05, "loss": 1.1942, "step": 12650 }, { "epoch": 0.8454703367183325, "grad_norm": 0.158203125, "learning_rate": 1.419923866070556e-05, "loss": 1.1969, "step": 12655 }, { "epoch": 0.8458043826830572, "grad_norm": 0.177734375, "learning_rate": 1.4139403971340815e-05, "loss": 1.2407, "step": 12660 }, { "epoch": 0.8461384286477819, "grad_norm": 0.18359375, "learning_rate": 1.4079686025804584e-05, "loss": 1.2333, "step": 12665 }, { "epoch": 0.8464724746125066, "grad_norm": 0.185546875, "learning_rate": 1.4020084905294761e-05, "loss": 1.2168, "step": 12670 }, { "epoch": 0.8468065205772314, "grad_norm": 0.162109375, "learning_rate": 1.3960600690850466e-05, "loss": 1.2205, "step": 12675 }, { "epoch": 0.8471405665419561, "grad_norm": 0.169921875, "learning_rate": 1.3901233463351771e-05, "loss": 1.1949, "step": 12680 }, { "epoch": 0.8474746125066809, "grad_norm": 0.16015625, "learning_rate": 1.3841983303519756e-05, "loss": 1.22, "step": 12685 }, { "epoch": 0.8478086584714056, "grad_norm": 0.18359375, "learning_rate": 1.3782850291916271e-05, "loss": 1.1553, "step": 12690 }, { "epoch": 0.8481427044361304, "grad_norm": 0.1708984375, "learning_rate": 1.3723834508943945e-05, "loss": 1.2315, "step": 12695 }, { "epoch": 0.8484767504008551, "grad_norm": 0.1767578125, "learning_rate": 1.3664936034845933e-05, "loss": 1.1529, "step": 12700 }, { "epoch": 0.8488107963655799, "grad_norm": 0.1611328125, "learning_rate": 1.360615494970594e-05, "loss": 1.1605, "step": 12705 }, { "epoch": 0.8491448423303046, "grad_norm": 0.1796875, "learning_rate": 1.3547491333448003e-05, "loss": 1.1639, "step": 12710 }, { "epoch": 0.8494788882950294, "grad_norm": 0.205078125, "learning_rate": 1.348894526583655e-05, "loss": 1.189, "step": 12715 }, { "epoch": 0.8498129342597541, "grad_norm": 0.1787109375, "learning_rate": 1.343051682647607e-05, "loss": 1.2305, "step": 12720 }, { "epoch": 0.8501469802244789, "grad_norm": 0.1728515625, "learning_rate": 1.3372206094811158e-05, "loss": 1.1985, "step": 12725 }, { "epoch": 0.8504810261892036, "grad_norm": 0.1767578125, "learning_rate": 1.3314013150126336e-05, "loss": 1.1517, "step": 12730 }, { "epoch": 0.8508150721539284, "grad_norm": 0.171875, "learning_rate": 1.3255938071546026e-05, "loss": 1.1212, "step": 12735 }, { "epoch": 0.8511491181186531, "grad_norm": 0.1787109375, "learning_rate": 1.3197980938034305e-05, "loss": 1.1748, "step": 12740 }, { "epoch": 0.8514831640833779, "grad_norm": 0.1669921875, "learning_rate": 1.3140141828394992e-05, "loss": 1.2564, "step": 12745 }, { "epoch": 0.8518172100481026, "grad_norm": 0.162109375, "learning_rate": 1.3082420821271324e-05, "loss": 1.2606, "step": 12750 }, { "epoch": 0.8521512560128274, "grad_norm": 0.16796875, "learning_rate": 1.3024817995146032e-05, "loss": 1.2375, "step": 12755 }, { "epoch": 0.8524853019775521, "grad_norm": 0.1787109375, "learning_rate": 1.2967333428341121e-05, "loss": 1.1506, "step": 12760 }, { "epoch": 0.8528193479422769, "grad_norm": 0.173828125, "learning_rate": 1.290996719901777e-05, "loss": 1.2125, "step": 12765 }, { "epoch": 0.8531533939070016, "grad_norm": 0.177734375, "learning_rate": 1.2852719385176303e-05, "loss": 1.231, "step": 12770 }, { "epoch": 0.8534874398717264, "grad_norm": 0.1689453125, "learning_rate": 1.279559006465607e-05, "loss": 1.2045, "step": 12775 }, { "epoch": 0.8538214858364511, "grad_norm": 0.1708984375, "learning_rate": 1.2738579315135224e-05, "loss": 1.1227, "step": 12780 }, { "epoch": 0.8541555318011759, "grad_norm": 0.1640625, "learning_rate": 1.268168721413071e-05, "loss": 1.1242, "step": 12785 }, { "epoch": 0.8544895777659006, "grad_norm": 0.1796875, "learning_rate": 1.262491383899823e-05, "loss": 1.1677, "step": 12790 }, { "epoch": 0.8548236237306254, "grad_norm": 0.216796875, "learning_rate": 1.2568259266931958e-05, "loss": 1.2305, "step": 12795 }, { "epoch": 0.8551576696953501, "grad_norm": 0.1728515625, "learning_rate": 1.251172357496455e-05, "loss": 1.1436, "step": 12800 }, { "epoch": 0.8554917156600749, "grad_norm": 0.19140625, "learning_rate": 1.2455306839967085e-05, "loss": 1.178, "step": 12805 }, { "epoch": 0.8558257616247996, "grad_norm": 0.1708984375, "learning_rate": 1.2399009138648854e-05, "loss": 1.1356, "step": 12810 }, { "epoch": 0.8561598075895244, "grad_norm": 0.1796875, "learning_rate": 1.2342830547557305e-05, "loss": 1.2149, "step": 12815 }, { "epoch": 0.8564938535542491, "grad_norm": 0.1669921875, "learning_rate": 1.228677114307788e-05, "loss": 1.185, "step": 12820 }, { "epoch": 0.8568278995189738, "grad_norm": 0.185546875, "learning_rate": 1.2230831001434084e-05, "loss": 1.2487, "step": 12825 }, { "epoch": 0.8571619454836985, "grad_norm": 0.173828125, "learning_rate": 1.2175010198687143e-05, "loss": 1.1808, "step": 12830 }, { "epoch": 0.8574959914484233, "grad_norm": 0.1728515625, "learning_rate": 1.2119308810736064e-05, "loss": 1.234, "step": 12835 }, { "epoch": 0.857830037413148, "grad_norm": 0.1728515625, "learning_rate": 1.2063726913317508e-05, "loss": 1.1794, "step": 12840 }, { "epoch": 0.8581640833778728, "grad_norm": 0.169921875, "learning_rate": 1.2008264582005657e-05, "loss": 1.2039, "step": 12845 }, { "epoch": 0.8584981293425975, "grad_norm": 0.1748046875, "learning_rate": 1.195292189221211e-05, "loss": 1.1237, "step": 12850 }, { "epoch": 0.8588321753073223, "grad_norm": 0.1767578125, "learning_rate": 1.189769891918575e-05, "loss": 1.2079, "step": 12855 }, { "epoch": 0.859166221272047, "grad_norm": 0.2080078125, "learning_rate": 1.1842595738012774e-05, "loss": 1.1594, "step": 12860 }, { "epoch": 0.8595002672367718, "grad_norm": 0.173828125, "learning_rate": 1.1787612423616412e-05, "loss": 1.2106, "step": 12865 }, { "epoch": 0.8598343132014965, "grad_norm": 0.1845703125, "learning_rate": 1.1732749050756987e-05, "loss": 1.2124, "step": 12870 }, { "epoch": 0.8601683591662213, "grad_norm": 0.173828125, "learning_rate": 1.1678005694031657e-05, "loss": 1.2354, "step": 12875 }, { "epoch": 0.860502405130946, "grad_norm": 0.1806640625, "learning_rate": 1.1623382427874474e-05, "loss": 1.1976, "step": 12880 }, { "epoch": 0.8608364510956708, "grad_norm": 0.185546875, "learning_rate": 1.156887932655616e-05, "loss": 1.1636, "step": 12885 }, { "epoch": 0.8611704970603955, "grad_norm": 0.1708984375, "learning_rate": 1.1514496464184056e-05, "loss": 1.2191, "step": 12890 }, { "epoch": 0.8615045430251203, "grad_norm": 0.1728515625, "learning_rate": 1.1460233914701968e-05, "loss": 1.2312, "step": 12895 }, { "epoch": 0.861838588989845, "grad_norm": 0.1767578125, "learning_rate": 1.1406091751890257e-05, "loss": 1.2598, "step": 12900 }, { "epoch": 0.8621726349545697, "grad_norm": 0.1689453125, "learning_rate": 1.135207004936546e-05, "loss": 1.175, "step": 12905 }, { "epoch": 0.8625066809192945, "grad_norm": 0.16796875, "learning_rate": 1.1298168880580362e-05, "loss": 1.2295, "step": 12910 }, { "epoch": 0.8628407268840192, "grad_norm": 0.1640625, "learning_rate": 1.1244388318823851e-05, "loss": 1.173, "step": 12915 }, { "epoch": 0.863174772848744, "grad_norm": 0.2236328125, "learning_rate": 1.1190728437220877e-05, "loss": 1.2834, "step": 12920 }, { "epoch": 0.8635088188134687, "grad_norm": 0.173828125, "learning_rate": 1.113718930873222e-05, "loss": 1.2072, "step": 12925 }, { "epoch": 0.8638428647781935, "grad_norm": 0.177734375, "learning_rate": 1.1083771006154553e-05, "loss": 1.2174, "step": 12930 }, { "epoch": 0.8641769107429182, "grad_norm": 0.171875, "learning_rate": 1.103047360212024e-05, "loss": 1.1909, "step": 12935 }, { "epoch": 0.864510956707643, "grad_norm": 0.1767578125, "learning_rate": 1.0977297169097234e-05, "loss": 1.1942, "step": 12940 }, { "epoch": 0.8648450026723677, "grad_norm": 0.1708984375, "learning_rate": 1.0924241779389011e-05, "loss": 1.1526, "step": 12945 }, { "epoch": 0.8651790486370925, "grad_norm": 0.1708984375, "learning_rate": 1.0871307505134476e-05, "loss": 1.2266, "step": 12950 }, { "epoch": 0.8655130946018172, "grad_norm": 0.1669921875, "learning_rate": 1.0818494418307845e-05, "loss": 1.2367, "step": 12955 }, { "epoch": 0.865847140566542, "grad_norm": 0.1689453125, "learning_rate": 1.076580259071861e-05, "loss": 1.2017, "step": 12960 }, { "epoch": 0.8661811865312667, "grad_norm": 0.1728515625, "learning_rate": 1.0713232094011316e-05, "loss": 1.1665, "step": 12965 }, { "epoch": 0.8665152324959915, "grad_norm": 0.1689453125, "learning_rate": 1.0660782999665542e-05, "loss": 1.2247, "step": 12970 }, { "epoch": 0.8668492784607162, "grad_norm": 0.173828125, "learning_rate": 1.0608455378995851e-05, "loss": 1.1821, "step": 12975 }, { "epoch": 0.867183324425441, "grad_norm": 0.1689453125, "learning_rate": 1.0556249303151599e-05, "loss": 1.1619, "step": 12980 }, { "epoch": 0.8675173703901656, "grad_norm": 0.177734375, "learning_rate": 1.050416484311686e-05, "loss": 1.1964, "step": 12985 }, { "epoch": 0.8678514163548904, "grad_norm": 0.1728515625, "learning_rate": 1.0452202069710393e-05, "loss": 1.1573, "step": 12990 }, { "epoch": 0.8681854623196151, "grad_norm": 0.1708984375, "learning_rate": 1.0400361053585506e-05, "loss": 1.1311, "step": 12995 }, { "epoch": 0.8685195082843399, "grad_norm": 0.1748046875, "learning_rate": 1.0348641865229914e-05, "loss": 1.1306, "step": 13000 }, { "epoch": 0.8688535542490646, "grad_norm": 0.1728515625, "learning_rate": 1.0297044574965675e-05, "loss": 1.2016, "step": 13005 }, { "epoch": 0.8691876002137894, "grad_norm": 0.1884765625, "learning_rate": 1.024556925294916e-05, "loss": 1.2006, "step": 13010 }, { "epoch": 0.8695216461785141, "grad_norm": 0.1748046875, "learning_rate": 1.0194215969170872e-05, "loss": 1.1202, "step": 13015 }, { "epoch": 0.8698556921432389, "grad_norm": 0.1748046875, "learning_rate": 1.0142984793455346e-05, "loss": 1.2478, "step": 13020 }, { "epoch": 0.8701897381079636, "grad_norm": 0.171875, "learning_rate": 1.0091875795461147e-05, "loss": 1.1605, "step": 13025 }, { "epoch": 0.8705237840726884, "grad_norm": 0.19140625, "learning_rate": 1.00408890446807e-05, "loss": 1.2451, "step": 13030 }, { "epoch": 0.8708578300374131, "grad_norm": 0.173828125, "learning_rate": 9.990024610440185e-06, "loss": 1.2272, "step": 13035 }, { "epoch": 0.8711918760021379, "grad_norm": 0.16796875, "learning_rate": 9.939282561899466e-06, "loss": 1.112, "step": 13040 }, { "epoch": 0.8715259219668626, "grad_norm": 0.1728515625, "learning_rate": 9.888662968052053e-06, "loss": 1.2417, "step": 13045 }, { "epoch": 0.8718599679315874, "grad_norm": 0.18359375, "learning_rate": 9.838165897724894e-06, "loss": 1.2227, "step": 13050 }, { "epoch": 0.8721940138963121, "grad_norm": 0.16796875, "learning_rate": 9.787791419578407e-06, "loss": 1.2224, "step": 13055 }, { "epoch": 0.8725280598610369, "grad_norm": 0.1630859375, "learning_rate": 9.737539602106238e-06, "loss": 1.1799, "step": 13060 }, { "epoch": 0.8728621058257616, "grad_norm": 0.1962890625, "learning_rate": 9.687410513635354e-06, "loss": 1.2489, "step": 13065 }, { "epoch": 0.8731961517904864, "grad_norm": 0.1728515625, "learning_rate": 9.637404222325763e-06, "loss": 1.1243, "step": 13070 }, { "epoch": 0.8735301977552111, "grad_norm": 0.162109375, "learning_rate": 9.587520796170524e-06, "loss": 1.1682, "step": 13075 }, { "epoch": 0.8738642437199359, "grad_norm": 0.1708984375, "learning_rate": 9.53776030299568e-06, "loss": 1.1071, "step": 13080 }, { "epoch": 0.8741982896846606, "grad_norm": 0.1748046875, "learning_rate": 9.488122810460097e-06, "loss": 1.2338, "step": 13085 }, { "epoch": 0.8745323356493854, "grad_norm": 0.1669921875, "learning_rate": 9.438608386055403e-06, "loss": 1.23, "step": 13090 }, { "epoch": 0.8748663816141101, "grad_norm": 0.1630859375, "learning_rate": 9.38921709710585e-06, "loss": 1.1198, "step": 13095 }, { "epoch": 0.8752004275788349, "grad_norm": 0.1767578125, "learning_rate": 9.339949010768346e-06, "loss": 1.2296, "step": 13100 }, { "epoch": 0.8755344735435596, "grad_norm": 0.1943359375, "learning_rate": 9.290804194032199e-06, "loss": 1.1535, "step": 13105 }, { "epoch": 0.8758685195082844, "grad_norm": 0.17578125, "learning_rate": 9.24178271371915e-06, "loss": 1.2012, "step": 13110 }, { "epoch": 0.8762025654730091, "grad_norm": 0.1923828125, "learning_rate": 9.192884636483246e-06, "loss": 1.2506, "step": 13115 }, { "epoch": 0.8765366114377339, "grad_norm": 0.1875, "learning_rate": 9.144110028810737e-06, "loss": 1.2568, "step": 13120 }, { "epoch": 0.8768706574024586, "grad_norm": 0.1728515625, "learning_rate": 9.095458957019987e-06, "loss": 1.1589, "step": 13125 }, { "epoch": 0.8772047033671834, "grad_norm": 0.173828125, "learning_rate": 9.04693148726139e-06, "loss": 1.2607, "step": 13130 }, { "epoch": 0.8775387493319081, "grad_norm": 0.185546875, "learning_rate": 8.998527685517255e-06, "loss": 1.2026, "step": 13135 }, { "epoch": 0.8778727952966329, "grad_norm": 0.16796875, "learning_rate": 8.950247617601793e-06, "loss": 1.1373, "step": 13140 }, { "epoch": 0.8782068412613576, "grad_norm": 0.158203125, "learning_rate": 8.902091349160968e-06, "loss": 1.1837, "step": 13145 }, { "epoch": 0.8785408872260823, "grad_norm": 0.169921875, "learning_rate": 8.854058945672372e-06, "loss": 1.1745, "step": 13150 }, { "epoch": 0.878874933190807, "grad_norm": 0.1689453125, "learning_rate": 8.806150472445185e-06, "loss": 1.2169, "step": 13155 }, { "epoch": 0.8792089791555318, "grad_norm": 0.181640625, "learning_rate": 8.758365994620132e-06, "loss": 1.1807, "step": 13160 }, { "epoch": 0.8795430251202565, "grad_norm": 0.1650390625, "learning_rate": 8.710705577169299e-06, "loss": 1.1301, "step": 13165 }, { "epoch": 0.8798770710849813, "grad_norm": 0.17578125, "learning_rate": 8.663169284896078e-06, "loss": 1.2744, "step": 13170 }, { "epoch": 0.880211117049706, "grad_norm": 0.1787109375, "learning_rate": 8.615757182435124e-06, "loss": 1.1536, "step": 13175 }, { "epoch": 0.8805451630144308, "grad_norm": 0.177734375, "learning_rate": 8.568469334252238e-06, "loss": 1.2433, "step": 13180 }, { "epoch": 0.8808792089791555, "grad_norm": 0.185546875, "learning_rate": 8.52130580464423e-06, "loss": 1.2025, "step": 13185 }, { "epoch": 0.8812132549438803, "grad_norm": 0.1640625, "learning_rate": 8.474266657738895e-06, "loss": 1.069, "step": 13190 }, { "epoch": 0.881547300908605, "grad_norm": 0.171875, "learning_rate": 8.427351957494921e-06, "loss": 1.1772, "step": 13195 }, { "epoch": 0.8818813468733298, "grad_norm": 0.1796875, "learning_rate": 8.380561767701778e-06, "loss": 1.2106, "step": 13200 }, { "epoch": 0.8822153928380545, "grad_norm": 0.208984375, "learning_rate": 8.333896151979636e-06, "loss": 1.2206, "step": 13205 }, { "epoch": 0.8825494388027793, "grad_norm": 0.1845703125, "learning_rate": 8.287355173779265e-06, "loss": 1.1584, "step": 13210 }, { "epoch": 0.882883484767504, "grad_norm": 0.181640625, "learning_rate": 8.240938896382022e-06, "loss": 1.1826, "step": 13215 }, { "epoch": 0.8832175307322288, "grad_norm": 0.2099609375, "learning_rate": 8.194647382899656e-06, "loss": 1.2083, "step": 13220 }, { "epoch": 0.8835515766969535, "grad_norm": 0.1650390625, "learning_rate": 8.148480696274275e-06, "loss": 1.2252, "step": 13225 }, { "epoch": 0.8838856226616782, "grad_norm": 0.1708984375, "learning_rate": 8.102438899278298e-06, "loss": 1.1475, "step": 13230 }, { "epoch": 0.884219668626403, "grad_norm": 0.185546875, "learning_rate": 8.056522054514337e-06, "loss": 1.2059, "step": 13235 }, { "epoch": 0.8845537145911277, "grad_norm": 0.1728515625, "learning_rate": 8.010730224415064e-06, "loss": 1.2262, "step": 13240 }, { "epoch": 0.8848877605558525, "grad_norm": 0.166015625, "learning_rate": 7.965063471243161e-06, "loss": 1.1197, "step": 13245 }, { "epoch": 0.8852218065205772, "grad_norm": 0.173828125, "learning_rate": 7.919521857091328e-06, "loss": 1.1822, "step": 13250 }, { "epoch": 0.885555852485302, "grad_norm": 0.1611328125, "learning_rate": 7.874105443882041e-06, "loss": 1.1623, "step": 13255 }, { "epoch": 0.8858898984500267, "grad_norm": 0.171875, "learning_rate": 7.828814293367525e-06, "loss": 1.2486, "step": 13260 }, { "epoch": 0.8862239444147515, "grad_norm": 0.1591796875, "learning_rate": 7.783648467129767e-06, "loss": 1.1448, "step": 13265 }, { "epoch": 0.8865579903794762, "grad_norm": 0.1611328125, "learning_rate": 7.738608026580296e-06, "loss": 1.1805, "step": 13270 }, { "epoch": 0.886892036344201, "grad_norm": 0.171875, "learning_rate": 7.693693032960181e-06, "loss": 1.2759, "step": 13275 }, { "epoch": 0.8872260823089257, "grad_norm": 0.1796875, "learning_rate": 7.648903547339858e-06, "loss": 1.1487, "step": 13280 }, { "epoch": 0.8875601282736505, "grad_norm": 0.1865234375, "learning_rate": 7.604239630619225e-06, "loss": 1.1906, "step": 13285 }, { "epoch": 0.8878941742383752, "grad_norm": 0.169921875, "learning_rate": 7.559701343527348e-06, "loss": 1.1496, "step": 13290 }, { "epoch": 0.8882282202031, "grad_norm": 0.16796875, "learning_rate": 7.515288746622495e-06, "loss": 1.1543, "step": 13295 }, { "epoch": 0.8885622661678247, "grad_norm": 0.17578125, "learning_rate": 7.471001900292063e-06, "loss": 1.1974, "step": 13300 }, { "epoch": 0.8888963121325495, "grad_norm": 0.1650390625, "learning_rate": 7.42684086475246e-06, "loss": 1.2546, "step": 13305 }, { "epoch": 0.8892303580972741, "grad_norm": 0.173828125, "learning_rate": 7.382805700049023e-06, "loss": 1.2219, "step": 13310 }, { "epoch": 0.8895644040619989, "grad_norm": 0.1650390625, "learning_rate": 7.338896466055934e-06, "loss": 1.1687, "step": 13315 }, { "epoch": 0.8898984500267236, "grad_norm": 0.166015625, "learning_rate": 7.295113222476147e-06, "loss": 1.1647, "step": 13320 }, { "epoch": 0.8902324959914484, "grad_norm": 0.16015625, "learning_rate": 7.251456028841319e-06, "loss": 1.171, "step": 13325 }, { "epoch": 0.8905665419561731, "grad_norm": 0.1796875, "learning_rate": 7.207924944511757e-06, "loss": 1.1786, "step": 13330 }, { "epoch": 0.8909005879208979, "grad_norm": 0.1748046875, "learning_rate": 7.1645200286762245e-06, "loss": 1.2024, "step": 13335 }, { "epoch": 0.8912346338856226, "grad_norm": 0.166015625, "learning_rate": 7.121241340351947e-06, "loss": 1.2028, "step": 13340 }, { "epoch": 0.8915686798503474, "grad_norm": 0.2001953125, "learning_rate": 7.078088938384597e-06, "loss": 1.2511, "step": 13345 }, { "epoch": 0.8919027258150721, "grad_norm": 0.173828125, "learning_rate": 7.035062881448051e-06, "loss": 1.1716, "step": 13350 }, { "epoch": 0.8922367717797969, "grad_norm": 0.158203125, "learning_rate": 6.99216322804439e-06, "loss": 1.1883, "step": 13355 }, { "epoch": 0.8925708177445216, "grad_norm": 0.177734375, "learning_rate": 6.9493900365039335e-06, "loss": 1.2068, "step": 13360 }, { "epoch": 0.8929048637092464, "grad_norm": 0.181640625, "learning_rate": 6.9067433649849465e-06, "loss": 1.2287, "step": 13365 }, { "epoch": 0.8932389096739711, "grad_norm": 0.173828125, "learning_rate": 6.864223271473702e-06, "loss": 1.2195, "step": 13370 }, { "epoch": 0.8935729556386959, "grad_norm": 0.2216796875, "learning_rate": 6.821829813784331e-06, "loss": 1.2318, "step": 13375 }, { "epoch": 0.8939070016034206, "grad_norm": 0.1669921875, "learning_rate": 6.779563049558857e-06, "loss": 1.1857, "step": 13380 }, { "epoch": 0.8942410475681454, "grad_norm": 0.162109375, "learning_rate": 6.7374230362669655e-06, "loss": 1.1986, "step": 13385 }, { "epoch": 0.8945750935328701, "grad_norm": 0.1708984375, "learning_rate": 6.695409831206045e-06, "loss": 1.1192, "step": 13390 }, { "epoch": 0.8949091394975949, "grad_norm": 0.1767578125, "learning_rate": 6.653523491501035e-06, "loss": 1.1814, "step": 13395 }, { "epoch": 0.8952431854623196, "grad_norm": 0.173828125, "learning_rate": 6.611764074104409e-06, "loss": 1.2354, "step": 13400 }, { "epoch": 0.8955772314270444, "grad_norm": 0.177734375, "learning_rate": 6.5701316357960285e-06, "loss": 1.1647, "step": 13405 }, { "epoch": 0.8959112773917691, "grad_norm": 0.18359375, "learning_rate": 6.52862623318311e-06, "loss": 1.1939, "step": 13410 }, { "epoch": 0.8962453233564939, "grad_norm": 0.1796875, "learning_rate": 6.487247922700157e-06, "loss": 1.1808, "step": 13415 }, { "epoch": 0.8965793693212186, "grad_norm": 0.1708984375, "learning_rate": 6.445996760608896e-06, "loss": 1.2396, "step": 13420 }, { "epoch": 0.8969134152859434, "grad_norm": 0.16796875, "learning_rate": 6.404872802998108e-06, "loss": 1.182, "step": 13425 }, { "epoch": 0.8972474612506681, "grad_norm": 0.16015625, "learning_rate": 6.3638761057836285e-06, "loss": 1.1529, "step": 13430 }, { "epoch": 0.8975815072153929, "grad_norm": 0.1748046875, "learning_rate": 6.323006724708302e-06, "loss": 1.2241, "step": 13435 }, { "epoch": 0.8979155531801176, "grad_norm": 0.1845703125, "learning_rate": 6.2822647153418235e-06, "loss": 1.2137, "step": 13440 }, { "epoch": 0.8982495991448424, "grad_norm": 0.1650390625, "learning_rate": 6.24165013308069e-06, "loss": 1.1781, "step": 13445 }, { "epoch": 0.8985836451095671, "grad_norm": 0.2060546875, "learning_rate": 6.2011630331481826e-06, "loss": 1.2033, "step": 13450 }, { "epoch": 0.8989176910742919, "grad_norm": 0.1884765625, "learning_rate": 6.160803470594234e-06, "loss": 1.3137, "step": 13455 }, { "epoch": 0.8992517370390166, "grad_norm": 0.1669921875, "learning_rate": 6.120571500295347e-06, "loss": 1.2285, "step": 13460 }, { "epoch": 0.8995857830037414, "grad_norm": 0.16796875, "learning_rate": 6.080467176954519e-06, "loss": 1.1531, "step": 13465 }, { "epoch": 0.899919828968466, "grad_norm": 0.1669921875, "learning_rate": 6.040490555101241e-06, "loss": 1.1854, "step": 13470 }, { "epoch": 0.9002538749331908, "grad_norm": 0.1689453125, "learning_rate": 6.000641689091324e-06, "loss": 1.1357, "step": 13475 }, { "epoch": 0.9005879208979155, "grad_norm": 0.1689453125, "learning_rate": 5.960920633106926e-06, "loss": 1.2068, "step": 13480 }, { "epoch": 0.9009219668626403, "grad_norm": 0.173828125, "learning_rate": 5.921327441156321e-06, "loss": 1.2706, "step": 13485 }, { "epoch": 0.901256012827365, "grad_norm": 0.171875, "learning_rate": 5.8818621670740595e-06, "loss": 1.2044, "step": 13490 }, { "epoch": 0.9015900587920898, "grad_norm": 0.169921875, "learning_rate": 5.84252486452066e-06, "loss": 1.1731, "step": 13495 }, { "epoch": 0.9019241047568145, "grad_norm": 0.1796875, "learning_rate": 5.803315586982671e-06, "loss": 1.1598, "step": 13500 }, { "epoch": 0.9022581507215393, "grad_norm": 0.171875, "learning_rate": 5.764234387772593e-06, "loss": 1.1737, "step": 13505 }, { "epoch": 0.902592196686264, "grad_norm": 0.1572265625, "learning_rate": 5.7252813200287306e-06, "loss": 1.1653, "step": 13510 }, { "epoch": 0.9029262426509888, "grad_norm": 0.1552734375, "learning_rate": 5.686456436715227e-06, "loss": 1.1973, "step": 13515 }, { "epoch": 0.9032602886157135, "grad_norm": 0.173828125, "learning_rate": 5.647759790621876e-06, "loss": 1.2162, "step": 13520 }, { "epoch": 0.9035943345804383, "grad_norm": 0.1826171875, "learning_rate": 5.609191434364159e-06, "loss": 1.2118, "step": 13525 }, { "epoch": 0.903928380545163, "grad_norm": 0.1708984375, "learning_rate": 5.5707514203830915e-06, "loss": 1.2077, "step": 13530 }, { "epoch": 0.9042624265098878, "grad_norm": 0.1728515625, "learning_rate": 5.532439800945188e-06, "loss": 1.2367, "step": 13535 }, { "epoch": 0.9045964724746125, "grad_norm": 0.177734375, "learning_rate": 5.494256628142358e-06, "loss": 1.2476, "step": 13540 }, { "epoch": 0.9049305184393373, "grad_norm": 0.177734375, "learning_rate": 5.45620195389196e-06, "loss": 1.1336, "step": 13545 }, { "epoch": 0.905264564404062, "grad_norm": 0.1650390625, "learning_rate": 5.418275829936537e-06, "loss": 1.2059, "step": 13550 }, { "epoch": 0.9055986103687867, "grad_norm": 0.1845703125, "learning_rate": 5.380478307843872e-06, "loss": 1.181, "step": 13555 }, { "epoch": 0.9059326563335115, "grad_norm": 0.162109375, "learning_rate": 5.3428094390068906e-06, "loss": 1.1992, "step": 13560 }, { "epoch": 0.9062667022982362, "grad_norm": 0.1728515625, "learning_rate": 5.3052692746436095e-06, "loss": 1.1258, "step": 13565 }, { "epoch": 0.906600748262961, "grad_norm": 0.1650390625, "learning_rate": 5.2678578657970105e-06, "loss": 1.1891, "step": 13570 }, { "epoch": 0.9069347942276857, "grad_norm": 0.169921875, "learning_rate": 5.2305752633350355e-06, "loss": 1.1366, "step": 13575 }, { "epoch": 0.9072688401924105, "grad_norm": 0.1650390625, "learning_rate": 5.193421517950481e-06, "loss": 1.1117, "step": 13580 }, { "epoch": 0.9076028861571352, "grad_norm": 0.1796875, "learning_rate": 5.1563966801609465e-06, "loss": 1.228, "step": 13585 }, { "epoch": 0.90793693212186, "grad_norm": 0.1640625, "learning_rate": 5.119500800308741e-06, "loss": 1.2213, "step": 13590 }, { "epoch": 0.9082709780865847, "grad_norm": 0.169921875, "learning_rate": 5.082733928560835e-06, "loss": 1.2232, "step": 13595 }, { "epoch": 0.9086050240513095, "grad_norm": 0.169921875, "learning_rate": 5.0460961149087824e-06, "loss": 1.1233, "step": 13600 }, { "epoch": 0.9089390700160342, "grad_norm": 0.173828125, "learning_rate": 5.009587409168703e-06, "loss": 1.2048, "step": 13605 }, { "epoch": 0.909273115980759, "grad_norm": 0.169921875, "learning_rate": 4.9732078609811e-06, "loss": 1.1446, "step": 13610 }, { "epoch": 0.9096071619454837, "grad_norm": 0.1806640625, "learning_rate": 4.936957519810892e-06, "loss": 1.2648, "step": 13615 }, { "epoch": 0.9099412079102085, "grad_norm": 0.169921875, "learning_rate": 4.900836434947353e-06, "loss": 1.2087, "step": 13620 }, { "epoch": 0.9102752538749332, "grad_norm": 0.171875, "learning_rate": 4.864844655503953e-06, "loss": 1.2085, "step": 13625 }, { "epoch": 0.9106092998396579, "grad_norm": 0.1806640625, "learning_rate": 4.8289822304183665e-06, "loss": 1.2029, "step": 13630 }, { "epoch": 0.9109433458043826, "grad_norm": 0.169921875, "learning_rate": 4.793249208452388e-06, "loss": 1.1732, "step": 13635 }, { "epoch": 0.9112773917691074, "grad_norm": 0.1923828125, "learning_rate": 4.757645638191876e-06, "loss": 1.2723, "step": 13640 }, { "epoch": 0.9116114377338321, "grad_norm": 0.1748046875, "learning_rate": 4.722171568046674e-06, "loss": 1.2018, "step": 13645 }, { "epoch": 0.9119454836985569, "grad_norm": 0.171875, "learning_rate": 4.686827046250497e-06, "loss": 1.1453, "step": 13650 }, { "epoch": 0.9122795296632816, "grad_norm": 0.16796875, "learning_rate": 4.651612120860993e-06, "loss": 1.2069, "step": 13655 }, { "epoch": 0.9126135756280064, "grad_norm": 0.1689453125, "learning_rate": 4.616526839759516e-06, "loss": 1.2072, "step": 13660 }, { "epoch": 0.9129476215927311, "grad_norm": 0.17578125, "learning_rate": 4.58157125065124e-06, "loss": 1.2006, "step": 13665 }, { "epoch": 0.9132816675574559, "grad_norm": 0.173828125, "learning_rate": 4.546745401064889e-06, "loss": 1.237, "step": 13670 }, { "epoch": 0.9136157135221806, "grad_norm": 0.173828125, "learning_rate": 4.512049338352875e-06, "loss": 1.1559, "step": 13675 }, { "epoch": 0.9139497594869054, "grad_norm": 0.1669921875, "learning_rate": 4.477483109691083e-06, "loss": 1.2091, "step": 13680 }, { "epoch": 0.9142838054516301, "grad_norm": 0.1640625, "learning_rate": 4.443046762078884e-06, "loss": 1.1398, "step": 13685 }, { "epoch": 0.9146178514163549, "grad_norm": 0.1796875, "learning_rate": 4.408740342339046e-06, "loss": 1.1943, "step": 13690 }, { "epoch": 0.9149518973810796, "grad_norm": 0.1708984375, "learning_rate": 4.3745638971177005e-06, "loss": 1.1941, "step": 13695 }, { "epoch": 0.9152859433458044, "grad_norm": 0.1865234375, "learning_rate": 4.34051747288422e-06, "loss": 1.2382, "step": 13700 }, { "epoch": 0.9156199893105291, "grad_norm": 0.1669921875, "learning_rate": 4.306601115931175e-06, "loss": 1.1271, "step": 13705 }, { "epoch": 0.9159540352752539, "grad_norm": 0.169921875, "learning_rate": 4.2728148723743536e-06, "loss": 1.2612, "step": 13710 }, { "epoch": 0.9162880812399786, "grad_norm": 0.1787109375, "learning_rate": 4.239158788152564e-06, "loss": 1.1464, "step": 13715 }, { "epoch": 0.9166221272047034, "grad_norm": 0.1689453125, "learning_rate": 4.205632909027668e-06, "loss": 1.1814, "step": 13720 }, { "epoch": 0.9169561731694281, "grad_norm": 0.1708984375, "learning_rate": 4.172237280584445e-06, "loss": 1.2345, "step": 13725 }, { "epoch": 0.9172902191341529, "grad_norm": 0.1767578125, "learning_rate": 4.138971948230674e-06, "loss": 1.1822, "step": 13730 }, { "epoch": 0.9176242650988776, "grad_norm": 0.173828125, "learning_rate": 4.105836957196873e-06, "loss": 1.1968, "step": 13735 }, { "epoch": 0.9179583110636024, "grad_norm": 0.1611328125, "learning_rate": 4.072832352536382e-06, "loss": 1.1399, "step": 13740 }, { "epoch": 0.9182923570283271, "grad_norm": 0.16015625, "learning_rate": 4.039958179125225e-06, "loss": 1.1361, "step": 13745 }, { "epoch": 0.9186264029930519, "grad_norm": 0.1748046875, "learning_rate": 4.007214481662125e-06, "loss": 1.1929, "step": 13750 }, { "epoch": 0.9189604489577766, "grad_norm": 0.171875, "learning_rate": 3.974601304668346e-06, "loss": 1.1354, "step": 13755 }, { "epoch": 0.9192944949225014, "grad_norm": 0.162109375, "learning_rate": 3.94211869248774e-06, "loss": 1.1908, "step": 13760 }, { "epoch": 0.9196285408872261, "grad_norm": 0.1767578125, "learning_rate": 3.909766689286576e-06, "loss": 1.1672, "step": 13765 }, { "epoch": 0.9199625868519509, "grad_norm": 0.169921875, "learning_rate": 3.877545339053601e-06, "loss": 1.1175, "step": 13770 }, { "epoch": 0.9202966328166756, "grad_norm": 0.177734375, "learning_rate": 3.845454685599847e-06, "loss": 1.1686, "step": 13775 }, { "epoch": 0.9206306787814004, "grad_norm": 0.171875, "learning_rate": 3.813494772558657e-06, "loss": 1.1441, "step": 13780 }, { "epoch": 0.9209647247461251, "grad_norm": 0.1806640625, "learning_rate": 3.7816656433856348e-06, "loss": 1.1806, "step": 13785 }, { "epoch": 0.9212987707108499, "grad_norm": 0.1787109375, "learning_rate": 3.7499673413585513e-06, "loss": 1.1616, "step": 13790 }, { "epoch": 0.9216328166755745, "grad_norm": 0.166015625, "learning_rate": 3.718399909577275e-06, "loss": 1.1416, "step": 13795 }, { "epoch": 0.9219668626402993, "grad_norm": 0.166015625, "learning_rate": 3.686963390963727e-06, "loss": 1.1897, "step": 13800 }, { "epoch": 0.922300908605024, "grad_norm": 0.18359375, "learning_rate": 3.65565782826186e-06, "loss": 1.1332, "step": 13805 }, { "epoch": 0.9226349545697488, "grad_norm": 0.1650390625, "learning_rate": 3.624483264037537e-06, "loss": 1.1628, "step": 13810 }, { "epoch": 0.9229690005344735, "grad_norm": 0.162109375, "learning_rate": 3.593439740678506e-06, "loss": 1.2139, "step": 13815 }, { "epoch": 0.9233030464991983, "grad_norm": 0.169921875, "learning_rate": 3.5625273003943583e-06, "loss": 1.176, "step": 13820 }, { "epoch": 0.923637092463923, "grad_norm": 0.1796875, "learning_rate": 3.5317459852164502e-06, "loss": 1.2045, "step": 13825 }, { "epoch": 0.9239711384286478, "grad_norm": 0.1669921875, "learning_rate": 3.501095836997825e-06, "loss": 1.2195, "step": 13830 }, { "epoch": 0.9243051843933725, "grad_norm": 0.1689453125, "learning_rate": 3.4705768974132024e-06, "loss": 1.1469, "step": 13835 }, { "epoch": 0.9246392303580973, "grad_norm": 0.1650390625, "learning_rate": 3.4401892079588993e-06, "loss": 1.1442, "step": 13840 }, { "epoch": 0.924973276322822, "grad_norm": 0.18359375, "learning_rate": 3.4099328099527427e-06, "loss": 1.2137, "step": 13845 }, { "epoch": 0.9253073222875468, "grad_norm": 0.171875, "learning_rate": 3.3798077445340915e-06, "loss": 1.2369, "step": 13850 }, { "epoch": 0.9256413682522715, "grad_norm": 0.1689453125, "learning_rate": 3.34981405266368e-06, "loss": 1.1292, "step": 13855 }, { "epoch": 0.9259754142169963, "grad_norm": 0.1826171875, "learning_rate": 3.3199517751236753e-06, "loss": 1.1946, "step": 13860 }, { "epoch": 0.926309460181721, "grad_norm": 0.16796875, "learning_rate": 3.29022095251752e-06, "loss": 1.156, "step": 13865 }, { "epoch": 0.9266435061464458, "grad_norm": 0.162109375, "learning_rate": 3.2606216252699106e-06, "loss": 1.2873, "step": 13870 }, { "epoch": 0.9269775521111705, "grad_norm": 0.173828125, "learning_rate": 3.2311538336267766e-06, "loss": 1.2188, "step": 13875 }, { "epoch": 0.9273115980758952, "grad_norm": 0.1640625, "learning_rate": 3.2018176176552116e-06, "loss": 1.1445, "step": 13880 }, { "epoch": 0.92764564404062, "grad_norm": 0.1669921875, "learning_rate": 3.1726130172433755e-06, "loss": 1.1774, "step": 13885 }, { "epoch": 0.9279796900053447, "grad_norm": 0.177734375, "learning_rate": 3.143540072100459e-06, "loss": 1.2333, "step": 13890 }, { "epoch": 0.9283137359700695, "grad_norm": 0.171875, "learning_rate": 3.114598821756698e-06, "loss": 1.2043, "step": 13895 }, { "epoch": 0.9286477819347942, "grad_norm": 0.177734375, "learning_rate": 3.0857893055632246e-06, "loss": 1.2579, "step": 13900 }, { "epoch": 0.928981827899519, "grad_norm": 0.166015625, "learning_rate": 3.0571115626920498e-06, "loss": 1.1417, "step": 13905 }, { "epoch": 0.9293158738642437, "grad_norm": 0.171875, "learning_rate": 3.0285656321360267e-06, "loss": 1.1325, "step": 13910 }, { "epoch": 0.9296499198289685, "grad_norm": 0.1640625, "learning_rate": 3.0001515527088074e-06, "loss": 1.1791, "step": 13915 }, { "epoch": 0.9299839657936932, "grad_norm": 0.1640625, "learning_rate": 2.9718693630447214e-06, "loss": 1.1859, "step": 13920 }, { "epoch": 0.930318011758418, "grad_norm": 0.169921875, "learning_rate": 2.9437191015987854e-06, "loss": 1.1939, "step": 13925 }, { "epoch": 0.9306520577231427, "grad_norm": 0.1845703125, "learning_rate": 2.9157008066466707e-06, "loss": 1.2171, "step": 13930 }, { "epoch": 0.9309861036878675, "grad_norm": 0.1708984375, "learning_rate": 2.8878145162845373e-06, "loss": 1.2151, "step": 13935 }, { "epoch": 0.9313201496525922, "grad_norm": 0.1796875, "learning_rate": 2.8600602684291546e-06, "loss": 1.2412, "step": 13940 }, { "epoch": 0.931654195617317, "grad_norm": 0.171875, "learning_rate": 2.832438100817658e-06, "loss": 1.1997, "step": 13945 }, { "epoch": 0.9319882415820417, "grad_norm": 0.1708984375, "learning_rate": 2.8049480510076944e-06, "loss": 1.2391, "step": 13950 }, { "epoch": 0.9323222875467664, "grad_norm": 0.2119140625, "learning_rate": 2.7775901563771967e-06, "loss": 1.267, "step": 13955 }, { "epoch": 0.9326563335114911, "grad_norm": 0.1728515625, "learning_rate": 2.750364454124432e-06, "loss": 1.1957, "step": 13960 }, { "epoch": 0.9329903794762159, "grad_norm": 0.171875, "learning_rate": 2.7232709812679114e-06, "loss": 1.1725, "step": 13965 }, { "epoch": 0.9333244254409406, "grad_norm": 0.1728515625, "learning_rate": 2.6963097746463882e-06, "loss": 1.085, "step": 13970 }, { "epoch": 0.9336584714056654, "grad_norm": 0.166015625, "learning_rate": 2.6694808709187613e-06, "loss": 1.1619, "step": 13975 }, { "epoch": 0.9339925173703901, "grad_norm": 0.189453125, "learning_rate": 2.6427843065640167e-06, "loss": 1.2746, "step": 13980 }, { "epoch": 0.9343265633351149, "grad_norm": 0.1796875, "learning_rate": 2.6162201178812294e-06, "loss": 1.2403, "step": 13985 }, { "epoch": 0.9346606092998396, "grad_norm": 0.1767578125, "learning_rate": 2.589788340989474e-06, "loss": 1.2107, "step": 13990 }, { "epoch": 0.9349946552645644, "grad_norm": 0.1708984375, "learning_rate": 2.5634890118277686e-06, "loss": 1.2215, "step": 13995 }, { "epoch": 0.9353287012292891, "grad_norm": 0.169921875, "learning_rate": 2.537322166155065e-06, "loss": 1.1664, "step": 14000 }, { "epoch": 0.9356627471940139, "grad_norm": 0.1689453125, "learning_rate": 2.5112878395501805e-06, "loss": 1.209, "step": 14005 }, { "epoch": 0.9359967931587386, "grad_norm": 0.203125, "learning_rate": 2.4853860674117325e-06, "loss": 1.1431, "step": 14010 }, { "epoch": 0.9363308391234634, "grad_norm": 0.16796875, "learning_rate": 2.4596168849581158e-06, "loss": 1.2497, "step": 14015 }, { "epoch": 0.9366648850881881, "grad_norm": 0.1748046875, "learning_rate": 2.4339803272274366e-06, "loss": 1.1754, "step": 14020 }, { "epoch": 0.9369989310529129, "grad_norm": 0.1796875, "learning_rate": 2.4084764290774775e-06, "loss": 1.1658, "step": 14025 }, { "epoch": 0.9373329770176376, "grad_norm": 0.16796875, "learning_rate": 2.3831052251856445e-06, "loss": 1.2201, "step": 14030 }, { "epoch": 0.9376670229823624, "grad_norm": 0.1875, "learning_rate": 2.3578667500489206e-06, "loss": 1.2038, "step": 14035 }, { "epoch": 0.9380010689470871, "grad_norm": 0.1630859375, "learning_rate": 2.3327610379838105e-06, "loss": 1.1809, "step": 14040 }, { "epoch": 0.9383351149118119, "grad_norm": 0.17578125, "learning_rate": 2.307788123126331e-06, "loss": 1.1757, "step": 14045 }, { "epoch": 0.9386691608765366, "grad_norm": 0.1796875, "learning_rate": 2.282948039431898e-06, "loss": 1.209, "step": 14050 }, { "epoch": 0.9390032068412614, "grad_norm": 0.18359375, "learning_rate": 2.258240820675317e-06, "loss": 1.1629, "step": 14055 }, { "epoch": 0.9393372528059861, "grad_norm": 0.1748046875, "learning_rate": 2.233666500450793e-06, "loss": 1.1975, "step": 14060 }, { "epoch": 0.9396712987707109, "grad_norm": 0.1669921875, "learning_rate": 2.2092251121717757e-06, "loss": 1.1843, "step": 14065 }, { "epoch": 0.9400053447354356, "grad_norm": 0.17578125, "learning_rate": 2.1849166890709927e-06, "loss": 1.1829, "step": 14070 }, { "epoch": 0.9403393907001604, "grad_norm": 0.1728515625, "learning_rate": 2.160741264200361e-06, "loss": 1.1872, "step": 14075 }, { "epoch": 0.9406734366648851, "grad_norm": 0.1728515625, "learning_rate": 2.1366988704309976e-06, "loss": 1.2531, "step": 14080 }, { "epoch": 0.9410074826296099, "grad_norm": 0.158203125, "learning_rate": 2.112789540453086e-06, "loss": 1.1454, "step": 14085 }, { "epoch": 0.9413415285943346, "grad_norm": 0.177734375, "learning_rate": 2.089013306775922e-06, "loss": 1.1905, "step": 14090 }, { "epoch": 0.9416755745590594, "grad_norm": 0.1728515625, "learning_rate": 2.065370201727823e-06, "loss": 1.1781, "step": 14095 }, { "epoch": 0.9420096205237841, "grad_norm": 0.17578125, "learning_rate": 2.0418602574561074e-06, "loss": 1.158, "step": 14100 }, { "epoch": 0.9423436664885089, "grad_norm": 0.1806640625, "learning_rate": 2.0184835059270047e-06, "loss": 1.2032, "step": 14105 }, { "epoch": 0.9426777124532336, "grad_norm": 0.1748046875, "learning_rate": 1.9952399789256558e-06, "loss": 1.135, "step": 14110 }, { "epoch": 0.9430117584179583, "grad_norm": 0.1728515625, "learning_rate": 1.9721297080560807e-06, "loss": 1.225, "step": 14115 }, { "epoch": 0.943345804382683, "grad_norm": 0.1884765625, "learning_rate": 1.9491527247410657e-06, "loss": 1.2238, "step": 14120 }, { "epoch": 0.9436798503474078, "grad_norm": 0.2451171875, "learning_rate": 1.92630906022222e-06, "loss": 1.0923, "step": 14125 }, { "epoch": 0.9440138963121325, "grad_norm": 0.173828125, "learning_rate": 1.9035987455598425e-06, "loss": 1.2037, "step": 14130 }, { "epoch": 0.9443479422768573, "grad_norm": 0.1708984375, "learning_rate": 1.8810218116329326e-06, "loss": 1.2147, "step": 14135 }, { "epoch": 0.944681988241582, "grad_norm": 0.162109375, "learning_rate": 1.8585782891391345e-06, "loss": 1.1872, "step": 14140 }, { "epoch": 0.9450160342063068, "grad_norm": 0.1826171875, "learning_rate": 1.8362682085946825e-06, "loss": 1.2518, "step": 14145 }, { "epoch": 0.9453500801710315, "grad_norm": 0.1787109375, "learning_rate": 1.8140916003343778e-06, "loss": 1.2239, "step": 14150 }, { "epoch": 0.9456841261357563, "grad_norm": 0.169921875, "learning_rate": 1.7920484945115557e-06, "loss": 1.1243, "step": 14155 }, { "epoch": 0.946018172100481, "grad_norm": 0.2060546875, "learning_rate": 1.770138921098008e-06, "loss": 1.1931, "step": 14160 }, { "epoch": 0.9463522180652058, "grad_norm": 0.1884765625, "learning_rate": 1.7483629098839605e-06, "loss": 1.2633, "step": 14165 }, { "epoch": 0.9466862640299305, "grad_norm": 0.1923828125, "learning_rate": 1.7267204904780621e-06, "loss": 1.2289, "step": 14170 }, { "epoch": 0.9470203099946553, "grad_norm": 0.166015625, "learning_rate": 1.7052116923072958e-06, "loss": 1.2129, "step": 14175 }, { "epoch": 0.94735435595938, "grad_norm": 0.1708984375, "learning_rate": 1.683836544616979e-06, "loss": 1.2649, "step": 14180 }, { "epoch": 0.9476884019241048, "grad_norm": 0.162109375, "learning_rate": 1.6625950764706743e-06, "loss": 1.1021, "step": 14185 }, { "epoch": 0.9480224478888295, "grad_norm": 0.1806640625, "learning_rate": 1.641487316750212e-06, "loss": 1.2487, "step": 14190 }, { "epoch": 0.9483564938535542, "grad_norm": 0.17578125, "learning_rate": 1.6205132941556122e-06, "loss": 1.2798, "step": 14195 }, { "epoch": 0.948690539818279, "grad_norm": 0.1630859375, "learning_rate": 1.5996730372050627e-06, "loss": 1.1935, "step": 14200 }, { "epoch": 0.9490245857830037, "grad_norm": 0.1640625, "learning_rate": 1.5789665742348415e-06, "loss": 1.1511, "step": 14205 }, { "epoch": 0.9493586317477285, "grad_norm": 0.177734375, "learning_rate": 1.558393933399338e-06, "loss": 1.2232, "step": 14210 }, { "epoch": 0.9496926777124532, "grad_norm": 0.1689453125, "learning_rate": 1.537955142670966e-06, "loss": 1.2133, "step": 14215 }, { "epoch": 0.950026723677178, "grad_norm": 0.173828125, "learning_rate": 1.517650229840173e-06, "loss": 1.1668, "step": 14220 }, { "epoch": 0.9503607696419027, "grad_norm": 0.169921875, "learning_rate": 1.4974792225153188e-06, "loss": 1.2862, "step": 14225 }, { "epoch": 0.9506948156066275, "grad_norm": 0.1708984375, "learning_rate": 1.4774421481227762e-06, "loss": 1.2089, "step": 14230 }, { "epoch": 0.9510288615713522, "grad_norm": 0.162109375, "learning_rate": 1.4575390339067296e-06, "loss": 1.1126, "step": 14235 }, { "epoch": 0.951362907536077, "grad_norm": 0.1796875, "learning_rate": 1.4377699069292427e-06, "loss": 1.1774, "step": 14240 }, { "epoch": 0.9516969535008017, "grad_norm": 0.166015625, "learning_rate": 1.418134794070236e-06, "loss": 1.1765, "step": 14245 }, { "epoch": 0.9520309994655265, "grad_norm": 0.173828125, "learning_rate": 1.3986337220273759e-06, "loss": 1.1659, "step": 14250 }, { "epoch": 0.9523650454302512, "grad_norm": 0.173828125, "learning_rate": 1.3792667173160855e-06, "loss": 1.2359, "step": 14255 }, { "epoch": 0.952699091394976, "grad_norm": 0.169921875, "learning_rate": 1.3600338062694784e-06, "loss": 1.1513, "step": 14260 }, { "epoch": 0.9530331373597007, "grad_norm": 0.162109375, "learning_rate": 1.3409350150383803e-06, "loss": 1.1959, "step": 14265 }, { "epoch": 0.9533671833244255, "grad_norm": 0.181640625, "learning_rate": 1.32197036959123e-06, "loss": 1.1792, "step": 14270 }, { "epoch": 0.9537012292891501, "grad_norm": 0.171875, "learning_rate": 1.3031398957140562e-06, "loss": 1.2564, "step": 14275 }, { "epoch": 0.9540352752538749, "grad_norm": 0.1767578125, "learning_rate": 1.284443619010478e-06, "loss": 1.2568, "step": 14280 }, { "epoch": 0.9543693212185996, "grad_norm": 0.1826171875, "learning_rate": 1.265881564901672e-06, "loss": 1.2136, "step": 14285 }, { "epoch": 0.9547033671833244, "grad_norm": 0.201171875, "learning_rate": 1.2474537586262603e-06, "loss": 1.2579, "step": 14290 }, { "epoch": 0.9550374131480491, "grad_norm": 0.1708984375, "learning_rate": 1.2291602252403444e-06, "loss": 1.2214, "step": 14295 }, { "epoch": 0.9553714591127739, "grad_norm": 0.189453125, "learning_rate": 1.2110009896174944e-06, "loss": 1.1595, "step": 14300 }, { "epoch": 0.9557055050774986, "grad_norm": 0.169921875, "learning_rate": 1.1929760764486264e-06, "loss": 1.1858, "step": 14305 }, { "epoch": 0.9560395510422234, "grad_norm": 0.162109375, "learning_rate": 1.1750855102420578e-06, "loss": 1.1783, "step": 14310 }, { "epoch": 0.9563735970069481, "grad_norm": 0.1689453125, "learning_rate": 1.1573293153233966e-06, "loss": 1.257, "step": 14315 }, { "epoch": 0.9567076429716729, "grad_norm": 0.16796875, "learning_rate": 1.1397075158355975e-06, "loss": 1.197, "step": 14320 }, { "epoch": 0.9570416889363976, "grad_norm": 0.1708984375, "learning_rate": 1.1222201357388496e-06, "loss": 1.2113, "step": 14325 }, { "epoch": 0.9573757349011224, "grad_norm": 0.16796875, "learning_rate": 1.1048671988105553e-06, "loss": 1.1578, "step": 14330 }, { "epoch": 0.9577097808658471, "grad_norm": 0.181640625, "learning_rate": 1.0876487286453408e-06, "loss": 1.1104, "step": 14335 }, { "epoch": 0.9580438268305719, "grad_norm": 0.1728515625, "learning_rate": 1.0705647486550118e-06, "loss": 1.23, "step": 14340 }, { "epoch": 0.9583778727952966, "grad_norm": 0.181640625, "learning_rate": 1.0536152820684874e-06, "loss": 1.2529, "step": 14345 }, { "epoch": 0.9587119187600214, "grad_norm": 0.16796875, "learning_rate": 1.036800351931777e-06, "loss": 1.2079, "step": 14350 }, { "epoch": 0.9590459647247461, "grad_norm": 0.173828125, "learning_rate": 1.0201199811080032e-06, "loss": 1.2061, "step": 14355 }, { "epoch": 0.9593800106894709, "grad_norm": 0.1630859375, "learning_rate": 1.0035741922772902e-06, "loss": 1.1448, "step": 14360 }, { "epoch": 0.9597140566541956, "grad_norm": 0.173828125, "learning_rate": 9.871630079367977e-07, "loss": 1.2618, "step": 14365 }, { "epoch": 0.9600481026189204, "grad_norm": 0.173828125, "learning_rate": 9.708864504006433e-07, "loss": 1.1832, "step": 14370 }, { "epoch": 0.9603821485836451, "grad_norm": 0.1630859375, "learning_rate": 9.547445417999123e-07, "loss": 1.1876, "step": 14375 }, { "epoch": 0.9607161945483699, "grad_norm": 0.16796875, "learning_rate": 9.387373040826153e-07, "loss": 1.1768, "step": 14380 }, { "epoch": 0.9610502405130946, "grad_norm": 0.1650390625, "learning_rate": 9.228647590136308e-07, "loss": 1.1675, "step": 14385 }, { "epoch": 0.9613842864778194, "grad_norm": 0.1689453125, "learning_rate": 9.071269281746842e-07, "loss": 1.1261, "step": 14390 }, { "epoch": 0.9617183324425441, "grad_norm": 0.357421875, "learning_rate": 8.915238329643805e-07, "loss": 1.198, "step": 14395 }, { "epoch": 0.9620523784072689, "grad_norm": 0.1728515625, "learning_rate": 8.760554945981048e-07, "loss": 1.2774, "step": 14400 }, { "epoch": 0.9623864243719936, "grad_norm": 0.1748046875, "learning_rate": 8.607219341079887e-07, "loss": 1.2643, "step": 14405 }, { "epoch": 0.9627204703367184, "grad_norm": 0.1708984375, "learning_rate": 8.455231723429324e-07, "loss": 1.1497, "step": 14410 }, { "epoch": 0.9630545163014431, "grad_norm": 0.1708984375, "learning_rate": 8.304592299685387e-07, "loss": 1.2001, "step": 14415 }, { "epoch": 0.9633885622661679, "grad_norm": 0.1611328125, "learning_rate": 8.155301274671234e-07, "loss": 1.2162, "step": 14420 }, { "epoch": 0.9637226082308926, "grad_norm": 0.1650390625, "learning_rate": 8.007358851376378e-07, "loss": 1.2046, "step": 14425 }, { "epoch": 0.9640566541956174, "grad_norm": 0.162109375, "learning_rate": 7.860765230956579e-07, "loss": 1.1698, "step": 14430 }, { "epoch": 0.9643907001603421, "grad_norm": 0.1767578125, "learning_rate": 7.71552061273395e-07, "loss": 1.2303, "step": 14435 }, { "epoch": 0.9647247461250668, "grad_norm": 0.1943359375, "learning_rate": 7.571625194196074e-07, "loss": 1.1499, "step": 14440 }, { "epoch": 0.9650587920897915, "grad_norm": 0.17578125, "learning_rate": 7.429079170996222e-07, "loss": 1.1865, "step": 14445 }, { "epoch": 0.9653928380545163, "grad_norm": 0.166015625, "learning_rate": 7.287882736952912e-07, "loss": 1.2239, "step": 14450 }, { "epoch": 0.965726884019241, "grad_norm": 0.1708984375, "learning_rate": 7.14803608404957e-07, "loss": 1.134, "step": 14455 }, { "epoch": 0.9660609299839658, "grad_norm": 0.189453125, "learning_rate": 7.009539402434207e-07, "loss": 1.2218, "step": 14460 }, { "epoch": 0.9663949759486905, "grad_norm": 0.171875, "learning_rate": 6.87239288041952e-07, "loss": 1.2575, "step": 14465 }, { "epoch": 0.9667290219134153, "grad_norm": 0.1748046875, "learning_rate": 6.736596704482456e-07, "loss": 1.1718, "step": 14470 }, { "epoch": 0.96706306787814, "grad_norm": 0.1708984375, "learning_rate": 6.60215105926365e-07, "loss": 1.1994, "step": 14475 }, { "epoch": 0.9673971138428648, "grad_norm": 0.16796875, "learning_rate": 6.46905612756743e-07, "loss": 1.1799, "step": 14480 }, { "epoch": 0.9677311598075895, "grad_norm": 0.1650390625, "learning_rate": 6.337312090361924e-07, "loss": 1.2205, "step": 14485 }, { "epoch": 0.9680652057723143, "grad_norm": 0.1689453125, "learning_rate": 6.206919126777955e-07, "loss": 1.2612, "step": 14490 }, { "epoch": 0.968399251737039, "grad_norm": 0.1630859375, "learning_rate": 6.077877414109923e-07, "loss": 1.1688, "step": 14495 }, { "epoch": 0.9687332977017638, "grad_norm": 0.1728515625, "learning_rate": 5.950187127814477e-07, "loss": 1.2077, "step": 14500 }, { "epoch": 0.9690673436664885, "grad_norm": 0.1748046875, "learning_rate": 5.823848441510737e-07, "loss": 1.1738, "step": 14505 }, { "epoch": 0.9694013896312133, "grad_norm": 0.173828125, "learning_rate": 5.698861526980404e-07, "loss": 1.1966, "step": 14510 }, { "epoch": 0.969735435595938, "grad_norm": 0.1708984375, "learning_rate": 5.575226554166757e-07, "loss": 1.145, "step": 14515 }, { "epoch": 0.9700694815606627, "grad_norm": 0.171875, "learning_rate": 5.452943691175216e-07, "loss": 1.1979, "step": 14520 }, { "epoch": 0.9704035275253875, "grad_norm": 0.1689453125, "learning_rate": 5.332013104272782e-07, "loss": 1.1926, "step": 14525 }, { "epoch": 0.9707375734901122, "grad_norm": 0.1796875, "learning_rate": 5.212434957887369e-07, "loss": 1.2438, "step": 14530 }, { "epoch": 0.971071619454837, "grad_norm": 0.1728515625, "learning_rate": 5.094209414608253e-07, "loss": 1.2502, "step": 14535 }, { "epoch": 0.9714056654195617, "grad_norm": 0.1767578125, "learning_rate": 4.977336635185736e-07, "loss": 1.1783, "step": 14540 }, { "epoch": 0.9717397113842865, "grad_norm": 0.171875, "learning_rate": 4.861816778530371e-07, "loss": 1.2503, "step": 14545 }, { "epoch": 0.9720737573490112, "grad_norm": 0.1787109375, "learning_rate": 4.7476500017136215e-07, "loss": 1.2635, "step": 14550 }, { "epoch": 0.972407803313736, "grad_norm": 0.166015625, "learning_rate": 4.6348364599668737e-07, "loss": 1.1629, "step": 14555 }, { "epoch": 0.9727418492784607, "grad_norm": 0.1669921875, "learning_rate": 4.523376306681537e-07, "loss": 1.1369, "step": 14560 }, { "epoch": 0.9730758952431855, "grad_norm": 0.1748046875, "learning_rate": 4.413269693409161e-07, "loss": 1.2048, "step": 14565 }, { "epoch": 0.9734099412079102, "grad_norm": 0.16796875, "learning_rate": 4.3045167698603226e-07, "loss": 1.2306, "step": 14570 }, { "epoch": 0.973743987172635, "grad_norm": 0.1728515625, "learning_rate": 4.197117683905627e-07, "loss": 1.1437, "step": 14575 }, { "epoch": 0.9740780331373597, "grad_norm": 0.166015625, "learning_rate": 4.0910725815742626e-07, "loss": 1.0792, "step": 14580 }, { "epoch": 0.9744120791020845, "grad_norm": 0.1767578125, "learning_rate": 3.986381607055112e-07, "loss": 1.2339, "step": 14585 }, { "epoch": 0.9747461250668092, "grad_norm": 0.171875, "learning_rate": 3.883044902695199e-07, "loss": 1.1773, "step": 14590 }, { "epoch": 0.975080171031534, "grad_norm": 0.1767578125, "learning_rate": 3.781062609000463e-07, "loss": 1.1915, "step": 14595 }, { "epoch": 0.9754142169962586, "grad_norm": 0.1767578125, "learning_rate": 3.680434864635429e-07, "loss": 1.1747, "step": 14600 }, { "epoch": 0.9757482629609834, "grad_norm": 0.1689453125, "learning_rate": 3.5811618064226505e-07, "loss": 1.1539, "step": 14605 }, { "epoch": 0.9760823089257081, "grad_norm": 0.166015625, "learning_rate": 3.483243569342487e-07, "loss": 1.2025, "step": 14610 }, { "epoch": 0.9764163548904329, "grad_norm": 0.1748046875, "learning_rate": 3.3866802865337723e-07, "loss": 1.1391, "step": 14615 }, { "epoch": 0.9767504008551576, "grad_norm": 0.1806640625, "learning_rate": 3.29147208929248e-07, "loss": 1.1749, "step": 14620 }, { "epoch": 0.9770844468198824, "grad_norm": 0.1796875, "learning_rate": 3.1976191070722803e-07, "loss": 1.1628, "step": 14625 }, { "epoch": 0.9774184927846071, "grad_norm": 0.166015625, "learning_rate": 3.1051214674843174e-07, "loss": 1.1674, "step": 14630 }, { "epoch": 0.9777525387493319, "grad_norm": 0.1669921875, "learning_rate": 3.013979296296543e-07, "loss": 1.2079, "step": 14635 }, { "epoch": 0.9780865847140566, "grad_norm": 0.177734375, "learning_rate": 2.92419271743416e-07, "loss": 1.2202, "step": 14640 }, { "epoch": 0.9784206306787814, "grad_norm": 0.1748046875, "learning_rate": 2.8357618529792905e-07, "loss": 1.2067, "step": 14645 }, { "epoch": 0.9787546766435061, "grad_norm": 0.1669921875, "learning_rate": 2.74868682317031e-07, "loss": 1.191, "step": 14650 }, { "epoch": 0.9790887226082309, "grad_norm": 0.1708984375, "learning_rate": 2.6629677464024006e-07, "loss": 1.1576, "step": 14655 }, { "epoch": 0.9794227685729556, "grad_norm": 0.1669921875, "learning_rate": 2.5786047392268864e-07, "loss": 1.1496, "step": 14660 }, { "epoch": 0.9797568145376804, "grad_norm": 0.169921875, "learning_rate": 2.4955979163514554e-07, "loss": 1.2359, "step": 14665 }, { "epoch": 0.9800908605024051, "grad_norm": 0.1787109375, "learning_rate": 2.413947390639715e-07, "loss": 1.2322, "step": 14670 }, { "epoch": 0.9804249064671299, "grad_norm": 0.171875, "learning_rate": 2.3336532731108584e-07, "loss": 1.2106, "step": 14675 }, { "epoch": 0.9807589524318546, "grad_norm": 0.18359375, "learning_rate": 2.2547156729403329e-07, "loss": 1.1849, "step": 14680 }, { "epoch": 0.9810929983965794, "grad_norm": 0.18359375, "learning_rate": 2.177134697458616e-07, "loss": 1.225, "step": 14685 }, { "epoch": 0.9814270443613041, "grad_norm": 0.1748046875, "learning_rate": 2.100910452151883e-07, "loss": 1.1283, "step": 14690 }, { "epoch": 0.9817610903260289, "grad_norm": 0.1767578125, "learning_rate": 2.0260430406615626e-07, "loss": 1.1439, "step": 14695 }, { "epoch": 0.9820951362907536, "grad_norm": 0.1689453125, "learning_rate": 1.9525325647841152e-07, "loss": 1.1861, "step": 14700 }, { "epoch": 0.9824291822554784, "grad_norm": 0.1748046875, "learning_rate": 1.8803791244710322e-07, "loss": 1.2404, "step": 14705 }, { "epoch": 0.9827632282202031, "grad_norm": 0.1669921875, "learning_rate": 1.8095828178286145e-07, "loss": 1.2448, "step": 14710 }, { "epoch": 0.9830972741849279, "grad_norm": 0.1611328125, "learning_rate": 1.7401437411179722e-07, "loss": 1.2503, "step": 14715 }, { "epoch": 0.9834313201496526, "grad_norm": 0.173828125, "learning_rate": 1.6720619887548028e-07, "loss": 1.2129, "step": 14720 }, { "epoch": 0.9837653661143774, "grad_norm": 0.169921875, "learning_rate": 1.605337653309058e-07, "loss": 1.1759, "step": 14725 }, { "epoch": 0.9840994120791021, "grad_norm": 0.1728515625, "learning_rate": 1.539970825505277e-07, "loss": 1.1244, "step": 14730 }, { "epoch": 0.9844334580438269, "grad_norm": 0.1669921875, "learning_rate": 1.4759615942220306e-07, "loss": 1.1535, "step": 14735 }, { "epoch": 0.9847675040085516, "grad_norm": 0.1845703125, "learning_rate": 1.4133100464922555e-07, "loss": 1.1768, "step": 14740 }, { "epoch": 0.9851015499732764, "grad_norm": 0.1826171875, "learning_rate": 1.352016267502365e-07, "loss": 1.2574, "step": 14745 }, { "epoch": 0.9854355959380011, "grad_norm": 0.1962890625, "learning_rate": 1.292080340593249e-07, "loss": 1.1745, "step": 14750 }, { "epoch": 0.9857696419027259, "grad_norm": 0.166015625, "learning_rate": 1.2335023472589414e-07, "loss": 1.2315, "step": 14755 }, { "epoch": 0.9861036878674505, "grad_norm": 0.1787109375, "learning_rate": 1.1762823671475076e-07, "loss": 1.2231, "step": 14760 }, { "epoch": 0.9864377338321753, "grad_norm": 0.162109375, "learning_rate": 1.12042047806038e-07, "loss": 1.2253, "step": 14765 }, { "epoch": 0.9867717797969, "grad_norm": 0.1767578125, "learning_rate": 1.0659167559523564e-07, "loss": 1.1431, "step": 14770 }, { "epoch": 0.9871058257616248, "grad_norm": 0.1767578125, "learning_rate": 1.012771274931823e-07, "loss": 1.1837, "step": 14775 }, { "epoch": 0.9874398717263495, "grad_norm": 0.19921875, "learning_rate": 9.609841072599768e-08, "loss": 1.2645, "step": 14780 }, { "epoch": 0.9877739176910743, "grad_norm": 0.1669921875, "learning_rate": 9.105553233513809e-08, "loss": 1.2164, "step": 14785 }, { "epoch": 0.988107963655799, "grad_norm": 0.169921875, "learning_rate": 8.614849917737423e-08, "loss": 1.2218, "step": 14790 }, { "epoch": 0.9884420096205238, "grad_norm": 0.16796875, "learning_rate": 8.137731792472458e-08, "loss": 1.1711, "step": 14795 }, { "epoch": 0.9887760555852485, "grad_norm": 0.1650390625, "learning_rate": 7.674199506453317e-08, "loss": 1.2162, "step": 14800 }, { "epoch": 0.9891101015499733, "grad_norm": 0.1669921875, "learning_rate": 7.224253689940286e-08, "loss": 1.2323, "step": 14805 }, { "epoch": 0.989444147514698, "grad_norm": 0.1708984375, "learning_rate": 6.787894954720653e-08, "loss": 1.1196, "step": 14810 }, { "epoch": 0.9897781934794228, "grad_norm": 0.158203125, "learning_rate": 6.365123894107594e-08, "loss": 1.2014, "step": 14815 }, { "epoch": 0.9901122394441475, "grad_norm": 0.1748046875, "learning_rate": 5.955941082936845e-08, "loss": 1.1848, "step": 14820 }, { "epoch": 0.9904462854088723, "grad_norm": 0.1689453125, "learning_rate": 5.560347077572248e-08, "loss": 1.1779, "step": 14825 }, { "epoch": 0.990780331373597, "grad_norm": 0.162109375, "learning_rate": 5.1783424158990954e-08, "loss": 1.208, "step": 14830 }, { "epoch": 0.9911143773383218, "grad_norm": 0.17578125, "learning_rate": 4.809927617324128e-08, "loss": 1.2122, "step": 14835 }, { "epoch": 0.9914484233030465, "grad_norm": 0.17578125, "learning_rate": 4.4551031827788636e-08, "loss": 1.1781, "step": 14840 }, { "epoch": 0.9917824692677712, "grad_norm": 0.16796875, "learning_rate": 4.113869594712938e-08, "loss": 1.1899, "step": 14845 }, { "epoch": 0.992116515232496, "grad_norm": 0.1689453125, "learning_rate": 3.7862273171007657e-08, "loss": 1.1715, "step": 14850 }, { "epoch": 0.9924505611972207, "grad_norm": 0.1650390625, "learning_rate": 3.472176795432658e-08, "loss": 1.1051, "step": 14855 }, { "epoch": 0.9927846071619455, "grad_norm": 0.1728515625, "learning_rate": 3.171718456720374e-08, "loss": 1.1639, "step": 14860 }, { "epoch": 0.9931186531266702, "grad_norm": 0.1650390625, "learning_rate": 2.8848527094949006e-08, "loss": 1.1834, "step": 14865 }, { "epoch": 0.993452699091395, "grad_norm": 0.1728515625, "learning_rate": 2.611579943804232e-08, "loss": 1.274, "step": 14870 }, { "epoch": 0.9937867450561197, "grad_norm": 0.1591796875, "learning_rate": 2.3519005312144792e-08, "loss": 1.1908, "step": 14875 }, { "epoch": 0.9941207910208445, "grad_norm": 0.193359375, "learning_rate": 2.105814824810981e-08, "loss": 1.2198, "step": 14880 }, { "epoch": 0.9944548369855692, "grad_norm": 0.2001953125, "learning_rate": 1.873323159191642e-08, "loss": 1.1147, "step": 14885 }, { "epoch": 0.994788882950294, "grad_norm": 0.169921875, "learning_rate": 1.6544258504747058e-08, "loss": 1.1683, "step": 14890 }, { "epoch": 0.9951229289150187, "grad_norm": 0.1708984375, "learning_rate": 1.4491231962920904e-08, "loss": 1.2365, "step": 14895 }, { "epoch": 0.9954569748797435, "grad_norm": 0.1796875, "learning_rate": 1.2574154757916123e-08, "loss": 1.1823, "step": 14900 }, { "epoch": 0.9957910208444682, "grad_norm": 0.1689453125, "learning_rate": 1.0793029496369844e-08, "loss": 1.1889, "step": 14905 }, { "epoch": 0.996125066809193, "grad_norm": 0.1728515625, "learning_rate": 9.147858600067061e-09, "loss": 1.2029, "step": 14910 }, { "epoch": 0.9964591127739177, "grad_norm": 0.166015625, "learning_rate": 7.63864430591843e-09, "loss": 1.199, "step": 14915 }, { "epoch": 0.9967931587386424, "grad_norm": 0.17578125, "learning_rate": 6.265388666004679e-09, "loss": 1.1928, "step": 14920 }, { "epoch": 0.9971272047033671, "grad_norm": 0.1787109375, "learning_rate": 5.028093547498891e-09, "loss": 1.2173, "step": 14925 }, { "epoch": 0.9974612506680919, "grad_norm": 0.166015625, "learning_rate": 3.926760632777526e-09, "loss": 1.1781, "step": 14930 }, { "epoch": 0.9977952966328166, "grad_norm": 0.17578125, "learning_rate": 2.9613914192760937e-09, "loss": 1.1657, "step": 14935 }, { "epoch": 0.9981293425975414, "grad_norm": 0.1953125, "learning_rate": 2.131987219622378e-09, "loss": 1.1723, "step": 14940 }, { "epoch": 0.9984633885622661, "grad_norm": 0.1611328125, "learning_rate": 1.4385491615365177e-09, "loss": 1.1883, "step": 14945 }, { "epoch": 0.9987974345269909, "grad_norm": 0.1708984375, "learning_rate": 8.810781878865193e-10, "loss": 1.23, "step": 14950 }, { "epoch": 0.9991314804917156, "grad_norm": 0.16796875, "learning_rate": 4.595750566660506e-10, "loss": 1.1041, "step": 14955 }, { "epoch": 0.9994655264564404, "grad_norm": 0.2021484375, "learning_rate": 1.7404034097223688e-10, "loss": 1.2243, "step": 14960 }, { "epoch": 0.9997995724211651, "grad_norm": 0.1748046875, "learning_rate": 2.4474429050069803e-11, "loss": 1.2883, "step": 14965 }, { "epoch": 1.0, "step": 14968, "total_flos": 1.1104476526052114e+19, "train_loss": 0.0, "train_runtime": 6.7946, "train_samples_per_second": 17623.245, "train_steps_per_second": 2202.924 } ], "logging_steps": 5, "max_steps": 14968, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1104476526052114e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }