ArabianGPT-1.5B / trainer_state.json
riotu-lab's picture
upload files
faaa9a4 verified
raw
history blame contribute delete
No virus
127 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.04694358968884695,
"eval_steps": 500,
"global_step": 5110000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0004999999998149024,
"loss": 8.5332,
"step": 5000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999999990629436,
"loss": 8.1061,
"step": 10000
},
{
"epoch": 0.0,
"learning_rate": 0.000499999997732555,
"loss": 8.1201,
"step": 15000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999999958237364,
"loss": 8.1658,
"step": 20000
},
{
"epoch": 0.0,
"learning_rate": 0.000499999993336488,
"loss": 8.2813,
"step": 25000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999999902708098,
"loss": 8.3271,
"step": 30000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999999866267018,
"loss": 8.3799,
"step": 35000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999999824041639,
"loss": 8.4484,
"step": 40000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999999776031961,
"loss": 8.4779,
"step": 45000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999999722237985,
"loss": 8.4402,
"step": 50000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999999662659712,
"loss": 8.4371,
"step": 55000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999999597297141,
"loss": 8.4231,
"step": 60000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999999526150273,
"loss": 8.3955,
"step": 65000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999999449219107,
"loss": 8.4,
"step": 70000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999999366503643,
"loss": 8.414,
"step": 75000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999999278003882,
"loss": 8.4149,
"step": 80000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999999183719824,
"loss": 8.4205,
"step": 85000
},
{
"epoch": 0.0,
"learning_rate": 0.000499999908365147,
"loss": 8.4157,
"step": 90000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999998977798819,
"loss": 8.4017,
"step": 95000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999998866161871,
"loss": 8.4086,
"step": 100000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999998748740628,
"loss": 8.3796,
"step": 105000
},
{
"epoch": 0.0,
"learning_rate": 0.000499999862553509,
"loss": 8.4095,
"step": 110000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999998496545254,
"loss": 8.4944,
"step": 115000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999998361771126,
"loss": 8.4752,
"step": 120000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999998221212701,
"loss": 8.4344,
"step": 125000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999998074869983,
"loss": 8.4137,
"step": 130000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999997922742969,
"loss": 8.468,
"step": 135000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999997764831663,
"loss": 8.5349,
"step": 140000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999997601136063,
"loss": 8.5042,
"step": 145000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999997431656169,
"loss": 8.5147,
"step": 150000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999997256391984,
"loss": 8.4722,
"step": 155000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999997075343505,
"loss": 8.4144,
"step": 160000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999996888510735,
"loss": 8.3935,
"step": 165000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999996695893673,
"loss": 8.4244,
"step": 170000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999996497492322,
"loss": 8.4991,
"step": 175000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999996293306679,
"loss": 8.5739,
"step": 180000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999996083336746,
"loss": 8.5794,
"step": 185000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999995867582523,
"loss": 8.5878,
"step": 190000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999995646044011,
"loss": 8.6133,
"step": 195000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999995418721212,
"loss": 8.5593,
"step": 200000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999995185614123,
"loss": 8.5519,
"step": 205000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999994946722748,
"loss": 8.5628,
"step": 210000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999994702047085,
"loss": 8.5084,
"step": 215000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999994451587136,
"loss": 8.5437,
"step": 220000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999994195342902,
"loss": 8.5508,
"step": 225000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999993933314382,
"loss": 8.5281,
"step": 230000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999993665501577,
"loss": 8.5282,
"step": 235000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999993391904488,
"loss": 8.5707,
"step": 240000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999993112523117,
"loss": 8.5596,
"step": 245000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999992827357463,
"loss": 8.554,
"step": 250000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999992536407527,
"loss": 8.5556,
"step": 255000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999992239673309,
"loss": 8.5767,
"step": 260000
},
{
"epoch": 0.0,
"learning_rate": 0.000499999193715481,
"loss": 8.579,
"step": 265000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999991628852031,
"loss": 8.5437,
"step": 270000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999991314764974,
"loss": 8.5226,
"step": 275000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999990994893638,
"loss": 8.5095,
"step": 280000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999990669238024,
"loss": 8.521,
"step": 285000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999990337798134,
"loss": 8.5687,
"step": 290000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999990000573966,
"loss": 8.567,
"step": 295000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999989657565524,
"loss": 8.5399,
"step": 300000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999989308772806,
"loss": 8.5336,
"step": 305000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999988954195816,
"loss": 8.5427,
"step": 310000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999988593834551,
"loss": 8.5369,
"step": 315000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999988227689015,
"loss": 8.5183,
"step": 320000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999987855759207,
"loss": 8.5165,
"step": 325000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999987478045128,
"loss": 8.5009,
"step": 330000
},
{
"epoch": 0.0,
"learning_rate": 0.000499998709454678,
"loss": 8.4832,
"step": 335000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999986705264164,
"loss": 8.4453,
"step": 340000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999986310197279,
"loss": 8.4457,
"step": 345000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999985909346127,
"loss": 8.4958,
"step": 350000
},
{
"epoch": 0.0,
"learning_rate": 0.000499998550271071,
"loss": 8.5252,
"step": 355000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999985090291027,
"loss": 8.526,
"step": 360000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999984672087081,
"loss": 8.4962,
"step": 365000
},
{
"epoch": 0.0,
"learning_rate": 0.000499998424809887,
"loss": 8.471,
"step": 370000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999983818326398,
"loss": 8.4943,
"step": 375000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999983382769665,
"loss": 8.5109,
"step": 380000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999982941428673,
"loss": 8.5156,
"step": 385000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999982494303419,
"loss": 8.4645,
"step": 390000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999982041393909,
"loss": 8.4879,
"step": 395000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999981582700142,
"loss": 8.5108,
"step": 400000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999981118222117,
"loss": 8.545,
"step": 405000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999980647959839,
"loss": 8.5518,
"step": 410000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999980171913307,
"loss": 8.5067,
"step": 415000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999979690082521,
"loss": 8.537,
"step": 420000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999979202467483,
"loss": 8.4697,
"step": 425000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999978709068197,
"loss": 8.4782,
"step": 430000
},
{
"epoch": 0.0,
"learning_rate": 0.000499997820988466,
"loss": 8.4981,
"step": 435000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999977704916876,
"loss": 8.5571,
"step": 440000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999977194164844,
"loss": 8.514,
"step": 445000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999976677628566,
"loss": 8.4959,
"step": 450000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999976155308043,
"loss": 8.58,
"step": 455000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999975627203278,
"loss": 8.6084,
"step": 460000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999975093314269,
"loss": 8.5743,
"step": 465000
},
{
"epoch": 0.0,
"learning_rate": 0.000499997455364102,
"loss": 8.5643,
"step": 470000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999974008183532,
"loss": 8.4961,
"step": 475000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999973456941804,
"loss": 8.4412,
"step": 480000
},
{
"epoch": 0.0,
"learning_rate": 0.000499997289991584,
"loss": 8.4485,
"step": 485000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999972337105639,
"loss": 8.4707,
"step": 490000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999971768511204,
"loss": 8.4722,
"step": 495000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999971194132537,
"loss": 8.486,
"step": 500000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999970613969636,
"loss": 8.438,
"step": 505000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999970028022505,
"loss": 8.4606,
"step": 510000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999969436291146,
"loss": 8.518,
"step": 515000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999968838775557,
"loss": 8.5148,
"step": 520000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999968235475743,
"loss": 8.5136,
"step": 525000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999967626391703,
"loss": 8.4632,
"step": 530000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999967011523439,
"loss": 8.4725,
"step": 535000
},
{
"epoch": 0.0,
"learning_rate": 0.0004999966390870954,
"loss": 8.4696,
"step": 540000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999965764434247,
"loss": 8.4397,
"step": 545000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999965132213321,
"loss": 8.4486,
"step": 550000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999964494208178,
"loss": 8.4202,
"step": 555000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999963850418817,
"loss": 8.4795,
"step": 560000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999963200845243,
"loss": 8.5227,
"step": 565000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999962545487453,
"loss": 8.5248,
"step": 570000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999961884345453,
"loss": 8.5308,
"step": 575000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999961217419241,
"loss": 8.5287,
"step": 580000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999960544708822,
"loss": 8.5622,
"step": 585000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999959866214195,
"loss": 8.5303,
"step": 590000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999959181935361,
"loss": 8.4332,
"step": 595000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999958491872324,
"loss": 8.4176,
"step": 600000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999957796025085,
"loss": 8.3863,
"step": 605000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999957094393644,
"loss": 8.3405,
"step": 610000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999956386978003,
"loss": 8.3725,
"step": 615000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999955673778165,
"loss": 8.4165,
"step": 620000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999954954794132,
"loss": 8.3808,
"step": 625000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999954230025904,
"loss": 8.3515,
"step": 630000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999953499473482,
"loss": 8.3555,
"step": 635000
},
{
"epoch": 0.01,
"learning_rate": 0.000499995276313687,
"loss": 8.4179,
"step": 640000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999952021016069,
"loss": 8.4277,
"step": 645000
},
{
"epoch": 0.01,
"learning_rate": 0.000499995127311108,
"loss": 8.4402,
"step": 650000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999950519421905,
"loss": 8.4472,
"step": 655000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999949759948546,
"loss": 8.4497,
"step": 660000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999948994691005,
"loss": 8.4436,
"step": 665000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999948223649283,
"loss": 8.4619,
"step": 670000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999947446823382,
"loss": 8.4528,
"step": 675000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999946664213305,
"loss": 8.4476,
"step": 680000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999945875819051,
"loss": 8.5146,
"step": 685000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999945081640625,
"loss": 8.5345,
"step": 690000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999944281678027,
"loss": 8.5706,
"step": 695000
},
{
"epoch": 0.01,
"learning_rate": 0.000499994347593126,
"loss": 8.5314,
"step": 700000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999942664400324,
"loss": 8.5054,
"step": 705000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999941847085223,
"loss": 8.5103,
"step": 710000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999941023985958,
"loss": 8.5057,
"step": 715000
},
{
"epoch": 0.01,
"learning_rate": 0.000499994019510253,
"loss": 8.4851,
"step": 720000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999939360434942,
"loss": 8.4865,
"step": 725000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999938519983196,
"loss": 8.4787,
"step": 730000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999937673747293,
"loss": 8.4496,
"step": 735000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999936821727237,
"loss": 8.4326,
"step": 740000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999935963923027,
"loss": 8.387,
"step": 745000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999935100334667,
"loss": 8.4004,
"step": 750000
},
{
"epoch": 0.01,
"learning_rate": 0.000499993423096216,
"loss": 8.4077,
"step": 755000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999933355805504,
"loss": 8.3867,
"step": 760000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999932474864706,
"loss": 8.4258,
"step": 765000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999931588139764,
"loss": 8.4821,
"step": 770000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999930695630682,
"loss": 8.4397,
"step": 775000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999929797337462,
"loss": 8.5221,
"step": 780000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999928893260105,
"loss": 8.4864,
"step": 785000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999927983398616,
"loss": 8.4338,
"step": 790000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999927067752993,
"loss": 8.4292,
"step": 795000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999926146323241,
"loss": 8.3973,
"step": 800000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999925219109361,
"loss": 8.4031,
"step": 805000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999924286111355,
"loss": 8.3967,
"step": 810000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999923347329226,
"loss": 8.4021,
"step": 815000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999922402762977,
"loss": 8.414,
"step": 820000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999921452412606,
"loss": 8.4847,
"step": 825000
},
{
"epoch": 0.01,
"learning_rate": 0.000499992049627812,
"loss": 8.4377,
"step": 830000
},
{
"epoch": 0.01,
"learning_rate": 0.000499991953435952,
"loss": 8.4167,
"step": 835000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999918566656806,
"loss": 8.445,
"step": 840000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999917593169984,
"loss": 8.4625,
"step": 845000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999916613899052,
"loss": 8.459,
"step": 850000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999915628844015,
"loss": 8.4831,
"step": 855000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999914638004875,
"loss": 8.478,
"step": 860000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999913641381633,
"loss": 8.4598,
"step": 865000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999912638974292,
"loss": 8.4568,
"step": 870000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999911630782856,
"loss": 8.4423,
"step": 875000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999910616807323,
"loss": 8.4638,
"step": 880000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999909597047702,
"loss": 8.4405,
"step": 885000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999908571503989,
"loss": 8.4464,
"step": 890000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999907540176189,
"loss": 8.4808,
"step": 895000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999906503064305,
"loss": 8.437,
"step": 900000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999905460168339,
"loss": 8.4214,
"step": 905000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999904411488293,
"loss": 8.4418,
"step": 910000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999903357024169,
"loss": 8.5303,
"step": 915000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999902296775971,
"loss": 8.4865,
"step": 920000
},
{
"epoch": 0.01,
"learning_rate": 0.00049999012307437,
"loss": 8.4779,
"step": 925000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999900158927358,
"loss": 8.4884,
"step": 930000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999899081326949,
"loss": 8.4641,
"step": 935000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999897997942475,
"loss": 8.485,
"step": 940000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999896908773939,
"loss": 8.4615,
"step": 945000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999895813821341,
"loss": 8.4412,
"step": 950000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999894713084688,
"loss": 8.4274,
"step": 955000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999893606563978,
"loss": 8.4331,
"step": 960000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999892494259216,
"loss": 8.4737,
"step": 965000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999891376170404,
"loss": 8.4471,
"step": 970000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999890252297545,
"loss": 8.4929,
"step": 975000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999889122640642,
"loss": 8.4602,
"step": 980000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999887987199697,
"loss": 8.4671,
"step": 985000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999886845974712,
"loss": 8.4589,
"step": 990000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999885698965689,
"loss": 8.47,
"step": 995000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999884546172634,
"loss": 8.5097,
"step": 1000000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999883387595546,
"loss": 8.5082,
"step": 1005000
},
{
"epoch": 0.01,
"learning_rate": 0.000499988222323443,
"loss": 8.4846,
"step": 1010000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999881053089287,
"loss": 8.4732,
"step": 1015000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999879877160121,
"loss": 8.443,
"step": 1020000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999878695446934,
"loss": 8.4313,
"step": 1025000
},
{
"epoch": 0.01,
"learning_rate": 0.000499987750794973,
"loss": 8.4374,
"step": 1030000
},
{
"epoch": 0.01,
"learning_rate": 0.000499987631466851,
"loss": 8.4242,
"step": 1035000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999875115603279,
"loss": 8.4282,
"step": 1040000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999873910754036,
"loss": 8.4442,
"step": 1045000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999872700120788,
"loss": 8.5036,
"step": 1050000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999871483703536,
"loss": 8.4339,
"step": 1055000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999870261502281,
"loss": 8.399,
"step": 1060000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999869033517028,
"loss": 8.4156,
"step": 1065000
},
{
"epoch": 0.01,
"learning_rate": 0.000499986779974778,
"loss": 8.478,
"step": 1070000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999866560194539,
"loss": 8.5151,
"step": 1075000
},
{
"epoch": 0.01,
"learning_rate": 0.000499986531485731,
"loss": 8.5111,
"step": 1080000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999864063736091,
"loss": 8.5124,
"step": 1085000
},
{
"epoch": 0.01,
"learning_rate": 0.000499986280683089,
"loss": 8.4821,
"step": 1090000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999861544141706,
"loss": 8.4595,
"step": 1095000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999860275668545,
"loss": 8.4339,
"step": 1100000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999859001411409,
"loss": 8.3923,
"step": 1105000
},
{
"epoch": 0.01,
"learning_rate": 0.00049998577213703,
"loss": 8.3647,
"step": 1110000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999856435545222,
"loss": 8.3865,
"step": 1115000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999855143936176,
"loss": 8.4136,
"step": 1120000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999853846543169,
"loss": 8.4033,
"step": 1125000
},
{
"epoch": 0.01,
"learning_rate": 0.00049998525433662,
"loss": 8.4242,
"step": 1130000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999851234405274,
"loss": 8.3723,
"step": 1135000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999849919660393,
"loss": 8.4118,
"step": 1140000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999848599131562,
"loss": 8.3804,
"step": 1145000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999847272818781,
"loss": 8.4146,
"step": 1150000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999845940722056,
"loss": 8.4656,
"step": 1155000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999844602841388,
"loss": 8.4474,
"step": 1160000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999843259176781,
"loss": 8.4551,
"step": 1165000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999841909728239,
"loss": 8.4472,
"step": 1170000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999840554495763,
"loss": 8.4341,
"step": 1175000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999839193479358,
"loss": 8.4335,
"step": 1180000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999837826679027,
"loss": 8.4043,
"step": 1185000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999836454094771,
"loss": 8.3666,
"step": 1190000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999835075726595,
"loss": 8.3444,
"step": 1195000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999833691574503,
"loss": 8.3216,
"step": 1200000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999832301638497,
"loss": 8.3572,
"step": 1205000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999830905918581,
"loss": 8.3965,
"step": 1210000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999829504414756,
"loss": 8.4237,
"step": 1215000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999828097127029,
"loss": 8.3765,
"step": 1220000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999826684055398,
"loss": 8.3266,
"step": 1225000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999825265199872,
"loss": 8.322,
"step": 1230000
},
{
"epoch": 0.01,
"learning_rate": 0.000499982384056045,
"loss": 8.3367,
"step": 1235000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999822410137139,
"loss": 8.3544,
"step": 1240000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999820973929939,
"loss": 8.3379,
"step": 1245000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999819531938854,
"loss": 8.359,
"step": 1250000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999818084163889,
"loss": 8.3699,
"step": 1255000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999816630605047,
"loss": 8.3825,
"step": 1260000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999815171262328,
"loss": 8.3895,
"step": 1265000
},
{
"epoch": 0.01,
"learning_rate": 0.000499981370613574,
"loss": 8.3835,
"step": 1270000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999812235225284,
"loss": 8.3684,
"step": 1275000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999810758530964,
"loss": 8.3881,
"step": 1280000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999809276052783,
"loss": 8.376,
"step": 1285000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999807787790746,
"loss": 8.4006,
"step": 1290000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999806293744853,
"loss": 8.3775,
"step": 1295000
},
{
"epoch": 0.01,
"learning_rate": 0.000499980479391511,
"loss": 8.3253,
"step": 1300000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999803288301521,
"loss": 8.3663,
"step": 1305000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999801776904088,
"loss": 8.4,
"step": 1310000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999800259722815,
"loss": 8.3802,
"step": 1315000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999798736757706,
"loss": 8.3608,
"step": 1320000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999797208008763,
"loss": 8.3663,
"step": 1325000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999795673475992,
"loss": 8.3449,
"step": 1330000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999794133159394,
"loss": 8.3615,
"step": 1335000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999792587058974,
"loss": 8.3849,
"step": 1340000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999791035174736,
"loss": 8.3867,
"step": 1345000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999789477506682,
"loss": 8.3461,
"step": 1350000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999787914054816,
"loss": 8.2994,
"step": 1355000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999786344819144,
"loss": 8.3118,
"step": 1360000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999784769799666,
"loss": 8.3385,
"step": 1365000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999783188996388,
"loss": 8.3052,
"step": 1370000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999781602409313,
"loss": 8.3561,
"step": 1375000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999780010038445,
"loss": 8.3534,
"step": 1380000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999778411883786,
"loss": 8.3714,
"step": 1385000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999776807945342,
"loss": 8.3757,
"step": 1390000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999775198223117,
"loss": 8.3769,
"step": 1395000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999773582717112,
"loss": 8.3468,
"step": 1400000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999771961427332,
"loss": 8.3378,
"step": 1405000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999770334353782,
"loss": 8.3782,
"step": 1410000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999768701496464,
"loss": 8.3934,
"step": 1415000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999767062855384,
"loss": 8.3977,
"step": 1420000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999765418430543,
"loss": 8.3509,
"step": 1425000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999763768221946,
"loss": 8.3453,
"step": 1430000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999762112229598,
"loss": 8.2951,
"step": 1435000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999760450453501,
"loss": 8.3644,
"step": 1440000
},
{
"epoch": 0.01,
"learning_rate": 0.000499975878289366,
"loss": 8.3911,
"step": 1445000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999757109550078,
"loss": 8.3925,
"step": 1450000
},
{
"epoch": 0.01,
"learning_rate": 0.000499975543042276,
"loss": 8.3537,
"step": 1455000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999753745511709,
"loss": 8.379,
"step": 1460000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999752054816929,
"loss": 8.3786,
"step": 1465000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999750358338425,
"loss": 8.3861,
"step": 1470000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999748656076198,
"loss": 8.4382,
"step": 1475000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999746948030256,
"loss": 8.4304,
"step": 1480000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999745234200599,
"loss": 8.4402,
"step": 1485000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999743514587234,
"loss": 8.3865,
"step": 1490000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999741789190165,
"loss": 8.4104,
"step": 1495000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999740058009392,
"loss": 8.4022,
"step": 1500000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999738321044923,
"loss": 8.4134,
"step": 1505000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999736578296762,
"loss": 8.4199,
"step": 1510000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999734829764911,
"loss": 8.4059,
"step": 1515000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999733075449375,
"loss": 8.3262,
"step": 1520000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999731315350158,
"loss": 8.2811,
"step": 1525000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999729549467263,
"loss": 8.2573,
"step": 1530000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999727777800696,
"loss": 8.3032,
"step": 1535000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999726000350461,
"loss": 8.3267,
"step": 1540000
},
{
"epoch": 0.01,
"learning_rate": 0.000499972421711656,
"loss": 8.3838,
"step": 1545000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999722428098999,
"loss": 8.38,
"step": 1550000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999720633297782,
"loss": 8.3361,
"step": 1555000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999718832712913,
"loss": 8.3276,
"step": 1560000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999717026344394,
"loss": 8.294,
"step": 1565000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999715214192233,
"loss": 8.3313,
"step": 1570000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999713396256432,
"loss": 8.37,
"step": 1575000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999711572536995,
"loss": 8.3763,
"step": 1580000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999709743033928,
"loss": 8.4021,
"step": 1585000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999707907747233,
"loss": 8.3807,
"step": 1590000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999706066676915,
"loss": 8.4022,
"step": 1595000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999704219822979,
"loss": 8.4175,
"step": 1600000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999702367185429,
"loss": 8.3805,
"step": 1605000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999700508764267,
"loss": 8.3644,
"step": 1610000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999698644559501,
"loss": 8.3482,
"step": 1615000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999696774571134,
"loss": 8.3087,
"step": 1620000
},
{
"epoch": 0.01,
"learning_rate": 0.000499969489879917,
"loss": 8.2912,
"step": 1625000
},
{
"epoch": 0.01,
"learning_rate": 0.0004999693017243612,
"loss": 8.2988,
"step": 1630000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999691129904467,
"loss": 8.2964,
"step": 1635000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999689236781737,
"loss": 8.3291,
"step": 1640000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999687337875427,
"loss": 8.3241,
"step": 1645000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999685433185544,
"loss": 8.3738,
"step": 1650000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999683522712088,
"loss": 8.3744,
"step": 1655000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999681606455066,
"loss": 8.3795,
"step": 1660000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999679684414483,
"loss": 8.4056,
"step": 1665000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999677756590342,
"loss": 8.3822,
"step": 1670000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999675822982648,
"loss": 8.3657,
"step": 1675000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999673883591406,
"loss": 8.3292,
"step": 1680000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999671938416619,
"loss": 8.3594,
"step": 1685000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999669987458292,
"loss": 8.3728,
"step": 1690000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999668030716431,
"loss": 8.3581,
"step": 1695000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999666068191039,
"loss": 8.3038,
"step": 1700000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999664099882121,
"loss": 8.3271,
"step": 1705000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999662125789682,
"loss": 8.307,
"step": 1710000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999660145913726,
"loss": 8.3064,
"step": 1715000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999658160254258,
"loss": 8.3224,
"step": 1720000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999656168811282,
"loss": 8.3283,
"step": 1725000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999654171584802,
"loss": 8.2952,
"step": 1730000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999652168574825,
"loss": 8.263,
"step": 1735000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999650159781353,
"loss": 8.2434,
"step": 1740000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999648145204393,
"loss": 8.2752,
"step": 1745000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999646124843948,
"loss": 8.2715,
"step": 1750000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999644098700023,
"loss": 8.2639,
"step": 1755000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999642066772622,
"loss": 8.2642,
"step": 1760000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999640029061752,
"loss": 8.2577,
"step": 1765000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999637985567415,
"loss": 8.2835,
"step": 1770000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999635936289618,
"loss": 8.3218,
"step": 1775000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999633881228365,
"loss": 8.3525,
"step": 1780000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999631820383659,
"loss": 8.3198,
"step": 1785000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999629753755507,
"loss": 8.3218,
"step": 1790000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999627681343913,
"loss": 8.2785,
"step": 1795000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999625603148882,
"loss": 8.2772,
"step": 1800000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999623519170419,
"loss": 8.3014,
"step": 1805000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999621429408528,
"loss": 8.3073,
"step": 1810000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999619333863214,
"loss": 8.2848,
"step": 1815000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999617232534483,
"loss": 8.3051,
"step": 1820000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999615125422339,
"loss": 8.2821,
"step": 1825000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999613012526786,
"loss": 8.2864,
"step": 1830000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999610893847829,
"loss": 8.293,
"step": 1835000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999608769385475,
"loss": 8.261,
"step": 1840000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999606639139728,
"loss": 8.2727,
"step": 1845000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999604503110592,
"loss": 8.283,
"step": 1850000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999602361298073,
"loss": 8.2857,
"step": 1855000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999600213702174,
"loss": 8.3006,
"step": 1860000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999598060322902,
"loss": 8.3414,
"step": 1865000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999595901160262,
"loss": 8.3064,
"step": 1870000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999593736214257,
"loss": 8.2756,
"step": 1875000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999591565484895,
"loss": 8.2984,
"step": 1880000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999589388972178,
"loss": 8.3053,
"step": 1885000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999587206676113,
"loss": 8.2835,
"step": 1890000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999585018596705,
"loss": 8.289,
"step": 1895000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999582824733958,
"loss": 8.2592,
"step": 1900000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999580625087878,
"loss": 8.2264,
"step": 1905000
},
{
"epoch": 0.02,
"learning_rate": 0.000499957841965847,
"loss": 8.2443,
"step": 1910000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999576208445738,
"loss": 8.244,
"step": 1915000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999573991449687,
"loss": 8.2067,
"step": 1920000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999571768670325,
"loss": 8.2082,
"step": 1925000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999569540107654,
"loss": 8.2183,
"step": 1930000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999567305761681,
"loss": 8.2743,
"step": 1935000
},
{
"epoch": 0.02,
"learning_rate": 0.000499956506563241,
"loss": 8.2565,
"step": 1940000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999562819719847,
"loss": 8.2867,
"step": 1945000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999560568023997,
"loss": 8.3107,
"step": 1950000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999558310544865,
"loss": 8.3057,
"step": 1955000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999556047282456,
"loss": 8.3432,
"step": 1960000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999553778236776,
"loss": 8.3441,
"step": 1965000
},
{
"epoch": 0.02,
"learning_rate": 0.000499955150340783,
"loss": 8.3092,
"step": 1970000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999549222795622,
"loss": 8.2568,
"step": 1975000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999546936400159,
"loss": 8.2439,
"step": 1980000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999544644221446,
"loss": 8.2598,
"step": 1985000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999542346259488,
"loss": 8.2371,
"step": 1990000
},
{
"epoch": 0.02,
"learning_rate": 0.000499954004251429,
"loss": 8.2255,
"step": 1995000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999537732985857,
"loss": 8.2308,
"step": 2000000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999535417674196,
"loss": 8.2345,
"step": 2005000
},
{
"epoch": 0.02,
"learning_rate": 0.000499953309657931,
"loss": 8.2342,
"step": 2010000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999530769701207,
"loss": 8.2473,
"step": 2015000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999528437039891,
"loss": 8.2992,
"step": 2020000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999526098595367,
"loss": 8.2668,
"step": 2025000
},
{
"epoch": 0.02,
"learning_rate": 0.000499952375436764,
"loss": 8.2865,
"step": 2030000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999521404356718,
"loss": 8.2854,
"step": 2035000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999519048562603,
"loss": 8.2697,
"step": 2040000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999516686985304,
"loss": 8.2916,
"step": 2045000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999514319624823,
"loss": 8.2798,
"step": 2050000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999511946481167,
"loss": 8.299,
"step": 2055000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999509567554343,
"loss": 8.2762,
"step": 2060000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999507182844355,
"loss": 8.247,
"step": 2065000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999504792351209,
"loss": 8.2446,
"step": 2070000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999502396074908,
"loss": 8.2318,
"step": 2075000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999499994015462,
"loss": 8.266,
"step": 2080000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999497586172873,
"loss": 8.2625,
"step": 2085000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999495172547148,
"loss": 8.2462,
"step": 2090000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999492753138293,
"loss": 8.2909,
"step": 2095000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999490327946312,
"loss": 8.2519,
"step": 2100000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999487896971212,
"loss": 8.2138,
"step": 2105000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999485460212998,
"loss": 8.236,
"step": 2110000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999483017671676,
"loss": 8.2644,
"step": 2115000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999480569347252,
"loss": 8.317,
"step": 2120000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999478115239732,
"loss": 8.292,
"step": 2125000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999475655349119,
"loss": 8.2746,
"step": 2130000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999473189675422,
"loss": 8.2593,
"step": 2135000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999470718218645,
"loss": 8.2784,
"step": 2140000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999468240978794,
"loss": 8.2614,
"step": 2145000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999465757955875,
"loss": 8.2859,
"step": 2150000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999463269149892,
"loss": 8.2754,
"step": 2155000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999460774560853,
"loss": 8.2623,
"step": 2160000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999458274188764,
"loss": 8.2901,
"step": 2165000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999455768033628,
"loss": 8.2651,
"step": 2170000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999453256095454,
"loss": 8.2527,
"step": 2175000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999450738374246,
"loss": 8.2246,
"step": 2180000
},
{
"epoch": 0.02,
"learning_rate": 0.000499944821487001,
"loss": 8.222,
"step": 2185000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999445685582752,
"loss": 8.1857,
"step": 2190000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999443150512479,
"loss": 8.1593,
"step": 2195000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999440609659195,
"loss": 8.1482,
"step": 2200000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999438063022906,
"loss": 8.2034,
"step": 2205000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999435510603619,
"loss": 8.2233,
"step": 2210000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999432952401341,
"loss": 8.2332,
"step": 2215000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999430388416074,
"loss": 8.2519,
"step": 2220000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999427818647827,
"loss": 8.2316,
"step": 2225000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999425243096605,
"loss": 8.2477,
"step": 2230000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999422661762416,
"loss": 8.2251,
"step": 2235000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999420074645262,
"loss": 8.2782,
"step": 2240000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999417481745153,
"loss": 8.26,
"step": 2245000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999414883062092,
"loss": 8.2635,
"step": 2250000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999412278596087,
"loss": 8.2708,
"step": 2255000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999409668347142,
"loss": 8.2981,
"step": 2260000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999407052315265,
"loss": 8.2581,
"step": 2265000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999404430500461,
"loss": 8.2532,
"step": 2270000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999401802902737,
"loss": 8.2697,
"step": 2275000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999399169522098,
"loss": 8.2703,
"step": 2280000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999396530358551,
"loss": 8.2604,
"step": 2285000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999393885412101,
"loss": 8.2728,
"step": 2290000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999391234682756,
"loss": 8.2636,
"step": 2295000
},
{
"epoch": 0.02,
"learning_rate": 0.000499938857817052,
"loss": 8.2473,
"step": 2300000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999385915875399,
"loss": 8.2496,
"step": 2305000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999383247797401,
"loss": 8.2689,
"step": 2310000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999380573936532,
"loss": 8.303,
"step": 2315000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999377894292798,
"loss": 8.2291,
"step": 2320000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999375208866203,
"loss": 8.2713,
"step": 2325000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999372517656756,
"loss": 8.3408,
"step": 2330000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999369820664463,
"loss": 8.2794,
"step": 2335000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999367117889328,
"loss": 8.3002,
"step": 2340000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999364409331358,
"loss": 8.2982,
"step": 2345000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999361694990562,
"loss": 8.2878,
"step": 2350000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999358974866943,
"loss": 8.2482,
"step": 2355000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999356248960509,
"loss": 8.2382,
"step": 2360000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999353517271267,
"loss": 8.2243,
"step": 2365000
},
{
"epoch": 0.02,
"learning_rate": 0.000499935077979922,
"loss": 8.1924,
"step": 2370000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999348036544378,
"loss": 8.1962,
"step": 2375000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999345287506745,
"loss": 8.1667,
"step": 2380000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999342532686328,
"loss": 8.1605,
"step": 2385000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999339772083134,
"loss": 8.1692,
"step": 2390000
},
{
"epoch": 0.02,
"learning_rate": 0.000499933700569717,
"loss": 8.1645,
"step": 2395000
},
{
"epoch": 0.02,
"learning_rate": 0.000499933423352844,
"loss": 8.1408,
"step": 2400000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999331455576953,
"loss": 8.134,
"step": 2405000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999328671842712,
"loss": 8.1468,
"step": 2410000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999325882325728,
"loss": 8.113,
"step": 2415000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999323087026004,
"loss": 8.1044,
"step": 2420000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999320285943548,
"loss": 8.0701,
"step": 2425000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999317479078366,
"loss": 8.0466,
"step": 2430000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999314666430465,
"loss": 8.0082,
"step": 2435000
},
{
"epoch": 0.02,
"learning_rate": 0.000499931184799985,
"loss": 8.0217,
"step": 2440000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999309023786529,
"loss": 8.0033,
"step": 2445000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999306193790509,
"loss": 8.0105,
"step": 2450000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999303358011794,
"loss": 7.9985,
"step": 2455000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999300516450393,
"loss": 8.0164,
"step": 2460000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999297669106312,
"loss": 8.0093,
"step": 2465000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999294815979557,
"loss": 8.0111,
"step": 2470000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999291957070135,
"loss": 8.0337,
"step": 2475000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999289092378053,
"loss": 8.0641,
"step": 2480000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999286221903317,
"loss": 7.9939,
"step": 2485000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999283345645934,
"loss": 8.0228,
"step": 2490000
},
{
"epoch": 0.02,
"learning_rate": 0.000499928046360591,
"loss": 8.0458,
"step": 2495000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999277575783253,
"loss": 8.0302,
"step": 2500000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999274682177968,
"loss": 8.0451,
"step": 2505000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999271782790063,
"loss": 8.0334,
"step": 2510000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999268877619545,
"loss": 8.0034,
"step": 2515000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999265966666419,
"loss": 7.9877,
"step": 2520000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999263049930692,
"loss": 8.0222,
"step": 2525000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999260127412374,
"loss": 7.993,
"step": 2530000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999257199111468,
"loss": 7.9746,
"step": 2535000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999254265027982,
"loss": 7.9478,
"step": 2540000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999251325161922,
"loss": 7.9584,
"step": 2545000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999248379513296,
"loss": 7.9407,
"step": 2550000
},
{
"epoch": 0.02,
"learning_rate": 0.000499924542808211,
"loss": 7.8694,
"step": 2555000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999242470868372,
"loss": 7.8591,
"step": 2560000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999239507872088,
"loss": 7.8295,
"step": 2565000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999236539093266,
"loss": 7.7761,
"step": 2570000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999233564531911,
"loss": 7.7571,
"step": 2575000
},
{
"epoch": 0.02,
"learning_rate": 0.000499923058418803,
"loss": 7.7923,
"step": 2580000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999227598061631,
"loss": 7.7985,
"step": 2585000
},
{
"epoch": 0.02,
"learning_rate": 0.000499922460615272,
"loss": 7.7888,
"step": 2590000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999221608461306,
"loss": 7.7891,
"step": 2595000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999218604987393,
"loss": 7.7764,
"step": 2600000
},
{
"epoch": 0.02,
"learning_rate": 0.000499921559573099,
"loss": 7.7866,
"step": 2605000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999212580692103,
"loss": 7.779,
"step": 2610000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999209559870738,
"loss": 7.7859,
"step": 2615000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999206533266905,
"loss": 7.7553,
"step": 2620000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999203500880609,
"loss": 7.723,
"step": 2625000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999200462711857,
"loss": 7.6725,
"step": 2630000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999197418760656,
"loss": 7.7191,
"step": 2635000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999194369027014,
"loss": 7.7714,
"step": 2640000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999191313510937,
"loss": 7.7341,
"step": 2645000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999188252212432,
"loss": 7.7491,
"step": 2650000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999185185131507,
"loss": 7.7664,
"step": 2655000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999182112268169,
"loss": 7.7736,
"step": 2660000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999179033622424,
"loss": 7.7262,
"step": 2665000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999175949194281,
"loss": 7.6883,
"step": 2670000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999172858983745,
"loss": 7.7069,
"step": 2675000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999169762990825,
"loss": 7.6417,
"step": 2680000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999166661215527,
"loss": 7.6366,
"step": 2685000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999163553657858,
"loss": 7.6692,
"step": 2690000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999160440317825,
"loss": 7.6494,
"step": 2695000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999157321195438,
"loss": 7.6101,
"step": 2700000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999154196290701,
"loss": 7.6184,
"step": 2705000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999151065603622,
"loss": 7.6237,
"step": 2710000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999147929134209,
"loss": 7.6552,
"step": 2715000
},
{
"epoch": 0.02,
"learning_rate": 0.0004999144786882469,
"loss": 7.6579,
"step": 2720000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999141638848409,
"loss": 7.6001,
"step": 2725000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999138485032035,
"loss": 7.5975,
"step": 2730000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999135325433358,
"loss": 7.6248,
"step": 2735000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999132160052382,
"loss": 7.6358,
"step": 2740000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999128988889115,
"loss": 7.5984,
"step": 2745000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999125811943565,
"loss": 7.5925,
"step": 2750000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999122629215739,
"loss": 7.6375,
"step": 2755000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999119440705644,
"loss": 7.6285,
"step": 2760000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999116246413288,
"loss": 7.6137,
"step": 2765000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999113046338678,
"loss": 7.6145,
"step": 2770000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999109840481822,
"loss": 7.6126,
"step": 2775000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999106628842726,
"loss": 7.5928,
"step": 2780000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999103411421399,
"loss": 7.5962,
"step": 2785000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999100188217848,
"loss": 7.5763,
"step": 2790000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999096959232082,
"loss": 7.5711,
"step": 2795000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999093724464104,
"loss": 7.5754,
"step": 2800000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999090483913926,
"loss": 7.5963,
"step": 2805000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999087237581553,
"loss": 7.581,
"step": 2810000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999083985466992,
"loss": 7.5872,
"step": 2815000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999080727570254,
"loss": 7.5772,
"step": 2820000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999077463891343,
"loss": 7.5629,
"step": 2825000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999074194430268,
"loss": 7.5575,
"step": 2830000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999070919187037,
"loss": 7.5157,
"step": 2835000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999067638161657,
"loss": 7.5362,
"step": 2840000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999064351354135,
"loss": 7.5166,
"step": 2845000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999061058764481,
"loss": 7.5412,
"step": 2850000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999057760392699,
"loss": 7.5056,
"step": 2855000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999054456238798,
"loss": 7.5058,
"step": 2860000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999051146302787,
"loss": 7.4933,
"step": 2865000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999047830584674,
"loss": 7.4638,
"step": 2870000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999044509084463,
"loss": 7.4749,
"step": 2875000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999041181802165,
"loss": 7.4796,
"step": 2880000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999037848737787,
"loss": 7.4789,
"step": 2885000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999034509891337,
"loss": 7.4779,
"step": 2890000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999031165262822,
"loss": 7.4957,
"step": 2895000
},
{
"epoch": 0.03,
"learning_rate": 0.000499902781485225,
"loss": 7.4947,
"step": 2900000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999024458659628,
"loss": 7.5077,
"step": 2905000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999021096684966,
"loss": 7.491,
"step": 2910000
},
{
"epoch": 0.03,
"learning_rate": 0.000499901772892827,
"loss": 7.4886,
"step": 2915000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999014355389547,
"loss": 7.4775,
"step": 2920000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999010976068807,
"loss": 7.4578,
"step": 2925000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999007590966056,
"loss": 7.4282,
"step": 2930000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999004200081303,
"loss": 7.4256,
"step": 2935000
},
{
"epoch": 0.03,
"learning_rate": 0.0004999000803414556,
"loss": 7.4379,
"step": 2940000
},
{
"epoch": 0.03,
"learning_rate": 0.000499899740096582,
"loss": 7.4527,
"step": 2945000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998993992735107,
"loss": 7.4464,
"step": 2950000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998990578722422,
"loss": 7.4258,
"step": 2955000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998987158927775,
"loss": 7.4279,
"step": 2960000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998983733351172,
"loss": 7.4214,
"step": 2965000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998980301992622,
"loss": 7.3997,
"step": 2970000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998976864852133,
"loss": 7.3827,
"step": 2975000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998973421929711,
"loss": 7.3728,
"step": 2980000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998969973225368,
"loss": 7.3785,
"step": 2985000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998966518739109,
"loss": 7.3772,
"step": 2990000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998963058470941,
"loss": 7.371,
"step": 2995000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998959592420874,
"loss": 7.375,
"step": 3000000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998956120588916,
"loss": 7.3659,
"step": 3005000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998952642975076,
"loss": 7.3832,
"step": 3010000
},
{
"epoch": 0.03,
"learning_rate": 0.000499894915957936,
"loss": 7.3574,
"step": 3015000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998945670401776,
"loss": 7.3203,
"step": 3020000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998942175442332,
"loss": 7.3212,
"step": 3025000
},
{
"epoch": 0.03,
"learning_rate": 0.000499893867470104,
"loss": 7.3108,
"step": 3030000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998935168177901,
"loss": 7.3063,
"step": 3035000
},
{
"epoch": 0.03,
"learning_rate": 0.000499893165587293,
"loss": 7.2776,
"step": 3040000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998928137786131,
"loss": 7.2842,
"step": 3045000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998924613917513,
"loss": 7.2903,
"step": 3050000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998921084267086,
"loss": 7.2383,
"step": 3055000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998917548834856,
"loss": 7.2406,
"step": 3060000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998914007620832,
"loss": 7.2557,
"step": 3065000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998910460625021,
"loss": 7.2607,
"step": 3070000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998906907847433,
"loss": 7.2446,
"step": 3075000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998903349288077,
"loss": 7.2361,
"step": 3080000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998899784946957,
"loss": 7.2172,
"step": 3085000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998896214824086,
"loss": 7.2064,
"step": 3090000
},
{
"epoch": 0.03,
"learning_rate": 0.000499889263891947,
"loss": 7.2123,
"step": 3095000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998889057233117,
"loss": 7.165,
"step": 3100000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998885469765036,
"loss": 7.1333,
"step": 3105000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998881876515234,
"loss": 7.1097,
"step": 3110000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998878277483722,
"loss": 7.1224,
"step": 3115000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998874672670505,
"loss": 7.1403,
"step": 3120000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998871062075595,
"loss": 7.1434,
"step": 3125000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998867445698998,
"loss": 7.1053,
"step": 3130000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998863823540723,
"loss": 7.0769,
"step": 3135000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998860195600777,
"loss": 7.061,
"step": 3140000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998856561879171,
"loss": 7.0489,
"step": 3145000
},
{
"epoch": 0.03,
"learning_rate": 0.000499885292237591,
"loss": 7.0647,
"step": 3150000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998849277091006,
"loss": 7.0588,
"step": 3155000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998845626024465,
"loss": 7.0329,
"step": 3160000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998841969176297,
"loss": 7.0196,
"step": 3165000
},
{
"epoch": 0.03,
"learning_rate": 0.000499883830654651,
"loss": 7.0169,
"step": 3170000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998834638135112,
"loss": 7.0068,
"step": 3175000
},
{
"epoch": 0.03,
"learning_rate": 0.000499883096394211,
"loss": 6.9748,
"step": 3180000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998827283967517,
"loss": 6.9723,
"step": 3185000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998823598211337,
"loss": 6.9527,
"step": 3190000
},
{
"epoch": 0.03,
"learning_rate": 0.000499881990667358,
"loss": 6.9318,
"step": 3195000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998816209354256,
"loss": 6.9324,
"step": 3200000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998812506253371,
"loss": 6.9347,
"step": 3205000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998808797370936,
"loss": 6.9538,
"step": 3210000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998805082706958,
"loss": 6.93,
"step": 3215000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998801362261446,
"loss": 6.9095,
"step": 3220000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998797636034408,
"loss": 6.8771,
"step": 3225000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998793904025855,
"loss": 6.8535,
"step": 3230000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998790166235794,
"loss": 6.8399,
"step": 3235000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998786422664232,
"loss": 6.8154,
"step": 3240000
},
{
"epoch": 0.03,
"learning_rate": 0.000499878267331118,
"loss": 6.8082,
"step": 3245000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998778918176647,
"loss": 6.7983,
"step": 3250000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998775157260639,
"loss": 6.7802,
"step": 3255000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998771390563167,
"loss": 6.7625,
"step": 3260000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998767618084239,
"loss": 6.747,
"step": 3265000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998763839823864,
"loss": 6.7384,
"step": 3270000
},
{
"epoch": 0.03,
"learning_rate": 0.000499876005578205,
"loss": 6.7496,
"step": 3275000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998756265958806,
"loss": 6.7399,
"step": 3280000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998752470354143,
"loss": 6.7096,
"step": 3285000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998748668968066,
"loss": 6.6752,
"step": 3290000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998744861800585,
"loss": 6.6532,
"step": 3295000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998741048851711,
"loss": 6.6338,
"step": 3300000
},
{
"epoch": 0.03,
"learning_rate": 0.000499873723012145,
"loss": 6.5932,
"step": 3305000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998733405609813,
"loss": 6.5927,
"step": 3310000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998729575316808,
"loss": 6.5595,
"step": 3315000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998725739242443,
"loss": 6.5342,
"step": 3320000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998721897386729,
"loss": 6.5184,
"step": 3325000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998718049749673,
"loss": 6.5091,
"step": 3330000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998714196331284,
"loss": 6.4739,
"step": 3335000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998710337131571,
"loss": 6.44,
"step": 3340000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998706472150545,
"loss": 6.431,
"step": 3345000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998702601388211,
"loss": 6.3974,
"step": 3350000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998698724844582,
"loss": 6.3681,
"step": 3355000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998694842519664,
"loss": 6.3462,
"step": 3360000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998690954413468,
"loss": 6.3171,
"step": 3365000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998687060526002,
"loss": 6.3039,
"step": 3370000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998683160857275,
"loss": 6.2849,
"step": 3375000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998679255407295,
"loss": 6.2751,
"step": 3380000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998675344176074,
"loss": 6.2425,
"step": 3385000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998671427163619,
"loss": 6.2323,
"step": 3390000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998667504369939,
"loss": 6.2101,
"step": 3395000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998663575795043,
"loss": 6.206,
"step": 3400000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998659641438941,
"loss": 6.189,
"step": 3405000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998655701301642,
"loss": 6.1647,
"step": 3410000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998651755383154,
"loss": 6.159,
"step": 3415000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998647803683488,
"loss": 6.1456,
"step": 3420000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998643846202649,
"loss": 6.1323,
"step": 3425000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998639882940652,
"loss": 6.1335,
"step": 3430000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998635913897502,
"loss": 6.1191,
"step": 3435000
},
{
"epoch": 0.03,
"learning_rate": 0.000499863193907321,
"loss": 6.0935,
"step": 3440000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998627958467786,
"loss": 6.0872,
"step": 3445000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998623972081235,
"loss": 6.0752,
"step": 3450000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998619979913571,
"loss": 6.0658,
"step": 3455000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998615981964802,
"loss": 6.0556,
"step": 3460000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998611978234935,
"loss": 6.0453,
"step": 3465000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998607968723981,
"loss": 6.0446,
"step": 3470000
},
{
"epoch": 0.03,
"learning_rate": 0.000499860395343195,
"loss": 6.0357,
"step": 3475000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998599932358851,
"loss": 6.044,
"step": 3480000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998595905504692,
"loss": 6.0299,
"step": 3485000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998591872869483,
"loss": 6.0288,
"step": 3490000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998587834453233,
"loss": 6.0161,
"step": 3495000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998583790255952,
"loss": 6.0054,
"step": 3500000
},
{
"epoch": 0.03,
"learning_rate": 0.000499857974027765,
"loss": 5.9942,
"step": 3505000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998575684518334,
"loss": 5.9857,
"step": 3510000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998571622978016,
"loss": 5.9877,
"step": 3515000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998567555656704,
"loss": 5.9862,
"step": 3520000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998563482554407,
"loss": 5.9698,
"step": 3525000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998559403671136,
"loss": 5.9642,
"step": 3530000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998555319006898,
"loss": 5.9608,
"step": 3535000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998551228561707,
"loss": 5.9667,
"step": 3540000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998547132335566,
"loss": 5.9588,
"step": 3545000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998543030328489,
"loss": 5.946,
"step": 3550000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998538922540485,
"loss": 5.9504,
"step": 3555000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998534808971563,
"loss": 5.9391,
"step": 3560000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998530689621733,
"loss": 5.9356,
"step": 3565000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998526564491002,
"loss": 5.9244,
"step": 3570000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998522433579384,
"loss": 5.9272,
"step": 3575000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998518296886885,
"loss": 5.925,
"step": 3580000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998514154413515,
"loss": 5.9168,
"step": 3585000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998510006159284,
"loss": 5.9175,
"step": 3590000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998505852124202,
"loss": 5.9058,
"step": 3595000
},
{
"epoch": 0.03,
"learning_rate": 0.000499850169230828,
"loss": 5.8985,
"step": 3600000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998497526711524,
"loss": 5.9005,
"step": 3605000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998493355333946,
"loss": 5.9032,
"step": 3610000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998489178175557,
"loss": 5.8991,
"step": 3615000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998484995236363,
"loss": 5.8947,
"step": 3620000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998480806516377,
"loss": 5.8731,
"step": 3625000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998476612015606,
"loss": 5.8891,
"step": 3630000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998472411734063,
"loss": 5.8775,
"step": 3635000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998468205671755,
"loss": 5.8829,
"step": 3640000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998463993828692,
"loss": 5.8646,
"step": 3645000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998459776204884,
"loss": 5.8804,
"step": 3650000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998455552800343,
"loss": 5.8714,
"step": 3655000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998451323615075,
"loss": 5.8705,
"step": 3660000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998447088649092,
"loss": 5.866,
"step": 3665000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998442847902404,
"loss": 5.8661,
"step": 3670000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998438601375018,
"loss": 5.8625,
"step": 3675000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998434349066948,
"loss": 5.8624,
"step": 3680000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998430090978202,
"loss": 5.8611,
"step": 3685000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998425827108789,
"loss": 5.8612,
"step": 3690000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998421557458719,
"loss": 5.8462,
"step": 3695000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998417282028005,
"loss": 5.8616,
"step": 3700000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998413000816651,
"loss": 5.8545,
"step": 3705000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998408713824673,
"loss": 5.8612,
"step": 3710000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998404421052077,
"loss": 5.8517,
"step": 3715000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998400122498874,
"loss": 5.855,
"step": 3720000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998395818165075,
"loss": 5.842,
"step": 3725000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998391508050687,
"loss": 5.8499,
"step": 3730000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998387192155724,
"loss": 5.8411,
"step": 3735000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998382870480193,
"loss": 5.8442,
"step": 3740000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998378543024105,
"loss": 5.8376,
"step": 3745000
},
{
"epoch": 0.03,
"learning_rate": 0.000499837420978747,
"loss": 5.8465,
"step": 3750000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998369870770298,
"loss": 5.838,
"step": 3755000
},
{
"epoch": 0.03,
"learning_rate": 0.00049983655259726,
"loss": 5.8371,
"step": 3760000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998361175394384,
"loss": 5.8365,
"step": 3765000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998356819035662,
"loss": 5.8392,
"step": 3770000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998352456896442,
"loss": 5.8358,
"step": 3775000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998348088976737,
"loss": 5.8399,
"step": 3780000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998343715276554,
"loss": 5.8297,
"step": 3785000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998339335795905,
"loss": 5.825,
"step": 3790000
},
{
"epoch": 0.03,
"learning_rate": 0.00049983349505348,
"loss": 5.8235,
"step": 3795000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998330559493248,
"loss": 5.8283,
"step": 3800000
},
{
"epoch": 0.03,
"learning_rate": 0.0004998326162671262,
"loss": 5.8185,
"step": 3805000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998321760068848,
"loss": 5.8234,
"step": 3810000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998317351686019,
"loss": 5.8218,
"step": 3815000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998312937522785,
"loss": 5.8146,
"step": 3820000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998308517579155,
"loss": 5.8176,
"step": 3825000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998304091855141,
"loss": 5.8182,
"step": 3830000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998299660350751,
"loss": 5.8208,
"step": 3835000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998295223065998,
"loss": 5.8194,
"step": 3840000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998290780000891,
"loss": 5.829,
"step": 3845000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998286331155438,
"loss": 5.8147,
"step": 3850000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998281876529652,
"loss": 5.8143,
"step": 3855000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998277416123543,
"loss": 5.8108,
"step": 3860000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998272949937122,
"loss": 5.8082,
"step": 3865000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998268477970397,
"loss": 5.8181,
"step": 3870000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998264000223379,
"loss": 5.8044,
"step": 3875000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998259516696081,
"loss": 5.8118,
"step": 3880000
},
{
"epoch": 0.04,
"learning_rate": 0.000499825502738851,
"loss": 5.8059,
"step": 3885000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998250532300678,
"loss": 5.8096,
"step": 3890000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998246031432595,
"loss": 5.8124,
"step": 3895000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998241524784272,
"loss": 5.8068,
"step": 3900000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998237012355719,
"loss": 5.8133,
"step": 3905000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998232494146947,
"loss": 5.8056,
"step": 3910000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998227970157965,
"loss": 5.8104,
"step": 3915000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998223440388784,
"loss": 5.8119,
"step": 3920000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998218904839416,
"loss": 5.8128,
"step": 3925000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998214363509869,
"loss": 5.8021,
"step": 3930000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998209816400156,
"loss": 5.8091,
"step": 3935000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998205263510286,
"loss": 5.7921,
"step": 3940000
},
{
"epoch": 0.04,
"learning_rate": 0.000499820070484027,
"loss": 5.8137,
"step": 3945000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998196140390118,
"loss": 5.8,
"step": 3950000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998191570159842,
"loss": 5.8085,
"step": 3955000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998186994149451,
"loss": 5.8102,
"step": 3960000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998182412358955,
"loss": 5.801,
"step": 3965000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998177824788367,
"loss": 5.7999,
"step": 3970000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998173231437696,
"loss": 5.8047,
"step": 3975000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998168632306954,
"loss": 5.804,
"step": 3980000
},
{
"epoch": 0.04,
"learning_rate": 0.000499816402739615,
"loss": 5.8068,
"step": 3985000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998159416705294,
"loss": 5.7973,
"step": 3990000
},
{
"epoch": 0.04,
"learning_rate": 0.00049981548002344,
"loss": 5.7941,
"step": 3995000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998150177983476,
"loss": 5.7839,
"step": 4000000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998145549952533,
"loss": 5.8023,
"step": 4005000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998140916141582,
"loss": 5.7993,
"step": 4010000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998136276550634,
"loss": 5.7915,
"step": 4015000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998131631179701,
"loss": 5.8068,
"step": 4020000
},
{
"epoch": 0.04,
"learning_rate": 0.000499812698002879,
"loss": 5.7889,
"step": 4025000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998122323097916,
"loss": 5.7873,
"step": 4030000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998117660387086,
"loss": 5.7947,
"step": 4035000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998112991896313,
"loss": 5.7878,
"step": 4040000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998108317625608,
"loss": 5.8034,
"step": 4045000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998103637574981,
"loss": 5.7911,
"step": 4050000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998098951744443,
"loss": 5.7927,
"step": 4055000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998094260134005,
"loss": 5.7973,
"step": 4060000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998089562743678,
"loss": 5.8046,
"step": 4065000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998084859573472,
"loss": 5.7947,
"step": 4070000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998080150623399,
"loss": 5.7934,
"step": 4075000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998075435893467,
"loss": 5.7906,
"step": 4080000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998070715383692,
"loss": 5.7772,
"step": 4085000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998065989094082,
"loss": 5.7962,
"step": 4090000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998061257024647,
"loss": 5.802,
"step": 4095000
},
{
"epoch": 0.04,
"learning_rate": 0.00049980565191754,
"loss": 5.7967,
"step": 4100000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998051775546351,
"loss": 5.7827,
"step": 4105000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998047026137511,
"loss": 5.7844,
"step": 4110000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998042270948891,
"loss": 5.7964,
"step": 4115000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998037509980502,
"loss": 5.8,
"step": 4120000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998032743232355,
"loss": 5.7951,
"step": 4125000
},
{
"epoch": 0.04,
"learning_rate": 0.000499802797070446,
"loss": 5.8,
"step": 4130000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998023192396831,
"loss": 5.7958,
"step": 4135000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998018408309476,
"loss": 5.8041,
"step": 4140000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998013618442408,
"loss": 5.783,
"step": 4145000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998008822795636,
"loss": 5.7961,
"step": 4150000
},
{
"epoch": 0.04,
"learning_rate": 0.0004998004021369174,
"loss": 5.7924,
"step": 4155000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997999214163031,
"loss": 5.7976,
"step": 4160000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997994401177219,
"loss": 5.798,
"step": 4165000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997989582411748,
"loss": 5.7874,
"step": 4170000
},
{
"epoch": 0.04,
"learning_rate": 0.000499798475786663,
"loss": 5.7848,
"step": 4175000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997979927541876,
"loss": 5.787,
"step": 4180000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997975091437497,
"loss": 5.7947,
"step": 4185000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997970249553505,
"loss": 5.7894,
"step": 4190000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997965401889911,
"loss": 5.7835,
"step": 4195000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997960548446725,
"loss": 5.7828,
"step": 4200000
},
{
"epoch": 0.04,
"learning_rate": 0.000499795568922396,
"loss": 5.7876,
"step": 4205000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997950824221626,
"loss": 5.7814,
"step": 4210000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997945953439735,
"loss": 5.7835,
"step": 4215000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997941076878297,
"loss": 5.7961,
"step": 4220000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997936194537325,
"loss": 5.7924,
"step": 4225000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997931306416828,
"loss": 5.7789,
"step": 4230000
},
{
"epoch": 0.04,
"learning_rate": 0.000499792641251682,
"loss": 5.7848,
"step": 4235000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997921512837311,
"loss": 5.7747,
"step": 4240000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997916607378312,
"loss": 5.7827,
"step": 4245000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997911696139835,
"loss": 5.7856,
"step": 4250000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997906779121892,
"loss": 5.783,
"step": 4255000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997901856324493,
"loss": 5.7864,
"step": 4260000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997896927747649,
"loss": 5.7805,
"step": 4265000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997891993391373,
"loss": 5.7904,
"step": 4270000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997887053255676,
"loss": 5.7943,
"step": 4275000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997882107340569,
"loss": 5.7962,
"step": 4280000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997877155646063,
"loss": 5.7891,
"step": 4285000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997872198172171,
"loss": 5.7867,
"step": 4290000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997867234918902,
"loss": 5.7902,
"step": 4295000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997862265886271,
"loss": 5.7774,
"step": 4300000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997857291074286,
"loss": 5.7809,
"step": 4305000
},
{
"epoch": 0.04,
"learning_rate": 0.000499785231048296,
"loss": 5.7785,
"step": 4310000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997847324112306,
"loss": 5.7871,
"step": 4315000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997842331962332,
"loss": 5.7785,
"step": 4320000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997837334033054,
"loss": 5.7766,
"step": 4325000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997832330324479,
"loss": 5.7901,
"step": 4330000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997827320836622,
"loss": 5.7853,
"step": 4335000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997822305569493,
"loss": 5.7874,
"step": 4340000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997817284523104,
"loss": 5.7914,
"step": 4345000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997812257697467,
"loss": 5.7902,
"step": 4350000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997807225092593,
"loss": 5.7816,
"step": 4355000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997802186708493,
"loss": 5.775,
"step": 4360000
},
{
"epoch": 0.04,
"learning_rate": 0.000499779714254518,
"loss": 5.7831,
"step": 4365000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997792092602666,
"loss": 5.7777,
"step": 4370000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997787036880961,
"loss": 5.779,
"step": 4375000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997781975380077,
"loss": 5.7796,
"step": 4380000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997776908100028,
"loss": 5.7781,
"step": 4385000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997771835040823,
"loss": 5.7771,
"step": 4390000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997766756202475,
"loss": 5.7827,
"step": 4395000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997761671584995,
"loss": 5.783,
"step": 4400000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997756581188395,
"loss": 5.7835,
"step": 4405000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997751485012688,
"loss": 5.7873,
"step": 4410000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997746383057885,
"loss": 5.7744,
"step": 4415000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997741275323997,
"loss": 5.7752,
"step": 4420000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997736161811037,
"loss": 5.7843,
"step": 4425000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997731042519015,
"loss": 5.7788,
"step": 4430000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997725917447945,
"loss": 5.7817,
"step": 4435000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997720786597838,
"loss": 5.775,
"step": 4440000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997715649968706,
"loss": 5.7807,
"step": 4445000
},
{
"epoch": 0.04,
"learning_rate": 0.000499771050756056,
"loss": 5.7893,
"step": 4450000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997705359373412,
"loss": 5.7742,
"step": 4455000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997700205407276,
"loss": 5.7769,
"step": 4460000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997695045662162,
"loss": 5.7797,
"step": 4465000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997689880138083,
"loss": 5.7801,
"step": 4470000
},
{
"epoch": 0.04,
"learning_rate": 0.000499768470883505,
"loss": 5.777,
"step": 4475000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997679531753075,
"loss": 5.7898,
"step": 4480000
},
{
"epoch": 0.04,
"learning_rate": 0.000499767434889217,
"loss": 5.7771,
"step": 4485000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997669160252348,
"loss": 5.7716,
"step": 4490000
},
{
"epoch": 0.04,
"learning_rate": 0.000499766396583362,
"loss": 5.7714,
"step": 4495000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997658765635998,
"loss": 5.7832,
"step": 4500000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997653559659495,
"loss": 5.7738,
"step": 4505000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997648347904122,
"loss": 5.7686,
"step": 4510000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997643130369891,
"loss": 5.7733,
"step": 4515000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997637907056816,
"loss": 5.7771,
"step": 4520000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997632677964906,
"loss": 5.7837,
"step": 4525000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997627443094175,
"loss": 5.7774,
"step": 4530000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997622202444636,
"loss": 5.7821,
"step": 4535000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997616956016298,
"loss": 5.761,
"step": 4540000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997611703809177,
"loss": 5.7795,
"step": 4545000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997606445823283,
"loss": 5.7794,
"step": 4550000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997601182058628,
"loss": 5.7741,
"step": 4555000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997595912515224,
"loss": 5.7792,
"step": 4560000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997590637193084,
"loss": 5.7692,
"step": 4565000
},
{
"epoch": 0.04,
"learning_rate": 0.000499758535609222,
"loss": 5.7858,
"step": 4570000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997580069212644,
"loss": 5.7781,
"step": 4575000
},
{
"epoch": 0.04,
"learning_rate": 0.000499757477655437,
"loss": 5.7855,
"step": 4580000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997569478117407,
"loss": 5.7668,
"step": 4585000
},
{
"epoch": 0.04,
"learning_rate": 0.000499756417390177,
"loss": 5.7681,
"step": 4590000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997558863907469,
"loss": 5.7772,
"step": 4595000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997553548134518,
"loss": 5.7633,
"step": 4600000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997548226582929,
"loss": 5.7828,
"step": 4605000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997542899252714,
"loss": 5.7726,
"step": 4610000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997537566143886,
"loss": 5.7748,
"step": 4615000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997532227256456,
"loss": 5.7676,
"step": 4620000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997526882590437,
"loss": 5.784,
"step": 4625000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997521532145841,
"loss": 5.7698,
"step": 4630000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997516175922682,
"loss": 5.7794,
"step": 4635000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997510813920972,
"loss": 5.7775,
"step": 4640000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997505446140721,
"loss": 5.7744,
"step": 4645000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997500072581943,
"loss": 5.7749,
"step": 4650000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997494693244652,
"loss": 5.7874,
"step": 4655000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997489308128857,
"loss": 5.7749,
"step": 4660000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997483917234574,
"loss": 5.776,
"step": 4665000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997478520561814,
"loss": 5.7645,
"step": 4670000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997473118110589,
"loss": 5.7727,
"step": 4675000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997467709880912,
"loss": 5.7834,
"step": 4680000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997462295872794,
"loss": 5.7788,
"step": 4685000
},
{
"epoch": 0.04,
"learning_rate": 0.000499745687608625,
"loss": 5.7777,
"step": 4690000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997451450521292,
"loss": 5.7823,
"step": 4695000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997446019177932,
"loss": 5.7738,
"step": 4700000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997440582056182,
"loss": 5.7861,
"step": 4705000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997435139156055,
"loss": 5.7768,
"step": 4710000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997429690477564,
"loss": 5.7802,
"step": 4715000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997424236020722,
"loss": 5.7791,
"step": 4720000
},
{
"epoch": 0.04,
"learning_rate": 0.000499741877578554,
"loss": 5.7771,
"step": 4725000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997413309772032,
"loss": 5.7744,
"step": 4730000
},
{
"epoch": 0.04,
"learning_rate": 0.000499740783798021,
"loss": 5.7682,
"step": 4735000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997402360410087,
"loss": 5.771,
"step": 4740000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997396877061676,
"loss": 5.7762,
"step": 4745000
},
{
"epoch": 0.04,
"learning_rate": 0.000499739138793499,
"loss": 5.7693,
"step": 4750000
},
{
"epoch": 0.04,
"learning_rate": 0.000499738589303004,
"loss": 5.7779,
"step": 4755000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997380392346839,
"loss": 5.7696,
"step": 4760000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997374885885402,
"loss": 5.7817,
"step": 4765000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997369373645738,
"loss": 5.775,
"step": 4770000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997363855627864,
"loss": 5.777,
"step": 4775000
},
{
"epoch": 0.04,
"learning_rate": 0.000499735833183179,
"loss": 5.7665,
"step": 4780000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997352802257529,
"loss": 5.7802,
"step": 4785000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997347266905095,
"loss": 5.7794,
"step": 4790000
},
{
"epoch": 0.04,
"learning_rate": 0.00049973417257745,
"loss": 5.7793,
"step": 4795000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997336178865756,
"loss": 5.7804,
"step": 4800000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997330626178878,
"loss": 5.7776,
"step": 4805000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997325067713877,
"loss": 5.7697,
"step": 4810000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997319503470765,
"loss": 5.7728,
"step": 4815000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997313933449559,
"loss": 5.7885,
"step": 4820000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997308357650267,
"loss": 5.7713,
"step": 4825000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997302776072905,
"loss": 5.792,
"step": 4830000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997297188717486,
"loss": 5.7741,
"step": 4835000
},
{
"epoch": 0.04,
"learning_rate": 0.000499729159558402,
"loss": 5.7726,
"step": 4840000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997285996672522,
"loss": 5.7735,
"step": 4845000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997280391983007,
"loss": 5.7674,
"step": 4850000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997274781515484,
"loss": 5.7837,
"step": 4855000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997269165269968,
"loss": 5.7831,
"step": 4860000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997263543246472,
"loss": 5.7744,
"step": 4865000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997257915445009,
"loss": 5.7695,
"step": 4870000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997252281865592,
"loss": 5.7722,
"step": 4875000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997246642508233,
"loss": 5.7746,
"step": 4880000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997240997372946,
"loss": 5.7707,
"step": 4885000
},
{
"epoch": 0.04,
"learning_rate": 0.0004997235346459744,
"loss": 5.789,
"step": 4890000
},
{
"epoch": 0.04,
"learning_rate": 0.000499722968976864,
"loss": 5.7864,
"step": 4895000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997224027299647,
"loss": 5.7804,
"step": 4900000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997218359052779,
"loss": 5.7705,
"step": 4905000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997212685028048,
"loss": 5.7645,
"step": 4910000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997207005225467,
"loss": 5.7834,
"step": 4915000
},
{
"epoch": 0.05,
"learning_rate": 0.000499720131964505,
"loss": 5.7885,
"step": 4920000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997195628286809,
"loss": 5.7779,
"step": 4925000
},
{
"epoch": 0.05,
"learning_rate": 0.000499718993115076,
"loss": 5.7758,
"step": 4930000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997184228236912,
"loss": 5.7711,
"step": 4935000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997178519545281,
"loss": 5.7807,
"step": 4940000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997172805075879,
"loss": 5.7835,
"step": 4945000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997167084828721,
"loss": 5.7817,
"step": 4950000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997161358803818,
"loss": 5.7753,
"step": 4955000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997155627001185,
"loss": 5.7697,
"step": 4960000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997149889420832,
"loss": 5.7705,
"step": 4965000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997144146062778,
"loss": 5.7725,
"step": 4970000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997138396927031,
"loss": 5.7804,
"step": 4975000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997132642013607,
"loss": 5.7711,
"step": 4980000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997126881322518,
"loss": 5.7751,
"step": 4985000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997121114853779,
"loss": 5.7851,
"step": 4990000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997115342607402,
"loss": 5.7743,
"step": 4995000
},
{
"epoch": 0.05,
"learning_rate": 0.00049971095645834,
"loss": 5.7762,
"step": 5000000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997103780781788,
"loss": 5.7669,
"step": 5005000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997097991202578,
"loss": 5.7694,
"step": 5010000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997092195845784,
"loss": 5.7716,
"step": 5015000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997086394711419,
"loss": 5.7884,
"step": 5020000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997080587799496,
"loss": 5.7788,
"step": 5025000
},
{
"epoch": 0.05,
"learning_rate": 0.000499707477511003,
"loss": 5.7753,
"step": 5030000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997068956643034,
"loss": 5.7691,
"step": 5035000
},
{
"epoch": 0.05,
"learning_rate": 0.000499706313239852,
"loss": 5.781,
"step": 5040000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997057302376502,
"loss": 5.7799,
"step": 5045000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997051466576995,
"loss": 5.7671,
"step": 5050000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997045625000012,
"loss": 5.7771,
"step": 5055000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997039777645565,
"loss": 5.7738,
"step": 5060000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997033924513669,
"loss": 5.7746,
"step": 5065000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997028065604337,
"loss": 5.7758,
"step": 5070000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997022200917582,
"loss": 5.7802,
"step": 5075000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997016330453418,
"loss": 5.7693,
"step": 5080000
},
{
"epoch": 0.05,
"learning_rate": 0.000499701045421186,
"loss": 5.7806,
"step": 5085000
},
{
"epoch": 0.05,
"learning_rate": 0.0004997004572192919,
"loss": 5.7844,
"step": 5090000
},
{
"epoch": 0.05,
"learning_rate": 0.000499699868439661,
"loss": 5.7856,
"step": 5095000
},
{
"epoch": 0.05,
"learning_rate": 0.0004996992790822949,
"loss": 5.7773,
"step": 5100000
},
{
"epoch": 0.05,
"learning_rate": 0.0004996986891471945,
"loss": 5.7725,
"step": 5105000
},
{
"epoch": 0.05,
"learning_rate": 0.0004996980986343614,
"loss": 5.779,
"step": 5110000
}
],
"logging_steps": 5000,
"max_steps": 326562159,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 5000,
"total_flos": 8.278749111273246e+19,
"train_batch_size": 12,
"trial_name": null,
"trial_params": null
}