{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998654648190501, "eval_steps": 929, "global_step": 1858, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005381407237992735, "grad_norm": 0.1708984375, "learning_rate": 2e-05, "loss": 1.7582, "step": 1 }, { "epoch": 0.0005381407237992735, "eval_loss": 1.7564055919647217, "eval_runtime": 1070.6897, "eval_samples_per_second": 4.751, "eval_steps_per_second": 2.376, "step": 1 }, { "epoch": 0.001076281447598547, "grad_norm": 0.1669921875, "learning_rate": 4e-05, "loss": 1.7331, "step": 2 }, { "epoch": 0.0016144221713978205, "grad_norm": 0.1669921875, "learning_rate": 6e-05, "loss": 1.7681, "step": 3 }, { "epoch": 0.002152562895197094, "grad_norm": 0.1650390625, "learning_rate": 8e-05, "loss": 1.8229, "step": 4 }, { "epoch": 0.0026907036189963676, "grad_norm": 0.173828125, "learning_rate": 0.0001, "loss": 1.7704, "step": 5 }, { "epoch": 0.003228844342795641, "grad_norm": 0.1689453125, "learning_rate": 0.00012, "loss": 1.7375, "step": 6 }, { "epoch": 0.0037669850665949145, "grad_norm": 0.1474609375, "learning_rate": 0.00014, "loss": 1.6453, "step": 7 }, { "epoch": 0.004305125790394188, "grad_norm": 0.1455078125, "learning_rate": 0.00016, "loss": 1.736, "step": 8 }, { "epoch": 0.004843266514193462, "grad_norm": 0.1474609375, "learning_rate": 0.00018, "loss": 1.7635, "step": 9 }, { "epoch": 0.005381407237992735, "grad_norm": 0.158203125, "learning_rate": 0.0002, "loss": 1.7327, "step": 10 }, { "epoch": 0.005919547961792009, "grad_norm": 0.1787109375, "learning_rate": 0.0001999999640698613, "loss": 1.8415, "step": 11 }, { "epoch": 0.006457688685591282, "grad_norm": 0.177734375, "learning_rate": 0.00019999985627947102, "loss": 1.6672, "step": 12 }, { "epoch": 0.006995829409390556, "grad_norm": 0.1650390625, "learning_rate": 0.0001999996766289066, "loss": 1.736, "step": 13 }, { "epoch": 0.007533970133189829, "grad_norm": 0.1552734375, "learning_rate": 0.00019999942511829714, "loss": 1.7363, "step": 14 }, { "epoch": 0.008072110856989103, "grad_norm": 0.1494140625, "learning_rate": 0.00019999910174782338, "loss": 1.727, "step": 15 }, { "epoch": 0.008610251580788377, "grad_norm": 0.1494140625, "learning_rate": 0.0001999987065177177, "loss": 1.6646, "step": 16 }, { "epoch": 0.00914839230458765, "grad_norm": 0.16015625, "learning_rate": 0.0001999982394282641, "loss": 1.7491, "step": 17 }, { "epoch": 0.009686533028386924, "grad_norm": 0.146484375, "learning_rate": 0.0001999977004797983, "loss": 1.5799, "step": 18 }, { "epoch": 0.010224673752186197, "grad_norm": 0.14453125, "learning_rate": 0.00019999708967270748, "loss": 1.6069, "step": 19 }, { "epoch": 0.01076281447598547, "grad_norm": 0.1640625, "learning_rate": 0.00019999640700743063, "loss": 1.7531, "step": 20 }, { "epoch": 0.011300955199784744, "grad_norm": 0.1484375, "learning_rate": 0.0001999956524844583, "loss": 1.7023, "step": 21 }, { "epoch": 0.011839095923584017, "grad_norm": 0.146484375, "learning_rate": 0.00019999482610433272, "loss": 1.6703, "step": 22 }, { "epoch": 0.01237723664738329, "grad_norm": 0.1513671875, "learning_rate": 0.0001999939278676477, "loss": 1.7103, "step": 23 }, { "epoch": 0.012915377371182564, "grad_norm": 0.158203125, "learning_rate": 0.00019999295777504872, "loss": 1.7811, "step": 24 }, { "epoch": 0.013453518094981838, "grad_norm": 0.1533203125, "learning_rate": 0.0001999919158272329, "loss": 1.7914, "step": 25 }, { "epoch": 0.013991658818781111, "grad_norm": 0.1513671875, "learning_rate": 0.00019999080202494896, "loss": 1.6868, "step": 26 }, { "epoch": 0.014529799542580385, "grad_norm": 0.15625, "learning_rate": 0.00019998961636899734, "loss": 1.7685, "step": 27 }, { "epoch": 0.015067940266379658, "grad_norm": 0.1572265625, "learning_rate": 0.00019998835886022998, "loss": 1.6023, "step": 28 }, { "epoch": 0.015606080990178932, "grad_norm": 0.1708984375, "learning_rate": 0.00019998702949955058, "loss": 1.752, "step": 29 }, { "epoch": 0.016144221713978207, "grad_norm": 0.1552734375, "learning_rate": 0.00019998562828791442, "loss": 1.6972, "step": 30 }, { "epoch": 0.01668236243777748, "grad_norm": 0.1640625, "learning_rate": 0.00019998415522632837, "loss": 1.6587, "step": 31 }, { "epoch": 0.017220503161576754, "grad_norm": 0.1572265625, "learning_rate": 0.00019998261031585105, "loss": 1.6595, "step": 32 }, { "epoch": 0.017758643885376027, "grad_norm": 0.1591796875, "learning_rate": 0.00019998099355759258, "loss": 1.646, "step": 33 }, { "epoch": 0.0182967846091753, "grad_norm": 0.1689453125, "learning_rate": 0.00019997930495271478, "loss": 1.669, "step": 34 }, { "epoch": 0.018834925332974574, "grad_norm": 0.1640625, "learning_rate": 0.0001999775445024311, "loss": 1.6462, "step": 35 }, { "epoch": 0.019373066056773847, "grad_norm": 0.1611328125, "learning_rate": 0.0001999757122080066, "loss": 1.6264, "step": 36 }, { "epoch": 0.01991120678057312, "grad_norm": 0.162109375, "learning_rate": 0.00019997380807075795, "loss": 1.6036, "step": 37 }, { "epoch": 0.020449347504372394, "grad_norm": 0.1689453125, "learning_rate": 0.00019997183209205348, "loss": 1.7258, "step": 38 }, { "epoch": 0.020987488228171668, "grad_norm": 0.1572265625, "learning_rate": 0.00019996978427331316, "loss": 1.6599, "step": 39 }, { "epoch": 0.02152562895197094, "grad_norm": 0.1650390625, "learning_rate": 0.00019996766461600853, "loss": 1.6436, "step": 40 }, { "epoch": 0.022063769675770215, "grad_norm": 0.17578125, "learning_rate": 0.00019996547312166278, "loss": 1.6144, "step": 41 }, { "epoch": 0.022601910399569488, "grad_norm": 0.16796875, "learning_rate": 0.00019996320979185074, "loss": 1.6133, "step": 42 }, { "epoch": 0.02314005112336876, "grad_norm": 0.173828125, "learning_rate": 0.00019996087462819884, "loss": 1.6613, "step": 43 }, { "epoch": 0.023678191847168035, "grad_norm": 0.1689453125, "learning_rate": 0.00019995846763238512, "loss": 1.5875, "step": 44 }, { "epoch": 0.02421633257096731, "grad_norm": 0.1669921875, "learning_rate": 0.0001999559888061393, "loss": 1.5179, "step": 45 }, { "epoch": 0.02475447329476658, "grad_norm": 0.1748046875, "learning_rate": 0.00019995343815124258, "loss": 1.7113, "step": 46 }, { "epoch": 0.025292614018565855, "grad_norm": 0.1748046875, "learning_rate": 0.00019995081566952797, "loss": 1.6607, "step": 47 }, { "epoch": 0.02583075474236513, "grad_norm": 0.1787109375, "learning_rate": 0.00019994812136287995, "loss": 1.7359, "step": 48 }, { "epoch": 0.026368895466164402, "grad_norm": 0.1826171875, "learning_rate": 0.0001999453552332346, "loss": 1.6086, "step": 49 }, { "epoch": 0.026907036189963675, "grad_norm": 0.1748046875, "learning_rate": 0.00019994251728257977, "loss": 1.5743, "step": 50 }, { "epoch": 0.02744517691376295, "grad_norm": 0.1806640625, "learning_rate": 0.00019993960751295476, "loss": 1.623, "step": 51 }, { "epoch": 0.027983317637562222, "grad_norm": 0.171875, "learning_rate": 0.00019993662592645057, "loss": 1.5712, "step": 52 }, { "epoch": 0.028521458361361496, "grad_norm": 0.1767578125, "learning_rate": 0.00019993357252520973, "loss": 1.5411, "step": 53 }, { "epoch": 0.02905959908516077, "grad_norm": 0.1845703125, "learning_rate": 0.00019993044731142647, "loss": 1.653, "step": 54 }, { "epoch": 0.029597739808960043, "grad_norm": 1.4296875, "learning_rate": 0.00019992725028734657, "loss": 2.8213, "step": 55 }, { "epoch": 0.030135880532759316, "grad_norm": 0.1884765625, "learning_rate": 0.00019992398145526736, "loss": 1.6002, "step": 56 }, { "epoch": 0.03067402125655859, "grad_norm": 0.1953125, "learning_rate": 0.0001999206408175379, "loss": 1.6772, "step": 57 }, { "epoch": 0.031212161980357863, "grad_norm": 0.197265625, "learning_rate": 0.00019991722837655877, "loss": 1.7308, "step": 58 }, { "epoch": 0.031750302704157136, "grad_norm": 0.1923828125, "learning_rate": 0.00019991374413478214, "loss": 1.6025, "step": 59 }, { "epoch": 0.03228844342795641, "grad_norm": 0.1796875, "learning_rate": 0.0001999101880947118, "loss": 1.5677, "step": 60 }, { "epoch": 0.03282658415175568, "grad_norm": 0.1884765625, "learning_rate": 0.00019990656025890315, "loss": 1.5899, "step": 61 }, { "epoch": 0.03336472487555496, "grad_norm": 0.1884765625, "learning_rate": 0.00019990286062996314, "loss": 1.5741, "step": 62 }, { "epoch": 0.03390286559935423, "grad_norm": 0.185546875, "learning_rate": 0.00019989908921055037, "loss": 1.5989, "step": 63 }, { "epoch": 0.03444100632315351, "grad_norm": 0.1845703125, "learning_rate": 0.00019989524600337493, "loss": 1.5642, "step": 64 }, { "epoch": 0.03497914704695278, "grad_norm": 0.1845703125, "learning_rate": 0.0001998913310111986, "loss": 1.6306, "step": 65 }, { "epoch": 0.035517287770752054, "grad_norm": 0.1787109375, "learning_rate": 0.0001998873442368347, "loss": 1.5547, "step": 66 }, { "epoch": 0.036055428494551324, "grad_norm": 0.201171875, "learning_rate": 0.00019988328568314816, "loss": 1.5534, "step": 67 }, { "epoch": 0.0365935692183506, "grad_norm": 0.1904296875, "learning_rate": 0.00019987915535305543, "loss": 1.5992, "step": 68 }, { "epoch": 0.03713170994214987, "grad_norm": 0.1806640625, "learning_rate": 0.00019987495324952457, "loss": 1.6437, "step": 69 }, { "epoch": 0.03766985066594915, "grad_norm": 0.1826171875, "learning_rate": 0.00019987067937557527, "loss": 1.5228, "step": 70 }, { "epoch": 0.03820799138974842, "grad_norm": 0.18359375, "learning_rate": 0.00019986633373427867, "loss": 1.5794, "step": 71 }, { "epoch": 0.038746132113547695, "grad_norm": 0.193359375, "learning_rate": 0.00019986191632875765, "loss": 1.6305, "step": 72 }, { "epoch": 0.039284272837346965, "grad_norm": 0.1904296875, "learning_rate": 0.00019985742716218657, "loss": 1.6157, "step": 73 }, { "epoch": 0.03982241356114624, "grad_norm": 0.1884765625, "learning_rate": 0.00019985286623779125, "loss": 1.5696, "step": 74 }, { "epoch": 0.04036055428494551, "grad_norm": 0.1806640625, "learning_rate": 0.00019984823355884926, "loss": 1.4752, "step": 75 }, { "epoch": 0.04089869500874479, "grad_norm": 0.1943359375, "learning_rate": 0.0001998435291286897, "loss": 1.5289, "step": 76 }, { "epoch": 0.04143683573254406, "grad_norm": 0.193359375, "learning_rate": 0.0001998387529506931, "loss": 1.6047, "step": 77 }, { "epoch": 0.041974976456343335, "grad_norm": 0.19140625, "learning_rate": 0.00019983390502829166, "loss": 1.5861, "step": 78 }, { "epoch": 0.042513117180142605, "grad_norm": 0.1845703125, "learning_rate": 0.00019982898536496913, "loss": 1.5065, "step": 79 }, { "epoch": 0.04305125790394188, "grad_norm": 0.1904296875, "learning_rate": 0.0001998239939642608, "loss": 1.5311, "step": 80 }, { "epoch": 0.04358939862774115, "grad_norm": 0.201171875, "learning_rate": 0.00019981893082975346, "loss": 1.5567, "step": 81 }, { "epoch": 0.04412753935154043, "grad_norm": 0.181640625, "learning_rate": 0.0001998137959650855, "loss": 1.5768, "step": 82 }, { "epoch": 0.0446656800753397, "grad_norm": 0.19921875, "learning_rate": 0.00019980858937394694, "loss": 1.6252, "step": 83 }, { "epoch": 0.045203820799138976, "grad_norm": 0.197265625, "learning_rate": 0.00019980331106007911, "loss": 1.5492, "step": 84 }, { "epoch": 0.045741961522938246, "grad_norm": 0.1875, "learning_rate": 0.00019979796102727513, "loss": 1.5478, "step": 85 }, { "epoch": 0.04628010224673752, "grad_norm": 0.1845703125, "learning_rate": 0.0001997925392793795, "loss": 1.5767, "step": 86 }, { "epoch": 0.04681824297053679, "grad_norm": 0.2060546875, "learning_rate": 0.00019978704582028828, "loss": 1.637, "step": 87 }, { "epoch": 0.04735638369433607, "grad_norm": 0.1962890625, "learning_rate": 0.00019978148065394915, "loss": 1.5993, "step": 88 }, { "epoch": 0.04789452441813534, "grad_norm": 0.185546875, "learning_rate": 0.00019977584378436122, "loss": 1.5114, "step": 89 }, { "epoch": 0.04843266514193462, "grad_norm": 0.1953125, "learning_rate": 0.00019977013521557514, "loss": 1.6407, "step": 90 }, { "epoch": 0.04897080586573389, "grad_norm": 0.197265625, "learning_rate": 0.00019976435495169313, "loss": 1.6321, "step": 91 }, { "epoch": 0.04950894658953316, "grad_norm": 0.19140625, "learning_rate": 0.00019975850299686892, "loss": 1.5429, "step": 92 }, { "epoch": 0.05004708731333243, "grad_norm": 0.1953125, "learning_rate": 0.0001997525793553077, "loss": 1.6344, "step": 93 }, { "epoch": 0.05058522803713171, "grad_norm": 0.1904296875, "learning_rate": 0.00019974658403126621, "loss": 1.5728, "step": 94 }, { "epoch": 0.05112336876093098, "grad_norm": 0.18359375, "learning_rate": 0.00019974051702905277, "loss": 1.4928, "step": 95 }, { "epoch": 0.05166150948473026, "grad_norm": 0.173828125, "learning_rate": 0.00019973437835302707, "loss": 1.5221, "step": 96 }, { "epoch": 0.05219965020852953, "grad_norm": 0.197265625, "learning_rate": 0.00019972816800760044, "loss": 1.5483, "step": 97 }, { "epoch": 0.052737790932328804, "grad_norm": 0.2158203125, "learning_rate": 0.00019972188599723563, "loss": 1.5854, "step": 98 }, { "epoch": 0.053275931656128074, "grad_norm": 0.2109375, "learning_rate": 0.0001997155323264469, "loss": 1.5553, "step": 99 }, { "epoch": 0.05381407237992735, "grad_norm": 0.208984375, "learning_rate": 0.00019970910699979998, "loss": 1.6431, "step": 100 }, { "epoch": 0.05435221310372663, "grad_norm": 0.193359375, "learning_rate": 0.00019970261002191222, "loss": 1.5758, "step": 101 }, { "epoch": 0.0548903538275259, "grad_norm": 0.1953125, "learning_rate": 0.00019969604139745227, "loss": 1.4189, "step": 102 }, { "epoch": 0.055428494551325175, "grad_norm": 0.19921875, "learning_rate": 0.00019968940113114045, "loss": 1.6308, "step": 103 }, { "epoch": 0.055966635275124445, "grad_norm": 0.2001953125, "learning_rate": 0.00019968268922774843, "loss": 1.6202, "step": 104 }, { "epoch": 0.05650477599892372, "grad_norm": 0.2001953125, "learning_rate": 0.00019967590569209938, "loss": 1.654, "step": 105 }, { "epoch": 0.05704291672272299, "grad_norm": 0.2001953125, "learning_rate": 0.000199669050529068, "loss": 1.5916, "step": 106 }, { "epoch": 0.05758105744652227, "grad_norm": 0.2158203125, "learning_rate": 0.00019966212374358043, "loss": 1.6968, "step": 107 }, { "epoch": 0.05811919817032154, "grad_norm": 0.1943359375, "learning_rate": 0.00019965512534061424, "loss": 1.5067, "step": 108 }, { "epoch": 0.058657338894120815, "grad_norm": 0.484375, "learning_rate": 0.00019964805532519852, "loss": 2.5387, "step": 109 }, { "epoch": 0.059195479617920085, "grad_norm": 0.1943359375, "learning_rate": 0.00019964091370241384, "loss": 1.5211, "step": 110 }, { "epoch": 0.05973362034171936, "grad_norm": 0.1923828125, "learning_rate": 0.00019963370047739216, "loss": 1.5901, "step": 111 }, { "epoch": 0.06027176106551863, "grad_norm": 0.306640625, "learning_rate": 0.00019962641565531692, "loss": 2.3356, "step": 112 }, { "epoch": 0.06080990178931791, "grad_norm": 0.203125, "learning_rate": 0.00019961905924142302, "loss": 1.6551, "step": 113 }, { "epoch": 0.06134804251311718, "grad_norm": 0.2109375, "learning_rate": 0.0001996116312409968, "loss": 1.6245, "step": 114 }, { "epoch": 0.061886183236916456, "grad_norm": 0.19921875, "learning_rate": 0.000199604131659376, "loss": 1.5338, "step": 115 }, { "epoch": 0.062424323960715726, "grad_norm": 0.2041015625, "learning_rate": 0.00019959656050194994, "loss": 1.5739, "step": 116 }, { "epoch": 0.062962464684515, "grad_norm": 0.2001953125, "learning_rate": 0.0001995889177741592, "loss": 1.6059, "step": 117 }, { "epoch": 0.06350060540831427, "grad_norm": 0.201171875, "learning_rate": 0.00019958120348149584, "loss": 1.6693, "step": 118 }, { "epoch": 0.06403874613211355, "grad_norm": 0.1982421875, "learning_rate": 0.00019957341762950344, "loss": 1.6165, "step": 119 }, { "epoch": 0.06457688685591283, "grad_norm": 0.41796875, "learning_rate": 0.00019956556022377692, "loss": 2.405, "step": 120 }, { "epoch": 0.06511502757971209, "grad_norm": 0.208984375, "learning_rate": 0.0001995576312699626, "loss": 1.4751, "step": 121 }, { "epoch": 0.06565316830351137, "grad_norm": 0.205078125, "learning_rate": 0.00019954963077375827, "loss": 1.6499, "step": 122 }, { "epoch": 0.06619130902731064, "grad_norm": 0.2099609375, "learning_rate": 0.00019954155874091312, "loss": 1.6151, "step": 123 }, { "epoch": 0.06672944975110992, "grad_norm": 0.216796875, "learning_rate": 0.00019953341517722772, "loss": 1.5165, "step": 124 }, { "epoch": 0.06726759047490918, "grad_norm": 0.2099609375, "learning_rate": 0.00019952520008855405, "loss": 1.5494, "step": 125 }, { "epoch": 0.06780573119870846, "grad_norm": 0.1982421875, "learning_rate": 0.00019951691348079554, "loss": 1.5743, "step": 126 }, { "epoch": 0.06834387192250774, "grad_norm": 0.1982421875, "learning_rate": 0.0001995085553599069, "loss": 1.5696, "step": 127 }, { "epoch": 0.06888201264630701, "grad_norm": 0.193359375, "learning_rate": 0.00019950012573189432, "loss": 1.5523, "step": 128 }, { "epoch": 0.06942015337010628, "grad_norm": 0.19921875, "learning_rate": 0.0001994916246028154, "loss": 1.5326, "step": 129 }, { "epoch": 0.06995829409390555, "grad_norm": 0.19921875, "learning_rate": 0.000199483051978779, "loss": 1.5639, "step": 130 }, { "epoch": 0.07049643481770483, "grad_norm": 0.205078125, "learning_rate": 0.00019947440786594552, "loss": 1.5575, "step": 131 }, { "epoch": 0.07103457554150411, "grad_norm": 0.205078125, "learning_rate": 0.00019946569227052656, "loss": 1.4759, "step": 132 }, { "epoch": 0.07157271626530337, "grad_norm": 0.2021484375, "learning_rate": 0.00019945690519878524, "loss": 1.6524, "step": 133 }, { "epoch": 0.07211085698910265, "grad_norm": 0.2041015625, "learning_rate": 0.00019944804665703592, "loss": 1.5728, "step": 134 }, { "epoch": 0.07264899771290192, "grad_norm": 0.2119140625, "learning_rate": 0.0001994391166516444, "loss": 1.6025, "step": 135 }, { "epoch": 0.0731871384367012, "grad_norm": 0.208984375, "learning_rate": 0.0001994301151890278, "loss": 1.624, "step": 136 }, { "epoch": 0.07372527916050046, "grad_norm": 0.201171875, "learning_rate": 0.0001994210422756546, "loss": 1.5558, "step": 137 }, { "epoch": 0.07426341988429974, "grad_norm": 0.20703125, "learning_rate": 0.00019941189791804464, "loss": 1.6189, "step": 138 }, { "epoch": 0.07480156060809902, "grad_norm": 0.1982421875, "learning_rate": 0.00019940268212276905, "loss": 1.6133, "step": 139 }, { "epoch": 0.0753397013318983, "grad_norm": 0.1962890625, "learning_rate": 0.00019939339489645033, "loss": 1.4786, "step": 140 }, { "epoch": 0.07587784205569756, "grad_norm": 0.201171875, "learning_rate": 0.0001993840362457623, "loss": 1.5586, "step": 141 }, { "epoch": 0.07641598277949684, "grad_norm": 0.203125, "learning_rate": 0.00019937460617743013, "loss": 1.5394, "step": 142 }, { "epoch": 0.07695412350329611, "grad_norm": 0.212890625, "learning_rate": 0.0001993651046982303, "loss": 1.5113, "step": 143 }, { "epoch": 0.07749226422709539, "grad_norm": 0.193359375, "learning_rate": 0.00019935553181499058, "loss": 1.5028, "step": 144 }, { "epoch": 0.07803040495089465, "grad_norm": 0.19921875, "learning_rate": 0.00019934588753459006, "loss": 1.6129, "step": 145 }, { "epoch": 0.07856854567469393, "grad_norm": 0.197265625, "learning_rate": 0.00019933617186395917, "loss": 1.5152, "step": 146 }, { "epoch": 0.0791066863984932, "grad_norm": 0.1982421875, "learning_rate": 0.0001993263848100796, "loss": 1.5805, "step": 147 }, { "epoch": 0.07964482712229248, "grad_norm": 0.2060546875, "learning_rate": 0.00019931652637998441, "loss": 1.5038, "step": 148 }, { "epoch": 0.08018296784609175, "grad_norm": 0.1982421875, "learning_rate": 0.00019930659658075784, "loss": 1.618, "step": 149 }, { "epoch": 0.08072110856989102, "grad_norm": 0.2041015625, "learning_rate": 0.00019929659541953546, "loss": 1.6652, "step": 150 }, { "epoch": 0.0812592492936903, "grad_norm": 0.2119140625, "learning_rate": 0.00019928652290350418, "loss": 1.6616, "step": 151 }, { "epoch": 0.08179739001748958, "grad_norm": 0.1943359375, "learning_rate": 0.00019927637903990208, "loss": 1.5274, "step": 152 }, { "epoch": 0.08233553074128885, "grad_norm": 0.2236328125, "learning_rate": 0.00019926616383601862, "loss": 1.5803, "step": 153 }, { "epoch": 0.08287367146508812, "grad_norm": 0.1953125, "learning_rate": 0.00019925587729919446, "loss": 1.5152, "step": 154 }, { "epoch": 0.0834118121888874, "grad_norm": 0.1884765625, "learning_rate": 0.0001992455194368215, "loss": 1.4949, "step": 155 }, { "epoch": 0.08394995291268667, "grad_norm": 0.197265625, "learning_rate": 0.00019923509025634298, "loss": 1.512, "step": 156 }, { "epoch": 0.08448809363648595, "grad_norm": 0.203125, "learning_rate": 0.00019922458976525333, "loss": 1.6326, "step": 157 }, { "epoch": 0.08502623436028521, "grad_norm": 0.2177734375, "learning_rate": 0.0001992140179710982, "loss": 1.5598, "step": 158 }, { "epoch": 0.08556437508408449, "grad_norm": 0.205078125, "learning_rate": 0.0001992033748814745, "loss": 1.5762, "step": 159 }, { "epoch": 0.08610251580788376, "grad_norm": 0.1923828125, "learning_rate": 0.00019919266050403046, "loss": 1.4855, "step": 160 }, { "epoch": 0.08664065653168304, "grad_norm": 0.1865234375, "learning_rate": 0.0001991818748464654, "loss": 1.3728, "step": 161 }, { "epoch": 0.0871787972554823, "grad_norm": 0.1923828125, "learning_rate": 0.00019917101791652993, "loss": 1.5738, "step": 162 }, { "epoch": 0.08771693797928158, "grad_norm": 0.1982421875, "learning_rate": 0.00019916008972202585, "loss": 1.5524, "step": 163 }, { "epoch": 0.08825507870308086, "grad_norm": 0.203125, "learning_rate": 0.0001991490902708063, "loss": 1.5035, "step": 164 }, { "epoch": 0.08879321942688014, "grad_norm": 0.19921875, "learning_rate": 0.00019913801957077536, "loss": 1.5146, "step": 165 }, { "epoch": 0.0893313601506794, "grad_norm": 0.203125, "learning_rate": 0.00019912687762988857, "loss": 1.4932, "step": 166 }, { "epoch": 0.08986950087447867, "grad_norm": 0.2138671875, "learning_rate": 0.00019911566445615251, "loss": 1.629, "step": 167 }, { "epoch": 0.09040764159827795, "grad_norm": 0.197265625, "learning_rate": 0.00019910438005762502, "loss": 1.5364, "step": 168 }, { "epoch": 0.09094578232207723, "grad_norm": 0.2099609375, "learning_rate": 0.0001990930244424151, "loss": 1.5145, "step": 169 }, { "epoch": 0.09148392304587649, "grad_norm": 0.2109375, "learning_rate": 0.00019908159761868293, "loss": 1.6213, "step": 170 }, { "epoch": 0.09202206376967577, "grad_norm": 0.2060546875, "learning_rate": 0.00019907009959463983, "loss": 1.5669, "step": 171 }, { "epoch": 0.09256020449347505, "grad_norm": 0.1953125, "learning_rate": 0.00019905853037854835, "loss": 1.5249, "step": 172 }, { "epoch": 0.09309834521727432, "grad_norm": 0.205078125, "learning_rate": 0.00019904688997872215, "loss": 1.5548, "step": 173 }, { "epoch": 0.09363648594107359, "grad_norm": 0.203125, "learning_rate": 0.00019903517840352602, "loss": 1.6024, "step": 174 }, { "epoch": 0.09417462666487286, "grad_norm": 0.2099609375, "learning_rate": 0.00019902339566137597, "loss": 1.462, "step": 175 }, { "epoch": 0.09471276738867214, "grad_norm": 0.212890625, "learning_rate": 0.00019901154176073913, "loss": 1.51, "step": 176 }, { "epoch": 0.09525090811247142, "grad_norm": 0.2138671875, "learning_rate": 0.00019899961671013367, "loss": 1.5489, "step": 177 }, { "epoch": 0.09578904883627068, "grad_norm": 0.2392578125, "learning_rate": 0.00019898762051812904, "loss": 1.6222, "step": 178 }, { "epoch": 0.09632718956006996, "grad_norm": 0.2265625, "learning_rate": 0.00019897555319334568, "loss": 1.5607, "step": 179 }, { "epoch": 0.09686533028386923, "grad_norm": 0.201171875, "learning_rate": 0.00019896341474445525, "loss": 1.5468, "step": 180 }, { "epoch": 0.09740347100766851, "grad_norm": 0.208984375, "learning_rate": 0.00019895120518018044, "loss": 1.4968, "step": 181 }, { "epoch": 0.09794161173146777, "grad_norm": 0.2177734375, "learning_rate": 0.00019893892450929509, "loss": 1.6015, "step": 182 }, { "epoch": 0.09847975245526705, "grad_norm": 0.197265625, "learning_rate": 0.00019892657274062415, "loss": 1.5322, "step": 183 }, { "epoch": 0.09901789317906633, "grad_norm": 0.2119140625, "learning_rate": 0.00019891414988304357, "loss": 1.5657, "step": 184 }, { "epoch": 0.0995560339028656, "grad_norm": 0.2001953125, "learning_rate": 0.00019890165594548052, "loss": 1.5313, "step": 185 }, { "epoch": 0.10009417462666487, "grad_norm": 0.3984375, "learning_rate": 0.0001988890909369131, "loss": 2.4587, "step": 186 }, { "epoch": 0.10063231535046414, "grad_norm": 0.2060546875, "learning_rate": 0.00019887645486637063, "loss": 1.4734, "step": 187 }, { "epoch": 0.10117045607426342, "grad_norm": 0.2060546875, "learning_rate": 0.0001988637477429334, "loss": 1.5846, "step": 188 }, { "epoch": 0.1017085967980627, "grad_norm": 0.287109375, "learning_rate": 0.00019885096957573278, "loss": 2.2911, "step": 189 }, { "epoch": 0.10224673752186196, "grad_norm": 0.2021484375, "learning_rate": 0.00019883812037395124, "loss": 1.5146, "step": 190 }, { "epoch": 0.10278487824566124, "grad_norm": 0.201171875, "learning_rate": 0.00019882520014682218, "loss": 1.6141, "step": 191 }, { "epoch": 0.10332301896946051, "grad_norm": 0.2080078125, "learning_rate": 0.00019881220890363016, "loss": 1.549, "step": 192 }, { "epoch": 0.10386115969325979, "grad_norm": 0.2119140625, "learning_rate": 0.00019879914665371068, "loss": 1.5209, "step": 193 }, { "epoch": 0.10439930041705905, "grad_norm": 0.2041015625, "learning_rate": 0.0001987860134064504, "loss": 1.5379, "step": 194 }, { "epoch": 0.10493744114085833, "grad_norm": 0.20703125, "learning_rate": 0.00019877280917128678, "loss": 1.5903, "step": 195 }, { "epoch": 0.10547558186465761, "grad_norm": 0.2080078125, "learning_rate": 0.00019875953395770852, "loss": 1.6408, "step": 196 }, { "epoch": 0.10601372258845689, "grad_norm": 0.20703125, "learning_rate": 0.00019874618777525517, "loss": 1.572, "step": 197 }, { "epoch": 0.10655186331225615, "grad_norm": 0.2109375, "learning_rate": 0.00019873277063351736, "loss": 1.5012, "step": 198 }, { "epoch": 0.10709000403605543, "grad_norm": 0.21875, "learning_rate": 0.0001987192825421367, "loss": 1.5195, "step": 199 }, { "epoch": 0.1076281447598547, "grad_norm": 0.20703125, "learning_rate": 0.00019870572351080574, "loss": 1.6245, "step": 200 }, { "epoch": 0.10816628548365398, "grad_norm": 0.2021484375, "learning_rate": 0.00019869209354926804, "loss": 1.5189, "step": 201 }, { "epoch": 0.10870442620745326, "grad_norm": 0.197265625, "learning_rate": 0.00019867839266731813, "loss": 1.5211, "step": 202 }, { "epoch": 0.10924256693125252, "grad_norm": 0.2177734375, "learning_rate": 0.00019866462087480153, "loss": 1.5874, "step": 203 }, { "epoch": 0.1097807076550518, "grad_norm": 0.20703125, "learning_rate": 0.00019865077818161464, "loss": 1.6052, "step": 204 }, { "epoch": 0.11031884837885107, "grad_norm": 0.201171875, "learning_rate": 0.0001986368645977049, "loss": 1.4936, "step": 205 }, { "epoch": 0.11085698910265035, "grad_norm": 0.212890625, "learning_rate": 0.00019862288013307061, "loss": 1.4787, "step": 206 }, { "epoch": 0.11139512982644961, "grad_norm": 0.201171875, "learning_rate": 0.00019860882479776112, "loss": 1.4796, "step": 207 }, { "epoch": 0.11193327055024889, "grad_norm": 0.2099609375, "learning_rate": 0.0001985946986018765, "loss": 1.5306, "step": 208 }, { "epoch": 0.11247141127404817, "grad_norm": 0.208984375, "learning_rate": 0.00019858050155556803, "loss": 1.665, "step": 209 }, { "epoch": 0.11300955199784744, "grad_norm": 0.2216796875, "learning_rate": 0.00019856623366903763, "loss": 1.5373, "step": 210 }, { "epoch": 0.1135476927216467, "grad_norm": 0.2109375, "learning_rate": 0.0001985518949525383, "loss": 1.5626, "step": 211 }, { "epoch": 0.11408583344544598, "grad_norm": 0.208984375, "learning_rate": 0.00019853748541637386, "loss": 1.4775, "step": 212 }, { "epoch": 0.11462397416924526, "grad_norm": 0.208984375, "learning_rate": 0.00019852300507089906, "loss": 1.6152, "step": 213 }, { "epoch": 0.11516211489304454, "grad_norm": 0.2060546875, "learning_rate": 0.0001985084539265195, "loss": 1.4363, "step": 214 }, { "epoch": 0.1157002556168438, "grad_norm": 0.205078125, "learning_rate": 0.00019849383199369166, "loss": 1.5324, "step": 215 }, { "epoch": 0.11623839634064308, "grad_norm": 0.2109375, "learning_rate": 0.00019847913928292297, "loss": 1.4841, "step": 216 }, { "epoch": 0.11677653706444235, "grad_norm": 0.2041015625, "learning_rate": 0.00019846437580477155, "loss": 1.6342, "step": 217 }, { "epoch": 0.11731467778824163, "grad_norm": 0.2119140625, "learning_rate": 0.00019844954156984654, "loss": 1.5868, "step": 218 }, { "epoch": 0.1178528185120409, "grad_norm": 0.19921875, "learning_rate": 0.00019843463658880786, "loss": 1.462, "step": 219 }, { "epoch": 0.11839095923584017, "grad_norm": 0.2099609375, "learning_rate": 0.00019841966087236624, "loss": 1.4839, "step": 220 }, { "epoch": 0.11892909995963945, "grad_norm": 0.2001953125, "learning_rate": 0.00019840461443128334, "loss": 1.5156, "step": 221 }, { "epoch": 0.11946724068343872, "grad_norm": 0.2099609375, "learning_rate": 0.0001983894972763715, "loss": 1.4733, "step": 222 }, { "epoch": 0.12000538140723799, "grad_norm": 0.2041015625, "learning_rate": 0.00019837430941849394, "loss": 1.5143, "step": 223 }, { "epoch": 0.12054352213103726, "grad_norm": 0.197265625, "learning_rate": 0.0001983590508685648, "loss": 1.5608, "step": 224 }, { "epoch": 0.12108166285483654, "grad_norm": 0.2041015625, "learning_rate": 0.0001983437216375488, "loss": 1.5341, "step": 225 }, { "epoch": 0.12161980357863582, "grad_norm": 0.2177734375, "learning_rate": 0.00019832832173646164, "loss": 1.6734, "step": 226 }, { "epoch": 0.12215794430243508, "grad_norm": 0.19921875, "learning_rate": 0.0001983128511763697, "loss": 1.4705, "step": 227 }, { "epoch": 0.12269608502623436, "grad_norm": 0.19140625, "learning_rate": 0.0001982973099683902, "loss": 1.4851, "step": 228 }, { "epoch": 0.12323422575003364, "grad_norm": 0.2001953125, "learning_rate": 0.00019828169812369104, "loss": 1.5959, "step": 229 }, { "epoch": 0.12377236647383291, "grad_norm": 0.1953125, "learning_rate": 0.00019826601565349096, "loss": 1.5285, "step": 230 }, { "epoch": 0.12431050719763218, "grad_norm": 0.201171875, "learning_rate": 0.00019825026256905948, "loss": 1.3816, "step": 231 }, { "epoch": 0.12484864792143145, "grad_norm": 0.2138671875, "learning_rate": 0.00019823443888171674, "loss": 1.514, "step": 232 }, { "epoch": 0.12538678864523073, "grad_norm": 0.212890625, "learning_rate": 0.0001982185446028337, "loss": 1.5329, "step": 233 }, { "epoch": 0.12592492936903, "grad_norm": 0.1904296875, "learning_rate": 0.00019820257974383204, "loss": 1.4556, "step": 234 }, { "epoch": 0.12646307009282928, "grad_norm": 0.19921875, "learning_rate": 0.00019818654431618416, "loss": 1.4932, "step": 235 }, { "epoch": 0.12700121081662855, "grad_norm": 0.1943359375, "learning_rate": 0.00019817043833141317, "loss": 1.538, "step": 236 }, { "epoch": 0.1275393515404278, "grad_norm": 0.208984375, "learning_rate": 0.00019815426180109285, "loss": 1.5676, "step": 237 }, { "epoch": 0.1280774922642271, "grad_norm": 0.1982421875, "learning_rate": 0.0001981380147368477, "loss": 1.5562, "step": 238 }, { "epoch": 0.12861563298802636, "grad_norm": 0.2041015625, "learning_rate": 0.00019812169715035293, "loss": 1.5464, "step": 239 }, { "epoch": 0.12915377371182565, "grad_norm": 0.21875, "learning_rate": 0.00019810530905333437, "loss": 1.6453, "step": 240 }, { "epoch": 0.12969191443562492, "grad_norm": 0.2109375, "learning_rate": 0.0001980888504575686, "loss": 1.6214, "step": 241 }, { "epoch": 0.13023005515942418, "grad_norm": 0.193359375, "learning_rate": 0.0001980723213748828, "loss": 1.4382, "step": 242 }, { "epoch": 0.13076819588322347, "grad_norm": 0.201171875, "learning_rate": 0.00019805572181715478, "loss": 1.553, "step": 243 }, { "epoch": 0.13130633660702273, "grad_norm": 0.2080078125, "learning_rate": 0.000198039051796313, "loss": 1.5791, "step": 244 }, { "epoch": 0.131844477330822, "grad_norm": 0.1923828125, "learning_rate": 0.00019802231132433668, "loss": 1.4855, "step": 245 }, { "epoch": 0.1323826180546213, "grad_norm": 0.2109375, "learning_rate": 0.00019800550041325555, "loss": 1.3994, "step": 246 }, { "epoch": 0.13292075877842055, "grad_norm": 0.2060546875, "learning_rate": 0.00019798861907514988, "loss": 1.4994, "step": 247 }, { "epoch": 0.13345889950221984, "grad_norm": 0.2060546875, "learning_rate": 0.00019797166732215076, "loss": 1.476, "step": 248 }, { "epoch": 0.1339970402260191, "grad_norm": 0.2275390625, "learning_rate": 0.00019795464516643972, "loss": 1.5839, "step": 249 }, { "epoch": 0.13453518094981837, "grad_norm": 0.203125, "learning_rate": 0.00019793755262024888, "loss": 1.577, "step": 250 }, { "epoch": 0.13507332167361766, "grad_norm": 0.2021484375, "learning_rate": 0.0001979203896958611, "loss": 1.4234, "step": 251 }, { "epoch": 0.13561146239741692, "grad_norm": 0.201171875, "learning_rate": 0.0001979031564056096, "loss": 1.4, "step": 252 }, { "epoch": 0.1361496031212162, "grad_norm": 0.2099609375, "learning_rate": 0.00019788585276187836, "loss": 1.5617, "step": 253 }, { "epoch": 0.13668774384501547, "grad_norm": 0.21484375, "learning_rate": 0.00019786847877710176, "loss": 1.5093, "step": 254 }, { "epoch": 0.13722588456881474, "grad_norm": 0.2060546875, "learning_rate": 0.0001978510344637648, "loss": 1.5284, "step": 255 }, { "epoch": 0.13776402529261403, "grad_norm": 0.20703125, "learning_rate": 0.00019783351983440305, "loss": 1.4392, "step": 256 }, { "epoch": 0.1383021660164133, "grad_norm": 0.2119140625, "learning_rate": 0.00019781593490160253, "loss": 1.5118, "step": 257 }, { "epoch": 0.13884030674021255, "grad_norm": 0.2099609375, "learning_rate": 0.00019779827967799984, "loss": 1.472, "step": 258 }, { "epoch": 0.13937844746401185, "grad_norm": 0.2119140625, "learning_rate": 0.0001977805541762821, "loss": 1.5746, "step": 259 }, { "epoch": 0.1399165881878111, "grad_norm": 0.2060546875, "learning_rate": 0.00019776275840918686, "loss": 1.4725, "step": 260 }, { "epoch": 0.1404547289116104, "grad_norm": 0.208984375, "learning_rate": 0.00019774489238950222, "loss": 1.4252, "step": 261 }, { "epoch": 0.14099286963540966, "grad_norm": 0.205078125, "learning_rate": 0.00019772695613006676, "loss": 1.5825, "step": 262 }, { "epoch": 0.14153101035920893, "grad_norm": 0.2177734375, "learning_rate": 0.00019770894964376954, "loss": 1.495, "step": 263 }, { "epoch": 0.14206915108300822, "grad_norm": 0.353515625, "learning_rate": 0.00019769087294355003, "loss": 2.323, "step": 264 }, { "epoch": 0.14260729180680748, "grad_norm": 0.2109375, "learning_rate": 0.00019767272604239824, "loss": 1.5132, "step": 265 }, { "epoch": 0.14314543253060674, "grad_norm": 0.2080078125, "learning_rate": 0.00019765450895335455, "loss": 1.5579, "step": 266 }, { "epoch": 0.14368357325440603, "grad_norm": 0.2021484375, "learning_rate": 0.00019763622168950982, "loss": 1.5783, "step": 267 }, { "epoch": 0.1442217139782053, "grad_norm": 0.2138671875, "learning_rate": 0.00019761786426400534, "loss": 1.4916, "step": 268 }, { "epoch": 0.1447598547020046, "grad_norm": 0.224609375, "learning_rate": 0.0001975994366900328, "loss": 1.3823, "step": 269 }, { "epoch": 0.14529799542580385, "grad_norm": 0.23046875, "learning_rate": 0.0001975809389808343, "loss": 1.5722, "step": 270 }, { "epoch": 0.1458361361496031, "grad_norm": 0.2001953125, "learning_rate": 0.00019756237114970237, "loss": 1.52, "step": 271 }, { "epoch": 0.1463742768734024, "grad_norm": 0.205078125, "learning_rate": 0.00019754373320997985, "loss": 1.4784, "step": 272 }, { "epoch": 0.14691241759720167, "grad_norm": 0.2158203125, "learning_rate": 0.00019752502517506008, "loss": 1.4993, "step": 273 }, { "epoch": 0.14745055832100093, "grad_norm": 0.2158203125, "learning_rate": 0.00019750624705838665, "loss": 1.4469, "step": 274 }, { "epoch": 0.14798869904480022, "grad_norm": 0.2080078125, "learning_rate": 0.00019748739887345362, "loss": 1.551, "step": 275 }, { "epoch": 0.14852683976859948, "grad_norm": 0.203125, "learning_rate": 0.0001974684806338053, "loss": 1.4534, "step": 276 }, { "epoch": 0.14906498049239877, "grad_norm": 0.22265625, "learning_rate": 0.00019744949235303642, "loss": 1.474, "step": 277 }, { "epoch": 0.14960312121619804, "grad_norm": 0.212890625, "learning_rate": 0.00019743043404479204, "loss": 1.4862, "step": 278 }, { "epoch": 0.1501412619399973, "grad_norm": 0.2099609375, "learning_rate": 0.00019741130572276743, "loss": 1.5401, "step": 279 }, { "epoch": 0.1506794026637966, "grad_norm": 0.2099609375, "learning_rate": 0.0001973921074007083, "loss": 1.4687, "step": 280 }, { "epoch": 0.15121754338759585, "grad_norm": 0.2080078125, "learning_rate": 0.00019737283909241065, "loss": 1.4511, "step": 281 }, { "epoch": 0.15175568411139512, "grad_norm": 0.19921875, "learning_rate": 0.00019735350081172067, "loss": 1.498, "step": 282 }, { "epoch": 0.1522938248351944, "grad_norm": 0.2021484375, "learning_rate": 0.00019733409257253497, "loss": 1.4395, "step": 283 }, { "epoch": 0.15283196555899367, "grad_norm": 0.2080078125, "learning_rate": 0.00019731461438880033, "loss": 1.6071, "step": 284 }, { "epoch": 0.15337010628279296, "grad_norm": 0.1953125, "learning_rate": 0.00019729506627451382, "loss": 1.4045, "step": 285 }, { "epoch": 0.15390824700659222, "grad_norm": 0.2080078125, "learning_rate": 0.00019727544824372278, "loss": 1.5087, "step": 286 }, { "epoch": 0.1544463877303915, "grad_norm": 0.197265625, "learning_rate": 0.00019725576031052477, "loss": 1.4775, "step": 287 }, { "epoch": 0.15498452845419078, "grad_norm": 0.2041015625, "learning_rate": 0.00019723600248906765, "loss": 1.4937, "step": 288 }, { "epoch": 0.15552266917799004, "grad_norm": 0.2119140625, "learning_rate": 0.00019721617479354935, "loss": 1.4997, "step": 289 }, { "epoch": 0.1560608099017893, "grad_norm": 0.208984375, "learning_rate": 0.00019719627723821818, "loss": 1.4807, "step": 290 }, { "epoch": 0.1565989506255886, "grad_norm": 0.2099609375, "learning_rate": 0.00019717630983737255, "loss": 1.5091, "step": 291 }, { "epoch": 0.15713709134938786, "grad_norm": 0.20703125, "learning_rate": 0.00019715627260536112, "loss": 1.4348, "step": 292 }, { "epoch": 0.15767523207318715, "grad_norm": 0.2158203125, "learning_rate": 0.00019713616555658265, "loss": 1.5814, "step": 293 }, { "epoch": 0.1582133727969864, "grad_norm": 0.193359375, "learning_rate": 0.00019711598870548617, "loss": 1.5146, "step": 294 }, { "epoch": 0.15875151352078568, "grad_norm": 0.21484375, "learning_rate": 0.00019709574206657078, "loss": 1.5344, "step": 295 }, { "epoch": 0.15928965424458497, "grad_norm": 0.20703125, "learning_rate": 0.0001970754256543858, "loss": 1.5548, "step": 296 }, { "epoch": 0.15982779496838423, "grad_norm": 0.203125, "learning_rate": 0.00019705503948353064, "loss": 1.4442, "step": 297 }, { "epoch": 0.1603659356921835, "grad_norm": 0.212890625, "learning_rate": 0.00019703458356865486, "loss": 1.4709, "step": 298 }, { "epoch": 0.16090407641598278, "grad_norm": 0.2119140625, "learning_rate": 0.00019701405792445814, "loss": 1.5029, "step": 299 }, { "epoch": 0.16144221713978205, "grad_norm": 0.203125, "learning_rate": 0.0001969934625656903, "loss": 1.5074, "step": 300 }, { "epoch": 0.16198035786358134, "grad_norm": 0.201171875, "learning_rate": 0.00019697279750715118, "loss": 1.4905, "step": 301 }, { "epoch": 0.1625184985873806, "grad_norm": 0.212890625, "learning_rate": 0.00019695206276369075, "loss": 1.5355, "step": 302 }, { "epoch": 0.16305663931117986, "grad_norm": 0.20703125, "learning_rate": 0.00019693125835020908, "loss": 1.4728, "step": 303 }, { "epoch": 0.16359478003497915, "grad_norm": 0.212890625, "learning_rate": 0.00019691038428165628, "loss": 1.4916, "step": 304 }, { "epoch": 0.16413292075877842, "grad_norm": 0.205078125, "learning_rate": 0.00019688944057303246, "loss": 1.419, "step": 305 }, { "epoch": 0.1646710614825777, "grad_norm": 0.2177734375, "learning_rate": 0.0001968684272393879, "loss": 1.4904, "step": 306 }, { "epoch": 0.16520920220637697, "grad_norm": 0.2119140625, "learning_rate": 0.00019684734429582278, "loss": 1.5573, "step": 307 }, { "epoch": 0.16574734293017623, "grad_norm": 0.203125, "learning_rate": 0.0001968261917574874, "loss": 1.3973, "step": 308 }, { "epoch": 0.16628548365397552, "grad_norm": 0.1953125, "learning_rate": 0.000196804969639582, "loss": 1.4926, "step": 309 }, { "epoch": 0.1668236243777748, "grad_norm": 0.2060546875, "learning_rate": 0.0001967836779573569, "loss": 1.5067, "step": 310 }, { "epoch": 0.16736176510157405, "grad_norm": 0.2197265625, "learning_rate": 0.0001967623167261123, "loss": 1.506, "step": 311 }, { "epoch": 0.16789990582537334, "grad_norm": 0.2109375, "learning_rate": 0.00019674088596119852, "loss": 1.3667, "step": 312 }, { "epoch": 0.1684380465491726, "grad_norm": 0.19921875, "learning_rate": 0.00019671938567801566, "loss": 1.4587, "step": 313 }, { "epoch": 0.1689761872729719, "grad_norm": 0.197265625, "learning_rate": 0.000196697815892014, "loss": 1.5018, "step": 314 }, { "epoch": 0.16951432799677116, "grad_norm": 0.212890625, "learning_rate": 0.0001966761766186936, "loss": 1.4218, "step": 315 }, { "epoch": 0.17005246872057042, "grad_norm": 0.20703125, "learning_rate": 0.0001966544678736044, "loss": 1.4775, "step": 316 }, { "epoch": 0.1705906094443697, "grad_norm": 0.2177734375, "learning_rate": 0.00019663268967234657, "loss": 1.5061, "step": 317 }, { "epoch": 0.17112875016816897, "grad_norm": 0.2236328125, "learning_rate": 0.0001966108420305698, "loss": 1.63, "step": 318 }, { "epoch": 0.17166689089196824, "grad_norm": 0.3828125, "learning_rate": 0.000196588924963974, "loss": 2.3272, "step": 319 }, { "epoch": 0.17220503161576753, "grad_norm": 0.19921875, "learning_rate": 0.00019656693848830873, "loss": 1.5386, "step": 320 }, { "epoch": 0.1727431723395668, "grad_norm": 0.2314453125, "learning_rate": 0.00019654488261937358, "loss": 1.5491, "step": 321 }, { "epoch": 0.17328131306336608, "grad_norm": 0.2109375, "learning_rate": 0.00019652275737301798, "loss": 1.4732, "step": 322 }, { "epoch": 0.17381945378716535, "grad_norm": 0.2109375, "learning_rate": 0.00019650056276514116, "loss": 1.5137, "step": 323 }, { "epoch": 0.1743575945109646, "grad_norm": 0.197265625, "learning_rate": 0.00019647829881169223, "loss": 1.4594, "step": 324 }, { "epoch": 0.1748957352347639, "grad_norm": 0.212890625, "learning_rate": 0.00019645596552867015, "loss": 1.419, "step": 325 }, { "epoch": 0.17543387595856316, "grad_norm": 0.2109375, "learning_rate": 0.00019643356293212364, "loss": 1.5204, "step": 326 }, { "epoch": 0.17597201668236243, "grad_norm": 0.2236328125, "learning_rate": 0.0001964110910381513, "loss": 1.5411, "step": 327 }, { "epoch": 0.17651015740616172, "grad_norm": 0.2099609375, "learning_rate": 0.0001963885498629015, "loss": 1.5041, "step": 328 }, { "epoch": 0.17704829812996098, "grad_norm": 0.208984375, "learning_rate": 0.00019636593942257236, "loss": 1.5413, "step": 329 }, { "epoch": 0.17758643885376027, "grad_norm": 0.2255859375, "learning_rate": 0.00019634325973341183, "loss": 1.4774, "step": 330 }, { "epoch": 0.17812457957755953, "grad_norm": 0.2138671875, "learning_rate": 0.0001963205108117176, "loss": 1.5432, "step": 331 }, { "epoch": 0.1786627203013588, "grad_norm": 0.19921875, "learning_rate": 0.00019629769267383708, "loss": 1.4809, "step": 332 }, { "epoch": 0.1792008610251581, "grad_norm": 0.197265625, "learning_rate": 0.0001962748053361675, "loss": 1.377, "step": 333 }, { "epoch": 0.17973900174895735, "grad_norm": 0.2001953125, "learning_rate": 0.0001962518488151557, "loss": 1.4411, "step": 334 }, { "epoch": 0.1802771424727566, "grad_norm": 0.2158203125, "learning_rate": 0.00019622882312729834, "loss": 1.5049, "step": 335 }, { "epoch": 0.1808152831965559, "grad_norm": 0.2080078125, "learning_rate": 0.00019620572828914177, "loss": 1.4284, "step": 336 }, { "epoch": 0.18135342392035517, "grad_norm": 0.2265625, "learning_rate": 0.00019618256431728194, "loss": 1.4757, "step": 337 }, { "epoch": 0.18189156464415446, "grad_norm": 0.2197265625, "learning_rate": 0.00019615933122836454, "loss": 1.527, "step": 338 }, { "epoch": 0.18242970536795372, "grad_norm": 0.2275390625, "learning_rate": 0.00019613602903908501, "loss": 1.5197, "step": 339 }, { "epoch": 0.18296784609175298, "grad_norm": 0.197265625, "learning_rate": 0.0001961126577661883, "loss": 1.3836, "step": 340 }, { "epoch": 0.18350598681555227, "grad_norm": 0.2265625, "learning_rate": 0.0001960892174264691, "loss": 1.4984, "step": 341 }, { "epoch": 0.18404412753935154, "grad_norm": 0.2216796875, "learning_rate": 0.00019606570803677172, "loss": 1.4793, "step": 342 }, { "epoch": 0.1845822682631508, "grad_norm": 0.2119140625, "learning_rate": 0.00019604212961399, "loss": 1.4315, "step": 343 }, { "epoch": 0.1851204089869501, "grad_norm": 0.2099609375, "learning_rate": 0.00019601848217506757, "loss": 1.4876, "step": 344 }, { "epoch": 0.18565854971074935, "grad_norm": 0.2216796875, "learning_rate": 0.00019599476573699747, "loss": 1.5419, "step": 345 }, { "epoch": 0.18619669043454865, "grad_norm": 0.2080078125, "learning_rate": 0.00019597098031682238, "loss": 1.5461, "step": 346 }, { "epoch": 0.1867348311583479, "grad_norm": 0.2109375, "learning_rate": 0.0001959471259316346, "loss": 1.5542, "step": 347 }, { "epoch": 0.18727297188214717, "grad_norm": 0.2060546875, "learning_rate": 0.00019592320259857602, "loss": 1.4667, "step": 348 }, { "epoch": 0.18781111260594646, "grad_norm": 0.2099609375, "learning_rate": 0.0001958992103348379, "loss": 1.4917, "step": 349 }, { "epoch": 0.18834925332974572, "grad_norm": 0.220703125, "learning_rate": 0.00019587514915766124, "loss": 1.5377, "step": 350 }, { "epoch": 0.18888739405354502, "grad_norm": 0.20703125, "learning_rate": 0.0001958510190843364, "loss": 1.4712, "step": 351 }, { "epoch": 0.18942553477734428, "grad_norm": 0.21484375, "learning_rate": 0.00019582682013220338, "loss": 1.4718, "step": 352 }, { "epoch": 0.18996367550114354, "grad_norm": 0.2109375, "learning_rate": 0.00019580255231865156, "loss": 1.4986, "step": 353 }, { "epoch": 0.19050181622494283, "grad_norm": 0.2099609375, "learning_rate": 0.00019577821566111987, "loss": 1.4649, "step": 354 }, { "epoch": 0.1910399569487421, "grad_norm": 0.2109375, "learning_rate": 0.0001957538101770967, "loss": 1.4784, "step": 355 }, { "epoch": 0.19157809767254136, "grad_norm": 0.1962890625, "learning_rate": 0.00019572933588411995, "loss": 1.3753, "step": 356 }, { "epoch": 0.19211623839634065, "grad_norm": 0.203125, "learning_rate": 0.00019570479279977683, "loss": 1.381, "step": 357 }, { "epoch": 0.1926543791201399, "grad_norm": 0.2041015625, "learning_rate": 0.00019568018094170414, "loss": 1.5059, "step": 358 }, { "epoch": 0.1931925198439392, "grad_norm": 0.357421875, "learning_rate": 0.000195655500327588, "loss": 2.3679, "step": 359 }, { "epoch": 0.19373066056773847, "grad_norm": 0.2197265625, "learning_rate": 0.00019563075097516394, "loss": 1.622, "step": 360 }, { "epoch": 0.19426880129153773, "grad_norm": 0.208984375, "learning_rate": 0.00019560593290221697, "loss": 1.4603, "step": 361 }, { "epoch": 0.19480694201533702, "grad_norm": 0.2099609375, "learning_rate": 0.00019558104612658138, "loss": 1.4245, "step": 362 }, { "epoch": 0.19534508273913628, "grad_norm": 0.2373046875, "learning_rate": 0.00019555609066614088, "loss": 1.4518, "step": 363 }, { "epoch": 0.19588322346293555, "grad_norm": 0.2099609375, "learning_rate": 0.00019553106653882857, "loss": 1.4979, "step": 364 }, { "epoch": 0.19642136418673484, "grad_norm": 0.2080078125, "learning_rate": 0.00019550597376262685, "loss": 1.5492, "step": 365 }, { "epoch": 0.1969595049105341, "grad_norm": 0.21484375, "learning_rate": 0.0001954808123555674, "loss": 1.5043, "step": 366 }, { "epoch": 0.1974976456343334, "grad_norm": 0.20703125, "learning_rate": 0.00019545558233573134, "loss": 1.4719, "step": 367 }, { "epoch": 0.19803578635813265, "grad_norm": 0.208984375, "learning_rate": 0.00019543028372124904, "loss": 1.4462, "step": 368 }, { "epoch": 0.19857392708193192, "grad_norm": 0.19921875, "learning_rate": 0.0001954049165303001, "loss": 1.4122, "step": 369 }, { "epoch": 0.1991120678057312, "grad_norm": 0.2109375, "learning_rate": 0.0001953794807811135, "loss": 1.4104, "step": 370 }, { "epoch": 0.19965020852953047, "grad_norm": 0.208984375, "learning_rate": 0.0001953539764919674, "loss": 1.5391, "step": 371 }, { "epoch": 0.20018834925332973, "grad_norm": 0.365234375, "learning_rate": 0.00019532840368118926, "loss": 2.2657, "step": 372 }, { "epoch": 0.20072648997712902, "grad_norm": 0.2099609375, "learning_rate": 0.00019530276236715582, "loss": 1.4712, "step": 373 }, { "epoch": 0.2012646307009283, "grad_norm": 0.2197265625, "learning_rate": 0.00019527705256829295, "loss": 1.5345, "step": 374 }, { "epoch": 0.20180277142472758, "grad_norm": 0.2158203125, "learning_rate": 0.0001952512743030758, "loss": 1.4518, "step": 375 }, { "epoch": 0.20234091214852684, "grad_norm": 0.2138671875, "learning_rate": 0.00019522542759002867, "loss": 1.5711, "step": 376 }, { "epoch": 0.2028790528723261, "grad_norm": 0.2197265625, "learning_rate": 0.00019519951244772514, "loss": 1.6261, "step": 377 }, { "epoch": 0.2034171935961254, "grad_norm": 0.2177734375, "learning_rate": 0.0001951735288947879, "loss": 1.4582, "step": 378 }, { "epoch": 0.20395533431992466, "grad_norm": 0.22265625, "learning_rate": 0.00019514747694988872, "loss": 1.4431, "step": 379 }, { "epoch": 0.20449347504372392, "grad_norm": 0.2138671875, "learning_rate": 0.0001951213566317487, "loss": 1.4038, "step": 380 }, { "epoch": 0.2050316157675232, "grad_norm": 0.2060546875, "learning_rate": 0.0001950951679591379, "loss": 1.4893, "step": 381 }, { "epoch": 0.20556975649132248, "grad_norm": 0.216796875, "learning_rate": 0.00019506891095087561, "loss": 1.4905, "step": 382 }, { "epoch": 0.20610789721512177, "grad_norm": 0.2158203125, "learning_rate": 0.00019504258562583017, "loss": 1.5254, "step": 383 }, { "epoch": 0.20664603793892103, "grad_norm": 0.22265625, "learning_rate": 0.00019501619200291908, "loss": 1.4897, "step": 384 }, { "epoch": 0.2071841786627203, "grad_norm": 0.2119140625, "learning_rate": 0.00019498973010110881, "loss": 1.5918, "step": 385 }, { "epoch": 0.20772231938651958, "grad_norm": 0.212890625, "learning_rate": 0.000194963199939415, "loss": 1.5164, "step": 386 }, { "epoch": 0.20826046011031885, "grad_norm": 0.2109375, "learning_rate": 0.00019493660153690226, "loss": 1.4676, "step": 387 }, { "epoch": 0.2087986008341181, "grad_norm": 0.2138671875, "learning_rate": 0.0001949099349126843, "loss": 1.4774, "step": 388 }, { "epoch": 0.2093367415579174, "grad_norm": 0.2119140625, "learning_rate": 0.0001948832000859238, "loss": 1.5192, "step": 389 }, { "epoch": 0.20987488228171666, "grad_norm": 0.212890625, "learning_rate": 0.00019485639707583253, "loss": 1.5615, "step": 390 }, { "epoch": 0.21041302300551595, "grad_norm": 0.208984375, "learning_rate": 0.00019482952590167118, "loss": 1.4687, "step": 391 }, { "epoch": 0.21095116372931522, "grad_norm": 0.216796875, "learning_rate": 0.00019480258658274946, "loss": 1.4751, "step": 392 }, { "epoch": 0.21148930445311448, "grad_norm": 0.2001953125, "learning_rate": 0.00019477557913842602, "loss": 1.4982, "step": 393 }, { "epoch": 0.21202744517691377, "grad_norm": 0.193359375, "learning_rate": 0.00019474850358810848, "loss": 1.3562, "step": 394 }, { "epoch": 0.21256558590071303, "grad_norm": 0.2119140625, "learning_rate": 0.00019472135995125345, "loss": 1.5323, "step": 395 }, { "epoch": 0.2131037266245123, "grad_norm": 0.20703125, "learning_rate": 0.00019469414824736637, "loss": 1.3719, "step": 396 }, { "epoch": 0.2136418673483116, "grad_norm": 0.21875, "learning_rate": 0.00019466686849600167, "loss": 1.5205, "step": 397 }, { "epoch": 0.21418000807211085, "grad_norm": 0.2080078125, "learning_rate": 0.00019463952071676268, "loss": 1.4204, "step": 398 }, { "epoch": 0.21471814879591014, "grad_norm": 0.2099609375, "learning_rate": 0.00019461210492930156, "loss": 1.406, "step": 399 }, { "epoch": 0.2152562895197094, "grad_norm": 0.2001953125, "learning_rate": 0.00019458462115331938, "loss": 1.4722, "step": 400 }, { "epoch": 0.21579443024350867, "grad_norm": 0.220703125, "learning_rate": 0.000194557069408566, "loss": 1.4759, "step": 401 }, { "epoch": 0.21633257096730796, "grad_norm": 0.2060546875, "learning_rate": 0.0001945294497148403, "loss": 1.4428, "step": 402 }, { "epoch": 0.21687071169110722, "grad_norm": 0.208984375, "learning_rate": 0.00019450176209198977, "loss": 1.5056, "step": 403 }, { "epoch": 0.2174088524149065, "grad_norm": 0.2060546875, "learning_rate": 0.00019447400655991085, "loss": 1.4489, "step": 404 }, { "epoch": 0.21794699313870577, "grad_norm": 0.20703125, "learning_rate": 0.00019444618313854876, "loss": 1.449, "step": 405 }, { "epoch": 0.21848513386250504, "grad_norm": 0.2138671875, "learning_rate": 0.00019441829184789743, "loss": 1.553, "step": 406 }, { "epoch": 0.21902327458630433, "grad_norm": 0.20703125, "learning_rate": 0.00019439033270799967, "loss": 1.4056, "step": 407 }, { "epoch": 0.2195614153101036, "grad_norm": 0.2080078125, "learning_rate": 0.00019436230573894697, "loss": 1.4119, "step": 408 }, { "epoch": 0.22009955603390285, "grad_norm": 0.2177734375, "learning_rate": 0.00019433421096087962, "loss": 1.5186, "step": 409 }, { "epoch": 0.22063769675770215, "grad_norm": 0.2158203125, "learning_rate": 0.00019430604839398656, "loss": 1.42, "step": 410 }, { "epoch": 0.2211758374815014, "grad_norm": 0.2060546875, "learning_rate": 0.0001942778180585055, "loss": 1.4613, "step": 411 }, { "epoch": 0.2217139782053007, "grad_norm": 0.2236328125, "learning_rate": 0.00019424951997472288, "loss": 1.4538, "step": 412 }, { "epoch": 0.22225211892909996, "grad_norm": 0.2138671875, "learning_rate": 0.00019422115416297374, "loss": 1.5958, "step": 413 }, { "epoch": 0.22279025965289923, "grad_norm": 0.2177734375, "learning_rate": 0.00019419272064364183, "loss": 1.5201, "step": 414 }, { "epoch": 0.22332840037669852, "grad_norm": 0.21484375, "learning_rate": 0.0001941642194371596, "loss": 1.4799, "step": 415 }, { "epoch": 0.22386654110049778, "grad_norm": 0.220703125, "learning_rate": 0.00019413565056400803, "loss": 1.4192, "step": 416 }, { "epoch": 0.22440468182429704, "grad_norm": 0.212890625, "learning_rate": 0.00019410701404471686, "loss": 1.4438, "step": 417 }, { "epoch": 0.22494282254809633, "grad_norm": 0.2041015625, "learning_rate": 0.0001940783098998643, "loss": 1.4646, "step": 418 }, { "epoch": 0.2254809632718956, "grad_norm": 0.2138671875, "learning_rate": 0.00019404953815007727, "loss": 1.5041, "step": 419 }, { "epoch": 0.2260191039956949, "grad_norm": 0.2099609375, "learning_rate": 0.0001940206988160312, "loss": 1.4406, "step": 420 }, { "epoch": 0.22655724471949415, "grad_norm": 0.228515625, "learning_rate": 0.00019399179191845022, "loss": 1.5186, "step": 421 }, { "epoch": 0.2270953854432934, "grad_norm": 0.2177734375, "learning_rate": 0.00019396281747810677, "loss": 1.4693, "step": 422 }, { "epoch": 0.2276335261670927, "grad_norm": 0.220703125, "learning_rate": 0.00019393377551582203, "loss": 1.4222, "step": 423 }, { "epoch": 0.22817166689089197, "grad_norm": 0.2080078125, "learning_rate": 0.0001939046660524656, "loss": 1.4757, "step": 424 }, { "epoch": 0.22870980761469123, "grad_norm": 0.208984375, "learning_rate": 0.00019387548910895568, "loss": 1.4877, "step": 425 }, { "epoch": 0.22924794833849052, "grad_norm": 0.212890625, "learning_rate": 0.00019384624470625882, "loss": 1.4985, "step": 426 }, { "epoch": 0.22978608906228978, "grad_norm": 0.2021484375, "learning_rate": 0.00019381693286539022, "loss": 1.4626, "step": 427 }, { "epoch": 0.23032422978608907, "grad_norm": 0.208984375, "learning_rate": 0.00019378755360741342, "loss": 1.4725, "step": 428 }, { "epoch": 0.23086237050988834, "grad_norm": 0.298828125, "learning_rate": 0.0001937581069534404, "loss": 2.2538, "step": 429 }, { "epoch": 0.2314005112336876, "grad_norm": 0.2197265625, "learning_rate": 0.00019372859292463162, "loss": 1.3707, "step": 430 }, { "epoch": 0.2319386519574869, "grad_norm": 0.2041015625, "learning_rate": 0.00019369901154219597, "loss": 1.4088, "step": 431 }, { "epoch": 0.23247679268128615, "grad_norm": 0.2099609375, "learning_rate": 0.00019366936282739068, "loss": 1.4649, "step": 432 }, { "epoch": 0.23301493340508542, "grad_norm": 0.2197265625, "learning_rate": 0.00019363964680152145, "loss": 1.4643, "step": 433 }, { "epoch": 0.2335530741288847, "grad_norm": 0.208984375, "learning_rate": 0.00019360986348594222, "loss": 1.381, "step": 434 }, { "epoch": 0.23409121485268397, "grad_norm": 0.21484375, "learning_rate": 0.00019358001290205543, "loss": 1.5055, "step": 435 }, { "epoch": 0.23462935557648326, "grad_norm": 0.205078125, "learning_rate": 0.0001935500950713118, "loss": 1.4859, "step": 436 }, { "epoch": 0.23516749630028252, "grad_norm": 0.2177734375, "learning_rate": 0.0001935201100152103, "loss": 1.3848, "step": 437 }, { "epoch": 0.2357056370240818, "grad_norm": 0.2236328125, "learning_rate": 0.00019349005775529828, "loss": 1.5259, "step": 438 }, { "epoch": 0.23624377774788108, "grad_norm": 0.2236328125, "learning_rate": 0.00019345993831317146, "loss": 1.5266, "step": 439 }, { "epoch": 0.23678191847168034, "grad_norm": 0.2099609375, "learning_rate": 0.0001934297517104737, "loss": 1.4933, "step": 440 }, { "epoch": 0.2373200591954796, "grad_norm": 0.2216796875, "learning_rate": 0.00019339949796889715, "loss": 1.5771, "step": 441 }, { "epoch": 0.2378581999192789, "grad_norm": 0.203125, "learning_rate": 0.00019336917711018227, "loss": 1.4351, "step": 442 }, { "epoch": 0.23839634064307816, "grad_norm": 0.216796875, "learning_rate": 0.00019333878915611772, "loss": 1.5557, "step": 443 }, { "epoch": 0.23893448136687745, "grad_norm": 0.2177734375, "learning_rate": 0.00019330833412854035, "loss": 1.4701, "step": 444 }, { "epoch": 0.2394726220906767, "grad_norm": 0.212890625, "learning_rate": 0.00019327781204933522, "loss": 1.5157, "step": 445 }, { "epoch": 0.24001076281447598, "grad_norm": 0.216796875, "learning_rate": 0.00019324722294043558, "loss": 1.433, "step": 446 }, { "epoch": 0.24054890353827527, "grad_norm": 0.2119140625, "learning_rate": 0.00019321656682382287, "loss": 1.5413, "step": 447 }, { "epoch": 0.24108704426207453, "grad_norm": 0.19921875, "learning_rate": 0.0001931858437215267, "loss": 1.3988, "step": 448 }, { "epoch": 0.24162518498587382, "grad_norm": 0.2109375, "learning_rate": 0.00019315505365562465, "loss": 1.4643, "step": 449 }, { "epoch": 0.24216332570967308, "grad_norm": 0.208984375, "learning_rate": 0.00019312419664824267, "loss": 1.4472, "step": 450 }, { "epoch": 0.24270146643347235, "grad_norm": 0.228515625, "learning_rate": 0.00019309327272155462, "loss": 1.4666, "step": 451 }, { "epoch": 0.24323960715727164, "grad_norm": 0.2138671875, "learning_rate": 0.00019306228189778254, "loss": 1.4414, "step": 452 }, { "epoch": 0.2437777478810709, "grad_norm": 0.21484375, "learning_rate": 0.00019303122419919654, "loss": 1.4495, "step": 453 }, { "epoch": 0.24431588860487016, "grad_norm": 0.212890625, "learning_rate": 0.00019300009964811477, "loss": 1.4361, "step": 454 }, { "epoch": 0.24485402932866945, "grad_norm": 0.22265625, "learning_rate": 0.00019296890826690338, "loss": 1.4363, "step": 455 }, { "epoch": 0.24539217005246872, "grad_norm": 0.216796875, "learning_rate": 0.00019293765007797662, "loss": 1.4436, "step": 456 }, { "epoch": 0.245930310776268, "grad_norm": 0.2236328125, "learning_rate": 0.00019290632510379668, "loss": 1.4459, "step": 457 }, { "epoch": 0.24646845150006727, "grad_norm": 0.201171875, "learning_rate": 0.00019287493336687378, "loss": 1.3458, "step": 458 }, { "epoch": 0.24700659222386653, "grad_norm": 0.2236328125, "learning_rate": 0.00019284347488976615, "loss": 1.4754, "step": 459 }, { "epoch": 0.24754473294766582, "grad_norm": 0.2177734375, "learning_rate": 0.0001928119496950799, "loss": 1.4253, "step": 460 }, { "epoch": 0.2480828736714651, "grad_norm": 0.2216796875, "learning_rate": 0.00019278035780546913, "loss": 1.4455, "step": 461 }, { "epoch": 0.24862101439526435, "grad_norm": 0.220703125, "learning_rate": 0.00019274869924363586, "loss": 1.4316, "step": 462 }, { "epoch": 0.24915915511906364, "grad_norm": 0.208984375, "learning_rate": 0.00019271697403233003, "loss": 1.4716, "step": 463 }, { "epoch": 0.2496972958428629, "grad_norm": 0.2099609375, "learning_rate": 0.00019268518219434943, "loss": 1.4704, "step": 464 }, { "epoch": 0.2502354365666622, "grad_norm": 0.212890625, "learning_rate": 0.00019265332375253982, "loss": 1.4668, "step": 465 }, { "epoch": 0.25077357729046146, "grad_norm": 0.2080078125, "learning_rate": 0.00019262139872979473, "loss": 1.3333, "step": 466 }, { "epoch": 0.2513117180142607, "grad_norm": 0.2138671875, "learning_rate": 0.00019258940714905553, "loss": 1.424, "step": 467 }, { "epoch": 0.25184985873806, "grad_norm": 0.208984375, "learning_rate": 0.00019255734903331155, "loss": 1.4917, "step": 468 }, { "epoch": 0.2523879994618593, "grad_norm": 0.21875, "learning_rate": 0.00019252522440559977, "loss": 1.478, "step": 469 }, { "epoch": 0.25292614018565857, "grad_norm": 0.208984375, "learning_rate": 0.00019249303328900506, "loss": 1.4658, "step": 470 }, { "epoch": 0.25346428090945783, "grad_norm": 0.21484375, "learning_rate": 0.00019246077570666005, "loss": 1.4492, "step": 471 }, { "epoch": 0.2540024216332571, "grad_norm": 0.2138671875, "learning_rate": 0.00019242845168174511, "loss": 1.3941, "step": 472 }, { "epoch": 0.25454056235705635, "grad_norm": 0.21484375, "learning_rate": 0.00019239606123748845, "loss": 1.4658, "step": 473 }, { "epoch": 0.2550787030808556, "grad_norm": 0.22265625, "learning_rate": 0.0001923636043971658, "loss": 1.4544, "step": 474 }, { "epoch": 0.25561684380465494, "grad_norm": 0.2255859375, "learning_rate": 0.00019233108118410082, "loss": 1.4761, "step": 475 }, { "epoch": 0.2561549845284542, "grad_norm": 0.212890625, "learning_rate": 0.00019229849162166477, "loss": 1.4471, "step": 476 }, { "epoch": 0.25669312525225346, "grad_norm": 0.212890625, "learning_rate": 0.0001922658357332766, "loss": 1.455, "step": 477 }, { "epoch": 0.2572312659760527, "grad_norm": 0.21484375, "learning_rate": 0.00019223311354240292, "loss": 1.4698, "step": 478 }, { "epoch": 0.257769406699852, "grad_norm": 0.2119140625, "learning_rate": 0.00019220032507255802, "loss": 1.3281, "step": 479 }, { "epoch": 0.2583075474236513, "grad_norm": 0.2216796875, "learning_rate": 0.00019216747034730373, "loss": 1.3735, "step": 480 }, { "epoch": 0.25884568814745057, "grad_norm": 0.2216796875, "learning_rate": 0.00019213454939024952, "loss": 1.5597, "step": 481 }, { "epoch": 0.25938382887124983, "grad_norm": 0.212890625, "learning_rate": 0.0001921015622250526, "loss": 1.4664, "step": 482 }, { "epoch": 0.2599219695950491, "grad_norm": 0.224609375, "learning_rate": 0.0001920685088754175, "loss": 1.5484, "step": 483 }, { "epoch": 0.26046011031884836, "grad_norm": 0.212890625, "learning_rate": 0.00019203538936509658, "loss": 1.3937, "step": 484 }, { "epoch": 0.2609982510426477, "grad_norm": 0.2109375, "learning_rate": 0.00019200220371788952, "loss": 1.3719, "step": 485 }, { "epoch": 0.26153639176644694, "grad_norm": 0.212890625, "learning_rate": 0.00019196895195764362, "loss": 1.5276, "step": 486 }, { "epoch": 0.2620745324902462, "grad_norm": 0.2099609375, "learning_rate": 0.00019193563410825374, "loss": 1.4742, "step": 487 }, { "epoch": 0.26261267321404547, "grad_norm": 0.2041015625, "learning_rate": 0.00019190225019366218, "loss": 1.4892, "step": 488 }, { "epoch": 0.26315081393784473, "grad_norm": 0.2314453125, "learning_rate": 0.00019186880023785863, "loss": 1.5407, "step": 489 }, { "epoch": 0.263688954661644, "grad_norm": 0.2099609375, "learning_rate": 0.00019183528426488042, "loss": 1.4692, "step": 490 }, { "epoch": 0.2642270953854433, "grad_norm": 0.2265625, "learning_rate": 0.00019180170229881217, "loss": 1.4877, "step": 491 }, { "epoch": 0.2647652361092426, "grad_norm": 0.21484375, "learning_rate": 0.00019176805436378595, "loss": 1.4592, "step": 492 }, { "epoch": 0.26530337683304184, "grad_norm": 0.228515625, "learning_rate": 0.00019173434048398133, "loss": 1.4898, "step": 493 }, { "epoch": 0.2658415175568411, "grad_norm": 0.2216796875, "learning_rate": 0.00019170056068362518, "loss": 1.5417, "step": 494 }, { "epoch": 0.26637965828064036, "grad_norm": 0.220703125, "learning_rate": 0.00019166671498699168, "loss": 1.467, "step": 495 }, { "epoch": 0.2669177990044397, "grad_norm": 0.2119140625, "learning_rate": 0.00019163280341840254, "loss": 1.4494, "step": 496 }, { "epoch": 0.26745593972823895, "grad_norm": 0.208984375, "learning_rate": 0.00019159882600222665, "loss": 1.3437, "step": 497 }, { "epoch": 0.2679940804520382, "grad_norm": 0.2236328125, "learning_rate": 0.00019156478276288028, "loss": 1.5355, "step": 498 }, { "epoch": 0.26853222117583747, "grad_norm": 0.224609375, "learning_rate": 0.00019153067372482702, "loss": 1.4241, "step": 499 }, { "epoch": 0.26907036189963673, "grad_norm": 0.21484375, "learning_rate": 0.00019149649891257774, "loss": 1.3884, "step": 500 }, { "epoch": 0.26960850262343605, "grad_norm": 0.23046875, "learning_rate": 0.00019146225835069047, "loss": 1.5033, "step": 501 }, { "epoch": 0.2701466433472353, "grad_norm": 0.2138671875, "learning_rate": 0.00019142795206377066, "loss": 1.376, "step": 502 }, { "epoch": 0.2706847840710346, "grad_norm": 0.212890625, "learning_rate": 0.00019139358007647084, "loss": 1.4836, "step": 503 }, { "epoch": 0.27122292479483384, "grad_norm": 0.2158203125, "learning_rate": 0.00019135914241349085, "loss": 1.525, "step": 504 }, { "epoch": 0.2717610655186331, "grad_norm": 0.220703125, "learning_rate": 0.00019132463909957772, "loss": 1.4399, "step": 505 }, { "epoch": 0.2722992062424324, "grad_norm": 0.20703125, "learning_rate": 0.00019129007015952556, "loss": 1.3456, "step": 506 }, { "epoch": 0.2728373469662317, "grad_norm": 0.2158203125, "learning_rate": 0.00019125543561817574, "loss": 1.5063, "step": 507 }, { "epoch": 0.27337548769003095, "grad_norm": 0.216796875, "learning_rate": 0.00019122073550041676, "loss": 1.479, "step": 508 }, { "epoch": 0.2739136284138302, "grad_norm": 0.2255859375, "learning_rate": 0.00019118596983118416, "loss": 1.4596, "step": 509 }, { "epoch": 0.2744517691376295, "grad_norm": 0.2236328125, "learning_rate": 0.00019115113863546067, "loss": 1.4647, "step": 510 }, { "epoch": 0.27498990986142874, "grad_norm": 0.2275390625, "learning_rate": 0.00019111624193827613, "loss": 1.4694, "step": 511 }, { "epoch": 0.27552805058522806, "grad_norm": 0.224609375, "learning_rate": 0.00019108127976470737, "loss": 1.5425, "step": 512 }, { "epoch": 0.2760661913090273, "grad_norm": 0.22265625, "learning_rate": 0.00019104625213987829, "loss": 1.4287, "step": 513 }, { "epoch": 0.2766043320328266, "grad_norm": 0.2275390625, "learning_rate": 0.00019101115908895982, "loss": 1.4607, "step": 514 }, { "epoch": 0.27714247275662585, "grad_norm": 0.232421875, "learning_rate": 0.00019097600063717004, "loss": 1.4483, "step": 515 }, { "epoch": 0.2776806134804251, "grad_norm": 0.2392578125, "learning_rate": 0.00019094077680977377, "loss": 1.4465, "step": 516 }, { "epoch": 0.2782187542042244, "grad_norm": 0.224609375, "learning_rate": 0.000190905487632083, "loss": 1.5425, "step": 517 }, { "epoch": 0.2787568949280237, "grad_norm": 0.205078125, "learning_rate": 0.00019087013312945667, "loss": 1.2989, "step": 518 }, { "epoch": 0.27929503565182295, "grad_norm": 0.224609375, "learning_rate": 0.00019083471332730057, "loss": 1.4652, "step": 519 }, { "epoch": 0.2798331763756222, "grad_norm": 0.21484375, "learning_rate": 0.0001907992282510675, "loss": 1.4795, "step": 520 }, { "epoch": 0.2803713170994215, "grad_norm": 0.2099609375, "learning_rate": 0.0001907636779262571, "loss": 1.5207, "step": 521 }, { "epoch": 0.2809094578232208, "grad_norm": 0.2177734375, "learning_rate": 0.000190728062378416, "loss": 1.5211, "step": 522 }, { "epoch": 0.28144759854702006, "grad_norm": 0.20703125, "learning_rate": 0.00019069238163313756, "loss": 1.449, "step": 523 }, { "epoch": 0.2819857392708193, "grad_norm": 0.2109375, "learning_rate": 0.0001906566357160621, "loss": 1.4805, "step": 524 }, { "epoch": 0.2825238799946186, "grad_norm": 0.20703125, "learning_rate": 0.0001906208246528767, "loss": 1.4109, "step": 525 }, { "epoch": 0.28306202071841785, "grad_norm": 0.228515625, "learning_rate": 0.00019058494846931537, "loss": 1.4737, "step": 526 }, { "epoch": 0.2836001614422171, "grad_norm": 0.21484375, "learning_rate": 0.00019054900719115873, "loss": 1.5104, "step": 527 }, { "epoch": 0.28413830216601643, "grad_norm": 0.2197265625, "learning_rate": 0.00019051300084423433, "loss": 1.3641, "step": 528 }, { "epoch": 0.2846764428898157, "grad_norm": 0.2314453125, "learning_rate": 0.00019047692945441645, "loss": 1.4848, "step": 529 }, { "epoch": 0.28521458361361496, "grad_norm": 0.220703125, "learning_rate": 0.0001904407930476261, "loss": 1.4888, "step": 530 }, { "epoch": 0.2857527243374142, "grad_norm": 0.216796875, "learning_rate": 0.00019040459164983094, "loss": 1.5015, "step": 531 }, { "epoch": 0.2862908650612135, "grad_norm": 0.2177734375, "learning_rate": 0.00019036832528704546, "loss": 1.5145, "step": 532 }, { "epoch": 0.2868290057850128, "grad_norm": 0.22265625, "learning_rate": 0.0001903319939853307, "loss": 1.4621, "step": 533 }, { "epoch": 0.28736714650881207, "grad_norm": 0.22265625, "learning_rate": 0.0001902955977707945, "loss": 1.4122, "step": 534 }, { "epoch": 0.28790528723261133, "grad_norm": 0.220703125, "learning_rate": 0.00019025913666959122, "loss": 1.4248, "step": 535 }, { "epoch": 0.2884434279564106, "grad_norm": 0.2099609375, "learning_rate": 0.000190222610707922, "loss": 1.4316, "step": 536 }, { "epoch": 0.28898156868020985, "grad_norm": 0.216796875, "learning_rate": 0.0001901860199120344, "loss": 1.5035, "step": 537 }, { "epoch": 0.2895197094040092, "grad_norm": 0.21484375, "learning_rate": 0.0001901493643082227, "loss": 1.4461, "step": 538 }, { "epoch": 0.29005785012780844, "grad_norm": 0.2099609375, "learning_rate": 0.00019011264392282775, "loss": 1.4424, "step": 539 }, { "epoch": 0.2905959908516077, "grad_norm": 0.220703125, "learning_rate": 0.0001900758587822369, "loss": 1.5117, "step": 540 }, { "epoch": 0.29113413157540696, "grad_norm": 0.212890625, "learning_rate": 0.00019003900891288405, "loss": 1.4687, "step": 541 }, { "epoch": 0.2916722722992062, "grad_norm": 0.2177734375, "learning_rate": 0.00019000209434124962, "loss": 1.4127, "step": 542 }, { "epoch": 0.2922104130230055, "grad_norm": 0.2255859375, "learning_rate": 0.0001899651150938605, "loss": 1.4584, "step": 543 }, { "epoch": 0.2927485537468048, "grad_norm": 0.2294921875, "learning_rate": 0.00018992807119729013, "loss": 1.4802, "step": 544 }, { "epoch": 0.29328669447060407, "grad_norm": 0.224609375, "learning_rate": 0.00018989096267815829, "loss": 1.4894, "step": 545 }, { "epoch": 0.29382483519440333, "grad_norm": 0.2109375, "learning_rate": 0.00018985378956313135, "loss": 1.3848, "step": 546 }, { "epoch": 0.2943629759182026, "grad_norm": 0.2265625, "learning_rate": 0.00018981655187892194, "loss": 1.4838, "step": 547 }, { "epoch": 0.29490111664200186, "grad_norm": 0.234375, "learning_rate": 0.0001897792496522892, "loss": 1.3684, "step": 548 }, { "epoch": 0.2954392573658012, "grad_norm": 0.224609375, "learning_rate": 0.00018974188291003862, "loss": 1.4801, "step": 549 }, { "epoch": 0.29597739808960044, "grad_norm": 0.2177734375, "learning_rate": 0.00018970445167902202, "loss": 1.4885, "step": 550 }, { "epoch": 0.2965155388133997, "grad_norm": 0.21875, "learning_rate": 0.00018966695598613757, "loss": 1.4182, "step": 551 }, { "epoch": 0.29705367953719897, "grad_norm": 0.21875, "learning_rate": 0.00018962939585832984, "loss": 1.5058, "step": 552 }, { "epoch": 0.29759182026099823, "grad_norm": 0.220703125, "learning_rate": 0.00018959177132258962, "loss": 1.4588, "step": 553 }, { "epoch": 0.29812996098479755, "grad_norm": 0.2158203125, "learning_rate": 0.00018955408240595395, "loss": 1.3189, "step": 554 }, { "epoch": 0.2986681017085968, "grad_norm": 0.228515625, "learning_rate": 0.00018951632913550626, "loss": 1.4263, "step": 555 }, { "epoch": 0.2992062424323961, "grad_norm": 0.228515625, "learning_rate": 0.00018947851153837612, "loss": 1.5, "step": 556 }, { "epoch": 0.29974438315619534, "grad_norm": 0.220703125, "learning_rate": 0.00018944062964173933, "loss": 1.5255, "step": 557 }, { "epoch": 0.3002825238799946, "grad_norm": 0.2216796875, "learning_rate": 0.00018940268347281803, "loss": 1.4152, "step": 558 }, { "epoch": 0.3008206646037939, "grad_norm": 0.21875, "learning_rate": 0.00018936467305888033, "loss": 1.3961, "step": 559 }, { "epoch": 0.3013588053275932, "grad_norm": 0.2373046875, "learning_rate": 0.00018932659842724067, "loss": 1.4421, "step": 560 }, { "epoch": 0.30189694605139245, "grad_norm": 0.216796875, "learning_rate": 0.00018928845960525958, "loss": 1.4262, "step": 561 }, { "epoch": 0.3024350867751917, "grad_norm": 0.2216796875, "learning_rate": 0.00018925025662034376, "loss": 1.3985, "step": 562 }, { "epoch": 0.30297322749899097, "grad_norm": 0.20703125, "learning_rate": 0.00018921198949994593, "loss": 1.4267, "step": 563 }, { "epoch": 0.30351136822279023, "grad_norm": 0.2158203125, "learning_rate": 0.00018917365827156492, "loss": 1.331, "step": 564 }, { "epoch": 0.30404950894658955, "grad_norm": 0.2236328125, "learning_rate": 0.00018913526296274573, "loss": 1.4568, "step": 565 }, { "epoch": 0.3045876496703888, "grad_norm": 0.22265625, "learning_rate": 0.0001890968036010793, "loss": 1.4106, "step": 566 }, { "epoch": 0.3051257903941881, "grad_norm": 0.220703125, "learning_rate": 0.00018905828021420268, "loss": 1.4243, "step": 567 }, { "epoch": 0.30566393111798734, "grad_norm": 0.2099609375, "learning_rate": 0.0001890196928297988, "loss": 1.3591, "step": 568 }, { "epoch": 0.3062020718417866, "grad_norm": 0.21484375, "learning_rate": 0.00018898104147559673, "loss": 1.4708, "step": 569 }, { "epoch": 0.3067402125655859, "grad_norm": 0.2197265625, "learning_rate": 0.0001889423261793714, "loss": 1.3857, "step": 570 }, { "epoch": 0.3072783532893852, "grad_norm": 0.2177734375, "learning_rate": 0.00018890354696894375, "loss": 1.5143, "step": 571 }, { "epoch": 0.30781649401318445, "grad_norm": 0.2216796875, "learning_rate": 0.0001888647038721806, "loss": 1.418, "step": 572 }, { "epoch": 0.3083546347369837, "grad_norm": 0.224609375, "learning_rate": 0.00018882579691699476, "loss": 1.415, "step": 573 }, { "epoch": 0.308892775460783, "grad_norm": 0.2138671875, "learning_rate": 0.00018878682613134484, "loss": 1.417, "step": 574 }, { "epoch": 0.3094309161845823, "grad_norm": 0.2216796875, "learning_rate": 0.00018874779154323538, "loss": 1.3709, "step": 575 }, { "epoch": 0.30996905690838156, "grad_norm": 0.240234375, "learning_rate": 0.00018870869318071668, "loss": 1.5415, "step": 576 }, { "epoch": 0.3105071976321808, "grad_norm": 0.2080078125, "learning_rate": 0.000188669531071885, "loss": 1.2727, "step": 577 }, { "epoch": 0.3110453383559801, "grad_norm": 0.23046875, "learning_rate": 0.0001886303052448823, "loss": 1.5166, "step": 578 }, { "epoch": 0.31158347907977935, "grad_norm": 0.22265625, "learning_rate": 0.00018859101572789636, "loss": 1.4518, "step": 579 }, { "epoch": 0.3121216198035786, "grad_norm": 0.2333984375, "learning_rate": 0.00018855166254916082, "loss": 1.4415, "step": 580 }, { "epoch": 0.31265976052737793, "grad_norm": 0.232421875, "learning_rate": 0.0001885122457369549, "loss": 1.3818, "step": 581 }, { "epoch": 0.3131979012511772, "grad_norm": 0.2119140625, "learning_rate": 0.00018847276531960364, "loss": 1.4667, "step": 582 }, { "epoch": 0.31373604197497645, "grad_norm": 0.2275390625, "learning_rate": 0.00018843322132547778, "loss": 1.4701, "step": 583 }, { "epoch": 0.3142741826987757, "grad_norm": 0.22265625, "learning_rate": 0.00018839361378299377, "loss": 1.4709, "step": 584 }, { "epoch": 0.314812323422575, "grad_norm": 0.212890625, "learning_rate": 0.0001883539427206137, "loss": 1.4306, "step": 585 }, { "epoch": 0.3153504641463743, "grad_norm": 0.2255859375, "learning_rate": 0.00018831420816684525, "loss": 1.2951, "step": 586 }, { "epoch": 0.31588860487017356, "grad_norm": 0.2216796875, "learning_rate": 0.00018827441015024186, "loss": 1.3972, "step": 587 }, { "epoch": 0.3164267455939728, "grad_norm": 0.2138671875, "learning_rate": 0.00018823454869940242, "loss": 1.3513, "step": 588 }, { "epoch": 0.3169648863177721, "grad_norm": 0.2109375, "learning_rate": 0.00018819462384297154, "loss": 1.2944, "step": 589 }, { "epoch": 0.31750302704157135, "grad_norm": 0.212890625, "learning_rate": 0.0001881546356096393, "loss": 1.3759, "step": 590 }, { "epoch": 0.31804116776537067, "grad_norm": 0.2236328125, "learning_rate": 0.00018811458402814138, "loss": 1.3959, "step": 591 }, { "epoch": 0.31857930848916993, "grad_norm": 0.2177734375, "learning_rate": 0.00018807446912725892, "loss": 1.3541, "step": 592 }, { "epoch": 0.3191174492129692, "grad_norm": 0.21484375, "learning_rate": 0.00018803429093581863, "loss": 1.4361, "step": 593 }, { "epoch": 0.31965558993676846, "grad_norm": 0.2275390625, "learning_rate": 0.00018799404948269266, "loss": 1.4325, "step": 594 }, { "epoch": 0.3201937306605677, "grad_norm": 0.2265625, "learning_rate": 0.00018795374479679862, "loss": 1.4656, "step": 595 }, { "epoch": 0.320731871384367, "grad_norm": 0.2275390625, "learning_rate": 0.00018791337690709958, "loss": 1.4351, "step": 596 }, { "epoch": 0.3212700121081663, "grad_norm": 0.2080078125, "learning_rate": 0.00018787294584260403, "loss": 1.3327, "step": 597 }, { "epoch": 0.32180815283196557, "grad_norm": 0.2216796875, "learning_rate": 0.0001878324516323658, "loss": 1.4963, "step": 598 }, { "epoch": 0.32234629355576483, "grad_norm": 0.21484375, "learning_rate": 0.00018779189430548422, "loss": 1.3057, "step": 599 }, { "epoch": 0.3228844342795641, "grad_norm": 0.234375, "learning_rate": 0.0001877512738911038, "loss": 1.5477, "step": 600 }, { "epoch": 0.32342257500336336, "grad_norm": 0.2197265625, "learning_rate": 0.00018771059041841455, "loss": 1.3597, "step": 601 }, { "epoch": 0.3239607157271627, "grad_norm": 0.2158203125, "learning_rate": 0.0001876698439166517, "loss": 1.5098, "step": 602 }, { "epoch": 0.32449885645096194, "grad_norm": 0.2236328125, "learning_rate": 0.00018762903441509583, "loss": 1.4475, "step": 603 }, { "epoch": 0.3250369971747612, "grad_norm": 0.2373046875, "learning_rate": 0.00018758816194307273, "loss": 1.4579, "step": 604 }, { "epoch": 0.32557513789856046, "grad_norm": 0.2197265625, "learning_rate": 0.00018754722652995347, "loss": 1.4571, "step": 605 }, { "epoch": 0.3261132786223597, "grad_norm": 0.22265625, "learning_rate": 0.0001875062282051544, "loss": 1.4888, "step": 606 }, { "epoch": 0.32665141934615904, "grad_norm": 0.2216796875, "learning_rate": 0.0001874651669981369, "loss": 1.4016, "step": 607 }, { "epoch": 0.3271895600699583, "grad_norm": 0.224609375, "learning_rate": 0.00018742404293840785, "loss": 1.4321, "step": 608 }, { "epoch": 0.32772770079375757, "grad_norm": 0.2353515625, "learning_rate": 0.000187382856055519, "loss": 1.5098, "step": 609 }, { "epoch": 0.32826584151755683, "grad_norm": 0.2109375, "learning_rate": 0.0001873416063790674, "loss": 1.3488, "step": 610 }, { "epoch": 0.3288039822413561, "grad_norm": 0.2236328125, "learning_rate": 0.00018730029393869513, "loss": 1.4631, "step": 611 }, { "epoch": 0.3293421229651554, "grad_norm": 0.2197265625, "learning_rate": 0.00018725891876408945, "loss": 1.5034, "step": 612 }, { "epoch": 0.3298802636889547, "grad_norm": 0.2236328125, "learning_rate": 0.0001872174808849827, "loss": 1.4788, "step": 613 }, { "epoch": 0.33041840441275394, "grad_norm": 0.2177734375, "learning_rate": 0.00018717598033115226, "loss": 1.3157, "step": 614 }, { "epoch": 0.3309565451365532, "grad_norm": 0.2255859375, "learning_rate": 0.00018713441713242047, "loss": 1.5024, "step": 615 }, { "epoch": 0.33149468586035247, "grad_norm": 0.22265625, "learning_rate": 0.00018709279131865487, "loss": 1.359, "step": 616 }, { "epoch": 0.33203282658415173, "grad_norm": 0.2197265625, "learning_rate": 0.00018705110291976777, "loss": 1.4067, "step": 617 }, { "epoch": 0.33257096730795105, "grad_norm": 0.2177734375, "learning_rate": 0.00018700935196571666, "loss": 1.4675, "step": 618 }, { "epoch": 0.3331091080317503, "grad_norm": 0.2314453125, "learning_rate": 0.00018696753848650386, "loss": 1.464, "step": 619 }, { "epoch": 0.3336472487555496, "grad_norm": 0.2333984375, "learning_rate": 0.00018692566251217663, "loss": 1.545, "step": 620 }, { "epoch": 0.33418538947934884, "grad_norm": 0.22265625, "learning_rate": 0.00018688372407282716, "loss": 1.4438, "step": 621 }, { "epoch": 0.3347235302031481, "grad_norm": 0.2158203125, "learning_rate": 0.0001868417231985926, "loss": 1.3707, "step": 622 }, { "epoch": 0.3352616709269474, "grad_norm": 0.2099609375, "learning_rate": 0.00018679965991965484, "loss": 1.3325, "step": 623 }, { "epoch": 0.3357998116507467, "grad_norm": 0.234375, "learning_rate": 0.0001867575342662406, "loss": 1.4575, "step": 624 }, { "epoch": 0.33633795237454595, "grad_norm": 0.2158203125, "learning_rate": 0.0001867153462686216, "loss": 1.4012, "step": 625 }, { "epoch": 0.3368760930983452, "grad_norm": 0.21484375, "learning_rate": 0.00018667309595711426, "loss": 1.4178, "step": 626 }, { "epoch": 0.33741423382214447, "grad_norm": 0.220703125, "learning_rate": 0.00018663078336207968, "loss": 1.3928, "step": 627 }, { "epoch": 0.3379523745459438, "grad_norm": 0.31640625, "learning_rate": 0.00018658840851392387, "loss": 2.3036, "step": 628 }, { "epoch": 0.33849051526974305, "grad_norm": 0.240234375, "learning_rate": 0.00018654597144309752, "loss": 1.4223, "step": 629 }, { "epoch": 0.3390286559935423, "grad_norm": 0.216796875, "learning_rate": 0.00018650347218009597, "loss": 1.4101, "step": 630 }, { "epoch": 0.3395667967173416, "grad_norm": 0.2177734375, "learning_rate": 0.00018646091075545936, "loss": 1.3925, "step": 631 }, { "epoch": 0.34010493744114084, "grad_norm": 0.2236328125, "learning_rate": 0.00018641828719977244, "loss": 1.5148, "step": 632 }, { "epoch": 0.3406430781649401, "grad_norm": 0.224609375, "learning_rate": 0.0001863756015436646, "loss": 1.4261, "step": 633 }, { "epoch": 0.3411812188887394, "grad_norm": 0.228515625, "learning_rate": 0.00018633285381780987, "loss": 1.433, "step": 634 }, { "epoch": 0.3417193596125387, "grad_norm": 0.2236328125, "learning_rate": 0.0001862900440529269, "loss": 1.3957, "step": 635 }, { "epoch": 0.34225750033633795, "grad_norm": 0.380859375, "learning_rate": 0.00018624717227977888, "loss": 2.1437, "step": 636 }, { "epoch": 0.3427956410601372, "grad_norm": 0.2158203125, "learning_rate": 0.0001862042385291736, "loss": 1.367, "step": 637 }, { "epoch": 0.3433337817839365, "grad_norm": 0.2177734375, "learning_rate": 0.00018616124283196342, "loss": 1.3987, "step": 638 }, { "epoch": 0.3438719225077358, "grad_norm": 0.2392578125, "learning_rate": 0.00018611818521904508, "loss": 1.4498, "step": 639 }, { "epoch": 0.34441006323153506, "grad_norm": 0.22265625, "learning_rate": 0.00018607506572135995, "loss": 1.5276, "step": 640 }, { "epoch": 0.3449482039553343, "grad_norm": 0.216796875, "learning_rate": 0.00018603188436989383, "loss": 1.3298, "step": 641 }, { "epoch": 0.3454863446791336, "grad_norm": 0.2275390625, "learning_rate": 0.00018598864119567693, "loss": 1.3334, "step": 642 }, { "epoch": 0.34602448540293285, "grad_norm": 0.2314453125, "learning_rate": 0.00018594533622978395, "loss": 1.4365, "step": 643 }, { "epoch": 0.34656262612673217, "grad_norm": 0.2333984375, "learning_rate": 0.0001859019695033339, "loss": 1.4079, "step": 644 }, { "epoch": 0.34710076685053143, "grad_norm": 0.21875, "learning_rate": 0.00018585854104749027, "loss": 1.4521, "step": 645 }, { "epoch": 0.3476389075743307, "grad_norm": 0.234375, "learning_rate": 0.0001858150508934609, "loss": 1.4696, "step": 646 }, { "epoch": 0.34817704829812995, "grad_norm": 0.2099609375, "learning_rate": 0.00018577149907249787, "loss": 1.3575, "step": 647 }, { "epoch": 0.3487151890219292, "grad_norm": 0.2099609375, "learning_rate": 0.00018572788561589768, "loss": 1.2868, "step": 648 }, { "epoch": 0.34925332974572854, "grad_norm": 0.216796875, "learning_rate": 0.00018568421055500104, "loss": 1.3483, "step": 649 }, { "epoch": 0.3497914704695278, "grad_norm": 0.22265625, "learning_rate": 0.00018564047392119302, "loss": 1.4523, "step": 650 }, { "epoch": 0.35032961119332706, "grad_norm": 0.224609375, "learning_rate": 0.0001855966757459029, "loss": 1.3448, "step": 651 }, { "epoch": 0.3508677519171263, "grad_norm": 0.2177734375, "learning_rate": 0.00018555281606060412, "loss": 1.4428, "step": 652 }, { "epoch": 0.3514058926409256, "grad_norm": 0.220703125, "learning_rate": 0.00018550889489681437, "loss": 1.3247, "step": 653 }, { "epoch": 0.35194403336472485, "grad_norm": 0.2275390625, "learning_rate": 0.00018546491228609553, "loss": 1.4187, "step": 654 }, { "epoch": 0.35248217408852417, "grad_norm": 0.2275390625, "learning_rate": 0.00018542086826005364, "loss": 1.4342, "step": 655 }, { "epoch": 0.35302031481232343, "grad_norm": 0.2197265625, "learning_rate": 0.00018537676285033887, "loss": 1.2653, "step": 656 }, { "epoch": 0.3535584555361227, "grad_norm": 0.2255859375, "learning_rate": 0.00018533259608864545, "loss": 1.439, "step": 657 }, { "epoch": 0.35409659625992196, "grad_norm": 0.2265625, "learning_rate": 0.0001852883680067118, "loss": 1.4782, "step": 658 }, { "epoch": 0.3546347369837212, "grad_norm": 0.22265625, "learning_rate": 0.00018524407863632026, "loss": 1.4856, "step": 659 }, { "epoch": 0.35517287770752054, "grad_norm": 0.2138671875, "learning_rate": 0.00018519972800929736, "loss": 1.3895, "step": 660 }, { "epoch": 0.3557110184313198, "grad_norm": 0.2216796875, "learning_rate": 0.00018515531615751358, "loss": 1.4766, "step": 661 }, { "epoch": 0.35624915915511907, "grad_norm": 0.2138671875, "learning_rate": 0.00018511084311288335, "loss": 1.3859, "step": 662 }, { "epoch": 0.35678729987891833, "grad_norm": 0.212890625, "learning_rate": 0.00018506630890736516, "loss": 1.3162, "step": 663 }, { "epoch": 0.3573254406027176, "grad_norm": 0.2255859375, "learning_rate": 0.00018502171357296144, "loss": 1.4255, "step": 664 }, { "epoch": 0.3578635813265169, "grad_norm": 0.21875, "learning_rate": 0.00018497705714171845, "loss": 1.417, "step": 665 }, { "epoch": 0.3584017220503162, "grad_norm": 0.21875, "learning_rate": 0.00018493233964572646, "loss": 1.4231, "step": 666 }, { "epoch": 0.35893986277411544, "grad_norm": 0.228515625, "learning_rate": 0.00018488756111711963, "loss": 1.4204, "step": 667 }, { "epoch": 0.3594780034979147, "grad_norm": 0.220703125, "learning_rate": 0.00018484272158807586, "loss": 1.4191, "step": 668 }, { "epoch": 0.36001614422171396, "grad_norm": 0.228515625, "learning_rate": 0.00018479782109081703, "loss": 1.473, "step": 669 }, { "epoch": 0.3605542849455132, "grad_norm": 0.2197265625, "learning_rate": 0.00018475285965760868, "loss": 1.5355, "step": 670 }, { "epoch": 0.36109242566931254, "grad_norm": 0.232421875, "learning_rate": 0.00018470783732076033, "loss": 1.4846, "step": 671 }, { "epoch": 0.3616305663931118, "grad_norm": 0.2265625, "learning_rate": 0.00018466275411262504, "loss": 1.4308, "step": 672 }, { "epoch": 0.36216870711691107, "grad_norm": 0.224609375, "learning_rate": 0.0001846176100655998, "loss": 1.4464, "step": 673 }, { "epoch": 0.36270684784071033, "grad_norm": 0.2314453125, "learning_rate": 0.00018457240521212525, "loss": 1.4532, "step": 674 }, { "epoch": 0.3632449885645096, "grad_norm": 0.228515625, "learning_rate": 0.0001845271395846857, "loss": 1.4433, "step": 675 }, { "epoch": 0.3637831292883089, "grad_norm": 0.228515625, "learning_rate": 0.00018448181321580916, "loss": 1.5254, "step": 676 }, { "epoch": 0.3643212700121082, "grad_norm": 0.25, "learning_rate": 0.00018443642613806728, "loss": 1.4722, "step": 677 }, { "epoch": 0.36485941073590744, "grad_norm": 0.2236328125, "learning_rate": 0.00018439097838407532, "loss": 1.4615, "step": 678 }, { "epoch": 0.3653975514597067, "grad_norm": 0.21875, "learning_rate": 0.0001843454699864922, "loss": 1.3927, "step": 679 }, { "epoch": 0.36593569218350597, "grad_norm": 0.2109375, "learning_rate": 0.00018429990097802037, "loss": 1.3899, "step": 680 }, { "epoch": 0.3664738329073053, "grad_norm": 0.2216796875, "learning_rate": 0.00018425427139140586, "loss": 1.3358, "step": 681 }, { "epoch": 0.36701197363110455, "grad_norm": 0.23046875, "learning_rate": 0.00018420858125943817, "loss": 1.3763, "step": 682 }, { "epoch": 0.3675501143549038, "grad_norm": 0.232421875, "learning_rate": 0.00018416283061495039, "loss": 1.5283, "step": 683 }, { "epoch": 0.3680882550787031, "grad_norm": 0.2353515625, "learning_rate": 0.00018411701949081906, "loss": 1.4097, "step": 684 }, { "epoch": 0.36862639580250234, "grad_norm": 0.212890625, "learning_rate": 0.00018407114791996415, "loss": 1.3735, "step": 685 }, { "epoch": 0.3691645365263016, "grad_norm": 0.2275390625, "learning_rate": 0.00018402521593534918, "loss": 1.4214, "step": 686 }, { "epoch": 0.3697026772501009, "grad_norm": 0.2275390625, "learning_rate": 0.0001839792235699809, "loss": 1.4675, "step": 687 }, { "epoch": 0.3702408179739002, "grad_norm": 0.2333984375, "learning_rate": 0.00018393317085690965, "loss": 1.52, "step": 688 }, { "epoch": 0.37077895869769945, "grad_norm": 0.21875, "learning_rate": 0.00018388705782922894, "loss": 1.3673, "step": 689 }, { "epoch": 0.3713170994214987, "grad_norm": 0.23046875, "learning_rate": 0.00018384088452007578, "loss": 1.4274, "step": 690 }, { "epoch": 0.37185524014529797, "grad_norm": 0.2373046875, "learning_rate": 0.0001837946509626304, "loss": 1.4018, "step": 691 }, { "epoch": 0.3723933808690973, "grad_norm": 0.2314453125, "learning_rate": 0.0001837483571901164, "loss": 1.3661, "step": 692 }, { "epoch": 0.37293152159289655, "grad_norm": 0.2275390625, "learning_rate": 0.0001837020032358006, "loss": 1.5242, "step": 693 }, { "epoch": 0.3734696623166958, "grad_norm": 0.21875, "learning_rate": 0.00018365558913299308, "loss": 1.4069, "step": 694 }, { "epoch": 0.3740078030404951, "grad_norm": 0.208984375, "learning_rate": 0.00018360911491504712, "loss": 1.3506, "step": 695 }, { "epoch": 0.37454594376429434, "grad_norm": 0.228515625, "learning_rate": 0.0001835625806153593, "loss": 1.4158, "step": 696 }, { "epoch": 0.37508408448809366, "grad_norm": 0.2216796875, "learning_rate": 0.0001835159862673692, "loss": 1.3896, "step": 697 }, { "epoch": 0.3756222252118929, "grad_norm": 0.2216796875, "learning_rate": 0.00018346933190455968, "loss": 1.3737, "step": 698 }, { "epoch": 0.3761603659356922, "grad_norm": 0.21484375, "learning_rate": 0.00018342261756045672, "loss": 1.408, "step": 699 }, { "epoch": 0.37669850665949145, "grad_norm": 0.2109375, "learning_rate": 0.00018337584326862935, "loss": 1.3943, "step": 700 }, { "epoch": 0.3772366473832907, "grad_norm": 0.2314453125, "learning_rate": 0.0001833290090626897, "loss": 1.369, "step": 701 }, { "epoch": 0.37777478810709003, "grad_norm": 0.2197265625, "learning_rate": 0.00018328211497629302, "loss": 1.318, "step": 702 }, { "epoch": 0.3783129288308893, "grad_norm": 0.228515625, "learning_rate": 0.00018323516104313745, "loss": 1.4576, "step": 703 }, { "epoch": 0.37885106955468856, "grad_norm": 0.2265625, "learning_rate": 0.00018318814729696427, "loss": 1.3765, "step": 704 }, { "epoch": 0.3793892102784878, "grad_norm": 0.2255859375, "learning_rate": 0.00018314107377155766, "loss": 1.2401, "step": 705 }, { "epoch": 0.3799273510022871, "grad_norm": 0.21875, "learning_rate": 0.0001830939405007448, "loss": 1.4357, "step": 706 }, { "epoch": 0.38046549172608635, "grad_norm": 0.2265625, "learning_rate": 0.0001830467475183958, "loss": 1.3921, "step": 707 }, { "epoch": 0.38100363244988567, "grad_norm": 0.2314453125, "learning_rate": 0.00018299949485842365, "loss": 1.5502, "step": 708 }, { "epoch": 0.38154177317368493, "grad_norm": 0.2236328125, "learning_rate": 0.00018295218255478423, "loss": 1.3938, "step": 709 }, { "epoch": 0.3820799138974842, "grad_norm": 0.2255859375, "learning_rate": 0.00018290481064147635, "loss": 1.3954, "step": 710 }, { "epoch": 0.38261805462128345, "grad_norm": 0.21875, "learning_rate": 0.0001828573791525415, "loss": 1.3151, "step": 711 }, { "epoch": 0.3831561953450827, "grad_norm": 0.228515625, "learning_rate": 0.00018280988812206418, "loss": 1.2889, "step": 712 }, { "epoch": 0.38369433606888204, "grad_norm": 0.228515625, "learning_rate": 0.00018276233758417155, "loss": 1.5746, "step": 713 }, { "epoch": 0.3842324767926813, "grad_norm": 0.21875, "learning_rate": 0.0001827147275730335, "loss": 1.3934, "step": 714 }, { "epoch": 0.38477061751648056, "grad_norm": 0.21875, "learning_rate": 0.00018266705812286274, "loss": 1.3557, "step": 715 }, { "epoch": 0.3853087582402798, "grad_norm": 0.212890625, "learning_rate": 0.00018261932926791477, "loss": 1.3831, "step": 716 }, { "epoch": 0.3858468989640791, "grad_norm": 0.2265625, "learning_rate": 0.00018257154104248753, "loss": 1.4853, "step": 717 }, { "epoch": 0.3863850396878784, "grad_norm": 0.22265625, "learning_rate": 0.00018252369348092183, "loss": 1.3714, "step": 718 }, { "epoch": 0.38692318041167767, "grad_norm": 0.2177734375, "learning_rate": 0.0001824757866176011, "loss": 1.3944, "step": 719 }, { "epoch": 0.38746132113547693, "grad_norm": 0.220703125, "learning_rate": 0.0001824278204869513, "loss": 1.4504, "step": 720 }, { "epoch": 0.3879994618592762, "grad_norm": 0.21875, "learning_rate": 0.00018237979512344106, "loss": 1.4202, "step": 721 }, { "epoch": 0.38853760258307546, "grad_norm": 0.216796875, "learning_rate": 0.00018233171056158148, "loss": 1.4116, "step": 722 }, { "epoch": 0.3890757433068747, "grad_norm": 0.22265625, "learning_rate": 0.0001822835668359263, "loss": 1.443, "step": 723 }, { "epoch": 0.38961388403067404, "grad_norm": 0.2265625, "learning_rate": 0.00018223536398107176, "loss": 1.4317, "step": 724 }, { "epoch": 0.3901520247544733, "grad_norm": 0.2236328125, "learning_rate": 0.00018218710203165653, "loss": 1.4373, "step": 725 }, { "epoch": 0.39069016547827257, "grad_norm": 0.2158203125, "learning_rate": 0.00018213878102236176, "loss": 1.3768, "step": 726 }, { "epoch": 0.39122830620207183, "grad_norm": 0.2158203125, "learning_rate": 0.0001820904009879111, "loss": 1.3171, "step": 727 }, { "epoch": 0.3917664469258711, "grad_norm": 0.2353515625, "learning_rate": 0.00018204196196307056, "loss": 1.5049, "step": 728 }, { "epoch": 0.3923045876496704, "grad_norm": 0.224609375, "learning_rate": 0.00018199346398264856, "loss": 1.4167, "step": 729 }, { "epoch": 0.3928427283734697, "grad_norm": 0.234375, "learning_rate": 0.0001819449070814959, "loss": 1.3772, "step": 730 }, { "epoch": 0.39338086909726894, "grad_norm": 0.216796875, "learning_rate": 0.00018189629129450568, "loss": 1.3102, "step": 731 }, { "epoch": 0.3939190098210682, "grad_norm": 0.216796875, "learning_rate": 0.00018184761665661334, "loss": 1.3912, "step": 732 }, { "epoch": 0.39445715054486746, "grad_norm": 0.2060546875, "learning_rate": 0.00018179888320279662, "loss": 1.3428, "step": 733 }, { "epoch": 0.3949952912686668, "grad_norm": 0.22265625, "learning_rate": 0.00018175009096807555, "loss": 1.3557, "step": 734 }, { "epoch": 0.39553343199246604, "grad_norm": 0.2177734375, "learning_rate": 0.0001817012399875123, "loss": 1.4364, "step": 735 }, { "epoch": 0.3960715727162653, "grad_norm": 0.23828125, "learning_rate": 0.00018165233029621134, "loss": 1.3848, "step": 736 }, { "epoch": 0.39660971344006457, "grad_norm": 0.2177734375, "learning_rate": 0.00018160336192931932, "loss": 1.3125, "step": 737 }, { "epoch": 0.39714785416386383, "grad_norm": 0.2431640625, "learning_rate": 0.00018155433492202507, "loss": 1.4548, "step": 738 }, { "epoch": 0.3976859948876631, "grad_norm": 0.2294921875, "learning_rate": 0.00018150524930955948, "loss": 1.4005, "step": 739 }, { "epoch": 0.3982241356114624, "grad_norm": 0.2373046875, "learning_rate": 0.00018145610512719564, "loss": 1.4752, "step": 740 }, { "epoch": 0.3987622763352617, "grad_norm": 0.2353515625, "learning_rate": 0.00018140690241024872, "loss": 1.4982, "step": 741 }, { "epoch": 0.39930041705906094, "grad_norm": 0.2255859375, "learning_rate": 0.00018135764119407585, "loss": 1.3939, "step": 742 }, { "epoch": 0.3998385577828602, "grad_norm": 0.21875, "learning_rate": 0.00018130832151407633, "loss": 1.427, "step": 743 }, { "epoch": 0.40037669850665947, "grad_norm": 0.2158203125, "learning_rate": 0.00018125894340569146, "loss": 1.4377, "step": 744 }, { "epoch": 0.4009148392304588, "grad_norm": 0.220703125, "learning_rate": 0.0001812095069044044, "loss": 1.4266, "step": 745 }, { "epoch": 0.40145297995425805, "grad_norm": 0.2265625, "learning_rate": 0.0001811600120457404, "loss": 1.3443, "step": 746 }, { "epoch": 0.4019911206780573, "grad_norm": 0.2197265625, "learning_rate": 0.00018111045886526662, "loss": 1.3701, "step": 747 }, { "epoch": 0.4025292614018566, "grad_norm": 0.220703125, "learning_rate": 0.00018106084739859206, "loss": 1.4007, "step": 748 }, { "epoch": 0.40306740212565584, "grad_norm": 0.2265625, "learning_rate": 0.0001810111776813677, "loss": 1.4319, "step": 749 }, { "epoch": 0.40360554284945516, "grad_norm": 0.2197265625, "learning_rate": 0.00018096144974928636, "loss": 1.4038, "step": 750 }, { "epoch": 0.4041436835732544, "grad_norm": 0.224609375, "learning_rate": 0.00018091166363808257, "loss": 1.3168, "step": 751 }, { "epoch": 0.4046818242970537, "grad_norm": 0.22265625, "learning_rate": 0.0001808618193835329, "loss": 1.3627, "step": 752 }, { "epoch": 0.40521996502085295, "grad_norm": 0.22265625, "learning_rate": 0.00018081191702145548, "loss": 1.4304, "step": 753 }, { "epoch": 0.4057581057446522, "grad_norm": 0.228515625, "learning_rate": 0.00018076195658771032, "loss": 1.4698, "step": 754 }, { "epoch": 0.4062962464684515, "grad_norm": 0.2294921875, "learning_rate": 0.0001807119381181991, "loss": 1.4403, "step": 755 }, { "epoch": 0.4068343871922508, "grad_norm": 0.224609375, "learning_rate": 0.00018066186164886525, "loss": 1.4328, "step": 756 }, { "epoch": 0.40737252791605005, "grad_norm": 0.21484375, "learning_rate": 0.00018061172721569384, "loss": 1.3228, "step": 757 }, { "epoch": 0.4079106686398493, "grad_norm": 0.2255859375, "learning_rate": 0.00018056153485471166, "loss": 1.3624, "step": 758 }, { "epoch": 0.4084488093636486, "grad_norm": 0.2275390625, "learning_rate": 0.00018051128460198703, "loss": 1.5251, "step": 759 }, { "epoch": 0.40898695008744784, "grad_norm": 0.2255859375, "learning_rate": 0.00018046097649362995, "loss": 1.478, "step": 760 }, { "epoch": 0.40952509081124716, "grad_norm": 0.2158203125, "learning_rate": 0.00018041061056579192, "loss": 1.3706, "step": 761 }, { "epoch": 0.4100632315350464, "grad_norm": 0.22265625, "learning_rate": 0.00018036018685466614, "loss": 1.3984, "step": 762 }, { "epoch": 0.4106013722588457, "grad_norm": 0.2275390625, "learning_rate": 0.0001803097053964871, "loss": 1.3865, "step": 763 }, { "epoch": 0.41113951298264495, "grad_norm": 0.2177734375, "learning_rate": 0.000180259166227531, "loss": 1.3756, "step": 764 }, { "epoch": 0.4116776537064442, "grad_norm": 0.224609375, "learning_rate": 0.0001802085693841154, "loss": 1.4097, "step": 765 }, { "epoch": 0.41221579443024353, "grad_norm": 0.220703125, "learning_rate": 0.00018015791490259934, "loss": 1.359, "step": 766 }, { "epoch": 0.4127539351540428, "grad_norm": 0.2216796875, "learning_rate": 0.00018010720281938327, "loss": 1.3345, "step": 767 }, { "epoch": 0.41329207587784206, "grad_norm": 0.22265625, "learning_rate": 0.000180056433170909, "loss": 1.3102, "step": 768 }, { "epoch": 0.4138302166016413, "grad_norm": 0.224609375, "learning_rate": 0.0001800056059936598, "loss": 1.3688, "step": 769 }, { "epoch": 0.4143683573254406, "grad_norm": 0.22265625, "learning_rate": 0.0001799547213241602, "loss": 1.4389, "step": 770 }, { "epoch": 0.4149064980492399, "grad_norm": 0.2412109375, "learning_rate": 0.00017990377919897604, "loss": 1.4733, "step": 771 }, { "epoch": 0.41544463877303917, "grad_norm": 0.2119140625, "learning_rate": 0.00017985277965471447, "loss": 1.3206, "step": 772 }, { "epoch": 0.41598277949683843, "grad_norm": 0.2236328125, "learning_rate": 0.000179801722728024, "loss": 1.3806, "step": 773 }, { "epoch": 0.4165209202206377, "grad_norm": 0.2265625, "learning_rate": 0.0001797506084555941, "loss": 1.2873, "step": 774 }, { "epoch": 0.41705906094443695, "grad_norm": 0.240234375, "learning_rate": 0.00017969943687415576, "loss": 1.4445, "step": 775 }, { "epoch": 0.4175972016682362, "grad_norm": 0.2314453125, "learning_rate": 0.00017964820802048102, "loss": 1.502, "step": 776 }, { "epoch": 0.41813534239203554, "grad_norm": 0.2421875, "learning_rate": 0.00017959692193138304, "loss": 1.4017, "step": 777 }, { "epoch": 0.4186734831158348, "grad_norm": 0.216796875, "learning_rate": 0.00017954557864371612, "loss": 1.4029, "step": 778 }, { "epoch": 0.41921162383963406, "grad_norm": 0.2265625, "learning_rate": 0.00017949417819437574, "loss": 1.3669, "step": 779 }, { "epoch": 0.4197497645634333, "grad_norm": 0.2255859375, "learning_rate": 0.00017944272062029839, "loss": 1.3779, "step": 780 }, { "epoch": 0.4202879052872326, "grad_norm": 0.23828125, "learning_rate": 0.00017939120595846162, "loss": 1.4993, "step": 781 }, { "epoch": 0.4208260460110319, "grad_norm": 0.21484375, "learning_rate": 0.00017933963424588401, "loss": 1.3916, "step": 782 }, { "epoch": 0.42136418673483117, "grad_norm": 0.23828125, "learning_rate": 0.0001792880055196251, "loss": 1.47, "step": 783 }, { "epoch": 0.42190232745863043, "grad_norm": 0.236328125, "learning_rate": 0.0001792363198167855, "loss": 1.4027, "step": 784 }, { "epoch": 0.4224404681824297, "grad_norm": 0.2158203125, "learning_rate": 0.00017918457717450669, "loss": 1.3253, "step": 785 }, { "epoch": 0.42297860890622896, "grad_norm": 0.21875, "learning_rate": 0.00017913277762997103, "loss": 1.3691, "step": 786 }, { "epoch": 0.4235167496300283, "grad_norm": 0.234375, "learning_rate": 0.00017908092122040188, "loss": 1.3081, "step": 787 }, { "epoch": 0.42405489035382754, "grad_norm": 0.23828125, "learning_rate": 0.00017902900798306336, "loss": 1.3796, "step": 788 }, { "epoch": 0.4245930310776268, "grad_norm": 0.22265625, "learning_rate": 0.00017897703795526044, "loss": 1.3761, "step": 789 }, { "epoch": 0.42513117180142607, "grad_norm": 0.244140625, "learning_rate": 0.00017892501117433896, "loss": 1.5093, "step": 790 }, { "epoch": 0.42566931252522533, "grad_norm": 0.23046875, "learning_rate": 0.00017887292767768551, "loss": 1.3834, "step": 791 }, { "epoch": 0.4262074532490246, "grad_norm": 0.2314453125, "learning_rate": 0.00017882078750272742, "loss": 1.4134, "step": 792 }, { "epoch": 0.4267455939728239, "grad_norm": 0.2333984375, "learning_rate": 0.0001787685906869328, "loss": 1.4267, "step": 793 }, { "epoch": 0.4272837346966232, "grad_norm": 0.232421875, "learning_rate": 0.0001787163372678104, "loss": 1.4198, "step": 794 }, { "epoch": 0.42782187542042244, "grad_norm": 0.2216796875, "learning_rate": 0.00017866402728290966, "loss": 1.3585, "step": 795 }, { "epoch": 0.4283600161442217, "grad_norm": 0.224609375, "learning_rate": 0.0001786116607698207, "loss": 1.3827, "step": 796 }, { "epoch": 0.42889815686802096, "grad_norm": 0.2314453125, "learning_rate": 0.00017855923776617422, "loss": 1.352, "step": 797 }, { "epoch": 0.4294362975918203, "grad_norm": 0.2314453125, "learning_rate": 0.00017850675830964158, "loss": 1.2917, "step": 798 }, { "epoch": 0.42997443831561954, "grad_norm": 0.2314453125, "learning_rate": 0.00017845422243793464, "loss": 1.4824, "step": 799 }, { "epoch": 0.4305125790394188, "grad_norm": 0.2412109375, "learning_rate": 0.00017840163018880578, "loss": 1.5449, "step": 800 }, { "epoch": 0.43105071976321807, "grad_norm": 0.2373046875, "learning_rate": 0.000178348981600048, "loss": 1.3621, "step": 801 }, { "epoch": 0.43158886048701733, "grad_norm": 0.2255859375, "learning_rate": 0.00017829627670949466, "loss": 1.4171, "step": 802 }, { "epoch": 0.43212700121081665, "grad_norm": 0.224609375, "learning_rate": 0.00017824351555501973, "loss": 1.283, "step": 803 }, { "epoch": 0.4326651419346159, "grad_norm": 0.240234375, "learning_rate": 0.00017819069817453744, "loss": 1.4348, "step": 804 }, { "epoch": 0.4332032826584152, "grad_norm": 0.2333984375, "learning_rate": 0.00017813782460600255, "loss": 1.4224, "step": 805 }, { "epoch": 0.43374142338221444, "grad_norm": 0.2216796875, "learning_rate": 0.00017808489488741012, "loss": 1.4318, "step": 806 }, { "epoch": 0.4342795641060137, "grad_norm": 0.23046875, "learning_rate": 0.00017803190905679562, "loss": 1.3723, "step": 807 }, { "epoch": 0.434817704829813, "grad_norm": 0.2333984375, "learning_rate": 0.0001779788671522348, "loss": 1.3999, "step": 808 }, { "epoch": 0.4353558455536123, "grad_norm": 0.2216796875, "learning_rate": 0.00017792576921184372, "loss": 1.3121, "step": 809 }, { "epoch": 0.43589398627741155, "grad_norm": 0.2177734375, "learning_rate": 0.0001778726152737787, "loss": 1.3446, "step": 810 }, { "epoch": 0.4364321270012108, "grad_norm": 0.2314453125, "learning_rate": 0.00017781940537623634, "loss": 1.3883, "step": 811 }, { "epoch": 0.4369702677250101, "grad_norm": 0.236328125, "learning_rate": 0.0001777661395574534, "loss": 1.4028, "step": 812 }, { "epoch": 0.43750840844880934, "grad_norm": 0.2236328125, "learning_rate": 0.0001777128178557068, "loss": 1.3903, "step": 813 }, { "epoch": 0.43804654917260866, "grad_norm": 0.224609375, "learning_rate": 0.0001776594403093138, "loss": 1.3894, "step": 814 }, { "epoch": 0.4385846898964079, "grad_norm": 0.2412109375, "learning_rate": 0.00017760600695663147, "loss": 1.5036, "step": 815 }, { "epoch": 0.4391228306202072, "grad_norm": 0.2314453125, "learning_rate": 0.0001775525178360573, "loss": 1.3681, "step": 816 }, { "epoch": 0.43966097134400645, "grad_norm": 0.2275390625, "learning_rate": 0.00017749897298602866, "loss": 1.3563, "step": 817 }, { "epoch": 0.4401991120678057, "grad_norm": 0.2197265625, "learning_rate": 0.00017744537244502307, "loss": 1.3046, "step": 818 }, { "epoch": 0.440737252791605, "grad_norm": 0.22265625, "learning_rate": 0.00017739171625155798, "loss": 1.3647, "step": 819 }, { "epoch": 0.4412753935154043, "grad_norm": 0.2490234375, "learning_rate": 0.0001773380044441909, "loss": 1.4214, "step": 820 }, { "epoch": 0.44181353423920355, "grad_norm": 0.2197265625, "learning_rate": 0.00017728423706151928, "loss": 1.4277, "step": 821 }, { "epoch": 0.4423516749630028, "grad_norm": 0.2265625, "learning_rate": 0.00017723041414218052, "loss": 1.3252, "step": 822 }, { "epoch": 0.4428898156868021, "grad_norm": 0.2373046875, "learning_rate": 0.0001771765357248519, "loss": 1.3895, "step": 823 }, { "epoch": 0.4434279564106014, "grad_norm": 0.2294921875, "learning_rate": 0.00017712260184825061, "loss": 1.3821, "step": 824 }, { "epoch": 0.44396609713440066, "grad_norm": 0.2265625, "learning_rate": 0.00017706861255113374, "loss": 1.4293, "step": 825 }, { "epoch": 0.4445042378581999, "grad_norm": 0.224609375, "learning_rate": 0.00017701456787229804, "loss": 1.3978, "step": 826 }, { "epoch": 0.4450423785819992, "grad_norm": 0.228515625, "learning_rate": 0.00017696046785058026, "loss": 1.433, "step": 827 }, { "epoch": 0.44558051930579845, "grad_norm": 0.228515625, "learning_rate": 0.00017690631252485677, "loss": 1.2649, "step": 828 }, { "epoch": 0.4461186600295977, "grad_norm": 0.232421875, "learning_rate": 0.00017685210193404375, "loss": 1.3677, "step": 829 }, { "epoch": 0.44665680075339703, "grad_norm": 0.234375, "learning_rate": 0.0001767978361170971, "loss": 1.402, "step": 830 }, { "epoch": 0.4471949414771963, "grad_norm": 0.2216796875, "learning_rate": 0.00017674351511301237, "loss": 1.3327, "step": 831 }, { "epoch": 0.44773308220099556, "grad_norm": 0.240234375, "learning_rate": 0.00017668913896082478, "loss": 1.3692, "step": 832 }, { "epoch": 0.4482712229247948, "grad_norm": 0.21875, "learning_rate": 0.0001766347076996092, "loss": 1.3657, "step": 833 }, { "epoch": 0.4488093636485941, "grad_norm": 0.22265625, "learning_rate": 0.00017658022136848007, "loss": 1.3453, "step": 834 }, { "epoch": 0.4493475043723934, "grad_norm": 0.2314453125, "learning_rate": 0.00017652568000659142, "loss": 1.4385, "step": 835 }, { "epoch": 0.44988564509619267, "grad_norm": 0.2236328125, "learning_rate": 0.00017647108365313684, "loss": 1.4767, "step": 836 }, { "epoch": 0.45042378581999193, "grad_norm": 0.22265625, "learning_rate": 0.0001764164323473494, "loss": 1.4603, "step": 837 }, { "epoch": 0.4509619265437912, "grad_norm": 0.2197265625, "learning_rate": 0.0001763617261285017, "loss": 1.3292, "step": 838 }, { "epoch": 0.45150006726759045, "grad_norm": 0.224609375, "learning_rate": 0.00017630696503590575, "loss": 1.5133, "step": 839 }, { "epoch": 0.4520382079913898, "grad_norm": 0.2177734375, "learning_rate": 0.00017625214910891305, "loss": 1.3648, "step": 840 }, { "epoch": 0.45257634871518904, "grad_norm": 0.2236328125, "learning_rate": 0.0001761972783869145, "loss": 1.382, "step": 841 }, { "epoch": 0.4531144894389883, "grad_norm": 0.228515625, "learning_rate": 0.0001761423529093403, "loss": 1.4399, "step": 842 }, { "epoch": 0.45365263016278756, "grad_norm": 0.2236328125, "learning_rate": 0.00017608737271566005, "loss": 1.4229, "step": 843 }, { "epoch": 0.4541907708865868, "grad_norm": 0.220703125, "learning_rate": 0.00017603233784538273, "loss": 1.4064, "step": 844 }, { "epoch": 0.4547289116103861, "grad_norm": 0.236328125, "learning_rate": 0.00017597724833805648, "loss": 1.292, "step": 845 }, { "epoch": 0.4552670523341854, "grad_norm": 0.2255859375, "learning_rate": 0.00017592210423326887, "loss": 1.3291, "step": 846 }, { "epoch": 0.45580519305798467, "grad_norm": 0.228515625, "learning_rate": 0.0001758669055706465, "loss": 1.3997, "step": 847 }, { "epoch": 0.45634333378178393, "grad_norm": 0.224609375, "learning_rate": 0.00017581165238985533, "loss": 1.3308, "step": 848 }, { "epoch": 0.4568814745055832, "grad_norm": 0.21875, "learning_rate": 0.00017575634473060046, "loss": 1.2127, "step": 849 }, { "epoch": 0.45741961522938246, "grad_norm": 0.2265625, "learning_rate": 0.00017570098263262608, "loss": 1.3506, "step": 850 }, { "epoch": 0.4579577559531818, "grad_norm": 0.2119140625, "learning_rate": 0.00017564556613571558, "loss": 1.226, "step": 851 }, { "epoch": 0.45849589667698104, "grad_norm": 0.2265625, "learning_rate": 0.00017559009527969144, "loss": 1.3292, "step": 852 }, { "epoch": 0.4590340374007803, "grad_norm": 0.220703125, "learning_rate": 0.00017553457010441512, "loss": 1.3106, "step": 853 }, { "epoch": 0.45957217812457957, "grad_norm": 0.2412109375, "learning_rate": 0.00017547899064978715, "loss": 1.3851, "step": 854 }, { "epoch": 0.46011031884837883, "grad_norm": 0.2392578125, "learning_rate": 0.00017542335695574718, "loss": 1.298, "step": 855 }, { "epoch": 0.46064845957217815, "grad_norm": 0.2255859375, "learning_rate": 0.00017536766906227363, "loss": 1.3934, "step": 856 }, { "epoch": 0.4611866002959774, "grad_norm": 0.23046875, "learning_rate": 0.00017531192700938407, "loss": 1.4399, "step": 857 }, { "epoch": 0.4617247410197767, "grad_norm": 0.2265625, "learning_rate": 0.00017525613083713476, "loss": 1.4116, "step": 858 }, { "epoch": 0.46226288174357594, "grad_norm": 0.2294921875, "learning_rate": 0.00017520028058562112, "loss": 1.4515, "step": 859 }, { "epoch": 0.4628010224673752, "grad_norm": 0.2275390625, "learning_rate": 0.0001751443762949772, "loss": 1.411, "step": 860 }, { "epoch": 0.4633391631911745, "grad_norm": 0.23046875, "learning_rate": 0.00017508841800537606, "loss": 1.4628, "step": 861 }, { "epoch": 0.4638773039149738, "grad_norm": 0.22265625, "learning_rate": 0.0001750324057570294, "loss": 1.3857, "step": 862 }, { "epoch": 0.46441544463877305, "grad_norm": 0.2294921875, "learning_rate": 0.0001749763395901878, "loss": 1.4621, "step": 863 }, { "epoch": 0.4649535853625723, "grad_norm": 0.232421875, "learning_rate": 0.00017492021954514065, "loss": 1.3932, "step": 864 }, { "epoch": 0.46549172608637157, "grad_norm": 0.2275390625, "learning_rate": 0.00017486404566221587, "loss": 1.4389, "step": 865 }, { "epoch": 0.46602986681017083, "grad_norm": 0.2158203125, "learning_rate": 0.00017480781798178018, "loss": 1.2689, "step": 866 }, { "epoch": 0.46656800753397015, "grad_norm": 0.224609375, "learning_rate": 0.000174751536544239, "loss": 1.3426, "step": 867 }, { "epoch": 0.4671061482577694, "grad_norm": 0.2314453125, "learning_rate": 0.00017469520139003626, "loss": 1.4075, "step": 868 }, { "epoch": 0.4676442889815687, "grad_norm": 0.228515625, "learning_rate": 0.0001746388125596546, "loss": 1.4355, "step": 869 }, { "epoch": 0.46818242970536794, "grad_norm": 0.23046875, "learning_rate": 0.00017458237009361518, "loss": 1.4677, "step": 870 }, { "epoch": 0.4687205704291672, "grad_norm": 0.234375, "learning_rate": 0.00017452587403247771, "loss": 1.3809, "step": 871 }, { "epoch": 0.4692587111529665, "grad_norm": 0.2294921875, "learning_rate": 0.00017446932441684044, "loss": 1.4057, "step": 872 }, { "epoch": 0.4697968518767658, "grad_norm": 0.2314453125, "learning_rate": 0.00017441272128734003, "loss": 1.3605, "step": 873 }, { "epoch": 0.47033499260056505, "grad_norm": 0.2294921875, "learning_rate": 0.0001743560646846517, "loss": 1.4507, "step": 874 }, { "epoch": 0.4708731333243643, "grad_norm": 0.234375, "learning_rate": 0.00017429935464948902, "loss": 1.4033, "step": 875 }, { "epoch": 0.4714112740481636, "grad_norm": 0.23046875, "learning_rate": 0.00017424259122260396, "loss": 1.4266, "step": 876 }, { "epoch": 0.4719494147719629, "grad_norm": 0.2265625, "learning_rate": 0.00017418577444478688, "loss": 1.3604, "step": 877 }, { "epoch": 0.47248755549576216, "grad_norm": 0.23046875, "learning_rate": 0.00017412890435686648, "loss": 1.5138, "step": 878 }, { "epoch": 0.4730256962195614, "grad_norm": 0.22265625, "learning_rate": 0.0001740719809997098, "loss": 1.3832, "step": 879 }, { "epoch": 0.4735638369433607, "grad_norm": 0.2333984375, "learning_rate": 0.0001740150044142221, "loss": 1.3764, "step": 880 }, { "epoch": 0.47410197766715995, "grad_norm": 0.2275390625, "learning_rate": 0.00017395797464134688, "loss": 1.3, "step": 881 }, { "epoch": 0.4746401183909592, "grad_norm": 0.25390625, "learning_rate": 0.00017390089172206592, "loss": 1.4891, "step": 882 }, { "epoch": 0.4751782591147585, "grad_norm": 0.2265625, "learning_rate": 0.00017384375569739916, "loss": 1.3297, "step": 883 }, { "epoch": 0.4757163998385578, "grad_norm": 0.234375, "learning_rate": 0.00017378656660840475, "loss": 1.4769, "step": 884 }, { "epoch": 0.47625454056235705, "grad_norm": 0.2431640625, "learning_rate": 0.00017372932449617883, "loss": 1.4123, "step": 885 }, { "epoch": 0.4767926812861563, "grad_norm": 0.23046875, "learning_rate": 0.0001736720294018558, "loss": 1.4418, "step": 886 }, { "epoch": 0.4773308220099556, "grad_norm": 0.2353515625, "learning_rate": 0.0001736146813666081, "loss": 1.4974, "step": 887 }, { "epoch": 0.4778689627337549, "grad_norm": 0.23046875, "learning_rate": 0.00017355728043164613, "loss": 1.4165, "step": 888 }, { "epoch": 0.47840710345755416, "grad_norm": 0.2333984375, "learning_rate": 0.0001734998266382184, "loss": 1.3662, "step": 889 }, { "epoch": 0.4789452441813534, "grad_norm": 0.220703125, "learning_rate": 0.00017344232002761137, "loss": 1.3792, "step": 890 }, { "epoch": 0.4794833849051527, "grad_norm": 0.2353515625, "learning_rate": 0.00017338476064114938, "loss": 1.4459, "step": 891 }, { "epoch": 0.48002152562895195, "grad_norm": 0.232421875, "learning_rate": 0.00017332714852019487, "loss": 1.4832, "step": 892 }, { "epoch": 0.48055966635275127, "grad_norm": 0.2314453125, "learning_rate": 0.00017326948370614798, "loss": 1.4867, "step": 893 }, { "epoch": 0.48109780707655053, "grad_norm": 0.2412109375, "learning_rate": 0.00017321176624044687, "loss": 1.4345, "step": 894 }, { "epoch": 0.4816359478003498, "grad_norm": 0.2353515625, "learning_rate": 0.00017315399616456745, "loss": 1.4623, "step": 895 }, { "epoch": 0.48217408852414906, "grad_norm": 0.220703125, "learning_rate": 0.00017309617352002343, "loss": 1.3592, "step": 896 }, { "epoch": 0.4827122292479483, "grad_norm": 0.25390625, "learning_rate": 0.00017303829834836636, "loss": 1.4635, "step": 897 }, { "epoch": 0.48325036997174764, "grad_norm": 0.23046875, "learning_rate": 0.00017298037069118547, "loss": 1.3556, "step": 898 }, { "epoch": 0.4837885106955469, "grad_norm": 0.23828125, "learning_rate": 0.00017292239059010775, "loss": 1.2722, "step": 899 }, { "epoch": 0.48432665141934617, "grad_norm": 0.2373046875, "learning_rate": 0.00017286435808679787, "loss": 1.4436, "step": 900 }, { "epoch": 0.48486479214314543, "grad_norm": 0.234375, "learning_rate": 0.0001728062732229581, "loss": 1.3681, "step": 901 }, { "epoch": 0.4854029328669447, "grad_norm": 0.2373046875, "learning_rate": 0.00017274813604032846, "loss": 1.3574, "step": 902 }, { "epoch": 0.48594107359074395, "grad_norm": 0.234375, "learning_rate": 0.00017268994658068643, "loss": 1.4701, "step": 903 }, { "epoch": 0.4864792143145433, "grad_norm": 0.2216796875, "learning_rate": 0.00017263170488584717, "loss": 1.3268, "step": 904 }, { "epoch": 0.48701735503834254, "grad_norm": 0.23828125, "learning_rate": 0.0001725734109976633, "loss": 1.3766, "step": 905 }, { "epoch": 0.4875554957621418, "grad_norm": 0.2216796875, "learning_rate": 0.0001725150649580249, "loss": 1.2945, "step": 906 }, { "epoch": 0.48809363648594106, "grad_norm": 0.228515625, "learning_rate": 0.00017245666680885973, "loss": 1.3239, "step": 907 }, { "epoch": 0.4886317772097403, "grad_norm": 0.228515625, "learning_rate": 0.00017239821659213272, "loss": 1.4147, "step": 908 }, { "epoch": 0.48916991793353964, "grad_norm": 0.23828125, "learning_rate": 0.00017233971434984648, "loss": 1.3968, "step": 909 }, { "epoch": 0.4897080586573389, "grad_norm": 0.2255859375, "learning_rate": 0.00017228116012404083, "loss": 1.348, "step": 910 }, { "epoch": 0.49024619938113817, "grad_norm": 0.2333984375, "learning_rate": 0.00017222255395679296, "loss": 1.3949, "step": 911 }, { "epoch": 0.49078434010493743, "grad_norm": 0.2333984375, "learning_rate": 0.0001721638958902175, "loss": 1.4182, "step": 912 }, { "epoch": 0.4913224808287367, "grad_norm": 0.232421875, "learning_rate": 0.00017210518596646625, "loss": 1.421, "step": 913 }, { "epoch": 0.491860621552536, "grad_norm": 0.2294921875, "learning_rate": 0.00017204642422772833, "loss": 1.3025, "step": 914 }, { "epoch": 0.4923987622763353, "grad_norm": 0.23828125, "learning_rate": 0.00017198761071623013, "loss": 1.4478, "step": 915 }, { "epoch": 0.49293690300013454, "grad_norm": 0.22265625, "learning_rate": 0.00017192874547423514, "loss": 1.3049, "step": 916 }, { "epoch": 0.4934750437239338, "grad_norm": 0.2333984375, "learning_rate": 0.00017186982854404414, "loss": 1.4266, "step": 917 }, { "epoch": 0.49401318444773307, "grad_norm": 0.2314453125, "learning_rate": 0.00017181085996799498, "loss": 1.3182, "step": 918 }, { "epoch": 0.49455132517153233, "grad_norm": 0.2373046875, "learning_rate": 0.00017175183978846258, "loss": 1.3656, "step": 919 }, { "epoch": 0.49508946589533165, "grad_norm": 0.228515625, "learning_rate": 0.00017169276804785913, "loss": 1.3843, "step": 920 }, { "epoch": 0.4956276066191309, "grad_norm": 0.2373046875, "learning_rate": 0.00017163364478863365, "loss": 1.3436, "step": 921 }, { "epoch": 0.4961657473429302, "grad_norm": 0.216796875, "learning_rate": 0.0001715744700532723, "loss": 1.2968, "step": 922 }, { "epoch": 0.49670388806672944, "grad_norm": 0.2333984375, "learning_rate": 0.00017151524388429821, "loss": 1.4009, "step": 923 }, { "epoch": 0.4972420287905287, "grad_norm": 0.220703125, "learning_rate": 0.0001714559663242715, "loss": 1.4155, "step": 924 }, { "epoch": 0.497780169514328, "grad_norm": 0.2353515625, "learning_rate": 0.00017139663741578916, "loss": 1.4512, "step": 925 }, { "epoch": 0.4983183102381273, "grad_norm": 0.236328125, "learning_rate": 0.00017133725720148513, "loss": 1.3718, "step": 926 }, { "epoch": 0.49885645096192655, "grad_norm": 0.2236328125, "learning_rate": 0.00017127782572403015, "loss": 1.4641, "step": 927 }, { "epoch": 0.4993945916857258, "grad_norm": 0.2392578125, "learning_rate": 0.00017121834302613186, "loss": 1.3682, "step": 928 }, { "epoch": 0.49993273240952507, "grad_norm": 0.2314453125, "learning_rate": 0.00017115880915053475, "loss": 1.4051, "step": 929 }, { "epoch": 0.49993273240952507, "eval_loss": 1.3825759887695312, "eval_runtime": 1073.6017, "eval_samples_per_second": 4.738, "eval_steps_per_second": 2.37, "step": 929 }, { "epoch": 0.5004708731333244, "grad_norm": 0.234375, "learning_rate": 0.00017109922414001996, "loss": 1.4411, "step": 930 }, { "epoch": 0.5010090138571236, "grad_norm": 0.2353515625, "learning_rate": 0.00017103958803740547, "loss": 1.3091, "step": 931 }, { "epoch": 0.5015471545809229, "grad_norm": 0.2294921875, "learning_rate": 0.00017097990088554594, "loss": 1.2881, "step": 932 }, { "epoch": 0.5020852953047222, "grad_norm": 0.240234375, "learning_rate": 0.00017092016272733274, "loss": 1.3643, "step": 933 }, { "epoch": 0.5026234360285214, "grad_norm": 0.2314453125, "learning_rate": 0.00017086037360569388, "loss": 1.3101, "step": 934 }, { "epoch": 0.5031615767523208, "grad_norm": 0.2314453125, "learning_rate": 0.00017080053356359395, "loss": 1.4151, "step": 935 }, { "epoch": 0.50369971747612, "grad_norm": 0.2294921875, "learning_rate": 0.0001707406426440342, "loss": 1.2479, "step": 936 }, { "epoch": 0.5042378581999193, "grad_norm": 0.23828125, "learning_rate": 0.0001706807008900524, "loss": 1.3682, "step": 937 }, { "epoch": 0.5047759989237186, "grad_norm": 0.236328125, "learning_rate": 0.00017062070834472287, "loss": 1.3831, "step": 938 }, { "epoch": 0.5053141396475178, "grad_norm": 0.21875, "learning_rate": 0.00017056066505115642, "loss": 1.2887, "step": 939 }, { "epoch": 0.5058522803713171, "grad_norm": 0.2294921875, "learning_rate": 0.00017050057105250031, "loss": 1.329, "step": 940 }, { "epoch": 0.5063904210951163, "grad_norm": 0.2236328125, "learning_rate": 0.00017044042639193828, "loss": 1.3087, "step": 941 }, { "epoch": 0.5069285618189157, "grad_norm": 0.37109375, "learning_rate": 0.00017038023111269043, "loss": 2.211, "step": 942 }, { "epoch": 0.5074667025427149, "grad_norm": 0.2333984375, "learning_rate": 0.0001703199852580133, "loss": 1.4062, "step": 943 }, { "epoch": 0.5080048432665142, "grad_norm": 0.2255859375, "learning_rate": 0.00017025968887119964, "loss": 1.3219, "step": 944 }, { "epoch": 0.5085429839903135, "grad_norm": 0.22265625, "learning_rate": 0.00017019934199557867, "loss": 1.2774, "step": 945 }, { "epoch": 0.5090811247141127, "grad_norm": 0.2265625, "learning_rate": 0.00017013894467451582, "loss": 1.3782, "step": 946 }, { "epoch": 0.509619265437912, "grad_norm": 0.2392578125, "learning_rate": 0.00017007849695141274, "loss": 1.4835, "step": 947 }, { "epoch": 0.5101574061617112, "grad_norm": 0.2412109375, "learning_rate": 0.00017001799886970733, "loss": 1.4217, "step": 948 }, { "epoch": 0.5106955468855106, "grad_norm": 0.2177734375, "learning_rate": 0.00016995745047287372, "loss": 1.3548, "step": 949 }, { "epoch": 0.5112336876093099, "grad_norm": 0.23046875, "learning_rate": 0.0001698968518044221, "loss": 1.3792, "step": 950 }, { "epoch": 0.5117718283331091, "grad_norm": 0.2412109375, "learning_rate": 0.0001698362029078989, "loss": 1.339, "step": 951 }, { "epoch": 0.5123099690569084, "grad_norm": 0.224609375, "learning_rate": 0.0001697755038268866, "loss": 1.3394, "step": 952 }, { "epoch": 0.5128481097807076, "grad_norm": 0.2314453125, "learning_rate": 0.0001697147546050036, "loss": 1.4133, "step": 953 }, { "epoch": 0.5133862505045069, "grad_norm": 0.2294921875, "learning_rate": 0.0001696539552859046, "loss": 1.3045, "step": 954 }, { "epoch": 0.5139243912283062, "grad_norm": 0.2216796875, "learning_rate": 0.0001695931059132801, "loss": 1.3433, "step": 955 }, { "epoch": 0.5144625319521055, "grad_norm": 0.228515625, "learning_rate": 0.00016953220653085661, "loss": 1.369, "step": 956 }, { "epoch": 0.5150006726759048, "grad_norm": 0.2265625, "learning_rate": 0.00016947125718239664, "loss": 1.3788, "step": 957 }, { "epoch": 0.515538813399704, "grad_norm": 0.2197265625, "learning_rate": 0.00016941025791169856, "loss": 1.3797, "step": 958 }, { "epoch": 0.5160769541235033, "grad_norm": 0.24609375, "learning_rate": 0.00016934920876259657, "loss": 1.328, "step": 959 }, { "epoch": 0.5166150948473026, "grad_norm": 0.224609375, "learning_rate": 0.0001692881097789608, "loss": 1.3224, "step": 960 }, { "epoch": 0.5171532355711018, "grad_norm": 0.2314453125, "learning_rate": 0.0001692269610046971, "loss": 1.3271, "step": 961 }, { "epoch": 0.5176913762949011, "grad_norm": 0.2236328125, "learning_rate": 0.00016916576248374719, "loss": 1.3927, "step": 962 }, { "epoch": 0.5182295170187003, "grad_norm": 0.236328125, "learning_rate": 0.0001691045142600885, "loss": 1.3578, "step": 963 }, { "epoch": 0.5187676577424997, "grad_norm": 0.23046875, "learning_rate": 0.00016904321637773416, "loss": 1.4664, "step": 964 }, { "epoch": 0.519305798466299, "grad_norm": 0.2216796875, "learning_rate": 0.00016898186888073303, "loss": 1.3773, "step": 965 }, { "epoch": 0.5198439391900982, "grad_norm": 0.2314453125, "learning_rate": 0.0001689204718131695, "loss": 1.3386, "step": 966 }, { "epoch": 0.5203820799138975, "grad_norm": 0.23828125, "learning_rate": 0.00016885902521916374, "loss": 1.4111, "step": 967 }, { "epoch": 0.5209202206376967, "grad_norm": 0.228515625, "learning_rate": 0.0001687975291428715, "loss": 1.4257, "step": 968 }, { "epoch": 0.521458361361496, "grad_norm": 0.23046875, "learning_rate": 0.0001687359836284839, "loss": 1.2638, "step": 969 }, { "epoch": 0.5219965020852954, "grad_norm": 0.2373046875, "learning_rate": 0.00016867438872022783, "loss": 1.3872, "step": 970 }, { "epoch": 0.5225346428090946, "grad_norm": 0.2275390625, "learning_rate": 0.00016861274446236548, "loss": 1.3661, "step": 971 }, { "epoch": 0.5230727835328939, "grad_norm": 0.236328125, "learning_rate": 0.00016855105089919466, "loss": 1.4329, "step": 972 }, { "epoch": 0.5236109242566931, "grad_norm": 0.2255859375, "learning_rate": 0.00016848930807504846, "loss": 1.3575, "step": 973 }, { "epoch": 0.5241490649804924, "grad_norm": 0.2294921875, "learning_rate": 0.0001684275160342955, "loss": 1.299, "step": 974 }, { "epoch": 0.5246872057042917, "grad_norm": 0.232421875, "learning_rate": 0.0001683656748213397, "loss": 1.2279, "step": 975 }, { "epoch": 0.5252253464280909, "grad_norm": 0.2314453125, "learning_rate": 0.0001683037844806203, "loss": 1.3648, "step": 976 }, { "epoch": 0.5257634871518903, "grad_norm": 0.2431640625, "learning_rate": 0.00016824184505661192, "loss": 1.316, "step": 977 }, { "epoch": 0.5263016278756895, "grad_norm": 0.251953125, "learning_rate": 0.00016817985659382435, "loss": 1.4335, "step": 978 }, { "epoch": 0.5268397685994888, "grad_norm": 0.224609375, "learning_rate": 0.0001681178191368027, "loss": 1.2724, "step": 979 }, { "epoch": 0.527377909323288, "grad_norm": 0.232421875, "learning_rate": 0.00016805573273012727, "loss": 1.2877, "step": 980 }, { "epoch": 0.5279160500470873, "grad_norm": 0.232421875, "learning_rate": 0.00016799359741841353, "loss": 1.4567, "step": 981 }, { "epoch": 0.5284541907708866, "grad_norm": 0.2333984375, "learning_rate": 0.000167931413246312, "loss": 1.2723, "step": 982 }, { "epoch": 0.5289923314946858, "grad_norm": 0.2236328125, "learning_rate": 0.00016786918025850852, "loss": 1.3567, "step": 983 }, { "epoch": 0.5295304722184851, "grad_norm": 0.244140625, "learning_rate": 0.00016780689849972379, "loss": 1.3794, "step": 984 }, { "epoch": 0.5300686129422844, "grad_norm": 0.2392578125, "learning_rate": 0.00016774456801471369, "loss": 1.3341, "step": 985 }, { "epoch": 0.5306067536660837, "grad_norm": 0.232421875, "learning_rate": 0.00016768218884826913, "loss": 1.3469, "step": 986 }, { "epoch": 0.531144894389883, "grad_norm": 0.2490234375, "learning_rate": 0.0001676197610452159, "loss": 1.4695, "step": 987 }, { "epoch": 0.5316830351136822, "grad_norm": 0.2392578125, "learning_rate": 0.0001675572846504148, "loss": 1.3691, "step": 988 }, { "epoch": 0.5322211758374815, "grad_norm": 0.2392578125, "learning_rate": 0.0001674947597087615, "loss": 1.446, "step": 989 }, { "epoch": 0.5327593165612807, "grad_norm": 0.240234375, "learning_rate": 0.0001674321862651867, "loss": 1.4717, "step": 990 }, { "epoch": 0.53329745728508, "grad_norm": 0.244140625, "learning_rate": 0.00016736956436465573, "loss": 1.4333, "step": 991 }, { "epoch": 0.5338355980088794, "grad_norm": 0.2265625, "learning_rate": 0.00016730689405216895, "loss": 1.32, "step": 992 }, { "epoch": 0.5343737387326786, "grad_norm": 0.220703125, "learning_rate": 0.00016724417537276138, "loss": 1.391, "step": 993 }, { "epoch": 0.5349118794564779, "grad_norm": 0.2275390625, "learning_rate": 0.00016718140837150284, "loss": 1.4092, "step": 994 }, { "epoch": 0.5354500201802771, "grad_norm": 0.248046875, "learning_rate": 0.00016711859309349788, "loss": 1.49, "step": 995 }, { "epoch": 0.5359881609040764, "grad_norm": 0.2294921875, "learning_rate": 0.00016705572958388576, "loss": 1.3697, "step": 996 }, { "epoch": 0.5365263016278757, "grad_norm": 0.2275390625, "learning_rate": 0.0001669928178878403, "loss": 1.3703, "step": 997 }, { "epoch": 0.5370644423516749, "grad_norm": 0.2333984375, "learning_rate": 0.0001669298580505701, "loss": 1.358, "step": 998 }, { "epoch": 0.5376025830754743, "grad_norm": 0.2314453125, "learning_rate": 0.0001668668501173182, "loss": 1.4154, "step": 999 }, { "epoch": 0.5381407237992735, "grad_norm": 0.236328125, "learning_rate": 0.00016680379413336232, "loss": 1.3186, "step": 1000 }, { "epoch": 0.5386788645230728, "grad_norm": 0.2236328125, "learning_rate": 0.00016674069014401468, "loss": 1.3391, "step": 1001 }, { "epoch": 0.5392170052468721, "grad_norm": 0.2373046875, "learning_rate": 0.00016667753819462196, "loss": 1.4323, "step": 1002 }, { "epoch": 0.5397551459706713, "grad_norm": 0.234375, "learning_rate": 0.00016661433833056527, "loss": 1.2893, "step": 1003 }, { "epoch": 0.5402932866944706, "grad_norm": 0.2353515625, "learning_rate": 0.0001665510905972603, "loss": 1.4276, "step": 1004 }, { "epoch": 0.5408314274182698, "grad_norm": 0.2294921875, "learning_rate": 0.00016648779504015702, "loss": 1.2575, "step": 1005 }, { "epoch": 0.5413695681420692, "grad_norm": 0.2333984375, "learning_rate": 0.00016642445170473978, "loss": 1.365, "step": 1006 }, { "epoch": 0.5419077088658685, "grad_norm": 0.2392578125, "learning_rate": 0.00016636106063652725, "loss": 1.432, "step": 1007 }, { "epoch": 0.5424458495896677, "grad_norm": 0.2333984375, "learning_rate": 0.00016629762188107247, "loss": 1.3537, "step": 1008 }, { "epoch": 0.542983990313467, "grad_norm": 0.2431640625, "learning_rate": 0.00016623413548396272, "loss": 1.3139, "step": 1009 }, { "epoch": 0.5435221310372662, "grad_norm": 0.248046875, "learning_rate": 0.00016617060149081944, "loss": 1.4055, "step": 1010 }, { "epoch": 0.5440602717610655, "grad_norm": 0.2412109375, "learning_rate": 0.00016610701994729837, "loss": 1.3965, "step": 1011 }, { "epoch": 0.5445984124848648, "grad_norm": 0.2373046875, "learning_rate": 0.00016604339089908935, "loss": 1.3901, "step": 1012 }, { "epoch": 0.545136553208664, "grad_norm": 0.2294921875, "learning_rate": 0.0001659797143919165, "loss": 1.3331, "step": 1013 }, { "epoch": 0.5456746939324634, "grad_norm": 0.228515625, "learning_rate": 0.00016591599047153778, "loss": 1.3335, "step": 1014 }, { "epoch": 0.5462128346562626, "grad_norm": 0.228515625, "learning_rate": 0.0001658522191837455, "loss": 1.3696, "step": 1015 }, { "epoch": 0.5467509753800619, "grad_norm": 0.2255859375, "learning_rate": 0.00016578840057436577, "loss": 1.3214, "step": 1016 }, { "epoch": 0.5472891161038611, "grad_norm": 0.234375, "learning_rate": 0.00016572453468925895, "loss": 1.3744, "step": 1017 }, { "epoch": 0.5478272568276604, "grad_norm": 0.2216796875, "learning_rate": 0.00016566062157431915, "loss": 1.3317, "step": 1018 }, { "epoch": 0.5483653975514597, "grad_norm": 0.2333984375, "learning_rate": 0.00016559666127547454, "loss": 1.377, "step": 1019 }, { "epoch": 0.548903538275259, "grad_norm": 0.228515625, "learning_rate": 0.00016553265383868716, "loss": 1.4166, "step": 1020 }, { "epoch": 0.5494416789990583, "grad_norm": 0.220703125, "learning_rate": 0.00016546859930995295, "loss": 1.3697, "step": 1021 }, { "epoch": 0.5499798197228575, "grad_norm": 0.2392578125, "learning_rate": 0.00016540449773530167, "loss": 1.3901, "step": 1022 }, { "epoch": 0.5505179604466568, "grad_norm": 0.2333984375, "learning_rate": 0.00016534034916079686, "loss": 1.4608, "step": 1023 }, { "epoch": 0.5510561011704561, "grad_norm": 0.240234375, "learning_rate": 0.00016527615363253593, "loss": 1.4028, "step": 1024 }, { "epoch": 0.5515942418942553, "grad_norm": 0.2236328125, "learning_rate": 0.00016521191119664986, "loss": 1.2576, "step": 1025 }, { "epoch": 0.5521323826180546, "grad_norm": 0.2470703125, "learning_rate": 0.00016514762189930352, "loss": 1.3618, "step": 1026 }, { "epoch": 0.5526705233418538, "grad_norm": 0.2392578125, "learning_rate": 0.00016508328578669537, "loss": 1.355, "step": 1027 }, { "epoch": 0.5532086640656532, "grad_norm": 0.2412109375, "learning_rate": 0.00016501890290505748, "loss": 1.4299, "step": 1028 }, { "epoch": 0.5537468047894525, "grad_norm": 0.2333984375, "learning_rate": 0.00016495447330065562, "loss": 1.4432, "step": 1029 }, { "epoch": 0.5542849455132517, "grad_norm": 0.228515625, "learning_rate": 0.00016488999701978903, "loss": 1.3583, "step": 1030 }, { "epoch": 0.554823086237051, "grad_norm": 0.2333984375, "learning_rate": 0.00016482547410879062, "loss": 1.4316, "step": 1031 }, { "epoch": 0.5553612269608502, "grad_norm": 0.2451171875, "learning_rate": 0.00016476090461402663, "loss": 1.4067, "step": 1032 }, { "epoch": 0.5558993676846495, "grad_norm": 0.232421875, "learning_rate": 0.00016469628858189697, "loss": 1.3949, "step": 1033 }, { "epoch": 0.5564375084084489, "grad_norm": 0.23046875, "learning_rate": 0.00016463162605883484, "loss": 1.3744, "step": 1034 }, { "epoch": 0.5569756491322481, "grad_norm": 0.2353515625, "learning_rate": 0.00016456691709130694, "loss": 1.4166, "step": 1035 }, { "epoch": 0.5575137898560474, "grad_norm": 0.234375, "learning_rate": 0.0001645021617258133, "loss": 1.4847, "step": 1036 }, { "epoch": 0.5580519305798466, "grad_norm": 0.240234375, "learning_rate": 0.0001644373600088873, "loss": 1.4005, "step": 1037 }, { "epoch": 0.5585900713036459, "grad_norm": 0.228515625, "learning_rate": 0.00016437251198709565, "loss": 1.4456, "step": 1038 }, { "epoch": 0.5591282120274452, "grad_norm": 0.23046875, "learning_rate": 0.00016430761770703836, "loss": 1.4331, "step": 1039 }, { "epoch": 0.5596663527512444, "grad_norm": 0.2236328125, "learning_rate": 0.00016424267721534854, "loss": 1.2192, "step": 1040 }, { "epoch": 0.5602044934750438, "grad_norm": 0.2294921875, "learning_rate": 0.00016417769055869266, "loss": 1.3761, "step": 1041 }, { "epoch": 0.560742634198843, "grad_norm": 0.2353515625, "learning_rate": 0.00016411265778377033, "loss": 1.3339, "step": 1042 }, { "epoch": 0.5612807749226423, "grad_norm": 0.2421875, "learning_rate": 0.00016404757893731425, "loss": 1.4496, "step": 1043 }, { "epoch": 0.5618189156464416, "grad_norm": 0.2431640625, "learning_rate": 0.00016398245406609025, "loss": 1.3584, "step": 1044 }, { "epoch": 0.5623570563702408, "grad_norm": 0.2353515625, "learning_rate": 0.0001639172832168973, "loss": 1.3626, "step": 1045 }, { "epoch": 0.5628951970940401, "grad_norm": 0.2255859375, "learning_rate": 0.0001638520664365673, "loss": 1.2959, "step": 1046 }, { "epoch": 0.5634333378178393, "grad_norm": 0.224609375, "learning_rate": 0.00016378680377196524, "loss": 1.4337, "step": 1047 }, { "epoch": 0.5639714785416386, "grad_norm": 0.2412109375, "learning_rate": 0.00016372149526998903, "loss": 1.3861, "step": 1048 }, { "epoch": 0.564509619265438, "grad_norm": 0.2294921875, "learning_rate": 0.00016365614097756955, "loss": 1.3777, "step": 1049 }, { "epoch": 0.5650477599892372, "grad_norm": 0.236328125, "learning_rate": 0.0001635907409416706, "loss": 1.3562, "step": 1050 }, { "epoch": 0.5655859007130365, "grad_norm": 0.21875, "learning_rate": 0.00016352529520928877, "loss": 1.3622, "step": 1051 }, { "epoch": 0.5661240414368357, "grad_norm": 0.2412109375, "learning_rate": 0.0001634598038274536, "loss": 1.3873, "step": 1052 }, { "epoch": 0.566662182160635, "grad_norm": 0.2314453125, "learning_rate": 0.00016339426684322734, "loss": 1.344, "step": 1053 }, { "epoch": 0.5672003228844342, "grad_norm": 0.2412109375, "learning_rate": 0.00016332868430370508, "loss": 1.3859, "step": 1054 }, { "epoch": 0.5677384636082335, "grad_norm": 0.232421875, "learning_rate": 0.0001632630562560146, "loss": 1.3485, "step": 1055 }, { "epoch": 0.5682766043320329, "grad_norm": 0.234375, "learning_rate": 0.00016319738274731636, "loss": 1.3681, "step": 1056 }, { "epoch": 0.5688147450558321, "grad_norm": 0.2451171875, "learning_rate": 0.0001631316638248036, "loss": 1.3486, "step": 1057 }, { "epoch": 0.5693528857796314, "grad_norm": 0.234375, "learning_rate": 0.00016306589953570208, "loss": 1.2237, "step": 1058 }, { "epoch": 0.5698910265034306, "grad_norm": 0.2373046875, "learning_rate": 0.0001630000899272702, "loss": 1.4329, "step": 1059 }, { "epoch": 0.5704291672272299, "grad_norm": 0.240234375, "learning_rate": 0.00016293423504679897, "loss": 1.4775, "step": 1060 }, { "epoch": 0.5709673079510292, "grad_norm": 0.228515625, "learning_rate": 0.0001628683349416118, "loss": 1.3362, "step": 1061 }, { "epoch": 0.5715054486748284, "grad_norm": 0.23046875, "learning_rate": 0.00016280238965906473, "loss": 1.323, "step": 1062 }, { "epoch": 0.5720435893986278, "grad_norm": 0.2431640625, "learning_rate": 0.00016273639924654627, "loss": 1.3387, "step": 1063 }, { "epoch": 0.572581730122427, "grad_norm": 0.234375, "learning_rate": 0.00016267036375147725, "loss": 1.4227, "step": 1064 }, { "epoch": 0.5731198708462263, "grad_norm": 0.240234375, "learning_rate": 0.00016260428322131102, "loss": 1.3891, "step": 1065 }, { "epoch": 0.5736580115700256, "grad_norm": 0.23828125, "learning_rate": 0.00016253815770353314, "loss": 1.2949, "step": 1066 }, { "epoch": 0.5741961522938248, "grad_norm": 0.2421875, "learning_rate": 0.0001624719872456617, "loss": 1.4138, "step": 1067 }, { "epoch": 0.5747342930176241, "grad_norm": 0.2265625, "learning_rate": 0.00016240577189524687, "loss": 1.3719, "step": 1068 }, { "epoch": 0.5752724337414233, "grad_norm": 0.224609375, "learning_rate": 0.00016233951169987128, "loss": 1.31, "step": 1069 }, { "epoch": 0.5758105744652227, "grad_norm": 0.244140625, "learning_rate": 0.00016227320670714966, "loss": 1.5289, "step": 1070 }, { "epoch": 0.576348715189022, "grad_norm": 0.2333984375, "learning_rate": 0.00016220685696472891, "loss": 1.388, "step": 1071 }, { "epoch": 0.5768868559128212, "grad_norm": 0.232421875, "learning_rate": 0.0001621404625202882, "loss": 1.2946, "step": 1072 }, { "epoch": 0.5774249966366205, "grad_norm": 0.236328125, "learning_rate": 0.00016207402342153875, "loss": 1.3271, "step": 1073 }, { "epoch": 0.5779631373604197, "grad_norm": 0.2431640625, "learning_rate": 0.00016200753971622387, "loss": 1.4814, "step": 1074 }, { "epoch": 0.578501278084219, "grad_norm": 0.228515625, "learning_rate": 0.00016194101145211888, "loss": 1.347, "step": 1075 }, { "epoch": 0.5790394188080183, "grad_norm": 0.25390625, "learning_rate": 0.0001618744386770313, "loss": 1.459, "step": 1076 }, { "epoch": 0.5795775595318176, "grad_norm": 0.53515625, "learning_rate": 0.0001618078214388004, "loss": 2.228, "step": 1077 }, { "epoch": 0.5801157002556169, "grad_norm": 0.2294921875, "learning_rate": 0.0001617411597852976, "loss": 1.4022, "step": 1078 }, { "epoch": 0.5806538409794161, "grad_norm": 0.234375, "learning_rate": 0.00016167445376442604, "loss": 1.3033, "step": 1079 }, { "epoch": 0.5811919817032154, "grad_norm": 0.26171875, "learning_rate": 0.00016160770342412098, "loss": 1.3834, "step": 1080 }, { "epoch": 0.5817301224270147, "grad_norm": 0.2275390625, "learning_rate": 0.0001615409088123493, "loss": 1.2705, "step": 1081 }, { "epoch": 0.5822682631508139, "grad_norm": 0.2353515625, "learning_rate": 0.00016147406997710982, "loss": 1.2619, "step": 1082 }, { "epoch": 0.5828064038746132, "grad_norm": 0.2255859375, "learning_rate": 0.00016140718696643313, "loss": 1.3126, "step": 1083 }, { "epoch": 0.5833445445984125, "grad_norm": 0.2412109375, "learning_rate": 0.00016134025982838152, "loss": 1.3522, "step": 1084 }, { "epoch": 0.5838826853222118, "grad_norm": 0.2353515625, "learning_rate": 0.00016127328861104907, "loss": 1.3303, "step": 1085 }, { "epoch": 0.584420826046011, "grad_norm": 0.2353515625, "learning_rate": 0.00016120627336256143, "loss": 1.3421, "step": 1086 }, { "epoch": 0.5849589667698103, "grad_norm": 0.240234375, "learning_rate": 0.00016113921413107597, "loss": 1.363, "step": 1087 }, { "epoch": 0.5854971074936096, "grad_norm": 0.236328125, "learning_rate": 0.0001610721109647816, "loss": 1.4322, "step": 1088 }, { "epoch": 0.5860352482174088, "grad_norm": 0.2333984375, "learning_rate": 0.00016100496391189889, "loss": 1.3396, "step": 1089 }, { "epoch": 0.5865733889412081, "grad_norm": 0.232421875, "learning_rate": 0.0001609377730206799, "loss": 1.3636, "step": 1090 }, { "epoch": 0.5871115296650073, "grad_norm": 0.2236328125, "learning_rate": 0.00016087053833940815, "loss": 1.3227, "step": 1091 }, { "epoch": 0.5876496703888067, "grad_norm": 0.234375, "learning_rate": 0.00016080325991639865, "loss": 1.3264, "step": 1092 }, { "epoch": 0.588187811112606, "grad_norm": 0.224609375, "learning_rate": 0.00016073593779999796, "loss": 1.2775, "step": 1093 }, { "epoch": 0.5887259518364052, "grad_norm": 0.23046875, "learning_rate": 0.0001606685720385839, "loss": 1.4222, "step": 1094 }, { "epoch": 0.5892640925602045, "grad_norm": 0.22265625, "learning_rate": 0.0001606011626805656, "loss": 1.2928, "step": 1095 }, { "epoch": 0.5898022332840037, "grad_norm": 0.236328125, "learning_rate": 0.00016053370977438372, "loss": 1.3722, "step": 1096 }, { "epoch": 0.590340374007803, "grad_norm": 0.2353515625, "learning_rate": 0.00016046621336851007, "loss": 1.4678, "step": 1097 }, { "epoch": 0.5908785147316024, "grad_norm": 0.2177734375, "learning_rate": 0.00016039867351144778, "loss": 1.3175, "step": 1098 }, { "epoch": 0.5914166554554016, "grad_norm": 0.392578125, "learning_rate": 0.00016033109025173113, "loss": 2.1371, "step": 1099 }, { "epoch": 0.5919547961792009, "grad_norm": 0.2421875, "learning_rate": 0.00016026346363792567, "loss": 1.3465, "step": 1100 }, { "epoch": 0.5924929369030001, "grad_norm": 0.2470703125, "learning_rate": 0.00016019579371862803, "loss": 1.3397, "step": 1101 }, { "epoch": 0.5930310776267994, "grad_norm": 0.2412109375, "learning_rate": 0.00016012808054246606, "loss": 1.4465, "step": 1102 }, { "epoch": 0.5935692183505987, "grad_norm": 0.25, "learning_rate": 0.00016006032415809865, "loss": 1.3794, "step": 1103 }, { "epoch": 0.5941073590743979, "grad_norm": 0.2373046875, "learning_rate": 0.00015999252461421564, "loss": 1.3752, "step": 1104 }, { "epoch": 0.5946454997981973, "grad_norm": 0.2392578125, "learning_rate": 0.000159924681959538, "loss": 1.322, "step": 1105 }, { "epoch": 0.5951836405219965, "grad_norm": 0.240234375, "learning_rate": 0.00015985679624281774, "loss": 1.3831, "step": 1106 }, { "epoch": 0.5957217812457958, "grad_norm": 0.23046875, "learning_rate": 0.00015978886751283756, "loss": 1.4909, "step": 1107 }, { "epoch": 0.5962599219695951, "grad_norm": 0.232421875, "learning_rate": 0.00015972089581841142, "loss": 1.3228, "step": 1108 }, { "epoch": 0.5967980626933943, "grad_norm": 0.2265625, "learning_rate": 0.00015965288120838383, "loss": 1.384, "step": 1109 }, { "epoch": 0.5973362034171936, "grad_norm": 0.2294921875, "learning_rate": 0.0001595848237316303, "loss": 1.3014, "step": 1110 }, { "epoch": 0.5978743441409928, "grad_norm": 0.2255859375, "learning_rate": 0.00015951672343705715, "loss": 1.2732, "step": 1111 }, { "epoch": 0.5984124848647921, "grad_norm": 0.236328125, "learning_rate": 0.00015944858037360144, "loss": 1.3612, "step": 1112 }, { "epoch": 0.5989506255885915, "grad_norm": 0.244140625, "learning_rate": 0.00015938039459023097, "loss": 1.4358, "step": 1113 }, { "epoch": 0.5994887663123907, "grad_norm": 0.2294921875, "learning_rate": 0.00015931216613594424, "loss": 1.399, "step": 1114 }, { "epoch": 0.60002690703619, "grad_norm": 0.232421875, "learning_rate": 0.00015924389505977038, "loss": 1.4282, "step": 1115 }, { "epoch": 0.6005650477599892, "grad_norm": 0.234375, "learning_rate": 0.00015917558141076915, "loss": 1.3966, "step": 1116 }, { "epoch": 0.6011031884837885, "grad_norm": 0.2373046875, "learning_rate": 0.00015910722523803096, "loss": 1.3784, "step": 1117 }, { "epoch": 0.6016413292075878, "grad_norm": 0.244140625, "learning_rate": 0.00015903882659067678, "loss": 1.236, "step": 1118 }, { "epoch": 0.602179469931387, "grad_norm": 0.265625, "learning_rate": 0.000158970385517858, "loss": 1.3915, "step": 1119 }, { "epoch": 0.6027176106551864, "grad_norm": 0.2353515625, "learning_rate": 0.00015890190206875664, "loss": 1.3723, "step": 1120 }, { "epoch": 0.6032557513789856, "grad_norm": 0.236328125, "learning_rate": 0.00015883337629258502, "loss": 1.3609, "step": 1121 }, { "epoch": 0.6037938921027849, "grad_norm": 0.2470703125, "learning_rate": 0.00015876480823858602, "loss": 1.4014, "step": 1122 }, { "epoch": 0.6043320328265841, "grad_norm": 0.23828125, "learning_rate": 0.00015869619795603278, "loss": 1.374, "step": 1123 }, { "epoch": 0.6048701735503834, "grad_norm": 0.2392578125, "learning_rate": 0.00015862754549422886, "loss": 1.339, "step": 1124 }, { "epoch": 0.6054083142741827, "grad_norm": 0.23828125, "learning_rate": 0.0001585588509025081, "loss": 1.3734, "step": 1125 }, { "epoch": 0.6059464549979819, "grad_norm": 0.2412109375, "learning_rate": 0.00015849011423023463, "loss": 1.3704, "step": 1126 }, { "epoch": 0.6064845957217813, "grad_norm": 0.232421875, "learning_rate": 0.00015842133552680285, "loss": 1.2864, "step": 1127 }, { "epoch": 0.6070227364455805, "grad_norm": 0.2294921875, "learning_rate": 0.00015835251484163726, "loss": 1.3016, "step": 1128 }, { "epoch": 0.6075608771693798, "grad_norm": 0.236328125, "learning_rate": 0.00015828365222419265, "loss": 1.3524, "step": 1129 }, { "epoch": 0.6080990178931791, "grad_norm": 0.2314453125, "learning_rate": 0.00015821474772395385, "loss": 1.3154, "step": 1130 }, { "epoch": 0.6086371586169783, "grad_norm": 0.2373046875, "learning_rate": 0.00015814580139043586, "loss": 1.4078, "step": 1131 }, { "epoch": 0.6091752993407776, "grad_norm": 0.23046875, "learning_rate": 0.0001580768132731837, "loss": 1.3495, "step": 1132 }, { "epoch": 0.6097134400645768, "grad_norm": 0.2431640625, "learning_rate": 0.0001580077834217724, "loss": 1.3994, "step": 1133 }, { "epoch": 0.6102515807883762, "grad_norm": 0.275390625, "learning_rate": 0.00015793871188580702, "loss": 1.5253, "step": 1134 }, { "epoch": 0.6107897215121755, "grad_norm": 0.234375, "learning_rate": 0.00015786959871492254, "loss": 1.3709, "step": 1135 }, { "epoch": 0.6113278622359747, "grad_norm": 0.240234375, "learning_rate": 0.0001578004439587839, "loss": 1.4044, "step": 1136 }, { "epoch": 0.611866002959774, "grad_norm": 0.236328125, "learning_rate": 0.00015773124766708588, "loss": 1.3559, "step": 1137 }, { "epoch": 0.6124041436835732, "grad_norm": 0.416015625, "learning_rate": 0.00015766200988955316, "loss": 2.1497, "step": 1138 }, { "epoch": 0.6129422844073725, "grad_norm": 0.240234375, "learning_rate": 0.00015759273067594013, "loss": 1.424, "step": 1139 }, { "epoch": 0.6134804251311718, "grad_norm": 0.2353515625, "learning_rate": 0.0001575234100760311, "loss": 1.3488, "step": 1140 }, { "epoch": 0.614018565854971, "grad_norm": 0.2412109375, "learning_rate": 0.00015745404813964, "loss": 1.3414, "step": 1141 }, { "epoch": 0.6145567065787704, "grad_norm": 0.232421875, "learning_rate": 0.00015738464491661054, "loss": 1.3755, "step": 1142 }, { "epoch": 0.6150948473025696, "grad_norm": 0.23828125, "learning_rate": 0.00015731520045681604, "loss": 1.3528, "step": 1143 }, { "epoch": 0.6156329880263689, "grad_norm": 0.2353515625, "learning_rate": 0.0001572457148101595, "loss": 1.3434, "step": 1144 }, { "epoch": 0.6161711287501682, "grad_norm": 0.25390625, "learning_rate": 0.0001571761880265735, "loss": 1.4538, "step": 1145 }, { "epoch": 0.6167092694739674, "grad_norm": 0.23828125, "learning_rate": 0.00015710662015602016, "loss": 1.2867, "step": 1146 }, { "epoch": 0.6172474101977667, "grad_norm": 0.232421875, "learning_rate": 0.00015703701124849116, "loss": 1.3069, "step": 1147 }, { "epoch": 0.617785550921566, "grad_norm": 0.2275390625, "learning_rate": 0.00015696736135400766, "loss": 1.2392, "step": 1148 }, { "epoch": 0.6183236916453653, "grad_norm": 0.2412109375, "learning_rate": 0.00015689767052262026, "loss": 1.5216, "step": 1149 }, { "epoch": 0.6188618323691646, "grad_norm": 0.244140625, "learning_rate": 0.00015682793880440895, "loss": 1.5018, "step": 1150 }, { "epoch": 0.6193999730929638, "grad_norm": 0.25, "learning_rate": 0.0001567581662494832, "loss": 1.4003, "step": 1151 }, { "epoch": 0.6199381138167631, "grad_norm": 0.22265625, "learning_rate": 0.00015668835290798172, "loss": 1.2382, "step": 1152 }, { "epoch": 0.6204762545405623, "grad_norm": 0.2373046875, "learning_rate": 0.0001566184988300726, "loss": 1.3889, "step": 1153 }, { "epoch": 0.6210143952643616, "grad_norm": 0.2265625, "learning_rate": 0.00015654860406595313, "loss": 1.3943, "step": 1154 }, { "epoch": 0.621552535988161, "grad_norm": 0.2353515625, "learning_rate": 0.00015647866866584992, "loss": 1.3618, "step": 1155 }, { "epoch": 0.6220906767119602, "grad_norm": 0.2421875, "learning_rate": 0.00015640869268001876, "loss": 1.4145, "step": 1156 }, { "epoch": 0.6226288174357595, "grad_norm": 0.234375, "learning_rate": 0.0001563386761587445, "loss": 1.3641, "step": 1157 }, { "epoch": 0.6231669581595587, "grad_norm": 0.2333984375, "learning_rate": 0.0001562686191523413, "loss": 1.4162, "step": 1158 }, { "epoch": 0.623705098883358, "grad_norm": 0.2373046875, "learning_rate": 0.00015619852171115228, "loss": 1.2747, "step": 1159 }, { "epoch": 0.6242432396071572, "grad_norm": 0.240234375, "learning_rate": 0.00015612838388554966, "loss": 1.3128, "step": 1160 }, { "epoch": 0.6247813803309565, "grad_norm": 0.263671875, "learning_rate": 0.00015605820572593468, "loss": 1.4293, "step": 1161 }, { "epoch": 0.6253195210547559, "grad_norm": 0.259765625, "learning_rate": 0.00015598798728273753, "loss": 1.4034, "step": 1162 }, { "epoch": 0.6258576617785551, "grad_norm": 0.255859375, "learning_rate": 0.00015591772860641743, "loss": 1.554, "step": 1163 }, { "epoch": 0.6263958025023544, "grad_norm": 0.240234375, "learning_rate": 0.00015584742974746242, "loss": 1.3728, "step": 1164 }, { "epoch": 0.6269339432261536, "grad_norm": 0.2275390625, "learning_rate": 0.0001557770907563895, "loss": 1.4741, "step": 1165 }, { "epoch": 0.6274720839499529, "grad_norm": 0.2294921875, "learning_rate": 0.00015570671168374438, "loss": 1.2993, "step": 1166 }, { "epoch": 0.6280102246737522, "grad_norm": 0.2353515625, "learning_rate": 0.00015563629258010173, "loss": 1.3174, "step": 1167 }, { "epoch": 0.6285483653975514, "grad_norm": 0.24609375, "learning_rate": 0.0001555658334960649, "loss": 1.3239, "step": 1168 }, { "epoch": 0.6290865061213508, "grad_norm": 0.240234375, "learning_rate": 0.00015549533448226596, "loss": 1.4422, "step": 1169 }, { "epoch": 0.62962464684515, "grad_norm": 0.2216796875, "learning_rate": 0.00015542479558936574, "loss": 1.206, "step": 1170 }, { "epoch": 0.6301627875689493, "grad_norm": 0.244140625, "learning_rate": 0.00015535421686805364, "loss": 1.4003, "step": 1171 }, { "epoch": 0.6307009282927486, "grad_norm": 0.236328125, "learning_rate": 0.00015528359836904773, "loss": 1.3495, "step": 1172 }, { "epoch": 0.6312390690165478, "grad_norm": 0.2353515625, "learning_rate": 0.0001552129401430947, "loss": 1.2942, "step": 1173 }, { "epoch": 0.6317772097403471, "grad_norm": 0.2294921875, "learning_rate": 0.0001551422422409697, "loss": 1.3873, "step": 1174 }, { "epoch": 0.6323153504641463, "grad_norm": 0.224609375, "learning_rate": 0.00015507150471347645, "loss": 1.2701, "step": 1175 }, { "epoch": 0.6328534911879456, "grad_norm": 0.2431640625, "learning_rate": 0.00015500072761144714, "loss": 1.3399, "step": 1176 }, { "epoch": 0.633391631911745, "grad_norm": 0.2373046875, "learning_rate": 0.0001549299109857424, "loss": 1.325, "step": 1177 }, { "epoch": 0.6339297726355442, "grad_norm": 0.232421875, "learning_rate": 0.00015485905488725124, "loss": 1.3602, "step": 1178 }, { "epoch": 0.6344679133593435, "grad_norm": 0.240234375, "learning_rate": 0.00015478815936689107, "loss": 1.4706, "step": 1179 }, { "epoch": 0.6350060540831427, "grad_norm": 0.23828125, "learning_rate": 0.00015471722447560758, "loss": 1.4505, "step": 1180 }, { "epoch": 0.635544194806942, "grad_norm": 0.234375, "learning_rate": 0.00015464625026437477, "loss": 1.3437, "step": 1181 }, { "epoch": 0.6360823355307413, "grad_norm": 0.23828125, "learning_rate": 0.00015457523678419496, "loss": 1.4372, "step": 1182 }, { "epoch": 0.6366204762545405, "grad_norm": 0.2431640625, "learning_rate": 0.0001545041840860986, "loss": 1.3844, "step": 1183 }, { "epoch": 0.6371586169783399, "grad_norm": 0.244140625, "learning_rate": 0.00015443309222114434, "loss": 1.2637, "step": 1184 }, { "epoch": 0.6376967577021391, "grad_norm": 0.2294921875, "learning_rate": 0.00015436196124041902, "loss": 1.3513, "step": 1185 }, { "epoch": 0.6382348984259384, "grad_norm": 0.22265625, "learning_rate": 0.00015429079119503753, "loss": 1.3109, "step": 1186 }, { "epoch": 0.6387730391497377, "grad_norm": 0.23046875, "learning_rate": 0.0001542195821361429, "loss": 1.3632, "step": 1187 }, { "epoch": 0.6393111798735369, "grad_norm": 0.23828125, "learning_rate": 0.00015414833411490613, "loss": 1.4611, "step": 1188 }, { "epoch": 0.6398493205973362, "grad_norm": 0.2421875, "learning_rate": 0.00015407704718252625, "loss": 1.3382, "step": 1189 }, { "epoch": 0.6403874613211354, "grad_norm": 0.2421875, "learning_rate": 0.00015400572139023027, "loss": 1.3803, "step": 1190 }, { "epoch": 0.6409256020449348, "grad_norm": 0.236328125, "learning_rate": 0.00015393435678927303, "loss": 1.3472, "step": 1191 }, { "epoch": 0.641463742768734, "grad_norm": 0.24609375, "learning_rate": 0.00015386295343093744, "loss": 1.3902, "step": 1192 }, { "epoch": 0.6420018834925333, "grad_norm": 0.232421875, "learning_rate": 0.00015379151136653407, "loss": 1.2989, "step": 1193 }, { "epoch": 0.6425400242163326, "grad_norm": 0.2451171875, "learning_rate": 0.0001537200306474014, "loss": 1.4642, "step": 1194 }, { "epoch": 0.6430781649401318, "grad_norm": 0.240234375, "learning_rate": 0.0001536485113249057, "loss": 1.2959, "step": 1195 }, { "epoch": 0.6436163056639311, "grad_norm": 0.2392578125, "learning_rate": 0.00015357695345044094, "loss": 1.3585, "step": 1196 }, { "epoch": 0.6441544463877303, "grad_norm": 0.2451171875, "learning_rate": 0.00015350535707542878, "loss": 1.3114, "step": 1197 }, { "epoch": 0.6446925871115297, "grad_norm": 0.2353515625, "learning_rate": 0.00015343372225131861, "loss": 1.3271, "step": 1198 }, { "epoch": 0.645230727835329, "grad_norm": 0.234375, "learning_rate": 0.00015336204902958742, "loss": 1.2614, "step": 1199 }, { "epoch": 0.6457688685591282, "grad_norm": 0.251953125, "learning_rate": 0.00015329033746173975, "loss": 1.3796, "step": 1200 }, { "epoch": 0.6463070092829275, "grad_norm": 0.2421875, "learning_rate": 0.00015321858759930775, "loss": 1.3464, "step": 1201 }, { "epoch": 0.6468451500067267, "grad_norm": 0.2470703125, "learning_rate": 0.00015314679949385103, "loss": 1.3991, "step": 1202 }, { "epoch": 0.647383290730526, "grad_norm": 0.236328125, "learning_rate": 0.00015307497319695684, "loss": 1.3175, "step": 1203 }, { "epoch": 0.6479214314543253, "grad_norm": 0.2412109375, "learning_rate": 0.0001530031087602396, "loss": 1.2938, "step": 1204 }, { "epoch": 0.6484595721781246, "grad_norm": 0.244140625, "learning_rate": 0.00015293120623534144, "loss": 1.3121, "step": 1205 }, { "epoch": 0.6489977129019239, "grad_norm": 0.2451171875, "learning_rate": 0.0001528592656739316, "loss": 1.3534, "step": 1206 }, { "epoch": 0.6495358536257231, "grad_norm": 0.25, "learning_rate": 0.00015278728712770682, "loss": 1.2992, "step": 1207 }, { "epoch": 0.6500739943495224, "grad_norm": 0.2392578125, "learning_rate": 0.00015271527064839106, "loss": 1.4259, "step": 1208 }, { "epoch": 0.6506121350733217, "grad_norm": 0.2392578125, "learning_rate": 0.0001526432162877356, "loss": 1.3478, "step": 1209 }, { "epoch": 0.6511502757971209, "grad_norm": 0.251953125, "learning_rate": 0.00015257112409751885, "loss": 1.3553, "step": 1210 }, { "epoch": 0.6516884165209202, "grad_norm": 0.236328125, "learning_rate": 0.00015249899412954652, "loss": 1.2989, "step": 1211 }, { "epoch": 0.6522265572447195, "grad_norm": 0.25390625, "learning_rate": 0.00015242682643565133, "loss": 1.317, "step": 1212 }, { "epoch": 0.6527646979685188, "grad_norm": 0.2353515625, "learning_rate": 0.00015235462106769326, "loss": 1.4339, "step": 1213 }, { "epoch": 0.6533028386923181, "grad_norm": 0.2373046875, "learning_rate": 0.00015228237807755925, "loss": 1.4144, "step": 1214 }, { "epoch": 0.6538409794161173, "grad_norm": 0.248046875, "learning_rate": 0.00015221009751716332, "loss": 1.3479, "step": 1215 }, { "epoch": 0.6543791201399166, "grad_norm": 0.2431640625, "learning_rate": 0.00015213777943844648, "loss": 1.2568, "step": 1216 }, { "epoch": 0.6549172608637158, "grad_norm": 0.255859375, "learning_rate": 0.0001520654238933767, "loss": 1.3918, "step": 1217 }, { "epoch": 0.6554554015875151, "grad_norm": 0.26171875, "learning_rate": 0.00015199303093394882, "loss": 1.4101, "step": 1218 }, { "epoch": 0.6559935423113145, "grad_norm": 0.25, "learning_rate": 0.00015192060061218477, "loss": 1.4515, "step": 1219 }, { "epoch": 0.6565316830351137, "grad_norm": 0.244140625, "learning_rate": 0.00015184813298013304, "loss": 1.3524, "step": 1220 }, { "epoch": 0.657069823758913, "grad_norm": 0.2431640625, "learning_rate": 0.00015177562808986914, "loss": 1.313, "step": 1221 }, { "epoch": 0.6576079644827122, "grad_norm": 0.244140625, "learning_rate": 0.00015170308599349522, "loss": 1.3055, "step": 1222 }, { "epoch": 0.6581461052065115, "grad_norm": 0.2451171875, "learning_rate": 0.00015163050674314033, "loss": 1.2533, "step": 1223 }, { "epoch": 0.6586842459303108, "grad_norm": 0.251953125, "learning_rate": 0.00015155789039096004, "loss": 1.3024, "step": 1224 }, { "epoch": 0.65922238665411, "grad_norm": 0.236328125, "learning_rate": 0.00015148523698913668, "loss": 1.3645, "step": 1225 }, { "epoch": 0.6597605273779094, "grad_norm": 0.236328125, "learning_rate": 0.00015141254658987923, "loss": 1.254, "step": 1226 }, { "epoch": 0.6602986681017086, "grad_norm": 0.2412109375, "learning_rate": 0.00015133981924542314, "loss": 1.4778, "step": 1227 }, { "epoch": 0.6608368088255079, "grad_norm": 0.236328125, "learning_rate": 0.00015126705500803056, "loss": 1.3672, "step": 1228 }, { "epoch": 0.6613749495493071, "grad_norm": 0.2353515625, "learning_rate": 0.00015119425392999, "loss": 1.3481, "step": 1229 }, { "epoch": 0.6619130902731064, "grad_norm": 0.244140625, "learning_rate": 0.00015112141606361655, "loss": 1.4495, "step": 1230 }, { "epoch": 0.6624512309969057, "grad_norm": 0.23828125, "learning_rate": 0.00015104854146125168, "loss": 1.2669, "step": 1231 }, { "epoch": 0.6629893717207049, "grad_norm": 0.2421875, "learning_rate": 0.00015097563017526334, "loss": 1.4271, "step": 1232 }, { "epoch": 0.6635275124445043, "grad_norm": 0.248046875, "learning_rate": 0.00015090268225804575, "loss": 1.2614, "step": 1233 }, { "epoch": 0.6640656531683035, "grad_norm": 0.2421875, "learning_rate": 0.00015082969776201947, "loss": 1.4365, "step": 1234 }, { "epoch": 0.6646037938921028, "grad_norm": 0.23828125, "learning_rate": 0.0001507566767396314, "loss": 1.3652, "step": 1235 }, { "epoch": 0.6651419346159021, "grad_norm": 0.2314453125, "learning_rate": 0.00015068361924335457, "loss": 1.3852, "step": 1236 }, { "epoch": 0.6656800753397013, "grad_norm": 0.2451171875, "learning_rate": 0.00015061052532568838, "loss": 1.2569, "step": 1237 }, { "epoch": 0.6662182160635006, "grad_norm": 0.2392578125, "learning_rate": 0.0001505373950391583, "loss": 1.3395, "step": 1238 }, { "epoch": 0.6667563567872998, "grad_norm": 0.2412109375, "learning_rate": 0.00015046422843631593, "loss": 1.3, "step": 1239 }, { "epoch": 0.6672944975110991, "grad_norm": 0.23828125, "learning_rate": 0.000150391025569739, "loss": 1.3228, "step": 1240 }, { "epoch": 0.6678326382348985, "grad_norm": 0.2333984375, "learning_rate": 0.00015031778649203132, "loss": 1.3036, "step": 1241 }, { "epoch": 0.6683707789586977, "grad_norm": 0.236328125, "learning_rate": 0.0001502445112558227, "loss": 1.3296, "step": 1242 }, { "epoch": 0.668908919682497, "grad_norm": 0.2353515625, "learning_rate": 0.00015017119991376888, "loss": 1.3666, "step": 1243 }, { "epoch": 0.6694470604062962, "grad_norm": 0.2421875, "learning_rate": 0.00015009785251855165, "loss": 1.4121, "step": 1244 }, { "epoch": 0.6699852011300955, "grad_norm": 0.24609375, "learning_rate": 0.00015002446912287863, "loss": 1.3395, "step": 1245 }, { "epoch": 0.6705233418538948, "grad_norm": 0.2265625, "learning_rate": 0.0001499510497794833, "loss": 1.3324, "step": 1246 }, { "epoch": 0.671061482577694, "grad_norm": 0.224609375, "learning_rate": 0.00014987759454112505, "loss": 1.2273, "step": 1247 }, { "epoch": 0.6715996233014934, "grad_norm": 0.244140625, "learning_rate": 0.000149804103460589, "loss": 1.4007, "step": 1248 }, { "epoch": 0.6721377640252926, "grad_norm": 0.24609375, "learning_rate": 0.00014973057659068602, "loss": 1.4425, "step": 1249 }, { "epoch": 0.6726759047490919, "grad_norm": 0.232421875, "learning_rate": 0.00014965701398425273, "loss": 1.3303, "step": 1250 }, { "epoch": 0.6732140454728912, "grad_norm": 0.2431640625, "learning_rate": 0.00014958341569415147, "loss": 1.4153, "step": 1251 }, { "epoch": 0.6737521861966904, "grad_norm": 0.228515625, "learning_rate": 0.00014950978177327015, "loss": 1.3161, "step": 1252 }, { "epoch": 0.6742903269204897, "grad_norm": 0.2412109375, "learning_rate": 0.0001494361122745223, "loss": 1.2975, "step": 1253 }, { "epoch": 0.6748284676442889, "grad_norm": 0.2353515625, "learning_rate": 0.000149362407250847, "loss": 1.3114, "step": 1254 }, { "epoch": 0.6753666083680883, "grad_norm": 0.236328125, "learning_rate": 0.00014928866675520896, "loss": 1.3991, "step": 1255 }, { "epoch": 0.6759047490918876, "grad_norm": 0.2275390625, "learning_rate": 0.00014921489084059823, "loss": 1.1595, "step": 1256 }, { "epoch": 0.6764428898156868, "grad_norm": 0.244140625, "learning_rate": 0.00014914107956003043, "loss": 1.3914, "step": 1257 }, { "epoch": 0.6769810305394861, "grad_norm": 0.255859375, "learning_rate": 0.0001490672329665465, "loss": 1.3536, "step": 1258 }, { "epoch": 0.6775191712632853, "grad_norm": 0.2490234375, "learning_rate": 0.00014899335111321287, "loss": 1.3595, "step": 1259 }, { "epoch": 0.6780573119870846, "grad_norm": 0.228515625, "learning_rate": 0.00014891943405312126, "loss": 1.2749, "step": 1260 }, { "epoch": 0.678595452710884, "grad_norm": 0.240234375, "learning_rate": 0.00014884548183938855, "loss": 1.3226, "step": 1261 }, { "epoch": 0.6791335934346832, "grad_norm": 0.244140625, "learning_rate": 0.00014877149452515717, "loss": 1.4209, "step": 1262 }, { "epoch": 0.6796717341584825, "grad_norm": 0.2373046875, "learning_rate": 0.00014869747216359445, "loss": 1.3036, "step": 1263 }, { "epoch": 0.6802098748822817, "grad_norm": 0.2470703125, "learning_rate": 0.0001486234148078932, "loss": 1.3885, "step": 1264 }, { "epoch": 0.680748015606081, "grad_norm": 0.2353515625, "learning_rate": 0.00014854932251127115, "loss": 1.3446, "step": 1265 }, { "epoch": 0.6812861563298802, "grad_norm": 0.2314453125, "learning_rate": 0.0001484751953269713, "loss": 1.3847, "step": 1266 }, { "epoch": 0.6818242970536795, "grad_norm": 0.2451171875, "learning_rate": 0.00014840103330826158, "loss": 1.3831, "step": 1267 }, { "epoch": 0.6823624377774788, "grad_norm": 0.240234375, "learning_rate": 0.0001483268365084351, "loss": 1.2853, "step": 1268 }, { "epoch": 0.682900578501278, "grad_norm": 0.2392578125, "learning_rate": 0.00014825260498080978, "loss": 1.3413, "step": 1269 }, { "epoch": 0.6834387192250774, "grad_norm": 0.2421875, "learning_rate": 0.0001481783387787287, "loss": 1.4031, "step": 1270 }, { "epoch": 0.6839768599488766, "grad_norm": 0.244140625, "learning_rate": 0.00014810403795555973, "loss": 1.2665, "step": 1271 }, { "epoch": 0.6845150006726759, "grad_norm": 0.255859375, "learning_rate": 0.00014802970256469564, "loss": 1.3489, "step": 1272 }, { "epoch": 0.6850531413964752, "grad_norm": 0.2470703125, "learning_rate": 0.00014795533265955403, "loss": 1.2669, "step": 1273 }, { "epoch": 0.6855912821202744, "grad_norm": 0.2373046875, "learning_rate": 0.00014788092829357737, "loss": 1.315, "step": 1274 }, { "epoch": 0.6861294228440737, "grad_norm": 0.248046875, "learning_rate": 0.00014780648952023275, "loss": 1.3851, "step": 1275 }, { "epoch": 0.686667563567873, "grad_norm": 0.251953125, "learning_rate": 0.00014773201639301222, "loss": 1.3571, "step": 1276 }, { "epoch": 0.6872057042916723, "grad_norm": 0.2421875, "learning_rate": 0.00014765750896543223, "loss": 1.324, "step": 1277 }, { "epoch": 0.6877438450154716, "grad_norm": 0.2353515625, "learning_rate": 0.0001475829672910341, "loss": 1.3415, "step": 1278 }, { "epoch": 0.6882819857392708, "grad_norm": 0.255859375, "learning_rate": 0.00014750839142338372, "loss": 1.4431, "step": 1279 }, { "epoch": 0.6888201264630701, "grad_norm": 0.2431640625, "learning_rate": 0.00014743378141607145, "loss": 1.3782, "step": 1280 }, { "epoch": 0.6893582671868693, "grad_norm": 0.2578125, "learning_rate": 0.00014735913732271224, "loss": 1.4513, "step": 1281 }, { "epoch": 0.6898964079106686, "grad_norm": 0.2353515625, "learning_rate": 0.00014728445919694563, "loss": 1.3424, "step": 1282 }, { "epoch": 0.690434548634468, "grad_norm": 0.2392578125, "learning_rate": 0.00014720974709243542, "loss": 1.3911, "step": 1283 }, { "epoch": 0.6909726893582672, "grad_norm": 0.2412109375, "learning_rate": 0.00014713500106286999, "loss": 1.2888, "step": 1284 }, { "epoch": 0.6915108300820665, "grad_norm": 0.2412109375, "learning_rate": 0.00014706022116196208, "loss": 1.4262, "step": 1285 }, { "epoch": 0.6920489708058657, "grad_norm": 0.2294921875, "learning_rate": 0.00014698540744344864, "loss": 1.3787, "step": 1286 }, { "epoch": 0.692587111529665, "grad_norm": 0.224609375, "learning_rate": 0.0001469105599610911, "loss": 1.2185, "step": 1287 }, { "epoch": 0.6931252522534643, "grad_norm": 0.236328125, "learning_rate": 0.00014683567876867503, "loss": 1.2795, "step": 1288 }, { "epoch": 0.6936633929772635, "grad_norm": 0.236328125, "learning_rate": 0.0001467607639200103, "loss": 1.3309, "step": 1289 }, { "epoch": 0.6942015337010629, "grad_norm": 0.234375, "learning_rate": 0.00014668581546893085, "loss": 1.4143, "step": 1290 }, { "epoch": 0.6947396744248621, "grad_norm": 0.2421875, "learning_rate": 0.00014661083346929492, "loss": 1.2657, "step": 1291 }, { "epoch": 0.6952778151486614, "grad_norm": 0.2470703125, "learning_rate": 0.00014653581797498478, "loss": 1.32, "step": 1292 }, { "epoch": 0.6958159558724607, "grad_norm": 0.2373046875, "learning_rate": 0.0001464607690399067, "loss": 1.2326, "step": 1293 }, { "epoch": 0.6963540965962599, "grad_norm": 0.2470703125, "learning_rate": 0.00014638568671799114, "loss": 1.3343, "step": 1294 }, { "epoch": 0.6968922373200592, "grad_norm": 0.240234375, "learning_rate": 0.00014631057106319242, "loss": 1.3358, "step": 1295 }, { "epoch": 0.6974303780438584, "grad_norm": 0.25, "learning_rate": 0.00014623542212948887, "loss": 1.4446, "step": 1296 }, { "epoch": 0.6979685187676578, "grad_norm": 0.25390625, "learning_rate": 0.00014616023997088268, "loss": 1.3425, "step": 1297 }, { "epoch": 0.6985066594914571, "grad_norm": 0.240234375, "learning_rate": 0.0001460850246414, "loss": 1.3908, "step": 1298 }, { "epoch": 0.6990448002152563, "grad_norm": 0.234375, "learning_rate": 0.0001460097761950908, "loss": 1.2799, "step": 1299 }, { "epoch": 0.6995829409390556, "grad_norm": 0.25, "learning_rate": 0.00014593449468602878, "loss": 1.416, "step": 1300 }, { "epoch": 0.7001210816628548, "grad_norm": 0.25, "learning_rate": 0.00014585918016831145, "loss": 1.4835, "step": 1301 }, { "epoch": 0.7006592223866541, "grad_norm": 0.25, "learning_rate": 0.00014578383269606005, "loss": 1.3643, "step": 1302 }, { "epoch": 0.7011973631104533, "grad_norm": 0.24609375, "learning_rate": 0.00014570845232341942, "loss": 1.3693, "step": 1303 }, { "epoch": 0.7017355038342526, "grad_norm": 0.2373046875, "learning_rate": 0.0001456330391045582, "loss": 1.2837, "step": 1304 }, { "epoch": 0.702273644558052, "grad_norm": 0.2421875, "learning_rate": 0.00014555759309366849, "loss": 1.2872, "step": 1305 }, { "epoch": 0.7028117852818512, "grad_norm": 0.251953125, "learning_rate": 0.00014548211434496598, "loss": 1.3025, "step": 1306 }, { "epoch": 0.7033499260056505, "grad_norm": 0.2431640625, "learning_rate": 0.00014540660291268993, "loss": 1.3136, "step": 1307 }, { "epoch": 0.7038880667294497, "grad_norm": 0.259765625, "learning_rate": 0.00014533105885110306, "loss": 1.3427, "step": 1308 }, { "epoch": 0.704426207453249, "grad_norm": 0.234375, "learning_rate": 0.00014525548221449154, "loss": 1.285, "step": 1309 }, { "epoch": 0.7049643481770483, "grad_norm": 0.25, "learning_rate": 0.000145179873057165, "loss": 1.3437, "step": 1310 }, { "epoch": 0.7055024889008475, "grad_norm": 0.2412109375, "learning_rate": 0.00014510423143345631, "loss": 1.2946, "step": 1311 }, { "epoch": 0.7060406296246469, "grad_norm": 0.2353515625, "learning_rate": 0.00014502855739772182, "loss": 1.3083, "step": 1312 }, { "epoch": 0.7065787703484461, "grad_norm": 0.2412109375, "learning_rate": 0.00014495285100434106, "loss": 1.2911, "step": 1313 }, { "epoch": 0.7071169110722454, "grad_norm": 0.26171875, "learning_rate": 0.00014487711230771685, "loss": 1.389, "step": 1314 }, { "epoch": 0.7076550517960447, "grad_norm": 0.2431640625, "learning_rate": 0.00014480134136227526, "loss": 1.2704, "step": 1315 }, { "epoch": 0.7081931925198439, "grad_norm": 0.2470703125, "learning_rate": 0.0001447255382224655, "loss": 1.3572, "step": 1316 }, { "epoch": 0.7087313332436432, "grad_norm": 0.244140625, "learning_rate": 0.00014464970294275986, "loss": 1.3285, "step": 1317 }, { "epoch": 0.7092694739674424, "grad_norm": 0.248046875, "learning_rate": 0.00014457383557765386, "loss": 1.386, "step": 1318 }, { "epoch": 0.7098076146912418, "grad_norm": 0.2412109375, "learning_rate": 0.00014449793618166593, "loss": 1.3793, "step": 1319 }, { "epoch": 0.7103457554150411, "grad_norm": 0.251953125, "learning_rate": 0.00014442200480933761, "loss": 1.3788, "step": 1320 }, { "epoch": 0.7108838961388403, "grad_norm": 0.2451171875, "learning_rate": 0.00014434604151523345, "loss": 1.2678, "step": 1321 }, { "epoch": 0.7114220368626396, "grad_norm": 0.2412109375, "learning_rate": 0.00014427004635394079, "loss": 1.3381, "step": 1322 }, { "epoch": 0.7119601775864388, "grad_norm": 0.25390625, "learning_rate": 0.00014419401938007005, "loss": 1.4055, "step": 1323 }, { "epoch": 0.7124983183102381, "grad_norm": 0.2431640625, "learning_rate": 0.00014411796064825436, "loss": 1.2953, "step": 1324 }, { "epoch": 0.7130364590340375, "grad_norm": 0.2470703125, "learning_rate": 0.0001440418702131498, "loss": 1.3325, "step": 1325 }, { "epoch": 0.7135745997578367, "grad_norm": 0.2734375, "learning_rate": 0.00014396574812943508, "loss": 1.5108, "step": 1326 }, { "epoch": 0.714112740481636, "grad_norm": 0.22265625, "learning_rate": 0.0001438895944518118, "loss": 1.2297, "step": 1327 }, { "epoch": 0.7146508812054352, "grad_norm": 0.2451171875, "learning_rate": 0.00014381340923500421, "loss": 1.3535, "step": 1328 }, { "epoch": 0.7151890219292345, "grad_norm": 0.2294921875, "learning_rate": 0.0001437371925337592, "loss": 1.2263, "step": 1329 }, { "epoch": 0.7157271626530338, "grad_norm": 0.2314453125, "learning_rate": 0.0001436609444028463, "loss": 1.3451, "step": 1330 }, { "epoch": 0.716265303376833, "grad_norm": 0.2392578125, "learning_rate": 0.00014358466489705767, "loss": 1.3095, "step": 1331 }, { "epoch": 0.7168034441006323, "grad_norm": 0.2353515625, "learning_rate": 0.00014350835407120788, "loss": 1.281, "step": 1332 }, { "epoch": 0.7173415848244316, "grad_norm": 0.23828125, "learning_rate": 0.00014343201198013417, "loss": 1.237, "step": 1333 }, { "epoch": 0.7178797255482309, "grad_norm": 0.244140625, "learning_rate": 0.00014335563867869618, "loss": 1.2265, "step": 1334 }, { "epoch": 0.7184178662720301, "grad_norm": 0.255859375, "learning_rate": 0.000143279234221776, "loss": 1.4295, "step": 1335 }, { "epoch": 0.7189560069958294, "grad_norm": 0.255859375, "learning_rate": 0.00014320279866427796, "loss": 1.3654, "step": 1336 }, { "epoch": 0.7194941477196287, "grad_norm": 0.236328125, "learning_rate": 0.00014312633206112899, "loss": 1.2168, "step": 1337 }, { "epoch": 0.7200322884434279, "grad_norm": 0.2392578125, "learning_rate": 0.0001430498344672782, "loss": 1.3746, "step": 1338 }, { "epoch": 0.7205704291672272, "grad_norm": 0.2353515625, "learning_rate": 0.0001429733059376969, "loss": 1.256, "step": 1339 }, { "epoch": 0.7211085698910265, "grad_norm": 0.2451171875, "learning_rate": 0.00014289674652737876, "loss": 1.2855, "step": 1340 }, { "epoch": 0.7216467106148258, "grad_norm": 0.23828125, "learning_rate": 0.0001428201562913396, "loss": 1.3285, "step": 1341 }, { "epoch": 0.7221848513386251, "grad_norm": 0.240234375, "learning_rate": 0.00014274353528461728, "loss": 1.3631, "step": 1342 }, { "epoch": 0.7227229920624243, "grad_norm": 0.25390625, "learning_rate": 0.00014266688356227194, "loss": 1.4521, "step": 1343 }, { "epoch": 0.7232611327862236, "grad_norm": 0.240234375, "learning_rate": 0.00014259020117938573, "loss": 1.3481, "step": 1344 }, { "epoch": 0.7237992735100228, "grad_norm": 0.234375, "learning_rate": 0.00014251348819106278, "loss": 1.2626, "step": 1345 }, { "epoch": 0.7243374142338221, "grad_norm": 0.2431640625, "learning_rate": 0.00014243674465242933, "loss": 1.3893, "step": 1346 }, { "epoch": 0.7248755549576215, "grad_norm": 0.25, "learning_rate": 0.0001423599706186334, "loss": 1.3794, "step": 1347 }, { "epoch": 0.7254136956814207, "grad_norm": 0.2314453125, "learning_rate": 0.0001422831661448451, "loss": 1.2648, "step": 1348 }, { "epoch": 0.72595183640522, "grad_norm": 0.2275390625, "learning_rate": 0.00014220633128625632, "loss": 1.2747, "step": 1349 }, { "epoch": 0.7264899771290192, "grad_norm": 0.25390625, "learning_rate": 0.00014212946609808077, "loss": 1.3594, "step": 1350 }, { "epoch": 0.7270281178528185, "grad_norm": 0.2392578125, "learning_rate": 0.000142052570635554, "loss": 1.3381, "step": 1351 }, { "epoch": 0.7275662585766178, "grad_norm": 0.2373046875, "learning_rate": 0.00014197564495393333, "loss": 1.2782, "step": 1352 }, { "epoch": 0.728104399300417, "grad_norm": 0.248046875, "learning_rate": 0.00014189868910849778, "loss": 1.44, "step": 1353 }, { "epoch": 0.7286425400242164, "grad_norm": 0.240234375, "learning_rate": 0.00014182170315454797, "loss": 1.3467, "step": 1354 }, { "epoch": 0.7291806807480156, "grad_norm": 0.2373046875, "learning_rate": 0.00014174468714740627, "loss": 1.3098, "step": 1355 }, { "epoch": 0.7297188214718149, "grad_norm": 0.2353515625, "learning_rate": 0.00014166764114241658, "loss": 1.2868, "step": 1356 }, { "epoch": 0.7302569621956142, "grad_norm": 0.248046875, "learning_rate": 0.00014159056519494437, "loss": 1.2835, "step": 1357 }, { "epoch": 0.7307951029194134, "grad_norm": 0.251953125, "learning_rate": 0.00014151345936037663, "loss": 1.2598, "step": 1358 }, { "epoch": 0.7313332436432127, "grad_norm": 0.2412109375, "learning_rate": 0.00014143632369412185, "loss": 1.4196, "step": 1359 }, { "epoch": 0.7318713843670119, "grad_norm": 0.2412109375, "learning_rate": 0.0001413591582516099, "loss": 1.3327, "step": 1360 }, { "epoch": 0.7324095250908113, "grad_norm": 0.2412109375, "learning_rate": 0.00014128196308829208, "loss": 1.2758, "step": 1361 }, { "epoch": 0.7329476658146106, "grad_norm": 0.234375, "learning_rate": 0.0001412047382596411, "loss": 1.3785, "step": 1362 }, { "epoch": 0.7334858065384098, "grad_norm": 0.2373046875, "learning_rate": 0.0001411274838211509, "loss": 1.2155, "step": 1363 }, { "epoch": 0.7340239472622091, "grad_norm": 0.2451171875, "learning_rate": 0.00014105019982833673, "loss": 1.3726, "step": 1364 }, { "epoch": 0.7345620879860083, "grad_norm": 0.255859375, "learning_rate": 0.00014097288633673508, "loss": 1.3156, "step": 1365 }, { "epoch": 0.7351002287098076, "grad_norm": 0.2578125, "learning_rate": 0.00014089554340190365, "loss": 1.364, "step": 1366 }, { "epoch": 0.7356383694336069, "grad_norm": 0.2373046875, "learning_rate": 0.0001408181710794213, "loss": 1.4218, "step": 1367 }, { "epoch": 0.7361765101574061, "grad_norm": 0.2431640625, "learning_rate": 0.00014074076942488794, "loss": 1.318, "step": 1368 }, { "epoch": 0.7367146508812055, "grad_norm": 0.2421875, "learning_rate": 0.0001406633384939247, "loss": 1.3049, "step": 1369 }, { "epoch": 0.7372527916050047, "grad_norm": 0.2470703125, "learning_rate": 0.00014058587834217355, "loss": 1.2746, "step": 1370 }, { "epoch": 0.737790932328804, "grad_norm": 0.240234375, "learning_rate": 0.0001405083890252977, "loss": 1.3534, "step": 1371 }, { "epoch": 0.7383290730526032, "grad_norm": 0.24609375, "learning_rate": 0.0001404308705989811, "loss": 1.3856, "step": 1372 }, { "epoch": 0.7388672137764025, "grad_norm": 0.25390625, "learning_rate": 0.00014035332311892874, "loss": 1.3743, "step": 1373 }, { "epoch": 0.7394053545002018, "grad_norm": 0.2421875, "learning_rate": 0.0001402757466408664, "loss": 1.3625, "step": 1374 }, { "epoch": 0.739943495224001, "grad_norm": 0.23828125, "learning_rate": 0.0001401981412205408, "loss": 1.3217, "step": 1375 }, { "epoch": 0.7404816359478004, "grad_norm": 0.248046875, "learning_rate": 0.00014012050691371948, "loss": 1.3497, "step": 1376 }, { "epoch": 0.7410197766715996, "grad_norm": 0.234375, "learning_rate": 0.00014004284377619052, "loss": 1.3103, "step": 1377 }, { "epoch": 0.7415579173953989, "grad_norm": 0.2353515625, "learning_rate": 0.00013996515186376297, "loss": 1.3004, "step": 1378 }, { "epoch": 0.7420960581191982, "grad_norm": 0.2314453125, "learning_rate": 0.00013988743123226642, "loss": 1.2801, "step": 1379 }, { "epoch": 0.7426341988429974, "grad_norm": 0.228515625, "learning_rate": 0.00013980968193755115, "loss": 1.3641, "step": 1380 }, { "epoch": 0.7431723395667967, "grad_norm": 0.224609375, "learning_rate": 0.00013973190403548797, "loss": 1.3595, "step": 1381 }, { "epoch": 0.7437104802905959, "grad_norm": 0.2451171875, "learning_rate": 0.00013965409758196837, "loss": 1.335, "step": 1382 }, { "epoch": 0.7442486210143953, "grad_norm": 0.2373046875, "learning_rate": 0.0001395762626329042, "loss": 1.3519, "step": 1383 }, { "epoch": 0.7447867617381946, "grad_norm": 0.248046875, "learning_rate": 0.00013949839924422798, "loss": 1.3251, "step": 1384 }, { "epoch": 0.7453249024619938, "grad_norm": 0.25, "learning_rate": 0.0001394205074718924, "loss": 1.4367, "step": 1385 }, { "epoch": 0.7458630431857931, "grad_norm": 0.25, "learning_rate": 0.00013934258737187088, "loss": 1.2778, "step": 1386 }, { "epoch": 0.7464011839095923, "grad_norm": 0.23828125, "learning_rate": 0.0001392646390001569, "loss": 1.4308, "step": 1387 }, { "epoch": 0.7469393246333916, "grad_norm": 0.2353515625, "learning_rate": 0.00013918666241276442, "loss": 1.4286, "step": 1388 }, { "epoch": 0.747477465357191, "grad_norm": 0.2490234375, "learning_rate": 0.00013910865766572758, "loss": 1.41, "step": 1389 }, { "epoch": 0.7480156060809902, "grad_norm": 0.2490234375, "learning_rate": 0.0001390306248151009, "loss": 1.3603, "step": 1390 }, { "epoch": 0.7485537468047895, "grad_norm": 0.2578125, "learning_rate": 0.00013895256391695894, "loss": 1.326, "step": 1391 }, { "epoch": 0.7490918875285887, "grad_norm": 0.23828125, "learning_rate": 0.00013887447502739647, "loss": 1.2575, "step": 1392 }, { "epoch": 0.749630028252388, "grad_norm": 0.25, "learning_rate": 0.00013879635820252846, "loss": 1.3327, "step": 1393 }, { "epoch": 0.7501681689761873, "grad_norm": 0.2421875, "learning_rate": 0.00013871821349848977, "loss": 1.2816, "step": 1394 }, { "epoch": 0.7507063096999865, "grad_norm": 0.259765625, "learning_rate": 0.00013864004097143546, "loss": 1.3641, "step": 1395 }, { "epoch": 0.7512444504237858, "grad_norm": 0.25390625, "learning_rate": 0.0001385618406775405, "loss": 1.237, "step": 1396 }, { "epoch": 0.751782591147585, "grad_norm": 0.263671875, "learning_rate": 0.0001384836126729999, "loss": 1.4491, "step": 1397 }, { "epoch": 0.7523207318713844, "grad_norm": 0.2412109375, "learning_rate": 0.00013840535701402845, "loss": 1.3788, "step": 1398 }, { "epoch": 0.7528588725951837, "grad_norm": 0.255859375, "learning_rate": 0.00013832707375686094, "loss": 1.4013, "step": 1399 }, { "epoch": 0.7533970133189829, "grad_norm": 0.392578125, "learning_rate": 0.00013824876295775188, "loss": 2.1867, "step": 1400 }, { "epoch": 0.7539351540427822, "grad_norm": 0.255859375, "learning_rate": 0.00013817042467297565, "loss": 1.4717, "step": 1401 }, { "epoch": 0.7544732947665814, "grad_norm": 0.244140625, "learning_rate": 0.00013809205895882635, "loss": 1.3356, "step": 1402 }, { "epoch": 0.7550114354903807, "grad_norm": 0.259765625, "learning_rate": 0.00013801366587161786, "loss": 1.3147, "step": 1403 }, { "epoch": 0.7555495762141801, "grad_norm": 0.251953125, "learning_rate": 0.00013793524546768356, "loss": 1.3351, "step": 1404 }, { "epoch": 0.7560877169379793, "grad_norm": 0.2431640625, "learning_rate": 0.00013785679780337667, "loss": 1.411, "step": 1405 }, { "epoch": 0.7566258576617786, "grad_norm": 0.232421875, "learning_rate": 0.00013777832293506985, "loss": 1.3504, "step": 1406 }, { "epoch": 0.7571639983855778, "grad_norm": 0.255859375, "learning_rate": 0.00013769982091915538, "loss": 1.2823, "step": 1407 }, { "epoch": 0.7577021391093771, "grad_norm": 0.2392578125, "learning_rate": 0.00013762129181204503, "loss": 1.3067, "step": 1408 }, { "epoch": 0.7582402798331763, "grad_norm": 0.251953125, "learning_rate": 0.00013754273567017, "loss": 1.421, "step": 1409 }, { "epoch": 0.7587784205569756, "grad_norm": 0.2353515625, "learning_rate": 0.00013746415254998096, "loss": 1.3048, "step": 1410 }, { "epoch": 0.759316561280775, "grad_norm": 0.2412109375, "learning_rate": 0.00013738554250794795, "loss": 1.3447, "step": 1411 }, { "epoch": 0.7598547020045742, "grad_norm": 0.2373046875, "learning_rate": 0.00013730690560056043, "loss": 1.2715, "step": 1412 }, { "epoch": 0.7603928427283735, "grad_norm": 0.25, "learning_rate": 0.000137228241884327, "loss": 1.4157, "step": 1413 }, { "epoch": 0.7609309834521727, "grad_norm": 0.2314453125, "learning_rate": 0.00013714955141577574, "loss": 1.3702, "step": 1414 }, { "epoch": 0.761469124175972, "grad_norm": 0.251953125, "learning_rate": 0.00013707083425145374, "loss": 1.4022, "step": 1415 }, { "epoch": 0.7620072648997713, "grad_norm": 0.24609375, "learning_rate": 0.00013699209044792742, "loss": 1.3717, "step": 1416 }, { "epoch": 0.7625454056235705, "grad_norm": 0.2421875, "learning_rate": 0.00013691332006178226, "loss": 1.2973, "step": 1417 }, { "epoch": 0.7630835463473699, "grad_norm": 0.25390625, "learning_rate": 0.00013683452314962294, "loss": 1.303, "step": 1418 }, { "epoch": 0.7636216870711691, "grad_norm": 0.25390625, "learning_rate": 0.00013675569976807306, "loss": 1.3817, "step": 1419 }, { "epoch": 0.7641598277949684, "grad_norm": 0.2490234375, "learning_rate": 0.0001366768499737754, "loss": 1.3219, "step": 1420 }, { "epoch": 0.7646979685187677, "grad_norm": 0.251953125, "learning_rate": 0.0001365979738233916, "loss": 1.3424, "step": 1421 }, { "epoch": 0.7652361092425669, "grad_norm": 0.24609375, "learning_rate": 0.0001365190713736023, "loss": 1.302, "step": 1422 }, { "epoch": 0.7657742499663662, "grad_norm": 0.244140625, "learning_rate": 0.00013644014268110696, "loss": 1.3324, "step": 1423 }, { "epoch": 0.7663123906901654, "grad_norm": 0.2275390625, "learning_rate": 0.00013636118780262403, "loss": 1.2984, "step": 1424 }, { "epoch": 0.7668505314139648, "grad_norm": 0.2412109375, "learning_rate": 0.00013628220679489067, "loss": 1.3094, "step": 1425 }, { "epoch": 0.7673886721377641, "grad_norm": 0.2353515625, "learning_rate": 0.00013620319971466285, "loss": 1.3575, "step": 1426 }, { "epoch": 0.7679268128615633, "grad_norm": 0.2451171875, "learning_rate": 0.00013612416661871533, "loss": 1.3, "step": 1427 }, { "epoch": 0.7684649535853626, "grad_norm": 0.240234375, "learning_rate": 0.0001360451075638414, "loss": 1.4541, "step": 1428 }, { "epoch": 0.7690030943091618, "grad_norm": 0.25, "learning_rate": 0.00013596602260685322, "loss": 1.3153, "step": 1429 }, { "epoch": 0.7695412350329611, "grad_norm": 0.248046875, "learning_rate": 0.00013588691180458143, "loss": 1.3671, "step": 1430 }, { "epoch": 0.7700793757567604, "grad_norm": 0.255859375, "learning_rate": 0.0001358077752138752, "loss": 1.3639, "step": 1431 }, { "epoch": 0.7706175164805597, "grad_norm": 0.24609375, "learning_rate": 0.00013572861289160243, "loss": 1.4285, "step": 1432 }, { "epoch": 0.771155657204359, "grad_norm": 0.244140625, "learning_rate": 0.0001356494248946493, "loss": 1.3957, "step": 1433 }, { "epoch": 0.7716937979281582, "grad_norm": 0.2431640625, "learning_rate": 0.00013557021127992057, "loss": 1.3042, "step": 1434 }, { "epoch": 0.7722319386519575, "grad_norm": 0.2392578125, "learning_rate": 0.00013549097210433932, "loss": 1.3508, "step": 1435 }, { "epoch": 0.7727700793757568, "grad_norm": 0.2490234375, "learning_rate": 0.00013541170742484705, "loss": 1.4978, "step": 1436 }, { "epoch": 0.773308220099556, "grad_norm": 0.2578125, "learning_rate": 0.00013533241729840362, "loss": 1.4333, "step": 1437 }, { "epoch": 0.7738463608233553, "grad_norm": 0.24609375, "learning_rate": 0.00013525310178198705, "loss": 1.219, "step": 1438 }, { "epoch": 0.7743845015471545, "grad_norm": 0.240234375, "learning_rate": 0.0001351737609325938, "loss": 1.361, "step": 1439 }, { "epoch": 0.7749226422709539, "grad_norm": 0.248046875, "learning_rate": 0.00013509439480723835, "loss": 1.3598, "step": 1440 }, { "epoch": 0.7754607829947531, "grad_norm": 0.2421875, "learning_rate": 0.00013501500346295347, "loss": 1.273, "step": 1441 }, { "epoch": 0.7759989237185524, "grad_norm": 0.2392578125, "learning_rate": 0.00013493558695678993, "loss": 1.3334, "step": 1442 }, { "epoch": 0.7765370644423517, "grad_norm": 0.25, "learning_rate": 0.0001348561453458167, "loss": 1.3323, "step": 1443 }, { "epoch": 0.7770752051661509, "grad_norm": 0.248046875, "learning_rate": 0.0001347766786871207, "loss": 1.4001, "step": 1444 }, { "epoch": 0.7776133458899502, "grad_norm": 0.23828125, "learning_rate": 0.00013469718703780693, "loss": 1.3118, "step": 1445 }, { "epoch": 0.7781514866137494, "grad_norm": 0.2470703125, "learning_rate": 0.00013461767045499834, "loss": 1.4641, "step": 1446 }, { "epoch": 0.7786896273375488, "grad_norm": 0.2392578125, "learning_rate": 0.0001345381289958357, "loss": 1.3311, "step": 1447 }, { "epoch": 0.7792277680613481, "grad_norm": 0.2373046875, "learning_rate": 0.00013445856271747776, "loss": 1.3011, "step": 1448 }, { "epoch": 0.7797659087851473, "grad_norm": 0.25390625, "learning_rate": 0.00013437897167710105, "loss": 1.321, "step": 1449 }, { "epoch": 0.7803040495089466, "grad_norm": 0.26171875, "learning_rate": 0.00013429935593189994, "loss": 1.4149, "step": 1450 }, { "epoch": 0.7808421902327458, "grad_norm": 0.255859375, "learning_rate": 0.0001342197155390865, "loss": 1.3832, "step": 1451 }, { "epoch": 0.7813803309565451, "grad_norm": 0.259765625, "learning_rate": 0.00013414005055589057, "loss": 1.3665, "step": 1452 }, { "epoch": 0.7819184716803445, "grad_norm": 0.248046875, "learning_rate": 0.00013406036103955958, "loss": 1.275, "step": 1453 }, { "epoch": 0.7824566124041437, "grad_norm": 0.255859375, "learning_rate": 0.00013398064704735863, "loss": 1.3756, "step": 1454 }, { "epoch": 0.782994753127943, "grad_norm": 0.2431640625, "learning_rate": 0.00013390090863657047, "loss": 1.2789, "step": 1455 }, { "epoch": 0.7835328938517422, "grad_norm": 0.25, "learning_rate": 0.00013382114586449532, "loss": 1.3625, "step": 1456 }, { "epoch": 0.7840710345755415, "grad_norm": 0.24609375, "learning_rate": 0.00013374135878845093, "loss": 1.335, "step": 1457 }, { "epoch": 0.7846091752993408, "grad_norm": 0.248046875, "learning_rate": 0.00013366154746577254, "loss": 1.258, "step": 1458 }, { "epoch": 0.78514731602314, "grad_norm": 0.240234375, "learning_rate": 0.00013358171195381274, "loss": 1.3591, "step": 1459 }, { "epoch": 0.7856854567469393, "grad_norm": 0.2412109375, "learning_rate": 0.00013350185230994157, "loss": 1.4031, "step": 1460 }, { "epoch": 0.7862235974707386, "grad_norm": 0.23828125, "learning_rate": 0.00013342196859154637, "loss": 1.3579, "step": 1461 }, { "epoch": 0.7867617381945379, "grad_norm": 0.24609375, "learning_rate": 0.00013334206085603185, "loss": 1.3153, "step": 1462 }, { "epoch": 0.7872998789183372, "grad_norm": 0.251953125, "learning_rate": 0.0001332621291608199, "loss": 1.3701, "step": 1463 }, { "epoch": 0.7878380196421364, "grad_norm": 0.2353515625, "learning_rate": 0.00013318217356334967, "loss": 1.2643, "step": 1464 }, { "epoch": 0.7883761603659357, "grad_norm": 0.2353515625, "learning_rate": 0.00013310219412107746, "loss": 1.3701, "step": 1465 }, { "epoch": 0.7889143010897349, "grad_norm": 0.2392578125, "learning_rate": 0.00013302219089147673, "loss": 1.383, "step": 1466 }, { "epoch": 0.7894524418135342, "grad_norm": 0.2412109375, "learning_rate": 0.00013294216393203802, "loss": 1.3033, "step": 1467 }, { "epoch": 0.7899905825373336, "grad_norm": 0.24609375, "learning_rate": 0.00013286211330026894, "loss": 1.3413, "step": 1468 }, { "epoch": 0.7905287232611328, "grad_norm": 0.25390625, "learning_rate": 0.00013278203905369405, "loss": 1.3058, "step": 1469 }, { "epoch": 0.7910668639849321, "grad_norm": 0.244140625, "learning_rate": 0.00013270194124985498, "loss": 1.2549, "step": 1470 }, { "epoch": 0.7916050047087313, "grad_norm": 0.2421875, "learning_rate": 0.0001326218199463102, "loss": 1.2857, "step": 1471 }, { "epoch": 0.7921431454325306, "grad_norm": 0.25, "learning_rate": 0.00013254167520063509, "loss": 1.3263, "step": 1472 }, { "epoch": 0.7926812861563299, "grad_norm": 0.248046875, "learning_rate": 0.00013246150707042195, "loss": 1.2691, "step": 1473 }, { "epoch": 0.7932194268801291, "grad_norm": 0.25390625, "learning_rate": 0.00013238131561327975, "loss": 1.4265, "step": 1474 }, { "epoch": 0.7937575676039285, "grad_norm": 0.234375, "learning_rate": 0.00013230110088683432, "loss": 1.3458, "step": 1475 }, { "epoch": 0.7942957083277277, "grad_norm": 0.25390625, "learning_rate": 0.00013222086294872819, "loss": 1.3699, "step": 1476 }, { "epoch": 0.794833849051527, "grad_norm": 0.2470703125, "learning_rate": 0.00013214060185662058, "loss": 1.3693, "step": 1477 }, { "epoch": 0.7953719897753262, "grad_norm": 0.2431640625, "learning_rate": 0.0001320603176681873, "loss": 1.3616, "step": 1478 }, { "epoch": 0.7959101304991255, "grad_norm": 0.2490234375, "learning_rate": 0.0001319800104411208, "loss": 1.4449, "step": 1479 }, { "epoch": 0.7964482712229248, "grad_norm": 0.2431640625, "learning_rate": 0.0001318996802331301, "loss": 1.3048, "step": 1480 }, { "epoch": 0.796986411946724, "grad_norm": 0.2314453125, "learning_rate": 0.00013181932710194067, "loss": 1.3138, "step": 1481 }, { "epoch": 0.7975245526705234, "grad_norm": 0.2421875, "learning_rate": 0.00013173895110529452, "loss": 1.3209, "step": 1482 }, { "epoch": 0.7980626933943226, "grad_norm": 0.2392578125, "learning_rate": 0.00013165855230095008, "loss": 1.3796, "step": 1483 }, { "epoch": 0.7986008341181219, "grad_norm": 0.2392578125, "learning_rate": 0.00013157813074668212, "loss": 1.354, "step": 1484 }, { "epoch": 0.7991389748419212, "grad_norm": 0.2490234375, "learning_rate": 0.0001314976865002818, "loss": 1.3225, "step": 1485 }, { "epoch": 0.7996771155657204, "grad_norm": 0.236328125, "learning_rate": 0.00013141721961955658, "loss": 1.3368, "step": 1486 }, { "epoch": 0.8002152562895197, "grad_norm": 0.2412109375, "learning_rate": 0.0001313367301623302, "loss": 1.2326, "step": 1487 }, { "epoch": 0.8007533970133189, "grad_norm": 0.2490234375, "learning_rate": 0.00013125621818644258, "loss": 1.3459, "step": 1488 }, { "epoch": 0.8012915377371183, "grad_norm": 0.24609375, "learning_rate": 0.00013117568374974988, "loss": 1.2959, "step": 1489 }, { "epoch": 0.8018296784609176, "grad_norm": 0.2451171875, "learning_rate": 0.00013109512691012433, "loss": 1.3096, "step": 1490 }, { "epoch": 0.8023678191847168, "grad_norm": 0.23828125, "learning_rate": 0.00013101454772545434, "loss": 1.3567, "step": 1491 }, { "epoch": 0.8029059599085161, "grad_norm": 0.248046875, "learning_rate": 0.00013093394625364433, "loss": 1.3211, "step": 1492 }, { "epoch": 0.8034441006323153, "grad_norm": 0.2470703125, "learning_rate": 0.00013085332255261472, "loss": 1.3036, "step": 1493 }, { "epoch": 0.8039822413561146, "grad_norm": 0.2490234375, "learning_rate": 0.00013077267668030194, "loss": 1.2872, "step": 1494 }, { "epoch": 0.8045203820799139, "grad_norm": 0.248046875, "learning_rate": 0.00013069200869465832, "loss": 1.2424, "step": 1495 }, { "epoch": 0.8050585228037132, "grad_norm": 0.2490234375, "learning_rate": 0.00013061131865365212, "loss": 1.3637, "step": 1496 }, { "epoch": 0.8055966635275125, "grad_norm": 0.24609375, "learning_rate": 0.00013053060661526736, "loss": 1.3036, "step": 1497 }, { "epoch": 0.8061348042513117, "grad_norm": 0.240234375, "learning_rate": 0.00013044987263750407, "loss": 1.3011, "step": 1498 }, { "epoch": 0.806672944975111, "grad_norm": 0.25, "learning_rate": 0.00013036911677837776, "loss": 1.303, "step": 1499 }, { "epoch": 0.8072110856989103, "grad_norm": 0.25390625, "learning_rate": 0.00013028833909592, "loss": 1.3872, "step": 1500 }, { "epoch": 0.8077492264227095, "grad_norm": 0.24609375, "learning_rate": 0.00013020753964817762, "loss": 1.29, "step": 1501 }, { "epoch": 0.8082873671465088, "grad_norm": 0.2578125, "learning_rate": 0.00013012671849321351, "loss": 1.2495, "step": 1502 }, { "epoch": 0.808825507870308, "grad_norm": 0.2373046875, "learning_rate": 0.0001300458756891059, "loss": 1.3116, "step": 1503 }, { "epoch": 0.8093636485941074, "grad_norm": 0.2451171875, "learning_rate": 0.00012996501129394872, "loss": 1.3445, "step": 1504 }, { "epoch": 0.8099017893179067, "grad_norm": 0.2412109375, "learning_rate": 0.00012988412536585125, "loss": 1.3455, "step": 1505 }, { "epoch": 0.8104399300417059, "grad_norm": 0.2353515625, "learning_rate": 0.00012980321796293836, "loss": 1.3027, "step": 1506 }, { "epoch": 0.8109780707655052, "grad_norm": 0.2392578125, "learning_rate": 0.00012972228914335043, "loss": 1.2841, "step": 1507 }, { "epoch": 0.8115162114893044, "grad_norm": 0.2490234375, "learning_rate": 0.00012964133896524306, "loss": 1.4055, "step": 1508 }, { "epoch": 0.8120543522131037, "grad_norm": 0.2353515625, "learning_rate": 0.00012956036748678726, "loss": 1.3362, "step": 1509 }, { "epoch": 0.812592492936903, "grad_norm": 0.2392578125, "learning_rate": 0.00012947937476616938, "loss": 1.3448, "step": 1510 }, { "epoch": 0.8131306336607023, "grad_norm": 0.2470703125, "learning_rate": 0.000129398360861591, "loss": 1.3695, "step": 1511 }, { "epoch": 0.8136687743845016, "grad_norm": 0.251953125, "learning_rate": 0.000129317325831269, "loss": 1.3227, "step": 1512 }, { "epoch": 0.8142069151083008, "grad_norm": 0.2421875, "learning_rate": 0.0001292362697334353, "loss": 1.3318, "step": 1513 }, { "epoch": 0.8147450558321001, "grad_norm": 0.2451171875, "learning_rate": 0.00012915519262633705, "loss": 1.3395, "step": 1514 }, { "epoch": 0.8152831965558993, "grad_norm": 0.24609375, "learning_rate": 0.0001290740945682365, "loss": 1.3089, "step": 1515 }, { "epoch": 0.8158213372796986, "grad_norm": 0.259765625, "learning_rate": 0.00012899297561741097, "loss": 1.466, "step": 1516 }, { "epoch": 0.816359478003498, "grad_norm": 0.25390625, "learning_rate": 0.0001289118358321527, "loss": 1.3418, "step": 1517 }, { "epoch": 0.8168976187272972, "grad_norm": 0.251953125, "learning_rate": 0.00012883067527076904, "loss": 1.4166, "step": 1518 }, { "epoch": 0.8174357594510965, "grad_norm": 0.2431640625, "learning_rate": 0.00012874949399158215, "loss": 1.3477, "step": 1519 }, { "epoch": 0.8179739001748957, "grad_norm": 0.265625, "learning_rate": 0.0001286682920529291, "loss": 1.3595, "step": 1520 }, { "epoch": 0.818512040898695, "grad_norm": 0.248046875, "learning_rate": 0.00012858706951316185, "loss": 1.3072, "step": 1521 }, { "epoch": 0.8190501816224943, "grad_norm": 0.255859375, "learning_rate": 0.00012850582643064712, "loss": 1.2949, "step": 1522 }, { "epoch": 0.8195883223462935, "grad_norm": 0.24609375, "learning_rate": 0.0001284245628637665, "loss": 1.3064, "step": 1523 }, { "epoch": 0.8201264630700928, "grad_norm": 0.240234375, "learning_rate": 0.0001283432788709161, "loss": 1.233, "step": 1524 }, { "epoch": 0.8206646037938921, "grad_norm": 0.2451171875, "learning_rate": 0.00012826197451050691, "loss": 1.3387, "step": 1525 }, { "epoch": 0.8212027445176914, "grad_norm": 0.2451171875, "learning_rate": 0.00012818064984096444, "loss": 1.3529, "step": 1526 }, { "epoch": 0.8217408852414907, "grad_norm": 0.2490234375, "learning_rate": 0.0001280993049207288, "loss": 1.3386, "step": 1527 }, { "epoch": 0.8222790259652899, "grad_norm": 0.2451171875, "learning_rate": 0.0001280179398082547, "loss": 1.3631, "step": 1528 }, { "epoch": 0.8228171666890892, "grad_norm": 0.25, "learning_rate": 0.0001279365545620113, "loss": 1.3025, "step": 1529 }, { "epoch": 0.8233553074128884, "grad_norm": 0.2431640625, "learning_rate": 0.00012785514924048235, "loss": 1.2929, "step": 1530 }, { "epoch": 0.8238934481366877, "grad_norm": 0.2490234375, "learning_rate": 0.00012777372390216583, "loss": 1.3675, "step": 1531 }, { "epoch": 0.8244315888604871, "grad_norm": 0.248046875, "learning_rate": 0.00012769227860557432, "loss": 1.3447, "step": 1532 }, { "epoch": 0.8249697295842863, "grad_norm": 0.2578125, "learning_rate": 0.00012761081340923456, "loss": 1.3704, "step": 1533 }, { "epoch": 0.8255078703080856, "grad_norm": 0.2490234375, "learning_rate": 0.00012752932837168767, "loss": 1.2801, "step": 1534 }, { "epoch": 0.8260460110318848, "grad_norm": 0.248046875, "learning_rate": 0.00012744782355148906, "loss": 1.2889, "step": 1535 }, { "epoch": 0.8265841517556841, "grad_norm": 0.255859375, "learning_rate": 0.0001273662990072083, "loss": 1.3733, "step": 1536 }, { "epoch": 0.8271222924794834, "grad_norm": 0.2490234375, "learning_rate": 0.00012728475479742915, "loss": 1.4152, "step": 1537 }, { "epoch": 0.8276604332032826, "grad_norm": 0.240234375, "learning_rate": 0.00012720319098074952, "loss": 1.2203, "step": 1538 }, { "epoch": 0.828198573927082, "grad_norm": 0.2578125, "learning_rate": 0.00012712160761578137, "loss": 1.362, "step": 1539 }, { "epoch": 0.8287367146508812, "grad_norm": 0.25, "learning_rate": 0.0001270400047611508, "loss": 1.2846, "step": 1540 }, { "epoch": 0.8292748553746805, "grad_norm": 0.240234375, "learning_rate": 0.00012695838247549777, "loss": 1.2788, "step": 1541 }, { "epoch": 0.8298129960984798, "grad_norm": 0.33203125, "learning_rate": 0.0001268767408174763, "loss": 2.0553, "step": 1542 }, { "epoch": 0.830351136822279, "grad_norm": 0.25, "learning_rate": 0.00012679507984575432, "loss": 1.3147, "step": 1543 }, { "epoch": 0.8308892775460783, "grad_norm": 0.26953125, "learning_rate": 0.00012671339961901364, "loss": 1.3196, "step": 1544 }, { "epoch": 0.8314274182698775, "grad_norm": 0.2421875, "learning_rate": 0.00012663170019594987, "loss": 1.2926, "step": 1545 }, { "epoch": 0.8319655589936769, "grad_norm": 0.2421875, "learning_rate": 0.0001265499816352725, "loss": 1.3158, "step": 1546 }, { "epoch": 0.8325036997174762, "grad_norm": 0.2470703125, "learning_rate": 0.00012646824399570466, "loss": 1.3633, "step": 1547 }, { "epoch": 0.8330418404412754, "grad_norm": 0.234375, "learning_rate": 0.00012638648733598323, "loss": 1.2698, "step": 1548 }, { "epoch": 0.8335799811650747, "grad_norm": 0.2470703125, "learning_rate": 0.0001263047117148588, "loss": 1.3196, "step": 1549 }, { "epoch": 0.8341181218888739, "grad_norm": 0.2470703125, "learning_rate": 0.0001262229171910956, "loss": 1.3687, "step": 1550 }, { "epoch": 0.8346562626126732, "grad_norm": 0.267578125, "learning_rate": 0.00012614110382347133, "loss": 1.4101, "step": 1551 }, { "epoch": 0.8351944033364724, "grad_norm": 0.2431640625, "learning_rate": 0.00012605927167077735, "loss": 1.3611, "step": 1552 }, { "epoch": 0.8357325440602718, "grad_norm": 0.298828125, "learning_rate": 0.00012597742079181848, "loss": 2.0335, "step": 1553 }, { "epoch": 0.8362706847840711, "grad_norm": 0.2421875, "learning_rate": 0.00012589555124541292, "loss": 1.372, "step": 1554 }, { "epoch": 0.8368088255078703, "grad_norm": 0.2470703125, "learning_rate": 0.00012581366309039242, "loss": 1.3431, "step": 1555 }, { "epoch": 0.8373469662316696, "grad_norm": 0.25, "learning_rate": 0.00012573175638560199, "loss": 1.2775, "step": 1556 }, { "epoch": 0.8378851069554688, "grad_norm": 0.255859375, "learning_rate": 0.00012564983118990011, "loss": 1.4147, "step": 1557 }, { "epoch": 0.8384232476792681, "grad_norm": 0.251953125, "learning_rate": 0.0001255678875621583, "loss": 1.3102, "step": 1558 }, { "epoch": 0.8389613884030674, "grad_norm": 0.2470703125, "learning_rate": 0.0001254859255612616, "loss": 1.2824, "step": 1559 }, { "epoch": 0.8394995291268667, "grad_norm": 0.23828125, "learning_rate": 0.0001254039452461081, "loss": 1.2486, "step": 1560 }, { "epoch": 0.840037669850666, "grad_norm": 0.251953125, "learning_rate": 0.0001253219466756091, "loss": 1.322, "step": 1561 }, { "epoch": 0.8405758105744652, "grad_norm": 0.2578125, "learning_rate": 0.0001252399299086889, "loss": 1.4637, "step": 1562 }, { "epoch": 0.8411139512982645, "grad_norm": 0.25, "learning_rate": 0.0001251578950042851, "loss": 1.3893, "step": 1563 }, { "epoch": 0.8416520920220638, "grad_norm": 0.2373046875, "learning_rate": 0.00012507584202134812, "loss": 1.3062, "step": 1564 }, { "epoch": 0.842190232745863, "grad_norm": 0.2490234375, "learning_rate": 0.00012499377101884152, "loss": 1.372, "step": 1565 }, { "epoch": 0.8427283734696623, "grad_norm": 0.259765625, "learning_rate": 0.00012491168205574176, "loss": 1.4578, "step": 1566 }, { "epoch": 0.8432665141934615, "grad_norm": 0.240234375, "learning_rate": 0.00012482957519103813, "loss": 1.3074, "step": 1567 }, { "epoch": 0.8438046549172609, "grad_norm": 0.2470703125, "learning_rate": 0.00012474745048373292, "loss": 1.3096, "step": 1568 }, { "epoch": 0.8443427956410602, "grad_norm": 0.251953125, "learning_rate": 0.0001246653079928411, "loss": 1.396, "step": 1569 }, { "epoch": 0.8448809363648594, "grad_norm": 0.2451171875, "learning_rate": 0.00012458314777739055, "loss": 1.3406, "step": 1570 }, { "epoch": 0.8454190770886587, "grad_norm": 0.2353515625, "learning_rate": 0.00012450096989642185, "loss": 1.2934, "step": 1571 }, { "epoch": 0.8459572178124579, "grad_norm": 0.2421875, "learning_rate": 0.00012441877440898816, "loss": 1.2218, "step": 1572 }, { "epoch": 0.8464953585362572, "grad_norm": 0.25390625, "learning_rate": 0.00012433656137415542, "loss": 1.4042, "step": 1573 }, { "epoch": 0.8470334992600566, "grad_norm": 0.255859375, "learning_rate": 0.00012425433085100224, "loss": 1.3234, "step": 1574 }, { "epoch": 0.8475716399838558, "grad_norm": 0.25, "learning_rate": 0.00012417208289861959, "loss": 1.3724, "step": 1575 }, { "epoch": 0.8481097807076551, "grad_norm": 0.244140625, "learning_rate": 0.0001240898175761111, "loss": 1.4008, "step": 1576 }, { "epoch": 0.8486479214314543, "grad_norm": 0.236328125, "learning_rate": 0.00012400753494259292, "loss": 1.3078, "step": 1577 }, { "epoch": 0.8491860621552536, "grad_norm": 0.24609375, "learning_rate": 0.00012392523505719348, "loss": 1.3682, "step": 1578 }, { "epoch": 0.8497242028790529, "grad_norm": 0.244140625, "learning_rate": 0.0001238429179790538, "loss": 1.2524, "step": 1579 }, { "epoch": 0.8502623436028521, "grad_norm": 0.248046875, "learning_rate": 0.0001237605837673271, "loss": 1.2448, "step": 1580 }, { "epoch": 0.8508004843266515, "grad_norm": 0.240234375, "learning_rate": 0.00012367823248117898, "loss": 1.2604, "step": 1581 }, { "epoch": 0.8513386250504507, "grad_norm": 0.2373046875, "learning_rate": 0.00012359586417978733, "loss": 1.3066, "step": 1582 }, { "epoch": 0.85187676577425, "grad_norm": 0.25, "learning_rate": 0.0001235134789223422, "loss": 1.2406, "step": 1583 }, { "epoch": 0.8524149064980492, "grad_norm": 0.255859375, "learning_rate": 0.00012343107676804593, "loss": 1.2802, "step": 1584 }, { "epoch": 0.8529530472218485, "grad_norm": 0.2490234375, "learning_rate": 0.00012334865777611283, "loss": 1.3244, "step": 1585 }, { "epoch": 0.8534911879456478, "grad_norm": 0.2431640625, "learning_rate": 0.00012326622200576956, "loss": 1.3391, "step": 1586 }, { "epoch": 0.854029328669447, "grad_norm": 0.255859375, "learning_rate": 0.00012318376951625455, "loss": 1.3661, "step": 1587 }, { "epoch": 0.8545674693932463, "grad_norm": 0.2431640625, "learning_rate": 0.00012310130036681847, "loss": 1.3743, "step": 1588 }, { "epoch": 0.8551056101170456, "grad_norm": 0.2373046875, "learning_rate": 0.00012301881461672387, "loss": 1.2485, "step": 1589 }, { "epoch": 0.8556437508408449, "grad_norm": 0.2490234375, "learning_rate": 0.00012293631232524523, "loss": 1.4279, "step": 1590 }, { "epoch": 0.8561818915646442, "grad_norm": 0.25, "learning_rate": 0.00012285379355166893, "loss": 1.3939, "step": 1591 }, { "epoch": 0.8567200322884434, "grad_norm": 0.265625, "learning_rate": 0.00012277125835529317, "loss": 1.3666, "step": 1592 }, { "epoch": 0.8572581730122427, "grad_norm": 0.25, "learning_rate": 0.00012268870679542799, "loss": 1.388, "step": 1593 }, { "epoch": 0.8577963137360419, "grad_norm": 0.2392578125, "learning_rate": 0.00012260613893139515, "loss": 1.2688, "step": 1594 }, { "epoch": 0.8583344544598412, "grad_norm": 0.236328125, "learning_rate": 0.00012252355482252823, "loss": 1.3559, "step": 1595 }, { "epoch": 0.8588725951836406, "grad_norm": 0.2431640625, "learning_rate": 0.00012244095452817227, "loss": 1.3407, "step": 1596 }, { "epoch": 0.8594107359074398, "grad_norm": 0.2451171875, "learning_rate": 0.00012235833810768417, "loss": 1.3329, "step": 1597 }, { "epoch": 0.8599488766312391, "grad_norm": 0.2470703125, "learning_rate": 0.00012227570562043223, "loss": 1.3179, "step": 1598 }, { "epoch": 0.8604870173550383, "grad_norm": 0.25, "learning_rate": 0.00012219305712579648, "loss": 1.2519, "step": 1599 }, { "epoch": 0.8610251580788376, "grad_norm": 0.2412109375, "learning_rate": 0.00012211039268316827, "loss": 1.3062, "step": 1600 }, { "epoch": 0.8615632988026369, "grad_norm": 0.24609375, "learning_rate": 0.00012202771235195058, "loss": 1.3702, "step": 1601 }, { "epoch": 0.8621014395264361, "grad_norm": 0.2490234375, "learning_rate": 0.0001219450161915577, "loss": 1.3994, "step": 1602 }, { "epoch": 0.8626395802502355, "grad_norm": 0.248046875, "learning_rate": 0.00012186230426141528, "loss": 1.2979, "step": 1603 }, { "epoch": 0.8631777209740347, "grad_norm": 0.248046875, "learning_rate": 0.00012177957662096037, "loss": 1.3031, "step": 1604 }, { "epoch": 0.863715861697834, "grad_norm": 0.2451171875, "learning_rate": 0.0001216968333296413, "loss": 1.3463, "step": 1605 }, { "epoch": 0.8642540024216333, "grad_norm": 0.24609375, "learning_rate": 0.00012161407444691762, "loss": 1.2959, "step": 1606 }, { "epoch": 0.8647921431454325, "grad_norm": 0.251953125, "learning_rate": 0.00012153130003226005, "loss": 1.3313, "step": 1607 }, { "epoch": 0.8653302838692318, "grad_norm": 0.2578125, "learning_rate": 0.00012144851014515055, "loss": 1.3415, "step": 1608 }, { "epoch": 0.865868424593031, "grad_norm": 0.24609375, "learning_rate": 0.00012136570484508216, "loss": 1.3716, "step": 1609 }, { "epoch": 0.8664065653168304, "grad_norm": 0.259765625, "learning_rate": 0.00012128288419155902, "loss": 1.296, "step": 1610 }, { "epoch": 0.8669447060406297, "grad_norm": 0.2421875, "learning_rate": 0.00012120004824409625, "loss": 1.3024, "step": 1611 }, { "epoch": 0.8674828467644289, "grad_norm": 0.2412109375, "learning_rate": 0.00012111719706222, "loss": 1.2783, "step": 1612 }, { "epoch": 0.8680209874882282, "grad_norm": 0.251953125, "learning_rate": 0.00012103433070546737, "loss": 1.3971, "step": 1613 }, { "epoch": 0.8685591282120274, "grad_norm": 0.251953125, "learning_rate": 0.00012095144923338633, "loss": 1.3031, "step": 1614 }, { "epoch": 0.8690972689358267, "grad_norm": 0.2333984375, "learning_rate": 0.00012086855270553574, "loss": 1.2814, "step": 1615 }, { "epoch": 0.869635409659626, "grad_norm": 0.240234375, "learning_rate": 0.0001207856411814853, "loss": 1.4164, "step": 1616 }, { "epoch": 0.8701735503834253, "grad_norm": 0.255859375, "learning_rate": 0.00012070271472081542, "loss": 1.3779, "step": 1617 }, { "epoch": 0.8707116911072246, "grad_norm": 0.236328125, "learning_rate": 0.00012061977338311736, "loss": 1.3091, "step": 1618 }, { "epoch": 0.8712498318310238, "grad_norm": 0.24609375, "learning_rate": 0.0001205368172279929, "loss": 1.3574, "step": 1619 }, { "epoch": 0.8717879725548231, "grad_norm": 0.24609375, "learning_rate": 0.00012045384631505465, "loss": 1.2963, "step": 1620 }, { "epoch": 0.8723261132786223, "grad_norm": 0.24609375, "learning_rate": 0.00012037086070392567, "loss": 1.2234, "step": 1621 }, { "epoch": 0.8728642540024216, "grad_norm": 0.2578125, "learning_rate": 0.0001202878604542397, "loss": 1.3964, "step": 1622 }, { "epoch": 0.8734023947262209, "grad_norm": 0.2451171875, "learning_rate": 0.0001202048456256409, "loss": 1.2892, "step": 1623 }, { "epoch": 0.8739405354500202, "grad_norm": 0.236328125, "learning_rate": 0.00012012181627778401, "loss": 1.2449, "step": 1624 }, { "epoch": 0.8744786761738195, "grad_norm": 0.23828125, "learning_rate": 0.00012003877247033411, "loss": 1.1975, "step": 1625 }, { "epoch": 0.8750168168976187, "grad_norm": 0.2431640625, "learning_rate": 0.0001199557142629667, "loss": 1.3827, "step": 1626 }, { "epoch": 0.875554957621418, "grad_norm": 0.255859375, "learning_rate": 0.0001198726417153677, "loss": 1.2604, "step": 1627 }, { "epoch": 0.8760930983452173, "grad_norm": 0.240234375, "learning_rate": 0.00011978955488723322, "loss": 1.331, "step": 1628 }, { "epoch": 0.8766312390690165, "grad_norm": 0.2578125, "learning_rate": 0.00011970645383826971, "loss": 1.3575, "step": 1629 }, { "epoch": 0.8771693797928158, "grad_norm": 0.23828125, "learning_rate": 0.0001196233386281938, "loss": 1.2197, "step": 1630 }, { "epoch": 0.877707520516615, "grad_norm": 0.2470703125, "learning_rate": 0.00011954020931673229, "loss": 1.3934, "step": 1631 }, { "epoch": 0.8782456612404144, "grad_norm": 0.259765625, "learning_rate": 0.00011945706596362216, "loss": 1.3462, "step": 1632 }, { "epoch": 0.8787838019642137, "grad_norm": 0.2578125, "learning_rate": 0.00011937390862861046, "loss": 1.3998, "step": 1633 }, { "epoch": 0.8793219426880129, "grad_norm": 0.2470703125, "learning_rate": 0.0001192907373714543, "loss": 1.3528, "step": 1634 }, { "epoch": 0.8798600834118122, "grad_norm": 0.2431640625, "learning_rate": 0.00011920755225192073, "loss": 1.4042, "step": 1635 }, { "epoch": 0.8803982241356114, "grad_norm": 0.2490234375, "learning_rate": 0.00011912435332978684, "loss": 1.2999, "step": 1636 }, { "epoch": 0.8809363648594107, "grad_norm": 0.2421875, "learning_rate": 0.0001190411406648396, "loss": 1.3558, "step": 1637 }, { "epoch": 0.88147450558321, "grad_norm": 0.2421875, "learning_rate": 0.00011895791431687584, "loss": 1.3452, "step": 1638 }, { "epoch": 0.8820126463070093, "grad_norm": 0.244140625, "learning_rate": 0.00011887467434570227, "loss": 1.2629, "step": 1639 }, { "epoch": 0.8825507870308086, "grad_norm": 0.240234375, "learning_rate": 0.00011879142081113535, "loss": 1.4297, "step": 1640 }, { "epoch": 0.8830889277546078, "grad_norm": 0.251953125, "learning_rate": 0.0001187081537730013, "loss": 1.3785, "step": 1641 }, { "epoch": 0.8836270684784071, "grad_norm": 0.248046875, "learning_rate": 0.00011862487329113606, "loss": 1.3327, "step": 1642 }, { "epoch": 0.8841652092022064, "grad_norm": 0.255859375, "learning_rate": 0.00011854157942538522, "loss": 1.3294, "step": 1643 }, { "epoch": 0.8847033499260056, "grad_norm": 0.2412109375, "learning_rate": 0.00011845827223560393, "loss": 1.254, "step": 1644 }, { "epoch": 0.885241490649805, "grad_norm": 0.2421875, "learning_rate": 0.00011837495178165706, "loss": 1.3136, "step": 1645 }, { "epoch": 0.8857796313736042, "grad_norm": 0.251953125, "learning_rate": 0.00011829161812341882, "loss": 1.2809, "step": 1646 }, { "epoch": 0.8863177720974035, "grad_norm": 0.25, "learning_rate": 0.00011820827132077306, "loss": 1.3256, "step": 1647 }, { "epoch": 0.8868559128212028, "grad_norm": 0.2431640625, "learning_rate": 0.00011812491143361304, "loss": 1.212, "step": 1648 }, { "epoch": 0.887394053545002, "grad_norm": 0.2392578125, "learning_rate": 0.00011804153852184137, "loss": 1.2657, "step": 1649 }, { "epoch": 0.8879321942688013, "grad_norm": 0.2490234375, "learning_rate": 0.00011795815264537007, "loss": 1.3137, "step": 1650 }, { "epoch": 0.8884703349926005, "grad_norm": 0.251953125, "learning_rate": 0.00011787475386412048, "loss": 1.3303, "step": 1651 }, { "epoch": 0.8890084757163998, "grad_norm": 0.251953125, "learning_rate": 0.00011779134223802316, "loss": 1.4024, "step": 1652 }, { "epoch": 0.8895466164401992, "grad_norm": 0.24609375, "learning_rate": 0.00011770791782701794, "loss": 1.3265, "step": 1653 }, { "epoch": 0.8900847571639984, "grad_norm": 0.2490234375, "learning_rate": 0.00011762448069105387, "loss": 1.3459, "step": 1654 }, { "epoch": 0.8906228978877977, "grad_norm": 0.259765625, "learning_rate": 0.00011754103089008905, "loss": 1.3153, "step": 1655 }, { "epoch": 0.8911610386115969, "grad_norm": 0.2578125, "learning_rate": 0.00011745756848409081, "loss": 1.3086, "step": 1656 }, { "epoch": 0.8916991793353962, "grad_norm": 0.251953125, "learning_rate": 0.00011737409353303541, "loss": 1.3459, "step": 1657 }, { "epoch": 0.8922373200591954, "grad_norm": 0.361328125, "learning_rate": 0.00011729060609690823, "loss": 2.0565, "step": 1658 }, { "epoch": 0.8927754607829947, "grad_norm": 0.265625, "learning_rate": 0.00011720710623570351, "loss": 1.4146, "step": 1659 }, { "epoch": 0.8933136015067941, "grad_norm": 0.251953125, "learning_rate": 0.0001171235940094245, "loss": 1.3094, "step": 1660 }, { "epoch": 0.8938517422305933, "grad_norm": 0.2421875, "learning_rate": 0.00011704006947808337, "loss": 1.2686, "step": 1661 }, { "epoch": 0.8943898829543926, "grad_norm": 0.25, "learning_rate": 0.00011695653270170101, "loss": 1.2573, "step": 1662 }, { "epoch": 0.8949280236781918, "grad_norm": 0.2421875, "learning_rate": 0.00011687298374030723, "loss": 1.3528, "step": 1663 }, { "epoch": 0.8954661644019911, "grad_norm": 0.2412109375, "learning_rate": 0.00011678942265394056, "loss": 1.4116, "step": 1664 }, { "epoch": 0.8960043051257904, "grad_norm": 0.236328125, "learning_rate": 0.00011670584950264816, "loss": 1.2493, "step": 1665 }, { "epoch": 0.8965424458495896, "grad_norm": 0.251953125, "learning_rate": 0.00011662226434648595, "loss": 1.3454, "step": 1666 }, { "epoch": 0.897080586573389, "grad_norm": 0.2490234375, "learning_rate": 0.00011653866724551847, "loss": 1.3721, "step": 1667 }, { "epoch": 0.8976187272971882, "grad_norm": 0.24609375, "learning_rate": 0.00011645505825981884, "loss": 1.4005, "step": 1668 }, { "epoch": 0.8981568680209875, "grad_norm": 0.2451171875, "learning_rate": 0.0001163714374494687, "loss": 1.2726, "step": 1669 }, { "epoch": 0.8986950087447868, "grad_norm": 0.2490234375, "learning_rate": 0.00011628780487455819, "loss": 1.2838, "step": 1670 }, { "epoch": 0.899233149468586, "grad_norm": 0.25390625, "learning_rate": 0.00011620416059518593, "loss": 1.2947, "step": 1671 }, { "epoch": 0.8997712901923853, "grad_norm": 0.259765625, "learning_rate": 0.0001161205046714589, "loss": 1.3431, "step": 1672 }, { "epoch": 0.9003094309161845, "grad_norm": 0.26171875, "learning_rate": 0.0001160368371634925, "loss": 1.4556, "step": 1673 }, { "epoch": 0.9008475716399839, "grad_norm": 0.25390625, "learning_rate": 0.00011595315813141041, "loss": 1.3737, "step": 1674 }, { "epoch": 0.9013857123637832, "grad_norm": 0.267578125, "learning_rate": 0.00011586946763534466, "loss": 1.2927, "step": 1675 }, { "epoch": 0.9019238530875824, "grad_norm": 0.251953125, "learning_rate": 0.0001157857657354354, "loss": 1.3774, "step": 1676 }, { "epoch": 0.9024619938113817, "grad_norm": 0.248046875, "learning_rate": 0.00011570205249183115, "loss": 1.2884, "step": 1677 }, { "epoch": 0.9030001345351809, "grad_norm": 0.248046875, "learning_rate": 0.00011561832796468837, "loss": 1.2799, "step": 1678 }, { "epoch": 0.9035382752589802, "grad_norm": 0.255859375, "learning_rate": 0.00011553459221417182, "loss": 1.4441, "step": 1679 }, { "epoch": 0.9040764159827795, "grad_norm": 0.244140625, "learning_rate": 0.00011545084530045419, "loss": 1.2878, "step": 1680 }, { "epoch": 0.9046145567065788, "grad_norm": 0.265625, "learning_rate": 0.00011536708728371627, "loss": 1.3645, "step": 1681 }, { "epoch": 0.9051526974303781, "grad_norm": 0.2451171875, "learning_rate": 0.0001152833182241468, "loss": 1.2928, "step": 1682 }, { "epoch": 0.9056908381541773, "grad_norm": 0.255859375, "learning_rate": 0.00011519953818194247, "loss": 1.3143, "step": 1683 }, { "epoch": 0.9062289788779766, "grad_norm": 0.2470703125, "learning_rate": 0.0001151157472173078, "loss": 1.2278, "step": 1684 }, { "epoch": 0.9067671196017759, "grad_norm": 0.255859375, "learning_rate": 0.00011503194539045528, "loss": 1.4088, "step": 1685 }, { "epoch": 0.9073052603255751, "grad_norm": 0.25390625, "learning_rate": 0.00011494813276160509, "loss": 1.3729, "step": 1686 }, { "epoch": 0.9078434010493744, "grad_norm": 0.2490234375, "learning_rate": 0.00011486430939098525, "loss": 1.3276, "step": 1687 }, { "epoch": 0.9083815417731737, "grad_norm": 0.2314453125, "learning_rate": 0.00011478047533883143, "loss": 1.2483, "step": 1688 }, { "epoch": 0.908919682496973, "grad_norm": 0.251953125, "learning_rate": 0.00011469663066538703, "loss": 1.3417, "step": 1689 }, { "epoch": 0.9094578232207722, "grad_norm": 0.259765625, "learning_rate": 0.00011461277543090308, "loss": 1.3804, "step": 1690 }, { "epoch": 0.9099959639445715, "grad_norm": 0.234375, "learning_rate": 0.00011452890969563813, "loss": 1.2778, "step": 1691 }, { "epoch": 0.9105341046683708, "grad_norm": 0.24609375, "learning_rate": 0.0001144450335198584, "loss": 1.3851, "step": 1692 }, { "epoch": 0.91107224539217, "grad_norm": 0.2412109375, "learning_rate": 0.00011436114696383749, "loss": 1.3229, "step": 1693 }, { "epoch": 0.9116103861159693, "grad_norm": 0.240234375, "learning_rate": 0.00011427725008785654, "loss": 1.3353, "step": 1694 }, { "epoch": 0.9121485268397685, "grad_norm": 0.244140625, "learning_rate": 0.0001141933429522041, "loss": 1.2724, "step": 1695 }, { "epoch": 0.9126866675635679, "grad_norm": 0.25, "learning_rate": 0.00011410942561717601, "loss": 1.3499, "step": 1696 }, { "epoch": 0.9132248082873672, "grad_norm": 0.2392578125, "learning_rate": 0.00011402549814307554, "loss": 1.2776, "step": 1697 }, { "epoch": 0.9137629490111664, "grad_norm": 0.259765625, "learning_rate": 0.00011394156059021319, "loss": 1.3134, "step": 1698 }, { "epoch": 0.9143010897349657, "grad_norm": 0.2451171875, "learning_rate": 0.00011385761301890675, "loss": 1.3906, "step": 1699 }, { "epoch": 0.9148392304587649, "grad_norm": 0.25390625, "learning_rate": 0.00011377365548948113, "loss": 1.3173, "step": 1700 }, { "epoch": 0.9153773711825642, "grad_norm": 0.2421875, "learning_rate": 0.00011368968806226843, "loss": 1.2378, "step": 1701 }, { "epoch": 0.9159155119063636, "grad_norm": 0.259765625, "learning_rate": 0.00011360571079760798, "loss": 1.2364, "step": 1702 }, { "epoch": 0.9164536526301628, "grad_norm": 0.248046875, "learning_rate": 0.00011352172375584596, "loss": 1.315, "step": 1703 }, { "epoch": 0.9169917933539621, "grad_norm": 0.2578125, "learning_rate": 0.00011343772699733579, "loss": 1.3199, "step": 1704 }, { "epoch": 0.9175299340777613, "grad_norm": 0.255859375, "learning_rate": 0.00011335372058243767, "loss": 1.3526, "step": 1705 }, { "epoch": 0.9180680748015606, "grad_norm": 0.251953125, "learning_rate": 0.00011326970457151891, "loss": 1.3314, "step": 1706 }, { "epoch": 0.9186062155253599, "grad_norm": 0.251953125, "learning_rate": 0.00011318567902495362, "loss": 1.3572, "step": 1707 }, { "epoch": 0.9191443562491591, "grad_norm": 0.244140625, "learning_rate": 0.00011310164400312284, "loss": 1.401, "step": 1708 }, { "epoch": 0.9196824969729585, "grad_norm": 0.24609375, "learning_rate": 0.00011301759956641432, "loss": 1.2656, "step": 1709 }, { "epoch": 0.9202206376967577, "grad_norm": 0.2412109375, "learning_rate": 0.00011293354577522263, "loss": 1.2566, "step": 1710 }, { "epoch": 0.920758778420557, "grad_norm": 0.255859375, "learning_rate": 0.00011284948268994908, "loss": 1.2848, "step": 1711 }, { "epoch": 0.9212969191443563, "grad_norm": 0.25, "learning_rate": 0.00011276541037100159, "loss": 1.438, "step": 1712 }, { "epoch": 0.9218350598681555, "grad_norm": 0.2490234375, "learning_rate": 0.00011268132887879486, "loss": 1.2702, "step": 1713 }, { "epoch": 0.9223732005919548, "grad_norm": 0.251953125, "learning_rate": 0.00011259723827374995, "loss": 1.2293, "step": 1714 }, { "epoch": 0.922911341315754, "grad_norm": 0.2451171875, "learning_rate": 0.00011251313861629474, "loss": 1.2318, "step": 1715 }, { "epoch": 0.9234494820395533, "grad_norm": 0.2470703125, "learning_rate": 0.00011242902996686333, "loss": 1.2708, "step": 1716 }, { "epoch": 0.9239876227633527, "grad_norm": 0.2578125, "learning_rate": 0.00011234491238589658, "loss": 1.3279, "step": 1717 }, { "epoch": 0.9245257634871519, "grad_norm": 0.2470703125, "learning_rate": 0.00011226078593384148, "loss": 1.2255, "step": 1718 }, { "epoch": 0.9250639042109512, "grad_norm": 0.25390625, "learning_rate": 0.00011217665067115163, "loss": 1.4086, "step": 1719 }, { "epoch": 0.9256020449347504, "grad_norm": 0.255859375, "learning_rate": 0.00011209250665828683, "loss": 1.3996, "step": 1720 }, { "epoch": 0.9261401856585497, "grad_norm": 0.2470703125, "learning_rate": 0.00011200835395571318, "loss": 1.3118, "step": 1721 }, { "epoch": 0.926678326382349, "grad_norm": 0.251953125, "learning_rate": 0.0001119241926239031, "loss": 1.3661, "step": 1722 }, { "epoch": 0.9272164671061482, "grad_norm": 0.25390625, "learning_rate": 0.0001118400227233351, "loss": 1.3399, "step": 1723 }, { "epoch": 0.9277546078299476, "grad_norm": 0.267578125, "learning_rate": 0.00011175584431449393, "loss": 1.3181, "step": 1724 }, { "epoch": 0.9282927485537468, "grad_norm": 0.25, "learning_rate": 0.00011167165745787041, "loss": 1.3195, "step": 1725 }, { "epoch": 0.9288308892775461, "grad_norm": 0.34765625, "learning_rate": 0.00011158746221396148, "loss": 2.1026, "step": 1726 }, { "epoch": 0.9293690300013453, "grad_norm": 0.23828125, "learning_rate": 0.00011150325864327003, "loss": 1.2804, "step": 1727 }, { "epoch": 0.9299071707251446, "grad_norm": 0.306640625, "learning_rate": 0.00011141904680630503, "loss": 1.9933, "step": 1728 }, { "epoch": 0.9304453114489439, "grad_norm": 0.25, "learning_rate": 0.00011133482676358132, "loss": 1.3152, "step": 1729 }, { "epoch": 0.9309834521727431, "grad_norm": 0.263671875, "learning_rate": 0.00011125059857561966, "loss": 1.3688, "step": 1730 }, { "epoch": 0.9315215928965425, "grad_norm": 0.267578125, "learning_rate": 0.00011116636230294664, "loss": 1.2989, "step": 1731 }, { "epoch": 0.9320597336203417, "grad_norm": 0.24609375, "learning_rate": 0.00011108211800609469, "loss": 1.2957, "step": 1732 }, { "epoch": 0.932597874344141, "grad_norm": 0.25390625, "learning_rate": 0.000110997865745602, "loss": 1.2937, "step": 1733 }, { "epoch": 0.9331360150679403, "grad_norm": 0.25390625, "learning_rate": 0.00011091360558201249, "loss": 1.3062, "step": 1734 }, { "epoch": 0.9336741557917395, "grad_norm": 0.259765625, "learning_rate": 0.0001108293375758757, "loss": 1.3826, "step": 1735 }, { "epoch": 0.9342122965155388, "grad_norm": 0.2421875, "learning_rate": 0.00011074506178774695, "loss": 1.2598, "step": 1736 }, { "epoch": 0.934750437239338, "grad_norm": 0.2421875, "learning_rate": 0.00011066077827818693, "loss": 1.3847, "step": 1737 }, { "epoch": 0.9352885779631374, "grad_norm": 0.2451171875, "learning_rate": 0.00011057648710776212, "loss": 1.2915, "step": 1738 }, { "epoch": 0.9358267186869367, "grad_norm": 0.244140625, "learning_rate": 0.00011049218833704427, "loss": 1.3326, "step": 1739 }, { "epoch": 0.9363648594107359, "grad_norm": 0.2373046875, "learning_rate": 0.00011040788202661084, "loss": 1.3037, "step": 1740 }, { "epoch": 0.9369030001345352, "grad_norm": 0.255859375, "learning_rate": 0.00011032356823704446, "loss": 1.3637, "step": 1741 }, { "epoch": 0.9374411408583344, "grad_norm": 0.248046875, "learning_rate": 0.00011023924702893333, "loss": 1.2545, "step": 1742 }, { "epoch": 0.9379792815821337, "grad_norm": 0.24609375, "learning_rate": 0.00011015491846287087, "loss": 1.2323, "step": 1743 }, { "epoch": 0.938517422305933, "grad_norm": 0.25, "learning_rate": 0.00011007058259945584, "loss": 1.2536, "step": 1744 }, { "epoch": 0.9390555630297323, "grad_norm": 0.25390625, "learning_rate": 0.00010998623949929224, "loss": 1.4054, "step": 1745 }, { "epoch": 0.9395937037535316, "grad_norm": 0.2490234375, "learning_rate": 0.00010990188922298919, "loss": 1.2618, "step": 1746 }, { "epoch": 0.9401318444773308, "grad_norm": 0.2431640625, "learning_rate": 0.0001098175318311611, "loss": 1.3386, "step": 1747 }, { "epoch": 0.9406699852011301, "grad_norm": 0.279296875, "learning_rate": 0.00010973316738442738, "loss": 1.3686, "step": 1748 }, { "epoch": 0.9412081259249294, "grad_norm": 0.25, "learning_rate": 0.00010964879594341262, "loss": 1.2629, "step": 1749 }, { "epoch": 0.9417462666487286, "grad_norm": 0.271484375, "learning_rate": 0.00010956441756874627, "loss": 1.4194, "step": 1750 }, { "epoch": 0.9422844073725279, "grad_norm": 0.2470703125, "learning_rate": 0.00010948003232106298, "loss": 1.307, "step": 1751 }, { "epoch": 0.9428225480963272, "grad_norm": 0.26171875, "learning_rate": 0.00010939564026100213, "loss": 1.3625, "step": 1752 }, { "epoch": 0.9433606888201265, "grad_norm": 0.2451171875, "learning_rate": 0.00010931124144920812, "loss": 1.3276, "step": 1753 }, { "epoch": 0.9438988295439258, "grad_norm": 0.2412109375, "learning_rate": 0.00010922683594633021, "loss": 1.3023, "step": 1754 }, { "epoch": 0.944436970267725, "grad_norm": 0.2490234375, "learning_rate": 0.00010914242381302238, "loss": 1.2617, "step": 1755 }, { "epoch": 0.9449751109915243, "grad_norm": 0.2392578125, "learning_rate": 0.00010905800510994342, "loss": 1.2914, "step": 1756 }, { "epoch": 0.9455132517153235, "grad_norm": 0.25, "learning_rate": 0.00010897357989775688, "loss": 1.2535, "step": 1757 }, { "epoch": 0.9460513924391228, "grad_norm": 0.25390625, "learning_rate": 0.00010888914823713092, "loss": 1.2992, "step": 1758 }, { "epoch": 0.9465895331629222, "grad_norm": 0.25390625, "learning_rate": 0.00010880471018873837, "loss": 1.2861, "step": 1759 }, { "epoch": 0.9471276738867214, "grad_norm": 0.24609375, "learning_rate": 0.00010872026581325668, "loss": 1.2279, "step": 1760 }, { "epoch": 0.9476658146105207, "grad_norm": 0.271484375, "learning_rate": 0.00010863581517136776, "loss": 1.2621, "step": 1761 }, { "epoch": 0.9482039553343199, "grad_norm": 0.251953125, "learning_rate": 0.00010855135832375809, "loss": 1.3924, "step": 1762 }, { "epoch": 0.9487420960581192, "grad_norm": 0.263671875, "learning_rate": 0.00010846689533111864, "loss": 1.2631, "step": 1763 }, { "epoch": 0.9492802367819184, "grad_norm": 0.251953125, "learning_rate": 0.0001083824262541447, "loss": 1.3912, "step": 1764 }, { "epoch": 0.9498183775057177, "grad_norm": 0.25390625, "learning_rate": 0.000108297951153536, "loss": 1.2903, "step": 1765 }, { "epoch": 0.950356518229517, "grad_norm": 0.25, "learning_rate": 0.00010821347008999658, "loss": 1.2876, "step": 1766 }, { "epoch": 0.9508946589533163, "grad_norm": 0.26171875, "learning_rate": 0.00010812898312423478, "loss": 1.3599, "step": 1767 }, { "epoch": 0.9514327996771156, "grad_norm": 0.2734375, "learning_rate": 0.00010804449031696315, "loss": 1.3222, "step": 1768 }, { "epoch": 0.9519709404009148, "grad_norm": 0.25390625, "learning_rate": 0.00010795999172889845, "loss": 1.1656, "step": 1769 }, { "epoch": 0.9525090811247141, "grad_norm": 0.25390625, "learning_rate": 0.00010787548742076164, "loss": 1.3604, "step": 1770 }, { "epoch": 0.9530472218485134, "grad_norm": 0.251953125, "learning_rate": 0.00010779097745327768, "loss": 1.328, "step": 1771 }, { "epoch": 0.9535853625723126, "grad_norm": 0.255859375, "learning_rate": 0.00010770646188717577, "loss": 1.3502, "step": 1772 }, { "epoch": 0.954123503296112, "grad_norm": 0.2578125, "learning_rate": 0.0001076219407831889, "loss": 1.3955, "step": 1773 }, { "epoch": 0.9546616440199112, "grad_norm": 0.2412109375, "learning_rate": 0.0001075374142020543, "loss": 1.321, "step": 1774 }, { "epoch": 0.9551997847437105, "grad_norm": 0.25390625, "learning_rate": 0.0001074528822045129, "loss": 1.3254, "step": 1775 }, { "epoch": 0.9557379254675098, "grad_norm": 0.24609375, "learning_rate": 0.0001073683448513097, "loss": 1.3367, "step": 1776 }, { "epoch": 0.956276066191309, "grad_norm": 0.2392578125, "learning_rate": 0.00010728380220319345, "loss": 1.2726, "step": 1777 }, { "epoch": 0.9568142069151083, "grad_norm": 0.255859375, "learning_rate": 0.0001071992543209167, "loss": 1.4006, "step": 1778 }, { "epoch": 0.9573523476389075, "grad_norm": 0.2451171875, "learning_rate": 0.00010711470126523587, "loss": 1.3431, "step": 1779 }, { "epoch": 0.9578904883627068, "grad_norm": 0.25390625, "learning_rate": 0.00010703014309691094, "loss": 1.3701, "step": 1780 }, { "epoch": 0.9584286290865062, "grad_norm": 0.263671875, "learning_rate": 0.00010694557987670571, "loss": 1.3554, "step": 1781 }, { "epoch": 0.9589667698103054, "grad_norm": 0.251953125, "learning_rate": 0.00010686101166538749, "loss": 1.3034, "step": 1782 }, { "epoch": 0.9595049105341047, "grad_norm": 0.259765625, "learning_rate": 0.00010677643852372725, "loss": 1.2904, "step": 1783 }, { "epoch": 0.9600430512579039, "grad_norm": 0.2421875, "learning_rate": 0.00010669186051249949, "loss": 1.2657, "step": 1784 }, { "epoch": 0.9605811919817032, "grad_norm": 0.26171875, "learning_rate": 0.0001066072776924822, "loss": 1.3982, "step": 1785 }, { "epoch": 0.9611193327055025, "grad_norm": 0.25390625, "learning_rate": 0.00010652269012445684, "loss": 1.2843, "step": 1786 }, { "epoch": 0.9616574734293017, "grad_norm": 0.2451171875, "learning_rate": 0.00010643809786920825, "loss": 1.2614, "step": 1787 }, { "epoch": 0.9621956141531011, "grad_norm": 0.234375, "learning_rate": 0.00010635350098752468, "loss": 1.2618, "step": 1788 }, { "epoch": 0.9627337548769003, "grad_norm": 0.2490234375, "learning_rate": 0.00010626889954019768, "loss": 1.2838, "step": 1789 }, { "epoch": 0.9632718956006996, "grad_norm": 0.255859375, "learning_rate": 0.00010618429358802208, "loss": 1.3208, "step": 1790 }, { "epoch": 0.9638100363244989, "grad_norm": 0.24609375, "learning_rate": 0.00010609968319179596, "loss": 1.419, "step": 1791 }, { "epoch": 0.9643481770482981, "grad_norm": 0.259765625, "learning_rate": 0.00010601506841232056, "loss": 1.242, "step": 1792 }, { "epoch": 0.9648863177720974, "grad_norm": 0.255859375, "learning_rate": 0.0001059304493104003, "loss": 1.3237, "step": 1793 }, { "epoch": 0.9654244584958966, "grad_norm": 0.25390625, "learning_rate": 0.00010584582594684274, "loss": 1.3362, "step": 1794 }, { "epoch": 0.965962599219696, "grad_norm": 0.2490234375, "learning_rate": 0.00010576119838245844, "loss": 1.2919, "step": 1795 }, { "epoch": 0.9665007399434953, "grad_norm": 0.2490234375, "learning_rate": 0.00010567656667806096, "loss": 1.3438, "step": 1796 }, { "epoch": 0.9670388806672945, "grad_norm": 0.25390625, "learning_rate": 0.00010559193089446698, "loss": 1.3472, "step": 1797 }, { "epoch": 0.9675770213910938, "grad_norm": 0.25, "learning_rate": 0.00010550729109249588, "loss": 1.3087, "step": 1798 }, { "epoch": 0.968115162114893, "grad_norm": 0.2421875, "learning_rate": 0.00010542264733297021, "loss": 1.3663, "step": 1799 }, { "epoch": 0.9686533028386923, "grad_norm": 0.2275390625, "learning_rate": 0.00010533799967671502, "loss": 1.2478, "step": 1800 }, { "epoch": 0.9691914435624915, "grad_norm": 0.244140625, "learning_rate": 0.00010525334818455854, "loss": 1.3629, "step": 1801 }, { "epoch": 0.9697295842862909, "grad_norm": 0.2470703125, "learning_rate": 0.00010516869291733143, "loss": 1.3043, "step": 1802 }, { "epoch": 0.9702677250100902, "grad_norm": 0.248046875, "learning_rate": 0.00010508403393586727, "loss": 1.3348, "step": 1803 }, { "epoch": 0.9708058657338894, "grad_norm": 0.2490234375, "learning_rate": 0.00010499937130100221, "loss": 1.3604, "step": 1804 }, { "epoch": 0.9713440064576887, "grad_norm": 0.2578125, "learning_rate": 0.00010491470507357508, "loss": 1.2873, "step": 1805 }, { "epoch": 0.9718821471814879, "grad_norm": 0.26171875, "learning_rate": 0.00010483003531442726, "loss": 1.3233, "step": 1806 }, { "epoch": 0.9724202879052872, "grad_norm": 0.25, "learning_rate": 0.00010474536208440262, "loss": 1.2688, "step": 1807 }, { "epoch": 0.9729584286290865, "grad_norm": 0.255859375, "learning_rate": 0.00010466068544434766, "loss": 1.3404, "step": 1808 }, { "epoch": 0.9734965693528858, "grad_norm": 0.2470703125, "learning_rate": 0.0001045760054551112, "loss": 1.2274, "step": 1809 }, { "epoch": 0.9740347100766851, "grad_norm": 0.26953125, "learning_rate": 0.00010449132217754454, "loss": 1.261, "step": 1810 }, { "epoch": 0.9745728508004843, "grad_norm": 0.251953125, "learning_rate": 0.00010440663567250127, "loss": 1.2829, "step": 1811 }, { "epoch": 0.9751109915242836, "grad_norm": 0.2451171875, "learning_rate": 0.0001043219460008374, "loss": 1.2767, "step": 1812 }, { "epoch": 0.9756491322480829, "grad_norm": 0.2431640625, "learning_rate": 0.00010423725322341116, "loss": 1.2715, "step": 1813 }, { "epoch": 0.9761872729718821, "grad_norm": 0.2392578125, "learning_rate": 0.00010415255740108299, "loss": 1.2839, "step": 1814 }, { "epoch": 0.9767254136956814, "grad_norm": 0.2470703125, "learning_rate": 0.00010406785859471553, "loss": 1.3113, "step": 1815 }, { "epoch": 0.9772635544194807, "grad_norm": 0.251953125, "learning_rate": 0.00010398315686517361, "loss": 1.2291, "step": 1816 }, { "epoch": 0.97780169514328, "grad_norm": 0.25, "learning_rate": 0.00010389845227332413, "loss": 1.3089, "step": 1817 }, { "epoch": 0.9783398358670793, "grad_norm": 0.251953125, "learning_rate": 0.00010381374488003603, "loss": 1.3839, "step": 1818 }, { "epoch": 0.9788779765908785, "grad_norm": 0.240234375, "learning_rate": 0.00010372903474618028, "loss": 1.27, "step": 1819 }, { "epoch": 0.9794161173146778, "grad_norm": 0.267578125, "learning_rate": 0.0001036443219326298, "loss": 1.3506, "step": 1820 }, { "epoch": 0.979954258038477, "grad_norm": 0.2470703125, "learning_rate": 0.00010355960650025947, "loss": 1.3497, "step": 1821 }, { "epoch": 0.9804923987622763, "grad_norm": 0.2412109375, "learning_rate": 0.00010347488850994607, "loss": 1.3146, "step": 1822 }, { "epoch": 0.9810305394860757, "grad_norm": 0.25390625, "learning_rate": 0.00010339016802256809, "loss": 1.3179, "step": 1823 }, { "epoch": 0.9815686802098749, "grad_norm": 0.248046875, "learning_rate": 0.00010330544509900602, "loss": 1.3295, "step": 1824 }, { "epoch": 0.9821068209336742, "grad_norm": 0.265625, "learning_rate": 0.0001032207198001419, "loss": 1.4846, "step": 1825 }, { "epoch": 0.9826449616574734, "grad_norm": 0.25390625, "learning_rate": 0.0001031359921868596, "loss": 1.2771, "step": 1826 }, { "epoch": 0.9831831023812727, "grad_norm": 0.251953125, "learning_rate": 0.00010305126232004463, "loss": 1.3885, "step": 1827 }, { "epoch": 0.983721243105072, "grad_norm": 0.265625, "learning_rate": 0.00010296653026058409, "loss": 1.4358, "step": 1828 }, { "epoch": 0.9842593838288712, "grad_norm": 0.251953125, "learning_rate": 0.00010288179606936666, "loss": 1.2668, "step": 1829 }, { "epoch": 0.9847975245526706, "grad_norm": 0.24609375, "learning_rate": 0.00010279705980728258, "loss": 1.3104, "step": 1830 }, { "epoch": 0.9853356652764698, "grad_norm": 0.251953125, "learning_rate": 0.00010271232153522359, "loss": 1.2717, "step": 1831 }, { "epoch": 0.9858738060002691, "grad_norm": 0.2470703125, "learning_rate": 0.00010262758131408279, "loss": 1.2985, "step": 1832 }, { "epoch": 0.9864119467240683, "grad_norm": 0.25390625, "learning_rate": 0.00010254283920475481, "loss": 1.2885, "step": 1833 }, { "epoch": 0.9869500874478676, "grad_norm": 0.251953125, "learning_rate": 0.00010245809526813546, "loss": 1.3573, "step": 1834 }, { "epoch": 0.9874882281716669, "grad_norm": 0.2451171875, "learning_rate": 0.00010237334956512208, "loss": 1.2272, "step": 1835 }, { "epoch": 0.9880263688954661, "grad_norm": 0.23828125, "learning_rate": 0.00010228860215661309, "loss": 1.2766, "step": 1836 }, { "epoch": 0.9885645096192655, "grad_norm": 0.24609375, "learning_rate": 0.00010220385310350828, "loss": 1.2172, "step": 1837 }, { "epoch": 0.9891026503430647, "grad_norm": 0.26171875, "learning_rate": 0.00010211910246670849, "loss": 1.359, "step": 1838 }, { "epoch": 0.989640791066864, "grad_norm": 0.2451171875, "learning_rate": 0.00010203435030711577, "loss": 1.2103, "step": 1839 }, { "epoch": 0.9901789317906633, "grad_norm": 0.25, "learning_rate": 0.00010194959668563331, "loss": 1.2803, "step": 1840 }, { "epoch": 0.9907170725144625, "grad_norm": 0.25390625, "learning_rate": 0.00010186484166316526, "loss": 1.2256, "step": 1841 }, { "epoch": 0.9912552132382618, "grad_norm": 0.2490234375, "learning_rate": 0.0001017800853006168, "loss": 1.2931, "step": 1842 }, { "epoch": 0.991793353962061, "grad_norm": 0.2412109375, "learning_rate": 0.00010169532765889411, "loss": 1.2395, "step": 1843 }, { "epoch": 0.9923314946858603, "grad_norm": 0.2451171875, "learning_rate": 0.00010161056879890427, "loss": 1.3306, "step": 1844 }, { "epoch": 0.9928696354096597, "grad_norm": 0.265625, "learning_rate": 0.0001015258087815552, "loss": 1.2885, "step": 1845 }, { "epoch": 0.9934077761334589, "grad_norm": 0.25, "learning_rate": 0.00010144104766775572, "loss": 1.3232, "step": 1846 }, { "epoch": 0.9939459168572582, "grad_norm": 0.2392578125, "learning_rate": 0.00010135628551841538, "loss": 1.2745, "step": 1847 }, { "epoch": 0.9944840575810574, "grad_norm": 0.271484375, "learning_rate": 0.00010127152239444453, "loss": 1.3829, "step": 1848 }, { "epoch": 0.9950221983048567, "grad_norm": 0.25, "learning_rate": 0.00010118675835675415, "loss": 1.3545, "step": 1849 }, { "epoch": 0.995560339028656, "grad_norm": 0.2412109375, "learning_rate": 0.00010110199346625592, "loss": 1.2417, "step": 1850 }, { "epoch": 0.9960984797524552, "grad_norm": 0.255859375, "learning_rate": 0.00010101722778386214, "loss": 1.2712, "step": 1851 }, { "epoch": 0.9966366204762546, "grad_norm": 0.259765625, "learning_rate": 0.00010093246137048561, "loss": 1.3922, "step": 1852 }, { "epoch": 0.9971747612000538, "grad_norm": 0.25390625, "learning_rate": 0.0001008476942870398, "loss": 1.3037, "step": 1853 }, { "epoch": 0.9977129019238531, "grad_norm": 0.255859375, "learning_rate": 0.00010076292659443852, "loss": 1.2977, "step": 1854 }, { "epoch": 0.9982510426476524, "grad_norm": 0.255859375, "learning_rate": 0.00010067815835359604, "loss": 1.3124, "step": 1855 }, { "epoch": 0.9987891833714516, "grad_norm": 0.255859375, "learning_rate": 0.00010059338962542713, "loss": 1.2956, "step": 1856 }, { "epoch": 0.9993273240952509, "grad_norm": 0.2431640625, "learning_rate": 0.00010050862047084672, "loss": 1.3028, "step": 1857 }, { "epoch": 0.9998654648190501, "grad_norm": 0.2578125, "learning_rate": 0.00010042385095077032, "loss": 1.3911, "step": 1858 }, { "epoch": 0.9998654648190501, "eval_loss": 1.3096983432769775, "eval_runtime": 1073.1733, "eval_samples_per_second": 4.74, "eval_steps_per_second": 2.371, "step": 1858 } ], "logging_steps": 1, "max_steps": 3716, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1858, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.751514662030541e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }